From ef246da8e63c486780dca4d9b4d79589cbebf5e5 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sat, 24 Jan 2026 21:14:13 +0200
Subject: dma-buf: Rename .move_notify() callback to a clearer identifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename the .move_notify() callback to .invalidate_mappings() to make its
purpose explicit and highlight that it is responsible for invalidating
existing mappings.

Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20260124-dmabuf-revoke-v5-1-f98fca917e96@nvidia.com
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 include/linux/dma-buf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 91f4939db89b..d9ee4499b37d 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -407,7 +407,7 @@ struct dma_buf {
 	 *   through the device.
 	 *
 	 * - Dynamic importers should set fences for any access that they can't
-	 *   disable immediately from their &dma_buf_attach_ops.move_notify
+	 *   disable immediately from their &dma_buf_attach_ops.invalidate_mappings
 	 *   callback.
 	 *
 	 * IMPORTANT:
@@ -446,7 +446,7 @@ struct dma_buf_attach_ops {
 	bool allow_peer2peer;
 
 	/**
-	 * @move_notify: [optional] notification that the DMA-buf is moving
+	 * @invalidate_mappings: [optional] notification that the DMA-buf is moving
 	 *
 	 * If this callback is provided the framework can avoid pinning the
 	 * backing store while mappings exists.
@@ -463,7 +463,7 @@ struct dma_buf_attach_ops {
 	 * New mappings can be created after this callback returns, and will
 	 * point to the new location of the DMA-buf.
 	 */
-	void (*move_notify)(struct dma_buf_attachment *attach);
+	void (*invalidate_mappings)(struct dma_buf_attachment *attach);
 };
 
 /**
-- 
cgit v1.2.3


From 95308225e5baeaae1e313816059c59a0036ab6b2 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sat, 24 Jan 2026 21:14:14 +0200
Subject: dma-buf: Rename dma_buf_move_notify() to
 dma_buf_invalidate_mappings()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Along with renaming the .move_notify() callback, rename the corresponding
dma-buf core function. This makes the expected behavior clear to exporters
calling this function.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20260124-dmabuf-revoke-v5-2-f98fca917e96@nvidia.com
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-buf.c                  | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
 drivers/gpu/drm/xe/xe_bo.c                 | 2 +-
 drivers/iommu/iommufd/selftest.c           | 2 +-
 drivers/vfio/pci/vfio_pci_dmabuf.c         | 4 ++--
 include/linux/dma-buf.h                    | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index cc9b88214d97..1c257607a623 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -981,7 +981,7 @@ dma_buf_pin_on_map(struct dma_buf_attachment *attach)
  * 3. Exporters must hold the dma-buf reservation lock when calling these
  *    functions:
  *
- *     - dma_buf_move_notify()
+ *     - dma_buf_invalidate_mappings()
  */
 
 /**
@@ -1323,14 +1323,14 @@ void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach,
 EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment_unlocked, "DMA_BUF");
 
 /**
- * dma_buf_move_notify - notify attachments that DMA-buf is moving
+ * dma_buf_invalidate_mappings - notify attachments that DMA-buf is moving
  *
  * @dmabuf:	[in]	buffer which is moving
  *
  * Informs all attachments that they need to destroy and recreate all their
  * mappings.
  */
-void dma_buf_move_notify(struct dma_buf *dmabuf)
+void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
 {
 	struct dma_buf_attachment *attach;
 
@@ -1340,7 +1340,7 @@ void dma_buf_move_notify(struct dma_buf *dmabuf)
 		if (attach->importer_ops)
 			attach->importer_ops->invalidate_mappings(attach);
 }
-EXPORT_SYMBOL_NS_GPL(dma_buf_move_notify, "DMA_BUF");
+EXPORT_SYMBOL_NS_GPL(dma_buf_invalidate_mappings, "DMA_BUF");
 
 /**
  * DOC: cpu access
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e08f58de4b17..f73dc99d1887 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1270,7 +1270,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 
 	if (abo->tbo.base.dma_buf && !drm_gem_is_imported(&abo->tbo.base) &&
 	    old_mem && old_mem->mem_type != TTM_PL_SYSTEM)
-		dma_buf_move_notify(abo->tbo.base.dma_buf);
+		dma_buf_invalidate_mappings(abo->tbo.base.dma_buf);
 
 	/* move_notify is called before move happens */
 	trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index b0bd31d14bb9..94712b05edff 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -819,7 +819,7 @@ static int xe_bo_move_notify(struct xe_bo *bo,
 
 	/* Don't call move_notify() for imported dma-bufs. */
 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
-		dma_buf_move_notify(ttm_bo->base.dma_buf);
+		dma_buf_invalidate_mappings(ttm_bo->base.dma_buf);
 
 	/*
 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index c4322fd26f93..fd47953db4a3 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -2073,7 +2073,7 @@ static int iommufd_test_dmabuf_revoke(struct iommufd_ucmd *ucmd, int fd,
 	priv = dmabuf->priv;
 	dma_resv_lock(dmabuf->resv, NULL);
 	priv->revoked = revoked;
-	dma_buf_move_notify(dmabuf);
+	dma_buf_invalidate_mappings(dmabuf);
 	dma_resv_unlock(dmabuf->resv);
 
 err_put:
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index d4d0f7d08c53..362e3d149817 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -320,7 +320,7 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
 		if (priv->revoked != revoked) {
 			dma_resv_lock(priv->dmabuf->resv, NULL);
 			priv->revoked = revoked;
-			dma_buf_move_notify(priv->dmabuf);
+			dma_buf_invalidate_mappings(priv->dmabuf);
 			dma_resv_unlock(priv->dmabuf->resv);
 		}
 		fput(priv->dmabuf->file);
@@ -341,7 +341,7 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
 		list_del_init(&priv->dmabufs_elm);
 		priv->vdev = NULL;
 		priv->revoked = true;
-		dma_buf_move_notify(priv->dmabuf);
+		dma_buf_invalidate_mappings(priv->dmabuf);
 		dma_resv_unlock(priv->dmabuf->resv);
 		vfio_device_put_registration(&vdev->vdev);
 		fput(priv->dmabuf->file);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index d9ee4499b37d..d0470af8887e 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -588,7 +588,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
 					enum dma_data_direction);
 void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
 				enum dma_data_direction);
-void dma_buf_move_notify(struct dma_buf *dma_buf);
+void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
 int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 			     enum dma_data_direction dir);
 int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
-- 
cgit v1.2.3


From 2bcbc706dfa02ae50118173a6f6d8a12e735480c Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 19 Dec 2025 11:41:54 +0100
Subject: dma-buf: add dma_fence_was_initialized function v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some driver use fence->ops to test if a fence was initialized or not.
The problem is that this utilizes internal behavior of the dma_fence
implementation.

So better abstract that into a function.

v2: use a flag instead of testing fence->ops, rename the function, move
    to the beginning of the patch set.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Link: https://lore.kernel.org/r/20260120105655.7134-2-christian.koenig@amd.com
---
 drivers/dma-buf/dma-fence.c             |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 13 +++++++------
 drivers/gpu/drm/qxl/qxl_release.c       |  2 +-
 include/linux/dma-fence.h               | 15 +++++++++++++++
 4 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 21c5c30b4f34..c9a036b0d592 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -1054,7 +1054,7 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
 	fence->lock = lock;
 	fence->context = context;
 	fence->seqno = seqno;
-	fence->flags = flags;
+	fence->flags = flags | BIT(DMA_FENCE_FLAG_INITIALIZED_BIT);
 	fence->error = 0;
 
 	trace_dma_fence_init(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index aaf5477fcd7a..f05683d59f8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -282,9 +282,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
 	unsigned i;
 
 	/* Check if any fences were initialized */
-	if (job->base.s_fence && job->base.s_fence->finished.ops)
+	if (job->base.s_fence &&
+	    dma_fence_was_initialized(&job->base.s_fence->finished))
 		f = &job->base.s_fence->finished;
-	else if (job->hw_fence && job->hw_fence->base.ops)
+	else if (dma_fence_was_initialized(&job->hw_fence->base))
 		f = &job->hw_fence->base;
 	else
 		f = NULL;
@@ -301,11 +302,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	amdgpu_sync_free(&job->explicit_sync);
 
-	if (job->hw_fence->base.ops)
+	if (dma_fence_was_initialized(&job->hw_fence->base))
 		dma_fence_put(&job->hw_fence->base);
 	else
 		kfree(job->hw_fence);
-	if (job->hw_vm_fence->base.ops)
+	if (dma_fence_was_initialized(&job->hw_vm_fence->base))
 		dma_fence_put(&job->hw_vm_fence->base);
 	else
 		kfree(job->hw_vm_fence);
@@ -339,11 +340,11 @@ void amdgpu_job_free(struct amdgpu_job *job)
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
-	if (job->hw_fence->base.ops)
+	if (dma_fence_was_initialized(&job->hw_fence->base))
 		dma_fence_put(&job->hw_fence->base);
 	else
 		kfree(job->hw_fence);
-	if (job->hw_vm_fence->base.ops)
+	if (dma_fence_was_initialized(&job->hw_vm_fence->base))
 		dma_fence_put(&job->hw_vm_fence->base);
 	else
 		kfree(job->hw_vm_fence);
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 7b3c9a6016db..06b0b2aa7953 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -146,7 +146,7 @@ qxl_release_free(struct qxl_device *qdev,
 	idr_remove(&qdev->release_idr, release->id);
 	spin_unlock(&qdev->release_idr_lock);
 
-	if (release->base.ops) {
+	if (dma_fence_was_initialized(&release->base)) {
 		WARN_ON(list_empty(&release->bos));
 		qxl_release_free_list(release);
 
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index d4c92fd35092..9c4d25289239 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -48,6 +48,7 @@ struct seq_file;
  * atomic ops (bit_*), so taking the spinlock will not be needed most
  * of the time.
  *
+ * DMA_FENCE_FLAG_INITIALIZED_BIT - fence was initialized
  * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
  * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
  * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
@@ -98,6 +99,7 @@ struct dma_fence {
 };
 
 enum dma_fence_flag_bits {
+	DMA_FENCE_FLAG_INITIALIZED_BIT,
 	DMA_FENCE_FLAG_SEQNO64_BIT,
 	DMA_FENCE_FLAG_SIGNALED_BIT,
 	DMA_FENCE_FLAG_TIMESTAMP_BIT,
@@ -263,6 +265,19 @@ void dma_fence_release(struct kref *kref);
 void dma_fence_free(struct dma_fence *fence);
 void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq);
 
+/**
+ * dma_fence_was_initialized - test if fence was initialized
+ * @fence: fence to test
+ *
+ * Return: True if fence was ever initialized, false otherwise. Works correctly
+ * only when memory backing the fence structure is zero initialized on
+ * allocation.
+ */
+static inline bool dma_fence_was_initialized(struct dma_fence *fence)
+{
+	return fence && test_bit(DMA_FENCE_FLAG_INITIALIZED_BIT, &fence->flags);
+}
+
 /**
  * dma_fence_put - decreases refcount of the fence
  * @fence: fence to reduce refcount of
-- 
cgit v1.2.3


From 4a9671a03f2be13acde0cb15c5208767a9cc56e4 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelagnelf@nvidia.com>
Date: Fri, 6 Feb 2026 08:52:38 +1000
Subject: gpu: Move DRM buddy allocator one level up (part one)

Move the DRM buddy allocator one level up so that it can be used by GPU
drivers (example, nova-core) that have usecases other than DRM (such as
VFIO vGPU support). Modify the API, structures and Kconfigs to use
"gpu_buddy" terminology. Adapt the drivers and tests to use the new API.

The commit cannot be split due to bisectability, however no functional
change is intended. Verified by running K-UNIT tests and build tested
various configurations.

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
[airlied: I've split this into two so git can find copies easier.
I've also just nuked drm_random library, that stuff needs to be done
elsewhere and only the buddy tests seem to be using it].
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 Documentation/gpu/drm-mm.rst                  |    6 +-
 drivers/gpu/Makefile                          |    2 +-
 drivers/gpu/buddy.c                           | 1336 +++++++++++++++++++++++++
 drivers/gpu/drm/Kconfig                       |    4 -
 drivers/gpu/drm/Kconfig.debug                 |    1 -
 drivers/gpu/drm/Makefile                      |    3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |    2 +-
 drivers/gpu/drm/drm_buddy.c                   | 1336 -------------------------
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c       |    2 +-
 drivers/gpu/drm/i915/i915_scatterlist.c       |    2 +-
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |    2 +-
 drivers/gpu/drm/lib/drm_random.c              |   44 -
 drivers/gpu/drm/lib/drm_random.h              |   28 -
 drivers/gpu/drm/tests/Makefile                |    1 -
 drivers/gpu/drm/tests/drm_buddy_test.c        |  928 -----------------
 drivers/gpu/drm/tests/drm_exec_test.c         |    2 -
 drivers/gpu/drm/tests/drm_mm_test.c           |    2 -
 drivers/gpu/drm/ttm/tests/ttm_mock_manager.h  |    2 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h    |    2 +-
 drivers/gpu/tests/Makefile                    |    4 +
 drivers/gpu/tests/gpu_buddy_test.c            |  928 +++++++++++++++++
 drivers/gpu/tests/gpu_random.c                |   44 +
 drivers/gpu/tests/gpu_random.h                |   28 +
 include/drm/drm_buddy.h                       |  171 ----
 include/linux/gpu_buddy.h                     |  171 ++++
 25 files changed, 2522 insertions(+), 2529 deletions(-)
 create mode 100644 drivers/gpu/buddy.c
 delete mode 100644 drivers/gpu/drm/drm_buddy.c
 delete mode 100644 drivers/gpu/drm/lib/drm_random.c
 delete mode 100644 drivers/gpu/drm/lib/drm_random.h
 delete mode 100644 drivers/gpu/drm/tests/drm_buddy_test.c
 create mode 100644 drivers/gpu/tests/Makefile
 create mode 100644 drivers/gpu/tests/gpu_buddy_test.c
 create mode 100644 drivers/gpu/tests/gpu_random.c
 create mode 100644 drivers/gpu/tests/gpu_random.h
 delete mode 100644 include/drm/drm_buddy.h
 create mode 100644 include/linux/gpu_buddy.h

(limited to 'include/linux')

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index f22433470c76..ceee0e663237 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -526,10 +526,10 @@ DRM GPUVM Function References
 DRM Buddy Allocator
 ===================
 
-DRM Buddy Function References
------------------------------
+Buddy Allocator Function References (GPU buddy)
+-----------------------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/drm_buddy.c
+.. kernel-doc:: drivers/gpu/buddy.c
    :export:
 
 DRM Cache Handling and Fast WC memcpy()
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index 36a54d456630..c5292ee2c852 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -2,7 +2,7 @@
 # drm/tegra depends on host1x, so if both drivers are built-in care must be
 # taken to initialize them in the correct order. Link order is the only way
 # to ensure this currently.
-obj-y			+= host1x/ drm/ vga/
+obj-y			+= host1x/ drm/ vga/ tests/
 obj-$(CONFIG_IMX_IPUV3_CORE)	+= ipu-v3/
 obj-$(CONFIG_TRACE_GPU_MEM)		+= trace/
 obj-$(CONFIG_NOVA_CORE)		+= nova-core/
diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c
new file mode 100644
index 000000000000..4cc63d961d26
--- /dev/null
+++ b/drivers/gpu/buddy.c
@@ -0,0 +1,1336 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <kunit/test-bug.h>
+
+#include <linux/export.h>
+#include <linux/kmemleak.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+
+#include <linux/gpu_buddy.h>
+#include <drm/drm_print.h>
+
+enum drm_buddy_free_tree {
+	DRM_BUDDY_CLEAR_TREE = 0,
+	DRM_BUDDY_DIRTY_TREE,
+	DRM_BUDDY_MAX_FREE_TREES,
+};
+
+static struct kmem_cache *slab_blocks;
+
+#define for_each_free_tree(tree) \
+	for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
+
+static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
+					       struct drm_buddy_block *parent,
+					       unsigned int order,
+					       u64 offset)
+{
+	struct drm_buddy_block *block;
+
+	BUG_ON(order > DRM_BUDDY_MAX_ORDER);
+
+	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
+	if (!block)
+		return NULL;
+
+	block->header = offset;
+	block->header |= order;
+	block->parent = parent;
+
+	RB_CLEAR_NODE(&block->rb);
+
+	BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
+	return block;
+}
+
+static void drm_block_free(struct drm_buddy *mm,
+			   struct drm_buddy_block *block)
+{
+	kmem_cache_free(slab_blocks, block);
+}
+
+static enum drm_buddy_free_tree
+get_block_tree(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_is_clear(block) ?
+	       DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+}
+
+static struct drm_buddy_block *
+rbtree_get_free_block(const struct rb_node *node)
+{
+	return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
+}
+
+static struct drm_buddy_block *
+rbtree_last_free_block(struct rb_root *root)
+{
+	return rbtree_get_free_block(rb_last(root));
+}
+
+static bool rbtree_is_empty(struct rb_root *root)
+{
+	return RB_EMPTY_ROOT(root);
+}
+
+static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
+					const struct drm_buddy_block *node)
+{
+	return drm_buddy_block_offset(block) < drm_buddy_block_offset(node);
+}
+
+static bool rbtree_block_offset_less(struct rb_node *block,
+				     const struct rb_node *node)
+{
+	return drm_buddy_block_offset_less(rbtree_get_free_block(block),
+					   rbtree_get_free_block(node));
+}
+
+static void rbtree_insert(struct drm_buddy *mm,
+			  struct drm_buddy_block *block,
+			  enum drm_buddy_free_tree tree)
+{
+	rb_add(&block->rb,
+	       &mm->free_trees[tree][drm_buddy_block_order(block)],
+	       rbtree_block_offset_less);
+}
+
+static void rbtree_remove(struct drm_buddy *mm,
+			  struct drm_buddy_block *block)
+{
+	unsigned int order = drm_buddy_block_order(block);
+	enum drm_buddy_free_tree tree;
+	struct rb_root *root;
+
+	tree = get_block_tree(block);
+	root = &mm->free_trees[tree][order];
+
+	rb_erase(&block->rb, root);
+	RB_CLEAR_NODE(&block->rb);
+}
+
+static void clear_reset(struct drm_buddy_block *block)
+{
+	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+	block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_allocated(struct drm_buddy *mm,
+			   struct drm_buddy_block *block)
+{
+	block->header &= ~DRM_BUDDY_HEADER_STATE;
+	block->header |= DRM_BUDDY_ALLOCATED;
+
+	rbtree_remove(mm, block);
+}
+
+static void mark_free(struct drm_buddy *mm,
+		      struct drm_buddy_block *block)
+{
+	enum drm_buddy_free_tree tree;
+
+	block->header &= ~DRM_BUDDY_HEADER_STATE;
+	block->header |= DRM_BUDDY_FREE;
+
+	tree = get_block_tree(block);
+	rbtree_insert(mm, block, tree);
+}
+
+static void mark_split(struct drm_buddy *mm,
+		       struct drm_buddy_block *block)
+{
+	block->header &= ~DRM_BUDDY_HEADER_STATE;
+	block->header |= DRM_BUDDY_SPLIT;
+
+	rbtree_remove(mm, block);
+}
+
+static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+	return s1 <= e2 && e1 >= s2;
+}
+
+static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+	return s1 <= s2 && e1 >= e2;
+}
+
+static struct drm_buddy_block *
+__get_buddy(struct drm_buddy_block *block)
+{
+	struct drm_buddy_block *parent;
+
+	parent = block->parent;
+	if (!parent)
+		return NULL;
+
+	if (parent->left == block)
+		return parent->right;
+
+	return parent->left;
+}
+
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+				     struct drm_buddy_block *block,
+				     bool force_merge)
+{
+	struct drm_buddy_block *parent;
+	unsigned int order;
+
+	while ((parent = block->parent)) {
+		struct drm_buddy_block *buddy;
+
+		buddy = __get_buddy(block);
+
+		if (!drm_buddy_block_is_free(buddy))
+			break;
+
+		if (!force_merge) {
+			/*
+			 * Check the block and its buddy clear state and exit
+			 * the loop if they both have the dissimilar state.
+			 */
+			if (drm_buddy_block_is_clear(block) !=
+			    drm_buddy_block_is_clear(buddy))
+				break;
+
+			if (drm_buddy_block_is_clear(block))
+				mark_cleared(parent);
+		}
+
+		rbtree_remove(mm, buddy);
+		if (force_merge && drm_buddy_block_is_clear(buddy))
+			mm->clear_avail -= drm_buddy_block_size(mm, buddy);
+
+		drm_block_free(mm, block);
+		drm_block_free(mm, buddy);
+
+		block = parent;
+	}
+
+	order = drm_buddy_block_order(block);
+	mark_free(mm, block);
+
+	return order;
+}
+
+static int __force_merge(struct drm_buddy *mm,
+			 u64 start,
+			 u64 end,
+			 unsigned int min_order)
+{
+	unsigned int tree, order;
+	int i;
+
+	if (!min_order)
+		return -ENOMEM;
+
+	if (min_order > mm->max_order)
+		return -EINVAL;
+
+	for_each_free_tree(tree) {
+		for (i = min_order - 1; i >= 0; i--) {
+			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
+
+			while (iter) {
+				struct drm_buddy_block *block, *buddy;
+				u64 block_start, block_end;
+
+				block = rbtree_get_free_block(iter);
+				iter = rb_prev(iter);
+
+				if (!block || !block->parent)
+					continue;
+
+				block_start = drm_buddy_block_offset(block);
+				block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+				if (!contains(start, end, block_start, block_end))
+					continue;
+
+				buddy = __get_buddy(block);
+				if (!drm_buddy_block_is_free(buddy))
+					continue;
+
+				WARN_ON(drm_buddy_block_is_clear(block) ==
+					drm_buddy_block_is_clear(buddy));
+
+				/*
+				 * Advance to the next node when the current node is the buddy,
+				 * as freeing the block will also remove its buddy from the tree.
+				 */
+				if (iter == &buddy->rb)
+					iter = rb_prev(iter);
+
+				rbtree_remove(mm, block);
+				if (drm_buddy_block_is_clear(block))
+					mm->clear_avail -= drm_buddy_block_size(mm, block);
+
+				order = __drm_buddy_free(mm, block, true);
+				if (order >= min_order)
+					return 0;
+			}
+		}
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * drm_buddy_init - init memory manager
+ *
+ * @mm: DRM buddy manager to initialize
+ * @size: size in bytes to manage
+ * @chunk_size: minimum page size in bytes for our allocations
+ *
+ * Initializes the memory manager and its resources.
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
+{
+	unsigned int i, j, root_count = 0;
+	u64 offset = 0;
+
+	if (size < chunk_size)
+		return -EINVAL;
+
+	if (chunk_size < SZ_4K)
+		return -EINVAL;
+
+	if (!is_power_of_2(chunk_size))
+		return -EINVAL;
+
+	size = round_down(size, chunk_size);
+
+	mm->size = size;
+	mm->avail = size;
+	mm->clear_avail = 0;
+	mm->chunk_size = chunk_size;
+	mm->max_order = ilog2(size) - ilog2(chunk_size);
+
+	BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
+
+	mm->free_trees = kmalloc_array(DRM_BUDDY_MAX_FREE_TREES,
+				       sizeof(*mm->free_trees),
+				       GFP_KERNEL);
+	if (!mm->free_trees)
+		return -ENOMEM;
+
+	for_each_free_tree(i) {
+		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
+						  sizeof(struct rb_root),
+						  GFP_KERNEL);
+		if (!mm->free_trees[i])
+			goto out_free_tree;
+
+		for (j = 0; j <= mm->max_order; ++j)
+			mm->free_trees[i][j] = RB_ROOT;
+	}
+
+	mm->n_roots = hweight64(size);
+
+	mm->roots = kmalloc_array(mm->n_roots,
+				  sizeof(struct drm_buddy_block *),
+				  GFP_KERNEL);
+	if (!mm->roots)
+		goto out_free_tree;
+
+	/*
+	 * Split into power-of-two blocks, in case we are given a size that is
+	 * not itself a power-of-two.
+	 */
+	do {
+		struct drm_buddy_block *root;
+		unsigned int order;
+		u64 root_size;
+
+		order = ilog2(size) - ilog2(chunk_size);
+		root_size = chunk_size << order;
+
+		root = drm_block_alloc(mm, NULL, order, offset);
+		if (!root)
+			goto out_free_roots;
+
+		mark_free(mm, root);
+
+		BUG_ON(root_count > mm->max_order);
+		BUG_ON(drm_buddy_block_size(mm, root) < chunk_size);
+
+		mm->roots[root_count] = root;
+
+		offset += root_size;
+		size -= root_size;
+		root_count++;
+	} while (size);
+
+	return 0;
+
+out_free_roots:
+	while (root_count--)
+		drm_block_free(mm, mm->roots[root_count]);
+	kfree(mm->roots);
+out_free_tree:
+	while (i--)
+		kfree(mm->free_trees[i]);
+	kfree(mm->free_trees);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(drm_buddy_init);
+
+/**
+ * drm_buddy_fini - tear down the memory manager
+ *
+ * @mm: DRM buddy manager to free
+ *
+ * Cleanup memory manager resources and the freetree
+ */
+void drm_buddy_fini(struct drm_buddy *mm)
+{
+	u64 root_size, size, start;
+	unsigned int order;
+	int i;
+
+	size = mm->size;
+
+	for (i = 0; i < mm->n_roots; ++i) {
+		order = ilog2(size) - ilog2(mm->chunk_size);
+		start = drm_buddy_block_offset(mm->roots[i]);
+		__force_merge(mm, start, start + size, order);
+
+		if (WARN_ON(!drm_buddy_block_is_free(mm->roots[i])))
+			kunit_fail_current_test("buddy_fini() root");
+
+		drm_block_free(mm, mm->roots[i]);
+
+		root_size = mm->chunk_size << order;
+		size -= root_size;
+	}
+
+	WARN_ON(mm->avail != mm->size);
+
+	for_each_free_tree(i)
+		kfree(mm->free_trees[i]);
+	kfree(mm->free_trees);
+	kfree(mm->roots);
+}
+EXPORT_SYMBOL(drm_buddy_fini);
+
+static int split_block(struct drm_buddy *mm,
+		       struct drm_buddy_block *block)
+{
+	unsigned int block_order = drm_buddy_block_order(block) - 1;
+	u64 offset = drm_buddy_block_offset(block);
+
+	BUG_ON(!drm_buddy_block_is_free(block));
+	BUG_ON(!drm_buddy_block_order(block));
+
+	block->left = drm_block_alloc(mm, block, block_order, offset);
+	if (!block->left)
+		return -ENOMEM;
+
+	block->right = drm_block_alloc(mm, block, block_order,
+				       offset + (mm->chunk_size << block_order));
+	if (!block->right) {
+		drm_block_free(mm, block->left);
+		return -ENOMEM;
+	}
+
+	mark_split(mm, block);
+
+	if (drm_buddy_block_is_clear(block)) {
+		mark_cleared(block->left);
+		mark_cleared(block->right);
+		clear_reset(block);
+	}
+
+	mark_free(mm, block->left);
+	mark_free(mm, block->right);
+
+	return 0;
+}
+
+/**
+ * drm_get_buddy - get buddy address
+ *
+ * @block: DRM buddy block
+ *
+ * Returns the corresponding buddy block for @block, or NULL
+ * if this is a root block and can't be merged further.
+ * Requires some kind of locking to protect against
+ * any concurrent allocate and free operations.
+ */
+struct drm_buddy_block *
+drm_get_buddy(struct drm_buddy_block *block)
+{
+	return __get_buddy(block);
+}
+EXPORT_SYMBOL(drm_get_buddy);
+
+/**
+ * drm_buddy_reset_clear - reset blocks clear state
+ *
+ * @mm: DRM buddy manager
+ * @is_clear: blocks clear state
+ *
+ * Reset the clear state based on @is_clear value for each block
+ * in the freetree.
+ */
+void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
+{
+	enum drm_buddy_free_tree src_tree, dst_tree;
+	u64 root_size, size, start;
+	unsigned int order;
+	int i;
+
+	size = mm->size;
+	for (i = 0; i < mm->n_roots; ++i) {
+		order = ilog2(size) - ilog2(mm->chunk_size);
+		start = drm_buddy_block_offset(mm->roots[i]);
+		__force_merge(mm, start, start + size, order);
+
+		root_size = mm->chunk_size << order;
+		size -= root_size;
+	}
+
+	src_tree = is_clear ? DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
+	dst_tree = is_clear ? DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+
+	for (i = 0; i <= mm->max_order; ++i) {
+		struct rb_root *root = &mm->free_trees[src_tree][i];
+		struct drm_buddy_block *block, *tmp;
+
+		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
+			rbtree_remove(mm, block);
+			if (is_clear) {
+				mark_cleared(block);
+				mm->clear_avail += drm_buddy_block_size(mm, block);
+			} else {
+				clear_reset(block);
+				mm->clear_avail -= drm_buddy_block_size(mm, block);
+			}
+
+			rbtree_insert(mm, block, dst_tree);
+		}
+	}
+}
+EXPORT_SYMBOL(drm_buddy_reset_clear);
+
+/**
+ * drm_buddy_free_block - free a block
+ *
+ * @mm: DRM buddy manager
+ * @block: block to be freed
+ */
+void drm_buddy_free_block(struct drm_buddy *mm,
+			  struct drm_buddy_block *block)
+{
+	BUG_ON(!drm_buddy_block_is_allocated(block));
+	mm->avail += drm_buddy_block_size(mm, block);
+	if (drm_buddy_block_is_clear(block))
+		mm->clear_avail += drm_buddy_block_size(mm, block);
+
+	__drm_buddy_free(mm, block, false);
+}
+EXPORT_SYMBOL(drm_buddy_free_block);
+
+static void __drm_buddy_free_list(struct drm_buddy *mm,
+				  struct list_head *objects,
+				  bool mark_clear,
+				  bool mark_dirty)
+{
+	struct drm_buddy_block *block, *on;
+
+	WARN_ON(mark_dirty && mark_clear);
+
+	list_for_each_entry_safe(block, on, objects, link) {
+		if (mark_clear)
+			mark_cleared(block);
+		else if (mark_dirty)
+			clear_reset(block);
+		drm_buddy_free_block(mm, block);
+		cond_resched();
+	}
+	INIT_LIST_HEAD(objects);
+}
+
+static void drm_buddy_free_list_internal(struct drm_buddy *mm,
+					 struct list_head *objects)
+{
+	/*
+	 * Don't touch the clear/dirty bit, since allocation is still internal
+	 * at this point. For example we might have just failed part of the
+	 * allocation.
+	 */
+	__drm_buddy_free_list(mm, objects, false, false);
+}
+
+/**
+ * drm_buddy_free_list - free blocks
+ *
+ * @mm: DRM buddy manager
+ * @objects: input list head to free blocks
+ * @flags: optional flags like DRM_BUDDY_CLEARED
+ */
+void drm_buddy_free_list(struct drm_buddy *mm,
+			 struct list_head *objects,
+			 unsigned int flags)
+{
+	bool mark_clear = flags & DRM_BUDDY_CLEARED;
+
+	__drm_buddy_free_list(mm, objects, mark_clear, !mark_clear);
+}
+EXPORT_SYMBOL(drm_buddy_free_list);
+
+static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags)
+{
+	bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION;
+
+	return needs_clear != drm_buddy_block_is_clear(block);
+}
+
+static struct drm_buddy_block *
+__alloc_range_bias(struct drm_buddy *mm,
+		   u64 start, u64 end,
+		   unsigned int order,
+		   unsigned long flags,
+		   bool fallback)
+{
+	u64 req_size = mm->chunk_size << order;
+	struct drm_buddy_block *block;
+	struct drm_buddy_block *buddy;
+	LIST_HEAD(dfs);
+	int err;
+	int i;
+
+	end = end - 1;
+
+	for (i = 0; i < mm->n_roots; ++i)
+		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
+
+	do {
+		u64 block_start;
+		u64 block_end;
+
+		block = list_first_entry_or_null(&dfs,
+						 struct drm_buddy_block,
+						 tmp_link);
+		if (!block)
+			break;
+
+		list_del(&block->tmp_link);
+
+		if (drm_buddy_block_order(block) < order)
+			continue;
+
+		block_start = drm_buddy_block_offset(block);
+		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+		if (!overlaps(start, end, block_start, block_end))
+			continue;
+
+		if (drm_buddy_block_is_allocated(block))
+			continue;
+
+		if (block_start < start || block_end > end) {
+			u64 adjusted_start = max(block_start, start);
+			u64 adjusted_end = min(block_end, end);
+
+			if (round_down(adjusted_end + 1, req_size) <=
+			    round_up(adjusted_start, req_size))
+				continue;
+		}
+
+		if (!fallback && block_incompatible(block, flags))
+			continue;
+
+		if (contains(start, end, block_start, block_end) &&
+		    order == drm_buddy_block_order(block)) {
+			/*
+			 * Find the free block within the range.
+			 */
+			if (drm_buddy_block_is_free(block))
+				return block;
+
+			continue;
+		}
+
+		if (!drm_buddy_block_is_split(block)) {
+			err = split_block(mm, block);
+			if (unlikely(err))
+				goto err_undo;
+		}
+
+		list_add(&block->right->tmp_link, &dfs);
+		list_add(&block->left->tmp_link, &dfs);
+	} while (1);
+
+	return ERR_PTR(-ENOSPC);
+
+err_undo:
+	/*
+	 * We really don't want to leave around a bunch of split blocks, since
+	 * bigger is better, so make sure we merge everything back before we
+	 * free the allocated blocks.
+	 */
+	buddy = __get_buddy(block);
+	if (buddy &&
+	    (drm_buddy_block_is_free(block) &&
+	     drm_buddy_block_is_free(buddy)))
+		__drm_buddy_free(mm, block, false);
+	return ERR_PTR(err);
+}
+
+static struct drm_buddy_block *
+__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
+			     u64 start, u64 end,
+			     unsigned int order,
+			     unsigned long flags)
+{
+	struct drm_buddy_block *block;
+	bool fallback = false;
+
+	block = __alloc_range_bias(mm, start, end, order,
+				   flags, fallback);
+	if (IS_ERR(block))
+		return __alloc_range_bias(mm, start, end, order,
+					  flags, !fallback);
+
+	return block;
+}
+
+static struct drm_buddy_block *
+get_maxblock(struct drm_buddy *mm,
+	     unsigned int order,
+	     enum drm_buddy_free_tree tree)
+{
+	struct drm_buddy_block *max_block = NULL, *block = NULL;
+	struct rb_root *root;
+	unsigned int i;
+
+	for (i = order; i <= mm->max_order; ++i) {
+		root = &mm->free_trees[tree][i];
+		block = rbtree_last_free_block(root);
+		if (!block)
+			continue;
+
+		if (!max_block) {
+			max_block = block;
+			continue;
+		}
+
+		if (drm_buddy_block_offset(block) >
+		    drm_buddy_block_offset(max_block)) {
+			max_block = block;
+		}
+	}
+
+	return max_block;
+}
+
+static struct drm_buddy_block *
+alloc_from_freetree(struct drm_buddy *mm,
+		    unsigned int order,
+		    unsigned long flags)
+{
+	struct drm_buddy_block *block = NULL;
+	struct rb_root *root;
+	enum drm_buddy_free_tree tree;
+	unsigned int tmp;
+	int err;
+
+	tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
+		DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+
+	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
+		block = get_maxblock(mm, order, tree);
+		if (block)
+			/* Store the obtained block order */
+			tmp = drm_buddy_block_order(block);
+	} else {
+		for (tmp = order; tmp <= mm->max_order; ++tmp) {
+			/* Get RB tree root for this order and tree */
+			root = &mm->free_trees[tree][tmp];
+			block = rbtree_last_free_block(root);
+			if (block)
+				break;
+		}
+	}
+
+	if (!block) {
+		/* Try allocating from the other tree */
+		tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
+			DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
+
+		for (tmp = order; tmp <= mm->max_order; ++tmp) {
+			root = &mm->free_trees[tree][tmp];
+			block = rbtree_last_free_block(root);
+			if (block)
+				break;
+		}
+
+		if (!block)
+			return ERR_PTR(-ENOSPC);
+	}
+
+	BUG_ON(!drm_buddy_block_is_free(block));
+
+	while (tmp != order) {
+		err = split_block(mm, block);
+		if (unlikely(err))
+			goto err_undo;
+
+		block = block->right;
+		tmp--;
+	}
+	return block;
+
+err_undo:
+	if (tmp != order)
+		__drm_buddy_free(mm, block, false);
+	return ERR_PTR(err);
+}
+
+static int __alloc_range(struct drm_buddy *mm,
+			 struct list_head *dfs,
+			 u64 start, u64 size,
+			 struct list_head *blocks,
+			 u64 *total_allocated_on_err)
+{
+	struct drm_buddy_block *block;
+	struct drm_buddy_block *buddy;
+	u64 total_allocated = 0;
+	LIST_HEAD(allocated);
+	u64 end;
+	int err;
+
+	end = start + size - 1;
+
+	do {
+		u64 block_start;
+		u64 block_end;
+
+		block = list_first_entry_or_null(dfs,
+						 struct drm_buddy_block,
+						 tmp_link);
+		if (!block)
+			break;
+
+		list_del(&block->tmp_link);
+
+		block_start = drm_buddy_block_offset(block);
+		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+		if (!overlaps(start, end, block_start, block_end))
+			continue;
+
+		if (drm_buddy_block_is_allocated(block)) {
+			err = -ENOSPC;
+			goto err_free;
+		}
+
+		if (contains(start, end, block_start, block_end)) {
+			if (drm_buddy_block_is_free(block)) {
+				mark_allocated(mm, block);
+				total_allocated += drm_buddy_block_size(mm, block);
+				mm->avail -= drm_buddy_block_size(mm, block);
+				if (drm_buddy_block_is_clear(block))
+					mm->clear_avail -= drm_buddy_block_size(mm, block);
+				list_add_tail(&block->link, &allocated);
+				continue;
+			} else if (!mm->clear_avail) {
+				err = -ENOSPC;
+				goto err_free;
+			}
+		}
+
+		if (!drm_buddy_block_is_split(block)) {
+			err = split_block(mm, block);
+			if (unlikely(err))
+				goto err_undo;
+		}
+
+		list_add(&block->right->tmp_link, dfs);
+		list_add(&block->left->tmp_link, dfs);
+	} while (1);
+
+	if (total_allocated < size) {
+		err = -ENOSPC;
+		goto err_free;
+	}
+
+	list_splice_tail(&allocated, blocks);
+
+	return 0;
+
+err_undo:
+	/*
+	 * We really don't want to leave around a bunch of split blocks, since
+	 * bigger is better, so make sure we merge everything back before we
+	 * free the allocated blocks.
+	 */
+	buddy = __get_buddy(block);
+	if (buddy &&
+	    (drm_buddy_block_is_free(block) &&
+	     drm_buddy_block_is_free(buddy)))
+		__drm_buddy_free(mm, block, false);
+
+err_free:
+	if (err == -ENOSPC && total_allocated_on_err) {
+		list_splice_tail(&allocated, blocks);
+		*total_allocated_on_err = total_allocated;
+	} else {
+		drm_buddy_free_list_internal(mm, &allocated);
+	}
+
+	return err;
+}
+
+static int __drm_buddy_alloc_range(struct drm_buddy *mm,
+				   u64 start,
+				   u64 size,
+				   u64 *total_allocated_on_err,
+				   struct list_head *blocks)
+{
+	LIST_HEAD(dfs);
+	int i;
+
+	for (i = 0; i < mm->n_roots; ++i)
+		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
+
+	return __alloc_range(mm, &dfs, start, size,
+			     blocks, total_allocated_on_err);
+}
+
+static int __alloc_contig_try_harder(struct drm_buddy *mm,
+				     u64 size,
+				     u64 min_block_size,
+				     struct list_head *blocks)
+{
+	u64 rhs_offset, lhs_offset, lhs_size, filled;
+	struct drm_buddy_block *block;
+	unsigned int tree, order;
+	LIST_HEAD(blocks_lhs);
+	unsigned long pages;
+	u64 modify_size;
+	int err;
+
+	modify_size = rounddown_pow_of_two(size);
+	pages = modify_size >> ilog2(mm->chunk_size);
+	order = fls(pages) - 1;
+	if (order == 0)
+		return -ENOSPC;
+
+	for_each_free_tree(tree) {
+		struct rb_root *root;
+		struct rb_node *iter;
+
+		root = &mm->free_trees[tree][order];
+		if (rbtree_is_empty(root))
+			continue;
+
+		iter = rb_last(root);
+		while (iter) {
+			block = rbtree_get_free_block(iter);
+
+			/* Allocate blocks traversing RHS */
+			rhs_offset = drm_buddy_block_offset(block);
+			err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
+						       &filled, blocks);
+			if (!err || err != -ENOSPC)
+				return err;
+
+			lhs_size = max((size - filled), min_block_size);
+			if (!IS_ALIGNED(lhs_size, min_block_size))
+				lhs_size = round_up(lhs_size, min_block_size);
+
+			/* Allocate blocks traversing LHS */
+			lhs_offset = drm_buddy_block_offset(block) - lhs_size;
+			err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
+						       NULL, &blocks_lhs);
+			if (!err) {
+				list_splice(&blocks_lhs, blocks);
+				return 0;
+			} else if (err != -ENOSPC) {
+				drm_buddy_free_list_internal(mm, blocks);
+				return err;
+			}
+			/* Free blocks for the next iteration */
+			drm_buddy_free_list_internal(mm, blocks);
+
+			iter = rb_prev(iter);
+		}
+	}
+
+	return -ENOSPC;
+}
+
+/**
+ * drm_buddy_block_trim - free unused pages
+ *
+ * @mm: DRM buddy manager
+ * @start: start address to begin the trimming.
+ * @new_size: original size requested
+ * @blocks: Input and output list of allocated blocks.
+ * MUST contain single block as input to be trimmed.
+ * On success will contain the newly allocated blocks
+ * making up the @new_size. Blocks always appear in
+ * ascending order
+ *
+ * For contiguous allocation, we round up the size to the nearest
+ * power of two value, drivers consume *actual* size, so remaining
+ * portions are unused and can be optionally freed with this function
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+int drm_buddy_block_trim(struct drm_buddy *mm,
+			 u64 *start,
+			 u64 new_size,
+			 struct list_head *blocks)
+{
+	struct drm_buddy_block *parent;
+	struct drm_buddy_block *block;
+	u64 block_start, block_end;
+	LIST_HEAD(dfs);
+	u64 new_start;
+	int err;
+
+	if (!list_is_singular(blocks))
+		return -EINVAL;
+
+	block = list_first_entry(blocks,
+				 struct drm_buddy_block,
+				 link);
+
+	block_start = drm_buddy_block_offset(block);
+	block_end = block_start + drm_buddy_block_size(mm, block);
+
+	if (WARN_ON(!drm_buddy_block_is_allocated(block)))
+		return -EINVAL;
+
+	if (new_size > drm_buddy_block_size(mm, block))
+		return -EINVAL;
+
+	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
+		return -EINVAL;
+
+	if (new_size == drm_buddy_block_size(mm, block))
+		return 0;
+
+	new_start = block_start;
+	if (start) {
+		new_start = *start;
+
+		if (new_start < block_start)
+			return -EINVAL;
+
+		if (!IS_ALIGNED(new_start, mm->chunk_size))
+			return -EINVAL;
+
+		if (range_overflows(new_start, new_size, block_end))
+			return -EINVAL;
+	}
+
+	list_del(&block->link);
+	mark_free(mm, block);
+	mm->avail += drm_buddy_block_size(mm, block);
+	if (drm_buddy_block_is_clear(block))
+		mm->clear_avail += drm_buddy_block_size(mm, block);
+
+	/* Prevent recursively freeing this node */
+	parent = block->parent;
+	block->parent = NULL;
+
+	list_add(&block->tmp_link, &dfs);
+	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
+	if (err) {
+		mark_allocated(mm, block);
+		mm->avail -= drm_buddy_block_size(mm, block);
+		if (drm_buddy_block_is_clear(block))
+			mm->clear_avail -= drm_buddy_block_size(mm, block);
+		list_add(&block->link, blocks);
+	}
+
+	block->parent = parent;
+	return err;
+}
+EXPORT_SYMBOL(drm_buddy_block_trim);
+
+static struct drm_buddy_block *
+__drm_buddy_alloc_blocks(struct drm_buddy *mm,
+			 u64 start, u64 end,
+			 unsigned int order,
+			 unsigned long flags)
+{
+	if (flags & DRM_BUDDY_RANGE_ALLOCATION)
+		/* Allocate traversing within the range */
+		return  __drm_buddy_alloc_range_bias(mm, start, end,
+						     order, flags);
+	else
+		/* Allocate from freetree */
+		return alloc_from_freetree(mm, order, flags);
+}
+
+/**
+ * drm_buddy_alloc_blocks - allocate power-of-two blocks
+ *
+ * @mm: DRM buddy manager to allocate from
+ * @start: start of the allowed range for this block
+ * @end: end of the allowed range for this block
+ * @size: size of the allocation in bytes
+ * @min_block_size: alignment of the allocation
+ * @blocks: output list head to add allocated blocks
+ * @flags: DRM_BUDDY_*_ALLOCATION flags
+ *
+ * alloc_range_bias() called on range limitations, which traverses
+ * the tree and returns the desired block.
+ *
+ * alloc_from_freetree() called when *no* range restrictions
+ * are enforced, which picks the block from the freetree.
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+			   u64 start, u64 end, u64 size,
+			   u64 min_block_size,
+			   struct list_head *blocks,
+			   unsigned long flags)
+{
+	struct drm_buddy_block *block = NULL;
+	u64 original_size, original_min_size;
+	unsigned int min_order, order;
+	LIST_HEAD(allocated);
+	unsigned long pages;
+	int err;
+
+	if (size < mm->chunk_size)
+		return -EINVAL;
+
+	if (min_block_size < mm->chunk_size)
+		return -EINVAL;
+
+	if (!is_power_of_2(min_block_size))
+		return -EINVAL;
+
+	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
+		return -EINVAL;
+
+	if (end > mm->size)
+		return -EINVAL;
+
+	if (range_overflows(start, size, mm->size))
+		return -EINVAL;
+
+	/* Actual range allocation */
+	if (start + size == end) {
+		if (!IS_ALIGNED(start | end, min_block_size))
+			return -EINVAL;
+
+		return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+	}
+
+	original_size = size;
+	original_min_size = min_block_size;
+
+	/* Roundup the size to power of 2 */
+	if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
+		size = roundup_pow_of_two(size);
+		min_block_size = size;
+	/* Align size value to min_block_size */
+	} else if (!IS_ALIGNED(size, min_block_size)) {
+		size = round_up(size, min_block_size);
+	}
+
+	pages = size >> ilog2(mm->chunk_size);
+	order = fls(pages) - 1;
+	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
+
+	if (order > mm->max_order || size > mm->size) {
+		if ((flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) &&
+		    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+			return __alloc_contig_try_harder(mm, original_size,
+							 original_min_size, blocks);
+
+		return -EINVAL;
+	}
+
+	do {
+		order = min(order, (unsigned int)fls(pages) - 1);
+		BUG_ON(order > mm->max_order);
+		BUG_ON(order < min_order);
+
+		do {
+			block = __drm_buddy_alloc_blocks(mm, start,
+							 end,
+							 order,
+							 flags);
+			if (!IS_ERR(block))
+				break;
+
+			if (order-- == min_order) {
+				/* Try allocation through force merge method */
+				if (mm->clear_avail &&
+				    !__force_merge(mm, start, end, min_order)) {
+					block = __drm_buddy_alloc_blocks(mm, start,
+									 end,
+									 min_order,
+									 flags);
+					if (!IS_ERR(block)) {
+						order = min_order;
+						break;
+					}
+				}
+
+				/*
+				 * Try contiguous block allocation through
+				 * try harder method.
+				 */
+				if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
+				    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+					return __alloc_contig_try_harder(mm,
+									 original_size,
+									 original_min_size,
+									 blocks);
+				err = -ENOSPC;
+				goto err_free;
+			}
+		} while (1);
+
+		mark_allocated(mm, block);
+		mm->avail -= drm_buddy_block_size(mm, block);
+		if (drm_buddy_block_is_clear(block))
+			mm->clear_avail -= drm_buddy_block_size(mm, block);
+		kmemleak_update_trace(block);
+		list_add_tail(&block->link, &allocated);
+
+		pages -= BIT(order);
+
+		if (!pages)
+			break;
+	} while (1);
+
+	/* Trim the allocated block to the required size */
+	if (!(flags & DRM_BUDDY_TRIM_DISABLE) &&
+	    original_size != size) {
+		struct list_head *trim_list;
+		LIST_HEAD(temp);
+		u64 trim_size;
+
+		trim_list = &allocated;
+		trim_size = original_size;
+
+		if (!list_is_singular(&allocated)) {
+			block = list_last_entry(&allocated, typeof(*block), link);
+			list_move(&block->link, &temp);
+			trim_list = &temp;
+			trim_size = drm_buddy_block_size(mm, block) -
+				(size - original_size);
+		}
+
+		drm_buddy_block_trim(mm,
+				     NULL,
+				     trim_size,
+				     trim_list);
+
+		if (!list_empty(&temp))
+			list_splice_tail(trim_list, &allocated);
+	}
+
+	list_splice_tail(&allocated, blocks);
+	return 0;
+
+err_free:
+	drm_buddy_free_list_internal(mm, &allocated);
+	return err;
+}
+EXPORT_SYMBOL(drm_buddy_alloc_blocks);
+
+/**
+ * drm_buddy_block_print - print block information
+ *
+ * @mm: DRM buddy manager
+ * @block: DRM buddy block
+ * @p: DRM printer to use
+ */
+void drm_buddy_block_print(struct drm_buddy *mm,
+			   struct drm_buddy_block *block,
+			   struct drm_printer *p)
+{
+	u64 start = drm_buddy_block_offset(block);
+	u64 size = drm_buddy_block_size(mm, block);
+
+	drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size);
+}
+EXPORT_SYMBOL(drm_buddy_block_print);
+
+/**
+ * drm_buddy_print - print allocator state
+ *
+ * @mm: DRM buddy manager
+ * @p: DRM printer to use
+ */
+void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
+{
+	int order;
+
+	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
+		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
+
+	for (order = mm->max_order; order >= 0; order--) {
+		struct drm_buddy_block *block, *tmp;
+		struct rb_root *root;
+		u64 count = 0, free;
+		unsigned int tree;
+
+		for_each_free_tree(tree) {
+			root = &mm->free_trees[tree][order];
+
+			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
+				BUG_ON(!drm_buddy_block_is_free(block));
+				count++;
+			}
+		}
+
+		drm_printf(p, "order-%2d ", order);
+
+		free = count * (mm->chunk_size << order);
+		if (free < SZ_1M)
+			drm_printf(p, "free: %8llu KiB", free >> 10);
+		else
+			drm_printf(p, "free: %8llu MiB", free >> 20);
+
+		drm_printf(p, ", blocks: %llu\n", count);
+	}
+}
+EXPORT_SYMBOL(drm_buddy_print);
+
+static void drm_buddy_module_exit(void)
+{
+	kmem_cache_destroy(slab_blocks);
+}
+
+static int __init drm_buddy_module_init(void)
+{
+	slab_blocks = KMEM_CACHE(drm_buddy_block, 0);
+	if (!slab_blocks)
+		return -ENOMEM;
+
+	return 0;
+}
+
+module_init(drm_buddy_module_init);
+module_exit(drm_buddy_module_exit);
+
+MODULE_DESCRIPTION("DRM Buddy Allocator");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 5888eb147ed1..862ff4000969 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -269,10 +269,6 @@ config DRM_SCHED
 config DRM_PANEL_BACKLIGHT_QUIRKS
 	tristate
 
-config DRM_LIB_RANDOM
-	bool
-	default n
-
 config DRM_PRIVACY_SCREEN
 	bool
 	default n
diff --git a/drivers/gpu/drm/Kconfig.debug b/drivers/gpu/drm/Kconfig.debug
index 05dc43c0b8c5..3b7886865335 100644
--- a/drivers/gpu/drm/Kconfig.debug
+++ b/drivers/gpu/drm/Kconfig.debug
@@ -69,7 +69,6 @@ config DRM_KUNIT_TEST
 	select DRM_EXPORT_FOR_TESTS if m
 	select DRM_GEM_SHMEM_HELPER
 	select DRM_KUNIT_TEST_HELPERS
-	select DRM_LIB_RANDOM
 	select DRM_SYSFB_HELPER
 	select PRIME_NUMBERS
 	default KUNIT_ALL_TESTS
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 75840ec4d782..892859cfe95f 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -79,7 +79,6 @@ drm-$(CONFIG_DRM_CLIENT) += \
 	drm_client_event.o \
 	drm_client_modeset.o \
 	drm_client_sysrq.o
-drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 drm-$(CONFIG_DRM_PANEL) += drm_panel.o
 drm-$(CONFIG_OF) += drm_of.o
@@ -115,7 +114,7 @@ drm_gpusvm_helper-$(CONFIG_ZONE_DEVICE) += \
 
 obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
 
-obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
+obj-$(CONFIG_DRM_BUDDY) += ../buddy.o
 
 drm_dma_helper-y := drm_gem_dma_helper.o
 drm_dma_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_dma.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 5f5fd9a911c2..874779618056 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -24,7 +24,7 @@
 #ifndef __AMDGPU_VRAM_MGR_H__
 #define __AMDGPU_VRAM_MGR_H__
 
-#include <drm/drm_buddy.h>
+#include <linux/gpu_buddy.h>
 
 struct amdgpu_vram_mgr {
 	struct ttm_resource_manager manager;
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
deleted file mode 100644
index fd34d3755f7c..000000000000
--- a/drivers/gpu/drm/drm_buddy.c
+++ /dev/null
@@ -1,1336 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#include <kunit/test-bug.h>
-
-#include <linux/export.h>
-#include <linux/kmemleak.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-
-#include <drm/drm_buddy.h>
-#include <drm/drm_print.h>
-
-enum drm_buddy_free_tree {
-	DRM_BUDDY_CLEAR_TREE = 0,
-	DRM_BUDDY_DIRTY_TREE,
-	DRM_BUDDY_MAX_FREE_TREES,
-};
-
-static struct kmem_cache *slab_blocks;
-
-#define for_each_free_tree(tree) \
-	for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
-
-static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
-					       struct drm_buddy_block *parent,
-					       unsigned int order,
-					       u64 offset)
-{
-	struct drm_buddy_block *block;
-
-	BUG_ON(order > DRM_BUDDY_MAX_ORDER);
-
-	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
-	if (!block)
-		return NULL;
-
-	block->header = offset;
-	block->header |= order;
-	block->parent = parent;
-
-	RB_CLEAR_NODE(&block->rb);
-
-	BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
-	return block;
-}
-
-static void drm_block_free(struct drm_buddy *mm,
-			   struct drm_buddy_block *block)
-{
-	kmem_cache_free(slab_blocks, block);
-}
-
-static enum drm_buddy_free_tree
-get_block_tree(struct drm_buddy_block *block)
-{
-	return drm_buddy_block_is_clear(block) ?
-	       DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
-}
-
-static struct drm_buddy_block *
-rbtree_get_free_block(const struct rb_node *node)
-{
-	return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
-}
-
-static struct drm_buddy_block *
-rbtree_last_free_block(struct rb_root *root)
-{
-	return rbtree_get_free_block(rb_last(root));
-}
-
-static bool rbtree_is_empty(struct rb_root *root)
-{
-	return RB_EMPTY_ROOT(root);
-}
-
-static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
-					const struct drm_buddy_block *node)
-{
-	return drm_buddy_block_offset(block) < drm_buddy_block_offset(node);
-}
-
-static bool rbtree_block_offset_less(struct rb_node *block,
-				     const struct rb_node *node)
-{
-	return drm_buddy_block_offset_less(rbtree_get_free_block(block),
-					   rbtree_get_free_block(node));
-}
-
-static void rbtree_insert(struct drm_buddy *mm,
-			  struct drm_buddy_block *block,
-			  enum drm_buddy_free_tree tree)
-{
-	rb_add(&block->rb,
-	       &mm->free_trees[tree][drm_buddy_block_order(block)],
-	       rbtree_block_offset_less);
-}
-
-static void rbtree_remove(struct drm_buddy *mm,
-			  struct drm_buddy_block *block)
-{
-	unsigned int order = drm_buddy_block_order(block);
-	enum drm_buddy_free_tree tree;
-	struct rb_root *root;
-
-	tree = get_block_tree(block);
-	root = &mm->free_trees[tree][order];
-
-	rb_erase(&block->rb, root);
-	RB_CLEAR_NODE(&block->rb);
-}
-
-static void clear_reset(struct drm_buddy_block *block)
-{
-	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
-}
-
-static void mark_cleared(struct drm_buddy_block *block)
-{
-	block->header |= DRM_BUDDY_HEADER_CLEAR;
-}
-
-static void mark_allocated(struct drm_buddy *mm,
-			   struct drm_buddy_block *block)
-{
-	block->header &= ~DRM_BUDDY_HEADER_STATE;
-	block->header |= DRM_BUDDY_ALLOCATED;
-
-	rbtree_remove(mm, block);
-}
-
-static void mark_free(struct drm_buddy *mm,
-		      struct drm_buddy_block *block)
-{
-	enum drm_buddy_free_tree tree;
-
-	block->header &= ~DRM_BUDDY_HEADER_STATE;
-	block->header |= DRM_BUDDY_FREE;
-
-	tree = get_block_tree(block);
-	rbtree_insert(mm, block, tree);
-}
-
-static void mark_split(struct drm_buddy *mm,
-		       struct drm_buddy_block *block)
-{
-	block->header &= ~DRM_BUDDY_HEADER_STATE;
-	block->header |= DRM_BUDDY_SPLIT;
-
-	rbtree_remove(mm, block);
-}
-
-static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
-{
-	return s1 <= e2 && e1 >= s2;
-}
-
-static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
-{
-	return s1 <= s2 && e1 >= e2;
-}
-
-static struct drm_buddy_block *
-__get_buddy(struct drm_buddy_block *block)
-{
-	struct drm_buddy_block *parent;
-
-	parent = block->parent;
-	if (!parent)
-		return NULL;
-
-	if (parent->left == block)
-		return parent->right;
-
-	return parent->left;
-}
-
-static unsigned int __drm_buddy_free(struct drm_buddy *mm,
-				     struct drm_buddy_block *block,
-				     bool force_merge)
-{
-	struct drm_buddy_block *parent;
-	unsigned int order;
-
-	while ((parent = block->parent)) {
-		struct drm_buddy_block *buddy;
-
-		buddy = __get_buddy(block);
-
-		if (!drm_buddy_block_is_free(buddy))
-			break;
-
-		if (!force_merge) {
-			/*
-			 * Check the block and its buddy clear state and exit
-			 * the loop if they both have the dissimilar state.
-			 */
-			if (drm_buddy_block_is_clear(block) !=
-			    drm_buddy_block_is_clear(buddy))
-				break;
-
-			if (drm_buddy_block_is_clear(block))
-				mark_cleared(parent);
-		}
-
-		rbtree_remove(mm, buddy);
-		if (force_merge && drm_buddy_block_is_clear(buddy))
-			mm->clear_avail -= drm_buddy_block_size(mm, buddy);
-
-		drm_block_free(mm, block);
-		drm_block_free(mm, buddy);
-
-		block = parent;
-	}
-
-	order = drm_buddy_block_order(block);
-	mark_free(mm, block);
-
-	return order;
-}
-
-static int __force_merge(struct drm_buddy *mm,
-			 u64 start,
-			 u64 end,
-			 unsigned int min_order)
-{
-	unsigned int tree, order;
-	int i;
-
-	if (!min_order)
-		return -ENOMEM;
-
-	if (min_order > mm->max_order)
-		return -EINVAL;
-
-	for_each_free_tree(tree) {
-		for (i = min_order - 1; i >= 0; i--) {
-			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
-
-			while (iter) {
-				struct drm_buddy_block *block, *buddy;
-				u64 block_start, block_end;
-
-				block = rbtree_get_free_block(iter);
-				iter = rb_prev(iter);
-
-				if (!block || !block->parent)
-					continue;
-
-				block_start = drm_buddy_block_offset(block);
-				block_end = block_start + drm_buddy_block_size(mm, block) - 1;
-
-				if (!contains(start, end, block_start, block_end))
-					continue;
-
-				buddy = __get_buddy(block);
-				if (!drm_buddy_block_is_free(buddy))
-					continue;
-
-				WARN_ON(drm_buddy_block_is_clear(block) ==
-					drm_buddy_block_is_clear(buddy));
-
-				/*
-				 * Advance to the next node when the current node is the buddy,
-				 * as freeing the block will also remove its buddy from the tree.
-				 */
-				if (iter == &buddy->rb)
-					iter = rb_prev(iter);
-
-				rbtree_remove(mm, block);
-				if (drm_buddy_block_is_clear(block))
-					mm->clear_avail -= drm_buddy_block_size(mm, block);
-
-				order = __drm_buddy_free(mm, block, true);
-				if (order >= min_order)
-					return 0;
-			}
-		}
-	}
-
-	return -ENOMEM;
-}
-
-/**
- * drm_buddy_init - init memory manager
- *
- * @mm: DRM buddy manager to initialize
- * @size: size in bytes to manage
- * @chunk_size: minimum page size in bytes for our allocations
- *
- * Initializes the memory manager and its resources.
- *
- * Returns:
- * 0 on success, error code on failure.
- */
-int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
-{
-	unsigned int i, j, root_count = 0;
-	u64 offset = 0;
-
-	if (size < chunk_size)
-		return -EINVAL;
-
-	if (chunk_size < SZ_4K)
-		return -EINVAL;
-
-	if (!is_power_of_2(chunk_size))
-		return -EINVAL;
-
-	size = round_down(size, chunk_size);
-
-	mm->size = size;
-	mm->avail = size;
-	mm->clear_avail = 0;
-	mm->chunk_size = chunk_size;
-	mm->max_order = ilog2(size) - ilog2(chunk_size);
-
-	BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
-
-	mm->free_trees = kmalloc_array(DRM_BUDDY_MAX_FREE_TREES,
-				       sizeof(*mm->free_trees),
-				       GFP_KERNEL);
-	if (!mm->free_trees)
-		return -ENOMEM;
-
-	for_each_free_tree(i) {
-		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
-						  sizeof(struct rb_root),
-						  GFP_KERNEL);
-		if (!mm->free_trees[i])
-			goto out_free_tree;
-
-		for (j = 0; j <= mm->max_order; ++j)
-			mm->free_trees[i][j] = RB_ROOT;
-	}
-
-	mm->n_roots = hweight64(size);
-
-	mm->roots = kmalloc_array(mm->n_roots,
-				  sizeof(struct drm_buddy_block *),
-				  GFP_KERNEL);
-	if (!mm->roots)
-		goto out_free_tree;
-
-	/*
-	 * Split into power-of-two blocks, in case we are given a size that is
-	 * not itself a power-of-two.
-	 */
-	do {
-		struct drm_buddy_block *root;
-		unsigned int order;
-		u64 root_size;
-
-		order = ilog2(size) - ilog2(chunk_size);
-		root_size = chunk_size << order;
-
-		root = drm_block_alloc(mm, NULL, order, offset);
-		if (!root)
-			goto out_free_roots;
-
-		mark_free(mm, root);
-
-		BUG_ON(root_count > mm->max_order);
-		BUG_ON(drm_buddy_block_size(mm, root) < chunk_size);
-
-		mm->roots[root_count] = root;
-
-		offset += root_size;
-		size -= root_size;
-		root_count++;
-	} while (size);
-
-	return 0;
-
-out_free_roots:
-	while (root_count--)
-		drm_block_free(mm, mm->roots[root_count]);
-	kfree(mm->roots);
-out_free_tree:
-	while (i--)
-		kfree(mm->free_trees[i]);
-	kfree(mm->free_trees);
-	return -ENOMEM;
-}
-EXPORT_SYMBOL(drm_buddy_init);
-
-/**
- * drm_buddy_fini - tear down the memory manager
- *
- * @mm: DRM buddy manager to free
- *
- * Cleanup memory manager resources and the freetree
- */
-void drm_buddy_fini(struct drm_buddy *mm)
-{
-	u64 root_size, size, start;
-	unsigned int order;
-	int i;
-
-	size = mm->size;
-
-	for (i = 0; i < mm->n_roots; ++i) {
-		order = ilog2(size) - ilog2(mm->chunk_size);
-		start = drm_buddy_block_offset(mm->roots[i]);
-		__force_merge(mm, start, start + size, order);
-
-		if (WARN_ON(!drm_buddy_block_is_free(mm->roots[i])))
-			kunit_fail_current_test("buddy_fini() root");
-
-		drm_block_free(mm, mm->roots[i]);
-
-		root_size = mm->chunk_size << order;
-		size -= root_size;
-	}
-
-	WARN_ON(mm->avail != mm->size);
-
-	for_each_free_tree(i)
-		kfree(mm->free_trees[i]);
-	kfree(mm->free_trees);
-	kfree(mm->roots);
-}
-EXPORT_SYMBOL(drm_buddy_fini);
-
-static int split_block(struct drm_buddy *mm,
-		       struct drm_buddy_block *block)
-{
-	unsigned int block_order = drm_buddy_block_order(block) - 1;
-	u64 offset = drm_buddy_block_offset(block);
-
-	BUG_ON(!drm_buddy_block_is_free(block));
-	BUG_ON(!drm_buddy_block_order(block));
-
-	block->left = drm_block_alloc(mm, block, block_order, offset);
-	if (!block->left)
-		return -ENOMEM;
-
-	block->right = drm_block_alloc(mm, block, block_order,
-				       offset + (mm->chunk_size << block_order));
-	if (!block->right) {
-		drm_block_free(mm, block->left);
-		return -ENOMEM;
-	}
-
-	mark_split(mm, block);
-
-	if (drm_buddy_block_is_clear(block)) {
-		mark_cleared(block->left);
-		mark_cleared(block->right);
-		clear_reset(block);
-	}
-
-	mark_free(mm, block->left);
-	mark_free(mm, block->right);
-
-	return 0;
-}
-
-/**
- * drm_get_buddy - get buddy address
- *
- * @block: DRM buddy block
- *
- * Returns the corresponding buddy block for @block, or NULL
- * if this is a root block and can't be merged further.
- * Requires some kind of locking to protect against
- * any concurrent allocate and free operations.
- */
-struct drm_buddy_block *
-drm_get_buddy(struct drm_buddy_block *block)
-{
-	return __get_buddy(block);
-}
-EXPORT_SYMBOL(drm_get_buddy);
-
-/**
- * drm_buddy_reset_clear - reset blocks clear state
- *
- * @mm: DRM buddy manager
- * @is_clear: blocks clear state
- *
- * Reset the clear state based on @is_clear value for each block
- * in the freetree.
- */
-void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
-{
-	enum drm_buddy_free_tree src_tree, dst_tree;
-	u64 root_size, size, start;
-	unsigned int order;
-	int i;
-
-	size = mm->size;
-	for (i = 0; i < mm->n_roots; ++i) {
-		order = ilog2(size) - ilog2(mm->chunk_size);
-		start = drm_buddy_block_offset(mm->roots[i]);
-		__force_merge(mm, start, start + size, order);
-
-		root_size = mm->chunk_size << order;
-		size -= root_size;
-	}
-
-	src_tree = is_clear ? DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
-	dst_tree = is_clear ? DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
-
-	for (i = 0; i <= mm->max_order; ++i) {
-		struct rb_root *root = &mm->free_trees[src_tree][i];
-		struct drm_buddy_block *block, *tmp;
-
-		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
-			rbtree_remove(mm, block);
-			if (is_clear) {
-				mark_cleared(block);
-				mm->clear_avail += drm_buddy_block_size(mm, block);
-			} else {
-				clear_reset(block);
-				mm->clear_avail -= drm_buddy_block_size(mm, block);
-			}
-
-			rbtree_insert(mm, block, dst_tree);
-		}
-	}
-}
-EXPORT_SYMBOL(drm_buddy_reset_clear);
-
-/**
- * drm_buddy_free_block - free a block
- *
- * @mm: DRM buddy manager
- * @block: block to be freed
- */
-void drm_buddy_free_block(struct drm_buddy *mm,
-			  struct drm_buddy_block *block)
-{
-	BUG_ON(!drm_buddy_block_is_allocated(block));
-	mm->avail += drm_buddy_block_size(mm, block);
-	if (drm_buddy_block_is_clear(block))
-		mm->clear_avail += drm_buddy_block_size(mm, block);
-
-	__drm_buddy_free(mm, block, false);
-}
-EXPORT_SYMBOL(drm_buddy_free_block);
-
-static void __drm_buddy_free_list(struct drm_buddy *mm,
-				  struct list_head *objects,
-				  bool mark_clear,
-				  bool mark_dirty)
-{
-	struct drm_buddy_block *block, *on;
-
-	WARN_ON(mark_dirty && mark_clear);
-
-	list_for_each_entry_safe(block, on, objects, link) {
-		if (mark_clear)
-			mark_cleared(block);
-		else if (mark_dirty)
-			clear_reset(block);
-		drm_buddy_free_block(mm, block);
-		cond_resched();
-	}
-	INIT_LIST_HEAD(objects);
-}
-
-static void drm_buddy_free_list_internal(struct drm_buddy *mm,
-					 struct list_head *objects)
-{
-	/*
-	 * Don't touch the clear/dirty bit, since allocation is still internal
-	 * at this point. For example we might have just failed part of the
-	 * allocation.
-	 */
-	__drm_buddy_free_list(mm, objects, false, false);
-}
-
-/**
- * drm_buddy_free_list - free blocks
- *
- * @mm: DRM buddy manager
- * @objects: input list head to free blocks
- * @flags: optional flags like DRM_BUDDY_CLEARED
- */
-void drm_buddy_free_list(struct drm_buddy *mm,
-			 struct list_head *objects,
-			 unsigned int flags)
-{
-	bool mark_clear = flags & DRM_BUDDY_CLEARED;
-
-	__drm_buddy_free_list(mm, objects, mark_clear, !mark_clear);
-}
-EXPORT_SYMBOL(drm_buddy_free_list);
-
-static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags)
-{
-	bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION;
-
-	return needs_clear != drm_buddy_block_is_clear(block);
-}
-
-static struct drm_buddy_block *
-__alloc_range_bias(struct drm_buddy *mm,
-		   u64 start, u64 end,
-		   unsigned int order,
-		   unsigned long flags,
-		   bool fallback)
-{
-	u64 req_size = mm->chunk_size << order;
-	struct drm_buddy_block *block;
-	struct drm_buddy_block *buddy;
-	LIST_HEAD(dfs);
-	int err;
-	int i;
-
-	end = end - 1;
-
-	for (i = 0; i < mm->n_roots; ++i)
-		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
-
-	do {
-		u64 block_start;
-		u64 block_end;
-
-		block = list_first_entry_or_null(&dfs,
-						 struct drm_buddy_block,
-						 tmp_link);
-		if (!block)
-			break;
-
-		list_del(&block->tmp_link);
-
-		if (drm_buddy_block_order(block) < order)
-			continue;
-
-		block_start = drm_buddy_block_offset(block);
-		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
-
-		if (!overlaps(start, end, block_start, block_end))
-			continue;
-
-		if (drm_buddy_block_is_allocated(block))
-			continue;
-
-		if (block_start < start || block_end > end) {
-			u64 adjusted_start = max(block_start, start);
-			u64 adjusted_end = min(block_end, end);
-
-			if (round_down(adjusted_end + 1, req_size) <=
-			    round_up(adjusted_start, req_size))
-				continue;
-		}
-
-		if (!fallback && block_incompatible(block, flags))
-			continue;
-
-		if (contains(start, end, block_start, block_end) &&
-		    order == drm_buddy_block_order(block)) {
-			/*
-			 * Find the free block within the range.
-			 */
-			if (drm_buddy_block_is_free(block))
-				return block;
-
-			continue;
-		}
-
-		if (!drm_buddy_block_is_split(block)) {
-			err = split_block(mm, block);
-			if (unlikely(err))
-				goto err_undo;
-		}
-
-		list_add(&block->right->tmp_link, &dfs);
-		list_add(&block->left->tmp_link, &dfs);
-	} while (1);
-
-	return ERR_PTR(-ENOSPC);
-
-err_undo:
-	/*
-	 * We really don't want to leave around a bunch of split blocks, since
-	 * bigger is better, so make sure we merge everything back before we
-	 * free the allocated blocks.
-	 */
-	buddy = __get_buddy(block);
-	if (buddy &&
-	    (drm_buddy_block_is_free(block) &&
-	     drm_buddy_block_is_free(buddy)))
-		__drm_buddy_free(mm, block, false);
-	return ERR_PTR(err);
-}
-
-static struct drm_buddy_block *
-__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
-			     u64 start, u64 end,
-			     unsigned int order,
-			     unsigned long flags)
-{
-	struct drm_buddy_block *block;
-	bool fallback = false;
-
-	block = __alloc_range_bias(mm, start, end, order,
-				   flags, fallback);
-	if (IS_ERR(block))
-		return __alloc_range_bias(mm, start, end, order,
-					  flags, !fallback);
-
-	return block;
-}
-
-static struct drm_buddy_block *
-get_maxblock(struct drm_buddy *mm,
-	     unsigned int order,
-	     enum drm_buddy_free_tree tree)
-{
-	struct drm_buddy_block *max_block = NULL, *block = NULL;
-	struct rb_root *root;
-	unsigned int i;
-
-	for (i = order; i <= mm->max_order; ++i) {
-		root = &mm->free_trees[tree][i];
-		block = rbtree_last_free_block(root);
-		if (!block)
-			continue;
-
-		if (!max_block) {
-			max_block = block;
-			continue;
-		}
-
-		if (drm_buddy_block_offset(block) >
-		    drm_buddy_block_offset(max_block)) {
-			max_block = block;
-		}
-	}
-
-	return max_block;
-}
-
-static struct drm_buddy_block *
-alloc_from_freetree(struct drm_buddy *mm,
-		    unsigned int order,
-		    unsigned long flags)
-{
-	struct drm_buddy_block *block = NULL;
-	struct rb_root *root;
-	enum drm_buddy_free_tree tree;
-	unsigned int tmp;
-	int err;
-
-	tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
-		DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
-
-	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-		block = get_maxblock(mm, order, tree);
-		if (block)
-			/* Store the obtained block order */
-			tmp = drm_buddy_block_order(block);
-	} else {
-		for (tmp = order; tmp <= mm->max_order; ++tmp) {
-			/* Get RB tree root for this order and tree */
-			root = &mm->free_trees[tree][tmp];
-			block = rbtree_last_free_block(root);
-			if (block)
-				break;
-		}
-	}
-
-	if (!block) {
-		/* Try allocating from the other tree */
-		tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
-			DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
-
-		for (tmp = order; tmp <= mm->max_order; ++tmp) {
-			root = &mm->free_trees[tree][tmp];
-			block = rbtree_last_free_block(root);
-			if (block)
-				break;
-		}
-
-		if (!block)
-			return ERR_PTR(-ENOSPC);
-	}
-
-	BUG_ON(!drm_buddy_block_is_free(block));
-
-	while (tmp != order) {
-		err = split_block(mm, block);
-		if (unlikely(err))
-			goto err_undo;
-
-		block = block->right;
-		tmp--;
-	}
-	return block;
-
-err_undo:
-	if (tmp != order)
-		__drm_buddy_free(mm, block, false);
-	return ERR_PTR(err);
-}
-
-static int __alloc_range(struct drm_buddy *mm,
-			 struct list_head *dfs,
-			 u64 start, u64 size,
-			 struct list_head *blocks,
-			 u64 *total_allocated_on_err)
-{
-	struct drm_buddy_block *block;
-	struct drm_buddy_block *buddy;
-	u64 total_allocated = 0;
-	LIST_HEAD(allocated);
-	u64 end;
-	int err;
-
-	end = start + size - 1;
-
-	do {
-		u64 block_start;
-		u64 block_end;
-
-		block = list_first_entry_or_null(dfs,
-						 struct drm_buddy_block,
-						 tmp_link);
-		if (!block)
-			break;
-
-		list_del(&block->tmp_link);
-
-		block_start = drm_buddy_block_offset(block);
-		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
-
-		if (!overlaps(start, end, block_start, block_end))
-			continue;
-
-		if (drm_buddy_block_is_allocated(block)) {
-			err = -ENOSPC;
-			goto err_free;
-		}
-
-		if (contains(start, end, block_start, block_end)) {
-			if (drm_buddy_block_is_free(block)) {
-				mark_allocated(mm, block);
-				total_allocated += drm_buddy_block_size(mm, block);
-				mm->avail -= drm_buddy_block_size(mm, block);
-				if (drm_buddy_block_is_clear(block))
-					mm->clear_avail -= drm_buddy_block_size(mm, block);
-				list_add_tail(&block->link, &allocated);
-				continue;
-			} else if (!mm->clear_avail) {
-				err = -ENOSPC;
-				goto err_free;
-			}
-		}
-
-		if (!drm_buddy_block_is_split(block)) {
-			err = split_block(mm, block);
-			if (unlikely(err))
-				goto err_undo;
-		}
-
-		list_add(&block->right->tmp_link, dfs);
-		list_add(&block->left->tmp_link, dfs);
-	} while (1);
-
-	if (total_allocated < size) {
-		err = -ENOSPC;
-		goto err_free;
-	}
-
-	list_splice_tail(&allocated, blocks);
-
-	return 0;
-
-err_undo:
-	/*
-	 * We really don't want to leave around a bunch of split blocks, since
-	 * bigger is better, so make sure we merge everything back before we
-	 * free the allocated blocks.
-	 */
-	buddy = __get_buddy(block);
-	if (buddy &&
-	    (drm_buddy_block_is_free(block) &&
-	     drm_buddy_block_is_free(buddy)))
-		__drm_buddy_free(mm, block, false);
-
-err_free:
-	if (err == -ENOSPC && total_allocated_on_err) {
-		list_splice_tail(&allocated, blocks);
-		*total_allocated_on_err = total_allocated;
-	} else {
-		drm_buddy_free_list_internal(mm, &allocated);
-	}
-
-	return err;
-}
-
-static int __drm_buddy_alloc_range(struct drm_buddy *mm,
-				   u64 start,
-				   u64 size,
-				   u64 *total_allocated_on_err,
-				   struct list_head *blocks)
-{
-	LIST_HEAD(dfs);
-	int i;
-
-	for (i = 0; i < mm->n_roots; ++i)
-		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
-
-	return __alloc_range(mm, &dfs, start, size,
-			     blocks, total_allocated_on_err);
-}
-
-static int __alloc_contig_try_harder(struct drm_buddy *mm,
-				     u64 size,
-				     u64 min_block_size,
-				     struct list_head *blocks)
-{
-	u64 rhs_offset, lhs_offset, lhs_size, filled;
-	struct drm_buddy_block *block;
-	unsigned int tree, order;
-	LIST_HEAD(blocks_lhs);
-	unsigned long pages;
-	u64 modify_size;
-	int err;
-
-	modify_size = rounddown_pow_of_two(size);
-	pages = modify_size >> ilog2(mm->chunk_size);
-	order = fls(pages) - 1;
-	if (order == 0)
-		return -ENOSPC;
-
-	for_each_free_tree(tree) {
-		struct rb_root *root;
-		struct rb_node *iter;
-
-		root = &mm->free_trees[tree][order];
-		if (rbtree_is_empty(root))
-			continue;
-
-		iter = rb_last(root);
-		while (iter) {
-			block = rbtree_get_free_block(iter);
-
-			/* Allocate blocks traversing RHS */
-			rhs_offset = drm_buddy_block_offset(block);
-			err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
-						       &filled, blocks);
-			if (!err || err != -ENOSPC)
-				return err;
-
-			lhs_size = max((size - filled), min_block_size);
-			if (!IS_ALIGNED(lhs_size, min_block_size))
-				lhs_size = round_up(lhs_size, min_block_size);
-
-			/* Allocate blocks traversing LHS */
-			lhs_offset = drm_buddy_block_offset(block) - lhs_size;
-			err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
-						       NULL, &blocks_lhs);
-			if (!err) {
-				list_splice(&blocks_lhs, blocks);
-				return 0;
-			} else if (err != -ENOSPC) {
-				drm_buddy_free_list_internal(mm, blocks);
-				return err;
-			}
-			/* Free blocks for the next iteration */
-			drm_buddy_free_list_internal(mm, blocks);
-
-			iter = rb_prev(iter);
-		}
-	}
-
-	return -ENOSPC;
-}
-
-/**
- * drm_buddy_block_trim - free unused pages
- *
- * @mm: DRM buddy manager
- * @start: start address to begin the trimming.
- * @new_size: original size requested
- * @blocks: Input and output list of allocated blocks.
- * MUST contain single block as input to be trimmed.
- * On success will contain the newly allocated blocks
- * making up the @new_size. Blocks always appear in
- * ascending order
- *
- * For contiguous allocation, we round up the size to the nearest
- * power of two value, drivers consume *actual* size, so remaining
- * portions are unused and can be optionally freed with this function
- *
- * Returns:
- * 0 on success, error code on failure.
- */
-int drm_buddy_block_trim(struct drm_buddy *mm,
-			 u64 *start,
-			 u64 new_size,
-			 struct list_head *blocks)
-{
-	struct drm_buddy_block *parent;
-	struct drm_buddy_block *block;
-	u64 block_start, block_end;
-	LIST_HEAD(dfs);
-	u64 new_start;
-	int err;
-
-	if (!list_is_singular(blocks))
-		return -EINVAL;
-
-	block = list_first_entry(blocks,
-				 struct drm_buddy_block,
-				 link);
-
-	block_start = drm_buddy_block_offset(block);
-	block_end = block_start + drm_buddy_block_size(mm, block);
-
-	if (WARN_ON(!drm_buddy_block_is_allocated(block)))
-		return -EINVAL;
-
-	if (new_size > drm_buddy_block_size(mm, block))
-		return -EINVAL;
-
-	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
-		return -EINVAL;
-
-	if (new_size == drm_buddy_block_size(mm, block))
-		return 0;
-
-	new_start = block_start;
-	if (start) {
-		new_start = *start;
-
-		if (new_start < block_start)
-			return -EINVAL;
-
-		if (!IS_ALIGNED(new_start, mm->chunk_size))
-			return -EINVAL;
-
-		if (range_overflows(new_start, new_size, block_end))
-			return -EINVAL;
-	}
-
-	list_del(&block->link);
-	mark_free(mm, block);
-	mm->avail += drm_buddy_block_size(mm, block);
-	if (drm_buddy_block_is_clear(block))
-		mm->clear_avail += drm_buddy_block_size(mm, block);
-
-	/* Prevent recursively freeing this node */
-	parent = block->parent;
-	block->parent = NULL;
-
-	list_add(&block->tmp_link, &dfs);
-	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
-	if (err) {
-		mark_allocated(mm, block);
-		mm->avail -= drm_buddy_block_size(mm, block);
-		if (drm_buddy_block_is_clear(block))
-			mm->clear_avail -= drm_buddy_block_size(mm, block);
-		list_add(&block->link, blocks);
-	}
-
-	block->parent = parent;
-	return err;
-}
-EXPORT_SYMBOL(drm_buddy_block_trim);
-
-static struct drm_buddy_block *
-__drm_buddy_alloc_blocks(struct drm_buddy *mm,
-			 u64 start, u64 end,
-			 unsigned int order,
-			 unsigned long flags)
-{
-	if (flags & DRM_BUDDY_RANGE_ALLOCATION)
-		/* Allocate traversing within the range */
-		return  __drm_buddy_alloc_range_bias(mm, start, end,
-						     order, flags);
-	else
-		/* Allocate from freetree */
-		return alloc_from_freetree(mm, order, flags);
-}
-
-/**
- * drm_buddy_alloc_blocks - allocate power-of-two blocks
- *
- * @mm: DRM buddy manager to allocate from
- * @start: start of the allowed range for this block
- * @end: end of the allowed range for this block
- * @size: size of the allocation in bytes
- * @min_block_size: alignment of the allocation
- * @blocks: output list head to add allocated blocks
- * @flags: DRM_BUDDY_*_ALLOCATION flags
- *
- * alloc_range_bias() called on range limitations, which traverses
- * the tree and returns the desired block.
- *
- * alloc_from_freetree() called when *no* range restrictions
- * are enforced, which picks the block from the freetree.
- *
- * Returns:
- * 0 on success, error code on failure.
- */
-int drm_buddy_alloc_blocks(struct drm_buddy *mm,
-			   u64 start, u64 end, u64 size,
-			   u64 min_block_size,
-			   struct list_head *blocks,
-			   unsigned long flags)
-{
-	struct drm_buddy_block *block = NULL;
-	u64 original_size, original_min_size;
-	unsigned int min_order, order;
-	LIST_HEAD(allocated);
-	unsigned long pages;
-	int err;
-
-	if (size < mm->chunk_size)
-		return -EINVAL;
-
-	if (min_block_size < mm->chunk_size)
-		return -EINVAL;
-
-	if (!is_power_of_2(min_block_size))
-		return -EINVAL;
-
-	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
-		return -EINVAL;
-
-	if (end > mm->size)
-		return -EINVAL;
-
-	if (range_overflows(start, size, mm->size))
-		return -EINVAL;
-
-	/* Actual range allocation */
-	if (start + size == end) {
-		if (!IS_ALIGNED(start | end, min_block_size))
-			return -EINVAL;
-
-		return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
-	}
-
-	original_size = size;
-	original_min_size = min_block_size;
-
-	/* Roundup the size to power of 2 */
-	if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
-		size = roundup_pow_of_two(size);
-		min_block_size = size;
-	/* Align size value to min_block_size */
-	} else if (!IS_ALIGNED(size, min_block_size)) {
-		size = round_up(size, min_block_size);
-	}
-
-	pages = size >> ilog2(mm->chunk_size);
-	order = fls(pages) - 1;
-	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
-
-	if (order > mm->max_order || size > mm->size) {
-		if ((flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) &&
-		    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
-			return __alloc_contig_try_harder(mm, original_size,
-							 original_min_size, blocks);
-
-		return -EINVAL;
-	}
-
-	do {
-		order = min(order, (unsigned int)fls(pages) - 1);
-		BUG_ON(order > mm->max_order);
-		BUG_ON(order < min_order);
-
-		do {
-			block = __drm_buddy_alloc_blocks(mm, start,
-							 end,
-							 order,
-							 flags);
-			if (!IS_ERR(block))
-				break;
-
-			if (order-- == min_order) {
-				/* Try allocation through force merge method */
-				if (mm->clear_avail &&
-				    !__force_merge(mm, start, end, min_order)) {
-					block = __drm_buddy_alloc_blocks(mm, start,
-									 end,
-									 min_order,
-									 flags);
-					if (!IS_ERR(block)) {
-						order = min_order;
-						break;
-					}
-				}
-
-				/*
-				 * Try contiguous block allocation through
-				 * try harder method.
-				 */
-				if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
-				    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
-					return __alloc_contig_try_harder(mm,
-									 original_size,
-									 original_min_size,
-									 blocks);
-				err = -ENOSPC;
-				goto err_free;
-			}
-		} while (1);
-
-		mark_allocated(mm, block);
-		mm->avail -= drm_buddy_block_size(mm, block);
-		if (drm_buddy_block_is_clear(block))
-			mm->clear_avail -= drm_buddy_block_size(mm, block);
-		kmemleak_update_trace(block);
-		list_add_tail(&block->link, &allocated);
-
-		pages -= BIT(order);
-
-		if (!pages)
-			break;
-	} while (1);
-
-	/* Trim the allocated block to the required size */
-	if (!(flags & DRM_BUDDY_TRIM_DISABLE) &&
-	    original_size != size) {
-		struct list_head *trim_list;
-		LIST_HEAD(temp);
-		u64 trim_size;
-
-		trim_list = &allocated;
-		trim_size = original_size;
-
-		if (!list_is_singular(&allocated)) {
-			block = list_last_entry(&allocated, typeof(*block), link);
-			list_move(&block->link, &temp);
-			trim_list = &temp;
-			trim_size = drm_buddy_block_size(mm, block) -
-				(size - original_size);
-		}
-
-		drm_buddy_block_trim(mm,
-				     NULL,
-				     trim_size,
-				     trim_list);
-
-		if (!list_empty(&temp))
-			list_splice_tail(trim_list, &allocated);
-	}
-
-	list_splice_tail(&allocated, blocks);
-	return 0;
-
-err_free:
-	drm_buddy_free_list_internal(mm, &allocated);
-	return err;
-}
-EXPORT_SYMBOL(drm_buddy_alloc_blocks);
-
-/**
- * drm_buddy_block_print - print block information
- *
- * @mm: DRM buddy manager
- * @block: DRM buddy block
- * @p: DRM printer to use
- */
-void drm_buddy_block_print(struct drm_buddy *mm,
-			   struct drm_buddy_block *block,
-			   struct drm_printer *p)
-{
-	u64 start = drm_buddy_block_offset(block);
-	u64 size = drm_buddy_block_size(mm, block);
-
-	drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size);
-}
-EXPORT_SYMBOL(drm_buddy_block_print);
-
-/**
- * drm_buddy_print - print allocator state
- *
- * @mm: DRM buddy manager
- * @p: DRM printer to use
- */
-void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
-{
-	int order;
-
-	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
-		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
-
-	for (order = mm->max_order; order >= 0; order--) {
-		struct drm_buddy_block *block, *tmp;
-		struct rb_root *root;
-		u64 count = 0, free;
-		unsigned int tree;
-
-		for_each_free_tree(tree) {
-			root = &mm->free_trees[tree][order];
-
-			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
-				BUG_ON(!drm_buddy_block_is_free(block));
-				count++;
-			}
-		}
-
-		drm_printf(p, "order-%2d ", order);
-
-		free = count * (mm->chunk_size << order);
-		if (free < SZ_1M)
-			drm_printf(p, "free: %8llu KiB", free >> 10);
-		else
-			drm_printf(p, "free: %8llu MiB", free >> 20);
-
-		drm_printf(p, ", blocks: %llu\n", count);
-	}
-}
-EXPORT_SYMBOL(drm_buddy_print);
-
-static void drm_buddy_module_exit(void)
-{
-	kmem_cache_destroy(slab_blocks);
-}
-
-static int __init drm_buddy_module_init(void)
-{
-	slab_blocks = KMEM_CACHE(drm_buddy_block, 0);
-	if (!slab_blocks)
-		return -ENOMEM;
-
-	return 0;
-}
-
-module_init(drm_buddy_module_init);
-module_exit(drm_buddy_module_exit);
-
-MODULE_DESCRIPTION("DRM Buddy Allocator");
-MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index f65fe86c02b5..eeda5daa544f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -5,7 +5,7 @@
 
 #include <linux/shmem_fs.h>
 
-#include <drm/drm_buddy.h>
+#include <linux/gpu_buddy.h>
 #include <drm/drm_print.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_tt.h>
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
index 4d830740946d..30246f02bcfe 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.c
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -7,7 +7,7 @@
 #include "i915_scatterlist.h"
 #include "i915_ttm_buddy_manager.h"
 
-#include <drm/drm_buddy.h>
+#include <linux/gpu_buddy.h>
 #include <drm/drm_mm.h>
 
 #include <linux/slab.h>
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index d5c6e6605086..6b256d95badd 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -5,7 +5,7 @@
 
 #include <linux/slab.h>
 
-#include <drm/drm_buddy.h>
+#include <linux/gpu_buddy.h>
 #include <drm/drm_print.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_bo.h>
diff --git a/drivers/gpu/drm/lib/drm_random.c b/drivers/gpu/drm/lib/drm_random.c
deleted file mode 100644
index 0e9dba1ef4af..000000000000
--- a/drivers/gpu/drm/lib/drm_random.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bitops.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-
-#include "drm_random.h"
-
-u32 drm_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state)
-{
-	return upper_32_bits((u64)prandom_u32_state(state) * ep_ro);
-}
-EXPORT_SYMBOL(drm_prandom_u32_max_state);
-
-void drm_random_reorder(unsigned int *order, unsigned int count,
-			struct rnd_state *state)
-{
-	unsigned int i, j;
-
-	for (i = 0; i < count; ++i) {
-		BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32));
-		j = drm_prandom_u32_max_state(count, state);
-		swap(order[i], order[j]);
-	}
-}
-EXPORT_SYMBOL(drm_random_reorder);
-
-unsigned int *drm_random_order(unsigned int count, struct rnd_state *state)
-{
-	unsigned int *order, i;
-
-	order = kmalloc_array(count, sizeof(*order), GFP_KERNEL);
-	if (!order)
-		return order;
-
-	for (i = 0; i < count; i++)
-		order[i] = i;
-
-	drm_random_reorder(order, count, state);
-	return order;
-}
-EXPORT_SYMBOL(drm_random_order);
diff --git a/drivers/gpu/drm/lib/drm_random.h b/drivers/gpu/drm/lib/drm_random.h
deleted file mode 100644
index 9f827260a89d..000000000000
--- a/drivers/gpu/drm/lib/drm_random.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __DRM_RANDOM_H__
-#define __DRM_RANDOM_H__
-
-/* This is a temporary home for a couple of utility functions that should
- * be transposed to lib/ at the earliest convenience.
- */
-
-#include <linux/prandom.h>
-
-#define DRM_RND_STATE_INITIALIZER(seed__) ({				\
-	struct rnd_state state__;					\
-	prandom_seed_state(&state__, (seed__));				\
-	state__;							\
-})
-
-#define DRM_RND_STATE(name__, seed__) \
-	struct rnd_state name__ = DRM_RND_STATE_INITIALIZER(seed__)
-
-unsigned int *drm_random_order(unsigned int count,
-			       struct rnd_state *state);
-void drm_random_reorder(unsigned int *order,
-			unsigned int count,
-			struct rnd_state *state);
-u32 drm_prandom_u32_max_state(u32 ep_ro,
-			      struct rnd_state *state);
-
-#endif /* !__DRM_RANDOM_H__ */
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index 87d5d5f9332a..d2e2e3d8349a 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -7,7 +7,6 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
 	drm_atomic_test.o \
 	drm_atomic_state_test.o \
 	drm_bridge_test.o \
-	drm_buddy_test.o \
 	drm_cmdline_parser_test.o \
 	drm_connector_test.o \
 	drm_damage_helper_test.o \
diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
deleted file mode 100644
index e6f8459c6c54..000000000000
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ /dev/null
@@ -1,928 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2019 Intel Corporation
- * Copyright © 2022 Maíra Canal <mairacanal@riseup.net>
- */
-
-#include <kunit/test.h>
-
-#include <linux/prime_numbers.h>
-#include <linux/sched/signal.h>
-#include <linux/sizes.h>
-
-#include <drm/drm_buddy.h>
-
-#include "../lib/drm_random.h"
-
-static unsigned int random_seed;
-
-static inline u64 get_size(int order, u64 chunk_size)
-{
-	return (1 << order) * chunk_size;
-}
-
-static void drm_test_buddy_fragmentation_performance(struct kunit *test)
-{
-	struct drm_buddy_block *block, *tmp;
-	int num_blocks, i, ret, count = 0;
-	LIST_HEAD(allocated_blocks);
-	unsigned long elapsed_ms;
-	LIST_HEAD(reverse_list);
-	LIST_HEAD(test_blocks);
-	LIST_HEAD(clear_list);
-	LIST_HEAD(dirty_list);
-	LIST_HEAD(free_list);
-	struct drm_buddy mm;
-	u64 mm_size = SZ_4G;
-	ktime_t start, end;
-
-	/*
-	 * Allocation under severe fragmentation
-	 *
-	 * Create severe fragmentation by allocating the entire 4 GiB address space
-	 * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern
-	 * leaves many scattered holes. Split the allocations into two groups and
-	 * return them with different flags to block coalescing, then repeatedly
-	 * allocate and free 64 KiB blocks while timing the loop. This stresses how
-	 * quickly the allocator can satisfy larger, aligned requests from a pool of
-	 * highly fragmented space.
-	 */
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
-			       "buddy_init failed\n");
-
-	num_blocks = mm_size / SZ_64K;
-
-	start = ktime_get();
-	/* Allocate with maximum fragmentation - 8K blocks with 64K alignment */
-	for (i = 0; i < num_blocks; i++)
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
-								    &allocated_blocks, 0),
-					"buddy_alloc hit an error size=%u\n", SZ_8K);
-
-	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
-		if (count % 4 == 0 || count % 4 == 3)
-			list_move_tail(&block->link, &clear_list);
-		else
-			list_move_tail(&block->link, &dirty_list);
-		count++;
-	}
-
-	/* Free with different flags to ensure no coalescing */
-	drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED);
-	drm_buddy_free_list(&mm, &dirty_list, 0);
-
-	for (i = 0; i < num_blocks; i++)
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
-								    &test_blocks, 0),
-					"buddy_alloc hit an error size=%u\n", SZ_64K);
-	drm_buddy_free_list(&mm, &test_blocks, 0);
-
-	end = ktime_get();
-	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
-
-	kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms);
-
-	drm_buddy_fini(&mm);
-
-	/*
-	 * Reverse free order under fragmentation
-	 *
-	 * Construct a fragmented 4 GiB space by allocating every 8 KiB block with
-	 * 64 KiB alignment, creating a dense scatter of small regions. Half of the
-	 * blocks are selectively freed to form sparse gaps, while the remaining
-	 * allocations are preserved, reordered in reverse, and released back with
-	 * the cleared flag. This models a pathological reverse-ordered free pattern
-	 * and measures how quickly the allocator can merge and reclaim space when
-	 * deallocation occurs in the opposite order of allocation, exposing the
-	 * cost difference between a linear freelist scan and an ordered tree lookup.
-	 */
-	ret = drm_buddy_init(&mm, mm_size, SZ_4K);
-	KUNIT_ASSERT_EQ(test, ret, 0);
-
-	start = ktime_get();
-	/* Allocate maximum fragmentation */
-	for (i = 0; i < num_blocks; i++)
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
-								    &allocated_blocks, 0),
-					"buddy_alloc hit an error size=%u\n", SZ_8K);
-
-	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
-		if (count % 2 == 0)
-			list_move_tail(&block->link, &free_list);
-		count++;
-	}
-	drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED);
-
-	list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link)
-		list_move(&block->link, &reverse_list);
-	drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED);
-
-	end = ktime_get();
-	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
-
-	kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms);
-
-	drm_buddy_fini(&mm);
-}
-
-static void drm_test_buddy_alloc_range_bias(struct kunit *test)
-{
-	u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
-	DRM_RND_STATE(prng, random_seed);
-	unsigned int i, count, *order;
-	struct drm_buddy_block *block;
-	unsigned long flags;
-	struct drm_buddy mm;
-	LIST_HEAD(allocated);
-
-	bias_size = SZ_1M;
-	ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size);
-	ps = max(SZ_4K, ps);
-	mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */
-
-	kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
-
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
-			       "buddy_init failed\n");
-
-	count = mm_size / bias_size;
-	order = drm_random_order(count, &prng);
-	KUNIT_EXPECT_TRUE(test, order);
-
-	/*
-	 * Idea is to split the address space into uniform bias ranges, and then
-	 * in some random order allocate within each bias, using various
-	 * patterns within. This should detect if allocations leak out from a
-	 * given bias, for example.
-	 */
-
-	for (i = 0; i < count; i++) {
-		LIST_HEAD(tmp);
-		u32 size;
-
-		bias_start = order[i] * bias_size;
-		bias_end = bias_start + bias_size;
-		bias_rem = bias_size;
-
-		/* internal round_up too big */
-		KUNIT_ASSERT_TRUE_MSG(test,
-				      drm_buddy_alloc_blocks(&mm, bias_start,
-							     bias_end, bias_size + ps, bias_size,
-							     &allocated,
-							     DRM_BUDDY_RANGE_ALLOCATION),
-				      "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
-				      bias_start, bias_end, bias_size, bias_size);
-
-		/* size too big */
-		KUNIT_ASSERT_TRUE_MSG(test,
-				      drm_buddy_alloc_blocks(&mm, bias_start,
-							     bias_end, bias_size + ps, ps,
-							     &allocated,
-							     DRM_BUDDY_RANGE_ALLOCATION),
-				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
-				      bias_start, bias_end, bias_size + ps, ps);
-
-		/* bias range too small for size */
-		KUNIT_ASSERT_TRUE_MSG(test,
-				      drm_buddy_alloc_blocks(&mm, bias_start + ps,
-							     bias_end, bias_size, ps,
-							     &allocated,
-							     DRM_BUDDY_RANGE_ALLOCATION),
-				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
-				      bias_start + ps, bias_end, bias_size, ps);
-
-		/* bias misaligned */
-		KUNIT_ASSERT_TRUE_MSG(test,
-				      drm_buddy_alloc_blocks(&mm, bias_start + ps,
-							     bias_end - ps,
-							     bias_size >> 1, bias_size >> 1,
-							     &allocated,
-							     DRM_BUDDY_RANGE_ALLOCATION),
-				      "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n",
-				      bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1);
-
-		/* single big page */
-		KUNIT_ASSERT_FALSE_MSG(test,
-				       drm_buddy_alloc_blocks(&mm, bias_start,
-							      bias_end, bias_size, bias_size,
-							      &tmp,
-							      DRM_BUDDY_RANGE_ALLOCATION),
-				       "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n",
-				       bias_start, bias_end, bias_size, bias_size);
-		drm_buddy_free_list(&mm, &tmp, 0);
-
-		/* single page with internal round_up */
-		KUNIT_ASSERT_FALSE_MSG(test,
-				       drm_buddy_alloc_blocks(&mm, bias_start,
-							      bias_end, ps, bias_size,
-							      &tmp,
-							      DRM_BUDDY_RANGE_ALLOCATION),
-				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
-				       bias_start, bias_end, ps, bias_size);
-		drm_buddy_free_list(&mm, &tmp, 0);
-
-		/* random size within */
-		size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
-		if (size)
-			KUNIT_ASSERT_FALSE_MSG(test,
-					       drm_buddy_alloc_blocks(&mm, bias_start,
-								      bias_end, size, ps,
-								      &tmp,
-								      DRM_BUDDY_RANGE_ALLOCATION),
-					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
-					       bias_start, bias_end, size, ps);
-
-		bias_rem -= size;
-		/* too big for current avail */
-		KUNIT_ASSERT_TRUE_MSG(test,
-				      drm_buddy_alloc_blocks(&mm, bias_start,
-							     bias_end, bias_rem + ps, ps,
-							     &allocated,
-							     DRM_BUDDY_RANGE_ALLOCATION),
-				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
-				      bias_start, bias_end, bias_rem + ps, ps);
-
-		if (bias_rem) {
-			/* random fill of the remainder */
-			size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
-			size = max(size, ps);
-
-			KUNIT_ASSERT_FALSE_MSG(test,
-					       drm_buddy_alloc_blocks(&mm, bias_start,
-								      bias_end, size, ps,
-								      &allocated,
-								      DRM_BUDDY_RANGE_ALLOCATION),
-					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
-					       bias_start, bias_end, size, ps);
-			/*
-			 * Intentionally allow some space to be left
-			 * unallocated, and ideally not always on the bias
-			 * boundaries.
-			 */
-			drm_buddy_free_list(&mm, &tmp, 0);
-		} else {
-			list_splice_tail(&tmp, &allocated);
-		}
-	}
-
-	kfree(order);
-	drm_buddy_free_list(&mm, &allocated, 0);
-	drm_buddy_fini(&mm);
-
-	/*
-	 * Something more free-form. Idea is to pick a random starting bias
-	 * range within the address space and then start filling it up. Also
-	 * randomly grow the bias range in both directions as we go along. This
-	 * should give us bias start/end which is not always uniform like above,
-	 * and in some cases will require the allocator to jump over already
-	 * allocated nodes in the middle of the address space.
-	 */
-
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
-			       "buddy_init failed\n");
-
-	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
-	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
-	bias_end = max(bias_end, bias_start + ps);
-	bias_rem = bias_end - bias_start;
-
-	do {
-		u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
-
-		KUNIT_ASSERT_FALSE_MSG(test,
-				       drm_buddy_alloc_blocks(&mm, bias_start,
-							      bias_end, size, ps,
-							      &allocated,
-							      DRM_BUDDY_RANGE_ALLOCATION),
-				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
-				       bias_start, bias_end, size, ps);
-		bias_rem -= size;
-
-		/*
-		 * Try to randomly grow the bias range in both directions, or
-		 * only one, or perhaps don't grow at all.
-		 */
-		do {
-			u32 old_bias_start = bias_start;
-			u32 old_bias_end = bias_end;
-
-			if (bias_start)
-				bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps);
-			if (bias_end != mm_size)
-				bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps);
-
-			bias_rem += old_bias_start - bias_start;
-			bias_rem += bias_end - old_bias_end;
-		} while (!bias_rem && (bias_start || bias_end != mm_size));
-	} while (bias_rem);
-
-	KUNIT_ASSERT_EQ(test, bias_start, 0);
-	KUNIT_ASSERT_EQ(test, bias_end, mm_size);
-	KUNIT_ASSERT_TRUE_MSG(test,
-			      drm_buddy_alloc_blocks(&mm, bias_start, bias_end,
-						     ps, ps,
-						     &allocated,
-						     DRM_BUDDY_RANGE_ALLOCATION),
-			      "buddy_alloc passed with bias(%x-%x), size=%u\n",
-			      bias_start, bias_end, ps);
-
-	drm_buddy_free_list(&mm, &allocated, 0);
-	drm_buddy_fini(&mm);
-
-	/*
-	 * Allocate cleared blocks in the bias range when the DRM buddy's clear avail is
-	 * zero. This will validate the bias range allocation in scenarios like system boot
-	 * when no cleared blocks are available and exercise the fallback path too. The resulting
-	 * blocks should always be dirty.
-	 */
-
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
-			       "buddy_init failed\n");
-
-	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
-	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
-	bias_end = max(bias_end, bias_start + ps);
-	bias_rem = bias_end - bias_start;
-
-	flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION;
-	size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
-
-	KUNIT_ASSERT_FALSE_MSG(test,
-			       drm_buddy_alloc_blocks(&mm, bias_start,
-						      bias_end, size, ps,
-						      &allocated,
-						      flags),
-			       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
-			       bias_start, bias_end, size, ps);
-
-	list_for_each_entry(block, &allocated, link)
-		KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
-
-	drm_buddy_free_list(&mm, &allocated, 0);
-	drm_buddy_fini(&mm);
-}
-
-static void drm_test_buddy_alloc_clear(struct kunit *test)
-{
-	unsigned long n_pages, total, i = 0;
-	const unsigned long ps = SZ_4K;
-	struct drm_buddy_block *block;
-	const int max_order = 12;
-	LIST_HEAD(allocated);
-	struct drm_buddy mm;
-	unsigned int order;
-	u32 mm_size, size;
-	LIST_HEAD(dirty);
-	LIST_HEAD(clean);
-
-	mm_size = SZ_4K << max_order;
-	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
-
-	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
-
-	/*
-	 * Idea is to allocate and free some random portion of the address space,
-	 * returning those pages as non-dirty and randomly alternate between
-	 * requesting dirty and non-dirty pages (not going over the limit
-	 * we freed as non-dirty), putting that into two separate lists.
-	 * Loop over both lists at the end checking that the dirty list
-	 * is indeed all dirty pages and vice versa. Free it all again,
-	 * keeping the dirty/clear status.
-	 */
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-							    5 * ps, ps, &allocated,
-							    DRM_BUDDY_TOPDOWN_ALLOCATION),
-				"buddy_alloc hit an error size=%lu\n", 5 * ps);
-	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
-
-	n_pages = 10;
-	do {
-		unsigned long flags;
-		struct list_head *list;
-		int slot = i % 2;
-
-		if (slot == 0) {
-			list = &dirty;
-			flags = 0;
-		} else {
-			list = &clean;
-			flags = DRM_BUDDY_CLEAR_ALLOCATION;
-		}
-
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-								    ps, ps, list,
-								    flags),
-					"buddy_alloc hit an error size=%lu\n", ps);
-	} while (++i < n_pages);
-
-	list_for_each_entry(block, &clean, link)
-		KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true);
-
-	list_for_each_entry(block, &dirty, link)
-		KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
-
-	drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
-
-	/*
-	 * Trying to go over the clear limit for some allocation.
-	 * The allocation should never fail with reasonable page-size.
-	 */
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-							    10 * ps, ps, &clean,
-							    DRM_BUDDY_CLEAR_ALLOCATION),
-				"buddy_alloc hit an error size=%lu\n", 10 * ps);
-
-	drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
-	drm_buddy_free_list(&mm, &dirty, 0);
-	drm_buddy_fini(&mm);
-
-	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
-
-	/*
-	 * Create a new mm. Intentionally fragment the address space by creating
-	 * two alternating lists. Free both lists, one as dirty the other as clean.
-	 * Try to allocate double the previous size with matching min_page_size. The
-	 * allocation should never fail as it calls the force_merge. Also check that
-	 * the page is always dirty after force_merge. Free the page as dirty, then
-	 * repeat the whole thing, increment the order until we hit the max_order.
-	 */
-
-	i = 0;
-	n_pages = mm_size / ps;
-	do {
-		struct list_head *list;
-		int slot = i % 2;
-
-		if (slot == 0)
-			list = &dirty;
-		else
-			list = &clean;
-
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-								    ps, ps, list, 0),
-					"buddy_alloc hit an error size=%lu\n", ps);
-	} while (++i < n_pages);
-
-	drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
-	drm_buddy_free_list(&mm, &dirty, 0);
-
-	order = 1;
-	do {
-		size = SZ_4K << order;
-
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-								    size, size, &allocated,
-								    DRM_BUDDY_CLEAR_ALLOCATION),
-					"buddy_alloc hit an error size=%u\n", size);
-		total = 0;
-		list_for_each_entry(block, &allocated, link) {
-			if (size != mm_size)
-				KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
-			total += drm_buddy_block_size(&mm, block);
-		}
-		KUNIT_EXPECT_EQ(test, total, size);
-
-		drm_buddy_free_list(&mm, &allocated, 0);
-	} while (++order <= max_order);
-
-	drm_buddy_fini(&mm);
-
-	/*
-	 * Create a new mm with a non power-of-two size. Allocate a random size from each
-	 * root, free as cleared and then call fini. This will ensure the multi-root
-	 * force merge during fini.
-	 */
-	mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2));
-
-	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
-	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
-							    4 * ps, ps, &allocated,
-							    DRM_BUDDY_RANGE_ALLOCATION),
-				"buddy_alloc hit an error size=%lu\n", 4 * ps);
-	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
-							    2 * ps, ps, &allocated,
-							    DRM_BUDDY_CLEAR_ALLOCATION),
-				"buddy_alloc hit an error size=%lu\n", 2 * ps);
-	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
-							    ps, ps, &allocated,
-							    DRM_BUDDY_RANGE_ALLOCATION),
-				"buddy_alloc hit an error size=%lu\n", ps);
-	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
-	drm_buddy_fini(&mm);
-}
-
-static void drm_test_buddy_alloc_contiguous(struct kunit *test)
-{
-	const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
-	unsigned long i, n_pages, total;
-	struct drm_buddy_block *block;
-	struct drm_buddy mm;
-	LIST_HEAD(left);
-	LIST_HEAD(middle);
-	LIST_HEAD(right);
-	LIST_HEAD(allocated);
-
-	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
-
-	/*
-	 * Idea is to fragment the address space by alternating block
-	 * allocations between three different lists; one for left, middle and
-	 * right. We can then free a list to simulate fragmentation. In
-	 * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
-	 * including the try_harder path.
-	 */
-
-	i = 0;
-	n_pages = mm_size / ps;
-	do {
-		struct list_head *list;
-		int slot = i % 3;
-
-		if (slot == 0)
-			list = &left;
-		else if (slot == 1)
-			list = &middle;
-		else
-			list = &right;
-		KUNIT_ASSERT_FALSE_MSG(test,
-				       drm_buddy_alloc_blocks(&mm, 0, mm_size,
-							      ps, ps, list, 0),
-				       "buddy_alloc hit an error size=%lu\n",
-				       ps);
-	} while (++i < n_pages);
-
-	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-							   3 * ps, ps, &allocated,
-							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
-
-	drm_buddy_free_list(&mm, &middle, 0);
-	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-							   3 * ps, ps, &allocated,
-							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
-	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-							   2 * ps, ps, &allocated,
-							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-			       "buddy_alloc didn't error size=%lu\n", 2 * ps);
-
-	drm_buddy_free_list(&mm, &right, 0);
-	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-							   3 * ps, ps, &allocated,
-							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
-	/*
-	 * At this point we should have enough contiguous space for 2 blocks,
-	 * however they are never buddies (since we freed middle and right) so
-	 * will require the try_harder logic to find them.
-	 */
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-							    2 * ps, ps, &allocated,
-							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-			       "buddy_alloc hit an error size=%lu\n", 2 * ps);
-
-	drm_buddy_free_list(&mm, &left, 0);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
-							    3 * ps, ps, &allocated,
-							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-			       "buddy_alloc hit an error size=%lu\n", 3 * ps);
-
-	total = 0;
-	list_for_each_entry(block, &allocated, link)
-		total += drm_buddy_block_size(&mm, block);
-
-	KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
-
-	drm_buddy_free_list(&mm, &allocated, 0);
-	drm_buddy_fini(&mm);
-}
-
-static void drm_test_buddy_alloc_pathological(struct kunit *test)
-{
-	u64 mm_size, size, start = 0;
-	struct drm_buddy_block *block;
-	const int max_order = 3;
-	unsigned long flags = 0;
-	int order, top;
-	struct drm_buddy mm;
-	LIST_HEAD(blocks);
-	LIST_HEAD(holes);
-	LIST_HEAD(tmp);
-
-	/*
-	 * Create a pot-sized mm, then allocate one of each possible
-	 * order within. This should leave the mm with exactly one
-	 * page left. Free the largest block, then whittle down again.
-	 * Eventually we will have a fully 50% fragmented mm.
-	 */
-
-	mm_size = SZ_4K << max_order;
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
-			       "buddy_init failed\n");
-
-	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
-
-	for (top = max_order; top; top--) {
-		/* Make room by freeing the largest allocated block */
-		block = list_first_entry_or_null(&blocks, typeof(*block), link);
-		if (block) {
-			list_del(&block->link);
-			drm_buddy_free_block(&mm, block);
-		}
-
-		for (order = top; order--;) {
-			size = get_size(order, mm.chunk_size);
-			KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start,
-									    mm_size, size, size,
-										&tmp, flags),
-					"buddy_alloc hit -ENOMEM with order=%d, top=%d\n",
-					order, top);
-
-			block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
-			KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
-
-			list_move_tail(&block->link, &blocks);
-		}
-
-		/* There should be one final page for this sub-allocation */
-		size = get_size(0, mm.chunk_size);
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
-								    size, size, &tmp, flags),
-							   "buddy_alloc hit -ENOMEM for hole\n");
-
-		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
-		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
-
-		list_move_tail(&block->link, &holes);
-
-		size = get_size(top, mm.chunk_size);
-		KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
-								   size, size, &tmp, flags),
-							  "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!",
-							  top, max_order);
-	}
-
-	drm_buddy_free_list(&mm, &holes, 0);
-
-	/* Nothing larger than blocks of chunk_size now available */
-	for (order = 1; order <= max_order; order++) {
-		size = get_size(order, mm.chunk_size);
-		KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
-								   size, size, &tmp, flags),
-							  "buddy_alloc unexpectedly succeeded at order %d, it should be full!",
-							  order);
-	}
-
-	list_splice_tail(&holes, &blocks);
-	drm_buddy_free_list(&mm, &blocks, 0);
-	drm_buddy_fini(&mm);
-}
-
-static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
-{
-	u64 mm_size, size, start = 0;
-	struct drm_buddy_block *block, *bn;
-	const unsigned int max_order = 16;
-	unsigned long flags = 0;
-	struct drm_buddy mm;
-	unsigned int order;
-	LIST_HEAD(blocks);
-	LIST_HEAD(tmp);
-
-	/*
-	 * Create a pot-sized mm, then allocate one of each possible
-	 * order within. This should leave the mm with exactly one
-	 * page left.
-	 */
-
-	mm_size = SZ_4K << max_order;
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
-			       "buddy_init failed\n");
-
-	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
-
-	for (order = 0; order < max_order; order++) {
-		size = get_size(order, mm.chunk_size);
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
-								    size, size, &tmp, flags),
-							   "buddy_alloc hit -ENOMEM with order=%d\n",
-							   order);
-
-		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
-		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
-
-		list_move_tail(&block->link, &blocks);
-	}
-
-	/* And now the last remaining block available */
-	size = get_size(0, mm.chunk_size);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
-							    size, size, &tmp, flags),
-						   "buddy_alloc hit -ENOMEM on final alloc\n");
-
-	block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
-	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
-
-	list_move_tail(&block->link, &blocks);
-
-	/* Should be completely full! */
-	for (order = max_order; order--;) {
-		size = get_size(order, mm.chunk_size);
-		KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
-								   size, size, &tmp, flags),
-							  "buddy_alloc unexpectedly succeeded, it should be full!");
-	}
-
-	block = list_last_entry(&blocks, typeof(*block), link);
-	list_del(&block->link);
-	drm_buddy_free_block(&mm, block);
-
-	/* As we free in increasing size, we make available larger blocks */
-	order = 1;
-	list_for_each_entry_safe(block, bn, &blocks, link) {
-		list_del(&block->link);
-		drm_buddy_free_block(&mm, block);
-
-		size = get_size(order, mm.chunk_size);
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
-								    size, size, &tmp, flags),
-							   "buddy_alloc hit -ENOMEM with order=%d\n",
-							   order);
-
-		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
-		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
-
-		list_del(&block->link);
-		drm_buddy_free_block(&mm, block);
-		order++;
-	}
-
-	/* To confirm, now the whole mm should be available */
-	size = get_size(max_order, mm.chunk_size);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
-							    size, size, &tmp, flags),
-						   "buddy_alloc (realloc) hit -ENOMEM with order=%d\n",
-						   max_order);
-
-	block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
-	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
-
-	list_del(&block->link);
-	drm_buddy_free_block(&mm, block);
-	drm_buddy_free_list(&mm, &blocks, 0);
-	drm_buddy_fini(&mm);
-}
-
-static void drm_test_buddy_alloc_optimistic(struct kunit *test)
-{
-	u64 mm_size, size, start = 0;
-	struct drm_buddy_block *block;
-	unsigned long flags = 0;
-	const int max_order = 16;
-	struct drm_buddy mm;
-	LIST_HEAD(blocks);
-	LIST_HEAD(tmp);
-	int order;
-
-	/*
-	 * Create a mm with one block of each order available, and
-	 * try to allocate them all.
-	 */
-
-	mm_size = SZ_4K * ((1 << (max_order + 1)) - 1);
-
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
-			       "buddy_init failed\n");
-
-	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
-
-	for (order = 0; order <= max_order; order++) {
-		size = get_size(order, mm.chunk_size);
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
-								    size, size, &tmp, flags),
-							   "buddy_alloc hit -ENOMEM with order=%d\n",
-							   order);
-
-		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
-		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
-
-		list_move_tail(&block->link, &blocks);
-	}
-
-	/* Should be completely full! */
-	size = get_size(0, mm.chunk_size);
-	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
-							   size, size, &tmp, flags),
-						  "buddy_alloc unexpectedly succeeded, it should be full!");
-
-	drm_buddy_free_list(&mm, &blocks, 0);
-	drm_buddy_fini(&mm);
-}
-
-static void drm_test_buddy_alloc_limit(struct kunit *test)
-{
-	u64 size = U64_MAX, start = 0;
-	struct drm_buddy_block *block;
-	unsigned long flags = 0;
-	LIST_HEAD(allocated);
-	struct drm_buddy mm;
-
-	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, size, SZ_4K));
-
-	KUNIT_EXPECT_EQ_MSG(test, mm.max_order, DRM_BUDDY_MAX_ORDER,
-			    "mm.max_order(%d) != %d\n", mm.max_order,
-						DRM_BUDDY_MAX_ORDER);
-
-	size = mm.chunk_size << mm.max_order;
-	KUNIT_EXPECT_FALSE(test, drm_buddy_alloc_blocks(&mm, start, size, size,
-							mm.chunk_size, &allocated, flags));
-
-	block = list_first_entry_or_null(&allocated, struct drm_buddy_block, link);
-	KUNIT_EXPECT_TRUE(test, block);
-
-	KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_order(block), mm.max_order,
-			    "block order(%d) != %d\n",
-						drm_buddy_block_order(block), mm.max_order);
-
-	KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_size(&mm, block),
-			    BIT_ULL(mm.max_order) * mm.chunk_size,
-						"block size(%llu) != %llu\n",
-						drm_buddy_block_size(&mm, block),
-						BIT_ULL(mm.max_order) * mm.chunk_size);
-
-	drm_buddy_free_list(&mm, &allocated, 0);
-	drm_buddy_fini(&mm);
-}
-
-static void drm_test_buddy_alloc_exceeds_max_order(struct kunit *test)
-{
-	u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G;
-	struct drm_buddy mm;
-	LIST_HEAD(blocks);
-	int err;
-
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
-			       "buddy_init failed\n");
-
-	/* CONTIGUOUS allocation should succeed via try_harder fallback */
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, size,
-							    SZ_4K, &blocks,
-							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-			       "buddy_alloc hit an error size=%llu\n", size);
-	drm_buddy_free_list(&mm, &blocks, 0);
-
-	/* Non-CONTIGUOUS with large min_block_size should return -EINVAL */
-	err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0);
-	KUNIT_EXPECT_EQ(test, err, -EINVAL);
-
-	/* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */
-	err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks,
-				     DRM_BUDDY_RANGE_ALLOCATION);
-	KUNIT_EXPECT_EQ(test, err, -EINVAL);
-
-	/* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */
-	err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks,
-				     DRM_BUDDY_CONTIGUOUS_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION);
-	KUNIT_EXPECT_EQ(test, err, -EINVAL);
-
-	drm_buddy_fini(&mm);
-}
-
-static int drm_buddy_suite_init(struct kunit_suite *suite)
-{
-	while (!random_seed)
-		random_seed = get_random_u32();
-
-	kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n",
-		   random_seed);
-
-	return 0;
-}
-
-static struct kunit_case drm_buddy_tests[] = {
-	KUNIT_CASE(drm_test_buddy_alloc_limit),
-	KUNIT_CASE(drm_test_buddy_alloc_optimistic),
-	KUNIT_CASE(drm_test_buddy_alloc_pessimistic),
-	KUNIT_CASE(drm_test_buddy_alloc_pathological),
-	KUNIT_CASE(drm_test_buddy_alloc_contiguous),
-	KUNIT_CASE(drm_test_buddy_alloc_clear),
-	KUNIT_CASE(drm_test_buddy_alloc_range_bias),
-	KUNIT_CASE(drm_test_buddy_fragmentation_performance),
-	KUNIT_CASE(drm_test_buddy_alloc_exceeds_max_order),
-	{}
-};
-
-static struct kunit_suite drm_buddy_test_suite = {
-	.name = "drm_buddy",
-	.suite_init = drm_buddy_suite_init,
-	.test_cases = drm_buddy_tests,
-};
-
-kunit_test_suite(drm_buddy_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Kunit test for drm_buddy functions");
-MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c
index 3a20c788c51f..2fc47f3b463b 100644
--- a/drivers/gpu/drm/tests/drm_exec_test.c
+++ b/drivers/gpu/drm/tests/drm_exec_test.c
@@ -16,8 +16,6 @@
 #include <drm/drm_gem.h>
 #include <drm/drm_kunit_helpers.h>
 
-#include "../lib/drm_random.h"
-
 struct drm_exec_priv {
 	struct device *dev;
 	struct drm_device *drm;
diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c
index aec9eccdeae9..e24a619059d8 100644
--- a/drivers/gpu/drm/tests/drm_mm_test.c
+++ b/drivers/gpu/drm/tests/drm_mm_test.c
@@ -16,8 +16,6 @@
 #include <drm/drm_mm.h>
 #include <drm/drm_print.h>
 
-#include "../lib/drm_random.h"
-
 enum {
 	BEST,
 	BOTTOMUP,
diff --git a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
index e4c95f86a467..96ea8c9aae34 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
+++ b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
@@ -5,7 +5,7 @@
 #ifndef TTM_MOCK_MANAGER_H
 #define TTM_MOCK_MANAGER_H
 
-#include <drm/drm_buddy.h>
+#include <linux/gpu_buddy.h>
 
 struct ttm_mock_manager {
 	struct ttm_resource_manager man;
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index a71e14818ec2..babeec5511d9 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -6,7 +6,7 @@
 #ifndef _XE_TTM_VRAM_MGR_TYPES_H_
 #define _XE_TTM_VRAM_MGR_TYPES_H_
 
-#include <drm/drm_buddy.h>
+#include <linux/gpu_buddy.h>
 #include <drm/ttm/ttm_device.h>
 
 /**
diff --git a/drivers/gpu/tests/Makefile b/drivers/gpu/tests/Makefile
new file mode 100644
index 000000000000..8e7654e87d82
--- /dev/null
+++ b/drivers/gpu/tests/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+gpu_buddy_tests-y = gpu_buddy_test.o gpu_random.o
+obj-$(CONFIG_DRM_KUNIT_TEST) += gpu_buddy_tests.o
diff --git a/drivers/gpu/tests/gpu_buddy_test.c b/drivers/gpu/tests/gpu_buddy_test.c
new file mode 100644
index 000000000000..b905932da990
--- /dev/null
+++ b/drivers/gpu/tests/gpu_buddy_test.c
@@ -0,0 +1,928 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ * Copyright © 2022 Maíra Canal <mairacanal@riseup.net>
+ */
+
+#include <kunit/test.h>
+
+#include <linux/prime_numbers.h>
+#include <linux/sched/signal.h>
+#include <linux/sizes.h>
+
+#include <linux/gpu_buddy.h>
+
+#include "gpu_random.h"
+
+static unsigned int random_seed;
+
+static inline u64 get_size(int order, u64 chunk_size)
+{
+	return (1 << order) * chunk_size;
+}
+
+static void drm_test_buddy_fragmentation_performance(struct kunit *test)
+{
+	struct drm_buddy_block *block, *tmp;
+	int num_blocks, i, ret, count = 0;
+	LIST_HEAD(allocated_blocks);
+	unsigned long elapsed_ms;
+	LIST_HEAD(reverse_list);
+	LIST_HEAD(test_blocks);
+	LIST_HEAD(clear_list);
+	LIST_HEAD(dirty_list);
+	LIST_HEAD(free_list);
+	struct drm_buddy mm;
+	u64 mm_size = SZ_4G;
+	ktime_t start, end;
+
+	/*
+	 * Allocation under severe fragmentation
+	 *
+	 * Create severe fragmentation by allocating the entire 4 GiB address space
+	 * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern
+	 * leaves many scattered holes. Split the allocations into two groups and
+	 * return them with different flags to block coalescing, then repeatedly
+	 * allocate and free 64 KiB blocks while timing the loop. This stresses how
+	 * quickly the allocator can satisfy larger, aligned requests from a pool of
+	 * highly fragmented space.
+	 */
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+			       "buddy_init failed\n");
+
+	num_blocks = mm_size / SZ_64K;
+
+	start = ktime_get();
+	/* Allocate with maximum fragmentation - 8K blocks with 64K alignment */
+	for (i = 0; i < num_blocks; i++)
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
+								    &allocated_blocks, 0),
+					"buddy_alloc hit an error size=%u\n", SZ_8K);
+
+	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
+		if (count % 4 == 0 || count % 4 == 3)
+			list_move_tail(&block->link, &clear_list);
+		else
+			list_move_tail(&block->link, &dirty_list);
+		count++;
+	}
+
+	/* Free with different flags to ensure no coalescing */
+	drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED);
+	drm_buddy_free_list(&mm, &dirty_list, 0);
+
+	for (i = 0; i < num_blocks; i++)
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
+								    &test_blocks, 0),
+					"buddy_alloc hit an error size=%u\n", SZ_64K);
+	drm_buddy_free_list(&mm, &test_blocks, 0);
+
+	end = ktime_get();
+	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
+
+	kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms);
+
+	drm_buddy_fini(&mm);
+
+	/*
+	 * Reverse free order under fragmentation
+	 *
+	 * Construct a fragmented 4 GiB space by allocating every 8 KiB block with
+	 * 64 KiB alignment, creating a dense scatter of small regions. Half of the
+	 * blocks are selectively freed to form sparse gaps, while the remaining
+	 * allocations are preserved, reordered in reverse, and released back with
+	 * the cleared flag. This models a pathological reverse-ordered free pattern
+	 * and measures how quickly the allocator can merge and reclaim space when
+	 * deallocation occurs in the opposite order of allocation, exposing the
+	 * cost difference between a linear freelist scan and an ordered tree lookup.
+	 */
+	ret = drm_buddy_init(&mm, mm_size, SZ_4K);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	start = ktime_get();
+	/* Allocate maximum fragmentation */
+	for (i = 0; i < num_blocks; i++)
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
+								    &allocated_blocks, 0),
+					"buddy_alloc hit an error size=%u\n", SZ_8K);
+
+	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
+		if (count % 2 == 0)
+			list_move_tail(&block->link, &free_list);
+		count++;
+	}
+	drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED);
+
+	list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link)
+		list_move(&block->link, &reverse_list);
+	drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED);
+
+	end = ktime_get();
+	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
+
+	kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms);
+
+	drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_range_bias(struct kunit *test)
+{
+	u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
+	DRM_RND_STATE(prng, random_seed);
+	unsigned int i, count, *order;
+	struct drm_buddy_block *block;
+	unsigned long flags;
+	struct drm_buddy mm;
+	LIST_HEAD(allocated);
+
+	bias_size = SZ_1M;
+	ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size);
+	ps = max(SZ_4K, ps);
+	mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */
+
+	kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
+
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+			       "buddy_init failed\n");
+
+	count = mm_size / bias_size;
+	order = drm_random_order(count, &prng);
+	KUNIT_EXPECT_TRUE(test, order);
+
+	/*
+	 * Idea is to split the address space into uniform bias ranges, and then
+	 * in some random order allocate within each bias, using various
+	 * patterns within. This should detect if allocations leak out from a
+	 * given bias, for example.
+	 */
+
+	for (i = 0; i < count; i++) {
+		LIST_HEAD(tmp);
+		u32 size;
+
+		bias_start = order[i] * bias_size;
+		bias_end = bias_start + bias_size;
+		bias_rem = bias_size;
+
+		/* internal round_up too big */
+		KUNIT_ASSERT_TRUE_MSG(test,
+				      drm_buddy_alloc_blocks(&mm, bias_start,
+							     bias_end, bias_size + ps, bias_size,
+							     &allocated,
+							     DRM_BUDDY_RANGE_ALLOCATION),
+				      "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+				      bias_start, bias_end, bias_size, bias_size);
+
+		/* size too big */
+		KUNIT_ASSERT_TRUE_MSG(test,
+				      drm_buddy_alloc_blocks(&mm, bias_start,
+							     bias_end, bias_size + ps, ps,
+							     &allocated,
+							     DRM_BUDDY_RANGE_ALLOCATION),
+				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+				      bias_start, bias_end, bias_size + ps, ps);
+
+		/* bias range too small for size */
+		KUNIT_ASSERT_TRUE_MSG(test,
+				      drm_buddy_alloc_blocks(&mm, bias_start + ps,
+							     bias_end, bias_size, ps,
+							     &allocated,
+							     DRM_BUDDY_RANGE_ALLOCATION),
+				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+				      bias_start + ps, bias_end, bias_size, ps);
+
+		/* bias misaligned */
+		KUNIT_ASSERT_TRUE_MSG(test,
+				      drm_buddy_alloc_blocks(&mm, bias_start + ps,
+							     bias_end - ps,
+							     bias_size >> 1, bias_size >> 1,
+							     &allocated,
+							     DRM_BUDDY_RANGE_ALLOCATION),
+				      "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+				      bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1);
+
+		/* single big page */
+		KUNIT_ASSERT_FALSE_MSG(test,
+				       drm_buddy_alloc_blocks(&mm, bias_start,
+							      bias_end, bias_size, bias_size,
+							      &tmp,
+							      DRM_BUDDY_RANGE_ALLOCATION),
+				       "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n",
+				       bias_start, bias_end, bias_size, bias_size);
+		drm_buddy_free_list(&mm, &tmp, 0);
+
+		/* single page with internal round_up */
+		KUNIT_ASSERT_FALSE_MSG(test,
+				       drm_buddy_alloc_blocks(&mm, bias_start,
+							      bias_end, ps, bias_size,
+							      &tmp,
+							      DRM_BUDDY_RANGE_ALLOCATION),
+				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+				       bias_start, bias_end, ps, bias_size);
+		drm_buddy_free_list(&mm, &tmp, 0);
+
+		/* random size within */
+		size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+		if (size)
+			KUNIT_ASSERT_FALSE_MSG(test,
+					       drm_buddy_alloc_blocks(&mm, bias_start,
+								      bias_end, size, ps,
+								      &tmp,
+								      DRM_BUDDY_RANGE_ALLOCATION),
+					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+					       bias_start, bias_end, size, ps);
+
+		bias_rem -= size;
+		/* too big for current avail */
+		KUNIT_ASSERT_TRUE_MSG(test,
+				      drm_buddy_alloc_blocks(&mm, bias_start,
+							     bias_end, bias_rem + ps, ps,
+							     &allocated,
+							     DRM_BUDDY_RANGE_ALLOCATION),
+				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+				      bias_start, bias_end, bias_rem + ps, ps);
+
+		if (bias_rem) {
+			/* random fill of the remainder */
+			size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+			size = max(size, ps);
+
+			KUNIT_ASSERT_FALSE_MSG(test,
+					       drm_buddy_alloc_blocks(&mm, bias_start,
+								      bias_end, size, ps,
+								      &allocated,
+								      DRM_BUDDY_RANGE_ALLOCATION),
+					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+					       bias_start, bias_end, size, ps);
+			/*
+			 * Intentionally allow some space to be left
+			 * unallocated, and ideally not always on the bias
+			 * boundaries.
+			 */
+			drm_buddy_free_list(&mm, &tmp, 0);
+		} else {
+			list_splice_tail(&tmp, &allocated);
+		}
+	}
+
+	kfree(order);
+	drm_buddy_free_list(&mm, &allocated, 0);
+	drm_buddy_fini(&mm);
+
+	/*
+	 * Something more free-form. Idea is to pick a random starting bias
+	 * range within the address space and then start filling it up. Also
+	 * randomly grow the bias range in both directions as we go along. This
+	 * should give us bias start/end which is not always uniform like above,
+	 * and in some cases will require the allocator to jump over already
+	 * allocated nodes in the middle of the address space.
+	 */
+
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+			       "buddy_init failed\n");
+
+	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
+	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
+	bias_end = max(bias_end, bias_start + ps);
+	bias_rem = bias_end - bias_start;
+
+	do {
+		u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+
+		KUNIT_ASSERT_FALSE_MSG(test,
+				       drm_buddy_alloc_blocks(&mm, bias_start,
+							      bias_end, size, ps,
+							      &allocated,
+							      DRM_BUDDY_RANGE_ALLOCATION),
+				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+				       bias_start, bias_end, size, ps);
+		bias_rem -= size;
+
+		/*
+		 * Try to randomly grow the bias range in both directions, or
+		 * only one, or perhaps don't grow at all.
+		 */
+		do {
+			u32 old_bias_start = bias_start;
+			u32 old_bias_end = bias_end;
+
+			if (bias_start)
+				bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps);
+			if (bias_end != mm_size)
+				bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps);
+
+			bias_rem += old_bias_start - bias_start;
+			bias_rem += bias_end - old_bias_end;
+		} while (!bias_rem && (bias_start || bias_end != mm_size));
+	} while (bias_rem);
+
+	KUNIT_ASSERT_EQ(test, bias_start, 0);
+	KUNIT_ASSERT_EQ(test, bias_end, mm_size);
+	KUNIT_ASSERT_TRUE_MSG(test,
+			      drm_buddy_alloc_blocks(&mm, bias_start, bias_end,
+						     ps, ps,
+						     &allocated,
+						     DRM_BUDDY_RANGE_ALLOCATION),
+			      "buddy_alloc passed with bias(%x-%x), size=%u\n",
+			      bias_start, bias_end, ps);
+
+	drm_buddy_free_list(&mm, &allocated, 0);
+	drm_buddy_fini(&mm);
+
+	/*
+	 * Allocate cleared blocks in the bias range when the DRM buddy's clear avail is
+	 * zero. This will validate the bias range allocation in scenarios like system boot
+	 * when no cleared blocks are available and exercise the fallback path too. The resulting
+	 * blocks should always be dirty.
+	 */
+
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+			       "buddy_init failed\n");
+
+	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
+	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
+	bias_end = max(bias_end, bias_start + ps);
+	bias_rem = bias_end - bias_start;
+
+	flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION;
+	size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+
+	KUNIT_ASSERT_FALSE_MSG(test,
+			       drm_buddy_alloc_blocks(&mm, bias_start,
+						      bias_end, size, ps,
+						      &allocated,
+						      flags),
+			       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+			       bias_start, bias_end, size, ps);
+
+	list_for_each_entry(block, &allocated, link)
+		KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+	drm_buddy_free_list(&mm, &allocated, 0);
+	drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_clear(struct kunit *test)
+{
+	unsigned long n_pages, total, i = 0;
+	const unsigned long ps = SZ_4K;
+	struct drm_buddy_block *block;
+	const int max_order = 12;
+	LIST_HEAD(allocated);
+	struct drm_buddy mm;
+	unsigned int order;
+	u32 mm_size, size;
+	LIST_HEAD(dirty);
+	LIST_HEAD(clean);
+
+	mm_size = SZ_4K << max_order;
+	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+
+	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
+
+	/*
+	 * Idea is to allocate and free some random portion of the address space,
+	 * returning those pages as non-dirty and randomly alternate between
+	 * requesting dirty and non-dirty pages (not going over the limit
+	 * we freed as non-dirty), putting that into two separate lists.
+	 * Loop over both lists at the end checking that the dirty list
+	 * is indeed all dirty pages and vice versa. Free it all again,
+	 * keeping the dirty/clear status.
+	 */
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							    5 * ps, ps, &allocated,
+							    DRM_BUDDY_TOPDOWN_ALLOCATION),
+				"buddy_alloc hit an error size=%lu\n", 5 * ps);
+	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
+
+	n_pages = 10;
+	do {
+		unsigned long flags;
+		struct list_head *list;
+		int slot = i % 2;
+
+		if (slot == 0) {
+			list = &dirty;
+			flags = 0;
+		} else {
+			list = &clean;
+			flags = DRM_BUDDY_CLEAR_ALLOCATION;
+		}
+
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+								    ps, ps, list,
+								    flags),
+					"buddy_alloc hit an error size=%lu\n", ps);
+	} while (++i < n_pages);
+
+	list_for_each_entry(block, &clean, link)
+		KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true);
+
+	list_for_each_entry(block, &dirty, link)
+		KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+	drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
+
+	/*
+	 * Trying to go over the clear limit for some allocation.
+	 * The allocation should never fail with reasonable page-size.
+	 */
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							    10 * ps, ps, &clean,
+							    DRM_BUDDY_CLEAR_ALLOCATION),
+				"buddy_alloc hit an error size=%lu\n", 10 * ps);
+
+	drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
+	drm_buddy_free_list(&mm, &dirty, 0);
+	drm_buddy_fini(&mm);
+
+	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+
+	/*
+	 * Create a new mm. Intentionally fragment the address space by creating
+	 * two alternating lists. Free both lists, one as dirty the other as clean.
+	 * Try to allocate double the previous size with matching min_page_size. The
+	 * allocation should never fail as it calls the force_merge. Also check that
+	 * the page is always dirty after force_merge. Free the page as dirty, then
+	 * repeat the whole thing, increment the order until we hit the max_order.
+	 */
+
+	i = 0;
+	n_pages = mm_size / ps;
+	do {
+		struct list_head *list;
+		int slot = i % 2;
+
+		if (slot == 0)
+			list = &dirty;
+		else
+			list = &clean;
+
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+								    ps, ps, list, 0),
+					"buddy_alloc hit an error size=%lu\n", ps);
+	} while (++i < n_pages);
+
+	drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
+	drm_buddy_free_list(&mm, &dirty, 0);
+
+	order = 1;
+	do {
+		size = SZ_4K << order;
+
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+								    size, size, &allocated,
+								    DRM_BUDDY_CLEAR_ALLOCATION),
+					"buddy_alloc hit an error size=%u\n", size);
+		total = 0;
+		list_for_each_entry(block, &allocated, link) {
+			if (size != mm_size)
+				KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+			total += drm_buddy_block_size(&mm, block);
+		}
+		KUNIT_EXPECT_EQ(test, total, size);
+
+		drm_buddy_free_list(&mm, &allocated, 0);
+	} while (++order <= max_order);
+
+	drm_buddy_fini(&mm);
+
+	/*
+	 * Create a new mm with a non power-of-two size. Allocate a random size from each
+	 * root, free as cleared and then call fini. This will ensure the multi-root
+	 * force merge during fini.
+	 */
+	mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2));
+
+	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
+							    4 * ps, ps, &allocated,
+							    DRM_BUDDY_RANGE_ALLOCATION),
+				"buddy_alloc hit an error size=%lu\n", 4 * ps);
+	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
+							    2 * ps, ps, &allocated,
+							    DRM_BUDDY_CLEAR_ALLOCATION),
+				"buddy_alloc hit an error size=%lu\n", 2 * ps);
+	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
+							    ps, ps, &allocated,
+							    DRM_BUDDY_RANGE_ALLOCATION),
+				"buddy_alloc hit an error size=%lu\n", ps);
+	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
+	drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+{
+	const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
+	unsigned long i, n_pages, total;
+	struct drm_buddy_block *block;
+	struct drm_buddy mm;
+	LIST_HEAD(left);
+	LIST_HEAD(middle);
+	LIST_HEAD(right);
+	LIST_HEAD(allocated);
+
+	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+
+	/*
+	 * Idea is to fragment the address space by alternating block
+	 * allocations between three different lists; one for left, middle and
+	 * right. We can then free a list to simulate fragmentation. In
+	 * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+	 * including the try_harder path.
+	 */
+
+	i = 0;
+	n_pages = mm_size / ps;
+	do {
+		struct list_head *list;
+		int slot = i % 3;
+
+		if (slot == 0)
+			list = &left;
+		else if (slot == 1)
+			list = &middle;
+		else
+			list = &right;
+		KUNIT_ASSERT_FALSE_MSG(test,
+				       drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							      ps, ps, list, 0),
+				       "buddy_alloc hit an error size=%lu\n",
+				       ps);
+	} while (++i < n_pages);
+
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   3 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
+
+	drm_buddy_free_list(&mm, &middle, 0);
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   3 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   2 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%lu\n", 2 * ps);
+
+	drm_buddy_free_list(&mm, &right, 0);
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   3 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
+	/*
+	 * At this point we should have enough contiguous space for 2 blocks,
+	 * however they are never buddies (since we freed middle and right) so
+	 * will require the try_harder logic to find them.
+	 */
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							    2 * ps, ps, &allocated,
+							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc hit an error size=%lu\n", 2 * ps);
+
+	drm_buddy_free_list(&mm, &left, 0);
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							    3 * ps, ps, &allocated,
+							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc hit an error size=%lu\n", 3 * ps);
+
+	total = 0;
+	list_for_each_entry(block, &allocated, link)
+		total += drm_buddy_block_size(&mm, block);
+
+	KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+	drm_buddy_free_list(&mm, &allocated, 0);
+	drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_pathological(struct kunit *test)
+{
+	u64 mm_size, size, start = 0;
+	struct drm_buddy_block *block;
+	const int max_order = 3;
+	unsigned long flags = 0;
+	int order, top;
+	struct drm_buddy mm;
+	LIST_HEAD(blocks);
+	LIST_HEAD(holes);
+	LIST_HEAD(tmp);
+
+	/*
+	 * Create a pot-sized mm, then allocate one of each possible
+	 * order within. This should leave the mm with exactly one
+	 * page left. Free the largest block, then whittle down again.
+	 * Eventually we will have a fully 50% fragmented mm.
+	 */
+
+	mm_size = SZ_4K << max_order;
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+			       "buddy_init failed\n");
+
+	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
+
+	for (top = max_order; top; top--) {
+		/* Make room by freeing the largest allocated block */
+		block = list_first_entry_or_null(&blocks, typeof(*block), link);
+		if (block) {
+			list_del(&block->link);
+			drm_buddy_free_block(&mm, block);
+		}
+
+		for (order = top; order--;) {
+			size = get_size(order, mm.chunk_size);
+			KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start,
+									    mm_size, size, size,
+										&tmp, flags),
+					"buddy_alloc hit -ENOMEM with order=%d, top=%d\n",
+					order, top);
+
+			block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+			KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
+
+			list_move_tail(&block->link, &blocks);
+		}
+
+		/* There should be one final page for this sub-allocation */
+		size = get_size(0, mm.chunk_size);
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+								    size, size, &tmp, flags),
+							   "buddy_alloc hit -ENOMEM for hole\n");
+
+		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
+
+		list_move_tail(&block->link, &holes);
+
+		size = get_size(top, mm.chunk_size);
+		KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+								   size, size, &tmp, flags),
+							  "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!",
+							  top, max_order);
+	}
+
+	drm_buddy_free_list(&mm, &holes, 0);
+
+	/* Nothing larger than blocks of chunk_size now available */
+	for (order = 1; order <= max_order; order++) {
+		size = get_size(order, mm.chunk_size);
+		KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+								   size, size, &tmp, flags),
+							  "buddy_alloc unexpectedly succeeded at order %d, it should be full!",
+							  order);
+	}
+
+	list_splice_tail(&holes, &blocks);
+	drm_buddy_free_list(&mm, &blocks, 0);
+	drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
+{
+	u64 mm_size, size, start = 0;
+	struct drm_buddy_block *block, *bn;
+	const unsigned int max_order = 16;
+	unsigned long flags = 0;
+	struct drm_buddy mm;
+	unsigned int order;
+	LIST_HEAD(blocks);
+	LIST_HEAD(tmp);
+
+	/*
+	 * Create a pot-sized mm, then allocate one of each possible
+	 * order within. This should leave the mm with exactly one
+	 * page left.
+	 */
+
+	mm_size = SZ_4K << max_order;
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+			       "buddy_init failed\n");
+
+	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
+
+	for (order = 0; order < max_order; order++) {
+		size = get_size(order, mm.chunk_size);
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+								    size, size, &tmp, flags),
+							   "buddy_alloc hit -ENOMEM with order=%d\n",
+							   order);
+
+		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
+
+		list_move_tail(&block->link, &blocks);
+	}
+
+	/* And now the last remaining block available */
+	size = get_size(0, mm.chunk_size);
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+							    size, size, &tmp, flags),
+						   "buddy_alloc hit -ENOMEM on final alloc\n");
+
+	block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
+
+	list_move_tail(&block->link, &blocks);
+
+	/* Should be completely full! */
+	for (order = max_order; order--;) {
+		size = get_size(order, mm.chunk_size);
+		KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+								   size, size, &tmp, flags),
+							  "buddy_alloc unexpectedly succeeded, it should be full!");
+	}
+
+	block = list_last_entry(&blocks, typeof(*block), link);
+	list_del(&block->link);
+	drm_buddy_free_block(&mm, block);
+
+	/* As we free in increasing size, we make available larger blocks */
+	order = 1;
+	list_for_each_entry_safe(block, bn, &blocks, link) {
+		list_del(&block->link);
+		drm_buddy_free_block(&mm, block);
+
+		size = get_size(order, mm.chunk_size);
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+								    size, size, &tmp, flags),
+							   "buddy_alloc hit -ENOMEM with order=%d\n",
+							   order);
+
+		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
+
+		list_del(&block->link);
+		drm_buddy_free_block(&mm, block);
+		order++;
+	}
+
+	/* To confirm, now the whole mm should be available */
+	size = get_size(max_order, mm.chunk_size);
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+							    size, size, &tmp, flags),
+						   "buddy_alloc (realloc) hit -ENOMEM with order=%d\n",
+						   max_order);
+
+	block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
+
+	list_del(&block->link);
+	drm_buddy_free_block(&mm, block);
+	drm_buddy_free_list(&mm, &blocks, 0);
+	drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_optimistic(struct kunit *test)
+{
+	u64 mm_size, size, start = 0;
+	struct drm_buddy_block *block;
+	unsigned long flags = 0;
+	const int max_order = 16;
+	struct drm_buddy mm;
+	LIST_HEAD(blocks);
+	LIST_HEAD(tmp);
+	int order;
+
+	/*
+	 * Create a mm with one block of each order available, and
+	 * try to allocate them all.
+	 */
+
+	mm_size = SZ_4K * ((1 << (max_order + 1)) - 1);
+
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+			       "buddy_init failed\n");
+
+	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
+
+	for (order = 0; order <= max_order; order++) {
+		size = get_size(order, mm.chunk_size);
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+								    size, size, &tmp, flags),
+							   "buddy_alloc hit -ENOMEM with order=%d\n",
+							   order);
+
+		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
+
+		list_move_tail(&block->link, &blocks);
+	}
+
+	/* Should be completely full! */
+	size = get_size(0, mm.chunk_size);
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+							   size, size, &tmp, flags),
+						  "buddy_alloc unexpectedly succeeded, it should be full!");
+
+	drm_buddy_free_list(&mm, &blocks, 0);
+	drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_limit(struct kunit *test)
+{
+	u64 size = U64_MAX, start = 0;
+	struct drm_buddy_block *block;
+	unsigned long flags = 0;
+	LIST_HEAD(allocated);
+	struct drm_buddy mm;
+
+	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, size, SZ_4K));
+
+	KUNIT_EXPECT_EQ_MSG(test, mm.max_order, DRM_BUDDY_MAX_ORDER,
+			    "mm.max_order(%d) != %d\n", mm.max_order,
+						DRM_BUDDY_MAX_ORDER);
+
+	size = mm.chunk_size << mm.max_order;
+	KUNIT_EXPECT_FALSE(test, drm_buddy_alloc_blocks(&mm, start, size, size,
+							mm.chunk_size, &allocated, flags));
+
+	block = list_first_entry_or_null(&allocated, struct drm_buddy_block, link);
+	KUNIT_EXPECT_TRUE(test, block);
+
+	KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_order(block), mm.max_order,
+			    "block order(%d) != %d\n",
+						drm_buddy_block_order(block), mm.max_order);
+
+	KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_size(&mm, block),
+			    BIT_ULL(mm.max_order) * mm.chunk_size,
+						"block size(%llu) != %llu\n",
+						drm_buddy_block_size(&mm, block),
+						BIT_ULL(mm.max_order) * mm.chunk_size);
+
+	drm_buddy_free_list(&mm, &allocated, 0);
+	drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_exceeds_max_order(struct kunit *test)
+{
+	u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G;
+	struct drm_buddy mm;
+	LIST_HEAD(blocks);
+	int err;
+
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+			       "buddy_init failed\n");
+
+	/* CONTIGUOUS allocation should succeed via try_harder fallback */
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, size,
+							    SZ_4K, &blocks,
+							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc hit an error size=%llu\n", size);
+	drm_buddy_free_list(&mm, &blocks, 0);
+
+	/* Non-CONTIGUOUS with large min_block_size should return -EINVAL */
+	err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0);
+	KUNIT_EXPECT_EQ(test, err, -EINVAL);
+
+	/* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */
+	err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks,
+				     DRM_BUDDY_RANGE_ALLOCATION);
+	KUNIT_EXPECT_EQ(test, err, -EINVAL);
+
+	/* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */
+	err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks,
+				     DRM_BUDDY_CONTIGUOUS_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION);
+	KUNIT_EXPECT_EQ(test, err, -EINVAL);
+
+	drm_buddy_fini(&mm);
+}
+
+static int drm_buddy_suite_init(struct kunit_suite *suite)
+{
+	while (!random_seed)
+		random_seed = get_random_u32();
+
+	kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n",
+		   random_seed);
+
+	return 0;
+}
+
+static struct kunit_case drm_buddy_tests[] = {
+	KUNIT_CASE(drm_test_buddy_alloc_limit),
+	KUNIT_CASE(drm_test_buddy_alloc_optimistic),
+	KUNIT_CASE(drm_test_buddy_alloc_pessimistic),
+	KUNIT_CASE(drm_test_buddy_alloc_pathological),
+	KUNIT_CASE(drm_test_buddy_alloc_contiguous),
+	KUNIT_CASE(drm_test_buddy_alloc_clear),
+	KUNIT_CASE(drm_test_buddy_alloc_range_bias),
+	KUNIT_CASE(drm_test_buddy_fragmentation_performance),
+	KUNIT_CASE(drm_test_buddy_alloc_exceeds_max_order),
+	{}
+};
+
+static struct kunit_suite drm_buddy_test_suite = {
+	.name = "drm_buddy",
+	.suite_init = drm_buddy_suite_init,
+	.test_cases = drm_buddy_tests,
+};
+
+kunit_test_suite(drm_buddy_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Kunit test for drm_buddy functions");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/tests/gpu_random.c b/drivers/gpu/tests/gpu_random.c
new file mode 100644
index 000000000000..ddd1f594b5d5
--- /dev/null
+++ b/drivers/gpu/tests/gpu_random.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bitops.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "gpu_random.h"
+
+u32 drm_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state)
+{
+	return upper_32_bits((u64)prandom_u32_state(state) * ep_ro);
+}
+EXPORT_SYMBOL(drm_prandom_u32_max_state);
+
+void drm_random_reorder(unsigned int *order, unsigned int count,
+			struct rnd_state *state)
+{
+	unsigned int i, j;
+
+	for (i = 0; i < count; ++i) {
+		BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32));
+		j = drm_prandom_u32_max_state(count, state);
+		swap(order[i], order[j]);
+	}
+}
+EXPORT_SYMBOL(drm_random_reorder);
+
+unsigned int *drm_random_order(unsigned int count, struct rnd_state *state)
+{
+	unsigned int *order, i;
+
+	order = kmalloc_array(count, sizeof(*order), GFP_KERNEL);
+	if (!order)
+		return order;
+
+	for (i = 0; i < count; i++)
+		order[i] = i;
+
+	drm_random_reorder(order, count, state);
+	return order;
+}
+EXPORT_SYMBOL(drm_random_order);
diff --git a/drivers/gpu/tests/gpu_random.h b/drivers/gpu/tests/gpu_random.h
new file mode 100644
index 000000000000..9f827260a89d
--- /dev/null
+++ b/drivers/gpu/tests/gpu_random.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DRM_RANDOM_H__
+#define __DRM_RANDOM_H__
+
+/* This is a temporary home for a couple of utility functions that should
+ * be transposed to lib/ at the earliest convenience.
+ */
+
+#include <linux/prandom.h>
+
+#define DRM_RND_STATE_INITIALIZER(seed__) ({				\
+	struct rnd_state state__;					\
+	prandom_seed_state(&state__, (seed__));				\
+	state__;							\
+})
+
+#define DRM_RND_STATE(name__, seed__) \
+	struct rnd_state name__ = DRM_RND_STATE_INITIALIZER(seed__)
+
+unsigned int *drm_random_order(unsigned int count,
+			       struct rnd_state *state);
+void drm_random_reorder(unsigned int *order,
+			unsigned int count,
+			struct rnd_state *state);
+u32 drm_prandom_u32_max_state(u32 ep_ro,
+			      struct rnd_state *state);
+
+#endif /* !__DRM_RANDOM_H__ */
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
deleted file mode 100644
index b909fa8f810a..000000000000
--- a/include/drm/drm_buddy.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#ifndef __DRM_BUDDY_H__
-#define __DRM_BUDDY_H__
-
-#include <linux/bitops.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/rbtree.h>
-
-struct drm_printer;
-
-#define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
-#define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
-#define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
-#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
-#define DRM_BUDDY_CLEARED			BIT(4)
-#define DRM_BUDDY_TRIM_DISABLE			BIT(5)
-
-struct drm_buddy_block {
-#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
-#define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
-#define   DRM_BUDDY_ALLOCATED	   (1 << 10)
-#define   DRM_BUDDY_FREE	   (2 << 10)
-#define   DRM_BUDDY_SPLIT	   (3 << 10)
-#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
-/* Free to be used, if needed in the future */
-#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
-#define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
-	u64 header;
-
-	struct drm_buddy_block *left;
-	struct drm_buddy_block *right;
-	struct drm_buddy_block *parent;
-
-	void *private; /* owned by creator */
-
-	/*
-	 * While the block is allocated by the user through drm_buddy_alloc*,
-	 * the user has ownership of the link, for example to maintain within
-	 * a list, if so desired. As soon as the block is freed with
-	 * drm_buddy_free* ownership is given back to the mm.
-	 */
-	union {
-		struct rb_node rb;
-		struct list_head link;
-	};
-
-	struct list_head tmp_link;
-};
-
-/* Order-zero must be at least SZ_4K */
-#define DRM_BUDDY_MAX_ORDER (63 - 12)
-
-/*
- * Binary Buddy System.
- *
- * Locking should be handled by the user, a simple mutex around
- * drm_buddy_alloc* and drm_buddy_free* should suffice.
- */
-struct drm_buddy {
-	/* Maintain a free list for each order. */
-	struct rb_root **free_trees;
-
-	/*
-	 * Maintain explicit binary tree(s) to track the allocation of the
-	 * address space. This gives us a simple way of finding a buddy block
-	 * and performing the potentially recursive merge step when freeing a
-	 * block.  Nodes are either allocated or free, in which case they will
-	 * also exist on the respective free list.
-	 */
-	struct drm_buddy_block **roots;
-
-	/*
-	 * Anything from here is public, and remains static for the lifetime of
-	 * the mm. Everything above is considered do-not-touch.
-	 */
-	unsigned int n_roots;
-	unsigned int max_order;
-
-	/* Must be at least SZ_4K */
-	u64 chunk_size;
-	u64 size;
-	u64 avail;
-	u64 clear_avail;
-};
-
-static inline u64
-drm_buddy_block_offset(const struct drm_buddy_block *block)
-{
-	return block->header & DRM_BUDDY_HEADER_OFFSET;
-}
-
-static inline unsigned int
-drm_buddy_block_order(struct drm_buddy_block *block)
-{
-	return block->header & DRM_BUDDY_HEADER_ORDER;
-}
-
-static inline unsigned int
-drm_buddy_block_state(struct drm_buddy_block *block)
-{
-	return block->header & DRM_BUDDY_HEADER_STATE;
-}
-
-static inline bool
-drm_buddy_block_is_allocated(struct drm_buddy_block *block)
-{
-	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
-}
-
-static inline bool
-drm_buddy_block_is_clear(struct drm_buddy_block *block)
-{
-	return block->header & DRM_BUDDY_HEADER_CLEAR;
-}
-
-static inline bool
-drm_buddy_block_is_free(struct drm_buddy_block *block)
-{
-	return drm_buddy_block_state(block) == DRM_BUDDY_FREE;
-}
-
-static inline bool
-drm_buddy_block_is_split(struct drm_buddy_block *block)
-{
-	return drm_buddy_block_state(block) == DRM_BUDDY_SPLIT;
-}
-
-static inline u64
-drm_buddy_block_size(struct drm_buddy *mm,
-		     struct drm_buddy_block *block)
-{
-	return mm->chunk_size << drm_buddy_block_order(block);
-}
-
-int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
-
-void drm_buddy_fini(struct drm_buddy *mm);
-
-struct drm_buddy_block *
-drm_get_buddy(struct drm_buddy_block *block);
-
-int drm_buddy_alloc_blocks(struct drm_buddy *mm,
-			   u64 start, u64 end, u64 size,
-			   u64 min_page_size,
-			   struct list_head *blocks,
-			   unsigned long flags);
-
-int drm_buddy_block_trim(struct drm_buddy *mm,
-			 u64 *start,
-			 u64 new_size,
-			 struct list_head *blocks);
-
-void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear);
-
-void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
-
-void drm_buddy_free_list(struct drm_buddy *mm,
-			 struct list_head *objects,
-			 unsigned int flags);
-
-void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
-void drm_buddy_block_print(struct drm_buddy *mm,
-			   struct drm_buddy_block *block,
-			   struct drm_printer *p);
-#endif
diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
new file mode 100644
index 000000000000..b909fa8f810a
--- /dev/null
+++ b/include/linux/gpu_buddy.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __DRM_BUDDY_H__
+#define __DRM_BUDDY_H__
+
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/rbtree.h>
+
+struct drm_printer;
+
+#define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
+#define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
+#define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
+#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
+#define DRM_BUDDY_CLEARED			BIT(4)
+#define DRM_BUDDY_TRIM_DISABLE			BIT(5)
+
+struct drm_buddy_block {
+#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
+#define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
+#define   DRM_BUDDY_ALLOCATED	   (1 << 10)
+#define   DRM_BUDDY_FREE	   (2 << 10)
+#define   DRM_BUDDY_SPLIT	   (3 << 10)
+#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
+/* Free to be used, if needed in the future */
+#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
+#define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
+	u64 header;
+
+	struct drm_buddy_block *left;
+	struct drm_buddy_block *right;
+	struct drm_buddy_block *parent;
+
+	void *private; /* owned by creator */
+
+	/*
+	 * While the block is allocated by the user through drm_buddy_alloc*,
+	 * the user has ownership of the link, for example to maintain within
+	 * a list, if so desired. As soon as the block is freed with
+	 * drm_buddy_free* ownership is given back to the mm.
+	 */
+	union {
+		struct rb_node rb;
+		struct list_head link;
+	};
+
+	struct list_head tmp_link;
+};
+
+/* Order-zero must be at least SZ_4K */
+#define DRM_BUDDY_MAX_ORDER (63 - 12)
+
+/*
+ * Binary Buddy System.
+ *
+ * Locking should be handled by the user, a simple mutex around
+ * drm_buddy_alloc* and drm_buddy_free* should suffice.
+ */
+struct drm_buddy {
+	/* Maintain a free list for each order. */
+	struct rb_root **free_trees;
+
+	/*
+	 * Maintain explicit binary tree(s) to track the allocation of the
+	 * address space. This gives us a simple way of finding a buddy block
+	 * and performing the potentially recursive merge step when freeing a
+	 * block.  Nodes are either allocated or free, in which case they will
+	 * also exist on the respective free list.
+	 */
+	struct drm_buddy_block **roots;
+
+	/*
+	 * Anything from here is public, and remains static for the lifetime of
+	 * the mm. Everything above is considered do-not-touch.
+	 */
+	unsigned int n_roots;
+	unsigned int max_order;
+
+	/* Must be at least SZ_4K */
+	u64 chunk_size;
+	u64 size;
+	u64 avail;
+	u64 clear_avail;
+};
+
+static inline u64
+drm_buddy_block_offset(const struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_OFFSET;
+}
+
+static inline unsigned int
+drm_buddy_block_order(struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_ORDER;
+}
+
+static inline unsigned int
+drm_buddy_block_state(struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_STATE;
+}
+
+static inline bool
+drm_buddy_block_is_allocated(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
+}
+
+static inline bool
+drm_buddy_block_is_clear(struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_CLEAR;
+}
+
+static inline bool
+drm_buddy_block_is_free(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_state(block) == DRM_BUDDY_FREE;
+}
+
+static inline bool
+drm_buddy_block_is_split(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_state(block) == DRM_BUDDY_SPLIT;
+}
+
+static inline u64
+drm_buddy_block_size(struct drm_buddy *mm,
+		     struct drm_buddy_block *block)
+{
+	return mm->chunk_size << drm_buddy_block_order(block);
+}
+
+int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
+
+void drm_buddy_fini(struct drm_buddy *mm);
+
+struct drm_buddy_block *
+drm_get_buddy(struct drm_buddy_block *block);
+
+int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+			   u64 start, u64 end, u64 size,
+			   u64 min_page_size,
+			   struct list_head *blocks,
+			   unsigned long flags);
+
+int drm_buddy_block_trim(struct drm_buddy *mm,
+			 u64 *start,
+			 u64 new_size,
+			 struct list_head *blocks);
+
+void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear);
+
+void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
+
+void drm_buddy_free_list(struct drm_buddy *mm,
+			 struct list_head *objects,
+			 unsigned int flags);
+
+void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
+void drm_buddy_block_print(struct drm_buddy *mm,
+			   struct drm_buddy_block *block,
+			   struct drm_printer *p);
+#endif
-- 
cgit v1.2.3


From ba110db8e1bc206c13fd7d985e79b033f53bfdea Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelagnelf@nvidia.com>
Date: Fri, 6 Feb 2026 08:52:38 +1000
Subject: gpu: Move DRM buddy allocator one level up (part two)

Move the DRM buddy allocator one level up so that it can be used by GPU
drivers (example, nova-core) that have usecases other than DRM (such as
VFIO vGPU support). Modify the API, structures and Kconfigs to use
"gpu_buddy" terminology. Adapt the drivers and tests to use the new API.

The commit cannot be split due to bisectability, however no functional
change is intended. Verified by running K-UNIT tests and build tested
various configurations.

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
[airlied: I've split this into two so git can find copies easier.
I've also just nuked drm_random library, that stuff needs to be done
elsewhere and only the buddy tests seem to be using it].
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 Documentation/gpu/drm-mm.rst                       |   6 +
 MAINTAINERS                                        |   8 +-
 drivers/gpu/Kconfig                                |  13 +
 drivers/gpu/Makefile                               |   1 +
 drivers/gpu/buddy.c                                | 556 ++++++++++-----------
 drivers/gpu/drm/Kconfig                            |   1 +
 drivers/gpu/drm/Makefile                           |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c            |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h     |  12 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c       |  79 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h       |  18 +-
 drivers/gpu/drm/drm_buddy.c                        |  77 +++
 drivers/gpu/drm/i915/i915_scatterlist.c            |   8 +-
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c      |  55 +-
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.h      |   4 +-
 .../gpu/drm/i915/selftests/intel_memory_region.c   |  20 +-
 drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c   |   4 +-
 drivers/gpu/drm/ttm/tests/ttm_mock_manager.c       |  18 +-
 drivers/gpu/drm/ttm/tests/ttm_mock_manager.h       |   2 +-
 drivers/gpu/drm/xe/xe_res_cursor.h                 |  34 +-
 drivers/gpu/drm/xe/xe_svm.c                        |  12 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c               |  71 +--
 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h         |   2 +-
 drivers/gpu/tests/Makefile                         |   2 +-
 drivers/gpu/tests/gpu_buddy_test.c                 | 412 +++++++--------
 drivers/gpu/tests/gpu_random.c                     |  16 +-
 drivers/gpu/tests/gpu_random.h                     |  18 +-
 drivers/video/Kconfig                              |   1 +
 include/drm/drm_buddy.h                            |  18 +
 include/linux/gpu_buddy.h                          | 124 ++---
 30 files changed, 855 insertions(+), 741 deletions(-)
 create mode 100644 drivers/gpu/Kconfig
 create mode 100644 drivers/gpu/drm/drm_buddy.c
 create mode 100644 include/drm/drm_buddy.h

(limited to 'include/linux')

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index ceee0e663237..32fb506db05b 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -532,6 +532,12 @@ Buddy Allocator Function References (GPU buddy)
 .. kernel-doc:: drivers/gpu/buddy.c
    :export:
 
+DRM Buddy Specific Logging Function References
+----------------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_buddy.c
+   :export:
+
 DRM Cache Handling and Fast WC memcpy()
 =======================================
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 086cbf5c36b3..f2bec2c0d7e3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8797,15 +8797,17 @@ T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
 F:	drivers/gpu/drm/ttm/
 F:	include/drm/ttm/
 
-DRM BUDDY ALLOCATOR
+GPU BUDDY ALLOCATOR
 M:	Matthew Auld <matthew.auld@intel.com>
 M:	Arun Pravin <arunpravin.paneerselvam@amd.com>
 R:	Christian Koenig <christian.koenig@amd.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Maintained
 T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
-F:	drivers/gpu/drm/drm_buddy.c
-F:	drivers/gpu/drm/tests/drm_buddy_test.c
+F:	drivers/gpu/drm_buddy.c
+F:	drivers/gpu/buddy.c
+F:	drivers/gpu/tests/gpu_buddy_test.c
+F:	include/linux/gpu_buddy.h
 F:	include/drm/drm_buddy.h
 
 DRM AUTOMATED TESTING
diff --git a/drivers/gpu/Kconfig b/drivers/gpu/Kconfig
new file mode 100644
index 000000000000..ebb2ad4b7ea0
--- /dev/null
+++ b/drivers/gpu/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config GPU_BUDDY
+       bool
+       help
+         A page based buddy allocator for GPU memory.
+
+config GPU_BUDDY_KUNIT_TEST
+       tristate "KUnit tests for GPU buddy allocator" if !KUNIT_ALL_TESTS
+       depends on GPU_BUDDY && KUNIT
+       default KUNIT_ALL_TESTS
+       help
+         KUnit tests for the GPU buddy allocator.
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index c5292ee2c852..5cd54d06e262 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -6,3 +6,4 @@ obj-y			+= host1x/ drm/ vga/ tests/
 obj-$(CONFIG_IMX_IPUV3_CORE)	+= ipu-v3/
 obj-$(CONFIG_TRACE_GPU_MEM)		+= trace/
 obj-$(CONFIG_NOVA_CORE)		+= nova-core/
+obj-$(CONFIG_GPU_BUDDY)		+= buddy.o
diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c
index 4cc63d961d26..603c59a2013a 100644
--- a/drivers/gpu/buddy.c
+++ b/drivers/gpu/buddy.c
@@ -11,27 +11,17 @@
 #include <linux/sizes.h>
 
 #include <linux/gpu_buddy.h>
-#include <drm/drm_print.h>
-
-enum drm_buddy_free_tree {
-	DRM_BUDDY_CLEAR_TREE = 0,
-	DRM_BUDDY_DIRTY_TREE,
-	DRM_BUDDY_MAX_FREE_TREES,
-};
 
 static struct kmem_cache *slab_blocks;
 
-#define for_each_free_tree(tree) \
-	for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
-
-static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
-					       struct drm_buddy_block *parent,
+static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
+					       struct gpu_buddy_block *parent,
 					       unsigned int order,
 					       u64 offset)
 {
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 
-	BUG_ON(order > DRM_BUDDY_MAX_ORDER);
+	BUG_ON(order > GPU_BUDDY_MAX_ORDER);
 
 	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
 	if (!block)
@@ -43,30 +33,30 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
 
 	RB_CLEAR_NODE(&block->rb);
 
-	BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
+	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
 	return block;
 }
 
-static void drm_block_free(struct drm_buddy *mm,
-			   struct drm_buddy_block *block)
+static void gpu_block_free(struct gpu_buddy *mm,
+			   struct gpu_buddy_block *block)
 {
 	kmem_cache_free(slab_blocks, block);
 }
 
-static enum drm_buddy_free_tree
-get_block_tree(struct drm_buddy_block *block)
+static enum gpu_buddy_free_tree
+get_block_tree(struct gpu_buddy_block *block)
 {
-	return drm_buddy_block_is_clear(block) ?
-	       DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+	return gpu_buddy_block_is_clear(block) ?
+	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
 }
 
-static struct drm_buddy_block *
+static struct gpu_buddy_block *
 rbtree_get_free_block(const struct rb_node *node)
 {
-	return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
+	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
 }
 
-static struct drm_buddy_block *
+static struct gpu_buddy_block *
 rbtree_last_free_block(struct rb_root *root)
 {
 	return rbtree_get_free_block(rb_last(root));
@@ -77,33 +67,33 @@ static bool rbtree_is_empty(struct rb_root *root)
 	return RB_EMPTY_ROOT(root);
 }
 
-static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
-					const struct drm_buddy_block *node)
+static bool gpu_buddy_block_offset_less(const struct gpu_buddy_block *block,
+					const struct gpu_buddy_block *node)
 {
-	return drm_buddy_block_offset(block) < drm_buddy_block_offset(node);
+	return gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node);
 }
 
 static bool rbtree_block_offset_less(struct rb_node *block,
 				     const struct rb_node *node)
 {
-	return drm_buddy_block_offset_less(rbtree_get_free_block(block),
+	return gpu_buddy_block_offset_less(rbtree_get_free_block(block),
 					   rbtree_get_free_block(node));
 }
 
-static void rbtree_insert(struct drm_buddy *mm,
-			  struct drm_buddy_block *block,
-			  enum drm_buddy_free_tree tree)
+static void rbtree_insert(struct gpu_buddy *mm,
+			  struct gpu_buddy_block *block,
+			  enum gpu_buddy_free_tree tree)
 {
 	rb_add(&block->rb,
-	       &mm->free_trees[tree][drm_buddy_block_order(block)],
+	       &mm->free_trees[tree][gpu_buddy_block_order(block)],
 	       rbtree_block_offset_less);
 }
 
-static void rbtree_remove(struct drm_buddy *mm,
-			  struct drm_buddy_block *block)
+static void rbtree_remove(struct gpu_buddy *mm,
+			  struct gpu_buddy_block *block)
 {
-	unsigned int order = drm_buddy_block_order(block);
-	enum drm_buddy_free_tree tree;
+	unsigned int order = gpu_buddy_block_order(block);
+	enum gpu_buddy_free_tree tree;
 	struct rb_root *root;
 
 	tree = get_block_tree(block);
@@ -113,42 +103,42 @@ static void rbtree_remove(struct drm_buddy *mm,
 	RB_CLEAR_NODE(&block->rb);
 }
 
-static void clear_reset(struct drm_buddy_block *block)
+static void clear_reset(struct gpu_buddy_block *block)
 {
-	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
 }
 
-static void mark_cleared(struct drm_buddy_block *block)
+static void mark_cleared(struct gpu_buddy_block *block)
 {
-	block->header |= DRM_BUDDY_HEADER_CLEAR;
+	block->header |= GPU_BUDDY_HEADER_CLEAR;
 }
 
-static void mark_allocated(struct drm_buddy *mm,
-			   struct drm_buddy_block *block)
+static void mark_allocated(struct gpu_buddy *mm,
+			   struct gpu_buddy_block *block)
 {
-	block->header &= ~DRM_BUDDY_HEADER_STATE;
-	block->header |= DRM_BUDDY_ALLOCATED;
+	block->header &= ~GPU_BUDDY_HEADER_STATE;
+	block->header |= GPU_BUDDY_ALLOCATED;
 
 	rbtree_remove(mm, block);
 }
 
-static void mark_free(struct drm_buddy *mm,
-		      struct drm_buddy_block *block)
+static void mark_free(struct gpu_buddy *mm,
+		      struct gpu_buddy_block *block)
 {
-	enum drm_buddy_free_tree tree;
+	enum gpu_buddy_free_tree tree;
 
-	block->header &= ~DRM_BUDDY_HEADER_STATE;
-	block->header |= DRM_BUDDY_FREE;
+	block->header &= ~GPU_BUDDY_HEADER_STATE;
+	block->header |= GPU_BUDDY_FREE;
 
 	tree = get_block_tree(block);
 	rbtree_insert(mm, block, tree);
 }
 
-static void mark_split(struct drm_buddy *mm,
-		       struct drm_buddy_block *block)
+static void mark_split(struct gpu_buddy *mm,
+		       struct gpu_buddy_block *block)
 {
-	block->header &= ~DRM_BUDDY_HEADER_STATE;
-	block->header |= DRM_BUDDY_SPLIT;
+	block->header &= ~GPU_BUDDY_HEADER_STATE;
+	block->header |= GPU_BUDDY_SPLIT;
 
 	rbtree_remove(mm, block);
 }
@@ -163,10 +153,10 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
 	return s1 <= s2 && e1 >= e2;
 }
 
-static struct drm_buddy_block *
-__get_buddy(struct drm_buddy_block *block)
+static struct gpu_buddy_block *
+__get_buddy(struct gpu_buddy_block *block)
 {
-	struct drm_buddy_block *parent;
+	struct gpu_buddy_block *parent;
 
 	parent = block->parent;
 	if (!parent)
@@ -178,19 +168,19 @@ __get_buddy(struct drm_buddy_block *block)
 	return parent->left;
 }
 
-static unsigned int __drm_buddy_free(struct drm_buddy *mm,
-				     struct drm_buddy_block *block,
+static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
+				     struct gpu_buddy_block *block,
 				     bool force_merge)
 {
-	struct drm_buddy_block *parent;
+	struct gpu_buddy_block *parent;
 	unsigned int order;
 
 	while ((parent = block->parent)) {
-		struct drm_buddy_block *buddy;
+		struct gpu_buddy_block *buddy;
 
 		buddy = __get_buddy(block);
 
-		if (!drm_buddy_block_is_free(buddy))
+		if (!gpu_buddy_block_is_free(buddy))
 			break;
 
 		if (!force_merge) {
@@ -198,31 +188,31 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
 			 * Check the block and its buddy clear state and exit
 			 * the loop if they both have the dissimilar state.
 			 */
-			if (drm_buddy_block_is_clear(block) !=
-			    drm_buddy_block_is_clear(buddy))
+			if (gpu_buddy_block_is_clear(block) !=
+			    gpu_buddy_block_is_clear(buddy))
 				break;
 
-			if (drm_buddy_block_is_clear(block))
+			if (gpu_buddy_block_is_clear(block))
 				mark_cleared(parent);
 		}
 
 		rbtree_remove(mm, buddy);
-		if (force_merge && drm_buddy_block_is_clear(buddy))
-			mm->clear_avail -= drm_buddy_block_size(mm, buddy);
+		if (force_merge && gpu_buddy_block_is_clear(buddy))
+			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
 
-		drm_block_free(mm, block);
-		drm_block_free(mm, buddy);
+		gpu_block_free(mm, block);
+		gpu_block_free(mm, buddy);
 
 		block = parent;
 	}
 
-	order = drm_buddy_block_order(block);
+	order = gpu_buddy_block_order(block);
 	mark_free(mm, block);
 
 	return order;
 }
 
-static int __force_merge(struct drm_buddy *mm,
+static int __force_merge(struct gpu_buddy *mm,
 			 u64 start,
 			 u64 end,
 			 unsigned int min_order)
@@ -241,7 +231,7 @@ static int __force_merge(struct drm_buddy *mm,
 			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
 
 			while (iter) {
-				struct drm_buddy_block *block, *buddy;
+				struct gpu_buddy_block *block, *buddy;
 				u64 block_start, block_end;
 
 				block = rbtree_get_free_block(iter);
@@ -250,18 +240,18 @@ static int __force_merge(struct drm_buddy *mm,
 				if (!block || !block->parent)
 					continue;
 
-				block_start = drm_buddy_block_offset(block);
-				block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+				block_start = gpu_buddy_block_offset(block);
+				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
 
 				if (!contains(start, end, block_start, block_end))
 					continue;
 
 				buddy = __get_buddy(block);
-				if (!drm_buddy_block_is_free(buddy))
+				if (!gpu_buddy_block_is_free(buddy))
 					continue;
 
-				WARN_ON(drm_buddy_block_is_clear(block) ==
-					drm_buddy_block_is_clear(buddy));
+				WARN_ON(gpu_buddy_block_is_clear(block) ==
+					gpu_buddy_block_is_clear(buddy));
 
 				/*
 				 * Advance to the next node when the current node is the buddy,
@@ -271,10 +261,10 @@ static int __force_merge(struct drm_buddy *mm,
 					iter = rb_prev(iter);
 
 				rbtree_remove(mm, block);
-				if (drm_buddy_block_is_clear(block))
-					mm->clear_avail -= drm_buddy_block_size(mm, block);
+				if (gpu_buddy_block_is_clear(block))
+					mm->clear_avail -= gpu_buddy_block_size(mm, block);
 
-				order = __drm_buddy_free(mm, block, true);
+				order = __gpu_buddy_free(mm, block, true);
 				if (order >= min_order)
 					return 0;
 			}
@@ -285,9 +275,9 @@ static int __force_merge(struct drm_buddy *mm,
 }
 
 /**
- * drm_buddy_init - init memory manager
+ * gpu_buddy_init - init memory manager
  *
- * @mm: DRM buddy manager to initialize
+ * @mm: GPU buddy manager to initialize
  * @size: size in bytes to manage
  * @chunk_size: minimum page size in bytes for our allocations
  *
@@ -296,7 +286,7 @@ static int __force_merge(struct drm_buddy *mm,
  * Returns:
  * 0 on success, error code on failure.
  */
-int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
+int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
 {
 	unsigned int i, j, root_count = 0;
 	u64 offset = 0;
@@ -318,9 +308,9 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 	mm->chunk_size = chunk_size;
 	mm->max_order = ilog2(size) - ilog2(chunk_size);
 
-	BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
+	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);
 
-	mm->free_trees = kmalloc_array(DRM_BUDDY_MAX_FREE_TREES,
+	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
 				       sizeof(*mm->free_trees),
 				       GFP_KERNEL);
 	if (!mm->free_trees)
@@ -340,7 +330,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 	mm->n_roots = hweight64(size);
 
 	mm->roots = kmalloc_array(mm->n_roots,
-				  sizeof(struct drm_buddy_block *),
+				  sizeof(struct gpu_buddy_block *),
 				  GFP_KERNEL);
 	if (!mm->roots)
 		goto out_free_tree;
@@ -350,21 +340,21 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 	 * not itself a power-of-two.
 	 */
 	do {
-		struct drm_buddy_block *root;
+		struct gpu_buddy_block *root;
 		unsigned int order;
 		u64 root_size;
 
 		order = ilog2(size) - ilog2(chunk_size);
 		root_size = chunk_size << order;
 
-		root = drm_block_alloc(mm, NULL, order, offset);
+		root = gpu_block_alloc(mm, NULL, order, offset);
 		if (!root)
 			goto out_free_roots;
 
 		mark_free(mm, root);
 
 		BUG_ON(root_count > mm->max_order);
-		BUG_ON(drm_buddy_block_size(mm, root) < chunk_size);
+		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);
 
 		mm->roots[root_count] = root;
 
@@ -377,7 +367,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 
 out_free_roots:
 	while (root_count--)
-		drm_block_free(mm, mm->roots[root_count]);
+		gpu_block_free(mm, mm->roots[root_count]);
 	kfree(mm->roots);
 out_free_tree:
 	while (i--)
@@ -385,16 +375,16 @@ out_free_tree:
 	kfree(mm->free_trees);
 	return -ENOMEM;
 }
-EXPORT_SYMBOL(drm_buddy_init);
+EXPORT_SYMBOL(gpu_buddy_init);
 
 /**
- * drm_buddy_fini - tear down the memory manager
+ * gpu_buddy_fini - tear down the memory manager
  *
- * @mm: DRM buddy manager to free
+ * @mm: GPU buddy manager to free
  *
  * Cleanup memory manager resources and the freetree
  */
-void drm_buddy_fini(struct drm_buddy *mm)
+void gpu_buddy_fini(struct gpu_buddy *mm)
 {
 	u64 root_size, size, start;
 	unsigned int order;
@@ -404,13 +394,13 @@ void drm_buddy_fini(struct drm_buddy *mm)
 
 	for (i = 0; i < mm->n_roots; ++i) {
 		order = ilog2(size) - ilog2(mm->chunk_size);
-		start = drm_buddy_block_offset(mm->roots[i]);
+		start = gpu_buddy_block_offset(mm->roots[i]);
 		__force_merge(mm, start, start + size, order);
 
-		if (WARN_ON(!drm_buddy_block_is_free(mm->roots[i])))
+		if (WARN_ON(!gpu_buddy_block_is_free(mm->roots[i])))
 			kunit_fail_current_test("buddy_fini() root");
 
-		drm_block_free(mm, mm->roots[i]);
+		gpu_block_free(mm, mm->roots[i]);
 
 		root_size = mm->chunk_size << order;
 		size -= root_size;
@@ -423,31 +413,31 @@ void drm_buddy_fini(struct drm_buddy *mm)
 	kfree(mm->free_trees);
 	kfree(mm->roots);
 }
-EXPORT_SYMBOL(drm_buddy_fini);
+EXPORT_SYMBOL(gpu_buddy_fini);
 
-static int split_block(struct drm_buddy *mm,
-		       struct drm_buddy_block *block)
+static int split_block(struct gpu_buddy *mm,
+		       struct gpu_buddy_block *block)
 {
-	unsigned int block_order = drm_buddy_block_order(block) - 1;
-	u64 offset = drm_buddy_block_offset(block);
+	unsigned int block_order = gpu_buddy_block_order(block) - 1;
+	u64 offset = gpu_buddy_block_offset(block);
 
-	BUG_ON(!drm_buddy_block_is_free(block));
-	BUG_ON(!drm_buddy_block_order(block));
+	BUG_ON(!gpu_buddy_block_is_free(block));
+	BUG_ON(!gpu_buddy_block_order(block));
 
-	block->left = drm_block_alloc(mm, block, block_order, offset);
+	block->left = gpu_block_alloc(mm, block, block_order, offset);
 	if (!block->left)
 		return -ENOMEM;
 
-	block->right = drm_block_alloc(mm, block, block_order,
+	block->right = gpu_block_alloc(mm, block, block_order,
 				       offset + (mm->chunk_size << block_order));
 	if (!block->right) {
-		drm_block_free(mm, block->left);
+		gpu_block_free(mm, block->left);
 		return -ENOMEM;
 	}
 
 	mark_split(mm, block);
 
-	if (drm_buddy_block_is_clear(block)) {
+	if (gpu_buddy_block_is_clear(block)) {
 		mark_cleared(block->left);
 		mark_cleared(block->right);
 		clear_reset(block);
@@ -460,34 +450,34 @@ static int split_block(struct drm_buddy *mm,
 }
 
 /**
- * drm_get_buddy - get buddy address
+ * gpu_get_buddy - get buddy address
  *
- * @block: DRM buddy block
+ * @block: GPU buddy block
  *
  * Returns the corresponding buddy block for @block, or NULL
  * if this is a root block and can't be merged further.
  * Requires some kind of locking to protect against
  * any concurrent allocate and free operations.
  */
-struct drm_buddy_block *
-drm_get_buddy(struct drm_buddy_block *block)
+struct gpu_buddy_block *
+gpu_get_buddy(struct gpu_buddy_block *block)
 {
 	return __get_buddy(block);
 }
-EXPORT_SYMBOL(drm_get_buddy);
+EXPORT_SYMBOL(gpu_get_buddy);
 
 /**
- * drm_buddy_reset_clear - reset blocks clear state
+ * gpu_buddy_reset_clear - reset blocks clear state
  *
- * @mm: DRM buddy manager
+ * @mm: GPU buddy manager
  * @is_clear: blocks clear state
  *
  * Reset the clear state based on @is_clear value for each block
  * in the freetree.
  */
-void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
+void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
 {
-	enum drm_buddy_free_tree src_tree, dst_tree;
+	enum gpu_buddy_free_tree src_tree, dst_tree;
 	u64 root_size, size, start;
 	unsigned int order;
 	int i;
@@ -495,60 +485,60 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
 	size = mm->size;
 	for (i = 0; i < mm->n_roots; ++i) {
 		order = ilog2(size) - ilog2(mm->chunk_size);
-		start = drm_buddy_block_offset(mm->roots[i]);
+		start = gpu_buddy_block_offset(mm->roots[i]);
 		__force_merge(mm, start, start + size, order);
 
 		root_size = mm->chunk_size << order;
 		size -= root_size;
 	}
 
-	src_tree = is_clear ? DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
-	dst_tree = is_clear ? DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
+	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
 
 	for (i = 0; i <= mm->max_order; ++i) {
 		struct rb_root *root = &mm->free_trees[src_tree][i];
-		struct drm_buddy_block *block, *tmp;
+		struct gpu_buddy_block *block, *tmp;
 
 		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
 			rbtree_remove(mm, block);
 			if (is_clear) {
 				mark_cleared(block);
-				mm->clear_avail += drm_buddy_block_size(mm, block);
+				mm->clear_avail += gpu_buddy_block_size(mm, block);
 			} else {
 				clear_reset(block);
-				mm->clear_avail -= drm_buddy_block_size(mm, block);
+				mm->clear_avail -= gpu_buddy_block_size(mm, block);
 			}
 
 			rbtree_insert(mm, block, dst_tree);
 		}
 	}
 }
-EXPORT_SYMBOL(drm_buddy_reset_clear);
+EXPORT_SYMBOL(gpu_buddy_reset_clear);
 
 /**
- * drm_buddy_free_block - free a block
+ * gpu_buddy_free_block - free a block
  *
- * @mm: DRM buddy manager
+ * @mm: GPU buddy manager
  * @block: block to be freed
  */
-void drm_buddy_free_block(struct drm_buddy *mm,
-			  struct drm_buddy_block *block)
+void gpu_buddy_free_block(struct gpu_buddy *mm,
+			  struct gpu_buddy_block *block)
 {
-	BUG_ON(!drm_buddy_block_is_allocated(block));
-	mm->avail += drm_buddy_block_size(mm, block);
-	if (drm_buddy_block_is_clear(block))
-		mm->clear_avail += drm_buddy_block_size(mm, block);
+	BUG_ON(!gpu_buddy_block_is_allocated(block));
+	mm->avail += gpu_buddy_block_size(mm, block);
+	if (gpu_buddy_block_is_clear(block))
+		mm->clear_avail += gpu_buddy_block_size(mm, block);
 
-	__drm_buddy_free(mm, block, false);
+	__gpu_buddy_free(mm, block, false);
 }
-EXPORT_SYMBOL(drm_buddy_free_block);
+EXPORT_SYMBOL(gpu_buddy_free_block);
 
-static void __drm_buddy_free_list(struct drm_buddy *mm,
+static void __gpu_buddy_free_list(struct gpu_buddy *mm,
 				  struct list_head *objects,
 				  bool mark_clear,
 				  bool mark_dirty)
 {
-	struct drm_buddy_block *block, *on;
+	struct gpu_buddy_block *block, *on;
 
 	WARN_ON(mark_dirty && mark_clear);
 
@@ -557,13 +547,13 @@ static void __drm_buddy_free_list(struct drm_buddy *mm,
 			mark_cleared(block);
 		else if (mark_dirty)
 			clear_reset(block);
-		drm_buddy_free_block(mm, block);
+		gpu_buddy_free_block(mm, block);
 		cond_resched();
 	}
 	INIT_LIST_HEAD(objects);
 }
 
-static void drm_buddy_free_list_internal(struct drm_buddy *mm,
+static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
 					 struct list_head *objects)
 {
 	/*
@@ -571,43 +561,43 @@ static void drm_buddy_free_list_internal(struct drm_buddy *mm,
 	 * at this point. For example we might have just failed part of the
 	 * allocation.
 	 */
-	__drm_buddy_free_list(mm, objects, false, false);
+	__gpu_buddy_free_list(mm, objects, false, false);
 }
 
 /**
- * drm_buddy_free_list - free blocks
+ * gpu_buddy_free_list - free blocks
  *
- * @mm: DRM buddy manager
+ * @mm: GPU buddy manager
  * @objects: input list head to free blocks
- * @flags: optional flags like DRM_BUDDY_CLEARED
+ * @flags: optional flags like GPU_BUDDY_CLEARED
  */
-void drm_buddy_free_list(struct drm_buddy *mm,
+void gpu_buddy_free_list(struct gpu_buddy *mm,
 			 struct list_head *objects,
 			 unsigned int flags)
 {
-	bool mark_clear = flags & DRM_BUDDY_CLEARED;
+	bool mark_clear = flags & GPU_BUDDY_CLEARED;
 
-	__drm_buddy_free_list(mm, objects, mark_clear, !mark_clear);
+	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
 }
-EXPORT_SYMBOL(drm_buddy_free_list);
+EXPORT_SYMBOL(gpu_buddy_free_list);
 
-static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags)
+static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
 {
-	bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION;
+	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;
 
-	return needs_clear != drm_buddy_block_is_clear(block);
+	return needs_clear != gpu_buddy_block_is_clear(block);
 }
 
-static struct drm_buddy_block *
-__alloc_range_bias(struct drm_buddy *mm,
+static struct gpu_buddy_block *
+__alloc_range_bias(struct gpu_buddy *mm,
 		   u64 start, u64 end,
 		   unsigned int order,
 		   unsigned long flags,
 		   bool fallback)
 {
 	u64 req_size = mm->chunk_size << order;
-	struct drm_buddy_block *block;
-	struct drm_buddy_block *buddy;
+	struct gpu_buddy_block *block;
+	struct gpu_buddy_block *buddy;
 	LIST_HEAD(dfs);
 	int err;
 	int i;
@@ -622,23 +612,23 @@ __alloc_range_bias(struct drm_buddy *mm,
 		u64 block_end;
 
 		block = list_first_entry_or_null(&dfs,
-						 struct drm_buddy_block,
+						 struct gpu_buddy_block,
 						 tmp_link);
 		if (!block)
 			break;
 
 		list_del(&block->tmp_link);
 
-		if (drm_buddy_block_order(block) < order)
+		if (gpu_buddy_block_order(block) < order)
 			continue;
 
-		block_start = drm_buddy_block_offset(block);
-		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+		block_start = gpu_buddy_block_offset(block);
+		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
 
 		if (!overlaps(start, end, block_start, block_end))
 			continue;
 
-		if (drm_buddy_block_is_allocated(block))
+		if (gpu_buddy_block_is_allocated(block))
 			continue;
 
 		if (block_start < start || block_end > end) {
@@ -654,17 +644,17 @@ __alloc_range_bias(struct drm_buddy *mm,
 			continue;
 
 		if (contains(start, end, block_start, block_end) &&
-		    order == drm_buddy_block_order(block)) {
+		    order == gpu_buddy_block_order(block)) {
 			/*
 			 * Find the free block within the range.
 			 */
-			if (drm_buddy_block_is_free(block))
+			if (gpu_buddy_block_is_free(block))
 				return block;
 
 			continue;
 		}
 
-		if (!drm_buddy_block_is_split(block)) {
+		if (!gpu_buddy_block_is_split(block)) {
 			err = split_block(mm, block);
 			if (unlikely(err))
 				goto err_undo;
@@ -684,19 +674,19 @@ err_undo:
 	 */
 	buddy = __get_buddy(block);
 	if (buddy &&
-	    (drm_buddy_block_is_free(block) &&
-	     drm_buddy_block_is_free(buddy)))
-		__drm_buddy_free(mm, block, false);
+	    (gpu_buddy_block_is_free(block) &&
+	     gpu_buddy_block_is_free(buddy)))
+		__gpu_buddy_free(mm, block, false);
 	return ERR_PTR(err);
 }
 
-static struct drm_buddy_block *
-__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
+static struct gpu_buddy_block *
+__gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
 			     u64 start, u64 end,
 			     unsigned int order,
 			     unsigned long flags)
 {
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	bool fallback = false;
 
 	block = __alloc_range_bias(mm, start, end, order,
@@ -708,12 +698,12 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
 	return block;
 }
 
-static struct drm_buddy_block *
-get_maxblock(struct drm_buddy *mm,
+static struct gpu_buddy_block *
+get_maxblock(struct gpu_buddy *mm,
 	     unsigned int order,
-	     enum drm_buddy_free_tree tree)
+	     enum gpu_buddy_free_tree tree)
 {
-	struct drm_buddy_block *max_block = NULL, *block = NULL;
+	struct gpu_buddy_block *max_block = NULL, *block = NULL;
 	struct rb_root *root;
 	unsigned int i;
 
@@ -728,8 +718,8 @@ get_maxblock(struct drm_buddy *mm,
 			continue;
 		}
 
-		if (drm_buddy_block_offset(block) >
-		    drm_buddy_block_offset(max_block)) {
+		if (gpu_buddy_block_offset(block) >
+		    gpu_buddy_block_offset(max_block)) {
 			max_block = block;
 		}
 	}
@@ -737,25 +727,25 @@ get_maxblock(struct drm_buddy *mm,
 	return max_block;
 }
 
-static struct drm_buddy_block *
-alloc_from_freetree(struct drm_buddy *mm,
+static struct gpu_buddy_block *
+alloc_from_freetree(struct gpu_buddy *mm,
 		    unsigned int order,
 		    unsigned long flags)
 {
-	struct drm_buddy_block *block = NULL;
+	struct gpu_buddy_block *block = NULL;
 	struct rb_root *root;
-	enum drm_buddy_free_tree tree;
+	enum gpu_buddy_free_tree tree;
 	unsigned int tmp;
 	int err;
 
-	tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
-		DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
+		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
 
-	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
+	if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
 		block = get_maxblock(mm, order, tree);
 		if (block)
 			/* Store the obtained block order */
-			tmp = drm_buddy_block_order(block);
+			tmp = gpu_buddy_block_order(block);
 	} else {
 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
 			/* Get RB tree root for this order and tree */
@@ -768,8 +758,8 @@ alloc_from_freetree(struct drm_buddy *mm,
 
 	if (!block) {
 		/* Try allocating from the other tree */
-		tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
-			DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
+		tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
+			GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
 
 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
 			root = &mm->free_trees[tree][tmp];
@@ -782,7 +772,7 @@ alloc_from_freetree(struct drm_buddy *mm,
 			return ERR_PTR(-ENOSPC);
 	}
 
-	BUG_ON(!drm_buddy_block_is_free(block));
+	BUG_ON(!gpu_buddy_block_is_free(block));
 
 	while (tmp != order) {
 		err = split_block(mm, block);
@@ -796,18 +786,18 @@ alloc_from_freetree(struct drm_buddy *mm,
 
 err_undo:
 	if (tmp != order)
-		__drm_buddy_free(mm, block, false);
+		__gpu_buddy_free(mm, block, false);
 	return ERR_PTR(err);
 }
 
-static int __alloc_range(struct drm_buddy *mm,
+static int __alloc_range(struct gpu_buddy *mm,
 			 struct list_head *dfs,
 			 u64 start, u64 size,
 			 struct list_head *blocks,
 			 u64 *total_allocated_on_err)
 {
-	struct drm_buddy_block *block;
-	struct drm_buddy_block *buddy;
+	struct gpu_buddy_block *block;
+	struct gpu_buddy_block *buddy;
 	u64 total_allocated = 0;
 	LIST_HEAD(allocated);
 	u64 end;
@@ -820,31 +810,31 @@ static int __alloc_range(struct drm_buddy *mm,
 		u64 block_end;
 
 		block = list_first_entry_or_null(dfs,
-						 struct drm_buddy_block,
+						 struct gpu_buddy_block,
 						 tmp_link);
 		if (!block)
 			break;
 
 		list_del(&block->tmp_link);
 
-		block_start = drm_buddy_block_offset(block);
-		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+		block_start = gpu_buddy_block_offset(block);
+		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
 
 		if (!overlaps(start, end, block_start, block_end))
 			continue;
 
-		if (drm_buddy_block_is_allocated(block)) {
+		if (gpu_buddy_block_is_allocated(block)) {
 			err = -ENOSPC;
 			goto err_free;
 		}
 
 		if (contains(start, end, block_start, block_end)) {
-			if (drm_buddy_block_is_free(block)) {
+			if (gpu_buddy_block_is_free(block)) {
 				mark_allocated(mm, block);
-				total_allocated += drm_buddy_block_size(mm, block);
-				mm->avail -= drm_buddy_block_size(mm, block);
-				if (drm_buddy_block_is_clear(block))
-					mm->clear_avail -= drm_buddy_block_size(mm, block);
+				total_allocated += gpu_buddy_block_size(mm, block);
+				mm->avail -= gpu_buddy_block_size(mm, block);
+				if (gpu_buddy_block_is_clear(block))
+					mm->clear_avail -= gpu_buddy_block_size(mm, block);
 				list_add_tail(&block->link, &allocated);
 				continue;
 			} else if (!mm->clear_avail) {
@@ -853,7 +843,7 @@ static int __alloc_range(struct drm_buddy *mm,
 			}
 		}
 
-		if (!drm_buddy_block_is_split(block)) {
+		if (!gpu_buddy_block_is_split(block)) {
 			err = split_block(mm, block);
 			if (unlikely(err))
 				goto err_undo;
@@ -880,22 +870,22 @@ err_undo:
 	 */
 	buddy = __get_buddy(block);
 	if (buddy &&
-	    (drm_buddy_block_is_free(block) &&
-	     drm_buddy_block_is_free(buddy)))
-		__drm_buddy_free(mm, block, false);
+	    (gpu_buddy_block_is_free(block) &&
+	     gpu_buddy_block_is_free(buddy)))
+		__gpu_buddy_free(mm, block, false);
 
 err_free:
 	if (err == -ENOSPC && total_allocated_on_err) {
 		list_splice_tail(&allocated, blocks);
 		*total_allocated_on_err = total_allocated;
 	} else {
-		drm_buddy_free_list_internal(mm, &allocated);
+		gpu_buddy_free_list_internal(mm, &allocated);
 	}
 
 	return err;
 }
 
-static int __drm_buddy_alloc_range(struct drm_buddy *mm,
+static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
 				   u64 start,
 				   u64 size,
 				   u64 *total_allocated_on_err,
@@ -911,13 +901,13 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
 			     blocks, total_allocated_on_err);
 }
 
-static int __alloc_contig_try_harder(struct drm_buddy *mm,
+static int __alloc_contig_try_harder(struct gpu_buddy *mm,
 				     u64 size,
 				     u64 min_block_size,
 				     struct list_head *blocks)
 {
 	u64 rhs_offset, lhs_offset, lhs_size, filled;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	unsigned int tree, order;
 	LIST_HEAD(blocks_lhs);
 	unsigned long pages;
@@ -943,8 +933,8 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 			block = rbtree_get_free_block(iter);
 
 			/* Allocate blocks traversing RHS */
-			rhs_offset = drm_buddy_block_offset(block);
-			err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
+			rhs_offset = gpu_buddy_block_offset(block);
+			err =  __gpu_buddy_alloc_range(mm, rhs_offset, size,
 						       &filled, blocks);
 			if (!err || err != -ENOSPC)
 				return err;
@@ -954,18 +944,18 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 				lhs_size = round_up(lhs_size, min_block_size);
 
 			/* Allocate blocks traversing LHS */
-			lhs_offset = drm_buddy_block_offset(block) - lhs_size;
-			err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
+			lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
+			err =  __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
 						       NULL, &blocks_lhs);
 			if (!err) {
 				list_splice(&blocks_lhs, blocks);
 				return 0;
 			} else if (err != -ENOSPC) {
-				drm_buddy_free_list_internal(mm, blocks);
+				gpu_buddy_free_list_internal(mm, blocks);
 				return err;
 			}
 			/* Free blocks for the next iteration */
-			drm_buddy_free_list_internal(mm, blocks);
+			gpu_buddy_free_list_internal(mm, blocks);
 
 			iter = rb_prev(iter);
 		}
@@ -975,9 +965,9 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 }
 
 /**
- * drm_buddy_block_trim - free unused pages
+ * gpu_buddy_block_trim - free unused pages
  *
- * @mm: DRM buddy manager
+ * @mm: GPU buddy manager
  * @start: start address to begin the trimming.
  * @new_size: original size requested
  * @blocks: Input and output list of allocated blocks.
@@ -993,13 +983,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
  * Returns:
  * 0 on success, error code on failure.
  */
-int drm_buddy_block_trim(struct drm_buddy *mm,
+int gpu_buddy_block_trim(struct gpu_buddy *mm,
 			 u64 *start,
 			 u64 new_size,
 			 struct list_head *blocks)
 {
-	struct drm_buddy_block *parent;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *parent;
+	struct gpu_buddy_block *block;
 	u64 block_start, block_end;
 	LIST_HEAD(dfs);
 	u64 new_start;
@@ -1009,22 +999,22 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 		return -EINVAL;
 
 	block = list_first_entry(blocks,
-				 struct drm_buddy_block,
+				 struct gpu_buddy_block,
 				 link);
 
-	block_start = drm_buddy_block_offset(block);
-	block_end = block_start + drm_buddy_block_size(mm, block);
+	block_start = gpu_buddy_block_offset(block);
+	block_end = block_start + gpu_buddy_block_size(mm, block);
 
-	if (WARN_ON(!drm_buddy_block_is_allocated(block)))
+	if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
 		return -EINVAL;
 
-	if (new_size > drm_buddy_block_size(mm, block))
+	if (new_size > gpu_buddy_block_size(mm, block))
 		return -EINVAL;
 
 	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
 		return -EINVAL;
 
-	if (new_size == drm_buddy_block_size(mm, block))
+	if (new_size == gpu_buddy_block_size(mm, block))
 		return 0;
 
 	new_start = block_start;
@@ -1043,9 +1033,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 
 	list_del(&block->link);
 	mark_free(mm, block);
-	mm->avail += drm_buddy_block_size(mm, block);
-	if (drm_buddy_block_is_clear(block))
-		mm->clear_avail += drm_buddy_block_size(mm, block);
+	mm->avail += gpu_buddy_block_size(mm, block);
+	if (gpu_buddy_block_is_clear(block))
+		mm->clear_avail += gpu_buddy_block_size(mm, block);
 
 	/* Prevent recursively freeing this node */
 	parent = block->parent;
@@ -1055,26 +1045,26 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
 	if (err) {
 		mark_allocated(mm, block);
-		mm->avail -= drm_buddy_block_size(mm, block);
-		if (drm_buddy_block_is_clear(block))
-			mm->clear_avail -= drm_buddy_block_size(mm, block);
+		mm->avail -= gpu_buddy_block_size(mm, block);
+		if (gpu_buddy_block_is_clear(block))
+			mm->clear_avail -= gpu_buddy_block_size(mm, block);
 		list_add(&block->link, blocks);
 	}
 
 	block->parent = parent;
 	return err;
 }
-EXPORT_SYMBOL(drm_buddy_block_trim);
+EXPORT_SYMBOL(gpu_buddy_block_trim);
 
-static struct drm_buddy_block *
-__drm_buddy_alloc_blocks(struct drm_buddy *mm,
+static struct gpu_buddy_block *
+__gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
 			 u64 start, u64 end,
 			 unsigned int order,
 			 unsigned long flags)
 {
-	if (flags & DRM_BUDDY_RANGE_ALLOCATION)
+	if (flags & GPU_BUDDY_RANGE_ALLOCATION)
 		/* Allocate traversing within the range */
-		return  __drm_buddy_alloc_range_bias(mm, start, end,
+		return  __gpu_buddy_alloc_range_bias(mm, start, end,
 						     order, flags);
 	else
 		/* Allocate from freetree */
@@ -1082,15 +1072,15 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
 }
 
 /**
- * drm_buddy_alloc_blocks - allocate power-of-two blocks
+ * gpu_buddy_alloc_blocks - allocate power-of-two blocks
  *
- * @mm: DRM buddy manager to allocate from
+ * @mm: GPU buddy manager to allocate from
  * @start: start of the allowed range for this block
  * @end: end of the allowed range for this block
  * @size: size of the allocation in bytes
  * @min_block_size: alignment of the allocation
  * @blocks: output list head to add allocated blocks
- * @flags: DRM_BUDDY_*_ALLOCATION flags
+ * @flags: GPU_BUDDY_*_ALLOCATION flags
  *
  * alloc_range_bias() called on range limitations, which traverses
  * the tree and returns the desired block.
@@ -1101,13 +1091,13 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
  * Returns:
  * 0 on success, error code on failure.
  */
-int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
 			   u64 start, u64 end, u64 size,
 			   u64 min_block_size,
 			   struct list_head *blocks,
 			   unsigned long flags)
 {
-	struct drm_buddy_block *block = NULL;
+	struct gpu_buddy_block *block = NULL;
 	u64 original_size, original_min_size;
 	unsigned int min_order, order;
 	LIST_HEAD(allocated);
@@ -1137,14 +1127,14 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 		if (!IS_ALIGNED(start | end, min_block_size))
 			return -EINVAL;
 
-		return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+		return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
 	}
 
 	original_size = size;
 	original_min_size = min_block_size;
 
 	/* Roundup the size to power of 2 */
-	if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
+	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
 		size = roundup_pow_of_two(size);
 		min_block_size = size;
 	/* Align size value to min_block_size */
@@ -1157,8 +1147,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
 
 	if (order > mm->max_order || size > mm->size) {
-		if ((flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) &&
-		    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+		if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
+		    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
 			return __alloc_contig_try_harder(mm, original_size,
 							 original_min_size, blocks);
 
@@ -1171,7 +1161,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 		BUG_ON(order < min_order);
 
 		do {
-			block = __drm_buddy_alloc_blocks(mm, start,
+			block = __gpu_buddy_alloc_blocks(mm, start,
 							 end,
 							 order,
 							 flags);
@@ -1182,7 +1172,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 				/* Try allocation through force merge method */
 				if (mm->clear_avail &&
 				    !__force_merge(mm, start, end, min_order)) {
-					block = __drm_buddy_alloc_blocks(mm, start,
+					block = __gpu_buddy_alloc_blocks(mm, start,
 									 end,
 									 min_order,
 									 flags);
@@ -1196,8 +1186,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 				 * Try contiguous block allocation through
 				 * try harder method.
 				 */
-				if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
-				    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+				if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
+				    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
 					return __alloc_contig_try_harder(mm,
 									 original_size,
 									 original_min_size,
@@ -1208,9 +1198,9 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 		} while (1);
 
 		mark_allocated(mm, block);
-		mm->avail -= drm_buddy_block_size(mm, block);
-		if (drm_buddy_block_is_clear(block))
-			mm->clear_avail -= drm_buddy_block_size(mm, block);
+		mm->avail -= gpu_buddy_block_size(mm, block);
+		if (gpu_buddy_block_is_clear(block))
+			mm->clear_avail -= gpu_buddy_block_size(mm, block);
 		kmemleak_update_trace(block);
 		list_add_tail(&block->link, &allocated);
 
@@ -1221,7 +1211,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 	} while (1);
 
 	/* Trim the allocated block to the required size */
-	if (!(flags & DRM_BUDDY_TRIM_DISABLE) &&
+	if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
 	    original_size != size) {
 		struct list_head *trim_list;
 		LIST_HEAD(temp);
@@ -1234,11 +1224,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 			block = list_last_entry(&allocated, typeof(*block), link);
 			list_move(&block->link, &temp);
 			trim_list = &temp;
-			trim_size = drm_buddy_block_size(mm, block) -
+			trim_size = gpu_buddy_block_size(mm, block) -
 				(size - original_size);
 		}
 
-		drm_buddy_block_trim(mm,
+		gpu_buddy_block_trim(mm,
 				     NULL,
 				     trim_size,
 				     trim_list);
@@ -1251,44 +1241,42 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 	return 0;
 
 err_free:
-	drm_buddy_free_list_internal(mm, &allocated);
+	gpu_buddy_free_list_internal(mm, &allocated);
 	return err;
 }
-EXPORT_SYMBOL(drm_buddy_alloc_blocks);
+EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
 
 /**
- * drm_buddy_block_print - print block information
+ * gpu_buddy_block_print - print block information
  *
- * @mm: DRM buddy manager
- * @block: DRM buddy block
- * @p: DRM printer to use
+ * @mm: GPU buddy manager
+ * @block: GPU buddy block
  */
-void drm_buddy_block_print(struct drm_buddy *mm,
-			   struct drm_buddy_block *block,
-			   struct drm_printer *p)
+void gpu_buddy_block_print(struct gpu_buddy *mm,
+			   struct gpu_buddy_block *block)
 {
-	u64 start = drm_buddy_block_offset(block);
-	u64 size = drm_buddy_block_size(mm, block);
+	u64 start = gpu_buddy_block_offset(block);
+	u64 size = gpu_buddy_block_size(mm, block);
 
-	drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size);
+	pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
 }
-EXPORT_SYMBOL(drm_buddy_block_print);
+EXPORT_SYMBOL(gpu_buddy_block_print);
 
 /**
- * drm_buddy_print - print allocator state
+ * gpu_buddy_print - print allocator state
  *
- * @mm: DRM buddy manager
- * @p: DRM printer to use
+ * @mm: GPU buddy manager
+ * @p: GPU printer to use
  */
-void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
+void gpu_buddy_print(struct gpu_buddy *mm)
 {
 	int order;
 
-	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
-		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
+	pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
+		mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
 
 	for (order = mm->max_order; order >= 0; order--) {
-		struct drm_buddy_block *block, *tmp;
+		struct gpu_buddy_block *block, *tmp;
 		struct rb_root *root;
 		u64 count = 0, free;
 		unsigned int tree;
@@ -1297,40 +1285,38 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
 			root = &mm->free_trees[tree][order];
 
 			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
-				BUG_ON(!drm_buddy_block_is_free(block));
+				BUG_ON(!gpu_buddy_block_is_free(block));
 				count++;
 			}
 		}
 
-		drm_printf(p, "order-%2d ", order);
-
 		free = count * (mm->chunk_size << order);
 		if (free < SZ_1M)
-			drm_printf(p, "free: %8llu KiB", free >> 10);
+			pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
+				order, free >> 10, count);
 		else
-			drm_printf(p, "free: %8llu MiB", free >> 20);
-
-		drm_printf(p, ", blocks: %llu\n", count);
+			pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
+				order, free >> 20, count);
 	}
 }
-EXPORT_SYMBOL(drm_buddy_print);
+EXPORT_SYMBOL(gpu_buddy_print);
 
-static void drm_buddy_module_exit(void)
+static void gpu_buddy_module_exit(void)
 {
 	kmem_cache_destroy(slab_blocks);
 }
 
-static int __init drm_buddy_module_init(void)
+static int __init gpu_buddy_module_init(void)
 {
-	slab_blocks = KMEM_CACHE(drm_buddy_block, 0);
+	slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
 	if (!slab_blocks)
 		return -ENOMEM;
 
 	return 0;
 }
 
-module_init(drm_buddy_module_init);
-module_exit(drm_buddy_module_exit);
+module_init(gpu_buddy_module_init);
+module_exit(gpu_buddy_module_exit);
 
-MODULE_DESCRIPTION("DRM Buddy Allocator");
+MODULE_DESCRIPTION("GPU Buddy Allocator");
 MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 862ff4000969..758f2eb3d588 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -220,6 +220,7 @@ config DRM_GPUSVM
 config DRM_BUDDY
 	tristate
 	depends on DRM
+	select GPU_BUDDY
 	help
 	  A page based buddy allocator
 
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 892859cfe95f..d0e37f8c2a46 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -114,7 +114,7 @@ drm_gpusvm_helper-$(CONFIG_ZONE_DEVICE) += \
 
 obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
 
-obj-$(CONFIG_DRM_BUDDY) += ../buddy.o
+obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
 
 drm_dma_helper-y := drm_gem_dma_helper.o
 drm_dma_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_dma.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index f582113d78b7..149f8f942eae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -5663,7 +5663,7 @@ int amdgpu_ras_add_critical_region(struct amdgpu_device *adev,
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct amdgpu_vram_mgr_resource *vres;
 	struct ras_critical_region *region;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	int ret = 0;
 
 	if (!bo || !bo->tbo.resource)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index be2e56ce1355..8908d9e08a30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -55,7 +55,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
 				    uint64_t start, uint64_t size,
 				    struct amdgpu_res_cursor *cur)
 {
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	struct list_head *head, *next;
 	struct drm_mm_node *node;
 
@@ -71,7 +71,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
 		head = &to_amdgpu_vram_mgr_resource(res)->blocks;
 
 		block = list_first_entry_or_null(head,
-						 struct drm_buddy_block,
+						 struct gpu_buddy_block,
 						 link);
 		if (!block)
 			goto fallback;
@@ -81,7 +81,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
 
 			next = block->link.next;
 			if (next != head)
-				block = list_entry(next, struct drm_buddy_block, link);
+				block = list_entry(next, struct gpu_buddy_block, link);
 		}
 
 		cur->start = amdgpu_vram_mgr_block_start(block) + start;
@@ -125,7 +125,7 @@ fallback:
  */
 static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 {
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	struct drm_mm_node *node;
 	struct list_head *next;
 
@@ -146,7 +146,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 		block = cur->node;
 
 		next = block->link.next;
-		block = list_entry(next, struct drm_buddy_block, link);
+		block = list_entry(next, struct gpu_buddy_block, link);
 
 		cur->node = block;
 		cur->start = amdgpu_vram_mgr_block_start(block);
@@ -175,7 +175,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
  */
 static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
 {
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 
 	switch (cur->mem_type) {
 	case TTM_PL_VRAM:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 9d934c07fa6b..cd94f6efb7cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -25,6 +25,7 @@
 #include <linux/dma-mapping.h>
 #include <drm/ttm/ttm_range_manager.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_buddy.h>
 
 #include "amdgpu.h"
 #include "amdgpu_vm.h"
@@ -52,15 +53,15 @@ to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
 	return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
 }
 
-static inline struct drm_buddy_block *
+static inline struct gpu_buddy_block *
 amdgpu_vram_mgr_first_block(struct list_head *list)
 {
-	return list_first_entry_or_null(list, struct drm_buddy_block, link);
+	return list_first_entry_or_null(list, struct gpu_buddy_block, link);
 }
 
 static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
 {
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	u64 start, size;
 
 	block = amdgpu_vram_mgr_first_block(head);
@@ -71,7 +72,7 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
 		start = amdgpu_vram_mgr_block_start(block);
 		size = amdgpu_vram_mgr_block_size(block);
 
-		block = list_entry(block->link.next, struct drm_buddy_block, link);
+		block = list_entry(block->link.next, struct gpu_buddy_block, link);
 		if (start + size != amdgpu_vram_mgr_block_start(block))
 			return false;
 	}
@@ -81,7 +82,7 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
 
 static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head)
 {
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	u64 size = 0;
 
 	list_for_each_entry(block, head, link)
@@ -254,7 +255,7 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = {
  * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM
  */
 static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
-				    struct drm_buddy_block *block)
+				    struct gpu_buddy_block *block)
 {
 	u64 start = amdgpu_vram_mgr_block_start(block);
 	u64 end = start + amdgpu_vram_mgr_block_size(block);
@@ -279,7 +280,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct ttm_resource *res = bo->tbo.resource;
 	struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	u64 usage = 0;
 
 	if (amdgpu_gmc_vram_full_visible(&adev->gmc))
@@ -299,15 +300,15 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
-	struct drm_buddy *mm = &mgr->mm;
+	struct gpu_buddy *mm = &mgr->mm;
 	struct amdgpu_vram_reservation *rsv, *temp;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	uint64_t vis_usage;
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) {
-		if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+		if (gpu_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
 					   rsv->size, mm->chunk_size, &rsv->allocated,
-					   DRM_BUDDY_RANGE_ALLOCATION))
+					   GPU_BUDDY_RANGE_ALLOCATION))
 			continue;
 
 		block = amdgpu_vram_mgr_first_block(&rsv->allocated);
@@ -403,7 +404,7 @@ int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
 			uint64_t address, struct amdgpu_vram_block_info *info)
 {
 	struct amdgpu_vram_mgr_resource *vres;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	u64 start, size;
 	int ret = -ENOENT;
 
@@ -450,8 +451,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	struct amdgpu_vram_mgr_resource *vres;
 	u64 size, remaining_size, lpfn, fpfn;
 	unsigned int adjust_dcc_size = 0;
-	struct drm_buddy *mm = &mgr->mm;
-	struct drm_buddy_block *block;
+	struct gpu_buddy *mm = &mgr->mm;
+	struct gpu_buddy_block *block;
 	unsigned long pages_per_block;
 	int r;
 
@@ -493,17 +494,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	INIT_LIST_HEAD(&vres->blocks);
 
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
-		vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+		vres->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION;
 
 	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-		vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+		vres->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION;
 
 	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
-		vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+		vres->flags |= GPU_BUDDY_CLEAR_ALLOCATION;
 
 	if (fpfn || lpfn != mgr->mm.size)
 		/* Allocate blocks in desired range */
-		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+		vres->flags |= GPU_BUDDY_RANGE_ALLOCATION;
 
 	if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC &&
 	    adev->gmc.gmc_funcs->get_dcc_alignment)
@@ -516,7 +517,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 		dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size);
 		remaining_size = (u64)dcc_size;
 
-		vres->flags |= DRM_BUDDY_TRIM_DISABLE;
+		vres->flags |= GPU_BUDDY_TRIM_DISABLE;
 	}
 
 	mutex_lock(&mgr->lock);
@@ -536,7 +537,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 
 		BUG_ON(min_block_size < mm->chunk_size);
 
-		r = drm_buddy_alloc_blocks(mm, fpfn,
+		r = gpu_buddy_alloc_blocks(mm, fpfn,
 					   lpfn,
 					   size,
 					   min_block_size,
@@ -545,7 +546,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 
 		if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
 		    !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
-			vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+			vres->flags &= ~GPU_BUDDY_CONTIGUOUS_ALLOCATION;
 			pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
 						tbo->page_alignment);
 
@@ -566,7 +567,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	list_add_tail(&vres->vres_node, &mgr->allocated_vres_list);
 
 	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
-		struct drm_buddy_block *dcc_block;
+		struct gpu_buddy_block *dcc_block;
 		unsigned long dcc_start;
 		u64 trim_start;
 
@@ -576,7 +577,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block),
 				adjust_dcc_size);
 		trim_start = (u64)dcc_start;
-		drm_buddy_block_trim(mm, &trim_start,
+		gpu_buddy_block_trim(mm, &trim_start,
 				     (u64)vres->base.size,
 				     &vres->blocks);
 	}
@@ -614,7 +615,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	return 0;
 
 error_free_blocks:
-	drm_buddy_free_list(mm, &vres->blocks, 0);
+	gpu_buddy_free_list(mm, &vres->blocks, 0);
 	mutex_unlock(&mgr->lock);
 error_fini:
 	ttm_resource_fini(man, &vres->base);
@@ -637,8 +638,8 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 	struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
-	struct drm_buddy *mm = &mgr->mm;
-	struct drm_buddy_block *block;
+	struct gpu_buddy *mm = &mgr->mm;
+	struct gpu_buddy_block *block;
 	uint64_t vis_usage = 0;
 
 	mutex_lock(&mgr->lock);
@@ -649,7 +650,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 	list_for_each_entry(block, &vres->blocks, link)
 		vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
 
-	drm_buddy_free_list(mm, &vres->blocks, vres->flags);
+	gpu_buddy_free_list(mm, &vres->blocks, vres->flags);
 	amdgpu_vram_mgr_do_reserve(man);
 	mutex_unlock(&mgr->lock);
 
@@ -688,7 +689,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
 	if (!*sgt)
 		return -ENOMEM;
 
-	/* Determine the number of DRM_BUDDY blocks to export */
+	/* Determine the number of GPU_BUDDY blocks to export */
 	amdgpu_res_first(res, offset, length, &cursor);
 	while (cursor.remaining) {
 		num_entries++;
@@ -704,10 +705,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
 		sg->length = 0;
 
 	/*
-	 * Walk down DRM_BUDDY blocks to populate scatterlist nodes
-	 * @note: Use iterator api to get first the DRM_BUDDY block
+	 * Walk down GPU_BUDDY blocks to populate scatterlist nodes
+	 * @note: Use iterator api to get first the GPU_BUDDY block
 	 * and the number of bytes from it. Access the following
-	 * DRM_BUDDY block(s) if more buffer needs to exported
+	 * GPU_BUDDY block(s) if more buffer needs to exported
 	 */
 	amdgpu_res_first(res, offset, length, &cursor);
 	for_each_sgtable_sg((*sgt), sg, i) {
@@ -792,10 +793,10 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
 void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev)
 {
 	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
-	struct drm_buddy *mm = &mgr->mm;
+	struct gpu_buddy *mm = &mgr->mm;
 
 	mutex_lock(&mgr->lock);
-	drm_buddy_reset_clear(mm, false);
+	gpu_buddy_reset_clear(mm, false);
 	mutex_unlock(&mgr->lock);
 }
 
@@ -815,7 +816,7 @@ static bool amdgpu_vram_mgr_intersects(struct ttm_resource_manager *man,
 				       size_t size)
 {
 	struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 
 	/* Check each drm buddy block individually */
 	list_for_each_entry(block, &mgr->blocks, link) {
@@ -848,7 +849,7 @@ static bool amdgpu_vram_mgr_compatible(struct ttm_resource_manager *man,
 				       size_t size)
 {
 	struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 
 	/* Check each drm buddy block individually */
 	list_for_each_entry(block, &mgr->blocks, link) {
@@ -877,7 +878,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
 				  struct drm_printer *printer)
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
-	struct drm_buddy *mm = &mgr->mm;
+	struct gpu_buddy *mm = &mgr->mm;
 	struct amdgpu_vram_reservation *rsv;
 
 	drm_printf(printer, "  vis usage:%llu\n",
@@ -930,7 +931,7 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 	mgr->default_page_size = PAGE_SIZE;
 
 	man->func = &amdgpu_vram_mgr_func;
-	err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+	err = gpu_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
 	if (err)
 		return err;
 
@@ -965,11 +966,11 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 		kfree(rsv);
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
-		drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
+		gpu_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
 		kfree(rsv);
 	}
 	if (!adev->gmc.is_app_apu)
-		drm_buddy_fini(&mgr->mm);
+		gpu_buddy_fini(&mgr->mm);
 	mutex_unlock(&mgr->lock);
 
 	ttm_resource_manager_cleanup(man);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 874779618056..429a21a2e9b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -28,7 +28,7 @@
 
 struct amdgpu_vram_mgr {
 	struct ttm_resource_manager manager;
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	/* protects access to buffer objects */
 	struct mutex lock;
 	struct list_head reservations_pending;
@@ -57,19 +57,19 @@ struct amdgpu_vram_mgr_resource {
 	struct amdgpu_vres_task task;
 };
 
-static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
+static inline u64 amdgpu_vram_mgr_block_start(struct gpu_buddy_block *block)
 {
-	return drm_buddy_block_offset(block);
+	return gpu_buddy_block_offset(block);
 }
 
-static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
+static inline u64 amdgpu_vram_mgr_block_size(struct gpu_buddy_block *block)
 {
-	return (u64)PAGE_SIZE << drm_buddy_block_order(block);
+	return (u64)PAGE_SIZE << gpu_buddy_block_order(block);
 }
 
-static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+static inline bool amdgpu_vram_mgr_is_cleared(struct gpu_buddy_block *block)
 {
-	return drm_buddy_block_is_clear(block);
+	return gpu_buddy_block_is_clear(block);
 }
 
 static inline struct amdgpu_vram_mgr_resource *
@@ -82,8 +82,8 @@ static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res)
 {
 	struct amdgpu_vram_mgr_resource *ares = to_amdgpu_vram_mgr_resource(res);
 
-	WARN_ON(ares->flags & DRM_BUDDY_CLEARED);
-	ares->flags |= DRM_BUDDY_CLEARED;
+	WARN_ON(ares->flags & GPU_BUDDY_CLEARED);
+	ares->flags |= GPU_BUDDY_CLEARED;
 }
 
 int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
new file mode 100644
index 000000000000..841f3de5f307
--- /dev/null
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <kunit/test-bug.h>
+
+#include <linux/export.h>
+#include <linux/kmemleak.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+
+#include <linux/gpu_buddy.h>
+#include <drm/drm_buddy.h>
+#include <drm/drm_print.h>
+
+/**
+ * drm_buddy_block_print - print block information
+ *
+ * @mm: DRM buddy manager
+ * @block: DRM buddy block
+ * @p: DRM printer to use
+ */
+void drm_buddy_block_print(struct gpu_buddy *mm,
+			   struct gpu_buddy_block *block,
+			   struct drm_printer *p)
+{
+	u64 start = gpu_buddy_block_offset(block);
+	u64 size = gpu_buddy_block_size(mm, block);
+
+	drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size);
+}
+EXPORT_SYMBOL(drm_buddy_block_print);
+
+/**
+ * drm_buddy_print - print allocator state
+ *
+ * @mm: DRM buddy manager
+ * @p: DRM printer to use
+ */
+void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p)
+{
+	int order;
+
+	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
+		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
+
+	for (order = mm->max_order; order >= 0; order--) {
+		struct gpu_buddy_block *block, *tmp;
+		struct rb_root *root;
+		u64 count = 0, free;
+		unsigned int tree;
+
+		for_each_free_tree(tree) {
+			root = &mm->free_trees[tree][order];
+
+			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
+				BUG_ON(!gpu_buddy_block_is_free(block));
+				count++;
+			}
+		}
+
+		drm_printf(p, "order-%2d ", order);
+
+		free = count * (mm->chunk_size << order);
+		if (free < SZ_1M)
+			drm_printf(p, "free: %8llu KiB", free >> 10);
+		else
+			drm_printf(p, "free: %8llu MiB", free >> 20);
+
+		drm_printf(p, ", blocks: %llu\n", count);
+	}
+}
+EXPORT_SYMBOL(drm_buddy_print);
+
+MODULE_DESCRIPTION("DRM-specific GPU Buddy Allocator Print Helpers");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
index 30246f02bcfe..6a34dae13769 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.c
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -167,9 +167,9 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
 	struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
 	const u64 size = res->size;
 	const u32 max_segment = round_down(UINT_MAX, page_alignment);
-	struct drm_buddy *mm = bman_res->mm;
+	struct gpu_buddy *mm = bman_res->mm;
 	struct list_head *blocks = &bman_res->blocks;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	struct i915_refct_sgt *rsgt;
 	struct scatterlist *sg;
 	struct sg_table *st;
@@ -202,8 +202,8 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
 	list_for_each_entry(block, blocks, link) {
 		u64 block_size, offset;
 
-		block_size = min_t(u64, size, drm_buddy_block_size(mm, block));
-		offset = drm_buddy_block_offset(block);
+		block_size = min_t(u64, size, gpu_buddy_block_size(mm, block));
+		offset = gpu_buddy_block_offset(block);
 
 		while (block_size) {
 			u64 len;
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 6b256d95badd..c5ca90088705 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -6,6 +6,7 @@
 #include <linux/slab.h>
 
 #include <linux/gpu_buddy.h>
+#include <drm/drm_buddy.h>
 #include <drm/drm_print.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_bo.h>
@@ -16,7 +17,7 @@
 
 struct i915_ttm_buddy_manager {
 	struct ttm_resource_manager manager;
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	struct list_head reserved;
 	struct mutex lock;
 	unsigned long visible_size;
@@ -38,7 +39,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 {
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
 	struct i915_ttm_buddy_resource *bman_res;
-	struct drm_buddy *mm = &bman->mm;
+	struct gpu_buddy *mm = &bman->mm;
 	unsigned long n_pages, lpfn;
 	u64 min_page_size;
 	u64 size;
@@ -57,13 +58,13 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	bman_res->mm = mm;
 
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
-		bman_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+		bman_res->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION;
 
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
-		bman_res->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+		bman_res->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION;
 
 	if (place->fpfn || lpfn != man->size)
-		bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+		bman_res->flags |= GPU_BUDDY_RANGE_ALLOCATION;
 
 	GEM_BUG_ON(!bman_res->base.size);
 	size = bman_res->base.size;
@@ -89,7 +90,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 		goto err_free_res;
 	}
 
-	err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+	err = gpu_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
 				     (u64)lpfn << PAGE_SHIFT,
 				     (u64)n_pages << PAGE_SHIFT,
 				     min_page_size,
@@ -101,15 +102,15 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	if (lpfn <= bman->visible_size) {
 		bman_res->used_visible_size = PFN_UP(bman_res->base.size);
 	} else {
-		struct drm_buddy_block *block;
+		struct gpu_buddy_block *block;
 
 		list_for_each_entry(block, &bman_res->blocks, link) {
 			unsigned long start =
-				drm_buddy_block_offset(block) >> PAGE_SHIFT;
+				gpu_buddy_block_offset(block) >> PAGE_SHIFT;
 
 			if (start < bman->visible_size) {
 				unsigned long end = start +
-					(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+					(gpu_buddy_block_size(mm, block) >> PAGE_SHIFT);
 
 				bman_res->used_visible_size +=
 					min(end, bman->visible_size) - start;
@@ -126,7 +127,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	return 0;
 
 err_free_blocks:
-	drm_buddy_free_list(mm, &bman_res->blocks, 0);
+	gpu_buddy_free_list(mm, &bman_res->blocks, 0);
 	mutex_unlock(&bman->lock);
 err_free_res:
 	ttm_resource_fini(man, &bman_res->base);
@@ -141,7 +142,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man,
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
 
 	mutex_lock(&bman->lock);
-	drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
+	gpu_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
 	bman->visible_avail += bman_res->used_visible_size;
 	mutex_unlock(&bman->lock);
 
@@ -156,8 +157,8 @@ static bool i915_ttm_buddy_man_intersects(struct ttm_resource_manager *man,
 {
 	struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
-	struct drm_buddy *mm = &bman->mm;
-	struct drm_buddy_block *block;
+	struct gpu_buddy *mm = &bman->mm;
+	struct gpu_buddy_block *block;
 
 	if (!place->fpfn && !place->lpfn)
 		return true;
@@ -176,9 +177,9 @@ static bool i915_ttm_buddy_man_intersects(struct ttm_resource_manager *man,
 	/* Check each drm buddy block individually */
 	list_for_each_entry(block, &bman_res->blocks, link) {
 		unsigned long fpfn =
-			drm_buddy_block_offset(block) >> PAGE_SHIFT;
+			gpu_buddy_block_offset(block) >> PAGE_SHIFT;
 		unsigned long lpfn = fpfn +
-			(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+			(gpu_buddy_block_size(mm, block) >> PAGE_SHIFT);
 
 		if (place->fpfn < lpfn && place->lpfn > fpfn)
 			return true;
@@ -194,8 +195,8 @@ static bool i915_ttm_buddy_man_compatible(struct ttm_resource_manager *man,
 {
 	struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
-	struct drm_buddy *mm = &bman->mm;
-	struct drm_buddy_block *block;
+	struct gpu_buddy *mm = &bman->mm;
+	struct gpu_buddy_block *block;
 
 	if (!place->fpfn && !place->lpfn)
 		return true;
@@ -209,9 +210,9 @@ static bool i915_ttm_buddy_man_compatible(struct ttm_resource_manager *man,
 	/* Check each drm buddy block individually */
 	list_for_each_entry(block, &bman_res->blocks, link) {
 		unsigned long fpfn =
-			drm_buddy_block_offset(block) >> PAGE_SHIFT;
+			gpu_buddy_block_offset(block) >> PAGE_SHIFT;
 		unsigned long lpfn = fpfn +
-			(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+			(gpu_buddy_block_size(mm, block) >> PAGE_SHIFT);
 
 		if (fpfn < place->fpfn || lpfn > place->lpfn)
 			return false;
@@ -224,7 +225,7 @@ static void i915_ttm_buddy_man_debug(struct ttm_resource_manager *man,
 				     struct drm_printer *printer)
 {
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 
 	mutex_lock(&bman->lock);
 	drm_printf(printer, "default_page_size: %lluKiB\n",
@@ -293,7 +294,7 @@ int i915_ttm_buddy_man_init(struct ttm_device *bdev,
 	if (!bman)
 		return -ENOMEM;
 
-	err = drm_buddy_init(&bman->mm, size, chunk_size);
+	err = gpu_buddy_init(&bman->mm, size, chunk_size);
 	if (err)
 		goto err_free_bman;
 
@@ -333,7 +334,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
 {
 	struct ttm_resource_manager *man = ttm_manager_type(bdev, type);
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
-	struct drm_buddy *mm = &bman->mm;
+	struct gpu_buddy *mm = &bman->mm;
 	int ret;
 
 	ttm_resource_manager_set_used(man, false);
@@ -345,8 +346,8 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
 	ttm_set_driver_manager(bdev, type, NULL);
 
 	mutex_lock(&bman->lock);
-	drm_buddy_free_list(mm, &bman->reserved, 0);
-	drm_buddy_fini(mm);
+	gpu_buddy_free_list(mm, &bman->reserved, 0);
+	gpu_buddy_fini(mm);
 	bman->visible_avail += bman->visible_reserved;
 	WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
 	mutex_unlock(&bman->lock);
@@ -371,15 +372,15 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
 			       u64 start, u64 size)
 {
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
-	struct drm_buddy *mm = &bman->mm;
+	struct gpu_buddy *mm = &bman->mm;
 	unsigned long fpfn = start >> PAGE_SHIFT;
 	unsigned long flags = 0;
 	int ret;
 
-	flags |= DRM_BUDDY_RANGE_ALLOCATION;
+	flags |= GPU_BUDDY_RANGE_ALLOCATION;
 
 	mutex_lock(&bman->lock);
-	ret = drm_buddy_alloc_blocks(mm, start,
+	ret = gpu_buddy_alloc_blocks(mm, start,
 				     start + size,
 				     size, mm->chunk_size,
 				     &bman->reserved,
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
index d64620712830..1cff018c1689 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
@@ -13,7 +13,7 @@
 
 struct ttm_device;
 struct ttm_resource_manager;
-struct drm_buddy;
+struct gpu_buddy;
 
 /**
  * struct i915_ttm_buddy_resource
@@ -33,7 +33,7 @@ struct i915_ttm_buddy_resource {
 	struct list_head blocks;
 	unsigned long flags;
 	unsigned long used_visible_size;
-	struct drm_buddy *mm;
+	struct gpu_buddy *mm;
 };
 
 /**
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 7b856b5090f9..8307390943a2 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -6,7 +6,7 @@
 #include <linux/prime_numbers.h>
 #include <linux/sort.h>
 
-#include <drm/drm_buddy.h>
+#include <linux/gpu_buddy.h>
 
 #include "../i915_selftest.h"
 
@@ -371,7 +371,7 @@ static int igt_mock_splintered_region(void *arg)
 	struct drm_i915_private *i915 = mem->i915;
 	struct i915_ttm_buddy_resource *res;
 	struct drm_i915_gem_object *obj;
-	struct drm_buddy *mm;
+	struct gpu_buddy *mm;
 	unsigned int expected_order;
 	LIST_HEAD(objects);
 	u64 size;
@@ -447,8 +447,8 @@ static int igt_mock_max_segment(void *arg)
 	struct drm_i915_private *i915 = mem->i915;
 	struct i915_ttm_buddy_resource *res;
 	struct drm_i915_gem_object *obj;
-	struct drm_buddy_block *block;
-	struct drm_buddy *mm;
+	struct gpu_buddy_block *block;
+	struct gpu_buddy *mm;
 	struct list_head *blocks;
 	struct scatterlist *sg;
 	I915_RND_STATE(prng);
@@ -487,8 +487,8 @@ static int igt_mock_max_segment(void *arg)
 	mm = res->mm;
 	size = 0;
 	list_for_each_entry(block, blocks, link) {
-		if (drm_buddy_block_size(mm, block) > size)
-			size = drm_buddy_block_size(mm, block);
+		if (gpu_buddy_block_size(mm, block) > size)
+			size = gpu_buddy_block_size(mm, block);
 	}
 	if (size < max_segment) {
 		pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n",
@@ -527,14 +527,14 @@ static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj)
 	struct intel_memory_region *mr = obj->mm.region;
 	struct i915_ttm_buddy_resource *bman_res =
 		to_ttm_buddy_resource(obj->mm.res);
-	struct drm_buddy *mm = bman_res->mm;
-	struct drm_buddy_block *block;
+	struct gpu_buddy *mm = bman_res->mm;
+	struct gpu_buddy_block *block;
 	u64 total;
 
 	total = 0;
 	list_for_each_entry(block, &bman_res->blocks, link) {
-		u64 start = drm_buddy_block_offset(block);
-		u64 end = start + drm_buddy_block_size(mm, block);
+		u64 start = gpu_buddy_block_offset(block);
+		u64 end = start + gpu_buddy_block_size(mm, block);
 
 		if (start < resource_size(&mr->io))
 			total += min_t(u64, end, resource_size(&mr->io)) - start;
diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
index 6d95447a989d..e32f3c8d7b84 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
@@ -251,7 +251,7 @@ static void ttm_bo_validate_basic(struct kunit *test)
 				   NULL, &dummy_ttm_bo_destroy);
 	KUNIT_EXPECT_EQ(test, err, 0);
 
-	snd_place = ttm_place_kunit_init(test, snd_mem, DRM_BUDDY_TOPDOWN_ALLOCATION);
+	snd_place = ttm_place_kunit_init(test, snd_mem, GPU_BUDDY_TOPDOWN_ALLOCATION);
 	snd_placement = ttm_placement_kunit_init(test, snd_place, 1);
 
 	err = ttm_bo_validate(bo, snd_placement, &ctx_val);
@@ -263,7 +263,7 @@ static void ttm_bo_validate_basic(struct kunit *test)
 	KUNIT_EXPECT_TRUE(test, ttm_tt_is_populated(bo->ttm));
 	KUNIT_EXPECT_EQ(test, bo->resource->mem_type, snd_mem);
 	KUNIT_EXPECT_EQ(test, bo->resource->placement,
-			DRM_BUDDY_TOPDOWN_ALLOCATION);
+			GPU_BUDDY_TOPDOWN_ALLOCATION);
 
 	ttm_bo_fini(bo);
 	ttm_mock_manager_fini(priv->ttm_dev, snd_mem);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
index dd395229e388..294d56d9067e 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
@@ -31,7 +31,7 @@ static int ttm_mock_manager_alloc(struct ttm_resource_manager *man,
 {
 	struct ttm_mock_manager *manager = to_mock_mgr(man);
 	struct ttm_mock_resource *mock_res;
-	struct drm_buddy *mm = &manager->mm;
+	struct gpu_buddy *mm = &manager->mm;
 	u64 lpfn, fpfn, alloc_size;
 	int err;
 
@@ -47,14 +47,14 @@ static int ttm_mock_manager_alloc(struct ttm_resource_manager *man,
 	INIT_LIST_HEAD(&mock_res->blocks);
 
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
-		mock_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+		mock_res->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION;
 
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
-		mock_res->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+		mock_res->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION;
 
 	alloc_size = (uint64_t)mock_res->base.size;
 	mutex_lock(&manager->lock);
-	err = drm_buddy_alloc_blocks(mm, fpfn, lpfn, alloc_size,
+	err = gpu_buddy_alloc_blocks(mm, fpfn, lpfn, alloc_size,
 				     manager->default_page_size,
 				     &mock_res->blocks,
 				     mock_res->flags);
@@ -67,7 +67,7 @@ static int ttm_mock_manager_alloc(struct ttm_resource_manager *man,
 	return 0;
 
 error_free_blocks:
-	drm_buddy_free_list(mm, &mock_res->blocks, 0);
+	gpu_buddy_free_list(mm, &mock_res->blocks, 0);
 	ttm_resource_fini(man, &mock_res->base);
 	mutex_unlock(&manager->lock);
 
@@ -79,10 +79,10 @@ static void ttm_mock_manager_free(struct ttm_resource_manager *man,
 {
 	struct ttm_mock_manager *manager = to_mock_mgr(man);
 	struct ttm_mock_resource *mock_res = to_mock_mgr_resource(res);
-	struct drm_buddy *mm = &manager->mm;
+	struct gpu_buddy *mm = &manager->mm;
 
 	mutex_lock(&manager->lock);
-	drm_buddy_free_list(mm, &mock_res->blocks, 0);
+	gpu_buddy_free_list(mm, &mock_res->blocks, 0);
 	mutex_unlock(&manager->lock);
 
 	ttm_resource_fini(man, res);
@@ -106,7 +106,7 @@ int ttm_mock_manager_init(struct ttm_device *bdev, u32 mem_type, u32 size)
 
 	mutex_init(&manager->lock);
 
-	err = drm_buddy_init(&manager->mm, size, PAGE_SIZE);
+	err = gpu_buddy_init(&manager->mm, size, PAGE_SIZE);
 
 	if (err) {
 		kfree(manager);
@@ -142,7 +142,7 @@ void ttm_mock_manager_fini(struct ttm_device *bdev, u32 mem_type)
 	ttm_resource_manager_set_used(man, false);
 
 	mutex_lock(&mock_man->lock);
-	drm_buddy_fini(&mock_man->mm);
+	gpu_buddy_fini(&mock_man->mm);
 	mutex_unlock(&mock_man->lock);
 
 	ttm_set_driver_manager(bdev, mem_type, NULL);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
index 96ea8c9aae34..08710756fd8e 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
+++ b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
@@ -9,7 +9,7 @@
 
 struct ttm_mock_manager {
 	struct ttm_resource_manager man;
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	u64 default_page_size;
 	/* protects allocations of mock buffer objects */
 	struct mutex lock;
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index 4e00008b7081..5f4ab08c0686 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -58,7 +58,7 @@ struct xe_res_cursor {
 	/** @dma_addr: Current element in a struct drm_pagemap_addr array */
 	const struct drm_pagemap_addr *dma_addr;
 	/** @mm: Buddy allocator for VRAM cursor */
-	struct drm_buddy *mm;
+	struct gpu_buddy *mm;
 	/**
 	 * @dma_start: DMA start address for the current segment.
 	 * This may be different to @dma_addr.addr since elements in
@@ -69,7 +69,7 @@ struct xe_res_cursor {
 	u64 dma_seg_size;
 };
 
-static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res)
+static struct gpu_buddy *xe_res_get_buddy(struct ttm_resource *res)
 {
 	struct ttm_resource_manager *mgr;
 
@@ -104,30 +104,30 @@ static inline void xe_res_first(struct ttm_resource *res,
 	case XE_PL_STOLEN:
 	case XE_PL_VRAM0:
 	case XE_PL_VRAM1: {
-		struct drm_buddy_block *block;
+		struct gpu_buddy_block *block;
 		struct list_head *head, *next;
-		struct drm_buddy *mm = xe_res_get_buddy(res);
+		struct gpu_buddy *mm = xe_res_get_buddy(res);
 
 		head = &to_xe_ttm_vram_mgr_resource(res)->blocks;
 
 		block = list_first_entry_or_null(head,
-						 struct drm_buddy_block,
+						 struct gpu_buddy_block,
 						 link);
 		if (!block)
 			goto fallback;
 
-		while (start >= drm_buddy_block_size(mm, block)) {
-			start -= drm_buddy_block_size(mm, block);
+		while (start >= gpu_buddy_block_size(mm, block)) {
+			start -= gpu_buddy_block_size(mm, block);
 
 			next = block->link.next;
 			if (next != head)
-				block = list_entry(next, struct drm_buddy_block,
+				block = list_entry(next, struct gpu_buddy_block,
 						   link);
 		}
 
 		cur->mm = mm;
-		cur->start = drm_buddy_block_offset(block) + start;
-		cur->size = min(drm_buddy_block_size(mm, block) - start,
+		cur->start = gpu_buddy_block_offset(block) + start;
+		cur->size = min(gpu_buddy_block_size(mm, block) - start,
 				size);
 		cur->remaining = size;
 		cur->node = block;
@@ -259,7 +259,7 @@ static inline void xe_res_first_dma(const struct drm_pagemap_addr *dma_addr,
  */
 static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
 {
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	struct list_head *next;
 	u64 start;
 
@@ -295,18 +295,18 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
 		block = cur->node;
 
 		next = block->link.next;
-		block = list_entry(next, struct drm_buddy_block, link);
+		block = list_entry(next, struct gpu_buddy_block, link);
 
 
-		while (start >= drm_buddy_block_size(cur->mm, block)) {
-			start -= drm_buddy_block_size(cur->mm, block);
+		while (start >= gpu_buddy_block_size(cur->mm, block)) {
+			start -= gpu_buddy_block_size(cur->mm, block);
 
 			next = block->link.next;
-			block = list_entry(next, struct drm_buddy_block, link);
+			block = list_entry(next, struct gpu_buddy_block, link);
 		}
 
-		cur->start = drm_buddy_block_offset(block) + start;
-		cur->size = min(drm_buddy_block_size(cur->mm, block) - start,
+		cur->start = gpu_buddy_block_offset(block) + start;
+		cur->size = min(gpu_buddy_block_size(cur->mm, block) - start,
 				cur->remaining);
 		cur->node = block;
 		break;
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 213f0334518a..cda3bf7e2418 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -747,7 +747,7 @@ static u64 block_offset_to_pfn(struct drm_pagemap *dpagemap, u64 offset)
 	return PHYS_PFN(offset + xpagemap->hpa_base);
 }
 
-static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram)
+static struct gpu_buddy *vram_to_buddy(struct xe_vram_region *vram)
 {
 	return &vram->ttm.mm;
 }
@@ -758,17 +758,17 @@ static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocati
 	struct xe_bo *bo = to_xe_bo(devmem_allocation);
 	struct ttm_resource *res = bo->ttm.resource;
 	struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	int j = 0;
 
 	list_for_each_entry(block, blocks, link) {
 		struct xe_vram_region *vr = block->private;
-		struct drm_buddy *buddy = vram_to_buddy(vr);
+		struct gpu_buddy *buddy = vram_to_buddy(vr);
 		u64 block_pfn = block_offset_to_pfn(devmem_allocation->dpagemap,
-						    drm_buddy_block_offset(block));
+						    gpu_buddy_block_offset(block));
 		int i;
 
-		for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
+		for (i = 0; i < gpu_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
 			pfn[j++] = block_pfn + i;
 	}
 
@@ -1033,7 +1033,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
 	struct dma_fence *pre_migrate_fence = NULL;
 	struct xe_device *xe = vr->xe;
 	struct device *dev = xe->drm.dev;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	struct xe_validation_ctx vctx;
 	struct list_head *blocks;
 	struct drm_exec exec;
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 6553a19f7cf2..d119217d566a 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -6,6 +6,7 @@
 
 #include <drm/drm_managed.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_buddy.h>
 
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_range_manager.h>
@@ -16,16 +17,16 @@
 #include "xe_ttm_vram_mgr.h"
 #include "xe_vram_types.h"
 
-static inline struct drm_buddy_block *
+static inline struct gpu_buddy_block *
 xe_ttm_vram_mgr_first_block(struct list_head *list)
 {
-	return list_first_entry_or_null(list, struct drm_buddy_block, link);
+	return list_first_entry_or_null(list, struct gpu_buddy_block, link);
 }
 
-static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm,
+static inline bool xe_is_vram_mgr_blocks_contiguous(struct gpu_buddy *mm,
 						    struct list_head *head)
 {
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	u64 start, size;
 
 	block = xe_ttm_vram_mgr_first_block(head);
@@ -33,12 +34,12 @@ static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm,
 		return false;
 
 	while (head != block->link.next) {
-		start = drm_buddy_block_offset(block);
-		size = drm_buddy_block_size(mm, block);
+		start = gpu_buddy_block_offset(block);
+		size = gpu_buddy_block_size(mm, block);
 
-		block = list_entry(block->link.next, struct drm_buddy_block,
+		block = list_entry(block->link.next, struct gpu_buddy_block,
 				   link);
-		if (start + size != drm_buddy_block_offset(block))
+		if (start + size != gpu_buddy_block_offset(block))
 			return false;
 	}
 
@@ -52,7 +53,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 {
 	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
 	struct xe_ttm_vram_mgr_resource *vres;
-	struct drm_buddy *mm = &mgr->mm;
+	struct gpu_buddy *mm = &mgr->mm;
 	u64 size, min_page_size;
 	unsigned long lpfn;
 	int err;
@@ -79,10 +80,10 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 	INIT_LIST_HEAD(&vres->blocks);
 
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
-		vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+		vres->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION;
 
 	if (place->fpfn || lpfn != man->size >> PAGE_SHIFT)
-		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+		vres->flags |= GPU_BUDDY_RANGE_ALLOCATION;
 
 	if (WARN_ON(!vres->base.size)) {
 		err = -EINVAL;
@@ -118,27 +119,27 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 		lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn);
 	}
 
-	err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+	err = gpu_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
 				     (u64)lpfn << PAGE_SHIFT, size,
 				     min_page_size, &vres->blocks, vres->flags);
 	if (err)
 		goto error_unlock;
 
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-		if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks))
+		if (!gpu_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks))
 			size = vres->base.size;
 	}
 
 	if (lpfn <= mgr->visible_size >> PAGE_SHIFT) {
 		vres->used_visible_size = size;
 	} else {
-		struct drm_buddy_block *block;
+		struct gpu_buddy_block *block;
 
 		list_for_each_entry(block, &vres->blocks, link) {
-			u64 start = drm_buddy_block_offset(block);
+			u64 start = gpu_buddy_block_offset(block);
 
 			if (start < mgr->visible_size) {
-				u64 end = start + drm_buddy_block_size(mm, block);
+				u64 end = start + gpu_buddy_block_size(mm, block);
 
 				vres->used_visible_size +=
 					min(end, mgr->visible_size) - start;
@@ -158,11 +159,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 	 * the object.
 	 */
 	if (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) {
-		struct drm_buddy_block *block = list_first_entry(&vres->blocks,
+		struct gpu_buddy_block *block = list_first_entry(&vres->blocks,
 								 typeof(*block),
 								 link);
 
-		vres->base.start = drm_buddy_block_offset(block) >> PAGE_SHIFT;
+		vres->base.start = gpu_buddy_block_offset(block) >> PAGE_SHIFT;
 	} else {
 		vres->base.start = XE_BO_INVALID_OFFSET;
 	}
@@ -184,10 +185,10 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
 	struct xe_ttm_vram_mgr_resource *vres =
 		to_xe_ttm_vram_mgr_resource(res);
 	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
-	struct drm_buddy *mm = &mgr->mm;
+	struct gpu_buddy *mm = &mgr->mm;
 
 	mutex_lock(&mgr->lock);
-	drm_buddy_free_list(mm, &vres->blocks, 0);
+	gpu_buddy_free_list(mm, &vres->blocks, 0);
 	mgr->visible_avail += vres->used_visible_size;
 	mutex_unlock(&mgr->lock);
 
@@ -200,7 +201,7 @@ static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
 				  struct drm_printer *printer)
 {
 	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
-	struct drm_buddy *mm = &mgr->mm;
+	struct gpu_buddy *mm = &mgr->mm;
 
 	mutex_lock(&mgr->lock);
 	drm_printf(printer, "default_page_size: %lluKiB\n",
@@ -223,8 +224,8 @@ static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man,
 	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
 	struct xe_ttm_vram_mgr_resource *vres =
 		to_xe_ttm_vram_mgr_resource(res);
-	struct drm_buddy *mm = &mgr->mm;
-	struct drm_buddy_block *block;
+	struct gpu_buddy *mm = &mgr->mm;
+	struct gpu_buddy_block *block;
 
 	if (!place->fpfn && !place->lpfn)
 		return true;
@@ -234,9 +235,9 @@ static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man,
 
 	list_for_each_entry(block, &vres->blocks, link) {
 		unsigned long fpfn =
-			drm_buddy_block_offset(block) >> PAGE_SHIFT;
+			gpu_buddy_block_offset(block) >> PAGE_SHIFT;
 		unsigned long lpfn = fpfn +
-			(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+			(gpu_buddy_block_size(mm, block) >> PAGE_SHIFT);
 
 		if (place->fpfn < lpfn && place->lpfn > fpfn)
 			return true;
@@ -253,8 +254,8 @@ static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man,
 	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
 	struct xe_ttm_vram_mgr_resource *vres =
 		to_xe_ttm_vram_mgr_resource(res);
-	struct drm_buddy *mm = &mgr->mm;
-	struct drm_buddy_block *block;
+	struct gpu_buddy *mm = &mgr->mm;
+	struct gpu_buddy_block *block;
 
 	if (!place->fpfn && !place->lpfn)
 		return true;
@@ -264,9 +265,9 @@ static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man,
 
 	list_for_each_entry(block, &vres->blocks, link) {
 		unsigned long fpfn =
-			drm_buddy_block_offset(block) >> PAGE_SHIFT;
+			gpu_buddy_block_offset(block) >> PAGE_SHIFT;
 		unsigned long lpfn = fpfn +
-			(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+			(gpu_buddy_block_size(mm, block) >> PAGE_SHIFT);
 
 		if (fpfn < place->fpfn || lpfn > place->lpfn)
 			return false;
@@ -296,7 +297,7 @@ static void xe_ttm_vram_mgr_fini(struct drm_device *dev, void *arg)
 
 	WARN_ON_ONCE(mgr->visible_avail != mgr->visible_size);
 
-	drm_buddy_fini(&mgr->mm);
+	gpu_buddy_fini(&mgr->mm);
 
 	ttm_resource_manager_cleanup(&mgr->manager);
 
@@ -327,7 +328,7 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
 	mgr->visible_avail = io_size;
 
 	ttm_resource_manager_init(man, &xe->ttm, size);
-	err = drm_buddy_init(&mgr->mm, man->size, default_page_size);
+	err = gpu_buddy_init(&mgr->mm, man->size, default_page_size);
 	if (err)
 		return err;
 
@@ -375,7 +376,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 	if (!*sgt)
 		return -ENOMEM;
 
-	/* Determine the number of DRM_BUDDY blocks to export */
+	/* Determine the number of GPU_BUDDY blocks to export */
 	xe_res_first(res, offset, length, &cursor);
 	while (cursor.remaining) {
 		num_entries++;
@@ -392,10 +393,10 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 		sg->length = 0;
 
 	/*
-	 * Walk down DRM_BUDDY blocks to populate scatterlist nodes
-	 * @note: Use iterator api to get first the DRM_BUDDY block
+	 * Walk down GPU_BUDDY blocks to populate scatterlist nodes
+	 * @note: Use iterator api to get first the GPU_BUDDY block
 	 * and the number of bytes from it. Access the following
-	 * DRM_BUDDY block(s) if more buffer needs to exported
+	 * GPU_BUDDY block(s) if more buffer needs to exported
 	 */
 	xe_res_first(res, offset, length, &cursor);
 	for_each_sgtable_sg((*sgt), sg, i) {
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index babeec5511d9..9106da056b49 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -18,7 +18,7 @@ struct xe_ttm_vram_mgr {
 	/** @manager: Base TTM resource manager */
 	struct ttm_resource_manager manager;
 	/** @mm: DRM buddy allocator which manages the VRAM */
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	/** @visible_size: Proped size of the CPU visible portion */
 	u64 visible_size;
 	/** @visible_avail: CPU visible portion still unallocated */
diff --git a/drivers/gpu/tests/Makefile b/drivers/gpu/tests/Makefile
index 8e7654e87d82..4183e6e2de45 100644
--- a/drivers/gpu/tests/Makefile
+++ b/drivers/gpu/tests/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 gpu_buddy_tests-y = gpu_buddy_test.o gpu_random.o
-obj-$(CONFIG_DRM_KUNIT_TEST) += gpu_buddy_tests.o
+obj-$(CONFIG_GPU_BUDDY_KUNIT_TEST) += gpu_buddy_tests.o
diff --git a/drivers/gpu/tests/gpu_buddy_test.c b/drivers/gpu/tests/gpu_buddy_test.c
index b905932da990..450e71deed90 100644
--- a/drivers/gpu/tests/gpu_buddy_test.c
+++ b/drivers/gpu/tests/gpu_buddy_test.c
@@ -21,9 +21,9 @@ static inline u64 get_size(int order, u64 chunk_size)
 	return (1 << order) * chunk_size;
 }
 
-static void drm_test_buddy_fragmentation_performance(struct kunit *test)
+static void gpu_test_buddy_fragmentation_performance(struct kunit *test)
 {
-	struct drm_buddy_block *block, *tmp;
+	struct gpu_buddy_block *block, *tmp;
 	int num_blocks, i, ret, count = 0;
 	LIST_HEAD(allocated_blocks);
 	unsigned long elapsed_ms;
@@ -32,7 +32,7 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
 	LIST_HEAD(clear_list);
 	LIST_HEAD(dirty_list);
 	LIST_HEAD(free_list);
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	u64 mm_size = SZ_4G;
 	ktime_t start, end;
 
@@ -47,7 +47,7 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
 	 * quickly the allocator can satisfy larger, aligned requests from a pool of
 	 * highly fragmented space.
 	 */
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
 			       "buddy_init failed\n");
 
 	num_blocks = mm_size / SZ_64K;
@@ -55,7 +55,7 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
 	start = ktime_get();
 	/* Allocate with maximum fragmentation - 8K blocks with 64K alignment */
 	for (i = 0; i < num_blocks; i++)
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
+		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
 								    &allocated_blocks, 0),
 					"buddy_alloc hit an error size=%u\n", SZ_8K);
 
@@ -68,21 +68,21 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
 	}
 
 	/* Free with different flags to ensure no coalescing */
-	drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED);
-	drm_buddy_free_list(&mm, &dirty_list, 0);
+	gpu_buddy_free_list(&mm, &clear_list, GPU_BUDDY_CLEARED);
+	gpu_buddy_free_list(&mm, &dirty_list, 0);
 
 	for (i = 0; i < num_blocks; i++)
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
+		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
 								    &test_blocks, 0),
 					"buddy_alloc hit an error size=%u\n", SZ_64K);
-	drm_buddy_free_list(&mm, &test_blocks, 0);
+	gpu_buddy_free_list(&mm, &test_blocks, 0);
 
 	end = ktime_get();
 	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
 
 	kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms);
 
-	drm_buddy_fini(&mm);
+	gpu_buddy_fini(&mm);
 
 	/*
 	 * Reverse free order under fragmentation
@@ -96,13 +96,13 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
 	 * deallocation occurs in the opposite order of allocation, exposing the
 	 * cost difference between a linear freelist scan and an ordered tree lookup.
 	 */
-	ret = drm_buddy_init(&mm, mm_size, SZ_4K);
+	ret = gpu_buddy_init(&mm, mm_size, SZ_4K);
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	start = ktime_get();
 	/* Allocate maximum fragmentation */
 	for (i = 0; i < num_blocks; i++)
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
+		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
 								    &allocated_blocks, 0),
 					"buddy_alloc hit an error size=%u\n", SZ_8K);
 
@@ -111,28 +111,28 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
 			list_move_tail(&block->link, &free_list);
 		count++;
 	}
-	drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED);
+	gpu_buddy_free_list(&mm, &free_list, GPU_BUDDY_CLEARED);
 
 	list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link)
 		list_move(&block->link, &reverse_list);
-	drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED);
+	gpu_buddy_free_list(&mm, &reverse_list, GPU_BUDDY_CLEARED);
 
 	end = ktime_get();
 	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
 
 	kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms);
 
-	drm_buddy_fini(&mm);
+	gpu_buddy_fini(&mm);
 }
 
-static void drm_test_buddy_alloc_range_bias(struct kunit *test)
+static void gpu_test_buddy_alloc_range_bias(struct kunit *test)
 {
 	u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
-	DRM_RND_STATE(prng, random_seed);
+	GPU_RND_STATE(prng, random_seed);
 	unsigned int i, count, *order;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	unsigned long flags;
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	LIST_HEAD(allocated);
 
 	bias_size = SZ_1M;
@@ -142,11 +142,11 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 
 	kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
 
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
 			       "buddy_init failed\n");
 
 	count = mm_size / bias_size;
-	order = drm_random_order(count, &prng);
+	order = gpu_random_order(count, &prng);
 	KUNIT_EXPECT_TRUE(test, order);
 
 	/*
@@ -166,79 +166,79 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 
 		/* internal round_up too big */
 		KUNIT_ASSERT_TRUE_MSG(test,
-				      drm_buddy_alloc_blocks(&mm, bias_start,
+				      gpu_buddy_alloc_blocks(&mm, bias_start,
 							     bias_end, bias_size + ps, bias_size,
 							     &allocated,
-							     DRM_BUDDY_RANGE_ALLOCATION),
+							     GPU_BUDDY_RANGE_ALLOCATION),
 				      "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
 				      bias_start, bias_end, bias_size, bias_size);
 
 		/* size too big */
 		KUNIT_ASSERT_TRUE_MSG(test,
-				      drm_buddy_alloc_blocks(&mm, bias_start,
+				      gpu_buddy_alloc_blocks(&mm, bias_start,
 							     bias_end, bias_size + ps, ps,
 							     &allocated,
-							     DRM_BUDDY_RANGE_ALLOCATION),
+							     GPU_BUDDY_RANGE_ALLOCATION),
 				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
 				      bias_start, bias_end, bias_size + ps, ps);
 
 		/* bias range too small for size */
 		KUNIT_ASSERT_TRUE_MSG(test,
-				      drm_buddy_alloc_blocks(&mm, bias_start + ps,
+				      gpu_buddy_alloc_blocks(&mm, bias_start + ps,
 							     bias_end, bias_size, ps,
 							     &allocated,
-							     DRM_BUDDY_RANGE_ALLOCATION),
+							     GPU_BUDDY_RANGE_ALLOCATION),
 				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
 				      bias_start + ps, bias_end, bias_size, ps);
 
 		/* bias misaligned */
 		KUNIT_ASSERT_TRUE_MSG(test,
-				      drm_buddy_alloc_blocks(&mm, bias_start + ps,
+				      gpu_buddy_alloc_blocks(&mm, bias_start + ps,
 							     bias_end - ps,
 							     bias_size >> 1, bias_size >> 1,
 							     &allocated,
-							     DRM_BUDDY_RANGE_ALLOCATION),
+							     GPU_BUDDY_RANGE_ALLOCATION),
 				      "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n",
 				      bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1);
 
 		/* single big page */
 		KUNIT_ASSERT_FALSE_MSG(test,
-				       drm_buddy_alloc_blocks(&mm, bias_start,
+				       gpu_buddy_alloc_blocks(&mm, bias_start,
 							      bias_end, bias_size, bias_size,
 							      &tmp,
-							      DRM_BUDDY_RANGE_ALLOCATION),
+							      GPU_BUDDY_RANGE_ALLOCATION),
 				       "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n",
 				       bias_start, bias_end, bias_size, bias_size);
-		drm_buddy_free_list(&mm, &tmp, 0);
+		gpu_buddy_free_list(&mm, &tmp, 0);
 
 		/* single page with internal round_up */
 		KUNIT_ASSERT_FALSE_MSG(test,
-				       drm_buddy_alloc_blocks(&mm, bias_start,
+				       gpu_buddy_alloc_blocks(&mm, bias_start,
 							      bias_end, ps, bias_size,
 							      &tmp,
-							      DRM_BUDDY_RANGE_ALLOCATION),
+							      GPU_BUDDY_RANGE_ALLOCATION),
 				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
 				       bias_start, bias_end, ps, bias_size);
-		drm_buddy_free_list(&mm, &tmp, 0);
+		gpu_buddy_free_list(&mm, &tmp, 0);
 
 		/* random size within */
 		size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
 		if (size)
 			KUNIT_ASSERT_FALSE_MSG(test,
-					       drm_buddy_alloc_blocks(&mm, bias_start,
+					       gpu_buddy_alloc_blocks(&mm, bias_start,
 								      bias_end, size, ps,
 								      &tmp,
-								      DRM_BUDDY_RANGE_ALLOCATION),
+								      GPU_BUDDY_RANGE_ALLOCATION),
 					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
 					       bias_start, bias_end, size, ps);
 
 		bias_rem -= size;
 		/* too big for current avail */
 		KUNIT_ASSERT_TRUE_MSG(test,
-				      drm_buddy_alloc_blocks(&mm, bias_start,
+				      gpu_buddy_alloc_blocks(&mm, bias_start,
 							     bias_end, bias_rem + ps, ps,
 							     &allocated,
-							     DRM_BUDDY_RANGE_ALLOCATION),
+							     GPU_BUDDY_RANGE_ALLOCATION),
 				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
 				      bias_start, bias_end, bias_rem + ps, ps);
 
@@ -248,10 +248,10 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 			size = max(size, ps);
 
 			KUNIT_ASSERT_FALSE_MSG(test,
-					       drm_buddy_alloc_blocks(&mm, bias_start,
+					       gpu_buddy_alloc_blocks(&mm, bias_start,
 								      bias_end, size, ps,
 								      &allocated,
-								      DRM_BUDDY_RANGE_ALLOCATION),
+								      GPU_BUDDY_RANGE_ALLOCATION),
 					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
 					       bias_start, bias_end, size, ps);
 			/*
@@ -259,15 +259,15 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 			 * unallocated, and ideally not always on the bias
 			 * boundaries.
 			 */
-			drm_buddy_free_list(&mm, &tmp, 0);
+			gpu_buddy_free_list(&mm, &tmp, 0);
 		} else {
 			list_splice_tail(&tmp, &allocated);
 		}
 	}
 
 	kfree(order);
-	drm_buddy_free_list(&mm, &allocated, 0);
-	drm_buddy_fini(&mm);
+	gpu_buddy_free_list(&mm, &allocated, 0);
+	gpu_buddy_fini(&mm);
 
 	/*
 	 * Something more free-form. Idea is to pick a random starting bias
@@ -278,7 +278,7 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 	 * allocated nodes in the middle of the address space.
 	 */
 
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
 			       "buddy_init failed\n");
 
 	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
@@ -290,10 +290,10 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 		u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
 
 		KUNIT_ASSERT_FALSE_MSG(test,
-				       drm_buddy_alloc_blocks(&mm, bias_start,
+				       gpu_buddy_alloc_blocks(&mm, bias_start,
 							      bias_end, size, ps,
 							      &allocated,
-							      DRM_BUDDY_RANGE_ALLOCATION),
+							      GPU_BUDDY_RANGE_ALLOCATION),
 				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
 				       bias_start, bias_end, size, ps);
 		bias_rem -= size;
@@ -319,24 +319,24 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 	KUNIT_ASSERT_EQ(test, bias_start, 0);
 	KUNIT_ASSERT_EQ(test, bias_end, mm_size);
 	KUNIT_ASSERT_TRUE_MSG(test,
-			      drm_buddy_alloc_blocks(&mm, bias_start, bias_end,
+			      gpu_buddy_alloc_blocks(&mm, bias_start, bias_end,
 						     ps, ps,
 						     &allocated,
-						     DRM_BUDDY_RANGE_ALLOCATION),
+						     GPU_BUDDY_RANGE_ALLOCATION),
 			      "buddy_alloc passed with bias(%x-%x), size=%u\n",
 			      bias_start, bias_end, ps);
 
-	drm_buddy_free_list(&mm, &allocated, 0);
-	drm_buddy_fini(&mm);
+	gpu_buddy_free_list(&mm, &allocated, 0);
+	gpu_buddy_fini(&mm);
 
 	/*
-	 * Allocate cleared blocks in the bias range when the DRM buddy's clear avail is
+	 * Allocate cleared blocks in the bias range when the GPU buddy's clear avail is
 	 * zero. This will validate the bias range allocation in scenarios like system boot
 	 * when no cleared blocks are available and exercise the fallback path too. The resulting
 	 * blocks should always be dirty.
 	 */
 
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
 			       "buddy_init failed\n");
 
 	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
@@ -344,11 +344,11 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 	bias_end = max(bias_end, bias_start + ps);
 	bias_rem = bias_end - bias_start;
 
-	flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION;
+	flags = GPU_BUDDY_CLEAR_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION;
 	size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
 
 	KUNIT_ASSERT_FALSE_MSG(test,
-			       drm_buddy_alloc_blocks(&mm, bias_start,
+			       gpu_buddy_alloc_blocks(&mm, bias_start,
 						      bias_end, size, ps,
 						      &allocated,
 						      flags),
@@ -356,27 +356,27 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 			       bias_start, bias_end, size, ps);
 
 	list_for_each_entry(block, &allocated, link)
-		KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
 
-	drm_buddy_free_list(&mm, &allocated, 0);
-	drm_buddy_fini(&mm);
+	gpu_buddy_free_list(&mm, &allocated, 0);
+	gpu_buddy_fini(&mm);
 }
 
-static void drm_test_buddy_alloc_clear(struct kunit *test)
+static void gpu_test_buddy_alloc_clear(struct kunit *test)
 {
 	unsigned long n_pages, total, i = 0;
 	const unsigned long ps = SZ_4K;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	const int max_order = 12;
 	LIST_HEAD(allocated);
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	unsigned int order;
 	u32 mm_size, size;
 	LIST_HEAD(dirty);
 	LIST_HEAD(clean);
 
 	mm_size = SZ_4K << max_order;
-	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
 
 	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
 
@@ -389,11 +389,11 @@ static void drm_test_buddy_alloc_clear(struct kunit *test)
 	 * is indeed all dirty pages and vice versa. Free it all again,
 	 * keeping the dirty/clear status.
 	 */
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 							    5 * ps, ps, &allocated,
-							    DRM_BUDDY_TOPDOWN_ALLOCATION),
+							    GPU_BUDDY_TOPDOWN_ALLOCATION),
 				"buddy_alloc hit an error size=%lu\n", 5 * ps);
-	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
+	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
 
 	n_pages = 10;
 	do {
@@ -406,37 +406,37 @@ static void drm_test_buddy_alloc_clear(struct kunit *test)
 			flags = 0;
 		} else {
 			list = &clean;
-			flags = DRM_BUDDY_CLEAR_ALLOCATION;
+			flags = GPU_BUDDY_CLEAR_ALLOCATION;
 		}
 
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 								    ps, ps, list,
 								    flags),
 					"buddy_alloc hit an error size=%lu\n", ps);
 	} while (++i < n_pages);
 
 	list_for_each_entry(block, &clean, link)
-		KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true);
+		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), true);
 
 	list_for_each_entry(block, &dirty, link)
-		KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
 
-	drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
+	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
 
 	/*
 	 * Trying to go over the clear limit for some allocation.
 	 * The allocation should never fail with reasonable page-size.
 	 */
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 							    10 * ps, ps, &clean,
-							    DRM_BUDDY_CLEAR_ALLOCATION),
+							    GPU_BUDDY_CLEAR_ALLOCATION),
 				"buddy_alloc hit an error size=%lu\n", 10 * ps);
 
-	drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
-	drm_buddy_free_list(&mm, &dirty, 0);
-	drm_buddy_fini(&mm);
+	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
+	gpu_buddy_free_list(&mm, &dirty, 0);
+	gpu_buddy_fini(&mm);
 
-	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
 
 	/*
 	 * Create a new mm. Intentionally fragment the address space by creating
@@ -458,34 +458,34 @@ static void drm_test_buddy_alloc_clear(struct kunit *test)
 		else
 			list = &clean;
 
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 								    ps, ps, list, 0),
 					"buddy_alloc hit an error size=%lu\n", ps);
 	} while (++i < n_pages);
 
-	drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
-	drm_buddy_free_list(&mm, &dirty, 0);
+	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
+	gpu_buddy_free_list(&mm, &dirty, 0);
 
 	order = 1;
 	do {
 		size = SZ_4K << order;
 
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 								    size, size, &allocated,
-								    DRM_BUDDY_CLEAR_ALLOCATION),
+								    GPU_BUDDY_CLEAR_ALLOCATION),
 					"buddy_alloc hit an error size=%u\n", size);
 		total = 0;
 		list_for_each_entry(block, &allocated, link) {
 			if (size != mm_size)
-				KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
-			total += drm_buddy_block_size(&mm, block);
+				KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
+			total += gpu_buddy_block_size(&mm, block);
 		}
 		KUNIT_EXPECT_EQ(test, total, size);
 
-		drm_buddy_free_list(&mm, &allocated, 0);
+		gpu_buddy_free_list(&mm, &allocated, 0);
 	} while (++order <= max_order);
 
-	drm_buddy_fini(&mm);
+	gpu_buddy_fini(&mm);
 
 	/*
 	 * Create a new mm with a non power-of-two size. Allocate a random size from each
@@ -494,44 +494,44 @@ static void drm_test_buddy_alloc_clear(struct kunit *test)
 	 */
 	mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2));
 
-	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
 	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
 							    4 * ps, ps, &allocated,
-							    DRM_BUDDY_RANGE_ALLOCATION),
+							    GPU_BUDDY_RANGE_ALLOCATION),
 				"buddy_alloc hit an error size=%lu\n", 4 * ps);
-	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
+	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
 							    2 * ps, ps, &allocated,
-							    DRM_BUDDY_CLEAR_ALLOCATION),
+							    GPU_BUDDY_CLEAR_ALLOCATION),
 				"buddy_alloc hit an error size=%lu\n", 2 * ps);
-	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
+	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
 							    ps, ps, &allocated,
-							    DRM_BUDDY_RANGE_ALLOCATION),
+							    GPU_BUDDY_RANGE_ALLOCATION),
 				"buddy_alloc hit an error size=%lu\n", ps);
-	drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
-	drm_buddy_fini(&mm);
+	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
+	gpu_buddy_fini(&mm);
 }
 
-static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+static void gpu_test_buddy_alloc_contiguous(struct kunit *test)
 {
 	const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
 	unsigned long i, n_pages, total;
-	struct drm_buddy_block *block;
-	struct drm_buddy mm;
+	struct gpu_buddy_block *block;
+	struct gpu_buddy mm;
 	LIST_HEAD(left);
 	LIST_HEAD(middle);
 	LIST_HEAD(right);
 	LIST_HEAD(allocated);
 
-	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
 
 	/*
 	 * Idea is to fragment the address space by alternating block
 	 * allocations between three different lists; one for left, middle and
 	 * right. We can then free a list to simulate fragmentation. In
-	 * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+	 * particular we want to exercise the GPU_BUDDY_CONTIGUOUS_ALLOCATION,
 	 * including the try_harder path.
 	 */
 
@@ -548,66 +548,66 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test)
 		else
 			list = &right;
 		KUNIT_ASSERT_FALSE_MSG(test,
-				       drm_buddy_alloc_blocks(&mm, 0, mm_size,
+				       gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 							      ps, ps, list, 0),
 				       "buddy_alloc hit an error size=%lu\n",
 				       ps);
 	} while (++i < n_pages);
 
-	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 							   3 * ps, ps, &allocated,
-							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
 			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
 
-	drm_buddy_free_list(&mm, &middle, 0);
-	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+	gpu_buddy_free_list(&mm, &middle, 0);
+	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 							   3 * ps, ps, &allocated,
-							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
 			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
-	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 							   2 * ps, ps, &allocated,
-							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
 			       "buddy_alloc didn't error size=%lu\n", 2 * ps);
 
-	drm_buddy_free_list(&mm, &right, 0);
-	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+	gpu_buddy_free_list(&mm, &right, 0);
+	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 							   3 * ps, ps, &allocated,
-							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
 			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
 	/*
 	 * At this point we should have enough contiguous space for 2 blocks,
 	 * however they are never buddies (since we freed middle and right) so
 	 * will require the try_harder logic to find them.
 	 */
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 							    2 * ps, ps, &allocated,
-							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
 			       "buddy_alloc hit an error size=%lu\n", 2 * ps);
 
-	drm_buddy_free_list(&mm, &left, 0);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+	gpu_buddy_free_list(&mm, &left, 0);
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
 							    3 * ps, ps, &allocated,
-							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
 			       "buddy_alloc hit an error size=%lu\n", 3 * ps);
 
 	total = 0;
 	list_for_each_entry(block, &allocated, link)
-		total += drm_buddy_block_size(&mm, block);
+		total += gpu_buddy_block_size(&mm, block);
 
 	KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
 
-	drm_buddy_free_list(&mm, &allocated, 0);
-	drm_buddy_fini(&mm);
+	gpu_buddy_free_list(&mm, &allocated, 0);
+	gpu_buddy_fini(&mm);
 }
 
-static void drm_test_buddy_alloc_pathological(struct kunit *test)
+static void gpu_test_buddy_alloc_pathological(struct kunit *test)
 {
 	u64 mm_size, size, start = 0;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	const int max_order = 3;
 	unsigned long flags = 0;
 	int order, top;
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	LIST_HEAD(blocks);
 	LIST_HEAD(holes);
 	LIST_HEAD(tmp);
@@ -620,7 +620,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
 	 */
 
 	mm_size = SZ_4K << max_order;
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
 			       "buddy_init failed\n");
 
 	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
@@ -630,18 +630,18 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
 		block = list_first_entry_or_null(&blocks, typeof(*block), link);
 		if (block) {
 			list_del(&block->link);
-			drm_buddy_free_block(&mm, block);
+			gpu_buddy_free_block(&mm, block);
 		}
 
 		for (order = top; order--;) {
 			size = get_size(order, mm.chunk_size);
-			KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start,
+			KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start,
 									    mm_size, size, size,
 										&tmp, flags),
 					"buddy_alloc hit -ENOMEM with order=%d, top=%d\n",
 					order, top);
 
-			block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+			block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
 			KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
 
 			list_move_tail(&block->link, &blocks);
@@ -649,45 +649,45 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
 
 		/* There should be one final page for this sub-allocation */
 		size = get_size(0, mm.chunk_size);
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
 								    size, size, &tmp, flags),
 							   "buddy_alloc hit -ENOMEM for hole\n");
 
-		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
 		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
 
 		list_move_tail(&block->link, &holes);
 
 		size = get_size(top, mm.chunk_size);
-		KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
 								   size, size, &tmp, flags),
 							  "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!",
 							  top, max_order);
 	}
 
-	drm_buddy_free_list(&mm, &holes, 0);
+	gpu_buddy_free_list(&mm, &holes, 0);
 
 	/* Nothing larger than blocks of chunk_size now available */
 	for (order = 1; order <= max_order; order++) {
 		size = get_size(order, mm.chunk_size);
-		KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
 								   size, size, &tmp, flags),
 							  "buddy_alloc unexpectedly succeeded at order %d, it should be full!",
 							  order);
 	}
 
 	list_splice_tail(&holes, &blocks);
-	drm_buddy_free_list(&mm, &blocks, 0);
-	drm_buddy_fini(&mm);
+	gpu_buddy_free_list(&mm, &blocks, 0);
+	gpu_buddy_fini(&mm);
 }
 
-static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
+static void gpu_test_buddy_alloc_pessimistic(struct kunit *test)
 {
 	u64 mm_size, size, start = 0;
-	struct drm_buddy_block *block, *bn;
+	struct gpu_buddy_block *block, *bn;
 	const unsigned int max_order = 16;
 	unsigned long flags = 0;
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	unsigned int order;
 	LIST_HEAD(blocks);
 	LIST_HEAD(tmp);
@@ -699,19 +699,19 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
 	 */
 
 	mm_size = SZ_4K << max_order;
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
 			       "buddy_init failed\n");
 
 	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
 
 	for (order = 0; order < max_order; order++) {
 		size = get_size(order, mm.chunk_size);
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
 								    size, size, &tmp, flags),
 							   "buddy_alloc hit -ENOMEM with order=%d\n",
 							   order);
 
-		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
 		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
 
 		list_move_tail(&block->link, &blocks);
@@ -719,11 +719,11 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
 
 	/* And now the last remaining block available */
 	size = get_size(0, mm.chunk_size);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
 							    size, size, &tmp, flags),
 						   "buddy_alloc hit -ENOMEM on final alloc\n");
 
-	block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
 	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
 
 	list_move_tail(&block->link, &blocks);
@@ -731,58 +731,58 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
 	/* Should be completely full! */
 	for (order = max_order; order--;) {
 		size = get_size(order, mm.chunk_size);
-		KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
 								   size, size, &tmp, flags),
 							  "buddy_alloc unexpectedly succeeded, it should be full!");
 	}
 
 	block = list_last_entry(&blocks, typeof(*block), link);
 	list_del(&block->link);
-	drm_buddy_free_block(&mm, block);
+	gpu_buddy_free_block(&mm, block);
 
 	/* As we free in increasing size, we make available larger blocks */
 	order = 1;
 	list_for_each_entry_safe(block, bn, &blocks, link) {
 		list_del(&block->link);
-		drm_buddy_free_block(&mm, block);
+		gpu_buddy_free_block(&mm, block);
 
 		size = get_size(order, mm.chunk_size);
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
 								    size, size, &tmp, flags),
 							   "buddy_alloc hit -ENOMEM with order=%d\n",
 							   order);
 
-		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
 		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
 
 		list_del(&block->link);
-		drm_buddy_free_block(&mm, block);
+		gpu_buddy_free_block(&mm, block);
 		order++;
 	}
 
 	/* To confirm, now the whole mm should be available */
 	size = get_size(max_order, mm.chunk_size);
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
 							    size, size, &tmp, flags),
 						   "buddy_alloc (realloc) hit -ENOMEM with order=%d\n",
 						   max_order);
 
-	block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
 	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
 
 	list_del(&block->link);
-	drm_buddy_free_block(&mm, block);
-	drm_buddy_free_list(&mm, &blocks, 0);
-	drm_buddy_fini(&mm);
+	gpu_buddy_free_block(&mm, block);
+	gpu_buddy_free_list(&mm, &blocks, 0);
+	gpu_buddy_fini(&mm);
 }
 
-static void drm_test_buddy_alloc_optimistic(struct kunit *test)
+static void gpu_test_buddy_alloc_optimistic(struct kunit *test)
 {
 	u64 mm_size, size, start = 0;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	unsigned long flags = 0;
 	const int max_order = 16;
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	LIST_HEAD(blocks);
 	LIST_HEAD(tmp);
 	int order;
@@ -794,19 +794,19 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
 
 	mm_size = SZ_4K * ((1 << (max_order + 1)) - 1);
 
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
 			       "buddy_init failed\n");
 
 	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
 
 	for (order = 0; order <= max_order; order++) {
 		size = get_size(order, mm.chunk_size);
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
 								    size, size, &tmp, flags),
 							   "buddy_alloc hit -ENOMEM with order=%d\n",
 							   order);
 
-		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
 		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
 
 		list_move_tail(&block->link, &blocks);
@@ -814,115 +814,115 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
 
 	/* Should be completely full! */
 	size = get_size(0, mm.chunk_size);
-	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
 							   size, size, &tmp, flags),
 						  "buddy_alloc unexpectedly succeeded, it should be full!");
 
-	drm_buddy_free_list(&mm, &blocks, 0);
-	drm_buddy_fini(&mm);
+	gpu_buddy_free_list(&mm, &blocks, 0);
+	gpu_buddy_fini(&mm);
 }
 
-static void drm_test_buddy_alloc_limit(struct kunit *test)
+static void gpu_test_buddy_alloc_limit(struct kunit *test)
 {
 	u64 size = U64_MAX, start = 0;
-	struct drm_buddy_block *block;
+	struct gpu_buddy_block *block;
 	unsigned long flags = 0;
 	LIST_HEAD(allocated);
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 
-	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, size, SZ_4K));
+	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, size, SZ_4K));
 
-	KUNIT_EXPECT_EQ_MSG(test, mm.max_order, DRM_BUDDY_MAX_ORDER,
+	KUNIT_EXPECT_EQ_MSG(test, mm.max_order, GPU_BUDDY_MAX_ORDER,
 			    "mm.max_order(%d) != %d\n", mm.max_order,
-						DRM_BUDDY_MAX_ORDER);
+						GPU_BUDDY_MAX_ORDER);
 
 	size = mm.chunk_size << mm.max_order;
-	KUNIT_EXPECT_FALSE(test, drm_buddy_alloc_blocks(&mm, start, size, size,
+	KUNIT_EXPECT_FALSE(test, gpu_buddy_alloc_blocks(&mm, start, size, size,
 							mm.chunk_size, &allocated, flags));
 
-	block = list_first_entry_or_null(&allocated, struct drm_buddy_block, link);
+	block = list_first_entry_or_null(&allocated, struct gpu_buddy_block, link);
 	KUNIT_EXPECT_TRUE(test, block);
 
-	KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_order(block), mm.max_order,
+	KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_order(block), mm.max_order,
 			    "block order(%d) != %d\n",
-						drm_buddy_block_order(block), mm.max_order);
+						gpu_buddy_block_order(block), mm.max_order);
 
-	KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_size(&mm, block),
+	KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_size(&mm, block),
 			    BIT_ULL(mm.max_order) * mm.chunk_size,
 						"block size(%llu) != %llu\n",
-						drm_buddy_block_size(&mm, block),
+						gpu_buddy_block_size(&mm, block),
 						BIT_ULL(mm.max_order) * mm.chunk_size);
 
-	drm_buddy_free_list(&mm, &allocated, 0);
-	drm_buddy_fini(&mm);
+	gpu_buddy_free_list(&mm, &allocated, 0);
+	gpu_buddy_fini(&mm);
 }
 
-static void drm_test_buddy_alloc_exceeds_max_order(struct kunit *test)
+static void gpu_test_buddy_alloc_exceeds_max_order(struct kunit *test)
 {
 	u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G;
-	struct drm_buddy mm;
+	struct gpu_buddy mm;
 	LIST_HEAD(blocks);
 	int err;
 
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
 			       "buddy_init failed\n");
 
 	/* CONTIGUOUS allocation should succeed via try_harder fallback */
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, size,
+	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, size,
 							    SZ_4K, &blocks,
-							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
 			       "buddy_alloc hit an error size=%llu\n", size);
-	drm_buddy_free_list(&mm, &blocks, 0);
+	gpu_buddy_free_list(&mm, &blocks, 0);
 
 	/* Non-CONTIGUOUS with large min_block_size should return -EINVAL */
-	err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0);
+	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0);
 	KUNIT_EXPECT_EQ(test, err, -EINVAL);
 
 	/* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */
-	err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks,
-				     DRM_BUDDY_RANGE_ALLOCATION);
+	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks,
+				     GPU_BUDDY_RANGE_ALLOCATION);
 	KUNIT_EXPECT_EQ(test, err, -EINVAL);
 
 	/* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */
-	err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks,
-				     DRM_BUDDY_CONTIGUOUS_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION);
+	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks,
+				     GPU_BUDDY_CONTIGUOUS_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION);
 	KUNIT_EXPECT_EQ(test, err, -EINVAL);
 
-	drm_buddy_fini(&mm);
+	gpu_buddy_fini(&mm);
 }
 
-static int drm_buddy_suite_init(struct kunit_suite *suite)
+static int gpu_buddy_suite_init(struct kunit_suite *suite)
 {
 	while (!random_seed)
 		random_seed = get_random_u32();
 
-	kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n",
+	kunit_info(suite, "Testing GPU buddy manager, with random_seed=0x%x\n",
 		   random_seed);
 
 	return 0;
 }
 
-static struct kunit_case drm_buddy_tests[] = {
-	KUNIT_CASE(drm_test_buddy_alloc_limit),
-	KUNIT_CASE(drm_test_buddy_alloc_optimistic),
-	KUNIT_CASE(drm_test_buddy_alloc_pessimistic),
-	KUNIT_CASE(drm_test_buddy_alloc_pathological),
-	KUNIT_CASE(drm_test_buddy_alloc_contiguous),
-	KUNIT_CASE(drm_test_buddy_alloc_clear),
-	KUNIT_CASE(drm_test_buddy_alloc_range_bias),
-	KUNIT_CASE(drm_test_buddy_fragmentation_performance),
-	KUNIT_CASE(drm_test_buddy_alloc_exceeds_max_order),
+static struct kunit_case gpu_buddy_tests[] = {
+	KUNIT_CASE(gpu_test_buddy_alloc_limit),
+	KUNIT_CASE(gpu_test_buddy_alloc_optimistic),
+	KUNIT_CASE(gpu_test_buddy_alloc_pessimistic),
+	KUNIT_CASE(gpu_test_buddy_alloc_pathological),
+	KUNIT_CASE(gpu_test_buddy_alloc_contiguous),
+	KUNIT_CASE(gpu_test_buddy_alloc_clear),
+	KUNIT_CASE(gpu_test_buddy_alloc_range_bias),
+	KUNIT_CASE(gpu_test_buddy_fragmentation_performance),
+	KUNIT_CASE(gpu_test_buddy_alloc_exceeds_max_order),
 	{}
 };
 
-static struct kunit_suite drm_buddy_test_suite = {
-	.name = "drm_buddy",
-	.suite_init = drm_buddy_suite_init,
-	.test_cases = drm_buddy_tests,
+static struct kunit_suite gpu_buddy_test_suite = {
+	.name = "gpu_buddy",
+	.suite_init = gpu_buddy_suite_init,
+	.test_cases = gpu_buddy_tests,
 };
 
-kunit_test_suite(drm_buddy_test_suite);
+kunit_test_suite(gpu_buddy_test_suite);
 
 MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Kunit test for drm_buddy functions");
+MODULE_DESCRIPTION("Kunit test for gpu_buddy functions");
 MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/tests/gpu_random.c b/drivers/gpu/tests/gpu_random.c
index ddd1f594b5d5..6356372f7e52 100644
--- a/drivers/gpu/tests/gpu_random.c
+++ b/drivers/gpu/tests/gpu_random.c
@@ -8,26 +8,26 @@
 
 #include "gpu_random.h"
 
-u32 drm_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state)
+u32 gpu_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state)
 {
 	return upper_32_bits((u64)prandom_u32_state(state) * ep_ro);
 }
-EXPORT_SYMBOL(drm_prandom_u32_max_state);
+EXPORT_SYMBOL(gpu_prandom_u32_max_state);
 
-void drm_random_reorder(unsigned int *order, unsigned int count,
+void gpu_random_reorder(unsigned int *order, unsigned int count,
 			struct rnd_state *state)
 {
 	unsigned int i, j;
 
 	for (i = 0; i < count; ++i) {
 		BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32));
-		j = drm_prandom_u32_max_state(count, state);
+		j = gpu_prandom_u32_max_state(count, state);
 		swap(order[i], order[j]);
 	}
 }
-EXPORT_SYMBOL(drm_random_reorder);
+EXPORT_SYMBOL(gpu_random_reorder);
 
-unsigned int *drm_random_order(unsigned int count, struct rnd_state *state)
+unsigned int *gpu_random_order(unsigned int count, struct rnd_state *state)
 {
 	unsigned int *order, i;
 
@@ -38,7 +38,7 @@ unsigned int *drm_random_order(unsigned int count, struct rnd_state *state)
 	for (i = 0; i < count; i++)
 		order[i] = i;
 
-	drm_random_reorder(order, count, state);
+	gpu_random_reorder(order, count, state);
 	return order;
 }
-EXPORT_SYMBOL(drm_random_order);
+EXPORT_SYMBOL(gpu_random_order);
diff --git a/drivers/gpu/tests/gpu_random.h b/drivers/gpu/tests/gpu_random.h
index 9f827260a89d..b68cf3448264 100644
--- a/drivers/gpu/tests/gpu_random.h
+++ b/drivers/gpu/tests/gpu_random.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __DRM_RANDOM_H__
-#define __DRM_RANDOM_H__
+#ifndef __GPU_RANDOM_H__
+#define __GPU_RANDOM_H__
 
 /* This is a temporary home for a couple of utility functions that should
  * be transposed to lib/ at the earliest convenience.
@@ -8,21 +8,21 @@
 
 #include <linux/prandom.h>
 
-#define DRM_RND_STATE_INITIALIZER(seed__) ({				\
+#define GPU_RND_STATE_INITIALIZER(seed__) ({				\
 	struct rnd_state state__;					\
 	prandom_seed_state(&state__, (seed__));				\
 	state__;							\
 })
 
-#define DRM_RND_STATE(name__, seed__) \
-	struct rnd_state name__ = DRM_RND_STATE_INITIALIZER(seed__)
+#define GPU_RND_STATE(name__, seed__) \
+	struct rnd_state name__ = GPU_RND_STATE_INITIALIZER(seed__)
 
-unsigned int *drm_random_order(unsigned int count,
+unsigned int *gpu_random_order(unsigned int count,
 			       struct rnd_state *state);
-void drm_random_reorder(unsigned int *order,
+void gpu_random_reorder(unsigned int *order,
 			unsigned int count,
 			struct rnd_state *state);
-u32 drm_prandom_u32_max_state(u32 ep_ro,
+u32 gpu_prandom_u32_max_state(u32 ep_ro,
 			      struct rnd_state *state);
 
-#endif /* !__DRM_RANDOM_H__ */
+#endif /* !__GPU_RANDOM_H__ */
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index d51777df12d1..0adb1e2fa533 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -37,6 +37,7 @@ source "drivers/char/agp/Kconfig"
 
 source "drivers/gpu/vga/Kconfig"
 
+source "drivers/gpu/Kconfig"
 source "drivers/gpu/host1x/Kconfig"
 source "drivers/gpu/ipu-v3/Kconfig"
 source "drivers/gpu/nova-core/Kconfig"
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
new file mode 100644
index 000000000000..3054369bebff
--- /dev/null
+++ b/include/drm/drm_buddy.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __DRM_BUDDY_H__
+#define __DRM_BUDDY_H__
+
+#include <linux/gpu_buddy.h>
+
+struct drm_printer;
+
+/* DRM-specific GPU Buddy Allocator print helpers */
+void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p);
+void drm_buddy_block_print(struct gpu_buddy *mm,
+			   struct gpu_buddy_block *block,
+			   struct drm_printer *p);
+#endif
diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
index b909fa8f810a..07ac65db6d2e 100644
--- a/include/linux/gpu_buddy.h
+++ b/include/linux/gpu_buddy.h
@@ -3,8 +3,8 @@
  * Copyright © 2021 Intel Corporation
  */
 
-#ifndef __DRM_BUDDY_H__
-#define __DRM_BUDDY_H__
+#ifndef __GPU_BUDDY_H__
+#define __GPU_BUDDY_H__
 
 #include <linux/bitops.h>
 #include <linux/list.h>
@@ -12,38 +12,45 @@
 #include <linux/sched.h>
 #include <linux/rbtree.h>
 
-struct drm_printer;
-
-#define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
-#define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
-#define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
-#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
-#define DRM_BUDDY_CLEARED			BIT(4)
-#define DRM_BUDDY_TRIM_DISABLE			BIT(5)
-
-struct drm_buddy_block {
-#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
-#define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
-#define   DRM_BUDDY_ALLOCATED	   (1 << 10)
-#define   DRM_BUDDY_FREE	   (2 << 10)
-#define   DRM_BUDDY_SPLIT	   (3 << 10)
-#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
+#define GPU_BUDDY_RANGE_ALLOCATION		BIT(0)
+#define GPU_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
+#define GPU_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
+#define GPU_BUDDY_CLEAR_ALLOCATION		BIT(3)
+#define GPU_BUDDY_CLEARED			BIT(4)
+#define GPU_BUDDY_TRIM_DISABLE			BIT(5)
+
+enum gpu_buddy_free_tree {
+	GPU_BUDDY_CLEAR_TREE = 0,
+	GPU_BUDDY_DIRTY_TREE,
+	GPU_BUDDY_MAX_FREE_TREES,
+};
+
+#define for_each_free_tree(tree) \
+	for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++)
+
+struct gpu_buddy_block {
+#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
+#define GPU_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
+#define   GPU_BUDDY_ALLOCATED	   (1 << 10)
+#define   GPU_BUDDY_FREE	   (2 << 10)
+#define   GPU_BUDDY_SPLIT	   (3 << 10)
+#define GPU_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
 /* Free to be used, if needed in the future */
-#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
-#define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
+#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
+#define GPU_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
 	u64 header;
 
-	struct drm_buddy_block *left;
-	struct drm_buddy_block *right;
-	struct drm_buddy_block *parent;
+	struct gpu_buddy_block *left;
+	struct gpu_buddy_block *right;
+	struct gpu_buddy_block *parent;
 
 	void *private; /* owned by creator */
 
 	/*
-	 * While the block is allocated by the user through drm_buddy_alloc*,
+	 * While the block is allocated by the user through gpu_buddy_alloc*,
 	 * the user has ownership of the link, for example to maintain within
 	 * a list, if so desired. As soon as the block is freed with
-	 * drm_buddy_free* ownership is given back to the mm.
+	 * gpu_buddy_free* ownership is given back to the mm.
 	 */
 	union {
 		struct rb_node rb;
@@ -54,15 +61,15 @@ struct drm_buddy_block {
 };
 
 /* Order-zero must be at least SZ_4K */
-#define DRM_BUDDY_MAX_ORDER (63 - 12)
+#define GPU_BUDDY_MAX_ORDER (63 - 12)
 
 /*
  * Binary Buddy System.
  *
  * Locking should be handled by the user, a simple mutex around
- * drm_buddy_alloc* and drm_buddy_free* should suffice.
+ * gpu_buddy_alloc* and gpu_buddy_free* should suffice.
  */
-struct drm_buddy {
+struct gpu_buddy {
 	/* Maintain a free list for each order. */
 	struct rb_root **free_trees;
 
@@ -73,7 +80,7 @@ struct drm_buddy {
 	 * block.  Nodes are either allocated or free, in which case they will
 	 * also exist on the respective free list.
 	 */
-	struct drm_buddy_block **roots;
+	struct gpu_buddy_block **roots;
 
 	/*
 	 * Anything from here is public, and remains static for the lifetime of
@@ -90,82 +97,81 @@ struct drm_buddy {
 };
 
 static inline u64
-drm_buddy_block_offset(const struct drm_buddy_block *block)
+gpu_buddy_block_offset(const struct gpu_buddy_block *block)
 {
-	return block->header & DRM_BUDDY_HEADER_OFFSET;
+	return block->header & GPU_BUDDY_HEADER_OFFSET;
 }
 
 static inline unsigned int
-drm_buddy_block_order(struct drm_buddy_block *block)
+gpu_buddy_block_order(struct gpu_buddy_block *block)
 {
-	return block->header & DRM_BUDDY_HEADER_ORDER;
+	return block->header & GPU_BUDDY_HEADER_ORDER;
 }
 
 static inline unsigned int
-drm_buddy_block_state(struct drm_buddy_block *block)
+gpu_buddy_block_state(struct gpu_buddy_block *block)
 {
-	return block->header & DRM_BUDDY_HEADER_STATE;
+	return block->header & GPU_BUDDY_HEADER_STATE;
 }
 
 static inline bool
-drm_buddy_block_is_allocated(struct drm_buddy_block *block)
+gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
 {
-	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
+	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
 }
 
 static inline bool
-drm_buddy_block_is_clear(struct drm_buddy_block *block)
+gpu_buddy_block_is_clear(struct gpu_buddy_block *block)
 {
-	return block->header & DRM_BUDDY_HEADER_CLEAR;
+	return block->header & GPU_BUDDY_HEADER_CLEAR;
 }
 
 static inline bool
-drm_buddy_block_is_free(struct drm_buddy_block *block)
+gpu_buddy_block_is_free(struct gpu_buddy_block *block)
 {
-	return drm_buddy_block_state(block) == DRM_BUDDY_FREE;
+	return gpu_buddy_block_state(block) == GPU_BUDDY_FREE;
 }
 
 static inline bool
-drm_buddy_block_is_split(struct drm_buddy_block *block)
+gpu_buddy_block_is_split(struct gpu_buddy_block *block)
 {
-	return drm_buddy_block_state(block) == DRM_BUDDY_SPLIT;
+	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
 }
 
 static inline u64
-drm_buddy_block_size(struct drm_buddy *mm,
-		     struct drm_buddy_block *block)
+gpu_buddy_block_size(struct gpu_buddy *mm,
+		     struct gpu_buddy_block *block)
 {
-	return mm->chunk_size << drm_buddy_block_order(block);
+	return mm->chunk_size << gpu_buddy_block_order(block);
 }
 
-int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
+int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size);
 
-void drm_buddy_fini(struct drm_buddy *mm);
+void gpu_buddy_fini(struct gpu_buddy *mm);
 
-struct drm_buddy_block *
-drm_get_buddy(struct drm_buddy_block *block);
+struct gpu_buddy_block *
+gpu_get_buddy(struct gpu_buddy_block *block);
 
-int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
 			   u64 start, u64 end, u64 size,
 			   u64 min_page_size,
 			   struct list_head *blocks,
 			   unsigned long flags);
 
-int drm_buddy_block_trim(struct drm_buddy *mm,
+int gpu_buddy_block_trim(struct gpu_buddy *mm,
 			 u64 *start,
 			 u64 new_size,
 			 struct list_head *blocks);
 
-void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear);
+void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear);
 
-void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
+void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block);
 
-void drm_buddy_free_list(struct drm_buddy *mm,
+void gpu_buddy_free_list(struct gpu_buddy *mm,
 			 struct list_head *objects,
 			 unsigned int flags);
 
-void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
-void drm_buddy_block_print(struct drm_buddy *mm,
-			   struct drm_buddy_block *block,
-			   struct drm_printer *p);
+void gpu_buddy_print(struct gpu_buddy *mm);
+void gpu_buddy_block_print(struct gpu_buddy *mm,
+			   struct gpu_buddy_block *block);
 #endif
-- 
cgit v1.2.3


From a69d1ab971a624c6f112cea61536569d579c3215 Mon Sep 17 00:00:00 2001
From: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Date: Tue, 10 Feb 2026 12:56:53 +0100
Subject: mm: Fix a hmm_range_fault() livelock / starvation problem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If hmm_range_fault() fails a folio_trylock() in do_swap_page,
trying to acquire the lock of a device-private folio for migration,
to ram, the function will spin until it succeeds grabbing the lock.

However, if the process holding the lock is depending on a work
item to be completed, which is scheduled on the same CPU as the
spinning hmm_range_fault(), that work item might be starved and
we end up in a livelock / starvation situation which is never
resolved.

This can happen, for example if the process holding the
device-private folio lock is stuck in
   migrate_device_unmap()->lru_add_drain_all()
sinc lru_add_drain_all() requires a short work-item
to be run on all online cpus to complete.

A prerequisite for this to happen is:
a) Both zone device and system memory folios are considered in
   migrate_device_unmap(), so that there is a reason to call
   lru_add_drain_all() for a system memory folio while a
   folio lock is held on a zone device folio.
b) The zone device folio has an initial mapcount > 1 which causes
   at least one migration PTE entry insertion to be deferred to
   try_to_migrate(), which can happen after the call to
   lru_add_drain_all().
c) No or voluntary only preemption.

This all seems pretty unlikely to happen, but indeed is hit by
the "xe_exec_system_allocator" igt test.

Resolve this by waiting for the folio to be unlocked if the
folio_trylock() fails in do_swap_page().

Rename migration_entry_wait_on_locked() to
softleaf_entry_wait_unlock() and update its documentation to
indicate the new use-case.

Future code improvements might consider moving
the lru_add_drain_all() call in migrate_device_unmap() to be
called *after* all pages have migration entries inserted.
That would eliminate also b) above.

v2:
- Instead of a cond_resched() in hmm_range_fault(),
  eliminate the problem by waiting for the folio to be unlocked
  in do_swap_page() (Alistair Popple, Andrew Morton)
v3:
- Add a stub migration_entry_wait_on_locked() for the
  !CONFIG_MIGRATION case. (Kernel Test Robot)
v4:
- Rename migrate_entry_wait_on_locked() to
  softleaf_entry_wait_on_locked() and update docs (Alistair Popple)
v5:
- Add a WARN_ON_ONCE() for the !CONFIG_MIGRATION
  version of softleaf_entry_wait_on_locked().
- Modify wording around function names in the commit message
  (Andrew Morton)

Suggested-by: Alistair Popple <apopple@nvidia.com>
Fixes: 1afaeb8293c9 ("mm/migrate: Trylock device page in do_swap_page")
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: linux-mm@kvack.org
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: <stable@vger.kernel.org> # v6.15+
Reviewed-by: John Hubbard <jhubbard@nvidia.com> #v3
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Link: https://patch.msgid.link/20260210115653.92413-1-thomas.hellstrom@linux.intel.com
---
 include/linux/migrate.h | 10 +++++++++-
 mm/filemap.c            | 15 ++++++++++-----
 mm/memory.c             |  3 ++-
 mm/migrate.c            |  8 ++++----
 mm/migrate_device.c     |  2 +-
 5 files changed, 26 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 26ca00c325d9..d5af2b7f577b 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list);
 
 int migrate_huge_page_move_mapping(struct address_space *mapping,
 		struct folio *dst, struct folio *src);
-void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
 		__releases(ptl);
 void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
 int folio_migrate_mapping(struct address_space *mapping,
@@ -97,6 +97,14 @@ static inline int set_movable_ops(const struct movable_operations *ops, enum pag
 	return -ENOSYS;
 }
 
+static inline void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+	__releases(ptl)
+{
+	WARN_ON_ONCE(1);
+
+	spin_unlock(ptl);
+}
+
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/mm/filemap.c b/mm/filemap.c
index ebd75684cb0a..d98e4883f13d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1379,14 +1379,16 @@ repeat:
 
 #ifdef CONFIG_MIGRATION
 /**
- * migration_entry_wait_on_locked - Wait for a migration entry to be removed
- * @entry: migration swap entry.
+ * softleaf_entry_wait_on_locked - Wait for a migration entry or
+ * device_private entry to be removed.
+ * @entry: migration or device_private swap entry.
  * @ptl: already locked ptl. This function will drop the lock.
  *
- * Wait for a migration entry referencing the given page to be removed. This is
+ * Wait for a migration entry referencing the given page, or device_private
+ * entry referencing a dvice_private page to be unlocked. This is
  * equivalent to folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE) except
  * this can be called without taking a reference on the page. Instead this
- * should be called while holding the ptl for the migration entry referencing
+ * should be called while holding the ptl for @entry referencing
  * the page.
  *
  * Returns after unlocking the ptl.
@@ -1394,7 +1396,7 @@ repeat:
  * This follows the same logic as folio_wait_bit_common() so see the comments
  * there.
  */
-void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
 	__releases(ptl)
 {
 	struct wait_page_queue wait_page;
@@ -1428,6 +1430,9 @@ void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
 	 * If a migration entry exists for the page the migration path must hold
 	 * a valid reference to the page, and it must take the ptl to remove the
 	 * migration entry. So the page is valid until the ptl is dropped.
+	 * Similarly any path attempting to drop the last reference to a
+	 * device-private page needs to grab the ptl to remove the device-private
+	 * entry.
 	 */
 	spin_unlock(ptl);
 
diff --git a/mm/memory.c b/mm/memory.c
index 2a55edc48a65..0ad50df25846 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4681,7 +4681,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 				unlock_page(vmf->page);
 				put_page(vmf->page);
 			} else {
-				pte_unmap_unlock(vmf->pte, vmf->ptl);
+				pte_unmap(vmf->pte);
+				softleaf_entry_wait_on_locked(entry, vmf->ptl);
 			}
 		} else if (softleaf_is_hwpoison(entry)) {
 			ret = VM_FAULT_HWPOISON;
diff --git a/mm/migrate.c b/mm/migrate.c
index 5169f9717f60..75e384b042ef 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -499,7 +499,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 	if (!softleaf_is_migration(entry))
 		goto out;
 
-	migration_entry_wait_on_locked(entry, ptl);
+	softleaf_entry_wait_on_locked(entry, ptl);
 	return;
 out:
 	spin_unlock(ptl);
@@ -531,10 +531,10 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, p
 		 * If migration entry existed, safe to release vma lock
 		 * here because the pgtable page won't be freed without the
 		 * pgtable lock released.  See comment right above pgtable
-		 * lock release in migration_entry_wait_on_locked().
+		 * lock release in softleaf_entry_wait_on_locked().
 		 */
 		hugetlb_vma_unlock_read(vma);
-		migration_entry_wait_on_locked(entry, ptl);
+		softleaf_entry_wait_on_locked(entry, ptl);
 		return;
 	}
 
@@ -552,7 +552,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
 	ptl = pmd_lock(mm, pmd);
 	if (!pmd_is_migration_entry(*pmd))
 		goto unlock;
-	migration_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl);
+	softleaf_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl);
 	return;
 unlock:
 	spin_unlock(ptl);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 23379663b1e1..deab89fd4541 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -176,7 +176,7 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start,
 		}
 
 		if (softleaf_is_migration(entry)) {
-			migration_entry_wait_on_locked(entry, ptl);
+			softleaf_entry_wait_on_locked(entry, ptl);
 			spin_unlock(ptl);
 			return -EAGAIN;
 		}
-- 
cgit v1.2.3


From 022ac075088366b62e130da5e1b200bc93a47191 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Thu, 12 Feb 2026 13:34:22 -0800
Subject: bpf: use reg->var_off instead of reg->off for pointers

This commit consolidates static and varying pointer offset tracking
logic. All offsets are now represented solely using `.var_off` and
min/max fields. The reasons are twofold:
- This simplifies pointer tracking code, as each relevant function
  needs to check the `.var_off` field anyway.
- It makes it easier to widen pointer registers for the purpose of loop
  convergence checks, by forgoing the `regsafe()` logic demanding
  `.off` fields to be identical.

The changes are spread across many functions and are hard to group
into smaller patches. Some of the logical changes include:
- Checks in __check_ptr_off_reg() are reordered so that the
  tnum_is_const() check is done before operating on reg->var_off.value.
- check_packet_access() now uses check_mem_region_access() to handle
  possible 'off' overflow cases.
- In check_helper_mem_access() utility functions like
  check_packet_access() are now called with 'off=0', as these utility
  functions now account for the complete register offset range.
- In check_reg_type() a call to __check_ptr_off_reg() is added before
  a call to btf_struct_ids_match(). This prevents
  btf_struct_ids_match() from potentially working on non-constant
  reg->var_off.value.
- regsafe() is relaxed to avoid comparing '.off' field for pointers.

As a precaution, the changes are verified in [1] by adding a pass
checking that no pointer has non-zero '.off' field on each
do_check_insn() iteration.

[1] https://github.com/eddyz87/bpf/tree/ptrs-off-migration

Notable selftests changes:
- `.var_off` value changed because it now combines static and varying
  offsets. Affected tests:
  - linked_list/incorrect_node_var_off
  - linked_list/incorrect_head_var_off2
  - verifier_align/packet_variable_offset

- Overflowing `smax_value` bound leads to a pointer with big negative
  or positive offset to be rejected immediately (previously overflowing
  `rX += const` instruction updated `.off` field avoiding the overflow).
  Affected tests:
  - verifier_align/dubious_pointer_arithmetic
  - verifier_bounds/var_off_insn_off_test1

- Invalid access to packet now reports full offset inside a packet.
  Affected tests:
  - verifier_direct_packet_access/test23_x_pkt_ptr_4

- A change in check_mem_region_access() behavior:
  when register `.smin_value` is negative, it reports
  "rX min value is negative..." before calling into __check_mem_access()
  which reports "invalid access to ...".
  In the tests below, the `.off` field was negative, while `.smin_value`
  remained positive. This is no longer the case after the changes in
  this commit. Affected tests:
  - verifier_gotox/jump_table_invalid_mem_acceess_neg
  - verifier_helper_packet_access/test15_cls_helper_fail_sub
  - verifier_helper_value_access/imm_out_of_bound_2
  - verifier_helper_value_access/reg_out_of_bound_2
  - verifier_meta_access/meta_access_test2
  - verifier_value_ptr_arith/known_scalar_from_different_maps
  - lower_oob_arith_test_1
  - value_ptr_known_scalar_3
  - access_value_ptr_known_scalar

- Usage of check_mem_region_access() instead of __check_mem_access()
  in check_packet_access() changes the reported message from
  "rX offset is outside ..." to "rX min/max value is outside ...".
  Affected tests:
  - verifier_xdp_direct_packet_access/*

- In check_func_arg_reg_off() the check for zero offset now operates
  on `.var_off` field instead of `.off` field. For tests where the
  pattern looks like `kfunc(reg_with_var_off, ...)`, this changes the
  reported error:
  - previously the error "variable ... access ... disallowed"
    was reported by __check_ptr_off_reg();
  - now "R1 must have zero offset ..." is reported by
    check_func_arg_reg_off() itself.
  Affected tests:
  - verifier/calls.c
    "calls: invalid kfunc call: PTR_TO_BTF_ID with variable offset"

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260212-ptrs-off-migration-v2-2-00820e4d3438@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h                       |   3 +-
 kernel/bpf/log.c                                   |   2 +
 kernel/bpf/verifier.c                              | 317 ++++++++-------------
 .../testing/selftests/bpf/prog_tests/linked_list.c |   4 +-
 .../selftests/bpf/progs/exceptions_assert.c        |   2 +-
 tools/testing/selftests/bpf/progs/iters.c          |   6 +-
 .../selftests/bpf/progs/mem_rdonly_untrusted.c     |   2 +-
 tools/testing/selftests/bpf/progs/verifier_align.c |  40 ++-
 .../testing/selftests/bpf/progs/verifier_bounds.c  |   2 +-
 .../bpf/progs/verifier_direct_packet_access.c      |   4 +-
 tools/testing/selftests/bpf/progs/verifier_gotox.c |   4 +-
 .../bpf/progs/verifier_helper_packet_access.c      |   2 +-
 .../bpf/progs/verifier_helper_value_access.c       |   4 +-
 .../testing/selftests/bpf/progs/verifier_int_ptr.c |   2 +-
 .../selftests/bpf/progs/verifier_meta_access.c     |   2 +-
 .../selftests/bpf/progs/verifier_spill_fill.c      |   8 +-
 .../selftests/bpf/progs/verifier_stack_ptr.c       |   4 +-
 .../selftests/bpf/progs/verifier_value_ptr_arith.c |  10 +-
 .../bpf/progs/verifier_xdp_direct_packet_access.c  |  64 ++---
 tools/testing/selftests/bpf/verifier/calls.c       |   2 +-
 20 files changed, 206 insertions(+), 278 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index ef8e45a362d9..a97bdbf3a07b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -38,8 +38,7 @@ struct bpf_reg_state {
 	/* Ordering of fields matters.  See states_equal() */
 	enum bpf_reg_type type;
 	/*
-	 * Fixed part of pointer offset, pointer types only.
-	 * Or constant delta between "linked" scalars with the same ID.
+	 * Constant delta between "linked" scalars with the same ID.
 	 */
 	s32 off;
 	union {
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index a0c3b35de2ce..39a731392d65 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -581,6 +581,8 @@ int tnum_strn(char *str, size_t size, struct tnum a)
 	if (a.mask == 0) {
 		if (is_unum_decimal(a.value))
 			return snprintf(str, size, "%llu", a.value);
+		if (is_snum_decimal(a.value))
+			return snprintf(str, size, "%lld", a.value);
 		else
 			return snprintf(str, size, "%#llx", a.value);
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3bf72eacbec2..2c5794dad668 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -654,7 +654,7 @@ static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_s
 		return -EINVAL;
 	}
 
-	off = reg->off + reg->var_off.value;
+	off = reg->var_off.value;
 	if (off % BPF_REG_SIZE) {
 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
 		return -EINVAL;
@@ -2281,11 +2281,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
 				struct btf_field_graph_root *ds_head)
 {
-	__mark_reg_known_zero(&regs[regno]);
+	__mark_reg_known(&regs[regno], ds_head->node_offset);
 	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
 	regs[regno].btf = ds_head->btf;
 	regs[regno].btf_id = ds_head->value_btf_id;
-	regs[regno].off = ds_head->node_offset;
 }
 
 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
@@ -2316,7 +2315,6 @@ static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
 	 */
 	return reg->type == which &&
 	       reg->id == 0 &&
-	       reg->off == 0 &&
 	       tnum_equals_const(reg->var_off, 0);
 }
 
@@ -5302,7 +5300,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
  * tracks the effects of the write, considering that each stack slot in the
  * dynamic range is potentially written to.
  *
- * 'off' includes 'regno->off'.
  * 'value_regno' can be -1, meaning that an unknown value is being written to
  * the stack.
  *
@@ -5724,7 +5721,6 @@ static int check_stack_read(struct bpf_verifier_env *env,
  * check_stack_write_var_off.
  *
  * 'ptr_regno' is the register used as a pointer into the stack.
- * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
  * 'value_regno' is the register whose value we're writing to the stack. It can
  * be -1, meaning that we're not writing from a register.
  *
@@ -5761,14 +5757,14 @@ static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
 	u32 cap = bpf_map_flags_to_cap(map);
 
 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
-		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
-			map->value_size, off, size);
+		verbose(env, "write into map forbidden, value_size=%d off=%lld size=%d\n",
+			map->value_size, reg->smin_value + off, size);
 		return -EACCES;
 	}
 
 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
-		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
-			map->value_size, off, size);
+		verbose(env, "read from map forbidden, value_size=%d off=%lld size=%d\n",
+			map->value_size, reg->smin_value + off, size);
 		return -EACCES;
 	}
 
@@ -5875,24 +5871,24 @@ static int __check_ptr_off_reg(struct bpf_verifier_env *env,
 	 * is only allowed in its original, unmodified form.
 	 */
 
-	if (reg->off < 0) {
-		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
-			reg_type_str(env, reg->type), regno, reg->off);
+	if (!tnum_is_const(reg->var_off)) {
+		char tn_buf[48];
+
+		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+		verbose(env, "variable %s access var_off=%s disallowed\n",
+			reg_type_str(env, reg->type), tn_buf);
 		return -EACCES;
 	}
 
-	if (!fixed_off_ok && reg->off) {
-		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
-			reg_type_str(env, reg->type), regno, reg->off);
+	if (reg->smin_value < 0) {
+		verbose(env, "negative offset %s ptr R%d off=%lld disallowed\n",
+			reg_type_str(env, reg->type), regno, reg->var_off.value);
 		return -EACCES;
 	}
 
-	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
-		char tn_buf[48];
-
-		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-		verbose(env, "variable %s access var_off=%s disallowed\n",
-			reg_type_str(env, reg->type), tn_buf);
+	if (!fixed_off_ok && reg->var_off.value != 0) {
+		verbose(env, "dereference of modified %s ptr R%d off=%lld disallowed\n",
+			reg_type_str(env, reg->type), regno, reg->var_off.value);
 		return -EACCES;
 	}
 
@@ -5934,14 +5930,14 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
 	/* For ref_ptr case, release function check should ensure we get one
 	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
 	 * normal store of unreferenced kptr, we must ensure var_off is zero.
-	 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
-	 * reg->off and reg->ref_obj_id are not needed here.
+	 * Since ref_ptr cannot be accessed directly by BPF insns, check for
+	 * reg->ref_obj_id is not needed here.
 	 */
 	if (__check_ptr_off_reg(env, reg, regno, true))
 		return -EACCES;
 
 	/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
-	 * we also need to take into account the reg->off.
+	 * we also need to take into account the reg->var_off.
 	 *
 	 * We want to support cases like:
 	 *
@@ -5952,19 +5948,19 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
 	 *
 	 * struct foo *v;
 	 * v = func();	      // PTR_TO_BTF_ID
-	 * val->foo = v;      // reg->off is zero, btf and btf_id match type
-	 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
+	 * val->foo = v;      // reg->var_off is zero, btf and btf_id match type
+	 * val->bar = &v->br; // reg->var_off is still zero, but we need to retry with
 	 *                    // first member type of struct after comparison fails
-	 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
+	 * val->baz = &v->bz; // reg->var_off is non-zero, so struct needs to be walked
 	 *                    // to match type
 	 *
-	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
+	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->var_off
 	 * is zero. We must also ensure that btf_struct_ids_match does not walk
 	 * the struct to match type against first member of struct, i.e. reject
 	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
 	 * strict mode to true for type match.
 	 */
-	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
+	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->var_off.value,
 				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
 				  kptr_field->type != BPF_KPTR_UNREF))
 		goto bad_type;
@@ -6273,27 +6269,14 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 	struct bpf_reg_state *reg = reg_state(env, regno);
 	int err;
 
-	/* We may have added a variable offset to the packet pointer; but any
-	 * reg->range we have comes after that.  We are only checking the fixed
-	 * offset.
-	 */
-
-	/* We don't allow negative numbers, because we aren't tracking enough
-	 * detail to prove they're safe.
-	 */
-	if (reg->smin_value < 0) {
-		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
-			regno);
-		return -EACCES;
+	if (reg->range < 0) {
+		verbose(env, "R%d offset is outside of the packet\n", regno);
+		return -EINVAL;
 	}
 
-	err = reg->range < 0 ? -EINVAL :
-	      __check_mem_access(env, regno, off, size, reg->range,
-				 zero_size_allowed);
-	if (err) {
-		verbose(env, "R%d offset is outside of the packet\n", regno);
+	err = check_mem_region_access(env, regno, off, size, reg->range, zero_size_allowed);
+	if (err)
 		return err;
-	}
 
 	/* __check_mem_access has made sure "off + size - 1" is within u16.
 	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
@@ -6305,7 +6288,7 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 		max_t(u32, env->prog->aux->max_pkt_offset,
 		      off + reg->umax_value + size - 1);
 
-	return err;
+	return 0;
 }
 
 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
@@ -6522,14 +6505,14 @@ static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
 	 */
 	ip_align = 2;
 
-	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
+	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + off));
 	if (!tnum_is_aligned(reg_off, size)) {
 		char tn_buf[48];
 
 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 		verbose(env,
-			"misaligned packet access off %d+%s+%d+%d size %d\n",
-			ip_align, tn_buf, reg->off, off, size);
+			"misaligned packet access off %d+%s+%d size %d\n",
+			ip_align, tn_buf, off, size);
 		return -EACCES;
 	}
 
@@ -6547,13 +6530,13 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
 	if (!strict || size == 1)
 		return 0;
 
-	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
+	reg_off = tnum_add(reg->var_off, tnum_const(off));
 	if (!tnum_is_aligned(reg_off, size)) {
 		char tn_buf[48];
 
 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
-			pointer_desc, tn_buf, reg->off, off, size);
+		verbose(env, "misaligned %saccess off %s+%d size %d\n",
+			pointer_desc, tn_buf, off, size);
 		return -EACCES;
 	}
 
@@ -6891,7 +6874,7 @@ static int __check_buffer_access(struct bpf_verifier_env *env,
 			regno, buf_info, off, size);
 		return -EACCES;
 	}
-	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+	if (!tnum_is_const(reg->var_off)) {
 		char tn_buf[48];
 
 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -6914,8 +6897,8 @@ static int check_tp_buffer_access(struct bpf_verifier_env *env,
 	if (err)
 		return err;
 
-	if (off + size > env->prog->aux->max_tp_access)
-		env->prog->aux->max_tp_access = off + size;
+	env->prog->aux->max_tp_access = max(reg->var_off.value + off + size,
+					    env->prog->aux->max_tp_access);
 
 	return 0;
 }
@@ -6933,8 +6916,7 @@ static int check_buffer_access(struct bpf_verifier_env *env,
 	if (err)
 		return err;
 
-	if (off + size > *max_access)
-		*max_access = off + size;
+	*max_access = max(reg->var_off.value + off + size, *max_access);
 
 	return 0;
 }
@@ -7327,13 +7309,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 			tname);
 		return -EINVAL;
 	}
-	if (off < 0) {
-		verbose(env,
-			"R%d is ptr_%s invalid negative access: off=%d\n",
-			regno, tname, off);
-		return -EACCES;
-	}
-	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+
+	if (!tnum_is_const(reg->var_off)) {
 		char tn_buf[48];
 
 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -7343,6 +7320,15 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 		return -EACCES;
 	}
 
+	off += reg->var_off.value;
+
+	if (off < 0) {
+		verbose(env,
+			"R%d is ptr_%s invalid negative access: off=%d\n",
+			regno, tname, off);
+		return -EACCES;
+	}
+
 	if (reg->type & MEM_USER) {
 		verbose(env,
 			"R%d is ptr_%s access user memory: off=%d\n",
@@ -7589,8 +7575,8 @@ static int check_stack_access_within_bounds(
 
 	if (err) {
 		if (tnum_is_const(reg->var_off)) {
-			verbose(env, "invalid%s stack R%d off=%d size=%d\n",
-				err_extra, regno, off, access_size);
+			verbose(env, "invalid%s stack R%d off=%lld size=%d\n",
+				err_extra, regno, min_off, access_size);
 		} else {
 			char tn_buf[48];
 
@@ -7636,14 +7622,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 	if (size < 0)
 		return size;
 
-	/* alignment checks will add in reg->off themselves */
 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
 	if (err)
 		return err;
 
-	/* for access checks, reg->off is just part of off */
-	off += reg->off;
-
 	if (reg->type == PTR_TO_MAP_KEY) {
 		if (t == BPF_WRITE) {
 			verbose(env, "write to change key R%d not allowed\n", regno);
@@ -8122,8 +8104,6 @@ static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
  * on the access type and privileges, that all elements of the stack are
  * initialized.
  *
- * 'off' includes 'regno->off', but not its dynamic part (if any).
- *
  * All registers that have been spilled on the stack in the slots within the
  * read offsets are marked as read.
  */
@@ -8284,7 +8264,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	switch (base_type(reg->type)) {
 	case PTR_TO_PACKET:
 	case PTR_TO_PACKET_META:
-		return check_packet_access(env, regno, reg->off, access_size,
+		return check_packet_access(env, regno, 0, access_size,
 					   zero_size_allowed);
 	case PTR_TO_MAP_KEY:
 		if (access_type == BPF_WRITE) {
@@ -8292,12 +8272,12 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 				reg_type_str(env, reg->type));
 			return -EACCES;
 		}
-		return check_mem_region_access(env, regno, reg->off, access_size,
+		return check_mem_region_access(env, regno, 0, access_size,
 					       reg->map_ptr->key_size, false);
 	case PTR_TO_MAP_VALUE:
-		if (check_map_access_type(env, regno, reg->off, access_size, access_type))
+		if (check_map_access_type(env, regno, 0, access_size, access_type))
 			return -EACCES;
-		return check_map_access(env, regno, reg->off, access_size,
+		return check_map_access(env, regno, 0, access_size,
 					zero_size_allowed, ACCESS_HELPER);
 	case PTR_TO_MEM:
 		if (type_is_rdonly_mem(reg->type)) {
@@ -8307,7 +8287,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 				return -EACCES;
 			}
 		}
-		return check_mem_region_access(env, regno, reg->off,
+		return check_mem_region_access(env, regno, 0,
 					       access_size, reg->mem_size,
 					       zero_size_allowed);
 	case PTR_TO_BUF:
@@ -8322,16 +8302,16 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 		} else {
 			max_access = &env->prog->aux->max_rdwr_access;
 		}
-		return check_buffer_access(env, reg, regno, reg->off,
+		return check_buffer_access(env, reg, regno, 0,
 					   access_size, zero_size_allowed,
 					   max_access);
 	case PTR_TO_STACK:
 		return check_stack_range_initialized(
 				env,
-				regno, reg->off, access_size,
+				regno, 0, access_size,
 				zero_size_allowed, access_type, meta);
 	case PTR_TO_BTF_ID:
-		return check_ptr_to_btf_access(env, regs, regno, reg->off,
+		return check_ptr_to_btf_access(env, regs, regno, 0,
 					       access_size, BPF_READ, -1);
 	case PTR_TO_CTX:
 		/* in case the function doesn't know how to access the context,
@@ -8543,9 +8523,9 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
 		return -EINVAL;
 	}
 	spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off;
-	if (spin_lock_off != val + reg->off) {
+	if (spin_lock_off != val) {
 		verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n",
-			val + reg->off, lock_str, spin_lock_off);
+			val, lock_str, spin_lock_off);
 		return -EINVAL;
 	}
 	if (is_lock) {
@@ -8660,9 +8640,9 @@ static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno,
 		verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
 		return -EINVAL;
 	}
-	if (field_off != val + reg->off) {
+	if (field_off != val) {
 		verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
-			val + reg->off, struct_name, field_off);
+			val, struct_name, field_off);
 		return -EINVAL;
 	}
 	if (map_desc->ptr) {
@@ -8730,7 +8710,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
 		return -EINVAL;
 	}
 
-	kptr_off = reg->off + reg->var_off.value;
+	kptr_off = reg->var_off.value;
 	kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
 	if (!kptr_field) {
 		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
@@ -9373,7 +9353,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
 	struct bpf_reg_state *reg = reg_state(env, regno);
 	enum bpf_reg_type expected, type = reg->type;
 	const struct bpf_reg_types *compatible;
-	int i, j;
+	int i, j, err;
 
 	compatible = compatible_reg_types[base_type(arg_type)];
 	if (!compatible) {
@@ -9476,8 +9456,12 @@ found:
 				return -EACCES;
 			}
 
-			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
-						  btf_vmlinux, *arg_btf_id,
+			err = __check_ptr_off_reg(env, reg, regno, true);
+			if (err)
+				return err;
+
+			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id,
+						  reg->var_off.value, btf_vmlinux, *arg_btf_id,
 						  strict_type_match)) {
 				verbose(env, "R%d is of type %s but %s is expected\n",
 					regno, btf_type_name(reg->btf, reg->btf_id),
@@ -9555,12 +9539,11 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env,
 		 * because fixed_off_ok is false, but checking here allows us
 		 * to give the user a better error message.
 		 */
-		if (reg->off) {
+		if (!tnum_is_const(reg->var_off) || reg->var_off.value != 0) {
 			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
 				regno);
 			return -EINVAL;
 		}
-		return __check_ptr_off_reg(env, reg, regno, false);
 	}
 
 	switch (type) {
@@ -9657,7 +9640,7 @@ static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
 	if (reg->type == CONST_PTR_TO_DYNPTR)
 		return reg->dynptr.type;
 
-	spi = __get_spi(reg->off);
+	spi = __get_spi(reg->var_off.value);
 	if (spi < 0) {
 		verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
 		return BPF_DYNPTR_TYPE_INVALID;
@@ -9698,13 +9681,13 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
 		return -EACCES;
 	}
 
-	err = check_map_access(env, regno, reg->off,
-			       map->value_size - reg->off, false,
+	err = check_map_access(env, regno, 0,
+			       map->value_size - reg->var_off.value, false,
 			       ACCESS_HELPER);
 	if (err)
 		return err;
 
-	map_off = reg->off + reg->var_off.value;
+	map_off = reg->var_off.value;
 	err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
 	if (err) {
 		verbose(env, "direct value access on string failed\n");
@@ -9741,7 +9724,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env,
 	if (!tnum_is_const(key->var_off))
 		return -EOPNOTSUPP;
 
-	stack_off = key->off + key->var_off.value;
+	stack_off = key->var_off.value;
 	slot = -stack_off - 1;
 	spi = slot / BPF_REG_SIZE;
 	off = slot % BPF_REG_SIZE;
@@ -11073,7 +11056,8 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
 	 */
 	struct btf_field *field;
 
-	field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
+	field = reg_find_field_offset(&caller->regs[BPF_REG_1],
+				      caller->regs[BPF_REG_1].var_off.value,
 				      BPF_RB_ROOT);
 	if (!field || !field->graph_root.value_btf_id)
 		return -EFAULT;
@@ -11449,7 +11433,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
 	 * and map_direct_value_addr is set.
 	 */
-	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
+	fmt_map_off = fmt_reg->var_off.value;
 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
 						  fmt_map_off);
 	if (err) {
@@ -12755,13 +12739,12 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
 	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
 		strict_type_match = true;
 
-	WARN_ON_ONCE(is_kfunc_release(meta) &&
-		     (reg->off || !tnum_is_const(reg->var_off) ||
-		      reg->var_off.value));
+	WARN_ON_ONCE(is_kfunc_release(meta) && !tnum_is_const(reg->var_off));
 
 	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
 	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
-	struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match);
+	struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->var_off.value,
+					   meta->btf, ref_id, strict_type_match);
 	/* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
 	 * actually use it -- it must cast to the underlying type. So we allow
 	 * caller to pass in the underlying type.
@@ -13133,7 +13116,7 @@ __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
 	}
 
 	rec = reg_btf_record(reg);
-	head_off = reg->off + reg->var_off.value;
+	head_off = reg->var_off.value;
 	field = btf_record_find(rec, head_off, head_field_type);
 	if (!field) {
 		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
@@ -13200,7 +13183,7 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
 		return -EINVAL;
 	}
 
-	node_off = reg->off + reg->var_off.value;
+	node_off = reg->var_off.value;
 	field = reg_find_field_offset(reg, node_off, node_field_type);
 	if (!field) {
 		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
@@ -14228,7 +14211,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	    meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
 	    meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
 		release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
-		insn_aux->insert_off = regs[BPF_REG_2].off;
+		insn_aux->insert_off = regs[BPF_REG_2].var_off.value;
 		insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
 		err = ref_convert_owning_non_owning(env, release_ref_obj_id);
 		if (err) {
@@ -14459,11 +14442,13 @@ static bool check_reg_sane_offset_ptr(struct bpf_verifier_env *env,
 				      const struct bpf_reg_state *reg,
 				      enum bpf_reg_type type)
 {
+	bool known = tnum_is_const(reg->var_off);
+	s64 val = reg->var_off.value;
 	s64 smin = reg->smin_value;
 
-	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
-		verbose(env, "%s pointer offset %d is not allowed\n",
-			reg_type_str(env, type), reg->off);
+	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
+		verbose(env, "%s pointer offset %lld is not allowed\n",
+			reg_type_str(env, type), val);
 		return false;
 	}
 
@@ -14497,13 +14482,11 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 		 * currently prohibited for unprivileged.
 		 */
 		max = MAX_BPF_STACK + mask_to_left;
-		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
+		ptr_limit = -ptr_reg->var_off.value;
 		break;
 	case PTR_TO_MAP_VALUE:
 		max = ptr_reg->map_ptr->value_size;
-		ptr_limit = (mask_to_left ?
-			     ptr_reg->smin_value :
-			     ptr_reg->umax_value) + ptr_reg->off;
+		ptr_limit = mask_to_left ? ptr_reg->smin_value : ptr_reg->umax_value;
 		break;
 	default:
 		return REASON_TYPE;
@@ -14734,9 +14717,6 @@ static int sanitize_err(struct bpf_verifier_env *env,
  * Variable offset is prohibited for unprivileged mode for simplicity since it
  * requires corresponding support in Spectre masking for stack ALU.  See also
  * retrieve_ptr_limit().
- *
- *
- * 'off' includes 'reg->off'.
  */
 static int check_stack_access_for_ptr_arithmetic(
 				struct bpf_verifier_env *env,
@@ -14777,11 +14757,11 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env,
 	switch (dst_reg->type) {
 	case PTR_TO_STACK:
 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
-					dst_reg->off + dst_reg->var_off.value))
+							  dst_reg->var_off.value))
 			return -EACCES;
 		break;
 	case PTR_TO_MAP_VALUE:
-		if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
+		if (check_map_access(env, dst, 0, 1, false, ACCESS_HELPER)) {
 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
 				"prohibited for !root\n", dst);
 			return -EACCES;
@@ -14905,23 +14885,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 
 	switch (opcode) {
 	case BPF_ADD:
-		/* We can take a fixed offset as long as it doesn't overflow
-		 * the s32 'off' field
-		 */
-		if (known && (ptr_reg->off + smin_val ==
-			      (s64)(s32)(ptr_reg->off + smin_val))) {
-			/* pointer += K.  Accumulate it into fixed offset */
-			dst_reg->smin_value = smin_ptr;
-			dst_reg->smax_value = smax_ptr;
-			dst_reg->umin_value = umin_ptr;
-			dst_reg->umax_value = umax_ptr;
-			dst_reg->var_off = ptr_reg->var_off;
-			dst_reg->off = ptr_reg->off + smin_val;
-			dst_reg->raw = ptr_reg->raw;
-			break;
-		}
-		/* A new variable offset is created.  Note that off_reg->off
-		 * == 0, since it's a scalar.
+		/*
 		 * dst_reg gets the pointer type and since some positive
 		 * integer value was added to the pointer, give it a new 'id'
 		 * if it's a PTR_TO_PACKET.
@@ -14940,9 +14904,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->umax_value = U64_MAX;
 		}
 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
-		dst_reg->off = ptr_reg->off;
 		dst_reg->raw = ptr_reg->raw;
-		if (reg_is_pkt_pointer(ptr_reg)) {
+		if (!known && reg_is_pkt_pointer(ptr_reg)) {
 			dst_reg->id = ++env->id_gen;
 			/* something was added to pkt_ptr, set range to zero */
 			memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
@@ -14964,19 +14927,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 				dst);
 			return -EACCES;
 		}
-		if (known && (ptr_reg->off - smin_val ==
-			      (s64)(s32)(ptr_reg->off - smin_val))) {
-			/* pointer -= K.  Subtract it from fixed offset */
-			dst_reg->smin_value = smin_ptr;
-			dst_reg->smax_value = smax_ptr;
-			dst_reg->umin_value = umin_ptr;
-			dst_reg->umax_value = umax_ptr;
-			dst_reg->var_off = ptr_reg->var_off;
-			dst_reg->id = ptr_reg->id;
-			dst_reg->off = ptr_reg->off - smin_val;
-			dst_reg->raw = ptr_reg->raw;
-			break;
-		}
 		/* A new variable offset is created.  If the subtrahend is known
 		 * nonnegative, then any reg->range we had before is still good.
 		 */
@@ -14996,9 +14946,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->umax_value = umax_ptr - umin_val;
 		}
 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
-		dst_reg->off = ptr_reg->off;
 		dst_reg->raw = ptr_reg->raw;
-		if (reg_is_pkt_pointer(ptr_reg)) {
+		if (!known && reg_is_pkt_pointer(ptr_reg)) {
 			dst_reg->id = ++env->id_gen;
 			/* something was added to pkt_ptr, set range to zero */
 			if (smin_val < 0)
@@ -16542,19 +16491,17 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
 	struct bpf_reg_state *reg;
 	int new_range;
 
-	if (dst_reg->off < 0 ||
-	    (dst_reg->off == 0 && range_right_open))
+	if (dst_reg->umax_value == 0 && range_right_open)
 		/* This doesn't give us any range */
 		return;
 
-	if (dst_reg->umax_value > MAX_PACKET_OFF ||
-	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
+	if (dst_reg->umax_value > MAX_PACKET_OFF)
 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
 		 * than pkt_end, but that's because it's also less than pkt.
 		 */
 		return;
 
-	new_range = dst_reg->off;
+	new_range = dst_reg->umax_value;
 	if (range_right_open)
 		new_range++;
 
@@ -16603,7 +16550,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
 	/* If our ids match, then we must have the same max_value.  And we
 	 * don't care about the other reg's fixed offset, since if it's too big
 	 * the range won't allow anything.
-	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
+	 * dst_reg->umax_value is known < MAX_PACKET_OFF, therefore it fits in a u16.
 	 */
 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
 		if (reg->type == type && reg->id == dst_reg->id)
@@ -17129,29 +17076,24 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 {
 	if (type_may_be_null(reg->type) && reg->id == id &&
 	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
-		/* Old offset (both fixed and variable parts) should have been
-		 * known-zero, because we don't allow pointer arithmetic on
-		 * pointers that might be NULL. If we see this happening, don't
-		 * convert the register.
+		/* Old offset should have been known-zero, because we don't
+		 * allow pointer arithmetic on pointers that might be NULL.
+		 * If we see this happening, don't convert the register.
 		 *
 		 * But in some cases, some helpers that return local kptrs
-		 * advance offset for the returned pointer. In those cases, it
-		 * is fine to expect to see reg->off.
+		 * advance offset for the returned pointer. In those cases,
+		 * it is fine to expect to see reg->var_off.
 		 */
-		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
-			return;
 		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
-		    WARN_ON_ONCE(reg->off))
+		    WARN_ON_ONCE(!tnum_equals_const(reg->var_off, 0)))
 			return;
-
 		if (is_null) {
-			reg->type = SCALAR_VALUE;
 			/* We don't need id and ref_obj_id from this point
 			 * onwards anymore, thus we should better reset it,
 			 * so that state pruning has chances to take effect.
 			 */
-			reg->id = 0;
-			reg->ref_obj_id = 0;
+			__mark_reg_known_zero(reg);
+			reg->type = SCALAR_VALUE;
 
 			return;
 		}
@@ -17731,22 +17673,24 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	}
 
 	map = env->used_maps[aux->map_index];
-	dst_reg->map_ptr = map;
 
 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
 		if (map->map_type == BPF_MAP_TYPE_ARENA) {
 			__mark_reg_unknown(env, dst_reg);
+			dst_reg->map_ptr = map;
 			return 0;
 		}
+		__mark_reg_known(dst_reg, aux->map_off);
 		dst_reg->type = PTR_TO_MAP_VALUE;
-		dst_reg->off = aux->map_off;
+		dst_reg->map_ptr = map;
 		WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
 			     map->max_entries != 1);
 		/* We want reg->id to be same (0) as map_value is not distinct */
 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
 		dst_reg->type = CONST_PTR_TO_MAP;
+		dst_reg->map_ptr = map;
 	} else {
 		verifier_bug(env, "unexpected src reg value for ldimm64");
 		return -EFAULT;
@@ -19852,11 +19796,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
 		 */
 		if (rold->range > rcur->range)
 			return false;
-		/* If the offsets don't match, we can't trust our alignment;
-		 * nor can we be sure that we won't fall out of range.
-		 */
-		if (rold->off != rcur->off)
-			return false;
 		/* id relations must be preserved */
 		if (!check_ids(rold->id, rcur->id, idmap))
 			return false;
@@ -19872,8 +19811,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
 		return true;
 	case PTR_TO_INSN:
 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
-			rold->off == rcur->off && range_within(rold, rcur) &&
-			tnum_in(rold->var_off, rcur->var_off);
+		       range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
 	default:
 		return regs_exact(rold, rcur, idmap);
 	}
@@ -20486,7 +20424,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 					 * so we can assume valid iter and reg state,
 					 * no need for extra (re-)validations
 					 */
-					spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
+					spi = __get_spi(iter_reg->var_off.value);
 					iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
 					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
 						loop = true;
@@ -20892,19 +20830,16 @@ static int indirect_jump_min_max_index(struct bpf_verifier_env *env,
 				       u32 *pmin_index, u32 *pmax_index)
 {
 	struct bpf_reg_state *reg = reg_state(env, regno);
-	u64 min_index, max_index;
+	u64 min_index = reg->umin_value;
+	u64 max_index = reg->umax_value;
 	const u32 size = 8;
 
-	if (check_add_overflow(reg->umin_value, reg->off, &min_index) ||
-		(min_index > (u64) U32_MAX * size)) {
-		verbose(env, "the sum of R%u umin_value %llu and off %u is too big\n",
-			     regno, reg->umin_value, reg->off);
+	if (min_index > (u64) U32_MAX * size) {
+		verbose(env, "the sum of R%u umin_value %llu is too big\n", regno, reg->umin_value);
 		return -ERANGE;
 	}
-	if (check_add_overflow(reg->umax_value, reg->off, &max_index) ||
-		(max_index > (u64) U32_MAX * size)) {
-		verbose(env, "the sum of R%u umax_value %llu and off %u is too big\n",
-			     regno, reg->umax_value, reg->off);
+	if (max_index > (u64) U32_MAX * size) {
+		verbose(env, "the sum of R%u umax_value %llu is too big\n", regno, reg->umax_value);
 		return -ERANGE;
 	}
 
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 14c5a7ef0e87..6f25b5f39a79 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -87,12 +87,12 @@ static struct {
 	{ "incorrect_value_type",
 	  "operation on bpf_list_head expects arg#1 bpf_list_node at offset=48 in struct foo, "
 	  "but arg is at offset=0 in struct bar" },
-	{ "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
+	{ "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0x1ffffffff) disallowed" },
 	{ "incorrect_node_off1", "bpf_list_node not found at offset=49" },
 	{ "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=48 in struct foo" },
 	{ "no_head_type", "bpf_list_head not found at offset=0" },
 	{ "incorrect_head_var_off1", "R1 doesn't have constant offset" },
-	{ "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
+	{ "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0x1ffffffff) disallowed" },
 	{ "incorrect_head_off1", "bpf_list_head not found at offset=25" },
 	{ "incorrect_head_off2", "bpf_list_head not found at offset=1" },
 	{ "pop_front_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" },
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index a01c2736890f..ed00dd551ffb 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx)
 
 SEC("?tc")
 __log_level(2) __failure
-__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0")
+__msg(": R1=pkt(r=64,imm=64) R2=pkt_end() R6=pkt(r=64) R10=fp0")
 int check_assert_generic(struct __sk_buff *ctx)
 {
 	u8 *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 7f27b517d5d5..86b74e3579d9 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -1651,7 +1651,7 @@ int clean_live_states(const void *ctx)
 
 SEC("?raw_tp")
 __flag(BPF_F_TEST_STATE_FREQ)
-__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__failure __msg("misaligned stack access off -31+0 size 8")
 __naked int absent_mark_in_the_middle_state(void)
 {
 	/* This is equivalent to C program below.
@@ -1726,7 +1726,7 @@ static int noop(void)
 
 SEC("?raw_tp")
 __flag(BPF_F_TEST_STATE_FREQ)
-__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__failure __msg("misaligned stack access off -31+0 size 8")
 __naked int absent_mark_in_the_middle_state2(void)
 {
 	/* This is equivalent to C program below.
@@ -1802,7 +1802,7 @@ __naked int absent_mark_in_the_middle_state2(void)
 
 SEC("?raw_tp")
 __flag(BPF_F_TEST_STATE_FREQ)
-__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__failure __msg("misaligned stack access off -31+0 size 8")
 __naked int absent_mark_in_the_middle_state3(void)
 {
 	/*
diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
index 3b984b6ae7c0..5b4453747c23 100644
--- a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
+++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
@@ -8,7 +8,7 @@
 SEC("tp_btf/sys_enter")
 __success
 __log_level(2)
-__msg("r8 = *(u64 *)(r7 +0)          ; R7=ptr_nameidata(off={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)")
+__msg("r8 = *(u64 *)(r7 +0)          ; R7=ptr_nameidata(imm={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)")
 __msg("r9 = *(u8 *)(r8 +0)           ; R8=rdonly_untrusted_mem(sz=0) R9=scalar")
 int btf_id_to_ptr_mem(void *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/verifier_align.c b/tools/testing/selftests/bpf/progs/verifier_align.c
index 90362d61f1fe..24553ce62881 100644
--- a/tools/testing/selftests/bpf/progs/verifier_align.c
+++ b/tools/testing/selftests/bpf/progs/verifier_align.c
@@ -131,7 +131,7 @@ LBL ":"							\
 SEC("tc")
 __success __log_level(2)
 __flag(BPF_F_ANY_ALIGNMENT)
-__msg("6: R0=pkt(off=8,r=8)")
+__msg("6: R0=pkt(r=8,imm=8)")
 __msg("6: {{.*}} R3={{[^)]*}}var_off=(0x0; 0xff)")
 __msg("7: {{.*}} R3={{[^)]*}}var_off=(0x0; 0x1fe)")
 __msg("8: {{.*}} R3={{[^)]*}}var_off=(0x0; 0x3fc)")
@@ -203,10 +203,10 @@ __naked void unknown_mul(void)
 SEC("tc")
 __success __log_level(2)
 __msg("2: {{.*}} R5=pkt(r=0)")
-__msg("4: {{.*}} R5=pkt(off=14,r=0)")
-__msg("5: {{.*}} R4=pkt(off=14,r=0)")
+__msg("4: {{.*}} R5=pkt(r=0,imm=14)")
+__msg("5: {{.*}} R4=pkt(r=0,imm=14)")
 __msg("9: {{.*}} R2=pkt(r=18)")
-__msg("10: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff){{.*}} R5=pkt(off=14,r=18)")
+__msg("10: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff){{.*}} R5=pkt(r=18,imm=14)")
 __msg("13: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xffff)")
 __msg("14: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xffff)")
 __naked void packet_const_offset(void)
@@ -247,14 +247,14 @@ __msg("7: {{.*}} R6={{[^)]*}}var_off=(0x0; 0x3fc)")
 /* Offset is added to packet pointer R5, resulting in
  * known fixed offset, and variable offset from R6.
  */
-__msg("11: {{.*}} R5=pkt(id=1,off=14,")
+__msg("11: {{.*}} R5=pkt(id=1,{{[^)]*}},var_off=(0x2; 0x7fc)")
 /* At the time the word size load is performed from R5,
  * it's total offset is NET_IP_ALIGN + reg->off (0) +
  * reg->aux_off (14) which is 16.  Then the variable
  * offset is considered using reg->aux_off_align which
  * is 4 and meets the load's requirements.
  */
-__msg("15: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x3fc){{.*}} R5={{[^)]*}}var_off=(0x0; 0x3fc)")
+__msg("15: {{.*}} R4={{[^)]*}}var_off=(0x2; 0x7fc){{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
 /* Variable offset is added to R5 packet pointer,
  * resulting in auxiliary alignment of 4. To avoid BPF
  * verifier's precision backtracking logging
@@ -266,37 +266,37 @@ __msg("18: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x3fc){{.*}} R5={{[^)]*}}var_off=(0x
 /* Constant offset is added to R5, resulting in
  * reg->off of 14.
  */
-__msg("19: {{.*}} R5=pkt(id=2,off=14,")
+__msg("19: {{.*}} R5=pkt(id=2,{{[^)]*}}var_off=(0x2; 0x7fc)")
 /* At the time the word size load is performed from R5,
  * its total fixed offset is NET_IP_ALIGN + reg->off
  * (14) which is 16.  Then the variable offset is 4-byte
  * aligned, so the total offset is 4-byte aligned and
  * meets the load's requirements.
  */
-__msg("24: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x3fc){{.*}} R5={{[^)]*}}var_off=(0x0; 0x3fc)")
+__msg("24: {{.*}} R4={{[^)]*}}var_off=(0x2; 0x7fc){{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
 /* Constant offset is added to R5 packet pointer,
  * resulting in reg->off value of 14.
  */
-__msg("26: {{.*}} R5=pkt(off=14,r=8)")
+__msg("26: {{.*}} R5=pkt(r=8,imm=14)")
 /* Variable offset is added to R5, resulting in a
  * variable offset of (4n). See comment for insn #18
  * for R4 = R5 trick.
  */
-__msg("28: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x3fc){{.*}} R5={{[^)]*}}var_off=(0x0; 0x3fc)")
+__msg("28: {{.*}} R4={{[^)]*}}var_off=(0x2; 0x7fc){{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
 /* Constant is added to R5 again, setting reg->off to 18. */
-__msg("29: {{.*}} R5=pkt(id=3,off=18,")
+__msg("29: {{.*}} R5=pkt(id=3,{{[^)]*}}var_off=(0x2; 0x7fc)")
 /* And once more we add a variable; resulting {{[^)]*}}var_off
  * is still (4n), fixed offset is not changed.
  * Also, we create a new reg->id.
  */
-__msg("31: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x7fc){{.*}} R5={{[^)]*}}var_off=(0x0; 0x7fc)")
+__msg("31: {{.*}} R4={{[^)]*}}var_off=(0x2; 0xffc){{.*}} R5={{[^)]*}}var_off=(0x2; 0xffc)")
 /* At the time the word size load is performed from R5,
  * its total fixed offset is NET_IP_ALIGN + reg->off (18)
  * which is 20.  Then the variable offset is (4n), so
  * the total offset is 4-byte aligned and meets the
  * load's requirements.
  */
-__msg("35: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x7fc){{.*}} R5={{[^)]*}}var_off=(0x0; 0x7fc)")
+__msg("35: {{.*}} R4={{[^)]*}}var_off=(0x2; 0xffc){{.*}} R5={{[^)]*}}var_off=(0x2; 0xffc)")
 __naked void packet_variable_offset(void)
 {
 	asm volatile ("					\
@@ -430,16 +430,10 @@ __msg("6: {{.*}} R5={{[^)]*}}var_off=(0x2; 0xfffffffffffffffc)")
 /* Checked s>=0 */
 __msg("9: {{.*}} R5={{[^)]*}}var_off=(0x2; 0x7ffffffffffffffc)")
 /* packet pointer + nonnegative (4n+2) */
-__msg("11: {{.*}} R6={{[^)]*}}var_off=(0x2; 0x7ffffffffffffffc)")
-__msg("12: {{.*}} R4={{[^)]*}}var_off=(0x2; 0x7ffffffffffffffc)")
-/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
- * We checked the bounds, but it might have been able
- * to overflow if the packet pointer started in the
- * upper half of the address space.
- * So we did not get a 'range' on R6, and the access
- * attempt will fail.
- */
-__msg("15: {{.*}} R6={{[^)]*}}var_off=(0x2; 0x7ffffffffffffffc)")
+__msg("11: {{.*}} R4={{[^)]*}}var_off=(0x2; 0x7ffffffffffffffc){{.*}} R6={{[^)]*}}var_off=(0x2; 0x7ffffffffffffffc)")
+__msg("12: (07) r4 += 4")
+/* packet smax bound overflow */
+__msg("pkt pointer offset -9223372036854775808 is not allowed")
 __naked void dubious_pointer_arithmetic(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 560531404bce..d195eaa67d75 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -202,7 +202,7 @@ l0_%=:	/* exit */					\
 
 SEC("tc")
 __description("bounds check based on reg_off + var_off + insn_off. test1")
-__failure __msg("value_size=8 off=1073741825")
+__failure __msg("map_value pointer offset 1073741822 is not allowed")
 __naked void var_off_insn_off_test1(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
index 911caa8fd1b7..4ee3b7a708f7 100644
--- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -412,7 +412,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("tc")
 __description("direct packet access: test17 (pruning, alignment)")
-__failure __msg("misaligned packet access off 2+0+15+-4 size 4")
+__failure __msg("misaligned packet access off 2+15+-4 size 4")
 __flag(BPF_F_STRICT_ALIGNMENT)
 __naked void packet_access_test17_pruning_alignment(void)
 {
@@ -569,7 +569,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("tc")
 __description("direct packet access: test23 (x += pkt_ptr, 4)")
-__failure __msg("invalid access to packet, off=0 size=8, R5(id=3,off=0,r=0)")
+__failure __msg("invalid access to packet, off=31 size=8, R5(id=3,off=31,r=0)")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void test23_x_pkt_ptr_4(void)
 {
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotox.c b/tools/testing/selftests/bpf/progs/verifier_gotox.c
index 607dad058ca1..548dce00f5fb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotox.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotox.c
@@ -131,7 +131,7 @@ DEFINE_INVALID_SIZE_PROG(u16, __failure __msg("Invalid read of 2 bytes from insn
 DEFINE_INVALID_SIZE_PROG(u8,  __failure __msg("Invalid read of 1 bytes from insn_array"))
 
 SEC("socket")
-__failure __msg("misaligned value access off 0+1+0 size 8")
+__failure __msg("misaligned value access off 1+0 size 8")
 __naked void jump_table_misaligned_access(void)
 {
 	asm volatile ("						\
@@ -187,7 +187,7 @@ jt0_%=:								\
 }
 
 SEC("socket")
-__failure __msg("invalid access to map value, value_size=16 off=-24 size=8")
+__failure __msg("R0 min value is negative")
 __naked void jump_table_invalid_mem_acceess_neg(void)
 {
 	asm volatile ("						\
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
index 74f5f9cd153d..71cee3f58324 100644
--- a/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
@@ -360,7 +360,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("tc")
 __description("helper access to packet: test15, cls helper fail sub")
-__failure __msg("invalid access to packet")
+__failure __msg("R1 min value is negative")
 __naked void test15_cls_helper_fail_sub(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
index 886498b5e6f3..6d2a38597c34 100644
--- a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
@@ -1100,7 +1100,7 @@ l0_%=:	exit;						\
 
 SEC("tracepoint")
 __description("map helper access to adjusted map (via const imm): out-of-bound 2")
-__failure __msg("invalid access to map value, value_size=16 off=-4 size=8")
+__failure __msg("R2 min value is negative")
 __naked void imm_out_of_bound_2(void)
 {
 	asm volatile ("					\
@@ -1176,7 +1176,7 @@ l0_%=:	exit;						\
 
 SEC("tracepoint")
 __description("map helper access to adjusted map (via const reg): out-of-bound 2")
-__failure __msg("invalid access to map value, value_size=16 off=-4 size=8")
+__failure __msg("R2 min value is negative")
 __naked void reg_out_of_bound_2(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
index 59e34d558654..6627f44faf4b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
@@ -65,7 +65,7 @@ __naked void ptr_to_long_half_uninitialized(void)
 
 SEC("cgroup/sysctl")
 __description("arg pointer to long misaligned")
-__failure __msg("misaligned stack access off 0+-20+0 size 8")
+__failure __msg("misaligned stack access off -20+0 size 8")
 __naked void arg_ptr_to_long_misaligned(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_meta_access.c b/tools/testing/selftests/bpf/progs/verifier_meta_access.c
index d81722fb5f19..62235f032ffe 100644
--- a/tools/testing/selftests/bpf/progs/verifier_meta_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_meta_access.c
@@ -27,7 +27,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("meta access, test2")
-__failure __msg("invalid access to packet, off=-8")
+__failure __msg("R0 min value is negative")
 __naked void meta_access_test2(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 7a13dbd794b2..893d3bb024a0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -656,7 +656,7 @@ __msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
 __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0")
 __msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
 __msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1")
-__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,imm=1) R2=1")
 /* validate load from fp-16, which was initialized using BPF_STX_MEM */
 __msg("12: (79) r2 = *(u64 *)(r10 -16)       ; R2=1 R10=fp0 fp-16=1")
 __msg("13: (0f) r1 += r2")
@@ -673,7 +673,7 @@ __msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
 __msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
 __msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0")
 __msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,imm=1) R2=1")
 __naked void stack_load_preserves_const_precision(void)
 {
 	asm volatile (
@@ -732,7 +732,7 @@ __msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
 __msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0")
 __msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
 __msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1")
-__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,imm=1) R2=1")
 /* validate load from fp-16, which was initialized using BPF_STX_MEM */
 __msg("12: (61) r2 = *(u32 *)(r10 -16)       ; R2=1 R10=fp0 fp-16=????1")
 __msg("13: (0f) r1 += r2")
@@ -748,7 +748,7 @@ __msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
 __msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
 __msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0")
 __msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,imm=1) R2=1")
 __naked void stack_load_preserves_const_precision_subreg(void)
 {
 	asm volatile (
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
index 24aabc6083fd..8e8cf8232255 100644
--- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
@@ -37,7 +37,7 @@ __naked void ptr_to_stack_store_load(void)
 
 SEC("socket")
 __description("PTR_TO_STACK store/load - bad alignment on off")
-__failure __msg("misaligned stack access off 0+-8+2 size 8")
+__failure __msg("misaligned stack access off -8+2 size 8")
 __failure_unpriv
 __naked void load_bad_alignment_on_off(void)
 {
@@ -53,7 +53,7 @@ __naked void load_bad_alignment_on_off(void)
 
 SEC("socket")
 __description("PTR_TO_STACK store/load - bad alignment on reg")
-__failure __msg("misaligned stack access off 0+-10+8 size 8")
+__failure __msg("misaligned stack access off -10+8 size 8")
 __failure_unpriv
 __naked void load_bad_alignment_on_reg(void)
 {
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
index af7938ce56cb..b3b701b44550 100644
--- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
@@ -346,7 +346,7 @@ l2_%=:	r0 = 1;						\
 SEC("socket")
 __description("map access: value_ptr -= known scalar from different maps")
 __success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__msg_unpriv("R0 min value is negative")
 __retval(1)
 __naked void known_scalar_from_different_maps(void)
 {
@@ -683,9 +683,7 @@ l0_%=:	r0 = 1;						\
 
 SEC("socket")
 __description("map access: value_ptr -= known scalar, lower oob arith, test 1")
-__failure __msg("R0 min value is outside of the allowed memory range")
-__failure_unpriv
-__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__failure __msg("R0 min value is negative")
 __naked void lower_oob_arith_test_1(void)
 {
 	asm volatile ("					\
@@ -840,7 +838,7 @@ l0_%=:	r0 = 1;						\
 
 SEC("socket")
 __description("map access: value_ptr += known scalar, 3")
-__failure __msg("invalid access to map value")
+__failure __msg("R0 min value is negative")
 __failure_unpriv
 __naked void value_ptr_known_scalar_3(void)
 {
@@ -1207,7 +1205,7 @@ l0_%=:	r0 = 1;						\
 
 SEC("socket")
 __description("map access: value_ptr -= known scalar")
-__failure __msg("R0 min value is outside of the allowed memory range")
+__failure __msg("R0 min value is negative")
 __failure_unpriv
 __naked void access_value_ptr_known_scalar(void)
 {
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
index df2dfd1b15d1..0b86d95a4133 100644
--- a/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
@@ -69,7 +69,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data' > pkt_end, bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_end_bad_access_1_1(void)
 {
@@ -131,7 +131,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_1(void)
 {
@@ -173,7 +173,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_end > pkt_data', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_2(void)
 {
@@ -279,7 +279,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_3(void)
 {
@@ -384,7 +384,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_end < pkt_data', bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_data_bad_access_1_1(void)
 {
@@ -446,7 +446,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_end < pkt_data', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_4(void)
 {
@@ -487,7 +487,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_5(void)
 {
@@ -590,7 +590,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_end >= pkt_data', bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_data_bad_access_1_2(void)
 {
@@ -654,7 +654,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_6(void)
 {
@@ -697,7 +697,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data' <= pkt_end, bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_end_bad_access_1_2(void)
 {
@@ -761,7 +761,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_7(void)
 {
@@ -803,7 +803,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_8(void)
 {
@@ -905,7 +905,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_meta' > pkt_data, bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_data_bad_access_1_3(void)
 {
@@ -926,7 +926,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_meta' > pkt_data, bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_data_bad_access_2_5(void)
 {
@@ -967,7 +967,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_9(void)
 {
@@ -1009,7 +1009,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_10(void)
 {
@@ -1031,7 +1031,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data > pkt_meta', bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_meta_bad_access_2_1(void)
 {
@@ -1115,7 +1115,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_11(void)
 {
@@ -1137,7 +1137,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_meta' < pkt_data, bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_data_bad_access_2_6(void)
 {
@@ -1220,7 +1220,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data < pkt_meta', bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_meta_bad_access_1_1(void)
 {
@@ -1241,7 +1241,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data < pkt_meta', bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_meta_bad_access_2_2(void)
 {
@@ -1282,7 +1282,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_12(void)
 {
@@ -1323,7 +1323,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_13(void)
 {
@@ -1344,7 +1344,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_meta' >= pkt_data, bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_data_bad_access_2_7(void)
 {
@@ -1426,7 +1426,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data >= pkt_meta', bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_meta_bad_access_1_2(void)
 {
@@ -1448,7 +1448,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data >= pkt_meta', bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_meta_bad_access_2_3(void)
 {
@@ -1490,7 +1490,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_14(void)
 {
@@ -1533,7 +1533,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_meta' <= pkt_data, bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_data_bad_access_1_4(void)
 {
@@ -1555,7 +1555,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_meta' <= pkt_data, bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_data_bad_access_2_8(void)
 {
@@ -1597,7 +1597,7 @@ l1_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_15(void)
 {
@@ -1639,7 +1639,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void corner_case_1_bad_access_16(void)
 {
@@ -1660,7 +1660,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("XDP pkt read, pkt_data <= pkt_meta', bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
 __flag(BPF_F_ANY_ALIGNMENT)
 __naked void pkt_meta_bad_access_2_4(void)
 {
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 9ca83dce100d..86887130a0ef 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -220,7 +220,7 @@
 	},
 	.result_unpriv = REJECT,
 	.result = REJECT,
-	.errstr = "variable trusted_ptr_ access var_off=(0x0; 0x7) disallowed",
+	.errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc",
 },
 {
 	"calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
-- 
cgit v1.2.3


From 3d91c618aca403a7f7d2064272f528a97b849475 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Thu, 12 Feb 2026 13:34:24 -0800
Subject: bpf: rename bpf_reg_state->off to bpf_reg_state->delta

This field is now used only for linked scalar registers tracking.
Rename it to 'delta' to better describe it's purpose:
constant delta between "linked" scalars with the same ID.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260212-ptrs-off-migration-v2-4-00820e4d3438@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  6 +++---
 kernel/bpf/log.c             |  8 ++++----
 kernel/bpf/verifier.c        | 18 +++++++++---------
 3 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index a97bdbf3a07b..c1e30096ea7b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -40,7 +40,7 @@ struct bpf_reg_state {
 	/*
 	 * Constant delta between "linked" scalars with the same ID.
 	 */
-	s32 off;
+	s32 delta;
 	union {
 		/* valid when type == PTR_TO_PACKET */
 		int range;
@@ -145,9 +145,9 @@ struct bpf_reg_state {
 	 * Upper bit of ID is used to remember relationship between "linked"
 	 * registers. Example:
 	 * r1 = r2;    both will have r1->id == r2->id == N
-	 * r1 += 10;   r1->id == N | BPF_ADD_CONST and r1->off == 10
+	 * r1 += 10;   r1->id == N | BPF_ADD_CONST and r1->delta == 10
 	 * r3 = r2;    both will have r3->id == r2->id == N
-	 * w3 += 10;   r3->id == N | BPF_ADD_CONST32 and r3->off == 10
+	 * w3 += 10;   r3->id == N | BPF_ADD_CONST32 and r3->delta == 10
 	 */
 #define BPF_ADD_CONST64 (1U << 31)
 #define BPF_ADD_CONST32 (1U << 30)
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 39a731392d65..37d72b052192 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -694,7 +694,7 @@ static void print_reg_state(struct bpf_verifier_env *env,
 		if (state->frameno != reg->frameno)
 			verbose(env, "[%d]", reg->frameno);
 		if (tnum_is_const(reg->var_off)) {
-			verbose_snum(env, reg->var_off.value + reg->off);
+			verbose_snum(env, reg->var_off.value + reg->delta);
 			return;
 		}
 	}
@@ -704,7 +704,7 @@ static void print_reg_state(struct bpf_verifier_env *env,
 	if (reg->id)
 		verbose_a("id=%d", reg->id & ~BPF_ADD_CONST);
 	if (reg->id & BPF_ADD_CONST)
-		verbose(env, "%+d", reg->off);
+		verbose(env, "%+d", reg->delta);
 	if (reg->ref_obj_id)
 		verbose_a("ref_obj_id=%d", reg->ref_obj_id);
 	if (type_is_non_owning_ref(reg->type))
@@ -716,9 +716,9 @@ static void print_reg_state(struct bpf_verifier_env *env,
 			  reg->map_ptr->key_size,
 			  reg->map_ptr->value_size);
 	}
-	if (t != SCALAR_VALUE && reg->off) {
+	if (t != SCALAR_VALUE && reg->delta) {
 		verbose_a("off=");
-		verbose_snum(env, reg->off);
+		verbose_snum(env, reg->delta);
 	}
 	if (type_is_pkt_pointer(t)) {
 		verbose_a("r=");
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2c5794dad668..0162f946032f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5095,7 +5095,7 @@ static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
 		 * Cleared it, since multiple rX += const are not supported.
 		 */
 		src_reg->id = 0;
-		src_reg->off = 0;
+		src_reg->delta = 0;
 	}
 
 	if (!src_reg->id && !tnum_is_const(src_reg->var_off))
@@ -16219,14 +16219,14 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 			 * we cannot accumulate another val into rx->off.
 			 */
 clear_id:
-			dst_reg->off = 0;
+			dst_reg->delta = 0;
 			dst_reg->id = 0;
 		} else {
 			if (alu32)
 				dst_reg->id |= BPF_ADD_CONST32;
 			else
 				dst_reg->id |= BPF_ADD_CONST64;
-			dst_reg->off = off;
+			dst_reg->delta = off;
 		}
 	} else {
 		/*
@@ -17305,18 +17305,18 @@ static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_s
 		if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
 			continue;
 		if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
-		    reg->off == known_reg->off) {
+		    reg->delta == known_reg->delta) {
 			s32 saved_subreg_def = reg->subreg_def;
 
 			copy_register_state(reg, known_reg);
 			reg->subreg_def = saved_subreg_def;
 		} else {
 			s32 saved_subreg_def = reg->subreg_def;
-			s32 saved_off = reg->off;
+			s32 saved_off = reg->delta;
 			u32 saved_id = reg->id;
 
 			fake_reg.type = SCALAR_VALUE;
-			__mark_reg_known(&fake_reg, (s64)reg->off - (s64)known_reg->off);
+			__mark_reg_known(&fake_reg, (s64)reg->delta - (s64)known_reg->delta);
 
 			/* reg = known_reg; reg += delta */
 			copy_register_state(reg, known_reg);
@@ -17324,7 +17324,7 @@ static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_s
 			 * Must preserve off, id and subreg_def flag,
 			 * otherwise another sync_linked_regs() will be incorrect.
 			 */
-			reg->off = saved_off;
+			reg->delta = saved_off;
 			reg->id = saved_id;
 			reg->subreg_def = saved_subreg_def;
 
@@ -19629,7 +19629,7 @@ static void clear_singular_ids(struct bpf_verifier_env *env,
 			continue;
 		if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1) {
 			reg->id = 0;
-			reg->off = 0;
+			reg->delta = 0;
 		}
 	}));
 }
@@ -19766,7 +19766,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
 			return false;
 
 		/* Both have offset linkage: offsets must match */
-		if ((rold->id & BPF_ADD_CONST) && rold->off != rcur->off)
+		if ((rold->id & BPF_ADD_CONST) && rold->delta != rcur->delta)
 			return false;
 
 		if (!check_scalar_ids(rold->id, rcur->id, idmap))
-- 
cgit v1.2.3


From a235d3bcd28ba2d472f5b4cb6b259baeab75bafd Mon Sep 17 00:00:00 2001
From: Kundan Kumar <kundan.kumar@samsung.com>
Date: Fri, 13 Feb 2026 11:16:31 +0530
Subject: writeback: prep helpers for dirty-limit and writeback accounting

Add helper APIs needed by filesystems to avoid poking into writeback
internals.

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kundan Kumar <kundan.kumar@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Link: https://patch.msgid.link/20260213054634.79785-2-kundan.kumar@samsung.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/backing-dev.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0c8342747cab..5b7d12b40d5e 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -136,6 +136,19 @@ static inline bool mapping_can_writeback(struct address_space *mapping)
 	return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK;
 }
 
+/* Must not be used by file systems that support cgroup writeback */
+static inline int bdi_wb_dirty_exceeded(struct backing_dev_info *bdi)
+{
+	return bdi->wb.dirty_exceeded;
+}
+
+/* Must not be used by file systems that support cgroup writeback */
+static inline void bdi_wb_stat_mod(struct inode *inode, enum wb_stat_item item,
+				   s64 amount)
+{
+	wb_stat_mod(&inode_to_bdi(inode)->wb, item, amount);
+}
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
-- 
cgit v1.2.3


From 0d799df5b147e08828887fe7299efd7a9e0eea40 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 19 Feb 2026 07:50:01 +0100
Subject: fs: mark bool_names static

The bool_names array is only used in fs_parser.c so mark it static.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260219065014.3550402-2-hch@lst.de
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fs_parser.c            | 3 +--
 include/linux/fs_parser.h | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index c092a9f79e32..46993e31137d 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -13,7 +13,7 @@
 #include <linux/namei.h>
 #include "internal.h"
 
-const struct constant_table bool_names[] = {
+static const struct constant_table bool_names[] = {
 	{ "0",		false },
 	{ "1",		true },
 	{ "false",	false },
@@ -22,7 +22,6 @@ const struct constant_table bool_names[] = {
 	{ "yes",	true },
 	{ },
 };
-EXPORT_SYMBOL(bool_names);
 
 static const struct constant_table *
 __lookup_constant(const struct constant_table *tbl, const char *name)
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index 5e8a3b546033..ac8253cca2bc 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -84,8 +84,6 @@ extern int fs_lookup_param(struct fs_context *fc,
 
 extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found);
 
-extern const struct constant_table bool_names[];
-
 #ifdef CONFIG_VALIDATE_FS_PARSER
 extern bool fs_validate_description(const char *name,
 				    const struct fs_parameter_spec *desc);
-- 
cgit v1.2.3


From 8823db29744fceda9f94e674f74deea446c620b3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 19 Feb 2026 07:50:02 +0100
Subject: fs: remove fsparam_blob / fs_param_is_blob

These are not used anywhere even after the fs_context conversion is
finished, so remove them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260219065014.3550402-3-hch@lst.de
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/mount_api.rst | 2 --
 fs/fs_parser.c                          | 9 ---------
 include/linux/fs_parser.h               | 3 +--
 3 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst
index a064234fed5b..b4a0f23914a6 100644
--- a/Documentation/filesystems/mount_api.rst
+++ b/Documentation/filesystems/mount_api.rst
@@ -647,7 +647,6 @@ The members are as follows:
 	fs_param_is_u64		64-bit unsigned int	result->uint_64
 	fs_param_is_enum	Enum value name 	result->uint_32
 	fs_param_is_string	Arbitrary string	param->string
-	fs_param_is_blob	Binary blob		param->blob
 	fs_param_is_blockdev	Blockdev path		* Needs lookup
 	fs_param_is_path	Path			* Needs lookup
 	fs_param_is_fd		File descriptor		result->int_32
@@ -681,7 +680,6 @@ The members are as follows:
 	fsparam_u64()		fs_param_is_u64
 	fsparam_enum()		fs_param_is_enum
 	fsparam_string()	fs_param_is_string
-	fsparam_blob()		fs_param_is_blob
 	fsparam_bdev()		fs_param_is_blockdev
 	fsparam_path()		fs_param_is_path
 	fsparam_fd()		fs_param_is_fd
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index 46993e31137d..79e8fe9176fa 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -277,15 +277,6 @@ int fs_param_is_string(struct p_log *log, const struct fs_parameter_spec *p,
 }
 EXPORT_SYMBOL(fs_param_is_string);
 
-int fs_param_is_blob(struct p_log *log, const struct fs_parameter_spec *p,
-		     struct fs_parameter *param, struct fs_parse_result *result)
-{
-	if (param->type != fs_value_is_blob)
-		return fs_param_bad_value(log, param);
-	return 0;
-}
-EXPORT_SYMBOL(fs_param_is_blob);
-
 int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p,
 		  struct fs_parameter *param, struct fs_parse_result *result)
 {
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index ac8253cca2bc..961562b101c5 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -27,7 +27,7 @@ typedef int fs_param_type(struct p_log *,
  * The type of parameter expected.
  */
 fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64,
-	fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev,
+	fs_param_is_enum, fs_param_is_string, fs_param_is_blockdev,
 	fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid,
 	fs_param_is_file_or_string;
 
@@ -125,7 +125,6 @@ static inline bool fs_validate_description(const char *name,
 #define fsparam_enum(NAME, OPT, array)	__fsparam(fs_param_is_enum, NAME, OPT, 0, array)
 #define fsparam_string(NAME, OPT) \
 				__fsparam(fs_param_is_string, NAME, OPT, 0, NULL)
-#define fsparam_blob(NAME, OPT)	__fsparam(fs_param_is_blob, NAME, OPT, 0, NULL)
 #define fsparam_bdev(NAME, OPT)	__fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL)
 #define fsparam_path(NAME, OPT)	__fsparam(fs_param_is_path, NAME, OPT, 0, NULL)
 #define fsparam_fd(NAME, OPT)	__fsparam(fs_param_is_fd, NAME, OPT, 0, NULL)
-- 
cgit v1.2.3


From d2f2f7cf8e898f7b80fe031a263df9c7de94b0b7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 19 Feb 2026 07:50:03 +0100
Subject: fs: remove fsparam_path / fs_param_is_path

These are not used anywhere even after the fs_context conversion is
finished, so remove them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260219065014.3550402-4-hch@lst.de
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/mount_api.rst | 2 --
 fs/fs_parser.c                          | 7 -------
 include/linux/fs_parser.h               | 3 +--
 3 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst
index b4a0f23914a6..e8b94357b4df 100644
--- a/Documentation/filesystems/mount_api.rst
+++ b/Documentation/filesystems/mount_api.rst
@@ -648,7 +648,6 @@ The members are as follows:
 	fs_param_is_enum	Enum value name 	result->uint_32
 	fs_param_is_string	Arbitrary string	param->string
 	fs_param_is_blockdev	Blockdev path		* Needs lookup
-	fs_param_is_path	Path			* Needs lookup
 	fs_param_is_fd		File descriptor		result->int_32
 	fs_param_is_uid		User ID (u32)           result->uid
 	fs_param_is_gid		Group ID (u32)          result->gid
@@ -681,7 +680,6 @@ The members are as follows:
 	fsparam_enum()		fs_param_is_enum
 	fsparam_string()	fs_param_is_string
 	fsparam_bdev()		fs_param_is_blockdev
-	fsparam_path()		fs_param_is_path
 	fsparam_fd()		fs_param_is_fd
 	fsparam_uid()		fs_param_is_uid
 	fsparam_gid()		fs_param_is_gid
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index 79e8fe9176fa..b4cc4cce518a 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -361,13 +361,6 @@ int fs_param_is_blockdev(struct p_log *log, const struct fs_parameter_spec *p,
 }
 EXPORT_SYMBOL(fs_param_is_blockdev);
 
-int fs_param_is_path(struct p_log *log, const struct fs_parameter_spec *p,
-		     struct fs_parameter *param, struct fs_parse_result *result)
-{
-	return 0;
-}
-EXPORT_SYMBOL(fs_param_is_path);
-
 #ifdef CONFIG_VALIDATE_FS_PARSER
 /**
  * fs_validate_description - Validate a parameter specification array
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index 961562b101c5..98b83708f92b 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -28,7 +28,7 @@ typedef int fs_param_type(struct p_log *,
  */
 fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64,
 	fs_param_is_enum, fs_param_is_string, fs_param_is_blockdev,
-	fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid,
+	fs_param_is_fd, fs_param_is_uid, fs_param_is_gid,
 	fs_param_is_file_or_string;
 
 /*
@@ -126,7 +126,6 @@ static inline bool fs_validate_description(const char *name,
 #define fsparam_string(NAME, OPT) \
 				__fsparam(fs_param_is_string, NAME, OPT, 0, NULL)
 #define fsparam_bdev(NAME, OPT)	__fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL)
-#define fsparam_path(NAME, OPT)	__fsparam(fs_param_is_path, NAME, OPT, 0, NULL)
 #define fsparam_fd(NAME, OPT)	__fsparam(fs_param_is_fd, NAME, OPT, 0, NULL)
 #define fsparam_file_or_string(NAME, OPT) \
 				__fsparam(fs_param_is_file_or_string, NAME, OPT, 0, NULL)
-- 
cgit v1.2.3


From 95cef38e70250234a254e6228eb7342b6deaaffa Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 17 Feb 2026 16:56:18 +0100
Subject: firmware: google: Export coreboot table entries

Move types for coreboot table entries to <linux/coreboot.h>. Allows
drivers in other subsystems to use these structures.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Tzung-Bi Shih <tzungbi@kernel.org>
Acked-by: Julius Werner <jwerner@chromium.org>
Link: https://patch.msgid.link/20260217155836.96267-9-tzimmermann@suse.de
---
 MAINTAINERS                                    |  1 +
 drivers/firmware/google/coreboot_table.c       | 10 ++++
 drivers/firmware/google/coreboot_table.h       | 60 +----------------------
 drivers/firmware/google/framebuffer-coreboot.c |  2 -
 include/linux/coreboot.h                       | 66 ++++++++++++++++++++++++++
 5 files changed, 78 insertions(+), 61 deletions(-)
 create mode 100644 include/linux/coreboot.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 4a2d5e8f0f63..d0dfcfd15e59 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10753,6 +10753,7 @@ L:	chrome-platform@lists.linux.dev
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/chrome-platform/linux.git
 F:	drivers/firmware/google/
+F:	include/linux/coreboot.h
 
 GOOGLE TENSOR SoC SUPPORT
 M:	Peter Griffin <peter.griffin@linaro.org>
diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c
index a031d6fe6bc5..c769631ea15d 100644
--- a/drivers/firmware/google/coreboot_table.c
+++ b/drivers/firmware/google/coreboot_table.c
@@ -22,6 +22,16 @@
 
 #include "coreboot_table.h"
 
+/* Coreboot table header structure */
+struct coreboot_table_header {
+	char signature[4];
+	u32 header_bytes;
+	u32 header_checksum;
+	u32 table_bytes;
+	u32 table_checksum;
+	u32 table_entries;
+};
+
 #define CB_DEV(d) container_of(d, struct coreboot_device, dev)
 #define CB_DRV(d) container_of_const(d, struct coreboot_driver, drv)
 
diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h
index 17e9e5c3f6e1..616ca3903e5c 100644
--- a/drivers/firmware/google/coreboot_table.h
+++ b/drivers/firmware/google/coreboot_table.h
@@ -12,67 +12,9 @@
 #ifndef __COREBOOT_TABLE_H
 #define __COREBOOT_TABLE_H
 
+#include <linux/coreboot.h>
 #include <linux/device.h>
 
-struct coreboot_device_id;
-
-/* Coreboot table header structure */
-struct coreboot_table_header {
-	char signature[4];
-	u32 header_bytes;
-	u32 header_checksum;
-	u32 table_bytes;
-	u32 table_checksum;
-	u32 table_entries;
-};
-
-/* List of coreboot entry structures that is used */
-/* Generic */
-struct coreboot_table_entry {
-	u32 tag;
-	u32 size;
-};
-
-/* Points to a CBMEM entry */
-struct lb_cbmem_ref {
-	u32 tag;
-	u32 size;
-
-	u64 cbmem_addr;
-};
-
-#define LB_TAG_CBMEM_ENTRY 0x31
-
-/* Corresponds to LB_TAG_CBMEM_ENTRY */
-struct lb_cbmem_entry {
-	u32 tag;
-	u32 size;
-
-	u64 address;
-	u32 entry_size;
-	u32 id;
-};
-
-/* Describes framebuffer setup by coreboot */
-struct lb_framebuffer {
-	u32 tag;
-	u32 size;
-
-	u64 physical_address;
-	u32 x_resolution;
-	u32 y_resolution;
-	u32 bytes_per_line;
-	u8  bits_per_pixel;
-	u8  red_mask_pos;
-	u8  red_mask_size;
-	u8  green_mask_pos;
-	u8  green_mask_size;
-	u8  blue_mask_pos;
-	u8  blue_mask_size;
-	u8  reserved_mask_pos;
-	u8  reserved_mask_size;
-};
-
 /* A device, additionally with information from coreboot. */
 struct coreboot_device {
 	struct device dev;
diff --git a/drivers/firmware/google/framebuffer-coreboot.c b/drivers/firmware/google/framebuffer-coreboot.c
index 81aa522edb1e..fab3f28655d3 100644
--- a/drivers/firmware/google/framebuffer-coreboot.c
+++ b/drivers/firmware/google/framebuffer-coreboot.c
@@ -21,8 +21,6 @@
 
 #include "coreboot_table.h"
 
-#define CB_TAG_FRAMEBUFFER 0x12
-
 #if defined(CONFIG_PCI)
 static bool framebuffer_pci_dev_is_enabled(struct pci_dev *pdev)
 {
diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h
new file mode 100644
index 000000000000..48705b439c6e
--- /dev/null
+++ b/include/linux/coreboot.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * coreboot.h
+ *
+ * Coreboot device and driver interfaces.
+ *
+ * Copyright 2014 Gerd Hoffmann <kraxel@redhat.com>
+ * Copyright 2017 Google Inc.
+ * Copyright 2017 Samuel Holland <samuel@sholland.org>
+ */
+
+#ifndef _LINUX_COREBOOT_H
+#define _LINUX_COREBOOT_H
+
+#include <linux/types.h>
+
+/* List of coreboot entry structures that is used */
+
+#define CB_TAG_FRAMEBUFFER 0x12
+#define LB_TAG_CBMEM_ENTRY 0x31
+
+/* Generic */
+struct coreboot_table_entry {
+	u32 tag;
+	u32 size;
+};
+
+/* Points to a CBMEM entry */
+struct lb_cbmem_ref {
+	u32 tag;
+	u32 size;
+
+	u64 cbmem_addr;
+};
+
+/* Corresponds to LB_TAG_CBMEM_ENTRY */
+struct lb_cbmem_entry {
+	u32 tag;
+	u32 size;
+
+	u64 address;
+	u32 entry_size;
+	u32 id;
+};
+
+/* Describes framebuffer setup by coreboot */
+struct lb_framebuffer {
+	u32 tag;
+	u32 size;
+
+	u64 physical_address;
+	u32 x_resolution;
+	u32 y_resolution;
+	u32 bytes_per_line;
+	u8  bits_per_pixel;
+	u8  red_mask_pos;
+	u8  red_mask_size;
+	u8  green_mask_pos;
+	u8  green_mask_size;
+	u8  blue_mask_pos;
+	u8  blue_mask_size;
+	u8  reserved_mask_pos;
+	u8  reserved_mask_size;
+};
+
+#endif /* _LINUX_COREBOOT_H */
-- 
cgit v1.2.3


From 27fc52b5505a3acca96b884a4bf1345344e5a566 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 17 Feb 2026 16:56:19 +0100
Subject: firmware: google: Pack structures for coreboot table entries

Pack the fields in the coreboot table entries. These entries are part of
the coreboot ABI, so they don't follow regular calling conventions. Fields
of type u64 are aligned to boundaries of 4 bytes instead of 8. [1]

So far this has not been a problem. In the future, padding bytes should
be added where explicit alignment is required.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://github.com/coreboot/coreboot/blob/main/payloads/libpayload/include/coreboot_tables.h#L96 # [1]
Suggested-by: Julius Werner <jwerner@chromium.org>
Acked-by: Julius Werner <jwerner@chromium.org>
Acked-by: Tzung-Bi Shih <tzungbi@kernel.org>
Link: https://patch.msgid.link/20260217155836.96267-10-tzimmermann@suse.de
---
 include/linux/coreboot.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h
index 48705b439c6e..5746b99a070d 100644
--- a/include/linux/coreboot.h
+++ b/include/linux/coreboot.h
@@ -12,8 +12,11 @@
 #ifndef _LINUX_COREBOOT_H
 #define _LINUX_COREBOOT_H
 
+#include <linux/compiler_attributes.h>
 #include <linux/types.h>
 
+typedef __aligned(4) u64 cb_u64;
+
 /* List of coreboot entry structures that is used */
 
 #define CB_TAG_FRAMEBUFFER 0x12
@@ -30,7 +33,7 @@ struct lb_cbmem_ref {
 	u32 tag;
 	u32 size;
 
-	u64 cbmem_addr;
+	cb_u64 cbmem_addr;
 };
 
 /* Corresponds to LB_TAG_CBMEM_ENTRY */
@@ -38,7 +41,7 @@ struct lb_cbmem_entry {
 	u32 tag;
 	u32 size;
 
-	u64 address;
+	cb_u64 address;
 	u32 entry_size;
 	u32 id;
 };
@@ -48,7 +51,7 @@ struct lb_framebuffer {
 	u32 tag;
 	u32 size;
 
-	u64 physical_address;
+	cb_u64 physical_address;
 	u32 x_resolution;
 	u32 y_resolution;
 	u32 bytes_per_line;
-- 
cgit v1.2.3


From a29a1f0ec8d69ee917a9d4c84b844df0decff0ef Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 17 Feb 2026 16:56:21 +0100
Subject: drm/sysfb: corebootdrm: Add DRM driver for coreboot framebuffers

Add corebootdrm, a DRM driver for coreboot framebuffers. The driver
supports a pre-initialized framebuffer with various packed RGB formats.
The driver code is fairly small and uses the same logic as the other
sysfb drivers. Most of the implementation comes from existing sysfb
helpers.

Until now, coreboot relied on simpledrm or simplefb for boot-up graphics
output. Initialize the platform device for corebootdrm in the same place
in framebuffer_probe(). With a later commit, the simple-framebuffer should
be removed.

v4:
- sort include statements (Tzung-Bi)
v3:
- comment on _HAS_LFB semantics (Tzung-Bi)
- fix typo in commit description (Tzung-Bi)
- comment on simple-framebuffer being obsolete for coreboot
v2:
- reimplement as platform driver
- limit resources and mappings to known framebuffer memory; no
  page alignment
- create corebootdrm device from coreboot framebuffer code

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Julius Werner <jwerner@chromium.org>
Acked-by: Tzung-Bi Shih <tzungbi@kernel.org> # coreboot
Link: https://patch.msgid.link/20260217155836.96267-12-tzimmermann@suse.de
---
 drivers/firmware/google/Kconfig                |   3 +-
 drivers/firmware/google/framebuffer-coreboot.c |  22 +-
 drivers/gpu/drm/sysfb/Kconfig                  |  16 +
 drivers/gpu/drm/sysfb/Makefile                 |   1 +
 drivers/gpu/drm/sysfb/corebootdrm.c            | 412 +++++++++++++++++++++++++
 include/linux/coreboot.h                       |   8 +
 6 files changed, 458 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/sysfb/corebootdrm.c

(limited to 'include/linux')

diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig
index 3ab3e089328b..b78c644fa253 100644
--- a/drivers/firmware/google/Kconfig
+++ b/drivers/firmware/google/Kconfig
@@ -63,7 +63,8 @@ config GOOGLE_FRAMEBUFFER_COREBOOT
 	help
 	  This option enables the kernel to search for a framebuffer in
 	  the coreboot table.  If found, it is registered with a platform
-	  device of type simple-framebuffer.
+	  device of type coreboot-framebuffer. Using the old device of
+	  type simple-framebuffer is deprecated.
 
 config GOOGLE_MEMCONSOLE_COREBOOT
 	tristate "Firmware Memory Console"
diff --git a/drivers/firmware/google/framebuffer-coreboot.c b/drivers/firmware/google/framebuffer-coreboot.c
index fab3f28655d3..2c63a9bd0dcb 100644
--- a/drivers/firmware/google/framebuffer-coreboot.c
+++ b/drivers/firmware/google/framebuffer-coreboot.c
@@ -76,22 +76,23 @@ static struct device *framebuffer_parent_dev(struct resource *res)
 	return NULL;
 }
 
-static const struct simplefb_format formats[] = SIMPLEFB_FORMATS;
-
 static int framebuffer_probe(struct coreboot_device *dev)
 {
-	int i;
 	struct lb_framebuffer *fb = &dev->framebuffer;
 	struct device *parent;
 	struct platform_device *pdev;
 	struct resource res;
 	int ret;
+#if !IS_ENABLED(CONFIG_DRM_COREBOOTDRM)
 	struct simplefb_platform_data pdata = {
 		.width = fb->x_resolution,
 		.height = fb->y_resolution,
 		.stride = fb->bytes_per_line,
 		.format = NULL,
 	};
+	int i;
+	static const struct simplefb_format formats[] = SIMPLEFB_FORMATS;
+#endif
 
 	/*
 	 * On coreboot systems, the advertised LB_TAG_FRAMEBUFFER entry
@@ -118,6 +119,20 @@ static int framebuffer_probe(struct coreboot_device *dev)
 	if (IS_ERR(parent))
 		return PTR_ERR(parent);
 
+#if IS_ENABLED(CONFIG_DRM_COREBOOTDRM)
+	pdev = platform_device_register_resndata(parent, "coreboot-framebuffer", 0,
+						 &res, 1, fb, fb->size);
+	if (IS_ERR(pdev)) {
+		pr_warn("coreboot: could not register framebuffer\n");
+		ret = PTR_ERR(pdev);
+		goto out_put_device_parent;
+	}
+#else
+	/*
+	 * FIXME: Coreboot systems should use a driver that binds to
+	 *        coreboot-framebuffer devices. Remove support for
+	 *        simple-framebuffer at some point.
+	 */
 	for (i = 0; i < ARRAY_SIZE(formats); ++i) {
 		if (fb->bits_per_pixel     == formats[i].bits_per_pixel &&
 		    fb->red_mask_pos       == formats[i].red.offset &&
@@ -142,6 +157,7 @@ static int framebuffer_probe(struct coreboot_device *dev)
 		pr_warn("coreboot: could not register framebuffer\n");
 		goto out_put_device_parent;
 	}
+#endif
 
 	ret = 0;
 
diff --git a/drivers/gpu/drm/sysfb/Kconfig b/drivers/gpu/drm/sysfb/Kconfig
index 9c9884c7efc6..2559ead6cf1f 100644
--- a/drivers/gpu/drm/sysfb/Kconfig
+++ b/drivers/gpu/drm/sysfb/Kconfig
@@ -7,6 +7,22 @@ config DRM_SYSFB_HELPER
 	tristate
 	depends on DRM
 
+config DRM_COREBOOTDRM
+	tristate "Coreboot framebuffer driver"
+	depends on DRM && MMU
+	depends on GOOGLE_FRAMEBUFFER_COREBOOT
+	select APERTURE_HELPERS
+	select DRM_CLIENT_SELECTION
+	select DRM_GEM_SHMEM_HELPER
+	select DRM_KMS_HELPER
+	select DRM_SYSFB_HELPER
+	help
+	  DRM driver for coreboot-provided framebuffers.
+
+	  This driver assumes that the display hardware has been initialized
+	  by coreboot firmware before the kernel boots. Scanout buffer, size,
+	  and display format must be provided via coreboot framebuffer device.
+
 config DRM_EFIDRM
 	tristate "EFI framebuffer driver"
 	depends on DRM && MMU && EFI && (!SYSFB_SIMPLEFB || COMPILE_TEST)
diff --git a/drivers/gpu/drm/sysfb/Makefile b/drivers/gpu/drm/sysfb/Makefile
index a156c496413d..85c9087ab03d 100644
--- a/drivers/gpu/drm/sysfb/Makefile
+++ b/drivers/gpu/drm/sysfb/Makefile
@@ -6,6 +6,7 @@ drm_sysfb_helper-y := \
 drm_sysfb_helper-$(CONFIG_SCREEN_INFO) += drm_sysfb_screen_info.o
 obj-$(CONFIG_DRM_SYSFB_HELPER)	+= drm_sysfb_helper.o
 
+obj-$(CONFIG_DRM_COREBOOTDRM)	+= corebootdrm.o
 obj-$(CONFIG_DRM_EFIDRM)	+= efidrm.o
 obj-$(CONFIG_DRM_OFDRM)		+= ofdrm.o
 obj-$(CONFIG_DRM_SIMPLEDRM)	+= simpledrm.o
diff --git a/drivers/gpu/drm/sysfb/corebootdrm.c b/drivers/gpu/drm/sysfb/corebootdrm.c
new file mode 100644
index 000000000000..745318580a5d
--- /dev/null
+++ b/drivers/gpu/drm/sysfb/corebootdrm.c
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/aperture.h>
+#include <linux/coreboot.h>
+#include <linux/minmax.h>
+#include <linux/platform_device.h>
+
+#include <drm/clients/drm_client_setup.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_state_helper.h>
+#include <drm/drm_connector.h>
+#include <drm/drm_damage_helper.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fbdev_shmem.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_gem_atomic_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
+
+#include "drm_sysfb_helper.h"
+
+#define DRIVER_NAME	"corebootdrm"
+#define DRIVER_DESC	"DRM driver for Coreboot framebuffers"
+#define DRIVER_MAJOR	1
+#define DRIVER_MINOR	0
+
+static const struct drm_format_info *
+corebootdrm_get_format_fb(struct drm_device *dev, const struct lb_framebuffer *fb)
+{
+	static const struct drm_sysfb_format formats[] = {
+		{ PIXEL_FORMAT_XRGB1555, DRM_FORMAT_XRGB1555, },
+		{ PIXEL_FORMAT_RGB565, DRM_FORMAT_RGB565, },
+		{ PIXEL_FORMAT_RGB888, DRM_FORMAT_RGB888, },
+		{ PIXEL_FORMAT_XRGB8888, DRM_FORMAT_XRGB8888, },
+		{ PIXEL_FORMAT_XBGR8888, DRM_FORMAT_XBGR8888, },
+		{ PIXEL_FORMAT_XRGB2101010, DRM_FORMAT_XRGB2101010, },
+	};
+	const struct pixel_format pixel = {
+		.bits_per_pixel = fb->bits_per_pixel,
+		.indexed  = false,
+		.alpha = {
+			.offset = 0,
+			.length = 0,
+		},
+		.red = {
+			.offset = fb->red_mask_pos,
+			.length = fb->red_mask_size,
+		},
+		.green = {
+			.offset = fb->green_mask_pos,
+			.length = fb->green_mask_size,
+		},
+		.blue = {
+			.offset = fb->blue_mask_pos,
+			.length = fb->blue_mask_size,
+		},
+	};
+
+	return drm_sysfb_get_format(dev, formats, ARRAY_SIZE(formats), &pixel);
+}
+
+static int corebootdrm_get_width_fb(struct drm_device *dev, const struct lb_framebuffer *fb)
+{
+	return drm_sysfb_get_validated_int0(dev, "width", fb->x_resolution, INT_MAX);
+}
+
+static int corebootdrm_get_height_fb(struct drm_device *dev, const struct lb_framebuffer *fb)
+{
+	return drm_sysfb_get_validated_int0(dev, "height", fb->y_resolution, INT_MAX);
+}
+
+static int corebootdrm_get_pitch_fb(struct drm_device *dev, const struct drm_format_info *format,
+				    unsigned int width, const struct lb_framebuffer *fb)
+{
+	u64 bytes_per_line = fb->bytes_per_line;
+
+	if (!bytes_per_line)
+		bytes_per_line = drm_format_info_min_pitch(format, 0, width);
+
+	return drm_sysfb_get_validated_int0(dev, "pitch", bytes_per_line, INT_MAX);
+}
+
+static resource_size_t corebootdrm_get_size_fb(struct drm_device *dev, unsigned int height,
+					       unsigned int pitch,
+					       const struct lb_framebuffer *fb)
+{
+	resource_size_t size;
+
+	if (check_mul_overflow(height, pitch, &size))
+		return 0;
+
+	return size;
+}
+
+static phys_addr_t corebootdrm_get_address_fb(struct drm_device *dev, resource_size_t size,
+					      const struct lb_framebuffer *fb)
+{
+	if (size > PHYS_ADDR_MAX)
+		return 0;
+	if (!fb->physical_address)
+		return 0;
+	if (fb->physical_address > (PHYS_ADDR_MAX - size))
+		return 0;
+
+	return fb->physical_address;
+}
+
+/*
+ * Simple Framebuffer device
+ */
+
+struct corebootdrm_device {
+	struct drm_sysfb_device sysfb;
+
+	/* modesetting */
+	u32 formats[DRM_SYSFB_PLANE_NFORMATS(1)];
+	struct drm_plane primary_plane;
+	struct drm_crtc crtc;
+	struct drm_encoder encoder;
+	struct drm_connector connector;
+};
+
+/*
+ * Modesetting
+ */
+
+static const u64 corebootdrm_primary_plane_format_modifiers[] = {
+	DRM_SYSFB_PLANE_FORMAT_MODIFIERS,
+};
+
+static const struct drm_plane_helper_funcs corebootdrm_primary_plane_helper_funcs = {
+	DRM_SYSFB_PLANE_HELPER_FUNCS,
+};
+
+static const struct drm_plane_funcs corebootdrm_primary_plane_funcs = {
+	DRM_SYSFB_PLANE_FUNCS,
+	.destroy = drm_plane_cleanup,
+};
+
+static const struct drm_crtc_helper_funcs corebootdrm_crtc_helper_funcs = {
+	DRM_SYSFB_CRTC_HELPER_FUNCS,
+};
+
+static const struct drm_crtc_funcs corebootdrm_crtc_funcs = {
+	DRM_SYSFB_CRTC_FUNCS,
+	.destroy = drm_crtc_cleanup,
+};
+
+static const struct drm_encoder_funcs corebootdrm_encoder_funcs = {
+	.destroy = drm_encoder_cleanup,
+};
+
+static const struct drm_connector_helper_funcs corebootdrm_connector_helper_funcs = {
+	DRM_SYSFB_CONNECTOR_HELPER_FUNCS,
+};
+
+static const struct drm_connector_funcs corebootdrm_connector_funcs = {
+	DRM_SYSFB_CONNECTOR_FUNCS,
+	.destroy = drm_connector_cleanup,
+};
+
+static const struct drm_mode_config_funcs corebootdrm_mode_config_funcs = {
+	DRM_SYSFB_MODE_CONFIG_FUNCS,
+};
+
+static int corebootdrm_mode_config_init(struct corebootdrm_device *cdev)
+{
+	struct drm_sysfb_device *sysfb = &cdev->sysfb;
+	struct drm_device *dev = &sysfb->dev;
+	const struct drm_format_info *format = sysfb->fb_format;
+	unsigned int width = sysfb->fb_mode.hdisplay;
+	unsigned int height = sysfb->fb_mode.vdisplay;
+	struct drm_plane *primary_plane;
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+	size_t nformats;
+	int ret;
+
+	ret = drmm_mode_config_init(dev);
+	if (ret)
+		return ret;
+
+	dev->mode_config.min_width = width;
+	dev->mode_config.max_width = max_t(unsigned int, width, DRM_SHADOW_PLANE_MAX_WIDTH);
+	dev->mode_config.min_height = height;
+	dev->mode_config.max_height = max_t(unsigned int, height, DRM_SHADOW_PLANE_MAX_HEIGHT);
+	dev->mode_config.funcs = &corebootdrm_mode_config_funcs;
+	dev->mode_config.preferred_depth = format->depth;
+
+	/* Primary plane */
+
+	nformats = drm_sysfb_build_fourcc_list(dev, &format->format, 1,
+					       cdev->formats, ARRAY_SIZE(cdev->formats));
+
+	primary_plane = &cdev->primary_plane;
+	ret = drm_universal_plane_init(dev, primary_plane, 0, &corebootdrm_primary_plane_funcs,
+				       cdev->formats, nformats,
+				       corebootdrm_primary_plane_format_modifiers,
+				       DRM_PLANE_TYPE_PRIMARY, NULL);
+	if (ret)
+		return ret;
+	drm_plane_helper_add(primary_plane, &corebootdrm_primary_plane_helper_funcs);
+	drm_plane_enable_fb_damage_clips(primary_plane);
+
+	/* CRTC */
+
+	crtc = &cdev->crtc;
+	ret = drm_crtc_init_with_planes(dev, crtc, primary_plane, NULL,
+					&corebootdrm_crtc_funcs, NULL);
+	if (ret)
+		return ret;
+	drm_crtc_helper_add(crtc, &corebootdrm_crtc_helper_funcs);
+
+	/* Encoder */
+
+	encoder = &cdev->encoder;
+	ret = drm_encoder_init(dev, encoder, &corebootdrm_encoder_funcs,
+			       DRM_MODE_ENCODER_NONE, NULL);
+	if (ret)
+		return ret;
+	encoder->possible_crtcs = drm_crtc_mask(crtc);
+
+	/* Connector */
+
+	connector = &cdev->connector;
+	ret = drm_connector_init(dev, connector, &corebootdrm_connector_funcs,
+				 DRM_MODE_CONNECTOR_Unknown);
+	if (ret)
+		return ret;
+	drm_connector_helper_add(connector, &corebootdrm_connector_helper_funcs);
+	drm_connector_set_panel_orientation_with_quirk(connector,
+						       DRM_MODE_PANEL_ORIENTATION_UNKNOWN,
+						       width, height);
+
+	ret = drm_connector_attach_encoder(connector, encoder);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * DRM driver
+ */
+
+DEFINE_DRM_GEM_FOPS(corebootdrm_fops);
+
+static struct drm_driver corebootdrm_drm_driver = {
+	DRM_GEM_SHMEM_DRIVER_OPS,
+	DRM_FBDEV_SHMEM_DRIVER_OPS,
+	.name			= DRIVER_NAME,
+	.desc			= DRIVER_DESC,
+	.major			= DRIVER_MAJOR,
+	.minor			= DRIVER_MINOR,
+	.driver_features	= DRIVER_ATOMIC | DRIVER_GEM | DRIVER_MODESET,
+	.fops			= &corebootdrm_fops,
+};
+
+/*
+ * Coreboot driver
+ */
+
+static int corebootdrm_probe(struct platform_device *pdev)
+{
+	const struct lb_framebuffer *fb = dev_get_platdata(&pdev->dev);
+	struct corebootdrm_device *cdev;
+	struct drm_sysfb_device *sysfb;
+	struct drm_device *dev;
+	const struct drm_format_info *format;
+	int width, height, pitch;
+	resource_size_t size;
+	phys_addr_t address;
+	struct resource *res, *mem = NULL;
+	struct resource aperture;
+	void __iomem *screen_base;
+	int ret;
+
+	cdev = devm_drm_dev_alloc(&pdev->dev, &corebootdrm_drm_driver,
+				  struct corebootdrm_device, sysfb.dev);
+	if (IS_ERR(cdev))
+		return PTR_ERR(cdev);
+	platform_set_drvdata(pdev, cdev);
+
+	sysfb = &cdev->sysfb;
+	dev = &sysfb->dev;
+
+	if (!fb) {
+		drm_err(dev, "coreboot framebuffer not found\n");
+		return -EINVAL;
+	} else if (!LB_FRAMEBUFFER_HAS_LFB(fb)) {
+		drm_err(dev, "coreboot framebuffer entry too small\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Hardware settings
+	 */
+
+	format = corebootdrm_get_format_fb(dev, fb);
+	if (!format)
+		return -EINVAL;
+	width = corebootdrm_get_width_fb(dev, fb);
+	if (width < 0)
+		return width;
+	height = corebootdrm_get_height_fb(dev, fb);
+	if (height < 0)
+		return height;
+	pitch = corebootdrm_get_pitch_fb(dev, format, width, fb);
+	if (pitch < 0)
+		return pitch;
+	size = corebootdrm_get_size_fb(dev, height, pitch, fb);
+	if (!size)
+		return -EINVAL;
+	address = corebootdrm_get_address_fb(dev, size, fb);
+	if (!address)
+		return -EINVAL;
+
+	sysfb->fb_mode = drm_sysfb_mode(width, height, 0, 0);
+	sysfb->fb_format = format;
+	sysfb->fb_pitch = pitch;
+
+	drm_dbg(dev, "display mode={" DRM_MODE_FMT "}\n", DRM_MODE_ARG(&sysfb->fb_mode));
+	drm_dbg(dev, "framebuffer format=%p4cc, size=%dx%d, pitch=%d byte\n",
+		&format->format, width, height, pitch);
+
+	/*
+	 * Memory management
+	 */
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		drm_err(dev, "memory resource not found\n");
+		return -EINVAL;
+	}
+
+	mem = devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
+				      dev->driver->name);
+	if (!mem) {
+		drm_warn(dev, "could not acquire memory resource at %pr\n", res);
+		/*
+		 * We cannot make this fatal. Sometimes this comes from magic
+		 * spaces our resource handlers simply don't know about. Use
+		 * the memory resource as-is and try to map that instead.
+		 */
+		mem = res;
+	}
+
+	drm_dbg(dev, "using memory resource at %pr\n", mem);
+
+	aperture = DEFINE_RES_MEM(address, size);
+	if (!resource_contains(mem, &aperture)) {
+		drm_err(dev, "framebuffer aperture at invalid memory range %pr\n", &aperture);
+		return -EINVAL;
+	}
+
+	ret = devm_aperture_acquire_for_platform_device(pdev, address, size);
+	if (ret) {
+		drm_err(dev, "could not acquire framebuffer aperture: %d\n", ret);
+		return ret;
+	}
+
+	screen_base = devm_ioremap_wc(&pdev->dev, address, size);
+	if (!screen_base)
+		return -ENOMEM;
+
+	iosys_map_set_vaddr_iomem(&sysfb->fb_addr, screen_base);
+
+	/*
+	 * DRM mode setting and registration
+	 */
+
+	ret = corebootdrm_mode_config_init(cdev);
+	if (ret)
+		return ret;
+
+	drm_mode_config_reset(dev);
+
+	ret = drm_dev_register(dev, 0);
+	if (ret)
+		return ret;
+
+	drm_client_setup(dev, sysfb->fb_format);
+
+	return 0;
+}
+
+static void corebootdrm_remove(struct platform_device *pdev)
+{
+	struct corebootdrm_device *cdev = platform_get_drvdata(pdev);
+	struct drm_device *dev = &cdev->sysfb.dev;
+
+	drm_dev_unplug(dev);
+}
+
+static struct platform_driver corebootdrm_platform_driver = {
+	.driver = {
+		.name = "coreboot-framebuffer",
+	},
+	.probe = corebootdrm_probe,
+	.remove = corebootdrm_remove,
+};
+
+module_platform_driver(corebootdrm_platform_driver);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h
index 5746b99a070d..885da106fee3 100644
--- a/include/linux/coreboot.h
+++ b/include/linux/coreboot.h
@@ -13,6 +13,7 @@
 #define _LINUX_COREBOOT_H
 
 #include <linux/compiler_attributes.h>
+#include <linux/stddef.h>
 #include <linux/types.h>
 
 typedef __aligned(4) u64 cb_u64;
@@ -66,4 +67,11 @@ struct lb_framebuffer {
 	u8  reserved_mask_size;
 };
 
+/*
+ * True if the coreboot-provided data is large enough to hold information
+ * on the linear framebuffer. False otherwise.
+ */
+#define LB_FRAMEBUFFER_HAS_LFB(__fb) \
+	((__fb)->size >= offsetofend(struct lb_framebuffer, reserved_mask_size))
+
 #endif /* _LINUX_COREBOOT_H */
-- 
cgit v1.2.3


From 058fc04b8587ad07a86dfa8f99d8d99db0a55443 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 17 Feb 2026 16:56:22 +0100
Subject: drm/sysfb: corebootdrm: Support panel orientation

Add fields and constants for coreboot framebuffer orientation. Set
corebootdrm's DRM connector state from the values. Not all firmware
provides orientation, so make it optional. Systems without, continue
to use unknown orientation.

v3:
- comment on _HAS_ORIENTATION semantics (Tzung-Bi)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Julius Werner <jwerner@chromium.org>
Acked-by: Tzung-Bi Shih <tzungbi@kernel.org> # coreboot
Link: https://patch.msgid.link/20260217155836.96267-13-tzimmermann@suse.de
---
 drivers/gpu/drm/sysfb/corebootdrm.c | 30 ++++++++++++++++++++++++++----
 include/linux/coreboot.h            | 13 +++++++++++++
 2 files changed, 39 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/sysfb/corebootdrm.c b/drivers/gpu/drm/sysfb/corebootdrm.c
index 745318580a5d..5dc6f3c76f7b 100644
--- a/drivers/gpu/drm/sysfb/corebootdrm.c
+++ b/drivers/gpu/drm/sysfb/corebootdrm.c
@@ -110,6 +110,26 @@ static phys_addr_t corebootdrm_get_address_fb(struct drm_device *dev, resource_s
 	return fb->physical_address;
 }
 
+static enum drm_panel_orientation corebootdrm_get_orientation_fb(struct drm_device *dev,
+								 const struct lb_framebuffer *fb)
+{
+	if (!LB_FRAMEBUFFER_HAS_ORIENTATION(fb))
+		return DRM_MODE_PANEL_ORIENTATION_UNKNOWN;
+
+	switch (fb->orientation) {
+	case LB_FRAMEBUFFER_ORIENTATION_NORMAL:
+		return DRM_MODE_PANEL_ORIENTATION_NORMAL;
+	case LB_FRAMEBUFFER_ORIENTATION_BOTTOM_UP:
+		return DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP;
+	case LB_FRAMEBUFFER_ORIENTATION_LEFT_UP:
+		return DRM_MODE_PANEL_ORIENTATION_LEFT_UP;
+	case LB_FRAMEBUFFER_ORIENTATION_RIGHT_UP:
+		return DRM_MODE_PANEL_ORIENTATION_RIGHT_UP;
+	}
+
+	return DRM_MODE_PANEL_ORIENTATION_UNKNOWN;
+}
+
 /*
  * Simple Framebuffer device
  */
@@ -168,7 +188,8 @@ static const struct drm_mode_config_funcs corebootdrm_mode_config_funcs = {
 	DRM_SYSFB_MODE_CONFIG_FUNCS,
 };
 
-static int corebootdrm_mode_config_init(struct corebootdrm_device *cdev)
+static int corebootdrm_mode_config_init(struct corebootdrm_device *cdev,
+					enum drm_panel_orientation orientation)
 {
 	struct drm_sysfb_device *sysfb = &cdev->sysfb;
 	struct drm_device *dev = &sysfb->dev;
@@ -234,8 +255,7 @@ static int corebootdrm_mode_config_init(struct corebootdrm_device *cdev)
 	if (ret)
 		return ret;
 	drm_connector_helper_add(connector, &corebootdrm_connector_helper_funcs);
-	drm_connector_set_panel_orientation_with_quirk(connector,
-						       DRM_MODE_PANEL_ORIENTATION_UNKNOWN,
+	drm_connector_set_panel_orientation_with_quirk(connector, orientation,
 						       width, height);
 
 	ret = drm_connector_attach_encoder(connector, encoder);
@@ -276,6 +296,7 @@ static int corebootdrm_probe(struct platform_device *pdev)
 	int width, height, pitch;
 	resource_size_t size;
 	phys_addr_t address;
+	enum drm_panel_orientation orientation;
 	struct resource *res, *mem = NULL;
 	struct resource aperture;
 	void __iomem *screen_base;
@@ -320,6 +341,7 @@ static int corebootdrm_probe(struct platform_device *pdev)
 	address = corebootdrm_get_address_fb(dev, size, fb);
 	if (!address)
 		return -EINVAL;
+	orientation = corebootdrm_get_orientation_fb(dev, fb);
 
 	sysfb->fb_mode = drm_sysfb_mode(width, height, 0, 0);
 	sysfb->fb_format = format;
@@ -375,7 +397,7 @@ static int corebootdrm_probe(struct platform_device *pdev)
 	 * DRM mode setting and registration
 	 */
 
-	ret = corebootdrm_mode_config_init(cdev);
+	ret = corebootdrm_mode_config_init(cdev, orientation);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h
index 885da106fee3..5d40ca7a1d89 100644
--- a/include/linux/coreboot.h
+++ b/include/linux/coreboot.h
@@ -47,6 +47,11 @@ struct lb_cbmem_entry {
 	u32 id;
 };
 
+#define LB_FRAMEBUFFER_ORIENTATION_NORMAL	0
+#define LB_FRAMEBUFFER_ORIENTATION_BOTTOM_UP	1
+#define LB_FRAMEBUFFER_ORIENTATION_LEFT_UP	2
+#define LB_FRAMEBUFFER_ORIENTATION_RIGHT_UP	3
+
 /* Describes framebuffer setup by coreboot */
 struct lb_framebuffer {
 	u32 tag;
@@ -65,6 +70,7 @@ struct lb_framebuffer {
 	u8  blue_mask_size;
 	u8  reserved_mask_pos;
 	u8  reserved_mask_size;
+	u8  orientation;
 };
 
 /*
@@ -74,4 +80,11 @@ struct lb_framebuffer {
 #define LB_FRAMEBUFFER_HAS_LFB(__fb) \
 	((__fb)->size >= offsetofend(struct lb_framebuffer, reserved_mask_size))
 
+/*
+ * True if the coreboot-provided data is large enough to hold information
+ * on the display orientation. False otherwise.
+ */
+#define LB_FRAMEBUFFER_HAS_ORIENTATION(__fb) \
+	((__fb)->size >= offsetofend(struct lb_framebuffer, orientation))
+
 #endif /* _LINUX_COREBOOT_H */
-- 
cgit v1.2.3


From dc652a33cf08ecd7c9935bf9168a1a27c9a246f0 Mon Sep 17 00:00:00 2001
From: Brian Masney <bmasney@redhat.com>
Date: Fri, 12 Dec 2025 08:41:42 +0900
Subject: clk: remove round_rate() clk ops

The round_rate() clk ops is deprecated, and all in tree drivers have
been converted, so let's go ahead and remove any references to the
round_rate() clk ops.

Signed-off-by: Brian Masney <bmasney@redhat.com>
---
 Documentation/driver-api/clk.rst |  9 +--------
 drivers/clk/clk.c                | 39 ++++++++++++++-------------------------
 include/linux/clk-provider.h     | 18 ++++++------------
 3 files changed, 21 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/clk.rst b/Documentation/driver-api/clk.rst
index 93bab5336dfd..c6aca8186a78 100644
--- a/Documentation/driver-api/clk.rst
+++ b/Documentation/driver-api/clk.rst
@@ -77,9 +77,6 @@ the operations defined in clk-provider.h::
 		void		(*disable_unused)(struct clk_hw *hw);
 		unsigned long	(*recalc_rate)(struct clk_hw *hw,
 						unsigned long parent_rate);
-		long		(*round_rate)(struct clk_hw *hw,
-						unsigned long rate,
-						unsigned long *parent_rate);
 		int		(*determine_rate)(struct clk_hw *hw,
 						  struct clk_rate_request *req);
 		int		(*set_parent)(struct clk_hw *hw, u8 index);
@@ -220,9 +217,7 @@ optional or must be evaluated on a case-by-case basis.
    +----------------+------+-------------+---------------+-------------+------+
    |.recalc_rate    |      | y           |               |             |      |
    +----------------+------+-------------+---------------+-------------+------+
-   |.round_rate     |      | y [1]_      |               |             |      |
-   +----------------+------+-------------+---------------+-------------+------+
-   |.determine_rate |      | y [1]_      |               |             |      |
+   |.determine_rate |      | y           |               |             |      |
    +----------------+------+-------------+---------------+-------------+------+
    |.set_rate       |      | y           |               |             |      |
    +----------------+------+-------------+---------------+-------------+------+
@@ -238,8 +233,6 @@ optional or must be evaluated on a case-by-case basis.
    |.init           |      |             |               |             |      |
    +----------------+------+-------------+---------------+-------------+------+
 
-.. [1] either one of round_rate or determine_rate is required.
-
 Finally, register your clock at run-time with a hardware-specific
 registration function.  This function simply populates struct clk_foo's
 data and then passes the common struct clk parameters to the framework
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 47093cda9df3..fd418dc988b1 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1560,8 +1560,6 @@ late_initcall_sync(clk_disable_unused);
 static int clk_core_determine_round_nolock(struct clk_core *core,
 					   struct clk_rate_request *req)
 {
-	long rate;
-
 	lockdep_assert_held(&prepare_lock);
 
 	if (!core)
@@ -1591,13 +1589,6 @@ static int clk_core_determine_round_nolock(struct clk_core *core,
 		req->rate = core->rate;
 	} else if (core->ops->determine_rate) {
 		return core->ops->determine_rate(core->hw, req);
-	} else if (core->ops->round_rate) {
-		rate = core->ops->round_rate(core->hw, req->rate,
-					     &req->best_parent_rate);
-		if (rate < 0)
-			return rate;
-
-		req->rate = rate;
 	} else {
 		return -EINVAL;
 	}
@@ -1682,7 +1673,7 @@ EXPORT_SYMBOL_GPL(clk_hw_forward_rate_request);
 
 static bool clk_core_can_round(struct clk_core * const core)
 {
-	return core->ops->determine_rate || core->ops->round_rate;
+	return core->ops->determine_rate;
 }
 
 static int clk_core_round_rate_nolock(struct clk_core *core,
@@ -1750,11 +1741,11 @@ EXPORT_SYMBOL_GPL(__clk_determine_rate);
  * use.
  *
  * Context: prepare_lock must be held.
- *          For clk providers to call from within clk_ops such as .round_rate,
+ *          For clk providers to call from within clk_ops such as
  *          .determine_rate.
  *
- * Return: returns rounded rate of hw clk if clk supports round_rate operation
- *         else returns the parent rate.
+ * Return: returns rounded rate of hw clk if clk supports determine_rate
+ *         operation; else returns the parent rate.
  */
 unsigned long clk_hw_round_rate(struct clk_hw *hw, unsigned long rate)
 {
@@ -2569,12 +2560,13 @@ err:
  *
  * Setting the CLK_SET_RATE_PARENT flag allows the rate change operation to
  * propagate up to clk's parent; whether or not this happens depends on the
- * outcome of clk's .round_rate implementation.  If *parent_rate is unchanged
- * after calling .round_rate then upstream parent propagation is ignored.  If
- * *parent_rate comes back with a new rate for clk's parent then we propagate
- * up to clk's parent and set its rate.  Upward propagation will continue
- * until either a clk does not support the CLK_SET_RATE_PARENT flag or
- * .round_rate stops requesting changes to clk's parent_rate.
+ * outcome of clk's .determine_rate implementation. If req->best_parent_rate
+ * is unchanged after calling .determine_rate then upstream parent propagation
+ * is ignored.  If req->best_parent_rate comes back with a new rate for clk's
+ * parent then we propagate up to clk's parent and set its rate. Upward
+ * propagation will continue until either a clk does not support the
+ * CLK_SET_RATE_PARENT flag or .determine_rate stops requesting changes to
+ * clk's parent_rate.
  *
  * Rate changes are accomplished via tree traversal that also recalculates the
  * rates for the clocks and fires off POST_RATE_CHANGE notifiers.
@@ -2703,8 +2695,6 @@ static int clk_set_rate_range_nolock(struct clk *clk,
 	 * FIXME:
 	 * There is a catch. It may fail for the usual reason (clock
 	 * broken, clock protected, etc) but also because:
-	 * - round_rate() was not favorable and fell on the wrong
-	 *   side of the boundary
 	 * - the determine_rate() callback does not really check for
 	 *   this corner case when determining the rate
 	 */
@@ -3915,10 +3905,9 @@ static int __clk_core_init(struct clk_core *core)
 	}
 
 	/* check that clk_ops are sane.  See Documentation/driver-api/clk.rst */
-	if (core->ops->set_rate &&
-	    !((core->ops->round_rate || core->ops->determine_rate) &&
-	      core->ops->recalc_rate)) {
-		pr_err("%s: %s must implement .round_rate or .determine_rate in addition to .recalc_rate\n",
+	if (core->ops->set_rate && !core->ops->determine_rate &&
+	      core->ops->recalc_rate) {
+		pr_err("%s: %s must implement .determine_rate in addition to .recalc_rate\n",
 		       __func__, core->name);
 		ret = -EINVAL;
 		goto out;
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 630705a47129..1cda2c78dffa 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -136,10 +136,6 @@ struct clk_duty {
  *		0. Returns the calculated rate. Optional, but recommended - if
  *		this op is not set then clock rate will be initialized to 0.
  *
- * @round_rate:	Given a target rate as input, returns the closest rate actually
- *		supported by the clock. The parent rate is an input/output
- *		parameter.
- *
  * @determine_rate: Given a target rate as input, returns the closest rate
  *		actually supported by the clock, and optionally the parent clock
  *		that should be used to provide the clock rate.
@@ -163,13 +159,13 @@ struct clk_duty {
  *
  * @set_rate:	Change the rate of this clock. The requested rate is specified
  *		by the second argument, which should typically be the return
- *		of .round_rate call.  The third argument gives the parent rate
- *		which is likely helpful for most .set_rate implementation.
+ *		of .determine_rate call.  The third argument gives the parent
+ *		rate which is likely helpful for most .set_rate implementation.
  *		Returns 0 on success, -EERROR otherwise.
  *
  * @set_rate_and_parent: Change the rate and the parent of this clock. The
  *		requested rate is specified by the second argument, which
- *		should typically be the return of .round_rate call.  The
+ *		should typically be the return of clk_round_rate() call.  The
  *		third argument gives the parent rate which is likely helpful
  *		for most .set_rate_and_parent implementation. The fourth
  *		argument gives the parent index. This callback is optional (and
@@ -244,8 +240,6 @@ struct clk_ops {
 	void		(*restore_context)(struct clk_hw *hw);
 	unsigned long	(*recalc_rate)(struct clk_hw *hw,
 					unsigned long parent_rate);
-	long		(*round_rate)(struct clk_hw *hw, unsigned long rate,
-					unsigned long *parent_rate);
 	int		(*determine_rate)(struct clk_hw *hw,
 					  struct clk_rate_request *req);
 	int		(*set_parent)(struct clk_hw *hw, u8 index);
@@ -679,7 +673,7 @@ struct clk_div_table {
  * @lock:	register lock
  *
  * Clock with an adjustable divider affecting its output frequency.  Implements
- * .recalc_rate, .set_rate and .round_rate
+ * .recalc_rate, .set_rate and .determine_rate
  *
  * @flags:
  * CLK_DIVIDER_ONE_BASED - by default the divisor is the value read from the
@@ -1126,7 +1120,7 @@ void of_fixed_factor_clk_setup(struct device_node *node);
  *
  * Clock with a fixed multiplier and divider. The output frequency is the
  * parent clock rate divided by div and multiplied by mult.
- * Implements .recalc_rate, .set_rate, .round_rate and .recalc_accuracy
+ * Implements .recalc_rate, .set_rate, .determine_rate and .recalc_accuracy
  *
  * Flags:
  * * CLK_FIXED_FACTOR_FIXED_ACCURACY - Use the value in @acc instead of the
@@ -1254,7 +1248,7 @@ void clk_hw_unregister_fractional_divider(struct clk_hw *hw);
  * @lock:	register lock
  *
  * Clock with an adjustable multiplier affecting its output frequency.
- * Implements .recalc_rate, .set_rate and .round_rate
+ * Implements .recalc_rate, .set_rate and .determine_rate
  *
  * @flags:
  * CLK_MULTIPLIER_ZERO_BYPASS - By default, the multiplier is the value read
-- 
cgit v1.2.3


From 4b5231d608d00749a2346a3dd11bd6d05c0662e3 Mon Sep 17 00:00:00 2001
From: Brian Masney <bmasney@redhat.com>
Date: Thu, 8 Jan 2026 16:16:44 -0500
Subject: clk: divider: remove divider_ro_round_rate_parent()

There are no remaining users of divider_ro_round_rate_parent(), so let's
go ahead and remove it.

Signed-off-by: Brian Masney <bmasney@redhat.com>
---
 drivers/clk/clk-divider.c    | 22 ----------------------
 include/linux/clk-provider.h | 15 ---------------
 2 files changed, 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 45e7ebde4a8b..26610dd976ec 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -409,28 +409,6 @@ long divider_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
 }
 EXPORT_SYMBOL_GPL(divider_round_rate_parent);
 
-long divider_ro_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
-				  unsigned long rate, unsigned long *prate,
-				  const struct clk_div_table *table, u8 width,
-				  unsigned long flags, unsigned int val)
-{
-	struct clk_rate_request req;
-	int ret;
-
-	clk_hw_init_rate_request(hw, &req, rate);
-	req.best_parent_rate = *prate;
-	req.best_parent_hw = parent;
-
-	ret = divider_ro_determine_rate(hw, &req, table, width, flags, val);
-	if (ret)
-		return ret;
-
-	*prate = req.best_parent_rate;
-
-	return req.rate;
-}
-EXPORT_SYMBOL_GPL(divider_ro_round_rate_parent);
-
 static int clk_divider_determine_rate(struct clk_hw *hw,
 				      struct clk_rate_request *req)
 {
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 1cda2c78dffa..0d31077749fb 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -737,10 +737,6 @@ long divider_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
 			       unsigned long rate, unsigned long *prate,
 			       const struct clk_div_table *table,
 			       u8 width, unsigned long flags);
-long divider_ro_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
-				  unsigned long rate, unsigned long *prate,
-				  const struct clk_div_table *table, u8 width,
-				  unsigned long flags, unsigned int val);
 int divider_determine_rate(struct clk_hw *hw, struct clk_rate_request *req,
 			   const struct clk_div_table *table, u8 width,
 			   unsigned long flags);
@@ -1440,17 +1436,6 @@ static inline long divider_round_rate(struct clk_hw *hw, unsigned long rate,
 					 rate, prate, table, width, flags);
 }
 
-static inline long divider_ro_round_rate(struct clk_hw *hw, unsigned long rate,
-					 unsigned long *prate,
-					 const struct clk_div_table *table,
-					 u8 width, unsigned long flags,
-					 unsigned int val)
-{
-	return divider_ro_round_rate_parent(hw, clk_hw_get_parent(hw),
-					    rate, prate, table, width, flags,
-					    val);
-}
-
 /*
  * FIXME clock api without lock protection
  */
-- 
cgit v1.2.3


From d4851759742c1322f498021dab882d322fc34a1d Mon Sep 17 00:00:00 2001
From: Brian Masney <bmasney@redhat.com>
Date: Thu, 8 Jan 2026 16:16:45 -0500
Subject: clk: divider: remove divider_round_rate() and
 divider_round_rate_parent()

There are no remaining users of divider_round_rate() and
divider_round_rate_parent(), so let's go ahead and remove them.

Signed-off-by: Brian Masney <bmasney@redhat.com>
---
 drivers/clk/clk-divider.c    | 22 ----------------------
 include/linux/clk-provider.h | 13 -------------
 2 files changed, 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 26610dd976ec..b3b485d23ea8 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -387,28 +387,6 @@ int divider_ro_determine_rate(struct clk_hw *hw, struct clk_rate_request *req,
 }
 EXPORT_SYMBOL_GPL(divider_ro_determine_rate);
 
-long divider_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
-			       unsigned long rate, unsigned long *prate,
-			       const struct clk_div_table *table,
-			       u8 width, unsigned long flags)
-{
-	struct clk_rate_request req;
-	int ret;
-
-	clk_hw_init_rate_request(hw, &req, rate);
-	req.best_parent_rate = *prate;
-	req.best_parent_hw = parent;
-
-	ret = divider_determine_rate(hw, &req, table, width, flags);
-	if (ret)
-		return ret;
-
-	*prate = req.best_parent_rate;
-
-	return req.rate;
-}
-EXPORT_SYMBOL_GPL(divider_round_rate_parent);
-
 static int clk_divider_determine_rate(struct clk_hw *hw,
 				      struct clk_rate_request *req)
 {
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 0d31077749fb..4d21602d7dbd 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -733,10 +733,6 @@ extern const struct clk_ops clk_divider_ro_ops;
 unsigned long divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate,
 		unsigned int val, const struct clk_div_table *table,
 		unsigned long flags, unsigned long width);
-long divider_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
-			       unsigned long rate, unsigned long *prate,
-			       const struct clk_div_table *table,
-			       u8 width, unsigned long flags);
 int divider_determine_rate(struct clk_hw *hw, struct clk_rate_request *req,
 			   const struct clk_div_table *table, u8 width,
 			   unsigned long flags);
@@ -1427,15 +1423,6 @@ static inline void __clk_hw_set_clk(struct clk_hw *dst, struct clk_hw *src)
 	dst->core = src->core;
 }
 
-static inline long divider_round_rate(struct clk_hw *hw, unsigned long rate,
-				      unsigned long *prate,
-				      const struct clk_div_table *table,
-				      u8 width, unsigned long flags)
-{
-	return divider_round_rate_parent(hw, clk_hw_get_parent(hw),
-					 rate, prate, table, width, flags);
-}
-
 /*
  * FIXME clock api without lock protection
  */
-- 
cgit v1.2.3


From 5cab6d386bd30c3bb4efceb05b25842a6f144693 Mon Sep 17 00:00:00 2001
From: Sanjay Yadav <sanjay.kumar.yadav@intel.com>
Date: Thu, 12 Feb 2026 14:55:29 +0530
Subject: drm/buddy: Add kernel-doc for allocator structures and flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add missing kernel-doc for GPU buddy allocator flags,
gpu_buddy_block, and gpu_buddy. The documentation covers block
header fields, allocator roots, free trees, and allocation flags
such as RANGE, TOPDOWN, CONTIGUOUS, CLEAR, and TRIM_DISABLE.
Private members are marked with kernel-doc private markers
and documented with regular comments.

No functional changes.

v2:
- Corrected GPU_BUDDY_CLEAR_TREE and GPU_BUDDY_DIRTY_TREE index
  values (Arun)
- Rebased after DRM buddy allocator moved to drivers/gpu/
- Updated commit message

v3:
- Document reserved bits 8:6 in header layout (Arun)
- Fix checkpatch warning

Cc: Christian König <christian.koenig@amd.com>
Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Sanjay Yadav <sanjay.kumar.yadav@intel.com>
Reviewed-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Link: https://patch.msgid.link/20260212092527.718455-5-sanjay.kumar.yadav@intel.com
---
 include/linux/gpu_buddy.h | 123 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 103 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
index 07ac65db6d2e..bf2a42256536 100644
--- a/include/linux/gpu_buddy.h
+++ b/include/linux/gpu_buddy.h
@@ -12,11 +12,58 @@
 #include <linux/sched.h>
 #include <linux/rbtree.h>
 
+/**
+ * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range
+ *
+ * When set, allocation is restricted to the range [start, end) specified
+ * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored
+ * and allocation can use any free space.
+ */
 #define GPU_BUDDY_RANGE_ALLOCATION		BIT(0)
+
+/**
+ * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space
+ *
+ * Allocate starting from high addresses and working down. Useful for
+ * separating different allocation types (e.g., kernel vs userspace)
+ * to reduce fragmentation.
+ */
 #define GPU_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
+
+/**
+ * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks
+ *
+ * The allocation must be satisfied with a single contiguous block.
+ * If the requested size cannot be allocated contiguously, the
+ * allocation fails with -ENOSPC.
+ */
 #define GPU_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
+
+/**
+ * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory
+ *
+ * Attempt to allocate from the clear tree first. If insufficient clear
+ * memory is available, falls back to dirty memory. Useful when the
+ * caller needs zeroed memory and wants to avoid GPU clear operations.
+ */
 #define GPU_BUDDY_CLEAR_ALLOCATION		BIT(3)
+
+/**
+ * GPU_BUDDY_CLEARED - Mark returned blocks as cleared
+ *
+ * Used with gpu_buddy_free_list() to indicate that the memory being
+ * freed has been cleared (zeroed). The blocks will be placed in the
+ * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests.
+ */
 #define GPU_BUDDY_CLEARED			BIT(4)
+
+/**
+ * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming
+ *
+ * By default, if an allocation is smaller than the allocated block,
+ * excess memory is trimmed and returned to the free pool. This flag
+ * disables trimming, keeping the full power-of-two block size.
+ */
 #define GPU_BUDDY_TRIM_DISABLE			BIT(5)
 
 enum gpu_buddy_free_tree {
@@ -28,7 +75,28 @@ enum gpu_buddy_free_tree {
 #define for_each_free_tree(tree) \
 	for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++)
 
+/**
+ * struct gpu_buddy_block - Block within a buddy allocator
+ *
+ * Each block in the buddy allocator is represented by this structure.
+ * Blocks are organized in a binary tree where each parent block can be
+ * split into two children (left and right buddies). The allocator manages
+ * blocks at various orders (power-of-2 sizes) from chunk_size up to the
+ * largest contiguous region.
+ *
+ * @private: Private data owned by the allocator user (e.g., driver-specific data)
+ * @link: List node for user ownership while block is allocated
+ */
 struct gpu_buddy_block {
+/* private: */
+	/*
+	 * Header bit layout:
+	 * - Bits 63:12: block offset within the address space
+	 * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT)
+	 * - Bit 9: clear bit (1 if memory is zeroed)
+	 * - Bits 8:6: reserved
+	 * - Bits 5:0: order (log2 of size relative to chunk_size)
+	 */
 #define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
 #define GPU_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
 #define   GPU_BUDDY_ALLOCATED	   (1 << 10)
@@ -43,7 +111,7 @@ struct gpu_buddy_block {
 	struct gpu_buddy_block *left;
 	struct gpu_buddy_block *right;
 	struct gpu_buddy_block *parent;
-
+/* public: */
 	void *private; /* owned by creator */
 
 	/*
@@ -53,43 +121,58 @@ struct gpu_buddy_block {
 	 * gpu_buddy_free* ownership is given back to the mm.
 	 */
 	union {
+/* private: */
 		struct rb_node rb;
+/* public: */
 		struct list_head link;
 	};
-
+/* private: */
 	struct list_head tmp_link;
 };
 
 /* Order-zero must be at least SZ_4K */
 #define GPU_BUDDY_MAX_ORDER (63 - 12)
 
-/*
- * Binary Buddy System.
+/**
+ * struct gpu_buddy - GPU binary buddy allocator
+ *
+ * The buddy allocator provides efficient power-of-two memory allocation
+ * with fast allocation and free operations. It is commonly used for GPU
+ * memory management where allocations can be split into power-of-two
+ * block sizes.
  *
- * Locking should be handled by the user, a simple mutex around
- * gpu_buddy_alloc* and gpu_buddy_free* should suffice.
+ * Locking should be handled by the user; a simple mutex around
+ * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list()
+ * should suffice.
+ *
+ * @n_roots: Number of root blocks in the roots array.
+ * @max_order: Maximum block order (log2 of largest block size / chunk_size).
+ * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K.
+ * @size: Total size of the address space managed by this allocator in bytes.
+ * @avail: Total free space currently available for allocation in bytes.
+ * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes.
+ *               This is a subset of @avail.
  */
 struct gpu_buddy {
-	/* Maintain a free list for each order. */
-	struct rb_root **free_trees;
-
+/* private: */
 	/*
-	 * Maintain explicit binary tree(s) to track the allocation of the
-	 * address space. This gives us a simple way of finding a buddy block
-	 * and performing the potentially recursive merge step when freeing a
-	 * block.  Nodes are either allocated or free, in which case they will
-	 * also exist on the respective free list.
+	 * Array of red-black trees for free block management.
+	 * Indexed as free_trees[clear/dirty][order] where:
+	 * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content
+	 * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content
+	 * Each tree holds free blocks of the corresponding order.
 	 */
-	struct gpu_buddy_block **roots;
-
+	struct rb_root **free_trees;
 	/*
-	 * Anything from here is public, and remains static for the lifetime of
-	 * the mm. Everything above is considered do-not-touch.
+	 * Array of root blocks representing the top-level blocks of the
+	 * binary tree(s). Multiple roots exist when the total size is not
+	 * a power of two, with each root being the largest power-of-two
+	 * that fits in the remaining space.
 	 */
+	struct gpu_buddy_block **roots;
+/* public: */
 	unsigned int n_roots;
 	unsigned int max_order;
-
-	/* Must be at least SZ_4K */
 	u64 chunk_size;
 	u64 size;
 	u64 avail;
-- 
cgit v1.2.3


From df8c7892e06efa5df2aa780a338f33a4f666370b Mon Sep 17 00:00:00 2001
From: Sanjay Yadav <sanjay.kumar.yadav@intel.com>
Date: Thu, 12 Feb 2026 14:55:30 +0530
Subject: drm/buddy: Move internal helpers to buddy.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move gpu_buddy_block_state(), gpu_buddy_block_is_allocated(),
and gpu_buddy_block_is_split() from gpu_buddy.h to gpu_buddy.c
as static functions since they have no external callers.

Remove gpu_get_buddy() as it was an unused exported wrapper
around the internal __get_buddy().

No functional changes.

v2:
- Rebased after DRM buddy allocator moved to drivers/gpu/
- Keep gpu_buddy_block_is_free() in header since it's now
  used by drm_buddy.c
- Updated commit message

Cc: Christian König <christian.koenig@amd.com>
Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Sanjay Yadav <sanjay.kumar.yadav@intel.com>
Reviewed-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Link: https://patch.msgid.link/20260212092527.718455-6-sanjay.kumar.yadav@intel.com
---
 drivers/gpu/buddy.c       | 35 ++++++++++++++++++-----------------
 include/linux/gpu_buddy.h | 25 ++-----------------------
 2 files changed, 20 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c
index 603c59a2013a..b27761246d4b 100644
--- a/drivers/gpu/buddy.c
+++ b/drivers/gpu/buddy.c
@@ -14,6 +14,24 @@
 
 static struct kmem_cache *slab_blocks;
 
+static unsigned int
+gpu_buddy_block_state(struct gpu_buddy_block *block)
+{
+	return block->header & GPU_BUDDY_HEADER_STATE;
+}
+
+static bool
+gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
+{
+	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
+}
+
+static bool
+gpu_buddy_block_is_split(struct gpu_buddy_block *block)
+{
+	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
+}
+
 static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
 					       struct gpu_buddy_block *parent,
 					       unsigned int order,
@@ -449,23 +467,6 @@ static int split_block(struct gpu_buddy *mm,
 	return 0;
 }
 
-/**
- * gpu_get_buddy - get buddy address
- *
- * @block: GPU buddy block
- *
- * Returns the corresponding buddy block for @block, or NULL
- * if this is a root block and can't be merged further.
- * Requires some kind of locking to protect against
- * any concurrent allocate and free operations.
- */
-struct gpu_buddy_block *
-gpu_get_buddy(struct gpu_buddy_block *block)
-{
-	return __get_buddy(block);
-}
-EXPORT_SYMBOL(gpu_get_buddy);
-
 /**
  * gpu_buddy_reset_clear - reset blocks clear state
  *
diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
index bf2a42256536..f1fb6eff604a 100644
--- a/include/linux/gpu_buddy.h
+++ b/include/linux/gpu_buddy.h
@@ -191,16 +191,10 @@ gpu_buddy_block_order(struct gpu_buddy_block *block)
 	return block->header & GPU_BUDDY_HEADER_ORDER;
 }
 
-static inline unsigned int
-gpu_buddy_block_state(struct gpu_buddy_block *block)
-{
-	return block->header & GPU_BUDDY_HEADER_STATE;
-}
-
 static inline bool
-gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
+gpu_buddy_block_is_free(struct gpu_buddy_block *block)
 {
-	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
+	return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE;
 }
 
 static inline bool
@@ -209,18 +203,6 @@ gpu_buddy_block_is_clear(struct gpu_buddy_block *block)
 	return block->header & GPU_BUDDY_HEADER_CLEAR;
 }
 
-static inline bool
-gpu_buddy_block_is_free(struct gpu_buddy_block *block)
-{
-	return gpu_buddy_block_state(block) == GPU_BUDDY_FREE;
-}
-
-static inline bool
-gpu_buddy_block_is_split(struct gpu_buddy_block *block)
-{
-	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
-}
-
 static inline u64
 gpu_buddy_block_size(struct gpu_buddy *mm,
 		     struct gpu_buddy_block *block)
@@ -232,9 +214,6 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size);
 
 void gpu_buddy_fini(struct gpu_buddy *mm);
 
-struct gpu_buddy_block *
-gpu_get_buddy(struct gpu_buddy_block *block);
-
 int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
 			   u64 start, u64 end, u64 size,
 			   u64 min_page_size,
-- 
cgit v1.2.3


From 8167d7f674648cfd428ed49773522f9df5c4fdfd Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 15 Feb 2026 21:44:18 -0800
Subject: soundwire: sdw.h: repair names and format of kernel-doc comments

Fix all kernel-doc warnings in sdw.h:

Warning: include/linux/soundwire/sdw.h:538 cannot understand function prototype: 'enum sdw_reg_bank'
Warning: include/linux/soundwire/sdw.h:779 struct member 'port_num' not described in 'sdw_transport_params'
Warning: include/linux/soundwire/sdw.h:792 struct member 'port_num' not described in 'sdw_enable_ch'
Warning: include/linux/soundwire/sdw.h:892 cannot understand function prototype: 'struct sdw_port_config'
Warning: include/linux/soundwire/sdw.h:906 cannot understand function prototype: 'struct sdw_stream_config'
Warning: include/linux/soundwire/sdw.h:925 cannot understand function prototype: 'enum sdw_stream_state'
Warning: include/linux/soundwire/sdw.h:942 cannot understand function prototype: 'struct sdw_stream_params'
Warning: include/linux/soundwire/sdw.h:960 cannot understand function prototype: 'struct sdw_stream_runtime'
Warning: include/linux/soundwire/sdw.h:1047 struct member 'bpt_stream_refcount' not described in 'sdw_bus'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://patch.msgid.link/20260216054418.2766846-1-rdunlap@infradead.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index f462717acf20..6147eb1fb210 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -532,7 +532,7 @@ struct sdw_slave_intr_status {
 };
 
 /**
- * sdw_reg_bank - SoundWire register banks
+ * enum sdw_reg_bank - SoundWire register banks
  * @SDW_BANK0: Soundwire register bank 0
  * @SDW_BANK1: Soundwire register bank 1
  */
@@ -751,7 +751,7 @@ struct sdw_port_params {
  * struct sdw_transport_params: Data Port Transport Parameters
  *
  * @blk_grp_ctrl_valid: Port implements block group control
- * @num: Port number
+ * @port_num: Port number
  * @blk_grp_ctrl: Block group control value
  * @sample_interval: Sample interval
  * @offset1: Blockoffset of the payload data
@@ -782,7 +782,7 @@ struct sdw_transport_params {
 /**
  * struct sdw_enable_ch: Enable/disable Data Port channel
  *
- * @num: Port number
+ * @port_num: Port number
  * @ch_mask: Active channel mask
  * @enable: Enable (true) /disable (false) channel
  */
@@ -885,7 +885,7 @@ void sdw_bus_master_delete(struct sdw_bus *bus);
 void sdw_show_ping_status(struct sdw_bus *bus, bool sync_delay);
 
 /**
- * sdw_port_config: Master or Slave Port configuration
+ * struct sdw_port_config: Master or Slave Port configuration
  *
  * @num: Port number
  * @ch_mask: channels mask for port
@@ -896,7 +896,7 @@ struct sdw_port_config {
 };
 
 /**
- * sdw_stream_config: Master or Slave stream configuration
+ * struct sdw_stream_config: Master or Slave stream configuration
  *
  * @frame_rate: Audio frame rate of the stream, in Hz
  * @ch_count: Channel count of the stream
@@ -913,7 +913,7 @@ struct sdw_stream_config {
 };
 
 /**
- * sdw_stream_state: Stream states
+ * enum sdw_stream_state: Stream states
  *
  * @SDW_STREAM_ALLOCATED: New stream allocated.
  * @SDW_STREAM_CONFIGURED: Stream configured
@@ -934,7 +934,7 @@ enum sdw_stream_state {
 };
 
 /**
- * sdw_stream_params: Stream parameters
+ * struct sdw_stream_params: Stream parameters
  *
  * @rate: Sampling frequency, in Hz
  * @ch_count: Number of channels
@@ -947,7 +947,7 @@ struct sdw_stream_params {
 };
 
 /**
- * sdw_stream_runtime: Runtime stream parameters
+ * struct sdw_stream_runtime: Runtime stream parameters
  *
  * @name: SoundWire stream name
  * @params: Stream parameters
@@ -983,7 +983,7 @@ struct sdw_stream_runtime {
  * @defer_msg: Defer message
  * @params: Current bus parameters
  * @stream_refcount: number of streams currently using this bus
- * @btp_stream_refcount: number of BTP streams currently using this bus (should
+ * @bpt_stream_refcount: number of BTP streams currently using this bus (should
  * be zero or one, multiple streams per link is not supported).
  * @bpt_stream: pointer stored to handle BTP streams.
  * @ops: Master callback ops
-- 
cgit v1.2.3


From 88440208c6074e639a7ccc038c6a7ed4b6f8bb99 Mon Sep 17 00:00:00 2001
From: Tomas Melin <tomas.melin@vaisala.com>
Date: Tue, 10 Feb 2026 10:53:34 +0000
Subject: iio: industrialio-backend: support backend capabilities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Not all backends support the full set of capabilities provided by the
industrialio-backend framework. Capability bits can be used in frontends
and backends for checking for a certain feature set, or if using
related functions can be expected to fail.

Capability bits should be set by a compatible backend and provided when
registering the backend.

Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: David Lechner <dlechner@baylibre.com>
Signed-off-by: Tomas Melin <tomas.melin@vaisala.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/industrialio-backend.c | 16 ++++++++++++++++
 include/linux/iio/backend.h        | 24 ++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-backend.c b/drivers/iio/industrialio-backend.c
index 447b694d6d5f..1afd00763da9 100644
--- a/drivers/iio/industrialio-backend.c
+++ b/drivers/iio/industrialio-backend.c
@@ -56,6 +56,7 @@ struct iio_backend {
 	void *priv;
 	const char *name;
 	unsigned int cached_reg_addr;
+	u32 caps;
 	/*
 	 * This index is relative to the frontend. Meaning that for
 	 * frontends with multiple backends, this will be the index of this
@@ -774,6 +775,20 @@ int iio_backend_extend_chan_spec(struct iio_backend *back,
 }
 EXPORT_SYMBOL_NS_GPL(iio_backend_extend_chan_spec, "IIO_BACKEND");
 
+/**
+ * iio_backend_has_caps - Check if backend has specific capabilities
+ * @back: Backend device
+ * @caps: Capabilities to check
+ *
+ * RETURNS:
+ * True if backend has all the requested capabilities, false otherwise.
+ */
+bool iio_backend_has_caps(struct iio_backend *back, u32 caps)
+{
+	return (back->caps & caps) == caps;
+}
+EXPORT_SYMBOL_NS_GPL(iio_backend_has_caps, "IIO_BACKEND");
+
 static void iio_backend_release(void *arg)
 {
 	struct iio_backend *back = arg;
@@ -1114,6 +1129,7 @@ int devm_iio_backend_register(struct device *dev,
 
 	back->ops = info->ops;
 	back->name = info->name;
+	back->caps = info->caps;
 	back->owner = dev->driver->owner;
 	back->dev = dev;
 	back->priv = priv;
diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h
index 7f815f3fed6a..4d15c2a9802c 100644
--- a/include/linux/iio/backend.h
+++ b/include/linux/iio/backend.h
@@ -84,6 +84,27 @@ enum iio_backend_filter_type {
 	IIO_BACKEND_FILTER_TYPE_MAX
 };
 
+/**
+ * enum iio_backend_capabilities - Backend capabilities
+ * Backend capabilities can be used by frontends to check if a given
+ * functionality is supported by the backend. This is useful for frontend
+ * devices which are expected to work with alternative backend
+ * implementations. Capabilities are loosely coupled with operations,
+ * meaning that a capability requires certain operations to be implemented
+ * by the backend. A capability might be mapped to a single operation or
+ * multiple operations.
+ *
+ * @IIO_BACKEND_CAP_CALIBRATION: Backend supports digital interface
+ * calibration. Calibration procedure is device specific.
+ * @IIO_BACKEND_CAP_BUFFER: Support for IIO buffer interface.
+ * @IIO_BACKEND_CAP_ENABLE: Backend can be explicitly enabled/disabled.
+ */
+enum iio_backend_capabilities {
+	IIO_BACKEND_CAP_CALIBRATION = BIT(0),
+	IIO_BACKEND_CAP_BUFFER = BIT(1),
+	IIO_BACKEND_CAP_ENABLE = BIT(2),
+};
+
 /**
  * struct iio_backend_ops - operations structure for an iio_backend
  * @enable: Enable backend.
@@ -179,10 +200,12 @@ struct iio_backend_ops {
  * struct iio_backend_info - info structure for an iio_backend
  * @name: Backend name.
  * @ops: Backend operations.
+ * @caps: Backend capabilities. (bitmask of enum iio_backend_capabilities).
  */
 struct iio_backend_info {
 	const char *name;
 	const struct iio_backend_ops *ops;
+	u32 caps;
 };
 
 int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan);
@@ -235,6 +258,7 @@ int iio_backend_read_raw(struct iio_backend *back,
 			 long mask);
 int iio_backend_extend_chan_spec(struct iio_backend *back,
 				 struct iio_chan_spec *chan);
+bool iio_backend_has_caps(struct iio_backend *back, u32 caps);
 void *iio_backend_get_priv(const struct iio_backend *conv);
 struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name);
 struct iio_backend *devm_iio_backend_fwnode_get(struct device *dev,
-- 
cgit v1.2.3


From 8b65eb52d93e4e496bd26e6867152344554eb39e Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Tue, 17 Feb 2026 11:15:10 -0800
Subject: locking/mutex: Rename mutex_init_lockep()

Typo, this wants to be _lockdep().

Fixes: 51d7a054521d ("locking/mutex: Redo __mutex_init() to reduce generated code size")
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260217191512.1180151-2-dave@stgolabs.net
---
 include/linux/mutex.h  | 4 ++--
 kernel/locking/mutex.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index ecaa0440f6ec..8126da959088 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -87,12 +87,12 @@ do {									\
 	struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-void mutex_init_lockep(struct mutex *lock, const char *name, struct lock_class_key *key);
+void mutex_init_lockdep(struct mutex *lock, const char *name, struct lock_class_key *key);
 
 static inline void __mutex_init(struct mutex *lock, const char *name,
 				struct lock_class_key *key)
 {
-	mutex_init_lockep(lock, name, key);
+	mutex_init_lockdep(lock, name, key);
 }
 #else
 extern void mutex_init_generic(struct mutex *lock);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2a1d165b3167..c867f6c15530 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -171,7 +171,7 @@ static __always_inline bool __mutex_unlock_fast(struct mutex *lock)
 
 #else /* !CONFIG_DEBUG_LOCK_ALLOC */
 
-void mutex_init_lockep(struct mutex *lock, const char *name, struct lock_class_key *key)
+void mutex_init_lockdep(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
 	__mutex_init_generic(lock);
 
@@ -181,7 +181,7 @@ void mutex_init_lockep(struct mutex *lock, const char *name, struct lock_class_k
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
 }
-EXPORT_SYMBOL(mutex_init_lockep);
+EXPORT_SYMBOL(mutex_init_lockdep);
 #endif /* !CONFIG_DEBUG_LOCK_ALLOC */
 
 static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag)
-- 
cgit v1.2.3


From babcde3be8c9148aa60a14b17831e8f249854963 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Tue, 17 Feb 2026 11:15:11 -0800
Subject: locking/mutex: Fix wrong comment for CONFIG_DEBUG_LOCK_ALLOC

... that endif block should be CONFIG_DEBUG_LOCK_ALLOC, not
CONFIG_LOCKDEP.

Fixes: 51d7a054521d ("locking/mutex: Redo __mutex_init() to reduce generated code size")
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260217191512.1180151-3-dave@stgolabs.net
---
 include/linux/mutex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 8126da959088..f57d2a97da57 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -146,7 +146,7 @@ static inline void __mutex_init(struct mutex *lock, const char *name,
 {
 	mutex_rt_init_generic(lock);
 }
-#endif /* !CONFIG_LOCKDEP */
+#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
 #endif /* CONFIG_PREEMPT_RT */
 
 #ifdef CONFIG_DEBUG_MUTEXES
-- 
cgit v1.2.3


From 50214dc4382055352fb1d7b9779550dabf5059e5 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Tue, 17 Feb 2026 11:15:12 -0800
Subject: locking/mutex: Add killable flavor to guard definitions

The mutex guard family defines _try and _intr variants but is missing
the killable one.

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260217191512.1180151-4-dave@stgolabs.net
---
 include/linux/mutex.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index f57d2a97da57..2f648ee204e7 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -253,6 +253,7 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) __cond_a
 DEFINE_LOCK_GUARD_1(mutex, struct mutex, mutex_lock(_T->lock), mutex_unlock(_T->lock))
 DEFINE_LOCK_GUARD_1_COND(mutex, _try, mutex_trylock(_T->lock))
 DEFINE_LOCK_GUARD_1_COND(mutex, _intr, mutex_lock_interruptible(_T->lock), _RET == 0)
+DEFINE_LOCK_GUARD_1_COND(mutex, _kill, mutex_lock_killable(_T->lock), _RET == 0)
 DEFINE_LOCK_GUARD_1(mutex_init, struct mutex, mutex_init(_T->lock), /* */)
 
 DECLARE_LOCK_GUARD_1_ATTRS(mutex,	__acquires(_T), __releases(*(struct mutex **)_T))
@@ -261,6 +262,8 @@ DECLARE_LOCK_GUARD_1_ATTRS(mutex_try,	__acquires(_T), __releases(*(struct mutex
 #define class_mutex_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_try, _T)
 DECLARE_LOCK_GUARD_1_ATTRS(mutex_intr,	__acquires(_T), __releases(*(struct mutex **)_T))
 #define class_mutex_intr_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_intr, _T)
+DECLARE_LOCK_GUARD_1_ATTRS(mutex_kill,	__acquires(_T), __releases(*(struct mutex **)_T))
+#define class_mutex_kill_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_kill, _T)
 DECLARE_LOCK_GUARD_1_ATTRS(mutex_init,	__acquires(_T), __releases(*(struct mutex **)_T))
 #define class_mutex_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_init, _T)
 
-- 
cgit v1.2.3


From 263ff314cc5602599d481b0912a381555fcbad28 Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@sandisk.com>
Date: Fri, 28 Nov 2025 07:20:11 +0200
Subject: mmc: core: Add quirk for incorrect manufacturing date

Some eMMC vendors need to report manufacturing dates beyond 2025 but are
reluctant to update the EXT_CSD revision from 8 to 9. Changing the
Updating the EXT_CSD revision may involve additional testing or
qualification steps with customers. To ease this transition and avoid a
full re-qualification process, a workaround is needed. This
patch introduces a temporary quirk that re-purposes the year codes
corresponding to 2010, 2011, and 2012 to represent the years 2026, 2027,
and 2028, respectively. This solution is only valid for this three-year
period.

After 2028, vendors must update their firmware to set EXT_CSD_REV=9 to
continue reporting the correct manufacturing date in compliance with the
JEDEC standard.

The `MMC_QUIRK_BROKEN_MDT` is introduced and enabled for all Sandisk
devices to handle this behavior.

Signed-off-by: Avri Altman <avri.altman@sandisk.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/card.h   | 6 ++++++
 drivers/mmc/core/mmc.c    | 5 +++++
 drivers/mmc/core/quirks.h | 3 +++
 include/linux/mmc/card.h  | 1 +
 4 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h
index 1200951bab08..a9619dd45270 100644
--- a/drivers/mmc/core/card.h
+++ b/drivers/mmc/core/card.h
@@ -89,6 +89,7 @@ struct mmc_fixup {
 #define CID_MANFID_MICRON       0x13
 #define CID_MANFID_SAMSUNG      0x15
 #define CID_MANFID_APACER       0x27
+#define CID_MANFID_SANDISK_MMC  0x45
 #define CID_MANFID_SWISSBIT     0x5D
 #define CID_MANFID_KINGSTON     0x70
 #define CID_MANFID_HYNIX	0x90
@@ -305,4 +306,9 @@ static inline int mmc_card_no_uhs_ddr50_tuning(const struct mmc_card *c)
 	return c->quirks & MMC_QUIRK_NO_UHS_DDR50_TUNING;
 }
 
+static inline int mmc_card_broken_mdt(const struct mmc_card *c)
+{
+	return c->quirks & MMC_QUIRK_BROKEN_MDT;
+}
+
 #endif
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index f744dd501842..8846550a8892 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -676,6 +676,11 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
 			/* Adjust production date as per JEDEC JESD84-B51B September 2025 */
 			if (card->cid.year < 2023)
 				card->cid.year += 16;
+		} else {
+			/* Handle vendors with broken MDT reporting */
+			if (mmc_card_broken_mdt(card) && card->cid.year >= 2010 &&
+			    card->cid.year <= 2012)
+				card->cid.year += 16;
 		}
 	}
 
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index c417ed34c057..f5e8a0f6d11b 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -170,6 +170,9 @@ static const struct mmc_fixup __maybe_unused mmc_ext_csd_fixups[] = {
 	MMC_FIXUP_EXT_CSD_REV(CID_NAME_ANY, CID_MANFID_NUMONYX,
 			      0x014e, add_quirk, MMC_QUIRK_BROKEN_HPI, 6),
 
+	MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_MMC, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_BROKEN_MDT),
+
 	END_FIXUP
 };
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index e9e964c20e53..4722dd7e46ce 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -329,6 +329,7 @@ struct mmc_card {
 #define MMC_QUIRK_BROKEN_CACHE_FLUSH	(1<<16)	/* Don't flush cache until the write has occurred */
 #define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY	(1<<17) /* Disable broken SD poweroff notify support */
 #define MMC_QUIRK_NO_UHS_DDR50_TUNING	(1<<18) /* Disable DDR50 tuning */
+#define MMC_QUIRK_BROKEN_MDT    (1<<19) /* Wrong manufacturing year */
 
 	bool			written_flag;	/* Indicates eMMC has been written since power on */
 	bool			reenable_cmdq;	/* Re-enable Command Queue */
-- 
cgit v1.2.3


From c0b68bc25efeaca8a1ef3a0cfab5fbf5f81d002d Mon Sep 17 00:00:00 2001
From: Jeff Chen <jeff.chen_1@nxp.com>
Date: Tue, 13 Jan 2026 11:15:17 +0800
Subject: mmc: sdio: add NXP vendor and IW61x device IDs

Add NXP's SDIO vendor ID (0x0471) and IW61x device ID (0x0205) to
sdio_ids.h for future support of NXP Wi-Fi chips over SDIO.

Signed-off-by: Jeff Chen <jeff.chen_1@nxp.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/sdio_ids.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 673cbdf43453..39ac2b612e4a 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -116,6 +116,9 @@
 #define SDIO_VENDOR_ID_MICROCHIP_WILC		0x0296
 #define SDIO_DEVICE_ID_MICROCHIP_WILC1000	0x5347
 
+#define SDIO_VENDOR_ID_NXP			0x0471
+#define SDIO_DEVICE_ID_NXP_IW61X		0x0205
+
 #define SDIO_VENDOR_ID_REALTEK			0x024c
 #define SDIO_DEVICE_ID_REALTEK_RTW8723BS	0xb723
 #define SDIO_DEVICE_ID_REALTEK_RTW8821BS	0xb821
-- 
cgit v1.2.3


From d6bf2e64dec87322f2b11565ddb59c0e967f96e3 Mon Sep 17 00:00:00 2001
From: Luke Wang <ziniu.wang_1@nxp.com>
Date: Wed, 4 Feb 2026 11:40:03 +0800
Subject: mmc: core: Optimize time for secure erase/trim for some Kingston
 eMMCs

Kingston eMMC IY2964 and IB2932 takes a fixed ~2 seconds for each secure
erase/trim operation regardless of size - that is, a single secure
erase/trim operation of 1MB takes the same time as 1GB. With default
calculated 3.5MB max discard size, secure erase 1GB requires ~300 separate
operations taking ~10 minutes total.

Add a card quirk, MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME, to set maximum
secure erase size for those devices. This allows 1GB secure erase to
complete in a single operation, reducing time from 10 minutes to just 2
seconds.

Signed-off-by: Luke Wang <ziniu.wang_1@nxp.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/card.h   | 5 +++++
 drivers/mmc/core/queue.c  | 9 +++++++--
 drivers/mmc/core/quirks.h | 9 +++++++++
 include/linux/mmc/card.h  | 1 +
 4 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h
index a9619dd45270..a7c364d0030a 100644
--- a/drivers/mmc/core/card.h
+++ b/drivers/mmc/core/card.h
@@ -311,4 +311,9 @@ static inline int mmc_card_broken_mdt(const struct mmc_card *c)
 	return c->quirks & MMC_QUIRK_BROKEN_MDT;
 }
 
+static inline int mmc_card_fixed_secure_erase_trim_time(const struct mmc_card *c)
+{
+	return c->quirks & MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME;
+}
+
 #endif
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 13000fc57e2e..39fcb662c43f 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -184,8 +184,13 @@ static void mmc_queue_setup_discard(struct mmc_card *card,
 		return;
 
 	lim->max_hw_discard_sectors = max_discard;
-	if (mmc_card_can_secure_erase_trim(card))
-		lim->max_secure_erase_sectors = max_discard;
+	if (mmc_card_can_secure_erase_trim(card)) {
+		if (mmc_card_fixed_secure_erase_trim_time(card))
+			lim->max_secure_erase_sectors = UINT_MAX >> card->erase_shift;
+		else
+			lim->max_secure_erase_sectors = max_discard;
+	}
+
 	if (mmc_card_can_trim(card) && card->erased_byte == 0)
 		lim->max_write_zeroes_sectors = max_discard;
 
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index f5e8a0f6d11b..6f727b4a60a5 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -153,6 +153,15 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = {
 	MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc,
 		  MMC_QUIRK_TRIM_BROKEN),
 
+	/*
+	 * On Some Kingston eMMCs, secure erase/trim time is independent
+	 * of erase size, fixed at approximately 2 seconds.
+	 */
+	MMC_FIXUP("IY2964", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc,
+		  MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME),
+	MMC_FIXUP("IB2932", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc,
+		  MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME),
+
 	END_FIXUP
 };
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 4722dd7e46ce..9dc4750296af 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -330,6 +330,7 @@ struct mmc_card {
 #define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY	(1<<17) /* Disable broken SD poweroff notify support */
 #define MMC_QUIRK_NO_UHS_DDR50_TUNING	(1<<18) /* Disable DDR50 tuning */
 #define MMC_QUIRK_BROKEN_MDT    (1<<19) /* Wrong manufacturing year */
+#define MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME	(1<<20) /* Secure erase/trim time is fixed regardless of size */
 
 	bool			written_flag;	/* Indicates eMMC has been written since power on */
 	bool			reenable_cmdq;	/* Re-enable Command Queue */
-- 
cgit v1.2.3


From 94d709be8c0dc875dfc9ebb64d3b8093d0790c15 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 16 Feb 2026 14:31:57 +0100
Subject: xattr: add rcu_head and rhash_head to struct simple_xattr

In preparation for converting simple_xattrs from rbtree to rhashtable,
add rhash_head and rcu_head members to struct simple_xattr. The
rhashtable implementation will use rhash_head for hash table linkage
and RCU-based lockless reads, requiring that replaced or removed xattr
entries be freed via call_rcu() rather than immediately.

Add simple_xattr_free_rcu() which schedules RCU-deferred freeing of an
xattr entry.  This will be used by callers of simple_xattr_set() once
they switch to the rhashtable-based xattr store.

No functional changes.

Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-1-c2efa4f74cb7@kernel.org
Acked-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xattr.c            | 23 +++++++++++++++++++++++
 include/linux/xattr.h |  4 ++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/fs/xattr.c b/fs/xattr.c
index 3e49e612e1ba..9cbb1917bcb2 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1197,6 +1197,29 @@ void simple_xattr_free(struct simple_xattr *xattr)
 	kvfree(xattr);
 }
 
+static void simple_xattr_rcu_free(struct rcu_head *head)
+{
+	struct simple_xattr *xattr;
+
+	xattr = container_of(head, struct simple_xattr, rcu);
+	simple_xattr_free(xattr);
+}
+
+/**
+ * simple_xattr_free_rcu - free an xattr object after an RCU grace period
+ * @xattr: the xattr object
+ *
+ * Schedule RCU-deferred freeing of an xattr entry. This is used by
+ * rhashtable-based callers of simple_xattr_set() that replace or remove
+ * an existing entry while concurrent RCU readers may still be accessing
+ * it.
+ */
+void simple_xattr_free_rcu(struct simple_xattr *xattr)
+{
+	if (xattr)
+		call_rcu(&xattr->rcu, simple_xattr_rcu_free);
+}
+
 /**
  * simple_xattr_alloc - allocate new xattr object
  * @value: value of the xattr object
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 296b5ee5c979..fdbd2095414a 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
+#include <linux/rhashtable-types.h>
 #include <linux/user_namespace.h>
 #include <uapi/linux/xattr.h>
 
@@ -112,6 +113,8 @@ struct simple_xattrs {
 
 struct simple_xattr {
 	struct rb_node rb_node;
+	struct rhash_head hash_node;
+	struct rcu_head rcu;
 	char *name;
 	size_t size;
 	char value[] __counted_by(size);
@@ -122,6 +125,7 @@ void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
 size_t simple_xattr_space(const char *name, size_t size);
 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
 void simple_xattr_free(struct simple_xattr *xattr);
+void simple_xattr_free_rcu(struct simple_xattr *xattr);
 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
 		     void *buffer, size_t size);
 struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
-- 
cgit v1.2.3


From b32c4a213698ab351b44da2fd1b2a5976c7fa033 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 16 Feb 2026 14:31:58 +0100
Subject: xattr: add rhashtable-based simple_xattr infrastructure

Add rhashtable support to the simple_xattr subsystem while keeping the
existing rbtree code fully functional. This allows consumers to be
migrated one at a time without breaking any intermediate build.

struct simple_xattrs gains a dispatch flag and a union holding either
the rbtree (rb_root + rwlock) or rhashtable state:

  struct simple_xattrs {
      bool use_rhashtable;
      union {
          struct { struct rb_root rb_root; rwlock_t lock; };
          struct rhashtable ht;
      };
  };

simple_xattrs_init() continues to set up the rbtree path for existing
embedded-struct callers.

Add simple_xattrs_alloc() which dynamically allocates a simple_xattrs
and initializes the rhashtable path. This is the entry point for
consumers switching to pointer-based lazy allocation.

The five core functions (get, set, list, add, free) dispatch based on
the use_rhashtable flag.

Existing callers continue to use the rbtree path unchanged. As each
consumer is converted it will switch to simple_xattrs_alloc() and the
rhashtable path. Once all consumers are converted a follow-up patch
will remove the rbtree code.

Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-2-c2efa4f74cb7@kernel.org
Acked-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xattr.c            | 439 ++++++++++++++++++++++++++++++++++++++------------
 include/linux/xattr.h |  25 ++-
 mm/shmem.c            |   2 +-
 3 files changed, 357 insertions(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/fs/xattr.c b/fs/xattr.c
index 9cbb1917bcb2..1d98ea459b7b 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -22,6 +22,7 @@
 #include <linux/audit.h>
 #include <linux/vmalloc.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/rhashtable.h>
 
 #include <linux/uaccess.h>
 
@@ -1228,22 +1229,25 @@ void simple_xattr_free_rcu(struct simple_xattr *xattr)
  * Allocate a new xattr object and initialize respective members. The caller is
  * responsible for handling the name of the xattr.
  *
- * Return: On success a new xattr object is returned. On failure NULL is
- * returned.
+ * Return: New xattr object on success, NULL if @value is NULL, ERR_PTR on
+ * failure.
  */
 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
 {
 	struct simple_xattr *new_xattr;
 	size_t len;
 
+	if (!value)
+		return NULL;
+
 	/* wrap around? */
 	len = sizeof(*new_xattr) + size;
 	if (len < sizeof(*new_xattr))
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT);
 	if (!new_xattr)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	new_xattr->size = size;
 	memcpy(new_xattr->value, value, size);
@@ -1287,6 +1291,33 @@ static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
 	return rbtree_simple_xattr_cmp(xattr->name, node);
 }
 
+static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
+{
+	const char *name = data;
+	return jhash(name, strlen(name), seed);
+}
+
+static u32 simple_xattr_obj_hashfn(const void *obj, u32 len, u32 seed)
+{
+	const struct simple_xattr *xattr = obj;
+	return jhash(xattr->name, strlen(xattr->name), seed);
+}
+
+static int simple_xattr_obj_cmpfn(struct rhashtable_compare_arg *arg,
+				   const void *obj)
+{
+	const struct simple_xattr *xattr = obj;
+	return strcmp(xattr->name, arg->key);
+}
+
+static const struct rhashtable_params simple_xattr_params = {
+	.head_offset    = offsetof(struct simple_xattr, hash_node),
+	.hashfn         = simple_xattr_hashfn,
+	.obj_hashfn     = simple_xattr_obj_hashfn,
+	.obj_cmpfn      = simple_xattr_obj_cmpfn,
+	.automatic_shrinking = true,
+};
+
 /**
  * simple_xattr_get - get an xattr object
  * @xattrs: the header of the xattr object
@@ -1306,22 +1337,41 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
 		     void *buffer, size_t size)
 {
 	struct simple_xattr *xattr = NULL;
-	struct rb_node *rbp;
 	int ret = -ENODATA;
 
-	read_lock(&xattrs->lock);
-	rbp = rb_find(name, &xattrs->rb_root, rbtree_simple_xattr_cmp);
-	if (rbp) {
-		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-		ret = xattr->size;
-		if (buffer) {
-			if (size < xattr->size)
-				ret = -ERANGE;
-			else
-				memcpy(buffer, xattr->value, xattr->size);
+	if (xattrs->use_rhashtable) {
+		guard(rcu)();
+		xattr = rhashtable_lookup(&xattrs->ht, name,
+					  simple_xattr_params);
+		if (xattr) {
+			ret = xattr->size;
+			if (buffer) {
+				if (size < xattr->size)
+					ret = -ERANGE;
+				else
+					memcpy(buffer, xattr->value,
+					       xattr->size);
+			}
+		}
+	} else {
+		struct rb_node *rbp;
+
+		read_lock(&xattrs->lock);
+		rbp = rb_find(name, &xattrs->rb_root,
+			      rbtree_simple_xattr_cmp);
+		if (rbp) {
+			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
+			ret = xattr->size;
+			if (buffer) {
+				if (size < xattr->size)
+					ret = -ERANGE;
+				else
+					memcpy(buffer, xattr->value,
+					       xattr->size);
+			}
 		}
+		read_unlock(&xattrs->lock);
 	}
-	read_unlock(&xattrs->lock);
 	return ret;
 }
 
@@ -1355,78 +1405,134 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
 				      const char *name, const void *value,
 				      size_t size, int flags)
 {
-	struct simple_xattr *old_xattr = NULL, *new_xattr = NULL;
-	struct rb_node *parent = NULL, **rbp;
-	int err = 0, ret;
+	struct simple_xattr *old_xattr = NULL;
+	int err = 0;
 
-	/* value == NULL means remove */
-	if (value) {
-		new_xattr = simple_xattr_alloc(value, size);
-		if (!new_xattr)
-			return ERR_PTR(-ENOMEM);
+	CLASS(simple_xattr, new_xattr)(value, size);
+	if (IS_ERR(new_xattr))
+		return new_xattr;
 
+	if (new_xattr) {
 		new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
-		if (!new_xattr->name) {
-			simple_xattr_free(new_xattr);
+		if (!new_xattr->name)
 			return ERR_PTR(-ENOMEM);
-		}
 	}
 
-	write_lock(&xattrs->lock);
-	rbp = &xattrs->rb_root.rb_node;
-	while (*rbp) {
-		parent = *rbp;
-		ret = rbtree_simple_xattr_cmp(name, *rbp);
-		if (ret < 0)
-			rbp = &(*rbp)->rb_left;
-		else if (ret > 0)
-			rbp = &(*rbp)->rb_right;
-		else
-			old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
-		if (old_xattr)
-			break;
-	}
+	if (xattrs->use_rhashtable) {
+		/*
+		 * Lookup is safe without RCU here since writes are
+		 * serialized by the caller.
+		 */
+		old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
+						   simple_xattr_params);
+
+		if (old_xattr) {
+			/* Fail if XATTR_CREATE is requested and the xattr exists. */
+			if (flags & XATTR_CREATE)
+				return ERR_PTR(-EEXIST);
+
+			if (new_xattr) {
+				err = rhashtable_replace_fast(&xattrs->ht,
+							     &old_xattr->hash_node,
+							     &new_xattr->hash_node,
+							     simple_xattr_params);
+				if (err)
+					return ERR_PTR(err);
+			} else {
+				err = rhashtable_remove_fast(&xattrs->ht,
+							    &old_xattr->hash_node,
+							    simple_xattr_params);
+				if (err)
+					return ERR_PTR(err);
+			}
+		} else {
+			/* Fail if XATTR_REPLACE is requested but no xattr is found. */
+			if (flags & XATTR_REPLACE)
+				return ERR_PTR(-ENODATA);
+
+			/*
+			 * If XATTR_CREATE or no flags are specified together
+			 * with a new value simply insert it.
+			 */
+			if (new_xattr) {
+				err = rhashtable_insert_fast(&xattrs->ht,
+							    &new_xattr->hash_node,
+							    simple_xattr_params);
+				if (err)
+					return ERR_PTR(err);
+			}
 
-	if (old_xattr) {
-		/* Fail if XATTR_CREATE is requested and the xattr exists. */
-		if (flags & XATTR_CREATE) {
-			err = -EEXIST;
-			goto out_unlock;
+			/*
+			 * If XATTR_CREATE or no flags are specified and
+			 * neither an old or new xattr exist then we don't
+			 * need to do anything.
+			 */
 		}
-
-		if (new_xattr)
-			rb_replace_node(&old_xattr->rb_node,
-					&new_xattr->rb_node, &xattrs->rb_root);
-		else
-			rb_erase(&old_xattr->rb_node, &xattrs->rb_root);
 	} else {
-		/* Fail if XATTR_REPLACE is requested but no xattr is found. */
-		if (flags & XATTR_REPLACE) {
-			err = -ENODATA;
-			goto out_unlock;
-		}
+		struct rb_node *parent = NULL, **rbp;
+		int ret;
 
-		/*
-		 * If XATTR_CREATE or no flags are specified together with a
-		 * new value simply insert it.
-		 */
-		if (new_xattr) {
-			rb_link_node(&new_xattr->rb_node, parent, rbp);
-			rb_insert_color(&new_xattr->rb_node, &xattrs->rb_root);
+		write_lock(&xattrs->lock);
+		rbp = &xattrs->rb_root.rb_node;
+		while (*rbp) {
+			parent = *rbp;
+			ret = rbtree_simple_xattr_cmp(name, *rbp);
+			if (ret < 0)
+				rbp = &(*rbp)->rb_left;
+			else if (ret > 0)
+				rbp = &(*rbp)->rb_right;
+			else
+				old_xattr = rb_entry(*rbp, struct simple_xattr,
+						     rb_node);
+			if (old_xattr)
+				break;
 		}
 
-		/*
-		 * If XATTR_CREATE or no flags are specified and neither an
-		 * old or new xattr exist then we don't need to do anything.
-		 */
-	}
+		if (old_xattr) {
+			/* Fail if XATTR_CREATE is requested and the xattr exists. */
+			if (flags & XATTR_CREATE) {
+				err = -EEXIST;
+				goto out_unlock;
+			}
+
+			if (new_xattr)
+				rb_replace_node(&old_xattr->rb_node,
+						&new_xattr->rb_node,
+						&xattrs->rb_root);
+			else
+				rb_erase(&old_xattr->rb_node,
+					 &xattrs->rb_root);
+		} else {
+			/* Fail if XATTR_REPLACE is requested but no xattr is found. */
+			if (flags & XATTR_REPLACE) {
+				err = -ENODATA;
+				goto out_unlock;
+			}
+
+			/*
+			 * If XATTR_CREATE or no flags are specified together
+			 * with a new value simply insert it.
+			 */
+			if (new_xattr) {
+				rb_link_node(&new_xattr->rb_node, parent, rbp);
+				rb_insert_color(&new_xattr->rb_node,
+						&xattrs->rb_root);
+			}
+
+			/*
+			 * If XATTR_CREATE or no flags are specified and
+			 * neither an old or new xattr exist then we don't
+			 * need to do anything.
+			 */
+		}
 
 out_unlock:
-	write_unlock(&xattrs->lock);
-	if (!err)
-		return old_xattr;
-	simple_xattr_free(new_xattr);
-	return ERR_PTR(err);
+		write_unlock(&xattrs->lock);
+		if (err)
+			return ERR_PTR(err);
+	}
+	retain_and_null_ptr(new_xattr);
+	return old_xattr;
 }
 
 static bool xattr_is_trusted(const char *name)
@@ -1467,7 +1573,6 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 {
 	bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
 	struct simple_xattr *xattr;
-	struct rb_node *rbp;
 	ssize_t remaining_size = size;
 	int err = 0;
 
@@ -1487,23 +1592,62 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 	remaining_size -= err;
 	err = 0;
 
-	read_lock(&xattrs->lock);
-	for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
-		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
+	if (!xattrs)
+		return size - remaining_size;
 
-		/* skip "trusted." attributes for unprivileged callers */
-		if (!trusted && xattr_is_trusted(xattr->name))
-			continue;
+	if (xattrs->use_rhashtable) {
+		struct rhashtable_iter iter;
 
-		/* skip MAC labels; these are provided by LSM above */
-		if (xattr_is_maclabel(xattr->name))
-			continue;
+		rhashtable_walk_enter(&xattrs->ht, &iter);
+		rhashtable_walk_start(&iter);
 
-		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
-		if (err)
-			break;
+		while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
+			if (IS_ERR(xattr)) {
+				if (PTR_ERR(xattr) == -EAGAIN)
+					continue;
+				err = PTR_ERR(xattr);
+				break;
+			}
+
+			/* skip "trusted." attributes for unprivileged callers */
+			if (!trusted && xattr_is_trusted(xattr->name))
+				continue;
+
+			/* skip MAC labels; these are provided by LSM above */
+			if (xattr_is_maclabel(xattr->name))
+				continue;
+
+			err = xattr_list_one(&buffer, &remaining_size,
+					     xattr->name);
+			if (err)
+				break;
+		}
+
+		rhashtable_walk_stop(&iter);
+		rhashtable_walk_exit(&iter);
+	} else {
+		struct rb_node *rbp;
+
+		read_lock(&xattrs->lock);
+		for (rbp = rb_first(&xattrs->rb_root); rbp;
+		     rbp = rb_next(rbp)) {
+			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
+
+			/* skip "trusted." attributes for unprivileged callers */
+			if (!trusted && xattr_is_trusted(xattr->name))
+				continue;
+
+			/* skip MAC labels; these are provided by LSM above */
+			if (xattr_is_maclabel(xattr->name))
+				continue;
+
+			err = xattr_list_one(&buffer, &remaining_size,
+					     xattr->name);
+			if (err)
+				break;
+		}
+		read_unlock(&xattrs->lock);
 	}
-	read_unlock(&xattrs->lock);
 
 	return err ? err : size - remaining_size;
 }
@@ -1536,9 +1680,16 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
 void simple_xattr_add(struct simple_xattrs *xattrs,
 		      struct simple_xattr *new_xattr)
 {
-	write_lock(&xattrs->lock);
-	rb_add(&new_xattr->rb_node, &xattrs->rb_root, rbtree_simple_xattr_less);
-	write_unlock(&xattrs->lock);
+	if (xattrs->use_rhashtable) {
+		WARN_ON(rhashtable_insert_fast(&xattrs->ht,
+					       &new_xattr->hash_node,
+					       simple_xattr_params));
+	} else {
+		write_lock(&xattrs->lock);
+		rb_add(&new_xattr->rb_node, &xattrs->rb_root,
+		       rbtree_simple_xattr_less);
+		write_unlock(&xattrs->lock);
+	}
 }
 
 /**
@@ -1549,10 +1700,80 @@ void simple_xattr_add(struct simple_xattrs *xattrs,
  */
 void simple_xattrs_init(struct simple_xattrs *xattrs)
 {
+	xattrs->use_rhashtable = false;
 	xattrs->rb_root = RB_ROOT;
 	rwlock_init(&xattrs->lock);
 }
 
+/**
+ * simple_xattrs_alloc - allocate and initialize a new xattr header
+ *
+ * Dynamically allocate a simple_xattrs header and initialize the
+ * underlying rhashtable. This is intended for consumers that want
+ * rhashtable-based xattr storage.
+ *
+ * Return: On success a new simple_xattrs is returned. On failure an
+ * ERR_PTR is returned.
+ */
+struct simple_xattrs *simple_xattrs_alloc(void)
+{
+	struct simple_xattrs *xattrs __free(kfree) = NULL;
+
+	xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL);
+	if (!xattrs)
+		return ERR_PTR(-ENOMEM);
+
+	xattrs->use_rhashtable = true;
+	if (rhashtable_init(&xattrs->ht, &simple_xattr_params))
+		return ERR_PTR(-ENOMEM);
+
+	return no_free_ptr(xattrs);
+}
+
+/**
+ * simple_xattrs_lazy_alloc - get or allocate xattrs for a set operation
+ * @xattrsp: pointer to the xattrs pointer (may point to NULL)
+ * @value: value being set (NULL means remove)
+ * @flags: xattr set flags
+ *
+ * For lazily-allocated xattrs on the write path. If no xattrs exist yet
+ * and this is a remove operation, returns the appropriate result without
+ * allocating. Otherwise ensures xattrs is allocated and published with
+ * store-release semantics.
+ *
+ * Return: On success a valid pointer to the xattrs is returned. On
+ * failure or early-exit an ERR_PTR or NULL is returned. Callers should
+ * check with IS_ERR_OR_NULL() and propagate with PTR_ERR() which
+ * correctly returns 0 for the NULL no-op case.
+ */
+struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
+					       const void *value, int flags)
+{
+	struct simple_xattrs *xattrs;
+
+	xattrs = READ_ONCE(*xattrsp);
+	if (xattrs)
+		return xattrs;
+
+	if (!value)
+		return (flags & XATTR_REPLACE) ? ERR_PTR(-ENODATA) : NULL;
+
+	xattrs = simple_xattrs_alloc();
+	if (!IS_ERR(xattrs))
+		smp_store_release(xattrsp, xattrs);
+	return xattrs;
+}
+
+static void simple_xattr_ht_free(void *ptr, void *arg)
+{
+	struct simple_xattr *xattr = ptr;
+	size_t *freed_space = arg;
+
+	if (freed_space)
+		*freed_space += simple_xattr_space(xattr->name, xattr->size);
+	simple_xattr_free(xattr);
+}
+
 /**
  * simple_xattrs_free - free xattrs
  * @xattrs: xattr header whose xattrs to destroy
@@ -1563,22 +1784,28 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
  */
 void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
 {
-	struct rb_node *rbp;
-
 	if (freed_space)
 		*freed_space = 0;
-	rbp = rb_first(&xattrs->rb_root);
-	while (rbp) {
-		struct simple_xattr *xattr;
-		struct rb_node *rbp_next;
-
-		rbp_next = rb_next(rbp);
-		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-		rb_erase(&xattr->rb_node, &xattrs->rb_root);
-		if (freed_space)
-			*freed_space += simple_xattr_space(xattr->name,
-							   xattr->size);
-		simple_xattr_free(xattr);
-		rbp = rbp_next;
+
+	if (xattrs->use_rhashtable) {
+		rhashtable_free_and_destroy(&xattrs->ht,
+					    simple_xattr_ht_free, freed_space);
+	} else {
+		struct rb_node *rbp;
+
+		rbp = rb_first(&xattrs->rb_root);
+		while (rbp) {
+			struct simple_xattr *xattr;
+			struct rb_node *rbp_next;
+
+			rbp_next = rb_next(rbp);
+			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
+			rb_erase(&xattr->rb_node, &xattrs->rb_root);
+			if (freed_space)
+				*freed_space += simple_xattr_space(xattr->name,
+								   xattr->size);
+			simple_xattr_free(xattr);
+			rbp = rbp_next;
+		}
 	}
 }
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index fdbd2095414a..832a44358661 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -107,8 +107,14 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
 }
 
 struct simple_xattrs {
-	struct rb_root rb_root;
-	rwlock_t lock;
+	bool use_rhashtable;
+	union {
+		struct {
+			struct rb_root rb_root;
+			rwlock_t lock;
+		};
+		struct rhashtable ht;
+	};
 };
 
 struct simple_xattr {
@@ -121,6 +127,9 @@ struct simple_xattr {
 };
 
 void simple_xattrs_init(struct simple_xattrs *xattrs);
+struct simple_xattrs *simple_xattrs_alloc(void);
+struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
+					       const void *value, int flags);
 void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
 size_t simple_xattr_space(const char *name, size_t size);
 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
@@ -137,4 +146,16 @@ void simple_xattr_add(struct simple_xattrs *xattrs,
 		      struct simple_xattr *new_xattr);
 int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name);
 
+DEFINE_CLASS(simple_xattr,
+	     struct simple_xattr *,
+	     if (!IS_ERR_OR_NULL(_T)) simple_xattr_free(_T),
+	     simple_xattr_alloc(value, size),
+	     const void *value, size_t size)
+
+DEFINE_CLASS(simple_xattrs,
+            struct simple_xattrs *,
+            if (!IS_ERR_OR_NULL(_T)) { simple_xattrs_free(_T, NULL); kfree(_T); },
+            simple_xattrs_alloc(),
+            void)
+
 #endif	/* _LINUX_XATTR_H */
diff --git a/mm/shmem.c b/mm/shmem.c
index b40f3cd48961..35c2f8748668 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -4278,7 +4278,7 @@ static int shmem_initxattrs(struct inode *inode,
 
 	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
 		new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
-		if (!new_xattr)
+		if (IS_ERR(new_xattr))
 			break;
 
 		len = strlen(xattr->name) + 1;
-- 
cgit v1.2.3


From 52b364fed6e1578e551fee20c76fecb3fc0e10ed Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 16 Feb 2026 14:31:59 +0100
Subject: shmem: adapt to rhashtable-based simple_xattrs with lazy allocation

Adapt tmpfs/shmem to use the rhashtable-based xattr path and switch
from an embedded struct to pointer-based lazy allocation.

Change shmem_inode_info.xattrs from embedded 'struct simple_xattrs' to
a pointer 'struct simple_xattrs *', initialized to NULL. This avoids
the rhashtable overhead for every tmpfs inode, which helps when a lot of
inodes exist.

The xattr store is allocated on first use:

- shmem_initxattrs(): Allocates via simple_xattrs_alloc() when
  security modules set initial xattrs during inode creation.

- shmem_xattr_handler_set(): Allocates on first setxattr, with a
  short-circuit for removal when no xattrs are stored yet.

All read paths (shmem_xattr_handler_get, shmem_listxattr) check for
NULL xattrs pointer and return -ENODATA or 0 respectively.

Replaced xattr entries are freed via simple_xattr_free_rcu() to allow
concurrent RCU readers to finish.

shmem_evict_inode() conditionally frees the xattr store only when
allocated.

Also change simple_xattr_add() from void to int to propagate
rhashtable insertion failures. shmem_initxattrs() is the only caller.

Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-3-c2efa4f74cb7@kernel.org
Acked-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xattr.c               | 26 +++++++++++++-------------
 include/linux/shmem_fs.h |  2 +-
 include/linux/xattr.h    |  4 ++--
 mm/shmem.c               | 44 +++++++++++++++++++++++++++++++-------------
 4 files changed, 47 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/xattr.c b/fs/xattr.c
index 1d98ea459b7b..eb45ae0fd17f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1677,19 +1677,19 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
  * of matching xattrs is wanted. Should only be called during inode
  * initialization when a few distinct initial xattrs are supposed to be set.
  */
-void simple_xattr_add(struct simple_xattrs *xattrs,
-		      struct simple_xattr *new_xattr)
-{
-	if (xattrs->use_rhashtable) {
-		WARN_ON(rhashtable_insert_fast(&xattrs->ht,
-					       &new_xattr->hash_node,
-					       simple_xattr_params));
-	} else {
-		write_lock(&xattrs->lock);
-		rb_add(&new_xattr->rb_node, &xattrs->rb_root,
-		       rbtree_simple_xattr_less);
-		write_unlock(&xattrs->lock);
-	}
+int simple_xattr_add(struct simple_xattrs *xattrs,
+		     struct simple_xattr *new_xattr)
+{
+	if (xattrs->use_rhashtable)
+		return rhashtable_insert_fast(&xattrs->ht,
+					      &new_xattr->hash_node,
+					      simple_xattr_params);
+
+	write_lock(&xattrs->lock);
+	rb_add(&new_xattr->rb_node, &xattrs->rb_root,
+	       rbtree_simple_xattr_less);
+	write_unlock(&xattrs->lock);
+	return 0;
 }
 
 /**
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a8273b32e041..f6a2d3402d76 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -48,7 +48,7 @@ struct shmem_inode_info {
 	};
 	struct timespec64	i_crtime;	/* file creation time */
 	struct shared_policy	policy;		/* NUMA memory alloc policy */
-	struct simple_xattrs	xattrs;		/* list of xattrs */
+	struct simple_xattrs	*xattrs;	/* list of xattrs */
 	pgoff_t			fallocend;	/* highest fallocate endindex */
 	unsigned int		fsflags;	/* for FS_IOC_[SG]ETFLAGS */
 	atomic_t		stop_eviction;	/* hold when working on inode */
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 832a44358661..6e619e185e90 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -142,8 +142,8 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
 				      size_t size, int flags);
 ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 			  char *buffer, size_t size);
-void simple_xattr_add(struct simple_xattrs *xattrs,
-		      struct simple_xattr *new_xattr);
+int simple_xattr_add(struct simple_xattrs *xattrs,
+		     struct simple_xattr *new_xattr);
 int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name);
 
 DEFINE_CLASS(simple_xattr,
diff --git a/mm/shmem.c b/mm/shmem.c
index 35c2f8748668..0b0e577e880a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1425,7 +1425,10 @@ static void shmem_evict_inode(struct inode *inode)
 		}
 	}
 
-	simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+	if (info->xattrs) {
+		simple_xattrs_free(info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+		kfree(info->xattrs);
+	}
 	shmem_free_inode(inode->i_sb, freed);
 	WARN_ON(inode->i_blocks);
 	clear_inode(inode);
@@ -3101,7 +3104,6 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
 		shmem_set_inode_flags(inode, info->fsflags, NULL);
 	INIT_LIST_HEAD(&info->shrinklist);
 	INIT_LIST_HEAD(&info->swaplist);
-	simple_xattrs_init(&info->xattrs);
 	cache_no_acl(inode);
 	if (sbinfo->noswap)
 		mapping_set_unevictable(inode->i_mapping);
@@ -4255,10 +4257,13 @@ static int shmem_initxattrs(struct inode *inode,
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	const struct xattr *xattr;
-	struct simple_xattr *new_xattr;
 	size_t ispace = 0;
 	size_t len;
 
+	CLASS(simple_xattrs, xattrs)();
+	if (IS_ERR(xattrs))
+		return PTR_ERR(xattrs);
+
 	if (sbinfo->max_inodes) {
 		for (xattr = xattr_array; xattr->name != NULL; xattr++) {
 			ispace += simple_xattr_space(xattr->name,
@@ -4277,24 +4282,24 @@ static int shmem_initxattrs(struct inode *inode,
 	}
 
 	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
-		new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
+		CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len);
 		if (IS_ERR(new_xattr))
 			break;
 
 		len = strlen(xattr->name) + 1;
 		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
 					  GFP_KERNEL_ACCOUNT);
-		if (!new_xattr->name) {
-			kvfree(new_xattr);
+		if (!new_xattr->name)
 			break;
-		}
 
 		memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
 		       XATTR_SECURITY_PREFIX_LEN);
 		memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
 		       xattr->name, len);
 
-		simple_xattr_add(&info->xattrs, new_xattr);
+		if (simple_xattr_add(xattrs, new_xattr))
+			break;
+		retain_and_null_ptr(new_xattr);
 	}
 
 	if (xattr->name != NULL) {
@@ -4303,10 +4308,10 @@ static int shmem_initxattrs(struct inode *inode,
 			sbinfo->free_ispace += ispace;
 			raw_spin_unlock(&sbinfo->stat_lock);
 		}
-		simple_xattrs_free(&info->xattrs, NULL);
 		return -ENOMEM;
 	}
 
+	smp_store_release(&info->xattrs, no_free_ptr(xattrs));
 	return 0;
 }
 
@@ -4315,9 +4320,14 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler,
 				   const char *name, void *buffer, size_t size)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct simple_xattrs *xattrs;
+
+	xattrs = READ_ONCE(info->xattrs);
+	if (!xattrs)
+		return -ENODATA;
 
 	name = xattr_full_name(handler, name);
-	return simple_xattr_get(&info->xattrs, name, buffer, size);
+	return simple_xattr_get(xattrs, name, buffer, size);
 }
 
 static int shmem_xattr_handler_set(const struct xattr_handler *handler,
@@ -4328,10 +4338,16 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	struct simple_xattrs *xattrs;
 	struct simple_xattr *old_xattr;
 	size_t ispace = 0;
 
 	name = xattr_full_name(handler, name);
+
+	xattrs = simple_xattrs_lazy_alloc(&info->xattrs, value, flags);
+	if (IS_ERR_OR_NULL(xattrs))
+		return PTR_ERR(xattrs);
+
 	if (value && sbinfo->max_inodes) {
 		ispace = simple_xattr_space(name, size);
 		raw_spin_lock(&sbinfo->stat_lock);
@@ -4344,13 +4360,13 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
 			return -ENOSPC;
 	}
 
-	old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
+	old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
 	if (!IS_ERR(old_xattr)) {
 		ispace = 0;
 		if (old_xattr && sbinfo->max_inodes)
 			ispace = simple_xattr_space(old_xattr->name,
 						    old_xattr->size);
-		simple_xattr_free(old_xattr);
+		simple_xattr_free_rcu(old_xattr);
 		old_xattr = NULL;
 		inode_set_ctime_current(inode);
 		inode_inc_iversion(inode);
@@ -4391,7 +4407,9 @@ static const struct xattr_handler * const shmem_xattr_handlers[] = {
 static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
 	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
-	return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
+
+	return simple_xattr_list(d_inode(dentry), READ_ONCE(info->xattrs),
+				 buffer, size);
 }
 #endif /* CONFIG_TMPFS_XATTR */
 
-- 
cgit v1.2.3


From f4cc3ab824d6772a48ca9d9c74ac623b3309985d Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 7 Oct 2025 14:06:05 +0200
Subject: dma-buf: protected fence ops by RCU v8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fence ops of a dma_fence currently need to life as long as the
dma_fence is alive. This means that the module which originally issued
a dma_fence can't unload unless all fences are freed up.

As first step to solve this issue protect the fence ops by RCU.

While it is counter intuitive to protect a constant function pointer table
by RCU it allows modules to wait for an RCU grace period before they
unload, to make sure that nobody is executing their functions any more.

This patch has not much functional change, but only adds the RCU
handling for the static checker to test.

v2: make one the now duplicated lockdep warnings a comment instead.
v3: Add more documentation to ->wait and ->release callback.
v4: fix typo in documentation
v5: rebased on drm-tip
v6: improve code comments
v7: improve commit message and code comments
v8: fix sparse rcu warnings

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20260219160822.1529-2-christian.koenig@amd.com
---
 drivers/dma-buf/dma-fence.c             | 71 +++++++++++++++++++++++----------
 drivers/gpu/drm/drm_crtc.c              |  2 +-
 drivers/gpu/drm/scheduler/sched_fence.c |  4 +-
 include/linux/dma-fence.h               | 33 ++++++++++++---
 4 files changed, 80 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 7e8db99186c2..076e6e6c75be 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -522,6 +522,7 @@ EXPORT_SYMBOL(dma_fence_signal);
 signed long
 dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
 {
+	const struct dma_fence_ops *ops;
 	signed long ret;
 
 	if (WARN_ON(timeout < 0))
@@ -533,15 +534,22 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
 
 	dma_fence_enable_sw_signaling(fence);
 
-	if (trace_dma_fence_wait_start_enabled()) {
-		rcu_read_lock();
-		trace_dma_fence_wait_start(fence);
+	rcu_read_lock();
+	ops = rcu_dereference(fence->ops);
+	trace_dma_fence_wait_start(fence);
+	if (ops->wait) {
+		/*
+		 * Implementing the wait ops is deprecated and not supported for
+		 * issuers of fences who need their lifetime to be independent
+		 * of their module after they signal, so it is ok to use the
+		 * ops outside the RCU protected section.
+		 */
+		rcu_read_unlock();
+		ret = ops->wait(fence, intr, timeout);
+	} else {
 		rcu_read_unlock();
-	}
-	if (fence->ops->wait)
-		ret = fence->ops->wait(fence, intr, timeout);
-	else
 		ret = dma_fence_default_wait(fence, intr, timeout);
+	}
 	if (trace_dma_fence_wait_end_enabled()) {
 		rcu_read_lock();
 		trace_dma_fence_wait_end(fence);
@@ -562,6 +570,7 @@ void dma_fence_release(struct kref *kref)
 {
 	struct dma_fence *fence =
 		container_of(kref, struct dma_fence, refcount);
+	const struct dma_fence_ops *ops;
 
 	rcu_read_lock();
 	trace_dma_fence_destroy(fence);
@@ -593,12 +602,12 @@ void dma_fence_release(struct kref *kref)
 		spin_unlock_irqrestore(fence->lock, flags);
 	}
 
-	rcu_read_unlock();
-
-	if (fence->ops->release)
-		fence->ops->release(fence);
+	ops = rcu_dereference(fence->ops);
+	if (ops->release)
+		ops->release(fence);
 	else
 		dma_fence_free(fence);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(dma_fence_release);
 
@@ -617,6 +626,7 @@ EXPORT_SYMBOL(dma_fence_free);
 
 static bool __dma_fence_enable_signaling(struct dma_fence *fence)
 {
+	const struct dma_fence_ops *ops;
 	bool was_set;
 
 	lockdep_assert_held(fence->lock);
@@ -627,14 +637,18 @@ static bool __dma_fence_enable_signaling(struct dma_fence *fence)
 	if (dma_fence_test_signaled_flag(fence))
 		return false;
 
-	if (!was_set && fence->ops->enable_signaling) {
+	rcu_read_lock();
+	ops = rcu_dereference(fence->ops);
+	if (!was_set && ops->enable_signaling) {
 		trace_dma_fence_enable_signal(fence);
 
-		if (!fence->ops->enable_signaling(fence)) {
+		if (!ops->enable_signaling(fence)) {
+			rcu_read_unlock();
 			dma_fence_signal_locked(fence);
 			return false;
 		}
 	}
+	rcu_read_unlock();
 
 	return true;
 }
@@ -1007,8 +1021,13 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout);
  */
 void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
 {
-	if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
-		fence->ops->set_deadline(fence, deadline);
+	const struct dma_fence_ops *ops;
+
+	rcu_read_lock();
+	ops = rcu_dereference(fence->ops);
+	if (ops->set_deadline && !dma_fence_is_signaled(fence))
+		ops->set_deadline(fence, deadline);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(dma_fence_set_deadline);
 
@@ -1049,7 +1068,13 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
 	BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
 
 	kref_init(&fence->refcount);
-	fence->ops = ops;
+	/*
+	 * While it is counter intuitive to protect a constant function pointer
+	 * table by RCU it allows modules to wait for an RCU grace period
+	 * before they unload, to make sure that nobody is executing their
+	 * functions any more.
+	 */
+	RCU_INIT_POINTER(fence->ops, ops);
 	INIT_LIST_HEAD(&fence->cb_list);
 	fence->lock = lock;
 	fence->context = context;
@@ -1129,11 +1154,12 @@ EXPORT_SYMBOL(dma_fence_init64);
  */
 const char __rcu *dma_fence_driver_name(struct dma_fence *fence)
 {
-	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
-			 "RCU protection is required for safe access to returned string");
+	const struct dma_fence_ops *ops;
 
+	/* RCU protection is required for safe access to returned string */
+	ops = rcu_dereference(fence->ops);
 	if (!dma_fence_test_signaled_flag(fence))
-		return (const char __rcu *)fence->ops->get_driver_name(fence);
+		return (const char __rcu *)ops->get_driver_name(fence);
 	else
 		return (const char __rcu *)"detached-driver";
 }
@@ -1161,11 +1187,12 @@ EXPORT_SYMBOL(dma_fence_driver_name);
  */
 const char __rcu *dma_fence_timeline_name(struct dma_fence *fence)
 {
-	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
-			 "RCU protection is required for safe access to returned string");
+	const struct dma_fence_ops *ops;
 
+	/* RCU protection is required for safe access to returned string */
+	ops = rcu_dereference(fence->ops);
 	if (!dma_fence_test_signaled_flag(fence))
-		return (const char __rcu *)fence->ops->get_driver_name(fence);
+		return (const char __rcu *)ops->get_driver_name(fence);
 	else
 		return (const char __rcu *)"signaled-timeline";
 }
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 90684f30a048..960fdc1cc6ba 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -158,7 +158,7 @@ static const struct dma_fence_ops drm_crtc_fence_ops;
 
 static struct drm_crtc *fence_to_crtc(struct dma_fence *fence)
 {
-	BUG_ON(fence->ops != &drm_crtc_fence_ops);
+	BUG_ON(rcu_access_pointer(fence->ops) != &drm_crtc_fence_ops);
 	return container_of(fence->lock, struct drm_crtc, fence_lock);
 }
 
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index 9391d6f0dc01..a27786cb86fb 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -195,10 +195,10 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
 
 struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
 {
-	if (f->ops == &drm_sched_fence_ops_scheduled)
+	if (rcu_access_pointer(f->ops) == &drm_sched_fence_ops_scheduled)
 		return container_of(f, struct drm_sched_fence, scheduled);
 
-	if (f->ops == &drm_sched_fence_ops_finished)
+	if (rcu_access_pointer(f->ops) == &drm_sched_fence_ops_finished)
 		return container_of(f, struct drm_sched_fence, finished);
 
 	return NULL;
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 9c4d25289239..fa3cfe3e98ac 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -67,7 +67,7 @@ struct seq_file;
  */
 struct dma_fence {
 	spinlock_t *lock;
-	const struct dma_fence_ops *ops;
+	const struct dma_fence_ops __rcu *ops;
 	/*
 	 * We clear the callback list on kref_put so that by the time we
 	 * release the fence it is unused. No one should be adding to the
@@ -220,6 +220,10 @@ struct dma_fence_ops {
 	 * timed out. Can also return other error values on custom implementations,
 	 * which should be treated as if the fence is signaled. For example a hardware
 	 * lockup could be reported like that.
+	 *
+	 * Implementing this callback prevents the fence from detaching after
+	 * signaling and so it is necessary for the module providing the
+	 * dma_fence_ops to stay loaded as long as the dma_fence exists.
 	 */
 	signed long (*wait)(struct dma_fence *fence,
 			    bool intr, signed long timeout);
@@ -231,6 +235,13 @@ struct dma_fence_ops {
 	 * Can be called from irq context.  This callback is optional. If it is
 	 * NULL, then dma_fence_free() is instead called as the default
 	 * implementation.
+	 *
+	 * Implementing this callback prevents the fence from detaching after
+	 * signaling and so it is necessary for the module providing the
+	 * dma_fence_ops to stay loaded as long as the dma_fence exists.
+	 *
+	 * If the callback is implemented the memory backing the dma_fence
+	 * object must be freed RCU safe.
 	 */
 	void (*release)(struct dma_fence *fence);
 
@@ -454,13 +465,19 @@ dma_fence_test_signaled_flag(struct dma_fence *fence)
 static inline bool
 dma_fence_is_signaled_locked(struct dma_fence *fence)
 {
+	const struct dma_fence_ops *ops;
+
 	if (dma_fence_test_signaled_flag(fence))
 		return true;
 
-	if (fence->ops->signaled && fence->ops->signaled(fence)) {
+	rcu_read_lock();
+	ops = rcu_dereference(fence->ops);
+	if (ops->signaled && ops->signaled(fence)) {
+		rcu_read_unlock();
 		dma_fence_signal_locked(fence);
 		return true;
 	}
+	rcu_read_unlock();
 
 	return false;
 }
@@ -484,13 +501,19 @@ dma_fence_is_signaled_locked(struct dma_fence *fence)
 static inline bool
 dma_fence_is_signaled(struct dma_fence *fence)
 {
+	const struct dma_fence_ops *ops;
+
 	if (dma_fence_test_signaled_flag(fence))
 		return true;
 
-	if (fence->ops->signaled && fence->ops->signaled(fence)) {
+	rcu_read_lock();
+	ops = rcu_dereference(fence->ops);
+	if (ops->signaled && ops->signaled(fence)) {
+		rcu_read_unlock();
 		dma_fence_signal(fence);
 		return true;
 	}
+	rcu_read_unlock();
 
 	return false;
 }
@@ -695,7 +718,7 @@ extern const struct dma_fence_ops dma_fence_chain_ops;
  */
 static inline bool dma_fence_is_array(struct dma_fence *fence)
 {
-	return fence->ops == &dma_fence_array_ops;
+	return rcu_access_pointer(fence->ops) == &dma_fence_array_ops;
 }
 
 /**
@@ -706,7 +729,7 @@ static inline bool dma_fence_is_array(struct dma_fence *fence)
  */
 static inline bool dma_fence_is_chain(struct dma_fence *fence)
 {
-	return fence->ops == &dma_fence_chain_ops;
+	return rcu_access_pointer(fence->ops) == &dma_fence_chain_ops;
 }
 
 /**
-- 
cgit v1.2.3


From 541c8f2468b933acc5d129e84bd264923675a66e Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 8 Oct 2025 18:12:46 +0200
Subject: dma-buf: detach fence ops on signal v3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When neither a release nor a wait backend ops is specified it is possible
to let the dma_fence live on independently of the module who issued it.

This makes it possible to unload drivers and only wait for all their
fences to signal.

v2: fix typo in comment
v3: fix sparse rcu warnings

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-by: Philipp Stanner <phasta@kernel.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20260219160822.1529-3-christian.koenig@amd.com
---
 drivers/dma-buf/dma-fence.c | 18 ++++++++++++++----
 include/linux/dma-fence.h   |  4 ++--
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 076e6e6c75be..3279d82ffa98 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -362,6 +362,7 @@ void __dma_fence_might_wait(void)
 void dma_fence_signal_timestamp_locked(struct dma_fence *fence,
 				      ktime_t timestamp)
 {
+	const struct dma_fence_ops *ops;
 	struct dma_fence_cb *cur, *tmp;
 	struct list_head cb_list;
 
@@ -371,6 +372,15 @@ void dma_fence_signal_timestamp_locked(struct dma_fence *fence,
 				      &fence->flags)))
 		return;
 
+	/*
+	 * When neither a release nor a wait operation is specified set the ops
+	 * pointer to NULL to allow the fence structure to become independent
+	 * from who originally issued it.
+	 */
+	ops = rcu_dereference_protected(fence->ops, true);
+	if (!ops->release && !ops->wait)
+		RCU_INIT_POINTER(fence->ops, NULL);
+
 	/* Stash the cb_list before replacing it with the timestamp */
 	list_replace(&fence->cb_list, &cb_list);
 
@@ -537,7 +547,7 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
 	rcu_read_lock();
 	ops = rcu_dereference(fence->ops);
 	trace_dma_fence_wait_start(fence);
-	if (ops->wait) {
+	if (ops && ops->wait) {
 		/*
 		 * Implementing the wait ops is deprecated and not supported for
 		 * issuers of fences who need their lifetime to be independent
@@ -603,7 +613,7 @@ void dma_fence_release(struct kref *kref)
 	}
 
 	ops = rcu_dereference(fence->ops);
-	if (ops->release)
+	if (ops && ops->release)
 		ops->release(fence);
 	else
 		dma_fence_free(fence);
@@ -639,7 +649,7 @@ static bool __dma_fence_enable_signaling(struct dma_fence *fence)
 
 	rcu_read_lock();
 	ops = rcu_dereference(fence->ops);
-	if (!was_set && ops->enable_signaling) {
+	if (!was_set && ops && ops->enable_signaling) {
 		trace_dma_fence_enable_signal(fence);
 
 		if (!ops->enable_signaling(fence)) {
@@ -1025,7 +1035,7 @@ void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
 
 	rcu_read_lock();
 	ops = rcu_dereference(fence->ops);
-	if (ops->set_deadline && !dma_fence_is_signaled(fence))
+	if (ops && ops->set_deadline && !dma_fence_is_signaled(fence))
 		ops->set_deadline(fence, deadline);
 	rcu_read_unlock();
 }
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index fa3cfe3e98ac..9ff2c4a09cdc 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -472,7 +472,7 @@ dma_fence_is_signaled_locked(struct dma_fence *fence)
 
 	rcu_read_lock();
 	ops = rcu_dereference(fence->ops);
-	if (ops->signaled && ops->signaled(fence)) {
+	if (ops && ops->signaled && ops->signaled(fence)) {
 		rcu_read_unlock();
 		dma_fence_signal_locked(fence);
 		return true;
@@ -508,7 +508,7 @@ dma_fence_is_signaled(struct dma_fence *fence)
 
 	rcu_read_lock();
 	ops = rcu_dereference(fence->ops);
-	if (ops->signaled && ops->signaled(fence)) {
+	if (ops && ops->signaled && ops->signaled(fence)) {
 		rcu_read_unlock();
 		dma_fence_signal(fence);
 		return true;
-- 
cgit v1.2.3


From 3e5067931b5df667f5350fafe4410554e228e53e Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 9 Oct 2025 10:40:06 +0200
Subject: dma-buf: abstract fence locking v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add dma_fence_lock_irqsafe() and dma_fence_unlock_irqrestore() wrappers
and mechanically apply them everywhere.

Just a pre-requisite cleanup for a follow up patch.

v2: add some missing i915 bits, add abstraction for lockdep assertion as
    well
v3: one more suggestion by Tvrtko

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Link: https://lore.kernel.org/r/20260219160822.1529-4-christian.koenig@amd.com
---
 drivers/dma-buf/dma-fence.c                 | 48 +++++++++++++----------------
 drivers/dma-buf/st-dma-fence.c              |  6 ++--
 drivers/dma-buf/sw_sync.c                   | 14 ++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c    |  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      |  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  2 +-
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c |  2 +-
 drivers/gpu/drm/i915/i915_active.c          | 19 +++++++-----
 drivers/gpu/drm/nouveau/nouveau_drm.c       |  5 +--
 drivers/gpu/drm/scheduler/sched_fence.c     |  6 ++--
 drivers/gpu/drm/xe/xe_sched_job.c           |  4 +--
 include/linux/dma-fence.h                   | 38 +++++++++++++++++++++++
 12 files changed, 96 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 3279d82ffa98..698260c49f52 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -366,7 +366,7 @@ void dma_fence_signal_timestamp_locked(struct dma_fence *fence,
 	struct dma_fence_cb *cur, *tmp;
 	struct list_head cb_list;
 
-	lockdep_assert_held(fence->lock);
+	dma_fence_assert_held(fence);
 
 	if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				      &fence->flags)))
@@ -414,9 +414,9 @@ void dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp)
 	if (WARN_ON(!fence))
 		return;
 
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 	dma_fence_signal_timestamp_locked(fence, timestamp);
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 }
 EXPORT_SYMBOL(dma_fence_signal_timestamp);
 
@@ -475,9 +475,9 @@ bool dma_fence_check_and_signal(struct dma_fence *fence)
 	unsigned long flags;
 	bool ret;
 
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 	ret = dma_fence_check_and_signal_locked(fence);
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 
 	return ret;
 }
@@ -503,9 +503,9 @@ void dma_fence_signal(struct dma_fence *fence)
 
 	tmp = dma_fence_begin_signalling();
 
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 	dma_fence_signal_timestamp_locked(fence, ktime_get());
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 
 	dma_fence_end_signalling(tmp);
 }
@@ -606,10 +606,10 @@ void dma_fence_release(struct kref *kref)
 		 * don't leave chains dangling. We set the error flag first
 		 * so that the callbacks know this signal is due to an error.
 		 */
-		spin_lock_irqsave(fence->lock, flags);
+		dma_fence_lock_irqsave(fence, flags);
 		fence->error = -EDEADLK;
 		dma_fence_signal_locked(fence);
-		spin_unlock_irqrestore(fence->lock, flags);
+		dma_fence_unlock_irqrestore(fence, flags);
 	}
 
 	ops = rcu_dereference(fence->ops);
@@ -639,7 +639,7 @@ static bool __dma_fence_enable_signaling(struct dma_fence *fence)
 	const struct dma_fence_ops *ops;
 	bool was_set;
 
-	lockdep_assert_held(fence->lock);
+	dma_fence_assert_held(fence);
 
 	was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 				   &fence->flags);
@@ -675,9 +675,9 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 	__dma_fence_enable_signaling(fence);
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 }
 EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
 
@@ -717,8 +717,7 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
 		return -ENOENT;
 	}
 
-	spin_lock_irqsave(fence->lock, flags);
-
+	dma_fence_lock_irqsave(fence, flags);
 	if (__dma_fence_enable_signaling(fence)) {
 		cb->func = func;
 		list_add_tail(&cb->node, &fence->cb_list);
@@ -726,8 +725,7 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
 		INIT_LIST_HEAD(&cb->node);
 		ret = -ENOENT;
 	}
-
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 
 	return ret;
 }
@@ -750,9 +748,9 @@ int dma_fence_get_status(struct dma_fence *fence)
 	unsigned long flags;
 	int status;
 
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 	status = dma_fence_get_status_locked(fence);
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 
 	return status;
 }
@@ -782,13 +780,11 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
 	unsigned long flags;
 	bool ret;
 
-	spin_lock_irqsave(fence->lock, flags);
-
+	dma_fence_lock_irqsave(fence, flags);
 	ret = !list_empty(&cb->node);
 	if (ret)
 		list_del_init(&cb->node);
-
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 
 	return ret;
 }
@@ -827,7 +823,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 	unsigned long flags;
 	signed long ret = timeout ? timeout : 1;
 
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 
 	if (dma_fence_test_signaled_flag(fence))
 		goto out;
@@ -851,11 +847,11 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 			__set_current_state(TASK_INTERRUPTIBLE);
 		else
 			__set_current_state(TASK_UNINTERRUPTIBLE);
-		spin_unlock_irqrestore(fence->lock, flags);
+		dma_fence_unlock_irqrestore(fence, flags);
 
 		ret = schedule_timeout(ret);
 
-		spin_lock_irqsave(fence->lock, flags);
+		dma_fence_lock_irqsave(fence, flags);
 		if (ret > 0 && intr && signal_pending(current))
 			ret = -ERESTARTSYS;
 	}
@@ -865,7 +861,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 	__set_current_state(TASK_RUNNING);
 
 out:
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 	return ret;
 }
 EXPORT_SYMBOL(dma_fence_default_wait);
diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c
index 73ed6fd48a13..5d0d9abc6e21 100644
--- a/drivers/dma-buf/st-dma-fence.c
+++ b/drivers/dma-buf/st-dma-fence.c
@@ -410,8 +410,10 @@ struct race_thread {
 
 static void __wait_for_callbacks(struct dma_fence *f)
 {
-	spin_lock_irq(f->lock);
-	spin_unlock_irq(f->lock);
+	unsigned long flags;
+
+	dma_fence_lock_irqsave(f, flags);
+	dma_fence_unlock_irqrestore(f, flags);
 }
 
 static int thread_signal_callback(void *arg)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 963a72324d16..8df20b0218a9 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -156,12 +156,12 @@ static void timeline_fence_release(struct dma_fence *fence)
 	struct sync_timeline *parent = dma_fence_parent(fence);
 	unsigned long flags;
 
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 	if (!list_empty(&pt->link)) {
 		list_del(&pt->link);
 		rb_erase(&pt->node, &parent->pt_tree);
 	}
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 
 	sync_timeline_put(parent);
 	dma_fence_free(fence);
@@ -179,7 +179,7 @@ static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadlin
 	struct sync_pt *pt = dma_fence_to_sync_pt(fence);
 	unsigned long flags;
 
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 	if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) {
 		if (ktime_before(deadline, pt->deadline))
 			pt->deadline = deadline;
@@ -187,7 +187,7 @@ static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadlin
 		pt->deadline = deadline;
 		__set_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags);
 	}
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 }
 
 static const struct dma_fence_ops timeline_fence_ops = {
@@ -431,13 +431,13 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a
 		goto put_fence;
 	}
 
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 	if (!test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) {
 		ret = -ENOENT;
 		goto unlock;
 	}
 	data.deadline_ns = ktime_to_ns(pt->deadline);
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 
 	dma_fence_put(fence);
 
@@ -450,7 +450,7 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a
 	return 0;
 
 unlock:
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 put_fence:
 	dma_fence_put(fence);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 4638a686a84e..7c047f5a1549 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -479,10 +479,10 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
 	if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
 		return false;
 
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 	if (!dma_fence_is_signaled_locked(fence))
 		dma_fence_set_error(fence, -ENODATA);
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 
 	while (!dma_fence_is_signaled(fence) &&
 	       ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f2beb980e3c3..8b095087feb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2785,8 +2785,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	dma_fence_put(vm->last_unlocked);
 	dma_fence_wait(vm->last_tlb_flush, false);
 	/* Make sure that all fence callbacks have completed */
-	spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
-	spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags);
+	dma_fence_lock_irqsave(vm->last_tlb_flush, flags);
+	dma_fence_unlock_irqrestore(vm->last_tlb_flush, flags);
 	dma_fence_put(vm->last_tlb_flush);
 
 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 806d62ed61ef..a914ceec90aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -639,7 +639,7 @@ static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
 	 * sure that the dma_fence structure isn't freed up.
 	 */
 	rcu_read_lock();
-	lock = vm->last_tlb_flush->lock;
+	lock = dma_fence_spinlock(vm->last_tlb_flush);
 	rcu_read_unlock();
 
 	spin_lock_irqsave(lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index a2b413982ce6..c10ac0ab3bfa 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -148,7 +148,7 @@ __dma_fence_signal__notify(struct dma_fence *fence,
 {
 	struct dma_fence_cb *cur, *tmp;
 
-	lockdep_assert_held(fence->lock);
+	dma_fence_assert_held(fence);
 
 	list_for_each_entry_safe(cur, tmp, list, node) {
 		INIT_LIST_HEAD(&cur->node);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 25c46d7b1ea7..cd44cbfb53b5 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -1045,9 +1045,10 @@ __i915_active_fence_set(struct i915_active_fence *active,
 	 * nesting rules for the fence->lock; the inner lock is always the
 	 * older lock.
 	 */
-	spin_lock_irqsave(fence->lock, flags);
+	dma_fence_lock_irqsave(fence, flags);
 	if (prev)
-		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+		spin_lock_nested(dma_fence_spinlock(prev),
+				 SINGLE_DEPTH_NESTING);
 
 	/*
 	 * A does the cmpxchg first, and so it sees C or NULL, as before, or
@@ -1061,17 +1062,18 @@ __i915_active_fence_set(struct i915_active_fence *active,
 	 */
 	while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
 		if (prev) {
-			spin_unlock(prev->lock);
+			spin_unlock(dma_fence_spinlock(prev));
 			dma_fence_put(prev);
 		}
-		spin_unlock_irqrestore(fence->lock, flags);
+		dma_fence_unlock_irqrestore(fence, flags);
 
 		prev = i915_active_fence_get(active);
 		GEM_BUG_ON(prev == fence);
 
-		spin_lock_irqsave(fence->lock, flags);
+		dma_fence_lock_irqsave(fence, flags);
 		if (prev)
-			spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+			spin_lock_nested(dma_fence_spinlock(prev),
+					 SINGLE_DEPTH_NESTING);
 	}
 
 	/*
@@ -1088,10 +1090,11 @@ __i915_active_fence_set(struct i915_active_fence *active,
 	 */
 	if (prev) {
 		__list_del_entry(&active->cb.node);
-		spin_unlock(prev->lock); /* serialise with prev->cb_list */
+		/* serialise with prev->cb_list */
+		spin_unlock(dma_fence_spinlock(prev));
 	}
 	list_add_tail(&active->cb.node, &fence->cb_list);
-	spin_unlock_irqrestore(fence->lock, flags);
+	dma_fence_unlock_irqrestore(fence, flags);
 
 	return prev;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index cb22237ac17d..17c114645d9f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -156,12 +156,13 @@ nouveau_name(struct drm_device *dev)
 static inline bool
 nouveau_cli_work_ready(struct dma_fence *fence)
 {
+	unsigned long flags;
 	bool ret = true;
 
-	spin_lock_irq(fence->lock);
+	dma_fence_lock_irqsave(fence, flags);
 	if (!dma_fence_is_signaled_locked(fence))
 		ret = false;
-	spin_unlock_irq(fence->lock);
+	dma_fence_unlock_irqrestore(fence, flags);
 
 	if (ret == true)
 		dma_fence_put(fence);
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index a27786cb86fb..096fe28aa9c9 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -156,19 +156,19 @@ static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
 	struct dma_fence *parent;
 	unsigned long flags;
 
-	spin_lock_irqsave(&fence->lock, flags);
+	dma_fence_lock_irqsave(f, flags);
 
 	/* If we already have an earlier deadline, keep it: */
 	if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
 	    ktime_before(fence->deadline, deadline)) {
-		spin_unlock_irqrestore(&fence->lock, flags);
+		dma_fence_unlock_irqrestore(f, flags);
 		return;
 	}
 
 	fence->deadline = deadline;
 	set_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
 
-	spin_unlock_irqrestore(&fence->lock, flags);
+	dma_fence_unlock_irqrestore(f, flags);
 
 	/*
 	 * smp_load_aquire() to ensure that if we are racing another
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 3927666fe556..ae5b38b2a884 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -190,11 +190,11 @@ static bool xe_fence_set_error(struct dma_fence *fence, int error)
 	unsigned long irq_flags;
 	bool signaled;
 
-	spin_lock_irqsave(fence->lock, irq_flags);
+	dma_fence_lock_irqsave(fence, irq_flags);
 	signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
 	if (!signaled)
 		dma_fence_set_error(fence, error);
-	spin_unlock_irqrestore(fence->lock, irq_flags);
+	dma_fence_unlock_irqrestore(fence, irq_flags);
 
 	return signaled;
 }
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 9ff2c4a09cdc..85d6eac9fa85 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -377,6 +377,44 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu **fencep)
 	} while (1);
 }
 
+/**
+ * dma_fence_spinlock - return pointer to the spinlock protecting the fence
+ * @fence: the fence to get the lock from
+ *
+ * Return the pointer to the extern lock.
+ */
+static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
+{
+	return fence->lock;
+}
+
+/**
+ * dma_fence_lock_irqsave - irqsave lock the fence
+ * @fence: the fence to lock
+ * @flags: where to store the CPU flags.
+ *
+ * Lock the fence, preventing it from changing to the signaled state.
+ */
+#define dma_fence_lock_irqsave(fence, flags)	\
+	spin_lock_irqsave(fence->lock, flags)
+
+/**
+ * dma_fence_unlock_irqrestore - unlock the fence and irqrestore
+ * @fence: the fence to unlock
+ * @flags the CPU flags to restore
+ *
+ * Unlock the fence, allowing it to change it's state to signaled again.
+ */
+#define dma_fence_unlock_irqrestore(fence, flags)	\
+	spin_unlock_irqrestore(fence->lock, flags)
+
+/**
+ * dma_fence_assert_held - lockdep assertion that fence is locked
+ * @fence: the fence which should be locked
+ */
+#define dma_fence_assert_held(fence)	\
+	lockdep_assert_held(dma_fence_spinlock(fence));
+
 #ifdef CONFIG_LOCKDEP
 bool dma_fence_begin_signalling(void);
 void dma_fence_end_signalling(bool cookie);
-- 
cgit v1.2.3


From 1f32f310a13c9fb67a9993ab67f596b3f960206f Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 9 Oct 2025 10:40:06 +0200
Subject: dma-buf: inline spinlock for fence protection v5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement per-fence spinlocks, allowing implementations to not give an
external spinlock to protect the fence internal state. Instead a spinlock
embedded into the fence structure itself is used in this case.

Shared spinlocks have the problem that implementations need to guarantee
that the lock lives at least as long all fences referencing them.

Using a per-fence spinlock allows completely decoupling spinlock producer
and consumer life times, simplifying the handling in most use cases.

v2: improve naming, coverage and function documentation
v3: fix one additional locking in the selftests
v4: separate out some changes to make the patch smaller,
    fix one amdgpu crash found by CI systems
v5: improve comments

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20260219160822.1529-5-christian.koenig@amd.com
---
 drivers/dma-buf/dma-fence.c             | 21 ++++++++++++++++-----
 drivers/dma-buf/sync_debug.h            |  2 +-
 drivers/gpu/drm/drm_crtc.c              |  2 +-
 drivers/gpu/drm/drm_writeback.c         |  2 +-
 drivers/gpu/drm/nouveau/nouveau_fence.c |  3 ++-
 drivers/gpu/drm/qxl/qxl_release.c       |  3 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c   |  3 ++-
 drivers/gpu/drm/xe/xe_hw_fence.c        |  3 ++-
 include/linux/dma-fence.h               | 19 +++++++++++++------
 9 files changed, 40 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 698260c49f52..4ad863d2a52c 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -343,7 +343,6 @@ void __dma_fence_might_wait(void)
 }
 #endif
 
-
 /**
  * dma_fence_signal_timestamp_locked - signal completion of a fence
  * @fence: the fence to signal
@@ -1070,7 +1069,6 @@ static void
 __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
 	         spinlock_t *lock, u64 context, u64 seqno, unsigned long flags)
 {
-	BUG_ON(!lock);
 	BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
 
 	kref_init(&fence->refcount);
@@ -1082,10 +1080,15 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
 	 */
 	RCU_INIT_POINTER(fence->ops, ops);
 	INIT_LIST_HEAD(&fence->cb_list);
-	fence->lock = lock;
 	fence->context = context;
 	fence->seqno = seqno;
 	fence->flags = flags | BIT(DMA_FENCE_FLAG_INITIALIZED_BIT);
+	if (lock) {
+		fence->extern_lock = lock;
+	} else {
+		spin_lock_init(&fence->inline_lock);
+		fence->flags |= BIT(DMA_FENCE_FLAG_INLINE_LOCK_BIT);
+	}
 	fence->error = 0;
 
 	trace_dma_fence_init(fence);
@@ -1095,7 +1098,7 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
  * dma_fence_init - Initialize a custom fence.
  * @fence: the fence to initialize
  * @ops: the dma_fence_ops for operations on this fence
- * @lock: the irqsafe spinlock to use for locking this fence
+ * @lock: optional irqsafe spinlock to use for locking this fence
  * @context: the execution context this fence is run on
  * @seqno: a linear increasing sequence number for this context
  *
@@ -1105,6 +1108,10 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
  *
  * context and seqno are used for easy comparison between fences, allowing
  * to check which fence is later by simply using dma_fence_later().
+ *
+ * It is strongly discouraged to provide an external lock because this couples
+ * lock and fence life time. This is only allowed for legacy use cases when
+ * multiple fences need to be prevented from signaling out of order.
  */
 void
 dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
@@ -1118,7 +1125,7 @@ EXPORT_SYMBOL(dma_fence_init);
  * dma_fence_init64 - Initialize a custom fence with 64-bit seqno support.
  * @fence: the fence to initialize
  * @ops: the dma_fence_ops for operations on this fence
- * @lock: the irqsafe spinlock to use for locking this fence
+ * @lock: optional irqsafe spinlock to use for locking this fence
  * @context: the execution context this fence is run on
  * @seqno: a linear increasing sequence number for this context
  *
@@ -1128,6 +1135,10 @@ EXPORT_SYMBOL(dma_fence_init);
  *
  * Context and seqno are used for easy comparison between fences, allowing
  * to check which fence is later by simply using dma_fence_later().
+ *
+ * It is strongly discouraged to provide an external lock because this couples
+ * lock and fence life time. This is only allowed for legacy use cases when
+ * multiple fences need to be prevented from signaling out of order.
  */
 void
 dma_fence_init64(struct dma_fence *fence, const struct dma_fence_ops *ops,
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
index 02af347293d0..c49324505b20 100644
--- a/drivers/dma-buf/sync_debug.h
+++ b/drivers/dma-buf/sync_debug.h
@@ -47,7 +47,7 @@ struct sync_timeline {
 
 static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence)
 {
-	return container_of(fence->lock, struct sync_timeline, lock);
+	return container_of(fence->extern_lock, struct sync_timeline, lock);
 }
 
 /**
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 960fdc1cc6ba..8d6f721c2c9a 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -159,7 +159,7 @@ static const struct dma_fence_ops drm_crtc_fence_ops;
 static struct drm_crtc *fence_to_crtc(struct dma_fence *fence)
 {
 	BUG_ON(rcu_access_pointer(fence->ops) != &drm_crtc_fence_ops);
-	return container_of(fence->lock, struct drm_crtc, fence_lock);
+	return container_of(fence->extern_lock, struct drm_crtc, fence_lock);
 }
 
 static const char *drm_crtc_fence_get_driver_name(struct dma_fence *fence)
diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
index 09362cf4f22f..4da5d6094721 100644
--- a/drivers/gpu/drm/drm_writeback.c
+++ b/drivers/gpu/drm/drm_writeback.c
@@ -81,7 +81,7 @@
  *	From userspace, this property will always read as zero.
  */
 
-#define fence_to_wb_connector(x) container_of(x->lock, \
+#define fence_to_wb_connector(x) container_of(x->extern_lock, \
 					      struct drm_writeback_connector, \
 					      fence_lock)
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 903d326927ca..edbe9e08ba0f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -41,7 +41,8 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy;
 static inline struct nouveau_fence_chan *
 nouveau_fctx(struct nouveau_fence *fence)
 {
-	return container_of(fence->base.lock, struct nouveau_fence_chan, lock);
+	return container_of(fence->base.extern_lock, struct nouveau_fence_chan,
+			    lock);
 }
 
 static bool
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 720d6d57151c..06979d0e8a9f 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -62,7 +62,8 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr,
 	struct qxl_device *qdev;
 	unsigned long cur, end = jiffies + timeout;
 
-	qdev = container_of(fence->lock, struct qxl_device, release_lock);
+	qdev = container_of(fence->extern_lock, struct qxl_device,
+			    release_lock);
 
 	if (!wait_event_timeout(qdev->release_event,
 				(dma_fence_is_signaled(fence) ||
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 3469e2c9e706..4ef84ff9b638 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -47,7 +47,8 @@ struct vmw_event_fence_action {
 static struct vmw_fence_manager *
 fman_from_fence(struct vmw_fence_obj *fence)
 {
-	return container_of(fence->base.lock, struct vmw_fence_manager, lock);
+	return container_of(fence->base.extern_lock, struct vmw_fence_manager,
+			    lock);
 }
 
 static void vmw_fence_obj_destroy(struct dma_fence *f)
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
index ae8ed15b64c5..14720623ad00 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence.c
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -124,7 +124,8 @@ static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence);
 
 static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence)
 {
-	return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock);
+	return container_of(fence->dma.extern_lock, struct xe_hw_fence_irq,
+			    lock);
 }
 
 static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 85d6eac9fa85..3dc93f068bf6 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -34,7 +34,8 @@ struct seq_file;
  * @ops: dma_fence_ops associated with this fence
  * @rcu: used for releasing fence with kfree_rcu
  * @cb_list: list of all callbacks to call
- * @lock: spin_lock_irqsave used for locking
+ * @extern_lock: external spin_lock_irqsave used for locking (deprecated)
+ * @inline_lock: alternative internal spin_lock_irqsave used for locking
  * @context: execution context this fence belongs to, returned by
  *           dma_fence_context_alloc()
  * @seqno: the sequence number of this fence inside the execution context,
@@ -49,6 +50,7 @@ struct seq_file;
  * of the time.
  *
  * DMA_FENCE_FLAG_INITIALIZED_BIT - fence was initialized
+ * DMA_FENCE_FLAG_INLINE_LOCK_BIT - use inline spinlock instead of external one
  * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
  * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
  * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
@@ -66,7 +68,10 @@ struct seq_file;
  * been completed, or never called at all.
  */
 struct dma_fence {
-	spinlock_t *lock;
+	union {
+		spinlock_t *extern_lock;
+		spinlock_t inline_lock;
+	};
 	const struct dma_fence_ops __rcu *ops;
 	/*
 	 * We clear the callback list on kref_put so that by the time we
@@ -100,6 +105,7 @@ struct dma_fence {
 
 enum dma_fence_flag_bits {
 	DMA_FENCE_FLAG_INITIALIZED_BIT,
+	DMA_FENCE_FLAG_INLINE_LOCK_BIT,
 	DMA_FENCE_FLAG_SEQNO64_BIT,
 	DMA_FENCE_FLAG_SIGNALED_BIT,
 	DMA_FENCE_FLAG_TIMESTAMP_BIT,
@@ -381,11 +387,12 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu **fencep)
  * dma_fence_spinlock - return pointer to the spinlock protecting the fence
  * @fence: the fence to get the lock from
  *
- * Return the pointer to the extern lock.
+ * Return either the pointer to the embedded or the external spin lock.
  */
 static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
 {
-	return fence->lock;
+	return test_bit(DMA_FENCE_FLAG_INLINE_LOCK_BIT, &fence->flags) ?
+		&fence->inline_lock : fence->extern_lock;
 }
 
 /**
@@ -396,7 +403,7 @@ static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
  * Lock the fence, preventing it from changing to the signaled state.
  */
 #define dma_fence_lock_irqsave(fence, flags)	\
-	spin_lock_irqsave(fence->lock, flags)
+	spin_lock_irqsave(dma_fence_spinlock(fence), flags)
 
 /**
  * dma_fence_unlock_irqrestore - unlock the fence and irqrestore
@@ -406,7 +413,7 @@ static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
  * Unlock the fence, allowing it to change it's state to signaled again.
  */
 #define dma_fence_unlock_irqrestore(fence, flags)	\
-	spin_unlock_irqrestore(fence->lock, flags)
+	spin_unlock_irqrestore(dma_fence_spinlock(fence), flags)
 
 /**
  * dma_fence_assert_held - lockdep assertion that fence is locked
-- 
cgit v1.2.3


From 5943243914b9fed8e26edcb9d45421721a5e3576 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 9 Oct 2025 16:18:53 +0200
Subject: dma-buf: use inline lock for the dma-fence-array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using the inline lock is now the recommended way for dma_fence
implementations.

So use this approach for the framework's internal fences as well.

Also saves about 4 bytes for the external spinlock.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-by: Philipp Stanner <phasta@kernel.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20260219160822.1529-8-christian.koenig@amd.com
---
 drivers/dma-buf/dma-fence-array.c | 5 ++---
 include/linux/dma-fence-array.h   | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
index 37e2c6179d77..cd970eceaefb 100644
--- a/drivers/dma-buf/dma-fence-array.c
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -204,9 +204,8 @@ void dma_fence_array_init(struct dma_fence_array *array,
 
 	array->num_fences = num_fences;
 
-	spin_lock_init(&array->lock);
-	dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock,
-		       context, seqno);
+	dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context,
+		       seqno);
 	init_irq_work(&array->work, irq_dma_fence_array_work);
 
 	atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h
index 079b3dec0a16..370b3d2bba37 100644
--- a/include/linux/dma-fence-array.h
+++ b/include/linux/dma-fence-array.h
@@ -38,7 +38,6 @@ struct dma_fence_array_cb {
 struct dma_fence_array {
 	struct dma_fence base;
 
-	spinlock_t lock;
 	unsigned num_fences;
 	atomic_t num_pending;
 	struct dma_fence **fences;
-- 
cgit v1.2.3


From a408c0ca0c411ca1ead995bdae3112a806c87556 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 9 Oct 2025 16:32:33 +0200
Subject: dma-buf: use inline lock for the dma-fence-chain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using the inline lock is now the recommended way for dma_fence
implementations.

So use this approach for the framework's internal fences as well.

Also saves about 4 bytes for the external spinlock.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-by: Philipp Stanner <phasta@kernel.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20260219160822.1529-9-christian.koenig@amd.com
---
 drivers/dma-buf/dma-fence-chain.c | 3 +--
 include/linux/dma-fence-chain.h   | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
index a8a90acf4f34..a707792b6025 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -245,7 +245,6 @@ void dma_fence_chain_init(struct dma_fence_chain *chain,
 	struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev);
 	uint64_t context;
 
-	spin_lock_init(&chain->lock);
 	rcu_assign_pointer(chain->prev, prev);
 	chain->fence = fence;
 	chain->prev_seqno = 0;
@@ -261,7 +260,7 @@ void dma_fence_chain_init(struct dma_fence_chain *chain,
 			seqno = max(prev->seqno, seqno);
 	}
 
-	dma_fence_init64(&chain->base, &dma_fence_chain_ops, &chain->lock,
+	dma_fence_init64(&chain->base, &dma_fence_chain_ops, NULL,
 			 context, seqno);
 
 	/*
diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
index 5cd3ba53b4a1..df3beadf1515 100644
--- a/include/linux/dma-fence-chain.h
+++ b/include/linux/dma-fence-chain.h
@@ -46,7 +46,6 @@ struct dma_fence_chain {
 		 */
 		struct irq_work work;
 	};
-	spinlock_t lock;
 };
 
 
-- 
cgit v1.2.3


From 9fe89f022c05d99c052d6bc088b82d4ff83bf463 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 27 Jan 2026 16:17:48 +0100
Subject: sched/fair: More complex proportional newidle balance

It turns out that a few workloads (easyWave, fio) have a fairly low
success rate on newidle balance, but still benefit greatly from having
it anyway.

Luckliky these workloads have a faily low newidle rate, so the cost if
doing the newidle is relatively low, even if unsuccessfull.

Add a simple rate based part to the newidle ratio compute, such that
low rate newidle will still have a high newidle ratio.

This cures the easyWave and fio workloads while not affecting the
schbench numbers either (which have a very high newidle rate).

Reported-by: Mario Roy <marioeroy@gmail.com>
Reported-by: "Mohamed Abuelfotoh, Hazem" <abuehaze@amazon.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Mario Roy <marioeroy@gmail.com>
Tested-by: "Mohamed Abuelfotoh, Hazem" <abuehaze@amazon.com>
Link: https://patch.msgid.link/20260127151748.GA1079264@noisy.programming.kicks-ass.net
---
 include/linux/sched/topology.h |  1 +
 kernel/sched/fair.c            | 27 +++++++++++++++++++++++++--
 kernel/sched/features.h        |  1 +
 kernel/sched/topology.c        |  3 +++
 4 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 45c0022b91ce..a1e1032426dc 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -95,6 +95,7 @@ struct sched_domain {
 	unsigned int newidle_call;
 	unsigned int newidle_success;
 	unsigned int newidle_ratio;
+	u64 newidle_stamp;
 	u64 max_newidle_lb_cost;
 	unsigned long last_decay_max_lb_cost;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bf948db905ed..66afa0ac7396 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -12289,7 +12289,30 @@ static inline void update_newidle_stats(struct sched_domain *sd, unsigned int su
 	sd->newidle_success += success;
 
 	if (sd->newidle_call >= 1024) {
-		sd->newidle_ratio = sd->newidle_success;
+		u64 now = sched_clock();
+		s64 delta = now - sd->newidle_stamp;
+		sd->newidle_stamp = now;
+		int ratio = 0;
+
+		if (delta < 0)
+			delta = 0;
+
+		if (sched_feat(NI_RATE)) {
+			/*
+			 * ratio  delta   freq
+			 *
+			 * 1024 -  4  s -  128 Hz
+			 *  512 -  2  s -  256 Hz
+			 *  256 -  1  s -  512 Hz
+			 *  128 - .5  s - 1024 Hz
+			 *   64 - .25 s - 2048 Hz
+			 */
+			ratio = delta >> 22;
+		}
+
+		ratio += sd->newidle_success;
+
+		sd->newidle_ratio = min(1024, ratio);
 		sd->newidle_call /= 2;
 		sd->newidle_success /= 2;
 	}
@@ -12996,7 +13019,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
 			unsigned int weight = 1;
 
-			if (sched_feat(NI_RANDOM)) {
+			if (sched_feat(NI_RANDOM) && sd->newidle_ratio < 1024) {
 				/*
 				 * Throw a 1k sided dice; and only run
 				 * newidle_balance according to the success
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 136a6584be79..37d5928fa6dd 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -126,3 +126,4 @@ SCHED_FEAT(LATENCY_WARN, false)
  * Do newidle balancing proportional to its success rate using randomization.
  */
 SCHED_FEAT(NI_RANDOM, true)
+SCHED_FEAT(NI_RATE, true)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 32dcddaead82..061f8c85f555 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/sched/isolation.h>
+#include <linux/sched/clock.h>
 #include <linux/bsearch.h>
 #include "sched.h"
 
@@ -1642,6 +1643,7 @@ sd_init(struct sched_domain_topology_level *tl,
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
 	int sd_id, sd_weight, sd_flags = 0;
 	struct cpumask *sd_span;
+	u64 now = sched_clock();
 
 	sd_weight = cpumask_weight(tl->mask(tl, cpu));
 
@@ -1679,6 +1681,7 @@ sd_init(struct sched_domain_topology_level *tl,
 		.newidle_call		= 512,
 		.newidle_success	= 256,
 		.newidle_ratio		= 512,
+		.newidle_stamp		= now,
 
 		.max_newidle_lb_cost	= 0,
 		.last_decay_max_lb_cost	= jiffies,
-- 
cgit v1.2.3


From be6d4c9e9d714ebbf358be41332726a0f94b9ffa Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sat, 31 Jan 2026 07:34:16 +0200
Subject: dma-buf: Add dma_buf_attach_revocable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some exporters need a flow to synchronously revoke access to the DMA-buf
by importers. Once revoke is completed the importer is not permitted to
touch the memory otherwise they may get IOMMU faults, AERs, or worse.

DMA-buf today defines a revoke flow, for both pinned and dynamic
importers, which is broadly:

	dma_resv_lock(dmabuf->resv, NULL);
	// Prevent new mappings from being established
	priv->revoked = true;

	// Tell all importers to eventually unmap
	dma_buf_invalidate_mappings(dmabuf);

	// Wait for any inprogress fences on the old mapping
	dma_resv_wait_timeout(dmabuf->resv,
			      DMA_RESV_USAGE_BOOKKEEP, false,
			      MAX_SCHEDULE_TIMEOUT);
	dma_resv_unlock(dmabuf->resv, NULL);

	// Wait for all importers to complete unmap
	wait_for_completion(&priv->unmapped_comp);

This works well, and an importer that continues to access the DMA-buf
after unmapping it is very buggy.

However, the final wait for unmap is effectively unbounded. Several
importers do not support invalidate_mappings() at all and won't unmap
until userspace triggers it.

This unbounded wait is not suitable for exporters like VFIO and RDMA tha
need to issue revoke as part of their normal operations.

Add dma_buf_attach_revocable() to allow exporters to determine the
difference between importers that can complete the above in bounded time,
and those that can't. It can be called inside the exporter's attach op to
reject incompatible importers.

Document these details about how dma_buf_invalidate_mappings() works and
what the required sequence is to achieve a full revocation.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20260131-dmabuf-revoke-v7-6-463d956bd527@nvidia.com
---
 drivers/dma-buf/dma-buf.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/dma-buf.h   |  9 +++------
 2 files changed, 50 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 3b32f15fbc18..a202a308c079 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1318,13 +1318,59 @@ void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach,
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment_unlocked, "DMA_BUF");
 
+/**
+ * dma_buf_attach_revocable - check if a DMA-buf importer implements
+ * revoke semantics.
+ * @attach: the DMA-buf attachment to check
+ *
+ * Returns true if the DMA-buf importer can support the revoke sequence
+ * explained in dma_buf_invalidate_mappings() within bounded time. Meaning the
+ * importer implements invalidate_mappings() and ensures that unmap is called as
+ * a result.
+ */
+bool dma_buf_attach_revocable(struct dma_buf_attachment *attach)
+{
+	return attach->importer_ops &&
+	       attach->importer_ops->invalidate_mappings;
+}
+EXPORT_SYMBOL_NS_GPL(dma_buf_attach_revocable, "DMA_BUF");
+
 /**
  * dma_buf_invalidate_mappings - notify attachments that DMA-buf is moving
  *
  * @dmabuf:	[in]	buffer which is moving
  *
  * Informs all attachments that they need to destroy and recreate all their
- * mappings.
+ * mappings. If the attachment is dynamic then the dynamic importer is expected
+ * to invalidate any caches it has of the mapping result and perform a new
+ * mapping request before allowing HW to do any further DMA.
+ *
+ * If the attachment is pinned then this informs the pinned importer that the
+ * underlying mapping is no longer available. Pinned importers may take this is
+ * as a permanent revocation and never establish new mappings so exporters
+ * should not trigger it lightly.
+ *
+ * Upon return importers may continue to access the DMA-buf memory. The caller
+ * must do two additional waits to ensure that the memory is no longer being
+ * accessed:
+ *  1) Until dma_resv_wait_timeout() retires fences the importer is allowed to
+ *     fully access the memory.
+ *  2) Until the importer calls unmap it is allowed to speculatively
+ *     read-and-discard the memory. It must not write to the memory.
+ *
+ * A caller wishing to use dma_buf_invalidate_mappings() to fully stop access to
+ * the DMA-buf must wait for both. Dynamic callers can often use just the first.
+ *
+ * All importers providing a invalidate_mappings() op must ensure that unmap is
+ * called within bounded time after the op.
+ *
+ * Pinned importers that do not support a invalidate_mappings() op will
+ * eventually perform unmap when they are done with the buffer, which may be an
+ * ubounded time from calling this function. dma_buf_attach_revocable() can be
+ * used to prevent such importers from attaching.
+ *
+ * Importers are free to request a new mapping in parallel as this function
+ * returns.
  */
 void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
 {
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index e744b8f9bfad..166933b82e27 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -456,12 +456,8 @@ struct dma_buf_attach_ops {
 	 * called with this lock held as well. This makes sure that no mapping
 	 * is created concurrently with an ongoing move operation.
 	 *
-	 * Mappings stay valid and are not directly affected by this callback.
-	 * But the DMA-buf can now be in a different physical location, so all
-	 * mappings should be destroyed and re-created as soon as possible.
-	 *
-	 * New mappings can be created after this callback returns, and will
-	 * point to the new location of the DMA-buf.
+	 * See the kdoc for dma_buf_invalidate_mappings() for details on the
+	 * required behavior.
 	 */
 	void (*invalidate_mappings)(struct dma_buf_attachment *attach);
 };
@@ -579,6 +575,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
 void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
 				enum dma_data_direction);
 void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
+bool dma_buf_attach_revocable(struct dma_buf_attachment *attach);
 int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 			     enum dma_data_direction dir);
 int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
-- 
cgit v1.2.3


From ebf1ccff79c43f860cbd2f9d6cfab9a462d0cb2d Mon Sep 17 00:00:00 2001
From: Andrea Righi <arighi@nvidia.com>
Date: Wed, 18 Feb 2026 09:32:17 +0100
Subject: sched_ext: Fix ops.dequeue() semantics

Currently, ops.dequeue() is only invoked when the sched_ext core knows
that a task resides in BPF-managed data structures, which causes it to
miss scheduling property change events. In addition, ops.dequeue()
callbacks are completely skipped when tasks are dispatched to non-local
DSQs from ops.select_cpu(). As a result, BPF schedulers cannot reliably
track task state.

Fix this by guaranteeing that each task entering the BPF scheduler's
custody triggers exactly one ops.dequeue() call when it leaves that
custody, whether the exit is due to a dispatch (regular or via a core
scheduling pick) or to a scheduling property change (e.g.
sched_setaffinity(), sched_setscheduler(), set_user_nice(), NUMA
balancing, etc.).

BPF scheduler custody concept: a task is considered to be in the BPF
scheduler's custody when the scheduler is responsible for managing its
lifecycle. This includes tasks dispatched to user-created DSQs or stored
in the BPF scheduler's internal data structures from ops.enqueue().
Custody ends when the task is dispatched to a terminal DSQ (such as the
local DSQ or %SCX_DSQ_GLOBAL), selected by core scheduling, or removed
due to a property change.

Tasks directly dispatched to terminal DSQs bypass the BPF scheduler
entirely and are never in its custody. Terminal DSQs include:
 - Local DSQs (%SCX_DSQ_LOCAL or %SCX_DSQ_LOCAL_ON): per-CPU queues
   where tasks go directly to execution.
 - Global DSQ (%SCX_DSQ_GLOBAL): the built-in fallback queue where the
   BPF scheduler is considered "done" with the task.

As a result, ops.dequeue() is not invoked for tasks directly dispatched
to terminal DSQs.

To identify dequeues triggered by scheduling property changes, introduce
the new ops.dequeue() flag %SCX_DEQ_SCHED_CHANGE: when this flag is set,
the dequeue was caused by a scheduling property change.

New ops.dequeue() semantics:
 - ops.dequeue() is invoked exactly once when the task leaves the BPF
   scheduler's custody, in one of the following cases:
   a) regular dispatch: a task dispatched to a user DSQ or stored in
      internal BPF data structures is moved to a terminal DSQ
      (ops.dequeue() called without any special flags set),
   b) core scheduling dispatch: core-sched picks task before dispatch
      (ops.dequeue() called with %SCX_DEQ_CORE_SCHED_EXEC flag set),
   c) property change: task properties modified before dispatch,
      (ops.dequeue() called with %SCX_DEQ_SCHED_CHANGE flag set).

This allows BPF schedulers to:
 - reliably track task ownership and lifecycle,
 - maintain accurate accounting of managed tasks,
 - update internal state when tasks change properties.

Cc: Tejun Heo <tj@kernel.org>
Cc: Emil Tsalapatis <emil@etsalapatis.com>
Cc: Kuba Piecuch <jpiecuch@google.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 Documentation/scheduler/sched-ext.rst           |  78 +++++++++++++++--
 include/linux/sched/ext.h                       |   1 +
 kernel/sched/ext.c                              | 110 ++++++++++++++++++++++--
 kernel/sched/ext_internal.h                     |   7 ++
 tools/sched_ext/include/scx/enum_defs.autogen.h |   1 +
 tools/sched_ext/include/scx/enums.autogen.bpf.h |   2 +
 tools/sched_ext/include/scx/enums.autogen.h     |   1 +
 7 files changed, 184 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst
index 9e2882d937b4..7cb77fd2e4d7 100644
--- a/Documentation/scheduler/sched-ext.rst
+++ b/Documentation/scheduler/sched-ext.rst
@@ -228,16 +228,23 @@ The following briefly shows how a waking task is scheduled and executed.
    scheduler can wake up any cpu using the ``scx_bpf_kick_cpu()`` helper,
    using ``ops.select_cpu()`` judiciously can be simpler and more efficient.
 
-   A task can be immediately inserted into a DSQ from ``ops.select_cpu()``
-   by calling ``scx_bpf_dsq_insert()``. If the task is inserted into
-   ``SCX_DSQ_LOCAL`` from ``ops.select_cpu()``, it will be inserted into the
-   local DSQ of whichever CPU is returned from ``ops.select_cpu()``.
-   Additionally, inserting directly from ``ops.select_cpu()`` will cause the
-   ``ops.enqueue()`` callback to be skipped.
-
    Note that the scheduler core will ignore an invalid CPU selection, for
    example, if it's outside the allowed cpumask of the task.
 
+   A task can be immediately inserted into a DSQ from ``ops.select_cpu()``
+   by calling ``scx_bpf_dsq_insert()`` or ``scx_bpf_dsq_insert_vtime()``.
+
+   If the task is inserted into ``SCX_DSQ_LOCAL`` from
+   ``ops.select_cpu()``, it will be added to the local DSQ of whichever CPU
+   is returned from ``ops.select_cpu()``. Additionally, inserting directly
+   from ``ops.select_cpu()`` will cause the ``ops.enqueue()`` callback to
+   be skipped.
+
+   Any other attempt to store a task in BPF-internal data structures from
+   ``ops.select_cpu()`` does not prevent ``ops.enqueue()`` from being
+   invoked. This is discouraged, as it can introduce racy behavior or
+   inconsistent state.
+
 2. Once the target CPU is selected, ``ops.enqueue()`` is invoked (unless the
    task was inserted directly from ``ops.select_cpu()``). ``ops.enqueue()``
    can make one of the following decisions:
@@ -251,6 +258,61 @@ The following briefly shows how a waking task is scheduled and executed.
 
    * Queue the task on the BPF side.
 
+   **Task State Tracking and ops.dequeue() Semantics**
+
+   A task is in the "BPF scheduler's custody" when the BPF scheduler is
+   responsible for managing its lifecycle. A task enters custody when it is
+   dispatched to a user DSQ or stored in the BPF scheduler's internal data
+   structures. Custody is entered only from ``ops.enqueue()`` for those
+   operations. The only exception is dispatching to a user DSQ from
+   ``ops.select_cpu()``: although the task is not yet technically in BPF
+   scheduler custody at that point, the dispatch has the same semantic
+   effect as dispatching from ``ops.enqueue()`` for custody-related
+   purposes.
+
+   Once ``ops.enqueue()`` is called, the task may or may not enter custody
+   depending on what the scheduler does:
+
+   * **Directly dispatched to terminal DSQs** (``SCX_DSQ_LOCAL``,
+     ``SCX_DSQ_LOCAL_ON | cpu``, or ``SCX_DSQ_GLOBAL``): the BPF scheduler
+     is done with the task - it either goes straight to a CPU's local run
+     queue or to the global DSQ as a fallback. The task never enters (or
+     exits) BPF custody, and ``ops.dequeue()`` will not be called.
+
+   * **Dispatch to user-created DSQs** (custom DSQs): the task enters the
+     BPF scheduler's custody. When the task later leaves BPF custody
+     (dispatched to a terminal DSQ, picked by core-sched, or dequeued for
+     sleep/property changes), ``ops.dequeue()`` will be called exactly
+     once.
+
+   * **Stored in BPF data structures** (e.g., internal BPF queues): the
+     task is in BPF custody. ``ops.dequeue()`` will be called when it
+     leaves (e.g., when ``ops.dispatch()`` moves it to a terminal DSQ, or
+     on property change / sleep).
+
+   When a task leaves BPF scheduler custody, ``ops.dequeue()`` is invoked.
+   The dequeue can happen for different reasons, distinguished by flags:
+
+   1. **Regular dispatch**: when a task in BPF custody is dispatched to a
+      terminal DSQ from ``ops.dispatch()`` (leaving BPF custody for
+      execution), ``ops.dequeue()`` is triggered without any special flags.
+
+   2. **Core scheduling pick**: when ``CONFIG_SCHED_CORE`` is enabled and
+      core scheduling picks a task for execution while it's still in BPF
+      custody, ``ops.dequeue()`` is called with the
+      ``SCX_DEQ_CORE_SCHED_EXEC`` flag.
+
+   3. **Scheduling property change**: when a task property changes (via
+      operations like ``sched_setaffinity()``, ``sched_setscheduler()``,
+      priority changes, CPU migrations, etc.) while the task is still in
+      BPF custody, ``ops.dequeue()`` is called with the
+      ``SCX_DEQ_SCHED_CHANGE`` flag set in ``deq_flags``.
+
+   **Important**: Once a task has left BPF custody (e.g., after being
+   dispatched to a terminal DSQ), property changes will not trigger
+   ``ops.dequeue()``, since the task is no longer managed by the BPF
+   scheduler.
+
 3. When a CPU is ready to schedule, it first looks at its local DSQ. If
    empty, it then looks at the global DSQ. If there still isn't a task to
    run, ``ops.dispatch()`` is invoked which can use the following two
@@ -318,6 +380,8 @@ by a sched_ext scheduler:
                 /* Any usable CPU becomes available */
 
                 ops.dispatch(); /* Task is moved to a local DSQ */
+
+                ops.dequeue(); /* Exiting BPF scheduler */
             }
             ops.running();      /* Task starts running on its assigned CPU */
             while (task->scx.slice > 0 && task is runnable)
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index bcb962d5ee7d..4601e5ecb43c 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -84,6 +84,7 @@ struct scx_dispatch_q {
 /* scx_entity.flags */
 enum scx_ent_flags {
 	SCX_TASK_QUEUED		= 1 << 0, /* on ext runqueue */
+	SCX_TASK_IN_CUSTODY	= 1 << 1, /* in custody, needs ops.dequeue() when leaving */
 	SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */
 	SCX_TASK_DEQD_FOR_SLEEP	= 1 << 3, /* last dequeue was for SLEEP */
 
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 044bb2168dd0..d5e688b9acc0 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -986,12 +986,45 @@ static void refill_task_slice_dfl(struct scx_sched *sch, struct task_struct *p)
 	__scx_add_event(sch, SCX_EV_REFILL_SLICE_DFL, 1);
 }
 
+/*
+ * Return true if @p is moving due to an internal SCX migration, false
+ * otherwise.
+ */
+static inline bool task_scx_migrating(struct task_struct *p)
+{
+	/*
+	 * We only need to check sticky_cpu: it is set to the destination
+	 * CPU in move_remote_task_to_local_dsq() before deactivate_task()
+	 * and cleared when the task is enqueued on the destination, so it
+	 * is only non-negative during an internal SCX migration.
+	 */
+	return p->scx.sticky_cpu >= 0;
+}
+
+/*
+ * Call ops.dequeue() if the task is in BPF custody and not migrating.
+ * Clears %SCX_TASK_IN_CUSTODY when the callback is invoked.
+ */
+static void call_task_dequeue(struct scx_sched *sch, struct rq *rq,
+			      struct task_struct *p, u64 deq_flags)
+{
+	if (!(p->scx.flags & SCX_TASK_IN_CUSTODY) || task_scx_migrating(p))
+		return;
+
+	if (SCX_HAS_OP(sch, dequeue))
+		SCX_CALL_OP_TASK(sch, SCX_KF_REST, dequeue, rq, p, deq_flags);
+
+	p->scx.flags &= ~SCX_TASK_IN_CUSTODY;
+}
+
 static void local_dsq_post_enq(struct scx_dispatch_q *dsq, struct task_struct *p,
 			       u64 enq_flags)
 {
 	struct rq *rq = container_of(dsq, struct rq, scx.local_dsq);
 	bool preempt = false;
 
+	call_task_dequeue(scx_root, rq, p, 0);
+
 	/*
 	 * If @rq is in balance, the CPU is already vacant and looking for the
 	 * next task to run. No need to preempt or trigger resched after moving
@@ -1115,17 +1148,34 @@ static void dispatch_enqueue(struct scx_sched *sch, struct rq *rq,
 	p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
 	p->scx.ddsp_enq_flags = 0;
 
+	/*
+	 * Update custody and call ops.dequeue() before clearing ops_state:
+	 * once ops_state is cleared, waiters in ops_dequeue() can proceed
+	 * and dequeue_task_scx() will RMW p->scx.flags. If we clear
+	 * ops_state first, both sides would modify p->scx.flags
+	 * concurrently in a non-atomic way.
+	 */
+	if (is_local) {
+		local_dsq_post_enq(dsq, p, enq_flags);
+	} else {
+		/*
+		 * Task on global/bypass DSQ: leave custody, task on
+		 * non-terminal DSQ: enter custody.
+		 */
+		if (dsq->id == SCX_DSQ_GLOBAL || dsq->id == SCX_DSQ_BYPASS)
+			call_task_dequeue(sch, rq, p, 0);
+		else
+			p->scx.flags |= SCX_TASK_IN_CUSTODY;
+
+		raw_spin_unlock(&dsq->lock);
+	}
+
 	/*
 	 * We're transitioning out of QUEUEING or DISPATCHING. store_release to
 	 * match waiters' load_acquire.
 	 */
 	if (enq_flags & SCX_ENQ_CLEAR_OPSS)
 		atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE);
-
-	if (is_local)
-		local_dsq_post_enq(dsq, p, enq_flags);
-	else
-		raw_spin_unlock(&dsq->lock);
 }
 
 static void task_unlink_from_dsq(struct task_struct *p,
@@ -1405,6 +1455,12 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 	if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
 		goto direct;
 
+	/*
+	 * Task is now in BPF scheduler's custody. Set %SCX_TASK_IN_CUSTODY
+	 * so ops.dequeue() is called when it leaves custody.
+	 */
+	p->scx.flags |= SCX_TASK_IN_CUSTODY;
+
 	/*
 	 * If not directly dispatched, QUEUEING isn't clear yet and dispatch or
 	 * dequeue may be waiting. The store_release matches their load_acquire.
@@ -1522,6 +1578,14 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
 {
 	struct scx_sched *sch = scx_root;
 	unsigned long opss;
+	u64 op_deq_flags = deq_flags;
+
+	/*
+	 * Set %SCX_DEQ_SCHED_CHANGE when the dequeue is due to a property
+	 * change (not sleep or core-sched pick).
+	 */
+	if (!(op_deq_flags & (DEQUEUE_SLEEP | SCX_DEQ_CORE_SCHED_EXEC)))
+		op_deq_flags |= SCX_DEQ_SCHED_CHANGE;
 
 	/* dequeue is always temporary, don't reset runnable_at */
 	clr_task_runnable(p, false);
@@ -1539,10 +1603,8 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
 		 */
 		BUG();
 	case SCX_OPSS_QUEUED:
-		if (SCX_HAS_OP(sch, dequeue))
-			SCX_CALL_OP_TASK(sch, SCX_KF_REST, dequeue, rq,
-					 p, deq_flags);
-
+		/* A queued task must always be in BPF scheduler's custody */
+		WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_IN_CUSTODY));
 		if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss,
 					    SCX_OPSS_NONE))
 			break;
@@ -1565,6 +1627,22 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
 		BUG_ON(atomic_long_read(&p->scx.ops_state) != SCX_OPSS_NONE);
 		break;
 	}
+
+	/*
+	 * Call ops.dequeue() if the task is still in BPF custody.
+	 *
+	 * The code that clears ops_state to %SCX_OPSS_NONE does not always
+	 * clear %SCX_TASK_IN_CUSTODY: in dispatch_to_local_dsq(), when
+	 * we're moving a task that was in %SCX_OPSS_DISPATCHING to a
+	 * remote CPU's local DSQ, we only set ops_state to %SCX_OPSS_NONE
+	 * so that a concurrent dequeue can proceed, but we clear
+	 * %SCX_TASK_IN_CUSTODY only when we later enqueue or move the
+	 * task. So we can see NONE + IN_CUSTODY here and we must handle
+	 * it. Similarly, after waiting on %SCX_OPSS_DISPATCHING we see
+	 * NONE but the task may still have %SCX_TASK_IN_CUSTODY set until
+	 * it is enqueued on the destination.
+	 */
+	call_task_dequeue(sch, rq, p, op_deq_flags);
 }
 
 static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags)
@@ -2935,6 +3013,13 @@ static void scx_enable_task(struct task_struct *p)
 
 	lockdep_assert_rq_held(rq);
 
+	/*
+	 * Verify the task is not in BPF scheduler's custody. If flag
+	 * transitions are consistent, the flag should always be clear
+	 * here.
+	 */
+	WARN_ON_ONCE(p->scx.flags & SCX_TASK_IN_CUSTODY);
+
 	/*
 	 * Set the weight before calling ops.enable() so that the scheduler
 	 * doesn't see a stale value if they inspect the task struct.
@@ -2966,6 +3051,13 @@ static void scx_disable_task(struct task_struct *p)
 	if (SCX_HAS_OP(sch, disable))
 		SCX_CALL_OP_TASK(sch, SCX_KF_REST, disable, rq, p);
 	scx_set_task_state(p, SCX_TASK_READY);
+
+	/*
+	 * Verify the task is not in BPF scheduler's custody. If flag
+	 * transitions are consistent, the flag should always be clear
+	 * here.
+	 */
+	WARN_ON_ONCE(p->scx.flags & SCX_TASK_IN_CUSTODY);
 }
 
 static void scx_exit_task(struct task_struct *p)
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 386c677e4c9a..befa9a5d6e53 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -982,6 +982,13 @@ enum scx_deq_flags {
 	 * it hasn't been dispatched yet. Dequeue from the BPF side.
 	 */
 	SCX_DEQ_CORE_SCHED_EXEC	= 1LLU << 32,
+
+	/*
+	 * The task is being dequeued due to a property change (e.g.,
+	 * sched_setaffinity(), sched_setscheduler(), set_user_nice(),
+	 * etc.).
+	 */
+	SCX_DEQ_SCHED_CHANGE	= 1LLU << 33,
 };
 
 enum scx_pick_idle_cpu_flags {
diff --git a/tools/sched_ext/include/scx/enum_defs.autogen.h b/tools/sched_ext/include/scx/enum_defs.autogen.h
index c2c33df9292c..dcc945304760 100644
--- a/tools/sched_ext/include/scx/enum_defs.autogen.h
+++ b/tools/sched_ext/include/scx/enum_defs.autogen.h
@@ -21,6 +21,7 @@
 #define HAVE_SCX_CPU_PREEMPT_UNKNOWN
 #define HAVE_SCX_DEQ_SLEEP
 #define HAVE_SCX_DEQ_CORE_SCHED_EXEC
+#define HAVE_SCX_DEQ_SCHED_CHANGE
 #define HAVE_SCX_DSQ_FLAG_BUILTIN
 #define HAVE_SCX_DSQ_FLAG_LOCAL_ON
 #define HAVE_SCX_DSQ_INVALID
diff --git a/tools/sched_ext/include/scx/enums.autogen.bpf.h b/tools/sched_ext/include/scx/enums.autogen.bpf.h
index 2f8002bcc19a..5da50f937684 100644
--- a/tools/sched_ext/include/scx/enums.autogen.bpf.h
+++ b/tools/sched_ext/include/scx/enums.autogen.bpf.h
@@ -127,3 +127,5 @@ const volatile u64 __SCX_ENQ_CLEAR_OPSS __weak;
 const volatile u64 __SCX_ENQ_DSQ_PRIQ __weak;
 #define SCX_ENQ_DSQ_PRIQ __SCX_ENQ_DSQ_PRIQ
 
+const volatile u64 __SCX_DEQ_SCHED_CHANGE __weak;
+#define SCX_DEQ_SCHED_CHANGE __SCX_DEQ_SCHED_CHANGE
diff --git a/tools/sched_ext/include/scx/enums.autogen.h b/tools/sched_ext/include/scx/enums.autogen.h
index fedec938584b..fc9a7a4d9dea 100644
--- a/tools/sched_ext/include/scx/enums.autogen.h
+++ b/tools/sched_ext/include/scx/enums.autogen.h
@@ -46,4 +46,5 @@
 	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_LAST); \
 	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_CLEAR_OPSS); \
 	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_DSQ_PRIQ); \
+	SCX_ENUM_SET(skel, scx_deq_flags, SCX_DEQ_SCHED_CHANGE); \
 } while (0)
-- 
cgit v1.2.3


From baff45179e90276a14acb9dffce17ff517708453 Mon Sep 17 00:00:00 2001
From: Jishnu Prakash <jishnu.prakash@oss.qualcomm.com>
Date: Fri, 30 Jan 2026 17:24:20 +0530
Subject: iio: adc: Add support for QCOM PMIC5 Gen3 ADC

The ADC architecture on PMIC5 Gen3 is similar to that on PMIC5 Gen2,
with all SW communication to ADC going through PMK8550 which
communicates with other PMICs through PBS.

One major difference is that the register interface used here is that
of an SDAM (Shared Direct Access Memory) peripheral present on PMK8550.
There may be more than one SDAM used for ADC5 Gen3 and each has eight
channels, which may be used for either immediate reads (same functionality
as previous PMIC5 and PMIC5 Gen2 ADC peripherals) or recurring measurements
(same as ADC_TM functionality).

By convention, we reserve the first channel of the first SDAM for all
immediate reads and use the remaining channels across all SDAMs for
ADC_TM monitoring functionality.

Add support for PMIC5 Gen3 ADC driver for immediate read functionality.
ADC_TM is implemented as an auxiliary thermal driver under this ADC
driver.

Signed-off-by: Jishnu Prakash <jishnu.prakash@oss.qualcomm.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/Kconfig                       |  26 +
 drivers/iio/adc/Makefile                      |   1 +
 drivers/iio/adc/qcom-spmi-adc5-gen3.c         | 860 ++++++++++++++++++++++++++
 include/linux/iio/adc/qcom-adc5-gen3-common.h | 211 +++++++
 4 files changed, 1098 insertions(+)
 create mode 100644 drivers/iio/adc/qcom-spmi-adc5-gen3.c
 create mode 100644 include/linux/iio/adc/qcom-adc5-gen3-common.h

(limited to 'include/linux')

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 60038ae8dfc4..1f5915dd192d 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -1366,6 +1366,32 @@ config QCOM_SPMI_ADC5
 	  To compile this driver as a module, choose M here: the module will
 	  be called qcom-spmi-adc5.
 
+config QCOM_SPMI_ADC5_GEN3
+	tristate "Qualcomm Technologies Inc. SPMI PMIC5 GEN3 ADC"
+	depends on SPMI && THERMAL
+	select REGMAP_SPMI
+	select QCOM_VADC_COMMON
+	select AUXILIARY_BUS
+	help
+	  IIO Voltage PMIC5 Gen3 ADC driver for Qualcomm Technologies Inc.
+
+	  The driver supports reading multiple channels. The ADC is a 16-bit
+	  sigma-delta ADC. The hardware supports calibrated results for
+	  conversion requests and clients include reading phone power supply
+	  voltage, on board system thermistors connected to the PMIC ADC,
+	  PMIC die temperature, charger temperature, battery current, USB
+	  voltage input and voltage signals connected to supported PMIC GPIO
+	  pins. The hardware supports internal pull-up for thermistors and can
+	  choose between a 30k, 100k or 400k ohm pull up using the ADC channels.
+
+	  In addition, the same driver supports ADC thermal monitoring devices
+	  too. They appear as thermal zones with multiple trip points. A thermal
+	  client sets threshold temperature for both warm and cool trips and
+	  gets updated when a threshold is reached.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called qcom-spmi-adc5-gen3.
+
 config RCAR_GYRO_ADC
 	tristate "Renesas R-Car GyroADC driver"
 	depends on ARCH_RCAR_GEN2 || COMPILE_TEST
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index c76550415ff1..097357d146ba 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -116,6 +116,7 @@ obj-$(CONFIG_PAC1934) += pac1934.o
 obj-$(CONFIG_PALMAS_GPADC) += palmas_gpadc.o
 obj-$(CONFIG_QCOM_PM8XXX_XOADC) += qcom-pm8xxx-xoadc.o
 obj-$(CONFIG_QCOM_SPMI_ADC5) += qcom-spmi-adc5.o
+obj-$(CONFIG_QCOM_SPMI_ADC5_GEN3) += qcom-spmi-adc5-gen3.o
 obj-$(CONFIG_QCOM_SPMI_IADC) += qcom-spmi-iadc.o
 obj-$(CONFIG_QCOM_SPMI_RRADC) += qcom-spmi-rradc.o
 obj-$(CONFIG_QCOM_SPMI_VADC) += qcom-spmi-vadc.o
diff --git a/drivers/iio/adc/qcom-spmi-adc5-gen3.c b/drivers/iio/adc/qcom-spmi-adc5-gen3.c
new file mode 100644
index 000000000000..f8168a14b907
--- /dev/null
+++ b/drivers/iio/adc/qcom-spmi-adc5-gen3.c
@@ -0,0 +1,860 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+#include <linux/auxiliary_bus.h>
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/cleanup.h>
+#include <linux/completion.h>
+#include <linux/container_of.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/device/devres.h>
+#include <linux/dev_printk.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/iio/adc/qcom-adc5-gen3-common.h>
+#include <linux/iio/iio.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/types.h>
+#include <linux/unaligned.h>
+
+#define ADC5_GEN3_VADC_SDAM			0x0
+
+struct adc5_chip;
+
+/**
+ * struct adc5_channel_prop - ADC channel structure
+ * @common_props: structure with ADC channel properties (common to TM usage).
+ * @adc_tm: indicates TM type if the channel is used for TM measurements.
+ * @chip: pointer to top-level ADC device structure.
+ */
+struct adc5_channel_prop {
+	struct adc5_channel_common_prop common_props;
+	int adc_tm;
+	struct adc5_chip *chip;
+};
+
+/**
+ * struct adc5_chip - ADC private structure.
+ * @dev: SPMI ADC5 Gen3 device.
+ * @dev_data: Top-level ADC device data.
+ * @nchannels: number of ADC channels.
+ * @chan_props: array of ADC channel properties.
+ * @iio_chans: array of IIO channels specification.
+ * @complete: ADC result notification after interrupt is received.
+ * @lock: ADC lock for access to the peripheral, to prevent concurrent
+ *	requests from multiple clients.
+ * @data: software configuration data.
+ * @n_tm_channels: number of ADC channels used for TM measurements.
+ * @handler: TM callback to be called for threshold violation interrupt
+ *	on first SDAM.
+ * @tm_aux: pointer to auxiliary TM device.
+ */
+struct adc5_chip {
+	struct device *dev;
+	struct adc5_device_data dev_data;
+	unsigned int nchannels;
+	struct adc5_channel_prop *chan_props;
+	struct iio_chan_spec *iio_chans;
+	struct completion complete;
+	struct mutex lock;
+	const struct adc5_data *data;
+	unsigned int n_tm_channels;
+	void (*handler)(struct auxiliary_device *tm_aux);
+	struct auxiliary_device *tm_aux;
+};
+
+int adc5_gen3_read(struct adc5_device_data *adc, unsigned int sdam_index,
+		   u16 offset, u8 *data, int len)
+{
+	return regmap_bulk_read(adc->regmap,
+				adc->base[sdam_index].base_addr + offset,
+				data, len);
+}
+EXPORT_SYMBOL_NS_GPL(adc5_gen3_read, "QCOM_SPMI_ADC5_GEN3");
+
+int adc5_gen3_write(struct adc5_device_data *adc, unsigned int sdam_index,
+		    u16 offset, u8 *data, int len)
+{
+	return regmap_bulk_write(adc->regmap,
+				 adc->base[sdam_index].base_addr + offset,
+				 data, len);
+}
+EXPORT_SYMBOL_NS_GPL(adc5_gen3_write, "QCOM_SPMI_ADC5_GEN3");
+
+static int adc5_gen3_read_voltage_data(struct adc5_chip *adc, u16 *data)
+{
+	u8 rslt[2];
+	int ret;
+
+	ret = adc5_gen3_read(&adc->dev_data, ADC5_GEN3_VADC_SDAM,
+			     ADC5_GEN3_CH_DATA0(0), rslt, sizeof(rslt));
+	if (ret)
+		return ret;
+
+	*data = get_unaligned_le16(rslt);
+
+	if (*data == ADC5_USR_DATA_CHECK) {
+		dev_err(adc->dev, "Invalid data:%#x\n", *data);
+		return -EINVAL;
+	}
+
+	dev_dbg(adc->dev, "voltage raw code:%#x\n", *data);
+
+	return 0;
+}
+
+void adc5_gen3_update_dig_param(struct adc5_channel_common_prop *prop, u8 *data)
+{
+	/* Update calibration select and decimation ratio select */
+	*data &= ~(ADC5_GEN3_DIG_PARAM_CAL_SEL_MASK | ADC5_GEN3_DIG_PARAM_DEC_RATIO_SEL_MASK);
+	*data |= FIELD_PREP(ADC5_GEN3_DIG_PARAM_CAL_SEL_MASK, prop->cal_method);
+	*data |= FIELD_PREP(ADC5_GEN3_DIG_PARAM_DEC_RATIO_SEL_MASK, prop->decimation);
+}
+EXPORT_SYMBOL_NS_GPL(adc5_gen3_update_dig_param, "QCOM_SPMI_ADC5_GEN3");
+
+#define ADC5_GEN3_READ_CONFIG_REGS 7
+
+static int adc5_gen3_configure(struct adc5_chip *adc,
+			       struct adc5_channel_common_prop *prop)
+{
+	u8 buf[ADC5_GEN3_READ_CONFIG_REGS];
+	u8 conv_req = 0;
+	int ret;
+
+	ret = adc5_gen3_read(&adc->dev_data, ADC5_GEN3_VADC_SDAM, ADC5_GEN3_SID,
+			     buf, sizeof(buf));
+	if (ret)
+		return ret;
+
+	/* Write SID */
+	buf[0] = FIELD_PREP(ADC5_GEN3_SID_MASK, prop->sid);
+
+	/*
+	 * Use channel 0 by default for immediate conversion and to indicate
+	 * there is an actual conversion request
+	 */
+	buf[1] = ADC5_GEN3_CHAN_CONV_REQ | 0;
+
+	buf[2] = ADC5_GEN3_TIME_IMMEDIATE;
+
+	/* Digital param selection */
+	adc5_gen3_update_dig_param(prop, &buf[3]);
+
+	/* Update fast average sample value */
+	buf[4] = FIELD_PREP(ADC5_GEN3_FAST_AVG_CTL_SAMPLES_MASK,
+			    prop->avg_samples) | ADC5_GEN3_FAST_AVG_CTL_EN;
+
+	/* Select ADC channel */
+	buf[5] = prop->channel;
+
+	/* Select HW settle delay for channel */
+	buf[6] = FIELD_PREP(ADC5_GEN3_HW_SETTLE_DELAY_MASK,
+			    prop->hw_settle_time_us);
+
+	reinit_completion(&adc->complete);
+
+	ret = adc5_gen3_write(&adc->dev_data, ADC5_GEN3_VADC_SDAM, ADC5_GEN3_SID,
+			      buf, sizeof(buf));
+	if (ret)
+		return ret;
+
+	conv_req = ADC5_GEN3_CONV_REQ_REQ;
+	return adc5_gen3_write(&adc->dev_data, ADC5_GEN3_VADC_SDAM,
+			       ADC5_GEN3_CONV_REQ, &conv_req, sizeof(conv_req));
+}
+
+/*
+ * Worst case delay from PBS in readying handshake bit  can be up to 15ms, when
+ * PBS is busy running other simultaneous transactions, while in the best case,
+ * it is already ready at this point. Assigning polling delay and retry count
+ * accordingly.
+ */
+
+#define ADC5_GEN3_HS_DELAY_US			100
+#define ADC5_GEN3_HS_RETRY_COUNT		150
+
+int adc5_gen3_poll_wait_hs(struct adc5_device_data *adc,
+			   unsigned int sdam_index)
+{
+	u8 conv_req = ADC5_GEN3_CONV_REQ_REQ;
+	int ret, count;
+	u8 status = 0;
+
+	for (count = 0; count < ADC5_GEN3_HS_RETRY_COUNT; count++) {
+		ret = adc5_gen3_read(adc, sdam_index, ADC5_GEN3_HS, &status, sizeof(status));
+		if (ret)
+			return ret;
+
+		if (status == ADC5_GEN3_HS_READY) {
+			ret = adc5_gen3_read(adc, sdam_index, ADC5_GEN3_CONV_REQ,
+					     &conv_req, sizeof(conv_req));
+			if (ret)
+				return ret;
+
+			if (!conv_req)
+				return 0;
+		}
+
+		fsleep(ADC5_GEN3_HS_DELAY_US);
+	}
+
+	pr_err("Setting HS ready bit timed out, sdam_index:%d, status:%#x\n",
+	       sdam_index, status);
+	return -ETIMEDOUT;
+}
+EXPORT_SYMBOL_NS_GPL(adc5_gen3_poll_wait_hs, "QCOM_SPMI_ADC5_GEN3");
+
+int adc5_gen3_status_clear(struct adc5_device_data *adc,
+			   int sdam_index, u16 offset, u8 *val, int len)
+{
+	u8 value;
+	int ret;
+
+	ret = adc5_gen3_write(adc, sdam_index, offset, val, len);
+	if (ret)
+		return ret;
+
+	/* To indicate conversion request is only to clear a status */
+	value = 0;
+	ret = adc5_gen3_write(adc, sdam_index, ADC5_GEN3_PERPH_CH, &value,
+			      sizeof(value));
+	if (ret)
+		return ret;
+
+	value = ADC5_GEN3_CONV_REQ_REQ;
+	return adc5_gen3_write(adc, sdam_index, ADC5_GEN3_CONV_REQ, &value,
+			      sizeof(value));
+}
+EXPORT_SYMBOL_NS_GPL(adc5_gen3_status_clear, "QCOM_SPMI_ADC5_GEN3");
+
+/*
+ * Worst case delay from PBS for conversion time can be up to 500ms, when PBS
+ * has timed out twice, once for the initial attempt and once for a retry of
+ * the same transaction.
+ */
+
+#define ADC5_GEN3_CONV_TIMEOUT_MS	501
+
+static int adc5_gen3_do_conversion(struct adc5_chip *adc,
+				   struct adc5_channel_common_prop *prop,
+				   u16 *data_volt)
+{
+	unsigned long rc;
+	int ret;
+	u8 val;
+
+	guard(mutex)(&adc->lock);
+	ret = adc5_gen3_poll_wait_hs(&adc->dev_data, ADC5_GEN3_VADC_SDAM);
+	if (ret)
+		return ret;
+
+	ret = adc5_gen3_configure(adc, prop);
+	if (ret) {
+		dev_err(adc->dev, "ADC configure failed with %d\n", ret);
+		return ret;
+	}
+
+	/* No support for polling mode at present */
+	rc = wait_for_completion_timeout(&adc->complete,
+					 msecs_to_jiffies(ADC5_GEN3_CONV_TIMEOUT_MS));
+	if (!rc) {
+		dev_err(adc->dev, "Reading ADC channel %s timed out\n",
+			prop->label);
+		return -ETIMEDOUT;
+	}
+
+	ret = adc5_gen3_read_voltage_data(adc, data_volt);
+	if (ret)
+		return ret;
+
+	val = BIT(0);
+	return adc5_gen3_status_clear(&adc->dev_data, ADC5_GEN3_VADC_SDAM,
+				      ADC5_GEN3_EOC_CLR, &val, 1);
+}
+
+static irqreturn_t adc5_gen3_isr(int irq, void *dev_id)
+{
+	struct adc5_chip *adc = dev_id;
+	struct device *dev = adc->dev;
+	struct auxiliary_device *adev;
+	u8 status, eoc_status, val;
+	u8 tm_status[2];
+	int ret;
+
+	ret = adc5_gen3_read(&adc->dev_data, ADC5_GEN3_VADC_SDAM,
+			     ADC5_GEN3_STATUS1, &status, sizeof(status));
+	if (ret) {
+		dev_err(dev, "adc read status1 failed with %d\n", ret);
+		return IRQ_HANDLED;
+	}
+
+	ret = adc5_gen3_read(&adc->dev_data, ADC5_GEN3_VADC_SDAM,
+			     ADC5_GEN3_EOC_STS, &eoc_status, sizeof(eoc_status));
+	if (ret) {
+		dev_err(dev, "adc read eoc status failed with %d\n", ret);
+		return IRQ_HANDLED;
+	}
+
+	if (status & ADC5_GEN3_STATUS1_CONV_FAULT) {
+		dev_err_ratelimited(dev,
+				    "Unexpected conversion fault, status:%#x, eoc_status:%#x\n",
+				    status, eoc_status);
+		val = ADC5_GEN3_CONV_ERR_CLR_REQ;
+		adc5_gen3_status_clear(&adc->dev_data, ADC5_GEN3_VADC_SDAM,
+				       ADC5_GEN3_CONV_ERR_CLR, &val, 1);
+		return IRQ_HANDLED;
+	}
+
+	/* CHAN0 is the preconfigured channel for immediate conversion */
+	if (eoc_status & ADC5_GEN3_EOC_CHAN_0)
+		complete(&adc->complete);
+
+	ret = adc5_gen3_read(&adc->dev_data, ADC5_GEN3_VADC_SDAM,
+			     ADC5_GEN3_TM_HIGH_STS, tm_status, sizeof(tm_status));
+	if (ret) {
+		dev_err(dev, "adc read TM status failed with %d\n", ret);
+		return IRQ_HANDLED;
+	}
+
+	dev_dbg(dev, "Interrupt status:%#x, EOC status:%#x, high:%#x, low:%#x\n",
+		status, eoc_status, tm_status[0], tm_status[1]);
+
+	if (tm_status[0] || tm_status[1]) {
+		adev = adc->tm_aux;
+		if (!adev || !adev->dev.driver) {
+			dev_err(dev, "adc_tm auxiliary device not initialized\n");
+			return IRQ_HANDLED;
+		}
+
+		adc->handler(adev);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int adc5_gen3_fwnode_xlate(struct iio_dev *indio_dev,
+				  const struct fwnode_reference_args *iiospec)
+{
+	struct adc5_chip *adc = iio_priv(indio_dev);
+	int i, v_channel;
+
+	for (i = 0; i < adc->nchannels; i++) {
+		v_channel = ADC5_GEN3_V_CHAN(adc->chan_props[i].common_props);
+		if (v_channel == iiospec->args[0])
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+static int adc5_gen3_read_raw(struct iio_dev *indio_dev,
+			      struct iio_chan_spec const *chan, int *val,
+			      int *val2, long mask)
+{
+	struct adc5_chip *adc = iio_priv(indio_dev);
+	struct adc5_channel_common_prop *prop;
+	u16 adc_code_volt;
+	int ret;
+
+	prop = &adc->chan_props[chan->address].common_props;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_PROCESSED:
+		ret = adc5_gen3_do_conversion(adc, prop, &adc_code_volt);
+		if (ret)
+			return ret;
+
+		ret = qcom_adc5_hw_scale(prop->scale_fn_type, prop->prescale,
+					 adc->data, adc_code_volt, val);
+		if (ret)
+			return ret;
+
+		return IIO_VAL_INT;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int adc5_gen3_read_label(struct iio_dev *indio_dev,
+				const struct iio_chan_spec *chan, char *label)
+{
+	struct adc5_chip *adc = iio_priv(indio_dev);
+	struct adc5_channel_prop *prop;
+
+	prop = &adc->chan_props[chan->address];
+	return sprintf(label, "%s\n", prop->common_props.label);
+}
+
+static const struct iio_info adc5_gen3_info = {
+	.read_raw = adc5_gen3_read_raw,
+	.read_label = adc5_gen3_read_label,
+	.fwnode_xlate = adc5_gen3_fwnode_xlate,
+};
+
+struct adc5_channels {
+	unsigned int prescale_index;
+	enum iio_chan_type type;
+	long info_mask;
+	enum vadc_scale_fn_type scale_fn_type;
+};
+
+/* In these definitions, _pre refers to an index into adc5_prescale_ratios. */
+#define ADC5_CHAN(_type, _mask, _pre, _scale)	\
+	{						\
+		.prescale_index = _pre,			\
+		.type = _type,				\
+		.info_mask = _mask,			\
+		.scale_fn_type = _scale,		\
+	},						\
+
+#define ADC5_CHAN_TEMP(_pre, _scale)		\
+	ADC5_CHAN(IIO_TEMP, BIT(IIO_CHAN_INFO_PROCESSED), _pre, _scale)	\
+
+#define ADC5_CHAN_VOLT(_pre, _scale)		\
+	ADC5_CHAN(IIO_VOLTAGE, BIT(IIO_CHAN_INFO_PROCESSED), _pre, _scale)	\
+
+#define ADC5_CHAN_CUR(_pre, _scale)		\
+	ADC5_CHAN(IIO_CURRENT, BIT(IIO_CHAN_INFO_PROCESSED), _pre, _scale)	\
+
+static const struct adc5_channels adc5_gen3_chans_pmic[ADC5_MAX_CHANNEL] = {
+	[ADC5_GEN3_REF_GND]		= ADC5_CHAN_VOLT(0, SCALE_HW_CALIB_DEFAULT)
+	[ADC5_GEN3_1P25VREF]		= ADC5_CHAN_VOLT(0, SCALE_HW_CALIB_DEFAULT)
+	[ADC5_GEN3_VPH_PWR]		= ADC5_CHAN_VOLT(1, SCALE_HW_CALIB_DEFAULT)
+	[ADC5_GEN3_VBAT_SNS_QBG]	= ADC5_CHAN_VOLT(1, SCALE_HW_CALIB_DEFAULT)
+	[ADC5_GEN3_USB_SNS_V_16]	= ADC5_CHAN_TEMP(8, SCALE_HW_CALIB_DEFAULT)
+	[ADC5_GEN3_VIN_DIV16_MUX]	= ADC5_CHAN_TEMP(8, SCALE_HW_CALIB_DEFAULT)
+	[ADC5_GEN3_DIE_TEMP]		= ADC5_CHAN_TEMP(0,
+						SCALE_HW_CALIB_PMIC_THERM_PM7)
+	[ADC5_GEN3_AMUX1_THM_100K_PU]	= ADC5_CHAN_TEMP(0,
+					SCALE_HW_CALIB_THERM_100K_PU_PM7)
+	[ADC5_GEN3_AMUX2_THM_100K_PU]	= ADC5_CHAN_TEMP(0,
+					SCALE_HW_CALIB_THERM_100K_PU_PM7)
+	[ADC5_GEN3_AMUX3_THM_100K_PU]	= ADC5_CHAN_TEMP(0,
+					SCALE_HW_CALIB_THERM_100K_PU_PM7)
+	[ADC5_GEN3_AMUX4_THM_100K_PU]	= ADC5_CHAN_TEMP(0,
+					SCALE_HW_CALIB_THERM_100K_PU_PM7)
+	[ADC5_GEN3_AMUX5_THM_100K_PU]	= ADC5_CHAN_TEMP(0,
+					SCALE_HW_CALIB_THERM_100K_PU_PM7)
+	[ADC5_GEN3_AMUX6_THM_100K_PU]	= ADC5_CHAN_TEMP(0,
+					SCALE_HW_CALIB_THERM_100K_PU_PM7)
+	[ADC5_GEN3_AMUX1_GPIO_100K_PU]	= ADC5_CHAN_TEMP(0,
+					SCALE_HW_CALIB_THERM_100K_PU_PM7)
+	[ADC5_GEN3_AMUX2_GPIO_100K_PU]	= ADC5_CHAN_TEMP(0,
+					SCALE_HW_CALIB_THERM_100K_PU_PM7)
+	[ADC5_GEN3_AMUX3_GPIO_100K_PU]	= ADC5_CHAN_TEMP(0,
+					SCALE_HW_CALIB_THERM_100K_PU_PM7)
+	[ADC5_GEN3_AMUX4_GPIO_100K_PU]	= ADC5_CHAN_TEMP(0,
+					SCALE_HW_CALIB_THERM_100K_PU_PM7)
+};
+
+static int adc5_gen3_get_fw_channel_data(struct adc5_chip *adc,
+					 struct adc5_channel_prop *prop,
+					 struct fwnode_handle *fwnode)
+{
+	const char *name = fwnode_get_name(fwnode);
+	const struct adc5_data *data = adc->data;
+	struct device *dev = adc->dev;
+	const char *channel_name;
+	u32 chan, value, sid;
+	u32 varr[2];
+	int ret;
+
+	ret = fwnode_property_read_u32(fwnode, "reg", &chan);
+	if (ret < 0)
+		return dev_err_probe(dev, ret, "invalid channel number %s\n",
+				     name);
+
+	/*
+	 * Value read from "reg" is virtual channel number
+	 * virtual channel number = sid << 8 | channel number
+	 */
+	sid = FIELD_GET(ADC5_GEN3_VIRTUAL_SID_MASK, chan);
+	chan = FIELD_GET(ADC5_GEN3_CHANNEL_MASK, chan);
+
+	if (chan > ADC5_MAX_CHANNEL)
+		return dev_err_probe(dev, -EINVAL,
+				     "%s invalid channel number %d\n",
+				     name, chan);
+
+	prop->common_props.channel = chan;
+	prop->common_props.sid = sid;
+
+	if (!adc->data->adc_chans[chan].info_mask)
+		return dev_err_probe(dev, -EINVAL, "Channel %#x not supported\n", chan);
+
+	channel_name = name;
+	fwnode_property_read_string(fwnode, "label", &channel_name);
+	prop->common_props.label = channel_name;
+
+	value = data->decimation[ADC5_DECIMATION_DEFAULT];
+	fwnode_property_read_u32(fwnode, "qcom,decimation", &value);
+	ret = qcom_adc5_decimation_from_dt(value, data->decimation);
+	if (ret < 0)
+		return dev_err_probe(dev, ret, "%#x invalid decimation %d\n",
+				     chan, value);
+	prop->common_props.decimation = ret;
+
+	prop->common_props.prescale = adc->data->adc_chans[chan].prescale_index;
+	ret = fwnode_property_read_u32_array(fwnode, "qcom,pre-scaling", varr, 2);
+	if (!ret) {
+		ret = qcom_adc5_prescaling_from_dt(varr[0], varr[1]);
+		if (ret < 0)
+			return dev_err_probe(dev, ret,
+					     "%#x invalid pre-scaling <%d %d>\n",
+					     chan, varr[0], varr[1]);
+		prop->common_props.prescale = ret;
+	}
+
+	value = data->hw_settle_1[VADC_DEF_HW_SETTLE_TIME];
+	fwnode_property_read_u32(fwnode, "qcom,hw-settle-time", &value);
+	ret = qcom_adc5_hw_settle_time_from_dt(value, data->hw_settle_1);
+	if (ret < 0)
+		return dev_err_probe(dev, ret,
+				     "%#x invalid hw-settle-time %d us\n",
+				     chan, value);
+	prop->common_props.hw_settle_time_us = ret;
+
+	value = BIT(VADC_DEF_AVG_SAMPLES);
+	fwnode_property_read_u32(fwnode, "qcom,avg-samples", &value);
+	ret = qcom_adc5_avg_samples_from_dt(value);
+	if (ret < 0)
+		return dev_err_probe(dev, ret, "%#x invalid avg-samples %d\n",
+				     chan, value);
+	prop->common_props.avg_samples = ret;
+
+	if (fwnode_property_read_bool(fwnode, "qcom,ratiometric"))
+		prop->common_props.cal_method = ADC5_RATIOMETRIC_CAL;
+	else
+		prop->common_props.cal_method = ADC5_ABSOLUTE_CAL;
+
+	prop->adc_tm = fwnode_property_read_bool(fwnode, "qcom,adc-tm");
+	if (prop->adc_tm) {
+		adc->n_tm_channels++;
+		if (adc->n_tm_channels > (adc->dev_data.num_sdams * 8 - 1))
+			return dev_err_probe(dev, -EINVAL,
+					     "Number of TM nodes %u greater than channels supported:%u\n",
+					     adc->n_tm_channels,
+					     adc->dev_data.num_sdams * 8 - 1);
+	}
+
+	return 0;
+}
+
+static const struct adc5_data adc5_gen3_data_pmic = {
+	.full_scale_code_volt = 0x70e4,
+	.adc_chans = adc5_gen3_chans_pmic,
+	.info = &adc5_gen3_info,
+	.decimation = (unsigned int [ADC5_DECIMATION_SAMPLES_MAX])
+			   { 85, 340, 1360 },
+	.hw_settle_1 = (unsigned int [VADC_HW_SETTLE_SAMPLES_MAX])
+			   { 15, 100, 200, 300,
+			     400, 500, 600, 700,
+			     1000, 2000, 4000, 8000,
+			     16000, 32000, 64000, 128000 },
+};
+
+static const struct of_device_id adc5_match_table[] = {
+	{
+		.compatible = "qcom,spmi-adc5-gen3",
+		.data = &adc5_gen3_data_pmic,
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, adc5_match_table);
+
+static int adc5_get_fw_data(struct adc5_chip *adc)
+{
+	const struct adc5_channels *adc_chan;
+	struct adc5_channel_prop *chan_props;
+	struct iio_chan_spec *iio_chan;
+	struct device *dev = adc->dev;
+	unsigned int index = 0;
+	int ret;
+
+	adc->nchannels = device_get_child_node_count(dev);
+	if (!adc->nchannels)
+		return dev_err_probe(dev, -EINVAL, "No ADC channels found\n");
+
+	adc->iio_chans = devm_kcalloc(dev, adc->nchannels,
+				      sizeof(*adc->iio_chans), GFP_KERNEL);
+	if (!adc->iio_chans)
+		return -ENOMEM;
+
+	adc->chan_props = devm_kcalloc(dev, adc->nchannels,
+				       sizeof(*adc->chan_props), GFP_KERNEL);
+	if (!adc->chan_props)
+		return -ENOMEM;
+
+	chan_props = adc->chan_props;
+	adc->n_tm_channels = 0;
+	iio_chan = adc->iio_chans;
+	adc->data = device_get_match_data(dev);
+
+	device_for_each_child_node_scoped(dev, child) {
+		ret = adc5_gen3_get_fw_channel_data(adc, chan_props, child);
+		if (ret)
+			return ret;
+
+		chan_props->chip = adc;
+		adc_chan = &adc->data->adc_chans[chan_props->common_props.channel];
+		chan_props->common_props.scale_fn_type = adc_chan->scale_fn_type;
+
+		iio_chan->channel = ADC5_GEN3_V_CHAN(chan_props->common_props);
+		iio_chan->info_mask_separate = adc_chan->info_mask;
+		iio_chan->type = adc_chan->type;
+		iio_chan->address = index;
+		iio_chan->indexed = 1;
+		iio_chan++;
+		chan_props++;
+		index++;
+	}
+
+	return 0;
+}
+
+static void adc5_gen3_uninit_aux(void *data)
+{
+	auxiliary_device_uninit(data);
+}
+
+static void adc5_gen3_delete_aux(void *data)
+{
+	auxiliary_device_delete(data);
+}
+
+static void adc5_gen3_aux_device_release(struct device *dev) {}
+
+static int adc5_gen3_add_aux_tm_device(struct adc5_chip *adc)
+{
+	struct tm5_aux_dev_wrapper *aux_device;
+	int i, ret, i_tm = 0;
+
+	aux_device = devm_kzalloc(adc->dev, sizeof(*aux_device), GFP_KERNEL);
+	if (!aux_device)
+		return -ENOMEM;
+
+	aux_device->aux_dev.name = "adc5_tm_gen3";
+	aux_device->aux_dev.dev.parent = adc->dev;
+	aux_device->aux_dev.dev.release = adc5_gen3_aux_device_release;
+
+	aux_device->tm_props = devm_kcalloc(adc->dev, adc->n_tm_channels,
+					    sizeof(*aux_device->tm_props),
+					    GFP_KERNEL);
+	if (!aux_device->tm_props)
+		return -ENOMEM;
+
+	aux_device->dev_data = &adc->dev_data;
+
+	for (i = 0; i < adc->nchannels; i++) {
+		if (!adc->chan_props[i].adc_tm)
+			continue;
+		aux_device->tm_props[i_tm] = adc->chan_props[i].common_props;
+		i_tm++;
+	}
+
+	device_set_of_node_from_dev(&aux_device->aux_dev.dev, adc->dev);
+
+	aux_device->n_tm_channels = adc->n_tm_channels;
+
+	ret = auxiliary_device_init(&aux_device->aux_dev);
+	if (ret)
+		return ret;
+
+	ret = devm_add_action_or_reset(adc->dev, adc5_gen3_uninit_aux,
+				       &aux_device->aux_dev);
+	if (ret)
+		return ret;
+
+	ret = auxiliary_device_add(&aux_device->aux_dev);
+	if (ret)
+		return ret;
+	ret = devm_add_action_or_reset(adc->dev, adc5_gen3_delete_aux,
+				       &aux_device->aux_dev);
+	if (ret)
+		return ret;
+
+	adc->tm_aux = &aux_device->aux_dev;
+
+	return 0;
+}
+
+void adc5_gen3_mutex_lock(struct device *dev)
+	__acquires(&adc->lock)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev->parent);
+	struct adc5_chip *adc = iio_priv(indio_dev);
+
+	mutex_lock(&adc->lock);
+}
+EXPORT_SYMBOL_NS_GPL(adc5_gen3_mutex_lock, "QCOM_SPMI_ADC5_GEN3");
+
+void adc5_gen3_mutex_unlock(struct device *dev)
+	__releases(&adc->lock)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev->parent);
+	struct adc5_chip *adc = iio_priv(indio_dev);
+
+	mutex_unlock(&adc->lock);
+}
+EXPORT_SYMBOL_NS_GPL(adc5_gen3_mutex_unlock, "QCOM_SPMI_ADC5_GEN3");
+
+int adc5_gen3_get_scaled_reading(struct device *dev,
+				 struct adc5_channel_common_prop *common_props,
+				 int *val)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev->parent);
+	struct adc5_chip *adc = iio_priv(indio_dev);
+	u16 adc_code_volt;
+	int ret;
+
+	ret = adc5_gen3_do_conversion(adc, common_props, &adc_code_volt);
+	if (ret)
+		return ret;
+
+	return qcom_adc5_hw_scale(common_props->scale_fn_type,
+				  common_props->prescale,
+				  adc->data, adc_code_volt, val);
+}
+EXPORT_SYMBOL_NS_GPL(adc5_gen3_get_scaled_reading, "QCOM_SPMI_ADC5_GEN3");
+
+int adc5_gen3_therm_code_to_temp(struct device *dev,
+				 struct adc5_channel_common_prop *common_props,
+				 u16 code, int *val)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev->parent);
+	struct adc5_chip *adc = iio_priv(indio_dev);
+
+	return qcom_adc5_hw_scale(common_props->scale_fn_type,
+				  common_props->prescale,
+				  adc->data, code, val);
+}
+EXPORT_SYMBOL_NS_GPL(adc5_gen3_therm_code_to_temp, "QCOM_SPMI_ADC5_GEN3");
+
+void adc5_gen3_register_tm_event_notifier(struct device *dev,
+					  void (*handler)(struct auxiliary_device *))
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev->parent);
+	struct adc5_chip *adc = iio_priv(indio_dev);
+
+	adc->handler = handler;
+}
+EXPORT_SYMBOL_NS_GPL(adc5_gen3_register_tm_event_notifier, "QCOM_SPMI_ADC5_GEN3");
+
+static int adc5_gen3_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct iio_dev *indio_dev;
+	struct adc5_chip *adc;
+	struct regmap *regmap;
+	int ret, i;
+	u32 *reg;
+
+	regmap = dev_get_regmap(dev->parent, NULL);
+	if (!regmap)
+		return -ENODEV;
+
+	indio_dev = devm_iio_device_alloc(dev, sizeof(*adc));
+	if (!indio_dev)
+		return -ENOMEM;
+
+	adc = iio_priv(indio_dev);
+	adc->dev_data.regmap = regmap;
+	adc->dev = dev;
+
+	ret = device_property_count_u32(dev, "reg");
+	if (ret < 0)
+		return ret;
+
+	adc->dev_data.num_sdams = ret;
+
+	reg = devm_kcalloc(dev, adc->dev_data.num_sdams, sizeof(u32),
+			   GFP_KERNEL);
+	if (!reg)
+		return -ENOMEM;
+
+	ret = device_property_read_u32_array(dev, "reg", reg,
+					     adc->dev_data.num_sdams);
+	if (ret)
+		return dev_err_probe(dev, ret,
+				     "Failed to read reg property\n");
+
+	adc->dev_data.base = devm_kcalloc(dev, adc->dev_data.num_sdams,
+					  sizeof(*adc->dev_data.base),
+					  GFP_KERNEL);
+	if (!adc->dev_data.base)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, indio_dev);
+	init_completion(&adc->complete);
+	ret = devm_mutex_init(dev, &adc->lock);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < adc->dev_data.num_sdams; i++) {
+		adc->dev_data.base[i].base_addr = reg[i];
+
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0)
+			return dev_err_probe(dev, ret,
+					     "Getting IRQ %d failed\n", i);
+
+		adc->dev_data.base[i].irq = ret;
+
+		adc->dev_data.base[i].irq_name = devm_kasprintf(dev, GFP_KERNEL,
+								"sdam%d", i);
+		if (!adc->dev_data.base[i].irq_name)
+			return -ENOMEM;
+	}
+
+	ret = devm_request_irq(dev, adc->dev_data.base[ADC5_GEN3_VADC_SDAM].irq,
+			       adc5_gen3_isr, 0,
+			       adc->dev_data.base[ADC5_GEN3_VADC_SDAM].irq_name,
+			       adc);
+	if (ret)
+		return dev_err_probe(dev, ret,
+				     "Failed to request SDAM%d irq\n",
+				     ADC5_GEN3_VADC_SDAM);
+
+	ret = adc5_get_fw_data(adc);
+	if (ret)
+		return ret;
+
+	if (adc->n_tm_channels > 0) {
+		ret = adc5_gen3_add_aux_tm_device(adc);
+		if (ret)
+			dev_err_probe(dev, ret,
+				      "Failed to add auxiliary TM device\n");
+	}
+
+	indio_dev->name = "spmi-adc5-gen3";
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->info = &adc5_gen3_info;
+	indio_dev->channels = adc->iio_chans;
+	indio_dev->num_channels = adc->nchannels;
+
+	return devm_iio_device_register(dev, indio_dev);
+}
+
+static struct platform_driver adc5_gen3_driver = {
+	.driver = {
+		.name = "qcom-spmi-adc5-gen3",
+		.of_match_table = adc5_match_table,
+	},
+	.probe = adc5_gen3_probe,
+};
+module_platform_driver(adc5_gen3_driver);
+
+MODULE_DESCRIPTION("Qualcomm Technologies Inc. PMIC5 Gen3 ADC driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS("QCOM_SPMI_ADC5_GEN3");
diff --git a/include/linux/iio/adc/qcom-adc5-gen3-common.h b/include/linux/iio/adc/qcom-adc5-gen3-common.h
new file mode 100644
index 000000000000..6303eaa6640b
--- /dev/null
+++ b/include/linux/iio/adc/qcom-adc5-gen3-common.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ *
+ * Code used in the main and auxiliary Qualcomm PMIC voltage ADCs
+ * of type ADC5 Gen3.
+ */
+
+#ifndef QCOM_ADC5_GEN3_COMMON_H
+#define QCOM_ADC5_GEN3_COMMON_H
+
+#include <linux/auxiliary_bus.h>
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/device.h>
+#include <linux/iio/adc/qcom-vadc-common.h>
+#include <linux/regmap.h>
+#include <linux/types.h>
+
+#define ADC5_GEN3_HS				0x45
+#define ADC5_GEN3_HS_BUSY			BIT(7)
+#define ADC5_GEN3_HS_READY			BIT(0)
+
+#define ADC5_GEN3_STATUS1			0x46
+#define ADC5_GEN3_STATUS1_CONV_FAULT		BIT(7)
+#define ADC5_GEN3_STATUS1_THR_CROSS		BIT(6)
+#define ADC5_GEN3_STATUS1_EOC			BIT(0)
+
+#define ADC5_GEN3_TM_EN_STS			0x47
+#define ADC5_GEN3_TM_HIGH_STS			0x48
+#define ADC5_GEN3_TM_LOW_STS			0x49
+
+#define ADC5_GEN3_EOC_STS			0x4a
+#define ADC5_GEN3_EOC_CHAN_0			BIT(0)
+
+#define ADC5_GEN3_EOC_CLR			0x4b
+#define ADC5_GEN3_TM_HIGH_STS_CLR		0x4c
+#define ADC5_GEN3_TM_LOW_STS_CLR		0x4d
+#define ADC5_GEN3_CONV_ERR_CLR			0x4e
+#define ADC5_GEN3_CONV_ERR_CLR_REQ		BIT(0)
+
+#define ADC5_GEN3_SID				0x4f
+#define ADC5_GEN3_SID_MASK			GENMASK(3, 0)
+
+#define ADC5_GEN3_PERPH_CH			0x50
+#define ADC5_GEN3_CHAN_CONV_REQ			BIT(7)
+
+#define ADC5_GEN3_TIMER_SEL			0x51
+#define ADC5_GEN3_TIME_IMMEDIATE		0x1
+
+#define ADC5_GEN3_DIG_PARAM			0x52
+#define ADC5_GEN3_DIG_PARAM_CAL_SEL_MASK	GENMASK(5, 4)
+#define ADC5_GEN3_DIG_PARAM_DEC_RATIO_SEL_MASK	GENMASK(3, 2)
+
+#define ADC5_GEN3_FAST_AVG			0x53
+#define ADC5_GEN3_FAST_AVG_CTL_EN		BIT(7)
+#define ADC5_GEN3_FAST_AVG_CTL_SAMPLES_MASK	GENMASK(2, 0)
+
+#define ADC5_GEN3_ADC_CH_SEL_CTL		0x54
+#define ADC5_GEN3_DELAY_CTL			0x55
+#define ADC5_GEN3_HW_SETTLE_DELAY_MASK		GENMASK(3, 0)
+
+#define ADC5_GEN3_CH_EN				0x56
+#define ADC5_GEN3_HIGH_THR_INT_EN		BIT(1)
+#define ADC5_GEN3_LOW_THR_INT_EN		BIT(0)
+
+#define ADC5_GEN3_LOW_THR0			0x57
+#define ADC5_GEN3_LOW_THR1			0x58
+#define ADC5_GEN3_HIGH_THR0			0x59
+#define ADC5_GEN3_HIGH_THR1			0x5a
+
+#define ADC5_GEN3_CH_DATA0(channel)	(0x5c + (channel) * 2)
+#define ADC5_GEN3_CH_DATA1(channel)	(0x5d + (channel) * 2)
+
+#define ADC5_GEN3_CONV_REQ			0xe5
+#define ADC5_GEN3_CONV_REQ_REQ			BIT(0)
+
+#define ADC5_GEN3_VIRTUAL_SID_MASK		GENMASK(15, 8)
+#define ADC5_GEN3_CHANNEL_MASK			GENMASK(7, 0)
+#define ADC5_GEN3_V_CHAN(x)		\
+	(FIELD_PREP(ADC5_GEN3_VIRTUAL_SID_MASK, (x).sid) | (x).channel)
+
+/* ADC channels for PMIC5 Gen3 */
+#define ADC5_GEN3_REF_GND			0x00
+#define ADC5_GEN3_1P25VREF			0x01
+#define ADC5_GEN3_DIE_TEMP			0x03
+#define ADC5_GEN3_USB_SNS_V_16			0x11
+#define ADC5_GEN3_VIN_DIV16_MUX			0x12
+#define ADC5_GEN3_VPH_PWR			0x8e
+#define ADC5_GEN3_VBAT_SNS_QBG			0x8f
+/* 100k pull-up channels */
+#define ADC5_GEN3_AMUX1_THM_100K_PU		0x44
+#define ADC5_GEN3_AMUX2_THM_100K_PU		0x45
+#define ADC5_GEN3_AMUX3_THM_100K_PU		0x46
+#define ADC5_GEN3_AMUX4_THM_100K_PU		0x47
+#define ADC5_GEN3_AMUX5_THM_100K_PU		0x48
+#define ADC5_GEN3_AMUX6_THM_100K_PU		0x49
+#define ADC5_GEN3_AMUX1_GPIO_100K_PU		0x4a
+#define ADC5_GEN3_AMUX2_GPIO_100K_PU		0x4b
+#define ADC5_GEN3_AMUX3_GPIO_100K_PU		0x4c
+#define ADC5_GEN3_AMUX4_GPIO_100K_PU		0x4d
+
+#define ADC5_MAX_CHANNEL			0xc0
+
+enum adc5_cal_method {
+	ADC5_NO_CAL = 0,
+	ADC5_RATIOMETRIC_CAL,
+	ADC5_ABSOLUTE_CAL,
+};
+
+enum adc5_time_select {
+	MEAS_INT_DISABLE = 0,
+	MEAS_INT_IMMEDIATE,
+	MEAS_INT_50MS,
+	MEAS_INT_100MS,
+	MEAS_INT_1S,
+	MEAS_INT_NONE,
+};
+
+/**
+ * struct adc5_sdam_data - data per SDAM allocated for adc usage
+ * @base_addr: base address for the ADC SDAM peripheral.
+ * @irq_name: ADC IRQ name.
+ * @irq: ADC IRQ number.
+ */
+struct adc5_sdam_data {
+	u16 base_addr;
+	const char *irq_name;
+	int irq;
+};
+
+/**
+ * struct adc5_device_data - Top-level ADC device data
+ * @regmap: ADC peripheral register map field.
+ * @base: array of SDAM data.
+ * @num_sdams: number of ADC SDAM peripherals.
+ */
+struct adc5_device_data {
+	struct regmap *regmap;
+	struct adc5_sdam_data *base;
+	int num_sdams;
+};
+
+/**
+ * struct adc5_channel_common_prop - ADC channel properties (common to ADC and TM).
+ * @channel: channel number, refer to the channel list.
+ * @cal_method: calibration method.
+ * @decimation: sampling rate supported for the channel.
+ * @sid: ID of PMIC owning the channel.
+ * @label: Channel name used in device tree.
+ * @prescale: channel scaling performed on the input signal.
+ * @hw_settle_time_us: the time between AMUX being configured and the
+ *	start of conversion in uS.
+ * @avg_samples: ability to provide single result from the ADC
+ *	that is an average of multiple measurements.
+ * @scale_fn_type: Represents the scaling function to convert voltage
+ *	physical units desired by the client for the channel.
+ */
+struct adc5_channel_common_prop {
+	unsigned int channel;
+	enum adc5_cal_method cal_method;
+	unsigned int decimation;
+	unsigned int sid;
+	const char *label;
+	unsigned int prescale;
+	unsigned int hw_settle_time_us;
+	unsigned int avg_samples;
+	enum vadc_scale_fn_type scale_fn_type;
+};
+
+/**
+ * struct tm5_aux_dev_wrapper - wrapper structure around TM auxiliary device
+ * @aux_dev: TM auxiliary device structure.
+ * @dev_data: Top-level ADC device data.
+ * @tm_props: Array of common ADC channel properties for TM channels.
+ * @n_tm_channels: number of TM channels.
+ */
+struct tm5_aux_dev_wrapper {
+	struct auxiliary_device aux_dev;
+	struct adc5_device_data *dev_data;
+	struct adc5_channel_common_prop *tm_props;
+	unsigned int n_tm_channels;
+};
+
+int adc5_gen3_read(struct adc5_device_data *adc, unsigned int sdam_index,
+		   u16 offset, u8 *data, int len);
+
+int adc5_gen3_write(struct adc5_device_data *adc, unsigned int sdam_index,
+		    u16 offset, u8 *data, int len);
+
+int adc5_gen3_poll_wait_hs(struct adc5_device_data *adc,
+			   unsigned int sdam_index);
+
+void adc5_gen3_update_dig_param(struct adc5_channel_common_prop *prop,
+				u8 *data);
+
+int adc5_gen3_status_clear(struct adc5_device_data *adc,
+			   int sdam_index, u16 offset, u8 *val, int len);
+
+void adc5_gen3_mutex_lock(struct device *dev);
+void adc5_gen3_mutex_unlock(struct device *dev);
+int adc5_gen3_get_scaled_reading(struct device *dev,
+				 struct adc5_channel_common_prop *common_props,
+				 int *val);
+int adc5_gen3_therm_code_to_temp(struct device *dev,
+				 struct adc5_channel_common_prop *common_props,
+				 u16 code, int *val);
+void adc5_gen3_register_tm_event_notifier(struct device *dev,
+					  void (*handler)(struct auxiliary_device *));
+
+#endif /* QCOM_ADC5_GEN3_COMMON_H */
-- 
cgit v1.2.3


From 3d35d41169d000f4fbf3c23999b8443e1173efce Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabiobaltieri@chromium.org>
Date: Mon, 23 Feb 2026 13:25:55 -0800
Subject: Input: export input_default_setkeycode

Export input_default_setkeycode so that a driver can set a custom
setkeycode handler to take some driver specific action but still call
the default handler at some point.

Signed-off-by: Fabio Baltieri <fabiobaltieri@chromium.org>
Reviewed-by: Tzung-Bi Shih <tzungbi@kernel.org>
Link: https://patch.msgid.link/20260222003717.471977-1-dmitry.torokhov@gmail.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/input.c | 23 ++++++++++++++++++++---
 include/linux/input.h |  4 ++++
 2 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input.c b/drivers/input/input.c
index a500e1e276c2..c227eaa6271a 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -800,14 +800,30 @@ static int input_default_getkeycode(struct input_dev *dev,
 	return 0;
 }
 
-static int input_default_setkeycode(struct input_dev *dev,
-				    const struct input_keymap_entry *ke,
-				    unsigned int *old_keycode)
+/**
+ * input_default_setkeycode - default setkeycode method
+ * @dev: input device which keymap is being updated.
+ * @ke: new keymap entry.
+ * @old_keycode: pointer to the location where old keycode should be stored.
+ *
+ * This function is the default implementation of &input_dev.setkeycode()
+ * method. It is typically used when a driver does not provide its own
+ * implementation, but it is also exported so drivers can extend it.
+ *
+ * The function must be called with &input_dev.event_lock held.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+int input_default_setkeycode(struct input_dev *dev,
+			     const struct input_keymap_entry *ke,
+			     unsigned int *old_keycode)
 {
 	unsigned int index;
 	int error;
 	int i;
 
+	lockdep_assert_held(&dev->event_lock);
+
 	if (!dev->keycodesize)
 		return -EINVAL;
 
@@ -861,6 +877,7 @@ static int input_default_setkeycode(struct input_dev *dev,
 	__set_bit(ke->keycode, dev->keybit);
 	return 0;
 }
+EXPORT_SYMBOL(input_default_setkeycode);
 
 /**
  * input_get_keycode - retrieve keycode currently mapped to a given scancode
diff --git a/include/linux/input.h b/include/linux/input.h
index 7d7cb0593a63..06ca62328db1 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -517,6 +517,10 @@ INPUT_GENERATE_ABS_ACCESSORS(res, resolution)
 int input_scancode_to_scalar(const struct input_keymap_entry *ke,
 			     unsigned int *scancode);
 
+int input_default_setkeycode(struct input_dev *dev,
+			     const struct input_keymap_entry *ke,
+			     unsigned int *old_keycode);
+
 int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke);
 int input_set_keycode(struct input_dev *dev,
 		      const struct input_keymap_entry *ke);
-- 
cgit v1.2.3


From fa4f81a8c15d4018eb2053b093bf1584777e80d4 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 19 Feb 2026 10:47:03 +0900
Subject: ata: libata-scsi: make ata_scsi_simulate() static

ata_scsi_simulate() is called only from libata-scsi.c. Move this
function definition as a static function before its call in
__ata_scsi_queuecmd() and remove its declaration from
include/linux/libata.h.

No functional changes.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 drivers/ata/libata-scsi.c | 147 +++++++++++++++++++++++-----------------------
 include/linux/libata.h    |   1 -
 2 files changed, 73 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 41918e21d0f8..ad628b398fc3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4420,6 +4420,79 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
 	return NULL;
 }
 
+/**
+ *	ata_scsi_simulate - simulate SCSI command on ATA device
+ *	@dev: the target device
+ *	@cmd: SCSI command being sent to device.
+ *
+ *	Interprets and directly executes a select list of SCSI commands
+ *	that can be handled internally.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ */
+static void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd)
+{
+	const u8 *scsicmd = cmd->cmnd;
+	u8 tmp8;
+
+	switch (scsicmd[0]) {
+	case INQUIRY:
+		ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_inquiry);
+		break;
+
+	case MODE_SENSE:
+	case MODE_SENSE_10:
+		ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_mode_sense);
+		break;
+
+	case READ_CAPACITY:
+	case SERVICE_ACTION_IN_16:
+		ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_read_cap);
+		break;
+
+	case REPORT_LUNS:
+		ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_report_luns);
+		break;
+
+	case REQUEST_SENSE:
+		ata_scsi_set_sense(dev, cmd, 0, 0, 0);
+		break;
+
+	/* if we reach this, then writeback caching is disabled,
+	 * turning this into a no-op.
+	 */
+	case SYNCHRONIZE_CACHE:
+	case SYNCHRONIZE_CACHE_16:
+		fallthrough;
+
+	/* no-op's, complete with success */
+	case REZERO_UNIT:
+	case SEEK_6:
+	case SEEK_10:
+	case TEST_UNIT_READY:
+		break;
+
+	case SEND_DIAGNOSTIC:
+		tmp8 = scsicmd[1] & ~(1 << 3);
+		if (tmp8 != 0x4 || scsicmd[3] || scsicmd[4])
+			ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
+		break;
+
+	case MAINTENANCE_IN:
+		ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_maint_in);
+		break;
+
+	/* all other commands */
+	default:
+		ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x20, 0x0);
+		/* "Invalid command operation code" */
+		break;
+	}
+
+	scsi_done(cmd);
+}
+
 enum scsi_qc_status __ata_scsi_queuecmd(struct scsi_cmnd *scmd,
 					struct ata_device *dev)
 {
@@ -4522,80 +4595,6 @@ enum scsi_qc_status ata_scsi_queuecmd(struct Scsi_Host *shost,
 }
 EXPORT_SYMBOL_GPL(ata_scsi_queuecmd);
 
-/**
- *	ata_scsi_simulate - simulate SCSI command on ATA device
- *	@dev: the target device
- *	@cmd: SCSI command being sent to device.
- *
- *	Interprets and directly executes a select list of SCSI commands
- *	that can be handled internally.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- */
-
-void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd)
-{
-	const u8 *scsicmd = cmd->cmnd;
-	u8 tmp8;
-
-	switch(scsicmd[0]) {
-	case INQUIRY:
-		ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_inquiry);
-		break;
-
-	case MODE_SENSE:
-	case MODE_SENSE_10:
-		ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_mode_sense);
-		break;
-
-	case READ_CAPACITY:
-	case SERVICE_ACTION_IN_16:
-		ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_read_cap);
-		break;
-
-	case REPORT_LUNS:
-		ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_report_luns);
-		break;
-
-	case REQUEST_SENSE:
-		ata_scsi_set_sense(dev, cmd, 0, 0, 0);
-		break;
-
-	/* if we reach this, then writeback caching is disabled,
-	 * turning this into a no-op.
-	 */
-	case SYNCHRONIZE_CACHE:
-	case SYNCHRONIZE_CACHE_16:
-		fallthrough;
-
-	/* no-op's, complete with success */
-	case REZERO_UNIT:
-	case SEEK_6:
-	case SEEK_10:
-	case TEST_UNIT_READY:
-		break;
-
-	case SEND_DIAGNOSTIC:
-		tmp8 = scsicmd[1] & ~(1 << 3);
-		if (tmp8 != 0x4 || scsicmd[3] || scsicmd[4])
-			ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
-		break;
-
-	case MAINTENANCE_IN:
-		ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_maint_in);
-		break;
-
-	/* all other commands */
-	default:
-		ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x20, 0x0);
-		/* "Invalid command operation code" */
-		break;
-	}
-
-	scsi_done(cmd);
-}
-
 int ata_scsi_add_hosts(struct ata_host *host, const struct scsi_host_template *sht)
 {
 	int i, rc;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 00346ce3af5e..db87c99e4189 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1205,7 +1205,6 @@ extern unsigned int ata_do_dev_read_id(struct ata_device *dev,
 				       struct ata_taskfile *tf, __le16 *id);
 extern void ata_qc_complete(struct ata_queued_cmd *qc);
 extern u64 ata_qc_get_active(struct ata_port *ap);
-extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
 extern int ata_std_bios_param(struct scsi_device *sdev,
 			      struct gendisk *unused,
 			      sector_t capacity, int geom[]);
-- 
cgit v1.2.3


From ecfa23b486b22844855844202424bc1966cebb33 Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Tue, 17 Feb 2026 12:26:15 +0100
Subject: jiffies: Remove unused __jiffy_arch_data

The __jiffy_arch_data definition was added in 2017 by commit 60b0a8c3d248
("frv: declare jiffies to be located in the .data section") for the needs
of the frv port. The frv support was removed in 2018 by commit fd8773f9f544
("arch: remove frv port") and no other architecture has required
__jiffy_arch_data. Therefore, remove this unused definition.

Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260217112638.1525094-1-petr.pavlu@suse.com
---
 include/linux/jiffies.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index fdef2c155c27..1a393d160420 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -67,10 +67,6 @@ extern void register_refined_jiffies(long clock_tick_rate);
 /* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
 #define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
 
-#ifndef __jiffy_arch_data
-#define __jiffy_arch_data
-#endif
-
 /*
  * The 64-bit value is not atomic on 32-bit systems - you MUST NOT read it
  * without sampling the sequence number in jiffies_lock.
@@ -83,7 +79,7 @@ extern void register_refined_jiffies(long clock_tick_rate);
  * See arch/ARCH/kernel/vmlinux.lds.S
  */
 extern u64 __cacheline_aligned_in_smp jiffies_64;
-extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies;
+extern unsigned long volatile __cacheline_aligned_in_smp jiffies;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
-- 
cgit v1.2.3


From 38ab6557234d8629407a824be90e82514d6129a0 Mon Sep 17 00:00:00 2001
From: Sander Vanheule <sander@svanheule.net>
Date: Fri, 20 Feb 2026 17:01:11 +0100
Subject: regmap: sort header includes

Sort the included headers to make spotting duplicates easier and avoid
discussions on where to add new includes.

Signed-off-by: Sander Vanheule <sander@svanheule.net>
Link: https://patch.msgid.link/20260220160112.543391-1-sander@svanheule.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index caff2240bdab..c8a6a05bdba1 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -10,15 +10,15 @@
  * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
  */
 
-#include <linux/list.h>
-#include <linux/rbtree.h>
-#include <linux/ktime.h>
+#include <linux/bug.h>
 #include <linux/delay.h>
 #include <linux/err.h>
-#include <linux/bug.h>
-#include <linux/lockdep.h>
-#include <linux/iopoll.h>
 #include <linux/fwnode.h>
+#include <linux/iopoll.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/rbtree.h>
 
 struct module;
 struct clk;
-- 
cgit v1.2.3


From 37983fad7f3ef296fa0504c8e945987459dc5487 Mon Sep 17 00:00:00 2001
From: Sander Vanheule <sander@svanheule.net>
Date: Fri, 20 Feb 2026 17:01:12 +0100
Subject: regmap: define cleanup helper for regmap_field

For temporary field allocation, the user has to perform manual cleanup,
or rely on devm_regmap_field_alloc() to (eventually) clean up the
allocated resources when an error occurs.

Add a cleanup helper that takes care of freeing the allocated
regmap_field whenever it goes out of scope.

This can simplify this example:

    struct regmap_field *field = regmap_field_alloc(...);
    if (IS_ERR(field))
        return PTR_ERR(field);

    int err = regmap_field_read(...);
    if (err)
        goto out;

    /* some logic that may also error */

    err = regmap_field_write(...);

  out:
    regmap_field_free(field);

    return err;

into the shorter:

    struct regmap_field *field __free(regmap_field) = regmap_field_alloc(...);
    if (IS_ERR(field))
        return PTR_ERR(field);

    int err = regmap_field_read(...);
    if (err)
        return err;

    /* some logic that may also error */

    return regmap_field_write(...);

Signed-off-by: Sander Vanheule <sander@svanheule.net>
Link: https://patch.msgid.link/20260220160112.543391-2-sander@svanheule.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index c8a6a05bdba1..f1c5cb63c171 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -11,6 +11,7 @@
  */
 
 #include <linux/bug.h>
+#include <linux/cleanup.h>
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/fwnode.h>
@@ -1460,6 +1461,8 @@ struct regmap_field *regmap_field_alloc(struct regmap *regmap,
 		struct reg_field reg_field);
 void regmap_field_free(struct regmap_field *field);
 
+DEFINE_FREE(regmap_field, struct regmap_field *, if (_T) regmap_field_free(_T))
+
 struct regmap_field *devm_regmap_field_alloc(struct device *dev,
 		struct regmap *regmap, struct reg_field reg_field);
 void devm_regmap_field_free(struct device *dev,	struct regmap_field *field);
-- 
cgit v1.2.3


From aa8671af0c380c15989b88325a2d5a6c5341771d Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 24 Feb 2026 12:10:43 +0100
Subject: PCI/PTM: Drop pci_enable_ptm() granularity parameter

No pci_enable_ptm() callers supply the "granularity" pointer where the
clock granularity would be returned.

Drop the unused pci_enable_ptm() parameter.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
[bhelgaas: commit log]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Link: https://patch.msgid.link/20260224111044.3487873-5-mika.westerberg@linux.intel.com
---
 drivers/net/ethernet/intel/ice/ice_main.c      |  2 +-
 drivers/net/ethernet/intel/idpf/idpf_main.c    |  2 +-
 drivers/net/ethernet/intel/igc/igc_main.c      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  2 +-
 drivers/pci/pcie/ptm.c                         | 11 +++--------
 include/linux/pci.h                            |  4 ++--
 6 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index ebf48feffb30..b35c4e4ecd2a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5028,7 +5028,7 @@ static int ice_init(struct ice_pf *pf)
 	}
 
 	if (pf->hw.mac_type == ICE_MAC_E830) {
-		err = pci_enable_ptm(pf->pdev, NULL);
+		err = pci_enable_ptm(pf->pdev);
 		if (err)
 			dev_dbg(dev, "PCIe PTM not supported by PCIe bus/controller\n");
 	}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c
index 0dd741dcfcdb..ab3c409e587b 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_main.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_main.c
@@ -257,7 +257,7 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_free;
 	}
 
-	err = pci_enable_ptm(pdev, NULL);
+	err = pci_enable_ptm(pdev);
 	if (err)
 		pci_dbg(pdev, "PCIe PTM is not supported by PCIe bus/controller\n");
 
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 27e5c2109138..b030acf94ac4 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -7123,7 +7123,7 @@ static int igc_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_pci_reg;
 
-	err = pci_enable_ptm(pdev, NULL);
+	err = pci_enable_ptm(pdev);
 	if (err < 0)
 		dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fdc3ba20912e..0b94d4ed0ef6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -960,7 +960,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
 
 	mlx5_pci_vsc_init(dev);
 
-	pci_enable_ptm(pdev, NULL);
+	pci_enable_ptm(pdev);
 
 	return 0;
 
diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
index 91a598ed534c..2c848ae4f15f 100644
--- a/drivers/pci/pcie/ptm.c
+++ b/drivers/pci/pcie/ptm.c
@@ -88,7 +88,7 @@ void pci_ptm_init(struct pci_dev *dev)
 
 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
 	    pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM)
-		pci_enable_ptm(dev, NULL);
+		pci_enable_ptm(dev);
 }
 
 void pci_save_ptm_state(struct pci_dev *dev)
@@ -182,15 +182,13 @@ static int __pci_enable_ptm(struct pci_dev *dev)
 /**
  * pci_enable_ptm() - Enable Precision Time Measurement
  * @dev: PCI device
- * @granularity: pointer to return granularity
  *
- * Enable Precision Time Measurement for @dev.  If successful and
- * @granularity is non-NULL, return the Effective Granularity.
+ * Enable Precision Time Measurement for @dev.
  *
  * Return: zero if successful, or -EINVAL if @dev lacks a PTM Capability or
  * is not a PTM Root and lacks an upstream path of PTM-enabled devices.
  */
-int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
+int pci_enable_ptm(struct pci_dev *dev)
 {
 	int rc;
 	char clock_desc[8];
@@ -201,9 +199,6 @@ int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
 
 	dev->ptm_enabled = 1;
 
-	if (granularity)
-		*granularity = dev->ptm_granularity;
-
 	switch (dev->ptm_granularity) {
 	case 0:
 		snprintf(clock_desc, sizeof(clock_desc), "unknown");
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1c270f1d5123..8aaa72dcb164 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1975,11 +1975,11 @@ struct pci_ptm_debugfs {
 };
 
 #ifdef CONFIG_PCIE_PTM
-int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
+int pci_enable_ptm(struct pci_dev *dev);
 void pci_disable_ptm(struct pci_dev *dev);
 bool pcie_ptm_enabled(struct pci_dev *dev);
 #else
-static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
+static inline int pci_enable_ptm(struct pci_dev *dev)
 { return -EINVAL; }
 static inline void pci_disable_ptm(struct pci_dev *dev) { }
 static inline bool pcie_ptm_enabled(struct pci_dev *dev)
-- 
cgit v1.2.3


From e02902dd493bf9c9b05353c761737ac514ad7a5c Mon Sep 17 00:00:00 2001
From: Antoniu Miclaus <antoniu.miclaus@analog.com>
Date: Mon, 23 Feb 2026 18:21:01 +0200
Subject: spi: add devm_spi_new_ancillary_device()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a devres-managed version of spi_new_ancillary_device() that
automatically unregisters the ancillary SPI device when the parent
device is removed.

This follows the same devm_add_action_or_reset() pattern used by the
other managed SPI functions (devm_spi_optimize_message,
devm_spi_register_controller, etc.) and eliminates the need for drivers
to open-code their own devm cleanup callbacks for ancillary devices.

Acked-by: Nuno Sá <nuno.sa@analog.com>
Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Link: https://patch.msgid.link/20260223162110.156746-3-antoniu.miclaus@analog.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 40 ++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/spi.h |  1 +
 2 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8fbed4754de4..26cc10aa7533 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2747,6 +2747,46 @@ err_out:
 }
 EXPORT_SYMBOL_GPL(spi_new_ancillary_device);
 
+static void devm_spi_unregister_device(void *spi)
+{
+	spi_unregister_device(spi);
+}
+
+/**
+ * devm_spi_new_ancillary_device() - Register managed ancillary SPI device
+ * @spi:         Pointer to the main SPI device registering the ancillary device
+ * @chip_select: Chip Select of the ancillary device
+ *
+ * Register an ancillary SPI device; for example some chips have a chip-select
+ * for normal device usage and another one for setup/firmware upload.
+ *
+ * This is the managed version of spi_new_ancillary_device(). The ancillary
+ * device will be unregistered automatically when the parent SPI device is
+ * unregistered.
+ *
+ * This may only be called from main SPI device's probe routine.
+ *
+ * Return: Pointer to new ancillary device on success; ERR_PTR on failure
+ */
+struct spi_device *devm_spi_new_ancillary_device(struct spi_device *spi,
+						 u8 chip_select)
+{
+	struct spi_device *ancillary;
+	int ret;
+
+	ancillary = spi_new_ancillary_device(spi, chip_select);
+	if (IS_ERR(ancillary))
+		return ancillary;
+
+	ret = devm_add_action_or_reset(&spi->dev, devm_spi_unregister_device,
+				       ancillary);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return ancillary;
+}
+EXPORT_SYMBOL_GPL(devm_spi_new_ancillary_device);
+
 #ifdef CONFIG_ACPI
 struct acpi_spi_lookup {
 	struct spi_controller 	*ctlr;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index af7cfee7b8f6..1c9aab627583 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -387,6 +387,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
 }
 
 extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 chip_select);
+extern struct spi_device *devm_spi_new_ancillary_device(struct spi_device *spi, u8 chip_select);
 
 /* Use a define to avoid include chaining to get THIS_MODULE */
 #define spi_register_driver(driver) \
-- 
cgit v1.2.3


From 477174ac35c510d0ed3043f5bd4fba25546a21ce Mon Sep 17 00:00:00 2001
From: David Carlier <devnexen@gmail.com>
Date: Tue, 24 Feb 2026 05:56:37 +0000
Subject: sched_ext: Optimize sched_ext_entity layout for cache locality

Reorder struct sched_ext_entity to place ops_state, ddsp_dsq_id, and
ddsp_enq_flags immediately after dsq. These fields are accessed together
in the do_enqueue_task() and finish_dispatch() hot paths but were
previously spread across three different cache lines. Grouping them on
the same cache line reduces cache misses on every enqueue and dispatch
operation.

Signed-off-by: David Carlier <devnexen@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/sched/ext.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 4601e5ecb43c..0150b3fe6230 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -163,6 +163,9 @@ struct scx_dsq_list_node {
  */
 struct sched_ext_entity {
 	struct scx_dispatch_q	*dsq;
+	atomic_long_t		ops_state;
+	u64			ddsp_dsq_id;
+	u64			ddsp_enq_flags;
 	struct scx_dsq_list_node dsq_list;	/* dispatch order */
 	struct rb_node		dsq_priq;	/* p->scx.dsq_vtime order */
 	u32			dsq_seq;
@@ -174,7 +177,6 @@ struct sched_ext_entity {
 	s32			selected_cpu;
 	u32			kf_mask;	/* see scx_kf_mask above */
 	struct task_struct	*kf_tasks[2];	/* see SCX_CALL_OP_TASK() */
-	atomic_long_t		ops_state;
 
 	struct list_head	runnable_node;	/* rq->scx.runnable_list */
 	unsigned long		runnable_at;
@@ -182,8 +184,6 @@ struct sched_ext_entity {
 #ifdef CONFIG_SCHED_CORE
 	u64			core_sched_at;	/* see scx_prio_less() */
 #endif
-	u64			ddsp_dsq_id;
-	u64			ddsp_enq_flags;
 
 	/* BPF scheduler modifiable fields */
 
-- 
cgit v1.2.3


From cd0aa651535092afd5d776bfe94e4fdf750f89c3 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 23 Feb 2026 09:34:41 +0000
Subject: net: stmmac: pass interface mode into fix_mac_speed() method

Pass the current interface mode reported by phylink into the
fix_mac_speed() method. This will be used by qcom-ethqos for its
"SGMII" configuration.

Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Tested-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1vuSKv-0000000AScG-1zv6@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c |  3 ++-
 drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c         | 11 +++++++----
 drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c    |  3 ++-
 drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c |  3 ++-
 drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c     |  3 ++-
 drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c         | 11 +++++++----
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c       |  3 ++-
 include/linux/stmmac.h                                  |  3 ++-
 8 files changed, 26 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index d043bad4a862..0495437d3a6e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -112,7 +112,8 @@ static int dwc_qos_probe(struct platform_device *pdev,
 #define AUTO_CAL_STATUS 0x880c
 #define  AUTO_CAL_STATUS_ACTIVE BIT(31)
 
-static void tegra_eqos_fix_speed(void *bsp_priv, int speed, unsigned int mode)
+static void tegra_eqos_fix_speed(void *bsp_priv, phy_interface_t interface,
+				 int speed, unsigned int mode)
 {
 	struct tegra_eqos *eqos = bsp_priv;
 	bool needs_calibration = false;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index c4e85197629d..9f5a15b81f8a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -48,7 +48,8 @@ struct imx_dwmac_ops {
 
 	int (*fix_soc_reset)(struct stmmac_priv *priv);
 	int (*set_intf_mode)(struct imx_priv_data *dwmac, u8 phy_intf_sel);
-	void (*fix_mac_speed)(void *priv, int speed, unsigned int mode);
+	void (*fix_mac_speed)(void *priv, phy_interface_t interface,
+			      int speed, unsigned int mode);
 };
 
 struct imx_priv_data {
@@ -160,7 +161,8 @@ static int imx_dwmac_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i,
 	return stmmac_set_clk_tx_rate(bsp_priv, clk_tx_i, interface, speed);
 }
 
-static void imx_dwmac_fix_speed(void *priv, int speed, unsigned int mode)
+static void imx_dwmac_fix_speed(void *priv, phy_interface_t interface,
+				int speed, unsigned int mode)
 {
 	struct plat_stmmacenet_data *plat_dat;
 	struct imx_priv_data *dwmac = priv;
@@ -185,13 +187,14 @@ static void imx_dwmac_fix_speed(void *priv, int speed, unsigned int mode)
 		dev_err(dwmac->dev, "failed to set tx rate %lu\n", rate);
 }
 
-static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode)
+static void imx93_dwmac_fix_speed(void *priv, phy_interface_t interface,
+				  int speed, unsigned int mode)
 {
 	struct imx_priv_data *dwmac = priv;
 	unsigned int iface;
 	int ctrl, old_ctrl;
 
-	imx_dwmac_fix_speed(priv, speed, mode);
+	imx_dwmac_fix_speed(priv, interface, speed, mode);
 
 	if (!dwmac || mode != MLO_AN_FIXED)
 		return;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index 815213223583..9c51c96223ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -143,7 +143,8 @@ static struct stmmac_pci_info loongson_gmac_pci_info = {
 	.setup = loongson_gmac_data,
 };
 
-static void loongson_gnet_fix_speed(void *priv, int speed, unsigned int mode)
+static void loongson_gnet_fix_speed(void *priv, phy_interface_t interface,
+				    int speed, unsigned int mode)
 {
 	struct loongson_data *ld = (struct loongson_data *)priv;
 	struct net_device *ndev = dev_get_drvdata(ld->dev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index bd5d3bf90400..9b29516a5a7c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -643,7 +643,8 @@ static void ethqos_configure(struct qcom_ethqos *ethqos, int speed)
 	return ethqos->configure_func(ethqos, speed);
 }
 
-static void ethqos_fix_mac_speed(void *priv, int speed, unsigned int mode)
+static void ethqos_fix_mac_speed(void *priv, phy_interface_t interface,
+				 int speed, unsigned int mode)
 {
 	struct qcom_ethqos *ethqos = priv;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index 4c8991f3b38d..c6b99814d391 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -72,7 +72,8 @@ struct socfpga_dwmac {
 	const struct socfpga_dwmac_ops *ops;
 };
 
-static void socfpga_dwmac_fix_mac_speed(void *bsp_priv, int speed,
+static void socfpga_dwmac_fix_mac_speed(void *bsp_priv,
+					phy_interface_t interface, int speed,
 					unsigned int mode)
 {
 	struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)bsp_priv;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index f50547b67fbc..6ebbf95d158f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -91,11 +91,13 @@ struct sti_dwmac {
 	struct regmap *regmap;
 	bool gmac_en;
 	int speed;
-	void (*fix_retime_src)(void *priv, int speed, unsigned int mode);
+	void (*fix_retime_src)(void *priv, phy_interface_t interface,
+			       int speed, unsigned int mode);
 };
 
 struct sti_dwmac_of_data {
-	void (*fix_retime_src)(void *priv, int speed, unsigned int mode);
+	void (*fix_retime_src)(void *priv, phy_interface_t interface,
+			       int speed, unsigned int mode);
 };
 
 enum {
@@ -114,7 +116,8 @@ static u32 stih4xx_tx_retime_val[] = {
 				 | STIH4XX_ETH_SEL_INTERNAL_NOTEXT_PHYCLK,
 };
 
-static void stih4xx_fix_retime_src(void *priv, int spd, unsigned int mode)
+static void stih4xx_fix_retime_src(void *priv, phy_interface_t interface,
+				   int spd, unsigned int mode)
 {
 	struct sti_dwmac *dwmac = priv;
 	u32 src = dwmac->tx_retime_src;
@@ -170,7 +173,7 @@ static int sti_set_phy_intf_sel(void *bsp_priv, u8 phy_intf_sel)
 	val = (dwmac->interface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII;
 	regmap_update_bits(regmap, reg, ENMII_MASK, val);
 
-	dwmac->fix_retime_src(dwmac, dwmac->speed, 0);
+	dwmac->fix_retime_src(dwmac, dwmac->interface, dwmac->speed, 0);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 82375d34ad57..d7c730179a7f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1071,7 +1071,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 	}
 
 	if (priv->plat->fix_mac_speed)
-		priv->plat->fix_mac_speed(priv->plat->bsp_priv, speed, mode);
+		priv->plat->fix_mac_speed(priv->plat->bsp_priv, interface,
+					  speed, mode);
 
 	if (!duplex)
 		ctrl &= ~priv->hw->link.duplex;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 32352a216567..b96ae9dadfab 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -256,7 +256,8 @@ struct plat_stmmacenet_data {
 	int (*set_phy_intf_sel)(void *priv, u8 phy_intf_sel);
 	int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i,
 			       phy_interface_t interface, int speed);
-	void (*fix_mac_speed)(void *priv, int speed, unsigned int mode);
+	void (*fix_mac_speed)(void *priv, phy_interface_t interface,
+			      int speed, unsigned int mode);
 	int (*fix_soc_reset)(struct stmmac_priv *priv);
 	int (*serdes_powerup)(struct net_device *ndev, void *priv);
 	void (*serdes_powerdown)(struct net_device *ndev, void *priv);
-- 
cgit v1.2.3


From 46a9d97069cab311738c950d0fcef85a459c7b8f Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Tue, 24 Feb 2026 11:06:38 +0900
Subject: ata: libata-eh: avoid unnecessary calls to
 ata_scsi_port_error_handler()

When handling SCSI command timeouts, if we had no actual command
timeouts (either because the command was a deferred qc or the completion
path won the race with ata_scsi_cmd_error_handler()), we do not need to
go through a port error handling, as there was in fact no errors at all.

Modify ata_scsi_cmd_error_handler() to return the number of commands
that timed out and use this return value in ata_scsi_error() to call
ata_scsi_port_error_handler() only if we had command timeouts, or if
the port EH has already been scheduled due to failed commands.
Otherwise, simply call scsi_eh_flush_done_q() to finish the completed
commands without running the full port error handling.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 drivers/ata/libata-eh.c | 28 +++++++++++++++++++---------
 include/linux/libata.h  |  3 ++-
 2 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 72a22b6c9682..12c6740398fa 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -560,21 +560,27 @@ void ata_scsi_error(struct Scsi_Host *host)
 {
 	struct ata_port *ap = ata_shost_to_port(host);
 	unsigned long flags;
+	int nr_timedout;
 	LIST_HEAD(eh_work_q);
 
 	spin_lock_irqsave(host->host_lock, flags);
 	list_splice_init(&host->eh_cmd_q, &eh_work_q);
 	spin_unlock_irqrestore(host->host_lock, flags);
 
-	ata_scsi_cmd_error_handler(host, ap, &eh_work_q);
-
-	/* If we timed raced normal completion and there is nothing to
-	   recover nr_timedout == 0 why exactly are we doing error recovery ? */
-	ata_scsi_port_error_handler(host, ap);
+	/*
+	 * First check what errors we got with ata_scsi_cmd_error_handler().
+	 * If we had no command timeouts and EH is not scheduled for this port,
+	 * meaning that we do not have any failed command, then there is no
+	 * need to go through the full port error handling. We only need to
+	 * flush the completed commands we have.
+	 */
+	nr_timedout = ata_scsi_cmd_error_handler(host, ap, &eh_work_q);
+	if (nr_timedout || ata_port_eh_scheduled(ap))
+		ata_scsi_port_error_handler(host, ap);
+	else
+		scsi_eh_flush_done_q(&ap->eh_done_q);
 
-	/* finish or retry handled scmd's and clean up */
 	WARN_ON(!list_empty(&eh_work_q));
-
 }
 
 /**
@@ -586,9 +592,11 @@ void ata_scsi_error(struct Scsi_Host *host)
  * process the given list of commands and return those finished to the
  * ap->eh_done_q.  This function is the first part of the libata error
  * handler which processes a given list of failed commands.
+ *
+ * Return the number of commands that timed out.
  */
-void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
-				struct list_head *eh_work_q)
+int ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
+			       struct list_head *eh_work_q)
 {
 	int i;
 	unsigned long flags;
@@ -678,6 +686,8 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
 	ap->eh_tries = ATA_EH_MAX_TRIES;
 
 	spin_unlock_irqrestore(ap->lock, flags);
+
+	return nr_timedout;
 }
 EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index db87c99e4189..5c085ef4eda7 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1225,7 +1225,8 @@ extern int ata_ncq_prio_enable(struct ata_port *ap, struct scsi_device *sdev,
 extern struct ata_device *ata_dev_pair(struct ata_device *adev);
 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev);
 extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap);
-extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q);
+int ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
+			       struct list_head *eh_q);
 
 /*
  * SATA specific code - drivers/ata/libata-sata.c
-- 
cgit v1.2.3


From d9d5e1bdd18074ea27985c777ddc3a8a0b007468 Mon Sep 17 00:00:00 2001
From: Koichiro Den <den@valinux.co.jp>
Date: Mon, 16 Feb 2026 00:22:16 +0900
Subject: dmaengine: dw-edma: Add virtual IRQ for interrupt-emulation doorbells

Interrupt emulation can assert the dw-edma IRQ line without updating the
DONE/ABORT bits. With the shared read/write/common IRQ handlers, the
driver cannot reliably distinguish such an emulated interrupt from a
real one and leaving a level IRQ asserted may wedge the line.

Allocate a dedicated, requestable Linux virtual IRQ (db_irq) for
interrupt emulation and attach an irq_chip whose .irq_ack runs the
core-specific deassert sequence (.ack_emulated_irq()). The physical
dw-edma interrupt handlers raise this virtual IRQ via
generic_handle_irq(), ensuring emulated IRQs are always deasserted.

Export the virtual IRQ number (db_irq) and the doorbell register offset
(db_offset) via struct dw_edma_chip so platform users can expose
interrupt emulation as a doorbell.

Without this, a single interrupt-emulation write can leave the level IRQ
line asserted and cause the generic IRQ layer to disable it.

Signed-off-by: Koichiro Den <den@valinux.co.jp>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260215152216.3393561-3-den@valinux.co.jp
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw-edma/dw-edma-core.c | 127 +++++++++++++++++++++++++++++++++++--
 include/linux/dma/edma.h           |   6 ++
 2 files changed, 128 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c
index e7d698b352d3..cd34a3ea602d 100644
--- a/drivers/dma/dw-edma/dw-edma-core.c
+++ b/drivers/dma/dw-edma/dw-edma-core.c
@@ -663,7 +663,96 @@ static void dw_edma_abort_interrupt(struct dw_edma_chan *chan)
 	chan->status = EDMA_ST_IDLE;
 }
 
-static inline irqreturn_t dw_edma_interrupt_write(int irq, void *data)
+static void dw_edma_emul_irq_ack(struct irq_data *d)
+{
+	struct dw_edma *dw = irq_data_get_irq_chip_data(d);
+
+	dw_edma_core_ack_emulated_irq(dw);
+}
+
+/*
+ * irq_chip implementation for interrupt-emulation doorbells.
+ *
+ * The emulated source has no mask/unmask mechanism. With handle_level_irq(),
+ * the flow is therefore:
+ *   1) .irq_ack() deasserts the source
+ *   2) registered handlers (if any) are dispatched
+ * Since deassertion is already done in .irq_ack(), handlers do not need to take
+ * care of it, hence IRQCHIP_ONESHOT_SAFE.
+ */
+static struct irq_chip dw_edma_emul_irqchip = {
+	.name		= "dw-edma-emul",
+	.irq_ack	= dw_edma_emul_irq_ack,
+	.flags		= IRQCHIP_ONESHOT_SAFE | IRQCHIP_SKIP_SET_WAKE,
+};
+
+static int dw_edma_emul_irq_alloc(struct dw_edma *dw)
+{
+	struct dw_edma_chip *chip = dw->chip;
+	int virq;
+
+	chip->db_irq = 0;
+	chip->db_offset = ~0;
+
+	/*
+	 * Only meaningful when the core provides the deassert sequence
+	 * for interrupt emulation.
+	 */
+	if (!dw->core->ack_emulated_irq)
+		return 0;
+
+	/*
+	 * Allocate a single, requestable Linux virtual IRQ number.
+	 * Use >= 1 so that 0 can remain a "not available" sentinel.
+	 */
+	virq = irq_alloc_desc(NUMA_NO_NODE);
+	if (virq < 0)
+		return virq;
+
+	irq_set_chip_and_handler(virq, &dw_edma_emul_irqchip, handle_level_irq);
+	irq_set_chip_data(virq, dw);
+	irq_set_noprobe(virq);
+
+	chip->db_irq = virq;
+	chip->db_offset = dw_edma_core_db_offset(dw);
+
+	return 0;
+}
+
+static void dw_edma_emul_irq_free(struct dw_edma *dw)
+{
+	struct dw_edma_chip *chip = dw->chip;
+
+	if (!chip)
+		return;
+	if (chip->db_irq <= 0)
+		return;
+
+	irq_free_descs(chip->db_irq, 1);
+	chip->db_irq = 0;
+	chip->db_offset = ~0;
+}
+
+static inline irqreturn_t dw_edma_interrupt_emulated(void *data)
+{
+	struct dw_edma_irq *dw_irq = data;
+	struct dw_edma *dw = dw_irq->dw;
+	int db_irq = dw->chip->db_irq;
+
+	if (db_irq > 0) {
+		/*
+		 * Interrupt emulation may assert the IRQ line without updating the
+		 * normal DONE/ABORT status bits. With a shared IRQ handler we
+		 * cannot reliably detect such events by status registers alone, so
+		 * always perform the core-specific deassert sequence.
+		 */
+		generic_handle_irq(db_irq);
+		return IRQ_HANDLED;
+	}
+	return IRQ_NONE;
+}
+
+static inline irqreturn_t dw_edma_interrupt_write_inner(int irq, void *data)
 {
 	struct dw_edma_irq *dw_irq = data;
 
@@ -672,7 +761,7 @@ static inline irqreturn_t dw_edma_interrupt_write(int irq, void *data)
 				       dw_edma_abort_interrupt);
 }
 
-static inline irqreturn_t dw_edma_interrupt_read(int irq, void *data)
+static inline irqreturn_t dw_edma_interrupt_read_inner(int irq, void *data)
 {
 	struct dw_edma_irq *dw_irq = data;
 
@@ -681,12 +770,33 @@ static inline irqreturn_t dw_edma_interrupt_read(int irq, void *data)
 				       dw_edma_abort_interrupt);
 }
 
-static irqreturn_t dw_edma_interrupt_common(int irq, void *data)
+static inline irqreturn_t dw_edma_interrupt_write(int irq, void *data)
+{
+	irqreturn_t ret = IRQ_NONE;
+
+	ret |= dw_edma_interrupt_write_inner(irq, data);
+	ret |= dw_edma_interrupt_emulated(data);
+
+	return ret;
+}
+
+static inline irqreturn_t dw_edma_interrupt_read(int irq, void *data)
 {
 	irqreturn_t ret = IRQ_NONE;
 
-	ret |= dw_edma_interrupt_write(irq, data);
-	ret |= dw_edma_interrupt_read(irq, data);
+	ret |= dw_edma_interrupt_read_inner(irq, data);
+	ret |= dw_edma_interrupt_emulated(data);
+
+	return ret;
+}
+
+static inline irqreturn_t dw_edma_interrupt_common(int irq, void *data)
+{
+	irqreturn_t ret = IRQ_NONE;
+
+	ret |= dw_edma_interrupt_write_inner(irq, data);
+	ret |= dw_edma_interrupt_read_inner(irq, data);
+	ret |= dw_edma_interrupt_emulated(data);
 
 	return ret;
 }
@@ -973,6 +1083,11 @@ int dw_edma_probe(struct dw_edma_chip *chip)
 	if (err)
 		return err;
 
+	/* Allocate a dedicated virtual IRQ for interrupt-emulation doorbells */
+	err = dw_edma_emul_irq_alloc(dw);
+	if (err)
+		dev_warn(dev, "Failed to allocate emulation IRQ: %d\n", err);
+
 	/* Setup write/read channels */
 	err = dw_edma_channel_setup(dw, wr_alloc, rd_alloc);
 	if (err)
@@ -988,6 +1103,7 @@ int dw_edma_probe(struct dw_edma_chip *chip)
 err_irq_free:
 	for (i = (dw->nr_irqs - 1); i >= 0; i--)
 		free_irq(chip->ops->irq_vector(dev, i), &dw->irq[i]);
+	dw_edma_emul_irq_free(dw);
 
 	return err;
 }
@@ -1010,6 +1126,7 @@ int dw_edma_remove(struct dw_edma_chip *chip)
 	/* Free irqs */
 	for (i = (dw->nr_irqs - 1); i >= 0; i--)
 		free_irq(chip->ops->irq_vector(dev, i), &dw->irq[i]);
+	dw_edma_emul_irq_free(dw);
 
 	/* Deregister eDMA device */
 	dma_async_device_unregister(&dw->dma);
diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h
index 270b5458aecf..9da53c75e49b 100644
--- a/include/linux/dma/edma.h
+++ b/include/linux/dma/edma.h
@@ -73,6 +73,8 @@ enum dw_edma_chip_flags {
  * @ll_region_rd:	 DMA descriptor link list memory for read channel
  * @dt_region_wr:	 DMA data memory for write channel
  * @dt_region_rd:	 DMA data memory for read channel
+ * @db_irq:		 Virtual IRQ dedicated to interrupt emulation
+ * @db_offset:		 Offset from DMA register base
  * @mf:			 DMA register map format
  * @dw:			 struct dw_edma that is filled by dw_edma_probe()
  */
@@ -94,6 +96,10 @@ struct dw_edma_chip {
 	struct dw_edma_region	dt_region_wr[EDMA_MAX_WR_CH];
 	struct dw_edma_region	dt_region_rd[EDMA_MAX_RD_CH];
 
+	/* interrupt emulation */
+	int			db_irq;
+	resource_size_t		db_offset;
+
 	enum dw_edma_map_format	mf;
 
 	struct dw_edma		*dw;
-- 
cgit v1.2.3


From 0289ada4a31661016a0611a41a4886bb958e9985 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@arm.com>
Date: Mon, 9 Feb 2026 12:44:33 +0000
Subject: coresight: Fix memory leak in coresight_alloc_device_name()

The memory leak detector reports:

  echo clear > /sys/kernel/debug/kmemleak
  modprobe coresight_funnel
  rmmod coresight_funnel

  # Scan memory leak and report it
  echo scan > /sys/kernel/debug/kmemleak
  cat /sys/kernel/debug/kmemleak
  unreferenced object 0xffff0008020c7200 (size 64):
    comm "modprobe", pid 410, jiffies 4295333721
    hex dump (first 32 bytes):
      d8 da fe 7e 09 00 ff ff e8 2e ff 7e 09 00 ff ff  ...~.......~....
      b0 6c ff 7e 09 00 ff ff 30 83 00 7f 09 00 ff ff  .l.~....0.......
    backtrace (crc 4116a690):
      kmemleak_alloc+0xd8/0xf8
      __kmalloc_node_track_caller_noprof+0x2c8/0x6f0
      krealloc_node_align_noprof+0x13c/0x2c8
      coresight_alloc_device_name+0xe4/0x158 [coresight]
      0xffffd327ecef8394
      0xffffd327ecef85ec
      amba_probe+0x118/0x1c8
      really_probe+0xc8/0x3f0
      __driver_probe_device+0x88/0x190
      driver_probe_device+0x44/0x120
      __driver_attach+0x100/0x238
      bus_for_each_dev+0x84/0xf0
      driver_attach+0x2c/0x40
      bus_add_driver+0x128/0x258
      driver_register+0x64/0x138
      __amba_driver_register+0x2c/0x48

The memory leak is caused by not freeing the device list that maintains
device indices.

This device list preserves stable device indices across unbind and
rebind device operations, so it does not share the same lifetime as a
device instances and must only be freed when the module is unloaded.

Some modules do not implement a module exit callback because they are
registered using module_platform_driver().  As a result, the device
list cannot be released during module exit for those modules.

Fix this by moving the device list into the core layer.  As a general
solution, instead of maintaining a static list in each driver, drivers
now allocate device lists via coresight_allocate_device_list() and
device indices via coresight_allocate_device_idx().

The list is released only when the core module is unloaded by calling
coresight_release_device_list(), avoiding the leak.

Fixes: 0f5f9b6ba9e1 ("coresight: Use platform agnostic names")
Reviewed-by: James Clark <james.clark@linaro.org>
Signed-off-by: Leo Yan <leo.yan@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20260209-arm_coresight_refactor_dev_register-v4-1-62d6042f76f7@arm.com
---
 drivers/hwtracing/coresight/coresight-catu.c       |   4 +-
 drivers/hwtracing/coresight/coresight-core.c       | 127 +++++++++++++++------
 drivers/hwtracing/coresight/coresight-ctcu-core.c  |   4 +-
 drivers/hwtracing/coresight/coresight-cti-core.c   |  19 ++-
 drivers/hwtracing/coresight/coresight-dummy.c      |   7 +-
 drivers/hwtracing/coresight/coresight-etb10.c      |   4 +-
 drivers/hwtracing/coresight/coresight-funnel.c     |   4 +-
 drivers/hwtracing/coresight/coresight-replicator.c |   4 +-
 drivers/hwtracing/coresight/coresight-stm.c        |   4 +-
 drivers/hwtracing/coresight/coresight-tmc-core.c   |  12 +-
 drivers/hwtracing/coresight/coresight-tnoc.c       |   4 +-
 drivers/hwtracing/coresight/coresight-tpda.c       |   4 +-
 drivers/hwtracing/coresight/coresight-tpdm.c       |   4 +-
 drivers/hwtracing/coresight/coresight-tpiu.c       |   4 +-
 drivers/hwtracing/coresight/ultrasoc-smb.c         |   4 +-
 include/linux/coresight.h                          |  14 +--
 16 files changed, 120 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c
index dfd035852b12..ce71dcddfca2 100644
--- a/drivers/hwtracing/coresight/coresight-catu.c
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -30,8 +30,6 @@
 #define catu_dbg(x, ...) do {} while (0)
 #endif
 
-DEFINE_CORESIGHT_DEVLIST(catu_devs, "catu");
-
 struct catu_etr_buf {
 	struct tmc_sg_table *catu_table;
 	dma_addr_t sladdr;
@@ -530,7 +528,7 @@ static int __catu_probe(struct device *dev, struct resource *res)
 	if (ret)
 		return ret;
 
-	catu_desc.name = coresight_alloc_device_name(&catu_devs, dev);
+	catu_desc.name = coresight_alloc_device_name("catu", dev);
 	if (!catu_desc.name)
 		return -ENOMEM;
 
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index 80e26396ad0a..6881fdc5da92 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -53,6 +53,9 @@ struct coresight_node {
 const u32 coresight_barrier_pkt[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
 EXPORT_SYMBOL_GPL(coresight_barrier_pkt);
 
+/* List maintains the device index */
+static LIST_HEAD(coresight_dev_idx_list);
+
 static const struct cti_assoc_op *cti_assoc_ops;
 
 void coresight_set_cti_ops(const struct cti_assoc_op *cti_op)
@@ -1438,22 +1441,55 @@ void coresight_unregister(struct coresight_device *csdev)
 }
 EXPORT_SYMBOL_GPL(coresight_unregister);
 
+static struct coresight_dev_list *
+coresight_allocate_device_list(const char *prefix)
+{
+	struct coresight_dev_list *list;
 
-/*
- * coresight_search_device_idx - Search the fwnode handle of a device
- * in the given dev_idx list. Must be called with the coresight_mutex held.
- *
- * Returns the index of the entry, when found. Otherwise, -ENOENT.
- */
-static int coresight_search_device_idx(struct coresight_dev_list *dict,
-				       struct fwnode_handle *fwnode)
+	/* Check if have already allocated */
+	list_for_each_entry(list, &coresight_dev_idx_list, node) {
+		if (!strcmp(list->pfx, prefix))
+			return list;
+	}
+
+	list = kzalloc(sizeof(*list), GFP_KERNEL);
+	if (!list)
+		return NULL;
+
+	list->pfx = kstrdup(prefix, GFP_KERNEL);
+	if (!list->pfx) {
+		kfree(list);
+		return NULL;
+	}
+
+	list_add(&list->node, &coresight_dev_idx_list);
+	return list;
+}
+
+static int coresight_allocate_device_idx(struct coresight_dev_list *list,
+					 struct device *dev)
 {
-	int i;
+	struct fwnode_handle **fwnode_list;
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
+	int idx;
+
+	for (idx = 0; idx < list->nr_idx; idx++)
+		if (list->fwnode_list[idx] == fwnode)
+			return idx;
+
+	/* Make space for the new entry */
+	idx = list->nr_idx;
+	fwnode_list = krealloc_array(list->fwnode_list,
+				     idx + 1, sizeof(*list->fwnode_list),
+				     GFP_KERNEL);
+	if (!fwnode_list)
+		return -ENOMEM;
 
-	for (i = 0; i < dict->nr_idx; i++)
-		if (dict->fwnode_list[i] == fwnode)
-			return i;
-	return -ENOENT;
+	fwnode_list[idx] = fwnode;
+	list->fwnode_list = fwnode_list;
+	list->nr_idx = idx + 1;
+
+	return idx;
 }
 
 static bool coresight_compare_type(enum coresight_dev_type type_a,
@@ -1527,45 +1563,63 @@ bool coresight_loses_context_with_cpu(struct device *dev)
 EXPORT_SYMBOL_GPL(coresight_loses_context_with_cpu);
 
 /*
- * coresight_alloc_device_name - Get an index for a given device in the
- * device index list specific to a driver. An index is allocated for a
- * device and is tracked with the fwnode_handle to prevent allocating
+ * coresight_alloc_device_name - Get an index for a given device in the list
+ * specific to a driver (presented by the prefix string). An index is allocated
+ * for a device and is tracked with the fwnode_handle to prevent allocating
  * duplicate indices for the same device (e.g, if we defer probing of
  * a device due to dependencies), in case the index is requested again.
  */
-char *coresight_alloc_device_name(struct coresight_dev_list *dict,
-				  struct device *dev)
+char *coresight_alloc_device_name(const char *prefix, struct device *dev)
 {
-	int idx;
+	struct coresight_dev_list *list;
 	char *name = NULL;
-	struct fwnode_handle **list;
+	int idx;
 
 	mutex_lock(&coresight_mutex);
 
-	idx = coresight_search_device_idx(dict, dev_fwnode(dev));
-	if (idx < 0) {
-		/* Make space for the new entry */
-		idx = dict->nr_idx;
-		list = krealloc_array(dict->fwnode_list,
-				      idx + 1, sizeof(*dict->fwnode_list),
-				      GFP_KERNEL);
-		if (ZERO_OR_NULL_PTR(list)) {
-			idx = -ENOMEM;
-			goto done;
-		}
+	list = coresight_allocate_device_list(prefix);
+	if (!list)
+		goto done;
 
-		list[idx] = dev_fwnode(dev);
-		dict->fwnode_list = list;
-		dict->nr_idx = idx + 1;
-	}
+	idx = coresight_allocate_device_idx(list, dev);
 
-	name = devm_kasprintf(dev, GFP_KERNEL, "%s%d", dict->pfx, idx);
+	/*
+	 * If index allocation fails, the device list is not released here;
+	 * it is instead freed later by coresight_release_device_list() when
+	 * the coresight_core module is unloaded.
+	 */
+	if (idx < 0)
+		goto done;
+
+	name = devm_kasprintf(dev, GFP_KERNEL, "%s%d", list->pfx, idx);
 done:
 	mutex_unlock(&coresight_mutex);
 	return name;
 }
 EXPORT_SYMBOL_GPL(coresight_alloc_device_name);
 
+static void coresight_release_device_list(void)
+{
+	struct coresight_dev_list *list, *next;
+	int i;
+
+	/*
+	 * Here is no need to take coresight_mutex; this is during core module
+	 * unloading, no race condition with other modules.
+	 */
+
+	list_for_each_entry_safe(list, next, &coresight_dev_idx_list, node) {
+		for (i = 0; i < list->nr_idx; i++)
+			list->fwnode_list[i] = NULL;
+		list->nr_idx = 0;
+		list_del(&list->node);
+
+		kfree(list->pfx);
+		kfree(list->fwnode_list);
+		kfree(list);
+	}
+}
+
 const struct bus_type coresight_bustype = {
 	.name	= "coresight",
 };
@@ -1639,6 +1693,7 @@ static void __exit coresight_exit(void)
 					     &coresight_notifier);
 	etm_perf_exit();
 	bus_unregister(&coresight_bustype);
+	coresight_release_device_list();
 }
 
 module_init(coresight_init);
diff --git a/drivers/hwtracing/coresight/coresight-ctcu-core.c b/drivers/hwtracing/coresight/coresight-ctcu-core.c
index abed15eb72b4..6813ae6e929b 100644
--- a/drivers/hwtracing/coresight/coresight-ctcu-core.c
+++ b/drivers/hwtracing/coresight/coresight-ctcu-core.c
@@ -19,8 +19,6 @@
 #include "coresight-ctcu.h"
 #include "coresight-priv.h"
 
-DEFINE_CORESIGHT_DEVLIST(ctcu_devs, "ctcu");
-
 #define ctcu_writel(drvdata, val, offset)	__raw_writel((val), drvdata->base + offset)
 #define ctcu_readl(drvdata, offset)		__raw_readl(drvdata->base + offset)
 
@@ -187,7 +185,7 @@ static int ctcu_probe(struct platform_device *pdev)
 	void __iomem *base;
 	int i, ret;
 
-	desc.name = coresight_alloc_device_name(&ctcu_devs, dev);
+	desc.name = coresight_alloc_device_name("ctcu", dev);
 	if (!desc.name)
 		return -ENOMEM;
 
diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c
index 7a8f1ef6b94e..fddc8f31b91d 100644
--- a/drivers/hwtracing/coresight/coresight-cti-core.c
+++ b/drivers/hwtracing/coresight/coresight-cti-core.c
@@ -48,15 +48,6 @@ static int nr_cti_cpu;
 /* quick lookup list for CPU bound CTIs when power handling */
 static struct cti_drvdata *cti_cpu_drvdata[NR_CPUS];
 
-/*
- * CTI naming. CTI bound to cores will have the name cti_cpu<N> where
- * N is the CPU ID. System CTIs will have the name cti_sys<I> where I
- * is an index allocated by order of discovery.
- *
- * CTI device name list - for CTI not bound to cores.
- */
-DEFINE_CORESIGHT_DEVLIST(cti_sys_devs, "cti_sys");
-
 /* write set of regs to hardware - call with spinlock claimed */
 void cti_write_all_hw_regs(struct cti_drvdata *drvdata)
 {
@@ -889,12 +880,18 @@ static int cti_probe(struct amba_device *adev, const struct amba_id *id)
 	/* default to powered - could change on PM notifications */
 	drvdata->config.hw_powered = true;
 
-	/* set up device name - will depend if cpu bound or otherwise */
+	/*
+	 * Set up device name - will depend if cpu bound or otherwise.
+	 *
+	 * CTI bound to cores will have the name cti_cpu<N> where N is th
+	 * eCPU ID. System CTIs will have the name cti_sys<I> where I is an
+	 * index allocated by order of discovery.
+	 */
 	if (drvdata->ctidev.cpu >= 0)
 		cti_desc.name = devm_kasprintf(dev, GFP_KERNEL, "cti_cpu%d",
 					       drvdata->ctidev.cpu);
 	else
-		cti_desc.name = coresight_alloc_device_name(&cti_sys_devs, dev);
+		cti_desc.name = coresight_alloc_device_name("cti_sys", dev);
 	if (!cti_desc.name)
 		return -ENOMEM;
 
diff --git a/drivers/hwtracing/coresight/coresight-dummy.c b/drivers/hwtracing/coresight/coresight-dummy.c
index 14322c99e29d..c176a2f57300 100644
--- a/drivers/hwtracing/coresight/coresight-dummy.c
+++ b/drivers/hwtracing/coresight/coresight-dummy.c
@@ -19,9 +19,6 @@ struct dummy_drvdata {
 	u8				traceid;
 };
 
-DEFINE_CORESIGHT_DEVLIST(source_devs, "dummy_source");
-DEFINE_CORESIGHT_DEVLIST(sink_devs, "dummy_sink");
-
 static int dummy_source_enable(struct coresight_device *csdev,
 			       struct perf_event *event, enum cs_mode mode,
 			       __maybe_unused struct coresight_path *path)
@@ -126,7 +123,7 @@ static int dummy_probe(struct platform_device *pdev)
 
 	if (of_device_is_compatible(node, "arm,coresight-dummy-source")) {
 
-		desc.name = coresight_alloc_device_name(&source_devs, dev);
+		desc.name = coresight_alloc_device_name("dummy_source", dev);
 		if (!desc.name)
 			return -ENOMEM;
 
@@ -155,7 +152,7 @@ static int dummy_probe(struct platform_device *pdev)
 		drvdata->traceid = (u8)trace_id;
 
 	} else if (of_device_is_compatible(node, "arm,coresight-dummy-sink")) {
-		desc.name = coresight_alloc_device_name(&sink_devs, dev);
+		desc.name = coresight_alloc_device_name("dummy_sink", dev);
 		if (!desc.name)
 			return -ENOMEM;
 
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index 6657602d8f2e..b952a1d47f12 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -63,8 +63,6 @@
 #define ETB_FFSR_BIT		1
 #define ETB_FRAME_SIZE_WORDS	4
 
-DEFINE_CORESIGHT_DEVLIST(etb_devs, "etb");
-
 /**
  * struct etb_drvdata - specifics associated to an ETB component
  * @base:	memory mapped base address for this component.
@@ -722,7 +720,7 @@ static int etb_probe(struct amba_device *adev, const struct amba_id *id)
 	struct resource *res = &adev->res;
 	struct coresight_desc desc = { 0 };
 
-	desc.name = coresight_alloc_device_name(&etb_devs, dev);
+	desc.name = coresight_alloc_device_name("etb", dev);
 	if (!desc.name)
 		return -ENOMEM;
 
diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c
index 3b248e54471a..3f56ceccd8c9 100644
--- a/drivers/hwtracing/coresight/coresight-funnel.c
+++ b/drivers/hwtracing/coresight/coresight-funnel.c
@@ -30,8 +30,6 @@
 #define FUNNEL_HOLDTIME		(0x7 << FUNNEL_HOLDTIME_SHFT)
 #define FUNNEL_ENSx_MASK	0xff
 
-DEFINE_CORESIGHT_DEVLIST(funnel_devs, "funnel");
-
 /**
  * struct funnel_drvdata - specifics associated to a funnel component
  * @base:	memory mapped base address for this component.
@@ -223,7 +221,7 @@ static int funnel_probe(struct device *dev, struct resource *res)
 	    of_device_is_compatible(dev->of_node, "arm,coresight-funnel"))
 		dev_warn_once(dev, "Uses OBSOLETE CoreSight funnel binding\n");
 
-	desc.name = coresight_alloc_device_name(&funnel_devs, dev);
+	desc.name = coresight_alloc_device_name("funnel", dev);
 	if (!desc.name)
 		return -ENOMEM;
 
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
index e6472658235d..07fc04f53b88 100644
--- a/drivers/hwtracing/coresight/coresight-replicator.c
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -24,8 +24,6 @@
 #define REPLICATOR_IDFILTER0		0x000
 #define REPLICATOR_IDFILTER1		0x004
 
-DEFINE_CORESIGHT_DEVLIST(replicator_devs, "replicator");
-
 /**
  * struct replicator_drvdata - specifics associated to a replicator component
  * @base:	memory mapped base address for this component. Also indicates
@@ -230,7 +228,7 @@ static int replicator_probe(struct device *dev, struct resource *res)
 		dev_warn_once(dev,
 			      "Uses OBSOLETE CoreSight replicator binding\n");
 
-	desc.name = coresight_alloc_device_name(&replicator_devs, dev);
+	desc.name = coresight_alloc_device_name("replicator", dev);
 	if (!desc.name)
 		return -ENOMEM;
 
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index e68529bf89c9..aca6cec7885a 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -110,8 +110,6 @@ struct channel_space {
 	unsigned long		*guaranteed;
 };
 
-DEFINE_CORESIGHT_DEVLIST(stm_devs, "stm");
-
 /**
  * struct stm_drvdata - specifics associated to an STM component
  * @base:		memory mapped base address for this component.
@@ -834,7 +832,7 @@ static int __stm_probe(struct device *dev, struct resource *res)
 	struct resource ch_res;
 	struct coresight_desc desc = { 0 };
 
-	desc.name = coresight_alloc_device_name(&stm_devs, dev);
+	desc.name = coresight_alloc_device_name("stm", dev);
 	if (!desc.name)
 		return -ENOMEM;
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c
index 36599c431be6..58b469ee73b4 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-core.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-core.c
@@ -32,10 +32,6 @@
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
 
-DEFINE_CORESIGHT_DEVLIST(etb_devs, "tmc_etb");
-DEFINE_CORESIGHT_DEVLIST(etf_devs, "tmc_etf");
-DEFINE_CORESIGHT_DEVLIST(etr_devs, "tmc_etr");
-
 int tmc_wait_for_tmcready(struct tmc_drvdata *drvdata)
 {
 	struct coresight_device *csdev = drvdata->csdev;
@@ -777,7 +773,7 @@ static int __tmc_probe(struct device *dev, struct resource *res)
 	struct coresight_platform_data *pdata = NULL;
 	struct tmc_drvdata *drvdata;
 	struct coresight_desc desc = { 0 };
-	struct coresight_dev_list *dev_list = NULL;
+	const char *dev_list = NULL;
 
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
@@ -827,7 +823,7 @@ static int __tmc_probe(struct device *dev, struct resource *res)
 		desc.type = CORESIGHT_DEV_TYPE_SINK;
 		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
 		desc.ops = &tmc_etb_cs_ops;
-		dev_list = &etb_devs;
+		dev_list = "tmc_etb";
 		break;
 	case TMC_CONFIG_TYPE_ETR:
 		desc.groups = coresight_etr_groups;
@@ -839,7 +835,7 @@ static int __tmc_probe(struct device *dev, struct resource *res)
 			goto out;
 		idr_init(&drvdata->idr);
 		mutex_init(&drvdata->idr_mutex);
-		dev_list = &etr_devs;
+		dev_list = "tmc_etr";
 		break;
 	case TMC_CONFIG_TYPE_ETF:
 		desc.groups = coresight_etf_groups;
@@ -847,7 +843,7 @@ static int __tmc_probe(struct device *dev, struct resource *res)
 		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
 		desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO;
 		desc.ops = &tmc_etf_cs_ops;
-		dev_list = &etf_devs;
+		dev_list = "tmc_etf";
 		break;
 	default:
 		pr_err("%s: Unsupported TMC config\n", desc.name);
diff --git a/drivers/hwtracing/coresight/coresight-tnoc.c b/drivers/hwtracing/coresight/coresight-tnoc.c
index 1128612e70a7..96a25877b824 100644
--- a/drivers/hwtracing/coresight/coresight-tnoc.c
+++ b/drivers/hwtracing/coresight/coresight-tnoc.c
@@ -47,8 +47,6 @@ struct trace_noc_drvdata {
 	int			atid;
 };
 
-DEFINE_CORESIGHT_DEVLIST(trace_noc_devs, "traceNoc");
-
 static void trace_noc_enable_hw(struct trace_noc_drvdata *drvdata)
 {
 	u32 val;
@@ -191,7 +189,7 @@ static int _tnoc_probe(struct device *dev, struct resource *res)
 	struct coresight_desc desc = { 0 };
 	int ret;
 
-	desc.name = coresight_alloc_device_name(&trace_noc_devs, dev);
+	desc.name = coresight_alloc_device_name("traceNoc", dev);
 	if (!desc.name)
 		return -ENOMEM;
 
diff --git a/drivers/hwtracing/coresight/coresight-tpda.c b/drivers/hwtracing/coresight/coresight-tpda.c
index 7055f8f13427..89c8f71f0aff 100644
--- a/drivers/hwtracing/coresight/coresight-tpda.c
+++ b/drivers/hwtracing/coresight/coresight-tpda.c
@@ -20,8 +20,6 @@
 #include "coresight-trace-id.h"
 #include "coresight-tpdm.h"
 
-DEFINE_CORESIGHT_DEVLIST(tpda_devs, "tpda");
-
 static void tpda_clear_element_size(struct coresight_device *csdev)
 {
 	struct tpda_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -585,7 +583,7 @@ static int tpda_probe(struct amba_device *adev, const struct amba_id *id)
 	if (ret)
 		return ret;
 
-	desc.name = coresight_alloc_device_name(&tpda_devs, dev);
+	desc.name = coresight_alloc_device_name("tpda", dev);
 	if (!desc.name)
 		return -ENOMEM;
 	desc.type = CORESIGHT_DEV_TYPE_LINK;
diff --git a/drivers/hwtracing/coresight/coresight-tpdm.c b/drivers/hwtracing/coresight/coresight-tpdm.c
index 06e0a905a67d..da77bdaad0a4 100644
--- a/drivers/hwtracing/coresight/coresight-tpdm.c
+++ b/drivers/hwtracing/coresight/coresight-tpdm.c
@@ -19,8 +19,6 @@
 #include "coresight-priv.h"
 #include "coresight-tpdm.h"
 
-DEFINE_CORESIGHT_DEVLIST(tpdm_devs, "tpdm");
-
 static bool tpdm_has_dsb_dataset(struct tpdm_drvdata *drvdata)
 {
 	return (drvdata->datasets & TPDM_PIDR0_DS_DSB);
@@ -1416,7 +1414,7 @@ static int tpdm_probe(struct device *dev, struct resource *res)
 	}
 
 	/* Set up coresight component description */
-	desc.name = coresight_alloc_device_name(&tpdm_devs, dev);
+	desc.name = coresight_alloc_device_name("tpdm", dev);
 	if (!desc.name)
 		return -ENOMEM;
 	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
index aaa44bc521c3..b8560b140e0f 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -49,8 +49,6 @@
 #define FFCR_FON_MAN		BIT(6)
 #define FFCR_STOP_FI		BIT(12)
 
-DEFINE_CORESIGHT_DEVLIST(tpiu_devs, "tpiu");
-
 /*
  * @base:	memory mapped base address for this component.
  * @atclk:	optional clock for the core parts of the TPIU.
@@ -134,7 +132,7 @@ static int __tpiu_probe(struct device *dev, struct resource *res)
 	struct coresight_desc desc = { 0 };
 	int ret;
 
-	desc.name = coresight_alloc_device_name(&tpiu_devs, dev);
+	desc.name = coresight_alloc_device_name("tpiu", dev);
 	if (!desc.name)
 		return -ENOMEM;
 
diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c
index 8f7922a5e534..5776f63468fa 100644
--- a/drivers/hwtracing/coresight/ultrasoc-smb.c
+++ b/drivers/hwtracing/coresight/ultrasoc-smb.c
@@ -17,8 +17,6 @@
 #include "coresight-priv.h"
 #include "ultrasoc-smb.h"
 
-DEFINE_CORESIGHT_DEVLIST(sink_devs, "ultra_smb");
-
 #define ULTRASOC_SMB_DSM_UUID	"82ae1283-7f6a-4cbe-aa06-53e8fb24db18"
 
 static bool smb_buffer_not_empty(struct smb_drv_data *drvdata)
@@ -478,7 +476,7 @@ static int smb_register_sink(struct platform_device *pdev,
 	desc.pdata = pdata;
 	desc.dev = &pdev->dev;
 	desc.groups = smb_sink_groups;
-	desc.name = coresight_alloc_device_name(&sink_devs, &pdev->dev);
+	desc.name = coresight_alloc_device_name("ultra_smb", &pdev->dev);
 	if (!desc.name) {
 		dev_err(&pdev->dev, "Failed to alloc coresight device name");
 		return -ENOMEM;
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 2b48be97fcd0..2131febebee9 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -306,24 +306,19 @@ struct coresight_device {
  * coresight_dev_list - Mapping for devices to "name" index for device
  * names.
  *
+ * @node:		Node on the global device index list.
  * @nr_idx:		Number of entries already allocated.
  * @pfx:		Prefix pattern for device name.
  * @fwnode_list:	Array of fwnode_handles associated with each allocated
  *			index, upto nr_idx entries.
  */
 struct coresight_dev_list {
+	struct list_head	node;
 	int			nr_idx;
-	const char		*pfx;
+	char			*pfx;
 	struct fwnode_handle	**fwnode_list;
 };
 
-#define DEFINE_CORESIGHT_DEVLIST(var, dev_pfx)				\
-static struct coresight_dev_list (var) = {				\
-						.pfx = dev_pfx,		\
-						.nr_idx = 0,		\
-						.fwnode_list = NULL,	\
-}
-
 #define to_coresight_device(d) container_of(d, struct coresight_device, dev)
 
 /**
@@ -663,8 +658,7 @@ void coresight_clear_self_claim_tag(struct csdev_access *csa);
 void coresight_clear_self_claim_tag_unlocked(struct csdev_access *csa);
 void coresight_disclaim_device(struct coresight_device *csdev);
 void coresight_disclaim_device_unlocked(struct coresight_device *csdev);
-char *coresight_alloc_device_name(struct coresight_dev_list *devs,
-					 struct device *dev);
+char *coresight_alloc_device_name(const char *prefix, struct device *dev);
 
 bool coresight_loses_context_with_cpu(struct device *dev);
 
-- 
cgit v1.2.3


From 59509da0cb51dc48e4edc57d7d3ef1d424c58fc9 Mon Sep 17 00:00:00 2001
From: Amit Kumar Mahapatra <amit.kumar-mahapatra@amd.com>
Date: Wed, 4 Feb 2026 09:32:17 +0100
Subject: mtd: Move struct mtd_concat definition to header file

To enable a more generic approach for concatenating MTD devices,
struct mtd_concat should be accessible beyond the mtdconcat driver.
Therefore, the definition is being moved to a header file.

Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@amd.com>
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/mtdconcat.c    | 12 ------------
 include/linux/mtd/concat.h | 12 ++++++++++++
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 9eb5d919d9ba..241d15235d01 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -20,18 +20,6 @@
 
 #include <asm/div64.h>
 
-/*
- * Our storage structure:
- * Subdev points to an array of pointers to struct mtd_info objects
- * which is allocated along with this structure
- *
- */
-struct mtd_concat {
-	struct mtd_info mtd;
-	int num_subdev;
-	struct mtd_info **subdev;
-};
-
 /*
  * how to calculate the size required for the above structure,
  * including the pointer array subdev points to:
diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h
index d6f653e07426..b42d9af87c4e 100644
--- a/include/linux/mtd/concat.h
+++ b/include/linux/mtd/concat.h
@@ -9,6 +9,18 @@
 #define MTD_CONCAT_H
 
 
+/*
+ * Our storage structure:
+ * Subdev points to an array of pointers to struct mtd_info objects
+ * which is allocated along with this structure
+ *
+ */
+struct mtd_concat {
+	struct mtd_info mtd;
+	int num_subdev;
+	struct mtd_info **subdev;
+};
+
 struct mtd_info *mtd_concat_create(
     struct mtd_info *subdev[],  /* subdevices to concatenate */
     int num_devs,               /* number of subdevices      */
-- 
cgit v1.2.3


From 43db6366fc2de02050e66389f5628d3fdc9af10a Mon Sep 17 00:00:00 2001
From: Amit Kumar Mahapatra <amit.kumar-mahapatra@amd.com>
Date: Wed, 4 Feb 2026 09:32:18 +0100
Subject: mtd: Add driver for concatenating devices

Introducing CONFIG_MTD_VIRT_CONCAT to separate the legacy flow from the new
approach, where only the concatenated partition is registered as an MTD
device, while the individual partitions that form it are not registered
independently, as they are typically not required by the user.
CONFIG_MTD_VIRT_CONCAT is a boolean configuration option that depends on
CONFIG_MTD_PARTITIONED_MASTER. When enabled, it allows flash nodes to be
exposed as individual MTD devices along with the other partitions.

The solution focuses on fixed-partitions description only as it depends on
device boundaries. It supports multiple sets of concatenated devices, each
comprising two or more partitions.

    flash@0 {
            reg = <0>;
            partitions {
                    compatible = "fixed-partitions";

                    part0@0 {
                            part-concat-next = <&flash0_part1>;
                            label = "part0_0";
                            reg = <0x0 0x800000>;
                    };

                    flash0_part1: part1@800000 {
                            label = "part0_1";
                            reg = <800000 0x800000>;
                    };

                    part2@1000000 {
                            part-concat-next = <&flash1_part0>;
                            label = "part0_2";
                            reg = <0x800000 0x800000>;
                    };
            };
    };

    flash@1 {
            reg = <1>;
            partitions {
                    compatible = "fixed-partitions";

                    flash1_part0: part1@0 {
                            label = "part1_0";
                            reg = <0x0 0x800000>;
                    };

                    part1@800000 {
                            label = "part1_1";
                            reg = <0x800000 0x800000>;
                    };
            };
    };

The partitions that gets created are

flash@0
part0_0-part0_1-concat
flash@1
part1_1
part0_2-part1_0-concat

Suggested-by: Bernhard Frauendienst <kernel@nospam.obeliks.de>
Suggested-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@amd.com>
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/Kconfig           |   9 ++
 drivers/mtd/Makefile          |   1 +
 drivers/mtd/mtd_virt_concat.c | 363 ++++++++++++++++++++++++++++++++++++++++++
 drivers/mtd/mtdcore.c         |  21 +++
 drivers/mtd/mtdpart.c         |   6 +
 include/linux/mtd/concat.h    |  51 +++++-
 6 files changed, 450 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/mtd_virt_concat.c

(limited to 'include/linux')

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 796a2eccbef0..0421c6208de7 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -206,6 +206,15 @@ config MTD_PARTITIONED_MASTER
 	  the parent of the partition device be the master device, rather than
 	  what lies behind the master.
 
+config MTD_VIRT_CONCAT
+	bool "Virtual concatenated MTD devices"
+	depends on MTD_PARTITIONED_MASTER
+	help
+	  The driver enables the creation of virtual MTD device by
+	  concatenating multiple physical MTD devices into a single
+	  entity. This allows for the creation of partitions larger than
+	  the individual physical chips, extending across chip boundaries.
+
 source "drivers/mtd/chips/Kconfig"
 
 source "drivers/mtd/maps/Kconfig"
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 593d0593a038..7b6dd53e8150 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -6,6 +6,7 @@
 # Core functionality.
 obj-$(CONFIG_MTD)		+= mtd.o
 mtd-y				:= mtdcore.o mtdsuper.o mtdconcat.o mtdpart.o mtdchar.o
+mtd-$(CONFIG_MTD_VIRT_CONCAT)	+= mtd_virt_concat.o
 
 obj-y				+= parsers/
 
diff --git a/drivers/mtd/mtd_virt_concat.c b/drivers/mtd/mtd_virt_concat.c
new file mode 100644
index 000000000000..aea88d1c9bc5
--- /dev/null
+++ b/drivers/mtd/mtd_virt_concat.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Virtual concat MTD device driver
+ *
+ * Copyright (C) 2018 Bernhard Frauendienst
+ * Author: Bernhard Frauendienst <kernel@nospam.obeliks.de>
+ */
+
+#include <linux/device.h>
+#include <linux/mtd/mtd.h>
+#include "mtdcore.h"
+#include <linux/mtd/partitions.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/mtd/concat.h>
+
+#define CONCAT_PROP "part-concat-next"
+#define CONCAT_POSTFIX "concat"
+#define MIN_DEV_PER_CONCAT 1
+
+static LIST_HEAD(concat_node_list);
+
+/**
+ * struct mtd_virt_concat_node - components of a concatenation
+ * @head: List handle
+ * @count: Number of nodes
+ * @nodes: Pointer to the nodes (partitions) to concatenate
+ * @concat: Concatenation container
+ */
+struct mtd_virt_concat_node {
+	struct list_head head;
+	unsigned int count;
+	struct device_node **nodes;
+	struct mtd_concat *concat;
+};
+
+/**
+ * mtd_is_part_concat - Check if the device is already part
+ *                       of a concatenated device
+ * @dev:        pointer to 'device_node'
+ *
+ * Return: true if the device is already part of a concatenation,
+ *         false otherwise.
+ */
+static bool mtd_is_part_concat(struct device_node *dev)
+{
+	struct mtd_virt_concat_node *item;
+	int idx;
+
+	list_for_each_entry(item, &concat_node_list, head) {
+		for (idx = 0; idx < item->count; idx++) {
+			if (item->nodes[idx] == dev)
+				return true;
+		}
+	}
+	return false;
+}
+
+static void mtd_virt_concat_put_mtd_devices(struct mtd_concat *concat)
+{
+	int i;
+
+	for (i = 0; i < concat->num_subdev; i++)
+		put_mtd_device(concat->subdev[i]);
+}
+
+void mtd_virt_concat_destroy_joins(void)
+{
+	struct mtd_virt_concat_node *item, *tmp;
+	struct mtd_info *mtd;
+
+	list_for_each_entry_safe(item, tmp, &concat_node_list, head) {
+		mtd = &item->concat->mtd;
+		if (item->concat) {
+			mtd_device_unregister(mtd);
+			kfree(mtd->name);
+			mtd_concat_destroy(mtd);
+			mtd_virt_concat_put_mtd_devices(item->concat);
+		}
+	}
+}
+
+/**
+ * mtd_virt_concat_destroy - Destroy the concat that includes the mtd object
+ * @mtd:        pointer to 'mtd_info'
+ *
+ * Return: 0 on success, -error otherwise.
+ */
+int mtd_virt_concat_destroy(struct mtd_info *mtd)
+{
+	struct mtd_info *child, *master = mtd_get_master(mtd);
+	struct mtd_virt_concat_node *item, *tmp;
+	struct mtd_concat *concat;
+	int idx, ret = 0;
+	bool is_mtd_found;
+
+	list_for_each_entry_safe(item, tmp, &concat_node_list, head) {
+		is_mtd_found = false;
+
+		/* Find the concat item that hold the mtd device */
+		for (idx = 0; idx < item->count; idx++) {
+			if (item->nodes[idx] == mtd->dev.of_node) {
+				is_mtd_found = true;
+				break;
+			}
+		}
+		if (!is_mtd_found)
+			continue;
+		concat = item->concat;
+
+		/*
+		 * Since this concatenated device is being removed, retrieve
+		 * all MTD devices that are part of it and register them
+		 * individually.
+		 */
+		for (idx = 0; idx < concat->num_subdev; idx++) {
+			child = concat->subdev[idx];
+			if (child->dev.of_node != mtd->dev.of_node) {
+				ret = add_mtd_device(child);
+				if (ret)
+					goto out;
+			}
+		}
+		/* Destroy the concat */
+		if (concat->mtd.name) {
+			del_mtd_device(&concat->mtd);
+			kfree(concat->mtd.name);
+			mtd_concat_destroy(&concat->mtd);
+			mtd_virt_concat_put_mtd_devices(item->concat);
+		}
+
+		for (idx = 0; idx < item->count; idx++)
+			of_node_put(item->nodes[idx]);
+
+		kfree(item->nodes);
+		kfree(item);
+	}
+	return 0;
+out:
+	mutex_lock(&master->master.partitions_lock);
+	list_del(&child->part.node);
+	mutex_unlock(&master->master.partitions_lock);
+	kfree(mtd->name);
+	kfree(mtd);
+
+	return ret;
+}
+
+/**
+ * mtd_virt_concat_create_item - Create a concat item
+ * @parts:        pointer to 'device_node'
+ * @count:        number of mtd devices that make up
+ *                the concatenated device.
+ *
+ * Return: 0 on success, -error otherwise.
+ */
+static int mtd_virt_concat_create_item(struct device_node *parts,
+				       unsigned int count)
+{
+	struct mtd_virt_concat_node *item;
+	struct mtd_concat *concat;
+	int i;
+
+	for (i = 0; i < (count - 1); i++) {
+		if (mtd_is_part_concat(of_parse_phandle(parts, CONCAT_PROP, i)))
+			return 0;
+	}
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	item->count = count;
+	item->nodes = kcalloc(count, sizeof(*item->nodes), GFP_KERNEL);
+	if (!item->nodes) {
+		kfree(item);
+		return -ENOMEM;
+	}
+
+	/*
+	 * The partition in which "part-concat-next" property
+	 * is defined is the first device in the list of concat
+	 * devices.
+	 */
+	item->nodes[0] = parts;
+
+	for (i = 1; i < count; i++)
+		item->nodes[i] = of_parse_phandle(parts, CONCAT_PROP, (i - 1));
+
+	concat = kzalloc(sizeof(*concat), GFP_KERNEL);
+	if (!concat) {
+		kfree(item);
+		return -ENOMEM;
+	}
+
+	concat->subdev = kcalloc(count, sizeof(*concat->subdev), GFP_KERNEL);
+	if (!concat->subdev) {
+		kfree(item);
+		kfree(concat);
+		return -ENOMEM;
+	}
+	item->concat = concat;
+
+	list_add_tail(&item->head, &concat_node_list);
+
+	return 0;
+}
+
+void mtd_virt_concat_destroy_items(void)
+{
+	struct mtd_virt_concat_node *item, *temp;
+	int i;
+
+	list_for_each_entry_safe(item, temp, &concat_node_list, head) {
+		for (i = 0; i < item->count; i++)
+			of_node_put(item->nodes[i]);
+
+		kfree(item->nodes);
+		kfree(item);
+	}
+}
+
+/**
+ * mtd_virt_concat_create_add - Add a mtd device to the concat list
+ * @mtd:        pointer to 'mtd_info'
+ *
+ * Return: true on success, false otherwise.
+ */
+bool mtd_virt_concat_add(struct mtd_info *mtd)
+{
+	struct mtd_virt_concat_node *item;
+	struct mtd_concat *concat;
+	int idx;
+
+	list_for_each_entry(item, &concat_node_list, head) {
+		concat = item->concat;
+		for (idx = 0; idx < item->count; idx++) {
+			if (item->nodes[idx] == mtd->dev.of_node) {
+				concat->subdev[concat->num_subdev++] = mtd;
+				return true;
+			}
+		}
+	}
+	return false;
+}
+
+/**
+ * mtd_virt_concat_node_create - List all the concatenations found in DT
+ *
+ * Return: 0 on success, -error otherwise.
+ */
+int mtd_virt_concat_node_create(void)
+{
+	struct device_node *parts = NULL;
+	int ret = 0, count = 0;
+
+	/* List all the concatenations found in DT */
+	do {
+		parts = of_find_node_with_property(parts, CONCAT_PROP);
+		if (!of_device_is_available(parts))
+			continue;
+
+		if (mtd_is_part_concat(parts))
+			continue;
+
+		count = of_count_phandle_with_args(parts, CONCAT_PROP, NULL);
+		if (count < MIN_DEV_PER_CONCAT)
+			continue;
+
+		/*
+		 * The partition in which "part-concat-next" property is defined
+		 * is also part of the concat device, so increament count by 1.
+		 */
+		count++;
+
+		ret = mtd_virt_concat_create_item(parts, count);
+		if (ret) {
+			of_node_put(parts);
+			goto destroy_items;
+		}
+	} while (parts);
+
+	return ret;
+
+destroy_items:
+	mtd_virt_concat_destroy_items();
+
+	return ret;
+}
+
+/**
+ * mtd_virt_concat_create_join - Create and register the concatenated
+ *                                 MTD device.
+ *
+ * Return: 0 on success, -error otherwise.
+ */
+int mtd_virt_concat_create_join(void)
+{
+	struct mtd_virt_concat_node *item;
+	struct mtd_concat *concat;
+	struct mtd_info *mtd;
+	ssize_t name_sz;
+	int ret, idx;
+	char *name;
+
+	list_for_each_entry(item, &concat_node_list, head) {
+		concat = item->concat;
+		/*
+		 * Check if item->count != concat->num_subdev, it indicates
+		 * that the MTD information for all devices included in the
+		 * concatenation are not handy, concat MTD device can't be
+		 * created hence switch to next concat device.
+		 */
+		if (item->count != concat->num_subdev) {
+			continue;
+		} else {
+			/* Calculate the legth of the name of the virtual device */
+			for (idx = 0, name_sz = 0; idx < concat->num_subdev; idx++)
+				name_sz += (strlen(concat->subdev[idx]->name) + 1);
+			name_sz += strlen(CONCAT_POSTFIX);
+			name = kmalloc(name_sz + 1, GFP_KERNEL);
+			if (!name) {
+				mtd_virt_concat_put_mtd_devices(concat);
+				return -ENOMEM;
+			}
+
+			ret = 0;
+			for (idx = 0; idx < concat->num_subdev; idx++) {
+				ret += sprintf((name + ret), "%s-",
+					       concat->subdev[idx]->name);
+			}
+			sprintf((name + ret), CONCAT_POSTFIX);
+
+			if (concat->mtd.name) {
+				ret = memcmp(concat->mtd.name, name, name_sz);
+				if (ret == 0)
+					continue;
+			}
+			mtd = mtd_concat_create(concat->subdev, concat->num_subdev, name);
+			if (!mtd) {
+				kfree(name);
+				return -ENXIO;
+			}
+			concat->mtd = *mtd;
+			/* Arbitrary set the first device as parent */
+			concat->mtd.dev.parent = concat->subdev[0]->dev.parent;
+			concat->mtd.dev = concat->subdev[0]->dev;
+
+			/* Add the mtd device */
+			ret = add_mtd_device(&concat->mtd);
+			if (ret)
+				goto destroy_concat;
+		}
+	}
+
+	return 0;
+
+destroy_concat:
+	mtd_concat_destroy(mtd);
+
+	return ret;
+}
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 64808493b4f5..576537774628 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -34,6 +34,7 @@
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
+#include <linux/mtd/concat.h>
 
 #include "mtdcore.h"
 
@@ -1120,6 +1121,12 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
 			goto out;
 	}
 
+	if (IS_REACHABLE(CONFIG_MTD_VIRT_CONCAT)) {
+		ret = mtd_virt_concat_node_create();
+		if (ret < 0)
+			goto out;
+	}
+
 	/* Prefer parsed partitions over driver-provided fallback */
 	ret = parse_mtd_partitions(mtd, types, parser_data);
 	if (ret == -EPROBE_DEFER)
@@ -1137,6 +1144,11 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
 	if (ret)
 		goto out;
 
+	if (IS_REACHABLE(CONFIG_MTD_VIRT_CONCAT)) {
+		ret = mtd_virt_concat_create_join();
+		if (ret < 0)
+			goto out;
+	}
 	/*
 	 * FIXME: some drivers unfortunately call this function more than once.
 	 * So we have to check if we've already assigned the reboot notifier.
@@ -1186,6 +1198,11 @@ int mtd_device_unregister(struct mtd_info *master)
 	nvmem_unregister(master->otp_user_nvmem);
 	nvmem_unregister(master->otp_factory_nvmem);
 
+	if (IS_REACHABLE(CONFIG_MTD_VIRT_CONCAT)) {
+		err = mtd_virt_concat_destroy(master);
+		if (err)
+			return err;
+	}
 	err = del_mtd_partitions(master);
 	if (err)
 		return err;
@@ -2621,6 +2638,10 @@ err_reg:
 
 static void __exit cleanup_mtd(void)
 {
+	if (IS_REACHABLE(CONFIG_MTD_VIRT_CONCAT)) {
+		mtd_virt_concat_destroy_joins();
+		mtd_virt_concat_destroy_items();
+	}
 	debugfs_remove_recursive(dfs_dir_mtd);
 	cleanup_mtdchar();
 	if (proc_mtd)
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index e016cfbc7224..795a94e6b482 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -18,6 +18,7 @@
 #include <linux/err.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/mtd/concat.h>
 
 #include "mtdcore.h"
 
@@ -409,6 +410,11 @@ int add_mtd_partitions(struct mtd_info *parent,
 			goto err_del_partitions;
 		}
 
+		if (IS_REACHABLE(CONFIG_MTD_VIRT_CONCAT)) {
+			if (mtd_virt_concat_add(child))
+				continue;
+		}
+
 		mutex_lock(&master->master.partitions_lock);
 		list_add_tail(&child->part.node, &parent->partitions);
 		mutex_unlock(&master->master.partitions_lock);
diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h
index b42d9af87c4e..2cd9d48958a8 100644
--- a/include/linux/mtd/concat.h
+++ b/include/linux/mtd/concat.h
@@ -28,5 +28,54 @@ struct mtd_info *mtd_concat_create(
 
 void mtd_concat_destroy(struct mtd_info *mtd);
 
-#endif
+/**
+ * mtd_virt_concat_node_create - Create a component for concatenation
+ *
+ * Returns a positive number representing the no. of devices found for
+ * concatenation, or a negative error code.
+ *
+ * List all the devices for concatenations found in DT and create a
+ * component for concatenation.
+ */
+int mtd_virt_concat_node_create(void);
+
+/**
+ * mtd_virt_concat_add - add mtd_info object to the list of subdevices for concatenation
+ * @mtd: pointer to new MTD device info structure
+ *
+ * Returns true if the mtd_info object is added successfully else returns false.
+ *
+ * The mtd_info object is added to the list of subdevices for concatenation.
+ * It returns true if a match is found, and false if all subdevices have
+ * already been added or if the mtd_info object does not match any of the
+ * intended MTD devices.
+ */
+bool mtd_virt_concat_add(struct mtd_info *mtd);
 
+/**
+ * mtd_virt_concat_create_join - Create and register the concatenated MTD device
+ *
+ * Returns 0 on succes, or a negative error code.
+ *
+ * Creates and registers the concatenated MTD device
+ */
+int mtd_virt_concat_create_join(void);
+
+/**
+ * mtd_virt_concat_destroy - Remove the concat that includes a specific mtd device
+ *                           as one of its components.
+ * @mtd: pointer to MTD device info structure.
+ *
+ * Returns 0 on succes, or a negative error code.
+ *
+ * If the mtd_info object is part of a concatenated device, all other MTD devices
+ * within that concat are registered individually. The concatenated device is then
+ * removed, along with its concatenation component.
+ *
+ */
+int mtd_virt_concat_destroy(struct mtd_info *mtd);
+
+void mtd_virt_concat_destroy_joins(void);
+void mtd_virt_concat_destroy_items(void);
+
+#endif
-- 
cgit v1.2.3


From 43479bb3703f17da6cdfaa2a7f4b93db9c6908bc Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 5 Feb 2026 19:49:15 +0100
Subject: mtd: spinand: Clean the flags section

Mention that we are declaring the main SPI NAND flags with a comment.
Align the values with tabs.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 6a024cf1c53a..58abd306ebe3 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -477,8 +477,9 @@ struct spinand_ecc_info {
 	const struct mtd_ooblayout_ops *ooblayout;
 };
 
-#define SPINAND_HAS_QE_BIT		BIT(0)
-#define SPINAND_HAS_CR_FEAT_BIT		BIT(1)
+/* SPI NAND flags */
+#define SPINAND_HAS_QE_BIT				BIT(0)
+#define SPINAND_HAS_CR_FEAT_BIT				BIT(1)
 #define SPINAND_HAS_PROG_PLANE_SELECT_BIT		BIT(2)
 #define SPINAND_HAS_READ_PLANE_SELECT_BIT		BIT(3)
 #define SPINAND_NO_RAW_ACCESS				BIT(4)
-- 
cgit v1.2.3


From 15c9ed1d8286dc0297f01347dc74f5a8cbc173de Mon Sep 17 00:00:00 2001
From: Qingfang Deng <dqfext@gmail.com>
Date: Tue, 24 Feb 2026 09:50:52 +0800
Subject: pppoe: remove kernel-mode relay support

The kernel-mode PPPoE relay feature and its two associated ioctls
(PPPOEIOCSFWD and PPPOEIOCDFWD) are not used by any existing userspace
PPPoE implementations. The most commonly-used package, RP-PPPoE [1],
handles the relaying entirely in userspace.

This legacy code has remained in the driver since its introduction in
kernel 2.3.99-pre7 for over two decades, but has served no practical
purpose.

Remove the unused relay code.

[1] https://dianne.skoll.ca/projects/rp-pppoe/

Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Link: https://patch.msgid.link/20260224015053.42472-1-dqfext@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ppp/pppoe.c       | 79 -------------------------------------------
 drivers/net/ppp/pppox.c       |  3 --
 include/linux/if_pppox.h      |  6 ----
 include/uapi/linux/if_pppox.h | 10 ------
 4 files changed, 98 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 7900cc3212a5..1ac61c273b28 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -237,25 +237,6 @@ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid,
 	return po;
 }
 
-static inline struct pppox_sock *__get_item_by_addr(struct net *net,
-						    struct sockaddr_pppox *sp)
-{
-	struct net_device *dev;
-	struct pppoe_net *pn;
-	struct pppox_sock *pppox_sock = NULL;
-
-	int ifindex;
-
-	dev = dev_get_by_name_rcu(net, sp->sa_addr.pppoe.dev);
-	if (dev) {
-		ifindex = dev->ifindex;
-		pn = pppoe_pernet(net);
-		pppox_sock = __get_item(pn, sp->sa_addr.pppoe.sid,
-					sp->sa_addr.pppoe.remote, ifindex);
-	}
-	return pppox_sock;
-}
-
 static inline void delete_item(struct pppoe_net *pn, __be16 sid,
 					char *addr, int ifindex)
 {
@@ -369,7 +350,6 @@ static struct notifier_block pppoe_notifier = {
 static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
 {
 	struct pppox_sock *po = pppox_sk(sk);
-	struct pppox_sock *relay_po;
 
 	/* Backlog receive. Semantics of backlog rcv preclude any code from
 	 * executing in lock_sock()/release_sock() bounds; meaning sk->sk_state
@@ -378,17 +358,6 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state & PPPOX_BOUND) {
 		ppp_input(&po->chan, skb);
-	} else if (sk->sk_state & PPPOX_RELAY) {
-		relay_po = __get_item_by_addr(sock_net(sk),
-					      &po->pppoe_relay);
-		if (relay_po == NULL)
-			goto abort_kfree;
-
-		if ((sk_pppox(relay_po)->sk_state & PPPOX_CONNECTED) == 0)
-			goto abort_kfree;
-
-		if (!__pppoe_xmit(sk_pppox(relay_po), skb))
-			goto abort_kfree;
 	} else {
 		if (sock_queue_rcv_skb(sk, skb))
 			goto abort_kfree;
@@ -656,7 +625,6 @@ static int pppoe_connect(struct socket *sock, struct sockaddr_unsized *uservaddr
 
 		po->pppoe_ifindex = 0;
 		memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa));
-		memset(&po->pppoe_relay, 0, sizeof(po->pppoe_relay));
 		memset(&po->chan, 0, sizeof(po->chan));
 		po->next = NULL;
 		po->num = 0;
@@ -783,53 +751,6 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
 		err = 0;
 		break;
 
-	case PPPOEIOCSFWD:
-	{
-		struct pppox_sock *relay_po;
-
-		err = -EBUSY;
-		if (sk->sk_state & (PPPOX_BOUND | PPPOX_DEAD))
-			break;
-
-		err = -ENOTCONN;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		/* PPPoE address from the user specifies an outbound
-		   PPPoE address which frames are forwarded to */
-		err = -EFAULT;
-		if (copy_from_user(&po->pppoe_relay,
-				   (void __user *)arg,
-				   sizeof(struct sockaddr_pppox)))
-			break;
-
-		err = -EINVAL;
-		if (po->pppoe_relay.sa_family != AF_PPPOX ||
-		    po->pppoe_relay.sa_protocol != PX_PROTO_OE)
-			break;
-
-		/* Check that the socket referenced by the address
-		   actually exists. */
-		rcu_read_lock();
-		relay_po = __get_item_by_addr(sock_net(sk), &po->pppoe_relay);
-		rcu_read_unlock();
-		if (!relay_po)
-			break;
-
-		sk->sk_state |= PPPOX_RELAY;
-		err = 0;
-		break;
-	}
-
-	case PPPOEIOCDFWD:
-		err = -EALREADY;
-		if (!(sk->sk_state & PPPOX_RELAY))
-			break;
-
-		sk->sk_state &= ~PPPOX_RELAY;
-		err = 0;
-		break;
-
 	default:
 		err = -ENOTTY;
 	}
diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c
index 08364f10a43f..5861a2f6ce3e 100644
--- a/drivers/net/ppp/pppox.c
+++ b/drivers/net/ppp/pppox.c
@@ -102,9 +102,6 @@ EXPORT_SYMBOL(pppox_ioctl);
 #ifdef CONFIG_COMPAT
 int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
-	if (cmd == PPPOEIOCSFWD32)
-		cmd = PPPOEIOCSFWD;
-
 	return pppox_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
 }
 
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index db45d6f1c4f4..8bbf676c2a85 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -25,8 +25,6 @@ struct pppoe_opt {
 	struct net_device      *dev;	  /* device associated with socket*/
 	int			ifindex;  /* ifindex of device associated with socket */
 	struct pppoe_addr	pa;	  /* what this socket is bound to*/
-	struct sockaddr_pppox	relay;	  /* what socket data will be
-					     relayed to (PPPoE relaying) */
 	struct work_struct      padt_work;/* Work item for handling PADT */
 };
 
@@ -53,7 +51,6 @@ struct pppox_sock {
 #define pppoe_dev	proto.pppoe.dev
 #define pppoe_ifindex	proto.pppoe.ifindex
 #define pppoe_pa	proto.pppoe.pa
-#define pppoe_relay	proto.pppoe.relay
 
 static inline struct pppox_sock *pppox_sk(struct sock *sk)
 {
@@ -80,14 +77,11 @@ extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */
 extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 extern int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 
-#define PPPOEIOCSFWD32    _IOW(0xB1 ,0, compat_size_t)
-
 /* PPPoX socket states */
 enum {
     PPPOX_NONE		= 0,  /* initial state */
     PPPOX_CONNECTED	= 1,  /* connection established ==TCP_ESTABLISHED */
     PPPOX_BOUND		= 2,  /* bound to ppp device */
-    PPPOX_RELAY		= 4,  /* forwarding is enabled */
     PPPOX_DEAD		= 16  /* dead, useless, please clean me up!*/
 };
 
diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h
index 29b804aa7474..7ae044d71fb7 100644
--- a/include/uapi/linux/if_pppox.h
+++ b/include/uapi/linux/if_pppox.h
@@ -103,16 +103,6 @@ struct sockaddr_pppol2tpv3in6 {
 	struct pppol2tpv3in6_addr pppol2tp;
 } __packed;
 
-/*********************************************************************
- *
- * ioctl interface for defining forwarding of connections
- *
- ********************************************************************/
-
-#define PPPOEIOCSFWD	_IOW(0xB1 ,0, size_t)
-#define PPPOEIOCDFWD	_IO(0xB1 ,1)
-/*#define PPPOEIOCGFWD	_IOWR(0xB1,2, size_t)*/
-
 /* Codes to identify message types */
 #define PADI_CODE	0x09
 #define PADO_CODE	0x07
-- 
cgit v1.2.3


From 1ae2f435350ec05224a39995c3a680aa6fdae5a5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 23 Feb 2026 16:29:37 +0100
Subject: ACPI: x86: cmos_rtc: Create a CMOS RTC platform device

Make the CMOS RTC ACPI scan handler create a platform device that will
be used subsequently by rtc-cmos for driver binding on x86 systems with
ACPI and update add_rtc_cmos() to skip registering a fallback platform
device for the CMOS RTC when the above one has been registered.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com> # x86
Link: https://patch.msgid.link/1962427.tdWV9SEqCh@rafael.j.wysocki
---
 arch/x86/kernel/rtc.c       | 4 ++++
 drivers/acpi/x86/cmos_rtc.c | 8 ++++++++
 include/linux/acpi.h        | 4 ++++
 3 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 51a849a79c98..b112178e8185 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -2,6 +2,7 @@
 /*
  * RTC related functions
  */
+#include <linux/acpi.h>
 #include <linux/platform_device.h>
 #include <linux/mc146818rtc.h>
 #include <linux/export.h>
@@ -146,6 +147,9 @@ static __init int add_rtc_cmos(void)
 		}
 	}
 #endif
+	if (cmos_rtc_platform_device_present)
+		return 0;
+
 	if (!x86_platform.legacy.rtc)
 		return -ENODEV;
 
diff --git a/drivers/acpi/x86/cmos_rtc.c b/drivers/acpi/x86/cmos_rtc.c
index 45db7e51cbe6..bdd66dfd4a44 100644
--- a/drivers/acpi/x86/cmos_rtc.c
+++ b/drivers/acpi/x86/cmos_rtc.c
@@ -24,6 +24,8 @@ static const struct acpi_device_id acpi_cmos_rtc_ids[] = {
 	{}
 };
 
+bool cmos_rtc_platform_device_present;
+
 static bool cmos_rtc_space_handler_present __read_mostly;
 
 static acpi_status acpi_cmos_rtc_space_handler(u32 function,
@@ -103,6 +105,12 @@ static int acpi_cmos_rtc_attach(struct acpi_device *adev,
 	if (ret < 0)
 		return ret;
 
+	if (IS_ERR_OR_NULL(acpi_create_platform_device(adev, NULL))) {
+		pr_err("Failed to create CMOS-RTC platform device\n");
+		return 0;
+	} else {
+		cmos_rtc_platform_device_present = true;
+	}
 	return 1;
 }
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4d2f0bed7a06..2bdb801cee01 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -791,6 +791,8 @@ const char *acpi_get_subsystem_id(acpi_handle handle);
 int acpi_mrrm_max_mem_region(void);
 #endif
 
+extern bool cmos_rtc_platform_device_present;
+
 #else	/* !CONFIG_ACPI */
 
 #define acpi_disabled 1
@@ -1116,6 +1118,8 @@ static inline int acpi_mrrm_max_mem_region(void)
 	return 1;
 }
 
+#define cmos_rtc_platform_device_present	false
+
 #endif	/* !CONFIG_ACPI */
 
 #ifdef CONFIG_ACPI_HMAT
-- 
cgit v1.2.3


From 2a78e42104444f948698f1225deaf515e9b7224d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 23 Feb 2026 16:30:21 +0100
Subject: ACPI: x86/rtc-cmos: Use platform device for driver binding

Modify the rtc-cmos driver to bind to a platform device on systems with
ACPI via acpi_match_table and advertise the CMOST RTC ACPI device IDs
for driver auto-loading.  Note that adding the requisite device IDs to
it and exposing them via MODULE_DEVICE_TABLE() is sufficient for this
purpose.

Since the ACPI device IDs in question are the same as for the CMOS RTC
ACPI scan handler, put them into a common header file and use the
definition from there in both places.

Additionally, to prevent a PNP device from being created for the CMOS
RTC if a platform one is present already, make is_cmos_rtc_device()
check cmos_rtc_platform_device_present introduced previously.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://patch.msgid.link/13969123.uLZWGnKmhe@rafael.j.wysocki
---
 drivers/acpi/acpi_pnp.c     |  2 +-
 drivers/acpi/x86/cmos_rtc.c |  5 +----
 drivers/rtc/rtc-cmos.c      | 10 ++++++++++
 include/linux/acpi.h        |  6 ++++++
 4 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/acpi_pnp.c b/drivers/acpi/acpi_pnp.c
index 85d9f78619a2..4ad8f56d1a5d 100644
--- a/drivers/acpi/acpi_pnp.c
+++ b/drivers/acpi/acpi_pnp.c
@@ -368,7 +368,7 @@ static int is_cmos_rtc_device(struct acpi_device *adev)
 		{ "PNP0B02" },
 		{""},
 	};
-	return !acpi_match_device_ids(adev, ids);
+	return !cmos_rtc_platform_device_present && !acpi_match_device_ids(adev, ids);
 }
 
 bool acpi_is_pnp_device(struct acpi_device *adev)
diff --git a/drivers/acpi/x86/cmos_rtc.c b/drivers/acpi/x86/cmos_rtc.c
index bdd66dfd4a44..a6df5b991c96 100644
--- a/drivers/acpi/x86/cmos_rtc.c
+++ b/drivers/acpi/x86/cmos_rtc.c
@@ -18,10 +18,7 @@
 #include "../internal.h"
 
 static const struct acpi_device_id acpi_cmos_rtc_ids[] = {
-	{ "PNP0B00" },
-	{ "PNP0B01" },
-	{ "PNP0B02" },
-	{}
+	ACPI_CMOS_RTC_IDS
 };
 
 bool cmos_rtc_platform_device_present;
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 0743c6acd6e2..7457f42fd6f0 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -27,6 +27,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/acpi.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -1476,6 +1477,14 @@ static __init void cmos_of_init(struct platform_device *pdev)
 #else
 static inline void cmos_of_init(struct platform_device *pdev) {}
 #endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id acpi_cmos_rtc_ids[] = {
+	ACPI_CMOS_RTC_IDS
+};
+MODULE_DEVICE_TABLE(acpi, acpi_cmos_rtc_ids);
+#endif
+
 /*----------------------------------------------------------------*/
 
 /* Platform setup should have set up an RTC device, when PNP is
@@ -1530,6 +1539,7 @@ static struct platform_driver cmos_platform_driver = {
 		.name		= driver_name,
 		.pm		= &cmos_pm_ops,
 		.of_match_table = of_match_ptr(of_cmos_match),
+		.acpi_match_table = ACPI_PTR(acpi_cmos_rtc_ids),
 	}
 };
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 2bdb801cee01..5ecdcdaf31aa 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -791,6 +791,12 @@ const char *acpi_get_subsystem_id(acpi_handle handle);
 int acpi_mrrm_max_mem_region(void);
 #endif
 
+#define ACPI_CMOS_RTC_IDS	\
+	{ "PNP0B00", },		\
+	{ "PNP0B01", },		\
+	{ "PNP0B02", },		\
+	{ "", }
+
 extern bool cmos_rtc_platform_device_present;
 
 #else	/* !CONFIG_ACPI */
-- 
cgit v1.2.3


From c8e9b1d9febc83ee94944695a07cfd40a1b29743 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 25 Feb 2026 21:12:20 -0800
Subject: dmaengine: fsl-edma: fix all kernel-doc warnings

Use the correct kernel-doc format and struct member names to eliminate
these kernel-doc warnings:

Warning: include/linux/platform_data/dma-mcf-edma.h:35 struct member
 'dma_channels' not described in 'mcf_edma_platform_data'
Warning: include/linux/platform_data/dma-mcf-edma.h:35 struct member
 'slave_map' not described in 'mcf_edma_platform_data'
Warning: include/linux/platform_data/dma-mcf-edma.h:35 struct member
 'slavecnt' not described in 'mcf_edma_platform_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260226051220.548566-1-rdunlap@infradead.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/platform_data/dma-mcf-edma.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/dma-mcf-edma.h b/include/linux/platform_data/dma-mcf-edma.h
index d718ccfa3421..0b31af66a1ac 100644
--- a/include/linux/platform_data/dma-mcf-edma.h
+++ b/include/linux/platform_data/dma-mcf-edma.h
@@ -26,8 +26,9 @@ bool mcf_edma_filter_fn(struct dma_chan *chan, void *param);
 /**
  * struct mcf_edma_platform_data - platform specific data for eDMA engine
  *
- * @ver			The eDMA module version.
- * @dma_channels	The number of eDMA channels.
+ * @dma_channels:	The number of eDMA channels.
+ * @slave_map:		Slave device map
+ * @slavecnt:		Number of entries in @slave_map
  */
 struct mcf_edma_platform_data {
 	int dma_channels;
-- 
cgit v1.2.3


From b2d51bc1601c762c63f19c119589a0a0c44bc8ec Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 26 Feb 2026 10:20:23 +0100
Subject: gpio: generic: Don't use 'proxy' headers

Update header inclusions to follow IWYU (Include What You Use)
principle.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Link: https://patch.msgid.link/20260226092023.4096921-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
 drivers/gpio/gpio-mmio.c     | 4 +---
 include/linux/gpio/generic.h | 8 +++++++-
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
index edbcaad57d00..0941d034a49c 100644
--- a/drivers/gpio/gpio-mmio.c
+++ b/drivers/gpio/gpio-mmio.c
@@ -42,18 +42,16 @@ o        `                     ~~~~\___/~~~~    ` controller in FPGA is ,.`
 
 #include <linux/bitops.h>
 #include <linux/cleanup.h>
-#include <linux/compiler.h>
 #include <linux/err.h>
-#include <linux/init.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
+#include <linux/limits.h>
 #include <linux/log2.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h
index ff566dc9c3cb..de43c06c83ef 100644
--- a/include/linux/gpio/generic.h
+++ b/include/linux/gpio/generic.h
@@ -3,9 +3,15 @@
 #ifndef __LINUX_GPIO_GENERIC_H
 #define __LINUX_GPIO_GENERIC_H
 
+#include <linux/bits.h>
+#include <linux/bug.h>
 #include <linux/cleanup.h>
-#include <linux/gpio/driver.h>
+#include <linux/container_of.h>
+#include <linux/errno.h>
 #include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <linux/gpio/driver.h>
 
 struct device;
 
-- 
cgit v1.2.3


From fa4a3a95139e7293c1333a33bd7b19e7261e3bd0 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Mon, 23 Feb 2026 18:20:06 +0100
Subject: gpio: introduce a header for symbols shared by suppliers and
 consumers

GPIO_LINE_DIRECTION_IN/OUT definitions are used both in supplier (GPIO
controller drivers) as well as consumer code. In order to not force the
consumers to include gpio/driver.h or - even worse - to redefine these
values, create a new header file - gpio/defs.h - and move them over
there. Include this header from both gpio/consumer.h and gpio/driver.h.

Reviewed-by: Linus Walleij <linusw@kernel.org>
Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20260223172006.204268-1-bartosz.golaszewski@oss.qualcomm.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
 include/linux/gpio/consumer.h | 2 ++
 include/linux/gpio/defs.h     | 9 +++++++++
 include/linux/gpio/driver.h   | 5 ++---
 3 files changed, 13 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/gpio/defs.h

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 0d8408582918..3efb5cb1e1d1 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -6,6 +6,8 @@
 #include <linux/err.h>
 #include <linux/types.h>
 
+#include "defs.h"
+
 struct acpi_device;
 struct device;
 struct fwnode_handle;
diff --git a/include/linux/gpio/defs.h b/include/linux/gpio/defs.h
new file mode 100644
index 000000000000..b69fd7c041b2
--- /dev/null
+++ b/include/linux/gpio/defs.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LINUX_GPIO_DEFS_H
+#define __LINUX_GPIO_DEFS_H
+
+#define GPIO_LINE_DIRECTION_IN		1
+#define GPIO_LINE_DIRECTION_OUT		0
+
+#endif /* __LINUX_GPIO_DEFS_H */
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index fabe2baf7b50..5f5ddcbfa445 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -20,6 +20,8 @@
 #include <asm/msi.h>
 #endif
 
+#include "defs.h"
+
 struct device;
 struct irq_chip;
 struct irq_data;
@@ -42,9 +44,6 @@ union gpio_irq_fwspec {
 #endif
 };
 
-#define GPIO_LINE_DIRECTION_IN	1
-#define GPIO_LINE_DIRECTION_OUT	0
-
 /**
  * struct gpio_irq_chip - GPIO interrupt controller
  */
-- 
cgit v1.2.3


From 0a93d30861617ecf207dcc4c6c736435fac36dae Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:35:42 +0100
Subject: hrtimer: Provide a static branch based hrtimer_hres_enabled()

The scheduler evaluates this via hrtimer_is_hres_active() every time it has
to update HRTICK. This needs to follow three pointers, which is expensive.

Provide a static branch based mechanism to avoid that.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163429.136503358@kernel.org
---
 include/linux/hrtimer.h | 13 +++++++++----
 kernel/time/hrtimer.c   | 28 +++++++++++++++++++++++++---
 2 files changed, 34 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 74adbd4e7003..c9ca105ba009 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -153,17 +153,22 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
+extern unsigned int hrtimer_resolution;
 struct clock_event_device;
 
 extern void hrtimer_interrupt(struct clock_event_device *dev);
 
-extern unsigned int hrtimer_resolution;
+extern struct static_key_false hrtimer_highres_enabled_key;
 
-#else
+static inline bool hrtimer_highres_enabled(void)
+{
+	return static_branch_likely(&hrtimer_highres_enabled_key);
+}
 
+#else  /* CONFIG_HIGH_RES_TIMERS */
 #define hrtimer_resolution	(unsigned int)LOW_RES_NSEC
-
-#endif
+static inline bool hrtimer_highres_enabled(void) { return false; }
+#endif  /* !CONFIG_HIGH_RES_TIMERS */
 
 static inline ktime_t
 __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 3088db419aa6..67917ce696d4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -126,6 +126,25 @@ static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base)
 		return likely(base->online);
 }
 
+#ifdef CONFIG_HIGH_RES_TIMERS
+DEFINE_STATIC_KEY_FALSE(hrtimer_highres_enabled_key);
+
+static void hrtimer_hres_workfn(struct work_struct *work)
+{
+	static_branch_enable(&hrtimer_highres_enabled_key);
+}
+
+static DECLARE_WORK(hrtimer_hres_work, hrtimer_hres_workfn);
+
+static inline void hrtimer_schedule_hres_work(void)
+{
+	if (!hrtimer_highres_enabled())
+		schedule_work(&hrtimer_hres_work);
+}
+#else
+static inline void hrtimer_schedule_hres_work(void) { }
+#endif
+
 /*
  * Functions and macros which are different for UP/SMP systems are kept in a
  * single place
@@ -649,7 +668,9 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 }
 
 /*
- * Is the high resolution mode active ?
+ * Is the high resolution mode active in the CPU base. This cannot use the
+ * static key as the CPUs are switched to high resolution mode
+ * asynchronously.
  */
 static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
 {
@@ -750,6 +771,7 @@ static void hrtimer_switch_to_hres(void)
 	tick_setup_sched_timer(true);
 	/* "Retrigger" the interrupt to get things going */
 	retrigger_next_event(NULL);
+	hrtimer_schedule_hres_work();
 }
 
 #else
@@ -947,11 +969,10 @@ static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
  */
 void clock_was_set(unsigned int bases)
 {
-	struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
 	cpumask_var_t mask;
 	int cpu;
 
-	if (!hrtimer_hres_active(cpu_base) && !tick_nohz_is_active())
+	if (!hrtimer_highres_enabled() && !tick_nohz_is_active())
 		goto out_timerfd;
 
 	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
@@ -962,6 +983,7 @@ void clock_was_set(unsigned int bases)
 	/* Avoid interrupting CPUs if possible */
 	cpus_read_lock();
 	for_each_online_cpu(cpu) {
+		struct hrtimer_cpu_base *cpu_base;
 		unsigned long flags;
 
 		cpu_base = &per_cpu(hrtimer_bases, cpu);
-- 
cgit v1.2.3


From c3a92213eb3dd8ea6f664d16a08eda800e34eaad Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:35:47 +0100
Subject: sched: Use hrtimer_highres_enabled()

Use the static branch based variant and thereby avoid following three
pointers.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163429.203610956@kernel.org
---
 include/linux/hrtimer.h |  6 ------
 kernel/sched/sched.h    | 37 +++++++++----------------------------
 2 files changed, 9 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index c9ca105ba009..b5003856fd60 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -146,12 +146,6 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
 	return ktime_sub(timer->node.expires, hrtimer_cb_get_time(timer));
 }
 
-static inline int hrtimer_is_hres_active(struct hrtimer *timer)
-{
-	return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
-		timer->base->cpu_base->hres_active : 0;
-}
-
 #ifdef CONFIG_HIGH_RES_TIMERS
 extern unsigned int hrtimer_resolution;
 struct clock_event_device;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 73bc20c47631..0aa089dfaaa4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3019,25 +3019,19 @@ extern unsigned int sysctl_numa_balancing_hot_threshold;
  *  - enabled by features
  *  - hrtimer is actually high res
  */
-static inline int hrtick_enabled(struct rq *rq)
+static inline bool hrtick_enabled(struct rq *rq)
 {
-	if (!cpu_active(cpu_of(rq)))
-		return 0;
-	return hrtimer_is_hres_active(&rq->hrtick_timer);
+	return cpu_active(cpu_of(rq)) && hrtimer_highres_enabled();
 }
 
-static inline int hrtick_enabled_fair(struct rq *rq)
+static inline bool hrtick_enabled_fair(struct rq *rq)
 {
-	if (!sched_feat(HRTICK))
-		return 0;
-	return hrtick_enabled(rq);
+	return sched_feat(HRTICK) && hrtick_enabled(rq);
 }
 
-static inline int hrtick_enabled_dl(struct rq *rq)
+static inline bool hrtick_enabled_dl(struct rq *rq)
 {
-	if (!sched_feat(HRTICK_DL))
-		return 0;
-	return hrtick_enabled(rq);
+	return sched_feat(HRTICK_DL) && hrtick_enabled(rq);
 }
 
 extern void hrtick_start(struct rq *rq, u64 delay);
@@ -3047,22 +3041,9 @@ static inline bool hrtick_active(struct rq *rq)
 }
 
 #else /* !CONFIG_SCHED_HRTICK: */
-
-static inline int hrtick_enabled_fair(struct rq *rq)
-{
-	return 0;
-}
-
-static inline int hrtick_enabled_dl(struct rq *rq)
-{
-	return 0;
-}
-
-static inline int hrtick_enabled(struct rq *rq)
-{
-	return 0;
-}
-
+static inline bool hrtick_enabled_fair(struct rq *rq) { return false; }
+static inline bool hrtick_enabled_dl(struct rq *rq) { return false; }
+static inline bool hrtick_enabled(struct rq *rq) { return false; }
 #endif /* !CONFIG_SCHED_HRTICK */
 
 #ifndef arch_scale_freq_tick
-- 
cgit v1.2.3


From b7dd64778aa3f89de9afa1e81171cfe110ddc525 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 24 Feb 2026 17:36:01 +0100
Subject: hrtimer: Provide LAZY_REARM mode

The hrtick timer is frequently rearmed before expiry and most of the time
the new expiry is past the armed one. As this happens on every context
switch it becomes expensive with scheduling heavy work loads especially in
virtual machines as the "hardware" reprogamming implies a VM exit.

Add a lazy rearm mode flag which skips the reprogamming if:

    1) The timer was the first expiring timer before the rearm

    2) The new expiry time is farther out than the armed time

This avoids a massive amount of reprogramming operations of the hrtick
timer for the price of eventually taking the alredy armed interrupt for
nothing.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163429.408524456@kernel.org
---
 include/linux/hrtimer.h       |  8 ++++++++
 include/linux/hrtimer_types.h |  3 +++
 kernel/time/hrtimer.c         | 17 ++++++++++++++++-
 3 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index b5003856fd60..c924bb2498db 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -31,6 +31,13 @@
  *				  soft irq context
  * HRTIMER_MODE_HARD		- Timer callback function will be executed in
  *				  hard irq context even on PREEMPT_RT.
+ * HRTIMER_MODE_LAZY_REARM	- Avoid reprogramming if the timer was the
+ *				  first expiring timer and is moved into the
+ *				  future. Special mode for the HRTICK timer to
+ *				  avoid extensive reprogramming of the hardware,
+ *				  which is expensive in virtual machines. Risks
+ *				  a pointless expiry, but that's better than
+ *				  reprogramming on every context switch,
  */
 enum hrtimer_mode {
 	HRTIMER_MODE_ABS	= 0x00,
@@ -38,6 +45,7 @@ enum hrtimer_mode {
 	HRTIMER_MODE_PINNED	= 0x02,
 	HRTIMER_MODE_SOFT	= 0x04,
 	HRTIMER_MODE_HARD	= 0x08,
+	HRTIMER_MODE_LAZY_REARM	= 0x10,
 
 	HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
 	HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h
index 8fbbb6bdf7a1..64381c64cdbd 100644
--- a/include/linux/hrtimer_types.h
+++ b/include/linux/hrtimer_types.h
@@ -33,6 +33,8 @@ enum hrtimer_restart {
  * @is_soft:	Set if hrtimer will be expired in soft interrupt context.
  * @is_hard:	Set if hrtimer will be expired in hard interrupt context
  *		even on RT.
+ * @is_lazy:	Set if the timer is frequently rearmed to avoid updates
+ *		of the clock event device
  *
  * The hrtimer structure must be initialized by hrtimer_setup()
  */
@@ -45,6 +47,7 @@ struct hrtimer {
 	u8				is_rel;
 	u8				is_soft;
 	u8				is_hard;
+	u8				is_lazy;
 };
 
 #endif /* _LINUX_HRTIMER_TYPES_H */
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 67917ce696d4..e54f8b59f6b4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1152,7 +1152,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
 	 * an superfluous call to hrtimer_force_reprogram() on the
 	 * remote cpu later on if the same timer gets enqueued again.
 	 */
-	if (reprogram && timer == cpu_base->next_timer)
+	if (reprogram && timer == cpu_base->next_timer && !timer->is_lazy)
 		hrtimer_force_reprogram(cpu_base, 1);
 }
 
@@ -1321,6 +1321,20 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 		return 0;
 	}
 
+	/*
+	 * Special case for the HRTICK timer. It is frequently rearmed and most
+	 * of the time moves the expiry into the future. That's expensive in
+	 * virtual machines and it's better to take the pointless already armed
+	 * interrupt than reprogramming the hardware on every context switch.
+	 *
+	 * If the new expiry is before the armed time, then reprogramming is
+	 * required.
+	 */
+	if (timer->is_lazy) {
+		if (new_base->cpu_base->expires_next <= hrtimer_get_expires(timer))
+			return 0;
+	}
+
 	/*
 	 * Timer was forced to stay on the current CPU to avoid
 	 * reprogramming on removal and enqueue. Force reprogram the
@@ -1675,6 +1689,7 @@ static void __hrtimer_setup(struct hrtimer *timer,
 	base += hrtimer_clockid_to_base(clock_id);
 	timer->is_soft = softtimer;
 	timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
+	timer->is_lazy = !!(mode & HRTIMER_MODE_LAZY_REARM);
 	timer->base = &cpu_base->clock_base[base];
 	timerqueue_init(&timer->node);
 
-- 
cgit v1.2.3


From 70802807398c65f5a49b2baec87e1f6c8db43de6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:36:15 +0100
Subject: clockevents: Remove redundant CLOCK_EVT_FEAT_KTIME

The only real usecase for this is the hrtimer based broadcast device.
No point in using two different feature flags for this.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163429.609049777@kernel.org
---
 include/linux/clockchips.h           | 1 -
 kernel/time/clockevents.c            | 4 ++--
 kernel/time/tick-broadcast-hrtimer.c | 1 -
 3 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index b0df28ddd394..5e8f7819f6a6 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -45,7 +45,6 @@ enum clock_event_state {
  */
 # define CLOCK_EVT_FEAT_PERIODIC	0x000001
 # define CLOCK_EVT_FEAT_ONESHOT		0x000002
-# define CLOCK_EVT_FEAT_KTIME		0x000004
 
 /*
  * x86(64) specific (mis)features:
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index eaae1ce9f060..5abaeef08e6a 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -319,8 +319,8 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
 	WARN_ONCE(!clockevent_state_oneshot(dev), "Current state: %d\n",
 		  clockevent_get_state(dev));
 
-	/* Shortcut for clockevent devices that can deal with ktime. */
-	if (dev->features & CLOCK_EVT_FEAT_KTIME)
+	/* ktime_t based reprogramming for the broadcast hrtimer device */
+	if (unlikely(dev->features & CLOCK_EVT_FEAT_HRTIMER))
 		return dev->set_next_ktime(expires, dev);
 
 	delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index a88b72b0f35e..51f6a1032c83 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -78,7 +78,6 @@ static struct clock_event_device ce_broadcast_hrtimer = {
 	.set_state_shutdown	= bc_shutdown,
 	.set_next_ktime		= bc_set_next,
 	.features		= CLOCK_EVT_FEAT_ONESHOT |
-				  CLOCK_EVT_FEAT_KTIME |
 				  CLOCK_EVT_FEAT_HRTIMER,
 	.rating			= 0,
 	.bound_on		= -1,
-- 
cgit v1.2.3


From 2e27beeb66e43f3b84aef5a07e486a5d50695c06 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:36:20 +0100
Subject: timekeeping: Allow inlining clocksource::read()

On some architectures clocksource::read() boils down to a single
instruction, so the indirect function call is just a massive overhead
especially with speculative execution mitigations in effect.

Allow architectures to enable conditional inlining of that read to avoid
that by:

   - providing a static branch to switch to the inlined variant

   - disabling the branch before clocksource changes

   - enabling the branch after a clocksource change, when the clocksource
     indicates in a feature flag that it is the one which provides the
     inlined variant

This is intentionally not a static call as that would only remove the
indirect call, but not the rest of the overhead.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163429.675151545@kernel.org
---
 include/linux/clocksource.h |  2 ++
 kernel/time/Kconfig         |  3 ++
 kernel/time/timekeeping.c   | 74 +++++++++++++++++++++++++++++++++------------
 3 files changed, 60 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 65b7c41471c3..54366d5c4d19 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -149,6 +149,8 @@ struct clocksource {
 #define CLOCK_SOURCE_SUSPEND_NONSTOP		0x80
 #define CLOCK_SOURCE_RESELECT			0x100
 #define CLOCK_SOURCE_VERIFY_PERCPU		0x200
+#define CLOCK_SOURCE_CAN_INLINE_READ		0x400
+
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
 
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 7c6a52f7836c..07b048ba0cca 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -17,6 +17,9 @@ config ARCH_CLOCKSOURCE_DATA
 config ARCH_CLOCKSOURCE_INIT
 	bool
 
+config ARCH_WANTS_CLOCKSOURCE_READ_INLINE
+	bool
+
 # Timekeeping vsyscall support
 config GENERIC_TIME_VSYSCALL
 	bool
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 91fa2003351c..63aa31f02ebc 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -3,34 +3,30 @@
  *  Kernel timekeeping code and accessor functions. Based on code from
  *  timer.c, moved in commit 8524070b7982.
  */
-#include <linux/timekeeper_internal.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
+#include <linux/audit.h>
+#include <linux/clocksource.h>
+#include <linux/compiler.h>
+#include <linux/jiffies.h>
 #include <linux/kobject.h>
-#include <linux/percpu.h>
-#include <linux/init.h>
-#include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/nmi.h>
-#include <linux/sched.h>
-#include <linux/sched/loadavg.h>
+#include <linux/pvclock_gtod.h>
+#include <linux/random.h>
 #include <linux/sched/clock.h>
+#include <linux/sched/loadavg.h>
+#include <linux/static_key.h>
+#include <linux/stop_machine.h>
 #include <linux/syscore_ops.h>
-#include <linux/clocksource.h>
-#include <linux/jiffies.h>
+#include <linux/tick.h>
 #include <linux/time.h>
 #include <linux/timex.h>
-#include <linux/tick.h>
-#include <linux/stop_machine.h>
-#include <linux/pvclock_gtod.h>
-#include <linux/compiler.h>
-#include <linux/audit.h>
-#include <linux/random.h>
+#include <linux/timekeeper_internal.h>
 
 #include <vdso/auxclock.h>
 
 #include "tick-internal.h"
-#include "ntp_internal.h"
 #include "timekeeping_internal.h"
+#include "ntp_internal.h"
 
 #define TK_CLEAR_NTP		(1 << 0)
 #define TK_CLOCK_WAS_SET	(1 << 1)
@@ -275,6 +271,11 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 	tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
 }
 
+#ifdef CONFIG_ARCH_WANTS_CLOCKSOURCE_READ_INLINE
+#include <asm/clock_inlined.h>
+
+static DEFINE_STATIC_KEY_FALSE(clocksource_read_inlined);
+
 /*
  * tk_clock_read - atomic clocksource read() helper
  *
@@ -288,13 +289,36 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
  * a read of the fast-timekeeper tkrs (which is protected by its own locking
  * and update logic).
  */
-static inline u64 tk_clock_read(const struct tk_read_base *tkr)
+static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr)
 {
 	struct clocksource *clock = READ_ONCE(tkr->clock);
 
+	if (static_branch_likely(&clocksource_read_inlined))
+		return arch_inlined_clocksource_read(clock);
+
 	return clock->read(clock);
 }
 
+static inline void clocksource_disable_inline_read(void)
+{
+	static_branch_disable(&clocksource_read_inlined);
+}
+
+static inline void clocksource_enable_inline_read(void)
+{
+	static_branch_enable(&clocksource_read_inlined);
+}
+#else
+static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr)
+{
+	struct clocksource *clock = READ_ONCE(tkr->clock);
+
+	return clock->read(clock);
+}
+static inline void clocksource_disable_inline_read(void) { }
+static inline void clocksource_enable_inline_read(void) { }
+#endif
+
 /**
  * tk_setup_internals - Set up internals to use clocksource clock.
  *
@@ -375,7 +399,7 @@ static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta)
 	return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);
 }
 
-static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
+static __always_inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
 {
 	/* Calculate the delta since the last update_wall_time() */
 	u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;
@@ -1631,7 +1655,19 @@ int timekeeping_notify(struct clocksource *clock)
 
 	if (tk->tkr_mono.clock == clock)
 		return 0;
+
+	/* Disable inlined reads accross the clocksource switch */
+	clocksource_disable_inline_read();
+
 	stop_machine(change_clocksource, clock, NULL);
+
+	/*
+	 * If the clocksource has been selected and supports inlined reads
+	 * enable the branch.
+	 */
+	if (tk->tkr_mono.clock == clock && clock->flags & CLOCK_SOURCE_CAN_INLINE_READ)
+		clocksource_enable_inline_read();
+
 	tick_clock_notify();
 	return tk->tkr_mono.clock == clock ? 0 : -1;
 }
-- 
cgit v1.2.3


From cd38bdb8e696a1a1eb12fc6662a6e420977aacfd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:36:40 +0100
Subject: timekeeping: Provide infrastructure for coupled clockevents

Some architectures have clockevent devices which are coupled to the system
clocksource by implementing a less than or equal comparator which compares
the programmed absolute expiry time against the underlying time
counter. Well known examples are TSC/TSC deadline timer and the S390 TOD
clocksource/comparator.

While the concept is nice it has some downsides:

  1) The clockevents core code is strictly based on relative expiry times
     as that's the most common case for clockevent device hardware. That
     requires to convert the absolute expiry time provided by the caller
     (hrtimers, NOHZ code) to a relative expiry time by reading and
     substracting the current time.

     The clockevent::set_next_event() callback must then read the counter
     again to convert the relative expiry back into a absolute one.

  2) The conversion factors from nanoseconds to counter clock cycles are
     set up when the clockevent is registered. When NTP applies corrections
     then the clockevent conversion factors can deviate from the
     clocksource conversion substantially which either results in timers
     firing late or in the worst case early. The early expiry then needs to
     do a reprogam with a short delta.

     In most cases this is papered over by the fact that the read in the
     set_next_event() callback happens after the read which is used to
     calculate the delta. So the tendency is that timers expire mostly
     late.

All of this can be avoided by providing support for these devices in the
core code:

  1) The timekeeping core keeps track of the last update to the clocksource
     by storing the base nanoseconds and the corresponding clocksource
     counter value. That's used to keep the conversion math for reading the
     time within 64-bit in the common case.

     This information can be used to avoid both reads of the underlying
     clocksource in the clockevents reprogramming path:

     delta = expiry - base_ns;
     cycles = base_cycles + ((delta * clockevent::mult) >> clockevent::shift);

     The resulting cycles value can be directly used to program the
     comparator.

  2) As #1 does not longer provide the "compensation" through the second
     read the deviation of the clocksource and clockevent conversions
     caused by NTP become more prominent.

     This can be cured by letting the timekeeping core compute and store
     the reverse conversion factors when the clocksource cycles to
     nanoseconds factors are modified by NTP:

         CS::MULT      (1 << NS_TO_CYC_SHIFT)
     --------------- = ----------------------
     (1 << CS:SHIFT)       NS_TO_CYC_MULT

     Ergo: NS_TO_CYC_MULT = (1 << (CS::SHIFT + NS_TO_CYC_SHIFT)) / CS::MULT

     The NS_TO_CYC_SHIFT value is calculated when the clocksource is
     installed so that it aims for a one hour maximum sleep time.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163429.944763521@kernel.org
---
 include/linux/clocksource.h         |   1 +
 include/linux/timekeeper_internal.h |   8 +++
 kernel/time/Kconfig                 |   3 +
 kernel/time/timekeeping.c           | 110 ++++++++++++++++++++++++++++++++++++
 kernel/time/timekeeping.h           |   2 +
 5 files changed, 124 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 54366d5c4d19..25774fc5b53d 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -150,6 +150,7 @@ struct clocksource {
 #define CLOCK_SOURCE_RESELECT			0x100
 #define CLOCK_SOURCE_VERIFY_PERCPU		0x200
 #define CLOCK_SOURCE_CAN_INLINE_READ		0x400
+#define CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT	0x800
 
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index b8ae89ea28ab..e36d11e33e0c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -72,6 +72,10 @@ struct tk_read_base {
  * @id:				The timekeeper ID
  * @tkr_raw:			The readout base structure for CLOCK_MONOTONIC_RAW
  * @raw_sec:			CLOCK_MONOTONIC_RAW  time in seconds
+ * @cs_id:			The ID of the current clocksource
+ * @cs_ns_to_cyc_mult:		Multiplicator for nanoseconds to cycles conversion
+ * @cs_ns_to_cyc_shift:		Shift value for nanoseconds to cycles conversion
+ * @cs_ns_to_cyc_maxns:		Maximum nanoseconds to cyles conversion range
  * @clock_was_set_seq:		The sequence number of clock was set events
  * @cs_was_changed_seq:		The sequence number of clocksource change events
  * @clock_valid:		Indicator for valid clock
@@ -159,6 +163,10 @@ struct timekeeper {
 	u64			raw_sec;
 
 	/* Cachline 3 and 4 (timekeeping internal variables): */
+	enum clocksource_ids	cs_id;
+	u32			cs_ns_to_cyc_mult;
+	u32			cs_ns_to_cyc_shift;
+	u64			cs_ns_to_cyc_maxns;
 	unsigned int		clock_was_set_seq;
 	u8			cs_was_changed_seq;
 	u8			clock_valid;
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 07b048ba0cca..b51bc5625129 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -47,6 +47,9 @@ config GENERIC_CLOCKEVENTS_BROADCAST_IDLE
 config GENERIC_CLOCKEVENTS_MIN_ADJUST
 	bool
 
+config GENERIC_CLOCKEVENTS_COUPLED
+	bool
+
 # Generic update of CMOS clock
 config GENERIC_CMOS_UPDATE
 	bool
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 63aa31f02ebc..b7a0f93011e0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -391,6 +391,20 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	tk->tkr_raw.mult = clock->mult;
 	tk->ntp_err_mult = 0;
 	tk->skip_second_overflow = 0;
+
+	tk->cs_id = clock->id;
+
+	/* Coupled clockevent data */
+	if (IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_COUPLED) &&
+	    clock->flags & CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT) {
+		/*
+		 * Aim for an one hour maximum delta and use KHz to handle
+		 * clocksources with a frequency above 4GHz correctly as
+		 * the frequency argument of clocks_calc_mult_shift() is u32.
+		 */
+		clocks_calc_mult_shift(&tk->cs_ns_to_cyc_mult, &tk->cs_ns_to_cyc_shift,
+				       NSEC_PER_MSEC, clock->freq_khz, 3600 * 1000);
+	}
 }
 
 /* Timekeeper helper functions. */
@@ -720,6 +734,36 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
 	tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
 }
 
+static inline void tk_update_ns_to_cyc(struct timekeeper *tks, struct timekeeper *tkc)
+{
+	struct tk_read_base *tkrs = &tks->tkr_mono;
+	struct tk_read_base *tkrc = &tkc->tkr_mono;
+	unsigned int shift;
+
+	if (!IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_COUPLED) ||
+	    !(tkrs->clock->flags & CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT))
+		return;
+
+	if (tkrs->mult == tkrc->mult && tkrs->shift == tkrc->shift)
+		return;
+	/*
+	 * The conversion math is simple:
+	 *
+	 *      CS::MULT       (1 << NS_TO_CYC_SHIFT)
+	 *   --------------- = ----------------------
+	 *   (1 << CS:SHIFT)       NS_TO_CYC_MULT
+	 *
+	 * Ergo:
+	 *
+	 *   NS_TO_CYC_MULT = (1 << (CS::SHIFT + NS_TO_CYC_SHIFT)) / CS::MULT
+	 *
+	 * NS_TO_CYC_SHIFT has been set up in tk_setup_internals()
+	 */
+	shift = tkrs->shift + tks->cs_ns_to_cyc_shift;
+	tks->cs_ns_to_cyc_mult = (u32)div_u64(1ULL << shift, tkrs->mult);
+	tks->cs_ns_to_cyc_maxns = div_u64(tkrs->clock->mask, tks->cs_ns_to_cyc_mult);
+}
+
 /*
  * Restore the shadow timekeeper from the real timekeeper.
  */
@@ -754,6 +798,7 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act
 	tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;
 
 	if (tk->id == TIMEKEEPER_CORE) {
+		tk_update_ns_to_cyc(tk, &tkd->timekeeper);
 		update_vsyscall(tk);
 		update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
 
@@ -808,6 +853,71 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 	tk_update_coarse_nsecs(tk);
 }
 
+/*
+ * ktime_expiry_to_cycles - Convert a expiry time to clocksource cycles
+ * @id:		Clocksource ID which is required for validity
+ * @expires_ns:	Absolute CLOCK_MONOTONIC expiry time (nsecs) to be converted
+ * @cycles:	Pointer to storage for corresponding absolute cycles value
+ *
+ * Convert a CLOCK_MONOTONIC based absolute expiry time to a cycles value
+ * based on the correlated clocksource of the clockevent device by using
+ * the base nanoseconds and cycles values of the last timekeeper update and
+ * converting the delta between @expires_ns and base nanoseconds to cycles.
+ *
+ * This only works for clockevent devices which are using a less than or
+ * equal comparator against the clocksource.
+ *
+ * Utilizing this avoids two clocksource reads for such devices, the
+ * ktime_get() in clockevents_program_event() to calculate the delta expiry
+ * value and the readout in the device::set_next_event() callback to
+ * convert the delta back to a absolute comparator value.
+ *
+ * Returns: True if @id matches the current clocksource ID, false otherwise
+ */
+bool ktime_expiry_to_cycles(enum clocksource_ids id, ktime_t expires_ns, u64 *cycles)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+	struct tk_read_base *tkrm = &tk->tkr_mono;
+	ktime_t base_ns, delta_ns, max_ns;
+	u64 base_cycles, delta_cycles;
+	unsigned int seq;
+	u32 mult, shift;
+
+	/*
+	 * Racy check to avoid the seqcount overhead when ID does not match. If
+	 * the relevant clocksource is installed concurrently, then this will
+	 * just delay the switch over to this mechanism until the next event is
+	 * programmed. If the ID is not matching the clock events code will use
+	 * the regular relative set_next_event() callback as before.
+	 */
+	if (data_race(tk->cs_id) != id)
+		return false;
+
+	do {
+		seq = read_seqcount_begin(&tk_core.seq);
+
+		if (tk->cs_id != id)
+			return false;
+
+		base_cycles = tkrm->cycle_last;
+		base_ns = tkrm->base + (tkrm->xtime_nsec >> tkrm->shift);
+
+		mult = tk->cs_ns_to_cyc_mult;
+		shift = tk->cs_ns_to_cyc_shift;
+		max_ns = tk->cs_ns_to_cyc_maxns;
+
+	} while (read_seqcount_retry(&tk_core.seq, seq));
+
+	/* Prevent negative deltas and multiplication overflows */
+	delta_ns = min(expires_ns - base_ns, max_ns);
+	delta_ns = max(delta_ns, 0);
+
+	/* Convert to cycles */
+	delta_cycles = ((u64)delta_ns * mult) >> shift;
+	*cycles = base_cycles + delta_cycles;
+	return true;
+}
+
 /**
  * ktime_get_real_ts64 - Returns the time of day in a timespec64.
  * @ts:		pointer to the timespec to be set
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 543beba096c7..198d0608db74 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -9,6 +9,8 @@ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq,
 					    ktime_t *offs_boot,
 					    ktime_t *offs_tai);
 
+bool ktime_expiry_to_cycles(enum clocksource_ids id, ktime_t expires_ns, u64 *cycles);
+
 extern int timekeeping_valid_for_hres(void);
 extern u64 timekeeping_max_deferment(void);
 extern void timekeeping_warp_clock(void);
-- 
cgit v1.2.3


From 89f951a1e8ad781e7ac70eccddab0e0c270485f9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:36:45 +0100
Subject: clockevents: Provide support for clocksource coupled comparators

Some clockevent devices are coupled to the system clocksource by
implementing a less than or equal comparator which compares the programmed
absolute expiry time against the underlying time counter.

The timekeeping core provides a function to convert and absolute
CLOCK_MONOTONIC based expiry time to a absolute clock cycles time which can
be directly fed into the comparator. That spares two time reads in the next
event progamming path, one to convert the absolute nanoseconds time to a
delta value and the other to convert the delta value back to a absolute
time value suitable for the comparator.

Provide a new clocksource callback which takes the absolute cycle value and
wire it up in clockevents_program_event(). Similar to clocksources allow
architectures to inline the rearm operation.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163430.010425428@kernel.org
---
 include/linux/clockchips.h |  7 +++++--
 kernel/time/Kconfig        |  4 ++++
 kernel/time/clockevents.c  | 44 +++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 48 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 5e8f7819f6a6..92d90220c0d4 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -43,8 +43,9 @@ enum clock_event_state {
 /*
  * Clock event features
  */
-# define CLOCK_EVT_FEAT_PERIODIC	0x000001
-# define CLOCK_EVT_FEAT_ONESHOT		0x000002
+# define CLOCK_EVT_FEAT_PERIODIC		0x000001
+# define CLOCK_EVT_FEAT_ONESHOT			0x000002
+# define CLOCK_EVT_FEAT_CLOCKSOURCE_COUPLED	0x000004
 
 /*
  * x86(64) specific (mis)features:
@@ -100,6 +101,7 @@ struct clock_event_device {
 	void			(*event_handler)(struct clock_event_device *);
 	int			(*set_next_event)(unsigned long evt, struct clock_event_device *);
 	int			(*set_next_ktime)(ktime_t expires, struct clock_event_device *);
+	void			(*set_next_coupled)(u64 cycles, struct clock_event_device *);
 	ktime_t			next_event;
 	u64			max_delta_ns;
 	u64			min_delta_ns;
@@ -107,6 +109,7 @@ struct clock_event_device {
 	u32			shift;
 	enum clock_event_state	state_use_accessors;
 	unsigned int		features;
+	enum clocksource_ids	cs_id;
 	unsigned long		retries;
 
 	int			(*set_state_periodic)(struct clock_event_device *);
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index b51bc5625129..e1968ab8b37f 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -50,6 +50,10 @@ config GENERIC_CLOCKEVENTS_MIN_ADJUST
 config GENERIC_CLOCKEVENTS_COUPLED
 	bool
 
+config GENERIC_CLOCKEVENTS_COUPLED_INLINE
+	select GENERIC_CLOCKEVENTS_COUPLED
+	bool
+
 # Generic update of CMOS clock
 config GENERIC_CMOS_UPDATE
 	bool
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 5abaeef08e6a..83712aa1d385 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -292,6 +292,38 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
 
 #endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_COUPLED
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_COUPLED_INLINE
+#include <asm/clock_inlined.h>
+#else
+static __always_inline void
+arch_inlined_clockevent_set_next_coupled(u64 u64 cycles, struct clock_event_device *dev) { }
+#endif
+
+static inline bool clockevent_set_next_coupled(struct clock_event_device *dev, ktime_t expires)
+{
+	u64 cycles;
+
+	if (unlikely(!(dev->features & CLOCK_EVT_FEAT_CLOCKSOURCE_COUPLED)))
+		return false;
+
+	if (unlikely(!ktime_expiry_to_cycles(dev->cs_id, expires, &cycles)))
+		return false;
+
+	if (IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_COUPLED_INLINE))
+		arch_inlined_clockevent_set_next_coupled(cycles, dev);
+	else
+		dev->set_next_coupled(cycles, dev);
+	return true;
+}
+
+#else
+static inline bool clockevent_set_next_coupled(struct clock_event_device *dev, ktime_t expires)
+{
+	return false;
+}
+#endif
+
 /**
  * clockevents_program_event - Reprogram the clock event device.
  * @dev:	device to program
@@ -300,11 +332,10 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
  *
  * Returns 0 on success, -ETIME when the event is in the past.
  */
-int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
-			      bool force)
+int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, bool force)
 {
-	unsigned long long clc;
 	int64_t delta;
+	u64 cycles;
 	int rc;
 
 	if (WARN_ON_ONCE(expires < 0))
@@ -323,6 +354,9 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
 	if (unlikely(dev->features & CLOCK_EVT_FEAT_HRTIMER))
 		return dev->set_next_ktime(expires, dev);
 
+	if (likely(clockevent_set_next_coupled(dev, expires)))
+		return 0;
+
 	delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
 	if (delta <= 0)
 		return force ? clockevents_program_min_delta(dev) : -ETIME;
@@ -330,8 +364,8 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
 	delta = min(delta, (int64_t) dev->max_delta_ns);
 	delta = max(delta, (int64_t) dev->min_delta_ns);
 
-	clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
-	rc = dev->set_next_event((unsigned long) clc, dev);
+	cycles = ((u64)delta * dev->mult) >> dev->shift;
+	rc = dev->set_next_event((unsigned long) cycles, dev);
 
 	return (rc && force) ? clockevents_program_min_delta(dev) : rc;
 }
-- 
cgit v1.2.3


From 7d27eafe54659d19cef10dab4520cbcdfb17b0e3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:37:18 +0100
Subject: hrtimer: Replace the bitfield in hrtimer_cpu_base

Use bool for the various flags as that creates better code in the hot path.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163430.475262618@kernel.org
---
 include/linux/hrtimer_defs.h | 10 +++++-----
 kernel/time/hrtimer.c        | 25 +++++++++++++------------
 2 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index 02b010df6570..f9fbf9a48f59 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -83,11 +83,11 @@ struct hrtimer_cpu_base {
 	unsigned int			cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
-	unsigned int			hres_active		: 1,
-					in_hrtirq		: 1,
-					hang_detected		: 1,
-					softirq_activated       : 1,
-					online			: 1;
+	bool				hres_active;
+	bool				in_hrtirq;
+	bool				hang_detected;
+	bool				softirq_activated;
+	bool				online;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	unsigned int			nr_events;
 	unsigned short			nr_retries;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e6f02e980371..3b80a4453ee6 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -741,7 +741,7 @@ static void hrtimer_switch_to_hres(void)
 		pr_warn("Could not switch to high resolution mode on CPU %u\n",	base->cpu);
 		return;
 	}
-	base->hres_active = 1;
+	base->hres_active = true;
 	hrtimer_resolution = HIGH_RES_NSEC;
 
 	tick_setup_sched_timer(true);
@@ -1854,7 +1854,7 @@ static __latent_entropy void hrtimer_run_softirq(void)
 	now = hrtimer_update_base(cpu_base);
 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
 
-	cpu_base->softirq_activated = 0;
+	cpu_base->softirq_activated = false;
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
@@ -1881,7 +1881,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	entry_time = now = hrtimer_update_base(cpu_base);
 retry:
-	cpu_base->in_hrtirq = 1;
+	cpu_base->in_hrtirq = true;
 	/*
 	 * Set expires_next to KTIME_MAX, which prevents that remote CPUs queue
 	 * timers while __hrtimer_run_queues() is expiring the clock bases.
@@ -1892,7 +1892,7 @@ retry:
 
 	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
 		cpu_base->softirq_expires_next = KTIME_MAX;
-		cpu_base->softirq_activated = 1;
+		cpu_base->softirq_activated = true;
 		raise_timer_softirq(HRTIMER_SOFTIRQ);
 	}
 
@@ -1905,12 +1905,12 @@ retry:
 	 * against it.
 	 */
 	cpu_base->expires_next = expires_next;
-	cpu_base->in_hrtirq = 0;
+	cpu_base->in_hrtirq = false;
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	/* Reprogramming necessary ? */
 	if (!tick_program_event(expires_next, 0)) {
-		cpu_base->hang_detected = 0;
+		cpu_base->hang_detected = false;
 		return;
 	}
 
@@ -1939,7 +1939,7 @@ retry:
 	 * time away.
 	 */
 	cpu_base->nr_hangs++;
-	cpu_base->hang_detected = 1;
+	cpu_base->hang_detected = true;
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	delta = ktime_sub(now, entry_time);
@@ -1987,7 +1987,7 @@ void hrtimer_run_queues(void)
 
 	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
 		cpu_base->softirq_expires_next = KTIME_MAX;
-		cpu_base->softirq_activated = 1;
+		cpu_base->softirq_activated = true;
 		raise_timer_softirq(HRTIMER_SOFTIRQ);
 	}
 
@@ -2239,13 +2239,14 @@ int hrtimers_cpu_starting(unsigned int cpu)
 
 	/* Clear out any left over state from a CPU down operation */
 	cpu_base->active_bases = 0;
-	cpu_base->hres_active = 0;
-	cpu_base->hang_detected = 0;
+	cpu_base->hres_active = false;
+	cpu_base->hang_detected = false;
 	cpu_base->next_timer = NULL;
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
-	cpu_base->online = 1;
+	cpu_base->softirq_activated = false;
+	cpu_base->online = true;
 	return 0;
 }
 
@@ -2303,7 +2304,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
 	smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
 
 	raw_spin_unlock(&new_base->lock);
-	old_base->online = 0;
+	old_base->online = false;
 	raw_spin_unlock(&old_base->lock);
 
 	return 0;
-- 
cgit v1.2.3


From 22f011be7aaa77ca8f502b9dd07b7334f9965d18 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:37:23 +0100
Subject: hrtimer: Convert state and properties to boolean

All 'u8' flags are true booleans, so make it entirely clear that these can
only contain true or false.

This is especially true for hrtimer::state, which has a historical leftover
of using the state with bitwise operations. That was used in the early
hrtimer implementation with several bits, but then converted to a boolean
state. But that conversion missed to replace the bit OR and bit check
operations all over the place, which creates suboptimal code. As of today
'state' is a misnomer because it's only purpose is to reflect whether the
timer is enqueued into the RB-tree or not. Rename it to 'is_queued' and
make all operations on it boolean.

This reduces text size from 8926 to 8732 bytes.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163430.542427240@kernel.org
---
 include/linux/hrtimer.h       | 31 ++---------------------
 include/linux/hrtimer_types.h | 12 ++++-----
 kernel/time/hrtimer.c         | 58 +++++++++++++++++++++++++++++--------------
 kernel/time/timer_list.c      |  2 +-
 4 files changed, 49 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index c924bb2498db..4ad4a454b4c5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -63,33 +63,6 @@ enum hrtimer_mode {
 	HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
 };
 
-/*
- * Values to track state of the timer
- *
- * Possible states:
- *
- * 0x00		inactive
- * 0x01		enqueued into rbtree
- *
- * The callback state is not part of the timer->state because clearing it would
- * mean touching the timer after the callback, this makes it impossible to free
- * the timer from the callback function.
- *
- * Therefore we track the callback state in:
- *
- *	timer->base->cpu_base->running == timer
- *
- * On SMP it is possible to have a "callback function running and enqueued"
- * status. It happens for example when a posix timer expired and the callback
- * queued a signal. Between dropping the lock which protects the posix timer
- * and reacquiring the base lock of the hrtimer, another CPU can deliver the
- * signal and rearm the timer.
- *
- * All state transitions are protected by cpu_base->lock.
- */
-#define HRTIMER_STATE_INACTIVE	0x00
-#define HRTIMER_STATE_ENQUEUED	0x01
-
 /**
  * struct hrtimer_sleeper - simple sleeper structure
  * @timer:	embedded timer structure
@@ -300,8 +273,8 @@ extern bool hrtimer_active(const struct hrtimer *timer);
  */
 static inline bool hrtimer_is_queued(struct hrtimer *timer)
 {
-	/* The READ_ONCE pairs with the update functions of timer->state */
-	return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED);
+	/* The READ_ONCE pairs with the update functions of timer->is_queued */
+	return READ_ONCE(timer->is_queued);
 }
 
 /*
diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h
index 64381c64cdbd..0e22bc91d00f 100644
--- a/include/linux/hrtimer_types.h
+++ b/include/linux/hrtimer_types.h
@@ -28,7 +28,7 @@ enum hrtimer_restart {
  *		was armed.
  * @function:	timer expiry callback function
  * @base:	pointer to the timer base (per cpu and per clock)
- * @state:	state information (See bit values above)
+ * @is_queued:	Indicates whether a timer is enqueued or not
  * @is_rel:	Set if the timer was armed relative
  * @is_soft:	Set if hrtimer will be expired in soft interrupt context.
  * @is_hard:	Set if hrtimer will be expired in hard interrupt context
@@ -43,11 +43,11 @@ struct hrtimer {
 	ktime_t				_softexpires;
 	enum hrtimer_restart		(*__private function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
-	u8				state;
-	u8				is_rel;
-	u8				is_soft;
-	u8				is_hard;
-	u8				is_lazy;
+	bool				is_queued;
+	bool				is_rel;
+	bool				is_soft;
+	bool				is_hard;
+	bool				is_lazy;
 };
 
 #endif /* _LINUX_HRTIMER_TYPES_H */
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 3b80a4453ee6..6bab3b7eb0de 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -49,6 +49,28 @@
 
 #include "tick-internal.h"
 
+/*
+ * Constants to set the queued state of the timer (INACTIVE, ENQUEUED)
+ *
+ * The callback state is kept separate in the CPU base because having it in
+ * the timer would required touching the timer after the callback, which
+ * makes it impossible to free the timer from the callback function.
+ *
+ * Therefore we track the callback state in:
+ *
+ *	timer->base->cpu_base->running == timer
+ *
+ * On SMP it is possible to have a "callback function running and enqueued"
+ * status. It happens for example when a posix timer expired and the callback
+ * queued a signal. Between dropping the lock which protects the posix timer
+ * and reacquiring the base lock of the hrtimer, another CPU can deliver the
+ * signal and rearm the timer.
+ *
+ * All state transitions are protected by cpu_base->lock.
+ */
+#define HRTIMER_STATE_INACTIVE	false
+#define HRTIMER_STATE_ENQUEUED	true
+
 /*
  * The resolution of the clocks. The resolution value is returned in
  * the clock_getres() system call to give application programmers an
@@ -1038,7 +1060,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 	if (delta < 0)
 		return 0;
 
-	if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
+	if (WARN_ON(timer->is_queued))
 		return 0;
 
 	if (interval < hrtimer_resolution)
@@ -1082,7 +1104,7 @@ static bool enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *ba
 	base->cpu_base->active_bases |= 1 << base->index;
 
 	/* Pairs with the lockless read in hrtimer_is_queued() */
-	WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);
+	WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED);
 
 	return timerqueue_add(&base->active, &timer->node);
 }
@@ -1096,18 +1118,18 @@ static bool enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *ba
  * anyway (e.g. timer interrupt)
  */
 static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
-			     u8 newstate, bool reprogram)
+			     bool newstate, bool reprogram)
 {
 	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
-	u8 state = timer->state;
 
 	lockdep_assert_held(&cpu_base->lock);
 
-	/* Pairs with the lockless read in hrtimer_is_queued() */
-	WRITE_ONCE(timer->state, newstate);
-	if (!(state & HRTIMER_STATE_ENQUEUED))
+	if (!timer->is_queued)
 		return;
 
+	/* Pairs with the lockless read in hrtimer_is_queued() */
+	WRITE_ONCE(timer->is_queued, newstate);
+
 	if (!timerqueue_del(&base->active, &timer->node))
 		cpu_base->active_bases &= ~(1 << base->index);
 
@@ -1127,11 +1149,11 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *b
 static inline bool remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
 				 bool restart, bool keep_local)
 {
-	u8 state = timer->state;
+	bool queued_state = timer->is_queued;
 
 	lockdep_assert_held(&base->cpu_base->lock);
 
-	if (state & HRTIMER_STATE_ENQUEUED) {
+	if (queued_state) {
 		bool reprogram;
 
 		debug_hrtimer_deactivate(timer);
@@ -1153,11 +1175,11 @@ static inline bool remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_ba
 		 * and a moment later when it's requeued).
 		 */
 		if (!restart)
-			state = HRTIMER_STATE_INACTIVE;
+			queued_state = HRTIMER_STATE_INACTIVE;
 		else
 			reprogram &= !keep_local;
 
-		__remove_hrtimer(timer, base, state, reprogram);
+		__remove_hrtimer(timer, base, queued_state, reprogram);
 		return true;
 	}
 	return false;
@@ -1704,7 +1726,7 @@ bool hrtimer_active(const struct hrtimer *timer)
 		base = READ_ONCE(timer->base);
 		seq = raw_read_seqcount_begin(&base->seq);
 
-		if (timer->state != HRTIMER_STATE_INACTIVE || base->running == timer)
+		if (timer->is_queued || base->running == timer)
 			return true;
 
 	} while (read_seqcount_retry(&base->seq, seq) || base != READ_ONCE(timer->base));
@@ -1721,7 +1743,7 @@ EXPORT_SYMBOL_GPL(hrtimer_active);
  *  - callback:	the timer is being ran
  *  - post:	the timer is inactive or (re)queued
  *
- * On the read side we ensure we observe timer->state and cpu_base->running
+ * On the read side we ensure we observe timer->is_queued and cpu_base->running
  * from the same section, if anything changed while we looked at it, we retry.
  * This includes timer->base changing because sequence numbers alone are
  * insufficient for that.
@@ -1744,11 +1766,11 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_cloc
 	base->running = timer;
 
 	/*
-	 * Separate the ->running assignment from the ->state assignment.
+	 * Separate the ->running assignment from the ->is_queued assignment.
 	 *
 	 * As with a regular write barrier, this ensures the read side in
 	 * hrtimer_active() cannot observe base->running == NULL &&
-	 * timer->state == INACTIVE.
+	 * timer->is_queued == INACTIVE.
 	 */
 	raw_write_seqcount_barrier(&base->seq);
 
@@ -1787,15 +1809,15 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_cloc
 	 * hrtimer_start_range_ns() can have popped in and enqueued the timer
 	 * for us already.
 	 */
-	if (restart != HRTIMER_NORESTART && !(timer->state & HRTIMER_STATE_ENQUEUED))
+	if (restart == HRTIMER_RESTART && !timer->is_queued)
 		enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS, false);
 
 	/*
-	 * Separate the ->running assignment from the ->state assignment.
+	 * Separate the ->running assignment from the ->is_queued assignment.
 	 *
 	 * As with a regular write barrier, this ensures the read side in
 	 * hrtimer_active() cannot observe base->running.timer == NULL &&
-	 * timer->state == INACTIVE.
+	 * timer->is_queued == INACTIVE.
 	 */
 	raw_write_seqcount_barrier(&base->seq);
 
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 488e47e96e93..19e61826b7de 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -47,7 +47,7 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
 	    int idx, u64 now)
 {
 	SEQ_printf(m, " #%d: <%p>, %ps", idx, taddr, ACCESS_PRIVATE(timer, function));
-	SEQ_printf(m, ", S:%02x", timer->state);
+	SEQ_printf(m, ", S:%02x", timer->is_queued);
 	SEQ_printf(m, "\n");
 	SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
 		(unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
-- 
cgit v1.2.3


From 9e07a9c980eaa93fd1bba722d31eeb4bf0cbbfb4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:37:53 +0100
Subject: hrtimer: Rename hrtimer_cpu_base::in_hrtirq to deferred_rearm

The upcoming deferred rearming scheme has the same effect as the deferred
rearming when the hrtimer interrupt is executing. So it can reuse the
in_hrtirq flag, but when it gets deferred beyond the hrtimer interrupt
path, then the name does not make sense anymore.

Rename it to deferred_rearm upfront to keep the actual functional change
separate from the mechanical rename churn.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163430.935623347@kernel.org
---
 include/linux/hrtimer_defs.h |  4 ++--
 kernel/time/hrtimer.c        | 28 +++++++++-------------------
 2 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index f9fbf9a48f59..2c3bdbd562d2 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -53,7 +53,7 @@ enum  hrtimer_base_type {
  * @active_bases:	Bitfield to mark bases with active timers
  * @clock_was_set_seq:	Sequence counter of clock was set events
  * @hres_active:	State of high resolution mode
- * @in_hrtirq:		hrtimer_interrupt() is currently executing
+ * @deferred_rearm:	A deferred rearm is pending
  * @hang_detected:	The last hrtimer interrupt detected a hang
  * @softirq_activated:	displays, if the softirq is raised - update of softirq
  *			related settings is not required then.
@@ -84,7 +84,7 @@ struct hrtimer_cpu_base {
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
 	bool				hres_active;
-	bool				in_hrtirq;
+	bool				deferred_rearm;
 	bool				hang_detected;
 	bool				softirq_activated;
 	bool				online;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 2e05a1885d24..6f05d2569286 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -883,11 +883,8 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
 	if (expires >= cpu_base->expires_next)
 		return;
 
-	/*
-	 * If the hrtimer interrupt is running, then it will reevaluate the
-	 * clock bases and reprogram the clock event device.
-	 */
-	if (cpu_base->in_hrtirq)
+	/* If a deferred rearm is pending skip reprogramming the device */
+	if (cpu_base->deferred_rearm)
 		return;
 
 	cpu_base->next_timer = timer;
@@ -921,12 +918,8 @@ static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, unsigned int act
 	if (seq == cpu_base->clock_was_set_seq)
 		return false;
 
-	/*
-	 * If the remote CPU is currently handling an hrtimer interrupt, it
-	 * will reevaluate the first expiring timer of all clock bases
-	 * before reprogramming. Nothing to do here.
-	 */
-	if (cpu_base->in_hrtirq)
+	/* If a deferred rearm is pending the remote CPU will take care of it */
+	if (cpu_base->deferred_rearm)
 		return false;
 
 	/*
@@ -1334,11 +1327,8 @@ static bool __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 del
 		first = enqueue_hrtimer(timer, base, mode, was_armed);
 	}
 
-	/*
-	 * If the hrtimer interrupt is running, then it will reevaluate the
-	 * clock bases and reprogram the clock event device.
-	 */
-	if (cpu_base->in_hrtirq)
+	/* If a deferred rearm is pending skip reprogramming the device */
+	if (cpu_base->deferred_rearm)
 		return false;
 
 	if (!was_first || cpu_base != this_cpu_base) {
@@ -1947,14 +1937,14 @@ static __latent_entropy void hrtimer_run_softirq(void)
 
 /*
  * Very similar to hrtimer_force_reprogram(), except it deals with
- * in_hrtirq and hang_detected.
+ * deferred_rearm and hang_detected.
  */
 static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now)
 {
 	ktime_t expires_next = hrtimer_update_next_event(cpu_base);
 
 	cpu_base->expires_next = expires_next;
-	cpu_base->in_hrtirq = false;
+	cpu_base->deferred_rearm = false;
 
 	if (unlikely(cpu_base->hang_detected)) {
 		/*
@@ -1985,7 +1975,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	entry_time = now = hrtimer_update_base(cpu_base);
 retry:
-	cpu_base->in_hrtirq = true;
+	cpu_base->deferred_rearm = true;
 	/*
 	 * Set expires_next to KTIME_MAX, which prevents that remote CPUs queue
 	 * timers while __hrtimer_run_queues() is expiring the clock bases.
-- 
cgit v1.2.3


From a43b4856bc039675165a50d9ef5f41b28520f0f4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 24 Feb 2026 17:37:58 +0100
Subject: hrtimer: Prepare stubs for deferred rearming

The hrtimer interrupt expires timers and at the end of the interrupt it
rearms the clockevent device for the next expiring timer.

That's obviously correct, but in the case that a expired timer set
NEED_RESCHED the return from interrupt ends up in schedule(). If HRTICK is
enabled then schedule() will modify the hrtick timer, which causes another
reprogramming of the hardware.

That can be avoided by deferring the rearming to the return from interrupt
path and if the return results in a immediate schedule() invocation then it
can be deferred until the end of schedule().

To make this correct the affected code parts need to be made aware of this.

Provide empty stubs for the deferred rearming mechanism, so that the
relevant code changes for entry, softirq and scheduler can be split up into
separate changes independent of the actual enablement in the hrtimer code.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.000891171@kernel.org
---
 include/linux/hrtimer.h       |  1 +
 include/linux/hrtimer_rearm.h | 21 +++++++++++++++++++++
 kernel/time/Kconfig           |  4 ++++
 3 files changed, 26 insertions(+)
 create mode 100644 include/linux/hrtimer_rearm.h

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4ad4a454b4c5..c087b7142330 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -13,6 +13,7 @@
 #define _LINUX_HRTIMER_H
 
 #include <linux/hrtimer_defs.h>
+#include <linux/hrtimer_rearm.h>
 #include <linux/hrtimer_types.h>
 #include <linux/init.h>
 #include <linux/list.h>
diff --git a/include/linux/hrtimer_rearm.h b/include/linux/hrtimer_rearm.h
new file mode 100644
index 000000000000..6293076c03a6
--- /dev/null
+++ b/include/linux/hrtimer_rearm.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _LINUX_HRTIMER_REARM_H
+#define _LINUX_HRTIMER_REARM_H
+
+#ifdef CONFIG_HRTIMER_REARM_DEFERRED
+static __always_inline void __hrtimer_rearm_deferred(void) { }
+static __always_inline void hrtimer_rearm_deferred(void) { }
+static __always_inline void hrtimer_rearm_deferred_tif(unsigned long tif_work) { }
+static __always_inline bool
+hrtimer_rearm_deferred_user_irq(unsigned long *tif_work, const unsigned long tif_mask) { return false; }
+static __always_inline bool hrtimer_test_and_clear_rearm_deferred(void) { return false; }
+#else  /* CONFIG_HRTIMER_REARM_DEFERRED */
+static __always_inline void __hrtimer_rearm_deferred(void) { }
+static __always_inline void hrtimer_rearm_deferred(void) { }
+static __always_inline void hrtimer_rearm_deferred_tif(unsigned long tif_work) { }
+static __always_inline bool
+hrtimer_rearm_deferred_user_irq(unsigned long *tif_work, const unsigned long tif_mask) { return false; }
+static __always_inline bool hrtimer_test_and_clear_rearm_deferred(void) { return false; }
+#endif  /* !CONFIG_HRTIMER_REARM_DEFERRED */
+
+#endif
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index e1968ab8b37f..b95bfee3f592 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -58,6 +58,10 @@ config GENERIC_CLOCKEVENTS_COUPLED_INLINE
 config GENERIC_CMOS_UPDATE
 	bool
 
+# Deferred rearming of the hrtimer interrupt
+config HRTIMER_REARM_DEFERRED
+       def_bool n
+
 # Select to handle posix CPU timers from task_work
 # and not from the timer interrupt context
 config HAVE_POSIX_CPU_TIMERS_TASK_WORK
-- 
cgit v1.2.3


From 0e98eb14814ef669e07ca6effaa03df2e57ef956 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 24 Feb 2026 17:38:03 +0100
Subject: entry: Prepare for deferred hrtimer rearming

The hrtimer interrupt expires timers and at the end of the interrupt it
rearms the clockevent device for the next expiring timer.

That's obviously correct, but in the case that a expired timer sets
NEED_RESCHED the return from interrupt ends up in schedule(). If HRTICK is
enabled then schedule() will modify the hrtick timer, which causes another
reprogramming of the hardware.

That can be avoided by deferring the rearming to the return from interrupt
path and if the return results in a immediate schedule() invocation then it
can be deferred until the end of schedule(), which avoids multiple rearms
and re-evaluation of the timer wheel.

As this is only relevant for interrupt to user return split the work masks
up and hand them in as arguments from the relevant exit to user functions,
which allows the compiler to optimize the deferred handling out for the
syscall exit to user case.

Add the rearm checks to the approritate places in the exit to user loop and
the interrupt return to kernel path, so that the rearming is always
guaranteed.

In the return to user space path this is handled in the same way as
TIF_RSEQ to avoid extra instructions in the fast path, which are truly
hurtful for device interrupt heavy work loads as the extra instructions and
conditionals while benign at first sight accumulate quickly into measurable
regressions. The return from syscall path is completely unaffected due to
the above mentioned split so syscall heavy workloads wont have any extra
burden.

For now this is just placing empty stubs at the right places which are all
optimized out by the compiler until the actual functionality is in place.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.066469985@kernel.org
---
 include/linux/irq-entry-common.h | 25 +++++++++++++++++++------
 include/linux/rseq_entry.h       | 16 +++++++++++++---
 kernel/entry/common.c            |  4 +++-
 3 files changed, 35 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
index d26d1b1bcbfb..b976946b3cdb 100644
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -3,6 +3,7 @@
 #define __LINUX_IRQENTRYCOMMON_H
 
 #include <linux/context_tracking.h>
+#include <linux/hrtimer_rearm.h>
 #include <linux/kmsan.h>
 #include <linux/rseq_entry.h>
 #include <linux/static_call_types.h>
@@ -33,6 +34,14 @@
 	 _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ |		\
 	 ARCH_EXIT_TO_USER_MODE_WORK)
 
+#ifdef CONFIG_HRTIMER_REARM_DEFERRED
+# define EXIT_TO_USER_MODE_WORK_SYSCALL	(EXIT_TO_USER_MODE_WORK)
+# define EXIT_TO_USER_MODE_WORK_IRQ	(EXIT_TO_USER_MODE_WORK | _TIF_HRTIMER_REARM)
+#else
+# define EXIT_TO_USER_MODE_WORK_SYSCALL	(EXIT_TO_USER_MODE_WORK)
+# define EXIT_TO_USER_MODE_WORK_IRQ	(EXIT_TO_USER_MODE_WORK)
+#endif
+
 /**
  * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
  * @regs:	Pointer to currents pt_regs
@@ -203,6 +212,7 @@ unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work
 /**
  * __exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
  * @regs:	Pointer to pt_regs on entry stack
+ * @work_mask:	Which TIF bits need to be evaluated
  *
  * 1) check that interrupts are disabled
  * 2) call tick_nohz_user_enter_prepare()
@@ -212,7 +222,8 @@ unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work
  *
  * Don't invoke directly, use the syscall/irqentry_ prefixed variants below
  */
-static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs)
+static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs,
+							const unsigned long work_mask)
 {
 	unsigned long ti_work;
 
@@ -222,8 +233,10 @@ static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs)
 	tick_nohz_user_enter_prepare();
 
 	ti_work = read_thread_flags();
-	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
-		ti_work = exit_to_user_mode_loop(regs, ti_work);
+	if (unlikely(ti_work & work_mask)) {
+		if (!hrtimer_rearm_deferred_user_irq(&ti_work, work_mask))
+			ti_work = exit_to_user_mode_loop(regs, ti_work);
+	}
 
 	arch_exit_to_user_mode_prepare(regs, ti_work);
 }
@@ -239,7 +252,7 @@ static __always_inline void __exit_to_user_mode_validate(void)
 /* Temporary workaround to keep ARM64 alive */
 static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs)
 {
-	__exit_to_user_mode_prepare(regs);
+	__exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK);
 	rseq_exit_to_user_mode_legacy();
 	__exit_to_user_mode_validate();
 }
@@ -253,7 +266,7 @@ static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *reg
  */
 static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
 {
-	__exit_to_user_mode_prepare(regs);
+	__exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_SYSCALL);
 	rseq_syscall_exit_to_user_mode();
 	__exit_to_user_mode_validate();
 }
@@ -267,7 +280,7 @@ static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *re
  */
 static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs)
 {
-	__exit_to_user_mode_prepare(regs);
+	__exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_IRQ);
 	rseq_irqentry_exit_to_user_mode();
 	__exit_to_user_mode_validate();
 }
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index cbc4a791618b..17956e119e81 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -40,6 +40,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_stats);
 #endif /* !CONFIG_RSEQ_STATS */
 
 #ifdef CONFIG_RSEQ
+#include <linux/hrtimer_rearm.h>
 #include <linux/jump_label.h>
 #include <linux/rseq.h>
 #include <linux/sched/signal.h>
@@ -110,7 +111,7 @@ static __always_inline void rseq_slice_clear_grant(struct task_struct *t)
 	t->rseq.slice.state.granted = false;
 }
 
-static __always_inline bool rseq_grant_slice_extension(bool work_pending)
+static __always_inline bool __rseq_grant_slice_extension(bool work_pending)
 {
 	struct task_struct *curr = current;
 	struct rseq_slice_ctrl usr_ctrl;
@@ -215,11 +216,20 @@ efault:
 	return false;
 }
 
+static __always_inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask)
+{
+	if (unlikely(__rseq_grant_slice_extension(ti_work & mask))) {
+		hrtimer_rearm_deferred_tif(ti_work);
+		return true;
+	}
+	return false;
+}
+
 #else /* CONFIG_RSEQ_SLICE_EXTENSION */
 static inline bool rseq_slice_extension_enabled(void) { return false; }
 static inline bool rseq_arm_slice_extension_timer(void) { return false; }
 static inline void rseq_slice_clear_grant(struct task_struct *t) { }
-static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
+static inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; }
 #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
 
 bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
@@ -778,7 +788,7 @@ static inline void rseq_syscall_exit_to_user_mode(void) { }
 static inline void rseq_irqentry_exit_to_user_mode(void) { }
 static inline void rseq_exit_to_user_mode_legacy(void) { }
 static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
-static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
+static inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; }
 #endif /* !CONFIG_RSEQ */
 
 #endif /* _LINUX_RSEQ_ENTRY_H */
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 9ef63e414791..9e1a6afb07f2 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -50,7 +50,7 @@ static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *re
 		local_irq_enable_exit_to_user(ti_work);
 
 		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {
-			if (!rseq_grant_slice_extension(ti_work & TIF_SLICE_EXT_DENY))
+			if (!rseq_grant_slice_extension(ti_work, TIF_SLICE_EXT_DENY))
 				schedule();
 		}
 
@@ -225,6 +225,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
 		 */
 		if (state.exit_rcu) {
 			instrumentation_begin();
+			hrtimer_rearm_deferred();
 			/* Tell the tracer that IRET will enable interrupts */
 			trace_hardirqs_on_prepare();
 			lockdep_hardirqs_on_prepare();
@@ -238,6 +239,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
 		if (IS_ENABLED(CONFIG_PREEMPTION))
 			irqentry_exit_cond_resched();
 
+		hrtimer_rearm_deferred();
 		/* Covers both tracing and lockdep */
 		trace_hardirqs_on();
 		instrumentation_end();
-- 
cgit v1.2.3


From 15dd3a9488557d3e6ebcecacab79f4e56b69ab54 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 24 Feb 2026 17:38:18 +0100
Subject: hrtimer: Push reprogramming timers into the interrupt return path

Currently hrtimer_interrupt() runs expired timers, which can re-arm
themselves, after which it computes the next expiration time and
re-programs the hardware.

However, things like HRTICK, a highres timer driving preemption, cannot
re-arm itself at the point of running, since the next task has not been
determined yet. The schedule() in the interrupt return path will switch to
the next task, which then causes a new hrtimer to be programmed.

This then results in reprogramming the hardware at least twice, once after
running the timers, and once upon selecting the new task.

Notably, *both* events happen in the interrupt.

By pushing the hrtimer reprogram all the way into the interrupt return
path, it runs after schedule() picks the new task and the double reprogram
can be avoided.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.273488269@kernel.org
---
 include/asm-generic/thread_info_tif.h |  5 ++-
 include/linux/hrtimer_rearm.h         | 72 ++++++++++++++++++++++++++++++++---
 kernel/time/Kconfig                   |  4 +-
 kernel/time/hrtimer.c                 | 38 +++++++++++++++---
 4 files changed, 107 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/thread_info_tif.h b/include/asm-generic/thread_info_tif.h
index da1610a78f92..528e6fc7efe9 100644
--- a/include/asm-generic/thread_info_tif.h
+++ b/include/asm-generic/thread_info_tif.h
@@ -41,11 +41,14 @@
 #define _TIF_PATCH_PENDING	BIT(TIF_PATCH_PENDING)
 
 #ifdef HAVE_TIF_RESTORE_SIGMASK
-# define TIF_RESTORE_SIGMASK	10	// Restore signal mask in do_signal() */
+# define TIF_RESTORE_SIGMASK	10	// Restore signal mask in do_signal()
 # define _TIF_RESTORE_SIGMASK	BIT(TIF_RESTORE_SIGMASK)
 #endif
 
 #define TIF_RSEQ		11	// Run RSEQ fast path
 #define _TIF_RSEQ		BIT(TIF_RSEQ)
 
+#define TIF_HRTIMER_REARM	12       // re-arm the timer
+#define _TIF_HRTIMER_REARM	BIT(TIF_HRTIMER_REARM)
+
 #endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */
diff --git a/include/linux/hrtimer_rearm.h b/include/linux/hrtimer_rearm.h
index 6293076c03a6..a6f2e5d5e1c7 100644
--- a/include/linux/hrtimer_rearm.h
+++ b/include/linux/hrtimer_rearm.h
@@ -3,12 +3,74 @@
 #define _LINUX_HRTIMER_REARM_H
 
 #ifdef CONFIG_HRTIMER_REARM_DEFERRED
-static __always_inline void __hrtimer_rearm_deferred(void) { }
-static __always_inline void hrtimer_rearm_deferred(void) { }
-static __always_inline void hrtimer_rearm_deferred_tif(unsigned long tif_work) { }
+#include <linux/thread_info.h>
+
+void __hrtimer_rearm_deferred(void);
+
+/*
+ * This is purely CPU local, so check the TIF bit first to avoid the overhead of
+ * the atomic test_and_clear_bit() operation for the common case where the bit
+ * is not set.
+ */
+static __always_inline bool hrtimer_test_and_clear_rearm_deferred_tif(unsigned long tif_work)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (unlikely(tif_work & _TIF_HRTIMER_REARM)) {
+		clear_thread_flag(TIF_HRTIMER_REARM);
+		return true;
+	}
+	return false;
+}
+
+#define TIF_REARM_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_HRTIMER_REARM)
+
+/* Invoked from the exit to user before invoking exit_to_user_mode_loop() */
 static __always_inline bool
-hrtimer_rearm_deferred_user_irq(unsigned long *tif_work, const unsigned long tif_mask) { return false; }
-static __always_inline bool hrtimer_test_and_clear_rearm_deferred(void) { return false; }
+hrtimer_rearm_deferred_user_irq(unsigned long *tif_work, const unsigned long tif_mask)
+{
+	/* Help the compiler to optimize the function out for syscall returns */
+	if (!(tif_mask & _TIF_HRTIMER_REARM))
+		return false;
+	/*
+	 * Rearm the timer if none of the resched flags is set before going into
+	 * the loop which re-enables interrupts.
+	 */
+	if (unlikely((*tif_work & TIF_REARM_MASK) == _TIF_HRTIMER_REARM)) {
+		clear_thread_flag(TIF_HRTIMER_REARM);
+		__hrtimer_rearm_deferred();
+		/* Don't go into the loop if HRTIMER_REARM was the only flag */
+		*tif_work &= ~TIF_HRTIMER_REARM;
+		return !*tif_work;
+	}
+	return false;
+}
+
+/* Invoked from the time slice extension decision function */
+static __always_inline void hrtimer_rearm_deferred_tif(unsigned long tif_work)
+{
+	if (hrtimer_test_and_clear_rearm_deferred_tif(tif_work))
+		__hrtimer_rearm_deferred();
+}
+
+/*
+ * This is to be called on all irqentry_exit() paths that will enable
+ * interrupts.
+ */
+static __always_inline void hrtimer_rearm_deferred(void)
+{
+	hrtimer_rearm_deferred_tif(read_thread_flags());
+}
+
+/*
+ * Invoked from the scheduler on entry to __schedule() so it can defer
+ * rearming after the load balancing callbacks which might change hrtick.
+ */
+static __always_inline bool hrtimer_test_and_clear_rearm_deferred(void)
+{
+	return hrtimer_test_and_clear_rearm_deferred_tif(read_thread_flags());
+}
+
 #else  /* CONFIG_HRTIMER_REARM_DEFERRED */
 static __always_inline void __hrtimer_rearm_deferred(void) { }
 static __always_inline void hrtimer_rearm_deferred(void) { }
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index b95bfee3f592..6d6aace0a693 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -60,7 +60,9 @@ config GENERIC_CMOS_UPDATE
 
 # Deferred rearming of the hrtimer interrupt
 config HRTIMER_REARM_DEFERRED
-       def_bool n
+       def_bool y
+       depends on GENERIC_ENTRY && HAVE_GENERIC_TIF_BITS
+       depends on HIGH_RES_TIMERS && SCHED_HRTICK
 
 # Select to handle posix CPU timers from task_work
 # and not from the timer interrupt context
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 6f05d2569286..2e5f0e292efb 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1939,10 +1939,9 @@ static __latent_entropy void hrtimer_run_softirq(void)
  * Very similar to hrtimer_force_reprogram(), except it deals with
  * deferred_rearm and hang_detected.
  */
-static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now)
+static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now,
+			  ktime_t expires_next, bool deferred)
 {
-	ktime_t expires_next = hrtimer_update_next_event(cpu_base);
-
 	cpu_base->expires_next = expires_next;
 	cpu_base->deferred_rearm = false;
 
@@ -1954,9 +1953,37 @@ static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now)
 		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
 		cpu_base->hang_detected = false;
 	}
-	hrtimer_rearm_event(expires_next, false);
+	hrtimer_rearm_event(expires_next, deferred);
+}
+
+#ifdef CONFIG_HRTIMER_REARM_DEFERRED
+void __hrtimer_rearm_deferred(void)
+{
+	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+	ktime_t now, expires_next;
+
+	if (!cpu_base->deferred_rearm)
+		return;
+
+	guard(raw_spinlock)(&cpu_base->lock);
+	now = hrtimer_update_base(cpu_base);
+	expires_next = hrtimer_update_next_event(cpu_base);
+	hrtimer_rearm(cpu_base, now, expires_next, true);
 }
 
+static __always_inline void
+hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now, ktime_t expires_next)
+{
+	set_thread_flag(TIF_HRTIMER_REARM);
+}
+#else  /* CONFIG_HRTIMER_REARM_DEFERRED */
+static __always_inline void
+hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now, ktime_t expires_next)
+{
+	hrtimer_rearm(cpu_base, now, expires_next, false);
+}
+#endif  /* !CONFIG_HRTIMER_REARM_DEFERRED */
+
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -2014,9 +2041,10 @@ retry:
 		cpu_base->hang_detected = true;
 	}
 
-	hrtimer_rearm(cpu_base, now);
+	hrtimer_interrupt_rearm(cpu_base, now, expires_next);
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }
+
 #endif /* !CONFIG_HIGH_RES_TIMERS */
 
 /*
-- 
cgit v1.2.3


From b95c4442b02162904e9012e670b602ebeb3c6c1b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:38:23 +0100
Subject: hrtimer: Avoid re-evaluation when nothing changed

Most times there is no change between hrtimer_interrupt() deferring the rearm
and the invocation of hrtimer_rearm_deferred(). In those cases it's a pointless
exercise to re-evaluate the next expiring timer.

Cache the required data and use it if nothing changed.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.338569372@kernel.org
---
 include/linux/hrtimer_defs.h | 53 ++++++++++++++++++++++----------------------
 kernel/time/hrtimer.c        | 45 +++++++++++++++++++++++++------------
 2 files changed, 58 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index 2c3bdbd562d2..b6846efec210 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -47,32 +47,31 @@ enum  hrtimer_base_type {
 
 /**
  * struct hrtimer_cpu_base - the per cpu clock bases
- * @lock:		lock protecting the base and associated clock bases
- *			and timers
- * @cpu:		cpu number
- * @active_bases:	Bitfield to mark bases with active timers
- * @clock_was_set_seq:	Sequence counter of clock was set events
- * @hres_active:	State of high resolution mode
- * @deferred_rearm:	A deferred rearm is pending
- * @hang_detected:	The last hrtimer interrupt detected a hang
- * @softirq_activated:	displays, if the softirq is raised - update of softirq
- *			related settings is not required then.
- * @nr_events:		Total number of hrtimer interrupt events
- * @nr_retries:		Total number of hrtimer interrupt retries
- * @nr_hangs:		Total number of hrtimer interrupt hangs
- * @max_hang_time:	Maximum time spent in hrtimer_interrupt
- * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
- *			 expired
- * @online:		CPU is online from an hrtimers point of view
- * @timer_waiters:	A hrtimer_cancel() invocation waits for the timer
- *			callback to finish.
- * @expires_next:	absolute time of the next event, is required for remote
- *			hrtimer enqueue; it is the total first expiry time (hard
- *			and soft hrtimer are taken into account)
- * @next_timer:		Pointer to the first expiring timer
- * @softirq_expires_next: Time to check, if soft queues needs also to be expired
- * @softirq_next_timer: Pointer to the first expiring softirq based timer
- * @clock_base:		array of clock bases for this cpu
+ * @lock:			lock protecting the base and associated clock bases and timers
+ * @cpu:			cpu number
+ * @active_bases:		Bitfield to mark bases with active timers
+ * @clock_was_set_seq:		Sequence counter of clock was set events
+ * @hres_active:		State of high resolution mode
+ * @deferred_rearm:		A deferred rearm is pending
+ * @deferred_needs_update:	The deferred rearm must re-evaluate the first timer
+ * @hang_detected:		The last hrtimer interrupt detected a hang
+ * @softirq_activated:		displays, if the softirq is raised - update of softirq
+ *				related settings is not required then.
+ * @nr_events:			Total number of hrtimer interrupt events
+ * @nr_retries:			Total number of hrtimer interrupt retries
+ * @nr_hangs:			Total number of hrtimer interrupt hangs
+ * @max_hang_time:		Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock:	Lock which is taken while softirq based hrtimer are expired
+ * @online:			CPU is online from an hrtimers point of view
+ * @timer_waiters:		A hrtimer_cancel() waiters for the timer callback to finish.
+ * @expires_next:		Absolute time of the next event, is required for remote
+ *				hrtimer enqueue; it is the total first expiry time (hard
+ *				and soft hrtimer are taken into account)
+ * @next_timer:			Pointer to the first expiring timer
+ * @softirq_expires_next:	Time to check, if soft queues needs also to be expired
+ * @softirq_next_timer:		Pointer to the first expiring softirq based timer
+ * @deferred_expires_next:	Cached expires next value for deferred rearm
+ * @clock_base:			Array of clock bases for this cpu
  *
  * Note: next_timer is just an optimization for __remove_hrtimer().
  *	 Do not dereference the pointer because it is not reliable on
@@ -85,6 +84,7 @@ struct hrtimer_cpu_base {
 	unsigned int			clock_was_set_seq;
 	bool				hres_active;
 	bool				deferred_rearm;
+	bool				deferred_needs_update;
 	bool				hang_detected;
 	bool				softirq_activated;
 	bool				online;
@@ -102,6 +102,7 @@ struct hrtimer_cpu_base {
 	struct hrtimer			*next_timer;
 	ktime_t				softirq_expires_next;
 	struct hrtimer			*softirq_next_timer;
+	ktime_t				deferred_expires_next;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 	call_single_data_t		csd;
 } ____cacheline_aligned;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 2e5f0e292efb..e9592cb1e39a 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -919,8 +919,10 @@ static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, unsigned int act
 		return false;
 
 	/* If a deferred rearm is pending the remote CPU will take care of it */
-	if (cpu_base->deferred_rearm)
+	if (cpu_base->deferred_rearm) {
+		cpu_base->deferred_needs_update = true;
 		return false;
+	}
 
 	/*
 	 * Walk the affected clock bases and check whether the first expiring
@@ -1141,7 +1143,12 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *b
 	 * a local timer is removed to be immediately restarted. That's handled
 	 * at the call site.
 	 */
-	if (reprogram && timer == cpu_base->next_timer && !timer->is_lazy)
+	if (!reprogram || timer != cpu_base->next_timer || timer->is_lazy)
+		return;
+
+	if (cpu_base->deferred_rearm)
+		cpu_base->deferred_needs_update = true;
+	else
 		hrtimer_force_reprogram(cpu_base, /* skip_equal */ true);
 }
 
@@ -1328,8 +1335,10 @@ static bool __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 del
 	}
 
 	/* If a deferred rearm is pending skip reprogramming the device */
-	if (cpu_base->deferred_rearm)
+	if (cpu_base->deferred_rearm) {
+		cpu_base->deferred_needs_update = true;
 		return false;
+	}
 
 	if (!was_first || cpu_base != this_cpu_base) {
 		/*
@@ -1939,8 +1948,7 @@ static __latent_entropy void hrtimer_run_softirq(void)
  * Very similar to hrtimer_force_reprogram(), except it deals with
  * deferred_rearm and hang_detected.
  */
-static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now,
-			  ktime_t expires_next, bool deferred)
+static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next, bool deferred)
 {
 	cpu_base->expires_next = expires_next;
 	cpu_base->deferred_rearm = false;
@@ -1950,7 +1958,7 @@ static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 		 * Give the system a chance to do something else than looping
 		 * on hrtimer interrupts.
 		 */
-		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+		expires_next = ktime_add_ns(ktime_get(), 100 * NSEC_PER_MSEC);
 		cpu_base->hang_detected = false;
 	}
 	hrtimer_rearm_event(expires_next, deferred);
@@ -1960,27 +1968,36 @@ static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 void __hrtimer_rearm_deferred(void)
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
-	ktime_t now, expires_next;
+	ktime_t expires_next;
 
 	if (!cpu_base->deferred_rearm)
 		return;
 
 	guard(raw_spinlock)(&cpu_base->lock);
-	now = hrtimer_update_base(cpu_base);
-	expires_next = hrtimer_update_next_event(cpu_base);
-	hrtimer_rearm(cpu_base, now, expires_next, true);
+	if (cpu_base->deferred_needs_update) {
+		hrtimer_update_base(cpu_base);
+		expires_next = hrtimer_update_next_event(cpu_base);
+	} else {
+		/* No timer added/removed. Use the cached value */
+		expires_next = cpu_base->deferred_expires_next;
+	}
+	hrtimer_rearm(cpu_base, expires_next, true);
 }
 
 static __always_inline void
-hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now, ktime_t expires_next)
+hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next)
 {
+	/* hrtimer_interrupt() just re-evaluated the first expiring timer */
+	cpu_base->deferred_needs_update = false;
+	/* Cache the expiry time */
+	cpu_base->deferred_expires_next = expires_next;
 	set_thread_flag(TIF_HRTIMER_REARM);
 }
 #else  /* CONFIG_HRTIMER_REARM_DEFERRED */
 static __always_inline void
-hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now, ktime_t expires_next)
+hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next)
 {
-	hrtimer_rearm(cpu_base, now, expires_next, false);
+	hrtimer_rearm(cpu_base, expires_next, false);
 }
 #endif  /* !CONFIG_HRTIMER_REARM_DEFERRED */
 
@@ -2041,7 +2058,7 @@ retry:
 		cpu_base->hang_detected = true;
 	}
 
-	hrtimer_interrupt_rearm(cpu_base, now, expires_next);
+	hrtimer_interrupt_rearm(cpu_base, expires_next);
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }
 
-- 
cgit v1.2.3


From eddffab8282e388dddf032f3295fcec87eb08095 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:38:28 +0100
Subject: hrtimer: Keep track of first expiring timer per clock base

Evaluating the next expiry time of all clock bases is cache line expensive
as the expiry time of the first expiring timer is not cached in the base
and requires to access the timer itself, which is definitely in a different
cache line.

It's way more efficient to keep track of the expiry time on enqueue and
dequeue operations as the relevant data is already in the cache at that
point.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.404839710@kernel.org
---
 include/linux/hrtimer_defs.h |  2 ++
 kernel/time/hrtimer.c        | 37 ++++++++++++++++++++++++++++++++++---
 2 files changed, 36 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index b6846efec210..fb38df4c0b64 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -19,6 +19,7 @@
  *			timer to a base on another cpu.
  * @clockid:		clock id for per_cpu support
  * @seq:		seqcount around __run_hrtimer
+ * @expires_next:	Absolute time of the next event in this clock base
  * @running:		pointer to the currently running hrtimer
  * @active:		red black tree root node for the active timers
  * @offset:		offset of this clock to the monotonic base
@@ -28,6 +29,7 @@ struct hrtimer_clock_base {
 	unsigned int		index;
 	clockid_t		clockid;
 	seqcount_raw_spinlock_t	seq;
+	ktime_t			expires_next;
 	struct hrtimer		*running;
 	struct timerqueue_head	active;
 	ktime_t			offset;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e9592cb1e39a..d70899a9ddc1 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1107,7 +1107,18 @@ static bool enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *ba
 	/* Pairs with the lockless read in hrtimer_is_queued() */
 	WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED);
 
-	return timerqueue_add(&base->active, &timer->node);
+	if (!timerqueue_add(&base->active, &timer->node))
+		return false;
+
+	base->expires_next = hrtimer_get_expires(timer);
+	return true;
+}
+
+static inline void base_update_next_timer(struct hrtimer_clock_base *base)
+{
+	struct timerqueue_node *next = timerqueue_getnext(&base->active);
+
+	base->expires_next = next ? next->expires : KTIME_MAX;
 }
 
 /*
@@ -1122,6 +1133,7 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *b
 			     bool newstate, bool reprogram)
 {
 	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
+	bool was_first;
 
 	lockdep_assert_held(&cpu_base->lock);
 
@@ -1131,9 +1143,17 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *b
 	/* Pairs with the lockless read in hrtimer_is_queued() */
 	WRITE_ONCE(timer->is_queued, newstate);
 
+	was_first = &timer->node == timerqueue_getnext(&base->active);
+
 	if (!timerqueue_del(&base->active, &timer->node))
 		cpu_base->active_bases &= ~(1 << base->index);
 
+	/* Nothing to update if this was not the first timer in the base */
+	if (!was_first)
+		return;
+
+	base_update_next_timer(base);
+
 	/*
 	 * If reprogram is false don't update cpu_base->next_timer and do not
 	 * touch the clock event device.
@@ -1182,9 +1202,12 @@ static inline bool
 remove_and_enqueue_same_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 			     const enum hrtimer_mode mode, ktime_t expires, u64 delta_ns)
 {
+	bool was_first = false;
+
 	/* Remove it from the timer queue if active */
 	if (timer->is_queued) {
 		debug_hrtimer_deactivate(timer);
+		was_first = &timer->node == timerqueue_getnext(&base->active);
 		timerqueue_del(&base->active, &timer->node);
 	}
 
@@ -1197,8 +1220,16 @@ remove_and_enqueue_same_base(struct hrtimer *timer, struct hrtimer_clock_base *b
 	/* Pairs with the lockless read in hrtimer_is_queued() */
 	WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED);
 
-	/* Returns true if this is the first expiring timer */
-	return timerqueue_add(&base->active, &timer->node);
+	/* If it's the first expiring timer now or again, update base */
+	if (timerqueue_add(&base->active, &timer->node)) {
+		base->expires_next = expires;
+		return true;
+	}
+
+	if (was_first)
+		base_update_next_timer(base);
+
+	return false;
 }
 
 static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
-- 
cgit v1.2.3


From 671047943dce5af24e023bca3c5cc244d7565f5a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:38:47 +0100
Subject: rbtree: Provide rbtree with links

Some RB tree users require quick access to the next and the previous node,
e.g. to check whether a modification of the node results in a change of the
nodes position in the tree. If the node position does not change, then the
modification can happen in place without going through a full enqueue
requeue cycle. A upcoming use case for this are the timer queues of the
hrtimer subsystem as they can optimize for timers which are frequently
rearmed while enqueued.

This can be obviously achieved with rb_next() and rb_prev(), but those
turned out to be quite expensive for hotpath operations depending on the
tree depth.

Add a linked RB tree variant where add() and erase() maintain the links
between the nodes. Like the cached variant it provides a pointer to the
left most node in the root.

It intentionally does not use a [h]list head as there is no real need for
true list operations as the list is strictly coupled to the tree and
and cannot be manipulated independently.

It sets the nodes previous pointer to NULL for the left most node and the
next pointer to NULL for the right most node. This allows a quick check
especially for the left most node without consulting the list head address,
which creates better code.

Aside of the rb_leftmost cached pointer this could trivially provide a
rb_rightmost pointer as well, but there is no usage for that (yet).

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.668401024@kernel.org
---
 include/linux/rbtree.h       | 81 +++++++++++++++++++++++++++++++++++++++-----
 include/linux/rbtree_types.h | 16 +++++++++
 lib/rbtree.c                 | 17 ++++++++++
 3 files changed, 105 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 4091e978aef2..48acdc3889dd 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -35,10 +35,15 @@
 #define RB_CLEAR_NODE(node)  \
 	((node)->__rb_parent_color = (unsigned long)(node))
 
+#define RB_EMPTY_LINKED_NODE(lnode)  RB_EMPTY_NODE(&(lnode)->node)
+#define RB_CLEAR_LINKED_NODE(lnode)  ({					\
+	RB_CLEAR_NODE(&(lnode)->node);					\
+	(lnode)->prev = (lnode)->next = NULL;				\
+})
 
 extern void rb_insert_color(struct rb_node *, struct rb_root *);
 extern void rb_erase(struct rb_node *, struct rb_root *);
-
+extern bool rb_erase_linked(struct rb_node_linked *, struct rb_root_linked *);
 
 /* Find logical next and previous nodes in a tree */
 extern struct rb_node *rb_next(const struct rb_node *);
@@ -213,15 +218,10 @@ rb_add_cached(struct rb_node *node, struct rb_root_cached *tree,
 	return leftmost ? node : NULL;
 }
 
-/**
- * rb_add() - insert @node into @tree
- * @node: node to insert
- * @tree: tree to insert @node into
- * @less: operator defining the (partial) node order
- */
 static __always_inline void
-rb_add(struct rb_node *node, struct rb_root *tree,
-       bool (*less)(struct rb_node *, const struct rb_node *))
+__rb_add(struct rb_node *node, struct rb_root *tree,
+	 bool (*less)(struct rb_node *, const struct rb_node *),
+	 void (*linkop)(struct rb_node *, struct rb_node *, struct rb_node **))
 {
 	struct rb_node **link = &tree->rb_node;
 	struct rb_node *parent = NULL;
@@ -234,10 +234,73 @@ rb_add(struct rb_node *node, struct rb_root *tree,
 			link = &parent->rb_right;
 	}
 
+	linkop(node, parent, link);
 	rb_link_node(node, parent, link);
 	rb_insert_color(node, tree);
 }
 
+#define __node_2_linked_node(_n) \
+	rb_entry((_n), struct rb_node_linked, node)
+
+static inline void
+rb_link_linked_node(struct rb_node *node, struct rb_node *parent, struct rb_node **link)
+{
+	if (!parent)
+		return;
+
+	struct rb_node_linked *nnew = __node_2_linked_node(node);
+	struct rb_node_linked *npar = __node_2_linked_node(parent);
+
+	if (link == &parent->rb_left) {
+		nnew->prev = npar->prev;
+		nnew->next = npar;
+		npar->prev = nnew;
+		if (nnew->prev)
+			nnew->prev->next = nnew;
+	} else {
+		nnew->next = npar->next;
+		nnew->prev = npar;
+		npar->next = nnew;
+		if (nnew->next)
+			nnew->next->prev = nnew;
+	}
+}
+
+/**
+ * rb_add_linked() - insert @node into the leftmost linked tree @tree
+ * @node: node to insert
+ * @tree: linked tree to insert @node into
+ * @less: operator defining the (partial) node order
+ *
+ * Returns @true when @node is the new leftmost, @false otherwise.
+ */
+static __always_inline bool
+rb_add_linked(struct rb_node_linked *node, struct rb_root_linked *tree,
+	      bool (*less)(struct rb_node *, const struct rb_node *))
+{
+	__rb_add(&node->node, &tree->rb_root, less, rb_link_linked_node);
+	if (!node->prev)
+		tree->rb_leftmost = node;
+	return !node->prev;
+}
+
+/* Empty linkop function which is optimized away by the compiler */
+static __always_inline void
+rb_link_noop(struct rb_node *n, struct rb_node *p, struct rb_node **l) { }
+
+/**
+ * rb_add() - insert @node into @tree
+ * @node: node to insert
+ * @tree: tree to insert @node into
+ * @less: operator defining the (partial) node order
+ */
+static __always_inline void
+rb_add(struct rb_node *node, struct rb_root *tree,
+       bool (*less)(struct rb_node *, const struct rb_node *))
+{
+	__rb_add(node, tree, less, rb_link_noop);
+}
+
 /**
  * rb_find_add_cached() - find equivalent @node in @tree, or add @node
  * @node: node to look-for / insert
diff --git a/include/linux/rbtree_types.h b/include/linux/rbtree_types.h
index 45b6ecde3665..3c7ae53e8139 100644
--- a/include/linux/rbtree_types.h
+++ b/include/linux/rbtree_types.h
@@ -9,6 +9,12 @@ struct rb_node {
 } __attribute__((aligned(sizeof(long))));
 /* The alignment might seem pointless, but allegedly CRIS needs it */
 
+struct rb_node_linked {
+	struct rb_node		node;
+	struct rb_node_linked	*prev;
+	struct rb_node_linked	*next;
+};
+
 struct rb_root {
 	struct rb_node *rb_node;
 };
@@ -28,7 +34,17 @@ struct rb_root_cached {
 	struct rb_node *rb_leftmost;
 };
 
+/*
+ * Leftmost tree with links. This would allow a trivial rb_rightmost update,
+ * but that has been omitted due to the lack of users.
+ */
+struct rb_root_linked {
+	struct rb_root		rb_root;
+	struct rb_node_linked	*rb_leftmost;
+};
+
 #define RB_ROOT (struct rb_root) { NULL, }
 #define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
+#define RB_ROOT_LINKED (struct rb_root_linked) { {NULL, }, NULL }
 
 #endif
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 18d42bcf4ec9..5790d6ecba4e 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -446,6 +446,23 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 }
 EXPORT_SYMBOL(rb_erase);
 
+bool rb_erase_linked(struct rb_node_linked *node, struct rb_root_linked *root)
+{
+	if (node->prev)
+		node->prev->next = node->next;
+	else
+		root->rb_leftmost = node->next;
+
+	if (node->next)
+		node->next->prev = node->prev;
+
+	rb_erase(&node->node, &root->rb_root);
+	RB_CLEAR_LINKED_NODE(node);
+
+	return !!root->rb_leftmost;
+}
+EXPORT_SYMBOL_GPL(rb_erase_linked);
+
 /*
  * Augmented rbtree manipulation functions.
  *
-- 
cgit v1.2.3


From 1339eeb73d6b99cf3aa9981f3f91d6ac4a49c72e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:38:52 +0100
Subject: timerqueue: Provide linked timerqueue

The hrtimer subsystem wants to peak ahead to the next and previous timer to
evaluated whether a to be rearmed timer can stay at the same position in
the RB tree with the new expiry time.

The linked RB tree provides the infrastructure for this as it maintains
links to the previous and next nodes for each entry in the tree.

Provide timerqueue wrappers around that.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.734827095@kernel.org
---
 include/linux/timerqueue.h       | 56 ++++++++++++++++++++++++++++++++++------
 include/linux/timerqueue_types.h | 15 ++++++++---
 lib/timerqueue.c                 | 14 ++++++++++
 3 files changed, 74 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index d306d9dd2207..7d0aaa766580 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -5,12 +5,11 @@
 #include <linux/rbtree.h>
 #include <linux/timerqueue_types.h>
 
-extern bool timerqueue_add(struct timerqueue_head *head,
-			   struct timerqueue_node *node);
-extern bool timerqueue_del(struct timerqueue_head *head,
-			   struct timerqueue_node *node);
-extern struct timerqueue_node *timerqueue_iterate_next(
-						struct timerqueue_node *node);
+bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node);
+bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node);
+struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node);
+
+bool timerqueue_linked_add(struct timerqueue_linked_head *head, struct timerqueue_linked_node *node);
 
 /**
  * timerqueue_getnext - Returns the timer with the earliest expiration time
@@ -19,8 +18,7 @@ extern struct timerqueue_node *timerqueue_iterate_next(
  *
  * Returns a pointer to the timer node that has the earliest expiration time.
  */
-static inline
-struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
+static inline struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
 {
 	struct rb_node *leftmost = rb_first_cached(&head->rb_root);
 
@@ -41,4 +39,46 @@ static inline void timerqueue_init_head(struct timerqueue_head *head)
 {
 	head->rb_root = RB_ROOT_CACHED;
 }
+
+/* Timer queues with linked nodes */
+
+static __always_inline
+struct timerqueue_linked_node *timerqueue_linked_first(struct timerqueue_linked_head *head)
+{
+	return rb_entry_safe(head->rb_root.rb_leftmost, struct timerqueue_linked_node, node);
+}
+
+static __always_inline
+struct timerqueue_linked_node *timerqueue_linked_next(struct timerqueue_linked_node *node)
+{
+	return rb_entry_safe(node->node.next, struct timerqueue_linked_node, node);
+}
+
+static __always_inline
+struct timerqueue_linked_node *timerqueue_linked_prev(struct timerqueue_linked_node *node)
+{
+	return rb_entry_safe(node->node.prev, struct timerqueue_linked_node, node);
+}
+
+static __always_inline
+bool timerqueue_linked_del(struct timerqueue_linked_head *head, struct timerqueue_linked_node *node)
+{
+	return rb_erase_linked(&node->node, &head->rb_root);
+}
+
+static __always_inline void timerqueue_linked_init(struct timerqueue_linked_node *node)
+{
+	RB_CLEAR_LINKED_NODE(&node->node);
+}
+
+static __always_inline bool timerqueue_linked_node_queued(struct timerqueue_linked_node *node)
+{
+	return !RB_EMPTY_LINKED_NODE(&node->node);
+}
+
+static __always_inline void timerqueue_linked_init_head(struct timerqueue_linked_head *head)
+{
+	head->rb_root = RB_ROOT_LINKED;
+}
+
 #endif /* _LINUX_TIMERQUEUE_H */
diff --git a/include/linux/timerqueue_types.h b/include/linux/timerqueue_types.h
index dc298d0923e3..be2218b147c4 100644
--- a/include/linux/timerqueue_types.h
+++ b/include/linux/timerqueue_types.h
@@ -6,12 +6,21 @@
 #include <linux/types.h>
 
 struct timerqueue_node {
-	struct rb_node node;
-	ktime_t expires;
+	struct rb_node		node;
+	ktime_t			expires;
 };
 
 struct timerqueue_head {
-	struct rb_root_cached rb_root;
+	struct rb_root_cached	rb_root;
+};
+
+struct timerqueue_linked_node {
+	struct rb_node_linked		node;
+	ktime_t				expires;
+};
+
+struct timerqueue_linked_head {
+	struct rb_root_linked		rb_root;
 };
 
 #endif /* _LINUX_TIMERQUEUE_TYPES_H */
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index cdb9c7658478..e2a1e08cb4bd 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -82,3 +82,17 @@ struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
 	return container_of(next, struct timerqueue_node, node);
 }
 EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
+
+#define __node_2_tq_linked(_n) \
+	container_of(rb_entry((_n), struct rb_node_linked, node), struct timerqueue_linked_node, node)
+
+static __always_inline bool __tq_linked_less(struct rb_node *a, const struct rb_node *b)
+{
+	return __node_2_tq_linked(a)->expires < __node_2_tq_linked(b)->expires;
+}
+
+bool timerqueue_linked_add(struct timerqueue_linked_head *head, struct timerqueue_linked_node *node)
+{
+	return rb_add_linked(&node->node, &head->rb_root, __tq_linked_less);
+}
+EXPORT_SYMBOL_GPL(timerqueue_linked_add);
-- 
cgit v1.2.3


From b7418e6e9b87b849af4df93d527ff83498d1e4c3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 24 Feb 2026 17:38:57 +0100
Subject: hrtimer: Use linked timerqueue

To prepare for optimizing the rearming of enqueued timers, switch to the
linked timerqueue. That allows to check whether the new expiry time changes
the position of the timer in the RB tree or not, by checking the new expiry
time against the previous and the next timers expiry.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.806643179@kernel.org
---
 include/linux/hrtimer_defs.h  | 16 ++++++++--------
 include/linux/hrtimer_types.h |  8 ++++----
 kernel/time/hrtimer.c         | 34 +++++++++++++++++-----------------
 kernel/time/timer_list.c      | 10 ++++------
 4 files changed, 33 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index fb38df4c0b64..0f851b2432c3 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -25,14 +25,14 @@
  * @offset:		offset of this clock to the monotonic base
  */
 struct hrtimer_clock_base {
-	struct hrtimer_cpu_base	*cpu_base;
-	unsigned int		index;
-	clockid_t		clockid;
-	seqcount_raw_spinlock_t	seq;
-	ktime_t			expires_next;
-	struct hrtimer		*running;
-	struct timerqueue_head	active;
-	ktime_t			offset;
+	struct hrtimer_cpu_base		*cpu_base;
+	unsigned int			index;
+	clockid_t			clockid;
+	seqcount_raw_spinlock_t		seq;
+	ktime_t				expires_next;
+	struct hrtimer			*running;
+	struct timerqueue_linked_head	active;
+	ktime_t				offset;
 } __hrtimer_clock_base_align;
 
 enum  hrtimer_base_type {
diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h
index 0e22bc91d00f..b5dacc8271a4 100644
--- a/include/linux/hrtimer_types.h
+++ b/include/linux/hrtimer_types.h
@@ -17,7 +17,7 @@ enum hrtimer_restart {
 
 /**
  * struct hrtimer - the basic hrtimer structure
- * @node:	timerqueue node, which also manages node.expires,
+ * @node:	Linked timerqueue node, which also manages node.expires,
  *		the absolute expiry time in the hrtimers internal
  *		representation. The time is related to the clock on
  *		which the timer is based. Is setup by adding
@@ -39,15 +39,15 @@ enum hrtimer_restart {
  * The hrtimer structure must be initialized by hrtimer_setup()
  */
 struct hrtimer {
-	struct timerqueue_node		node;
-	ktime_t				_softexpires;
-	enum hrtimer_restart		(*__private function)(struct hrtimer *);
+	struct timerqueue_linked_node	node;
 	struct hrtimer_clock_base	*base;
 	bool				is_queued;
 	bool				is_rel;
 	bool				is_soft;
 	bool				is_hard;
 	bool				is_lazy;
+	ktime_t				_softexpires;
+	enum hrtimer_restart		(*__private function)(struct hrtimer *);
 };
 
 #endif /* _LINUX_HRTIMER_TYPES_H */
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index d1e58482e0a9..5e45982363ce 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -557,10 +557,10 @@ static ktime_t hrtimer_bases_next_event_without(struct hrtimer_cpu_base *cpu_bas
 		 * If the excluded timer is the first on this base evaluate the
 		 * next timer.
 		 */
-		struct timerqueue_node *node = timerqueue_getnext(&base->active);
+		struct timerqueue_linked_node *node = timerqueue_linked_first(&base->active);
 
 		if (unlikely(&exclude->node == node)) {
-			node = timerqueue_iterate_next(node);
+			node = timerqueue_linked_next(node);
 			if (!node)
 				continue;
 			expires = ktime_sub(node->expires, base->offset);
@@ -576,7 +576,7 @@ static ktime_t hrtimer_bases_next_event_without(struct hrtimer_cpu_base *cpu_bas
 
 static __always_inline struct hrtimer *clock_base_next_timer(struct hrtimer_clock_base *base)
 {
-	struct timerqueue_node *next = timerqueue_getnext(&base->active);
+	struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active);
 
 	return container_of(next, struct hrtimer, node);
 }
@@ -938,9 +938,9 @@ static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, unsigned int act
 	active &= cpu_base->active_bases;
 
 	for_each_active_base(base, cpu_base, active) {
-		struct timerqueue_node *next;
+		struct timerqueue_linked_node *next;
 
-		next = timerqueue_getnext(&base->active);
+		next = timerqueue_linked_first(&base->active);
 		expires = ktime_sub(next->expires, base->offset);
 		if (expires < cpu_base->expires_next)
 			return true;
@@ -1112,7 +1112,7 @@ static bool enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *ba
 	/* Pairs with the lockless read in hrtimer_is_queued() */
 	WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED);
 
-	if (!timerqueue_add(&base->active, &timer->node))
+	if (!timerqueue_linked_add(&base->active, &timer->node))
 		return false;
 
 	base->expires_next = hrtimer_get_expires(timer);
@@ -1121,7 +1121,7 @@ static bool enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *ba
 
 static inline void base_update_next_timer(struct hrtimer_clock_base *base)
 {
-	struct timerqueue_node *next = timerqueue_getnext(&base->active);
+	struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active);
 
 	base->expires_next = next ? next->expires : KTIME_MAX;
 }
@@ -1148,9 +1148,9 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *b
 	/* Pairs with the lockless read in hrtimer_is_queued() */
 	WRITE_ONCE(timer->is_queued, newstate);
 
-	was_first = &timer->node == timerqueue_getnext(&base->active);
+	was_first = !timerqueue_linked_prev(&timer->node);
 
-	if (!timerqueue_del(&base->active, &timer->node))
+	if (!timerqueue_linked_del(&base->active, &timer->node))
 		cpu_base->active_bases &= ~(1 << base->index);
 
 	/* Nothing to update if this was not the first timer in the base */
@@ -1212,8 +1212,8 @@ remove_and_enqueue_same_base(struct hrtimer *timer, struct hrtimer_clock_base *b
 	/* Remove it from the timer queue if active */
 	if (timer->is_queued) {
 		debug_hrtimer_deactivate(timer);
-		was_first = &timer->node == timerqueue_getnext(&base->active);
-		timerqueue_del(&base->active, &timer->node);
+		was_first = !timerqueue_linked_prev(&timer->node);
+		timerqueue_linked_del(&base->active, &timer->node);
 	}
 
 	/* Set the new expiry time */
@@ -1226,7 +1226,7 @@ remove_and_enqueue_same_base(struct hrtimer *timer, struct hrtimer_clock_base *b
 	WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED);
 
 	/* If it's the first expiring timer now or again, update base */
-	if (timerqueue_add(&base->active, &timer->node)) {
+	if (timerqueue_linked_add(&base->active, &timer->node)) {
 		base->expires_next = expires;
 		return true;
 	}
@@ -1758,7 +1758,7 @@ static void __hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*fn)(st
 	timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
 	timer->is_lazy = !!(mode & HRTIMER_MODE_LAZY_REARM);
 	timer->base = &cpu_base->clock_base[base];
-	timerqueue_init(&timer->node);
+	timerqueue_linked_init(&timer->node);
 
 	if (WARN_ON_ONCE(!fn))
 		ACCESS_PRIVATE(timer, function) = hrtimer_dummy_timeout;
@@ -1923,7 +1923,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_cloc
 
 static __always_inline struct hrtimer *clock_base_next_timer_safe(struct hrtimer_clock_base *base)
 {
-	struct timerqueue_node *next = timerqueue_getnext(&base->active);
+	struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active);
 
 	return next ? container_of(next, struct hrtimer, node) : NULL;
 }
@@ -2369,7 +2369,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 
 		clock_b->cpu_base = cpu_base;
 		seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
-		timerqueue_init_head(&clock_b->active);
+		timerqueue_linked_init_head(&clock_b->active);
 	}
 
 	cpu_base->cpu = cpu;
@@ -2399,10 +2399,10 @@ int hrtimers_cpu_starting(unsigned int cpu)
 static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 				struct hrtimer_clock_base *new_base)
 {
-	struct timerqueue_node *node;
+	struct timerqueue_linked_node *node;
 	struct hrtimer *timer;
 
-	while ((node = timerqueue_getnext(&old_base->active))) {
+	while ((node = timerqueue_linked_first(&old_base->active))) {
 		timer = container_of(node, struct hrtimer, node);
 		BUG_ON(hrtimer_callback_running(timer));
 		debug_hrtimer_deactivate(timer);
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 19e61826b7de..e2e14fd1b466 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -56,13 +56,11 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
 		(long long)(ktime_to_ns(hrtimer_get_expires(timer)) - now));
 }
 
-static void
-print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
-		    u64 now)
+static void print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
 {
+	struct timerqueue_linked_node *curr;
 	struct hrtimer *timer, tmp;
 	unsigned long next = 0, i;
-	struct timerqueue_node *curr;
 	unsigned long flags;
 
 next_one:
@@ -72,13 +70,13 @@ next_one:
 
 	raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
 
-	curr = timerqueue_getnext(&base->active);
+	curr = timerqueue_linked_first(&base->active);
 	/*
 	 * Crude but we have to do this O(N*N) thing, because
 	 * we have to unlock the base when printing:
 	 */
 	while (curr && i < next) {
-		curr = timerqueue_iterate_next(curr);
+		curr = timerqueue_linked_next(curr);
 		i++;
 	}
 
-- 
cgit v1.2.3


From 38e18d825f7281fdc16d3241df5115ce6eaeaf79 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 25 Feb 2026 10:32:41 -0800
Subject: locking: Fix rwlock and spinlock lock context annotations

Fix two incorrect rwlock_t lock context annotations. Add the raw_spinlock_t
lock context annotations that are missing.

Fixes: f16a802d402d ("locking/rwlock, spinlock: Support Clang's context analysis")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Marco Elver <elver@google.com>
Link: https://patch.msgid.link/20260225183244.4035378-2-bvanassche@acm.org
---
 include/linux/rwlock.h         | 4 ++--
 include/linux/rwlock_api_smp.h | 6 ++++--
 include/linux/spinlock.h       | 3 ++-
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 3390d21c95dd..21ceefc4a49f 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -30,10 +30,10 @@ do {								\
 
 #ifdef CONFIG_DEBUG_SPINLOCK
  extern void do_raw_read_lock(rwlock_t *lock) __acquires_shared(lock);
- extern int do_raw_read_trylock(rwlock_t *lock);
+ extern int do_raw_read_trylock(rwlock_t *lock) __cond_acquires_shared(true, lock);
  extern void do_raw_read_unlock(rwlock_t *lock) __releases_shared(lock);
  extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock);
- extern int do_raw_write_trylock(rwlock_t *lock);
+extern int do_raw_write_trylock(rwlock_t *lock) __cond_acquires(true, lock);
  extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock);
 #else
 # define do_raw_read_lock(rwlock)	do {__acquire_shared(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0)
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index 61a852609eab..9e02a5f28cd1 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -23,7 +23,7 @@ void __lockfunc _raw_write_lock_bh(rwlock_t *lock)	__acquires(lock);
 void __lockfunc _raw_read_lock_irq(rwlock_t *lock)	__acquires_shared(lock);
 void __lockfunc _raw_write_lock_irq(rwlock_t *lock)	__acquires(lock);
 unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
-							__acquires(lock);
+							__acquires_shared(lock);
 unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
 							__acquires(lock);
 int __lockfunc _raw_read_trylock(rwlock_t *lock)	__cond_acquires_shared(true, lock);
@@ -36,7 +36,7 @@ void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)	__releases_shared(lock);
 void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)	__releases(lock);
 void __lockfunc
 _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
-							__releases(lock);
+							__releases_shared(lock);
 void __lockfunc
 _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 							__releases(lock);
@@ -116,6 +116,7 @@ _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 #endif
 
 static inline int __raw_read_trylock(rwlock_t *lock)
+	__cond_acquires_shared(true, lock)
 {
 	preempt_disable();
 	if (do_raw_read_trylock(lock)) {
@@ -127,6 +128,7 @@ static inline int __raw_read_trylock(rwlock_t *lock)
 }
 
 static inline int __raw_write_trylock(rwlock_t *lock)
+	__cond_acquires(true, lock)
 {
 	preempt_disable();
 	if (do_raw_write_trylock(lock)) {
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index e1e2f144af9b..241277cd34cf 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -178,7 +178,7 @@ do {									\
 
 #ifdef CONFIG_DEBUG_SPINLOCK
  extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
- extern int do_raw_spin_trylock(raw_spinlock_t *lock);
+ extern int do_raw_spin_trylock(raw_spinlock_t *lock) __cond_acquires(true, lock);
  extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock);
 #else
 static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
@@ -189,6 +189,7 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
 }
 
 static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
+	__cond_acquires(true, lock)
 {
 	int ret = arch_spin_trylock(&(lock)->raw_lock);
 
-- 
cgit v1.2.3


From 39be7b21af24d1d2ed3b18caac57dd219fef226e Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 25 Feb 2026 10:32:42 -0800
Subject: signal: Fix the lock_task_sighand() annotation

lock_task_sighand() may return NULL. Make this clear in its lock context
annotation.

Fixes: 04e49d926f43 ("sched: Enable context analysis for core.c and fair.c")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Marco Elver <elver@google.com>
Link: https://patch.msgid.link/20260225183244.4035378-3-bvanassche@acm.org
---
 include/linux/sched/signal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index a22248aebcf9..a4835a7de07e 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -739,7 +739,7 @@ static inline int thread_group_empty(struct task_struct *p)
 
 extern struct sighand_struct *lock_task_sighand(struct task_struct *task,
 						unsigned long *flags)
-	__acquires(&task->sighand->siglock);
+	__cond_acquires(nonnull, &task->sighand->siglock);
 
 static inline void unlock_task_sighand(struct task_struct *task,
 						unsigned long *flags)
-- 
cgit v1.2.3


From 3dcef70e41ab13483803c536ddea8d5f1803ee25 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 25 Feb 2026 10:32:43 -0800
Subject: ww-mutex: Fix the ww_acquire_ctx function annotations

The ww_acquire_done() call is optional. Reflect this in the annotations of
ww_acquire_done().

Fixes: 47907461e4f6 ("locking/ww_mutex: Support Clang's context analysis")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Marco Elver <elver@google.com>
Link: https://patch.msgid.link/20260225183244.4035378-4-bvanassche@acm.org
---
 include/linux/ww_mutex.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 85b1fff02fde..0c95ead5a297 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -181,7 +181,7 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
  * data structures.
  */
 static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
-	__releases(ctx) __acquires_shared(ctx) __no_context_analysis
+	__must_hold(ctx)
 {
 #ifdef DEBUG_WW_MUTEXES
 	lockdep_assert_held(ctx);
@@ -199,7 +199,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
  * mutexes have been released with ww_mutex_unlock.
  */
 static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
-	__releases_shared(ctx) __no_context_analysis
+	__releases(ctx) __no_context_analysis
 {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	mutex_release(&ctx->first_lock_dep_map, _THIS_IP_);
-- 
cgit v1.2.3


From 4a91a33f15c634fb3477d122bdf1eef098d77ee3 Mon Sep 17 00:00:00 2001
From: Mallesh Koujalagi <mallesh.koujalagi@intel.com>
Date: Fri, 27 Feb 2026 14:24:01 +0530
Subject: workqueue: Update documentation as per system_percpu_wq naming

Update documentation to use "per-CPU workqueue" instead of
"global workqueue" to match the system_wq to system_percpu_wq
rename. The workqueue behavior remains unchanged; this just
aligns terminology with the clearer naming.

Fixes: a2be943b46b4 ("workqueue: replace use of system_wq with system_percpu_wq")
Signed-off-by: Mallesh Koujalagi <mallesh.koujalagi@intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a4749f56398f..fc5744402a66 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -712,14 +712,14 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work)
 }
 
 /**
- * schedule_work - put work task in global workqueue
+ * schedule_work - put work task in per-CPU workqueue
  * @work: job to be done
  *
- * Returns %false if @work was already on the kernel-global workqueue and
+ * Returns %false if @work was already on the system per-CPU workqueue and
  * %true otherwise.
  *
- * This puts a job in the kernel-global workqueue if it was not already
- * queued and leaves it in the same position on the kernel-global
+ * This puts a job in the system per-CPU workqueue if it was not already
+ * queued and leaves it in the same position on the system per-CPU
  * workqueue otherwise.
  *
  * Shares the same memory-ordering properties of queue_work(), cf. the
@@ -796,12 +796,12 @@ extern void __warn_flushing_systemwide_wq(void)
 })
 
 /**
- * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
+ * schedule_delayed_work_on - queue work in per-CPU workqueue on CPU after delay
  * @cpu: cpu to use
  * @dwork: job to be done
  * @delay: number of jiffies to wait
  *
- * After waiting for a given time this puts a job in the kernel-global
+ * After waiting for a given time this puts a job in the system per-CPU
  * workqueue on the specified CPU.
  */
 static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
@@ -811,11 +811,11 @@ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
 }
 
 /**
- * schedule_delayed_work - put work task in global workqueue after delay
+ * schedule_delayed_work - put work task in per-CPU workqueue after delay
  * @dwork: job to be done
  * @delay: number of jiffies to wait or 0 for immediate execution
  *
- * After waiting for a given time this puts a job in the kernel-global
+ * After waiting for a given time this puts a job in the system per-CPU
  * workqueue.
  */
 static inline bool schedule_delayed_work(struct delayed_work *dwork,
-- 
cgit v1.2.3


From e1092d5e15e6a9b168bf830af9a26d7ea17cd57d Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 24 Feb 2026 12:10:44 +0100
Subject: PCI/PTM: Do not enable PTM automatically for Root and Switch Upstream
 Ports

Currently we enable PTM automatically for Root and Switch Upstream Ports if
the advertised capabilities support the relevant role. However, there are a
few issues with this. First of all, if there is no Endpoint that actually
needs the PTM functionality, this is just wasting link bandwidth. There are
just a couple of drivers calling pci_ptm_enable() in the tree.

Secondly, we do the enablement in pci_ptm_init() that is called pretty
early for the Switch Upstream Port before Downstream Ports are even
enumerated. Since the Upstream Port configuration affects the whole Switch,
enabling it this early might cause PTM requests to be sent. We actually do
see effects of this:

  pcieport 0000:00:07.1: pciehp: Slot(6-1): Card present
  pcieport 0000:00:07.1: pciehp: Slot(6-1): Link Up
  pci 0000:2c:00.0: [8086:5786] type 01 class 0x060400 PCIe Switch Upstream Port
  ...
  pci 0000:2c:00.0: PTM enabled, 4ns granularity

At this point we have only enumerated the Switch Upstream Port and now
PTM got enabled which immediately triggers a flood of errors:

  pcieport 0000:00:07.1: AER: Multiple Uncorrectable (Non-Fatal) error message received from 0000:00:07.1
  pcieport 0000:00:07.1: PCIe Bus Error: severity=Uncorrectable (Non-Fatal), type=Transaction Layer, (Receiver ID)
  pcieport 0000:00:07.1:   device [8086:d44f] error status/mask=00200000/00000000
  pcieport 0000:00:07.1:    [21] ACSViol                (First)
  pcieport 0000:00:07.1: AER:   TLP Header: 0x34000000 0x00000052 0x00000000 0x00000000
  pcieport 0000:00:07.1: AER: device recovery successful
  pcieport 0000:00:07.1: AER: Uncorrectable (Non-Fatal) error message received from 0000:00:07.1

In the above TLP Header the Requester ID is 0 which causes an error as we
have ACS Source Validation enabled.

Change the PTM enablement to happen at the time pci_enable_ptm() is called.
It will try to enable PTM first for upstream devices before enabling for
the Endpoint itself. For disable path we need to keep count of how many
times PTM has been enabled and disable it only on the last, so change the
dev->ptm_enabled to a counter (and rename it to dev->ptm_enable_cnt
analogous to dev->pci_enable_cnt).

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20260224111044.3487873-6-mika.westerberg@linux.intel.com
---
 drivers/pci/pcie/ptm.c | 68 +++++++++++++++++++++++++++++---------------------
 include/linux/pci.h    |  2 +-
 2 files changed, 40 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
index 2c848ae4f15f..a41ffd1914de 100644
--- a/drivers/pci/pcie/ptm.c
+++ b/drivers/pci/pcie/ptm.c
@@ -52,6 +52,7 @@ void pci_ptm_init(struct pci_dev *dev)
 		return;
 
 	dev->ptm_cap = ptm;
+	atomic_set(&dev->ptm_enable_cnt, 0);
 	pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_PTM, sizeof(u32));
 
 	pci_read_config_dword(dev, ptm + PCI_PTM_CAP, &cap);
@@ -85,10 +86,6 @@ void pci_ptm_init(struct pci_dev *dev)
 		dev->ptm_responder = 1;
 	if (cap & PCI_PTM_CAP_REQ)
 		dev->ptm_requester = 1;
-
-	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
-	    pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM)
-		pci_enable_ptm(dev);
 }
 
 void pci_save_ptm_state(struct pci_dev *dev)
@@ -129,26 +126,11 @@ void pci_restore_ptm_state(struct pci_dev *dev)
 static int __pci_enable_ptm(struct pci_dev *dev)
 {
 	u16 ptm = dev->ptm_cap;
-	struct pci_dev *ups;
 	u32 ctrl;
 
 	if (!ptm)
 		return -EINVAL;
 
-	/*
-	 * A device uses local PTM Messages to request time information
-	 * from a PTM Root that's farther upstream.  Every device along the
-	 * path must support PTM and have it enabled so it can handle the
-	 * messages.  Therefore, if this device is not a PTM Root, the
-	 * upstream link partner must have PTM enabled before we can enable
-	 * PTM.
-	 */
-	if (!dev->ptm_root) {
-		ups = pci_upstream_ptm(dev);
-		if (!ups || !ups->ptm_enabled)
-			return -EINVAL;
-	}
-
 	switch (pci_pcie_type(dev)) {
 	case PCI_EXP_TYPE_ROOT_PORT:
 		if (!dev->ptm_root)
@@ -193,11 +175,35 @@ int pci_enable_ptm(struct pci_dev *dev)
 	int rc;
 	char clock_desc[8];
 
+	/*
+	 * A device uses local PTM Messages to request time information
+	 * from a PTM Root that's farther upstream. Every device along
+	 * the path must support PTM and have it enabled so it can
+	 * handle the messages. Therefore, if this device is not a PTM
+	 * Root, the upstream link partner must have PTM enabled before
+	 * we can enable PTM.
+	 */
+	if (!dev->ptm_root) {
+		struct pci_dev *parent;
+
+		parent = pci_upstream_ptm(dev);
+		if (!parent)
+			return -EINVAL;
+		/* Enable PTM for the parent */
+		rc = pci_enable_ptm(parent);
+		if (rc)
+			return rc;
+	}
+
+	/* Already enabled? */
+	if (atomic_inc_return(&dev->ptm_enable_cnt) > 1)
+		return 0;
+
 	rc = __pci_enable_ptm(dev);
-	if (rc)
+	if (rc) {
+		atomic_dec(&dev->ptm_enable_cnt);
 		return rc;
-
-	dev->ptm_enabled = 1;
+	}
 
 	switch (dev->ptm_granularity) {
 	case 0:
@@ -239,27 +245,31 @@ static void __pci_disable_ptm(struct pci_dev *dev)
  */
 void pci_disable_ptm(struct pci_dev *dev)
 {
-	if (dev->ptm_enabled) {
+	struct pci_dev *parent;
+
+	if (atomic_dec_and_test(&dev->ptm_enable_cnt))
 		__pci_disable_ptm(dev);
-		dev->ptm_enabled = 0;
-	}
+
+	parent = pci_upstream_ptm(dev);
+	if (parent)
+		pci_disable_ptm(parent);
 }
 EXPORT_SYMBOL(pci_disable_ptm);
 
 /*
- * Disable PTM, but preserve dev->ptm_enabled so we silently re-enable it on
+ * Disable PTM, but preserve dev->ptm_enable_cnt so we silently re-enable it on
  * resume if necessary.
  */
 void pci_suspend_ptm(struct pci_dev *dev)
 {
-	if (dev->ptm_enabled)
+	if (atomic_read(&dev->ptm_enable_cnt))
 		__pci_disable_ptm(dev);
 }
 
 /* If PTM was enabled before suspend, re-enable it when resuming */
 void pci_resume_ptm(struct pci_dev *dev)
 {
-	if (dev->ptm_enabled)
+	if (atomic_read(&dev->ptm_enable_cnt))
 		__pci_enable_ptm(dev);
 }
 
@@ -268,7 +278,7 @@ bool pcie_ptm_enabled(struct pci_dev *dev)
 	if (!dev)
 		return false;
 
-	return dev->ptm_enabled;
+	return atomic_read(&dev->ptm_enable_cnt);
 }
 EXPORT_SYMBOL(pcie_ptm_enabled);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8aaa72dcb164..c620d4b6c52e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -518,7 +518,7 @@ struct pci_dev {
 	unsigned int	ptm_root:1;
 	unsigned int	ptm_responder:1;
 	unsigned int	ptm_requester:1;
-	unsigned int	ptm_enabled:1;
+	atomic_t	ptm_enable_cnt;
 	u8		ptm_granularity;
 #endif
 #ifdef CONFIG_PCI_MSI
-- 
cgit v1.2.3


From 11c0663a595801b6e6f7a937adec8532706ef486 Mon Sep 17 00:00:00 2001
From: Jens Emil Schulz Østergaard <jensemil.schulzostergaard@microchip.com>
Date: Thu, 26 Feb 2026 09:24:19 +0100
Subject: net: phy: micrel: Add support for lan9645x internal phy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LAN9645X is a family of switch chips with 5 internal copper phys. The
internal PHY is based on parts of LAN8832. This is a low-power, single
port triple-speed (10BASE-T/100BASE-TX/1000BASE-T) ethernet physical
layer transceiver (PHY) that supports transmission and reception of data
on standard CAT-5, as well as CAT-5e and CAT-6 Unshielded Twisted
Pair (UTP) cables.

Add support for the internal PHY of the lan9645x chip family.

Reviewed-by: Steen Hegelund <Steen.Hegelund@microchip.com>
Reviewed-by: Daniel Machon <daniel.machon@microchip.com>
Signed-off-by: Jens Emil Schulz Østergaard <jensemil.schulzostergaard@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/20260226-phy_micrel_add_support_for_lan9645x_internal_phy-v3-1-1fe82379962b@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/micrel.c   | 152 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/micrel_phy.h |   1 +
 2 files changed, 153 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index c6b011a9d636..2aa1dedd21b8 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -6523,6 +6523,142 @@ static void lan8842_get_phy_stats(struct phy_device *phydev,
 	stats->tx_errors = priv->phy_stats.tx_errors;
 }
 
+#define LAN9645X_CTRL_REG			0x1f
+#define LAN9645X_CTRL_REG_SW_SOFT_RST		BIT(1)
+
+#define LAN9645X_DAC_ICAS_AMP_POWER_DOWN	0x47
+#define LAN9645X_BTRX_QBIAS_POWER_DOWN		0x46
+#define LAN9645X_TX_LOW_I_CH_CD_POWER_MGMT	0x45
+#define LAN9645X_TX_LOW_I_CH_B_POWER_MGMT	0x44
+#define LAN9645X_TX_LOW_I_CH_A_POWER_MGMT	0x43
+
+static const struct lanphy_reg_data force_dac_tx_errata[] = {
+	/* Force channel A/B/C/D TX on */
+	{ LAN8814_PAGE_POWER_REGS,
+	  LAN9645X_DAC_ICAS_AMP_POWER_DOWN,
+	  0 },
+	/* Force channel A/B/C/D QBias on */
+	{ LAN8814_PAGE_POWER_REGS,
+	  LAN9645X_BTRX_QBIAS_POWER_DOWN,
+	  0xaa },
+	/* Tx low I on channel C/D overwrite */
+	{ LAN8814_PAGE_POWER_REGS,
+	  LAN9645X_TX_LOW_I_CH_CD_POWER_MGMT,
+	  0xbfff },
+	/* Channel B low I overwrite */
+	{ LAN8814_PAGE_POWER_REGS,
+	  LAN9645X_TX_LOW_I_CH_B_POWER_MGMT,
+	  0xabbf },
+	/* Channel A low I overwrite */
+	{ LAN8814_PAGE_POWER_REGS,
+	  LAN9645X_TX_LOW_I_CH_A_POWER_MGMT,
+	  0xbd3f },
+};
+
+static int lan9645x_config_init(struct phy_device *phydev)
+{
+	int ret;
+
+	/* Apply erratas from previous generations.  */
+	ret = lan8842_erratas(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* Apply errata for an issue where bringing a port down, can cause a few
+	 * CRC errors for traffic flowing through adjacent ports.
+	 */
+	return lanphy_write_reg_data(phydev, force_dac_tx_errata,
+				     ARRAY_SIZE(force_dac_tx_errata));
+}
+
+static int lan9645x_suspend(struct phy_device *phydev)
+{
+	int ret, val;
+
+	/* Force link down before software power down (SPD), by doing software
+	 * soft reset. This resets the PHY, but keeps all register configuration
+	 * intact. The bit self clears.
+	 *
+	 * This is needed as a workaround for an issue where performing SPD on a
+	 * port can bring adjacent ports down, when there is traffic flowing
+	 * through the ports.
+	 */
+	ret = phy_set_bits(phydev, LAN9645X_CTRL_REG,
+			   LAN9645X_CTRL_REG_SW_SOFT_RST);
+	if (ret)
+		return ret;
+
+	ret = phy_read_poll_timeout(phydev, LAN9645X_CTRL_REG, val,
+				    !(val & LAN9645X_CTRL_REG_SW_SOFT_RST),
+				    3000, 100000, true);
+	if (ret)
+		return ret;
+
+	return genphy_suspend(phydev);
+}
+
+static int lan9645x_config_intr(struct phy_device *phydev)
+{
+	int err;
+
+	/* enable / disable interrupts */
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+		/* This is an internal PHY of lan9645x and is not possible to
+		 * change the polarity of irq sources in the OIC (CPU_INTR)
+		 * found in lan9645x. Therefore change the polarity of the
+		 * interrupt in the PHY from being active low instead of active
+		 * high.
+		 */
+		err = phy_write(phydev, LAN8804_CONTROL,
+				LAN8804_CONTROL_INTR_POLARITY);
+		if (err)
+			return err;
+
+		/* By default interrupt buffer is open-drain in which case the
+		 * interrupt can be active only low. Therefore change the
+		 * interrupt buffer to be push-pull to be able to change
+		 * interrupt polarity.
+		 */
+		err = phy_write(phydev, LAN8804_OUTPUT_CONTROL,
+				LAN8804_OUTPUT_CONTROL_INTR_BUFFER);
+		if (err)
+			return err;
+
+		err = lan8814_ack_interrupt(phydev);
+		if (err)
+			return err;
+
+		err = phy_write(phydev, LAN8814_INTC,
+				LAN8814_INT_LINK | LAN8814_INT_FLF);
+	} else {
+		err = phy_write(phydev, LAN8814_INTC, 0);
+		if (err)
+			return err;
+
+		err = lan8814_ack_interrupt(phydev);
+	}
+
+	return err;
+}
+
+static irqreturn_t lan9645x_handle_interrupt(struct phy_device *phydev)
+{
+	int status;
+
+	status = phy_read(phydev, LAN8814_INTS);
+	if (status < 0) {
+		phy_error(phydev);
+		return IRQ_NONE;
+	}
+
+	if (status & (LAN8814_INT_LINK | LAN8814_INT_FLF)) {
+		phy_trigger_machine(phydev);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
 static struct phy_driver ksphy_driver[] = {
 {
 	PHY_ID_MATCH_MODEL(PHY_ID_KS8737),
@@ -6761,6 +6897,21 @@ static struct phy_driver ksphy_driver[] = {
 	.set_tunable	= lan8842_set_tunable,
 	.cable_test_start	= lan8814_cable_test_start,
 	.cable_test_get_status	= ksz886x_cable_test_get_status,
+}, {
+	PHY_ID_MATCH_MODEL(PHY_ID_LAN9645X),
+	.name		= "Microchip LAN9645X Gigabit PHY",
+	.config_init	= lan9645x_config_init,
+	.driver_data	= &ksz9021_type,
+	.probe		= kszphy_probe,
+	.soft_reset	= genphy_soft_reset,
+	.suspend	= lan9645x_suspend,
+	.resume		= genphy_resume,
+	.config_intr	= lan9645x_config_intr,
+	.handle_interrupt = lan9645x_handle_interrupt,
+	.get_tunable	= lan8842_get_tunable,
+	.set_tunable	= lan8842_set_tunable,
+	.get_phy_stats	= lan8842_get_phy_stats,
+	.update_stats	= lan8842_update_stats,
 }, {
 	PHY_ID_MATCH_MODEL(PHY_ID_KSZ9131),
 	.name		= "Microchip KSZ9131 Gigabit PHY",
@@ -6859,6 +7010,7 @@ static const struct mdio_device_id __maybe_unused micrel_tbl[] = {
 	{ PHY_ID_MATCH_MODEL(PHY_ID_LAN8804) },
 	{ PHY_ID_MATCH_MODEL(PHY_ID_LAN8841) },
 	{ PHY_ID_MATCH_MODEL(PHY_ID_LAN8842) },
+	{ PHY_ID_MATCH_MODEL(PHY_ID_LAN9645X) },
 	{ }
 };
 
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index ca691641788b..9c6f9817383f 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -33,6 +33,7 @@
 #define PHY_ID_LAN8804		0x00221670
 #define PHY_ID_LAN8841		0x00221650
 #define PHY_ID_LAN8842		0x002216C0
+#define PHY_ID_LAN9645X		0x002216D0
 
 #define PHY_ID_KSZ886X		0x00221430
 #define PHY_ID_KSZ8863		0x00221435
-- 
cgit v1.2.3


From 6466441a5ecd1c1168264e4c322bae455579b156 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 26 Feb 2026 04:12:13 +0000
Subject: net: inline skb_add_rx_frag_netmem()

This critical helper (via skb_add_rx_frag()) is mostly used
from drivers rx fast path.

It is time to inline it, this actually saves space in vmlinux:

size vmlinux.old vmlinux
   text    data     bss     dec     hex filename
37350766        23092977        4846992 65290735        3e441ef vmlinux.old
37350600        23092977        4846992 65290569        3e44149 vmlinux

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260226041213.1892561-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 13 +++++++++++--
 net/core/skbuff.c      | 11 -----------
 2 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index daa4e4944ce3..9cc98f850f1d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2682,8 +2682,17 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
 	shinfo->nr_frags = i + 1;
 }
 
-void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
-			    int off, int size, unsigned int truesize);
+static inline void skb_add_rx_frag_netmem(struct sk_buff *skb, int i,
+					  netmem_ref netmem, int off,
+					  int size, unsigned int truesize)
+{
+	DEBUG_NET_WARN_ON_ONCE(size > truesize);
+
+	skb_fill_netmem_desc(skb, i, netmem, off, size);
+	skb->len += size;
+	skb->data_len += size;
+	skb->truesize += truesize;
+}
 
 static inline void skb_add_rx_frag(struct sk_buff *skb, int i,
 				   struct page *page, int off, int size,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0e217041958a..513cbfed19bc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -891,17 +891,6 @@ skb_fail:
 }
 EXPORT_SYMBOL(napi_alloc_skb);
 
-void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
-			    int off, int size, unsigned int truesize)
-{
-	DEBUG_NET_WARN_ON_ONCE(size > truesize);
-
-	skb_fill_netmem_desc(skb, i, netmem, off, size);
-	skb->len += size;
-	skb->data_len += size;
-	skb->truesize += truesize;
-}
-EXPORT_SYMBOL(skb_add_rx_frag_netmem);
 
 void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
 			  unsigned int truesize)
-- 
cgit v1.2.3


From 8021729acf21f4bf3c43866b8919b68968028478 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 25 Feb 2026 21:12:57 -0800
Subject: iio: tsl2772: fix all kernel-doc warnings

Use the correct kernel-doc notation for struct members to eliminate
kernel-doc warnings:

Warning: include/linux/platform_data/tsl2772.h:88 struct member
 'prox_diode' not described in 'tsl2772_settings'
Warning: include/linux/platform_data/tsl2772.h:88 struct member
 'prox_power' not described in 'tsl2772_settings'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/platform_data/tsl2772.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/tsl2772.h b/include/linux/platform_data/tsl2772.h
index f8ade15a35e2..f042e82b39c3 100644
--- a/include/linux/platform_data/tsl2772.h
+++ b/include/linux/platform_data/tsl2772.h
@@ -61,9 +61,9 @@ struct tsl2772_lux {
  *  @prox_pulse_count:      Number if proximity emitter pulses.
  *  @prox_max_samples_cal:  The number of samples that are taken when performing
  *                          a proximity calibration.
- *  @prox_diode             Which diode(s) to use for driving the external
+ *  @prox_diode:            Which diode(s) to use for driving the external
  *                          LED(s) for proximity sensing.
- *  @prox_power             The amount of power to use for the external LED(s).
+ *  @prox_power:            The amount of power to use for the external LED(s).
  */
 struct tsl2772_settings {
 	int als_time;
-- 
cgit v1.2.3


From a2be37eedb52ea26938fa4cc9de1ff84963c57ad Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Date: Tue, 24 Feb 2026 11:42:04 +0100
Subject: firmware: exynos-acpm: Drop fake 'const' on handle pointer

All the functions operating on the 'handle' pointer are claiming it is a
pointer to const thus they should not modify the handle.  In fact that's
a false statement, because first thing these functions do is drop the
cast to const with container_of:

  struct acpm_info *acpm = handle_to_acpm_info(handle);

And with such cast the handle is easily writable with simple:

  acpm->handle.ops.pmic_ops.read_reg = NULL;

The code is not correct logically, either, because functions like
acpm_get_by_node() and acpm_handle_put() are meant to modify the handle
reference counting, thus they must modify the handle.  Modification here
happens anyway, even if the reference counting is stored in the
container which the handle is part of.

The code does not have actual visible bug, but incorrect 'const'
annotations could lead to incorrect compiler decisions.

Fixes: a88927b534ba ("firmware: add Exynos ACPM protocol driver")
Cc: stable@vger.kernel.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260224104203.42950-2-krzysztof.kozlowski@oss.qualcomm.com
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 drivers/clk/samsung/clk-acpm.c                     |  4 +--
 drivers/firmware/samsung/exynos-acpm-dvfs.c        |  4 +--
 drivers/firmware/samsung/exynos-acpm-dvfs.h        |  4 +--
 drivers/firmware/samsung/exynos-acpm-pmic.c        | 10 +++---
 drivers/firmware/samsung/exynos-acpm-pmic.h        | 10 +++---
 drivers/firmware/samsung/exynos-acpm.c             | 16 +++++----
 drivers/firmware/samsung/exynos-acpm.h             |  2 +-
 drivers/mfd/sec-acpm.c                             | 10 +++---
 .../linux/firmware/samsung/exynos-acpm-protocol.h  | 40 +++++++++-------------
 9 files changed, 48 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/samsung/clk-acpm.c b/drivers/clk/samsung/clk-acpm.c
index b90809ce3f88..d8944160793a 100644
--- a/drivers/clk/samsung/clk-acpm.c
+++ b/drivers/clk/samsung/clk-acpm.c
@@ -20,7 +20,7 @@ struct acpm_clk {
 	u32 id;
 	struct clk_hw hw;
 	unsigned int mbox_chan_id;
-	const struct acpm_handle *handle;
+	struct acpm_handle *handle;
 };
 
 struct acpm_clk_variant {
@@ -113,7 +113,7 @@ static int acpm_clk_register(struct device *dev, struct acpm_clk *aclk,
 
 static int acpm_clk_probe(struct platform_device *pdev)
 {
-	const struct acpm_handle *acpm_handle;
+	struct acpm_handle *acpm_handle;
 	struct clk_hw_onecell_data *clk_data;
 	struct clk_hw **hws;
 	struct device *dev = &pdev->dev;
diff --git a/drivers/firmware/samsung/exynos-acpm-dvfs.c b/drivers/firmware/samsung/exynos-acpm-dvfs.c
index 17e7be7757b3..06bdf62dea1f 100644
--- a/drivers/firmware/samsung/exynos-acpm-dvfs.c
+++ b/drivers/firmware/samsung/exynos-acpm-dvfs.c
@@ -43,7 +43,7 @@ static void acpm_dvfs_init_set_rate_cmd(u32 cmd[4], unsigned int clk_id,
 	cmd[3] = ktime_to_ms(ktime_get());
 }
 
-int acpm_dvfs_set_rate(const struct acpm_handle *handle,
+int acpm_dvfs_set_rate(struct acpm_handle *handle,
 		       unsigned int acpm_chan_id, unsigned int clk_id,
 		       unsigned long rate)
 {
@@ -63,7 +63,7 @@ static void acpm_dvfs_init_get_rate_cmd(u32 cmd[4], unsigned int clk_id)
 	cmd[3] = ktime_to_ms(ktime_get());
 }
 
-unsigned long acpm_dvfs_get_rate(const struct acpm_handle *handle,
+unsigned long acpm_dvfs_get_rate(struct acpm_handle *handle,
 				 unsigned int acpm_chan_id, unsigned int clk_id)
 {
 	struct acpm_xfer xfer;
diff --git a/drivers/firmware/samsung/exynos-acpm-dvfs.h b/drivers/firmware/samsung/exynos-acpm-dvfs.h
index 9f2778e649c9..b37b15426102 100644
--- a/drivers/firmware/samsung/exynos-acpm-dvfs.h
+++ b/drivers/firmware/samsung/exynos-acpm-dvfs.h
@@ -11,10 +11,10 @@
 
 struct acpm_handle;
 
-int acpm_dvfs_set_rate(const struct acpm_handle *handle,
+int acpm_dvfs_set_rate(struct acpm_handle *handle,
 		       unsigned int acpm_chan_id, unsigned int id,
 		       unsigned long rate);
-unsigned long acpm_dvfs_get_rate(const struct acpm_handle *handle,
+unsigned long acpm_dvfs_get_rate(struct acpm_handle *handle,
 				 unsigned int acpm_chan_id,
 				 unsigned int clk_id);
 
diff --git a/drivers/firmware/samsung/exynos-acpm-pmic.c b/drivers/firmware/samsung/exynos-acpm-pmic.c
index 26a9024d8ed8..0c50993cc9a8 100644
--- a/drivers/firmware/samsung/exynos-acpm-pmic.c
+++ b/drivers/firmware/samsung/exynos-acpm-pmic.c
@@ -77,7 +77,7 @@ static void acpm_pmic_init_read_cmd(u32 cmd[4], u8 type, u8 reg, u8 chan)
 	cmd[3] = ktime_to_ms(ktime_get());
 }
 
-int acpm_pmic_read_reg(const struct acpm_handle *handle,
+int acpm_pmic_read_reg(struct acpm_handle *handle,
 		       unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
 		       u8 *buf)
 {
@@ -107,7 +107,7 @@ static void acpm_pmic_init_bulk_read_cmd(u32 cmd[4], u8 type, u8 reg, u8 chan,
 		 FIELD_PREP(ACPM_PMIC_VALUE, count);
 }
 
-int acpm_pmic_bulk_read(const struct acpm_handle *handle,
+int acpm_pmic_bulk_read(struct acpm_handle *handle,
 			unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
 			u8 count, u8 *buf)
 {
@@ -150,7 +150,7 @@ static void acpm_pmic_init_write_cmd(u32 cmd[4], u8 type, u8 reg, u8 chan,
 	cmd[3] = ktime_to_ms(ktime_get());
 }
 
-int acpm_pmic_write_reg(const struct acpm_handle *handle,
+int acpm_pmic_write_reg(struct acpm_handle *handle,
 			unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
 			u8 value)
 {
@@ -187,7 +187,7 @@ static void acpm_pmic_init_bulk_write_cmd(u32 cmd[4], u8 type, u8 reg, u8 chan,
 	}
 }
 
-int acpm_pmic_bulk_write(const struct acpm_handle *handle,
+int acpm_pmic_bulk_write(struct acpm_handle *handle,
 			 unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
 			 u8 count, const u8 *buf)
 {
@@ -220,7 +220,7 @@ static void acpm_pmic_init_update_cmd(u32 cmd[4], u8 type, u8 reg, u8 chan,
 	cmd[3] = ktime_to_ms(ktime_get());
 }
 
-int acpm_pmic_update_reg(const struct acpm_handle *handle,
+int acpm_pmic_update_reg(struct acpm_handle *handle,
 			 unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
 			 u8 value, u8 mask)
 {
diff --git a/drivers/firmware/samsung/exynos-acpm-pmic.h b/drivers/firmware/samsung/exynos-acpm-pmic.h
index 078421888a14..88ae9aada2ae 100644
--- a/drivers/firmware/samsung/exynos-acpm-pmic.h
+++ b/drivers/firmware/samsung/exynos-acpm-pmic.h
@@ -11,19 +11,19 @@
 
 struct acpm_handle;
 
-int acpm_pmic_read_reg(const struct acpm_handle *handle,
+int acpm_pmic_read_reg(struct acpm_handle *handle,
 		       unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
 		       u8 *buf);
-int acpm_pmic_bulk_read(const struct acpm_handle *handle,
+int acpm_pmic_bulk_read(struct acpm_handle *handle,
 			unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
 			u8 count, u8 *buf);
-int acpm_pmic_write_reg(const struct acpm_handle *handle,
+int acpm_pmic_write_reg(struct acpm_handle *handle,
 			unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
 			u8 value);
-int acpm_pmic_bulk_write(const struct acpm_handle *handle,
+int acpm_pmic_bulk_write(struct acpm_handle *handle,
 			 unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
 			 u8 count, const u8 *buf);
-int acpm_pmic_update_reg(const struct acpm_handle *handle,
+int acpm_pmic_update_reg(struct acpm_handle *handle,
 			 unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
 			 u8 value, u8 mask);
 #endif /* __EXYNOS_ACPM_PMIC_H__ */
diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c
index c616ac951a0d..16c46ed60837 100644
--- a/drivers/firmware/samsung/exynos-acpm.c
+++ b/drivers/firmware/samsung/exynos-acpm.c
@@ -410,7 +410,7 @@ static int acpm_wait_for_message_response(struct acpm_chan *achan,
  *
  * Return: 0 on success, -errno otherwise.
  */
-int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer)
+int acpm_do_xfer(struct acpm_handle *handle, const struct acpm_xfer *xfer)
 {
 	struct acpm_info *acpm = handle_to_acpm_info(handle);
 	struct exynos_mbox_msg msg;
@@ -674,7 +674,7 @@ static int acpm_probe(struct platform_device *pdev)
  * acpm_handle_put() - release the handle acquired by acpm_get_by_phandle.
  * @handle:	Handle acquired by acpm_get_by_phandle.
  */
-static void acpm_handle_put(const struct acpm_handle *handle)
+static void acpm_handle_put(struct acpm_handle *handle)
 {
 	struct acpm_info *acpm = handle_to_acpm_info(handle);
 	struct device *dev = acpm->dev;
@@ -700,9 +700,11 @@ static void devm_acpm_release(struct device *dev, void *res)
  * @np:		ACPM device tree node.
  *
  * Return: pointer to handle on success, ERR_PTR(-errno) otherwise.
+ *
+ * Note: handle CANNOT be pointer to const
  */
-static const struct acpm_handle *acpm_get_by_node(struct device *dev,
-						  struct device_node *np)
+static struct acpm_handle *acpm_get_by_node(struct device *dev,
+					    struct device_node *np)
 {
 	struct platform_device *pdev;
 	struct device_link *link;
@@ -743,10 +745,10 @@ static const struct acpm_handle *acpm_get_by_node(struct device *dev,
  *
  * Return: pointer to handle on success, ERR_PTR(-errno) otherwise.
  */
-const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
-						struct device_node *np)
+struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+					  struct device_node *np)
 {
-	const struct acpm_handle **ptr, *handle;
+	struct acpm_handle **ptr, *handle;
 
 	ptr = devres_alloc(devm_acpm_release, sizeof(*ptr), GFP_KERNEL);
 	if (!ptr)
diff --git a/drivers/firmware/samsung/exynos-acpm.h b/drivers/firmware/samsung/exynos-acpm.h
index 8392fcb91f45..5df8354dc96c 100644
--- a/drivers/firmware/samsung/exynos-acpm.h
+++ b/drivers/firmware/samsung/exynos-acpm.h
@@ -17,7 +17,7 @@ struct acpm_xfer {
 
 struct acpm_handle;
 
-int acpm_do_xfer(const struct acpm_handle *handle,
+int acpm_do_xfer(struct acpm_handle *handle,
 		 const struct acpm_xfer *xfer);
 
 #endif /* __EXYNOS_ACPM_H__ */
diff --git a/drivers/mfd/sec-acpm.c b/drivers/mfd/sec-acpm.c
index 537ea65685bf..0e23b9d9f7ee 100644
--- a/drivers/mfd/sec-acpm.c
+++ b/drivers/mfd/sec-acpm.c
@@ -367,7 +367,7 @@ static const struct regmap_config s2mpg11_regmap_config_meter = {
 };
 
 struct sec_pmic_acpm_shared_bus_context {
-	const struct acpm_handle *acpm;
+	struct acpm_handle *acpm;
 	unsigned int acpm_chan_id;
 	u8 speedy_channel;
 };
@@ -390,7 +390,7 @@ static int sec_pmic_acpm_bus_write(void *context, const void *data,
 				   size_t count)
 {
 	struct sec_pmic_acpm_bus_context *ctx = context;
-	const struct acpm_handle *acpm = ctx->shared->acpm;
+	struct acpm_handle *acpm = ctx->shared->acpm;
 	const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops;
 	size_t val_count = count - BITS_TO_BYTES(ACPM_ADDR_BITS);
 	const u8 *d = data;
@@ -410,7 +410,7 @@ static int sec_pmic_acpm_bus_read(void *context, const void *reg_buf, size_t reg
 				  void *val_buf, size_t val_size)
 {
 	struct sec_pmic_acpm_bus_context *ctx = context;
-	const struct acpm_handle *acpm = ctx->shared->acpm;
+	struct acpm_handle *acpm = ctx->shared->acpm;
 	const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops;
 	const u8 *r = reg_buf;
 	u8 reg;
@@ -429,7 +429,7 @@ static int sec_pmic_acpm_bus_reg_update_bits(void *context, unsigned int reg, un
 					     unsigned int val)
 {
 	struct sec_pmic_acpm_bus_context *ctx = context;
-	const struct acpm_handle *acpm = ctx->shared->acpm;
+	struct acpm_handle *acpm = ctx->shared->acpm;
 	const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops;
 
 	return pmic_ops->update_reg(acpm, ctx->shared->acpm_chan_id, ctx->type, reg & 0xff,
@@ -480,7 +480,7 @@ static int sec_pmic_acpm_probe(struct platform_device *pdev)
 	struct regmap *regmap_common, *regmap_pmic, *regmap;
 	const struct sec_pmic_acpm_platform_data *pdata;
 	struct sec_pmic_acpm_shared_bus_context *shared_ctx;
-	const struct acpm_handle *acpm;
+	struct acpm_handle *acpm;
 	struct device *dev = &pdev->dev;
 	int ret, irq;
 
diff --git a/include/linux/firmware/samsung/exynos-acpm-protocol.h b/include/linux/firmware/samsung/exynos-acpm-protocol.h
index 2091da965a5a..13f17dc4443b 100644
--- a/include/linux/firmware/samsung/exynos-acpm-protocol.h
+++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h
@@ -14,30 +14,24 @@ struct acpm_handle;
 struct device_node;
 
 struct acpm_dvfs_ops {
-	int (*set_rate)(const struct acpm_handle *handle,
-			unsigned int acpm_chan_id, unsigned int clk_id,
-			unsigned long rate);
-	unsigned long (*get_rate)(const struct acpm_handle *handle,
+	int (*set_rate)(struct acpm_handle *handle, unsigned int acpm_chan_id,
+			unsigned int clk_id, unsigned long rate);
+	unsigned long (*get_rate)(struct acpm_handle *handle,
 				  unsigned int acpm_chan_id,
 				  unsigned int clk_id);
 };
 
 struct acpm_pmic_ops {
-	int (*read_reg)(const struct acpm_handle *handle,
-			unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
-			u8 *buf);
-	int (*bulk_read)(const struct acpm_handle *handle,
-			 unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
-			 u8 count, u8 *buf);
-	int (*write_reg)(const struct acpm_handle *handle,
-			 unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
-			 u8 value);
-	int (*bulk_write)(const struct acpm_handle *handle,
-			  unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
-			  u8 count, const u8 *buf);
-	int (*update_reg)(const struct acpm_handle *handle,
-			  unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
-			  u8 value, u8 mask);
+	int (*read_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id,
+			u8 type, u8 reg, u8 chan, u8 *buf);
+	int (*bulk_read)(struct acpm_handle *handle, unsigned int acpm_chan_id,
+			 u8 type, u8 reg, u8 chan, u8 count, u8 *buf);
+	int (*write_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id,
+			 u8 type, u8 reg, u8 chan, u8 value);
+	int (*bulk_write)(struct acpm_handle *handle, unsigned int acpm_chan_id,
+			  u8 type, u8 reg, u8 chan, u8 count, const u8 *buf);
+	int (*update_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id,
+			  u8 type, u8 reg, u8 chan, u8 value, u8 mask);
 };
 
 struct acpm_ops {
@@ -56,12 +50,12 @@ struct acpm_handle {
 struct device;
 
 #if IS_ENABLED(CONFIG_EXYNOS_ACPM_PROTOCOL)
-const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
-						struct device_node *np);
+struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+					  struct device_node *np);
 #else
 
-static inline const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
-							      struct device_node *np)
+static inline struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+							struct device_node *np)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From 2a76a626670b2ef391da37f457e8e51f168432a6 Mon Sep 17 00:00:00 2001
From: Taha Ed-Dafili <0rayn.dev@gmail.com>
Date: Thu, 26 Feb 2026 15:11:03 +0000
Subject: iio: core: Add IIO_EV_INFO_SCALE to event info

Implement support for IIO_EV_INFO_SCALE in the internal enum
iio_event_info to allow proper ABI compliance.

This allows drivers (like the ADXL345) to expose event scale
attributes using the standard IIO ABI rather than manual
device attributes.

Signed-off-by: Taha Ed-Dafili <0rayn.dev@gmail.com>
Reviewed-by: David Lechner <dlechner@baylibre.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/industrialio-event.c | 1 +
 include/linux/iio/types.h        | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
index 4149efcd5539..a0d6fcf2a9c9 100644
--- a/drivers/iio/industrialio-event.c
+++ b/drivers/iio/industrialio-event.c
@@ -256,6 +256,7 @@ static const char * const iio_ev_info_text[] = {
 	[IIO_EV_INFO_TAP2_MIN_DELAY] = "tap2_min_delay",
 	[IIO_EV_INFO_RUNNING_PERIOD] = "runningperiod",
 	[IIO_EV_INFO_RUNNING_COUNT] = "runningcount",
+	[IIO_EV_INFO_SCALE] = "scale",
 };
 
 static enum iio_event_direction iio_ev_attr_dir(struct iio_dev_attr *attr)
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 34eebad12d2c..4e3099defc1d 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -21,6 +21,7 @@ enum iio_event_info {
 	IIO_EV_INFO_TAP2_MIN_DELAY,
 	IIO_EV_INFO_RUNNING_PERIOD,
 	IIO_EV_INFO_RUNNING_COUNT,
+	IIO_EV_INFO_SCALE,
 };
 
 #define IIO_VAL_INT 1
-- 
cgit v1.2.3


From d3b693a13b39bce16e284e1c737874966b3a96de Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 28 Feb 2026 17:47:43 -0800
Subject: spi: spi-mem: clean up kernel-doc in spi-mem.h

Eliminate all kernel-doc warnings in spi-mem.h:
- add missing struct member descriptions
- don't use "struct" for function descrptions

Warning: include/linux/spi/spi-mem.h:202 struct member 'cmd' not described
 in 'spi_mem_op'
Warning: include/linux/spi/spi-mem.h:202 struct member 'addr' not described
 in 'spi_mem_op'
Warning: include/linux/spi/spi-mem.h:202 struct member 'dummy' not
 described in 'spi_mem_op'
Warning: include/linux/spi/spi-mem.h:202 struct member 'data' not described
 in 'spi_mem_op'

Warning: include/linux/spi/spi-mem.h:286 Incorrect use of kernel-doc
 format: * struct spi_mem_get_drvdata() - get driver private data
 attached to a SPI mem
Warning: include/linux/spi/spi-mem.h:298 Incorrect use of kernel-doc
 format: * struct spi_controller_mem_ops - SPI memory operations
Warning: include/linux/spi/spi-mem.h:362 expecting prototype for struct
 spi_mem_set_drvdata. Prototype was for struct spi_controller_mem_ops instead

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260301014743.3133167-1-rdunlap@infradead.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi-mem.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 5774e554c0f0..37f709784350 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -130,11 +130,13 @@ enum spi_mem_data_dir {
 
 /**
  * struct spi_mem_op - describes a SPI memory operation
+ * @cmd: the complete command
  * @cmd.nbytes: number of opcode bytes (only 1 or 2 are valid). The opcode is
  *		sent MSB-first.
  * @cmd.buswidth: number of IO lines used to transmit the command
  * @cmd.opcode: operation opcode
  * @cmd.dtr: whether the command opcode should be sent in DTR mode or not
+ * @addr: the address attributes
  * @addr.nbytes: number of address bytes to send. Can be zero if the operation
  *		 does not need to send an address
  * @addr.buswidth: number of IO lines used to transmit the address cycles
@@ -143,10 +145,12 @@ enum spi_mem_data_dir {
  *	      Note that only @addr.nbytes are taken into account in this
  *	      address value, so users should make sure the value fits in the
  *	      assigned number of bytes.
+ * @dummy: data for dummy operation
  * @dummy.nbytes: number of dummy bytes to send after an opcode or address. Can
  *		  be zero if the operation does not require dummy bytes
  * @dummy.buswidth: number of IO lanes used to transmit the dummy bytes
  * @dummy.dtr: whether the dummy bytes should be sent in DTR mode or not
+ * @data: the data attributes
  * @data.buswidth: number of IO lanes used to send/receive the data
  * @data.dtr: whether the data should be sent in DTR mode or not
  * @data.ecc: whether error correction is required or not
@@ -273,7 +277,7 @@ struct spi_mem {
 };
 
 /**
- * struct spi_mem_set_drvdata() - attach driver private data to a SPI mem
+ * spi_mem_set_drvdata() - attach driver private data to a SPI mem
  *				  device
  * @mem: memory device
  * @data: data to attach to the memory device
@@ -284,7 +288,7 @@ static inline void spi_mem_set_drvdata(struct spi_mem *mem, void *data)
 }
 
 /**
- * struct spi_mem_get_drvdata() - get driver private data attached to a SPI mem
+ * spi_mem_get_drvdata() - get driver private data attached to a SPI mem
  *				  device
  * @mem: memory device
  *
-- 
cgit v1.2.3


From 73942a6ea26bd7e02b7c260b8b7aa942397be894 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding@opensource.cirrus.com>
Date: Tue, 24 Feb 2026 16:18:05 +0000
Subject: firmware: cs_dsp: Add API to hibernate the DSP

For some parts, the DSP is kept running when in low power mode
(hibernation), leaving the firmware ALSA controls enabled, but the
registers are inaccessible. Attempts to access volatile firmware
controls whilst in this state would produce errors in the kernel log
due to a regmap_raw_read() into DSP registers whilst the regmap is in
cache_only.

To remove this error log, add a hibernating flag to indicate that the
controls are inaccessible, so we no longer try to read or write to the
registers whilst the regmap is in cache_only.

This would still produce an error when trying to read or write to these
controls, but this would be a different error (-EPERM instead of
-EBUSY), and would not produce a spurious error log in the kernel.

Upon wake from hibernation, the control caches are re-synced to the
hardware, if the DSP is running.

Signed-off-by: Stefan Binding <sbinding@opensource.cirrus.com>
Link: https://patch.msgid.link/20260224161821.93365-2-sbinding@opensource.cirrus.com
Reviewed-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/firmware/cirrus/cs_dsp.c       | 49 +++++++++++++++++++++++++++++++---
 include/linux/firmware/cirrus/cs_dsp.h |  3 +++
 2 files changed, 48 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c
index b4f1c01e3b5b..f9d8a883900d 100644
--- a/drivers/firmware/cirrus/cs_dsp.c
+++ b/drivers/firmware/cirrus/cs_dsp.c
@@ -515,6 +515,7 @@ void cs_dsp_init_debugfs(struct cs_dsp *dsp, struct dentry *debugfs_root)
 
 	debugfs_create_bool("booted", 0444, root, &dsp->booted);
 	debugfs_create_bool("running", 0444, root, &dsp->running);
+	debugfs_create_bool("hibernating", 0444, root, &dsp->hibernating);
 	debugfs_create_x32("fw_id", 0444, root, &dsp->fw_id);
 	debugfs_create_x32("fw_version", 0444, root, &dsp->fw_id_version);
 
@@ -703,7 +704,7 @@ int cs_dsp_coeff_write_acked_control(struct cs_dsp_coeff_ctl *ctl, unsigned int
 
 	lockdep_assert_held(&dsp->pwr_lock);
 
-	if (!dsp->running)
+	if (!dsp->running || dsp->hibernating)
 		return -EPERM;
 
 	ret = cs_dsp_coeff_base_reg(ctl, &reg, 0);
@@ -827,7 +828,7 @@ int cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl,
 	}
 
 	ctl->set = 1;
-	if (ctl->enabled && ctl->dsp->running)
+	if (ctl->enabled && ctl->dsp->running && !ctl->dsp->hibernating)
 		ret = cs_dsp_coeff_write_ctrl_raw(ctl, off, buf, len);
 
 	if (ret < 0)
@@ -920,12 +921,12 @@ int cs_dsp_coeff_read_ctrl(struct cs_dsp_coeff_ctl *ctl,
 		return -EINVAL;
 
 	if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) {
-		if (ctl->enabled && ctl->dsp->running)
+		if (ctl->enabled && ctl->dsp->running && !ctl->dsp->hibernating)
 			return cs_dsp_coeff_read_ctrl_raw(ctl, off, buf, len);
 		else
 			return -EPERM;
 	} else {
-		if (!ctl->flags && ctl->enabled && ctl->dsp->running)
+		if (!ctl->flags && ctl->enabled && ctl->dsp->running && !ctl->dsp->hibernating)
 			ret = cs_dsp_coeff_read_ctrl_raw(ctl, 0, ctl->cache, ctl->len);
 
 		if (buf != ctl->cache)
@@ -1108,6 +1109,44 @@ err_ctl:
 	return ret;
 }
 
+
+/**
+ * cs_dsp_hibernate() - Disable or enable all controls for a DSP
+ * @dsp: pointer to DSP structure
+ * @hibernate: whether to set controls to cache only mode
+ *
+ * When @hibernate is true, the DSP is entering hibernation mode where the
+ * regmap is inaccessible, and all controls become cache only.
+ * When @hibernate is false, the DSP has exited hibernation mode. If the DSP
+ * is running, all controls are re-synced to the DSP.
+ *
+ */
+void cs_dsp_hibernate(struct cs_dsp *dsp, bool hibernate)
+{
+	mutex_lock(&dsp->pwr_lock);
+
+	if (!dsp->running) {
+		cs_dsp_dbg(dsp, "Cannot hibernate, DSP not running\n");
+		goto out;
+	}
+
+	if (dsp->hibernating == hibernate)
+		goto out;
+
+	cs_dsp_dbg(dsp, "Set hibernating to %d\n", hibernate);
+	dsp->hibernating = hibernate;
+
+	if (!dsp->hibernating && dsp->running) {
+		int ret = cs_dsp_coeff_sync_controls(dsp);
+
+		if (ret)
+			cs_dsp_err(dsp, "Error syncing controls: %d\n", ret);
+	}
+out:
+	mutex_unlock(&dsp->pwr_lock);
+}
+EXPORT_SYMBOL_NS_GPL(cs_dsp_hibernate, "FW_CS_DSP");
+
 struct cs_dsp_coeff_parsed_alg {
 	int id;
 	const u8 *name;
@@ -2498,6 +2537,7 @@ int cs_dsp_adsp1_power_up(struct cs_dsp *dsp,
 		goto err_ena;
 
 	dsp->booted = true;
+	dsp->hibernating = false;
 
 	/* Start the core running */
 	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
@@ -2776,6 +2816,7 @@ int cs_dsp_power_up(struct cs_dsp *dsp,
 		dsp->ops->disable_core(dsp);
 
 	dsp->booted = true;
+	dsp->hibernating = false;
 
 	mutex_unlock(&dsp->pwr_lock);
 
diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
index 0ec1cdc5585d..4e3baa557068 100644
--- a/include/linux/firmware/cirrus/cs_dsp.h
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -179,6 +179,7 @@ struct cs_dsp {
 
 	bool booted;
 	bool running;
+	bool hibernating;
 
 	struct list_head ctl_list;
 
@@ -354,4 +355,6 @@ int cs_dsp_chunk_write(struct cs_dsp_chunk *ch, int nbits, u32 val);
 int cs_dsp_chunk_flush(struct cs_dsp_chunk *ch);
 int cs_dsp_chunk_read(struct cs_dsp_chunk *ch, int nbits);
 
+void cs_dsp_hibernate(struct cs_dsp *dsp, bool hibernating);
+
 #endif
-- 
cgit v1.2.3


From bd77375097357b46af00db1316ceab5e82ccbc8b Mon Sep 17 00:00:00 2001
From: Kavita Kavita <kavita.kavita@oss.qualcomm.com>
Date: Fri, 27 Feb 2026 00:25:51 +0530
Subject: wifi: cfg80211: add support for IEEE 802.1X Authentication Protocol

Add an extended feature flag NL80211_EXT_FEATURE_IEEE8021X_AUTH to
allow a driver to indicate support for the IEEE 802.1X authentication
protocol in non-AP STA mode, as defined in
"IEEE P802.11bi/D4.0, 12.16.5".

In case of SME in userspace, the Authentication frame body is prepared
in userspace while the driver finalizes the Authentication frame once
it receives the required fields and elements. The driver indicates
support for IEEE 802.1X authentication using the extended feature flag
so that userspace can initiate IEEE 802.1X authentication.

When the feature flag is set, process IEEE 802.1X Authentication frames
from userspace in non-AP STA mode. If the flag is not set, reject
IEEE 802.1X Authentication frames.

Define a new authentication type NL80211_AUTHTYPE_IEEE8021X for
IEEE 802.1X authentication.

Signed-off-by: Kavita Kavita <kavita.kavita@oss.qualcomm.com>
Link: https://patch.msgid.link/20260226185553.1516290-4-kavita.kavita@oss.qualcomm.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |  1 +
 include/uapi/linux/nl80211.h |  9 +++++++++
 net/wireless/nl80211.c       | 14 ++++++++++++--
 3 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 0aa2fb8f88de..1bf806f85372 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1358,6 +1358,7 @@ struct ieee80211_tdls_data {
 #define WLAN_AUTH_FILS_SK 4
 #define WLAN_AUTH_FILS_SK_PFS 5
 #define WLAN_AUTH_FILS_PK 6
+#define WLAN_AUTH_IEEE8021X 8
 #define WLAN_AUTH_EPPKE 9
 #define WLAN_AUTH_LEAP 128
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index fe2c8c8d6dd6..0b7a06c2b9f7 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5491,6 +5491,8 @@ enum nl80211_bss_status {
  * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS
  * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key
  * @NL80211_AUTHTYPE_EPPKE: Enhanced Privacy Protection Key Exchange
+ * @NL80211_AUTHTYPE_IEEE8021X: IEEE 802.1X authentication utilizing
+ *	Authentication frames
  * @__NL80211_AUTHTYPE_NUM: internal
  * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm
  * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by
@@ -5507,6 +5509,7 @@ enum nl80211_auth_type {
 	NL80211_AUTHTYPE_FILS_SK_PFS,
 	NL80211_AUTHTYPE_FILS_PK,
 	NL80211_AUTHTYPE_EPPKE,
+	NL80211_AUTHTYPE_IEEE8021X,
 
 	/* keep last */
 	__NL80211_AUTHTYPE_NUM,
@@ -6820,6 +6823,11 @@ enum nl80211_feature_flags {
  *	frames in both non‑AP STA and AP mode as specified in
  *	"IEEE P802.11bi/D3.0, 12.16.6".
  *
+ * @NL80211_EXT_FEATURE_IEEE8021X_AUTH: Driver supports IEEE 802.1X
+ *	authentication utilizing Authentication frames with user space SME
+ *	(NL80211_CMD_AUTHENTICATE) in non-AP STA mode, as specified in
+ *	"IEEE P802.11bi/D4.0, 12.16.5".
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -6898,6 +6906,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_BEACON_RATE_EHT,
 	NL80211_EXT_FEATURE_EPPKE,
 	NL80211_EXT_FEATURE_ASSOC_FRAME_ENCRYPTION,
+	NL80211_EXT_FEATURE_IEEE8021X_AUTH,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f54b3cca6975..de7956dbe0a0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6550,6 +6550,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
 					     NL80211_EXT_FEATURE_EPPKE) &&
 		    auth_type == NL80211_AUTHTYPE_EPPKE)
 			return false;
+		if (!wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_IEEE8021X_AUTH) &&
+		    auth_type == NL80211_AUTHTYPE_IEEE8021X)
+			return false;
 		return true;
 	case NL80211_CMD_CONNECT:
 		if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
@@ -6571,6 +6575,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
 					     NL80211_EXT_FEATURE_EPPKE) &&
 		    auth_type == NL80211_AUTHTYPE_EPPKE)
 			return false;
+		if (!wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_IEEE8021X_AUTH) &&
+		    auth_type == NL80211_AUTHTYPE_IEEE8021X)
+			return false;
 		return true;
 	case NL80211_CMD_START_AP:
 		if (!wiphy_ext_feature_isset(&rdev->wiphy,
@@ -12103,7 +12111,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 	     auth_type == NL80211_AUTHTYPE_FILS_SK ||
 	     auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
 	     auth_type == NL80211_AUTHTYPE_FILS_PK ||
-	     auth_type == NL80211_AUTHTYPE_EPPKE) &&
+	     auth_type == NL80211_AUTHTYPE_EPPKE ||
+	     auth_type == NL80211_AUTHTYPE_IEEE8021X) &&
 	    !info->attrs[NL80211_ATTR_AUTH_DATA])
 		return -EINVAL;
 
@@ -12112,7 +12121,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 		    auth_type != NL80211_AUTHTYPE_FILS_SK &&
 		    auth_type != NL80211_AUTHTYPE_FILS_SK_PFS &&
 		    auth_type != NL80211_AUTHTYPE_FILS_PK &&
-		    auth_type != NL80211_AUTHTYPE_EPPKE)
+		    auth_type != NL80211_AUTHTYPE_EPPKE &&
+		    auth_type != NL80211_AUTHTYPE_IEEE8021X)
 			return -EINVAL;
 		req.auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]);
 		req.auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]);
-- 
cgit v1.2.3


From 9347878b1513beee1a26bb249f5dc8326d450f75 Mon Sep 17 00:00:00 2001
From: Kavita Kavita <kavita.kavita@oss.qualcomm.com>
Date: Fri, 27 Feb 2026 00:25:52 +0530
Subject: wifi: mac80211: Add support for IEEE 802.1X authentication protocol
 in non-AP STA mode

Add support for the IEEE 802.1X authentication protocol in non-AP STA
mode, as specified in "IEEE P802.11bi/D4.0, 12.16.5".

IEEE 802.1X authentication involves multiple Authentication frame
exchanges, with the non-AP STA and AP alternating transaction
sequence numbers. The number of Authentication frame exchanges
depends on the EAP method in use. For IEEE 802.1X authentication,
process only Authentication frames with the expected transaction
sequence number.

For IEEE 802.1X Authentication, Table 9-71 specifies that the
Encapsulation Length field as specified in Clause 9.4.1.82 shall be
present in all IEEE 802.1X Authentication frames. Drop the frame in
the mac80211 if the Encapsulation Length field is missing.

After receiving the final Authentication frame with status code
WLAN_STATUS_8021X_AUTH_SUCCESS from the AP, mac80211 marks the state
as authenticated, as it indicates the EAP handshake has completed
successfully over the Authentication frames as specified in
Clause 12.16.5.

In the PMKSA caching case, only two Authentication frames are
exchanged if the AP identifies a valid PMKSA, then as specified
in Clause 12.16.8.3, the AP shall set the Status Code to
WLAN_STATUS_SUCCESS in the final Authentication frame and must not
include an encapsulated EAPOL PDU. This frame will be the final
Authentication frame from the AP when PMKSA caching is enabled,
and mac80211 marks the state as authenticated.

In case of authentication success or failure, forward the
Authentication frame to userspace(e.g. wpa_supplicant), and let
userspace validate the Authentication frame from the AP as per the
specification.

Signed-off-by: Kavita Kavita <kavita.kavita@oss.qualcomm.com>
Link: https://patch.msgid.link/20260226185553.1516290-5-kavita.kavita@oss.qualcomm.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h |  1 +
 net/mac80211/mlme.c       | 78 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 73 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 1bf806f85372..3651b2e6c518 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1508,6 +1508,7 @@ enum ieee80211_statuscode {
 	WLAN_STATUS_SAE_PK = 127,
 	WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133,
 	WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134,
+	WLAN_STATUS_8021X_AUTH_SUCCESS = 153,
 };
 
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 810bea1aacc5..7957eacc5ab7 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4920,7 +4920,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_mgmt *mgmt, size_t len)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	u16 auth_alg, auth_transaction, status_code;
+	u16 auth_alg, auth_transaction, status_code, encap_len;
 	struct ieee80211_event event = {
 		.type = MLME_EVENT,
 		.u.mlme.data = AUTH_EVENT,
@@ -4929,6 +4929,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 		.subtype = IEEE80211_STYPE_AUTH,
 	};
 	bool sae_need_confirm = false;
+	bool auth_fail = false;
 
 	lockdep_assert_wiphy(sdata->local->hw.wiphy);
 
@@ -4945,6 +4946,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
 	status_code = le16_to_cpu(mgmt->u.auth.status_code);
 
+	/*
+	 * IEEE 802.1X Authentication:
+	 * Header + Authentication Algorithm Number(2 byte) + Authentication
+	 * Transaction Sequence Number(2 byte) + Status Code(2 byte) +
+	 * Encapsulation Length(2 byte).
+	 */
+	if (auth_alg == WLAN_AUTH_IEEE8021X && len < 24 + 8)
+		return;
+
 	info.link_id = ifmgd->auth_data->link_id;
 
 	if (auth_alg != ifmgd->auth_data->algorithm ||
@@ -4960,7 +4970,24 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 		goto notify_driver;
 	}
 
-	if (status_code != WLAN_STATUS_SUCCESS) {
+	switch (auth_alg) {
+	case WLAN_AUTH_IEEE8021X:
+		if (status_code != WLAN_STATUS_SUCCESS &&
+		    status_code != WLAN_STATUS_8021X_AUTH_SUCCESS)
+			auth_fail = true;
+
+		if (!auth_fail) {
+			/* Indicates length of encapsulated EAPOL PDU */
+			encap_len = get_unaligned_le16(mgmt->u.auth.variable);
+		}
+		break;
+	default:
+		if (status_code != WLAN_STATUS_SUCCESS)
+			auth_fail = true;
+		break;
+	}
+
+	if (auth_fail) {
 		cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
 
 		if (auth_alg == WLAN_AUTH_SAE &&
@@ -4997,6 +5024,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	case WLAN_AUTH_FILS_SK_PFS:
 	case WLAN_AUTH_FILS_PK:
 	case WLAN_AUTH_EPPKE:
+	case WLAN_AUTH_IEEE8021X:
 		break;
 	case WLAN_AUTH_SHARED_KEY:
 		if (ifmgd->auth_data->expected_transaction != 4) {
@@ -5017,8 +5045,37 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	if (ifmgd->auth_data->algorithm != WLAN_AUTH_SAE ||
 	    (auth_transaction == 2 &&
 	     ifmgd->auth_data->expected_transaction == 2)) {
-		if (!ieee80211_mark_sta_auth(sdata))
-			return; /* ignore frame -- wait for timeout */
+		switch (ifmgd->auth_data->algorithm) {
+		case WLAN_AUTH_IEEE8021X:
+			/*
+			 * IEEE 802.1X authentication:
+			 * - When the full EAP handshake completes over the
+			 *   Authentication process, the responder sets the
+			 *   Status Code to WLAN_STATUS_8021X_AUTH_SUCCESS as
+			 *   specified in "IEEE P802.11bi/D4.0, 12.16.5".
+			 *
+			 * - In the PMKSA caching case, only two Authentication
+			 *   frames are exchanged if the responder (e.g., AP)
+			 *   identifies a valid PMKSA, then as specified in
+			 *   "IEEE P802.11bi/D4.0, 12.16.8.3", the responder
+			 *   shall set the Status Code to SUCCESS in the final
+			 *   Authentication frame and must not include an
+			 *   encapsulated EAPOL PDU.
+			 *
+			 * Both conditions are treated as successful
+			 * authentication, so mark the state to Authenticated.
+			 */
+			if (status_code != WLAN_STATUS_8021X_AUTH_SUCCESS &&
+			    !(status_code == WLAN_STATUS_SUCCESS &&
+			      encap_len == 0))
+				break;
+			fallthrough;
+		default:
+			if (!ieee80211_mark_sta_auth(sdata))
+				return; /* ignore frame -- wait for timeout */
+
+			break;
+		}
 	} else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
 		   auth_transaction == 1) {
 		sae_need_confirm = true;
@@ -8460,6 +8517,10 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
 	} else if (auth_data->algorithm == WLAN_AUTH_EPPKE) {
 		trans = auth_data->trans;
 		status = auth_data->status;
+	} else if (auth_data->algorithm == WLAN_AUTH_IEEE8021X) {
+		trans = auth_data->trans;
+		status = auth_data->status;
+		auth_data->expected_transaction = trans + 1;
 	}
 
 	if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
@@ -9117,7 +9178,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 		}
 
 		if (ifmgd->auth_data &&
-		    ifmgd->auth_data->algorithm == WLAN_AUTH_EPPKE)
+		    (ifmgd->auth_data->algorithm == WLAN_AUTH_EPPKE ||
+		     ifmgd->auth_data->algorithm == WLAN_AUTH_IEEE8021X))
 			new_sta->sta.epp_peer = true;
 
 		new_sta->sta.mlo = mlo;
@@ -9377,6 +9439,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
 	case NL80211_AUTHTYPE_EPPKE:
 		auth_alg = WLAN_AUTH_EPPKE;
 		break;
+	case NL80211_AUTHTYPE_IEEE8021X:
+		auth_alg = WLAN_AUTH_IEEE8021X;
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -9402,7 +9467,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
 
 	if (req->auth_data_len >= 4) {
 		if (req->auth_type == NL80211_AUTHTYPE_SAE ||
-		    req->auth_type == NL80211_AUTHTYPE_EPPKE) {
+		    req->auth_type == NL80211_AUTHTYPE_EPPKE ||
+		    req->auth_type == NL80211_AUTHTYPE_IEEE8021X) {
 			__le16 *pos = (__le16 *) req->auth_data;
 
 			auth_data->trans = le16_to_cpu(pos[0]);
-- 
cgit v1.2.3


From 25ab7b6f34c74ea555b4489b57f7219612991433 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 16 Feb 2026 14:32:02 +0100
Subject: xattr: remove rbtree-based simple_xattr infrastructure

Now that all consumers (shmem, kernfs, pidfs) have been converted to
use the rhashtable-based simple_xattrs with pointer-based lazy
allocation, remove the legacy rbtree code path. The rhashtable
implementation provides O(1) average-case lookup with RCU-based lockless
reads, replacing the O(log n) rbtree with reader-writer spinlock
contention.

Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-6-c2efa4f74cb7@kernel.org
Acked-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xattr.c            | 387 +++++++++++++-------------------------------------
 include/linux/xattr.h |  12 +-
 2 files changed, 103 insertions(+), 296 deletions(-)

(limited to 'include/linux')

diff --git a/fs/xattr.c b/fs/xattr.c
index eb45ae0fd17f..64803097e1dc 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1200,20 +1200,18 @@ void simple_xattr_free(struct simple_xattr *xattr)
 
 static void simple_xattr_rcu_free(struct rcu_head *head)
 {
-	struct simple_xattr *xattr;
+	struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu);
 
-	xattr = container_of(head, struct simple_xattr, rcu);
 	simple_xattr_free(xattr);
 }
 
 /**
- * simple_xattr_free_rcu - free an xattr object after an RCU grace period
+ * simple_xattr_free_rcu - free an xattr object with RCU delay
  * @xattr: the xattr object
  *
- * Schedule RCU-deferred freeing of an xattr entry. This is used by
- * rhashtable-based callers of simple_xattr_set() that replace or remove
- * an existing entry while concurrent RCU readers may still be accessing
- * it.
+ * Free the xattr object after an RCU grace period. This must be used when
+ * the xattr was removed from a data structure that concurrent RCU readers
+ * may still be traversing. Can handle @xattr being NULL.
  */
 void simple_xattr_free_rcu(struct simple_xattr *xattr)
 {
@@ -1254,43 +1252,6 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
 	return new_xattr;
 }
 
-/**
- * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry
- * @key: xattr name
- * @node: current node
- *
- * Compare the xattr name with the xattr name attached to @node in the rbtree.
- *
- * Return: Negative value if continuing left, positive if continuing right, 0
- * if the xattr attached to @node matches @key.
- */
-static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node)
-{
-	const char *xattr_name = key;
-	const struct simple_xattr *xattr;
-
-	xattr = rb_entry(node, struct simple_xattr, rb_node);
-	return strcmp(xattr->name, xattr_name);
-}
-
-/**
- * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes
- * @new_node: new node
- * @node: current node
- *
- * Compare the xattr attached to @new_node with the xattr attached to @node.
- *
- * Return: Negative value if continuing left, positive if continuing right, 0
- * if the xattr attached to @new_node matches the xattr attached to @node.
- */
-static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
-					const struct rb_node *node)
-{
-	struct simple_xattr *xattr;
-	xattr = rb_entry(new_node, struct simple_xattr, rb_node);
-	return rbtree_simple_xattr_cmp(xattr->name, node);
-}
-
 static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
 {
 	const char *name = data;
@@ -1336,41 +1297,19 @@ static const struct rhashtable_params simple_xattr_params = {
 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
 		     void *buffer, size_t size)
 {
-	struct simple_xattr *xattr = NULL;
+	struct simple_xattr *xattr;
 	int ret = -ENODATA;
 
-	if (xattrs->use_rhashtable) {
-		guard(rcu)();
-		xattr = rhashtable_lookup(&xattrs->ht, name,
-					  simple_xattr_params);
-		if (xattr) {
-			ret = xattr->size;
-			if (buffer) {
-				if (size < xattr->size)
-					ret = -ERANGE;
-				else
-					memcpy(buffer, xattr->value,
-					       xattr->size);
-			}
-		}
-	} else {
-		struct rb_node *rbp;
-
-		read_lock(&xattrs->lock);
-		rbp = rb_find(name, &xattrs->rb_root,
-			      rbtree_simple_xattr_cmp);
-		if (rbp) {
-			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-			ret = xattr->size;
-			if (buffer) {
-				if (size < xattr->size)
-					ret = -ERANGE;
-				else
-					memcpy(buffer, xattr->value,
-					       xattr->size);
-			}
+	guard(rcu)();
+	xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params);
+	if (xattr) {
+		ret = xattr->size;
+		if (buffer) {
+			if (size < xattr->size)
+				ret = -ERANGE;
+			else
+				memcpy(buffer, xattr->value, xattr->size);
 		}
-		read_unlock(&xattrs->lock);
 	}
 	return ret;
 }
@@ -1398,6 +1337,11 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
  * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
  * XATTR_REPLACE we fail as mentioned above.
  *
+ * Note: Callers must externally serialize writes. All current callers hold
+ * the inode lock for write operations. The lookup->replace/remove sequence
+ * is not atomic with respect to the rhashtable's per-bucket locking, but
+ * is safe because writes are serialized by the caller.
+ *
  * Return: On success, the removed or replaced xattr is returned, to be freed
  * by the caller; or NULL if none. On failure a negative error code is returned.
  */
@@ -1406,7 +1350,7 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
 				      size_t size, int flags)
 {
 	struct simple_xattr *old_xattr = NULL;
-	int err = 0;
+	int err;
 
 	CLASS(simple_xattr, new_xattr)(value, size);
 	if (IS_ERR(new_xattr))
@@ -1418,119 +1362,52 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
 			return ERR_PTR(-ENOMEM);
 	}
 
-	if (xattrs->use_rhashtable) {
-		/*
-		 * Lookup is safe without RCU here since writes are
-		 * serialized by the caller.
-		 */
-		old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
-						   simple_xattr_params);
-
-		if (old_xattr) {
-			/* Fail if XATTR_CREATE is requested and the xattr exists. */
-			if (flags & XATTR_CREATE)
-				return ERR_PTR(-EEXIST);
-
-			if (new_xattr) {
-				err = rhashtable_replace_fast(&xattrs->ht,
-							     &old_xattr->hash_node,
-							     &new_xattr->hash_node,
-							     simple_xattr_params);
-				if (err)
-					return ERR_PTR(err);
-			} else {
-				err = rhashtable_remove_fast(&xattrs->ht,
-							    &old_xattr->hash_node,
-							    simple_xattr_params);
-				if (err)
-					return ERR_PTR(err);
-			}
-		} else {
-			/* Fail if XATTR_REPLACE is requested but no xattr is found. */
-			if (flags & XATTR_REPLACE)
-				return ERR_PTR(-ENODATA);
-
-			/*
-			 * If XATTR_CREATE or no flags are specified together
-			 * with a new value simply insert it.
-			 */
-			if (new_xattr) {
-				err = rhashtable_insert_fast(&xattrs->ht,
-							    &new_xattr->hash_node,
-							    simple_xattr_params);
-				if (err)
-					return ERR_PTR(err);
-			}
-
-			/*
-			 * If XATTR_CREATE or no flags are specified and
-			 * neither an old or new xattr exist then we don't
-			 * need to do anything.
-			 */
-		}
-	} else {
-		struct rb_node *parent = NULL, **rbp;
-		int ret;
-
-		write_lock(&xattrs->lock);
-		rbp = &xattrs->rb_root.rb_node;
-		while (*rbp) {
-			parent = *rbp;
-			ret = rbtree_simple_xattr_cmp(name, *rbp);
-			if (ret < 0)
-				rbp = &(*rbp)->rb_left;
-			else if (ret > 0)
-				rbp = &(*rbp)->rb_right;
-			else
-				old_xattr = rb_entry(*rbp, struct simple_xattr,
-						     rb_node);
-			if (old_xattr)
-				break;
-		}
+	/* Lookup is safe without RCU here since writes are serialized. */
+	old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
+					   simple_xattr_params);
 
-		if (old_xattr) {
-			/* Fail if XATTR_CREATE is requested and the xattr exists. */
-			if (flags & XATTR_CREATE) {
-				err = -EEXIST;
-				goto out_unlock;
-			}
+	if (old_xattr) {
+		/* Fail if XATTR_CREATE is requested and the xattr exists. */
+		if (flags & XATTR_CREATE)
+			return ERR_PTR(-EEXIST);
 
-			if (new_xattr)
-				rb_replace_node(&old_xattr->rb_node,
-						&new_xattr->rb_node,
-						&xattrs->rb_root);
-			else
-				rb_erase(&old_xattr->rb_node,
-					 &xattrs->rb_root);
+		if (new_xattr) {
+			err = rhashtable_replace_fast(&xattrs->ht,
+						      &old_xattr->hash_node,
+						      &new_xattr->hash_node,
+						      simple_xattr_params);
+			if (err)
+				return ERR_PTR(err);
 		} else {
-			/* Fail if XATTR_REPLACE is requested but no xattr is found. */
-			if (flags & XATTR_REPLACE) {
-				err = -ENODATA;
-				goto out_unlock;
-			}
-
-			/*
-			 * If XATTR_CREATE or no flags are specified together
-			 * with a new value simply insert it.
-			 */
-			if (new_xattr) {
-				rb_link_node(&new_xattr->rb_node, parent, rbp);
-				rb_insert_color(&new_xattr->rb_node,
-						&xattrs->rb_root);
-			}
+			err = rhashtable_remove_fast(&xattrs->ht,
+						     &old_xattr->hash_node,
+						     simple_xattr_params);
+			if (err)
+				return ERR_PTR(err);
+		}
+	} else {
+		/* Fail if XATTR_REPLACE is requested but no xattr is found. */
+		if (flags & XATTR_REPLACE)
+			return ERR_PTR(-ENODATA);
 
-			/*
-			 * If XATTR_CREATE or no flags are specified and
-			 * neither an old or new xattr exist then we don't
-			 * need to do anything.
-			 */
+		/*
+		 * If XATTR_CREATE or no flags are specified together with a
+		 * new value simply insert it.
+		 */
+		if (new_xattr) {
+			err = rhashtable_insert_fast(&xattrs->ht,
+						     &new_xattr->hash_node,
+						     simple_xattr_params);
+			if (err)
+				return ERR_PTR(err);
 		}
 
-out_unlock:
-		write_unlock(&xattrs->lock);
-		if (err)
-			return ERR_PTR(err);
+		/*
+		 * If XATTR_CREATE or no flags are specified and neither an
+		 * old or new xattr exist then we don't need to do anything.
+		 */
 	}
+
 	retain_and_null_ptr(new_xattr);
 	return old_xattr;
 }
@@ -1572,6 +1449,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 			  char *buffer, size_t size)
 {
 	bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+	struct rhashtable_iter iter;
 	struct simple_xattr *xattr;
 	ssize_t remaining_size = size;
 	int err = 0;
@@ -1595,77 +1473,34 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 	if (!xattrs)
 		return size - remaining_size;
 
-	if (xattrs->use_rhashtable) {
-		struct rhashtable_iter iter;
-
-		rhashtable_walk_enter(&xattrs->ht, &iter);
-		rhashtable_walk_start(&iter);
-
-		while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
-			if (IS_ERR(xattr)) {
-				if (PTR_ERR(xattr) == -EAGAIN)
-					continue;
-				err = PTR_ERR(xattr);
-				break;
-			}
-
-			/* skip "trusted." attributes for unprivileged callers */
-			if (!trusted && xattr_is_trusted(xattr->name))
-				continue;
+	rhashtable_walk_enter(&xattrs->ht, &iter);
+	rhashtable_walk_start(&iter);
 
-			/* skip MAC labels; these are provided by LSM above */
-			if (xattr_is_maclabel(xattr->name))
+	while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
+		if (IS_ERR(xattr)) {
+			if (PTR_ERR(xattr) == -EAGAIN)
 				continue;
-
-			err = xattr_list_one(&buffer, &remaining_size,
-					     xattr->name);
-			if (err)
-				break;
+			err = PTR_ERR(xattr);
+			break;
 		}
 
-		rhashtable_walk_stop(&iter);
-		rhashtable_walk_exit(&iter);
-	} else {
-		struct rb_node *rbp;
-
-		read_lock(&xattrs->lock);
-		for (rbp = rb_first(&xattrs->rb_root); rbp;
-		     rbp = rb_next(rbp)) {
-			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-
-			/* skip "trusted." attributes for unprivileged callers */
-			if (!trusted && xattr_is_trusted(xattr->name))
-				continue;
+		/* skip "trusted." attributes for unprivileged callers */
+		if (!trusted && xattr_is_trusted(xattr->name))
+			continue;
 
-			/* skip MAC labels; these are provided by LSM above */
-			if (xattr_is_maclabel(xattr->name))
-				continue;
+		/* skip MAC labels; these are provided by LSM above */
+		if (xattr_is_maclabel(xattr->name))
+			continue;
 
-			err = xattr_list_one(&buffer, &remaining_size,
-					     xattr->name);
-			if (err)
-				break;
-		}
-		read_unlock(&xattrs->lock);
+		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
+		if (err)
+			break;
 	}
 
-	return err ? err : size - remaining_size;
-}
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
 
-/**
- * rbtree_simple_xattr_less - compare two xattr rbtree nodes
- * @new_node: new node
- * @node: current node
- *
- * Compare the xattr attached to @new_node with the xattr attached to @node.
- * Note that this function technically tolerates duplicate entries.
- *
- * Return: True if insertion point in the rbtree is found.
- */
-static bool rbtree_simple_xattr_less(struct rb_node *new_node,
-				     const struct rb_node *node)
-{
-	return rbtree_simple_xattr_node_cmp(new_node, node) < 0;
+	return err ? err : size - remaining_size;
 }
 
 /**
@@ -1676,33 +1511,29 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
  * Add an xattr object to @xattrs. This assumes no replacement or removal
  * of matching xattrs is wanted. Should only be called during inode
  * initialization when a few distinct initial xattrs are supposed to be set.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
  */
 int simple_xattr_add(struct simple_xattrs *xattrs,
 		     struct simple_xattr *new_xattr)
 {
-	if (xattrs->use_rhashtable)
-		return rhashtable_insert_fast(&xattrs->ht,
-					      &new_xattr->hash_node,
-					      simple_xattr_params);
-
-	write_lock(&xattrs->lock);
-	rb_add(&new_xattr->rb_node, &xattrs->rb_root,
-	       rbtree_simple_xattr_less);
-	write_unlock(&xattrs->lock);
-	return 0;
+	return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node,
+				      simple_xattr_params);
 }
 
 /**
  * simple_xattrs_init - initialize new xattr header
  * @xattrs: header to initialize
  *
- * Initialize relevant fields of a an xattr header.
+ * Initialize the rhashtable used to store xattr objects.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
  */
-void simple_xattrs_init(struct simple_xattrs *xattrs)
+int simple_xattrs_init(struct simple_xattrs *xattrs)
 {
-	xattrs->use_rhashtable = false;
-	xattrs->rb_root = RB_ROOT;
-	rwlock_init(&xattrs->lock);
+	return rhashtable_init(&xattrs->ht, &simple_xattr_params);
 }
 
 /**
@@ -1710,7 +1541,8 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
  *
  * Dynamically allocate a simple_xattrs header and initialize the
  * underlying rhashtable. This is intended for consumers that want
- * rhashtable-based xattr storage.
+ * to lazily allocate xattr storage only when the first xattr is set,
+ * avoiding the per-inode rhashtable overhead when no xattrs are used.
  *
  * Return: On success a new simple_xattrs is returned. On failure an
  * ERR_PTR is returned.
@@ -1718,14 +1550,15 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
 struct simple_xattrs *simple_xattrs_alloc(void)
 {
 	struct simple_xattrs *xattrs __free(kfree) = NULL;
+	int ret;
 
 	xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL);
 	if (!xattrs)
 		return ERR_PTR(-ENOMEM);
 
-	xattrs->use_rhashtable = true;
-	if (rhashtable_init(&xattrs->ht, &simple_xattr_params))
-		return ERR_PTR(-ENOMEM);
+	ret = simple_xattrs_init(xattrs);
+	if (ret)
+		return ERR_PTR(ret);
 
 	return no_free_ptr(xattrs);
 }
@@ -1784,28 +1617,10 @@ static void simple_xattr_ht_free(void *ptr, void *arg)
  */
 void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
 {
+	might_sleep();
+
 	if (freed_space)
 		*freed_space = 0;
-
-	if (xattrs->use_rhashtable) {
-		rhashtable_free_and_destroy(&xattrs->ht,
-					    simple_xattr_ht_free, freed_space);
-	} else {
-		struct rb_node *rbp;
-
-		rbp = rb_first(&xattrs->rb_root);
-		while (rbp) {
-			struct simple_xattr *xattr;
-			struct rb_node *rbp_next;
-
-			rbp_next = rb_next(rbp);
-			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-			rb_erase(&xattr->rb_node, &xattrs->rb_root);
-			if (freed_space)
-				*freed_space += simple_xattr_space(xattr->name,
-								   xattr->size);
-			simple_xattr_free(xattr);
-			rbp = rbp_next;
-		}
-	}
+	rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free,
+				    freed_space);
 }
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 6e619e185e90..3b5a5fd684eb 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -107,18 +107,10 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
 }
 
 struct simple_xattrs {
-	bool use_rhashtable;
-	union {
-		struct {
-			struct rb_root rb_root;
-			rwlock_t lock;
-		};
-		struct rhashtable ht;
-	};
+	struct rhashtable ht;
 };
 
 struct simple_xattr {
-	struct rb_node rb_node;
 	struct rhash_head hash_node;
 	struct rcu_head rcu;
 	char *name;
@@ -126,7 +118,7 @@ struct simple_xattr {
 	char value[] __counted_by(size);
 };
 
-void simple_xattrs_init(struct simple_xattrs *xattrs);
+int simple_xattrs_init(struct simple_xattrs *xattrs);
 struct simple_xattrs *simple_xattrs_alloc(void);
 struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
 					       const void *value, int flags);
-- 
cgit v1.2.3


From 4fbe9e78bb415dd632ff63a9f620af0be58ef820 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 16 Feb 2026 14:32:05 +0100
Subject: xattr: move user limits for xattrs to generic infra

Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-9-c2efa4f74cb7@kernel.org
Acked-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/kernfs/inode.c           | 75 ++-------------------------------------------
 fs/kernfs/kernfs-internal.h |  3 +-
 fs/xattr.c                  | 65 +++++++++++++++++++++++++++++++++++++++
 include/linux/kernfs.h      |  2 --
 include/linux/xattr.h       | 18 +++++++++++
 5 files changed, 87 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index dfc3315b5afc..1de10500842d 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -45,8 +45,7 @@ static struct kernfs_iattrs *__kernfs_iattrs(struct kernfs_node *kn, bool alloc)
 	ret->ia_mtime = ret->ia_atime;
 	ret->ia_ctime = ret->ia_atime;
 
-	atomic_set(&ret->nr_user_xattrs, 0);
-	atomic_set(&ret->user_xattr_size, 0);
+	simple_xattr_limits_init(&ret->xattr_limits);
 
 	/* If someone raced us, recognize it. */
 	if (!try_cmpxchg(&kn->iattr, &attr, ret))
@@ -355,69 +354,6 @@ static int kernfs_vfs_xattr_set(const struct xattr_handler *handler,
 	return kernfs_xattr_set(kn, name, value, size, flags);
 }
 
-static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
-				     const char *full_name,
-				     struct simple_xattrs *xattrs,
-				     const void *value, size_t size, int flags)
-{
-	struct kernfs_iattrs *attr = kernfs_iattrs_noalloc(kn);
-	atomic_t *sz = &attr->user_xattr_size;
-	atomic_t *nr = &attr->nr_user_xattrs;
-	struct simple_xattr *old_xattr;
-	int ret;
-
-	if (atomic_inc_return(nr) > KERNFS_MAX_USER_XATTRS) {
-		ret = -ENOSPC;
-		goto dec_count_out;
-	}
-
-	if (atomic_add_return(size, sz) > KERNFS_USER_XATTR_SIZE_LIMIT) {
-		ret = -ENOSPC;
-		goto dec_size_out;
-	}
-
-	old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
-	if (!old_xattr)
-		return 0;
-
-	if (IS_ERR(old_xattr)) {
-		ret = PTR_ERR(old_xattr);
-		goto dec_size_out;
-	}
-
-	ret = 0;
-	size = old_xattr->size;
-	simple_xattr_free_rcu(old_xattr);
-dec_size_out:
-	atomic_sub(size, sz);
-dec_count_out:
-	atomic_dec(nr);
-	return ret;
-}
-
-static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
-				    const char *full_name,
-				    struct simple_xattrs *xattrs,
-				    const void *value, size_t size, int flags)
-{
-	struct kernfs_iattrs *attr = kernfs_iattrs_noalloc(kn);
-	atomic_t *sz = &attr->user_xattr_size;
-	atomic_t *nr = &attr->nr_user_xattrs;
-	struct simple_xattr *old_xattr;
-
-	old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
-	if (!old_xattr)
-		return 0;
-
-	if (IS_ERR(old_xattr))
-		return PTR_ERR(old_xattr);
-
-	atomic_sub(old_xattr->size, sz);
-	atomic_dec(nr);
-	simple_xattr_free_rcu(old_xattr);
-	return 0;
-}
-
 static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
 				     struct mnt_idmap *idmap,
 				     struct dentry *unused, struct inode *inode,
@@ -440,13 +376,8 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
 	if (IS_ERR_OR_NULL(xattrs))
 		return PTR_ERR(xattrs);
 
-	if (value)
-		return kernfs_vfs_user_xattr_add(kn, full_name, xattrs,
-						 value, size, flags);
-	else
-		return kernfs_vfs_user_xattr_rm(kn, full_name, xattrs,
-						value, size, flags);
-
+	return simple_xattr_set_limited(xattrs, &attrs->xattr_limits,
+					full_name, value, size, flags);
 }
 
 static const struct xattr_handler kernfs_trusted_xattr_handler = {
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 1324ed8c0661..1d3831e3a270 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -27,8 +27,7 @@ struct kernfs_iattrs {
 	struct timespec64	ia_ctime;
 
 	struct simple_xattrs	*xattrs;
-	atomic_t		nr_user_xattrs;
-	atomic_t		user_xattr_size;
+	struct simple_xattr_limits xattr_limits;
 };
 
 struct kernfs_root {
diff --git a/fs/xattr.c b/fs/xattr.c
index 328ed7558dfc..5e559b1c651f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1427,6 +1427,71 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
 	return old_xattr;
 }
 
+static inline void simple_xattr_limits_dec(struct simple_xattr_limits *limits,
+					   size_t size)
+{
+	atomic_sub(size, &limits->xattr_size);
+	atomic_dec(&limits->nr_xattrs);
+}
+
+static inline int simple_xattr_limits_inc(struct simple_xattr_limits *limits,
+					  size_t size)
+{
+	if (atomic_inc_return(&limits->nr_xattrs) > SIMPLE_XATTR_MAX_NR) {
+		atomic_dec(&limits->nr_xattrs);
+		return -ENOSPC;
+	}
+
+	if (atomic_add_return(size, &limits->xattr_size) <= SIMPLE_XATTR_MAX_SIZE)
+		return 0;
+
+	simple_xattr_limits_dec(limits, size);
+	return -ENOSPC;
+}
+
+/**
+ * simple_xattr_set_limited - set an xattr with per-inode user.* limits
+ * @xattrs: the header of the xattr object
+ * @limits: per-inode limit counters for user.* xattrs
+ * @name: the name of the xattr to set or remove
+ * @value: the value to store (NULL to remove)
+ * @size: the size of @value
+ * @flags: XATTR_CREATE, XATTR_REPLACE, or 0
+ *
+ * Like simple_xattr_set(), but enforces per-inode count and total value size
+ * limits for user.* xattrs. Uses speculative pre-increment of the atomic
+ * counters to avoid races without requiring external locks.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
+ */
+int simple_xattr_set_limited(struct simple_xattrs *xattrs,
+			     struct simple_xattr_limits *limits,
+			     const char *name, const void *value,
+			     size_t size, int flags)
+{
+	struct simple_xattr *old_xattr;
+	int ret;
+
+	if (value) {
+		ret = simple_xattr_limits_inc(limits, size);
+		if (ret)
+			return ret;
+	}
+
+	old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
+	if (IS_ERR(old_xattr)) {
+		if (value)
+			simple_xattr_limits_dec(limits, size);
+		return PTR_ERR(old_xattr);
+	}
+	if (old_xattr) {
+		simple_xattr_limits_dec(limits, old_xattr->size);
+		simple_xattr_free_rcu(old_xattr);
+	}
+	return 0;
+}
+
 static bool xattr_is_trusted(const char *name)
 {
 	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index b5a5f32fdfd1..d8f57f0af5e4 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -99,8 +99,6 @@ enum kernfs_node_type {
 
 #define KERNFS_TYPE_MASK		0x000f
 #define KERNFS_FLAG_MASK		~KERNFS_TYPE_MASK
-#define KERNFS_MAX_USER_XATTRS		128
-#define KERNFS_USER_XATTR_SIZE_LIMIT	(128 << 10)
 
 enum kernfs_node_flag {
 	KERNFS_ACTIVATED	= 0x0010,
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 3b5a5fd684eb..8b6601367eae 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -118,6 +118,20 @@ struct simple_xattr {
 	char value[] __counted_by(size);
 };
 
+#define SIMPLE_XATTR_MAX_NR		128
+#define SIMPLE_XATTR_MAX_SIZE		(128 << 10)
+
+struct simple_xattr_limits {
+	atomic_t	nr_xattrs;	/* current user.* xattr count */
+	atomic_t	xattr_size;	/* current total user.* value bytes */
+};
+
+static inline void simple_xattr_limits_init(struct simple_xattr_limits *limits)
+{
+	atomic_set(&limits->nr_xattrs, 0);
+	atomic_set(&limits->xattr_size, 0);
+}
+
 int simple_xattrs_init(struct simple_xattrs *xattrs);
 struct simple_xattrs *simple_xattrs_alloc(void);
 struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
@@ -132,6 +146,10 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
 struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
 				      const char *name, const void *value,
 				      size_t size, int flags);
+int simple_xattr_set_limited(struct simple_xattrs *xattrs,
+			     struct simple_xattr_limits *limits,
+			     const char *name, const void *value,
+			     size_t size, int flags);
 ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 			  char *buffer, size_t size);
 int simple_xattr_add(struct simple_xattrs *xattrs,
-- 
cgit v1.2.3


From cc2f5e2aeb6c69556837e45756b3ddded98b3898 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 28 Feb 2026 17:48:02 -0800
Subject: pinctrl: pinconf-generic: fix an enum name description

Correct an enum name in a kernel-doc comment to avoid kernel-doc
warnings:

Warning: include/linux/pinctrl/pinconf-generic.h:161 Enum value
 'PIN_CONFIG_SKEW_DELAY_OUTPUT_PS' not described in enum 'pin_config_param'
Warning: include/linux/pinctrl/pinconf-generic.h:161 Excess enum value
 '@PIN_CONFIG_SKEW_DELAY_OUPUT_PS' description in 'pin_config_param'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Linus Walleij <linusw@kernel.org>
---
 include/linux/pinctrl/pinconf-generic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 89277808ea61..531dc3e9b3f7 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -115,7 +115,7 @@ struct pinctrl_map;
  * @PIN_CONFIG_SKEW_DELAY_INPUT_PS: if the pin has independent values for the
  *	programmable skew rate (on inputs) and latch delay (on outputs), then
  *	this parameter specifies the clock skew only. The argument is in ps.
- * @PIN_CONFIG_SKEW_DELAY_OUPUT_PS: if the pin has independent values for the
+ * @PIN_CONFIG_SKEW_DELAY_OUTPUT_PS: if the pin has independent values for the
  *	programmable skew rate (on inputs) and latch delay (on outputs), then
  *	this parameter specifies the latch delay only. The argument is in ps.
  * @PIN_CONFIG_SLEEP_HARDWARE_STATE: indicate this is sleep related state.
-- 
cgit v1.2.3


From 94798081732abfb5748471d5c3cced6ff187fa36 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 13 Feb 2026 18:52:43 -0800
Subject: driver core: platform: add kerneldoc to struct platform_device_info

Add kernel documentation for struct platform_device_info and its
individual members. While at it remove an extra indent level from the
structure definition.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260214025246.2095239-2-dmitry.torokhov@gmail.com
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 include/linux/platform_device.h | 53 ++++++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 813da101b5bf..5f54217930e1 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -118,22 +118,53 @@ extern int platform_get_irq_byname_optional(struct platform_device *dev,
 					    const char *name);
 extern int platform_add_devices(struct platform_device **, int);
 
+/**
+ * struct platform_device_info - set of parameters for creating a platform device
+ * @parent: parent device for the new platform device.
+ * @fwnode: firmware node associated with the device.
+ * @of_node_reused: indicates that device tree node associated with the device
+ *	is shared with another device, typically its ancestor. Setting this to
+ *	%true prevents the device from being matched via the OF match table,
+ *	and stops the device core from automatically binding pinctrl
+ *	configuration to avoid disrupting the other device.
+ * @name: name of the device.
+ * @id: instance ID of the device. Use %PLATFORM_DEVID_NONE if there is only
+ *	one instance of the device, or %PLATFORM_DEVID_AUTO to let the
+ *	kernel automatically assign a unique instance ID.
+ * @res: set of resources to attach to the device.
+ * @num_res: number of entries in @res.
+ * @data: device-specific data for this platform device.
+ * @size_data: size of device-specific data.
+ * @dma_mask: DMA mask for the device.
+ * @properties: a set of software properties for the device. If provided,
+ *	a managed software node will be automatically created and
+ *	assigned to the device. The properties array must be terminated
+ *	with a sentinel entry.
+ *
+ * This structure is used to hold information needed to create and register
+ * a platform device using platform_device_register_full().
+ *
+ * platform_device_register_full() makes deep copies of @name, @res, @data and
+ * @properties, so the caller does not need to keep them after registration.
+ * If the registration is performed during initialization, these can be marked
+ * as __initconst.
+ */
 struct platform_device_info {
-		struct device *parent;
-		struct fwnode_handle *fwnode;
-		bool of_node_reused;
+	struct device *parent;
+	struct fwnode_handle *fwnode;
+	bool of_node_reused;
 
-		const char *name;
-		int id;
+	const char *name;
+	int id;
 
-		const struct resource *res;
-		unsigned int num_res;
+	const struct resource *res;
+	unsigned int num_res;
 
-		const void *data;
-		size_t size_data;
-		u64 dma_mask;
+	const void *data;
+	size_t size_data;
+	u64 dma_mask;
 
-		const struct property_entry *properties;
+	const struct property_entry *properties;
 };
 extern struct platform_device *platform_device_register_full(
 		const struct platform_device_info *pdevinfo);
-- 
cgit v1.2.3


From 0fc434bc2c45fceb9356f2138911db0f454b8ca6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 13 Feb 2026 18:52:44 -0800
Subject: driver core: platform: allow attaching software nodes when creating
 devices

Extend platform_device_info structure with an optional pointer to a
software node to be used as a secondary firmware node for the device
being created. If software node has not been registered yet it will be
automatically registered.

This reduces boilerplate needed when switching legacy board code to
static device properties/GPIO references.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260214025246.2095239-3-dmitry.torokhov@gmail.com
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 drivers/base/platform.c         | 9 ++++++++-
 include/linux/platform_device.h | 7 ++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index b45d41b018ca..ec467ccd05b3 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -850,6 +850,9 @@ struct platform_device *platform_device_register_full(
 	int ret;
 	struct platform_device *pdev;
 
+	if (pdevinfo->swnode && pdevinfo->properties)
+		return ERR_PTR(-EINVAL);
+
 	pdev = platform_device_alloc(pdevinfo->name, pdevinfo->id);
 	if (!pdev)
 		return ERR_PTR(-ENOMEM);
@@ -875,7 +878,11 @@ struct platform_device *platform_device_register_full(
 	if (ret)
 		goto err;
 
-	if (pdevinfo->properties) {
+	if (pdevinfo->swnode) {
+		ret = device_add_software_node(&pdev->dev, pdevinfo->swnode);
+		if (ret)
+			goto err;
+	} else if (pdevinfo->properties) {
 		ret = device_create_managed_software_node(&pdev->dev,
 							  pdevinfo->properties, NULL);
 		if (ret)
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 5f54217930e1..754e4bf2771a 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -136,10 +136,14 @@ extern int platform_add_devices(struct platform_device **, int);
  * @data: device-specific data for this platform device.
  * @size_data: size of device-specific data.
  * @dma_mask: DMA mask for the device.
+ * @swnode: a secondary software node to be attached to the device. The node
+ *	will be automatically registered and its lifetime tied to the platform
+ *	device if it is not registered yet.
  * @properties: a set of software properties for the device. If provided,
  *	a managed software node will be automatically created and
  *	assigned to the device. The properties array must be terminated
- *	with a sentinel entry.
+ *	with a sentinel entry. Specifying both @properties and @swnode is not
+ *	allowed.
  *
  * This structure is used to hold information needed to create and register
  * a platform device using platform_device_register_full().
@@ -164,6 +168,7 @@ struct platform_device_info {
 	size_t size_data;
 	u64 dma_mask;
 
+	const struct software_node *swnode;
 	const struct property_entry *properties;
 };
 extern struct platform_device *platform_device_register_full(
-- 
cgit v1.2.3


From ba51cf9fcf511df8c7026feda4b7d65999d3517c Mon Sep 17 00:00:00 2001
From: Michael Guralnik <michaelgur@nvidia.com>
Date: Thu, 26 Feb 2026 15:52:12 +0200
Subject: net/mlx5: Drop MR cache related code

Following mlx5_ib move to using FRMR pools, drop all unused code of MR
cache.

Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Edward Srouji <edwards@nvidia.com>
Link: https://patch.msgid.link/20260226-frmr_pools-v4-7-95360b54f15e@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 67 +-------------------------
 include/linux/mlx5/driver.h                    | 11 -----
 2 files changed, 1 insertion(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fdc3ba20912e..4b59f3f7c6f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -110,74 +110,9 @@ static struct mlx5_profile profile[] = {
 
 	},
 	[2] = {
-		.mask		= MLX5_PROF_MASK_QP_SIZE |
-				  MLX5_PROF_MASK_MR_CACHE,
+		.mask		= MLX5_PROF_MASK_QP_SIZE,
 		.log_max_qp	= LOG_MAX_SUPPORTED_QPS,
 		.num_cmd_caches = MLX5_NUM_COMMAND_CACHES,
-		.mr_cache[0]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[1]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[2]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[3]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[4]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[5]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[6]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[7]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[8]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[9]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[10]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[11]	= {
-			.size	= 500,
-			.limit	= 250
-		},
-		.mr_cache[12]	= {
-			.size	= 64,
-			.limit	= 32
-		},
-		.mr_cache[13]	= {
-			.size	= 32,
-			.limit	= 16
-		},
-		.mr_cache[14]	= {
-			.size	= 16,
-			.limit	= 8
-		},
-		.mr_cache[15]	= {
-			.size	= 8,
-			.limit	= 4
-		},
 	},
 	[3] = {
 		.mask		= MLX5_PROF_MASK_QP_SIZE,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 04dcd09f7517..27d64f09683f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -705,23 +705,12 @@ struct mlx5_st;
 
 enum {
 	MLX5_PROF_MASK_QP_SIZE		= (u64)1 << 0,
-	MLX5_PROF_MASK_MR_CACHE		= (u64)1 << 1,
-};
-
-enum {
-	MKEY_CACHE_LAST_STD_ENTRY = 20,
-	MLX5_IMR_KSM_CACHE_ENTRY,
-	MAX_MKEY_CACHE_ENTRIES
 };
 
 struct mlx5_profile {
 	u64	mask;
 	u8	log_max_qp;
 	u8	num_cmd_caches;
-	struct {
-		int	size;
-		int	limit;
-	} mr_cache[MAX_MKEY_CACHE_ENTRIES];
 };
 
 struct mlx5_hca_cap {
-- 
cgit v1.2.3


From fe3c03b84ae69f34992a5e72cbb8384b9ebad738 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Mar 2026 16:51:54 -0800
Subject: cred: fix kernel-doc warnings in cred.h

Use the correct function parameter names, function names, or kernel-doc
format, and add function return comment sections to avoid kernel-doc
warnings:

Warning: include/linux/cred.h:43 function parameter 'gi' not described
 in 'get_group_info'
Warning: include/linux/cred.h:43 No description found for return value
 of 'get_group_info'
Warning: include/linux/cred.h:213 No description found for return value
 of 'get_cred_many'
Warning: include/linux/cred.h:260 function parameter '_cred' not described
 in 'put_cred_many'
Warning: include/linux/cred.h:260 expecting prototype for put_cred().
 Prototype was for put_cred_many() instead

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
[PM: subject tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/cred.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index ed1609d78cd7..c6676265a985 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -33,12 +33,14 @@ struct group_info {
 
 /**
  * get_group_info - Get a reference to a group info structure
- * @group_info: The group info to reference
+ * @gi: The group info to reference
  *
  * This gets a reference to a set of supplementary groups.
  *
  * If the caller is accessing a task's credentials, they must hold the RCU read
  * lock when reading.
+ *
+ * Returns: @gi
  */
 static inline struct group_info *get_group_info(struct group_info *gi)
 {
@@ -209,6 +211,8 @@ DEFINE_CLASS(override_creds,
  * usage count.  The purpose of this is to attempt to catch at compile time the
  * accidental alteration of a set of credentials that should be considered
  * immutable.
+ *
+ * Returns: @cred when the references are acquired, NULL otherwise.
  */
 static inline const struct cred *get_cred_many(const struct cred *cred, int nr)
 {
@@ -246,8 +250,8 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred)
 }
 
 /**
- * put_cred - Release a reference to a set of credentials
- * @cred: The credentials to release
+ * put_cred_many - Release a reference to a set of credentials
+ * @_cred: The credentials to release
  * @nr: Number of references to release
  *
  * Release a reference to a set of credentials, deleting them when the last ref
-- 
cgit v1.2.3


From 0b16e69d17d8c35c5c9d5918bf596c75a44655d3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 24 Feb 2026 17:20:36 -0800
Subject: KVM: x86: Use scratch field in MMIO fragment to hold small write
 values

When exiting to userspace to service an emulated MMIO write, copy the
to-be-written value to a scratch field in the MMIO fragment if the size
of the data payload is 8 bytes or less, i.e. can fit in a single chunk,
instead of pointing the fragment directly at the source value.

This fixes a class of use-after-free bugs that occur when the emulator
initiates a write using an on-stack, local variable as the source, the
write splits a page boundary, *and* both pages are MMIO pages.  Because
KVM's ABI only allows for physically contiguous MMIO requests, accesses
that split MMIO pages are separated into two fragments, and are sent to
userspace one at a time.  When KVM attempts to complete userspace MMIO in
response to KVM_RUN after the first fragment, KVM will detect the second
fragment and generate a second userspace exit, and reference the on-stack
variable.

The issue is most visible if the second KVM_RUN is performed by a separate
task, in which case the stack of the initiating task can show up as truly
freed data.

  ==================================================================
  BUG: KASAN: use-after-free in complete_emulated_mmio+0x305/0x420
  Read of size 1 at addr ffff888009c378d1 by task syz-executor417/984

  CPU: 1 PID: 984 Comm: syz-executor417 Not tainted 5.10.0-182.0.0.95.h2627.eulerosv2r13.x86_64 #3
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 Call Trace:
  dump_stack+0xbe/0xfd
  print_address_description.constprop.0+0x19/0x170
  __kasan_report.cold+0x6c/0x84
  kasan_report+0x3a/0x50
  check_memory_region+0xfd/0x1f0
  memcpy+0x20/0x60
  complete_emulated_mmio+0x305/0x420
  kvm_arch_vcpu_ioctl_run+0x63f/0x6d0
  kvm_vcpu_ioctl+0x413/0xb20
  __se_sys_ioctl+0x111/0x160
  do_syscall_64+0x30/0x40
  entry_SYSCALL_64_after_hwframe+0x67/0xd1
  RIP: 0033:0x42477d
  Code: <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
  RSP: 002b:00007faa8e6890e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
  RAX: ffffffffffffffda RBX: 00000000004d7338 RCX: 000000000042477d
  RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000005
  RBP: 00000000004d7330 R08: 00007fff28d546df R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004d733c
  R13: 0000000000000000 R14: 000000000040a200 R15: 00007fff28d54720

  The buggy address belongs to the page:
  page:0000000029f6a428 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x9c37
  flags: 0xfffffc0000000(node=0|zone=1|lastcpupid=0x1fffff)
  raw: 000fffffc0000000 0000000000000000 ffffea0000270dc8 0000000000000000
  raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected

  Memory state around the buggy address:
  ffff888009c37780: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
  ffff888009c37800: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
  >ffff888009c37880: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
                                                   ^
  ffff888009c37900: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
  ffff888009c37980: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
  ==================================================================

The bug can also be reproduced with a targeted KVM-Unit-Test by hacking
KVM to fill a large on-stack variable in complete_emulated_mmio(), i.e. by
overwrite the data value with garbage.

Limit the use of the scratch fields to 8-byte or smaller accesses, and to
just writes, as larger accesses and reads are not affected thanks to
implementation details in the emulator, but add a sanity check to ensure
those details don't change in the future.  Specifically, KVM never uses
on-stack variables for accesses larger that 8 bytes, e.g. uses an operand
in the emulator context, and *all* reads are buffered through the mem_read
cache.

Note!  Using the scratch field for reads is not only unnecessary, it's
also extremely difficult to handle correctly.  As above, KVM buffers all
reads through the mem_read cache, and heavily relies on that behavior when
re-emulating the instruction after a userspace MMIO read exit.  If a read
splits a page, the first page is NOT an MMIO page, and the second page IS
an MMIO page, then the MMIO fragment needs to point at _just_ the second
chunk of the destination, i.e. its position in the mem_read cache.  Taking
the "obvious" approach of copying the fragment value into the destination
when re-emulating the instruction would clobber the first chunk of the
destination, i.e. would clobber the data that was read from guest memory.

Fixes: f78146b0f923 ("KVM: Fix page-crossing MMIO")
Suggested-by: Yashu Zhang <zhangjiaji1@huawei.com>
Reported-by: Yashu Zhang <zhangjiaji1@huawei.com>
Closes: https://lore.kernel.org/all/369eaaa2b3c1425c85e8477066391bc7@huawei.com
Cc: stable@vger.kernel.org
Tested-by: Tom Lendacky <thomas.lendacky@gmail.com>
Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Link: https://patch.msgid.link/20260225012049.920665-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/x86.c       | 14 +++++++++++++-
 include/linux/kvm_host.h |  3 ++-
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a03530795707..9bb9d7f078fc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8225,7 +8225,13 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
 	WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
 	frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
 	frag->gpa = gpa;
-	frag->data = val;
+	if (write && bytes <= 8u) {
+		frag->val = 0;
+		frag->data = &frag->val;
+		memcpy(&frag->val, val, bytes);
+	} else {
+		frag->data = val;
+	}
 	frag->len = bytes;
 	return X86EMUL_CONTINUE;
 }
@@ -8240,6 +8246,9 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
 	gpa_t gpa;
 	int rc;
 
+	if (WARN_ON_ONCE((bytes > 8u || !ops->write) && object_is_on_stack(val)))
+		return X86EMUL_UNHANDLEABLE;
+
 	if (ops->read_write_prepare &&
 		  ops->read_write_prepare(vcpu, val, bytes))
 		return X86EMUL_CONTINUE;
@@ -11846,6 +11855,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 		frag++;
 		vcpu->mmio_cur_fragment++;
 	} else {
+		if (WARN_ON_ONCE(frag->data == &frag->val))
+			return -EIO;
+
 		/* Go forward to the next mmio piece. */
 		frag->data += len;
 		frag->gpa += len;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 34759a262b28..abb309372035 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -318,7 +318,8 @@ static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop)
 struct kvm_mmio_fragment {
 	gpa_t gpa;
 	void *data;
-	unsigned len;
+	u64 val;
+	unsigned int len;
 };
 
 struct kvm_vcpu {
-- 
cgit v1.2.3


From 44a2ec96d374806ee74454ea915615536a76b152 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Fri, 27 Feb 2026 09:53:13 +0000
Subject: net: stmmac: remove plat_dat->port_node

There are repeated instances of:

	fwnode = priv->plat->port_node;
	if (!fwnode)
		fwnode = dev_fwnode(priv->device);

However, the only place that ->port_node is set is
stmmac_probe_config_dt():

	struct device_node *np = pdev->dev.of_node;
...
	/* PHYLINK automatically parses the phy-handle property */
	plat->port_node = of_fwnode_handle(np);

which is equivalent to dev_fwnode(&pdev->dev) and, as priv->device
will be &pdev->dev, is also equivalent to dev_fwnode(priv->device).

Thus, plat_dat->port_node doesn't provide any extra benefit over
using dev_fwnode(priv->device) directly.

There is one case where port_node is used directly, which can be
found in stmmac_pcs_setup(). This may cause a change of behaviour
as PCI drivers do not populate plat_dat->port_node, but
dev_fwnode(priv->device) may be valid. PCI-based stmmac should
be tested.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1vvuX3-0000000Avme-3oej@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c     | 13 +++----------
 drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c     |  7 ++-----
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c |  3 ---
 include/linux/stmmac.h                                |  1 -
 4 files changed, 5 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5c144ac259af..4e788f54bbbc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1252,10 +1252,7 @@ static int stmmac_init_phy(struct net_device *dev)
 	    xpcs_get_an_mode(priv->hw->xpcs, mode) == DW_AN_C73)
 		return 0;
 
-	fwnode = priv->plat->port_node;
-	if (!fwnode)
-		fwnode = dev_fwnode(priv->device);
-
+	fwnode = dev_fwnode(priv->device);
 	if (fwnode)
 		phy_fwnode = fwnode_get_phy_node(fwnode);
 	else
@@ -1313,7 +1310,6 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv)
 {
 	struct stmmac_mdio_bus_data *mdio_bus_data;
 	struct phylink_config *config;
-	struct fwnode_handle *fwnode;
 	struct phylink_pcs *pcs;
 	struct phylink *phylink;
 
@@ -1400,11 +1396,8 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv)
 			config->wol_mac_support |= WAKE_MAGIC;
 	}
 
-	fwnode = priv->plat->port_node;
-	if (!fwnode)
-		fwnode = dev_fwnode(priv->device);
-
-	phylink = phylink_create(config, fwnode, priv->plat->phy_interface,
+	phylink = phylink_create(config, dev_fwnode(priv->device),
+				 priv->plat->phy_interface,
 				 &stmmac_phylink_mac_ops);
 	if (IS_ERR(phylink))
 		return PTR_ERR(phylink);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index a7c2496b39f2..485a0d790baa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -430,7 +430,7 @@ int stmmac_pcs_setup(struct net_device *ndev)
 	struct dw_xpcs *xpcs = NULL;
 	int addr, ret;
 
-	devnode = priv->plat->port_node;
+	devnode = dev_fwnode(priv->device);
 
 	if (priv->plat->pcs_init) {
 		ret = priv->plat->pcs_init(priv);
@@ -649,10 +649,7 @@ int stmmac_mdio_register(struct net_device *ndev)
 		stmmac_xgmac2_mdio_read_c45(new_bus, 0, 0, 0);
 
 	/* If fixed-link is set, skip PHY scanning */
-	fwnode = priv->plat->port_node;
-	if (!fwnode)
-		fwnode = dev_fwnode(priv->device);
-
+	fwnode = dev_fwnode(priv->device);
 	if (fwnode) {
 		fixed_node = fwnode_get_named_child_node(fwnode, "fixed-link");
 		if (fixed_node) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 5c9fd91a1db9..c34998486293 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -446,9 +446,6 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
 	 * they are not converted to phylink. */
 	plat->phy_node = of_parse_phandle(np, "phy-handle", 0);
 
-	/* PHYLINK automatically parses the phy-handle property */
-	plat->port_node = of_fwnode_handle(np);
-
 	/* Get max speed of operation from device tree */
 	of_property_read_u32(np, "max-speed", &plat->max_speed);
 
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index b96ae9dadfab..77e51eaa5ec5 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -225,7 +225,6 @@ struct plat_stmmacenet_data {
 	phy_interface_t phy_interface;
 	struct stmmac_mdio_bus_data *mdio_bus_data;
 	struct device_node *phy_node;
-	struct fwnode_handle *port_node;
 	struct device_node *mdio_node;
 	struct stmmac_dma_cfg *dma_cfg;
 	struct stmmac_safety_feature_cfg *safety_feat_cfg;
-- 
cgit v1.2.3


From 1558705afbb293549fdedd539682bc5240e1964b Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Fri, 27 Feb 2026 09:53:59 +0000
Subject: net: stmmac: make dma_cfg mixed/fixed burst boolean

struct stmmac_dma_cfg mixed_burst/fixed_burst members are both boolean
in nature - of_property_read_bool() are used to read these from DT, and
they are only tested for non-zero values. Use bool to avoid unnecessary
padding in this structure.

Update dwmac-intel to initialise these using true rather than '1', and
remove the '0' initialisers as the struct is already zero initialised
on allocation.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1vvuXn-0000000AvnX-4A1u@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 +---
 include/linux/stmmac.h                            | 4 ++--
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 92d77b0c2f54..ece2a0c38562 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -636,8 +636,6 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 
 	plat->dma_cfg->pbl = 32;
 	plat->dma_cfg->pblx8 = true;
-	plat->dma_cfg->fixed_burst = 0;
-	plat->dma_cfg->mixed_burst = 0;
 	plat->dma_cfg->aal = 0;
 	plat->dma_cfg->dche = true;
 
@@ -1106,7 +1104,7 @@ static int quark_default_data(struct pci_dev *pdev,
 
 	plat->dma_cfg->pbl = 16;
 	plat->dma_cfg->pblx8 = true;
-	plat->dma_cfg->fixed_burst = 1;
+	plat->dma_cfg->fixed_burst = true;
 	/* AXI (TODO) */
 
 	return 0;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 77e51eaa5ec5..2fc169c7117e 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -97,8 +97,8 @@ struct stmmac_dma_cfg {
 	int txpbl;
 	int rxpbl;
 	bool pblx8;
-	int fixed_burst;
-	int mixed_burst;
+	bool fixed_burst;
+	bool mixed_burst;
 	bool aal;
 	bool eame;
 	bool multi_msi_en;
-- 
cgit v1.2.3


From 4480d5fa1f6ebe7dfc546e14371d63c8b915a82d Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@google.com>
Date: Sat, 28 Feb 2026 22:17:31 +0000
Subject: ipmr/ip6mr: Convert net->ipv[46].ipmr_seq to atomic_t.

We will no longer hold RTNL for ipmr_mfc_add() and ipmr_mfc_delete().

MFC entry can be loosely connected with VIF by its index for
mrt->vif_table[] (stored in mfc_parent), but the two tables are
not synchronised.  i.e. Even if VIF 1 is removed, MFC for VIF 1
is not automatically removed.

The only field that the MFC/VIF interfaces share is
net->ipv[46].ipmr_seq, which is protected by RTNL.

Adding a new mutex for both just to protect a single field is overkill.

Let's convert the field to atomic_t.

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260228221800.1082070-14-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mroute_base.h | 8 ++++----
 include/net/netns/ipv4.h    | 2 +-
 include/net/netns/ipv6.h    | 2 +-
 net/ipv4/ipmr.c             | 4 ++--
 net/ipv6/ip6mr.c            | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 0075f6e5c3da..0baa6f994da9 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -76,7 +76,7 @@ static inline int mr_call_vif_notifiers(struct net *net,
 					struct vif_device *vif,
 					struct net_device *vif_dev,
 					unsigned short vif_index, u32 tb_id,
-					unsigned int *ipmr_seq)
+					atomic_t *ipmr_seq)
 {
 	struct vif_entry_notifier_info info = {
 		.info = {
@@ -89,7 +89,7 @@ static inline int mr_call_vif_notifiers(struct net *net,
 	};
 
 	ASSERT_RTNL();
-	(*ipmr_seq)++;
+	atomic_inc(ipmr_seq);
 	return call_fib_notifiers(net, event_type, &info.info);
 }
 
@@ -198,7 +198,7 @@ static inline int mr_call_mfc_notifiers(struct net *net,
 					unsigned short family,
 					enum fib_event_type event_type,
 					struct mr_mfc *mfc, u32 tb_id,
-					unsigned int *ipmr_seq)
+					atomic_t *ipmr_seq)
 {
 	struct mfc_entry_notifier_info info = {
 		.info = {
@@ -209,7 +209,7 @@ static inline int mr_call_mfc_notifiers(struct net *net,
 	};
 
 	ASSERT_RTNL();
-	(*ipmr_seq)++;
+	atomic_inc(ipmr_seq);
 	return call_fib_notifiers(net, event_type, &info.info);
 }
 
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 380ff34c0233..94dca64fec41 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -280,7 +280,7 @@ struct netns_ipv4 {
 	struct fib_rules_ops	*mr_rules_ops;
 #endif
 	struct fib_notifier_ops	*ipmr_notifier_ops;
-	unsigned int		ipmr_seq;	/* protected by rtnl_mutex */
+	atomic_t		ipmr_seq;
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 34bdb1308e8f..499e4288170f 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -118,7 +118,7 @@ struct netns_ipv6 {
 	struct seg6_pernet_data *seg6_data;
 	struct fib_notifier_ops	*notifier_ops;
 	struct fib_notifier_ops	*ip6mr_notifier_ops;
-	unsigned int ipmr_seq; /* protected by rtnl_mutex */
+	atomic_t		ipmr_seq;
 	struct {
 		struct hlist_head head;
 		spinlock_t	lock;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 07f2d4f8dcbe..6ec73796d84d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -3226,7 +3226,7 @@ static const struct net_protocol pim_protocol = {
 
 static unsigned int ipmr_seq_read(const struct net *net)
 {
-	return READ_ONCE(net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net);
+	return atomic_read(&net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net);
 }
 
 static int ipmr_dump(struct net *net, struct notifier_block *nb,
@@ -3247,7 +3247,7 @@ static int __net_init ipmr_notifier_init(struct net *net)
 {
 	struct fib_notifier_ops *ops;
 
-	net->ipv4.ipmr_seq = 0;
+	atomic_set(&net->ipv4.ipmr_seq, 0);
 
 	ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
 	if (IS_ERR(ops))
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e047a4680ab0..85010ff21c98 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1280,7 +1280,7 @@ static int ip6mr_device_event(struct notifier_block *this,
 
 static unsigned int ip6mr_seq_read(const struct net *net)
 {
-	return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
+	return atomic_read(&net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
 }
 
 static int ip6mr_dump(struct net *net, struct notifier_block *nb,
@@ -1305,7 +1305,7 @@ static int __net_init ip6mr_notifier_init(struct net *net)
 {
 	struct fib_notifier_ops *ops;
 
-	net->ipv6.ipmr_seq = 0;
+	atomic_set(&net->ipv6.ipmr_seq, 0);
 
 	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
 	if (IS_ERR(ops))
-- 
cgit v1.2.3


From bddafc06ca5ee1be4d10061f7954c6d6be5dc1d8 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@google.com>
Date: Sat, 28 Feb 2026 22:17:33 +0000
Subject: ipmr: Don't hold RTNL for ipmr_rtm_route().

ipmr_mfc_add() and ipmr_mfc_delete() are already protected
by a dedicated mutex.

rtm_to_ipmr_mfcc() calls __ipmr_get_table(), __dev_get_by_index(),
amd ipmr_find_vif().

Once __dev_get_by_index() is converted to dev_get_by_index_rcu(),
we can move the other two functions under that same RCU section
and drop RTNL for ipmr_rtm_route().

Let's do that conversion and drop ASSERT_RTNL() in
mr_call_mfc_notifiers().

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260228221800.1082070-16-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mroute_base.h |  1 -
 net/ipv4/ipmr.c             | 34 +++++++++++++++++++++-------------
 2 files changed, 21 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 0baa6f994da9..cf3374580f74 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -208,7 +208,6 @@ static inline int mr_call_mfc_notifiers(struct net *net,
 		.tb_id = tb_id
 	};
 
-	ASSERT_RTNL();
 	atomic_inc(ipmr_seq);
 	return call_fib_notifiers(net, event_type, &info.info);
 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d4983d8a9b2a..8a08d09b4c30 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1211,7 +1211,6 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
 	struct net *net = read_pnet(&mrt->net);
 	struct mfc_cache *c;
 
-	/* The entries are added/deleted only under RTNL */
 	rcu_read_lock();
 	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
 				   mfc->mfcc_mcastgrp.s_addr, parent);
@@ -1238,7 +1237,6 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 	if (mfc->mfcc_parent >= MAXVIFS)
 		return -ENFILE;
 
-	/* The entries are added/deleted only under RTNL */
 	rcu_read_lock();
 	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
 				   mfc->mfcc_mcastgrp.s_addr, parent);
@@ -2853,10 +2851,10 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
 {
 	struct net_device *dev = NULL;
 	u32 tblid = RT_TABLE_DEFAULT;
+	int ret, rem, iif = 0;
 	struct mr_table *mrt;
 	struct nlattr *attr;
 	struct rtmsg *rtm;
-	int ret, rem;
 
 	ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
 					rtm_ipmr_policy, extack);
@@ -2883,11 +2881,7 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
 			mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
 			break;
 		case RTA_IIF:
-			dev = __dev_get_by_index(net, nla_get_u32(attr));
-			if (!dev) {
-				ret = -ENODEV;
-				goto out;
-			}
+			iif = nla_get_u32(attr);
 			break;
 		case RTA_MULTIPATH:
 			if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
@@ -2903,16 +2897,30 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
 			break;
 		}
 	}
+
+	rcu_read_lock();
+
 	mrt = __ipmr_get_table(net, tblid);
 	if (!mrt) {
 		ret = -ENOENT;
-		goto out;
+		goto unlock;
 	}
+
+	if (iif) {
+		dev = dev_get_by_index_rcu(net, iif);
+		if (!dev) {
+			ret = -ENODEV;
+			goto unlock;
+		}
+
+		mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);
+	}
+
 	*mrtret = mrt;
 	*mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
-	if (dev)
-		mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);
 
+unlock:
+	rcu_read_unlock();
 out:
 	return ret;
 }
@@ -3343,9 +3351,9 @@ static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = {
 	{.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK,
 	 .dumpit = ipmr_rtm_dumplink, .flags = RTNL_FLAG_DUMP_UNLOCKED},
 	{.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE,
-	 .doit = ipmr_rtm_route},
+	 .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED},
 	{.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE,
-	 .doit = ipmr_rtm_route},
+	 .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED},
 	{.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE,
 	 .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute,
 	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
-- 
cgit v1.2.3


From c69855ada28656fdd7e197b6e24cd40a04fe14d3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 28 Feb 2026 14:08:45 -0800
Subject: atm: atmdev: add function parameter names and description

kernel-doc reports function parameters not described for parameters
that are not named. Add parameter names for these functions and then
describe the function parameters in kernel-doc format.

Fixes these warnings:
Warning: include/linux/atmdev.h:316 function parameter '' not described
 in 'register_atm_ioctl'
Warning: include/linux/atmdev.h:321 function parameter '' not described
 in 'deregister_atm_ioctl'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260228220845.2978547-1-rdunlap@infradead.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/atmdev.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 70807c679f1a..82a32526df64 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -309,17 +309,19 @@ struct atm_ioctl {
 
 /**
  * register_atm_ioctl - register handler for ioctl operations
+ * @ioctl: ioctl handler to register
  *
  * Special (non-device) handlers of ioctl's should
  * register here. If you're a normal device, you should
  * set .ioctl in your atmdev_ops instead.
  */
-void register_atm_ioctl(struct atm_ioctl *);
+void register_atm_ioctl(struct atm_ioctl *ioctl);
 
 /**
  * deregister_atm_ioctl - remove the ioctl handler
+ * @ioctl: ioctl handler to deregister
  */
-void deregister_atm_ioctl(struct atm_ioctl *);
+void deregister_atm_ioctl(struct atm_ioctl *ioctl);
 
 
 /* register_atmdevice_notifier - register atm_dev notify events
-- 
cgit v1.2.3


From 83419c8fdbbc1dacd12fa614c5a3561e498aac5f Mon Sep 17 00:00:00 2001
From: Emil Tsalapatis <emil@etsalapatis.com>
Date: Sat, 28 Feb 2026 13:47:55 -0500
Subject: bpf: Factor out program return value calculation

Factor the return value range calculation logic in check_return_code
out of the function in preparation for separating the return value
validation logic for BPF_EXIT and bpf_throw().

No functional changes. The change made in return_retval_code's handling
of PROG_TRACING program types (not error'ing on the default case) is a
no-op because the match on the program attach type is exhaustive.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Emil Tsalapatis <emil@etsalapatis.com>
Link: https://lore.kernel.org/r/20260228184759.108145-2-emil@etsalapatis.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   1 +
 kernel/bpf/verifier.c        | 226 ++++++++++++++++++++++++-------------------
 2 files changed, 126 insertions(+), 101 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c1e30096ea7b..090aa26d1c98 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -265,6 +265,7 @@ struct bpf_reference_state {
 struct bpf_retval_range {
 	s32 minval;
 	s32 maxval;
+	bool return_32bit;
 };
 
 /* state of the program:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 34f89ed29c47..89e6d2f6bdf6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2953,7 +2953,11 @@ static void init_reg_state(struct bpf_verifier_env *env,
 
 static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
 {
-	return (struct bpf_retval_range){ minval, maxval };
+	/*
+	 * return_32bit is set to false by default and set explicitly
+	 * by the caller when necessary.
+	 */
+	return (struct bpf_retval_range){ minval, maxval, false };
 }
 
 #define BPF_MAIN_FUNC (-1)
@@ -11175,10 +11179,9 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
 	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
 }
 
-static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg,
-				bool return_32bit)
+static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
 {
-	if (return_32bit)
+	if (range.return_32bit)
 		return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval;
 	else
 		return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
@@ -11222,7 +11225,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 			return err;
 
 		/* enforce R0 return value range, and bpf_callback_t returns 64bit */
-		if (!retval_range_within(callee->callback_ret_range, r0, false)) {
+		if (!retval_range_within(callee->callback_ret_range, r0)) {
 			verbose_invalid_scalar(env, r0, callee->callback_ret_range,
 					       "At callback return", "R0");
 			return -EINVAL;
@@ -17841,6 +17844,115 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
+
+static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
+{
+	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
+
+	/* Default return value range. */
+	*range = retval_range(0, 1);
+
+	switch (prog_type) {
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+		switch (env->prog->expected_attach_type) {
+		case BPF_CGROUP_UDP4_RECVMSG:
+		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
+		case BPF_CGROUP_INET4_GETPEERNAME:
+		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
+		case BPF_CGROUP_INET4_GETSOCKNAME:
+		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
+			*range = retval_range(1, 1);
+			break;
+		case BPF_CGROUP_INET4_BIND:
+		case BPF_CGROUP_INET6_BIND:
+			*range = retval_range(0, 3);
+			break;
+		default:
+			break;
+		}
+		break;
+	case BPF_PROG_TYPE_CGROUP_SKB:
+		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS)
+			*range = retval_range(0, 3);
+		break;
+	case BPF_PROG_TYPE_CGROUP_SOCK:
+	case BPF_PROG_TYPE_SOCK_OPS:
+	case BPF_PROG_TYPE_CGROUP_DEVICE:
+	case BPF_PROG_TYPE_CGROUP_SYSCTL:
+	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+		break;
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+		if (!env->prog->aux->attach_btf_id)
+			return false;
+		*range = retval_range(0, 0);
+		break;
+	case BPF_PROG_TYPE_TRACING:
+		switch (env->prog->expected_attach_type) {
+		case BPF_TRACE_FENTRY:
+		case BPF_TRACE_FEXIT:
+		case BPF_TRACE_FSESSION:
+			*range = retval_range(0, 0);
+			break;
+		case BPF_TRACE_RAW_TP:
+		case BPF_MODIFY_RETURN:
+			return false;
+		case BPF_TRACE_ITER:
+		default:
+			break;
+		}
+		break;
+	case BPF_PROG_TYPE_KPROBE:
+		switch (env->prog->expected_attach_type) {
+		case BPF_TRACE_KPROBE_SESSION:
+		case BPF_TRACE_UPROBE_SESSION:
+			break;
+		default:
+			return false;
+		}
+		break;
+	case BPF_PROG_TYPE_SK_LOOKUP:
+		*range = retval_range(SK_DROP, SK_PASS);
+		break;
+
+	case BPF_PROG_TYPE_LSM:
+		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
+			/* no range found, any return value is allowed */
+			if (!get_func_retval_range(env->prog, range))
+				return false;
+			/* no restricted range, any return value is allowed */
+			if (range->minval == S32_MIN && range->maxval == S32_MAX)
+				return false;
+			range->return_32bit = true;
+		} else if (!env->prog->aux->attach_func_proto->type) {
+			/* Make sure programs that attach to void
+			 * hooks don't try to modify return value.
+			 */
+			*range = retval_range(1, 1);
+		}
+		break;
+
+	case BPF_PROG_TYPE_NETFILTER:
+		*range = retval_range(NF_DROP, NF_ACCEPT);
+		break;
+	case BPF_PROG_TYPE_STRUCT_OPS:
+		*range = retval_range(0, 0);
+		break;
+	case BPF_PROG_TYPE_EXT:
+		/* freplace program can return anything as its return value
+		 * depends on the to-be-replaced kernel func or bpf program.
+		 */
+	default:
+		return false;
+	}
+
+	/* Continue calculating. */
+
+	return true;
+}
+
 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
 {
 	const char *exit_ctx = "At program exit";
@@ -17849,18 +17961,17 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 	struct bpf_reg_state *reg = reg_state(env, regno);
 	struct bpf_retval_range range = retval_range(0, 1);
 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
-	int err;
 	struct bpf_func_state *frame = env->cur_state->frame[0];
 	const bool is_subprog = frame->subprogno;
-	bool return_32bit = false;
 	const struct btf_type *reg_type, *ret_type = NULL;
+	int err;
 
 	/* LSM and struct_ops func-ptr's return type could be "void" */
 	if (!is_subprog || frame->in_exception_callback_fn) {
 		switch (prog_type) {
 		case BPF_PROG_TYPE_LSM:
 			if (prog->expected_attach_type == BPF_LSM_CGROUP)
-				/* See below, can be 0 or 0-1 depending on hook. */
+				/* See return_retval_range, can be 0 or 0-1 depending on hook. */
 				break;
 			if (!prog->aux->attach_func_proto->type)
 				return 0;
@@ -17918,101 +18029,14 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 		return 0;
 	}
 
-	switch (prog_type) {
-	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
-		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
-		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
-		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
-		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
-		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
-		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
-		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
-		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
-		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
-			range = retval_range(1, 1);
-		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
-		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
-			range = retval_range(0, 3);
-		break;
-	case BPF_PROG_TYPE_CGROUP_SKB:
-		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
-			range = retval_range(0, 3);
-			enforce_attach_type_range = tnum_range(2, 3);
-		}
-		break;
-	case BPF_PROG_TYPE_CGROUP_SOCK:
-	case BPF_PROG_TYPE_SOCK_OPS:
-	case BPF_PROG_TYPE_CGROUP_DEVICE:
-	case BPF_PROG_TYPE_CGROUP_SYSCTL:
-	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
-		break;
-	case BPF_PROG_TYPE_RAW_TRACEPOINT:
-		if (!env->prog->aux->attach_btf_id)
-			return 0;
-		range = retval_range(0, 0);
-		break;
-	case BPF_PROG_TYPE_TRACING:
-		switch (env->prog->expected_attach_type) {
-		case BPF_TRACE_FENTRY:
-		case BPF_TRACE_FEXIT:
-		case BPF_TRACE_FSESSION:
-			range = retval_range(0, 0);
-			break;
-		case BPF_TRACE_RAW_TP:
-		case BPF_MODIFY_RETURN:
-			return 0;
-		case BPF_TRACE_ITER:
-			break;
-		default:
-			return -ENOTSUPP;
-		}
-		break;
-	case BPF_PROG_TYPE_KPROBE:
-		switch (env->prog->expected_attach_type) {
-		case BPF_TRACE_KPROBE_SESSION:
-		case BPF_TRACE_UPROBE_SESSION:
-			range = retval_range(0, 1);
-			break;
-		default:
-			return 0;
-		}
-		break;
-	case BPF_PROG_TYPE_SK_LOOKUP:
-		range = retval_range(SK_DROP, SK_PASS);
-		break;
+	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS && !ret_type)
+		return 0;
 
-	case BPF_PROG_TYPE_LSM:
-		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
-			/* no range found, any return value is allowed */
-			if (!get_func_retval_range(env->prog, &range))
-				return 0;
-			/* no restricted range, any return value is allowed */
-			if (range.minval == S32_MIN && range.maxval == S32_MAX)
-				return 0;
-			return_32bit = true;
-		} else if (!env->prog->aux->attach_func_proto->type) {
-			/* Make sure programs that attach to void
-			 * hooks don't try to modify return value.
-			 */
-			range = retval_range(1, 1);
-		}
-		break;
+	if (prog_type == BPF_PROG_TYPE_CGROUP_SKB && (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS))
+		enforce_attach_type_range = tnum_range(2, 3);
 
-	case BPF_PROG_TYPE_NETFILTER:
-		range = retval_range(NF_DROP, NF_ACCEPT);
-		break;
-	case BPF_PROG_TYPE_STRUCT_OPS:
-		if (!ret_type)
-			return 0;
-		range = retval_range(0, 0);
-		break;
-	case BPF_PROG_TYPE_EXT:
-		/* freplace program can return anything as its return value
-		 * depends on the to-be-replaced kernel func or bpf program.
-		 */
-	default:
+	if (!return_retval_range(env, &range))
 		return 0;
-	}
 
 enforce_retval:
 	if (reg->type != SCALAR_VALUE) {
@@ -18025,7 +18049,7 @@ enforce_retval:
 	if (err)
 		return err;
 
-	if (!retval_range_within(range, reg, return_32bit)) {
+	if (!retval_range_within(range, reg)) {
 		verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
 		if (!is_subprog &&
 		    prog->expected_attach_type == BPF_LSM_CGROUP &&
-- 
cgit v1.2.3


From 2864fb6aa947703d290b52b1b030b0b74d0a6128 Mon Sep 17 00:00:00 2001
From: André Draszik <andre.draszik@linaro.org>
Date: Mon, 2 Mar 2026 13:32:08 +0000
Subject: power: supply: max17042: initial support for Maxim MAX77759
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Maxim MAX77759 is a companion PMIC intended for use in mobile
phones and tablets. It is used on Google Pixel 6 and 6 Pro (oriole and
raven). Amongst others, it contains a fuel gauge that is similar to the
ones supported by this driver.

The fuel gauge can measure battery charge and discharge current,
battery voltage, battery temperature, and the Type C connector's
temperature.

The MAX77759 incorporates the Maxim ModelGauge m5 algorithm. It, as
well as previous generations like m3 on max17047/max17050, requires
the host to save/restore some register values across power cycles to
maintain full accuracy. Extending the driver for such support is out of
scope in this initial commit.

Reviewed-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: André Draszik <andre.draszik@linaro.org>
Link: https://patch.msgid.link/20260302-max77759-fg-v3-9-3c5f01dbda23@linaro.org
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/max17042_battery.c | 59 ++++++++++++++++++++++++++++++---
 include/linux/power/max17042_battery.h  | 24 ++++++++++++--
 2 files changed, 77 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index e21d2bd7e231..b9a21cef2cc6 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -650,7 +650,8 @@ static void max17042_write_config_regs(struct max17042_chip *chip)
 	regmap_write(map, MAX17042_RelaxCFG, config->relax_cfg);
 	if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17047 ||
 			chip->chip_type == MAXIM_DEVICE_TYPE_MAX17050 ||
-			chip->chip_type == MAXIM_DEVICE_TYPE_MAX17055)
+			chip->chip_type == MAXIM_DEVICE_TYPE_MAX17055 ||
+			chip->chip_type == MAXIM_DEVICE_TYPE_MAX77759)
 		regmap_write(map, MAX17047_FullSOCThr,
 						config->full_soc_thresh);
 }
@@ -787,7 +788,8 @@ static inline void max17042_override_por_values(struct max17042_chip *chip)
 
 	if ((chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042) ||
 	    (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17047) ||
-	    (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17050)) {
+	    (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17050) ||
+	    (chip->chip_type == MAXIM_DEVICE_TYPE_MAX77759)) {
 		max17042_override_por(map, MAX17042_IAvg_empty, config->iavg_empty);
 		max17042_override_por(map, MAX17042_TempNom, config->temp_nom);
 		max17042_override_por(map, MAX17042_TempLim, config->temp_lim);
@@ -796,7 +798,8 @@ static inline void max17042_override_por_values(struct max17042_chip *chip)
 
 	if ((chip->chip_type == MAXIM_DEVICE_TYPE_MAX17047) ||
 	    (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17050) ||
-	    (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17055)) {
+	    (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17055) ||
+	    (chip->chip_type == MAXIM_DEVICE_TYPE_MAX77759)) {
 		max17042_override_por(map, MAX17047_V_empty, config->vempty);
 	}
 }
@@ -1019,6 +1022,45 @@ static const struct regmap_config max17042_regmap_config = {
 	.val_format_endian = REGMAP_ENDIAN_NATIVE,
 };
 
+static const struct regmap_range max77759_fg_registers[] = {
+	regmap_reg_range(MAX17042_STATUS, MAX77759_MixAtFull),
+	regmap_reg_range(MAX17042_VFSOC0Enable, MAX17042_VFSOC0Enable),
+	regmap_reg_range(MAX17042_MLOCKReg1, MAX17042_MLOCKReg2),
+	regmap_reg_range(MAX17042_MODELChrTbl, MAX17055_TimerH),
+	regmap_reg_range(MAX77759_IIn, MAX77759_IIn),
+	regmap_reg_range(MAX17055_AtQResidual, MAX17055_AtAvCap),
+	regmap_reg_range(MAX17042_OCVInternal, MAX17042_OCVInternal),
+	regmap_reg_range(MAX17042_VFSOC, MAX17042_VFSOC),
+};
+
+static const struct regmap_range max77759_fg_ro_registers[] = {
+	regmap_reg_range(MAX17042_FSTAT, MAX17042_FSTAT),
+	regmap_reg_range(MAX17042_OCVInternal, MAX17042_OCVInternal),
+	regmap_reg_range(MAX17042_VFSOC, MAX17042_VFSOC),
+};
+
+static const struct regmap_access_table max77759_fg_write_table = {
+	.yes_ranges = max77759_fg_registers,
+	.n_yes_ranges = ARRAY_SIZE(max77759_fg_registers),
+	.no_ranges = max77759_fg_ro_registers,
+	.n_no_ranges = ARRAY_SIZE(max77759_fg_ro_registers),
+};
+
+static const struct regmap_access_table max77759_fg_rd_table = {
+	.yes_ranges = max77759_fg_registers,
+	.n_yes_ranges = ARRAY_SIZE(max77759_fg_registers),
+};
+
+static const struct regmap_config max77759_fg_regmap_cfg = {
+	.reg_bits = 8,
+	.val_bits = 16,
+	.max_register = 0xff,
+	.wr_table = &max77759_fg_write_table,
+	.rd_table = &max77759_fg_rd_table,
+	.val_format_endian = REGMAP_ENDIAN_NATIVE,
+	.cache_type = REGCACHE_NONE,
+};
+
 static const struct power_supply_desc max17042_psy_desc = {
 	.name		= "max170xx_battery",
 	.type		= POWER_SUPPLY_TYPE_BATTERY,
@@ -1045,6 +1087,7 @@ static int max17042_probe(struct i2c_client *client, struct device *dev, int irq
 {
 	struct i2c_adapter *adapter = client->adapter;
 	const struct power_supply_desc *max17042_desc = &max17042_psy_desc;
+	const struct regmap_config *regmap_config;
 	struct power_supply_config psy_cfg = {};
 	struct max17042_chip *chip;
 	int ret;
@@ -1060,7 +1103,12 @@ static int max17042_probe(struct i2c_client *client, struct device *dev, int irq
 
 	chip->dev = dev;
 	chip->chip_type = chip_type;
-	chip->regmap = devm_regmap_init_i2c(client, &max17042_regmap_config);
+
+	if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX77759)
+		regmap_config = &max77759_fg_regmap_cfg;
+	else
+		regmap_config = &max17042_regmap_config;
+	chip->regmap = devm_regmap_init_i2c(client, regmap_config);
 	if (IS_ERR(chip->regmap))
 		return dev_err_probe(dev, PTR_ERR(chip->regmap),
 				     "Failed to initialize regmap\n");
@@ -1241,6 +1289,8 @@ static const struct of_device_id max17042_dt_match[] __used = {
 		.data = (void *) MAXIM_DEVICE_TYPE_MAX17055 },
 	{ .compatible = "maxim,max77705-battery",
 		.data = (void *) MAXIM_DEVICE_TYPE_MAX17047 },
+	{ .compatible = "maxim,max77759-fg",
+		.data = (void *) MAXIM_DEVICE_TYPE_MAX77759 },
 	{ .compatible = "maxim,max77849-battery",
 		.data = (void *) MAXIM_DEVICE_TYPE_MAX17047 },
 	{ },
@@ -1253,6 +1303,7 @@ static const struct i2c_device_id max17042_id[] = {
 	{ "max17047", MAXIM_DEVICE_TYPE_MAX17047 },
 	{ "max17050", MAXIM_DEVICE_TYPE_MAX17050 },
 	{ "max17055", MAXIM_DEVICE_TYPE_MAX17055 },
+	{ "max77759-fg", MAXIM_DEVICE_TYPE_MAX77759 },
 	{ "max77849-battery", MAXIM_DEVICE_TYPE_MAX17047 },
 	{ }
 };
diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index c417abd2ab70..05097f08ea36 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -105,7 +105,7 @@ enum max17042_register {
 
 	MAX17042_OCV		= 0xEE,
 
-	MAX17042_OCVInternal	= 0xFB,  /* MAX17055 VFOCV */
+	MAX17042_OCVInternal	= 0xFB, /* MAX17055/77759 VFOCV */
 
 	MAX17042_VFSOC		= 0xFF,
 };
@@ -156,7 +156,7 @@ enum max17055_register {
 	MAX17055_AtAvCap	= 0xDF,
 };
 
-/* Registers specific to max17047/50/55 */
+/* Registers specific to max17047/50/55/77759 */
 enum max17047_register {
 	MAX17047_QRTbl00	= 0x12,
 	MAX17047_FullSOCThr	= 0x13,
@@ -167,12 +167,32 @@ enum max17047_register {
 	MAX17047_QRTbl30	= 0x42,
 };
 
+enum max77759_register {
+	MAX77759_AvgTA0		= 0x26,
+	MAX77759_AtTTF		= 0x33,
+	MAX77759_Tconvert	= 0x34,
+	MAX77759_AvgCurrent0	= 0x3B,
+	MAX77759_THMHOT		= 0x40,
+	MAX77759_CTESample	= 0x41,
+	MAX77759_ISys		= 0x43,
+	MAX77759_AvgVCell0	= 0x44,
+	MAX77759_RlxSOC		= 0x47,
+	MAX77759_AvgISys	= 0x4B,
+	MAX77759_QH0		= 0x4C,
+	MAX77759_MixAtFull	= 0x4F,
+	MAX77759_VSys		= 0xB1,
+	MAX77759_TAlrtTh2	= 0xB2,
+	MAX77759_VByp		= 0xB3,
+	MAX77759_IIn		= 0xD0,
+};
+
 enum max170xx_chip_type {
 	MAXIM_DEVICE_TYPE_UNKNOWN	= 0,
 	MAXIM_DEVICE_TYPE_MAX17042,
 	MAXIM_DEVICE_TYPE_MAX17047,
 	MAXIM_DEVICE_TYPE_MAX17050,
 	MAXIM_DEVICE_TYPE_MAX17055,
+	MAXIM_DEVICE_TYPE_MAX77759,
 
 	MAXIM_DEVICE_TYPE_NUM
 };
-- 
cgit v1.2.3


From 83a86e27c34d06ec2dc117fb293e80f78402df49 Mon Sep 17 00:00:00 2001
From: André Draszik <andre.draszik@linaro.org>
Date: Mon, 2 Mar 2026 13:32:09 +0000
Subject: power: supply: max17042: consider task period (max77759)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Several (register) values reported by the fuel gauge depend on its
internal task period and it needs to be taken into account when
calculating results. All relevant example formulas in the data sheet
assume the default task period (of 5760) and final results need to be
adjusted based on the task period in effect.

Update the code as and where necessary.

Reviewed-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: André Draszik <andre.draszik@linaro.org>
Link: https://patch.msgid.link/20260302-max77759-fg-v3-10-3c5f01dbda23@linaro.org
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/max17042_battery.c | 20 ++++++++++++++++++++
 include/linux/power/max17042_battery.h  |  1 +
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index b9a21cef2cc6..bafbf8706055 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -61,6 +61,7 @@ struct max17042_chip {
 	struct work_struct work;
 	int    init_complete;
 	int    irq;
+	int    task_period;
 };
 
 static enum power_supply_property max17042_battery_props[] = {
@@ -331,6 +332,8 @@ static int max17042_get_property(struct power_supply *psy,
 			return ret;
 
 		data64 = data * 5000000ll;
+		data64 *= chip->task_period;
+		do_div(data64, MAX17042_DEFAULT_TASK_PERIOD);
 		do_div(data64, chip->pdata->r_sns);
 		val->intval = data64;
 		break;
@@ -340,6 +343,8 @@ static int max17042_get_property(struct power_supply *psy,
 			return ret;
 
 		data64 = data * 5000000ll;
+		data64 *= chip->task_period;
+		do_div(data64, MAX17042_DEFAULT_TASK_PERIOD);
 		do_div(data64, chip->pdata->r_sns);
 		val->intval = data64;
 		break;
@@ -349,6 +354,8 @@ static int max17042_get_property(struct power_supply *psy,
 			return ret;
 
 		data64 = data * 5000000ll;
+		data64 *= chip->task_period;
+		do_div(data64, MAX17042_DEFAULT_TASK_PERIOD);
 		do_div(data64, chip->pdata->r_sns);
 		val->intval = data64;
 		break;
@@ -358,6 +365,8 @@ static int max17042_get_property(struct power_supply *psy,
 			return ret;
 
 		data64 = sign_extend64(data, 15) * 5000000ll;
+		data64 *= chip->task_period;
+		data64 = div_s64(data64, MAX17042_DEFAULT_TASK_PERIOD);
 		val->intval = div_s64(data64, chip->pdata->r_sns);
 		break;
 	case POWER_SUPPLY_PROP_TEMP:
@@ -1142,6 +1151,17 @@ static int max17042_probe(struct i2c_client *client, struct device *dev, int irq
 		regmap_write(chip->regmap, MAX17042_LearnCFG, 0x0007);
 	}
 
+	chip->task_period = MAX17042_DEFAULT_TASK_PERIOD;
+	if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX77759) {
+		ret = regmap_read(chip->regmap, MAX17042_TaskPeriod, &val);
+		if (ret)
+			return dev_err_probe(dev, ret,
+					     "failed to read task period\n");
+		chip->task_period = val;
+	}
+	dev_dbg(dev, "task period: %#.4x (%d)\n", chip->task_period,
+		chip->task_period);
+
 	chip->battery = devm_power_supply_register(dev, max17042_desc,
 						   &psy_cfg);
 	if (IS_ERR(chip->battery))
diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index 05097f08ea36..d5b08313cf11 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -17,6 +17,7 @@
 #define MAX17042_DEFAULT_VMAX		(4500) /* LiHV cell max */
 #define MAX17042_DEFAULT_TEMP_MIN	(0)    /* For sys without temp sensor */
 #define MAX17042_DEFAULT_TEMP_MAX	(700)  /* 70 degrees Celcius */
+#define MAX17042_DEFAULT_TASK_PERIOD	(5760)
 
 /* Consider RepCap which is less then 10 units below FullCAP full */
 #define MAX17042_FULL_THRESHOLD		10
-- 
cgit v1.2.3


From 48f7a50c027dd2abb9e7b8a6ecc8e531d87f2c21 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Tue, 17 Feb 2026 14:11:11 +0100
Subject: stop_machine: Fix the documentation for a NULL cpus argument
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A recent refactoring of the kernel-docs for stop machine changed the
description of the cpus parameter from "NULL = any online cpu"
to "NULL = run on each online CPU".

However the callback is only executed on a single CPU, not all of them.
The old wording was a bit ambiguous and could have been read both ways.

Reword the documentation to be correct again and hopefully also clearer.

Fixes: fc6f89dc7078 ("stop_machine: Improve kernel-doc function-header comments")
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/stop_machine.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 72820503514c..01011113d226 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -99,7 +99,7 @@ static inline void print_stop_info(const char *log_lvl, struct task_struct *task
  * stop_machine: freeze the machine on all CPUs and run this function
  * @fn: the function to run
  * @data: the data ptr to pass to @fn()
- * @cpus: the cpus to run @fn() on (NULL = run on each online CPU)
+ * @cpus: the cpus to run @fn() on (NULL = one unspecified online CPU)
  *
  * Description: This causes a thread to be scheduled on every CPU, which
  * will run with interrupts disabled.  Each CPU specified by @cpus will
@@ -133,7 +133,7 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
  * stop_machine_cpuslocked: freeze the machine on all CPUs and run this function
  * @fn: the function to run
  * @data: the data ptr to pass to @fn()
- * @cpus: the cpus to run @fn() on (NULL = run on each online CPU)
+ * @cpus: the cpus to run @fn() on (NULL = one unspecified online CPU)
  *
  * Same as above.  Avoids nested calls to cpus_read_lock().
  *
-- 
cgit v1.2.3


From 1ac252ad036cdb18f5fb7f76bb6061adfed9cedf Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Tue, 3 Mar 2026 23:04:04 +0200
Subject: rculist_bl: add hlist_bl_for_each_entry_continue_rcu

Change the old hlist_bl_first_rcu to hlist_bl_first_rcu_dereference
to indicate that it is a RCU dereference.

Add hlist_bl_next_rcu and hlist_bl_first_rcu to use RCU pointers
and use them to fix sparse warnings.

Add hlist_bl_for_each_entry_continue_rcu.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/linux/rculist_bl.h | 49 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h
index 0b952d06eb0b..36363b876e53 100644
--- a/include/linux/rculist_bl.h
+++ b/include/linux/rculist_bl.h
@@ -8,21 +8,31 @@
 #include <linux/list_bl.h>
 #include <linux/rcupdate.h>
 
+/* return the first ptr or next element in an RCU protected list */
+#define hlist_bl_first_rcu(head)	\
+	(*((struct hlist_bl_node __rcu **)(&(head)->first)))
+#define hlist_bl_next_rcu(node)	\
+	(*((struct hlist_bl_node __rcu **)(&(node)->next)))
+
 static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h,
 					struct hlist_bl_node *n)
 {
 	LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
 	LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) !=
 							LIST_BL_LOCKMASK);
-	rcu_assign_pointer(h->first,
+	rcu_assign_pointer(hlist_bl_first_rcu(h),
 		(struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK));
 }
 
-static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h)
-{
-	return (struct hlist_bl_node *)
-		((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK);
-}
+#define hlist_bl_first_rcu_dereference(head)				\
+({									\
+	struct hlist_bl_head *__head = (head);				\
+									\
+	(struct hlist_bl_node *)					\
+	((unsigned long)rcu_dereference_check(hlist_bl_first_rcu(__head), \
+					      hlist_bl_is_locked(__head)) & \
+					      ~LIST_BL_LOCKMASK);	\
+})
 
 /**
  * hlist_bl_del_rcu - deletes entry from hash list without re-initialization
@@ -73,7 +83,7 @@ static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n,
 {
 	struct hlist_bl_node *first;
 
-	/* don't need hlist_bl_first_rcu because we're under lock */
+	/* don't need hlist_bl_first_rcu* because we're under lock */
 	first = hlist_bl_first(h);
 
 	n->next = first;
@@ -93,9 +103,30 @@ static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n,
  *
  */
 #define hlist_bl_for_each_entry_rcu(tpos, pos, head, member)		\
-	for (pos = hlist_bl_first_rcu(head);				\
+	for (pos = hlist_bl_first_rcu_dereference(head);		\
 		pos &&							\
 		({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \
-		pos = rcu_dereference_raw(pos->next))
+		pos = rcu_dereference_raw(hlist_bl_next_rcu(pos)))
+
+/**
+ * hlist_bl_for_each_entry_continue_rcu - continue iteration over list of given
+ *   type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_bl_node to use as a loop cursor.
+ * @member:	the name of the hlist_bl_node within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position which must have been in the list when the RCU read
+ * lock was taken.
+ * This would typically require either that you obtained the node from a
+ * previous walk of the list in the same RCU read-side critical section, or
+ * that you held some sort of non-RCU reference (such as a reference count)
+ * to keep the node alive *and* in the list.
+ */
+#define hlist_bl_for_each_entry_continue_rcu(tpos, pos, member)		\
+	for (pos = rcu_dereference_raw(hlist_bl_next_rcu(&(tpos)->member)); \
+	     pos &&							\
+	     ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \
+	     pos = rcu_dereference_raw(hlist_bl_next_rcu(pos)))
 
 #endif
-- 
cgit v1.2.3


From 44d93cf1abb6a85d65c3b4b027c82d44263de6a5 Mon Sep 17 00:00:00 2001
From: Karthikeyan Kathirvel <karthikeyan.kathirvel@oss.qualcomm.com>
Date: Wed, 4 Mar 2026 14:23:42 +0530
Subject: wifi: UHR: define DPS/DBE/P-EDCA elements and fix size parsing

Add UHR Operation and Capability definitions and parsing helpers:

- Define ieee80211_uhr_dps_info, ieee80211_uhr_dbe_info,
  ieee80211_uhr_p_edca_info with masks.
- Update ieee80211_uhr_oper_size_ok() to account for optional
  DPS/DBE/P-EDCA blocks.
- Move NPCA pointer position after DPS Operation Parameter if it is
  present in ieee80211_uhr_oper_size_ok().
- Move NPCA pointer position after DPS info if it is present in
  ieee80211_uhr_npca_info().

Signed-off-by: Karthikeyan Kathirvel <karthikeyan.kathirvel@oss.qualcomm.com>
Link: https://patch.msgid.link/20260304085343.1093993-2-karthikeyan.kathirvel@oss.qualcomm.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211-uhr.h | 271 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 265 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211-uhr.h b/include/linux/ieee80211-uhr.h
index 132acced7d79..9729d23e4766 100644
--- a/include/linux/ieee80211-uhr.h
+++ b/include/linux/ieee80211-uhr.h
@@ -29,11 +29,216 @@ struct ieee80211_uhr_operation {
 #define IEEE80211_UHR_NPCA_PARAMS_MOPLEN		0x00400000
 #define IEEE80211_UHR_NPCA_PARAMS_DIS_SUBCH_BMAP_PRES	0x00800000
 
+/**
+ * struct ieee80211_uhr_npca_info - npca operation information
+ *
+ * This structure is the "NPCA Operation Parameters field format" of "UHR
+ * Operation Element" fields as described in P802.11bn_D1.3
+ * subclause 9.4.2.353. See Figure 9-aa4.
+ *
+ * Refer to IEEE80211_UHR_NPCA*
+ * @params:
+ *	NPCA Primary Channel - NPCA primary channel
+ *	NPCA_Min Duration Threshold - Minimum duration of inter-BSS activity
+ *	NPCA Switching Delay -
+ *		Time needed by an NPCA AP to switch from the
+ *		BSS primary channel to the NPCA primary channel
+ *		in the unit of 4 µs.
+ *	NPCA Switching Back Delay -
+ *		Time to switch from the NPCA primary channel
+ *		to the BSS primary channel in the unit of 4 µs.
+ *	NPCA Initial QSRC -
+ *		Initialize the EDCAF QSRC[AC] variables
+ *		when an NPCA STA in the BSS
+ *		switches to NPCA operation.
+ *	NPCA MOPLEN -
+ *		Indicates which conditions can be used to
+ *		initiate an NPCA operation,
+ *		1 -> both PHYLEN NPCA operation and MOPLEN
+ *		NPCA operation are
+ *		permitted in the BSS
+ *		0 -> only PHYLEN NPCA operation is allowed in the BSS.
+ *	NPCA Disabled Subchannel Bitmap Present -
+ *		Indicates whether the NPCA Disabled Subchannel
+ *		Bitmap field is present. A 1 in this field indicates that
+ *		the NPCA Disabled Subchannel Bitmap field is present
+ * @dis_subch_bmap:
+ *		A bit in the bitmap that lies within the BSS bandwidth is set
+ *		to 1 to indicate that the corresponding 20 MHz subchannel is
+ *		punctured and is set to 0 to indicate that the corresponding
+ *		20 MHz subchannel is not punctured. A bit in the bitmap that
+ *		falls outside of the BSS bandwidth is reserved. This field is
+ *		present when the value of the NPCA Disabled Subchannel Bitmap
+ *		Field Present field is equal to 1, and not present, otherwise
+ */
 struct ieee80211_uhr_npca_info {
 	__le32 params;
 	__le16 dis_subch_bmap[];
 } __packed;
 
+#define IEEE80211_UHR_DPS_PADDING_DELAY			0x0000003F
+#define IEEE80211_UHR_DPS_TRANSITION_DELAY		0x00003F00
+#define IEEE80211_UHR_DPS_ICF_REQUIRED			0x00010000
+#define IEEE80211_UHR_DPS_PARAMETERIZED_FLAG		0x00020000
+#define IEEE80211_UHR_DPS_LC_MODE_BW			0x001C0000
+#define IEEE80211_UHR_DPS_LC_MODE_NSS			0x01E00000
+#define IEEE80211_UHR_DPS_LC_MODE_MCS			0x1E000000
+#define IEEE80211_UHR_DPS_MOBILE_AP_DPS_STATIC_HCM	0x20000000
+
+/**
+ * struct ieee80211_uhr_dps_info - DPS operation information
+ *
+ * This structure is the "DPS Operation Parameter field" of "UHR
+ * Operation Element" fields as described in P802.11bn_D1.3
+ * subclause 9.4.1.87. See Figure 9-207u.
+ *
+ * Refer to IEEE80211_UHR_DPS*
+ * @params:
+ *	DPS Padding Delay -
+ *		Indicates the minimum MAC padding
+ *		duration that is required by a DPS STA
+ *		in an ICF to cause the STA to transition
+ *		from the lower capability mode to the
+ *		higher capability mode. The DPS Padding
+ *		Delay field is in units of 4 µs.
+ *	DPS Transition Delay -
+ *		Indicates the amount of time required by a
+ *		DPS STA to transition from the higher
+ *		capability mode to the lower capability
+ *		mode. The DPS Transition Delay field is in
+ *		units of 4 µs.
+ *	ICF Required -
+ *		Indicates when the DPS assisting STA needs
+ *		to transmit an ICF frame to the peer DPS STA
+ *		before performing the frame exchanges with
+ *		the peer DPS STA in a TXOP.
+ *			1 -> indicates that the transmission of the
+ *			ICF frame to the peer DPS STA prior to
+ *			any frame exchange is needed.
+ *			0 -> ICF transmission before the frame
+ *			exchanges with the peer DPS STA is only
+ *			needed if the frame exchange is performed
+ *			in the HC mode.
+ *	Parameterized Flag -
+ *		0 -> indicates that only 20 MHz, 1 SS,
+ *		non-HT PPDU format with the data
+ *		rate of 6, 12, and 24 Mb/s as the
+ *		default mode are supported by the
+ *		DPS STA in the LC mode
+ *		1 -> indicates that a bandwidth up to the
+ *		bandwidth indicated in the LC Mode
+ *		Bandwidth field, a number of spatial
+ *		streams up to the NSS indicated in
+ *		the LC Mode Nss field, and an MCS up
+ *		to the MCS indicated in the LC Mode
+ *		MCS fields are supported by the DPS
+ *		STA in the LC mode as the
+ *		parameterized mode.
+ *	LC Mode Bandwidth -
+ *		Indicates the maximum bandwidth supported
+ *		by the STA in the LC mode.
+ *	LC Mode NSS -
+ *		Indicates the maximum number of the spatial
+ *		streams supported by the STA in the LC mode.
+ *	LC Mode MCS -
+ *		Indicates the highest MCS supported by the STA
+ *		in the LC mode.
+ *	Mobile AP DPS Static HCM -
+ *		1 -> indicates that it will remain in the DPS high
+ *		capability mode until the next TBTT on that
+ *		link.
+ *		0 -> otherwise.
+ */
+struct ieee80211_uhr_dps_info {
+	__le32 params;
+} __packed;
+
+#define IEEE80211_UHR_DBE_OPER_BANDWIDTH			0x07
+#define IEEE80211_UHR_DBE_OPER_DIS_SUBCHANNEL_BITMAP_PRES	0x08
+
+/**
+ * enum ieee80211_uhr_dbe_oper_bw - DBE Operational Bandwidth
+ *
+ * Encoding for the DBE Operational Bandwidth field in the UHR Operation
+ * element (DBE Operation Parameters).
+ *
+ * @IEEE80211_UHR_DBE_OPER_BW_40: 40 MHz operational DBE bandwidth
+ * @IEEE80211_UHR_DBE_OPER_BW_80: 80 MHz operational DBE bandwidth
+ * @IEEE80211_UHR_DBE_OPER_BW_160: 160 MHz operational DBE bandwidth
+ * @IEEE80211_UHR_DBE_OPER_BW_320_1: 320-1 MHz operational DBE bandwidth
+ * @IEEE80211_UHR_DBE_OPER_BW_320_2: 320-2 MHz operational DBE bandwidth
+ */
+enum ieee80211_uhr_dbe_oper_bw {
+	IEEE80211_UHR_DBE_OPER_BW_40 = 1,
+	IEEE80211_UHR_DBE_OPER_BW_80 = 2,
+	IEEE80211_UHR_DBE_OPER_BW_160 = 3,
+	IEEE80211_UHR_DBE_OPER_BW_320_1 = 4,
+	IEEE80211_UHR_DBE_OPER_BW_320_2 = 5,
+};
+
+/**
+ * struct ieee80211_uhr_dbe_info - DBE operation information
+ *
+ * This structure is the "DBE Operation Parameters field" of
+ * "UHR Operation Element" fields as described in P802.11bn_D1.3
+ * subclause 9.4.2.353. See Figure 9-aa6.
+ *
+ * Refer to IEEE80211_UHR_DBE_OPER*
+ * @params:
+ *	B0-B2 - DBE Operational Bandwidth field, see
+ *	"enum ieee80211_uhr_dbe_oper_bw" for values.
+ *	Value 0 is reserved.
+ *	Value 1 indicates 40 MHz operational DBE bandwidth.
+ *	Value 2 indicates 80 MHz operational DBE bandwidth.
+ *	Value 3 indicates 160 MHz operational DBE bandwidth.
+ *	Value 4 indicates 320-1 MHz operational DBE bandwidth.
+ *	Value 5 indicates 320-2 MHz operational DBE bandwidth.
+ *	Values 6 to 7 are reserved.
+ *	B3 - DBE Disabled Subchannel Bitmap Present.
+ * @dis_subch_bmap: DBE Disabled Subchannel Bitmap field is set to indicate
+ *	disabled 20 MHz subchannels within the DBE Bandwidth.
+ */
+struct ieee80211_uhr_dbe_info {
+	u8 params;
+	__le16 dis_subch_bmap[];
+} __packed;
+
+#define IEEE80211_UHR_P_EDCA_ECWMIN		0x0F
+#define IEEE80211_UHR_P_EDCA_ECWMAX		0xF0
+#define IEEE80211_UHR_P_EDCA_AIFSN		0x000F
+#define IEEE80211_UHR_P_EDCA_CW_DS		0x0030
+#define IEEE80211_UHR_P_EDCA_PSRC_THRESHOLD	0x01C0
+#define IEEE80211_UHR_P_EDCA_QSRC_THRESHOLD	0x0600
+
+/**
+ * struct ieee80211_uhr_p_edca_info - P-EDCA operation information
+ *
+ * This structure is the "P-EDCA Operation Parameters field" of
+ * "UHR Operation Element" fields as described in P802.11bn_D1.3
+ * subclause 9.4.2.353. See Figure 9-aa5.
+ *
+ * Refer to IEEE80211_UHR_P_EDCA*
+ * @p_edca_ec: P-EDCA ECWmin and ECWmax.
+ *	These fields indicate the CWmin and CWmax values used by a
+ *	P-EDCA STA during P-EDCA contention.
+ * @params: AIFSN, CW DS, PSRC threshold, and QSRC threshold.
+ *	- The AIFSN field indicates the AIFSN value used by a P-EDCA STA
+ *	  during P-EDCA contention.
+ *	- The CW DS field indicates the value used for randomization of the
+ *	  transmission slot of the DS-CTS frame. The value 3 is reserved.
+ *	  The value 0 indicates that randomization is not enabled.
+ *	- The P-EDCA PSRC threshold field indicates the maximum number of
+ *	  allowed consecutive DS-CTS transmissions. The value 0 and values
+ *	  greater than 4 are reserved.
+ *	- The P-EDCA QSRC threshold field indicates the value of the
+ *	  QSRC[AC_VO] counter required to start P-EDCA contention. The
+ *	  value 0 is reserved.
+ */
+struct ieee80211_uhr_p_edca_info {
+	u8 p_edca_ec;
+	__le16 params;
+} __packed;
+
 static inline bool ieee80211_uhr_oper_size_ok(const u8 *data, u8 len,
 					      bool beacon)
 {
@@ -47,19 +252,52 @@ static inline bool ieee80211_uhr_oper_size_ok(const u8 *data, u8 len,
 	if (beacon)
 		return true;
 
-	/* FIXME: DPS, DBE, P-EDCA (consider order, also relative to NPCA) */
+	/* DPS Operation Parameters (fixed 4 bytes) */
+	if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_DPS_ENA)) {
+		needed += sizeof(struct ieee80211_uhr_dps_info);
+		if (len < needed)
+			return false;
+	}
 
+	/* NPCA Operation Parameters (fixed 4 bytes + optional 2 bytes) */
 	if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_NPCA_ENA)) {
 		const struct ieee80211_uhr_npca_info *npca =
-			(const void *)oper->variable;
+			(const void *)(data + needed);
 
 		needed += sizeof(*npca);
-
 		if (len < needed)
 			return false;
 
-		if (npca->params & cpu_to_le32(IEEE80211_UHR_NPCA_PARAMS_DIS_SUBCH_BMAP_PRES))
+		if (npca->params &
+		    cpu_to_le32(IEEE80211_UHR_NPCA_PARAMS_DIS_SUBCH_BMAP_PRES)) {
 			needed += sizeof(npca->dis_subch_bmap[0]);
+			if (len < needed)
+				return false;
+		}
+	}
+
+	/* P-EDCA Operation Parameters (fixed 3 bytes) */
+	if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_PEDCA_ENA)) {
+		needed += sizeof(struct ieee80211_uhr_p_edca_info);
+		if (len < needed)
+			return false;
+	}
+
+	/* DBE Operation Parameters (fixed 1 byte + optional 2 bytes) */
+	if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_DBE_ENA)) {
+		const struct ieee80211_uhr_dbe_info *dbe =
+			(const void *)(data + needed);
+
+		needed += sizeof(*dbe);
+		if (len < needed)
+			return false;
+
+		if (dbe->params &
+		    IEEE80211_UHR_DBE_OPER_DIS_SUBCHANNEL_BITMAP_PRES) {
+			needed += sizeof(dbe->dis_subch_bmap[0]);
+			if (len < needed)
+				return false;
+		}
 	}
 
 	return len >= needed;
@@ -72,12 +310,15 @@ static inline bool ieee80211_uhr_oper_size_ok(const u8 *data, u8 len,
 static inline const struct ieee80211_uhr_npca_info *
 ieee80211_uhr_npca_info(const struct ieee80211_uhr_operation *oper)
 {
+	const u8 *pos = oper->variable;
+
 	if (!(oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_NPCA_ENA)))
 		return NULL;
 
-	/* FIXME: DPS */
+	if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_DPS_ENA))
+		pos += sizeof(struct ieee80211_uhr_dps_info);
 
-	return (const void *)oper->variable;
+	return (const void *)pos;
 }
 
 static inline const __le16 *
@@ -131,6 +372,24 @@ ieee80211_uhr_npca_dis_subch_bitmap(const struct ieee80211_uhr_operation *oper)
 #define IEEE80211_UHR_MAC_CAP_DBE_EHT_MCS_MAP_160_PRES	0x08
 #define IEEE80211_UHR_MAC_CAP_DBE_EHT_MCS_MAP_320_PRES	0x10
 
+/**
+ * enum ieee80211_uhr_dbe_max_supported_bw - DBE Maximum Supported Bandwidth
+ *
+ * As per spec P802.11bn_D1.3 "Table 9-bb5—Encoding of the DBE Maximum
+ * Supported Bandwidth field".
+ *
+ * @IEEE80211_UHR_DBE_MAX_BW_40: Indicates 40 MHz DBE max supported bw
+ * @IEEE80211_UHR_DBE_MAX_BW_80: Indicates 80 MHz DBE max supported bw
+ * @IEEE80211_UHR_DBE_MAX_BW_160: Indicates 160 MHz DBE max supported bw
+ * @IEEE80211_UHR_DBE_MAX_BW_320: Indicates 320 MHz DBE max supported bw
+ */
+enum ieee80211_uhr_dbe_max_supported_bw {
+	IEEE80211_UHR_DBE_MAX_BW_40 = 1,
+	IEEE80211_UHR_DBE_MAX_BW_80 = 2,
+	IEEE80211_UHR_DBE_MAX_BW_160 = 3,
+	IEEE80211_UHR_DBE_MAX_BW_320 = 4,
+};
+
 struct ieee80211_uhr_cap_mac {
 	u8 mac_cap[5];
 } __packed;
-- 
cgit v1.2.3


From 4059172b2a78a71d15d8fcd8d3fd8ea1ba65d25b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 13 Feb 2026 17:26:47 -0800
Subject: KVM: x86: Move kvm_rebooting to x86

Move kvm_rebooting, which is only read by x86, to KVM x86 so that it can
be moved again to core x86 code.  Add a "shutdown" arch hook to facilate
setting the flag in KVM x86, along with a pile of comments to provide more
context around what KVM x86 is doing and why.

Reviewed-by: Chao Gao <chao.gao@intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Chao Gao <chao.gao@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Sagi Shahar <sagis@google.com>
Link: https://patch.msgid.link/20260214012702.2368778-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/x86.c       | 22 ++++++++++++++++++++++
 arch/x86/kvm/x86.h       |  1 +
 include/linux/kvm_host.h |  8 +++++++-
 virt/kvm/kvm_main.c      | 14 +++++++-------
 4 files changed, 37 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a03530795707..7ac3578e6ec0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -700,6 +700,9 @@ static void drop_user_return_notifiers(void)
 		kvm_on_user_return(&msrs->urn);
 }
 
+__visible bool kvm_rebooting;
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_rebooting);
+
 /*
  * Handle a fault on a hardware virtualization (VMX or SVM) instruction.
  *
@@ -13177,6 +13180,25 @@ int kvm_arch_enable_virtualization_cpu(void)
 	return 0;
 }
 
+void kvm_arch_shutdown(void)
+{
+	/*
+	 * Set kvm_rebooting to indicate that KVM has asynchronously disabled
+	 * hardware virtualization, i.e. that errors and/or exceptions on SVM
+	 * and VMX instructions are expected and should be ignored.
+	 */
+	kvm_rebooting = true;
+
+	/*
+	 * Ensure kvm_rebooting is visible before IPIs are sent to other CPUs
+	 * to disable virtualization.  Effectively pairs with the reception of
+	 * the IPI (kvm_rebooting is read in task/exception context, but only
+	 * _needs_ to be read as %true after the IPI function callback disables
+	 * virtualization).
+	 */
+	smp_wmb();
+}
+
 void kvm_arch_disable_virtualization_cpu(void)
 {
 	kvm_x86_call(disable_virtualization_cpu)();
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 94d4f07aaaa0..b314649e5c02 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -54,6 +54,7 @@ struct kvm_host_values {
 	u64 arch_capabilities;
 };
 
+extern bool kvm_rebooting;
 void kvm_spurious_fault(void);
 
 #define SIZE_OF_MEMSLOTS_HASHTABLE \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 34759a262b28..7c4ebd5210ec 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1627,6 +1627,13 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
 #endif
 
 #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+/*
+ * kvm_arch_shutdown() is invoked immediately prior to forcefully disabling
+ * hardware virtualization on all CPUs via IPI function calls (in preparation
+ * for shutdown or reboot), e.g. to allow arch code to prepare for disabling
+ * virtualization while KVM may be actively running vCPUs.
+ */
+void kvm_arch_shutdown(void);
 /*
  * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under
  * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of
@@ -2313,7 +2320,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 
 #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
 extern bool enable_virt_at_load;
-extern bool kvm_rebooting;
 #endif
 
 extern unsigned int halt_poll_ns;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1bc1da66b4b0..d27bf2488b12 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5578,13 +5578,15 @@ bool enable_virt_at_load = true;
 module_param(enable_virt_at_load, bool, 0444);
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_virt_at_load);
 
-__visible bool kvm_rebooting;
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_rebooting);
-
 static DEFINE_PER_CPU(bool, virtualization_enabled);
 static DEFINE_MUTEX(kvm_usage_lock);
 static int kvm_usage_count;
 
+__weak void kvm_arch_shutdown(void)
+{
+
+}
+
 __weak void kvm_arch_enable_virtualization(void)
 {
 
@@ -5638,10 +5640,9 @@ static int kvm_offline_cpu(unsigned int cpu)
 
 static void kvm_shutdown(void *data)
 {
+	kvm_arch_shutdown();
+
 	/*
-	 * Disable hardware virtualization and set kvm_rebooting to indicate
-	 * that KVM has asynchronously disabled hardware virtualization, i.e.
-	 * that relevant errors and exceptions aren't entirely unexpected.
 	 * Some flavors of hardware virtualization need to be disabled before
 	 * transferring control to firmware (to perform shutdown/reboot), e.g.
 	 * on x86, virtualization can block INIT interrupts, which are used by
@@ -5650,7 +5651,6 @@ static void kvm_shutdown(void *data)
 	 * 100% comprehensive.
 	 */
 	pr_info("kvm: exiting hardware virtualization\n");
-	kvm_rebooting = true;
 	on_each_cpu(kvm_disable_virtualization_cpu, NULL, 1);
 }
 
-- 
cgit v1.2.3


From d30372d0b7e637475c79a785d055f4eb8c863656 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 13 Feb 2026 17:27:01 -0800
Subject: KVM: Bury kvm_{en,dis}able_virtualization() in kvm_main.c once more

Now that TDX handles doing VMXON without KVM's involvement, bury the
top-level APIs to enable and disable virtualization back in kvm_main.c.

No functional change intended.

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Tested-by: Chao Gao <chao.gao@intel.com>
Tested-by: Sagi Shahar <sagis@google.com>
Link: https://patch.msgid.link/20260214012702.2368778-16-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h |  8 --------
 virt/kvm/kvm_main.c      | 17 +++++++++++++----
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7c4ebd5210ec..fbd549bdf052 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2613,12 +2613,4 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 				    struct kvm_pre_fault_memory *range);
 #endif
 
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
-int kvm_enable_virtualization(void);
-void kvm_disable_virtualization(void);
-#else
-static inline int kvm_enable_virtualization(void) { return 0; }
-static inline void kvm_disable_virtualization(void) { }
-#endif
-
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d27bf2488b12..a9ccf9a1c41e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1102,6 +1102,9 @@ static inline struct kvm_io_bus *kvm_get_bus_for_destruction(struct kvm *kvm,
 					 !refcount_read(&kvm->users_count));
 }
 
+static int kvm_enable_virtualization(void);
+static void kvm_disable_virtualization(void);
+
 static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 {
 	struct kvm *kvm = kvm_arch_alloc_vm();
@@ -5689,7 +5692,7 @@ static struct syscore kvm_syscore = {
 	.ops = &kvm_syscore_ops,
 };
 
-int kvm_enable_virtualization(void)
+static int kvm_enable_virtualization(void)
 {
 	int r;
 
@@ -5734,9 +5737,8 @@ err_cpuhp:
 	--kvm_usage_count;
 	return r;
 }
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_enable_virtualization);
 
-void kvm_disable_virtualization(void)
+static void kvm_disable_virtualization(void)
 {
 	guard(mutex)(&kvm_usage_lock);
 
@@ -5747,7 +5749,6 @@ void kvm_disable_virtualization(void)
 	cpuhp_remove_state(CPUHP_AP_KVM_ONLINE);
 	kvm_arch_disable_virtualization();
 }
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_disable_virtualization);
 
 static int kvm_init_virtualization(void)
 {
@@ -5763,6 +5764,14 @@ static void kvm_uninit_virtualization(void)
 		kvm_disable_virtualization();
 }
 #else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
+static int kvm_enable_virtualization(void)
+{
+	return 0;
+}
+static void kvm_disable_virtualization(void)
+{
+
+}
 static int kvm_init_virtualization(void)
 {
 	return 0;
-- 
cgit v1.2.3


From c66e0f453d1afa82534383c58d503238a43fa76c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 4 Mar 2026 01:27:47 +0000
Subject: net: use ktime_t in struct scm_timestamping_internal

Instead of using struct timespec64 in scm_timestamping_internal,
use ktime_t, saving 24 bytes in kernel stack.

This makes tcp_update_recv_tstamps() small enough to be inlined.

The ktime_t -> timespec64 conversions happen after socket lock
has been released in tcp_recvmsg(), and only if the application
requested them.

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/2 grow/shrink: 5/4 up/down: 146/-277 (-131)
Function                                     old     new   delta
tcp_zerocopy_receive                        2383    2425     +42
mptcp_recvmsg                               1565    1607     +42
tcp_recvmsg_locked                          3797    3823     +26
put_cmsg_scm_timestamping64                  131     149     +18
put_cmsg_scm_timestamping                    131     149     +18
__pfx_tcp_update_recv_tstamps                 16       -     -16
do_tcp_getsockopt                           4024    4006     -18
tcp_recv_timestamp                           474     430     -44
tcp_zc_handle_leftover                       417     371     -46
__sock_recv_timestamp                       1087    1031     -56
tcp_update_recv_tstamps                       97       -     -97
Total: Before=25223788, After=25223657, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Link: https://patch.msgid.link/20260304012747.881644-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/socket.h |  2 +-
 include/net/tcp.h      | 11 +++++++--
 net/core/scm.c         | 12 ++++++----
 net/ipv4/tcp.c         | 61 ++++++++++++++++++--------------------------------
 net/socket.c           | 23 +++++++++----------
 5 files changed, 51 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index ec715ad4bf25..ec4a0a025793 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -415,7 +415,7 @@ struct __kernel_timespec;
 struct old_timespec32;
 
 struct scm_timestamping_internal {
-	struct timespec64 ts[3];
+	ktime_t ts[3];
 };
 
 extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9cf8785ef0b4..fea6081cf6c7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -503,8 +503,15 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		int flags);
 int tcp_set_rcvlowat(struct sock *sk, int val);
 int tcp_set_window_clamp(struct sock *sk, int val);
-void tcp_update_recv_tstamps(struct sk_buff *skb,
-			     struct scm_timestamping_internal *tss);
+
+static inline void
+tcp_update_recv_tstamps(struct sk_buff *skb,
+			struct scm_timestamping_internal *tss)
+{
+	tss->ts[0] = skb->tstamp;
+	tss->ts[2] = skb_hwtstamps(skb)->hwtstamp;
+}
+
 void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 			struct scm_timestamping_internal *tss);
 void tcp_data_ready(struct sock *sk);
diff --git a/net/core/scm.c b/net/core/scm.c
index a29aa8fb8065..eec13f50ecaf 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -318,8 +318,10 @@ void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_int
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(tss.ts); i++) {
-		tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec;
-		tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec;
+		struct timespec64 tv = ktime_to_timespec64(tss_internal->ts[i]);
+
+		tss.ts[i].tv_sec = tv.tv_sec;
+		tss.ts[i].tv_nsec = tv.tv_nsec;
 	}
 
 	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_NEW, sizeof(tss), &tss);
@@ -332,8 +334,10 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(tss.ts); i++) {
-		tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec;
-		tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec;
+		struct timespec64 tv = ktime_to_timespec64(tss_internal->ts[i]);
+
+		tss.ts[i].tv_sec = tv.tv_sec;
+		tss.ts[i].tv_nsec = tv.tv_nsec;
 	}
 
 	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, sizeof(tss), &tss);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5997e0fb7a45..1c8be22a361e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1871,20 +1871,6 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
 }
 EXPORT_IPV6_MOD(tcp_set_rcvlowat);
 
-void tcp_update_recv_tstamps(struct sk_buff *skb,
-			     struct scm_timestamping_internal *tss)
-{
-	if (skb->tstamp)
-		tss->ts[0] = ktime_to_timespec64(skb->tstamp);
-	else
-		tss->ts[0] = (struct timespec64) {0};
-
-	if (skb_hwtstamps(skb)->hwtstamp)
-		tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp);
-	else
-		tss->ts[2] = (struct timespec64) {0};
-}
-
 #ifdef CONFIG_MMU
 static const struct vm_operations_struct tcp_vm_ops = {
 };
@@ -2376,22 +2362,23 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 {
 	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
 	u32 tsflags = READ_ONCE(sk->sk_tsflags);
-	bool has_timestamping = false;
 
-	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
+	if (tss->ts[0]) {
 		if (sock_flag(sk, SOCK_RCVTSTAMP)) {
+			struct timespec64 tv = ktime_to_timespec64(tss->ts[0]);
+
 			if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
 				if (new_tstamp) {
 					struct __kernel_timespec kts = {
-						.tv_sec = tss->ts[0].tv_sec,
-						.tv_nsec = tss->ts[0].tv_nsec,
+						.tv_sec = tv.tv_sec,
+						.tv_nsec = tv.tv_nsec,
 					};
 					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
 						 sizeof(kts), &kts);
 				} else {
 					struct __kernel_old_timespec ts_old = {
-						.tv_sec = tss->ts[0].tv_sec,
-						.tv_nsec = tss->ts[0].tv_nsec,
+						.tv_sec = tv.tv_sec,
+						.tv_nsec = tv.tv_nsec,
 					};
 					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
 						 sizeof(ts_old), &ts_old);
@@ -2399,41 +2386,37 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 			} else {
 				if (new_tstamp) {
 					struct __kernel_sock_timeval stv = {
-						.tv_sec = tss->ts[0].tv_sec,
-						.tv_usec = tss->ts[0].tv_nsec / 1000,
+						.tv_sec = tv.tv_sec,
+						.tv_usec = tv.tv_nsec / 1000,
 					};
 					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
 						 sizeof(stv), &stv);
 				} else {
-					struct __kernel_old_timeval tv = {
-						.tv_sec = tss->ts[0].tv_sec,
-						.tv_usec = tss->ts[0].tv_nsec / 1000,
+					struct __kernel_old_timeval otv = {
+						.tv_sec = tv.tv_sec,
+						.tv_usec = tv.tv_nsec / 1000,
 					};
 					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
-						 sizeof(tv), &tv);
+						 sizeof(otv), &otv);
 				}
 			}
 		}
 
-		if (tsflags & SOF_TIMESTAMPING_SOFTWARE &&
+		if (!(tsflags & SOF_TIMESTAMPING_SOFTWARE &&
 		    (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
-		     !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))
-			has_timestamping = true;
-		else
-			tss->ts[0] = (struct timespec64) {0};
+		     !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))))
+			tss->ts[0] = 0;
 	}
 
-	if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
-		if (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE &&
+	if (tss->ts[2]) {
+		if (!(tsflags & SOF_TIMESTAMPING_RAW_HARDWARE &&
 		    (tsflags & SOF_TIMESTAMPING_RX_HARDWARE ||
-		     !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))
-			has_timestamping = true;
-		else
-			tss->ts[2] = (struct timespec64) {0};
+		     !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))))
+			tss->ts[2] = 0;
 	}
 
-	if (has_timestamping) {
-		tss->ts[1] = (struct timespec64) {0};
+	if (tss->ts[0] | tss->ts[2]) {
+		tss->ts[1] = 0;
 		if (sock_flag(sk, SOCK_TSTAMP_NEW))
 			put_cmsg_scm_timestamping64(msg, tss);
 		else
diff --git a/net/socket.c b/net/socket.c
index 05952188127f..68829d09bcf1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -912,11 +912,10 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 {
 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
 	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
-	struct scm_timestamping_internal tss;
-	int empty = 1, false_tstamp = 0;
 	struct skb_shared_hwtstamps *shhwtstamps =
 		skb_hwtstamps(skb);
-	int if_index;
+	struct scm_timestamping_internal tss;
+	int if_index, false_tstamp = 0;
 	ktime_t hwtstamp;
 	u32 tsflags;
 
@@ -961,12 +960,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 
 	memset(&tss, 0, sizeof(tss));
 	tsflags = READ_ONCE(sk->sk_tsflags);
-	if ((tsflags & SOF_TIMESTAMPING_SOFTWARE &&
-	     (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
-	      skb_is_err_queue(skb) ||
-	      !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) &&
-	    ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
-		empty = 0;
+	if (tsflags & SOF_TIMESTAMPING_SOFTWARE &&
+	    (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
+	    skb_is_err_queue(skb) ||
+	    !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))
+		tss.ts[0] = skb->tstamp;
+
 	if (shhwtstamps &&
 	    (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE &&
 	     (tsflags & SOF_TIMESTAMPING_RX_HARDWARE ||
@@ -983,15 +982,15 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 			hwtstamp = ptp_convert_timestamp(&hwtstamp,
 							 READ_ONCE(sk->sk_bind_phc));
 
-		if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
-			empty = 0;
+		if (hwtstamp) {
+			tss.ts[2] = hwtstamp;
 
 			if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
 			    !skb_is_err_queue(skb))
 				put_ts_pktinfo(msg, skb, if_index);
 		}
 	}
-	if (!empty) {
+	if (tss.ts[0] | tss.ts[2]) {
 		if (sock_flag(sk, SOCK_TSTAMP_NEW))
 			put_cmsg_scm_timestamping64(msg, &tss);
 		else
-- 
cgit v1.2.3


From 01b7768578a68abe597cfb36ebe0fc47c9305f88 Mon Sep 17 00:00:00 2001
From: Maher Sanalla <msanalla@nvidia.com>
Date: Wed, 25 Feb 2026 16:19:31 +0200
Subject: net/mlx5: Add TLP emulation device capabilities

Introduce the hardware structures and definitions needed for the driver
support of TLP emulation in mlx5_ifc.

Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 775cb0c56865..a3948b36820d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1389,6 +1389,26 @@ struct mlx5_ifc_virtio_emulation_cap_bits {
 	u8         reserved_at_1c0[0x640];
 };
 
+struct mlx5_ifc_tlp_dev_emu_capabilities_bits {
+	u8         reserved_at_0[0x20];
+
+	u8         reserved_at_20[0x13];
+	u8         log_tlp_rsp_gw_page_stride[0x5];
+	u8         reserved_at_38[0x8];
+
+	u8         reserved_at_40[0xc0];
+
+	u8         reserved_at_100[0xc];
+	u8         tlp_rsp_gw_num_pages[0x4];
+	u8         reserved_at_110[0x10];
+
+	u8         reserved_at_120[0xa0];
+
+	u8         tlp_rsp_gw_pages_bar_offset[0x40];
+
+	u8         reserved_at_200[0x600];
+};
+
 enum {
 	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE     = 0x0,
 	MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES    = 0x2,
@@ -1961,7 +1981,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_rqt[0x5];
 	u8         reserved_at_390[0x3];
 	u8         log_max_rqt_size[0x5];
-	u8         reserved_at_398[0x1];
+	u8         tlp_device_emulation_manager[0x1];
 	u8	   vnic_env_cnt_bar_uar_access[0x1];
 	u8	   vnic_env_cnt_odp_page_fault[0x1];
 	u8         log_max_tis_per_sq[0x5];
@@ -3830,6 +3850,7 @@ union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_tls_cap_bits tls_cap;
 	struct mlx5_ifc_device_mem_cap_bits device_mem_cap;
 	struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap;
+	struct mlx5_ifc_tlp_dev_emu_capabilities_bits tlp_dev_emu_capabilities;
 	struct mlx5_ifc_macsec_cap_bits macsec_cap;
 	struct mlx5_ifc_crypto_cap_bits crypto_cap;
 	struct mlx5_ifc_ipsec_cap_bits ipsec_cap;
-- 
cgit v1.2.3


From 385a06f74ff7a03e3fb0b15fb87cfeb052d75073 Mon Sep 17 00:00:00 2001
From: Maher Sanalla <msanalla@nvidia.com>
Date: Wed, 25 Feb 2026 16:19:32 +0200
Subject: net/mlx5: Expose TLP emulation capabilities

Expose and query TLP device emulation caps on driver load.

Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw.c   | 6 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 +
 include/linux/mlx5/device.h                    | 9 +++++++++
 3 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index eeb4437975f2..55249f405841 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -255,6 +255,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 			return err;
 	}
 
+	if (MLX5_CAP_GEN(dev, tlp_device_emulation_manager)) {
+		err = mlx5_core_get_caps_mode(dev, MLX5_CAP_TLP_EMULATION, HCA_CAP_OPMOD_GET_CUR);
+		if (err)
+			return err;
+	}
+
 	if (MLX5_CAP_GEN(dev, ipsec_offload)) {
 		err = mlx5_core_get_caps_mode(dev, MLX5_CAP_IPSEC, HCA_CAP_OPMOD_GET_CUR);
 		if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fdc3ba20912e..b0bc4a7d4a93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1772,6 +1772,7 @@ static const int types[] = {
 	MLX5_CAP_CRYPTO,
 	MLX5_CAP_SHAMPO,
 	MLX5_CAP_ADV_RDMA,
+	MLX5_CAP_TLP_EMULATION,
 };
 
 static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index b37fe39cef27..25c6b42140b2 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1259,6 +1259,7 @@ enum mlx5_cap_type {
 	MLX5_CAP_PORT_SELECTION = 0x25,
 	MLX5_CAP_ADV_VIRTUALIZATION = 0x26,
 	MLX5_CAP_ADV_RDMA = 0x28,
+	MLX5_CAP_TLP_EMULATION = 0x2a,
 	/* NUM OF CAP Types */
 	MLX5_CAP_NUM
 };
@@ -1481,6 +1482,14 @@ enum mlx5_qcam_feature_groups {
 	MLX5_GET64(virtio_emulation_cap, \
 		(mdev)->caps.hca[MLX5_CAP_VDPA_EMULATION]->cur, cap)
 
+#define MLX5_CAP_DEV_TLP_EMULATION(mdev, cap)\
+	MLX5_GET(tlp_dev_emu_capabilities, \
+		(mdev)->caps.hca[MLX5_CAP_TLP_EMULATION]->cur, cap)
+
+#define MLX5_CAP64_DEV_TLP_EMULATION(mdev, cap)\
+	MLX5_GET64(tlp_dev_emu_capabilities, \
+		(mdev)->caps.hca[MLX5_CAP_TLP_EMULATION]->cur, cap)
+
 #define MLX5_CAP_IPSEC(mdev, cap)\
 	MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap)
 
-- 
cgit v1.2.3


From 90503f9ffee927c3abdc94a4862d13ae71ea9442 Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Thu, 12 Feb 2026 15:30:39 -0800
Subject: powercap: intel_rapl: Use unit conversion macros from units.h

Replace hardcoded numeric constants with standard unit conversion
macros from linux/units.h for better code clarity and
self-documentation.

Add MICROJOULE_PER_JOULE and NANOJOULE_PER_JOULE to units.h to
support energy unit conversions, following the existing pattern
for power units.

No functional changes.

Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://patch.msgid.link/20260212233044.329790-8-sathyanarayanan.kuppuswamy@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/intel_rapl_common.c | 19 ++++++++++---------
 include/linux/units.h                |  3 +++
 2 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 8dde3b0eb454..380893baf987 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -24,6 +24,7 @@
 #include <linux/suspend.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
+#include <linux/units.h>
 
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
@@ -964,13 +965,13 @@ static int rapl_check_unit_core(struct rapl_domain *rd)
 	}
 
 	value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
-	rd->energy_unit = (ENERGY_UNIT_SCALE * 1000000) >> value;
+	rd->energy_unit = (ENERGY_UNIT_SCALE * MICROJOULE_PER_JOULE) >> value;
 
 	value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
-	rd->power_unit = 1000000 >> value;
+	rd->power_unit = MICROWATT_PER_WATT >> value;
 
 	value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
-	rd->time_unit = 1000000 >> value;
+	rd->time_unit = USEC_PER_SEC >> value;
 
 	pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
 		 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
@@ -995,10 +996,10 @@ static int rapl_check_unit_atom(struct rapl_domain *rd)
 	rd->energy_unit = ENERGY_UNIT_SCALE * (1ULL << value);
 
 	value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
-	rd->power_unit = (1ULL << value) * 1000;
+	rd->power_unit = (1ULL << value) * MILLIWATT_PER_WATT;
 
 	value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
-	rd->time_unit = 1000000 >> value;
+	rd->time_unit = USEC_PER_SEC >> value;
 
 	pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
 		 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
@@ -1169,13 +1170,13 @@ static int rapl_check_unit_tpmi(struct rapl_domain *rd)
 	}
 
 	value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET;
-	rd->energy_unit = (ENERGY_UNIT_SCALE * 1000000) >> value;
+	rd->energy_unit = (ENERGY_UNIT_SCALE * MICROJOULE_PER_JOULE) >> value;
 
 	value = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET;
-	rd->power_unit = 1000000 >> value;
+	rd->power_unit = MICROWATT_PER_WATT >> value;
 
 	value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET;
-	rd->time_unit = 1000000 >> value;
+	rd->time_unit = USEC_PER_SEC >> value;
 
 	pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
 		 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
@@ -1208,7 +1209,7 @@ static const struct rapl_defaults rapl_defaults_spr_server = {
 	.check_unit = rapl_check_unit_core,
 	.set_floor_freq = set_floor_freq_default,
 	.compute_time_window = rapl_compute_time_window_core,
-	.psys_domain_energy_unit = 1000000000,
+	.psys_domain_energy_unit = NANOJOULE_PER_JOULE,
 	.spr_psys_bits = true,
 };
 
diff --git a/include/linux/units.h b/include/linux/units.h
index 80d57c50b9e3..c6d78988613a 100644
--- a/include/linux/units.h
+++ b/include/linux/units.h
@@ -57,6 +57,9 @@
 #define MICROWATT_PER_MILLIWATT	1000UL
 #define MICROWATT_PER_WATT	1000000UL
 
+#define MICROJOULE_PER_JOULE	1000000UL
+#define NANOJOULE_PER_JOULE	1000000000UL
+
 #define BYTES_PER_KBIT		(KILO / BITS_PER_BYTE)
 #define BYTES_PER_MBIT		(MEGA / BITS_PER_BYTE)
 #define BYTES_PER_GBIT		(GIGA / BITS_PER_BYTE)
-- 
cgit v1.2.3


From d7ca7d1488cc916dbf0a6a594abbda81d4eaeee9 Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Thu, 12 Feb 2026 15:30:40 -0800
Subject: powercap: intel_rapl: Allow interface drivers to configure
 rapl_defaults

RAPL default settings vary across different RAPL interfaces (MSR, TPMI,
MMIO). Currently, these defaults are stored in the common RAPL driver,
which requires interface-specific handling logic and makes the common
layer unnecessarily complex. There is no strong reason for the common
code to own these defaults, since they are inherently
interface-specific.

To prepare for moving default configuration into the individual
interface drivers,

 1. Move struct rapl_defaults into a shared header so that interface
    drivers can directly populate their own default settings.

 2. Change the @defaults field in struct rapl_if_priv from void * to
    const struct rapl_defaults * to improve type safety and readability
    and update the common driver to use the typed defaults structure.

 3. Update all internal getter functions and local pointers to use
    const struct rapl_defaults * to maintain const-correctness.

 4. Rename and export the common helper functions (check_unit,
    set_floor_freq, compute_time_window) so interface drivers may
    reuse or override them as appropriate.

No functional changes. This is a preparatory refactoring to allow
interface drivers to supply their own RAPL default settings.

Co-developed-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://patch.msgid.link/20260212233044.329790-9-sathyanarayanan.kuppuswamy@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/intel_rapl_common.c | 64 ++++++++++++++++--------------------
 include/linux/intel_rapl.h           | 17 ++++++++--
 2 files changed, 43 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 380893baf987..7c95eb658c16 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -221,20 +221,10 @@ static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim)
 #define power_zone_to_rapl_domain(_zone) \
 	container_of(_zone, struct rapl_domain, power_zone)
 
-struct rapl_defaults {
-	u8 floor_freq_reg_addr;
-	int (*check_unit)(struct rapl_domain *rd);
-	void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
-	u64 (*compute_time_window)(struct rapl_domain *rd, u64 val,
-				    bool to_raw);
-	unsigned int dram_domain_energy_unit;
-	unsigned int psys_domain_energy_unit;
-	bool spr_psys_bits;
-};
-static struct rapl_defaults *defaults_msr;
+static const struct rapl_defaults *defaults_msr;
 static const struct rapl_defaults defaults_tpmi;
 
-static struct rapl_defaults *get_defaults(struct rapl_package *rp)
+static const struct rapl_defaults *get_defaults(struct rapl_package *rp)
 {
 	return rp->priv->defaults;
 }
@@ -351,7 +341,7 @@ static int find_nr_power_limit(struct rapl_domain *rd)
 static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
 {
 	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
-	struct rapl_defaults *defaults = get_defaults(rd->rp);
+	const struct rapl_defaults *defaults = get_defaults(rd->rp);
 	u64 val;
 	int ret;
 
@@ -640,7 +630,7 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
 			   u64 value, int to_raw)
 {
 	u64 units = 1;
-	struct rapl_defaults *defaults = get_defaults(rd->rp);
+	const struct rapl_defaults *defaults = get_defaults(rd->rp);
 	u64 scale = 1;
 
 	switch (type) {
@@ -785,11 +775,11 @@ static int rapl_config(struct rapl_package *rp)
 	/* MMIO I/F shares the same register layout as MSR registers */
 	case RAPL_IF_MMIO:
 	case RAPL_IF_MSR:
-		rp->priv->defaults = (void *)defaults_msr;
+		rp->priv->defaults = defaults_msr;
 		rp->priv->rpi = (void *)rpi_msr;
 		break;
 	case RAPL_IF_TPMI:
-		rp->priv->defaults = (void *)&defaults_tpmi;
+		rp->priv->defaults = &defaults_tpmi;
 		rp->priv->rpi = (void *)rpi_tpmi;
 		break;
 	default:
@@ -806,7 +796,7 @@ static int rapl_config(struct rapl_package *rp)
 static enum rapl_primitives
 prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
 {
-	struct rapl_defaults *defaults = get_defaults(rd->rp);
+	const struct rapl_defaults *defaults = get_defaults(rd->rp);
 
 	if (!defaults->spr_psys_bits)
 		return prim;
@@ -951,7 +941,7 @@ static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
  * power unit : microWatts  : Represented in milliWatts by default
  * time unit  : microseconds: Represented in seconds by default
  */
-static int rapl_check_unit_core(struct rapl_domain *rd)
+int rapl_default_check_unit(struct rapl_domain *rd)
 {
 	struct reg_action ra;
 	u32 value;
@@ -978,6 +968,7 @@ static int rapl_check_unit_core(struct rapl_domain *rd)
 
 	return 0;
 }
+EXPORT_SYMBOL_NS_GPL(rapl_default_check_unit, "INTEL_RAPL");
 
 static int rapl_check_unit_atom(struct rapl_domain *rd)
 {
@@ -1071,7 +1062,7 @@ static void package_power_limit_irq_restore(struct rapl_package *rp)
 	wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
 }
 
-static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
+void rapl_default_set_floor_freq(struct rapl_domain *rd, bool mode)
 {
 	int i;
 
@@ -1085,11 +1076,12 @@ static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
 		rapl_write_pl_data(rd, i, PL_CLAMP, mode);
 	}
 }
+EXPORT_SYMBOL_NS_GPL(rapl_default_set_floor_freq, "INTEL_RAPL");
 
 static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
 {
 	static u32 power_ctrl_orig_val;
-	struct rapl_defaults *defaults = get_defaults(rd->rp);
+	const struct rapl_defaults *defaults = get_defaults(rd->rp);
 	u32 mdata;
 
 	if (!defaults->floor_freq_reg_addr) {
@@ -1110,8 +1102,7 @@ static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
 		       defaults->floor_freq_reg_addr, mdata);
 }
 
-static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
-					 bool to_raw)
+u64 rapl_default_compute_time_window(struct rapl_domain *rd, u64 value, bool to_raw)
 {
 	u64 f, y;		/* fraction and exp. used for time unit */
 
@@ -1142,6 +1133,7 @@ static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
 	}
 	return value;
 }
+EXPORT_SYMBOL_NS_GPL(rapl_default_compute_time_window, "INTEL_RAPL");
 
 static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
 					 bool to_raw)
@@ -1187,28 +1179,28 @@ static int rapl_check_unit_tpmi(struct rapl_domain *rd)
 static const struct rapl_defaults defaults_tpmi = {
 	.check_unit = rapl_check_unit_tpmi,
 	/* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */
-	.set_floor_freq = set_floor_freq_default,
-	.compute_time_window = rapl_compute_time_window_core,
+	.set_floor_freq = rapl_default_set_floor_freq,
+	.compute_time_window = rapl_default_compute_time_window,
 };
 
 static const struct rapl_defaults rapl_defaults_core = {
 	.floor_freq_reg_addr = 0,
-	.check_unit = rapl_check_unit_core,
-	.set_floor_freq = set_floor_freq_default,
-	.compute_time_window = rapl_compute_time_window_core,
+	.check_unit = rapl_default_check_unit,
+	.set_floor_freq = rapl_default_set_floor_freq,
+	.compute_time_window = rapl_default_compute_time_window,
 };
 
 static const struct rapl_defaults rapl_defaults_hsw_server = {
-	.check_unit = rapl_check_unit_core,
-	.set_floor_freq = set_floor_freq_default,
-	.compute_time_window = rapl_compute_time_window_core,
+	.check_unit = rapl_default_check_unit,
+	.set_floor_freq = rapl_default_set_floor_freq,
+	.compute_time_window = rapl_default_compute_time_window,
 	.dram_domain_energy_unit = 15300,
 };
 
 static const struct rapl_defaults rapl_defaults_spr_server = {
-	.check_unit = rapl_check_unit_core,
-	.set_floor_freq = set_floor_freq_default,
-	.compute_time_window = rapl_compute_time_window_core,
+	.check_unit = rapl_default_check_unit,
+	.set_floor_freq = rapl_default_set_floor_freq,
+	.compute_time_window = rapl_default_compute_time_window,
 	.psys_domain_energy_unit = NANOJOULE_PER_JOULE,
 	.spr_psys_bits = true,
 };
@@ -1242,7 +1234,7 @@ static const struct rapl_defaults rapl_defaults_cht = {
 };
 
 static const struct rapl_defaults rapl_defaults_amd = {
-	.check_unit = rapl_check_unit_core,
+	.check_unit = rapl_default_check_unit,
 };
 
 static const struct x86_cpu_id rapl_ids[] __initconst = {
@@ -1448,7 +1440,7 @@ static int rapl_check_domain(int domain, struct rapl_package *rp)
  */
 static int rapl_get_domain_unit(struct rapl_domain *rd)
 {
-	struct rapl_defaults *defaults = get_defaults(rd->rp);
+	const struct rapl_defaults *defaults = get_defaults(rd->rp);
 	int ret;
 
 	if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) {
@@ -2341,7 +2333,7 @@ static int __init rapl_init(void)
 
 	id = x86_match_cpu(rapl_ids);
 	if (id) {
-		defaults_msr = (struct rapl_defaults *)id->driver_data;
+		defaults_msr = (const struct rapl_defaults *)id->driver_data;
 
 		rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
 		if (!rapl_msr_platdev)
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index fa1f328d6712..6d694099a3ad 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -128,6 +128,16 @@ struct reg_action {
 	int err;
 };
 
+struct rapl_defaults {
+	u8 floor_freq_reg_addr;
+	int (*check_unit)(struct rapl_domain *rd);
+	void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
+	u64 (*compute_time_window)(struct rapl_domain *rd, u64 val, bool to_raw);
+	unsigned int dram_domain_energy_unit;
+	unsigned int psys_domain_energy_unit;
+	bool spr_psys_bits;
+};
+
 /**
  * struct rapl_if_priv: private data for different RAPL interfaces
  * @control_type:		Each RAPL interface must have its own powercap
@@ -142,7 +152,7 @@ struct reg_action {
  *				registers.
  * @write_raw:			Callback for writing RAPL interface specific
  *				registers.
- * @defaults:			internal pointer to interface default settings
+ * @defaults:			pointer to default settings
  * @rpi:			internal pointer to interface primitive info
  */
 struct rapl_if_priv {
@@ -154,7 +164,7 @@ struct rapl_if_priv {
 	int limits[RAPL_DOMAIN_MAX];
 	int (*read_raw)(int id, struct reg_action *ra, bool pmu_ctx);
 	int (*write_raw)(int id, struct reg_action *ra);
-	void *defaults;
+	const struct rapl_defaults *defaults;
 	void *rpi;
 };
 
@@ -211,6 +221,9 @@ void rapl_remove_package_cpuslocked(struct rapl_package *rp);
 struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu);
 struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu);
 void rapl_remove_package(struct rapl_package *rp);
+int rapl_default_check_unit(struct rapl_domain *rd);
+void rapl_default_set_floor_freq(struct rapl_domain *rd, bool mode);
+u64 rapl_default_compute_time_window(struct rapl_domain *rd, u64 value, bool to_raw);
 
 #ifdef CONFIG_PERF_EVENTS
 int rapl_package_add_pmu(struct rapl_package *rp);
-- 
cgit v1.2.3


From cc39325f927850473d3a84b029ae6f9b508e9bd1 Mon Sep 17 00:00:00 2001
From: Mohsin Bashir <mohsin.bashr@gmail.com>
Date: Mon, 2 Mar 2026 15:01:45 -0800
Subject: net: ethtool: Track pause storm events

With TX pause enabled, if a device is unable to pass packets up to the
stack (e.g., CPU is hanged), the device can cause pause storm. Given
that devices can have native support to protect the neighbor from such
flooding, such events need some tracking. This support is to track TX
pause storm events for better observability.

Reviewed-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Mohsin Bashir <mohsin.bashr@gmail.com>
Link: https://patch.msgid.link/20260302230149.1580195-2-mohsin.bashr@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/netlink/specs/ethtool.yaml       | 13 +++++++++++++
 include/linux/ethtool.h                        |  2 ++
 include/uapi/linux/ethtool_netlink_generated.h |  1 +
 net/ethtool/pause.c                            |  4 +++-
 4 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 0a2d2343f79a..4707063af3b4 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -879,6 +879,19 @@ attribute-sets:
       -
         name: rx-frames
         type: u64
+      -
+        name: tx-pause-storm-events
+        type: u64
+        doc: >-
+            TX pause storm event count. Increments each time device
+            detects that its pause assertion condition has been true
+            for too long for normal operation. As a result, the device
+            has temporarily disabled its own Pause TX function to
+            protect the network from itself.
+            This counter should never increment under normal overload
+            conditions; it indicates catastrophic failure like an OS
+            crash. The rate of incrementing is implementation specific.
+
   -
     name: pause
     attr-cnt-name: __ethtool-a-pause-cnt
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 798abec67a1b..83c375840835 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -512,12 +512,14 @@ struct ethtool_eth_ctrl_stats {
  *
  *	Equivalent to `30.3.4.3 aPAUSEMACCtrlFramesReceived`
  *	from the standard.
+ * @tx_pause_storm_events: TX pause storm event count (see ethtool.yaml).
  */
 struct ethtool_pause_stats {
 	enum ethtool_mac_stats_src src;
 	struct_group(stats,
 		u64 tx_pause_frames;
 		u64 rx_pause_frames;
+		u64 tx_pause_storm_events;
 	);
 };
 
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index 556a0c834df5..114b83017297 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -381,6 +381,7 @@ enum {
 	ETHTOOL_A_PAUSE_STAT_PAD,
 	ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
 	ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
+	ETHTOOL_A_PAUSE_STAT_TX_PAUSE_STORM_EVENTS,
 
 	__ETHTOOL_A_PAUSE_STAT_CNT,
 	ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index 0f9af1e66548..5d28f642764c 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -130,7 +130,9 @@ static int pause_put_stats(struct sk_buff *skb,
 	if (ethtool_put_stat(skb, pause_stats->tx_pause_frames,
 			     ETHTOOL_A_PAUSE_STAT_TX_FRAMES, pad) ||
 	    ethtool_put_stat(skb, pause_stats->rx_pause_frames,
-			     ETHTOOL_A_PAUSE_STAT_RX_FRAMES, pad))
+			     ETHTOOL_A_PAUSE_STAT_RX_FRAMES, pad) ||
+	    ethtool_put_stat(skb, pause_stats->tx_pause_storm_events,
+			     ETHTOOL_A_PAUSE_STAT_TX_PAUSE_STORM_EVENTS, pad))
 		goto err_cancel;
 
 	nla_nest_end(skb, nest);
-- 
cgit v1.2.3


From 31a6a07eefeb4c84bd6730fbe9e95fd9221712cf Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu@redhat.com>
Date: Fri, 13 Feb 2026 09:28:46 +0800
Subject: integrity: Make arch_ima_get_secureboot integrity-wide

EVM and other LSMs need the ability to query the secure boot status of
the system, without directly calling the IMA arch_ima_get_secureboot
function. Refactor the secure boot status check into a general function
named arch_get_secureboot.

Reported-and-suggested-by: Mimi Zohar <zohar@linux.ibm.com>
Suggested-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Coiby Xu <coxu@redhat.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 MAINTAINERS                                   |  1 +
 arch/powerpc/kernel/ima_arch.c                |  5 ---
 arch/powerpc/kernel/secure_boot.c             |  6 +++
 arch/s390/kernel/ima_arch.c                   |  6 ---
 arch/s390/kernel/ipl.c                        |  5 +++
 arch/x86/include/asm/efi.h                    |  4 +-
 arch/x86/platform/efi/efi.c                   |  2 +-
 include/linux/ima.h                           |  7 +---
 include/linux/secure_boot.h                   | 19 +++++++++
 security/integrity/Makefile                   |  3 +-
 security/integrity/efi_secureboot.c           | 56 +++++++++++++++++++++++++++
 security/integrity/ima/ima_appraise.c         |  2 +-
 security/integrity/ima/ima_efi.c              | 47 +---------------------
 security/integrity/ima/ima_main.c             |  3 +-
 security/integrity/integrity.h                |  1 +
 security/integrity/platform_certs/load_uefi.c |  2 +-
 security/integrity/secure_boot.c              | 16 ++++++++
 17 files changed, 115 insertions(+), 70 deletions(-)
 create mode 100644 include/linux/secure_boot.h
 create mode 100644 security/integrity/efi_secureboot.c
 create mode 100644 security/integrity/secure_boot.c

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 61bf550fd37c..04823afa8b74 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12668,6 +12668,7 @@ R:	Eric Snowberg <eric.snowberg@oracle.com>
 L:	linux-integrity@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
+F:	include/linux/secure_boot.h
 F:	security/integrity/
 F:	security/integrity/ima/
 
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
index b7029beed847..0d8892a03526 100644
--- a/arch/powerpc/kernel/ima_arch.c
+++ b/arch/powerpc/kernel/ima_arch.c
@@ -7,11 +7,6 @@
 #include <linux/ima.h>
 #include <asm/secure_boot.h>
 
-bool arch_ima_get_secureboot(void)
-{
-	return is_ppc_secureboot_enabled();
-}
-
 /*
  * The "secure_rules" are enabled only on "secureboot" enabled systems.
  * These rules verify the file signatures against known good values.
diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c
index 3a28795b4ed8..28436c1599e0 100644
--- a/arch/powerpc/kernel/secure_boot.c
+++ b/arch/powerpc/kernel/secure_boot.c
@@ -5,6 +5,7 @@
  */
 #include <linux/types.h>
 #include <linux/of.h>
+#include <linux/secure_boot.h>
 #include <linux/string_choices.h>
 #include <asm/secure_boot.h>
 
@@ -44,6 +45,11 @@ out:
 	return enabled;
 }
 
+bool arch_get_secureboot(void)
+{
+	return is_ppc_secureboot_enabled();
+}
+
 bool is_ppc_trustedboot_enabled(void)
 {
 	struct device_node *node;
diff --git a/arch/s390/kernel/ima_arch.c b/arch/s390/kernel/ima_arch.c
index f3c3e6e1c5d3..6ccbe34ce408 100644
--- a/arch/s390/kernel/ima_arch.c
+++ b/arch/s390/kernel/ima_arch.c
@@ -1,12 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/ima.h>
-#include <asm/boot_data.h>
-
-bool arch_ima_get_secureboot(void)
-{
-	return ipl_secure_flag;
-}
 
 const char * const *arch_get_ima_policy(void)
 {
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 049c557c452f..bdbbedf52580 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2504,6 +2504,11 @@ out:
 	return buf;
 }
 
+bool arch_get_secureboot(void)
+{
+	return ipl_secure_flag;
+}
+
 int ipl_report_free(struct ipl_report *report)
 {
 	struct ipl_report_component *comp, *ncomp;
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index f227a70ac91f..ee382b56dd7b 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -401,9 +401,9 @@ extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
 extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap,
 				     void *buf, struct efi_mem_range *mem);
 
-extern enum efi_secureboot_mode __x86_ima_efi_boot_mode(void);
+enum efi_secureboot_mode __x86_efi_boot_mode(void);
 
-#define arch_ima_efi_boot_mode	__x86_ima_efi_boot_mode()
+#define arch_efi_boot_mode __x86_efi_boot_mode()
 
 #ifdef CONFIG_EFI_RUNTIME_MAP
 int efi_get_runtime_map_size(void);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index d00c6de7f3b7..74032f3ab9b0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -920,7 +920,7 @@ umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n)
 	return attr->mode;
 }
 
-enum efi_secureboot_mode __x86_ima_efi_boot_mode(void)
+enum efi_secureboot_mode __x86_efi_boot_mode(void)
 {
 	return boot_params.secure_boot;
 }
diff --git a/include/linux/ima.h b/include/linux/ima.h
index abf8923f8fc5..8e08baf16c2f 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -11,6 +11,7 @@
 #include <linux/fs.h>
 #include <linux/security.h>
 #include <linux/kexec.h>
+#include <linux/secure_boot.h>
 #include <crypto/hash_info.h>
 struct linux_binprm;
 
@@ -73,14 +74,8 @@ int ima_validate_range(phys_addr_t phys, size_t size);
 #endif
 
 #ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT
-extern bool arch_ima_get_secureboot(void);
 extern const char * const *arch_get_ima_policy(void);
 #else
-static inline bool arch_ima_get_secureboot(void)
-{
-	return false;
-}
-
 static inline const char * const *arch_get_ima_policy(void)
 {
 	return NULL;
diff --git a/include/linux/secure_boot.h b/include/linux/secure_boot.h
new file mode 100644
index 000000000000..3ded3f03655c
--- /dev/null
+++ b/include/linux/secure_boot.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2026 Red Hat, Inc. All Rights Reserved.
+ *
+ * Author: Coiby Xu <coxu@redhat.com>
+ */
+
+#ifndef _LINUX_SECURE_BOOT_H
+#define _LINUX_SECURE_BOOT_H
+
+#include <linux/types.h>
+
+/*
+ * Returns true if the platform secure boot is enabled.
+ * Returns false if disabled or not supported.
+ */
+bool arch_get_secureboot(void);
+
+#endif /* _LINUX_SECURE_BOOT_H */
diff --git a/security/integrity/Makefile b/security/integrity/Makefile
index 92b63039c654..548665e2b702 100644
--- a/security/integrity/Makefile
+++ b/security/integrity/Makefile
@@ -5,7 +5,7 @@
 
 obj-$(CONFIG_INTEGRITY) += integrity.o
 
-integrity-y := iint.o
+integrity-y := iint.o secure_boot.o
 integrity-$(CONFIG_INTEGRITY_AUDIT) += integrity_audit.o
 integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o
 integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o
@@ -18,6 +18,7 @@ integrity-$(CONFIG_LOAD_IPL_KEYS) += platform_certs/load_ipl_s390.o
 integrity-$(CONFIG_LOAD_PPC_KEYS) += platform_certs/efi_parser.o \
                                      platform_certs/load_powerpc.o \
                                      platform_certs/keyring_handler.o
+integrity-$(CONFIG_EFI) += efi_secureboot.o
 # The relative order of the 'ima' and 'evm' LSMs depends on the order below.
 obj-$(CONFIG_IMA)			+= ima/
 obj-$(CONFIG_EVM)			+= evm/
diff --git a/security/integrity/efi_secureboot.c b/security/integrity/efi_secureboot.c
new file mode 100644
index 000000000000..bfd4260a83a3
--- /dev/null
+++ b/security/integrity/efi_secureboot.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-1.0+
+/*
+ * Copyright (C) 2018 IBM Corporation
+ */
+#include <linux/efi.h>
+#include <linux/secure_boot.h>
+#include <asm/efi.h>
+
+#ifndef arch_efi_boot_mode
+#define arch_efi_boot_mode efi_secureboot_mode_unset
+#endif
+
+static enum efi_secureboot_mode get_sb_mode(void)
+{
+	enum efi_secureboot_mode mode;
+
+	if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) {
+		pr_info("integrity: secureboot mode unknown, no efi\n");
+		return efi_secureboot_mode_unknown;
+	}
+
+	mode = efi_get_secureboot_mode(efi.get_variable);
+	if (mode == efi_secureboot_mode_disabled)
+		pr_info("integrity: secureboot mode disabled\n");
+	else if (mode == efi_secureboot_mode_unknown)
+		pr_info("integrity: secureboot mode unknown\n");
+	else
+		pr_info("integrity: secureboot mode enabled\n");
+	return mode;
+}
+
+/*
+ * Query secure boot status
+ *
+ * Note don't call this function too early e.g. in __setup hook otherwise the
+ * kernel may hang when calling efi_get_secureboot_mode.
+ *
+ */
+bool arch_get_secureboot(void)
+{
+	static enum efi_secureboot_mode sb_mode;
+	static bool initialized;
+
+	if (!initialized && efi_enabled(EFI_BOOT)) {
+		sb_mode = arch_efi_boot_mode;
+
+		if (sb_mode == efi_secureboot_mode_unset)
+			sb_mode = get_sb_mode();
+		initialized = true;
+	}
+
+	if (sb_mode == efi_secureboot_mode_enabled)
+		return true;
+	else
+		return false;
+}
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 16c20c578ea8..ee2e0891febc 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -27,7 +27,7 @@ core_param(ima_appraise, ima_appraise_cmdline_default, charp, 0);
 void __init ima_appraise_parse_cmdline(void)
 {
 	const char *str = ima_appraise_cmdline_default;
-	bool sb_state = arch_ima_get_secureboot();
+	bool sb_state = arch_get_secureboot();
 	int appraisal_state = ima_appraise;
 
 	if (!str)
diff --git a/security/integrity/ima/ima_efi.c b/security/integrity/ima/ima_efi.c
index 138029bfcce1..78191879dd98 100644
--- a/security/integrity/ima/ima_efi.c
+++ b/security/integrity/ima/ima_efi.c
@@ -2,52 +2,9 @@
 /*
  * Copyright (C) 2018 IBM Corporation
  */
-#include <linux/efi.h>
 #include <linux/module.h>
 #include <linux/ima.h>
-#include <asm/efi.h>
-
-#ifndef arch_ima_efi_boot_mode
-#define arch_ima_efi_boot_mode efi_secureboot_mode_unset
-#endif
-
-static enum efi_secureboot_mode get_sb_mode(void)
-{
-	enum efi_secureboot_mode mode;
-
-	if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) {
-		pr_info("ima: secureboot mode unknown, no efi\n");
-		return efi_secureboot_mode_unknown;
-	}
-
-	mode = efi_get_secureboot_mode(efi.get_variable);
-	if (mode == efi_secureboot_mode_disabled)
-		pr_info("ima: secureboot mode disabled\n");
-	else if (mode == efi_secureboot_mode_unknown)
-		pr_info("ima: secureboot mode unknown\n");
-	else
-		pr_info("ima: secureboot mode enabled\n");
-	return mode;
-}
-
-bool arch_ima_get_secureboot(void)
-{
-	static enum efi_secureboot_mode sb_mode;
-	static bool initialized;
-
-	if (!initialized && efi_enabled(EFI_BOOT)) {
-		sb_mode = arch_ima_efi_boot_mode;
-
-		if (sb_mode == efi_secureboot_mode_unset)
-			sb_mode = get_sb_mode();
-		initialized = true;
-	}
-
-	if (sb_mode == efi_secureboot_mode_enabled)
-		return true;
-	else
-		return false;
-}
+#include <linux/secure_boot.h>
 
 /* secureboot arch rules */
 static const char * const sb_arch_rules[] = {
@@ -67,7 +24,7 @@ static const char * const sb_arch_rules[] = {
 
 const char * const *arch_get_ima_policy(void)
 {
-	if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) {
+	if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_get_secureboot()) {
 		if (IS_ENABLED(CONFIG_MODULE_SIG))
 			set_module_sig_enforced();
 		if (IS_ENABLED(CONFIG_KEXEC_SIG))
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 1d6229b156fb..5808b52c8426 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -953,8 +953,7 @@ static int ima_load_data(enum kernel_load_data_id id, bool contents)
 
 	switch (id) {
 	case LOADING_KEXEC_IMAGE:
-		if (IS_ENABLED(CONFIG_KEXEC_SIG)
-		    && arch_ima_get_secureboot()) {
+		if (IS_ENABLED(CONFIG_KEXEC_SIG) && arch_get_secureboot()) {
 			pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n");
 			return -EACCES;
 		}
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 7b388b66cf80..4636629533af 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -14,6 +14,7 @@
 
 #include <linux/types.h>
 #include <linux/integrity.h>
+#include <linux/secure_boot.h>
 #include <crypto/sha1.h>
 #include <crypto/hash.h>
 #include <linux/key.h>
diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
index d1fdd113450a..c0d6948446c3 100644
--- a/security/integrity/platform_certs/load_uefi.c
+++ b/security/integrity/platform_certs/load_uefi.c
@@ -212,7 +212,7 @@ static int __init load_uefi_certs(void)
 	}
 
 	/* the MOK/MOKx can not be trusted when secure boot is disabled */
-	if (!arch_ima_get_secureboot())
+	if (!arch_get_secureboot())
 		return 0;
 
 	mokx = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &status);
diff --git a/security/integrity/secure_boot.c b/security/integrity/secure_boot.c
new file mode 100644
index 000000000000..fc2693c286f8
--- /dev/null
+++ b/security/integrity/secure_boot.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026 Red Hat, Inc. All Rights Reserved.
+ *
+ * Author: Coiby Xu <coxu@redhat.com>
+ */
+#include <linux/secure_boot.h>
+
+/*
+ * Default weak implementation.
+ * Architectures that support secure boot must override this.
+ */
+__weak bool arch_get_secureboot(void)
+{
+	return false;
+}
-- 
cgit v1.2.3


From 0ec959cf4b5a609d7f27bf84064ef5372e30ab80 Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu@redhat.com>
Date: Tue, 30 Sep 2025 10:26:56 +0800
Subject: evm: fix security.evm for a file with IMA signature

When both IMA and EVM fix modes are enabled, accessing a file with IMA
signature but missing EVM HMAC won't cause security.evm to be fixed.

Add a function evm_fix_hmac which will be explicitly called to fix EVM
HMAC for this case.

Suggested-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Coiby Xu <coxu@redhat.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 include/linux/evm.h                   |  8 ++++++++
 security/integrity/evm/evm_main.c     | 28 ++++++++++++++++++++++++++++
 security/integrity/ima/ima_appraise.c |  5 +++++
 3 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/evm.h b/include/linux/evm.h
index ddece4a6b25d..913f4573b203 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -18,6 +18,8 @@ extern enum integrity_status evm_verifyxattr(struct dentry *dentry,
 					     const char *xattr_name,
 					     void *xattr_value,
 					     size_t xattr_value_len);
+int evm_fix_hmac(struct dentry *dentry, const char *xattr_name,
+		 const char *xattr_value, size_t xattr_value_len);
 int evm_inode_init_security(struct inode *inode, struct inode *dir,
 			    const struct qstr *qstr, struct xattr *xattrs,
 			    int *xattr_count);
@@ -51,6 +53,12 @@ static inline enum integrity_status evm_verifyxattr(struct dentry *dentry,
 {
 	return INTEGRITY_UNKNOWN;
 }
+
+static inline int evm_fix_hmac(struct dentry *dentry, const char *xattr_name,
+			       const char *xattr_value, size_t xattr_value_len)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 static inline int evm_inode_init_security(struct inode *inode, struct inode *dir,
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index cfc3531cf53f..1b0089b4b796 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -795,6 +795,34 @@ bool evm_revalidate_status(const char *xattr_name)
 	return true;
 }
 
+/**
+ * evm_fix_hmac - Calculate the HMAC and add it to security.evm for fix mode
+ * @dentry: pointer to the affected dentry which doesn't yet have security.evm
+ *          xattr
+ * @xattr_name: pointer to the affected extended attribute name
+ * @xattr_value: pointer to the new extended attribute value
+ * @xattr_value_len: pointer to the new extended attribute value length
+ *
+ * Expects to be called with i_mutex locked.
+ *
+ * Return: 0 on success, -EPERM/-ENOMEM/-EOPNOTSUPP on failure
+ */
+int evm_fix_hmac(struct dentry *dentry, const char *xattr_name,
+		 const char *xattr_value, size_t xattr_value_len)
+
+{
+	if (!evm_fixmode || !evm_revalidate_status((xattr_name)))
+		return -EPERM;
+
+	if (!(evm_initialized & EVM_INIT_HMAC))
+		return -EPERM;
+
+	if (is_unsupported_hmac_fs(dentry))
+		return -EOPNOTSUPP;
+
+	return evm_update_evmxattr(dentry, xattr_name, xattr_value, xattr_value_len);
+}
+
 /**
  * evm_inode_post_setxattr - update 'security.evm' to reflect the changes
  * @dentry: pointer to the affected dentry
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index ee2e0891febc..0d41d102626a 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -591,6 +591,11 @@ out:
 		     xattr_value->type != EVM_IMA_XATTR_DIGSIG)) {
 			if (!ima_fix_xattr(dentry, iint))
 				status = INTEGRITY_PASS;
+		} else if (status == INTEGRITY_NOLABEL) {
+			if (!evm_fix_hmac(dentry, XATTR_NAME_IMA,
+					  (const char *)xattr_value,
+					  xattr_len))
+				status = INTEGRITY_PASS;
 		}
 
 		/*
-- 
cgit v1.2.3


From fab0c75d500fd23de6ea1b30e44635418a6dae65 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Wed, 4 Mar 2026 10:10:22 -0800
Subject: x86/cpu: Add platform ID to CPU matching structure

The existing x86_match_cpu() infrastructure can be used to match
a bunch of attributes of a CPU: vendor, family, model, steppings
and CPU features.

But, there's one more attribute that's missing and unable to be
matched against: the platform ID, enumerated on Intel CPUs in
MSR_IA32_PLATFORM_ID. It is a little more obscure and is only
queried during microcode loading. This is because Intel sometimes
has CPUs with identical family/model/stepping but which need
different microcode. These CPUs are differentiated with the
platform ID.

Add a field in 'struct x86_cpu_id' for the platform ID. Similar
to the stepping field, make the new field a mask of platform IDs.
Some examples:

	0x01: matches only platform ID 0x0
	0x02: matches only platform ID 0x1
	0x03: matches platform IDs 0x0 or 0x1
	0x80: matches only platform ID 0x7
	0xff: matches all 8 possible platform IDs

Since the mask is only a byte wide, it nestles in next to another
u8 and does not even increase the size of 'struct x86_cpu_id'.

Reserve the all 0's value as the wildcard (X86_PLATFORM_ANY). This
avoids forcing changes to existing 'struct x86_cpu_id' users.  They
can just continue to fill the field with 0's and their matching will
work exactly as before.

Note: If someone is ever looking for space in 'struct x86_cpu_id',
this new field could probably get stuck over in ->driver_data
for the one user that there is.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Sohil Mehta <sohil.mehta@intel.com>
Link: https://patch.msgid.link/20260304181022.058DF07C@davehans-spike.ostc.intel.com
---
 arch/x86/kernel/cpu/match.c     | 3 +++
 include/linux/mod_devicetable.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 6af1e8baeb0f..4604802692da 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -76,6 +76,9 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
 		if (m->steppings != X86_STEPPING_ANY &&
 		    !(BIT(c->x86_stepping) & m->steppings))
 			continue;
+		if (m->platform_mask != X86_PLATFORM_ANY &&
+		    !(BIT(c->intel_platform_id) & m->platform_mask))
+			continue;
 		if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
 			continue;
 		if (!x86_match_vendor_cpu_type(c, m))
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 5b1725fe9707..23ff24080dfd 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -691,6 +691,7 @@ struct x86_cpu_id {
 	__u16 feature;	/* bit index */
 	/* Solely for kernel-internal use: DO NOT EXPORT to userspace! */
 	__u16 flags;
+	__u8  platform_mask;
 	__u8  type;
 	kernel_ulong_t driver_data;
 };
@@ -702,6 +703,7 @@ struct x86_cpu_id {
 #define X86_STEPPING_ANY 0
 #define X86_STEP_MIN 0
 #define X86_STEP_MAX 0xf
+#define X86_PLATFORM_ANY 0x0
 #define X86_FEATURE_ANY 0	/* Same as FPU, you can't test for that */
 #define X86_CPU_TYPE_ANY 0
 
-- 
cgit v1.2.3


From de70eef32e10883fe74f6df635c616785b24b867 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 25 Feb 2026 21:13:09 -0800
Subject: ARM: omap: fix all kernel-doc warnings

Use the correct struct member names to avoid kernel-doc warnings:

Warning: include/linux/platform_data/voltage-omap.h:27 struct member
 'volt_nominal' not described in 'omap_volt_data'
Warning: include/linux/platform_data/voltage-omap.h:27 struct member
 'vp_errgain' not described in 'omap_volt_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260226051309.556228-1-rdunlap@infradead.org
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
---
 include/linux/platform_data/voltage-omap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/voltage-omap.h b/include/linux/platform_data/voltage-omap.h
index 6d74e507dbd2..2b48f2b0135d 100644
--- a/include/linux/platform_data/voltage-omap.h
+++ b/include/linux/platform_data/voltage-omap.h
@@ -10,14 +10,14 @@
 
 /**
  * struct omap_volt_data - Omap voltage specific data.
- * @voltage_nominal:	The possible voltage value in uV
+ * @volt_nominal:	The possible voltage value in uV
  * @sr_efuse_offs:	The offset of the efuse register(from system
  *			control module base address) from where to read
  *			the n-target value for the smartreflex module.
  * @sr_errminlimit:	Error min limit value for smartreflex. This value
  *			differs at differnet opp and thus is linked
  *			with voltage.
- * @vp_errorgain:	Error gain value for the voltage processor. This
+ * @vp_errgain:		Error gain value for the voltage processor. This
  *			field also differs according to the voltage/opp.
  */
 struct omap_volt_data {
-- 
cgit v1.2.3


From d4d8c6e6fd2a1c5144339884ca5f66e654ad54a5 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@google.com>
Date: Tue, 3 Mar 2026 23:54:16 +0000
Subject: tcp: Initialise ehash secrets during connect() and listen().

inet_ehashfn() and inet6_ehashfn() initialise random secrets
on the first call by net_get_random_once().

While the init part is patched out using static keys, with
CONFIG_STACKPROTECTOR_STRONG=y, this causes a compiler to
generate a stack canary due to an automatic variable,
unsigned long ___flags, in the DO_ONCE() macro being passed
to __do_once_start().

With FDO, this is visible in __inet_lookup_established() and
__inet6_lookup_established() too.

Let's initialise the secrets by get_random_sleepable_once()
in the slow paths: inet_hash() for listen(), and
inet_hash_connect() and inet6_hash_connect() for connect().

Note that IPv6 listener will initialise both IPv4 & IPv6 secrets
in inet_hash() for IPv4-mapped IPv6 address.

With the patch, the stack size is reduced by 16 bytes (___flags
 + a stack canary) and NOPs for the static key go away.

Before: __inet6_lookup_established()

       ...
       push   %rbx
       sub    $0x38,%rsp                # stack is 56 bytes
       mov    %edx,%ebx                 # sport
       mov    %gs:0x299419f(%rip),%rax  # load stack canary
       mov    %rax,0x30(%rsp)              and store it onto stack
       mov    0x440(%rdi),%r15          # net->ipv4.tcp_death_row.hashinfo
       nop
 32:   mov    %r8d,%ebp                 # hnum
       shl    $0x10,%ebp                # hnum << 16
       nop
 3d:   mov    0x70(%rsp),%r14d          # sdif
       or     %ebx,%ebp                 # INET_COMBINED_PORTS(sport, hnum)
       mov    0x11a8382(%rip),%eax      # inet6_ehashfn() ...

After: __inet6_lookup_established()

       ...
       push   %rbx
       sub    $0x28,%rsp                # stack is 40 bytes
       mov    0x60(%rsp),%ebp           # sdif
       mov    %r8d,%r14d                # hnum
       shl    $0x10,%r14d               # hnum << 16
       or     %edx,%r14d                # INET_COMBINED_PORTS(sport, hnum)
       mov    0x440(%rdi),%rax          # net->ipv4.tcp_death_row.hashinfo
       mov    0x1194f09(%rip),%r10d     # inet6_ehashfn() ...

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260303235424.3877267-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/net.h            |  2 ++
 include/net/inet6_hashtables.h |  2 ++
 net/ipv4/inet_hashtables.c     | 17 +++++++++++++++--
 net/ipv6/inet6_hashtables.c    | 13 ++++++++++---
 4 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index f58b38ab37f8..a8e818de95b3 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -304,6 +304,8 @@ do {									\
 
 #define net_get_random_once(buf, nbytes)			\
 	get_random_once((buf), (nbytes))
+#define net_get_random_sleepable_once(buf, nbytes)		\
+	get_random_sleepable_once((buf), (nbytes))
 
 /*
  * E.g. XFS meta- & log-data is in slab pages, or bcache meta
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index c16de5b7963f..2cc5d416bbb5 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -24,6 +24,8 @@
 
 struct inet_hashinfo;
 
+void inet6_init_ehash_secret(void);
+
 static inline unsigned int __inet6_ehashfn(const u32 lhash,
 				    const u16 lport,
 				    const u32 fhash,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 61e654b395be..ac7b67c603b5 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -30,12 +30,16 @@
 #include <net/sock_reuseport.h>
 #include <net/tcp.h>
 
+static void inet_init_ehash_secret(void)
+{
+	net_get_random_sleepable_once(&inet_ehash_secret,
+				      sizeof(inet_ehash_secret));
+}
+
 u32 inet_ehashfn(const struct net *net, const __be32 laddr,
 		 const __u16 lport, const __be32 faddr,
 		 const __be16 fport)
 {
-	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
-
 	return lport + __inet_ehashfn(laddr, 0, faddr, fport,
 				      inet_ehash_secret + net_hash_mix(net));
 }
@@ -793,6 +797,13 @@ int inet_hash(struct sock *sk)
 		local_bh_enable();
 		return 0;
 	}
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == AF_INET6)
+		inet6_init_ehash_secret();
+#endif
+	inet_init_ehash_secret();
+
 	WARN_ON(!sk_unhashed(sk));
 	ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
 
@@ -1239,6 +1250,8 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
 	if (!inet_sk(sk)->inet_num)
 		port_offset = inet_sk_port_offset(sk);
 
+	inet_init_ehash_secret();
+
 	hash_port0 = inet_ehashfn(net, inet->inet_rcv_saddr, 0,
 				  inet->inet_daddr, inet->inet_dport);
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 182d38e6d6d8..72bc68fef48a 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -23,15 +23,20 @@
 #include <net/sock_reuseport.h>
 #include <net/tcp.h>
 
+void inet6_init_ehash_secret(void)
+{
+	net_get_random_sleepable_once(&inet6_ehash_secret,
+				      sizeof(inet6_ehash_secret));
+	net_get_random_sleepable_once(&tcp_ipv6_hash_secret,
+				      sizeof(tcp_ipv6_hash_secret));
+}
+
 u32 inet6_ehashfn(const struct net *net,
 		  const struct in6_addr *laddr, const u16 lport,
 		  const struct in6_addr *faddr, const __be16 fport)
 {
 	u32 lhash, fhash;
 
-	net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
-	net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret));
-
 	lhash = (__force u32)laddr->s6_addr32[3];
 	fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret);
 
@@ -363,6 +368,8 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
 	if (!inet_sk(sk)->inet_num)
 		port_offset = inet6_sk_port_offset(sk);
 
+	inet6_init_ehash_secret();
+
 	hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport);
 
 	return __inet_hash_connect(death_row, sk, port_offset, hash_port0,
-- 
cgit v1.2.3


From 5b30afc20b3fea29b9beb83c6415c4ff06f774aa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 4 Mar 2026 11:26:47 -1000
Subject: cgroup: Expose some cgroup helpers

Expose the following through cgroup.h:

- cgroup_on_dfl()
- cgroup_is_dead()
- cgroup_for_each_live_child()
- cgroup_for_each_live_descendant_pre()
- cgroup_for_each_live_descendant_post()

Until now, these didn't need to be exposed because controllers only cared
about the css hierarchy. The planned sched_ext hierarchical scheduler
support will be based on the default cgroup hierarchy, which is in line
with the existing BPF cgroup support, and thus needs these exposed.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h          | 65 +++++++++++++++++++++++++++++++++++++++--
 kernel/cgroup/cgroup-internal.h |  6 ----
 kernel/cgroup/cgroup.c          | 55 ----------------------------------
 3 files changed, 63 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bc892e3b37ee..e52160e85af4 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -42,6 +42,14 @@ struct kernel_clone_args;
 
 #ifdef CONFIG_CGROUPS
 
+/*
+ * To avoid confusing the compiler (and generating warnings) with code
+ * that attempts to access what would be a 0-element array (i.e. sized
+ * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this
+ * constant expression can be added.
+ */
+#define CGROUP_HAS_SUBSYS_CONFIG	(CGROUP_SUBSYS_COUNT > 0)
+
 enum css_task_iter_flags {
 	CSS_TASK_ITER_PROCS    = (1U << 0),  /* walk only threadgroup leaders */
 	CSS_TASK_ITER_THREADED = (1U << 1),  /* walk all threaded css_sets in the domain */
@@ -76,6 +84,7 @@ enum cgroup_lifetime_events {
 extern struct file_system_type cgroup_fs_type;
 extern struct cgroup_root cgrp_dfl_root;
 extern struct css_set init_css_set;
+extern struct mutex cgroup_mutex;
 extern spinlock_t css_set_lock;
 extern struct blocking_notifier_head cgroup_lifetime_notifier;
 
@@ -103,6 +112,8 @@ extern struct blocking_notifier_head cgroup_lifetime_notifier;
 #define cgroup_subsys_on_dfl(ss)						\
 	static_branch_likely(&ss ## _on_dfl_key)
 
+bool cgroup_on_dfl(const struct cgroup *cgrp);
+
 bool css_has_online_children(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
 struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
@@ -274,6 +285,32 @@ void css_task_iter_end(struct css_task_iter *it);
 	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
 	     (pos) = css_next_descendant_post((pos), (css)))
 
+/* iterate over child cgrps, lock should be held throughout iteration */
+#define cgroup_for_each_live_child(child, cgrp)				\
+	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
+		if (({ lockdep_assert_held(&cgroup_mutex);		\
+		       cgroup_is_dead(child); }))			\
+			;						\
+		else
+
+/* walk live descendants in pre order */
+#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
+	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
+		if (({ lockdep_assert_held(&cgroup_mutex);		\
+		       (dsct) = (d_css)->cgroup;			\
+		       cgroup_is_dead(dsct); }))			\
+			;						\
+		else
+
+/* walk live descendants in postorder */
+#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)		\
+	css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL))	\
+		if (({ lockdep_assert_held(&cgroup_mutex);		\
+		       (dsct) = (d_css)->cgroup;			\
+		       cgroup_is_dead(dsct); }))			\
+			;						\
+		else
+
 /**
  * cgroup_taskset_for_each - iterate cgroup_taskset
  * @task: the loop cursor
@@ -336,6 +373,27 @@ static inline u64 cgroup_id(const struct cgroup *cgrp)
 	return cgrp->kn->id;
 }
 
+/**
+ * cgroup_css - obtain a cgroup's css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
+ *
+ * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
+ * function must be called either under cgroup_mutex or rcu_read_lock() and
+ * the caller is responsible for pinning the returned css if it wants to
+ * keep accessing it outside the said locks.  This function may return
+ * %NULL if @cgrp doesn't have @subsys_id enabled.
+ */
+static inline struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
+						     struct cgroup_subsys *ss)
+{
+	if (CGROUP_HAS_SUBSYS_CONFIG && ss)
+		return rcu_dereference_check(cgrp->subsys[ss->id],
+					lockdep_is_held(&cgroup_mutex));
+	else
+		return &cgrp->self;
+}
+
 /**
  * css_is_dying - test whether the specified css is dying
  * @css: target css
@@ -372,6 +430,11 @@ static inline bool css_is_self(struct cgroup_subsys_state *css)
 	return false;
 }
 
+static inline bool cgroup_is_dead(const struct cgroup *cgrp)
+{
+	return !(cgrp->self.flags & CSS_ONLINE);
+}
+
 static inline void cgroup_get(struct cgroup *cgrp)
 {
 	css_get(&cgrp->self);
@@ -387,8 +450,6 @@ static inline void cgroup_put(struct cgroup *cgrp)
 	css_put(&cgrp->self);
 }
 
-extern struct mutex cgroup_mutex;
-
 static inline void cgroup_lock(void)
 {
 	mutex_lock(&cgroup_mutex);
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 3bfe37693d68..58797123b752 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -184,11 +184,6 @@ extern bool cgrp_dfl_visible;
 	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT &&		\
 	     (((ss) = cgroup_subsys[ssid]) || true); (ssid)++)
 
-static inline bool cgroup_is_dead(const struct cgroup *cgrp)
-{
-	return !(cgrp->self.flags & CSS_ONLINE);
-}
-
 static inline bool notify_on_release(const struct cgroup *cgrp)
 {
 	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -222,7 +217,6 @@ static inline void get_css_set(struct css_set *cset)
 }
 
 bool cgroup_ssid_enabled(int ssid);
-bool cgroup_on_dfl(const struct cgroup *cgrp);
 
 struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root);
 struct cgroup *task_cgroup_from_root(struct task_struct *task,
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index be1d71dda317..cdc63be63f2c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -68,14 +68,6 @@
 /* let's not notify more than 100 times per second */
 #define CGROUP_FILE_NOTIFY_MIN_INTV	DIV_ROUND_UP(HZ, 100)
 
-/*
- * To avoid confusing the compiler (and generating warnings) with code
- * that attempts to access what would be a 0-element array (i.e. sized
- * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this
- * constant expression can be added.
- */
-#define CGROUP_HAS_SUBSYS_CONFIG	(CGROUP_SUBSYS_COUNT > 0)
-
 /*
  * cgroup_mutex is the master lock.  Any modification to cgroup or its
  * hierarchy must be performed while holding it.
@@ -509,27 +501,6 @@ static u32 cgroup_ss_mask(struct cgroup *cgrp)
 	return cgrp->root->subsys_mask;
 }
 
-/**
- * cgroup_css - obtain a cgroup's css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns @cgrp->self)
- *
- * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
- * function must be called either under cgroup_mutex or rcu_read_lock() and
- * the caller is responsible for pinning the returned css if it wants to
- * keep accessing it outside the said locks.  This function may return
- * %NULL if @cgrp doesn't have @subsys_id enabled.
- */
-static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
-					      struct cgroup_subsys *ss)
-{
-	if (CGROUP_HAS_SUBSYS_CONFIG && ss)
-		return rcu_dereference_check(cgrp->subsys[ss->id],
-					lockdep_is_held(&cgroup_mutex));
-	else
-		return &cgrp->self;
-}
-
 /**
  * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
  * @cgrp: the cgroup of interest
@@ -741,32 +712,6 @@ EXPORT_SYMBOL_GPL(of_css);
 	}								\
 } while (false)
 
-/* iterate over child cgrps, lock should be held throughout iteration */
-#define cgroup_for_each_live_child(child, cgrp)				\
-	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
-		if (({ lockdep_assert_held(&cgroup_mutex);		\
-		       cgroup_is_dead(child); }))			\
-			;						\
-		else
-
-/* walk live descendants in pre order */
-#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
-	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
-		if (({ lockdep_assert_held(&cgroup_mutex);		\
-		       (dsct) = (d_css)->cgroup;			\
-		       cgroup_is_dead(dsct); }))			\
-			;						\
-		else
-
-/* walk live descendants in postorder */
-#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)		\
-	css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL))	\
-		if (({ lockdep_assert_held(&cgroup_mutex);		\
-		       (dsct) = (d_css)->cgroup;			\
-		       cgroup_is_dead(dsct); }))			\
-			;						\
-		else
-
 /*
  * The default css_set - used by init and its children prior to any
  * hierarchies being mounted. It contains a pointer to the root state
-- 
cgit v1.2.3


From 336faf5d9115ca6982b82cf122e68738ea8c4173 Mon Sep 17 00:00:00 2001
From: NeilBrown <neil@brown.name>
Date: Wed, 25 Feb 2026 09:16:53 +1100
Subject: VFS: make lookup_one_qstr_excl() static.

lookup_one_qstr_excl() is no longer used outside of namei.c, so
make it static.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neil@brown.name>
Link: https://patch.msgid.link/20260224222542.3458677-9-neilb@ownmail.net
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/porting.rst | 7 +++++++
 fs/namei.c                            | 5 ++---
 include/linux/namei.h                 | 3 ---
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 52ff1d19405b..1dd31ab417a2 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1361,3 +1361,10 @@ to match what strlen() would return if it was ran on the string.
 
 However, if the string is freely accessible for the duration of inode's
 lifetime, consider using inode_set_cached_link() instead.
+
+---
+
+**mandatory**
+
+lookup_one_qstr_excl() is no longer exported - use start_creating() or
+similar.
diff --git a/fs/namei.c b/fs/namei.c
index 11c9a4a6c396..a5daa62399d7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1782,8 +1782,8 @@ static struct dentry *lookup_dcache(const struct qstr *name,
  * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed.
  * Will return -EEXIST if name is found and LOOKUP_EXCL was passed.
  */
-struct dentry *lookup_one_qstr_excl(const struct qstr *name,
-				    struct dentry *base, unsigned int flags)
+static struct dentry *lookup_one_qstr_excl(const struct qstr *name,
+					   struct dentry *base, unsigned int flags)
 {
 	struct dentry *dentry;
 	struct dentry *old;
@@ -1820,7 +1820,6 @@ found:
 	}
 	return dentry;
 }
-EXPORT_SYMBOL(lookup_one_qstr_excl);
 
 /**
  * lookup_fast - do fast lockless (but racy) lookup of a dentry
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 58600cf234bc..c7a7288cdd25 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -54,9 +54,6 @@ extern int path_pts(struct path *path);
 
 extern int user_path_at(int, const char __user *, unsigned, struct path *);
 
-struct dentry *lookup_one_qstr_excl(const struct qstr *name,
-				    struct dentry *base,
-				    unsigned int flags);
 extern int kern_path(const char *, unsigned, struct path *);
 struct dentry *kern_path_parent(const char *name, struct path *parent);
 
-- 
cgit v1.2.3


From 08e6183ed2568e733e05e7e1c9de737d91c21155 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 26 Feb 2026 18:36:07 +0100
Subject: wifi: move action code from per-type frame structs

The action code actually serves to identify the type of action
frame, so it really isn't part of the per-type structure. Pull
it out and have it in the general action frame format.

In theory, whether or not the action code is present in this
way is up to each category, but all categories that are defined
right now all have that value.

While at it, and since this change requires changing all users,
remove the 'u' and make it an anonymous union in this case, so
that all code using this changes.

Change IEEE80211_MIN_ACTION_SIZE to take an argument which says
how much of the frame is needed, e.g. category, action_code or
the specific frame type that's defined in the union. Again this
also ensures that all code is updated.

In some cases, fix bugs where the SKB length was checked after
having accessed beyond the checked length, in particular in FTM
code, e.g. ieee80211_is_ftm().

Link: https://patch.msgid.link/20260226183607.67e71846b59e.I9a24328e3ffcaae179466a935f1c3345029f9961@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath11k/mac.c              |   4 +-
 drivers/net/wireless/ath/ath12k/mac.c              |   4 +-
 drivers/net/wireless/ath/ath12k/wifi7/hw.c         |   2 +-
 drivers/net/wireless/intel/iwlwifi/mld/time_sync.c |   6 +-
 .../net/wireless/intel/iwlwifi/mvm/ftm-initiator.c |   7 +-
 drivers/net/wireless/intel/iwlwifi/mvm/time-sync.c |   6 +-
 drivers/net/wireless/marvell/mwifiex/tdls.c        |  12 +-
 drivers/net/wireless/marvell/mwl8k.c               |   4 +-
 .../net/wireless/mediatek/mt76/mt76_connac_mac.c   |   6 +-
 drivers/net/wireless/mediatek/mt76/mt7925/mac.c    |   4 +-
 drivers/net/wireless/mediatek/mt76/mt7996/mac.c    |   4 +-
 drivers/net/wireless/realtek/rtl8xxxu/core.c       |  14 +--
 drivers/net/wireless/realtek/rtlwifi/base.c        |  28 ++---
 drivers/net/wireless/realtek/rtlwifi/pci.c         |   2 +-
 drivers/net/wireless/silabs/wfx/data_rx.c          |   8 +-
 include/linux/ieee80211.h                          |  83 +++++---------
 net/mac80211/agg-rx.c                              |  27 ++---
 net/mac80211/agg-tx.c                              |  28 ++---
 net/mac80211/eht.c                                 |  21 ++--
 net/mac80211/ht.c                                  |  31 +++---
 net/mac80211/ibss.c                                |  18 +--
 net/mac80211/iface.c                               |  18 +--
 net/mac80211/mesh.c                                |  14 +--
 net/mac80211/mesh_hwmp.c                           |  20 ++--
 net/mac80211/mesh_plink.c                          |  21 ++--
 net/mac80211/mlme.c                                |  82 ++++++--------
 net/mac80211/rx.c                                  | 123 +++++++++------------
 net/mac80211/s1g.c                                 |  28 ++---
 net/mac80211/spectmgmt.c                           |  31 +++---
 net/mac80211/tdls.c                                |  29 ++---
 net/mac80211/util.c                                |   5 +-
 net/mac80211/vht.c                                 |  10 +-
 32 files changed, 308 insertions(+), 392 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index d1c121e943cb..a48b6bf1f29a 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -6288,10 +6288,10 @@ static int ath11k_mac_mgmt_action_frame_fill_elem_data(struct ath11k_vif *arvif,
 	lockdep_assert_held(&ar->conf_mutex);
 
 	/* make sure category field is present */
-	if (skb->len < IEEE80211_MIN_ACTION_SIZE)
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE(category))
 		return -EINVAL;
 
-	remaining_len = skb->len - IEEE80211_MIN_ACTION_SIZE;
+	remaining_len = skb->len - IEEE80211_MIN_ACTION_SIZE(category);
 	has_protected = ieee80211_has_protected(hdr->frame_control);
 
 	/* In case of SW crypto and hdr protected (PMF), packet will already be encrypted,
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c
index af7590639dbf..a03881c73d68 100644
--- a/drivers/net/wireless/ath/ath12k/mac.c
+++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -9119,10 +9119,10 @@ static int ath12k_mac_mgmt_action_frame_fill_elem_data(struct ath12k_link_vif *a
 	lockdep_assert_wiphy(wiphy);
 
 	/* make sure category field is present */
-	if (skb->len < IEEE80211_MIN_ACTION_SIZE)
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE(category))
 		return -EINVAL;
 
-	remaining_len = skb->len - IEEE80211_MIN_ACTION_SIZE;
+	remaining_len = skb->len - IEEE80211_MIN_ACTION_SIZE(category);
 	has_protected = ieee80211_has_protected(hdr->frame_control);
 
 	/* In case of SW crypto and hdr protected (PMF), packet will already be encrypted,
diff --git a/drivers/net/wireless/ath/ath12k/wifi7/hw.c b/drivers/net/wireless/ath/ath12k/wifi7/hw.c
index 27acdfc35459..ec6dba96640b 100644
--- a/drivers/net/wireless/ath/ath12k/wifi7/hw.c
+++ b/drivers/net/wireless/ath/ath12k/wifi7/hw.c
@@ -104,7 +104,7 @@ static bool ath12k_is_addba_resp_action_code(struct ieee80211_mgmt *mgmt)
 	if (mgmt->u.action.category != WLAN_CATEGORY_BACK)
 		return false;
 
-	if (mgmt->u.action.u.addba_resp.action_code != WLAN_ACTION_ADDBA_RESP)
+	if (mgmt->u.action.action_code != WLAN_ACTION_ADDBA_RESP)
 		return false;
 
 	return true;
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/time_sync.c b/drivers/net/wireless/intel/iwlwifi/mld/time_sync.c
index 897ab65b71aa..474dd555e70b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/time_sync.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/time_sync.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2025 Intel Corporation
+ * Copyright (C) 2025-2026 Intel Corporation
  */
 
 #include "mld.h"
@@ -116,9 +116,9 @@ static bool iwl_mld_is_skb_match(struct sk_buff *skb, u8 *addr, u8 dialog_token)
 	u8 skb_dialog_token;
 
 	if (ieee80211_is_timing_measurement(skb))
-		skb_dialog_token = mgmt->u.action.u.wnm_timing_msr.dialog_token;
+		skb_dialog_token = mgmt->u.action.wnm_timing_msr.dialog_token;
 	else
-		skb_dialog_token = mgmt->u.action.u.ftm.dialog_token;
+		skb_dialog_token = mgmt->u.action.ftm.dialog_token;
 
 	if ((ether_addr_equal(mgmt->sa, addr) ||
 	     ether_addr_equal(mgmt->da, addr)) &&
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index ebc569e94f55..1b67836b1fac 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2025 Intel Corporation
+ * Copyright (C) 2018-2026 Intel Corporation
  */
 #include <linux/etherdevice.h>
 #include <linux/math64.h>
@@ -1409,8 +1409,7 @@ void iwl_mvm_ftm_lc_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
 	struct iwl_mvm_loc_entry *entry;
 	const u8 *ies, *lci, *civic, *msr_ie;
 	size_t ies_len, lci_len = 0, civic_len = 0;
-	size_t baselen = IEEE80211_MIN_ACTION_SIZE +
-			 sizeof(mgmt->u.action.u.ftm);
+	size_t baselen = IEEE80211_MIN_ACTION_SIZE(ftm);
 	static const u8 rprt_type_lci = IEEE80211_SPCT_MSR_RPRT_TYPE_LCI;
 	static const u8 rprt_type_civic = IEEE80211_SPCT_MSR_RPRT_TYPE_CIVIC;
 
@@ -1419,7 +1418,7 @@ void iwl_mvm_ftm_lc_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
 
 	lockdep_assert_held(&mvm->mutex);
 
-	ies = mgmt->u.action.u.ftm.variable;
+	ies = mgmt->u.action.ftm.variable;
 	ies_len = len - baselen;
 
 	msr_ie = cfg80211_find_ie_match(WLAN_EID_MEASURE_REPORT, ies, ies_len,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-sync.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-sync.c
index edae3e24192b..039b4daac73f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-sync.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-sync.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2022, 2026 Intel Corporation
  */
 
 #include "mvm.h"
@@ -18,9 +18,9 @@ static bool iwl_mvm_is_skb_match(struct sk_buff *skb, u8 *addr, u8 dialog_token)
 	u8 skb_dialog_token;
 
 	if (ieee80211_is_timing_measurement(skb))
-		skb_dialog_token = mgmt->u.action.u.wnm_timing_msr.dialog_token;
+		skb_dialog_token = mgmt->u.action.wnm_timing_msr.dialog_token;
 	else
-		skb_dialog_token = mgmt->u.action.u.ftm.dialog_token;
+		skb_dialog_token = mgmt->u.action.ftm.dialog_token;
 
 	if ((ether_addr_equal(mgmt->sa, addr) ||
 	     ether_addr_equal(mgmt->da, addr)) &&
diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c
index a4cf323e704b..845f2a22e071 100644
--- a/drivers/net/wireless/marvell/mwifiex/tdls.c
+++ b/drivers/net/wireless/marvell/mwifiex/tdls.c
@@ -755,16 +755,12 @@ mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv,
 	switch (action_code) {
 	case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
 		/* See the layout of 'struct ieee80211_mgmt'. */
-		extra = sizeof(mgmt->u.action.u.tdls_discover_resp) +
-			sizeof(mgmt->u.action.category);
+		extra = IEEE80211_MIN_ACTION_SIZE(tdls_discover_resp) - 24;
 		skb_put(skb, extra);
 		mgmt->u.action.category = WLAN_CATEGORY_PUBLIC;
-		mgmt->u.action.u.tdls_discover_resp.action_code =
-					      WLAN_PUB_ACTION_TDLS_DISCOVER_RES;
-		mgmt->u.action.u.tdls_discover_resp.dialog_token =
-								   dialog_token;
-		mgmt->u.action.u.tdls_discover_resp.capability =
-							     cpu_to_le16(capab);
+		mgmt->u.action.action_code = WLAN_PUB_ACTION_TDLS_DISCOVER_RES;
+		mgmt->u.action.tdls_discover_resp.dialog_token = dialog_token;
+		mgmt->u.action.tdls_discover_resp.capability = cpu_to_le16(capab);
 		/* move back for addr4 */
 		memmove(pos + ETH_ALEN, &mgmt->u.action, extra);
 		/* init address 4 */
diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c
index 99321d180f34..b1af02180341 100644
--- a/drivers/net/wireless/marvell/mwl8k.c
+++ b/drivers/net/wireless/marvell/mwl8k.c
@@ -1985,9 +1985,9 @@ mwl8k_txq_xmit(struct ieee80211_hw *hw,
 	 */
 	if (unlikely(ieee80211_is_action(wh->frame_control) &&
 	    mgmt->u.action.category == WLAN_CATEGORY_BACK &&
-	    mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ &&
+	    mgmt->u.action.action_code == WLAN_ACTION_ADDBA_REQ &&
 	    priv->ap_fw)) {
-		u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
+		u16 capab = le16_to_cpu(mgmt->u.action.addba_req.capab);
 		tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
 		index = mwl8k_tid_queue_mapping(tid);
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index b41ca1410da9..f946ddc20a47 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -413,10 +413,10 @@ mt76_connac2_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi,
 	u32 val;
 
 	if (ieee80211_is_action(fc) &&
-	    skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + 1 + 2 &&
+	    skb->len >= IEEE80211_MIN_ACTION_SIZE(addba_req.capab) &&
 	    mgmt->u.action.category == WLAN_CATEGORY_BACK &&
-	    mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) {
-		u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
+	    mgmt->u.action.action_code == WLAN_ACTION_ADDBA_REQ) {
+		u16 capab = le16_to_cpu(mgmt->u.action.addba_req.capab);
 
 		txwi[5] |= cpu_to_le32(MT_TXD5_ADD_BA);
 		tid = (capab >> 2) & IEEE80211_QOS_CTL_TID_MASK;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
index 0d9435900423..caaf71c31480 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
@@ -668,9 +668,9 @@ mt7925_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi,
 	u32 val;
 
 	if (ieee80211_is_action(fc) &&
-	    skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 &&
+	    skb->len >= IEEE80211_MIN_ACTION_SIZE(action_code) &&
 	    mgmt->u.action.category == WLAN_CATEGORY_BACK &&
-	    mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ)
+	    mgmt->u.action.action_code == WLAN_ACTION_ADDBA_REQ)
 		tid = MT_TX_ADDBA;
 	else if (ieee80211_is_mgmt(hdr->frame_control))
 		tid = MT_TX_NORMAL;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
index d4f3ee943b47..84cbf36b493c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
@@ -800,9 +800,9 @@ mt7996_mac_write_txwi_80211(struct mt7996_dev *dev, __le32 *txwi,
 	u32 val;
 
 	if (ieee80211_is_action(fc) &&
-	    skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 &&
+	    skb->len >= IEEE80211_MIN_ACTION_SIZE(action_code) &&
 	    mgmt->u.action.category == WLAN_CATEGORY_BACK &&
-	    mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) {
+	    mgmt->u.action.action_code == WLAN_ACTION_ADDBA_REQ) {
 		if (is_mt7990(&dev->mt76))
 			txwi[6] |= cpu_to_le32(FIELD_PREP(MT_TXD6_TID_ADDBA, tid));
 		else
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/core.c b/drivers/net/wireless/realtek/rtl8xxxu/core.c
index 794187d28caa..804fc604e5f8 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/core.c
@@ -5146,10 +5146,10 @@ static void rtl8xxxu_dump_action(struct device *dev,
 	if (!(rtl8xxxu_debug & RTL8XXXU_DEBUG_ACTION))
 		return;
 
-	switch (mgmt->u.action.u.addba_resp.action_code) {
+	switch (mgmt->u.action.action_code) {
 	case WLAN_ACTION_ADDBA_RESP:
-		cap = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
-		timeout = le16_to_cpu(mgmt->u.action.u.addba_resp.timeout);
+		cap = le16_to_cpu(mgmt->u.action.addba_resp.capab);
+		timeout = le16_to_cpu(mgmt->u.action.addba_resp.timeout);
 		dev_info(dev, "WLAN_ACTION_ADDBA_RESP: "
 			 "timeout %i, tid %02x, buf_size %02x, policy %02x, "
 			 "status %02x\n",
@@ -5157,11 +5157,11 @@ static void rtl8xxxu_dump_action(struct device *dev,
 			 (cap & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2,
 			 (cap & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6,
 			 (cap >> 1) & 0x1,
-			 le16_to_cpu(mgmt->u.action.u.addba_resp.status));
+			 le16_to_cpu(mgmt->u.action.addba_resp.status));
 		break;
 	case WLAN_ACTION_ADDBA_REQ:
-		cap = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
-		timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
+		cap = le16_to_cpu(mgmt->u.action.addba_req.capab);
+		timeout = le16_to_cpu(mgmt->u.action.addba_req.timeout);
 		dev_info(dev, "WLAN_ACTION_ADDBA_REQ: "
 			 "timeout %i, tid %02x, buf_size %02x, policy %02x\n",
 			 timeout,
@@ -5171,7 +5171,7 @@ static void rtl8xxxu_dump_action(struct device *dev,
 		break;
 	default:
 		dev_info(dev, "action frame %02x\n",
-			 mgmt->u.action.u.addba_resp.action_code);
+			 mgmt->u.action.action_code);
 		break;
 	}
 }
diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index 0ac9cf0937aa..aad377864e73 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -1409,7 +1409,7 @@ bool rtl_action_proc(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx)
 				sta_entry =
 					(struct rtl_sta_info *)sta->drv_priv;
 				capab =
-				  le16_to_cpu(mgmt->u.action.u.addba_req.capab);
+				  le16_to_cpu(mgmt->u.action.addba_req.capab);
 				tid = (capab &
 				       IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
 				if (tid >= MAX_TID_COUNT) {
@@ -2392,35 +2392,35 @@ static struct sk_buff *rtl_make_smps_action(struct ieee80211_hw *hw,
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *action_frame;
 
-	/* 27 = header + category + action + smps mode */
-	skb = dev_alloc_skb(27 + hw->extra_tx_headroom);
+	skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(ht_smps) +
+			    hw->extra_tx_headroom);
 	if (!skb)
 		return NULL;
 
 	skb_reserve(skb, hw->extra_tx_headroom);
-	action_frame = skb_put_zero(skb, 27);
+	action_frame = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(ht_smps));
 	memcpy(action_frame->da, da, ETH_ALEN);
 	memcpy(action_frame->sa, rtlefuse->dev_addr, ETH_ALEN);
 	memcpy(action_frame->bssid, bssid, ETH_ALEN);
 	action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 						  IEEE80211_STYPE_ACTION);
 	action_frame->u.action.category = WLAN_CATEGORY_HT;
-	action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS;
+	action_frame->u.action.action_code = WLAN_HT_ACTION_SMPS;
 	switch (smps) {
 	case IEEE80211_SMPS_AUTOMATIC:/* 0 */
 	case IEEE80211_SMPS_NUM_MODES:/* 4 */
 		WARN_ON(1);
 		fallthrough;
 	case IEEE80211_SMPS_OFF:/* 1 */ /*MIMO_PS_NOLIMIT*/
-		action_frame->u.action.u.ht_smps.smps_control =
+		action_frame->u.action.ht_smps.smps_control =
 				WLAN_HT_SMPS_CONTROL_DISABLED;/* 0 */
 		break;
 	case IEEE80211_SMPS_STATIC:/* 2 */ /*MIMO_PS_STATIC*/
-		action_frame->u.action.u.ht_smps.smps_control =
+		action_frame->u.action.ht_smps.smps_control =
 				WLAN_HT_SMPS_CONTROL_STATIC;/* 1 */
 		break;
 	case IEEE80211_SMPS_DYNAMIC:/* 3 */ /*MIMO_PS_DYNAMIC*/
-		action_frame->u.action.u.ht_smps.smps_control =
+		action_frame->u.action.ht_smps.smps_control =
 				WLAN_HT_SMPS_CONTROL_DYNAMIC;/* 3 */
 		break;
 	}
@@ -2519,25 +2519,25 @@ struct sk_buff *rtl_make_del_ba(struct ieee80211_hw *hw,
 	struct ieee80211_mgmt *action_frame;
 	u16 params;
 
-	/* 27 = header + category + action + smps mode */
-	skb = dev_alloc_skb(34 + hw->extra_tx_headroom);
+	skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(delba) +
+			    hw->extra_tx_headroom);
 	if (!skb)
 		return NULL;
 
 	skb_reserve(skb, hw->extra_tx_headroom);
-	action_frame = skb_put_zero(skb, 34);
+	action_frame = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(delba));
 	memcpy(action_frame->sa, sa, ETH_ALEN);
 	memcpy(action_frame->da, rtlefuse->dev_addr, ETH_ALEN);
 	memcpy(action_frame->bssid, bssid, ETH_ALEN);
 	action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 						  IEEE80211_STYPE_ACTION);
 	action_frame->u.action.category = WLAN_CATEGORY_BACK;
-	action_frame->u.action.u.delba.action_code = WLAN_ACTION_DELBA;
+	action_frame->u.action.action_code = WLAN_ACTION_DELBA;
 	params = (u16)(1 << 11);	/* bit 11 initiator */
 	params |= (u16)(tid << 12);	/* bit 15:12 TID number */
 
-	action_frame->u.action.u.delba.params = cpu_to_le16(params);
-	action_frame->u.action.u.delba.reason_code =
+	action_frame->u.action.delba.params = cpu_to_le16(params);
+	action_frame->u.action.delba.reason_code =
 		cpu_to_le16(WLAN_REASON_QSTA_TIMEOUT);
 
 	return skb;
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index d080469264cf..19e2ff62d9f1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -507,7 +507,7 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio)
 		if (ieee80211_is_action(fc)) {
 			struct ieee80211_mgmt *action_frame =
 				(struct ieee80211_mgmt *)skb->data;
-			if (action_frame->u.action.u.ht_smps.action ==
+			if (action_frame->u.action.action_code ==
 			    WLAN_HT_ACTION_SMPS) {
 				dev_kfree_skb(skb);
 				goto tx_status_ok;
diff --git a/drivers/net/wireless/silabs/wfx/data_rx.c b/drivers/net/wireless/silabs/wfx/data_rx.c
index e099a9e65bae..15c06b2b8633 100644
--- a/drivers/net/wireless/silabs/wfx/data_rx.c
+++ b/drivers/net/wireless/silabs/wfx/data_rx.c
@@ -21,14 +21,14 @@ static void wfx_rx_handle_ba(struct wfx_vif *wvif, struct ieee80211_mgmt *mgmt)
 	if (wfx_api_older_than(wvif->wdev, 3, 6))
 		return;
 
-	switch (mgmt->u.action.u.addba_req.action_code) {
+	switch (mgmt->u.action.action_code) {
 	case WLAN_ACTION_ADDBA_REQ:
-		params = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
+		params = le16_to_cpu(mgmt->u.action.addba_req.capab);
 		tid = (params & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
 		ieee80211_start_rx_ba_session_offl(vif, mgmt->sa, tid);
 		break;
 	case WLAN_ACTION_DELBA:
-		params = le16_to_cpu(mgmt->u.action.u.delba.params);
+		params = le16_to_cpu(mgmt->u.action.delba.params);
 		tid = (params &  IEEE80211_DELBA_PARAM_TID_MASK) >> 12;
 		ieee80211_stop_rx_ba_session_offl(vif, mgmt->sa, tid);
 		break;
@@ -80,7 +80,7 @@ void wfx_rx_cb(struct wfx_vif *wvif, const struct wfx_hif_ind_rx *arg, struct sk
 	 */
 	if (ieee80211_is_action(frame->frame_control) &&
 	    mgmt->u.action.category == WLAN_CATEGORY_BACK &&
-	    skb->len > IEEE80211_MIN_ACTION_SIZE) {
+	    skb->len > IEEE80211_MIN_ACTION_SIZE(action_code)) {
 		wfx_rx_handle_ba(wvif, mgmt);
 		goto drop;
 	}
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 3651b2e6c518..aea360e90cb1 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1046,31 +1046,28 @@ struct ieee80211_mgmt {
 		} __packed probe_resp;
 		struct {
 			u8 category;
+			u8 action_code;
 			union {
 				struct {
-					u8 action_code;
 					u8 dialog_token;
 					u8 status_code;
 					u8 variable[];
 				} __packed wme_action;
 				struct{
-					u8 action_code;
+					u8 no_fixed_fields[0];
 					u8 variable[];
 				} __packed chan_switch;
 				struct{
-					u8 action_code;
 					struct ieee80211_ext_chansw_ie data;
 					u8 variable[];
 				} __packed ext_chan_switch;
 				struct{
-					u8 action_code;
 					u8 dialog_token;
 					u8 element_id;
 					u8 length;
 					struct ieee80211_msrment_ie msr_elem;
 				} __packed measurement;
 				struct{
-					u8 action_code;
 					u8 dialog_token;
 					__le16 capab;
 					__le16 timeout;
@@ -1079,7 +1076,6 @@ struct ieee80211_mgmt {
 					u8 variable[];
 				} __packed addba_req;
 				struct{
-					u8 action_code;
 					u8 dialog_token;
 					__le16 status;
 					__le16 capab;
@@ -1088,54 +1084,45 @@ struct ieee80211_mgmt {
 					u8 variable[];
 				} __packed addba_resp;
 				struct{
-					u8 action_code;
 					__le16 params;
 					__le16 reason_code;
 				} __packed delba;
 				struct {
-					u8 action_code;
+					u8 no_fixed_fields[0];
 					u8 variable[];
 				} __packed self_prot;
 				struct{
-					u8 action_code;
+					u8 no_fixed_fields[0];
 					u8 variable[];
 				} __packed mesh_action;
 				struct {
-					u8 action;
 					u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN];
 				} __packed sa_query;
 				struct {
-					u8 action;
 					u8 smps_control;
 				} __packed ht_smps;
 				struct {
-					u8 action_code;
 					u8 chanwidth;
 				} __packed ht_notify_cw;
 				struct {
-					u8 action_code;
 					u8 dialog_token;
 					__le16 capability;
 					u8 variable[];
 				} __packed tdls_discover_resp;
 				struct {
-					u8 action_code;
 					u8 operating_mode;
 				} __packed vht_opmode_notif;
 				struct {
-					u8 action_code;
 					u8 membership[WLAN_MEMBERSHIP_LEN];
 					u8 position[WLAN_USER_POSITION_LEN];
 				} __packed vht_group_notif;
 				struct {
-					u8 action_code;
 					u8 dialog_token;
 					u8 tpc_elem_id;
 					u8 tpc_elem_length;
 					struct ieee80211_tpc_report_ie tpc;
 				} __packed tpc_report;
 				struct {
-					u8 action_code;
 					u8 dialog_token;
 					u8 follow_up;
 					u8 tod[6];
@@ -1145,11 +1132,10 @@ struct ieee80211_mgmt {
 					u8 variable[];
 				} __packed ftm;
 				struct {
-					u8 action_code;
+					u8 no_fixed_fields[0];
 					u8 variable[];
 				} __packed s1g;
 				struct {
-					u8 action_code;
 					u8 dialog_token;
 					u8 follow_up;
 					u32 tod;
@@ -1158,41 +1144,37 @@ struct ieee80211_mgmt {
 					u8 max_toa_error;
 				} __packed wnm_timing_msr;
 				struct {
-					u8 action_code;
 					u8 dialog_token;
 					u8 variable[];
 				} __packed ttlm_req;
 				struct {
-					u8 action_code;
 					u8 dialog_token;
 					__le16 status_code;
 					u8 variable[];
 				} __packed ttlm_res;
 				struct {
-					u8 action_code;
+					u8 no_fixed_fields[0];
+					/* no variable fields either */
 				} __packed ttlm_tear_down;
 				struct {
-					u8 action_code;
 					u8 dialog_token;
 					u8 variable[];
 				} __packed ml_reconf_req;
 				struct {
-					u8 action_code;
 					u8 dialog_token;
 					u8 count;
 					u8 variable[];
 				} __packed ml_reconf_resp;
 				struct {
-					u8 action_code;
+					u8 no_fixed_fields[0];
 					u8 variable[];
 				} __packed epcs;
 				struct {
-					u8 action_code;
 					u8 dialog_token;
 					u8 control;
 					u8 variable[];
 				} __packed eml_omn;
-			} u;
+			};
 		} __packed action;
 		DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */
 	} u;
@@ -1210,8 +1192,7 @@ struct ieee80211_mgmt {
 
 #define BSS_MEMBERSHIP_SELECTOR_MIN	BSS_MEMBERSHIP_SELECTOR_UHR_PHY
 
-/* mgmt header + 1 byte category code */
-#define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u)
+#define IEEE80211_MIN_ACTION_SIZE(type)	offsetofend(struct ieee80211_mgmt, u.action.type)
 
 
 /* Management MIC information element (IEEE 802.11w) for CMAC */
@@ -2391,7 +2372,7 @@ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb)
 	if (!ieee80211_is_action(fc))
 		return false;
 
-	if (skb->len < offsetofend(typeof(*mgmt), u.action.u.ftm.action_code))
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE(action_code))
 		return true;
 
 	/* action frame - additionally check for non-bufferable FTM */
@@ -2400,8 +2381,8 @@ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb)
 	    mgmt->u.action.category != WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION)
 		return true;
 
-	if (mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_REQUEST ||
-	    mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_RESPONSE)
+	if (mgmt->u.action.action_code == WLAN_PUB_ACTION_FTM_REQUEST ||
+	    mgmt->u.action.action_code == WLAN_PUB_ACTION_FTM_RESPONSE)
 		return false;
 
 	return true;
@@ -2451,7 +2432,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr)
  */
 static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb)
 {
-	if (skb->len < IEEE80211_MIN_ACTION_SIZE)
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE(category))
 		return false;
 	return _ieee80211_is_robust_mgmt_frame((void *)skb->data);
 }
@@ -2467,7 +2448,7 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr,
 {
 	struct ieee80211_mgmt *mgmt = (void *)hdr;
 
-	if (len < IEEE80211_MIN_ACTION_SIZE)
+	if (len < IEEE80211_MIN_ACTION_SIZE(category))
 		return false;
 	if (!ieee80211_is_action(hdr->frame_control))
 		return false;
@@ -2485,13 +2466,14 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr,
 static inline bool
 ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb)
 {
+	struct ieee80211_mgmt *mgmt = (void *)skb->data;
 	u8 action;
 
 	if (!ieee80211_is_public_action((void *)skb->data, skb->len) ||
-	    skb->len < IEEE80211_MIN_ACTION_SIZE + 1)
+	    skb->len < IEEE80211_MIN_ACTION_SIZE(action_code))
 		return false;
 
-	action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE);
+	action = mgmt->u.action.action_code;
 
 	return action != WLAN_PUB_ACTION_20_40_BSS_COEX &&
 		action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN &&
@@ -2530,7 +2512,7 @@ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr)
  */
 static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb)
 {
-	if (skb->len < IEEE80211_MIN_ACTION_SIZE)
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE(category))
 		return false;
 	return _ieee80211_is_group_privacy_action((void *)skb->data);
 }
@@ -2626,8 +2608,7 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb)
 	if (!ieee80211_is_action(mgmt->frame_control))
 		return false;
 
-	if (skb->len < IEEE80211_MIN_ACTION_SIZE +
-		       sizeof(mgmt->u.action.u.tpc_report))
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE(tpc_report))
 		return false;
 
 	/*
@@ -2646,12 +2627,11 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb)
 		return false;
 
 	/* both spectrum mgmt and link measurement have same action code */
-	if (mgmt->u.action.u.tpc_report.action_code !=
-	    WLAN_ACTION_SPCT_TPC_RPRT)
+	if (mgmt->u.action.action_code != WLAN_ACTION_SPCT_TPC_RPRT)
 		return false;
 
-	if (mgmt->u.action.u.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT ||
-	    mgmt->u.action.u.tpc_report.tpc_elem_length !=
+	if (mgmt->u.action.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT ||
+	    mgmt->u.action.tpc_report.tpc_elem_length !=
 	    sizeof(struct ieee80211_tpc_report_ie))
 		return false;
 
@@ -2667,16 +2647,15 @@ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb)
 {
 	struct ieee80211_mgmt *mgmt = (void *)skb->data;
 
-	if (skb->len < IEEE80211_MIN_ACTION_SIZE)
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE(wnm_timing_msr))
 		return false;
 
 	if (!ieee80211_is_action(mgmt->frame_control))
 		return false;
 
 	if (mgmt->u.action.category == WLAN_CATEGORY_WNM_UNPROTECTED &&
-	    mgmt->u.action.u.wnm_timing_msr.action_code ==
-		WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE &&
-	    skb->len >= offsetofend(typeof(*mgmt), u.action.u.wnm_timing_msr))
+	    mgmt->u.action.action_code ==
+			WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE)
 		return true;
 
 	return false;
@@ -2691,15 +2670,13 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb)
 {
 	struct ieee80211_mgmt *mgmt = (void *)skb->data;
 
-	if (!ieee80211_is_public_action((void *)mgmt, skb->len))
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE(ftm))
 		return false;
 
-	if (mgmt->u.action.u.ftm.action_code ==
-		WLAN_PUB_ACTION_FTM_RESPONSE &&
-	    skb->len >= offsetofend(typeof(*mgmt), u.action.u.ftm))
-		return true;
+	if (!ieee80211_is_public_action((void *)mgmt, skb->len))
+		return false;
 
-	return false;
+	return mgmt->u.action.action_code == WLAN_PUB_ACTION_FTM_RESPONSE;
 }
 
 struct element {
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index f301a8622bee..0a2be8cb600f 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -9,7 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2025 Intel Corporation
+ * Copyright (C) 2018-2026 Intel Corporation
  */
 
 /**
@@ -251,19 +251,20 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 	mgmt = ieee80211_mgmt_ba(skb, da, sdata);
 
-	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
+	skb_put(skb, 2 + sizeof(mgmt->u.action.addba_resp));
 	mgmt->u.action.category = WLAN_CATEGORY_BACK;
-	mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
-	mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
+	mgmt->u.action.action_code = WLAN_ACTION_ADDBA_RESP;
+
+	mgmt->u.action.addba_resp.dialog_token = dialog_token;
 
 	capab = u16_encode_bits(amsdu, IEEE80211_ADDBA_PARAM_AMSDU_MASK);
 	capab |= u16_encode_bits(policy, IEEE80211_ADDBA_PARAM_POLICY_MASK);
 	capab |= u16_encode_bits(tid, IEEE80211_ADDBA_PARAM_TID_MASK);
 	capab |= u16_encode_bits(buf_size, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK);
 
-	mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
-	mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
-	mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
+	mgmt->u.action.addba_resp.capab = cpu_to_le16(capab);
+	mgmt->u.action.addba_resp.timeout = cpu_to_le16(timeout);
+	mgmt->u.action.addba_resp.status = cpu_to_le16(status);
 
 	if (sta->sta.valid_links || sta->sta.deflink.he_cap.has_he)
 		ieee80211_add_addbaext(skb, req_addba_ext_data, buf_size);
@@ -477,22 +478,22 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	u8 dialog_token, addba_ext_data;
 
 	/* extract session parameters from addba request frame */
-	dialog_token = mgmt->u.action.u.addba_req.dialog_token;
-	timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
+	dialog_token = mgmt->u.action.addba_req.dialog_token;
+	timeout = le16_to_cpu(mgmt->u.action.addba_req.timeout);
 	start_seq_num =
-		le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
+		le16_to_cpu(mgmt->u.action.addba_req.start_seq_num) >> 4;
 
-	capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
+	capab = le16_to_cpu(mgmt->u.action.addba_req.capab);
 	ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
 	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
 	buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
 
 	addba_ext_data =
 		ieee80211_retrieve_addba_ext_data(sta,
-						  mgmt->u.action.u.addba_req.variable,
+						  mgmt->u.action.addba_req.variable,
 						  len -
 						  offsetof(typeof(*mgmt),
-							   u.action.u.addba_req.variable),
+							   u.action.addba_req.variable),
 						  &buf_size);
 
 	__ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 93b47a7ba9c4..d5a62b8d5a80 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -9,7 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2024 Intel Corporation
+ * Copyright (C) 2018-2026 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -68,7 +68,7 @@ static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid,
 	struct ieee80211_mgmt *mgmt;
 	u16 capab;
 
-	skb = dev_alloc_skb(sizeof(*mgmt) +
+	skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(addba_req) +
 			    2 + sizeof(struct ieee80211_addba_ext_ie) +
 			    local->hw.extra_tx_headroom);
 	if (!skb)
@@ -77,21 +77,21 @@ static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid,
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 	mgmt = ieee80211_mgmt_ba(skb, sta->sta.addr, sdata);
 
-	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
+	skb_put(skb, 2 + sizeof(mgmt->u.action.addba_req));
 
 	mgmt->u.action.category = WLAN_CATEGORY_BACK;
-	mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ;
+	mgmt->u.action.action_code = WLAN_ACTION_ADDBA_REQ;
 
-	mgmt->u.action.u.addba_req.dialog_token = dialog_token;
+	mgmt->u.action.addba_req.dialog_token = dialog_token;
 	capab = IEEE80211_ADDBA_PARAM_AMSDU_MASK;
 	capab |= IEEE80211_ADDBA_PARAM_POLICY_MASK;
 	capab |= u16_encode_bits(tid, IEEE80211_ADDBA_PARAM_TID_MASK);
 	capab |= u16_encode_bits(agg_size, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK);
 
-	mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab);
+	mgmt->u.action.addba_req.capab = cpu_to_le16(capab);
 
-	mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout);
-	mgmt->u.action.u.addba_req.start_seq_num =
+	mgmt->u.action.addba_req.timeout = cpu_to_le16(timeout);
+	mgmt->u.action.addba_req.start_seq_num =
 					cpu_to_le16(start_seq_num << 4);
 
 	if (sta->sta.deflink.he_cap.has_he)
@@ -978,15 +978,15 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 
 	lockdep_assert_wiphy(sta->local->hw.wiphy);
 
-	capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
+	capab = le16_to_cpu(mgmt->u.action.addba_resp.capab);
 	amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
 	tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK);
 	buf_size = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK);
 
 	ieee80211_retrieve_addba_ext_data(sta,
-					  mgmt->u.action.u.addba_resp.variable,
+					  mgmt->u.action.addba_resp.variable,
 					  len - offsetof(typeof(*mgmt),
-							 u.action.u.addba_resp.variable),
+							 u.action.addba_resp.variable),
 					  &buf_size);
 
 	buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);
@@ -999,7 +999,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 	if (!tid_tx)
 		return;
 
-	if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) {
+	if (mgmt->u.action.addba_resp.dialog_token != tid_tx->dialog_token) {
 		ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n",
 		       sta->sta.addr, tid);
 		return;
@@ -1029,7 +1029,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 	 * is set to 0, the Buffer Size subfield is set to a value
 	 * of at least 1.
 	 */
-	if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
+	if (le16_to_cpu(mgmt->u.action.addba_resp.status)
 			== WLAN_STATUS_SUCCESS && buf_size) {
 		if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED,
 				     &tid_tx->state)) {
@@ -1046,7 +1046,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 		sta->ampdu_mlme.addba_req_num[tid] = 0;
 
 		tid_tx->timeout =
-			le16_to_cpu(mgmt->u.action.u.addba_resp.timeout);
+			le16_to_cpu(mgmt->u.action.addba_resp.timeout);
 
 		if (tid_tx->timeout) {
 			mod_timer(&tid_tx->session_timer,
diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c
index 078e1e23d8d1..768bfc4e737d 100644
--- a/net/mac80211/eht.c
+++ b/net/mac80211/eht.c
@@ -108,7 +108,7 @@ static void
 ieee80211_send_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata,
 				 struct ieee80211_mgmt *req, int opt_len)
 {
-	int len = offsetofend(struct ieee80211_mgmt, u.action.u.eml_omn);
+	int len = IEEE80211_MIN_ACTION_SIZE(eml_omn);
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_mgmt *mgmt;
 	struct sk_buff *skb;
@@ -127,16 +127,15 @@ ieee80211_send_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata,
 	memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
 
 	mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
-	mgmt->u.action.u.eml_omn.action_code =
-		WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF;
-	mgmt->u.action.u.eml_omn.dialog_token =
-		req->u.action.u.eml_omn.dialog_token;
-	mgmt->u.action.u.eml_omn.control = req->u.action.u.eml_omn.control &
+	mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF;
+	mgmt->u.action.eml_omn.dialog_token =
+		req->u.action.eml_omn.dialog_token;
+	mgmt->u.action.eml_omn.control = req->u.action.eml_omn.control &
 		~(IEEE80211_EML_CTRL_EMLSR_PARAM_UPDATE |
 		  IEEE80211_EML_CTRL_INDEV_COEX_ACT);
 	/* Copy optional fields from the received notification frame */
-	memcpy(mgmt->u.action.u.eml_omn.variable,
-	       req->u.action.u.eml_omn.variable, opt_len);
+	memcpy(mgmt->u.action.eml_omn.variable,
+	       req->u.action.eml_omn.variable, opt_len);
 
 	ieee80211_tx_skb(sdata, skb);
 }
@@ -144,14 +143,14 @@ ieee80211_send_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata,
 void ieee80211_rx_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata,
 				    struct sk_buff *skb)
 {
-	int len = offsetofend(struct ieee80211_mgmt, u.action.u.eml_omn);
+	int len = IEEE80211_MIN_ACTION_SIZE(eml_omn);
 	enum nl80211_iftype type = ieee80211_vif_type_p2p(&sdata->vif);
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	const struct wiphy_iftype_ext_capab *ift_ext_capa;
 	struct ieee80211_mgmt *mgmt = (void *)skb->data;
 	struct ieee80211_local *local = sdata->local;
-	u8 control = mgmt->u.action.u.eml_omn.control;
-	u8 *ptr = mgmt->u.action.u.eml_omn.variable;
+	u8 control = mgmt->u.action.eml_omn.control;
+	u8 *ptr = mgmt->u.action.eml_omn.variable;
 	struct ieee80211_eml_params eml_params = {
 		.link_id = status->link_id,
 		.control = control,
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 1c82a28b03de..9e2469a8ce64 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -9,7 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright 2017	Intel Deutschland GmbH
- * Copyright(c) 2020-2025 Intel Corporation
+ * Copyright(c) 2020-2026 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -462,22 +462,23 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_mgmt *mgmt;
 	u16 params;
 
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+	skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(delba) +
+			    local->hw.extra_tx_headroom);
 	if (!skb)
 		return;
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 	mgmt = ieee80211_mgmt_ba(skb, da, sdata);
 
-	skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba));
+	skb_put(skb, 2 + sizeof(mgmt->u.action.delba));
 
 	mgmt->u.action.category = WLAN_CATEGORY_BACK;
-	mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA;
+	mgmt->u.action.action_code = WLAN_ACTION_DELBA;
 	params = (u16)(initiator << 11); 	/* bit 11 initiator */
 	params |= (u16)(tid << 12); 		/* bit 15:12 TID number */
 
-	mgmt->u.action.u.delba.params = cpu_to_le16(params);
-	mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
+	mgmt->u.action.delba.params = cpu_to_le16(params);
+	mgmt->u.action.delba.reason_code = cpu_to_le16(reason_code);
 
 	ieee80211_tx_skb(sdata, skb);
 }
@@ -489,14 +490,14 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
 	u16 tid, params;
 	u16 initiator;
 
-	params = le16_to_cpu(mgmt->u.action.u.delba.params);
+	params = le16_to_cpu(mgmt->u.action.delba.params);
 	tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12;
 	initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11;
 
 	ht_dbg_ratelimited(sdata, "delba from %pM (%s) tid %d reason code %d\n",
 			   mgmt->sa, initiator ? "initiator" : "recipient",
 			   tid,
-			   le16_to_cpu(mgmt->u.action.u.delba.reason_code));
+			   le16_to_cpu(mgmt->u.action.delba.reason_code));
 
 	if (initiator == WLAN_BACK_INITIATOR)
 		__ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0,
@@ -530,20 +531,20 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_tx_info *info;
 	u8 status_link_id = link_id < 0 ? 0 : link_id;
 
-	/* 27 = header + category + action + smps mode */
-	skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom);
+	skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(ht_smps) +
+			    local->hw.extra_tx_headroom);
 	if (!skb)
 		return -ENOMEM;
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
-	action_frame = skb_put(skb, 27);
+	action_frame = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(ht_smps));
 	memcpy(action_frame->da, da, ETH_ALEN);
 	memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(action_frame->bssid, bssid, ETH_ALEN);
 	action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 						  IEEE80211_STYPE_ACTION);
 	action_frame->u.action.category = WLAN_CATEGORY_HT;
-	action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS;
+	action_frame->u.action.action_code = WLAN_HT_ACTION_SMPS;
 	switch (smps) {
 	case IEEE80211_SMPS_AUTOMATIC:
 	case IEEE80211_SMPS_NUM_MODES:
@@ -551,15 +552,15 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
 		smps = IEEE80211_SMPS_OFF;
 		fallthrough;
 	case IEEE80211_SMPS_OFF:
-		action_frame->u.action.u.ht_smps.smps_control =
+		action_frame->u.action.ht_smps.smps_control =
 				WLAN_HT_SMPS_CONTROL_DISABLED;
 		break;
 	case IEEE80211_SMPS_STATIC:
-		action_frame->u.action.u.ht_smps.smps_control =
+		action_frame->u.action.ht_smps.smps_control =
 				WLAN_HT_SMPS_CONTROL_STATIC;
 		break;
 	case IEEE80211_SMPS_DYNAMIC:
-		action_frame->u.action.u.ht_smps.smps_control =
+		action_frame->u.action.ht_smps.smps_control =
 				WLAN_HT_SMPS_CONTROL_DYNAMIC;
 		break;
 	}
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 168f84a1353b..0298272c37ec 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -9,7 +9,7 @@
  * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright(c) 2018-2025 Intel Corporation
+ * Copyright(c) 2018-2026 Intel Corporation
  */
 
 #include <linux/delay.h>
@@ -888,19 +888,11 @@ ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata,
 				struct ieee80211_rx_status *rx_status,
 				struct ieee802_11_elems *elems)
 {
-	int required_len;
-
-	if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+	if (len < IEEE80211_MIN_ACTION_SIZE(chan_switch))
 		return;
 
 	/* CSA is the only action we handle for now */
-	if (mgmt->u.action.u.measurement.action_code !=
-	    WLAN_ACTION_SPCT_CHL_SWITCH)
-		return;
-
-	required_len = IEEE80211_MIN_ACTION_SIZE +
-		       sizeof(mgmt->u.action.u.chan_switch);
-	if (len < required_len)
+	if (mgmt->u.action.action_code != WLAN_ACTION_SPCT_CHL_SWITCH)
 		return;
 
 	if (!sdata->vif.bss_conf.csa_active)
@@ -1613,12 +1605,12 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 		case WLAN_CATEGORY_SPECTRUM_MGMT:
 			ies_len = skb->len -
 				  offsetof(struct ieee80211_mgmt,
-					   u.action.u.chan_switch.variable);
+					   u.action.chan_switch.variable);
 
 			if (ies_len < 0)
 				break;
 
-			elems = ieee802_11_parse_elems(mgmt->u.action.u.chan_switch.variable,
+			elems = ieee802_11_parse_elems(mgmt->u.action.chan_switch.variable,
 						       ies_len,
 						       IEEE80211_FTYPE_MGMT |
 						       IEEE80211_STYPE_ACTION,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 676b2a43c9f2..2e391cec73a0 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1579,7 +1579,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
 
 		sta = sta_info_get_bss(sdata, mgmt->sa);
 		if (sta) {
-			switch (mgmt->u.action.u.addba_req.action_code) {
+			switch (mgmt->u.action.action_code) {
 			case WLAN_ACTION_ADDBA_REQ:
 				ieee80211_process_addba_request(local, sta,
 								mgmt, len);
@@ -1599,9 +1599,9 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
 		}
 	} else if (ieee80211_is_action(mgmt->frame_control) &&
 		   mgmt->u.action.category == WLAN_CATEGORY_HT) {
-		switch (mgmt->u.action.u.ht_smps.action) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
-			u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
+			u8 chanwidth = mgmt->u.action.ht_notify_cw.chanwidth;
 			struct ieee80211_rx_status *status;
 			struct link_sta_info *link_sta;
 			struct sta_info *sta;
@@ -1628,7 +1628,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
 		}
 	} else if (ieee80211_is_action(mgmt->frame_control) &&
 		   mgmt->u.action.category == WLAN_CATEGORY_VHT) {
-		switch (mgmt->u.action.u.vht_group_notif.action_code) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_VHT_ACTION_OPMODE_NOTIF: {
 			struct ieee80211_rx_status *status;
 			enum nl80211_band band;
@@ -1637,7 +1637,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
 
 			status = IEEE80211_SKB_RXCB(skb);
 			band = status->band;
-			opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
+			opmode = mgmt->u.action.vht_opmode_notif.operating_mode;
 
 			sta = sta_info_get_bss(sdata, mgmt->sa);
 
@@ -1658,7 +1658,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
 		}
 	} else if (ieee80211_is_action(mgmt->frame_control) &&
 		   mgmt->u.action.category == WLAN_CATEGORY_S1G) {
-		switch (mgmt->u.action.u.s1g.action_code) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_S1G_TWT_TEARDOWN:
 		case WLAN_S1G_TWT_SETUP:
 			ieee80211_s1g_rx_twt_action(sdata, skb);
@@ -1669,7 +1669,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
 	} else if (ieee80211_is_action(mgmt->frame_control) &&
 		   mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_EHT) {
 		if (sdata->vif.type == NL80211_IFTYPE_AP) {
-			switch (mgmt->u.action.u.eml_omn.action_code) {
+			switch (mgmt->u.action.action_code) {
 			case WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF:
 				ieee80211_rx_eml_op_mode_notif(sdata, skb);
 				break;
@@ -1677,7 +1677,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
 				break;
 			}
 		} else if (sdata->vif.type == NL80211_IFTYPE_STATION) {
-			switch (mgmt->u.action.u.ttlm_req.action_code) {
+			switch (mgmt->u.action.action_code) {
 			case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ:
 				ieee80211_process_neg_ttlm_req(sdata, mgmt,
 							       skb->len);
@@ -1765,7 +1765,7 @@ static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata,
 
 	if (ieee80211_is_action(mgmt->frame_control) &&
 	    mgmt->u.action.category == WLAN_CATEGORY_S1G) {
-		switch (mgmt->u.action.u.s1g.action_code) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_S1G_TWT_TEARDOWN:
 		case WLAN_S1G_TWT_SETUP:
 			ieee80211_s1g_status_twt_action(sdata, skb);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 28624e57aa49..6696c611dfa4 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2018 - 2025 Intel Corporation
+ * Copyright (C) 2018-2026 Intel Corporation
  * Authors:    Luis Carlos Cobo <luisca@cozybit.com>
  * 	       Javier Cardona <javier@cozybit.com>
  */
@@ -19,8 +19,7 @@ static struct kmem_cache *rm_cache;
 
 bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt)
 {
-	return (mgmt->u.action.u.mesh_action.action_code ==
-			WLAN_MESH_ACTION_HWMP_PATH_SELECTION);
+	return mgmt->u.action.action_code == WLAN_MESH_ACTION_HWMP_PATH_SELECTION;
 }
 
 void ieee80211s_init(void)
@@ -1618,13 +1617,12 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
 	size_t baselen;
 	u8 *pos;
 
-	if (mgmt->u.action.u.measurement.action_code !=
-	    WLAN_ACTION_SPCT_CHL_SWITCH)
+	if (mgmt->u.action.action_code != WLAN_ACTION_SPCT_CHL_SWITCH)
 		return;
 
-	pos = mgmt->u.action.u.chan_switch.variable;
+	pos = mgmt->u.action.chan_switch.variable;
 	baselen = offsetof(struct ieee80211_mgmt,
-			   u.action.u.chan_switch.variable);
+			   u.action.chan_switch.variable);
 	elems = ieee802_11_parse_elems(pos, len - baselen,
 				       IEEE80211_FTYPE_MGMT |
 				       IEEE80211_STYPE_ACTION,
@@ -1670,7 +1668,7 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 {
 	switch (mgmt->u.action.category) {
 	case WLAN_CATEGORY_SELF_PROTECTED:
-		switch (mgmt->u.action.u.self_prot.action_code) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_SP_MESH_PEERING_OPEN:
 		case WLAN_SP_MESH_PEERING_CLOSE:
 		case WLAN_SP_MESH_PEERING_CONFIRM:
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 98d5aaa36d00..9d89ebcce1c1 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2019, 2021-2023, 2025 Intel Corporation
+ * Copyright (C) 2019, 2021-2023, 2025-2026 Intel Corporation
  * Author:     Luis Carlos Cobo <luisca@cozybit.com>
  */
 
@@ -105,12 +105,11 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 				  u32 lifetime, u32 metric, u32 preq_id,
 				  struct ieee80211_sub_if_data *sdata)
 {
+	int hdr_len = IEEE80211_MIN_ACTION_SIZE(mesh_action);
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos, ie_len;
-	int hdr_len = offsetofend(struct ieee80211_mgmt,
-				  u.action.u.mesh_action);
 
 	skb = dev_alloc_skb(local->tx_headroom +
 			    hdr_len +
@@ -127,8 +126,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	/* BSSID == SA */
 	memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
 	mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION;
-	mgmt->u.action.u.mesh_action.action_code =
-					WLAN_MESH_ACTION_HWMP_PATH_SELECTION;
+	mgmt->u.action.action_code = WLAN_MESH_ACTION_HWMP_PATH_SELECTION;
 
 	switch (action) {
 	case MPATH_PREQ:
@@ -237,13 +235,12 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
 		       u8 ttl, const u8 *target, u32 target_sn,
 		       u16 target_rcode, const u8 *ra)
 {
+	int hdr_len = IEEE80211_MIN_ACTION_SIZE(mesh_action);
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos, ie_len;
-	int hdr_len = offsetofend(struct ieee80211_mgmt,
-				  u.action.u.mesh_action);
 
 	if (time_before(jiffies, ifmsh->next_perr))
 		return -EAGAIN;
@@ -265,8 +262,7 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
 	/* BSSID == SA */
 	memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
 	mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION;
-	mgmt->u.action.u.mesh_action.action_code =
-					WLAN_MESH_ACTION_HWMP_PATH_SELECTION;
+	mgmt->u.action.action_code = WLAN_MESH_ACTION_HWMP_PATH_SELECTION;
 	ie_len = 15;
 	pos = skb_put(skb, 2 + ie_len);
 	*pos++ = WLAN_EID_PERR;
@@ -938,7 +934,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
 	struct sta_info *sta;
 
 	/* need action_code */
-	if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+	if (len < IEEE80211_MIN_ACTION_SIZE(mesh_action))
 		return;
 
 	rcu_read_lock();
@@ -949,8 +945,8 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
 	}
 	rcu_read_unlock();
 
-	baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt;
-	elems = ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable,
+	baselen = mgmt->u.action.mesh_action.variable - (u8 *)mgmt;
+	elems = ieee802_11_parse_elems(mgmt->u.action.mesh_action.variable,
 				       len - baselen,
 				       IEEE80211_FTYPE_MGMT |
 				       IEEE80211_STYPE_ACTION,
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 04c931cd2063..7d823a55636f 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2019, 2021-2025 Intel Corporation
+ * Copyright (C) 2019, 2021-2026 Intel Corporation
  * Author:     Luis Carlos Cobo <luisca@cozybit.com>
  */
 #include <linux/gfp.h>
@@ -13,7 +13,7 @@
 #include "rate.h"
 #include "mesh.h"
 
-#define PLINK_CNF_AID(mgmt) ((mgmt)->u.action.u.self_prot.variable + 2)
+#define PLINK_CNF_AID(mgmt) ((mgmt)->u.action.self_prot.variable + 2)
 #define PLINK_GET_LLID(p) (p + 2)
 #define PLINK_GET_PLID(p) (p + 4)
 
@@ -215,6 +215,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 			       enum ieee80211_self_protected_actioncode action,
 			       u8 *da, u16 llid, u16 plid, u16 reason)
 {
+	int hdr_len = IEEE80211_MIN_ACTION_SIZE(self_prot);
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_tx_info *info;
@@ -223,7 +224,6 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 	u16 peering_proto = 0;
 	u8 *pos, ie_len = 4;
 	u8 ie_len_he_cap, ie_len_eht_cap;
-	int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
 	int err = -ENOMEM;
 
 	ie_len_he_cap = ieee80211_ie_len_he_cap(sdata);
@@ -260,7 +260,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
 	mgmt->u.action.category = WLAN_CATEGORY_SELF_PROTECTED;
-	mgmt->u.action.u.self_prot.action_code = action;
+	mgmt->u.action.action_code = action;
 
 	if (action != WLAN_SP_MESH_PEERING_CLOSE) {
 		struct ieee80211_supported_band *sband;
@@ -1141,7 +1141,7 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
-	ftype = mgmt->u.action.u.self_prot.action_code;
+	ftype = mgmt->u.action.action_code;
 	if ((ftype == WLAN_SP_MESH_PEERING_OPEN && ie_len != 4) ||
 	    (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) ||
 	    (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6
@@ -1224,8 +1224,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
 	size_t baselen;
 	u8 *baseaddr;
 
-	/* need action_code, aux */
-	if (len < IEEE80211_MIN_ACTION_SIZE + 3)
+	/* need aux */
+	if (len < IEEE80211_MIN_ACTION_SIZE(self_prot) + 1)
 		return;
 
 	if (sdata->u.mesh.user_mpm)
@@ -1238,10 +1238,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
-	baseaddr = mgmt->u.action.u.self_prot.variable;
-	baselen = (u8 *) mgmt->u.action.u.self_prot.variable - (u8 *) mgmt;
-	if (mgmt->u.action.u.self_prot.action_code ==
-						WLAN_SP_MESH_PEERING_CONFIRM) {
+	baseaddr = mgmt->u.action.self_prot.variable;
+	baselen = mgmt->u.action.self_prot.variable - (u8 *)mgmt;
+	if (mgmt->u.action.action_code == WLAN_SP_MESH_PEERING_CONFIRM) {
 		baseaddr += 4;
 		baselen += 4;
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 170330d924a3..da79df92994d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -7957,7 +7957,7 @@ ieee80211_send_neg_ttlm_req(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_mgmt *mgmt;
 	struct sk_buff *skb;
-	int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_req);
+	int hdr_len = IEEE80211_MIN_ACTION_SIZE(ttlm_req);
 	int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 +
 		2 * 2 * IEEE80211_TTLM_NUM_TIDS;
 
@@ -7974,9 +7974,8 @@ ieee80211_send_neg_ttlm_req(struct ieee80211_sub_if_data *sdata,
 	memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
 
 	mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
-	mgmt->u.action.u.ttlm_req.action_code =
-		WLAN_PROTECTED_EHT_ACTION_TTLM_REQ;
-	mgmt->u.action.u.ttlm_req.dialog_token = dialog_token;
+	mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_TTLM_REQ;
+	mgmt->u.action.ttlm_req.dialog_token = dialog_token;
 	ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm);
 	ieee80211_tx_skb(sdata, skb);
 }
@@ -8026,7 +8025,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_mgmt *mgmt;
 	struct sk_buff *skb;
-	int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res);
+	int hdr_len = IEEE80211_MIN_ACTION_SIZE(ttlm_res);
 	int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 +
 		2 * 2 * IEEE80211_TTLM_NUM_TIDS;
 	u16 status_code;
@@ -8044,9 +8043,8 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
 	memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
 
 	mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
-	mgmt->u.action.u.ttlm_res.action_code =
-		WLAN_PROTECTED_EHT_ACTION_TTLM_RES;
-	mgmt->u.action.u.ttlm_res.dialog_token = dialog_token;
+	mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_TTLM_RES;
+	mgmt->u.action.ttlm_res.dialog_token = dialog_token;
 	switch (ttlm_res) {
 	default:
 		WARN_ON(1);
@@ -8063,7 +8061,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
 		break;
 	}
 
-	mgmt->u.action.u.ttlm_res.status_code = cpu_to_le16(status_code);
+	mgmt->u.action.ttlm_res.status_code = cpu_to_le16(status_code);
 	ieee80211_tx_skb(sdata, skb);
 }
 
@@ -8163,10 +8161,9 @@ void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata,
 	if (!ieee80211_vif_is_mld(&sdata->vif))
 		return;
 
-	dialog_token = mgmt->u.action.u.ttlm_req.dialog_token;
-	ies_len  = len - offsetof(struct ieee80211_mgmt,
-				  u.action.u.ttlm_req.variable);
-	elems = ieee802_11_parse_elems(mgmt->u.action.u.ttlm_req.variable,
+	dialog_token = mgmt->u.action.ttlm_req.dialog_token;
+	ies_len  = len - IEEE80211_MIN_ACTION_SIZE(ttlm_req);
+	elems = ieee802_11_parse_elems(mgmt->u.action.ttlm_req.variable,
 				       ies_len,
 				       IEEE80211_FTYPE_MGMT |
 				       IEEE80211_STYPE_ACTION,
@@ -8217,8 +8214,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_mgmt *mgmt, size_t len)
 {
 	if (!ieee80211_vif_is_mld(&sdata->vif) ||
-	    mgmt->u.action.u.ttlm_req.dialog_token !=
-	    sdata->u.mgd.dialog_token_alloc)
+	    mgmt->u.action.ttlm_res.dialog_token != sdata->u.mgd.dialog_token_alloc)
 		return;
 
 	wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
@@ -8232,7 +8228,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
 	 * This can be better implemented in the future, to handle request
 	 * rejections.
 	 */
-	if (le16_to_cpu(mgmt->u.action.u.ttlm_res.status_code) != WLAN_STATUS_SUCCESS)
+	if (le16_to_cpu(mgmt->u.action.ttlm_res.status_code) != WLAN_STATUS_SUCCESS)
 		__ieee80211_disconnect(sdata);
 }
 
@@ -8265,12 +8261,11 @@ static void ieee80211_teardown_ttlm_work(struct wiphy *wiphy,
 
 void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif)
 {
+	int frame_len = IEEE80211_MIN_ACTION_SIZE(ttlm_tear_down);
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_mgmt *mgmt;
 	struct sk_buff *skb;
-	int frame_len = offsetofend(struct ieee80211_mgmt,
-				  u.action.u.ttlm_tear_down);
 	struct ieee80211_tx_info *info;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + frame_len);
@@ -8286,8 +8281,7 @@ void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif)
 	memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
 
 	mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
-	mgmt->u.action.u.ttlm_tear_down.action_code =
-		WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN;
+	mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN;
 
 	info = IEEE80211_SKB_CB(skb);
 	info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
@@ -8370,13 +8364,13 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 		case WLAN_CATEGORY_SPECTRUM_MGMT:
 			ies_len = skb->len -
 				  offsetof(struct ieee80211_mgmt,
-					   u.action.u.chan_switch.variable);
+					   u.action.chan_switch.variable);
 
 			if (ies_len < 0)
 				break;
 
 			/* CSA IE cannot be overridden, no need for BSSID */
-			elems = ieee802_11_parse_elems(mgmt->u.action.u.chan_switch.variable,
+			elems = ieee802_11_parse_elems(mgmt->u.action.chan_switch.variable,
 						       ies_len,
 						       IEEE80211_FTYPE_MGMT |
 						       IEEE80211_STYPE_ACTION,
@@ -8398,7 +8392,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 		case WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION:
 			ies_len = skb->len -
 				  offsetof(struct ieee80211_mgmt,
-					   u.action.u.ext_chan_switch.variable);
+					   u.action.ext_chan_switch.variable);
 
 			if (ies_len < 0)
 				break;
@@ -8407,7 +8401,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 			 * extended CSA IE can't be overridden, no need for
 			 * BSSID
 			 */
-			elems = ieee802_11_parse_elems(mgmt->u.action.u.ext_chan_switch.variable,
+			elems = ieee802_11_parse_elems(mgmt->u.action.ext_chan_switch.variable,
 						       ies_len,
 						       IEEE80211_FTYPE_MGMT |
 						       IEEE80211_STYPE_ACTION,
@@ -8424,7 +8418,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 
 				/* for the handling code pretend it was an IE */
 				elems->ext_chansw_ie =
-					&mgmt->u.action.u.ext_chan_switch.data;
+					&mgmt->u.action.ext_chan_switch.data;
 
 				ieee80211_sta_process_chanswitch(link,
 								 rx_status->mactime,
@@ -10426,25 +10420,25 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata,
 	u8 *pos;
 
 	if (!ieee80211_vif_is_mld(&sdata->vif) ||
-	    len < offsetofend(typeof(*mgmt), u.action.u.ml_reconf_resp) ||
-	    mgmt->u.action.u.ml_reconf_resp.dialog_token !=
-	    sdata->u.mgd.reconf.dialog_token ||
+	    len < IEEE80211_MIN_ACTION_SIZE(ml_reconf_resp) ||
+	    mgmt->u.action.ml_reconf_resp.dialog_token !=
+		sdata->u.mgd.reconf.dialog_token ||
 	    !sta_changed_links)
 		return;
 
-	pos = mgmt->u.action.u.ml_reconf_resp.variable;
-	len -= offsetofend(typeof(*mgmt), u.action.u.ml_reconf_resp);
+	pos = mgmt->u.action.ml_reconf_resp.variable;
+	len -= offsetofend(typeof(*mgmt), u.action.ml_reconf_resp);
 
 	/* each status duple is 3 octets */
-	if (len < mgmt->u.action.u.ml_reconf_resp.count * 3) {
+	if (len < mgmt->u.action.ml_reconf_resp.count * 3) {
 		sdata_info(sdata,
 			   "mlo: reconf: unexpected len=%zu, count=%u\n",
-			   len, mgmt->u.action.u.ml_reconf_resp.count);
+			   len, mgmt->u.action.ml_reconf_resp.count);
 		goto disconnect;
 	}
 
 	link_mask = sta_changed_links;
-	for (i = 0; i < mgmt->u.action.u.ml_reconf_resp.count; i++) {
+	for (i = 0; i < mgmt->u.action.ml_reconf_resp.count; i++) {
 		u16 status = get_unaligned_le16(pos + 1);
 
 		link_id = *pos;
@@ -10729,8 +10723,7 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata,
 		return NULL;
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
-	mgmt = skb_put_zero(skb, offsetofend(struct ieee80211_mgmt,
-					     u.action.u.ml_reconf_req));
+	mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(ml_reconf_req));
 
 	/* Add the MAC header */
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
@@ -10741,12 +10734,11 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata,
 
 	/* Add the action frame fixed fields */
 	mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
-	mgmt->u.action.u.ml_reconf_req.action_code =
-		WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_REQ;
+	mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_REQ;
 
 	/* allocate a dialog token and store it */
 	sdata->u.mgd.reconf.dialog_token = ++sdata->u.mgd.dialog_token_alloc;
-	mgmt->u.action.u.ml_reconf_req.dialog_token =
+	mgmt->u.action.ml_reconf_req.dialog_token =
 		sdata->u.mgd.reconf.dialog_token;
 
 	/* Add the ML reconfiguration element and the common information  */
@@ -11116,11 +11108,10 @@ static bool ieee80211_mgd_epcs_supp(struct ieee80211_sub_if_data *sdata)
 
 int ieee80211_mgd_set_epcs(struct ieee80211_sub_if_data *sdata, bool enable)
 {
+	int frame_len = IEEE80211_MIN_ACTION_SIZE(epcs) + (enable ? 1 : 0);
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_mgmt *mgmt;
 	struct sk_buff *skb;
-	int frame_len = offsetofend(struct ieee80211_mgmt,
-				    u.action.u.epcs) + (enable ? 1 : 0);
 
 	if (!ieee80211_mgd_epcs_supp(sdata))
 		return -EINVAL;
@@ -11149,15 +11140,15 @@ int ieee80211_mgd_set_epcs(struct ieee80211_sub_if_data *sdata, bool enable)
 
 	mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
 	if (enable) {
-		u8 *pos = mgmt->u.action.u.epcs.variable;
+		u8 *pos = mgmt->u.action.epcs.variable;
 
-		mgmt->u.action.u.epcs.action_code =
+		mgmt->u.action.action_code =
 			WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_REQ;
 
 		*pos = ++sdata->u.mgd.dialog_token_alloc;
 		sdata->u.mgd.epcs.dialog_token = *pos;
 	} else {
-		mgmt->u.action.u.epcs.action_code =
+		mgmt->u.action.action_code =
 			WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN;
 
 		ieee80211_epcs_teardown(sdata);
@@ -11246,7 +11237,7 @@ void ieee80211_process_epcs_ena_resp(struct ieee80211_sub_if_data *sdata,
 		return;
 
 	/* Handle dialog token and status code */
-	pos = mgmt->u.action.u.epcs.variable;
+	pos = mgmt->u.action.epcs.variable;
 	dialog_token = *pos;
 	status_code = get_unaligned_le16(pos + 1);
 
@@ -11268,8 +11259,7 @@ void ieee80211_process_epcs_ena_resp(struct ieee80211_sub_if_data *sdata,
 		return;
 
 	pos += IEEE80211_EPCS_ENA_RESP_BODY_LEN;
-	ies_len = len - offsetof(struct ieee80211_mgmt,
-				 u.action.u.epcs.variable) -
+	ies_len = len - IEEE80211_MIN_ACTION_SIZE(epcs) -
 		IEEE80211_EPCS_ENA_RESP_BODY_LEN;
 
 	elems = ieee802_11_parse_elems(pos, ies_len,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6c4b549444c6..3bd046bebf9e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -274,7 +274,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
 	if (!sdata)
 		return;
 
-	BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE + 1);
+	BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE(action_code));
 
 	if (skb->len < rtap_space + sizeof(action) +
 		       VHT_MUMIMO_GROUPS_DATA_LEN)
@@ -1162,7 +1162,7 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
 			u8 category;
 
 			/* make sure category field is present */
-			if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE)
+			if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(category))
 				return RX_DROP_U_RUNT_ACTION;
 
 			mgmt = (struct ieee80211_mgmt *)hdr;
@@ -3422,7 +3422,7 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
-	if (len < 24 + 1 + sizeof(resp->u.action.u.sa_query)) {
+	if (len < IEEE80211_MIN_ACTION_SIZE(sa_query)) {
 		/* Too short SA Query request frame */
 		return;
 	}
@@ -3432,17 +3432,16 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
 		return;
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
-	resp = skb_put_zero(skb, 24);
+	resp = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(sa_query));
 	memcpy(resp->da, sdata->vif.cfg.ap_addr, ETH_ALEN);
 	memcpy(resp->sa, sdata->vif.addr, ETH_ALEN);
 	memcpy(resp->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
 	resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_ACTION);
-	skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query));
 	resp->u.action.category = WLAN_CATEGORY_SA_QUERY;
-	resp->u.action.u.sa_query.action = WLAN_ACTION_SA_QUERY_RESPONSE;
-	memcpy(resp->u.action.u.sa_query.trans_id,
-	       mgmt->u.action.u.sa_query.trans_id,
+	resp->u.action.action_code = WLAN_ACTION_SA_QUERY_RESPONSE;
+	memcpy(resp->u.action.sa_query.trans_id,
+	       mgmt->u.action.sa_query.trans_id,
 	       WLAN_SA_QUERY_TR_ID_LEN);
 
 	ieee80211_tx_skb(sdata, skb);
@@ -3516,7 +3515,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
 
 	/* drop too small action frames */
 	if (ieee80211_is_action(mgmt->frame_control) &&
-	    rx->skb->len < IEEE80211_MIN_ACTION_SIZE)
+	    rx->skb->len < IEEE80211_MIN_ACTION_SIZE(category))
 		return RX_DROP_U_RUNT_ACTION;
 
 	/* Drop non-broadcast Beacon frames */
@@ -3565,29 +3564,28 @@ ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx)
 	if (!rx->sta)
 		return false;
 
-	switch (mgmt->u.action.u.s1g.action_code) {
+	switch (mgmt->u.action.action_code) {
 	case WLAN_S1G_TWT_SETUP: {
 		struct ieee80211_twt_setup *twt;
 
-		if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE +
-				   1 + /* action code */
+		if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(action_code) +
 				   sizeof(struct ieee80211_twt_setup) +
 				   2 /* TWT req_type agrt */)
 			break;
 
-		twt = (void *)mgmt->u.action.u.s1g.variable;
+		twt = (void *)mgmt->u.action.s1g.variable;
 		if (twt->element_id != WLAN_EID_S1G_TWT)
 			break;
 
-		if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE +
-				   4 + /* action code + token + tlv */
+		if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(action_code) +
+				   3 + /* token + tlv */
 				   twt->length)
 			break;
 
 		return true; /* queue the frame */
 	}
 	case WLAN_S1G_TWT_TEARDOWN:
-		if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + 2)
+		if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(action_code) + 1)
 			break;
 
 		return true; /* queue the frame */
@@ -3632,10 +3630,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			break;
 
 		/* verify action & smps_control/chanwidth are present */
-		if (len < IEEE80211_MIN_ACTION_SIZE + 2)
+		if (len < IEEE80211_MIN_ACTION_SIZE(ht_smps))
 			goto invalid;
 
-		switch (mgmt->u.action.u.ht_smps.action) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_HT_ACTION_SMPS: {
 			struct ieee80211_supported_band *sband;
 			enum ieee80211_smps_mode smps_mode;
@@ -3646,7 +3644,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 				goto handled;
 
 			/* convert to HT capability */
-			switch (mgmt->u.action.u.ht_smps.smps_control) {
+			switch (mgmt->u.action.ht_smps.smps_control) {
 			case WLAN_HT_SMPS_CONTROL_DISABLED:
 				smps_mode = IEEE80211_SMPS_OFF;
 				break;
@@ -3679,7 +3677,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			goto handled;
 		}
 		case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
-			u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
+			u8 chanwidth = mgmt->u.action.ht_notify_cw.chanwidth;
 
 			if (chanwidth != IEEE80211_HT_CHANWIDTH_20MHZ &&
 			    chanwidth != IEEE80211_HT_CHANWIDTH_ANY)
@@ -3699,7 +3697,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		break;
 	case WLAN_CATEGORY_PUBLIC:
 	case WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION:
-		if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+		if (len < IEEE80211_MIN_ACTION_SIZE(action_code))
 			goto invalid;
 		if (sdata->vif.type != NL80211_IFTYPE_STATION)
 			break;
@@ -3707,11 +3705,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			break;
 		if (!ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid))
 			break;
-		if (mgmt->u.action.u.ext_chan_switch.action_code !=
+		if (mgmt->u.action.action_code !=
 				WLAN_PUB_ACTION_EXT_CHANSW_ANN)
 			break;
-		if (len < offsetof(struct ieee80211_mgmt,
-				   u.action.u.ext_chan_switch.variable))
+		if (len < IEEE80211_MIN_ACTION_SIZE(ext_chan_switch))
 			goto invalid;
 		goto queue;
 	case WLAN_CATEGORY_VHT:
@@ -3723,18 +3720,18 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			break;
 
 		/* verify action code is present */
-		if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+		if (len < IEEE80211_MIN_ACTION_SIZE(action_code))
 			goto invalid;
 
-		switch (mgmt->u.action.u.vht_opmode_notif.action_code) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_VHT_ACTION_OPMODE_NOTIF: {
 			/* verify opmode is present */
-			if (len < IEEE80211_MIN_ACTION_SIZE + 2)
+			if (len < IEEE80211_MIN_ACTION_SIZE(vht_opmode_notif))
 				goto invalid;
 			goto queue;
 		}
 		case WLAN_VHT_ACTION_GROUPID_MGMT: {
-			if (len < IEEE80211_MIN_ACTION_SIZE + 25)
+			if (len < IEEE80211_MIN_ACTION_SIZE(vht_group_notif))
 				goto invalid;
 			goto queue;
 		}
@@ -3751,23 +3748,20 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			break;
 
 		/* verify action_code is present */
-		if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+		if (len < IEEE80211_MIN_ACTION_SIZE(action_code))
 			break;
 
-		switch (mgmt->u.action.u.addba_req.action_code) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_ACTION_ADDBA_REQ:
-			if (len < (IEEE80211_MIN_ACTION_SIZE +
-				   sizeof(mgmt->u.action.u.addba_req)))
+			if (len < IEEE80211_MIN_ACTION_SIZE(addba_req))
 				goto invalid;
 			break;
 		case WLAN_ACTION_ADDBA_RESP:
-			if (len < (IEEE80211_MIN_ACTION_SIZE +
-				   sizeof(mgmt->u.action.u.addba_resp)))
+			if (len < IEEE80211_MIN_ACTION_SIZE(addba_resp))
 				goto invalid;
 			break;
 		case WLAN_ACTION_DELBA:
-			if (len < (IEEE80211_MIN_ACTION_SIZE +
-				   sizeof(mgmt->u.action.u.delba)))
+			if (len < IEEE80211_MIN_ACTION_SIZE(delba))
 				goto invalid;
 			break;
 		default:
@@ -3777,16 +3771,15 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		goto queue;
 	case WLAN_CATEGORY_SPECTRUM_MGMT:
 		/* verify action_code is present */
-		if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+		if (len < IEEE80211_MIN_ACTION_SIZE(action_code))
 			break;
 
-		switch (mgmt->u.action.u.measurement.action_code) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_ACTION_SPCT_MSR_REQ:
 			if (status->band != NL80211_BAND_5GHZ)
 				break;
 
-			if (len < (IEEE80211_MIN_ACTION_SIZE +
-				   sizeof(mgmt->u.action.u.measurement)))
+			if (len < IEEE80211_MIN_ACTION_SIZE(measurement))
 				break;
 
 			if (sdata->vif.type != NL80211_IFTYPE_STATION)
@@ -3796,8 +3789,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			goto handled;
 		case WLAN_ACTION_SPCT_CHL_SWITCH: {
 			u8 *bssid;
-			if (len < (IEEE80211_MIN_ACTION_SIZE +
-				   sizeof(mgmt->u.action.u.chan_switch)))
+			if (len < IEEE80211_MIN_ACTION_SIZE(chan_switch))
 				break;
 
 			if (sdata->vif.type != NL80211_IFTYPE_STATION &&
@@ -3822,11 +3814,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		}
 		break;
 	case WLAN_CATEGORY_SELF_PROTECTED:
-		if (len < (IEEE80211_MIN_ACTION_SIZE +
-			   sizeof(mgmt->u.action.u.self_prot.action_code)))
+		if (len < IEEE80211_MIN_ACTION_SIZE(self_prot))
 			break;
 
-		switch (mgmt->u.action.u.self_prot.action_code) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_SP_MESH_PEERING_OPEN:
 		case WLAN_SP_MESH_PEERING_CLOSE:
 		case WLAN_SP_MESH_PEERING_CONFIRM:
@@ -3844,8 +3835,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		}
 		break;
 	case WLAN_CATEGORY_MESH_ACTION:
-		if (len < (IEEE80211_MIN_ACTION_SIZE +
-			   sizeof(mgmt->u.action.u.mesh_action.action_code)))
+		if (len < IEEE80211_MIN_ACTION_SIZE(action_code))
 			break;
 
 		if (!ieee80211_vif_is_mesh(&sdata->vif))
@@ -3855,11 +3845,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			break;
 		goto queue;
 	case WLAN_CATEGORY_S1G:
-		if (len < offsetofend(typeof(*mgmt),
-				      u.action.u.s1g.action_code))
+		if (len < IEEE80211_MIN_ACTION_SIZE(action_code))
 			break;
 
-		switch (mgmt->u.action.u.s1g.action_code) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_S1G_TWT_SETUP:
 		case WLAN_S1G_TWT_TEARDOWN:
 			if (ieee80211_process_rx_twt_action(rx))
@@ -3870,33 +3859,29 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		}
 		break;
 	case WLAN_CATEGORY_PROTECTED_EHT:
-		if (len < offsetofend(typeof(*mgmt),
-				      u.action.u.ttlm_req.action_code))
+		if (len < IEEE80211_MIN_ACTION_SIZE(action_code))
 			break;
 
-		switch (mgmt->u.action.u.ttlm_req.action_code) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ:
 			if (sdata->vif.type != NL80211_IFTYPE_STATION)
 				break;
 
-			if (len < offsetofend(typeof(*mgmt),
-					      u.action.u.ttlm_req))
+			if (len < IEEE80211_MIN_ACTION_SIZE(ttlm_req))
 				goto invalid;
 			goto queue;
 		case WLAN_PROTECTED_EHT_ACTION_TTLM_RES:
 			if (sdata->vif.type != NL80211_IFTYPE_STATION)
 				break;
 
-			if (len < offsetofend(typeof(*mgmt),
-					      u.action.u.ttlm_res))
+			if (len < IEEE80211_MIN_ACTION_SIZE(ttlm_res))
 				goto invalid;
 			goto queue;
 		case WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN:
 			if (sdata->vif.type != NL80211_IFTYPE_STATION)
 				break;
 
-			if (len < offsetofend(typeof(*mgmt),
-					      u.action.u.ttlm_tear_down))
+			if (len < IEEE80211_MIN_ACTION_SIZE(ttlm_tear_down))
 				goto invalid;
 			goto queue;
 		case WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_RESP:
@@ -3906,34 +3891,29 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			/* The reconfiguration response action frame must
 			 * least one 'Status Duple' entry (3 octets)
 			 */
-			if (len <
-			    offsetofend(typeof(*mgmt),
-					u.action.u.ml_reconf_resp) + 3)
+			if (len < IEEE80211_MIN_ACTION_SIZE(ml_reconf_resp) + 3)
 				goto invalid;
 			goto queue;
 		case WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_RESP:
 			if (sdata->vif.type != NL80211_IFTYPE_STATION)
 				break;
 
-			if (len < offsetofend(typeof(*mgmt),
-					      u.action.u.epcs) +
-			    IEEE80211_EPCS_ENA_RESP_BODY_LEN)
+			if (len < IEEE80211_MIN_ACTION_SIZE(epcs) +
+				  IEEE80211_EPCS_ENA_RESP_BODY_LEN)
 				goto invalid;
 			goto queue;
 		case WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN:
 			if (sdata->vif.type != NL80211_IFTYPE_STATION)
 				break;
 
-			if (len < offsetofend(typeof(*mgmt),
-					      u.action.u.epcs))
+			if (len < IEEE80211_MIN_ACTION_SIZE(epcs))
 				goto invalid;
 			goto queue;
 		case WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF:
 			if (sdata->vif.type != NL80211_IFTYPE_AP)
 				break;
 
-			if (len < offsetofend(typeof(*mgmt),
-					      u.action.u.eml_omn))
+			if (len < IEEE80211_MIN_ACTION_SIZE(eml_omn))
 				goto invalid;
 			goto queue;
 		default:
@@ -4015,11 +3995,10 @@ ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx)
 
 	switch (mgmt->u.action.category) {
 	case WLAN_CATEGORY_SA_QUERY:
-		if (len < (IEEE80211_MIN_ACTION_SIZE +
-			   sizeof(mgmt->u.action.u.sa_query)))
+		if (len < IEEE80211_MIN_ACTION_SIZE(sa_query))
 			break;
 
-		switch (mgmt->u.action.u.sa_query.action) {
+		switch (mgmt->u.action.action_code) {
 		case WLAN_ACTION_SA_QUERY_REQUEST:
 			if (sdata->vif.type != NL80211_IFTYPE_STATION)
 				break;
diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c
index 1f68df6e8067..297abaa6fecf 100644
--- a/net/mac80211/s1g.c
+++ b/net/mac80211/s1g.c
@@ -2,7 +2,7 @@
 /*
  * S1G handling
  * Copyright(c) 2020 Adapt-IP
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023, 2026 Intel Corporation
  */
 #include <linux/ieee80211.h>
 #include <net/mac80211.h>
@@ -27,14 +27,14 @@ bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb)
 	if (likely(mgmt->u.action.category != WLAN_CATEGORY_S1G))
 		return false;
 
-	return mgmt->u.action.u.s1g.action_code == WLAN_S1G_TWT_SETUP;
+	return mgmt->u.action.action_code == WLAN_S1G_TWT_SETUP;
 }
 
 static void
 ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da,
 			     const u8 *bssid, struct ieee80211_twt_setup *twt)
 {
-	int len = IEEE80211_MIN_ACTION_SIZE + 4 + twt->length;
+	int len = IEEE80211_MIN_ACTION_SIZE(s1g) + 3 + twt->length;
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_mgmt *mgmt;
 	struct sk_buff *skb;
@@ -52,8 +52,8 @@ ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da,
 	memcpy(mgmt->bssid, bssid, ETH_ALEN);
 
 	mgmt->u.action.category = WLAN_CATEGORY_S1G;
-	mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_SETUP;
-	memcpy(mgmt->u.action.u.s1g.variable, twt, 3 + twt->length);
+	mgmt->u.action.action_code = WLAN_S1G_TWT_SETUP;
+	memcpy(mgmt->u.action.s1g.variable, twt, 3 + twt->length);
 
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
 					IEEE80211_TX_INTFL_MLME_CONN_TX |
@@ -71,12 +71,12 @@ ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata,
 	u8 *id;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
-			    IEEE80211_MIN_ACTION_SIZE + 2);
+			    IEEE80211_MIN_ACTION_SIZE(s1g) + 1);
 	if (!skb)
 		return;
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
-	mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE + 2);
+	mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(s1g) + 1);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_ACTION);
 	memcpy(mgmt->da, da, ETH_ALEN);
@@ -84,8 +84,8 @@ ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata,
 	memcpy(mgmt->bssid, bssid, ETH_ALEN);
 
 	mgmt->u.action.category = WLAN_CATEGORY_S1G;
-	mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_TEARDOWN;
-	id = (u8 *)mgmt->u.action.u.s1g.variable;
+	mgmt->u.action.action_code = WLAN_S1G_TWT_TEARDOWN;
+	id = (u8 *)mgmt->u.action.s1g.variable;
 	*id = flowid;
 
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
@@ -98,7 +98,7 @@ ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata,
 			   struct sta_info *sta, struct sk_buff *skb)
 {
 	struct ieee80211_mgmt *mgmt = (void *)skb->data;
-	struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable;
+	struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.s1g.variable;
 	struct ieee80211_twt_params *twt_agrt = (void *)twt->params;
 
 	twt_agrt->req_type &= cpu_to_le16(~IEEE80211_TWT_REQTYPE_REQUEST);
@@ -128,7 +128,7 @@ ieee80211_s1g_rx_twt_teardown(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
 
 	drv_twt_teardown_request(sdata->local, sdata, &sta->sta,
-				 mgmt->u.action.u.s1g.variable[0]);
+				 mgmt->u.action.s1g.variable[0]);
 }
 
 static void
@@ -136,7 +136,7 @@ ieee80211_s1g_tx_twt_setup_fail(struct ieee80211_sub_if_data *sdata,
 				struct sta_info *sta, struct sk_buff *skb)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
-	struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable;
+	struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.s1g.variable;
 	struct ieee80211_twt_params *twt_agrt = (void *)twt->params;
 	u8 flowid = le16_get_bits(twt_agrt->req_type,
 				  IEEE80211_TWT_REQTYPE_FLOWID);
@@ -160,7 +160,7 @@ void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata,
 	if (!sta)
 		return;
 
-	switch (mgmt->u.action.u.s1g.action_code) {
+	switch (mgmt->u.action.action_code) {
 	case WLAN_S1G_TWT_SETUP:
 		ieee80211_s1g_rx_twt_setup(sdata, sta, skb);
 		break;
@@ -185,7 +185,7 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
 	if (!sta)
 		return;
 
-	switch (mgmt->u.action.u.s1g.action_code) {
+	switch (mgmt->u.action.action_code) {
 	case WLAN_S1G_TWT_SETUP:
 		/* process failed twt setup frames */
 		ieee80211_s1g_tx_twt_setup_fail(sdata, sta, skb);
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 7422888d3640..e2eaf8d8d7ff 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -9,7 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2008, Intel Corporation
  * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018, 2020, 2022-2024 Intel Corporation
+ * Copyright (C) 2018, 2020, 2022-2024, 2026 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -409,35 +409,30 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *msr_report;
 
-	skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom +
-				sizeof(struct ieee80211_msrment_ie));
+	skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(measurement) +
+			    local->hw.extra_tx_headroom);
 	if (!skb)
 		return;
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
-	msr_report = skb_put_zero(skb, 24);
+	msr_report = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(measurement));
 	memcpy(msr_report->da, da, ETH_ALEN);
 	memcpy(msr_report->sa, sdata->vif.addr, ETH_ALEN);
 	memcpy(msr_report->bssid, bssid, ETH_ALEN);
 	msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 						IEEE80211_STYPE_ACTION);
 
-	skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement));
 	msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT;
-	msr_report->u.action.u.measurement.action_code =
-				WLAN_ACTION_SPCT_MSR_RPRT;
-	msr_report->u.action.u.measurement.dialog_token = dialog_token;
+	msr_report->u.action.action_code = WLAN_ACTION_SPCT_MSR_RPRT;
 
-	msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT;
-	msr_report->u.action.u.measurement.length =
+	msr_report->u.action.measurement.dialog_token = dialog_token;
+	msr_report->u.action.measurement.element_id = WLAN_EID_MEASURE_REPORT;
+	msr_report->u.action.measurement.length =
 			sizeof(struct ieee80211_msrment_ie);
-
-	memset(&msr_report->u.action.u.measurement.msr_elem, 0,
-		sizeof(struct ieee80211_msrment_ie));
-	msr_report->u.action.u.measurement.msr_elem.token = request_ie->token;
-	msr_report->u.action.u.measurement.msr_elem.mode |=
+	msr_report->u.action.measurement.msr_elem.token = request_ie->token;
+	msr_report->u.action.measurement.msr_elem.mode |=
 			IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED;
-	msr_report->u.action.u.measurement.msr_elem.type = request_ie->type;
+	msr_report->u.action.measurement.msr_elem.type = request_ie->type;
 
 	ieee80211_tx_skb(sdata, skb);
 }
@@ -454,7 +449,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
 	 * TODO: Answer basic measurement as unmeasured
 	 */
 	ieee80211_send_refuse_measurement_request(sdata,
-			&mgmt->u.action.u.measurement.msr_elem,
+			&mgmt->u.action.measurement.msr_elem,
 			mgmt->sa, mgmt->bssid,
-			mgmt->u.action.u.measurement.dialog_token);
+			mgmt->u.action.measurement.dialog_token);
 }
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index dbbfe2d6842f..1f30a4eda374 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -6,7 +6,7 @@
  * Copyright 2014, Intel Corporation
  * Copyright 2014  Intel Mobile Communications GmbH
  * Copyright 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2019, 2021-2025 Intel Corporation
+ * Copyright (C) 2019, 2021-2026 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -879,28 +879,23 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_mgmt *mgmt;
 
-	mgmt = skb_put_zero(skb, 24);
+	if (action_code != WLAN_PUB_ACTION_TDLS_DISCOVER_RES)
+		return -EINVAL;
+
+	mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(tdls_discover_resp));
 	memcpy(mgmt->da, peer, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	memcpy(mgmt->bssid, link->u.mgd.bssid, ETH_ALEN);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_ACTION);
 
-	switch (action_code) {
-	case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
-		skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp));
-		mgmt->u.action.category = WLAN_CATEGORY_PUBLIC;
-		mgmt->u.action.u.tdls_discover_resp.action_code =
-			WLAN_PUB_ACTION_TDLS_DISCOVER_RES;
-		mgmt->u.action.u.tdls_discover_resp.dialog_token =
-			dialog_token;
-		mgmt->u.action.u.tdls_discover_resp.capability =
-			cpu_to_le16(ieee80211_get_tdls_sta_capab(link,
-								 status_code));
-		break;
-	default:
-		return -EINVAL;
-	}
+	mgmt->u.action.category = WLAN_CATEGORY_PUBLIC;
+	mgmt->u.action.action_code = WLAN_PUB_ACTION_TDLS_DISCOVER_RES;
+
+	mgmt->u.action.tdls_discover_resp.dialog_token = dialog_token;
+	mgmt->u.action.tdls_discover_resp.capability =
+		cpu_to_le16(ieee80211_get_tdls_sta_capab(link,
+							 status_code));
 
 	return 0;
 }
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b2e6c8b98381..55054de62508 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3766,12 +3766,11 @@ again:
 int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
 			      struct cfg80211_csa_settings *csa_settings)
 {
+	int hdr_len = IEEE80211_MIN_ACTION_SIZE(chan_switch);
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	struct ieee80211_local *local = sdata->local;
 	int freq;
-	int hdr_len = offsetofend(struct ieee80211_mgmt,
-				  u.action.u.chan_switch);
 	u8 *pos;
 
 	if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
@@ -3800,7 +3799,7 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
 		memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN);
 	}
 	mgmt->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT;
-	mgmt->u.action.u.chan_switch.action_code = WLAN_ACTION_SPCT_CHL_SWITCH;
+	mgmt->u.action.action_code = WLAN_ACTION_SPCT_CHL_SWITCH;
 	pos = skb_put(skb, 5);
 	*pos++ = WLAN_EID_CHANNEL_SWITCH;			/* EID */
 	*pos++ = 3;						/* IE length */
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index b099d79e8fbb..80120f9f17b6 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -4,7 +4,7 @@
  *
  * Portions of this file
  * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2024 Intel Corporation
+ * Copyright (C) 2018-2026 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -723,17 +723,17 @@ void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
 	if (!link_conf->mu_mimo_owner)
 		return;
 
-	if (!memcmp(mgmt->u.action.u.vht_group_notif.position,
+	if (!memcmp(mgmt->u.action.vht_group_notif.position,
 		    link_conf->mu_group.position, WLAN_USER_POSITION_LEN) &&
-	    !memcmp(mgmt->u.action.u.vht_group_notif.membership,
+	    !memcmp(mgmt->u.action.vht_group_notif.membership,
 		    link_conf->mu_group.membership, WLAN_MEMBERSHIP_LEN))
 		return;
 
 	memcpy(link_conf->mu_group.membership,
-	       mgmt->u.action.u.vht_group_notif.membership,
+	       mgmt->u.action.vht_group_notif.membership,
 	       WLAN_MEMBERSHIP_LEN);
 	memcpy(link_conf->mu_group.position,
-	       mgmt->u.action.u.vht_group_notif.position,
+	       mgmt->u.action.vht_group_notif.position,
 	       WLAN_USER_POSITION_LEN);
 
 	ieee80211_link_info_change_notify(sdata, link,
-- 
cgit v1.2.3


From 9aa84d5c6c99480c523aeb7a6ce93b6635f3e290 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 4 Mar 2026 14:41:48 +0100
Subject: wifi: ieee80211: fix UHR operation DBE vs. P-EDCA order

Draft P802.11bn_D1.3 switched the order here to align with
the order of the fields. Adjust the code accordingly.

Link: https://patch.msgid.link/20260304144148.ce45942294e1.I22ab3f16e6376a19c3953cf81dd67105ea8e529d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211-uhr.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211-uhr.h b/include/linux/ieee80211-uhr.h
index 9729d23e4766..d199f3ebdba0 100644
--- a/include/linux/ieee80211-uhr.h
+++ b/include/linux/ieee80211-uhr.h
@@ -12,8 +12,8 @@
 
 #define IEEE80211_UHR_OPER_PARAMS_DPS_ENA		0x0001
 #define IEEE80211_UHR_OPER_PARAMS_NPCA_ENA		0x0002
-#define IEEE80211_UHR_OPER_PARAMS_DBE_ENA		0x0004
-#define IEEE80211_UHR_OPER_PARAMS_PEDCA_ENA		0x0008
+#define IEEE80211_UHR_OPER_PARAMS_PEDCA_ENA		0x0004
+#define IEEE80211_UHR_OPER_PARAMS_DBE_ENA		0x0008
 
 struct ieee80211_uhr_operation {
 	__le16 params;
-- 
cgit v1.2.3


From 98acd4c1d9f7dc9c426e840c16e81b57315ff84b Mon Sep 17 00:00:00 2001
From: Ria Thomas <ria.thomas@morsemicro.com>
Date: Thu, 5 Mar 2026 14:43:04 +0530
Subject: wifi: mac80211: add support for NDP ADDBA/DELBA for S1G

S1G defines use of NDP Block Ack (BA) for aggregation, requiring negotiation
of NDP ADDBA/DELBA action frames. If the S1G recipient supports HT-immediate
block ack, the sender must send an NDP ADDBA Request indicating it expects
only NDP BlockAck frames for the agreement.

Introduce support for NDP ADDBA and DELBA exchange in mac80211. The
implementation negotiates the BA mechanism during setup based on station
capabilities and driver support (IEEE80211_HW_SUPPORTS_NDP_BLOCKACK).
If negotiation fails due to mismatched expectations, a rejection with status code
WLAN_STATUS_REJECTED_NDP_BLOCK_ACK_SUGGESTED is returned as per IEEE 802.11-2024.

Trace sample:

IEEE 802.11 Wireless Management
    Fixed parameters
        Category code: Block Ack (3)
        Action code: NDP ADDBA Request (0x80)
        Dialog token: 0x01
        Block Ack Parameters: 0x1003, A-MSDUs, Block Ack Policy
            .... .... .... ...1 = A-MSDUs: Permitted in QoS Data MPDUs
            .... .... .... ..1. = Block Ack Policy: Immediate Block Ack
            .... .... ..00 00.. = Traffic Identifier: 0x0
            0001 0000 00.. .... = Number of Buffers (1 Buffer = 2304 Bytes): 64
        Block Ack Timeout: 0x0000
        Block Ack Starting Sequence Control (SSC): 0x0010
            .... .... .... 0000 = Fragment: 0
            0000 0000 0001 .... = Starting Sequence Number: 1

IEEE 802.11 Wireless Management
    Fixed parameters
        Category code: Block Ack (3)
        Action code: NDP ADDBA Response (0x81)
        Dialog token: 0x02
        Status code: BlockAck negotiation refused because, due to buffer constraints and other unspecified reasons, the recipient prefers to generate only NDP BlockAck frames (0x006d)
        Block Ack Parameters: 0x1002, Block Ack Policy
            .... .... .... ...0 = A-MSDUs: Not Permitted
            .... .... .... ..1. = Block Ack Policy: Immediate Block Ack
            .... .... ..00 00.. = Traffic Identifier: 0x0
            0001 0000 00.. .... = Number of Buffers (1 Buffer = 2304 Bytes): 64
        Block Ack Timeout: 0x0000

Signed-off-by: Ria Thomas <ria.thomas@morsemicro.com>
Link: https://patch.msgid.link/20260305091304.310990-1-ria.thomas@morsemicro.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211-ht.h |  3 +++
 include/linux/ieee80211.h    |  2 ++
 include/net/mac80211.h       |  4 ++++
 net/mac80211/agg-rx.c        | 24 +++++++++++++++++++++---
 net/mac80211/agg-tx.c        | 13 +++++++++----
 net/mac80211/debugfs.c       |  1 +
 net/mac80211/ht.c            |  8 +++++---
 net/mac80211/ieee80211_i.h   |  6 +++++-
 net/mac80211/iface.c         |  3 +++
 net/mac80211/rx.c            | 11 +++++++++--
 net/mac80211/s1g.c           |  8 ++++++++
 net/mac80211/sta_info.h      |  2 ++
 12 files changed, 72 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211-ht.h b/include/linux/ieee80211-ht.h
index 21bbf470540f..7612b72f9c7c 100644
--- a/include/linux/ieee80211-ht.h
+++ b/include/linux/ieee80211-ht.h
@@ -281,6 +281,9 @@ enum ieee80211_back_actioncode {
 	WLAN_ACTION_ADDBA_REQ = 0,
 	WLAN_ACTION_ADDBA_RESP = 1,
 	WLAN_ACTION_DELBA = 2,
+	WLAN_ACTION_NDP_ADDBA_REQ = 128,
+	WLAN_ACTION_NDP_ADDBA_RESP = 129,
+	WLAN_ACTION_NDP_DELBA = 130,
 };
 
 /* BACK (block-ack) parties */
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index aea360e90cb1..52db36120314 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1482,6 +1482,8 @@ enum ieee80211_statuscode {
 	WLAN_STATUS_REJECT_DSE_BAND = 96,
 	WLAN_STATUS_DENIED_WITH_SUGGESTED_BAND_AND_CHANNEL = 99,
 	WLAN_STATUS_DENIED_DUE_TO_SPECTRUM_MANAGEMENT = 103,
+	/* 802.11ah */
+	WLAN_STATUS_REJECTED_NDP_BLOCK_ACK_SUGGESTED = 109,
 	/* 802.11ai */
 	WLAN_STATUS_FILS_AUTHENTICATION_FAILURE = 112,
 	WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 113,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9f8251fb9832..9cc482191ab9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2913,6 +2913,9 @@ struct ieee80211_txq {
  *	HW flag so drivers can opt in according to their own control, e.g. in
  *	testing.
  *
+ * @IEEE80211_HW_SUPPORTS_NDP_BLOCKACK: HW can transmit/receive S1G NDP
+ *	BlockAck frames.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2973,6 +2976,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_DISALLOW_PUNCTURING,
 	IEEE80211_HW_HANDLES_QUIET_CSA,
 	IEEE80211_HW_STRICT,
+	IEEE80211_HW_SUPPORTS_NDP_BLOCKACK,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 0a2be8cb600f..0140ac826b23 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -94,7 +94,8 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 	/* check if this is a self generated aggregation halt */
 	if (initiator == WLAN_BACK_RECIPIENT && tx)
 		ieee80211_send_delba(sta->sdata, sta->sta.addr,
-				     tid, WLAN_BACK_RECIPIENT, reason);
+				     tid, WLAN_BACK_RECIPIENT, reason,
+				     ieee80211_s1g_use_ndp_ba(sta->sdata, sta));
 
 	/*
 	 * return here in case tid_rx is not assigned - which will happen if
@@ -240,6 +241,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU);
+	bool use_ndp = ieee80211_s1g_use_ndp_ba(sdata, sta);
 	u16 capab;
 
 	skb = dev_alloc_skb(sizeof(*mgmt) +
@@ -253,7 +255,8 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
 
 	skb_put(skb, 2 + sizeof(mgmt->u.action.addba_resp));
 	mgmt->u.action.category = WLAN_CATEGORY_BACK;
-	mgmt->u.action.action_code = WLAN_ACTION_ADDBA_RESP;
+	mgmt->u.action.action_code = use_ndp ?
+		WLAN_ACTION_NDP_ADDBA_RESP : WLAN_ACTION_ADDBA_RESP;
 
 	mgmt->u.action.addba_resp.dialog_token = dialog_token;
 
@@ -276,6 +279,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 				     u8 dialog_token, u16 timeout,
 				     u16 start_seq_num, u16 ba_policy, u16 tid,
 				     u16 buf_size, bool tx, bool auto_seq,
+				     bool req_ndp,
 				     const u8 addba_ext_data)
 {
 	struct ieee80211_local *local = sta->sdata->local;
@@ -301,6 +305,18 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 		goto end;
 	}
 
+	if (tx && ieee80211_s1g_use_ndp_ba(sta->sdata, sta) && !req_ndp) {
+		/*
+		 * According to IEEE 802.11-2024: Inform S1G originator
+		 * ADDBA rejected as NDP BlockAck is preferred
+		 */
+		status = WLAN_STATUS_REJECTED_NDP_BLOCK_ACK_SUGGESTED;
+		ht_dbg(sta->sdata,
+		       "Rejecting AddBA Req from %pM tid %u - require NDP BlockAck\n",
+		       sta->sta.addr, tid);
+		goto end;
+	}
+
 	if (!sta->sta.valid_links &&
 	    !sta->sta.deflink.ht_cap.ht_supported &&
 	    !sta->sta.deflink.he_cap.has_he &&
@@ -474,6 +490,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 				     struct ieee80211_mgmt *mgmt,
 				     size_t len)
 {
+	bool req_ndp = mgmt->u.action.action_code == WLAN_ACTION_NDP_ADDBA_REQ;
 	u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
 	u8 dialog_token, addba_ext_data;
 
@@ -498,7 +515,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 
 	__ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
 					start_seq_num, ba_policy, tid,
-					buf_size, true, false, addba_ext_data);
+					buf_size, true, false,
+					req_ndp, addba_ext_data);
 }
 
 void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index d5a62b8d5a80..01d927b88264 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -60,7 +60,7 @@
 
 static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid,
 					 u8 dialog_token, u16 start_seq_num,
-					 u16 agg_size, u16 timeout)
+					 u16 agg_size, u16 timeout, bool ndp)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
@@ -80,7 +80,8 @@ static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid,
 	skb_put(skb, 2 + sizeof(mgmt->u.action.addba_req));
 
 	mgmt->u.action.category = WLAN_CATEGORY_BACK;
-	mgmt->u.action.action_code = WLAN_ACTION_ADDBA_REQ;
+	mgmt->u.action.action_code = ndp ?
+		WLAN_ACTION_NDP_ADDBA_REQ : WLAN_ACTION_ADDBA_REQ;
 
 	mgmt->u.action.addba_req.dialog_token = dialog_token;
 	capab = IEEE80211_ADDBA_PARAM_AMSDU_MASK;
@@ -484,7 +485,8 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
 
 	/* send AddBA request */
 	ieee80211_send_addba_request(sta, tid, tid_tx->dialog_token,
-				     tid_tx->ssn, buf_size, tid_tx->timeout);
+				     tid_tx->ssn, buf_size, tid_tx->timeout,
+				     tid_tx->ndp);
 
 	WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state));
 }
@@ -521,6 +523,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	 */
 	synchronize_net();
 
+	tid_tx->ndp = ieee80211_s1g_use_ndp_ba(sdata, sta);
 	params.ssn = sta->tid_seq[tid] >> 4;
 	ret = drv_ampdu_action(local, sdata, &params);
 	tid_tx->ssn = params.ssn;
@@ -940,7 +943,9 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid,
 
 	if (send_delba)
 		ieee80211_send_delba(sdata, sta->sta.addr, tid,
-			WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
+				     WLAN_BACK_INITIATOR,
+				     WLAN_REASON_QSTA_NOT_USE,
+				     tid_tx->ndp);
 }
 
 void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index d02f07368c51..e8d0a8b71d59 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -490,6 +490,7 @@ static const char *hw_flag_names[] = {
 	FLAG(DISALLOW_PUNCTURING),
 	FLAG(HANDLES_QUIET_CSA),
 	FLAG(STRICT),
+	FLAG(SUPPORTS_NDP_BLOCKACK),
 #undef FLAG
 };
 
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 9e2469a8ce64..33f1e1b235e9 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -379,7 +379,7 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work)
 				       sta->ampdu_mlme.tid_rx_manage_offl))
 			__ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
 							IEEE80211_MAX_AMPDU_BUF_HT,
-							false, true, 0);
+							false, true, false, 0);
 
 		if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
 				       sta->ampdu_mlme.tid_rx_manage_offl))
@@ -455,7 +455,8 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work)
 
 void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
 			  const u8 *da, u16 tid,
-			  u16 initiator, u16 reason_code)
+			  u16 initiator, u16 reason_code,
+			  bool use_ndp)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
@@ -473,7 +474,8 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
 	skb_put(skb, 2 + sizeof(mgmt->u.action.delba));
 
 	mgmt->u.action.category = WLAN_CATEGORY_BACK;
-	mgmt->u.action.action_code = WLAN_ACTION_DELBA;
+	mgmt->u.action.action_code = use_ndp ?
+		WLAN_ACTION_NDP_DELBA : WLAN_ACTION_DELBA;
 	params = (u16)(initiator << 11); 	/* bit 11 initiator */
 	params |= (u16)(tid << 12); 		/* bit 15:12 TID number */
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a4babf7624e5..d71e0c6d2165 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2190,7 +2190,8 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
 				       struct link_sta_info *link_sta);
 void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
 			  const u8 *da, u16 tid,
-			  u16 initiator, u16 reason_code);
+			  u16 initiator, u16 reason_code,
+			  bool use_ndp);
 int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
 			       enum ieee80211_smps_mode smps, const u8 *da,
 			       const u8 *bssid, int link_id);
@@ -2206,6 +2207,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 				     u8 dialog_token, u16 timeout,
 				     u16 start_seq_num, u16 ba_policy, u16 tid,
 				     u16 buf_size, bool tx, bool auto_seq,
+				     bool req_ndp,
 				     const u8 addba_ext_data);
 void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 					 enum ieee80211_agg_stop_reason reason);
@@ -2331,6 +2333,8 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
 void ieee80211_s1g_cap_to_sta_s1g_cap(struct ieee80211_sub_if_data *sdata,
 				      const struct ieee80211_s1g_cap *s1g_cap_ie,
 				      struct link_sta_info *link_sta);
+bool ieee80211_s1g_use_ndp_ba(const struct ieee80211_sub_if_data *sdata,
+			      const struct sta_info *sta);
 
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 2e391cec73a0..40ce0bb72726 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1581,14 +1581,17 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
 		if (sta) {
 			switch (mgmt->u.action.action_code) {
 			case WLAN_ACTION_ADDBA_REQ:
+			case WLAN_ACTION_NDP_ADDBA_REQ:
 				ieee80211_process_addba_request(local, sta,
 								mgmt, len);
 				break;
 			case WLAN_ACTION_ADDBA_RESP:
+			case WLAN_ACTION_NDP_ADDBA_RESP:
 				ieee80211_process_addba_resp(local, sta,
 							     mgmt, len);
 				break;
 			case WLAN_ACTION_DELBA:
+			case WLAN_ACTION_NDP_DELBA:
 				ieee80211_process_delba(sdata, sta,
 							mgmt, len);
 				break;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3bd046bebf9e..19c33f7a8193 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1475,7 +1475,9 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 		    !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg))
 			ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid,
 					     WLAN_BACK_RECIPIENT,
-					     WLAN_REASON_QSTA_REQUIRE_SETUP);
+					     WLAN_REASON_QSTA_REQUIRE_SETUP,
+					     ieee80211_s1g_use_ndp_ba(rx->sdata,
+								      rx->sta));
 		goto dont_reorder;
 	}
 
@@ -3372,7 +3374,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
 		    !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg))
 			ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid,
 					     WLAN_BACK_RECIPIENT,
-					     WLAN_REASON_QSTA_REQUIRE_SETUP);
+					     WLAN_REASON_QSTA_REQUIRE_SETUP,
+					     ieee80211_s1g_use_ndp_ba(rx->sdata,
+								      rx->sta));
 
 		tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]);
 		if (!tid_agg_rx)
@@ -3753,14 +3757,17 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 
 		switch (mgmt->u.action.action_code) {
 		case WLAN_ACTION_ADDBA_REQ:
+		case WLAN_ACTION_NDP_ADDBA_REQ:
 			if (len < IEEE80211_MIN_ACTION_SIZE(addba_req))
 				goto invalid;
 			break;
 		case WLAN_ACTION_ADDBA_RESP:
+		case WLAN_ACTION_NDP_ADDBA_RESP:
 			if (len < IEEE80211_MIN_ACTION_SIZE(addba_resp))
 				goto invalid;
 			break;
 		case WLAN_ACTION_DELBA:
+		case WLAN_ACTION_NDP_DELBA:
 			if (len < IEEE80211_MIN_ACTION_SIZE(delba))
 				goto invalid;
 			break;
diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c
index 297abaa6fecf..5af4a0c6c642 100644
--- a/net/mac80211/s1g.c
+++ b/net/mac80211/s1g.c
@@ -220,3 +220,11 @@ void ieee80211_s1g_cap_to_sta_s1g_cap(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_sta_recalc_aggregates(&link_sta->sta->sta);
 }
+
+bool ieee80211_s1g_use_ndp_ba(const struct ieee80211_sub_if_data *sdata,
+			      const struct sta_info *sta)
+{
+	return sdata->vif.cfg.s1g &&
+		ieee80211_hw_check(&sdata->local->hw, SUPPORTS_NDP_BLOCKACK) &&
+		(sta && sta->sta.deflink.s1g_cap.s1g);
+}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index f1b1bbf2a2d4..58ccbea7f6f6 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -171,6 +171,7 @@ struct sta_info;
  * @bar_pending: BAR needs to be re-sent
  * @amsdu: support A-MSDU within A-MDPU
  * @ssn: starting sequence number of the session
+ * @ndp: this session is using NDP Block ACKs
  *
  * This structure's lifetime is managed by RCU, assignments to
  * the array holding it must hold the aggregation mutex.
@@ -199,6 +200,7 @@ struct tid_ampdu_tx {
 	u16 failed_bar_ssn;
 	bool bar_pending;
 	bool amsdu;
+	bool ndp;
 	u8 tid;
 };
 
-- 
cgit v1.2.3


From 96fefcabf340fcf8b3208dcd8685961955a66040 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 4 Mar 2026 10:32:31 -0500
Subject: vfs: widen inode hash/lookup functions to u64

Change the inode hash/lookup VFS API functions to accept u64 parameters
instead of unsigned long for inode numbers and hash values. This is
preparation for widening i_ino itself to u64, which will allow
filesystems to store full 64-bit inode numbers on 32-bit architectures.

Since unsigned long implicitly widens to u64 on all architectures, this
change is backward-compatible with all existing callers.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://patch.msgid.link/20260304-iino-u64-v3-1-2257ad83d372@kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/f2fs/node.c     |  2 +-
 fs/inode.c         | 36 ++++++++++++++++++------------------
 include/linux/fs.h | 46 ++++++++++++++++++++++------------------------
 3 files changed, 41 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 74992fd9c9b6..5ca6f518cfa1 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1997,7 +1997,7 @@ out:
 	return ret;
 }
 
-static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data)
+static int f2fs_match_ino(struct inode *inode, u64 ino, void *data)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	bool clean;
diff --git a/fs/inode.c b/fs/inode.c
index cc12b68e021b..62df5dda0589 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -672,7 +672,7 @@ static inline void inode_sb_list_del(struct inode *inode)
 	}
 }
 
-static unsigned long hash(struct super_block *sb, unsigned long hashval)
+static unsigned long hash(struct super_block *sb, u64 hashval)
 {
 	unsigned long tmp;
 
@@ -685,12 +685,12 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval)
 /**
  *	__insert_inode_hash - hash an inode
  *	@inode: unhashed inode
- *	@hashval: unsigned long value used to locate this object in the
+ *	@hashval: u64 value used to locate this object in the
  *		inode_hashtable.
  *
  *	Add an inode to the inode hash for this superblock.
  */
-void __insert_inode_hash(struct inode *inode, unsigned long hashval)
+void __insert_inode_hash(struct inode *inode, u64 hashval)
 {
 	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
 
@@ -1087,7 +1087,7 @@ repeat:
  * iget_locked for details.
  */
 static struct inode *find_inode_fast(struct super_block *sb,
-				struct hlist_head *head, unsigned long ino,
+				struct hlist_head *head, u64 ino,
 				bool hash_locked, bool *isnew)
 {
 	struct inode *inode = NULL;
@@ -1301,7 +1301,7 @@ EXPORT_SYMBOL(unlock_two_nondirectories);
  * Note that both @test and @set are called with the inode_hash_lock held, so
  * they can't sleep.
  */
-struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
+struct inode *inode_insert5(struct inode *inode, u64 hashval,
 			    int (*test)(struct inode *, void *),
 			    int (*set)(struct inode *, void *), void *data)
 {
@@ -1378,7 +1378,7 @@ EXPORT_SYMBOL(inode_insert5);
  * Note that both @test and @set are called with the inode_hash_lock held, so
  * they can't sleep.
  */
-struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
+struct inode *iget5_locked(struct super_block *sb, u64 hashval,
 		int (*test)(struct inode *, void *),
 		int (*set)(struct inode *, void *), void *data)
 {
@@ -1408,7 +1408,7 @@ EXPORT_SYMBOL(iget5_locked);
  * This is equivalent to iget5_locked, except the @test callback must
  * tolerate the inode not being stable, including being mid-teardown.
  */
-struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
+struct inode *iget5_locked_rcu(struct super_block *sb, u64 hashval,
 		int (*test)(struct inode *, void *),
 		int (*set)(struct inode *, void *), void *data)
 {
@@ -1455,7 +1455,7 @@ EXPORT_SYMBOL_GPL(iget5_locked_rcu);
  * hashed, and with the I_NEW flag set.  The file system gets to fill it in
  * before unlocking it via unlock_new_inode().
  */
-struct inode *iget_locked(struct super_block *sb, unsigned long ino)
+struct inode *iget_locked(struct super_block *sb, u64 ino)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
@@ -1527,7 +1527,7 @@ EXPORT_SYMBOL(iget_locked);
  *
  * Returns 1 if the inode number is unique, 0 if it is not.
  */
-static int test_inode_iunique(struct super_block *sb, unsigned long ino)
+static int test_inode_iunique(struct super_block *sb, u64 ino)
 {
 	struct hlist_head *b = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
@@ -1616,7 +1616,7 @@ EXPORT_SYMBOL(igrab);
  *
  * Note2: @test is called with the inode_hash_lock held, so can't sleep.
  */
-struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
+struct inode *ilookup5_nowait(struct super_block *sb, u64 hashval,
 		int (*test)(struct inode *, void *), void *data, bool *isnew)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
@@ -1647,7 +1647,7 @@ EXPORT_SYMBOL(ilookup5_nowait);
  *
  * Note: @test is called with the inode_hash_lock held, so can't sleep.
  */
-struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
+struct inode *ilookup5(struct super_block *sb, u64 hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
 	struct inode *inode;
@@ -1677,7 +1677,7 @@ EXPORT_SYMBOL(ilookup5);
  * Search for the inode @ino in the inode cache, and if the inode is in the
  * cache, the inode is returned with an incremented reference count.
  */
-struct inode *ilookup(struct super_block *sb, unsigned long ino)
+struct inode *ilookup(struct super_block *sb, u64 ino)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
@@ -1726,8 +1726,8 @@ EXPORT_SYMBOL(ilookup);
  * very carefully implemented.
  */
 struct inode *find_inode_nowait(struct super_block *sb,
-				unsigned long hashval,
-				int (*match)(struct inode *, unsigned long,
+				u64 hashval,
+				int (*match)(struct inode *, u64,
 					     void *),
 				void *data)
 {
@@ -1773,7 +1773,7 @@ EXPORT_SYMBOL(find_inode_nowait);
  *
  * The caller must hold the RCU read lock.
  */
-struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
+struct inode *find_inode_rcu(struct super_block *sb, u64 hashval,
 			     int (*test)(struct inode *, void *), void *data)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
@@ -1812,7 +1812,7 @@ EXPORT_SYMBOL(find_inode_rcu);
  * The caller must hold the RCU read lock.
  */
 struct inode *find_inode_by_ino_rcu(struct super_block *sb,
-				    unsigned long ino)
+				    u64 ino)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
@@ -1833,7 +1833,7 @@ EXPORT_SYMBOL(find_inode_by_ino_rcu);
 int insert_inode_locked(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
-	ino_t ino = inode->i_ino;
+	u64 ino = inode->i_ino;
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	bool isnew;
 
@@ -1884,7 +1884,7 @@ repeat:
 }
 EXPORT_SYMBOL(insert_inode_locked);
 
-int insert_inode_locked4(struct inode *inode, unsigned long hashval,
+int insert_inode_locked4(struct inode *inode, u64 hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
 	struct inode *old;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b3dd145b25e..b4f5e5fdbe4b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2934,33 +2934,31 @@ static inline int inode_generic_drop(struct inode *inode)
 }
 extern void d_mark_dontcache(struct inode *inode);
 
-extern struct inode *ilookup5_nowait(struct super_block *sb,
-		unsigned long hashval, int (*test)(struct inode *, void *),
-		void *data, bool *isnew);
-extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
-		int (*test)(struct inode *, void *), void *data);
-extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
-
-extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
-		int (*test)(struct inode *, void *),
-		int (*set)(struct inode *, void *),
-		void *data);
-struct inode *iget5_locked(struct super_block *, unsigned long,
+struct inode *ilookup5_nowait(struct super_block *sb, u64 hashval,
+			      int (*test)(struct inode *, void *), void *data,
+			      bool *isnew);
+struct inode *ilookup5(struct super_block *sb, u64 hashval,
+		       int (*test)(struct inode *, void *), void *data);
+struct inode *ilookup(struct super_block *sb, u64 ino);
+
+struct inode *inode_insert5(struct inode *inode, u64 hashval,
+			    int (*test)(struct inode *, void *),
+			    int (*set)(struct inode *, void *), void *data);
+struct inode *iget5_locked(struct super_block *, u64,
 			   int (*test)(struct inode *, void *),
 			   int (*set)(struct inode *, void *), void *);
-struct inode *iget5_locked_rcu(struct super_block *, unsigned long,
+struct inode *iget5_locked_rcu(struct super_block *, u64,
 			       int (*test)(struct inode *, void *),
 			       int (*set)(struct inode *, void *), void *);
-extern struct inode * iget_locked(struct super_block *, unsigned long);
-extern struct inode *find_inode_nowait(struct super_block *,
-				       unsigned long,
-				       int (*match)(struct inode *,
-						    unsigned long, void *),
-				       void *data);
-extern struct inode *find_inode_rcu(struct super_block *, unsigned long,
-				    int (*)(struct inode *, void *), void *);
-extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long);
-extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
+struct inode *iget_locked(struct super_block *, u64);
+struct inode *find_inode_nowait(struct super_block *, u64,
+				int (*match)(struct inode *, u64, void *),
+				void *data);
+struct inode *find_inode_rcu(struct super_block *, u64,
+			     int (*)(struct inode *, void *), void *);
+struct inode *find_inode_by_ino_rcu(struct super_block *, u64);
+int insert_inode_locked4(struct inode *, u64,
+			 int (*test)(struct inode *, void *), void *);
 extern int insert_inode_locked(struct inode *);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
@@ -3015,7 +3013,7 @@ int setattr_should_drop_sgid(struct mnt_idmap *idmap,
  */
 #define alloc_inode_sb(_sb, _cache, _gfp) kmem_cache_alloc_lru(_cache, &_sb->s_inode_lru, _gfp)
 
-extern void __insert_inode_hash(struct inode *, unsigned long hashval);
+void __insert_inode_hash(struct inode *, u64 hashval);
 static inline void insert_inode_hash(struct inode *inode)
 {
 	__insert_inode_hash(inode, inode->i_ino);
-- 
cgit v1.2.3


From 125dfa218134df7cc112667e92984de9d8cd0bf6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 4 Mar 2026 10:32:32 -0500
Subject: audit: widen ino fields to u64

inode->i_ino is being widened from unsigned long to u64. The audit
subsystem uses unsigned long ino in struct fields, function parameters,
and local variables that store inode numbers from arbitrary filesystems.
On 32-bit platforms this truncates inode numbers that exceed 32 bits,
which will cause incorrect audit log entries and broken watch/mark
comparisons.

Widen all audit ino fields, parameters, and locals to u64, and update
the inode format string from %lu to %llu to match.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://patch.msgid.link/20260304-iino-u64-v3-2-2257ad83d372@kernel.org
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/audit.h   |  2 +-
 kernel/audit.h          | 13 ++++++-------
 kernel/audit_fsnotify.c |  4 ++--
 kernel/audit_watch.c    | 12 ++++++------
 kernel/auditsc.c        |  4 ++--
 5 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index b642b5faca65..b915aaa7ed73 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -15,7 +15,7 @@
 #include <uapi/linux/audit.h>
 #include <uapi/linux/fanotify.h>
 
-#define AUDIT_INO_UNSET ((unsigned long)-1)
+#define AUDIT_INO_UNSET ((u64)-1)
 #define AUDIT_DEV_UNSET ((dev_t)-1)
 
 struct audit_sig_info {
diff --git a/kernel/audit.h b/kernel/audit.h
index 7c401729e21b..ac81fa02bcd7 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -76,7 +76,7 @@ struct audit_names {
 	int			name_len;	/* number of chars to log */
 	bool			hidden;		/* don't log this record */
 
-	unsigned long		ino;
+	u64			ino;
 	dev_t			dev;
 	umode_t			mode;
 	kuid_t			uid;
@@ -225,9 +225,9 @@ extern int auditd_test_task(struct task_struct *task);
 #define AUDIT_INODE_BUCKETS	32
 extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
 
-static inline int audit_hash_ino(u32 ino)
+static inline int audit_hash_ino(u64 ino)
 {
-	return (ino & (AUDIT_INODE_BUCKETS-1));
+	return ((u32)ino & (AUDIT_INODE_BUCKETS-1));
 }
 
 /* Indicates that audit should log the full pathname. */
@@ -277,16 +277,15 @@ extern int audit_to_watch(struct audit_krule *krule, char *path, int len,
 extern int audit_add_watch(struct audit_krule *krule, struct list_head **list);
 extern void audit_remove_watch_rule(struct audit_krule *krule);
 extern char *audit_watch_path(struct audit_watch *watch);
-extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino,
-			       dev_t dev);
+extern int audit_watch_compare(struct audit_watch *watch, u64 ino, dev_t dev);
 
 extern struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule,
 						    char *pathname, int len);
 extern char *audit_mark_path(struct audit_fsnotify_mark *mark);
 extern void audit_remove_mark(struct audit_fsnotify_mark *audit_mark);
 extern void audit_remove_mark_rule(struct audit_krule *krule);
-extern int audit_mark_compare(struct audit_fsnotify_mark *mark,
-			      unsigned long ino, dev_t dev);
+extern int audit_mark_compare(struct audit_fsnotify_mark *mark, u64 ino,
+			      dev_t dev);
 extern int audit_dupe_exe(struct audit_krule *new, struct audit_krule *old);
 extern int audit_exe_compare(struct task_struct *tsk,
 			     struct audit_fsnotify_mark *mark);
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index a4401f651060..711454f9f724 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -25,7 +25,7 @@
  */
 struct audit_fsnotify_mark {
 	dev_t dev;		/* associated superblock device */
-	unsigned long ino;	/* associated inode number */
+	u64 ino;		/* associated inode number */
 	char *path;		/* insertion path */
 	struct fsnotify_mark mark; /* fsnotify mark on the inode */
 	struct audit_krule *rule;
@@ -57,7 +57,7 @@ char *audit_mark_path(struct audit_fsnotify_mark *mark)
 	return mark->path;
 }
 
-int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long ino, dev_t dev)
+int audit_mark_compare(struct audit_fsnotify_mark *mark, u64 ino, dev_t dev)
 {
 	if (mark->ino == AUDIT_INO_UNSET)
 		return 0;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 096faac2435c..33577f0f54ef 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -37,7 +37,7 @@ struct audit_watch {
 	refcount_t		count;	/* reference count */
 	dev_t			dev;	/* associated superblock device */
 	char			*path;	/* insertion path */
-	unsigned long		ino;	/* associated inode number */
+	u64			ino;	/* associated inode number */
 	struct audit_parent	*parent; /* associated parent */
 	struct list_head	wlist;	/* entry in parent->watches list */
 	struct list_head	rules;	/* anchor for krule->rlist */
@@ -125,7 +125,7 @@ char *audit_watch_path(struct audit_watch *watch)
 	return watch->path;
 }
 
-int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
+int audit_watch_compare(struct audit_watch *watch, u64 ino, dev_t dev)
 {
 	return (watch->ino != AUDIT_INO_UNSET) &&
 		(watch->ino == ino) &&
@@ -244,7 +244,7 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
 /* Update inode info in audit rules based on filesystem event. */
 static void audit_update_watch(struct audit_parent *parent,
 			       const struct qstr *dname, dev_t dev,
-			       unsigned long ino, unsigned invalidating)
+			       u64 ino, unsigned invalidating)
 {
 	struct audit_watch *owatch, *nwatch, *nextw;
 	struct audit_krule *r, *nextr;
@@ -285,7 +285,7 @@ static void audit_update_watch(struct audit_parent *parent,
 				list_del(&oentry->rule.list);
 				audit_panic("error updating watch, removing");
 			} else {
-				int h = audit_hash_ino((u32)ino);
+				int h = audit_hash_ino(ino);
 
 				/*
 				 * nentry->rule.watch == oentry->rule.watch so
@@ -439,7 +439,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
 
 	audit_add_to_parent(krule, parent);
 
-	h = audit_hash_ino((u32)watch->ino);
+	h = audit_hash_ino(watch->ino);
 	*list = &audit_inode_hash[h];
 error:
 	path_put(&parent_path);
@@ -527,7 +527,7 @@ int audit_dupe_exe(struct audit_krule *new, struct audit_krule *old)
 int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
 {
 	struct file *exe_file;
-	unsigned long ino;
+	u64 ino;
 	dev_t dev;
 
 	/* only do exe filtering if we are recording @current events/records */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f6af6a8f68c4..ab54fccba215 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -886,7 +886,7 @@ static int audit_filter_inode_name(struct task_struct *tsk,
 				   struct audit_names *n,
 				   struct audit_context *ctx)
 {
-	int h = audit_hash_ino((u32)n->ino);
+	int h = audit_hash_ino(n->ino);
 	struct list_head *list = &audit_inode_hash[h];
 
 	return __audit_filter_op(tsk, ctx, list, n, ctx->major);
@@ -1534,7 +1534,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n,
 		audit_log_format(ab, " name=(null)");
 
 	if (n->ino != AUDIT_INO_UNSET)
-		audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#ho ouid=%u ogid=%u rdev=%02x:%02x",
+		audit_log_format(ab, " inode=%llu dev=%02x:%02x mode=%#ho ouid=%u ogid=%u rdev=%02x:%02x",
 				 n->ino,
 				 MAJOR(n->dev),
 				 MINOR(n->dev),
-- 
cgit v1.2.3


From 0b2600f81cefcdfcda58d50df7be8fd48ada8ce2 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 4 Mar 2026 10:32:42 -0500
Subject: treewide: change inode->i_ino from unsigned long to u64

On 32-bit architectures, unsigned long is only 32 bits wide, which
causes 64-bit inode numbers to be silently truncated. Several
filesystems (NFS, XFS, BTRFS, etc.) can generate inode numbers that
exceed 32 bits, and this truncation can lead to inode number collisions
and other subtle bugs on 32-bit systems.

Change the type of inode->i_ino from unsigned long to u64 to ensure that
inode numbers are always represented as 64-bit values regardless of
architecture. Update all format specifiers treewide from %lu/%lx to
%llu/%llx to match the new type, along with corresponding local variable
types.

This is the bulk treewide conversion. Earlier patches in this series
handled trace events separately to allow trace field reordering for
better struct packing on 32-bit.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://patch.msgid.link/20260304-iino-u64-v3-12-2257ad83d372@kernel.org
Acked-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/dma-buf/dma-buf.c                  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  4 +--
 fs/9p/vfs_addr.c                           |  4 +--
 fs/9p/vfs_inode.c                          |  6 ++--
 fs/9p/vfs_inode_dotl.c                     |  6 ++--
 fs/affs/amigaffs.c                         | 10 +++----
 fs/affs/bitmap.c                           |  2 +-
 fs/affs/dir.c                              |  2 +-
 fs/affs/file.c                             | 20 ++++++-------
 fs/affs/inode.c                            | 12 ++++----
 fs/affs/namei.c                            | 14 ++++-----
 fs/affs/symlink.c                          |  2 +-
 fs/afs/dir.c                               | 10 +++----
 fs/afs/dir_search.c                        |  2 +-
 fs/afs/dynroot.c                           |  2 +-
 fs/afs/inode.c                             |  2 +-
 fs/autofs/inode.c                          |  2 +-
 fs/befs/linuxvfs.c                         | 28 ++++++++---------
 fs/bfs/dir.c                               |  4 +--
 fs/cachefiles/io.c                         |  6 ++--
 fs/cachefiles/namei.c                      | 12 ++++----
 fs/cachefiles/xattr.c                      |  2 +-
 fs/ceph/crypto.c                           |  4 +--
 fs/coda/dir.c                              |  2 +-
 fs/coda/inode.c                            |  2 +-
 fs/cramfs/inode.c                          |  2 +-
 fs/crypto/crypto.c                         |  2 +-
 fs/crypto/hooks.c                          |  2 +-
 fs/crypto/keyring.c                        |  4 +--
 fs/crypto/keysetup.c                       |  2 +-
 fs/dcache.c                                |  4 +--
 fs/ecryptfs/crypto.c                       |  6 ++--
 fs/ecryptfs/file.c                         |  2 +-
 fs/efs/inode.c                             |  6 ++--
 fs/eventpoll.c                             |  2 +-
 fs/exportfs/expfs.c                        |  4 +--
 fs/ext2/dir.c                              | 10 +++----
 fs/ext2/ialloc.c                           |  9 +++---
 fs/ext2/inode.c                            |  2 +-
 fs/ext2/xattr.c                            | 14 ++++-----
 fs/ext4/dir.c                              |  2 +-
 fs/ext4/ext4.h                             |  4 +--
 fs/ext4/extents.c                          |  8 ++---
 fs/ext4/extents_status.c                   | 28 ++++++++---------
 fs/ext4/fast_commit.c                      |  8 ++---
 fs/ext4/ialloc.c                           | 10 +++----
 fs/ext4/indirect.c                         |  2 +-
 fs/ext4/inline.c                           | 14 ++++-----
 fs/ext4/inode.c                            | 22 +++++++-------
 fs/ext4/ioctl.c                            |  4 +--
 fs/ext4/mballoc.c                          |  6 ++--
 fs/ext4/migrate.c                          |  2 +-
 fs/ext4/move_extent.c                      | 20 ++++++-------
 fs/ext4/namei.c                            | 10 +++----
 fs/ext4/orphan.c                           | 16 +++++-----
 fs/ext4/page-io.c                          | 10 +++----
 fs/ext4/super.c                            | 22 +++++++-------
 fs/ext4/xattr.c                            | 10 +++----
 fs/f2fs/compress.c                         |  4 +--
 fs/f2fs/dir.c                              |  2 +-
 fs/f2fs/extent_cache.c                     |  8 ++---
 fs/f2fs/f2fs.h                             |  6 ++--
 fs/f2fs/file.c                             | 12 ++++----
 fs/f2fs/gc.c                               |  2 +-
 fs/f2fs/inline.c                           |  4 +--
 fs/f2fs/inode.c                            | 48 +++++++++++++++---------------
 fs/f2fs/namei.c                            |  8 ++---
 fs/f2fs/node.c                             | 10 +++----
 fs/f2fs/recovery.c                         | 10 +++----
 fs/f2fs/xattr.c                            | 10 +++----
 fs/freevxfs/vxfs_bmap.c                    |  4 +--
 fs/fserror.c                               |  2 +-
 fs/hfs/catalog.c                           |  2 +-
 fs/hfs/extent.c                            |  4 +--
 fs/hfs/inode.c                             |  4 +--
 fs/hfsplus/attributes.c                    | 10 +++----
 fs/hfsplus/catalog.c                       |  2 +-
 fs/hfsplus/dir.c                           |  6 ++--
 fs/hfsplus/extents.c                       |  6 ++--
 fs/hfsplus/inode.c                         |  8 ++---
 fs/hfsplus/super.c                         |  6 ++--
 fs/hfsplus/xattr.c                         | 10 +++----
 fs/hpfs/dir.c                              |  4 +--
 fs/hpfs/dnode.c                            |  4 +--
 fs/hpfs/ea.c                               |  4 +--
 fs/hpfs/inode.c                            |  4 +--
 fs/inode.c                                 | 13 ++++----
 fs/iomap/ioend.c                           |  2 +-
 fs/isofs/compress.c                        |  2 +-
 fs/isofs/dir.c                             |  2 +-
 fs/isofs/inode.c                           |  6 ++--
 fs/isofs/namei.c                           |  2 +-
 fs/jbd2/journal.c                          |  4 +--
 fs/jbd2/transaction.c                      |  2 +-
 fs/jffs2/dir.c                             |  4 +--
 fs/jffs2/file.c                            |  4 +--
 fs/jffs2/fs.c                              | 18 +++++------
 fs/jfs/inode.c                             |  2 +-
 fs/jfs/jfs_imap.c                          |  2 +-
 fs/jfs/jfs_metapage.c                      |  2 +-
 fs/lockd/svclock.c                         |  8 ++---
 fs/lockd/svcsubs.c                         |  2 +-
 fs/locks.c                                 |  6 ++--
 fs/minix/inode.c                           | 10 +++----
 fs/nfs/dir.c                               | 20 ++++++-------
 fs/nfs/file.c                              |  8 ++---
 fs/nfs/filelayout/filelayout.c             |  8 ++---
 fs/nfs/flexfilelayout/flexfilelayout.c     |  8 ++---
 fs/nfs/inode.c                             |  6 ++--
 fs/nfs/nfs4proc.c                          |  4 +--
 fs/nfs/pnfs.c                              | 12 ++++----
 fs/nfsd/export.c                           |  2 +-
 fs/nfsd/nfs4state.c                        |  4 +--
 fs/nfsd/nfsfh.c                            |  4 +--
 fs/nfsd/vfs.c                              |  2 +-
 fs/nilfs2/alloc.c                          | 10 +++----
 fs/nilfs2/bmap.c                           |  2 +-
 fs/nilfs2/btnode.c                         |  2 +-
 fs/nilfs2/btree.c                          | 12 ++++----
 fs/nilfs2/dir.c                            | 12 ++++----
 fs/nilfs2/direct.c                         |  4 +--
 fs/nilfs2/gcinode.c                        |  2 +-
 fs/nilfs2/inode.c                          |  8 ++---
 fs/nilfs2/mdt.c                            |  2 +-
 fs/nilfs2/namei.c                          |  2 +-
 fs/nilfs2/segment.c                        |  2 +-
 fs/notify/fdinfo.c                         |  4 +--
 fs/nsfs.c                                  |  4 +--
 fs/ntfs3/super.c                           |  2 +-
 fs/ocfs2/alloc.c                           |  2 +-
 fs/ocfs2/aops.c                            |  4 +--
 fs/ocfs2/dir.c                             |  8 ++---
 fs/ocfs2/dlmfs/dlmfs.c                     | 10 +++----
 fs/ocfs2/extent_map.c                      | 12 ++++----
 fs/ocfs2/inode.c                           |  2 +-
 fs/ocfs2/quota_local.c                     |  2 +-
 fs/ocfs2/refcounttree.c                    | 10 +++----
 fs/ocfs2/xattr.c                           |  4 +--
 fs/orangefs/inode.c                        |  2 +-
 fs/overlayfs/export.c                      |  2 +-
 fs/overlayfs/namei.c                       |  4 +--
 fs/overlayfs/util.c                        |  2 +-
 fs/pipe.c                                  |  2 +-
 fs/proc/fd.c                               |  2 +-
 fs/proc/task_mmu.c                         |  4 +--
 fs/qnx4/inode.c                            |  4 +--
 fs/qnx6/inode.c                            |  2 +-
 fs/ubifs/debug.c                           |  8 ++---
 fs/ubifs/dir.c                             | 28 ++++++++---------
 fs/ubifs/file.c                            | 28 ++++++++---------
 fs/ubifs/journal.c                         |  6 ++--
 fs/ubifs/super.c                           | 16 +++++-----
 fs/ubifs/tnc.c                             |  4 +--
 fs/ubifs/xattr.c                           | 14 ++++-----
 fs/udf/directory.c                         | 18 +++++------
 fs/udf/file.c                              |  2 +-
 fs/udf/inode.c                             | 12 ++++----
 fs/udf/namei.c                             |  8 ++---
 fs/udf/super.c                             |  2 +-
 fs/ufs/balloc.c                            |  6 ++--
 fs/ufs/dir.c                               | 10 +++----
 fs/ufs/ialloc.c                            |  6 ++--
 fs/ufs/inode.c                             | 18 +++++------
 fs/ufs/ufs_fs.h                            |  6 ++--
 fs/ufs/util.c                              |  2 +-
 fs/verity/init.c                           |  2 +-
 fs/zonefs/super.c                          |  8 ++---
 include/linux/fs.h                         |  2 +-
 kernel/events/uprobes.c                    |  4 +--
 net/netrom/af_netrom.c                     |  4 +--
 net/rose/af_rose.c                         |  4 +--
 net/socket.c                               |  2 +-
 net/x25/x25_proc.c                         |  4 +--
 security/apparmor/apparmorfs.c             |  4 +--
 security/integrity/integrity_audit.c       |  2 +-
 security/ipe/audit.c                       |  2 +-
 security/lsm_audit.c                       | 10 +++----
 security/selinux/hooks.c                   | 10 +++----
 security/smack/smack_lsm.c                 | 12 ++++----
 179 files changed, 607 insertions(+), 607 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 11711874a325..8c16c8c425cc 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1708,7 +1708,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 
 
 		spin_lock(&buf_obj->name_lock);
-		seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n",
+		seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08llu\t%s\n",
 				buf_obj->size,
 				buf_obj->file->f_flags, buf_obj->file->f_mode,
 				file_count(buf_obj->file),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 1fb956400696..aaa8cdc122c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1676,9 +1676,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
 	attachment = READ_ONCE(bo->tbo.base.import_attach);
 
 	if (attachment)
-		seq_printf(m, " imported from ino:%lu", file_inode(dma_buf->file)->i_ino);
+		seq_printf(m, " imported from ino:%llu", file_inode(dma_buf->file)->i_ino);
 	else if (dma_buf)
-		seq_printf(m, " exported as ino:%lu", file_inode(dma_buf->file)->i_ino);
+		seq_printf(m, " exported as ino:%llu", file_inode(dma_buf->file)->i_ino);
 
 	amdgpu_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
 	amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 862164181bac..c21d33830f5f 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -36,7 +36,7 @@ static void v9fs_begin_writeback(struct netfs_io_request *wreq)
 
 	fid = v9fs_fid_find_inode(wreq->inode, true, INVALID_UID, true);
 	if (!fid) {
-		WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n",
+		WARN_ONCE(1, "folio expected an open fid inode->i_ino=%llx\n",
 			  wreq->inode->i_ino);
 		return;
 	}
@@ -133,7 +133,7 @@ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
 	return 0;
 
 no_fid:
-	WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n",
+	WARN_ONCE(1, "folio expected an open fid inode->i_ino=%llx\n",
 		  rreq->inode->i_ino);
 	return -EINVAL;
 }
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 97abe65bf7c1..d1508b1fe109 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1245,7 +1245,7 @@ static int
 v9fs_vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 		 struct dentry *dentry, const char *symname)
 {
-	p9_debug(P9_DEBUG_VFS, " %lu,%pd,%s\n",
+	p9_debug(P9_DEBUG_VFS, " %llu,%pd,%s\n",
 		 dir->i_ino, dentry, symname);
 
 	return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname);
@@ -1269,7 +1269,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 	char name[1 + U32_MAX_DIGITS + 2]; /* sign + number + \n + \0 */
 	struct p9_fid *oldfid;
 
-	p9_debug(P9_DEBUG_VFS, " %lu,%pd,%pd\n",
+	p9_debug(P9_DEBUG_VFS, " %llu,%pd,%pd\n",
 		 dir->i_ino, dentry, old_dentry);
 
 	oldfid = v9fs_fid_clone(old_dentry);
@@ -1305,7 +1305,7 @@ v9fs_vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	char name[2 + U32_MAX_DIGITS + 1 + U32_MAX_DIGITS + 1];
 	u32 perm;
 
-	p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %x MAJOR: %u MINOR: %u\n",
+	p9_debug(P9_DEBUG_VFS, " %llu,%pd mode: %x MAJOR: %u MINOR: %u\n",
 		 dir->i_ino, dentry, mode,
 		 MAJOR(rdev), MINOR(rdev));
 
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 643e759eacb2..71796a89bcf4 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -691,7 +691,7 @@ v9fs_vfs_symlink_dotl(struct mnt_idmap *idmap, struct inode *dir,
 	struct p9_fid *fid = NULL;
 
 	name = dentry->d_name.name;
-	p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname);
+	p9_debug(P9_DEBUG_VFS, "%llu,%s,%s\n", dir->i_ino, name, symname);
 
 	dfid = v9fs_parent_fid(dentry);
 	if (IS_ERR(dfid)) {
@@ -734,7 +734,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
 	struct p9_fid *dfid, *oldfid;
 	struct v9fs_session_info *v9ses;
 
-	p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %pd, new_name: %pd\n",
+	p9_debug(P9_DEBUG_VFS, "dir ino: %llu, old_name: %pd, new_name: %pd\n",
 		 dir->i_ino, old_dentry, dentry);
 
 	v9ses = v9fs_inode2v9ses(dir);
@@ -798,7 +798,7 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir,
 	struct p9_qid qid;
 	struct posix_acl *dacl = NULL, *pacl = NULL;
 
-	p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %x MAJOR: %u MINOR: %u\n",
+	p9_debug(P9_DEBUG_VFS, " %llu,%pd mode: %x MAJOR: %u MINOR: %u\n",
 		 dir->i_ino, dentry, omode,
 		 MAJOR(rdev), MINOR(rdev));
 
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index fd669daa4e7b..d8a96d8cc826 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -33,7 +33,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh)
 	ino = bh->b_blocknr;
 	offset = affs_hash_name(sb, AFFS_TAIL(sb, bh)->name + 1, AFFS_TAIL(sb, bh)->name[0]);
 
-	pr_debug("%s(dir=%lu, ino=%d)\n", __func__, dir->i_ino, ino);
+	pr_debug("%s(dir=%llu, ino=%d)\n", __func__, dir->i_ino, ino);
 
 	dir_bh = affs_bread(sb, dir->i_ino);
 	if (!dir_bh)
@@ -83,7 +83,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
 	sb = dir->i_sb;
 	rem_ino = rem_bh->b_blocknr;
 	offset = affs_hash_name(sb, AFFS_TAIL(sb, rem_bh)->name+1, AFFS_TAIL(sb, rem_bh)->name[0]);
-	pr_debug("%s(dir=%lu, ino=%d, hashval=%d)\n", __func__, dir->i_ino,
+	pr_debug("%s(dir=%llu, ino=%d, hashval=%d)\n", __func__, dir->i_ino,
 		 rem_ino, offset);
 
 	bh = affs_bread(sb, dir->i_ino);
@@ -128,7 +128,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino)
 	spin_lock(&inode->i_lock);
 	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
 		if (entry_ino == (u32)(long)dentry->d_fsdata) {
-			dentry->d_fsdata = (void *)inode->i_ino;
+			dentry->d_fsdata = (void *)(unsigned long)inode->i_ino;
 			break;
 		}
 	}
@@ -147,7 +147,7 @@ affs_remove_link(struct dentry *dentry)
 	u32 link_ino, ino;
 	int retval;
 
-	pr_debug("%s(key=%ld)\n", __func__, inode->i_ino);
+	pr_debug("%s(key=%llu)\n", __func__, inode->i_ino);
 	retval = -EIO;
 	bh = affs_bread(sb, inode->i_ino);
 	if (!bh)
@@ -279,7 +279,7 @@ affs_remove_header(struct dentry *dentry)
 	if (!inode)
 		goto done;
 
-	pr_debug("%s(key=%ld)\n", __func__, inode->i_ino);
+	pr_debug("%s(key=%llu)\n", __func__, inode->i_ino);
 	retval = -EIO;
 	bh = affs_bread(sb, (u32)(long)dentry->d_fsdata);
 	if (!bh)
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index 5ba9ef2742f6..40bc4ce6af4a 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -125,7 +125,7 @@ affs_alloc_block(struct inode *inode, u32 goal)
 	sb = inode->i_sb;
 	sbi = AFFS_SB(sb);
 
-	pr_debug("balloc(inode=%lu,goal=%u): ", inode->i_ino, goal);
+	pr_debug("balloc(inode=%llu,goal=%u): ", inode->i_ino, goal);
 
 	if (AFFS_I(inode)->i_pa_cnt) {
 		pr_debug("%d\n", AFFS_I(inode)->i_lastalloc+1);
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 5c8d83387a39..11e2bac2e391 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -90,7 +90,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
 	u32			 ino;
 	int			 error = 0;
 
-	pr_debug("%s(ino=%lu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos);
+	pr_debug("%s(ino=%llu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos);
 
 	if (ctx->pos < 2) {
 		data->ino = 0;
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6c9258359ddb..a51dee9d7d7e 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -24,7 +24,7 @@ static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
 static int
 affs_file_open(struct inode *inode, struct file *filp)
 {
-	pr_debug("open(%lu,%d)\n",
+	pr_debug("open(%llu,%d)\n",
 		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
 	atomic_inc(&AFFS_I(inode)->i_opencnt);
 	return 0;
@@ -33,7 +33,7 @@ affs_file_open(struct inode *inode, struct file *filp)
 static int
 affs_file_release(struct inode *inode, struct file *filp)
 {
-	pr_debug("release(%lu, %d)\n",
+	pr_debug("release(%llu, %d)\n",
 		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
 
 	if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
@@ -301,7 +301,7 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
 	struct buffer_head	*ext_bh;
 	u32			 ext;
 
-	pr_debug("%s(%lu, %llu)\n", __func__, inode->i_ino,
+	pr_debug("%s(%llu, %llu)\n", __func__, inode->i_ino,
 		 (unsigned long long)block);
 
 	BUG_ON(block > (sector_t)0x7fffffffUL);
@@ -534,7 +534,7 @@ static int affs_do_read_folio_ofs(struct folio *folio, size_t to, int create)
 	size_t bidx, boff, bsize;
 	u32 tmp;
 
-	pr_debug("%s(%lu, %ld, 0, %zu)\n", __func__, inode->i_ino,
+	pr_debug("%s(%llu, %ld, 0, %zu)\n", __func__, inode->i_ino,
 		 folio->index, to);
 	BUG_ON(to > folio_size(folio));
 	bsize = AFFS_SB(sb)->s_data_blksize;
@@ -566,7 +566,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
 	u32 size, bsize;
 	u32 tmp;
 
-	pr_debug("%s(%lu, %d)\n", __func__, inode->i_ino, newsize);
+	pr_debug("%s(%llu, %d)\n", __func__, inode->i_ino, newsize);
 	bsize = AFFS_SB(sb)->s_data_blksize;
 	bh = NULL;
 	size = AFFS_I(inode)->mmu_private;
@@ -634,7 +634,7 @@ static int affs_read_folio_ofs(struct file *file, struct folio *folio)
 	size_t to;
 	int err;
 
-	pr_debug("%s(%lu, %ld)\n", __func__, inode->i_ino, folio->index);
+	pr_debug("%s(%llu, %ld)\n", __func__, inode->i_ino, folio->index);
 	to = folio_size(folio);
 	if (folio_pos(folio) + to > inode->i_size) {
 		to = inode->i_size - folio_pos(folio);
@@ -658,7 +658,7 @@ static int affs_write_begin_ofs(const struct kiocb *iocb,
 	pgoff_t index;
 	int err = 0;
 
-	pr_debug("%s(%lu, %llu, %llu)\n", __func__, inode->i_ino, pos,
+	pr_debug("%s(%llu, %llu, %llu)\n", __func__, inode->i_ino, pos,
 		 pos + len);
 	if (pos > AFFS_I(inode)->mmu_private) {
 		/* XXX: this probably leaves a too-big i_size in case of
@@ -710,7 +710,7 @@ static int affs_write_end_ofs(const struct kiocb *iocb,
 	 * due to write_begin.
 	 */
 
-	pr_debug("%s(%lu, %llu, %llu)\n", __func__, inode->i_ino, pos,
+	pr_debug("%s(%llu, %llu, %llu)\n", __func__, inode->i_ino, pos,
 		 pos + len);
 	bsize = AFFS_SB(sb)->s_data_blksize;
 	data = folio_address(folio);
@@ -854,7 +854,7 @@ affs_free_prealloc(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
 
-	pr_debug("free_prealloc(ino=%lu)\n", inode->i_ino);
+	pr_debug("free_prealloc(ino=%llu)\n", inode->i_ino);
 
 	while (AFFS_I(inode)->i_pa_cnt) {
 		AFFS_I(inode)->i_pa_cnt--;
@@ -874,7 +874,7 @@ affs_truncate(struct inode *inode)
 	struct buffer_head *ext_bh;
 	int i;
 
-	pr_debug("truncate(inode=%lu, oldsize=%llu, newsize=%llu)\n",
+	pr_debug("truncate(inode=%llu, oldsize=%llu, newsize=%llu)\n",
 		 inode->i_ino, AFFS_I(inode)->mmu_private, inode->i_size);
 
 	last_blk = 0;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 0bfc7d151dcd..561fc0185e89 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -32,7 +32,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
 	if (!(inode_state_read_once(inode) & I_NEW))
 		return inode;
 
-	pr_debug("affs_iget(%lu)\n", inode->i_ino);
+	pr_debug("affs_iget(%llu)\n", inode->i_ino);
 
 	block = inode->i_ino;
 	bh = affs_bread(sb, block);
@@ -171,14 +171,14 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	uid_t			 uid;
 	gid_t			 gid;
 
-	pr_debug("write_inode(%lu)\n", inode->i_ino);
+	pr_debug("write_inode(%llu)\n", inode->i_ino);
 
 	if (!inode->i_nlink)
 		// possibly free block
 		return 0;
 	bh = affs_bread(sb, inode->i_ino);
 	if (!bh) {
-		affs_error(sb,"write_inode","Cannot read block %lu",inode->i_ino);
+		affs_error(sb, "write_inode", "Cannot read block %llu", inode->i_ino);
 		return -EIO;
 	}
 	tail = AFFS_TAIL(sb, bh);
@@ -219,7 +219,7 @@ affs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
 	struct inode *inode = d_inode(dentry);
 	int error;
 
-	pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid);
+	pr_debug("notify_change(%llu,0x%x)\n", inode->i_ino, attr->ia_valid);
 
 	error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
 	if (error)
@@ -260,7 +260,7 @@ void
 affs_evict_inode(struct inode *inode)
 {
 	unsigned long cache_page;
-	pr_debug("evict_inode(ino=%lu, nlink=%u)\n",
+	pr_debug("evict_inode(ino=%llu, nlink=%u)\n",
 		 inode->i_ino, inode->i_nlink);
 	truncate_inode_pages_final(&inode->i_data);
 
@@ -353,7 +353,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
 	u32 block = 0;
 	int retval;
 
-	pr_debug("%s(dir=%lu, inode=%lu, \"%pd\", type=%d)\n", __func__,
+	pr_debug("%s(dir=%llu, inode=%llu, \"%pd\", type=%d)\n", __func__,
 		 dir->i_ino, inode->i_ino, dentry, type);
 
 	retval = -EIO;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index f883be50db12..870532192600 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -235,7 +235,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 int
 affs_unlink(struct inode *dir, struct dentry *dentry)
 {
-	pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino,
+	pr_debug("%s(dir=%llu, %llu \"%pd\")\n", __func__, dir->i_ino,
 		 d_inode(dentry)->i_ino, dentry);
 
 	return affs_remove_header(dentry);
@@ -249,7 +249,7 @@ affs_create(struct mnt_idmap *idmap, struct inode *dir,
 	struct inode	*inode;
 	int		 error;
 
-	pr_debug("%s(%lu,\"%pd\",0%ho)\n",
+	pr_debug("%s(%llu,\"%pd\",0%ho)\n",
 		 __func__, dir->i_ino, dentry, mode);
 
 	inode = affs_new_inode(dir);
@@ -280,7 +280,7 @@ affs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	struct inode		*inode;
 	int			 error;
 
-	pr_debug("%s(%lu,\"%pd\",0%ho)\n",
+	pr_debug("%s(%llu,\"%pd\",0%ho)\n",
 		 __func__, dir->i_ino, dentry, mode);
 
 	inode = affs_new_inode(dir);
@@ -306,7 +306,7 @@ affs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 int
 affs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino,
+	pr_debug("%s(dir=%llu, %llu \"%pd\")\n", __func__, dir->i_ino,
 		 d_inode(dentry)->i_ino, dentry);
 
 	return affs_remove_header(dentry);
@@ -323,7 +323,7 @@ affs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	int			 i, maxlen, error;
 	char			 c, lc;
 
-	pr_debug("%s(%lu,\"%pd\" -> \"%s\")\n",
+	pr_debug("%s(%llu,\"%pd\" -> \"%s\")\n",
 		 __func__, dir->i_ino, dentry, symname);
 
 	maxlen = AFFS_SB(sb)->s_hashsize * sizeof(u32) - 1;
@@ -395,7 +395,7 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = d_inode(old_dentry);
 
-	pr_debug("%s(%lu, %lu, \"%pd\")\n", __func__, inode->i_ino, dir->i_ino,
+	pr_debug("%s(%llu, %llu, \"%pd\")\n", __func__, inode->i_ino, dir->i_ino,
 		 dentry);
 
 	return affs_add_entry(dir, inode, dentry, ST_LINKFILE);
@@ -511,7 +511,7 @@ int affs_rename2(struct mnt_idmap *idmap, struct inode *old_dir,
 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
 		return -EINVAL;
 
-	pr_debug("%s(old=%lu,\"%pd\" to new=%lu,\"%pd\")\n", __func__,
+	pr_debug("%s(old=%llu,\"%pd\" to new=%llu,\"%pd\")\n", __func__,
 		 old_dir->i_ino, old_dentry, new_dir->i_ino, new_dentry);
 
 	if (flags & RENAME_EXCHANGE)
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index 094aec8d17b8..de31ed2e71df 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -21,7 +21,7 @@ static int affs_symlink_read_folio(struct file *file, struct folio *folio)
 	char			 c;
 	char			 lc;
 
-	pr_debug("get_link(ino=%lu)\n", inode->i_ino);
+	pr_debug("get_link(ino=%llu)\n", inode->i_ino);
 
 	bh = affs_bread(inode->i_sb, inode->i_ino);
 	if (!bh)
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 78caef3f1338..aaaa55878ffd 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -148,7 +148,7 @@ static bool afs_dir_check_block(struct afs_vnode *dvnode, size_t progress,
 				union afs_xdr_dir_block *block)
 {
 	if (block->hdr.magic != AFS_DIR_MAGIC) {
-		pr_warn("%s(%lx): [%zx] bad magic %04x\n",
+		pr_warn("%s(%llx): [%zx] bad magic %04x\n",
 		       __func__, dvnode->netfs.inode.i_ino,
 		       progress, ntohs(block->hdr.magic));
 		trace_afs_dir_check_failed(dvnode, progress);
@@ -214,7 +214,7 @@ static int afs_dir_check(struct afs_vnode *dvnode)
  */
 static int afs_dir_open(struct inode *inode, struct file *file)
 {
-	_enter("{%lu}", inode->i_ino);
+	_enter("{%llu}", inode->i_ino);
 
 	BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
 	BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
@@ -523,7 +523,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 	int retry_limit = 100;
 	int ret;
 
-	_enter("{%lu},%llx,,", dir->i_ino, ctx->pos);
+	_enter("{%llu},%llx,,", dir->i_ino, ctx->pos);
 
 	do {
 		if (--retry_limit < 0) {
@@ -610,7 +610,7 @@ static int afs_do_lookup_one(struct inode *dir, const struct qstr *name,
 	};
 	int ret;
 
-	_enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name);
+	_enter("{%llu},{%.*s},", dir->i_ino, name->len, name->name);
 
 	/* search the directory */
 	ret = afs_dir_iterate(dir, &cookie.ctx, NULL, _dir_version);
@@ -783,7 +783,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
 	long ret;
 	int i;
 
-	_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
+	_enter("{%llu},%p{%pd},", dir->i_ino, dentry, dentry);
 
 	cookie = kzalloc_obj(struct afs_lookup_cookie);
 	if (!cookie)
diff --git a/fs/afs/dir_search.c b/fs/afs/dir_search.c
index d2516e55b5ed..104411c0692f 100644
--- a/fs/afs/dir_search.c
+++ b/fs/afs/dir_search.c
@@ -194,7 +194,7 @@ int afs_dir_search(struct afs_vnode *dvnode, const struct qstr *name,
 	struct afs_dir_iter iter = { .dvnode = dvnode, };
 	int ret, retry_limit = 3;
 
-	_enter("{%lu},,,", dvnode->netfs.inode.i_ino);
+	_enter("{%llu},,,", dvnode->netfs.inode.i_ino);
 
 	if (!afs_dir_init_iter(&iter, name))
 		return -ENOENT;
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index aa56e8951e03..1d5e33bc7502 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -59,7 +59,7 @@ static struct inode *afs_iget_pseudo_dir(struct super_block *sb, ino_t ino)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	_debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
+	_debug("GOT INODE %p { ino=%llu, vl=%llx, vn=%llx, u=%x }",
 	       inode, inode->i_ino, fid.vid, fid.vnode, fid.unique);
 
 	vnode = AFS_FS_I(inode);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index dde1857fcabb..a5173434f786 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -683,7 +683,7 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
 	struct key *key;
 	int ret, seq;
 
-	_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
+	_enter("{ ino=%llu v=%u }", inode->i_ino, inode->i_generation);
 
 	if (vnode->volume &&
 	    !(query_flags & AT_STATX_DONT_SYNC) &&
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index c53dc551053b..c1e210cec436 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -92,7 +92,7 @@ static int autofs_show_options(struct seq_file *m, struct dentry *root)
 		seq_puts(m, ",ignore");
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	if (sbi->pipe)
-		seq_printf(m, ",pipe_ino=%ld", file_inode(sbi->pipe)->i_ino);
+		seq_printf(m, ",pipe_ino=%llu", file_inode(sbi->pipe)->i_ino);
 	else
 		seq_puts(m, ",pipe_ino=-1");
 #endif
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index cecbc92f959a..c12caae9a967 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -140,20 +140,20 @@ befs_get_block(struct inode *inode, sector_t block,
 	int res;
 	ulong disk_off;
 
-	befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld",
-		   (unsigned long)inode->i_ino, (long)block);
+	befs_debug(sb, "---> befs_get_block() for inode %llu, block %ld",
+		   inode->i_ino, (long)block);
 	if (create) {
 		befs_error(sb, "befs_get_block() was asked to write to "
-			   "block %ld in inode %lu", (long)block,
-			   (unsigned long)inode->i_ino);
+			   "block %ld in inode %llu", (long)block,
+			   inode->i_ino);
 		return -EPERM;
 	}
 
 	res = befs_fblock2brun(sb, ds, block, &run);
 	if (res != BEFS_OK) {
 		befs_error(sb,
-			   "<--- %s for inode %lu, block %ld ERROR",
-			   __func__, (unsigned long)inode->i_ino,
+			   "<--- %s for inode %llu, block %ld ERROR",
+			   __func__, inode->i_ino,
 			   (long)block);
 		return -EFBIG;
 	}
@@ -162,8 +162,8 @@ befs_get_block(struct inode *inode, sector_t block,
 
 	map_bh(bh_result, inode->i_sb, disk_off);
 
-	befs_debug(sb, "<--- %s for inode %lu, block %ld, disk address %lu",
-		  __func__, (unsigned long)inode->i_ino, (long)block,
+	befs_debug(sb, "<--- %s for inode %llu, block %ld, disk address %lu",
+		  __func__, inode->i_ino, (long)block,
 		  (unsigned long)disk_off);
 
 	return 0;
@@ -181,7 +181,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 	char *utfname;
 	const char *name = dentry->d_name.name;
 
-	befs_debug(sb, "---> %s name %pd inode %ld", __func__,
+	befs_debug(sb, "---> %s name %pd inode %llu", __func__,
 		   dentry, dir->i_ino);
 
 	/* Convert to UTF-8 */
@@ -224,7 +224,7 @@ befs_readdir(struct file *file, struct dir_context *ctx)
 	size_t keysize;
 	char keybuf[BEFS_NAME_LEN + 1];
 
-	befs_debug(sb, "---> %s name %pD, inode %ld, ctx->pos %lld",
+	befs_debug(sb, "---> %s name %pD, inode %llu, ctx->pos %lld",
 		  __func__, file, inode->i_ino, ctx->pos);
 
 	while (1) {
@@ -233,7 +233,7 @@ befs_readdir(struct file *file, struct dir_context *ctx)
 
 		if (result == BEFS_ERR) {
 			befs_debug(sb, "<--- %s ERROR", __func__);
-			befs_error(sb, "IO error reading %pD (inode %lu)",
+			befs_error(sb, "IO error reading %pD (inode %llu)",
 				   file, inode->i_ino);
 			return -EIO;
 
@@ -324,7 +324,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 	bh = sb_bread(sb, inode->i_ino);
 	if (!bh) {
 		befs_error(sb, "unable to read inode block - "
-			   "inode = %lu", inode->i_ino);
+			   "inode = %llu", inode->i_ino);
 		goto unacquire_none;
 	}
 
@@ -333,7 +333,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 	befs_dump_inode(sb, raw_inode);
 
 	if (befs_check_inode(sb, raw_inode, inode->i_ino) != BEFS_OK) {
-		befs_error(sb, "Bad inode: %lu", inode->i_ino);
+		befs_error(sb, "Bad inode: %llu", inode->i_ino);
 		goto unacquire_bh;
 	}
 
@@ -407,7 +407,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 			inode->i_op = &simple_symlink_inode_operations;
 		}
 	} else {
-		befs_error(sb, "Inode %lu is not a regular file, "
+		befs_error(sb, "Inode %llu is not a regular file, "
 			   "directory or symlink. THAT IS WRONG! BeFS has no "
 			   "on disk special files", inode->i_ino);
 		goto unacquire_bh;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index c375e22c4c0c..481514db4eae 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -35,7 +35,7 @@ static int bfs_readdir(struct file *f, struct dir_context *ctx)
 	int block;
 
 	if (ctx->pos & (BFS_DIRENT_SIZE - 1)) {
-		printf("Bad f_pos=%08lx for %s:%08lx\n",
+		printf("Bad f_pos=%08lx for %s:%08llx\n",
 					(unsigned long)ctx->pos,
 					dir->i_sb->s_id, dir->i_ino);
 		return -EINVAL;
@@ -180,7 +180,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
 		goto out_brelse;
 
 	if (!inode->i_nlink) {
-		printf("unlinking non-existent file %s:%lu (nlink=%d)\n",
+		printf("unlinking non-existent file %s:%llu (nlink=%d)\n",
 					inode->i_sb->s_id, inode->i_ino,
 					inode->i_nlink);
 		set_nlink(inode, 1);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index eaf47851c65f..d879b80a0bed 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -93,7 +93,7 @@ static int cachefiles_read(struct netfs_cache_resources *cres,
 	object = cachefiles_cres_object(cres);
 	file = cachefiles_cres_file(cres);
 
-	_enter("%pD,%li,%llx,%zx/%llx",
+	_enter("%pD,%llu,%llx,%zx/%llx",
 	       file, file_inode(file)->i_ino, start_pos, len,
 	       i_size_read(file_inode(file)));
 
@@ -214,7 +214,7 @@ static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
 	file = cachefiles_cres_file(cres);
 	granularity = max_t(size_t, object->volume->cache->bsize, granularity);
 
-	_enter("%pD,%li,%llx,%zx/%llx",
+	_enter("%pD,%llu,%llx,%zx/%llx",
 	       file, file_inode(file)->i_ino, start, len,
 	       i_size_read(file_inode(file)));
 
@@ -294,7 +294,7 @@ int __cachefiles_write(struct cachefiles_object *object,
 	fscache_count_write();
 	cache = object->volume->cache;
 
-	_enter("%pD,%li,%llx,%zx/%llx",
+	_enter("%pD,%llu,%llx,%zx/%llx",
 	       file, file_inode(file)->i_ino, start_pos, len,
 	       i_size_read(file_inode(file)));
 
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index e5ec90dccc27..4fdf7687aacb 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -147,7 +147,7 @@ retry:
 		}
 		ASSERT(d_backing_inode(subdir));
 
-		_debug("mkdir -> %pd{ino=%lu}",
+		_debug("mkdir -> %pd{ino=%llu}",
 		       subdir, d_backing_inode(subdir)->i_ino);
 		if (_is_new)
 			*_is_new = true;
@@ -158,7 +158,7 @@ retry:
 	end_creating_keep(subdir);
 
 	if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) {
-		pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+		pr_notice("cachefiles: Inode already in use: %pd (B=%llx)\n",
 			  subdir, d_inode(subdir)->i_ino);
 		goto mark_error;
 	}
@@ -183,7 +183,7 @@ retry:
 	    !d_backing_inode(subdir)->i_op->unlink)
 		goto check_error;
 
-	_leave(" = [%lu]", d_backing_inode(subdir)->i_ino);
+	_leave(" = [%llu]", d_backing_inode(subdir)->i_ino);
 	return subdir;
 
 check_error:
@@ -529,7 +529,7 @@ static bool cachefiles_create_file(struct cachefiles_object *object)
 
 	set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags);
 	set_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags);
-	_debug("create -> %pD{ino=%lu}", file, file_inode(file)->i_ino);
+	_debug("create -> %pD{ino=%llu}", file, file_inode(file)->i_ino);
 	object->file = file;
 	return true;
 }
@@ -549,7 +549,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
 	_enter("%pd", dentry);
 
 	if (!cachefiles_mark_inode_in_use(object, d_inode(dentry))) {
-		pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+		pr_notice("cachefiles: Inode already in use: %pd (B=%llx)\n",
 			  dentry, d_inode(dentry)->i_ino);
 		return false;
 	}
@@ -657,7 +657,7 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)
 	if (!ret)
 		return false;
 
-	_leave(" = t [%lu]", file_inode(object->file)->i_ino);
+	_leave(" = t [%llu]", file_inode(object->file)->i_ino);
 	return true;
 
 new_file:
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 52383b1d0ba6..f8ae78b3f7b6 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -179,7 +179,7 @@ int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
 			ret = 0;
 		else if (ret != -ENOMEM)
 			cachefiles_io_error(cache,
-					    "Can't remove xattr from %lu"
+					    "Can't remove xattr from %llu"
 					    " (error %d)",
 					    d_backing_inode(dentry)->i_ino, -ret);
 	}
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c
index f3de43ccb470..64d240759277 100644
--- a/fs/ceph/crypto.c
+++ b/fs/ceph/crypto.c
@@ -272,7 +272,7 @@ int ceph_encode_encrypted_dname(struct inode *parent, char *buf, int elen)
 	/* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */
 	WARN_ON(elen > 240);
 	if (dir != parent) // leading _ is already there; append _<inum>
-		elen += 1 + sprintf(p + elen, "_%ld", dir->i_ino);
+		elen += 1 + sprintf(p + elen, "_%llu", dir->i_ino);
 
 out:
 	kfree(cryptbuf);
@@ -377,7 +377,7 @@ int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname,
 	if (!ret && (dir != fname->dir)) {
 		char tmp_buf[BASE64_CHARS(NAME_MAX)];
 
-		name_len = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%ld",
+		name_len = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%llu",
 				    oname->len, oname->name, dir->i_ino);
 		memcpy(oname->name, tmp_buf, name_len);
 		oname->len = name_len;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index c64b8cd81568..d6b9fc3cc1ca 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -533,7 +533,7 @@ int coda_revalidate_inode(struct inode *inode)
 		coda_vattr_to_iattr(inode, &attr);
 
 		if ((old_mode & S_IFMT) != (inode->i_mode & S_IFMT)) {
-			pr_warn("inode %ld, fid %s changed type!\n",
+			pr_warn("inode %llu, fid %s changed type!\n",
 				inode->i_ino, coda_f2s(&(cii->c_fid)));
 		}
 
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index ad1654f3adf8..40b43866e6a5 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -257,7 +257,7 @@ static int coda_fill_super(struct super_block *sb, struct fs_context *fc)
 		goto error;
 	} 
 
-	pr_info("%s: rootinode is %ld dev %s\n",
+	pr_info("%s: rootinode is %llu dev %s\n",
 		__func__, root->i_ino, root->i_sb->s_id);
 	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index e0ba9cd640dc..4edbfccd0bbe 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -125,7 +125,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
 				old_decode_dev(cramfs_inode->size));
 		break;
 	default:
-		printk(KERN_DEBUG "CRAMFS: Invalid file type 0%04o for inode %lu.\n",
+		printk(KERN_DEBUG "CRAMFS: Invalid file type 0%04o for inode %llu.\n",
 		       inode->i_mode, inode->i_ino);
 		iget_failed(inode);
 		return ERR_PTR(-EIO);
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 07f9cbfe3ea4..570a2231c945 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -365,7 +365,7 @@ void fscrypt_msg(const struct inode *inode, const char *level,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 	if (inode && inode->i_ino)
-		printk("%sfscrypt (%s, inode %lu): %pV\n",
+		printk("%sfscrypt (%s, inode %llu): %pV\n",
 		       level, inode->i_sb->s_id, inode->i_ino, &vaf);
 	else if (inode)
 		printk("%sfscrypt (%s): %pV\n", level, inode->i_sb->s_id, &vaf);
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index b97de0d1430f..a7a8a3f581a0 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -62,7 +62,7 @@ int fscrypt_file_open(struct inode *inode, struct file *filp)
 	dentry_parent = dget_parent(dentry);
 	if (!fscrypt_has_permitted_context(d_inode(dentry_parent), inode)) {
 		fscrypt_warn(inode,
-			     "Inconsistent encryption context (parent directory: %lu)",
+			     "Inconsistent encryption context (parent directory: %llu)",
 			     d_inode(dentry_parent)->i_ino);
 		err = -EPERM;
 	}
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index 9ec6e5ef0947..be8e6e8011f2 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -969,8 +969,8 @@ static int check_for_busy_inodes(struct super_block *sb,
 {
 	struct list_head *pos;
 	size_t busy_count = 0;
-	unsigned long ino;
 	char ino_str[50] = "";
+	u64 ino;
 
 	spin_lock(&mk->mk_decrypted_inodes_lock);
 
@@ -994,7 +994,7 @@ static int check_for_busy_inodes(struct super_block *sb,
 
 	/* If the inode is currently being created, ino may still be 0. */
 	if (ino)
-		snprintf(ino_str, sizeof(ino_str), ", including ino %lu", ino);
+		snprintf(ino_str, sizeof(ino_str), ", including ino %llu", ino);
 
 	fscrypt_warn(NULL,
 		     "%s: %zu inode(s) still busy after removing key with %s %*phN%s",
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index 40fa05688d3a..df58ca4a5e3c 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -91,7 +91,7 @@ select_encryption_mode(const union fscrypt_policy *policy,
 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
 		return &fscrypt_modes[fscrypt_policy_fnames_mode(policy)];
 
-	WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
+	WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %llu, which is not encryptable (file type %d)\n",
 		  inode->i_ino, (inode->i_mode & S_IFMT));
 	return ERR_PTR(-EINVAL);
 }
diff --git a/fs/dcache.c b/fs/dcache.c
index 7ba1801d8132..0a4ffda07360 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1637,11 +1637,11 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
 	if (dentry == _data && dentry->d_lockref.count == 1)
 		return D_WALK_CONTINUE;
 
-	WARN(1, "BUG: Dentry %p{i=%lx,n=%pd} "
+	WARN(1, "BUG: Dentry %p{i=%llx,n=%pd} "
 			" still in use (%d) [unmount of %s %s]\n",
 		       dentry,
 		       dentry->d_inode ?
-		       dentry->d_inode->i_ino : 0UL,
+		       dentry->d_inode->i_ino : (u64)0,
 		       dentry,
 		       dentry->d_lockref.count,
 		       dentry->d_sb->s_type->name,
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 3b59346d68c5..f25c9a49e251 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1313,7 +1313,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 		rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
 		if (rc) {
 			printk(KERN_DEBUG "Valid eCryptfs headers not found in "
-			       "file header region or xattr region, inode %lu\n",
+			       "file header region or xattr region, inode %llu\n",
 				ecryptfs_inode->i_ino);
 			rc = -EINVAL;
 			goto out;
@@ -1323,7 +1323,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 						ECRYPTFS_DONT_VALIDATE_HEADER_SIZE);
 		if (rc) {
 			printk(KERN_DEBUG "Valid eCryptfs headers not found in "
-			       "file xattr region either, inode %lu\n",
+			       "file xattr region either, inode %llu\n",
 				ecryptfs_inode->i_ino);
 			rc = -EINVAL;
 		}
@@ -1335,7 +1335,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 			       "crypto metadata only in the extended attribute "
 			       "region, but eCryptfs was mounted without "
 			       "xattr support enabled. eCryptfs will not treat "
-			       "this like an encrypted file, inode %lu\n",
+			       "this like an encrypted file, inode %llu\n",
 				ecryptfs_inode->i_ino);
 			rc = -EINVAL;
 		}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 7929411837cf..49b0fbe0428a 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -253,7 +253,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	if (rc)
 		goto out_put;
 	ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = "
-			"[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
+			"[0x%.16llx] size: [0x%.16llx]\n", inode, inode->i_ino,
 			(unsigned long long)i_size_read(inode));
 	goto out;
 out_put:
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 28407578f83a..4b132729e638 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -132,7 +132,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
 	for(i = 0; i < EFS_DIRECTEXTENTS; i++) {
 		extent_copy(&(efs_inode->di_u.di_extents[i]), &(in->extents[i]));
 		if (i < in->numextents && in->extents[i].cooked.ex_magic != 0) {
-			pr_warn("extent %d has bad magic number in inode %lu\n",
+			pr_warn("extent %d has bad magic number in inode %llu\n",
 				i, inode->i_ino);
 			brelse(bh);
 			goto read_inode_error;
@@ -140,7 +140,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
 	}
 
 	brelse(bh);
-	pr_debug("efs_iget(): inode %lu, extents %d, mode %o\n",
+	pr_debug("efs_iget(): inode %llu, extents %d, mode %o\n",
 		 inode->i_ino, in->numextents, inode->i_mode);
 	switch (inode->i_mode & S_IFMT) {
 		case S_IFDIR: 
@@ -171,7 +171,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
 	return inode;
         
 read_inode_error:
-	pr_warn("failed to read inode %lu\n", inode->i_ino);
+	pr_warn("failed to read inode %llu\n", inode->i_ino);
 	iget_failed(inode);
 	return ERR_PTR(-EIO);
 }
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a8c278c50083..a1731dafb1e1 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1080,7 +1080,7 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
 		struct inode *inode = file_inode(epi->ffd.file);
 
 		seq_printf(m, "tfd: %8d events: %8x data: %16llx "
-			   " pos:%lli ino:%lx sdev:%x\n",
+			   " pos:%lli ino:%llx sdev:%x\n",
 			   epi->ffd.fd, epi->event.events,
 			   (long long)epi->event.data,
 			   (long long)epi->ffd.file->f_pos,
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 6c9be60a3e48..5c3183ce350e 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -130,12 +130,12 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
 		parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
 
 	if (IS_ERR(parent)) {
-		dprintk("get_parent of %lu failed, err %ld\n",
+		dprintk("get_parent of %llu failed, err %ld\n",
 			dentry->d_inode->i_ino, PTR_ERR(parent));
 		return parent;
 	}
 
-	dprintk("%s: find name of %lu in %lu\n", __func__,
+	dprintk("%s: find name of %llu in %llu\n", __func__,
 		dentry->d_inode->i_ino, parent->d_inode->i_ino);
 	err = exportfs_get_name(mnt, parent, nbuf, dentry);
 	if (err == -ENOENT)
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 395fc36c089b..278d4be8ecbe 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -141,7 +141,7 @@ out:
 Ebadsize:
 	if (!quiet)
 		ext2_error(sb, __func__,
-			"size of directory #%lu is not a multiple "
+			"size of directory #%llu is not a multiple "
 			"of chunk size", dir->i_ino);
 	goto fail;
 Eshort:
@@ -160,7 +160,7 @@ Einumber:
 	error = "inode out of bounds";
 bad_entry:
 	if (!quiet)
-		ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - "
+		ext2_error(sb, __func__, "bad entry in directory #%llu: : %s - "
 			"offset=%llu, inode=%lu, rec_len=%d, name_len=%d",
 			dir->i_ino, error, folio_pos(folio) + offs,
 			(unsigned long) le32_to_cpu(p->inode),
@@ -170,7 +170,7 @@ Eend:
 	if (!quiet) {
 		p = (ext2_dirent *)(kaddr + offs);
 		ext2_error(sb, "ext2_check_folio",
-			"entry in directory #%lu spans the page boundary"
+			"entry in directory #%llu spans the page boundary"
 			"offset=%llu, inode=%lu",
 			dir->i_ino, folio_pos(folio) + offs,
 			(unsigned long) le32_to_cpu(p->inode));
@@ -281,7 +281,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
 
 		if (IS_ERR(kaddr)) {
 			ext2_error(sb, __func__,
-				   "bad page in #%lu",
+				   "bad page in #%llu",
 				   inode->i_ino);
 			ctx->pos += PAGE_SIZE - offset;
 			return PTR_ERR(kaddr);
@@ -383,7 +383,7 @@ struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir,
 		/* next folio is past the blocks we've got */
 		if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) {
 			ext2_error(dir->i_sb, __func__,
-				"dir %lu size %lld exceeds block count %llu",
+				"dir %llu size %lld exceeds block count %llu",
 				dir->i_ino, dir->i_size,
 				(unsigned long long)dir->i_blocks);
 			goto out;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index fdf63e9c6e7c..bf21b57cf98c 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -169,9 +169,10 @@ static void ext2_preread_inode(struct inode *inode)
 	unsigned long block_group;
 	unsigned long offset;
 	unsigned long block;
+	unsigned int ino = inode->i_ino;
 	struct ext2_group_desc * gdp;
 
-	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
+	block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
 	gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL);
 	if (gdp == NULL)
 		return;
@@ -179,7 +180,7 @@ static void ext2_preread_inode(struct inode *inode)
 	/*
 	 * Figure out the offset within the block group inode table
 	 */
-	offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
+	offset = ((ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
 				EXT2_INODE_SIZE(inode->i_sb);
 	block = le32_to_cpu(gdp->bg_inode_table) +
 				(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
@@ -381,7 +382,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
 	 *
 	 * So add our directory's i_ino into the starting point for the hash.
 	 */
-	group = (group + parent->i_ino) % ngroups;
+	group = (group + (unsigned int)parent->i_ino) % ngroups;
 
 	/*
 	 * Use a quadratic hash to find a group with a free inode and some
@@ -589,7 +590,7 @@ got:
 		goto fail_free_drop;
 
 	mark_inode_dirty(inode);
-	ext2_debug("allocating inode %lu\n", inode->i_ino);
+	ext2_debug("allocating inode %llu\n", inode->i_ino);
 	ext2_preread_inode(inode);
 	return inode;
 
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index dbfe9098a124..45286c0c3b6b 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1152,7 +1152,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de
 			 */ 
 			if (!bh) {
 				ext2_error(inode->i_sb, "ext2_free_branches",
-					"Read failure, inode=%ld, block=%ld",
+					"Read failure, inode=%llu, block=%ld",
 					inode->i_ino, nr);
 				continue;
 			}
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index c885dcc3bd0d..14ada70db36a 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -227,7 +227,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
 	if (!ext2_xattr_header_valid(HDR(bh))) {
 bad_block:
 		ext2_error(inode->i_sb, "ext2_xattr_get",
-			"inode %ld: bad block %d", inode->i_ino,
+			"inode %llu: bad block %d", inode->i_ino,
 			EXT2_I(inode)->i_file_acl);
 		error = -EIO;
 		goto cleanup;
@@ -313,7 +313,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 	if (!ext2_xattr_header_valid(HDR(bh))) {
 bad_block:
 		ext2_error(inode->i_sb, "ext2_xattr_list",
-			"inode %ld: bad block %d", inode->i_ino,
+			"inode %llu: bad block %d", inode->i_ino,
 			EXT2_I(inode)->i_file_acl);
 		error = -EIO;
 		goto cleanup;
@@ -454,7 +454,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
 		if (!ext2_xattr_header_valid(header)) {
 bad_block:
 			ext2_error(sb, "ext2_xattr_set",
-				"inode %ld: bad block %d", inode->i_ino, 
+				"inode %llu: bad block %d", inode->i_ino,
 				   EXT2_I(inode)->i_file_acl);
 			error = -EIO;
 			goto cleanup;
@@ -833,7 +833,7 @@ ext2_xattr_delete_inode(struct inode *inode)
 
 	if (!ext2_data_block_valid(sbi, EXT2_I(inode)->i_file_acl, 1)) {
 		ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
-			"inode %ld: xattr block %d is out of data blocks range",
+			"inode %llu: xattr block %d is out of data blocks range",
 			inode->i_ino, EXT2_I(inode)->i_file_acl);
 		goto cleanup;
 	}
@@ -841,14 +841,14 @@ ext2_xattr_delete_inode(struct inode *inode)
 	bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
 	if (!bh) {
 		ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
-			"inode %ld: block %d read error", inode->i_ino,
+			"inode %llu: block %d read error", inode->i_ino,
 			EXT2_I(inode)->i_file_acl);
 		goto cleanup;
 	}
 	ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
 	if (!ext2_xattr_header_valid(HDR(bh))) {
 		ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
-			"inode %ld: bad block %d", inode->i_ino,
+			"inode %llu: bad block %d", inode->i_ino,
 			EXT2_I(inode)->i_file_acl);
 		goto cleanup;
 	}
@@ -952,7 +952,7 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
 		bh = sb_bread(inode->i_sb, ce->e_value);
 		if (!bh) {
 			ext2_error(inode->i_sb, "ext2_xattr_cache_find",
-				"inode %ld: block %ld read error",
+				"inode %llu: block %ld read error",
 				inode->i_ino, (unsigned long) ce->e_value);
 		} else {
 			lock_buffer(bh);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 28b2a3deb954..17edd678fa87 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -535,7 +535,7 @@ static int call_filldir(struct file *file, struct dir_context *ctx,
 	struct super_block *sb = inode->i_sb;
 
 	if (!fname) {
-		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: "
+		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%llu: comm %s: "
 			 "called with null fname?!?", __func__, __LINE__,
 			 inode->i_ino, current->comm);
 		return 0;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 293f698b7042..85e6c2b543a8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -92,7 +92,7 @@
  */
 #ifdef CONFIG_EXT4_DEBUG
 #define ext_debug(ino, fmt, ...)					\
-	pr_debug("[%s/%d] EXT4-fs (%s): ino %lu: (%s, %d): %s:" fmt,	\
+	pr_debug("[%s/%d] EXT4-fs (%s): ino %llu: (%s, %d): %s:" fmt,	\
 		 current->comm, task_pid_nr(current),			\
 		 ino->i_sb->s_id, ino->i_ino, __FILE__, __LINE__,	\
 		 __func__, ##__VA_ARGS__)
@@ -3229,7 +3229,7 @@ extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
 extern __printf(7, 8)
 void __ext4_grp_locked_error(const char *, unsigned int,
 			     struct super_block *, ext4_group_t,
-			     unsigned long, ext4_fsblk_t,
+			     u64, ext4_fsblk_t,
 			     const char *, ...);
 
 #define EXT4_ERROR_INODE(inode, fmt, a...) \
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ae3804f36535..042e1555a674 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4603,7 +4603,7 @@ retry:
 		}
 		ret = ext4_map_blocks(handle, inode, &map, flags);
 		if (ret <= 0) {
-			ext4_debug("inode #%lu: block %u: len %u: "
+			ext4_debug("inode #%llu: block %u: len %u: "
 				   "ext4_ext_map_blocks returned %d",
 				   inode->i_ino, map.m_lblk,
 				   map.m_len, ret);
@@ -4955,7 +4955,7 @@ int ext4_convert_unwritten_extents_atomic(handle_t *handle, struct inode *inode,
 		ret = ext4_map_blocks(handle, inode, &map, flags);
 		if (ret != max_blocks)
 			ext4_msg(inode->i_sb, KERN_INFO,
-				     "inode #%lu: block %u: len %u: "
+				     "inode #%llu: block %u: len %u: "
 				     "split block mapping found for atomic write, "
 				     "ret = %d",
 				     inode->i_ino, map.m_lblk,
@@ -4974,7 +4974,7 @@ int ext4_convert_unwritten_extents_atomic(handle_t *handle, struct inode *inode,
 
 	if (ret <= 0 || ret2)
 		ext4_warning(inode->i_sb,
-			     "inode #%lu: block %u: len %u: "
+			     "inode #%llu: block %u: len %u: "
 			     "returned %d or %d",
 			     inode->i_ino, map.m_lblk,
 			     map.m_len, ret, ret2);
@@ -5031,7 +5031,7 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
 				      EXT4_EX_NOCACHE);
 		if (ret <= 0)
 			ext4_warning(inode->i_sb,
-				     "inode #%lu: block %u: len %u: "
+				     "inode #%llu: block %u: len %u: "
 				     "ext4_ext_map_blocks returned %d",
 				     inode->i_ino, map.m_lblk,
 				     map.m_len, ret);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index a1538bac51c6..6e4a191e8219 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -214,7 +214,7 @@ static void ext4_es_print_tree(struct inode *inode)
 	struct ext4_es_tree *tree;
 	struct rb_node *node;
 
-	printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
+	printk(KERN_DEBUG "status extents for inode %llu:", inode->i_ino);
 	tree = &EXT4_I(inode)->i_es_tree;
 	node = rb_first(&tree->root);
 	while (node) {
@@ -703,7 +703,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 		if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
 			if (in_range(es->es_lblk, ee_block, ee_len)) {
 				pr_warn("ES insert assertion failed for "
-					"inode: %lu we can find an extent "
+					"inode: %llu we can find an extent "
 					"at block [%d/%d/%llu/%c], but we "
 					"want to add a delayed/hole extent "
 					"[%d/%d/%llu/%x]\n",
@@ -721,7 +721,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 		 */
 		if (es->es_lblk < ee_block ||
 		    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
-			pr_warn("ES insert assertion failed for inode: %lu "
+			pr_warn("ES insert assertion failed for inode: %llu "
 				"ex_status [%d/%d/%llu/%c] != "
 				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
 				ee_block, ee_len, ee_start,
@@ -731,7 +731,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 		}
 
 		if (ee_status ^ es_status) {
-			pr_warn("ES insert assertion failed for inode: %lu "
+			pr_warn("ES insert assertion failed for inode: %llu "
 				"ex_status [%d/%d/%llu/%c] != "
 				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
 				ee_block, ee_len, ee_start,
@@ -744,7 +744,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 		 * that we don't want to add an written/unwritten extent.
 		 */
 		if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
-			pr_warn("ES insert assertion failed for inode: %lu "
+			pr_warn("ES insert assertion failed for inode: %llu "
 				"can't find an extent at block %d but we want "
 				"to add a written/unwritten extent "
 				"[%d/%d/%llu/%x]\n", inode->i_ino,
@@ -779,7 +779,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
 			 * We want to add a delayed/hole extent but this
 			 * block has been allocated.
 			 */
-			pr_warn("ES insert assertion failed for inode: %lu "
+			pr_warn("ES insert assertion failed for inode: %llu "
 				"We can find blocks but we want to add a "
 				"delayed/hole extent [%d/%d/%llu/%x]\n",
 				inode->i_ino, es->es_lblk, es->es_len,
@@ -788,13 +788,13 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
 		} else if (ext4_es_is_written(es)) {
 			if (retval != es->es_len) {
 				pr_warn("ES insert assertion failed for "
-					"inode: %lu retval %d != es_len %d\n",
+					"inode: %llu retval %d != es_len %d\n",
 					inode->i_ino, retval, es->es_len);
 				return;
 			}
 			if (map.m_pblk != ext4_es_pblock(es)) {
 				pr_warn("ES insert assertion failed for "
-					"inode: %lu m_pblk %llu != "
+					"inode: %llu m_pblk %llu != "
 					"es_pblk %llu\n",
 					inode->i_ino, map.m_pblk,
 					ext4_es_pblock(es));
@@ -809,7 +809,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
 		}
 	} else if (retval == 0) {
 		if (ext4_es_is_written(es)) {
-			pr_warn("ES insert assertion failed for inode: %lu "
+			pr_warn("ES insert assertion failed for inode: %llu "
 				"We can't find the block but we want to add "
 				"a written extent [%d/%d/%llu/%x]\n",
 				inode->i_ino, es->es_lblk, es->es_len,
@@ -919,7 +919,7 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
 		return;
 
-	es_debug("add [%u/%u) %llu %x %d to extent status tree of inode %lu\n",
+	es_debug("add [%u/%u) %llu %x %d to extent status tree of inode %llu\n",
 		 lblk, len, pblk, status, delalloc_reserve_used, inode->i_ino);
 
 	if (!len)
@@ -1631,7 +1631,7 @@ void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
 		return;
 
-	es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
+	es_debug("remove [%u/%u) from extent status tree of inode %llu\n",
 		 lblk, len, inode->i_ino);
 
 	if (!len)
@@ -1821,7 +1821,7 @@ int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "  %lu shrunk objects\n", es_stats->es_stats_shrunk);
 	if (inode_cnt)
 		seq_printf(seq,
-		    "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
+		    "maximum:\n  %llu inode (%u objects, %u reclaimable)\n"
 		    "  %llu us max scan time\n",
 		    max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
 		    div_u64(es_stats->es_stats_max_scan_time, 1000));
@@ -1998,7 +1998,7 @@ static void ext4_print_pending_tree(struct inode *inode)
 	struct rb_node *node;
 	struct pending_reservation *pr;
 
-	printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino);
+	printk(KERN_DEBUG "pending reservations for inode %llu:", inode->i_ino);
 	tree = &EXT4_I(inode)->i_pending_tree;
 	node = rb_first(&tree->root);
 	while (node) {
@@ -2214,7 +2214,7 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
 		return;
 
-	es_debug("add [%u/%u) delayed to extent status tree of inode %lu\n",
+	es_debug("add [%u/%u) delayed to extent status tree of inode %llu\n",
 		 lblk, len, inode->i_ino);
 	if (!len)
 		return;
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index f575751f1cae..379fb66dedbc 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -616,7 +616,7 @@ static int __track_range(handle_t *handle, struct inode *inode, void *arg,
 		(struct __track_range_args *)arg;
 
 	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
-		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
+		ext4_debug("Special inode %llu being modified\n", inode->i_ino);
 		return -ECANCELED;
 	}
 
@@ -914,7 +914,7 @@ static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
 	spin_unlock(&ei->i_fc_lock);
 
 	cur_lblk_off = old_blk_size;
-	ext4_debug("will try writing %d to %d for inode %ld\n",
+	ext4_debug("will try writing %d to %d for inode %llu\n",
 		   cur_lblk_off, new_blk_size, inode->i_ino);
 
 	while (cur_lblk_off <= new_blk_size) {
@@ -1792,7 +1792,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 
 	cur = start;
 	remaining = len;
-	ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
+	ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %llu\n",
 		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
 		  inode->i_ino);
 
@@ -1903,7 +1903,7 @@ ext4_fc_replay_del_range(struct super_block *sb,
 	if (ret)
 		goto out;
 
-	ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n",
+	ext4_debug("DEL_RANGE, inode %llu, lblk %d, len %d\n",
 			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
 			le32_to_cpu(lrange.fc_len));
 	while (remaining > 0) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b20a1bf866ab..628a74b2bbe6 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -253,13 +253,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 		return;
 	}
 	if (icount_read(inode) > 1) {
-		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
+		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%llu: count=%d",
 			 __func__, __LINE__, inode->i_ino,
 			 icount_read(inode));
 		return;
 	}
 	if (inode->i_nlink) {
-		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
+		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%llu: nlink=%d\n",
 			 __func__, __LINE__, inode->i_ino, inode->i_nlink);
 		return;
 	}
@@ -631,7 +631,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 	 *
 	 * So add our directory's i_ino into the starting point for the hash.
 	 */
-	*group = (*group + parent->i_ino) % ngroups;
+	*group = (*group + (unsigned int)parent->i_ino) % ngroups;
 
 	/*
 	 * Use a quadratic hash to find a group with a free inode and some free
@@ -1275,7 +1275,7 @@ got:
 		 * twice.
 		 */
 		err = -EIO;
-		ext4_error(sb, "failed to insert inode %lu: doubly allocated?",
+		ext4_error(sb, "failed to insert inode %llu: doubly allocated?",
 			   inode->i_ino);
 		ext4_mark_group_bitmap_corrupted(sb, group,
 					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
@@ -1344,7 +1344,7 @@ got:
 		goto fail_free_drop;
 	}
 
-	ext4_debug("allocating inode %lu\n", inode->i_ino);
+	ext4_debug("allocating inode %llu\n", inode->i_ino);
 	trace_ext4_allocate_inode(inode, dir, mode);
 	brelse(inode_bitmap_bh);
 	return ret;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index da76353b3a57..5aec759eed70 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -102,7 +102,7 @@ static int ext4_block_to_path(struct inode *inode,
 		offsets[n++] = i_block & (ptrs - 1);
 		final = ptrs;
 	} else {
-		ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
+		ext4_warning(inode->i_sb, "block %lu > max in inode %llu",
 			     i_block + direct_blocks +
 			     indirect_blocks + double_blocks, inode->i_ino);
 	}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 1f6bc05593df..f846fcb7db24 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -119,7 +119,7 @@ int ext4_get_max_inline_size(struct inode *inode)
 	error = ext4_get_inode_loc(inode, &iloc);
 	if (error) {
 		ext4_error_inode_err(inode, __func__, __LINE__, 0, -error,
-				     "can't get inode location %lu",
+				     "can't get inode location %llu",
 				     inode->i_ino);
 		return 0;
 	}
@@ -512,7 +512,7 @@ static int ext4_read_inline_folio(struct inode *inode, struct folio *folio)
 	BUG_ON(folio->index);
 
 	if (!EXT4_I(inode)->i_inline_off) {
-		ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.",
+		ext4_warning(inode->i_sb, "inode %llu doesn't have inline data.",
 			     inode->i_ino);
 		goto out;
 	}
@@ -934,7 +934,7 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
 	struct ext4_dir_entry_2 *de = inline_start;
 	void *dlimit = inline_start + inline_size;
 
-	trace_printk("inode %lu\n", dir->i_ino);
+	trace_printk("inode %llu\n", dir->i_ino);
 	offset = 0;
 	while ((void *)de < dlimit) {
 		de_len = ext4_rec_len_from_disk(de->rec_len, inline_size);
@@ -1071,7 +1071,7 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
 	ret = ext4_create_inline_data(handle, inode, inline_size);
 	if (ret) {
 		ext4_msg(inode->i_sb, KERN_EMERG,
-			"error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)",
+			"error restoring inline_data for inode -- potential data loss! (inode %llu, error %d)",
 			inode->i_ino, ret);
 		return;
 	}
@@ -1740,7 +1740,7 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
 	err = ext4_get_inode_loc(dir, &iloc);
 	if (err) {
 		EXT4_ERROR_INODE_ERR(dir, -err,
-				     "error %d getting inode %lu block",
+				     "error %d getting inode %llu block",
 				     err, dir->i_ino);
 		return false;
 	}
@@ -1755,7 +1755,7 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
 	de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
 	if (!le32_to_cpu(de->inode)) {
 		ext4_warning(dir->i_sb,
-			     "bad inline directory (dir #%lu) - no `..'",
+			     "bad inline directory (dir #%llu) - no `..'",
 			     dir->i_ino);
 		goto out;
 	}
@@ -1769,7 +1769,7 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
 					 iloc.bh, inline_pos,
 					 inline_size, offset)) {
 			ext4_warning(dir->i_sb,
-				     "bad inline directory (dir #%lu) - "
+				     "bad inline directory (dir #%llu) - "
 				     "inode %u, rec_len %u, name_len %d"
 				     "inline size %d",
 				     dir->i_ino, le32_to_cpu(de->inode),
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 396dc3a5d16b..d50f31124a78 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -262,7 +262,7 @@ void ext4_evict_inode(struct inode *inode)
 		err = ext4_truncate(inode);
 		if (err) {
 			ext4_error_err(inode->i_sb, -err,
-				       "couldn't truncate inode %lu (err %d)",
+				       "couldn't truncate inode %llu (err %d)",
 				       inode->i_ino, err);
 			goto stop_handle;
 		}
@@ -342,7 +342,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
 	spin_lock(&ei->i_block_reservation_lock);
 	trace_ext4_da_update_reserve_space(inode, used, quota_claim);
 	if (unlikely(used > ei->i_reserved_data_blocks)) {
-		ext4_warning(inode->i_sb, "%s: ino %lu, used %d "
+		ext4_warning(inode->i_sb, "%s: ino %llu, used %d "
 			 "with only %d reserved data blocks",
 			 __func__, inode->i_ino, used,
 			 ei->i_reserved_data_blocks);
@@ -475,7 +475,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
 	if (es_map->m_lblk != map->m_lblk ||
 	    es_map->m_flags != map->m_flags ||
 	    es_map->m_pblk != map->m_pblk) {
-		printk("ES cache assertion failed for inode: %lu "
+		printk("ES cache assertion failed for inode: %llu "
 		       "es_cached ex [%d/%d/%llu/%x] != "
 		       "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
 		       inode->i_ino, es_map->m_lblk, es_map->m_len,
@@ -515,7 +515,7 @@ static int ext4_map_query_blocks_next_in_leaf(handle_t *handle,
 	if (unlikely(retval != map2.m_len)) {
 		ext4_warning(inode->i_sb,
 			     "ES len assertion failed for inode "
-			     "%lu: retval %d != map->m_len %d",
+			     "%llu: retval %d != map->m_len %d",
 			     inode->i_ino, retval, map2.m_len);
 		WARN_ON(1);
 	}
@@ -563,7 +563,7 @@ int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
 	if (unlikely(retval != map->m_len)) {
 		ext4_warning(inode->i_sb,
 			     "ES len assertion failed for inode "
-			     "%lu: retval %d != map->m_len %d",
+			     "%llu: retval %d != map->m_len %d",
 			     inode->i_ino, retval, map->m_len);
 		WARN_ON(1);
 	}
@@ -630,7 +630,7 @@ int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
 
 	if (unlikely(retval != map->m_len)) {
 		ext4_warning(inode->i_sb,
-			     "ES len assertion failed for inode %lu: "
+			     "ES len assertion failed for inode %llu: "
 			     "retval %d != map->m_len %d",
 			     inode->i_ino, retval, map->m_len);
 		WARN_ON(1);
@@ -937,7 +937,7 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
 {
 	int ret = 0;
 
-	ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n",
+	ext4_debug("ext4_get_block_unwritten: inode %llu, create flag %d\n",
 		   inode->i_ino, create);
 	ret = _ext4_get_block(inode, iblock, bh_result,
 			       EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT);
@@ -1659,7 +1659,7 @@ void ext4_da_release_space(struct inode *inode, int to_free)
 		 * harmless to return without any action.
 		 */
 		ext4_warning(inode->i_sb, "ext4_da_release_space: "
-			 "ino %lu, to_free %d with only %d reserved "
+			 "ino %llu, to_free %d with only %d reserved "
 			 "data blocks", inode->i_ino, to_free,
 			 ei->i_reserved_data_blocks);
 		WARN_ON(1);
@@ -2491,7 +2491,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
 			}
 			ext4_msg(sb, KERN_CRIT,
 				 "Delayed block allocation failed for "
-				 "inode %lu at logical offset %llu with"
+				 "inode %llu at logical offset %llu with"
 				 " max blocks %u with error %d",
 				 inode->i_ino,
 				 (unsigned long long)map->m_lblk,
@@ -2535,7 +2535,7 @@ update_disksize:
 		err2 = ext4_mark_inode_dirty(handle, inode);
 		if (err2) {
 			ext4_error_err(inode->i_sb, -err2,
-				       "Failed to mark inode %lu dirty",
+				       "Failed to mark inode %llu dirty",
 				       inode->i_ino);
 		}
 		if (!err)
@@ -2909,7 +2909,7 @@ retry:
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
-			       "%ld pages, ino %lu; err %d", __func__,
+			       "%ld pages, ino %llu; err %d", __func__,
 				wbc->nr_to_write, inode->i_ino, ret);
 			/* Release allocated io_end */
 			ext4_put_io_end(mpd->io_submit.io_end);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 3ae9cb50a0c0..1d0c3d4bdf47 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -477,7 +477,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
 	if (err < 0) {
 		/* No need to update quota information. */
 		ext4_warning(inode->i_sb,
-			"couldn't mark inode #%lu dirty (err %d)",
+			"couldn't mark inode #%llu dirty (err %d)",
 			inode->i_ino, err);
 		/* Revert all changes: */
 		swap_inode_data(inode, inode_bl);
@@ -493,7 +493,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
 	if (err < 0) {
 		/* No need to update quota information. */
 		ext4_warning(inode_bl->i_sb,
-			"couldn't mark inode #%lu dirty (err %d)",
+			"couldn't mark inode #%llu dirty (err %d)",
 			inode_bl->i_ino, err);
 		goto revert;
 	}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 20e9fdaf4301..9e8041ac5623 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2266,7 +2266,7 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
 	folio_get(ac->ac_buddy_folio);
 	/* store last allocated for subsequent stream allocation */
 	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
-		int hash = ac->ac_inode->i_ino % sbi->s_mb_nr_global_goals;
+		int hash = (unsigned int)ac->ac_inode->i_ino % sbi->s_mb_nr_global_goals;
 
 		WRITE_ONCE(sbi->s_mb_last_groups[hash], ac->ac_f_ex.fe_group);
 	}
@@ -3032,7 +3032,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 
 	/* if stream allocation is enabled, use global goal */
 	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
-		int hash = ac->ac_inode->i_ino % sbi->s_mb_nr_global_goals;
+		int hash = (unsigned int)ac->ac_inode->i_ino % sbi->s_mb_nr_global_goals;
 
 		ac->ac_g_ex.fe_group = READ_ONCE(sbi->s_mb_last_groups[hash]);
 		ac->ac_g_ex.fe_start = -1;
@@ -5628,7 +5628,7 @@ void ext4_discard_preallocations(struct inode *inode)
 	if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
 		return;
 
-	mb_debug(sb, "discard preallocation for inode %lu\n",
+	mb_debug(sb, "discard preallocation for inode %llu\n",
 		 inode->i_ino);
 	trace_ext4_discard_preallocations(inode,
 			atomic_read(&ei->i_prealloc_active));
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 96ab95167bd6..477d43d7e294 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -455,7 +455,7 @@ int ext4_ext_migrate(struct inode *inode)
 	 * log, so disable fast commits for this transaction.
 	 */
 	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MIGRATE, handle);
-	goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
+	goal = ((((u32)inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
 		EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
 	owner[0] = i_uid_read(inode);
 	owner[1] = i_gid_read(inode);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index ce1f738dff93..ab17c1d3a7b5 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -420,21 +420,21 @@ static int mext_check_validity(struct inode *orig_inode,
 
 	/* origin and donor should be different inodes */
 	if (orig_inode == donor_inode) {
-		ext4_debug("ext4 move extent: The argument files should not be same inode [ino:orig %lu, donor %lu]\n",
+		ext4_debug("ext4 move extent: The argument files should not be same inode [ino:orig %llu, donor %llu]\n",
 			   orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
 
 	/* origin and donor should belone to the same filesystem */
 	if (orig_inode->i_sb != donor_inode->i_sb) {
-		ext4_debug("ext4 move extent: The argument files should be in same FS [ino:orig %lu, donor %lu]\n",
+		ext4_debug("ext4 move extent: The argument files should be in same FS [ino:orig %llu, donor %llu]\n",
 			   orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
 
 	/* Regular file check */
 	if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
-		ext4_debug("ext4 move extent: The argument files should be regular file [ino:orig %lu, donor %lu]\n",
+		ext4_debug("ext4 move extent: The argument files should be regular file [ino:orig %llu, donor %llu]\n",
 			   orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
@@ -477,26 +477,26 @@ static int mext_check_validity(struct inode *orig_inode,
 	}
 
 	if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
-		ext4_debug("ext4 move extent: suid or sgid is set to donor file [ino:orig %lu, donor %lu]\n",
+		ext4_debug("ext4 move extent: suid or sgid is set to donor file [ino:orig %llu, donor %llu]\n",
 			   orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
 
 	if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) {
-		ext4_debug("ext4 move extent: donor should not be immutable or append file [ino:orig %lu, donor %lu]\n",
+		ext4_debug("ext4 move extent: donor should not be immutable or append file [ino:orig %llu, donor %llu]\n",
 			   orig_inode->i_ino, donor_inode->i_ino);
 		return -EPERM;
 	}
 
 	/* Ext4 move extent does not support swap files */
 	if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
-		ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %lu, donor %lu]\n",
+		ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %llu, donor %llu]\n",
 			   orig_inode->i_ino, donor_inode->i_ino);
 		return -ETXTBSY;
 	}
 
 	if (ext4_is_quota_file(orig_inode) || ext4_is_quota_file(donor_inode)) {
-		ext4_debug("ext4 move extent: The argument files should not be quota files [ino:orig %lu, donor %lu]\n",
+		ext4_debug("ext4 move extent: The argument files should not be quota files [ino:orig %llu, donor %llu]\n",
 			   orig_inode->i_ino, donor_inode->i_ino);
 		return -EOPNOTSUPP;
 	}
@@ -523,7 +523,7 @@ static int mext_check_adjust_range(struct inode *orig_inode,
 	/* Start offset should be same */
 	if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
 	    (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
-		ext4_debug("ext4 move extent: orig and donor's start offsets are not aligned [ino:orig %lu, donor %lu]\n",
+		ext4_debug("ext4 move extent: orig and donor's start offsets are not aligned [ino:orig %llu, donor %llu]\n",
 			   orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
@@ -533,7 +533,7 @@ static int mext_check_adjust_range(struct inode *orig_inode,
 	    (*len > EXT_MAX_BLOCKS) ||
 	    (donor_start + *len >= EXT_MAX_BLOCKS) ||
 	    (orig_start + *len >= EXT_MAX_BLOCKS))  {
-		ext4_debug("ext4 move extent: Can't handle over [%u] blocks [ino:orig %lu, donor %lu]\n",
+		ext4_debug("ext4 move extent: Can't handle over [%u] blocks [ino:orig %llu, donor %llu]\n",
 			   EXT_MAX_BLOCKS,
 			   orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
@@ -550,7 +550,7 @@ static int mext_check_adjust_range(struct inode *orig_inode,
 	else if (donor_eof < donor_start + *len - 1)
 		*len = donor_eof - donor_start;
 	if (!*len) {
-		ext4_debug("ext4 move extent: len should not be 0 [ino:orig %lu, donor %lu]\n",
+		ext4_debug("ext4 move extent: len should not be 0 [ino:orig %llu, donor %llu]\n",
 			   orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index c4b5e252af0e..503dc9ffd614 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -144,7 +144,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
 		bh = ext4_bread(NULL, inode, block, 0);
 	if (IS_ERR(bh)) {
 		__ext4_warning(inode->i_sb, func, line,
-			       "inode #%lu: lblock %lu: comm %s: "
+			       "inode #%llu: lblock %lu: comm %s: "
 			       "error %ld reading directory block",
 			       inode->i_ino, (unsigned long)block,
 			       current->comm, PTR_ERR(bh));
@@ -841,7 +841,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	indirect = root->info.indirect_levels;
 	if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
 		ext4_warning(dir->i_sb,
-			     "Directory (ino: %lu) htree depth %#06x exceed"
+			     "Directory (ino: %llu) htree depth %#06x exceed"
 			     "supported value", dir->i_ino,
 			     ext4_dir_htree_level(dir->i_sb));
 		if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
@@ -1793,7 +1793,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
 		    (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
 		    !fscrypt_has_permitted_context(dir, inode)) {
 			ext4_warning(inode->i_sb,
-				     "Inconsistent encryption contexts: %lu/%lu",
+				     "Inconsistent encryption contexts: %llu/%llu",
 				     dir->i_ino, inode->i_ino);
 			iput(inode);
 			return ERR_PTR(-EPERM);
@@ -2227,7 +2227,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	blocksize =  dir->i_sb->s_blocksize;
-	dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
+	dxtrace(printk(KERN_DEBUG "Creating index: inode %llu\n", dir->i_ino));
 	BUFFER_TRACE(bh, "get_write_access");
 	retval = ext4_journal_get_write_access(handle, dir->i_sb, bh,
 					       EXT4_JTR_NONE);
@@ -2523,7 +2523,7 @@ again:
 			restart = 1;
 		}
 		if (add_level && levels == ext4_dir_htree_level(sb)) {
-			ext4_warning(sb, "Directory (ino: %lu) index full, "
+			ext4_warning(sb, "Directory (ino: %llu) index full, "
 					 "reach max htree level :%d",
 					 dir->i_ino, levels);
 			if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
index c0022f0bff87..64ea47624233 100644
--- a/fs/ext4/orphan.c
+++ b/fs/ext4/orphan.c
@@ -179,8 +179,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	} else
 		brelse(iloc.bh);
 
-	ext4_debug("superblock will point to %lu\n", inode->i_ino);
-	ext4_debug("orphan inode %lu will point to %d\n",
+	ext4_debug("superblock will point to %llu\n", inode->i_ino);
+	ext4_debug("orphan inode %llu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
 out:
 	ext4_std_error(sb, err);
@@ -249,7 +249,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 	}
 
 	mutex_lock(&sbi->s_orphan_lock);
-	ext4_debug("remove inode %lu from orphan list\n", inode->i_ino);
+	ext4_debug("remove inode %llu from orphan list\n", inode->i_ino);
 
 	prev = ei->i_orphan.prev;
 	list_del_init(&ei->i_orphan);
@@ -284,7 +284,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 		struct inode *i_prev =
 			&list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
 
-		ext4_debug("orphan inode %lu will point to %u\n",
+		ext4_debug("orphan inode %llu will point to %u\n",
 			  i_prev->i_ino, ino_next);
 		err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
 		if (err) {
@@ -328,9 +328,9 @@ static void ext4_process_orphan(struct inode *inode,
 	if (inode->i_nlink) {
 		if (test_opt(sb, DEBUG))
 			ext4_msg(sb, KERN_DEBUG,
-				"%s: truncating inode %lu to %lld bytes",
+				"%s: truncating inode %llu to %lld bytes",
 				__func__, inode->i_ino, inode->i_size);
-		ext4_debug("truncating inode %lu to %lld bytes\n",
+		ext4_debug("truncating inode %llu to %lld bytes\n",
 			   inode->i_ino, inode->i_size);
 		inode_lock(inode);
 		truncate_inode_pages(inode->i_mapping, inode->i_size);
@@ -349,9 +349,9 @@ static void ext4_process_orphan(struct inode *inode,
 	} else {
 		if (test_opt(sb, DEBUG))
 			ext4_msg(sb, KERN_DEBUG,
-				"%s: deleting unreferenced inode %lu",
+				"%s: deleting unreferenced inode %llu",
 				__func__, inode->i_ino);
-		ext4_debug("deleting unreferenced inode %lu\n",
+		ext4_debug("deleting unreferenced inode %llu\n",
 			   inode->i_ino);
 		(*nr_orphans)++;
 	}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index a8c95eee91b7..86011275ad83 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -180,7 +180,7 @@ static int ext4_end_io_end(ext4_io_end_t *io_end)
 	struct super_block *sb = inode->i_sb;
 	int ret = 0;
 
-	ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p,"
+	ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %llu,list->next 0x%p,"
 		   "list->prev 0x%p\n",
 		   io_end, inode->i_ino, io_end->list.next, io_end->list.prev);
 
@@ -204,7 +204,7 @@ static int ext4_end_io_end(ext4_io_end_t *io_end)
 		ext4_msg(sb, KERN_EMERG,
 			 "failed to convert unwritten extents to written "
 			 "extents -- potential data loss!  "
-			 "(inode %lu, error %d)", inode->i_ino, ret);
+			 "(inode %llu, error %d)", inode->i_ino, ret);
 	}
 
 	ext4_clear_io_unwritten_flag(io_end);
@@ -221,7 +221,7 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head)
 	if (list_empty(head))
 		return;
 
-	ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
+	ext4_debug("Dump inode %llu completed io list\n", inode->i_ino);
 	list_for_each_entry(io_end, head, list) {
 		cur = &io_end->list;
 		before = cur->prev;
@@ -229,7 +229,7 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head)
 		after = cur->next;
 		io_end1 = container_of(after, ext4_io_end_t, list);
 
-		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
+		ext4_debug("io 0x%p from inode %llu,prev 0x%p,next 0x%p\n",
 			    io_end, inode->i_ino, io_end0, io_end1);
 	}
 #endif
@@ -366,7 +366,7 @@ static void ext4_end_bio(struct bio *bio)
 	if (bio->bi_status) {
 		struct inode *inode = io_end->inode;
 
-		ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
+		ext4_warning(inode->i_sb, "I/O error %d writing to inode %llu "
 			     "starting block %llu)",
 			     bio->bi_status, inode->i_ino,
 			     (unsigned long long)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 43f680c750ae..781c083000c2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -848,12 +848,12 @@ void __ext4_error_inode(struct inode *inode, const char *function,
 		vaf.va = &args;
 		if (block)
 			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
-			       "inode #%lu: block %llu: comm %s: %pV\n",
+			       "inode #%llu: block %llu: comm %s: %pV\n",
 			       inode->i_sb->s_id, function, line, inode->i_ino,
 			       block, current->comm, &vaf);
 		else
 			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
-			       "inode #%lu: comm %s: %pV\n",
+			       "inode #%llu: comm %s: %pV\n",
 			       inode->i_sb->s_id, function, line, inode->i_ino,
 			       current->comm, &vaf);
 		va_end(args);
@@ -888,13 +888,13 @@ void __ext4_error_file(struct file *file, const char *function,
 		vaf.va = &args;
 		if (block)
 			printk(KERN_CRIT
-			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+			       "EXT4-fs error (device %s): %s:%d: inode #%llu: "
 			       "block %llu: comm %s: path %s: %pV\n",
 			       inode->i_sb->s_id, function, line, inode->i_ino,
 			       block, current->comm, path, &vaf);
 		else
 			printk(KERN_CRIT
-			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+			       "EXT4-fs error (device %s): %s:%d: inode #%llu: "
 			       "comm %s: path %s: %pV\n",
 			       inode->i_sb->s_id, function, line, inode->i_ino,
 			       current->comm, path, &vaf);
@@ -1035,14 +1035,14 @@ void __ext4_warning_inode(const struct inode *inode, const char *function,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
-	       "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
+	       "inode #%llu: comm %s: %pV\n", inode->i_sb->s_id,
 	       function, line, inode->i_ino, current->comm, &vaf);
 	va_end(args);
 }
 
 void __ext4_grp_locked_error(const char *function, unsigned int line,
 			     struct super_block *sb, ext4_group_t grp,
-			     unsigned long ino, ext4_fsblk_t block,
+			     u64 ino, ext4_fsblk_t block,
 			     const char *fmt, ...)
 __releases(bitlock)
 __acquires(bitlock)
@@ -1061,7 +1061,7 @@ __acquires(bitlock)
 		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
 		       sb->s_id, function, line, grp);
 		if (ino)
-			printk(KERN_CONT "inode %lu: ", ino);
+			printk(KERN_CONT "inode %llu: ", ino);
 		if (block)
 			printk(KERN_CONT "block %llu:",
 			       (unsigned long long) block);
@@ -1170,7 +1170,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 	list_for_each(l, &sbi->s_orphan) {
 		struct inode *inode = orphan_list_entry(l);
 		printk(KERN_ERR "  "
-		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
+		       "inode %s:%llu at %p: mode %o, nlink %d, next %d\n",
 		       inode->i_sb->s_id, inode->i_ino, inode,
 		       inode->i_mode, inode->i_nlink,
 		       NEXT_ORPHAN(inode));
@@ -1446,7 +1446,7 @@ static void ext4_free_in_core_inode(struct inode *inode)
 {
 	fscrypt_free_inode(inode);
 	if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
-		pr_warn("%s: inode %ld still in fc list",
+		pr_warn("%s: inode %llu still in fc list",
 			__func__, inode->i_ino);
 	}
 	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
@@ -1456,7 +1456,7 @@ static void ext4_destroy_inode(struct inode *inode)
 {
 	if (ext4_inode_orphan_tracked(inode)) {
 		ext4_msg(inode->i_sb, KERN_ERR,
-			 "Inode %lu (%p): inode tracked as orphan!",
+			 "Inode %llu (%p): inode tracked as orphan!",
 			 inode->i_ino, EXT4_I(inode));
 		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 				EXT4_I(inode), sizeof(struct ext4_inode_info),
@@ -1467,7 +1467,7 @@ static void ext4_destroy_inode(struct inode *inode)
 	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ERROR_FS) &&
 	    WARN_ON_ONCE(EXT4_I(inode)->i_reserved_data_blocks))
 		ext4_msg(inode->i_sb, KERN_ERR,
-			 "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
+			 "Inode %llu (%p): i_reserved_data_blocks (%u) not cleared!",
 			 inode->i_ino, EXT4_I(inode),
 			 EXT4_I(inode)->i_reserved_data_blocks);
 }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 7bf9ba19a89d..60aec4712f7f 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -64,7 +64,7 @@
 
 #ifdef EXT4_XATTR_DEBUG
 # define ea_idebug(inode, fmt, ...)					\
-	printk(KERN_DEBUG "inode %s:%lu: " fmt "\n",			\
+	printk(KERN_DEBUG "inode %s:%llu: " fmt "\n",			\
 	       inode->i_sb->s_id, inode->i_ino, ##__VA_ARGS__)
 # define ea_bdebug(bh, fmt, ...)					\
 	printk(KERN_DEBUG "block %pg:%lu: " fmt "\n",			\
@@ -1035,7 +1035,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 	ref_count = ext4_xattr_inode_get_ref(ea_inode);
 	if ((ref_count == 0 && ref_change < 0) || (ref_count == U64_MAX && ref_change > 0)) {
 		ext4_error_inode(ea_inode, __func__, __LINE__, 0,
-			"EA inode %lu ref wraparound: ref_count=%lld ref_change=%d",
+			"EA inode %llu ref wraparound: ref_count=%lld ref_change=%d",
 			ea_inode->i_ino, ref_count, ref_change);
 		brelse(iloc.bh);
 		ret = -EFSCORRUPTED;
@@ -1046,7 +1046,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 
 	if (ref_change > 0) {
 		if (ref_count == 1) {
-			WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u",
+			WARN_ONCE(ea_inode->i_nlink, "EA inode %llu i_nlink=%u",
 				  ea_inode->i_ino, ea_inode->i_nlink);
 
 			set_nlink(ea_inode, 1);
@@ -1055,7 +1055,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 	} else {
 		if (ref_count == 0) {
 			WARN_ONCE(ea_inode->i_nlink != 1,
-				  "EA inode %lu i_nlink=%u",
+				  "EA inode %llu i_nlink=%u",
 				  ea_inode->i_ino, ea_inode->i_nlink);
 
 			clear_nlink(ea_inode);
@@ -2854,7 +2854,7 @@ shift:
 
 cleanup:
 	if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
-		ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",
+		ext4_warning(inode->i_sb, "Unable to expand inode %llu. Delete some EAs or run e2fsck.",
 			     inode->i_ino);
 		mnt_count = le16_to_cpu(sbi->s_es->s_mnt_count);
 	}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 8c76400ba631..0b8be500db65 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -773,7 +773,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
 			if (!is_inode_flag_set(dic->inode, FI_COMPRESS_CORRUPT)) {
 				set_inode_flag(dic->inode, FI_COMPRESS_CORRUPT);
 				f2fs_info_ratelimited(sbi,
-					"checksum invalid, nid = %lu, %x vs %x",
+					"checksum invalid, nid = %llu, %x vs %x",
 					dic->inode->i_ino,
 					provided, calculated);
 			}
@@ -932,7 +932,7 @@ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn)
 
 	return false;
 out:
-	f2fs_warn(sbi, "access invalid cluster, ino:%lu, nid:%u, ofs_in_node:%u, reason:%s",
+	f2fs_warn(sbi, "access invalid cluster, ino:%llu, nid:%u, ofs_in_node:%u, reason:%s",
 			dn->inode->i_ino, dn->nid, dn->ofs_in_node, reason);
 	set_sbi_flag(sbi, SBI_NEED_FSCK);
 	return true;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index f70092e231f0..38802ee2e40d 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -368,7 +368,7 @@ start_find_entry:
 
 	max_depth = F2FS_I(dir)->i_current_depth;
 	if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
-		f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %lu: %u",
+		f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %llu: %u",
 			  dir->i_ino, max_depth);
 		max_depth = MAX_DIR_HASH_DEPTH;
 		f2fs_i_depth_write(dir, max_depth);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 0ed84cc065a7..d73aeef333a2 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -34,7 +34,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio)
 	if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
 	    !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
 					DATA_GENERIC_ENHANCE)) {
-		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
+		f2fs_warn(sbi, "%s: inode (ino=%llx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
 			  __func__, inode->i_ino,
 			  ei.blk, ei.fofs, ei.len);
 		return false;
@@ -50,14 +50,14 @@ bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio)
 
 		if (devi == 0) {
 			f2fs_warn(sbi,
-			    "%s: inode (ino=%lx) is an alias of meta device",
+			    "%s: inode (ino=%llx) is an alias of meta device",
 			    __func__, inode->i_ino);
 			return false;
 		}
 
 		if (bdev_is_zoned(FDEV(devi).bdev)) {
 			f2fs_warn(sbi,
-			    "%s: device alias inode (ino=%lx)'s extent info "
+			    "%s: device alias inode (ino=%llx)'s extent info "
 			    "[%u, %u, %u] maps to zoned block device",
 			    __func__, inode->i_ino, ei.blk, ei.fofs, ei.len);
 			return false;
@@ -65,7 +65,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio)
 		return true;
 	}
 
-	f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info "
+	f2fs_warn(sbi, "%s: device alias inode (ino=%llx)'s extent info "
 			"[%u, %u, %u] is inconsistent w/ any devices",
 			__func__, inode->i_ino, ei.blk, ei.fofs, ei.len);
 	return false;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index bb34e864d0ef..760e6d80bbdd 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2706,7 +2706,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
 
 	spin_lock(&sbi->stat_lock);
 	if (unlikely(sbi->total_valid_block_count < count)) {
-		f2fs_warn(sbi, "Inconsistent total_valid_block_count:%u, ino:%lu, count:%u",
+		f2fs_warn(sbi, "Inconsistent total_valid_block_count:%u, ino:%llu, count:%u",
 			  sbi->total_valid_block_count, inode->i_ino, count);
 		sbi->total_valid_block_count = 0;
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -2719,7 +2719,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
 					sbi->current_reserved_blocks + count);
 	spin_unlock(&sbi->stat_lock);
 	if (unlikely(inode->i_blocks < sectors)) {
-		f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu",
+		f2fs_warn(sbi, "Inconsistent i_blocks, ino:%llu, iblocks:%llu, sectors:%llu",
 			  inode->i_ino,
 			  (unsigned long long)inode->i_blocks,
 			  (unsigned long long)sectors);
@@ -2993,7 +2993,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
 		dquot_free_inode(inode);
 	} else {
 		if (unlikely(inode->i_blocks == 0)) {
-			f2fs_warn(sbi, "dec_valid_node_count: inconsistent i_blocks, ino:%lu, iblocks:%llu",
+			f2fs_warn(sbi, "dec_valid_node_count: inconsistent i_blocks, ino:%llu, iblocks:%llu",
 				  inode->i_ino,
 				  (unsigned long long)inode->i_blocks);
 			set_sbi_flag(sbi, SBI_NEED_FSCK);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c8a2f17a8f11..a7957e03ee03 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1917,7 +1917,7 @@ next_alloc:
 				f2fs_up_write(&sbi->pin_sem);
 				err = -ENOSPC;
 				f2fs_warn_ratelimited(sbi,
-					"ino:%lu, start:%lu, end:%lu, need to trigger GC to "
+					"ino:%llu, start:%lu, end:%lu, need to trigger GC to "
 					"reclaim enough free segment when checkpoint is enabled",
 					inode->i_ino, pg_start, pg_end);
 				goto out_err;
@@ -2307,7 +2307,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
 	 * f2fs_is_atomic_file.
 	 */
 	if (get_dirty_pages(inode))
-		f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u",
+		f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%llu, npages=%u",
 			  inode->i_ino, get_dirty_pages(inode));
 	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
 	if (ret)
@@ -3494,7 +3494,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
 		return -EINVAL;
 
 	if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) {
-		f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
+		f2fs_warn(sbi, "%s: Enable GC = ino %llx after %x GC trials",
 			  __func__, inode->i_ino, fi->i_gc_failures);
 		clear_inode_flag(inode, FI_PIN_FILE);
 		return -EAGAIN;
@@ -3679,7 +3679,7 @@ static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
 
 	if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
 		f2fs_warn(F2FS_I_SB(inode),
-			  "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem",
+			  "Can't enable fs-verity on inode %llu: the verity feature is not enabled on this filesystem",
 			  inode->i_ino);
 		return -EOPNOTSUPP;
 	}
@@ -3950,7 +3950,7 @@ out:
 	} else if (released_blocks &&
 			atomic_read(&fi->i_compr_blocks)) {
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
-		f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
+		f2fs_warn(sbi, "%s: partial blocks were released i_ino=%llx "
 			"iblocks=%llu, released=%u, compr_blocks=%u, "
 			"run fsck to fix.",
 			__func__, inode->i_ino, inode->i_blocks,
@@ -4133,7 +4133,7 @@ unlock_inode:
 	} else if (reserved_blocks &&
 			atomic_read(&fi->i_compr_blocks)) {
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
-		f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx "
+		f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%llx "
 			"iblocks=%llu, reserved=%u, compr_blocks=%u, "
 			"run fsck to fix.",
 			__func__, inode->i_ino, inode->i_blocks,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index f46b2673d31f..c0c8a1056d6b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1622,7 +1622,7 @@ next_step:
 				iput(inode);
 				set_sbi_flag(sbi, SBI_NEED_FSCK);
 				f2fs_err_ratelimited(sbi,
-					"inode %lx has both inline_data flag and "
+					"inode %llu has both inline_data flag and "
 					"data block, nid=%u, ofs_in_node=%u",
 					inode->i_ino, dni.nid, ofs_in_node);
 				continue;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 0a1052d5ee62..2669439b9413 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -176,7 +176,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio)
 	if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
 		f2fs_put_dnode(dn);
 		set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
-		f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
+		f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%llu, i_addr[0]:0x%x, run fsck to fix.",
 			  __func__, dn->inode->i_ino, dn->data_blkaddr);
 		f2fs_handle_error(fio.sbi, ERROR_INVALID_BLKADDR);
 		return -EFSCORRUPTED;
@@ -431,7 +431,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct folio *ifolio,
 	if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
 		f2fs_put_dnode(&dn);
 		set_sbi_flag(F2FS_F_SB(folio), SBI_NEED_FSCK);
-		f2fs_warn(F2FS_F_SB(folio), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
+		f2fs_warn(F2FS_F_SB(folio), "%s: corrupted inline inode ino=%llu, i_addr[0]:0x%x, run fsck to fix.",
 			  __func__, dir->i_ino, dn.data_blkaddr);
 		f2fs_handle_error(F2FS_F_SB(folio), ERROR_INVALID_BLKADDR);
 		err = -EFSCORRUPTED;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index e0f850b3f0c3..f27198d6695b 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -203,14 +203,14 @@ static bool sanity_check_compress_inode(struct inode *inode,
 
 	if (ri->i_compress_algorithm >= COMPRESS_MAX) {
 		f2fs_warn(sbi,
-			"%s: inode (ino=%lx) has unsupported compress algorithm: %u, run fsck to fix",
+			"%s: inode (ino=%llx) has unsupported compress algorithm: %u, run fsck to fix",
 			__func__, inode->i_ino, ri->i_compress_algorithm);
 		return false;
 	}
 	if (le64_to_cpu(ri->i_compr_blocks) >
 			SECTOR_TO_BLOCK(inode->i_blocks)) {
 		f2fs_warn(sbi,
-			"%s: inode (ino=%lx) has inconsistent i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix",
+			"%s: inode (ino=%llx) has inconsistent i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix",
 			__func__, inode->i_ino, le64_to_cpu(ri->i_compr_blocks),
 			SECTOR_TO_BLOCK(inode->i_blocks));
 		return false;
@@ -218,7 +218,7 @@ static bool sanity_check_compress_inode(struct inode *inode,
 	if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
 		ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) {
 		f2fs_warn(sbi,
-			"%s: inode (ino=%lx) has unsupported log cluster size: %u, run fsck to fix",
+			"%s: inode (ino=%llx) has unsupported log cluster size: %u, run fsck to fix",
 			__func__, inode->i_ino, ri->i_log_cluster_size);
 		return false;
 	}
@@ -262,7 +262,7 @@ static bool sanity_check_compress_inode(struct inode *inode,
 
 	return true;
 err_level:
-	f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported compress level: %u, run fsck to fix",
+	f2fs_warn(sbi, "%s: inode (ino=%llx) has unsupported compress level: %u, run fsck to fix",
 		  __func__, inode->i_ino, clevel);
 	return false;
 }
@@ -276,40 +276,40 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio)
 
 	iblocks = le64_to_cpu(F2FS_INODE(node_folio)->i_blocks);
 	if (!iblocks) {
-		f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.",
+		f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%llx iblocks=%llu, run fsck to fix.",
 			  __func__, inode->i_ino, iblocks);
 		return false;
 	}
 
 	if (ino_of_node(node_folio) != nid_of_node(node_folio)) {
-		f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.",
+		f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%llx, ino,nid: [%u, %u] run fsck to fix.",
 			  __func__, inode->i_ino,
 			  ino_of_node(node_folio), nid_of_node(node_folio));
 		return false;
 	}
 
 	if (ino_of_node(node_folio) == fi->i_xattr_nid) {
-		f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.",
+		f2fs_warn(sbi, "%s: corrupted inode i_ino=%llx, xnid=%x, run fsck to fix.",
 			  __func__, inode->i_ino, fi->i_xattr_nid);
 		return false;
 	}
 
 	if (S_ISDIR(inode->i_mode) && unlikely(inode->i_nlink == 1)) {
-		f2fs_warn(sbi, "%s: directory inode (ino=%lx) has a single i_nlink",
+		f2fs_warn(sbi, "%s: directory inode (ino=%llx) has a single i_nlink",
 			  __func__, inode->i_ino);
 		return false;
 	}
 
 	if (f2fs_has_extra_attr(inode)) {
 		if (!f2fs_sb_has_extra_attr(sbi)) {
-			f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off",
+			f2fs_warn(sbi, "%s: inode (ino=%llx) is with extra_attr, but extra_attr feature is off",
 				  __func__, inode->i_ino);
 			return false;
 		}
 		if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
 			fi->i_extra_isize < F2FS_MIN_EXTRA_ATTR_SIZE ||
 			fi->i_extra_isize % sizeof(__le32)) {
-			f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu",
+			f2fs_warn(sbi, "%s: inode (ino=%llx) has corrupted i_extra_isize: %d, max: %zu",
 				  __func__, inode->i_ino, fi->i_extra_isize,
 				  F2FS_TOTAL_EXTRA_ATTR_SIZE);
 			return false;
@@ -327,7 +327,7 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio)
 		f2fs_has_inline_xattr(inode) &&
 		(fi->i_inline_xattr_size < MIN_INLINE_XATTR_SIZE ||
 		fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
-		f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, min: %zu, max: %lu",
+		f2fs_warn(sbi, "%s: inode (ino=%llx) has corrupted i_inline_xattr_size: %d, min: %zu, max: %lu",
 			  __func__, inode->i_ino, fi->i_inline_xattr_size,
 			  MIN_INLINE_XATTR_SIZE, MAX_INLINE_XATTR_SIZE);
 		return false;
@@ -335,64 +335,64 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio)
 
 	if (!f2fs_sb_has_extra_attr(sbi)) {
 		if (f2fs_sb_has_project_quota(sbi)) {
-			f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
+			f2fs_warn(sbi, "%s: corrupted inode ino=%llx, wrong feature flag: %u, run fsck to fix.",
 				  __func__, inode->i_ino, F2FS_FEATURE_PRJQUOTA);
 			return false;
 		}
 		if (f2fs_sb_has_inode_chksum(sbi)) {
-			f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
+			f2fs_warn(sbi, "%s: corrupted inode ino=%llx, wrong feature flag: %u, run fsck to fix.",
 				  __func__, inode->i_ino, F2FS_FEATURE_INODE_CHKSUM);
 			return false;
 		}
 		if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
-			f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
+			f2fs_warn(sbi, "%s: corrupted inode ino=%llx, wrong feature flag: %u, run fsck to fix.",
 				  __func__, inode->i_ino, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR);
 			return false;
 		}
 		if (f2fs_sb_has_inode_crtime(sbi)) {
-			f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
+			f2fs_warn(sbi, "%s: corrupted inode ino=%llx, wrong feature flag: %u, run fsck to fix.",
 				  __func__, inode->i_ino, F2FS_FEATURE_INODE_CRTIME);
 			return false;
 		}
 		if (f2fs_sb_has_compression(sbi)) {
-			f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
+			f2fs_warn(sbi, "%s: corrupted inode ino=%llx, wrong feature flag: %u, run fsck to fix.",
 				  __func__, inode->i_ino, F2FS_FEATURE_COMPRESSION);
 			return false;
 		}
 	}
 
 	if (f2fs_sanity_check_inline_data(inode, node_folio)) {
-		f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
+		f2fs_warn(sbi, "%s: inode (ino=%llx, mode=%u) should not have inline_data, run fsck to fix",
 			  __func__, inode->i_ino, inode->i_mode);
 		return false;
 	}
 
 	if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
-		f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix",
+		f2fs_warn(sbi, "%s: inode (ino=%llx, mode=%u) should not have inline_dentry, run fsck to fix",
 			  __func__, inode->i_ino, inode->i_mode);
 		return false;
 	}
 
 	if ((fi->i_flags & F2FS_CASEFOLD_FL) && !f2fs_sb_has_casefold(sbi)) {
-		f2fs_warn(sbi, "%s: inode (ino=%lx) has casefold flag, but casefold feature is off",
+		f2fs_warn(sbi, "%s: inode (ino=%llx) has casefold flag, but casefold feature is off",
 			  __func__, inode->i_ino);
 		return false;
 	}
 
 	if (fi->i_xattr_nid && f2fs_check_nid_range(sbi, fi->i_xattr_nid)) {
-		f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_xattr_nid: %u, run fsck to fix.",
+		f2fs_warn(sbi, "%s: inode (ino=%llx) has corrupted i_xattr_nid: %u, run fsck to fix.",
 			  __func__, inode->i_ino, fi->i_xattr_nid);
 		return false;
 	}
 
 	if (IS_DEVICE_ALIASING(inode)) {
 		if (!f2fs_sb_has_device_alias(sbi)) {
-			f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off",
+			f2fs_warn(sbi, "%s: inode (ino=%llx) has device alias flag, but the feature is off",
 				  __func__, inode->i_ino);
 			return false;
 		}
 		if (!f2fs_is_pinned_file(inode)) {
-			f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned",
+			f2fs_warn(sbi, "%s: inode (ino=%llx) has device alias flag, but is not pinned",
 				  __func__, inode->i_ino);
 			return false;
 		}
@@ -925,7 +925,7 @@ retry:
 			 */
 			if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
 				f2fs_warn(F2FS_I_SB(inode),
-					"f2fs_evict_inode: inconsistent node id, ino:%lu",
+					"f2fs_evict_inode: inconsistent node id, ino:%llu",
 					inode->i_ino);
 				f2fs_inode_synced(inode);
 				set_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -954,7 +954,7 @@ retry:
 		 */
 		if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
 			f2fs_warn(sbi,
-				"f2fs_evict_inode: inode is dirty, ino:%lu",
+				"f2fs_evict_inode: inode is dirty, ino:%llu",
 				inode->i_ino);
 			f2fs_inode_synced(inode);
 			set_sbi_flag(sbi, SBI_NEED_FSCK);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e360f08a9586..efbb0732d420 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -505,7 +505,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
 	}
 
 	if (inode->i_nlink == 0) {
-		f2fs_warn(F2FS_I_SB(inode), "%s: inode (ino=%lx) has zero i_nlink",
+		f2fs_warn(F2FS_I_SB(inode), "%s: inode (ino=%llx) has zero i_nlink",
 			  __func__, inode->i_ino);
 		err = -EFSCORRUPTED;
 		set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
@@ -515,7 +515,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
 	if (IS_ENCRYPTED(dir) &&
 	    (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
 	    !fscrypt_has_permitted_context(dir, inode)) {
-		f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %lu/%lu",
+		f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %llu/%llu",
 			  dir->i_ino, inode->i_ino);
 		err = -EPERM;
 		goto out_iput;
@@ -573,11 +573,11 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
 	}
 
 	if (unlikely(inode->i_nlink == 0)) {
-		f2fs_warn(sbi, "%s: inode (ino=%lx) has zero i_nlink",
+		f2fs_warn(sbi, "%s: inode (ino=%llx) has zero i_nlink",
 			  __func__, inode->i_ino);
 		goto corrupted;
 	} else if (S_ISDIR(inode->i_mode) && unlikely(inode->i_nlink == 1)) {
-		f2fs_warn(sbi, "%s: directory inode (ino=%lx) has a single i_nlink",
+		f2fs_warn(sbi, "%s: directory inode (ino=%llx) has a single i_nlink",
 			  __func__, inode->i_ino);
 		goto corrupted;
 	}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 5ca6f518cfa1..5a7679c6317e 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -847,7 +847,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
 			err = -EFSCORRUPTED;
 			f2fs_err_ratelimited(sbi,
 				"inode mapping table is corrupted, run fsck to fix it, "
-				"ino:%lu, nid:%u, level:%d, offset:%d",
+				"ino:%llu, nid:%u, level:%d, offset:%d",
 				dn->inode->i_ino, nids[i], level, offset[level]);
 			set_sbi_flag(sbi, SBI_NEED_FSCK);
 			goto release_pages;
@@ -1013,7 +1013,7 @@ static int truncate_dnode(struct dnode_of_data *dn)
 		return PTR_ERR(folio);
 
 	if (IS_INODE(folio) || ino_of_node(folio) != dn->inode->i_ino) {
-		f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u",
+		f2fs_err(sbi, "incorrect node reference, ino: %llu, nid: %u, ino_of_node: %u",
 				dn->inode->i_ino, dn->nid, ino_of_node(folio));
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
 		f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE);
@@ -1194,7 +1194,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
 	if (level <= 0) {
 		if (!level) {
 			level = -EFSCORRUPTED;
-			f2fs_err(sbi, "%s: inode ino=%lx has corrupted node block, from:%lu addrs:%u",
+			f2fs_err(sbi, "%s: inode ino=%llx has corrupted node block, from:%lu addrs:%u",
 					__func__, inode->i_ino,
 					from, ADDRS_PER_INODE(inode));
 			set_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -1265,7 +1265,7 @@ skip_partial:
 			set_sbi_flag(F2FS_F_SB(folio), SBI_NEED_FSCK);
 			f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
 			f2fs_err_ratelimited(sbi,
-				"truncate node fail, ino:%lu, nid:%u, "
+				"truncate node fail, ino:%llu, nid:%u, "
 				"offset[0]:%d, offset[1]:%d, nofs:%d",
 				inode->i_ino, dn.nid, offset[0],
 				offset[1], nofs);
@@ -1351,7 +1351,7 @@ int f2fs_remove_inode_page(struct inode *inode)
 
 	if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
 		f2fs_warn(F2FS_I_SB(inode),
-			"f2fs_remove_inode_page: inconsistent i_blocks, ino:%lu, iblocks:%llu",
+			"f2fs_remove_inode_page: inconsistent i_blocks, ino:%llu, iblocks:%llu",
 			inode->i_ino, (unsigned long long)inode->i_blocks);
 		set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
 	}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a26071f2b0bc..3d3dacec9482 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -232,7 +232,7 @@ out:
 		name = "<encrypted>";
 	else
 		name = raw_inode->i_name;
-	f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
+	f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %llu, err = %d",
 		    __func__, ino_of_node(ifolio), name,
 		    IS_ERR(dir) ? 0 : dir->i_ino, err);
 	return err;
@@ -532,7 +532,7 @@ got_it:
 
 	max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode);
 	if (ofs_in_node >= max_addrs) {
-		f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u",
+		f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%llu, nid:%u, max:%u",
 			ofs_in_node, dn->inode->i_ino, nid, max_addrs);
 		f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUMMARY);
 		return -EFSCORRUPTED;
@@ -674,7 +674,7 @@ retry_dn:
 	f2fs_bug_on(sbi, ni.ino != ino_of_node(folio));
 
 	if (ofs_of_node(dn.node_folio) != ofs_of_node(folio)) {
-		f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
+		f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%llu, ofs:%u, %u",
 			  inode->i_ino, ofs_of_node(dn.node_folio),
 			  ofs_of_node(folio));
 		err = -EFSCORRUPTED;
@@ -748,7 +748,7 @@ retry_prev:
 
 			if (f2fs_is_valid_blkaddr(sbi, dest,
 					DATA_GENERIC_ENHANCE_UPDATE)) {
-				f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u",
+				f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%llu, ofs:%u",
 					dest, inode->i_ino, dn.ofs_in_node);
 				err = -EFSCORRUPTED;
 				goto err;
@@ -768,7 +768,7 @@ retry_prev:
 err:
 	f2fs_put_dnode(&dn);
 out:
-	f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), "
+	f2fs_notice(sbi, "recover_data: ino = %llx, nid = %x (i_size: %s), "
 		    "range (%u, %u), recovered = %d, err = %d",
 		    inode->i_ino, nid_of_node(folio),
 		    file_keep_isize(inode) ? "keep" : "recover",
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 941dc62a6d6f..610d5810074d 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -365,7 +365,7 @@ static int lookup_all_xattrs(struct inode *inode, struct folio *ifolio,
 
 	*xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name);
 	if (!*xe) {
-		f2fs_err(F2FS_I_SB(inode), "lookup inode (%lu) has corrupted xattr",
+		f2fs_err(F2FS_I_SB(inode), "lookup inode (%llu) has corrupted xattr",
 								inode->i_ino);
 		set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
 		err = -ENODATA;
@@ -585,7 +585,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 
 		if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
 			(void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) {
-			f2fs_err(F2FS_I_SB(inode), "list inode (%lu) has corrupted xattr",
+			f2fs_err(F2FS_I_SB(inode), "list inode (%llu) has corrupted xattr",
 						inode->i_ino);
 			set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
 			f2fs_handle_error(F2FS_I_SB(inode),
@@ -664,14 +664,14 @@ retry:
 		if (!F2FS_I(inode)->i_xattr_nid) {
 			error = f2fs_recover_xattr_data(inode, NULL);
 			f2fs_notice(F2FS_I_SB(inode),
-				"recover xattr in inode (%lu), error(%d)",
+				"recover xattr in inode (%llu), error(%d)",
 					inode->i_ino, error);
 			if (!error) {
 				kfree(base_addr);
 				goto retry;
 			}
 		}
-		f2fs_err(F2FS_I_SB(inode), "set inode (%lu) has corrupted xattr",
+		f2fs_err(F2FS_I_SB(inode), "set inode (%llu) has corrupted xattr",
 								inode->i_ino);
 		set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
 		error = -EFSCORRUPTED;
@@ -699,7 +699,7 @@ retry:
 	while (!IS_XATTR_LAST_ENTRY(last)) {
 		if ((void *)(last) + sizeof(__u32) > last_base_addr ||
 			(void *)XATTR_NEXT_ENTRY(last) > last_base_addr) {
-			f2fs_err(F2FS_I_SB(inode), "inode (%lu) has invalid last xattr entry, entry_size: %zu",
+			f2fs_err(F2FS_I_SB(inode), "inode (%llu) has invalid last xattr entry, entry_size: %zu",
 					inode->i_ino, ENTRY_SIZE(last));
 			set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
 			error = -EFSCORRUPTED;
diff --git a/fs/freevxfs/vxfs_bmap.c b/fs/freevxfs/vxfs_bmap.c
index 26d367e3668d..e85222892038 100644
--- a/fs/freevxfs/vxfs_bmap.c
+++ b/fs/freevxfs/vxfs_bmap.c
@@ -260,12 +260,12 @@ vxfs_bmap1(struct inode *ip, long iblock)
 	if (VXFS_ISIMMED(vip))
 		goto unsupp;
 
-	printk(KERN_WARNING "vxfs: inode %ld has no valid orgtype (%x)\n",
+	printk(KERN_WARNING "vxfs: inode %llu has no valid orgtype (%x)\n",
 			ip->i_ino, vip->vii_orgtype);
 	BUG();
 
 unsupp:
-	printk(KERN_WARNING "vxfs: inode %ld has an unsupported orgtype (%x)\n",
+	printk(KERN_WARNING "vxfs: inode %llu has an unsupported orgtype (%x)\n",
 			ip->i_ino, vip->vii_orgtype);
 	return 0;
 }
diff --git a/fs/fserror.c b/fs/fserror.c
index 06ca86adab9b..1e4d11fd9562 100644
--- a/fs/fserror.c
+++ b/fs/fserror.c
@@ -176,7 +176,7 @@ lost_event:
 lost:
 	if (inode)
 		pr_err_ratelimited(
- "%s: lost file I/O error report for ino %lu type %u pos 0x%llx len 0x%llx error %d",
+ "%s: lost file I/O error report for ino %llu type %u pos 0x%llx len 0x%llx error %d",
 		       sb->s_id, inode->i_ino, type, pos, len, error);
 	else
 		pr_err_ratelimited(
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index b80ba40e3877..7f5339ee57c1 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -417,7 +417,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name,
 	int entry_size, type;
 	int err;
 
-	hfs_dbg("cnid %u - (ino %lu, name %s) - (ino %lu, name %s)\n",
+	hfs_dbg("cnid %u - (ino %llu, name %s) - (ino %llu, name %s)\n",
 		cnid, src_dir->i_ino, src_name->name,
 		dst_dir->i_ino, dst_name->name);
 	sb = src_dir->i_sb;
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index a097908b269d..f066a99a863b 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -411,7 +411,7 @@ int hfs_extend_file(struct inode *inode)
 		goto out;
 	}
 
-	hfs_dbg("ino %lu, start %u, len %u\n", inode->i_ino, start, len);
+	hfs_dbg("ino %llu, start %u, len %u\n", inode->i_ino, start, len);
 	if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks) {
 		if (!HFS_I(inode)->first_blocks) {
 			hfs_dbg("first_extent: start %u, len %u\n",
@@ -482,7 +482,7 @@ void hfs_file_truncate(struct inode *inode)
 	u32 size;
 	int res;
 
-	hfs_dbg("ino %lu, phys_size %llu -> i_size %llu\n",
+	hfs_dbg("ino %llu, phys_size %llu -> i_size %llu\n",
 		inode->i_ino, (long long)HFS_I(inode)->phys_size,
 		inode->i_size);
 	if (inode->i_size > HFS_I(inode)->phys_size) {
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 878535db64d6..95f0333a608b 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -270,7 +270,7 @@ void hfs_delete_inode(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
 
-	hfs_dbg("ino %lu\n", inode->i_ino);
+	hfs_dbg("ino %llu\n", inode->i_ino);
 	if (S_ISDIR(inode->i_mode)) {
 		atomic64_dec(&HFS_SB(sb)->folder_count);
 		if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID))
@@ -455,7 +455,7 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	hfs_cat_rec rec;
 	int res;
 
-	hfs_dbg("ino %lu\n", inode->i_ino);
+	hfs_dbg("ino %llu\n", inode->i_ino);
 	res = hfs_ext_write_extent(inode);
 	if (res)
 		return res;
diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c
index 4b79cd606276..174cd13106ad 100644
--- a/fs/hfsplus/attributes.c
+++ b/fs/hfsplus/attributes.c
@@ -203,7 +203,7 @@ int hfsplus_create_attr_nolock(struct inode *inode, const char *name,
 	int entry_size;
 	int err;
 
-	hfs_dbg("name %s, ino %ld\n",
+	hfs_dbg("name %s, ino %llu\n",
 		name ? name : NULL, inode->i_ino);
 
 	if (name) {
@@ -255,7 +255,7 @@ int hfsplus_create_attr(struct inode *inode,
 	hfsplus_attr_entry *entry_ptr;
 	int err;
 
-	hfs_dbg("name %s, ino %ld\n",
+	hfs_dbg("name %s, ino %llu\n",
 		name ? name : NULL, inode->i_ino);
 
 	if (!HFSPLUS_SB(sb)->attr_tree) {
@@ -337,7 +337,7 @@ int hfsplus_delete_attr_nolock(struct inode *inode, const char *name,
 	struct super_block *sb = inode->i_sb;
 	int err;
 
-	hfs_dbg("name %s, ino %ld\n",
+	hfs_dbg("name %s, ino %llu\n",
 		name ? name : NULL, inode->i_ino);
 
 	if (name) {
@@ -367,7 +367,7 @@ int hfsplus_delete_attr(struct inode *inode, const char *name)
 	struct super_block *sb = inode->i_sb;
 	struct hfs_find_data fd;
 
-	hfs_dbg("name %s, ino %ld\n",
+	hfs_dbg("name %s, ino %llu\n",
 		name ? name : NULL, inode->i_ino);
 
 	if (!HFSPLUS_SB(sb)->attr_tree) {
@@ -436,7 +436,7 @@ int hfsplus_replace_attr(struct inode *inode,
 	hfsplus_attr_entry *entry_ptr;
 	int err = 0;
 
-	hfs_dbg("name %s, ino %ld\n",
+	hfs_dbg("name %s, ino %llu\n",
 		name ? name : NULL, inode->i_ino);
 
 	if (!HFSPLUS_SB(sb)->attr_tree) {
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 02c1eee4a4b8..0e961e99b985 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -441,7 +441,7 @@ int hfsplus_rename_cat(u32 cnid,
 	int entry_size, type;
 	int err;
 
-	hfs_dbg("cnid %u - ino %lu, name %s - ino %lu, name %s\n",
+	hfs_dbg("cnid %u - ino %llu, name %s - ino %llu, name %s\n",
 		cnid, src_dir->i_ino, src_name->name,
 		dst_dir->i_ino, dst_name->name);
 	err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index d559bf8625f8..054f6da46033 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -313,7 +313,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
 	if (!S_ISREG(inode->i_mode))
 		return -EPERM;
 
-	hfs_dbg("src_dir->i_ino %lu, dst_dir->i_ino %lu, inode->i_ino %lu\n",
+	hfs_dbg("src_dir->i_ino %llu, dst_dir->i_ino %llu, inode->i_ino %llu\n",
 		src_dir->i_ino, dst_dir->i_ino, inode->i_ino);
 
 	mutex_lock(&sbi->vh_mutex);
@@ -385,7 +385,7 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
 	if (HFSPLUS_IS_RSRC(inode))
 		return -EPERM;
 
-	hfs_dbg("dir->i_ino %lu, inode->i_ino %lu\n",
+	hfs_dbg("dir->i_ino %llu, inode->i_ino %llu\n",
 		dir->i_ino, inode->i_ino);
 
 	mutex_lock(&sbi->vh_mutex);
@@ -393,7 +393,7 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
 	if (inode->i_ino == cnid &&
 	    atomic_read(&HFSPLUS_I(inode)->opencnt)) {
 		str.name = name;
-		str.len = sprintf(name, "temp%lu", inode->i_ino);
+		str.len = sprintf(name, "temp%llu", inode->i_ino);
 		res = hfsplus_rename_cat(inode->i_ino,
 					 dir, &dentry->d_name,
 					 sbi->hidden_dir, &str);
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 8e886514d27f..474fde1a1653 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -275,7 +275,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
 	mutex_unlock(&hip->extents_lock);
 
 done:
-	hfs_dbg("ino %lu, iblock %llu - dblock %u\n",
+	hfs_dbg("ino %llu, iblock %llu - dblock %u\n",
 		inode->i_ino, (long long)iblock, dblock);
 
 	mask = (1 << sbi->fs_shift) - 1;
@@ -476,7 +476,7 @@ int hfsplus_file_extend(struct inode *inode, bool zeroout)
 			goto out;
 	}
 
-	hfs_dbg("ino %lu, start %u, len %u\n", inode->i_ino, start, len);
+	hfs_dbg("ino %llu, start %u, len %u\n", inode->i_ino, start, len);
 
 	if (hip->alloc_blocks <= hip->first_blocks) {
 		if (!hip->first_blocks) {
@@ -545,7 +545,7 @@ void hfsplus_file_truncate(struct inode *inode)
 	u32 alloc_cnt, blk_cnt, start;
 	int res;
 
-	hfs_dbg("ino %lu, phys_size %llu -> i_size %llu\n",
+	hfs_dbg("ino %llu, phys_size %llu -> i_size %llu\n",
 		inode->i_ino, (long long)hip->phys_size, inode->i_size);
 
 	if (inode->i_size > hip->phys_size) {
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 922ff41df042..02be32dc6833 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -230,7 +230,7 @@ static int hfsplus_get_perms(struct inode *inode,
 		inode->i_flags &= ~S_APPEND;
 	return 0;
 bad_type:
-	pr_err("invalid file type 0%04o for inode %lu\n", mode, inode->i_ino);
+	pr_err("invalid file type 0%04o for inode %llu\n", mode, inode->i_ino);
 	return -EIO;
 }
 
@@ -328,7 +328,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
 	struct hfsplus_vh *vhdr = sbi->s_vhdr;
 	int error = 0, error2;
 
-	hfs_dbg("inode->i_ino %lu, start %llu, end %llu\n",
+	hfs_dbg("inode->i_ino %llu, start %llu, end %llu\n",
 		inode->i_ino, start, end);
 
 	error = file_write_and_wait_range(file, start, end);
@@ -639,7 +639,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
 	hfsplus_cat_entry entry;
 	int res = 0;
 
-	hfs_dbg("inode->i_ino %lu\n", inode->i_ino);
+	hfs_dbg("inode->i_ino %llu\n", inode->i_ino);
 
 	if (HFSPLUS_IS_RSRC(inode))
 		main_inode = HFSPLUS_I(inode)->rsrc_inode;
@@ -716,7 +716,7 @@ out:
 	if (!res) {
 		res = hfs_btree_write(tree);
 		if (res) {
-			pr_err("b-tree write err: %d, ino %lu\n",
+			pr_err("b-tree write err: %d, ino %llu\n",
 			       res, inode->i_ino);
 		}
 	}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 7229a8ae89f9..b3917249c206 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -156,7 +156,7 @@ static int hfsplus_system_write_inode(struct inode *inode)
 		int err = hfs_btree_write(tree);
 
 		if (err) {
-			pr_err("b-tree write err: %d, ino %lu\n",
+			pr_err("b-tree write err: %d, ino %llu\n",
 			       err, inode->i_ino);
 			return err;
 		}
@@ -169,7 +169,7 @@ static int hfsplus_write_inode(struct inode *inode,
 {
 	int err;
 
-	hfs_dbg("ino %lu\n", inode->i_ino);
+	hfs_dbg("ino %llu\n", inode->i_ino);
 
 	err = hfsplus_ext_write_extent(inode);
 	if (err)
@@ -184,7 +184,7 @@ static int hfsplus_write_inode(struct inode *inode,
 
 static void hfsplus_evict_inode(struct inode *inode)
 {
-	hfs_dbg("ino %lu\n", inode->i_ino);
+	hfs_dbg("ino %llu\n", inode->i_ino);
 	truncate_inode_pages_final(&inode->i_data);
 	clear_inode(inode);
 	if (HFSPLUS_IS_RSRC(inode)) {
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 9904944cbd54..c70bb6f494b2 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -277,7 +277,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name,
 	u16 folder_finderinfo_len = sizeof(DInfo) + sizeof(DXInfo);
 	u16 file_finderinfo_len = sizeof(FInfo) + sizeof(FXInfo);
 
-	hfs_dbg("ino %lu, name %s, value %p, size %zu\n",
+	hfs_dbg("ino %llu, name %s, value %p, size %zu\n",
 		inode->i_ino, name ? name : NULL,
 		value, size);
 
@@ -447,7 +447,7 @@ int hfsplus_setxattr(struct inode *inode, const char *name,
 		NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1;
 	int res;
 
-	hfs_dbg("ino %lu, name %s, prefix %s, prefixlen %zu, "
+	hfs_dbg("ino %llu, name %s, prefix %s, prefixlen %zu, "
 		"value %p, size %zu\n",
 		inode->i_ino, name ? name : NULL,
 		prefix ? prefix : NULL, prefixlen,
@@ -607,7 +607,7 @@ ssize_t hfsplus_getxattr(struct inode *inode, const char *name,
 	int res;
 	char *xattr_name;
 
-	hfs_dbg("ino %lu, name %s, prefix %s\n",
+	hfs_dbg("ino %llu, name %s, prefix %s\n",
 		inode->i_ino, name ? name : NULL,
 		prefix ? prefix : NULL);
 
@@ -717,7 +717,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
 	size_t strbuf_size;
 	int xattr_name_len;
 
-	hfs_dbg("ino %lu\n", inode->i_ino);
+	hfs_dbg("ino %llu\n", inode->i_ino);
 
 	if (!is_xattr_operation_supported(inode))
 		return -EOPNOTSUPP;
@@ -819,7 +819,7 @@ static int hfsplus_removexattr(struct inode *inode, const char *name)
 	int is_xattr_acl_deleted;
 	int is_all_xattrs_deleted;
 
-	hfs_dbg("ino %lu, name %s\n",
+	hfs_dbg("ino %llu, name %s\n",
 		inode->i_ino, name ? name : NULL);
 
 	if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index ceb50b2dc91a..3bf11202e2d3 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -96,8 +96,8 @@ static int hpfs_readdir(struct file *file, struct dir_context *ctx)
 		}
 		if (!fnode_is_dir(fno)) {
 			e = 1;
-			hpfs_error(inode->i_sb, "not a directory, fnode %08lx",
-					(unsigned long)inode->i_ino);
+			hpfs_error(inode->i_sb, "not a directory, fnode %08llx",
+					inode->i_ino);
 		}
 		if (hpfs_inode->i_dno != le32_to_cpu(fno->u.external[0].disk_secno)) {
 			e = 1;
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index dde764ebe246..8c6aa060fd87 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -550,9 +550,9 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
 			if (hpfs_sb(i->i_sb)->sb_chk)
 				if (up != i->i_ino) {
 					hpfs_error(i->i_sb,
-						   "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
+						   "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08llx",
 						   dno, up,
-						   (unsigned long)i->i_ino);
+						   i->i_ino);
 					return;
 				}
 			if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c
index 2149d3ca530b..4664f9ab06ee 100644
--- a/fs/hpfs/ea.c
+++ b/fs/hpfs/ea.c
@@ -245,8 +245,8 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
 		fnode->ea_offs = cpu_to_le16(0xc4);
 	}
 	if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) {
-		hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x",
-			(unsigned long)inode->i_ino,
+		hpfs_error(s, "fnode %08llx: ea_offs == %03x, ea_size_s == %03x",
+			inode->i_ino,
 			le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
 		return;
 	}
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 93d528f4f4f2..0e932cc8be1b 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -250,8 +250,8 @@ void hpfs_write_inode_nolock(struct inode *i)
 			hpfs_brelse4(&qbh);
 		} else
 			hpfs_error(i->i_sb,
-				"directory %08lx doesn't have '.' entry",
-				(unsigned long)i->i_ino);
+				"directory %08llx doesn't have '.' entry",
+				i->i_ino);
 	}
 	mark_buffer_dirty(bh);
 	brelse(bh);
diff --git a/fs/inode.c b/fs/inode.c
index 62df5dda0589..5ad169d51728 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -726,7 +726,7 @@ void dump_mapping(const struct address_space *mapping)
 	struct dentry *dentry_ptr;
 	struct dentry dentry;
 	char fname[64] = {};
-	unsigned long ino;
+	u64 ino;
 
 	/*
 	 * If mapping is an invalid pointer, we don't want to crash
@@ -750,14 +750,14 @@ void dump_mapping(const struct address_space *mapping)
 	}
 
 	if (!dentry_first) {
-		pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
+		pr_warn("aops:%ps ino:%llx\n", a_ops, ino);
 		return;
 	}
 
 	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
 	if (get_kernel_nofault(dentry, dentry_ptr) ||
 	    !dentry.d_parent || !dentry.d_name.name) {
-		pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
+		pr_warn("aops:%ps ino:%llx invalid dentry:%px\n",
 				a_ops, ino, dentry_ptr);
 		return;
 	}
@@ -768,7 +768,7 @@ void dump_mapping(const struct address_space *mapping)
 	 * Even if strncpy_from_kernel_nofault() succeeded,
 	 * the fname could be unreliable
 	 */
-	pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n",
+	pr_warn("aops:%ps ino:%llx dentry name(?):\"%s\"\n",
 		a_ops, ino, fname);
 }
 
@@ -2641,9 +2641,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
 		/* leave it no_open_fops */
 		break;
 	default:
-		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
-				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
-				  inode->i_ino);
+		pr_debug("init_special_inode: bogus i_mode (%o) for inode %s:%llu\n",
+			 mode, inode->i_sb->s_id, inode->i_ino);
 		break;
 	}
 }
diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c
index e4d57cb969f1..e655763a82ce 100644
--- a/fs/iomap/ioend.c
+++ b/fs/iomap/ioend.c
@@ -48,7 +48,7 @@ static u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend)
 		mapping_set_error(inode->i_mapping, ioend->io_error);
 		if (!bio_flagged(bio, BIO_QUIET)) {
 			pr_err_ratelimited(
-"%s: writeback error on inode %lu, offset %lld, sector %llu",
+"%s: writeback error on inode %llu, offset %lld, sector %llu",
 				inode->i_sb->s_id, inode->i_ino,
 				ioend->io_offset, ioend->io_sector);
 		}
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 50b4cb3aea87..397568b9c7e7 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -156,7 +156,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
 				else {
 					printk(KERN_DEBUG
 					       "zisofs: zisofs_inflate returned"
-					       " %d, inode = %lu,"
+					       " %d, inode = %llu,"
 					       " page idx = %d, bh idx = %d,"
 					       " avail_in = %ld,"
 					       " avail_out = %ld\n",
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 2ca16c3fe5ef..2fd9948d606e 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -152,7 +152,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *file,
 		    de_len < de->name_len[0] +
 					sizeof(struct iso_directory_record)) {
 			printk(KERN_NOTICE "iso9660: Corrupted directory entry"
-			       " in block %lu of inode %lu\n", block,
+			       " in block %lu of inode %llu\n", block,
 			       inode->i_ino);
 			brelse(bh);
 			return -EIO;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 5c01536c5e8f..3593e02e75fe 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1261,7 +1261,7 @@ out_noread:
 
 out_toomany:
 	printk(KERN_INFO "%s: More than 100 file sections ?!?, aborting...\n"
-		"isofs_read_level3_size: inode=%lu\n",
+		"isofs_read_level3_size: inode=%llu\n",
 		__func__, inode->i_ino);
 	goto out;
 }
@@ -1380,7 +1380,7 @@ static int isofs_read_inode(struct inode *inode, int relocated)
 	/* I have no idea what file_unit_size is used for, so
 	   we will flag it for now */
 	if (de->file_unit_size[0] != 0) {
-		printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%ld).\n",
+		printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%llu).\n",
 			inode->i_ino);
 	}
 
@@ -1450,7 +1450,7 @@ static int isofs_read_inode(struct inode *inode, int relocated)
 		/* XXX - parse_rock_ridge_inode() had already set i_rdev. */
 		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 	} else {
-		printk(KERN_DEBUG "ISOFS: Invalid file type 0%04o for inode %lu.\n",
+		printk(KERN_DEBUG "ISOFS: Invalid file type 0%04o for inode %llu.\n",
 			inode->i_mode, inode->i_ino);
 		ret = -EIO;
 		goto fail;
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 58f80e1b3ac0..8dd3911717e0 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -100,7 +100,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
 		/* Basic sanity check, whether name doesn't exceed dir entry */
 		if (de_len < dlen + sizeof(struct iso_directory_record)) {
 			printk(KERN_NOTICE "iso9660: Corrupted directory entry"
-			       " in block %lu of inode %lu\n", block,
+			       " in block %lu of inode %llu\n", block,
 			       dir->i_ino);
 			brelse(bh);
 			return 0;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index cb2c529a8f1b..b60918ed8a99 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1677,7 +1677,7 @@ journal_t *jbd2_journal_init_inode(struct inode *inode)
 		return err ? ERR_PTR(err) : ERR_PTR(-EINVAL);
 	}
 
-	jbd2_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
+	jbd2_debug(1, "JBD2: inode %s/%llu, size %lld, bits %d, blksize %ld\n",
 		  inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
 		  inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
 
@@ -1689,7 +1689,7 @@ journal_t *jbd2_journal_init_inode(struct inode *inode)
 
 	journal->j_inode = inode;
 	snprintf(journal->j_devname, sizeof(journal->j_devname),
-		 "%pg-%lu", journal->j_dev, journal->j_inode->i_ino);
+		 "%pg-%llu", journal->j_dev, journal->j_inode->i_ino);
 	strreplace(journal->j_devname, '/', '!');
 	jbd2_stats_proc_init(journal);
 
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index dca4b5d8aaaa..a90f9092706c 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2651,7 +2651,7 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
 		return -EROFS;
 	journal = transaction->t_journal;
 
-	jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
+	jbd2_debug(4, "Adding inode %llu, tid:%d\n", jinode->i_vfs_inode->i_ino,
 			transaction->t_tid);
 
 	spin_lock(&journal->j_list_lock);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 2b38ce1fd8e8..c4088c3b4ac0 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -129,7 +129,7 @@ static int jffs2_readdir(struct file *file, struct dir_context *ctx)
 	struct jffs2_full_dirent *fd;
 	unsigned long curofs = 1;
 
-	jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n", inode->i_ino);
+	jffs2_dbg(1, "jffs2_readdir() for dir_i #%llu\n", inode->i_ino);
 
 	if (!dir_emit_dots(file, ctx))
 		return 0;
@@ -211,7 +211,7 @@ static int jffs2_create(struct mnt_idmap *idmap, struct inode *dir_i,
 
 	jffs2_free_raw_inode(ri);
 
-	jffs2_dbg(1, "%s(): Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
+	jffs2_dbg(1, "%s(): Created ino #%llu with mode %o, nlink %d(%d). nrpages %ld\n",
 		  __func__, inode->i_ino, inode->i_mode, inode->i_nlink,
 		  f->inocache->pino_nlink, inode->i_mapping->nrpages);
 
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5e1ef4bc009b..1e18d3a79840 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -88,7 +88,7 @@ static int jffs2_do_readpage_nolock(struct inode *inode, struct folio *folio)
 	unsigned char *kaddr;
 	int ret;
 
-	jffs2_dbg(2, "%s(): ino #%lu, page at offset 0x%lx\n",
+	jffs2_dbg(2, "%s(): ino #%llu, page at offset 0x%lx\n",
 		  __func__, inode->i_ino, folio->index << PAGE_SHIFT);
 
 	BUG_ON(!folio_test_locked(folio));
@@ -259,7 +259,7 @@ static int jffs2_write_end(const struct kiocb *iocb,
 	uint32_t writtenlen = 0;
 	void *buf;
 
-	jffs2_dbg(1, "%s(): ino #%lu, page at 0x%llx, range %d-%d, flags %lx\n",
+	jffs2_dbg(1, "%s(): ino #%llu, page at 0x%llx, range %d-%d, flags %lx\n",
 		  __func__, inode->i_ino, folio_pos(folio),
 		  start, end, folio->flags.f);
 
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index c3ce2c868f7a..6ada8369a762 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -43,7 +43,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	int ret;
 	int alloc_type = ALLOC_NORMAL;
 
-	jffs2_dbg(1, "%s(): ino #%lu\n", __func__, inode->i_ino);
+	jffs2_dbg(1, "%s(): ino #%llu\n", __func__, inode->i_ino);
 
 	/* Special cases - we don't want more than one data node
 	   for these types on the medium at any time. So setattr
@@ -243,7 +243,7 @@ void jffs2_evict_inode (struct inode *inode)
 	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 
-	jffs2_dbg(1, "%s(): ino #%lu mode %o\n",
+	jffs2_dbg(1, "%s(): ino #%llu mode %o\n",
 		  __func__, inode->i_ino, inode->i_mode);
 	truncate_inode_pages_final(&inode->i_data);
 	clear_inode(inode);
@@ -334,8 +334,8 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
 		ret = jffs2_read_dnode(c, f, f->metadata, (char *)&jdev, 0, f->metadata->size);
 		if (ret < 0) {
 			/* Eep */
-			pr_notice("Read device numbers for inode %lu failed\n",
-				  (unsigned long)inode->i_ino);
+			pr_notice("Read device numbers for inode %llu failed\n",
+				  inode->i_ino);
 			goto error;
 		}
 		if (f->metadata->size == sizeof(jdev.old_id))
@@ -351,8 +351,8 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
 		break;
 
 	default:
-		pr_warn("%s(): Bogus i_mode %o for ino %lu\n",
-			__func__, inode->i_mode, (unsigned long)inode->i_ino);
+		pr_warn("%s(): Bogus i_mode %o for ino %llu\n",
+			__func__, inode->i_mode, inode->i_ino);
 	}
 
 	mutex_unlock(&f->sem);
@@ -374,12 +374,12 @@ void jffs2_dirty_inode(struct inode *inode, int flags)
 	struct iattr iattr;
 
 	if (!(inode_state_read_once(inode) & I_DIRTY_DATASYNC)) {
-		jffs2_dbg(2, "%s(): not calling setattr() for ino #%lu\n",
+		jffs2_dbg(2, "%s(): not calling setattr() for ino #%llu\n",
 			  __func__, inode->i_ino);
 		return;
 	}
 
-	jffs2_dbg(1, "%s(): calling setattr() for ino #%lu\n",
+	jffs2_dbg(1, "%s(): calling setattr() for ino #%llu\n",
 		  __func__, inode->i_ino);
 
 	iattr.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_MTIME|ATTR_CTIME;
@@ -428,7 +428,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
 	struct jffs2_inode_info *f;
 	int ret;
 
-	jffs2_dbg(1, "%s(): dir_i %ld, mode 0x%x\n",
+	jffs2_dbg(1, "%s(): dir_i %llu, mode 0x%x\n",
 		  __func__, dir_i->i_ino, mode);
 
 	c = JFFS2_SB_INFO(sb);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 4709762713ef..c7914dbc91ed 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -64,7 +64,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
 		inode->i_op = &jfs_file_inode_operations;
 		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 	} else {
-		printk(KERN_DEBUG "JFS: Invalid file type 0%04o for inode %lu.\n",
+		printk(KERN_DEBUG "JFS: Invalid file type 0%04o for inode %llu.\n",
 		       inode->i_mode, inode->i_ino);
 		iget_failed(inode);
 		return ERR_PTR(-EIO);
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 294a67327c73..13ab21e66f51 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -302,7 +302,7 @@ int diRead(struct inode *ip)
 	unsigned long pageno;
 	int rel_inode;
 
-	jfs_info("diRead: ino = %ld", ip->i_ino);
+	jfs_info("diRead: ino = %llu", ip->i_ino);
 
 	ipimap = sbi->ipimap;
 	JFS_IP(ip)->ipimap = ipimap;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 64c6eaa7f3f2..c95804f6dc19 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -692,7 +692,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 	unsigned long page_index;
 	unsigned long page_offset;
 
-	jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
+	jfs_info("__get_metapage: ino = %llu, lblock = 0x%lx, abs=%d",
 		 inode->i_ino, lblock, absolute);
 
 	l2bsize = inode->i_blkbits;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 255a847ca0b6..0b6be8b8aeb1 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -487,7 +487,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	int			async_block = 0;
 	__be32			ret;
 
-	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
+	dprintk("lockd: nlmsvc_lock(%s/%llu, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
 				inode->i_sb->s_id, inode->i_ino,
 				lock->fl.c.flc_type,
 				lock->fl.c.flc_pid,
@@ -617,7 +617,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 	int			mode;
 	__be32			ret;
 
-	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
+	dprintk("lockd: nlmsvc_testlock(%s/%llu, ty=%d, %Ld-%Ld)\n",
 				nlmsvc_file_inode(file)->i_sb->s_id,
 				nlmsvc_file_inode(file)->i_ino,
 				lock->fl.c.flc_type,
@@ -676,7 +676,7 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
 {
 	int	error = 0;
 
-	dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
+	dprintk("lockd: nlmsvc_unlock(%s/%llu, pi=%d, %Ld-%Ld)\n",
 				nlmsvc_file_inode(file)->i_sb->s_id,
 				nlmsvc_file_inode(file)->i_ino,
 				lock->fl.c.flc_pid,
@@ -716,7 +716,7 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
 	int status = 0;
 	int mode;
 
-	dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
+	dprintk("lockd: nlmsvc_cancel(%s/%llu, pi=%d, %Ld-%Ld)\n",
 				nlmsvc_file_inode(file)->i_sb->s_id,
 				nlmsvc_file_inode(file)->i_ino,
 				lock->fl.c.flc_pid,
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dd0214dcb695..79f3dd2fd366 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -47,7 +47,7 @@ static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
 {
 	struct inode *inode = nlmsvc_file_inode(file);
 
-	dprintk("lockd: %s %s/%ld\n",
+	dprintk("lockd: %s %s/%llu\n",
 		msg, inode->i_sb->s_id, inode->i_ino);
 }
 #else
diff --git a/fs/locks.c b/fs/locks.c
index d13ec930b7bb..d8b066fb4210 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -234,7 +234,7 @@ locks_check_ctx_lists(struct inode *inode)
 	if (unlikely(!list_empty(&ctx->flc_flock) ||
 		     !list_empty(&ctx->flc_posix) ||
 		     !list_empty(&ctx->flc_lease))) {
-		pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
+		pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%llx:\n",
 			MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
 			inode->i_ino);
 		locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
@@ -251,7 +251,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list, char *list_
 
 	list_for_each_entry(flc, list, flc_list)
 		if (flc->flc_file == filp)
-			pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx "
+			pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%llx "
 				" fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
 				list_type, MAJOR(inode->i_sb->s_dev),
 				MINOR(inode->i_sb->s_dev), inode->i_ino,
@@ -2896,7 +2896,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock_core *flc,
 			     (type == F_RDLCK) ? "READ" : "UNLCK");
 	if (inode) {
 		/* userspace relies on this representation of dev_t */
-		seq_printf(f, "%d %02x:%02x:%lu ", pid,
+		seq_printf(f, "%d %02x:%02x:%llu ", pid,
 				MAJOR(inode->i_sb->s_dev),
 				MINOR(inode->i_sb->s_dev), inode->i_ino);
 	} else {
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 99541c6a5bbf..838b072b6cf0 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -36,7 +36,7 @@ void __minix_error_inode(struct inode *inode, const char *function,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 	printk(KERN_CRIT "minix-fs error (device %s): %s:%d: "
-	       "inode #%lu: comm %s: %pV\n",
+	       "inode #%llu: comm %s: %pV\n",
 	       inode->i_sb->s_id, function, line, inode->i_ino,
 	       current->comm, &vaf);
 	va_end(args);
@@ -520,7 +520,7 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
 		   S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 		init_special_inode(inode, inode->i_mode, rdev);
 	} else {
-		printk(KERN_DEBUG "MINIX-fs: Invalid file type 0%04o for inode %lu.\n",
+		printk(KERN_DEBUG "MINIX-fs: Invalid file type 0%04o for inode %llu.\n",
 		       inode->i_mode, inode->i_ino);
 		make_bad_inode(inode);
 	}
@@ -542,7 +542,7 @@ static struct inode *V1_minix_iget(struct inode *inode)
 		return ERR_PTR(-EIO);
 	}
 	if (raw_inode->i_nlinks == 0) {
-		printk("MINIX-fs: deleted inode referenced: %lu\n",
+		printk("MINIX-fs: deleted inode referenced: %llu\n",
 		       inode->i_ino);
 		brelse(bh);
 		iget_failed(inode);
@@ -580,7 +580,7 @@ static struct inode *V2_minix_iget(struct inode *inode)
 		return ERR_PTR(-EIO);
 	}
 	if (raw_inode->i_nlinks == 0) {
-		printk("MINIX-fs: deleted inode referenced: %lu\n",
+		printk("MINIX-fs: deleted inode referenced: %llu\n",
 		       inode->i_ino);
 		brelse(bh);
 		iget_failed(inode);
@@ -692,7 +692,7 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
 	if (wbc->sync_mode == WB_SYNC_ALL && buffer_dirty(bh)) {
 		sync_dirty_buffer(bh);
 		if (buffer_req(bh) && !buffer_uptodate(bh)) {
-			printk("IO error syncing minix inode [%s:%08lx]\n",
+			printk("IO error syncing minix inode [%s:%08llx]\n",
 				inode->i_sb->s_id, inode->i_ino);
 			err = -EIO;
 		}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2402f57c8e7d..ddc3789363a5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1906,7 +1906,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
 	}
 
 	error = nfs_lookup_verify_inode(inode, flags);
-	dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
+	dfprintk(LOOKUPCACHE, "NFS: %s: inode %llu is %s\n",
 			__func__, inode->i_ino, error ? "invalid" : "valid");
 	return !error;
 }
@@ -2121,7 +2121,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	/* Expect a negative dentry */
 	BUG_ON(d_inode(dentry));
 
-	dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
+	dfprintk(VFS, "NFS: atomic_open(%s/%llu), %pd\n",
 			dir->i_sb->s_id, dir->i_ino, dentry);
 
 	err = nfs_check_flags(open_flags);
@@ -2404,7 +2404,7 @@ static int nfs_do_create(struct inode *dir, struct dentry *dentry,
 
 	open_flags |= O_CREAT;
 
-	dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
+	dfprintk(VFS, "NFS: create(%s/%llu), %pd\n",
 			dir->i_sb->s_id, dir->i_ino, dentry);
 
 	attr.ia_mode = mode;
@@ -2442,7 +2442,7 @@ nfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	struct iattr attr;
 	int status;
 
-	dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
+	dfprintk(VFS, "NFS: mknod(%s/%llu), %pd\n",
 			dir->i_sb->s_id, dir->i_ino, dentry);
 
 	attr.ia_mode = mode;
@@ -2469,7 +2469,7 @@ struct dentry *nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	struct iattr attr;
 	struct dentry *ret;
 
-	dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
+	dfprintk(VFS, "NFS: mkdir(%s/%llu), %pd\n",
 			dir->i_sb->s_id, dir->i_ino, dentry);
 
 	attr.ia_valid = ATTR_MODE;
@@ -2507,7 +2507,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int error;
 
-	dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
+	dfprintk(VFS, "NFS: rmdir(%s/%llu), %pd\n",
 			dir->i_sb->s_id, dir->i_ino, dentry);
 
 	trace_nfs_rmdir_enter(dir, dentry);
@@ -2578,7 +2578,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
 {
 	int error;
 
-	dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
+	dfprintk(VFS, "NFS: unlink(%s/%llu, %pd)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry);
 
 	trace_nfs_unlink_enter(dir, dentry);
@@ -2638,7 +2638,7 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	unsigned int pathlen = strlen(symname);
 	int error;
 
-	dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
+	dfprintk(VFS, "NFS: symlink(%s/%llu, %pd, %s)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry, symname);
 
 	if (pathlen > PAGE_SIZE)
@@ -2660,7 +2660,7 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	error = NFS_PROTO(dir)->symlink(dir, dentry, folio, pathlen, &attr);
 	trace_nfs_symlink_exit(dir, dentry, error);
 	if (error != 0) {
-		dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
+		dfprintk(VFS, "NFS: symlink(%s/%llu, %pd, %s) error %d\n",
 			dir->i_sb->s_id, dir->i_ino,
 			dentry, symname, error);
 		d_drop(dentry);
@@ -3414,7 +3414,7 @@ out:
 	if (!res && (mask & MAY_EXEC))
 		res = nfs_execute_ok(inode, mask);
 
-	dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
+	dfprintk(VFS, "NFS: permission(%s/%llu), mask=0x%x, res=%d\n",
 		inode->i_sb->s_id, inode->i_ino, mask, res);
 	return res;
 out_notsup:
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5d08b6409c28..25048a3c2364 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -391,7 +391,7 @@ static int nfs_write_begin(const struct kiocb *iocb,
 
 	trace_nfs_write_begin(file_inode(file), pos, len);
 
-	dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
+	dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%llu), %u@%lld)\n",
 		file, mapping->host->i_ino, len, (long long) pos);
 	nfs_truncate_last_folio(mapping, i_size_read(mapping->host), pos);
 
@@ -432,7 +432,7 @@ static int nfs_write_end(const struct kiocb *iocb,
 	int status;
 
 	trace_nfs_write_end(file_inode(file), pos, len);
-	dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
+	dfprintk(PAGECACHE, "NFS: write_end(%pD2(%llu), %u@%lld)\n",
 		file, mapping->host->i_ino, len, (long long) pos);
 
 	/*
@@ -557,7 +557,7 @@ static int nfs_launder_folio(struct folio *folio)
 	struct inode *inode = folio->mapping->host;
 	int ret;
 
-	dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
+	dfprintk(PAGECACHE, "NFS: launder_folio(%llu, %llu)\n",
 		inode->i_ino, folio_pos(folio));
 
 	folio_wait_private_2(folio); /* [DEPRECATED] */
@@ -647,7 +647,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
 	struct address_space *mapping;
 	struct folio *folio = page_folio(vmf->page);
 
-	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
+	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%llu), offset %lld)\n",
 		 filp, filp->f_mapping->host->i_ino,
 		 (long long)folio_pos(folio));
 
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 90a11afa5d05..e85380e3b11d 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -241,7 +241,7 @@ filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
 
 	/* Note: if the write is unstable, don't set end_offs until commit */
 	pnfs_set_layoutcommit(hdr->inode, hdr->lseg, end_offs);
-	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+	dprintk("%s inode %llu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
 		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
 
@@ -456,7 +456,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
 	u32 j, idx;
 	struct nfs_fh *fh;
 
-	dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
+	dprintk("--> %s ino %llu pgbase %u req %zu@%llu\n",
 		__func__, hdr->inode->i_ino,
 		hdr->args.pgbase, (size_t)hdr->args.count, offset);
 
@@ -514,7 +514,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 	if (IS_ERR(ds_clnt))
 		return PNFS_NOT_ATTEMPTED;
 
-	dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n",
+	dprintk("%s ino %llu sync %d req %zu@%llu DS: %s cl_count %d\n",
 		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
 		offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));
 
@@ -1001,7 +1001,7 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
 	if (IS_ERR(ds_clnt))
 		goto out_err;
 
-	dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
+	dprintk("%s ino %llu, how %d cl_count %d\n", __func__,
 		data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count));
 	data->commit_done_cb = filelayout_commit_done_cb;
 	refcount_inc(&ds->ds_clp->cl_count);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index f67773d52830..8b1559171fe3 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1631,7 +1631,7 @@ ff_layout_set_layoutcommit(struct inode *inode,
 		return;
 
 	pnfs_set_layoutcommit(inode, lseg, end_offset);
-	dprintk("%s inode %lu pls_end_pos %llu\n", __func__, inode->i_ino,
+	dprintk("%s inode %llu pls_end_pos %llu\n", __func__, inode->i_ino,
 		(unsigned long long) NFS_I(inode)->layout->plh_lwb);
 }
 
@@ -2136,7 +2136,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
 	u32 dss_id;
 	bool ds_fatal_error = false;
 
-	dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
+	dprintk("--> %s ino %llu pgbase %u req %zu@%llu\n",
 		__func__, hdr->inode->i_ino,
 		hdr->args.pgbase, (size_t)hdr->args.count, offset);
 
@@ -2245,7 +2245,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 
 	vers = nfs4_ff_layout_ds_version(mirror, dss_id);
 
-	dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
+	dprintk("%s ino %llu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
 		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
 		offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count),
 		vers);
@@ -2336,7 +2336,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
 
 	vers = nfs4_ff_layout_ds_version(mirror, dss_id);
 
-	dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
+	dprintk("%s ino %llu, how %d cl_count %d vers %d\n", __func__,
 		data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
 		vers);
 	data->commit_done_cb = ff_layout_commit_done_cb;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4786343eeee0..98a8f0de1199 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2258,7 +2258,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 	bool attr_changed = false;
 	bool have_delegation;
 
-	dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%llx)\n",
+	dfprintk(VFS, "NFS: %s(%s/%llu fh_crc=0x%08x ct=%d info=0x%llx)\n",
 			__func__, inode->i_sb->s_id, inode->i_ino,
 			nfs_display_fhandle_hash(NFS_FH(inode)),
 			icount_read(inode), fattr->valid);
@@ -2288,7 +2288,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		/*
 		* Big trouble! The inode has become a different object.
 		*/
-		printk(KERN_DEBUG "NFS: %s: inode %lu mode changed, %07o to %07o\n",
+		printk(KERN_DEBUG "NFS: %s: inode %llu mode changed, %07o to %07o\n",
 				__func__, inode->i_ino, inode->i_mode, fattr->mode);
 		goto out_err;
 	}
@@ -2358,7 +2358,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 				if (S_ISDIR(inode->i_mode))
 					nfs_force_lookup_revalidate(inode);
 				attr_changed = true;
-				dprintk("NFS: change_attr change on server for file %s/%ld\n",
+				dprintk("NFS: change_attr change on server for file %s/%llu\n",
 						inode->i_sb->s_id,
 						inode->i_ino);
 			} else if (!have_delegation) {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 91bcf67bd743..d839a97df822 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4714,7 +4714,7 @@ static int _nfs4_proc_lookupp(struct inode *inode,
 	nfs_fattr_init(fattr);
 	nfs4_init_sequence(server->nfs_client, &args.seq_args, &res.seq_res, 0, 0);
 
-	dprintk("NFS call  lookupp ino=0x%lx\n", inode->i_ino);
+	dprintk("NFS call  lookupp ino=0x%llx\n", inode->i_ino);
 	status = nfs4_do_call_sync(clnt, server, &msg, &args.seq_args,
 				   &res.seq_res, task_flags);
 	dprintk("NFS reply lookupp: %d\n", status);
@@ -10019,7 +10019,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
 	int status = 0;
 
 	dprintk("NFS: initiating layoutcommit call. sync %d "
-		"lbw: %llu inode %lu\n", sync,
+		"lbw: %llu inode %llu\n", sync,
 		data->args.lastbytewritten,
 		data->args.inode->i_ino);
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bc13d1e69449..e79deb9bf664 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -891,7 +891,7 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
 	while (!list_empty(layout_list)) {
 		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
 				plh_bulk_destroy);
-		dprintk("%s freeing layout for inode %lu\n", __func__,
+		dprintk("%s freeing layout for inode %llu\n", __func__,
 			lo->plh_inode->i_ino);
 		inode = lo->plh_inode;
 
@@ -1440,7 +1440,7 @@ _pnfs_return_layout(struct inode *ino)
 	int status = 0;
 	bool send, valid_layout;
 
-	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
+	dprintk("NFS: %s for inode %llu\n", __func__, ino->i_ino);
 
 	spin_lock(&ino->i_lock);
 	lo = nfsi->layout;
@@ -3055,7 +3055,7 @@ pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
 
 	hdr->mds_ops = call_ops;
 
-	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
+	dprintk("%s: Writing ino:%llu %u@%llu (how %d)\n", __func__,
 		inode->i_ino, hdr->args.count, hdr->args.offset, how);
 	trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
 	if (trypnfs != PNFS_NOT_ATTEMPTED)
@@ -3181,7 +3181,7 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
 
 	hdr->mds_ops = call_ops;
 
-	dprintk("%s: Reading ino:%lu %u@%llu\n",
+	dprintk("%s: Reading ino:%llu %u@%llu\n",
 		__func__, inode->i_ino, hdr->args.count, hdr->args.offset);
 
 	trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
@@ -3314,7 +3314,7 @@ pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
 	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
 		nfsi->layout->plh_lwb = end_pos;
 		mark_as_dirty = true;
-		dprintk("%s: Set layoutcommit for inode %lu ",
+		dprintk("%s: Set layoutcommit for inode %llu ",
 			__func__, inode->i_ino);
 	} else if (end_pos > nfsi->layout->plh_lwb)
 		nfsi->layout->plh_lwb = end_pos;
@@ -3363,7 +3363,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 	if (!pnfs_layoutcommit_outstanding(inode))
 		return 0;
 
-	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
+	dprintk("--> %s inode %llu\n", __func__, inode->i_ino);
 
 	status = -EAGAIN;
 	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8fdbba7cad96..d2259d948cc3 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1039,7 +1039,7 @@ exp_rootfh(struct net *net, struct auth_domain *clp, char *name,
 	}
 	inode = d_inode(path.dentry);
 
-	dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
+	dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%llu)\n",
 		 name, path.dentry, clp->name,
 		 inode->i_sb->s_id, inode->i_ino);
 	exp = exp_parent(cd, clp, &path);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6b9c399b89df..a569d89ac912 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1253,7 +1253,7 @@ static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct f
 	if (ret) {
 		struct inode *inode = file_inode(f);
 
-		pr_notice_ratelimited("nfsd: Unable to update timestamps on inode %02x:%02x:%lu: %d\n",
+		pr_notice_ratelimited("nfsd: Unable to update timestamps on inode %02x:%02x:%llu: %d\n",
 					MAJOR(inode->i_sb->s_dev),
 					MINOR(inode->i_sb->s_dev),
 					inode->i_ino, ret);
@@ -2888,7 +2888,7 @@ static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
 {
 	struct inode *inode = file_inode(f->nf_file);
 
-	seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
+	seq_printf(s, "superblock: \"%02x:%02x:%llu\"",
 					MAJOR(inode->i_sb->s_dev),
 					 MINOR(inode->i_sb->s_dev),
 					 inode->i_ino);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ed85dd43da18..ee72c9565e4f 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -601,9 +601,9 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	struct inode * inode = d_inode(dentry);
 	dev_t ex_dev = exp_sb(exp)->s_dev;
 
-	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
+	dprintk("nfsd: fh_compose(exp %02x:%02x/%llu %pd2, ino=%llu)\n",
 		MAJOR(ex_dev), MINOR(ex_dev),
-		(long) d_inode(exp->ex_path.dentry)->i_ino,
+		d_inode(exp->ex_path.dentry)->i_ino,
 		dentry,
 		(inode ? inode->i_ino : 0));
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c884c3f34afb..eafdf7b7890f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1163,7 +1163,7 @@ nfsd_direct_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	} else if (unlikely(host_err == -EINVAL)) {
 		struct inode *inode = d_inode(fhp->fh_dentry);
 
-		pr_info_ratelimited("nfsd: Direct I/O alignment failure on %s/%ld\n",
+		pr_info_ratelimited("nfsd: Direct I/O alignment failure on %s/%llu\n",
 				    inode->i_sb->s_id, inode->i_ino);
 		host_err = -ESERVERFAULT;
 	}
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index e7eebb04f9a4..7b1cd2baefcf 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -707,7 +707,7 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
 
 	if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
 		nilfs_warn(inode->i_sb,
-			   "%s (ino=%lu): entry number %llu already freed",
+			   "%s (ino=%llu): entry number %llu already freed",
 			   __func__, inode->i_ino,
 			   (unsigned long long)req->pr_entry_nr);
 	else
@@ -748,7 +748,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
 
 	if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
 		nilfs_warn(inode->i_sb,
-			   "%s (ino=%lu): entry number %llu already freed",
+			   "%s (ino=%llu): entry number %llu already freed",
 			   __func__, inode->i_ino,
 			   (unsigned long long)req->pr_entry_nr);
 	else
@@ -861,7 +861,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
 			if (!nilfs_clear_bit_atomic(lock, group_offset,
 						    bitmap)) {
 				nilfs_warn(inode->i_sb,
-					   "%s (ino=%lu): entry number %llu already freed",
+					   "%s (ino=%llu): entry number %llu already freed",
 					   __func__, inode->i_ino,
 					   (unsigned long long)entry_nrs[j]);
 			} else {
@@ -906,7 +906,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
 							      last_nrs[k]);
 			if (ret && ret != -ENOENT)
 				nilfs_warn(inode->i_sb,
-					   "error %d deleting block that object (entry=%llu, ino=%lu) belongs to",
+					   "error %d deleting block that object (entry=%llu, ino=%llu) belongs to",
 					   ret, (unsigned long long)last_nrs[k],
 					   inode->i_ino);
 		}
@@ -923,7 +923,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
 			ret = nilfs_palloc_delete_bitmap_block(inode, group);
 			if (ret && ret != -ENOENT)
 				nilfs_warn(inode->i_sb,
-					   "error %d deleting bitmap block of group=%lu, ino=%lu",
+					   "error %d deleting bitmap block of group=%lu, ino=%llu",
 					   ret, group, inode->i_ino);
 		}
 	}
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index ccc1a7aa52d2..824f2bd91c16 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -33,7 +33,7 @@ static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
 
 	if (err == -EINVAL) {
 		__nilfs_error(inode->i_sb, fname,
-			      "broken bmap (inode number=%lu)", inode->i_ino);
+			      "broken bmap (inode number=%llu)", inode->i_ino);
 		err = -EIO;
 	}
 	return err;
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 568367129092..2e553d698d0f 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -64,7 +64,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
 		 * clearing of an abandoned b-tree node is missing somewhere).
 		 */
 		nilfs_error(inode->i_sb,
-			    "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)",
+			    "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%llu)",
 			    (unsigned long long)blocknr, inode->i_ino);
 		goto failed;
 	}
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index dd0c8e560ef6..3c03f5a741d1 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -353,7 +353,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 		     nchildren <= 0 ||
 		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
 		nilfs_crit(inode->i_sb,
-			   "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
+			   "bad btree node (ino=%llu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
 			   inode->i_ino, (unsigned long long)blocknr, level,
 			   flags, nchildren);
 		ret = 1;
@@ -384,7 +384,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX ||
 		     (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) {
 		nilfs_crit(inode->i_sb,
-			   "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
+			   "bad btree root (ino=%llu): level = %d, flags = 0x%x, nchildren = %d",
 			   inode->i_ino, level, flags, nchildren);
 		ret = 1;
 	}
@@ -453,7 +453,7 @@ static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
 	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 		dump_stack();
 		nilfs_crit(btree->b_inode->i_sb,
-			   "btree level mismatch (ino=%lu): %d != %d",
+			   "btree level mismatch (ino=%llu): %d != %d",
 			   btree->b_inode->i_ino,
 			   nilfs_btree_node_get_level(node), level);
 		return 1;
@@ -521,7 +521,7 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
  out_no_wait:
 	if (!buffer_uptodate(bh)) {
 		nilfs_err(btree->b_inode->i_sb,
-			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
+			  "I/O error reading b-tree node block (ino=%llu, blocknr=%llu)",
 			  btree->b_inode->i_ino, (unsigned long long)ptr);
 		brelse(bh);
 		return -EIO;
@@ -2104,7 +2104,7 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree,
 	if (ret < 0) {
 		if (unlikely(ret == -ENOENT)) {
 			nilfs_crit(btree->b_inode->i_sb,
-				   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
+				   "writing node/leaf block does not appear in b-tree (ino=%llu) at key=%llu, level=%d",
 				   btree->b_inode->i_ino,
 				   (unsigned long long)key, level);
 			ret = -EINVAL;
@@ -2146,7 +2146,7 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
 	    level >= NILFS_BTREE_LEVEL_MAX) {
 		dump_stack();
 		nilfs_warn(btree->b_inode->i_sb,
-			   "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
+			   "invalid btree level: %d (key=%llu, ino=%llu, blocknr=%llu)",
 			   level, (unsigned long long)key,
 			   btree->b_inode->i_ino,
 			   (unsigned long long)bh->b_blocknr);
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index b243199036df..3653db5cdb65 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -150,7 +150,7 @@ out:
 
 Ebadsize:
 	nilfs_error(sb,
-		    "size of directory #%lu is not a multiple of chunk size",
+		    "size of directory #%llu is not a multiple of chunk size",
 		    dir->i_ino);
 	goto fail;
 Eshort:
@@ -169,7 +169,7 @@ Einumber:
 	error = "disallowed inode number";
 bad_entry:
 	nilfs_error(sb,
-		    "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d",
+		    "bad entry in directory #%llu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d",
 		    dir->i_ino, error, (folio->index << PAGE_SHIFT) + offs,
 		    (unsigned long)le64_to_cpu(p->inode),
 		    rec_len, p->name_len);
@@ -177,7 +177,7 @@ bad_entry:
 Eend:
 	p = (struct nilfs_dir_entry *)(kaddr + offs);
 	nilfs_error(sb,
-		    "entry in directory #%lu spans the page boundary offset=%lu, inode=%lu",
+		    "entry in directory #%llu spans the page boundary offset=%lu, inode=%lu",
 		    dir->i_ino, (folio->index << PAGE_SHIFT) + offs,
 		    (unsigned long)le64_to_cpu(p->inode));
 fail:
@@ -251,7 +251,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx)
 
 		kaddr = nilfs_get_folio(inode, n, &folio);
 		if (IS_ERR(kaddr)) {
-			nilfs_error(sb, "bad page in #%lu", inode->i_ino);
+			nilfs_error(sb, "bad page in #%llu", inode->i_ino);
 			ctx->pos += PAGE_SIZE - offset;
 			return -EIO;
 		}
@@ -336,7 +336,7 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir,
 		/* next folio is past the blocks we've got */
 		if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) {
 			nilfs_error(dir->i_sb,
-			       "dir %lu size %lld exceeds block count %llu",
+			       "dir %llu size %lld exceeds block count %llu",
 			       dir->i_ino, dir->i_size,
 			       (unsigned long long)dir->i_blocks);
 			goto out;
@@ -382,7 +382,7 @@ struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
 	return next_de;
 
 fail:
-	nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
+	nilfs_error(dir->i_sb, "directory #%llu %s", dir->i_ino, msg);
 	folio_release_kmap(folio, de);
 	return NULL;
 }
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 2d8dc6b35b54..8bd0b1374e25 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -338,7 +338,7 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap,
 	key = nilfs_bmap_data_get_key(bmap, *bh);
 	if (unlikely(key > NILFS_DIRECT_KEY_MAX)) {
 		nilfs_crit(bmap->b_inode->i_sb,
-			   "%s (ino=%lu): invalid key: %llu",
+			   "%s (ino=%llu): invalid key: %llu",
 			   __func__,
 			   bmap->b_inode->i_ino, (unsigned long long)key);
 		return -EINVAL;
@@ -346,7 +346,7 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap,
 	ptr = nilfs_direct_get_ptr(bmap, key);
 	if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) {
 		nilfs_crit(bmap->b_inode->i_sb,
-			   "%s (ino=%lu): invalid pointer: %llu",
+			   "%s (ino=%llu): invalid pointer: %llu",
 			   __func__,
 			   bmap->b_inode->i_ino, (unsigned long long)ptr);
 		return -EINVAL;
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 561c220799c7..62d4c1b787e9 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -137,7 +137,7 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
 		struct inode *inode = bh->b_folio->mapping->host;
 
 		nilfs_err(inode->i_sb,
-			  "I/O error reading %s block for GC (ino=%lu, vblocknr=%llu)",
+			  "I/O error reading %s block for GC (ino=%llu, vblocknr=%llu)",
 			  buffer_nilfs_node(bh) ? "node" : "data",
 			  inode->i_ino, (unsigned long long)bh->b_blocknr);
 		return -EIO;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 51bde45d5865..51f7e125a311 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -108,7 +108,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
 				 * be locked in this case.
 				 */
 				nilfs_warn(inode->i_sb,
-					   "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
+					   "%s (ino=%llu): a race condition while inserting a data block at offset=%llu",
 					   __func__, inode->i_ino,
 					   (unsigned long long)blkoff);
 				err = -EAGAIN;
@@ -789,7 +789,7 @@ repeat:
 		goto repeat;
 
 failed:
-	nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
+	nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%llu)",
 		   ret, ii->vfs_inode.i_ino);
 }
 
@@ -1026,7 +1026,7 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
 			 * this inode.
 			 */
 			nilfs_warn(inode->i_sb,
-				   "cannot set file dirty (ino=%lu): the file is being freed",
+				   "cannot set file dirty (ino=%llu): the file is being freed",
 				   inode->i_ino);
 			spin_unlock(&nilfs->ns_inode_lock);
 			return -EINVAL; /*
@@ -1057,7 +1057,7 @@ int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
 	err = nilfs_load_inode_block(inode, &ibh);
 	if (unlikely(err)) {
 		nilfs_warn(inode->i_sb,
-			   "cannot mark inode dirty (ino=%lu): error %d loading inode block",
+			   "cannot mark inode dirty (ino=%llu): error %d loading inode block",
 			   inode->i_ino, err);
 		return err;
 	}
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 946b0d3534a5..09adb40c65e5 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -203,7 +203,7 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
 	err = -EIO;
 	if (!buffer_uptodate(first_bh)) {
 		nilfs_err(inode->i_sb,
-			  "I/O error reading meta-data file (ino=%lu, block-offset=%lu)",
+			  "I/O error reading meta-data file (ino=%llu, block-offset=%lu)",
 			  inode->i_ino, block);
 		goto failed_bh;
 	}
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 40f4b1a28705..40ac679ec56e 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -292,7 +292,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
 
 	if (!inode->i_nlink) {
 		nilfs_warn(inode->i_sb,
-			   "deleting nonexistent file (ino=%lu), %d",
+			   "deleting nonexistent file (ino=%llu), %d",
 			   inode->i_ino, inode->i_nlink);
 		set_nlink(inode, 1);
 	}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 098a3bd103e0..4b1bf559f352 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2024,7 +2024,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
 				ifile, ii->vfs_inode.i_ino, &ibh);
 			if (unlikely(err)) {
 				nilfs_warn(sci->sc_super,
-					   "log writer: error %d getting inode block (ino=%lu)",
+					   "log writer: error %d getting inode block (ino=%llu)",
 					   err, ii->vfs_inode.i_ino);
 				return err;
 			}
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 9cc7eb863643..0f731eddeb8b 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -84,7 +84,7 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 	inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
 	inode = igrab(fsnotify_conn_inode(mark->connector));
 	if (inode) {
-		seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ",
+		seq_printf(m, "inotify wd:%x ino:%llx sdev:%x mask:%x ignored_mask:0 ",
 			   inode_mark->wd, inode->i_ino, inode->i_sb->s_dev,
 			   inotify_mark_user_mask(mark));
 		show_mark_fhandle(m, inode);
@@ -111,7 +111,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 		inode = igrab(fsnotify_conn_inode(mark->connector));
 		if (!inode)
 			return;
-		seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
+		seq_printf(m, "fanotify ino:%llx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
 			   inode->i_ino, inode->i_sb->s_dev,
 			   mflags, mark->mask, mark->ignore_mask);
 		show_mark_fhandle(m, inode);
diff --git a/fs/nsfs.c b/fs/nsfs.c
index db91de208645..eac326b85314 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -46,7 +46,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
 	struct ns_common *ns = inode->i_private;
 	const struct proc_ns_operations *ns_ops = ns->ops;
 
-	return dynamic_dname(buffer, buflen, "%s:[%lu]",
+	return dynamic_dname(buffer, buflen, "%s:[%llu]",
 		ns_ops->name, inode->i_ino);
 }
 
@@ -394,7 +394,7 @@ static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry)
 	const struct ns_common *ns = inode->i_private;
 	const struct proc_ns_operations *ns_ops = ns->ops;
 
-	seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino);
+	seq_printf(seq, "%s:[%llu]", ns_ops->name, inode->i_ino);
 	return 0;
 }
 
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 174a7cb202a0..51aa008e126a 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -153,7 +153,7 @@ void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
 	vaf.fmt = printk_skip_level(fmt);
 	vaf.va = &args;
 
-	printk("%c%cntfs3(%s): ino=%lx,%s %pV\n", KERN_SOH_ASCII, level,
+	printk("%c%cntfs3(%s): ino=%llx,%s %pV\n", KERN_SOH_ASCII, level,
 	       sb->s_id, inode->i_ino, name ? name : "", &vaf);
 
 	va_end(args);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 344fd4d95fbc..d40f5d205bce 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7318,7 +7318,7 @@ start:
 		 * to check it up here before changing the tree.
 		*/
 		if (root_el->l_tree_depth && rec->e_int_clusters == 0) {
-			mlog(ML_ERROR, "Inode %lu has an empty "
+			mlog(ML_ERROR, "Inode %llu has an empty "
 				    "extent record, depth %u\n", inode->i_ino,
 				    le16_to_cpu(root_el->l_tree_depth));
 			status = ocfs2_remove_rightmost_empty_extent(osb,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 17ba79f443ee..c7ad912ec7a0 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -137,7 +137,7 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
 			      (unsigned long long)iblock, bh_result, create);
 
 	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
-		mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n",
+		mlog(ML_NOTICE, "get_block on system inode 0x%p (%llu)\n",
 		     inode, inode->i_ino);
 
 	if (S_ISLNK(inode->i_mode)) {
@@ -2146,7 +2146,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
 	    ((iblock + ((len - 1) >> i_blkbits)) > endblk))
 		len = (endblk - iblock + 1) << i_blkbits;
 
-	mlog(0, "get block of %lu at %llu:%u req %u\n",
+	mlog(0, "get block of %llu at %llu:%u req %u\n",
 			inode->i_ino, pos, len, total_len);
 
 	/*
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 1c8abf2c592c..b82fe4431eb1 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -794,7 +794,7 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
 	if (le16_to_cpu(el->l_count) !=
 	    ocfs2_extent_recs_per_dx_root(inode->i_sb)) {
 		ret = ocfs2_error(inode->i_sb,
-				  "Inode %lu has invalid extent list length %u\n",
+				  "Inode %llu has invalid extent list length %u\n",
 				  inode->i_ino, le16_to_cpu(el->l_count));
 		goto out;
 	}
@@ -812,7 +812,7 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
 
 		if (el->l_tree_depth) {
 			ret = ocfs2_error(inode->i_sb,
-					  "Inode %lu has non zero tree depth in btree tree block %llu\n",
+					  "Inode %llu has non zero tree depth in btree tree block %llu\n",
 					  inode->i_ino,
 					  (unsigned long long)eb_bh->b_blocknr);
 			goto out;
@@ -821,7 +821,7 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
 
 	if (le16_to_cpu(el->l_next_free_rec) == 0) {
 		ret = ocfs2_error(inode->i_sb,
-				  "Inode %lu has empty extent list at depth %u\n",
+				  "Inode %llu has empty extent list at depth %u\n",
 				  inode->i_ino,
 				  le16_to_cpu(el->l_tree_depth));
 		goto out;
@@ -839,7 +839,7 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
 
 	if (!found) {
 		ret = ocfs2_error(inode->i_sb,
-				  "Inode %lu has bad extent record (%u, %u, 0) in btree\n",
+				  "Inode %llu has bad extent record (%u, %u, 0) in btree\n",
 				  inode->i_ino,
 				  le32_to_cpu(rec->e_cpos),
 				  ocfs2_rec_clusters(el, rec));
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 45cce261da65..5821e33df78f 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -123,7 +123,7 @@ static int dlmfs_file_open(struct inode *inode,
 	if (S_ISDIR(inode->i_mode))
 		BUG();
 
-	mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino,
+	mlog(0, "open called on inode %llu, flags 0x%x\n", inode->i_ino,
 		file->f_flags);
 
 	status = dlmfs_decode_open_flags(file->f_flags, &level, &flags);
@@ -170,7 +170,7 @@ static int dlmfs_file_release(struct inode *inode,
 	if (S_ISDIR(inode->i_mode))
 		BUG();
 
-	mlog(0, "close called on inode %lu\n", inode->i_ino);
+	mlog(0, "close called on inode %llu\n", inode->i_ino);
 
 	if (fp) {
 		level = fp->fp_lock_level;
@@ -242,7 +242,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
 	int bytes_left;
 	struct inode *inode = file_inode(filp);
 
-	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
+	mlog(0, "inode %llu, count = %zu, *ppos = %llu\n",
 		inode->i_ino, count, *ppos);
 
 	if (*ppos >= DLM_LVB_LEN)
@@ -301,7 +301,7 @@ static void dlmfs_evict_inode(struct inode *inode)
 
 	clear_inode(inode);
 
-	mlog(0, "inode %lu\n", inode->i_ino);
+	mlog(0, "inode %llu\n", inode->i_ino);
 
 	ip = DLMFS_I(inode);
 	lockres = &ip->ip_lockres;
@@ -490,7 +490,7 @@ static int dlmfs_unlink(struct inode *dir,
 	int status;
 	struct inode *inode = d_inode(dentry);
 
-	mlog(0, "unlink inode %lu\n", inode->i_ino);
+	mlog(0, "unlink inode %llu\n", inode->i_ino);
 
 	/* if there are no current holders, or none that are waiting
 	 * to acquire a lock, this basically destroys our lockres. */
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index d68229422dda..eb5dcd17d437 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -291,7 +291,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
 
 	if (el->l_tree_depth) {
 		ocfs2_error(inode->i_sb,
-			    "Inode %lu has non zero tree depth in leaf block %llu\n",
+			    "Inode %llu has non zero tree depth in leaf block %llu\n",
 			    inode->i_ino,
 			    (unsigned long long)eb_bh->b_blocknr);
 		ret = -EROFS;
@@ -427,7 +427,7 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
 
 		if (el->l_tree_depth) {
 			ocfs2_error(inode->i_sb,
-				    "Inode %lu has non zero tree depth in leaf block %llu\n",
+				    "Inode %llu has non zero tree depth in leaf block %llu\n",
 				    inode->i_ino,
 				    (unsigned long long)eb_bh->b_blocknr);
 			ret = -EROFS;
@@ -437,7 +437,7 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
 
 	if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) {
 		ocfs2_error(inode->i_sb,
-			    "Inode %lu has an invalid extent (next_free_rec %u, count %u)\n",
+			    "Inode %llu has an invalid extent (next_free_rec %u, count %u)\n",
 			    inode->i_ino,
 			    le16_to_cpu(el->l_next_free_rec),
 			    le16_to_cpu(el->l_count));
@@ -472,7 +472,7 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
 
 	if (!rec->e_blkno) {
 		ocfs2_error(inode->i_sb,
-			    "Inode %lu has bad extent record (%u, %u, 0)\n",
+			    "Inode %llu has bad extent record (%u, %u, 0)\n",
 			    inode->i_ino,
 			    le32_to_cpu(rec->e_cpos),
 			    ocfs2_rec_clusters(el, rec));
@@ -561,7 +561,7 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 
 		if (el->l_tree_depth) {
 			ocfs2_error(inode->i_sb,
-				    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
+				    "Inode %llu has non zero tree depth in xattr leaf block %llu\n",
 				    inode->i_ino,
 				    (unsigned long long)eb_bh->b_blocknr);
 			ret = -EROFS;
@@ -580,7 +580,7 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 
 		if (!rec->e_blkno) {
 			ocfs2_error(inode->i_sb,
-				    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
+				    "Inode %llu has bad extent record (%u, %u, 0) in xattr\n",
 				    inode->i_ino,
 				    le32_to_cpu(rec->e_cpos),
 				    ocfs2_rec_clusters(el, rec));
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 03a51662ea8e..26025ba2656c 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1196,7 +1196,7 @@ static void ocfs2_clear_inode(struct inode *inode)
 				inode->i_nlink);
 
 	mlog_bug_on_msg(osb == NULL,
-			"Inode=%lu\n", inode->i_ino);
+			"Inode=%llu\n", inode->i_ino);
 
 	dquot_drop(inode);
 
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index c4e0117d8977..269b0f27d567 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -471,7 +471,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 	qsize_t spacechange, inodechange;
 	unsigned int memalloc;
 
-	trace_ocfs2_recover_local_quota_file((unsigned long)lqinode->i_ino, type);
+	trace_ocfs2_recover_local_quota_file(lqinode->i_ino, type);
 
 	list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
 		chunk = rchunk->rc_chunk;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index c1cdececdfa4..6d7f44d3e929 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2341,7 +2341,7 @@ static int ocfs2_mark_extent_refcounted(struct inode *inode,
 					   cpos, len, phys);
 
 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
-		ret = ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
+		ret = ocfs2_error(inode->i_sb, "Inode %llu want to use refcount tree, but the feature bit is not set in the super block\n",
 				  inode->i_ino);
 		goto out;
 	}
@@ -2524,7 +2524,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 	u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
 
 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
-		ret = ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
+		ret = ocfs2_error(inode->i_sb, "Inode %llu want to use refcount tree, but the feature bit is not set in the super block\n",
 				  inode->i_ino);
 		goto out;
 	}
@@ -2650,7 +2650,7 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 
 		if (el->l_tree_depth) {
 			ret = ocfs2_error(inode->i_sb,
-					  "Inode %lu has non zero tree depth in leaf block %llu\n",
+					  "Inode %llu has non zero tree depth in leaf block %llu\n",
 					  inode->i_ino,
 					  (unsigned long long)eb_bh->b_blocknr);
 			goto out;
@@ -2662,7 +2662,7 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 		rec = &el->l_recs[i];
 
 		if (ocfs2_is_empty_extent(rec)) {
-			mlog_bug_on_msg(i != 0, "Inode %lu has empty record in "
+			mlog_bug_on_msg(i != 0, "Inode %llu has empty record in "
 					"index %d\n", inode->i_ino, i);
 			continue;
 		}
@@ -3325,7 +3325,7 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	if (!ocfs2_refcount_tree(osb)) {
-		return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
+		return ocfs2_error(inode->i_sb, "Inode %llu want to use refcount tree, but the feature bit is not set in the super block\n",
 				   inode->i_ino);
 	}
 
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 42ee5db362d3..4d55ad963ac5 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3741,7 +3741,7 @@ static int ocfs2_xattr_get_rec(struct inode *inode,
 
 		if (el->l_tree_depth) {
 			ret = ocfs2_error(inode->i_sb,
-					  "Inode %lu has non zero tree depth in xattr tree block %llu\n",
+					  "Inode %llu has non zero tree depth in xattr tree block %llu\n",
 					  inode->i_ino,
 					  (unsigned long long)eb_bh->b_blocknr);
 			goto out;
@@ -3758,7 +3758,7 @@ static int ocfs2_xattr_get_rec(struct inode *inode,
 	}
 
 	if (!e_blkno) {
-		ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
+		ret = ocfs2_error(inode->i_sb, "Inode %llu has bad extent record (%u, %u, 0) in xattr\n",
 				  inode->i_ino,
 				  le32_to_cpu(rec->e_cpos),
 				  ocfs2_rec_clusters(el, rec));
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 2d4710d0e05e..9e8a2a9e5229 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -1062,7 +1062,7 @@ struct inode *orangefs_iget(struct super_block *sb,
 	unlock_new_inode(inode);
 
 	gossip_debug(GOSSIP_INODE_DEBUG,
-		     "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
+		     "iget handle %pU, fsid %d hash %ld i_ino %llu\n",
 		     &ref->khandle,
 		     ref->fs_id,
 		     hash,
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 83f80fdb1567..0a35d1a20f13 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -262,7 +262,7 @@ out:
 	return err;
 
 fail:
-	pr_warn_ratelimited("failed to encode file handle (ino=%lu, err=%i)\n",
+	pr_warn_ratelimited("failed to encode file handle (ino=%llu, err=%i)\n",
 			    inode->i_ino, err);
 	goto out;
 }
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index d8dd4b052984..ca899fdfaafd 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -591,7 +591,7 @@ out:
 
 fail:
 	inode = d_inode(real);
-	pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
+	pr_warn_ratelimited("failed to verify %s (%pd2, ino=%llu, err=%i)\n",
 			    is_upper ? "upper" : "origin", real,
 			    inode ? inode->i_ino : 0, err);
 	goto out;
@@ -831,7 +831,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
 			index = NULL;
 			goto out;
 		}
-		pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
+		pr_warn_ratelimited("failed inode index lookup (ino=%llu, key=%.*s, err=%i);\n"
 				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
 				    d_inode(origin)->i_ino, name.len, name.name,
 				    err);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 3f1b763a8bb4..2edad9a14648 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -1092,7 +1092,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
 	got_write = true;
 	inode = d_inode(upperdentry);
 	if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
-		pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+		pr_warn_ratelimited("cleanup linked index (%pd2, ino=%llu, nlink=%u)\n",
 				    upperdentry, inode->i_ino, inode->i_nlink);
 		/*
 		 * We either have a bug with persistent union nlink or a lower
diff --git a/fs/pipe.c b/fs/pipe.c
index b44a756c0b41..9841648c9cf3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -873,7 +873,7 @@ static struct vfsmount *pipe_mnt __ro_after_init;
  */
 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
-	return dynamic_dname(buffer, buflen, "pipe:[%lu]",
+	return dynamic_dname(buffer, buflen, "pipe:[%llu]",
 				d_inode(dentry)->i_ino);
 }
 
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 9eeccff49b2a..aae1a83e8846 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -54,7 +54,7 @@ static int seq_show(struct seq_file *m, void *v)
 	if (ret)
 		return ret;
 
-	seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\nino:\t%lu\n",
+	seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\nino:\t%llu\n",
 		   (long long)file->f_pos, f_flags,
 		   real_mount(file->f_path.mnt)->mnt_id,
 		   file_inode(file)->i_ino);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e091931d7ca1..751b9ba160fb 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -442,7 +442,7 @@ static void get_vma_name(struct vm_area_struct *vma,
 static void show_vma_header_prefix(struct seq_file *m,
 				   unsigned long start, unsigned long end,
 				   vm_flags_t flags, unsigned long long pgoff,
-				   dev_t dev, unsigned long ino)
+				   dev_t dev, u64 ino)
 {
 	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
 	seq_put_hex_ll(m, NULL, start, 8);
@@ -465,7 +465,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	const struct path *path;
 	const char *name_fmt, *name;
 	vm_flags_t flags = vma->vm_flags;
-	unsigned long ino = 0;
+	u64 ino = 0;
 	unsigned long long pgoff = 0;
 	unsigned long start, end;
 	dev_t dev = 0;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 8aeb63d397cf..4deb0eeadbde 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -62,7 +62,7 @@ static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_h
 {
 	unsigned long phys;
 
-	QNX4DEBUG((KERN_INFO "qnx4: qnx4_get_block inode=[%ld] iblock=[%ld]\n",inode->i_ino,iblock));
+	QNX4DEBUG((KERN_INFO "qnx4: qnx4_get_block inode=[%llu] iblock=[%ld]\n", inode->i_ino, iblock));
 
 	phys = qnx4_block_map( inode, iblock );
 	if ( phys ) {
@@ -128,7 +128,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
 			brelse( bh );
 	}
 
-	QNX4DEBUG((KERN_INFO "qnx4: mapping block %ld of inode %ld = %ld\n",iblock,inode->i_ino,block));
+	QNX4DEBUG((KERN_INFO "qnx4: mapping block %ld of inode %llu = %ld\n", iblock, inode->i_ino, block));
 	return block;
 }
 
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index c4049bb8bd60..6de49333acad 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -75,7 +75,7 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
 {
 	unsigned phys;
 
-	pr_debug("qnx6_get_block inode=[%ld] iblock=[%ld]\n",
+	pr_debug("qnx6_get_block inode=[%llu] iblock=[%ld]\n",
 		 inode->i_ino, (unsigned long)iblock);
 
 	phys = qnx6_block_map(inode, iblock);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 160c16aa7b6e..5794de5a9069 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -230,7 +230,7 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
 	int count = 2;
 
 	pr_err("Dump in-memory inode:");
-	pr_err("\tinode          %lu\n", inode->i_ino);
+	pr_err("\tinode          %llu\n", inode->i_ino);
 	pr_err("\tsize           %llu\n",
 	       (unsigned long long)i_size_read(inode));
 	pr_err("\tnlink          %u\n", inode->i_nlink);
@@ -1101,7 +1101,7 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
 	if (ui->ui_size != ui->synced_i_size && !ui->dirty) {
 		ubifs_err(c, "ui_size is %lld, synced_i_size is %lld, but inode is clean",
 			  ui->ui_size, ui->synced_i_size);
-		ubifs_err(c, "i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
+		ubifs_err(c, "i_ino %llu, i_mode %#x, i_size %lld", inode->i_ino,
 			  inode->i_mode, i_size_read(inode));
 		dump_stack();
 		err = -EINVAL;
@@ -1163,7 +1163,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
 	kfree(pdent);
 
 	if (i_size_read(dir) != size) {
-		ubifs_err(c, "directory inode %lu has size %llu, but calculated size is %llu",
+		ubifs_err(c, "directory inode %llu has size %llu, but calculated size is %llu",
 			  dir->i_ino, (unsigned long long)i_size_read(dir),
 			  (unsigned long long)size);
 		ubifs_dump_inode(c, dir);
@@ -1171,7 +1171,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
 		return -EINVAL;
 	}
 	if (dir->i_nlink != nlink) {
-		ubifs_err(c, "directory inode %lu has nlink %u, but calculated nlink is %u",
+		ubifs_err(c, "directory inode %llu has nlink %u, but calculated nlink is %u",
 			  dir->i_ino, dir->i_nlink, nlink);
 		ubifs_dump_inode(c, dir);
 		dump_stack();
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 4c9f57f3b2ad..86d41e077e4d 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -223,7 +223,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
 	struct ubifs_info *c = dir->i_sb->s_fs_info;
 	struct fscrypt_name nm;
 
-	dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);
+	dbg_gen("'%pd' in dir ino %llu", dentry, dir->i_ino);
 
 	err = fscrypt_prepare_lookup(dir, dentry, &nm);
 	if (err == -ENOENT)
@@ -281,7 +281,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
 	if (IS_ENCRYPTED(dir) &&
 	    (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
 	    !fscrypt_has_permitted_context(dir, inode)) {
-		ubifs_warn(c, "Inconsistent encryption contexts: %lu/%lu",
+		ubifs_warn(c, "Inconsistent encryption contexts: %llu/%llu",
 			   dir->i_ino, inode->i_ino);
 		iput(inode);
 		inode = ERR_PTR(-EPERM);
@@ -318,7 +318,7 @@ static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir,
 	 * parent directory inode.
 	 */
 
-	dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+	dbg_gen("dent '%pd', mode %#hx in dir ino %llu",
 		dentry, mode, dir->i_ino);
 
 	err = ubifs_budget_space(c, &req);
@@ -386,7 +386,7 @@ static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry)
 	 * atomically.
 	 */
 
-	dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+	dbg_gen("dent '%pd', mode %#hx in dir ino %llu",
 		dentry, mode, dir->i_ino);
 
 	inode = ubifs_new_inode(c, dir, mode, false);
@@ -460,7 +460,7 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
 	 * be released via writeback.
 	 */
 
-	dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+	dbg_gen("dent '%pd', mode %#hx in dir ino %llu",
 		dentry, mode, dir->i_ino);
 
 	err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
@@ -589,7 +589,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
 	bool encrypted = IS_ENCRYPTED(dir);
 	struct ubifs_dir_data *data = file->private_data;
 
-	dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos);
+	dbg_gen("dir ino %llu, f_pos %#llx", dir->i_ino, ctx->pos);
 
 	if (ctx->pos > UBIFS_S_KEY_HASH_MASK || ctx->pos == 2)
 		/*
@@ -764,7 +764,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
 	 * changing the parent inode.
 	 */
 
-	dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
+	dbg_gen("dent '%pd' to ino %llu (nlink %d) in dir ino %llu",
 		dentry, inode->i_ino,
 		inode->i_nlink, dir->i_ino);
 	ubifs_assert(c, inode_is_locked(dir));
@@ -836,7 +836,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
 	 * deletions.
 	 */
 
-	dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
+	dbg_gen("dent '%pd' from ino %llu (nlink %d) in dir ino %llu",
 		dentry, inode->i_ino,
 		inode->i_nlink, dir->i_ino);
 
@@ -941,7 +941,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
 	 * because we have extra space reserved for deletions.
 	 */
 
-	dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
+	dbg_gen("directory '%pd', ino %llu in dir ino %llu", dentry,
 		inode->i_ino, dir->i_ino);
 	ubifs_assert(c, inode_is_locked(dir));
 	ubifs_assert(c, inode_is_locked(inode));
@@ -1018,7 +1018,7 @@ static struct dentry *ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	 * directory inode.
 	 */
 
-	dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+	dbg_gen("dent '%pd', mode %#hx in dir ino %llu",
 		dentry, mode, dir->i_ino);
 
 	err = ubifs_budget_space(c, &req);
@@ -1096,7 +1096,7 @@ static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	 * directory inode.
 	 */
 
-	dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino);
+	dbg_gen("dent '%pd' in dir ino %llu", dentry, dir->i_ino);
 
 	if (S_ISBLK(mode) || S_ISCHR(mode)) {
 		dev = kmalloc_obj(union ubifs_dev_desc, GFP_NOFS);
@@ -1183,7 +1183,7 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 					.dirtied_ino = 1 };
 	struct fscrypt_name nm;
 
-	dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry,
+	dbg_gen("dent '%pd', target '%s' in dir ino %llu", dentry,
 		symname, dir->i_ino);
 
 	err = fscrypt_prepare_symlink(dir, symname, len, UBIFS_MAX_INO_DATA,
@@ -1349,7 +1349,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 *   ino_req: marks the target inode as dirty and does not write it.
 	 */
 
-	dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu flags 0x%x",
+	dbg_gen("dent '%pd' ino %llu in dir ino %llu to dent '%pd' in dir ino %llu flags 0x%x",
 		old_dentry, old_inode->i_ino, old_dir->i_ino,
 		new_dentry, new_dir->i_ino, flags);
 
@@ -1597,7 +1597,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
 	 * parent directory inodes.
 	 */
 
-	dbg_gen("dent '%pd' ino %lu in dir ino %lu exchange dent '%pd' ino %lu in dir ino %lu",
+	dbg_gen("dent '%pd' ino %llu in dir ino %llu exchange dent '%pd' ino %llu in dir ino %llu",
 		old_dentry, fst_inode->i_ino, old_dir->i_ino,
 		new_dentry, snd_inode->i_ino, new_dir->i_ino);
 
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index cd04755e792a..e73c28b12f97 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -90,7 +90,7 @@ static int read_block(struct inode *inode, struct folio *folio, size_t offset,
 	return 0;
 
 dump:
-	ubifs_err(c, "bad data node (block %u, inode %lu)",
+	ubifs_err(c, "bad data node (block %u, inode %llu)",
 		  block, inode->i_ino);
 	ubifs_dump_node(c, dn, UBIFS_MAX_DATA_NODE_SZ);
 	return -EINVAL;
@@ -106,7 +106,7 @@ static int do_readpage(struct folio *folio)
 	loff_t i_size = i_size_read(inode);
 	size_t offset = 0;
 
-	dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
+	dbg_gen("ino %llu, pg %lu, i_size %lld, flags %#lx",
 		inode->i_ino, folio->index, i_size, folio->flags.f);
 	ubifs_assert(c, !folio_test_checked(folio));
 	ubifs_assert(c, !folio->private);
@@ -162,7 +162,7 @@ static int do_readpage(struct folio *folio)
 			dbg_gen("hole");
 			err = 0;
 		} else {
-			ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
+			ubifs_err(c, "cannot read page %lu of inode %llu, error %d",
 				  folio->index, inode->i_ino, err);
 		}
 	}
@@ -212,7 +212,7 @@ static int write_begin_slow(struct address_space *mapping,
 	int err, appending = !!(pos + len > inode->i_size);
 	struct folio *folio;
 
-	dbg_gen("ino %lu, pos %llu, len %u, i_size %lld",
+	dbg_gen("ino %llu, pos %llu, len %u, i_size %lld",
 		inode->i_ino, pos, len, inode->i_size);
 
 	/*
@@ -526,7 +526,7 @@ static int ubifs_write_end(const struct kiocb *iocb,
 	loff_t end_pos = pos + len;
 	int appending = !!(end_pos > inode->i_size);
 
-	dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
+	dbg_gen("ino %llu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
 		inode->i_ino, pos, folio->index, len, copied, inode->i_size);
 
 	if (unlikely(copied < len && !folio_test_uptodate(folio))) {
@@ -599,7 +599,7 @@ static int populate_page(struct ubifs_info *c, struct folio *folio,
 	size_t offset = 0;
 	pgoff_t end_index;
 
-	dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
+	dbg_gen("ino %llu, pg %lu, i_size %lld, flags %#lx",
 		inode->i_ino, folio->index, i_size, folio->flags.f);
 
 	end_index = (i_size - 1) >> PAGE_SHIFT;
@@ -680,7 +680,7 @@ out_hole:
 	return 0;
 
 out_err:
-	ubifs_err(c, "bad data node (block %u, inode %lu)",
+	ubifs_err(c, "bad data node (block %u, inode %llu)",
 		  page_block, inode->i_ino);
 	return -EINVAL;
 }
@@ -913,7 +913,7 @@ static int do_writepage(struct folio *folio, size_t len)
 	}
 	if (err) {
 		mapping_set_error(folio->mapping, err);
-		ubifs_err(c, "cannot write folio %lu of inode %lu, error %d",
+		ubifs_err(c, "cannot write folio %lu of inode %llu, error %d",
 			  folio->index, inode->i_ino, err);
 		ubifs_ro_mode(c, err);
 	}
@@ -987,7 +987,7 @@ static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc)
 	loff_t i_size =  i_size_read(inode), synced_i_size;
 	int err, len = folio_size(folio);
 
-	dbg_gen("ino %lu, pg %lu, pg flags %#lx",
+	dbg_gen("ino %llu, pg %lu, pg flags %#lx",
 		inode->i_ino, folio->index, folio->flags.f);
 	ubifs_assert(c, folio->private != NULL);
 
@@ -1106,7 +1106,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
 	int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
 	struct ubifs_inode *ui = ubifs_inode(inode);
 
-	dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
+	dbg_gen("ino %llu, size %lld -> %lld", inode->i_ino, old_size, new_size);
 	memset(&req, 0, sizeof(struct ubifs_budget_req));
 
 	/*
@@ -1258,7 +1258,7 @@ int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 	struct inode *inode = d_inode(dentry);
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
 
-	dbg_gen("ino %lu, mode %#x, ia_valid %#x",
+	dbg_gen("ino %llu, mode %#x, ia_valid %#x",
 		inode->i_ino, inode->i_mode, attr->ia_valid);
 	err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
 	if (err)
@@ -1308,7 +1308,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
 	int err;
 
-	dbg_gen("syncing inode %lu", inode->i_ino);
+	dbg_gen("syncing inode %llu", inode->i_ino);
 
 	if (c->ro_mount)
 		/*
@@ -1495,7 +1495,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
 	struct ubifs_budget_req req = { .new_page = 1 };
 	int err, update_time;
 
-	dbg_gen("ino %lu, pg %lu, i_size %lld",	inode->i_ino, folio->index,
+	dbg_gen("ino %llu, pg %lu, i_size %lld",	inode->i_ino, folio->index,
 		i_size_read(inode));
 	ubifs_assert(c, !c->ro_media && !c->ro_mount);
 
@@ -1531,7 +1531,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
 	err = ubifs_budget_space(c, &req);
 	if (unlikely(err)) {
 		if (err == -ENOSPC)
-			ubifs_warn(c, "out of space for mmapped file (inode number %lu)",
+			ubifs_warn(c, "out of space for mmapped file (inode number %llu)",
 				   inode->i_ino);
 		return VM_FAULT_SIGBUS;
 	}
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index e28ab4395e5c..40a95a2fad50 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -982,7 +982,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 	int kill_xattrs = ui->xattr_cnt && last_reference;
 	u8 hash[UBIFS_HASH_ARR_SZ];
 
-	dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
+	dbg_jnl("ino %llu, nlink %u", inode->i_ino, inode->i_nlink);
 
 	if (kill_xattrs && ui->xattr_cnt > ubifs_xattr_max_cnt(c)) {
 		ubifs_err(c, "Cannot delete inode, it has too many xattrs!");
@@ -1743,7 +1743,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 			int dn_len = le32_to_cpu(dn->size);
 
 			if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) {
-				ubifs_err(c, "bad data node (block %u, inode %lu)",
+				ubifs_err(c, "bad data node (block %u, inode %llu)",
 					  blk, inode->i_ino);
 				ubifs_dump_node(c, dn, dn_size);
 				err = -EUCLEAN;
@@ -1987,7 +1987,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
 	u8 hash_host[UBIFS_HASH_ARR_SZ];
 	u8 hash[UBIFS_HASH_ARR_SZ];
 
-	dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino);
+	dbg_jnl("ino %llu, ino %llu", host->i_ino, inode->i_ino);
 	ubifs_assert(c, inode->i_nlink > 0);
 	ubifs_assert(c, mutex_is_locked(&host_ui->ui_mutex));
 
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 03bf924756ca..9a77d8b64ffa 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -92,7 +92,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
 		return 5;
 
 	if (!ubifs_compr_present(c, ui->compr_type)) {
-		ubifs_warn(c, "inode %lu uses '%s' compression, but it was not compiled in",
+		ubifs_warn(c, "inode %llu uses '%s' compression, but it was not compiled in",
 			   inode->i_ino, ubifs_compr_name(c, ui->compr_type));
 	}
 
@@ -248,14 +248,14 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
 	return inode;
 
 out_invalid:
-	ubifs_err(c, "inode %lu validation failed, error %d", inode->i_ino, err);
+	ubifs_err(c, "inode %llu validation failed, error %d", inode->i_ino, err);
 	ubifs_dump_node(c, ino, UBIFS_MAX_INO_NODE_SZ);
 	ubifs_dump_inode(c, inode);
 	err = -EINVAL;
 out_ino:
 	kfree(ino);
 out:
-	ubifs_err(c, "failed to read inode %lu, error %d", inode->i_ino, err);
+	ubifs_err(c, "failed to read inode %llu, error %d", inode->i_ino, err);
 	iget_failed(inode);
 	return ERR_PTR(err);
 }
@@ -316,12 +316,12 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	 * As an optimization, do not write orphan inodes to the media just
 	 * because this is not needed.
 	 */
-	dbg_gen("inode %lu, mode %#x, nlink %u",
+	dbg_gen("inode %llu, mode %#x, nlink %u",
 		inode->i_ino, (int)inode->i_mode, inode->i_nlink);
 	if (inode->i_nlink) {
 		err = ubifs_jnl_write_inode(c, inode);
 		if (err)
-			ubifs_err(c, "can't write inode %lu, error %d",
+			ubifs_err(c, "can't write inode %llu, error %d",
 				  inode->i_ino, err);
 		else
 			err = dbg_check_inode_size(c, inode, ui->ui_size);
@@ -357,7 +357,7 @@ static void ubifs_evict_inode(struct inode *inode)
 		 */
 		goto out;
 
-	dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
+	dbg_gen("inode %llu, mode %#x", inode->i_ino, (int)inode->i_mode);
 	ubifs_assert(c, !icount_read(inode));
 
 	truncate_inode_pages_final(&inode->i_data);
@@ -375,7 +375,7 @@ static void ubifs_evict_inode(struct inode *inode)
 		 * Worst case we have a lost orphan inode wasting space, so a
 		 * simple error message is OK here.
 		 */
-		ubifs_err(c, "can't delete inode %lu, error %d",
+		ubifs_err(c, "can't delete inode %llu, error %d",
 			  inode->i_ino, err);
 
 out:
@@ -399,7 +399,7 @@ static void ubifs_dirty_inode(struct inode *inode, int flags)
 	ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
 	if (!ui->dirty) {
 		ui->dirty = 1;
-		dbg_gen("inode %lu",  inode->i_ino);
+		dbg_gen("inode %llu",  inode->i_ino);
 	}
 }
 
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 694b08d27d7d..c9d8935f6678 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -3561,8 +3561,8 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
 
 out_dump:
 	block = key_block(c, key);
-	ubifs_err(c, "inode %lu has size %lld, but there are data at offset %lld",
-		  (unsigned long)inode->i_ino, size,
+	ubifs_err(c, "inode %llu has size %lld, but there are data at offset %lld",
+		  inode->i_ino, size,
 		  ((loff_t)block) << UBIFS_BLOCK_SHIFT);
 	mutex_unlock(&c->tnc_mutex);
 	ubifs_dump_inode(c, inode);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c21a0c2b3e90..b5a9ab9d8a10 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -76,7 +76,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
 				.dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
 
 	if (host_ui->xattr_cnt >= ubifs_xattr_max_cnt(c)) {
-		ubifs_err(c, "inode %lu already has too many xattrs (%d), cannot create more",
+		ubifs_err(c, "inode %llu already has too many xattrs (%d), cannot create more",
 			  host->i_ino, host_ui->xattr_cnt);
 		return -ENOSPC;
 	}
@@ -88,7 +88,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
 	 */
 	names_len = host_ui->xattr_names + host_ui->xattr_cnt + fname_len(nm) + 1;
 	if (names_len > XATTR_LIST_MAX) {
-		ubifs_err(c, "cannot add one more xattr name to inode %lu, total names length would become %d, max. is %d",
+		ubifs_err(c, "cannot add one more xattr name to inode %llu, total names length would become %d, max. is %d",
 			  host->i_ino, names_len, XATTR_LIST_MAX);
 		return -ENOSPC;
 	}
@@ -390,7 +390,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 	int err, len, written = 0;
 	struct fscrypt_name nm = {0};
 
-	dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino,
+	dbg_gen("ino %llu ('%pd'), buffer size %zd", host->i_ino,
 		dentry, size);
 
 	down_read(&host_ui->xattr_sem);
@@ -498,7 +498,7 @@ int ubifs_purge_xattrs(struct inode *host)
 	if (ubifs_inode(host)->xattr_cnt <= ubifs_xattr_max_cnt(c))
 		return 0;
 
-	ubifs_warn(c, "inode %lu has too many xattrs, doing a non-atomic deletion",
+	ubifs_warn(c, "inode %llu has too many xattrs, doing a non-atomic deletion",
 		   host->i_ino);
 
 	down_write(&ubifs_inode(host)->xattr_sem);
@@ -641,7 +641,7 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode,
 					   &init_xattrs, NULL);
 	if (err) {
 		struct ubifs_info *c = dentry->i_sb->s_fs_info;
-		ubifs_err(c, "cannot initialize security for inode %lu, error %d",
+		ubifs_err(c, "cannot initialize security for inode %llu, error %d",
 			  inode->i_ino, err);
 	}
 	return err;
@@ -652,7 +652,7 @@ static int xattr_get(const struct xattr_handler *handler,
 			   struct dentry *dentry, struct inode *inode,
 			   const char *name, void *buffer, size_t size)
 {
-	dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
+	dbg_gen("xattr '%s', ino %llu ('%pd'), buf size %zd", name,
 		inode->i_ino, dentry, size);
 
 	name = xattr_full_name(handler, name);
@@ -665,7 +665,7 @@ static int xattr_set(const struct xattr_handler *handler,
 			   const char *name, const void *value,
 			   size_t size, int flags)
 {
-	dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd",
+	dbg_gen("xattr '%s', host ino %llu ('%pd'), size %zd",
 		name, inode->i_ino, dentry, size);
 
 	name = xattr_full_name(handler, name);
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 632453aa3893..f5c81e13eacb 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -22,7 +22,7 @@ static int udf_verify_fi(struct udf_fileident_iter *iter)
 
 	if (iter->fi.descTag.tagIdent != cpu_to_le16(TAG_IDENT_FID)) {
 		udf_err(iter->dir->i_sb,
-			"directory (ino %lu) has entry at pos %llu with incorrect tag %x\n",
+			"directory (ino %llu) has entry at pos %llu with incorrect tag %x\n",
 			iter->dir->i_ino, (unsigned long long)iter->pos,
 			le16_to_cpu(iter->fi.descTag.tagIdent));
 		return -EFSCORRUPTED;
@@ -30,7 +30,7 @@ static int udf_verify_fi(struct udf_fileident_iter *iter)
 	len = udf_dir_entry_len(&iter->fi);
 	if (le16_to_cpu(iter->fi.lengthOfImpUse) & 3) {
 		udf_err(iter->dir->i_sb,
-			"directory (ino %lu) has entry at pos %llu with unaligned length of impUse field\n",
+			"directory (ino %llu) has entry at pos %llu with unaligned length of impUse field\n",
 			iter->dir->i_ino, (unsigned long long)iter->pos);
 		return -EFSCORRUPTED;
 	}
@@ -41,20 +41,20 @@ static int udf_verify_fi(struct udf_fileident_iter *iter)
 	 */
 	if (len > 1 << iter->dir->i_blkbits) {
 		udf_err(iter->dir->i_sb,
-			"directory (ino %lu) has too big (%u) entry at pos %llu\n",
+			"directory (ino %llu) has too big (%u) entry at pos %llu\n",
 			iter->dir->i_ino, len, (unsigned long long)iter->pos);
 		return -EFSCORRUPTED;
 	}
 	if (iter->pos + len > iter->dir->i_size) {
 		udf_err(iter->dir->i_sb,
-			"directory (ino %lu) has entry past directory size at pos %llu\n",
+			"directory (ino %llu) has entry past directory size at pos %llu\n",
 			iter->dir->i_ino, (unsigned long long)iter->pos);
 		return -EFSCORRUPTED;
 	}
 	if (udf_dir_entry_len(&iter->fi) !=
 	    sizeof(struct tag) + le16_to_cpu(iter->fi.descTag.descCRCLength)) {
 		udf_err(iter->dir->i_sb,
-			"directory (ino %lu) has entry where CRC length (%u) does not match entry length (%u)\n",
+			"directory (ino %llu) has entry where CRC length (%u) does not match entry length (%u)\n",
 			iter->dir->i_ino,
 			(unsigned)le16_to_cpu(iter->fi.descTag.descCRCLength),
 			(unsigned)(udf_dir_entry_len(&iter->fi) -
@@ -78,7 +78,7 @@ static int udf_copy_fi(struct udf_fileident_iter *iter)
 	}
 	if (iter->dir->i_size < iter->pos + sizeof(struct fileIdentDesc)) {
 		udf_err(iter->dir->i_sb,
-			"directory (ino %lu) has entry straddling EOF\n",
+			"directory (ino %llu) has entry straddling EOF\n",
 			iter->dir->i_ino);
 		return -EFSCORRUPTED;
 	}
@@ -184,7 +184,7 @@ static int udf_fiiter_advance_blk(struct udf_fileident_iter *iter)
 			return 0;
 		}
 		udf_err(iter->dir->i_sb,
-			"extent after position %llu not allocated in directory (ino %lu)\n",
+			"extent after position %llu not allocated in directory (ino %llu)\n",
 			(unsigned long long)iter->pos, iter->dir->i_ino);
 		return -EFSCORRUPTED;
 	}
@@ -272,7 +272,7 @@ int udf_fiiter_init(struct udf_fileident_iter *iter, struct inode *dir,
 		if (pos == dir->i_size)
 			return 0;
 		udf_err(dir->i_sb,
-			"position %llu not allocated in directory (ino %lu)\n",
+			"position %llu not allocated in directory (ino %llu)\n",
 			(unsigned long long)pos, dir->i_ino);
 		err = -EFSCORRUPTED;
 		goto out;
@@ -483,7 +483,7 @@ int udf_fiiter_append_blk(struct udf_fileident_iter *iter)
 		   &iter->loffset, &etype);
 	if (err <= 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) {
 		udf_err(iter->dir->i_sb,
-			"block %llu not allocated in directory (ino %lu)\n",
+			"block %llu not allocated in directory (ino %llu)\n",
 			(unsigned long long)block, iter->dir->i_ino);
 		return -EFSCORRUPTED;
 	}
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 32ae7cfd72c5..b043fe10e5d6 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -133,7 +133,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	int result;
 
 	if (file_permission(filp, MAY_READ) != 0) {
-		udf_debug("no permission to access inode %lu\n", inode->i_ino);
+		udf_debug("no permission to access inode %llu\n", inode->i_ino);
 		return -EPERM;
 	}
 
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7fae8002344a..902f81729bd8 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -147,7 +147,7 @@ void udf_evict_inode(struct inode *inode)
 		if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
 		    inode->i_size != iinfo->i_lenExtents) {
 			udf_warn(inode->i_sb,
-				 "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
+				 "Inode %llu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
 				 inode->i_ino, inode->i_mode,
 				 (unsigned long long)inode->i_size,
 				 (unsigned long long)iinfo->i_lenExtents);
@@ -1386,13 +1386,13 @@ reread:
 	 */
 	bh = udf_read_ptagged(inode->i_sb, iloc, 0, &ident);
 	if (!bh) {
-		udf_err(inode->i_sb, "(ino %lu) failed !bh\n", inode->i_ino);
+		udf_err(inode->i_sb, "(ino %llu) failed !bh\n", inode->i_ino);
 		return -EIO;
 	}
 
 	if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE &&
 	    ident != TAG_IDENT_USE) {
-		udf_err(inode->i_sb, "(ino %lu) failed ident=%u\n",
+		udf_err(inode->i_sb, "(ino %llu) failed ident=%u\n",
 			inode->i_ino, ident);
 		goto out;
 	}
@@ -1641,7 +1641,7 @@ reread:
 		udf_debug("METADATA BITMAP FILE-----\n");
 		break;
 	default:
-		udf_err(inode->i_sb, "(ino %lu) failed unknown file type=%u\n",
+		udf_err(inode->i_sb, "(ino %llu) failed unknown file type=%u\n",
 			inode->i_ino, fe->icbTag.fileType);
 		goto out;
 	}
@@ -1942,7 +1942,7 @@ finish:
 	if (do_sync) {
 		sync_dirty_buffer(bh);
 		if (buffer_write_io_error(bh)) {
-			udf_warn(inode->i_sb, "IO error syncing udf inode [%08lx]\n",
+			udf_warn(inode->i_sb, "IO error syncing udf inode [%08llx]\n",
 				 inode->i_ino);
 			err = -EIO;
 		}
@@ -2224,7 +2224,7 @@ int udf_next_aext(struct inode *inode, struct extent_position *epos,
 
 		if (++indirections > UDF_MAX_INDIR_EXTS) {
 			udf_err(inode->i_sb,
-				"too many indirect extents in inode %lu\n",
+				"too many indirect extents in inode %llu\n",
 				inode->i_ino);
 			return -EFSCORRUPTED;
 		}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 5f2e9a892bff..ccafcaa96809 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -550,7 +550,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
 		goto end_unlink;
 
 	if (!inode->i_nlink) {
-		udf_debug("Deleting nonexistent file (%lu), %u\n",
+		udf_debug("Deleting nonexistent file (%llu), %u\n",
 			  inode->i_ino, inode->i_nlink);
 		set_nlink(inode, 1);
 	}
@@ -809,7 +809,7 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 					       &diriter);
 		if (retval == -ENOENT) {
 			udf_err(old_inode->i_sb,
-				"directory (ino %lu) has no '..' entry\n",
+				"directory (ino %llu) has no '..' entry\n",
 				old_inode->i_ino);
 			retval = -EFSCORRUPTED;
 		}
@@ -821,7 +821,7 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 				old_dir->i_ino) {
 			retval = -EFSCORRUPTED;
 			udf_err(old_inode->i_sb,
-				"directory (ino %lu) has parent entry pointing to another inode (%lu != %u)\n",
+				"directory (ino %llu) has parent entry pointing to another inode (%llu != %u)\n",
 				old_inode->i_ino, old_dir->i_ino,
 				udf_get_lb_pblock(old_inode->i_sb, &tloc, 0));
 			goto out_oiter;
@@ -869,7 +869,7 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 	retval = udf_fiiter_find_entry(old_dir, &old_dentry->d_name, &oiter);
 	if (retval) {
 		udf_err(old_dir->i_sb,
-			"failed to find renamed entry again in directory (ino %lu)\n",
+			"failed to find renamed entry again in directory (ino %llu)\n",
 			old_dir->i_ino);
 	} else {
 		udf_fiiter_delete_entry(&oiter);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 27f463fd1d89..3a2d66c7e856 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1166,7 +1166,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
 		}
 		map->s_uspace.s_table = inode;
 		map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
-		udf_debug("unallocSpaceTable (part %d) @ %lu\n",
+		udf_debug("unallocSpaceTable (part %d) @ %llu\n",
 			  p_index, map->s_uspace.s_table->i_ino);
 	}
 
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 194ed3ab945e..628edfde3a9f 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -245,7 +245,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
 	sector_t end, i;
 	struct buffer_head *head, *bh;
 
-	UFSD("ENTER, ino %lu, count %u, oldb %llu, newb %llu\n",
+	UFSD("ENTER, ino %llu, count %u, oldb %llu, newb %llu\n",
 	      inode->i_ino, count,
 	     (unsigned long long)oldb, (unsigned long long)newb);
 
@@ -340,7 +340,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	unsigned cgno, oldcount, newcount;
 	u64 tmp, request, result;
 	
-	UFSD("ENTER, ino %lu, fragment %llu, goal %llu, count %u\n",
+	UFSD("ENTER, ino %llu, fragment %llu, goal %llu, count %u\n",
 	     inode->i_ino, (unsigned long long)fragment,
 	     (unsigned long long)goal, count);
 	
@@ -583,7 +583,7 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
 	unsigned oldcg, i, j, k, allocsize;
 	u64 result;
 	
-	UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n",
+	UFSD("ENTER, ino %llu, cgno %u, goal %llu, count %u\n",
 	     inode->i_ino, cgno, (unsigned long long)goal, count);
 
 	sb = inode->i_sb;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 43f1578ab866..f10a50f7e78b 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -150,7 +150,7 @@ out:
 
 Ebadsize:
 	ufs_error(sb, __func__,
-		  "size of directory #%lu is not a multiple of chunk size",
+		  "size of directory #%llu is not a multiple of chunk size",
 		  dir->i_ino
 	);
 	goto fail;
@@ -169,7 +169,7 @@ Espan:
 Einumber:
 	error = "inode out of bounds";
 bad_entry:
-	ufs_error(sb, __func__, "bad entry in directory #%lu: %s - "
+	ufs_error(sb, __func__, "bad entry in directory #%llu: %s - "
 		   "offset=%llu, rec_len=%d, name_len=%d",
 		   dir->i_ino, error, folio_pos(folio) + offs,
 		   rec_len, ufs_get_de_namlen(sb, p));
@@ -177,7 +177,7 @@ bad_entry:
 Eend:
 	p = (struct ufs_dir_entry *)(kaddr + offs);
 	ufs_error(sb, __func__,
-		   "entry in directory #%lu spans the page boundary"
+		   "entry in directory #%llu spans the page boundary"
 		   "offset=%llu",
 		   dir->i_ino, folio_pos(folio) + offs);
 fail:
@@ -258,7 +258,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
 	struct ufs_inode_info *ui = UFS_I(dir);
 	struct ufs_dir_entry *de;
 
-	UFSD("ENTER, dir_ino %lu, name %s, namlen %u\n", dir->i_ino, name, namelen);
+	UFSD("ENTER, dir_ino %llu, name %s, namlen %u\n", dir->i_ino, name, namelen);
 
 	if (npages == 0 || namelen > UFS_MAXNAMLEN)
 		goto out;
@@ -434,7 +434,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
 
 		if (IS_ERR(kaddr)) {
 			ufs_error(sb, __func__,
-				  "bad page in #%lu",
+				  "bad page in #%llu",
 				  inode->i_ino);
 			ctx->pos += PAGE_SIZE - offset;
 			return PTR_ERR(kaddr);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 73531827ecee..8e51f4630d18 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -63,7 +63,7 @@ void ufs_free_inode (struct inode * inode)
 	int is_directory;
 	unsigned ino, cg, bit;
 	
-	UFSD("ENTER, ino %lu\n", inode->i_ino);
+	UFSD("ENTER, ino %llu\n", inode->i_ino);
 
 	sb = inode->i_sb;
 	uspi = UFS_SB(sb)->s_uspi;
@@ -317,7 +317,7 @@ cg_found:
 		bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino));
 		if (!bh) {
 			ufs_warning(sb, "ufs_read_inode",
-				    "unable to read inode %lu\n",
+				    "unable to read inode %llu\n",
 				    inode->i_ino);
 			err = -EIO;
 			goto fail_remove_inode;
@@ -336,7 +336,7 @@ cg_found:
 	}
 	mutex_unlock(&sbi->s_lock);
 
-	UFSD("allocating inode %lu\n", inode->i_ino);
+	UFSD("allocating inode %llu\n", inode->i_ino);
 	UFSD("EXIT\n");
 	return inode;
 
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e2b0a35de2a7..2a8728c87979 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -400,7 +400,7 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff
 
 	mutex_lock(&UFS_I(inode)->truncate_mutex);
 
-	UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
+	UFSD("ENTER, ino %llu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
 	if (unlikely(!depth)) {
 		ufs_warning(sb, "ufs_get_block", "block > big");
 		err = -EIO;
@@ -595,7 +595,7 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 	struct super_block *sb = inode->i_sb;
 	umode_t mode;
 
-	UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino);
+	UFSD("Reading ufs2 inode, ino %llu\n", inode->i_ino);
 	/*
 	 * Copy data to the in-core inode.
 	 */
@@ -662,7 +662,7 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
 
 	bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino));
 	if (!bh) {
-		ufs_warning(sb, "ufs_read_inode", "unable to read inode %lu\n",
+		ufs_warning(sb, "ufs_read_inode", "unable to read inode %llu\n",
 			    inode->i_ino);
 		goto bad_inode;
 	}
@@ -793,17 +793,17 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
 	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
 	struct buffer_head * bh;
 
-	UFSD("ENTER, ino %lu\n", inode->i_ino);
+	UFSD("ENTER, ino %llu\n", inode->i_ino);
 
 	if (inode->i_ino < UFS_ROOTINO ||
 	    inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
-		ufs_warning (sb, "ufs_read_inode", "bad inode number (%lu)\n", inode->i_ino);
+		ufs_warning (sb, "ufs_read_inode", "bad inode number (%llu)\n", inode->i_ino);
 		return -1;
 	}
 
 	bh = sb_bread(sb, ufs_inotofsba(inode->i_ino));
 	if (!bh) {
-		ufs_warning (sb, "ufs_read_inode", "unable to read inode %lu\n", inode->i_ino);
+		ufs_warning (sb, "ufs_read_inode", "unable to read inode %llu\n", inode->i_ino);
 		return -1;
 	}
 	if (uspi->fs_magic == UFS2_MAGIC) {
@@ -891,7 +891,7 @@ static void ufs_trunc_direct(struct inode *inode)
 	unsigned int old_tail, new_tail;
 	struct to_free ctx = {.inode = inode};
 
-	UFSD("ENTER: ino %lu\n", inode->i_ino);
+	UFSD("ENTER: ino %llu\n", inode->i_ino);
 
 	new_frags = DIRECT_FRAGMENT;
 	// new_frags = first fragment past the new EOF
@@ -956,7 +956,7 @@ static void ufs_trunc_direct(struct inode *inode)
 		}
 	}
 done:
-	UFSD("EXIT: ino %lu\n", inode->i_ino);
+	UFSD("EXIT: ino %llu\n", inode->i_ino);
 }
 
 static void free_full_branch(struct inode *inode, u64 ind_block, int depth)
@@ -1169,7 +1169,7 @@ static int ufs_truncate(struct inode *inode, loff_t size)
 {
 	int err = 0;
 
-	UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n",
+	UFSD("ENTER: ino %llu, i_size: %llu, old_i_size: %llu\n",
 	     inode->i_ino, (unsigned long long)size,
 	     (unsigned long long)i_size_read(inode));
 
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h
index 0905f9a16b91..b8dc354ae90f 100644
--- a/fs/ufs/ufs_fs.h
+++ b/fs/ufs/ufs_fs.h
@@ -226,10 +226,10 @@ typedef __u16 __bitwise __fs16;
  *     inode number to cylinder group number.
  *     inode number to file system block address.
  */
-#define	ufs_inotocg(x)		((x) / uspi->s_ipg)
-#define	ufs_inotocgoff(x)	((x) % uspi->s_ipg)
+#define	ufs_inotocg(x)		((unsigned int)(x) / uspi->s_ipg)
+#define	ufs_inotocgoff(x)	((unsigned int)(x) % uspi->s_ipg)
 #define	ufs_inotofsba(x)	(((u64)ufs_cgimin(ufs_inotocg(x))) + ufs_inotocgoff(x) / uspi->s_inopf)
-#define	ufs_inotofsbo(x)	((x) % uspi->s_inopf)
+#define	ufs_inotofsbo(x)	((unsigned int)(x) % uspi->s_inopf)
 
 /*
  * Compute the cylinder and rotational position of a cyl block addr.
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 034b1d82c355..dff6f74618de 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -203,7 +203,7 @@ struct folio *ufs_get_locked_folio(struct address_space *mapping,
 		folio = read_mapping_folio(mapping, index, NULL);
 
 		if (IS_ERR(folio)) {
-			printk(KERN_ERR "ufs_change_blocknr: read_mapping_folio error: ino %lu, index: %lu\n",
+			printk(KERN_ERR "ufs_change_blocknr: read_mapping_folio error: ino %llu, index: %lu\n",
 			       mapping->host->i_ino, index);
 			return folio;
 		}
diff --git a/fs/verity/init.c b/fs/verity/init.c
index d65206608583..3aa55dec88fc 100644
--- a/fs/verity/init.c
+++ b/fs/verity/init.c
@@ -50,7 +50,7 @@ void fsverity_msg(const struct inode *inode, const char *level,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 	if (inode)
-		printk("%sfs-verity (%s, inode %lu): %pV\n",
+		printk("%sfs-verity (%s, inode %llu): %pV\n",
 		       level, inode->i_sb->s_id, inode->i_ino, &vaf);
 	else
 		printk("%sfs-verity: %pV\n", level, &vaf);
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index e83b2ec5e49f..9b646cb5335d 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -297,7 +297,7 @@ static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
 	 */
 	if (isize != data_size)
 		zonefs_warn(sb,
-			    "inode %lu: invalid size %lld (should be %lld)\n",
+			    "inode %llu: invalid size %lld (should be %lld)\n",
 			    inode->i_ino, isize, data_size);
 
 	/*
@@ -308,7 +308,7 @@ static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
 	 */
 	if ((z->z_flags & ZONEFS_ZONE_OFFLINE) ||
 	    (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) {
-		zonefs_warn(sb, "inode %lu: read/write access disabled\n",
+		zonefs_warn(sb, "inode %llu: read/write access disabled\n",
 			    inode->i_ino);
 		if (!(z->z_flags & ZONEFS_ZONE_OFFLINE))
 			z->z_flags |= ZONEFS_ZONE_OFFLINE;
@@ -316,7 +316,7 @@ static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
 		data_size = 0;
 	} else if ((z->z_flags & ZONEFS_ZONE_READONLY) ||
 		   (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) {
-		zonefs_warn(sb, "inode %lu: write access disabled\n",
+		zonefs_warn(sb, "inode %llu: write access disabled\n",
 			    inode->i_ino);
 		if (!(z->z_flags & ZONEFS_ZONE_READONLY))
 			z->z_flags |= ZONEFS_ZONE_READONLY;
@@ -402,7 +402,7 @@ void __zonefs_io_error(struct inode *inode, bool write)
 	memalloc_noio_restore(noio_flag);
 
 	if (ret != 1) {
-		zonefs_err(sb, "Get inode %lu zone information failed %d\n",
+		zonefs_err(sb, "Get inode %llu zone information failed %d\n",
 			   inode->i_ino, ret);
 		zonefs_warn(sb, "remounting filesystem read-only\n");
 		sb->s_flags |= SB_RDONLY;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b4f5e5fdbe4b..e820c14f9c5a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -783,7 +783,7 @@ struct inode {
 #endif
 
 	/* Stat data, not accessed from path walking */
-	unsigned long		i_ino;
+	u64			i_ino;
 	/*
 	 * Filesystems may only read i_nlink directly.  They shall use the
 	 * following functions for modification:
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 923b24b321cc..4084e926e284 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -344,7 +344,7 @@ out:
 static void update_ref_ctr_warn(struct uprobe *uprobe,
 				struct mm_struct *mm, short d)
 {
-	pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
+	pr_warn("ref_ctr %s failed for inode: 0x%llx offset: "
 		"0x%llx ref_ctr_offset: 0x%llx of mm: 0x%p\n",
 		d > 0 ? "increment" : "decrement", uprobe->inode->i_ino,
 		(unsigned long long) uprobe->offset,
@@ -982,7 +982,7 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
 static void
 ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe)
 {
-	pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx "
+	pr_warn("ref_ctr_offset mismatch. inode: 0x%llx offset: 0x%llx "
 		"ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n",
 		uprobe->inode->i_ino, (unsigned long long) uprobe->offset,
 		(unsigned long long) cur_uprobe->ref_ctr_offset,
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index b816c56124ab..5fc54836dfa8 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1305,7 +1305,7 @@ static int nr_info_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "%-9s ", ax2asc(buf, &nr->user_addr));
 		seq_printf(seq, "%-9s ", ax2asc(buf, &nr->dest_addr));
 		seq_printf(seq,
-"%-9s %-3s  %02X/%02X %02X/%02X %2d %3d %3d %3d %3lu/%03lu %2lu/%02lu %3lu/%03lu %3lu/%03lu %2d/%02d %3d %5d %5d %ld\n",
+"%-9s %-3s  %02X/%02X %02X/%02X %2d %3d %3d %3d %3lu/%03lu %2lu/%02lu %3lu/%03lu %3lu/%03lu %2d/%02d %3d %5d %5d %llu\n",
 			ax2asc(buf, &nr->source_addr),
 			devname,
 			nr->my_index,
@@ -1329,7 +1329,7 @@ static int nr_info_show(struct seq_file *seq, void *v)
 			nr->window,
 			sk_wmem_alloc_get(s),
 			sk_rmem_alloc_get(s),
-			s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L);
+			s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : (u64)0);
 
 		bh_unlock_sock(s);
 	}
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 841d62481048..53557176b41e 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1479,7 +1479,7 @@ static int rose_info_show(struct seq_file *seq, void *v)
 			callsign = ax2asc(buf, &rose->source_call);
 
 		seq_printf(seq,
-			   "%-10s %-9s %-5s %3.3X %05d  %d  %d  %d  %d %3lu %3lu %3lu %3lu %3lu %3lu/%03lu %5d %5d %ld\n",
+			   "%-10s %-9s %-5s %3.3X %05d  %d  %d  %d  %d %3lu %3lu %3lu %3lu %3lu %3lu/%03lu %5d %5d %llu\n",
 			rose2asc(rsbuf, &rose->source_addr),
 			callsign,
 			devname,
@@ -1498,7 +1498,7 @@ static int rose_info_show(struct seq_file *seq, void *v)
 			rose->idle / (60 * HZ),
 			sk_wmem_alloc_get(s),
 			sk_rmem_alloc_get(s),
-			s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L);
+			s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : (u64)0);
 	}
 
 	return 0;
diff --git a/net/socket.c b/net/socket.c
index 136b98c54fb3..395c271afb1d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -373,7 +373,7 @@ static const struct super_operations sockfs_ops = {
  */
 static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
-	return dynamic_dname(buffer, buflen, "socket:[%lu]",
+	return dynamic_dname(buffer, buflen, "socket:[%llu]",
 				d_inode(dentry)->i_ino);
 }
 
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 0412814a2295..7e0dbff8f538 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -96,7 +96,7 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v)
 		devname = x25->neighbour->dev->name;
 
 	seq_printf(seq, "%-10s %-10s %-5s %3.3X  %d  %d  %d  %d %3lu %3lu "
-			"%3lu %3lu %3lu %5d %5d %ld\n",
+			"%3lu %3lu %3lu %5d %5d %llu\n",
 		   !x25->dest_addr.x25_addr[0] ? "*" : x25->dest_addr.x25_addr,
 		   !x25->source_addr.x25_addr[0] ? "*" : x25->source_addr.x25_addr,
 		   devname, x25->lci & 0x0FFF, x25->state, x25->vs, x25->vr,
@@ -104,7 +104,7 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v)
 		   x25->t21 / HZ, x25->t22 / HZ, x25->t23 / HZ,
 		   sk_wmem_alloc_get(s),
 		   sk_rmem_alloc_get(s),
-		   s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L);
+		   s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : (u64)0);
 out:
 	return 0;
 }
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 2f84bd23edb6..7b645f40e71c 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -149,7 +149,7 @@ static int aafs_count;
 
 static int aafs_show_path(struct seq_file *seq, struct dentry *dentry)
 {
-	seq_printf(seq, "%s:[%lu]", AAFS_NAME, d_inode(dentry)->i_ino);
+	seq_printf(seq, "%s:[%llu]", AAFS_NAME, d_inode(dentry)->i_ino);
 	return 0;
 }
 
@@ -2644,7 +2644,7 @@ static int policy_readlink(struct dentry *dentry, char __user *buffer,
 	char name[32];
 	int res;
 
-	res = snprintf(name, sizeof(name), "%s:[%lu]", AAFS_NAME,
+	res = snprintf(name, sizeof(name), "%s:[%llu]", AAFS_NAME,
 		       d_inode(dentry)->i_ino);
 	if (res > 0 && res < sizeof(name))
 		res = readlink_copy(buffer, buflen, name, strlen(name));
diff --git a/security/integrity/integrity_audit.c b/security/integrity/integrity_audit.c
index 0ec5e4c22cb2..d8d9e5ff1cd2 100644
--- a/security/integrity/integrity_audit.c
+++ b/security/integrity/integrity_audit.c
@@ -62,7 +62,7 @@ void integrity_audit_message(int audit_msgno, struct inode *inode,
 	if (inode) {
 		audit_log_format(ab, " dev=");
 		audit_log_untrustedstring(ab, inode->i_sb->s_id);
-		audit_log_format(ab, " ino=%lu", inode->i_ino);
+		audit_log_format(ab, " ino=%llu", inode->i_ino);
 	}
 	audit_log_format(ab, " res=%d errno=%d", !result, errno);
 	audit_log_end(ab);
diff --git a/security/ipe/audit.c b/security/ipe/audit.c
index 3f0deeb54912..93fb59fbddd6 100644
--- a/security/ipe/audit.c
+++ b/security/ipe/audit.c
@@ -153,7 +153,7 @@ void ipe_audit_match(const struct ipe_eval_ctx *const ctx,
 		if (inode) {
 			audit_log_format(ab, " dev=");
 			audit_log_untrustedstring(ab, inode->i_sb->s_id);
-			audit_log_format(ab, " ino=%lu", inode->i_ino);
+			audit_log_format(ab, " ino=%llu", inode->i_ino);
 		} else {
 			audit_log_format(ab, " dev=? ino=?");
 		}
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 7d623b00495c..737f5a263a8f 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -202,7 +202,7 @@ void audit_log_lsm_data(struct audit_buffer *ab,
 		if (inode) {
 			audit_log_format(ab, " dev=");
 			audit_log_untrustedstring(ab, inode->i_sb->s_id);
-			audit_log_format(ab, " ino=%lu", inode->i_ino);
+			audit_log_format(ab, " ino=%llu", inode->i_ino);
 		}
 		break;
 	}
@@ -215,7 +215,7 @@ void audit_log_lsm_data(struct audit_buffer *ab,
 		if (inode) {
 			audit_log_format(ab, " dev=");
 			audit_log_untrustedstring(ab, inode->i_sb->s_id);
-			audit_log_format(ab, " ino=%lu", inode->i_ino);
+			audit_log_format(ab, " ino=%llu", inode->i_ino);
 		}
 		break;
 	}
@@ -228,7 +228,7 @@ void audit_log_lsm_data(struct audit_buffer *ab,
 		if (inode) {
 			audit_log_format(ab, " dev=");
 			audit_log_untrustedstring(ab, inode->i_sb->s_id);
-			audit_log_format(ab, " ino=%lu", inode->i_ino);
+			audit_log_format(ab, " ino=%llu", inode->i_ino);
 		}
 
 		audit_log_format(ab, " ioctlcmd=0x%hx", a->u.op->cmd);
@@ -246,7 +246,7 @@ void audit_log_lsm_data(struct audit_buffer *ab,
 		if (inode) {
 			audit_log_format(ab, " dev=");
 			audit_log_untrustedstring(ab, inode->i_sb->s_id);
-			audit_log_format(ab, " ino=%lu", inode->i_ino);
+			audit_log_format(ab, " ino=%llu", inode->i_ino);
 		}
 		break;
 	}
@@ -265,7 +265,7 @@ void audit_log_lsm_data(struct audit_buffer *ab,
 		}
 		audit_log_format(ab, " dev=");
 		audit_log_untrustedstring(ab, inode->i_sb->s_id);
-		audit_log_format(ab, " ino=%lu", inode->i_ino);
+		audit_log_format(ab, " ino=%llu", inode->i_ino);
 		rcu_read_unlock();
 		break;
 	}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d8224ea113d1..8f38de4d223e 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1400,7 +1400,7 @@ static int inode_doinit_use_xattr(struct inode *inode, struct dentry *dentry,
 	if (rc < 0) {
 		kfree(context);
 		if (rc != -ENODATA) {
-			pr_warn("SELinux: %s:  getxattr returned %d for dev=%s ino=%ld\n",
+			pr_warn("SELinux: %s:  getxattr returned %d for dev=%s ino=%llu\n",
 				__func__, -rc, inode->i_sb->s_id, inode->i_ino);
 			return rc;
 		}
@@ -1412,13 +1412,13 @@ static int inode_doinit_use_xattr(struct inode *inode, struct dentry *dentry,
 					     def_sid, GFP_NOFS);
 	if (rc) {
 		char *dev = inode->i_sb->s_id;
-		unsigned long ino = inode->i_ino;
+		u64 ino = inode->i_ino;
 
 		if (rc == -EINVAL) {
-			pr_notice_ratelimited("SELinux: inode=%lu on dev=%s was found to have an invalid context=%s.  This indicates you may need to relabel the inode or the filesystem in question.\n",
+			pr_notice_ratelimited("SELinux: inode=%llu on dev=%s was found to have an invalid context=%s.  This indicates you may need to relabel the inode or the filesystem in question.\n",
 					      ino, dev, context);
 		} else {
-			pr_warn("SELinux: %s:  context_to_sid(%s) returned %d for dev=%s ino=%ld\n",
+			pr_warn("SELinux: %s:  context_to_sid(%s) returned %d for dev=%s ino=%llu\n",
 				__func__, context, -rc, dev, ino);
 		}
 	}
@@ -3477,7 +3477,7 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name,
 					   &newsid);
 	if (rc) {
 		pr_err("SELinux:  unable to map context to SID"
-		       "for (%s, %lu), rc=%d\n",
+		       "for (%s, %llu), rc=%d\n",
 		       inode->i_sb->s_id, inode->i_ino, -rc);
 		return;
 	}
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 98af9d7b9434..2eb3368a3632 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -182,7 +182,7 @@ static int smk_bu_inode(struct inode *inode, int mode, int rc)
 	char acc[SMK_NUM_ACCESS_TYPE + 1];
 
 	if (isp->smk_flags & SMK_INODE_IMPURE)
-		pr_info("Smack Unconfined Corruption: inode=(%s %ld) %s\n",
+		pr_info("Smack Unconfined Corruption: inode=(%s %llu) %s\n",
 			inode->i_sb->s_id, inode->i_ino, current->comm);
 
 	if (rc <= 0)
@@ -195,7 +195,7 @@ static int smk_bu_inode(struct inode *inode, int mode, int rc)
 
 	smk_bu_mode(mode, acc);
 
-	pr_info("Smack %s: (%s %s %s) inode=(%s %ld) %s\n", smk_bu_mess[rc],
+	pr_info("Smack %s: (%s %s %s) inode=(%s %llu) %s\n", smk_bu_mess[rc],
 		tsp->smk_task->smk_known, isp->smk_inode->smk_known, acc,
 		inode->i_sb->s_id, inode->i_ino, current->comm);
 	return 0;
@@ -214,7 +214,7 @@ static int smk_bu_file(struct file *file, int mode, int rc)
 	char acc[SMK_NUM_ACCESS_TYPE + 1];
 
 	if (isp->smk_flags & SMK_INODE_IMPURE)
-		pr_info("Smack Unconfined Corruption: inode=(%s %ld) %s\n",
+		pr_info("Smack Unconfined Corruption: inode=(%s %llu) %s\n",
 			inode->i_sb->s_id, inode->i_ino, current->comm);
 
 	if (rc <= 0)
@@ -223,7 +223,7 @@ static int smk_bu_file(struct file *file, int mode, int rc)
 		rc = 0;
 
 	smk_bu_mode(mode, acc);
-	pr_info("Smack %s: (%s %s %s) file=(%s %ld %pD) %s\n", smk_bu_mess[rc],
+	pr_info("Smack %s: (%s %s %s) file=(%s %llu %pD) %s\n", smk_bu_mess[rc],
 		sskp->smk_known, smk_of_inode(inode)->smk_known, acc,
 		inode->i_sb->s_id, inode->i_ino, file,
 		current->comm);
@@ -244,7 +244,7 @@ static int smk_bu_credfile(const struct cred *cred, struct file *file,
 	char acc[SMK_NUM_ACCESS_TYPE + 1];
 
 	if (isp->smk_flags & SMK_INODE_IMPURE)
-		pr_info("Smack Unconfined Corruption: inode=(%s %ld) %s\n",
+		pr_info("Smack Unconfined Corruption: inode=(%s %llu) %s\n",
 			inode->i_sb->s_id, inode->i_ino, current->comm);
 
 	if (rc <= 0)
@@ -253,7 +253,7 @@ static int smk_bu_credfile(const struct cred *cred, struct file *file,
 		rc = 0;
 
 	smk_bu_mode(mode, acc);
-	pr_info("Smack %s: (%s %s %s) file=(%s %ld %pD) %s\n", smk_bu_mess[rc],
+	pr_info("Smack %s: (%s %s %s) file=(%s %llu %pD) %s\n", smk_bu_mess[rc],
 		sskp->smk_known, smk_of_inode(inode)->smk_known, acc,
 		inode->i_sb->s_id, inode->i_ino, file,
 		current->comm);
-- 
cgit v1.2.3


From ebeca1f930eac8f11f815d58eb38fa5d07e7c16e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 6 Mar 2026 07:58:03 -1000
Subject: sched_ext: Introduce cgroup sub-sched support

A system often runs multiple workloads especially in multi-tenant server
environments where a system is split into partitions servicing separate
more-or-less independent workloads each requiring an application-specific
scheduler. To support such and other use cases, sched_ext is in the process
of growing multiple scheduler support.

When partitioning a system in terms of CPUs for such use cases, an
oft-taken approach is hard partitioning the system using cpuset. While it
would be possible to tie sched_ext multiple scheduler support to cpuset
partitions, such an approach would have fundamental limitations stemming
from the lack of dynamism and flexibility.

Users often don't care which specific CPUs are assigned to which workload
and want to take advantage of optimizations which are enabled by running
workloads on a larger machine - e.g. opportunistic over-commit, improving
latency critical workload characteristics while maintaining bandwidth
fairness, employing control mechanisms based on different criteria than
on-CPU time for e.g. flexible memory bandwidth isolation, packing similar
parts from different workloads on same L3s to improve cache efficiency,
and so on.

As this sort of dynamic behaviors are impossible or difficult to implement
with hard partitioning, sched_ext is implementing cgroup sub-sched support
where schedulers can be attached to the cgroup hierarchy and a parent
scheduler is responsible for controlling the CPUs that each child can use
at any given moment. This makes CPU distribution dynamically controlled by
BPF allowing high flexibility.

This patch adds the skeletal sched_ext cgroup sub-sched support:

- sched_ext_ops.sub_cgroup_id and .sub_attach/detach() are added. Non-zero
  sub_cgroup_id indicates that the scheduler is to be attached to the
  identified cgroup. A sub-sched is attached to the cgroup iff the nearest
  ancestor scheduler implements .sub_attach() and grants the attachment. Max
  nesting depth is limited by SCX_SUB_MAX_DEPTH.

- When a scheduler exits, all its descendant schedulers are exited
  together. Also, cgroup.scx_sched added which points to the effective
  scheduler instance for the cgroup. This is updated on scheduler
  init/exit and inherited on cgroup online. When a cgroup is offlined, the
  attached scheduler is automatically exited.

- Sub-sched support is gated on CONFIG_EXT_SUB_SCHED which is
  automatically enabled if both SCX and cgroups are enabled. Sub-sched
  support is not tied to the CPU controller but rather the cgroup
  hierarchy itself. This is intentional as the support for cpu.weight and
  cpu.max based resource control is orthogonal to sub-sched support. Note
  that CONFIG_CGROUPS around cgroup subtree iteration support for
  scx_task_iter is replaced with CONFIG_EXT_SUB_SCHED for consistency.

- This allows loading sub-scheds and most framework operations such as
  propagating disable down the hierarchy work. However, sub-scheds are not
  operational yet and all tasks stay with the root sched. This will serve
  as the basis for building up full sub-sched support.

- DSQs point to the scx_sched they belong to.

- scx_qmap is updated to allow attachment of sub-scheds and also serving
  as sub-scheds.

- scx_is_descendant() is added but not yet used in this patch. It is used by
  later changes in the series and placed here as this is where the function
  belongs.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 include/linux/cgroup-defs.h    |   4 +
 include/linux/sched/ext.h      |   3 +
 init/Kconfig                   |   4 +
 kernel/sched/ext.c             | 532 ++++++++++++++++++++++++++++++++++++++---
 kernel/sched/ext_internal.h    |  67 +++++-
 tools/sched_ext/scx_qmap.bpf.c |   9 +-
 tools/sched_ext/scx_qmap.c     |  13 +-
 7 files changed, 596 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index bb92f5c169ca..dd61767cf9bb 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -17,6 +17,7 @@
 #include <linux/refcount.h>
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>
+#include <linux/sched.h>
 #include <linux/u64_stats_sync.h>
 #include <linux/workqueue.h>
 #include <linux/bpf-cgroup-defs.h>
@@ -624,6 +625,9 @@ struct cgroup {
 #ifdef CONFIG_BPF_SYSCALL
 	struct bpf_local_storage __rcu  *bpf_cgrp_storage;
 #endif
+#ifdef CONFIG_EXT_SUB_SCHED
+	struct scx_sched __rcu *scx_sched;
+#endif
 
 	/* All ancestors including self */
 	union {
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 0150b3fe6230..fa4349b319e6 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -78,6 +78,7 @@ struct scx_dispatch_q {
 	u64			id;
 	struct rhash_head	hash_node;
 	struct llist_node	free_node;
+	struct scx_sched	*sched;
 	struct rcu_head		rcu;
 };
 
@@ -157,6 +158,8 @@ struct scx_dsq_list_node {
 		.priv = (__priv),						\
 	}
 
+struct scx_sched;
+
 /*
  * The following is embedded in task_struct and contains all fields necessary
  * for a task to be scheduled by SCX.
diff --git a/init/Kconfig b/init/Kconfig
index b55deae9256c..06abd8e272cb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1176,6 +1176,10 @@ config EXT_GROUP_SCHED
 
 endif #CGROUP_SCHED
 
+config EXT_SUB_SCHED
+        def_bool y
+        depends on SCHED_CLASS_EXT
+
 config SCHED_MM_CID
 	def_bool y
 	depends on SMP && RSEQ
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 142845bcddaa..bb3e33b660da 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -9,6 +9,8 @@
 #include <linux/btf_ids.h>
 #include "ext_idle.h"
 
+static DEFINE_RAW_SPINLOCK(scx_sched_lock);
+
 /*
  * NOTE: sched_ext is in the process of growing multiple scheduler support and
  * scx_root usage is in a transitional state. Naked dereferences are safe if the
@@ -19,6 +21,12 @@
  */
 static struct scx_sched __rcu *scx_root;
 
+/*
+ * All scheds, writers must hold both scx_enable_mutex and scx_sched_lock.
+ * Readers can hold either or rcu_read_lock().
+ */
+static LIST_HEAD(scx_sched_all);
+
 /*
  * During exit, a task may schedule after losing its PIDs. When disabling the
  * BPF scheduler, we need to be able to iterate tasks in every state to
@@ -197,6 +205,7 @@ static void process_ddsp_deferred_locals(struct rq *rq);
 static bool task_dead_and_done(struct task_struct *p);
 static u32 reenq_local(struct rq *rq);
 static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags);
+static void scx_disable(struct scx_sched *sch, enum scx_exit_kind kind);
 static bool scx_vexit(struct scx_sched *sch, enum scx_exit_kind kind,
 		      s64 exit_code, const char *fmt, va_list args);
 
@@ -245,6 +254,88 @@ static bool u32_before(u32 a, u32 b)
 	return (s32)(a - b) < 0;
 }
 
+#ifdef CONFIG_EXT_SUB_SCHED
+/**
+ * scx_parent - Find the parent sched
+ * @sch: sched to find the parent of
+ *
+ * Returns the parent scheduler or %NULL if @sch is root.
+ */
+static struct scx_sched *scx_parent(struct scx_sched *sch)
+{
+	if (sch->level)
+		return sch->ancestors[sch->level - 1];
+	else
+		return NULL;
+}
+
+/**
+ * scx_next_descendant_pre - find the next descendant for pre-order walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @root: sched whose descendants to walk
+ *
+ * To be used by scx_for_each_descendant_pre(). Find the next descendant to
+ * visit for pre-order traversal of @root's descendants. @root is included in
+ * the iteration and the first node to be visited.
+ */
+static struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos,
+						 struct scx_sched *root)
+{
+	struct scx_sched *next;
+
+	lockdep_assert(lockdep_is_held(&scx_enable_mutex) ||
+		       lockdep_is_held(&scx_sched_lock));
+
+	/* if first iteration, visit @root */
+	if (!pos)
+		return root;
+
+	/* visit the first child if exists */
+	next = list_first_entry_or_null(&pos->children, struct scx_sched, sibling);
+	if (next)
+		return next;
+
+	/* no child, visit my or the closest ancestor's next sibling */
+	while (pos != root) {
+		if (!list_is_last(&pos->sibling, &scx_parent(pos)->children))
+			return list_next_entry(pos, sibling);
+		pos = scx_parent(pos);
+	}
+
+	return NULL;
+}
+#else	/* CONFIG_EXT_SUB_SCHED */
+static struct scx_sched *scx_parent(struct scx_sched *sch) { return NULL; }
+static struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos, struct scx_sched *root) { return pos ? NULL : root; }
+#endif	/* CONFIG_EXT_SUB_SCHED */
+
+/**
+ * scx_is_descendant - Test whether sched is a descendant
+ * @sch: sched to test
+ * @ancestor: ancestor sched to test against
+ *
+ * Test whether @sch is a descendant of @ancestor.
+ */
+static bool scx_is_descendant(struct scx_sched *sch, struct scx_sched *ancestor)
+{
+	if (sch->level < ancestor->level)
+		return false;
+	return sch->ancestors[ancestor->level] == ancestor;
+}
+
+/**
+ * scx_for_each_descendant_pre - pre-order walk of a sched's descendants
+ * @pos: iteration cursor
+ * @root: sched to walk the descendants of
+ *
+ * Walk @root's descendants. @root is included in the iteration and the first
+ * node to be visited. Must be called with either scx_enable_mutex or
+ * scx_sched_lock held.
+ */
+#define scx_for_each_descendant_pre(pos, root)					\
+	for ((pos) = scx_next_descendant_pre(NULL, (root)); (pos);		\
+	     (pos) = scx_next_descendant_pre((pos), (root)))
+
 static struct scx_dispatch_q *find_global_dsq(struct scx_sched *sch,
 					      struct task_struct *p)
 {
@@ -514,7 +605,7 @@ struct scx_task_iter {
 	struct rq_flags			rf;
 	u32				cnt;
 	bool				list_locked;
-#ifdef CONFIG_CGROUPS
+#ifdef CONFIG_EXT_SUB_SCHED
 	struct cgroup			*cgrp;
 	struct cgroup_subsys_state	*css_pos;
 	struct css_task_iter		css_iter;
@@ -553,7 +644,7 @@ static void scx_task_iter_start(struct scx_task_iter *iter, struct cgroup *cgrp)
 {
 	memset(iter, 0, sizeof(*iter));
 
-#ifdef CONFIG_CGROUPS
+#ifdef CONFIG_EXT_SUB_SCHED
 	if (cgrp) {
 		lockdep_assert_held(&cgroup_mutex);
 		iter->cgrp = cgrp;
@@ -614,7 +705,7 @@ static void __scx_task_iter_maybe_relock(struct scx_task_iter *iter)
  */
 static void scx_task_iter_stop(struct scx_task_iter *iter)
 {
-#ifdef CONFIG_CGROUPS
+#ifdef CONFIG_EXT_SUB_SCHED
 	if (iter->cgrp) {
 		if (iter->css_pos)
 			css_task_iter_end(&iter->css_iter);
@@ -645,7 +736,7 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
 		cond_resched();
 	}
 
-#ifdef CONFIG_CGROUPS
+#ifdef CONFIG_EXT_SUB_SCHED
 	if (iter->cgrp) {
 		while (iter->css_pos) {
 			struct task_struct *p;
@@ -3032,7 +3123,10 @@ static int scx_init_task(struct task_struct *p, struct task_group *tg, bool fork
 	scx_set_task_state(p, SCX_TASK_INIT);
 
 	if (p->scx.disallow) {
-		if (unlikely(fork)) {
+		if (unlikely(scx_parent(sch))) {
+			scx_error(sch, "non-root ops.init_task() set task->scx.disallow for %s[%d]",
+				  p->comm, p->pid);
+		} else if (unlikely(fork)) {
 			scx_error(sch, "ops.init_task() set task->scx.disallow for %s[%d] during fork",
 				  p->comm, p->pid);
 		} else {
@@ -3555,25 +3649,51 @@ void scx_group_set_bandwidth(struct task_group *tg,
 
 	percpu_up_read(&scx_cgroup_ops_rwsem);
 }
+#endif	/* CONFIG_EXT_GROUP_SCHED */
+
+#if defined(CONFIG_EXT_GROUP_SCHED) || defined(CONFIG_EXT_SUB_SCHED)
+static struct cgroup *root_cgroup(void)
+{
+	return &cgrp_dfl_root.cgrp;
+}
+
+static struct cgroup *sch_cgroup(struct scx_sched *sch)
+{
+	return sch->cgrp;
+}
+
+/* for each descendant of @cgrp including self, set ->scx_sched to @sch */
+static void set_cgroup_sched(struct cgroup *cgrp, struct scx_sched *sch)
+{
+	struct cgroup *pos;
+	struct cgroup_subsys_state *css;
+
+	cgroup_for_each_live_descendant_pre(pos, css, cgrp)
+		rcu_assign_pointer(pos->scx_sched, sch);
+}
 
 static void scx_cgroup_lock(void)
 {
+#ifdef CONFIG_EXT_GROUP_SCHED
 	percpu_down_write(&scx_cgroup_ops_rwsem);
+#endif
 	cgroup_lock();
 }
 
 static void scx_cgroup_unlock(void)
 {
 	cgroup_unlock();
+#ifdef CONFIG_EXT_GROUP_SCHED
 	percpu_up_write(&scx_cgroup_ops_rwsem);
+#endif
 }
-
-#else	/* CONFIG_EXT_GROUP_SCHED */
-
+#else	/* CONFIG_EXT_GROUP_SCHED || CONFIG_EXT_SUB_SCHED */
+static struct cgroup *root_cgroup(void) { return NULL; }
+static struct cgroup *sch_cgroup(struct scx_sched *sch) { return NULL; }
+static void set_cgroup_sched(struct cgroup *cgrp, struct scx_sched *sch) {}
 static void scx_cgroup_lock(void) {}
 static void scx_cgroup_unlock(void) {}
-
-#endif	/* CONFIG_EXT_GROUP_SCHED */
+#endif	/* CONFIG_EXT_GROUP_SCHED || CONFIG_EXT_SUB_SCHED */
 
 /*
  * Omitted operations:
@@ -3622,13 +3742,15 @@ DEFINE_SCHED_CLASS(ext) = {
 #endif
 };
 
-static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id)
+static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id,
+		     struct scx_sched *sch)
 {
 	memset(dsq, 0, sizeof(*dsq));
 
 	raw_spin_lock_init(&dsq->lock);
 	INIT_LIST_HEAD(&dsq->list);
 	dsq->id = dsq_id;
+	dsq->sched = sch;
 }
 
 static void free_dsq_irq_workfn(struct irq_work *irq_work)
@@ -3826,6 +3948,12 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
 	irq_work_sync(&sch->error_irq_work);
 	kthread_destroy_worker(sch->helper);
 
+#ifdef CONFIG_EXT_SUB_SCHED
+	kfree(sch->cgrp_path);
+	if (sch_cgroup(sch))
+		cgroup_put(sch_cgroup(sch));
+#endif	/* CONFIG_EXT_SUB_SCHED */
+
 	free_percpu(sch->pcpu);
 
 	for_each_node_state(node, N_POSSIBLE)
@@ -4405,6 +4533,8 @@ static const char *scx_exit_reason(enum scx_exit_kind kind)
 		return "unregistered from the main kernel";
 	case SCX_EXIT_SYSRQ:
 		return "disabled by sysrq-S";
+	case SCX_EXIT_PARENT:
+		return "parent exiting";
 	case SCX_EXIT_ERROR:
 		return "runtime error";
 	case SCX_EXIT_ERROR_BPF:
@@ -4430,6 +4560,69 @@ static void free_kick_syncs(void)
 	}
 }
 
+#ifdef CONFIG_EXT_SUB_SCHED
+static DECLARE_WAIT_QUEUE_HEAD(scx_unlink_waitq);
+
+static void drain_descendants(struct scx_sched *sch)
+{
+	/*
+	 * Child scheds that finished the critical part of disabling will take
+	 * themselves off @sch->children. Wait for it to drain. As propagation
+	 * is recursive, empty @sch->children means that all proper descendant
+	 * scheds reached unlinking stage.
+	 */
+	wait_event(scx_unlink_waitq, list_empty(&sch->children));
+}
+
+static void scx_sub_disable(struct scx_sched *sch)
+{
+	struct scx_sched *parent = scx_parent(sch);
+
+	drain_descendants(sch);
+
+	mutex_lock(&scx_enable_mutex);
+	percpu_down_write(&scx_fork_rwsem);
+	scx_cgroup_lock();
+
+	set_cgroup_sched(sch_cgroup(sch), parent);
+
+	/* TODO - perform actual disabling here */
+
+	scx_cgroup_unlock();
+	percpu_up_write(&scx_fork_rwsem);
+
+	raw_spin_lock_irq(&scx_sched_lock);
+	list_del_init(&sch->sibling);
+	list_del_rcu(&sch->all);
+	raw_spin_unlock_irq(&scx_sched_lock);
+
+	mutex_unlock(&scx_enable_mutex);
+
+	/*
+	 * @sch is now unlinked from the parent's children list. Notify and call
+	 * ops.sub_detach/exit(). Note that ops.sub_detach/exit() must be called
+	 * after unlinking and releasing all locks. See scx_claim_exit().
+	 */
+	wake_up_all(&scx_unlink_waitq);
+
+	if (sch->ops.sub_detach && sch->sub_attached) {
+		struct scx_sub_detach_args sub_detach_args = {
+			.ops = &sch->ops,
+			.cgroup_path = sch->cgrp_path,
+		};
+		SCX_CALL_OP(parent, SCX_KF_UNLOCKED, sub_detach, NULL,
+			    &sub_detach_args);
+	}
+
+	if (sch->ops.exit)
+		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, exit, NULL, sch->exit_info);
+	kobject_del(&sch->kobj);
+}
+#else	/* CONFIG_EXT_SUB_SCHED */
+static void drain_descendants(struct scx_sched *sch) { }
+static void scx_sub_disable(struct scx_sched *sch) { }
+#endif	/* CONFIG_EXT_SUB_SCHED */
+
 static void scx_root_disable(struct scx_sched *sch)
 {
 	struct scx_exit_info *ei = sch->exit_info;
@@ -4437,9 +4630,10 @@ static void scx_root_disable(struct scx_sched *sch)
 	struct task_struct *p;
 	int cpu;
 
-	/* guarantee forward progress by bypassing scx_ops */
+	/* guarantee forward progress and wait for descendants to be disabled */
 	scx_bypass(true);
 	WRITE_ONCE(scx_aborting, false);
+	drain_descendants(sch);
 
 	switch (scx_set_enable_state(SCX_DISABLING)) {
 	case SCX_DISABLING:
@@ -4498,6 +4692,11 @@ static void scx_root_disable(struct scx_sched *sch)
 		scx_exit_task(p);
 	}
 	scx_task_iter_stop(&sti);
+
+	scx_cgroup_lock();
+	set_cgroup_sched(sch_cgroup(sch), NULL);
+	scx_cgroup_unlock();
+
 	percpu_up_write(&scx_fork_rwsem);
 
 	/*
@@ -4534,6 +4733,10 @@ static void scx_root_disable(struct scx_sched *sch)
 
 	cancel_delayed_work_sync(&scx_watchdog_work);
 
+	raw_spin_lock_irq(&scx_sched_lock);
+	list_del_rcu(&sch->all);
+	raw_spin_unlock_irq(&scx_sched_lock);
+
 	/*
 	 * scx_root clearing must be inside cpus_read_lock(). See
 	 * handle_hotplug().
@@ -4591,6 +4794,24 @@ static bool scx_claim_exit(struct scx_sched *sch, enum scx_exit_kind kind)
 	 * successfully reach scx_bypass().
 	 */
 	WRITE_ONCE(scx_aborting, true);
+
+	/*
+	 * Propagate exits to descendants immediately. Each has a dedicated
+	 * helper kthread and can run in parallel. While most of disabling is
+	 * serialized, running them in separate threads allows parallelizing
+	 * ops.exit(), which can take arbitrarily long prolonging bypass mode.
+	 *
+	 * This doesn't cause recursions as propagation only takes place for
+	 * non-propagation exits.
+	 */
+	if (kind != SCX_EXIT_PARENT) {
+		scoped_guard (raw_spinlock_irqsave, &scx_sched_lock) {
+			struct scx_sched *pos;
+			scx_for_each_descendant_pre(pos, sch)
+				scx_disable(pos, SCX_EXIT_PARENT);
+		}
+	}
+
 	return true;
 }
 
@@ -4611,7 +4832,10 @@ static void scx_disable_workfn(struct kthread_work *work)
 	ei->kind = kind;
 	ei->reason = scx_exit_reason(ei->kind);
 
-	scx_root_disable(sch);
+	if (scx_parent(sch))
+		scx_sub_disable(sch);
+	else
+		scx_root_disable(sch);
 }
 
 static void scx_disable(struct scx_sched *sch, enum scx_exit_kind kind)
@@ -4987,12 +5211,15 @@ static int alloc_kick_syncs(void)
 	return 0;
 }
 
-static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
+static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
+						 struct cgroup *cgrp,
+						 struct scx_sched *parent)
 {
 	struct scx_sched *sch;
+	s32 level = parent ? parent->level + 1 : 0;
 	int node, ret;
 
-	sch = kzalloc_obj(*sch);
+	sch = kzalloc_flex(*sch, ancestors, level);
 	if (!sch)
 		return ERR_PTR(-ENOMEM);
 
@@ -5021,7 +5248,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 			goto err_free_gdsqs;
 		}
 
-		init_dsq(dsq, SCX_DSQ_GLOBAL);
+		init_dsq(dsq, SCX_DSQ_GLOBAL, sch);
 		sch->global_dsqs[node] = dsq;
 	}
 
@@ -5039,6 +5266,12 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 
 	sched_set_fifo(sch->helper->task);
 
+	if (parent)
+		memcpy(sch->ancestors, parent->ancestors,
+		       level * sizeof(parent->ancestors[0]));
+	sch->ancestors[level] = sch;
+	sch->level = level;
+
 	atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
 	init_irq_work(&sch->error_irq_work, scx_error_irq_workfn);
 	kthread_init_work(&sch->disable_work, scx_disable_workfn);
@@ -5046,10 +5279,46 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 	ops->priv = sch;
 
 	sch->kobj.kset = scx_kset;
+
+#ifdef CONFIG_EXT_SUB_SCHED
+	char *buf = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (!buf)
+		goto err_stop_helper;
+	cgroup_path(cgrp, buf, PATH_MAX);
+	sch->cgrp_path = kstrdup(buf, GFP_KERNEL);
+	kfree(buf);
+	if (!sch->cgrp_path)
+		goto err_stop_helper;
+
+	sch->cgrp = cgrp;
+	INIT_LIST_HEAD(&sch->children);
+	INIT_LIST_HEAD(&sch->sibling);
+
+	if (parent)
+		ret = kobject_init_and_add(&sch->kobj, &scx_ktype,
+					   &parent->sub_kset->kobj,
+					   "sub-%llu", cgroup_id(cgrp));
+	else
+		ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
+
+	if (ret < 0) {
+		kfree(sch->cgrp_path);
+		goto err_stop_helper;
+	}
+
+	if (ops->sub_attach) {
+		sch->sub_kset = kset_create_and_add("sub", NULL, &sch->kobj);
+		if (!sch->sub_kset) {
+			kobject_put(&sch->kobj);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
+#else	/* CONFIG_EXT_SUB_SCHED */
 	ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
 	if (ret < 0)
 		goto err_stop_helper;
-
+#endif	/* CONFIG_EXT_SUB_SCHED */
 	return sch;
 
 err_stop_helper:
@@ -5157,7 +5426,7 @@ static void scx_root_enable_workfn(struct kthread_work *work)
 	if (ret)
 		goto err_unlock;
 
-	sch = scx_alloc_and_add_sched(ops);
+	sch = scx_alloc_and_add_sched(ops, root_cgroup(), NULL);
 	if (IS_ERR(sch)) {
 		ret = PTR_ERR(sch);
 		goto err_free_ksyncs;
@@ -5174,8 +5443,13 @@ static void scx_root_enable_workfn(struct kthread_work *work)
 
 	atomic_long_set(&scx_nr_rejected, 0);
 
-	for_each_possible_cpu(cpu)
-		cpu_rq(cpu)->scx.cpuperf_target = SCX_CPUPERF_ONE;
+	for_each_possible_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+
+		rq->scx.local_dsq.sched = sch;
+		rq->scx.bypass_dsq.sched = sch;
+		rq->scx.cpuperf_target = SCX_CPUPERF_ONE;
+	}
 
 	/*
 	 * Keep CPUs stable during enable so that the BPF scheduler can track
@@ -5189,6 +5463,10 @@ static void scx_root_enable_workfn(struct kthread_work *work)
 	 */
 	rcu_assign_pointer(scx_root, sch);
 
+	raw_spin_lock_irq(&scx_sched_lock);
+	list_add_tail_rcu(&sch->all, &scx_sched_all);
+	raw_spin_unlock_irq(&scx_sched_lock);
+
 	scx_idle_enable(ops);
 
 	if (sch->ops.init) {
@@ -5278,6 +5556,7 @@ static void scx_root_enable_workfn(struct kthread_work *work)
 	 * never sees uninitialized tasks.
 	 */
 	scx_cgroup_lock();
+	set_cgroup_sched(sch_cgroup(sch), sch);
 	ret = scx_cgroup_init(sch);
 	if (ret)
 		goto err_disable_unlock_all;
@@ -5392,6 +5671,185 @@ err_disable:
 	cmd->ret = 0;
 }
 
+#ifdef CONFIG_EXT_SUB_SCHED
+/* verify that a scheduler can be attached to @cgrp and return the parent */
+static struct scx_sched *find_parent_sched(struct cgroup *cgrp)
+{
+	struct scx_sched *parent = cgrp->scx_sched;
+	struct scx_sched *pos;
+
+	lockdep_assert_held(&scx_sched_lock);
+
+	/* can't attach twice to the same cgroup */
+	if (parent->cgrp == cgrp)
+		return ERR_PTR(-EBUSY);
+
+	/* does $parent allow sub-scheds? */
+	if (!parent->ops.sub_attach)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	/* can't insert between $parent and its exiting children */
+	list_for_each_entry(pos, &parent->children, sibling)
+		if (cgroup_is_descendant(pos->cgrp, cgrp))
+			return ERR_PTR(-EBUSY);
+
+	return parent;
+}
+
+static void scx_sub_enable_workfn(struct kthread_work *work)
+{
+	struct scx_enable_cmd *cmd = container_of(work, struct scx_enable_cmd, work);
+	struct sched_ext_ops *ops = cmd->ops;
+	struct cgroup *cgrp;
+	struct scx_sched *parent, *sch;
+	s32 ret;
+
+	mutex_lock(&scx_enable_mutex);
+
+	if (!scx_enabled()) {
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	cgrp = cgroup_get_from_id(ops->sub_cgroup_id);
+	if (IS_ERR(cgrp)) {
+		ret = PTR_ERR(cgrp);
+		goto out_unlock;
+	}
+
+	raw_spin_lock_irq(&scx_sched_lock);
+	parent = find_parent_sched(cgrp);
+	if (IS_ERR(parent)) {
+		raw_spin_unlock_irq(&scx_sched_lock);
+		ret = PTR_ERR(parent);
+		goto out_put_cgrp;
+	}
+	kobject_get(&parent->kobj);
+	raw_spin_unlock_irq(&scx_sched_lock);
+
+	sch = scx_alloc_and_add_sched(ops, cgrp, parent);
+	kobject_put(&parent->kobj);
+	if (IS_ERR(sch)) {
+		ret = PTR_ERR(sch);
+		goto out_put_cgrp;
+	}
+
+	raw_spin_lock_irq(&scx_sched_lock);
+	list_add_tail(&sch->sibling, &parent->children);
+	list_add_tail_rcu(&sch->all, &scx_sched_all);
+	raw_spin_unlock_irq(&scx_sched_lock);
+
+	if (sch->level >= SCX_SUB_MAX_DEPTH) {
+		scx_error(sch, "max nesting depth %d violated",
+			  SCX_SUB_MAX_DEPTH);
+		goto err_disable;
+	}
+
+	if (sch->ops.init) {
+		ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, init, NULL);
+		if (ret) {
+			ret = ops_sanitize_err(sch, "init", ret);
+			scx_error(sch, "ops.init() failed (%d)", ret);
+			goto err_disable;
+		}
+		sch->exit_info->flags |= SCX_EFLAG_INITIALIZED;
+	}
+
+	if (validate_ops(sch, ops))
+		goto err_disable;
+
+	struct scx_sub_attach_args sub_attach_args = {
+		.ops = &sch->ops,
+		.cgroup_path = sch->cgrp_path,
+	};
+
+	ret = SCX_CALL_OP_RET(parent, SCX_KF_UNLOCKED, sub_attach, NULL,
+			      &sub_attach_args);
+	if (ret) {
+		ret = ops_sanitize_err(sch, "sub_attach", ret);
+		scx_error(sch, "parent rejected (%d)", ret);
+		goto err_disable;
+	}
+	sch->sub_attached = true;
+
+	percpu_down_write(&scx_fork_rwsem);
+	scx_cgroup_lock();
+
+	/*
+	 * Set cgroup->scx_sched's and check CSS_ONLINE. Either we see
+	 * !CSS_ONLINE or scx_cgroup_lifetime_notify() sees and shoots us down.
+	 */
+	set_cgroup_sched(sch_cgroup(sch), sch);
+	if (!(cgrp->self.flags & CSS_ONLINE)) {
+		scx_error(sch, "cgroup is not online");
+		goto err_unlock_and_disable;
+	}
+
+	/* TODO - perform actual enabling here */
+
+	scx_cgroup_unlock();
+	percpu_up_write(&scx_fork_rwsem);
+
+	pr_info("sched_ext: BPF sub-scheduler \"%s\" enabled\n", sch->ops.name);
+	kobject_uevent(&sch->kobj, KOBJ_ADD);
+	ret = 0;
+	goto out_unlock;
+
+out_put_cgrp:
+	cgroup_put(cgrp);
+out_unlock:
+	mutex_unlock(&scx_enable_mutex);
+	cmd->ret = ret;
+	return;
+
+err_unlock_and_disable:
+	scx_cgroup_unlock();
+	percpu_up_write(&scx_fork_rwsem);
+err_disable:
+	mutex_unlock(&scx_enable_mutex);
+	kthread_flush_work(&sch->disable_work);
+	cmd->ret = 0;
+}
+
+static s32 scx_cgroup_lifetime_notify(struct notifier_block *nb,
+				      unsigned long action, void *data)
+{
+	struct cgroup *cgrp = data;
+	struct cgroup *parent = cgroup_parent(cgrp);
+
+	if (!cgroup_on_dfl(cgrp))
+		return NOTIFY_OK;
+
+	switch (action) {
+	case CGROUP_LIFETIME_ONLINE:
+		/* inherit ->scx_sched from $parent */
+		if (parent)
+			rcu_assign_pointer(cgrp->scx_sched, parent->scx_sched);
+		break;
+	case CGROUP_LIFETIME_OFFLINE:
+		/* if there is a sched attached, shoot it down */
+		if (cgrp->scx_sched && cgrp->scx_sched->cgrp == cgrp)
+			scx_exit(cgrp->scx_sched, SCX_EXIT_UNREG_KERN,
+				 SCX_ECODE_RSN_CGROUP_OFFLINE,
+				 "cgroup %llu going offline", cgroup_id(cgrp));
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block scx_cgroup_lifetime_nb = {
+	.notifier_call = scx_cgroup_lifetime_notify,
+};
+
+static s32 __init scx_cgroup_lifetime_notifier_init(void)
+{
+	return blocking_notifier_chain_register(&cgroup_lifetime_notifier,
+						&scx_cgroup_lifetime_nb);
+}
+core_initcall(scx_cgroup_lifetime_notifier_init);
+#endif	/* CONFIG_EXT_SUB_SCHED */
+
 static s32 scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 {
 	static struct kthread_worker *helper;
@@ -5418,7 +5876,12 @@ static s32 scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 		mutex_unlock(&helper_mutex);
 	}
 
-	kthread_init_work(&cmd.work, scx_root_enable_workfn);
+#ifdef CONFIG_EXT_SUB_SCHED
+	if (ops->sub_cgroup_id > 1)
+		kthread_init_work(&cmd.work, scx_sub_enable_workfn);
+	else
+#endif	/* CONFIG_EXT_SUB_SCHED */
+		kthread_init_work(&cmd.work, scx_root_enable_workfn);
 	cmd.ops = ops;
 
 	kthread_queue_work(READ_ONCE(helper), &cmd.work);
@@ -5520,6 +5983,11 @@ static int bpf_scx_init_member(const struct btf_type *t,
 	case offsetof(struct sched_ext_ops, hotplug_seq):
 		ops->hotplug_seq = *(u64 *)(udata + moff);
 		return 1;
+#ifdef CONFIG_EXT_SUB_SCHED
+	case offsetof(struct sched_ext_ops, sub_cgroup_id):
+		ops->sub_cgroup_id = *(u64 *)(udata + moff);
+		return 1;
+#endif	/* CONFIG_EXT_SUB_SCHED */
 	}
 
 	return 0;
@@ -5542,6 +6010,8 @@ static int bpf_scx_check_member(const struct btf_type *t,
 	case offsetof(struct sched_ext_ops, cpu_offline):
 	case offsetof(struct sched_ext_ops, init):
 	case offsetof(struct sched_ext_ops, exit):
+	case offsetof(struct sched_ext_ops, sub_attach):
+	case offsetof(struct sched_ext_ops, sub_detach):
 		break;
 	default:
 		if (prog->sleepable)
@@ -5619,7 +6089,9 @@ static void sched_ext_ops__cgroup_cancel_move(struct task_struct *p, struct cgro
 static void sched_ext_ops__cgroup_set_weight(struct cgroup *cgrp, u32 weight) {}
 static void sched_ext_ops__cgroup_set_bandwidth(struct cgroup *cgrp, u64 period_us, u64 quota_us, u64 burst_us) {}
 static void sched_ext_ops__cgroup_set_idle(struct cgroup *cgrp, bool idle) {}
-#endif
+#endif	/* CONFIG_EXT_GROUP_SCHED */
+static s32 sched_ext_ops__sub_attach(struct scx_sub_attach_args *args) { return -EINVAL; }
+static void sched_ext_ops__sub_detach(struct scx_sub_detach_args *args) {}
 static void sched_ext_ops__cpu_online(s32 cpu) {}
 static void sched_ext_ops__cpu_offline(s32 cpu) {}
 static s32 sched_ext_ops__init(void) { return -EINVAL; }
@@ -5659,6 +6131,8 @@ static struct sched_ext_ops __bpf_ops_sched_ext_ops = {
 	.cgroup_set_bandwidth	= sched_ext_ops__cgroup_set_bandwidth,
 	.cgroup_set_idle	= sched_ext_ops__cgroup_set_idle,
 #endif
+	.sub_attach		= sched_ext_ops__sub_attach,
+	.sub_detach		= sched_ext_ops__sub_detach,
 	.cpu_online		= sched_ext_ops__cpu_online,
 	.cpu_offline		= sched_ext_ops__cpu_offline,
 	.init			= sched_ext_ops__init,
@@ -5941,8 +6415,10 @@ void __init init_sched_ext_class(void)
 		struct rq *rq = cpu_rq(cpu);
 		int  n = cpu_to_node(cpu);
 
-		init_dsq(&rq->scx.local_dsq, SCX_DSQ_LOCAL);
-		init_dsq(&rq->scx.bypass_dsq, SCX_DSQ_BYPASS);
+		/* local/bypass dsq's sch will be set during scx_root_enable() */
+		init_dsq(&rq->scx.local_dsq, SCX_DSQ_LOCAL, NULL);
+		init_dsq(&rq->scx.bypass_dsq, SCX_DSQ_BYPASS, NULL);
+
 		INIT_LIST_HEAD(&rq->scx.runnable_list);
 		INIT_LIST_HEAD(&rq->scx.ddsp_deferred_locals);
 
@@ -6598,16 +7074,16 @@ __bpf_kfunc s32 scx_bpf_create_dsq(u64 dsq_id, s32 node)
 	if (!dsq)
 		return -ENOMEM;
 
-	init_dsq(dsq, dsq_id);
-
 	rcu_read_lock();
 
 	sch = rcu_dereference(scx_root);
-	if (sch)
+	if (sch) {
+		init_dsq(dsq, dsq_id, sch);
 		ret = rhashtable_lookup_insert_fast(&sch->dsq_hash, &dsq->hash_node,
 						    dsq_hash_params);
-	else
+	} else {
 		ret = -ENODEV;
+	}
 
 	rcu_read_unlock();
 	if (ret)
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 417d3c6f02fe..75b7f57e20ab 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -28,6 +28,8 @@ enum scx_consts {
 	SCX_BYPASS_LB_DONOR_PCT		= 125,
 	SCX_BYPASS_LB_MIN_DELTA_DIV	= 4,
 	SCX_BYPASS_LB_BATCH		= 256,
+
+	SCX_SUB_MAX_DEPTH		= 4,
 };
 
 enum scx_exit_kind {
@@ -38,6 +40,7 @@ enum scx_exit_kind {
 	SCX_EXIT_UNREG_BPF,	/* BPF-initiated unregistration */
 	SCX_EXIT_UNREG_KERN,	/* kernel-initiated unregistration */
 	SCX_EXIT_SYSRQ,		/* requested by 'S' sysrq */
+	SCX_EXIT_PARENT,	/* parent exiting */
 
 	SCX_EXIT_ERROR = 1024,	/* runtime error, error msg contains details */
 	SCX_EXIT_ERROR_BPF,	/* ERROR but triggered through scx_bpf_error() */
@@ -62,6 +65,7 @@ enum scx_exit_kind {
 enum scx_exit_code {
 	/* Reasons */
 	SCX_ECODE_RSN_HOTPLUG	= 1LLU << 32,
+	SCX_ECODE_RSN_CGROUP_OFFLINE = 2LLU << 32,
 
 	/* Actions */
 	SCX_ECODE_ACT_RESTART	= 1LLU << 48,
@@ -213,7 +217,7 @@ struct scx_exit_task_args {
 	bool cancelled;
 };
 
-/* argument container for ops->cgroup_init() */
+/* argument container for ops.cgroup_init() */
 struct scx_cgroup_init_args {
 	/* the weight of the cgroup [1..10000] */
 	u32			weight;
@@ -236,12 +240,12 @@ enum scx_cpu_preempt_reason {
 };
 
 /*
- * Argument container for ops->cpu_acquire(). Currently empty, but may be
+ * Argument container for ops.cpu_acquire(). Currently empty, but may be
  * expanded in the future.
  */
 struct scx_cpu_acquire_args {};
 
-/* argument container for ops->cpu_release() */
+/* argument container for ops.cpu_release() */
 struct scx_cpu_release_args {
 	/* the reason the CPU was preempted */
 	enum scx_cpu_preempt_reason reason;
@@ -250,9 +254,7 @@ struct scx_cpu_release_args {
 	struct task_struct	*task;
 };
 
-/*
- * Informational context provided to dump operations.
- */
+/* informational context provided to dump operations */
 struct scx_dump_ctx {
 	enum scx_exit_kind	kind;
 	s64			exit_code;
@@ -261,6 +263,18 @@ struct scx_dump_ctx {
 	u64			at_jiffies;
 };
 
+/* argument container for ops.sub_attach() */
+struct scx_sub_attach_args {
+	struct sched_ext_ops	*ops;
+	char			*cgroup_path;
+};
+
+/* argument container for ops.sub_detach() */
+struct scx_sub_detach_args {
+	struct sched_ext_ops	*ops;
+	char			*cgroup_path;
+};
+
 /**
  * struct sched_ext_ops - Operation table for BPF scheduler implementation
  *
@@ -721,6 +735,20 @@ struct sched_ext_ops {
 
 #endif	/* CONFIG_EXT_GROUP_SCHED */
 
+	/**
+	 * @sub_attach: Attach a sub-scheduler
+	 * @args: argument container, see the struct definition
+	 *
+	 * Return 0 to accept the sub-scheduler. -errno to reject.
+	 */
+	s32 (*sub_attach)(struct scx_sub_attach_args *args);
+
+	/**
+	 * @sub_detach: Detach a sub-scheduler
+	 * @args: argument container, see the struct definition
+	 */
+	void (*sub_detach)(struct scx_sub_detach_args *args);
+
 	/*
 	 * All online ops must come before ops.cpu_online().
 	 */
@@ -762,6 +790,10 @@ struct sched_ext_ops {
 	 */
 	void (*exit)(struct scx_exit_info *info);
 
+	/*
+	 * Data fields must comes after all ops fields.
+	 */
+
 	/**
 	 * @dispatch_max_batch: Max nr of tasks that dispatch() can dispatch
 	 */
@@ -796,6 +828,12 @@ struct sched_ext_ops {
 	 */
 	u64 hotplug_seq;
 
+	/**
+	 * @cgroup_id: When >1, attach the scheduler as a sub-scheduler on the
+	 * specified cgroup.
+	 */
+	u64 sub_cgroup_id;
+
 	/**
 	 * @name: BPF scheduler's name
 	 *
@@ -900,6 +938,8 @@ struct scx_sched {
 	struct scx_dispatch_q	**global_dsqs;
 	struct scx_sched_pcpu __percpu *pcpu;
 
+	s32			level;
+
 	/*
 	 * Updates to the following warned bitfields can race causing RMW issues
 	 * but it doesn't really matter.
@@ -907,6 +947,18 @@ struct scx_sched {
 	bool			warned_zero_slice:1;
 	bool			warned_deprecated_rq:1;
 
+	struct list_head	all;
+
+#ifdef CONFIG_EXT_SUB_SCHED
+	struct list_head	children;
+	struct list_head	sibling;
+	struct cgroup		*cgrp;
+	char			*cgrp_path;
+	struct kset		*sub_kset;
+
+	bool			sub_attached;
+#endif	/* CONFIG_EXT_SUB_SCHED */
+
 	atomic_t		exit_kind;
 	struct scx_exit_info	*exit_info;
 
@@ -916,6 +968,9 @@ struct scx_sched {
 	struct irq_work		error_irq_work;
 	struct kthread_work	disable_work;
 	struct rcu_work		rcu_work;
+
+	/* all ancestors including self */
+	struct scx_sched	*ancestors[];
 };
 
 enum scx_wake_flags {
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index d51d8c38f1cf..ff6ff34177ab 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -41,6 +41,7 @@ const volatile u32 dsp_batch;
 const volatile bool highpri_boosting;
 const volatile bool print_dsqs_and_events;
 const volatile bool print_msgs;
+const volatile u64 sub_cgroup_id;
 const volatile s32 disallow_tgid;
 const volatile bool suppress_dump;
 
@@ -862,7 +863,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
 	struct bpf_timer *timer;
 	s32 ret;
 
-	if (print_msgs)
+	if (print_msgs && !sub_cgroup_id)
 		print_cpus();
 
 	ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
@@ -892,6 +893,11 @@ void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei)
 	UEI_RECORD(uei, ei);
 }
 
+s32 BPF_STRUCT_OPS(qmap_sub_attach, struct scx_sub_attach_args *args)
+{
+	return 0;
+}
+
 SCX_OPS_DEFINE(qmap_ops,
 	       .select_cpu		= (void *)qmap_select_cpu,
 	       .enqueue			= (void *)qmap_enqueue,
@@ -907,6 +913,7 @@ SCX_OPS_DEFINE(qmap_ops,
 	       .cgroup_init		= (void *)qmap_cgroup_init,
 	       .cgroup_set_weight	= (void *)qmap_cgroup_set_weight,
 	       .cgroup_set_bandwidth	= (void *)qmap_cgroup_set_bandwidth,
+	       .sub_attach		= (void *)qmap_sub_attach,
 	       .cpu_online		= (void *)qmap_cpu_online,
 	       .cpu_offline		= (void *)qmap_cpu_offline,
 	       .init			= (void *)qmap_init,
diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c
index ef701d45ba43..5d762d10f4db 100644
--- a/tools/sched_ext/scx_qmap.c
+++ b/tools/sched_ext/scx_qmap.c
@@ -10,6 +10,7 @@
 #include <inttypes.h>
 #include <signal.h>
 #include <libgen.h>
+#include <sys/stat.h>
 #include <bpf/bpf.h>
 #include <scx/common.h>
 #include "scx_qmap.bpf.skel.h"
@@ -67,7 +68,7 @@ int main(int argc, char **argv)
 
 	skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
-	while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PMHd:D:Spvh")) != -1) {
+	while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PMHc:d:D:Spvh")) != -1) {
 		switch (opt) {
 		case 's':
 			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
@@ -96,6 +97,16 @@ int main(int argc, char **argv)
 		case 'H':
 			skel->rodata->highpri_boosting = true;
 			break;
+		case 'c': {
+			struct stat st;
+			if (stat(optarg, &st) < 0) {
+				perror("stat");
+				return 1;
+			}
+			skel->struct_ops.qmap_ops->sub_cgroup_id = st.st_ino;
+			skel->rodata->sub_cgroup_id = st.st_ino;
+			break;
+		}
 		case 'd':
 			skel->rodata->disallow_tgid = strtol(optarg, NULL, 0);
 			if (skel->rodata->disallow_tgid < 0)
-- 
cgit v1.2.3


From 88234b075c3fc23d57406e1867523b6aba783ebf Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 6 Mar 2026 07:58:03 -1000
Subject: sched_ext: Introduce scx_task_sched[_rcu]()

In preparation of multiple scheduler support, add p->scx.sched which points
to the scx_sched instance that the task is scheduled by, which is currently
always scx_root. Add scx_task_sched[_rcu]() accessors which return the
associated scx_sched of the specified task and replace the raw scx_root
dereferences with it where applicable. scx_task_on_sched() is also added to
test whether a given task is on the specified sched.

As scx_root is still the only scheduler, this shouldn't introduce
user-visible behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 include/linux/sched/ext.h   |  7 +++++
 kernel/sched/ext.c          | 63 ++++++++++++++++++++++++++++-----------------
 kernel/sched/ext_internal.h | 59 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 105 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index fa4349b319e6..3213e31c7979 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -165,6 +165,13 @@ struct scx_sched;
  * for a task to be scheduled by SCX.
  */
 struct sched_ext_entity {
+#ifdef CONFIG_CGROUPS
+	/*
+	 * Associated scx_sched. Updated either during fork or while holding
+	 * both p->pi_lock and rq lock.
+	 */
+	struct scx_sched __rcu	*sched;
+#endif
 	struct scx_dispatch_q	*dsq;
 	atomic_long_t		ops_state;
 	u64			ddsp_dsq_id;
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index bb3e33b660da..d56539449f26 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -19,7 +19,7 @@ static DEFINE_RAW_SPINLOCK(scx_sched_lock);
  * are used as temporary markers to indicate that the dereferences need to be
  * updated to point to the associated scheduler instances rather than scx_root.
  */
-static struct scx_sched __rcu *scx_root;
+struct scx_sched __rcu *scx_root;
 
 /*
  * All scheds, writers must hold both scx_enable_mutex and scx_sched_lock.
@@ -304,9 +304,15 @@ static struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos,
 
 	return NULL;
 }
+
+static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch)
+{
+	rcu_assign_pointer(p->scx.sched, sch);
+}
 #else	/* CONFIG_EXT_SUB_SCHED */
 static struct scx_sched *scx_parent(struct scx_sched *sch) { return NULL; }
 static struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos, struct scx_sched *root) { return pos ? NULL : root; }
+static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch) {}
 #endif	/* CONFIG_EXT_SUB_SCHED */
 
 /**
@@ -1542,7 +1548,7 @@ static bool scx_rq_online(struct rq *rq)
 static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 			    int sticky_cpu)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 	struct task_struct **ddsp_taskp;
 	struct scx_dispatch_q *dsq;
 	unsigned long qseq;
@@ -1672,7 +1678,7 @@ static void clr_task_runnable(struct task_struct *p, bool reset_runnable_at)
 
 static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 	int sticky_cpu = p->scx.sticky_cpu;
 
 	if (enq_flags & ENQUEUE_WAKEUP)
@@ -1723,7 +1729,7 @@ out:
 
 static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 	unsigned long opss;
 	u64 op_deq_flags = deq_flags;
 
@@ -1794,7 +1800,7 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
 
 static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 
 	if (!(p->scx.flags & SCX_TASK_QUEUED)) {
 		WARN_ON_ONCE(task_runnable(p));
@@ -1838,8 +1844,8 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
 
 static void yield_task_scx(struct rq *rq)
 {
-	struct scx_sched *sch = scx_root;
 	struct task_struct *p = rq->donor;
+	struct scx_sched *sch = scx_task_sched(p);
 
 	if (SCX_HAS_OP(sch, yield))
 		SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq, p, NULL);
@@ -1849,10 +1855,10 @@ static void yield_task_scx(struct rq *rq)
 
 static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
 {
-	struct scx_sched *sch = scx_root;
 	struct task_struct *from = rq->donor;
+	struct scx_sched *sch = scx_task_sched(from);
 
-	if (SCX_HAS_OP(sch, yield))
+	if (SCX_HAS_OP(sch, yield) && sch == scx_task_sched(to))
 		return SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq,
 					      from, to);
 	else
@@ -2517,7 +2523,7 @@ static void process_ddsp_deferred_locals(struct rq *rq)
 	 */
 	while ((p = list_first_entry_or_null(&rq->scx.ddsp_deferred_locals,
 				struct task_struct, scx.dsq_list.node))) {
-		struct scx_sched *sch = scx_root;
+		struct scx_sched *sch = scx_task_sched(p);
 		struct scx_dispatch_q *dsq;
 
 		list_del_init(&p->scx.dsq_list.node);
@@ -2531,7 +2537,7 @@ static void process_ddsp_deferred_locals(struct rq *rq)
 
 static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 
 	if (p->scx.flags & SCX_TASK_QUEUED) {
 		/*
@@ -2628,7 +2634,7 @@ static void switch_class(struct rq *rq, struct task_struct *next)
 static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
 			      struct task_struct *next)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 
 	/* see kick_cpus_irq_workfn() */
 	smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
@@ -2722,14 +2728,14 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
 	if (keep_prev) {
 		p = prev;
 		if (!p->scx.slice)
-			refill_task_slice_dfl(rcu_dereference_sched(scx_root), p);
+			refill_task_slice_dfl(scx_task_sched(p), p);
 	} else {
 		p = first_local_task(rq);
 		if (!p)
 			return NULL;
 
 		if (unlikely(!p->scx.slice)) {
-			struct scx_sched *sch = rcu_dereference_sched(scx_root);
+			struct scx_sched *sch = scx_task_sched(p);
 
 			if (!scx_rq_bypassing(rq) && !sch->warned_zero_slice) {
 				printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in %s()\n",
@@ -2817,7 +2823,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
 
 static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flags)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 	bool rq_bypass;
 
 	/*
@@ -2878,7 +2884,7 @@ static void task_woken_scx(struct rq *rq, struct task_struct *p)
 static void set_cpus_allowed_scx(struct task_struct *p,
 				 struct affinity_context *ac)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 
 	set_cpus_allowed_common(p, ac);
 
@@ -3022,7 +3028,7 @@ void scx_tick(struct rq *rq)
 
 static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(curr);
 
 	update_curr_scx(rq);
 
@@ -3212,11 +3218,12 @@ static void scx_disable_task(struct task_struct *p)
 
 static void scx_exit_task(struct task_struct *p)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 	struct scx_exit_task_args args = {
 		.cancelled = false,
 	};
 
+	lockdep_assert_held(&p->pi_lock);
 	lockdep_assert_rq_held(task_rq(p));
 
 	switch (scx_get_task_state(p)) {
@@ -3238,6 +3245,7 @@ static void scx_exit_task(struct task_struct *p)
 	if (SCX_HAS_OP(sch, exit_task))
 		SCX_CALL_OP_TASK(sch, SCX_KF_REST, exit_task, task_rq(p),
 				 p, &args);
+	scx_set_task_sched(p, NULL);
 	scx_set_task_state(p, SCX_TASK_NONE);
 }
 
@@ -3267,12 +3275,18 @@ void scx_pre_fork(struct task_struct *p)
 
 int scx_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 {
+	s32 ret;
+
 	percpu_rwsem_assert_held(&scx_fork_rwsem);
 
-	if (scx_init_task_enabled)
-		return scx_init_task(p, task_group(p), true);
-	else
-		return 0;
+	if (scx_init_task_enabled) {
+		ret = scx_init_task(p, task_group(p), true);
+		if (!ret)
+			scx_set_task_sched(p, scx_root);
+		return ret;
+	}
+
+	return 0;
 }
 
 void scx_post_fork(struct task_struct *p)
@@ -3377,7 +3391,7 @@ void sched_ext_dead(struct task_struct *p)
 static void reweight_task_scx(struct rq *rq, struct task_struct *p,
 			      const struct load_weight *lw)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 
 	lockdep_assert_rq_held(task_rq(p));
 
@@ -3396,7 +3410,7 @@ static void prio_changed_scx(struct rq *rq, struct task_struct *p, u64 oldprio)
 
 static void switching_to_scx(struct rq *rq, struct task_struct *p)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = scx_task_sched(p);
 
 	if (task_dead_and_done(p))
 		return;
@@ -4062,7 +4076,7 @@ bool scx_allow_ttwu_queue(const struct task_struct *p)
 	if (!scx_enabled())
 		return true;
 
-	sch = rcu_dereference_sched(scx_root);
+	sch = scx_task_sched(p);
 	if (unlikely(!sch))
 		return true;
 
@@ -5582,6 +5596,7 @@ static void scx_root_enable_workfn(struct kthread_work *work)
 			goto err_disable_unlock_all;
 		}
 
+		scx_set_task_sched(p, sch);
 		scx_set_task_state(p, SCX_TASK_READY);
 
 		put_task_struct(p);
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 75b7f57e20ab..0612006019da 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1223,6 +1223,7 @@ enum scx_ops_state {
 #define SCX_OPSS_STATE_MASK	((1LU << SCX_OPSS_QSEQ_SHIFT) - 1)
 #define SCX_OPSS_QSEQ_MASK	(~SCX_OPSS_STATE_MASK)
 
+extern struct scx_sched __rcu *scx_root;
 DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
 
 /*
@@ -1243,3 +1244,61 @@ static inline bool scx_rq_bypassing(struct rq *rq)
 {
 	return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
 }
+
+#ifdef CONFIG_EXT_SUB_SCHED
+/**
+ * scx_task_sched - Find scx_sched scheduling a task
+ * @p: task of interest
+ *
+ * Return @p's scheduler instance. Must be called with @p's pi_lock or rq lock
+ * held.
+ */
+static inline struct scx_sched *scx_task_sched(const struct task_struct *p)
+{
+	return rcu_dereference_protected(p->scx.sched,
+					 lockdep_is_held(&p->pi_lock) ||
+					 lockdep_is_held(__rq_lockp(task_rq(p))));
+}
+
+/**
+ * scx_task_sched_rcu - Find scx_sched scheduling a task
+ * @p: task of interest
+ *
+ * Return @p's scheduler instance. The returned scx_sched is RCU protected.
+ */
+static inline struct scx_sched *scx_task_sched_rcu(const struct task_struct *p)
+{
+	return rcu_dereference_all(p->scx.sched);
+}
+
+/**
+ * scx_task_on_sched - Is a task on the specified sched?
+ * @sch: sched to test against
+ * @p: task of interest
+ *
+ * Returns %true if @p is on @sch, %false otherwise.
+ */
+static inline bool scx_task_on_sched(struct scx_sched *sch,
+				     const struct task_struct *p)
+{
+	return rcu_access_pointer(p->scx.sched) == sch;
+}
+#else	/* CONFIG_EXT_SUB_SCHED */
+static inline struct scx_sched *scx_task_sched(const struct task_struct *p)
+{
+	return rcu_dereference_protected(scx_root,
+					 lockdep_is_held(&p->pi_lock) ||
+					 lockdep_is_held(__rq_lockp(task_rq(p))));
+}
+
+static inline struct scx_sched *scx_task_sched_rcu(const struct task_struct *p)
+{
+	return rcu_dereference_all(scx_root);
+}
+
+static inline bool scx_task_on_sched(struct scx_sched *sch,
+				     const struct task_struct *p)
+{
+	return true;
+}
+#endif	/* CONFIG_EXT_SUB_SCHED */
-- 
cgit v1.2.3


From 337ec00b1d9c676f637651c2cefddb8612b867ee Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 6 Mar 2026 07:58:04 -1000
Subject: sched_ext: Implement cgroup sub-sched enabling and disabling

The preceding changes implemented the framework to support cgroup
sub-scheds and updated scheduling paths and kfuncs so that they have
minimal but working support for sub-scheds. However, actual sub-sched
enabling/disabling hasn't been implemented yet and all tasks stayed on
scx_root.

Implement cgroup sub-sched enabling and disabling to actually activate
sub-scheds:

- Both enable and disable operations bypass only the tasks in the subtree
  of the child being enabled or disabled to limit disruptions.

- When enabling, all candidate tasks are first initialized for the child
  sched. Once that succeeds, the tasks are exited for the parent and then
  switched over to the child. This adds a bit of complication but
  guarantees that child scheduler failures are always contained.

- Disabling works the same way in the other direction. However, when the
  parent may fail to initialize a task, disabling is propagated up to the
  parent. While this means that a parent sched fail due to a child sched
  event, the failure can only originate from the parent itself (its
  ops.init_task()). The only effect a malfunctioning child can have on the
  parent is attempting to move the tasks back to the parent.

After this change, although not all the necessary mechanisms are in place
yet, sub-scheds can take control of their tasks and schedule them.

v2: Fix missing scx_cgroup_unlock()/percpu_up_write() in abort path
    (Cheng-Yang Chou).

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 include/linux/sched/ext.h |   1 +
 kernel/sched/ext.c        | 285 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 280 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 3213e31c7979..f354d7d34306 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -88,6 +88,7 @@ enum scx_ent_flags {
 	SCX_TASK_IN_CUSTODY	= 1 << 1, /* in custody, needs ops.dequeue() when leaving */
 	SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */
 	SCX_TASK_DEQD_FOR_SLEEP	= 1 << 3, /* last dequeue was for SLEEP */
+	SCX_TASK_SUB_INIT	= 1 << 4, /* task being initialized for a sub sched */
 
 	SCX_TASK_STATE_SHIFT	= 8,	  /* bit 8 and 9 are used to carry scx_task_state */
 	SCX_TASK_STATE_BITS	= 2,
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 3f237b9da970..70d0f9e8ef61 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -51,6 +51,17 @@ DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
 static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
 static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
 
+#ifdef CONFIG_EXT_SUB_SCHED
+/*
+ * The sub sched being enabled. Used by scx_disable_and_exit_task() to exit
+ * tasks for the sub-sched being enabled. Use a global variable instead of a
+ * per-task field as all enables are serialized.
+ */
+static struct scx_sched *scx_enabling_sub_sched;
+#else
+#define scx_enabling_sub_sched	(struct scx_sched *)NULL
+#endif	/* CONFIG_EXT_SUB_SCHED */
+
 /*
  * A monotically increasing sequence number that is incremented every time a
  * scheduler is enabled. This can be used by to check if any custom sched_ext
@@ -3342,6 +3353,17 @@ static void scx_disable_and_exit_task(struct scx_sched *sch,
 {
 	__scx_disable_and_exit_task(sch, p);
 
+	/*
+	 * If set, @p exited between __scx_init_task() and scx_enable_task() in
+	 * scx_sub_enable() and is initialized for both the associated sched and
+	 * its parent. Disable and exit for the child too.
+	 */
+	if ((p->scx.flags & SCX_TASK_SUB_INIT) &&
+	    !WARN_ON_ONCE(!scx_enabling_sub_sched)) {
+		__scx_disable_and_exit_task(scx_enabling_sub_sched, p);
+		p->scx.flags &= ~SCX_TASK_SUB_INIT;
+	}
+
 	scx_set_task_sched(p, NULL);
 	scx_set_task_state(p, SCX_TASK_NONE);
 }
@@ -3377,9 +3399,14 @@ int scx_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 	percpu_rwsem_assert_held(&scx_fork_rwsem);
 
 	if (scx_init_task_enabled) {
-		ret = scx_init_task(scx_root, p, true);
+#ifdef CONFIG_EXT_SUB_SCHED
+		struct scx_sched *sch = kargs->cset->dfl_cgrp->scx_sched;
+#else
+		struct scx_sched *sch = scx_root;
+#endif
+		ret = scx_init_task(sch, p, true);
 		if (!ret)
-			scx_set_task_sched(p, scx_root);
+			scx_set_task_sched(p, sch);
 		return ret;
 	}
 
@@ -4643,9 +4670,9 @@ static void scx_bypass(struct scx_sched *sch, bool bypass)
 		struct rq *rq = cpu_rq(cpu);
 		struct task_struct *p, *n;
 
+		raw_spin_lock(&scx_sched_lock);
 		raw_spin_rq_lock(rq);
 
-		raw_spin_lock(&scx_sched_lock);
 		scx_for_each_descendant_pre(pos, sch) {
 			struct scx_sched_pcpu *pcpu = per_cpu_ptr(pos->pcpu, cpu);
 
@@ -4654,6 +4681,7 @@ static void scx_bypass(struct scx_sched *sch, bool bypass)
 			else
 				pcpu->flags &= ~SCX_SCHED_PCPU_BYPASSING;
 		}
+
 		raw_spin_unlock(&scx_sched_lock);
 
 		/*
@@ -4798,23 +4826,139 @@ static void drain_descendants(struct scx_sched *sch)
 	wait_event(scx_unlink_waitq, list_empty(&sch->children));
 }
 
+static void scx_fail_parent(struct scx_sched *sch,
+			    struct task_struct *failed, s32 fail_code)
+{
+	struct scx_sched *parent = scx_parent(sch);
+	struct scx_task_iter sti;
+	struct task_struct *p;
+
+	scx_error(parent, "ops.init_task() failed (%d) for %s[%d] while disabling a sub-scheduler",
+		  fail_code, failed->comm, failed->pid);
+
+	/*
+	 * Once $parent is bypassed, it's safe to put SCX_TASK_NONE tasks into
+	 * it. This may cause downstream failures on the BPF side but $parent is
+	 * dying anyway.
+	 */
+	scx_bypass(parent, true);
+
+	scx_task_iter_start(&sti, sch->cgrp);
+	while ((p = scx_task_iter_next_locked(&sti))) {
+		if (scx_task_on_sched(parent, p))
+			continue;
+
+		scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+			scx_disable_and_exit_task(sch, p);
+			rcu_assign_pointer(p->scx.sched, parent);
+		}
+	}
+	scx_task_iter_stop(&sti);
+}
+
 static void scx_sub_disable(struct scx_sched *sch)
 {
 	struct scx_sched *parent = scx_parent(sch);
+	struct scx_task_iter sti;
+	struct task_struct *p;
+	int ret;
 
+	/*
+	 * Guarantee forward progress and wait for descendants to be disabled.
+	 * To limit
+	 * disruptions, $parent is not bypassed. Tasks are fully prepped and
+	 * then inserted back into $parent.
+	 */
+	scx_bypass(sch, true);
 	drain_descendants(sch);
 
+	/*
+	 * Here, every runnable task is guaranteed to make forward progress and
+	 * we can safely use blocking synchronization constructs. Actually
+	 * disable ops.
+	 */
 	mutex_lock(&scx_enable_mutex);
 	percpu_down_write(&scx_fork_rwsem);
 	scx_cgroup_lock();
 
 	set_cgroup_sched(sch_cgroup(sch), parent);
 
-	/* TODO - perform actual disabling here */
+	scx_task_iter_start(&sti, sch->cgrp);
+	while ((p = scx_task_iter_next_locked(&sti))) {
+		struct rq *rq;
+		struct rq_flags rf;
+
+		/* filter out duplicate visits */
+		if (scx_task_on_sched(parent, p))
+			continue;
+
+		/*
+		 * By the time control reaches here, all descendant schedulers
+		 * should already have been disabled.
+		 */
+		WARN_ON_ONCE(!scx_task_on_sched(sch, p));
+
+		/*
+		 * If $p is about to be freed, nothing prevents $sch from
+		 * unloading before $p reaches sched_ext_free(). Disable and
+		 * exit $p right away.
+		 */
+		if (!tryget_task_struct(p)) {
+			scx_disable_and_exit_task(sch, p);
+			continue;
+		}
+
+		scx_task_iter_unlock(&sti);
+
+		/*
+		 * $p is READY or ENABLED on @sch. Initialize for $parent,
+		 * disable and exit from @sch, and then switch over to $parent.
+		 *
+		 * If a task fails to initialize for $parent, the only available
+		 * action is disabling $parent too. While this allows disabling
+		 * of a child sched to cause the parent scheduler to fail, the
+		 * failure can only originate from ops.init_task() of the
+		 * parent. A child can't directly affect the parent through its
+		 * own failures.
+		 */
+		ret = __scx_init_task(parent, p, false);
+		if (ret) {
+			scx_fail_parent(sch, p, ret);
+			put_task_struct(p);
+			break;
+		}
+
+		rq = task_rq_lock(p, &rf);
+		scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+			/*
+			 * $p is initialized for $parent and still attached to
+			 * @sch. Disable and exit for @sch, switch over to
+			 * $parent, override the state to READY to account for
+			 * $p having already been initialized, and then enable.
+			 */
+			scx_disable_and_exit_task(sch, p);
+			scx_set_task_state(p, SCX_TASK_INIT);
+			rcu_assign_pointer(p->scx.sched, parent);
+			scx_set_task_state(p, SCX_TASK_READY);
+			scx_enable_task(parent, p);
+		}
+		task_rq_unlock(rq, p, &rf);
+
+		put_task_struct(p);
+	}
+	scx_task_iter_stop(&sti);
 
 	scx_cgroup_unlock();
 	percpu_up_write(&scx_fork_rwsem);
 
+	/*
+	 * All tasks are moved off of @sch but there may still be on-going
+	 * operations (e.g. ops.select_cpu()). Drain them by flushing RCU. Use
+	 * the expedited version as ancestors may be waiting in bypass mode.
+	 * Also, tell the parent that there is no need to keep running bypass
+	 * DSQs for us.
+	 */
+	synchronize_rcu_expedited();
 	disable_bypass_dsp(sch);
 
 	raw_spin_lock_irq(&scx_sched_lock);
@@ -5933,13 +6077,30 @@ static struct scx_sched *find_parent_sched(struct cgroup *cgrp)
 	return parent;
 }
 
+static bool assert_task_ready_or_enabled(struct task_struct *p)
+{
+	enum scx_task_state state = scx_get_task_state(p);
+
+	switch (state) {
+	case SCX_TASK_READY:
+	case SCX_TASK_ENABLED:
+		return true;
+	default:
+		WARN_ONCE(true, "sched_ext: Invalid task state %d for %s[%d] during enabling sub sched",
+			  state, p->comm, p->pid);
+		return false;
+	}
+}
+
 static void scx_sub_enable_workfn(struct kthread_work *work)
 {
 	struct scx_enable_cmd *cmd = container_of(work, struct scx_enable_cmd, work);
 	struct sched_ext_ops *ops = cmd->ops;
 	struct cgroup *cgrp;
 	struct scx_sched *parent, *sch;
-	s32 ret;
+	struct scx_task_iter sti;
+	struct task_struct *p;
+	s32 i, ret;
 
 	mutex_lock(&scx_enable_mutex);
 
@@ -6011,6 +6172,12 @@ static void scx_sub_enable_workfn(struct kthread_work *work)
 	}
 	sch->sub_attached = true;
 
+	scx_bypass(sch, true);
+
+	for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++)
+		if (((void (**)(void))ops)[i])
+			set_bit(i, sch->has_op);
+
 	percpu_down_write(&scx_fork_rwsem);
 	scx_cgroup_lock();
 
@@ -6024,16 +6191,121 @@ static void scx_sub_enable_workfn(struct kthread_work *work)
 		goto err_unlock_and_disable;
 	}
 
-	/* TODO - perform actual enabling here */
+	/*
+	 * Initialize tasks for the new child $sch without exiting them for
+	 * $parent so that the tasks can always be reverted back to $parent
+	 * sched on child init failure.
+	 */
+	WARN_ON_ONCE(scx_enabling_sub_sched);
+	scx_enabling_sub_sched = sch;
+
+	scx_task_iter_start(&sti, sch->cgrp);
+	while ((p = scx_task_iter_next_locked(&sti))) {
+		struct rq *rq;
+		struct rq_flags rf;
+
+		/*
+		 * Task iteration may visit the same task twice when racing
+		 * against exiting. Use %SCX_TASK_SUB_INIT to mark tasks which
+		 * finished __scx_init_task() and skip if set.
+		 *
+		 * A task may exit and get freed between __scx_init_task()
+		 * completion and scx_enable_task(). In such cases,
+		 * scx_disable_and_exit_task() must exit the task for both the
+		 * parent and child scheds.
+		 */
+		if (p->scx.flags & SCX_TASK_SUB_INIT)
+			continue;
+
+		/* see scx_root_enable() */
+		if (!tryget_task_struct(p))
+			continue;
+
+		if (!assert_task_ready_or_enabled(p)) {
+			ret = -EINVAL;
+			goto abort;
+		}
+
+		scx_task_iter_unlock(&sti);
+
+		/*
+		 * As $p is still on $parent, it can't be transitioned to INIT.
+		 * Let's worry about task state later. Use __scx_init_task().
+		 */
+		ret = __scx_init_task(sch, p, false);
+		if (ret)
+			goto abort;
+
+		rq = task_rq_lock(p, &rf);
+		p->scx.flags |= SCX_TASK_SUB_INIT;
+		task_rq_unlock(rq, p, &rf);
+
+		put_task_struct(p);
+	}
+	scx_task_iter_stop(&sti);
+
+	/*
+	 * All tasks are prepped. Disable/exit tasks for $parent and enable for
+	 * the new @sch.
+	 */
+	scx_task_iter_start(&sti, sch->cgrp);
+	while ((p = scx_task_iter_next_locked(&sti))) {
+		/*
+		 * Use clearing of %SCX_TASK_SUB_INIT to detect and skip
+		 * duplicate iterations.
+		 */
+		if (!(p->scx.flags & SCX_TASK_SUB_INIT))
+			continue;
+
+		scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+			/*
+			 * $p must be either READY or ENABLED. If ENABLED,
+			 * __scx_disabled_and_exit_task() first disables and
+			 * makes it READY. However, after exiting $p, it will
+			 * leave $p as READY.
+			 */
+			assert_task_ready_or_enabled(p);
+			__scx_disable_and_exit_task(parent, p);
+
+			/*
+			 * $p is now only initialized for @sch and READY, which
+			 * is what we want. Assign it to @sch and enable.
+			 */
+			rcu_assign_pointer(p->scx.sched, sch);
+			scx_enable_task(sch, p);
+
+			p->scx.flags &= ~SCX_TASK_SUB_INIT;
+		}
+	}
+	scx_task_iter_stop(&sti);
+
+	scx_enabling_sub_sched = NULL;
 
 	scx_cgroup_unlock();
 	percpu_up_write(&scx_fork_rwsem);
 
+	scx_bypass(sch, false);
+
 	pr_info("sched_ext: BPF sub-scheduler \"%s\" enabled\n", sch->ops.name);
 	kobject_uevent(&sch->kobj, KOBJ_ADD);
 	ret = 0;
 	goto out_unlock;
 
+abort:
+	put_task_struct(p);
+	scx_task_iter_stop(&sti);
+	scx_enabling_sub_sched = NULL;
+
+	scx_task_iter_start(&sti, sch->cgrp);
+	while ((p = scx_task_iter_next_locked(&sti))) {
+		if (p->scx.flags & SCX_TASK_SUB_INIT) {
+			__scx_disable_and_exit_task(sch, p);
+			p->scx.flags &= ~SCX_TASK_SUB_INIT;
+		}
+	}
+	scx_task_iter_stop(&sti);
+	scx_cgroup_unlock();
+	percpu_up_write(&scx_fork_rwsem);
 out_put_cgrp:
 	cgroup_put(cgrp);
 out_unlock:
@@ -6042,6 +6314,7 @@ out_unlock:
 	return;
 
 err_unlock_and_disable:
+	/* we'll soon enter disable path, keep bypass on */
 	scx_cgroup_unlock();
 	percpu_up_write(&scx_fork_rwsem);
 err_disable:
-- 
cgit v1.2.3


From 3cd963fa915c494a6d0da0287bd10cb6f2204f9e Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 5 Mar 2026 10:42:57 +0000
Subject: net: stmmac: mdio_bus_data->default_an_inband is boolean

default_an_inband is declared as an unsigned int, but is set to true/
false and is assigned to phylink_config's member of the same name
which is a bool. Declare this also as a bool for consistency.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Tested-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Link: https://patch.msgid.link/E1vy6AT-0000000BtxD-2qm7@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 2fc169c7117e..678d03d6d3bd 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -86,10 +86,10 @@ struct stmmac_priv;
 struct stmmac_mdio_bus_data {
 	unsigned int phy_mask;
 	unsigned int pcs_mask;
-	unsigned int default_an_inband;
 	int *irqs;
 	int probed_phy_irq;
 	bool needs_reset;
+	bool default_an_inband;
 };
 
 struct stmmac_dma_cfg {
-- 
cgit v1.2.3


From e4fd855c52ec5af34d920206190be29919fadca3 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 5 Mar 2026 10:43:02 +0000
Subject: net: stmmac: make pcs_mask and phy_mask u32

The PCS and PHY masks are passed to the mdio bus layer as phy_mask
to prevent bus addresses between 0 and 31 inclusive being scanned,
and this is declared as u32. Also declare these as u32 in stmmac
for type consistency.

Since this is a u32, use BIT_U32() rather than BIT() to generate
values for these fields.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Tested-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Link: https://patch.msgid.link/E1vy6AY-0000000BtxJ-3smT@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c    | 2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 2 +-
 include/linux/stmmac.h                               | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index ece2a0c38562..fc13bfb47783 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -699,7 +699,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	/* Intel mgbe SGMII interface uses pcs-xcps */
 	if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII ||
 	    plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) {
-		plat->mdio_bus_data->pcs_mask = BIT(INTEL_MGBE_XPCS_ADDR);
+		plat->mdio_bus_data->pcs_mask = BIT_U32(INTEL_MGBE_XPCS_ADDR);
 		plat->mdio_bus_data->default_an_inband = true;
 		plat->select_pcs = intel_mgbe_select_pcs;
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index b913fe5af488..ada6c6ef1f5c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -168,7 +168,7 @@ static int loongson_gnet_data(struct pci_dev *pdev,
 	loongson_default_data(pdev, plat);
 
 	plat->phy_interface = PHY_INTERFACE_MODE_GMII;
-	plat->mdio_bus_data->phy_mask = ~(u32)BIT(2);
+	plat->mdio_bus_data->phy_mask = ~BIT_U32(2);
 	plat->fix_mac_speed = loongson_gnet_fix_speed;
 
 	return 0;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 678d03d6d3bd..965ada809fdf 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -84,8 +84,8 @@ struct stmmac_priv;
 /* Platfrom data for platform device structure's platform_data field */
 
 struct stmmac_mdio_bus_data {
-	unsigned int phy_mask;
-	unsigned int pcs_mask;
+	u32 phy_mask;
+	u32 pcs_mask;
 	int *irqs;
 	int probed_phy_irq;
 	bool needs_reset;
-- 
cgit v1.2.3


From 260d27b3aec9f30d68f9f3cacc674655897eb745 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 4 Mar 2026 21:17:28 +0100
Subject: net: phy: remove phy_attach

378e6523ebb1 ("net: bcmgenet: remove unused platform code") removed
the last user of phy_attach(). So remove this function.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/8812176a-e319-4e9f-815d-99ea339df8b2@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/phy_device.c | 38 --------------------------------------
 include/linux/phy.h          |  2 --
 2 files changed, 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 3bd415710bf3..d1cbcfc3d2a6 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1895,44 +1895,6 @@ error_put_device:
 }
 EXPORT_SYMBOL(phy_attach_direct);
 
-/**
- * phy_attach - attach a network device to a particular PHY device
- * @dev: network device to attach
- * @bus_id: Bus ID of PHY device to attach
- * @interface: PHY device's interface
- *
- * Description: Same as phy_attach_direct() except that a PHY bus_id
- *     string is passed instead of a pointer to a struct phy_device.
- */
-struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
-			      phy_interface_t interface)
-{
-	struct phy_device *phydev;
-	struct device *d;
-	int rc;
-
-	if (!dev)
-		return ERR_PTR(-EINVAL);
-
-	/* Search the list of PHY devices on the mdio bus for the
-	 * PHY with the requested name
-	 */
-	d = bus_find_device_by_name(&mdio_bus_type, NULL, bus_id);
-	if (!d) {
-		pr_err("PHY %s not found\n", bus_id);
-		return ERR_PTR(-ENODEV);
-	}
-	phydev = to_phy_device(d);
-
-	rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);
-	put_device(d);
-	if (rc)
-		return ERR_PTR(rc);
-
-	return phydev;
-}
-EXPORT_SYMBOL(phy_attach);
-
 /**
  * phy_detach - detach a PHY device from its network device
  * @phydev: target phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 6f9979a26892..e9b0d7427b0e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2152,8 +2152,6 @@ int phy_suspend(struct phy_device *phydev);
 int phy_resume(struct phy_device *phydev);
 int __phy_resume(struct phy_device *phydev);
 int phy_loopback(struct phy_device *phydev, bool enable, int speed);
-struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
-			      phy_interface_t interface);
 struct phy_device *phy_find_next(struct mii_bus *bus, struct phy_device *pos);
 int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 		      u32 flags, phy_interface_t interface);
-- 
cgit v1.2.3


From c9429bf56405a326845a8a35357b5bdf1dc4558c Mon Sep 17 00:00:00 2001
From: Mykyta Yatsenko <yatsenko@meta.com>
Date: Tue, 24 Feb 2026 19:29:54 +0000
Subject: rhashtable: consolidate hash computation in rht_key_get_hash()

The else-if and else branches in rht_key_get_hash() both compute a hash
using either params.hashfn or jhash, differing only in the source of
key_len (params.key_len vs ht->p.key_len). Merge the two branches into
one by using the ternary `params.key_len ?: ht->p.key_len` to select
the key length, removing the duplicated logic.

This also improves the performance of the else branch which previously
always used jhash and never fell through to jhash2. This branch is going
to be used by BPF resizable hashmap, which wraps rhashtable:
https://lore.kernel.org/bpf/20260205-rhash-v1-0-30dd6d63c462@meta.com/

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/rhashtable.h | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 133ccb39137a..0480509a6339 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -129,10 +129,10 @@ static __always_inline unsigned int rht_key_get_hash(struct rhashtable *ht,
 	unsigned int hash;
 
 	/* params must be equal to ht->p if it isn't constant. */
-	if (!__builtin_constant_p(params.key_len))
+	if (!__builtin_constant_p(params.key_len)) {
 		hash = ht->p.hashfn(key, ht->key_len, hash_rnd);
-	else if (params.key_len) {
-		unsigned int key_len = params.key_len;
+	} else {
+		unsigned int key_len = params.key_len ? : ht->p.key_len;
 
 		if (params.hashfn)
 			hash = params.hashfn(key, key_len, hash_rnd);
@@ -140,13 +140,6 @@ static __always_inline unsigned int rht_key_get_hash(struct rhashtable *ht,
 			hash = jhash(key, key_len, hash_rnd);
 		else
 			hash = jhash2(key, key_len / sizeof(u32), hash_rnd);
-	} else {
-		unsigned int key_len = ht->p.key_len;
-
-		if (params.hashfn)
-			hash = params.hashfn(key, key_len, hash_rnd);
-		else
-			hash = jhash(key, key_len, hash_rnd);
 	}
 
 	return hash;
-- 
cgit v1.2.3


From 30b0515342db48ac9ffd9999648de0f7ca1d6a87 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 7 Mar 2026 05:29:50 -1000
Subject: sched_ext: Add per-CPU data to DSQs

Add per-CPU data structure to dispatch queues. Each DSQ now has a percpu
scx_dsq_pcpu which contains a back-pointer to the DSQ. This will be used by
future changes to implement per-CPU reenqueue tracking for user DSQs.

init_dsq() now allocates the percpu data and can fail, so it returns an
error code. All callers are updated to handle failures. exit_dsq() is added
to free the percpu data and is called from all DSQ cleanup paths.

In scx_bpf_create_dsq(), init_dsq() is called before rcu_read_lock() since
alloc_percpu() requires GFP_KERNEL context, and dsq->sched is set
afterwards.

v2: Fix err_free_pcpu to only exit_dsq() initialized bypass DSQs (Andrea
    Righi).

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 include/linux/sched/ext.h |  5 +++
 kernel/sched/ext.c        | 87 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 77 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index f354d7d34306..98cc1f41b91e 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -62,6 +62,10 @@ enum scx_dsq_id_flags {
 	SCX_DSQ_LOCAL_CPU_MASK	= 0xffffffffLLU,
 };
 
+struct scx_dsq_pcpu {
+	struct scx_dispatch_q	*dsq;
+};
+
 /*
  * A dispatch queue (DSQ) can be either a FIFO or p->scx.dsq_vtime ordered
  * queue. A built-in DSQ is always a FIFO. The built-in local DSQs are used to
@@ -79,6 +83,7 @@ struct scx_dispatch_q {
 	struct rhash_head	hash_node;
 	struct llist_node	free_node;
 	struct scx_sched	*sched;
+	struct scx_dsq_pcpu __percpu *pcpu;
 	struct rcu_head		rcu;
 };
 
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index d8ea12ddc206..aea09eb36873 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4020,15 +4020,42 @@ DEFINE_SCHED_CLASS(ext) = {
 #endif
 };
 
-static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id,
-		     struct scx_sched *sch)
+static s32 init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id,
+		    struct scx_sched *sch)
 {
+	s32 cpu;
+
 	memset(dsq, 0, sizeof(*dsq));
 
 	raw_spin_lock_init(&dsq->lock);
 	INIT_LIST_HEAD(&dsq->list);
 	dsq->id = dsq_id;
 	dsq->sched = sch;
+
+	dsq->pcpu = alloc_percpu(struct scx_dsq_pcpu);
+	if (!dsq->pcpu)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		struct scx_dsq_pcpu *pcpu = per_cpu_ptr(dsq->pcpu, cpu);
+
+		pcpu->dsq = dsq;
+	}
+
+	return 0;
+}
+
+static void exit_dsq(struct scx_dispatch_q *dsq)
+{
+	free_percpu(dsq->pcpu);
+}
+
+static void free_dsq_rcufn(struct rcu_head *rcu)
+{
+	struct scx_dispatch_q *dsq = container_of(rcu, struct scx_dispatch_q, rcu);
+
+	exit_dsq(dsq);
+	kfree(dsq);
 }
 
 static void free_dsq_irq_workfn(struct irq_work *irq_work)
@@ -4037,7 +4064,7 @@ static void free_dsq_irq_workfn(struct irq_work *irq_work)
 	struct scx_dispatch_q *dsq, *tmp_dsq;
 
 	llist_for_each_entry_safe(dsq, tmp_dsq, to_free, free_node)
-		kfree_rcu(dsq, rcu);
+		call_rcu(&dsq->rcu, free_dsq_rcufn);
 }
 
 static DEFINE_IRQ_WORK(free_dsq_irq_work, free_dsq_irq_workfn);
@@ -4234,15 +4261,17 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
 		cgroup_put(sch_cgroup(sch));
 #endif	/* CONFIG_EXT_SUB_SCHED */
 
-	/*
-	 * $sch would have entered bypass mode before the RCU grace period. As
-	 * that blocks new deferrals, all deferred_reenq_local_node's must be
-	 * off-list by now.
-	 */
 	for_each_possible_cpu(cpu) {
 		struct scx_sched_pcpu *pcpu = per_cpu_ptr(sch->pcpu, cpu);
 
+		/*
+		 * $sch would have entered bypass mode before the RCU grace
+		 * period. As that blocks new deferrals, all
+		 * deferred_reenq_local_node's must be off-list by now.
+		 */
 		WARN_ON_ONCE(!list_empty(&pcpu->deferred_reenq_local.node));
+
+		exit_dsq(bypass_dsq(sch, cpu));
 	}
 
 	free_percpu(sch->pcpu);
@@ -5787,6 +5816,9 @@ static int alloc_kick_syncs(void)
 
 static void free_pnode(struct scx_sched_pnode *pnode)
 {
+	if (!pnode)
+		return;
+	exit_dsq(&pnode->global_dsq);
 	kfree(pnode);
 }
 
@@ -5798,7 +5830,10 @@ static struct scx_sched_pnode *alloc_pnode(struct scx_sched *sch, int node)
 	if (!pnode)
 		return NULL;
 
-	init_dsq(&pnode->global_dsq, SCX_DSQ_GLOBAL, sch);
+	if (init_dsq(&pnode->global_dsq, SCX_DSQ_GLOBAL, sch)) {
+		kfree(pnode);
+		return NULL;
+	}
 
 	return pnode;
 }
@@ -5809,7 +5844,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
 {
 	struct scx_sched *sch;
 	s32 level = parent ? parent->level + 1 : 0;
-	s32 node, cpu, ret;
+	s32 node, cpu, ret, bypass_fail_cpu = nr_cpu_ids;
 
 	sch = kzalloc_flex(*sch, ancestors, level);
 	if (!sch)
@@ -5848,8 +5883,13 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
 		goto err_free_pnode;
 	}
 
-	for_each_possible_cpu(cpu)
-		init_dsq(bypass_dsq(sch, cpu), SCX_DSQ_BYPASS, sch);
+	for_each_possible_cpu(cpu) {
+		ret = init_dsq(bypass_dsq(sch, cpu), SCX_DSQ_BYPASS, sch);
+		if (ret) {
+			bypass_fail_cpu = cpu;
+			goto err_free_pcpu;
+		}
+	}
 
 	for_each_possible_cpu(cpu) {
 		struct scx_sched_pcpu *pcpu = per_cpu_ptr(sch->pcpu, cpu);
@@ -5931,6 +5971,11 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
 err_stop_helper:
 	kthread_destroy_worker(sch->helper);
 err_free_pcpu:
+	for_each_possible_cpu(cpu) {
+		if (cpu == bypass_fail_cpu)
+			break;
+		exit_dsq(bypass_dsq(sch, cpu));
+	}
 	free_percpu(sch->pcpu);
 err_free_pnode:
 	for_each_node_state(node, N_POSSIBLE)
@@ -7173,7 +7218,7 @@ void __init init_sched_ext_class(void)
 		int  n = cpu_to_node(cpu);
 
 		/* local_dsq's sch will be set during scx_root_enable() */
-		init_dsq(&rq->scx.local_dsq, SCX_DSQ_LOCAL, NULL);
+		BUG_ON(init_dsq(&rq->scx.local_dsq, SCX_DSQ_LOCAL, NULL));
 
 		INIT_LIST_HEAD(&rq->scx.runnable_list);
 		INIT_LIST_HEAD(&rq->scx.ddsp_deferred_locals);
@@ -7872,11 +7917,21 @@ __bpf_kfunc s32 scx_bpf_create_dsq(u64 dsq_id, s32 node, const struct bpf_prog_a
 	if (!dsq)
 		return -ENOMEM;
 
+	/*
+	 * init_dsq() must be called in GFP_KERNEL context. Init it with NULL
+	 * @sch and update afterwards.
+	 */
+	ret = init_dsq(dsq, dsq_id, NULL);
+	if (ret) {
+		kfree(dsq);
+		return ret;
+	}
+
 	rcu_read_lock();
 
 	sch = scx_prog_sched(aux);
 	if (sch) {
-		init_dsq(dsq, dsq_id, sch);
+		dsq->sched = sch;
 		ret = rhashtable_lookup_insert_fast(&sch->dsq_hash, &dsq->hash_node,
 						    dsq_hash_params);
 	} else {
@@ -7884,8 +7939,10 @@ __bpf_kfunc s32 scx_bpf_create_dsq(u64 dsq_id, s32 node, const struct bpf_prog_a
 	}
 
 	rcu_read_unlock();
-	if (ret)
+	if (ret) {
+		exit_dsq(dsq);
 		kfree(dsq);
+	}
 	return ret;
 }
 
-- 
cgit v1.2.3


From 35250720d6ed1e83e0d1e12b7e8bf7b8316d7d58 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 7 Mar 2026 05:29:50 -1000
Subject: sched_ext: Factor out nldsq_cursor_next_task() and
 nldsq_cursor_lost_task()

Factor out cursor-based DSQ iteration from bpf_iter_scx_dsq_next() into
nldsq_cursor_next_task() and the task-lost check from scx_dsq_move() into
nldsq_cursor_lost_task() to prepare for reuse.

As ->priv is only used to record dsq->seq for cursors, update
INIT_DSQ_LIST_CURSOR() to take the DSQ pointer and set ->priv from dsq->seq
so that users don't have to read it manually. Move scx_dsq_iter_flags enum
earlier so nldsq_cursor_next_task() can use SCX_DSQ_ITER_REV.

bypass_lb_cpu() now sets cursor.priv to dsq->seq but doesn't use it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 include/linux/sched/ext.h |   6 +-
 kernel/sched/ext.c        | 154 +++++++++++++++++++++++++++++-----------------
 2 files changed, 102 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 98cc1f41b91e..303f57dfb947 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -157,11 +157,11 @@ struct scx_dsq_list_node {
 	u32			priv;		/* can be used by iter cursor */
 };
 
-#define INIT_DSQ_LIST_CURSOR(__node, __flags, __priv)				\
+#define INIT_DSQ_LIST_CURSOR(__cursor, __dsq, __flags)				\
 	(struct scx_dsq_list_node) {						\
-		.node = LIST_HEAD_INIT((__node).node),				\
+		.node = LIST_HEAD_INIT((__cursor).node),			\
 		.flags = SCX_DSQ_LNODE_ITER_CURSOR | (__flags),			\
-		.priv = (__priv),						\
+		.priv = READ_ONCE((__dsq)->seq),				\
 	}
 
 struct scx_sched;
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index aea09eb36873..f51e4c20cd95 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -570,9 +570,22 @@ static __always_inline bool scx_kf_allowed_on_arg_tasks(struct scx_sched *sch,
 	return true;
 }
 
+enum scx_dsq_iter_flags {
+	/* iterate in the reverse dispatch order */
+	SCX_DSQ_ITER_REV		= 1U << 16,
+
+	__SCX_DSQ_ITER_HAS_SLICE	= 1U << 30,
+	__SCX_DSQ_ITER_HAS_VTIME	= 1U << 31,
+
+	__SCX_DSQ_ITER_USER_FLAGS	= SCX_DSQ_ITER_REV,
+	__SCX_DSQ_ITER_ALL_FLAGS	= __SCX_DSQ_ITER_USER_FLAGS |
+					  __SCX_DSQ_ITER_HAS_SLICE |
+					  __SCX_DSQ_ITER_HAS_VTIME,
+};
+
 /**
  * nldsq_next_task - Iterate to the next task in a non-local DSQ
- * @dsq: user dsq being iterated
+ * @dsq: non-local dsq being iterated
  * @cur: current position, %NULL to start iteration
  * @rev: walk backwards
  *
@@ -612,6 +625,85 @@ static struct task_struct *nldsq_next_task(struct scx_dispatch_q *dsq,
 	for ((p) = nldsq_next_task((dsq), NULL, false); (p);			\
 	     (p) = nldsq_next_task((dsq), (p), false))
 
+/**
+ * nldsq_cursor_next_task - Iterate to the next task given a cursor in a non-local DSQ
+ * @cursor: scx_dsq_list_node initialized with INIT_DSQ_LIST_CURSOR()
+ * @dsq: non-local dsq being iterated
+ *
+ * Find the next task in a cursor based iteration. The caller must have
+ * initialized @cursor using INIT_DSQ_LIST_CURSOR() and can release the DSQ lock
+ * between the iteration steps.
+ *
+ * Only tasks which were queued before @cursor was initialized are visible. This
+ * bounds the iteration and guarantees that vtime never jumps in the other
+ * direction while iterating.
+ */
+static struct task_struct *nldsq_cursor_next_task(struct scx_dsq_list_node *cursor,
+						  struct scx_dispatch_q *dsq)
+{
+	bool rev = cursor->flags & SCX_DSQ_ITER_REV;
+	struct task_struct *p;
+
+	lockdep_assert_held(&dsq->lock);
+	BUG_ON(!(cursor->flags & SCX_DSQ_LNODE_ITER_CURSOR));
+
+	if (list_empty(&cursor->node))
+		p = NULL;
+	else
+		p = container_of(cursor, struct task_struct, scx.dsq_list);
+
+	/* skip cursors and tasks that were queued after @cursor init */
+	do {
+		p = nldsq_next_task(dsq, p, rev);
+	} while (p && unlikely(u32_before(cursor->priv, p->scx.dsq_seq)));
+
+	if (p) {
+		if (rev)
+			list_move_tail(&cursor->node, &p->scx.dsq_list.node);
+		else
+			list_move(&cursor->node, &p->scx.dsq_list.node);
+	} else {
+		list_del_init(&cursor->node);
+	}
+
+	return p;
+}
+
+/**
+ * nldsq_cursor_lost_task - Test whether someone else took the task since iteration
+ * @cursor: scx_dsq_list_node initialized with INIT_DSQ_LIST_CURSOR()
+ * @rq: rq @p was on
+ * @dsq: dsq @p was on
+ * @p: target task
+ *
+ * @p is a task returned by nldsq_cursor_next_task(). The locks may have been
+ * dropped and re-acquired inbetween. Verify that no one else took or is in the
+ * process of taking @p from @dsq.
+ *
+ * On %false return, the caller can assume full ownership of @p.
+ */
+static bool nldsq_cursor_lost_task(struct scx_dsq_list_node *cursor,
+				   struct rq *rq, struct scx_dispatch_q *dsq,
+				   struct task_struct *p)
+{
+	lockdep_assert_rq_held(rq);
+	lockdep_assert_held(&dsq->lock);
+
+	/*
+	 * @p could have already left $src_dsq, got re-enqueud, or be in the
+	 * process of being consumed by someone else.
+	 */
+	if (unlikely(p->scx.dsq != dsq ||
+		     u32_before(cursor->priv, p->scx.dsq_seq) ||
+		     p->scx.holding_cpu >= 0))
+		return true;
+
+	/* if @p has stayed on @dsq, its rq couldn't have changed */
+	if (WARN_ON_ONCE(rq != task_rq(p)))
+		return true;
+
+	return false;
+}
 
 /*
  * BPF DSQ iterator. Tasks in a non-local DSQ can be iterated in [reverse]
@@ -619,19 +711,6 @@ static struct task_struct *nldsq_next_task(struct scx_dispatch_q *dsq,
  * changes without breaking backward compatibility. Can be used with
  * bpf_for_each(). See bpf_iter_scx_dsq_*().
  */
-enum scx_dsq_iter_flags {
-	/* iterate in the reverse dispatch order */
-	SCX_DSQ_ITER_REV		= 1U << 16,
-
-	__SCX_DSQ_ITER_HAS_SLICE	= 1U << 30,
-	__SCX_DSQ_ITER_HAS_VTIME	= 1U << 31,
-
-	__SCX_DSQ_ITER_USER_FLAGS	= SCX_DSQ_ITER_REV,
-	__SCX_DSQ_ITER_ALL_FLAGS	= __SCX_DSQ_ITER_USER_FLAGS |
-					  __SCX_DSQ_ITER_HAS_SLICE |
-					  __SCX_DSQ_ITER_HAS_VTIME,
-};
-
 struct bpf_iter_scx_dsq_kern {
 	struct scx_dsq_list_node	cursor;
 	struct scx_dispatch_q		*dsq;
@@ -4497,7 +4576,7 @@ static u32 bypass_lb_cpu(struct scx_sched *sch, s32 donor,
 	struct rq *donor_rq = cpu_rq(donor);
 	struct scx_dispatch_q *donor_dsq = bypass_dsq(sch, donor);
 	struct task_struct *p, *n;
-	struct scx_dsq_list_node cursor = INIT_DSQ_LIST_CURSOR(cursor, 0, 0);
+	struct scx_dsq_list_node cursor = INIT_DSQ_LIST_CURSOR(cursor, donor_dsq, 0);
 	s32 delta = READ_ONCE(donor_dsq->nr) - nr_donor_target;
 	u32 nr_balanced = 0, min_delta_us;
 
@@ -7542,14 +7621,8 @@ static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit,
 	locked_rq = src_rq;
 	raw_spin_lock(&src_dsq->lock);
 
-	/*
-	 * Did someone else get to it? @p could have already left $src_dsq, got
-	 * re-enqueud, or be in the process of being consumed by someone else.
-	 */
-	if (unlikely(p->scx.dsq != src_dsq ||
-		     u32_before(kit->cursor.priv, p->scx.dsq_seq) ||
-		     p->scx.holding_cpu >= 0) ||
-	    WARN_ON_ONCE(src_rq != task_rq(p))) {
+	/* did someone else get to it while we dropped the locks? */
+	if (nldsq_cursor_lost_task(&kit->cursor, src_rq, src_dsq, p)) {
 		raw_spin_unlock(&src_dsq->lock);
 		goto out;
 	}
@@ -8188,8 +8261,7 @@ __bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id,
 	if (!kit->dsq)
 		return -ENOENT;
 
-	kit->cursor = INIT_DSQ_LIST_CURSOR(kit->cursor, flags,
-					   READ_ONCE(kit->dsq->seq));
+	kit->cursor = INIT_DSQ_LIST_CURSOR(kit->cursor, kit->dsq, flags);
 
 	return 0;
 }
@@ -8203,41 +8275,13 @@ __bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id,
 __bpf_kfunc struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it)
 {
 	struct bpf_iter_scx_dsq_kern *kit = (void *)it;
-	bool rev = kit->cursor.flags & SCX_DSQ_ITER_REV;
-	struct task_struct *p;
-	unsigned long flags;
 
 	if (!kit->dsq)
 		return NULL;
 
-	raw_spin_lock_irqsave(&kit->dsq->lock, flags);
+	guard(raw_spinlock_irqsave)(&kit->dsq->lock);
 
-	if (list_empty(&kit->cursor.node))
-		p = NULL;
-	else
-		p = container_of(&kit->cursor, struct task_struct, scx.dsq_list);
-
-	/*
-	 * Only tasks which were queued before the iteration started are
-	 * visible. This bounds BPF iterations and guarantees that vtime never
-	 * jumps in the other direction while iterating.
-	 */
-	do {
-		p = nldsq_next_task(kit->dsq, p, rev);
-	} while (p && unlikely(u32_before(kit->cursor.priv, p->scx.dsq_seq)));
-
-	if (p) {
-		if (rev)
-			list_move_tail(&kit->cursor.node, &p->scx.dsq_list.node);
-		else
-			list_move(&kit->cursor.node, &p->scx.dsq_list.node);
-	} else {
-		list_del_init(&kit->cursor.node);
-	}
-
-	raw_spin_unlock_irqrestore(&kit->dsq->lock, flags);
-
-	return p;
+	return nldsq_cursor_next_task(&kit->cursor, kit->dsq);
 }
 
 /**
-- 
cgit v1.2.3


From 84b1a0ea0b7c23dec240783a592e480780efe459 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 7 Mar 2026 05:29:50 -1000
Subject: sched_ext: Implement scx_bpf_dsq_reenq() for user DSQs

scx_bpf_dsq_reenq() currently only supports local DSQs. Extend it to support
user-defined DSQs by adding a deferred re-enqueue mechanism similar to the
local DSQ handling.

Add per-cpu deferred_reenq_user_node/flags to scx_dsq_pcpu and
deferred_reenq_users list to scx_rq. When scx_bpf_dsq_reenq() is called on a
user DSQ, the DSQ's per-cpu node is added to the current rq's deferred list.
process_deferred_reenq_users() then iterates the DSQ using the cursor helpers
and re-enqueues each task.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 include/linux/sched/ext.h      |   6 ++
 kernel/sched/ext.c             | 128 +++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h           |   1 +
 tools/sched_ext/scx_qmap.bpf.c |  57 +++++++++++++++++-
 4 files changed, 190 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 303f57dfb947..e77504faa0bc 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -62,8 +62,14 @@ enum scx_dsq_id_flags {
 	SCX_DSQ_LOCAL_CPU_MASK	= 0xffffffffLLU,
 };
 
+struct scx_deferred_reenq_user {
+	struct list_head	node;
+	u64			flags;
+};
+
 struct scx_dsq_pcpu {
 	struct scx_dispatch_q	*dsq;
+	struct scx_deferred_reenq_user deferred_reenq_user;
 };
 
 /*
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index f51e4c20cd95..805c6689c99a 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1180,6 +1180,18 @@ static void schedule_dsq_reenq(struct scx_sched *sch, struct scx_dispatch_q *dsq
 			drl->flags |= reenq_flags;
 		}
 
+		schedule_deferred(rq);
+	} else if (!(dsq->id & SCX_DSQ_FLAG_BUILTIN)) {
+		struct rq *rq = this_rq();
+		struct scx_dsq_pcpu *dsq_pcpu = per_cpu_ptr(dsq->pcpu, cpu_of(rq));
+		struct scx_deferred_reenq_user *dru = &dsq_pcpu->deferred_reenq_user;
+
+		scoped_guard (raw_spinlock_irqsave, &rq->scx.deferred_reenq_lock) {
+			if (list_empty(&dru->node))
+				list_move_tail(&dru->node, &rq->scx.deferred_reenq_users);
+			dru->flags |= reenq_flags;
+		}
+
 		schedule_deferred(rq);
 	} else {
 		scx_error(sch, "DSQ 0x%llx not allowed for reenq", dsq->id);
@@ -3784,12 +3796,108 @@ static void process_deferred_reenq_locals(struct rq *rq)
 	}
 }
 
+static void reenq_user(struct rq *rq, struct scx_dispatch_q *dsq, u64 reenq_flags)
+{
+	struct rq *locked_rq = rq;
+	struct scx_sched *sch = dsq->sched;
+	struct scx_dsq_list_node cursor = INIT_DSQ_LIST_CURSOR(cursor, dsq, 0);
+	struct task_struct *p;
+	s32 nr_enqueued = 0;
+
+	lockdep_assert_rq_held(rq);
+
+	raw_spin_lock(&dsq->lock);
+
+	while (likely(!READ_ONCE(sch->bypass_depth))) {
+		struct rq *task_rq;
+
+		p = nldsq_cursor_next_task(&cursor, dsq);
+		if (!p)
+			break;
+
+		if (!task_should_reenq(p, reenq_flags))
+			continue;
+
+		task_rq = task_rq(p);
+
+		if (locked_rq != task_rq) {
+			if (locked_rq)
+				raw_spin_rq_unlock(locked_rq);
+			if (unlikely(!raw_spin_rq_trylock(task_rq))) {
+				raw_spin_unlock(&dsq->lock);
+				raw_spin_rq_lock(task_rq);
+				raw_spin_lock(&dsq->lock);
+			}
+			locked_rq = task_rq;
+
+			/* did we lose @p while switching locks? */
+			if (nldsq_cursor_lost_task(&cursor, task_rq, dsq, p))
+				continue;
+		}
+
+		/* @p is on @dsq, its rq and @dsq are locked */
+		dispatch_dequeue_locked(p, dsq);
+		raw_spin_unlock(&dsq->lock);
+		do_enqueue_task(task_rq, p, SCX_ENQ_REENQ, -1);
+
+		if (!(++nr_enqueued % SCX_TASK_ITER_BATCH)) {
+			raw_spin_rq_unlock(locked_rq);
+			locked_rq = NULL;
+			cpu_relax();
+		}
+
+		raw_spin_lock(&dsq->lock);
+	}
+
+	list_del_init(&cursor.node);
+	raw_spin_unlock(&dsq->lock);
+
+	if (locked_rq != rq) {
+		if (locked_rq)
+			raw_spin_rq_unlock(locked_rq);
+		raw_spin_rq_lock(rq);
+	}
+}
+
+static void process_deferred_reenq_users(struct rq *rq)
+{
+	lockdep_assert_rq_held(rq);
+
+	while (true) {
+		struct scx_dispatch_q *dsq;
+		u64 reenq_flags = 0;
+
+		scoped_guard (raw_spinlock, &rq->scx.deferred_reenq_lock) {
+			struct scx_deferred_reenq_user *dru =
+				list_first_entry_or_null(&rq->scx.deferred_reenq_users,
+							 struct scx_deferred_reenq_user,
+							 node);
+			struct scx_dsq_pcpu *dsq_pcpu;
+
+			if (!dru)
+				return;
+
+			dsq_pcpu = container_of(dru, struct scx_dsq_pcpu,
+						deferred_reenq_user);
+			dsq = dsq_pcpu->dsq;
+			swap(dru->flags, reenq_flags);
+			list_del_init(&dru->node);
+		}
+
+		BUG_ON(dsq->id & SCX_DSQ_FLAG_BUILTIN);
+		reenq_user(rq, dsq, reenq_flags);
+	}
+}
+
 static void run_deferred(struct rq *rq)
 {
 	process_ddsp_deferred_locals(rq);
 
 	if (!list_empty(&rq->scx.deferred_reenq_locals))
 		process_deferred_reenq_locals(rq);
+
+	if (!list_empty(&rq->scx.deferred_reenq_users))
+		process_deferred_reenq_users(rq);
 }
 
 #ifdef CONFIG_NO_HZ_FULL
@@ -4119,6 +4227,7 @@ static s32 init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id,
 		struct scx_dsq_pcpu *pcpu = per_cpu_ptr(dsq->pcpu, cpu);
 
 		pcpu->dsq = dsq;
+		INIT_LIST_HEAD(&pcpu->deferred_reenq_user.node);
 	}
 
 	return 0;
@@ -4126,6 +4235,23 @@ static s32 init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id,
 
 static void exit_dsq(struct scx_dispatch_q *dsq)
 {
+	s32 cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct scx_dsq_pcpu *pcpu = per_cpu_ptr(dsq->pcpu, cpu);
+		struct scx_deferred_reenq_user *dru = &pcpu->deferred_reenq_user;
+		struct rq *rq = cpu_rq(cpu);
+
+		/*
+		 * There must have been a RCU grace period since the last
+		 * insertion and @dsq should be off the deferred list by now.
+		 */
+		if (WARN_ON_ONCE(!list_empty(&dru->node))) {
+			guard(raw_spinlock_irqsave)(&rq->scx.deferred_reenq_lock);
+			list_del_init(&dru->node);
+		}
+	}
+
 	free_percpu(dsq->pcpu);
 }
 
@@ -7308,6 +7434,7 @@ void __init init_sched_ext_class(void)
 		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n));
 		raw_spin_lock_init(&rq->scx.deferred_reenq_lock);
 		INIT_LIST_HEAD(&rq->scx.deferred_reenq_locals);
+		INIT_LIST_HEAD(&rq->scx.deferred_reenq_users);
 		rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn);
 		rq->scx.kick_cpus_irq_work = IRQ_WORK_INIT_HARD(kick_cpus_irq_workfn);
 
@@ -8354,6 +8481,7 @@ __bpf_kfunc struct task_struct *scx_bpf_dsq_peek(u64 dsq_id,
  * supported:
  *
  * - Local DSQs (%SCX_DSQ_LOCAL or %SCX_DSQ_LOCAL_ON | $cpu)
+ * - User DSQs
  *
  * Re-enqueues are performed asynchronously. Can be called from anywhere.
  */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0794852524e7..893f89ce2a77 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -810,6 +810,7 @@ struct scx_rq {
 
 	raw_spinlock_t		deferred_reenq_lock;
 	struct list_head	deferred_reenq_locals;	/* scheds requesting reenq of local DSQ */
+	struct list_head	deferred_reenq_users;	/* user DSQs requesting reenq */
 	struct balance_callback	deferred_bal_cb;
 	struct irq_work		deferred_irq_work;
 	struct irq_work		kick_cpus_irq_work;
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index 83e8289e8c0c..a4a1b84fe359 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -26,8 +26,11 @@
 
 enum consts {
 	ONE_SEC_IN_NS		= 1000000000,
+	ONE_MSEC_IN_NS		= 1000000,
+	LOWPRI_INTV_NS		= 10 * ONE_MSEC_IN_NS,
 	SHARED_DSQ		= 0,
 	HIGHPRI_DSQ		= 1,
+	LOWPRI_DSQ		= 2,
 	HIGHPRI_WEIGHT		= 8668,		/* this is what -20 maps to */
 };
 
@@ -172,6 +175,9 @@ s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
 	if (!(tctx = lookup_task_ctx(p)))
 		return -ESRCH;
 
+	if (p->scx.weight < 2 && !(p->flags & PF_KTHREAD))
+		return prev_cpu;
+
 	cpu = pick_direct_dispatch_cpu(p, prev_cpu);
 
 	if (cpu >= 0) {
@@ -242,6 +248,13 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
 		return;
 	}
 
+	/* see lowpri_timerfn() */
+	if (__COMPAT_has_generic_reenq() &&
+	    p->scx.weight < 2 && !(p->flags & PF_KTHREAD) && !(enq_flags & SCX_ENQ_REENQ)) {
+		scx_bpf_dsq_insert(p, LOWPRI_DSQ, slice_ns, enq_flags);
+		return;
+	}
+
 	/* if select_cpu() wasn't called, try direct dispatch */
 	if (!__COMPAT_is_enq_cpu_selected(enq_flags) &&
 	    (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
@@ -873,6 +886,28 @@ static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
 	return 0;
 }
 
+struct lowpri_timer {
+	struct bpf_timer timer;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, u32);
+	__type(value, struct lowpri_timer);
+} lowpri_timer SEC(".maps");
+
+/*
+ * Nice 19 tasks are put into the lowpri DSQ. Every 10ms, reenq is triggered and
+ * the tasks are transferred to SHARED_DSQ.
+ */
+static int lowpri_timerfn(void *map, int *key, struct bpf_timer *timer)
+{
+	scx_bpf_dsq_reenq(LOWPRI_DSQ, 0);
+	bpf_timer_start(timer, LOWPRI_INTV_NS, 0);
+	return 0;
+}
+
 s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
 {
 	u32 key = 0;
@@ -894,14 +929,32 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
 		return ret;
 	}
 
+	ret = scx_bpf_create_dsq(LOWPRI_DSQ, -1);
+	if (ret)
+		return ret;
+
 	timer = bpf_map_lookup_elem(&monitor_timer, &key);
 	if (!timer)
 		return -ESRCH;
-
 	bpf_timer_init(timer, &monitor_timer, CLOCK_MONOTONIC);
 	bpf_timer_set_callback(timer, monitor_timerfn);
+	ret = bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
+	if (ret)
+		return ret;
 
-	return bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
+	if (__COMPAT_has_generic_reenq()) {
+		/* see lowpri_timerfn() */
+		timer = bpf_map_lookup_elem(&lowpri_timer, &key);
+		if (!timer)
+			return -ESRCH;
+		bpf_timer_init(timer, &lowpri_timer, CLOCK_MONOTONIC);
+		bpf_timer_set_callback(timer, lowpri_timerfn);
+		ret = bpf_timer_start(timer, LOWPRI_INTV_NS, 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei)
-- 
cgit v1.2.3


From 7203d77d6e04f83f7b78838eed099d9cac31700b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 7 Mar 2026 05:29:50 -1000
Subject: sched_ext: Simplify task state handling

Task states (NONE, INIT, READY, ENABLED) were defined in a separate enum with
unshifted values and then shifted when stored in scx_entity.flags. Simplify by
defining them as pre-shifted values directly in scx_ent_flags and removing the
separate scx_task_state enum. This removes the need for shifting when
reading/writing state values.

scx_get_task_state() now returns the masked flags value directly.
scx_set_task_state() accepts the pre-shifted state value. scx_dump_task()
shifts down for display to maintain readable output.

No functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 include/linux/sched/ext.h | 28 ++++++++++++++++------------
 kernel/sched/ext.c        | 19 +++++++++----------
 2 files changed, 25 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index e77504faa0bc..e822b374b17f 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -93,7 +93,7 @@ struct scx_dispatch_q {
 	struct rcu_head		rcu;
 };
 
-/* scx_entity.flags */
+/* sched_ext_entity.flags */
 enum scx_ent_flags {
 	SCX_TASK_QUEUED		= 1 << 0, /* on ext runqueue */
 	SCX_TASK_IN_CUSTODY	= 1 << 1, /* in custody, needs ops.dequeue() when leaving */
@@ -101,21 +101,25 @@ enum scx_ent_flags {
 	SCX_TASK_DEQD_FOR_SLEEP	= 1 << 3, /* last dequeue was for SLEEP */
 	SCX_TASK_SUB_INIT	= 1 << 4, /* task being initialized for a sub sched */
 
-	SCX_TASK_STATE_SHIFT	= 8,	  /* bit 8 and 9 are used to carry scx_task_state */
+	/*
+	 * Bits 8 and 9 are used to carry task state:
+	 *
+	 * NONE		ops.init_task() not called yet
+	 * INIT		ops.init_task() succeeded, but task can be cancelled
+	 * READY	fully initialized, but not in sched_ext
+	 * ENABLED	fully initialized and in sched_ext
+	 */
+	SCX_TASK_STATE_SHIFT	= 8,	  /* bits 8 and 9 are used to carry task state */
 	SCX_TASK_STATE_BITS	= 2,
 	SCX_TASK_STATE_MASK	= ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT,
 
-	SCX_TASK_CURSOR		= 1 << 31, /* iteration cursor, not a task */
-};
-
-/* scx_entity.flags & SCX_TASK_STATE_MASK */
-enum scx_task_state {
-	SCX_TASK_NONE,		/* ops.init_task() not called yet */
-	SCX_TASK_INIT,		/* ops.init_task() succeeded, but task can be cancelled */
-	SCX_TASK_READY,		/* fully initialized, but not in sched_ext */
-	SCX_TASK_ENABLED,	/* fully initialized and in sched_ext */
+	SCX_TASK_NONE		= 0 << SCX_TASK_STATE_SHIFT,
+	SCX_TASK_INIT		= 1 << SCX_TASK_STATE_SHIFT,
+	SCX_TASK_READY		= 2 << SCX_TASK_STATE_SHIFT,
+	SCX_TASK_ENABLED	= 3 << SCX_TASK_STATE_SHIFT,
 
-	SCX_TASK_NR_STATES,
+	/* iteration cursor, not a task */
+	SCX_TASK_CURSOR		= 1 << 31,
 };
 
 /* scx_entity.dsq_flags */
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index ee756f1a70e1..f55e1603fc8c 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3284,18 +3284,16 @@ static struct cgroup *tg_cgrp(struct task_group *tg)
 
 #endif	/* CONFIG_EXT_GROUP_SCHED */
 
-static enum scx_task_state scx_get_task_state(const struct task_struct *p)
+static u32 scx_get_task_state(const struct task_struct *p)
 {
-	return (p->scx.flags & SCX_TASK_STATE_MASK) >> SCX_TASK_STATE_SHIFT;
+	return p->scx.flags & SCX_TASK_STATE_MASK;
 }
 
-static void scx_set_task_state(struct task_struct *p, enum scx_task_state state)
+static void scx_set_task_state(struct task_struct *p, u32 state)
 {
-	enum scx_task_state prev_state = scx_get_task_state(p);
+	u32 prev_state = scx_get_task_state(p);
 	bool warn = false;
 
-	BUILD_BUG_ON(SCX_TASK_NR_STATES > (1 << SCX_TASK_STATE_BITS));
-
 	switch (state) {
 	case SCX_TASK_NONE:
 		break;
@@ -3313,11 +3311,11 @@ static void scx_set_task_state(struct task_struct *p, enum scx_task_state state)
 		return;
 	}
 
-	WARN_ONCE(warn, "sched_ext: Invalid task state transition %d -> %d for %s[%d]",
+	WARN_ONCE(warn, "sched_ext: Invalid task state transition 0x%x -> 0x%x for %s[%d]",
 		  prev_state, state, p->comm, p->pid);
 
 	p->scx.flags &= ~SCX_TASK_STATE_MASK;
-	p->scx.flags |= state << SCX_TASK_STATE_SHIFT;
+	p->scx.flags |= state;
 }
 
 static int __scx_init_task(struct scx_sched *sch, struct task_struct *p, bool fork)
@@ -5794,7 +5792,8 @@ static void scx_dump_task(struct scx_sched *sch,
 		  own_marker, sch_id_buf,
 		  jiffies_delta_msecs(p->scx.runnable_at, dctx->at_jiffies));
 	dump_line(s, "      scx_state/flags=%u/0x%x dsq_flags=0x%x ops_state/qseq=%lu/%lu",
-		  scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK,
+		  scx_get_task_state(p) >> SCX_TASK_STATE_SHIFT,
+		  p->scx.flags & ~SCX_TASK_STATE_MASK,
 		  p->scx.dsq_flags, ops_state & SCX_OPSS_STATE_MASK,
 		  ops_state >> SCX_OPSS_QSEQ_SHIFT);
 	dump_line(s, "      sticky/holding_cpu=%d/%d dsq_id=%s",
@@ -6558,7 +6557,7 @@ static struct scx_sched *find_parent_sched(struct cgroup *cgrp)
 
 static bool assert_task_ready_or_enabled(struct task_struct *p)
 {
-	enum scx_task_state state = scx_get_task_state(p);
+	u32 state = scx_get_task_state(p);
 
 	switch (state) {
 	case SCX_TASK_READY:
-- 
cgit v1.2.3


From ce897abc21b2d5e74981ff2b848f3a08a580d50a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 7 Mar 2026 05:29:50 -1000
Subject: sched_ext: Add SCX_TASK_REENQ_REASON flags

SCX_ENQ_REENQ indicates that a task is being re-enqueued but doesn't tell the
BPF scheduler why. Add SCX_TASK_REENQ_REASON flags using bits 12-13 of
p->scx.flags to communicate the reason during ops.enqueue():

- NONE: Not being reenqueued
- KFUNC: Reenqueued by scx_bpf_dsq_reenq() and friends

More reasons will be added.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 include/linux/sched/ext.h   | 15 +++++++++++++++
 kernel/sched/ext.c          | 25 ++++++++++++++++++++++---
 kernel/sched/ext_internal.h | 10 +++-------
 3 files changed, 40 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index e822b374b17f..60a4f65d0174 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -118,6 +118,21 @@ enum scx_ent_flags {
 	SCX_TASK_READY		= 2 << SCX_TASK_STATE_SHIFT,
 	SCX_TASK_ENABLED	= 3 << SCX_TASK_STATE_SHIFT,
 
+	/*
+	 * Bits 12 and 13 are used to carry reenqueue reason. In addition to
+	 * %SCX_ENQ_REENQ flag, ops.enqueue() can also test for
+	 * %SCX_TASK_REENQ_REASON_NONE to distinguish reenqueues.
+	 *
+	 * NONE		not being reenqueued
+	 * KFUNC	reenqueued by scx_bpf_dsq_reenq() and friends
+	 */
+	SCX_TASK_REENQ_REASON_SHIFT = 12,
+	SCX_TASK_REENQ_REASON_BITS = 2,
+	SCX_TASK_REENQ_REASON_MASK = ((1 << SCX_TASK_REENQ_REASON_BITS) - 1) << SCX_TASK_REENQ_REASON_SHIFT,
+
+	SCX_TASK_REENQ_NONE	= 0 << SCX_TASK_REENQ_REASON_SHIFT,
+	SCX_TASK_REENQ_KFUNC	= 1 << SCX_TASK_REENQ_REASON_SHIFT,
+
 	/* iteration cursor, not a task */
 	SCX_TASK_CURSOR		= 1 << 31,
 };
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index f55e1603fc8c..d5849ed4cd3e 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3728,8 +3728,10 @@ static void process_ddsp_deferred_locals(struct rq *rq)
 	}
 }
 
-static bool task_should_reenq(struct task_struct *p, u64 reenq_flags)
+static bool task_should_reenq(struct task_struct *p, u64 reenq_flags, u32 *reason)
 {
+	*reason = SCX_TASK_REENQ_KFUNC;
+
 	if (reenq_flags & SCX_REENQ_ANY)
 		return true;
 	return false;
@@ -3751,6 +3753,7 @@ static u32 reenq_local(struct scx_sched *sch, struct rq *rq, u64 reenq_flags)
 	list_for_each_entry_safe(p, n, &rq->scx.local_dsq.list,
 				 scx.dsq_list.node) {
 		struct scx_sched *task_sch = scx_task_sched(p);
+		u32 reason;
 
 		/*
 		 * If @p is being migrated, @p's current CPU may not agree with
@@ -3769,16 +3772,24 @@ static u32 reenq_local(struct scx_sched *sch, struct rq *rq, u64 reenq_flags)
 		if (!scx_is_descendant(task_sch, sch))
 			continue;
 
-		if (!task_should_reenq(p, reenq_flags))
+		if (!task_should_reenq(p, reenq_flags, &reason))
 			continue;
 
 		dispatch_dequeue(rq, p);
+
+		if (WARN_ON_ONCE(p->scx.flags & SCX_TASK_REENQ_REASON_MASK))
+			p->scx.flags &= ~SCX_TASK_REENQ_REASON_MASK;
+		p->scx.flags |= reason;
+
 		list_add_tail(&p->scx.dsq_list.node, &tasks);
 	}
 
 	list_for_each_entry_safe(p, n, &tasks, scx.dsq_list.node) {
 		list_del_init(&p->scx.dsq_list.node);
+
 		do_enqueue_task(rq, p, SCX_ENQ_REENQ, -1);
+
+		p->scx.flags &= ~SCX_TASK_REENQ_REASON_MASK;
 		nr_enqueued++;
 	}
 
@@ -3832,12 +3843,13 @@ static void reenq_user(struct rq *rq, struct scx_dispatch_q *dsq, u64 reenq_flag
 
 	while (likely(!READ_ONCE(sch->bypass_depth))) {
 		struct rq *task_rq;
+		u32 reason;
 
 		p = nldsq_cursor_next_task(&cursor, dsq);
 		if (!p)
 			break;
 
-		if (!task_should_reenq(p, reenq_flags))
+		if (!task_should_reenq(p, reenq_flags, &reason))
 			continue;
 
 		task_rq = task_rq(p);
@@ -3860,8 +3872,15 @@ static void reenq_user(struct rq *rq, struct scx_dispatch_q *dsq, u64 reenq_flag
 		/* @p is on @dsq, its rq and @dsq are locked */
 		dispatch_dequeue_locked(p, dsq);
 		raw_spin_unlock(&dsq->lock);
+
+		if (WARN_ON_ONCE(p->scx.flags & SCX_TASK_REENQ_REASON_MASK))
+			p->scx.flags &= ~SCX_TASK_REENQ_REASON_MASK;
+		p->scx.flags |= reason;
+
 		do_enqueue_task(task_rq, p, SCX_ENQ_REENQ, -1);
 
+		p->scx.flags &= ~SCX_TASK_REENQ_REASON_MASK;
+
 		if (!(++nr_enqueued % SCX_TASK_ITER_BATCH)) {
 			raw_spin_rq_unlock(locked_rq);
 			locked_rq = NULL;
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index d9eda2e8701c..f8df73044515 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1080,13 +1080,9 @@ enum scx_enq_flags {
 	SCX_ENQ_PREEMPT		= 1LLU << 32,
 
 	/*
-	 * The task being enqueued was previously enqueued on the current CPU's
-	 * %SCX_DSQ_LOCAL, but was removed from it in a call to the
-	 * scx_bpf_reenqueue_local() kfunc. If scx_bpf_reenqueue_local() was
-	 * invoked in a ->cpu_release() callback, and the task is again
-	 * dispatched back to %SCX_LOCAL_DSQ by this current ->enqueue(), the
-	 * task will not be scheduled on the CPU until at least the next invocation
-	 * of the ->cpu_acquire() callback.
+	 * The task being enqueued was previously enqueued on a DSQ, but was
+	 * removed and is being re-enqueued. See SCX_TASK_REENQ_* flags to find
+	 * out why a given task is being reenqueued.
 	 */
 	SCX_ENQ_REENQ		= 1LLU << 40,
 
-- 
cgit v1.2.3


From 1ea4b473504b6dc6a0d21c298519aff2d52433c9 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 5 Mar 2026 19:55:41 +0000
Subject: locking/rwsem: Remove the list_head from struct rw_semaphore

Instead of embedding a list_head in struct rw_semaphore, store a pointer
to the first waiter.  The list of waiters remains a doubly linked list
so we can efficiently add to the tail of the list, remove from the front
(or middle) of the list.

Some of the list manipulation becomes more complicated, but it's a
reasonable tradeoff on the slow paths to shrink some core data structures
like struct inode.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260305195545.3707590-2-willy@infradead.org
---
 include/linux/rwsem.h  |  8 ++---
 kernel/locking/rwsem.c | 90 ++++++++++++++++++++++++++++++++------------------
 2 files changed, 62 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 9bf1d93d3d7b..e7829531c4ba 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -57,7 +57,7 @@ context_lock_struct(rw_semaphore) {
 	struct optimistic_spin_queue osq; /* spinner MCS lock */
 #endif
 	raw_spinlock_t wait_lock;
-	struct list_head wait_list;
+	struct rwsem_waiter *first_waiter;
 #ifdef CONFIG_DEBUG_RWSEMS
 	void *magic;
 #endif
@@ -106,7 +106,7 @@ static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *
 	  .owner = ATOMIC_LONG_INIT(0),				\
 	  __RWSEM_OPT_INIT(name)				\
 	  .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\
-	  .wait_list = LIST_HEAD_INIT((name).wait_list),	\
+	  .first_waiter = NULL,					\
 	  __RWSEM_DEBUG_INIT(name)				\
 	  __RWSEM_DEP_MAP_INIT(name) }
 
@@ -129,9 +129,9 @@ do {								\
  * rwsem to see if somebody from an incompatible type is wanting access to the
  * lock.
  */
-static inline int rwsem_is_contended(struct rw_semaphore *sem)
+static inline bool rwsem_is_contended(struct rw_semaphore *sem)
 {
-	return !list_empty(&sem->wait_list);
+	return sem->first_waiter != NULL;
 }
 
 #if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 24df4d98f7d2..e66f37ebc6f6 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -72,7 +72,7 @@
 		#c, atomic_long_read(&(sem)->count),		\
 		(unsigned long) sem->magic,			\
 		atomic_long_read(&(sem)->owner), (long)current,	\
-		list_empty(&(sem)->wait_list) ? "" : "not "))	\
+		(sem)->first_waiter ? "" : "not "))		\
 			debug_locks_off();			\
 	} while (0)
 #else
@@ -321,7 +321,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 #endif
 	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
 	raw_spin_lock_init(&sem->wait_lock);
-	INIT_LIST_HEAD(&sem->wait_list);
+	sem->first_waiter = NULL;
 	atomic_long_set(&sem->owner, 0L);
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 	osq_lock_init(&sem->osq);
@@ -341,8 +341,6 @@ struct rwsem_waiter {
 	unsigned long timeout;
 	bool handoff_set;
 };
-#define rwsem_first_waiter(sem) \
-	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
 
 enum rwsem_wake_type {
 	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
@@ -365,12 +363,21 @@ enum rwsem_wake_type {
  */
 #define MAX_READERS_WAKEUP	0x100
 
-static inline void
-rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+static inline
+bool __rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
 {
-	lockdep_assert_held(&sem->wait_lock);
-	list_add_tail(&waiter->list, &sem->wait_list);
-	/* caller will set RWSEM_FLAG_WAITERS */
+	if (list_empty(&waiter->list)) {
+		sem->first_waiter = NULL;
+		return true;
+	}
+
+	if (sem->first_waiter == waiter) {
+		sem->first_waiter = list_first_entry(&waiter->list,
+						     struct rwsem_waiter, list);
+	}
+	list_del(&waiter->list);
+
+	return false;
 }
 
 /*
@@ -385,14 +392,23 @@ static inline bool
 rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
 {
 	lockdep_assert_held(&sem->wait_lock);
-	list_del(&waiter->list);
-	if (likely(!list_empty(&sem->wait_list)))
+	if (__rwsem_del_waiter(sem, waiter))
 		return true;
-
 	atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
 	return false;
 }
 
+static inline
+struct rwsem_waiter *next_waiter(const struct rw_semaphore *sem,
+				 const struct rwsem_waiter *waiter)
+{
+	struct rwsem_waiter *next = list_first_entry(&waiter->list,
+						     struct rwsem_waiter, list);
+	if (next == sem->first_waiter)
+		return NULL;
+	return next;
+}
+
 /*
  * handle the lock release when processes blocked on it that can now run
  * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
@@ -411,7 +427,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
 			    enum rwsem_wake_type wake_type,
 			    struct wake_q_head *wake_q)
 {
-	struct rwsem_waiter *waiter, *tmp;
+	struct rwsem_waiter *waiter, *next;
 	long oldcount, woken = 0, adjustment = 0;
 	struct list_head wlist;
 
@@ -421,7 +437,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
 	 * Take a peek at the queue head waiter such that we can determine
 	 * the wakeup(s) to perform.
 	 */
-	waiter = rwsem_first_waiter(sem);
+	waiter = sem->first_waiter;
 
 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
 		if (wake_type == RWSEM_WAKE_ANY) {
@@ -506,25 +522,28 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
 	 *    put them into wake_q to be woken up later.
 	 */
 	INIT_LIST_HEAD(&wlist);
-	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+	do {
+		next = next_waiter(sem, waiter);
 		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
 			continue;
 
 		woken++;
 		list_move_tail(&waiter->list, &wlist);
+		if (sem->first_waiter == waiter)
+			sem->first_waiter = next;
 
 		/*
 		 * Limit # of readers that can be woken up per wakeup call.
 		 */
 		if (unlikely(woken >= MAX_READERS_WAKEUP))
 			break;
-	}
+	} while ((waiter = next) != NULL);
 
 	adjustment = woken * RWSEM_READER_BIAS - adjustment;
 	lockevent_cond_inc(rwsem_wake_reader, woken);
 
 	oldcount = atomic_long_read(&sem->count);
-	if (list_empty(&sem->wait_list)) {
+	if (!sem->first_waiter) {
 		/*
 		 * Combined with list_move_tail() above, this implies
 		 * rwsem_del_waiter().
@@ -545,7 +564,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
 		atomic_long_add(adjustment, &sem->count);
 
 	/* 2nd pass */
-	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
+	list_for_each_entry_safe(waiter, next, &wlist, list) {
 		struct task_struct *tsk;
 
 		tsk = waiter->task;
@@ -577,7 +596,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
 		      struct wake_q_head *wake_q)
 		      __releases(&sem->wait_lock)
 {
-	bool first = rwsem_first_waiter(sem) == waiter;
+	bool first = sem->first_waiter == waiter;
 
 	wake_q_init(wake_q);
 
@@ -603,7 +622,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
 static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
 					struct rwsem_waiter *waiter)
 {
-	struct rwsem_waiter *first = rwsem_first_waiter(sem);
+	struct rwsem_waiter *first = sem->first_waiter;
 	long count, new;
 
 	lockdep_assert_held(&sem->wait_lock);
@@ -639,7 +658,7 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
 			new |= RWSEM_WRITER_LOCKED;
 			new &= ~RWSEM_FLAG_HANDOFF;
 
-			if (list_is_singular(&sem->wait_list))
+			if (list_empty(&first->list))
 				new &= ~RWSEM_FLAG_WAITERS;
 		}
 	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
@@ -659,7 +678,8 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
 	 * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
 	 * success.
 	 */
-	list_del(&waiter->list);
+	__rwsem_del_waiter(sem, waiter);
+
 	rwsem_set_owner(sem);
 	return true;
 }
@@ -994,7 +1014,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
 {
 	long adjustment = -RWSEM_READER_BIAS;
 	long rcnt = (count >> RWSEM_READER_SHIFT);
-	struct rwsem_waiter waiter;
+	struct rwsem_waiter waiter, *first;
 	DEFINE_WAKE_Q(wake_q);
 
 	/*
@@ -1019,7 +1039,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
 		 */
 		if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
 			raw_spin_lock_irq(&sem->wait_lock);
-			if (!list_empty(&sem->wait_list))
+			if (sem->first_waiter)
 				rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
 						&wake_q);
 			raw_spin_unlock_irq(&sem->wait_lock);
@@ -1035,7 +1055,8 @@ queue:
 	waiter.handoff_set = false;
 
 	raw_spin_lock_irq(&sem->wait_lock);
-	if (list_empty(&sem->wait_list)) {
+	first = sem->first_waiter;
+	if (!first) {
 		/*
 		 * In case the wait queue is empty and the lock isn't owned
 		 * by a writer, this reader can exit the slowpath and return
@@ -1051,8 +1072,11 @@ queue:
 			return sem;
 		}
 		adjustment += RWSEM_FLAG_WAITERS;
+		INIT_LIST_HEAD(&waiter.list);
+		sem->first_waiter = &waiter;
+	} else {
+		list_add_tail(&waiter.list, &first->list);
 	}
-	rwsem_add_waiter(sem, &waiter);
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	count = atomic_long_add_return(adjustment, &sem->count);
@@ -1110,7 +1134,7 @@ out_nolock:
 static struct rw_semaphore __sched *
 rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 {
-	struct rwsem_waiter waiter;
+	struct rwsem_waiter waiter, *first;
 	DEFINE_WAKE_Q(wake_q);
 
 	/* do optimistic spinning and steal lock if possible */
@@ -1129,10 +1153,10 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 	waiter.handoff_set = false;
 
 	raw_spin_lock_irq(&sem->wait_lock);
-	rwsem_add_waiter(sem, &waiter);
 
-	/* we're now waiting on the lock */
-	if (rwsem_first_waiter(sem) != &waiter) {
+	first = sem->first_waiter;
+	if (first) {
+		list_add_tail(&waiter.list, &first->list);
 		rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
 				       &wake_q);
 		if (!wake_q_empty(&wake_q)) {
@@ -1145,6 +1169,8 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 			raw_spin_lock_irq(&sem->wait_lock);
 		}
 	} else {
+		INIT_LIST_HEAD(&waiter.list);
+		sem->first_waiter = &waiter;
 		atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
 	}
 
@@ -1218,7 +1244,7 @@ static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	if (!list_empty(&sem->wait_list))
+	if (sem->first_waiter)
 		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -1239,7 +1265,7 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	if (!list_empty(&sem->wait_list))
+	if (sem->first_waiter)
 		rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-- 
cgit v1.2.3


From b9bdd4b6840454ef87f61b6506c9635c57a81650 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 5 Mar 2026 19:55:42 +0000
Subject: locking/semaphore: Remove the list_head from struct semaphore

Instead of embedding a list_head in struct semaphore, store a pointer to
the first waiter.  The list of waiters remains a doubly linked list so
we can efficiently add to the tail of the list and remove from the front
(or middle) of the list.

Some of the list manipulation becomes more complicated, but it's a
reasonable tradeoff on the slow paths to shrink data structures
which embed a semaphore.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260305195545.3707590-3-willy@infradead.org
---
 drivers/acpi/osl.c         |  2 +-
 include/linux/semaphore.h  |  4 ++--
 kernel/locking/semaphore.c | 41 +++++++++++++++++++++++++++++++----------
 3 files changed, 34 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 5b777316b9ac..2af0db9210fe 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1257,7 +1257,7 @@ acpi_status acpi_os_delete_semaphore(acpi_handle handle)
 
 	ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Deleting semaphore[%p].\n", handle));
 
-	BUG_ON(!list_empty(&sem->wait_list));
+	BUG_ON(sem->first_waiter);
 	kfree(sem);
 	sem = NULL;
 
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index 89706157e622..a4c8651ef021 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -15,7 +15,7 @@
 struct semaphore {
 	raw_spinlock_t		lock;
 	unsigned int		count;
-	struct list_head	wait_list;
+	struct semaphore_waiter *first_waiter;
 
 #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
 	unsigned long		last_holder;
@@ -33,7 +33,7 @@ struct semaphore {
 {									\
 	.lock		= __RAW_SPIN_LOCK_UNLOCKED((name).lock),	\
 	.count		= n,						\
-	.wait_list	= LIST_HEAD_INIT((name).wait_list)		\
+	.first_waiter	= NULL						\
 	__LAST_HOLDER_SEMAPHORE_INITIALIZER				\
 }
 
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 3ef032e22f7e..74d41433ba13 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -21,7 +21,7 @@
  * too.
  *
  * The ->count variable represents how many more tasks can acquire this
- * semaphore.  If it's zero, there may be tasks waiting on the wait_list.
+ * semaphore.  If it's zero, there may be waiters.
  */
 
 #include <linux/compiler.h>
@@ -226,7 +226,7 @@ void __sched up(struct semaphore *sem)
 
 	hung_task_sem_clear_if_holder(sem);
 
-	if (likely(list_empty(&sem->wait_list)))
+	if (likely(!sem->first_waiter))
 		sem->count++;
 	else
 		__up(sem, &wake_q);
@@ -244,6 +244,21 @@ struct semaphore_waiter {
 	bool up;
 };
 
+static inline
+void sem_del_waiter(struct semaphore *sem, struct semaphore_waiter *waiter)
+{
+	if (list_empty(&waiter->list)) {
+		sem->first_waiter = NULL;
+		return;
+	}
+
+	if (sem->first_waiter == waiter) {
+		sem->first_waiter = list_first_entry(&waiter->list,
+						     struct semaphore_waiter, list);
+	}
+	list_del(&waiter->list);
+}
+
 /*
  * Because this function is inlined, the 'state' parameter will be
  * constant, and thus optimised away by the compiler.  Likewise the
@@ -252,9 +267,15 @@ struct semaphore_waiter {
 static inline int __sched ___down_common(struct semaphore *sem, long state,
 								long timeout)
 {
-	struct semaphore_waiter waiter;
-
-	list_add_tail(&waiter.list, &sem->wait_list);
+	struct semaphore_waiter waiter, *first;
+
+	first = sem->first_waiter;
+	if (first) {
+		list_add_tail(&waiter.list, &first->list);
+	} else {
+		INIT_LIST_HEAD(&waiter.list);
+		sem->first_waiter = &waiter;
+	}
 	waiter.task = current;
 	waiter.up = false;
 
@@ -274,11 +295,11 @@ static inline int __sched ___down_common(struct semaphore *sem, long state,
 	}
 
  timed_out:
-	list_del(&waiter.list);
+	sem_del_waiter(sem, &waiter);
 	return -ETIME;
 
  interrupted:
-	list_del(&waiter.list);
+	sem_del_waiter(sem, &waiter);
 	return -EINTR;
 }
 
@@ -321,9 +342,9 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long timeout)
 static noinline void __sched __up(struct semaphore *sem,
 				  struct wake_q_head *wake_q)
 {
-	struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
-						struct semaphore_waiter, list);
-	list_del(&waiter->list);
+	struct semaphore_waiter *waiter = sem->first_waiter;
+
+	sem_del_waiter(sem, waiter);
 	waiter->up = true;
 	wake_q_add(wake_q, waiter->task);
 }
-- 
cgit v1.2.3


From 25500ba7e77ce9d3d9b5a1929d41a2ee2e23f6fe Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 5 Mar 2026 19:55:43 +0000
Subject: locking/mutex: Remove the list_head from struct mutex

Instead of embedding a list_head in struct mutex, store a pointer to
the first waiter.  The list of waiters remains a doubly linked list so
we can efficiently add to the tail of the list, remove from the front
(or middle) of the list.

Some of the list manipulation becomes more complicated, but it's a
reasonable tradeoff on the slow paths to shrink data structures which
embed a mutex like struct file.

Some of the debug checks have to be deleted because there's no equivalent
to checking them in the new scheme (eg an empty waiter->list now means
that it is the only waiter, not that the waiter is no longer on the list).

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260305195545.3707590-4-willy@infradead.org
---
 include/linux/mutex.h        |  2 +-
 include/linux/mutex_types.h  |  2 +-
 kernel/locking/mutex-debug.c |  5 +----
 kernel/locking/mutex.c       | 49 ++++++++++++++++++++++++--------------------
 kernel/locking/ww_mutex.h    | 25 +++++++---------------
 5 files changed, 37 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 2f648ee204e7..c471b129f703 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -79,7 +79,7 @@ do {									\
 #define __MUTEX_INITIALIZER(lockname) \
 		{ .owner = ATOMIC_LONG_INIT(0) \
 		, .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
-		, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
+		, .first_waiter = NULL \
 		__DEBUG_MUTEX_INITIALIZER(lockname) \
 		__DEP_MAP_MUTEX_INITIALIZER(lockname) }
 
diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h
index 80975935ec48..a8f119f81177 100644
--- a/include/linux/mutex_types.h
+++ b/include/linux/mutex_types.h
@@ -44,7 +44,7 @@ context_lock_struct(mutex) {
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	struct optimistic_spin_queue osq; /* Spinner MCS lock */
 #endif
-	struct list_head	wait_list;
+	struct mutex_waiter	*first_waiter;
 #ifdef CONFIG_DEBUG_MUTEXES
 	void			*magic;
 #endif
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 2c6b02d4699b..94930d506bcf 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -37,9 +37,8 @@ void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
 void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
 {
 	lockdep_assert_held(&lock->wait_lock);
-	DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
+	DEBUG_LOCKS_WARN_ON(!lock->first_waiter);
 	DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
-	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
 }
 
 void debug_mutex_free_waiter(struct mutex_waiter *waiter)
@@ -62,7 +61,6 @@ void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 {
 	struct mutex *blocked_on = __get_task_blocked_on(task);
 
-	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
 	DEBUG_LOCKS_WARN_ON(waiter->task != task);
 	DEBUG_LOCKS_WARN_ON(blocked_on && blocked_on != lock);
 
@@ -74,7 +72,6 @@ void debug_mutex_unlock(struct mutex *lock)
 {
 	if (likely(debug_locks)) {
 		DEBUG_LOCKS_WARN_ON(lock->magic != lock);
-		DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
 	}
 }
 
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index c867f6c15530..95f1822122a1 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -47,7 +47,7 @@ static void __mutex_init_generic(struct mutex *lock)
 {
 	atomic_long_set(&lock->owner, 0);
 	raw_spin_lock_init(&lock->wait_lock);
-	INIT_LIST_HEAD(&lock->wait_list);
+	lock->first_waiter = NULL;
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	osq_lock_init(&lock->osq);
 #endif
@@ -194,33 +194,42 @@ static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag)
 	atomic_long_andnot(flag, &lock->owner);
 }
 
-static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_waiter *waiter)
-{
-	return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter;
-}
-
 /*
  * Add @waiter to a given location in the lock wait_list and set the
  * FLAG_WAITERS flag if it's the first waiter.
  */
 static void
 __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
-		   struct list_head *list)
+		   struct mutex_waiter *first)
 {
 	hung_task_set_blocker(lock, BLOCKER_TYPE_MUTEX);
 	debug_mutex_add_waiter(lock, waiter, current);
 
-	list_add_tail(&waiter->list, list);
-	if (__mutex_waiter_is_first(lock, waiter))
+	if (!first)
+		first = lock->first_waiter;
+
+	if (first) {
+		list_add_tail(&waiter->list, &first->list);
+	} else {
+		INIT_LIST_HEAD(&waiter->list);
+		lock->first_waiter = waiter;
 		__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
+	}
 }
 
 static void
 __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter)
 {
-	list_del(&waiter->list);
-	if (likely(list_empty(&lock->wait_list)))
+	if (list_empty(&waiter->list)) {
 		__mutex_clear_flag(lock, MUTEX_FLAGS);
+		lock->first_waiter = NULL;
+	} else {
+		if (lock->first_waiter == waiter) {
+			lock->first_waiter = list_first_entry(&waiter->list,
+							      struct mutex_waiter, list);
+		}
+		list_del(&waiter->list);
+	}
 
 	debug_mutex_remove_waiter(lock, waiter, current);
 	hung_task_clear_blocker();
@@ -340,7 +349,7 @@ bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
 	 * Similarly, stop spinning if we are no longer the
 	 * first waiter.
 	 */
-	if (waiter && !__mutex_waiter_is_first(lock, waiter))
+	if (waiter && lock->first_waiter != waiter)
 		return false;
 
 	return true;
@@ -645,7 +654,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 
 	if (!use_ww_ctx) {
 		/* add waiting tasks to the end of the waitqueue (FIFO): */
-		__mutex_add_waiter(lock, &waiter, &lock->wait_list);
+		__mutex_add_waiter(lock, &waiter, NULL);
 	} else {
 		/*
 		 * Add in stamp order, waking up waiters that must kill
@@ -691,7 +700,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 
 		schedule_preempt_disabled();
 
-		first = __mutex_waiter_is_first(lock, &waiter);
+		first = lock->first_waiter == &waiter;
 
 		/*
 		 * As we likely have been woken up by task
@@ -734,8 +743,7 @@ acquired:
 		 * Wound-Wait; we stole the lock (!first_waiter), check the
 		 * waiters as anyone might want to wound us.
 		 */
-		if (!ww_ctx->is_wait_die &&
-		    !__mutex_waiter_is_first(lock, &waiter))
+		if (!ww_ctx->is_wait_die && lock->first_waiter != &waiter)
 			__ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
 	}
 
@@ -931,6 +939,7 @@ EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
 static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
 {
 	struct task_struct *next = NULL;
+	struct mutex_waiter *waiter;
 	DEFINE_WAKE_Q(wake_q);
 	unsigned long owner;
 	unsigned long flags;
@@ -962,12 +971,8 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 
 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	debug_mutex_unlock(lock);
-	if (!list_empty(&lock->wait_list)) {
-		/* get the first entry from the wait-list: */
-		struct mutex_waiter *waiter =
-			list_first_entry(&lock->wait_list,
-					 struct mutex_waiter, list);
-
+	waiter = lock->first_waiter;
+	if (waiter) {
 		next = waiter->task;
 
 		debug_mutex_wake_waiter(lock, waiter);
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index 31a785afee6c..a0847e91ae04 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -8,20 +8,14 @@
 static inline struct mutex_waiter *
 __ww_waiter_first(struct mutex *lock)
 {
-	struct mutex_waiter *w;
-
-	w = list_first_entry(&lock->wait_list, struct mutex_waiter, list);
-	if (list_entry_is_head(w, &lock->wait_list, list))
-		return NULL;
-
-	return w;
+	return lock->first_waiter;
 }
 
 static inline struct mutex_waiter *
 __ww_waiter_next(struct mutex *lock, struct mutex_waiter *w)
 {
 	w = list_next_entry(w, list);
-	if (list_entry_is_head(w, &lock->wait_list, list))
+	if (lock->first_waiter == w)
 		return NULL;
 
 	return w;
@@ -31,7 +25,7 @@ static inline struct mutex_waiter *
 __ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w)
 {
 	w = list_prev_entry(w, list);
-	if (list_entry_is_head(w, &lock->wait_list, list))
+	if (lock->first_waiter == w)
 		return NULL;
 
 	return w;
@@ -40,22 +34,17 @@ __ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w)
 static inline struct mutex_waiter *
 __ww_waiter_last(struct mutex *lock)
 {
-	struct mutex_waiter *w;
-
-	w = list_last_entry(&lock->wait_list, struct mutex_waiter, list);
-	if (list_entry_is_head(w, &lock->wait_list, list))
-		return NULL;
+	struct mutex_waiter *w = lock->first_waiter;
 
+	if (w)
+		w = list_prev_entry(w, list);
 	return w;
 }
 
 static inline void
 __ww_waiter_add(struct mutex *lock, struct mutex_waiter *waiter, struct mutex_waiter *pos)
 {
-	struct list_head *p = &lock->wait_list;
-	if (pos)
-		p = &pos->list;
-	__mutex_add_waiter(lock, waiter, p);
+	__mutex_add_waiter(lock, waiter, pos);
 }
 
 static inline struct task_struct *
-- 
cgit v1.2.3


From 07574b8ebaac7927e2355b4f343b03b50e04494c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 20 Jan 2026 13:40:30 +0100
Subject: compiler-context-analysys: Add __cond_releases()

Useful for things like unlock fastpaths, which on success release the
lock.

Suggested-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Marco Elver <elver@google.com>
Link: https://patch.msgid.link/20260121111213.634625032@infradead.org
---
 include/linux/compiler-context-analysis.h | 32 +++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-context-analysis.h b/include/linux/compiler-context-analysis.h
index 00c074a2ccb0..a9317571e6af 100644
--- a/include/linux/compiler-context-analysis.h
+++ b/include/linux/compiler-context-analysis.h
@@ -320,6 +320,38 @@ static inline void _context_unsafe_alias(void **p) { }
  */
 #define __releases(...)		__releases_ctx_lock(__VA_ARGS__)
 
+/*
+ * Clang's analysis does not care precisely about the value, only that it is
+ * either zero or non-zero. So the __cond_acquires() interface might be
+ * misleading if we say that @ret is the value returned if acquired. Instead,
+ * provide symbolic variants which we translate.
+ */
+#define __cond_acquires_impl_not_true(x, ...)     __try_acquires##__VA_ARGS__##_ctx_lock(0, x)
+#define __cond_acquires_impl_not_false(x, ...)    __try_acquires##__VA_ARGS__##_ctx_lock(1, x)
+#define __cond_acquires_impl_not_nonzero(x, ...)  __try_acquires##__VA_ARGS__##_ctx_lock(0, x)
+#define __cond_acquires_impl_not_0(x, ...)        __try_acquires##__VA_ARGS__##_ctx_lock(1, x)
+#define __cond_acquires_impl_not_nonnull(x, ...)  __try_acquires##__VA_ARGS__##_ctx_lock(0, x)
+#define __cond_acquires_impl_not_NULL(x, ...)     __try_acquires##__VA_ARGS__##_ctx_lock(1, x)
+
+/**
+ * __cond_releases() - function attribute, function conditionally
+ *                     releases a context lock exclusively
+ * @ret: abstract value returned by function if context lock releases
+ * @x: context lock instance pointer
+ *
+ * Function attribute declaring that the function conditionally releases the
+ * given context lock instance @x exclusively. The associated context(s) must
+ * be active on entry. The function return value @ret denotes when the context
+ * lock is released.
+ *
+ * @ret may be one of: true, false, nonzero, 0, nonnull, NULL.
+ *
+ * NOTE: clang does not have a native attribute for this; instead implement
+ *       it as an unconditional release and a conditional acquire for the
+ *       inverted condition -- which is semantically equivalent.
+ */
+#define __cond_releases(ret, x) __releases(x) __cond_acquires_impl_not_##ret(x)
+
 /**
  * __acquire() - function to acquire context lock exclusively
  * @x: context lock instance pointer
-- 
cgit v1.2.3


From 5c4326231cde36fd5e90c41e403df9fac6238f4b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 20 Jan 2026 10:06:08 +0100
Subject: locking/mutex: Add context analysis

Add compiler context analysis annotations.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260121111213.745353747@infradead.org
---
 include/linux/mutex.h       |  2 +-
 include/linux/mutex_types.h |  2 +-
 kernel/locking/Makefile     |  2 ++
 kernel/locking/mutex.c      | 33 ++++++++++++++++++++++++++++-----
 kernel/locking/mutex.h      |  1 +
 kernel/locking/ww_mutex.h   | 12 ++++++++++++
 6 files changed, 45 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index c471b129f703..734048c02f4f 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -183,7 +183,7 @@ static inline int __must_check __devm_mutex_init(struct device *dev, struct mute
  */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass) __acquires(lock);
-extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
+extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock) __acquires(lock);
 extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
 					unsigned int subclass) __cond_acquires(0, lock);
 extern int __must_check _mutex_lock_killable(struct mutex *lock,
diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h
index a8f119f81177..24ed599fdda8 100644
--- a/include/linux/mutex_types.h
+++ b/include/linux/mutex_types.h
@@ -44,7 +44,7 @@ context_lock_struct(mutex) {
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	struct optimistic_spin_queue osq; /* Spinner MCS lock */
 #endif
-	struct mutex_waiter	*first_waiter;
+	struct mutex_waiter	*first_waiter __guarded_by(&wait_lock);
 #ifdef CONFIG_DEBUG_MUTEXES
 	void			*magic;
 #endif
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index a114949eeed5..264447d606a6 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -3,6 +3,8 @@
 # and is generally not a function of system call inputs.
 KCOV_INSTRUMENT		:= n
 
+CONTEXT_ANALYSIS_mutex.o := y
+
 obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
 
 # Avoid recursion lockdep -> sanitizer -> ... -> lockdep & improve performance.
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 95f1822122a1..427187ff02db 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -46,8 +46,9 @@
 static void __mutex_init_generic(struct mutex *lock)
 {
 	atomic_long_set(&lock->owner, 0);
-	raw_spin_lock_init(&lock->wait_lock);
-	lock->first_waiter = NULL;
+	scoped_guard (raw_spinlock_init, &lock->wait_lock) {
+		lock->first_waiter = NULL;
+	}
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	osq_lock_init(&lock->osq);
 #endif
@@ -150,6 +151,7 @@ EXPORT_SYMBOL(mutex_init_generic);
  * follow with a __mutex_trylock() before failing.
  */
 static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
+	__cond_acquires(true, lock)
 {
 	unsigned long curr = (unsigned long)current;
 	unsigned long zero = 0UL;
@@ -163,6 +165,7 @@ static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
 }
 
 static __always_inline bool __mutex_unlock_fast(struct mutex *lock)
+	__cond_releases(true, lock)
 {
 	unsigned long curr = (unsigned long)current;
 
@@ -201,6 +204,7 @@ static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag)
 static void
 __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 		   struct mutex_waiter *first)
+	__must_hold(&lock->wait_lock)
 {
 	hung_task_set_blocker(lock, BLOCKER_TYPE_MUTEX);
 	debug_mutex_add_waiter(lock, waiter, current);
@@ -219,6 +223,7 @@ __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 
 static void
 __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter)
+	__must_hold(&lock->wait_lock)
 {
 	if (list_empty(&waiter->list)) {
 		__mutex_clear_flag(lock, MUTEX_FLAGS);
@@ -268,7 +273,8 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
  * We also put the fastpath first in the kernel image, to make sure the
  * branch is predicted by the CPU as default-untaken.
  */
-static void __sched __mutex_lock_slowpath(struct mutex *lock);
+static void __sched __mutex_lock_slowpath(struct mutex *lock)
+	__acquires(lock);
 
 /**
  * mutex_lock - acquire the mutex
@@ -349,7 +355,7 @@ bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
 	 * Similarly, stop spinning if we are no longer the
 	 * first waiter.
 	 */
-	if (waiter && lock->first_waiter != waiter)
+	if (waiter && data_race(lock->first_waiter != waiter))
 		return false;
 
 	return true;
@@ -534,7 +540,8 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
 }
 #endif
 
-static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip);
+static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
+	__releases(lock);
 
 /**
  * mutex_unlock - release the mutex
@@ -574,6 +581,7 @@ EXPORT_SYMBOL(mutex_unlock);
  * of a unlocked mutex is not allowed.
  */
 void __sched ww_mutex_unlock(struct ww_mutex *lock)
+	__no_context_analysis
 {
 	__ww_mutex_unlock(lock);
 	mutex_unlock(&lock->base);
@@ -587,6 +595,7 @@ static __always_inline int __sched
 __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclass,
 		    struct lockdep_map *nest_lock, unsigned long ip,
 		    struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
+	__cond_acquires(0, lock)
 {
 	DEFINE_WAKE_Q(wake_q);
 	struct mutex_waiter waiter;
@@ -780,6 +789,7 @@ err_early_kill:
 static int __sched
 __mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
 	     struct lockdep_map *nest_lock, unsigned long ip)
+	__cond_acquires(0, lock)
 {
 	return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false);
 }
@@ -787,6 +797,7 @@ __mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
 static int __sched
 __ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
 		unsigned long ip, struct ww_acquire_ctx *ww_ctx)
+	__cond_acquires(0, lock)
 {
 	return __mutex_lock_common(lock, state, subclass, NULL, ip, ww_ctx, true);
 }
@@ -834,6 +845,7 @@ void __sched
 mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 {
 	__mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
+	__acquire(lock);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_nested);
@@ -842,6 +854,7 @@ void __sched
 _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
 {
 	__mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_);
+	__acquire(lock);
 }
 EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
 
@@ -870,12 +883,14 @@ mutex_lock_io_nested(struct mutex *lock, unsigned int subclass)
 	token = io_schedule_prepare();
 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
 			    subclass, NULL, _RET_IP_, NULL, 0);
+	__acquire(lock);
 	io_schedule_finish(token);
 }
 EXPORT_SYMBOL_GPL(mutex_lock_io_nested);
 
 static inline int
 ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+	__cond_releases(nonzero, lock)
 {
 #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
 	unsigned tmp;
@@ -937,6 +952,7 @@ EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
  * Release the lock, slowpath:
  */
 static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
+	__releases(lock)
 {
 	struct task_struct *next = NULL;
 	struct mutex_waiter *waiter;
@@ -945,6 +961,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 	unsigned long flags;
 
 	mutex_release(&lock->dep_map, ip);
+	__release(lock);
 
 	/*
 	 * Release the lock before (potentially) taking the spinlock such that
@@ -1066,24 +1083,29 @@ EXPORT_SYMBOL_GPL(mutex_lock_io);
 
 static noinline void __sched
 __mutex_lock_slowpath(struct mutex *lock)
+	__acquires(lock)
 {
 	__mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
+	__acquire(lock);
 }
 
 static noinline int __sched
 __mutex_lock_killable_slowpath(struct mutex *lock)
+	__cond_acquires(0, lock)
 {
 	return __mutex_lock(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
 }
 
 static noinline int __sched
 __mutex_lock_interruptible_slowpath(struct mutex *lock)
+	__cond_acquires(0, lock)
 {
 	return __mutex_lock(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
 }
 
 static noinline int __sched
 __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+	__cond_acquires(0, lock)
 {
 	return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0,
 			       _RET_IP_, ctx);
@@ -1092,6 +1114,7 @@ __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 static noinline int __sched
 __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
 					    struct ww_acquire_ctx *ctx)
+	__cond_acquires(0, lock)
 {
 	return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0,
 			       _RET_IP_, ctx);
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 9ad4da8cea00..b94ef40c1f48 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -7,6 +7,7 @@
  *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
 #ifndef CONFIG_PREEMPT_RT
+#include <linux/mutex.h>
 /*
  * This is the control structure for tasks blocked on mutex, which resides
  * on the blocked task's kernel stack:
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index a0847e91ae04..c50ea5dd3c44 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -7,12 +7,14 @@
 
 static inline struct mutex_waiter *
 __ww_waiter_first(struct mutex *lock)
+	__must_hold(&lock->wait_lock)
 {
 	return lock->first_waiter;
 }
 
 static inline struct mutex_waiter *
 __ww_waiter_next(struct mutex *lock, struct mutex_waiter *w)
+	__must_hold(&lock->wait_lock)
 {
 	w = list_next_entry(w, list);
 	if (lock->first_waiter == w)
@@ -23,6 +25,7 @@ __ww_waiter_next(struct mutex *lock, struct mutex_waiter *w)
 
 static inline struct mutex_waiter *
 __ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w)
+	__must_hold(&lock->wait_lock)
 {
 	w = list_prev_entry(w, list);
 	if (lock->first_waiter == w)
@@ -33,6 +36,7 @@ __ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w)
 
 static inline struct mutex_waiter *
 __ww_waiter_last(struct mutex *lock)
+	__must_hold(&lock->wait_lock)
 {
 	struct mutex_waiter *w = lock->first_waiter;
 
@@ -43,6 +47,7 @@ __ww_waiter_last(struct mutex *lock)
 
 static inline void
 __ww_waiter_add(struct mutex *lock, struct mutex_waiter *waiter, struct mutex_waiter *pos)
+	__must_hold(&lock->wait_lock)
 {
 	__mutex_add_waiter(lock, waiter, pos);
 }
@@ -60,16 +65,19 @@ __ww_mutex_has_waiters(struct mutex *lock)
 }
 
 static inline void lock_wait_lock(struct mutex *lock, unsigned long *flags)
+	__acquires(&lock->wait_lock)
 {
 	raw_spin_lock_irqsave(&lock->wait_lock, *flags);
 }
 
 static inline void unlock_wait_lock(struct mutex *lock, unsigned long *flags)
+	__releases(&lock->wait_lock)
 {
 	raw_spin_unlock_irqrestore(&lock->wait_lock, *flags);
 }
 
 static inline void lockdep_assert_wait_lock_held(struct mutex *lock)
+	__must_hold(&lock->wait_lock)
 {
 	lockdep_assert_held(&lock->wait_lock);
 }
@@ -296,6 +304,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
 			     struct ww_acquire_ctx *ww_ctx,
 			     struct ww_acquire_ctx *hold_ctx,
 			     struct wake_q_head *wake_q)
+	__must_hold(&lock->wait_lock)
 {
 	struct task_struct *owner = __ww_mutex_owner(lock);
 
@@ -360,6 +369,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
 static void
 __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx,
 			 struct wake_q_head *wake_q)
+	__must_hold(&lock->wait_lock)
 {
 	struct MUTEX_WAITER *cur;
 
@@ -453,6 +463,7 @@ __ww_mutex_kill(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
 static inline int
 __ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
 		      struct ww_acquire_ctx *ctx)
+	__must_hold(&lock->wait_lock)
 {
 	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
 	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
@@ -503,6 +514,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
 		      struct MUTEX *lock,
 		      struct ww_acquire_ctx *ww_ctx,
 		      struct wake_q_head *wake_q)
+	__must_hold(&lock->wait_lock)
 {
 	struct MUTEX_WAITER *cur, *pos = NULL;
 	bool is_wait_die;
-- 
cgit v1.2.3


From 90bb681dcdf7e69c90b56a18f06c0389a0810b92 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 20 Jan 2026 18:17:50 +0100
Subject: locking/rtmutex: Add context analysis

Add compiler context analysis annotations.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260121111213.851599178@infradead.org
---
 include/linux/rtmutex.h                  |  8 ++++----
 kernel/locking/Makefile                  |  2 ++
 kernel/locking/rtmutex.c                 | 18 +++++++++++++++++-
 kernel/locking/rtmutex_api.c             |  2 ++
 kernel/locking/rtmutex_common.h          | 27 +++++++++++++++++++--------
 kernel/locking/ww_mutex.h                | 20 +++++++++++++++-----
 kernel/locking/ww_rt_mutex.c             |  1 +
 scripts/context-analysis-suppression.txt |  1 +
 8 files changed, 61 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index ede4c6bf6f22..78e7e588817c 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -22,8 +22,8 @@ extern int max_lock_depth;
 
 struct rt_mutex_base {
 	raw_spinlock_t		wait_lock;
-	struct rb_root_cached   waiters;
-	struct task_struct	*owner;
+	struct rb_root_cached   waiters __guarded_by(&wait_lock);
+	struct task_struct	*owner  __guarded_by(&wait_lock);
 };
 
 #define __RT_MUTEX_BASE_INITIALIZER(rtbasename)				\
@@ -41,7 +41,7 @@ struct rt_mutex_base {
  */
 static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock)
 {
-	return READ_ONCE(lock->owner) != NULL;
+	return data_race(READ_ONCE(lock->owner) != NULL);
 }
 
 #ifdef CONFIG_RT_MUTEXES
@@ -49,7 +49,7 @@ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock)
 
 static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
 {
-	unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
+	unsigned long owner = (unsigned long) data_race(READ_ONCE(lock->owner));
 
 	return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
 }
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 264447d606a6..0c07de79388c 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -4,6 +4,8 @@
 KCOV_INSTRUMENT		:= n
 
 CONTEXT_ANALYSIS_mutex.o := y
+CONTEXT_ANALYSIS_rtmutex_api.o := y
+CONTEXT_ANALYSIS_ww_rt_mutex.o := y
 
 obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
 
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index c80902eacd79..ccaba6148b61 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -94,6 +94,7 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
 
 static __always_inline struct task_struct *
 rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
+	__must_hold(&lock->wait_lock)
 {
 	unsigned long val = (unsigned long)owner;
 
@@ -105,6 +106,7 @@ rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
 
 static __always_inline void
 rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
+	__must_hold(&lock->wait_lock)
 {
 	/*
 	 * lock->wait_lock is held but explicit acquire semantics are needed
@@ -114,12 +116,14 @@ rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
 }
 
 static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
+	__must_hold(&lock->wait_lock)
 {
 	/* lock->wait_lock is held so the unlock provides release semantics. */
 	WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
 }
 
 static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
+	__must_hold(&lock->wait_lock)
 {
 	lock->owner = (struct task_struct *)
 			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
@@ -127,6 +131,7 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
 
 static __always_inline void
 fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
+	__must_hold(&lock->wait_lock)
 {
 	unsigned long owner, *p = (unsigned long *) &lock->owner;
 
@@ -328,6 +333,7 @@ static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
 }
 
 static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
+	__must_hold(&lock->wait_lock)
 {
 	lock->owner = (struct task_struct *)
 			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
@@ -1206,6 +1212,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
 					   struct ww_acquire_ctx *ww_ctx,
 					   enum rtmutex_chainwalk chwalk,
 					   struct wake_q_head *wake_q)
+	__must_hold(&lock->wait_lock)
 {
 	struct task_struct *owner = rt_mutex_owner(lock);
 	struct rt_mutex_waiter *top_waiter = waiter;
@@ -1249,6 +1256,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
 
 		/* Check whether the waiter should back out immediately */
 		rtm = container_of(lock, struct rt_mutex, rtmutex);
+		__assume_ctx_lock(&rtm->rtmutex.wait_lock);
 		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q);
 		if (res) {
 			raw_spin_lock(&task->pi_lock);
@@ -1356,6 +1364,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
 }
 
 static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
+	__must_hold(&lock->wait_lock)
 {
 	int ret = try_to_take_rt_mutex(lock, current, NULL);
 
@@ -1505,7 +1514,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
 		 *  - the VCPU on which owner runs is preempted
 		 */
 		if (!owner_on_cpu(owner) || need_resched() ||
-		    !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
+		    !data_race(rt_mutex_waiter_is_top_waiter(lock, waiter))) {
 			res = false;
 			break;
 		}
@@ -1538,6 +1547,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
  */
 static void __sched remove_waiter(struct rt_mutex_base *lock,
 				  struct rt_mutex_waiter *waiter)
+	__must_hold(&lock->wait_lock)
 {
 	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
 	struct task_struct *owner = rt_mutex_owner(lock);
@@ -1613,6 +1623,8 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
 	struct task_struct *owner;
 	int ret = 0;
 
+	__assume_ctx_lock(&rtm->rtmutex.wait_lock);
+
 	lockevent_inc(rtmutex_slow_block);
 	for (;;) {
 		/* Try to acquire the lock: */
@@ -1658,6 +1670,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
 static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
 					     struct rt_mutex_base *lock,
 					     struct rt_mutex_waiter *w)
+	__must_hold(&lock->wait_lock)
 {
 	/*
 	 * If the result is not -EDEADLOCK or the caller requested
@@ -1694,11 +1707,13 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 				       enum rtmutex_chainwalk chwalk,
 				       struct rt_mutex_waiter *waiter,
 				       struct wake_q_head *wake_q)
+	__must_hold(&lock->wait_lock)
 {
 	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
 	struct ww_mutex *ww = ww_container_of(rtm);
 	int ret;
 
+	__assume_ctx_lock(&rtm->rtmutex.wait_lock);
 	lockdep_assert_held(&lock->wait_lock);
 	lockevent_inc(rtmutex_slowlock);
 
@@ -1750,6 +1765,7 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
 					     struct ww_acquire_ctx *ww_ctx,
 					     unsigned int state,
 					     struct wake_q_head *wake_q)
+	__must_hold(&lock->wait_lock)
 {
 	struct rt_mutex_waiter waiter;
 	int ret;
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index 59dbd29cb219..124219aea46e 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -526,6 +526,7 @@ static __always_inline int __mutex_lock_common(struct mutex *lock,
 					       unsigned int subclass,
 					       struct lockdep_map *nest_lock,
 					       unsigned long ip)
+	__acquires(lock) __no_context_analysis
 {
 	int ret;
 
@@ -647,6 +648,7 @@ EXPORT_SYMBOL(mutex_trylock);
 #endif /* !CONFIG_DEBUG_LOCK_ALLOC */
 
 void __sched mutex_unlock(struct mutex *lock)
+	__releases(lock) __no_context_analysis
 {
 	mutex_release(&lock->dep_map, _RET_IP_);
 	__rt_mutex_unlock(&lock->rtmutex);
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index cf6ddd1b23a2..c38b7bdea7b3 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -79,12 +79,18 @@ struct rt_wake_q_head {
  * PI-futex support (proxy locking functions, etc.):
  */
 extern void rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
-				       struct task_struct *proxy_owner);
-extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock);
+				       struct task_struct *proxy_owner)
+	__must_hold(&lock->wait_lock);
+
+extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
+	__must_hold(&lock->wait_lock);
+
 extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
 				     struct rt_mutex_waiter *waiter,
 				     struct task_struct *task,
-				     struct wake_q_head *);
+				     struct wake_q_head *)
+	__must_hold(&lock->wait_lock);
+
 extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
 				     struct rt_mutex_waiter *waiter,
 				     struct task_struct *task);
@@ -94,8 +100,9 @@ extern int rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
 extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
 				 struct rt_mutex_waiter *waiter);
 
-extern int rt_mutex_futex_trylock(struct rt_mutex_base *l);
-extern int __rt_mutex_futex_trylock(struct rt_mutex_base *l);
+extern int rt_mutex_futex_trylock(struct rt_mutex_base *lock);
+extern int __rt_mutex_futex_trylock(struct rt_mutex_base *lock)
+	__must_hold(&lock->wait_lock);
 
 extern void rt_mutex_futex_unlock(struct rt_mutex_base *lock);
 extern bool __rt_mutex_futex_unlock(struct rt_mutex_base *lock,
@@ -109,6 +116,7 @@ extern void rt_mutex_postunlock(struct rt_wake_q_head *wqh);
  */
 #ifdef CONFIG_RT_MUTEXES
 static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock)
+	__must_hold(&lock->wait_lock)
 {
 	return !RB_EMPTY_ROOT(&lock->waiters.rb_root);
 }
@@ -120,6 +128,7 @@ static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock)
  */
 static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
 						 struct rt_mutex_waiter *waiter)
+	__must_hold(&lock->wait_lock)
 {
 	struct rb_node *leftmost = rb_first_cached(&lock->waiters);
 
@@ -127,6 +136,7 @@ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
 }
 
 static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
+	__must_hold(&lock->wait_lock)
 {
 	struct rb_node *leftmost = rb_first_cached(&lock->waiters);
 	struct rt_mutex_waiter *w = NULL;
@@ -170,9 +180,10 @@ enum rtmutex_chainwalk {
 
 static inline void __rt_mutex_base_init(struct rt_mutex_base *lock)
 {
-	raw_spin_lock_init(&lock->wait_lock);
-	lock->waiters = RB_ROOT_CACHED;
-	lock->owner = NULL;
+	scoped_guard (raw_spinlock_init, &lock->wait_lock) {
+		lock->waiters = RB_ROOT_CACHED;
+		lock->owner = NULL;
+	}
 }
 
 /* Debug functions */
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index c50ea5dd3c44..b1834ab7e782 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -4,6 +4,7 @@
 
 #define MUTEX		mutex
 #define MUTEX_WAITER	mutex_waiter
+#define WAIT_LOCK	wait_lock
 
 static inline struct mutex_waiter *
 __ww_waiter_first(struct mutex *lock)
@@ -86,9 +87,11 @@ static inline void lockdep_assert_wait_lock_held(struct mutex *lock)
 
 #define MUTEX		rt_mutex
 #define MUTEX_WAITER	rt_mutex_waiter
+#define WAIT_LOCK	rtmutex.wait_lock
 
 static inline struct rt_mutex_waiter *
 __ww_waiter_first(struct rt_mutex *lock)
+	__must_hold(&lock->rtmutex.wait_lock)
 {
 	struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root);
 	if (!n)
@@ -116,6 +119,7 @@ __ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w)
 
 static inline struct rt_mutex_waiter *
 __ww_waiter_last(struct rt_mutex *lock)
+	__must_hold(&lock->rtmutex.wait_lock)
 {
 	struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root);
 	if (!n)
@@ -137,21 +141,25 @@ __ww_mutex_owner(struct rt_mutex *lock)
 
 static inline bool
 __ww_mutex_has_waiters(struct rt_mutex *lock)
+	__must_hold(&lock->rtmutex.wait_lock)
 {
 	return rt_mutex_has_waiters(&lock->rtmutex);
 }
 
 static inline void lock_wait_lock(struct rt_mutex *lock, unsigned long *flags)
+	__acquires(&lock->rtmutex.wait_lock)
 {
 	raw_spin_lock_irqsave(&lock->rtmutex.wait_lock, *flags);
 }
 
 static inline void unlock_wait_lock(struct rt_mutex *lock, unsigned long *flags)
+	__releases(&lock->rtmutex.wait_lock)
 {
 	raw_spin_unlock_irqrestore(&lock->rtmutex.wait_lock, *flags);
 }
 
 static inline void lockdep_assert_wait_lock_held(struct rt_mutex *lock)
+	__must_hold(&lock->rtmutex.wait_lock)
 {
 	lockdep_assert_held(&lock->rtmutex.wait_lock);
 }
@@ -304,7 +312,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
 			     struct ww_acquire_ctx *ww_ctx,
 			     struct ww_acquire_ctx *hold_ctx,
 			     struct wake_q_head *wake_q)
-	__must_hold(&lock->wait_lock)
+	__must_hold(&lock->WAIT_LOCK)
 {
 	struct task_struct *owner = __ww_mutex_owner(lock);
 
@@ -369,7 +377,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
 static void
 __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx,
 			 struct wake_q_head *wake_q)
-	__must_hold(&lock->wait_lock)
+	__must_hold(&lock->WAIT_LOCK)
 {
 	struct MUTEX_WAITER *cur;
 
@@ -396,6 +404,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
 	DEFINE_WAKE_Q(wake_q);
 	unsigned long flags;
+	bool has_waiters;
 
 	ww_mutex_lock_acquired(lock, ctx);
 
@@ -417,7 +426,8 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 	 * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx
 	 * and/or !empty list.
 	 */
-	if (likely(!__ww_mutex_has_waiters(&lock->base)))
+	has_waiters = data_race(__ww_mutex_has_waiters(&lock->base));
+	if (likely(!has_waiters))
 		return;
 
 	/*
@@ -463,7 +473,7 @@ __ww_mutex_kill(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
 static inline int
 __ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
 		      struct ww_acquire_ctx *ctx)
-	__must_hold(&lock->wait_lock)
+	__must_hold(&lock->WAIT_LOCK)
 {
 	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
 	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
@@ -514,7 +524,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
 		      struct MUTEX *lock,
 		      struct ww_acquire_ctx *ww_ctx,
 		      struct wake_q_head *wake_q)
-	__must_hold(&lock->wait_lock)
+	__must_hold(&lock->WAIT_LOCK)
 {
 	struct MUTEX_WAITER *cur, *pos = NULL;
 	bool is_wait_die;
diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c
index c7196de838ed..e07fb3b96bc3 100644
--- a/kernel/locking/ww_rt_mutex.c
+++ b/kernel/locking/ww_rt_mutex.c
@@ -90,6 +90,7 @@ ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 EXPORT_SYMBOL(ww_mutex_lock_interruptible);
 
 void __sched ww_mutex_unlock(struct ww_mutex *lock)
+	__no_context_analysis
 {
 	struct rt_mutex *rtm = &lock->base;
 
diff --git a/scripts/context-analysis-suppression.txt b/scripts/context-analysis-suppression.txt
index fd8951d06706..1c51b6153f08 100644
--- a/scripts/context-analysis-suppression.txt
+++ b/scripts/context-analysis-suppression.txt
@@ -24,6 +24,7 @@ src:*include/linux/mutex*.h=emit
 src:*include/linux/rcupdate.h=emit
 src:*include/linux/refcount.h=emit
 src:*include/linux/rhashtable.h=emit
+src:*include/linux/rtmutex*.h=emit
 src:*include/linux/rwlock*.h=emit
 src:*include/linux/rwsem.h=emit
 src:*include/linux/sched*=emit
-- 
cgit v1.2.3


From 739690915ce1f017223ef4e6f3cc966ccfa3c861 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 6 Mar 2026 10:43:56 +0100
Subject: locking/rwsem: Add context analysis

Add compiler context analysis annotations.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260306101417.GT1282955@noisy.programming.kicks-ass.net
---
 include/linux/rwsem.h      |  4 ++--
 kernel/locking/Makefile    |  1 +
 kernel/locking/rwbase_rt.c |  1 +
 kernel/locking/rwsem.c     | 27 ++++++++++++++++++++++++---
 4 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index e7829531c4ba..6a1a7bae5f81 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -57,7 +57,7 @@ context_lock_struct(rw_semaphore) {
 	struct optimistic_spin_queue osq; /* spinner MCS lock */
 #endif
 	raw_spinlock_t wait_lock;
-	struct rwsem_waiter *first_waiter;
+	struct rwsem_waiter *first_waiter __guarded_by(&wait_lock);
 #ifdef CONFIG_DEBUG_RWSEMS
 	void *magic;
 #endif
@@ -131,7 +131,7 @@ do {								\
  */
 static inline bool rwsem_is_contended(struct rw_semaphore *sem)
 {
-	return sem->first_waiter != NULL;
+	return data_race(sem->first_waiter != NULL);
 }
 
 #if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER)
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 0c07de79388c..cee1901d4cff 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -6,6 +6,7 @@ KCOV_INSTRUMENT		:= n
 CONTEXT_ANALYSIS_mutex.o := y
 CONTEXT_ANALYSIS_rtmutex_api.o := y
 CONTEXT_ANALYSIS_ww_rt_mutex.o := y
+CONTEXT_ANALYSIS_rwsem.o := y
 
 obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
 
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 9f4322c07486..82e078c0665a 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -186,6 +186,7 @@ static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
 
 static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias,
 					 unsigned long flags)
+	__releases(&rwb->rtmutex.wait_lock)
 {
 	struct rt_mutex_base *rtm = &rwb->rtmutex;
 
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index e66f37ebc6f6..ba4cb74de064 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -72,7 +72,7 @@
 		#c, atomic_long_read(&(sem)->count),		\
 		(unsigned long) sem->magic,			\
 		atomic_long_read(&(sem)->owner), (long)current,	\
-		(sem)->first_waiter ? "" : "not "))		\
+		rwsem_is_contended(sem) ? "" : "not "))		\
 			debug_locks_off();			\
 	} while (0)
 #else
@@ -320,9 +320,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	sem->magic = sem;
 #endif
 	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
-	raw_spin_lock_init(&sem->wait_lock);
-	sem->first_waiter = NULL;
 	atomic_long_set(&sem->owner, 0L);
+	scoped_guard (raw_spinlock_init, &sem->wait_lock) {
+		sem->first_waiter = NULL;
+	}
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 	osq_lock_init(&sem->osq);
 #endif
@@ -365,6 +366,7 @@ enum rwsem_wake_type {
 
 static inline
 bool __rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+	__must_hold(&sem->wait_lock)
 {
 	if (list_empty(&waiter->list)) {
 		sem->first_waiter = NULL;
@@ -401,6 +403,7 @@ rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
 static inline
 struct rwsem_waiter *next_waiter(const struct rw_semaphore *sem,
 				 const struct rwsem_waiter *waiter)
+	__must_hold(&sem->wait_lock)
 {
 	struct rwsem_waiter *next = list_first_entry(&waiter->list,
 						     struct rwsem_waiter, list);
@@ -621,6 +624,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
  */
 static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
 					struct rwsem_waiter *waiter)
+	__must_hold(&sem->wait_lock)
 {
 	struct rwsem_waiter *first = sem->first_waiter;
 	long count, new;
@@ -1558,6 +1562,7 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
  * lock for reading
  */
 void __sched down_read(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	might_sleep();
 	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
@@ -1567,6 +1572,7 @@ void __sched down_read(struct rw_semaphore *sem)
 EXPORT_SYMBOL(down_read);
 
 int __sched down_read_interruptible(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	might_sleep();
 	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
@@ -1581,6 +1587,7 @@ int __sched down_read_interruptible(struct rw_semaphore *sem)
 EXPORT_SYMBOL(down_read_interruptible);
 
 int __sched down_read_killable(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	might_sleep();
 	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
@@ -1598,6 +1605,7 @@ EXPORT_SYMBOL(down_read_killable);
  * trylock for reading -- returns 1 if successful, 0 if contention
  */
 int down_read_trylock(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	int ret = __down_read_trylock(sem);
 
@@ -1611,6 +1619,7 @@ EXPORT_SYMBOL(down_read_trylock);
  * lock for writing
  */
 void __sched down_write(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	might_sleep();
 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
@@ -1622,6 +1631,7 @@ EXPORT_SYMBOL(down_write);
  * lock for writing
  */
 int __sched down_write_killable(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	might_sleep();
 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
@@ -1640,6 +1650,7 @@ EXPORT_SYMBOL(down_write_killable);
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
 int down_write_trylock(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	int ret = __down_write_trylock(sem);
 
@@ -1654,6 +1665,7 @@ EXPORT_SYMBOL(down_write_trylock);
  * release a read lock
  */
 void up_read(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	rwsem_release(&sem->dep_map, _RET_IP_);
 	__up_read(sem);
@@ -1664,6 +1676,7 @@ EXPORT_SYMBOL(up_read);
  * release a write lock
  */
 void up_write(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	rwsem_release(&sem->dep_map, _RET_IP_);
 	__up_write(sem);
@@ -1674,6 +1687,7 @@ EXPORT_SYMBOL(up_write);
  * downgrade write lock to read lock
  */
 void downgrade_write(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	lock_downgrade(&sem->dep_map, _RET_IP_);
 	__downgrade_write(sem);
@@ -1683,6 +1697,7 @@ EXPORT_SYMBOL(downgrade_write);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 void down_read_nested(struct rw_semaphore *sem, int subclass)
+	__no_context_analysis
 {
 	might_sleep();
 	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
@@ -1691,6 +1706,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
 EXPORT_SYMBOL(down_read_nested);
 
 int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
+	__no_context_analysis
 {
 	might_sleep();
 	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
@@ -1705,6 +1721,7 @@ int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
 EXPORT_SYMBOL(down_read_killable_nested);
 
 void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
+	__no_context_analysis
 {
 	might_sleep();
 	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
@@ -1713,6 +1730,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
 EXPORT_SYMBOL(_down_write_nest_lock);
 
 void down_read_non_owner(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	might_sleep();
 	__down_read(sem);
@@ -1727,6 +1745,7 @@ void down_read_non_owner(struct rw_semaphore *sem)
 EXPORT_SYMBOL(down_read_non_owner);
 
 void down_write_nested(struct rw_semaphore *sem, int subclass)
+	__no_context_analysis
 {
 	might_sleep();
 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
@@ -1735,6 +1754,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
 EXPORT_SYMBOL(down_write_nested);
 
 int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
+	__no_context_analysis
 {
 	might_sleep();
 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
@@ -1750,6 +1770,7 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
 EXPORT_SYMBOL(down_write_killable_nested);
 
 void up_read_non_owner(struct rw_semaphore *sem)
+	__no_context_analysis
 {
 	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
 	__up_read(sem);
-- 
cgit v1.2.3


From 27ab4f1e4909a674dfd03058fb9802cae2343a36 Mon Sep 17 00:00:00 2001
From: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Date: Thu, 26 Feb 2026 12:25:54 +0530
Subject: soundwire: amd: refactor bandwidth calculation logic

For current platforms(ACP6.3/ACP7.0/ACP7.1/ACP7.2), AMD SoundWire manager
doesn't have banked registers for data port programming on Manager's side.
Need to use fixed block offsets, hstart & hstop for manager ports.

Earlier amd manager driver has support for 12 MHz as a bus clock frequency
where frame rate is 48000 and number of bits is 500, frame shape as
50 x 10 with fixed block offset mapping based on port number.

Got a new requirement to support 6 MHz as a bus clock frequency.
For 6 MHz bus clock frequency amd manager driver needs to support two
different frame shapes i.e number of bits as 250 with frame rate as 48000
and frame shape as 125 x 2 and for the second combination number of bits as
500 where frame rate is 24000 and frame shape is 50 x 10.

Few SoundWire peripherals doesn't support 125 x 2 as a frame shape for
6 MHz bus clock frequency. They have explicit requirement for the frame
shape. In this scenario, amd manager driver needs to use 50 x 10 as a frame
shape where frame rate is 24000. Based on the platform and SoundWire
topology for 6Mhz support frame shape will be decided which is part of
SoundWire manager DisCo tables.

For current platforms, amd manager driver supports only two bus clock
frequencies(12 MHz & 6 MHz). Refactor bandwidth logic to support different
bus clock frequencies.

Signed-off-by: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.dev>
Link: https://patch.msgid.link/20260226065638.1251771-3-Vijendar.Mukunda@amd.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/amd_manager.c   | 57 ++++++++++++++++++++++++++++++++++++---
 include/linux/soundwire/sdw_amd.h |  4 +++
 2 files changed, 57 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c
index 61df5cecdccc..a3316efdf8ac 100644
--- a/drivers/soundwire/amd_manager.c
+++ b/drivers/soundwire/amd_manager.c
@@ -467,12 +467,16 @@ static u32 amd_sdw_read_ping_status(struct sdw_bus *bus)
 
 static int amd_sdw_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream)
 {
+	struct amd_sdw_manager *amd_manager = to_amd_sdw(bus);
 	struct sdw_transport_data t_data = {0};
 	struct sdw_master_runtime *m_rt;
 	struct sdw_port_runtime *p_rt;
 	struct sdw_bus_params *b_params = &bus->params;
 	int port_bo, hstart, hstop, sample_int;
-	unsigned int rate, bps;
+	unsigned int rate, bps, channels;
+	unsigned int stream_slot_size, max_slots;
+	static unsigned int next_offset[AMD_SDW_MAX_MANAGER_COUNT] = {1};
+	unsigned int inst_id = amd_manager->instance;
 
 	port_bo = 0;
 	hstart = 1;
@@ -483,11 +487,51 @@ static int amd_sdw_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime
 	list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) {
 		rate = m_rt->stream->params.rate;
 		bps = m_rt->stream->params.bps;
+		channels = m_rt->stream->params.ch_count;
 		sample_int = (bus->params.curr_dr_freq / rate);
+
+		/* Compute slots required for this stream dynamically */
+		stream_slot_size = bps * channels;
+
 		list_for_each_entry(p_rt, &m_rt->port_list, port_node) {
-			port_bo = (p_rt->num * 64) + 1;
-			dev_dbg(bus->dev, "p_rt->num=%d hstart=%d hstop=%d port_bo=%d\n",
-				p_rt->num, hstart, hstop, port_bo);
+			if (p_rt->num >= amd_manager->max_ports) {
+				dev_err(bus->dev, "Port %d exceeds max ports %d\n",
+					p_rt->num, amd_manager->max_ports);
+				return -EINVAL;
+			}
+
+			if (!amd_manager->port_offset_map[p_rt->num]) {
+				/*
+				 * port block offset calculation for 6MHz bus clock frequency with
+				 * different frame sizes 50 x 10 and 125 x 2
+				 */
+				if (bus->params.curr_dr_freq == 12000000) {
+					max_slots = bus->params.row * (bus->params.col - 1);
+					if (next_offset[inst_id] + stream_slot_size <=
+					    (max_slots - 1)) {
+						amd_manager->port_offset_map[p_rt->num] =
+									next_offset[inst_id];
+						next_offset[inst_id] += stream_slot_size;
+					} else {
+						dev_err(bus->dev,
+							"No space for port %d\n", p_rt->num);
+						return -ENOMEM;
+					}
+				} else {
+					 /*
+					  * port block offset calculation for 12MHz bus clock
+					  * frequency
+					  */
+					amd_manager->port_offset_map[p_rt->num] =
+									(p_rt->num * 64) + 1;
+				}
+			}
+			port_bo = amd_manager->port_offset_map[p_rt->num];
+			dev_dbg(bus->dev,
+				"Port=%d hstart=%d hstop=%d port_bo=%d slots=%d max_ports=%d\n",
+				p_rt->num, hstart, hstop, port_bo, stream_slot_size,
+				amd_manager->max_ports);
+
 			sdw_fill_xport_params(&p_rt->transport_params, p_rt->num,
 					      false, SDW_BLK_GRP_CNT_1, sample_int,
 					      port_bo, port_bo >> 8, hstart, hstop,
@@ -1079,6 +1123,11 @@ static int amd_sdw_manager_probe(struct platform_device *pdev)
 	default:
 		return -EINVAL;
 	}
+	amd_manager->max_ports = amd_manager->num_dout_ports + amd_manager->num_din_ports;
+	amd_manager->port_offset_map = devm_kcalloc(dev, amd_manager->max_ports,
+						    sizeof(int), GFP_KERNEL);
+	if (!amd_manager->port_offset_map)
+		return -ENOMEM;
 
 	prop = &amd_manager->bus.prop;
 	prop->mclk_freq = AMD_SDW_BUS_BASE_FREQ;
diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h
index fe31773d5210..470360a2723c 100644
--- a/include/linux/soundwire/sdw_amd.h
+++ b/include/linux/soundwire/sdw_amd.h
@@ -66,8 +66,10 @@ struct sdw_amd_dai_runtime {
  * @status: peripheral devices status array
  * @num_din_ports: number of input ports
  * @num_dout_ports: number of output ports
+ * @max_ports: total number of input ports and output ports
  * @cols_index: Column index in frame shape
  * @rows_index: Rows index in frame shape
+ * @port_offset_map: dynamic array to map port block offset
  * @instance: SoundWire manager instance
  * @quirks: SoundWire manager quirks
  * @wake_en_mask: wake enable mask per SoundWire manager
@@ -92,10 +94,12 @@ struct amd_sdw_manager {
 
 	int num_din_ports;
 	int num_dout_ports;
+	int max_ports;
 
 	int cols_index;
 	int rows_index;
 
+	int *port_offset_map;
 	u32 instance;
 	u32 quirks;
 	u32 wake_en_mask;
-- 
cgit v1.2.3


From 493740d790cce709d285cd1022d16d05439b7d5b Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Fri, 6 Mar 2026 11:31:54 +0530
Subject: drm/buddy: Improve offset-aligned allocation handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Large alignment requests previously forced the buddy allocator to search by
alignment order, which often caused higher-order free blocks to be split even
when a suitably aligned smaller region already existed within them. This led
to excessive fragmentation, especially for workloads requesting small sizes
with large alignment constraints.

This change prioritizes the requested allocation size during the search and
uses an augmented RB-tree field (subtree_max_alignment) to efficiently locate
free blocks that satisfy both size and offset-alignment requirements. As a
result, the allocator can directly select an aligned sub-region without
splitting larger blocks unnecessarily.

A practical example is the VKCTS test
dEQP-VK.memory.allocation.basic.size_8KiB.reverse.count_4000, which repeatedly
allocates 8 KiB buffers with a 256 KiB alignment. Previously, such allocations
caused large blocks to be split aggressively, despite smaller aligned regions
being sufficient. With this change, those aligned regions are reused directly,
significantly reducing fragmentation.

This improvement is visible in the amdgpu VRAM buddy allocator state
(/sys/kernel/debug/dri/1/amdgpu_vram_mm). After the change, higher-order blocks
are preserved and the number of low-order fragments is substantially reduced.

Before:
  order- 5 free: 1936 MiB, blocks: 15490
  order- 4 free:  967 MiB, blocks: 15486
  order- 3 free:  483 MiB, blocks: 15485
  order- 2 free:  241 MiB, blocks: 15486
  order- 1 free:  241 MiB, blocks: 30948

After:
  order- 5 free:  493 MiB, blocks:  3941
  order- 4 free:  246 MiB, blocks:  3943
  order- 3 free:  123 MiB, blocks:  4101
  order- 2 free:   61 MiB, blocks:  4101
  order- 1 free:   61 MiB, blocks:  8018

By avoiding unnecessary splits, this change improves allocator efficiency and
helps maintain larger contiguous free regions under heavy offset-aligned
allocation workloads.

v2:(Matthew)
  - Update augmented information along the path to the inserted node.

v3:
  - Move the patch to gpu/buddy.c file.

v4:(Matthew)
  - Use the helper instead of calling _ffs directly
  - Remove gpu_buddy_block_order(block) >= order check and drop order
  - Drop !node check as all callers handle this already
  - Return larger than any other possible alignment for __ffs64(0)
  - Replace __ffs with __ffs64

v5:(Matthew)
  - Drop subtree_max_alignment initialization at gpu_block_alloc()

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patch.msgid.link/20260306060155.2114-1-Arunpravin.PaneerSelvam@amd.com
---
 drivers/gpu/buddy.c       | 272 ++++++++++++++++++++++++++++++++++++++--------
 include/linux/gpu_buddy.h |   2 +
 2 files changed, 229 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c
index da5a1222f46b..52686672e99f 100644
--- a/drivers/gpu/buddy.c
+++ b/drivers/gpu/buddy.c
@@ -53,6 +53,25 @@ gpu_buddy_block_is_split(struct gpu_buddy_block *block)
 	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
 }
 
+static unsigned int gpu_buddy_block_offset_alignment(struct gpu_buddy_block *block)
+{
+	u64 offset = gpu_buddy_block_offset(block);
+
+	if (!offset)
+		/*
+		 * __ffs64(0) is undefined; offset 0 is maximally aligned, so return
+		 * a value greater than any possible alignment.
+		 */
+		return 64 + 1;
+
+	return __ffs64(offset);
+}
+
+RB_DECLARE_CALLBACKS_MAX(static, gpu_buddy_augment_cb,
+			 struct gpu_buddy_block, rb,
+			 unsigned int, subtree_max_alignment,
+			 gpu_buddy_block_offset_alignment);
+
 static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
 					       struct gpu_buddy_block *parent,
 					       unsigned int order,
@@ -106,26 +125,42 @@ static bool rbtree_is_empty(struct rb_root *root)
 	return RB_EMPTY_ROOT(root);
 }
 
-static bool gpu_buddy_block_offset_less(const struct gpu_buddy_block *block,
-					const struct gpu_buddy_block *node)
-{
-	return gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node);
-}
-
-static bool rbtree_block_offset_less(struct rb_node *block,
-				     const struct rb_node *node)
-{
-	return gpu_buddy_block_offset_less(rbtree_get_free_block(block),
-					   rbtree_get_free_block(node));
-}
-
 static void rbtree_insert(struct gpu_buddy *mm,
 			  struct gpu_buddy_block *block,
 			  enum gpu_buddy_free_tree tree)
 {
-	rb_add(&block->rb,
-	       &mm->free_trees[tree][gpu_buddy_block_order(block)],
-	       rbtree_block_offset_less);
+	struct rb_node **link, *parent = NULL;
+	unsigned int block_alignment, order;
+	struct gpu_buddy_block *node;
+	struct rb_root *root;
+
+	order = gpu_buddy_block_order(block);
+	block_alignment = gpu_buddy_block_offset_alignment(block);
+
+	root = &mm->free_trees[tree][order];
+	link = &root->rb_node;
+
+	while (*link) {
+		parent = *link;
+		node = rbtree_get_free_block(parent);
+		/*
+		 * Manual augmentation update during insertion traversal. Required
+		 * because rb_insert_augmented() only calls rotate callback during
+		 * rotations. This ensures all ancestors on the insertion path have
+		 * correct subtree_max_alignment values.
+		 */
+		if (node->subtree_max_alignment < block_alignment)
+			node->subtree_max_alignment = block_alignment;
+
+		if (gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node))
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	block->subtree_max_alignment = block_alignment;
+	rb_link_node(&block->rb, parent, link);
+	rb_insert_augmented(&block->rb, root, &gpu_buddy_augment_cb);
 }
 
 static void rbtree_remove(struct gpu_buddy *mm,
@@ -138,7 +173,7 @@ static void rbtree_remove(struct gpu_buddy *mm,
 	tree = get_block_tree(block);
 	root = &mm->free_trees[tree][order];
 
-	rb_erase(&block->rb, root);
+	rb_erase_augmented(&block->rb, root, &gpu_buddy_augment_cb);
 	RB_CLEAR_NODE(&block->rb);
 }
 
@@ -811,6 +846,127 @@ err_undo:
 	return ERR_PTR(err);
 }
 
+static bool
+gpu_buddy_can_offset_align(u64 size, u64 min_block_size)
+{
+	return size < min_block_size && is_power_of_2(size);
+}
+
+static bool gpu_buddy_subtree_can_satisfy(struct rb_node *node,
+					  unsigned int alignment)
+{
+	struct gpu_buddy_block *block;
+
+	block = rbtree_get_free_block(node);
+	return block->subtree_max_alignment >= alignment;
+}
+
+static struct gpu_buddy_block *
+gpu_buddy_find_block_aligned(struct gpu_buddy *mm,
+			     enum gpu_buddy_free_tree tree,
+			     unsigned int order,
+			     unsigned int alignment,
+			     unsigned long flags)
+{
+	struct rb_root *root = &mm->free_trees[tree][order];
+	struct rb_node *rb = root->rb_node;
+
+	while (rb) {
+		struct gpu_buddy_block *block = rbtree_get_free_block(rb);
+		struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
+
+		if (right_node) {
+			if (gpu_buddy_subtree_can_satisfy(right_node, alignment)) {
+				rb = right_node;
+				continue;
+			}
+		}
+
+		if (gpu_buddy_block_offset_alignment(block) >= alignment)
+			return block;
+
+		if (left_node) {
+			if (gpu_buddy_subtree_can_satisfy(left_node, alignment)) {
+				rb = left_node;
+				continue;
+			}
+		}
+
+		break;
+	}
+
+	return NULL;
+}
+
+static struct gpu_buddy_block *
+gpu_buddy_offset_aligned_allocation(struct gpu_buddy *mm,
+				    u64 size,
+				    u64 min_block_size,
+				    unsigned long flags)
+{
+	struct gpu_buddy_block *block = NULL;
+	unsigned int order, tmp, alignment;
+	struct gpu_buddy_block *buddy;
+	enum gpu_buddy_free_tree tree;
+	unsigned long pages;
+	int err;
+
+	alignment = ilog2(min_block_size);
+	pages = size >> ilog2(mm->chunk_size);
+	order = fls(pages) - 1;
+
+	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
+		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
+
+	for (tmp = order; tmp <= mm->max_order; ++tmp) {
+		block = gpu_buddy_find_block_aligned(mm, tree, tmp,
+						     alignment, flags);
+		if (!block) {
+			tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
+				GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
+			block = gpu_buddy_find_block_aligned(mm, tree, tmp,
+							     alignment, flags);
+		}
+
+		if (block)
+			break;
+	}
+
+	if (!block)
+		return ERR_PTR(-ENOSPC);
+
+	while (gpu_buddy_block_order(block) > order) {
+		struct gpu_buddy_block *left, *right;
+
+		err = split_block(mm, block);
+		if (unlikely(err))
+			goto err_undo;
+
+		left  = block->left;
+		right = block->right;
+
+		if (gpu_buddy_block_offset_alignment(right) >= alignment)
+			block = right;
+		else
+			block = left;
+	}
+
+	return block;
+
+err_undo:
+	/*
+	 * We really don't want to leave around a bunch of split blocks, since
+	 * bigger is better, so make sure we merge everything back before we
+	 * free the allocated blocks.
+	 */
+	buddy = __get_buddy(block);
+	if (buddy &&
+	    (gpu_buddy_block_is_free(block) &&
+	     gpu_buddy_block_is_free(buddy)))
+		__gpu_buddy_free(mm, block, false);
+	return ERR_PTR(err);
+}
+
 static int __alloc_range(struct gpu_buddy *mm,
 			 struct list_head *dfs,
 			 u64 start, u64 size,
@@ -1080,6 +1236,7 @@ EXPORT_SYMBOL(gpu_buddy_block_trim);
 static struct gpu_buddy_block *
 __gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
 			 u64 start, u64 end,
+			 u64 size, u64 min_block_size,
 			 unsigned int order,
 			 unsigned long flags)
 {
@@ -1087,6 +1244,11 @@ __gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
 		/* Allocate traversing within the range */
 		return  __gpu_buddy_alloc_range_bias(mm, start, end,
 						     order, flags);
+	else if (size < min_block_size)
+		/* Allocate from an offset-aligned region without size rounding */
+		return gpu_buddy_offset_aligned_allocation(mm, size,
+							   min_block_size,
+							   flags);
 	else
 		/* Allocate from freetree */
 		return alloc_from_freetree(mm, order, flags);
@@ -1158,8 +1320,11 @@ int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
 	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
 		size = roundup_pow_of_two(size);
 		min_block_size = size;
-	/* Align size value to min_block_size */
-	} else if (!IS_ALIGNED(size, min_block_size)) {
+		/*
+		 * Normalize the requested size to min_block_size for regular allocations.
+		 * Offset-aligned allocations intentionally skip size rounding.
+		 */
+	} else if (!gpu_buddy_can_offset_align(size, min_block_size)) {
 		size = round_up(size, min_block_size);
 	}
 
@@ -1179,43 +1344,60 @@ int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
 	do {
 		order = min(order, (unsigned int)fls(pages) - 1);
 		BUG_ON(order > mm->max_order);
-		BUG_ON(order < min_order);
+		/*
+		 * Regular allocations must not allocate blocks smaller than min_block_size.
+		 * Offset-aligned allocations deliberately bypass this constraint.
+		 */
+		BUG_ON(size >= min_block_size && order < min_order);
 
 		do {
+			unsigned int fallback_order;
+
 			block = __gpu_buddy_alloc_blocks(mm, start,
 							 end,
+							 size,
+							 min_block_size,
 							 order,
 							 flags);
 			if (!IS_ERR(block))
 				break;
 
-			if (order-- == min_order) {
-				/* Try allocation through force merge method */
-				if (mm->clear_avail &&
-				    !__force_merge(mm, start, end, min_order)) {
-					block = __gpu_buddy_alloc_blocks(mm, start,
-									 end,
-									 min_order,
-									 flags);
-					if (!IS_ERR(block)) {
-						order = min_order;
-						break;
-					}
-				}
+			if (size < min_block_size) {
+				fallback_order = order;
+			} else if (order == min_order) {
+				fallback_order = min_order;
+			} else {
+				order--;
+				continue;
+			}
 
-				/*
-				 * Try contiguous block allocation through
-				 * try harder method.
-				 */
-				if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
-				    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
-					return __alloc_contig_try_harder(mm,
-									 original_size,
-									 original_min_size,
-									 blocks);
-				err = -ENOSPC;
-				goto err_free;
+			/* Try allocation through force merge method */
+			if (mm->clear_avail &&
+			    !__force_merge(mm, start, end, fallback_order)) {
+				block = __gpu_buddy_alloc_blocks(mm, start,
+								 end,
+								 size,
+								 min_block_size,
+								 fallback_order,
+								 flags);
+				if (!IS_ERR(block)) {
+					order = fallback_order;
+					break;
+				}
 			}
+
+			/*
+			 * Try contiguous block allocation through
+			 * try harder method.
+			 */
+			if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
+			    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
+				return __alloc_contig_try_harder(mm,
+								 original_size,
+								 original_min_size,
+								 blocks);
+			err = -ENOSPC;
+			goto err_free;
 		} while (1);
 
 		mark_allocated(mm, block);
diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
index f1fb6eff604a..5fa917ba5450 100644
--- a/include/linux/gpu_buddy.h
+++ b/include/linux/gpu_buddy.h
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
 
 /**
  * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range
@@ -128,6 +129,7 @@ struct gpu_buddy_block {
 	};
 /* private: */
 	struct list_head tmp_link;
+	unsigned int subtree_max_alignment;
 };
 
 /* Order-zero must be at least SZ_4K */
-- 
cgit v1.2.3


From 5f88899ec7531e1680b1003f32584d7da5922902 Mon Sep 17 00:00:00 2001
From: Nuno Sá <nuno.sa@analog.com>
Date: Tue, 3 Mar 2026 10:25:00 +0000
Subject: dmaengine: Document cyclic transfer for
 dmaengine_prep_peripheral_dma_vec()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document that the DMA_PREP_REPEAT flag can be used with the
dmaengine_prep_peripheral_dma_vec() to mark a transfer as cyclic similar
to dmaengine_prep_dma_cyclic().

Reviewed-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://patch.msgid.link/20260303-axi-dac-cyclic-support-v2-1-0db27b4be95a@analog.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dmaengine.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 99efe2b9b4ea..b3d251c9734e 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -996,7 +996,8 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
  * @vecs: The array of DMA vectors that should be transferred
  * @nents: The number of DMA vectors in the array
  * @dir: Specifies the direction of the data transfer
- * @flags: DMA engine flags
+ * @flags: DMA engine flags - DMA_PREP_REPEAT can be used to mark a cyclic
+ *         DMA transfer
  */
 static inline struct dma_async_tx_descriptor *dmaengine_prep_peripheral_dma_vec(
 	struct dma_chan *chan, const struct dma_vec *vecs, size_t nents,
-- 
cgit v1.2.3


From 70fbea9f1a44d80a4c573c225f119022d6e21360 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 28 Feb 2026 17:12:13 -0800
Subject: dmaengine: ti-cppi5: fix all kernel-doc warnings

Add missing struct member, function parameter, and enum value descriptions.
Add missing function Returns: sections.
Use correct function name in kernel-doc to avoid mismatched prototypes.

These repair all kernel-doc warnings in ti-cppi5.h:

Warning: include/linux/dma/ti-cppi5.h:27 struct member 'pkt_info1' not
 described in 'cppi5_desc_hdr_t'
Warning: include/linux/dma/ti-cppi5.h:27 struct member 'pkt_info2' not
 described in 'cppi5_desc_hdr_t'
Warning: include/linux/dma/ti-cppi5.h:50 struct member 'epib' not
 described in 'cppi5_host_desc_t'
Warning: include/linux/dma/ti-cppi5.h:142 struct member 'epib' not
 described in 'cppi5_monolithic_desc_t'
Warning: include/linux/dma/ti-cppi5.h:413 function parameter 'pkt_len'
 not described in 'cppi5_hdesc_set_pktlen'
Warning: include/linux/dma/ti-cppi5.h:436 function parameter 'ps_flags'
 not described in 'cppi5_hdesc_set_psflags'
Warning: include/linux/dma/ti-cppi5.h:509 function parameter 'hbuf_desc'
 not described in 'cppi5_hdesc_link_hbdesc'
Warning: include/linux/dma/ti-cppi5.h:839 struct member 'dicnt3' not
 described in 'cppi5_tr_type15_t'
Warning: include/linux/dma/ti-cppi5.h:970 function parameter 'desc_hdr'
 not described in 'cppi5_trdesc_init'
Warning: include/linux/dma/ti-cppi5.h:184 No description found for
 return value of 'cppi5_desc_is_tdcm'
Warning: include/linux/dma/ti-cppi5.h:198 No description found for
 return value of 'cppi5_desc_get_type'
Warning: include/linux/dma/ti-cppi5.h:210 No description found for
 return value of 'cppi5_desc_get_errflags'
Warning: include/linux/dma/ti-cppi5.h:448 expecting prototype for
 cppi5_hdesc_get_errflags(). Prototype was for cppi5_hdesc_get_pkttype()
 instead
Warning: include/linux/dma/ti-cppi5.h:460 expecting prototype for
 cppi5_hdesc_get_errflags(). Prototype was for cppi5_hdesc_set_pkttype()
 instead
Warning: include/linux/dma/ti-cppi5.h:1053 expecting prototype for
 cppi5_tr_cflag_set(). Prototype was for cppi5_tr_csf_set() instead
Warning: include/linux/dma/ti-cppi5.h:651 Enum value 'CPPI5_TR_TYPE_MAX'
 not described in enum 'cppi5_tr_types'
Warning: include/linux/dma/ti-cppi5.h:676 Enum value
 'CPPI5_TR_EVENT_SIZE_MAX' not described in enum 'cppi5_tr_event_size'
Warning: include/linux/dma/ti-cppi5.h:693 Enum value 'CPPI5_TR_TRIGGER_MAX'
 not described in enum 'cppi5_tr_trigger'
Warning: include/linux/dma/ti-cppi5.h:714 Enum value
 'CPPI5_TR_TRIGGER_TYPE_MAX' not described in enum 'cppi5_tr_trigger_type'
Warning: include/linux/dma/ti-cppi5.h:890 Enum value
 'CPPI5_TR_RESPONSE_STATUS_MAX' not described in enum
 'cppi5_tr_resp_status_type'
Warning: include/linux/dma/ti-cppi5.h:906 Enum value
 'CPPI5_TR_RESPONSE_STATUS_SUBMISSION_MAX' not described in enum
 'cppi5_tr_resp_status_submission'
Warning: include/linux/dma/ti-cppi5.h:934 Enum value
 'CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_MAX' not described in enum
 'cppi5_tr_resp_status_unsupported'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260301011213.3063688-1-rdunlap@infradead.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dma/ti-cppi5.h | 53 +++++++++++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h
index c53c0f6e3b1a..3fe19b75ddf7 100644
--- a/include/linux/dma/ti-cppi5.h
+++ b/include/linux/dma/ti-cppi5.h
@@ -16,8 +16,8 @@
  * struct cppi5_desc_hdr_t - Descriptor header, present in all types of
  *			     descriptors
  * @pkt_info0:		Packet info word 0 (n/a in Buffer desc)
- * @pkt_info0:		Packet info word 1 (n/a in Buffer desc)
- * @pkt_info0:		Packet info word 2 (n/a in Buffer desc)
+ * @pkt_info1:		Packet info word 1 (n/a in Buffer desc)
+ * @pkt_info2:		Packet info word 2 (n/a in Buffer desc)
  * @src_dst_tag:	Packet info word 3 (n/a in Buffer desc)
  */
 struct cppi5_desc_hdr_t {
@@ -35,7 +35,7 @@ struct cppi5_desc_hdr_t {
  * @buf_info1:		word 8: Buffer valid data length
  * @org_buf_len:	word 9: Original buffer length
  * @org_buf_ptr:	word 10/11: Original buffer pointer
- * @epib[0]:		Extended Packet Info Data (optional, 4 words), and/or
+ * @epib:		Extended Packet Info Data (optional, 4 words), and/or
  *			Protocol Specific Data (optional, 0-128 bytes in
  *			multiples of 4), and/or
  *			Other Software Data (0-N bytes, optional)
@@ -132,7 +132,7 @@ struct cppi5_desc_epib_t {
 /**
  * struct cppi5_monolithic_desc_t - Monolithic-mode packet descriptor
  * @hdr:		Descriptor header
- * @epib[0]:		Extended Packet Info Data (optional, 4 words), and/or
+ * @epib:		Extended Packet Info Data (optional, 4 words), and/or
  *			Protocol Specific Data (optional, 0-128 bytes in
  *			multiples of 4), and/or
  *			Other Software Data (0-N bytes, optional)
@@ -179,7 +179,7 @@ static inline void cppi5_desc_dump(void *desc, u32 size)
  * cppi5_desc_is_tdcm - check if the paddr indicates Teardown Complete Message
  * @paddr: Physical address of the packet popped from the ring
  *
- * Returns true if the address indicates TDCM
+ * Returns: true if the address indicates TDCM
  */
 static inline bool cppi5_desc_is_tdcm(dma_addr_t paddr)
 {
@@ -190,7 +190,7 @@ static inline bool cppi5_desc_is_tdcm(dma_addr_t paddr)
  * cppi5_desc_get_type - get descriptor type
  * @desc_hdr: packet descriptor/TR header
  *
- * Returns descriptor type:
+ * Returns: descriptor type:
  * CPPI5_INFO0_DESC_TYPE_VAL_HOST
  * CPPI5_INFO0_DESC_TYPE_VAL_MONO
  * CPPI5_INFO0_DESC_TYPE_VAL_TR
@@ -205,7 +205,7 @@ static inline u32 cppi5_desc_get_type(struct cppi5_desc_hdr_t *desc_hdr)
  * cppi5_desc_get_errflags - get Error Flags from Desc
  * @desc_hdr: packet/TR descriptor header
  *
- * Returns Error Flags from Packet/TR Descriptor
+ * Returns: Error Flags from Packet/TR Descriptor
  */
 static inline u32 cppi5_desc_get_errflags(struct cppi5_desc_hdr_t *desc_hdr)
 {
@@ -307,7 +307,7 @@ static inline void cppi5_desc_set_tags_ids(struct cppi5_desc_hdr_t *desc_hdr,
  * @psdata_size: PSDATA size
  * @sw_data_size: SWDATA size
  *
- * Returns required Host Packet Descriptor size
+ * Returns: required Host Packet Descriptor size
  * 0 - if PSDATA > CPPI5_INFO0_HDESC_PSDATA_MAX_SIZE
  */
 static inline u32 cppi5_hdesc_calc_size(bool epib, u32 psdata_size,
@@ -381,6 +381,8 @@ cppi5_hdesc_update_psdata_size(struct cppi5_host_desc_t *desc, u32 psdata_size)
 /**
  * cppi5_hdesc_get_psdata_size - get PSdata size in bytes
  * @desc: Host packet descriptor
+ *
+ * Returns: PSdata size in bytes
  */
 static inline u32 cppi5_hdesc_get_psdata_size(struct cppi5_host_desc_t *desc)
 {
@@ -398,7 +400,7 @@ static inline u32 cppi5_hdesc_get_psdata_size(struct cppi5_host_desc_t *desc)
  * cppi5_hdesc_get_pktlen - get Packet Length from HDesc
  * @desc: Host packet descriptor
  *
- * Returns Packet Length from Host Packet Descriptor
+ * Returns: Packet Length from Host Packet Descriptor
  */
 static inline u32 cppi5_hdesc_get_pktlen(struct cppi5_host_desc_t *desc)
 {
@@ -408,6 +410,7 @@ static inline u32 cppi5_hdesc_get_pktlen(struct cppi5_host_desc_t *desc)
 /**
  * cppi5_hdesc_set_pktlen - set Packet Length in HDesc
  * @desc: Host packet descriptor
+ * @pkt_len: Packet length to set
  */
 static inline void cppi5_hdesc_set_pktlen(struct cppi5_host_desc_t *desc,
 					  u32 pkt_len)
@@ -420,7 +423,7 @@ static inline void cppi5_hdesc_set_pktlen(struct cppi5_host_desc_t *desc,
  * cppi5_hdesc_get_psflags - get Protocol Specific Flags from HDesc
  * @desc: Host packet descriptor
  *
- * Returns Protocol Specific Flags from Host Packet Descriptor
+ * Returns: Protocol Specific Flags from Host Packet Descriptor
  */
 static inline u32 cppi5_hdesc_get_psflags(struct cppi5_host_desc_t *desc)
 {
@@ -431,6 +434,7 @@ static inline u32 cppi5_hdesc_get_psflags(struct cppi5_host_desc_t *desc)
 /**
  * cppi5_hdesc_set_psflags - set Protocol Specific Flags in HDesc
  * @desc: Host packet descriptor
+ * @ps_flags: Protocol Specific flags to set
  */
 static inline void cppi5_hdesc_set_psflags(struct cppi5_host_desc_t *desc,
 					   u32 ps_flags)
@@ -442,8 +446,10 @@ static inline void cppi5_hdesc_set_psflags(struct cppi5_host_desc_t *desc,
 }
 
 /**
- * cppi5_hdesc_get_errflags - get Packet Type from HDesc
+ * cppi5_hdesc_get_pkttype - get Packet Type from HDesc
  * @desc: Host packet descriptor
+ *
+ * Returns: Packet type
  */
 static inline u32 cppi5_hdesc_get_pkttype(struct cppi5_host_desc_t *desc)
 {
@@ -452,7 +458,7 @@ static inline u32 cppi5_hdesc_get_pkttype(struct cppi5_host_desc_t *desc)
 }
 
 /**
- * cppi5_hdesc_get_errflags - set Packet Type in HDesc
+ * cppi5_hdesc_set_pkttype - set Packet Type in HDesc
  * @desc: Host packet descriptor
  * @pkt_type: Packet Type
  */
@@ -501,7 +507,7 @@ static inline void cppi5_hdesc_reset_to_original(struct cppi5_host_desc_t *desc)
 /**
  * cppi5_hdesc_link_hbdesc - link Host Buffer Descriptor to HDesc
  * @desc: Host Packet Descriptor
- * @buf_desc: Host Buffer Descriptor physical address
+ * @hbuf_desc: Host Buffer Descriptor physical address
  *
  * add and link Host Buffer Descriptor to HDesc
  */
@@ -527,7 +533,7 @@ static inline void cppi5_hdesc_reset_hbdesc(struct cppi5_host_desc_t *desc)
  * cppi5_hdesc_epib_present -  check if EPIB present
  * @desc_hdr: packet descriptor/TR header
  *
- * Returns true if EPIB present in the packet
+ * Returns: true if EPIB present in the packet
  */
 static inline bool cppi5_hdesc_epib_present(struct cppi5_desc_hdr_t *desc_hdr)
 {
@@ -538,7 +544,7 @@ static inline bool cppi5_hdesc_epib_present(struct cppi5_desc_hdr_t *desc_hdr)
  * cppi5_hdesc_get_psdata -  Get pointer on PSDATA
  * @desc: Host packet descriptor
  *
- * Returns pointer on PSDATA in HDesc.
+ * Returns: pointer on PSDATA in HDesc.
  * NULL - if ps_data placed at the start of data buffer.
  */
 static inline void *cppi5_hdesc_get_psdata(struct cppi5_host_desc_t *desc)
@@ -568,7 +574,7 @@ static inline void *cppi5_hdesc_get_psdata(struct cppi5_host_desc_t *desc)
  * cppi5_hdesc_get_swdata -  Get pointer on swdata
  * @desc: Host packet descriptor
  *
- * Returns pointer on SWDATA in HDesc.
+ * Returns: pointer on SWDATA in HDesc.
  * NOTE. It's caller responsibility to be sure hdesc actually has swdata.
  */
 static inline void *cppi5_hdesc_get_swdata(struct cppi5_host_desc_t *desc)
@@ -648,6 +654,7 @@ enum cppi5_tr_types {
 	CPPI5_TR_TYPE11,
 	/* type12-14: Reserved */
 	CPPI5_TR_TYPE15 = 15,
+	/* private: */
 	CPPI5_TR_TYPE_MAX
 };
 
@@ -673,6 +680,7 @@ enum cppi5_tr_event_size {
 	CPPI5_TR_EVENT_SIZE_ICNT1_DEC,
 	CPPI5_TR_EVENT_SIZE_ICNT2_DEC,
 	CPPI5_TR_EVENT_SIZE_ICNT3_DEC,
+	/* private: */
 	CPPI5_TR_EVENT_SIZE_MAX
 };
 
@@ -690,6 +698,7 @@ enum cppi5_tr_trigger {
 	CPPI5_TR_TRIGGER_GLOBAL0,
 	CPPI5_TR_TRIGGER_GLOBAL1,
 	CPPI5_TR_TRIGGER_LOCAL_EVENT,
+	/* private: */
 	CPPI5_TR_TRIGGER_MAX
 };
 
@@ -711,6 +720,7 @@ enum cppi5_tr_trigger_type {
 	CPPI5_TR_TRIGGER_TYPE_ICNT2_DEC,
 	CPPI5_TR_TRIGGER_TYPE_ICNT3_DEC,
 	CPPI5_TR_TRIGGER_TYPE_ALL,
+	/* private: */
 	CPPI5_TR_TRIGGER_TYPE_MAX
 };
 
@@ -815,7 +825,7 @@ struct cppi5_tr_type3_t {
  *			destination
  * @dicnt1:		Total loop iteration count for level 1 for destination
  * @dicnt2:		Total loop iteration count for level 2 for destination
- * @sicnt3:		Total loop iteration count for level 3 (outermost) for
+ * @dicnt3:		Total loop iteration count for level 3 (outermost) for
  *			destination
  */
 struct cppi5_tr_type15_t {
@@ -887,6 +897,7 @@ enum cppi5_tr_resp_status_type {
 	CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_ERR,
 	CPPI5_TR_RESPONSE_STATUS_TRANSFER_EXCEPTION,
 	CPPI5_TR_RESPONSE_STATUS__TEARDOWN_FLUSH,
+	/* private: */
 	CPPI5_TR_RESPONSE_STATUS_MAX
 };
 
@@ -903,6 +914,7 @@ enum cppi5_tr_resp_status_submission {
 	CPPI5_TR_RESPONSE_STATUS_SUBMISSION_ICNT0,
 	CPPI5_TR_RESPONSE_STATUS_SUBMISSION_FIFO_FULL,
 	CPPI5_TR_RESPONSE_STATUS_SUBMISSION_OWN,
+	/* private: */
 	CPPI5_TR_RESPONSE_STATUS_SUBMISSION_MAX
 };
 
@@ -931,6 +943,7 @@ enum cppi5_tr_resp_status_unsupported {
 	CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_DFMT,
 	CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_SECTR,
 	CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_AMODE_SPECIFIC,
+	/* private: */
 	CPPI5_TR_RESPONSE_STATUS_UNSUPPORTED_MAX
 };
 
@@ -939,7 +952,7 @@ enum cppi5_tr_resp_status_unsupported {
  * @tr_count: number of TR records
  * @tr_size: Nominal size of TR record (max) [16, 32, 64, 128]
  *
- * Returns required TR Descriptor size
+ * Returns: required TR Descriptor size
  */
 static inline size_t cppi5_trdesc_calc_size(u32 tr_count, u32 tr_size)
 {
@@ -955,7 +968,7 @@ static inline size_t cppi5_trdesc_calc_size(u32 tr_count, u32 tr_size)
 
 /**
  * cppi5_trdesc_init - Init TR Descriptor
- * @desc: TR Descriptor
+ * @desc_hdr: TR Descriptor
  * @tr_count: number of TR records
  * @tr_size: Nominal size of TR record (max) [16, 32, 64, 128]
  * @reload_idx: Absolute index to jump to on the 2nd and following passes
@@ -1044,7 +1057,7 @@ static inline void cppi5_tr_set_trigger(cppi5_tr_flags_t *flags,
 }
 
 /**
- * cppi5_tr_cflag_set - Update the Configuration specific flags
+ * cppi5_tr_csf_set - Update the Configuration specific flags
  * @flags: Pointer to the TR's flags
  * @csf: Configuration specific flags
  *
-- 
cgit v1.2.3


From 7b84a00dd3528d980f1f35fd2c5015f72dc3f62a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 28 Feb 2026 17:12:03 -0800
Subject: dmaengine: qcom: qcom-gpi-dma.h: fix all kernel-doc warnings

Add missing enum descriptions and spell one struct member correctly
to avoid kernel-doc warnings:

Warning: include/linux/dma/qcom-gpi-dma.h:15 Enum value 'SPI_TX' not
 described in enum 'spi_transfer_cmd'
Warning: include/linux/dma/qcom-gpi-dma.h:15 Enum value 'SPI_RX' not
 described in enum 'spi_transfer_cmd'
Warning: include/linux/dma/qcom-gpi-dma.h:15 Enum value 'SPI_DUPLEX' not
 described in enum 'spi_transfer_cmd'
Warning: include/linux/dma/qcom-gpi-dma.h:80 struct member 'multi_msg' not
 described in 'gpi_i2c_config'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260301011203.3062658-1-rdunlap@infradead.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dma/qcom-gpi-dma.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h
index 6680dd1a43c6..332be28427e4 100644
--- a/include/linux/dma/qcom-gpi-dma.h
+++ b/include/linux/dma/qcom-gpi-dma.h
@@ -8,6 +8,9 @@
 
 /**
  * enum spi_transfer_cmd - spi transfer commands
+ * @SPI_TX: SPI peripheral TX command
+ * @SPI_RX: SPI peripheral RX command
+ * @SPI_DUPLEX: SPI peripheral Duplex command
  */
 enum spi_transfer_cmd {
 	SPI_TX = 1,
@@ -64,7 +67,7 @@ enum i2c_op {
  * @set_config: set peripheral config
  * @rx_len: receive length for buffer
  * @op: i2c cmd
- * @muli-msg: is part of multi i2c r-w msgs
+ * @multi_msg: is part of multi i2c r-w msgs
  */
 struct gpi_i2c_config {
 	u8 set_config;
-- 
cgit v1.2.3


From 4d94ce88c77e74830a5b9d02ecb8286039ffa494 Mon Sep 17 00:00:00 2001
From: NeilBrown <neil@brown.name>
Date: Wed, 25 Feb 2026 09:17:00 +1100
Subject: VFS: unexport lock_rename(), lock_rename_child(), unlock_rename()

These three function are now only used in namei.c, so they don't need to
be exported.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neil@brown.name>
Link: https://patch.msgid.link/20260224222542.3458677-16-neilb@ownmail.net
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/porting.rst | 7 +++++++
 fs/namei.c                            | 9 +++------
 include/linux/namei.h                 | 3 ---
 3 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 1dd31ab417a2..d02aa57e4477 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1368,3 +1368,10 @@ lifetime, consider using inode_set_cached_link() instead.
 
 lookup_one_qstr_excl() is no longer exported - use start_creating() or
 similar.
+---
+
+** mandatory**
+
+lock_rename(), lock_rename_child(), unlock_rename() are no
+longer available.  Use start_renaming() or similar.
+
diff --git a/fs/namei.c b/fs/namei.c
index a5daa62399d7..77189335bbcc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3775,7 +3775,7 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
 /*
  * p1 and p2 should be directories on the same fs.
  */
-struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
+static struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 {
 	if (p1 == p2) {
 		inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
@@ -3785,12 +3785,11 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 	mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
 	return lock_two_directories(p1, p2);
 }
-EXPORT_SYMBOL(lock_rename);
 
 /*
  * c1 and p2 should be on the same fs.
  */
-struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2)
+static struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2)
 {
 	if (READ_ONCE(c1->d_parent) == p2) {
 		/*
@@ -3827,9 +3826,8 @@ struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2)
 	mutex_unlock(&c1->d_sb->s_vfs_rename_mutex);
 	return NULL;
 }
-EXPORT_SYMBOL(lock_rename_child);
 
-void unlock_rename(struct dentry *p1, struct dentry *p2)
+static void unlock_rename(struct dentry *p1, struct dentry *p2)
 {
 	inode_unlock(p1->d_inode);
 	if (p1 != p2) {
@@ -3837,7 +3835,6 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
 		mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
 	}
 }
-EXPORT_SYMBOL(unlock_rename);
 
 /**
  * __start_renaming - lookup and lock names for rename
diff --git a/include/linux/namei.h b/include/linux/namei.h
index c7a7288cdd25..2ad6dd9987b9 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -165,9 +165,6 @@ extern int follow_down_one(struct path *);
 extern int follow_down(struct path *path, unsigned int flags);
 extern int follow_up(struct path *);
 
-extern struct dentry *lock_rename(struct dentry *, struct dentry *);
-extern struct dentry *lock_rename_child(struct dentry *, struct dentry *);
-extern void unlock_rename(struct dentry *, struct dentry *);
 int start_renaming(struct renamedata *rd, int lookup_flags,
 		   struct qstr *old_last, struct qstr *new_last);
 int start_renaming_dentry(struct renamedata *rd, int lookup_flags,
-- 
cgit v1.2.3


From 5645f805927c9bd4443e6143e487ef3ffea34aaf Mon Sep 17 00:00:00 2001
From: Linus Walleij <linusw@kernel.org>
Date: Fri, 6 Mar 2026 14:22:00 +0100
Subject: gpio: Document line value semantics

It is not clearly documented that the GPIO driver API expect the
driver to get/set the physical level of the GPIO line and the
consumer API will get/set the logic level. Document this in
relevant places.

Reported-by: David Jander <david@protonic.nl>
Signed-off-by: Linus Walleij <linusw@kernel.org>
Link: https://patch.msgid.link/20260306-gpio-doc-levels-v1-1-19928739e400@kernel.org
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
 Documentation/driver-api/gpio/driver.rst | 27 +++++++++++++++++++++++++++
 include/linux/gpio/driver.h              | 10 ++++++++--
 2 files changed, 35 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 85d86f92c41b..a4f160b95089 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -87,6 +87,33 @@ atomic context on realtime kernels (inside hard IRQ handlers and similar
 contexts). Normally this should not be required.
 
 
+GPIO level semantics
+--------------------
+
+The gpip_chip .get/set[_multiple]() line values are clamped to the boolean
+space [0, 1], low level or high level.
+
+Low and high values are defined as physical low on the line in/out to the
+connector such as a physical pad, pin or rail.
+
+The GPIO library has internal logic to handle lines that are active low, such
+as indicated by overstrike or #name in a schematic, and the driver should not
+try to second-guess the logic value of a line.
+
+The way GPIO values are handled by the consumers is that the library present
+the *logical* value to the consumer. A line is *asserted* if its *logical*
+value is 1, and *de-asserted* if its logical value is 0. If inversion is
+required, this is handled by gpiolib and configured using hardware descriptions
+such as device tree or ACPI that can clearly indicate if a line is active
+high or low.
+
+Since electronics commonly insert inverters as driving stages or protection
+buffers in front of a GPIO line it is necessary that this semantic is part
+of the hardware description, so that consumers such as kernel drivers need
+not worry about this, and can for example assert a RESET line tied to a GPIO
+pin by setting it to logic 1 even if it is physically active low.
+
+
 GPIO electrical configuration
 -----------------------------
 
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 5f5ddcbfa445..17511434ed07 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -343,11 +343,17 @@ struct gpio_irq_chip {
  * @direction_output: configures signal "offset" as output, returns 0 on
  *	success or a negative error number. This can be omitted on input-only
  *	or output-only gpio chips.
- * @get: returns value for signal "offset", 0=low, 1=high, or negative error
+ * @get: returns value for signal "offset", 0=low, 1=high, or negative error.
+ *	the low and high values are defined as physical low on the line
+ *	in/out to the connector such as a physical pad, pin or rail. The GPIO
+ *	library has internal logic to handle lines that are active low, such
+ *	as indicated by overstrike or #name in a schematic, and the driver
+ *	should not try to second-guess the logic value of a line.
  * @get_multiple: reads values for multiple signals defined by "mask" and
  *	stores them in "bits", returns 0 on success or negative error
  * @set: assigns output value for signal "offset", returns 0 on success or
- *       negative error value
+ *	negative error value. The output value follows the same semantic
+ *	rules as for @get.
  * @set_multiple: assigns output values for multiple signals defined by
  *                "mask", returns 0 on success or negative error value
  * @set_config: optional hook for all kinds of settings. Uses the same
-- 
cgit v1.2.3


From ad9d28e68f4f9d15b9bde15e1ab79a4f85eff60e Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Fri, 6 Mar 2026 18:22:47 +0100
Subject: reset: gpio: simplify fallback device matching

The of_args field of struct reset_controller_dev was introduced to allow
the reset-gpio driver to pass the phandle arguments back to reset core.
The thing is: it doesn't even have to do it. The core sets the platform
data of the auxiliary device *AND* has access to it later on during the
lookup. This means the field is unneeded and all can happen entirely in
reset core.

Remove the field from the public header and don't set it in
reset-gpio.c. Retrieve the platform data in reset core when needed
instead.

Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/reset/core.c             | 15 ++++++---------
 drivers/reset/reset-gpio.c       |  5 -----
 include/linux/reset-controller.h |  4 ----
 3 files changed, 6 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index 3845e77a8d32..954df36a242e 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -94,9 +94,6 @@ static const char *rcdev_name(struct reset_controller_dev *rcdev)
 	if (rcdev->of_node)
 		return rcdev->of_node->full_name;
 
-	if (rcdev->of_args)
-		return rcdev->of_args->np->full_name;
-
 	return NULL;
 }
 
@@ -125,9 +122,6 @@ static int of_reset_simple_xlate(struct reset_controller_dev *rcdev,
  */
 int reset_controller_register(struct reset_controller_dev *rcdev)
 {
-	if (rcdev->of_node && rcdev->of_args)
-		return -EINVAL;
-
 	if (!rcdev->of_xlate) {
 		rcdev->of_reset_n_cells = 1;
 		rcdev->of_xlate = of_reset_simple_xlate;
@@ -1006,13 +1000,16 @@ static struct reset_controller_dev *__reset_find_rcdev(const struct of_phandle_a
 						       bool gpio_fallback)
 {
 	struct reset_controller_dev *rcdev;
+	struct of_phandle_args *rc_args;
 
 	lockdep_assert_held(&reset_list_mutex);
 
 	list_for_each_entry(rcdev, &reset_controller_list, list) {
-		if (gpio_fallback) {
-			if (rcdev->of_args && of_phandle_args_equal(args,
-								    rcdev->of_args))
+		if (gpio_fallback && rcdev->dev &&
+		    device_is_compatible(rcdev->dev, "reset-gpio")) {
+			rc_args = dev_get_platdata(rcdev->dev);
+
+			if (of_phandle_args_equal(args, rc_args))
 				return rcdev;
 		} else {
 			if (args->np == rcdev->of_node)
diff --git a/drivers/reset/reset-gpio.c b/drivers/reset/reset-gpio.c
index ad5bfe27aaef..6e1c4f990bc0 100644
--- a/drivers/reset/reset-gpio.c
+++ b/drivers/reset/reset-gpio.c
@@ -56,12 +56,8 @@ static int reset_gpio_probe(struct auxiliary_device *adev,
 			    const struct auxiliary_device_id *id)
 {
 	struct device *dev = &adev->dev;
-	struct of_phandle_args *platdata = dev_get_platdata(dev);
 	struct reset_gpio_priv *priv;
 
-	if (!platdata)
-		return -EINVAL;
-
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
@@ -76,7 +72,6 @@ static int reset_gpio_probe(struct auxiliary_device *adev,
 	priv->rc.ops = &reset_gpio_ops;
 	priv->rc.owner = THIS_MODULE;
 	priv->rc.dev = dev;
-	priv->rc.of_args = platdata;
 
 	/* Cells to match GPIO specifier, but it's not really used */
 	priv->rc.of_reset_n_cells = 2;
diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h
index 46514cb1b9e0..aa95b460fdf8 100644
--- a/include/linux/reset-controller.h
+++ b/include/linux/reset-controller.h
@@ -35,9 +35,6 @@ struct of_phandle_args;
  * @reset_control_head: head of internal list of requested reset controls
  * @dev: corresponding driver model device struct
  * @of_node: corresponding device tree node as phandle target
- * @of_args: for reset-gpios controllers: corresponding phandle args with
- *           of_node and GPIO number complementing of_node; either this or
- *           of_node should be present
  * @of_reset_n_cells: number of cells in reset line specifiers
  * @of_xlate: translation function to translate from specifier as found in the
  *            device tree to id as given to the reset control ops, defaults
@@ -51,7 +48,6 @@ struct reset_controller_dev {
 	struct list_head reset_control_head;
 	struct device *dev;
 	struct device_node *of_node;
-	const struct of_phandle_args *of_args;
 	int of_reset_n_cells;
 	int (*of_xlate)(struct reset_controller_dev *rcdev,
 			const struct of_phandle_args *reset_spec);
-- 
cgit v1.2.3


From 44a0acb2caca3bfd0ca459fbf0b19be75f1819c0 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Fri, 6 Mar 2026 18:22:53 +0100
Subject: reset: protect struct reset_controller_dev with its own mutex

Currently we use a single, global mutex - misleadingly names
reset_list_mutex - to protect the global list of reset devices,
per-controller list of reset control handles and also internal fields of
struct reset_control. Locking can be made a lot more fine-grained if we
use a separate mutex for serializing operations on the list AND
accessing the reset controller device.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/reset/core.c             | 44 ++++++++++++++++++++++++----------------
 include/linux/reset-controller.h |  3 +++
 2 files changed, 30 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index e6c12fbebca9..acd9d10b1ceb 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -131,6 +131,7 @@ int reset_controller_register(struct reset_controller_dev *rcdev)
 	}
 
 	INIT_LIST_HEAD(&rcdev->reset_control_head);
+	mutex_init(&rcdev->lock);
 
 	guard(mutex)(&reset_list_mutex);
 
@@ -143,6 +144,8 @@ EXPORT_SYMBOL_GPL(reset_controller_register);
 static void reset_controller_remove(struct reset_controller_dev *rcdev,
 				    struct reset_control *rstc)
 {
+	lockdep_assert_held(&rcdev->lock);
+
 	list_del(&rstc->list);
 	module_put(rcdev->owner);
 	put_device(rcdev->dev);
@@ -156,19 +159,22 @@ void reset_controller_unregister(struct reset_controller_dev *rcdev)
 {
 	struct reset_control *rstc, *pos;
 
-	guard(mutex)(&reset_list_mutex);
-
-	list_del(&rcdev->list);
+	scoped_guard(mutex, &reset_list_mutex)
+		list_del(&rcdev->list);
 
-	/*
-	 * Numb but don't free the remaining reset control handles that are
-	 * still held by consumers.
-	 */
-	list_for_each_entry_safe(rstc, pos, &rcdev->reset_control_head, list) {
-		rcu_assign_pointer(rstc->rcdev, NULL);
-		synchronize_srcu(&rstc->srcu);
-		reset_controller_remove(rcdev, rstc);
+	scoped_guard(mutex, &rcdev->lock) {
+		/*
+		 * Numb but don't free the remaining reset control handles that are
+		 * still held by consumers.
+		 */
+		list_for_each_entry_safe(rstc, pos, &rcdev->reset_control_head, list) {
+			rcu_assign_pointer(rstc->rcdev, NULL);
+			synchronize_srcu(&rstc->srcu);
+			reset_controller_remove(rcdev, rstc);
+		}
 	}
+
+	mutex_destroy(&rcdev->lock);
 }
 EXPORT_SYMBOL_GPL(reset_controller_unregister);
 
@@ -712,10 +718,12 @@ int reset_control_acquire(struct reset_control *rstc)
 	if (!rcdev)
 		return -ENODEV;
 
-	list_for_each_entry(rc, &rcdev->reset_control_head, list) {
-		if (rstc != rc && rstc->id == rc->id) {
-			if (rc->acquired)
-				return -EBUSY;
+	scoped_guard(mutex, &rcdev->lock) {
+		list_for_each_entry(rc, &rcdev->reset_control_head, list) {
+			if (rstc != rc && rstc->id == rc->id) {
+				if (rc->acquired)
+					return -EBUSY;
+			}
 		}
 	}
 
@@ -806,7 +814,7 @@ __reset_control_get_internal(struct reset_controller_dev *rcdev,
 	struct reset_control *rstc;
 	int ret;
 
-	lockdep_assert_held(&reset_list_mutex);
+	lockdep_assert_held(&rcdev->lock);
 
 	/* Expect callers to filter out OPTIONAL and DEASSERTED bits */
 	if (WARN_ON(flags & ~(RESET_CONTROL_FLAGS_BIT_SHARED |
@@ -868,8 +876,10 @@ static void __reset_control_release(struct kref *kref)
 
 	scoped_guard(srcu, &rstc->srcu) {
 		rcdev = rcu_replace_pointer(rstc->rcdev, NULL, true);
-		if (rcdev)
+		if (rcdev) {
+			guard(mutex)(&rcdev->lock);
 			reset_controller_remove(rcdev, rstc);
+		}
 	}
 
 	synchronize_srcu(&rstc->srcu);
diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h
index aa95b460fdf8..185d2a9bd7cd 100644
--- a/include/linux/reset-controller.h
+++ b/include/linux/reset-controller.h
@@ -3,6 +3,7 @@
 #define _LINUX_RESET_CONTROLLER_H_
 
 #include <linux/list.h>
+#include <linux/mutex.h>
 
 struct reset_controller_dev;
 
@@ -40,6 +41,7 @@ struct of_phandle_args;
  *            device tree to id as given to the reset control ops, defaults
  *            to :c:func:`of_reset_simple_xlate`.
  * @nr_resets: number of reset controls in this reset controller device
+ * @lock: protects the reset control list from concurrent access
  */
 struct reset_controller_dev {
 	const struct reset_control_ops *ops;
@@ -52,6 +54,7 @@ struct reset_controller_dev {
 	int (*of_xlate)(struct reset_controller_dev *rcdev,
 			const struct of_phandle_args *reset_spec);
 	unsigned int nr_resets;
+	struct mutex lock;
 };
 
 #if IS_ENABLED(CONFIG_RESET_CONTROLLER)
-- 
cgit v1.2.3


From ba8dbbb14b7e6734afbb5ba37d0679831aa3d590 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Fri, 6 Mar 2026 18:22:56 +0100
Subject: reset: convert the core API to using firmware nodes

In order to simplify the commit converting the internals of reset core
to using firmware nodes, first convert the user-facing API. Modify the
signature of the core consumer functions but leave the specialized
wrappers as is to avoid modifying users for now.

No functional change intended.

Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 Documentation/driver-api/reset.rst |  1 -
 drivers/reset/core.c               | 33 ++++++++++++++++-------------
 include/linux/reset.h              | 43 +++++++++++++++++++++++++-------------
 3 files changed, 46 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/reset.rst b/Documentation/driver-api/reset.rst
index f773100daaa4..7a6571849664 100644
--- a/Documentation/driver-api/reset.rst
+++ b/Documentation/driver-api/reset.rst
@@ -198,7 +198,6 @@ query the reset line status using reset_control_status().
                reset_control_rearm
                reset_control_put
                of_reset_control_get_count
-               of_reset_control_array_get
                devm_reset_control_array_get
                reset_control_get_count
 
diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index f1b644a86ad0..0da5079ea9db 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -1061,7 +1061,7 @@ static int __reset_add_reset_gpio_device(struct device_node *np,
 	rgpio_dev->of_args = *args;
 	/*
 	 * We keep the device_node reference, but of_args.np is put at the end
-	 * of __of_reset_control_get(), so get it one more time.
+	 * of __fwnode_reset_control_get(), so get it one more time.
 	 * Hold reference as long as rgpio_dev memory is valid.
 	 */
 	of_node_get(rgpio_dev->of_args.np);
@@ -1115,18 +1115,19 @@ static struct reset_controller_dev *__reset_find_rcdev(const struct of_phandle_a
 }
 
 struct reset_control *
-__of_reset_control_get(struct device_node *node, const char *id, int index,
-		       enum reset_control_flags flags)
+__fwnode_reset_control_get(struct fwnode_handle *fwnode, const char *id, int index,
+			   enum reset_control_flags flags)
 {
 	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
 	bool gpio_fallback = false;
+	struct device_node *node = to_of_node(fwnode);
 	struct reset_control *rstc = ERR_PTR(-EINVAL);
 	struct reset_controller_dev *rcdev;
 	struct of_phandle_args args;
 	int rstc_id;
 	int ret;
 
-	if (!node)
+	if (!fwnode)
 		return ERR_PTR(-EINVAL);
 
 	if (id) {
@@ -1193,7 +1194,7 @@ out_put:
 
 	return rstc;
 }
-EXPORT_SYMBOL_GPL(__of_reset_control_get);
+EXPORT_SYMBOL_GPL(__fwnode_reset_control_get);
 
 struct reset_control *__reset_control_get(struct device *dev, const char *id,
 					  int index, enum reset_control_flags flags)
@@ -1201,12 +1202,13 @@ struct reset_control *__reset_control_get(struct device *dev, const char *id,
 	bool shared = flags & RESET_CONTROL_FLAGS_BIT_SHARED;
 	bool acquired = flags & RESET_CONTROL_FLAGS_BIT_ACQUIRED;
 	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
 
 	if (WARN_ON(shared && acquired))
 		return ERR_PTR(-EINVAL);
 
-	if (dev->of_node)
-		return __of_reset_control_get(dev->of_node, id, index, flags);
+	if (fwnode)
+		return __fwnode_reset_control_get(fwnode, id, index, flags);
 
 	return optional ? NULL : ERR_PTR(-ENOENT);
 }
@@ -1468,23 +1470,24 @@ static int fwnode_reset_control_get_count(struct fwnode_handle *fwnode)
 }
 
 /**
- * of_reset_control_array_get - Get a list of reset controls using
- *				device node.
+ * fwnode_reset_control_array_get - Get a list of reset controls using
+ *                                  a firmware node.
  *
- * @np: device node for the device that requests the reset controls array
+ * @fwnode: firmware node for the device that requests the reset controls array
  * @flags: whether reset controls are shared, optional, acquired
  *
  * Returns pointer to allocated reset_control on success or error on failure
  */
 struct reset_control *
-of_reset_control_array_get(struct device_node *np, enum reset_control_flags flags)
+fwnode_reset_control_array_get(struct fwnode_handle *fwnode,
+			       enum reset_control_flags flags)
 {
 	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
 	struct reset_control_array *resets;
 	struct reset_control *rstc;
 	int num, i;
 
-	num = fwnode_reset_control_get_count(of_fwnode_handle(np));
+	num = fwnode_reset_control_get_count(fwnode);
 	if (num < 0)
 		return optional ? NULL : ERR_PTR(num);
 
@@ -1494,7 +1497,7 @@ of_reset_control_array_get(struct device_node *np, enum reset_control_flags flag
 	resets->num_rstcs = num;
 
 	for (i = 0; i < num; i++) {
-		rstc = __of_reset_control_get(np, NULL, i, flags);
+		rstc = __fwnode_reset_control_get(fwnode, NULL, i, flags);
 		if (IS_ERR(rstc))
 			goto err_rst;
 		resets->rstc[i] = rstc;
@@ -1511,7 +1514,7 @@ err_rst:
 
 	return rstc;
 }
-EXPORT_SYMBOL_GPL(of_reset_control_array_get);
+EXPORT_SYMBOL_GPL(fwnode_reset_control_array_get);
 
 /**
  * devm_reset_control_array_get - Resource managed reset control array get
@@ -1535,7 +1538,7 @@ devm_reset_control_array_get(struct device *dev, enum reset_control_flags flags)
 	if (!ptr)
 		return ERR_PTR(-ENOMEM);
 
-	rstc = of_reset_control_array_get(dev->of_node, flags);
+	rstc = fwnode_reset_control_array_get(dev_fwnode(dev), flags);
 	if (IS_ERR_OR_NULL(rstc)) {
 		devres_free(ptr);
 		return rstc;
diff --git a/include/linux/reset.h b/include/linux/reset.h
index 44f9e3415f92..9c391cf0c822 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -5,10 +5,12 @@
 #include <linux/bits.h>
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/of.h>
 #include <linux/types.h>
 
 struct device;
 struct device_node;
+struct fwnode_handle;
 struct reset_control;
 
 /**
@@ -84,7 +86,7 @@ int reset_control_bulk_deassert(int num_rstcs, struct reset_control_bulk_data *r
 int reset_control_bulk_acquire(int num_rstcs, struct reset_control_bulk_data *rstcs);
 void reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs);
 
-struct reset_control *__of_reset_control_get(struct device_node *node,
+struct reset_control *__fwnode_reset_control_get(struct fwnode_handle *fwnode,
 				     const char *id, int index, enum reset_control_flags flags);
 struct reset_control *__reset_control_get(struct device *dev, const char *id,
 					  int index, enum reset_control_flags flags);
@@ -103,7 +105,8 @@ int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs,
 
 struct reset_control *devm_reset_control_array_get(struct device *dev,
 						   enum reset_control_flags flags);
-struct reset_control *of_reset_control_array_get(struct device_node *np, enum reset_control_flags);
+struct reset_control *fwnode_reset_control_array_get(struct fwnode_handle *fwnode,
+						     enum reset_control_flags);
 
 int reset_control_get_count(struct device *dev);
 
@@ -152,8 +155,8 @@ static inline int __device_reset(struct device *dev, bool optional)
 	return optional ? 0 : -ENOTSUPP;
 }
 
-static inline struct reset_control *__of_reset_control_get(
-					struct device_node *node,
+static inline struct reset_control *__fwnode_reset_control_get(
+					struct fwnode_handle *fwnode,
 					const char *id, int index, enum reset_control_flags flags)
 {
 	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
@@ -242,7 +245,7 @@ devm_reset_control_array_get(struct device *dev, enum reset_control_flags flags)
 }
 
 static inline struct reset_control *
-of_reset_control_array_get(struct device_node *np, enum reset_control_flags flags)
+fwnode_reset_control_array_get(struct fwnode_handle *fwnode, enum reset_control_flags flags)
 {
 	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
 
@@ -500,7 +503,8 @@ reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs,
 static inline struct reset_control *of_reset_control_get_exclusive(
 				struct device_node *node, const char *id)
 {
-	return __of_reset_control_get(node, id, 0, RESET_CONTROL_EXCLUSIVE);
+	return __fwnode_reset_control_get(of_fwnode_handle(node), id, 0,
+					  RESET_CONTROL_EXCLUSIVE);
 }
 
 /**
@@ -520,7 +524,8 @@ static inline struct reset_control *of_reset_control_get_exclusive(
 static inline struct reset_control *of_reset_control_get_optional_exclusive(
 				struct device_node *node, const char *id)
 {
-	return __of_reset_control_get(node, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE);
+	return __fwnode_reset_control_get(of_fwnode_handle(node), id, 0,
+					  RESET_CONTROL_OPTIONAL_EXCLUSIVE);
 }
 
 /**
@@ -545,7 +550,8 @@ static inline struct reset_control *of_reset_control_get_optional_exclusive(
 static inline struct reset_control *of_reset_control_get_shared(
 				struct device_node *node, const char *id)
 {
-	return __of_reset_control_get(node, id, 0, RESET_CONTROL_SHARED);
+	return __fwnode_reset_control_get(of_fwnode_handle(node), id, 0,
+					  RESET_CONTROL_SHARED);
 }
 
 /**
@@ -562,7 +568,8 @@ static inline struct reset_control *of_reset_control_get_shared(
 static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 					struct device_node *node, int index)
 {
-	return __of_reset_control_get(node, NULL, index, RESET_CONTROL_EXCLUSIVE);
+	return __fwnode_reset_control_get(of_fwnode_handle(node), NULL, index,
+					  RESET_CONTROL_EXCLUSIVE);
 }
 
 /**
@@ -590,7 +597,8 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 static inline struct reset_control *of_reset_control_get_shared_by_index(
 					struct device_node *node, int index)
 {
-	return __of_reset_control_get(node, NULL, index, RESET_CONTROL_SHARED);
+	return __fwnode_reset_control_get(of_fwnode_handle(node), NULL, index,
+					  RESET_CONTROL_SHARED);
 }
 
 /**
@@ -1032,30 +1040,35 @@ devm_reset_control_array_get_optional_shared(struct device *dev)
 static inline struct reset_control *
 of_reset_control_array_get_exclusive(struct device_node *node)
 {
-	return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE);
+	return fwnode_reset_control_array_get(of_fwnode_handle(node),
+					      RESET_CONTROL_EXCLUSIVE);
 }
 
 static inline struct reset_control *
 of_reset_control_array_get_exclusive_released(struct device_node *node)
 {
-	return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE_RELEASED);
+	return fwnode_reset_control_array_get(of_fwnode_handle(node),
+					      RESET_CONTROL_EXCLUSIVE_RELEASED);
 }
 
 static inline struct reset_control *
 of_reset_control_array_get_shared(struct device_node *node)
 {
-	return of_reset_control_array_get(node, RESET_CONTROL_SHARED);
+	return fwnode_reset_control_array_get(of_fwnode_handle(node),
+					      RESET_CONTROL_SHARED);
 }
 
 static inline struct reset_control *
 of_reset_control_array_get_optional_exclusive(struct device_node *node)
 {
-	return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_EXCLUSIVE);
+	return fwnode_reset_control_array_get(of_fwnode_handle(node),
+					      RESET_CONTROL_OPTIONAL_EXCLUSIVE);
 }
 
 static inline struct reset_control *
 of_reset_control_array_get_optional_shared(struct device_node *node)
 {
-	return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_SHARED);
+	return fwnode_reset_control_array_get(of_fwnode_handle(node),
+					      RESET_CONTROL_OPTIONAL_SHARED);
 }
 #endif
-- 
cgit v1.2.3


From 9035073d0ef1de813c6335239250248bfe0a64aa Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Fri, 6 Mar 2026 18:22:57 +0100
Subject: reset: convert reset core to using firmware nodes

With everything else now in place, we can convert the remaining parts of
the reset subsystem to becoming fwnode-agnostic - meaning it will work
with all kinds of firmware nodes, not only devicetree.

To that end: extend struct reset_controller_dev with fields taking
information relevant for using firmware nodes (which mirrors what we
already do for OF-nodes) and limit using of_ APIs only to where it's
absolutely necessary (mostly around the of_xlate callback).

For backward compatibility of existing drivers we still support OF-nodes
but firmware nodes become the preferred method.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/reset/core.c             | 166 +++++++++++++++++++++++----------------
 include/linux/reset-controller.h |  14 +++-
 2 files changed, 112 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index 0da5079ea9db..e625cf59cfb0 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -81,13 +81,13 @@ struct reset_control_array {
 
 /**
  * struct reset_gpio_lookup - lookup key for ad-hoc created reset-gpio devices
- * @of_args: phandle to the reset controller with all the args like GPIO number
+ * @ref_args: Reference to the reset controller with all the args like GPIO number
  * @swnode: Software node containing the reference to the GPIO provider
  * @list: list entry for the reset_gpio_lookup_list
  * @adev: Auxiliary device representing the reset controller
  */
 struct reset_gpio_lookup {
-	struct of_phandle_args of_args;
+	struct fwnode_reference_args ref_args;
 	struct fwnode_handle *swnode;
 	struct list_head list;
 	struct auxiliary_device adev;
@@ -98,24 +98,24 @@ static const char *rcdev_name(struct reset_controller_dev *rcdev)
 	if (rcdev->dev)
 		return dev_name(rcdev->dev);
 
-	if (rcdev->of_node)
-		return rcdev->of_node->full_name;
+	if (rcdev->fwnode)
+		return fwnode_get_name(rcdev->fwnode);
 
 	return NULL;
 }
 
 /**
- * of_reset_simple_xlate - translate reset_spec to the reset line number
+ * fwnode_reset_simple_xlate - translate reset_spec to the reset line number
  * @rcdev: a pointer to the reset controller device
- * @reset_spec: reset line specifier as found in the device tree
+ * @reset_spec: reset line specifier as found in firmware
  *
- * This static translation function is used by default if of_xlate in
- * :c:type:`reset_controller_dev` is not set. It is useful for all reset
- * controllers with 1:1 mapping, where reset lines can be indexed by number
- * without gaps.
+ * This static translation function is used by default if neither fwnode_xlate
+ * not of_xlate in :c:type:`reset_controller_dev` is not set. It is useful for
+ * all reset controllers with 1:1 mapping, where reset lines can be indexed by
+ * number without gaps.
  */
-static int of_reset_simple_xlate(struct reset_controller_dev *rcdev,
-				 const struct of_phandle_args *reset_spec)
+static int fwnode_reset_simple_xlate(struct reset_controller_dev *rcdev,
+				     const struct fwnode_reference_args *reset_spec)
 {
 	if (reset_spec->args[0] >= rcdev->nr_resets)
 		return -EINVAL;
@@ -129,9 +129,23 @@ static int of_reset_simple_xlate(struct reset_controller_dev *rcdev,
  */
 int reset_controller_register(struct reset_controller_dev *rcdev)
 {
-	if (!rcdev->of_xlate) {
-		rcdev->of_reset_n_cells = 1;
-		rcdev->of_xlate = of_reset_simple_xlate;
+	if ((rcdev->of_node && rcdev->fwnode) || (rcdev->of_xlate && rcdev->fwnode_xlate))
+		return -EINVAL;
+
+	if (!rcdev->of_node && !rcdev->fwnode) {
+		rcdev->fwnode = dev_fwnode(rcdev->dev);
+		if (!rcdev->fwnode)
+			return -EINVAL;
+	}
+
+	if (rcdev->of_node) {
+		rcdev->fwnode = of_fwnode_handle(rcdev->of_node);
+		rcdev->fwnode_reset_n_cells = rcdev->of_reset_n_cells;
+	}
+
+	if (rcdev->fwnode && !rcdev->fwnode_xlate) {
+		rcdev->fwnode_reset_n_cells = 1;
+		rcdev->fwnode_xlate = fwnode_reset_simple_xlate;
 	}
 
 	INIT_LIST_HEAD(&rcdev->reset_control_head);
@@ -931,7 +945,7 @@ static int reset_create_gpio_aux_device(struct reset_gpio_lookup *rgpio_dev,
 	adev->id = id;
 	adev->name = "gpio";
 	adev->dev.parent = parent;
-	adev->dev.platform_data = &rgpio_dev->of_args;
+	adev->dev.platform_data = &rgpio_dev->ref_args;
 	adev->dev.release = reset_gpio_aux_device_release;
 	device_set_node(&adev->dev, rgpio_dev->swnode);
 
@@ -951,18 +965,18 @@ static int reset_create_gpio_aux_device(struct reset_gpio_lookup *rgpio_dev,
 	return 0;
 }
 
-static void reset_gpio_add_devlink(struct device_node *np,
+static void reset_gpio_add_devlink(struct fwnode_handle *fwnode,
 				   struct reset_gpio_lookup *rgpio_dev)
 {
 	struct device *consumer;
 
 	/*
-	 * We must use get_dev_from_fwnode() and not of_find_device_by_node()
+	 * We must use get_dev_from_fwnode() and not ref_find_device_by_node()
 	 * because the latter only considers the platform bus while we want to
 	 * get consumers of any kind that can be associated with firmware
 	 * nodes: auxiliary, soundwire, etc.
 	 */
-	consumer = get_dev_from_fwnode(of_fwnode_handle(np));
+	consumer = get_dev_from_fwnode(fwnode);
 	if (consumer) {
 		if (!device_link_add(consumer, &rgpio_dev->adev.dev,
 				     DL_FLAG_AUTOREMOVE_CONSUMER))
@@ -982,15 +996,23 @@ static void reset_gpio_add_devlink(struct device_node *np,
 	 */
 }
 
+/* TODO: move it out into drivers/base/ */
+static bool fwnode_reference_args_equal(const struct fwnode_reference_args *left,
+					const struct fwnode_reference_args *right)
+{
+	return left->fwnode == right->fwnode && left->nargs == right->nargs &&
+	       !memcmp(left->args, right->args, sizeof(left->args[0]) * left->nargs);
+}
+
 /*
  * @np: OF-node associated with the consumer
- * @args: phandle to the GPIO provider with all the args like GPIO number
+ * @args: Reference to the GPIO provider with all the args like GPIO number
  */
-static int __reset_add_reset_gpio_device(struct device_node *np,
-					 const struct of_phandle_args *args)
+static int __reset_add_reset_gpio_device(struct fwnode_handle *fwnode,
+					 const struct fwnode_reference_args *args)
 {
 	struct property_entry properties[3] = { };
-	unsigned int offset, of_flags, lflags;
+	unsigned int offset, flags, lflags;
 	struct reset_gpio_lookup *rgpio_dev;
 	struct device *parent;
 	int ret, prop = 0;
@@ -1001,7 +1023,7 @@ static int __reset_add_reset_gpio_device(struct device_node *np,
 	 * args[1]: GPIO flags
 	 * TODO: Handle other cases.
 	 */
-	if (args->args_count != 2)
+	if (args->nargs != 2)
 		return -ENOENT;
 
 	/*
@@ -1012,7 +1034,7 @@ static int __reset_add_reset_gpio_device(struct device_node *np,
 	lockdep_assert_not_held(&reset_list_mutex);
 
 	offset = args->args[0];
-	of_flags = args->args[1];
+	flags = args->args[1];
 
 	/*
 	 * Later we map GPIO flags between OF and Linux, however not all
@@ -1022,33 +1044,31 @@ static int __reset_add_reset_gpio_device(struct device_node *np,
 	 * FIXME: Find a better way of translating OF flags to GPIO lookup
 	 * flags.
 	 */
-	if (of_flags > GPIO_ACTIVE_LOW) {
+	if (flags > GPIO_ACTIVE_LOW) {
 		pr_err("reset-gpio code does not support GPIO flags %u for GPIO %u\n",
-		       of_flags, offset);
+		       flags, offset);
 		return -EINVAL;
 	}
 
 	struct gpio_device *gdev __free(gpio_device_put) =
-		gpio_device_find_by_fwnode(of_fwnode_handle(args->np));
+			gpio_device_find_by_fwnode(args->fwnode);
 	if (!gdev)
 		return -EPROBE_DEFER;
 
 	guard(mutex)(&reset_gpio_lookup_mutex);
 
 	list_for_each_entry(rgpio_dev, &reset_gpio_lookup_list, list) {
-		if (args->np == rgpio_dev->of_args.np) {
-			if (of_phandle_args_equal(args, &rgpio_dev->of_args)) {
-				/*
-				 * Already on the list, create the device link
-				 * and stop here.
-				 */
-				reset_gpio_add_devlink(np, rgpio_dev);
-				return 0;
-			}
+		if (fwnode_reference_args_equal(args, &rgpio_dev->ref_args)) {
+			/*
+			 * Already on the list, create the device link
+			 * and stop here.
+			 */
+			reset_gpio_add_devlink(fwnode, rgpio_dev);
+			return 0;
 		}
 	}
 
-	lflags = GPIO_PERSISTENT | (of_flags & GPIO_ACTIVE_LOW);
+	lflags = GPIO_PERSISTENT | (flags & GPIO_ACTIVE_LOW);
 	parent = gpio_device_to_device(gdev);
 	properties[prop++] = PROPERTY_ENTRY_STRING("compatible", "reset-gpio");
 	properties[prop++] = PROPERTY_ENTRY_GPIO("reset-gpios", parent->fwnode, offset, lflags);
@@ -1058,43 +1078,43 @@ static int __reset_add_reset_gpio_device(struct device_node *np,
 	if (!rgpio_dev)
 		return -ENOMEM;
 
-	rgpio_dev->of_args = *args;
+	rgpio_dev->ref_args = *args;
 	/*
-	 * We keep the device_node reference, but of_args.np is put at the end
-	 * of __fwnode_reset_control_get(), so get it one more time.
+	 * We keep the fwnode_handle reference, but ref_args.fwnode is put at
+	 * the end of __fwnode_reset_control_get(), so get it one more time.
 	 * Hold reference as long as rgpio_dev memory is valid.
 	 */
-	of_node_get(rgpio_dev->of_args.np);
+	fwnode_handle_get(rgpio_dev->ref_args.fwnode);
 
 	rgpio_dev->swnode = fwnode_create_software_node(properties, NULL);
 	if (IS_ERR(rgpio_dev->swnode)) {
 		ret = PTR_ERR(rgpio_dev->swnode);
-		goto err_put_of_node;
+		goto err_put_fwnode;
 	}
 
 	ret = reset_create_gpio_aux_device(rgpio_dev, parent);
 	if (ret)
 		goto err_del_swnode;
 
-	reset_gpio_add_devlink(np, rgpio_dev);
+	reset_gpio_add_devlink(fwnode, rgpio_dev);
 	list_add(&rgpio_dev->list, &reset_gpio_lookup_list);
 
 	return 0;
 
 err_del_swnode:
 	fwnode_remove_software_node(rgpio_dev->swnode);
-err_put_of_node:
-	of_node_put(rgpio_dev->of_args.np);
+err_put_fwnode:
+	fwnode_handle_put(rgpio_dev->ref_args.fwnode);
 	kfree(rgpio_dev);
 
 	return ret;
 }
 
-static struct reset_controller_dev *__reset_find_rcdev(const struct of_phandle_args *args,
-						       bool gpio_fallback)
+static struct reset_controller_dev *
+__reset_find_rcdev(const struct fwnode_reference_args *args, bool gpio_fallback)
 {
+	struct fwnode_reference_args *rc_args;
 	struct reset_controller_dev *rcdev;
-	struct of_phandle_args *rc_args;
 
 	lockdep_assert_held(&reset_list_mutex);
 
@@ -1103,10 +1123,10 @@ static struct reset_controller_dev *__reset_find_rcdev(const struct of_phandle_a
 		    device_is_compatible(rcdev->dev, "reset-gpio")) {
 			rc_args = dev_get_platdata(rcdev->dev);
 
-			if (of_phandle_args_equal(args, rc_args))
+			if (fwnode_reference_args_equal(args, rc_args))
 				return rcdev;
 		} else {
-			if (args->np == rcdev->of_node)
+			if (args->fwnode == rcdev->fwnode)
 				return rcdev;
 		}
 	}
@@ -1120,27 +1140,26 @@ __fwnode_reset_control_get(struct fwnode_handle *fwnode, const char *id, int ind
 {
 	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
 	bool gpio_fallback = false;
-	struct device_node *node = to_of_node(fwnode);
 	struct reset_control *rstc = ERR_PTR(-EINVAL);
 	struct reset_controller_dev *rcdev;
-	struct of_phandle_args args;
-	int rstc_id;
+	struct fwnode_reference_args args;
+	struct of_phandle_args of_args;
+	int rstc_id = -EINVAL;
 	int ret;
 
 	if (!fwnode)
 		return ERR_PTR(-EINVAL);
 
 	if (id) {
-		index = of_property_match_string(node,
-						 "reset-names", id);
+		index = fwnode_property_match_string(fwnode, "reset-names", id);
 		if (index == -EILSEQ)
 			return ERR_PTR(index);
 		if (index < 0)
 			return optional ? NULL : ERR_PTR(-ENOENT);
 	}
 
-	ret = of_parse_phandle_with_args(node, "resets", "#reset-cells",
-					 index, &args);
+	ret = fwnode_property_get_reference_args(fwnode, "resets", "#reset-cells",
+						 0, index, &args);
 	if (ret == -EINVAL)
 		return ERR_PTR(ret);
 	if (ret) {
@@ -1151,16 +1170,16 @@ __fwnode_reset_control_get(struct fwnode_handle *fwnode, const char *id, int ind
 		 * There can be only one reset-gpio for regular devices, so
 		 * don't bother with the "reset-gpios" phandle index.
 		 */
-		ret = of_parse_phandle_with_args(node, "reset-gpios", "#gpio-cells",
-						 0, &args);
+		ret = fwnode_property_get_reference_args(fwnode, "reset-gpios",
+							 "#gpio-cells", 0, 0, &args);
 		if (ret)
 			return optional ? NULL : ERR_PTR(ret);
 
 		gpio_fallback = true;
 
-		ret = __reset_add_reset_gpio_device(node, &args);
+		ret = __reset_add_reset_gpio_device(fwnode, &args);
 		if (ret) {
-			of_node_put(args.np);
+			fwnode_handle_put(args.fwnode);
 			return ERR_PTR(ret);
 		}
 	}
@@ -1173,15 +1192,30 @@ __fwnode_reset_control_get(struct fwnode_handle *fwnode, const char *id, int ind
 		goto out_put;
 	}
 
-	if (WARN_ON(args.args_count != rcdev->of_reset_n_cells)) {
+	if (WARN_ON(args.nargs != rcdev->fwnode_reset_n_cells)) {
 		rstc = ERR_PTR(-EINVAL);
 		goto out_put;
 	}
 
-	rstc_id = rcdev->of_xlate(rcdev, &args);
+	if (rcdev->of_xlate && is_of_node(fwnode)) {
+		ret = of_parse_phandle_with_args(to_of_node(fwnode),
+					 gpio_fallback ? "reset-gpios" : "resets",
+					 gpio_fallback ? "#gpio-cells" : "#reset-cells",
+					 gpio_fallback ? 0 : index,
+					 &of_args);
+		if (ret) {
+			rstc = ERR_PTR(ret);
+			goto out_put;
+		}
+
+		rstc_id = rcdev->of_xlate(rcdev, &of_args);
+		of_node_put(of_args.np);
+	} else if (rcdev->fwnode_xlate) {
+		rstc_id = rcdev->fwnode_xlate(rcdev, &args);
+	}
 	if (rstc_id < 0) {
 		rstc = ERR_PTR(rstc_id);
-		goto out_put;
+			goto out_put;
 	}
 
 	flags &= ~RESET_CONTROL_FLAGS_BIT_OPTIONAL;
@@ -1190,7 +1224,7 @@ __fwnode_reset_control_get(struct fwnode_handle *fwnode, const char *id, int ind
 		rstc = __reset_control_get_internal(rcdev, rstc_id, flags);
 
 out_put:
-	of_node_put(args.np);
+	fwnode_handle_put(args.fwnode);
 
 	return rstc;
 }
diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h
index 185d2a9bd7cd..52a5a4e81f18 100644
--- a/include/linux/reset-controller.h
+++ b/include/linux/reset-controller.h
@@ -5,6 +5,8 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 
+struct fwnode_handle;
+struct fwnode_reference_args;
 struct reset_controller_dev;
 
 /**
@@ -38,8 +40,12 @@ struct of_phandle_args;
  * @of_node: corresponding device tree node as phandle target
  * @of_reset_n_cells: number of cells in reset line specifiers
  * @of_xlate: translation function to translate from specifier as found in the
- *            device tree to id as given to the reset control ops, defaults
- *            to :c:func:`of_reset_simple_xlate`.
+ *            device tree to id as given to the reset control ops
+ * @fwnode: firmware node associated with this device
+ * @fwnode_reset_n_cells: number of cells in reset line specifiers
+ * @fwnode_xlate: translation function to translate from firmware specifier to
+ *                id as given to the reset control ops, defaults to
+ *                :c:func:`fwnode_reset_simple_xlate`
  * @nr_resets: number of reset controls in this reset controller device
  * @lock: protects the reset control list from concurrent access
  */
@@ -53,6 +59,10 @@ struct reset_controller_dev {
 	int of_reset_n_cells;
 	int (*of_xlate)(struct reset_controller_dev *rcdev,
 			const struct of_phandle_args *reset_spec);
+	struct fwnode_handle *fwnode;
+	int fwnode_reset_n_cells;
+	int (*fwnode_xlate)(struct reset_controller_dev *rcdev,
+			    const struct fwnode_reference_args *reset_spec);
 	unsigned int nr_resets;
 	struct mutex lock;
 };
-- 
cgit v1.2.3


From b6420bd5aa0c374331bad6c0fa2eb5f0f87cf5a0 Mon Sep 17 00:00:00 2001
From: Jialu Xu <xujialu@vimux.org>
Date: Sat, 7 Mar 2026 11:06:26 +0800
Subject: gpio: remove of_get_named_gpio() and <linux/of_gpio.h>

All in-tree consumers have been converted to the descriptor-based API.
Remove the deprecated of_get_named_gpio() helper, delete the
<linux/of_gpio.h> header, and drop the corresponding entry from
MAINTAINERS.

Also remove the completed TODO item for this cleanup.

Signed-off-by: Jialu Xu <xujialu@vimux.org>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Link: https://patch.msgid.link/02ABDA1F9E3FAF1F+20260307030623.3495092-6-xujialu@vimux.org
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
 MAINTAINERS               |  1 -
 drivers/gpio/TODO         | 28 ----------------------------
 drivers/gpio/gpiolib-of.c | 27 ---------------------------
 include/linux/of_gpio.h   | 38 --------------------------------------
 4 files changed, 94 deletions(-)
 delete mode 100644 include/linux/of_gpio.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 55af015174a5..24b3f8d2a64c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10959,7 +10959,6 @@ F:	drivers/gpio/
 F:	include/dt-bindings/gpio/
 F:	include/linux/gpio.h
 F:	include/linux/gpio/
-F:	include/linux/of_gpio.h
 K:	(devm_)?gpio_(request|free|direction|get|set)
 K:	GPIOD_FLAGS_BIT_NONEXCLUSIVE
 K:	devm_gpiod_unhinge
diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO
index 5acaeab029ec..7ce80fde1f17 100644
--- a/drivers/gpio/TODO
+++ b/drivers/gpio/TODO
@@ -58,34 +58,6 @@ Work items:
 
 -------------------------------------------------------------------------------
 
-Get rid of <linux/of_gpio.h>
-
-This header and helpers appeared at one point when there was no proper
-driver infrastructure for doing simpler MMIO GPIO devices and there was
-no core support for parsing device tree GPIOs from the core library with
-the [devm_]gpiod_get() calls we have today that will implicitly go into
-the device tree back-end. It is legacy and should not be used in new code.
-
-Work items:
-
-- Change all consumer drivers that #include <linux/of_gpio.h> to
-  #include <linux/gpio/consumer.h> and stop doing custom parsing of the
-  GPIO lines from the device tree. This can be tricky and often involves
-  changing board files, etc.
-
-- Pull semantics for legacy device tree (OF) GPIO lookups into
-  gpiolib-of.c: in some cases subsystems are doing custom flags and
-  lookups for polarity inversion, open drain and what not. As we now
-  handle this with generic OF bindings, pull all legacy handling into
-  gpiolib so the library API becomes narrow and deep and handle all
-  legacy bindings internally. (See e.g. commits 6953c57ab172,
-  6a537d48461d etc)
-
-- Delete <linux/of_gpio.h> when all the above is complete and everything
-  uses <linux/gpio/consumer.h> or <linux/gpio/driver.h> instead.
-
--------------------------------------------------------------------------------
-
 Collect drivers
 
 Collect GPIO drivers from arch/* and other places that should be placed
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 3bdd9af67447..c512d735e85f 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_gpio.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -446,32 +445,6 @@ out:
 	return desc;
 }
 
-/**
- * of_get_named_gpio() - Get a GPIO number to use with GPIO API
- * @np:		device node to get GPIO from
- * @propname:	Name of property containing gpio specifier(s)
- * @index:	index of the GPIO
- *
- * **DEPRECATED** This function is deprecated and must not be used in new code.
- *
- * Returns:
- * GPIO number to use with Linux generic GPIO API, or one of the errno
- * value on the error condition.
- */
-int of_get_named_gpio(const struct device_node *np, const char *propname,
-		      int index)
-{
-	struct gpio_desc *desc;
-
-	desc = of_get_named_gpiod_flags(np, propname, index, NULL);
-
-	if (IS_ERR(desc))
-		return PTR_ERR(desc);
-	else
-		return desc_to_gpio(desc);
-}
-EXPORT_SYMBOL_GPL(of_get_named_gpio);
-
 /* Converts gpio_lookup_flags into bitmask of GPIO_* values */
 static unsigned long of_convert_gpio_flags(enum of_gpio_flags flags)
 {
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
deleted file mode 100644
index d0f66a5e1b2a..000000000000
--- a/include/linux/of_gpio.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * OF helpers for the GPIO API
- *
- * Copyright (c) 2007-2008  MontaVista Software, Inc.
- *
- * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- */
-
-#ifndef __LINUX_OF_GPIO_H
-#define __LINUX_OF_GPIO_H
-
-#include <linux/compiler.h>
-#include <linux/gpio/driver.h>
-#include <linux/gpio.h>		/* FIXME: Shouldn't be here */
-#include <linux/of.h>
-
-struct device_node;
-
-#ifdef CONFIG_OF_GPIO
-
-extern int of_get_named_gpio(const struct device_node *np,
-			     const char *list_name, int index);
-
-#else /* CONFIG_OF_GPIO */
-
-#include <linux/errno.h>
-
-/* Drivers may not strictly depend on the GPIO support, so let them link. */
-static inline int of_get_named_gpio(const struct device_node *np,
-                                   const char *propname, int index)
-{
-	return -ENOSYS;
-}
-
-#endif /* CONFIG_OF_GPIO */
-
-#endif /* __LINUX_OF_GPIO_H */
-- 
cgit v1.2.3


From 9840bb66e7e5dffd72b03201318f154a10b06b4a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Sat, 7 Mar 2026 14:54:31 -0500
Subject: vfs: remove externs from fs.h on functions modified by i_ino widening

Christoph says, in response to one of the patches in the i_ino widening
series, which changes the prototype of several functions in fs.h:

    "Can you please drop all these pointless externs while you're at it?"

Remove extern keyword from functions touched by that patch (and a few
that happened to be nearby). Also add missing argument names to
declarations that lacked them.

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://patch.msgid.link/20260307-iino-u64-v2-1-a1a1696e0653@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 62 +++++++++++++++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e820c14f9c5a..23f36a2613a3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2912,27 +2912,27 @@ static inline bool name_contains_dotdot(const char *name)
 #include <linux/err.h>
 
 /* needed for stackable file system support */
-extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
+loff_t default_llseek(struct file *file, loff_t offset, int whence);
 
-extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
+loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
 
-extern int inode_init_always_gfp(struct super_block *, struct inode *, gfp_t);
+int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp);
 static inline int inode_init_always(struct super_block *sb, struct inode *inode)
 {
 	return inode_init_always_gfp(sb, inode, GFP_NOFS);
 }
 
-extern void inode_init_once(struct inode *);
-extern void address_space_init_once(struct address_space *mapping);
-extern struct inode * igrab(struct inode *);
-extern ino_t iunique(struct super_block *, ino_t);
-extern int inode_needs_sync(struct inode *inode);
-extern int inode_just_drop(struct inode *inode);
+void inode_init_once(struct inode *inode);
+void address_space_init_once(struct address_space *mapping);
+struct inode *igrab(struct inode *inode);
+ino_t iunique(struct super_block *sb, ino_t max_reserved);
+int inode_needs_sync(struct inode *inode);
+int inode_just_drop(struct inode *inode);
 static inline int inode_generic_drop(struct inode *inode)
 {
 	return !inode->i_nlink || inode_unhashed(inode);
 }
-extern void d_mark_dontcache(struct inode *inode);
+void d_mark_dontcache(struct inode *inode);
 
 struct inode *ilookup5_nowait(struct super_block *sb, u64 hashval,
 			      int (*test)(struct inode *, void *), void *data,
@@ -2944,31 +2944,31 @@ struct inode *ilookup(struct super_block *sb, u64 ino);
 struct inode *inode_insert5(struct inode *inode, u64 hashval,
 			    int (*test)(struct inode *, void *),
 			    int (*set)(struct inode *, void *), void *data);
-struct inode *iget5_locked(struct super_block *, u64,
+struct inode *iget5_locked(struct super_block *sb, u64 hashval,
 			   int (*test)(struct inode *, void *),
-			   int (*set)(struct inode *, void *), void *);
-struct inode *iget5_locked_rcu(struct super_block *, u64,
+			   int (*set)(struct inode *, void *), void *data);
+struct inode *iget5_locked_rcu(struct super_block *sb, u64 hashval,
 			       int (*test)(struct inode *, void *),
-			       int (*set)(struct inode *, void *), void *);
-struct inode *iget_locked(struct super_block *, u64);
-struct inode *find_inode_nowait(struct super_block *, u64,
+			       int (*set)(struct inode *, void *), void *data);
+struct inode *iget_locked(struct super_block *sb, u64 ino);
+struct inode *find_inode_nowait(struct super_block *sb, u64 hashval,
 				int (*match)(struct inode *, u64, void *),
 				void *data);
-struct inode *find_inode_rcu(struct super_block *, u64,
-			     int (*)(struct inode *, void *), void *);
-struct inode *find_inode_by_ino_rcu(struct super_block *, u64);
-int insert_inode_locked4(struct inode *, u64,
-			 int (*test)(struct inode *, void *), void *);
-extern int insert_inode_locked(struct inode *);
+struct inode *find_inode_rcu(struct super_block *sb, u64 hashval,
+			     int (*test)(struct inode *, void *), void *data);
+struct inode *find_inode_by_ino_rcu(struct super_block *sb, u64 ino);
+int insert_inode_locked4(struct inode *inode, u64 hashval,
+			 int (*test)(struct inode *, void *), void *data);
+int insert_inode_locked(struct inode *inode);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
+void lockdep_annotate_inode_mutex_key(struct inode *inode);
 #else
 static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
 #endif
-extern void unlock_new_inode(struct inode *);
-extern void discard_new_inode(struct inode *);
-extern unsigned int get_next_ino(void);
-extern void evict_inodes(struct super_block *sb);
+void unlock_new_inode(struct inode *inode);
+void discard_new_inode(struct inode *inode);
+unsigned int get_next_ino(void);
+void evict_inodes(struct super_block *sb);
 void dump_mapping(const struct address_space *);
 
 /*
@@ -3013,21 +3013,21 @@ int setattr_should_drop_sgid(struct mnt_idmap *idmap,
  */
 #define alloc_inode_sb(_sb, _cache, _gfp) kmem_cache_alloc_lru(_cache, &_sb->s_inode_lru, _gfp)
 
-void __insert_inode_hash(struct inode *, u64 hashval);
+void __insert_inode_hash(struct inode *inode, u64 hashval);
 static inline void insert_inode_hash(struct inode *inode)
 {
 	__insert_inode_hash(inode, inode->i_ino);
 }
 
-extern void __remove_inode_hash(struct inode *);
+void __remove_inode_hash(struct inode *inode);
 static inline void remove_inode_hash(struct inode *inode)
 {
 	if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash))
 		__remove_inode_hash(inode);
 }
 
-extern void inode_sb_list_add(struct inode *inode);
-extern void inode_lru_list_add(struct inode *inode);
+void inode_sb_list_add(struct inode *inode);
+void inode_lru_list_add(struct inode *inode);
 
 int generic_file_mmap(struct file *, struct vm_area_struct *);
 int generic_file_mmap_prepare(struct vm_area_desc *desc);
-- 
cgit v1.2.3


From a6fe20d67dc7d512f9b5dc11c5777fb1e1ff70ce Mon Sep 17 00:00:00 2001
From: Maciej Strozek <mstrozek@opensource.cirrus.com>
Date: Fri, 6 Mar 2026 15:28:10 +0000
Subject: mfd: cs42l43: Add support for the B variant

Introducing CS42L43B codec, a variant of CS42L43 which can be driven by
the same driver.

Changes in CS42L43 driver specific for CS42L43B:
- Decimator 1 and 2 are dedicated to ADC, can't be selected for PDM
- Decimators 3 and 4 are connected to PDM1
- Added Decimator 5 and 6 for PDM2
- Supports SoundWire Clock Gearing
- Updated ROM requiring no patching
- Reduced RAM space
- Each ISRC has 4 decimators now

Signed-off-by: Maciej Strozek <mstrozek@opensource.cirrus.com>
Acked-by: Lee Jones <lee@kernel.org>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://patch.msgid.link/20260306152829.3130530-4-mstrozek@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/mfd/cs42l43-i2c.c        |  7 ++-
 drivers/mfd/cs42l43-sdw.c        |  4 +-
 drivers/mfd/cs42l43.c            | 93 ++++++++++++++++++++++++++++++++++------
 drivers/mfd/cs42l43.h            |  2 +-
 include/linux/mfd/cs42l43-regs.h | 76 ++++++++++++++++++++++++++++++++
 include/linux/mfd/cs42l43.h      |  1 +
 6 files changed, 166 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/cs42l43-i2c.c b/drivers/mfd/cs42l43-i2c.c
index a2ab001a600a..0a0ab5e549a5 100644
--- a/drivers/mfd/cs42l43-i2c.c
+++ b/drivers/mfd/cs42l43-i2c.c
@@ -47,6 +47,7 @@ static int cs42l43_i2c_probe(struct i2c_client *i2c)
 	cs42l43->irq = i2c->irq;
 	/* A device on an I2C is always attached by definition. */
 	cs42l43->attached = true;
+	cs42l43->variant_id = (long)device_get_match_data(cs42l43->dev);
 
 	cs42l43->regmap = devm_regmap_init_i2c(i2c, &cs42l43_i2c_regmap);
 	if (IS_ERR(cs42l43->regmap))
@@ -58,7 +59,8 @@ static int cs42l43_i2c_probe(struct i2c_client *i2c)
 
 #if IS_ENABLED(CONFIG_OF)
 static const struct of_device_id cs42l43_of_match[] = {
-	{ .compatible = "cirrus,cs42l43", },
+	{ .compatible = "cirrus,cs42l43", .data = (void *)CS42L43_DEVID_VAL },
+	{ .compatible = "cirrus,cs42l43b", .data = (void *)CS42L43B_DEVID_VAL },
 	{}
 };
 MODULE_DEVICE_TABLE(of, cs42l43_of_match);
@@ -66,7 +68,8 @@ MODULE_DEVICE_TABLE(of, cs42l43_of_match);
 
 #if IS_ENABLED(CONFIG_ACPI)
 static const struct acpi_device_id cs42l43_acpi_match[] = {
-	{ "CSC4243", 0 },
+	{ "CSC4243", CS42L43_DEVID_VAL },
+	{ "CSC2A3B", CS42L43B_DEVID_VAL },
 	{}
 };
 MODULE_DEVICE_TABLE(acpi, cs42l43_acpi_match);
diff --git a/drivers/mfd/cs42l43-sdw.c b/drivers/mfd/cs42l43-sdw.c
index 023f7e1a30f8..794c98378175 100644
--- a/drivers/mfd/cs42l43-sdw.c
+++ b/drivers/mfd/cs42l43-sdw.c
@@ -178,6 +178,7 @@ static int cs42l43_sdw_probe(struct sdw_slave *sdw, const struct sdw_device_id *
 
 	cs42l43->dev = dev;
 	cs42l43->sdw = sdw;
+	cs42l43->variant_id = (long)id->driver_data;
 
 	cs42l43->regmap = devm_regmap_init_sdw(sdw, &cs42l43_sdw_regmap);
 	if (IS_ERR(cs42l43->regmap))
@@ -188,7 +189,8 @@ static int cs42l43_sdw_probe(struct sdw_slave *sdw, const struct sdw_device_id *
 }
 
 static const struct sdw_device_id cs42l43_sdw_id[] = {
-	SDW_SLAVE_ENTRY(0x01FA, 0x4243, 0),
+	SDW_SLAVE_ENTRY(0x01FA, 0x4243, (void *) CS42L43_DEVID_VAL),
+	SDW_SLAVE_ENTRY(0x01FA, 0x2A3B, (void *) CS42L43B_DEVID_VAL),
 	{}
 };
 MODULE_DEVICE_TABLE(sdw, cs42l43_sdw_id);
diff --git a/drivers/mfd/cs42l43.c b/drivers/mfd/cs42l43.c
index 107cfb983fec..166881751e69 100644
--- a/drivers/mfd/cs42l43.c
+++ b/drivers/mfd/cs42l43.c
@@ -115,9 +115,14 @@ const struct reg_default cs42l43_reg_default[CS42L43_N_DEFAULTS] = {
 	{ CS42L43_DECIM_HPF_WNF_CTRL2,			0x00000001 },
 	{ CS42L43_DECIM_HPF_WNF_CTRL3,			0x00000001 },
 	{ CS42L43_DECIM_HPF_WNF_CTRL4,			0x00000001 },
+	{ CS42L43B_DECIM_HPF_WNF_CTRL5,			0x00000001 },
+	{ CS42L43B_DECIM_HPF_WNF_CTRL6,			0x00000001 },
 	{ CS42L43_DMIC_PDM_CTRL,			0x00000000 },
 	{ CS42L43_DECIM_VOL_CTRL_CH1_CH2,		0x20122012 },
 	{ CS42L43_DECIM_VOL_CTRL_CH3_CH4,		0x20122012 },
+	{ CS42L43B_DECIM_VOL_CTRL_CH1_CH2,		0x20122012 },
+	{ CS42L43B_DECIM_VOL_CTRL_CH3_CH4,		0x20122012 },
+	{ CS42L43B_DECIM_VOL_CTRL_CH5_CH6,		0x20122012 },
 	{ CS42L43_INTP_VOLUME_CTRL1,			0x00000180 },
 	{ CS42L43_INTP_VOLUME_CTRL2,			0x00000180 },
 	{ CS42L43_AMP1_2_VOL_RAMP,			0x00000022 },
@@ -155,8 +160,12 @@ const struct reg_default cs42l43_reg_default[CS42L43_N_DEFAULTS] = {
 	{ CS42L43_SWIRE_DP2_CH2_INPUT,			0x00000000 },
 	{ CS42L43_SWIRE_DP3_CH1_INPUT,			0x00000000 },
 	{ CS42L43_SWIRE_DP3_CH2_INPUT,			0x00000000 },
+	{ CS42L43B_SWIRE_DP3_CH3_INPUT,			0x00000000 },
+	{ CS42L43B_SWIRE_DP3_CH4_INPUT,			0x00000000 },
 	{ CS42L43_SWIRE_DP4_CH1_INPUT,			0x00000000 },
 	{ CS42L43_SWIRE_DP4_CH2_INPUT,			0x00000000 },
+	{ CS42L43B_SWIRE_DP4_CH3_INPUT,			0x00000000 },
+	{ CS42L43B_SWIRE_DP4_CH4_INPUT,			0x00000000 },
 	{ CS42L43_ASRC_INT1_INPUT1,			0x00000000 },
 	{ CS42L43_ASRC_INT2_INPUT1,			0x00000000 },
 	{ CS42L43_ASRC_INT3_INPUT1,			0x00000000 },
@@ -169,10 +178,14 @@ const struct reg_default cs42l43_reg_default[CS42L43_N_DEFAULTS] = {
 	{ CS42L43_ISRC1INT2_INPUT1,			0x00000000 },
 	{ CS42L43_ISRC1DEC1_INPUT1,			0x00000000 },
 	{ CS42L43_ISRC1DEC2_INPUT1,			0x00000000 },
+	{ CS42L43B_ISRC1DEC3_INPUT1,			0x00000000 },
+	{ CS42L43B_ISRC1DEC4_INPUT1,			0x00000000 },
 	{ CS42L43_ISRC2INT1_INPUT1,			0x00000000 },
 	{ CS42L43_ISRC2INT2_INPUT1,			0x00000000 },
 	{ CS42L43_ISRC2DEC1_INPUT1,			0x00000000 },
 	{ CS42L43_ISRC2DEC2_INPUT1,			0x00000000 },
+	{ CS42L43B_ISRC2DEC3_INPUT1,			0x00000000 },
+	{ CS42L43B_ISRC2DEC4_INPUT1,			0x00000000 },
 	{ CS42L43_EQ1MIX_INPUT1,			0x00800000 },
 	{ CS42L43_EQ1MIX_INPUT2,			0x00800000 },
 	{ CS42L43_EQ1MIX_INPUT3,			0x00800000 },
@@ -269,6 +282,8 @@ EXPORT_SYMBOL_NS_GPL(cs42l43_reg_default, "MFD_CS42L43");
 
 bool cs42l43_readable_register(struct device *dev, unsigned int reg)
 {
+	struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+
 	switch (reg) {
 	case CS42L43_DEVID:
 	case CS42L43_REVID:
@@ -292,7 +307,6 @@ bool cs42l43_readable_register(struct device *dev, unsigned int reg)
 	case CS42L43_ADC_B_CTRL1 ...  CS42L43_ADC_B_CTRL2:
 	case CS42L43_DECIM_HPF_WNF_CTRL1 ... CS42L43_DECIM_HPF_WNF_CTRL4:
 	case CS42L43_DMIC_PDM_CTRL:
-	case CS42L43_DECIM_VOL_CTRL_CH1_CH2 ... CS42L43_DECIM_VOL_CTRL_CH3_CH4:
 	case CS42L43_INTP_VOLUME_CTRL1 ... CS42L43_INTP_VOLUME_CTRL2:
 	case CS42L43_AMP1_2_VOL_RAMP:
 	case CS42L43_ASP_CTRL:
@@ -387,8 +401,16 @@ bool cs42l43_readable_register(struct device *dev, unsigned int reg)
 	case CS42L43_BOOT_CONTROL:
 	case CS42L43_BLOCK_EN:
 	case CS42L43_SHUTTER_CONTROL:
-	case CS42L43_MCU_SW_REV ... CS42L43_MCU_RAM_MAX:
-		return true;
+	case CS42L43B_MCU_SW_REV ... CS42L43B_MCU_RAM_MAX:
+		return true; // registers present on all variants
+	case CS42L43_MCU_SW_REV ... CS42L43B_MCU_SW_REV - 1:
+	case CS42L43B_MCU_RAM_MAX + 1 ... CS42L43_MCU_RAM_MAX:
+	case CS42L43_DECIM_VOL_CTRL_CH1_CH2 ... CS42L43_DECIM_VOL_CTRL_CH3_CH4:
+		return cs42l43->variant_id == CS42L43_DEVID_VAL; // regs only in CS42L43 variant
+	case CS42L43B_DECIM_VOL_CTRL_CH1_CH2 ... CS42L43B_DECIM_HPF_WNF_CTRL6:
+	case CS42L43B_SWIRE_DP3_CH3_INPUT ... CS42L43B_SWIRE_DP4_CH4_INPUT:
+	case CS42L43B_ISRC1DEC3_INPUT1 ... CS42L43B_ISRC2DEC4_INPUT1:
+		return cs42l43->variant_id == CS42L43B_DEVID_VAL; // regs only in CS42L43B variant
 	default:
 		return false;
 	}
@@ -597,15 +619,27 @@ static int cs42l43_wait_for_attach(struct cs42l43 *cs42l43)
 static int cs42l43_mcu_stage_2_3(struct cs42l43 *cs42l43, bool shadow)
 {
 	unsigned int need_reg = CS42L43_NEED_CONFIGS;
+	unsigned int boot_reg;
 	unsigned int val;
 	int ret;
 
-	if (shadow)
-		need_reg = CS42L43_FW_SH_BOOT_CFG_NEED_CONFIGS;
+	switch (cs42l43->variant_id) {
+	case CS42L43_DEVID_VAL:
+		if (shadow)
+			need_reg = CS42L43_FW_SH_BOOT_CFG_NEED_CONFIGS;
+		boot_reg = CS42L43_BOOT_STATUS;
+		break;
+	case CS42L43B_DEVID_VAL:
+		need_reg = CS42L43B_NEED_CONFIGS;
+		boot_reg = CS42L43B_BOOT_STATUS;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	regmap_write(cs42l43->regmap, need_reg, 0);
 
-	ret = regmap_read_poll_timeout(cs42l43->regmap, CS42L43_BOOT_STATUS,
+	ret = regmap_read_poll_timeout(cs42l43->regmap, boot_reg,
 				       val, (val == CS42L43_MCU_BOOT_STAGE3),
 				       CS42L43_MCU_POLL_US, CS42L43_MCU_CMD_TIMEOUT_US);
 	if (ret) {
@@ -644,13 +678,25 @@ static int cs42l43_mcu_stage_3_2(struct cs42l43 *cs42l43)
  */
 static int cs42l43_mcu_disable(struct cs42l43 *cs42l43)
 {
-	unsigned int val;
+	unsigned int val, cfg_reg, ctrl_reg;
 	int ret;
 
-	regmap_write(cs42l43->regmap, CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG,
-		     CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_DISABLE_VAL);
-	regmap_write(cs42l43->regmap, CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION,
-		     CS42L43_FW_MM_CTRL_MCU_SEL_MASK);
+	switch (cs42l43->variant_id) {
+	case CS42L43_DEVID_VAL:
+		cfg_reg = CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG;
+		ctrl_reg = CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION;
+		break;
+	case CS42L43B_DEVID_VAL:
+		cfg_reg = CS42L43B_FW_MISSION_CTRL_MM_MCU_CFG_REG;
+		ctrl_reg = CS42L43B_FW_MISSION_CTRL_MM_CTRL_SELECTION;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	regmap_write(cs42l43->regmap, cfg_reg, CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_DISABLE_VAL);
+	regmap_write(cs42l43->regmap, ctrl_reg, CS42L43_FW_MM_CTRL_MCU_SEL_MASK);
+
 	regmap_write(cs42l43->regmap, CS42L43_MCU_SW_INTERRUPT, CS42L43_CONTROL_IND_MASK);
 	regmap_write(cs42l43->regmap, CS42L43_MCU_SW_INTERRUPT, 0);
 
@@ -740,18 +786,32 @@ static int cs42l43_mcu_update_step(struct cs42l43 *cs42l43)
 {
 	unsigned int mcu_rev, bios_rev, boot_status, secure_cfg;
 	bool patched, shadow;
+	int boot_status_reg, mcu_sw_rev_reg;
 	int ret;
 
+	switch (cs42l43->variant_id) {
+	case CS42L43_DEVID_VAL:
+		boot_status_reg = CS42L43_BOOT_STATUS;
+		mcu_sw_rev_reg = CS42L43_MCU_SW_REV;
+		break;
+	case CS42L43B_DEVID_VAL:
+		boot_status_reg = CS42L43B_BOOT_STATUS;
+		mcu_sw_rev_reg = CS42L43B_MCU_SW_REV;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	/* Clear any stale software interrupt bits. */
 	regmap_read(cs42l43->regmap, CS42L43_SOFT_INT, &mcu_rev);
 
-	ret = regmap_read(cs42l43->regmap, CS42L43_BOOT_STATUS, &boot_status);
+	ret = regmap_read(cs42l43->regmap, boot_status_reg, &boot_status);
 	if (ret) {
 		dev_err(cs42l43->dev, "Failed to read boot status: %d\n", ret);
 		return ret;
 	}
 
-	ret = regmap_read(cs42l43->regmap, CS42L43_MCU_SW_REV, &mcu_rev);
+	ret = regmap_read(cs42l43->regmap, mcu_sw_rev_reg, &mcu_rev);
 	if (ret) {
 		dev_err(cs42l43->dev, "Failed to read firmware revision: %d\n", ret);
 		return ret;
@@ -918,6 +978,13 @@ static void cs42l43_boot_work(struct work_struct *work)
 
 	switch (devid) {
 	case CS42L43_DEVID_VAL:
+	case CS42L43B_DEVID_VAL:
+		if (devid != cs42l43->variant_id) {
+			dev_err(cs42l43->dev,
+				"Device ID (0x%06x) does not match variant ID (0x%06lx)\n",
+				devid, cs42l43->variant_id);
+			goto err;
+		}
 		break;
 	default:
 		dev_err(cs42l43->dev, "Unrecognised devid: 0x%06x\n", devid);
diff --git a/drivers/mfd/cs42l43.h b/drivers/mfd/cs42l43.h
index f3da783930f5..a0068f6572e2 100644
--- a/drivers/mfd/cs42l43.h
+++ b/drivers/mfd/cs42l43.h
@@ -9,7 +9,7 @@
 #ifndef CS42L43_CORE_INT_H
 #define CS42L43_CORE_INT_H
 
-#define CS42L43_N_DEFAULTS 176
+#define CS42L43_N_DEFAULTS 189
 
 struct dev_pm_ops;
 struct device;
diff --git a/include/linux/mfd/cs42l43-regs.h b/include/linux/mfd/cs42l43-regs.h
index c39a49269cb7..68831f113589 100644
--- a/include/linux/mfd/cs42l43-regs.h
+++ b/include/linux/mfd/cs42l43-regs.h
@@ -1181,4 +1181,80 @@
 /* CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG */
 #define CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_DISABLE_VAL		0xF05AA50F
 
+/* CS42L43B VARIANT REGISTERS */
+#define CS42L43B_DEVID_VAL					0x0042A43B
+
+#define CS42L43B_DECIM_VOL_CTRL_CH1_CH2				0x00008280
+#define CS42L43B_DECIM_VOL_CTRL_CH3_CH4				0x00008284
+
+#define CS42L43B_DECIM_VOL_CTRL_CH5_CH6				0x00008290
+#define CS42L43B_DECIM_VOL_CTRL_UPDATE				0x0000829C
+
+#define CS42L43B_DECIM_HPF_WNF_CTRL5				0x000082A0
+#define CS42L43B_DECIM_HPF_WNF_CTRL6				0x000082A4
+
+#define CS42L43B_SWIRE_DP3_CH3_INPUT				0x0000C320
+#define CS42L43B_SWIRE_DP3_CH4_INPUT				0x0000C330
+#define CS42L43B_SWIRE_DP4_CH3_INPUT				0x0000C340
+#define CS42L43B_SWIRE_DP4_CH4_INPUT				0x0000C350
+
+#define CS42L43B_ISRC1DEC3_INPUT1				0x0000C780
+#define CS42L43B_ISRC1DEC4_INPUT1				0x0000C790
+#define CS42L43B_ISRC2DEC3_INPUT1				0x0000C7A0
+#define CS42L43B_ISRC2DEC4_INPUT1				0x0000C7B0
+
+#define CS42L43B_FW_MISSION_CTRL_NEED_CONFIGS			0x00117E00
+#define CS42L43B_FW_MISSION_CTRL_HAVE_CONFIGS			0x00117E04
+#define CS42L43B_FW_MISSION_CTRL_PATCH_START_ADDR_REG		0x00117E08
+#define CS42L43B_FW_MISSION_CTRL_MM_CTRL_SELECTION		0x00117E0C
+#define CS42L43B_FW_MISSION_CTRL_MM_MCU_CFG_REG			0x00117E10
+
+#define CS42L43B_MCU_SW_REV					0x00117314
+#define CS42L43B_PATCH_START_ADDR				0x00117318
+#define CS42L43B_CONFIG_SELECTION				0x0011731C
+#define CS42L43B_NEED_CONFIGS					0x00117320
+#define CS42L43B_BOOT_STATUS					0x00117330
+
+#define CS42L43B_FW_MISSION_CTRL_NEED_CONFIGS			0x00117E00
+#define CS42L43B_FW_MISSION_CTRL_HAVE_CONFIGS			0x00117E04
+#define CS42L43B_FW_MISSION_CTRL_PATCH_START_ADDR_REG		0x00117E08
+#define CS42L43B_FW_MISSION_CTRL_MM_CTRL_SELECTION		0x00117E0C
+#define CS42L43B_FW_MISSION_CTRL_MM_MCU_CFG_REG			0x00117E10
+
+#define CS42L43B_MCU_RAM_MAX					0x00117FFF
+
+/* CS42L43B_DECIM_DECIM_VOL_CTRL_CH5_CH6 */
+#define CS42L43B_DECIM6_MUTE_MASK				0x80000000
+#define CS42L43B_DECIM6_MUTE_SHIFT				31
+#define CS42L43B_DECIM6_VOL_MASK				0x3FC00000
+#define CS42L43B_DECIM6_VOL_SHIFT				22
+#define CS42L43B_DECIM6_PATH1_VOL_FALL_RATE_MASK		0x00380000
+#define CS42L43B_DECIM6_PATH1_VOL_FALL_RATE_SHIFT		19
+#define CS42L43B_DECIM6_PATH1_VOL_RISE_RATE_MASK		0x00070000
+#define CS42L43B_DECIM6_PATH1_VOL_RISE_RATE_SHIFT		16
+#define CS42L43B_DECIM5_MUTE_MASK				0x00008000
+#define CS42L43B_DECIM5_MUTE_SHIFT				15
+#define CS42L43B_DECIM5_VOL_MASK				0x00003FC0
+#define CS42L43B_DECIM5_VOL_SHIFT				6
+#define CS42L43B_DECIM5_PATH1_VOL_FALL_RATE_MASK		0x00000038
+#define CS42L43B_DECIM5_PATH1_VOL_FALL_RATE_SHIFT		3
+#define CS42L43B_DECIM5_PATH1_VOL_RISE_RATE_MASK		0x00000007
+#define CS42L43B_DECIM5_PATH1_VOL_RISE_RATE_SHIFT		0
+
+/* CS42L43B_DECIM_VOL_CTRL_UPDATE */
+#define CS42L43B_DECIM6_PATH1_VOL_TRIG_MASK			0x00000800
+#define CS42L43B_DECIM6_PATH1_VOL_TRIG_SHIFT			11
+#define CS42L43B_DECIM5_PATH1_VOL_TRIG_MASK			0x00000100
+#define CS42L43B_DECIM5_PATH1_VOL_TRIG_SHIFT			8
+#define CS42L43B_DECIM4_VOL_UPDATE_MASK				0x00000020
+#define CS42L43B_DECIM4_VOL_UPDATE_SHIFT			5
+
+/* CS42L43_ISRC1_CTRL..CS42L43_ISRC2_CTRL */
+#define CS42L43B_ISRC_DEC4_EN_MASK				0x00000008
+#define CS42L43B_ISRC_DEC4_EN_SHIFT				3
+#define CS42L43B_ISRC_DEC4_EN_WIDTH				1
+#define CS42L43B_ISRC_DEC3_EN_MASK				0x00000004
+#define CS42L43B_ISRC_DEC3_EN_SHIFT				2
+#define CS42L43B_ISRC_DEC3_EN_WIDTH				1
+
 #endif /* CS42L43_CORE_REGS_H */
diff --git a/include/linux/mfd/cs42l43.h b/include/linux/mfd/cs42l43.h
index 2239d8585e78..ff0f7e365a19 100644
--- a/include/linux/mfd/cs42l43.h
+++ b/include/linux/mfd/cs42l43.h
@@ -98,6 +98,7 @@ struct cs42l43 {
 	bool sdw_pll_active;
 	bool attached;
 	bool hw_lock;
+	long variant_id;
 };
 
 #endif /* CS42L43_CORE_EXT_H */
-- 
cgit v1.2.3


From 3a005126c9d7f30093627a6f329656c358e16b3a Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li@nxp.com>
Date: Wed, 25 Feb 2026 16:41:37 -0500
Subject: dmaengine: of_dma: Add devm_of_dma_controller_register()

Add a managed API, devm_of_dma_controller_register(), to simplify DMA
engine controller registration by automatically handling resource
cleanup.

Signed-off-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260225-mxsdma-module-v3-1-8f798b13baa6@nxp.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/of_dma.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index fd706cdf255c..16b08234d03b 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -38,6 +38,26 @@ extern int of_dma_controller_register(struct device_node *np,
 		void *data);
 extern void of_dma_controller_free(struct device_node *np);
 
+static void __of_dma_controller_free(void *np)
+{
+	of_dma_controller_free(np);
+}
+
+static inline int
+devm_of_dma_controller_register(struct device *dev, struct device_node *np,
+				struct dma_chan *(*of_dma_xlate)
+				(struct of_phandle_args *, struct of_dma *),
+				void *data)
+{
+	int ret;
+
+	ret = of_dma_controller_register(np, of_dma_xlate, data);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(dev, __of_dma_controller_free, np);
+}
+
 extern int of_dma_router_register(struct device_node *np,
 		void *(*of_dma_route_allocate)
 		(struct of_phandle_args *, struct of_dma *),
@@ -64,6 +84,15 @@ static inline void of_dma_controller_free(struct device_node *np)
 {
 }
 
+static inline int
+devm_of_dma_controller_register(struct device *dev, struct device_node *np,
+				struct dma_chan *(*of_dma_xlate)
+				(struct of_phandle_args *, struct of_dma *),
+				void *data)
+{
+	return -ENODEV;
+}
+
 static inline int of_dma_router_register(struct device_node *np,
 		void *(*of_dma_route_allocate)
 		(struct of_phandle_args *, struct of_dma *),
-- 
cgit v1.2.3


From 9ded47ad003f09a94b6a710b5c47f4aa5ceb7429 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Feb 2026 09:25:54 +0100
Subject: fbdev: defio: Disconnect deferred I/O from the lifetime of struct
 fb_info

Hold state of deferred I/O in struct fb_deferred_io_state. Allocate an
instance as part of initializing deferred I/O and remove it only after
the final mapping has been closed. If the fb_info and the contained
deferred I/O meanwhile goes away, clear struct fb_deferred_io_state.info
to invalidate the mapping. Any access will then result in a SIGBUS
signal.

Fixes a long-standing problem, where a device hot-unplug happens while
user space still has an active mapping of the graphics memory. The hot-
unplug frees the instance of struct fb_info. Accessing the memory will
operate on undefined state.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 60b59beafba8 ("fbdev: mm: Deferred IO support")
Cc: Helge Deller <deller@gmx.de>
Cc: linux-fbdev@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: stable@vger.kernel.org # v2.6.22+
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/core/fb_defio.c | 178 ++++++++++++++++++++++++++++--------
 include/linux/fb.h                  |   4 +-
 2 files changed, 145 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index ca48b89a323d..93bd2f696fa4 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -24,6 +24,75 @@
 #include <linux/rmap.h>
 #include <linux/pagemap.h>
 
+/*
+ * struct fb_deferred_io_state
+ */
+
+struct fb_deferred_io_state {
+	struct kref ref;
+
+	struct mutex lock; /* mutex that protects the pageref list */
+	/* fields protected by lock */
+	struct fb_info *info;
+};
+
+static struct fb_deferred_io_state *fb_deferred_io_state_alloc(void)
+{
+	struct fb_deferred_io_state *fbdefio_state;
+
+	fbdefio_state = kzalloc_obj(*fbdefio_state);
+	if (!fbdefio_state)
+		return NULL;
+
+	kref_init(&fbdefio_state->ref);
+	mutex_init(&fbdefio_state->lock);
+
+	return fbdefio_state;
+}
+
+static void fb_deferred_io_state_release(struct fb_deferred_io_state *fbdefio_state)
+{
+	mutex_destroy(&fbdefio_state->lock);
+
+	kfree(fbdefio_state);
+}
+
+static void fb_deferred_io_state_get(struct fb_deferred_io_state *fbdefio_state)
+{
+	kref_get(&fbdefio_state->ref);
+}
+
+static void __fb_deferred_io_state_release(struct kref *ref)
+{
+	struct fb_deferred_io_state *fbdefio_state =
+		container_of(ref, struct fb_deferred_io_state, ref);
+
+	fb_deferred_io_state_release(fbdefio_state);
+}
+
+static void fb_deferred_io_state_put(struct fb_deferred_io_state *fbdefio_state)
+{
+	kref_put(&fbdefio_state->ref, __fb_deferred_io_state_release);
+}
+
+/*
+ * struct vm_operations_struct
+ */
+
+static void fb_deferred_io_vm_open(struct vm_area_struct *vma)
+{
+	struct fb_deferred_io_state *fbdefio_state = vma->vm_private_data;
+
+	fb_deferred_io_state_get(fbdefio_state);
+}
+
+static void fb_deferred_io_vm_close(struct vm_area_struct *vma)
+{
+	struct fb_deferred_io_state *fbdefio_state = vma->vm_private_data;
+
+	fb_deferred_io_state_put(fbdefio_state);
+}
+
 static struct page *fb_deferred_io_get_page(struct fb_info *info, unsigned long offs)
 {
 	struct fb_deferred_io *fbdefio = info->fbdefio;
@@ -121,25 +190,46 @@ static void fb_deferred_io_pageref_put(struct fb_deferred_io_pageref *pageref,
 /* this is to find and return the vmalloc-ed fb pages */
 static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf)
 {
+	struct fb_info *info;
 	unsigned long offset;
 	struct page *page;
-	struct fb_info *info = vmf->vma->vm_private_data;
+	vm_fault_t ret;
+	struct fb_deferred_io_state *fbdefio_state = vmf->vma->vm_private_data;
+
+	mutex_lock(&fbdefio_state->lock);
+
+	info = fbdefio_state->info;
+	if (!info) {
+		ret = VM_FAULT_SIGBUS; /* our device is gone */
+		goto err_mutex_unlock;
+	}
 
 	offset = vmf->pgoff << PAGE_SHIFT;
-	if (offset >= info->fix.smem_len)
-		return VM_FAULT_SIGBUS;
+	if (offset >= info->fix.smem_len) {
+		ret = VM_FAULT_SIGBUS;
+		goto err_mutex_unlock;
+	}
 
 	page = fb_deferred_io_get_page(info, offset);
-	if (!page)
-		return VM_FAULT_SIGBUS;
+	if (!page) {
+		ret = VM_FAULT_SIGBUS;
+		goto err_mutex_unlock;
+	}
 
 	if (!vmf->vma->vm_file)
 		fb_err(info, "no mapping available\n");
 
 	BUG_ON(!info->fbdefio->mapping);
 
+	mutex_unlock(&fbdefio_state->lock);
+
 	vmf->page = page;
+
 	return 0;
+
+err_mutex_unlock:
+	mutex_unlock(&fbdefio_state->lock);
+	return ret;
 }
 
 int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync)
@@ -166,15 +256,24 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync);
  * Adds a page to the dirty list. Call this from struct
  * vm_operations_struct.page_mkwrite.
  */
-static vm_fault_t fb_deferred_io_track_page(struct fb_info *info, unsigned long offset,
-					    struct page *page)
+static vm_fault_t fb_deferred_io_track_page(struct fb_deferred_io_state *fbdefio_state,
+					    unsigned long offset, struct page *page)
 {
-	struct fb_deferred_io *fbdefio = info->fbdefio;
+	struct fb_info *info;
+	struct fb_deferred_io *fbdefio;
 	struct fb_deferred_io_pageref *pageref;
 	vm_fault_t ret;
 
 	/* protect against the workqueue changing the page list */
-	mutex_lock(&fbdefio->lock);
+	mutex_lock(&fbdefio_state->lock);
+
+	info = fbdefio_state->info;
+	if (!info) {
+		ret = VM_FAULT_SIGBUS; /* our device is gone */
+		goto err_mutex_unlock;
+	}
+
+	fbdefio = info->fbdefio;
 
 	pageref = fb_deferred_io_pageref_get(info, offset, page);
 	if (WARN_ON_ONCE(!pageref)) {
@@ -192,50 +291,38 @@ static vm_fault_t fb_deferred_io_track_page(struct fb_info *info, unsigned long
 	 */
 	lock_page(pageref->page);
 
-	mutex_unlock(&fbdefio->lock);
+	mutex_unlock(&fbdefio_state->lock);
 
 	/* come back after delay to process the deferred IO */
 	schedule_delayed_work(&info->deferred_work, fbdefio->delay);
 	return VM_FAULT_LOCKED;
 
 err_mutex_unlock:
-	mutex_unlock(&fbdefio->lock);
+	mutex_unlock(&fbdefio_state->lock);
 	return ret;
 }
 
-/*
- * fb_deferred_io_page_mkwrite - Mark a page as written for deferred I/O
- * @fb_info: The fbdev info structure
- * @vmf: The VM fault
- *
- * This is a callback we get when userspace first tries to
- * write to the page. We schedule a workqueue. That workqueue
- * will eventually mkclean the touched pages and execute the
- * deferred framebuffer IO. Then if userspace touches a page
- * again, we repeat the same scheme.
- *
- * Returns:
- * VM_FAULT_LOCKED on success, or a VM_FAULT error otherwise.
- */
-static vm_fault_t fb_deferred_io_page_mkwrite(struct fb_info *info, struct vm_fault *vmf)
+static vm_fault_t fb_deferred_io_page_mkwrite(struct fb_deferred_io_state *fbdefio_state,
+					      struct vm_fault *vmf)
 {
 	unsigned long offset = vmf->pgoff << PAGE_SHIFT;
 	struct page *page = vmf->page;
 
 	file_update_time(vmf->vma->vm_file);
 
-	return fb_deferred_io_track_page(info, offset, page);
+	return fb_deferred_io_track_page(fbdefio_state, offset, page);
 }
 
-/* vm_ops->page_mkwrite handler */
 static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf)
 {
-	struct fb_info *info = vmf->vma->vm_private_data;
+	struct fb_deferred_io_state *fbdefio_state = vmf->vma->vm_private_data;
 
-	return fb_deferred_io_page_mkwrite(info, vmf);
+	return fb_deferred_io_page_mkwrite(fbdefio_state, vmf);
 }
 
 static const struct vm_operations_struct fb_deferred_io_vm_ops = {
+	.open		= fb_deferred_io_vm_open,
+	.close		= fb_deferred_io_vm_close,
 	.fault		= fb_deferred_io_fault,
 	.page_mkwrite	= fb_deferred_io_mkwrite,
 };
@@ -252,7 +339,10 @@ int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
 	if (!(info->flags & FBINFO_VIRTFB))
 		vm_flags_set(vma, VM_IO);
-	vma->vm_private_data = info;
+	vma->vm_private_data = info->fbdefio_state;
+
+	fb_deferred_io_state_get(info->fbdefio_state); /* released in vma->vm_ops->close() */
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_mmap);
@@ -263,9 +353,10 @@ static void fb_deferred_io_work(struct work_struct *work)
 	struct fb_info *info = container_of(work, struct fb_info, deferred_work.work);
 	struct fb_deferred_io_pageref *pageref, *next;
 	struct fb_deferred_io *fbdefio = info->fbdefio;
+	struct fb_deferred_io_state *fbdefio_state = info->fbdefio_state;
 
 	/* here we wrprotect the page's mappings, then do all deferred IO. */
-	mutex_lock(&fbdefio->lock);
+	mutex_lock(&fbdefio_state->lock);
 #ifdef CONFIG_MMU
 	list_for_each_entry(pageref, &fbdefio->pagereflist, list) {
 		struct page *page = pageref->page;
@@ -283,12 +374,13 @@ static void fb_deferred_io_work(struct work_struct *work)
 	list_for_each_entry_safe(pageref, next, &fbdefio->pagereflist, list)
 		fb_deferred_io_pageref_put(pageref, info);
 
-	mutex_unlock(&fbdefio->lock);
+	mutex_unlock(&fbdefio_state->lock);
 }
 
 int fb_deferred_io_init(struct fb_info *info)
 {
 	struct fb_deferred_io *fbdefio = info->fbdefio;
+	struct fb_deferred_io_state *fbdefio_state;
 	struct fb_deferred_io_pageref *pagerefs;
 	unsigned long npagerefs;
 	int ret;
@@ -298,7 +390,11 @@ int fb_deferred_io_init(struct fb_info *info)
 	if (WARN_ON(!info->fix.smem_len))
 		return -EINVAL;
 
-	mutex_init(&fbdefio->lock);
+	fbdefio_state = fb_deferred_io_state_alloc();
+	if (!fbdefio_state)
+		return -ENOMEM;
+	fbdefio_state->info = info;
+
 	INIT_DELAYED_WORK(&info->deferred_work, fb_deferred_io_work);
 	INIT_LIST_HEAD(&fbdefio->pagereflist);
 	if (fbdefio->delay == 0) /* set a default of 1 s */
@@ -315,10 +411,12 @@ int fb_deferred_io_init(struct fb_info *info)
 	info->npagerefs = npagerefs;
 	info->pagerefs = pagerefs;
 
+	info->fbdefio_state = fbdefio_state;
+
 	return 0;
 
 err:
-	mutex_destroy(&fbdefio->lock);
+	fb_deferred_io_state_release(fbdefio_state);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_init);
@@ -352,11 +450,19 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_release);
 void fb_deferred_io_cleanup(struct fb_info *info)
 {
 	struct fb_deferred_io *fbdefio = info->fbdefio;
+	struct fb_deferred_io_state *fbdefio_state = info->fbdefio_state;
 
 	fb_deferred_io_lastclose(info);
 
+	info->fbdefio_state = NULL;
+
+	mutex_lock(&fbdefio_state->lock);
+	fbdefio_state->info = NULL;
+	mutex_unlock(&fbdefio_state->lock);
+
+	fb_deferred_io_state_put(fbdefio_state);
+
 	kvfree(info->pagerefs);
-	mutex_destroy(&fbdefio->lock);
 	fbdefio->mapping = NULL;
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_cleanup);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 6d4a58084fd5..aed17567fe50 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -218,13 +218,14 @@ struct fb_deferred_io {
 	unsigned long delay;
 	bool sort_pagereflist; /* sort pagelist by offset */
 	int open_count; /* number of opened files; protected by fb_info lock */
-	struct mutex lock; /* mutex that protects the pageref list */
 	struct list_head pagereflist; /* list of pagerefs for touched pages */
 	struct address_space *mapping; /* page cache object for fb device */
 	/* callback */
 	struct page *(*get_page)(struct fb_info *info, unsigned long offset);
 	void (*deferred_io)(struct fb_info *info, struct list_head *pagelist);
 };
+
+struct fb_deferred_io_state;
 #endif
 
 /*
@@ -487,6 +488,7 @@ struct fb_info {
 	unsigned long npagerefs;
 	struct fb_deferred_io_pageref *pagerefs;
 	struct fb_deferred_io *fbdefio;
+	struct fb_deferred_io_state *fbdefio_state;
 #endif
 
 	const struct fb_ops *fbops;
-- 
cgit v1.2.3


From 648bfb62da4e7a970f6b153bb8cdab1703877fcd Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Feb 2026 09:25:56 +0100
Subject: fbdev: defio: Move variable state into struct fb_deferred_io_state

Move variable fields from struct fb_deferred_io into struct
fb_deferred_io_state. These fields are internal to the defio code
and should not be exposed to drivers. At some later point, struct
fb_defered_io might become const in all defio code.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/core/fb_defio.c | 37 ++++++++++++++++++++++---------------
 include/linux/fb.h                  |  3 ---
 2 files changed, 22 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index 56030eb42f71..35ac13727da1 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -25,6 +25,8 @@
 #include <linux/rmap.h>
 #include <linux/pagemap.h>
 
+struct address_space;
+
 /*
  * struct fb_deferred_io_state
  */
@@ -32,9 +34,13 @@
 struct fb_deferred_io_state {
 	struct kref ref;
 
+	int open_count; /* number of opened files; protected by fb_info lock */
+	struct address_space *mapping; /* page cache object for fb device */
+
 	struct mutex lock; /* mutex that protects the pageref list */
 	/* fields protected by lock */
 	struct fb_info *info;
+	struct list_head pagereflist; /* list of pagerefs for touched pages */
 };
 
 static struct fb_deferred_io_state *fb_deferred_io_state_alloc(void)
@@ -48,11 +54,14 @@ static struct fb_deferred_io_state *fb_deferred_io_state_alloc(void)
 	kref_init(&fbdefio_state->ref);
 	mutex_init(&fbdefio_state->lock);
 
+	INIT_LIST_HEAD(&fbdefio_state->pagereflist);
+
 	return fbdefio_state;
 }
 
 static void fb_deferred_io_state_release(struct fb_deferred_io_state *fbdefio_state)
 {
+	WARN_ON(!list_empty(&fbdefio_state->pagereflist));
 	mutex_destroy(&fbdefio_state->lock);
 
 	kfree(fbdefio_state);
@@ -147,7 +156,8 @@ static struct fb_deferred_io_pageref *fb_deferred_io_pageref_get(struct fb_info
 								 struct page *page)
 {
 	struct fb_deferred_io *fbdefio = info->fbdefio;
-	struct list_head *pos = &fbdefio->pagereflist;
+	struct fb_deferred_io_state *fbdefio_state = info->fbdefio_state;
+	struct list_head *pos = &fbdefio_state->pagereflist;
 	struct fb_deferred_io_pageref *pageref, *cur;
 
 	pageref = fb_deferred_io_pageref_lookup(info, offset, page);
@@ -171,7 +181,7 @@ static struct fb_deferred_io_pageref *fb_deferred_io_pageref_get(struct fb_info
 		 * pages. If possible, drivers should try to work with
 		 * unsorted page lists instead.
 		 */
-		list_for_each_entry(cur, &fbdefio->pagereflist, list) {
+		list_for_each_entry(cur, &fbdefio_state->pagereflist, list) {
 			if (cur->offset > pageref->offset)
 				break;
 		}
@@ -222,7 +232,7 @@ static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf)
 	if (!vmf->vma->vm_file)
 		fb_err(info, "no mapping available\n");
 
-	BUG_ON(!info->fbdefio->mapping);
+	fb_WARN_ON_ONCE(info, !fbdefio_state->mapping);
 
 	mutex_unlock(&fbdefio_state->lock);
 
@@ -364,20 +374,20 @@ static void fb_deferred_io_work(struct work_struct *work)
 	/* here we wrprotect the page's mappings, then do all deferred IO. */
 	mutex_lock(&fbdefio_state->lock);
 #ifdef CONFIG_MMU
-	list_for_each_entry(pageref, &fbdefio->pagereflist, list) {
+	list_for_each_entry(pageref, &fbdefio_state->pagereflist, list) {
 		struct page *page = pageref->page;
 		pgoff_t pgoff = pageref->offset >> PAGE_SHIFT;
 
-		mapping_wrprotect_range(fbdefio->mapping, pgoff,
+		mapping_wrprotect_range(fbdefio_state->mapping, pgoff,
 					page_to_pfn(page), 1);
 	}
 #endif
 
 	/* driver's callback with pagereflist */
-	fbdefio->deferred_io(info, &fbdefio->pagereflist);
+	fbdefio->deferred_io(info, &fbdefio_state->pagereflist);
 
 	/* clear the list */
-	list_for_each_entry_safe(pageref, next, &fbdefio->pagereflist, list)
+	list_for_each_entry_safe(pageref, next, &fbdefio_state->pagereflist, list)
 		fb_deferred_io_pageref_put(pageref, info);
 
 	mutex_unlock(&fbdefio_state->lock);
@@ -402,7 +412,6 @@ int fb_deferred_io_init(struct fb_info *info)
 	fbdefio_state->info = info;
 
 	INIT_DELAYED_WORK(&info->deferred_work, fb_deferred_io_work);
-	INIT_LIST_HEAD(&fbdefio->pagereflist);
 	if (fbdefio->delay == 0) /* set a default of 1 s */
 		fbdefio->delay = HZ;
 
@@ -431,11 +440,11 @@ void fb_deferred_io_open(struct fb_info *info,
 			 struct inode *inode,
 			 struct file *file)
 {
-	struct fb_deferred_io *fbdefio = info->fbdefio;
+	struct fb_deferred_io_state *fbdefio_state = info->fbdefio_state;
 
-	fbdefio->mapping = file->f_mapping;
+	fbdefio_state->mapping = file->f_mapping;
 	file->f_mapping->a_ops = &fb_deferred_io_aops;
-	fbdefio->open_count++;
+	fbdefio_state->open_count++;
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_open);
 
@@ -446,16 +455,15 @@ static void fb_deferred_io_lastclose(struct fb_info *info)
 
 void fb_deferred_io_release(struct fb_info *info)
 {
-	struct fb_deferred_io *fbdefio = info->fbdefio;
+	struct fb_deferred_io_state *fbdefio_state = info->fbdefio_state;
 
-	if (!--fbdefio->open_count)
+	if (!--fbdefio_state->open_count)
 		fb_deferred_io_lastclose(info);
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_release);
 
 void fb_deferred_io_cleanup(struct fb_info *info)
 {
-	struct fb_deferred_io *fbdefio = info->fbdefio;
 	struct fb_deferred_io_state *fbdefio_state = info->fbdefio_state;
 
 	fb_deferred_io_lastclose(info);
@@ -469,6 +477,5 @@ void fb_deferred_io_cleanup(struct fb_info *info)
 	fb_deferred_io_state_put(fbdefio_state);
 
 	kvfree(info->pagerefs);
-	fbdefio->mapping = NULL;
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_cleanup);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index aed17567fe50..2791777f3a50 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -217,9 +217,6 @@ struct fb_deferred_io {
 	/* delay between mkwrite and deferred handler */
 	unsigned long delay;
 	bool sort_pagereflist; /* sort pagelist by offset */
-	int open_count; /* number of opened files; protected by fb_info lock */
-	struct list_head pagereflist; /* list of pagerefs for touched pages */
-	struct address_space *mapping; /* page cache object for fb device */
 	/* callback */
 	struct page *(*get_page)(struct fb_info *info, unsigned long offset);
 	void (*deferred_io)(struct fb_info *info, struct list_head *pagelist);
-- 
cgit v1.2.3


From 02fe86e5fc22faee9b29e761d9fdd17fdfe583e6 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Feb 2026 09:25:57 +0100
Subject: fbdev: defio: Move pageref array to struct fb_deferred_io_state

The pageref array stores all pageref structures for a device's defio
helpers. Move it into struct fb_deferred_io_state to not expose it to
drivers.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/core/fb_defio.c | 55 ++++++++++++++++++-------------------
 include/linux/fb.h                  |  2 --
 2 files changed, 27 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index 35ac13727da1..a12dd25ab697 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -41,28 +41,46 @@ struct fb_deferred_io_state {
 	/* fields protected by lock */
 	struct fb_info *info;
 	struct list_head pagereflist; /* list of pagerefs for touched pages */
+	unsigned long npagerefs;
+	struct fb_deferred_io_pageref *pagerefs;
 };
 
-static struct fb_deferred_io_state *fb_deferred_io_state_alloc(void)
+static struct fb_deferred_io_state *fb_deferred_io_state_alloc(unsigned long len)
 {
 	struct fb_deferred_io_state *fbdefio_state;
+	struct fb_deferred_io_pageref *pagerefs;
+	unsigned long npagerefs;
 
 	fbdefio_state = kzalloc_obj(*fbdefio_state);
 	if (!fbdefio_state)
 		return NULL;
 
+	npagerefs = DIV_ROUND_UP(len, PAGE_SIZE);
+
+	/* alloc a page ref for each page of the display memory */
+	pagerefs = kvzalloc_objs(*pagerefs, npagerefs);
+	if (!pagerefs)
+		goto err_kfree;
+	fbdefio_state->npagerefs = npagerefs;
+	fbdefio_state->pagerefs = pagerefs;
+
 	kref_init(&fbdefio_state->ref);
 	mutex_init(&fbdefio_state->lock);
 
 	INIT_LIST_HEAD(&fbdefio_state->pagereflist);
 
 	return fbdefio_state;
+
+err_kfree:
+	kfree(fbdefio_state);
+	return NULL;
 }
 
 static void fb_deferred_io_state_release(struct fb_deferred_io_state *fbdefio_state)
 {
 	WARN_ON(!list_empty(&fbdefio_state->pagereflist));
 	mutex_destroy(&fbdefio_state->lock);
+	kvfree(fbdefio_state->pagerefs);
 
 	kfree(fbdefio_state);
 }
@@ -125,18 +143,19 @@ static struct page *fb_deferred_io_get_page(struct fb_info *info, unsigned long
 	return page;
 }
 
-static struct fb_deferred_io_pageref *fb_deferred_io_pageref_lookup(struct fb_info *info,
-								    unsigned long offset,
-								    struct page *page)
+static struct fb_deferred_io_pageref *
+fb_deferred_io_pageref_lookup(struct fb_deferred_io_state *fbdefio_state, unsigned long offset,
+			      struct page *page)
 {
+	struct fb_info *info = fbdefio_state->info;
 	unsigned long pgoff = offset >> PAGE_SHIFT;
 	struct fb_deferred_io_pageref *pageref;
 
-	if (fb_WARN_ON_ONCE(info, pgoff >= info->npagerefs))
+	if (fb_WARN_ON_ONCE(info, pgoff >= fbdefio_state->npagerefs))
 		return NULL; /* incorrect allocation size */
 
 	/* 1:1 mapping between pageref and page offset */
-	pageref = &info->pagerefs[pgoff];
+	pageref = &fbdefio_state->pagerefs[pgoff];
 
 	if (pageref->page)
 		goto out;
@@ -160,7 +179,7 @@ static struct fb_deferred_io_pageref *fb_deferred_io_pageref_get(struct fb_info
 	struct list_head *pos = &fbdefio_state->pagereflist;
 	struct fb_deferred_io_pageref *pageref, *cur;
 
-	pageref = fb_deferred_io_pageref_lookup(info, offset, page);
+	pageref = fb_deferred_io_pageref_lookup(fbdefio_state, offset, page);
 	if (!pageref)
 		return NULL;
 
@@ -397,16 +416,13 @@ int fb_deferred_io_init(struct fb_info *info)
 {
 	struct fb_deferred_io *fbdefio = info->fbdefio;
 	struct fb_deferred_io_state *fbdefio_state;
-	struct fb_deferred_io_pageref *pagerefs;
-	unsigned long npagerefs;
-	int ret;
 
 	BUG_ON(!fbdefio);
 
 	if (WARN_ON(!info->fix.smem_len))
 		return -EINVAL;
 
-	fbdefio_state = fb_deferred_io_state_alloc();
+	fbdefio_state = fb_deferred_io_state_alloc(info->fix.smem_len);
 	if (!fbdefio_state)
 		return -ENOMEM;
 	fbdefio_state->info = info;
@@ -415,24 +431,9 @@ int fb_deferred_io_init(struct fb_info *info)
 	if (fbdefio->delay == 0) /* set a default of 1 s */
 		fbdefio->delay = HZ;
 
-	npagerefs = DIV_ROUND_UP(info->fix.smem_len, PAGE_SIZE);
-
-	/* alloc a page ref for each page of the display memory */
-	pagerefs = kvzalloc_objs(*pagerefs, npagerefs);
-	if (!pagerefs) {
-		ret = -ENOMEM;
-		goto err;
-	}
-	info->npagerefs = npagerefs;
-	info->pagerefs = pagerefs;
-
 	info->fbdefio_state = fbdefio_state;
 
 	return 0;
-
-err:
-	fb_deferred_io_state_release(fbdefio_state);
-	return ret;
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_init);
 
@@ -475,7 +476,5 @@ void fb_deferred_io_cleanup(struct fb_info *info)
 	mutex_unlock(&fbdefio_state->lock);
 
 	fb_deferred_io_state_put(fbdefio_state);
-
-	kvfree(info->pagerefs);
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_cleanup);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 2791777f3a50..b27943719fab 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -482,8 +482,6 @@ struct fb_info {
 
 #ifdef CONFIG_FB_DEFERRED_IO
 	struct delayed_work deferred_work;
-	unsigned long npagerefs;
-	struct fb_deferred_io_pageref *pagerefs;
 	struct fb_deferred_io *fbdefio;
 	struct fb_deferred_io_state *fbdefio_state;
 #endif
-- 
cgit v1.2.3


From 993bcaf32c494014f56357f6cdd87fdfaa4e4d11 Mon Sep 17 00:00:00 2001
From: Josua Mayer <josua@solid-run.com>
Date: Thu, 26 Feb 2026 15:21:11 +0200
Subject: mux: Add helper functions for getting optional and selected mux-state

In-tree phy-can-transceiver and phy_rcar_gen3_usb2 have already
implemented local versions of devm_mux_state_get_optional.

The omap-i2c driver gets and selects an optional mux in its probe
function without using any helper.

Add new helper functions covering both aforementioned use-cases:

- mux_control_get_optional:
  Get a mux-control if specified in dt, return NULL otherwise.
- devm_mux_state_get_optional:
  Get a mux-state if specified in dt, return NULL otherwise.
- devm_mux_state_get_selected:
  Get and select a mux-state specified in dt, return error otherwise.
- devm_mux_state_get_optional_selected:
  Get and select a mux-state if specified in dt, return error or NULL.

Existing mux_get helper function is changed to take an extra argument
indicating whether the mux is optional.
In this case no error is printed, and NULL returned in case of ENOENT.

Calling code is adapted to handle NULL return case, and to pass optional
argument as required.

To support automatic deselect for _selected helper, a new structure is
created storing an exit pointer similar to clock core which is called on
release.

To facilitate code sharing between optional/mandatory/selected helpers,
a new internal helper function is added to handle quiet (optional) and
verbose (mandatory) errors, as well as storing the correct callback for
devm release: __devm_mux_state_get

Due to this structure devm_mux_state_get_*_selected can no longer print
a useful error message when select fails. Instead callers should print
errors where needed.

Commit e153fdea9db04 ("phy: can-transceiver: Re-instate "mux-states"
property presence check") noted that "mux_get() always prints an error
message in case of an error, including when the property is not present,
confusing the user."

The first error message covers the case that a mux name is not matched
in dt. The second error message is based on of_parse_phandle_with_args
return value.

In optional case no error is printed and NULL is returned.
This ensures that the new helper functions will not confuse the user
either.

With the addition of optional helper functions it became clear that
drivers should compile and link even if CONFIG_MULTIPLEXER was not enabled.
Add stubs for all symbols exported by mux core.

Acked-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Josua Mayer <josua@solid-run.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mux/core.c           | 194 +++++++++++++++++++++++++++++++++++++------
 include/linux/mux/consumer.h | 108 +++++++++++++++++++++++-
 2 files changed, 271 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mux/core.c b/drivers/mux/core.c
index f09ee8782e3d..23538de2c91b 100644
--- a/drivers/mux/core.c
+++ b/drivers/mux/core.c
@@ -46,6 +46,16 @@ static const struct class mux_class = {
 	.name = "mux",
 };
 
+/**
+ * struct devm_mux_state_state -	Tracks managed resources for mux-state objects.
+ * @mstate:				Pointer to a mux state.
+ * @exit:				An optional callback to execute before free.
+ */
+struct devm_mux_state_state {
+	struct mux_state *mstate;
+	int (*exit)(struct mux_state *mstate);
+};
+
 static DEFINE_IDA(mux_ida);
 
 static int __init mux_init(void)
@@ -516,17 +526,19 @@ static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
 	return dev ? to_mux_chip(dev) : NULL;
 }
 
-/*
+/**
  * mux_get() - Get the mux-control for a device.
  * @dev: The device that needs a mux-control.
  * @mux_name: The name identifying the mux-control.
  * @state: Pointer to where the requested state is returned, or NULL when
  *         the required multiplexer states are handled by other means.
+ * @optional: Whether to return NULL and silence errors when mux doesn't exist.
  *
- * Return: A pointer to the mux-control, or an ERR_PTR with a negative errno.
+ * Return: Pointer to the mux-control on success, an ERR_PTR with a negative
+ * errno on error, or NULL if optional is true and mux doesn't exist.
  */
 static struct mux_control *mux_get(struct device *dev, const char *mux_name,
-				   unsigned int *state)
+				   unsigned int *state, bool optional)
 {
 	struct device_node *np = dev->of_node;
 	struct of_phandle_args args;
@@ -542,7 +554,9 @@ static struct mux_control *mux_get(struct device *dev, const char *mux_name,
 		else
 			index = of_property_match_string(np, "mux-control-names",
 							 mux_name);
-		if (index < 0) {
+		if (index < 0 && optional) {
+			return NULL;
+		} else if (index < 0) {
 			dev_err(dev, "mux controller '%s' not found\n",
 				mux_name);
 			return ERR_PTR(index);
@@ -558,8 +572,12 @@ static struct mux_control *mux_get(struct device *dev, const char *mux_name,
 						 "mux-controls", "#mux-control-cells",
 						 index, &args);
 	if (ret) {
+		if (optional && ret == -ENOENT)
+			return NULL;
+
 		dev_err(dev, "%pOF: failed to get mux-%s %s(%i)\n",
-			np, state ? "state" : "control", mux_name ?: "", index);
+			np, state ? "state" : "control",
+			mux_name ?: "", index);
 		return ERR_PTR(ret);
 	}
 
@@ -617,10 +635,29 @@ static struct mux_control *mux_get(struct device *dev, const char *mux_name,
  */
 struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
 {
-	return mux_get(dev, mux_name, NULL);
+	struct mux_control *mux = mux_get(dev, mux_name, NULL, false);
+
+	if (!mux)
+		return ERR_PTR(-ENOENT);
+
+	return mux;
 }
 EXPORT_SYMBOL_GPL(mux_control_get);
 
+/**
+ * mux_control_get_optional() - Get the optional mux-control for a device.
+ * @dev: The device that needs a mux-control.
+ * @mux_name: The name identifying the mux-control.
+ *
+ * Return: Pointer to the mux-control on success, an ERR_PTR with a negative
+ * errno on error, or NULL if mux doesn't exist.
+ */
+struct mux_control *mux_control_get_optional(struct device *dev, const char *mux_name)
+{
+	return mux_get(dev, mux_name, NULL, true);
+}
+EXPORT_SYMBOL_GPL(mux_control_get_optional);
+
 /**
  * mux_control_put() - Put away the mux-control for good.
  * @mux: The mux-control to put away.
@@ -670,14 +707,16 @@ struct mux_control *devm_mux_control_get(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(devm_mux_control_get);
 
-/*
+/**
  * mux_state_get() - Get the mux-state for a device.
  * @dev: The device that needs a mux-state.
  * @mux_name: The name identifying the mux-state.
+ * @optional: Whether to return NULL and silence errors when mux doesn't exist.
  *
- * Return: A pointer to the mux-state, or an ERR_PTR with a negative errno.
+ * Return: Pointer to the mux-state on success, an ERR_PTR with a negative
+ * errno on error, or NULL if optional is true and mux doesn't exist.
  */
-static struct mux_state *mux_state_get(struct device *dev, const char *mux_name)
+static struct mux_state *mux_state_get(struct device *dev, const char *mux_name, bool optional)
 {
 	struct mux_state *mstate;
 
@@ -685,12 +724,15 @@ static struct mux_state *mux_state_get(struct device *dev, const char *mux_name)
 	if (!mstate)
 		return ERR_PTR(-ENOMEM);
 
-	mstate->mux = mux_get(dev, mux_name, &mstate->state);
+	mstate->mux = mux_get(dev, mux_name, &mstate->state, optional);
 	if (IS_ERR(mstate->mux)) {
 		int err = PTR_ERR(mstate->mux);
 
 		kfree(mstate);
 		return ERR_PTR(err);
+	} else if (!mstate->mux) {
+		kfree(mstate);
+		return optional ? NULL : ERR_PTR(-ENOENT);
 	}
 
 	return mstate;
@@ -710,9 +752,66 @@ static void mux_state_put(struct mux_state *mstate)
 
 static void devm_mux_state_release(struct device *dev, void *res)
 {
-	struct mux_state *mstate = *(struct mux_state **)res;
+	struct devm_mux_state_state *devm_state = res;
+
+	if (devm_state->exit)
+		devm_state->exit(devm_state->mstate);
+
+	mux_state_put(devm_state->mstate);
+}
+
+/**
+ * __devm_mux_state_get() - Get the optional mux-state for a device,
+ *			    with resource management.
+ * @dev: The device that needs a mux-state.
+ * @mux_name: The name identifying the mux-state.
+ * @optional: Whether to return NULL and silence errors when mux doesn't exist.
+ * @init: Optional function pointer for mux-state object initialisation.
+ * @exit: Optional function pointer for mux-state object cleanup on release.
+ *
+ * Return: Pointer to the mux-state on success, an ERR_PTR with a negative
+ * errno on error, or NULL if optional is true and mux doesn't exist.
+ */
+static struct mux_state *__devm_mux_state_get(struct device *dev, const char *mux_name,
+					      bool optional,
+					      int (*init)(struct mux_state *mstate),
+					      int (*exit)(struct mux_state *mstate))
+{
+	struct devm_mux_state_state *devm_state;
+	struct mux_state *mstate;
+	int ret;
+
+	mstate = mux_state_get(dev, mux_name, optional);
+	if (IS_ERR(mstate))
+		return ERR_CAST(mstate);
+	else if (optional && !mstate)
+		return NULL;
+	else if (!mstate)
+		return ERR_PTR(-ENOENT);
+
+	devm_state = devres_alloc(devm_mux_state_release, sizeof(*devm_state), GFP_KERNEL);
+	if (!devm_state) {
+		ret = -ENOMEM;
+		goto err_devres_alloc;
+	}
+
+	if (init) {
+		ret = init(mstate);
+		if (ret)
+			goto err_mux_state_init;
+	}
+
+	devm_state->mstate = mstate;
+	devm_state->exit = exit;
+	devres_add(dev, devm_state);
 
+	return mstate;
+
+err_mux_state_init:
+	devres_free(devm_state);
+err_devres_alloc:
 	mux_state_put(mstate);
+	return ERR_PTR(ret);
 }
 
 /**
@@ -722,28 +821,69 @@ static void devm_mux_state_release(struct device *dev, void *res)
  * @mux_name: The name identifying the mux-control.
  *
  * Return: Pointer to the mux-state, or an ERR_PTR with a negative errno.
+ *
+ * The mux-state will automatically be freed on release.
  */
-struct mux_state *devm_mux_state_get(struct device *dev,
-				     const char *mux_name)
+struct mux_state *devm_mux_state_get(struct device *dev, const char *mux_name)
 {
-	struct mux_state **ptr, *mstate;
-
-	ptr = devres_alloc(devm_mux_state_release, sizeof(*ptr), GFP_KERNEL);
-	if (!ptr)
-		return ERR_PTR(-ENOMEM);
+	return __devm_mux_state_get(dev, mux_name, false, NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(devm_mux_state_get);
 
-	mstate = mux_state_get(dev, mux_name);
-	if (IS_ERR(mstate)) {
-		devres_free(ptr);
-		return mstate;
-	}
+/**
+ * devm_mux_state_get_optional() - Get the optional mux-state for a device,
+ *				   with resource management.
+ * @dev: The device that needs a mux-state.
+ * @mux_name: The name identifying the mux-state.
+ *
+ * Return: Pointer to the mux-state on success, an ERR_PTR with a negative
+ * errno on error, or NULL if mux doesn't exist.
+ *
+ * The mux-state will automatically be freed on release.
+ */
+struct mux_state *devm_mux_state_get_optional(struct device *dev, const char *mux_name)
+{
+	return __devm_mux_state_get(dev, mux_name, true, NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(devm_mux_state_get_optional);
 
-	*ptr = mstate;
-	devres_add(dev, ptr);
+/**
+ * devm_mux_state_get_selected() - Get the mux-state for a device, with
+ *				   resource management.
+ * @dev: The device that needs a mux-state.
+ * @mux_name: The name identifying the mux-state.
+ *
+ * Return: Pointer to the mux-state, or an ERR_PTR with a negative errno.
+ *
+ * The returned mux-state (if valid) is already selected.
+ *
+ * The mux-state will automatically be deselected and freed on release.
+ */
+struct mux_state *devm_mux_state_get_selected(struct device *dev, const char *mux_name)
+{
+	return __devm_mux_state_get(dev, mux_name, false, mux_state_select, mux_state_deselect);
+}
+EXPORT_SYMBOL_GPL(devm_mux_state_get_selected);
 
-	return mstate;
+/**
+ * devm_mux_state_get_optional_selected() - Get the optional mux-state for
+ *					    a device, with resource management.
+ * @dev: The device that needs a mux-state.
+ * @mux_name: The name identifying the mux-state.
+ *
+ * Return: Pointer to the mux-state on success, an ERR_PTR with a negative
+ * errno on error, or NULL if mux doesn't exist.
+ *
+ * The returned mux-state (if valid) is already selected.
+ *
+ * The mux-state will automatically be deselected and freed on release.
+ */
+struct mux_state *devm_mux_state_get_optional_selected(struct device *dev,
+						       const char *mux_name)
+{
+	return __devm_mux_state_get(dev, mux_name, true, mux_state_select, mux_state_deselect);
 }
-EXPORT_SYMBOL_GPL(devm_mux_state_get);
+EXPORT_SYMBOL_GPL(devm_mux_state_get_optional_selected);
 
 /*
  * Using subsys_initcall instead of module_init here to try to ensure - for
diff --git a/include/linux/mux/consumer.h b/include/linux/mux/consumer.h
index 2e25c838f831..a961861a503b 100644
--- a/include/linux/mux/consumer.h
+++ b/include/linux/mux/consumer.h
@@ -16,6 +16,8 @@ struct device;
 struct mux_control;
 struct mux_state;
 
+#if IS_ENABLED(CONFIG_MULTIPLEXER)
+
 unsigned int mux_control_states(struct mux_control *mux);
 int __must_check mux_control_select_delay(struct mux_control *mux,
 					  unsigned int state,
@@ -54,11 +56,109 @@ int mux_control_deselect(struct mux_control *mux);
 int mux_state_deselect(struct mux_state *mstate);
 
 struct mux_control *mux_control_get(struct device *dev, const char *mux_name);
+struct mux_control *mux_control_get_optional(struct device *dev, const char *mux_name);
 void mux_control_put(struct mux_control *mux);
 
-struct mux_control *devm_mux_control_get(struct device *dev,
-					 const char *mux_name);
-struct mux_state *devm_mux_state_get(struct device *dev,
-				     const char *mux_name);
+struct mux_control *devm_mux_control_get(struct device *dev, const char *mux_name);
+struct mux_state *devm_mux_state_get(struct device *dev, const char *mux_name);
+struct mux_state *devm_mux_state_get_optional(struct device *dev, const char *mux_name);
+struct mux_state *devm_mux_state_get_selected(struct device *dev, const char *mux_name);
+struct mux_state *devm_mux_state_get_optional_selected(struct device *dev, const char *mux_name);
+
+#else
+
+static inline unsigned int mux_control_states(struct mux_control *mux)
+{
+	return 0;
+}
+static inline int __must_check mux_control_select_delay(struct mux_control *mux,
+							unsigned int state, unsigned int delay_us)
+{
+	return -EOPNOTSUPP;
+}
+static inline int __must_check mux_state_select_delay(struct mux_state *mstate,
+						      unsigned int delay_us)
+{
+	return -EOPNOTSUPP;
+}
+static inline int __must_check mux_control_try_select_delay(struct mux_control *mux,
+							    unsigned int state,
+							    unsigned int delay_us)
+{
+	return -EOPNOTSUPP;
+}
+static inline int __must_check mux_state_try_select_delay(struct mux_state *mstate,
+							  unsigned int delay_us)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int __must_check mux_control_select(struct mux_control *mux,
+						  unsigned int state)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int __must_check mux_state_select(struct mux_state *mstate)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int __must_check mux_control_try_select(struct mux_control *mux,
+						      unsigned int state)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int __must_check mux_state_try_select(struct mux_state *mstate)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mux_control_deselect(struct mux_control *mux)
+{
+	return -EOPNOTSUPP;
+}
+static inline int mux_state_deselect(struct mux_state *mstate)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline struct mux_control *mux_control_get_optional(struct device *dev,
+							   const char *mux_name)
+{
+	return NULL;
+}
+static inline void mux_control_put(struct mux_control *mux) {}
+
+static inline struct mux_control *devm_mux_control_get(struct device *dev, const char *mux_name)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline struct mux_state *devm_mux_state_get(struct device *dev, const char *mux_name)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline struct mux_state *devm_mux_state_get_optional(struct device *dev,
+							    const char *mux_name)
+{
+	return NULL;
+}
+static inline struct mux_state *devm_mux_state_get_selected(struct device *dev,
+							    const char *mux_name)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline struct mux_state *devm_mux_state_get_optional_selected(struct device *dev,
+								     const char *mux_name)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_MULTIPLEXER */
 
 #endif /* _LINUX_MUX_CONSUMER_H */
-- 
cgit v1.2.3


From 7ea25eaad5ae3a6c837a3df9bdb822194f002565 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:01 -0800
Subject: block: factor out a bio_integrity_action helper

Split the logic to see if a bio needs integrity metadata from
bio_integrity_prep into a reusable helper than can be called from
file system code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity-auto.c    | 64 ++++++-------------------------------------
 block/bio-integrity.c         | 48 ++++++++++++++++++++++++++++++++
 block/blk-mq.c                |  6 ++--
 drivers/nvdimm/btt.c          |  6 ++--
 include/linux/bio-integrity.h |  5 ++--
 include/linux/blk-integrity.h | 23 ++++++++++++++++
 6 files changed, 89 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c
index 44dcdf7520c5..e16f669dbf1e 100644
--- a/block/bio-integrity-auto.c
+++ b/block/bio-integrity-auto.c
@@ -50,11 +50,6 @@ static bool bip_should_check(struct bio_integrity_payload *bip)
 	return bip->bip_flags & BIP_CHECK_FLAGS;
 }
 
-static bool bi_offload_capable(struct blk_integrity *bi)
-{
-	return bi->metadata_size == bi->pi_tuple_size;
-}
-
 /**
  * __bio_integrity_endio - Integrity I/O completion function
  * @bio:	Protected bio
@@ -84,69 +79,27 @@ bool __bio_integrity_endio(struct bio *bio)
 /**
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare
+ * @action:	preparation action needed (BI_ACT_*)
+ *
+ * Allocate the integrity payload.  For writes, generate the integrity metadata
+ * and for reads, setup the completion handler to verify the metadata.
  *
- * Checks if the bio already has an integrity payload attached.  If it does, the
- * payload has been generated by another kernel subsystem, and we just pass it
- * through.
- * Otherwise allocates integrity payload and for writes the integrity metadata
- * will be generated.  For reads, the completion handler will verify the
- * metadata.
+ * This is used for bios that do not have user integrity payloads attached.
  */
-bool bio_integrity_prep(struct bio *bio)
+void bio_integrity_prep(struct bio *bio, unsigned int action)
 {
 	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 	struct bio_integrity_data *bid;
-	bool set_flags = true;
-	gfp_t gfp = GFP_NOIO;
-
-	if (!bi)
-		return true;
-
-	if (!bio_sectors(bio))
-		return true;
-
-	/* Already protected? */
-	if (bio_integrity(bio))
-		return true;
-
-	switch (bio_op(bio)) {
-	case REQ_OP_READ:
-		if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
-			if (bi_offload_capable(bi))
-				return true;
-			set_flags = false;
-		}
-		break;
-	case REQ_OP_WRITE:
-		/*
-		 * Zero the memory allocated to not leak uninitialized kernel
-		 * memory to disk for non-integrity metadata where nothing else
-		 * initializes the memory.
-		 */
-		if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
-			if (bi_offload_capable(bi))
-				return true;
-			set_flags = false;
-			gfp |= __GFP_ZERO;
-		} else if (bi->metadata_size > bi->pi_tuple_size)
-			gfp |= __GFP_ZERO;
-		break;
-	default:
-		return true;
-	}
-
-	if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
-		return true;
 
 	bid = mempool_alloc(&bid_pool, GFP_NOIO);
 	bio_integrity_init(bio, &bid->bip, &bid->bvec, 1);
 	bid->bio = bio;
 	bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
-	bio_integrity_alloc_buf(bio, gfp & __GFP_ZERO);
+	bio_integrity_alloc_buf(bio, action & BI_ACT_ZERO);
 
 	bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
 
-	if (set_flags) {
+	if (action & BI_ACT_CHECK) {
 		if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
 			bid->bip.bip_flags |= BIP_IP_CHECKSUM;
 		if (bi->csum_type)
@@ -160,7 +113,6 @@ bool bio_integrity_prep(struct bio *bio)
 		blk_integrity_generate(bio);
 	else
 		bid->saved_bio_iter = bio->bi_iter;
-	return true;
 }
 EXPORT_SYMBOL(bio_integrity_prep);
 
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 20f5d301d32d..0955be90038b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/blk-integrity.h>
+#include <linux/t10-pi.h>
 #include "blk.h"
 
 struct bio_integrity_alloc {
@@ -16,6 +17,53 @@ struct bio_integrity_alloc {
 
 static mempool_t integrity_buf_pool;
 
+static bool bi_offload_capable(struct blk_integrity *bi)
+{
+	return bi->metadata_size == bi->pi_tuple_size;
+}
+
+unsigned int __bio_integrity_action(struct bio *bio)
+{
+	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+
+	if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
+		return 0;
+
+	switch (bio_op(bio)) {
+	case REQ_OP_READ:
+		if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
+			if (bi_offload_capable(bi))
+				return 0;
+			return BI_ACT_BUFFER;
+		}
+		return BI_ACT_BUFFER | BI_ACT_CHECK;
+	case REQ_OP_WRITE:
+		/*
+		 * Flush masquerading as write?
+		 */
+		if (!bio_sectors(bio))
+			return 0;
+
+		/*
+		 * Zero the memory allocated to not leak uninitialized kernel
+		 * memory to disk for non-integrity metadata where nothing else
+		 * initializes the memory.
+		 */
+		if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
+			if (bi_offload_capable(bi))
+				return 0;
+			return BI_ACT_BUFFER | BI_ACT_ZERO;
+		}
+
+		if (bi->metadata_size > bi->pi_tuple_size)
+			return BI_ACT_BUFFER | BI_ACT_CHECK | BI_ACT_ZERO;
+		return BI_ACT_BUFFER | BI_ACT_CHECK;
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(__bio_integrity_action);
+
 void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer)
 {
 	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9af8c3dec3f6..0b311a797178 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3143,6 +3143,7 @@ void blk_mq_submit_bio(struct bio *bio)
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	struct blk_plug *plug = current->plug;
 	const int is_sync = op_is_sync(bio->bi_opf);
+	unsigned int integrity_action;
 	struct blk_mq_hw_ctx *hctx;
 	unsigned int nr_segs;
 	struct request *rq;
@@ -3195,8 +3196,9 @@ void blk_mq_submit_bio(struct bio *bio)
 	if (!bio)
 		goto queue_exit;
 
-	if (!bio_integrity_prep(bio))
-		goto queue_exit;
+	integrity_action = bio_integrity_action(bio);
+	if (integrity_action)
+		bio_integrity_prep(bio, integrity_action);
 
 	blk_mq_bio_issue_init(q, bio);
 	if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index b6bef092f8b8..fdcb080a4314 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1435,14 +1435,16 @@ static void btt_submit_bio(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct btt *btt = bio->bi_bdev->bd_disk->private_data;
+	unsigned int integrity_action;
 	struct bvec_iter iter;
 	unsigned long start;
 	struct bio_vec bvec;
 	int err = 0;
 	bool do_acct;
 
-	if (!bio_integrity_prep(bio))
-		return;
+	integrity_action = bio_integrity_action(bio);
+	if (integrity_action)
+		bio_integrity_prep(bio, integrity_action);
 
 	do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
 	if (do_acct)
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 21e4652dcfd2..276cbbdd2c9d 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -78,7 +78,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
 int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter);
 int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta);
 void bio_integrity_unmap_user(struct bio *bio);
-bool bio_integrity_prep(struct bio *bio);
+void bio_integrity_prep(struct bio *bio, unsigned int action);
 void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
 void bio_integrity_trim(struct bio *bio);
 int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask);
@@ -104,9 +104,8 @@ static inline void bio_integrity_unmap_user(struct bio *bio)
 {
 }
 
-static inline bool bio_integrity_prep(struct bio *bio)
+static inline void bio_integrity_prep(struct bio *bio, unsigned int action)
 {
-	return true;
 }
 
 static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index c15b1ac62765..fd3f3c8c0fcd 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -180,4 +180,27 @@ static inline struct bio_vec rq_integrity_vec(struct request *rq)
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+enum bio_integrity_action {
+	BI_ACT_BUFFER		= (1u << 0),	/* allocate buffer */
+	BI_ACT_CHECK		= (1u << 1),	/* generate / verify PI */
+	BI_ACT_ZERO		= (1u << 2),	/* zero buffer */
+};
+
+/**
+ * bio_integrity_action - return the integrity action needed for a bio
+ * @bio:	bio to operate on
+ *
+ * Returns the mask of integrity actions (BI_ACT_*) that need to be performed
+ * for @bio.
+ */
+unsigned int __bio_integrity_action(struct bio *bio);
+static inline unsigned int bio_integrity_action(struct bio *bio)
+{
+	if (!blk_get_integrity(bio->bi_bdev->bd_disk))
+		return 0;
+	if (bio_integrity(bio))
+		return 0;
+	return __bio_integrity_action(bio);
+}
+
 #endif /* _LINUX_BLK_INTEGRITY_H */
-- 
cgit v1.2.3


From a936655697cd8d1bab2fd5189e2c33dd6356a266 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:02 -0800
Subject: block: factor out a bio_integrity_setup_default helper

Add a helper to set the seed and check flag based on useful defaults
from the profile.

Note that this includes a small behavior change, as we now only set the
seed if any action is set, which is fine as nothing will look at it
otherwise.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity-auto.c    | 14 ++------------
 block/bio-integrity.c         | 16 ++++++++++++++++
 include/linux/bio-integrity.h |  1 +
 3 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c
index e16f669dbf1e..b64c71a7fc82 100644
--- a/block/bio-integrity-auto.c
+++ b/block/bio-integrity-auto.c
@@ -88,7 +88,6 @@ bool __bio_integrity_endio(struct bio *bio)
  */
 void bio_integrity_prep(struct bio *bio, unsigned int action)
 {
-	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 	struct bio_integrity_data *bid;
 
 	bid = mempool_alloc(&bid_pool, GFP_NOIO);
@@ -96,17 +95,8 @@ void bio_integrity_prep(struct bio *bio, unsigned int action)
 	bid->bio = bio;
 	bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
 	bio_integrity_alloc_buf(bio, action & BI_ACT_ZERO);
-
-	bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
-
-	if (action & BI_ACT_CHECK) {
-		if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
-			bid->bip.bip_flags |= BIP_IP_CHECKSUM;
-		if (bi->csum_type)
-			bid->bip.bip_flags |= BIP_CHECK_GUARD;
-		if (bi->flags & BLK_INTEGRITY_REF_TAG)
-			bid->bip.bip_flags |= BIP_CHECK_REFTAG;
-	}
+	if (action & BI_ACT_CHECK)
+		bio_integrity_setup_default(bio);
 
 	/* Auto-generate integrity metadata if this is a write */
 	if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 0955be90038b..e79eaf047794 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -101,6 +101,22 @@ void bio_integrity_free_buf(struct bio_integrity_payload *bip)
 		kfree(bvec_virt(bv));
 }
 
+void bio_integrity_setup_default(struct bio *bio)
+{
+	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+
+	bip_set_seed(bip, bio->bi_iter.bi_sector);
+
+	if (bi->csum_type) {
+		bip->bip_flags |= BIP_CHECK_GUARD;
+		if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
+			bip->bip_flags |= BIP_IP_CHECKSUM;
+	}
+	if (bi->flags & BLK_INTEGRITY_REF_TAG)
+		bip->bip_flags |= BIP_CHECK_REFTAG;
+}
+
 /**
  * bio_integrity_free - Free bio integrity payload
  * @bio:	bio containing bip to be freed
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 276cbbdd2c9d..232b86b9bbcb 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -143,5 +143,6 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
 
 void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer);
 void bio_integrity_free_buf(struct bio_integrity_payload *bip);
+void bio_integrity_setup_default(struct bio *bio);
 
 #endif /* _LINUX_BIO_INTEGRITY_H */
-- 
cgit v1.2.3


From 7afe93946dff63aa57c6db81f5eb43ac8233364e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:03 -0800
Subject: block: add a bdev_has_integrity_csum helper

Factor out a helper to see if the block device has an integrity checksum
from bdev_stable_writes so that it can be reused for other checks.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d463b9b5a0a5..dec0acaed6e6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1477,14 +1477,18 @@ static inline bool bdev_synchronous(struct block_device *bdev)
 	return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS;
 }
 
-static inline bool bdev_stable_writes(struct block_device *bdev)
+static inline bool bdev_has_integrity_csum(struct block_device *bdev)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
+	struct queue_limits *lim = bdev_limits(bdev);
 
-	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
-	    q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE)
-		return true;
-	return q->limits.features & BLK_FEAT_STABLE_WRITES;
+	return IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+		lim->integrity.csum_type != BLK_INTEGRITY_CSUM_NONE;
+}
+
+static inline bool bdev_stable_writes(struct block_device *bdev)
+{
+	return bdev_has_integrity_csum(bdev) ||
+		(bdev_limits(bdev)->features & BLK_FEAT_STABLE_WRITES);
 }
 
 static inline bool blk_queue_write_cache(struct request_queue *q)
-- 
cgit v1.2.3


From 8c56ef10150ed7650cf4105539242c94c156148c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:05 -0800
Subject: block: make max_integrity_io_size public

File systems that generate integrity will need this, so move it out
of the block private or blk-mq specific headers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c          | 13 -------------
 include/linux/blk-integrity.h |  5 -----
 include/linux/blkdev.h        | 18 ++++++++++++++++++
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index a9e65dc090da..dabfab97fbab 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -123,19 +123,6 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
 	return 0;
 }
 
-/*
- * Maximum size of I/O that needs a block layer integrity buffer.  Limited
- * by the number of intervals for which we can fit the integrity buffer into
- * the buffer size.  Because the buffer is a single segment it is also limited
- * by the maximum segment size.
- */
-static inline unsigned int max_integrity_io_size(struct queue_limits *lim)
-{
-	return min_t(unsigned int, lim->max_segment_size,
-		(BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) <<
-			lim->integrity.interval_exp);
-}
-
 static int blk_validate_integrity_limits(struct queue_limits *lim)
 {
 	struct blk_integrity *bi = &lim->integrity;
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index fd3f3c8c0fcd..ea6d7d322ae3 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -8,11 +8,6 @@
 
 struct request;
 
-/*
- * Maximum contiguous integrity buffer allocation.
- */
-#define BLK_INTEGRITY_MAX_SIZE		SZ_2M
-
 enum blk_integrity_flags {
 	BLK_INTEGRITY_NOVERIFY		= 1 << 0,
 	BLK_INTEGRITY_NOGENERATE	= 1 << 1,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dec0acaed6e6..11857ae13d10 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1881,6 +1881,24 @@ static inline int bio_split_rw_at(struct bio *bio,
 	return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment);
 }
 
+/*
+ * Maximum contiguous integrity buffer allocation.
+ */
+#define BLK_INTEGRITY_MAX_SIZE		SZ_2M
+
+/*
+ * Maximum size of I/O that needs a block layer integrity buffer.  Limited
+ * by the number of intervals for which we can fit the integrity buffer into
+ * the buffer size.  Because the buffer is a single segment it is also limited
+ * by the maximum segment size.
+ */
+static inline unsigned int max_integrity_io_size(struct queue_limits *lim)
+{
+	return min_t(unsigned int, lim->max_segment_size,
+		(BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) <<
+			lim->integrity.interval_exp);
+}
+
 #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
 
 #endif /* _LINUX_BLKDEV_H */
-- 
cgit v1.2.3


From 0bde8a12b5540572a7fd6d2867bee6de15e4f289 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:06 -0800
Subject: block: add fs_bio_integrity helpers

Add a set of helpers for file system initiated integrity information.
These include mempool backed allocations and verifying based on a passed
in sector and size which is often available from file system completion
routines.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Makefile                |  2 +-
 block/bio-integrity-fs.c      | 81 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/bio-integrity.h |  6 ++++
 3 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 block/bio-integrity-fs.c

(limited to 'include/linux')

diff --git a/block/Makefile b/block/Makefile
index c65f4da93702..7dce2e44276c 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -26,7 +26,7 @@ bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
 obj-$(CONFIG_IOSCHED_BFQ)	+= bfq.o
 
 obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o \
-				   bio-integrity-auto.o
+				   bio-integrity-auto.o bio-integrity-fs.o
 obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
 obj-$(CONFIG_BLK_WBT)		+= blk-wbt.o
 obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
diff --git a/block/bio-integrity-fs.c b/block/bio-integrity-fs.c
new file mode 100644
index 000000000000..acb1e5f270d2
--- /dev/null
+++ b/block/bio-integrity-fs.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Christoph Hellwig.
+ */
+#include <linux/blk-integrity.h>
+#include <linux/bio-integrity.h>
+#include "blk.h"
+
+struct fs_bio_integrity_buf {
+	struct bio_integrity_payload	bip;
+	struct bio_vec			bvec;
+};
+
+static struct kmem_cache *fs_bio_integrity_cache;
+static mempool_t fs_bio_integrity_pool;
+
+unsigned int fs_bio_integrity_alloc(struct bio *bio)
+{
+	struct fs_bio_integrity_buf *iib;
+	unsigned int action;
+
+	action = bio_integrity_action(bio);
+	if (!action)
+		return 0;
+
+	iib = mempool_alloc(&fs_bio_integrity_pool, GFP_NOIO);
+	bio_integrity_init(bio, &iib->bip, &iib->bvec, 1);
+
+	bio_integrity_alloc_buf(bio, action & BI_ACT_ZERO);
+	if (action & BI_ACT_CHECK)
+		bio_integrity_setup_default(bio);
+	return action;
+}
+
+void fs_bio_integrity_free(struct bio *bio)
+{
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+
+	bio_integrity_free_buf(bip);
+	mempool_free(container_of(bip, struct fs_bio_integrity_buf, bip),
+			&fs_bio_integrity_pool);
+
+	bio->bi_integrity = NULL;
+	bio->bi_opf &= ~REQ_INTEGRITY;
+}
+
+void fs_bio_integrity_generate(struct bio *bio)
+{
+	if (fs_bio_integrity_alloc(bio))
+		bio_integrity_generate(bio);
+}
+EXPORT_SYMBOL_GPL(fs_bio_integrity_generate);
+
+int fs_bio_integrity_verify(struct bio *bio, sector_t sector, unsigned int size)
+{
+	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+
+	/*
+	 * Reinitialize bip->bip_iter.
+	 *
+	 * This is for use in the submitter after the driver is done with the
+	 * bio.  Requires the submitter to remember the sector and the size.
+	 */
+	memset(&bip->bip_iter, 0, sizeof(bip->bip_iter));
+	bip->bip_iter.bi_sector = sector;
+	bip->bip_iter.bi_size = bio_integrity_bytes(bi, size >> SECTOR_SHIFT);
+	return blk_status_to_errno(bio_integrity_verify(bio, &bip->bip_iter));
+}
+
+static int __init fs_bio_integrity_init(void)
+{
+	fs_bio_integrity_cache = kmem_cache_create("fs_bio_integrity",
+			sizeof(struct fs_bio_integrity_buf), 0,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+	if (mempool_init_slab_pool(&fs_bio_integrity_pool, BIO_POOL_SIZE,
+			fs_bio_integrity_cache))
+		panic("fs_bio_integrity: can't create pool\n");
+	return 0;
+}
+fs_initcall(fs_bio_integrity_init);
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 232b86b9bbcb..af5178434ec6 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -145,4 +145,10 @@ void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer);
 void bio_integrity_free_buf(struct bio_integrity_payload *bip);
 void bio_integrity_setup_default(struct bio *bio);
 
+unsigned int fs_bio_integrity_alloc(struct bio *bio);
+void fs_bio_integrity_free(struct bio *bio);
+void fs_bio_integrity_generate(struct bio *bio);
+int fs_bio_integrity_verify(struct bio *bio, sector_t sector,
+		unsigned int size);
+
 #endif /* _LINUX_BIO_INTEGRITY_H */
-- 
cgit v1.2.3


From a9aa6045abde87b94168c3ba034b953417e27272 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:07 -0800
Subject: block: pass a maxlen argument to bio_iov_iter_bounce

Allow the file system to limit the size processed in a single
bounce operation.  This is needed when generating integrity data
so that the size of a single integrity segment can't overflow.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c          | 17 ++++++++++-------
 fs/iomap/direct-io.c |  2 +-
 include/linux/bio.h  |  2 +-
 3 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index d80d5d26804e..784d2a66d3ae 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1327,9 +1327,10 @@ static void bio_free_folios(struct bio *bio)
 	}
 }
 
-static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter)
+static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter,
+		size_t maxlen)
 {
-	size_t total_len = iov_iter_count(iter);
+	size_t total_len = min(maxlen, iov_iter_count(iter));
 
 	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 		return -EINVAL;
@@ -1367,9 +1368,10 @@ static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter)
 	return 0;
 }
 
-static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter)
+static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
+		size_t maxlen)
 {
-	size_t len = min(iov_iter_count(iter), SZ_1M);
+	size_t len = min3(iov_iter_count(iter), maxlen, SZ_1M);
 	struct folio *folio;
 
 	folio = folio_alloc_greedy(GFP_KERNEL, &len);
@@ -1408,6 +1410,7 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter)
  * bio_iov_iter_bounce - bounce buffer data from an iter into a bio
  * @bio:	bio to send
  * @iter:	iter to read from / write into
+ * @maxlen:	maximum size to bounce
  *
  * Helper for direct I/O implementations that need to bounce buffer because
  * we need to checksum the data or perform other operations that require
@@ -1415,11 +1418,11 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter)
  * copies the data into it.  Needs to be paired with bio_iov_iter_unbounce()
  * called on completion.
  */
-int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter)
+int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen)
 {
 	if (op_is_write(bio_op(bio)))
-		return bio_iov_iter_bounce_write(bio, iter);
-	return bio_iov_iter_bounce_read(bio, iter);
+		return bio_iov_iter_bounce_write(bio, iter, maxlen);
+	return bio_iov_iter_bounce_read(bio, iter, maxlen);
 }
 
 static void bvec_unpin(struct bio_vec *bv, bool mark_dirty)
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 95254aa1b654..21d4fad2eeb8 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -338,7 +338,7 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter,
 	bio->bi_end_io = iomap_dio_bio_end_io;
 
 	if (dio->flags & IOMAP_DIO_BOUNCE)
-		ret = bio_iov_iter_bounce(bio, dio->submit.iter);
+		ret = bio_iov_iter_bounce(bio, dio->submit.iter, BIO_MAX_SIZE);
 	else
 		ret = bio_iov_iter_get_pages(bio, dio->submit.iter,
 					     alignment - 1);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 36a3f2275ecd..9693a0d6fefe 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -474,7 +474,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 
-int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter);
+int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen);
 void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty);
 
 extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
-- 
cgit v1.2.3


From b9e0180b2e6a48532eb80e5cd8793157196586cf Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:43 +0100
Subject: fbdev: Declare src parameter of fb_pad_ helpers as constant

Fbdev's padding helpers do not modify the source buffer. Declare the
parameter as 'const'.

Fbcon's font-rendering code calls these helpers with the font data.
Declaring src as const will allow for making the font data constant
as well.

While at it, also remove the extern qualifier from the function
declarations in the header file.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/core/fbmem.c |  6 +++---
 include/linux/fb.h               | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index cf199038f069..30f2b59c47bf 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -91,14 +91,14 @@ EXPORT_SYMBOL(fb_get_color_depth);
 /*
  * Data padding functions.
  */
-void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height)
+void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, const u8 *src, u32 s_pitch, u32 height)
 {
 	__fb_pad_aligned_buffer(dst, d_pitch, src, s_pitch, height);
 }
 EXPORT_SYMBOL(fb_pad_aligned_buffer);
 
-void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height,
-				u32 shift_high, u32 shift_low, u32 mod)
+void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, const u8 *src, u32 idx, u32 height,
+			     u32 shift_high, u32 shift_low, u32 mod)
 {
 	u8 mask = (u8) (0xff << shift_high), tmp;
 	int i, j;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index b27943719fab..5178a33c752c 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -602,9 +602,9 @@ extern int register_framebuffer(struct fb_info *fb_info);
 extern void unregister_framebuffer(struct fb_info *fb_info);
 extern int devm_register_framebuffer(struct device *dev, struct fb_info *fb_info);
 extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
-extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx,
-				u32 height, u32 shift_high, u32 shift_low, u32 mod);
-extern void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height);
+void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, const u8 *src, u32 idx, u32 height,
+			     u32 shift_high, u32 shift_low, u32 mod);
+void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, const u8 *src, u32 s_pitch, u32 height);
 extern void fb_set_suspend(struct fb_info *info, int state);
 extern int fb_get_color_depth(struct fb_var_screeninfo *var,
 			      struct fb_fix_screeninfo *fix);
@@ -630,8 +630,8 @@ static inline struct device *dev_of_fbinfo(const struct fb_info *info)
 #endif
 }
 
-static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch,
-					   u8 *src, u32 s_pitch, u32 height)
+static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, const u8 *src, u32 s_pitch,
+					   u32 height)
 {
 	u32 i, j;
 
-- 
cgit v1.2.3


From 982f8b002aadef2b5169147b3a60a9eb62f908df Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:44 +0100
Subject: vt: Remove trailing whitespaces

Fix coding style. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 include/linux/console_struct.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 13b35637bd5a..ebdb9750d348 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -120,7 +120,7 @@ struct vc_data {
 	unsigned short	vc_complement_mask;	/* [#] Xor mask for mouse pointer */
 	unsigned short	vc_s_complement_mask;	/* Saved mouse pointer mask */
 	unsigned long	vc_pos;			/* Cursor address */
-	/* fonts */	
+	/* fonts */
 	unsigned short	vc_hi_font_mask;	/* [#] Attribute set for upper 256 chars of font or 0 if not supported */
 	struct console_font vc_font;		/* Current VC font set */
 	unsigned short	vc_video_erase_char;	/* Background erase character */
-- 
cgit v1.2.3


From 61912c607fa9955dcf3fc018b227baa98a6776dc Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:45 +0100
Subject: vt: Store font in struct vc_font

Replace struct console_font with struct vc_font for the type of the
vc_font field of struct vc_data. Struct console_font is UAPI, which
prevents further changes. Hence a new data type is required.

Struct console_font has a documented vertical pitch of 32 bytes. This
is not the case after the font data has been loaded into the kernel.
Changing the type of vc_font addresses this inconsistency.

The font data is now declared as constant, as it might come from the
kernel's read-only section. There's some fallout throughout the console
code where non-const variables refer to it. Fix them. A later update
will declare the font data to a dedicated data type.

v3:
- fix typos

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/core/bitblit.c | 11 +++++------
 drivers/video/fbdev/core/fbcon.c   |  4 ++--
 drivers/video/fbdev/core/fbcon.h   |  4 ++--
 include/linux/console_struct.h     | 29 +++++++++++++++++++++++++++--
 4 files changed, 36 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c
index 085ffb44c51a..7478accea8ec 100644
--- a/drivers/video/fbdev/core/bitblit.c
+++ b/drivers/video/fbdev/core/bitblit.c
@@ -22,8 +22,7 @@
 /*
  * Accelerated handlers.
  */
-static void update_attr(u8 *dst, u8 *src, int attribute,
-			       struct vc_data *vc)
+static void update_attr(u8 *dst, const u8 *src, int attribute, struct vc_data *vc)
 {
 	int i, offset = (vc->vc_font.height < 10) ? 1 : 2;
 	int width = DIV_ROUND_UP(vc->vc_font.width, 8);
@@ -81,7 +80,7 @@ static inline void bit_putcs_aligned(struct vc_data *vc, struct fb_info *info,
 	u16 charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;
 	unsigned int charcnt = vc->vc_font.charcount;
 	u32 idx = vc->vc_font.width >> 3;
-	u8 *src;
+	const u8 *src;
 
 	while (cnt--) {
 		u16 ch = scr_readw(s++) & charmask;
@@ -120,7 +119,7 @@ static inline void bit_putcs_unaligned(struct vc_data *vc,
 	u32 shift_low = 0, mod = vc->vc_font.width % 8;
 	u32 shift_high = 8;
 	u32 idx = vc->vc_font.width >> 3;
-	u8 *src;
+	const u8 *src;
 
 	while (cnt--) {
 		u16 ch = scr_readw(s++) & charmask;
@@ -267,7 +266,7 @@ static void bit_cursor(struct vc_data *vc, struct fb_info *info, bool enable,
 	int y = real_y(par->p, vc->state.y);
 	int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
 	int err = 1;
-	char *src;
+	const u8 *src;
 
 	cursor.set = 0;
 
@@ -278,7 +277,7 @@ static void bit_cursor(struct vc_data *vc, struct fb_info *info, bool enable,
 	attribute = get_attribute(info, c);
 	src = vc->vc_font.data + ((c & charmask) * (w * vc->vc_font.height));
 
-	if (par->cursor_state.image.data != src ||
+	if (par->cursor_state.image.data != (const char *)src ||
 	    par->cursor_reset) {
 		par->cursor_state.image.data = src;
 		cursor.set |= FB_CUR_SETIMAGE;
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 666261ae59d8..247bb90c08d3 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2286,7 +2286,7 @@ static bool fbcon_blank(struct vc_data *vc, enum vesa_blank_mode blank,
 
 static int fbcon_get_font(struct vc_data *vc, struct console_font *font, unsigned int vpitch)
 {
-	u8 *fontdata = vc->vc_font.data;
+	const u8 *fontdata = vc->vc_font.data;
 	u8 *data = font->data;
 	int i, j;
 
@@ -2417,7 +2417,7 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
 	struct fbcon_par *par = info->fbcon_par;
 	struct fbcon_display *p = &fb_display[vc->vc_num];
 	int resize, ret, old_userfont, old_width, old_height, old_charcount;
-	u8 *old_data = vc->vc_font.data;
+	const u8 *old_data = vc->vc_font.data;
 
 	resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
 	vc->vc_font.data = (void *)(p->fontdata = data);
diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h
index fca14e9b729b..3f4386a40237 100644
--- a/drivers/video/fbdev/core/fbcon.h
+++ b/drivers/video/fbdev/core/fbcon.h
@@ -82,8 +82,8 @@ struct fbcon_par {
 	int    rotate;
 	int    cur_rotate;
 	char  *cursor_data;
-	u8    *fontbuffer;
-	u8    *fontdata;
+	u8          *fontbuffer;
+	const u8    *fontdata;
 	u8    *cursor_src;
 	u32    cursor_size;
 	u32    fd_size;
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index ebdb9750d348..ea0cdf4278a3 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -13,8 +13,9 @@
 #ifndef _LINUX_CONSOLE_STRUCT_H
 #define _LINUX_CONSOLE_STRUCT_H
 
-#include <linux/wait.h>
+#include <linux/math.h>
 #include <linux/vt.h>
+#include <linux/wait.h>
 #include <linux/workqueue.h>
 
 struct uni_pagedict;
@@ -58,6 +59,30 @@ struct vc_state {
 	bool		reverse;
 };
 
+/**
+ * struct vc_font - Describes a font
+ * @width: The width of a single glyph in bits
+ * @height: The height of a single glyph in scanlines
+ * @charcount: The number of glyphs in the font
+ * @data: The raw font data
+ *
+ * Font data is organized as an array of glyphs. Each glyph is a bitmap with
+ * set bits indicating the foreground color. Unset bits indicate background
+ * color. The fields @width and @height store a single glyph's number of
+ * horizontal bits and vertical scanlines. If width is not a multiple of 8,
+ * there are trailing bits to fill up the byte. These bits should not be drawn.
+ *
+ * The field @data points to the first glyph's first byte. The value @charcount
+ * gives the number of glyphs in the font. There are no empty scanlines between
+ * two adjacent glyphs.
+ */
+struct vc_font {
+	unsigned int width;
+	unsigned int height;
+	unsigned int charcount;
+	const unsigned char *data;
+};
+
 /*
  * Example: vc_data of a console that was scrolled 3 lines down.
  *
@@ -122,7 +147,7 @@ struct vc_data {
 	unsigned long	vc_pos;			/* Cursor address */
 	/* fonts */
 	unsigned short	vc_hi_font_mask;	/* [#] Attribute set for upper 256 chars of font or 0 if not supported */
-	struct console_font vc_font;		/* Current VC font set */
+	struct vc_font vc_font;			/* Current VC font set */
 	unsigned short	vc_video_erase_char;	/* Background erase character */
 	/* VT terminal data */
 	unsigned int	vc_state;		/* Escape sequence parser state */
-- 
cgit v1.2.3


From e370d84b79ad28ecf9a9e1dad967aa64dbfbd8d8 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:46 +0100
Subject: vt: Calculate font-buffer size with vc_font_size()

In fbcon, fbcon_resize() computes the size of the font buffer from the
values stored in vc_font. Move these calculations to the dedicated helpers
vc_font_pitch() and vc_font_size().

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/core/fbcon.c |  9 ++-------
 include/linux/console_struct.h   | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 247bb90c08d3..103e91c8d874 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2037,7 +2037,6 @@ static void updatescrollmode(struct fbcon_display *p,
 }
 
 #define PITCH(w) (((w) + 7) >> 3)
-#define CALC_FONTSZ(h, p, c) ((h) * (p) * (c)) /* size = height * pitch * charcount */
 
 static int fbcon_resize(struct vc_data *vc, unsigned int width,
 			unsigned int height, bool from_user)
@@ -2049,8 +2048,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
 	int x_diff, y_diff, virt_w, virt_h, virt_fw, virt_fh;
 
 	if (p->userfont && FNTSIZE(vc->vc_font.data)) {
-		int size;
-		int pitch = PITCH(vc->vc_font.width);
+		unsigned int size = vc_font_size(&vc->vc_font);
 
 		/*
 		 * If user font, ensure that a possible change to user font
@@ -2059,10 +2057,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
 		 * charcount can change and cannot be used to determine the
 		 * font data allocated size.
 		 */
-		if (pitch <= 0)
-			return -EINVAL;
-		size = CALC_FONTSZ(vc->vc_font.height, pitch, vc->vc_font.charcount);
-		if (size > FNTSIZE(vc->vc_font.data))
+		if (!size || size > FNTSIZE(vc->vc_font.data))
 			return -EINVAL;
 	}
 
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index ea0cdf4278a3..771cba16cb54 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -83,6 +83,34 @@ struct vc_font {
 	const unsigned char *data;
 };
 
+/**
+ * vc_font_pitch - Calculates the number of bytes between two adjacent scanlines
+ * @font: The VC font
+ *
+ * Returns:
+ * The number of bytes between two adjacent scanlines in the font data
+ */
+static inline unsigned int vc_font_pitch(const struct vc_font *font)
+{
+	return DIV_ROUND_UP(font->width, 8);
+}
+
+/**
+ * vc_font_size - Calculates the size of the font data in bytes
+ * @font: The VC font
+ *
+ * vc_font_size() calculates the number of bytes of font data in the
+ * font specified by @font. The function calculates the size from the
+ * font parameters.
+ *
+ * Returns:
+ * The size of the font data in bytes.
+ */
+static inline unsigned int vc_font_size(const struct vc_font *font)
+{
+	return font->height * vc_font_pitch(font) * font->charcount;
+}
+
 /*
  * Example: vc_data of a console that was scrolled 3 lines down.
  *
-- 
cgit v1.2.3


From 773ac24c44614ad1d5a96258534160c4a0d48d72 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:48 +0100
Subject: lib/fonts: Remove FNTCHARCNT()

The character count in the font data is unused. The internal fonts also
do not set it. Remove FNTCHARCNT().

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/console/newport_con.c | 1 -
 include/linux/font.h                | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index 337e04236d6d..259a63fc7789 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -516,7 +516,6 @@ static int newport_set_font(int unit, const struct console_font *op,
 
 	new_data += FONT_EXTRA_WORDS * sizeof(int);
 	FNTSIZE(new_data) = size;
-	FNTCHARCNT(new_data) = op->charcount;
 	REFCOUNT(new_data) = 0;	/* usage counter */
 	FNTSUM(new_data) = 0;
 
diff --git a/include/linux/font.h b/include/linux/font.h
index fd8625cd76b2..d929c5fa32ca 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -68,7 +68,6 @@ extern const struct font_desc *get_default_font(int xres, int yres,
 /* Extra word getters */
 #define REFCOUNT(fd)	(((int *)(fd))[-1])
 #define FNTSIZE(fd)	(((int *)(fd))[-2])
-#define FNTCHARCNT(fd)	(((int *)(fd))[-3])
 #define FNTSUM(fd)	(((int *)(fd))[-4])
 
 #define FONT_EXTRA_WORDS 4
-- 
cgit v1.2.3


From 04bd5abc8cbebc1bd7e02471a8e3af51b8aad029 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:49 +0100
Subject: lib/fonts: Store font data as font_data_t; update consoles

Store font data as pointer to font_data_t instead of unsigned char.
Update consoles.

Pointers to font data refer to the raw data. There is a hidden header
before the data that contains additional state. Document the existing
layout and semantics of font_data_t.

The data field in struct vc_font can be used by any console. Therefore
it still points to plain data without the additional header. Fbcon sets
its value from struct fbcon_display.fontdata. Hence, update the size
test in fbcon_resize() to use struct fbcon_display.fontdata instead of
struct vc_font.data.

v3:
- fix typos (Helge)
v2:
- 'Font lookup' -> 'Font description' in <linux/font.h>

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/console/newport_con.c | 17 +++++++-------
 drivers/video/fbdev/core/fbcon.c    | 44 ++++++++++++++++++++--------------
 drivers/video/fbdev/core/fbcon.h    |  3 ++-
 include/linux/font.h                | 47 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 84 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index 259a63fc7789..857b85df360c 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -33,9 +33,9 @@
 
 #define NEWPORT_LEN	0x10000
 
-#define FONT_DATA ((unsigned char *)font_vga_8x16.data)
+#define FONT_DATA font_vga_8x16.data
 
-static unsigned char *font_data[MAX_NR_CONSOLES];
+static font_data_t *font_data[MAX_NR_CONSOLES];
 
 static struct newport_regs *npregs;
 static unsigned long newport_addr;
@@ -370,9 +370,9 @@ static void newport_clear(struct vc_data *vc, unsigned int sy, unsigned int sx,
 static void newport_putc(struct vc_data *vc, u16 charattr, unsigned int ypos,
 			 unsigned int xpos)
 {
-	unsigned char *p;
+	const unsigned char *p;
 
-	p = &font_data[vc->vc_num][(charattr & 0xff) << 4];
+	p = &font_data_buf(font_data[vc->vc_num])[(charattr & 0xff) << 4];
 	charattr = (charattr >> 8) & 0xff;
 	xpos <<= 3;
 	ypos <<= 4;
@@ -400,7 +400,7 @@ static void newport_putcs(struct vc_data *vc, const u16 *s,
 			  unsigned int count, unsigned int ypos,
 			  unsigned int xpos)
 {
-	unsigned char *p;
+	const unsigned char *p;
 	unsigned int i;
 	u16 charattr;
 
@@ -424,7 +424,7 @@ static void newport_putcs(struct vc_data *vc, const u16 *s,
 				 NPORT_DMODE0_L32);
 
 	for (i = 0; i < count; i++, xpos += 8) {
-		p = &font_data[vc->vc_num][(scr_readw(s++) & 0xff) << 4];
+		p = &font_data_buf(font_data[vc->vc_num])[(scr_readw(s++) & 0xff) << 4];
 
 		newport_wait(npregs);
 
@@ -503,7 +503,8 @@ static int newport_set_font(int unit, const struct console_font *op,
 	int h = op->height;
 	int size = h * op->charcount;
 	int i;
-	unsigned char *new_data, *data = op->data, *p;
+	font_data_t *new_data;
+	unsigned char *data = op->data, *p;
 
 	/* ladis: when I grow up, there will be a day... and more sizes will
 	 * be supported ;-) */
@@ -519,7 +520,7 @@ static int newport_set_font(int unit, const struct console_font *op,
 	REFCOUNT(new_data) = 0;	/* usage counter */
 	FNTSUM(new_data) = 0;
 
-	p = new_data;
+	p = (unsigned char *)font_data_buf(new_data);
 	for (i = 0; i < op->charcount; i++) {
 		memcpy(p, data, h);
 		data += 32;
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 103e91c8d874..8d7840b9ebad 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -1019,8 +1019,10 @@ static const char *fbcon_startup(void)
 						info->pixmap.blit_y);
 		vc->vc_font.width = font->width;
 		vc->vc_font.height = font->height;
-		vc->vc_font.data = (void *)(p->fontdata = font->data);
+		vc->vc_font.data = font_data_buf(font->data);
 		vc->vc_font.charcount = font->charcount;
+
+		p->fontdata = font->data;
 	}
 
 	cols = FBCON_SWAP(par->rotate, info->var.xres, info->var.yres);
@@ -1078,11 +1080,12 @@ static void fbcon_init(struct vc_data *vc, bool init)
 		if (t->fontdata) {
 			struct vc_data *fvc = vc_cons[fg_console].d;
 
-			vc->vc_font.data = (void *)(p->fontdata =
-						    fvc->vc_font.data);
+			vc->vc_font.data = fvc->vc_font.data;
 			vc->vc_font.width = fvc->vc_font.width;
 			vc->vc_font.height = fvc->vc_font.height;
 			vc->vc_font.charcount = fvc->vc_font.charcount;
+
+			p->fontdata = t->fontdata;
 			p->userfont = t->userfont;
 
 			if (p->userfont)
@@ -1097,8 +1100,10 @@ static void fbcon_init(struct vc_data *vc, bool init)
 							info->pixmap.blit_y);
 			vc->vc_font.width = font->width;
 			vc->vc_font.height = font->height;
-			vc->vc_font.data = (void *)(p->fontdata = font->data);
+			vc->vc_font.data = font_data_buf(font->data);
 			vc->vc_font.charcount = font->charcount;
+
+			p->fontdata = font->data;
 		}
 	}
 
@@ -1409,11 +1414,12 @@ static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var,
 	svc = *default_mode;
 	t = &fb_display[svc->vc_num];
 
-	if (!vc->vc_font.data) {
-		vc->vc_font.data = (void *)(p->fontdata = t->fontdata);
+	if (!p->fontdata) {
+		vc->vc_font.data = font_data_buf(t->fontdata);
 		vc->vc_font.width = (*default_mode)->vc_font.width;
 		vc->vc_font.height = (*default_mode)->vc_font.height;
 		vc->vc_font.charcount = (*default_mode)->vc_font.charcount;
+		p->fontdata = t->fontdata;
 		p->userfont = t->userfont;
 		if (p->userfont)
 			REFCOUNT(p->fontdata)++;
@@ -2047,7 +2053,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
 	struct fb_var_screeninfo var = info->var;
 	int x_diff, y_diff, virt_w, virt_h, virt_fw, virt_fh;
 
-	if (p->userfont && FNTSIZE(vc->vc_font.data)) {
+	if (p->userfont && FNTSIZE(p->fontdata)) {
 		unsigned int size = vc_font_size(&vc->vc_font);
 
 		/*
@@ -2057,7 +2063,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
 		 * charcount can change and cannot be used to determine the
 		 * font data allocated size.
 		 */
-		if (!size || size > FNTSIZE(vc->vc_font.data))
+		if (!size || size > FNTSIZE(p->fontdata))
 			return -EINVAL;
 	}
 
@@ -2281,7 +2287,8 @@ static bool fbcon_blank(struct vc_data *vc, enum vesa_blank_mode blank,
 
 static int fbcon_get_font(struct vc_data *vc, struct console_font *font, unsigned int vpitch)
 {
-	const u8 *fontdata = vc->vc_font.data;
+	struct fbcon_display *p = &fb_display[vc->vc_num];
+	font_data_t *fontdata = p->fontdata;
 	u8 *data = font->data;
 	int i, j;
 
@@ -2406,16 +2413,18 @@ static void set_vc_hi_font(struct vc_data *vc, bool set)
 }
 
 static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
-			     const u8 * data, int userfont)
+			     font_data_t *data, int userfont)
 {
 	struct fb_info *info = fbcon_info_from_console(vc->vc_num);
 	struct fbcon_par *par = info->fbcon_par;
 	struct fbcon_display *p = &fb_display[vc->vc_num];
 	int resize, ret, old_userfont, old_width, old_height, old_charcount;
+	font_data_t *old_fontdata = p->fontdata;
 	const u8 *old_data = vc->vc_font.data;
 
 	resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
-	vc->vc_font.data = (void *)(p->fontdata = data);
+	p->fontdata = data;
+	vc->vc_font.data = font_data_buf(p->fontdata);
 	old_userfont = p->userfont;
 	if ((p->userfont = userfont))
 		REFCOUNT(data)++;
@@ -2448,12 +2457,12 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
 		update_screen(vc);
 	}
 
-	if (old_userfont && (--REFCOUNT(old_data) == 0))
-		kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
+	if (old_userfont && (--REFCOUNT(old_fontdata) == 0))
+		kfree(old_fontdata - FONT_EXTRA_WORDS * sizeof(int));
 	return 0;
 
 err_out:
-	p->fontdata = old_data;
+	p->fontdata = old_fontdata;
 	vc->vc_font.data = old_data;
 
 	if (userfont) {
@@ -2483,7 +2492,8 @@ static int fbcon_set_font(struct vc_data *vc, const struct console_font *font,
 	int h = font->height;
 	int size, alloc_size;
 	int i, csum;
-	u8 *new_data, *data = font->data;
+	font_data_t *new_data;
+	u8 *data = font->data;
 	int pitch = PITCH(font->width);
 
 	/* Is there a reason why fbconsole couldn't handle any charcount >256?
@@ -2522,13 +2532,13 @@ static int fbcon_set_font(struct vc_data *vc, const struct console_font *font,
 	if (!new_data)
 		return -ENOMEM;
 
-	memset(new_data, 0, FONT_EXTRA_WORDS * sizeof(int));
+	memset((u8 *)new_data, 0, FONT_EXTRA_WORDS * sizeof(int));
 
 	new_data += FONT_EXTRA_WORDS * sizeof(int);
 	FNTSIZE(new_data) = size;
 	REFCOUNT(new_data) = 0;	/* usage counter */
 	for (i=0; i< charcount; i++) {
-		memcpy(new_data + i*h*pitch, data +  i*vpitch*pitch, h*pitch);
+		memcpy((u8 *)new_data + i * h * pitch, data + i * vpitch * pitch, h * pitch);
 	}
 
 	/* Since linux has a nice crc32 function use it for counting font
diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h
index 3f4386a40237..d26ee7860cf5 100644
--- a/drivers/video/fbdev/core/fbcon.h
+++ b/drivers/video/fbdev/core/fbcon.h
@@ -11,6 +11,7 @@
 #ifndef _VIDEO_FBCON_H
 #define _VIDEO_FBCON_H
 
+#include <linux/font.h>
 #include <linux/types.h>
 #include <linux/vt_buffer.h>
 #include <linux/vt_kern.h>
@@ -25,7 +26,7 @@
 
 struct fbcon_display {
     /* Filled in by the low-level console driver */
-    const u_char *fontdata;
+    font_data_t *fontdata;
     int userfont;                   /* != 0 if fontdata kmalloc()ed */
 #ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
     u_short scrollmode;             /* Scroll Method, use fb_scrollmode() */
diff --git a/include/linux/font.h b/include/linux/font.h
index d929c5fa32ca..746a0996a018 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -13,12 +13,57 @@
 
 #include <linux/types.h>
 
+/*
+ * font_data_t and helpers
+ */
+
+/**
+ * font_data_t - Raw font data
+ *
+ * Values of type font_data_t store a pointer to raw font data. The format
+ * is monochrome. Each bit sets a pixel of a stored glyph. Font data does
+ * not store geometry information for the individual glyphs. Users of the
+ * font have to store glyph size, pitch and character count separately.
+ *
+ * Font data in font_data_t is not equivalent to raw u8. Each pointer stores
+ * an additional hidden header before the font data. The layout is
+ *
+ * +------+-----------------------------+
+ * | -16  |  CRC32 Checksum (optional)  |
+ * | -12  |  <Unused>                   |
+ * |  -8  |  Number of data bytes       |
+ * |  -4  |  Reference count            |
+ * +------+-----------------------------+
+ * |   0  |  Data buffer                |
+ * |  ... |                             |
+ * +------+-----------------------------+
+ *
+ * Use helpers to access font_data_t. Use font_data_buf() to get the stored data.
+ */
+typedef const unsigned char font_data_t;
+
+/**
+ * font_data_buf() - Returns the font data as raw bytes
+ * @fd: The font data
+ *
+ * Returns:
+ * The raw font data. The provided buffer is read-only.
+ */
+static inline const unsigned char *font_data_buf(font_data_t *fd)
+{
+	return (const unsigned char *)fd;
+}
+
+/*
+ * Font description
+ */
+
 struct font_desc {
     int idx;
     const char *name;
     unsigned int width, height;
     unsigned int charcount;
-    const void *data;
+    font_data_t *data;
     int pref;
 };
 
-- 
cgit v1.2.3


From e2e000a0b22036a72474088d3399097ff47ace02 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:50 +0100
Subject: lib/fonts: Read font size with font_data_size()

Add font_data_size() and update consoles to use it.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/console/newport_con.c |  2 +-
 drivers/video/fbdev/core/fbcon.c    | 14 +++++++-------
 include/linux/font.h                |  2 ++
 lib/fonts/fonts.c                   | 21 +++++++++++++++++++++
 4 files changed, 31 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index 857b85df360c..00ace2940aa0 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -530,7 +530,7 @@ static int newport_set_font(int unit, const struct console_font *op,
 	/* check if font is already used by other console */
 	for (i = 0; i < MAX_NR_CONSOLES; i++) {
 		if (font_data[i] != FONT_DATA
-		    && FNTSIZE(font_data[i]) == size
+		    && font_data_size(font_data[i]) == size
 		    && !memcmp(font_data[i], new_data, size)) {
 			kfree(new_data - FONT_EXTRA_WORDS * sizeof(int));
 			/* current font is the same as the new one */
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 8d7840b9ebad..fa8f3e4196de 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2053,7 +2053,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
 	struct fb_var_screeninfo var = info->var;
 	int x_diff, y_diff, virt_w, virt_h, virt_fw, virt_fh;
 
-	if (p->userfont && FNTSIZE(p->fontdata)) {
+	if (p->userfont && font_data_size(p->fontdata)) {
 		unsigned int size = vc_font_size(&vc->vc_font);
 
 		/*
@@ -2063,7 +2063,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
 		 * charcount can change and cannot be used to determine the
 		 * font data allocated size.
 		 */
-		if (!size || size > FNTSIZE(p->fontdata))
+		if (!size || size > font_data_size(p->fontdata))
 			return -EINVAL;
 	}
 
@@ -2302,7 +2302,7 @@ static int fbcon_get_font(struct vc_data *vc, struct console_font *font, unsigne
 
 	if (font->width <= 8) {
 		j = vc->vc_font.height;
-		if (font->charcount * j > FNTSIZE(fontdata))
+		if (font->charcount * j > font_data_size(fontdata))
 			return -EINVAL;
 
 		for (i = 0; i < font->charcount; i++) {
@@ -2313,7 +2313,7 @@ static int fbcon_get_font(struct vc_data *vc, struct console_font *font, unsigne
 		}
 	} else if (font->width <= 16) {
 		j = vc->vc_font.height * 2;
-		if (font->charcount * j > FNTSIZE(fontdata))
+		if (font->charcount * j > font_data_size(fontdata))
 			return -EINVAL;
 
 		for (i = 0; i < font->charcount; i++) {
@@ -2323,7 +2323,7 @@ static int fbcon_get_font(struct vc_data *vc, struct console_font *font, unsigne
 			fontdata += j;
 		}
 	} else if (font->width <= 24) {
-		if (font->charcount * (vc->vc_font.height * sizeof(u32)) > FNTSIZE(fontdata))
+		if (font->charcount * (vc->vc_font.height * sizeof(u32)) > font_data_size(fontdata))
 			return -EINVAL;
 
 		for (i = 0; i < font->charcount; i++) {
@@ -2338,7 +2338,7 @@ static int fbcon_get_font(struct vc_data *vc, struct console_font *font, unsigne
 		}
 	} else {
 		j = vc->vc_font.height * 4;
-		if (font->charcount * j > FNTSIZE(fontdata))
+		if (font->charcount * j > font_data_size(fontdata))
 			return -EINVAL;
 
 		for (i = 0; i < font->charcount; i++) {
@@ -2553,7 +2553,7 @@ static int fbcon_set_font(struct vc_data *vc, const struct console_font *font,
 		if (fb_display[i].userfont &&
 		    fb_display[i].fontdata &&
 		    FNTSUM(fb_display[i].fontdata) == csum &&
-		    FNTSIZE(fb_display[i].fontdata) == size &&
+		    font_data_size(fb_display[i].fontdata) == size &&
 		    tmp->vc_font.width == w &&
 		    !memcmp(fb_display[i].fontdata, new_data, size)) {
 			kfree(new_data - FONT_EXTRA_WORDS * sizeof(int));
diff --git a/include/linux/font.h b/include/linux/font.h
index 746a0996a018..5b8557813c5c 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -54,6 +54,8 @@ static inline const unsigned char *font_data_buf(font_data_t *fd)
 	return (const unsigned char *)fd;
 }
 
+unsigned int font_data_size(font_data_t *fd);
+
 /*
  * Font description
  */
diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
index a7f118b30171..8c9a6762061c 100644
--- a/lib/fonts/fonts.c
+++ b/lib/fonts/fonts.c
@@ -20,6 +20,27 @@
 #endif
 #include <linux/font.h>
 
+/*
+ * Helpers for font_data_t
+ */
+
+/**
+ * font_data_size - Return size of the font data in bytes
+ * @fd: Font data
+ *
+ * Returns:
+ * The number of bytes in the given font data.
+ */
+unsigned int font_data_size(font_data_t *fd)
+{
+	return FNTSIZE(fd);
+}
+EXPORT_SYMBOL_GPL(font_data_size);
+
+/*
+ * Font lookup
+ */
+
 static const struct font_desc *fonts[] = {
 #ifdef CONFIG_FONT_8x8
 	&font_vga_8x8,
-- 
cgit v1.2.3


From 1de371b1f1b02dc82da598f9707089229699a604 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:51 +0100
Subject: lib/fonts: Manage font-data lifetime with font_data_get/_put()

Add font_data_get() and font_data_put(). Update consoles to use them
over REFCOUNT() and plain kfree().

Newly allocated font data starts with a reference count of 1. Loading
the font puts the previously loaded font. If the reference count reaches
zero, font_data_put() frees the font data.

The kernel stores a refcount of zero for internal font data. Invoking
font_data_get() and font_data_put() tests this internally and returns
success without further operation. From the caller's perspective,
getting and putting works the same for all font data.

Fbcon used the userfont flag distinguish between internal fonts and
fonts loaded by user space. Only the latter where refcounted. With the
new helper's automatic handling of internal font data, remove the
userfont flag from fbcon.

Newport_con uses a default font, FONT_DATA, until user space loads
custom font data. Remove all special cases for FONT_DATA, as the get
and put calls' read-only handling also covers this case.

v3:
- fix module linker error wrt font symbols (Nathan, Arnd)
- fix typos

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/console/newport_con.c | 23 ++++-------
 drivers/video/fbdev/core/fbcon.c    | 65 +++++++++++++++----------------
 drivers/video/fbdev/core/fbcon.h    |  1 -
 include/linux/font.h                |  2 +
 lib/fonts/fonts.c                   | 77 ++++++++++++++++++++++++++++++++++++-
 5 files changed, 115 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index 00ace2940aa0..dbbb787fc46e 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -517,7 +517,7 @@ static int newport_set_font(int unit, const struct console_font *op,
 
 	new_data += FONT_EXTRA_WORDS * sizeof(int);
 	FNTSIZE(new_data) = size;
-	REFCOUNT(new_data) = 0;	/* usage counter */
+	REFCOUNT(new_data) = 1;	/* usage counter */
 	FNTSUM(new_data) = 0;
 
 	p = (unsigned char *)font_data_buf(new_data);
@@ -532,21 +532,17 @@ static int newport_set_font(int unit, const struct console_font *op,
 		if (font_data[i] != FONT_DATA
 		    && font_data_size(font_data[i]) == size
 		    && !memcmp(font_data[i], new_data, size)) {
-			kfree(new_data - FONT_EXTRA_WORDS * sizeof(int));
+			font_data_put(new_data);
 			/* current font is the same as the new one */
 			if (i == unit)
 				return 0;
 			new_data = font_data[i];
+			font_data_get(new_data);
 			break;
 		}
 	}
-	/* old font is user font */
-	if (font_data[unit] != FONT_DATA) {
-		if (--REFCOUNT(font_data[unit]) == 0)
-			kfree(font_data[unit] -
-			      FONT_EXTRA_WORDS * sizeof(int));
-	}
-	REFCOUNT(new_data)++;
+
+	font_data_put(font_data[unit]);
 	font_data[unit] = new_data;
 
 	return 0;
@@ -554,12 +550,9 @@ static int newport_set_font(int unit, const struct console_font *op,
 
 static int newport_set_def_font(int unit, struct console_font *op)
 {
-	if (font_data[unit] != FONT_DATA) {
-		if (--REFCOUNT(font_data[unit]) == 0)
-			kfree(font_data[unit] -
-			      FONT_EXTRA_WORDS * sizeof(int));
-		font_data[unit] = FONT_DATA;
-	}
+	font_data_put(font_data[unit]);
+	font_data[unit] = FONT_DATA;
+	font_data_get(font_data[unit]);
 
 	return 0;
 }
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index fa8f3e4196de..ac59480c98cb 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -1023,6 +1023,7 @@ static const char *fbcon_startup(void)
 		vc->vc_font.charcount = font->charcount;
 
 		p->fontdata = font->data;
+		font_data_get(p->fontdata);
 	}
 
 	cols = FBCON_SWAP(par->rotate, info->var.xres, info->var.yres);
@@ -1086,10 +1087,7 @@ static void fbcon_init(struct vc_data *vc, bool init)
 			vc->vc_font.charcount = fvc->vc_font.charcount;
 
 			p->fontdata = t->fontdata;
-			p->userfont = t->userfont;
-
-			if (p->userfont)
-				REFCOUNT(p->fontdata)++;
+			font_data_get(p->fontdata);
 		} else {
 			const struct font_desc *font = NULL;
 
@@ -1104,6 +1102,7 @@ static void fbcon_init(struct vc_data *vc, bool init)
 			vc->vc_font.charcount = font->charcount;
 
 			p->fontdata = font->data;
+			font_data_get(p->fontdata);
 		}
 	}
 
@@ -1194,10 +1193,10 @@ static void fbcon_init(struct vc_data *vc, bool init)
 
 static void fbcon_free_font(struct fbcon_display *p)
 {
-	if (p->userfont && p->fontdata && (--REFCOUNT(p->fontdata) == 0))
-		kfree(p->fontdata - FONT_EXTRA_WORDS * sizeof(int));
-	p->fontdata = NULL;
-	p->userfont = 0;
+	if (p->fontdata) {
+		font_data_put(p->fontdata);
+		p->fontdata = NULL;
+	}
 }
 
 static void set_vc_hi_font(struct vc_data *vc, bool set);
@@ -1420,9 +1419,7 @@ static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var,
 		vc->vc_font.height = (*default_mode)->vc_font.height;
 		vc->vc_font.charcount = (*default_mode)->vc_font.charcount;
 		p->fontdata = t->fontdata;
-		p->userfont = t->userfont;
-		if (p->userfont)
-			REFCOUNT(p->fontdata)++;
+		font_data_get(p->fontdata);
 	}
 
 	var->activate = FB_ACTIVATE_NOW;
@@ -2053,7 +2050,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
 	struct fb_var_screeninfo var = info->var;
 	int x_diff, y_diff, virt_w, virt_h, virt_fw, virt_fh;
 
-	if (p->userfont && font_data_size(p->fontdata)) {
+	if (font_data_size(p->fontdata)) {
 		unsigned int size = vc_font_size(&vc->vc_font);
 
 		/*
@@ -2413,21 +2410,20 @@ static void set_vc_hi_font(struct vc_data *vc, bool set)
 }
 
 static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
-			     font_data_t *data, int userfont)
+			     font_data_t *data)
 {
 	struct fb_info *info = fbcon_info_from_console(vc->vc_num);
 	struct fbcon_par *par = info->fbcon_par;
 	struct fbcon_display *p = &fb_display[vc->vc_num];
-	int resize, ret, old_userfont, old_width, old_height, old_charcount;
+	int resize, ret, old_width, old_height, old_charcount;
 	font_data_t *old_fontdata = p->fontdata;
 	const u8 *old_data = vc->vc_font.data;
 
+	font_data_get(data);
+
 	resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
 	p->fontdata = data;
 	vc->vc_font.data = font_data_buf(p->fontdata);
-	old_userfont = p->userfont;
-	if ((p->userfont = userfont))
-		REFCOUNT(data)++;
 
 	old_width = vc->vc_font.width;
 	old_height = vc->vc_font.height;
@@ -2457,24 +2453,20 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
 		update_screen(vc);
 	}
 
-	if (old_userfont && (--REFCOUNT(old_fontdata) == 0))
-		kfree(old_fontdata - FONT_EXTRA_WORDS * sizeof(int));
+	if (old_fontdata)
+		font_data_put(old_fontdata);
+
 	return 0;
 
 err_out:
 	p->fontdata = old_fontdata;
 	vc->vc_font.data = old_data;
-
-	if (userfont) {
-		p->userfont = old_userfont;
-		if (--REFCOUNT(data) == 0)
-			kfree(data - FONT_EXTRA_WORDS * sizeof(int));
-	}
-
 	vc->vc_font.width = old_width;
 	vc->vc_font.height = old_height;
 	vc->vc_font.charcount = old_charcount;
 
+	font_data_put(data);
+
 	return ret;
 }
 
@@ -2491,9 +2483,9 @@ static int fbcon_set_font(struct vc_data *vc, const struct console_font *font,
 	int w = font->width;
 	int h = font->height;
 	int size, alloc_size;
-	int i, csum;
+	int i, csum, ret;
 	font_data_t *new_data;
-	u8 *data = font->data;
+	const u8 *data = font->data;
 	int pitch = PITCH(font->width);
 
 	/* Is there a reason why fbconsole couldn't handle any charcount >256?
@@ -2536,7 +2528,7 @@ static int fbcon_set_font(struct vc_data *vc, const struct console_font *font,
 
 	new_data += FONT_EXTRA_WORDS * sizeof(int);
 	FNTSIZE(new_data) = size;
-	REFCOUNT(new_data) = 0;	/* usage counter */
+	REFCOUNT(new_data) = 1;	/* usage counter */
 	for (i=0; i< charcount; i++) {
 		memcpy((u8 *)new_data + i * h * pitch, data + i * vpitch * pitch, h * pitch);
 	}
@@ -2550,18 +2542,21 @@ static int fbcon_set_font(struct vc_data *vc, const struct console_font *font,
 	for (i = first_fb_vc; i <= last_fb_vc; i++) {
 		struct vc_data *tmp = vc_cons[i].d;
 
-		if (fb_display[i].userfont &&
-		    fb_display[i].fontdata &&
+		if (fb_display[i].fontdata &&
 		    FNTSUM(fb_display[i].fontdata) == csum &&
 		    font_data_size(fb_display[i].fontdata) == size &&
 		    tmp->vc_font.width == w &&
 		    !memcmp(fb_display[i].fontdata, new_data, size)) {
-			kfree(new_data - FONT_EXTRA_WORDS * sizeof(int));
-			new_data = (u8 *)fb_display[i].fontdata;
+			font_data_get(fb_display[i].fontdata);
+			font_data_put(new_data);
+			new_data = fb_display[i].fontdata;
 			break;
 		}
 	}
-	return fbcon_do_set_font(vc, font->width, font->height, charcount, new_data, 1);
+	ret = fbcon_do_set_font(vc, font->width, font->height, charcount, new_data);
+	font_data_put(new_data);
+
+	return ret;
 }
 
 static int fbcon_set_def_font(struct vc_data *vc, struct console_font *font,
@@ -2578,7 +2573,7 @@ static int fbcon_set_def_font(struct vc_data *vc, struct console_font *font,
 
 	font->width = f->width;
 	font->height = f->height;
-	return fbcon_do_set_font(vc, f->width, f->height, f->charcount, f->data, 0);
+	return fbcon_do_set_font(vc, f->width, f->height, f->charcount, f->data);
 }
 
 static u16 palette_red[16];
diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h
index d26ee7860cf5..1e3c1ef84762 100644
--- a/drivers/video/fbdev/core/fbcon.h
+++ b/drivers/video/fbdev/core/fbcon.h
@@ -27,7 +27,6 @@
 struct fbcon_display {
     /* Filled in by the low-level console driver */
     font_data_t *fontdata;
-    int userfont;                   /* != 0 if fontdata kmalloc()ed */
 #ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
     u_short scrollmode;             /* Scroll Method, use fb_scrollmode() */
 #endif
diff --git a/include/linux/font.h b/include/linux/font.h
index 5b8557813c5c..dd319d0f0201 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -54,6 +54,8 @@ static inline const unsigned char *font_data_buf(font_data_t *fd)
 	return (const unsigned char *)fd;
 }
 
+void font_data_get(font_data_t *fd);
+bool font_data_put(font_data_t *fd);
 unsigned int font_data_size(font_data_t *fd);
 
 /*
diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
index 8c9a6762061c..d25efd8d6c31 100644
--- a/lib/fonts/fonts.c
+++ b/lib/fonts/fonts.c
@@ -12,18 +12,91 @@
  * for more details.
  */
 
+#include <linux/container_of.h>
+#include <linux/font.h>
 #include <linux/module.h>
-#include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/types.h>
+
 #if defined(__mc68000__)
 #include <asm/setup.h>
 #endif
-#include <linux/font.h>
 
 /*
  * Helpers for font_data_t
  */
 
+static struct font_data *to_font_data_struct(font_data_t *fd)
+{
+	return container_of(fd, struct font_data, data[0]);
+}
+
+static bool font_data_is_internal(font_data_t *fd)
+{
+	return !REFCOUNT(fd); /* internal fonts have no reference counting */
+}
+
+static void font_data_free(font_data_t *fd)
+{
+	kfree(to_font_data_struct(fd));
+}
+
+/**
+ * font_data_get - Acquires a reference on font data
+ * @fd: Font data
+ *
+ * Font data from user space is reference counted. The helper
+ * font_data_get() increases the reference counter by one. Invoke
+ * font_data_put() to release the reference.
+ *
+ * Internal font data is located in read-only memory. In this case
+ * the helper returns success without modifying the counter field.
+ * It is still required to call font_data_put() on internal font data.
+ */
+void font_data_get(font_data_t *fd)
+{
+	if (font_data_is_internal(fd))
+		return; /* never ref static data */
+
+	if (WARN_ON(!REFCOUNT(fd)))
+		return; /* should never be 0 */
+	++REFCOUNT(fd);
+}
+EXPORT_SYMBOL_GPL(font_data_get);
+
+/**
+ * font_data_put - Release a reference on font data
+ * @fd: Font data
+ *
+ * Font data from user space is reference counted. The helper
+ * font_data_put() decreases the reference counter by one. If this was
+ * the final reference, it frees the allocated memory.
+ *
+ * Internal font data is located in read-only memory. In this case
+ * the helper returns success without modifying the counter field.
+ *
+ * Returns:
+ * True if the font data's memory buffer has been freed, false otherwise.
+ */
+bool font_data_put(font_data_t *fd)
+{
+	unsigned int count;
+
+	if (font_data_is_internal(fd))
+		return false; /* never unref static data */
+
+	if (WARN_ON(!REFCOUNT(fd)))
+		return false; /* should never be 0 */
+
+	count = --REFCOUNT(fd);
+	if (!count)
+		font_data_free(fd);
+
+	return !count;
+}
+EXPORT_SYMBOL_GPL(font_data_put);
+
 /**
  * font_data_size - Return size of the font data in bytes
  * @fd: Font data
-- 
cgit v1.2.3


From 1e3c49aa03fbfeea595ca0c9a4ebaf1e9cc078af Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:52 +0100
Subject: lib/fonts: Compare font data for equality with font_data_is_equal()

Add font_data_is_equal() and update consoles to use it.

Font data is equal if it has the same size and contains the same values
on all bytes. Only fbcon uses a crc32 checksum. If set in both operands
the checksums have to be equal.

The new helper also guarantees to not compare internal fonts against
fonts from user space. Internal fonts cannot be ref-counted, so making
them equal to user-space fonts with the same byte sequence results in
undefined behavior.

The test only compares data buffers. Their interpretation is up each
console. Therefore remove a width test in fbcon_set_font().

v3:
- rebase onto font_data_{get,put}()

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/console/newport_con.c |  4 +---
 drivers/video/fbdev/core/fbcon.c    |  7 +------
 include/linux/font.h                |  1 +
 lib/fonts/fonts.c                   | 26 ++++++++++++++++++++++++++
 4 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index dbbb787fc46e..db0228bce00e 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -529,9 +529,7 @@ static int newport_set_font(int unit, const struct console_font *op,
 
 	/* check if font is already used by other console */
 	for (i = 0; i < MAX_NR_CONSOLES; i++) {
-		if (font_data[i] != FONT_DATA
-		    && font_data_size(font_data[i]) == size
-		    && !memcmp(font_data[i], new_data, size)) {
+		if (font_data_is_equal(font_data[i], new_data)) {
 			font_data_put(new_data);
 			/* current font is the same as the new one */
 			if (i == unit)
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index ac59480c98cb..00255ac92e42 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2540,13 +2540,8 @@ static int fbcon_set_font(struct vc_data *vc, const struct console_font *font,
 	FNTSUM(new_data) = csum;
 	/* Check if the same font is on some other console already */
 	for (i = first_fb_vc; i <= last_fb_vc; i++) {
-		struct vc_data *tmp = vc_cons[i].d;
-
 		if (fb_display[i].fontdata &&
-		    FNTSUM(fb_display[i].fontdata) == csum &&
-		    font_data_size(fb_display[i].fontdata) == size &&
-		    tmp->vc_font.width == w &&
-		    !memcmp(fb_display[i].fontdata, new_data, size)) {
+		    font_data_is_equal(fb_display[i].fontdata, new_data)) {
 			font_data_get(fb_display[i].fontdata);
 			font_data_put(new_data);
 			new_data = fb_display[i].fontdata;
diff --git a/include/linux/font.h b/include/linux/font.h
index dd319d0f0201..58bf3c64cabb 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -57,6 +57,7 @@ static inline const unsigned char *font_data_buf(font_data_t *fd)
 void font_data_get(font_data_t *fd);
 bool font_data_put(font_data_t *fd);
 unsigned int font_data_size(font_data_t *fd);
+bool font_data_is_equal(font_data_t *lhs, font_data_t *rhs);
 
 /*
  * Font description
diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
index d25efd8d6c31..3fb76d185647 100644
--- a/lib/fonts/fonts.c
+++ b/lib/fonts/fonts.c
@@ -110,6 +110,32 @@ unsigned int font_data_size(font_data_t *fd)
 }
 EXPORT_SYMBOL_GPL(font_data_size);
 
+/**
+ * font_data_is_equal - Compares font data for equality
+ * @lhs: Left-hand side font data
+ * @rhs: Right-hand-size font data
+ *
+ * Font data is equal if is constain the same sequence of values. The
+ * helper also use the checksum, if both arguments contain it. Font data
+ * coming from different origins, internal or from user space, is never
+ * equal. Allowing this would break reference counting.
+ *
+ * Returns:
+ * True if the given font data is equal, false otherwise.
+ */
+bool font_data_is_equal(font_data_t *lhs, font_data_t *rhs)
+{
+	if (font_data_is_internal(lhs) != font_data_is_internal(rhs))
+		return false;
+	if (font_data_size(lhs) != font_data_size(rhs))
+		return false;
+	if (FNTSUM(lhs) && FNTSUM(rhs) && FNTSUM(lhs) != FNTSUM(rhs))
+		return false;
+
+	return !memcmp(lhs, rhs, FNTSIZE(lhs));
+}
+EXPORT_SYMBOL_GPL(font_data_is_equal);
+
 /*
  * Font lookup
  */
-- 
cgit v1.2.3


From 514d0de7cf403144d3e6c5b9fabb1ce4c15974ca Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:53 +0100
Subject: lib/fonts: Create font_data_t from struct console_font with
 font_data_import()

Add font_data_import() and update consoles to use it.

The implementation of font_data_import() is based on code from fbcon,
which supports overflow checks and crc32 checksums. Fbcon uses the crc32
checksum.

Newport_con now implements the same overflow checks as fbcon. As before,
this console does not support checksums, which are optional. Newport_con
can now also handle input font data with a vertical pitch other than 32
bytes. (The vertical pitch is the offset between two glyphs in the font
data.)

As an internal change, remove the const qualifier from the data field
if struct font_data. This allows font_data_import() to write the data
without type casting. For all users of the font data via font_data_t,
the stored data is still read only.

v3:
- fix typos

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/console/newport_con.c | 22 +++----------
 drivers/video/fbdev/core/fbcon.c    | 38 +++--------------------
 include/linux/font.h                |  6 +++-
 lib/fonts/fonts.c                   | 62 +++++++++++++++++++++++++++++++++++++
 4 files changed, 75 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index db0228bce00e..e88ff3a93b77 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -501,31 +501,17 @@ static int newport_set_font(int unit, const struct console_font *op,
 {
 	int w = op->width;
 	int h = op->height;
-	int size = h * op->charcount;
 	int i;
 	font_data_t *new_data;
-	unsigned char *data = op->data, *p;
 
 	/* ladis: when I grow up, there will be a day... and more sizes will
 	 * be supported ;-) */
-	if ((w != 8) || (h != 16) || (vpitch != 32)
-	    || (op->charcount != 256 && op->charcount != 512))
+	if (w != 8 || h != 16 || (op->charcount != 256 && op->charcount != 512))
 		return -EINVAL;
 
-	if (!(new_data = kmalloc(FONT_EXTRA_WORDS * sizeof(int) + size,
-	     GFP_USER))) return -ENOMEM;
-
-	new_data += FONT_EXTRA_WORDS * sizeof(int);
-	FNTSIZE(new_data) = size;
-	REFCOUNT(new_data) = 1;	/* usage counter */
-	FNTSUM(new_data) = 0;
-
-	p = (unsigned char *)font_data_buf(new_data);
-	for (i = 0; i < op->charcount; i++) {
-		memcpy(p, data, h);
-		data += 32;
-		p += h;
-	}
+	new_data = font_data_import(op, vpitch, NULL);
+	if (IS_ERR(new_data))
+		return PTR_ERR(new_data);
 
 	/* check if font is already used by other console */
 	for (i = 0; i < MAX_NR_CONSOLES; i++) {
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 00255ac92e42..53677c09a0ec 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2039,8 +2039,6 @@ static void updatescrollmode(struct fbcon_display *p,
 	updatescrollmode_accel(p, info, vc);
 }
 
-#define PITCH(w) (((w) + 7) >> 3)
-
 static int fbcon_resize(struct vc_data *vc, unsigned int width,
 			unsigned int height, bool from_user)
 {
@@ -2424,7 +2422,6 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
 	resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
 	p->fontdata = data;
 	vc->vc_font.data = font_data_buf(p->fontdata);
-
 	old_width = vc->vc_font.width;
 	old_height = vc->vc_font.height;
 	old_charcount = vc->vc_font.charcount;
@@ -2482,11 +2479,8 @@ static int fbcon_set_font(struct vc_data *vc, const struct console_font *font,
 	unsigned charcount = font->charcount;
 	int w = font->width;
 	int h = font->height;
-	int size, alloc_size;
-	int i, csum, ret;
+	int i, ret;
 	font_data_t *new_data;
-	const u8 *data = font->data;
-	int pitch = PITCH(font->width);
 
 	/* Is there a reason why fbconsole couldn't handle any charcount >256?
 	 * If not this check should be changed to charcount < 256 */
@@ -2510,34 +2504,10 @@ static int fbcon_set_font(struct vc_data *vc, const struct console_font *font,
 	if (fbcon_invalid_charcount(info, charcount))
 		return -EINVAL;
 
-	/* Check for integer overflow in font size calculation */
-	if (check_mul_overflow(h, pitch, &size) ||
-	    check_mul_overflow(size, charcount, &size))
-		return -EINVAL;
-
-	/* Check for overflow in allocation size calculation */
-	if (check_add_overflow(FONT_EXTRA_WORDS * sizeof(int), size, &alloc_size))
-		return -EINVAL;
-
-	new_data = kmalloc(alloc_size, GFP_USER);
-
-	if (!new_data)
-		return -ENOMEM;
-
-	memset((u8 *)new_data, 0, FONT_EXTRA_WORDS * sizeof(int));
-
-	new_data += FONT_EXTRA_WORDS * sizeof(int);
-	FNTSIZE(new_data) = size;
-	REFCOUNT(new_data) = 1;	/* usage counter */
-	for (i=0; i< charcount; i++) {
-		memcpy((u8 *)new_data + i * h * pitch, data + i * vpitch * pitch, h * pitch);
-	}
-
-	/* Since linux has a nice crc32 function use it for counting font
-	 * checksums. */
-	csum = crc32(0, new_data, size);
+	new_data = font_data_import(font, vpitch, crc32);
+	if (IS_ERR(new_data))
+		return PTR_ERR(new_data);
 
-	FNTSUM(new_data) = csum;
 	/* Check if the same font is on some other console already */
 	for (i = first_fb_vc; i <= last_fb_vc; i++) {
 		if (fb_display[i].fontdata &&
diff --git a/include/linux/font.h b/include/linux/font.h
index 58bf3c64cabb..3eb4818402c5 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -13,6 +13,8 @@
 
 #include <linux/types.h>
 
+struct console_font;
+
 /*
  * font_data_t and helpers
  */
@@ -54,6 +56,8 @@ static inline const unsigned char *font_data_buf(font_data_t *fd)
 	return (const unsigned char *)fd;
 }
 
+font_data_t *font_data_import(const struct console_font *font, unsigned int vpitch,
+			      u32 (*calc_csum)(u32, const void *, size_t));
 void font_data_get(font_data_t *fd);
 bool font_data_put(font_data_t *fd);
 unsigned int font_data_size(font_data_t *fd);
@@ -124,7 +128,7 @@ extern const struct font_desc *get_default_font(int xres, int yres,
 
 struct font_data {
 	unsigned int extra[FONT_EXTRA_WORDS];
-	const unsigned char data[];
+	unsigned char data[];
 } __packed;
 
 #endif /* _VIDEO_FONT_H */
diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
index 3fb76d185647..16e75c3d2a0f 100644
--- a/lib/fonts/fonts.c
+++ b/lib/fonts/fonts.c
@@ -14,7 +14,9 @@
 
 #include <linux/container_of.h>
 #include <linux/font.h>
+#include <linux/kd.h>
 #include <linux/module.h>
+#include <linux/overflow.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/types.h>
@@ -23,6 +25,8 @@
 #include <asm/setup.h>
 #endif
 
+#define console_font_pitch(font) DIV_ROUND_UP((font)->width, 8)
+
 /*
  * Helpers for font_data_t
  */
@@ -42,6 +46,64 @@ static void font_data_free(font_data_t *fd)
 	kfree(to_font_data_struct(fd));
 }
 
+/**
+ * font_data_import - Allocates and initializes font data from user space
+ * @font: A font from user space
+ * @vpitch: The size of a single glyph in @font in bytes
+ * @calc_csum: An optional helper to calculate a chechsum
+ *
+ * Font data from user space must be translated to the kernel's format. The
+ * font's glyph geometry and data is provided in @font. The parameter @vpitch
+ * gives the number of bytes per glyph, including trailing bytes.
+ *
+ * The parameter @calc_csum is optional. Fbcon passes crc32() to calculate the
+ * font data's checksum.
+ *
+ * Returns:
+ * Newly initialized font data on success, or a pointer-encoded errno value otherwise.
+ */
+font_data_t *font_data_import(const struct console_font *font, unsigned int vpitch,
+			      u32 (*calc_csum)(u32, const void *, size_t))
+{
+	unsigned int pitch = console_font_pitch(font);
+	unsigned int h = font->height;
+	unsigned int charcount = font->charcount;
+	const unsigned char *data = font->data;
+	u32 csum = 0;
+	struct font_data *font_data;
+	int size, alloc_size;
+	unsigned int i;
+	font_data_t *fd;
+
+	/* Check for integer overflow in font-size calculation */
+	if (check_mul_overflow(h, pitch, &size) ||
+	    check_mul_overflow(size, charcount, &size))
+		return ERR_PTR(-EINVAL);
+
+	/* Check for overflow in allocation size calculation */
+	if (check_add_overflow(sizeof(*font_data), size, &alloc_size))
+		return ERR_PTR(-EINVAL);
+
+	font_data = kmalloc(alloc_size, GFP_USER);
+	if (!font_data)
+		return ERR_PTR(-ENOMEM);
+	memset(font_data->extra, 0, sizeof(font_data->extra));
+
+	for (i = 0; i < charcount; ++i)
+		memcpy(font_data->data + i * h * pitch, data + i * vpitch * pitch, h * pitch);
+
+	if (calc_csum)
+		csum = calc_csum(0, font_data->data, size);
+
+	fd = font_data->data;
+	REFCOUNT(fd) = 1; /* start with reference acquired */
+	FNTSIZE(fd) = size;
+	FNTSUM(fd) = csum;
+
+	return fd;
+}
+EXPORT_SYMBOL_GPL(font_data_import);
+
 /**
  * font_data_get - Acquires a reference on font data
  * @fd: Font data
-- 
cgit v1.2.3


From c37bd7c8d36f760c064de2639423866dc0270997 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:54 +0100
Subject: lib/fonts: Store font data for user space with font_data_export()

Add font_data_export() and update consoles to use it.

The helper font_data_export() is based on code in fbcon_get_font().
It extends the size of a single glyph to match the requested vpitch,
which us usually 32 bytes for fonts from user space. Internal fonts
have a pitch according to the glyph's height.

The implementation of font_data_export() differs in several ways from
the original code. The original implementation distinguished between
different pitches of the font data. This is not necessary as the pitch
is a parameter in the copying.

There was also special handling for a font pitch of 3 bytes, which got
expanded to 4 bytes (with trailing bits on each scanline). The logic
originated from long before git history exists even in the historical
tree. So it is not clear why this was implemented. It is not what user
space expects. The setfont utitlity loads font with 3-bytes pitches and
expects to read such fonts with a 3-byte pitch. For any font width, the
font pitch is always the width extended to the next multiple of 8. See
[1] for the user-space font-reading code.

With the changes to handling the font pitches, font_data_export() replaces
the original code's various special cases with a single copying logic.

v3:
- fix typos (Helge)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://github.com/legionus/kbd/blob/v2.9.0/src/libkfont/kdfontop.c#L73 # [1]
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/core/fbcon.c | 57 ++--------------------------------------
 include/linux/font.h             |  1 +
 lib/fonts/fonts.c                | 40 ++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 53677c09a0ec..8641b0b3edc4 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2282,68 +2282,15 @@ static bool fbcon_blank(struct vc_data *vc, enum vesa_blank_mode blank,
 
 static int fbcon_get_font(struct vc_data *vc, struct console_font *font, unsigned int vpitch)
 {
-	struct fbcon_display *p = &fb_display[vc->vc_num];
-	font_data_t *fontdata = p->fontdata;
-	u8 *data = font->data;
-	int i, j;
+	const struct fbcon_display *p = &fb_display[vc->vc_num];
 
 	font->width = vc->vc_font.width;
 	font->height = vc->vc_font.height;
 	if (font->height > vpitch)
 		return -ENOSPC;
 	font->charcount = vc->vc_hi_font_mask ? 512 : 256;
-	if (!font->data)
-		return 0;
-
-	if (font->width <= 8) {
-		j = vc->vc_font.height;
-		if (font->charcount * j > font_data_size(fontdata))
-			return -EINVAL;
 
-		for (i = 0; i < font->charcount; i++) {
-			memcpy(data, fontdata, j);
-			memset(data + j, 0, vpitch - j);
-			data += vpitch;
-			fontdata += j;
-		}
-	} else if (font->width <= 16) {
-		j = vc->vc_font.height * 2;
-		if (font->charcount * j > font_data_size(fontdata))
-			return -EINVAL;
-
-		for (i = 0; i < font->charcount; i++) {
-			memcpy(data, fontdata, j);
-			memset(data + j, 0, 2*vpitch - j);
-			data += 2*vpitch;
-			fontdata += j;
-		}
-	} else if (font->width <= 24) {
-		if (font->charcount * (vc->vc_font.height * sizeof(u32)) > font_data_size(fontdata))
-			return -EINVAL;
-
-		for (i = 0; i < font->charcount; i++) {
-			for (j = 0; j < vc->vc_font.height; j++) {
-				*data++ = fontdata[0];
-				*data++ = fontdata[1];
-				*data++ = fontdata[2];
-				fontdata += sizeof(u32);
-			}
-			memset(data, 0, 3 * (vpitch - j));
-			data += 3 * (vpitch - j);
-		}
-	} else {
-		j = vc->vc_font.height * 4;
-		if (font->charcount * j > font_data_size(fontdata))
-			return -EINVAL;
-
-		for (i = 0; i < font->charcount; i++) {
-			memcpy(data, fontdata, j);
-			memset(data + j, 0, 4 * vpitch - j);
-			data += 4 * vpitch;
-			fontdata += j;
-		}
-	}
-	return 0;
+	return font_data_export(p->fontdata, font, vpitch);
 }
 
 /* set/clear vc_hi_font_mask and update vc attrs accordingly */
diff --git a/include/linux/font.h b/include/linux/font.h
index 3eb4818402c5..d80db66a5c17 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -62,6 +62,7 @@ void font_data_get(font_data_t *fd);
 bool font_data_put(font_data_t *fd);
 unsigned int font_data_size(font_data_t *fd);
 bool font_data_is_equal(font_data_t *lhs, font_data_t *rhs);
+int font_data_export(font_data_t *fd, struct console_font *font, unsigned int vpitch);
 
 /*
  * Font description
diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
index 16e75c3d2a0f..a861b375e35d 100644
--- a/lib/fonts/fonts.c
+++ b/lib/fonts/fonts.c
@@ -198,6 +198,46 @@ bool font_data_is_equal(font_data_t *lhs, font_data_t *rhs)
 }
 EXPORT_SYMBOL_GPL(font_data_is_equal);
 
+/**
+ * font_data_export - Stores font data for user space
+ * @fd: Font data
+ * @font: A font for user space
+ * @vpitch: The size of a single glyph in @font in bytes
+ *
+ * Store the font data given in @fd to the font in @font. Values and
+ * pointers in @font are pre-initialized. This helper mostly checks some
+ * corner cases and translates glyph sizes according to the value given
+ * @vpitch.
+ *
+ * Returns:
+ * 0 on success, or a negative errno code otherwise.
+ */
+int font_data_export(font_data_t *fd, struct console_font *font, unsigned int vpitch)
+{
+	const unsigned char *font_data = font_data_buf(fd);
+	unsigned char *data = font->data;
+	unsigned int pitch = console_font_pitch(font);
+	unsigned int glyphsize, i;
+
+	if (!font->width || !font->height || !font->charcount || !font->data)
+		return 0;
+
+	glyphsize = font->height * pitch;
+
+	if (font->charcount * glyphsize > font_data_size(fd))
+		return -EINVAL;
+
+	for (i = 0; i < font->charcount; i++) {
+		memcpy(data, font_data, glyphsize);
+		memset(data + glyphsize, 0, pitch * vpitch - glyphsize);
+		data += pitch * vpitch;
+		font_data += glyphsize;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(font_data_export);
+
 /*
  * Font lookup
  */
-- 
cgit v1.2.3


From db65872b38dc9f18a62669d6ae1e4ec7868a85a9 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Mar 2026 15:14:55 +0100
Subject: lib/fonts: Remove internal symbols and macros from public header file

Define access macros for font_data_t in fonts.c. Define struct font_data
and declare most of the font symbols in the internal header font.h, where
they can only be seen by the font code.

Also move font indices into internal font.h. They appear to be unused
though. There is m86k assembly code that operates on the idx field, so
leave them in place for now.

List all built-in fonts in a separate section in the public header file.

v2:
- do not add config guards around font symbols (Helge)
- keep declaration of built-in fonts in public header

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 include/linux/font.h       | 57 ++++++++++++++--------------------------------
 lib/fonts/font.h           | 38 +++++++++++++++++++++++++++++++
 lib/fonts/font_10x18.c     |  2 +-
 lib/fonts/font_6x10.c      |  3 ++-
 lib/fonts/font_6x11.c      |  2 +-
 lib/fonts/font_6x8.c       |  3 ++-
 lib/fonts/font_7x14.c      |  2 +-
 lib/fonts/font_8x16.c      |  3 ++-
 lib/fonts/font_8x8.c       |  2 +-
 lib/fonts/font_acorn_8x8.c |  2 +-
 lib/fonts/font_mini_4x6.c  |  2 +-
 lib/fonts/font_pearl_8x8.c |  2 +-
 lib/fonts/font_sun12x22.c  |  3 ++-
 lib/fonts/font_sun8x16.c   |  3 ++-
 lib/fonts/font_ter10x18.c  |  4 +++-
 lib/fonts/font_ter16x32.c  |  4 +++-
 lib/fonts/fonts.c          |  8 ++++++-
 17 files changed, 85 insertions(+), 55 deletions(-)
 create mode 100644 lib/fonts/font.h

(limited to 'include/linux')

diff --git a/include/linux/font.h b/include/linux/font.h
index d80db66a5c17..5401f07dd6ce 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -77,36 +77,6 @@ struct font_desc {
     int pref;
 };
 
-#define VGA8x8_IDX	0
-#define VGA8x16_IDX	1
-#define PEARL8x8_IDX	2
-#define VGA6x11_IDX	3
-#define FONT7x14_IDX	4
-#define	FONT10x18_IDX	5
-#define SUN8x16_IDX	6
-#define SUN12x22_IDX	7
-#define ACORN8x8_IDX	8
-#define	MINI4x6_IDX	9
-#define FONT6x10_IDX	10
-#define TER16x32_IDX	11
-#define FONT6x8_IDX	12
-#define TER10x18_IDX	13
-
-extern const struct font_desc	font_vga_8x8,
-			font_vga_8x16,
-			font_pearl_8x8,
-			font_vga_6x11,
-			font_7x14,
-			font_10x18,
-			font_sun_8x16,
-			font_sun_12x22,
-			font_acorn_8x8,
-			font_mini_4x6,
-			font_6x10,
-			font_ter_16x32,
-			font_6x8,
-			font_ter_10x18;
-
 /* Find a font with a specific name */
 
 extern const struct font_desc *find_font(const char *name);
@@ -120,16 +90,23 @@ extern const struct font_desc *get_default_font(int xres, int yres,
 /* Max. length for the name of a predefined font */
 #define MAX_FONT_NAME	32
 
-/* Extra word getters */
-#define REFCOUNT(fd)	(((int *)(fd))[-1])
-#define FNTSIZE(fd)	(((int *)(fd))[-2])
-#define FNTSUM(fd)	(((int *)(fd))[-4])
-
-#define FONT_EXTRA_WORDS 4
+/*
+ * Built-in fonts
+ */
 
-struct font_data {
-	unsigned int extra[FONT_EXTRA_WORDS];
-	unsigned char data[];
-} __packed;
+extern const struct font_desc font_10x18;
+extern const struct font_desc font_6x10;
+extern const struct font_desc font_6x8;
+extern const struct font_desc font_7x14;
+extern const struct font_desc font_acorn_8x8;
+extern const struct font_desc font_mini_4x6;
+extern const struct font_desc font_pearl_8x8;
+extern const struct font_desc font_sun_12x22;
+extern const struct font_desc font_sun_8x16;
+extern const struct font_desc font_ter_10x18;
+extern const struct font_desc font_ter_16x32;
+extern const struct font_desc font_vga_6x11;
+extern const struct font_desc font_vga_8x16;
+extern const struct font_desc font_vga_8x8;
 
 #endif /* _VIDEO_FONT_H */
diff --git a/lib/fonts/font.h b/lib/fonts/font.h
new file mode 100644
index 000000000000..4f1adf0b6b54
--- /dev/null
+++ b/lib/fonts/font.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LIB_FONTS_FONT_H
+#define _LIB_FONTS_FONT_H
+
+#include <linux/font.h>
+
+/*
+ * Font data
+ */
+
+#define FONT_EXTRA_WORDS 4
+
+struct font_data {
+	unsigned int extra[FONT_EXTRA_WORDS];
+	unsigned char data[];
+} __packed;
+
+/*
+ * Built-in fonts
+ */
+
+#define VGA8x8_IDX	0
+#define VGA8x16_IDX	1
+#define PEARL8x8_IDX	2
+#define VGA6x11_IDX	3
+#define FONT7x14_IDX	4
+#define	FONT10x18_IDX	5
+#define SUN8x16_IDX	6
+#define SUN12x22_IDX	7
+#define ACORN8x8_IDX	8
+#define	MINI4x6_IDX	9
+#define FONT6x10_IDX	10
+#define TER16x32_IDX	11
+#define FONT6x8_IDX	12
+#define TER10x18_IDX	13
+
+#endif
diff --git a/lib/fonts/font_10x18.c b/lib/fonts/font_10x18.c
index 5d940db626e7..10edebc4bb74 100644
--- a/lib/fonts/font_10x18.c
+++ b/lib/fonts/font_10x18.c
@@ -4,7 +4,7 @@
  * by Jurriaan Kalkman 06-2005  *
  ********************************/
 
-#include <linux/font.h>
+#include "font.h"
 
 #define FONTDATAMAX 9216
 
diff --git a/lib/fonts/font_6x10.c b/lib/fonts/font_6x10.c
index e65df019e0d2..660d3a371b30 100644
--- a/lib/fonts/font_6x10.c
+++ b/lib/fonts/font_6x10.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
+#include "font.h"
 
 #define FONTDATAMAX 2560
 
diff --git a/lib/fonts/font_6x11.c b/lib/fonts/font_6x11.c
index bd76b3f6b635..671487ccc172 100644
--- a/lib/fonts/font_6x11.c
+++ b/lib/fonts/font_6x11.c
@@ -5,7 +5,7 @@
 /*                                            */
 /**********************************************/
 
-#include <linux/font.h>
+#include "font.h"
 
 #define FONTDATAMAX (11*256)
 
diff --git a/lib/fonts/font_6x8.c b/lib/fonts/font_6x8.c
index 06ace7792521..5811ee07f4d8 100644
--- a/lib/fonts/font_6x8.c
+++ b/lib/fonts/font_6x8.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
+#include "font.h"
 
 #define FONTDATAMAX 2048
 
diff --git a/lib/fonts/font_7x14.c b/lib/fonts/font_7x14.c
index a2f561c9fa04..0c7475d643c8 100644
--- a/lib/fonts/font_7x14.c
+++ b/lib/fonts/font_7x14.c
@@ -4,7 +4,7 @@
 /* by Jurriaan Kalkman 05-2005        */
 /**************************************/
 
-#include <linux/font.h>
+#include "font.h"
 
 #define FONTDATAMAX 3584
 
diff --git a/lib/fonts/font_8x16.c b/lib/fonts/font_8x16.c
index 06ae14088514..523e95c75569 100644
--- a/lib/fonts/font_8x16.c
+++ b/lib/fonts/font_8x16.c
@@ -5,9 +5,10 @@
 /*                                            */
 /**********************************************/
 
-#include <linux/font.h>
 #include <linux/module.h>
 
+#include "font.h"
+
 #define FONTDATAMAX 4096
 
 static const struct font_data fontdata_8x16 = {
diff --git a/lib/fonts/font_8x8.c b/lib/fonts/font_8x8.c
index 69570b8c31af..e5b697fc9675 100644
--- a/lib/fonts/font_8x8.c
+++ b/lib/fonts/font_8x8.c
@@ -5,7 +5,7 @@
 /*                                            */
 /**********************************************/
 
-#include <linux/font.h>
+#include "font.h"
 
 #define FONTDATAMAX 2048
 
diff --git a/lib/fonts/font_acorn_8x8.c b/lib/fonts/font_acorn_8x8.c
index af5fa72aa8b7..36c51016769d 100644
--- a/lib/fonts/font_acorn_8x8.c
+++ b/lib/fonts/font_acorn_8x8.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Acorn-like font definition, with PC graphics characters */
 
-#include <linux/font.h>
+#include "font.h"
 
 #define FONTDATAMAX 2048
 
diff --git a/lib/fonts/font_mini_4x6.c b/lib/fonts/font_mini_4x6.c
index cc21dc70cfd1..dc919c160dde 100644
--- a/lib/fonts/font_mini_4x6.c
+++ b/lib/fonts/font_mini_4x6.c
@@ -39,7 +39,7 @@ __END__;
    MSBit to LSBit = left to right.
  */
 
-#include <linux/font.h>
+#include "font.h"
 
 #define FONTDATAMAX 1536
 
diff --git a/lib/fonts/font_pearl_8x8.c b/lib/fonts/font_pearl_8x8.c
index ae98ca17982e..2438b374acea 100644
--- a/lib/fonts/font_pearl_8x8.c
+++ b/lib/fonts/font_pearl_8x8.c
@@ -10,7 +10,7 @@
 /*                                            */
 /**********************************************/
 
-#include <linux/font.h>
+#include "font.h"
 
 #define FONTDATAMAX 2048
 
diff --git a/lib/fonts/font_sun12x22.c b/lib/fonts/font_sun12x22.c
index 91daf5ab8b6b..2afbc144bea8 100644
--- a/lib/fonts/font_sun12x22.c
+++ b/lib/fonts/font_sun12x22.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
+#include "font.h"
 
 #define FONTDATAMAX 11264
 
diff --git a/lib/fonts/font_sun8x16.c b/lib/fonts/font_sun8x16.c
index 81bb4eeae04e..2b7b2d8e548a 100644
--- a/lib/fonts/font_sun8x16.c
+++ b/lib/fonts/font_sun8x16.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
+#include "font.h"
 
 #define FONTDATAMAX 4096
 
diff --git a/lib/fonts/font_ter10x18.c b/lib/fonts/font_ter10x18.c
index 80356e9d56c7..3f30b4a211ab 100644
--- a/lib/fonts/font_ter10x18.c
+++ b/lib/fonts/font_ter10x18.c
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
 #include <linux/module.h>
 
+#include "font.h"
+
 #define FONTDATAMAX 9216
 
 static const struct font_data fontdata_ter10x18 = {
diff --git a/lib/fonts/font_ter16x32.c b/lib/fonts/font_ter16x32.c
index 5baedc573dd6..93616cffe642 100644
--- a/lib/fonts/font_ter16x32.c
+++ b/lib/fonts/font_ter16x32.c
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
 #include <linux/module.h>
 
+#include "font.h"
+
 #define FONTDATAMAX 16384
 
 static const struct font_data fontdata_ter16x32 = {
diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
index a861b375e35d..5938f542906b 100644
--- a/lib/fonts/fonts.c
+++ b/lib/fonts/fonts.c
@@ -13,7 +13,6 @@
  */
 
 #include <linux/container_of.h>
-#include <linux/font.h>
 #include <linux/kd.h>
 #include <linux/module.h>
 #include <linux/overflow.h>
@@ -25,12 +24,19 @@
 #include <asm/setup.h>
 #endif
 
+#include "font.h"
+
 #define console_font_pitch(font) DIV_ROUND_UP((font)->width, 8)
 
 /*
  * Helpers for font_data_t
  */
 
+/* Extra word getters */
+#define REFCOUNT(fd)	(((int *)(fd))[-1])
+#define FNTSIZE(fd)	(((int *)(fd))[-2])
+#define FNTSUM(fd)	(((int *)(fd))[-4])
+
 static struct font_data *to_font_data_struct(font_data_t *fd)
 {
 	return container_of(fd, struct font_data, data[0]);
-- 
cgit v1.2.3


From 2e67fabd8b77c4f482df9b211bca1b495c6c2c24 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@google.com>
Date: Mon, 9 Mar 2026 16:24:49 +0000
Subject: ring-buffer: Introduce ring-buffer remotes

Add ring-buffer remotes to support entities outside of the kernel (such
as firmware or a hypervisor) that writes events into a ring-buffer using
the tracefs format

Require a description of the ring-buffer pages (struct
trace_buffer_desc) and callbacks (swap_reader_page and reset) to set up
the ring-buffer on the kernel side.

Expect the remote entity to maintain and update the meta-page.

Link: https://patch.msgid.link/20260309162516.2623589-4-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  58 +++++++++++
 kernel/trace/ring_buffer.c  | 233 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 283 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d862fa610270..994f52b34344 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -251,4 +251,62 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
 void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu);
 int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
 int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
+
+struct ring_buffer_desc {
+	int		cpu;
+	unsigned int	nr_page_va; /* excludes the meta page */
+	unsigned long	meta_va;
+	unsigned long	page_va[] __counted_by(nr_page_va);
+};
+
+struct trace_buffer_desc {
+	int		nr_cpus;
+	size_t		struct_len;
+	char		__data[]; /* list of ring_buffer_desc */
+};
+
+static inline struct ring_buffer_desc *__next_ring_buffer_desc(struct ring_buffer_desc *desc)
+{
+	size_t len = struct_size(desc, page_va, desc->nr_page_va);
+
+	return (struct ring_buffer_desc *)((void *)desc + len);
+}
+
+static inline struct ring_buffer_desc *__first_ring_buffer_desc(struct trace_buffer_desc *desc)
+{
+	return (struct ring_buffer_desc *)(&desc->__data[0]);
+}
+
+static inline size_t trace_buffer_desc_size(size_t buffer_size, unsigned int nr_cpus)
+{
+	unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
+	struct ring_buffer_desc *rbdesc;
+
+	return size_add(offsetof(struct trace_buffer_desc, __data),
+			size_mul(nr_cpus, struct_size(rbdesc, page_va, nr_pages)));
+}
+
+#define for_each_ring_buffer_desc(__pdesc, __cpu, __trace_pdesc)		\
+	for (__pdesc = __first_ring_buffer_desc(__trace_pdesc), __cpu = 0;	\
+	     (__cpu) < (__trace_pdesc)->nr_cpus;				\
+	     (__cpu)++, __pdesc = __next_ring_buffer_desc(__pdesc))
+
+struct ring_buffer_remote {
+	struct trace_buffer_desc	*desc;
+	int				(*swap_reader_page)(unsigned int cpu, void *priv);
+	int				(*reset)(unsigned int cpu, void *priv);
+	void				*priv;
+};
+
+int ring_buffer_poll_remote(struct trace_buffer *buffer, int cpu);
+
+struct trace_buffer *
+__ring_buffer_alloc_remote(struct ring_buffer_remote *remote,
+			   struct lock_class_key *key);
+
+#define ring_buffer_alloc_remote(remote)			\
+({								\
+	static struct lock_class_key __key;			\
+	__ring_buffer_alloc_remote(remote, &__key);		\
+})
 #endif /* _LINUX_RING_BUFFER_H */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3d2804a7e8ab..88218377fa29 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -559,6 +559,8 @@ struct ring_buffer_per_cpu {
 	struct trace_buffer_meta	*meta_page;
 	struct ring_buffer_cpu_meta	*ring_meta;
 
+	struct ring_buffer_remote	*remote;
+
 	/* ring buffer pages to update, > 0 to add, < 0 to remove */
 	long				nr_pages_to_update;
 	struct list_head		new_pages; /* new pages to add */
@@ -581,6 +583,8 @@ struct trace_buffer {
 
 	struct ring_buffer_per_cpu	**buffers;
 
+	struct ring_buffer_remote	*remote;
+
 	struct hlist_node		node;
 	u64				(*clock)(void);
 
@@ -2238,6 +2242,40 @@ static void rb_meta_buffer_update(struct ring_buffer_per_cpu *cpu_buffer,
 	}
 }
 
+static struct ring_buffer_desc *ring_buffer_desc(struct trace_buffer_desc *trace_desc, int cpu)
+{
+	struct ring_buffer_desc *desc, *end;
+	size_t len;
+	int i;
+
+	if (!trace_desc)
+		return NULL;
+
+	if (cpu >= trace_desc->nr_cpus)
+		return NULL;
+
+	end = (struct ring_buffer_desc *)((void *)trace_desc + trace_desc->struct_len);
+	desc = __first_ring_buffer_desc(trace_desc);
+	len = struct_size(desc, page_va, desc->nr_page_va);
+	desc = (struct ring_buffer_desc *)((void *)desc + (len * cpu));
+
+	if (desc < end && desc->cpu == cpu)
+		return desc;
+
+	/* Missing CPUs, need to linear search */
+	for_each_ring_buffer_desc(desc, i, trace_desc) {
+		if (desc->cpu == cpu)
+			return desc;
+	}
+
+	return NULL;
+}
+
+static void *ring_buffer_desc_page(struct ring_buffer_desc *desc, int page_id)
+{
+	return page_id > desc->nr_page_va ? NULL : (void *)desc->page_va[page_id];
+}
+
 static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 		long nr_pages, struct list_head *pages)
 {
@@ -2245,6 +2283,7 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	struct ring_buffer_cpu_meta *meta = NULL;
 	struct buffer_page *bpage, *tmp;
 	bool user_thread = current->mm != NULL;
+	struct ring_buffer_desc *desc = NULL;
 	long i;
 
 	/*
@@ -2273,6 +2312,12 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	if (buffer->range_addr_start)
 		meta = rb_range_meta(buffer, nr_pages, cpu_buffer->cpu);
 
+	if (buffer->remote) {
+		desc = ring_buffer_desc(buffer->remote->desc, cpu_buffer->cpu);
+		if (!desc || WARN_ON(desc->nr_page_va != (nr_pages + 1)))
+			return -EINVAL;
+	}
+
 	for (i = 0; i < nr_pages; i++) {
 
 		bpage = alloc_cpu_page(cpu_buffer->cpu);
@@ -2297,6 +2342,16 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 				rb_meta_buffer_update(cpu_buffer, bpage);
 			bpage->range = 1;
 			bpage->id = i + 1;
+		} else if (desc) {
+			void *p = ring_buffer_desc_page(desc, i + 1);
+
+			if (WARN_ON(!p))
+				goto free_pages;
+
+			bpage->page = p;
+			bpage->range = 1; /* bpage->page can't be freed */
+			bpage->id = i + 1;
+			cpu_buffer->subbuf_ids[i + 1] = bpage;
 		} else {
 			int order = cpu_buffer->buffer->subbuf_order;
 			bpage->page = alloc_cpu_data(cpu_buffer->cpu, order);
@@ -2394,6 +2449,30 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
 		if (cpu_buffer->ring_meta->head_buffer)
 			rb_meta_buffer_update(cpu_buffer, bpage);
 		bpage->range = 1;
+	} else if (buffer->remote) {
+		struct ring_buffer_desc *desc = ring_buffer_desc(buffer->remote->desc, cpu);
+
+		if (!desc)
+			goto fail_free_reader;
+
+		cpu_buffer->remote = buffer->remote;
+		cpu_buffer->meta_page = (struct trace_buffer_meta *)(void *)desc->meta_va;
+		cpu_buffer->nr_pages = nr_pages;
+		cpu_buffer->subbuf_ids = kcalloc(cpu_buffer->nr_pages + 1,
+						 sizeof(*cpu_buffer->subbuf_ids), GFP_KERNEL);
+		if (!cpu_buffer->subbuf_ids)
+			goto fail_free_reader;
+
+		/* Remote buffers are read-only and immutable */
+		atomic_inc(&cpu_buffer->record_disabled);
+		atomic_inc(&cpu_buffer->resize_disabled);
+
+		bpage->page = ring_buffer_desc_page(desc, cpu_buffer->meta_page->reader.id);
+		if (!bpage->page)
+			goto fail_free_reader;
+
+		bpage->range = 1;
+		cpu_buffer->subbuf_ids[0] = bpage;
 	} else {
 		int order = cpu_buffer->buffer->subbuf_order;
 		bpage->page = alloc_cpu_data(cpu, order);
@@ -2453,6 +2532,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 
 	irq_work_sync(&cpu_buffer->irq_work.work);
 
+	if (cpu_buffer->remote)
+		kfree(cpu_buffer->subbuf_ids);
+
 	free_buffer_page(cpu_buffer->reader_page);
 
 	if (head) {
@@ -2475,7 +2557,8 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
 					 int order, unsigned long start,
 					 unsigned long end,
 					 unsigned long scratch_size,
-					 struct lock_class_key *key)
+					 struct lock_class_key *key,
+					 struct ring_buffer_remote *remote)
 {
 	struct trace_buffer *buffer __free(kfree) = NULL;
 	long nr_pages;
@@ -2515,6 +2598,8 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
 	if (!buffer->buffers)
 		goto fail_free_cpumask;
 
+	cpu = raw_smp_processor_id();
+
 	/* If start/end are specified, then that overrides size */
 	if (start && end) {
 		unsigned long buffers_start;
@@ -2570,6 +2655,15 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
 		buffer->range_addr_end = end;
 
 		rb_range_meta_init(buffer, nr_pages, scratch_size);
+	} else if (remote) {
+		struct ring_buffer_desc *desc = ring_buffer_desc(remote->desc, cpu);
+
+		buffer->remote = remote;
+		/* The writer is remote. This ring-buffer is read-only */
+		atomic_inc(&buffer->record_disabled);
+		nr_pages = desc->nr_page_va - 1;
+		if (nr_pages < 2)
+			goto fail_free_buffers;
 	} else {
 
 		/* need at least two pages */
@@ -2578,7 +2672,6 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
 			nr_pages = 2;
 	}
 
-	cpu = raw_smp_processor_id();
 	cpumask_set_cpu(cpu, buffer->cpumask);
 	buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
 	if (!buffer->buffers[cpu])
@@ -2620,7 +2713,7 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 					struct lock_class_key *key)
 {
 	/* Default buffer page size - one system page */
-	return alloc_buffer(size, flags, 0, 0, 0, 0, key);
+	return alloc_buffer(size, flags, 0, 0, 0, 0, key, NULL);
 
 }
 EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
@@ -2647,7 +2740,18 @@ struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flag
 					       struct lock_class_key *key)
 {
 	return alloc_buffer(size, flags, order, start, start + range_size,
-			    scratch_size, key);
+			    scratch_size, key, NULL);
+}
+
+/**
+ * __ring_buffer_alloc_remote - allocate a new ring_buffer from a remote
+ * @remote: Contains a description of the ring-buffer pages and remote callbacks.
+ * @key: ring buffer reader_lock_key.
+ */
+struct trace_buffer *__ring_buffer_alloc_remote(struct ring_buffer_remote *remote,
+						struct lock_class_key *key)
+{
+	return alloc_buffer(0, 0, 0, 0, 0, 0, key, remote);
 }
 
 void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size)
@@ -5274,6 +5378,16 @@ unsigned long ring_buffer_overruns(struct trace_buffer *buffer)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_overruns);
 
+static bool rb_read_remote_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	local_set(&cpu_buffer->entries, READ_ONCE(cpu_buffer->meta_page->entries));
+	local_set(&cpu_buffer->overrun, READ_ONCE(cpu_buffer->meta_page->overrun));
+	local_set(&cpu_buffer->pages_touched, READ_ONCE(cpu_buffer->meta_page->pages_touched));
+	local_set(&cpu_buffer->pages_lost, READ_ONCE(cpu_buffer->meta_page->pages_lost));
+
+	return rb_num_of_entries(cpu_buffer);
+}
+
 static void rb_iter_reset(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
@@ -5428,7 +5542,43 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 }
 
 static struct buffer_page *
-rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+__rb_get_reader_page_from_remote(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct buffer_page *new_reader, *prev_reader;
+
+	if (!rb_read_remote_meta_page(cpu_buffer))
+		return NULL;
+
+	/* More to read on the reader page */
+	if (cpu_buffer->reader_page->read < rb_page_size(cpu_buffer->reader_page)) {
+		if (!cpu_buffer->reader_page->read)
+			cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
+		return cpu_buffer->reader_page;
+	}
+
+	prev_reader = cpu_buffer->subbuf_ids[cpu_buffer->meta_page->reader.id];
+
+	WARN_ON_ONCE(cpu_buffer->remote->swap_reader_page(cpu_buffer->cpu,
+							  cpu_buffer->remote->priv));
+	/* nr_pages doesn't include the reader page */
+	if (WARN_ON_ONCE(cpu_buffer->meta_page->reader.id > cpu_buffer->nr_pages))
+		return NULL;
+
+	new_reader = cpu_buffer->subbuf_ids[cpu_buffer->meta_page->reader.id];
+
+	WARN_ON_ONCE(prev_reader == new_reader);
+
+	cpu_buffer->reader_page->page = new_reader->page;
+	cpu_buffer->reader_page->id = new_reader->id;
+	cpu_buffer->reader_page->read = 0;
+	cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
+	cpu_buffer->lost_events = cpu_buffer->meta_page->reader.lost_events;
+
+	return rb_page_size(cpu_buffer->reader_page) ? cpu_buffer->reader_page : NULL;
+}
+
+static struct buffer_page *
+__rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
 	unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size);
@@ -5598,6 +5748,13 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	return reader;
 }
 
+static struct buffer_page *
+rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->remote ? __rb_get_reader_page_from_remote(cpu_buffer) :
+				    __rb_get_reader_page(cpu_buffer);
+}
+
 static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct ring_buffer_event *event;
@@ -5998,7 +6155,7 @@ ring_buffer_read_start(struct trace_buffer *buffer, int cpu, gfp_t flags)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
 
-	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask) || buffer->remote)
 		return NULL;
 
 	iter = kzalloc_obj(*iter, flags);
@@ -6166,6 +6323,23 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *page;
 
+	if (cpu_buffer->remote) {
+		if (!cpu_buffer->remote->reset)
+			return;
+
+		cpu_buffer->remote->reset(cpu_buffer->cpu, cpu_buffer->remote->priv);
+		rb_read_remote_meta_page(cpu_buffer);
+
+		/* Read related values, not covered by the meta-page */
+		local_set(&cpu_buffer->pages_read, 0);
+		cpu_buffer->read = 0;
+		cpu_buffer->read_bytes = 0;
+		cpu_buffer->last_overrun = 0;
+		cpu_buffer->reader_page->read = 0;
+
+		return;
+	}
+
 	rb_head_page_deactivate(cpu_buffer);
 
 	cpu_buffer->head_page
@@ -6396,6 +6570,46 @@ bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
 
+int ring_buffer_poll_remote(struct trace_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (cpu != RING_BUFFER_ALL_CPUS) {
+		if (!cpumask_test_cpu(cpu, buffer->cpumask))
+			return -EINVAL;
+
+		cpu_buffer = buffer->buffers[cpu];
+
+		guard(raw_spinlock)(&cpu_buffer->reader_lock);
+		if (rb_read_remote_meta_page(cpu_buffer))
+			rb_wakeups(buffer, cpu_buffer);
+
+		return 0;
+	}
+
+	guard(cpus_read_lock)();
+
+	/*
+	 * Make sure all the ring buffers are up to date before we start reading
+	 * them.
+	 */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+
+		guard(raw_spinlock)(&cpu_buffer->reader_lock);
+		rb_read_remote_meta_page(cpu_buffer);
+	}
+
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+
+		if (rb_num_of_entries(cpu_buffer))
+			rb_wakeups(buffer, cpu_buffer);
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
 /**
  * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
@@ -6634,6 +6848,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
 	unsigned int commit;
 	unsigned int read;
 	u64 save_timestamp;
+	bool force_memcpy;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return -1;
@@ -6671,6 +6886,8 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
 	/* Check if any events were dropped */
 	missed_events = cpu_buffer->lost_events;
 
+	force_memcpy = cpu_buffer->mapped || cpu_buffer->remote;
+
 	/*
 	 * If this page has been partially read or
 	 * if len is not big enough to read the rest of the page or
@@ -6680,7 +6897,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
 	 */
 	if (read || (len < (commit - read)) ||
 	    cpu_buffer->reader_page == cpu_buffer->commit_page ||
-	    cpu_buffer->mapped) {
+	    force_memcpy) {
 		struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
 		unsigned int rpos = read;
 		unsigned int pos = 0;
@@ -7259,7 +7476,7 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
 	unsigned long flags;
 	int err;
 
-	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask) || buffer->remote)
 		return -EINVAL;
 
 	cpu_buffer = buffer->buffers[cpu];
-- 
cgit v1.2.3


From 96e43537af5461b26f50904c6055046ba65d742f Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@google.com>
Date: Mon, 9 Mar 2026 16:24:51 +0000
Subject: tracing: Introduce trace remotes

A trace remote relies on ring-buffer remotes to read and control
compatible tracing buffers, written by entity such as firmware or
hypervisor.

Add a Tracefs directory remotes/ that contains all instances of trace
remotes. Each instance follows the same hierarchy as any other to ease
the support by existing user-space tools.

This currently does not provide any event support, which will come
later.

Link: https://patch.msgid.link/20260309162516.2623589-6-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_remote.h |  36 +++
 kernel/trace/Kconfig         |   3 +
 kernel/trace/Makefile        |   1 +
 kernel/trace/trace.c         |   2 +-
 kernel/trace/trace.h         |   6 +
 kernel/trace/trace_remote.c  | 619 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 666 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/trace_remote.h
 create mode 100644 kernel/trace/trace_remote.c

(limited to 'include/linux')

diff --git a/include/linux/trace_remote.h b/include/linux/trace_remote.h
new file mode 100644
index 000000000000..65b7e7b8267c
--- /dev/null
+++ b/include/linux/trace_remote.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_TRACE_REMOTE_H
+#define _LINUX_TRACE_REMOTE_H
+
+#include <linux/ring_buffer.h>
+
+/**
+ * struct trace_remote_callbacks - Callbacks used by Tracefs to control the remote
+ * @load_trace_buffer:  Called before Tracefs accesses the trace buffer for the first
+ *			time. Must return a &trace_buffer_desc
+ *			(most likely filled with trace_remote_alloc_buffer())
+ * @unload_trace_buffer:
+ *			Called once Tracefs has no use for the trace buffer
+ *			(most likely call trace_remote_free_buffer())
+ * @enable_tracing:	Called on Tracefs tracing_on. It is expected from the
+ *			remote to allow writing.
+ * @swap_reader_page:	Called when Tracefs consumes a new page from a
+ *			ring-buffer. It is expected from the remote to isolate a
+ *			new reader-page from the @cpu ring-buffer.
+ */
+struct trace_remote_callbacks {
+	struct trace_buffer_desc *(*load_trace_buffer)(unsigned long size, void *priv);
+	void	(*unload_trace_buffer)(struct trace_buffer_desc *desc, void *priv);
+	int	(*enable_tracing)(bool enable, void *priv);
+	int	(*swap_reader_page)(unsigned int cpu, void *priv);
+};
+
+int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv);
+
+int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
+			      const struct cpumask *cpumask);
+
+void trace_remote_free_buffer(struct trace_buffer_desc *desc);
+
+#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 49de13cae428..384dd36c8e29 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1281,4 +1281,7 @@ config HIST_TRIGGERS_DEBUG
 
 source "kernel/trace/rv/Kconfig"
 
+config TRACE_REMOTE
+	bool
+
 endif # FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 04096c21d06b..318923ce39f5 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -128,4 +128,5 @@ obj-$(CONFIG_FPROBE_EVENTS) += trace_fprobe.o
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 obj-$(CONFIG_RV) += rv/
 
+obj-$(CONFIG_TRACE_REMOTE) += trace_remote.o
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ebd996f8710e..e33cb3c39f6e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8589,7 +8589,7 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
 	return tr->percpu_dir;
 }
 
-static struct dentry *
+struct dentry *
 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
 		      void *data, long cpu, const struct file_operations *fops)
 {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b8f3804586a0..d6af4405b341 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -689,6 +689,12 @@ struct dentry *trace_create_file(const char *name,
 				 struct dentry *parent,
 				 void *data,
 				 const struct file_operations *fops);
+struct dentry *trace_create_cpu_file(const char *name,
+				     umode_t mode,
+				     struct dentry *parent,
+				     void *data,
+				     long cpu,
+				     const struct file_operations *fops);
 
 
 /**
diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
new file mode 100644
index 000000000000..8b06f730376e
--- /dev/null
+++ b/kernel/trace/trace_remote.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 - Google LLC
+ * Author: Vincent Donnefort <vdonnefort@google.com>
+ */
+
+#include <linux/kstrtox.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/tracefs.h>
+#include <linux/trace_remote.h>
+#include <linux/trace_seq.h>
+#include <linux/types.h>
+
+#include "trace.h"
+
+#define TRACEFS_DIR		"remotes"
+#define TRACEFS_MODE_WRITE	0640
+#define TRACEFS_MODE_READ	0440
+
+struct trace_remote_iterator {
+	struct trace_remote		*remote;
+	struct trace_seq		seq;
+	struct delayed_work		poll_work;
+	unsigned long			lost_events;
+	u64				ts;
+	int				cpu;
+	int				evt_cpu;
+};
+
+struct trace_remote {
+	struct trace_remote_callbacks	*cbs;
+	void				*priv;
+	struct trace_buffer		*trace_buffer;
+	struct trace_buffer_desc	*trace_buffer_desc;
+	unsigned long			trace_buffer_size;
+	struct ring_buffer_remote	rb_remote;
+	struct mutex			lock;
+	unsigned int			nr_readers;
+	unsigned int			poll_ms;
+	bool				tracing_on;
+};
+
+static bool trace_remote_loaded(struct trace_remote *remote)
+{
+	return !!remote->trace_buffer;
+}
+
+static int trace_remote_load(struct trace_remote *remote)
+{
+	struct ring_buffer_remote *rb_remote = &remote->rb_remote;
+	struct trace_buffer_desc *desc;
+
+	lockdep_assert_held(&remote->lock);
+
+	if (trace_remote_loaded(remote))
+		return 0;
+
+	desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv);
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+
+	rb_remote->desc = desc;
+	rb_remote->swap_reader_page = remote->cbs->swap_reader_page;
+	rb_remote->priv = remote->priv;
+	remote->trace_buffer = ring_buffer_alloc_remote(rb_remote);
+	if (!remote->trace_buffer) {
+		remote->cbs->unload_trace_buffer(desc, remote->priv);
+		return -ENOMEM;
+	}
+
+	remote->trace_buffer_desc = desc;
+
+	return 0;
+}
+
+static void trace_remote_try_unload(struct trace_remote *remote)
+{
+	lockdep_assert_held(&remote->lock);
+
+	if (!trace_remote_loaded(remote))
+		return;
+
+	/* The buffer is being read or writable */
+	if (remote->nr_readers || remote->tracing_on)
+		return;
+
+	/* The buffer has readable data */
+	if (!ring_buffer_empty(remote->trace_buffer))
+		return;
+
+	ring_buffer_free(remote->trace_buffer);
+	remote->trace_buffer = NULL;
+	remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv);
+}
+
+static int trace_remote_enable_tracing(struct trace_remote *remote)
+{
+	int ret;
+
+	lockdep_assert_held(&remote->lock);
+
+	if (remote->tracing_on)
+		return 0;
+
+	ret = trace_remote_load(remote);
+	if (ret)
+		return ret;
+
+	ret = remote->cbs->enable_tracing(true, remote->priv);
+	if (ret) {
+		trace_remote_try_unload(remote);
+		return ret;
+	}
+
+	remote->tracing_on = true;
+
+	return 0;
+}
+
+static int trace_remote_disable_tracing(struct trace_remote *remote)
+{
+	int ret;
+
+	lockdep_assert_held(&remote->lock);
+
+	if (!remote->tracing_on)
+		return 0;
+
+	ret = remote->cbs->enable_tracing(false, remote->priv);
+	if (ret)
+		return ret;
+
+	ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS);
+	remote->tracing_on = false;
+	trace_remote_try_unload(remote);
+
+	return 0;
+}
+
+static ssize_t
+tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	struct trace_remote *remote = filp->private_data;
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	guard(mutex)(&remote->lock);
+
+	ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote);
+	if (ret)
+		return ret;
+
+	return cnt;
+}
+static int tracing_on_show(struct seq_file *s, void *unused)
+{
+	struct trace_remote *remote = s->private;
+
+	seq_printf(s, "%d\n", remote->tracing_on);
+
+	return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on);
+
+static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt,
+				    loff_t *ppos)
+{
+	struct trace_remote *remote = filp->private_data;
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	/* KiB to Bytes */
+	if (!val || check_shl_overflow(val, 10, &val))
+		return -EINVAL;
+
+	guard(mutex)(&remote->lock);
+
+	if (trace_remote_loaded(remote))
+		return -EBUSY;
+
+	remote->trace_buffer_size = val;
+
+	return cnt;
+}
+
+static int buffer_size_kb_show(struct seq_file *s, void *unused)
+{
+	struct trace_remote *remote = s->private;
+
+	seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10,
+		   trace_remote_loaded(remote) ? "loaded" : "unloaded");
+
+	return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb);
+
+static int trace_remote_get(struct trace_remote *remote, int cpu)
+{
+	int ret;
+
+	if (remote->nr_readers == UINT_MAX)
+		return -EBUSY;
+
+	ret = trace_remote_load(remote);
+	if (ret)
+		return ret;
+
+	remote->nr_readers++;
+
+	return 0;
+}
+
+static void trace_remote_put(struct trace_remote *remote)
+{
+	if (WARN_ON(!remote->nr_readers))
+		return;
+
+	remote->nr_readers--;
+	if (remote->nr_readers)
+		return;
+
+	trace_remote_try_unload(remote);
+}
+
+static void __poll_remote(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct trace_remote_iterator *iter;
+
+	iter = container_of(dwork, struct trace_remote_iterator, poll_work);
+	ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu);
+	schedule_delayed_work((struct delayed_work *)work,
+			      msecs_to_jiffies(iter->remote->poll_ms));
+}
+
+static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remote, int cpu)
+{
+	struct trace_remote_iterator *iter = NULL;
+	int ret;
+
+	lockdep_assert_held(&remote->lock);
+
+
+	ret = trace_remote_get(remote, cpu);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/* Test the CPU */
+	ret = ring_buffer_poll_remote(remote->trace_buffer, cpu);
+	if (ret)
+		goto err;
+
+	iter = kzalloc_obj(*iter);
+	if (iter) {
+		iter->remote = remote;
+		iter->cpu = cpu;
+		trace_seq_init(&iter->seq);
+		INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
+		schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
+
+		return iter;
+	}
+	ret = -ENOMEM;
+
+err:
+	kfree(iter);
+	trace_remote_put(remote);
+
+	return ERR_PTR(ret);
+}
+
+static void trace_remote_iter_free(struct trace_remote_iterator *iter)
+{
+	struct trace_remote *remote;
+
+	if (!iter)
+		return;
+
+	remote = iter->remote;
+
+	lockdep_assert_held(&remote->lock);
+
+	kfree(iter);
+	trace_remote_put(remote);
+}
+
+static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
+{
+	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
+	int cpu = iter->cpu;
+
+	if (cpu != RING_BUFFER_ALL_CPUS) {
+		if (ring_buffer_empty_cpu(trace_buffer, cpu))
+			return false;
+
+		if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, &iter->lost_events))
+			return false;
+
+		iter->evt_cpu = cpu;
+		return true;
+	}
+
+	iter->ts = U64_MAX;
+	for_each_possible_cpu(cpu) {
+		unsigned long lost_events;
+		u64 ts;
+
+		if (ring_buffer_empty_cpu(trace_buffer, cpu))
+			continue;
+
+		if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events))
+			continue;
+
+		if (ts >= iter->ts)
+			continue;
+
+		iter->ts = ts;
+		iter->evt_cpu = cpu;
+		iter->lost_events = lost_events;
+	}
+
+	return iter->ts != U64_MAX;
+}
+
+static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
+{
+	unsigned long usecs_rem;
+	u64 ts = iter->ts;
+
+	if (iter->lost_events)
+		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
+				 iter->evt_cpu, iter->lost_events);
+
+	do_div(ts, 1000);
+	usecs_rem = do_div(ts, USEC_PER_SEC);
+
+	trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu,
+			 ts, usecs_rem);
+
+	return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
+}
+
+static int trace_pipe_open(struct inode *inode, struct file *filp)
+{
+	struct trace_remote *remote = inode->i_private;
+	struct trace_remote_iterator *iter;
+	int cpu = RING_BUFFER_ALL_CPUS;
+
+	if (inode->i_cdev)
+		cpu = (long)inode->i_cdev - 1;
+
+	guard(mutex)(&remote->lock);
+	iter = trace_remote_iter(remote, cpu);
+	filp->private_data = iter;
+
+	return IS_ERR(iter) ? PTR_ERR(iter) : 0;
+}
+
+static int trace_pipe_release(struct inode *inode, struct file *filp)
+{
+	struct trace_remote_iterator *iter = filp->private_data;
+	struct trace_remote *remote = iter->remote;
+
+	guard(mutex)(&remote->lock);
+
+	trace_remote_iter_free(iter);
+
+	return 0;
+}
+
+static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	struct trace_remote_iterator *iter = filp->private_data;
+	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
+	int ret;
+
+copy_to_user:
+	ret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+	if (ret != -EBUSY)
+		return ret;
+
+	trace_seq_init(&iter->seq);
+
+	ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL);
+	if (ret < 0)
+		return ret;
+
+	while (trace_remote_iter_read_event(iter)) {
+		int prev_len = iter->seq.seq.len;
+
+		if (trace_remote_iter_print_event(iter)) {
+			iter->seq.seq.len = prev_len;
+			break;
+		}
+
+		ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
+	}
+
+	goto copy_to_user;
+}
+
+static const struct file_operations trace_pipe_fops = {
+	.open		= trace_pipe_open,
+	.read		= trace_pipe_read,
+	.release	= trace_pipe_release,
+};
+
+static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
+{
+	struct dentry *remote_d, *percpu_d, *d;
+	static struct dentry *root;
+	static DEFINE_MUTEX(lock);
+	bool root_inited = false;
+	int cpu;
+
+	guard(mutex)(&lock);
+
+	if (!root) {
+		root = tracefs_create_dir(TRACEFS_DIR, NULL);
+		if (!root) {
+			pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n");
+			return -ENOMEM;
+		}
+		root_inited = true;
+	}
+
+	remote_d = tracefs_create_dir(name, root);
+	if (!remote_d) {
+		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/\n", name);
+		goto err;
+	}
+
+	d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops);
+	if (!d)
+		goto err;
+
+	d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote,
+			      &buffer_size_kb_fops);
+	if (!d)
+		goto err;
+
+	d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops);
+	if (!d)
+		goto err;
+
+	percpu_d = tracefs_create_dir("per_cpu", remote_d);
+	if (!percpu_d) {
+		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name);
+		goto err;
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct dentry *cpu_d;
+		char cpu_name[16];
+
+		snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu);
+		cpu_d = tracefs_create_dir(cpu_name, percpu_d);
+		if (!cpu_d) {
+			pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/percpu/cpu%d\n",
+			       name, cpu);
+			goto err;
+		}
+
+		d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu,
+					  &trace_pipe_fops);
+		if (!d)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	if (root_inited) {
+		tracefs_remove(root);
+		root = NULL;
+	} else {
+		tracefs_remove(remote_d);
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * trace_remote_register() - Register a Tracefs remote
+ * @name:	Name of the remote, used for the Tracefs remotes/ directory.
+ * @cbs:	Set of callbacks used to control the remote.
+ * @priv:	Private data, passed to each callback from @cbs.
+ * @events:	Array of events. &remote_event.name and &remote_event.id must be
+ *		filled by the caller.
+ * @nr_events:	Number of events in the @events array.
+ *
+ * A trace remote is an entity, outside of the kernel (most likely firmware or
+ * hypervisor) capable of writing events into a Tracefs compatible ring-buffer.
+ * The kernel would then act as a reader.
+ *
+ * The registered remote will be found under the Tracefs directory
+ * remotes/<name>.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv)
+{
+	struct trace_remote *remote;
+
+	remote = kzalloc_obj(*remote);
+	if (!remote)
+		return -ENOMEM;
+
+	remote->cbs = cbs;
+	remote->priv = priv;
+	remote->trace_buffer_size = 7 << 10;
+	remote->poll_ms = 100;
+	mutex_init(&remote->lock);
+
+	if (trace_remote_init_tracefs(name, remote)) {
+		kfree(remote);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(trace_remote_register);
+
+/**
+ * trace_remote_free_buffer() - Free trace buffer allocated with trace_remote_alloc_buffer()
+ * @desc:	Descriptor of the per-CPU ring-buffers, originally filled by
+ *		trace_remote_alloc_buffer()
+ *
+ * Most likely called from &trace_remote_callbacks.unload_trace_buffer.
+ */
+void trace_remote_free_buffer(struct trace_buffer_desc *desc)
+{
+	struct ring_buffer_desc *rb_desc;
+	int cpu;
+
+	for_each_ring_buffer_desc(rb_desc, cpu, desc) {
+		unsigned int id;
+
+		free_page(rb_desc->meta_va);
+
+		for (id = 0; id < rb_desc->nr_page_va; id++)
+			free_page(rb_desc->page_va[id]);
+	}
+}
+EXPORT_SYMBOL_GPL(trace_remote_free_buffer);
+
+/**
+ * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer
+ * @desc:		Uninitialized trace_buffer_desc
+ * @desc_size:		Size of the trace_buffer_desc. Must be at least equal to
+ *			trace_buffer_desc_size()
+ * @buffer_size:	Size in bytes of each per-CPU ring-buffer
+ * @cpumask:		CPUs to allocate a ring-buffer for
+ *
+ * Helper to dynamically allocate a set of pages (enough to cover @buffer_size)
+ * for each CPU from @cpumask and fill @desc. Most likely called from
+ * &trace_remote_callbacks.load_trace_buffer.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
+			      const struct cpumask *cpumask)
+{
+	unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
+	void *desc_end = desc + desc_size;
+	struct ring_buffer_desc *rb_desc;
+	int cpu, ret = -ENOMEM;
+
+	if (desc_size < struct_size(desc, __data, 0))
+		return -EINVAL;
+
+	desc->nr_cpus = 0;
+	desc->struct_len = struct_size(desc, __data, 0);
+
+	rb_desc = (struct ring_buffer_desc *)&desc->__data[0];
+
+	for_each_cpu(cpu, cpumask) {
+		unsigned int id;
+
+		if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		rb_desc->cpu = cpu;
+		rb_desc->nr_page_va = 0;
+		rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL);
+		if (!rb_desc->meta_va)
+			goto err;
+
+		for (id = 0; id < nr_pages; id++) {
+			rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL);
+			if (!rb_desc->page_va[id])
+				goto err;
+
+			rb_desc->nr_page_va++;
+		}
+		desc->nr_cpus++;
+		desc->struct_len += offsetof(struct ring_buffer_desc, page_va);
+		desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va);
+		rb_desc = __next_ring_buffer_desc(rb_desc);
+	}
+
+	return 0;
+
+err:
+	trace_remote_free_buffer(desc);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer);
-- 
cgit v1.2.3


From 9af4ab0e11e336e2671d303ffcc6578e3546d9fc Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@google.com>
Date: Mon, 9 Mar 2026 16:24:52 +0000
Subject: tracing: Add reset to trace remotes

Allow to reset the trace remote buffer by writing to the Tracefs "trace"
file. This is similar to the regular Tracefs interface.

Link: https://patch.msgid.link/20260309162516.2623589-7-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_remote.h |  3 +++
 kernel/trace/trace_remote.c  | 45 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/trace_remote.h b/include/linux/trace_remote.h
index 65b7e7b8267c..10ca03dc192b 100644
--- a/include/linux/trace_remote.h
+++ b/include/linux/trace_remote.h
@@ -17,6 +17,8 @@
  *			remote to allow writing.
  * @swap_reader_page:	Called when Tracefs consumes a new page from a
  *			ring-buffer. It is expected from the remote to isolate a
+ * @reset:		Called on `echo 0 > trace`. It is expected from the
+ *			remote to reset all ring-buffer pages.
  *			new reader-page from the @cpu ring-buffer.
  */
 struct trace_remote_callbacks {
@@ -24,6 +26,7 @@ struct trace_remote_callbacks {
 	void	(*unload_trace_buffer)(struct trace_buffer_desc *desc, void *priv);
 	int	(*enable_tracing)(bool enable, void *priv);
 	int	(*swap_reader_page)(unsigned int cpu, void *priv);
+	int	(*reset)(unsigned int cpu, void *priv);
 };
 
 int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv);
diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
index 8b06f730376e..a7b94736dd38 100644
--- a/kernel/trace/trace_remote.c
+++ b/kernel/trace/trace_remote.c
@@ -63,6 +63,7 @@ static int trace_remote_load(struct trace_remote *remote)
 	rb_remote->desc = desc;
 	rb_remote->swap_reader_page = remote->cbs->swap_reader_page;
 	rb_remote->priv = remote->priv;
+	rb_remote->reset = remote->cbs->reset;
 	remote->trace_buffer = ring_buffer_alloc_remote(rb_remote);
 	if (!remote->trace_buffer) {
 		remote->cbs->unload_trace_buffer(desc, remote->priv);
@@ -138,6 +139,21 @@ static int trace_remote_disable_tracing(struct trace_remote *remote)
 	return 0;
 }
 
+static void trace_remote_reset(struct trace_remote *remote, int cpu)
+{
+	lockdep_assert_held(&remote->lock);
+
+	if (!trace_remote_loaded(remote))
+		return;
+
+	if (cpu == RING_BUFFER_ALL_CPUS)
+		ring_buffer_reset(remote->trace_buffer);
+	else
+		ring_buffer_reset_cpu(remote->trace_buffer, cpu);
+
+	trace_remote_try_unload(remote);
+}
+
 static ssize_t
 tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
 {
@@ -414,6 +430,26 @@ static const struct file_operations trace_pipe_fops = {
 	.release	= trace_pipe_release,
 };
 
+static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	struct inode *inode = file_inode(filp);
+	struct trace_remote *remote = inode->i_private;
+	int cpu = RING_BUFFER_ALL_CPUS;
+
+	if (inode->i_cdev)
+		cpu = (long)inode->i_cdev - 1;
+
+	guard(mutex)(&remote->lock);
+
+	trace_remote_reset(remote, cpu);
+
+	return cnt;
+}
+
+static const struct file_operations trace_fops = {
+	.write		= trace_write,
+};
+
 static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
 {
 	struct dentry *remote_d, *percpu_d, *d;
@@ -452,6 +488,10 @@ static int trace_remote_init_tracefs(const char *name, struct trace_remote *remo
 	if (!d)
 		goto err;
 
+	d = trace_create_file("trace", TRACEFS_MODE_WRITE, remote_d, remote, &trace_fops);
+	if (!d)
+		goto err;
+
 	percpu_d = tracefs_create_dir("per_cpu", remote_d);
 	if (!percpu_d) {
 		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name);
@@ -474,6 +514,11 @@ static int trace_remote_init_tracefs(const char *name, struct trace_remote *remo
 					  &trace_pipe_fops);
 		if (!d)
 			goto err;
+
+		d = trace_create_cpu_file("trace", TRACEFS_MODE_WRITE, cpu_d, remote, cpu,
+					  &trace_fops);
+		if (!d)
+			goto err;
 	}
 
 	return 0;
-- 
cgit v1.2.3


From bf2ba0f8ca1af14aaaa765cbb93caf564d383aad Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@google.com>
Date: Mon, 9 Mar 2026 16:24:54 +0000
Subject: tracing: Add init callback to trace remotes

Add a .init call back so the trace remote callers can add entries to the
tracefs directory.

Link: https://patch.msgid.link/20260309162516.2623589-9-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_remote.h | 4 ++++
 kernel/trace/trace_remote.c  | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_remote.h b/include/linux/trace_remote.h
index 10ca03dc192b..090c58b7d92b 100644
--- a/include/linux/trace_remote.h
+++ b/include/linux/trace_remote.h
@@ -3,10 +3,13 @@
 #ifndef _LINUX_TRACE_REMOTE_H
 #define _LINUX_TRACE_REMOTE_H
 
+#include <linux/dcache.h>
 #include <linux/ring_buffer.h>
 
 /**
  * struct trace_remote_callbacks - Callbacks used by Tracefs to control the remote
+ * @init:		Called once the remote has been registered. Allows the
+ *			caller to extend the Tracefs remote directory
  * @load_trace_buffer:  Called before Tracefs accesses the trace buffer for the first
  *			time. Must return a &trace_buffer_desc
  *			(most likely filled with trace_remote_alloc_buffer())
@@ -22,6 +25,7 @@
  *			new reader-page from the @cpu ring-buffer.
  */
 struct trace_remote_callbacks {
+	int	(*init)(struct dentry *d, void *priv);
 	struct trace_buffer_desc *(*load_trace_buffer)(unsigned long size, void *priv);
 	void	(*unload_trace_buffer)(struct trace_buffer_desc *desc, void *priv);
 	int	(*enable_tracing)(bool enable, void *priv);
diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
index 039ba71c3b3e..294d051dcef1 100644
--- a/kernel/trace/trace_remote.c
+++ b/kernel/trace/trace_remote.c
@@ -863,6 +863,7 @@ err:
 int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv)
 {
 	struct trace_remote *remote;
+	int ret;
 
 	remote = kzalloc_obj(*remote);
 	if (!remote)
@@ -880,7 +881,11 @@ int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs,
 		return -ENOMEM;
 	}
 
-	return 0;
+	ret = cbs->init ? cbs->init(remote->dentry, priv) : 0;
+	if (ret)
+		pr_err("Init failed for trace remote '%s' (%d)\n", name, ret);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(trace_remote_register);
 
-- 
cgit v1.2.3


From 072529158e604cc964feb78dcf094c6975828146 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@google.com>
Date: Mon, 9 Mar 2026 16:24:55 +0000
Subject: tracing: Add events to trace remotes

An event is predefined point in the writer code that allows to log
data. Following the same scheme as kernel events, add remote events,
described to user-space within the events/ tracefs directory found in
the corresponding trace remote.

Remote events are expected to be described during the trace remote
registration.

Add also a .enable_event callback for trace_remote to toggle the event
logging, if supported.

Link: https://patch.msgid.link/20260309162516.2623589-10-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_remote.h       |   7 +-
 include/linux/trace_remote_event.h |  23 ++++
 kernel/trace/trace_remote.c        | 264 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 288 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/trace_remote_event.h

(limited to 'include/linux')

diff --git a/include/linux/trace_remote.h b/include/linux/trace_remote.h
index 090c58b7d92b..fcd1d46ea466 100644
--- a/include/linux/trace_remote.h
+++ b/include/linux/trace_remote.h
@@ -5,6 +5,7 @@
 
 #include <linux/dcache.h>
 #include <linux/ring_buffer.h>
+#include <linux/trace_remote_event.h>
 
 /**
  * struct trace_remote_callbacks - Callbacks used by Tracefs to control the remote
@@ -23,6 +24,8 @@
  * @reset:		Called on `echo 0 > trace`. It is expected from the
  *			remote to reset all ring-buffer pages.
  *			new reader-page from the @cpu ring-buffer.
+ * @enable_event:	Called on events/event_name/enable. It is expected from
+ *			the remote to allow the writing event @id.
  */
 struct trace_remote_callbacks {
 	int	(*init)(struct dentry *d, void *priv);
@@ -31,9 +34,11 @@ struct trace_remote_callbacks {
 	int	(*enable_tracing)(bool enable, void *priv);
 	int	(*swap_reader_page)(unsigned int cpu, void *priv);
 	int	(*reset)(unsigned int cpu, void *priv);
+	int	(*enable_event)(unsigned short id, bool enable, void *priv);
 };
 
-int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv);
+int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv,
+			  struct remote_event *events, size_t nr_events);
 
 int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
 			      const struct cpumask *cpumask);
diff --git a/include/linux/trace_remote_event.h b/include/linux/trace_remote_event.h
new file mode 100644
index 000000000000..a4449008a075
--- /dev/null
+++ b/include/linux/trace_remote_event.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_TRACE_REMOTE_EVENTS_H
+#define _LINUX_TRACE_REMOTE_EVENTS_H
+
+struct trace_remote;
+struct trace_event_fields;
+
+struct remote_event_hdr {
+	unsigned short	id;
+};
+
+#define REMOTE_EVENT_NAME_MAX 30
+struct remote_event {
+	char				name[REMOTE_EVENT_NAME_MAX];
+	unsigned short			id;
+	bool				enabled;
+	struct trace_remote		*remote;
+	struct trace_event_fields	*fields;
+	char				*print_fmt;
+	void				(*print)(void *evt, struct trace_seq *seq);
+};
+#endif
diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
index 294d051dcef1..0d0af53c0ce9 100644
--- a/kernel/trace/trace_remote.c
+++ b/kernel/trace/trace_remote.c
@@ -31,6 +31,7 @@ struct trace_remote_iterator {
 	u64				ts;
 	struct ring_buffer_iter		*rb_iter;
 	struct ring_buffer_iter		**rb_iters;
+	struct remote_event_hdr		*evt;
 	int				cpu;
 	int				evt_cpu;
 	loff_t				pos;
@@ -42,6 +43,10 @@ struct trace_remote {
 	void				*priv;
 	struct trace_buffer		*trace_buffer;
 	struct trace_buffer_desc	*trace_buffer_desc;
+	struct dentry			*dentry;
+	struct eventfs_inode		*eventfs;
+	struct remote_event		*events;
+	unsigned long			nr_events;
 	unsigned long			trace_buffer_size;
 	struct ring_buffer_remote	rb_remote;
 	struct mutex			lock;
@@ -168,7 +173,8 @@ static void trace_remote_reset(struct trace_remote *remote, int cpu)
 static ssize_t
 tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
 {
-	struct trace_remote *remote = filp->private_data;
+	struct seq_file *seq = filp->private_data;
+	struct trace_remote *remote = seq->private;
 	unsigned long val;
 	int ret;
 
@@ -197,7 +203,8 @@ DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on);
 static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt,
 				    loff_t *ppos)
 {
-	struct trace_remote *remote = filp->private_data;
+	struct seq_file *seq = filp->private_data;
+	struct trace_remote *remote = seq->private;
 	unsigned long val;
 	int ret;
 
@@ -484,16 +491,19 @@ __peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long
 static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
 {
 	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
+	struct ring_buffer_event *rb_evt;
 	int cpu = iter->cpu;
 
 	if (cpu != RING_BUFFER_ALL_CPUS) {
 		if (ring_buffer_empty_cpu(trace_buffer, cpu))
 			return false;
 
-		if (!__peek_event(iter, cpu, &iter->ts, &iter->lost_events))
+		rb_evt = __peek_event(iter, cpu, &iter->ts, &iter->lost_events);
+		if (!rb_evt)
 			return false;
 
 		iter->evt_cpu = cpu;
+		iter->evt = ring_buffer_event_data(rb_evt);
 		return true;
 	}
 
@@ -505,7 +515,8 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
 		if (ring_buffer_empty_cpu(trace_buffer, cpu))
 			continue;
 
-		if (!__peek_event(iter, cpu, &ts, &lost_events))
+		rb_evt = __peek_event(iter, cpu, &ts, &lost_events);
+		if (!rb_evt)
 			continue;
 
 		if (ts >= iter->ts)
@@ -513,6 +524,7 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
 
 		iter->ts = ts;
 		iter->evt_cpu = cpu;
+		iter->evt = ring_buffer_event_data(rb_evt);
 		iter->lost_events = lost_events;
 	}
 
@@ -533,8 +545,11 @@ static void trace_remote_iter_move(struct trace_remote_iterator *iter)
 	}
 }
 
+static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id);
+
 static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
 {
+	struct remote_event *evt;
 	unsigned long usecs_rem;
 	u64 ts = iter->ts;
 
@@ -548,6 +563,12 @@ static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
 	trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu,
 			 ts, usecs_rem);
 
+	evt = trace_remote_find_event(iter->remote, iter->evt->id);
+	if (!evt)
+		trace_seq_printf(&iter->seq, "UNKNOWN id=%d\n", iter->evt->id);
+	else
+		evt->print(iter->evt, &iter->seq);
+
 	return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
 }
 
@@ -829,6 +850,8 @@ static int trace_remote_init_tracefs(const char *name, struct trace_remote *remo
 			goto err;
 	}
 
+	remote->dentry = remote_d;
+
 	return 0;
 
 err:
@@ -842,6 +865,9 @@ err:
 	return -ENOMEM;
 }
 
+static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote,
+					struct remote_event *events, size_t nr_events);
+
 /**
  * trace_remote_register() - Register a Tracefs remote
  * @name:	Name of the remote, used for the Tracefs remotes/ directory.
@@ -860,7 +886,8 @@ err:
  *
  * Return: 0 on success, negative error code on failure.
  */
-int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv)
+int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv,
+			  struct remote_event *events, size_t nr_events)
 {
 	struct trace_remote *remote;
 	int ret;
@@ -881,6 +908,13 @@ int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs,
 		return -ENOMEM;
 	}
 
+	ret = trace_remote_register_events(name, remote, events, nr_events);
+	if (ret) {
+		pr_err("Failed to register events for trace remote '%s' (%d)\n",
+		       name, ret);
+		return ret;
+	}
+
 	ret = cbs->init ? cbs->init(remote->dentry, priv) : 0;
 	if (ret)
 		pr_err("Init failed for trace remote '%s' (%d)\n", name, ret);
@@ -976,3 +1010,223 @@ err:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer);
+
+static int
+trace_remote_enable_event(struct trace_remote *remote, struct remote_event *evt, bool enable)
+{
+	int ret;
+
+	lockdep_assert_held(&remote->lock);
+
+	if (evt->enabled == enable)
+		return 0;
+
+	ret = remote->cbs->enable_event(evt->id, enable, remote->priv);
+	if (ret)
+		return ret;
+
+	evt->enabled = enable;
+
+	return 0;
+}
+
+static int remote_event_enable_show(struct seq_file *s, void *unused)
+{
+	struct remote_event *evt = s->private;
+
+	seq_printf(s, "%d\n", evt->enabled);
+
+	return 0;
+}
+
+static ssize_t remote_event_enable_write(struct file *filp, const char __user *ubuf,
+					 size_t count, loff_t *ppos)
+{
+	struct seq_file *seq = filp->private_data;
+	struct remote_event *evt = seq->private;
+	struct trace_remote *remote = evt->remote;
+	u8 enable;
+	int ret;
+
+	ret = kstrtou8_from_user(ubuf, count, 10, &enable);
+	if (ret)
+		return ret;
+
+	guard(mutex)(&remote->lock);
+
+	ret = trace_remote_enable_event(remote, evt, enable);
+	if (ret)
+		return ret;
+
+	return count;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(remote_event_enable);
+
+static int remote_event_id_show(struct seq_file *s, void *unused)
+{
+	struct remote_event *evt = s->private;
+
+	seq_printf(s, "%d\n", evt->id);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(remote_event_id);
+
+static int remote_event_format_show(struct seq_file *s, void *unused)
+{
+	size_t offset = sizeof(struct remote_event_hdr);
+	struct remote_event *evt = s->private;
+	struct trace_event_fields *field;
+
+	seq_printf(s, "name: %s\n", evt->name);
+	seq_printf(s, "ID: %d\n", evt->id);
+	seq_puts(s,
+		 "format:\n\tfield:unsigned short common_type;\toffset:0;\tsize:2;\tsigned:0;\n\n");
+
+	field = &evt->fields[0];
+	while (field->name) {
+		seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%u;\tsigned:%d;\n",
+			   field->type, field->name, offset, field->size,
+			   field->is_signed);
+		offset += field->size;
+		field++;
+	}
+
+	if (field != &evt->fields[0])
+		seq_puts(s, "\n");
+
+	seq_printf(s, "print fmt: %s\n", evt->print_fmt);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(remote_event_format);
+
+static int remote_event_callback(const char *name, umode_t *mode, void **data,
+				 const struct file_operations **fops)
+{
+	if (!strcmp(name, "enable")) {
+		*mode = TRACEFS_MODE_WRITE;
+		*fops = &remote_event_enable_fops;
+		return 1;
+	}
+
+	if (!strcmp(name, "id")) {
+		*mode = TRACEFS_MODE_READ;
+		*fops = &remote_event_id_fops;
+		return 1;
+	}
+
+	if (!strcmp(name, "format")) {
+		*mode = TRACEFS_MODE_READ;
+		*fops = &remote_event_format_fops;
+		return 1;
+	}
+
+	return 0;
+}
+
+static int trace_remote_init_eventfs(const char *remote_name, struct trace_remote *remote,
+				     struct remote_event *evt)
+{
+	struct eventfs_inode *eventfs = remote->eventfs;
+	static struct eventfs_entry entries[] = {
+		{
+			.name		= "enable",
+			.callback	= remote_event_callback,
+		}, {
+			.name		= "id",
+			.callback	= remote_event_callback,
+		}, {
+			.name		= "format",
+			.callback	= remote_event_callback,
+		}
+	};
+	bool eventfs_create = false;
+
+	if (!eventfs) {
+		eventfs = eventfs_create_events_dir("events", remote->dentry, NULL, 0, NULL);
+		if (IS_ERR(eventfs))
+			return PTR_ERR(eventfs);
+
+		/*
+		 * Create similar hierarchy as local events even if a single system is supported at
+		 * the moment
+		 */
+		eventfs = eventfs_create_dir(remote_name, eventfs, NULL, 0, NULL);
+		if (IS_ERR(eventfs))
+			return PTR_ERR(eventfs);
+
+		remote->eventfs = eventfs;
+		eventfs_create = true;
+	}
+
+	eventfs = eventfs_create_dir(evt->name, eventfs, entries, ARRAY_SIZE(entries), evt);
+	if (IS_ERR(eventfs)) {
+		if (eventfs_create) {
+			eventfs_remove_events_dir(remote->eventfs);
+			remote->eventfs = NULL;
+		}
+		return PTR_ERR(eventfs);
+	}
+
+	return 0;
+}
+
+static int trace_remote_attach_events(struct trace_remote *remote, struct remote_event *events,
+				      size_t nr_events)
+{
+	int i;
+
+	for (i = 0; i < nr_events; i++) {
+		struct remote_event *evt = &events[i];
+
+		if (evt->remote)
+			return -EEXIST;
+
+		evt->remote = remote;
+
+		/* We need events to be sorted for efficient lookup */
+		if (i && evt->id <= events[i - 1].id)
+			return -EINVAL;
+	}
+
+	remote->events = events;
+	remote->nr_events = nr_events;
+
+	return 0;
+}
+
+static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote,
+					struct remote_event *events, size_t nr_events)
+{
+	int i, ret;
+
+	ret = trace_remote_attach_events(remote, events, nr_events);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < nr_events; i++) {
+		struct remote_event *evt = &events[i];
+
+		ret = trace_remote_init_eventfs(remote_name, remote, evt);
+		if (ret)
+			pr_warn("Failed to init eventfs for event '%s' (%d)",
+				evt->name, ret);
+	}
+
+	return 0;
+}
+
+static int __cmp_events(const void *key, const void *data)
+{
+	const struct remote_event *evt = data;
+	int id = (int)((long)key);
+
+	return id - (int)evt->id;
+}
+
+static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id)
+{
+	return bsearch((const void *)(unsigned long)id, remote->events, remote->nr_events,
+		       sizeof(*remote->events), __cmp_events);
+}
-- 
cgit v1.2.3


From 5f3efd1dcebc35d44cce39630ae00980a45d9247 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@google.com>
Date: Mon, 9 Mar 2026 16:24:57 +0000
Subject: tracing: Add helpers to create trace remote events

Declaring remote events can be cumbersome let's add a set of macros to
simplify developers life. The declaration of a remote event is very
similar to kernel's events:

 REMOTE_EVENT(name, id,
     RE_STRUCT(
        re_field(u64 foo)
     ),
     RE_PRINTK("foo=%llu", __entry->foo)
 )

Link: https://patch.msgid.link/20260309162516.2623589-12-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_remote_event.h   | 10 +++++
 include/trace/define_remote_events.h | 73 ++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)
 create mode 100644 include/trace/define_remote_events.h

(limited to 'include/linux')

diff --git a/include/linux/trace_remote_event.h b/include/linux/trace_remote_event.h
index a4449008a075..c8ae1e1f5e72 100644
--- a/include/linux/trace_remote_event.h
+++ b/include/linux/trace_remote_event.h
@@ -5,6 +5,7 @@
 
 struct trace_remote;
 struct trace_event_fields;
+struct trace_seq;
 
 struct remote_event_hdr {
 	unsigned short	id;
@@ -20,4 +21,13 @@ struct remote_event {
 	char				*print_fmt;
 	void				(*print)(void *evt, struct trace_seq *seq);
 };
+
+#define RE_STRUCT(__args...) __args
+#define re_field(__type, __field) __type __field;
+
+#define REMOTE_EVENT_FORMAT(__name, __struct)	\
+	struct remote_event_format_##__name {	\
+		struct remote_event_hdr hdr;	\
+		__struct			\
+	}
 #endif
diff --git a/include/trace/define_remote_events.h b/include/trace/define_remote_events.h
new file mode 100644
index 000000000000..676e803dc144
--- /dev/null
+++ b/include/trace/define_remote_events.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/trace_events.h>
+#include <linux/trace_remote_event.h>
+#include <linux/trace_seq.h>
+#include <linux/stringify.h>
+
+#define REMOTE_EVENT_INCLUDE(__file) __stringify(../../__file)
+
+#ifdef REMOTE_EVENT_SECTION
+# define __REMOTE_EVENT_SECTION(__name) __used __section(REMOTE_EVENT_SECTION"."#__name)
+#else
+# define __REMOTE_EVENT_SECTION(__name)
+#endif
+
+#define REMOTE_PRINTK_COUNT_ARGS(__args...) \
+	__COUNT_ARGS(, ##__args, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0)
+
+#define __remote_printk0()								\
+	trace_seq_putc(seq, '\n')
+
+#define __remote_printk1(__fmt)								\
+	trace_seq_puts(seq, " " __fmt "\n")						\
+
+#define __remote_printk2(__fmt, __args...)						\
+do {											\
+	trace_seq_putc(seq, ' ');							\
+	trace_seq_printf(seq, __fmt, __args);						\
+	trace_seq_putc(seq, '\n');							\
+} while (0)
+
+/* Apply the appropriate trace_seq sequence according to the number of arguments */
+#define remote_printk(__args...)							\
+	CONCATENATE(__remote_printk, REMOTE_PRINTK_COUNT_ARGS(__args))(__args)
+
+#define RE_PRINTK(__args...) __args
+
+#define REMOTE_EVENT(__name, __id, __struct, __printk)					\
+	REMOTE_EVENT_FORMAT(__name, __struct);						\
+	static void remote_event_print_##__name(void *evt, struct trace_seq *seq)	\
+	{										\
+		struct remote_event_format_##__name __maybe_unused *__entry = evt;	\
+		trace_seq_puts(seq, #__name);						\
+		remote_printk(__printk);						\
+	}
+#include REMOTE_EVENT_INCLUDE(REMOTE_EVENT_INCLUDE_FILE)
+
+#undef REMOTE_EVENT
+#undef RE_PRINTK
+#undef re_field
+#define re_field(__type, __field)							\
+	{										\
+		.type = #__type, .name = #__field,					\
+		.size = sizeof(__type), .align = __alignof__(__type),			\
+		.is_signed = is_signed_type(__type),					\
+	},
+#define __entry REC
+#define RE_PRINTK(__fmt, __args...) "\"" __fmt "\", " __stringify(__args)
+#define REMOTE_EVENT(__name, __id, __struct, __printk)					\
+	static struct trace_event_fields remote_event_fields_##__name[] = {		\
+		__struct								\
+		{}									\
+	};										\
+	static char remote_event_print_fmt_##__name[] = __printk;			\
+	static struct remote_event __REMOTE_EVENT_SECTION(__name)			\
+	remote_event_##__name = {							\
+		.name		= #__name,						\
+		.id		= __id,							\
+		.fields		= remote_event_fields_##__name,				\
+		.print_fmt	= remote_event_print_fmt_##__name,			\
+		.print		= remote_event_print_##__name,				\
+	}
+#include REMOTE_EVENT_INCLUDE(REMOTE_EVENT_INCLUDE_FILE)
-- 
cgit v1.2.3


From 93ae1b76fff9e745f870a2f2cd32f472328c4a8f Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@google.com>
Date: Mon, 9 Mar 2026 16:24:58 +0000
Subject: ring-buffer: Export buffer_data_page and macros

In preparation for allowing the writing of ring-buffer compliant pages
outside of ring_buffer.c, move buffer_data_page and timestamps encoding
macros into the publicly available ring_buffer_types.h.

Link: https://patch.msgid.link/20260309162516.2623589-13-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ring_buffer_types.h | 41 +++++++++++++++++++++++++++++++++++++++
 kernel/trace/ring_buffer.c        | 36 +---------------------------------
 2 files changed, 42 insertions(+), 35 deletions(-)
 create mode 100644 include/linux/ring_buffer_types.h

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer_types.h b/include/linux/ring_buffer_types.h
new file mode 100644
index 000000000000..54577021a49d
--- /dev/null
+++ b/include/linux/ring_buffer_types.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RING_BUFFER_TYPES_H
+#define _LINUX_RING_BUFFER_TYPES_H
+
+#include <asm/local.h>
+
+#define TS_SHIFT        27
+#define TS_MASK         ((1ULL << TS_SHIFT) - 1)
+#define TS_DELTA_TEST   (~TS_MASK)
+
+/*
+ * We need to fit the time_stamp delta into 27 bits.
+ */
+static inline bool test_time_stamp(u64 delta)
+{
+	return !!(delta & TS_DELTA_TEST);
+}
+
+#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
+
+#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
+#define RB_ALIGNMENT		4U
+#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+#define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */
+
+#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
+# define RB_FORCE_8BYTE_ALIGNMENT	0
+# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT	1
+# define RB_ARCH_ALIGNMENT		8U
+#endif
+
+#define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)
+
+struct buffer_data_page {
+	u64		 time_stamp;	/* page time stamp */
+	local_t		 commit;	/* write committed index */
+	unsigned char	 data[] RB_ALIGN_DATA;	/* data of buffer page */
+};
+#endif
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 605142e06863..96e0d80d492b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,6 +4,7 @@
  *
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
+#include <linux/ring_buffer_types.h>
 #include <linux/sched/isolation.h>
 #include <linux/trace_recursion.h>
 #include <linux/trace_events.h>
@@ -157,23 +158,6 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 /* Used for individual buffers (after the counter) */
 #define RB_BUFFER_OFF		(1 << 20)
 
-#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
-
-#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
-#define RB_ALIGNMENT		4U
-#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
-#define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */
-
-#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
-# define RB_FORCE_8BYTE_ALIGNMENT	0
-# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
-#else
-# define RB_FORCE_8BYTE_ALIGNMENT	1
-# define RB_ARCH_ALIGNMENT		8U
-#endif
-
-#define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)
-
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
@@ -316,10 +300,6 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define for_each_online_buffer_cpu(buffer, cpu)		\
 	for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
 
-#define TS_SHIFT	27
-#define TS_MASK		((1ULL << TS_SHIFT) - 1)
-#define TS_DELTA_TEST	(~TS_MASK)
-
 static u64 rb_event_time_stamp(struct ring_buffer_event *event)
 {
 	u64 ts;
@@ -338,12 +318,6 @@ static u64 rb_event_time_stamp(struct ring_buffer_event *event)
 
 #define RB_MISSED_MASK		(3 << 30)
 
-struct buffer_data_page {
-	u64		 time_stamp;	/* page time stamp */
-	local_t		 commit;	/* write committed index */
-	unsigned char	 data[] RB_ALIGN_DATA;	/* data of buffer page */
-};
-
 struct buffer_data_read_page {
 	unsigned		order;	/* order of the page */
 	struct buffer_data_page	*data;	/* actual data, stored in this page */
@@ -437,14 +411,6 @@ static struct buffer_data_page *alloc_cpu_data(int cpu, int order)
 	return dpage;
 }
 
-/*
- * We need to fit the time_stamp delta into 27 bits.
- */
-static inline bool test_time_stamp(u64 delta)
-{
-	return !!(delta & TS_DELTA_TEST);
-}
-
 struct rb_irq_work {
 	struct irq_work			work;
 	wait_queue_head_t		waiters;
-- 
cgit v1.2.3


From 34e5b958bdad0f9cf16306368bbc2dc5b2a50143 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@google.com>
Date: Mon, 9 Mar 2026 16:24:59 +0000
Subject: tracing: Introduce simple_ring_buffer

Add a simple implementation of the kernel ring-buffer. This intends to
be used later by ring-buffer remotes such as the pKVM hypervisor, hence
the need for a cut down version (write only) without any dependency.

Link: https://patch.msgid.link/20260309162516.2623589-14-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/simple_ring_buffer.h |  57 +++++
 kernel/trace/Kconfig               |   3 +
 kernel/trace/Makefile              |   1 +
 kernel/trace/simple_ring_buffer.c  | 464 +++++++++++++++++++++++++++++++++++++
 4 files changed, 525 insertions(+)
 create mode 100644 include/linux/simple_ring_buffer.h
 create mode 100644 kernel/trace/simple_ring_buffer.c

(limited to 'include/linux')

diff --git a/include/linux/simple_ring_buffer.h b/include/linux/simple_ring_buffer.h
new file mode 100644
index 000000000000..2c4c0ae336bc
--- /dev/null
+++ b/include/linux/simple_ring_buffer.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SIMPLE_RING_BUFFER_H
+#define _LINUX_SIMPLE_RING_BUFFER_H
+
+#include <linux/list.h>
+#include <linux/ring_buffer.h>
+#include <linux/ring_buffer_types.h>
+#include <linux/types.h>
+
+/*
+ * Ideally those struct would stay private but the caller needs to know
+ * the allocation size for simple_ring_buffer_init().
+ */
+struct simple_buffer_page {
+	struct list_head	link;
+	struct buffer_data_page	*page;
+	u64			entries;
+	u32			write;
+	u32			id;
+};
+
+struct simple_rb_per_cpu {
+	struct simple_buffer_page	*tail_page;
+	struct simple_buffer_page	*reader_page;
+	struct simple_buffer_page	*head_page;
+	struct simple_buffer_page	*bpages;
+	struct trace_buffer_meta	*meta;
+	u32				nr_pages;
+
+#define SIMPLE_RB_UNAVAILABLE	0
+#define SIMPLE_RB_READY		1
+#define SIMPLE_RB_WRITING	2
+	u32				status;
+
+	u64				last_overrun;
+	u64				write_stamp;
+
+	struct simple_rb_cbs		*cbs;
+};
+
+int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
+			    const struct ring_buffer_desc *desc);
+
+void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer);
+
+void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
+				 u64 timestamp);
+
+void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer);
+
+int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable);
+
+int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer);
+
+int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer);
+
+#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 384dd36c8e29..edbdd7b38f61 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1284,4 +1284,7 @@ source "kernel/trace/rv/Kconfig"
 config TRACE_REMOTE
 	bool
 
+config SIMPLE_RING_BUFFER
+	bool
+
 endif # FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 318923ce39f5..2e39b09398b3 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -129,4 +129,5 @@ obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 obj-$(CONFIG_RV) += rv/
 
 obj-$(CONFIG_TRACE_REMOTE) += trace_remote.o
+obj-$(CONFIG_SIMPLE_RING_BUFFER) += simple_ring_buffer.o
 libftrace-y := ftrace.o
diff --git a/kernel/trace/simple_ring_buffer.c b/kernel/trace/simple_ring_buffer.c
new file mode 100644
index 000000000000..15df9781411b
--- /dev/null
+++ b/kernel/trace/simple_ring_buffer.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 - Google LLC
+ * Author: Vincent Donnefort <vdonnefort@google.com>
+ */
+
+#include <linux/atomic.h>
+#include <linux/simple_ring_buffer.h>
+
+#include <asm/barrier.h>
+#include <asm/local.h>
+
+enum simple_rb_link_type {
+	SIMPLE_RB_LINK_NORMAL		= 0,
+	SIMPLE_RB_LINK_HEAD		= 1,
+	SIMPLE_RB_LINK_HEAD_MOVING
+};
+
+#define SIMPLE_RB_LINK_MASK ~(SIMPLE_RB_LINK_HEAD | SIMPLE_RB_LINK_HEAD_MOVING)
+
+static void simple_bpage_set_head_link(struct simple_buffer_page *bpage)
+{
+	unsigned long link = (unsigned long)bpage->link.next;
+
+	link &= SIMPLE_RB_LINK_MASK;
+	link |= SIMPLE_RB_LINK_HEAD;
+
+	/*
+	 * Paired with simple_rb_find_head() to order access between the head
+	 * link and overrun. It ensures we always report an up-to-date value
+	 * after swapping the reader page.
+	 */
+	smp_store_release(&bpage->link.next, (struct list_head *)link);
+}
+
+static bool simple_bpage_unset_head_link(struct simple_buffer_page *bpage,
+					 struct simple_buffer_page *dst,
+					 enum simple_rb_link_type new_type)
+{
+	unsigned long *link = (unsigned long *)(&bpage->link.next);
+	unsigned long old = (*link & SIMPLE_RB_LINK_MASK) | SIMPLE_RB_LINK_HEAD;
+	unsigned long new = (unsigned long)(&dst->link) | new_type;
+
+	return try_cmpxchg(link, &old, new);
+}
+
+static void simple_bpage_set_normal_link(struct simple_buffer_page *bpage)
+{
+	unsigned long link = (unsigned long)bpage->link.next;
+
+	WRITE_ONCE(bpage->link.next, (struct list_head *)(link & SIMPLE_RB_LINK_MASK));
+}
+
+static struct simple_buffer_page *simple_bpage_from_link(struct list_head *link)
+{
+	unsigned long ptr = (unsigned long)link & SIMPLE_RB_LINK_MASK;
+
+	return container_of((struct list_head *)ptr, struct simple_buffer_page, link);
+}
+
+static struct simple_buffer_page *simple_bpage_next_page(struct simple_buffer_page *bpage)
+{
+	return simple_bpage_from_link(bpage->link.next);
+}
+
+static void simple_bpage_reset(struct simple_buffer_page *bpage)
+{
+	bpage->write = 0;
+	bpage->entries = 0;
+
+	local_set(&bpage->page->commit, 0);
+}
+
+static void simple_bpage_init(struct simple_buffer_page *bpage, unsigned long page)
+{
+	INIT_LIST_HEAD(&bpage->link);
+	bpage->page = (struct buffer_data_page *)page;
+
+	simple_bpage_reset(bpage);
+}
+
+#define simple_rb_meta_inc(__meta, __inc)		\
+	WRITE_ONCE((__meta), (__meta + __inc))
+
+static bool simple_rb_loaded(struct simple_rb_per_cpu *cpu_buffer)
+{
+	return !!cpu_buffer->bpages;
+}
+
+static int simple_rb_find_head(struct simple_rb_per_cpu *cpu_buffer)
+{
+	int retry = cpu_buffer->nr_pages * 2;
+	struct simple_buffer_page *head;
+
+	head = cpu_buffer->head_page;
+
+	while (retry--) {
+		unsigned long link;
+
+spin:
+		/* See smp_store_release in simple_bpage_set_head_link() */
+		link = (unsigned long)smp_load_acquire(&head->link.prev->next);
+
+		switch (link & ~SIMPLE_RB_LINK_MASK) {
+		/* Found the head */
+		case SIMPLE_RB_LINK_HEAD:
+			cpu_buffer->head_page = head;
+			return 0;
+		/* The writer caught the head, we can spin, that won't be long */
+		case SIMPLE_RB_LINK_HEAD_MOVING:
+			goto spin;
+		}
+
+		head = simple_bpage_next_page(head);
+	}
+
+	return -EBUSY;
+}
+
+/**
+ * simple_ring_buffer_swap_reader_page - Swap ring-buffer head with the reader
+ * @cpu_buffer: A simple_rb_per_cpu
+ *
+ * This function enables consuming reading. It ensures the current head page will not be overwritten
+ * and can be safely read.
+ *
+ * Returns 0 on success, -ENODEV if @cpu_buffer was unloaded or -EBUSY if we failed to catch the
+ * head page.
+ */
+int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer)
+{
+	struct simple_buffer_page *last, *head, *reader;
+	unsigned long overrun;
+	int retry = 8;
+	int ret;
+
+	if (!simple_rb_loaded(cpu_buffer))
+		return -ENODEV;
+
+	reader = cpu_buffer->reader_page;
+
+	do {
+		/* Run after the writer to find the head */
+		ret = simple_rb_find_head(cpu_buffer);
+		if (ret)
+			return ret;
+
+		head = cpu_buffer->head_page;
+
+		/* Connect the reader page around the header page */
+		reader->link.next = head->link.next;
+		reader->link.prev = head->link.prev;
+
+		/* The last page before the head */
+		last = simple_bpage_from_link(head->link.prev);
+
+		/* The reader page points to the new header page */
+		simple_bpage_set_head_link(reader);
+
+		overrun = cpu_buffer->meta->overrun;
+	} while (!simple_bpage_unset_head_link(last, reader, SIMPLE_RB_LINK_NORMAL) && retry--);
+
+	if (!retry)
+		return -EINVAL;
+
+	cpu_buffer->head_page = simple_bpage_from_link(reader->link.next);
+	cpu_buffer->head_page->link.prev = &reader->link;
+	cpu_buffer->reader_page = head;
+	cpu_buffer->meta->reader.lost_events = overrun - cpu_buffer->last_overrun;
+	cpu_buffer->meta->reader.id = cpu_buffer->reader_page->id;
+	cpu_buffer->last_overrun = overrun;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(simple_ring_buffer_swap_reader_page);
+
+static struct simple_buffer_page *simple_rb_move_tail(struct simple_rb_per_cpu *cpu_buffer)
+{
+	struct simple_buffer_page *tail, *new_tail;
+
+	tail = cpu_buffer->tail_page;
+	new_tail = simple_bpage_next_page(tail);
+
+	if (simple_bpage_unset_head_link(tail, new_tail, SIMPLE_RB_LINK_HEAD_MOVING)) {
+		/*
+		 * Oh no! we've caught the head. There is none anymore and
+		 * swap_reader will spin until we set the new one. Overrun must
+		 * be written first, to make sure we report the correct number
+		 * of lost events.
+		 */
+		simple_rb_meta_inc(cpu_buffer->meta->overrun, new_tail->entries);
+		simple_rb_meta_inc(cpu_buffer->meta->pages_lost, 1);
+
+		simple_bpage_set_head_link(new_tail);
+		simple_bpage_set_normal_link(tail);
+	}
+
+	simple_bpage_reset(new_tail);
+	cpu_buffer->tail_page = new_tail;
+
+	simple_rb_meta_inc(cpu_buffer->meta->pages_touched, 1);
+
+	return new_tail;
+}
+
+static unsigned long rb_event_size(unsigned long length)
+{
+	struct ring_buffer_event *event;
+
+	return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
+}
+
+static struct ring_buffer_event *
+rb_event_add_ts_extend(struct ring_buffer_event *event, u64 delta)
+{
+	event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+	event->time_delta = delta & TS_MASK;
+	event->array[0] = delta >> TS_SHIFT;
+
+	return (struct ring_buffer_event *)((unsigned long)event + 8);
+}
+
+static struct ring_buffer_event *
+simple_rb_reserve_next(struct simple_rb_per_cpu *cpu_buffer, unsigned long length, u64 timestamp)
+{
+	unsigned long ts_ext_size = 0, event_size = rb_event_size(length);
+	struct simple_buffer_page *tail = cpu_buffer->tail_page;
+	struct ring_buffer_event *event;
+	u32 write, prev_write;
+	u64 time_delta;
+
+	time_delta = timestamp - cpu_buffer->write_stamp;
+
+	if (test_time_stamp(time_delta))
+		ts_ext_size = 8;
+
+	prev_write = tail->write;
+	write = prev_write + event_size + ts_ext_size;
+
+	if (unlikely(write > (PAGE_SIZE - BUF_PAGE_HDR_SIZE)))
+		tail = simple_rb_move_tail(cpu_buffer);
+
+	if (!tail->entries) {
+		tail->page->time_stamp = timestamp;
+		time_delta = 0;
+		ts_ext_size = 0;
+		write = event_size;
+		prev_write = 0;
+	}
+
+	tail->write = write;
+	tail->entries++;
+
+	cpu_buffer->write_stamp = timestamp;
+
+	event = (struct ring_buffer_event *)(tail->page->data + prev_write);
+	if (ts_ext_size) {
+		event = rb_event_add_ts_extend(event, time_delta);
+		time_delta = 0;
+	}
+
+	event->type_len = 0;
+	event->time_delta = time_delta;
+	event->array[0] = event_size - RB_EVNT_HDR_SIZE;
+
+	return event;
+}
+
+/**
+ * simple_ring_buffer_reserve - Reserve an entry in @cpu_buffer
+ * @cpu_buffer:	A simple_rb_per_cpu
+ * @length:	Size of the entry in bytes
+ * @timestamp:	Timestamp of the entry
+ *
+ * Returns the address of the entry where to write data or NULL
+ */
+void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
+				 u64 timestamp)
+{
+	struct ring_buffer_event *rb_event;
+
+	if (cmpxchg(&cpu_buffer->status, SIMPLE_RB_READY, SIMPLE_RB_WRITING) != SIMPLE_RB_READY)
+		return NULL;
+
+	rb_event = simple_rb_reserve_next(cpu_buffer, length, timestamp);
+
+	return &rb_event->array[1];
+}
+EXPORT_SYMBOL_GPL(simple_ring_buffer_reserve);
+
+/**
+ * simple_ring_buffer_commit - Commit the entry reserved with simple_ring_buffer_reserve()
+ * @cpu_buffer:	The simple_rb_per_cpu where the entry has been reserved
+ */
+void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer)
+{
+	local_set(&cpu_buffer->tail_page->page->commit,
+		  cpu_buffer->tail_page->write);
+	simple_rb_meta_inc(cpu_buffer->meta->entries, 1);
+
+	/*
+	 * Paired with simple_rb_enable_tracing() to ensure data is
+	 * written to the ring-buffer before teardown.
+	 */
+	smp_store_release(&cpu_buffer->status, SIMPLE_RB_READY);
+}
+EXPORT_SYMBOL_GPL(simple_ring_buffer_commit);
+
+static u32 simple_rb_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
+{
+	u32 prev_status;
+
+	if (enable)
+		return cmpxchg(&cpu_buffer->status, SIMPLE_RB_UNAVAILABLE, SIMPLE_RB_READY);
+
+	/* Wait for the buffer to be released */
+	do {
+		prev_status = cmpxchg_acquire(&cpu_buffer->status,
+					      SIMPLE_RB_READY,
+					      SIMPLE_RB_UNAVAILABLE);
+	} while (prev_status == SIMPLE_RB_WRITING);
+
+	return prev_status;
+}
+
+/**
+ * simple_ring_buffer_reset - Reset @cpu_buffer
+ * @cpu_buffer: A simple_rb_per_cpu
+ *
+ * This will not clear the content of the data, only reset counters and pointers
+ *
+ * Returns 0 on success or -ENODEV if @cpu_buffer was unloaded.
+ */
+int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer)
+{
+	struct simple_buffer_page *bpage;
+	u32 prev_status;
+	int ret;
+
+	if (!simple_rb_loaded(cpu_buffer))
+		return -ENODEV;
+
+	prev_status = simple_rb_enable_tracing(cpu_buffer, false);
+
+	ret = simple_rb_find_head(cpu_buffer);
+	if (ret)
+		return ret;
+
+	bpage = cpu_buffer->tail_page = cpu_buffer->head_page;
+	do {
+		simple_bpage_reset(bpage);
+		bpage = simple_bpage_next_page(bpage);
+	} while (bpage != cpu_buffer->head_page);
+
+	simple_bpage_reset(cpu_buffer->reader_page);
+
+	cpu_buffer->last_overrun = 0;
+	cpu_buffer->write_stamp = 0;
+
+	cpu_buffer->meta->reader.read = 0;
+	cpu_buffer->meta->reader.lost_events = 0;
+	cpu_buffer->meta->entries = 0;
+	cpu_buffer->meta->overrun = 0;
+	cpu_buffer->meta->read = 0;
+	cpu_buffer->meta->pages_lost = 0;
+	cpu_buffer->meta->pages_touched = 0;
+
+	if (prev_status == SIMPLE_RB_READY)
+		simple_rb_enable_tracing(cpu_buffer, true);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(simple_ring_buffer_reset);
+
+/**
+ * simple_ring_buffer_init - Init @cpu_buffer based on @desc
+ * @cpu_buffer:	A simple_rb_per_cpu buffer to init, allocated by the caller.
+ * @bpages:	Array of simple_buffer_pages, with as many elements as @desc->nr_page_va
+ * @desc:	A ring_buffer_desc
+ *
+ * Returns 0 on success or -EINVAL if the content of @desc is invalid
+ */
+int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
+			    const struct ring_buffer_desc *desc)
+{
+	struct simple_buffer_page *bpage = bpages;
+	int i;
+
+	/* At least 1 reader page and two pages in the ring-buffer */
+	if (desc->nr_page_va < 3)
+		return -EINVAL;
+
+	memset(cpu_buffer, 0, sizeof(*cpu_buffer));
+
+	cpu_buffer->bpages = bpages;
+
+	cpu_buffer->meta = (void *)desc->meta_va;
+	memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
+	cpu_buffer->meta->meta_page_size = PAGE_SIZE;
+	cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages;
+
+	/* The reader page is not part of the ring initially */
+	simple_bpage_init(bpage, desc->page_va[0]);
+	bpage->id = 0;
+
+	cpu_buffer->nr_pages = 1;
+
+	cpu_buffer->reader_page = bpage;
+	cpu_buffer->tail_page = bpage + 1;
+	cpu_buffer->head_page = bpage + 1;
+
+	for (i = 1; i < desc->nr_page_va; i++) {
+		simple_bpage_init(++bpage, desc->page_va[i]);
+
+		bpage->link.next = &(bpage + 1)->link;
+		bpage->link.prev = &(bpage - 1)->link;
+		bpage->id = i;
+
+		cpu_buffer->nr_pages = i + 1;
+	}
+
+	/* Close the ring */
+	bpage->link.next = &cpu_buffer->tail_page->link;
+	cpu_buffer->tail_page->link.prev = &bpage->link;
+
+	/* The last init'ed page points to the head page */
+	simple_bpage_set_head_link(bpage);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(simple_ring_buffer_init);
+
+/**
+ * simple_ring_buffer_unload - Prepare @cpu_buffer for deletion
+ * @cpu_buffer:	A simple_rb_per_cpu that will be deleted.
+ */
+void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer)
+{
+	if (!simple_rb_loaded(cpu_buffer))
+		return;
+
+	simple_rb_enable_tracing(cpu_buffer, false);
+
+	cpu_buffer->bpages = NULL;
+}
+EXPORT_SYMBOL_GPL(simple_ring_buffer_unload);
+
+/**
+ * simple_ring_buffer_enable_tracing - Enable or disable writing to @cpu_buffer
+ * @cpu_buffer: A simple_rb_per_cpu
+ * @enable:	True to enable tracing, False to disable it
+ *
+ * Returns 0 on success or -ENODEV if @cpu_buffer was unloaded
+ */
+int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
+{
+	if (!simple_rb_loaded(cpu_buffer))
+		return -ENODEV;
+
+	simple_rb_enable_tracing(cpu_buffer, enable);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(simple_ring_buffer_enable_tracing);
-- 
cgit v1.2.3


From 635923081c792c830fb87e680d6dd5f348926b3f Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@google.com>
Date: Mon, 9 Mar 2026 16:25:03 +0000
Subject: tracing: load/unload page callbacks for simple_ring_buffer

Add load/unload callback used for each admitted page in the ring-buffer.
This will be later useful for the pKVM hypervisor which uses a different
VA space and need to dynamically map/unmap the ring-buffer pages.

Link: https://patch.msgid.link/20260309162516.2623589-18-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/simple_ring_buffer.h |  8 ++++
 kernel/trace/simple_ring_buffer.c  | 91 ++++++++++++++++++++++++++++++--------
 2 files changed, 80 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/simple_ring_buffer.h b/include/linux/simple_ring_buffer.h
index 2c4c0ae336bc..21aec556293e 100644
--- a/include/linux/simple_ring_buffer.h
+++ b/include/linux/simple_ring_buffer.h
@@ -54,4 +54,12 @@ int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer);
 
 int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer);
 
+int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
+			       struct simple_buffer_page *bpages,
+			       const struct ring_buffer_desc *desc,
+			       void *(*load_page)(unsigned long va),
+			       void (*unload_page)(void *va));
+
+void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer,
+				  void (*unload_page)(void *));
 #endif
diff --git a/kernel/trace/simple_ring_buffer.c b/kernel/trace/simple_ring_buffer.c
index 15df9781411b..02af2297ae5a 100644
--- a/kernel/trace/simple_ring_buffer.c
+++ b/kernel/trace/simple_ring_buffer.c
@@ -71,7 +71,7 @@ static void simple_bpage_reset(struct simple_buffer_page *bpage)
 	local_set(&bpage->page->commit, 0);
 }
 
-static void simple_bpage_init(struct simple_buffer_page *bpage, unsigned long page)
+static void simple_bpage_init(struct simple_buffer_page *bpage, void *page)
 {
 	INIT_LIST_HEAD(&bpage->link);
 	bpage->page = (struct buffer_data_page *)page;
@@ -372,18 +372,15 @@ int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer)
 }
 EXPORT_SYMBOL_GPL(simple_ring_buffer_reset);
 
-/**
- * simple_ring_buffer_init - Init @cpu_buffer based on @desc
- * @cpu_buffer:	A simple_rb_per_cpu buffer to init, allocated by the caller.
- * @bpages:	Array of simple_buffer_pages, with as many elements as @desc->nr_page_va
- * @desc:	A ring_buffer_desc
- *
- * Returns 0 on success or -EINVAL if the content of @desc is invalid
- */
-int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
-			    const struct ring_buffer_desc *desc)
+int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
+			       struct simple_buffer_page *bpages,
+			       const struct ring_buffer_desc *desc,
+			       void *(*load_page)(unsigned long va),
+			       void (*unload_page)(void *va))
 {
 	struct simple_buffer_page *bpage = bpages;
+	int ret = 0;
+	void *page;
 	int i;
 
 	/* At least 1 reader page and two pages in the ring-buffer */
@@ -392,15 +389,22 @@ int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_
 
 	memset(cpu_buffer, 0, sizeof(*cpu_buffer));
 
-	cpu_buffer->bpages = bpages;
+	cpu_buffer->meta = load_page(desc->meta_va);
+	if (!cpu_buffer->meta)
+		return -EINVAL;
 
-	cpu_buffer->meta = (void *)desc->meta_va;
 	memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
 	cpu_buffer->meta->meta_page_size = PAGE_SIZE;
 	cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages;
 
 	/* The reader page is not part of the ring initially */
-	simple_bpage_init(bpage, desc->page_va[0]);
+	page = load_page(desc->page_va[0]);
+	if (!page) {
+		unload_page(cpu_buffer->meta);
+		return -EINVAL;
+	}
+
+	simple_bpage_init(bpage, page);
 	bpage->id = 0;
 
 	cpu_buffer->nr_pages = 1;
@@ -410,7 +414,13 @@ int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_
 	cpu_buffer->head_page = bpage + 1;
 
 	for (i = 1; i < desc->nr_page_va; i++) {
-		simple_bpage_init(++bpage, desc->page_va[i]);
+		page = load_page(desc->page_va[i]);
+		if (!page) {
+			ret = -EINVAL;
+			break;
+		}
+
+		simple_bpage_init(++bpage, page);
 
 		bpage->link.next = &(bpage + 1)->link;
 		bpage->link.prev = &(bpage - 1)->link;
@@ -419,6 +429,14 @@ int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_
 		cpu_buffer->nr_pages = i + 1;
 	}
 
+	if (ret) {
+		for (i--; i >= 0; i--)
+			unload_page((void *)desc->page_va[i]);
+		unload_page(cpu_buffer->meta);
+
+		return ret;
+	}
+
 	/* Close the ring */
 	bpage->link.next = &cpu_buffer->tail_page->link;
 	cpu_buffer->tail_page->link.prev = &bpage->link;
@@ -426,23 +444,58 @@ int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_
 	/* The last init'ed page points to the head page */
 	simple_bpage_set_head_link(bpage);
 
+	cpu_buffer->bpages = bpages;
+
 	return 0;
 }
-EXPORT_SYMBOL_GPL(simple_ring_buffer_init);
+
+static void *__load_page(unsigned long page)
+{
+	return (void *)page;
+}
+
+static void __unload_page(void *page) { }
 
 /**
- * simple_ring_buffer_unload - Prepare @cpu_buffer for deletion
- * @cpu_buffer:	A simple_rb_per_cpu that will be deleted.
+ * simple_ring_buffer_init - Init @cpu_buffer based on @desc
+ * @cpu_buffer:	A simple_rb_per_cpu buffer to init, allocated by the caller.
+ * @bpages:	Array of simple_buffer_pages, with as many elements as @desc->nr_page_va
+ * @desc:	A ring_buffer_desc
+ *
+ * Returns 0 on success or -EINVAL if the content of @desc is invalid
  */
-void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer)
+int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
+			    const struct ring_buffer_desc *desc)
+{
+	return simple_ring_buffer_init_mm(cpu_buffer, bpages, desc, __load_page, __unload_page);
+}
+EXPORT_SYMBOL_GPL(simple_ring_buffer_init);
+
+void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer,
+				  void (*unload_page)(void *))
 {
+	int p;
+
 	if (!simple_rb_loaded(cpu_buffer))
 		return;
 
 	simple_rb_enable_tracing(cpu_buffer, false);
 
+	unload_page(cpu_buffer->meta);
+	for (p = 0; p < cpu_buffer->nr_pages; p++)
+		unload_page(cpu_buffer->bpages[p].page);
+
 	cpu_buffer->bpages = NULL;
 }
+
+/**
+ * simple_ring_buffer_unload - Prepare @cpu_buffer for deletion
+ * @cpu_buffer:	A simple_rb_per_cpu that will be deleted.
+ */
+void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer)
+{
+	return simple_ring_buffer_unload_mm(cpu_buffer, __unload_page);
+}
 EXPORT_SYMBOL_GPL(simple_ring_buffer_unload);
 
 /**
-- 
cgit v1.2.3


From c116737e972ea74f4468a1bd0703d623a3c0ee4a Mon Sep 17 00:00:00 2001
From: Marco Crivellari <marco.crivellari@suse.com>
Date: Mon, 9 Mar 2026 14:15:28 +0100
Subject: workqueue: Add system_dfl_long_wq for long unbound works

Currently there are users of queue_delayed_work() who specify
system_long_wq, the per-cpu workqueue. This workqueue should
be used for long per-cpu works, but queue_delayed_work()
queue the work using:

  queue_delayed_work_on(WORK_CPU_UNBOUND, ...);

This would end up calling __queue_delayed_work() that does:

	if (housekeeping_enabled(HK_TYPE_TIMER)) {
	//	[....]
	} else {
		if (likely(cpu == WORK_CPU_UNBOUND))
			add_timer_global(timer);
		else
			add_timer_on(timer, cpu);
	}

So when cpu == WORK_CPU_UNBOUND the timer is global and is
not using a specific CPU. Later, when __queue_work() is called:

	if (req_cpu == WORK_CPU_UNBOUND) {
		if (wq->flags & WQ_UNBOUND)
			cpu = wq_select_unbound_cpu(raw_smp_processor_id());
		else
			cpu = raw_smp_processor_id();
	}

Because the wq is not unbound, it takes the CPU where the timer
fired and enqueue the work on that CPU.
The consequence of all of this is that the work can run anywhere,
depending on where the timer fired.

Introduce system_dfl_long_wq in order to change, in a future step,
users that are still calling:

  queue_delayed_work(system_long_wq, ...);

with the new system_dfl_long_wq instead, so that the work may
benefit from scheduler task placement.

Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 6 ++++++
 kernel/workqueue.c        | 5 ++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index fc5744402a66..8e0855d56e74 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -440,6 +440,9 @@ enum wq_consts {
  * system_long_wq is similar to system_percpu_wq but may host long running
  * works.  Queue flushing might take relatively long.
  *
+ * system_dfl_long_wq is similar to system_dfl_wq but it may host long running
+ * works.
+ *
  * system_dfl_wq is unbound workqueue.  Workers are not bound to
  * any specific CPU, not concurrency managed, and all queued works are
  * executed immediately as long as max_active limit is not reached and
@@ -468,6 +471,7 @@ extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 extern struct workqueue_struct *system_bh_wq;
 extern struct workqueue_struct *system_bh_highpri_wq;
+extern struct workqueue_struct *system_dfl_long_wq;
 
 void workqueue_softirq_action(bool highpri);
 void workqueue_softirq_dead(unsigned int cpu);
@@ -783,6 +787,8 @@ extern void __warn_flushing_systemwide_wq(void)
 	     _wq == system_highpri_wq) ||				\
 	    (__builtin_constant_p(_wq == system_long_wq) &&		\
 	     _wq == system_long_wq) ||					\
+	    (__builtin_constant_p(_wq == system_dfl_long_wq) &&		\
+	     _wq == system_dfl_long_wq) ||					\
 	    (__builtin_constant_p(_wq == system_dfl_wq) &&		\
 	     _wq == system_dfl_wq) ||				\
 	    (__builtin_constant_p(_wq == system_freezable_wq) &&	\
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2f95cb0d2f1b..2d8ff903f113 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -530,6 +530,8 @@ struct workqueue_struct *system_bh_wq;
 EXPORT_SYMBOL_GPL(system_bh_wq);
 struct workqueue_struct *system_bh_highpri_wq;
 EXPORT_SYMBOL_GPL(system_bh_highpri_wq);
+struct workqueue_struct *system_dfl_long_wq __ro_after_init;
+EXPORT_SYMBOL_GPL(system_dfl_long_wq);
 
 static int worker_thread(void *__worker);
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
@@ -7954,11 +7956,12 @@ void __init workqueue_init_early(void)
 	system_bh_wq = alloc_workqueue("events_bh", WQ_BH | WQ_PERCPU, 0);
 	system_bh_highpri_wq = alloc_workqueue("events_bh_highpri",
 					       WQ_BH | WQ_HIGHPRI | WQ_PERCPU, 0);
+	system_dfl_long_wq = alloc_workqueue("events_dfl_long", WQ_UNBOUND, WQ_MAX_ACTIVE);
 	BUG_ON(!system_wq || !system_percpu_wq|| !system_highpri_wq || !system_long_wq ||
 	       !system_unbound_wq || !system_freezable_wq || !system_dfl_wq ||
 	       !system_power_efficient_wq ||
 	       !system_freezable_power_efficient_wq ||
-	       !system_bh_wq || !system_bh_highpri_wq);
+	       !system_bh_wq || !system_bh_highpri_wq || !system_dfl_long_wq);
 }
 
 static void __init wq_cpu_intensive_thresh_init(void)
-- 
cgit v1.2.3


From 878004e2852bc22ce0687c5597d6fe3909fb59f3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 6 Mar 2026 14:10:32 -0800
Subject: iopoll: fix function parameter names in read_poll_timeout_atomic()

Correct the function parameter names to avoid kernel-doc warnings
and to emphasize this function is atomic (non-sleeping).

Warning: include/linux/iopoll.h:169 function parameter 'sleep_us' not
 described in 'read_poll_timeout_atomic'
Warning: ../include/linux/iopoll.h:169 function parameter
 'sleep_before_read' not described in 'read_poll_timeout_atomic'

Fixes: 9df8043a546d ("iopoll: Generalize read_poll_timeout() into poll_timeout_us()")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patch.msgid.link/20260306221033.2357305-1-rdunlap@infradead.org
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/linux/iopoll.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index bdd2e0652bc3..53edd69acb9b 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -159,7 +159,7 @@
  *
  * This macro does not rely on timekeeping.  Hence it is safe to call even when
  * timekeeping is suspended, at the expense of an underestimation of wall clock
- * time, which is rather minimal with a non-zero delay_us.
+ * time, which is rather minimal with a non-zero @delay_us.
  *
  * When available, you'll probably want to use one of the specialized
  * macros defined below rather than this macro directly.
@@ -167,9 +167,9 @@
  * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
  * case, the last read value at @args is stored in @val.
  */
-#define read_poll_timeout_atomic(op, val, cond, sleep_us, timeout_us, \
-				 sleep_before_read, args...) \
-	poll_timeout_us_atomic((val) = op(args), cond, sleep_us, timeout_us, sleep_before_read)
+#define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \
+				 delay_before_read, args...) \
+	poll_timeout_us_atomic((val) = op(args), cond, delay_us, timeout_us, delay_before_read)
 
 /**
  * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
-- 
cgit v1.2.3


From aca086ff27c3f67e81617e4b063d1126544a4f19 Mon Sep 17 00:00:00 2001
From: Ondrej Kozina <okozina@redhat.com>
Date: Fri, 6 Feb 2026 15:17:58 +0100
Subject: sed-opal: add IOC_OPAL_REACTIVATE_LSP.

This adds the 'Reactivate' method as described in the
"TCG Storage Opal SSC Feature Set: Single User Mode"
document (ch. 3.1.1.1).

The method enables switching an already active SED OPAL2 device,
with appropriate firmware support for Single User Mode (SUM),
to or from SUM.

Signed-off-by: Ondrej Kozina <okozina@redhat.com>
Reviewed-and-tested-by: Milan Broz <gmazyland@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/opal_proto.h            |  1 +
 block/sed-opal.c              | 99 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/sed-opal.h      |  1 +
 include/uapi/linux/sed-opal.h | 14 ++++++
 4 files changed, 115 insertions(+)

(limited to 'include/linux')

diff --git a/block/opal_proto.h b/block/opal_proto.h
index 3ccee5977c10..d138785b8198 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -155,6 +155,7 @@ enum opal_method {
 	OPAL_AUTHENTICATE,
 	OPAL_RANDOM,
 	OPAL_ERASE,
+	OPAL_REACTIVATE,
 };
 
 enum opal_token {
diff --git a/block/sed-opal.c b/block/sed-opal.c
index 83bee47aa29f..5d06f5f433bf 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -220,6 +220,8 @@ static const u8 opalmethod[][OPAL_METHOD_LENGTH] = {
 		{ 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x06, 0x01 },
 	[OPAL_ERASE] =
 		{ 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x03 },
+	[OPAL_REACTIVATE] =
+		{ 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x01 },
 };
 
 static int end_opal_session_error(struct opal_dev *dev);
@@ -2287,6 +2289,74 @@ static int activate_lsp(struct opal_dev *dev, void *data)
 	return finalize_and_send(dev, parse_and_check_status);
 }
 
+static int reactivate_lsp(struct opal_dev *dev, void *data)
+{
+	struct opal_lr_react *opal_react = data;
+	u8 user_lr[OPAL_UID_LENGTH];
+	int err, i;
+
+	err = cmd_start(dev, opaluid[OPAL_THISSP_UID],
+			opalmethod[OPAL_REACTIVATE]);
+
+	if (err) {
+		pr_debug("Error building Reactivate LockingSP command.\n");
+		return err;
+	}
+
+	/*
+	 * If neither 'entire_table' nor 'num_lrs' is set, the device
+	 * gets reactivated with SUM disabled. Only Admin1PIN will change
+	 * if set.
+	 */
+	if (opal_react->entire_table) {
+		/* Entire Locking table (all locking ranges) will be put in SUM. */
+		add_token_u8(&err, dev, OPAL_STARTNAME);
+		add_token_u64(&err, dev, OPAL_SUM_SET_LIST);
+		add_token_bytestring(&err, dev, opaluid[OPAL_LOCKING_TABLE], OPAL_UID_LENGTH);
+		add_token_u8(&err, dev, OPAL_ENDNAME);
+	} else if (opal_react->num_lrs) {
+		/* Subset of Locking table (selected locking range(s)) to be put in SUM */
+		err = build_locking_range(user_lr, sizeof(user_lr),
+					  opal_react->lr[0]);
+		if (err)
+			return err;
+
+		add_token_u8(&err, dev, OPAL_STARTNAME);
+		add_token_u64(&err, dev, OPAL_SUM_SET_LIST);
+
+		add_token_u8(&err, dev, OPAL_STARTLIST);
+		add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
+		for (i = 1; i < opal_react->num_lrs; i++) {
+			user_lr[7] = opal_react->lr[i];
+			add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
+		}
+		add_token_u8(&err, dev, OPAL_ENDLIST);
+		add_token_u8(&err, dev, OPAL_ENDNAME);
+	}
+
+	/* Skipping the rangle policy parameter is same as setting its value to zero */
+	if (opal_react->range_policy && (opal_react->num_lrs || opal_react->entire_table)) {
+		add_token_u8(&err, dev, OPAL_STARTNAME);
+		add_token_u64(&err, dev, OPAL_SUM_RANGE_POLICY);
+		add_token_u8(&err, dev, 1);
+		add_token_u8(&err, dev, OPAL_ENDNAME);
+	}
+
+	/*
+	 * Optional parameter. If set, it changes the Admin1 PIN even when SUM
+	 * is being disabled.
+	 */
+	if (opal_react->new_admin_key.key_len) {
+		add_token_u8(&err, dev, OPAL_STARTNAME);
+		add_token_u64(&err, dev, OPAL_SUM_ADMIN1_PIN);
+		add_token_bytestring(&err, dev, opal_react->new_admin_key.key,
+				     opal_react->new_admin_key.key_len);
+		add_token_u8(&err, dev, OPAL_ENDNAME);
+	}
+
+	return finalize_and_send(dev, parse_and_check_status);
+}
+
 /* Determine if we're in the Manufactured Inactive or Active state */
 static int get_lsp_lifecycle(struct opal_dev *dev, void *data)
 {
@@ -2957,6 +3027,32 @@ static int opal_activate_lsp(struct opal_dev *dev,
 	return ret;
 }
 
+static int opal_reactivate_lsp(struct opal_dev *dev,
+			       struct opal_lr_react *opal_lr_react)
+{
+	const struct opal_step active_steps[] = {
+		{ start_admin1LSP_opal_session, &opal_lr_react->key },
+		{ reactivate_lsp, opal_lr_react },
+		/* No end_opal_session. The controller terminates the session */
+	};
+	int ret;
+
+	/* use either 'entire_table' parameter or set of locking ranges */
+	if (opal_lr_react->num_lrs > OPAL_MAX_LRS ||
+	    (opal_lr_react->num_lrs && opal_lr_react->entire_table))
+		return -EINVAL;
+
+	ret = opal_get_key(dev, &opal_lr_react->key);
+	if (ret)
+		return ret;
+	mutex_lock(&dev->dev_lock);
+	setup_opal_dev(dev);
+	ret = execute_steps(dev, active_steps, ARRAY_SIZE(active_steps));
+	mutex_unlock(&dev->dev_lock);
+
+	return ret;
+}
+
 static int opal_setup_locking_range(struct opal_dev *dev,
 				    struct opal_user_lr_setup *opal_lrs)
 {
@@ -3315,6 +3411,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 	case IOC_OPAL_SET_SID_PW:
 		ret = opal_set_new_sid_pw(dev, p);
 		break;
+	case IOC_OPAL_REACTIVATE_LSP:
+		ret = opal_reactivate_lsp(dev, p);
+		break;
 
 	default:
 		break;
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index 80f33a93f944..2ae5e6b0ac21 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -53,6 +53,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
 	case IOC_OPAL_DISCOVERY:
 	case IOC_OPAL_REVERT_LSP:
 	case IOC_OPAL_SET_SID_PW:
+	case IOC_OPAL_REACTIVATE_LSP:
 		return true;
 	}
 	return false;
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h
index 9025dd5a4f0f..d03e590b6501 100644
--- a/include/uapi/linux/sed-opal.h
+++ b/include/uapi/linux/sed-opal.h
@@ -74,6 +74,19 @@ struct opal_lr_act {
 	__u8 align[2]; /* Align to 8 byte boundary */
 };
 
+struct opal_lr_react {
+	struct opal_key key;
+	struct opal_key new_admin_key; /* Set new Admin1 PIN if key_len is > 0 */
+	__u8 num_lrs; /*
+		       * Configure selected ranges (from lr[]) in SUM.
+		       * If num_lrs > 0 the 'entire_table' must be 0
+		       */
+	__u8 lr[OPAL_MAX_LRS];
+	__u8 range_policy; /* Set RangeStartRangeLengthPolicy parameter */
+	__u8 entire_table; /* Set all locking objects in SUM */
+	__u8 align[4]; /* Align to 8 byte boundary */
+};
+
 struct opal_session_info {
 	__u32 sum;
 	__u32 who;
@@ -216,5 +229,6 @@ struct opal_revert_lsp {
 #define IOC_OPAL_DISCOVERY          _IOW('p', 239, struct opal_discovery)
 #define IOC_OPAL_REVERT_LSP         _IOW('p', 240, struct opal_revert_lsp)
 #define IOC_OPAL_SET_SID_PW         _IOW('p', 241, struct opal_new_pw)
+#define IOC_OPAL_REACTIVATE_LSP     _IOW('p', 242, struct opal_lr_react)
 
 #endif /* _UAPI_SED_OPAL_H */
-- 
cgit v1.2.3


From 8e3d34a7ce7386b01947dd649bd24775544e4d3e Mon Sep 17 00:00:00 2001
From: Ondrej Kozina <okozina@redhat.com>
Date: Fri, 6 Feb 2026 15:18:00 +0100
Subject: sed-opal: add IOC_OPAL_LR_SET_START_LEN ioctl.

This ioctl is used to set up locking range start (offset)
and locking range length attributes only.

In Single User Mode (SUM), if the RangeStartRangeLengthPolicy parameter
is set in the 'Reactivate' method, only Admin authority maintains the
locking range length and start (offset) attributes of Locking objects
set up for SUM. All other attributes from struct opal_user_lr_setup
(RLE - read locking enabled, WLE - write locking enabled) shall
remain in possession of the User authority associated with the Locking
object set for SUM.

Therefore, we need a separate function for setting up locking range
start and locking range length because it may require two different
authorities (and sessions) if the RangeStartRangeLengthPolicy attribute
is set.

With the IOC_OPAL_LR_SET_START_LEN ioctl, the opal_user_lr_setup
members 'RLE' and 'WLE' of the ioctl argument are ignored.

Signed-off-by: Ondrej Kozina <okozina@redhat.com>
Reviewed-and-tested-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/sed-opal.c              | 28 ++++++++++++++++++++++++++++
 include/linux/sed-opal.h      |  1 +
 include/uapi/linux/sed-opal.h |  1 +
 3 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/block/sed-opal.c b/block/sed-opal.c
index 7be72f621952..55c8a0953d78 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -3091,6 +3091,31 @@ static int opal_setup_locking_range(struct opal_dev *dev,
 	return ret;
 }
 
+static int opal_setup_locking_range_start_length(struct opal_dev *dev,
+				    struct opal_user_lr_setup *opal_lrs)
+{
+	const struct opal_step lr_steps[] = {
+		{ start_auth_opal_session, &opal_lrs->session },
+		{ setup_locking_range_start_length, opal_lrs },
+		{ end_opal_session, }
+	};
+	int ret;
+
+	/* we can not set global locking range offset or length */
+	if (opal_lrs->session.opal_key.lr == 0)
+		return -EINVAL;
+
+	ret = opal_get_key(dev, &opal_lrs->session.opal_key);
+	if (ret)
+		return ret;
+	mutex_lock(&dev->dev_lock);
+	setup_opal_dev(dev);
+	ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps));
+	mutex_unlock(&dev->dev_lock);
+
+	return ret;
+}
+
 static int opal_locking_range_status(struct opal_dev *dev,
 			  struct opal_lr_status *opal_lrst,
 			  void __user *data)
@@ -3431,6 +3456,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 	case IOC_OPAL_REACTIVATE_LSP:
 		ret = opal_reactivate_lsp(dev, p);
 		break;
+	case IOC_OPAL_LR_SET_START_LEN:
+		ret = opal_setup_locking_range_start_length(dev, p);
+		break;
 
 	default:
 		break;
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index 2ae5e6b0ac21..a0df6819b0a9 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -54,6 +54,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
 	case IOC_OPAL_REVERT_LSP:
 	case IOC_OPAL_SET_SID_PW:
 	case IOC_OPAL_REACTIVATE_LSP:
+	case IOC_OPAL_LR_SET_START_LEN:
 		return true;
 	}
 	return false;
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h
index d03e590b6501..82de38f3fbeb 100644
--- a/include/uapi/linux/sed-opal.h
+++ b/include/uapi/linux/sed-opal.h
@@ -230,5 +230,6 @@ struct opal_revert_lsp {
 #define IOC_OPAL_REVERT_LSP         _IOW('p', 240, struct opal_revert_lsp)
 #define IOC_OPAL_SET_SID_PW         _IOW('p', 241, struct opal_new_pw)
 #define IOC_OPAL_REACTIVATE_LSP     _IOW('p', 242, struct opal_lr_react)
+#define IOC_OPAL_LR_SET_START_LEN   _IOW('p', 243, struct opal_user_lr_setup)
 
 #endif /* _UAPI_SED_OPAL_H */
-- 
cgit v1.2.3


From a441a9d22433fea561de131e27fff41715c2d186 Mon Sep 17 00:00:00 2001
From: Ondrej Kozina <okozina@redhat.com>
Date: Fri, 6 Feb 2026 15:18:01 +0100
Subject: sed-opal: add IOC_OPAL_ENABLE_DISABLE_LR.

This ioctl is used to set up RLE (read lock enabled) and WLE (write
lock enabled) parameters of the Locking object.

In Single User Mode (SUM), if the RangeStartRangeLengthPolicy parameter
is set in the 'Reactivate' method, only Admin authority maintains the
locking range length and start (offset) attributes of Locking objects
set up for SUM. All other attributes from struct opal_user_lr_setup
(RLE - read locking enabled, WLE - write locking enabled) shall
remain in possession of the User authority associated with the Locking
object set for SUM.

With the IOC_OPAL_ENABLE_DISABLE_LR ioctl, the opal_user_lr_setup
members 'range_start' and 'range_length' of the ioctl argument are
ignored.

Signed-off-by: Ondrej Kozina <okozina@redhat.com>
Reviewed-and-tested-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/sed-opal.c              | 24 ++++++++++++++++++++++++
 include/linux/sed-opal.h      |  1 +
 include/uapi/linux/sed-opal.h |  1 +
 3 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/block/sed-opal.c b/block/sed-opal.c
index 55c8a0953d78..53a73422911e 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -3116,6 +3116,27 @@ static int opal_setup_locking_range_start_length(struct opal_dev *dev,
 	return ret;
 }
 
+static int opal_enable_disable_range(struct opal_dev *dev,
+			     struct opal_user_lr_setup *opal_lrs)
+{
+	const struct opal_step lr_steps[] = {
+		{ start_auth_opal_session, &opal_lrs->session },
+		{ setup_enable_range, opal_lrs },
+		{ end_opal_session, }
+	};
+	int ret;
+
+	ret = opal_get_key(dev, &opal_lrs->session.opal_key);
+	if (ret)
+		return ret;
+	mutex_lock(&dev->dev_lock);
+	setup_opal_dev(dev);
+	ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps));
+	mutex_unlock(&dev->dev_lock);
+
+	return ret;
+}
+
 static int opal_locking_range_status(struct opal_dev *dev,
 			  struct opal_lr_status *opal_lrst,
 			  void __user *data)
@@ -3459,6 +3480,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 	case IOC_OPAL_LR_SET_START_LEN:
 		ret = opal_setup_locking_range_start_length(dev, p);
 		break;
+	case IOC_OPAL_ENABLE_DISABLE_LR:
+		ret = opal_enable_disable_range(dev, p);
+		break;
 
 	default:
 		break;
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index a0df6819b0a9..1d63479838cf 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -55,6 +55,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
 	case IOC_OPAL_SET_SID_PW:
 	case IOC_OPAL_REACTIVATE_LSP:
 	case IOC_OPAL_LR_SET_START_LEN:
+	case IOC_OPAL_ENABLE_DISABLE_LR:
 		return true;
 	}
 	return false;
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h
index 82de38f3fbeb..bde023ae2295 100644
--- a/include/uapi/linux/sed-opal.h
+++ b/include/uapi/linux/sed-opal.h
@@ -231,5 +231,6 @@ struct opal_revert_lsp {
 #define IOC_OPAL_SET_SID_PW         _IOW('p', 241, struct opal_new_pw)
 #define IOC_OPAL_REACTIVATE_LSP     _IOW('p', 242, struct opal_lr_react)
 #define IOC_OPAL_LR_SET_START_LEN   _IOW('p', 243, struct opal_user_lr_setup)
+#define IOC_OPAL_ENABLE_DISABLE_LR  _IOW('p', 244, struct opal_user_lr_setup)
 
 #endif /* _UAPI_SED_OPAL_H */
-- 
cgit v1.2.3


From 0cc9293bccb234552b81c3ebc074f5839f019e01 Mon Sep 17 00:00:00 2001
From: Ondrej Kozina <okozina@redhat.com>
Date: Fri, 6 Feb 2026 15:18:03 +0100
Subject: sed-opal: add IOC_OPAL_GET_SUM_STATUS ioctl.

This adds a function for retrieving the set of Locking objects enabled
for Single User Mode (SUM) and the value of the
RangeStartRangeLengthPolicy parameter.

It retrieves data from the LockingInfo table, specifically the
columns SingleUserModeRanges and RangeStartLengthPolicy, which
were added according to the TCG Opal Feature Set: Single User Mode,
as described in chapters 4.4.3.1 and 4.4.3.2.

Signed-off-by: Ondrej Kozina <okozina@redhat.com>
Reviewed-and-tested-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/sed-opal.c              | 159 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/sed-opal.h      |   1 +
 include/uapi/linux/sed-opal.h |  13 ++++
 3 files changed, 173 insertions(+)

(limited to 'include/linux')

diff --git a/block/sed-opal.c b/block/sed-opal.c
index 6146a1b30421..c34d19e91201 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -1757,6 +1757,12 @@ static int start_anybodyASP_opal_session(struct opal_dev *dev, void *data)
 					  OPAL_ADMINSP_UID, NULL, 0);
 }
 
+static int start_anybodyLSP_opal_session(struct opal_dev *dev, void *data)
+{
+	return start_generic_opal_session(dev, OPAL_ANYBODY_UID,
+					  OPAL_LOCKINGSP_UID, NULL, 0);
+}
+
 static int start_SIDASP_opal_session(struct opal_dev *dev, void *data)
 {
 	int ret;
@@ -3389,6 +3395,156 @@ static int opal_get_geometry(struct opal_dev *dev, void __user *data)
 	return 0;
 }
 
+static int get_sum_ranges(struct opal_dev *dev, void *data)
+{
+	const char *lr_uid;
+	size_t lr_uid_len;
+	u64 val;
+	const struct opal_resp_tok *tok;
+	int err, tok_n = 2;
+	struct opal_sum_ranges *sranges = data;
+	const __u8 lr_all[OPAL_MAX_LRS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+	err = generic_get_columns(dev, opaluid[OPAL_LOCKING_INFO_TABLE], OPAL_SUM_SET_LIST,
+				  OPAL_SUM_RANGE_POLICY);
+	if (err) {
+		pr_debug("Couldn't get locking info table columns %d to %d.\n",
+			 OPAL_SUM_SET_LIST, OPAL_SUM_RANGE_POLICY);
+		return err;
+	}
+
+	tok = response_get_token(&dev->parsed, tok_n);
+	if (IS_ERR(tok))
+		return PTR_ERR(tok);
+
+	if (!response_token_matches(tok, OPAL_STARTNAME)) {
+		pr_debug("Unexpected response token type %d.\n", tok_n);
+		return OPAL_INVAL_PARAM;
+	}
+	tok_n++;
+
+	if (response_get_u64(&dev->parsed, tok_n) != OPAL_SUM_SET_LIST) {
+		pr_debug("Token %d does not match expected column %u.\n",
+			 tok_n, OPAL_SUM_SET_LIST);
+		return OPAL_INVAL_PARAM;
+	}
+	tok_n++;
+
+	tok = response_get_token(&dev->parsed, tok_n);
+	if (IS_ERR(tok))
+		return PTR_ERR(tok);
+
+	/*
+	 * The OPAL_SUM_SET_LIST response contains two distinct values:
+	 *
+	 *  - the list of individual locking ranges (UIDs) put in SUM. The list
+	 *    may also be empty signaling the SUM is disabled.
+	 *
+	 *  - the Locking table UID if the entire Locking table is put in SUM.
+	 */
+	if (response_token_matches(tok, OPAL_STARTLIST)) {
+		sranges->num_lrs = 0;
+
+		tok_n++;
+		tok = response_get_token(&dev->parsed, tok_n);
+		if (IS_ERR(tok))
+			return PTR_ERR(tok);
+
+		while (!response_token_matches(tok, OPAL_ENDLIST)) {
+			lr_uid_len = response_get_string(&dev->parsed, tok_n, &lr_uid);
+			if (lr_uid_len != OPAL_UID_LENGTH) {
+				pr_debug("Unexpected response token type %d.\n", tok_n);
+				return OPAL_INVAL_PARAM;
+			}
+
+			if (memcmp(lr_uid, opaluid[OPAL_LOCKINGRANGE_GLOBAL], OPAL_UID_LENGTH)) {
+				if (lr_uid[5] != LOCKING_RANGE_NON_GLOBAL) {
+					pr_debug("Unexpected byte %d at LR UUID position 5.\n",
+						 lr_uid[5]);
+					return OPAL_INVAL_PARAM;
+				}
+				sranges->lr[sranges->num_lrs++] = lr_uid[7];
+			} else
+				sranges->lr[sranges->num_lrs++] = 0;
+
+			tok_n++;
+			tok = response_get_token(&dev->parsed, tok_n);
+			if (IS_ERR(tok))
+				return PTR_ERR(tok);
+		}
+	} else {
+		/* Only OPAL_LOCKING_TABLE UID is an alternative to OPAL_STARTLIST here. */
+		lr_uid_len = response_get_string(&dev->parsed, tok_n, &lr_uid);
+		if (lr_uid_len != OPAL_UID_LENGTH) {
+			pr_debug("Unexpected response token type %d.\n", tok_n);
+			return OPAL_INVAL_PARAM;
+		}
+
+		if (memcmp(lr_uid, opaluid[OPAL_LOCKING_TABLE], OPAL_UID_LENGTH)) {
+			pr_debug("Unexpected response UID.\n");
+			return OPAL_INVAL_PARAM;
+		}
+
+		/* sed-opal kernel API already provides following limit in Activate command */
+		sranges->num_lrs = OPAL_MAX_LRS;
+		memcpy(sranges->lr, lr_all, OPAL_MAX_LRS);
+	}
+	tok_n++;
+
+	tok = response_get_token(&dev->parsed, tok_n);
+	if (IS_ERR(tok))
+		return PTR_ERR(tok);
+
+	if (!response_token_matches(tok, OPAL_ENDNAME)) {
+		pr_debug("Unexpected response token type %d.\n", tok_n);
+		return OPAL_INVAL_PARAM;
+	}
+	tok_n++;
+
+	err = response_get_column(&dev->parsed, &tok_n, OPAL_SUM_RANGE_POLICY, &val);
+	if (err)
+		return err;
+
+	sranges->range_policy = val ? 1 : 0;
+
+	return 0;
+}
+
+static int opal_get_sum_ranges(struct opal_dev *dev, struct opal_sum_ranges *opal_sum_rngs,
+			       void __user *data)
+{
+	const struct opal_step admin_steps[] = {
+		{ start_admin1LSP_opal_session, &opal_sum_rngs->key },
+		{ get_sum_ranges, opal_sum_rngs },
+		{ end_opal_session, }
+	}, anybody_steps[] = {
+		{ start_anybodyLSP_opal_session, NULL },
+		{ get_sum_ranges, opal_sum_rngs },
+		{ end_opal_session, }
+	};
+	int ret;
+
+	mutex_lock(&dev->dev_lock);
+	setup_opal_dev(dev);
+	if (opal_sum_rngs->key.key_len)
+		/* Use Admin1 session (authenticated by PIN) to retrieve LockingInfo columns */
+		ret = execute_steps(dev, admin_steps, ARRAY_SIZE(admin_steps));
+	else
+		/* Use Anybody session (no key) to retrieve LockingInfo columns */
+		ret = execute_steps(dev, anybody_steps, ARRAY_SIZE(anybody_steps));
+	mutex_unlock(&dev->dev_lock);
+
+	/* skip session info when copying back to uspace */
+	if (!ret && copy_to_user(data + offsetof(struct opal_sum_ranges, num_lrs),
+				(void *)opal_sum_rngs + offsetof(struct opal_sum_ranges, num_lrs),
+				sizeof(*opal_sum_rngs) - offsetof(struct opal_sum_ranges, num_lrs))) {
+		pr_debug("Error copying SUM ranges info to userspace\n");
+		return -EFAULT;
+	}
+
+	return ret;
+}
+
 int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 {
 	void *p;
@@ -3483,6 +3639,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 	case IOC_OPAL_ENABLE_DISABLE_LR:
 		ret = opal_enable_disable_range(dev, p);
 		break;
+	case IOC_OPAL_GET_SUM_STATUS:
+		ret = opal_get_sum_ranges(dev, p, arg);
+		break;
 
 	default:
 		break;
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index 1d63479838cf..aa006edb612b 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -56,6 +56,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
 	case IOC_OPAL_REACTIVATE_LSP:
 	case IOC_OPAL_LR_SET_START_LEN:
 	case IOC_OPAL_ENABLE_DISABLE_LR:
+	case IOC_OPAL_GET_SUM_STATUS:
 		return true;
 	}
 	return false;
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h
index bde023ae2295..9830298ec51c 100644
--- a/include/uapi/linux/sed-opal.h
+++ b/include/uapi/linux/sed-opal.h
@@ -111,6 +111,18 @@ struct opal_lr_status {
 	__u8  align[4];
 };
 
+struct opal_sum_ranges {
+	/*
+	 * Initiate Admin1 session if key_len > 0,
+	 * use Anybody session otherwise.
+	 */
+	struct opal_key key;
+	__u8 num_lrs;
+	__u8 lr[OPAL_MAX_LRS];
+	__u8 range_policy;
+	__u8 align[5]; /* Align to 8 byte boundary */
+};
+
 struct opal_lock_unlock {
 	struct opal_session_info session;
 	__u32 l_state;
@@ -232,5 +244,6 @@ struct opal_revert_lsp {
 #define IOC_OPAL_REACTIVATE_LSP     _IOW('p', 242, struct opal_lr_react)
 #define IOC_OPAL_LR_SET_START_LEN   _IOW('p', 243, struct opal_user_lr_setup)
 #define IOC_OPAL_ENABLE_DISABLE_LR  _IOW('p', 244, struct opal_user_lr_setup)
+#define IOC_OPAL_GET_SUM_STATUS     _IOW('p', 245, struct opal_sum_ranges)
 
 #endif /* _UAPI_SED_OPAL_H */
-- 
cgit v1.2.3


From 0ee8ab5d4dc51704be1157470f3df8090629f9fc Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Wed, 25 Feb 2026 20:51:05 +0000
Subject: block: annotate struct request_queue with __counted_by_ptr

The queue_hw_ctx field in struct request_queue is an array of pointers
to struct blk_mq_hw_ctx. The number of elements in this array is tracked
by the nr_hw_queues field.

The array is allocated in __blk_mq_realloc_hw_ctxs() using
kcalloc_node() with set->nr_hw_queues elements. q->nr_hw_queues is
subsequently updated to set->nr_hw_queues.

When growing the array, the new array is assigned to queue_hw_ctx before
nr_hw_queues is updated. This is safe because nr_hw_queues (the old
smaller count) is used for bounds checking, which is within the new
larger allocation.

When shrinking the array, nr_hw_queues is updated to the smaller value,
while queue_hw_ctx retains the larger allocation. This is also safe as
the count is within the allocation bounds.

Annotating queue_hw_ctx with __counted_by_ptr(nr_hw_queues) allows the
compiler (with kSAN) to verify that accesses to queue_hw_ctx are within
the valid range defined by nr_hw_queues.

This patch was generated by CodeMender and reviewed by Bill Wendling.
Tested by running blktests.

Reviewed-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Bill Wendling <morbo@google.com>
[axboe: massage commit message]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d463b9b5a0a5..540c2c6c9afd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -502,7 +502,7 @@ struct request_queue {
 
 	/* hw dispatch queues */
 	unsigned int		nr_hw_queues;
-	struct blk_mq_hw_ctx * __rcu *queue_hw_ctx;
+	struct blk_mq_hw_ctx * __rcu *queue_hw_ctx __counted_by_ptr(nr_hw_queues);
 
 	struct percpu_ref	q_usage_counter;
 	struct lock_class_key	io_lock_cls_key;
-- 
cgit v1.2.3


From b7cbc30e93e3a64ea058230f6d0c764d6d80276f Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Fri, 27 Feb 2026 22:19:48 +0900
Subject: block: rename struct gendisk zone_wplugs_lock field

Rename struct gendisk zone_wplugs_lock field to zone_wplugs_hash_lock to
clearly indicates that this is the spinlock used for manipulating the
hash table of zone write plugs.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-zoned.c      | 23 ++++++++++++-----------
 include/linux/blkdev.h |  2 +-
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 26c2aa79faf6..78810e726222 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -514,10 +514,11 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
 	 * are racing with other submission context, so we may already have a
 	 * zone write plug for the same zone.
 	 */
-	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
+	spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
 	hlist_for_each_entry_rcu(zwplg, &disk->zone_wplugs_hash[idx], node) {
 		if (zwplg->zone_no == zwplug->zone_no) {
-			spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+			spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock,
+					       flags);
 			return false;
 		}
 	}
@@ -529,7 +530,7 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
 	 * necessarilly in the active condition.
 	 */
 	zones_cond = rcu_dereference_check(disk->zones_cond,
-				lockdep_is_held(&disk->zone_wplugs_lock));
+				lockdep_is_held(&disk->zone_wplugs_hash_lock));
 	if (zones_cond)
 		zwplug->cond = zones_cond[zwplug->zone_no];
 	else
@@ -537,7 +538,7 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
 
 	hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
 	atomic_inc(&disk->nr_zone_wplugs);
-	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+	spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
 
 	return true;
 }
@@ -590,13 +591,13 @@ static void disk_free_zone_wplug(struct blk_zone_wplug *zwplug)
 	WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
 	WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
 
-	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
+	spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
 	blk_zone_set_cond(rcu_dereference_check(disk->zones_cond,
-				lockdep_is_held(&disk->zone_wplugs_lock)),
+				lockdep_is_held(&disk->zone_wplugs_hash_lock)),
 			  zwplug->zone_no, zwplug->cond);
 	hlist_del_init_rcu(&zwplug->node);
 	atomic_dec(&disk->nr_zone_wplugs);
-	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+	spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
 
 	call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
 }
@@ -1739,7 +1740,7 @@ put_zwplug:
 
 void disk_init_zone_resources(struct gendisk *disk)
 {
-	spin_lock_init(&disk->zone_wplugs_lock);
+	spin_lock_init(&disk->zone_wplugs_hash_lock);
 }
 
 /*
@@ -1829,10 +1830,10 @@ static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
+	spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
 	zones_cond = rcu_replace_pointer(disk->zones_cond, zones_cond,
-				lockdep_is_held(&disk->zone_wplugs_lock));
-	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+				lockdep_is_held(&disk->zone_wplugs_hash_lock));
+	spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
 
 	kfree_rcu_mightsleep(zones_cond);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 540c2c6c9afd..a49a1e38c6e7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -200,7 +200,7 @@ struct gendisk {
 	u8 __rcu		*zones_cond;
 	unsigned int		zone_wplugs_hash_bits;
 	atomic_t		nr_zone_wplugs;
-	spinlock_t		zone_wplugs_lock;
+	spinlock_t		zone_wplugs_hash_lock;
 	struct mempool		*zone_wplugs_pool;
 	struct hlist_head	*zone_wplugs_hash;
 	struct workqueue_struct *zone_wplugs_wq;
-- 
cgit v1.2.3


From 1365b6904fd050bf22ab9f3df375a396de5837a1 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Fri, 27 Feb 2026 22:19:49 +0900
Subject: block: allow submitting all zone writes from a single context

In order to maintain sequential write patterns per zone with zoned block
devices, zone write plugging issues only a single write BIO per zone at
any time. This works well but has the side effect that when large
sequential write streams are issued by the user and these streams cross
zone boundaries, the device ends up receiving a discontiguous set of
write commands for different zones. The same also happens when a user
writes simultaneously at high queue depth multiple zones: the device
does not see all sequential writes per zone and receives discontiguous
writes to different zones. While this does not affect the performance of
solid state zoned block devices, when using an SMR HDD, this pattern
change from sequential writes to discontiguous writes to different zones
significantly increases head seek which results in degraded write
throughput.

In order to reduce this seek overhead for rotational media devices,
introduce a per disk zone write plugs kernel thread to issue all write
BIOs to zones. This single zone write issuing context is enabled for
any zoned block device that has a request queue flagged with the new
QUEUE_ZONED_QD1_WRITES flag.

The flag QUEUE_ZONED_QD1_WRITES is visible as the sysfs queue attribute
zoned_qd1_writes for zoned devices. For regular block devices, this
attribute is not visible. For zoned block devices, a user can override
the default value set to force the global write maximum queue depth of
1 for a zoned block device, or clear this attribute to fallback to the
default behavior of zone write plugging which limits writes to QD=1 per
sequential zone.

Writing to a zoned block device flagged with QUEUE_ZONED_QD1_WRITES is
implemented using a list of zone write plugs that have a non-empty BIO
list. Listed zone write plugs are processed by the disk zone write plugs
worker kthread in FIFO order, and all BIOs of a zone write plug are all
processed before switching to the next listed zone write plug. A newly
submitted BIO for a non-FULL zone write plug that is not yet listed
causes the addition of the zone write plug at the end of the disk list
of zone write plugs.

Since the write BIOs queued in a zone write plug BIO list are
necessarilly sequential, for rotational media, using the single zone
write plugs kthread to issue all BIOs maintains a sequential write
pattern and thus reduces seek overhead and improves write throughput.
This processing essentially result in always writing to HDDs at QD=1,
which is not an issue for HDDs operating with write caching enabled.
Performance with write cache disabled is also not degraded thanks to
the efficient write handling of modern SMR HDDs.

A disk list of zone write plugs is defined using the new struct gendisk
zone_wplugs_list, and accesses to this list is protected using the
zone_wplugs_list_lock spinlock.  The per disk kthread
(zone_wplugs_worker) code is implemented by the function
disk_zone_wplugs_worker(). A reference on listed zone write plugs is
always held until all BIOs of the zone write plug are processed by the
worker kthread. BIO issuing at QD=1 is driven using a completion
structure (zone_wplugs_worker_bio_done) and calls to blk_io_wait().

With this change, performance when sequentially writing the zones of a
30 TB SMR SATA HDD connected to an AHCI adapter changes as follows
(1MiB direct I/Os, results in MB/s unit):

                    +--------------------+
		    |   Write BW (MB/s)  |
 +------------------+----------+---------+
 | Sequential write | Baseline | Patched |
 |  Queue Depth     | 6.19-rc8 |         |
 +------------------+----------+---------+
 | 1                | 244      | 245     |
 | 2                | 244      | 245     |
 | 4                | 245      | 245     |
 | 8                | 242      | 245     |
 | 16               | 222      | 246     |
 | 32               | 211      | 245     |
 | 64               | 193      | 244     |
 | 128              | 112      | 246     |
 +------------------+----------+---------+

With the current code (baseline), as the sequential write stream crosses
a zone boundary, higher queue depth creates a gap between the
last IO to the previous zone and the first IOs to the following zones,
causing head seeks and degrading performance. Using the disk zone
write plugs worker thread, this pattern disappears and the maximum
throughput of the drive is maintained, leading to over 100%
improvements in throughput for high queue depth write.

Using 16 fio jobs all writing to randomly chosen zones at QD=32 with 1
MiB direct IOs, write throughput also increases significantly.

                    +--------------------+
		    |   Write BW (MB/s)  |
 +------------------+----------+---------+
 |   Random write   | Baseline | Patched |
 |  Number of zones | 6.19-rc7 |         |
 +------------------+----------+---------+
 | 1                | 191      | 192     |
 | 2                | 101      | 128     |
 | 4                | 115      | 123     |
 | 8                | 90       | 120     |
 | 16               | 64       | 115     |
 | 32               | 58       | 105     |
 | 64               | 56       | 101     |
 | 128              | 55       | 99      |
 +------------------+----------+---------+

Tests using XFS shows that buffered write speed with 8 jobs writing
files increases by 12% to 35% depending on the workload.

                    +--------------------+
		    |   Write BW (MB/s)  |
 +------------------+----------+---------+
 |     Workload     | Baseline | Patched |
 |                  | 6.19-rc7 |         |
 +------------------+----------+---------+
 | 256MiB file size | 212      | 238     |
 +------------------+----------+---------+
 | 4MiB .. 128 MiB  | 213      | 243     |
 | random file size |          |         |
 +------------------+----------+---------+
 | 2MiB .. 8 MiB    | 179      | 242     |
 | random file size |          |         |
 +------------------+----------+---------+

Performance gains are even more significant when using an HBA that
limits the maximum size of commands to a small value, e.g. HBAs
controlled with the mpi3mr driver limit commands to a maximum of 1 MiB.
In such case, the write throughput gains are over 40%.

                    +--------------------+
		    |   Write BW (MB/s)  |
 +------------------+----------+---------+
 |     Workload     | Baseline | Patched |
 |                  | 6.19-rc7 |         |
 +------------------+----------+---------+
 | 256MiB file size | 175      | 245     |
 +------------------+----------+---------+
 | 4MiB .. 128 MiB  | 174      | 244     |
 | random file size |          |         |
 +------------------+----------+---------+
 | 2MiB .. 8 MiB    | 171      | 243     |
 | random file size |          |         |
 +------------------+----------+---------+

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c |   1 +
 block/blk-sysfs.c      |  35 ++++++++-
 block/blk-zoned.c      | 190 +++++++++++++++++++++++++++++++++++++++++++------
 include/linux/blkdev.h |   8 +++
 4 files changed, 212 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 28167c9baa55..047ec887456b 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -97,6 +97,7 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(NO_ELV_SWITCH),
 	QUEUE_FLAG_NAME(QOS_ENABLED),
 	QUEUE_FLAG_NAME(BIO_ISSUE_TIME),
+	QUEUE_FLAG_NAME(ZONED_QD1_WRITES),
 };
 #undef QUEUE_FLAG_NAME
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 55a1bbfef7d4..ca8033e6d699 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -390,6 +390,36 @@ static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
 	return queue_var_show(disk_nr_zones(disk), page);
 }
 
+static ssize_t queue_zoned_qd1_writes_show(struct gendisk *disk, char *page)
+{
+	return queue_var_show(!!blk_queue_zoned_qd1_writes(disk->queue),
+			      page);
+}
+
+static ssize_t queue_zoned_qd1_writes_store(struct gendisk *disk,
+					    const char *page, size_t count)
+{
+	struct request_queue *q = disk->queue;
+	unsigned long qd1_writes;
+	unsigned int memflags;
+	ssize_t ret;
+
+	ret = queue_var_store(&qd1_writes, page, count);
+	if (ret < 0)
+		return ret;
+
+	memflags = blk_mq_freeze_queue(q);
+	blk_mq_quiesce_queue(q);
+	if (qd1_writes)
+		blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_ZONED_QD1_WRITES, q);
+	blk_mq_unquiesce_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
+
+	return count;
+}
+
 static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
 {
 	return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page);
@@ -617,6 +647,7 @@ QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
 QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
 
 QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned");
+QUEUE_RW_ENTRY(queue_zoned_qd1_writes, "zoned_qd1_writes");
 QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
 QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones");
 QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones");
@@ -754,6 +785,7 @@ static struct attribute *queue_attrs[] = {
 	&queue_nomerges_entry.attr,
 	&queue_poll_entry.attr,
 	&queue_poll_delay_entry.attr,
+	&queue_zoned_qd1_writes_entry.attr,
 
 	NULL,
 };
@@ -786,7 +818,8 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
 	struct request_queue *q = disk->queue;
 
 	if ((attr == &queue_max_open_zones_entry.attr ||
-	     attr == &queue_max_active_zones_entry.attr) &&
+	     attr == &queue_max_active_zones_entry.attr ||
+	     attr == &queue_zoned_qd1_writes_entry.attr) &&
 	    !blk_queue_is_zoned(q))
 		return 0;
 
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 78810e726222..e1a23c8b676d 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -16,6 +16,8 @@
 #include <linux/spinlock.h>
 #include <linux/refcount.h>
 #include <linux/mempool.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 
 #include <trace/events/block.h>
 
@@ -40,6 +42,8 @@ static const char *const zone_cond_name[] = {
 /*
  * Per-zone write plug.
  * @node: hlist_node structure for managing the plug using a hash table.
+ * @entry: list_head structure for listing the plug in the disk list of active
+ *         zone write plugs.
  * @bio_list: The list of BIOs that are currently plugged.
  * @bio_work: Work struct to handle issuing of plugged BIOs
  * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
@@ -62,6 +66,7 @@ static const char *const zone_cond_name[] = {
  */
 struct blk_zone_wplug {
 	struct hlist_node	node;
+	struct list_head	entry;
 	struct bio_list		bio_list;
 	struct work_struct	bio_work;
 	struct rcu_head		rcu_head;
@@ -623,7 +628,19 @@ static void disk_mark_zone_wplug_dead(struct blk_zone_wplug *zwplug)
 	}
 }
 
-static void blk_zone_wplug_bio_work(struct work_struct *work);
+static bool disk_zone_wplug_submit_bio(struct gendisk *disk,
+				       struct blk_zone_wplug *zwplug);
+
+static void blk_zone_wplug_bio_work(struct work_struct *work)
+{
+	struct blk_zone_wplug *zwplug =
+		container_of(work, struct blk_zone_wplug, bio_work);
+
+	disk_zone_wplug_submit_bio(zwplug->disk, zwplug);
+
+	/* Drop the reference we took in disk_zone_wplug_schedule_work(). */
+	disk_put_zone_wplug(zwplug);
+}
 
 /*
  * Get a zone write plug for the zone containing @sector.
@@ -658,6 +675,7 @@ again:
 	zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector);
 	bio_list_init(&zwplug->bio_list);
 	INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
+	INIT_LIST_HEAD(&zwplug->entry);
 	zwplug->disk = disk;
 
 	/*
@@ -690,6 +708,7 @@ static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug,
  */
 static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
 {
+	struct gendisk *disk = zwplug->disk;
 	struct bio *bio;
 
 	lockdep_assert_held(&zwplug->lock);
@@ -703,6 +722,20 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
 		blk_zone_wplug_bio_io_error(zwplug, bio);
 
 	zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
+
+	/*
+	 * If we are using the per disk zone write plugs worker thread, remove
+	 * the zone write plug from the work list and drop the reference we
+	 * took when the zone write plug was added to that list.
+	 */
+	if (blk_queue_zoned_qd1_writes(disk->queue)) {
+		spin_lock(&disk->zone_wplugs_list_lock);
+		if (!list_empty(&zwplug->entry)) {
+			list_del_init(&zwplug->entry);
+			disk_put_zone_wplug(zwplug);
+		}
+		spin_unlock(&disk->zone_wplugs_list_lock);
+	}
 }
 
 /*
@@ -1137,8 +1170,8 @@ void blk_zone_mgmt_bio_endio(struct bio *bio)
 	}
 }
 
-static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
-					      struct blk_zone_wplug *zwplug)
+static void disk_zone_wplug_schedule_work(struct gendisk *disk,
+					  struct blk_zone_wplug *zwplug)
 {
 	lockdep_assert_held(&zwplug->lock);
 
@@ -1151,6 +1184,7 @@ static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
 	 * and we also drop this reference if the work is already scheduled.
 	 */
 	WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
+	WARN_ON_ONCE(blk_queue_zoned_qd1_writes(disk->queue));
 	refcount_inc(&zwplug->ref);
 	if (!queue_work(disk->zone_wplugs_wq, &zwplug->bio_work))
 		disk_put_zone_wplug(zwplug);
@@ -1190,6 +1224,22 @@ static inline void disk_zone_wplug_add_bio(struct gendisk *disk,
 	bio_list_add(&zwplug->bio_list, bio);
 	trace_disk_zone_wplug_add_bio(zwplug->disk->queue, zwplug->zone_no,
 				      bio->bi_iter.bi_sector, bio_sectors(bio));
+
+	/*
+	 * If we are using the disk zone write plugs worker instead of the per
+	 * zone write plug BIO work, add the zone write plug to the work list
+	 * if it is not already there. Make sure to also get an extra reference
+	 * on the zone write plug so that it does not go away until it is
+	 * removed from the work list.
+	 */
+	if (blk_queue_zoned_qd1_writes(disk->queue)) {
+		spin_lock(&disk->zone_wplugs_list_lock);
+		if (list_empty(&zwplug->entry)) {
+			list_add_tail(&zwplug->entry, &disk->zone_wplugs_list);
+			refcount_inc(&zwplug->ref);
+		}
+		spin_unlock(&disk->zone_wplugs_list_lock);
+	}
 }
 
 /*
@@ -1423,6 +1473,13 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
 		goto queue_bio;
 	}
 
+	/*
+	 * For rotational devices, we will use the gendisk zone write plugs
+	 * work instead of the per zone write plug BIO work, so queue the BIO.
+	 */
+	if (blk_queue_zoned_qd1_writes(disk->queue))
+		goto queue_bio;
+
 	/* If the zone is already plugged, add the BIO to the BIO plug list. */
 	if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
 		goto queue_bio;
@@ -1445,7 +1502,10 @@ queue_bio:
 
 	if (!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)) {
 		zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
-		disk_zone_wplug_schedule_bio_work(disk, zwplug);
+		if (blk_queue_zoned_qd1_writes(disk->queue))
+			wake_up_process(disk->zone_wplugs_worker);
+		else
+			disk_zone_wplug_schedule_work(disk, zwplug);
 	}
 
 	spin_unlock_irqrestore(&zwplug->lock, flags);
@@ -1586,16 +1646,22 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
 
 	spin_lock_irqsave(&zwplug->lock, flags);
 
-	/* Schedule submission of the next plugged BIO if we have one. */
-	if (!bio_list_empty(&zwplug->bio_list)) {
-		disk_zone_wplug_schedule_bio_work(disk, zwplug);
-		spin_unlock_irqrestore(&zwplug->lock, flags);
-		return;
-	}
+	/*
+	 * For rotational devices, signal the BIO completion to the zone write
+	 * plug work. Otherwise, schedule submission of the next plugged BIO
+	 * if we have one.
+	 */
+	if (bio_list_empty(&zwplug->bio_list))
+		zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
+
+	if (blk_queue_zoned_qd1_writes(disk->queue))
+		complete(&disk->zone_wplugs_worker_bio_done);
+	else if (!bio_list_empty(&zwplug->bio_list))
+		disk_zone_wplug_schedule_work(disk, zwplug);
 
-	zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
 	if (!zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug))
 		disk_mark_zone_wplug_dead(zwplug);
+
 	spin_unlock_irqrestore(&zwplug->lock, flags);
 }
 
@@ -1685,10 +1751,9 @@ void blk_zone_write_plug_finish_request(struct request *req)
 	disk_put_zone_wplug(zwplug);
 }
 
-static void blk_zone_wplug_bio_work(struct work_struct *work)
+static bool disk_zone_wplug_submit_bio(struct gendisk *disk,
+				       struct blk_zone_wplug *zwplug)
 {
-	struct blk_zone_wplug *zwplug =
-		container_of(work, struct blk_zone_wplug, bio_work);
 	struct block_device *bdev;
 	unsigned long flags;
 	struct bio *bio;
@@ -1704,7 +1769,7 @@ again:
 	if (!bio) {
 		zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
 		spin_unlock_irqrestore(&zwplug->lock, flags);
-		goto put_zwplug;
+		return false;
 	}
 
 	trace_blk_zone_wplug_bio(zwplug->disk->queue, zwplug->zone_no,
@@ -1718,14 +1783,15 @@ again:
 		goto again;
 	}
 
-	bdev = bio->bi_bdev;
-
 	/*
 	 * blk-mq devices will reuse the extra reference on the request queue
 	 * usage counter we took when the BIO was plugged, but the submission
 	 * path for BIO-based devices will not do that. So drop this extra
 	 * reference here.
 	 */
+	if (blk_queue_zoned_qd1_writes(disk->queue))
+		reinit_completion(&disk->zone_wplugs_worker_bio_done);
+	bdev = bio->bi_bdev;
 	if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) {
 		bdev->bd_disk->fops->submit_bio(bio);
 		blk_queue_exit(bdev->bd_disk->queue);
@@ -1733,14 +1799,78 @@ again:
 		blk_mq_submit_bio(bio);
 	}
 
-put_zwplug:
-	/* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */
-	disk_put_zone_wplug(zwplug);
+	return true;
+}
+
+static struct blk_zone_wplug *disk_get_zone_wplugs_work(struct gendisk *disk)
+{
+	struct blk_zone_wplug *zwplug;
+
+	spin_lock_irq(&disk->zone_wplugs_list_lock);
+	zwplug = list_first_entry_or_null(&disk->zone_wplugs_list,
+					  struct blk_zone_wplug, entry);
+	if (zwplug)
+		list_del_init(&zwplug->entry);
+	spin_unlock_irq(&disk->zone_wplugs_list_lock);
+
+	return zwplug;
+}
+
+static int disk_zone_wplugs_worker(void *data)
+{
+	struct gendisk *disk = data;
+	struct blk_zone_wplug *zwplug;
+	unsigned int noio_flag;
+
+	noio_flag = memalloc_noio_save();
+	set_user_nice(current, MIN_NICE);
+	set_freezable();
+
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
+
+		zwplug = disk_get_zone_wplugs_work(disk);
+		if (zwplug) {
+			/*
+			 * Process all BIOs of this zone write plug and then
+			 * drop the reference we took when adding the zone write
+			 * plug to the active list.
+			 */
+			set_current_state(TASK_RUNNING);
+			while (disk_zone_wplug_submit_bio(disk, zwplug))
+				blk_wait_io(&disk->zone_wplugs_worker_bio_done);
+			disk_put_zone_wplug(zwplug);
+			continue;
+		}
+
+		/*
+		 * Only sleep if nothing sets the state to running. Else check
+		 * for zone write plugs work again as a newly submitted BIO
+		 * might have added a zone write plug to the work list.
+		 */
+		if (get_current_state() == TASK_RUNNING) {
+			try_to_freeze();
+		} else {
+			if (kthread_should_stop()) {
+				set_current_state(TASK_RUNNING);
+				break;
+			}
+			schedule();
+		}
+	}
+
+	WARN_ON_ONCE(!list_empty(&disk->zone_wplugs_list));
+	memalloc_noio_restore(noio_flag);
+
+	return 0;
 }
 
 void disk_init_zone_resources(struct gendisk *disk)
 {
 	spin_lock_init(&disk->zone_wplugs_hash_lock);
+	spin_lock_init(&disk->zone_wplugs_list_lock);
+	INIT_LIST_HEAD(&disk->zone_wplugs_list);
+	init_completion(&disk->zone_wplugs_worker_bio_done);
 }
 
 /*
@@ -1756,6 +1886,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
 				     unsigned int pool_size)
 {
 	unsigned int i;
+	int ret = -ENOMEM;
 
 	atomic_set(&disk->nr_zone_wplugs, 0);
 	disk->zone_wplugs_hash_bits =
@@ -1781,8 +1912,21 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
 	if (!disk->zone_wplugs_wq)
 		goto destroy_pool;
 
+	disk->zone_wplugs_worker =
+		kthread_create(disk_zone_wplugs_worker, disk,
+			       "%s_zwplugs_worker", disk->disk_name);
+	if (IS_ERR(disk->zone_wplugs_worker)) {
+		ret = PTR_ERR(disk->zone_wplugs_worker);
+		disk->zone_wplugs_worker = NULL;
+		goto destroy_wq;
+	}
+	wake_up_process(disk->zone_wplugs_worker);
+
 	return 0;
 
+destroy_wq:
+	destroy_workqueue(disk->zone_wplugs_wq);
+	disk->zone_wplugs_wq = NULL;
 destroy_pool:
 	mempool_destroy(disk->zone_wplugs_pool);
 	disk->zone_wplugs_pool = NULL;
@@ -1790,7 +1934,7 @@ free_hash:
 	kfree(disk->zone_wplugs_hash);
 	disk->zone_wplugs_hash = NULL;
 	disk->zone_wplugs_hash_bits = 0;
-	return -ENOMEM;
+	return ret;
 }
 
 static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
@@ -1840,6 +1984,10 @@ static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond)
 
 void disk_free_zone_resources(struct gendisk *disk)
 {
+	if (disk->zone_wplugs_worker)
+		kthread_stop(disk->zone_wplugs_worker);
+	WARN_ON_ONCE(!list_empty(&disk->zone_wplugs_list));
+
 	if (disk->zone_wplugs_wq) {
 		destroy_workqueue(disk->zone_wplugs_wq);
 		disk->zone_wplugs_wq = NULL;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a49a1e38c6e7..ef6457487d23 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -13,6 +13,7 @@
 #include <linux/minmax.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
+#include <linux/completion.h>
 #include <linux/wait.h>
 #include <linux/bio.h>
 #include <linux/gfp.h>
@@ -204,6 +205,10 @@ struct gendisk {
 	struct mempool		*zone_wplugs_pool;
 	struct hlist_head	*zone_wplugs_hash;
 	struct workqueue_struct *zone_wplugs_wq;
+	spinlock_t		zone_wplugs_list_lock;
+	struct list_head	zone_wplugs_list;
+	struct task_struct	*zone_wplugs_worker;
+	struct completion	zone_wplugs_worker_bio_done;
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 #if IS_ENABLED(CONFIG_CDROM)
@@ -668,6 +673,7 @@ enum {
 	QUEUE_FLAG_NO_ELV_SWITCH,	/* can't switch elevator any more */
 	QUEUE_FLAG_QOS_ENABLED,		/* qos is enabled */
 	QUEUE_FLAG_BIO_ISSUE_TIME,	/* record bio->issue_time_ns */
+	QUEUE_FLAG_ZONED_QD1_WRITES,	/* Limit zoned devices writes to QD=1 */
 	QUEUE_FLAG_MAX
 };
 
@@ -707,6 +713,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 	test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags)
 #define blk_queue_no_elv_switch(q)	\
 	test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags)
+#define blk_queue_zoned_qd1_writes(q)	\
+	test_bit(QUEUE_FLAG_ZONED_QD1_WRITES, &(q)->queue_flags)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
-- 
cgit v1.2.3


From ecd92cfec5349876d6a80f8188ea98c5920094b6 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 26 Feb 2026 16:54:48 +0900
Subject: block: remove bdev_nonrot()

bdev_nonrot() is simply the negative return value of bdev_rot().
So replace all call sites of bdev_nonrot() with calls to bdev_rot()
and remove bdev_nonrot().

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/raid1.c                  | 2 +-
 drivers/md/raid10.c                 | 2 +-
 drivers/md/raid5.c                  | 2 +-
 drivers/target/target_core_file.c   | 2 +-
 drivers/target/target_core_iblock.c | 2 +-
 fs/btrfs/volumes.c                  | 4 ++--
 fs/ext4/mballoc-test.c              | 2 +-
 fs/ext4/mballoc.c                   | 2 +-
 include/linux/blkdev.h              | 5 -----
 mm/swapfile.c                       | 2 +-
 10 files changed, 10 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 181400e147c0..cda6af0712b9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1878,7 +1878,7 @@ static bool raid1_add_conf(struct r1conf *conf, struct md_rdev *rdev, int disk,
 	if (info->rdev)
 		return false;
 
-	if (bdev_nonrot(rdev->bdev)) {
+	if (!bdev_rot(rdev->bdev)) {
 		set_bit(Nonrot, &rdev->flags);
 		WRITE_ONCE(conf->nonrot_disks, conf->nonrot_disks + 1);
 	}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0653b5d8545a..cfbd345805ca 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -806,7 +806,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 		if (!do_balance)
 			break;
 
-		nonrot = bdev_nonrot(rdev->bdev);
+		nonrot = !bdev_rot(rdev->bdev);
 		has_nonrot_disk |= nonrot;
 		pending = atomic_read(&rdev->nr_pending);
 		if (min_pending > pending && nonrot) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a8e8d431071b..ba9d6d05b089 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7541,7 +7541,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	rdev_for_each(rdev, mddev) {
 		if (test_bit(Journal, &rdev->flags))
 			continue;
-		if (bdev_nonrot(rdev->bdev)) {
+		if (!bdev_rot(rdev->bdev)) {
 			conf->batch_bio_dispatch = false;
 			break;
 		}
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 3ae1f7137d9d..d6e3e5214652 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -173,7 +173,7 @@ static int fd_configure_device(struct se_device *dev)
 		 */
 		dev->dev_attrib.max_write_same_len = 0xFFFF;
 
-		if (bdev_nonrot(bdev))
+		if (!bdev_rot(bdev))
 			dev->dev_attrib.is_nonrot = 1;
 	} else {
 		if (!(fd_dev->fbd_flags & FBDF_HAS_SIZE)) {
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 3c92f94497b4..1087d1d17c36 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -148,7 +148,7 @@ static int iblock_configure_device(struct se_device *dev)
 	else
 		dev->dev_attrib.max_write_same_len = 0xFFFF;
 
-	if (bdev_nonrot(bd))
+	if (!bdev_rot(bd))
 		dev->dev_attrib.is_nonrot = 1;
 
 	target_configure_write_atomic_from_bdev(&dev->dev_attrib, bd);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 648bb09fc416..353c9caa8ab9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -694,7 +694,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 			set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 	}
 
-	if (!bdev_nonrot(file_bdev(bdev_file)))
+	if (bdev_rot(file_bdev(bdev_file)))
 		fs_devices->rotating = true;
 
 	if (bdev_max_discard_sectors(file_bdev(bdev_file)))
@@ -2919,7 +2919,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
 	atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
 
-	if (!bdev_nonrot(device->bdev))
+	if (bdev_rot(device->bdev))
 		fs_devices->rotating = true;
 
 	orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index 9fbdf6a09489..b9f22e3a8d5c 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -72,7 +72,7 @@ static int mbt_mb_init(struct super_block *sb)
 	ext4_fsblk_t block;
 	int ret;
 
-	/* needed by ext4_mb_init->bdev_nonrot(sb->s_bdev) */
+	/* needed by ext4_mb_init->bdev_rot(sb->s_bdev) */
 	sb->s_bdev = kzalloc_obj(*sb->s_bdev);
 	if (sb->s_bdev == NULL)
 		return -ENOMEM;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 20e9fdaf4301..8a4dfe19878c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3836,7 +3836,7 @@ int ext4_mb_init(struct super_block *sb)
 		spin_lock_init(&lg->lg_prealloc_lock);
 	}
 
-	if (bdev_nonrot(sb->s_bdev))
+	if (!bdev_rot(sb->s_bdev))
 		sbi->s_mb_max_linear_groups = 0;
 	else
 		sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ef6457487d23..8d93d8e356d8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1475,11 +1475,6 @@ static inline bool bdev_rot(struct block_device *bdev)
 	return blk_queue_rot(bdev_get_queue(bdev));
 }
 
-static inline bool bdev_nonrot(struct block_device *bdev)
-{
-	return !bdev_rot(bdev);
-}
-
 static inline bool bdev_synchronous(struct block_device *bdev)
 {
 	return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 94af29d1de88..60e21414624b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3460,7 +3460,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (si->bdev && bdev_synchronous(si->bdev))
 		si->flags |= SWP_SYNCHRONOUS_IO;
 
-	if (si->bdev && bdev_nonrot(si->bdev)) {
+	if (si->bdev && !bdev_rot(si->bdev)) {
 		si->flags |= SWP_SOLIDSTATE;
 	} else {
 		atomic_inc(&nr_rotate_swap);
-- 
cgit v1.2.3


From 588e7c048d7d2bfcbe7776ee0888ee248adf01d1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 2 Mar 2026 06:18:09 -0800
Subject: ext4, fscrypt: merge fscrypt_mergeable_bio_bh into
 io_submit_need_new_bio

ext4 already has the inode and folio and can't have a NULL
folio->mapping in this path. Open code fscrypt_mergeable_bio_bh in
io_submit_need_new_bio based on these simplifying assumptions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20260302141922.370070-5-hch@lst.de
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 fs/crypto/inline_crypt.c | 23 -----------------------
 fs/ext4/page-io.c        |  7 +++++--
 include/linux/fscrypt.h  |  9 ---------
 3 files changed, 5 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index c0852b920dbc..0da53956a9b1 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -406,29 +406,6 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 }
 EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio);
 
-/**
- * fscrypt_mergeable_bio_bh() - test whether data can be added to a bio
- * @bio: the bio being built up
- * @next_bh: the next buffer_head for which I/O will be submitted
- *
- * Same as fscrypt_mergeable_bio(), except this takes a buffer_head instead of
- * an inode and block number directly.
- *
- * Return: true iff the I/O is mergeable
- */
-bool fscrypt_mergeable_bio_bh(struct bio *bio,
-			      const struct buffer_head *next_bh)
-{
-	const struct inode *inode;
-	u64 next_lblk;
-
-	if (!bh_get_inode_and_lblk_num(next_bh, &inode, &next_lblk))
-		return !bio->bi_crypt_context;
-
-	return fscrypt_mergeable_bio(bio, inode, next_lblk);
-}
-EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh);
-
 /**
  * fscrypt_dio_supported() - check whether DIO (direct I/O) is supported on an
  *			     inode, as far as encryption is concerned
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 58cdbd836fd6..293314d7f236 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -440,11 +440,14 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
 }
 
 static bool io_submit_need_new_bio(struct ext4_io_submit *io,
+				   struct inode *inode,
+				   struct folio *folio,
 				   struct buffer_head *bh)
 {
 	if (bh->b_blocknr != io->io_next_block)
 		return true;
-	if (!fscrypt_mergeable_bio_bh(io->io_bio, bh))
+	if (!fscrypt_mergeable_bio(io->io_bio, inode,
+			(folio_pos(folio) + bh_offset(bh)) >> inode->i_blkbits))
 		return true;
 	return false;
 }
@@ -455,7 +458,7 @@ static void io_submit_add_bh(struct ext4_io_submit *io,
 			     struct folio *io_folio,
 			     struct buffer_head *bh)
 {
-	if (io->io_bio && io_submit_need_new_bio(io, bh)) {
+	if (io->io_bio && io_submit_need_new_bio(io, inode, folio, bh)) {
 submit_and_retry:
 		ext4_io_submit(io);
 	}
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 516aba5b858b..6af3c1907adc 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -876,9 +876,6 @@ void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio,
 bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 			   u64 next_lblk);
 
-bool fscrypt_mergeable_bio_bh(struct bio *bio,
-			      const struct buffer_head *next_bh);
-
 bool fscrypt_dio_supported(struct inode *inode);
 
 u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks);
@@ -906,12 +903,6 @@ static inline bool fscrypt_mergeable_bio(struct bio *bio,
 	return true;
 }
 
-static inline bool fscrypt_mergeable_bio_bh(struct bio *bio,
-					    const struct buffer_head *next_bh)
-{
-	return true;
-}
-
 static inline bool fscrypt_dio_supported(struct inode *inode)
 {
 	return !fscrypt_needs_contents_encryption(inode);
-- 
cgit v1.2.3


From a18b1ab81654b06e7ff402e5d0b85249e9504bcb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 2 Mar 2026 06:18:10 -0800
Subject: fscrypt: move fscrypt_set_bio_crypt_ctx_bh to buffer.c

fscrypt_set_bio_crypt_ctx_bh is only used by submit_bh_wbc now.  Move it
there and merge bh_get_inode_and_lblk_num into it.

Note that this does not add ifdefs for fscrypt as the compiler will
optimize away the dead code if it is not built in.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20260302141922.370070-6-hch@lst.de
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 fs/buffer.c              | 21 ++++++++++++++++++++-
 fs/crypto/inline_crypt.c | 45 ---------------------------------------------
 include/linux/fscrypt.h  |  9 ---------
 3 files changed, 20 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 22b43642ba57..b6504ec7fa4c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2774,6 +2774,24 @@ static void end_bio_bh_io_sync(struct bio *bio)
 	bio_put(bio);
 }
 
+static void buffer_set_crypto_ctx(struct bio *bio, const struct buffer_head *bh,
+				  gfp_t gfp_mask)
+{
+	const struct address_space *mapping = folio_mapping(bh->b_folio);
+	const struct inode *inode;
+	u64 lblk;
+
+	/*
+	 * The ext4 journal (jbd2) can submit a buffer_head it directly created
+	 * for a non-pagecache page.  fscrypt doesn't care about these.
+	 */
+	if (!mapping)
+		return;
+	inode = mapping->host;
+	lblk = (folio_pos(bh->b_folio) + bh_offset(bh)) >> inode->i_blkbits;
+	fscrypt_set_bio_crypt_ctx(bio, inode, lblk, gfp_mask);
+}
+
 static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
 			  enum rw_hint write_hint,
 			  struct writeback_control *wbc)
@@ -2800,7 +2818,8 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
 
 	bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO);
 
-	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
+	if (IS_ENABLED(CONFIG_FS_ENCRYPTION))
+		buffer_set_crypto_ctx(bio, bh, GFP_NOIO);
 
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_write_hint = write_hint;
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index 0da53956a9b1..702d13d138aa 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -314,51 +314,6 @@ void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
 }
 EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx);
 
-/* Extract the inode and logical block number from a buffer_head. */
-static bool bh_get_inode_and_lblk_num(const struct buffer_head *bh,
-				      const struct inode **inode_ret,
-				      u64 *lblk_num_ret)
-{
-	struct folio *folio = bh->b_folio;
-	const struct address_space *mapping;
-	const struct inode *inode;
-
-	/*
-	 * The ext4 journal (jbd2) can submit a buffer_head it directly created
-	 * for a non-pagecache page.  fscrypt doesn't care about these.
-	 */
-	mapping = folio_mapping(folio);
-	if (!mapping)
-		return false;
-	inode = mapping->host;
-
-	*inode_ret = inode;
-	*lblk_num_ret = (folio_pos(folio) + bh_offset(bh)) >> inode->i_blkbits;
-	return true;
-}
-
-/**
- * fscrypt_set_bio_crypt_ctx_bh() - prepare a file contents bio for inline
- *				    crypto
- * @bio: a bio which will eventually be submitted to the file
- * @first_bh: the first buffer_head for which I/O will be submitted
- * @gfp_mask: memory allocation flags
- *
- * Same as fscrypt_set_bio_crypt_ctx(), except this takes a buffer_head instead
- * of an inode and block number directly.
- */
-void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio,
-				  const struct buffer_head *first_bh,
-				  gfp_t gfp_mask)
-{
-	const struct inode *inode;
-	u64 first_lblk;
-
-	if (bh_get_inode_and_lblk_num(first_bh, &inode, &first_lblk))
-		fscrypt_set_bio_crypt_ctx(bio, inode, first_lblk, gfp_mask);
-}
-EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh);
-
 /**
  * fscrypt_mergeable_bio() - test whether data can be added to a bio
  * @bio: the bio being built up
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 6af3c1907adc..26561b7994e0 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -869,10 +869,6 @@ void fscrypt_set_bio_crypt_ctx(struct bio *bio,
 			       const struct inode *inode, u64 first_lblk,
 			       gfp_t gfp_mask);
 
-void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio,
-				  const struct buffer_head *first_bh,
-				  gfp_t gfp_mask);
-
 bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 			   u64 next_lblk);
 
@@ -891,11 +887,6 @@ static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio,
 					     const struct inode *inode,
 					     u64 first_lblk, gfp_t gfp_mask) { }
 
-static inline void fscrypt_set_bio_crypt_ctx_bh(
-					 struct bio *bio,
-					 const struct buffer_head *first_bh,
-					 gfp_t gfp_mask) { }
-
 static inline bool fscrypt_mergeable_bio(struct bio *bio,
 					 const struct inode *inode,
 					 u64 next_lblk)
-- 
cgit v1.2.3


From 22be86a23c5956254b752e4e98f0ef2799565a41 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 2 Mar 2026 06:18:12 -0800
Subject: fscrypt: pass a byte offset to fscrypt_mergeable_bio

Logical offsets into an inode are usually expressed as bytes in the VFS.
Switch fscrypt_mergeable_bio to that convention.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20260302141922.370070-8-hch@lst.de
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 fs/crypto/bio.c          | 3 ++-
 fs/crypto/inline_crypt.c | 6 +++---
 fs/ext4/page-io.c        | 2 +-
 fs/ext4/readpage.c       | 3 ++-
 fs/f2fs/data.c           | 3 ++-
 include/linux/fscrypt.h  | 4 ++--
 6 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 6da683ea69dc..0a701d4a17ef 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -100,7 +100,8 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
 			len -= blocks_this_page;
 			lblk += blocks_this_page;
 			sector += (bytes_this_page >> SECTOR_SHIFT);
-			if (!len || !fscrypt_mergeable_bio(bio, inode, lblk))
+			if (!len || !fscrypt_mergeable_bio(bio, inode,
+					(loff_t)lblk << blockbits))
 				break;
 		}
 
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index 5279565e9846..b0954d17904b 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -316,7 +316,7 @@ EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx);
  * fscrypt_mergeable_bio() - test whether data can be added to a bio
  * @bio: the bio being built up
  * @inode: the inode for the next part of the I/O
- * @next_lblk: the next file logical block number in the I/O
+ * @pos: the next file position (in bytes) in the I/O
  *
  * When building a bio which may contain data which should undergo inline
  * encryption (or decryption) via fscrypt, filesystems should call this function
@@ -334,7 +334,7 @@ EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx);
  * Return: true iff the I/O is mergeable
  */
 bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
-			   u64 next_lblk)
+			   loff_t pos)
 {
 	const struct bio_crypt_ctx *bc = bio->bi_crypt_context;
 	const struct fscrypt_inode_info *ci;
@@ -354,7 +354,7 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 	if (bc->bc_key != ci->ci_enc_key.blk_key)
 		return false;
 
-	fscrypt_generate_dun(ci, next_lblk << inode->i_blkbits, next_dun);
+	fscrypt_generate_dun(ci, pos, next_dun);
 	return bio_crypt_dun_is_contiguous(bc, bio->bi_iter.bi_size, next_dun);
 }
 EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 293314d7f236..50f507bab82c 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -447,7 +447,7 @@ static bool io_submit_need_new_bio(struct ext4_io_submit *io,
 	if (bh->b_blocknr != io->io_next_block)
 		return true;
 	if (!fscrypt_mergeable_bio(io->io_bio, inode,
-			(folio_pos(folio) + bh_offset(bh)) >> inode->i_blkbits))
+			folio_pos(folio) + bh_offset(bh)))
 		return true;
 	return false;
 }
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 830f3b8a321f..ba7cfddd6038 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -342,7 +342,8 @@ static int ext4_mpage_readpages(struct inode *inode, struct fsverity_info *vi,
 		 * BIO off first?
 		 */
 		if (bio && (last_block_in_bio != first_block - 1 ||
-			    !fscrypt_mergeable_bio(bio, inode, next_block))) {
+			    !fscrypt_mergeable_bio(bio, inode,
+				(loff_t)next_block << blkbits))) {
 		submit_and_realloc:
 			blk_crypto_submit_bio(bio);
 			bio = NULL;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 338df7a2aea6..dca273fedfde 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -541,7 +541,8 @@ static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 	if (fio && fio->encrypted_page)
 		return !bio_has_crypt_ctx(bio);
 
-	return fscrypt_mergeable_bio(bio, inode, next_idx);
+	return fscrypt_mergeable_bio(bio, inode,
+			(loff_t)next_idx << inode->i_blkbits);
 }
 
 void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 26561b7994e0..98fb14660d40 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -870,7 +870,7 @@ void fscrypt_set_bio_crypt_ctx(struct bio *bio,
 			       gfp_t gfp_mask);
 
 bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
-			   u64 next_lblk);
+			   loff_t pos);
 
 bool fscrypt_dio_supported(struct inode *inode);
 
@@ -889,7 +889,7 @@ static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio,
 
 static inline bool fscrypt_mergeable_bio(struct bio *bio,
 					 const struct inode *inode,
-					 u64 next_lblk)
+					 loff_t pos)
 {
 	return true;
 }
-- 
cgit v1.2.3


From 3c7eaa775d8e008135646bd4b7aa7db7c5e40a0e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 2 Mar 2026 06:18:13 -0800
Subject: fscrypt: pass a byte offset to fscrypt_set_bio_crypt_ctx

Logical offsets into an inode are usually expressed as bytes in the VFS.
Switch fscrypt_set_bio_crypt_ctx to that convention.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20260302141922.370070-9-hch@lst.de
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 fs/buffer.c              | 7 ++-----
 fs/crypto/bio.c          | 8 ++++----
 fs/crypto/inline_crypt.c | 6 +++---
 fs/ext4/page-io.c        | 5 ++---
 fs/ext4/readpage.c       | 4 ++--
 fs/f2fs/data.c           | 4 +++-
 fs/iomap/direct-io.c     | 6 ++----
 include/linux/fscrypt.h  | 7 +++----
 8 files changed, 21 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index b6504ec7fa4c..1c8ee5a59f88 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2778,8 +2778,6 @@ static void buffer_set_crypto_ctx(struct bio *bio, const struct buffer_head *bh,
 				  gfp_t gfp_mask)
 {
 	const struct address_space *mapping = folio_mapping(bh->b_folio);
-	const struct inode *inode;
-	u64 lblk;
 
 	/*
 	 * The ext4 journal (jbd2) can submit a buffer_head it directly created
@@ -2787,9 +2785,8 @@ static void buffer_set_crypto_ctx(struct bio *bio, const struct buffer_head *bh,
 	 */
 	if (!mapping)
 		return;
-	inode = mapping->host;
-	lblk = (folio_pos(bh->b_folio) + bh_offset(bh)) >> inode->i_blkbits;
-	fscrypt_set_bio_crypt_ctx(bio, inode, lblk, gfp_mask);
+	fscrypt_set_bio_crypt_ctx(bio, mapping->host,
+			folio_pos(bh->b_folio) + bh_offset(bh), gfp_mask);
 }
 
 static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 0a701d4a17ef..e7fb2fdd9728 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -75,6 +75,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
 {
 	const unsigned int blockbits = inode->i_blkbits;
 	const unsigned int blocks_per_page = 1 << (PAGE_SHIFT - blockbits);
+	loff_t pos = (loff_t)lblk << blockbits;
 	struct fscrypt_zero_done done = {
 		.pending	= ATOMIC_INIT(1),
 		.done		= COMPLETION_INITIALIZER_ONSTACK(done.done),
@@ -89,7 +90,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
 		bio->bi_iter.bi_sector = sector;
 		bio->bi_private = &done;
 		bio->bi_end_io = fscrypt_zeroout_range_end_io;
-		fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOFS);
+		fscrypt_set_bio_crypt_ctx(bio, inode, pos, GFP_NOFS);
 
 		for (n = 0; n < BIO_MAX_VECS; n++) {
 			unsigned int blocks_this_page =
@@ -98,10 +99,9 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
 
 			__bio_add_page(bio, ZERO_PAGE(0), bytes_this_page, 0);
 			len -= blocks_this_page;
-			lblk += blocks_this_page;
+			pos += bytes_this_page;
 			sector += (bytes_this_page >> SECTOR_SHIFT);
-			if (!len || !fscrypt_mergeable_bio(bio, inode,
-					(loff_t)lblk << blockbits))
+			if (!len || !fscrypt_mergeable_bio(bio, inode, pos))
 				break;
 		}
 
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index b0954d17904b..37d42d357925 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -285,7 +285,7 @@ static void fscrypt_generate_dun(const struct fscrypt_inode_info *ci,
  * fscrypt_set_bio_crypt_ctx() - prepare a file contents bio for inline crypto
  * @bio: a bio which will eventually be submitted to the file
  * @inode: the file's inode
- * @first_lblk: the first file logical block number in the I/O
+ * @pos: the first file position (in bytes) in the I/O
  * @gfp_mask: memory allocation flags - these must be a waiting mask so that
  *					bio_crypt_set_ctx can't fail.
  *
@@ -298,7 +298,7 @@ static void fscrypt_generate_dun(const struct fscrypt_inode_info *ci,
  * The encryption context will be freed automatically when the bio is freed.
  */
 void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
-			       u64 first_lblk, gfp_t gfp_mask)
+			       loff_t pos, gfp_t gfp_mask)
 {
 	const struct fscrypt_inode_info *ci;
 	u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
@@ -307,7 +307,7 @@ void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
 		return;
 	ci = fscrypt_get_inode_info_raw(inode);
 
-	fscrypt_generate_dun(ci, first_lblk << inode->i_blkbits, dun);
+	fscrypt_generate_dun(ci, pos, dun);
 	bio_crypt_set_ctx(bio, ci->ci_enc_key.blk_key, dun, gfp_mask);
 }
 EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 50f507bab82c..181cda58d387 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -427,9 +427,8 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
 	 * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
 	 */
 	bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO);
-	fscrypt_set_bio_crypt_ctx(bio, inode,
-			(folio_pos(folio) + bh_offset(bh)) >> inode->i_blkbits,
-			GFP_NOIO);
+	fscrypt_set_bio_crypt_ctx(bio, inode, folio_pos(folio) + bh_offset(bh),
+				  GFP_NOIO);
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_end_io = ext4_end_bio;
 	bio->bi_private = ext4_get_io_end(io->io_end);
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index ba7cfddd6038..fbfa4d830d9a 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -355,8 +355,8 @@ static int ext4_mpage_readpages(struct inode *inode, struct fsverity_info *vi,
 			 */
 			bio = bio_alloc(bdev, bio_max_segs(nr_pages),
 					REQ_OP_READ, GFP_KERNEL);
-			fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
-						  GFP_KERNEL);
+			fscrypt_set_bio_crypt_ctx(bio, inode,
+					(loff_t)next_block << blkbits, GFP_KERNEL);
 			ext4_set_bio_post_read_ctx(bio, inode, vi);
 			bio->bi_iter.bi_sector = first_block << (blkbits - 9);
 			bio->bi_end_io = mpage_end_io;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index dca273fedfde..07b4ed6bb0cc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -527,7 +527,9 @@ static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
 	 * read/write raw data without encryption.
 	 */
 	if (!fio || !fio->encrypted_page)
-		fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
+		fscrypt_set_bio_crypt_ctx(bio, inode,
+				(loff_t)first_idx << inode->i_blkbits,
+				gfp_mask);
 }
 
 static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index e911daedff65..9da5d862ef9e 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -311,8 +311,7 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
 
 	bio = iomap_dio_alloc_bio(iter, dio, nr_vecs,
 				  REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
-	fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
-				  GFP_KERNEL);
+	fscrypt_set_bio_crypt_ctx(bio, inode, pos, GFP_KERNEL);
 	bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
 	bio->bi_private = dio;
 	bio->bi_end_io = iomap_dio_bio_end_io;
@@ -342,8 +341,7 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter,
 		nr_vecs = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
 
 	bio = iomap_dio_alloc_bio(iter, dio, nr_vecs, op);
-	fscrypt_set_bio_crypt_ctx(bio, iter->inode,
-			pos >> iter->inode->i_blkbits, GFP_KERNEL);
+	fscrypt_set_bio_crypt_ctx(bio, iter->inode, pos, GFP_KERNEL);
 	bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
 	bio->bi_write_hint = iter->inode->i_write_hint;
 	bio->bi_ioprio = dio->iocb->ki_ioprio;
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 98fb14660d40..90f75fe0e1c9 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -865,9 +865,8 @@ static inline void fscrypt_set_ops(struct super_block *sb,
 
 bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode);
 
-void fscrypt_set_bio_crypt_ctx(struct bio *bio,
-			       const struct inode *inode, u64 first_lblk,
-			       gfp_t gfp_mask);
+void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
+			       loff_t pos, gfp_t gfp_mask);
 
 bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 			   loff_t pos);
@@ -885,7 +884,7 @@ static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode)
 
 static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio,
 					     const struct inode *inode,
-					     u64 first_lblk, gfp_t gfp_mask) { }
+					     loff_t pos, gfp_t gfp_mask) { }
 
 static inline bool fscrypt_mergeable_bio(struct bio *bio,
 					 const struct inode *inode,
-- 
cgit v1.2.3


From cd7db2e7dfeef99c901156f58ab4a38256b0c3f1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 2 Mar 2026 06:18:16 -0800
Subject: fscrypt: pass a byte offset to fscrypt_zeroout_range

Logical offsets into an inode are usually expressed as bytes in the VFS.
Switch fscrypt_zeroout_range to that convention.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20260302141922.370070-12-hch@lst.de
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 fs/crypto/bio.c         | 7 +++----
 fs/ext4/inode.c         | 3 ++-
 fs/f2fs/file.c          | 4 +++-
 include/linux/fscrypt.h | 4 ++--
 4 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 303b5acc66a9..a07ac8dcf851 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -113,7 +113,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
 /**
  * fscrypt_zeroout_range() - zero out a range of blocks in an encrypted file
  * @inode: the file's inode
- * @lblk: the first file logical block to zero out
+ * @pos: the first file position (in bytes) to zero out
  * @pblk: the first filesystem physical block to zero out
  * @len: number of blocks to zero out
  *
@@ -127,7 +127,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
  *
  * Return: 0 on success; -errno on failure.
  */
-int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
+int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
 			  sector_t pblk, unsigned int len)
 {
 	const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode);
@@ -135,9 +135,8 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
 	const unsigned int du_size = 1U << du_bits;
 	const unsigned int du_per_page_bits = PAGE_SHIFT - du_bits;
 	const unsigned int du_per_page = 1U << du_per_page_bits;
-	u64 du_index = (u64)lblk << (inode->i_blkbits - du_bits);
+	u64 du_index = pos >> du_bits;
 	u64 du_remaining = (u64)len << (inode->i_blkbits - du_bits);
-	loff_t pos = (loff_t)lblk << inode->i_blkbits;
 	sector_t sector = pblk << (inode->i_blkbits - SECTOR_SHIFT);
 	struct page *pages[16]; /* write up to 16 pages at a time */
 	unsigned int nr_pages;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 396dc3a5d16b..945613c95ffa 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -405,7 +405,8 @@ int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
 	KUNIT_STATIC_STUB_REDIRECT(ext4_issue_zeroout, inode, lblk, pblk, len);
 
 	if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
-		return fscrypt_zeroout_range(inode, lblk, pblk, len);
+		return fscrypt_zeroout_range(inode,
+				(loff_t)lblk << inode->i_blkbits, pblk, len);
 
 	ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
 	if (ret > 0)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c8a2f17a8f11..239c2666ceb5 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4162,7 +4162,9 @@ static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
 
 	if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
 		if (IS_ENCRYPTED(inode))
-			ret = fscrypt_zeroout_range(inode, off, block, len);
+			ret = fscrypt_zeroout_range(inode,
+					(loff_t)off << inode->i_blkbits, block,
+					len);
 		else
 			ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
 					GFP_NOFS, 0);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 90f75fe0e1c9..9fc15e1fbe57 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -450,7 +450,7 @@ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name);
 
 /* bio.c */
 bool fscrypt_decrypt_bio(struct bio *bio);
-int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
+int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
 			  sector_t pblk, unsigned int len);
 
 /* hooks.c */
@@ -755,7 +755,7 @@ static inline bool fscrypt_decrypt_bio(struct bio *bio)
 	return true;
 }
 
-static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
+static inline int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
 					sector_t pblk, unsigned int len)
 {
 	return -EOPNOTSUPP;
-- 
cgit v1.2.3


From fb87ab4ad3d0df2397648e5ce2384de26463c183 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 2 Mar 2026 06:18:17 -0800
Subject: fscrypt: pass a byte length to fscrypt_zeroout_range

Range lengths are usually expressed as bytes in the VFS, switch
fscrypt_zeroout_range to this convention.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20260302141922.370070-13-hch@lst.de
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 fs/crypto/bio.c         | 11 ++++++-----
 fs/ext4/inode.c         |  3 ++-
 fs/f2fs/file.c          |  2 +-
 include/linux/fscrypt.h |  6 +++---
 4 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index a07ac8dcf851..9872408f4f52 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -115,12 +115,13 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
  * @inode: the file's inode
  * @pos: the first file position (in bytes) to zero out
  * @pblk: the first filesystem physical block to zero out
- * @len: number of blocks to zero out
+ * @len: bytes to zero out
  *
  * Zero out filesystem blocks in an encrypted regular file on-disk, i.e. write
  * ciphertext blocks which decrypt to the all-zeroes block.  The blocks must be
  * both logically and physically contiguous.  It's also assumed that the
- * filesystem only uses a single block device, ->s_bdev.
+ * filesystem only uses a single block device, ->s_bdev.  @len must be a
+ * multiple of the file system logical block size.
  *
  * Note that since each block uses a different IV, this involves writing a
  * different ciphertext to each block; we can't simply reuse the same one.
@@ -128,7 +129,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
  * Return: 0 on success; -errno on failure.
  */
 int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
-			  sector_t pblk, unsigned int len)
+			  sector_t pblk, u64 len)
 {
 	const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode);
 	const unsigned int du_bits = ci->ci_data_unit_bits;
@@ -136,7 +137,7 @@ int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
 	const unsigned int du_per_page_bits = PAGE_SHIFT - du_bits;
 	const unsigned int du_per_page = 1U << du_per_page_bits;
 	u64 du_index = pos >> du_bits;
-	u64 du_remaining = (u64)len << (inode->i_blkbits - du_bits);
+	u64 du_remaining = len >> du_bits;
 	sector_t sector = pblk << (inode->i_blkbits - SECTOR_SHIFT);
 	struct page *pages[16]; /* write up to 16 pages at a time */
 	unsigned int nr_pages;
@@ -150,7 +151,7 @@ int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
 
 	if (fscrypt_inode_uses_inline_crypto(inode))
 		return fscrypt_zeroout_range_inline_crypt(inode, pos, sector,
-				(u64)len << inode->i_blkbits);
+							  len);
 
 	BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_VECS);
 	nr_pages = min_t(u64, ARRAY_SIZE(pages),
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 945613c95ffa..8ef61198e14c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -406,7 +406,8 @@ int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
 
 	if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
 		return fscrypt_zeroout_range(inode,
-				(loff_t)lblk << inode->i_blkbits, pblk, len);
+				(loff_t)lblk << inode->i_blkbits, pblk,
+				(u64)len << inode->i_blkbits);
 
 	ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
 	if (ret > 0)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 239c2666ceb5..8785f7c13657 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4164,7 +4164,7 @@ static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
 		if (IS_ENCRYPTED(inode))
 			ret = fscrypt_zeroout_range(inode,
 					(loff_t)off << inode->i_blkbits, block,
-					len);
+					(u64)len << inode->i_blkbits);
 		else
 			ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
 					GFP_NOFS, 0);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 9fc15e1fbe57..90ac62fda926 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -450,8 +450,8 @@ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name);
 
 /* bio.c */
 bool fscrypt_decrypt_bio(struct bio *bio);
-int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
-			  sector_t pblk, unsigned int len);
+int fscrypt_zeroout_range(const struct inode *inode, loff_t pos, sector_t pblk,
+			  u64 len);
 
 /* hooks.c */
 int fscrypt_file_open(struct inode *inode, struct file *filp);
@@ -756,7 +756,7 @@ static inline bool fscrypt_decrypt_bio(struct bio *bio)
 }
 
 static inline int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
-					sector_t pblk, unsigned int len)
+					sector_t pblk, u64 len)
 {
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3


From 5ca1a1f017ea0f0e0bcb6ec52064735f2ac1c393 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 2 Mar 2026 06:18:18 -0800
Subject: fscrypt: pass a real sector_t to fscrypt_zeroout_range

While the pblk argument to fscrypt_zeroout_range is declared as a
sector_t, it actually is interpreted as a logical block size unit, which
is highly unusual.  Switch to passing the 512 byte units that sector_t is
defined for.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20260302141922.370070-14-hch@lst.de
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 fs/crypto/bio.c         | 5 ++---
 fs/ext4/inode.c         | 3 ++-
 fs/f2fs/file.c          | 2 +-
 include/linux/fscrypt.h | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 9872408f4f52..d07740680602 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -114,7 +114,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
  * fscrypt_zeroout_range() - zero out a range of blocks in an encrypted file
  * @inode: the file's inode
  * @pos: the first file position (in bytes) to zero out
- * @pblk: the first filesystem physical block to zero out
+ * @sector: the first sector to zero out
  * @len: bytes to zero out
  *
  * Zero out filesystem blocks in an encrypted regular file on-disk, i.e. write
@@ -129,7 +129,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
  * Return: 0 on success; -errno on failure.
  */
 int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
-			  sector_t pblk, u64 len)
+			  sector_t sector, u64 len)
 {
 	const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode);
 	const unsigned int du_bits = ci->ci_data_unit_bits;
@@ -138,7 +138,6 @@ int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
 	const unsigned int du_per_page = 1U << du_per_page_bits;
 	u64 du_index = pos >> du_bits;
 	u64 du_remaining = len >> du_bits;
-	sector_t sector = pblk << (inode->i_blkbits - SECTOR_SHIFT);
 	struct page *pages[16]; /* write up to 16 pages at a time */
 	unsigned int nr_pages;
 	unsigned int i;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8ef61198e14c..fe258ffd4840 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -406,7 +406,8 @@ int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
 
 	if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
 		return fscrypt_zeroout_range(inode,
-				(loff_t)lblk << inode->i_blkbits, pblk,
+				(loff_t)lblk << inode->i_blkbits,
+				pblk << (inode->i_blkbits - SECTOR_SHIFT),
 				(u64)len << inode->i_blkbits);
 
 	ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8785f7c13657..a264771cfbc2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4163,7 +4163,7 @@ static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
 	if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
 		if (IS_ENCRYPTED(inode))
 			ret = fscrypt_zeroout_range(inode,
-					(loff_t)off << inode->i_blkbits, block,
+					(loff_t)off << inode->i_blkbits, sector,
 					(u64)len << inode->i_blkbits);
 		else
 			ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 90ac62fda926..54712ec61ffb 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -450,8 +450,8 @@ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name);
 
 /* bio.c */
 bool fscrypt_decrypt_bio(struct bio *bio);
-int fscrypt_zeroout_range(const struct inode *inode, loff_t pos, sector_t pblk,
-			  u64 len);
+int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
+			  sector_t sector, u64 len);
 
 /* hooks.c */
 int fscrypt_file_open(struct inode *inode, struct file *filp);
@@ -756,7 +756,7 @@ static inline bool fscrypt_decrypt_bio(struct bio *bio)
 }
 
 static inline int fscrypt_zeroout_range(const struct inode *inode, loff_t pos,
-					sector_t pblk, u64 len)
+					sector_t sector, u64 len)
 {
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3


From 102c8b26b54e363f85c4c86099ca049a0a76bb58 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Tue, 17 Feb 2026 08:08:35 -0800
Subject: PCI: Allow all bus devices to use the same slot

A PCIe hotplug slot applies to the entire secondary bus. Thus, pciehp only
allocates a single hotplug_slot for the bridge to that bus. The existing
PCI slot, though, would only match to functions on device 0, meaning any
devices beyond that, e.g., ARI functions, are not matched to any slot even
though they share it. A slot reset will break all the missing devices
because the handling skips them.

For example, ARI devices with more than 8 functions fail because their
state is not properly handled, nor is the attached driver notified of the
reset. In the best case, the device will appear unresponsive to the driver,
resulting in unexpected errors. A worse possibility may panic the kernel if
in-flight transactions trigger hardware reported errors like this real
observation:

  vfio-pci 0000:01:00.0: resetting
  vfio-pci 0000:01:00.0: reset done
  {1}[Hardware Error]:  Error 1, type: fatal
  {1}[Hardware Error]:   section_type: PCIe error
  {1}[Hardware Error]:   port_type: 0, PCIe end point
  {1}[Hardware Error]:   version: 0.2
  {1}[Hardware Error]:   command: 0x0140, status: 0x0010
  {1}[Hardware Error]:   device_id: 0000:01:01.0
  {1}[Hardware Error]:   slot: 0
  {1}[Hardware Error]:   secondary_bus: 0x00
  {1}[Hardware Error]:   vendor_id: 0x1d9b, device_id: 0x0207
  {1}[Hardware Error]:   class_code: 020000
  {1}[Hardware Error]:   bridge: secondary_status: 0x0000, control: 0x0000
  {1}[Hardware Error]:   aer_cor_status: 0x00008000, aer_cor_mask: 0x00002000
  {1}[Hardware Error]:   aer_uncor_status: 0x00010000, aer_uncor_mask: 0x00100000
  {1}[Hardware Error]:   aer_uncor_severity: 0x006f6030
  {1}[Hardware Error]:   TLP Header: 0a412800 00192080 60000004 00000004
  GHES: Fatal hardware error but panic disabled
  Kernel panic - not syncing: GHES: Fatal hardware error

Allow a slot to be created to claim all devices on a bus, not just a
matching device. This is done by introducing a sentinel value, named
PCI_SLOT_ALL_DEVICES, which then has the PCI slot match to any device on
the bus. This fixes slot resets for pciehp.

Since 0xff already has special meaning, the chosen value for this new
feature is 0xfe. This will not clash with any actual slot number since they
are limited to 5 bits.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260217160836.2709885-3-kbusch@meta.com
---
 drivers/pci/hotplug/pciehp_core.c |  3 ++-
 drivers/pci/slot.c                | 31 +++++++++++++++++++++++++++----
 include/linux/pci.h               | 10 +++++++++-
 3 files changed, 38 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 1e9158d7bac7..2cafd3b26f34 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -79,7 +79,8 @@ static int init_slot(struct controller *ctrl)
 	snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
 
 	retval = pci_hp_initialize(&ctrl->hotplug_slot,
-				   ctrl->pcie->port->subordinate, 0, name);
+				   ctrl->pcie->port->subordinate,
+				   PCI_SLOT_ALL_DEVICES, name);
 	if (retval) {
 		ctrl_err(ctrl, "pci_hp_initialize failed: error %d\n", retval);
 		kfree(ops);
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 787311614e5b..e0b7fb43423c 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -42,6 +42,15 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf)
 				  pci_domain_nr(slot->bus),
 				  slot->bus->number);
 
+	/*
+	 * Preserve legacy ABI expectations that hotplug drivers that manage
+	 * multiple devices per slot emit 0 for the device number.
+	 */
+	if (slot->number == PCI_SLOT_ALL_DEVICES)
+		return sysfs_emit(buf, "%04x:%02x:00\n",
+				  pci_domain_nr(slot->bus),
+				  slot->bus->number);
+
 	return sysfs_emit(buf, "%04x:%02x:%02x\n",
 			  pci_domain_nr(slot->bus),
 			  slot->bus->number,
@@ -73,7 +82,8 @@ static void pci_slot_release(struct kobject *kobj)
 
 	down_read(&pci_bus_sem);
 	list_for_each_entry(dev, &slot->bus->devices, bus_list)
-		if (PCI_SLOT(dev->devfn) == slot->number)
+		if (slot->number == PCI_SLOT_ALL_DEVICES ||
+		    PCI_SLOT(dev->devfn) == slot->number)
 			dev->slot = NULL;
 	up_read(&pci_bus_sem);
 
@@ -166,7 +176,8 @@ void pci_dev_assign_slot(struct pci_dev *dev)
 
 	mutex_lock(&pci_slot_mutex);
 	list_for_each_entry(slot, &dev->bus->slots, list)
-		if (PCI_SLOT(dev->devfn) == slot->number)
+		if (slot->number == PCI_SLOT_ALL_DEVICES ||
+		    PCI_SLOT(dev->devfn) == slot->number)
 			dev->slot = slot;
 	mutex_unlock(&pci_slot_mutex);
 }
@@ -188,7 +199,8 @@ static struct pci_slot *get_slot(struct pci_bus *parent, int slot_nr)
 /**
  * pci_create_slot - create or increment refcount for physical PCI slot
  * @parent: struct pci_bus of parent bridge
- * @slot_nr: PCI_SLOT(pci_dev->devfn) or -1 for placeholder
+ * @slot_nr: PCI_SLOT(pci_dev->devfn), -1 for placeholder, or
+ *	PCI_SLOT_ALL_DEVICES
  * @name: user visible string presented in /sys/bus/pci/slots/<name>
  * @hotplug: set if caller is hotplug driver, NULL otherwise
  *
@@ -222,6 +234,16 @@ static struct pci_slot *get_slot(struct pci_bus *parent, int slot_nr)
  * consist solely of a dddd:bb tuple, where dddd is the PCI domain of the
  * %struct pci_bus and bb is the bus number. In other words, the devfn of
  * the 'placeholder' slot will not be displayed.
+ *
+ * Bus-wide slots:
+ * For PCIe hotplug, the physical slot encompasses the entire secondary
+ * bus, not just a single device number. If the device supports ARI and ARI
+ * Forwarding is enabled in the upstream bridge, a multi-function device
+ * may include functions that appear to have several different device
+ * numbers, i.e., PCI_SLOT() values.  Pass @slot_nr == PCI_SLOT_ALL_DEVICES
+ * to create a slot that matches all devices on the bus. Unlike placeholder
+ * slots, bus-wide slots go through normal slot lookup and reuse existing
+ * slots if present.
  */
 struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
 				 const char *name,
@@ -285,7 +307,8 @@ placeholder:
 
 	down_read(&pci_bus_sem);
 	list_for_each_entry(dev, &parent->devices, bus_list)
-		if (PCI_SLOT(dev->devfn) == slot_nr)
+		if (slot_nr == PCI_SLOT_ALL_DEVICES ||
+		    PCI_SLOT(dev->devfn) == slot_nr)
 			dev->slot = slot;
 	up_read(&pci_bus_sem);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1c270f1d5123..5ae2dfdb2d6f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -72,12 +72,20 @@
 /* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */
 #define PCI_BUS_NUM(x) (((x) >> 8) & 0xff)
 
+/*
+ * PCI_SLOT_ALL_DEVICES indicates a slot that covers all devices on the bus.
+ * Used for PCIe hotplug where the physical slot is the entire secondary bus,
+ * and, if ARI Forwarding is enabled, functions may appear to be on multiple
+ * devices.
+ */
+#define PCI_SLOT_ALL_DEVICES	0xfe
+
 /* pci_slot represents a physical slot */
 struct pci_slot {
 	struct pci_bus		*bus;		/* Bus this slot is on */
 	struct list_head	list;		/* Node in list of slots */
 	struct hotplug_slot	*hotplug;	/* Hotplug info (move here) */
-	unsigned char		number;		/* PCI_SLOT(pci_dev->devfn) */
+	unsigned char		number;		/* Device nr, or PCI_SLOT_ALL_DEVICES */
 	struct kobject		kobj;
 };
 
-- 
cgit v1.2.3


From abb0eb0b033a0a8980eb9215e02626e4801ead3f Mon Sep 17 00:00:00 2001
From: Qingfang Deng <dqfext@gmail.com>
Date: Fri, 6 Mar 2026 17:36:49 +0800
Subject: ppp: simplify input error handling

Currently, ppp_input_error() indicates an error by allocating a 0-length
skb and calling ppp_do_recv(). It takes an error code argument, which is
stored in skb->cb, but not used by ppp_receive_frame().

Simplify the error handling by removing the unused parameter and the
unnecessary skb allocation. Instead, call ppp_receive_error() directly
from ppp_input_error() under the recv lock, and the length check in
ppp_receive_frame() can be removed.

Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ppp/ppp_async.c   |  2 +-
 drivers/net/ppp/ppp_generic.c | 31 ++++++++++---------------------
 drivers/net/ppp/ppp_synctty.c |  2 +-
 include/linux/ppp_channel.h   |  2 +-
 net/atm/pppoatm.c             |  2 +-
 5 files changed, 14 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index b4cf2d09c6bd..93a7b0f6c4e7 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -491,7 +491,7 @@ static void ppp_async_process(struct tasklet_struct *t)
 	/* process received packets */
 	while ((skb = skb_dequeue(&ap->rqueue)) != NULL) {
 		if (skb->cb[0])
-			ppp_input_error(&ap->chan, 0);
+			ppp_input_error(&ap->chan);
 		ppp_input(&ap->chan, skb);
 	}
 
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 2081da6c2144..6344c5eb0f98 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2383,12 +2383,10 @@ done:
 	rcu_read_unlock_bh();
 }
 
-/* Put a 0-length skb in the receive queue as an error indication */
 void
-ppp_input_error(struct ppp_channel *chan, int code)
+ppp_input_error(struct ppp_channel *chan)
 {
 	struct channel *pch = chan->ppp;
-	struct sk_buff *skb;
 	struct ppp *ppp;
 
 	if (!pch)
@@ -2397,12 +2395,9 @@ ppp_input_error(struct ppp_channel *chan, int code)
 	rcu_read_lock_bh();
 	ppp = rcu_dereference_bh(pch->ppp);
 	if (ppp) {
-		skb = alloc_skb(0, GFP_ATOMIC);
-		if (skb) {
-			skb->len = 0;		/* probably unnecessary */
-			skb->cb[0] = code;
-			ppp_do_recv(ppp, skb, pch);
-		}
+		ppp_recv_lock(ppp);
+		ppp_receive_error(ppp);
+		ppp_recv_unlock(ppp);
 	}
 	rcu_read_unlock_bh();
 }
@@ -2414,20 +2409,14 @@ ppp_input_error(struct ppp_channel *chan, int code)
 static void
 ppp_receive_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
 {
-	/* note: a 0-length skb is used as an error indication */
-	if (skb->len > 0) {
-		skb_checksum_complete_unset(skb);
+	skb_checksum_complete_unset(skb);
 #ifdef CONFIG_PPP_MULTILINK
-		/* XXX do channel-level decompression here */
-		if (PPP_PROTO(skb) == PPP_MP)
-			ppp_receive_mp_frame(ppp, skb, pch);
-		else
+	/* XXX do channel-level decompression here */
+	if (PPP_PROTO(skb) == PPP_MP)
+		ppp_receive_mp_frame(ppp, skb, pch);
+	else
 #endif /* CONFIG_PPP_MULTILINK */
-			ppp_receive_nonmp_frame(ppp, skb);
-	} else {
-		kfree_skb(skb);
-		ppp_receive_error(ppp);
-	}
+		ppp_receive_nonmp_frame(ppp, skb);
 }
 
 static void
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index c2063961f395..b7f243b416f8 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -483,7 +483,7 @@ static void ppp_sync_process(struct tasklet_struct *t)
 	while ((skb = skb_dequeue(&ap->rqueue)) != NULL) {
 		if (skb->len == 0) {
 			/* zero length buffers indicate error */
-			ppp_input_error(&ap->chan, 0);
+			ppp_input_error(&ap->chan);
 			kfree_skb(skb);
 		}
 		else
diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
index f73fbea0dbc2..ca8ad03eeef0 100644
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -55,7 +55,7 @@ extern void ppp_input(struct ppp_channel *, struct sk_buff *);
 
 /* Called by the channel when an input error occurs, indicating
    that we may have missed a packet. */
-extern void ppp_input_error(struct ppp_channel *, int code);
+extern void ppp_input_error(struct ppp_channel *);
 
 /* Attach a channel to a given PPP unit in specified net. */
 extern int ppp_register_net_channel(struct net *, struct ppp_channel *);
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 2574aae3e066..e3c422dc533a 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -228,7 +228,7 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 
 error:
 	kfree_skb(skb);
-	ppp_input_error(&pvcc->chan, 0);
+	ppp_input_error(&pvcc->chan);
 }
 
 static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
-- 
cgit v1.2.3


From 4b78c9cbd8f1fbb9517aee48b372646f4cf05442 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 8 Mar 2026 12:23:02 +0000
Subject: tcp: move tp->chrono_type next tp->chrono_stat[]

chrono_type is currently in tcp_sock_read_txrx group, which
is supposed to hold read-mostly fields.

But chrono_type is mostly written in tx path, it should
be moved to tcp_sock_write_tx group, close to other
chrono fields (chrono_stat[], chrono_start).

Note this adds holes, but data locality is far more important.

Use a full u8 for the time being, compiler can generate
more efficient code.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Link: https://patch.msgid.link/20260308122302.2895067-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/tcp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f72eef31fa23..c44cf9ae8d16 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -228,8 +228,7 @@ struct tcp_sock {
 	u32	sacked_out;	/* SACK'd packets			*/
 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/
 	u8	scaling_ratio;	/* see tcp_win_from_space() */
-	u8	chrono_type : 2,	/* current chronograph type */
-		repair      : 1,
+	u8	repair      : 1,
 		tcp_usec_ts : 1, /* TSval values in usec */
 		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */
 		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
@@ -264,6 +263,7 @@ struct tcp_sock {
 				 * total number of data bytes sent.
 				 */
 	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
+	u8	chrono_type;	/* current chronograph type */
 	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */
 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
-- 
cgit v1.2.3


From 4d25c7d68896b4002c4ab5cd646775392bb7fbb4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:09 -0800
Subject: iomap: pass the iomap_iter to ->submit_read

This provides additional context for file systems.

Rename the fuse instance to match the method name while we're at it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260223132021.292832-10-hch@lst.de
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fuse/file.c         | 5 +++--
 fs/iomap/bio.c         | 3 ++-
 fs/iomap/buffered-io.c | 4 ++--
 fs/ntfs3/inode.c       | 3 ++-
 include/linux/iomap.h  | 3 ++-
 5 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b1bb7153cb78..a9c836d7f586 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -947,7 +947,8 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
 	return ret;
 }
 
-static void fuse_iomap_read_submit(struct iomap_read_folio_ctx *ctx)
+static void fuse_iomap_submit_read(const struct iomap_iter *iter,
+		struct iomap_read_folio_ctx *ctx)
 {
 	struct fuse_fill_read_data *data = ctx->read_ctx;
 
@@ -958,7 +959,7 @@ static void fuse_iomap_read_submit(struct iomap_read_folio_ctx *ctx)
 
 static const struct iomap_read_ops fuse_iomap_read_ops = {
 	.read_folio_range = fuse_iomap_read_folio_range_async,
-	.submit_read = fuse_iomap_read_submit,
+	.submit_read = fuse_iomap_submit_read,
 };
 
 static int fuse_read_folio(struct file *file, struct folio *folio)
diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c
index 578b1202e037..cb60d1facb5a 100644
--- a/fs/iomap/bio.c
+++ b/fs/iomap/bio.c
@@ -18,7 +18,8 @@ static void iomap_read_end_io(struct bio *bio)
 	bio_put(bio);
 }
 
-static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
+static void iomap_bio_submit_read(const struct iomap_iter *iter,
+		struct iomap_read_folio_ctx *ctx)
 {
 	struct bio *bio = ctx->read_ctx;
 
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 00f0efaf12b2..f4ee2b1cb877 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -597,7 +597,7 @@ void iomap_read_folio(const struct iomap_ops *ops,
 				&bytes_submitted);
 
 	if (ctx->ops->submit_read)
-		ctx->ops->submit_read(ctx);
+		ctx->ops->submit_read(&iter, ctx);
 
 	if (ctx->cur_folio)
 		iomap_read_end(ctx->cur_folio, bytes_submitted);
@@ -664,7 +664,7 @@ void iomap_readahead(const struct iomap_ops *ops,
 					&cur_bytes_submitted);
 
 	if (ctx->ops->submit_read)
-		ctx->ops->submit_read(ctx);
+		ctx->ops->submit_read(&iter, ctx);
 
 	if (ctx->cur_folio)
 		iomap_read_end(ctx->cur_folio, cur_bytes_submitted);
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 6e65066ebcc1..511967ef7ec9 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -651,7 +651,8 @@ static int ntfs_iomap_bio_read_folio_range(const struct iomap_iter *iter,
 	return 0;
 }
 
-static void ntfs_iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
+static void ntfs_iomap_bio_submit_read(const struct iomap_iter *iter,
+		struct iomap_read_folio_ctx *ctx)
 {
 	struct bio *bio = ctx->read_ctx;
 
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 99b7209dabd7..6fbe121e2adf 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -512,7 +512,8 @@ struct iomap_read_ops {
 	 *
 	 * This is optional.
 	 */
-	void (*submit_read)(struct iomap_read_folio_ctx *ctx);
+	void (*submit_read)(const struct iomap_iter *iter,
+			struct iomap_read_folio_ctx *ctx);
 };
 
 /*
-- 
cgit v1.2.3


From 5f4fe046cb3c84eed719f7becbe822000e1a589e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:11 -0800
Subject: iomap: allow file systems to hook into buffered read bio submission

File systems such as btrfs have additional operations with bios such as
verifying data checksums.  Allow file systems to hook into submission
of the bio to allow for this processing by replacing the direct
submit_bio call in iomap_read_alloc_bio with a call into ->submit_read
and exporting iomap_read_alloc_bio.  Also add a new field to
struct iomap_read_folio_ctx to track the file logic offset of the current
read context.

Based on a patch from Goldwyn Rodrigues <rgoldwyn@suse.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260223132021.292832-12-hch@lst.de
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/iomap/bio.c        | 15 +++++++++------
 include/linux/iomap.h |  4 ++++
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c
index 80bbd328bd3c..903cb9fe759e 100644
--- a/fs/iomap/bio.c
+++ b/fs/iomap/bio.c
@@ -32,10 +32,11 @@ static void iomap_read_alloc_bio(const struct iomap_iter *iter,
 	struct folio *folio = ctx->cur_folio;
 	gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
 	gfp_t orig_gfp = gfp;
-	struct bio *bio = ctx->read_ctx;
+	struct bio *bio;
 
-	if (bio)
-		submit_bio(bio);
+	/* Submit the existing range if there was one. */
+	if (ctx->read_ctx)
+		ctx->ops->submit_read(iter, ctx);
 
 	/* Same as readahead_gfp_mask: */
 	if (ctx->rac)
@@ -56,9 +57,10 @@ static void iomap_read_alloc_bio(const struct iomap_iter *iter,
 	bio_add_folio_nofail(bio, folio, plen,
 			offset_in_folio(folio, iter->pos));
 	ctx->read_ctx = bio;
+	ctx->read_ctx_file_offset = iter->pos;
 }
 
-static int iomap_bio_read_folio_range(const struct iomap_iter *iter,
+int iomap_bio_read_folio_range(const struct iomap_iter *iter,
 		struct iomap_read_folio_ctx *ctx, size_t plen)
 {
 	struct folio *folio = ctx->cur_folio;
@@ -70,10 +72,11 @@ static int iomap_bio_read_folio_range(const struct iomap_iter *iter,
 		iomap_read_alloc_bio(iter, ctx, plen);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(iomap_bio_read_folio_range);
 
 const struct iomap_read_ops iomap_bio_read_ops = {
-	.read_folio_range = iomap_bio_read_folio_range,
-	.submit_read = iomap_bio_submit_read,
+	.read_folio_range	= iomap_bio_read_folio_range,
+	.submit_read		= iomap_bio_submit_read,
 };
 EXPORT_SYMBOL_GPL(iomap_bio_read_ops);
 
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 6fbe121e2adf..b2b9e649a3b8 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -493,6 +493,7 @@ struct iomap_read_folio_ctx {
 	struct folio		*cur_folio;
 	struct readahead_control *rac;
 	void			*read_ctx;
+	loff_t			read_ctx_file_offset;
 };
 
 struct iomap_read_ops {
@@ -599,6 +600,9 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
 extern struct bio_set iomap_ioend_bioset;
 
 #ifdef CONFIG_BLOCK
+int iomap_bio_read_folio_range(const struct iomap_iter *iter,
+		struct iomap_read_folio_ctx *ctx, size_t plen);
+
 extern const struct iomap_read_ops iomap_bio_read_ops;
 
 static inline void iomap_bio_read_folio(struct folio *folio,
-- 
cgit v1.2.3


From 57287771fa8d77841149bf847b629f29acbad35b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:13 -0800
Subject: iomap: add a bioset pointer to iomap_read_folio_ops

Optionally allocate the bio from the bioset provided in
iomap_read_folio_ops.  If no bioset is provided, fs_bio_set is still
used, which is the standard bioset for file systems.

Based on a patch from Goldwyn Rodrigues <rgoldwyn@suse.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260223132021.292832-14-hch@lst.de
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/iomap/bio.c        | 14 ++++++++++++--
 include/linux/iomap.h |  6 ++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c
index 903cb9fe759e..259a2bf95a43 100644
--- a/fs/iomap/bio.c
+++ b/fs/iomap/bio.c
@@ -24,11 +24,19 @@ static void iomap_bio_submit_read(const struct iomap_iter *iter,
 	submit_bio(ctx->read_ctx);
 }
 
+static struct bio_set *iomap_read_bio_set(struct iomap_read_folio_ctx *ctx)
+{
+	if (ctx->ops && ctx->ops->bio_set)
+		return ctx->ops->bio_set;
+	return &fs_bio_set;
+}
+
 static void iomap_read_alloc_bio(const struct iomap_iter *iter,
 		struct iomap_read_folio_ctx *ctx, size_t plen)
 {
 	const struct iomap *iomap = &iter->iomap;
 	unsigned int nr_vecs = DIV_ROUND_UP(iomap_length(iter), PAGE_SIZE);
+	struct bio_set *bio_set = iomap_read_bio_set(ctx);
 	struct folio *folio = ctx->cur_folio;
 	gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
 	gfp_t orig_gfp = gfp;
@@ -47,9 +55,11 @@ static void iomap_read_alloc_bio(const struct iomap_iter *iter,
 	 * having to deal with partial page reads.  This emulates what
 	 * do_mpage_read_folio does.
 	 */
-	bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ, gfp);
+	bio = bio_alloc_bioset(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ,
+			gfp, bio_set);
 	if (!bio)
-		bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp);
+		bio = bio_alloc_bioset(iomap->bdev, 1, REQ_OP_READ, orig_gfp,
+				bio_set);
 	if (ctx->rac)
 		bio->bi_opf |= REQ_RAHEAD;
 	bio->bi_iter.bi_sector = iomap_sector(iomap, iter->pos);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index b2b9e649a3b8..387a1174522f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -515,6 +515,12 @@ struct iomap_read_ops {
 	 */
 	void (*submit_read)(const struct iomap_iter *iter,
 			struct iomap_read_folio_ctx *ctx);
+
+	/*
+	 * Optional, allows filesystem to specify own bio_set, so new bio's
+	 * can be allocated from the provided bio_set.
+	 */
+	struct bio_set *bio_set;
 };
 
 /*
-- 
cgit v1.2.3


From 0b10a370529cbd7b918c1eef43d409e43d9e0b78 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:15 -0800
Subject: iomap: support T10 protection information

Add support for generating / verifying protection information in iomap.
This is done by hooking into the bio submission and then using the
generic PI helpers.  Compared to just using the block layer auto PI
this extends the protection envelope and also prepares for eventually
passing through PI from userspace at least for direct I/O.

To generate or verify PI, the file system needs to set the
IOMAP_F_INTEGRITY flag on the iomap for the request, and ensure the
ioends are used for all integrity I/O.  Additionally the file system
must defer read I/O completions to user context so that the guard
tag validation isn't run from interrupt context.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260223132021.292832-16-hch@lst.de
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/iomap/bio.c        | 24 +++++++++++++++++++++---
 fs/iomap/direct-io.c  | 15 ++++++++++++++-
 fs/iomap/internal.h   | 13 +++++++++++++
 fs/iomap/ioend.c      | 20 ++++++++++++++++++--
 include/linux/iomap.h |  7 +++++++
 5 files changed, 73 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c
index b4de67bdd513..f989ffcaac96 100644
--- a/fs/iomap/bio.c
+++ b/fs/iomap/bio.c
@@ -3,6 +3,7 @@
  * Copyright (C) 2010 Red Hat, Inc.
  * Copyright (C) 2016-2023 Christoph Hellwig.
  */
+#include <linux/bio-integrity.h>
 #include <linux/iomap.h>
 #include <linux/pagemap.h>
 #include "internal.h"
@@ -17,6 +18,8 @@ static u32 __iomap_read_end_io(struct bio *bio, int error)
 		iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error);
 		folio_count++;
 	}
+	if (bio_integrity(bio))
+		fs_bio_integrity_free(bio);
 	bio_put(bio);
 	return folio_count;
 }
@@ -34,7 +37,11 @@ u32 iomap_finish_ioend_buffered_read(struct iomap_ioend *ioend)
 static void iomap_bio_submit_read(const struct iomap_iter *iter,
 		struct iomap_read_folio_ctx *ctx)
 {
-	submit_bio(ctx->read_ctx);
+	struct bio *bio = ctx->read_ctx;
+
+	if (iter->iomap.flags & IOMAP_F_INTEGRITY)
+		fs_bio_integrity_alloc(bio);
+	submit_bio(bio);
 }
 
 static struct bio_set *iomap_read_bio_set(struct iomap_read_folio_ctx *ctx)
@@ -91,6 +98,7 @@ int iomap_bio_read_folio_range(const struct iomap_iter *iter,
 
 	if (!bio ||
 	    bio_end_sector(bio) != iomap_sector(&iter->iomap, iter->pos) ||
+	    bio->bi_iter.bi_size > iomap_max_bio_size(&iter->iomap) - plen ||
 	    !bio_add_folio(bio, folio, plen, offset_in_folio(folio, iter->pos)))
 		iomap_read_alloc_bio(iter, ctx, plen);
 	return 0;
@@ -107,11 +115,21 @@ int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter,
 		struct folio *folio, loff_t pos, size_t len)
 {
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
+	sector_t sector = iomap_sector(srcmap, pos);
 	struct bio_vec bvec;
 	struct bio bio;
+	int error;
 
 	bio_init(&bio, srcmap->bdev, &bvec, 1, REQ_OP_READ);
-	bio.bi_iter.bi_sector = iomap_sector(srcmap, pos);
+	bio.bi_iter.bi_sector = sector;
 	bio_add_folio_nofail(&bio, folio, len, offset_in_folio(folio, pos));
-	return submit_bio_wait(&bio);
+	if (srcmap->flags & IOMAP_F_INTEGRITY)
+		fs_bio_integrity_alloc(&bio);
+	error = submit_bio_wait(&bio);
+	if (srcmap->flags & IOMAP_F_INTEGRITY) {
+		if (!error)
+			error = fs_bio_integrity_verify(&bio, sector, len);
+		fs_bio_integrity_free(&bio);
+	}
+	return error;
 }
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 2cb0c0f43215..c24d94349ca5 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -3,6 +3,7 @@
  * Copyright (C) 2010 Red Hat, Inc.
  * Copyright (c) 2016-2025 Christoph Hellwig.
  */
+#include <linux/bio-integrity.h>
 #include <linux/blk-crypto.h>
 #include <linux/fscrypt.h>
 #include <linux/pagemap.h>
@@ -240,6 +241,9 @@ static void __iomap_dio_bio_end_io(struct bio *bio, bool inline_completion)
 {
 	struct iomap_dio *dio = bio->bi_private;
 
+	if (bio_integrity(bio))
+		fs_bio_integrity_free(bio);
+
 	if (dio->flags & IOMAP_DIO_BOUNCE) {
 		bio_iov_iter_unbounce(bio, !!dio->error,
 				dio->flags & IOMAP_DIO_USER_BACKED);
@@ -350,8 +354,10 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter,
 	bio->bi_private = dio;
 	bio->bi_end_io = iomap_dio_bio_end_io;
 
+
 	if (dio->flags & IOMAP_DIO_BOUNCE)
-		ret = bio_iov_iter_bounce(bio, dio->submit.iter, BIO_MAX_SIZE);
+		ret = bio_iov_iter_bounce(bio, dio->submit.iter,
+				iomap_max_bio_size(&iter->iomap));
 	else
 		ret = bio_iov_iter_get_pages(bio, dio->submit.iter,
 					     alignment - 1);
@@ -368,6 +374,13 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter,
 		goto out_put_bio;
 	}
 
+	if (iter->iomap.flags & IOMAP_F_INTEGRITY) {
+		if (dio->flags & IOMAP_DIO_WRITE)
+			fs_bio_integrity_generate(bio);
+		else
+			fs_bio_integrity_alloc(bio);
+	}
+
 	if (dio->flags & IOMAP_DIO_WRITE)
 		task_io_account_write(ret);
 	else if ((dio->flags & IOMAP_DIO_USER_BACKED) &&
diff --git a/fs/iomap/internal.h b/fs/iomap/internal.h
index b39dbc17e3f0..74e898b196dc 100644
--- a/fs/iomap/internal.h
+++ b/fs/iomap/internal.h
@@ -4,6 +4,19 @@
 
 #define IOEND_BATCH_SIZE	4096
 
+/*
+ * Normally we can build bios as big as the data structure supports.
+ *
+ * But for integrity protected I/O we need to respect the maximum size of the
+ * single contiguous allocation for the integrity buffer.
+ */
+static inline size_t iomap_max_bio_size(const struct iomap *iomap)
+{
+	if (iomap->flags & IOMAP_F_INTEGRITY)
+		return max_integrity_io_size(bdev_limits(iomap->bdev));
+	return BIO_MAX_SIZE;
+}
+
 u32 iomap_finish_ioend_buffered_read(struct iomap_ioend *ioend);
 u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend);
 
diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c
index 450ab002eb91..7c034b6a583e 100644
--- a/fs/iomap/ioend.c
+++ b/fs/iomap/ioend.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (c) 2016-2025 Christoph Hellwig.
  */
+#include <linux/bio-integrity.h>
 #include <linux/iomap.h>
 #include <linux/list_sort.h>
 #include <linux/pagemap.h>
@@ -65,6 +66,8 @@ static u32 iomap_finish_ioend_buffered_write(struct iomap_ioend *ioend)
 		folio_count++;
 	}
 
+	if (bio_integrity(bio))
+		fs_bio_integrity_free(bio);
 	bio_put(bio);	/* frees the ioend */
 	return folio_count;
 }
@@ -144,6 +147,8 @@ int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error)
 		return error;
 	}
 
+	if (wpc->iomap.flags & IOMAP_F_INTEGRITY)
+		fs_bio_integrity_generate(&ioend->io_bio);
 	submit_bio(&ioend->io_bio);
 	return 0;
 }
@@ -165,10 +170,13 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
 }
 
 static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos,
-		u16 ioend_flags)
+		unsigned int map_len, u16 ioend_flags)
 {
 	struct iomap_ioend *ioend = wpc->wb_ctx;
 
+	if (ioend->io_bio.bi_iter.bi_size >
+	    iomap_max_bio_size(&wpc->iomap) - map_len)
+		return false;
 	if (ioend_flags & IOMAP_IOEND_BOUNDARY)
 		return false;
 	if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) !=
@@ -234,7 +242,7 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
 	if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
 		ioend_flags |= IOMAP_IOEND_BOUNDARY;
 
-	if (!ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) {
+	if (!ioend || !iomap_can_add_to_ioend(wpc, pos, map_len, ioend_flags)) {
 new_ioend:
 		if (ioend) {
 			error = wpc->ops->writeback_submit(wpc, 0);
@@ -311,6 +319,14 @@ static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
 
 	if (!atomic_dec_and_test(&ioend->io_remaining))
 		return 0;
+
+	if (!ioend->io_error &&
+	    bio_integrity(&ioend->io_bio) &&
+	    bio_op(&ioend->io_bio) == REQ_OP_READ) {
+		ioend->io_error = fs_bio_integrity_verify(&ioend->io_bio,
+			ioend->io_sector, ioend->io_size);
+	}
+
 	if (ioend->io_flags & IOMAP_IOEND_DIRECT)
 		return iomap_finish_ioend_direct(ioend);
 	if (bio_op(&ioend->io_bio) == REQ_OP_READ)
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 387a1174522f..531f9ebdeeae 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -65,6 +65,8 @@ struct vm_fault;
  *
  * IOMAP_F_ATOMIC_BIO indicates that (write) I/O will be issued as an atomic
  * bio, i.e. set REQ_ATOMIC.
+ *
+ * IOMAP_F_INTEGRITY indicates that the filesystems handles integrity metadata.
  */
 #define IOMAP_F_NEW		(1U << 0)
 #define IOMAP_F_DIRTY		(1U << 1)
@@ -79,6 +81,11 @@ struct vm_fault;
 #define IOMAP_F_BOUNDARY	(1U << 6)
 #define IOMAP_F_ANON_WRITE	(1U << 7)
 #define IOMAP_F_ATOMIC_BIO	(1U << 8)
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+#define IOMAP_F_INTEGRITY	(1U << 9)
+#else
+#define IOMAP_F_INTEGRITY	0
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 /*
  * Flag reserved for file system specific usage
-- 
cgit v1.2.3


From 1b891f4c852817b3afd231712ab7e171932e1eb1 Mon Sep 17 00:00:00 2001
From: "Derek J. Clark" <derekjohn.clark@gmail.com>
Date: Tue, 10 Mar 2026 07:29:19 +0000
Subject: include: device.h: Add named device attributes

Adds DEVICE_ATTR_[RW|RO|WO]_NAMED macros for adding attributes that
reuse the same sysfs name in a driver under separate subdirectories.

When dealing with some devices it can be useful to be able to reuse
the same name for similar attributes under a different subdirectory.
For example, a single logical HID endpoint may provide a configuration
interface for multiple physical devices. In such a case it is useful to
provide symmetrical attribute names under different subdirectories on
the configuration device. The Lenovo Legion Go is one such device,
providing configuration to a detachable left controller, detachable
right controller, the wireless transmission dongle, and the MCU. It is
therefore beneficial to treat each of these as individual devices in
the driver, providing a subdirectory for each physical device in the
sysfs. As some attributes are reused by each physical device, it
provides a much cleaner interface if the same driver can reuse the same
attribute name in sysfs while uniquely distinguishing the store/show
functions in the driver, rather than repeat string portions.

Example new WO attrs:
ATTRS{left_handle/reset}=="(not readable)"
ATTRS{right_handle/reset}=="(not readable)"
ATTRS{tx_dongle/reset}=="(not readable)"

vs old WO attrs in a subdir:
ATTRS{left_handle/left_handle_reset}=="(not readable)"
ATTRS{right_handle/right_handle_reset}=="(not readable)"
ATTRS{tx_dongle/tx_dongle_reset}=="(not readable)"

or old WO attrs with no subdir:
ATTRS{left_handle_reset}=="(not readable)"
ATTRS{right_handle_reset}=="(not readable)"
ATTRS{tx_dongle_reset}=="(not readable)"

While the third option is usable, it doesn't logically break up the
physical devices and creates a device directory with over 80 attributes
once all attrs are defined.

Reviewed-by: Mark Pearson <mpearson-lenovo@squebb.ca>
Signed-off-by: Derek J. Clark <derekjohn.clark@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 include/linux/device.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 0be95294b6e6..381463baed6d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -189,6 +189,22 @@ ssize_t device_show_string(struct device *dev, struct device_attribute *attr,
 #define DEVICE_ATTR_ADMIN_RW(_name) \
 	struct device_attribute dev_attr_##_name = __ATTR_RW_MODE(_name, 0600)
 
+/**
+ * DEVICE_ATTR_RW_NAMED - Define a read-write device attribute with a sysfs name
+ * that differs from the function name.
+ * @_name: Attribute function preface
+ * @_attrname: Attribute name as it wil be exposed in the sysfs.
+ *
+ * Like DEVICE_ATTR_RW(), but allows for reusing names under separate paths in
+ * the same driver.
+ */
+#define DEVICE_ATTR_RW_NAMED(_name, _attrname)                            \
+	struct device_attribute dev_attr_##_name = {                      \
+		.attr = { .name = _attrname, .mode = 0644 }, \
+		.show = _name##_show,                                     \
+		.store = _name##_store,                                   \
+	}
+
 /**
  * DEVICE_ATTR_RO - Define a readable device attribute.
  * @_name: Attribute name.
@@ -207,6 +223,21 @@ ssize_t device_show_string(struct device *dev, struct device_attribute *attr,
 #define DEVICE_ATTR_ADMIN_RO(_name) \
 	struct device_attribute dev_attr_##_name = __ATTR_RO_MODE(_name, 0400)
 
+/**
+ * DEVICE_ATTR_RO_NAMED - Define a read-only device attribute with a sysfs name
+ * that differs from the function name.
+ * @_name: Attribute function preface
+ * @_attrname: Attribute name as it wil be exposed in the sysfs.
+ *
+ * Like DEVICE_ATTR_RO(), but allows for reusing names under separate paths in
+ * the same driver.
+ */
+#define DEVICE_ATTR_RO_NAMED(_name, _attrname)                            \
+	struct device_attribute dev_attr_##_name = {                      \
+		.attr = { .name = _attrname, .mode = 0444 }, \
+		.show = _name##_show,                                     \
+	}
+
 /**
  * DEVICE_ATTR_WO - Define an admin-only writable device attribute.
  * @_name: Attribute name.
@@ -216,6 +247,21 @@ ssize_t device_show_string(struct device *dev, struct device_attribute *attr,
 #define DEVICE_ATTR_WO(_name) \
 	struct device_attribute dev_attr_##_name = __ATTR_WO(_name)
 
+/**
+ * DEVICE_ATTR_WO_NAMED - Define a read-only device attribute with a sysfs name
+ * that differs from the function name.
+ * @_name: Attribute function preface
+ * @_attrname: Attribute name as it wil be exposed in the sysfs.
+ *
+ * Like DEVICE_ATTR_WO(), but allows for reusing names under separate paths in
+ * the same driver.
+ */
+#define DEVICE_ATTR_WO_NAMED(_name, _attrname)                            \
+	struct device_attribute dev_attr_##_name = {                      \
+		.attr = { .name = _attrname, .mode = 0200 }, \
+		.store = _name##_store,                                   \
+	}
+
 /**
  * DEVICE_ULONG_ATTR - Define a device attribute backed by an unsigned long.
  * @_name: Attribute name.
-- 
cgit v1.2.3


From 6ca9029c823b7853e980585e757343e0e84227cd Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Tue, 10 Mar 2026 07:29:27 +0000
Subject: HID: Include firmware version in the uevent

Userspace software fwupd probes some HID devices when the daemon starts
up to determine the current firmware version in order to be able to offer
updated firmware if the manufacturer has made it available.

In order to do this fwupd will detach the existing kernel driver if one
is present, send a HID command and then reattach the kernel driver.

This can be problematic if the user is using the HID device at the time
that fwupd probes the hardware and can cause a few frames of input to be
dropped.  In some cases HID drivers already have a command to look up the
firmware version, and so if that is exported to userspace fwupd can
discover it and avoid needing to detach the kernel driver until it's time
to update the device.

Introduce a new member in the struct hid_device for the version and export
a new uevent variable HID_FIRMWARE_VERSION that will display the version
that HID drivers obtained.

Reviewed-by: Derek J. Clark <derekjohn.clark@gmail.com>
Reviewed-by: Mark Pearson <mpearson-lenovo@squebb.ca>
Cc: Richard Hughes <hughsient@gmail.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 drivers/hid/hid-core.c | 5 +++++
 include/linux/hid.h    | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 840a60113868..da57cbf0af26 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2887,6 +2887,11 @@ static int hid_uevent(const struct device *dev, struct kobj_uevent_env *env)
 	if (add_uevent_var(env, "MODALIAS=hid:b%04Xg%04Xv%08Xp%08X",
 			   hdev->bus, hdev->group, hdev->vendor, hdev->product))
 		return -ENOMEM;
+	if (hdev->firmware_version) {
+		if (add_uevent_var(env, "HID_FIRMWARE_VERSION=0x%04llX",
+				   hdev->firmware_version))
+			return -ENOMEM;
+	}
 
 	return 0;
 }
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 2990b9f94cb5..b0b70c05049d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -698,6 +698,7 @@ struct hid_device {
 	char name[128];							/* Device name */
 	char phys[64];							/* Device physical location */
 	char uniq[64];							/* Device unique identifier (serial #) */
+	u64 firmware_version;						/* Firmware version */
 
 	void *driver_data;
 
-- 
cgit v1.2.3


From 1dfc9d60a69ec148e1cb709256617d86e5f0e8f8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Date: Thu, 5 Mar 2026 22:45:40 +0100
Subject: workqueue: devres: Add device-managed allocate workqueue

Add a Resource-managed version of alloc_workqueue() to fix common
problem of drivers mixing devm() calls with destroy_workqueue.  Such
naive and discouraged driver approach leads to difficult to debug bugs
when the driver:

1. Allocates workqueue in standard way and destroys it in driver
   remove() callback,
2. Sets work struct with devm_work_autocancel(),
3. Registers interrupt handler with devm_request_threaded_irq().

Which leads to following unbind/removal path:

1. destroy_workqueue() via driver remove(),
   Any interrupt coming now would still execute the interrupt handler,
   which queues work on destroyed workqueue.
2. devm_irq_release(),
3. devm_work_drop() -> cancel_work_sync() on destroyed workqueue.

devm_alloc_workqueue() has two benefits:
1. Solves above problem of mix-and-match devres and non-devres code in
   driver,
2. Simplify any sane drivers which were correctly using
   alloc_workqueue() + devm_add_action_or_reset().

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 Documentation/driver-api/driver-model/devres.rst |  4 ++++
 include/linux/workqueue.h                        | 22 +++++++++++++++++++
 kernel/workqueue.c                               | 28 ++++++++++++++++++++++++
 3 files changed, 54 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index 7d2b897d66fa..017fb155a5bc 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -464,3 +464,7 @@ SPI
 
 WATCHDOG
   devm_watchdog_register_device()
+
+WORKQUEUE
+  devm_alloc_workqueue()
+  devm_alloc_ordered_workqueue()
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a4749f56398f..f8d235aef10d 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -512,6 +512,26 @@ __printf(1, 4) struct workqueue_struct *
 alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...);
 #define alloc_workqueue(...)	alloc_hooks(alloc_workqueue_noprof(__VA_ARGS__))
 
+/**
+ * devm_alloc_workqueue - Resource-managed allocate a workqueue
+ * @dev: Device to allocate workqueue for
+ * @fmt: printf format for the name of the workqueue
+ * @flags: WQ_* flags
+ * @max_active: max in-flight work items, 0 for default
+ * @...: args for @fmt
+ *
+ * Resource managed workqueue, see alloc_workqueue() for details.
+ *
+ * The workqueue will be automatically destroyed on driver detach.  Typically
+ * this should be used in drivers already relying on devm interafaces.
+ *
+ * RETURNS:
+ * Pointer to the allocated workqueue on success, %NULL on failure.
+ */
+__printf(2, 5) struct workqueue_struct *
+devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags,
+		     int max_active, ...);
+
 #ifdef CONFIG_LOCKDEP
 /**
  * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map
@@ -568,6 +588,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active,
  */
 #define alloc_ordered_workqueue(fmt, flags, args...)			\
 	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
+#define devm_alloc_ordered_workqueue(dev, fmt, flags, args...)		\
+	devm_alloc_workqueue(dev, fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)						\
 	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name))
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index aeaec79bc09c..19d20f3039d9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,6 +41,7 @@
 #include <linux/mempolicy.h>
 #include <linux/freezer.h>
 #include <linux/debug_locks.h>
+#include <linux/device/devres.h>
 #include <linux/lockdep.h>
 #include <linux/idr.h>
 #include <linux/jhash.h>
@@ -5891,6 +5892,33 @@ struct workqueue_struct *alloc_workqueue_noprof(const char *fmt,
 }
 EXPORT_SYMBOL_GPL(alloc_workqueue_noprof);
 
+static void devm_workqueue_release(void *res)
+{
+	destroy_workqueue(res);
+}
+
+__printf(2, 5) struct workqueue_struct *
+devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags,
+		     int max_active, ...)
+{
+	struct workqueue_struct *wq;
+	va_list args;
+	int ret;
+
+	va_start(args, max_active);
+	wq = alloc_workqueue(fmt, flags, max_active, args);
+	va_end(args);
+	if (!wq)
+		return NULL;
+
+	ret = devm_add_action_or_reset(dev, devm_workqueue_release, wq);
+	if (ret)
+		return NULL;
+
+	return wq;
+}
+EXPORT_SYMBOL_GPL(devm_alloc_workqueue);
+
 #ifdef CONFIG_LOCKDEP
 __printf(1, 5)
 struct workqueue_struct *
-- 
cgit v1.2.3


From 5a8103a6fb0ae9cf99c0271b17474468d6bae2b2 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 4 Mar 2026 18:21:57 +0100
Subject: genirq: Document interaction between <linux/irq.h> and DT binding
 defines

Document that the DT binding definitions in
<dt-bindings/interrupt-controller/irq.h> shadow the first six IRQ_TYPE_*
definitions in <linux/irq.h>.  The values must be the same anyway, so this
is harmless (as long as the latter is included first when both are
included), but it is good to document this explicitly.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/fbcc65dcee6c5437fab5ef18d21766bb4effb7cb.1772644406.git.geert+renesas@glider.be
---
 include/linux/irq.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 951acbdb9f84..efa514ee562f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -35,6 +35,10 @@ enum irqchip_irq_state;
  *
  * Bits 0-7 are the same as the IRQF_* bits in linux/interrupt.h
  *
+ * Note that the first 6 definitions are shadowed by C preprocessor definitions
+ * in include/dt-bindings/interrupt-controller/irq.h.  This is not an issue, as
+ * the actual values must be the same, due to being part of the stable DT ABI.
+ *
  * IRQ_TYPE_NONE		- default, unspecified type
  * IRQ_TYPE_EDGE_RISING		- rising edge triggered
  * IRQ_TYPE_EDGE_FALLING	- falling edge triggered
-- 
cgit v1.2.3


From 360160f75592bdc85edba8fe78fb20d90924c7e8 Mon Sep 17 00:00:00 2001
From: Ricardo Robaina <rrobaina@redhat.com>
Date: Mon, 9 Mar 2026 10:05:33 -0300
Subject: audit: handle unknown status requests in audit_receive_msg()

Currently, audit_receive_msg() ignores unknown status bits in AUDIT_SET
requests, incorrectly returning success to newer user space tools
querying unsupported features. This breaks forward compatibility.

Fix this by defining AUDIT_STATUS_ALL and returning -EINVAL if any
unrecognized bits are set (s.mask & ~AUDIT_STATUS_ALL).
This ensures invalid requests are safely rejected, allowing user space
to reliably test for and gracefully handle feature detection on older
kernels.

Suggested-by: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: Ricardo Robaina <rrobaina@redhat.com>
[PM: subject line tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/audit.h | 9 +++++++++
 kernel/audit.c        | 2 ++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index b642b5faca65..d79218bf075a 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -15,6 +15,15 @@
 #include <uapi/linux/audit.h>
 #include <uapi/linux/fanotify.h>
 
+#define AUDIT_STATUS_ALL (AUDIT_STATUS_ENABLED | \
+			  AUDIT_STATUS_FAILURE | \
+			  AUDIT_STATUS_PID | \
+			  AUDIT_STATUS_RATE_LIMIT | \
+			  AUDIT_STATUS_BACKLOG_LIMIT | \
+			  AUDIT_STATUS_BACKLOG_WAIT_TIME | \
+			  AUDIT_STATUS_LOST | \
+			  AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL)
+
 #define AUDIT_INO_UNSET ((unsigned long)-1)
 #define AUDIT_DEV_UNSET ((dev_t)-1)
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 08793e71b975..e1d489bc2dff 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1295,6 +1295,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 		memset(&s, 0, sizeof(s));
 		/* guard against past and future API changes */
 		memcpy(&s, data, min_t(size_t, sizeof(s), data_len));
+		if (s.mask & ~AUDIT_STATUS_ALL)
+			return -EINVAL;
 		if (s.mask & AUDIT_STATUS_ENABLED) {
 			err = audit_set_enabled(s.enabled);
 			if (err < 0)
-- 
cgit v1.2.3


From 7a6387dec8cee5a237dc5092269e97028f5a983b Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 9 Mar 2026 09:39:18 +0000
Subject: net: stmmac: provide plat_dat->dma_cfg in stmmac_plat_dat_alloc()

plat_dat->dma_cfg is unconditionally required for the operation of the
driver, so it would make sense to allocate it along with the plat_dat.

On Arm64, sizeof(*plat_dat) has recently shrunk from 880 to 816 bytes
and sizeof(*plat_dat->dma_cfg) has shrunk from 32 to 20 bytes.

Given that dma_cfg is required, and it is now less than a cache line,
It doesn't make sense to allocate this separateny, so place it at the
end of struct plat_stmmacenet_data, and set plat_dat->dma_cfg to point
at that to avoid mass changes.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Tested-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Link: https://patch.msgid.link/E1vzX54-0000000CVrw-2jfu@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c     | 5 -----
 drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c  | 4 ----
 drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c | 4 ----
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c     | 2 ++
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c      | 5 -----
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 8 +-------
 include/linux/stmmac.h                                | 1 +
 7 files changed, 4 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index fc13bfb47783..0b32560cd059 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -1251,11 +1251,6 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	if (!plat->mdio_bus_data)
 		return -ENOMEM;
 
-	plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg),
-				     GFP_KERNEL);
-	if (!plat->dma_cfg)
-		return -ENOMEM;
-
 	plat->safety_feat_cfg = devm_kzalloc(&pdev->dev,
 					     sizeof(*plat->safety_feat_cfg),
 					     GFP_KERNEL);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index ada6c6ef1f5c..51b1562f84d1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -513,10 +513,6 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
 	if (!plat->mdio_bus_data)
 		return -ENOMEM;
 
-	plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL);
-	if (!plat->dma_cfg)
-		return -ENOMEM;
-
 	ld = devm_kzalloc(&pdev->dev, sizeof(*ld), GFP_KERNEL);
 	if (!ld)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c
index 8b45b9cf7202..d245546b90db 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c
@@ -218,10 +218,6 @@ motorcomm_default_plat_data(struct pci_dev *pdev)
 	if (!plat->mdio_bus_data)
 		return NULL;
 
-	plat->dma_cfg = devm_kzalloc(dev, sizeof(*plat->dma_cfg), GFP_KERNEL);
-	if (!plat->dma_cfg)
-		return NULL;
-
 	plat->axi = devm_kzalloc(dev, sizeof(*plat->axi), GFP_KERNEL);
 	if (!plat->axi)
 		return NULL;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f0160ff54a59..87f43811faa0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7730,6 +7730,8 @@ struct plat_stmmacenet_data *stmmac_plat_dat_alloc(struct device *dev)
 	if (!plat_dat)
 		return NULL;
 
+	plat_dat->dma_cfg = &plat_dat->__dma_cfg;
+
 	/* Set the defaults:
 	 * - phy autodetection
 	 * - determine GMII_Address CR field from CSR clock
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 270ad066ced3..836fed7d60ab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -134,11 +134,6 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
 	if (!plat->mdio_bus_data)
 		return -ENOMEM;
 
-	plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg),
-				     GFP_KERNEL);
-	if (!plat->dma_cfg)
-		return -ENOMEM;
-
 	plat->safety_feat_cfg = devm_kzalloc(&pdev->dev,
 					     sizeof(*plat->safety_feat_cfg),
 					     GFP_KERNEL);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index c34998486293..1aed48fe0db6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -548,13 +548,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
 				     &plat->multicast_filter_bins);
 	}
 
-	dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg),
-			       GFP_KERNEL);
-	if (!dma_cfg) {
-		ret = ERR_PTR(-ENOMEM);
-		goto error_put_mdio;
-	}
-	plat->dma_cfg = dma_cfg;
+	dma_cfg = plat->dma_cfg;
 
 	of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl);
 	if (!dma_cfg->pbl)
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 965ada809fdf..919196713c05 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -306,5 +306,6 @@ struct plat_stmmacenet_data {
 	int msi_tx_base_vec;
 	const struct dwmac4_addrs *dwmac4_addrs;
 	unsigned int flags;
+	struct stmmac_dma_cfg __dma_cfg;
 };
 #endif
-- 
cgit v1.2.3


From c3d08424e025aaac8fb54134f76e611ef919cd08 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 9 Mar 2026 09:39:23 +0000
Subject: net: stmmac: convert plat_stmmacenet_data booleans to type bool

Convert members of struct plat_stmmacenet_data that are booleans to
type 'bool' and ensure their initialisers are true/false. Move the
has_xxx for the GMAC cores together, and move the COE members to the
end of the list of bool to avoid unused holes in the struct.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Tested-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Link: https://patch.msgid.link/E1vzX59-0000000CVs2-3MHc@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c  |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c |  2 +-
 .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c  |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c      |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c  |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c     |  6 +++---
 .../net/ethernet/stmicro/stmmac/stmmac_platform.c    | 20 ++++++++++----------
 include/linux/stmmac.h                               | 14 +++++++-------
 14 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index 0495437d3a6e..b0c5d1ecabce 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -88,7 +88,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
 	plat_dat->core_type = DWMAC_CORE_GMAC4;
 	plat_dat->dma_cfg->aal = 1;
 	plat_dat->flags |= STMMAC_FLAG_TSO_EN;
-	plat_dat->pmt = 1;
+	plat_dat->pmt = true;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 0b32560cd059..421c6c81ca5e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -566,7 +566,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat)
 	/* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
 	plat->clk_csr = STMMAC_CSR_20_35M;
 	plat->core_type = DWMAC_CORE_GMAC;
-	plat->force_sf_dma_mode = 1;
+	plat->force_sf_dma_mode = true;
 
 	plat->mdio_bus_data->needs_reset = true;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index 51b1562f84d1..eb14c197d6ae 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -94,7 +94,7 @@ static void loongson_default_data(struct pci_dev *pdev,
 	/* clk_csr_i = 100-150MHz & MDC = clk_csr_i/62 */
 	plat->clk_csr = STMMAC_CSR_100_150M;
 	plat->core_type = DWMAC_CORE_GMAC;
-	plat->force_sf_dma_mode = 1;
+	plat->force_sf_dma_mode = true;
 
 	/* Increase the default value for multicast hash bins */
 	plat->multicast_filter_bins = 256;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index 1f2d7d19ca56..a139db6a8cbb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -564,7 +564,7 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev,
 		plat->flags &= ~STMMAC_FLAG_USE_PHY_WOL;
 	else
 		plat->flags |= STMMAC_FLAG_USE_PHY_WOL;
-	plat->riwt_off = 1;
+	plat->riwt_off = true;
 	plat->maxmtu = ETH_DATA_LEN;
 	plat->host_dma_width = priv_plat->variant->dma_bit_mask;
 	plat->bsp_priv = priv_plat;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index cb1c074c2053..388e9fdeb86c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -817,7 +817,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
 	plat_dat->core_type = DWMAC_CORE_GMAC4;
 	if (ethqos->has_emac_ge_3)
 		plat_dat->dwmac4_addrs = &data->dwmac4_addrs;
-	plat_dat->pmt = 1;
+	plat_dat->pmt = true;
 	if (of_property_read_bool(np, "snps,tso"))
 		plat_dat->flags |= STMMAC_FLAG_TSO_EN;
 	if (of_device_is_compatible(np, "qcom,qcs404-ethqos"))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
index af594a096676..48fceadc55b1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
@@ -163,7 +163,7 @@ static int s32_dwmac_probe(struct platform_device *pdev)
 
 	/* S32CC core feature set */
 	plat->core_type = DWMAC_CORE_GMAC4;
-	plat->pmt = 1;
+	plat->pmt = true;
 	plat->flags |= STMMAC_FLAG_SPH_DISABLE;
 	plat->rx_fifo_size = 20480;
 	plat->tx_fifo_size = 20480;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index c6b99814d391..5f89fd968ae9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -565,7 +565,7 @@ static void socfpga_gen5_setup_plat_dat(struct socfpga_dwmac *dwmac)
 	plat_dat->core_type = DWMAC_CORE_GMAC;
 
 	/* Rx watchdog timer in dwmac is buggy in this hw */
-	plat_dat->riwt_off = 1;
+	plat_dat->riwt_off = true;
 }
 
 static void socfpga_agilex5_setup_plat_dat(struct socfpga_dwmac *dwmac)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 3ce03b059277..6dbe5d5a3224 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1179,7 +1179,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	 * hardware features were copied from Allwinner drivers.
 	 */
 	plat_dat->rx_coe = STMMAC_RX_COE_TYPE2;
-	plat_dat->tx_coe = 1;
+	plat_dat->tx_coe = true;
 	plat_dat->flags |= STMMAC_FLAG_HAS_SUN8I;
 	plat_dat->bsp_priv = gmac;
 	plat_dat->init = sun8i_dwmac_init;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index 52593ba3a3a3..74bd996d93c9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -135,7 +135,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
 
 	/* platform data specifying hardware features and callbacks.
 	 * hardware features were copied from Allwinner drivers. */
-	plat_dat->tx_coe = 1;
+	plat_dat->tx_coe = true;
 	plat_dat->core_type = DWMAC_CORE_GMAC;
 	plat_dat->bsp_priv = gmac;
 	plat_dat->init = sun7i_gmac_init;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
index d765acbe3754..b4b39e6a169e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
@@ -310,7 +310,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
 
 	plat->core_type = DWMAC_CORE_XGMAC;
 	plat->flags |= STMMAC_FLAG_TSO_EN;
-	plat->pmt = 1;
+	plat->pmt = true;
 	plat->bsp_priv = mgbe;
 
 	if (!plat->mdio_node)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 87f43811faa0..939431255fa5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7401,7 +7401,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 
 		/* TXCOE doesn't work in thresh DMA mode */
 		if (priv->plat->force_thresh_dma_mode)
-			priv->plat->tx_coe = 0;
+			priv->plat->tx_coe = false;
 		else
 			priv->plat->tx_coe = priv->dma_cap.tx_coe;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 836fed7d60ab..d584fd2daa6f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -25,7 +25,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat)
 	/* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
 	plat->clk_csr = STMMAC_CSR_20_35M;
 	plat->core_type = DWMAC_CORE_GMAC;
-	plat->force_sf_dma_mode = 1;
+	plat->force_sf_dma_mode = true;
 
 	plat->mdio_bus_data->needs_reset = true;
 }
@@ -58,9 +58,9 @@ static int snps_gmac5_default_data(struct pci_dev *pdev,
 
 	plat->clk_csr = STMMAC_CSR_250_300M;
 	plat->core_type = DWMAC_CORE_GMAC4;
-	plat->force_sf_dma_mode = 1;
+	plat->force_sf_dma_mode = true;
 	plat->flags |= STMMAC_FLAG_TSO_EN;
-	plat->pmt = 1;
+	plat->pmt = true;
 
 	/* Set default number of RX and TX queues to use */
 	plat->tx_queues_to_use = 4;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 1aed48fe0db6..0d3bad0f8915 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -514,34 +514,34 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
 		plat->multicast_filter_bins = dwmac1000_validate_mcast_bins(
 				&pdev->dev, plat->multicast_filter_bins);
 		plat->core_type = DWMAC_CORE_GMAC;
-		plat->pmt = 1;
+		plat->pmt = true;
 	}
 
 	if (of_device_is_compatible(np, "snps,dwmac-3.40a")) {
 		plat->core_type = DWMAC_CORE_GMAC;
-		plat->enh_desc = 1;
-		plat->tx_coe = 1;
-		plat->bugged_jumbo = 1;
-		plat->pmt = 1;
+		plat->enh_desc = true;
+		plat->tx_coe = true;
+		plat->bugged_jumbo = true;
+		plat->pmt = true;
 	}
 
 	if (of_device_compatible_match(np, stmmac_gmac4_compats)) {
 		plat->core_type = DWMAC_CORE_GMAC4;
-		plat->pmt = 1;
+		plat->pmt = true;
 		if (of_property_read_bool(np, "snps,tso"))
 			plat->flags |= STMMAC_FLAG_TSO_EN;
 	}
 
 	if (of_device_is_compatible(np, "snps,dwmac-3.610") ||
 		of_device_is_compatible(np, "snps,dwmac-3.710")) {
-		plat->enh_desc = 1;
-		plat->bugged_jumbo = 1;
-		plat->force_sf_dma_mode = 1;
+		plat->enh_desc = true;
+		plat->bugged_jumbo = true;
+		plat->force_sf_dma_mode = true;
 	}
 
 	if (of_device_is_compatible(np, "snps,dwxgmac")) {
 		plat->core_type = DWMAC_CORE_XGMAC;
-		plat->pmt = 1;
+		plat->pmt = true;
 		if (of_property_read_bool(np, "snps,tso"))
 			plat->flags |= STMMAC_FLAG_TSO_EN;
 		of_property_read_u32(np, "snps,multicast-filter-bins",
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 919196713c05..9420da96a4ff 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -229,14 +229,14 @@ struct plat_stmmacenet_data {
 	struct stmmac_dma_cfg *dma_cfg;
 	struct stmmac_safety_feature_cfg *safety_feat_cfg;
 	int clk_csr;
-	int enh_desc;
-	int tx_coe;
+	bool enh_desc;
+	bool tx_coe;
+	bool bugged_jumbo;
+	bool pmt;
+	bool force_sf_dma_mode;
+	bool force_thresh_dma_mode;
+	bool riwt_off;
 	int rx_coe;
-	int bugged_jumbo;
-	int pmt;
-	int force_sf_dma_mode;
-	int force_thresh_dma_mode;
-	int riwt_off;
 	int max_speed;
 	int maxmtu;
 	int multicast_filter_bins;
-- 
cgit v1.2.3


From 3357642e65e9454c3da64b62c0ed987ee4010008 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 9 Mar 2026 09:39:28 +0000
Subject: net: stmmac: reorder structs to reduce memory consumption

Reorder some of the stmmac structures to allow them to pack better,
thereby using less memory. On aarch64, sizeof(struct stmmac_priv)
was 880, and with this change becomes 816, saving 64 bytes, which
is an 8% saving.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Link: https://patch.msgid.link/E1vzX5E-0000000CVs8-40w4@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 9420da96a4ff..411cdd3ea034 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -108,37 +108,37 @@ struct stmmac_dma_cfg {
 
 #define AXI_BLEN	7
 struct stmmac_axi {
-	bool axi_lpi_en;
-	bool axi_xit_frm;
 	u32 axi_wr_osr_lmt;
 	u32 axi_rd_osr_lmt;
-	bool axi_kbbe;
 	u32 axi_blen_regval;
+	bool axi_lpi_en;
+	bool axi_xit_frm;
+	bool axi_kbbe;
 	bool axi_fb;
 	bool axi_mb;
 	bool axi_rb;
 };
 
 struct stmmac_rxq_cfg {
-	u8 mode_to_use;
 	u32 chan;
+	u32 prio;
+	u8 mode_to_use;
 	u8 pkt_route;
 	bool use_prio;
-	u32 prio;
 };
 
 struct stmmac_txq_cfg {
 	u32 weight;
-	bool coe_unsupported;
-	u8 mode_to_use;
 	/* Credit Base Shaper parameters */
 	u32 send_slope;
 	u32 idle_slope;
 	u32 high_credit;
 	u32 low_credit;
-	bool use_prio;
 	u32 prio;
 	int tbs_en;
+	bool use_prio;
+	bool coe_unsupported;
+	u8 mode_to_use;
 };
 
 struct stmmac_safety_feature_cfg {
-- 
cgit v1.2.3


From 94808793fed71ee47741df0923d353024b6904ff Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 9 Mar 2026 09:39:34 +0000
Subject: net: stmmac: use u8 for ?x_queues_to_use and number_?x_queues

The maximum number of queues is a compile time constant of only eight.
This makes using a 32-bit quantity wastefulf. Instead, use u8 for
these and their associated variables.

When reading the DT properties, saturdate at U8_MAX. Provided the core
provides DMA capabilities to describe the number of queues, this will
be capped by stmmac_hw_init() with a warning.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Link: https://patch.msgid.link/E1vzX5K-0000000CVsE-0J0Y@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |   4 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c  |   2 +-
 .../net/ethernet/stmicro/stmmac/dwmac1000_core.c   |   2 +-
 .../net/ethernet/stmicro/stmmac/dwmac100_core.c    |   2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c  |   4 +-
 .../net/ethernet/stmicro/stmmac/dwxgmac2_core.c    |   4 +-
 drivers/net/ethernet/stmicro/stmmac/hwif.h         |   2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |   2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 223 +++++++++++----------
 .../net/ethernet/stmicro/stmmac/stmmac_platform.c  |  15 +-
 include/linux/stmmac.h                             |   4 +-
 11 files changed, 136 insertions(+), 128 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 46454e2886ce..f1628de8ed18 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -446,8 +446,8 @@ struct dma_features {
 	unsigned int number_rx_channel;
 	unsigned int number_tx_channel;
 	/* TX and RX number of queues */
-	unsigned int number_rx_queues;
-	unsigned int number_tx_queues;
+	u8 number_rx_queues;
+	u8 number_tx_queues;
 	/* PPS output */
 	unsigned int pps_out_num;
 	/* Number of Traffic Classes */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 6dbe5d5a3224..48c52eb96233 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -718,7 +718,7 @@ static void sun8i_dwmac_set_filter(struct mac_device_info *hw,
 
 static void sun8i_dwmac_flow_ctrl(struct mac_device_info *hw,
 				  unsigned int duplex, unsigned int fc,
-				  unsigned int pause_time, u32 tx_cnt)
+				  unsigned int pause_time, u8 tx_cnt)
 {
 	void __iomem *ioaddr = hw->pcsr;
 	u32 v;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index c7cb30672604..01f8353eb6ef 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -222,7 +222,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw,
 
 static void dwmac1000_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
 				unsigned int fc, unsigned int pause_time,
-				u32 tx_cnt)
+				u8 tx_cnt)
 {
 	void __iomem *ioaddr = hw->pcsr;
 	/* Set flow such that DZPQ in Mac Register 6 is 0,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index 6b5cf3a0866a..94d24d355d95 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -126,7 +126,7 @@ static void dwmac100_set_filter(struct mac_device_info *hw,
 
 static void dwmac100_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
 			       unsigned int fc, unsigned int pause_time,
-			       u32 tx_cnt)
+			       u8 tx_cnt)
 {
 	void __iomem *ioaddr = hw->pcsr;
 	unsigned int flow = MAC_FLOW_CTRL_ENABLE;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index e6bcb77b22a2..4c6fed3ecbcf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -547,11 +547,11 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
 
 static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
 			     unsigned int fc, unsigned int pause_time,
-			     u32 tx_cnt)
+			     u8 tx_cnt)
 {
 	void __iomem *ioaddr = hw->pcsr;
 	unsigned int flow = 0;
-	u32 queue = 0;
+	u8 queue;
 
 	pr_debug("GMAC Flow-Control:\n");
 	if (fc & FLOW_RX) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index efa76b147f9e..f02b434bbd50 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -355,10 +355,10 @@ static int dwxgmac2_host_mtl_irq_status(struct stmmac_priv *priv,
 
 static void dwxgmac2_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
 			       unsigned int fc, unsigned int pause_time,
-			       u32 tx_cnt)
+			       u8 tx_cnt)
 {
 	void __iomem *ioaddr = hw->pcsr;
-	u32 i;
+	u8 i;
 
 	if (fc & FLOW_RX)
 		writel(XGMAC_RFE, ioaddr + XGMAC_RX_FLOW_CTRL);
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 374f326efa01..010b4d32484a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -352,7 +352,7 @@ struct stmmac_ops {
 	void (*set_filter)(struct mac_device_info *hw, struct net_device *dev);
 	/* Flow control setting */
 	void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex,
-			  unsigned int fc, unsigned int pause_time, u32 tx_cnt);
+			  unsigned int fc, unsigned int pause_time, u8 tx_cnt);
 	/* Set power management mode (e.g. magic frame) */
 	void (*pmt)(struct mac_device_info *hw, unsigned long mode);
 	/* Set/Get Unicast MAC addresses */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 335e60439b42..bba9bb9c95bf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -407,7 +407,7 @@ void stmmac_dvr_remove(struct device *dev);
 int stmmac_dvr_probe(struct device *device,
 		     struct plat_stmmacenet_data *plat_dat,
 		     struct stmmac_resources *res);
-int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt);
+int stmmac_reinit_queues(struct net_device *dev, u8 rx_cnt, u8 tx_cnt);
 int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size);
 int stmmac_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i,
 			   phy_interface_t interface, int speed);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 939431255fa5..11150bddd872 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -264,10 +264,10 @@ static void stmmac_verify_args(void)
 
 static void __stmmac_disable_all_queues(struct stmmac_priv *priv)
 {
-	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
-	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
-	u32 maxq = max(rx_queues_cnt, tx_queues_cnt);
-	u32 queue;
+	u8 rx_queues_cnt = priv->plat->rx_queues_to_use;
+	u8 tx_queues_cnt = priv->plat->tx_queues_to_use;
+	u8 maxq = max(rx_queues_cnt, tx_queues_cnt);
+	u8 queue;
 
 	for (queue = 0; queue < maxq; queue++) {
 		struct stmmac_channel *ch = &priv->channel[queue];
@@ -291,9 +291,9 @@ static void __stmmac_disable_all_queues(struct stmmac_priv *priv)
  */
 static void stmmac_disable_all_queues(struct stmmac_priv *priv)
 {
-	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+	u8 rx_queues_cnt = priv->plat->rx_queues_to_use;
 	struct stmmac_rx_queue *rx_q;
-	u32 queue;
+	u8 queue;
 
 	/* synchronize_rcu() needed for pending XDP buffers to drain */
 	for (queue = 0; queue < rx_queues_cnt; queue++) {
@@ -313,10 +313,10 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
  */
 static void stmmac_enable_all_queues(struct stmmac_priv *priv)
 {
-	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
-	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
-	u32 maxq = max(rx_queues_cnt, tx_queues_cnt);
-	u32 queue;
+	u8 rx_queues_cnt = priv->plat->rx_queues_to_use;
+	u8 tx_queues_cnt = priv->plat->tx_queues_to_use;
+	u8 maxq = max(rx_queues_cnt, tx_queues_cnt);
+	u8 queue;
 
 	for (queue = 0; queue < maxq; queue++) {
 		struct stmmac_channel *ch = &priv->channel[queue];
@@ -377,8 +377,8 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue)
 
 static bool stmmac_eee_tx_busy(struct stmmac_priv *priv)
 {
-	u32 tx_cnt = priv->plat->tx_queues_to_use;
-	u32 queue;
+	u8 tx_cnt = priv->plat->tx_queues_to_use;
+	u8 queue;
 
 	/* check if all TX queues have the work finished */
 	for (queue = 0; queue < tx_cnt; queue++) {
@@ -909,7 +909,7 @@ static int stmmac_legacy_serdes_power_up(struct stmmac_priv *priv)
 static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex,
 				 unsigned int flow_ctrl)
 {
-	u32 tx_cnt = priv->plat->tx_queues_to_use;
+	u8 tx_cnt = priv->plat->tx_queues_to_use;
 
 	stmmac_flow_ctrl(priv, priv->hw, duplex, flow_ctrl, priv->pause_time,
 			 tx_cnt);
@@ -1410,10 +1410,10 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv)
 static void stmmac_display_rx_rings(struct stmmac_priv *priv,
 				    struct stmmac_dma_conf *dma_conf)
 {
-	u32 rx_cnt = priv->plat->rx_queues_to_use;
+	u8 rx_cnt = priv->plat->rx_queues_to_use;
 	unsigned int desc_size;
 	void *head_rx;
-	u32 queue;
+	u8 queue;
 
 	/* Display RX rings */
 	for (queue = 0; queue < rx_cnt; queue++) {
@@ -1438,10 +1438,10 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv,
 static void stmmac_display_tx_rings(struct stmmac_priv *priv,
 				    struct stmmac_dma_conf *dma_conf)
 {
-	u32 tx_cnt = priv->plat->tx_queues_to_use;
+	u8 tx_cnt = priv->plat->tx_queues_to_use;
 	unsigned int desc_size;
 	void *head_tx;
-	u32 queue;
+	u8 queue;
 
 	/* Display TX rings */
 	for (queue = 0; queue < tx_cnt; queue++) {
@@ -1571,9 +1571,9 @@ static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv,
 static void stmmac_clear_descriptors(struct stmmac_priv *priv,
 				     struct stmmac_dma_conf *dma_conf)
 {
-	u32 rx_queue_cnt = priv->plat->rx_queues_to_use;
-	u32 tx_queue_cnt = priv->plat->tx_queues_to_use;
-	u32 queue;
+	u8 rx_queue_cnt = priv->plat->rx_queues_to_use;
+	u8 tx_queue_cnt = priv->plat->tx_queues_to_use;
+	u8 queue;
 
 	/* Clear the RX descriptors */
 	for (queue = 0; queue < rx_queue_cnt; queue++)
@@ -1891,7 +1891,7 @@ static int init_dma_rx_desc_rings(struct net_device *dev,
 				  gfp_t flags)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 rx_count = priv->plat->rx_queues_to_use;
+	u8 rx_count = priv->plat->rx_queues_to_use;
 	int queue;
 	int ret;
 
@@ -1985,8 +1985,8 @@ static int init_dma_tx_desc_rings(struct net_device *dev,
 				  struct stmmac_dma_conf *dma_conf)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 tx_queue_cnt;
-	u32 queue;
+	u8 tx_queue_cnt;
+	u8 queue;
 
 	tx_queue_cnt = priv->plat->tx_queues_to_use;
 
@@ -2057,8 +2057,8 @@ static void dma_free_tx_skbufs(struct stmmac_priv *priv,
  */
 static void stmmac_free_tx_skbufs(struct stmmac_priv *priv)
 {
-	u32 tx_queue_cnt = priv->plat->tx_queues_to_use;
-	u32 queue;
+	u8 tx_queue_cnt = priv->plat->tx_queues_to_use;
+	u8 queue;
 
 	for (queue = 0; queue < tx_queue_cnt; queue++)
 		dma_free_tx_skbufs(priv, &priv->dma_conf, queue);
@@ -2106,8 +2106,8 @@ static void __free_dma_rx_desc_resources(struct stmmac_priv *priv,
 static void free_dma_rx_desc_resources(struct stmmac_priv *priv,
 				       struct stmmac_dma_conf *dma_conf)
 {
-	u32 rx_count = priv->plat->rx_queues_to_use;
-	u32 queue;
+	u8 rx_count = priv->plat->rx_queues_to_use;
+	u8 queue;
 
 	/* Free RX queue resources */
 	for (queue = 0; queue < rx_count; queue++)
@@ -2153,8 +2153,8 @@ static void __free_dma_tx_desc_resources(struct stmmac_priv *priv,
 static void free_dma_tx_desc_resources(struct stmmac_priv *priv,
 				       struct stmmac_dma_conf *dma_conf)
 {
-	u32 tx_count = priv->plat->tx_queues_to_use;
-	u32 queue;
+	u8 tx_count = priv->plat->tx_queues_to_use;
+	u8 queue;
 
 	/* Free TX queue resources */
 	for (queue = 0; queue < tx_count; queue++)
@@ -2255,8 +2255,8 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv,
 static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv,
 				       struct stmmac_dma_conf *dma_conf)
 {
-	u32 rx_count = priv->plat->rx_queues_to_use;
-	u32 queue;
+	u8 rx_count = priv->plat->rx_queues_to_use;
+	u8 queue;
 	int ret;
 
 	/* RX queues buffers and DMA */
@@ -2331,8 +2331,8 @@ static int __alloc_dma_tx_desc_resources(struct stmmac_priv *priv,
 static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv,
 				       struct stmmac_dma_conf *dma_conf)
 {
-	u32 tx_count = priv->plat->tx_queues_to_use;
-	u32 queue;
+	u8 tx_count = priv->plat->tx_queues_to_use;
+	u8 queue;
 	int ret;
 
 	/* TX queues buffers and DMA */
@@ -2396,8 +2396,8 @@ static void free_dma_desc_resources(struct stmmac_priv *priv,
  */
 static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
 {
-	u32 rx_queues_count = priv->plat->rx_queues_to_use;
-	int queue;
+	u8 rx_queues_count = priv->plat->rx_queues_to_use;
+	u8 queue;
 	u8 mode;
 
 	for (queue = 0; queue < rx_queues_count; queue++) {
@@ -2460,10 +2460,10 @@ static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan)
 
 static void stmmac_enable_all_dma_irq(struct stmmac_priv *priv)
 {
-	u32 rx_channels_count = priv->plat->rx_queues_to_use;
-	u32 tx_channels_count = priv->plat->tx_queues_to_use;
-	u32 dma_csr_ch = max(rx_channels_count, tx_channels_count);
-	u32 chan;
+	u8 rx_channels_count = priv->plat->rx_queues_to_use;
+	u8 tx_channels_count = priv->plat->tx_queues_to_use;
+	u8 dma_csr_ch = max(rx_channels_count, tx_channels_count);
+	u8 chan;
 
 	for (chan = 0; chan < dma_csr_ch; chan++) {
 		struct stmmac_channel *ch = &priv->channel[chan];
@@ -2483,9 +2483,9 @@ static void stmmac_enable_all_dma_irq(struct stmmac_priv *priv)
  */
 static void stmmac_start_all_dma(struct stmmac_priv *priv)
 {
-	u32 rx_channels_count = priv->plat->rx_queues_to_use;
-	u32 tx_channels_count = priv->plat->tx_queues_to_use;
-	u32 chan = 0;
+	u8 rx_channels_count = priv->plat->rx_queues_to_use;
+	u8 tx_channels_count = priv->plat->tx_queues_to_use;
+	u8 chan;
 
 	for (chan = 0; chan < rx_channels_count; chan++)
 		stmmac_start_rx_dma(priv, chan);
@@ -2502,9 +2502,9 @@ static void stmmac_start_all_dma(struct stmmac_priv *priv)
  */
 static void stmmac_stop_all_dma(struct stmmac_priv *priv)
 {
-	u32 rx_channels_count = priv->plat->rx_queues_to_use;
-	u32 tx_channels_count = priv->plat->tx_queues_to_use;
-	u32 chan = 0;
+	u8 rx_channels_count = priv->plat->rx_queues_to_use;
+	u8 tx_channels_count = priv->plat->tx_queues_to_use;
+	u8 chan;
 
 	for (chan = 0; chan < rx_channels_count; chan++)
 		stmmac_stop_rx_dma(priv, chan);
@@ -2521,14 +2521,14 @@ static void stmmac_stop_all_dma(struct stmmac_priv *priv)
  */
 static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 {
-	u32 rx_channels_count = priv->plat->rx_queues_to_use;
-	u32 tx_channels_count = priv->plat->tx_queues_to_use;
+	u8 rx_channels_count = priv->plat->rx_queues_to_use;
+	u8 tx_channels_count = priv->plat->tx_queues_to_use;
 	int rxfifosz = priv->plat->rx_fifo_size;
 	int txfifosz = priv->plat->tx_fifo_size;
 	u32 txmode = 0;
 	u32 rxmode = 0;
-	u32 chan = 0;
 	u8 qmode = 0;
+	u8 chan;
 
 	if (rxfifosz == 0)
 		rxfifosz = priv->dma_cap.rx_fifo_size;
@@ -3012,8 +3012,8 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
 {
 	u8 rxqmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
 	u8 txqmode = priv->plat->tx_queues_cfg[chan].mode_to_use;
-	u32 rx_channels_count = priv->plat->rx_queues_to_use;
-	u32 tx_channels_count = priv->plat->tx_queues_to_use;
+	u8 rx_channels_count = priv->plat->rx_queues_to_use;
+	u8 tx_channels_count = priv->plat->tx_queues_to_use;
 	int rxfifosz = priv->plat->rx_fifo_size;
 	int txfifosz = priv->plat->tx_fifo_size;
 
@@ -3088,12 +3088,12 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan, u32 dir)
  */
 static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 {
-	u32 tx_channel_count = priv->plat->tx_queues_to_use;
-	u32 rx_channel_count = priv->plat->rx_queues_to_use;
-	u32 channels_to_check = tx_channel_count > rx_channel_count ?
-				tx_channel_count : rx_channel_count;
-	u32 chan;
+	u8 tx_channel_count = priv->plat->tx_queues_to_use;
+	u8 rx_channel_count = priv->plat->rx_queues_to_use;
+	u8 channels_to_check = tx_channel_count > rx_channel_count ?
+			       tx_channel_count : rx_channel_count;
 	int status[MAX_T(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
+	u8 chan;
 
 	/* Make sure we never check beyond our status buffer. */
 	if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status)))
@@ -3237,13 +3237,13 @@ static int stmmac_prereset_configure(struct stmmac_priv *priv)
  */
 static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 {
-	u32 rx_channels_count = priv->plat->rx_queues_to_use;
-	u32 tx_channels_count = priv->plat->tx_queues_to_use;
-	u32 dma_csr_ch = max(rx_channels_count, tx_channels_count);
+	u8 rx_channels_count = priv->plat->rx_queues_to_use;
+	u8 tx_channels_count = priv->plat->tx_queues_to_use;
+	u8 dma_csr_ch = max(rx_channels_count, tx_channels_count);
 	struct stmmac_rx_queue *rx_q;
 	struct stmmac_tx_queue *tx_q;
-	u32 chan = 0;
 	int ret = 0;
+	u8 chan;
 
 	ret = stmmac_prereset_configure(priv);
 	if (ret)
@@ -3359,9 +3359,9 @@ static enum hrtimer_restart stmmac_tx_timer(struct hrtimer *t)
  */
 static void stmmac_init_coalesce(struct stmmac_priv *priv)
 {
-	u32 tx_channel_count = priv->plat->tx_queues_to_use;
-	u32 rx_channel_count = priv->plat->rx_queues_to_use;
-	u32 chan;
+	u8 tx_channel_count = priv->plat->tx_queues_to_use;
+	u8 rx_channel_count = priv->plat->rx_queues_to_use;
+	u8 chan;
 
 	for (chan = 0; chan < tx_channel_count; chan++) {
 		struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
@@ -3378,9 +3378,9 @@ static void stmmac_init_coalesce(struct stmmac_priv *priv)
 
 static void stmmac_set_rings_length(struct stmmac_priv *priv)
 {
-	u32 rx_channels_count = priv->plat->rx_queues_to_use;
-	u32 tx_channels_count = priv->plat->tx_queues_to_use;
-	u32 chan;
+	u8 rx_channels_count = priv->plat->rx_queues_to_use;
+	u8 tx_channels_count = priv->plat->tx_queues_to_use;
+	u8 chan;
 
 	/* set TX ring length */
 	for (chan = 0; chan < tx_channels_count; chan++)
@@ -3400,9 +3400,9 @@ static void stmmac_set_rings_length(struct stmmac_priv *priv)
  */
 static void stmmac_set_tx_queue_weight(struct stmmac_priv *priv)
 {
-	u32 tx_queues_count = priv->plat->tx_queues_to_use;
+	u8 tx_queues_count = priv->plat->tx_queues_to_use;
 	u32 weight;
-	u32 queue;
+	u8 queue;
 
 	for (queue = 0; queue < tx_queues_count; queue++) {
 		weight = priv->plat->tx_queues_cfg[queue].weight;
@@ -3417,9 +3417,9 @@ static void stmmac_set_tx_queue_weight(struct stmmac_priv *priv)
  */
 static void stmmac_configure_cbs(struct stmmac_priv *priv)
 {
-	u32 tx_queues_count = priv->plat->tx_queues_to_use;
+	u8 tx_queues_count = priv->plat->tx_queues_to_use;
 	u32 mode_to_use;
-	u32 queue;
+	u8 queue;
 
 	/* queue 0 is reserved for legacy traffic */
 	for (queue = 1; queue < tx_queues_count; queue++) {
@@ -3443,8 +3443,8 @@ static void stmmac_configure_cbs(struct stmmac_priv *priv)
  */
 static void stmmac_rx_queue_dma_chan_map(struct stmmac_priv *priv)
 {
-	u32 rx_queues_count = priv->plat->rx_queues_to_use;
-	u32 queue;
+	u8 rx_queues_count = priv->plat->rx_queues_to_use;
+	u8 queue;
 	u32 chan;
 
 	for (queue = 0; queue < rx_queues_count; queue++) {
@@ -3460,8 +3460,8 @@ static void stmmac_rx_queue_dma_chan_map(struct stmmac_priv *priv)
  */
 static void stmmac_mac_config_rx_queues_prio(struct stmmac_priv *priv)
 {
-	u32 rx_queues_count = priv->plat->rx_queues_to_use;
-	u32 queue;
+	u8 rx_queues_count = priv->plat->rx_queues_to_use;
+	u8 queue;
 	u32 prio;
 
 	for (queue = 0; queue < rx_queues_count; queue++) {
@@ -3480,8 +3480,8 @@ static void stmmac_mac_config_rx_queues_prio(struct stmmac_priv *priv)
  */
 static void stmmac_mac_config_tx_queues_prio(struct stmmac_priv *priv)
 {
-	u32 tx_queues_count = priv->plat->tx_queues_to_use;
-	u32 queue;
+	u8 tx_queues_count = priv->plat->tx_queues_to_use;
+	u8 queue;
 	u32 prio;
 
 	for (queue = 0; queue < tx_queues_count; queue++) {
@@ -3500,9 +3500,9 @@ static void stmmac_mac_config_tx_queues_prio(struct stmmac_priv *priv)
  */
 static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
 {
-	u32 rx_queues_count = priv->plat->rx_queues_to_use;
-	u32 queue;
+	u8 rx_queues_count = priv->plat->rx_queues_to_use;
 	u8 packet;
+	u8 queue;
 
 	for (queue = 0; queue < rx_queues_count; queue++) {
 		/* no specific packet type routing specified for the queue */
@@ -3537,8 +3537,8 @@ static void stmmac_mac_config_rss(struct stmmac_priv *priv)
  */
 static void stmmac_mtl_configuration(struct stmmac_priv *priv)
 {
-	u32 rx_queues_count = priv->plat->rx_queues_to_use;
-	u32 tx_queues_count = priv->plat->tx_queues_to_use;
+	u8 rx_queues_count = priv->plat->rx_queues_to_use;
+	u8 tx_queues_count = priv->plat->tx_queues_to_use;
 
 	if (tx_queues_count > 1)
 		stmmac_set_tx_queue_weight(priv);
@@ -3606,10 +3606,10 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
 static int stmmac_hw_setup(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 rx_cnt = priv->plat->rx_queues_to_use;
-	u32 tx_cnt = priv->plat->tx_queues_to_use;
+	u8 rx_cnt = priv->plat->rx_queues_to_use;
+	u8 tx_cnt = priv->plat->tx_queues_to_use;
 	bool sph_en;
-	u32 chan;
+	u8 chan;
 	int ret;
 
 	/* Make sure RX clock is enabled */
@@ -4001,7 +4001,8 @@ static struct stmmac_dma_conf *
 stmmac_setup_dma_desc(struct stmmac_priv *priv, unsigned int mtu)
 {
 	struct stmmac_dma_conf *dma_conf;
-	int chan, bfsize, ret;
+	int bfsize, ret;
+	u8 chan;
 
 	dma_conf = kzalloc_obj(*dma_conf);
 	if (!dma_conf) {
@@ -4076,7 +4077,7 @@ static int __stmmac_open(struct net_device *dev,
 			 struct stmmac_dma_conf *dma_conf)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 chan;
+	u8 chan;
 	int ret;
 
 	for (int i = 0; i < MTL_MAX_TX_QUEUES; i++)
@@ -4175,7 +4176,7 @@ err_dma_resources:
 static void __stmmac_release(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 chan;
+	u8 chan;
 
 	/* Stop and disconnect the PHY */
 	phylink_stop(priv->phylink);
@@ -6123,7 +6124,7 @@ static int stmmac_set_features(struct net_device *netdev,
 
 	if (priv->sph_capable) {
 		bool sph_en = (priv->hw->rx_csum > 0) && priv->sph_active;
-		u32 chan;
+		u8 chan;
 
 		for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
 			stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
@@ -6143,11 +6144,11 @@ static int stmmac_set_features(struct net_device *netdev,
 
 static void stmmac_common_interrupt(struct stmmac_priv *priv)
 {
-	u32 rx_cnt = priv->plat->rx_queues_to_use;
-	u32 tx_cnt = priv->plat->tx_queues_to_use;
-	u32 queues_count;
-	u32 queue;
+	u8 rx_cnt = priv->plat->rx_queues_to_use;
+	u8 tx_cnt = priv->plat->tx_queues_to_use;
+	u8 queues_count;
 	bool xmac;
+	u8 queue;
 
 	xmac = dwmac_is_xmac(priv->plat->core_type);
 	queues_count = (rx_cnt > tx_cnt) ? rx_cnt : tx_cnt;
@@ -6445,9 +6446,9 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v)
 {
 	struct net_device *dev = seq->private;
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 rx_count = priv->plat->rx_queues_to_use;
-	u32 tx_count = priv->plat->tx_queues_to_use;
-	u32 queue;
+	u8 rx_count = priv->plat->rx_queues_to_use;
+	u8 tx_count = priv->plat->tx_queues_to_use;
+	u8 queue;
 
 	if ((dev->flags & IFF_UP) == 0)
 		return 0;
@@ -6572,9 +6573,9 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v)
 		   priv->dma_cap.number_rx_channel);
 	seq_printf(seq, "\tNumber of Additional TX channel: %d\n",
 		   priv->dma_cap.number_tx_channel);
-	seq_printf(seq, "\tNumber of Additional RX queues: %d\n",
+	seq_printf(seq, "\tNumber of Additional RX queues: %u\n",
 		   priv->dma_cap.number_rx_queues);
-	seq_printf(seq, "\tNumber of Additional TX queues: %d\n",
+	seq_printf(seq, "\tNumber of Additional TX queues: %u\n",
 		   priv->dma_cap.number_tx_queues);
 	seq_printf(seq, "\tEnhanced descriptors: %s\n",
 		   (priv->dma_cap.enh_desc) ? "Y" : "N");
@@ -7043,7 +7044,7 @@ void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue)
 void stmmac_xdp_release(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 chan;
+	u8 chan;
 
 	/* Ensure tx function is not running */
 	netif_tx_disable(dev);
@@ -7076,14 +7077,14 @@ void stmmac_xdp_release(struct net_device *dev)
 int stmmac_xdp_open(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 rx_cnt = priv->plat->rx_queues_to_use;
-	u32 tx_cnt = priv->plat->tx_queues_to_use;
-	u32 dma_csr_ch = max(rx_cnt, tx_cnt);
+	u8 rx_cnt = priv->plat->rx_queues_to_use;
+	u8 tx_cnt = priv->plat->tx_queues_to_use;
+	u8 dma_csr_ch = max(rx_cnt, tx_cnt);
 	struct stmmac_rx_queue *rx_q;
 	struct stmmac_tx_queue *tx_q;
 	u32 buf_size;
 	bool sph_en;
-	u32 chan;
+	u8 chan;
 	int ret;
 
 	ret = alloc_dma_desc_resources(priv, &priv->dma_conf);
@@ -7219,10 +7220,10 @@ int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags)
 static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 tx_cnt = priv->plat->tx_queues_to_use;
-	u32 rx_cnt = priv->plat->rx_queues_to_use;
+	u8 tx_cnt = priv->plat->tx_queues_to_use;
+	u8 rx_cnt = priv->plat->rx_queues_to_use;
 	unsigned int start;
-	int q;
+	u8 q;
 
 	for (q = 0; q < tx_cnt; q++) {
 		struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q];
@@ -7511,7 +7512,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 static void stmmac_napi_add(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 queue, maxq;
+	u8 queue, maxq;
 
 	maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use);
 
@@ -7540,7 +7541,7 @@ static void stmmac_napi_add(struct net_device *dev)
 static void stmmac_napi_del(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 queue, maxq;
+	u8 queue, maxq;
 
 	maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use);
 
@@ -7558,7 +7559,7 @@ static void stmmac_napi_del(struct net_device *dev)
 	}
 }
 
-int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
+int stmmac_reinit_queues(struct net_device *dev, u8 rx_cnt, u8 tx_cnt)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	int ret = 0, i;
@@ -7763,8 +7764,8 @@ static int __stmmac_dvr_probe(struct device *device,
 {
 	struct net_device *ndev = NULL;
 	struct stmmac_priv *priv;
-	u32 rxq;
 	int i, ret = 0;
+	u8 rxq;
 
 	if (!plat_dat->dma_cfg || !plat_dat->dma_cfg->pbl) {
 		dev_err(device, "invalid DMA configuration\n");
@@ -8147,7 +8148,7 @@ int stmmac_suspend(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
-	u32 chan;
+	u8 chan;
 
 	if (!ndev || !netif_running(ndev))
 		goto suspend_bsp;
@@ -8222,9 +8223,9 @@ static void stmmac_reset_tx_queue(struct stmmac_priv *priv, u32 queue)
  */
 static void stmmac_reset_queues_param(struct stmmac_priv *priv)
 {
-	u32 rx_cnt = priv->plat->rx_queues_to_use;
-	u32 tx_cnt = priv->plat->tx_queues_to_use;
-	u32 queue;
+	u8 rx_cnt = priv->plat->rx_queues_to_use;
+	u8 tx_cnt = priv->plat->tx_queues_to_use;
+	u8 queue;
 
 	for (queue = 0; queue < rx_cnt; queue++)
 		stmmac_reset_rx_queue(priv, queue);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 0d3bad0f8915..3b514a702612 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -138,6 +138,7 @@ static int stmmac_mtl_setup(struct platform_device *pdev,
 	struct device_node *tx_node;
 	u8 queue = 0;
 	int ret = 0;
+	u32 value;
 
 	/* First Queue must always be in DCB mode. As MTL_QUEUE_DCB = 1 we need
 	 * to always set this, otherwise Queue will be classified as AVB
@@ -157,8 +158,11 @@ static int stmmac_mtl_setup(struct platform_device *pdev,
 	}
 
 	/* Processing RX queues common config */
-	of_property_read_u32(rx_node, "snps,rx-queues-to-use",
-			     &plat->rx_queues_to_use);
+	if (!of_property_read_u32(rx_node, "snps,rx-queues-to-use", &value)) {
+		if (value > U8_MAX)
+			value = U8_MAX;
+		plat->rx_queues_to_use = value;
+	}
 
 	if (of_property_read_bool(rx_node, "snps,rx-sched-sp"))
 		plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP;
@@ -208,8 +212,11 @@ static int stmmac_mtl_setup(struct platform_device *pdev,
 	}
 
 	/* Processing TX queues common config */
-	of_property_read_u32(tx_node, "snps,tx-queues-to-use",
-			     &plat->tx_queues_to_use);
+	if (!of_property_read_u32(tx_node, "snps,tx-queues-to-use", &value)) {
+		if (value > U8_MAX)
+			value = U8_MAX;
+		plat->tx_queues_to_use = value;
+	}
 
 	if (of_property_read_bool(tx_node, "snps,tx-sched-wrr"))
 		plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WRR;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 411cdd3ea034..03fd85060a73 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -244,8 +244,8 @@ struct plat_stmmacenet_data {
 	int tx_fifo_size;
 	int rx_fifo_size;
 	u32 host_dma_width;
-	u32 rx_queues_to_use;
-	u32 tx_queues_to_use;
+	u8 rx_queues_to_use;
+	u8 tx_queues_to_use;
 	u8 rx_sched_algorithm;
 	u8 tx_sched_algorithm;
 	struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES];
-- 
cgit v1.2.3


From 758ed85aadd0668c66cb359c63f384992b10938c Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 9 Mar 2026 09:39:39 +0000
Subject: net: stmmac: use u8 for host_dma_width and similar struct members

We aren't going to see >= 256-bit address busses soon, so reduce
host_dma_width and associated other struct members that initialise
this from u32 to u8.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Acked-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com> # qcom-ethqos
Tested-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Link: https://patch.msgid.link/E1vzX5P-0000000CVsK-0iwX@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c         | 2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c    | 6 +++---
 drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +-
 include/linux/stmmac.h                                  | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index 9f5a15b81f8a..9d1bd72ffb73 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -42,8 +42,8 @@
 struct imx_priv_data;
 
 struct imx_dwmac_ops {
-	u32 addr_width;
 	u32 flags;
+	u8 addr_width;
 	bool mac_rgmii_txclk_auto_adj;
 
 	int (*fix_soc_reset)(struct stmmac_priv *priv);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index a139db6a8cbb..30ae0dba7fff 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -93,9 +93,9 @@ struct mediatek_dwmac_variant {
 	const char * const *clk_list;
 	int num_clks;
 
-	u32 dma_bit_mask;
 	u32 rx_delay_max;
 	u32 tx_delay_max;
+	u8 dma_bit_mask;
 };
 
 /* list of clocks required for mac */
@@ -268,9 +268,9 @@ static const struct mediatek_dwmac_variant mt2712_gmac_variant = {
 		.dwmac_set_delay = mt2712_set_delay,
 		.clk_list = mt2712_dwmac_clk_l,
 		.num_clks = ARRAY_SIZE(mt2712_dwmac_clk_l),
-		.dma_bit_mask = 33,
 		.rx_delay_max = 17600,
 		.tx_delay_max = 17600,
+		.dma_bit_mask = 33,
 };
 
 static int mt8195_set_interface(struct mediatek_dwmac_plat_data *plat,
@@ -418,9 +418,9 @@ static const struct mediatek_dwmac_variant mt8195_gmac_variant = {
 	.dwmac_set_delay = mt8195_set_delay,
 	.clk_list = mt8195_dwmac_clk_l,
 	.num_clks = ARRAY_SIZE(mt8195_dwmac_clk_l),
-	.dma_bit_mask = 35,
 	.rx_delay_max = 9280,
 	.tx_delay_max = 9280,
+	.dma_bit_mask = 35,
 };
 
 static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 388e9fdeb86c..3ccf20fdf52a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -91,8 +91,8 @@ struct ethqos_emac_driver_data {
 	unsigned int num_rgmii_por;
 	bool rgmii_config_loopback_en;
 	bool has_emac_ge_3;
+	u8 dma_addr_width;
 	const char *link_clk_name;
-	u32 dma_addr_width;
 	struct dwmac4_addrs dwmac4_addrs;
 	bool needs_sgmii_loopback;
 };
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 03fd85060a73..11886189bf51 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -243,7 +243,7 @@ struct plat_stmmacenet_data {
 	int unicast_filter_entries;
 	int tx_fifo_size;
 	int rx_fifo_size;
-	u32 host_dma_width;
+	u8 host_dma_width;
 	u8 rx_queues_to_use;
 	u8 tx_queues_to_use;
 	u8 rx_sched_algorithm;
-- 
cgit v1.2.3


From 9fe167ab790b10c9eb9ef82f46a03c83f9953b61 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 9 Mar 2026 09:39:44 +0000
Subject: net: stmmac: add documentation for stmmac_dma_cfg members

Add documentation of each of the struct stmmac_dma_cfg members. dche
remains undocumented as I don't have documentation that covers this.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Link: https://patch.msgid.link/E1vzX5U-0000000CVsQ-162V@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 11886189bf51..1af3a5e197c9 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -93,16 +93,37 @@ struct stmmac_mdio_bus_data {
 };
 
 struct stmmac_dma_cfg {
+	/* pbl: programmable burst limit
+	 * txpbl: transmit programmable burst limit
+	 * rxpbl: receive programmable burst limit
+	 * If txpbl or rxpbl are zero, the value of pbl will be substituted.
+	 * Range 0 - 63.
+	 */
 	int pbl;
 	int txpbl;
 	int rxpbl;
+	/* pblx8: multiplies pbl, txpbl, rxpbl by a factor of 8 for dwmac >=
+	 * 3.50a, or a factor of 4 for previous versions.
+	 */
 	bool pblx8;
+	/* fixed_burst:
+	 *  when set, AXI bursts defined by axi_blen_regval are permitted.
+	 *   AHB uses SINGLE, INCR4, INCR8 or INCR16 during burst transfers.
+	 *  when clear, AXI and AHB use SINGLE or INCR bursts.
+	 */
 	bool fixed_burst;
+	/* mixed_burst:
+	 *  when set and fixed_burst is clear, AHB uses INCR for bursts > 16
+	 *  and SINGLE or INCRx for bursts <= 16.
+	 */
 	bool mixed_burst;
+	/* aal: address aligned bursts for AHB and AXI master interface */
 	bool aal;
+	bool dche;
 	bool eame;
+	/* multi_msi_en: stmmac core internal */
 	bool multi_msi_en;
-	bool dche;
+	/* atds: stmmac core internal */
 	bool atds;
 };
 
-- 
cgit v1.2.3


From 315bab9411f3bd3465a47a64a3e44323bfab60be Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 9 Mar 2026 09:39:49 +0000
Subject: net: stmmac: add documentation for clocks

Add documentation covering stmmac_clk, pclk, clk_ptp_ref and clk_tx_i
in the hope that this will help understand what each of these clocks
are for.

There is confusion around stmmac_clk and pclk which can't be easily
resolved today as the Imagination Technologies Pistachio board that
pclk was introduced for has no public documentation and is likely now
obsolete. So the origins of pclk are lost to the winds of time.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com>
Link: https://patch.msgid.link/E1vzX5Z-0000000CVsb-1XTm@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 1af3a5e197c9..937985276e6b 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -300,10 +300,41 @@ struct plat_stmmacenet_data {
 	struct phylink_pcs *(*select_pcs)(struct stmmac_priv *priv,
 					  phy_interface_t interface);
 	void *bsp_priv;
+
+	/* stmmac clocks:
+	 *  stmmac_clk: CSR clock (which can be hclk_i, clk_csr_i, aclk_i,
+	 *    or clk_app_i depending on GMAC configuration). This clock
+	 *    generates the MDC clock.
+	 *
+	 *  pclk: introduced for Imagination Technologies Pistachio board -
+	 *    see 5f9755d26fbf ("stmmac: Add an optional register interface
+	 *    clock"). This is probably used for cases where separate clocks
+	 *    are provided for the host interface and register interface. In
+	 *    this case, as the MDC clock is derived from stmmac_clk, pclk
+	 *    can only really be the "application clock" for the "host
+	 *    interface" and not the "register interface" aka CSR clock as
+	 *    it is never used when determining the divider for the MDC
+	 *    clock.
+	 *
+	 *  clk_ptp_ref: optional PTP reference clock (clk_ptp_ref_i). When
+	 *    present, this clock increments the timestamp value. Otherwise,
+	 *    the rate of stmmac_clk will be used.
+	 *
+	 *  clk_tx_i: MAC transmit clock, which will be 2.5MHz for 10M,
+	 *    25MHz for 100M, or 125MHz for 1G irrespective of the interface
+	 *    mode. For the DWMAC PHY interface modes:
+	 *
+	 *    GMII/MII	PHY's transmit clock for 10M (2.5MHz) or 100M (25MHz),
+	 *		or 125MHz local clock for 1G mode
+	 *    RMII	50MHz RMII clock divided by 2 or 20.
+	 *    RGMII	125MHz local clock divided by 1, 5, or 50.
+	 *    SGMII	125MHz SerDes clock divided by 1, 5, or 50.
+	 *    TBI/RTBI	125MHz SerDes clock
+	 */
 	struct clk *stmmac_clk;
 	struct clk *pclk;
 	struct clk *clk_ptp_ref;
-	struct clk *clk_tx_i;		/* clk_tx_i to MAC core */
+	struct clk *clk_tx_i;
 	unsigned long clk_ptp_rate;
 	unsigned long clk_ref_rate;
 	struct clk_bulk_data *clks;
-- 
cgit v1.2.3


From 7aba71dbc41641e43a79fb9f6fac91719094b4fb Mon Sep 17 00:00:00 2001
From: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Date: Thu, 5 Mar 2026 10:39:06 +0100
Subject: mm/mmu_notifier: Allow two-pass struct mmu_interval_notifiers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GPU use-cases for mmu_interval_notifiers with hmm often involve
starting a gpu operation and then waiting for it to complete.
These operations are typically context preemption or TLB flushing.

With single-pass notifiers per GPU this doesn't scale in
multi-gpu scenarios. In those scenarios we'd want to first start
preemption- or TLB flushing on all GPUs and as a second pass wait
for them to complete.

One can do this on per-driver basis multiplexing per-driver
notifiers but that would mean sharing the notifier "user" lock
across all GPUs and that doesn't scale well either, so adding support
for multi-pass in the core appears to be the right choice.

Implement two-pass capability in the mmu_interval_notifier. Use a
linked list for the final passes to minimize the impact for
use-cases that don't need the multi-pass functionality by avoiding
a second interval tree walk, and to be able to easily pass data
between the two passes.

v1:
- Restrict to two passes (Jason Gunthorpe)
- Improve on documentation (Jason Gunthorpe)
- Improve on function naming (Alistair Popple)
v2:
- Include the invalidate_finish() callback in the
  struct mmu_interval_notifier_ops.
- Update documentation (GitHub Copilot:claude-sonnet-4.6)
- Use lockless list for list management.
v3:
- Update kerneldoc for the struct mmu_interval_notifier_finish::list member
  (Matthew Brost)
- Add a WARN_ON_ONCE() checking for NULL invalidate_finish() op if
  if invalidate_start() is non-NULL. (Matthew Brost)
v4:
- Addressed documentation review comments by David Hildenbrand.

Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Simona Vetter <simona.vetter@ffwll.ch>
Cc: Dave Airlie <airlied@gmail.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: <dri-devel@lists.freedesktop.org>
Cc: <linux-mm@kvack.org>
Cc: <linux-kernel@vger.kernel.org>

Assisted-by: GitHub Copilot:claude-sonnet-4.6 # Documentation only.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patch.msgid.link/20260305093909.43623-2-thomas.hellstrom@linux.intel.com
---
 include/linux/mmu_notifier.h | 42 ++++++++++++++++++++++++++++
 mm/mmu_notifier.c            | 65 ++++++++++++++++++++++++++++++++++++++------
 2 files changed, 98 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index d1094c2d5fb6..b60673a8e0bb 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -233,16 +233,58 @@ struct mmu_notifier {
 	unsigned int users;
 };
 
+/**
+ * struct mmu_interval_notifier_finish - mmu_interval_notifier two-pass abstraction
+ * @link: Lockless list link for the notifiers pending pass list
+ * @notifier: The mmu_interval_notifier for which the finish pass is called.
+ *
+ * Allocate, typically using GFP_NOWAIT in the interval notifier's start pass.
+ * Note that with a large number of notifiers implementing two passes,
+ * allocation with GFP_NOWAIT will become increasingly likely to fail, so consider
+ * implementing a small pool instead of using kmalloc() allocations.
+ *
+ * If the implementation needs to pass data between the start and the finish passes,
+ * the recommended way is to embed struct mmu_interval_notifier_finish into a larger
+ * structure that also contains the data needed to be shared. Keep in mind that
+ * a notifier callback can be invoked in parallel, and each invocation needs its
+ * own struct mmu_interval_notifier_finish.
+ *
+ * If allocation fails, then the &mmu_interval_notifier_ops->invalidate_start op
+ * needs to implements the full notifier functionality. Please refer to its
+ * documentation.
+ */
+struct mmu_interval_notifier_finish {
+	struct llist_node link;
+	struct mmu_interval_notifier *notifier;
+};
+
 /**
  * struct mmu_interval_notifier_ops
  * @invalidate: Upon return the caller must stop using any SPTEs within this
  *              range. This function can sleep. Return false only if sleeping
  *              was required but mmu_notifier_range_blockable(range) is false.
+ * @invalidate_start: Similar to @invalidate, but intended for two-pass notifier
+ *                    callbacks where the call to @invalidate_start is the first
+ *                    pass and any struct mmu_interval_notifier_finish pointer
+ *                    returned in the @finish parameter describes the finish pass.
+ *                    If *@finish is %NULL on return, then no final pass will be
+ *                    called, and @invalidate_start needs to implement the full
+ *                    notifier, behaving like @invalidate. The value of *@finish
+ *                    is guaranteed to be %NULL at function entry.
+ * @invalidate_finish: Called as the second pass for any notifier that returned
+ *                     a non-NULL *@finish from @invalidate_start. The @finish
+ *                     pointer passed here is the same one returned by
+ *                     @invalidate_start.
  */
 struct mmu_interval_notifier_ops {
 	bool (*invalidate)(struct mmu_interval_notifier *interval_sub,
 			   const struct mmu_notifier_range *range,
 			   unsigned long cur_seq);
+	bool (*invalidate_start)(struct mmu_interval_notifier *interval_sub,
+				 const struct mmu_notifier_range *range,
+				 unsigned long cur_seq,
+				 struct mmu_interval_notifier_finish **finish);
+	void (*invalidate_finish)(struct mmu_interval_notifier_finish *finish);
 };
 
 struct mmu_interval_notifier {
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 8e0125dc0522..33023dbbd76d 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -260,6 +260,15 @@ mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub)
 }
 EXPORT_SYMBOL_GPL(mmu_interval_read_begin);
 
+static void mn_itree_finish_pass(struct llist_head *finish_passes)
+{
+	struct llist_node *first = llist_reverse_order(__llist_del_all(finish_passes));
+	struct mmu_interval_notifier_finish *f, *next;
+
+	llist_for_each_entry_safe(f, next, first, link)
+		f->notifier->ops->invalidate_finish(f);
+}
+
 static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
 			     struct mm_struct *mm)
 {
@@ -271,6 +280,7 @@ static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
 		.end = ULONG_MAX,
 	};
 	struct mmu_interval_notifier *interval_sub;
+	LLIST_HEAD(finish_passes);
 	unsigned long cur_seq;
 	bool ret;
 
@@ -278,11 +288,27 @@ static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
 		     mn_itree_inv_start_range(subscriptions, &range, &cur_seq);
 	     interval_sub;
 	     interval_sub = mn_itree_inv_next(interval_sub, &range)) {
-		ret = interval_sub->ops->invalidate(interval_sub, &range,
-						    cur_seq);
+		if (interval_sub->ops->invalidate_start) {
+			struct mmu_interval_notifier_finish *finish = NULL;
+
+			ret = interval_sub->ops->invalidate_start(interval_sub,
+								  &range,
+								  cur_seq,
+								  &finish);
+			if (ret && finish) {
+				finish->notifier = interval_sub;
+				__llist_add(&finish->link, &finish_passes);
+			}
+
+		} else {
+			ret = interval_sub->ops->invalidate(interval_sub,
+							    &range,
+							    cur_seq);
+		}
 		WARN_ON(!ret);
 	}
 
+	mn_itree_finish_pass(&finish_passes);
 	mn_itree_inv_end(subscriptions);
 }
 
@@ -430,7 +456,9 @@ static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
 			       const struct mmu_notifier_range *range)
 {
 	struct mmu_interval_notifier *interval_sub;
+	LLIST_HEAD(finish_passes);
 	unsigned long cur_seq;
+	int err = 0;
 
 	for (interval_sub =
 		     mn_itree_inv_start_range(subscriptions, range, &cur_seq);
@@ -438,23 +466,41 @@ static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
 	     interval_sub = mn_itree_inv_next(interval_sub, range)) {
 		bool ret;
 
-		ret = interval_sub->ops->invalidate(interval_sub, range,
-						    cur_seq);
+		if (interval_sub->ops->invalidate_start) {
+			struct mmu_interval_notifier_finish *finish = NULL;
+
+			ret = interval_sub->ops->invalidate_start(interval_sub,
+								  range,
+								  cur_seq,
+								  &finish);
+			if (ret && finish) {
+				finish->notifier = interval_sub;
+				__llist_add(&finish->link, &finish_passes);
+			}
+
+		} else {
+			ret = interval_sub->ops->invalidate(interval_sub,
+							    range,
+							    cur_seq);
+		}
 		if (!ret) {
 			if (WARN_ON(mmu_notifier_range_blockable(range)))
 				continue;
-			goto out_would_block;
+			err = -EAGAIN;
+			break;
 		}
 	}
-	return 0;
 
-out_would_block:
+	mn_itree_finish_pass(&finish_passes);
+
 	/*
 	 * On -EAGAIN the non-blocking caller is not allowed to call
 	 * invalidate_range_end()
 	 */
-	mn_itree_inv_end(subscriptions);
-	return -EAGAIN;
+	if (err)
+		mn_itree_inv_end(subscriptions);
+
+	return err;
 }
 
 static int mn_hlist_invalidate_range_start(
@@ -977,6 +1023,7 @@ int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub,
 	struct mmu_notifier_subscriptions *subscriptions;
 	int ret;
 
+	WARN_ON_ONCE(ops->invalidate_start && !ops->invalidate_finish);
 	might_lock(&mm->mmap_lock);
 
 	subscriptions = smp_load_acquire(&mm->notifier_subscriptions);
-- 
cgit v1.2.3


From 05988dba11791ccbb458254484826b32f17f4ad2 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Wed, 4 Mar 2026 08:49:00 +0100
Subject: vdso/datastore: Allocate data pages dynamically
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allocating the data pages as part of the kernel image does not work on
SPARC. The MMU will raise a fault when userspace tries to access them.

Allocate the data pages through the page allocator instead.

Unused pages in the vDSO VMA are still allocated to keep the virtual
addresses aligned. Switch the mapping from PFNs to 'struct page' as that is
required for dynamically allocated pages.  This also aligns the allocation
of the datapages with the code pages and is a prerequisite for mlockall()
support.

VM_MIXEDMAP is necessary for the call to vmf_insert_page() in the timens
prefault path to work.

The data pages need to be order-0, non-compound pages so that the mapping
to userspace and the different orderings work.

These pages are also used by the timekeeping, random pool and architecture
initialization code. Some of these are running before the page allocator is
available. To keep these subsytems working without changes, introduce
early, statically data storage which will then replaced by the real one as
soon as that is available.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Link: https://patch.msgid.link/20260304-vdso-sparc64-generic-2-v6-3-d8eb3b0e1410@linutronix.de
---
 include/linux/vdso_datastore.h |  6 +++
 init/main.c                    |  2 +
 lib/vdso/datastore.c           | 92 +++++++++++++++++++++++++++---------------
 3 files changed, 68 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vdso_datastore.h b/include/linux/vdso_datastore.h
index a91fa24b06e0..0b530428db71 100644
--- a/include/linux/vdso_datastore.h
+++ b/include/linux/vdso_datastore.h
@@ -2,9 +2,15 @@
 #ifndef _LINUX_VDSO_DATASTORE_H
 #define _LINUX_VDSO_DATASTORE_H
 
+#ifdef CONFIG_HAVE_GENERIC_VDSO
 #include <linux/mm_types.h>
 
 extern const struct vm_special_mapping vdso_vvar_mapping;
 struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr);
 
+void __init vdso_setup_data_pages(void);
+#else /* !CONFIG_HAVE_GENERIC_VDSO */
+static inline void vdso_setup_data_pages(void) { }
+#endif /* CONFIG_HAVE_GENERIC_VDSO */
+
 #endif /* _LINUX_VDSO_DATASTORE_H */
diff --git a/init/main.c b/init/main.c
index 1cb395dd94e4..de867b2693d2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -105,6 +105,7 @@
 #include <linux/ptdump.h>
 #include <linux/time_namespace.h>
 #include <linux/unaligned.h>
+#include <linux/vdso_datastore.h>
 #include <net/net_namespace.h>
 
 #include <asm/io.h>
@@ -1119,6 +1120,7 @@ void start_kernel(void)
 	srcu_init();
 	hrtimers_init();
 	softirq_init();
+	vdso_setup_data_pages();
 	timekeeping_init();
 	time_init();
 
diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c
index 7377fcb6e1df..faebf5b7cd6e 100644
--- a/lib/vdso/datastore.c
+++ b/lib/vdso/datastore.c
@@ -1,52 +1,79 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
-#include <linux/linkage.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/time_namespace.h>
 #include <linux/types.h>
 #include <linux/vdso_datastore.h>
 #include <vdso/datapage.h>
 
-/*
- * The vDSO data page.
- */
+static u8 vdso_initdata[VDSO_NR_PAGES * PAGE_SIZE] __aligned(PAGE_SIZE) __initdata = {};
+
 #ifdef CONFIG_GENERIC_GETTIMEOFDAY
-static union {
-	struct vdso_time_data	data;
-	u8			page[PAGE_SIZE];
-} vdso_time_data_store __page_aligned_data;
-struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data;
-static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE);
+struct vdso_time_data *vdso_k_time_data __refdata =
+	(void *)&vdso_initdata[VDSO_TIME_PAGE_OFFSET * PAGE_SIZE];
+
+static_assert(sizeof(struct vdso_time_data) <= PAGE_SIZE);
 #endif /* CONFIG_GENERIC_GETTIMEOFDAY */
 
 #ifdef CONFIG_VDSO_GETRANDOM
-static union {
-	struct vdso_rng_data	data;
-	u8			page[PAGE_SIZE];
-} vdso_rng_data_store __page_aligned_data;
-struct vdso_rng_data *vdso_k_rng_data = &vdso_rng_data_store.data;
-static_assert(sizeof(vdso_rng_data_store) == PAGE_SIZE);
+struct vdso_rng_data *vdso_k_rng_data __refdata =
+	(void *)&vdso_initdata[VDSO_RNG_PAGE_OFFSET * PAGE_SIZE];
+
+static_assert(sizeof(struct vdso_rng_data) <= PAGE_SIZE);
 #endif /* CONFIG_VDSO_GETRANDOM */
 
 #ifdef CONFIG_ARCH_HAS_VDSO_ARCH_DATA
-static union {
-	struct vdso_arch_data	data;
-	u8			page[VDSO_ARCH_DATA_SIZE];
-} vdso_arch_data_store __page_aligned_data;
-struct vdso_arch_data *vdso_k_arch_data = &vdso_arch_data_store.data;
+struct vdso_arch_data *vdso_k_arch_data __refdata =
+	(void *)&vdso_initdata[VDSO_ARCH_PAGES_START * PAGE_SIZE];
 #endif /* CONFIG_ARCH_HAS_VDSO_ARCH_DATA */
 
+void __init vdso_setup_data_pages(void)
+{
+	unsigned int order = get_order(VDSO_NR_PAGES * PAGE_SIZE);
+	struct page *pages;
+
+	/*
+	 * Allocate the data pages dynamically. SPARC does not support mapping
+	 * static pages to be mapped into userspace.
+	 * It is also a requirement for mlockall() support.
+	 *
+	 * Do not use folios. In time namespaces the pages are mapped in a different order
+	 * to userspace, which is not handled by the folio optimizations in finish_fault().
+	 */
+	pages = alloc_pages(GFP_KERNEL, order);
+	if (!pages)
+		panic("Unable to allocate VDSO storage pages");
+
+	/* The pages are mapped one-by-one into userspace and each one needs to be refcounted. */
+	split_page(pages, order);
+
+	/* Move the data already written by other subsystems to the new pages */
+	memcpy(page_address(pages), vdso_initdata, VDSO_NR_PAGES * PAGE_SIZE);
+
+	if (IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY))
+		vdso_k_time_data = page_address(pages + VDSO_TIME_PAGE_OFFSET);
+
+	if (IS_ENABLED(CONFIG_VDSO_GETRANDOM))
+		vdso_k_rng_data = page_address(pages + VDSO_RNG_PAGE_OFFSET);
+
+	if (IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA))
+		vdso_k_arch_data = page_address(pages + VDSO_ARCH_PAGES_START);
+}
+
 static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 			     struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	struct page *timens_page = find_timens_vvar_page(vma);
-	unsigned long pfn;
+	struct page *page, *timens_page;
+
+	timens_page = find_timens_vvar_page(vma);
 
 	switch (vmf->pgoff) {
 	case VDSO_TIME_PAGE_OFFSET:
 		if (!IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY))
 			return VM_FAULT_SIGBUS;
-		pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
+		page = virt_to_page(vdso_k_time_data);
 		if (timens_page) {
 			/*
 			 * Fault in VVAR page too, since it will be accessed
@@ -56,10 +83,10 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 			vm_fault_t err;
 
 			addr = vmf->address + VDSO_TIMENS_PAGE_OFFSET * PAGE_SIZE;
-			err = vmf_insert_pfn(vma, addr, pfn);
+			err = vmf_insert_page(vma, addr, page);
 			if (unlikely(err & VM_FAULT_ERROR))
 				return err;
-			pfn = page_to_pfn(timens_page);
+			page = timens_page;
 		}
 		break;
 	case VDSO_TIMENS_PAGE_OFFSET:
@@ -72,24 +99,25 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 		 */
 		if (!IS_ENABLED(CONFIG_TIME_NS) || !timens_page)
 			return VM_FAULT_SIGBUS;
-		pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
+		page = virt_to_page(vdso_k_time_data);
 		break;
 	case VDSO_RNG_PAGE_OFFSET:
 		if (!IS_ENABLED(CONFIG_VDSO_GETRANDOM))
 			return VM_FAULT_SIGBUS;
-		pfn = __phys_to_pfn(__pa_symbol(vdso_k_rng_data));
+		page = virt_to_page(vdso_k_rng_data);
 		break;
 	case VDSO_ARCH_PAGES_START ... VDSO_ARCH_PAGES_END:
 		if (!IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA))
 			return VM_FAULT_SIGBUS;
-		pfn = __phys_to_pfn(__pa_symbol(vdso_k_arch_data)) +
-			vmf->pgoff - VDSO_ARCH_PAGES_START;
+		page = virt_to_page(vdso_k_arch_data) + vmf->pgoff - VDSO_ARCH_PAGES_START;
 		break;
 	default:
 		return VM_FAULT_SIGBUS;
 	}
 
-	return vmf_insert_pfn(vma, vmf->address, pfn);
+	get_page(page);
+	vmf->page = page;
+	return 0;
 }
 
 const struct vm_special_mapping vdso_vvar_mapping = {
@@ -101,7 +129,7 @@ struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned
 {
 	return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE,
 					VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP |
-					VM_PFNMAP | VM_SEALED_SYSMAP,
+					VM_MIXEDMAP | VM_SEALED_SYSMAP,
 					&vdso_vvar_mapping);
 }
 
-- 
cgit v1.2.3


From c453b9abb4f422461c1493ef74d63af0961a2d30 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 4 Mar 2026 08:49:11 +0100
Subject: clocksource: Remove ARCH_CLOCKSOURCE_DATA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After sparc64, there are no remaining users of ARCH_CLOCKSOURCE_DATA
and it can just be removed.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Andreas Larsson <andreas@gaisler.com>
Reviewed-by: Andreas Larsson <andreas@gaisler.com>
Acked-by: John Stultz <jstultz@google.com>
Link: https://patch.msgid.link/20260304-vdso-sparc64-generic-2-v6-14-d8eb3b0e1410@linutronix.de

[Thomas: drop sparc64 bits from the patch]
---
 include/linux/clocksource.h | 6 +-----
 kernel/time/Kconfig         | 4 ----
 2 files changed, 1 insertion(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 65b7c41471c3..12d853b18832 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -25,8 +25,7 @@ struct clocksource_base;
 struct clocksource;
 struct module;
 
-#if defined(CONFIG_ARCH_CLOCKSOURCE_DATA) || \
-    defined(CONFIG_GENERIC_GETTIMEOFDAY)
+#if defined(CONFIG_GENERIC_GETTIMEOFDAY)
 #include <asm/clocksource.h>
 #endif
 
@@ -106,9 +105,6 @@ struct clocksource {
 	u64			max_idle_ns;
 	u32			maxadj;
 	u32			uncertainty_margin;
-#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
-	struct arch_clocksource_data archdata;
-#endif
 	u64			max_cycles;
 	u64			max_raw_delta;
 	const char		*name;
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 7c6a52f7836c..fe3311877097 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -9,10 +9,6 @@
 config CLOCKSOURCE_WATCHDOG
 	bool
 
-# Architecture has extra clocksource data
-config ARCH_CLOCKSOURCE_DATA
-	bool
-
 # Architecture has extra clocksource init called from registration
 config ARCH_CLOCKSOURCE_INIT
 	bool
-- 
cgit v1.2.3


From 282b8eec8a4eab9a3ff3addf6dad2ce699594fe8 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Tue, 10 Feb 2026 13:22:08 +0100
Subject: net: cdc-ncm: cleanup device descriptor

Flags are boolean values, hence they should be typed
as bool, not as u8.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Link: https://patch.msgid.link/20260210122208.29244-1-oneukum@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/cdc_ncm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 4ac082a63173..97ef37a1ff4a 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -118,8 +118,8 @@ struct cdc_ncm_ctx {
 
 	u32 timer_interval;
 	u32 max_ndp_size;
-	u8 is_ndp16;
-	u8 filtering_supported;
+	bool is_ndp16;
+	bool filtering_supported;
 	union {
 		struct usb_cdc_ncm_ndp16 *delayed_ndp16;
 		struct usb_cdc_ncm_ndp32 *delayed_ndp32;
-- 
cgit v1.2.3


From b558a9cc107287bd49bd9256e5d965afa80acfd6 Mon Sep 17 00:00:00 2001
From: Amit Sunil Dhamne <amitsd@google.com>
Date: Mon, 23 Feb 2026 20:05:38 +0000
Subject: usb: typec: tcpm: add support for Sink Cap Extended msg response

Add support for responding to Sink Cap Extended msg request. To achieve
this, include parsing support for DT properties related to Sink Cap
Extended. The request for Sink Cap Ext is a control message while the
response is an extended message (chunked). As the Sink Caps Extended
Data Block size (24 Byte) is less than MaxExtendedMsgChunkLen (26 Byte),
a single chunk is sufficient to complete this AMS.

Supporting sink cap extended messages while responding to a
Get_Sink_Caps_Extended request when port is in Sink role is required in
order to be compliant with at least USB PD Rev3.1 Ver1.8.

Signed-off-by: Amit Sunil Dhamne <amitsd@google.com>
Reviewed-by: Badhri Jagan Sridharan <badhri@google.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://patch.msgid.link/20260223-skedb-v2-2-60675765bc7e@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 253 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/usb/pd.h        |  82 +++++++++++++-
 2 files changed, 332 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 3295d804cf87..5ea0b0e99e4d 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -12,6 +12,7 @@
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
+#include <linux/minmax.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/power_supply.h>
@@ -188,7 +189,8 @@
 	S(STRUCTURED_VDMS),			\
 	S(COUNTRY_INFO),			\
 	S(COUNTRY_CODES),			\
-	S(REVISION_INFORMATION)
+	S(REVISION_INFORMATION),		\
+	S(GETTING_SINK_EXTENDED_CAPABILITIES)
 
 #define GENERATE_ENUM(e)	e
 #define GENERATE_STRING(s)	#s
@@ -229,6 +231,7 @@ enum pd_msg_request {
 	PD_MSG_DATA_SINK_CAP,
 	PD_MSG_DATA_SOURCE_CAP,
 	PD_MSG_DATA_REV,
+	PD_MSG_EXT_SINK_CAP_EXT
 };
 
 enum adev_actions {
@@ -337,6 +340,42 @@ struct pd_timings {
 	u32 snk_bc12_cmpletion_time;
 };
 
+/* Convert microwatt to watt */
+#define UW_TO_W(pow)					((pow) / 1000000)
+
+/*
+ * struct pd_identifier - Contains info about PD identifiers
+ * @vid: Vendor ID (assigned by USB-IF)
+ * @pid: Product ID (assigned by manufacturer)
+ * @xid: Value assigned by USB-IF for product
+ */
+struct pd_identifier {
+	u16 vid;
+	u16 pid;
+	u32 xid;
+};
+
+/*
+ * struct sink_caps_ext_data - Sink extended capability data
+ * @load_step: Indicates the load step slew rate. Value of 0 indicates 150mA/us
+ *             & 1 indicates 500 mA/us
+ * @load_char: Snk overload characteristics
+ * @compliance: Types of sources the sink has been tested & certified on
+ * @modes: Charging caps & power sources supported
+ * @spr_min_pdp: Sink Minimum PDP for SPR mode (in Watts)
+ * @spr_op_pdp: Sink Operational PDP for SPR mode (in Watts)
+ * @spr_max_pdp: Sink Maximum PDP for SPR mode (in Watts)
+ */
+struct sink_caps_ext_data {
+	u8 load_step;
+	u16 load_char;
+	u8 compliance;
+	u8 modes;
+	u8 spr_min_pdp;
+	u8 spr_op_pdp;
+	u8 spr_max_pdp;
+};
+
 struct tcpm_port {
 	struct device *dev;
 
@@ -585,6 +624,9 @@ struct tcpm_port {
 
 	/* Indicates maximum (revision, version) supported */
 	struct pd_revision_info pd_rev;
+
+	struct pd_identifier pd_ident;
+	struct sink_caps_ext_data sink_caps_ext;
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dentry;
 	struct mutex logbuffer_lock;	/* log buffer access lock */
@@ -1367,6 +1409,64 @@ static int tcpm_pd_send_sink_caps(struct tcpm_port *port)
 	return tcpm_pd_transmit(port, TCPC_TX_SOP, &msg);
 }
 
+static int tcpm_pd_send_sink_cap_ext(struct tcpm_port *port)
+{
+	u16 operating_snk_watt = port->operating_snk_mw / 1000;
+	struct sink_caps_ext_data *data = &port->sink_caps_ext;
+	struct pd_identifier *pd_ident = &port->pd_ident;
+	struct sink_caps_ext_msg skedb = {0};
+	struct pd_message msg;
+	u8 data_obj_cnt;
+
+	if (!port->self_powered)
+		data->spr_op_pdp = operating_snk_watt;
+
+	/*
+	 * SPR Sink Minimum PDP indicates the minimum power required to operate
+	 * a sink device in its lowest level of functionality without requiring
+	 * power from the battery. We can use the operating_snk_watt value to
+	 * populate it, as operating_snk_watt indicates device's min operating
+	 * power.
+	 */
+	data->spr_min_pdp = operating_snk_watt;
+
+	if (data->spr_op_pdp < data->spr_min_pdp ||
+	    data->spr_max_pdp < data->spr_op_pdp) {
+		tcpm_log(port,
+			 "Invalid PDP values, Min PDP:%u, Op PDP:%u, Max PDP:%u",
+			 data->spr_min_pdp, data->spr_op_pdp, data->spr_max_pdp);
+		return -EOPNOTSUPP;
+	}
+
+	memset(&msg, 0, sizeof(msg));
+	skedb.vid = cpu_to_le16(pd_ident->vid);
+	skedb.pid = cpu_to_le16(pd_ident->pid);
+	skedb.xid = cpu_to_le32(pd_ident->xid);
+	skedb.skedb_ver = SKEDB_VER_1_0;
+	skedb.load_step = data->load_step;
+	skedb.load_char = cpu_to_le16(data->load_char);
+	skedb.compliance = data->compliance;
+	skedb.modes = data->modes;
+	skedb.spr_min_pdp = data->spr_min_pdp;
+	skedb.spr_op_pdp = data->spr_op_pdp;
+	skedb.spr_max_pdp = data->spr_max_pdp;
+	memcpy(msg.ext_msg.data, &skedb, sizeof(skedb));
+	msg.ext_msg.header = PD_EXT_HDR_LE(sizeof(skedb),
+					   0, /* Denotes if request chunk */
+					   0, /* Chunk Number */
+					   1  /* Chunked */);
+
+	data_obj_cnt = count_chunked_data_objs(sizeof(skedb));
+	msg.header = cpu_to_le16(PD_HEADER(PD_EXT_SINK_CAP_EXT,
+					   port->pwr_role,
+					   port->data_role,
+					   port->negotiated_rev,
+					   port->message_id,
+					   data_obj_cnt,
+					   1 /* Denotes if ext header */));
+	return tcpm_pd_transmit(port, TCPC_TX_SOP, &msg);
+}
+
 static void mod_tcpm_delayed_work(struct tcpm_port *port, unsigned int delay_ms)
 {
 	if (delay_ms) {
@@ -3646,6 +3746,19 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 					   PD_MSG_CTRL_NOT_SUPP,
 					   NONE_AMS);
 		break;
+	case PD_CTRL_GET_SINK_CAP_EXT:
+		/* This is an unsupported message if port type is SRC */
+		if (port->negotiated_rev >= PD_REV30 &&
+		    port->port_type != TYPEC_PORT_SRC)
+			tcpm_pd_handle_msg(port, PD_MSG_EXT_SINK_CAP_EXT,
+					   GETTING_SINK_EXTENDED_CAPABILITIES);
+		else
+			tcpm_pd_handle_msg(port,
+					   port->negotiated_rev < PD_REV30 ?
+					   PD_MSG_CTRL_REJECT :
+					   PD_MSG_CTRL_NOT_SUPP,
+					   NONE_AMS);
+		break;
 	default:
 		tcpm_pd_handle_msg(port,
 				   port->negotiated_rev < PD_REV30 ?
@@ -3898,6 +4011,16 @@ static bool tcpm_send_queued_message(struct tcpm_port *port)
 					 ret);
 			tcpm_ams_finish(port);
 			break;
+		case PD_MSG_EXT_SINK_CAP_EXT:
+			ret = tcpm_pd_send_sink_cap_ext(port);
+			if (ret == -EOPNOTSUPP)
+				tcpm_pd_send_control(port, PD_CTRL_NOT_SUPP, TCPC_TX_SOP);
+			else if (ret < 0)
+				tcpm_log(port,
+					 "Unable to transmit sink cap extended, ret=%d",
+					 ret);
+			tcpm_ams_finish(port);
+			break;
 		default:
 			break;
 		}
@@ -7282,6 +7405,129 @@ static void tcpm_fw_get_timings(struct tcpm_port *port, struct fwnode_handle *fw
 		port->timings.snk_bc12_cmpletion_time = val;
 }
 
+static void tcpm_fw_get_pd_ident(struct tcpm_port *port)
+{
+	struct pd_identifier *pd_ident = &port->pd_ident;
+	u32 *vdo;
+
+	/* First 3 vdo values contain info regarding USB PID, VID & XID */
+	if (port->nr_snk_vdo >= 3)
+		vdo = port->snk_vdo;
+	else if (port->nr_snk_vdo_v1 >= 3)
+		vdo = port->snk_vdo_v1;
+	else
+		return;
+
+	pd_ident->vid = PD_IDH_VID(vdo[0]);
+	pd_ident->pid = PD_PRODUCT_PID(vdo[2]);
+	pd_ident->xid = PD_CSTAT_XID(vdo[1]);
+	tcpm_log(port, "vid:%#x pid:%#x xid:%#x",
+		 pd_ident->vid, pd_ident->pid, pd_ident->xid);
+}
+
+static void tcpm_parse_snk_pdos(struct tcpm_port *port)
+{
+	struct sink_caps_ext_data *caps = &port->sink_caps_ext;
+	u32 max_mv, max_ma;
+	u8 avs_tier1_pdp, avs_tier2_pdp;
+	int i, pdo_itr;
+	u32 *snk_pdos;
+
+	for (i = 0; i < port->pd_count; ++i) {
+		snk_pdos = port->pd_list[i]->sink_desc.pdo;
+		for (pdo_itr = 0; pdo_itr < PDO_MAX_OBJECTS && snk_pdos[pdo_itr];
+		     ++pdo_itr) {
+			u32 pdo = snk_pdos[pdo_itr];
+			u8 curr_snk_pdp = 0;
+
+			switch (pdo_type(pdo)) {
+			case PDO_TYPE_FIXED:
+				max_mv = pdo_fixed_voltage(pdo);
+				max_ma = pdo_fixed_current(pdo);
+				curr_snk_pdp = UW_TO_W(max_mv * max_ma);
+				break;
+			case PDO_TYPE_BATT:
+				curr_snk_pdp = UW_TO_W(pdo_max_power(pdo));
+				break;
+			case PDO_TYPE_VAR:
+				max_mv = pdo_max_voltage(pdo);
+				max_ma = pdo_max_current(pdo);
+				curr_snk_pdp = UW_TO_W(max_mv * max_ma);
+				break;
+			case PDO_TYPE_APDO:
+				if (pdo_apdo_type(pdo) == APDO_TYPE_PPS) {
+					max_mv = pdo_pps_apdo_max_voltage(pdo);
+					max_ma = pdo_pps_apdo_max_current(pdo);
+					curr_snk_pdp = UW_TO_W(max_mv * max_ma);
+					caps->modes |= SINK_MODE_PPS;
+				} else if (pdo_apdo_type(pdo) ==
+					   APDO_TYPE_SPR_AVS) {
+					avs_tier1_pdp = UW_TO_W(SPR_AVS_TIER1_MAX_VOLT_MV
+						* pdo_spr_avs_apdo_9v_to_15v_max_current_ma(pdo));
+					avs_tier2_pdp = UW_TO_W(SPR_AVS_TIER2_MAX_VOLT_MV
+						* pdo_spr_avs_apdo_15v_to_20v_max_current_ma(pdo));
+					curr_snk_pdp = max(avs_tier1_pdp, avs_tier2_pdp);
+					caps->modes |= SINK_MODE_AVS;
+				}
+				break;
+			default:
+				tcpm_log(port, "Invalid source PDO type, ignoring");
+				continue;
+			}
+
+			caps->spr_max_pdp = max(caps->spr_max_pdp,
+						curr_snk_pdp);
+		}
+	}
+}
+
+static void tcpm_fw_get_sink_caps_ext(struct tcpm_port *port,
+				      struct fwnode_handle *fwnode)
+{
+	struct sink_caps_ext_data *caps = &port->sink_caps_ext;
+	int ret;
+	u32 val;
+
+	/*
+	 * Load step represents the change in current per usec that a given
+	 * source can tolerate while maintaining Vbus within the vSrcValid
+	 * range. For a sink this represents the "preferred" load-step value. It
+	 * can only have 2 values (150 mA/usec or 500 mA/usec) with 150 mA/usec
+	 * being the default.
+	 */
+	ret = fwnode_property_read_u32(fwnode, "sink-load-step", &val);
+	if (!ret)
+		caps->load_step = val == 500 ? 1 : 0;
+
+	fwnode_property_read_u16(fwnode, "sink-load-characteristics",
+				 &caps->load_char);
+	fwnode_property_read_u8(fwnode, "sink-compliance", &caps->compliance);
+	caps->modes = SINK_MODE_VBUS;
+
+	/*
+	 * As per "6.5.13.14" SPR Sink Operational PDP definition, for battery
+	 * powered devices, this value will correspond to the PDP of the
+	 * charging adapter either shipped or recommended for use with it. For
+	 * batteryless sink devices SPR Operational PDP indicates the power
+	 * required to operate all the device's functional modes. Hence, this
+	 * value may be considered equal to port's operating_snk_mw. As
+	 * operating_sink_mw can change as per the pd set used thus, OP PDP
+	 * is determined when populating Sink Caps Extended Data Block.
+	 */
+	if (port->self_powered) {
+		fwnode_property_read_u32(fwnode, "charging-adapter-pdp-milliwatt",
+					 &val);
+		caps->spr_op_pdp = (u8)(val / 1000);
+		caps->modes |= SINK_MODE_BATT;
+	}
+
+	tcpm_parse_snk_pdos(port);
+	tcpm_log(port,
+		 "load-step:%#x load-char:%#x compl:%#x op-pdp:%#x max-pdp:%#x",
+		 caps->load_step, caps->load_char, caps->compliance,
+		 caps->spr_op_pdp, caps->spr_max_pdp);
+}
+
 static int tcpm_fw_get_caps(struct tcpm_port *port, struct fwnode_handle *fwnode)
 {
 	struct fwnode_handle *capabilities, *caps = NULL;
@@ -7455,6 +7701,9 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, struct fwnode_handle *fwnode
 		}
 	}
 
+	if (port->port_type != TYPEC_PORT_SRC)
+		tcpm_fw_get_sink_caps_ext(port, fwnode);
+
 put_caps:
 	if (caps != fwnode)
 		fwnode_handle_put(caps);
@@ -7497,6 +7746,8 @@ static int tcpm_fw_get_snk_vdos(struct tcpm_port *port, struct fwnode_handle *fw
 			return ret;
 	}
 
+	tcpm_fw_get_pd_ident(port);
+
 	return 0;
 }
 
diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h
index 6ccd1b2af993..5a98983195cb 100644
--- a/include/linux/usb/pd.h
+++ b/include/linux/usb/pd.h
@@ -34,7 +34,8 @@ enum pd_ctrl_msg_type {
 	PD_CTRL_FR_SWAP = 19,
 	PD_CTRL_GET_PPS_STATUS = 20,
 	PD_CTRL_GET_COUNTRY_CODES = 21,
-	/* 22-23 Reserved */
+	PD_CTRL_GET_SINK_CAP_EXT = 22,
+	/* 23 Reserved */
 	PD_CTRL_GET_REVISION = 24,
 	/* 25-31 Reserved */
 };
@@ -72,7 +73,8 @@ enum pd_ext_msg_type {
 	PD_EXT_PPS_STATUS = 12,
 	PD_EXT_COUNTRY_INFO = 13,
 	PD_EXT_COUNTRY_CODES = 14,
-	/* 15-31 Reserved */
+	PD_EXT_SINK_CAP_EXT = 15,
+	/* 16-31 Reserved */
 };
 
 #define PD_REV10	0x0
@@ -205,6 +207,72 @@ struct pd_message {
 	};
 } __packed;
 
+/*
+ * count_chunked_data_objs - Helper to calculate number of Data Objects on a 4
+ *   byte boundary.
+ * @size: Size of data block for extended message. Should *not* include extended
+ *   header size.
+ */
+static inline u8 count_chunked_data_objs(u32 size)
+{
+	size += offsetof(struct pd_chunked_ext_message_data, data);
+	return ((size / 4) + (size % 4 ? 1 : 0));
+}
+
+/* Sink Caps Extended Data Block Version */
+#define SKEDB_VER_1_0				1
+
+/* Sink Caps Extended Sink Modes */
+#define SINK_MODE_PPS		BIT(0)
+#define SINK_MODE_VBUS		BIT(1)
+#define SINK_MODE_AC_SUPPLY	BIT(2)
+#define SINK_MODE_BATT		BIT(3)
+#define SINK_MODE_BATT_UL	BIT(4) /* Unlimited battery power supply */
+#define SINK_MODE_AVS		BIT(5)
+
+/**
+ * struct sink_caps_ext_msg - Sink extended capability PD message
+ * @vid: Vendor ID
+ * @pid: Product ID
+ * @xid: Value assigned by USB-IF for product
+ * @fw: Firmware version
+ * @hw: Hardware version
+ * @skedb_ver: Sink Caps Extended Data Block (SKEDB) Version
+ * @load_step: Indicates the load step slew rate.
+ * @load_char: Sink overload characteristics
+ * @compliance: Types of sources the sink has been tested & certified on
+ * @touch_temp: Indicates the IEC standard to which the touch temperature
+ *              conforms to (if applicable).
+ * @batt_info: Indicates number batteries and hot swappable ports
+ * @modes: Charging caps & power sources supported
+ * @spr_min_pdp: Sink Minimum PDP for SPR mode
+ * @spr_op_pdp: Sink Operational PDP for SPR mode
+ * @spr_max_pdp: Sink Maximum PDP for SPR mode
+ * @epr_min_pdp: Sink Minimum PDP for EPR mode
+ * @epr_op_pdp: Sink Operational PDP for EPR mode
+ * @epr_max_pdp: Sink Maximum PDP for EPR mode
+ */
+struct sink_caps_ext_msg {
+	__le16 vid;
+	__le16 pid;
+	__le32 xid;
+	u8 fw;
+	u8 hw;
+	u8 skedb_ver;
+	u8 load_step;
+	__le16 load_char;
+	u8 compliance;
+	u8 touch_temp;
+	u8 batt_info;
+	u8 modes;
+	u8 spr_min_pdp;
+	u8 spr_op_pdp;
+	u8 spr_max_pdp;
+	u8 epr_min_pdp;
+	u8 epr_op_pdp;
+	u8 epr_max_pdp;
+} __packed;
+
 /* PDO: Power Data Object */
 #define PDO_MAX_OBJECTS		7
 
@@ -329,6 +397,11 @@ enum pd_apdo_type {
 #define PDO_SPR_AVS_APDO_9V_TO_15V_MAX_CURR	GENMASK(19, 10)	/* 10mA unit */
 #define PDO_SPR_AVS_APDO_15V_TO_20V_MAX_CURR	GENMASK(9, 0)	/* 10mA unit */
 
+/* SPR AVS has two different current ranges 9V - 15V, 15V - 20V */
+#define SPR_AVS_TIER1_MIN_VOLT_MV		9000
+#define SPR_AVS_TIER1_MAX_VOLT_MV		15000
+#define SPR_AVS_TIER2_MAX_VOLT_MV		20000
+
 static inline enum pd_pdo_type pdo_type(u32 pdo)
 {
 	return (pdo >> PDO_TYPE_SHIFT) & PDO_TYPE_MASK;
@@ -339,6 +412,11 @@ static inline unsigned int pdo_fixed_voltage(u32 pdo)
 	return ((pdo >> PDO_FIXED_VOLT_SHIFT) & PDO_VOLT_MASK) * 50;
 }
 
+static inline unsigned int pdo_fixed_current(u32 pdo)
+{
+	return ((pdo >> PDO_FIXED_CURR_SHIFT) & PDO_CURR_MASK) * 10;
+}
+
 static inline unsigned int pdo_min_voltage(u32 pdo)
 {
 	return ((pdo >> PDO_VAR_MIN_VOLT_SHIFT) & PDO_VOLT_MASK) * 50;
-- 
cgit v1.2.3


From e19eaffc5213fdd6179e849d3032929fae0d8c2c Mon Sep 17 00:00:00 2001
From: Rosen Penev <rosenp@gmail.com>
Date: Thu, 5 Mar 2026 14:44:10 -0800
Subject: mtd: concat: replace alloc + calloc with 1 alloc

A flex array can be used to reduce the allocation to 1.

And actually mtdconcat was using the pointer + 1 trick to point to the
overallocated area. Better alternatives exist.

Signed-off-by: Rosen Penev <rosenp@gmail.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/mtd_virt_concat.c | 8 +-------
 drivers/mtd/mtdconcat.c       | 5 +----
 include/linux/mtd/concat.h    | 2 +-
 3 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtd_virt_concat.c b/drivers/mtd/mtd_virt_concat.c
index 72689545e48e..37075ead0f33 100644
--- a/drivers/mtd/mtd_virt_concat.c
+++ b/drivers/mtd/mtd_virt_concat.c
@@ -182,18 +182,12 @@ static int mtd_virt_concat_create_item(struct device_node *parts,
 	for (i = 1; i < count; i++)
 		item->nodes[i] = of_parse_phandle(parts, CONCAT_PROP, (i - 1));
 
-	concat = kzalloc(sizeof(*concat), GFP_KERNEL);
+	concat = kzalloc_flex(*concat, subdev, count, GFP_KERNEL);
 	if (!concat) {
 		kfree(item);
 		return -ENOMEM;
 	}
 
-	concat->subdev = kcalloc(count, sizeof(*concat->subdev), GFP_KERNEL);
-	if (!concat->subdev) {
-		kfree(item);
-		kfree(concat);
-		return -ENOMEM;
-	}
 	item->concat = concat;
 
 	list_add_tail(&item->head, &concat_node_list);
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 241d15235d01..c97167d51fe2 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -627,7 +627,6 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
 				   const char *name)
 {				/* name for the new device   */
 	int i;
-	size_t size;
 	struct mtd_concat *concat;
 	struct mtd_info *subdev_master = NULL;
 	uint32_t max_erasesize, curr_erasesize;
@@ -640,15 +639,13 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
 	printk(KERN_NOTICE "into device \"%s\"\n", name);
 
 	/* allocate the device structure */
-	size = SIZEOF_STRUCT_MTD_CONCAT(num_devs);
-	concat = kzalloc(size, GFP_KERNEL);
+	concat = kzalloc_flex(*concat, subdev, num_devs, GFP_KERNEL);
 	if (!concat) {
 		printk
 		    ("memory allocation error while creating concatenated device \"%s\"\n",
 		     name);
 		return NULL;
 	}
-	concat->subdev = (struct mtd_info **) (concat + 1);
 
 	/*
 	 * Set up the new "super" device's MTD object structure, check for
diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h
index 2cd9d48958a8..f8d4d6ac1fc1 100644
--- a/include/linux/mtd/concat.h
+++ b/include/linux/mtd/concat.h
@@ -18,7 +18,7 @@
 struct mtd_concat {
 	struct mtd_info mtd;
 	int num_subdev;
-	struct mtd_info **subdev;
+	struct mtd_info *subdev[];
 };
 
 struct mtd_info *mtd_concat_create(
-- 
cgit v1.2.3


From 77dd8adabbc8ff845177b460de48b9d2cd579966 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 9 Mar 2026 13:52:22 +0100
Subject: efi: Drop unused efi_range_is_wc() function

efi_range_is_wc() has no callers, so remove it.

Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 664898d09ff5..72e76ec54641 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -832,27 +832,6 @@ extern int __init parse_efi_signature_list(
 	const void *data, size_t size,
 	efi_element_handler_t (*get_handler_for_guid)(const efi_guid_t *));
 
-/**
- * efi_range_is_wc - check the WC bit on an address range
- * @start: starting kvirt address
- * @len: length of range
- *
- * Consult the EFI memory map and make sure it's ok to set this range WC.
- * Returns true or false.
- */
-static inline int efi_range_is_wc(unsigned long start, unsigned long len)
-{
-	unsigned long i;
-
-	for (i = 0; i < len; i += (1UL << EFI_PAGE_SHIFT)) {
-		unsigned long paddr = __pa(start + i);
-		if (!(efi_mem_attributes(paddr) & EFI_MEMORY_WC))
-			return 0;
-	}
-	/* The range checked out */
-	return 1;
-}
-
 /*
  * We play games with efi_enabled so that the compiler will, if
  * possible, remove EFI-related code altogether.
-- 
cgit v1.2.3


From 12ae2c81b21cfaa193db2faf035d495807edc3a7 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 26 Feb 2026 14:50:59 +0100
Subject: clone: add CLONE_AUTOREAP

Add a new clone3() flag CLONE_AUTOREAP that makes a child process
auto-reap on exit without ever becoming a zombie. This is a per-process
property in contrast to the existing auto-reap mechanism via
SA_NOCLDWAIT or SIG_IGN for SIGCHLD which applies to all children of a
given parent.

Currently the only way to automatically reap children is to set
SA_NOCLDWAIT or SIG_IGN on SIGCHLD. This is a parent-scoped property
affecting all children which makes it unsuitable for libraries or
applications that need selective auto-reaping of specific children while
still being able to wait() on others.

CLONE_AUTOREAP stores an autoreap flag in the child's signal_struct.
When the child exits do_notify_parent() checks this flag and causes
exit_notify() to transition the task directly to EXIT_DEAD. Since the
flag lives on the child it survives reparenting: if the original parent
exits and the child is reparented to a subreaper or init the child still
auto-reaps when it eventually exits.

CLONE_AUTOREAP can be combined with CLONE_PIDFD to allow the parent to
monitor the child's exit via poll() and retrieve exit status via
PIDFD_GET_INFO. Without CLONE_PIDFD it provides a fire-and-forget
pattern where the parent simply doesn't care about the child's exit
status. No exit signal is delivered so exit_signal must be zero.

CLONE_AUTOREAP is rejected in combination with CLONE_PARENT. If a
CLONE_AUTOREAP child were to clone(CLONE_PARENT) the new grandchild
would inherit exit_signal == 0 from the autoreap parent's group leader
but without signal->autoreap. This grandchild would become a zombie that
never sends a signal and is never autoreaped - confusing and arguably
broken behavior.

The flag is not inherited by the autoreap process's own children. Each
child that should be autoreaped must be explicitly created with
CLONE_AUTOREAP.

Link: https://github.com/uapi-group/kernel-features/issues/45
Link: https://patch.msgid.link/20260226-work-pidfs-autoreap-v5-1-d148b984a989@kernel.org
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/sched/signal.h |  1 +
 include/uapi/linux/sched.h   |  5 +++--
 kernel/fork.c                | 17 ++++++++++++++++-
 kernel/ptrace.c              |  3 ++-
 kernel/signal.c              |  4 ++++
 5 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index a22248aebcf9..f842c86b806f 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -132,6 +132,7 @@ struct signal_struct {
 	 */
 	unsigned int		is_child_subreaper:1;
 	unsigned int		has_child_subreaper:1;
+	unsigned int		autoreap:1;
 
 #ifdef CONFIG_POSIX_TIMERS
 
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 359a14cc76a4..69f7b4f9eb0c 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -34,8 +34,9 @@
 #define CLONE_IO		0x80000000	/* Clone io context */
 
 /* Flags for the clone3() syscall. */
-#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
-#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
+#define CLONE_CLEAR_SIGHAND	(1ULL << 32) /* Clear any signal handler and reset to SIG_DFL. */
+#define CLONE_INTO_CGROUP	(1ULL << 33) /* Clone into a specific cgroup given the right permissions. */
+#define CLONE_AUTOREAP		(1ULL << 34) /* Auto-reap child on exit. */
 
 /*
  * cloning flags intersect with CSIGNAL so can be used with unshare and clone3
diff --git a/kernel/fork.c b/kernel/fork.c
index e832da9d15a4..10549574fda6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2028,6 +2028,18 @@ __latent_entropy struct task_struct *copy_process(
 			return ERR_PTR(-EINVAL);
 	}
 
+	if (clone_flags & CLONE_AUTOREAP) {
+		if (clone_flags & CLONE_THREAD)
+			return ERR_PTR(-EINVAL);
+		if (clone_flags & CLONE_PARENT)
+			return ERR_PTR(-EINVAL);
+		if (args->exit_signal)
+			return ERR_PTR(-EINVAL);
+	}
+
+	if ((clone_flags & CLONE_PARENT) && current->signal->autoreap)
+		return ERR_PTR(-EINVAL);
+
 	/*
 	 * Force any signals received before this point to be delivered
 	 * before the fork happens.  Collect up signals sent to multiple
@@ -2435,6 +2447,8 @@ __latent_entropy struct task_struct *copy_process(
 			 */
 			p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
 							 p->real_parent->signal->is_child_subreaper;
+			if (clone_flags & CLONE_AUTOREAP)
+				p->signal->autoreap = 1;
 			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			attach_pid(p, PIDTYPE_TGID);
@@ -2897,7 +2911,8 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
 {
 	/* Verify that no unknown flags are passed along. */
 	if (kargs->flags &
-	    ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
+	    ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP |
+	      CLONE_AUTOREAP))
 		return false;
 
 	/*
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 392ec2f75f01..68c17daef8d4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -549,7 +549,8 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
 	if (!dead && thread_group_empty(p)) {
 		if (!same_thread_group(p->real_parent, tracer))
 			dead = do_notify_parent(p, p->exit_signal);
-		else if (ignoring_children(tracer->sighand)) {
+		else if (ignoring_children(tracer->sighand) ||
+			 p->signal->autoreap) {
 			__wake_up_parent(p, tracer);
 			dead = true;
 		}
diff --git a/kernel/signal.c b/kernel/signal.c
index d65d0fe24bfb..e61f39fa8c8a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2251,6 +2251,10 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 		if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
 			sig = 0;
 	}
+	if (!tsk->ptrace && tsk->signal->autoreap) {
+		autoreap = true;
+		sig = 0;
+	}
 	/*
 	 * Send with __send_signal as si_pid and si_uid are in the
 	 * parent's namespaces.
-- 
cgit v1.2.3


From 4ef420b3450026b56807e5d53001f80eb495403c Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeel.butt@linux.dev>
Date: Tue, 10 Mar 2026 18:01:01 -0700
Subject: cgroup: replace global cgroup_file_kn_lock with per-cgroup_file lock

Replace the global cgroup_file_kn_lock with a per-cgroup_file spinlock
to eliminate cross-cgroup contention as it is not really protecting
data shared between different cgroups.

The lock is initialized in cgroup_add_file() alongside timer_setup().
No lock acquisition is needed during initialization since the cgroup
directory is being populated under cgroup_mutex and no concurrent
accessors exist at that point.

Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h |  1 +
 kernel/cgroup/cgroup.c      | 24 ++++++++----------------
 2 files changed, 9 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index bb92f5c169ca..ba26b5d05ce3 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -167,6 +167,7 @@ struct cgroup_file {
 	struct kernfs_node *kn;
 	unsigned long notified_at;
 	struct timer_list notify_timer;
+	spinlock_t lock;
 };
 
 /*
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index d161bcaa68f1..6f58efeb9016 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -99,12 +99,6 @@ static bool cgroup_debug __read_mostly;
  */
 static DEFINE_SPINLOCK(cgroup_idr_lock);
 
-/*
- * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
- * against file removal/re-creation across css hiding.
- */
-static DEFINE_SPINLOCK(cgroup_file_kn_lock);
-
 DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
 
 #define cgroup_assert_mutex_or_rcu_locked()				\
@@ -1693,9 +1687,9 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 		struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
 		struct cgroup_file *cfile = (void *)css + cft->file_offset;
 
-		spin_lock_irq(&cgroup_file_kn_lock);
+		spin_lock_irq(&cfile->lock);
 		WRITE_ONCE(cfile->kn, NULL);
-		spin_unlock_irq(&cgroup_file_kn_lock);
+		spin_unlock_irq(&cfile->lock);
 
 		timer_delete_sync(&cfile->notify_timer);
 	}
@@ -4373,10 +4367,8 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 		struct cgroup_file *cfile = (void *)css + cft->file_offset;
 
 		timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
-
-		spin_lock_irq(&cgroup_file_kn_lock);
-		WRITE_ONCE(cfile->kn, kn);
-		spin_unlock_irq(&cgroup_file_kn_lock);
+		spin_lock_init(&cfile->lock);
+		cfile->kn = kn;
 	}
 
 	return 0;
@@ -4645,13 +4637,13 @@ void cgroup_file_notify(struct cgroup_file *cfile)
 			return;
 	}
 
-	spin_lock_irqsave(&cgroup_file_kn_lock, flags);
+	spin_lock_irqsave(&cfile->lock, flags);
 	if (cfile->kn) {
 		kn = cfile->kn;
 		kernfs_get(kn);
 		WRITE_ONCE(cfile->notified_at, jiffies);
 	}
-	spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
+	spin_unlock_irqrestore(&cfile->lock, flags);
 
 	if (kn) {
 		kernfs_notify(kn);
@@ -4669,10 +4661,10 @@ void cgroup_file_show(struct cgroup_file *cfile, bool show)
 {
 	struct kernfs_node *kn;
 
-	spin_lock_irq(&cgroup_file_kn_lock);
+	spin_lock_irq(&cfile->lock);
 	kn = cfile->kn;
 	kernfs_get(kn);
-	spin_unlock_irq(&cgroup_file_kn_lock);
+	spin_unlock_irq(&cfile->lock);
 
 	if (kn)
 		kernfs_show(kn, show);
-- 
cgit v1.2.3


From 49b76317592ecbaefd0969d51d02019966cc994b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Mar 2026 16:52:37 -0800
Subject: sched/wait: correct kernel-doc descriptions

Use the correct function name and function parameter name to avoid
these kernel-doc warnings:

Warning: include/linux/wait_bit.h:424 expecting prototype for
 wait_var_event_killable(). Prototype was for
 wait_var_event_interruptible() instead
Warning: include/linux/wait_bit.h:508 function parameter 'lock' not
 described in 'wait_var_event_mutex'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260302005237.3473095-1-rdunlap@infradead.org
---
 include/linux/wait_bit.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 9e29d79fc790..ace7379d627d 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -406,7 +406,7 @@ do {									\
 			  schedule())
 
 /**
- * wait_var_event_killable - wait for a variable to be updated and notified
+ * wait_var_event_interruptible - wait for a variable to be updated and notified
  * @var: the address of variable being waited on
  * @condition: the condition to wait for
  *
@@ -492,7 +492,7 @@ do {									\
  * wait_var_event_mutex - wait for a variable to be updated under a mutex
  * @var: the address of the variable being waited on
  * @condition: condition to wait for
- * @mutex: the mutex which protects updates to the variable
+ * @lock: the mutex which protects updates to the variable
  *
  * Wait for a condition which can only be reliably tested while holding
  * a mutex.  The variables assessed in the condition will normal be
-- 
cgit v1.2.3


From 754e38d2d1aeeadddac5220f34e07cf263502a46 Mon Sep 17 00:00:00 2001
From: "Thomas Weißschuh (Schneider Electric)"
 <thomas.weissschuh@linutronix.de>
Date: Wed, 11 Mar 2026 11:15:11 +0100
Subject: tracing: Use explicit array size instead of sentinel elements in
 symbol printing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sentinel value added by the wrapper macros __print_symbolic() et al
prevents the callers from adding their own trailing comma. This makes
constructing symbol list dynamically based on kconfig values tedious.

Drop the sentinel elements, so callers can either specify the trailing
comma or not, just like in regular array initializers.

Signed-off-by: Thomas Weißschuh (Schneider Electric) <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260311-hrtimer-cleanups-v1-2-095357392669@linutronix.de
---
 include/linux/trace_events.h               | 13 ++++++----
 include/trace/stages/stage3_trace_output.h | 40 +++++++++++++++---------------
 kernel/trace/trace_events_synth.c          |  4 +--
 kernel/trace/trace_output.c                | 20 +++++++++------
 kernel/trace/trace_syscalls.c              |  3 +--
 5 files changed, 43 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 37eb2f0f3dd8..40a43a4c7caf 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -22,20 +22,23 @@ union bpf_attr;
 
 const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
 				  unsigned long flags,
-				  const struct trace_print_flags *flag_array);
+				  const struct trace_print_flags *flag_array,
+				  size_t flag_array_size);
 
 const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
-				    const struct trace_print_flags *symbol_array);
+				    const struct trace_print_flags *symbol_array,
+				    size_t symbol_array_size);
 
 #if BITS_PER_LONG == 32
 const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
 		      unsigned long long flags,
-		      const struct trace_print_flags_u64 *flag_array);
+		      const struct trace_print_flags_u64 *flag_array,
+		      size_t flag_array_size);
 
 const char *trace_print_symbols_seq_u64(struct trace_seq *p,
 					unsigned long long val,
-					const struct trace_print_flags_u64
-								 *symbol_array);
+					const struct trace_print_flags_u64 *symbol_array,
+					size_t symbol_array_size);
 #endif
 
 struct trace_iterator;
diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h
index fce85ea2df1c..b7d8ef4b9fe1 100644
--- a/include/trace/stages/stage3_trace_output.h
+++ b/include/trace/stages/stage3_trace_output.h
@@ -64,36 +64,36 @@
 #define __get_rel_sockaddr(field)	((struct sockaddr *)__get_rel_dynamic_array(field))
 
 #undef __print_flags
-#define __print_flags(flag, delim, flag_array...)			\
-	({								\
-		static const struct trace_print_flags __flags[] =	\
-			{ flag_array, { -1, NULL }};			\
-		trace_print_flags_seq(p, delim, flag, __flags);	\
+#define __print_flags(flag, delim, flag_array...)					\
+	({										\
+		static const struct trace_print_flags __flags[] =			\
+			{ flag_array };							\
+		trace_print_flags_seq(p, delim, flag, __flags, ARRAY_SIZE(__flags));	\
 	})
 
 #undef __print_symbolic
-#define __print_symbolic(value, symbol_array...)			\
-	({								\
-		static const struct trace_print_flags symbols[] =	\
-			{ symbol_array, { -1, NULL }};			\
-		trace_print_symbols_seq(p, value, symbols);		\
+#define __print_symbolic(value, symbol_array...)					\
+	({										\
+		static const struct trace_print_flags symbols[] =			\
+			{ symbol_array };						\
+		trace_print_symbols_seq(p, value, symbols, ARRAY_SIZE(symbols));	\
 	})
 
 #undef __print_flags_u64
 #undef __print_symbolic_u64
 #if BITS_PER_LONG == 32
-#define __print_flags_u64(flag, delim, flag_array...)			\
-	({								\
-		static const struct trace_print_flags_u64 __flags[] =	\
-			{ flag_array, { -1, NULL } };			\
-		trace_print_flags_seq_u64(p, delim, flag, __flags);	\
+#define __print_flags_u64(flag, delim, flag_array...)						\
+	({											\
+		static const struct trace_print_flags_u64 __flags[] =				\
+			{ flag_array };								\
+		trace_print_flags_seq_u64(p, delim, flag, __flags, ARRAY_SIZE(__flags));	\
 	})
 
-#define __print_symbolic_u64(value, symbol_array...)			\
-	({								\
-		static const struct trace_print_flags_u64 symbols[] =	\
-			{ symbol_array, { -1, NULL } };			\
-		trace_print_symbols_seq_u64(p, value, symbols);	\
+#define __print_symbolic_u64(value, symbol_array...)					\
+	({										\
+		static const struct trace_print_flags_u64 symbols[] =			\
+			{ symbol_array };						\
+		trace_print_symbols_seq_u64(p, value, symbols, ARRAY_SIZE(symbols));	\
 	})
 #else
 #define __print_flags_u64(flag, delim, flag_array...)			\
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 8bb95b2a6fcf..39ac4eba0702 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -395,7 +395,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
 			n_u64++;
 		} else {
 			struct trace_print_flags __flags[] = {
-			    __def_gfpflag_names, {-1, NULL} };
+			    __def_gfpflag_names };
 			char *space = (i == se->n_fields - 1 ? "" : " ");
 
 			print_synth_event_num_val(s, print_fmt,
@@ -408,7 +408,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
 				trace_seq_puts(s, " (");
 				trace_print_flags_seq(s, "|",
 						      entry->fields[n_u64].as_u64,
-						      __flags);
+						      __flags, ARRAY_SIZE(__flags));
 				trace_seq_putc(s, ')');
 			}
 			n_u64++;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 1996d7aba038..96e2d22b4364 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -69,14 +69,15 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
 const char *
 trace_print_flags_seq(struct trace_seq *p, const char *delim,
 		      unsigned long flags,
-		      const struct trace_print_flags *flag_array)
+		      const struct trace_print_flags *flag_array,
+		      size_t flag_array_size)
 {
 	unsigned long mask;
 	const char *str;
 	const char *ret = trace_seq_buffer_ptr(p);
 	int i, first = 1;
 
-	for (i = 0;  flag_array[i].name && flags; i++) {
+	for (i = 0; i < flag_array_size && flags; i++) {
 
 		mask = flag_array[i].mask;
 		if ((flags & mask) != mask)
@@ -106,12 +107,13 @@ EXPORT_SYMBOL(trace_print_flags_seq);
 
 const char *
 trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
-			const struct trace_print_flags *symbol_array)
+			const struct trace_print_flags *symbol_array,
+			size_t symbol_array_size)
 {
 	int i;
 	const char *ret = trace_seq_buffer_ptr(p);
 
-	for (i = 0;  symbol_array[i].name; i++) {
+	for (i = 0; i < symbol_array_size; i++) {
 
 		if (val != symbol_array[i].mask)
 			continue;
@@ -133,14 +135,15 @@ EXPORT_SYMBOL(trace_print_symbols_seq);
 const char *
 trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
 		      unsigned long long flags,
-		      const struct trace_print_flags_u64 *flag_array)
+		      const struct trace_print_flags_u64 *flag_array,
+		      size_t flag_array_size)
 {
 	unsigned long long mask;
 	const char *str;
 	const char *ret = trace_seq_buffer_ptr(p);
 	int i, first = 1;
 
-	for (i = 0;  flag_array[i].name && flags; i++) {
+	for (i = 0; i < flag_array_size && flags; i++) {
 
 		mask = flag_array[i].mask;
 		if ((flags & mask) != mask)
@@ -170,12 +173,13 @@ EXPORT_SYMBOL(trace_print_flags_seq_u64);
 
 const char *
 trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
-			 const struct trace_print_flags_u64 *symbol_array)
+			    const struct trace_print_flags_u64 *symbol_array,
+			    size_t symbol_array_size)
 {
 	int i;
 	const char *ret = trace_seq_buffer_ptr(p);
 
-	for (i = 0;  symbol_array[i].name; i++) {
+	for (i = 0; i < symbol_array_size; i++) {
 
 		if (val != symbol_array[i].mask)
 			continue;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 37317b81fcda..8ad72e17d8eb 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -174,7 +174,6 @@ sys_enter_openat_print(struct syscall_trace_enter *trace, struct syscall_metadat
 			{ O_NOFOLLOW, "O_NOFOLLOW" },
 			{ O_NOATIME, "O_NOATIME" },
 			{ O_CLOEXEC, "O_CLOEXEC" },
-			{ -1, NULL }
 		};
 
 	trace_seq_printf(s, "%s(", entry->name);
@@ -205,7 +204,7 @@ sys_enter_openat_print(struct syscall_trace_enter *trace, struct syscall_metadat
 				trace_seq_puts(s, "O_RDONLY|");
 			}
 
-			trace_print_flags_seq(s, "|", bits, __flags);
+			trace_print_flags_seq(s, "|", bits, __flags, ARRAY_SIZE(__flags));
 			/*
 			 * trace_print_flags_seq() adds a '\0' to the
 			 * buffer, but this needs to append more to the seq.
-- 
cgit v1.2.3


From 8ef2807042d0886a85bbcb0aba1a2a277680dc4a Mon Sep 17 00:00:00 2001
From: "Thomas Weißschuh (Schneider Electric)"
 <thomas.weissschuh@linutronix.de>
Date: Wed, 11 Mar 2026 11:15:15 +0100
Subject: hrtimer: Remove hrtimer_get_expires_ns()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are no users left.

Signed-off-by: Thomas Weißschuh (Schneider Electric) <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260311-hrtimer-cleanups-v1-6-095357392669@linutronix.de
---
 include/linux/hrtimer.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index c087b7142330..9ced498fefaa 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -116,11 +116,6 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
 	return timer->_softexpires;
 }
 
-static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
-{
-	return ktime_to_ns(timer->node.expires);
-}
-
 ktime_t hrtimer_cb_get_time(const struct hrtimer *timer);
 
 static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
-- 
cgit v1.2.3


From b94c076dd949426d09e5d415304acb3f951d9069 Mon Sep 17 00:00:00 2001
From: "Thomas Weißschuh (Schneider Electric)"
 <thomas.weissschuh@linutronix.de>
Date: Wed, 11 Mar 2026 11:15:17 +0100
Subject: hrtimer: Drop spurious space in 'enum hrtimer_base_type'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This spurious space makes grepping for the enum definition annoying.

Remove it.

Signed-off-by: Thomas Weißschuh (Schneider Electric) <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260311-hrtimer-cleanups-v1-8-095357392669@linutronix.de
---
 include/linux/hrtimer_defs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index 0f851b2432c3..e6d4dc1b61e0 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -35,7 +35,7 @@ struct hrtimer_clock_base {
 	ktime_t				offset;
 } __hrtimer_clock_base_align;
 
-enum  hrtimer_base_type {
+enum hrtimer_base_type {
 	HRTIMER_BASE_MONOTONIC,
 	HRTIMER_BASE_REALTIME,
 	HRTIMER_BASE_BOOTTIME,
-- 
cgit v1.2.3


From f12ef5cb4e035e15f0c324c41ff402441578ffda Mon Sep 17 00:00:00 2001
From: "Thomas Weißschuh (Schneider Electric)"
 <thomas.weissschuh@linutronix.de>
Date: Wed, 11 Mar 2026 11:15:19 +0100
Subject: hrtimer: Mark index and clockid of clock base as const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These fields are initialized once and are never supposed to change.

Mark them as const to make this explicit.

Signed-off-by: Thomas Weißschuh (Schneider Electric) <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260311-hrtimer-cleanups-v1-10-095357392669@linutronix.de
---
 include/linux/hrtimer_defs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index e6d4dc1b61e0..a03240c0b14f 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -26,8 +26,8 @@
  */
 struct hrtimer_clock_base {
 	struct hrtimer_cpu_base		*cpu_base;
-	unsigned int			index;
-	clockid_t			clockid;
+	const unsigned int		index;
+	const clockid_t			clockid;
 	seqcount_raw_spinlock_t		seq;
 	ktime_t				expires_next;
 	struct hrtimer			*running;
-- 
cgit v1.2.3


From f27fc117cf8fba56e0619694e685f9bca9b9cb82 Mon Sep 17 00:00:00 2001
From: "Thomas Weißschuh (Schneider Electric)"
 <thomas.weissschuh@linutronix.de>
Date: Wed, 11 Mar 2026 11:15:20 +0100
Subject: hrtimer: Remove trailing comma after HRTIMER_MAX_CLOCK_BASES
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

HRTIMER_MAX_CLOCK_BASES is required to stay the last value of the enum.

Drop the trailing comma so no new members are added after it by mistake.

Signed-off-by: Thomas Weißschuh (Schneider Electric) <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260311-hrtimer-cleanups-v1-11-095357392669@linutronix.de
---
 include/linux/hrtimer_defs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index a03240c0b14f..52ed9e46ff13 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -44,7 +44,7 @@ enum hrtimer_base_type {
 	HRTIMER_BASE_REALTIME_SOFT,
 	HRTIMER_BASE_BOOTTIME_SOFT,
 	HRTIMER_BASE_TAI_SOFT,
-	HRTIMER_MAX_CLOCK_BASES,
+	HRTIMER_MAX_CLOCK_BASES
 };
 
 /**
-- 
cgit v1.2.3


From 6f459eda8b60382efa0da2ca025c26a2018adc87 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 10 Mar 2026 12:44:51 +0000
Subject: tcp: add tcp_release_cb_cond() helper

Majority of tcp_release_cb() calls do nothing at all.

Provide tcp_release_cb_cond() helper so that release_sock()
can avoid these calls.

Also hint the compiler that __release_sock() and wake_up()
are rarely called.

$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-77 (-77)
Function                                     old     new   delta
release_sock                                 258     181     -77
Total: Before=25235790, After=25235713, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260310124451.2280968-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/tcp.h   |  7 +++++++
 include/net/tcp.h     | 14 ++++++++++++++
 net/core/sock.c       | 14 ++++++++------
 net/ipv4/tcp_output.c |  5 -----
 4 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index c44cf9ae8d16..bcebc4f07532 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -548,6 +548,13 @@ enum tsq_flags {
 	TCPF_ACK_DEFERRED		= BIT(TCP_ACK_DEFERRED),
 };
 
+/* Flags of interest for tcp_release_cb() */
+#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED |		\
+			  TCPF_WRITE_TIMER_DEFERRED |	\
+			  TCPF_DELACK_TIMER_DEFERRED |	\
+			  TCPF_MTU_REDUCED_DEFERRED |	\
+			  TCPF_ACK_DEFERRED)
+
 #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk)
 
 /* Variant of tcp_sk() upgrading a const sock to a read/write tcp socket.
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9f0aee9e5d76..48dffcca0a71 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -375,7 +375,21 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
 int tcp_wmem_schedule(struct sock *sk, int copy);
 void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
 	      int size_goal);
+
 void tcp_release_cb(struct sock *sk);
+
+static inline bool tcp_release_cb_cond(struct sock *sk)
+{
+#ifdef CONFIG_INET
+	if (likely(sk->sk_prot->release_cb == tcp_release_cb)) {
+		if (unlikely(smp_load_acquire(&sk->sk_tsq_flags) & TCP_DEFERRED_ALL))
+			tcp_release_cb(sk);
+		return true;
+	}
+#endif
+	return false;
+}
+
 void tcp_wfree(struct sk_buff *skb);
 void tcp_write_timer_handler(struct sock *sk);
 void tcp_delack_timer_handler(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index f4e2ff23d60e..fdaf66e6dc18 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3807,16 +3807,18 @@ EXPORT_SYMBOL(lock_sock_nested);
 void release_sock(struct sock *sk)
 {
 	spin_lock_bh(&sk->sk_lock.slock);
-	if (sk->sk_backlog.tail)
-		__release_sock(sk);
 
-	if (sk->sk_prot->release_cb)
-		INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
-				     tcp_release_cb, sk);
+	if (unlikely(sk->sk_backlog.tail))
+		__release_sock(sk);
 
+	if (sk->sk_prot->release_cb) {
+		if (!tcp_release_cb_cond(sk))
+			sk->sk_prot->release_cb(sk);
+	}
 	sock_release_ownership(sk);
-	if (waitqueue_active(&sk->sk_lock.wq))
+	if (unlikely(waitqueue_active(&sk->sk_lock.wq)))
 		wake_up(&sk->sk_lock.wq);
+
 	spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL(release_sock);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a53802f28dd1..34a25ef61006 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1320,11 +1320,6 @@ static void tcp_tsq_workfn(struct work_struct *work)
 	}
 }
 
-#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED |		\
-			  TCPF_WRITE_TIMER_DEFERRED |	\
-			  TCPF_DELACK_TIMER_DEFERRED |	\
-			  TCPF_MTU_REDUCED_DEFERRED |	\
-			  TCPF_ACK_DEFERRED)
 /**
  * tcp_release_cb - tcp release_sock() callback
  * @sk: socket
-- 
cgit v1.2.3


From c670267ff50d5f9beb486f0203cdede580a99ae3 Mon Sep 17 00:00:00 2001
From: Qingfang Deng <dqfext@gmail.com>
Date: Fri, 6 Feb 2026 14:20:03 +0800
Subject: tty: constify tty_ldisc_ops

tty_ldisc_ops is not modified once registered, so make it const.

Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Link: https://patch.msgid.link/20260206062004.1273890-1-dqfext@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_ldisc.c   | 16 ++++++++--------
 include/linux/tty_ldisc.h |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 888f2f8f9481..27fe8236f662 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -44,7 +44,7 @@ enum {
 
 static DEFINE_RAW_SPINLOCK(tty_ldiscs_lock);
 /* Line disc dispatch table */
-static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS];
+static const struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS];
 
 /**
  * tty_register_ldisc	-	install a line discipline
@@ -55,7 +55,7 @@ static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS];
  *
  * Locking: takes %tty_ldiscs_lock to guard against ldisc races
  */
-int tty_register_ldisc(struct tty_ldisc_ops *new_ldisc)
+int tty_register_ldisc(const struct tty_ldisc_ops *new_ldisc)
 {
 	unsigned long flags;
 
@@ -80,7 +80,7 @@ EXPORT_SYMBOL(tty_register_ldisc);
  * Locking: takes %tty_ldiscs_lock to guard against ldisc races
  */
 
-void tty_unregister_ldisc(struct tty_ldisc_ops *ldisc)
+void tty_unregister_ldisc(const struct tty_ldisc_ops *ldisc)
 {
 	unsigned long flags;
 
@@ -90,10 +90,10 @@ void tty_unregister_ldisc(struct tty_ldisc_ops *ldisc)
 }
 EXPORT_SYMBOL(tty_unregister_ldisc);
 
-static struct tty_ldisc_ops *get_ldops(int disc)
+static const struct tty_ldisc_ops *get_ldops(int disc)
 {
 	unsigned long flags;
-	struct tty_ldisc_ops *ldops, *ret;
+	const struct tty_ldisc_ops *ldops, *ret;
 
 	raw_spin_lock_irqsave(&tty_ldiscs_lock, flags);
 	ret = ERR_PTR(-EINVAL);
@@ -107,7 +107,7 @@ static struct tty_ldisc_ops *get_ldops(int disc)
 	return ret;
 }
 
-static void put_ldops(struct tty_ldisc_ops *ldops)
+static void put_ldops(const struct tty_ldisc_ops *ldops)
 {
 	unsigned long flags;
 
@@ -139,7 +139,7 @@ int tty_ldisc_autoload = IS_BUILTIN(CONFIG_LDISC_AUTOLOAD);
 static struct tty_ldisc *tty_ldisc_get(struct tty_struct *tty, int disc)
 {
 	struct tty_ldisc *ld;
-	struct tty_ldisc_ops *ldops;
+	const struct tty_ldisc_ops *ldops;
 
 	if (disc < N_TTY || disc >= NR_LDISCS)
 		return ERR_PTR(-EINVAL);
@@ -202,7 +202,7 @@ static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
 static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
 {
 	int i = *(loff_t *)v;
-	struct tty_ldisc_ops *ldops;
+	const struct tty_ldisc_ops *ldops;
 
 	ldops = get_ldops(i);
 	if (IS_ERR(ldops))
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index c5cccc3fc1e8..d227a58e3e49 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -266,7 +266,7 @@ struct tty_ldisc_ops {
 };
 
 struct tty_ldisc {
-	struct tty_ldisc_ops *ops;
+	const struct tty_ldisc_ops *ops;
 	struct tty_struct *tty;
 };
 
@@ -281,8 +281,8 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *);
 
 void tty_ldisc_flush(struct tty_struct *tty);
 
-int tty_register_ldisc(struct tty_ldisc_ops *new_ldisc);
-void tty_unregister_ldisc(struct tty_ldisc_ops *ldisc);
+int tty_register_ldisc(const struct tty_ldisc_ops *new_ldisc);
+void tty_unregister_ldisc(const struct tty_ldisc_ops *ldisc);
 int tty_set_ldisc(struct tty_struct *tty, int disc);
 
 #endif /* _LINUX_TTY_LDISC_H */
-- 
cgit v1.2.3


From 24728b93fafe0949b5353e1a7b3a94175fe26d6e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 10 Mar 2026 22:23:47 -0700
Subject: serdev: serdev.h: clean up kernel-doc comments

Correct kernel-doc comment format and add a missing to avoid
kernel-doc warnings:

Warning: include/linux/serdev.h:49 struct member 'write_comp' not
 described in 'serdev_device'
Warning: include/linux/serdev.h:49 struct member 'write_lock' not
 described in 'serdev_device'
Warning: include/linux/serdev.h:68 struct member 'shutdown' not described
 in 'serdev_device_driver'
Warning: include/linux/serdev.h:134 function parameter 'serdev' not
 described in 'serdev_device_put'
Warning: include/linux/serdev.h:162 function parameter 'ctrl' not
 described in 'serdev_controller_put'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260311052347.305612-1-rdunlap@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serdev.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index 5654c58eb73c..090c93c08045 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -37,8 +37,8 @@ struct serdev_device_ops {
  * @nr:		Device number on serdev bus.
  * @ctrl:	serdev controller managing this device.
  * @ops:	Device operations.
- * @write_comp	Completion used by serdev_device_write() internally
- * @write_lock	Lock to serialize access when writing data
+ * @write_comp:	Completion used by serdev_device_write() internally
+ * @write_lock:	Lock to serialize access when writing data
  */
 struct serdev_device {
 	struct device dev;
@@ -60,6 +60,7 @@ static inline struct serdev_device *to_serdev_device(struct device *d)
  *		structure.
  * @probe:	binds this driver to a serdev device.
  * @remove:	unbinds this driver from the serdev device.
+ * @shutdown:	shut down this serdev device.
  */
 struct serdev_device_driver {
 	struct device_driver driver;
@@ -129,7 +130,7 @@ static inline void serdev_device_set_drvdata(struct serdev_device *serdev, void
 
 /**
  * serdev_device_put() - decrement serdev device refcount
- * @serdev	serdev device.
+ * @serdev:	serdev device.
  */
 static inline void serdev_device_put(struct serdev_device *serdev)
 {
@@ -157,7 +158,7 @@ static inline void serdev_controller_set_drvdata(struct serdev_controller *ctrl,
 
 /**
  * serdev_controller_put() - decrement controller refcount
- * @ctrl	serdev controller.
+ * @ctrl:	serdev controller.
  */
 static inline void serdev_controller_put(struct serdev_controller *ctrl)
 {
-- 
cgit v1.2.3


From eb3b0d92c9c39890592cca6647601fe5c631efea Mon Sep 17 00:00:00 2001
From: Xin Zhao <jackzxcui1989@163.com>
Date: Fri, 13 Feb 2026 16:50:39 +0800
Subject: tty: tty_port: add workqueue to flip TTY buffer

On the embedded platform, certain critical data, such as IMU data, is
transmitted through UART. The tty_flip_buffer_push() interface in the TTY
layer uses system_dfl_wq to handle the flipping of the TTY buffer.
Although the unbound workqueue can create new threads on demand and wake
up the kworker thread on an idle CPU, it may be preempted by real-time
tasks or other high-prio tasks.

flush_to_ldisc() needs to wake up the relevant data handle thread. When
executing __wake_up_common_lock(), it calls spin_lock_irqsave(), which
does not disable preemption but disables migration in RT-Linux. This
prevents the kworker thread from being migrated to other cores by CPU's
balancing logic, resulting in long delays. The call trace is as follows:
    __wake_up_common_lock
    __wake_up
    ep_poll_callback
    __wake_up_common
    __wake_up_common_lock
    __wake_up
    n_tty_receive_buf_common
    n_tty_receive_buf2
    tty_ldisc_receive_buf
    tty_port_default_receive_buf
    flush_to_ldisc

In our system, the processing interval for each frame of IMU data
transmitted via UART can experience significant jitter due to this issue.
Instead of the expected 10 to 15 ms frame processing interval, we see
spikes up to 30 to 35 ms. Moreover, in just one or two hours, there can
be 2 to 3 occurrences of such high jitter, which is quite frequent. This
jitter exceeds the software's tolerable limit of 20 ms.

Introduce flip_wq in tty_port which can be set by tty_port_link_wq() or as
default linked to default workqueue allocated when tty_register_driver().
The default workqueue is allocated with flag WQ_SYSFS, so that cpumask and
nice can be set dynamically. The execution timing of tty_port_link_wq() is
not clearly restricted. The newly added function tty_port_link_driver_wq()
checks whether the flip_wq of the tty_port has already been assigned when
linking the default tty_driver's workqueue to the port. After the user has
set a custom workqueue for a certain tty_port using tty_port_link_wq(), the
system will only use this custom workqueue, even if tty_driver does not
have %TTY_DRIVER_NO_WORKQUEUE flag. When tty_port register device, flip_wq
link operation is done by tty_port_link_driver_wq(), but for in-memory
devices the link operation cannot cover all the cases. Although
tty_port_install() is dedicated for in-memory devices lik PTY to link port
allocated on demand, the logic of tty_port_install() is so simple that
people may not call it, vc_cons[0].d->port is one such case. We check the
buf.flip_wq when flip TTY buffer, if buf.flip_wq of TTY port is NULL, use
system_dfl_wq as a backup.

To avoid naming conflict of the default tty_driver's workqueue, using
'"%s-%s", driver->name, driver->driver_name' as the workqueue name. In
cases where driver_name is not specified and therefore is NULL, the
workqueue is not created. Drivers that do not define driver_name are
potentially in-memory devices like vty, which generally do not require
special workqueue settings. Even with the combination of name and
driver_name, the workqueue names can still be duplicated, as many tty
serial drivers use "ttyS" as dev_name and "serial" as driver_name. I
modified the conflicting driver_name of these drivers by appending a
suffix of _xx based on the corresponding .c file. If this modification is
not made, it could not only lead to duplicate workqueue names but also
result in duplicate entries for the /proc/tty/driver/<driver_name> nodes.

Introduce %TTY_DRIVER_NO_WORKQUEUE flag meaning not to create the
default single tty_driver workqueue. Two reasons why need to introduce the
%TTY_DRIVER_NO_WORKQUEUE flag:
1. If the WQ_SYSFS parameter is enabled, workqueue_sysfs_register() will
fail when trying to create a workqueue with the same name. The pty is an
example of this; if both CONFIG_LEGACY_PTYS and CONFIG_UNIX98_PTYS are
enabled, the call to tty_register_driver() in unix98_pty_init() will fail.
2. Different TTY ports may be used for different tasks, which may require
separate core binding control via workqueues. In this case, the workqueue
created by default in the TTY driver is unnecessary. Enabling this flag
prevents the creation of this redundant workqueue.

After applying this patch, we can set the related UART TTY flip buffer
workqueue by sysfs. We set the cpumask to CPU cores associated with the
IMU tasks, and set the nice to -20. Testing has shown significant
improvement in the previously described issue, with almost no stuttering
occurring anymore.

Tested-by: Tommaso Merciai <tommaso.merciai.xr@bp.renesas.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Xin Zhao <jackzxcui1989@163.com>
Link: https://patch.msgid.link/20260213085039.3274704-1-jackzxcui1989@163.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/pty.c                   | 12 ++++++++----
 drivers/tty/serial/8250/8250_core.c |  2 +-
 drivers/tty/serial/apbuart.c        |  2 +-
 drivers/tty/serial/dz.c             |  2 +-
 drivers/tty/serial/ip22zilog.c      |  2 +-
 drivers/tty/serial/zs.c             |  2 +-
 drivers/tty/tty_buffer.c            | 15 +++++++++++----
 drivers/tty/tty_io.c                | 25 ++++++++++++++++++++++++-
 drivers/tty/tty_port.c              | 22 ++++++++++++++++++++++
 include/linux/tty_buffer.h          |  1 +
 include/linux/tty_driver.h          |  7 +++++++
 include/linux/tty_port.h            | 13 +++++++++++++
 12 files changed, 91 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index cb427e93372d..cc7f7091ed9a 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -532,14 +532,16 @@ static void __init legacy_pty_init(void)
 	pty_driver = tty_alloc_driver(legacy_count,
 			TTY_DRIVER_RESET_TERMIOS |
 			TTY_DRIVER_REAL_RAW |
-			TTY_DRIVER_DYNAMIC_ALLOC);
+			TTY_DRIVER_DYNAMIC_ALLOC |
+			TTY_DRIVER_NO_WORKQUEUE);
 	if (IS_ERR(pty_driver))
 		panic("Couldn't allocate pty driver");
 
 	pty_slave_driver = tty_alloc_driver(legacy_count,
 			TTY_DRIVER_RESET_TERMIOS |
 			TTY_DRIVER_REAL_RAW |
-			TTY_DRIVER_DYNAMIC_ALLOC);
+			TTY_DRIVER_DYNAMIC_ALLOC |
+			TTY_DRIVER_NO_WORKQUEUE);
 	if (IS_ERR(pty_slave_driver))
 		panic("Couldn't allocate pty slave driver");
 
@@ -849,7 +851,8 @@ static void __init unix98_pty_init(void)
 			TTY_DRIVER_REAL_RAW |
 			TTY_DRIVER_DYNAMIC_DEV |
 			TTY_DRIVER_DEVPTS_MEM |
-			TTY_DRIVER_DYNAMIC_ALLOC);
+			TTY_DRIVER_DYNAMIC_ALLOC |
+			TTY_DRIVER_NO_WORKQUEUE);
 	if (IS_ERR(ptm_driver))
 		panic("Couldn't allocate Unix98 ptm driver");
 	pts_driver = tty_alloc_driver(NR_UNIX98_PTY_MAX,
@@ -857,7 +860,8 @@ static void __init unix98_pty_init(void)
 			TTY_DRIVER_REAL_RAW |
 			TTY_DRIVER_DYNAMIC_DEV |
 			TTY_DRIVER_DEVPTS_MEM |
-			TTY_DRIVER_DYNAMIC_ALLOC);
+			TTY_DRIVER_DYNAMIC_ALLOC |
+			TTY_DRIVER_NO_WORKQUEUE);
 	if (IS_ERR(pts_driver))
 		panic("Couldn't allocate Unix98 pts driver");
 
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index d2e2c5dfef99..a428e88938eb 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -524,7 +524,7 @@ console_initcall(univ8250_console_init);
 
 struct uart_driver serial8250_reg = {
 	.owner			= THIS_MODULE,
-	.driver_name		= "serial",
+	.driver_name		= "serial_8250",
 	.dev_name		= "ttyS",
 	.major			= TTY_MAJOR,
 	.minor			= 64,
diff --git a/drivers/tty/serial/apbuart.c b/drivers/tty/serial/apbuart.c
index 364599f256db..3e46341cfff8 100644
--- a/drivers/tty/serial/apbuart.c
+++ b/drivers/tty/serial/apbuart.c
@@ -505,7 +505,7 @@ console_initcall(apbuart_console_init);
 
 static struct uart_driver grlib_apbuart_driver = {
 	.owner = THIS_MODULE,
-	.driver_name = "serial",
+	.driver_name = "serial_apbuart",
 	.dev_name = "ttyS",
 	.major = SERIAL_APBUART_MAJOR,
 	.minor = SERIAL_APBUART_MINOR,
diff --git a/drivers/tty/serial/dz.c b/drivers/tty/serial/dz.c
index eba91daedef8..e53c54353c3e 100644
--- a/drivers/tty/serial/dz.c
+++ b/drivers/tty/serial/dz.c
@@ -914,7 +914,7 @@ console_initcall(dz_serial_console_init);
 
 static struct uart_driver dz_reg = {
 	.owner			= THIS_MODULE,
-	.driver_name		= "serial",
+	.driver_name		= "serial_dz",
 	.dev_name		= "ttyS",
 	.major			= TTY_MAJOR,
 	.minor			= 64,
diff --git a/drivers/tty/serial/ip22zilog.c b/drivers/tty/serial/ip22zilog.c
index 6e19c6713849..a69b06893d9e 100644
--- a/drivers/tty/serial/ip22zilog.c
+++ b/drivers/tty/serial/ip22zilog.c
@@ -1015,7 +1015,7 @@ static struct console ip22zilog_console = {
 
 static struct uart_driver ip22zilog_reg = {
 	.owner		= THIS_MODULE,
-	.driver_name	= "serial",
+	.driver_name	= "serial_ip22zilog",
 	.dev_name	= "ttyS",
 	.major		= TTY_MAJOR,
 	.minor		= 64,
diff --git a/drivers/tty/serial/zs.c b/drivers/tty/serial/zs.c
index 79ea7108a0f3..72a3c0d90f40 100644
--- a/drivers/tty/serial/zs.c
+++ b/drivers/tty/serial/zs.c
@@ -1252,7 +1252,7 @@ console_initcall(zs_serial_console_init);
 
 static struct uart_driver zs_reg = {
 	.owner			= THIS_MODULE,
-	.driver_name		= "serial",
+	.driver_name		= "serial_zs",
 	.dev_name		= "ttyS",
 	.major			= TTY_MAJOR,
 	.minor			= 64,
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 79ec953824d5..96be90db53b7 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -59,6 +59,13 @@ void tty_buffer_lock_exclusive(struct tty_port *port)
 }
 EXPORT_SYMBOL_GPL(tty_buffer_lock_exclusive);
 
+static bool tty_buffer_queue_work(struct tty_bufhead *buf)
+{
+	struct workqueue_struct *flip_wq = READ_ONCE(buf->flip_wq);
+
+	return queue_work(flip_wq ?: system_dfl_wq, &buf->work);
+}
+
 /**
  * tty_buffer_unlock_exclusive	-	release exclusive access
  * @port: tty port owning the flip buffer
@@ -76,7 +83,7 @@ void tty_buffer_unlock_exclusive(struct tty_port *port)
 	mutex_unlock(&buf->lock);
 
 	if (restart)
-		queue_work(system_dfl_wq, &buf->work);
+		tty_buffer_queue_work(buf);
 }
 EXPORT_SYMBOL_GPL(tty_buffer_unlock_exclusive);
 
@@ -530,7 +537,7 @@ void tty_flip_buffer_push(struct tty_port *port)
 	struct tty_bufhead *buf = &port->buf;
 
 	tty_flip_buffer_commit(buf->tail);
-	queue_work(system_dfl_wq, &buf->work);
+	tty_buffer_queue_work(buf);
 }
 EXPORT_SYMBOL(tty_flip_buffer_push);
 
@@ -560,7 +567,7 @@ int tty_insert_flip_string_and_push_buffer(struct tty_port *port,
 		tty_flip_buffer_commit(buf->tail);
 	spin_unlock_irqrestore(&port->lock, flags);
 
-	queue_work(system_dfl_wq, &buf->work);
+	tty_buffer_queue_work(buf);
 
 	return size;
 }
@@ -613,7 +620,7 @@ void tty_buffer_set_lock_subclass(struct tty_port *port)
 
 bool tty_buffer_restart_work(struct tty_port *port)
 {
-	return queue_work(system_dfl_wq, &port->buf.work);
+	return tty_buffer_queue_work(&port->buf);
 }
 
 bool tty_buffer_cancel_work(struct tty_port *port)
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index a5d0457e0e28..6b283fd03ff8 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3443,10 +3443,27 @@ int tty_register_driver(struct tty_driver *driver)
 	if (error < 0)
 		goto err;
 
+	/*
+	 * Drivers that do not define driver_name are potentially in-memory devices
+	 * like vty, which generally do not require special workqueue settings.
+	 */
+	if (!(driver->flags & TTY_DRIVER_NO_WORKQUEUE) && driver->driver_name) {
+		driver->flip_wq = alloc_workqueue("%s-%s", WQ_UNBOUND | WQ_SYSFS,
+						  0, driver->name, driver->driver_name);
+		if (!driver->flip_wq) {
+			error = -ENOMEM;
+			goto err_unreg_char;
+		}
+		for (i = 0; i < driver->num; i++) {
+			if (driver->ports[i])
+				tty_port_link_driver_wq(driver->ports[i], driver);
+		}
+	}
+
 	if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC) {
 		error = tty_cdev_add(driver, dev, 0, driver->num);
 		if (error)
-			goto err_unreg_char;
+			goto err_destroy_wq;
 	}
 
 	scoped_guard(mutex, &tty_mutex)
@@ -3472,6 +3489,10 @@ err_unreg_devs:
 	scoped_guard(mutex, &tty_mutex)
 		list_del(&driver->tty_drivers);
 
+err_destroy_wq:
+	if (driver->flip_wq)
+		destroy_workqueue(driver->flip_wq);
+
 err_unreg_char:
 	unregister_chrdev_region(dev, driver->num);
 err:
@@ -3491,6 +3512,8 @@ void tty_unregister_driver(struct tty_driver *driver)
 				driver->num);
 	scoped_guard(mutex, &tty_mutex)
 		list_del(&driver->tty_drivers);
+	if (driver->flip_wq)
+		destroy_workqueue(driver->flip_wq);
 }
 EXPORT_SYMBOL(tty_unregister_driver);
 
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index fe67c5cb0a3f..54359310e293 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -99,6 +99,23 @@ void tty_port_init(struct tty_port *port)
 }
 EXPORT_SYMBOL(tty_port_init);
 
+/**
+ * tty_port_link_wq - link tty_port and flip workqueue
+ * @port: tty_port of the device
+ * @flip_wq: workqueue to queue flip buffer work on
+ *
+ * Whenever %TTY_DRIVER_NO_WORKQUEUE is used, every tty_port can be linked to
+ * a workqueue manually by this function.
+ * tty_port will use system_dfl_wq when buf.flip_wq is NULL.
+ *
+ * Note that tty_port API will NOT destroy the workqueue.
+ */
+void tty_port_link_wq(struct tty_port *port, struct workqueue_struct *flip_wq)
+{
+	port->buf.flip_wq = flip_wq;
+}
+EXPORT_SYMBOL_GPL(tty_port_link_wq);
+
 /**
  * tty_port_link_device - link tty and tty_port
  * @port: tty_port of the device
@@ -157,6 +174,7 @@ struct device *tty_port_register_device_attr(struct tty_port *port,
 		const struct attribute_group **attr_grp)
 {
 	tty_port_link_device(port, driver, index);
+	tty_port_link_driver_wq(port, driver);
 	return tty_register_device_attr(driver, index, device, drvdata,
 			attr_grp);
 }
@@ -183,6 +201,7 @@ struct device *tty_port_register_device_attr_serdev(struct tty_port *port,
 	struct device *dev;
 
 	tty_port_link_device(port, driver, index);
+	tty_port_link_driver_wq(port, driver);
 
 	dev = serdev_tty_port_register(port, host, parent, driver, index);
 	if (PTR_ERR(dev) != -ENODEV) {
@@ -210,6 +229,7 @@ void tty_port_unregister_device(struct tty_port *port,
 {
 	int ret;
 
+	WRITE_ONCE(port->buf.flip_wq, NULL);
 	ret = serdev_tty_port_unregister(port);
 	if (ret == 0)
 		return;
@@ -257,6 +277,7 @@ void tty_port_destroy(struct tty_port *port)
 {
 	tty_buffer_cancel_work(port);
 	tty_buffer_free_all(port);
+	WRITE_ONCE(port->buf.flip_wq, NULL);
 }
 EXPORT_SYMBOL(tty_port_destroy);
 
@@ -703,6 +724,7 @@ int tty_port_install(struct tty_port *port, struct tty_driver *driver,
 		struct tty_struct *tty)
 {
 	tty->port = port;
+	tty_port_link_driver_wq(port, driver);
 	return tty_standard_install(driver, tty);
 }
 EXPORT_SYMBOL_GPL(tty_port_install);
diff --git a/include/linux/tty_buffer.h b/include/linux/tty_buffer.h
index 31125e3be3c5..48adcb0e8ff3 100644
--- a/include/linux/tty_buffer.h
+++ b/include/linux/tty_buffer.h
@@ -34,6 +34,7 @@ static inline u8 *flag_buf_ptr(struct tty_buffer *b, unsigned int ofs)
 
 struct tty_bufhead {
 	struct tty_buffer *head;	/* Queue head */
+	struct workqueue_struct *flip_wq;
 	struct work_struct work;
 	struct mutex	   lock;
 	atomic_t	   priority;
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 188ee9b768eb..1f2896e56e77 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -69,6 +69,10 @@ struct serial_struct;
  *	Do not create numbered ``/dev`` nodes. For example, create
  *	``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a
  *	driver for a single tty device is being allocated.
+ *
+ * @TTY_DRIVER_NO_WORKQUEUE:
+ *	Do not create workqueue when tty_register_driver(). Whenever set, flip
+ *	buffer workqueue can be set by tty_port_link_wq() for every port.
  */
 enum tty_driver_flag {
 	TTY_DRIVER_INSTALLED		= BIT(0),
@@ -79,6 +83,7 @@ enum tty_driver_flag {
 	TTY_DRIVER_HARDWARE_BREAK	= BIT(5),
 	TTY_DRIVER_DYNAMIC_ALLOC	= BIT(6),
 	TTY_DRIVER_UNNUMBERED_NODE	= BIT(7),
+	TTY_DRIVER_NO_WORKQUEUE		= BIT(8),
 };
 
 enum tty_driver_type {
@@ -506,6 +511,7 @@ struct tty_operations {
  * @flags: tty driver flags (%TTY_DRIVER_)
  * @proc_entry: proc fs entry, used internally
  * @other: driver of the linked tty; only used for the PTY driver
+ * @flip_wq: workqueue to queue flip buffer work on
  * @ttys: array of active &struct tty_struct, set by tty_standard_install()
  * @ports: array of &struct tty_port; can be set during initialization by
  *	   tty_port_link_device() and similar
@@ -539,6 +545,7 @@ struct tty_driver {
 	unsigned long	flags;
 	struct proc_dir_entry *proc_entry;
 	struct tty_driver *other;
+	struct workqueue_struct *flip_wq;
 
 	/*
 	 * Pointer to the tty data structures
diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h
index 660c254f1efe..d2a7882c0b58 100644
--- a/include/linux/tty_port.h
+++ b/include/linux/tty_port.h
@@ -138,6 +138,7 @@ struct tty_port {
 					   kernel */
 
 void tty_port_init(struct tty_port *port);
+void tty_port_link_wq(struct tty_port *port, struct workqueue_struct *flip_wq);
 void tty_port_link_device(struct tty_port *port, struct tty_driver *driver,
 		unsigned index);
 struct device *tty_port_register_device(struct tty_port *port,
@@ -165,6 +166,18 @@ static inline struct tty_port *tty_port_get(struct tty_port *port)
 	return NULL;
 }
 
+/*
+ * Never overwrite the workqueue set by tty_port_link_wq().
+ * No effect when %TTY_DRIVER_NO_WORKQUEUE is set, as driver->flip_wq is
+ * %NULL.
+ */
+static inline void tty_port_link_driver_wq(struct tty_port *port,
+					   struct tty_driver *driver)
+{
+	if (!port->buf.flip_wq)
+		tty_port_link_wq(port, driver->flip_wq);
+}
+
 /* If the cts flow control is enabled, return true. */
 static inline bool tty_port_cts_enabled(const struct tty_port *port)
 {
-- 
cgit v1.2.3


From 59621105ffca7a33955f56bc7dee0923992f5832 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Mon, 23 Feb 2026 14:37:16 +0100
Subject: of: provide of_machine_read_compatible()

Provide a helper function allowing users to read the compatible string
of the machine, hiding the access to the root node.

Reviewed-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://patch.msgid.link/20260223-soc-of-root-v2-1-b45da45903c8@oss.qualcomm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/of/base.c  | 15 +++++++++++++++
 include/linux/of.h |  8 ++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 57420806c1a2..b70aec32e0e3 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -434,6 +434,21 @@ bool of_machine_compatible_match(const char *const *compats)
 }
 EXPORT_SYMBOL(of_machine_compatible_match);
 
+/**
+ * of_machine_read_compatible - Get the compatible string of this machine
+ * @compatible: address at which the address of the compatible string will be
+ *              stored
+ * @index: index of the compatible entry in the list
+ *
+ * Returns:
+ * 0 on success, negative error number on failure.
+ */
+int of_machine_read_compatible(const char **compatible, unsigned int index)
+{
+	return of_property_read_string_index(of_root, "compatible", index, compatible);
+}
+EXPORT_SYMBOL_GPL(of_machine_read_compatible);
+
 /**
  * of_machine_device_match - Test root of device tree against a of_device_id array
  * @matches:	NULL terminated array of of_device_id match structures to search in
diff --git a/include/linux/of.h b/include/linux/of.h
index be6ec4916adf..7df971d52b55 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -426,6 +426,8 @@ static inline bool of_machine_is_compatible(const char *compat)
 	return of_machine_compatible_match(compats);
 }
 
+int of_machine_read_compatible(const char **compatible, unsigned int index);
+
 extern int of_add_property(struct device_node *np, struct property *prop);
 extern int of_remove_property(struct device_node *np, struct property *prop);
 extern int of_update_property(struct device_node *np, struct property *newprop);
@@ -851,6 +853,12 @@ static inline int of_machine_is_compatible(const char *compat)
 	return 0;
 }
 
+static inline int of_machine_read_compatible(const char **compatible,
+					     unsigned int index)
+{
+	return -ENOSYS;
+}
+
 static inline int of_add_property(struct device_node *np, struct property *prop)
 {
 	return 0;
-- 
cgit v1.2.3


From c86d3b7b847cc9b32a17117cfd71679e4315fd9f Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Mon, 23 Feb 2026 14:37:17 +0100
Subject: of: provide of_machine_read_model()

Provide a helper function allowing users to read the model string of the
machine, hiding the access to the root node.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://patch.msgid.link/20260223-soc-of-root-v2-2-b45da45903c8@oss.qualcomm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/of/base.c  | 13 +++++++++++++
 include/linux/of.h |  6 ++++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index b70aec32e0e3..bf4a51887d74 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -449,6 +449,19 @@ int of_machine_read_compatible(const char **compatible, unsigned int index)
 }
 EXPORT_SYMBOL_GPL(of_machine_read_compatible);
 
+/**
+ * of_machine_read_model - Get the model string of this machine
+ * @model: address at which the address of the model string will be stored
+ *
+ * Returns:
+ * 0 on success, negative error number on failure.
+ */
+int of_machine_read_model(const char **model)
+{
+	return of_property_read_string(of_root, "model", model);
+}
+EXPORT_SYMBOL_GPL(of_machine_read_model);
+
 /**
  * of_machine_device_match - Test root of device tree against a of_device_id array
  * @matches:	NULL terminated array of of_device_id match structures to search in
diff --git a/include/linux/of.h b/include/linux/of.h
index 7df971d52b55..2b95777f16f6 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -427,6 +427,7 @@ static inline bool of_machine_is_compatible(const char *compat)
 }
 
 int of_machine_read_compatible(const char **compatible, unsigned int index);
+int of_machine_read_model(const char **model);
 
 extern int of_add_property(struct device_node *np, struct property *prop);
 extern int of_remove_property(struct device_node *np, struct property *prop);
@@ -859,6 +860,11 @@ static inline int of_machine_read_compatible(const char **compatible,
 	return -ENOSYS;
 }
 
+static inline int of_machine_read_model(const char **model)
+{
+	return -ENOSYS;
+}
+
 static inline int of_add_property(struct device_node *np, struct property *prop)
 {
 	return 0;
-- 
cgit v1.2.3


From 030706e954c10749da8c75464c6b02cb30cb00aa Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Mon, 23 Feb 2026 14:37:19 +0100
Subject: base: soc: rename and export soc_device_get_machine()

Some SoC drivers reimplement the functionality of
soc_device_get_machine(). Make this function accessible through the
sys_soc.h header and rename it to a more descriptive name.

Reviewed-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://patch.msgid.link/20260223-soc-of-root-v2-4-b45da45903c8@oss.qualcomm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/soc.c      | 13 +++++--------
 include/linux/sys_soc.h | 10 ++++++++++
 2 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/soc.c b/drivers/base/soc.c
index 48e2f0dbd330..65ce72d49230 100644
--- a/drivers/base/soc.c
+++ b/drivers/base/soc.c
@@ -111,17 +111,14 @@ static void soc_release(struct device *dev)
 	kfree(soc_dev);
 }
 
-static void soc_device_get_machine(struct soc_device_attribute *soc_dev_attr)
+int soc_attr_read_machine(struct soc_device_attribute *soc_dev_attr)
 {
-	struct device_node *np;
-
 	if (soc_dev_attr->machine)
-		return;
+		return -EBUSY;
 
-	np = of_find_node_by_path("/");
-	of_property_read_string(np, "model", &soc_dev_attr->machine);
-	of_node_put(np);
+	return of_machine_read_model(&soc_dev_attr->machine);
 }
+EXPORT_SYMBOL_GPL(soc_attr_read_machine);
 
 static struct soc_device_attribute *early_soc_dev_attr;
 
@@ -131,7 +128,7 @@ struct soc_device *soc_device_register(struct soc_device_attribute *soc_dev_attr
 	const struct attribute_group **soc_attr_groups;
 	int ret;
 
-	soc_device_get_machine(soc_dev_attr);
+	soc_attr_read_machine(soc_dev_attr);
 
 	if (!soc_bus_registered) {
 		if (early_soc_dev_attr)
diff --git a/include/linux/sys_soc.h b/include/linux/sys_soc.h
index d9b3cf0f410c..f19f5cec18e2 100644
--- a/include/linux/sys_soc.h
+++ b/include/linux/sys_soc.h
@@ -37,6 +37,16 @@ void soc_device_unregister(struct soc_device *soc_dev);
  */
 struct device *soc_device_to_device(struct soc_device *soc);
 
+/**
+ * soc_attr_read_machine - retrieve the machine model and store it in
+ *                         the soc_device_attribute structure
+ * @soc_dev_attr: SoC attribute structure to store the model in
+ *
+ * Returns:
+ * 0 on success, negative error number on failure.
+ */
+int soc_attr_read_machine(struct soc_device_attribute *soc_dev_attr);
+
 #ifdef CONFIG_SOC_BUS
 const struct soc_device_attribute *soc_device_match(
 	const struct soc_device_attribute *matches);
-- 
cgit v1.2.3


From bb729bf1d6fdf5c2087c1651165c74cef0da1742 Mon Sep 17 00:00:00 2001
From: Li Ming <ming.li@zohomail.com>
Date: Tue, 10 Mar 2026 23:57:53 +0800
Subject: driver core: Add conditional guard support for device_lock()

Introduce conditional guard version of device_lock() for scenarios that
require conditional device lock holding.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Li Ming <ming.li@zohomail.com>
Link: https://patch.msgid.link/20260310-fix_access_endpoint_without_drv_check-v1-1-94fe919a0b87@zohomail.com
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 include/linux/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 0be95294b6e6..4fafee80524b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -911,6 +911,7 @@ static inline void device_unlock(struct device *dev)
 }
 
 DEFINE_GUARD(device, struct device *, device_lock(_T), device_unlock(_T))
+DEFINE_GUARD_COND(device, _intr, device_lock_interruptible(_T), _RET == 0)
 
 static inline void device_lock_assert(struct device *dev)
 {
-- 
cgit v1.2.3


From e416e7fa417b2d2604c1526a2f9cc38da7ced166 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 10 Mar 2026 22:29:53 -0700
Subject: tee: clean up tee_core.h kernel-doc

Use the correct struct member name and function parameter name in
kernel-doc comments.
Move a macro that was between a struct's documentation and its
declaration.
These eliminate the following kernel-doc warnings:

Warning: include/linux/tee_core.h:73 struct member 'c_no_users' not
 described in 'tee_device'
Warning: include/linux/tee_core.h:132 #define TEE_DESC_PRIVILEGED
     0x1; error: Cannot parse struct or union!
Warning: include/linux/tee_core.h:257 function parameter 'connection_data'
 not described in 'tee_session_calc_client_uuid'
Warning: include/linux/tee_core.h:320 function parameter 'teedev'
 not described in 'tee_get_drvdata'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Sumit Garg <sumit.garg@oss.qualcomm.com>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 include/linux/tee_core.h | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h
index ee5f0bd41f43..f993d5118edd 100644
--- a/include/linux/tee_core.h
+++ b/include/linux/tee_core.h
@@ -50,7 +50,7 @@ enum tee_dma_heap_id {
  * @dev:	embedded basic device structure
  * @cdev:	embedded cdev
  * @num_users:	number of active users of this device
- * @c_no_user:	completion used when unregistering the device
+ * @c_no_users:	completion used when unregistering the device
  * @mutex:	mutex protecting @num_users and @idr
  * @idr:	register of user space shared memory objects allocated or
  *		registered on this device
@@ -132,6 +132,7 @@ struct tee_driver_ops {
 /* Size for TEE revision string buffer used by get_tee_revision(). */
 #define TEE_REVISION_STR_SIZE	128
 
+#define TEE_DESC_PRIVILEGED	0x1
 /**
  * struct tee_desc - Describes the TEE driver to the subsystem
  * @name:	name of driver
@@ -139,7 +140,6 @@ struct tee_driver_ops {
  * @owner:	module providing the driver
  * @flags:	Extra properties of driver, defined by TEE_DESC_* below
  */
-#define TEE_DESC_PRIVILEGED	0x1
 struct tee_desc {
 	const char *name;
 	const struct tee_driver_ops *ops;
@@ -187,7 +187,7 @@ struct tee_protmem_pool_ops {
  * Allocates a new struct tee_device instance. The device is
  * removed by tee_device_unregister().
  *
- * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure
+ * @returns: a pointer to a 'struct tee_device' or an ERR_PTR on failure
  */
 struct tee_device *tee_device_alloc(const struct tee_desc *teedesc,
 				    struct device *dev,
@@ -201,7 +201,7 @@ struct tee_device *tee_device_alloc(const struct tee_desc *teedesc,
  * tee_device_unregister() need to be called to remove the @teedev if
  * this function fails.
  *
- * @returns < 0 on failure
+ * @returns: < 0 on failure
  */
 int tee_device_register(struct tee_device *teedev);
 
@@ -254,14 +254,14 @@ void tee_device_set_dev_groups(struct tee_device *teedev,
  * tee_session_calc_client_uuid() - Calculates client UUID for session
  * @uuid:		Resulting UUID
  * @connection_method:	Connection method for session (TEE_IOCTL_LOGIN_*)
- * @connectuon_data:	Connection data for opening session
+ * @connection_data:	Connection data for opening session
  *
  * Based on connection method calculates UUIDv5 based client UUID.
  *
  * For group based logins verifies that calling process has specified
  * credentials.
  *
- * @return < 0 on failure
+ * @returns: < 0 on failure
  */
 int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method,
 				 const u8 connection_data[TEE_IOCTL_UUID_LEN]);
@@ -295,7 +295,7 @@ struct tee_shm_pool_ops {
  * @paddr:	Physical address of start of pool
  * @size:	Size in bytes of the pool
  *
- * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure.
+ * @returns: pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure.
  */
 struct tee_shm_pool *tee_shm_pool_alloc_res_mem(unsigned long vaddr,
 						phys_addr_t paddr, size_t size,
@@ -318,14 +318,16 @@ static inline void tee_shm_pool_free(struct tee_shm_pool *pool)
  * @paddr:	Physical address of start of pool
  * @size:	Size in bytes of the pool
  *
- * @returns pointer to a 'struct tee_protmem_pool' or an ERR_PTR on failure.
+ * @returns: pointer to a 'struct tee_protmem_pool' or an ERR_PTR on failure.
  */
 struct tee_protmem_pool *tee_protmem_static_pool_alloc(phys_addr_t paddr,
 						       size_t size);
 
 /**
  * tee_get_drvdata() - Return driver_data pointer
- * @returns the driver_data pointer supplied to tee_register().
+ * @teedev: Pointer to the tee_device
+ *
+ * @returns: the driver_data pointer supplied to tee_register().
  */
 void *tee_get_drvdata(struct tee_device *teedev);
 
@@ -334,7 +336,7 @@ void *tee_get_drvdata(struct tee_device *teedev);
  *                            TEE driver
  * @ctx:	The TEE context for shared memory allocation
  * @size:	Shared memory allocation size
- * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure
+ * @returns: a pointer to 'struct tee_shm' on success or an ERR_PTR on failure
  */
 struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size);
 
@@ -354,7 +356,7 @@ void tee_dyn_shm_free_helper(struct tee_shm *shm,
 /**
  * tee_shm_is_dynamic() - Check if shared memory object is of the dynamic kind
  * @shm:	Shared memory handle
- * @returns true if object is dynamic shared memory
+ * @returns: true if object is dynamic shared memory
  */
 static inline bool tee_shm_is_dynamic(struct tee_shm *shm)
 {
@@ -370,7 +372,7 @@ void tee_shm_put(struct tee_shm *shm);
 /**
  * tee_shm_get_id() - Get id of a shared memory object
  * @shm:	Shared memory handle
- * @returns id
+ * @returns: id
  */
 static inline int tee_shm_get_id(struct tee_shm *shm)
 {
@@ -382,7 +384,7 @@ static inline int tee_shm_get_id(struct tee_shm *shm)
  * count
  * @ctx:	Context owning the shared memory
  * @id:		Id of shared memory object
- * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure
+ * @returns: a pointer to 'struct tee_shm' on success or an ERR_PTR on failure
  */
 struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id);
 
@@ -402,7 +404,7 @@ static inline bool tee_param_is_memref(struct tee_param *param)
  * teedev_open() - Open a struct tee_device
  * @teedev:	Device to open
  *
- * @return a pointer to struct tee_context on success or an ERR_PTR on failure.
+ * @returns: pointer to struct tee_context on success or an ERR_PTR on failure.
  */
 struct tee_context *teedev_open(struct tee_device *teedev);
 
-- 
cgit v1.2.3


From fe2511adb1fc1814df06ca11e0d8a92f792e4029 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 1 Mar 2026 13:30:17 +0100
Subject: sysfs: constify group arrays in function arguments

Constify the groups array argument where applicable. This allows to
pass constant arrays as arguments.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/17035265-8882-4101-b7a7-16b3eb94f8b5@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/group.c      | 10 +++++-----
 include/linux/sysfs.h | 16 ++++++++--------
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index e1e639f515a0..b3edae0578c0 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -217,7 +217,7 @@ int sysfs_create_group(struct kobject *kobj,
 EXPORT_SYMBOL_GPL(sysfs_create_group);
 
 static int internal_create_groups(struct kobject *kobj, int update,
-				  const struct attribute_group **groups)
+				  const struct attribute_group *const *groups)
 {
 	int error = 0;
 	int i;
@@ -250,7 +250,7 @@ static int internal_create_groups(struct kobject *kobj, int update,
  * Returns 0 on success or error code from sysfs_create_group on failure.
  */
 int sysfs_create_groups(struct kobject *kobj,
-			const struct attribute_group **groups)
+			const struct attribute_group *const *groups)
 {
 	return internal_create_groups(kobj, 0, groups);
 }
@@ -268,7 +268,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_groups);
  * Returns 0 on success or error code from sysfs_update_group on failure.
  */
 int sysfs_update_groups(struct kobject *kobj,
-			const struct attribute_group **groups)
+			const struct attribute_group *const *groups)
 {
 	return internal_create_groups(kobj, 1, groups);
 }
@@ -342,7 +342,7 @@ EXPORT_SYMBOL_GPL(sysfs_remove_group);
  * If groups is not NULL, remove the specified groups from the kobject.
  */
 void sysfs_remove_groups(struct kobject *kobj,
-			 const struct attribute_group **groups)
+			 const struct attribute_group *const *groups)
 {
 	int i;
 
@@ -613,7 +613,7 @@ EXPORT_SYMBOL_GPL(sysfs_group_change_owner);
  * Returns 0 on success or error code on failure.
  */
 int sysfs_groups_change_owner(struct kobject *kobj,
-			      const struct attribute_group **groups,
+			      const struct attribute_group *const *groups,
 			      kuid_t kuid, kgid_t kgid)
 {
 	int error = 0, i;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 99b775f3ff46..9777e9445dd5 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -445,15 +445,15 @@ void sysfs_delete_link(struct kobject *dir, struct kobject *targ,
 int __must_check sysfs_create_group(struct kobject *kobj,
 				    const struct attribute_group *grp);
 int __must_check sysfs_create_groups(struct kobject *kobj,
-				     const struct attribute_group **groups);
+				     const struct attribute_group *const *groups);
 int __must_check sysfs_update_groups(struct kobject *kobj,
-				     const struct attribute_group **groups);
+				     const struct attribute_group *const *groups);
 int sysfs_update_group(struct kobject *kobj,
 		       const struct attribute_group *grp);
 void sysfs_remove_group(struct kobject *kobj,
 			const struct attribute_group *grp);
 void sysfs_remove_groups(struct kobject *kobj,
-			 const struct attribute_group **groups);
+			 const struct attribute_group *const *groups);
 int sysfs_add_file_to_group(struct kobject *kobj,
 			const struct attribute *attr, const char *group);
 void sysfs_remove_file_from_group(struct kobject *kobj,
@@ -486,7 +486,7 @@ int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid);
 int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ,
 			    const char *name, kuid_t kuid, kgid_t kgid);
 int sysfs_groups_change_owner(struct kobject *kobj,
-			      const struct attribute_group **groups,
+			      const struct attribute_group *const *groups,
 			      kuid_t kuid, kgid_t kgid);
 int sysfs_group_change_owner(struct kobject *kobj,
 			     const struct attribute_group *groups, kuid_t kuid,
@@ -629,13 +629,13 @@ static inline int sysfs_create_group(struct kobject *kobj,
 }
 
 static inline int sysfs_create_groups(struct kobject *kobj,
-				      const struct attribute_group **groups)
+				      const struct attribute_group *const *groups)
 {
 	return 0;
 }
 
 static inline int sysfs_update_groups(struct kobject *kobj,
-				      const struct attribute_group **groups)
+				      const struct attribute_group *const *groups)
 {
 	return 0;
 }
@@ -652,7 +652,7 @@ static inline void sysfs_remove_group(struct kobject *kobj,
 }
 
 static inline void sysfs_remove_groups(struct kobject *kobj,
-				       const struct attribute_group **groups)
+				       const struct attribute_group *const *groups)
 {
 }
 
@@ -733,7 +733,7 @@ static inline int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t k
 }
 
 static inline int sysfs_groups_change_owner(struct kobject *kobj,
-			  const struct attribute_group **groups,
+			  const struct attribute_group *const *groups,
 			  kuid_t kuid, kgid_t kgid)
 {
 	return 0;
-- 
cgit v1.2.3


From ece5283706aff6791a37894bafbb0c134a94c0f3 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 1 Mar 2026 13:31:02 +0100
Subject: driver: core: constify groups array argument in device_add_groups and
 device_remove_groups

Now that sysfs_create_groups() and sysfs_remove_groups() allow to
pass constant groups arrays, we can constify the groups array argument
also here.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/8ea2d6d1-0adb-4d7f-92bc-751e93ce08d6@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 5 +++--
 include/linux/device.h | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 791f9e444df8..f497b724332a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2831,14 +2831,15 @@ static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(removable);
 
-int device_add_groups(struct device *dev, const struct attribute_group **groups)
+int device_add_groups(struct device *dev,
+		      const struct attribute_group *const *groups)
 {
 	return sysfs_create_groups(&dev->kobj, groups);
 }
 EXPORT_SYMBOL_GPL(device_add_groups);
 
 void device_remove_groups(struct device *dev,
-			  const struct attribute_group **groups)
+			  const struct attribute_group *const *groups)
 {
 	sysfs_remove_groups(&dev->kobj, groups);
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index 0be95294b6e6..48a0444ccc1e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1131,9 +1131,9 @@ device_create_with_groups(const struct class *cls, struct device *parent, dev_t
 void device_destroy(const struct class *cls, dev_t devt);
 
 int __must_check device_add_groups(struct device *dev,
-				   const struct attribute_group **groups);
+				   const struct attribute_group *const *groups);
 void device_remove_groups(struct device *dev,
-			  const struct attribute_group **groups);
+			  const struct attribute_group *const *groups);
 
 static inline int __must_check device_add_group(struct device *dev,
 					const struct attribute_group *grp)
-- 
cgit v1.2.3


From 10f874dc92b3f3bf96470d997bdf157b289c9d4c Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 1 Mar 2026 13:31:56 +0100
Subject: driver core: make struct class groups members constant arrays

Constify the groups arrays, allowing to assign constant arrays.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/7ff56b07-09ca-4948-b98f-5bd37ceef21e@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device/class.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device/class.h b/include/linux/device/class.h
index 65880e60c720..2079239a5aa5 100644
--- a/include/linux/device/class.h
+++ b/include/linux/device/class.h
@@ -50,8 +50,8 @@ struct fwnode_handle;
 struct class {
 	const char		*name;
 
-	const struct attribute_group	**class_groups;
-	const struct attribute_group	**dev_groups;
+	const struct attribute_group	*const *class_groups;
+	const struct attribute_group	*const *dev_groups;
 
 	int (*dev_uevent)(const struct device *dev, struct kobj_uevent_env *env);
 	char *(*devnode)(const struct device *dev, umode_t *mode);
-- 
cgit v1.2.3


From cb0caadb64ca0894c4a24e1a34841f260d462f90 Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Fri, 13 Mar 2026 14:21:49 +0800
Subject: wifi: ieee80211: fix definition of EHT-MCS 15 in MRU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to the definition in IEEE Std 802.11be-2024, Table 9-417r, each
bit indicates support for the transmission and reception of EHT-MCS 15 in:
- B0: 52+26-tone and 106+26-tone MRUs.
- B1: a 484+242-tone MRU if 80 MHz is supported.
- B2: a 996+484-tone MRU and a 996+484+242-tone MRU if 160 MHz is
      supported.
- B3: a 3×996-tone MRU if 320 MHz is supported.

Fixes: 6239da18d2f9 ("wifi: mac80211: adjust EHT capa when lowering bandwidth")
Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Link: https://patch.msgid.link/20260313062150.3165433-1-shayne.chen@mediatek.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211-eht.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211-eht.h b/include/linux/ieee80211-eht.h
index f8e9f5d36d2a..a97b1d01f3ac 100644
--- a/include/linux/ieee80211-eht.h
+++ b/include/linux/ieee80211-eht.h
@@ -251,8 +251,8 @@ struct ieee80211_eht_operation_info {
 #define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF		0x40
 #define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK	0x07
 
-#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ			0x08
-#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ		0x30
+#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ			0x10
+#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ		0x20
 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ		0x40
 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK			0x78
 #define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP		0x80
-- 
cgit v1.2.3


From 7caedbb5ade345df0eec0bf01035c780919a9f56 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Mon, 9 Mar 2026 13:37:02 -0700
Subject: integrity: Eliminate weak definition of arch_get_secureboot()

security/integrity/secure_boot.c contains a single __weak function,
which breaks recordmcount when building with clang:

  $ make -skj"$(nproc)" ARCH=powerpc LLVM=1 ppc64_defconfig security/integrity/secure_boot.o
  Cannot find symbol for section 2: .text.
  security/integrity/secure_boot.o: failed

Introduce a Kconfig symbol, CONFIG_HAVE_ARCH_GET_SECUREBOOT, to indicate
that an architecture provides a definition of arch_get_secureboot().
Provide a static inline stub when this symbol is not defined to achieve
the same effect as the __weak function, allowing secure_boot.c to be
removed altogether. Move the s390 definition of arch_get_secureboot()
out of the CONFIG_KEXEC_FILE block to ensure it is always available, as
it does not actually depend on KEXEC_FILE.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 31a6a07eefeb ("integrity: Make arch_ima_get_secureboot integrity-wide")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 arch/Kconfig                     |  3 +++
 arch/powerpc/Kconfig             |  1 +
 arch/s390/Kconfig                |  1 +
 arch/s390/kernel/ipl.c           | 10 +++++-----
 include/linux/secure_boot.h      |  4 ++++
 security/integrity/Makefile      |  2 +-
 security/integrity/secure_boot.c | 16 ----------------
 7 files changed, 15 insertions(+), 22 deletions(-)
 delete mode 100644 security/integrity/secure_boot.c

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 102ddbd4298e..a6d1c8cc1d64 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1841,4 +1841,7 @@ config ARCH_WANTS_PRE_LINK_VMLINUX
 config ARCH_HAS_CPU_ATTACK_VECTORS
 	bool
 
+config HAVE_ARCH_GET_SECUREBOOT
+	def_bool EFI
+
 endmenu
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ad7a2fe63a2a..da1eafb64354 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1061,6 +1061,7 @@ config PPC_SECURE_BOOT
 	depends on IMA_ARCH_POLICY
 	imply IMA_SECURE_AND_OR_TRUSTED_BOOT
 	select PSERIES_PLPKS if PPC_PSERIES
+	select HAVE_ARCH_GET_SECUREBOOT
 	help
 	  Systems with firmware secure boot enabled need to define security
 	  policies to extend secure boot to the OS. This config allows a user
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2101cc738b5e..4197c20d34b4 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -181,6 +181,7 @@ config S390
 	select GENERIC_IOREMAP if PCI
 	select HAVE_ALIGNED_STRUCT_PAGE
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_GET_SECUREBOOT
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 2d01a1713938..3c346b02ceb9 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2388,6 +2388,11 @@ void __no_stack_protector s390_reset_system(void)
 	diag_amode31_ops.diag308_reset();
 }
 
+bool arch_get_secureboot(void)
+{
+	return ipl_secure_flag;
+}
+
 #ifdef CONFIG_KEXEC_FILE
 
 int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf,
@@ -2505,11 +2510,6 @@ out:
 	return buf;
 }
 
-bool arch_get_secureboot(void)
-{
-	return ipl_secure_flag;
-}
-
 int ipl_report_free(struct ipl_report *report)
 {
 	struct ipl_report_component *comp, *ncomp;
diff --git a/include/linux/secure_boot.h b/include/linux/secure_boot.h
index 3ded3f03655c..d17e92351567 100644
--- a/include/linux/secure_boot.h
+++ b/include/linux/secure_boot.h
@@ -10,10 +10,14 @@
 
 #include <linux/types.h>
 
+#ifdef CONFIG_HAVE_ARCH_GET_SECUREBOOT
 /*
  * Returns true if the platform secure boot is enabled.
  * Returns false if disabled or not supported.
  */
 bool arch_get_secureboot(void);
+#else
+static inline bool arch_get_secureboot(void) { return false; }
+#endif
 
 #endif /* _LINUX_SECURE_BOOT_H */
diff --git a/security/integrity/Makefile b/security/integrity/Makefile
index 548665e2b702..45dfdedbdad4 100644
--- a/security/integrity/Makefile
+++ b/security/integrity/Makefile
@@ -5,7 +5,7 @@
 
 obj-$(CONFIG_INTEGRITY) += integrity.o
 
-integrity-y := iint.o secure_boot.o
+integrity-y := iint.o
 integrity-$(CONFIG_INTEGRITY_AUDIT) += integrity_audit.o
 integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o
 integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o
diff --git a/security/integrity/secure_boot.c b/security/integrity/secure_boot.c
deleted file mode 100644
index fc2693c286f8..000000000000
--- a/security/integrity/secure_boot.c
+++ /dev/null
@@ -1,16 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2026 Red Hat, Inc. All Rights Reserved.
- *
- * Author: Coiby Xu <coxu@redhat.com>
- */
-#include <linux/secure_boot.h>
-
-/*
- * Default weak implementation.
- * Architectures that support secure boot must override this.
- */
-__weak bool arch_get_secureboot(void)
-{
-	return false;
-}
-- 
cgit v1.2.3


From 97e6fabee5dcb2d86d4ff45f20606b8a73181f74 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 13 Mar 2026 13:53:03 +0100
Subject: driver core: auxiliary bus: Introduce dev_is_auxiliary()

Introduce dev_is_auxiliary() in analogy with dev_is_platform() to
facilitate subsequent changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Danilo Krummrich <dakr@kernel.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/5079467.GXAFRqVoOG@rafael.j.wysocki
---
 drivers/base/auxiliary.c      | 10 ++++++++++
 include/linux/auxiliary_bus.h |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c
index 9fd3820d1f8a..11949d6bcda4 100644
--- a/drivers/base/auxiliary.c
+++ b/drivers/base/auxiliary.c
@@ -502,6 +502,16 @@ struct auxiliary_device *__devm_auxiliary_device_create(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__devm_auxiliary_device_create);
 
+/**
+ * dev_is_auxiliary - check if the device is an auxiliary one
+ * @dev: device to check
+ */
+bool dev_is_auxiliary(struct device *dev)
+{
+	return dev->bus == &auxiliary_bus_type;
+}
+EXPORT_SYMBOL_GPL(dev_is_auxiliary);
+
 void __init auxiliary_bus_init(void)
 {
 	WARN_ON(bus_register(&auxiliary_bus_type));
diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index 4086afd0cc6b..bc09b55e3682 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -271,6 +271,8 @@ struct auxiliary_device *__devm_auxiliary_device_create(struct device *dev,
 	__devm_auxiliary_device_create(dev, KBUILD_MODNAME, devname,  \
 				       platform_data, 0)
 
+bool dev_is_auxiliary(struct device *dev);
+
 /**
  * module_auxiliary_driver() - Helper macro for registering an auxiliary driver
  * @__auxiliary_driver: auxiliary driver struct
-- 
cgit v1.2.3


From 98d709cba3193f0bec54da4cd76ef499ea2f1ef7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 13 Mar 2026 09:43:22 -1000
Subject: sched_ext: Implement SCX_ENQ_IMMED

Add SCX_ENQ_IMMED enqueue flag for local DSQ insertions. Once a task is
dispatched with IMMED, it either gets on the CPU immediately and stays on it,
or gets reenqueued back to the BPF scheduler. It will never linger on a local
DSQ behind other tasks or on a CPU taken by a higher-priority class.

rq_is_open() uses rq->next_class to determine whether the rq is available,
and wakeup_preempt_scx() triggers reenqueue when a higher-priority class task
arrives. These capture all higher class preemptions. Combined with reenqueue
points in the dispatch path, all cases where an IMMED task would not execute
immediately are covered.

SCX_TASK_IMMED persists in p->scx.flags until the next fresh enqueue, so the
guarantee survives SAVE/RESTORE cycles. If preempted while running,
put_prev_task_scx() reenqueues through ops.enqueue() with
SCX_TASK_REENQ_PREEMPTED instead of silently placing the task back on the
local DSQ.

This enables tighter scheduling latency control by preventing tasks from
piling up on local DSQs. It also enables opportunistic CPU sharing across
sub-schedulers - without this, a sub-scheduler can stuff the local DSQ of a
shared CPU, making it difficult for others to use.

v2: - Rewrite is_curr_done() as rq_is_open() using rq->next_class and
      implement wakeup_preempt_scx() to achieve complete coverage of all
      cases where IMMED tasks could get stranded.
    - Track IMMED persistently in p->scx.flags and reenqueue
      preempted-while-running tasks through ops.enqueue().
    - Bound deferred reenq cycles (SCX_REENQ_LOCAL_MAX_REPEAT).
    - Misc renames, documentation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 include/linux/sched/ext.h                |   5 +
 kernel/sched/ext.c                       | 271 ++++++++++++++++++++++++++++---
 kernel/sched/ext_internal.h              |  47 ++++++
 kernel/sched/sched.h                     |   2 +
 tools/sched_ext/include/scx/compat.bpf.h |   5 +
 5 files changed, 311 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 60a4f65d0174..602dc83cab36 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -100,6 +100,7 @@ enum scx_ent_flags {
 	SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */
 	SCX_TASK_DEQD_FOR_SLEEP	= 1 << 3, /* last dequeue was for SLEEP */
 	SCX_TASK_SUB_INIT	= 1 << 4, /* task being initialized for a sub sched */
+	SCX_TASK_IMMED		= 1 << 5, /* task is on local DSQ with %SCX_ENQ_IMMED */
 
 	/*
 	 * Bits 8 and 9 are used to carry task state:
@@ -125,6 +126,8 @@ enum scx_ent_flags {
 	 *
 	 * NONE		not being reenqueued
 	 * KFUNC	reenqueued by scx_bpf_dsq_reenq() and friends
+	 * IMMED	reenqueued due to failed ENQ_IMMED
+	 * PREEMPTED	preempted while running
 	 */
 	SCX_TASK_REENQ_REASON_SHIFT = 12,
 	SCX_TASK_REENQ_REASON_BITS = 2,
@@ -132,6 +135,8 @@ enum scx_ent_flags {
 
 	SCX_TASK_REENQ_NONE	= 0 << SCX_TASK_REENQ_REASON_SHIFT,
 	SCX_TASK_REENQ_KFUNC	= 1 << SCX_TASK_REENQ_REASON_SHIFT,
+	SCX_TASK_REENQ_IMMED	= 2 << SCX_TASK_REENQ_REASON_SHIFT,
+	SCX_TASK_REENQ_PREEMPTED = 3 << SCX_TASK_REENQ_REASON_SHIFT,
 
 	/* iteration cursor, not a task */
 	SCX_TASK_CURSOR		= 1 << 31,
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 2f59265b9b57..c75c35b67a18 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -406,6 +406,62 @@ static bool bypass_dsp_enabled(struct scx_sched *sch)
 	return unlikely(atomic_read(&sch->bypass_dsp_enable_depth));
 }
 
+/**
+ * rq_is_open - Is the rq available for immediate execution of an SCX task?
+ * @rq: rq to test
+ * @enq_flags: optional %SCX_ENQ_* of the task being enqueued
+ *
+ * Returns %true if @rq is currently open for executing an SCX task. After a
+ * %false return, @rq is guaranteed to invoke SCX dispatch path at least once
+ * before going to idle and not inserting a task into @rq's local DSQ after a
+ * %false return doesn't cause @rq to stall.
+ */
+static bool rq_is_open(struct rq *rq, u64 enq_flags)
+{
+	lockdep_assert_rq_held(rq);
+
+	/*
+	 * A higher-priority class task is either running or in the process of
+	 * waking up on @rq.
+	 */
+	if (sched_class_above(rq->next_class, &ext_sched_class))
+		return false;
+
+	/*
+	 * @rq is either in transition to or in idle and there is no
+	 * higher-priority class task waking up on it.
+	 */
+	if (sched_class_above(&ext_sched_class, rq->next_class))
+		return true;
+
+	/*
+	 * @rq is either picking, in transition to, or running an SCX task.
+	 */
+
+	/*
+	 * If we're in the dispatch path holding rq lock, $curr may or may not
+	 * be ready depending on whether the on-going dispatch decides to extend
+	 * $curr's slice. We say yes here and resolve it at the end of dispatch.
+	 * See balance_one().
+	 */
+	if (rq->scx.flags & SCX_RQ_IN_BALANCE)
+		return true;
+
+	/*
+	 * %SCX_ENQ_PREEMPT clears $curr's slice if on SCX and kicks dispatch,
+	 * so allow it to avoid spuriously triggering reenq on a combined
+	 * PREEMPT|IMMED insertion.
+	 */
+	if (enq_flags & SCX_ENQ_PREEMPT)
+		return true;
+
+	/*
+	 * @rq is either in transition to or running an SCX task and can't go
+	 * idle without another SCX dispatch cycle.
+	 */
+	return false;
+}
+
 /*
  * scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX
  * ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate
@@ -1220,6 +1276,16 @@ static void schedule_dsq_reenq(struct scx_sched *sch, struct scx_dispatch_q *dsq
 	}
 }
 
+static void schedule_reenq_local(struct rq *rq, u64 reenq_flags)
+{
+	struct scx_sched *root = rcu_dereference_sched(scx_root);
+
+	if (WARN_ON_ONCE(!root))
+		return;
+
+	schedule_dsq_reenq(root, &rq->scx.local_dsq, reenq_flags);
+}
+
 /**
  * touch_core_sched - Update timestamp used for core-sched task ordering
  * @rq: rq to read clock from, must be locked
@@ -1296,10 +1362,58 @@ static bool scx_dsq_priq_less(struct rb_node *node_a,
 	return time_before64(a->scx.dsq_vtime, b->scx.dsq_vtime);
 }
 
-static void dsq_mod_nr(struct scx_dispatch_q *dsq, s32 delta)
+static void dsq_inc_nr(struct scx_dispatch_q *dsq, struct task_struct *p, u64 enq_flags)
 {
 	/* scx_bpf_dsq_nr_queued() reads ->nr without locking, use WRITE_ONCE() */
-	WRITE_ONCE(dsq->nr, dsq->nr + delta);
+	WRITE_ONCE(dsq->nr, dsq->nr + 1);
+
+	/*
+	 * Once @p reaches a local DSQ, it can only leave it by being dispatched
+	 * to the CPU or dequeued. In both cases, the only way @p can go back to
+	 * the BPF sched is through enqueueing. If being inserted into a local
+	 * DSQ with IMMED, persist the state until the next enqueueing event in
+	 * do_enqueue_task() so that we can maintain IMMED protection through
+	 * e.g. SAVE/RESTORE cycles and slice extensions.
+	 */
+	if (enq_flags & SCX_ENQ_IMMED) {
+		if (unlikely(dsq->id != SCX_DSQ_LOCAL)) {
+			WARN_ON_ONCE(!(enq_flags & SCX_ENQ_GDSQ_FALLBACK));
+			return;
+		}
+		p->scx.flags |= SCX_TASK_IMMED;
+	}
+
+	if (p->scx.flags & SCX_TASK_IMMED) {
+		struct rq *rq = container_of(dsq, struct rq, scx.local_dsq);
+
+		if (WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL))
+			return;
+
+		rq->scx.nr_immed++;
+
+		/*
+		 * If @rq already had other tasks or the current task is not
+		 * done yet, @p can't go on the CPU immediately. Re-enqueue.
+		 */
+		if (unlikely(dsq->nr > 1 || !rq_is_open(rq, enq_flags)))
+			schedule_reenq_local(rq, 0);
+	}
+}
+
+static void dsq_dec_nr(struct scx_dispatch_q *dsq, struct task_struct *p)
+{
+	/* see dsq_inc_nr() */
+	WRITE_ONCE(dsq->nr, dsq->nr - 1);
+
+	if (p->scx.flags & SCX_TASK_IMMED) {
+		struct rq *rq = container_of(dsq, struct rq, scx.local_dsq);
+
+		if (WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL) ||
+		    WARN_ON_ONCE(rq->scx.nr_immed <= 0))
+			return;
+
+		rq->scx.nr_immed--;
+	}
 }
 
 static void refill_task_slice_dfl(struct scx_sched *sch, struct task_struct *p)
@@ -1458,7 +1572,7 @@ static void dispatch_enqueue(struct scx_sched *sch, struct rq *rq,
 	WRITE_ONCE(dsq->seq, dsq->seq + 1);
 	p->scx.dsq_seq = dsq->seq;
 
-	dsq_mod_nr(dsq, 1);
+	dsq_inc_nr(dsq, p, enq_flags);
 	p->scx.dsq = dsq;
 
 	/*
@@ -1512,7 +1626,7 @@ static void task_unlink_from_dsq(struct task_struct *p,
 	}
 
 	list_del_init(&p->scx.dsq_list.node);
-	dsq_mod_nr(dsq, -1);
+	dsq_dec_nr(dsq, p);
 
 	if (!(dsq->id & SCX_DSQ_FLAG_BUILTIN) && dsq->first_task == p) {
 		struct task_struct *first_task;
@@ -1723,10 +1837,18 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 
 	WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_QUEUED));
 
-	/* rq migration */
+	/* internal movements - rq migration / RESTORE */
 	if (sticky_cpu == cpu_of(rq))
 		goto local_norefill;
 
+	/*
+	 * Clear persistent TASK_IMMED for fresh enqueues, see dsq_inc_nr().
+	 * Note that exiting and migration-disabled tasks that skip
+	 * ops.enqueue() below will lose IMMED protection unless
+	 * %SCX_OPS_ENQ_EXITING / %SCX_OPS_ENQ_MIGRATION_DISABLED are set.
+	 */
+	p->scx.flags &= ~SCX_TASK_IMMED;
+
 	/*
 	 * If !scx_rq_online(), we already told the BPF scheduler that the CPU
 	 * is offline and are just running the hotplug path. Don't bother the
@@ -2032,6 +2154,30 @@ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
 		return false;
 }
 
+static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p, int wake_flags)
+{
+	/*
+	 * Preemption between SCX tasks is implemented by resetting the victim
+	 * task's slice to 0 and triggering reschedule on the target CPU.
+	 * Nothing to do.
+	 */
+	if (p->sched_class == &ext_sched_class)
+		return;
+
+	/*
+	 * Getting preempted by a higher-priority class. Reenqueue IMMED tasks.
+	 * This captures all preemption cases including:
+	 *
+	 * - A SCX task is currently running.
+	 *
+	 * - @rq is waking from idle due to a SCX task waking to it.
+	 *
+	 * - A higher-priority wakes up while SCX dispatch is in progress.
+	 */
+	if (rq->scx.nr_immed)
+		schedule_reenq_local(rq, 0);
+}
+
 static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
 					 struct scx_dispatch_q *src_dsq,
 					 struct rq *dst_rq)
@@ -2049,7 +2195,7 @@ static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
 	else
 		list_add_tail(&p->scx.dsq_list.node, &dst_dsq->list);
 
-	dsq_mod_nr(dst_dsq, 1);
+	dsq_inc_nr(dst_dsq, p, enq_flags);
 	p->scx.dsq = dst_dsq;
 
 	local_dsq_post_enq(dst_dsq, p, enq_flags);
@@ -2257,6 +2403,7 @@ static struct rq *move_task_between_dsqs(struct scx_sched *sch,
 		    unlikely(!task_can_run_on_remote_rq(sch, p, dst_rq, true))) {
 			dst_dsq = find_global_dsq(sch, task_cpu(p));
 			dst_rq = src_rq;
+			enq_flags |= SCX_ENQ_GDSQ_FALLBACK;
 		}
 	} else {
 		/* no need to migrate if destination is a non-local DSQ */
@@ -2385,7 +2532,7 @@ static void dispatch_to_local_dsq(struct scx_sched *sch, struct rq *rq,
 	if (src_rq != dst_rq &&
 	    unlikely(!task_can_run_on_remote_rq(sch, p, dst_rq, true))) {
 		dispatch_enqueue(sch, rq, find_global_dsq(sch, task_cpu(p)), p,
-				 enq_flags | SCX_ENQ_CLEAR_OPSS);
+				 enq_flags | SCX_ENQ_CLEAR_OPSS | SCX_ENQ_GDSQ_FALLBACK);
 		return;
 	}
 
@@ -2738,6 +2885,19 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 	return false;
 
 has_tasks:
+	/*
+	 * @rq may have extra IMMED tasks without reenq scheduled:
+	 *
+	 * - rq_is_open() can't reliably tell when and how slice is going to be
+	 *   modified for $curr and allows IMMED tasks to be queued while
+	 *   dispatch is in progress.
+	 *
+	 * - A non-IMMED HEAD task can get queued in front of an IMMED task
+	 *   between the IMMED queueing and the subsequent scheduling event.
+	 */
+	if (unlikely(rq->scx.local_dsq.nr > 1 && rq->scx.nr_immed))
+		schedule_reenq_local(rq, 0);
+
 	rq->scx.flags &= ~SCX_RQ_IN_BALANCE;
 	return true;
 }
@@ -2859,11 +3019,17 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
 		 * If @p has slice left and is being put, @p is getting
 		 * preempted by a higher priority scheduler class or core-sched
 		 * forcing a different task. Leave it at the head of the local
-		 * DSQ.
+		 * DSQ unless it was an IMMED task. IMMED tasks should not
+		 * linger on a busy CPU, reenqueue them to the BPF scheduler.
 		 */
 		if (p->scx.slice && !scx_bypassing(sch, cpu_of(rq))) {
-			dispatch_enqueue(sch, rq, &rq->scx.local_dsq, p,
-					 SCX_ENQ_HEAD);
+			if (p->scx.flags & SCX_TASK_IMMED) {
+				p->scx.flags |= SCX_TASK_REENQ_PREEMPTED;
+				do_enqueue_task(rq, p, SCX_ENQ_REENQ, -1);
+				p->scx.flags &= ~SCX_TASK_REENQ_REASON_MASK;
+			} else {
+				dispatch_enqueue(sch, rq, &rq->scx.local_dsq, p, SCX_ENQ_HEAD);
+			}
 			goto switch_class;
 		}
 
@@ -3682,8 +3848,6 @@ static void switched_from_scx(struct rq *rq, struct task_struct *p)
 	scx_disable_task(scx_task_sched(p), p);
 }
 
-static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p, int wake_flags) {}
-
 static void switched_to_scx(struct rq *rq, struct task_struct *p) {}
 
 int scx_check_setscheduler(struct task_struct *p, int policy)
@@ -3725,9 +3889,45 @@ static void process_ddsp_deferred_locals(struct rq *rq)
 	}
 }
 
+/*
+ * Determine whether @p should be reenqueued from a local DSQ.
+ *
+ * @reenq_flags is mutable and accumulates state across the DSQ walk:
+ *
+ * - %SCX_REENQ_TSR_NOT_FIRST: Set after the first task is visited. "First"
+ *   tracks position in the DSQ list, not among IMMED tasks. A non-IMMED task at
+ *   the head consumes the first slot.
+ *
+ * - %SCX_REENQ_TSR_RQ_OPEN: Set by reenq_local() before the walk if
+ *   rq_is_open() is true.
+ *
+ * An IMMED task is kept (returns %false) only if it's the first task in the DSQ
+ * AND the current task is done — i.e. it will execute immediately. All other
+ * IMMED tasks are reenqueued. This means if a non-IMMED task sits at the head,
+ * every IMMED task behind it gets reenqueued.
+ *
+ * Reenqueued tasks go through ops.enqueue() with %SCX_ENQ_REENQ |
+ * %SCX_TASK_REENQ_IMMED. If the BPF scheduler dispatches back to the same local
+ * DSQ with %SCX_ENQ_IMMED while the CPU is still unavailable, this triggers
+ * another reenq cycle. Repetitions are bounded by %SCX_REENQ_LOCAL_MAX_REPEAT
+ * in process_deferred_reenq_locals().
+ */
 static bool local_task_should_reenq(struct task_struct *p, u64 *reenq_flags, u32 *reason)
 {
+	bool first;
+
+	first = !(*reenq_flags & SCX_REENQ_TSR_NOT_FIRST);
+	*reenq_flags |= SCX_REENQ_TSR_NOT_FIRST;
+
 	*reason = SCX_TASK_REENQ_KFUNC;
+
+	if ((p->scx.flags & SCX_TASK_IMMED) &&
+	    (!first || !(*reenq_flags & SCX_REENQ_TSR_RQ_OPEN))) {
+		__scx_add_event(scx_task_sched(p), SCX_EV_REENQ_IMMED, 1);
+		*reason = SCX_TASK_REENQ_IMMED;
+		return true;
+	}
+
 	return *reenq_flags & SCX_REENQ_ANY;
 }
 
@@ -3739,6 +3939,11 @@ static u32 reenq_local(struct scx_sched *sch, struct rq *rq, u64 reenq_flags)
 
 	lockdep_assert_rq_held(rq);
 
+	if (WARN_ON_ONCE(reenq_flags & __SCX_REENQ_TSR_MASK))
+		reenq_flags &= ~__SCX_REENQ_TSR_MASK;
+	if (rq_is_open(rq, 0))
+		reenq_flags |= SCX_REENQ_TSR_RQ_OPEN;
+
 	/*
 	 * The BPF scheduler may choose to dispatch tasks back to
 	 * @rq->scx.local_dsq. Move all candidate tasks off to a private list
@@ -3792,11 +3997,14 @@ static u32 reenq_local(struct scx_sched *sch, struct rq *rq, u64 reenq_flags)
 
 static void process_deferred_reenq_locals(struct rq *rq)
 {
+	u64 seq = ++rq->scx.deferred_reenq_locals_seq;
+
 	lockdep_assert_rq_held(rq);
 
 	while (true) {
 		struct scx_sched *sch;
 		u64 reenq_flags;
+		bool skip = false;
 
 		scoped_guard (raw_spinlock, &rq->scx.deferred_reenq_lock) {
 			struct scx_deferred_reenq_local *drl =
@@ -3811,15 +4019,31 @@ static void process_deferred_reenq_locals(struct rq *rq)
 			sch_pcpu = container_of(drl, struct scx_sched_pcpu,
 						deferred_reenq_local);
 			sch = sch_pcpu->sch;
+
 			reenq_flags = drl->flags;
 			WRITE_ONCE(drl->flags, 0);
 			list_del_init(&drl->node);
+
+			if (likely(drl->seq != seq)) {
+				drl->seq = seq;
+				drl->cnt = 0;
+			} else {
+				if (unlikely(++drl->cnt > SCX_REENQ_LOCAL_MAX_REPEAT)) {
+					scx_error(sch, "SCX_ENQ_REENQ on SCX_DSQ_LOCAL repeated %u times",
+						  drl->cnt);
+					skip = true;
+				}
+
+				__scx_add_event(sch, SCX_EV_REENQ_LOCAL_REPEAT, 1);
+			}
 		}
 
-		/* see schedule_dsq_reenq() */
-		smp_mb();
+		if (!skip) {
+			/* see schedule_dsq_reenq() */
+			smp_mb();
 
-		reenq_local(sch, rq, reenq_flags);
+			reenq_local(sch, rq, reenq_flags);
+		}
 	}
 }
 
@@ -4208,10 +4432,6 @@ static void scx_cgroup_unlock(void) {}
 /*
  * Omitted operations:
  *
- * - wakeup_preempt: NOOP as it isn't useful in the wakeup path because the task
- *   isn't tied to the CPU at that point. Preemption is implemented by resetting
- *   the victim task's slice to 0 and triggering reschedule on the target CPU.
- *
  * - migrate_task_rq: Unnecessary as task to cpu mapping is transient.
  *
  * - task_fork/dead: We need fork/dead notifications for all tasks regardless of
@@ -4580,6 +4800,8 @@ static ssize_t scx_attr_events_show(struct kobject *kobj,
 	at += scx_attr_event_show(buf, at, &events, SCX_EV_DISPATCH_KEEP_LAST);
 	at += scx_attr_event_show(buf, at, &events, SCX_EV_ENQ_SKIP_EXITING);
 	at += scx_attr_event_show(buf, at, &events, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED);
+	at += scx_attr_event_show(buf, at, &events, SCX_EV_REENQ_IMMED);
+	at += scx_attr_event_show(buf, at, &events, SCX_EV_REENQ_LOCAL_REPEAT);
 	at += scx_attr_event_show(buf, at, &events, SCX_EV_REFILL_SLICE_DFL);
 	at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_DURATION);
 	at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_DISPATCH);
@@ -6019,6 +6241,8 @@ static void scx_dump_state(struct scx_sched *sch, struct scx_exit_info *ei,
 	scx_dump_event(s, &events, SCX_EV_DISPATCH_KEEP_LAST);
 	scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_EXITING);
 	scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED);
+	scx_dump_event(s, &events, SCX_EV_REENQ_IMMED);
+	scx_dump_event(s, &events, SCX_EV_REENQ_LOCAL_REPEAT);
 	scx_dump_event(s, &events, SCX_EV_REFILL_SLICE_DFL);
 	scx_dump_event(s, &events, SCX_EV_BYPASS_DURATION);
 	scx_dump_event(s, &events, SCX_EV_BYPASS_DISPATCH);
@@ -7532,6 +7756,13 @@ void __init init_sched_ext_class(void)
  */
 static bool scx_vet_enq_flags(struct scx_sched *sch, u64 dsq_id, u64 enq_flags)
 {
+	if ((enq_flags & SCX_ENQ_IMMED) &&
+	    unlikely(dsq_id != SCX_DSQ_LOCAL &&
+		     (dsq_id & SCX_DSQ_LOCAL_ON) != SCX_DSQ_LOCAL_ON)) {
+		scx_error(sch, "SCX_ENQ_IMMED on a non-local DSQ 0x%llx", dsq_id);
+		return false;
+	}
+
 	return true;
 }
 
@@ -9101,6 +9332,8 @@ static void scx_read_events(struct scx_sched *sch, struct scx_event_stats *event
 		scx_agg_event(events, e_cpu, SCX_EV_DISPATCH_KEEP_LAST);
 		scx_agg_event(events, e_cpu, SCX_EV_ENQ_SKIP_EXITING);
 		scx_agg_event(events, e_cpu, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED);
+		scx_agg_event(events, e_cpu, SCX_EV_REENQ_IMMED);
+		scx_agg_event(events, e_cpu, SCX_EV_REENQ_LOCAL_REPEAT);
 		scx_agg_event(events, e_cpu, SCX_EV_REFILL_SLICE_DFL);
 		scx_agg_event(events, e_cpu, SCX_EV_BYPASS_DURATION);
 		scx_agg_event(events, e_cpu, SCX_EV_BYPASS_DISPATCH);
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index c78dadaadab8..2ef855f7c861 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -31,6 +31,8 @@ enum scx_consts {
 	SCX_BYPASS_LB_MIN_DELTA_DIV	= 4,
 	SCX_BYPASS_LB_BATCH		= 256,
 
+	SCX_REENQ_LOCAL_MAX_REPEAT	= 256,
+
 	SCX_SUB_MAX_DEPTH		= 4,
 };
 
@@ -887,6 +889,24 @@ struct scx_event_stats {
 	 */
 	s64		SCX_EV_ENQ_SKIP_MIGRATION_DISABLED;
 
+	/*
+	 * The number of times a task, enqueued on a local DSQ with
+	 * SCX_ENQ_IMMED, was re-enqueued because the CPU was not available for
+	 * immediate execution.
+	 */
+	s64		SCX_EV_REENQ_IMMED;
+
+	/*
+	 * The number of times a reenq of local DSQ caused another reenq of
+	 * local DSQ. This can happen when %SCX_ENQ_IMMED races against a higher
+	 * priority class task even if the BPF scheduler always satisfies the
+	 * prerequisites for %SCX_ENQ_IMMED at the time of enqueue. However,
+	 * that scenario is very unlikely and this count going up regularly
+	 * indicates that the BPF scheduler is handling %SCX_ENQ_REENQ
+	 * incorrectly causing recursive reenqueues.
+	 */
+	s64		SCX_EV_REENQ_LOCAL_REPEAT;
+
 	/*
 	 * Total number of times a task's time slice was refilled with the
 	 * default value (SCX_SLICE_DFL).
@@ -951,6 +971,8 @@ struct scx_dsp_ctx {
 struct scx_deferred_reenq_local {
 	struct list_head	node;
 	u64			flags;
+	u64			seq;
+	u32			cnt;
 };
 
 struct scx_sched_pcpu {
@@ -1074,6 +1096,24 @@ enum scx_enq_flags {
 	 */
 	SCX_ENQ_PREEMPT		= 1LLU << 32,
 
+	/*
+	 * Only allowed on local DSQs. Guarantees that the task either gets
+	 * on the CPU immediately and stays on it, or gets reenqueued back
+	 * to the BPF scheduler. It will never linger on a local DSQ or be
+	 * silently put back after preemption.
+	 *
+	 * The protection persists until the next fresh enqueue - it
+	 * survives SAVE/RESTORE cycles, slice extensions and preemption.
+	 * If the task can't stay on the CPU for any reason, it gets
+	 * reenqueued back to the BPF scheduler.
+	 *
+	 * Exiting and migration-disabled tasks bypass ops.enqueue() and
+	 * are placed directly on a local DSQ without IMMED protection
+	 * unless %SCX_OPS_ENQ_EXITING and %SCX_OPS_ENQ_MIGRATION_DISABLED
+	 * are set respectively.
+	 */
+	SCX_ENQ_IMMED		= 1LLU << 33,
+
 	/*
 	 * The task being enqueued was previously enqueued on a DSQ, but was
 	 * removed and is being re-enqueued. See SCX_TASK_REENQ_* flags to find
@@ -1098,6 +1138,7 @@ enum scx_enq_flags {
 	SCX_ENQ_CLEAR_OPSS	= 1LLU << 56,
 	SCX_ENQ_DSQ_PRIQ	= 1LLU << 57,
 	SCX_ENQ_NESTED		= 1LLU << 58,
+	SCX_ENQ_GDSQ_FALLBACK	= 1LLU << 59,	/* fell back to global DSQ */
 };
 
 enum scx_deq_flags {
@@ -1127,6 +1168,12 @@ enum scx_reenq_flags {
 	__SCX_REENQ_FILTER_MASK	= 0xffffLLU,
 
 	__SCX_REENQ_USER_MASK	= SCX_REENQ_ANY,
+
+	/* bits 32-35 used by task_should_reenq() */
+	SCX_REENQ_TSR_RQ_OPEN	= 1LLU << 32,
+	SCX_REENQ_TSR_NOT_FIRST	= 1LLU << 33,
+
+	__SCX_REENQ_TSR_MASK	= 0xfLLU << 32,
 };
 
 enum scx_pick_idle_cpu_flags {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 60627119d0ab..5b93f6190d31 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -800,6 +800,7 @@ struct scx_rq {
 	u32			cpuperf_target;		/* [0, SCHED_CAPACITY_SCALE] */
 	bool			cpu_released;
 	u32			flags;
+	u32			nr_immed;		/* ENQ_IMMED tasks on local_dsq */
 	u64			clock;			/* current per-rq clock -- see scx_bpf_now() */
 	cpumask_var_t		cpus_to_kick;
 	cpumask_var_t		cpus_to_kick_if_idle;
@@ -810,6 +811,7 @@ struct scx_rq {
 	struct task_struct	*sub_dispatch_prev;
 
 	raw_spinlock_t		deferred_reenq_lock;
+	u64			deferred_reenq_locals_seq;
 	struct list_head	deferred_reenq_locals;	/* scheds requesting reenq of local DSQ */
 	struct list_head	deferred_reenq_users;	/* user DSQs requesting reenq */
 	struct balance_callback	deferred_bal_cb;
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index 704728864d83..cba37432eec0 100644
--- a/tools/sched_ext/include/scx/compat.bpf.h
+++ b/tools/sched_ext/include/scx/compat.bpf.h
@@ -404,6 +404,11 @@ static inline void scx_bpf_dsq_reenq(u64 dsq_id, u64 reenq_flags)
 		scx_bpf_error("kernel too old to reenqueue foreign local or user DSQs");
 }
 
+/*
+ * v7.1: %SCX_ENQ_IMMED.
+ */
+#define SCX_ENQ_IMMED	__COMPAT_ENUM_OR_ZERO(enum scx_enq_flags, SCX_ENQ_IMMED)
+
 /*
  * Define sched_ext_ops. This may be expanded to define multiple variants for
  * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
-- 
cgit v1.2.3


From 82b6c1b542ea0530318c6f2a880d884eb4dce49f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 2 Mar 2026 17:29:05 +0100
Subject: of: Add of_machine_get_match() helper

Currently, there are two helpers to match the root compatible value
against an of_device_id array:
  - of_machine_device_match() returns true if a match is found,
  - of_machine_get_match_data() returns the match data if a match is
    found.
However, there is no helper that returns the actual of_device_id
structure corresponding to the match, leading to code duplication in
various drivers.

Fix this by reworking of_machine_device_match() to return the actual
match structure, and renaming it to of_machine_get_match().
Retain the old of_machine_device_match() functionality using a cheap
static inline wrapper around the new of_machine_get_match() helper.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://patch.msgid.link/14e1c03d443b1a5f210609ec3a1ebbaeab8fb3d9.1772468323.git.geert+renesas@glider.be
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/of/base.c  | 11 +++++------
 include/linux/of.h | 11 ++++++++---
 2 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 57420806c1a2..2a01d2a66eed 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -435,13 +435,12 @@ bool of_machine_compatible_match(const char *const *compats)
 EXPORT_SYMBOL(of_machine_compatible_match);
 
 /**
- * of_machine_device_match - Test root of device tree against a of_device_id array
+ * of_machine_get_match - Test root of device tree against an of_device_id array
  * @matches:	NULL terminated array of of_device_id match structures to search in
  *
- * Returns true if the root node has any of the given compatible values in its
- * compatible property.
+ * Returns matched entry or NULL
  */
-bool of_machine_device_match(const struct of_device_id *matches)
+const struct of_device_id *of_machine_get_match(const struct of_device_id *matches)
 {
 	struct device_node *root;
 	const struct of_device_id *match = NULL;
@@ -452,9 +451,9 @@ bool of_machine_device_match(const struct of_device_id *matches)
 		of_node_put(root);
 	}
 
-	return match != NULL;
+	return match;
 }
-EXPORT_SYMBOL(of_machine_device_match);
+EXPORT_SYMBOL(of_machine_get_match);
 
 /**
  * of_machine_get_match_data - Tell if root of device tree has a matching of_match structure
diff --git a/include/linux/of.h b/include/linux/of.h
index be6ec4916adf..b4d7d33b0ceb 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -410,7 +410,7 @@ extern int of_alias_get_id(const struct device_node *np, const char *stem);
 extern int of_alias_get_highest_id(const char *stem);
 
 bool of_machine_compatible_match(const char *const *compats);
-bool of_machine_device_match(const struct of_device_id *matches);
+const struct of_device_id *of_machine_get_match(const struct of_device_id *matches);
 const void *of_machine_get_match_data(const struct of_device_id *matches);
 
 /**
@@ -866,9 +866,9 @@ static inline bool of_machine_compatible_match(const char *const *compats)
 	return false;
 }
 
-static inline bool of_machine_device_match(const struct of_device_id *matches)
+static inline const struct of_device_id *of_machine_get_match(const struct of_device_id *matches)
 {
-	return false;
+	return NULL;
 }
 
 static inline const void *
@@ -976,6 +976,11 @@ static inline int of_numa_init(void)
 }
 #endif
 
+static inline bool of_machine_device_match(const struct of_device_id *matches)
+{
+	return of_machine_get_match(matches) != NULL;
+}
+
 static inline struct device_node *of_find_matching_node(
 	struct device_node *from,
 	const struct of_device_id *matches)
-- 
cgit v1.2.3


From d7eafe655b741dfc241d5b920f6d2cea45b568d9 Mon Sep 17 00:00:00 2001
From: Barry Song <baohua@kernel.org>
Date: Sun, 1 Mar 2026 06:13:16 +0800
Subject: dma-mapping: Separate DMA sync issuing and completion waiting

Currently, arch_sync_dma_for_cpu and arch_sync_dma_for_device
always wait for the completion of each DMA buffer. That is,
issuing the DMA sync and waiting for completion is done in a
single API call.

For scatter-gather lists with multiple entries, this means
issuing and waiting is repeated for each entry, which can hurt
performance. Architectures like ARM64 may be able to issue all
DMA sync operations for all entries first and then wait for
completion together.

To address this, arch_sync_dma_for_* now batches DMA operations
and performs a flush afterward. On ARM64, the flush is implemented
with a dsb instruction in arch_sync_dma_flush(). On other
architectures, arch_sync_dma_flush() is currently a nop.

Cc: Leon Romanovsky <leon@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ada Couprie Diaz <ada.coupriediaz@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Cc: Tangquan Zheng <zhengtangquan@oppo.com>
Reviewed-by: Juergen Gross <jgross@suse.com> # drivers/xen/swiotlb-xen.c
Tested-by: Xueyuan Chen <xueyuan.chen21@gmail.com>
Signed-off-by: Barry Song <baohua@kernel.org>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260228221316.59934-1-21cnbao@gmail.com
---
 arch/arm64/Kconfig             |  1 +
 arch/arm64/include/asm/cache.h |  5 +++++
 arch/arm64/mm/dma-mapping.c    |  4 ++--
 drivers/iommu/dma-iommu.c      | 35 +++++++++++++++++++++++++++--------
 drivers/xen/swiotlb-xen.c      | 24 ++++++++++++++++--------
 include/linux/dma-map-ops.h    |  6 ++++++
 kernel/dma/Kconfig             |  3 +++
 kernel/dma/direct.c            |  6 +++++-
 kernel/dma/direct.h            |  9 +++++++--
 kernel/dma/swiotlb.c           |  7 ++++++-
 10 files changed, 78 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 38dba5f7e4d2..ceafaac6532c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
 	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select ARCH_HAS_BATCHED_DMA_SYNC
 	select ARCH_HAS_SYSCALL_WRAPPER
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_ZONE_DMA_SET if EXPERT
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index dd2c8586a725..10a7ffadee3d 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -87,6 +87,11 @@ int cache_line_size(void);
 
 #define dma_get_cache_alignment	cache_line_size
 
+static inline void arch_sync_dma_flush(void)
+{
+	dsb(sy);
+}
+
 /* Compress a u64 MPIDR value into 32 bits. */
 static inline u64 arch_compact_of_hwid(u64 id)
 {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index b2b5792b2caa..ae1ae0280eef 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -17,7 +17,7 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
 {
 	unsigned long start = (unsigned long)phys_to_virt(paddr);
 
-	dcache_clean_poc(start, start + size);
+	dcache_clean_poc_nosync(start, start + size);
 }
 
 void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
@@ -28,7 +28,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
 	if (dir == DMA_TO_DEVICE)
 		return;
 
-	dcache_inval_poc(start, start + size);
+	dcache_inval_poc_nosync(start, start + size);
 }
 
 void arch_dma_prep_coherent(struct page *page, size_t size)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5dac64be61bb..66fc25bae85b 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1095,8 +1095,10 @@ void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
 		return;
 
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-	if (!dev_is_dma_coherent(dev))
+	if (!dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_cpu(phys, size, dir);
+		arch_sync_dma_flush();
+	}
 
 	swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 }
@@ -1112,8 +1114,10 @@ void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
 	swiotlb_sync_single_for_device(dev, phys, size, dir);
 
-	if (!dev_is_dma_coherent(dev))
+	if (!dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_device(phys, size, dir);
+		arch_sync_dma_flush();
+	}
 }
 
 void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
@@ -1122,13 +1126,15 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
 	struct scatterlist *sg;
 	int i;
 
-	if (sg_dma_is_swiotlb(sgl))
+	if (sg_dma_is_swiotlb(sgl)) {
 		for_each_sg(sgl, sg, nelems, i)
 			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
 						      sg->length, dir);
-	else if (!dev_is_dma_coherent(dev))
+	} else if (!dev_is_dma_coherent(dev)) {
 		for_each_sg(sgl, sg, nelems, i)
 			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
+		arch_sync_dma_flush();
+	}
 }
 
 void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
@@ -1137,14 +1143,16 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
 	struct scatterlist *sg;
 	int i;
 
-	if (sg_dma_is_swiotlb(sgl))
+	if (sg_dma_is_swiotlb(sgl)) {
 		for_each_sg(sgl, sg, nelems, i)
 			iommu_dma_sync_single_for_device(dev,
 							 sg_dma_address(sg),
 							 sg->length, dir);
-	else if (!dev_is_dma_coherent(dev))
+	} else if (!dev_is_dma_coherent(dev)) {
 		for_each_sg(sgl, sg, nelems, i)
 			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
+		arch_sync_dma_flush();
+	}
 }
 
 static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
@@ -1219,8 +1227,10 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
 			return DMA_MAPPING_ERROR;
 	}
 
-	if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
+	if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
 		arch_sync_dma_for_device(phys, size, dir);
+		arch_sync_dma_flush();
+	}
 
 	iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
 	if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO))
@@ -1242,8 +1252,10 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
 	if (WARN_ON(!phys))
 		return;
 
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_cpu(phys, size, dir);
+		arch_sync_dma_flush();
+	}
 
 	__iommu_dma_unmap(dev, dma_handle, size);
 
@@ -1980,6 +1992,8 @@ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
 	dma_addr_t addr = state->addr + offset;
 	size_t iova_start_pad = iova_offset(iovad, addr);
 
+	if (!dev_is_dma_coherent(dev))
+		arch_sync_dma_flush();
 	return iommu_sync_map(domain, addr - iova_start_pad,
 		      iova_align(iovad, size + iova_start_pad));
 }
@@ -1993,6 +2007,8 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	size_t iova_start_pad = iova_offset(iovad, addr);
+	bool need_sync_dma = !dev_is_dma_coherent(dev) &&
+			!(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO));
 	dma_addr_t end = addr + size;
 
 	do {
@@ -2016,6 +2032,9 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
 		addr += len;
 		iova_start_pad = 0;
 	} while (addr < end);
+
+	if (need_sync_dma)
+		arch_sync_dma_flush();
 }
 
 static void __iommu_dma_iova_unlink(struct device *dev,
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index ccf25027bec1..b79917e785a5 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -262,10 +262,12 @@ static dma_addr_t xen_swiotlb_map_phys(struct device *dev, phys_addr_t phys,
 
 done:
 	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
+		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) {
 			arch_sync_dma_for_device(phys, size, dir);
-		else
+			arch_sync_dma_flush();
+		} else {
 			xen_dma_sync_for_device(dev, dev_addr, size, dir);
+		}
 	}
 	return dev_addr;
 }
@@ -287,10 +289,12 @@ static void xen_swiotlb_unmap_phys(struct device *hwdev, dma_addr_t dev_addr,
 	BUG_ON(dir == DMA_NONE);
 
 	if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
+		if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) {
 			arch_sync_dma_for_cpu(paddr, size, dir);
-		else
+			arch_sync_dma_flush();
+		} else {
 			xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
+		}
 	}
 
 	/* NOTE: We use dev_addr here, not paddr! */
@@ -308,10 +312,12 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
 	struct io_tlb_pool *pool;
 
 	if (!dev_is_dma_coherent(dev)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
 			arch_sync_dma_for_cpu(paddr, size, dir);
-		else
+			arch_sync_dma_flush();
+		} else {
 			xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
+		}
 	}
 
 	pool = xen_swiotlb_find_pool(dev, dma_addr);
@@ -331,10 +337,12 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
 		__swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
 
 	if (!dev_is_dma_coherent(dev)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
 			arch_sync_dma_for_device(paddr, size, dir);
-		else
+			arch_sync_dma_flush();
+		} else {
 			xen_dma_sync_for_device(dev, dma_addr, size, dir);
+		}
 	}
 }
 
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 60b63756df82..8a07df5a9ef6 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -361,6 +361,12 @@ static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
 }
 #endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */
 
+#ifndef CONFIG_ARCH_HAS_BATCHED_DMA_SYNC
+static inline void arch_sync_dma_flush(void)
+{
+}
+#endif
+
 #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
 void arch_sync_dma_for_cpu_all(void);
 #else
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 159900736f25..bfef21b4a9ae 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -72,6 +72,9 @@ config ARCH_HAS_DMA_PREP_COHERENT
 config ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	bool
 
+config ARCH_HAS_BATCHED_DMA_SYNC
+	bool
+
 #
 # Select this option if the architecture assumes DMA devices are coherent
 # by default.
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 8f43a930716d..c7666e5d5e7c 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -406,6 +406,8 @@ void dma_direct_sync_sg_for_device(struct device *dev,
 			arch_sync_dma_for_device(paddr, sg->length,
 					dir);
 	}
+	if (!dev_is_dma_coherent(dev))
+		arch_sync_dma_flush();
 }
 #endif
 
@@ -427,8 +429,10 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
 		swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
 	}
 
-	if (!dev_is_dma_coherent(dev))
+	if (!dev_is_dma_coherent(dev)) {
+		arch_sync_dma_flush();
 		arch_sync_dma_for_cpu_all();
+	}
 }
 
 /*
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index f476c63b668c..f925a7e8b000 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -60,8 +60,10 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
 
 	swiotlb_sync_single_for_device(dev, paddr, size, dir);
 
-	if (!dev_is_dma_coherent(dev))
+	if (!dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_device(paddr, size, dir);
+		arch_sync_dma_flush();
+	}
 }
 
 static inline void dma_direct_sync_single_for_cpu(struct device *dev,
@@ -71,6 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
 
 	if (!dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_cpu(paddr, size, dir);
+		arch_sync_dma_flush();
 		arch_sync_dma_for_cpu_all();
 	}
 
@@ -106,8 +109,10 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
 	}
 
 	if (!dev_is_dma_coherent(dev) &&
-	    !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
+	    !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
 		arch_sync_dma_for_device(phys, size, dir);
+		arch_sync_dma_flush();
+	}
 	return dma_addr;
 
 err_overflow:
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d8e6f1d889d5..1105db1689d5 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -867,6 +867,9 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
 
+	if (dir == DMA_FROM_DEVICE && !dev_is_dma_coherent(dev))
+		arch_sync_dma_flush();
+
 	/*
 	 * It's valid for tlb_offset to be negative. This can happen when the
 	 * "offset" returned by swiotlb_align_offset() is non-zero, and the
@@ -1595,8 +1598,10 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
 		return DMA_MAPPING_ERROR;
 	}
 
-	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
 		arch_sync_dma_for_device(swiotlb_addr, size, dir);
+		arch_sync_dma_flush();
+	}
 	return dma_addr;
 }
 
-- 
cgit v1.2.3


From 74f0cca1100b6d1f1ea28178435aff8078d06603 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@google.com>
Date: Wed, 11 Mar 2026 05:19:56 +0000
Subject: udp: Remove UDPLITE_SEND_CSCOV and UDPLITE_RECV_CSCOV.

UDP-Lite supports variable-length checksum and has two socket
options, UDPLITE_SEND_CSCOV and UDPLITE_RECV_CSCOV, to control
the checksum coverage.

Let's remove the support.

setsockopt(UDPLITE_SEND_CSCOV / UDPLITE_RECV_CSCOV) was only
available for UDP-Lite and returned -ENOPROTOOPT for UDP.

Now, the options are handled in ip_setsockopt() and
ipv6_setsockopt(), which still return the same error.

getsockopt(UDPLITE_SEND_CSCOV / UDPLITE_RECV_CSCOV) was available
for UDP and always returned 0, meaning full checksum, but now
-ENOPROTOOPT is returned.

Given that getsockopt() is meaningless for UDP and even the options
are not defined under include/uapi/, this should not be a problem.

  $ man 7 udplite
  ...
  BUGS
       Where glibc support is missing, the following definitions
       are needed:

           #define IPPROTO_UDPLITE     136
           #define UDPLITE_SEND_CSCOV  10
           #define UDPLITE_RECV_CSCOV  11

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20260311052020.1213705-10-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/udp.h      | 10 +---------
 include/net/udplite.h    | 15 ---------------
 include/uapi/linux/udp.h |  2 ++
 net/ipv4/udp.c           | 46 ++--------------------------------------------
 net/ipv6/ip6_checksum.c  |  2 +-
 net/ipv6/udp.c           |  5 ++---
 6 files changed, 8 insertions(+), 72 deletions(-)
 delete mode 100644 include/net/udplite.h

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 1cbf6b4d3aab..ce56ebcee5cb 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -40,8 +40,6 @@ enum {
 	UDP_FLAGS_ACCEPT_FRAGLIST,
 	UDP_FLAGS_ACCEPT_L4,
 	UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */
-	UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */
-	UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */
 };
 
 /* per NUMA structure for lockless producer usage. */
@@ -74,11 +72,7 @@ struct udp_sock {
 	 */
 	__u16		 len;		/* total length of pending frames */
 	__u16		 gso_size;
-	/*
-	 * Fields specific to UDP-Lite.
-	 */
-	__u16		 pcslen;
-	__u16		 pcrlen;
+
 	/*
 	 * For encapsulation sockets.
 	 */
@@ -236,8 +230,6 @@ static inline void udp_allow_gso(struct sock *sk)
 	hlist_nulls_for_each_entry_rcu(__up, node, list, udp_lrpa_node)
 #endif
 
-#define IS_UDPLITE(__sk) (unlikely(__sk->sk_protocol == IPPROTO_UDPLITE))
-
 static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6)
 {
 #if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
diff --git a/include/net/udplite.h b/include/net/udplite.h
deleted file mode 100644
index 6bfa1d6833d1..000000000000
--- a/include/net/udplite.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *	Definitions for the UDP-Lite (RFC 3828) code.
- */
-#ifndef _UDPLITE_H
-#define _UDPLITE_H
-
-#include <net/ip6_checksum.h>
-#include <net/udp.h>
-
-/* UDP-Lite socket options */
-#define UDPLITE_SEND_CSCOV   10 /* sender partial coverage (as sent)      */
-#define UDPLITE_RECV_CSCOV   11 /* receiver partial coverage (threshold ) */
-
-#endif	/* _UDPLITE_H */
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index edca3e430305..877fb02df8fb 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -29,6 +29,8 @@ struct udphdr {
 
 /* UDP socket options */
 #define UDP_CORK	1	/* Never send partially complete segments */
+/* Deprecated, reserved for UDPLITE_SEND_CSCOV 10 */
+/* Deprecated, reserved for UDPLITE_RECV_CSCOV 11 */
 #define UDP_ENCAP	100	/* Set the socket to accept encapsulated packets */
 #define UDP_NO_CHECK6_TX 101	/* Disable sending checksum for UDP6X */
 #define UDP_NO_CHECK6_RX 102	/* Disable accepting checksum for UDP6 */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9a2c8ff96e83..d47ca721ef0d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -117,7 +117,6 @@
 #include <net/sock_reuseport.h>
 #include <net/addrconf.h>
 #include <net/udp_tunnel.h>
-#include <net/udplite.h>
 #include <net/gro.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6_stubs.h>
@@ -2924,7 +2923,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	struct udp_sock *up = udp_sk(sk);
 	int val, valbool;
 	int err = 0;
-	int is_udplite = IS_UDPLITE(sk);
 
 	if (level == SOL_SOCKET) {
 		err = sk_setsockopt(sk, level, optname, optval, optlen);
@@ -3011,36 +3009,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		sockopt_release_sock(sk);
 		break;
 
-	/*
-	 * 	UDP-Lite's partial checksum coverage (RFC 3828).
-	 */
-	/* The sender sets actual checksum coverage length via this option.
-	 * The case coverage > packet length is handled by send module. */
-	case UDPLITE_SEND_CSCOV:
-		if (!is_udplite)         /* Disable the option on UDP sockets */
-			return -ENOPROTOOPT;
-		if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
-			val = 8;
-		else if (val > USHRT_MAX)
-			val = USHRT_MAX;
-		WRITE_ONCE(up->pcslen, val);
-		udp_set_bit(UDPLITE_SEND_CC, sk);
-		break;
-
-	/* The receiver specifies a minimum checksum coverage value. To make
-	 * sense, this should be set to at least 8 (as done below). If zero is
-	 * used, this again means full checksum coverage.                     */
-	case UDPLITE_RECV_CSCOV:
-		if (!is_udplite)         /* Disable the option on UDP sockets */
-			return -ENOPROTOOPT;
-		if (val != 0 && val < 8) /* Avoid silly minimal values.       */
-			val = 8;
-		else if (val > USHRT_MAX)
-			val = USHRT_MAX;
-		WRITE_ONCE(up->pcrlen, val);
-		udp_set_bit(UDPLITE_RECV_CC, sk);
-		break;
-
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -3053,7 +3021,7 @@ EXPORT_IPV6_MOD(udp_lib_setsockopt);
 static int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
 			  unsigned int optlen)
 {
-	if (level == SOL_UDP  ||  level == SOL_UDPLITE || level == SOL_SOCKET)
+	if (level == SOL_UDP || level == SOL_SOCKET)
 		return udp_lib_setsockopt(sk, level, optname,
 					  optval, optlen,
 					  udp_push_pending_frames);
@@ -3099,16 +3067,6 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 		val = udp_test_bit(GRO_ENABLED, sk);
 		break;
 
-	/* The following two cannot be changed on UDP sockets, the return is
-	 * always 0 (which corresponds to the full checksum coverage of UDP). */
-	case UDPLITE_SEND_CSCOV:
-		val = READ_ONCE(up->pcslen);
-		break;
-
-	case UDPLITE_RECV_CSCOV:
-		val = READ_ONCE(up->pcrlen);
-		break;
-
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -3124,7 +3082,7 @@ EXPORT_IPV6_MOD(udp_lib_getsockopt);
 static int udp_getsockopt(struct sock *sk, int level, int optname,
 			  char __user *optval, int __user *optlen)
 {
-	if (level == SOL_UDP  ||  level == SOL_UDPLITE)
+	if (level == SOL_UDP)
 		return udp_lib_getsockopt(sk, level, optname, optval, optlen);
 	return ip_getsockopt(sk, level, optname, optval, optlen);
 }
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 8bb68a0cdfd6..e1a594873675 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <net/ip.h>
+#include <net/ip6_checksum.h>
 #include <net/udp.h>
-#include <net/udplite.h>
 #include <asm/checksum.h>
 
 #ifndef _HAVE_ARCH_IPV6_CSUM
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 511e3f898be5..c3d8b5ede164 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -58,7 +58,6 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <trace/events/skb.h>
-#include <net/udplite.h>
 
 static void udpv6_destruct_sock(struct sock *sk)
 {
@@ -1831,7 +1830,7 @@ static void udpv6_destroy_sock(struct sock *sk)
 static int udpv6_setsockopt(struct sock *sk, int level, int optname,
 			    sockptr_t optval, unsigned int optlen)
 {
-	if (level == SOL_UDP  ||  level == SOL_UDPLITE || level == SOL_SOCKET)
+	if (level == SOL_UDP || level == SOL_SOCKET)
 		return udp_lib_setsockopt(sk, level, optname,
 					  optval, optlen,
 					  udp_v6_push_pending_frames);
@@ -1841,7 +1840,7 @@ static int udpv6_setsockopt(struct sock *sk, int level, int optname,
 static int udpv6_getsockopt(struct sock *sk, int level, int optname,
 			    char __user *optval, int __user *optlen)
 {
-	if (level == SOL_UDP  ||  level == SOL_UDPLITE)
+	if (level == SOL_UDP)
 		return udp_lib_getsockopt(sk, level, optname, optval, optlen);
 	return ipv6_getsockopt(sk, level, optname, optval, optlen);
 }
-- 
cgit v1.2.3


From 203247c5cb972af5d46bdb7d41ef40078048810b Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 13 Mar 2026 07:47:00 -0700
Subject: blk-integrity: support arbitrary buffer alignment

A bio segment may have partial interval block data with the rest
continuing into the next segments because direct-io data payloads only
need to align in memory to the device's DMA limits.

At the same time, the protection information may also be split in
multiple segments. The most likely way that may happen is if two
requests merge, or if we're directly using the io_uring user metadata.
The generate/verify, however, only ever accessed the first bip_vec.

Further, it may be possible to unalign the protection fields from the
user space buffer, or if there are odd additional opaque bytes in front
or in back of the protection information metadata region.

Change up the iteration to allow spanning multiple segments. This patch
is mostly a re-write of the protection information handling to allow any
arbitrary alignments, so it's probably easier to review the end result
rather than the diff.

Many controllers are not able to handle interval data composed of
multiple segments when PI is used, so this patch introduces a new
integrity limit that a low level driver can set to notify that it is
capable, default to false. The nvme driver is the first one to enable it
in this patch. Everyone else will force DMA alignment to the logical
block size as before to ensure interval data is always aligned within a
single segment.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Link: https://patch.msgid.link/20260313144701.1221652-2-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c          |  12 +-
 block/t10-pi.c                | 816 +++++++++++++++++++++++-------------------
 drivers/nvme/host/core.c      |   1 +
 include/linux/blk-integrity.h |   1 +
 4 files changed, 465 insertions(+), 365 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index dabfab97fbab..78c83817b9d3 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -189,11 +189,11 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
 	}
 
 	/*
-	 * The PI generation / validation helpers do not expect intervals to
-	 * straddle multiple bio_vecs.  Enforce alignment so that those are
+	 * Some IO controllers can not handle data intervals straddling
+	 * multiple bio_vecs.  For those, enforce alignment so that those are
 	 * never generated, and that each buffer is aligned as expected.
 	 */
-	if (bi->csum_type) {
+	if (!(bi->flags & BLK_SPLIT_INTERVAL_CAPABLE) && bi->csum_type) {
 		lim->dma_alignment = max(lim->dma_alignment,
 					(1U << bi->interval_exp) - 1);
 	}
@@ -992,10 +992,14 @@ bool queue_limits_stack_integrity(struct queue_limits *t,
 		if ((ti->flags & BLK_INTEGRITY_REF_TAG) !=
 		    (bi->flags & BLK_INTEGRITY_REF_TAG))
 			goto incompatible;
+		if ((ti->flags & BLK_SPLIT_INTERVAL_CAPABLE) &&
+		    !(bi->flags & BLK_SPLIT_INTERVAL_CAPABLE))
+			ti->flags &= ~BLK_SPLIT_INTERVAL_CAPABLE;
 	} else {
 		ti->flags = BLK_INTEGRITY_STACKED;
 		ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) |
-			     (bi->flags & BLK_INTEGRITY_REF_TAG);
+			     (bi->flags & BLK_INTEGRITY_REF_TAG) |
+			     (bi->flags & BLK_SPLIT_INTERVAL_CAPABLE);
 		ti->csum_type = bi->csum_type;
 		ti->pi_tuple_size = bi->pi_tuple_size;
 		ti->metadata_size = bi->metadata_size;
diff --git a/block/t10-pi.c b/block/t10-pi.c
index d27be6041fd3..a19b4e102a83 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -12,462 +12,556 @@
 #include <linux/unaligned.h>
 #include "blk.h"
 
+#define APP_TAG_ESCAPE 0xffff
+#define REF_TAG_ESCAPE 0xffffffff
+
+/*
+ * This union is used for onstack allocations when the pi field is split across
+ * segments. blk_validate_integrity_limits() guarantees pi_tuple_size matches
+ * the sizeof one of these two types.
+ */
+union pi_tuple {
+	struct crc64_pi_tuple	crc64_pi;
+	struct t10_pi_tuple	t10_pi;
+};
+
 struct blk_integrity_iter {
-	void			*prot_buf;
-	void			*data_buf;
-	sector_t		seed;
-	unsigned int		data_size;
-	unsigned short		interval;
-	const char		*disk_name;
+	struct bio			*bio;
+	struct bio_integrity_payload	*bip;
+	struct blk_integrity		*bi;
+	struct bvec_iter		data_iter;
+	struct bvec_iter		prot_iter;
+	unsigned int			interval_remaining;
+	u64				seed;
+	u64				csum;
 };
 
-static __be16 t10_pi_csum(__be16 csum, void *data, unsigned int len,
-		unsigned char csum_type)
+static void blk_calculate_guard(struct blk_integrity_iter *iter, void *data,
+				unsigned int len)
 {
-	if (csum_type == BLK_INTEGRITY_CSUM_IP)
-		return (__force __be16)ip_compute_csum(data, len);
-	return cpu_to_be16(crc_t10dif_update(be16_to_cpu(csum), data, len));
+	switch (iter->bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_CRC64:
+		iter->csum = crc64_nvme(iter->csum, data, len);
+		break;
+	case BLK_INTEGRITY_CSUM_CRC:
+		iter->csum = crc_t10dif_update(iter->csum, data, len);
+		break;
+	case BLK_INTEGRITY_CSUM_IP:
+		iter->csum = (__force u32)csum_partial(data, len,
+						(__force __wsum)iter->csum);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		iter->csum = U64_MAX;
+		break;
+	}
+}
+
+static void blk_integrity_csum_finish(struct blk_integrity_iter *iter)
+{
+	switch (iter->bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_IP:
+		iter->csum = (__force u16)csum_fold((__force __wsum)iter->csum);
+		break;
+	default:
+		break;
+	}
 }
 
 /*
- * Type 1 and Type 2 protection use the same format: 16 bit guard tag,
- * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
- * tag.
+ * Update the csum for formats that have metadata padding in front of the data
+ * integrity field
  */
-static void t10_pi_generate(struct blk_integrity_iter *iter,
-		struct blk_integrity *bi)
+static void blk_integrity_csum_offset(struct blk_integrity_iter *iter)
 {
-	u8 offset = bi->pi_offset;
-	unsigned int i;
+	unsigned int offset = iter->bi->pi_offset;
+	struct bio_vec *bvec = iter->bip->bip_vec;
+
+	while (offset > 0) {
+		struct bio_vec pbv = bvec_iter_bvec(bvec, iter->prot_iter);
+		unsigned int len = min(pbv.bv_len, offset);
+		void *prot_buf = bvec_kmap_local(&pbv);
+
+		blk_calculate_guard(iter, prot_buf, len);
+		kunmap_local(prot_buf);
+		offset -= len;
+		bvec_iter_advance_single(bvec, &iter->prot_iter, len);
+	}
+	blk_integrity_csum_finish(iter);
+}
 
-	for (i = 0 ; i < iter->data_size ; i += iter->interval) {
-		struct t10_pi_tuple *pi = iter->prot_buf + offset;
+static void blk_integrity_copy_from_tuple(struct bio_integrity_payload *bip,
+					  struct bvec_iter *iter, void *tuple,
+					  unsigned int tuple_size)
+{
+	while (tuple_size) {
+		struct bio_vec pbv = bvec_iter_bvec(bip->bip_vec, *iter);
+		unsigned int len = min(tuple_size, pbv.bv_len);
+		void *prot_buf = bvec_kmap_local(&pbv);
+
+		memcpy(prot_buf, tuple, len);
+		kunmap_local(prot_buf);
+		bvec_iter_advance_single(bip->bip_vec, iter, len);
+		tuple_size -= len;
+		tuple += len;
+	}
+}
 
-		pi->guard_tag = t10_pi_csum(0, iter->data_buf, iter->interval,
-				bi->csum_type);
-		if (offset)
-			pi->guard_tag = t10_pi_csum(pi->guard_tag,
-					iter->prot_buf, offset, bi->csum_type);
-		pi->app_tag = 0;
+static void blk_integrity_copy_to_tuple(struct bio_integrity_payload *bip,
+					struct bvec_iter *iter, void *tuple,
+					unsigned int tuple_size)
+{
+	while (tuple_size) {
+		struct bio_vec pbv = bvec_iter_bvec(bip->bip_vec, *iter);
+		unsigned int len = min(tuple_size, pbv.bv_len);
+		void *prot_buf = bvec_kmap_local(&pbv);
+
+		memcpy(tuple, prot_buf, len);
+		kunmap_local(prot_buf);
+		bvec_iter_advance_single(bip->bip_vec, iter, len);
+		tuple_size -= len;
+		tuple += len;
+	}
+}
 
-		if (bi->flags & BLK_INTEGRITY_REF_TAG)
-			pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed));
-		else
-			pi->ref_tag = 0;
+static bool ext_pi_ref_escape(const u8 ref_tag[6])
+{
+	static const u8 ref_escape[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
-		iter->data_buf += iter->interval;
-		iter->prot_buf += bi->metadata_size;
-		iter->seed++;
-	}
+	return memcmp(ref_tag, ref_escape, sizeof(ref_escape)) == 0;
 }
 
-static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
-		struct blk_integrity *bi)
-{
-	u8 offset = bi->pi_offset;
-	unsigned int i;
-
-	for (i = 0 ; i < iter->data_size ; i += iter->interval) {
-		struct t10_pi_tuple *pi = iter->prot_buf + offset;
-		__be16 csum;
-
-		if (bi->flags & BLK_INTEGRITY_REF_TAG) {
-			if (pi->app_tag == T10_PI_APP_ESCAPE)
-				goto next;
-
-			if (be32_to_cpu(pi->ref_tag) !=
-			    lower_32_bits(iter->seed)) {
-				pr_err("%s: ref tag error at location %llu " \
-				       "(rcvd %u)\n", iter->disk_name,
-				       (unsigned long long)
-				       iter->seed, be32_to_cpu(pi->ref_tag));
-				return BLK_STS_PROTECTION;
-			}
-		} else {
-			if (pi->app_tag == T10_PI_APP_ESCAPE &&
-			    pi->ref_tag == T10_PI_REF_ESCAPE)
-				goto next;
+static blk_status_t blk_verify_ext_pi(struct blk_integrity_iter *iter,
+				      struct crc64_pi_tuple *pi)
+{
+	u64 seed = lower_48_bits(iter->seed);
+	u64 guard = get_unaligned_be64(&pi->guard_tag);
+	u64 ref = get_unaligned_be48(pi->ref_tag);
+	u16 app = get_unaligned_be16(&pi->app_tag);
+
+	if (iter->bi->flags & BLK_INTEGRITY_REF_TAG) {
+		if (app == APP_TAG_ESCAPE)
+			return BLK_STS_OK;
+		if (ref != seed) {
+			pr_err("%s: ref tag error at location %llu (rcvd %llu)\n",
+				iter->bio->bi_bdev->bd_disk->disk_name, seed,
+				ref);
+			return BLK_STS_PROTECTION;
 		}
+	} else if (app == APP_TAG_ESCAPE && ext_pi_ref_escape(pi->ref_tag)) {
+		return BLK_STS_OK;
+	}
+
+	if (guard != iter->csum) {
+		pr_err("%s: guard tag error at sector %llu (rcvd %016llx, want %016llx)\n",
+			iter->bio->bi_bdev->bd_disk->disk_name, iter->seed,
+			guard, iter->csum);
+		return BLK_STS_PROTECTION;
+	}
+
+	return BLK_STS_OK;
+}
 
-		csum = t10_pi_csum(0, iter->data_buf, iter->interval,
-				bi->csum_type);
-		if (offset)
-			csum = t10_pi_csum(csum, iter->prot_buf, offset,
-					bi->csum_type);
-
-		if (pi->guard_tag != csum) {
-			pr_err("%s: guard tag error at sector %llu " \
-			       "(rcvd %04x, want %04x)\n", iter->disk_name,
-			       (unsigned long long)iter->seed,
-			       be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
+static blk_status_t blk_verify_pi(struct blk_integrity_iter *iter,
+				      struct t10_pi_tuple *pi, u16 guard)
+{
+	u32 seed = lower_32_bits(iter->seed);
+	u32 ref = get_unaligned_be32(&pi->ref_tag);
+	u16 app = get_unaligned_be16(&pi->app_tag);
+
+	if (iter->bi->flags & BLK_INTEGRITY_REF_TAG) {
+		if (app == APP_TAG_ESCAPE)
+			return BLK_STS_OK;
+		if (ref != seed) {
+			pr_err("%s: ref tag error at location %u (rcvd %u)\n",
+				iter->bio->bi_bdev->bd_disk->disk_name, seed,
+				ref);
 			return BLK_STS_PROTECTION;
 		}
+	} else if (app == APP_TAG_ESCAPE && ref == REF_TAG_ESCAPE) {
+		return BLK_STS_OK;
+	}
 
-next:
-		iter->data_buf += iter->interval;
-		iter->prot_buf += bi->metadata_size;
-		iter->seed++;
+	if (guard != (u16)iter->csum) {
+		pr_err("%s: guard tag error at sector %llu (rcvd %04x, want %04x)\n",
+			iter->bio->bi_bdev->bd_disk->disk_name, iter->seed,
+			guard, (u16)iter->csum);
+		return BLK_STS_PROTECTION;
 	}
 
 	return BLK_STS_OK;
 }
 
-/**
- * t10_pi_type1_prepare - prepare PI prior submitting request to device
- * @rq:              request with PI that should be prepared
- *
- * For Type 1/Type 2, the virtual start sector is the one that was
- * originally submitted by the block layer for the ref_tag usage. Due to
- * partitioning, MD/DM cloning, etc. the actual physical start sector is
- * likely to be different. Remap protection information to match the
- * physical LBA.
- */
-static void t10_pi_type1_prepare(struct request *rq)
+static blk_status_t blk_verify_t10_pi(struct blk_integrity_iter *iter,
+				      struct t10_pi_tuple *pi)
 {
-	struct blk_integrity *bi = &rq->q->limits.integrity;
-	const int tuple_sz = bi->metadata_size;
-	u32 ref_tag = t10_pi_ref_tag(rq);
-	u8 offset = bi->pi_offset;
-	struct bio *bio;
+	u16 guard = get_unaligned_be16(&pi->guard_tag);
 
-	__rq_for_each_bio(bio, rq) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		u32 virt = bip_get_seed(bip) & 0xffffffff;
-		struct bio_vec iv;
-		struct bvec_iter iter;
+	return blk_verify_pi(iter, pi, guard);
+}
 
-		/* Already remapped? */
-		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
-			break;
+static blk_status_t blk_verify_ip_pi(struct blk_integrity_iter *iter,
+				     struct t10_pi_tuple *pi)
+{
+	u16 guard = get_unaligned((u16 *)&pi->guard_tag);
 
-		bip_for_each_vec(iv, bip, iter) {
-			unsigned int j;
-			void *p;
-
-			p = bvec_kmap_local(&iv);
-			for (j = 0; j < iv.bv_len; j += tuple_sz) {
-				struct t10_pi_tuple *pi = p + offset;
-
-				if (be32_to_cpu(pi->ref_tag) == virt)
-					pi->ref_tag = cpu_to_be32(ref_tag);
-				virt++;
-				ref_tag++;
-				p += tuple_sz;
-			}
-			kunmap_local(p);
-		}
+	return blk_verify_pi(iter, pi, guard);
+}
 
-		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
+static blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter,
+					 union pi_tuple *tuple)
+{
+	switch (iter->bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_CRC64:
+		return blk_verify_ext_pi(iter, &tuple->crc64_pi);
+	case BLK_INTEGRITY_CSUM_CRC:
+		return blk_verify_t10_pi(iter, &tuple->t10_pi);
+	case BLK_INTEGRITY_CSUM_IP:
+		return blk_verify_ip_pi(iter, &tuple->t10_pi);
+	default:
+		return BLK_STS_OK;
 	}
 }
 
-/**
- * t10_pi_type1_complete - prepare PI prior returning request to the blk layer
- * @rq:              request with PI that should be prepared
- * @nr_bytes:        total bytes to prepare
- *
- * For Type 1/Type 2, the virtual start sector is the one that was
- * originally submitted by the block layer for the ref_tag usage. Due to
- * partitioning, MD/DM cloning, etc. the actual physical start sector is
- * likely to be different. Since the physical start sector was submitted
- * to the device, we should remap it back to virtual values expected by the
- * block layer.
- */
-static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
+static void blk_set_ext_pi(struct blk_integrity_iter *iter,
+			   struct crc64_pi_tuple *pi)
 {
-	struct blk_integrity *bi = &rq->q->limits.integrity;
-	unsigned intervals = nr_bytes >> bi->interval_exp;
-	const int tuple_sz = bi->metadata_size;
-	u32 ref_tag = t10_pi_ref_tag(rq);
-	u8 offset = bi->pi_offset;
-	struct bio *bio;
+	put_unaligned_be64(iter->csum, &pi->guard_tag);
+	put_unaligned_be16(0, &pi->app_tag);
+	put_unaligned_be48(iter->seed, &pi->ref_tag);
+}
 
-	__rq_for_each_bio(bio, rq) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		u32 virt = bip_get_seed(bip) & 0xffffffff;
-		struct bio_vec iv;
-		struct bvec_iter iter;
-
-		bip_for_each_vec(iv, bip, iter) {
-			unsigned int j;
-			void *p;
-
-			p = bvec_kmap_local(&iv);
-			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
-				struct t10_pi_tuple *pi = p + offset;
-
-				if (be32_to_cpu(pi->ref_tag) == ref_tag)
-					pi->ref_tag = cpu_to_be32(virt);
-				virt++;
-				ref_tag++;
-				intervals--;
-				p += tuple_sz;
-			}
-			kunmap_local(p);
-		}
-	}
+static void blk_set_pi(struct blk_integrity_iter *iter,
+		       struct t10_pi_tuple *pi, __be16 csum)
+{
+	put_unaligned(csum, &pi->guard_tag);
+	put_unaligned_be16(0, &pi->app_tag);
+	put_unaligned_be32(iter->seed, &pi->ref_tag);
 }
 
-static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
+static void blk_set_t10_pi(struct blk_integrity_iter *iter,
+			   struct t10_pi_tuple *pi)
 {
-	return cpu_to_be64(crc64_nvme(crc, data, len));
+	blk_set_pi(iter, pi, cpu_to_be16((u16)iter->csum));
 }
 
-static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
-		struct blk_integrity *bi)
+static void blk_set_ip_pi(struct blk_integrity_iter *iter,
+			  struct t10_pi_tuple *pi)
 {
-	u8 offset = bi->pi_offset;
-	unsigned int i;
+	blk_set_pi(iter, pi, (__force __be16)(u16)iter->csum);
+}
 
-	for (i = 0 ; i < iter->data_size ; i += iter->interval) {
-		struct crc64_pi_tuple *pi = iter->prot_buf + offset;
+static void blk_integrity_set(struct blk_integrity_iter *iter,
+			      union pi_tuple *tuple)
+{
+	switch (iter->bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_CRC64:
+		return blk_set_ext_pi(iter, &tuple->crc64_pi);
+	case BLK_INTEGRITY_CSUM_CRC:
+		return blk_set_t10_pi(iter, &tuple->t10_pi);
+	case BLK_INTEGRITY_CSUM_IP:
+		return blk_set_ip_pi(iter, &tuple->t10_pi);
+	default:
+		WARN_ON_ONCE(1);
+		return;
+	}
+}
 
-		pi->guard_tag = ext_pi_crc64(0, iter->data_buf, iter->interval);
-		if (offset)
-			pi->guard_tag = ext_pi_crc64(be64_to_cpu(pi->guard_tag),
-					iter->prot_buf, offset);
-		pi->app_tag = 0;
+static blk_status_t blk_integrity_interval(struct blk_integrity_iter *iter,
+					   bool verify)
+{
+	blk_status_t ret = BLK_STS_OK;
+	union pi_tuple tuple;
+	void *ptuple = &tuple;
+	struct bio_vec pbv;
+
+	blk_integrity_csum_offset(iter);
+	pbv = bvec_iter_bvec(iter->bip->bip_vec, iter->prot_iter);
+	if (pbv.bv_len >= iter->bi->pi_tuple_size) {
+		ptuple = bvec_kmap_local(&pbv);
+		bvec_iter_advance_single(iter->bip->bip_vec, &iter->prot_iter,
+				iter->bi->metadata_size - iter->bi->pi_offset);
+	} else if (verify) {
+		blk_integrity_copy_to_tuple(iter->bip, &iter->prot_iter,
+				ptuple, iter->bi->pi_tuple_size);
+	}
 
-		if (bi->flags & BLK_INTEGRITY_REF_TAG)
-			put_unaligned_be48(iter->seed, pi->ref_tag);
-		else
-			put_unaligned_be48(0ULL, pi->ref_tag);
+	if (verify)
+		ret = blk_integrity_verify(iter, ptuple);
+	else
+		blk_integrity_set(iter, ptuple);
 
-		iter->data_buf += iter->interval;
-		iter->prot_buf += bi->metadata_size;
-		iter->seed++;
+	if (likely(ptuple != &tuple)) {
+		kunmap_local(ptuple);
+	} else if (!verify) {
+		blk_integrity_copy_from_tuple(iter->bip, &iter->prot_iter,
+				ptuple, iter->bi->pi_tuple_size);
 	}
+
+	iter->interval_remaining = 1 << iter->bi->interval_exp;
+	iter->csum = 0;
+	iter->seed++;
+	return ret;
 }
 
-static bool ext_pi_ref_escape(const u8 ref_tag[6])
+static blk_status_t blk_integrity_iterate(struct bio *bio,
+					  struct bvec_iter *data_iter,
+					  bool verify)
 {
-	static const u8 ref_escape[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+	struct blk_integrity_iter iter = {
+		.bio = bio,
+		.bip = bip,
+		.bi = bi,
+		.data_iter = *data_iter,
+		.prot_iter = bip->bip_iter,
+		.interval_remaining = 1 << bi->interval_exp,
+		.seed = data_iter->bi_sector,
+		.csum = 0,
+	};
+	blk_status_t ret = BLK_STS_OK;
+
+	while (iter.data_iter.bi_size && ret == BLK_STS_OK) {
+		struct bio_vec bv = bvec_iter_bvec(iter.bio->bi_io_vec,
+						   iter.data_iter);
+		void *kaddr = bvec_kmap_local(&bv);
+		void *data = kaddr;
+		unsigned int len;
+
+		bvec_iter_advance_single(iter.bio->bi_io_vec, &iter.data_iter,
+					 bv.bv_len);
+		while (bv.bv_len && ret == BLK_STS_OK) {
+			len = min(iter.interval_remaining, bv.bv_len);
+			blk_calculate_guard(&iter, data, len);
+			bv.bv_len -= len;
+			data += len;
+			iter.interval_remaining -= len;
+			if (!iter.interval_remaining)
+				ret = blk_integrity_interval(&iter, verify);
+		}
+		kunmap_local(kaddr);
+	}
 
-	return memcmp(ref_tag, ref_escape, sizeof(ref_escape)) == 0;
+	return ret;
 }
 
-static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
-		struct blk_integrity *bi)
-{
-	u8 offset = bi->pi_offset;
-	unsigned int i;
-
-	for (i = 0; i < iter->data_size; i += iter->interval) {
-		struct crc64_pi_tuple *pi = iter->prot_buf + offset;
-		u64 ref, seed;
-		__be64 csum;
-
-		if (bi->flags & BLK_INTEGRITY_REF_TAG) {
-			if (pi->app_tag == T10_PI_APP_ESCAPE)
-				goto next;
-
-			ref = get_unaligned_be48(pi->ref_tag);
-			seed = lower_48_bits(iter->seed);
-			if (ref != seed) {
-				pr_err("%s: ref tag error at location %llu (rcvd %llu)\n",
-					iter->disk_name, seed, ref);
-				return BLK_STS_PROTECTION;
-			}
-		} else {
-			if (pi->app_tag == T10_PI_APP_ESCAPE &&
-			    ext_pi_ref_escape(pi->ref_tag))
-				goto next;
-		}
+void bio_integrity_generate(struct bio *bio)
+{
+	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 
-		csum = ext_pi_crc64(0, iter->data_buf, iter->interval);
-		if (offset)
-			csum = ext_pi_crc64(be64_to_cpu(csum), iter->prot_buf,
-					    offset);
+	switch (bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_CRC64:
+	case BLK_INTEGRITY_CSUM_CRC:
+	case BLK_INTEGRITY_CSUM_IP:
+		blk_integrity_iterate(bio, &bio->bi_iter, false);
+		break;
+	default:
+		break;
+	}
+}
 
-		if (pi->guard_tag != csum) {
-			pr_err("%s: guard tag error at sector %llu " \
-			       "(rcvd %016llx, want %016llx)\n",
-				iter->disk_name, (unsigned long long)iter->seed,
-				be64_to_cpu(pi->guard_tag), be64_to_cpu(csum));
-			return BLK_STS_PROTECTION;
-		}
+blk_status_t bio_integrity_verify(struct bio *bio, struct bvec_iter *saved_iter)
+{
+	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 
-next:
-		iter->data_buf += iter->interval;
-		iter->prot_buf += bi->metadata_size;
-		iter->seed++;
+	switch (bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_CRC64:
+	case BLK_INTEGRITY_CSUM_CRC:
+	case BLK_INTEGRITY_CSUM_IP:
+		return blk_integrity_iterate(bio, saved_iter, true);
+	default:
+		break;
 	}
 
 	return BLK_STS_OK;
 }
 
-static void ext_pi_type1_prepare(struct request *rq)
+/*
+ * Advance @iter past the protection offset for protection formats that
+ * contain front padding on the metadata region.
+ */
+static void blk_pi_advance_offset(struct blk_integrity *bi,
+				  struct bio_integrity_payload *bip,
+				  struct bvec_iter *iter)
 {
-	struct blk_integrity *bi = &rq->q->limits.integrity;
-	const int tuple_sz = bi->metadata_size;
-	u64 ref_tag = ext_pi_ref_tag(rq);
-	u8 offset = bi->pi_offset;
-	struct bio *bio;
+	unsigned int offset = bi->pi_offset;
 
-	__rq_for_each_bio(bio, rq) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		u64 virt = lower_48_bits(bip_get_seed(bip));
-		struct bio_vec iv;
-		struct bvec_iter iter;
+	while (offset > 0) {
+		struct bio_vec bv = mp_bvec_iter_bvec(bip->bip_vec, *iter);
+		unsigned int len = min(bv.bv_len, offset);
 
-		/* Already remapped? */
-		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
-			break;
+		bvec_iter_advance_single(bip->bip_vec, iter, len);
+		offset -= len;
+	}
+}
 
-		bip_for_each_vec(iv, bip, iter) {
-			unsigned int j;
-			void *p;
-
-			p = bvec_kmap_local(&iv);
-			for (j = 0; j < iv.bv_len; j += tuple_sz) {
-				struct crc64_pi_tuple *pi = p +  offset;
-				u64 ref = get_unaligned_be48(pi->ref_tag);
-
-				if (ref == virt)
-					put_unaligned_be48(ref_tag, pi->ref_tag);
-				virt++;
-				ref_tag++;
-				p += tuple_sz;
-			}
-			kunmap_local(p);
-		}
+static void *blk_tuple_remap_begin(union pi_tuple *tuple,
+				   struct blk_integrity *bi,
+				   struct bio_integrity_payload *bip,
+				   struct bvec_iter *iter)
+{
+	struct bvec_iter titer;
+	struct bio_vec pbv;
 
-		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
+	blk_pi_advance_offset(bi, bip, iter);
+	pbv = bvec_iter_bvec(bip->bip_vec, *iter);
+	if (likely(pbv.bv_len >= bi->pi_tuple_size))
+		return bvec_kmap_local(&pbv);
+
+	/*
+	 * We need to preserve the state of the original iter for the
+	 * copy_from_tuple at the end, so make a temp iter for here.
+	 */
+	titer = *iter;
+	blk_integrity_copy_to_tuple(bip, &titer, tuple, bi->pi_tuple_size);
+	return tuple;
+}
+
+static void blk_tuple_remap_end(union pi_tuple *tuple, void *ptuple,
+				struct blk_integrity *bi,
+				struct bio_integrity_payload *bip,
+				struct bvec_iter *iter)
+{
+	unsigned int len = bi->metadata_size - bi->pi_offset;
+
+	if (likely(ptuple != tuple)) {
+		kunmap_local(ptuple);
+	} else {
+		blk_integrity_copy_from_tuple(bip, iter, ptuple,
+				bi->pi_tuple_size);
+		len -= bi->pi_tuple_size;
 	}
+
+	bvec_iter_advance(bip->bip_vec, iter, len);
 }
 
-static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
+static void blk_set_ext_unmap_ref(struct crc64_pi_tuple *pi, u64 virt,
+				  u64 ref_tag)
 {
-	struct blk_integrity *bi = &rq->q->limits.integrity;
-	unsigned intervals = nr_bytes >> bi->interval_exp;
-	const int tuple_sz = bi->metadata_size;
-	u64 ref_tag = ext_pi_ref_tag(rq);
-	u8 offset = bi->pi_offset;
-	struct bio *bio;
+	u64 ref = get_unaligned_be48(&pi->ref_tag);
 
-	__rq_for_each_bio(bio, rq) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		u64 virt = lower_48_bits(bip_get_seed(bip));
-		struct bio_vec iv;
-		struct bvec_iter iter;
-
-		bip_for_each_vec(iv, bip, iter) {
-			unsigned int j;
-			void *p;
-
-			p = bvec_kmap_local(&iv);
-			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
-				struct crc64_pi_tuple *pi = p + offset;
-				u64 ref = get_unaligned_be48(pi->ref_tag);
-
-				if (ref == ref_tag)
-					put_unaligned_be48(virt, pi->ref_tag);
-				virt++;
-				ref_tag++;
-				intervals--;
-				p += tuple_sz;
-			}
-			kunmap_local(p);
-		}
+	if (ref == lower_48_bits(ref_tag) && ref != lower_48_bits(virt))
+		put_unaligned_be48(virt, pi->ref_tag);
+}
+
+static void blk_set_t10_unmap_ref(struct t10_pi_tuple *pi, u32 virt,
+				  u32 ref_tag)
+{
+	u32 ref = get_unaligned_be32(&pi->ref_tag);
+
+	if (ref == ref_tag && ref != virt)
+		put_unaligned_be32(virt, &pi->ref_tag);
+}
+
+static void blk_reftag_remap_complete(struct blk_integrity *bi,
+				      union pi_tuple *tuple, u64 virt, u64 ref)
+{
+	switch (bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_CRC64:
+		blk_set_ext_unmap_ref(&tuple->crc64_pi, virt, ref);
+		break;
+	case BLK_INTEGRITY_CSUM_CRC:
+	case BLK_INTEGRITY_CSUM_IP:
+		blk_set_t10_unmap_ref(&tuple->t10_pi, virt, ref);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
 	}
 }
 
-void bio_integrity_generate(struct bio *bio)
+static void blk_set_ext_map_ref(struct crc64_pi_tuple *pi, u64 virt,
+				u64 ref_tag)
 {
-	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
-	struct bio_integrity_payload *bip = bio_integrity(bio);
-	struct blk_integrity_iter iter;
-	struct bvec_iter bviter;
-	struct bio_vec bv;
-
-	iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
-	iter.interval = 1 << bi->interval_exp;
-	iter.seed = bio->bi_iter.bi_sector;
-	iter.prot_buf = bvec_virt(bip->bip_vec);
-	bio_for_each_segment(bv, bio, bviter) {
-		void *kaddr = bvec_kmap_local(&bv);
+	u64 ref = get_unaligned_be48(&pi->ref_tag);
 
-		iter.data_buf = kaddr;
-		iter.data_size = bv.bv_len;
-		switch (bi->csum_type) {
-		case BLK_INTEGRITY_CSUM_CRC64:
-			ext_pi_crc64_generate(&iter, bi);
-			break;
-		case BLK_INTEGRITY_CSUM_CRC:
-		case BLK_INTEGRITY_CSUM_IP:
-			t10_pi_generate(&iter, bi);
-			break;
-		default:
-			break;
-		}
-		kunmap_local(kaddr);
+	if (ref == lower_48_bits(virt) && ref != ref_tag)
+		put_unaligned_be48(ref_tag, pi->ref_tag);
+}
+
+static void blk_set_t10_map_ref(struct t10_pi_tuple *pi, u32 virt, u32 ref_tag)
+{
+	u32 ref = get_unaligned_be32(&pi->ref_tag);
+
+	if (ref == virt && ref != ref_tag)
+		put_unaligned_be32(ref_tag, &pi->ref_tag);
+}
+
+static void blk_reftag_remap_prepare(struct blk_integrity *bi,
+				     union pi_tuple *tuple,
+				     u64 virt, u64 ref)
+{
+	switch (bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_CRC64:
+		blk_set_ext_map_ref(&tuple->crc64_pi, virt, ref);
+		break;
+	case BLK_INTEGRITY_CSUM_CRC:
+	case BLK_INTEGRITY_CSUM_IP:
+		blk_set_t10_map_ref(&tuple->t10_pi, virt, ref);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
 	}
 }
 
-blk_status_t bio_integrity_verify(struct bio *bio, struct bvec_iter *saved_iter)
+static void __blk_reftag_remap(struct bio *bio, struct blk_integrity *bi,
+			       unsigned *intervals, u64 *ref, bool prep)
 {
-	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 	struct bio_integrity_payload *bip = bio_integrity(bio);
-	struct blk_integrity_iter iter;
-	struct bvec_iter bviter;
-	struct bio_vec bv;
+	struct bvec_iter iter = bip->bip_iter;
+	u64 virt = bip_get_seed(bip);
+	union pi_tuple *ptuple;
+	union pi_tuple tuple;
 
-	/*
-	 * At the moment verify is called bi_iter has been advanced during split
-	 * and completion, so use the copy created during submission here.
-	 */
-	iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
-	iter.interval = 1 << bi->interval_exp;
-	iter.seed = saved_iter->bi_sector;
-	iter.prot_buf = bvec_virt(bip->bip_vec);
-	__bio_for_each_segment(bv, bio, bviter, *saved_iter) {
-		void *kaddr = bvec_kmap_local(&bv);
-		blk_status_t ret = BLK_STS_OK;
+	if (prep && bip->bip_flags & BIP_MAPPED_INTEGRITY) {
+		*ref += bio->bi_iter.bi_size >> bi->interval_exp;
+		return;
+	}
 
-		iter.data_buf = kaddr;
-		iter.data_size = bv.bv_len;
-		switch (bi->csum_type) {
-		case BLK_INTEGRITY_CSUM_CRC64:
-			ret = ext_pi_crc64_verify(&iter, bi);
-			break;
-		case BLK_INTEGRITY_CSUM_CRC:
-		case BLK_INTEGRITY_CSUM_IP:
-			ret = t10_pi_verify(&iter, bi);
-			break;
-		default:
-			break;
-		}
-		kunmap_local(kaddr);
+	while (iter.bi_size && *intervals) {
+		ptuple = blk_tuple_remap_begin(&tuple, bi, bip, &iter);
+
+		if (prep)
+			blk_reftag_remap_prepare(bi, ptuple, virt, *ref);
+		else
+			blk_reftag_remap_complete(bi, ptuple, virt, *ref);
 
-		if (ret)
-			return ret;
+		blk_tuple_remap_end(&tuple, ptuple, bi, bip, &iter);
+		(*intervals)--;
+		(*ref)++;
+		virt++;
 	}
 
-	return BLK_STS_OK;
+	if (prep)
+		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
 }
 
-void blk_integrity_prepare(struct request *rq)
+static void blk_integrity_remap(struct request *rq, unsigned int nr_bytes,
+				bool prep)
 {
 	struct blk_integrity *bi = &rq->q->limits.integrity;
+	u64 ref = blk_rq_pos(rq) >> (bi->interval_exp - SECTOR_SHIFT);
+	unsigned intervals = nr_bytes >> bi->interval_exp;
+	struct bio *bio;
 
 	if (!(bi->flags & BLK_INTEGRITY_REF_TAG))
 		return;
 
-	if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64)
-		ext_pi_type1_prepare(rq);
-	else
-		t10_pi_type1_prepare(rq);
+	__rq_for_each_bio(bio, rq) {
+		__blk_reftag_remap(bio, bi, &intervals, &ref, prep);
+		if (!intervals)
+			break;
+	}
 }
 
-void blk_integrity_complete(struct request *rq, unsigned int nr_bytes)
+void blk_integrity_prepare(struct request *rq)
 {
-	struct blk_integrity *bi = &rq->q->limits.integrity;
-
-	if (!(bi->flags & BLK_INTEGRITY_REF_TAG))
-		return;
+	blk_integrity_remap(rq, blk_rq_bytes(rq), true);
+}
 
-	if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64)
-		ext_pi_type1_complete(rq, nr_bytes);
-	else
-		t10_pi_type1_complete(rq, nr_bytes);
+void blk_integrity_complete(struct request *rq, unsigned int nr_bytes)
+{
+	blk_integrity_remap(rq, nr_bytes, false);
 }
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 58bf432ec5e6..3de52f1d2723 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1875,6 +1875,7 @@ static bool nvme_init_integrity(struct nvme_ns_head *head,
 		break;
 	}
 
+	bi->flags |= BLK_SPLIT_INTERVAL_CAPABLE;
 	bi->metadata_size = head->ms;
 	if (bi->csum_type) {
 		bi->pi_tuple_size = head->pi_size;
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index ea6d7d322ae3..b1b530613c34 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -14,6 +14,7 @@ enum blk_integrity_flags {
 	BLK_INTEGRITY_DEVICE_CAPABLE	= 1 << 2,
 	BLK_INTEGRITY_REF_TAG		= 1 << 3,
 	BLK_INTEGRITY_STACKED		= 1 << 4,
+	BLK_SPLIT_INTERVAL_CAPABLE	= 1 << 5,
 };
 
 const char *blk_integrity_profile_name(struct blk_integrity *bi);
-- 
cgit v1.2.3


From 0e24d17bd9668f9dad78ede6a0e8f13dab176682 Mon Sep 17 00:00:00 2001
From: Simon Baatz <gmbnomis@gmail.com>
Date: Mon, 9 Mar 2026 09:02:26 +0100
Subject: tcp: implement RFC 7323 window retraction receiver requirements

By default, the Linux TCP implementation does not shrink the
advertised window (RFC 7323 calls this "window retraction") with the
following exceptions:

- When an incoming segment cannot be added due to the receive buffer
  running out of memory. Since commit 8c670bdfa58e ("tcp: correct
  handling of extreme memory squeeze") a zero window will be
  advertised in this case. It turns out that reaching the required
  memory pressure is easy when window scaling is in use. In the
  simplest case, sending a sufficient number of segments smaller than
  the scale factor to a receiver that does not read data is enough.

- Commit b650d953cd39 ("tcp: enforce receive buffer memory limits by
  allowing the tcp window to shrink") addressed the "eating memory"
  problem by introducing a sysctl knob that allows shrinking the
  window before running out of memory.

However, RFC 7323 does not only state that shrinking the window is
necessary in some cases, it also formulates requirements for TCP
implementations when doing so (Section 2.4).

This commit addresses the receiver-side requirements: After retracting
the window, the peer may have a snd_nxt that lies within a previously
advertised window but is now beyond the retracted window. This means
that all incoming segments (including pure ACKs) will be rejected
until the application happens to read enough data to let the peer's
snd_nxt be in window again (which may be never).

To comply with RFC 7323, the receiver MUST honor any segment that
would have been in window for any ACK sent by the receiver and, when
window scaling is in effect, SHOULD track the maximum window sequence
number it has advertised. This patch tracks that maximum window
sequence number rcv_mwnd_seq throughout the connection and uses it in
tcp_sequence() when deciding whether a segment is acceptable.

rcv_mwnd_seq is updated together with rcv_wup and rcv_wnd in
tcp_select_window(). If we count tcp_sequence() as fast path, it is
read in the fast path. Therefore, rcv_mwnd_seq is put into rcv_wnd's
cacheline group.

The logic for handling received data in tcp_data_queue() is already
sufficient and does not need to be updated.

Signed-off-by: Simon Baatz <gmbnomis@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260309-tcp_rfc7323_retract_wnd_rfc-v3-1-4c7f96b1ec69@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../networking/net_cachelines/tcp_sock.rst         |  1 +
 include/linux/tcp.h                                |  3 +++
 include/net/tcp.h                                  | 22 ++++++++++++++++++++++
 net/ipv4/tcp.c                                     |  2 ++
 net/ipv4/tcp_fastopen.c                            |  1 +
 net/ipv4/tcp_input.c                               | 10 +++++-----
 net/ipv4/tcp_minisocks.c                           |  1 +
 net/ipv4/tcp_output.c                              |  3 +++
 .../net/packetdrill/tcp_rcv_big_endseq.pkt         |  2 +-
 9 files changed, 39 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst
index 563daea10d6c..fecf61166a54 100644
--- a/Documentation/networking/net_cachelines/tcp_sock.rst
+++ b/Documentation/networking/net_cachelines/tcp_sock.rst
@@ -121,6 +121,7 @@ u64                           delivered_mstamp        read_write
 u32                           rate_delivered                              read_mostly         tcp_rate_gen
 u32                           rate_interval_us                            read_mostly         rate_delivered,rate_app_limited
 u32                           rcv_wnd                 read_write          read_mostly         tcp_select_window,tcp_receive_window,tcp_fast_path_check
+u32                           rcv_mwnd_seq            read_write                              tcp_select_window
 u32                           write_seq               read_write                              tcp_rate_check_app_limited,tcp_write_queue_empty,tcp_skb_entail,forced_push,tcp_mark_push
 u32                           notsent_lowat           read_mostly                             tcp_stream_memory_free
 u32                           pushed_seq              read_write                              tcp_mark_push,forced_push
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bcebc4f07532..6982f10e826b 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -316,6 +316,9 @@ struct tcp_sock {
 					*/
 	u32	app_limited;	/* limited until "delivered" reaches this val */
 	u32	rcv_wnd;	/* Current receiver window		*/
+	u32	rcv_mwnd_seq;	/* Maximum window sequence number (RFC 7323,
+				 * section 2.4, receiver requirements)
+				 */
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
 /*
  *      Options received (usually on last packet, some only on SYN packets).
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 48dffcca0a71..f87bdacb5a69 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -934,6 +934,28 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp)
 	return (u32) win;
 }
 
+/* Compute the maximum receive window we ever advertised.
+ * Rcv_nxt can be after the window if our peer push more data
+ * than the offered window.
+ */
+static inline u32 tcp_max_receive_window(const struct tcp_sock *tp)
+{
+	s32 win = tp->rcv_mwnd_seq - tp->rcv_nxt;
+
+	if (win < 0)
+		win = 0;
+	return (u32) win;
+}
+
+/* Check if we need to update the maximum receive window sequence number */
+static inline void tcp_update_max_rcv_wnd_seq(struct tcp_sock *tp)
+{
+	u32 wre = tp->rcv_wup + tp->rcv_wnd;
+
+	if (after(wre, tp->rcv_mwnd_seq))
+		tp->rcv_mwnd_seq = wre;
+}
+
 /* Choose a new window, without checks for shrinking, and without
  * scaling applied to the result.  The caller does these things
  * if necessary.  This is a "raw" window selection.
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ed6f6712f060..516087c622ad 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3561,6 +3561,7 @@ static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
 
 	tp->rcv_wnd	= opt.rcv_wnd;
 	tp->rcv_wup	= opt.rcv_wup;
+	tp->rcv_mwnd_seq = opt.rcv_wup + opt.rcv_wnd;
 
 	return 0;
 }
@@ -5275,6 +5276,7 @@ static void __init tcp_struct_check(void)
 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_mwnd_seq);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_tstamp);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 9fdc19accafd..4e389d609f91 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -377,6 +377,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 
 	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
 	tp->rcv_wup = tp->rcv_nxt;
+	tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd;
 	/* tcp_conn_request() is sending the SYNACK,
 	 * and queues the child into listener accept queue.
 	 */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 71ac69b7b75e..2e1b23760815 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4808,20 +4808,18 @@ static enum skb_drop_reason tcp_sequence(const struct sock *sk,
 					 const struct tcphdr *th)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 seq_limit;
 
 	if (before(end_seq, tp->rcv_wup))
 		return SKB_DROP_REASON_TCP_OLD_SEQUENCE;
 
-	seq_limit = tp->rcv_nxt + tcp_receive_window(tp);
-	if (unlikely(after(end_seq, seq_limit))) {
+	if (unlikely(after(end_seq, tp->rcv_nxt + tcp_max_receive_window(tp)))) {
 		/* Some stacks are known to handle FIN incorrectly; allow the
 		 * FIN to extend beyond the window and check it in detail later.
 		 */
-		if (!after(end_seq - th->fin, seq_limit))
+		if (!after(end_seq - th->fin, tp->rcv_nxt + tcp_receive_window(tp)))
 			return SKB_NOT_DROPPED_YET;
 
-		if (after(seq, seq_limit))
+		if (after(seq, tp->rcv_nxt + tcp_max_receive_window(tp)))
 			return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
 
 		/* Only accept this packet if receive queue is empty. */
@@ -6903,6 +6901,7 @@ consume:
 		 */
 		WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
 		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
+		tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd;
 
 		/* RFC1323: The window in SYN & SYN/ACK segments is
 		 * never scaled.
@@ -7015,6 +7014,7 @@ consume:
 		WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
 		WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
+		tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd;
 
 		/* RFC1323: The window in SYN & SYN/ACK segments is
 		 * never scaled.
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index dafb63b923d0..d350d794a959 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -604,6 +604,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 	newtp->window_clamp = req->rsk_window_clamp;
 	newtp->rcv_ssthresh = req->rsk_rcv_wnd;
 	newtp->rcv_wnd = req->rsk_rcv_wnd;
+	newtp->rcv_mwnd_seq = newtp->rcv_wup + req->rsk_rcv_wnd;
 	newtp->rx_opt.wscale_ok = ireq->wscale_ok;
 	if (newtp->rx_opt.wscale_ok) {
 		newtp->rx_opt.snd_wscale = ireq->snd_wscale;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 34a25ef61006..35c3b0ab5a0c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -293,6 +293,7 @@ static u16 tcp_select_window(struct sock *sk)
 		tp->pred_flags = 0;
 		tp->rcv_wnd = 0;
 		tp->rcv_wup = tp->rcv_nxt;
+		tcp_update_max_rcv_wnd_seq(tp);
 		return 0;
 	}
 
@@ -316,6 +317,7 @@ static u16 tcp_select_window(struct sock *sk)
 
 	tp->rcv_wnd = new_win;
 	tp->rcv_wup = tp->rcv_nxt;
+	tcp_update_max_rcv_wnd_seq(tp);
 
 	/* Make sure we do not exceed the maximum possible
 	 * scaled window.
@@ -4165,6 +4167,7 @@ static void tcp_connect_init(struct sock *sk)
 	else
 		tp->rcv_tstamp = tcp_jiffies32;
 	tp->rcv_wup = tp->rcv_nxt;
+	tp->rcv_mwnd_seq = tp->rcv_nxt + tp->rcv_wnd;
 	WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 
 	inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
index 6c0f32c40f19..12882be10f2e 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
@@ -36,7 +36,7 @@
 
   +0 read(4, ..., 100000) = 4000
 
-// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
+// If queue is empty, accept a packet even if its end_seq is above rcv_mwnd_seq
   +0 < P. 4001:54001(50000) ack 1 win 257
    * > .  1:1(0) ack 54001 win 0
 
-- 
cgit v1.2.3


From f807b5b9b89eb9220d034115c272c312251cbcac Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 12 Mar 2026 12:13:52 +0000
Subject: net: stmmac: avoid passing pci_dev

The pci_dev is only used to provide the ethtool bus_info using
pci_name(priv->plat->pdev). This is the same as dev_name(priv->device).
Thus, rather than passing the pci_dev, make use of what we already
have.

To avoid unexpectedly exposing the device name through ethtool where
it wasn't provided before, add a flag priv->plat->provide_bus_info
to enable this, which only dwmac-intel needs to set.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/E1w0evI-0000000CzY7-1fyo@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c    | 2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 5 ++---
 include/linux/stmmac.h                               | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 421c6c81ca5e..f621077c30a4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -589,7 +589,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	int ret;
 	int i;
 
-	plat->pdev = pdev;
+	plat->provide_bus_info = true;
 	plat->phy_addr = -1;
 	plat->clk_csr = STMMAC_CSR_250_300M;
 	plat->core_type = DWMAC_CORE_GMAC4;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index c1e26965d9b5..92585d27ab88 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -312,10 +312,9 @@ static void stmmac_ethtool_getdrvinfo(struct net_device *dev,
 		strscpy(info->driver, MAC100_ETHTOOL_NAME,
 			sizeof(info->driver));
 
-	if (priv->plat->pdev) {
-		strscpy(info->bus_info, pci_name(priv->plat->pdev),
+	if (priv->plat->provide_bus_info)
+		strscpy(info->bus_info, dev_name(priv->device),
 			sizeof(info->bus_info));
-	}
 }
 
 static int stmmac_ethtool_get_link_ksettings(struct net_device *dev,
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 937985276e6b..72febd246bdb 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -348,7 +348,7 @@ struct plat_stmmacenet_data {
 	int rss_en;
 	int mac_port_sel_speed;
 	u8 vlan_fail_q;
-	struct pci_dev *pdev;
+	bool provide_bus_info;
 	int int_snapshot_num;
 	int msi_mac_vec;
 	int msi_wol_vec;
-- 
cgit v1.2.3


From 6df1459605cedd2112ebf660c77f42bb87d5c306 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 9 Mar 2026 18:03:31 +0100
Subject: net: phy: make mdio_device.c part of libphy

This patch
- makes mdio_device.c part of libphy
- makes mdio_device_(un)register_reset() static
- moves mdiobus_(un)register_device() from mdio_bus.c to mdio_device.c,
  stops exporting both functions and makes them private to phylib

This further decouples the MDIO consumer functionality from libphy.

Note: This makes MDIO driver registration part of phylib, therefore
      adjust Kconfig dependencies where needed.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/c6dbf9b3-3ca0-434b-ad3a-71fe602ab809@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/clk/qcom/Kconfig          |  2 +-
 drivers/net/phy/Makefile          |  6 +++---
 drivers/net/phy/mdio-private.h    | 11 -----------
 drivers/net/phy/mdio_bus.c        | 36 ------------------------------------
 drivers/net/phy/mdio_device.c     | 39 ++++++++++++++++++++++++++++++++++++---
 drivers/net/phy/phylib-internal.h |  4 ++++
 drivers/phy/broadcom/Kconfig      |  4 ++--
 include/linux/mdio.h              |  2 --
 8 files changed, 46 insertions(+), 58 deletions(-)
 delete mode 100644 drivers/net/phy/mdio-private.h

(limited to 'include/linux')

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index a8a86ea6bb74..a277c434d641 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -392,7 +392,7 @@ config IPQ_NSSCC_9574
 
 config IPQ_NSSCC_QCA8K
 	tristate "QCA8K(QCA8386 or QCA8084) NSS Clock Controller"
-	depends on MDIO_BUS
+	depends on PHYLIB
 	help
 	  Support for NSS(Network SubSystem) clock controller on
 	  qca8386/qca8084 chip.
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 3a34917adea7..8d262b4e2be2 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -3,8 +3,8 @@
 
 libphy-y			:= phy.o phy-c45.o phy-core.o phy_device.o \
 				   linkmode.o phy_link_topology.o \
-				   phy_caps.o mdio_bus_provider.o phy_port.o
-mdio-bus-y			+= mdio_bus.o mdio_device.o
+				   phy_caps.o mdio_bus_provider.o phy_port.o \
+				   mdio_device.o
 
 ifdef CONFIG_PHYLIB
 # built-in whenever PHYLIB is built-in or module
@@ -15,7 +15,7 @@ libphy-$(CONFIG_SWPHY)		+= swphy.o
 libphy-$(CONFIG_LED_TRIGGER_PHY)	+= phy_led_triggers.o
 libphy-$(CONFIG_OPEN_ALLIANCE_HELPERS) += open_alliance_helpers.o
 
-obj-$(CONFIG_MDIO_BUS)		+= mdio-bus.o
+obj-$(CONFIG_MDIO_BUS)		+= mdio_bus.o
 obj-$(CONFIG_PHYLINK)		+= phylink.o
 obj-$(CONFIG_PHYLIB)		+= libphy.o
 obj-$(CONFIG_PHYLIB)		+= mdio_devres.o
diff --git a/drivers/net/phy/mdio-private.h b/drivers/net/phy/mdio-private.h
deleted file mode 100644
index 8bc6d9088af1..000000000000
--- a/drivers/net/phy/mdio-private.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef __MDIO_PRIVATE_H
-#define __MDIO_PRIVATE_H
-
-/* MDIO internal helpers
- */
-
-int mdio_device_register_reset(struct mdio_device *mdiodev);
-void mdio_device_unregister_reset(struct mdio_device *mdiodev);
-
-#endif /* __MDIO_PRIVATE_H */
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 48c0447e6a8f..a30c679feeca 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -29,46 +29,10 @@
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/unistd.h>
-#include "mdio-private.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/mdio.h>
 
-int mdiobus_register_device(struct mdio_device *mdiodev)
-{
-	int err;
-
-	if (mdiodev->bus->mdio_map[mdiodev->addr])
-		return -EBUSY;
-
-	if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY) {
-		err = mdio_device_register_reset(mdiodev);
-		if (err)
-			return err;
-
-		/* Assert the reset signal */
-		mdio_device_reset(mdiodev, 1);
-	}
-
-	mdiodev->bus->mdio_map[mdiodev->addr] = mdiodev;
-
-	return 0;
-}
-EXPORT_SYMBOL(mdiobus_register_device);
-
-int mdiobus_unregister_device(struct mdio_device *mdiodev)
-{
-	if (mdiodev->bus->mdio_map[mdiodev->addr] != mdiodev)
-		return -EINVAL;
-
-	mdio_device_unregister_reset(mdiodev);
-
-	mdiodev->bus->mdio_map[mdiodev->addr] = NULL;
-
-	return 0;
-}
-EXPORT_SYMBOL(mdiobus_unregister_device);
-
 static struct mdio_device *mdiobus_find_device(struct mii_bus *bus, int addr)
 {
 	bool addr_valid = addr >= 0 && addr < ARRAY_SIZE(bus->mdio_map);
diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index da4fb7484c7c..56080d3d2d25 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -22,7 +22,7 @@
 #include <linux/string.h>
 #include <linux/unistd.h>
 #include <linux/property.h>
-#include "mdio-private.h"
+#include "phylib-internal.h"
 
 /**
  * mdio_device_register_reset - Read and initialize the reset properties of
@@ -31,7 +31,7 @@
  *
  * Return: Zero if successful, negative error code on failure
  */
-int mdio_device_register_reset(struct mdio_device *mdiodev)
+static int mdio_device_register_reset(struct mdio_device *mdiodev)
 {
 	struct reset_control *reset;
 
@@ -67,7 +67,7 @@ int mdio_device_register_reset(struct mdio_device *mdiodev)
  *				  an mdio device
  * @mdiodev: mdio_device structure
  */
-void mdio_device_unregister_reset(struct mdio_device *mdiodev)
+static void mdio_device_unregister_reset(struct mdio_device *mdiodev)
 {
 	gpiod_put(mdiodev->reset_gpio);
 	mdiodev->reset_gpio = NULL;
@@ -189,6 +189,39 @@ void mdio_device_remove(struct mdio_device *mdiodev)
 }
 EXPORT_SYMBOL(mdio_device_remove);
 
+int mdiobus_register_device(struct mdio_device *mdiodev)
+{
+	int err;
+
+	if (mdiodev->bus->mdio_map[mdiodev->addr])
+		return -EBUSY;
+
+	if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY) {
+		err = mdio_device_register_reset(mdiodev);
+		if (err)
+			return err;
+
+		/* Assert the reset signal */
+		mdio_device_reset(mdiodev, 1);
+	}
+
+	mdiodev->bus->mdio_map[mdiodev->addr] = mdiodev;
+
+	return 0;
+}
+
+int mdiobus_unregister_device(struct mdio_device *mdiodev)
+{
+	if (mdiodev->bus->mdio_map[mdiodev->addr] != mdiodev)
+		return -EINVAL;
+
+	mdio_device_unregister_reset(mdiodev);
+
+	mdiodev->bus->mdio_map[mdiodev->addr] = NULL;
+
+	return 0;
+}
+
 /**
  * mdio_probe - probe an MDIO device
  * @dev: device to probe
diff --git a/drivers/net/phy/phylib-internal.h b/drivers/net/phy/phylib-internal.h
index dc9592c6bb8e..bfb1aa823868 100644
--- a/drivers/net/phy/phylib-internal.h
+++ b/drivers/net/phy/phylib-internal.h
@@ -6,6 +6,7 @@
 #ifndef __PHYLIB_INTERNAL_H
 #define __PHYLIB_INTERNAL_H
 
+struct mdio_device;
 struct phy_device;
 
 /*
@@ -20,6 +21,9 @@ void of_set_phy_timing_role(struct phy_device *phydev);
 int phy_speed_down_core(struct phy_device *phydev);
 void phy_check_downshift(struct phy_device *phydev);
 
+int mdiobus_register_device(struct mdio_device *mdiodev);
+int mdiobus_unregister_device(struct mdio_device *mdiodev);
+
 int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv);
 
 #endif /* __PHYLIB_INTERNAL_H */
diff --git a/drivers/phy/broadcom/Kconfig b/drivers/phy/broadcom/Kconfig
index 1d89a2fd9b79..46371a8940d7 100644
--- a/drivers/phy/broadcom/Kconfig
+++ b/drivers/phy/broadcom/Kconfig
@@ -52,7 +52,7 @@ config PHY_BCM_NS_USB3
 	tristate "Broadcom Northstar USB 3.0 PHY Driver"
 	depends on ARCH_BCM_IPROC || COMPILE_TEST
 	depends on HAS_IOMEM && OF
-	depends on MDIO_BUS
+	depends on PHYLIB
 	select GENERIC_PHY
 	help
 	  Enable this to support Broadcom USB 3.0 PHY connected to the USB
@@ -60,7 +60,7 @@ config PHY_BCM_NS_USB3
 
 config PHY_NS2_PCIE
 	tristate "Broadcom Northstar2 PCIe PHY driver"
-	depends on (OF && MDIO_BUS_MUX_BCM_IPROC) || (COMPILE_TEST && MDIO_BUS)
+	depends on (OF && MDIO_BUS_MUX_BCM_IPROC) || (COMPILE_TEST && PHYLIB)
 	select GENERIC_PHY
 	default ARCH_BCM_IPROC
 	help
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 5d1203b9af20..f4f9d9609448 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -688,8 +688,6 @@ static inline int mdiodev_c45_write(struct mdio_device *mdiodev, u32 devad,
 				 val);
 }
 
-int mdiobus_register_device(struct mdio_device *mdiodev);
-int mdiobus_unregister_device(struct mdio_device *mdiodev);
 bool mdiobus_is_registered_device(struct mii_bus *bus, int addr);
 struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr);
 
-- 
cgit v1.2.3


From c4399af5e55658e832779b256d8458323011f983 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 9 Mar 2026 18:06:00 +0100
Subject: net: phy: move remaining provider code to mdio_bus_provider.c

This moves definition of mdio_bus class and bus_type to the provider
side, what allows to make them private to libphy.
As a prerequisite MDIO statistics handling is moved to the
provider side as well.

Note: This patch causes a checkpatch error "Macros with complex values
      should be enclosed in parentheses" for
      MDIO_BUS_STATS_ADDR_ATTR_GROUP. I consider this a false positive
      here, in addition the patch just moves existing code.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/47b85676-b349-4aa0-a5ef-cd37769a4c69@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/mdio_bus.c          | 282 ------------------------------------
 drivers/net/phy/mdio_bus_provider.c | 275 +++++++++++++++++++++++++++++++++++
 drivers/net/phy/phylib-internal.h   |   3 +
 include/linux/phy.h                 |   3 -
 4 files changed, 278 insertions(+), 285 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 9fb473326027..00d0e4159e9b 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -10,20 +10,14 @@
 
 #include <linux/device.h>
 #include <linux/errno.h>
-#include <linux/etherdevice.h>
 #include <linux/ethtool.h>
-#include <linux/gpio/consumer.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/mii.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/of_device.h>
-#include <linux/of_mdio.h>
 #include <linux/phy.h>
-#include <linux/reset.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
@@ -64,218 +58,6 @@ bool mdiobus_is_registered_device(struct mii_bus *bus, int addr)
 }
 EXPORT_SYMBOL(mdiobus_is_registered_device);
 
-/**
- * mdiobus_release - mii_bus device release callback
- * @d: the target struct device that contains the mii_bus
- *
- * Description: called when the last reference to an mii_bus is
- * dropped, to free the underlying memory.
- */
-static void mdiobus_release(struct device *d)
-{
-	struct mii_bus *bus = to_mii_bus(d);
-
-	WARN(bus->state != MDIOBUS_RELEASED &&
-	     /* for compatibility with error handling in drivers */
-	     bus->state != MDIOBUS_ALLOCATED,
-	     "%s: not in RELEASED or ALLOCATED state\n",
-	     bus->id);
-
-	if (bus->state == MDIOBUS_RELEASED)
-		fwnode_handle_put(dev_fwnode(d));
-
-	kfree(bus);
-}
-
-struct mdio_bus_stat_attr {
-	struct device_attribute attr;
-	int address;
-	unsigned int field_offset;
-};
-
-static struct mdio_bus_stat_attr *to_sattr(struct device_attribute *attr)
-{
-	return container_of(attr, struct mdio_bus_stat_attr, attr);
-}
-
-static u64 mdio_bus_get_stat(struct mdio_bus_stats *s, unsigned int offset)
-{
-	const u64_stats_t *stats = (const void *)s + offset;
-	unsigned int start;
-	u64 val = 0;
-
-	do {
-		start = u64_stats_fetch_begin(&s->syncp);
-		val = u64_stats_read(stats);
-	} while (u64_stats_fetch_retry(&s->syncp, start));
-
-	return val;
-}
-
-static ssize_t mdio_bus_stat_field_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct mdio_bus_stat_attr *sattr = to_sattr(attr);
-	struct mii_bus *bus = to_mii_bus(dev);
-	u64 val = 0;
-
-	if (sattr->address < 0) {
-		/* get global stats */
-		for (int i = 0; i < PHY_MAX_ADDR; i++)
-			val += mdio_bus_get_stat(&bus->stats[i],
-						 sattr->field_offset);
-	} else {
-		val = mdio_bus_get_stat(&bus->stats[sattr->address],
-					sattr->field_offset);
-	}
-
-	return sysfs_emit(buf, "%llu\n", val);
-}
-
-static ssize_t mdio_bus_device_stat_field_show(struct device *dev,
-					       struct device_attribute *attr,
-					       char *buf)
-{
-	struct mdio_bus_stat_attr *sattr = to_sattr(attr);
-	struct mdio_device *mdiodev = to_mdio_device(dev);
-	struct mii_bus *bus = mdiodev->bus;
-	int addr = mdiodev->addr;
-	u64 val;
-
-	val = mdio_bus_get_stat(&bus->stats[addr], sattr->field_offset);
-
-	return sysfs_emit(buf, "%llu\n", val);
-}
-
-#define MDIO_BUS_STATS_ATTR(field)					\
-static const struct mdio_bus_stat_attr dev_attr_mdio_bus_##field = {	\
-	.attr = __ATTR(field, 0444, mdio_bus_stat_field_show, NULL),	\
-	.address = -1,							\
-	.field_offset = offsetof(struct mdio_bus_stats, field),		\
-};									\
-static const struct mdio_bus_stat_attr dev_attr_mdio_bus_device_##field = { \
-	.attr = __ATTR(field, 0444, mdio_bus_device_stat_field_show, NULL), \
-	.field_offset = offsetof(struct mdio_bus_stats, field),		\
-}
-
-MDIO_BUS_STATS_ATTR(transfers);
-MDIO_BUS_STATS_ATTR(errors);
-MDIO_BUS_STATS_ATTR(writes);
-MDIO_BUS_STATS_ATTR(reads);
-
-#define MDIO_BUS_STATS_ADDR_ATTR_DECL(field, addr, file)		\
-static const struct mdio_bus_stat_attr					\
-dev_attr_mdio_bus_addr_##field##_##addr = {				\
-	.attr = { .attr = { .name = file, .mode = 0444 },		\
-		     .show = mdio_bus_stat_field_show,			\
-	},								\
-	.address = addr,						\
-	.field_offset = offsetof(struct mdio_bus_stats, field),		\
-}
-
-#define MDIO_BUS_STATS_ADDR_ATTR(field, addr)				\
-	MDIO_BUS_STATS_ADDR_ATTR_DECL(field, addr,			\
-				 __stringify(field) "_" __stringify(addr))
-
-#define MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(addr)			\
-	MDIO_BUS_STATS_ADDR_ATTR(transfers, addr);			\
-	MDIO_BUS_STATS_ADDR_ATTR(errors, addr);				\
-	MDIO_BUS_STATS_ADDR_ATTR(writes, addr);				\
-	MDIO_BUS_STATS_ADDR_ATTR(reads, addr)				\
-
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(0);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(1);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(2);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(3);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(4);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(5);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(6);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(7);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(8);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(9);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(10);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(11);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(12);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(13);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(14);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(15);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(16);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(17);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(18);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(19);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(20);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(21);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(22);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(23);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(24);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(25);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(26);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(27);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(28);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(29);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(30);
-MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(31);
-
-#define MDIO_BUS_STATS_ADDR_ATTR_GROUP(addr)				\
-	&dev_attr_mdio_bus_addr_transfers_##addr.attr.attr,		\
-	&dev_attr_mdio_bus_addr_errors_##addr.attr.attr,		\
-	&dev_attr_mdio_bus_addr_writes_##addr.attr.attr,		\
-	&dev_attr_mdio_bus_addr_reads_##addr.attr.attr			\
-
-static const struct attribute *const mdio_bus_statistics_attrs[] = {
-	&dev_attr_mdio_bus_transfers.attr.attr,
-	&dev_attr_mdio_bus_errors.attr.attr,
-	&dev_attr_mdio_bus_writes.attr.attr,
-	&dev_attr_mdio_bus_reads.attr.attr,
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(0),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(1),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(2),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(3),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(4),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(5),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(6),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(7),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(8),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(9),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(10),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(11),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(12),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(13),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(14),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(15),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(16),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(17),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(18),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(19),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(20),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(21),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(22),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(23),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(24),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(25),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(26),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(27),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(28),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(29),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(30),
-	MDIO_BUS_STATS_ADDR_ATTR_GROUP(31),
-	NULL,
-};
-
-static const struct attribute_group mdio_bus_statistics_group = {
-	.name		= "statistics",
-	.attrs_const	= mdio_bus_statistics_attrs,
-};
-__ATTRIBUTE_GROUPS(mdio_bus_statistics);
-
-const struct class mdio_bus_class = {
-	.name		= "mdio_bus",
-	.dev_release	= mdiobus_release,
-	.dev_groups	= mdio_bus_statistics_groups,
-};
-EXPORT_SYMBOL_GPL(mdio_bus_class);
-
 static void mdiobus_stats_acct(struct mdio_bus_stats *stats, bool op, int ret)
 {
 	preempt_disable();
@@ -841,69 +623,5 @@ int mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, int devad,
 }
 EXPORT_SYMBOL_GPL(mdiobus_c45_modify_changed);
 
-/**
- * mdio_bus_match - determine if given MDIO driver supports the given
- *		    MDIO device
- * @dev: target MDIO device
- * @drv: given MDIO driver
- *
- * Return: 1 if the driver supports the device, 0 otherwise
- *
- * Description: This may require calling the devices own match function,
- *   since different classes of MDIO devices have different match criteria.
- */
-static int mdio_bus_match(struct device *dev, const struct device_driver *drv)
-{
-	const struct mdio_driver *mdiodrv = to_mdio_driver(drv);
-	struct mdio_device *mdio = to_mdio_device(dev);
-
-	/* Both the driver and device must type-match */
-	if (!(mdiodrv->mdiodrv.flags & MDIO_DEVICE_IS_PHY) !=
-	    !(mdio->flags & MDIO_DEVICE_FLAG_PHY))
-		return 0;
-
-	if (of_driver_match_device(dev, drv))
-		return 1;
-
-	if (mdio->bus_match)
-		return mdio->bus_match(dev, drv);
-
-	return 0;
-}
-
-static int mdio_uevent(const struct device *dev, struct kobj_uevent_env *env)
-{
-	int rc;
-
-	/* Some devices have extra OF data and an OF-style MODALIAS */
-	rc = of_device_uevent_modalias(dev, env);
-	if (rc != -ENODEV)
-		return rc;
-
-	return 0;
-}
-
-static const struct attribute *const mdio_bus_device_statistics_attrs[] = {
-	&dev_attr_mdio_bus_device_transfers.attr.attr,
-	&dev_attr_mdio_bus_device_errors.attr.attr,
-	&dev_attr_mdio_bus_device_writes.attr.attr,
-	&dev_attr_mdio_bus_device_reads.attr.attr,
-	NULL,
-};
-
-static const struct attribute_group mdio_bus_device_statistics_group = {
-	.name		= "statistics",
-	.attrs_const	= mdio_bus_device_statistics_attrs,
-};
-__ATTRIBUTE_GROUPS(mdio_bus_device_statistics);
-
-const struct bus_type mdio_bus_type = {
-	.name		= "mdio_bus",
-	.dev_groups	= mdio_bus_device_statistics_groups,
-	.match		= mdio_bus_match,
-	.uevent		= mdio_uevent,
-};
-EXPORT_SYMBOL(mdio_bus_type);
-
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("MDIO bus/device layer");
diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c
index d50fe6eb4b02..041576eba47a 100644
--- a/drivers/net/phy/mdio_bus_provider.c
+++ b/drivers/net/phy/mdio_bus_provider.c
@@ -28,6 +28,281 @@
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/unistd.h>
+#include "phylib-internal.h"
+
+/**
+ * mdiobus_release - mii_bus device release callback
+ * @d: the target struct device that contains the mii_bus
+ *
+ * Description: called when the last reference to an mii_bus is
+ * dropped, to free the underlying memory.
+ */
+static void mdiobus_release(struct device *d)
+{
+	struct mii_bus *bus = to_mii_bus(d);
+
+	WARN(bus->state != MDIOBUS_RELEASED &&
+	     /* for compatibility with error handling in drivers */
+	     bus->state != MDIOBUS_ALLOCATED,
+	     "%s: not in RELEASED or ALLOCATED state\n",
+	     bus->id);
+
+	if (bus->state == MDIOBUS_RELEASED)
+		fwnode_handle_put(dev_fwnode(d));
+
+	kfree(bus);
+}
+
+struct mdio_bus_stat_attr {
+	struct device_attribute attr;
+	int address;
+	unsigned int field_offset;
+};
+
+static struct mdio_bus_stat_attr *to_sattr(struct device_attribute *attr)
+{
+	return container_of(attr, struct mdio_bus_stat_attr, attr);
+}
+
+static u64 mdio_bus_get_stat(struct mdio_bus_stats *s, unsigned int offset)
+{
+	const u64_stats_t *stats = (const void *)s + offset;
+	unsigned int start;
+	u64 val = 0;
+
+	do {
+		start = u64_stats_fetch_begin(&s->syncp);
+		val = u64_stats_read(stats);
+	} while (u64_stats_fetch_retry(&s->syncp, start));
+
+	return val;
+}
+
+static ssize_t mdio_bus_stat_field_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct mdio_bus_stat_attr *sattr = to_sattr(attr);
+	struct mii_bus *bus = to_mii_bus(dev);
+	u64 val = 0;
+
+	if (sattr->address < 0) {
+		/* get global stats */
+		for (int i = 0; i < PHY_MAX_ADDR; i++)
+			val += mdio_bus_get_stat(&bus->stats[i],
+						 sattr->field_offset);
+	} else {
+		val = mdio_bus_get_stat(&bus->stats[sattr->address],
+					sattr->field_offset);
+	}
+
+	return sysfs_emit(buf, "%llu\n", val);
+}
+
+static ssize_t mdio_bus_device_stat_field_show(struct device *dev,
+					       struct device_attribute *attr,
+					       char *buf)
+{
+	struct mdio_bus_stat_attr *sattr = to_sattr(attr);
+	struct mdio_device *mdiodev = to_mdio_device(dev);
+	struct mii_bus *bus = mdiodev->bus;
+	int addr = mdiodev->addr;
+	u64 val;
+
+	val = mdio_bus_get_stat(&bus->stats[addr], sattr->field_offset);
+
+	return sysfs_emit(buf, "%llu\n", val);
+}
+
+#define MDIO_BUS_STATS_ATTR(field)					\
+static const struct mdio_bus_stat_attr dev_attr_mdio_bus_##field = {	\
+	.attr = __ATTR(field, 0444, mdio_bus_stat_field_show, NULL),	\
+	.address = -1,							\
+	.field_offset = offsetof(struct mdio_bus_stats, field),		\
+};									\
+static const struct mdio_bus_stat_attr dev_attr_mdio_bus_device_##field = { \
+	.attr = __ATTR(field, 0444, mdio_bus_device_stat_field_show, NULL), \
+	.field_offset = offsetof(struct mdio_bus_stats, field),		\
+}
+
+MDIO_BUS_STATS_ATTR(transfers);
+MDIO_BUS_STATS_ATTR(errors);
+MDIO_BUS_STATS_ATTR(writes);
+MDIO_BUS_STATS_ATTR(reads);
+
+#define MDIO_BUS_STATS_ADDR_ATTR_DECL(field, addr, file)		\
+static const struct mdio_bus_stat_attr					\
+dev_attr_mdio_bus_addr_##field##_##addr = {				\
+	.attr = { .attr = { .name = file, .mode = 0444 },		\
+		     .show = mdio_bus_stat_field_show,			\
+	},								\
+	.address = addr,						\
+	.field_offset = offsetof(struct mdio_bus_stats, field),		\
+}
+
+#define MDIO_BUS_STATS_ADDR_ATTR(field, addr)				\
+	MDIO_BUS_STATS_ADDR_ATTR_DECL(field, addr,			\
+				 __stringify(field) "_" __stringify(addr))
+
+#define MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(addr)			\
+	MDIO_BUS_STATS_ADDR_ATTR(transfers, addr);			\
+	MDIO_BUS_STATS_ADDR_ATTR(errors, addr);				\
+	MDIO_BUS_STATS_ADDR_ATTR(writes, addr);				\
+	MDIO_BUS_STATS_ADDR_ATTR(reads, addr)				\
+
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(0);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(1);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(2);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(3);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(4);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(5);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(6);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(7);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(8);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(9);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(10);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(11);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(12);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(13);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(14);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(15);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(16);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(17);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(18);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(19);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(20);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(21);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(22);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(23);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(24);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(25);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(26);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(27);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(28);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(29);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(30);
+MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(31);
+
+#define MDIO_BUS_STATS_ADDR_ATTR_GROUP(addr)				\
+	&(dev_attr_mdio_bus_addr_transfers_##addr).attr.attr,		\
+	&(dev_attr_mdio_bus_addr_errors_##addr).attr.attr,		\
+	&(dev_attr_mdio_bus_addr_writes_##addr).attr.attr,		\
+	&(dev_attr_mdio_bus_addr_reads_##addr).attr.attr			\
+
+static const struct attribute *const mdio_bus_statistics_attrs[] = {
+	&dev_attr_mdio_bus_transfers.attr.attr,
+	&dev_attr_mdio_bus_errors.attr.attr,
+	&dev_attr_mdio_bus_writes.attr.attr,
+	&dev_attr_mdio_bus_reads.attr.attr,
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(0),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(1),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(2),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(3),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(4),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(5),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(6),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(7),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(8),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(9),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(10),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(11),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(12),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(13),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(14),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(15),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(16),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(17),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(18),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(19),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(20),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(21),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(22),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(23),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(24),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(25),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(26),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(27),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(28),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(29),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(30),
+	MDIO_BUS_STATS_ADDR_ATTR_GROUP(31),
+	NULL,
+};
+
+static const struct attribute_group mdio_bus_statistics_group = {
+	.name		= "statistics",
+	.attrs_const	= mdio_bus_statistics_attrs,
+};
+__ATTRIBUTE_GROUPS(mdio_bus_statistics);
+
+const struct class mdio_bus_class = {
+	.name		= "mdio_bus",
+	.dev_release	= mdiobus_release,
+	.dev_groups	= mdio_bus_statistics_groups,
+};
+
+/**
+ * mdio_bus_match - determine if given MDIO driver supports the given
+ *		    MDIO device
+ * @dev: target MDIO device
+ * @drv: given MDIO driver
+ *
+ * Return: 1 if the driver supports the device, 0 otherwise
+ *
+ * Description: This may require calling the devices own match function,
+ *   since different classes of MDIO devices have different match criteria.
+ */
+static int mdio_bus_match(struct device *dev, const struct device_driver *drv)
+{
+	const struct mdio_driver *mdiodrv = to_mdio_driver(drv);
+	struct mdio_device *mdio = to_mdio_device(dev);
+
+	/* Both the driver and device must type-match */
+	if (!(mdiodrv->mdiodrv.flags & MDIO_DEVICE_IS_PHY) !=
+	    !(mdio->flags & MDIO_DEVICE_FLAG_PHY))
+		return 0;
+
+	if (of_driver_match_device(dev, drv))
+		return 1;
+
+	if (mdio->bus_match)
+		return mdio->bus_match(dev, drv);
+
+	return 0;
+}
+
+static int mdio_uevent(const struct device *dev, struct kobj_uevent_env *env)
+{
+	int rc;
+
+	/* Some devices have extra OF data and an OF-style MODALIAS */
+	rc = of_device_uevent_modalias(dev, env);
+	if (rc != -ENODEV)
+		return rc;
+
+	return 0;
+}
+
+static const struct attribute *const mdio_bus_device_statistics_attrs[] = {
+	&dev_attr_mdio_bus_device_transfers.attr.attr,
+	&dev_attr_mdio_bus_device_errors.attr.attr,
+	&dev_attr_mdio_bus_device_writes.attr.attr,
+	&dev_attr_mdio_bus_device_reads.attr.attr,
+	NULL,
+};
+
+static const struct attribute_group mdio_bus_device_statistics_group = {
+	.name		= "statistics",
+	.attrs_const	= mdio_bus_device_statistics_attrs,
+};
+__ATTRIBUTE_GROUPS(mdio_bus_device_statistics);
+
+const struct bus_type mdio_bus_type = {
+	.name		= "mdio_bus",
+	.dev_groups	= mdio_bus_device_statistics_groups,
+	.match		= mdio_bus_match,
+	.uevent		= mdio_uevent,
+};
 
 /**
  * mdiobus_alloc_size - allocate a mii_bus structure
diff --git a/drivers/net/phy/phylib-internal.h b/drivers/net/phy/phylib-internal.h
index bfb1aa823868..664ed7faa518 100644
--- a/drivers/net/phy/phylib-internal.h
+++ b/drivers/net/phy/phylib-internal.h
@@ -9,6 +9,9 @@
 struct mdio_device;
 struct phy_device;
 
+extern const struct bus_type mdio_bus_type;
+extern const struct class mdio_bus_class;
+
 /*
  * phy_supported_speeds - return all speeds currently supported by a PHY device
  */
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e9b0d7427b0e..5de4b172cd0b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2446,9 +2446,6 @@ int __phy_hwtstamp_set(struct phy_device *phydev,
 
 struct phy_port *phy_get_sfp_port(struct phy_device *phydev);
 
-extern const struct bus_type mdio_bus_type;
-extern const struct class mdio_bus_class;
-
 /**
  * phy_module_driver() - Helper macro for registering PHY drivers
  * @__phy_drivers: array of PHY drivers to register
-- 
cgit v1.2.3


From 2a8c8a03f306e21a0ea74c93d4332119557f4575 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Thu, 12 Mar 2026 11:04:07 +0100
Subject: net/mlx5: Add a shared devlink instance for PFs on same chip

Use the previously introduced shared devlink infrastructure to create
a shared devlink instance for mlx5 PFs that reside on the same physical
chip. The shared instance is identified by the chip's serial number
extracted from PCI VPD (V3 keyword, with fallback to serial number
for older devices).

Each PF that probes calls mlx5_shd_init() which extracts the chip serial
number and uses devlink_shd_get() to get or create the shared instance.
When a PF is removed, mlx5_shd_uninit() calls devlink_shd_put()
to release the reference. The shared instance is automatically destroyed
when the last PF is removed.

Make the PF devlink instances nested in this shared devlink instance,
allowing userspace to identify which PFs belong to the same physical
chip.

Example:

pci/0000:08:00.0: index 0
  nested_devlink:
    auxiliary/mlx5_core.eth.0
devlink_index/1: index 1
  nested_devlink:
    pci/0000:08:00.0
    pci/0000:08:00.1
auxiliary/mlx5_core.eth.0: index 2
pci/0000:08:00.1: index 3
  nested_devlink:
    auxiliary/mlx5_core.eth.1
auxiliary/mlx5_core.eth.1: index 4

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://patch.msgid.link/20260312100407.551173-14-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |  5 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c     | 17 ++++++
 .../net/ethernet/mellanox/mlx5/core/sh_devlink.c   | 61 ++++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/sh_devlink.h   | 12 +++++
 include/linux/mlx5/driver.h                        |  1 +
 5 files changed, 94 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8ffa286a18f5..d39fe9c4a87c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,8 +16,9 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
 		fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
-		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \
-		fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o lib/nv_param.o
+		diag/fw_tracer.o diag/crdump.o devlink.o sh_devlink.o diag/rsc_dump.o \
+		diag/reporter_vnic.o fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o \
+		lib/nv_param.o
 
 #
 # Netdev basic
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fdc3ba20912e..1c35c3fc3bb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -74,6 +74,7 @@
 #include "mlx5_irq.h"
 #include "hwmon.h"
 #include "lag/lag.h"
+#include "sh_devlink.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -1520,10 +1521,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
 	int err;
 
 	devl_lock(devlink);
+	if (dev->shd) {
+		err = devl_nested_devlink_set(dev->shd, devlink);
+		if (err)
+			goto unlock;
+	}
 	devl_register(devlink);
 	err = mlx5_init_one_devl_locked(dev);
 	if (err)
 		devl_unregister(devlink);
+unlock:
 	devl_unlock(devlink);
 	return err;
 }
@@ -2005,6 +2012,13 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto pci_init_err;
 	}
 
+	err = mlx5_shd_init(dev);
+	if (err) {
+		mlx5_core_err(dev, "mlx5_shd_init failed with error code %d\n",
+			      err);
+		goto shd_init_err;
+	}
+
 	err = mlx5_init_one(dev);
 	if (err) {
 		mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
@@ -2018,6 +2032,8 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	return 0;
 
 err_init_one:
+	mlx5_shd_uninit(dev);
+shd_init_err:
 	mlx5_pci_close(dev);
 pci_init_err:
 	mlx5_mdev_uninit(dev);
@@ -2039,6 +2055,7 @@ static void remove_one(struct pci_dev *pdev)
 	mlx5_drain_health_wq(dev);
 	mlx5_sriov_disable(pdev, false);
 	mlx5_uninit_one(dev);
+	mlx5_shd_uninit(dev);
 	mlx5_pci_close(dev);
 	mlx5_mdev_uninit(dev);
 	mlx5_adev_idx_free(dev->priv.adev_idx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
new file mode 100644
index 000000000000..bc33f95302df
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/driver.h>
+#include <net/devlink.h>
+
+#include "sh_devlink.h"
+
+static const struct devlink_ops mlx5_shd_ops = {
+};
+
+int mlx5_shd_init(struct mlx5_core_dev *dev)
+{
+	u8 *vpd_data __free(kfree) = NULL;
+	struct pci_dev *pdev = dev->pdev;
+	unsigned int vpd_size, kw_len;
+	struct devlink *devlink;
+	char *sn, *end;
+	int start;
+	int err;
+
+	if (!mlx5_core_is_pf(dev))
+		return 0;
+
+	vpd_data = pci_vpd_alloc(pdev, &vpd_size);
+	if (IS_ERR(vpd_data)) {
+		err = PTR_ERR(vpd_data);
+		return err == -ENODEV ? 0 : err;
+	}
+	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "V3", &kw_len);
+	if (start < 0) {
+		/* Fall-back to SN for older devices. */
+		start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+						     PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len);
+		if (start < 0)
+			return -ENOENT;
+	}
+	sn = kstrndup(vpd_data + start, kw_len, GFP_KERNEL);
+	if (!sn)
+		return -ENOMEM;
+	/* Firmware may return spaces at the end of the string, strip it. */
+	end = strchrnul(sn, ' ');
+	*end = '\0';
+
+	/* Get or create shared devlink instance */
+	devlink = devlink_shd_get(sn, &mlx5_shd_ops, 0, pdev->dev.driver);
+	kfree(sn);
+	if (!devlink)
+		return -ENOMEM;
+
+	dev->shd = devlink;
+	return 0;
+}
+
+void mlx5_shd_uninit(struct mlx5_core_dev *dev)
+{
+	if (!dev->shd)
+		return;
+
+	devlink_shd_put(dev->shd);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
new file mode 100644
index 000000000000..8ab8d6940227
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_SH_DEVLINK_H__
+#define __MLX5_SH_DEVLINK_H__
+
+#include <linux/mlx5/driver.h>
+
+int mlx5_shd_init(struct mlx5_core_dev *dev);
+void mlx5_shd_uninit(struct mlx5_core_dev *dev);
+
+#endif /* __MLX5_SH_DEVLINK_H__ */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 04dcd09f7517..1268fcf35ec7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -798,6 +798,7 @@ struct mlx5_core_dev {
 	enum mlx5_wc_state wc_state;
 	/* sync write combining state */
 	struct mutex wc_state_lock;
+	struct devlink *shd;
 };
 
 struct mlx5_db {
-- 
cgit v1.2.3


From 0834d6f4abd0ca35b5706d267a6e4b78303a95de Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Thu, 12 Mar 2026 14:02:29 +0100
Subject: PCI: endpoint: Do not mark the BAR succeeding a 64-bit BAR as
 BAR_RESERVED

A BAR that can only be configured as a 64-bit BAR by an EPC driver is
marked as such using the "only_64bit" flag.

Currently, the documentation says that an EPC driver should explicitly
mark the BAR succeeding an "only_64bit" BAR as BAR_RESERVED.

However, a 64-bit BAR will always take up two BARs. It is thus redundant
to mark both BARs.

pci_epc_get_next_free_bar() already skips the BAR succeeding a "only_64bit"
BAR, regardless if the succeeding BAR is marked as BAR_RESERVED or not.

Thus, drop the BAR_RESERVED for a BAR succeeding a "only_64bit" BAR.
No functional changes.

Suggested-by: Manivannan Sadhasivam <mani@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://patch.msgid.link/20260312130229.2282001-13-cassel@kernel.org
---
 drivers/pci/controller/dwc/pci-layerscape-ep.c | 2 --
 drivers/pci/controller/dwc/pcie-keembay.c      | 3 ---
 drivers/pci/controller/dwc/pcie-qcom-ep.c      | 2 --
 drivers/pci/controller/dwc/pcie-tegra194.c     | 1 -
 drivers/pci/controller/dwc/pcie-uniphier-ep.c  | 5 -----
 drivers/pci/controller/pcie-rcar-ep.c          | 3 ---
 include/linux/pci-epc.h                        | 3 +--
 7 files changed, 1 insertion(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c
index a4a800699f89..79d226e0cc80 100644
--- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
+++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
@@ -251,9 +251,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)
 	pci->ops = pcie->drvdata->dw_pcie_ops;
 
 	ls_epc->bar[BAR_2].only_64bit = true;
-	ls_epc->bar[BAR_3].type = BAR_RESERVED;
 	ls_epc->bar[BAR_4].only_64bit = true;
-	ls_epc->bar[BAR_5].type = BAR_RESERVED;
 	ls_epc->linkup_notifier = true;
 
 	pcie->pci = pci;
diff --git a/drivers/pci/controller/dwc/pcie-keembay.c b/drivers/pci/controller/dwc/pcie-keembay.c
index 2666a9c3d67e..7cf2c312ecec 100644
--- a/drivers/pci/controller/dwc/pcie-keembay.c
+++ b/drivers/pci/controller/dwc/pcie-keembay.c
@@ -313,11 +313,8 @@ static const struct pci_epc_features keembay_pcie_epc_features = {
 	.msi_capable		= true,
 	.msix_capable		= true,
 	.bar[BAR_0]		= { .only_64bit = true, },
-	.bar[BAR_1]		= { .type = BAR_RESERVED, },
 	.bar[BAR_2]		= { .only_64bit = true, },
-	.bar[BAR_3]		= { .type = BAR_RESERVED, },
 	.bar[BAR_4]		= { .only_64bit = true, },
-	.bar[BAR_5]		= { .type = BAR_RESERVED, },
 	.align			= SZ_16K,
 };
 
diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c
index 18460f01b2c6..ffb4409c0468 100644
--- a/drivers/pci/controller/dwc/pcie-qcom-ep.c
+++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c
@@ -850,9 +850,7 @@ static const struct pci_epc_features qcom_pcie_epc_features = {
 	.msi_capable = true,
 	.align = SZ_4K,
 	.bar[BAR_0] = { .only_64bit = true, },
-	.bar[BAR_1] = { .type = BAR_RESERVED, },
 	.bar[BAR_2] = { .only_64bit = true, },
-	.bar[BAR_3] = { .type = BAR_RESERVED, },
 };
 
 static const struct pci_epc_features *
diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
index 06571d806ab3..f1f70fb824b2 100644
--- a/drivers/pci/controller/dwc/pcie-tegra194.c
+++ b/drivers/pci/controller/dwc/pcie-tegra194.c
@@ -1993,7 +1993,6 @@ static const struct pci_epc_features tegra_pcie_epc_features = {
 	.msi_capable = true,
 	.bar[BAR_0] = { .type = BAR_FIXED, .fixed_size = SZ_1M,
 			.only_64bit = true, },
-	.bar[BAR_1] = { .type = BAR_RESERVED, },
 	.bar[BAR_2] = { .type = BAR_RESERVED, },
 	.bar[BAR_3] = { .type = BAR_RESERVED, },
 	.bar[BAR_4] = { .type = BAR_RESERVED, },
diff --git a/drivers/pci/controller/dwc/pcie-uniphier-ep.c b/drivers/pci/controller/dwc/pcie-uniphier-ep.c
index d52753060970..b7020131f626 100644
--- a/drivers/pci/controller/dwc/pcie-uniphier-ep.c
+++ b/drivers/pci/controller/dwc/pcie-uniphier-ep.c
@@ -426,9 +426,7 @@ static const struct uniphier_pcie_ep_soc_data uniphier_pro5_data = {
 		.msix_capable = false,
 		.align = 1 << 16,
 		.bar[BAR_0] = { .only_64bit = true, },
-		.bar[BAR_1] = { .type = BAR_RESERVED, },
 		.bar[BAR_2] = { .only_64bit = true, },
-		.bar[BAR_3] = { .type = BAR_RESERVED, },
 		.bar[BAR_4] = { .type = BAR_RESERVED, },
 		.bar[BAR_5] = { .type = BAR_RESERVED, },
 	},
@@ -445,11 +443,8 @@ static const struct uniphier_pcie_ep_soc_data uniphier_nx1_data = {
 		.msix_capable = false,
 		.align = 1 << 12,
 		.bar[BAR_0] = { .only_64bit = true, },
-		.bar[BAR_1] = { .type = BAR_RESERVED, },
 		.bar[BAR_2] = { .only_64bit = true, },
-		.bar[BAR_3] = { .type = BAR_RESERVED, },
 		.bar[BAR_4] = { .only_64bit = true, },
-		.bar[BAR_5] = { .type = BAR_RESERVED, },
 	},
 };
 
diff --git a/drivers/pci/controller/pcie-rcar-ep.c b/drivers/pci/controller/pcie-rcar-ep.c
index 657875ef4657..c2da8ac1f2e8 100644
--- a/drivers/pci/controller/pcie-rcar-ep.c
+++ b/drivers/pci/controller/pcie-rcar-ep.c
@@ -440,13 +440,10 @@ static const struct pci_epc_features rcar_pcie_epc_features = {
 	/* use 64-bit BARs so mark BAR[1,3,5] as reserved */
 	.bar[BAR_0] = { .type = BAR_FIXED, .fixed_size = 128,
 			.only_64bit = true, },
-	.bar[BAR_1] = { .type = BAR_RESERVED, },
 	.bar[BAR_2] = { .type = BAR_FIXED, .fixed_size = 256,
 			.only_64bit = true, },
-	.bar[BAR_3] = { .type = BAR_RESERVED, },
 	.bar[BAR_4] = { .type = BAR_FIXED, .fixed_size = 256,
 			.only_64bit = true, },
-	.bar[BAR_5] = { .type = BAR_RESERVED, },
 };
 
 static const struct pci_epc_features*
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index c021c7af175f..c981ea7d52c0 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -206,8 +206,7 @@ enum pci_epc_bar_type {
  * @fixed_size: the fixed size, only applicable if type is BAR_FIXED_MASK.
  * @only_64bit: if true, an EPF driver is not allowed to choose if this BAR
  *		should be configured as 32-bit or 64-bit, the EPF driver must
- *		configure this BAR as 64-bit. Additionally, the BAR succeeding
- *		this BAR must be set to type BAR_RESERVED.
+ *		configure this BAR as 64-bit.
  *
  *		only_64bit should not be set on a BAR of type BAR_RESERVED.
  *		(If BARx is a 64-bit BAR that an EPF driver is not allowed to
-- 
cgit v1.2.3


From 27ce1d8ecb9b9ae025b9e9e199845624bc950998 Mon Sep 17 00:00:00 2001
From: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Date: Thu, 12 Mar 2026 14:02:30 +0100
Subject: PCI: endpoint: Allow only_64bit on BAR_RESERVED

Remove the documentation that forbids setting only_64bit on a BAR of type
BAR_RESERVED.

When a reserved BAR is 64-bit by default, setting only_64bit is the most
accurate description. If we later add support to disable a reserved BAR
(e.g. disable_bar() for BARs that were never set via set_bar()), the
implementation will need to clear the adjacent BAR (upper 32 bits) as well;
having only_64bit set documents that requirement.

Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://patch.msgid.link/20260312130229.2282001-14-cassel@kernel.org
---
 include/linux/pci-epc.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index c981ea7d52c0..5c59f5606869 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -207,11 +207,6 @@ enum pci_epc_bar_type {
  * @only_64bit: if true, an EPF driver is not allowed to choose if this BAR
  *		should be configured as 32-bit or 64-bit, the EPF driver must
  *		configure this BAR as 64-bit.
- *
- *		only_64bit should not be set on a BAR of type BAR_RESERVED.
- *		(If BARx is a 64-bit BAR that an EPF driver is not allowed to
- *		touch, then both BARx and BARx+1 must be set to type
- *		BAR_RESERVED.)
  */
 struct pci_epc_bar_desc {
 	enum pci_epc_bar_type type;
-- 
cgit v1.2.3


From f51644eb40a73677fcd0c92d8174eddde5d0be0e Mon Sep 17 00:00:00 2001
From: Koichiro Den <den@valinux.co.jp>
Date: Thu, 12 Mar 2026 14:02:31 +0100
Subject: PCI: endpoint: Describe reserved subregions within BARs

Some endpoint controllers expose platform-owned, fixed register windows
within a BAR that EPF drivers must not reprogram (e.g. a BAR marked
BAR_RESERVED). Even in that case, EPF drivers may need to reference a
well-defined subset of that BAR, e.g. to reuse an integrated DMA
controller MMIO window as a doorbell target.

Introduce struct pci_epc_bar_rsvd_region and extend struct
pci_epc_bar_desc so EPC drivers can advertise such fixed subregions in a
controller-agnostic way.

No functional change for existing users.

Signed-off-by: Koichiro Den <den@valinux.co.jp>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Manivannan Sadhasivam <mani@kernel.org>
Tested-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Tested-by: Koichiro Den <den@valinux.co.jp>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260312130229.2282001-15-cassel@kernel.org
---
 include/linux/pci-epc.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 5c59f5606869..ebcdf70aa9b9 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -200,6 +200,30 @@ enum pci_epc_bar_type {
 	BAR_RESERVED,
 };
 
+/**
+ * enum pci_epc_bar_rsvd_region_type - type of a fixed subregion behind a BAR
+ * @PCI_EPC_BAR_RSVD_DMA_CTRL_MMIO: Integrated DMA controller MMIO window
+ *
+ * BARs marked BAR_RESERVED are owned by the SoC/EPC hardware and must not be
+ * reprogrammed by EPF drivers. Some of them still expose fixed subregions that
+ * EPFs may want to reference (e.g. embedded doorbell fallback).
+ */
+enum pci_epc_bar_rsvd_region_type {
+	PCI_EPC_BAR_RSVD_DMA_CTRL_MMIO = 0,
+};
+
+/**
+ * struct pci_epc_bar_rsvd_region - fixed subregion behind a BAR
+ * @type: reserved region type
+ * @offset: offset within the BAR aperture
+ * @size: size of the reserved region
+ */
+struct pci_epc_bar_rsvd_region {
+	enum pci_epc_bar_rsvd_region_type	type;
+	resource_size_t				offset;
+	resource_size_t				size;
+};
+
 /**
  * struct pci_epc_bar_desc - hardware description for a BAR
  * @type: the type of the BAR
@@ -207,11 +231,15 @@ enum pci_epc_bar_type {
  * @only_64bit: if true, an EPF driver is not allowed to choose if this BAR
  *		should be configured as 32-bit or 64-bit, the EPF driver must
  *		configure this BAR as 64-bit.
+ * @nr_rsvd_regions: number of fixed subregions described for BAR_RESERVED
+ * @rsvd_regions: fixed subregions behind BAR_RESERVED
  */
 struct pci_epc_bar_desc {
 	enum pci_epc_bar_type type;
 	u64 fixed_size;
 	bool only_64bit;
+	u8 nr_rsvd_regions;
+	const struct pci_epc_bar_rsvd_region *rsvd_regions;
 };
 
 /**
-- 
cgit v1.2.3


From 33642e9e36dc084e4fc9245a266c9843bc8303b9 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Thu, 12 Mar 2026 14:02:33 +0100
Subject: PCI: endpoint: Introduce pci_epc_bar_type BAR_DISABLED

Add a pci_epc_bar_type BAR_DISABLED to more clearly differentiate from
BAR_RESERVED.

This BAR type will only be used to describe a BAR that the EPC driver
should disable, and will thus never be available to an EPF driver.
(Unlike BAR_RESERVED, which will never be disabled by default by an EPC
driver.)

Co-developed-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Manivannan Sadhasivam <mani@kernel.org>
Tested-by: Koichiro Den <den@valinux.co.jp>
Tested-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Reviewed-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Link: https://patch.msgid.link/20260312130229.2282001-17-cassel@kernel.org
---
 drivers/pci/endpoint/pci-epc-core.c |  5 +++--
 include/linux/pci-epc.h             | 10 +++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index e546b3dbb240..6c3c58185fc5 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -103,8 +103,9 @@ enum pci_barno pci_epc_get_next_free_bar(const struct pci_epc_features
 		bar++;
 
 	for (i = bar; i < PCI_STD_NUM_BARS; i++) {
-		/* If the BAR is not reserved, return it. */
-		if (epc_features->bar[i].type != BAR_RESERVED)
+		/* If the BAR is not reserved or disabled, return it. */
+		if (epc_features->bar[i].type != BAR_RESERVED &&
+		    epc_features->bar[i].type != BAR_DISABLED)
 			return i;
 	}
 
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index ebcdf70aa9b9..334c2b7578d0 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -191,13 +191,21 @@ struct pci_epc {
  * @BAR_RESIZABLE: The BAR implements the PCI-SIG Resizable BAR Capability.
  *		   NOTE: An EPC driver can currently only set a single supported
  *		   size.
- * @BAR_RESERVED: The BAR should not be touched by an EPF driver.
+ * @BAR_RESERVED: Used for HW-backed BARs (e.g. MSI-X table, DMA regs). The BAR
+ *		  should not be disabled by an EPC driver. The BAR should not be
+ *		  reprogrammed by an EPF driver. An EPF driver is allowed to
+ *		  disable the BAR if absolutely necessary. (However, right now
+ *		  there is no EPC operation to disable a BAR that has not been
+ *		  programmed using pci_epc_set_bar().)
+ * @BAR_DISABLED: The BAR should be disabled by an EPC driver. The BAR will be
+ *		  unavailable to an EPF driver.
  */
 enum pci_epc_bar_type {
 	BAR_PROGRAMMABLE = 0,
 	BAR_FIXED,
 	BAR_RESIZABLE,
 	BAR_RESERVED,
+	BAR_DISABLED,
 };
 
 /**
-- 
cgit v1.2.3


From 45c2a55d13c698ba6a281315676934c44225b034 Mon Sep 17 00:00:00 2001
From: Unnathi Chalicheemala <unnathi.chalicheemala@oss.qualcomm.com>
Date: Thu, 5 Mar 2026 19:12:05 -0800
Subject: soc: qcom: llcc: Add per-slice counter and common llcc slice
 descriptor

Fix incorrect slice activation/deactivation accounting by replacing the
bitmap-based activation tracking with per-slice atomic reference counters.
This resolves mismatches that occur when multiple client drivers vote for
the same slice or when llcc_slice_getd() is called multiple times.

As part of this fix, simplify slice descriptor handling by eliminating
dynamic allocation. llcc_slice_getd() now returns a pointer to a
preallocated descriptor, removing the need for repeated allocation/free
cycles and ensuring consistent reference tracking across all users.

Signed-off-by: Unnathi Chalicheemala <unnathi.chalicheemala@oss.qualcomm.com>
Signed-off-by: Francisco Munoz Ruiz <francisco.ruiz@oss.qualcomm.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260305-external_llcc_changes1set-v1-1-6347e52e648e@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 drivers/soc/qcom/llcc-qcom.c       | 57 +++++++++++++++++++-------------------
 include/linux/soc/qcom/llcc-qcom.h |  8 +++---
 2 files changed, 32 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c
index e221e3c4982b..8fdca7658393 100644
--- a/drivers/soc/qcom/llcc-qcom.c
+++ b/drivers/soc/qcom/llcc-qcom.c
@@ -5,7 +5,6 @@
  */
 
 #include <linux/bitfield.h>
-#include <linux/bitmap.h>
 #include <linux/bitops.h>
 #include <linux/cleanup.h>
 #include <linux/device.h>
@@ -4535,8 +4534,7 @@ static struct llcc_drv_data *drv_data = (void *) -EPROBE_DEFER;
 struct llcc_slice_desc *llcc_slice_getd(u32 uid)
 {
 	const struct llcc_slice_config *cfg;
-	struct llcc_slice_desc *desc;
-	u32 sz, count;
+	u32 sz, i;
 
 	if (IS_ERR(drv_data))
 		return ERR_CAST(drv_data);
@@ -4544,21 +4542,14 @@ struct llcc_slice_desc *llcc_slice_getd(u32 uid)
 	cfg = drv_data->cfg;
 	sz = drv_data->cfg_size;
 
-	for (count = 0; cfg && count < sz; count++, cfg++)
+	for (i = 0; cfg && i < sz; i++, cfg++)
 		if (cfg->usecase_id == uid)
 			break;
 
-	if (count == sz || !cfg)
+	if (i == sz)
 		return ERR_PTR(-ENODEV);
 
-	desc = kzalloc_obj(*desc);
-	if (!desc)
-		return ERR_PTR(-ENOMEM);
-
-	desc->slice_id = cfg->slice_id;
-	desc->slice_size = cfg->max_cap;
-
-	return desc;
+	return &drv_data->desc[i];
 }
 EXPORT_SYMBOL_GPL(llcc_slice_getd);
 
@@ -4569,7 +4560,7 @@ EXPORT_SYMBOL_GPL(llcc_slice_getd);
 void llcc_slice_putd(struct llcc_slice_desc *desc)
 {
 	if (!IS_ERR_OR_NULL(desc))
-		kfree(desc);
+		return;
 }
 EXPORT_SYMBOL_GPL(llcc_slice_putd);
 
@@ -4645,7 +4636,8 @@ int llcc_slice_activate(struct llcc_slice_desc *desc)
 		return -EINVAL;
 
 	mutex_lock(&drv_data->lock);
-	if (test_bit(desc->slice_id, drv_data->bitmap)) {
+	/* Already active; try to take another reference. */
+	if (refcount_inc_not_zero(&desc->refcount)) {
 		mutex_unlock(&drv_data->lock);
 		return 0;
 	}
@@ -4659,7 +4651,8 @@ int llcc_slice_activate(struct llcc_slice_desc *desc)
 		return ret;
 	}
 
-	__set_bit(desc->slice_id, drv_data->bitmap);
+	/* Set first reference */
+	refcount_set(&desc->refcount, 1);
 	mutex_unlock(&drv_data->lock);
 
 	return ret;
@@ -4685,10 +4678,12 @@ int llcc_slice_deactivate(struct llcc_slice_desc *desc)
 		return -EINVAL;
 
 	mutex_lock(&drv_data->lock);
-	if (!test_bit(desc->slice_id, drv_data->bitmap)) {
+	/* refcount > 1, drop one ref and we’re done. */
+	if (refcount_dec_not_one(&desc->refcount)) {
 		mutex_unlock(&drv_data->lock);
 		return 0;
 	}
+
 	act_ctrl_val = ACT_CTRL_OPCODE_DEACTIVATE << ACT_CTRL_OPCODE_SHIFT;
 
 	ret = llcc_update_act_ctrl(desc->slice_id, act_ctrl_val,
@@ -4698,7 +4693,8 @@ int llcc_slice_deactivate(struct llcc_slice_desc *desc)
 		return ret;
 	}
 
-	__clear_bit(desc->slice_id, drv_data->bitmap);
+	/* Finalize: atomically transition 1 -> 0 */
+	WARN_ON_ONCE(!refcount_dec_if_one(&desc->refcount));
 	mutex_unlock(&drv_data->lock);
 
 	return ret;
@@ -4742,7 +4738,7 @@ static int _qcom_llcc_cfg_program(const struct llcc_slice_config *config,
 	u32 attr1_val;
 	u32 attr0_val;
 	u32 max_cap_cacheline;
-	struct llcc_slice_desc desc;
+	struct llcc_slice_desc *desc;
 
 	attr1_val = config->cache_mode;
 	attr1_val |= config->probe_target_ways << ATTR1_PROBE_TARGET_WAYS_SHIFT;
@@ -4891,8 +4887,11 @@ static int _qcom_llcc_cfg_program(const struct llcc_slice_config *config,
 	}
 
 	if (config->activate_on_init) {
-		desc.slice_id = config->slice_id;
-		ret = llcc_slice_activate(&desc);
+		desc = llcc_slice_getd(config->usecase_id);
+		if (IS_ERR(desc))
+			return PTR_ERR(desc);
+
+		ret = llcc_slice_activate(desc);
 	}
 
 	return ret;
@@ -5205,18 +5204,18 @@ static int qcom_llcc_probe(struct platform_device *pdev)
 
 	llcc_cfg = cfg->sct_data;
 	sz = cfg->size;
-
-	for (i = 0; i < sz; i++)
-		if (llcc_cfg[i].slice_id > drv_data->max_slices)
-			drv_data->max_slices = llcc_cfg[i].slice_id;
-
-	drv_data->bitmap = devm_bitmap_zalloc(dev, drv_data->max_slices,
-					      GFP_KERNEL);
-	if (!drv_data->bitmap) {
+	drv_data->desc = devm_kcalloc(dev, sz, sizeof(struct llcc_slice_desc), GFP_KERNEL);
+	if (!drv_data->desc) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
+	for (i = 0; i < sz; i++) {
+		drv_data->desc[i].slice_id = llcc_cfg[i].slice_id;
+		drv_data->desc[i].slice_size = llcc_cfg[i].max_cap;
+		refcount_set(&drv_data->desc[i].refcount, 0);
+	}
+
 	drv_data->cfg = llcc_cfg;
 	drv_data->cfg_size = sz;
 	drv_data->edac_reg_offset = cfg->edac_reg_offset;
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 8243ab3a12a8..227125d84318 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -91,10 +91,12 @@
  * struct llcc_slice_desc - Cache slice descriptor
  * @slice_id: llcc slice id
  * @slice_size: Size allocated for the llcc slice
+ * @refcount: Atomic counter to track activate/deactivate calls
  */
 struct llcc_slice_desc {
 	u32 slice_id;
 	size_t slice_size;
+	refcount_t refcount;
 };
 
 /**
@@ -152,11 +154,10 @@ struct llcc_edac_reg_offset {
  * @edac_reg_offset: Offset of the LLCC EDAC registers
  * @lock: mutex associated with each slice
  * @cfg_size: size of the config data table
- * @max_slices: max slices as read from device tree
  * @num_banks: Number of llcc banks
- * @bitmap: Bit map to track the active slice ids
  * @ecc_irq: interrupt for llcc cache error detection and reporting
  * @ecc_irq_configured: 'True' if firmware has already configured the irq propagation
+ * @desc: Array pointer of pre-allocated LLCC slice descriptors
  * @version: Indicates the LLCC version
  */
 struct llcc_drv_data {
@@ -167,12 +168,11 @@ struct llcc_drv_data {
 	const struct llcc_edac_reg_offset *edac_reg_offset;
 	struct mutex lock;
 	u32 cfg_size;
-	u32 max_slices;
 	u32 num_banks;
-	unsigned long *bitmap;
 	int ecc_irq;
 	bool ecc_irq_configured;
 	u32 version;
+	struct llcc_slice_desc *desc;
 };
 
 #if IS_ENABLED(CONFIG_QCOM_LLCC)
-- 
cgit v1.2.3


From e4ee7621d732162ea2ec714ae76dac2f70519417 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@oss.qualcomm.com>
Date: Tue, 10 Mar 2026 00:03:30 +0100
Subject: soc: qcom: qmi: Enumerate the service IDs of QMI

The QMI framework proposes a set of services which are defined by an
integer identifier. The different QMI client lookup for the services
via this identifier. Moreover, the function qmi_add_lookup() and
qmi_add_server() must match the service ID but the code in different
places set the same value but with a different macro name. These
macros are spreaded across the different subsystems implementing the
protocols associated with a service. It would make more sense to
define them in the QMI header for the sake of consistency and clarity.

This change use an unified naming for the services and enumerate the
ones implemented in the Linux kernel. More services can come later and
put the service ID in this same header.

Signed-off-by: Daniel Lezcano <daniel.lezcano@oss.qualcomm.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260309230346.3584252-2-daniel.lezcano@oss.qualcomm.com
[bjorn: Lower case hex constants]
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/soc/qcom/qmi.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/qcom/qmi.h b/include/linux/soc/qcom/qmi.h
index 291cdc7ef49c..b9dcb437a0be 100644
--- a/include/linux/soc/qcom/qmi.h
+++ b/include/linux/soc/qcom/qmi.h
@@ -92,6 +92,18 @@ struct qmi_elem_info {
 #define QMI_ERR_INCOMPATIBLE_STATE_V01		90
 #define QMI_ERR_NOT_SUPPORTED_V01		94
 
+/*
+ * Enumerate the IDs of the QMI services
+ */
+#define QMI_SERVICE_ID_TEST		0x0f	/*   15 */
+#define QMI_SERVICE_ID_SSCTL		0x2b	/*   43 */
+#define QMI_SERVICE_ID_IPA		0x31	/*   49 */
+#define QMI_SERVICE_ID_SERVREG_LOC	0x40	/*   64 */
+#define QMI_SERVICE_ID_SERVREG_NOTIF	0x42	/*   66 */
+#define QMI_SERVICE_ID_WLFW		0x45	/*   69 */
+#define QMI_SERVICE_ID_SLIMBUS		0x301	/*  769 */
+#define QMI_SERVICE_ID_USB_AUDIO_STREAM 0x41d	/* 1053 */
+
 /**
  * struct qmi_response_type_v01 - common response header (decoded)
  * @result:	result of the transaction
-- 
cgit v1.2.3


From dea046e7f46f2357124a465e058c92cac3e351c5 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Mon, 9 Mar 2026 13:42:41 +0100
Subject: gpio: remove machine hogs

With no more users, remove legacy machine hog API from the kernel.

Reviewed-by: Linus Walleij <linusw@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20260309-gpio-hog-fwnode-v2-5-4e61f3dbf06a@oss.qualcomm.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
 Documentation/driver-api/gpio/board.rst | 16 --------
 drivers/gpio/gpiolib.c                  | 71 ---------------------------------
 include/linux/gpio/machine.h            | 33 ---------------
 3 files changed, 120 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst
index 069b54d8591b..0993cac891fb 100644
--- a/Documentation/driver-api/gpio/board.rst
+++ b/Documentation/driver-api/gpio/board.rst
@@ -239,22 +239,6 @@ mapping and is thus transparent to GPIO consumers.
 A set of functions such as gpiod_set_value() is available to work with
 the new descriptor-oriented interface.
 
-Boards using platform data can also hog GPIO lines by defining GPIO hog tables.
-
-.. code-block:: c
-
-        struct gpiod_hog gpio_hog_table[] = {
-                GPIO_HOG("gpio.0", 10, "foo", GPIO_ACTIVE_LOW, GPIOD_OUT_HIGH),
-                { }
-        };
-
-And the table can be added to the board code as follows::
-
-        gpiod_add_hogs(gpio_hog_table);
-
-The line will be hogged as soon as the gpiochip is created or - in case the
-chip was created earlier - when the hog table is registered.
-
 Arrays of pins
 --------------
 In addition to requesting pins belonging to a function one by one, a device may
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 4a57d9882600..56fda7891d55 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -103,9 +103,6 @@ static DEFINE_MUTEX(gpio_devices_lock);
 /* Ensures coherence during read-only accesses to the list of GPIO devices. */
 DEFINE_STATIC_SRCU(gpio_devices_srcu);
 
-static DEFINE_MUTEX(gpio_machine_hogs_mutex);
-static LIST_HEAD(gpio_machine_hogs);
-
 const char *const gpio_suffixes[] = { "gpios", "gpio", NULL };
 
 static void gpiochip_free_hogs(struct gpio_chip *gc);
@@ -930,36 +927,6 @@ err_remove_device:
 	return ret;
 }
 
-static void gpiochip_machine_hog(struct gpio_chip *gc, struct gpiod_hog *hog)
-{
-	struct gpio_desc *desc;
-	int rv;
-
-	desc = gpiochip_get_desc(gc, hog->chip_hwnum);
-	if (IS_ERR(desc)) {
-		gpiochip_err(gc, "%s: unable to get GPIO desc: %ld\n",
-			     __func__, PTR_ERR(desc));
-		return;
-	}
-
-	rv = gpiod_hog(desc, hog->line_name, hog->lflags, hog->dflags);
-	if (rv)
-		gpiod_err(desc, "%s: unable to hog GPIO line (%s:%u): %d\n",
-			  __func__, gc->label, hog->chip_hwnum, rv);
-}
-
-static void gpiochip_machine_hog_lines(struct gpio_chip *gc)
-{
-	struct gpiod_hog *hog;
-
-	guard(mutex)(&gpio_machine_hogs_mutex);
-
-	list_for_each_entry(hog, &gpio_machine_hogs, list) {
-		if (!strcmp(gc->label, hog->chip_label))
-			gpiochip_machine_hog(gc, hog);
-	}
-}
-
 int gpiochip_add_hog(struct gpio_chip *gc, struct fwnode_handle *fwnode)
 {
 	struct fwnode_handle *gc_node = dev_fwnode(&gc->gpiodev->dev);
@@ -1047,8 +1014,6 @@ static int gpiochip_hog_lines(struct gpio_chip *gc)
 			return ret;
 	}
 
-	gpiochip_machine_hog_lines(gc);
-
 	return 0;
 }
 
@@ -4578,42 +4543,6 @@ void gpiod_remove_lookup_table(struct gpiod_lookup_table *table)
 }
 EXPORT_SYMBOL_GPL(gpiod_remove_lookup_table);
 
-/**
- * gpiod_add_hogs() - register a set of GPIO hogs from machine code
- * @hogs: table of gpio hog entries with a zeroed sentinel at the end
- */
-void gpiod_add_hogs(struct gpiod_hog *hogs)
-{
-	struct gpiod_hog *hog;
-
-	guard(mutex)(&gpio_machine_hogs_mutex);
-
-	for (hog = &hogs[0]; hog->chip_label; hog++) {
-		list_add_tail(&hog->list, &gpio_machine_hogs);
-
-		/*
-		 * The chip may have been registered earlier, so check if it
-		 * exists and, if so, try to hog the line now.
-		 */
-		struct gpio_device *gdev __free(gpio_device_put) =
-				gpio_device_find_by_label(hog->chip_label);
-		if (gdev)
-			gpiochip_machine_hog(gpio_device_get_chip(gdev), hog);
-	}
-}
-EXPORT_SYMBOL_GPL(gpiod_add_hogs);
-
-void gpiod_remove_hogs(struct gpiod_hog *hogs)
-{
-	struct gpiod_hog *hog;
-
-	guard(mutex)(&gpio_machine_hogs_mutex);
-
-	for (hog = &hogs[0]; hog->chip_label; hog++)
-		list_del(&hog->list);
-}
-EXPORT_SYMBOL_GPL(gpiod_remove_hogs);
-
 static bool gpiod_match_lookup_table(struct device *dev,
 				     const struct gpiod_lookup_table *table)
 {
diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
index 44e5f162973e..5eb88f5d0630 100644
--- a/include/linux/gpio/machine.h
+++ b/include/linux/gpio/machine.h
@@ -46,23 +46,6 @@ struct gpiod_lookup_table {
 	struct gpiod_lookup table[];
 };
 
-/**
- * struct gpiod_hog - GPIO line hog table
- * @chip_label: name of the chip the GPIO belongs to
- * @chip_hwnum: hardware number (i.e. relative to the chip) of the GPIO
- * @line_name: consumer name for the hogged line
- * @lflags: bitmask of gpio_lookup_flags GPIO_* values
- * @dflags: GPIO flags used to specify the direction and value
- */
-struct gpiod_hog {
-	struct list_head list;
-	const char *chip_label;
-	u16 chip_hwnum;
-	const char *line_name;
-	unsigned long lflags;
-	int dflags;
-};
-
 /*
  * Helper for lookup tables with just one single lookup for a device.
  */
@@ -95,24 +78,10 @@ static struct gpiod_lookup_table _name = {				\
 	.flags = _flags,                                                  \
 }
 
-/*
- * Simple definition of a single GPIO hog in an array.
- */
-#define GPIO_HOG(_chip_label, _chip_hwnum, _line_name, _lflags, _dflags)  \
-(struct gpiod_hog) {                                                      \
-	.chip_label = _chip_label,                                        \
-	.chip_hwnum = _chip_hwnum,                                        \
-	.line_name = _line_name,                                          \
-	.lflags = _lflags,                                                \
-	.dflags = _dflags,                                                \
-}
-
 #ifdef CONFIG_GPIOLIB
 void gpiod_add_lookup_table(struct gpiod_lookup_table *table);
 void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n);
 void gpiod_remove_lookup_table(struct gpiod_lookup_table *table);
-void gpiod_add_hogs(struct gpiod_hog *hogs);
-void gpiod_remove_hogs(struct gpiod_hog *hogs);
 #else /* ! CONFIG_GPIOLIB */
 static inline
 void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {}
@@ -120,8 +89,6 @@ static inline
 void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n) {}
 static inline
 void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {}
-static inline void gpiod_add_hogs(struct gpiod_hog *hogs) {}
-static inline void gpiod_remove_hogs(struct gpiod_hog *hogs) {}
 #endif /* CONFIG_GPIOLIB */
 
 #endif /* __LINUX_GPIO_MACHINE_H */
-- 
cgit v1.2.3


From a25f48fd920b557e6ad02f692f690520c82f5914 Mon Sep 17 00:00:00 2001
From: Alban Bedel <alban.bedel@lht.dlh.de>
Date: Wed, 11 Mar 2026 15:31:20 +0100
Subject: gpio: kempld: Implement the interrupt controller

Add a GPIO IRQ chip implementation for the kempld GPIO controller. Of
note is only how the parent IRQ is obtained.

The IRQ for the GPIO controller can be configured in the BIOS, along
with the IRQ for the I2C controller. These IRQ are returned by ACPI
but this information is only usable if both IRQ are configured. When
only one is configured, only one is returned making it impossible to
know which one it is.

Luckily the BIOS will set the configured IRQ in the PLD registers, so
it can be read from there instead, and that also work on platforms
without ACPI.

The vendor driver allowed to override the IRQ using a module
parameters, so there are boards in field which used this parameter
instead of properly configuring the BIOS. This implementation provides
this as well for compatibility.

Signed-off-by: Alban Bedel <alban.bedel@lht.dlh.de>
Link: https://patch.msgid.link/20260311143120.2179347-5-alban.bedel@lht.dlh.de
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
 drivers/gpio/Kconfig       |   1 +
 drivers/gpio/gpio-kempld.c | 192 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/kempld.h |   1 +
 3 files changed, 194 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index b45fb799e36c..d665afe19709 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1440,6 +1440,7 @@ config GPIO_JANZ_TTL
 config GPIO_KEMPLD
 	tristate "Kontron ETX / COMexpress GPIO"
 	depends on MFD_KEMPLD
+	select GPIOLIB_IRQCHIP
 	help
 	  This enables support for the PLD GPIO interface on some Kontron ETX
 	  and COMexpress (ETXexpress) modules.
diff --git a/drivers/gpio/gpio-kempld.c b/drivers/gpio/gpio-kempld.c
index 7dd94ff6f2df..5a63df3ea5fa 100644
--- a/drivers/gpio/gpio-kempld.c
+++ b/drivers/gpio/gpio-kempld.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/bitops.h>
 #include <linux/errno.h>
+#include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/gpio/driver.h>
 #include <linux/mfd/kempld.h>
@@ -19,13 +20,26 @@
 #define KEMPLD_GPIO_MASK(x)		(BIT((x) % 8))
 #define KEMPLD_GPIO_DIR			0x40
 #define KEMPLD_GPIO_LVL			0x42
+#define KEMPLD_GPIO_STS			0x44
 #define KEMPLD_GPIO_EVT_LVL_EDGE	0x46
+#define KEMPLD_GPIO_EVT_LOW_HIGH	0x48
 #define KEMPLD_GPIO_IEN			0x4A
+#define KEMPLD_GPIO_OUT_LVL		0x4E
+
+/* The IRQ to use if none was configured in the BIOS */
+static unsigned int gpio_irq;
+module_param_hw(gpio_irq, uint, irq, 0444);
+MODULE_PARM_DESC(gpio_irq, "Set legacy GPIO IRQ (1-15)");
 
 struct kempld_gpio_data {
 	struct gpio_chip		chip;
 	struct kempld_device_data	*pld;
 	u8				out_lvl_reg;
+
+	struct mutex			irq_lock;
+	u16				ien;
+	u16				evt_low_high;
+	u16				evt_lvl_edge;
 };
 
 /*
@@ -193,6 +207,180 @@ static int kempld_gpio_pincount(struct kempld_device_data *pld)
 	return evt ? __ffs(evt) : 16;
 }
 
+static void kempld_irq_mask(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct kempld_gpio_data *gpio = gpiochip_get_data(chip);
+
+	gpio->ien &= ~BIT(irqd_to_hwirq(data));
+	gpiochip_disable_irq(chip, irqd_to_hwirq(data));
+}
+
+static void kempld_irq_unmask(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct kempld_gpio_data *gpio = gpiochip_get_data(chip);
+
+	gpiochip_enable_irq(chip, irqd_to_hwirq(data));
+	gpio->ien |= BIT(irqd_to_hwirq(data));
+}
+
+static int kempld_irq_set_type(struct irq_data *data, unsigned int type)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct kempld_gpio_data *gpio = gpiochip_get_data(chip);
+
+	switch (type) {
+	case IRQ_TYPE_EDGE_RISING:
+		gpio->evt_low_high |= BIT(data->hwirq);
+		gpio->evt_lvl_edge |= BIT(data->hwirq);
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		gpio->evt_low_high &= ~BIT(data->hwirq);
+		gpio->evt_lvl_edge |= BIT(data->hwirq);
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		gpio->evt_low_high |= BIT(data->hwirq);
+		gpio->evt_lvl_edge &= ~BIT(data->hwirq);
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		gpio->evt_low_high &= ~BIT(data->hwirq);
+		gpio->evt_lvl_edge &= ~BIT(data->hwirq);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void kempld_irq_bus_lock(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct kempld_gpio_data *gpio = gpiochip_get_data(chip);
+
+	mutex_lock(&gpio->irq_lock);
+}
+
+static void kempld_irq_bus_sync_unlock(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct kempld_gpio_data *gpio = gpiochip_get_data(chip);
+	struct kempld_device_data *pld = gpio->pld;
+
+	kempld_get_mutex(pld);
+	kempld_write16(pld, KEMPLD_GPIO_EVT_LVL_EDGE, gpio->evt_lvl_edge);
+	kempld_write16(pld, KEMPLD_GPIO_EVT_LOW_HIGH, gpio->evt_low_high);
+	kempld_write16(pld, KEMPLD_GPIO_IEN, gpio->ien);
+	kempld_release_mutex(pld);
+
+	mutex_unlock(&gpio->irq_lock);
+}
+
+static const struct irq_chip kempld_irqchip = {
+	.name			= "kempld-gpio",
+	.irq_mask		= kempld_irq_mask,
+	.irq_unmask		= kempld_irq_unmask,
+	.irq_set_type		= kempld_irq_set_type,
+	.irq_bus_lock		= kempld_irq_bus_lock,
+	.irq_bus_sync_unlock	= kempld_irq_bus_sync_unlock,
+	.flags			= IRQCHIP_IMMUTABLE,
+	GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
+static irqreturn_t kempld_gpio_irq_handler(int irq, void *data)
+{
+	struct kempld_gpio_data *gpio = data;
+	struct gpio_chip *chip = &gpio->chip;
+	unsigned int pin, child_irq;
+	unsigned long status;
+
+	kempld_get_mutex(gpio->pld);
+
+	status = kempld_read16(gpio->pld, KEMPLD_GPIO_STS);
+	if (status)
+		kempld_write16(gpio->pld, KEMPLD_GPIO_STS, status);
+
+	kempld_release_mutex(gpio->pld);
+
+	status &= gpio->ien;
+	if (!status)
+		return IRQ_NONE;
+
+	for_each_set_bit(pin, &status, chip->ngpio) {
+		child_irq = irq_find_mapping(chip->irq.domain, pin);
+		handle_nested_irq(child_irq);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int kempld_gpio_irq_init(struct device *dev,
+				struct kempld_gpio_data *gpio)
+{
+	struct kempld_device_data *pld = gpio->pld;
+	struct gpio_chip *chip = &gpio->chip;
+	struct gpio_irq_chip *girq;
+	unsigned int irq;
+	int ret;
+
+	/* Get the IRQ configured by the BIOS in the PLD */
+	kempld_get_mutex(pld);
+	irq = kempld_read8(pld, KEMPLD_IRQ_GPIO);
+	kempld_release_mutex(pld);
+
+	if (irq == 0xff) {
+		dev_info(dev, "GPIO controller has no IRQ support\n");
+		return 0;
+	}
+
+	/* Allow overriding the IRQ with the module parameter */
+	if (gpio_irq > 0) {
+		dev_warn(dev, "Forcing IRQ to %d\n", gpio_irq);
+		irq &= ~KEMPLD_IRQ_GPIO_MASK;
+		irq |= gpio_irq & KEMPLD_IRQ_GPIO_MASK;
+	}
+
+	if (!(irq & KEMPLD_IRQ_GPIO_MASK)) {
+		dev_warn(dev, "No IRQ configured\n");
+		return 0;
+	}
+
+	/* Get the current config, disable all child interrupts, clear them
+	 * and set the parent IRQ
+	 */
+	kempld_get_mutex(pld);
+	gpio->evt_low_high = kempld_read16(pld, KEMPLD_GPIO_EVT_LOW_HIGH);
+	gpio->evt_lvl_edge = kempld_read16(pld, KEMPLD_GPIO_EVT_LVL_EDGE);
+	kempld_write16(pld, KEMPLD_GPIO_IEN, 0);
+	kempld_write16(pld, KEMPLD_GPIO_STS, 0xFFFF);
+	kempld_write16(pld, KEMPLD_IRQ_GPIO, irq);
+	kempld_release_mutex(pld);
+
+	girq = &chip->irq;
+	gpio_irq_chip_set_chip(girq, &kempld_irqchip);
+
+	girq->parent_handler = NULL;
+	girq->num_parents = 0;
+	girq->parents = NULL;
+	girq->default_type = IRQ_TYPE_NONE;
+	girq->handler = handle_simple_irq;
+	girq->threaded = true;
+
+	mutex_init(&gpio->irq_lock);
+
+	ret = devm_request_threaded_irq(dev, irq & KEMPLD_IRQ_GPIO_MASK,
+					NULL, kempld_gpio_irq_handler,
+					IRQF_ONESHOT, chip->label,
+					gpio);
+	if (ret) {
+		dev_err(dev, "failed to request irq %d\n", irq);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int kempld_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -247,6 +435,10 @@ static int kempld_gpio_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
+	ret = kempld_gpio_irq_init(dev, gpio);
+	if (ret)
+		return ret;
+
 	ret = devm_gpiochip_add_data(dev, chip, gpio);
 	if (ret) {
 		dev_err(dev, "Could not register GPIO chip\n");
diff --git a/include/linux/mfd/kempld.h b/include/linux/mfd/kempld.h
index 643c096b93ac..2dbd80abfd1d 100644
--- a/include/linux/mfd/kempld.h
+++ b/include/linux/mfd/kempld.h
@@ -37,6 +37,7 @@
 #define KEMPLD_SPEC_GET_MINOR(x)	(x & 0x0f)
 #define KEMPLD_SPEC_GET_MAJOR(x)	((x >> 4) & 0x0f)
 #define KEMPLD_IRQ_GPIO			0x35
+#define KEMPLD_IRQ_GPIO_MASK		0x0f
 #define KEMPLD_IRQ_I2C			0x36
 #define KEMPLD_CFG			0x37
 #define KEMPLD_CFG_GPIO_I2C_MUX		(1 << 0)
-- 
cgit v1.2.3


From 428c56525bf5dbc3bd5e30014df1f5213f8bd7c8 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Fri, 13 Mar 2026 09:22:18 +0100
Subject: jump_label: use ATOMIC_INIT() for initialization of .enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently ATOMIC_INIT() is not used because in the past that macro was
provided by linux/atomic.h which is not usable from linux/jump_label.h.
However since commit 7ca8cf5347f7 ("locking/atomic: Move ATOMIC_INIT
into linux/types.h") the macro only requires linux/types.h.

Remove the now unnecessary workaround and the associated assertions.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260313-jump_label-cleanup-v2-1-35d3c0bde549@linutronix.de
---
 include/linux/jump_label.h | 11 ++---------
 kernel/jump_label.c        |  9 ---------
 2 files changed, 2 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index fdb79dd1ebd8..e494b360d36d 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -238,18 +238,11 @@ extern void static_key_enable_cpuslocked(struct static_key *key);
 extern void static_key_disable_cpuslocked(struct static_key *key);
 extern enum jump_label_type jump_label_init_type(struct jump_entry *entry);
 
-/*
- * We should be using ATOMIC_INIT() for initializing .enabled, but
- * the inclusion of atomic.h is problematic for inclusion of jump_label.h
- * in 'low-level' headers. Thus, we are initializing .enabled with a
- * raw value, but have added a BUILD_BUG_ON() to catch any issues in
- * jump_label_init() see: kernel/jump_label.c.
- */
 #define STATIC_KEY_INIT_TRUE					\
-	{ .enabled = { 1 },					\
+	{ .enabled = ATOMIC_INIT(1),				\
 	  { .type = JUMP_TYPE_TRUE } }
 #define STATIC_KEY_INIT_FALSE					\
-	{ .enabled = { 0 },					\
+	{ .enabled = ATOMIC_INIT(0),				\
 	  { .type = JUMP_TYPE_FALSE } }
 
 #else  /* !CONFIG_JUMP_LABEL */
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 7cb19e601426..e851e4b37d0e 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -529,15 +529,6 @@ void __init jump_label_init(void)
 	struct static_key *key = NULL;
 	struct jump_entry *iter;
 
-	/*
-	 * Since we are initializing the static_key.enabled field with
-	 * with the 'raw' int values (to avoid pulling in atomic.h) in
-	 * jump_label.h, let's make sure that is safe. There are only two
-	 * cases to check since we initialize to 0 or 1.
-	 */
-	BUILD_BUG_ON((int)ATOMIC_INIT(0) != 0);
-	BUILD_BUG_ON((int)ATOMIC_INIT(1) != 1);
-
 	if (static_key_initialized)
 		return;
 
-- 
cgit v1.2.3


From acb38872d4cbec5b6825345d9d757e21d2d9d953 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Fri, 13 Mar 2026 09:22:19 +0100
Subject: jump_label: remove workaround for old compilers in initializations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The extra braces for the initialization of the anonymous union members
were added in commit cd8d860dcce9 ("jump_label: Fix anonymous union
initialization") to compensate for limitations in gcc < 4.6.

Versions of gcc this old are not supported anymore,
so drop the workaround.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260313-jump_label-cleanup-v2-2-35d3c0bde549@linutronix.de
---
 include/linux/jump_label.h | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index e494b360d36d..b9c7b0ebf7b9 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -87,13 +87,6 @@ struct static_key {
 	atomic_t enabled;
 #ifdef CONFIG_JUMP_LABEL
 /*
- * Note:
- *   To make anonymous unions work with old compilers, the static
- *   initialization of them requires brackets. This creates a dependency
- *   on the order of the struct with the initializers. If any fields
- *   are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need
- *   to be modified.
- *
  * bit 0 => 1 if key is initially true
  *	    0 if initially false
  * bit 1 => 1 if points to struct static_key_mod
@@ -240,10 +233,10 @@ extern enum jump_label_type jump_label_init_type(struct jump_entry *entry);
 
 #define STATIC_KEY_INIT_TRUE					\
 	{ .enabled = ATOMIC_INIT(1),				\
-	  { .type = JUMP_TYPE_TRUE } }
+	  .type = JUMP_TYPE_TRUE }
 #define STATIC_KEY_INIT_FALSE					\
 	{ .enabled = ATOMIC_INIT(0),				\
-	  { .type = JUMP_TYPE_FALSE } }
+	  .type = JUMP_TYPE_FALSE }
 
 #else  /* !CONFIG_JUMP_LABEL */
 
-- 
cgit v1.2.3


From 2deccd5c862a0337a691bcfaa87919b4216e6103 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 9 Mar 2026 17:40:42 +0100
Subject: cleanup: Optimize guards

Andrew reported that a guard() conversion of zone_lock increased the
code size unnecessarily.

It turns out the unconditional __GUARD_IS_ERR() is to blame. As
explored earlier [1], __GUARD_IS_ERR(), similar to IS_ERR_OR_NULL(),
generates somewhat sub-optimal code.

However, looking at things again, it is possible to avoid doing the
__GUARD_IS_ERR() unconditionally. Revert the normal destructors to a
simple NULL test and only add the IS_ERR bit to COND guards.

This cures the reported overhead; as compiled by GCC-16:

	page_alloc.o:

pre:	Total: Before=45299, After=45371, chg +0.16%
post:	Total: Before=45299, After=45026, chg -0.60%

[1] https://lkml.kernel.org/r/20250513085001.GC25891@noisy.programming.kicks-ass.net

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260309164516.GE606826@noisy.programming.kicks-ass.net
---
 include/linux/cleanup.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index dbc4162921e9..ea95ca4bc11c 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -286,15 +286,18 @@ static __always_inline _type class_##_name##_constructor(_init_args)	\
 	__no_context_analysis						\
 { _type t = _init; return t; }
 
-#define EXTEND_CLASS(_name, ext, _init, _init_args...)			\
-typedef lock_##_name##_t lock_##_name##ext##_t;			\
+#define EXTEND_CLASS_COND(_name, ext, _cond, _init, _init_args...)	\
+typedef lock_##_name##_t lock_##_name##ext##_t;				\
 typedef class_##_name##_t class_##_name##ext##_t;			\
-static __always_inline void class_##_name##ext##_destructor(class_##_name##_t *p) \
-{ class_##_name##_destructor(p); }					\
+static __always_inline void class_##_name##ext##_destructor(class_##_name##_t *_T) \
+{ if (_cond) return; class_##_name##_destructor(_T); }			\
 static __always_inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
 	__no_context_analysis \
 { class_##_name##_t t = _init; return t; }
 
+#define EXTEND_CLASS(_name, ext, _init, _init_args...)			\
+	EXTEND_CLASS_COND(_name, ext, 0, _init, _init_args)
+
 #define CLASS(_name, var)						\
 	class_##_name##_t var __cleanup(class_##_name##_destructor) =	\
 		class_##_name##_constructor
@@ -394,12 +397,12 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
 	__DEFINE_GUARD_LOCK_PTR(_name, _T)
 
 #define DEFINE_GUARD(_name, _type, _lock, _unlock) \
-	DEFINE_CLASS(_name, _type, if (!__GUARD_IS_ERR(_T)) { _unlock; }, ({ _lock; _T; }), _type _T); \
+	DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
 	DEFINE_CLASS_IS_GUARD(_name)
 
 #define DEFINE_GUARD_COND_4(_name, _ext, _lock, _cond) \
 	__DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \
-	EXTEND_CLASS(_name, _ext, \
+	EXTEND_CLASS_COND(_name, _ext, __GUARD_IS_ERR(*_T), \
 		     ({ void *_t = _T; int _RET = (_lock); if (_T && !(_cond)) _t = ERR_PTR(_RET); _t; }), \
 		     class_##_name##_t _T) \
 	static __always_inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \
@@ -488,7 +491,7 @@ typedef struct {							\
 static __always_inline void class_##_name##_destructor(class_##_name##_t *_T) \
 	__no_context_analysis						\
 {									\
-	if (!__GUARD_IS_ERR(_T->lock)) { _unlock; }			\
+	if (_T->lock) { _unlock; }					\
 }									\
 									\
 __DEFINE_GUARD_LOCK_PTR(_name, &_T->lock)
@@ -565,7 +568,7 @@ __DEFINE_LOCK_GUARD_0(_name, _lock)
 
 #define DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _cond)		\
 	__DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true);		\
-	EXTEND_CLASS(_name, _ext,					\
+	EXTEND_CLASS_COND(_name, _ext, __GUARD_IS_ERR(_T->lock),	\
 		     ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\
 		        int _RET = (_lock);                             \
 		        if (_T->lock && !(_cond)) _T->lock = ERR_PTR(_RET);\
-- 
cgit v1.2.3


From 756a0e011cfca0b45a48464aa25b05d9a9c2fb0b Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 13 Mar 2026 10:15:07 -0700
Subject: locking: Fix rwlock support in <linux/spinlock_up.h>

Architecture support for rwlocks must be available whether or not
CONFIG_DEBUG_SPINLOCK has been defined. Move the definitions of the
arch_{read,write}_{lock,trylock,unlock}() macros such that these become
visbile if CONFIG_DEBUG_SPINLOCK=n.

This patch prepares for converting do_raw_{read,write}_trylock() into
inline functions. Without this patch that conversion triggers a build
failure for UP architectures, e.g. arm-ep93xx. I used the following
kernel configuration to build the kernel for that architecture:

	CONFIG_ARCH_MULTIPLATFORM=y
	CONFIG_ARCH_MULTI_V7=n
	CONFIG_ATAGS=y
	CONFIG_MMU=y
	CONFIG_ARCH_MULTI_V4T=y
	CONFIG_CPU_LITTLE_ENDIAN=y
	CONFIG_ARCH_EP93XX=y

Fixes: fb1c8f93d869 ("[PATCH] spinlock consolidation")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260313171510.230998-2-bvanassche@acm.org
---
 include/linux/spinlock_up.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 1e84e71ca495..3a50976471d7 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -48,16 +48,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	lock->slock = 1;
 }
 
-/*
- * Read-write spinlocks. No debug version.
- */
-#define arch_read_lock(lock)		do { barrier(); (void)(lock); } while (0)
-#define arch_write_lock(lock)		do { barrier(); (void)(lock); } while (0)
-#define arch_read_trylock(lock)	({ barrier(); (void)(lock); 1; })
-#define arch_write_trylock(lock)	({ barrier(); (void)(lock); 1; })
-#define arch_read_unlock(lock)		do { barrier(); (void)(lock); } while (0)
-#define arch_write_unlock(lock)	do { barrier(); (void)(lock); } while (0)
-
 #else /* DEBUG_SPINLOCK */
 #define arch_spin_is_locked(lock)	((void)(lock), 0)
 /* for sched/core.c and kernel_lock.c: */
@@ -68,4 +58,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 
 #define arch_spin_is_contended(lock)	(((void)(lock), 0))
 
+/*
+ * Read-write spinlocks. No debug version.
+ */
+#define arch_read_lock(lock)		do { barrier(); (void)(lock); } while (0)
+#define arch_write_lock(lock)		do { barrier(); (void)(lock); } while (0)
+#define arch_read_trylock(lock)	({ barrier(); (void)(lock); 1; })
+#define arch_write_trylock(lock)	({ barrier(); (void)(lock); 1; })
+#define arch_read_unlock(lock)		do { barrier(); (void)(lock); } while (0)
+#define arch_write_unlock(lock)	do { barrier(); (void)(lock); } while (0)
+
 #endif /* __LINUX_SPINLOCK_UP_H */
-- 
cgit v1.2.3


From c4d3b8c77d85082d32250c505beb1d9e46ee47ee Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 13 Mar 2026 10:15:08 -0700
Subject: locking: Add lock context support in do_raw_{read,write}_trylock()

Convert do_raw_{read,write}_trylock() from macros into inline functions
and annotate these inline functions with __cond_acquires_shared() or
__cond_acquires() as appropriate. This change is necessary to build
kernel drivers or subsystems that use rwlock synchronization objects with
lock context analysis enabled. The return type 'int' matches the return
type for CONFIG_DEBUG_SPINLOCK=y.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260313171510.230998-3-bvanassche@acm.org
---
 include/linux/rwlock.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 21ceefc4a49f..4e67cd934d8f 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -37,10 +37,20 @@ extern int do_raw_write_trylock(rwlock_t *lock) __cond_acquires(true, lock);
  extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock);
 #else
 # define do_raw_read_lock(rwlock)	do {__acquire_shared(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0)
-# define do_raw_read_trylock(rwlock)	arch_read_trylock(&(rwlock)->raw_lock)
+static inline int do_raw_read_trylock(rwlock_t *rwlock)
+	__cond_acquires_shared(true, rwlock)
+	__no_context_analysis
+{
+	return arch_read_trylock(&(rwlock)->raw_lock);
+}
 # define do_raw_read_unlock(rwlock)	do {arch_read_unlock(&(rwlock)->raw_lock); __release_shared(lock); } while (0)
 # define do_raw_write_lock(rwlock)	do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0)
-# define do_raw_write_trylock(rwlock)	arch_write_trylock(&(rwlock)->raw_lock)
+static inline int do_raw_write_trylock(rwlock_t *rwlock)
+	__cond_acquires(true, rwlock)
+	__no_context_analysis
+{
+	return arch_write_trylock(&(rwlock)->raw_lock);
+}
 # define do_raw_write_unlock(rwlock)	do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
 #endif
 
-- 
cgit v1.2.3


From cb15d8e6cbe8d085ac585016deb2e1e0107b99e5 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linusw@kernel.org>
Date: Sat, 14 Mar 2026 23:56:49 +0100
Subject: ASoC: codec: arizona: Convert to use GPIO descriptors

This converts the Arizona driver to use GPIO descriptors
exclusively, deletes the legacy code path an updates the
in-tree user of legacy GPIO.

The GPIO lines for mic detect polarity and headphone ID
detection are made exclusively descriptor-oriented. The
headphone ID detection could actually only be used by
the legacy GPIO code, but I converted it to use a
descriptor if someone would actually need it so we don't
just drop useful code.

The compatible "wlf,hpdet-id-gpio" is not in the device
tree bindings and only intended to be used by software
nodes if any. If someone insists I can try to add a
binding for it, but I doubt there is any real user so
it seems pointless.

Signed-off-by: Linus Walleij <linusw@kernel.org>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260314-asoc-arizona-v1-1-ecc9a165307c@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm/mach-s3c/mach-crag6410-module.c |  6 +-
 include/linux/mfd/arizona/pdata.h        | 10 ----
 sound/soc/codecs/arizona-jack.c          | 95 ++++++++++----------------------
 sound/soc/codecs/arizona.h               |  1 +
 4 files changed, 34 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-s3c/mach-crag6410-module.c b/arch/arm/mach-s3c/mach-crag6410-module.c
index 4ffcf024b09d..14b0f9cc103e 100644
--- a/arch/arm/mach-s3c/mach-crag6410-module.c
+++ b/arch/arm/mach-s3c/mach-crag6410-module.c
@@ -239,7 +239,6 @@ static struct gpiod_lookup_table wm8994_gpiod_table = {
 static struct arizona_pdata wm5102_reva_pdata = {
 	.gpio_base = CODEC_GPIO_BASE,
 	.irq_flags = IRQF_TRIGGER_HIGH,
-	.micd_pol_gpio = CODEC_GPIO_BASE + 4,
 	.micd_rate = 6,
 	.gpio_defaults = {
 		[2] = 0x10000, /* AIF3TXLRCLK */
@@ -265,6 +264,8 @@ static struct gpiod_lookup_table wm5102_reva_gpiod_table = {
 	.table = {
 		GPIO_LOOKUP("GPION", 7,
 			    "wlf,ldoena", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("arizona", 4,
+			    "wlf,micd-pol", GPIO_ACTIVE_HIGH),
 		{ },
 	},
 };
@@ -272,7 +273,6 @@ static struct gpiod_lookup_table wm5102_reva_gpiod_table = {
 static struct arizona_pdata wm5102_pdata = {
 	.gpio_base = CODEC_GPIO_BASE,
 	.irq_flags = IRQF_TRIGGER_HIGH,
-	.micd_pol_gpio = CODEC_GPIO_BASE + 2,
 	.gpio_defaults = {
 		[2] = 0x10000, /* AIF3TXLRCLK */
 		[3] = 0x4,     /* OPCLK */
@@ -297,6 +297,8 @@ static struct gpiod_lookup_table wm5102_gpiod_table = {
 	.table = {
 		GPIO_LOOKUP("GPION", 7,
 			    "wlf,ldo1ena", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("arizona", 2,
+			    "wlf,micd-pol", GPIO_ACTIVE_HIGH),
 		{ },
 	},
 };
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index f72e6d4b14a7..d465dcd8c90a 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -117,11 +117,6 @@ struct arizona_pdata {
 	/** Check for line output with HPDET method */
 	bool hpdet_acc_id_line;
 
-#ifdef CONFIG_GPIOLIB_LEGACY
-	/** GPIO used for mic isolation with HPDET */
-	int hpdet_id_gpio;
-#endif
-
 	/** Channel to use for headphone detection */
 	unsigned int hpdet_channel;
 
@@ -131,11 +126,6 @@ struct arizona_pdata {
 	/** Extra debounce timeout used during initial mic detection (ms) */
 	unsigned int micd_detect_debounce;
 
-#ifdef CONFIG_GPIOLIB_LEGACY
-	/** GPIO for mic detection polarity */
-	int micd_pol_gpio;
-#endif
-
 	/** Mic detect ramp rate */
 	unsigned int micd_bias_start_time;
 
diff --git a/sound/soc/codecs/arizona-jack.c b/sound/soc/codecs/arizona-jack.c
index 303c1d44ebd8..a9063bac2752 100644
--- a/sound/soc/codecs/arizona-jack.c
+++ b/sound/soc/codecs/arizona-jack.c
@@ -11,7 +11,6 @@
 #include <linux/interrupt.h>
 #include <linux/err.h>
 #include <linux/gpio/consumer.h>
-#include <linux/gpio.h>
 #include <linux/input.h>
 #include <linux/pm_runtime.h>
 #include <linux/property.h>
@@ -459,11 +458,6 @@ static int arizona_hpdet_do_id(struct arizona_priv *info, int *reading,
 			       bool *mic)
 {
 	struct arizona *arizona = info->arizona;
-#ifdef CONFIG_GPIOLIB_LEGACY
-	int id_gpio = arizona->pdata.hpdet_id_gpio;
-#else
-	int id_gpio = 0;
-#endif
 
 	if (!arizona->pdata.hpdet_acc_id)
 		return 0;
@@ -474,9 +468,8 @@ static int arizona_hpdet_do_id(struct arizona_priv *info, int *reading,
 	 */
 	info->hpdet_res[info->num_hpdet_res++] = *reading;
 
-#ifdef CONFIG_GPIOLIB_LEGACY
 	/* Only check the mic directly if we didn't already ID it */
-	if (id_gpio && info->num_hpdet_res == 1) {
+	if (info->hpdet_id_gpio && info->num_hpdet_res == 1) {
 		dev_dbg(arizona->dev, "Measuring mic\n");
 
 		regmap_update_bits(arizona->regmap,
@@ -486,13 +479,12 @@ static int arizona_hpdet_do_id(struct arizona_priv *info, int *reading,
 				   ARIZONA_ACCDET_MODE_HPR |
 				   info->micd_modes[0].src);
 
-		gpio_set_value_cansleep(id_gpio, 1);
+		gpiod_set_value_cansleep(info->hpdet_id_gpio, 1);
 
 		regmap_update_bits(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1,
 				   ARIZONA_HP_POLL, ARIZONA_HP_POLL);
 		return -EAGAIN;
 	}
-#endif
 
 	/* OK, got both.  Now, compare... */
 	dev_dbg(arizona->dev, "HPDET measured %d %d\n",
@@ -514,7 +506,7 @@ static int arizona_hpdet_do_id(struct arizona_priv *info, int *reading,
 	/*
 	 * If we measure the mic as high impedance
 	 */
-	if (!id_gpio || info->hpdet_res[1] > 50) {
+	if (!info->hpdet_id_gpio || info->hpdet_res[1] > 50) {
 		dev_dbg(arizona->dev, "Detected mic\n");
 		*mic = true;
 		info->detecting = true;
@@ -533,9 +525,6 @@ static irqreturn_t arizona_hpdet_irq(int irq, void *data)
 {
 	struct arizona_priv *info = data;
 	struct arizona *arizona = info->arizona;
-#ifdef CONFIG_GPIOLIB_LEGACY
-	int id_gpio = arizona->pdata.hpdet_id_gpio;
-#endif
 	int ret, reading, state, report;
 	bool mic = false;
 
@@ -591,10 +580,8 @@ done:
 
 	arizona_extcon_hp_clamp(info, false);
 
-#ifdef CONFIG_GPIOLIB_LEGACY
-	if (id_gpio)
-		gpio_set_value_cansleep(id_gpio, 0);
-#endif
+	if (info->hpdet_id_gpio)
+		gpiod_set_value_cansleep(info->hpdet_id_gpio, 0);
 
 	/* If we have a mic then reenable MICDET */
 	if (state && (mic || info->mic))
@@ -1325,58 +1312,33 @@ int arizona_jack_codec_dev_probe(struct arizona_priv *info, struct device *dev)
 		regmap_update_bits(arizona->regmap, ARIZONA_GP_SWITCH_1,
 				ARIZONA_SW1_MODE_MASK, arizona->pdata.gpsw);
 
-#ifdef CONFIG_GPIOLIB_LEGACY
-	if (pdata->micd_pol_gpio > 0) {
-		if (info->micd_modes[0].gpio)
-			mode = GPIOF_OUT_INIT_HIGH;
-		else
-			mode = GPIOF_OUT_INIT_LOW;
-
-		ret = devm_gpio_request_one(dev, pdata->micd_pol_gpio,
-					    mode, "MICD polarity");
-		if (ret != 0) {
-			dev_err(arizona->dev, "Failed to request GPIO%d: %d\n",
-				pdata->micd_pol_gpio, ret);
-			return ret;
-		}
-
-		info->micd_pol_gpio = gpio_to_desc(pdata->micd_pol_gpio);
-	} else
-#endif
-	{
-		if (info->micd_modes[0].gpio)
-			mode = GPIOD_OUT_HIGH;
-		else
-			mode = GPIOD_OUT_LOW;
+	if (info->micd_modes[0].gpio)
+		mode = GPIOD_OUT_HIGH;
+	else
+		mode = GPIOD_OUT_LOW;
 
-		/* We can't use devm here because we need to do the get
-		 * against the MFD device, as that is where the of_node
-		 * will reside, but if we devm against that the GPIO
-		 * will not be freed if the extcon driver is unloaded.
-		 */
-		info->micd_pol_gpio = gpiod_get_optional(arizona->dev,
-							 "wlf,micd-pol",
-							 mode);
-		if (IS_ERR(info->micd_pol_gpio)) {
-			ret = PTR_ERR(info->micd_pol_gpio);
-			dev_err_probe(arizona->dev, ret, "getting microphone polarity GPIO\n");
-			return ret;
-		}
+	/* We can't use devm here because we need to do the get
+	 * against the MFD device, as that is where the of_node
+	 * will reside, but if we devm against that the GPIO
+	 * will not be freed if the extcon driver is unloaded.
+	 */
+	info->micd_pol_gpio = gpiod_get_optional(arizona->dev,
+						 "wlf,micd-pol",
+						 mode);
+	if (IS_ERR(info->micd_pol_gpio)) {
+		ret = PTR_ERR(info->micd_pol_gpio);
+		dev_err_probe(arizona->dev, ret, "getting microphone polarity GPIO\n");
+		return ret;
 	}
 
-#ifdef CONFIG_GPIOLIB_LEGACY
-	if (arizona->pdata.hpdet_id_gpio > 0) {
-		ret = devm_gpio_request_one(dev, arizona->pdata.hpdet_id_gpio,
-					    GPIOF_OUT_INIT_LOW,
-					    "HPDET");
-		if (ret != 0) {
-			dev_err(arizona->dev, "Failed to request GPIO%d: %d\n",
-				arizona->pdata.hpdet_id_gpio, ret);
-			gpiod_put(info->micd_pol_gpio);
-			return ret;
-		}
+	info->hpdet_id_gpio = gpiod_get_optional(arizona->dev,
+						 "wlf,hpdet-id-gpio",
+						 mode);
+	if (IS_ERR(info->hpdet_id_gpio)) {
+		ret = PTR_ERR(info->hpdet_id_gpio);
+		dev_err_probe(arizona->dev, ret, "getting headphone detect ID GPIO\n");
+		return ret;
 	}
-#endif
 
 	return 0;
 }
@@ -1385,6 +1347,7 @@ EXPORT_SYMBOL_GPL(arizona_jack_codec_dev_probe);
 int arizona_jack_codec_dev_remove(struct arizona_priv *info)
 {
 	gpiod_put(info->micd_pol_gpio);
+	gpiod_put(info->hpdet_id_gpio);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(arizona_jack_codec_dev_remove);
diff --git a/sound/soc/codecs/arizona.h b/sound/soc/codecs/arizona.h
index ecd8890eefc1..0703182d87b3 100644
--- a/sound/soc/codecs/arizona.h
+++ b/sound/soc/codecs/arizona.h
@@ -100,6 +100,7 @@ struct arizona_priv {
 	struct snd_soc_jack *jack;
 	struct regulator *micvdd;
 	struct gpio_desc *micd_pol_gpio;
+	struct gpio_desc *hpdet_id_gpio;
 
 	u16 last_jackdet;
 
-- 
cgit v1.2.3


From f8e761655997cc0ee434fb5f35570d2e93d3a707 Mon Sep 17 00:00:00 2001
From: Alexei Lazar <alazar@nvidia.com>
Date: Mon, 9 Mar 2026 11:34:27 +0200
Subject: net/mlx5: Add IFC bits for shared headroom pool PBMC support

Add hardware interface definitions for shared headroom pool (SHP) in
port buffer management:

- shp_pbmc_pbsr_support: capability bit in PCAM enhanced features
  indicating device support for shared headroom pool in PBMC/PBSR.
- shared_headroom_pool: buffer entry in PBMC register (pbmc_reg_bits)
  for the shared headroom pool configuration, reusing the bufferx
  layout; reduce trailing reserved region accordingly.

Signed-off-by: Alexei Lazar <alazar@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260309093435.1850724-2-tariqt@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a3948b36820d..a76c54bf1927 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10845,7 +10845,9 @@ struct mlx5_ifc_pcam_enhanced_features_bits {
 	u8         fec_200G_per_lane_in_pplm[0x1];
 	u8         reserved_at_1e[0x2a];
 	u8         fec_100G_per_lane_in_pplm[0x1];
-	u8         reserved_at_49[0xa];
+	u8         reserved_at_49[0x2];
+	u8         shp_pbmc_pbsr_support[0x1];
+	u8         reserved_at_4c[0x7];
 	u8	   buffer_ownership[0x1];
 	u8	   resereved_at_54[0x14];
 	u8         fec_50G_per_lane_in_pplm[0x1];
@@ -12090,8 +12092,9 @@ struct mlx5_ifc_pbmc_reg_bits {
 	u8         port_buffer_size[0x10];
 
 	struct mlx5_ifc_bufferx_reg_bits buffer[10];
+	struct mlx5_ifc_bufferx_reg_bits shared_headroom_pool;
 
-	u8         reserved_at_2e0[0x80];
+	u8         reserved_at_320[0x40];
 };
 
 struct mlx5_ifc_sbpr_reg_bits {
-- 
cgit v1.2.3


From 691dffc7255e740bc3df1c68b50b36786aadeb3a Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Mon, 9 Mar 2026 11:34:28 +0200
Subject: net/mlx5: Add silent mode set/query and VHCA RX IFC bits

Update the mlx5 IFC headers with newly defined capability and
command-layout bits:

- Add silent_mode_query and rename silent_mode to silent_mode_set cap
  fields.
- Add forward_vhca_rx and MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX.
- Expose silent mode fields in the L2 table query command structures.

Update the SD support check to use the new capability name
(silent_mode_set) to match the updated IFC definition.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260309093435.1850724-3-tariqt@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c |  2 +-
 include/linux/mlx5/mlx5_ifc.h                    | 19 ++++++++++++++-----
 3 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index c348ee62cd3a..16b28028609d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -1183,7 +1183,7 @@ int mlx5_fs_cmd_set_l2table_entry_silent(struct mlx5_core_dev *dev, u8 silent_mo
 {
 	u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {};
 
-	if (silent_mode && !MLX5_CAP_GEN(dev, silent_mode))
+	if (silent_mode && !MLX5_CAP_GEN(dev, silent_mode_set))
 		return -EOPNOTSUPP;
 
 	MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 954942ad93c5..762c783156b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -107,7 +107,7 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
 	/* Disconnect secondaries from the network */
 	if (!MLX5_CAP_GEN(dev, eswitch_manager))
 		return false;
-	if (!MLX5_CAP_GEN(dev, silent_mode))
+	if (!MLX5_CAP_GEN(dev, silent_mode_set))
 		return false;
 
 	/* RX steering from primary to secondaries */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a76c54bf1927..8fa4fb3d36cf 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -469,7 +469,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8	   table_miss_action_domain[0x1];
 	u8         termination_table[0x1];
 	u8         reformat_and_fwd_to_table[0x1];
-	u8         reserved_at_1a[0x2];
+	u8         forward_vhca_rx[0x1];
+	u8         reserved_at_1b[0x1];
 	u8         ipsec_encrypt[0x1];
 	u8         ipsec_decrypt[0x1];
 	u8         sw_owner_v2[0x1];
@@ -2012,12 +2013,14 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         disable_local_lb_mc[0x1];
 	u8         log_min_hairpin_wq_data_sz[0x5];
 	u8         reserved_at_3e8[0x1];
-	u8         silent_mode[0x1];
+	u8         silent_mode_set[0x1];
 	u8         vhca_state[0x1];
 	u8         log_max_vlan_list[0x5];
 	u8         reserved_at_3f0[0x3];
 	u8         log_max_current_mc_list[0x5];
-	u8         reserved_at_3f8[0x3];
+	u8         reserved_at_3f8[0x1];
+	u8         silent_mode_query[0x1];
+	u8         reserved_at_3fa[0x1];
 	u8         log_max_current_uc_list[0x5];
 
 	u8         general_obj_types[0x40];
@@ -2279,6 +2282,7 @@ enum mlx5_ifc_flow_destination_type {
 	MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT        = 0x0,
 	MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE   = 0x1,
 	MLX5_IFC_FLOW_DESTINATION_TYPE_TIR          = 0x2,
+	MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX	    = 0x4,
 	MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6,
 	MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK       = 0x8,
 	MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE   = 0xA,
@@ -6265,7 +6269,9 @@ struct mlx5_ifc_query_l2_table_entry_out_bits {
 
 	u8         reserved_at_40[0xa0];
 
-	u8         reserved_at_e0[0x13];
+	u8         reserved_at_e0[0x11];
+	u8         silent_mode[0x1];
+	u8         reserved_at_f2[0x1];
 	u8         vlan_valid[0x1];
 	u8         vlan[0xc];
 
@@ -6281,7 +6287,10 @@ struct mlx5_ifc_query_l2_table_entry_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_at_40[0x60];
+	u8         reserved_at_40[0x40];
+
+	u8         silent_mode_query[0x1];
+	u8         reserved_at_81[0x1f];
 
 	u8         reserved_at_a0[0x8];
 	u8         table_index[0x18];
-- 
cgit v1.2.3


From 971b28accc09436fe6a6d5afd667dcbfb3ed7e03 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Mon, 9 Mar 2026 11:34:32 +0200
Subject: net/mlx5: LAG, replace mlx5_get_dev_index with LAG sequence number

Introduce mlx5_lag_get_dev_seq() which returns a device's sequence
number within the LAG: master is always 0, remaining devices numbered
sequentially. This provides a stable index for peer flow tracking and
vport ordering without depending on native_port_num.

Replace mlx5_get_dev_index() usage in en_tc.c (peer flow array
indexing) and ib_rep.c (vport index ordering) with the new API.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260309093435.1850724-7-tariqt@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/mlx5/ib_rep.c               |  4 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   |  9 +++---
 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 34 +++++++++++++++++++++++
 include/linux/mlx5/lag.h                          | 11 ++++++++
 4 files changed, 53 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/mlx5/lag.h

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 621834d75205..df8f049c5806 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
  */
 
+#include <linux/mlx5/lag.h>
 #include <linux/mlx5/vport.h>
 #include "ib_rep.h"
 #include "srq.h"
@@ -134,7 +135,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 				/* Only 1 ib port is the representor for all uplinks */
 					peer_n_ports--;
 
-				if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
+				if (mlx5_lag_get_dev_seq(peer_dev) <
+				    mlx5_lag_get_dev_seq(dev))
 					vport_index += peer_n_ports;
 			}
 		}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 1434b65d4746..397a93584fd6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -35,6 +35,7 @@
 #include <net/sch_generic.h>
 #include <net/pkt_cls.h>
 #include <linux/mlx5/fs.h>
+#include <linux/mlx5/lag.h>
 #include <linux/mlx5/device.h>
 #include <linux/rhashtable.h>
 #include <linux/refcount.h>
@@ -2131,7 +2132,7 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow,
 	mutex_unlock(&esw->offloads.peer_mutex);
 
 	list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) {
-		if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev))
+		if (peer_index != mlx5_lag_get_dev_seq(peer_flow->priv->mdev))
 			continue;
 
 		list_del(&peer_flow->peer_flows);
@@ -2154,7 +2155,7 @@ static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow)
 
 	devcom = flow->priv->mdev->priv.eswitch->devcom;
 	mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) {
-		i = mlx5_get_dev_index(peer_esw->dev);
+		i = mlx5_lag_get_dev_seq(peer_esw->dev);
 		mlx5e_tc_del_fdb_peer_flow(flow, i);
 	}
 }
@@ -4584,7 +4585,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
-	int i = mlx5_get_dev_index(peer_esw->dev);
+	int i = mlx5_lag_get_dev_seq(peer_esw->dev);
 	struct mlx5e_rep_priv *peer_urpriv;
 	struct mlx5e_tc_flow *peer_flow;
 	struct mlx5_core_dev *in_mdev;
@@ -5525,7 +5526,7 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
 	devcom = esw->devcom;
 
 	mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) {
-		i = mlx5_get_dev_index(peer_esw->dev);
+		i = mlx5_lag_get_dev_seq(peer_esw->dev);
 		list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i])
 			mlx5e_tc_del_fdb_peers_flow(flow);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 4beee64c937a..51ec8f61ecbb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -35,6 +35,7 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/eswitch.h>
 #include <linux/mlx5/vport.h>
+#include <linux/mlx5/lag.h>
 #include "lib/mlx5.h"
 #include "lib/devcom.h"
 #include "mlx5_core.h"
@@ -369,6 +370,39 @@ int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
 	return -ENOENT;
 }
 
+/* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its
+ * sequence number in the LAG. Master is always 0, others numbered
+ * sequentially starting from 1.
+ */
+int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev = mlx5_lag_dev(dev);
+	int master_idx, i, num = 1;
+	struct lag_func *pf;
+
+	if (!ldev)
+		return -ENOENT;
+
+	master_idx = mlx5_lag_get_master_idx(ldev);
+	if (master_idx < 0)
+		return -ENOENT;
+
+	pf = mlx5_lag_pf(ldev, master_idx);
+	if (pf && pf->dev == dev)
+		return 0;
+
+	mlx5_ldev_for_each(i, 0, ldev) {
+		if (i == master_idx)
+			continue;
+		pf = mlx5_lag_pf(ldev, i);
+		if (pf->dev == dev)
+			return num;
+		num++;
+	}
+	return -ENOENT;
+}
+EXPORT_SYMBOL(mlx5_lag_get_dev_seq);
+
 /* Devcom events for LAG master marking */
 #define LAG_DEVCOM_PAIR		(0)
 #define LAG_DEVCOM_UNPAIR	(1)
diff --git a/include/linux/mlx5/lag.h b/include/linux/mlx5/lag.h
new file mode 100644
index 000000000000..d370dfd19055
--- /dev/null
+++ b/include/linux/mlx5/lag.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LAG_API_H__
+#define __MLX5_LAG_API_H__
+
+struct mlx5_core_dev;
+
+int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev);
+
+#endif /* __MLX5_LAG_API_H__ */
-- 
cgit v1.2.3


From 0bc9059fab6365feaf95cc9a796a3d381915a70f Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Mon, 9 Mar 2026 11:34:33 +0200
Subject: net/mlx5: Add VHCA RX flow destination support for FW steering

Introduce MLX5_FLOW_DESTINATION_TYPE_VHCA_RX as a new flow steering
destination type.

Wire the new destination through flow steering command setup by mapping
it to MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX and passing the vhca id,
extend forward-destination validation to accept it, and teach the flow
steering tracepoint formatter to print rx_vhca_id.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260309093435.1850724-8-tariqt@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 3 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c             | 4 ++++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c            | 7 +++++--
 include/linux/mlx5/fs.h                                      | 4 ++++
 4 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 6d73127b7217..2cf1d3825def 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -282,6 +282,9 @@ const char *parse_fs_dst(struct trace_seq *p,
 	case MLX5_FLOW_DESTINATION_TYPE_NONE:
 		trace_seq_printf(p, "none\n");
 		break;
+	case MLX5_FLOW_DESTINATION_TYPE_VHCA_RX:
+		trace_seq_printf(p, "rx_vhca_id=%u\n", dst->vhca.id);
+		break;
 	}
 
 	trace_seq_putc(p, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 16b28028609d..1cd4cd898ec2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -716,6 +716,10 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 				id = dst->dest_attr.ft->id;
 				ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE;
 				break;
+			case MLX5_FLOW_DESTINATION_TYPE_VHCA_RX:
+				id = dst->dest_attr.vhca.id;
+				ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX;
+				break;
 			default:
 				id = dst->dest_attr.tir_num;
 				ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 2c3544880a30..003d211306a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -503,7 +503,8 @@ static bool is_fwd_dest_type(enum mlx5_flow_destination_type type)
 		type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER ||
 		type == MLX5_FLOW_DESTINATION_TYPE_TIR ||
 		type == MLX5_FLOW_DESTINATION_TYPE_RANGE ||
-		type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE;
+		type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE ||
+		type == MLX5_FLOW_DESTINATION_TYPE_VHCA_RX;
 }
 
 static bool check_valid_spec(const struct mlx5_flow_spec *spec)
@@ -1890,7 +1891,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
 		     d1->range.hit_ft == d2->range.hit_ft &&
 		     d1->range.miss_ft == d2->range.miss_ft &&
 		     d1->range.min == d2->range.min &&
-		     d1->range.max == d2->range.max))
+		     d1->range.max == d2->range.max) ||
+		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_VHCA_RX &&
+		     d1->vhca.id == d2->vhca.id))
 			return true;
 	}
 
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 9cadb1d5e6df..02064424e868 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -55,6 +55,7 @@ enum mlx5_flow_destination_type {
 	MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM,
 	MLX5_FLOW_DESTINATION_TYPE_RANGE,
 	MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE,
+	MLX5_FLOW_DESTINATION_TYPE_VHCA_RX,
 };
 
 enum {
@@ -189,6 +190,9 @@ struct mlx5_flow_destination {
 		u32			ft_num;
 		struct mlx5_flow_table	*ft;
 		struct mlx5_fc          *counter;
+		struct {
+			u16		id;
+		} vhca;
 		struct {
 			u16		num;
 			u16		vhca_id;
-- 
cgit v1.2.3


From d6c9b4de8109a3b4ca9c6c6b7c5fbc42cfeff9ae Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Mon, 9 Mar 2026 11:34:34 +0200
Subject: {net/RDMA}/mlx5: Add LAG demux table API and vport demux rules

Downstream patches will introduce SW-only LAG (e.g. shared_fdb without
HW LAG). In this mode the firmware cannot create the LAG demux table,
but vport demuxing is still required.

Move LAG demux flow-table ownership to the LAG layer and introduce APIs
to init/cleanup the demux table and add/delete per-vport rules. Adjust
the RDMA driver to use the new APIs.

In this mode, the LAG layer will create a flow group that matches vport
metadata. Vports that are not native to the LAG master eswitch add the
demux rule during IB representor load and remove it on unload.
The demux rule forward traffic from said vports to their native eswitch
manager via a new dest type - MLX5_FLOW_DESTINATION_TYPE_VHCA_RX.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260309093435.1850724-9-tariqt@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/mlx5/ib_rep.c                |  20 ++-
 drivers/infiniband/hw/mlx5/main.c                  |  21 +--
 drivers/infiniband/hw/mlx5/mlx5_ib.h               |   1 -
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  12 ++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  83 ++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  10 +-
 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c  | 152 +++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h  |  12 ++
 include/linux/mlx5/fs.h                            |   6 +-
 include/linux/mlx5/lag.h                           |  10 ++
 10 files changed, 300 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index df8f049c5806..1709b628702e 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -10,11 +10,13 @@
 
 static int
 mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
+		      struct mlx5_core_dev *rep_dev,
 		      struct mlx5_eswitch_rep *rep,
 		      int vport_index)
 {
 	struct mlx5_ib_dev *ibdev;
 	struct net_device *ndev;
+	int ret;
 
 	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
 	if (!ibdev)
@@ -24,7 +26,17 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
 	rep->rep_data[REP_IB].priv = ibdev;
 	ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
 
-	return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
+	ret = ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
+	if (ret)
+		return ret;
+
+	/* Only Vports that are not native to the LAG master eswitch need to add
+	 * demux rule.
+	 */
+	if (mlx5_eswitch_get_total_vports(dev) > vport_index)
+		return 0;
+
+	return mlx5_lag_demux_rule_add(rep_dev, rep->vport, vport_index);
 }
 
 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
@@ -131,7 +143,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 
 				if (mlx5_lag_is_master(peer_dev))
 					lag_master = peer_dev;
-				else if (!mlx5_lag_is_mpesw(dev))
+				else if (!mlx5_lag_is_mpesw(peer_dev))
 				/* Only 1 ib port is the representor for all uplinks */
 					peer_n_ports--;
 
@@ -145,7 +157,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
 		profile = &raw_eth_profile;
 	else
-		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
+		return mlx5_ib_set_vport_rep(lag_master, dev, rep, vport_index);
 
 	if (mlx5_lag_is_shared_fdb(dev)) {
 		ret = mlx5_ib_take_transport(lag_master);
@@ -233,6 +245,8 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 		vport_index = i;
 	}
 
+	mlx5_lag_demux_rule_del(mdev, vport_index);
+
 	port = &dev->port[vport_index];
 
 	ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 635002e684a5..9fb0629978bd 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -26,6 +26,7 @@
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/eswitch.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/lag.h>
 #include <linux/list.h>
 #include <rdma/ib_smi.h>
 #include <rdma/ib_umem_odp.h>
@@ -3678,12 +3679,12 @@ static void mlx5e_lag_event_unregister(struct mlx5_ib_dev *dev)
 
 static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 {
+	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_core_dev *mdev = dev->mdev;
-	struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
-								 MLX5_FLOW_NAMESPACE_LAG);
-	struct mlx5_flow_table *ft;
+	struct mlx5_flow_namespace *ns;
 	int err;
 
+	ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_LAG);
 	if (!ns || !mlx5_lag_is_active(mdev))
 		return 0;
 
@@ -3691,14 +3692,15 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 	if (err)
 		return err;
 
-	ft = mlx5_create_lag_demux_flow_table(ns, 0, 0);
-	if (IS_ERR(ft)) {
-		err = PTR_ERR(ft);
+	ft_attr.level = 0;
+	ft_attr.prio = 0;
+	ft_attr.max_fte = dev->num_ports;
+
+	err = mlx5_lag_demux_init(mdev, &ft_attr);
+	if (err)
 		goto err_destroy_vport_lag;
-	}
 
 	mlx5e_lag_event_register(dev);
-	dev->flow_db->lag_demux_ft = ft;
 	dev->lag_ports = mlx5_lag_get_num_ports(mdev);
 	dev->lag_active = true;
 	return 0;
@@ -3716,8 +3718,7 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 		dev->lag_active = false;
 
 		mlx5e_lag_event_unregister(dev);
-		mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
-		dev->flow_db->lag_demux_ft = NULL;
+		mlx5_lag_demux_cleanup(mdev);
 
 		mlx5_cmd_destroy_vport_lag(mdev);
 	}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 4f4114d95130..3fc31415e107 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -306,7 +306,6 @@ struct mlx5_ib_flow_db {
 	struct mlx5_ib_flow_prio	rdma_rx[MLX5_IB_NUM_FLOW_FT];
 	struct mlx5_ib_flow_prio	rdma_tx[MLX5_IB_NUM_FLOW_FT];
 	struct mlx5_ib_flow_prio	opfcs[MLX5_IB_OPCOUNTER_MAX];
-	struct mlx5_flow_table		*lag_demux_ft;
 	struct mlx5_ib_flow_prio        *rdma_transport_rx[MLX5_RDMA_TRANSPORT_BYPASS_PRIO];
 	struct mlx5_ib_flow_prio        *rdma_transport_tx[MLX5_RDMA_TRANSPORT_BYPASS_PRIO];
 	/* Protect flow steering bypass flow tables
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 96309a732d50..9b729789537f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -940,6 +940,12 @@ int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev,
 					       u16 vport_num);
 bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev);
 void mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev);
+struct mlx5_flow_group *
+mlx5_esw_lag_demux_fg_create(struct mlx5_eswitch *esw,
+			     struct mlx5_flow_table *ft);
+struct mlx5_flow_handle *
+mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num,
+			       struct mlx5_flow_table *lag_ft);
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -1025,6 +1031,12 @@ mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id)
 
 static inline void
 mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev) {}
+static inline struct mlx5_flow_handle *
+mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num,
+			       struct mlx5_flow_table *lag_ft)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 
 #endif /* CONFIG_MLX5_ESWITCH */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 90e6f97bdf4a..f98837470f39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1459,6 +1459,83 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 	return flow_rule;
 }
 
+struct mlx5_flow_group *
+mlx5_esw_lag_demux_fg_create(struct mlx5_eswitch *esw,
+			     struct mlx5_flow_table *ft)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *fg;
+	void *match_criteria;
+	void *flow_group_in;
+
+	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (IS_ERR(ft))
+		return ERR_CAST(ft);
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in)
+		return ERR_PTR(-ENOMEM);
+
+	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+				      match_criteria);
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_MISC_PARAMETERS_2);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+		 ft->max_fte - 1);
+
+	MLX5_SET(fte_match_param, match_criteria,
+		 misc_parameters_2.metadata_reg_c_0,
+		 mlx5_eswitch_get_vport_metadata_mask());
+
+	fg = mlx5_create_flow_group(ft, flow_group_in);
+	kvfree(flow_group_in);
+	if (IS_ERR(fg))
+		esw_warn(esw->dev, "Can't create LAG demux flow group\n");
+
+	return fg;
+}
+
+struct mlx5_flow_handle *
+mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num,
+			       struct mlx5_flow_table *lag_ft)
+{
+	struct mlx5_flow_spec *spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *ret;
+	void *misc;
+
+	if (!spec)
+		return ERR_PTR(-ENOMEM);
+
+	if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		kvfree(spec);
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+			    misc_parameters_2);
+	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+		 mlx5_eswitch_get_vport_metadata_mask());
+	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+			    misc_parameters_2);
+	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+		 mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_VHCA_RX;
+	dest.vhca.id = MLX5_CAP_GEN(esw->dev, vhca_id);
+
+	ret = mlx5_add_flow_rules(lag_ft, spec, &flow_act, &dest, 1);
+	kvfree(spec);
+	return ret;
+}
+
 #define MAX_PF_SQ 256
 #define MAX_SQ_NVPORTS 32
 
@@ -2047,7 +2124,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
 
 	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
-		mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err);
+		esw_warn(esw->dev, "Failed to create vport rx group err %d\n",
+			 err);
 		goto out;
 	}
 
@@ -2092,7 +2170,8 @@ static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw)
 
 	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
-		mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err);
+		esw_warn(esw->dev,
+			 "Failed to create vport rx drop group err %d\n", err);
 		goto out;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 003d211306a7..61a6ba1e49dd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1438,15 +1438,9 @@ mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
 
 struct mlx5_flow_table*
 mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
-				 int prio, u32 level)
+				 struct mlx5_flow_table_attr *ft_attr)
 {
-	struct mlx5_flow_table_attr ft_attr = {};
-
-	ft_attr.level = level;
-	ft_attr.prio  = prio;
-	ft_attr.max_fte = 1;
-
-	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
+	return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
 }
 EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 51ec8f61ecbb..449e4bd86c06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1471,6 +1471,158 @@ struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
 	return devcom;
 }
 
+static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
+				     struct mlx5_flow_table_attr *ft_attr,
+				     struct mlx5_lag *ldev)
+{
+#ifdef CONFIG_MLX5_ESWITCH
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_group *fg;
+	int err;
+
+	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
+	if (!ns)
+		return 0;
+
+	ldev->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr);
+	if (IS_ERR(ldev->lag_demux_ft))
+		return PTR_ERR(ldev->lag_demux_ft);
+
+	fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch,
+					  ldev->lag_demux_ft);
+	if (IS_ERR(fg)) {
+		err = PTR_ERR(fg);
+		mlx5_destroy_flow_table(ldev->lag_demux_ft);
+		ldev->lag_demux_ft = NULL;
+		return err;
+	}
+
+	ldev->lag_demux_fg = fg;
+	return 0;
+#else
+	return -EOPNOTSUPP;
+#endif
+}
+
+static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev,
+				  struct mlx5_flow_table_attr *ft_attr,
+				  struct mlx5_lag *ldev)
+{
+	struct mlx5_flow_namespace *ns;
+	int err;
+
+	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
+	if (!ns)
+		return 0;
+
+	ldev->lag_demux_fg = NULL;
+	ft_attr->max_fte = 1;
+	ldev->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr);
+	if (IS_ERR(ldev->lag_demux_ft)) {
+		err = PTR_ERR(ldev->lag_demux_ft);
+		ldev->lag_demux_ft = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
+int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
+			struct mlx5_flow_table_attr *ft_attr)
+{
+	struct mlx5_lag *ldev;
+
+	if (!ft_attr)
+		return -EINVAL;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
+		return -ENODEV;
+
+	xa_init(&ldev->lag_demux_rules);
+
+	if (mlx5_get_sd(dev))
+		return mlx5_lag_demux_ft_fg_init(dev, ft_attr, ldev);
+
+	return mlx5_lag_demux_fw_init(dev, ft_attr, ldev);
+}
+EXPORT_SYMBOL(mlx5_lag_demux_init);
+
+void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_handle *rule;
+	struct mlx5_lag *ldev;
+	unsigned long vport_num;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
+		return;
+
+	xa_for_each(&ldev->lag_demux_rules, vport_num, rule)
+		mlx5_del_flow_rules(rule);
+	xa_destroy(&ldev->lag_demux_rules);
+
+	if (ldev->lag_demux_fg)
+		mlx5_destroy_flow_group(ldev->lag_demux_fg);
+	if (ldev->lag_demux_ft)
+		mlx5_destroy_flow_table(ldev->lag_demux_ft);
+	ldev->lag_demux_fg = NULL;
+	ldev->lag_demux_ft = NULL;
+}
+EXPORT_SYMBOL(mlx5_lag_demux_cleanup);
+
+int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num,
+			    int index)
+{
+	struct mlx5_flow_handle *rule;
+	struct mlx5_lag *ldev;
+	int err;
+
+	ldev = mlx5_lag_dev(vport_dev);
+	if (!ldev || !ldev->lag_demux_fg)
+		return 0;
+
+	if (xa_load(&ldev->lag_demux_rules, index))
+		return 0;
+
+	rule = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch,
+					      vport_num, ldev->lag_demux_ft);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_warn(vport_dev,
+			       "Failed to create LAG demux rule for vport %u, err %d\n",
+			       vport_num, err);
+		return err;
+	}
+
+	err = xa_err(xa_store(&ldev->lag_demux_rules, index, rule,
+			      GFP_KERNEL));
+	if (err) {
+		mlx5_del_flow_rules(rule);
+		mlx5_core_warn(vport_dev,
+			       "Failed to store LAG demux rule for vport %u, err %d\n",
+			       vport_num, err);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_lag_demux_rule_add);
+
+void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index)
+{
+	struct mlx5_flow_handle *rule;
+	struct mlx5_lag *ldev;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev || !ldev->lag_demux_fg)
+		return;
+
+	rule = xa_erase(&ldev->lag_demux_rules, index);
+	if (rule)
+		mlx5_del_flow_rules(rule);
+}
+EXPORT_SYMBOL(mlx5_lag_demux_rule_del);
+
 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
 {
 	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 30cbd61768f8..6c911374f409 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -5,6 +5,9 @@
 #define __MLX5_LAG_H__
 
 #include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/xarray.h>
+#include <linux/mlx5/fs.h>
 
 #define MLX5_LAG_MAX_HASH_BUCKETS 16
 /* XArray mark for the LAG master device
@@ -83,6 +86,9 @@ struct mlx5_lag {
 	/* Protect lag fields/state changes */
 	struct mutex		  lock;
 	struct lag_mpesw	  lag_mpesw;
+	struct mlx5_flow_table   *lag_demux_ft;
+	struct mlx5_flow_group   *lag_demux_fg;
+	struct xarray		  lag_demux_rules;
 };
 
 static inline struct mlx5_lag *
@@ -133,6 +139,12 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev)
 
 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev);
 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev);
+int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
+			struct mlx5_flow_table_attr *ft_attr);
+void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev);
+int mlx5_lag_demux_rule_add(struct mlx5_core_dev *dev, u16 vport_num,
+			    int vport_index);
+void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int vport_index);
 void mlx5_modify_lag(struct mlx5_lag *ldev,
 		     struct lag_tracker *tracker);
 int mlx5_activate_lag(struct mlx5_lag *ldev,
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 02064424e868..d8f3b7ef319e 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -252,9 +252,9 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
 struct mlx5_flow_table *
 mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
 			     struct mlx5_flow_table_attr *ft_attr, u16 vport);
-struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
-					       struct mlx5_flow_namespace *ns,
-					       int prio, u32 level);
+struct mlx5_flow_table *
+mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
+				 struct mlx5_flow_table_attr *ft_attr);
 int mlx5_destroy_flow_table(struct mlx5_flow_table *ft);
 
 /* inbox should be set with the following values:
diff --git a/include/linux/mlx5/lag.h b/include/linux/mlx5/lag.h
index d370dfd19055..ab9f754664e5 100644
--- a/include/linux/mlx5/lag.h
+++ b/include/linux/mlx5/lag.h
@@ -4,8 +4,18 @@
 #ifndef __MLX5_LAG_API_H__
 #define __MLX5_LAG_API_H__
 
+#include <linux/types.h>
+
 struct mlx5_core_dev;
+struct mlx5_flow_table;
+struct mlx5_flow_table_attr;
 
+int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
+			struct mlx5_flow_table_attr *ft_attr);
+void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev);
+int mlx5_lag_demux_rule_add(struct mlx5_core_dev *dev, u16 vport_num,
+			    int vport_index);
+void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int vport_index);
 int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev);
 
 #endif /* __MLX5_LAG_API_H__ */
-- 
cgit v1.2.3


From 4dd2115f43594da5271a1aa34fde6719b4259047 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Mon, 9 Mar 2026 11:34:35 +0200
Subject: net/mlx5: Expose MLX5_UMR_ALIGN definition

Expose HW constant value in a shared header, to be used by core/EN
drivers.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260309093435.1850724-10-tariqt@nvidia.com
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/mlx5/mr.c | 1 -
 include/linux/mlx5/device.h     | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 665323b90b64..ff56948597dd 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -51,7 +51,6 @@ enum {
 };
 
 #define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4
-#define MLX5_UMR_ALIGN 2048
 
 static void
 create_mkey_callback(int status, struct mlx5_async_work *context);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 25c6b42140b2..07a25f264292 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -293,6 +293,7 @@ enum {
 	MLX5_UMR_INLINE			= (1 << 7),
 };
 
+#define MLX5_UMR_ALIGN (2048)
 #define MLX5_UMR_FLEX_ALIGNMENT 0x40
 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt))
 #define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm))
-- 
cgit v1.2.3


From f1a424e21c15993db0f9594cda17ef5d516ab3e9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 14 Mar 2026 08:41:04 -0600
Subject: io_uring: switch struct io_ring_ctx internal bitfields to flags

Bitfields cannot be set and checked atomically, and this makes it more
clear that these are indeed in shared storage and must be checked and
set in a sane fashion. This is in preparation for annotating a few of
the known racy, but harmless, flags checking.

No intended functional changes in this patch.

Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 34 ++++++++++--------
 io_uring/eventfd.c             |  4 +--
 io_uring/io_uring.c            | 82 +++++++++++++++++++++---------------------
 io_uring/io_uring.h            |  9 ++---
 io_uring/msg_ring.c            |  2 +-
 io_uring/register.c            |  8 ++---
 io_uring/rsrc.c                |  8 ++---
 io_uring/tctx.c                |  2 +-
 io_uring/timeout.c             |  4 +--
 io_uring/tw.c                  |  2 +-
 10 files changed, 82 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index dd1420bfcb73..0b3f08adc217 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -268,24 +268,30 @@ struct io_alloc_cache {
 	unsigned int		init_clear;
 };
 
+enum {
+	IO_RING_F_DRAIN_NEXT		= BIT(0),
+	IO_RING_F_OP_RESTRICTED		= BIT(1),
+	IO_RING_F_REG_RESTRICTED	= BIT(2),
+	IO_RING_F_OFF_TIMEOUT_USED	= BIT(3),
+	IO_RING_F_DRAIN_ACTIVE		= BIT(4),
+	IO_RING_F_HAS_EVFD		= BIT(5),
+	/* all CQEs should be posted only by the submitter task */
+	IO_RING_F_TASK_COMPLETE		= BIT(6),
+	IO_RING_F_LOCKLESS_CQ		= BIT(7),
+	IO_RING_F_SYSCALL_IOPOLL	= BIT(8),
+	IO_RING_F_POLL_ACTIVATED	= BIT(9),
+	IO_RING_F_DRAIN_DISABLED	= BIT(10),
+	IO_RING_F_COMPAT		= BIT(11),
+	IO_RING_F_IOWQ_LIMITS_SET	= BIT(12),
+};
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
+		/* ring setup flags */
 		unsigned int		flags;
-		unsigned int		drain_next: 1;
-		unsigned int		op_restricted: 1;
-		unsigned int		reg_restricted: 1;
-		unsigned int		off_timeout_used: 1;
-		unsigned int		drain_active: 1;
-		unsigned int		has_evfd: 1;
-		/* all CQEs should be posted only by the submitter task */
-		unsigned int		task_complete: 1;
-		unsigned int		lockless_cq: 1;
-		unsigned int		syscall_iopoll: 1;
-		unsigned int		poll_activated: 1;
-		unsigned int		drain_disabled: 1;
-		unsigned int		compat: 1;
-		unsigned int		iowq_limits_set : 1;
+		/* internal state flags IO_RING_F_* flags , mostly read-only */
+		unsigned int		int_flags;
 
 		struct task_struct	*submitter_task;
 		struct io_rings		*rings;
diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 7482a7dc6b38..3da028500f76 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -148,7 +148,7 @@ int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
 	spin_unlock(&ctx->completion_lock);
 
 	ev_fd->eventfd_async = eventfd_async;
-	ctx->has_evfd = true;
+	ctx->int_flags |= IO_RING_F_HAS_EVFD;
 	refcount_set(&ev_fd->refs, 1);
 	atomic_set(&ev_fd->ops, 0);
 	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
@@ -162,7 +162,7 @@ int io_eventfd_unregister(struct io_ring_ctx *ctx)
 	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
 					lockdep_is_held(&ctx->uring_lock));
 	if (ev_fd) {
-		ctx->has_evfd = false;
+		ctx->int_flags &= ~IO_RING_F_HAS_EVFD;
 		rcu_assign_pointer(ctx->io_ev_fd, NULL);
 		io_eventfd_put(ev_fd);
 		return 0;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9a37035e76c0..bfeb3bc3849d 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -477,17 +477,17 @@ static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx)
 
 void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
-	if (ctx->poll_activated)
+	if (ctx->int_flags & IO_RING_F_POLL_ACTIVATED)
 		io_poll_wq_wake(ctx);
-	if (ctx->off_timeout_used)
+	if (ctx->int_flags & IO_RING_F_OFF_TIMEOUT_USED)
 		io_flush_timeouts(ctx);
-	if (ctx->has_evfd)
+	if (ctx->int_flags & IO_RING_F_HAS_EVFD)
 		io_eventfd_signal(ctx, true);
 }
 
 static inline void __io_cq_lock(struct io_ring_ctx *ctx)
 {
-	if (!ctx->lockless_cq)
+	if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ))
 		spin_lock(&ctx->completion_lock);
 }
 
@@ -500,11 +500,11 @@ static inline void io_cq_lock(struct io_ring_ctx *ctx)
 static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
 {
 	io_commit_cqring(ctx);
-	if (!ctx->task_complete) {
-		if (!ctx->lockless_cq)
+	if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) {
+		if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ))
 			spin_unlock(&ctx->completion_lock);
 		/* IOPOLL rings only need to wake up if it's also SQPOLL */
-		if (!ctx->syscall_iopoll)
+		if (!(ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL))
 			io_cqring_wake(ctx);
 	}
 	io_commit_cqring_flush(ctx);
@@ -830,7 +830,7 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
 void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
 {
 	lockdep_assert_held(&ctx->uring_lock);
-	lockdep_assert(ctx->lockless_cq);
+	lockdep_assert(ctx->int_flags & IO_RING_F_LOCKLESS_CQ);
 
 	if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
 		struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
@@ -860,7 +860,7 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
 	lockdep_assert(!io_wq_current_is_worker());
 	lockdep_assert_held(&ctx->uring_lock);
 
-	if (!ctx->lockless_cq) {
+	if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) {
 		spin_lock(&ctx->completion_lock);
 		posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
 		spin_unlock(&ctx->completion_lock);
@@ -885,7 +885,7 @@ bool io_req_post_cqe32(struct io_kiocb *req, struct io_uring_cqe cqe[2])
 	lockdep_assert_held(&ctx->uring_lock);
 
 	cqe[0].user_data = req->cqe.user_data;
-	if (!ctx->lockless_cq) {
+	if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) {
 		spin_lock(&ctx->completion_lock);
 		posted = io_fill_cqe_aux32(ctx, cqe);
 		spin_unlock(&ctx->completion_lock);
@@ -913,7 +913,7 @@ static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
 	 * Handle special CQ sync cases via task_work. DEFER_TASKRUN requires
 	 * the submitter task context, IOPOLL protects with uring_lock.
 	 */
-	if (ctx->lockless_cq || (req->flags & REQ_F_REISSUE)) {
+	if ((ctx->int_flags & IO_RING_F_LOCKLESS_CQ) || (req->flags & REQ_F_REISSUE)) {
 defer_complete:
 		req->io_task_work.func = io_req_task_complete;
 		io_req_task_work_add(req);
@@ -1135,7 +1135,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		 */
 		if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
 		    unlikely(!io_fill_cqe_req(ctx, req))) {
-			if (ctx->lockless_cq)
+			if (ctx->int_flags & IO_RING_F_LOCKLESS_CQ)
 				io_cqe_overflow(ctx, &req->cqe, &req->big_cqe);
 			else
 				io_cqe_overflow_locked(ctx, &req->cqe, &req->big_cqe);
@@ -1148,7 +1148,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		INIT_WQ_LIST(&state->compl_reqs);
 	}
 
-	if (unlikely(ctx->drain_active))
+	if (unlikely(ctx->int_flags & IO_RING_F_DRAIN_ACTIVE))
 		io_queue_deferred(ctx);
 
 	ctx->submit_state.cq_flush = false;
@@ -1344,7 +1344,7 @@ static __cold void io_drain_req(struct io_kiocb *req)
 	list_add_tail(&de->list, &ctx->defer_list);
 	io_queue_deferred(ctx);
 	if (!drain && list_empty(&ctx->defer_list))
-		ctx->drain_active = false;
+		ctx->int_flags &= ~IO_RING_F_DRAIN_ACTIVE;
 }
 
 static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
@@ -1655,7 +1655,7 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
 	} else {
 		/* can't fail with IO_URING_F_INLINE */
 		io_req_sqe_copy(req, IO_URING_F_INLINE);
-		if (unlikely(req->ctx->drain_active))
+		if (unlikely(req->ctx->int_flags & IO_RING_F_DRAIN_ACTIVE))
 			io_drain_req(req);
 		else
 			io_queue_iowq(req);
@@ -1671,7 +1671,7 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
 					struct io_kiocb *req,
 					unsigned int sqe_flags)
 {
-	if (!ctx->op_restricted)
+	if (!(ctx->int_flags & IO_RING_F_OP_RESTRICTED))
 		return true;
 	if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
 		return false;
@@ -1691,7 +1691,7 @@ static void io_init_drain(struct io_ring_ctx *ctx)
 {
 	struct io_kiocb *head = ctx->submit_state.link.head;
 
-	ctx->drain_active = true;
+	ctx->int_flags |= IO_RING_F_DRAIN_ACTIVE;
 	if (head) {
 		/*
 		 * If we need to drain a request in the middle of a link, drain
@@ -1701,7 +1701,7 @@ static void io_init_drain(struct io_ring_ctx *ctx)
 		 * link.
 		 */
 		head->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC;
-		ctx->drain_next = true;
+		ctx->int_flags |= IO_RING_F_DRAIN_NEXT;
 	}
 }
 
@@ -1767,23 +1767,23 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			req->buf_index = READ_ONCE(sqe->buf_group);
 		}
 		if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS)
-			ctx->drain_disabled = true;
+			ctx->int_flags |= IO_RING_F_DRAIN_DISABLED;
 		if (sqe_flags & IOSQE_IO_DRAIN) {
-			if (ctx->drain_disabled)
+			if (ctx->int_flags & IO_RING_F_DRAIN_DISABLED)
 				return io_init_fail_req(req, -EOPNOTSUPP);
 			io_init_drain(ctx);
 		}
 	}
-	if (unlikely(ctx->op_restricted || ctx->drain_active || ctx->drain_next)) {
+	if (unlikely(ctx->int_flags & (IO_RING_F_OP_RESTRICTED | IO_RING_F_DRAIN_ACTIVE | IO_RING_F_DRAIN_NEXT))) {
 		if (!io_check_restriction(ctx, req, sqe_flags))
 			return io_init_fail_req(req, -EACCES);
 		/* knock it to the slow queue path, will be drained there */
-		if (ctx->drain_active)
+		if (ctx->int_flags & IO_RING_F_DRAIN_ACTIVE)
 			req->flags |= REQ_F_FORCE_ASYNC;
 		/* if there is no link, we're at "next" request and need to drain */
-		if (unlikely(ctx->drain_next) && !ctx->submit_state.link.head) {
-			ctx->drain_next = false;
-			ctx->drain_active = true;
+		if (unlikely(ctx->int_flags & IO_RING_F_DRAIN_NEXT) && !ctx->submit_state.link.head) {
+			ctx->int_flags &= ~IO_RING_F_DRAIN_NEXT;
+			ctx->int_flags |= IO_RING_F_DRAIN_ACTIVE;
 			req->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC;
 		}
 	}
@@ -2204,7 +2204,7 @@ static __cold void io_activate_pollwq_cb(struct callback_head *cb)
 					       poll_wq_task_work);
 
 	mutex_lock(&ctx->uring_lock);
-	ctx->poll_activated = true;
+	ctx->int_flags |= IO_RING_F_POLL_ACTIVATED;
 	mutex_unlock(&ctx->uring_lock);
 
 	/*
@@ -2219,9 +2219,9 @@ __cold void io_activate_pollwq(struct io_ring_ctx *ctx)
 {
 	spin_lock(&ctx->completion_lock);
 	/* already activated or in progress */
-	if (ctx->poll_activated || ctx->poll_wq_task_work.func)
+	if ((ctx->int_flags & IO_RING_F_POLL_ACTIVATED) || ctx->poll_wq_task_work.func)
 		goto out;
-	if (WARN_ON_ONCE(!ctx->task_complete))
+	if (WARN_ON_ONCE(!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)))
 		goto out;
 	if (!ctx->submitter_task)
 		goto out;
@@ -2242,7 +2242,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	struct io_ring_ctx *ctx = file->private_data;
 	__poll_t mask = 0;
 
-	if (unlikely(!ctx->poll_activated))
+	if (unlikely(!(ctx->int_flags & IO_RING_F_POLL_ACTIVATED)))
 		io_activate_pollwq(ctx);
 	/*
 	 * provides mb() which pairs with barrier from wq_has_sleeper
@@ -2607,7 +2607,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 			goto out;
 		}
 		if (flags & IORING_ENTER_GETEVENTS) {
-			if (ctx->syscall_iopoll)
+			if (ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL)
 				goto iopoll_locked;
 			/*
 			 * Ignore errors, we'll soon call io_cqring_wait() and
@@ -2622,7 +2622,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	if (flags & IORING_ENTER_GETEVENTS) {
 		int ret2;
 
-		if (ctx->syscall_iopoll) {
+		if (ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL) {
 			/*
 			 * We disallow the app entering submit/complete with
 			 * polling, but we still need to lock the ring to
@@ -2923,9 +2923,9 @@ static void io_ctx_restriction_clone(struct io_ring_ctx *ctx,
 	if (dst->bpf_filters)
 		WRITE_ONCE(ctx->bpf_filters, dst->bpf_filters->filters);
 	if (dst->op_registered)
-		ctx->op_restricted = 1;
+		ctx->int_flags |= IO_RING_F_OP_RESTRICTED;
 	if (dst->reg_registered)
-		ctx->reg_restricted = 1;
+		ctx->int_flags |= IO_RING_F_REG_RESTRICTED;
 }
 
 static __cold int io_uring_create(struct io_ctx_config *config)
@@ -2952,17 +2952,18 @@ static __cold int io_uring_create(struct io_ctx_config *config)
 
 	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
 	    !(ctx->flags & IORING_SETUP_IOPOLL))
-		ctx->task_complete = true;
+		ctx->int_flags |= IO_RING_F_TASK_COMPLETE;
 
-	if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))
-		ctx->lockless_cq = true;
+	if ((ctx->int_flags & IO_RING_F_TASK_COMPLETE) ||
+	    (ctx->flags & IORING_SETUP_IOPOLL))
+		ctx->int_flags |= IO_RING_F_LOCKLESS_CQ;
 
 	/*
 	 * lazy poll_wq activation relies on ->task_complete for synchronisation
 	 * purposes, see io_activate_pollwq()
 	 */
-	if (!ctx->task_complete)
-		ctx->poll_activated = true;
+	if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE))
+		ctx->int_flags |= IO_RING_F_POLL_ACTIVATED;
 
 	/*
 	 * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
@@ -2972,9 +2973,10 @@ static __cold int io_uring_create(struct io_ctx_config *config)
 	 */
 	if (ctx->flags & IORING_SETUP_IOPOLL &&
 	    !(ctx->flags & IORING_SETUP_SQPOLL))
-		ctx->syscall_iopoll = 1;
+		ctx->int_flags |= IO_RING_F_SYSCALL_IOPOLL;
 
-	ctx->compat = in_compat_syscall();
+	if (in_compat_syscall())
+		ctx->int_flags |= IO_RING_F_COMPAT;
 	if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK))
 		ctx->user = get_uid(current_user());
 
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 0fa844faf287..5cb1983043cd 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -211,7 +211,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
 
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
 		lockdep_assert_held(&ctx->uring_lock);
-	} else if (!ctx->task_complete) {
+	} else if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) {
 		lockdep_assert_held(&ctx->completion_lock);
 	} else if (ctx->submitter_task) {
 		/*
@@ -228,7 +228,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
 
 static inline bool io_is_compat(struct io_ring_ctx *ctx)
 {
-	return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->compat);
+	return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->int_flags & IO_RING_F_COMPAT);
 }
 
 static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
@@ -472,8 +472,9 @@ static inline void io_req_complete_defer(struct io_kiocb *req)
 
 static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
-	if (unlikely(ctx->off_timeout_used ||
-		     ctx->has_evfd || ctx->poll_activated))
+	if (unlikely(ctx->int_flags & (IO_RING_F_OFF_TIMEOUT_USED |
+				       IO_RING_F_HAS_EVFD |
+				       IO_RING_F_POLL_ACTIVATED)))
 		__io_commit_cqring_flush(ctx);
 }
 
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 57ad0085869a..3ff9098573db 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -67,7 +67,7 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
 
 static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
 {
-	return target_ctx->task_complete;
+	return target_ctx->int_flags & IO_RING_F_TASK_COMPLETE;
 }
 
 static void io_msg_tw_complete(struct io_tw_req tw_req, io_tw_token_t tw)
diff --git a/io_uring/register.c b/io_uring/register.c
index 0148735f7711..489a6feaf228 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -184,9 +184,9 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
 		return ret;
 	}
 	if (ctx->restrictions.op_registered)
-		ctx->op_restricted = 1;
+		ctx->int_flags |= IO_RING_F_OP_RESTRICTED;
 	if (ctx->restrictions.reg_registered)
-		ctx->reg_restricted = 1;
+		ctx->int_flags |= IO_RING_F_REG_RESTRICTED;
 	return 0;
 }
 
@@ -384,7 +384,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	for (i = 0; i < ARRAY_SIZE(new_count); i++)
 		if (new_count[i])
 			ctx->iowq_limits[i] = new_count[i];
-	ctx->iowq_limits_set = true;
+	ctx->int_flags |= IO_RING_F_IOWQ_LIMITS_SET;
 
 	if (tctx && tctx->io_wq) {
 		ret = io_wq_max_workers(tctx->io_wq, new_count);
@@ -725,7 +725,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	if (ctx->submitter_task && ctx->submitter_task != current)
 		return -EEXIST;
 
-	if (ctx->reg_restricted && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
+	if ((ctx->int_flags & IO_RING_F_REG_RESTRICTED) && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
 		opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
 		if (!test_bit(opcode, ctx->restrictions.register_op))
 			return -EACCES;
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 4fa59bf89bba..52554ed89b11 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -295,7 +295,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 		u64 tag = 0;
 
 		uvec = u64_to_user_ptr(user_data);
-		iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat);
+		iov = iovec_from_user(uvec, 1, 1, &fast_iov, io_is_compat(ctx));
 		if (IS_ERR(iov)) {
 			err = PTR_ERR(iov);
 			break;
@@ -319,7 +319,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 		i = array_index_nospec(up->offset + done, ctx->buf_table.nr);
 		io_reset_rsrc_node(ctx, &ctx->buf_table, i);
 		ctx->buf_table.nodes[i] = node;
-		if (ctx->compat)
+		if (io_is_compat(ctx))
 			user_data += sizeof(struct compat_iovec);
 		else
 			user_data += sizeof(struct iovec);
@@ -883,12 +883,12 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 
 		if (arg) {
 			uvec = (struct iovec __user *) arg;
-			iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat);
+			iov = iovec_from_user(uvec, 1, 1, &fast_iov, io_is_compat(ctx));
 			if (IS_ERR(iov)) {
 				ret = PTR_ERR(iov);
 				break;
 			}
-			if (ctx->compat)
+			if (io_is_compat(ctx))
 				arg += sizeof(struct compat_iovec);
 			else
 				arg += sizeof(struct iovec);
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 7cbcb82aedfb..143de8e990eb 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -121,7 +121,7 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
 			return ret;
 
 		tctx = current->io_uring;
-		if (ctx->iowq_limits_set) {
+		if (ctx->int_flags & IO_RING_F_IOWQ_LIMITS_SET) {
 			unsigned int limits[2] = { ctx->iowq_limits[0],
 						   ctx->iowq_limits[1], };
 
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 8eddf8add7a2..579fdddac71a 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -566,8 +566,8 @@ static int __io_timeout_prep(struct io_kiocb *req,
 
 	INIT_LIST_HEAD(&timeout->list);
 	timeout->off = off;
-	if (unlikely(off && !req->ctx->off_timeout_used))
-		req->ctx->off_timeout_used = true;
+	if (unlikely(off && !(req->ctx->int_flags & IO_RING_F_OFF_TIMEOUT_USED)))
+		req->ctx->int_flags |= IO_RING_F_OFF_TIMEOUT_USED;
 	/*
 	 * for multishot reqs w/ fixed nr of repeats, repeats tracks the
 	 * remaining nr
diff --git a/io_uring/tw.c b/io_uring/tw.c
index 2f2b4ac4b126..022fe8753c19 100644
--- a/io_uring/tw.c
+++ b/io_uring/tw.c
@@ -222,7 +222,7 @@ void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 
 	if (!head) {
 		io_ctx_mark_taskrun(ctx);
-		if (ctx->has_evfd)
+		if (ctx->int_flags & IO_RING_F_HAS_EVFD)
 			io_eventfd_signal(ctx, false);
 	}
 
-- 
cgit v1.2.3


From 9165dc4fa969b64c2d4396ee4e1546a719978dd1 Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Mon, 2 Mar 2026 10:29:10 -0700
Subject: io_uring: add REQ_F_IOPOLL

A subsequent commit will allow uring_cmds to files that don't implement
->uring_cmd_iopoll() to be issued to IORING_SETUP_IOPOLL io_urings. This
means the ctx's IORING_SETUP_IOPOLL flag isn't sufficient to determine
whether a given request needs to be iopolled.

Introduce a request flag REQ_F_IOPOLL set in ->issue() if a request
needs to be iopolled to completion. Set the flag in io_rw_init_file()
and io_uring_cmd() for requests issued to IORING_SETUP_IOPOLL ctxs. Use
the request flag instead of IORING_SETUP_IOPOLL in places dealing with a
specific request.

A future possibility would be to add an option to enable/disable iopoll
in the io_uring SQE instead of determining it from IORING_SETUP_IOPOLL.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Link: https://patch.msgid.link/20260302172914.2488599-2-csander@purestorage.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  3 +++
 io_uring/io_uring.c            |  9 ++++-----
 io_uring/rw.c                  | 11 ++++++-----
 io_uring/uring_cmd.c           |  5 +++--
 4 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 0b3f08adc217..4dbd7083dd54 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -550,6 +550,7 @@ enum {
 	REQ_F_HAS_METADATA_BIT,
 	REQ_F_IMPORT_BUFFER_BIT,
 	REQ_F_SQE_COPIED_BIT,
+	REQ_F_IOPOLL_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -641,6 +642,8 @@ enum {
 	REQ_F_IMPORT_BUFFER	= IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT),
 	/* ->sqe_copy() has been called, if necessary */
 	REQ_F_SQE_COPIED	= IO_REQ_FLAG(REQ_F_SQE_COPIED_BIT),
+	/* request must be iopolled to completion (set in ->issue()) */
+	REQ_F_IOPOLL		= IO_REQ_FLAG(REQ_F_IOPOLL_BIT),
 };
 
 struct io_tw_req {
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index fb5a263706be..a610eaa5fd7c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -356,7 +356,6 @@ static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
 static void io_prep_async_work(struct io_kiocb *req)
 {
 	const struct io_issue_def *def = &io_issue_defs[req->opcode];
-	struct io_ring_ctx *ctx = req->ctx;
 
 	if (!(req->flags & REQ_F_CREDS)) {
 		req->flags |= REQ_F_CREDS;
@@ -378,7 +377,7 @@ static void io_prep_async_work(struct io_kiocb *req)
 		if (should_hash && (req->file->f_flags & O_DIRECT) &&
 		    (req->file->f_op->fop_flags & FOP_DIO_PARALLEL_WRITE))
 			should_hash = false;
-		if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL))
+		if (should_hash || (req->flags & REQ_F_IOPOLL))
 			io_wq_hash_work(&req->work, file_inode(req->file));
 	} else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
 		if (def->unbound_nonreg_file)
@@ -1419,7 +1418,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 		ret = 0;
 
 		/* If the op doesn't have a file, we're not polling for it */
-		if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue)
+		if ((req->flags & REQ_F_IOPOLL) && def->iopoll_queue)
 			io_iopoll_req_issued(req, issue_flags);
 	}
 	return ret;
@@ -1435,7 +1434,7 @@ int io_poll_issue(struct io_kiocb *req, io_tw_token_t tw)
 	io_tw_lock(req->ctx, tw);
 
 	WARN_ON_ONCE(!req->file);
-	if (WARN_ON_ONCE(req->ctx->flags & IORING_SETUP_IOPOLL))
+	if (WARN_ON_ONCE(req->flags & REQ_F_IOPOLL))
 		return -EFAULT;
 
 	ret = __io_issue_sqe(req, issue_flags, &io_issue_defs[req->opcode]);
@@ -1533,7 +1532,7 @@ fail:
 		 * wait for request slots on the block side.
 		 */
 		if (!needs_poll) {
-			if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
+			if (!(req->flags & REQ_F_IOPOLL))
 				break;
 			if (io_wq_worker_stopped())
 				break;
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1a5f262734e8..3bdb9914e673 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -504,7 +504,7 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
 	if (!S_ISBLK(mode) && !S_ISREG(mode))
 		return false;
 	if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() &&
-	    !(ctx->flags & IORING_SETUP_IOPOLL)))
+	    !(req->flags & REQ_F_IOPOLL)))
 		return false;
 	/*
 	 * If ref is dying, we might be running poll reap from the exit work.
@@ -640,7 +640,7 @@ static inline void io_rw_done(struct io_kiocb *req, ssize_t ret)
 		}
 	}
 
-	if (req->ctx->flags & IORING_SETUP_IOPOLL)
+	if (req->flags & REQ_F_IOPOLL)
 		io_complete_rw_iopoll(&rw->kiocb, ret);
 	else
 		io_complete_rw(&rw->kiocb, ret);
@@ -654,7 +654,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret,
 
 	if (ret >= 0 && req->flags & REQ_F_CUR_POS)
 		req->file->f_pos = rw->kiocb.ki_pos;
-	if (ret >= 0 && !(req->ctx->flags & IORING_SETUP_IOPOLL)) {
+	if (ret >= 0 && !(req->flags & REQ_F_IOPOLL)) {
 		u32 cflags = 0;
 
 		__io_complete_rw_common(req, ret);
@@ -876,6 +876,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
 		if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll)
 			return -EOPNOTSUPP;
+		req->flags |= REQ_F_IOPOLL;
 		kiocb->private = NULL;
 		kiocb->ki_flags |= IOCB_HIPRI;
 		req->iopoll_completed = 0;
@@ -963,7 +964,7 @@ static int __io_read(struct io_kiocb *req, struct io_br_sel *sel,
 		if (io_file_can_poll(req))
 			return -EAGAIN;
 		/* IOPOLL retry should happen for io-wq threads */
-		if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
+		if (!force_nonblock && !(req->flags & REQ_F_IOPOLL))
 			goto done;
 		/* no retry on NONBLOCK nor RWF_NOWAIT */
 		if (req->flags & REQ_F_NOWAIT)
@@ -1188,7 +1189,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		goto done;
 	if (!force_nonblock || ret2 != -EAGAIN) {
 		/* IOPOLL retry should happen for io-wq threads */
-		if (ret2 == -EAGAIN && (req->ctx->flags & IORING_SETUP_IOPOLL))
+		if (ret2 == -EAGAIN && (req->flags & REQ_F_IOPOLL))
 			goto ret_eagain;
 
 		if (ret2 != req->cqe.res && ret2 >= 0 && need_complete_io(req)) {
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index ee7b49f47cb5..b651c63f6e20 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -110,7 +110,7 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
 	 * because iopoll completion data overlaps with the hash_node used
 	 * for tracking.
 	 */
-	if (ctx->flags & IORING_SETUP_IOPOLL)
+	if (req->flags & REQ_F_IOPOLL)
 		return;
 
 	if (!(cmd->flags & IORING_URING_CMD_CANCELABLE)) {
@@ -167,7 +167,7 @@ void __io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, u64 res2,
 		io_req_set_cqe32_extra(req, res2, 0);
 	}
 	io_req_uring_cleanup(req, issue_flags);
-	if (req->ctx->flags & IORING_SETUP_IOPOLL) {
+	if (req->flags & REQ_F_IOPOLL) {
 		/* order with io_iopoll_req_issued() checking ->iopoll_complete */
 		smp_store_release(&req->iopoll_completed, 1);
 	} else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
@@ -260,6 +260,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
 		if (!file->f_op->uring_cmd_iopoll)
 			return -EOPNOTSUPP;
+		req->flags |= REQ_F_IOPOLL;
 		issue_flags |= IO_URING_F_IOPOLL;
 		req->iopoll_completed = 0;
 		if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) {
-- 
cgit v1.2.3


From 033af2b3eb19c5ed96825572105bca3611635ada Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 26 Feb 2026 12:48:38 +0000
Subject: io_uring: introduce callback driven main loop

The io_uring_enter() has a fixed order of execution: it submits
requests, waits for completions, and returns to the user. Allow to
optionally replace it with a custom loop driven by a callback called
loop_step. The basic requirements to the callback is that it should be
able to submit requests, wait for completions, parse them and repeat.
Most of the communication including parameter passing can be implemented
via shared memory.

The callback should return IOU_LOOP_CONTINUE to continue execution or
IOU_LOOP_STOP to return to the user space. Note that the kernel may
decide to prematurely terminate it as well, e.g. in case the process was
signalled or killed.

The hook takes a structure with parameters. It can be used to ask the
kernel to wait for CQEs by setting cq_wait_idx to the CQE index it wants
to wait for. Spurious wake ups are possible and even likely, the callback
is expected to handle it. There will be more parameters in the future
like timeout.

It can be used with kernel callbacks, for example, as a slow path
deprecation mechanism overwiting SQEs and emulating the wanted
behaviour, however it's more useful together with BPF programs
implemented in following patches.

Note that keeping it separately from the normal io_uring wait loop
makes things much simpler and cleaner. It keeps it in one place instead
of spreading a bunch of checks in different places including disabling
the submission path. It holds the lock by default, which is a better fit
for BPF synchronisation and the loop execution model. It nicely avoids
existing quirks like forced wake ups on timeout request completion. And
it should be easier to implement new features.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://patch.msgid.link/a2d369aa1c9dd23ad7edac9220cffc563abcaed6.1772109579.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  5 +++
 io_uring/Makefile              |  2 +-
 io_uring/io_uring.c            | 11 +++++
 io_uring/loop.c                | 91 ++++++++++++++++++++++++++++++++++++++++++
 io_uring/loop.h                | 27 +++++++++++++
 io_uring/wait.h                |  1 +
 6 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 io_uring/loop.c
 create mode 100644 io_uring/loop.h

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 4dbd7083dd54..344b634b8989 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -41,6 +41,8 @@ enum io_uring_cmd_flags {
 	IO_URING_F_COMPAT		= (1 << 12),
 };
 
+struct iou_loop_params;
+
 struct io_wq_work_node {
 	struct io_wq_work_node *next;
 };
@@ -361,6 +363,9 @@ struct io_ring_ctx {
 		struct io_alloc_cache	rw_cache;
 		struct io_alloc_cache	cmd_cache;
 
+		int (*loop_step)(struct io_ring_ctx *ctx,
+				 struct iou_loop_params *);
+
 		/*
 		 * Any cancelable uring_cmd is added to this list in
 		 * ->uring_cmd() by io_uring_cmd_insert_cancelable()
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 931f9156132a..1c1f47de32a4 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_IO_URING)		+= io_uring.o opdef.o kbuf.o rsrc.o notif.o \
 					advise.o openclose.o statx.o timeout.o \
 					cancel.o waitid.o register.o \
 					truncate.o memmap.o alloc_cache.o \
-					query.o
+					query.o loop.o
 
 obj-$(CONFIG_IO_URING_ZCRX)	+= zcrx.o
 obj-$(CONFIG_IO_WQ)		+= io-wq.o
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 74cd62b44d94..960d36c49ffe 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -95,6 +95,7 @@
 #include "eventfd.h"
 #include "wait.h"
 #include "bpf_filter.h"
+#include "loop.h"
 
 #define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
 			  IOSQE_IO_HARDLINK | IOSQE_ASYNC)
@@ -588,6 +589,11 @@ void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 }
 
+void io_cqring_overflow_flush_locked(struct io_ring_ctx *ctx)
+{
+	__io_cqring_overflow_flush(ctx, false);
+}
+
 /* must to be called somewhat shortly after putting a request */
 static inline void io_put_task(struct io_kiocb *req)
 {
@@ -2571,6 +2577,11 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	if (unlikely(smp_load_acquire(&ctx->flags) & IORING_SETUP_R_DISABLED))
 		goto out;
 
+	if (io_has_loop_ops(ctx)) {
+		ret = io_run_loop(ctx);
+		goto out;
+	}
+
 	/*
 	 * For SQ polling, the thread will do all submissions and completions.
 	 * Just return the requested submit count, and wake the thread if
diff --git a/io_uring/loop.c b/io_uring/loop.c
new file mode 100644
index 000000000000..31843cc3e451
--- /dev/null
+++ b/io_uring/loop.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "io_uring.h"
+#include "wait.h"
+#include "loop.h"
+
+static inline int io_loop_nr_cqes(const struct io_ring_ctx *ctx,
+				  const struct iou_loop_params *lp)
+{
+	return lp->cq_wait_idx - READ_ONCE(ctx->rings->cq.tail);
+}
+
+static inline void io_loop_wait_start(struct io_ring_ctx *ctx, unsigned nr_wait)
+{
+	atomic_set(&ctx->cq_wait_nr, nr_wait);
+	set_current_state(TASK_INTERRUPTIBLE);
+}
+
+static inline void io_loop_wait_finish(struct io_ring_ctx *ctx)
+{
+	__set_current_state(TASK_RUNNING);
+	atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
+}
+
+static void io_loop_wait(struct io_ring_ctx *ctx, struct iou_loop_params *lp,
+			 unsigned nr_wait)
+{
+	io_loop_wait_start(ctx, nr_wait);
+
+	if (unlikely(io_local_work_pending(ctx) ||
+		     io_loop_nr_cqes(ctx, lp) <= 0) ||
+		     READ_ONCE(ctx->check_cq)) {
+		io_loop_wait_finish(ctx);
+		return;
+	}
+
+	mutex_unlock(&ctx->uring_lock);
+	schedule();
+	io_loop_wait_finish(ctx);
+	mutex_lock(&ctx->uring_lock);
+}
+
+static int __io_run_loop(struct io_ring_ctx *ctx)
+{
+	struct iou_loop_params lp = {};
+
+	while (true) {
+		int nr_wait, step_res;
+
+		if (unlikely(!ctx->loop_step))
+			return -EFAULT;
+
+		step_res = ctx->loop_step(ctx, &lp);
+		if (step_res == IOU_LOOP_STOP)
+			break;
+		if (step_res != IOU_LOOP_CONTINUE)
+			return -EINVAL;
+
+		nr_wait = io_loop_nr_cqes(ctx, &lp);
+		if (nr_wait > 0)
+			io_loop_wait(ctx, &lp, nr_wait);
+		else
+			nr_wait = 0;
+
+		if (task_work_pending(current)) {
+			mutex_unlock(&ctx->uring_lock);
+			io_run_task_work();
+			mutex_lock(&ctx->uring_lock);
+		}
+		if (unlikely(task_sigpending(current)))
+			return -EINTR;
+		io_run_local_work_locked(ctx, nr_wait);
+
+		if (READ_ONCE(ctx->check_cq) & BIT(IO_CHECK_CQ_OVERFLOW_BIT))
+			io_cqring_overflow_flush_locked(ctx);
+	}
+
+	return 0;
+}
+
+int io_run_loop(struct io_ring_ctx *ctx)
+{
+	int ret;
+
+	if (!io_allowed_run_tw(ctx))
+		return -EEXIST;
+
+	mutex_lock(&ctx->uring_lock);
+	ret = __io_run_loop(ctx);
+	mutex_unlock(&ctx->uring_lock);
+	return ret;
+}
diff --git a/io_uring/loop.h b/io_uring/loop.h
new file mode 100644
index 000000000000..d7718b9ce61e
--- /dev/null
+++ b/io_uring/loop.h
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef IOU_LOOP_H
+#define IOU_LOOP_H
+
+#include <linux/io_uring_types.h>
+
+struct iou_loop_params {
+	/*
+	 * The CQE index to wait for. Only serves as a hint and can still be
+	 * woken up earlier.
+	 */
+	__u32			cq_wait_idx;
+};
+
+enum {
+	IOU_LOOP_CONTINUE = 0,
+	IOU_LOOP_STOP,
+};
+
+static inline bool io_has_loop_ops(struct io_ring_ctx *ctx)
+{
+	return data_race(ctx->loop_step);
+}
+
+int io_run_loop(struct io_ring_ctx *ctx);
+
+#endif
diff --git a/io_uring/wait.h b/io_uring/wait.h
index 5e236f74e1af..037e512dd80c 100644
--- a/io_uring/wait.h
+++ b/io_uring/wait.h
@@ -25,6 +25,7 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 		   struct ext_arg *ext_arg);
 int io_run_task_work_sig(struct io_ring_ctx *ctx);
 void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx);
+void io_cqring_overflow_flush_locked(struct io_ring_ctx *ctx);
 
 static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
 {
-- 
cgit v1.2.3


From 98f37634b12b17ad5c56db8fb63cf9d7dc55d74c Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 26 Feb 2026 12:48:41 +0000
Subject: io_uring/bpf-ops: implement bpf ops registration

Implement BPF struct ops registration. It's registered off the BPF
path, and can be removed by BPF as well as io_uring. To protect it,
introduce a global lock synchronising registration. ctx->uring_lock can
be nested under it. ctx->bpf_ops is write protected by both locks and
so it's safe to read it under either of them.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://patch.msgid.link/1f46bffd76008de49cbafa2ad77d348810a4f69e.1772109579.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  5 +++
 io_uring/bpf-ops.c             | 92 +++++++++++++++++++++++++++++++++++++++++-
 io_uring/bpf-ops.h             |  8 ++++
 io_uring/io_uring.c            |  1 +
 4 files changed, 104 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 344b634b8989..28e5dbdac55b 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -8,6 +8,9 @@
 #include <linux/llist.h>
 #include <uapi/linux/io_uring.h>
 
+struct iou_loop_params;
+struct io_uring_bpf_ops;
+
 enum {
 	/*
 	 * A hint to not wake right away but delay until there are enough of
@@ -488,6 +491,8 @@ struct io_ring_ctx {
 	DECLARE_HASHTABLE(napi_ht, 4);
 #endif
 
+	struct io_uring_bpf_ops		*bpf_ops;
+
 	/*
 	 * Protection for resize vs mmap races - both the mmap and resize
 	 * side will need to grab this lock, to prevent either side from
diff --git a/io_uring/bpf-ops.c b/io_uring/bpf-ops.c
index 17518f4ecca9..e4b244337aa9 100644
--- a/io_uring/bpf-ops.c
+++ b/io_uring/bpf-ops.c
@@ -5,10 +5,11 @@
 
 #include "io_uring.h"
 #include "register.h"
+#include "loop.h"
 #include "memmap.h"
 #include "bpf-ops.h"
-#include "loop.h"
 
+static DEFINE_MUTEX(io_bpf_ctrl_mutex);
 static const struct btf_type *loop_params_type;
 
 __bpf_kfunc_start_defs();
@@ -143,16 +144,103 @@ static int bpf_io_init_member(const struct btf_type *t,
 			       const struct btf_member *member,
 			       void *kdata, const void *udata)
 {
+	u32 moff = __btf_member_bit_offset(t, member) / 8;
+	const struct io_uring_bpf_ops *uops = udata;
+	struct io_uring_bpf_ops *ops = kdata;
+
+	switch (moff) {
+	case offsetof(struct io_uring_bpf_ops, ring_fd):
+		ops->ring_fd = uops->ring_fd;
+		return 1;
+	}
+	return 0;
+}
+
+static int io_install_bpf(struct io_ring_ctx *ctx, struct io_uring_bpf_ops *ops)
+{
+	if (ctx->flags & (IORING_SETUP_SQPOLL | IORING_SETUP_IOPOLL))
+		return -EOPNOTSUPP;
+	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
+		return -EOPNOTSUPP;
+
+	if (ctx->bpf_ops)
+		return -EBUSY;
+	if (WARN_ON_ONCE(!ops->loop_step))
+		return -EINVAL;
+
+	ops->priv = ctx;
+	ctx->bpf_ops = ops;
+	ctx->loop_step = ops->loop_step;
 	return 0;
 }
 
 static int bpf_io_reg(void *kdata, struct bpf_link *link)
 {
-	return -EOPNOTSUPP;
+	struct io_uring_bpf_ops *ops = kdata;
+	struct io_ring_ctx *ctx;
+	struct file *file;
+	int ret = -EBUSY;
+
+	file = io_uring_register_get_file(ops->ring_fd, false);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+	ctx = file->private_data;
+
+	scoped_guard(mutex, &io_bpf_ctrl_mutex) {
+		guard(mutex)(&ctx->uring_lock);
+		ret = io_install_bpf(ctx, ops);
+	}
+
+	fput(file);
+	return ret;
+}
+
+static void io_eject_bpf(struct io_ring_ctx *ctx)
+{
+	struct io_uring_bpf_ops *ops = ctx->bpf_ops;
+
+	if (WARN_ON_ONCE(!ops))
+		return;
+	if (WARN_ON_ONCE(ops->priv != ctx))
+		return;
+
+	ops->priv = NULL;
+	ctx->bpf_ops = NULL;
+	ctx->loop_step = NULL;
 }
 
 static void bpf_io_unreg(void *kdata, struct bpf_link *link)
 {
+	struct io_uring_bpf_ops *ops = kdata;
+	struct io_ring_ctx *ctx;
+
+	guard(mutex)(&io_bpf_ctrl_mutex);
+	ctx = ops->priv;
+	if (ctx) {
+		guard(mutex)(&ctx->uring_lock);
+		if (WARN_ON_ONCE(ctx->bpf_ops != ops))
+			return;
+
+		io_eject_bpf(ctx);
+	}
+}
+
+void io_unregister_bpf_ops(struct io_ring_ctx *ctx)
+{
+	/*
+	 * ->bpf_ops is write protected by io_bpf_ctrl_mutex and uring_lock,
+	 * and read protected by either. Try to avoid taking the global lock
+	 * for rings that never had any bpf installed.
+	 */
+	scoped_guard(mutex, &ctx->uring_lock) {
+		if (!ctx->bpf_ops)
+			return;
+	}
+
+	guard(mutex)(&io_bpf_ctrl_mutex);
+	guard(mutex)(&ctx->uring_lock);
+	if (ctx->bpf_ops)
+		io_eject_bpf(ctx);
 }
 
 static struct bpf_struct_ops bpf_ring_ops = {
diff --git a/io_uring/bpf-ops.h b/io_uring/bpf-ops.h
index b9e589ad519a..b39b3fd3acda 100644
--- a/io_uring/bpf-ops.h
+++ b/io_uring/bpf-ops.h
@@ -17,4 +17,12 @@ struct io_uring_bpf_ops {
 	void *priv;
 };
 
+#ifdef CONFIG_IO_URING_BPF_OPS
+void io_unregister_bpf_ops(struct io_ring_ctx *ctx);
+#else
+static inline void io_unregister_bpf_ops(struct io_ring_ctx *ctx)
+{
+}
+#endif
+
 #endif /* IOU_BPF_OPS_H */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 0a80c8e6e633..d703f0a8b315 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2148,6 +2148,7 @@ static __cold void io_req_caches_free(struct io_ring_ctx *ctx)
 
 static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
+	io_unregister_bpf_ops(ctx);
 	io_sq_thread_finish(ctx);
 
 	mutex_lock(&ctx->uring_lock);
-- 
cgit v1.2.3


From 37a23d6f11938cd59927e3307b9b301624df8e8f Mon Sep 17 00:00:00 2001
From: Rosen Penev <rosenp@gmail.com>
Date: Wed, 11 Mar 2026 21:59:21 -0700
Subject: bus: mhi: host: Use kzalloc_flex

Change kzalloc + kzalloc to just kzalloc with a flexible array member.

Add __counted_by for extra runtime analysis when requested.

Move counting assignment immediately after allocation as required by
__counted_by.

Move mhi_buf definition as a complete definition as needed for flex
arrays. It's not a pointer anymore.

Signed-off-by: Rosen Penev <rosenp@gmail.com>
[mani: squashed https://lore.kernel.org/mhi/20260317-mhi-invalid-free-mhi-buffers-v1-1-8418a3ad604f@oss.qualcomm.com]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
Link: https://patch.msgid.link/20260312045921.7663-1-rosenp@gmail.com
---
 drivers/bus/mhi/host/boot.c | 22 +++-------------------
 include/linux/mhi.h         | 34 +++++++++++++++++-----------------
 2 files changed, 20 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bus/mhi/host/boot.c b/drivers/bus/mhi/host/boot.c
index f16a1e67a667..19c84913cfb9 100644
--- a/drivers/bus/mhi/host/boot.c
+++ b/drivers/bus/mhi/host/boot.c
@@ -308,7 +308,6 @@ static void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
 	struct mhi_buf *mhi_buf = image_info->mhi_buf;
 
 	dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len, mhi_buf->buf, mhi_buf->dma_addr);
-	kfree(image_info->mhi_buf);
 	kfree(image_info);
 }
 
@@ -322,7 +321,6 @@ void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl,
 		dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len,
 				  mhi_buf->buf, mhi_buf->dma_addr);
 
-	kfree(image_info->mhi_buf);
 	kfree(image_info);
 }
 
@@ -333,15 +331,10 @@ static int mhi_alloc_bhi_buffer(struct mhi_controller *mhi_cntrl,
 	struct image_info *img_info;
 	struct mhi_buf *mhi_buf;
 
-	img_info = kzalloc_obj(*img_info);
+	img_info = kzalloc_flex(*img_info, mhi_buf, 1);
 	if (!img_info)
 		return -ENOMEM;
 
-	/* Allocate memory for entry */
-	img_info->mhi_buf = kzalloc_obj(*img_info->mhi_buf);
-	if (!img_info->mhi_buf)
-		goto error_alloc_mhi_buf;
-
 	/* Allocate and populate vector table */
 	mhi_buf = img_info->mhi_buf;
 
@@ -358,8 +351,6 @@ static int mhi_alloc_bhi_buffer(struct mhi_controller *mhi_cntrl,
 	return 0;
 
 error_alloc_segment:
-	kfree(mhi_buf);
-error_alloc_mhi_buf:
 	kfree(img_info);
 
 	return -ENOMEM;
@@ -375,14 +366,11 @@ int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
 	struct image_info *img_info;
 	struct mhi_buf *mhi_buf;
 
-	img_info = kzalloc_obj(*img_info);
+	img_info = kzalloc_flex(*img_info, mhi_buf, segments);
 	if (!img_info)
 		return -ENOMEM;
 
-	/* Allocate memory for entries */
-	img_info->mhi_buf = kzalloc_objs(*img_info->mhi_buf, segments);
-	if (!img_info->mhi_buf)
-		goto error_alloc_mhi_buf;
+	img_info->entries = segments;
 
 	/* Allocate and populate vector table */
 	mhi_buf = img_info->mhi_buf;
@@ -402,7 +390,6 @@ int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
 	}
 
 	img_info->bhi_vec = img_info->mhi_buf[segments - 1].buf;
-	img_info->entries = segments;
 	*image_info = img_info;
 
 	return 0;
@@ -411,9 +398,6 @@ error_alloc_segment:
 	for (--i, --mhi_buf; i >= 0; i--, mhi_buf--)
 		dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len,
 				  mhi_buf->buf, mhi_buf->dma_addr);
-	kfree(img_info->mhi_buf);
-
-error_alloc_mhi_buf:
 	kfree(img_info);
 
 	return -ENOMEM;
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 88ccb3e14f48..fb3ba639f4f8 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -85,17 +85,33 @@ enum mhi_ch_type {
 	MHI_CH_TYPE_INBOUND_COALESCED = 3,
 };
 
+/**
+ * struct mhi_buf - MHI Buffer description
+ * @buf: Virtual address of the buffer
+ * @name: Buffer label. For offload channel, configurations name must be:
+ *        ECA - Event context array data
+ *        CCA - Channel context array data
+ * @dma_addr: IOMMU address of the buffer
+ * @len: # of bytes
+ */
+struct mhi_buf {
+	void *buf;
+	const char *name;
+	dma_addr_t dma_addr;
+	size_t len;
+};
+
 /**
  * struct image_info - Firmware and RDDM table
  * @mhi_buf: Buffer for firmware and RDDM table
  * @entries: # of entries in table
  */
 struct image_info {
-	struct mhi_buf *mhi_buf;
 	/* private: from internal.h */
 	struct bhi_vec_entry *bhi_vec;
 	/* public: */
 	u32 entries;
+	struct mhi_buf mhi_buf[] __counted_by(entries);
 };
 
 /**
@@ -488,22 +504,6 @@ struct mhi_result {
 	int transaction_status;
 };
 
-/**
- * struct mhi_buf - MHI Buffer description
- * @buf: Virtual address of the buffer
- * @name: Buffer label. For offload channel, configurations name must be:
- *        ECA - Event context array data
- *        CCA - Channel context array data
- * @dma_addr: IOMMU address of the buffer
- * @len: # of bytes
- */
-struct mhi_buf {
-	void *buf;
-	const char *name;
-	dma_addr_t dma_addr;
-	size_t len;
-};
-
 /**
  * struct mhi_driver - Structure representing a MHI client driver
  * @probe: CB function for client driver probe function
-- 
cgit v1.2.3


From e71e00127110dedc6a9e746178282b4dac97ed96 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 27 Feb 2026 11:25:36 -0400
Subject: iommupt: Add the RISC-V page table format

The RISC-V format is a fairly simple 5 level page table not unlike the x86
one. It has optional support for a single contiguous page size of 64k (16
x 4k).

The specification describes a 32-bit format, the general code can support
it via a #define but the iommu side implementation has been left off until
a user comes.

Tested-by: Vincent Chen <vincent.chen@sifive.com>
Acked-by: Paul Walmsley <pjw@kernel.org> # arch/riscv
Reviewed-by: Tomasz Jeznach <tjeznach@rivosinc.com>
Tested-by: Tomasz Jeznach <tjeznach@rivosinc.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/generic_pt/.kunitconfig        |   1 +
 drivers/iommu/generic_pt/Kconfig             |  11 +
 drivers/iommu/generic_pt/fmt/Makefile        |   2 +
 drivers/iommu/generic_pt/fmt/defs_riscv.h    |  29 +++
 drivers/iommu/generic_pt/fmt/iommu_riscv64.c |  11 +
 drivers/iommu/generic_pt/fmt/riscv.h         | 313 +++++++++++++++++++++++++++
 include/linux/generic_pt/common.h            |  16 ++
 include/linux/generic_pt/iommu.h             |  11 +
 8 files changed, 394 insertions(+)
 create mode 100644 drivers/iommu/generic_pt/fmt/defs_riscv.h
 create mode 100644 drivers/iommu/generic_pt/fmt/iommu_riscv64.c
 create mode 100644 drivers/iommu/generic_pt/fmt/riscv.h

(limited to 'include/linux')

diff --git a/drivers/iommu/generic_pt/.kunitconfig b/drivers/iommu/generic_pt/.kunitconfig
index a78b295f264d..0bb98fe581fe 100644
--- a/drivers/iommu/generic_pt/.kunitconfig
+++ b/drivers/iommu/generic_pt/.kunitconfig
@@ -5,6 +5,7 @@ CONFIG_DEBUG_GENERIC_PT=y
 CONFIG_IOMMU_PT=y
 CONFIG_IOMMU_PT_AMDV1=y
 CONFIG_IOMMU_PT_VTDSS=y
+CONFIG_IOMMU_PT_RISCV64=y
 CONFIG_IOMMU_PT_X86_64=y
 CONFIG_IOMMU_PT_KUNIT_TEST=y
 
diff --git a/drivers/iommu/generic_pt/Kconfig b/drivers/iommu/generic_pt/Kconfig
index ce4fb4786914..f4ed1add58b7 100644
--- a/drivers/iommu/generic_pt/Kconfig
+++ b/drivers/iommu/generic_pt/Kconfig
@@ -52,6 +52,16 @@ config IOMMU_PT_VTDSS
 
 	  Selected automatically by an IOMMU driver that uses this format.
 
+config IOMMU_PT_RISCV64
+       tristate "IOMMU page table for RISC-V 64 bit Sv57/Sv48/Sv39"
+	depends on !GENERIC_ATOMIC64 # for cmpxchg64
+	help
+	  iommu_domain implementation for RISC-V 64 bit 3/4/5 level page table.
+	  It supports 4K/2M/1G/512G/256T page sizes and can decode a sign
+	  extended portion of the 64 bit IOVA space.
+
+	  Selected automatically by an IOMMU driver that uses this format.
+
 config IOMMU_PT_X86_64
 	tristate "IOMMU page table for x86 64-bit, 4/5 levels"
 	depends on !GENERIC_ATOMIC64 # for cmpxchg64
@@ -66,6 +76,7 @@ config IOMMU_PT_KUNIT_TEST
 	tristate "IOMMU Page Table KUnit Test" if !KUNIT_ALL_TESTS
 	depends on KUNIT
 	depends on IOMMU_PT_AMDV1 || !IOMMU_PT_AMDV1
+	depends on IOMMU_PT_RISCV64 || !IOMMU_PT_RISCV64
 	depends on IOMMU_PT_X86_64 || !IOMMU_PT_X86_64
 	depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS
 	default KUNIT_ALL_TESTS
diff --git a/drivers/iommu/generic_pt/fmt/Makefile b/drivers/iommu/generic_pt/fmt/Makefile
index 976b49ec97dc..ea024d582594 100644
--- a/drivers/iommu/generic_pt/fmt/Makefile
+++ b/drivers/iommu/generic_pt/fmt/Makefile
@@ -5,6 +5,8 @@ iommu_pt_fmt-$(CONFIG_IOMMUFD_TEST) += mock
 
 iommu_pt_fmt-$(CONFIG_IOMMU_PT_VTDSS) += vtdss
 
+iommu_pt_fmt-$(CONFIG_IOMMU_PT_RISCV64) += riscv64
+
 iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86_64) += x86_64
 
 IOMMU_PT_KUNIT_TEST :=
diff --git a/drivers/iommu/generic_pt/fmt/defs_riscv.h b/drivers/iommu/generic_pt/fmt/defs_riscv.h
new file mode 100644
index 000000000000..cf67474d5eba
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/defs_riscv.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
+ *
+ */
+#ifndef __GENERIC_PT_FMT_DEFS_RISCV_H
+#define __GENERIC_PT_FMT_DEFS_RISCV_H
+
+#include <linux/generic_pt/common.h>
+#include <linux/types.h>
+
+#ifdef PT_RISCV_32BIT
+typedef u32 pt_riscv_entry_t;
+#define riscvpt_write_attrs riscv32pt_write_attrs
+#else
+typedef u64 pt_riscv_entry_t;
+#define riscvpt_write_attrs riscv64pt_write_attrs
+#endif
+
+typedef pt_riscv_entry_t pt_vaddr_t;
+typedef u64 pt_oaddr_t;
+
+struct riscvpt_write_attrs {
+	pt_riscv_entry_t descriptor_bits;
+	gfp_t gfp;
+};
+#define pt_write_attrs riscvpt_write_attrs
+
+#endif
diff --git a/drivers/iommu/generic_pt/fmt/iommu_riscv64.c b/drivers/iommu/generic_pt/fmt/iommu_riscv64.c
new file mode 100644
index 000000000000..cbf60fffa9bf
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/iommu_riscv64.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
+ */
+#define PT_FMT riscv
+#define PT_FMT_VARIANT 64
+#define PT_SUPPORTED_FEATURES                                  \
+	(BIT(PT_FEAT_SIGN_EXTEND) | BIT(PT_FEAT_FLUSH_RANGE) | \
+	 BIT(PT_FEAT_RISCV_SVNAPOT_64K))
+
+#include "iommu_template.h"
diff --git a/drivers/iommu/generic_pt/fmt/riscv.h b/drivers/iommu/generic_pt/fmt/riscv.h
new file mode 100644
index 000000000000..a7fef6266a36
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/riscv.h
@@ -0,0 +1,313 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
+ *
+ * RISC-V page table
+ *
+ * This is described in Sections:
+ *  12.3. Sv32: Page-Based 32-bit Virtual-Memory Systems
+ *  12.4. Sv39: Page-Based 39-bit Virtual-Memory System
+ *  12.5. Sv48: Page-Based 48-bit Virtual-Memory System
+ *  12.6. Sv57: Page-Based 57-bit Virtual-Memory System
+ * of the "The RISC-V Instruction Set Manual: Volume II"
+ *
+ * This includes the contiguous page extension from:
+ *  Chapter 13. "Svnapot" Extension for NAPOT Translation Contiguity,
+ *     Version 1.0
+ *
+ * The table format is sign extended and supports leafs in every level. The spec
+ * doesn't talk a lot about levels, but level here is the same as i=LEVELS-1 in
+ * the spec.
+ */
+#ifndef __GENERIC_PT_FMT_RISCV_H
+#define __GENERIC_PT_FMT_RISCV_H
+
+#include "defs_riscv.h"
+#include "../pt_defs.h"
+
+#include <linux/bitfield.h>
+#include <linux/container_of.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+enum {
+	PT_ITEM_WORD_SIZE = sizeof(pt_riscv_entry_t),
+#ifdef PT_RISCV_32BIT
+	PT_MAX_VA_ADDRESS_LG2 = 32,
+	PT_MAX_OUTPUT_ADDRESS_LG2 = 34,
+	PT_MAX_TOP_LEVEL = 1,
+#else
+	PT_MAX_VA_ADDRESS_LG2 = 57,
+	PT_MAX_OUTPUT_ADDRESS_LG2 = 56,
+	PT_MAX_TOP_LEVEL = 4,
+#endif
+	PT_GRANULE_LG2SZ = 12,
+	PT_TABLEMEM_LG2SZ = 12,
+
+	/* fsc.PPN is 44 bits wide, all PPNs are 4k aligned */
+	PT_TOP_PHYS_MASK = GENMASK_ULL(55, 12),
+};
+
+/* PTE bits */
+enum {
+	RISCVPT_V = BIT(0),
+	RISCVPT_R = BIT(1),
+	RISCVPT_W = BIT(2),
+	RISCVPT_X = BIT(3),
+	RISCVPT_U = BIT(4),
+	RISCVPT_G = BIT(5),
+	RISCVPT_A = BIT(6),
+	RISCVPT_D = BIT(7),
+	RISCVPT_RSW = GENMASK(9, 8),
+	RISCVPT_PPN32 = GENMASK(31, 10),
+
+	RISCVPT_PPN64 = GENMASK_ULL(53, 10),
+	RISCVPT_PPN64_64K = GENMASK_ULL(53, 14),
+	RISCVPT_PBMT = GENMASK_ULL(62, 61),
+	RISCVPT_N = BIT_ULL(63),
+
+	/* Svnapot encodings for ppn[0] */
+	RISCVPT_PPN64_64K_SZ = BIT(13),
+};
+
+#ifdef PT_RISCV_32BIT
+#define RISCVPT_PPN RISCVPT_PPN32
+#define pt_riscv pt_riscv_32
+#else
+#define RISCVPT_PPN RISCVPT_PPN64
+#define pt_riscv pt_riscv_64
+#endif
+
+#define common_to_riscvpt(common_ptr) \
+	container_of_const(common_ptr, struct pt_riscv, common)
+#define to_riscvpt(pts) common_to_riscvpt((pts)->range->common)
+
+static inline pt_oaddr_t riscvpt_table_pa(const struct pt_state *pts)
+{
+	return oalog2_mul(FIELD_GET(RISCVPT_PPN, pts->entry), PT_GRANULE_LG2SZ);
+}
+#define pt_table_pa riscvpt_table_pa
+
+static inline pt_oaddr_t riscvpt_entry_oa(const struct pt_state *pts)
+{
+	if (pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K) &&
+	    pts->entry & RISCVPT_N) {
+		PT_WARN_ON(pts->level != 0);
+		return oalog2_mul(FIELD_GET(RISCVPT_PPN64_64K, pts->entry),
+				  ilog2(SZ_64K));
+	}
+	return oalog2_mul(FIELD_GET(RISCVPT_PPN, pts->entry), PT_GRANULE_LG2SZ);
+}
+#define pt_entry_oa riscvpt_entry_oa
+
+static inline bool riscvpt_can_have_leaf(const struct pt_state *pts)
+{
+	return true;
+}
+#define pt_can_have_leaf riscvpt_can_have_leaf
+
+/* Body in pt_fmt_defaults.h */
+static inline unsigned int pt_table_item_lg2sz(const struct pt_state *pts);
+
+static inline unsigned int
+riscvpt_entry_num_contig_lg2(const struct pt_state *pts)
+{
+	if (PT_SUPPORTED_FEATURE(PT_FEAT_RISCV_SVNAPOT_64K) &&
+	    pts->entry & RISCVPT_N) {
+		PT_WARN_ON(!pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K));
+		PT_WARN_ON(pts->level);
+		return ilog2(16);
+	}
+	return ilog2(1);
+}
+#define pt_entry_num_contig_lg2 riscvpt_entry_num_contig_lg2
+
+static inline unsigned int riscvpt_num_items_lg2(const struct pt_state *pts)
+{
+	return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
+}
+#define pt_num_items_lg2 riscvpt_num_items_lg2
+
+static inline unsigned short
+riscvpt_contig_count_lg2(const struct pt_state *pts)
+{
+	if (pts->level == 0 && pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K))
+		return ilog2(16);
+	return ilog2(1);
+}
+#define pt_contig_count_lg2 riscvpt_contig_count_lg2
+
+static inline enum pt_entry_type riscvpt_load_entry_raw(struct pt_state *pts)
+{
+	const pt_riscv_entry_t *tablep = pt_cur_table(pts, pt_riscv_entry_t);
+	pt_riscv_entry_t entry;
+
+	pts->entry = entry = READ_ONCE(tablep[pts->index]);
+	if (!(entry & RISCVPT_V))
+		return PT_ENTRY_EMPTY;
+	if (pts->level == 0 ||
+	    ((entry & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) != 0))
+		return PT_ENTRY_OA;
+	return PT_ENTRY_TABLE;
+}
+#define pt_load_entry_raw riscvpt_load_entry_raw
+
+static inline void
+riscvpt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
+			   unsigned int oasz_lg2,
+			   const struct pt_write_attrs *attrs)
+{
+	pt_riscv_entry_t *tablep = pt_cur_table(pts, pt_riscv_entry_t);
+	pt_riscv_entry_t entry;
+
+	if (!pt_check_install_leaf_args(pts, oa, oasz_lg2))
+		return;
+
+	entry = RISCVPT_V |
+		FIELD_PREP(RISCVPT_PPN, log2_div(oa, PT_GRANULE_LG2SZ)) |
+		attrs->descriptor_bits;
+
+	if (pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K) && pts->level == 0 &&
+	    oasz_lg2 != PT_GRANULE_LG2SZ) {
+		u64 *end;
+
+		entry |= RISCVPT_N | RISCVPT_PPN64_64K_SZ;
+		tablep += pts->index;
+		end = tablep + log2_div(SZ_64K, PT_GRANULE_LG2SZ);
+		for (; tablep != end; tablep++)
+			WRITE_ONCE(*tablep, entry);
+	} else {
+		/* FIXME does riscv need this to be cmpxchg? */
+		WRITE_ONCE(tablep[pts->index], entry);
+	}
+	pts->entry = entry;
+}
+#define pt_install_leaf_entry riscvpt_install_leaf_entry
+
+static inline bool riscvpt_install_table(struct pt_state *pts,
+					 pt_oaddr_t table_pa,
+					 const struct pt_write_attrs *attrs)
+{
+	pt_riscv_entry_t entry;
+
+	entry = RISCVPT_V |
+		FIELD_PREP(RISCVPT_PPN, log2_div(table_pa, PT_GRANULE_LG2SZ));
+	return pt_table_install64(pts, entry);
+}
+#define pt_install_table riscvpt_install_table
+
+static inline void riscvpt_attr_from_entry(const struct pt_state *pts,
+					   struct pt_write_attrs *attrs)
+{
+	attrs->descriptor_bits =
+		pts->entry & (RISCVPT_R | RISCVPT_W | RISCVPT_X | RISCVPT_U |
+			      RISCVPT_G | RISCVPT_A | RISCVPT_D);
+}
+#define pt_attr_from_entry riscvpt_attr_from_entry
+
+/* --- iommu */
+#include <linux/generic_pt/iommu.h>
+#include <linux/iommu.h>
+
+#define pt_iommu_table pt_iommu_riscv_64
+
+/* The common struct is in the per-format common struct */
+static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
+{
+	return &container_of(iommu_table, struct pt_iommu_table, iommu)
+			->riscv_64pt.common;
+}
+
+static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
+{
+	return &container_of(common, struct pt_iommu_table, riscv_64pt.common)
+			->iommu;
+}
+
+static inline int riscvpt_iommu_set_prot(struct pt_common *common,
+					 struct pt_write_attrs *attrs,
+					 unsigned int iommu_prot)
+{
+	u64 pte;
+
+	pte = RISCVPT_A | RISCVPT_U;
+	if (iommu_prot & IOMMU_WRITE)
+		pte |= RISCVPT_W | RISCVPT_R | RISCVPT_D;
+	if (iommu_prot & IOMMU_READ)
+		pte |= RISCVPT_R;
+	if (!(iommu_prot & IOMMU_NOEXEC))
+		pte |= RISCVPT_X;
+
+	/* Caller must specify a supported combination of flags */
+	if (unlikely((pte & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) == 0))
+		return -EOPNOTSUPP;
+
+	attrs->descriptor_bits = pte;
+	return 0;
+}
+#define pt_iommu_set_prot riscvpt_iommu_set_prot
+
+static inline int
+riscvpt_iommu_fmt_init(struct pt_iommu_riscv_64 *iommu_table,
+		       const struct pt_iommu_riscv_64_cfg *cfg)
+{
+	struct pt_riscv *table = &iommu_table->riscv_64pt;
+
+	switch (cfg->common.hw_max_vasz_lg2) {
+	case 39:
+		pt_top_set_level(&table->common, 2);
+		break;
+	case 48:
+		pt_top_set_level(&table->common, 3);
+		break;
+	case 57:
+		pt_top_set_level(&table->common, 4);
+		break;
+	default:
+		return -EINVAL;
+	}
+	table->common.max_oasz_lg2 =
+		min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
+	return 0;
+}
+#define pt_iommu_fmt_init riscvpt_iommu_fmt_init
+
+static inline void
+riscvpt_iommu_fmt_hw_info(struct pt_iommu_riscv_64 *table,
+			  const struct pt_range *top_range,
+			  struct pt_iommu_riscv_64_hw_info *info)
+{
+	phys_addr_t top_phys = virt_to_phys(top_range->top_table);
+
+	info->ppn = oalog2_div(top_phys, PT_GRANULE_LG2SZ);
+	PT_WARN_ON(top_phys & ~PT_TOP_PHYS_MASK);
+
+	/*
+	 * See Table 3. Encodings of iosatp.MODE field" for DC.tx.SXL = 0:
+	 *  8 = Sv39 = top level 2
+	 *  9 = Sv38 = top level 3
+	 *  10 = Sv57 = top level 4
+	 */
+	info->fsc_iosatp_mode = top_range->top_level + 6;
+}
+#define pt_iommu_fmt_hw_info riscvpt_iommu_fmt_hw_info
+
+#if defined(GENERIC_PT_KUNIT)
+static const struct pt_iommu_riscv_64_cfg riscv_64_kunit_fmt_cfgs[] = {
+	[0] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
+		.common.hw_max_oasz_lg2 = 56,
+		.common.hw_max_vasz_lg2 = 39 },
+	[1] = { .common.features = 0,
+		.common.hw_max_oasz_lg2 = 56,
+		.common.hw_max_vasz_lg2 = 48 },
+	[2] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
+		.common.hw_max_oasz_lg2 = 56,
+		.common.hw_max_vasz_lg2 = 57 },
+};
+#define kunit_fmt_cfgs riscv_64_kunit_fmt_cfgs
+enum {
+	KUNIT_FMT_FEATURES = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
+};
+#endif
+
+#endif
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
index 6a9a1acb5aad..fc5d0b5edadc 100644
--- a/include/linux/generic_pt/common.h
+++ b/include/linux/generic_pt/common.h
@@ -175,6 +175,22 @@ enum {
 	PT_FEAT_VTDSS_FORCE_WRITEABLE,
 };
 
+struct pt_riscv_32 {
+	struct pt_common common;
+};
+
+struct pt_riscv_64 {
+	struct pt_common common;
+};
+
+enum {
+	/*
+	 * Support the 64k contiguous page size following the Svnapot extension.
+	 */
+	PT_FEAT_RISCV_SVNAPOT_64K = PT_FEAT_FMT_START,
+
+};
+
 struct pt_x86_64 {
 	struct pt_common common;
 };
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index 9eefbb74efd0..49d9addb98c5 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -275,6 +275,17 @@ struct pt_iommu_vtdss_hw_info {
 
 IOMMU_FORMAT(vtdss, vtdss_pt);
 
+struct pt_iommu_riscv_64_cfg {
+	struct pt_iommu_cfg common;
+};
+
+struct pt_iommu_riscv_64_hw_info {
+	u64 ppn;
+	u8 fsc_iosatp_mode;
+};
+
+IOMMU_FORMAT(riscv_64, riscv_64pt);
+
 struct pt_iommu_x86_64_cfg {
 	struct pt_iommu_cfg common;
 	/* 4 is a 57 bit 5 level table */
-- 
cgit v1.2.3


From 99fb8afa16add85ed016baee9735231bca0c32b4 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 27 Feb 2026 15:30:10 -0400
Subject: iommupt: Directly call iommupt's unmap_range()

The common algorithm in iommupt does not require the iommu_pgsize()
calculations, it can directly unmap any arbitrary range. Add a new function
pointer to directly call an iommupt unmap_range op and make
__iommu_unmap() call it directly.

Gives about a 5% gain on single page unmappings.

The function pointer is run through pt_iommu_ops instead of
iommu_domain_ops to discourage using it outside iommupt. All drivers with
their own page tables should continue to use the simplified
map/unmap_pages() style interfaces.

Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/generic_pt/iommu_pt.h | 29 ++++-------------------------
 drivers/iommu/iommu.c               | 27 +++++++++++++++++++++------
 include/linux/generic_pt/iommu.h    | 37 +++++++++++++++++++++++++++++++------
 include/linux/iommu.h               |  1 +
 4 files changed, 57 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 9c08bb594e41..a627c26fa62d 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -1031,34 +1031,12 @@ start_oa:
 	return ret;
 }
 
-/**
- * unmap_pages() - Make a range of IOVA empty/not present
- * @domain: Domain to manipulate
- * @iova: IO virtual address to start
- * @pgsize: Length of each page
- * @pgcount: Length of the range in pgsize units starting from @iova
- * @iotlb_gather: Gather struct that must be flushed on return
- *
- * unmap_pages() will remove a translation created by map_pages(). It cannot
- * subdivide a mapping created by map_pages(), so it should be called with IOVA
- * ranges that match those passed to map_pages(). The IOVA range can aggregate
- * contiguous map_pages() calls so long as no individual range is split.
- *
- * Context: The caller must hold a write range lock that includes
- * the whole range.
- *
- * Returns: Number of bytes of VA unmapped. iova + res will be the point
- * unmapping stopped.
- */
-size_t DOMAIN_NS(unmap_pages)(struct iommu_domain *domain, unsigned long iova,
-			      size_t pgsize, size_t pgcount,
+static size_t NS(unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
+			      dma_addr_t len,
 			      struct iommu_iotlb_gather *iotlb_gather)
 {
-	struct pt_iommu *iommu_table =
-		container_of(domain, struct pt_iommu, domain);
 	struct pt_unmap_args unmap = { .free_list = IOMMU_PAGES_LIST_INIT(
 					       unmap.free_list) };
-	pt_vaddr_t len = pgsize * pgcount;
 	struct pt_range range;
 	int ret;
 
@@ -1073,7 +1051,6 @@ size_t DOMAIN_NS(unmap_pages)(struct iommu_domain *domain, unsigned long iova,
 
 	return unmap.unmapped;
 }
-EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(unmap_pages), "GENERIC_PT_IOMMU");
 
 static void NS(get_info)(struct pt_iommu *iommu_table,
 			 struct pt_iommu_info *info)
@@ -1121,6 +1098,7 @@ static void NS(deinit)(struct pt_iommu *iommu_table)
 }
 
 static const struct pt_iommu_ops NS(ops) = {
+	.unmap_range = NS(unmap_range),
 #if IS_ENABLED(CONFIG_IOMMUFD_DRIVER) && defined(pt_entry_is_write_dirty) && \
 	IS_ENABLED(CONFIG_IOMMUFD_TEST) && defined(pt_entry_make_write_dirty)
 	.set_dirty = NS(set_dirty),
@@ -1183,6 +1161,7 @@ static int pt_iommu_init_domain(struct pt_iommu *iommu_table,
 
 	domain->type = __IOMMU_DOMAIN_PAGING;
 	domain->pgsize_bitmap = info.pgsize_bitmap;
+	domain->is_iommupt = true;
 
 	if (pt_feature(common, PT_FEAT_DYNAMIC_TOP))
 		range = _pt_top_range(common,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 35db51780954..f68269707101 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -34,6 +34,7 @@
 #include <linux/sched/mm.h>
 #include <linux/msi.h>
 #include <uapi/linux/iommufd.h>
+#include <linux/generic_pt/iommu.h>
 
 #include "dma-iommu.h"
 #include "iommu-priv.h"
@@ -2666,13 +2667,12 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
 }
 EXPORT_SYMBOL_GPL(iommu_map);
 
-static size_t __iommu_unmap(struct iommu_domain *domain,
-			    unsigned long iova, size_t size,
-			    struct iommu_iotlb_gather *iotlb_gather)
+static size_t
+__iommu_unmap_domain_pgtbl(struct iommu_domain *domain, unsigned long iova,
+			   size_t size, struct iommu_iotlb_gather *iotlb_gather)
 {
 	const struct iommu_domain_ops *ops = domain->ops;
 	size_t unmapped_page, unmapped = 0;
-	unsigned long orig_iova = iova;
 	unsigned int min_pagesz;
 
 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
@@ -2718,8 +2718,23 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 		unmapped += unmapped_page;
 	}
 
-	trace_unmap(orig_iova, size, unmapped);
-	iommu_debug_unmap_end(domain, orig_iova, size, unmapped);
+	return unmapped;
+}
+
+static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+			    size_t size,
+			    struct iommu_iotlb_gather *iotlb_gather)
+{
+	struct pt_iommu *pt = iommupt_from_domain(domain);
+	size_t unmapped;
+
+	if (pt)
+		unmapped = pt->ops->unmap_range(pt, iova, size, iotlb_gather);
+	else
+		unmapped = __iommu_unmap_domain_pgtbl(domain, iova, size,
+						      iotlb_gather);
+	trace_unmap(iova, size, unmapped);
+	iommu_debug_unmap_end(domain, iova, size, unmapped);
 	return unmapped;
 }
 
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index 49d9addb98c5..0da971134a37 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -66,6 +66,13 @@ struct pt_iommu {
 	struct device *iommu_device;
 };
 
+static inline struct pt_iommu *iommupt_from_domain(struct iommu_domain *domain)
+{
+	if (!IS_ENABLED(CONFIG_IOMMU_PT) || !domain->is_iommupt)
+		return NULL;
+	return container_of(domain, struct pt_iommu, domain);
+}
+
 /**
  * struct pt_iommu_info - Details about the IOMMU page table
  *
@@ -80,6 +87,29 @@ struct pt_iommu_info {
 };
 
 struct pt_iommu_ops {
+	/**
+	 * @unmap_range: Make a range of IOVA empty/not present
+	 * @iommu_table: Table to manipulate
+	 * @iova: IO virtual address to start
+	 * @len: Length of the range starting from @iova
+	 * @iotlb_gather: Gather struct that must be flushed on return
+	 *
+	 * unmap_range() will remove a translation created by map_range(). It
+	 * cannot subdivide a mapping created by map_range(), so it should be
+	 * called with IOVA ranges that match those passed to map_pages. The
+	 * IOVA range can aggregate contiguous map_range() calls so long as no
+	 * individual range is split.
+	 *
+	 * Context: The caller must hold a write range lock that includes
+	 * the whole range.
+	 *
+	 * Returns: Number of bytes of VA unmapped. iova + res will be the
+	 * point unmapping stopped.
+	 */
+	size_t (*unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
+			      dma_addr_t len,
+			      struct iommu_iotlb_gather *iotlb_gather);
+
 	/**
 	 * @set_dirty: Make the iova write dirty
 	 * @iommu_table: Table to manipulate
@@ -198,10 +228,6 @@ struct pt_iommu_cfg {
 				       unsigned long iova, phys_addr_t paddr,  \
 				       size_t pgsize, size_t pgcount,          \
 				       int prot, gfp_t gfp, size_t *mapped);   \
-	size_t pt_iommu_##fmt##_unmap_pages(                                   \
-		struct iommu_domain *domain, unsigned long iova,               \
-		size_t pgsize, size_t pgcount,                                 \
-		struct iommu_iotlb_gather *iotlb_gather);                      \
 	int pt_iommu_##fmt##_read_and_clear_dirty(                             \
 		struct iommu_domain *domain, unsigned long iova, size_t size,  \
 		unsigned long flags, struct iommu_dirty_bitmap *dirty);        \
@@ -223,8 +249,7 @@ struct pt_iommu_cfg {
  */
 #define IOMMU_PT_DOMAIN_OPS(fmt)                        \
 	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \
-	.map_pages = &pt_iommu_##fmt##_map_pages,       \
-	.unmap_pages = &pt_iommu_##fmt##_unmap_pages
+	.map_pages = &pt_iommu_##fmt##_map_pages
 #define IOMMU_PT_DIRTY_OPS(fmt) \
 	.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 54b8b48c762e..7ca648c01336 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -223,6 +223,7 @@ enum iommu_domain_cookie_type {
 struct iommu_domain {
 	unsigned type;
 	enum iommu_domain_cookie_type cookie_type;
+	bool is_iommupt;
 	const struct iommu_domain_ops *ops;
 	const struct iommu_dirty_ops *dirty_ops;
 	const struct iommu_ops *owner; /* Whose domain_alloc we came from */
-- 
cgit v1.2.3


From d6c65b0fd6218bd21ed0be7a8d3218e8f6dc91de Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 27 Feb 2026 15:30:11 -0400
Subject: iommupt: Avoid rewalking during map

Currently the core code provides a simplified interface to drivers where
it fragments a requested multi-page map into single page size steps after
doing all the calculations to figure out what page size is
appropriate. Each step rewalks the page tables from the start.

Since iommupt has a single implementation of the mapping algorithm it can
internally compute each step as it goes while retaining its current
position in the walk.

Add a new function pt_pgsz_count() which computes the same page size
fragement of a large mapping operations.

Compute the next fragment when all the leaf entries of the current
fragement have been written, then continue walking from the current
point.

The function pointer is run through pt_iommu_ops instead of
iommu_domain_ops to discourage using it outside iommupt. All drivers with
their own page tables should continue to use the simplified map_pages()
style interfaces.

Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/generic_pt/iommu_pt.h         | 133 +++++++++++++++++-----------
 drivers/iommu/generic_pt/kunit_generic_pt.h |  12 +++
 drivers/iommu/generic_pt/pt_iter.h          |  22 +++++
 drivers/iommu/iommu.c                       |  39 ++++++--
 include/linux/generic_pt/iommu.h            |  34 +++++--
 5 files changed, 175 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index a627c26fa62d..17b72dbd7d51 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -477,6 +477,7 @@ struct pt_iommu_map_args {
 	pt_oaddr_t oa;
 	unsigned int leaf_pgsize_lg2;
 	unsigned int leaf_level;
+	pt_vaddr_t num_leaves;
 };
 
 /*
@@ -529,11 +530,15 @@ static int clear_contig(const struct pt_state *start_pts,
 static int __map_range_leaf(struct pt_range *range, void *arg,
 			    unsigned int level, struct pt_table_p *table)
 {
+	struct pt_iommu *iommu_table = iommu_from_common(range->common);
 	struct pt_state pts = pt_init(range, level, table);
 	struct pt_iommu_map_args *map = arg;
 	unsigned int leaf_pgsize_lg2 = map->leaf_pgsize_lg2;
 	unsigned int start_index;
 	pt_oaddr_t oa = map->oa;
+	unsigned int num_leaves;
+	unsigned int orig_end;
+	pt_vaddr_t last_va;
 	unsigned int step;
 	bool need_contig;
 	int ret = 0;
@@ -547,6 +552,15 @@ static int __map_range_leaf(struct pt_range *range, void *arg,
 
 	_pt_iter_first(&pts);
 	start_index = pts.index;
+	orig_end = pts.end_index;
+	if (pts.index + map->num_leaves < pts.end_index) {
+		/* Need to stop in the middle of the table to change sizes */
+		pts.end_index = pts.index + map->num_leaves;
+		num_leaves = 0;
+	} else {
+		num_leaves = map->num_leaves - (pts.end_index - pts.index);
+	}
+
 	do {
 		pts.type = pt_load_entry_raw(&pts);
 		if (pts.type != PT_ENTRY_EMPTY || need_contig) {
@@ -572,7 +586,40 @@ static int __map_range_leaf(struct pt_range *range, void *arg,
 	flush_writes_range(&pts, start_index, pts.index);
 
 	map->oa = oa;
-	return ret;
+	map->num_leaves = num_leaves;
+	if (ret || num_leaves)
+		return ret;
+
+	/* range->va is not valid if we reached the end of the table */
+	pts.index -= step;
+	pt_index_to_va(&pts);
+	pts.index += step;
+	last_va = range->va + log2_to_int(leaf_pgsize_lg2);
+
+	if (last_va - 1 == range->last_va) {
+		PT_WARN_ON(pts.index != orig_end);
+		return 0;
+	}
+
+	/*
+	 * Reached a point where the page size changed, compute the new
+	 * parameters.
+	 */
+	map->leaf_pgsize_lg2 = pt_compute_best_pgsize(
+		iommu_table->domain.pgsize_bitmap, last_va, range->last_va, oa);
+	map->leaf_level =
+		pt_pgsz_lg2_to_level(range->common, map->leaf_pgsize_lg2);
+	map->num_leaves = pt_pgsz_count(iommu_table->domain.pgsize_bitmap,
+					last_va, range->last_va, oa,
+					map->leaf_pgsize_lg2);
+
+	/* Didn't finish this table level, caller will repeat it */
+	if (pts.index != orig_end) {
+		if (pts.index != start_index)
+			pt_index_to_va(&pts);
+		return -EAGAIN;
+	}
+	return 0;
 }
 
 static int __map_range(struct pt_range *range, void *arg, unsigned int level,
@@ -595,14 +642,9 @@ static int __map_range(struct pt_range *range, void *arg, unsigned int level,
 			if (pts.type != PT_ENTRY_EMPTY)
 				return -EADDRINUSE;
 			ret = pt_iommu_new_table(&pts, &map->attrs);
-			if (ret) {
-				/*
-				 * Racing with another thread installing a table
-				 */
-				if (ret == -EAGAIN)
-					continue;
+			/* EAGAIN on a race will loop again */
+			if (ret)
 				return ret;
-			}
 		} else {
 			pts.table_lower = pt_table_ptr(&pts);
 			/*
@@ -626,10 +668,12 @@ static int __map_range(struct pt_range *range, void *arg, unsigned int level,
 		 * The already present table can possibly be shared with another
 		 * concurrent map.
 		 */
-		if (map->leaf_level == level - 1)
-			ret = pt_descend(&pts, arg, __map_range_leaf);
-		else
-			ret = pt_descend(&pts, arg, __map_range);
+		do {
+			if (map->leaf_level == level - 1)
+				ret = pt_descend(&pts, arg, __map_range_leaf);
+			else
+				ret = pt_descend(&pts, arg, __map_range);
+		} while (ret == -EAGAIN);
 		if (ret)
 			return ret;
 
@@ -637,6 +681,14 @@ static int __map_range(struct pt_range *range, void *arg, unsigned int level,
 		pt_index_to_va(&pts);
 		if (pts.index >= pts.end_index)
 			break;
+
+		/*
+		 * This level is currently running __map_range_leaf() which is
+		 * not correct if the target level has been updated to this
+		 * level. Have the caller invoke __map_range_leaf.
+		 */
+		if (map->leaf_level == level)
+			return -EAGAIN;
 	} while (true);
 	return 0;
 }
@@ -808,12 +860,13 @@ static int check_map_range(struct pt_iommu *iommu_table, struct pt_range *range,
 static int do_map(struct pt_range *range, struct pt_common *common,
 		  bool single_page, struct pt_iommu_map_args *map)
 {
+	int ret;
+
 	/*
 	 * The __map_single_page() fast path does not support DMA_INCOHERENT
 	 * flushing to keep its .text small.
 	 */
 	if (single_page && !pt_feature(common, PT_FEAT_DMA_INCOHERENT)) {
-		int ret;
 
 		ret = pt_walk_range(range, __map_single_page, map);
 		if (ret != -EAGAIN)
@@ -821,50 +874,25 @@ static int do_map(struct pt_range *range, struct pt_common *common,
 		/* EAGAIN falls through to the full path */
 	}
 
-	if (map->leaf_level == range->top_level)
-		return pt_walk_range(range, __map_range_leaf, map);
-	return pt_walk_range(range, __map_range, map);
+	do {
+		if (map->leaf_level == range->top_level)
+			ret = pt_walk_range(range, __map_range_leaf, map);
+		else
+			ret = pt_walk_range(range, __map_range, map);
+	} while (ret == -EAGAIN);
+	return ret;
 }
 
-/**
- * map_pages() - Install translation for an IOVA range
- * @domain: Domain to manipulate
- * @iova: IO virtual address to start
- * @paddr: Physical/Output address to start
- * @pgsize: Length of each page
- * @pgcount: Length of the range in pgsize units starting from @iova
- * @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO
- * @gfp: GFP flags for any memory allocations
- * @mapped: Total bytes successfully mapped
- *
- * The range starting at IOVA will have paddr installed into it. The caller
- * must specify a valid pgsize and pgcount to segment the range into compatible
- * blocks.
- *
- * On error the caller will probably want to invoke unmap on the range from iova
- * up to the amount indicated by @mapped to return the table back to an
- * unchanged state.
- *
- * Context: The caller must hold a write range lock that includes the whole
- * range.
- *
- * Returns: -ERRNO on failure, 0 on success. The number of bytes of VA that were
- * mapped are added to @mapped, @mapped is not zerod first.
- */
-int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
-			 phys_addr_t paddr, size_t pgsize, size_t pgcount,
-			 int prot, gfp_t gfp, size_t *mapped)
+static int NS(map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
+			 phys_addr_t paddr, dma_addr_t len, unsigned int prot,
+			 gfp_t gfp, size_t *mapped)
 {
-	struct pt_iommu *iommu_table =
-		container_of(domain, struct pt_iommu, domain);
 	pt_vaddr_t pgsize_bitmap = iommu_table->domain.pgsize_bitmap;
 	struct pt_common *common = common_from_iommu(iommu_table);
 	struct iommu_iotlb_gather iotlb_gather;
-	pt_vaddr_t len = pgsize * pgcount;
 	struct pt_iommu_map_args map = {
 		.iotlb_gather = &iotlb_gather,
 		.oa = paddr,
-		.leaf_pgsize_lg2 = vaffs(pgsize),
 	};
 	bool single_page = false;
 	struct pt_range range;
@@ -892,13 +920,13 @@ int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
 		return ret;
 
 	/* Calculate target page size and level for the leaves */
-	if (pt_has_system_page_size(common) && pgsize == PAGE_SIZE &&
-	    pgcount == 1) {
+	if (pt_has_system_page_size(common) && len == PAGE_SIZE) {
 		PT_WARN_ON(!(pgsize_bitmap & PAGE_SIZE));
 		if (log2_mod(iova | paddr, PAGE_SHIFT))
 			return -ENXIO;
 		map.leaf_pgsize_lg2 = PAGE_SHIFT;
 		map.leaf_level = 0;
+		map.num_leaves = 1;
 		single_page = true;
 	} else {
 		map.leaf_pgsize_lg2 = pt_compute_best_pgsize(
@@ -907,6 +935,9 @@ int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
 			return -ENXIO;
 		map.leaf_level =
 			pt_pgsz_lg2_to_level(common, map.leaf_pgsize_lg2);
+		map.num_leaves = pt_pgsz_count(pgsize_bitmap, range.va,
+					       range.last_va, paddr,
+					       map.leaf_pgsize_lg2);
 	}
 
 	ret = check_map_range(iommu_table, &range, &map);
@@ -929,7 +960,6 @@ int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
 	*mapped += map.oa - paddr;
 	return ret;
 }
-EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(map_pages), "GENERIC_PT_IOMMU");
 
 struct pt_unmap_args {
 	struct iommu_pages_list free_list;
@@ -1098,6 +1128,7 @@ static void NS(deinit)(struct pt_iommu *iommu_table)
 }
 
 static const struct pt_iommu_ops NS(ops) = {
+	.map_range = NS(map_range),
 	.unmap_range = NS(unmap_range),
 #if IS_ENABLED(CONFIG_IOMMUFD_DRIVER) && defined(pt_entry_is_write_dirty) && \
 	IS_ENABLED(CONFIG_IOMMUFD_TEST) && defined(pt_entry_make_write_dirty)
diff --git a/drivers/iommu/generic_pt/kunit_generic_pt.h b/drivers/iommu/generic_pt/kunit_generic_pt.h
index 68278bf15cfe..374e475f591e 100644
--- a/drivers/iommu/generic_pt/kunit_generic_pt.h
+++ b/drivers/iommu/generic_pt/kunit_generic_pt.h
@@ -312,6 +312,17 @@ static void test_best_pgsize(struct kunit *test)
 	}
 }
 
+static void test_pgsz_count(struct kunit *test)
+{
+	KUNIT_EXPECT_EQ(test,
+			pt_pgsz_count(SZ_4K, 0, SZ_1G - 1, 0, ilog2(SZ_4K)),
+			SZ_1G / SZ_4K);
+	KUNIT_EXPECT_EQ(test,
+			pt_pgsz_count(SZ_2M | SZ_4K, SZ_4K, SZ_1G - 1, SZ_4K,
+				      ilog2(SZ_4K)),
+			(SZ_2M - SZ_4K) / SZ_4K);
+}
+
 /*
  * Check that pt_install_table() and pt_table_pa() match
  */
@@ -770,6 +781,7 @@ static struct kunit_case generic_pt_test_cases[] = {
 	KUNIT_CASE_FMT(test_init),
 	KUNIT_CASE_FMT(test_bitops),
 	KUNIT_CASE_FMT(test_best_pgsize),
+	KUNIT_CASE_FMT(test_pgsz_count),
 	KUNIT_CASE_FMT(test_table_ptr),
 	KUNIT_CASE_FMT(test_max_va),
 	KUNIT_CASE_FMT(test_table_radix),
diff --git a/drivers/iommu/generic_pt/pt_iter.h b/drivers/iommu/generic_pt/pt_iter.h
index c0d8617cce29..3e45dbde6b83 100644
--- a/drivers/iommu/generic_pt/pt_iter.h
+++ b/drivers/iommu/generic_pt/pt_iter.h
@@ -569,6 +569,28 @@ static inline unsigned int pt_compute_best_pgsize(pt_vaddr_t pgsz_bitmap,
 	return pgsz_lg2;
 }
 
+/*
+ * Return the number of pgsize_lg2 leaf entries that can be mapped for
+ * va to oa. This accounts for any requirement to reduce or increase the page
+ * size across the VA range.
+ */
+static inline pt_vaddr_t pt_pgsz_count(pt_vaddr_t pgsz_bitmap, pt_vaddr_t va,
+				       pt_vaddr_t last_va, pt_oaddr_t oa,
+				       unsigned int pgsize_lg2)
+{
+	pt_vaddr_t len = last_va - va + 1;
+	pt_vaddr_t next_pgsizes = log2_set_mod(pgsz_bitmap, 0, pgsize_lg2 + 1);
+
+	if (next_pgsizes) {
+		unsigned int next_pgsize_lg2 = vaffs(next_pgsizes);
+
+		if (log2_mod(va ^ oa, next_pgsize_lg2) == 0)
+			len = min(len, log2_set_mod_max(va, next_pgsize_lg2) -
+					       va + 1);
+	}
+	return log2_div(len, pgsize_lg2);
+}
+
 #define _PT_MAKE_CALL_LEVEL(fn)                                          \
 	static __always_inline int fn(struct pt_range *range, void *arg, \
 				      unsigned int level,                \
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f68269707101..33cee64686e3 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2569,14 +2569,14 @@ out_set_count:
 	return pgsize;
 }
 
-int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
-		phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int __iommu_map_domain_pgtbl(struct iommu_domain *domain,
+				    unsigned long iova, phys_addr_t paddr,
+				    size_t size, int prot, gfp_t gfp)
 {
 	const struct iommu_domain_ops *ops = domain->ops;
 	unsigned long orig_iova = iova;
 	unsigned int min_pagesz;
 	size_t orig_size = size;
-	phys_addr_t orig_paddr = paddr;
 	int ret = 0;
 
 	might_sleep_if(gfpflags_allow_blocking(gfp));
@@ -2633,12 +2633,9 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
 	/* unroll mapping in case something went wrong */
 	if (ret) {
 		iommu_unmap(domain, orig_iova, orig_size - size);
-	} else {
-		trace_map(orig_iova, orig_paddr, orig_size);
-		iommu_debug_map(domain, orig_paddr, orig_size);
+		return ret;
 	}
-
-	return ret;
+	return 0;
 }
 
 int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)
@@ -2650,6 +2647,32 @@ int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)
 	return ops->iotlb_sync_map(domain, iova, size);
 }
 
+int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
+		phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+	struct pt_iommu *pt = iommupt_from_domain(domain);
+	int ret;
+
+	if (pt) {
+		size_t mapped = 0;
+
+		ret = pt->ops->map_range(pt, iova, paddr, size, prot, gfp,
+					 &mapped);
+		if (ret) {
+			iommu_unmap(domain, iova, mapped);
+			return ret;
+		}
+		return 0;
+	}
+	ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, gfp);
+	if (!ret)
+		return ret;
+
+	trace_map(iova, paddr, size);
+	iommu_debug_map(domain, paddr, size);
+	return 0;
+}
+
 int iommu_map(struct iommu_domain *domain, unsigned long iova,
 	      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index 0da971134a37..dd0edd02a48a 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -87,6 +87,33 @@ struct pt_iommu_info {
 };
 
 struct pt_iommu_ops {
+	/**
+	 * @map_range: Install translation for an IOVA range
+	 * @iommu_table: Table to manipulate
+	 * @iova: IO virtual address to start
+	 * @paddr: Physical/Output address to start
+	 * @len: Length of the range starting from @iova
+	 * @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO
+	 * @gfp: GFP flags for any memory allocations
+	 *
+	 * The range starting at IOVA will have paddr installed into it. The
+	 * rage is automatically segmented into optimally sized table entries,
+	 * and can have any valid alignment.
+	 *
+	 * On error the caller will probably want to invoke unmap on the range
+	 * from iova up to the amount indicated by @mapped to return the table
+	 * back to an unchanged state.
+	 *
+	 * Context: The caller must hold a write range lock that includes
+	 * the whole range.
+	 *
+	 * Returns: -ERRNO on failure, 0 on success. The number of bytes of VA
+	 * that were mapped are added to @mapped, @mapped is not zerod first.
+	 */
+	int (*map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
+			 phys_addr_t paddr, dma_addr_t len, unsigned int prot,
+			 gfp_t gfp, size_t *mapped);
+
 	/**
 	 * @unmap_range: Make a range of IOVA empty/not present
 	 * @iommu_table: Table to manipulate
@@ -224,10 +251,6 @@ struct pt_iommu_cfg {
 #define IOMMU_PROTOTYPES(fmt)                                                  \
 	phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
 						  dma_addr_t iova);            \
-	int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain,            \
-				       unsigned long iova, phys_addr_t paddr,  \
-				       size_t pgsize, size_t pgcount,          \
-				       int prot, gfp_t gfp, size_t *mapped);   \
 	int pt_iommu_##fmt##_read_and_clear_dirty(                             \
 		struct iommu_domain *domain, unsigned long iova, size_t size,  \
 		unsigned long flags, struct iommu_dirty_bitmap *dirty);        \
@@ -248,8 +271,7 @@ struct pt_iommu_cfg {
  * iommu_pt
  */
 #define IOMMU_PT_DOMAIN_OPS(fmt)                        \
-	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \
-	.map_pages = &pt_iommu_##fmt##_map_pages
+	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys
 #define IOMMU_PT_DIRTY_OPS(fmt) \
 	.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty
 
-- 
cgit v1.2.3


From a82efb8747d1b8a7c0a377dc79c2aac204eae788 Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <skolothumtho@nvidia.com>
Date: Tue, 17 Mar 2026 11:16:02 +0000
Subject: iommu: Add device ATS supported capability

PCIe ATS may be disabled by platform firmware, root complex limitations,
or kernel policy even when a device advertises the ATS capability in its
PCI configuration space.

Add a new IOMMU_CAP_PCI_ATS_SUPPORTED capability to allow IOMMU drivers
to report the effective ATS decision for a device.

When this capability is true for a device, ATS may be enabled for that
device, but it does not imply that ATS is currently enabled.

A subsequent patch will extend iommufd to expose the effective ATS
status to userspace.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd/iommu.c                   | 6 ++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 +++
 drivers/iommu/intel/iommu.c                 | 2 ++
 include/linux/iommu.h                       | 2 ++
 4 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 81c4d7733872..f1814fee5182 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2985,6 +2985,12 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
 
 		return amd_iommu_hd_support(iommu);
 	}
+	case IOMMU_CAP_PCI_ATS_SUPPORTED: {
+		struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+
+		return amd_iommu_iotlb_sup &&
+			 (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP);
+	}
 	default:
 		break;
 	}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 4d00d796f078..dec5cac98f7c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -107,6 +107,7 @@ static const char * const event_class_str[] = {
 };
 
 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
+static bool arm_smmu_ats_supported(struct arm_smmu_master *master);
 
 static void parse_driver_options(struct arm_smmu_device *smmu)
 {
@@ -2494,6 +2495,8 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
 		return true;
 	case IOMMU_CAP_DIRTY_TRACKING:
 		return arm_smmu_dbm_capable(master->smmu);
+	case IOMMU_CAP_PCI_ATS_SUPPORTED:
+		return arm_smmu_ats_supported(master);
 	default:
 		return false;
 	}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ef7613b177b9..5dca8e525c73 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3220,6 +3220,8 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
 		return ecap_sc_support(info->iommu->ecap);
 	case IOMMU_CAP_DIRTY_TRACKING:
 		return ssads_supported(info->iommu);
+	case IOMMU_CAP_PCI_ATS_SUPPORTED:
+		return info->ats_supported;
 	default:
 		return false;
 	}
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7ca648c01336..a904821ed169 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -272,6 +272,8 @@ enum iommu_cap {
 	 */
 	IOMMU_CAP_DEFERRED_FLUSH,
 	IOMMU_CAP_DIRTY_TRACKING,	/* IOMMU supports dirty tracking */
+	/* ATS is supported and may be enabled for this device */
+	IOMMU_CAP_PCI_ATS_SUPPORTED,
 };
 
 /* These are the possible reserved region types */
-- 
cgit v1.2.3


From 2727d44f5d5bc3f8e55a6a0ccf24d8105a5a400e Mon Sep 17 00:00:00 2001
From: Kit Dallege <xaum.io@gmail.com>
Date: Sun, 15 Mar 2026 18:09:31 +0100
Subject: writeback: fix kernel-doc function name mismatch for wb_put_many()

The kernel-doc comment says wb_put but the actual function is
wb_put_many. Fix the name to match.

Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Kit Dallege <xaum.io@gmail.com>
Link: https://patch.msgid.link/20260315170931.65852-1-xaum.io@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/backing-dev-defs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index c88fd4d37d1f..a06b93446d10 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -237,7 +237,7 @@ static inline void wb_get(struct bdi_writeback *wb)
 }
 
 /**
- * wb_put - decrement a wb's refcount
+ * wb_put_many - decrement a wb's refcount
  * @wb: bdi_writeback to put
  * @nr: number of references to put
  */
-- 
cgit v1.2.3


From 9577c74c96f88d807d1ba005adbf5952e7127e55 Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Thu, 12 Mar 2026 18:51:41 -0700
Subject: platform/x86/intel/vsec: Make driver_data info const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Treat PCI id->driver_data (intel_vsec_platform_info) as read-only by making
vsec_priv->info a const pointer and updating all function signatures to
accept const intel_vsec_platform_info *.

This improves const-correctness and clarifies that the platform info data
from the driver_data table is not meant to be modified at runtime.

No functional changes intended.

Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Link: https://patch.msgid.link/20260313015202.3660072-3-david.e.box@linux.intel.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/x86/intel/vsec.c | 20 ++++++++++----------
 include/linux/intel_vsec.h        |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c
index 46966edca03b..e0096be605d9 100644
--- a/drivers/platform/x86/intel/vsec.c
+++ b/drivers/platform/x86/intel/vsec.c
@@ -42,7 +42,7 @@ enum vsec_device_state {
 };
 
 struct vsec_priv {
-	struct intel_vsec_platform_info *info;
+	const struct intel_vsec_platform_info *info;
 	struct device *suppliers[VSEC_FEATURE_COUNT];
 	struct oobmsm_plat_info plat_info;
 	enum vsec_device_state state[VSEC_FEATURE_COUNT];
@@ -270,7 +270,7 @@ cleanup_aux:
 EXPORT_SYMBOL_NS_GPL(intel_vsec_add_aux, "INTEL_VSEC");
 
 static int intel_vsec_add_dev(struct pci_dev *pdev, struct intel_vsec_header *header,
-			      struct intel_vsec_platform_info *info,
+			      const struct intel_vsec_platform_info *info,
 			      unsigned long cap_id, u64 base_addr)
 {
 	struct intel_vsec_device __free(kfree) *intel_vsec_dev = NULL;
@@ -406,7 +406,7 @@ static int get_cap_id(u32 header_id, unsigned long *cap_id)
 
 static int intel_vsec_register_device(struct pci_dev *pdev,
 				      struct intel_vsec_header *header,
-				      struct intel_vsec_platform_info *info,
+				      const struct intel_vsec_platform_info *info,
 				      u64 base_addr)
 {
 	const struct vsec_feature_dependency *consumer_deps;
@@ -452,7 +452,7 @@ static int intel_vsec_register_device(struct pci_dev *pdev,
 }
 
 static bool intel_vsec_walk_header(struct pci_dev *pdev,
-				   struct intel_vsec_platform_info *info)
+				   const struct intel_vsec_platform_info *info)
 {
 	struct intel_vsec_header **header = info->headers;
 	bool have_devices = false;
@@ -468,7 +468,7 @@ static bool intel_vsec_walk_header(struct pci_dev *pdev,
 }
 
 static bool intel_vsec_walk_dvsec(struct pci_dev *pdev,
-				  struct intel_vsec_platform_info *info)
+				  const struct intel_vsec_platform_info *info)
 {
 	bool have_devices = false;
 	int pos = 0;
@@ -519,7 +519,7 @@ static bool intel_vsec_walk_dvsec(struct pci_dev *pdev,
 }
 
 static bool intel_vsec_walk_vsec(struct pci_dev *pdev,
-				 struct intel_vsec_platform_info *info)
+				 const struct intel_vsec_platform_info *info)
 {
 	bool have_devices = false;
 	int pos = 0;
@@ -565,7 +565,7 @@ static bool intel_vsec_walk_vsec(struct pci_dev *pdev,
 }
 
 int intel_vsec_register(struct pci_dev *pdev,
-			 struct intel_vsec_platform_info *info)
+			const struct intel_vsec_platform_info *info)
 {
 	if (!pdev || !info || !info->headers)
 		return -EINVAL;
@@ -578,7 +578,7 @@ int intel_vsec_register(struct pci_dev *pdev,
 EXPORT_SYMBOL_NS_GPL(intel_vsec_register, "INTEL_VSEC");
 
 static bool intel_vsec_get_features(struct pci_dev *pdev,
-				    struct intel_vsec_platform_info *info)
+				    const struct intel_vsec_platform_info *info)
 {
 	bool found = false;
 
@@ -622,7 +622,7 @@ static void intel_vsec_skip_missing_dependencies(struct pci_dev *pdev)
 
 static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	struct intel_vsec_platform_info *info;
+	const struct intel_vsec_platform_info *info;
 	struct vsec_priv *priv;
 	int num_caps, ret;
 	int run_once = 0;
@@ -633,7 +633,7 @@ static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id
 		return ret;
 
 	pci_save_state(pdev);
-	info = (struct intel_vsec_platform_info *)id->driver_data;
+	info = (const struct intel_vsec_platform_info *)id->driver_data;
 	if (!info)
 		return -EINVAL;
 
diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h
index 1a0f357c2427..d551174b0049 100644
--- a/include/linux/intel_vsec.h
+++ b/include/linux/intel_vsec.h
@@ -200,13 +200,13 @@ static inline struct intel_vsec_device *auxdev_to_ivdev(struct auxiliary_device
 
 #if IS_ENABLED(CONFIG_INTEL_VSEC)
 int intel_vsec_register(struct pci_dev *pdev,
-			 struct intel_vsec_platform_info *info);
+			const struct intel_vsec_platform_info *info);
 int intel_vsec_set_mapping(struct oobmsm_plat_info *plat_info,
 			   struct intel_vsec_device *vsec_dev);
 struct oobmsm_plat_info *intel_vsec_get_mapping(struct pci_dev *pdev);
 #else
 static inline int intel_vsec_register(struct pci_dev *pdev,
-				       struct intel_vsec_platform_info *info)
+				      const struct intel_vsec_platform_info *info)
 {
 	return -ENODEV;
 }
-- 
cgit v1.2.3


From c62fd96a04e4a7b847448f97ecfe9f3fe706e7b3 Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Thu, 12 Mar 2026 18:51:42 -0700
Subject: platform/x86/intel/vsec: Decouple add/link helpers from PCI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This refactor prepares for adding ACPI-enumerated PMT endpoints. While
intel_vsec is bound to PCI today, some helpers are used by code that will
also register PMT endpoints from non-PCI (ACPI) paths. Clean up
PCI-specific plumbing where it isn’t strictly required and rely on generic
struct device where possible.

Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Link: https://patch.msgid.link/20260313015202.3660072-4-david.e.box@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/x86/intel/vsec.c      | 13 +++++++++----
 drivers/platform/x86/intel/vsec_tpmi.c |  2 +-
 include/linux/intel_vsec.h             |  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c
index e0096be605d9..938648b9ef09 100644
--- a/drivers/platform/x86/intel/vsec.c
+++ b/drivers/platform/x86/intel/vsec.c
@@ -158,18 +158,23 @@ static bool vsec_driver_present(int cap_id)
  */
 static const struct pci_device_id intel_vsec_pci_ids[];
 
-static int intel_vsec_link_devices(struct pci_dev *pdev, struct device *dev,
+static int intel_vsec_link_devices(struct device *parent, struct device *dev,
 				   int consumer_id)
 {
 	const struct vsec_feature_dependency *deps;
 	enum vsec_device_state *state;
 	struct device **suppliers;
 	struct vsec_priv *priv;
+	struct pci_dev *pdev;
 	int supplier_id;
 
 	if (!consumer_id)
 		return 0;
 
+	if (!dev_is_pci(parent))
+		return 0;
+
+	pdev = to_pci_dev(parent);
 	if (!pci_match_id(intel_vsec_pci_ids, pdev))
 		return 0;
 
@@ -204,7 +209,7 @@ static int intel_vsec_link_devices(struct pci_dev *pdev, struct device *dev,
 	return 0;
 }
 
-int intel_vsec_add_aux(struct pci_dev *pdev, struct device *parent,
+int intel_vsec_add_aux(struct device *parent,
 		       struct intel_vsec_device *intel_vsec_dev,
 		       const char *name)
 {
@@ -252,7 +257,7 @@ int intel_vsec_add_aux(struct pci_dev *pdev, struct device *parent,
 	if (ret)
 		goto cleanup_aux;
 
-	ret = intel_vsec_link_devices(pdev, &auxdev->dev, intel_vsec_dev->cap_id);
+	ret = intel_vsec_link_devices(parent, &auxdev->dev, intel_vsec_dev->cap_id);
 	if (ret)
 		goto cleanup_aux;
 
@@ -343,7 +348,7 @@ static int intel_vsec_add_dev(struct pci_dev *pdev, struct intel_vsec_header *he
 	 * Pass the ownership of intel_vsec_dev and resource within it to
 	 * intel_vsec_add_aux()
 	 */
-	return intel_vsec_add_aux(pdev, parent, no_free_ptr(intel_vsec_dev),
+	return intel_vsec_add_aux(parent, no_free_ptr(intel_vsec_dev),
 				  intel_vsec_name(header->id));
 }
 
diff --git a/drivers/platform/x86/intel/vsec_tpmi.c b/drivers/platform/x86/intel/vsec_tpmi.c
index 98846e88d3d0..2298b6361094 100644
--- a/drivers/platform/x86/intel/vsec_tpmi.c
+++ b/drivers/platform/x86/intel/vsec_tpmi.c
@@ -655,7 +655,7 @@ static int tpmi_create_device(struct intel_tpmi_info *tpmi_info,
 	 * feature_vsec_dev and res memory are also freed as part of
 	 * device deletion.
 	 */
-	return intel_vsec_add_aux(vsec_dev->pcidev, &vsec_dev->auxdev.dev,
+	return intel_vsec_add_aux(&vsec_dev->auxdev.dev,
 				  feature_vsec_dev, feature_id_name);
 }
 
diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h
index d551174b0049..49a746ec0128 100644
--- a/include/linux/intel_vsec.h
+++ b/include/linux/intel_vsec.h
@@ -184,7 +184,7 @@ struct pmt_feature_group {
 	struct telemetry_region	regions[];
 };
 
-int intel_vsec_add_aux(struct pci_dev *pdev, struct device *parent,
+int intel_vsec_add_aux(struct device *parent,
 		       struct intel_vsec_device *intel_vsec_dev,
 		       const char *name);
 
-- 
cgit v1.2.3


From 353042d54d82f6c46449f0ee38c244b5a13c1fe4 Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Thu, 12 Mar 2026 18:51:43 -0700
Subject: platform/x86/intel/vsec: Switch exported helpers from pci_dev to
 device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Preparatory refactor for ACPI-enumerated PMT endpoints. Several exported
PMT/VSEC interfaces and structs carried struct pci_dev * even though
callers only need a generic struct device. Move those to struct device * so
the same APIs work for PCI and ACPI parents.

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Link: https://patch.msgid.link/20260313015202.3660072-5-david.e.box@linux.intel.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_debugfs.c                  |  2 +-
 drivers/gpu/drm/xe/xe_hwmon.c                    |  2 +-
 drivers/gpu/drm/xe/xe_vsec.c                     |  7 ++--
 drivers/gpu/drm/xe/xe_vsec.h                     |  4 +--
 drivers/platform/x86/intel/pmc/core.c            |  4 +--
 drivers/platform/x86/intel/pmc/ssram_telemetry.c |  2 +-
 drivers/platform/x86/intel/pmt/class.c           |  8 ++---
 drivers/platform/x86/intel/pmt/class.h           |  5 +--
 drivers/platform/x86/intel/pmt/discovery.c       |  4 +--
 drivers/platform/x86/intel/pmt/telemetry.c       | 13 +++----
 drivers/platform/x86/intel/pmt/telemetry.h       | 12 +++----
 drivers/platform/x86/intel/sdsi.c                |  5 +--
 drivers/platform/x86/intel/vsec.c                | 44 ++++++++++++++----------
 drivers/platform/x86/intel/vsec_tpmi.c           |  6 ++--
 include/linux/intel_vsec.h                       | 13 +++----
 15 files changed, 71 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 844cfafe1ec7..ad2d8f179eb6 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -45,7 +45,7 @@ static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio,
 	u64 residency = 0;
 	int ret;
 
-	ret = xe_pmt_telem_read(to_pci_dev(xe->drm.dev),
+	ret = xe_pmt_telem_read(xe->drm.dev,
 				xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID),
 				&residency, offset, sizeof(residency));
 	if (ret != sizeof(residency)) {
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 0fd4d4f1014a..92e423a339f1 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -506,7 +506,7 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy)
 	if (hwmon->xe->info.platform == XE_BATTLEMAGE) {
 		u64 pmt_val;
 
-		ret = xe_pmt_telem_read(to_pci_dev(hwmon->xe->drm.dev),
+		ret = xe_pmt_telem_read(hwmon->xe->drm.dev,
 					xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID),
 					&pmt_val, BMG_ENERGY_STATUS_PMT_OFFSET,	sizeof(pmt_val));
 		if (ret != sizeof(pmt_val)) {
diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c
index 4ebb4dbe1c9b..a9baf0bfe572 100644
--- a/drivers/gpu/drm/xe/xe_vsec.c
+++ b/drivers/gpu/drm/xe/xe_vsec.c
@@ -140,10 +140,10 @@ static int xe_guid_decode(u32 guid, int *index, u32 *offset)
 	return 0;
 }
 
-int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset,
+int xe_pmt_telem_read(struct device *dev, u32 guid, u64 *data, loff_t user_offset,
 		      u32 count)
 {
-	struct xe_device *xe = pdev_to_xe_device(pdev);
+	struct xe_device *xe = kdev_to_xe_device(dev);
 	void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET;
 	u32 mem_region;
 	u32 offset;
@@ -198,7 +198,6 @@ void xe_vsec_init(struct xe_device *xe)
 {
 	struct intel_vsec_platform_info *info;
 	struct device *dev = xe->drm.dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
 	enum xe_vsec platform;
 
 	platform = get_platform_info(xe);
@@ -221,6 +220,6 @@ void xe_vsec_init(struct xe_device *xe)
 	 * Register a VSEC. Cleanup is handled using device managed
 	 * resources.
 	 */
-	intel_vsec_register(pdev, info);
+	intel_vsec_register(dev, info);
 }
 MODULE_IMPORT_NS("INTEL_VSEC");
diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h
index dabfb4e02d70..a25b4e6e681b 100644
--- a/drivers/gpu/drm/xe/xe_vsec.h
+++ b/drivers/gpu/drm/xe/xe_vsec.h
@@ -6,10 +6,10 @@
 
 #include <linux/types.h>
 
-struct pci_dev;
+struct device;
 struct xe_device;
 
 void xe_vsec_init(struct xe_device *xe);
-int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, u32 count);
+int xe_pmt_telem_read(struct device *dev, u32 guid, u64 *data, loff_t user_offset, u32 count);
 
 #endif
diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index 02b303418d18..d91e1ab842d6 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -1315,7 +1315,7 @@ static struct telem_endpoint *pmc_core_register_endpoint(struct pci_dev *pcidev,
 	unsigned int i;
 
 	for (i = 0; guids[i]; i++) {
-		ep = pmt_telem_find_and_register_endpoint(pcidev, guids[i], 0);
+		ep = pmt_telem_find_and_register_endpoint(&pcidev->dev, guids[i], 0);
 		if (!IS_ERR(ep))
 			return ep;
 	}
@@ -1600,7 +1600,7 @@ static int pmc_core_get_telem_info(struct pmc_dev *pmcdev, struct pmc_dev_info *
 		if (!pmc->map->lpm_req_guid)
 			return -ENXIO;
 
-		ep = pmt_telem_find_and_register_endpoint(pcidev, pmc->map->lpm_req_guid, 0);
+		ep = pmt_telem_find_and_register_endpoint(&pcidev->dev, pmc->map->lpm_req_guid, 0);
 		if (IS_ERR(ep)) {
 			dev_dbg(&pmcdev->pdev->dev, "couldn't get telem endpoint %pe", ep);
 			return -EPROBE_DEFER;
diff --git a/drivers/platform/x86/intel/pmc/ssram_telemetry.c b/drivers/platform/x86/intel/pmc/ssram_telemetry.c
index 03fad9331fc0..6f6e83e70fc5 100644
--- a/drivers/platform/x86/intel/pmc/ssram_telemetry.c
+++ b/drivers/platform/x86/intel/pmc/ssram_telemetry.c
@@ -60,7 +60,7 @@ pmc_ssram_telemetry_add_pmt(struct pci_dev *pcidev, u64 ssram_base, void __iomem
 	info.base_addr = ssram_base;
 	info.parent = &pcidev->dev;
 
-	return intel_vsec_register(pcidev, &info);
+	return intel_vsec_register(&pcidev->dev, &info);
 }
 
 static inline u64 get_base(void __iomem *addr, u32 offset)
diff --git a/drivers/platform/x86/intel/pmt/class.c b/drivers/platform/x86/intel/pmt/class.c
index be3c8d9e4fff..b4c9964df807 100644
--- a/drivers/platform/x86/intel/pmt/class.c
+++ b/drivers/platform/x86/intel/pmt/class.c
@@ -60,11 +60,11 @@ pmt_memcpy64_fromio(void *to, const u64 __iomem *from, size_t count)
 	return count;
 }
 
-int pmt_telem_read_mmio(struct pci_dev *pdev, struct pmt_callbacks *cb, u32 guid, void *buf,
+int pmt_telem_read_mmio(struct device *dev, struct pmt_callbacks *cb, u32 guid, void *buf,
 			void __iomem *addr, loff_t off, u32 count)
 {
 	if (cb && cb->read_telem)
-		return cb->read_telem(pdev, guid, buf, off, count);
+		return cb->read_telem(dev, guid, buf, off, count);
 
 	addr += off;
 
@@ -99,7 +99,7 @@ intel_pmt_read(struct file *filp, struct kobject *kobj,
 	if (count > entry->size - off)
 		count = entry->size - off;
 
-	count = pmt_telem_read_mmio(entry->pcidev, entry->cb, entry->header.guid, buf,
+	count = pmt_telem_read_mmio(entry->ep->dev, entry->cb, entry->header.guid, buf,
 				    entry->base, off, count);
 
 	return count;
@@ -208,7 +208,7 @@ static int intel_pmt_populate_entry(struct intel_pmt_entry *entry,
 				    struct intel_vsec_device *ivdev,
 				    struct resource *disc_res)
 {
-	struct pci_dev *pci_dev = ivdev->pcidev;
+	struct pci_dev *pci_dev = to_pci_dev(ivdev->dev);
 	struct device *dev = &ivdev->auxdev.dev;
 	struct intel_pmt_header *header = &entry->header;
 	u8 bir;
diff --git a/drivers/platform/x86/intel/pmt/class.h b/drivers/platform/x86/intel/pmt/class.h
index 3c5ad5f52bca..1ae56a5baad2 100644
--- a/drivers/platform/x86/intel/pmt/class.h
+++ b/drivers/platform/x86/intel/pmt/class.h
@@ -19,11 +19,12 @@
 #define GET_BIR(v)		((v) & GENMASK(2, 0))
 #define GET_ADDRESS(v)		((v) & GENMASK(31, 3))
 
+struct device;
 struct pci_dev;
 extern struct class intel_pmt_class;
 
 struct telem_endpoint {
-	struct pci_dev		*pcidev;
+	struct device		*dev;
 	struct telem_header	header;
 	struct pmt_callbacks	*cb;
 	void __iomem		*base;
@@ -65,7 +66,7 @@ struct intel_pmt_namespace {
 				struct intel_pmt_entry *entry);
 };
 
-int pmt_telem_read_mmio(struct pci_dev *pdev, struct pmt_callbacks *cb, u32 guid, void *buf,
+int pmt_telem_read_mmio(struct device *dev, struct pmt_callbacks *cb, u32 guid, void *buf,
 			void __iomem *addr, loff_t off, u32 count);
 bool intel_pmt_is_early_client_hw(struct device *dev);
 int intel_pmt_dev_create(struct intel_pmt_entry *entry,
diff --git a/drivers/platform/x86/intel/pmt/discovery.c b/drivers/platform/x86/intel/pmt/discovery.c
index e500aa327d23..c482368bfaae 100644
--- a/drivers/platform/x86/intel/pmt/discovery.c
+++ b/drivers/platform/x86/intel/pmt/discovery.c
@@ -542,7 +542,7 @@ static int pmt_features_probe(struct auxiliary_device *auxdev, const struct auxi
 	if (!priv)
 		return -ENOMEM;
 
-	priv->parent = &ivdev->pcidev->dev;
+	priv->parent = ivdev->dev;
 	auxiliary_set_drvdata(auxdev, priv);
 
 	priv->dev = device_create(&intel_pmt_class, &auxdev->dev, MKDEV(0, 0), priv,
@@ -609,7 +609,7 @@ void intel_pmt_get_features(struct intel_pmt_entry *entry)
 
 	mutex_lock(&feature_list_lock);
 	list_for_each_entry(feature, &pmt_feature_list, list) {
-		if (feature->priv->parent != &entry->ep->pcidev->dev)
+		if (feature->priv->parent != entry->ep->dev)
 			continue;
 
 		pmt_get_features(entry, feature);
diff --git a/drivers/platform/x86/intel/pmt/telemetry.c b/drivers/platform/x86/intel/pmt/telemetry.c
index a52803bfe124..bdc7c24a3678 100644
--- a/drivers/platform/x86/intel/pmt/telemetry.c
+++ b/drivers/platform/x86/intel/pmt/telemetry.c
@@ -112,7 +112,7 @@ static int pmt_telem_add_endpoint(struct intel_vsec_device *ivdev,
 		return -ENOMEM;
 
 	ep = entry->ep;
-	ep->pcidev = ivdev->pcidev;
+	ep->dev = ivdev->dev;
 	ep->header.access_type = entry->header.access_type;
 	ep->header.guid = entry->header.guid;
 	ep->header.base_offset = entry->header.base_offset;
@@ -204,7 +204,7 @@ int pmt_telem_get_endpoint_info(int devid, struct telem_endpoint_info *info)
 		goto unlock;
 	}
 
-	info->pdev = entry->ep->pcidev;
+	info->dev = entry->ep->dev;
 	info->header = entry->ep->header;
 
 unlock:
@@ -218,9 +218,10 @@ static int pmt_copy_region(struct telemetry_region *region,
 			   struct intel_pmt_entry *entry)
 {
 
+	struct pci_dev *pdev = to_pci_dev(entry->ep->dev);
 	struct oobmsm_plat_info *plat_info;
 
-	plat_info = intel_vsec_get_mapping(entry->ep->pcidev);
+	plat_info = intel_vsec_get_mapping(pdev);
 	if (IS_ERR(plat_info))
 		return PTR_ERR(plat_info);
 
@@ -308,7 +309,7 @@ int pmt_telem_read(struct telem_endpoint *ep, u32 id, u64 *data, u32 count)
 	if (offset + NUM_BYTES_QWORD(count) > size)
 		return -EINVAL;
 
-	pmt_telem_read_mmio(ep->pcidev, ep->cb, ep->header.guid, data, ep->base, offset,
+	pmt_telem_read_mmio(ep->dev, ep->cb, ep->header.guid, data, ep->base, offset,
 			    NUM_BYTES_QWORD(count));
 
 	return ep->present ? 0 : -EPIPE;
@@ -335,7 +336,7 @@ int pmt_telem_read32(struct telem_endpoint *ep, u32 id, u32 *data, u32 count)
 EXPORT_SYMBOL_NS_GPL(pmt_telem_read32, "INTEL_PMT_TELEMETRY");
 
 struct telem_endpoint *
-pmt_telem_find_and_register_endpoint(struct pci_dev *pcidev, u32 guid, u16 pos)
+pmt_telem_find_and_register_endpoint(struct device *dev, u32 guid, u16 pos)
 {
 	int devid = 0;
 	int inst = 0;
@@ -348,7 +349,7 @@ pmt_telem_find_and_register_endpoint(struct pci_dev *pcidev, u32 guid, u16 pos)
 		if (err)
 			return ERR_PTR(err);
 
-		if (ep_info.header.guid == guid && ep_info.pdev == pcidev) {
+		if (ep_info.header.guid == guid && ep_info.dev == dev) {
 			if (inst == pos)
 				return pmt_telem_register_endpoint(devid);
 			++inst;
diff --git a/drivers/platform/x86/intel/pmt/telemetry.h b/drivers/platform/x86/intel/pmt/telemetry.h
index d45af5512b4e..0f88c5e7d90e 100644
--- a/drivers/platform/x86/intel/pmt/telemetry.h
+++ b/drivers/platform/x86/intel/pmt/telemetry.h
@@ -6,8 +6,8 @@
 #define PMT_TELEM_TELEMETRY	0
 #define PMT_TELEM_CRASHLOG	1
 
+struct device;
 struct telem_endpoint;
-struct pci_dev;
 
 struct telem_header {
 	u8	access_type;
@@ -17,7 +17,7 @@ struct telem_header {
 };
 
 struct telem_endpoint_info {
-	struct pci_dev		*pdev;
+	struct device		*dev;
 	struct telem_header	header;
 };
 
@@ -71,8 +71,8 @@ int pmt_telem_get_endpoint_info(int devid, struct telem_endpoint_info *info);
 
 /**
  * pmt_telem_find_and_register_endpoint() - Get a telemetry endpoint from
- * pci_dev device, guid and pos
- * @pdev:   PCI device inside the Intel vsec
+ * device, guid and pos
+ * @dev:    device inside the Intel vsec
  * @guid:   GUID of the telemetry space
  * @pos:    Instance of the guid
  *
@@ -80,8 +80,8 @@ int pmt_telem_get_endpoint_info(int devid, struct telem_endpoint_info *info);
  * * endpoint    - On success returns pointer to the telemetry endpoint
  * * -ENXIO      - telemetry endpoint not found
  */
-struct telem_endpoint *pmt_telem_find_and_register_endpoint(struct pci_dev *pcidev,
-				u32 guid, u16 pos);
+struct telem_endpoint *
+pmt_telem_find_and_register_endpoint(struct device *dev, u32 guid, u16 pos);
 
 /**
  * pmt_telem_read() - Read qwords from counter sram using sample id
diff --git a/drivers/platform/x86/intel/sdsi.c b/drivers/platform/x86/intel/sdsi.c
index da75f53d0bcc..d7e37d4ace23 100644
--- a/drivers/platform/x86/intel/sdsi.c
+++ b/drivers/platform/x86/intel/sdsi.c
@@ -599,13 +599,14 @@ static int sdsi_get_layout(struct sdsi_priv *priv, struct disc_table *table)
 	return 0;
 }
 
-static int sdsi_map_mbox_registers(struct sdsi_priv *priv, struct pci_dev *parent,
+static int sdsi_map_mbox_registers(struct sdsi_priv *priv, struct device *dev,
 				   struct disc_table *disc_table, struct resource *disc_res)
 {
 	u32 access_type = FIELD_GET(DT_ACCESS_TYPE, disc_table->access_info);
 	u32 size = FIELD_GET(DT_SIZE, disc_table->access_info);
 	u32 tbir = FIELD_GET(DT_TBIR, disc_table->offset);
 	u32 offset = DT_OFFSET(disc_table->offset);
+	struct pci_dev *parent = to_pci_dev(dev);
 	struct resource res = {};
 
 	/* Starting location of SDSi MMIO region based on access type */
@@ -681,7 +682,7 @@ static int sdsi_probe(struct auxiliary_device *auxdev, const struct auxiliary_de
 		return ret;
 
 	/* Map the SDSi mailbox registers */
-	ret = sdsi_map_mbox_registers(priv, intel_cap_dev->pcidev, &disc_table, disc_res);
+	ret = sdsi_map_mbox_registers(priv, intel_cap_dev->dev, &disc_table, disc_res);
 	if (ret)
 		return ret;
 
diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c
index 938648b9ef09..a547e4b98245 100644
--- a/drivers/platform/x86/intel/vsec.c
+++ b/drivers/platform/x86/intel/vsec.c
@@ -274,7 +274,7 @@ cleanup_aux:
 }
 EXPORT_SYMBOL_NS_GPL(intel_vsec_add_aux, "INTEL_VSEC");
 
-static int intel_vsec_add_dev(struct pci_dev *pdev, struct intel_vsec_header *header,
+static int intel_vsec_add_dev(struct device *dev, struct intel_vsec_header *header,
 			      const struct intel_vsec_platform_info *info,
 			      unsigned long cap_id, u64 base_addr)
 {
@@ -288,18 +288,18 @@ static int intel_vsec_add_dev(struct pci_dev *pdev, struct intel_vsec_header *he
 	if (info->parent)
 		parent = info->parent;
 	else
-		parent = &pdev->dev;
+		parent = dev;
 
 	if (!intel_vsec_supported(header->id, info->caps))
 		return -EINVAL;
 
 	if (!header->num_entries) {
-		dev_dbg(&pdev->dev, "Invalid 0 entry count for header id %d\n", header->id);
+		dev_dbg(dev, "Invalid 0 entry count for header id %d\n", header->id);
 		return -EINVAL;
 	}
 
 	if (!header->entry_size) {
-		dev_dbg(&pdev->dev, "Invalid 0 entry size for header id %d\n", header->id);
+		dev_dbg(dev, "Invalid 0 entry size for header id %d\n", header->id);
 		return -EINVAL;
 	}
 
@@ -331,7 +331,7 @@ static int intel_vsec_add_dev(struct pci_dev *pdev, struct intel_vsec_header *he
 		release_mem_region(tmp->start, resource_size(tmp));
 	}
 
-	intel_vsec_dev->pcidev = pdev;
+	intel_vsec_dev->dev = dev;
 	intel_vsec_dev->resource = no_free_ptr(res);
 	intel_vsec_dev->num_resources = header->num_entries;
 	intel_vsec_dev->quirks = info->quirks;
@@ -409,13 +409,14 @@ static int get_cap_id(u32 header_id, unsigned long *cap_id)
 	return 0;
 }
 
-static int intel_vsec_register_device(struct pci_dev *pdev,
+static int intel_vsec_register_device(struct device *dev,
 				      struct intel_vsec_header *header,
 				      const struct intel_vsec_platform_info *info,
 				      u64 base_addr)
 {
 	const struct vsec_feature_dependency *consumer_deps;
 	struct vsec_priv *priv;
+	struct pci_dev *pdev;
 	unsigned long cap_id;
 	int ret;
 
@@ -427,8 +428,12 @@ static int intel_vsec_register_device(struct pci_dev *pdev,
 	 * Only track dependencies for devices probed by the VSEC driver.
 	 * For others using the exported APIs, add the device directly.
 	 */
+	if (!dev_is_pci(dev))
+		return intel_vsec_add_dev(dev, header, info, cap_id, base_addr);
+
+	pdev = to_pci_dev(dev);
 	if (!pci_match_id(intel_vsec_pci_ids, pdev))
-		return intel_vsec_add_dev(pdev, header, info, cap_id, base_addr);
+		return intel_vsec_add_dev(dev, header, info, cap_id, base_addr);
 
 	priv = pci_get_drvdata(pdev);
 	if (priv->state[cap_id] == STATE_REGISTERED ||
@@ -444,7 +449,7 @@ static int intel_vsec_register_device(struct pci_dev *pdev,
 
 	consumer_deps = get_consumer_dependencies(priv, cap_id);
 	if (!consumer_deps || suppliers_ready(priv, consumer_deps, cap_id)) {
-		ret = intel_vsec_add_dev(pdev, header, info, cap_id, base_addr);
+		ret = intel_vsec_add_dev(dev, header, info, cap_id, base_addr);
 		if (ret)
 			priv->state[cap_id] = STATE_SKIP;
 		else
@@ -456,7 +461,7 @@ static int intel_vsec_register_device(struct pci_dev *pdev,
 	return -EAGAIN;
 }
 
-static bool intel_vsec_walk_header(struct pci_dev *pdev,
+static bool intel_vsec_walk_header(struct device *dev,
 				   const struct intel_vsec_platform_info *info)
 {
 	struct intel_vsec_header **header = info->headers;
@@ -464,7 +469,7 @@ static bool intel_vsec_walk_header(struct pci_dev *pdev,
 	int ret;
 
 	for ( ; *header; header++) {
-		ret = intel_vsec_register_device(pdev, *header, info, info->base_addr);
+		ret = intel_vsec_register_device(dev, *header, info, info->base_addr);
 		if (!ret)
 			have_devices = true;
 	}
@@ -512,7 +517,7 @@ static bool intel_vsec_walk_dvsec(struct pci_dev *pdev,
 		pci_read_config_dword(pdev, pos + PCI_DVSEC_HEADER2, &hdr);
 		header.id = PCI_DVSEC_HEADER2_ID(hdr);
 
-		ret = intel_vsec_register_device(pdev, &header, info,
+		ret = intel_vsec_register_device(&pdev->dev, &header, info,
 						 pci_resource_start(pdev, header.tbir));
 		if (ret)
 			continue;
@@ -558,7 +563,7 @@ static bool intel_vsec_walk_vsec(struct pci_dev *pdev,
 		header.tbir = INTEL_DVSEC_TABLE_BAR(table);
 		header.offset = INTEL_DVSEC_TABLE_OFFSET(table);
 
-		ret = intel_vsec_register_device(pdev, &header, info,
+		ret = intel_vsec_register_device(&pdev->dev, &header, info,
 						 pci_resource_start(pdev, header.tbir));
 		if (ret)
 			continue;
@@ -569,13 +574,13 @@ static bool intel_vsec_walk_vsec(struct pci_dev *pdev,
 	return have_devices;
 }
 
-int intel_vsec_register(struct pci_dev *pdev,
+int intel_vsec_register(struct device *dev,
 			const struct intel_vsec_platform_info *info)
 {
-	if (!pdev || !info || !info->headers)
+	if (!dev || !info || !info->headers)
 		return -EINVAL;
 
-	if (!intel_vsec_walk_header(pdev, info))
+	if (!intel_vsec_walk_header(dev, info))
 		return -ENODEV;
 	else
 		return 0;
@@ -601,7 +606,7 @@ static bool intel_vsec_get_features(struct pci_dev *pdev,
 		found = true;
 
 	if (info && (info->quirks & VSEC_QUIRK_NO_DVSEC) &&
-	    intel_vsec_walk_header(pdev, info))
+	    intel_vsec_walk_header(&pdev->dev, info))
 		found = true;
 
 	return found;
@@ -673,7 +678,10 @@ int intel_vsec_set_mapping(struct oobmsm_plat_info *plat_info,
 {
 	struct vsec_priv *priv;
 
-	priv = pci_get_drvdata(vsec_dev->pcidev);
+	if (!dev_is_pci(vsec_dev->dev))
+		return -ENODEV;
+
+	priv = pci_get_drvdata(to_pci_dev(vsec_dev->dev));
 	if (!priv)
 		return -EINVAL;
 
@@ -821,7 +829,7 @@ static pci_ers_result_t intel_vsec_pci_slot_reset(struct pci_dev *pdev)
 
 	xa_for_each(&auxdev_array, index, intel_vsec_dev) {
 		/* check if pdev doesn't match */
-		if (pdev != intel_vsec_dev->pcidev)
+		if (&pdev->dev != intel_vsec_dev->dev)
 			continue;
 		devm_release_action(&pdev->dev, intel_vsec_remove_aux,
 				    &intel_vsec_dev->auxdev);
diff --git a/drivers/platform/x86/intel/vsec_tpmi.c b/drivers/platform/x86/intel/vsec_tpmi.c
index 2298b6361094..9dddf4e5863e 100644
--- a/drivers/platform/x86/intel/vsec_tpmi.c
+++ b/drivers/platform/x86/intel/vsec_tpmi.c
@@ -530,7 +530,7 @@ static const struct file_operations mem_write_ops = {
 	.release        = single_release,
 };
 
-#define tpmi_to_dev(info)	(&info->vsec_dev->pcidev->dev)
+#define tpmi_to_dev(info)	((info)->vsec_dev->dev)
 
 static void tpmi_dbgfs_register(struct intel_tpmi_info *tpmi_info)
 {
@@ -642,7 +642,7 @@ static int tpmi_create_device(struct intel_tpmi_info *tpmi_info,
 		tmp->flags = IORESOURCE_MEM;
 	}
 
-	feature_vsec_dev->pcidev = vsec_dev->pcidev;
+	feature_vsec_dev->dev = vsec_dev->dev;
 	feature_vsec_dev->resource = res;
 	feature_vsec_dev->num_resources = pfs->pfs_header.num_entries;
 	feature_vsec_dev->priv_data = &tpmi_info->plat_info;
@@ -742,7 +742,7 @@ static int tpmi_fetch_pfs_header(struct intel_tpmi_pm_feature *pfs, u64 start, i
 static int intel_vsec_tpmi_init(struct auxiliary_device *auxdev)
 {
 	struct intel_vsec_device *vsec_dev = auxdev_to_ivdev(auxdev);
-	struct pci_dev *pci_dev = vsec_dev->pcidev;
+	struct pci_dev *pci_dev = to_pci_dev(vsec_dev->dev);
 	struct intel_tpmi_info *tpmi_info;
 	u64 pfs_start = 0;
 	int ret, i;
diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h
index 49a746ec0128..4eecb2a6bac4 100644
--- a/include/linux/intel_vsec.h
+++ b/include/linux/intel_vsec.h
@@ -29,6 +29,7 @@
 #define INTEL_DVSEC_TABLE_OFFSET(x)	((x) & GENMASK(31, 3))
 #define TABLE_OFFSET_SHIFT		3
 
+struct device;
 struct pci_dev;
 struct resource;
 
@@ -82,14 +83,14 @@ enum intel_vsec_quirks {
  * struct pmt_callbacks - Callback infrastructure for PMT devices
  * @read_telem: when specified, called by client driver to access PMT
  * data (instead of direct copy).
- * * pdev:  PCI device reference for the callback's use
+ * * dev:   device reference for the callback's use
  * * guid:  ID of data to acccss
  * * data:  buffer for the data to be copied
  * * off:   offset into the requested buffer
  * * count: size of buffer
  */
 struct pmt_callbacks {
-	int (*read_telem)(struct pci_dev *pdev, u32 guid, u64 *data, loff_t off, u32 count);
+	int (*read_telem)(struct device *dev, u32 guid, u64 *data, loff_t off, u32 count);
 };
 
 struct vsec_feature_dependency {
@@ -122,7 +123,7 @@ struct intel_vsec_platform_info {
 /**
  * struct intel_vsec_device - Auxbus specific device information
  * @auxdev:        auxbus device struct for auxbus access
- * @pcidev:        pci device associated with the device
+ * @dev:           struct device associated with the device
  * @resource:      any resources shared by the parent
  * @ida:           id reference
  * @num_resources: number of resources
@@ -135,7 +136,7 @@ struct intel_vsec_platform_info {
  */
 struct intel_vsec_device {
 	struct auxiliary_device auxdev;
-	struct pci_dev *pcidev;
+	struct device *dev;
 	struct resource *resource;
 	struct ida *ida;
 	int num_resources;
@@ -199,13 +200,13 @@ static inline struct intel_vsec_device *auxdev_to_ivdev(struct auxiliary_device
 }
 
 #if IS_ENABLED(CONFIG_INTEL_VSEC)
-int intel_vsec_register(struct pci_dev *pdev,
+int intel_vsec_register(struct device *dev,
 			const struct intel_vsec_platform_info *info);
 int intel_vsec_set_mapping(struct oobmsm_plat_info *plat_info,
 			   struct intel_vsec_device *vsec_dev);
 struct oobmsm_plat_info *intel_vsec_get_mapping(struct pci_dev *pdev);
 #else
-static inline int intel_vsec_register(struct pci_dev *pdev,
+static inline int intel_vsec_register(struct device *dev,
 				      const struct intel_vsec_platform_info *info)
 {
 	return -ENODEV;
-- 
cgit v1.2.3


From 22fa2ebc11a164e1ea529da6c356e3e01aef8ac8 Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Thu, 12 Mar 2026 18:51:45 -0700
Subject: platform/x86/intel/vsec: Plumb ACPI PMT discovery tables through vsec
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some platforms expose PMT discovery via ACPI instead of PCI BARs. Add a
generic discovery source flag and carry ACPI discovery entries alongside
the existing PCI resource path so PMT clients can consume either.

Changes:
  - Add enum intel_vsec_disc_source { _PCI, _ACPI }.
  - Extend intel_vsec_platform_info and intel_vsec_device with source enum
    and ACPI discovery table pointer/
  - When src==ACPI, skip BAR resource setup and copy the ACPI discovery
    entries into the aux device.

No user-visible behavior change yet; this only wires ACPI data through vsec
in preparation for ACPI-enumerated PMT clients.

Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Link: https://patch.msgid.link/20260313015202.3660072-7-david.e.box@linux.intel.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/x86/intel/vsec.c | 23 +++++++++++++++++++++++
 include/linux/intel_vsec.h        | 20 +++++++++++++++++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c
index 34b2c19ecff0..7d5dbc1c1d05 100644
--- a/drivers/platform/x86/intel/vsec.c
+++ b/drivers/platform/x86/intel/vsec.c
@@ -24,7 +24,9 @@
 #include <linux/intel_vsec.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/overflow.h>
 #include <linux/pci.h>
+#include <linux/string.h>
 #include <linux/types.h>
 
 #define PMT_XA_START			0
@@ -109,6 +111,7 @@ static void intel_vsec_dev_release(struct device *dev)
 
 	ida_free(intel_vsec_dev->ida, intel_vsec_dev->auxdev.id);
 
+	kfree(intel_vsec_dev->acpi_disc);
 	kfree(intel_vsec_dev->resource);
 	kfree(intel_vsec_dev);
 }
@@ -320,6 +323,13 @@ static int intel_vsec_add_dev(struct device *dev, struct intel_vsec_header *head
 	 * auxiliary device driver.
 	 */
 	for (i = 0, tmp = res; i < header->num_entries; i++, tmp++) {
+		/*
+		 * Skip resource mapping check for ACPI-based discovery
+		 * since those tables are read from _DSD, not MMIO.
+		 */
+		if (info->src == INTEL_VSEC_DISC_ACPI)
+			break;
+
 		tmp->start = base_addr + header->offset + i * (header->entry_size * sizeof(u32));
 		tmp->end = tmp->start + (header->entry_size * sizeof(u32)) - 1;
 		tmp->flags = IORESOURCE_MEM;
@@ -338,6 +348,19 @@ static int intel_vsec_add_dev(struct device *dev, struct intel_vsec_header *head
 	intel_vsec_dev->base_addr = info->base_addr;
 	intel_vsec_dev->priv_data = info->priv_data;
 	intel_vsec_dev->cap_id = cap_id;
+	intel_vsec_dev->src = info->src;
+
+	if (info->src == INTEL_VSEC_DISC_ACPI) {
+		size_t bytes;
+
+		if (check_mul_overflow(intel_vsec_dev->num_resources,
+				       sizeof(*info->acpi_disc), &bytes))
+			return -EOVERFLOW;
+
+		intel_vsec_dev->acpi_disc = kmemdup(info->acpi_disc, bytes, GFP_KERNEL);
+		if (!intel_vsec_dev->acpi_disc)
+			return -ENOMEM;
+	}
 
 	if (header->id == VSEC_ID_SDSI)
 		intel_vsec_dev->ida = &intel_vsec_sdsi_ida;
diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h
index 4eecb2a6bac4..1fe5665a9d02 100644
--- a/include/linux/intel_vsec.h
+++ b/include/linux/intel_vsec.h
@@ -33,6 +33,11 @@ struct device;
 struct pci_dev;
 struct resource;
 
+enum intel_vsec_disc_source {
+	INTEL_VSEC_DISC_PCI,	/* PCI, default */
+	INTEL_VSEC_DISC_ACPI,	/* ACPI */
+};
+
 enum intel_vsec_id {
 	VSEC_ID_TELEMETRY	= 2,
 	VSEC_ID_WATCHER		= 3,
@@ -103,6 +108,10 @@ struct vsec_feature_dependency {
  * @parent:    parent device in the auxbus chain
  * @headers:   list of headers to define the PMT client devices to create
  * @deps:      array of feature dependencies
+ * @acpi_disc: ACPI discovery tables, each entry is two QWORDs
+ *             in little-endian format as defined by the PMT ACPI spec.
+ *             Valid only when @provider == INTEL_VSEC_DISC_ACPI.
+ * @src:       source of discovery table data
  * @priv_data: private data, usable by parent devices, currently a callback
  * @caps:      bitmask of PMT capabilities for the given headers
  * @quirks:    bitmask of VSEC device quirks
@@ -113,6 +122,8 @@ struct intel_vsec_platform_info {
 	struct device *parent;
 	struct intel_vsec_header **headers;
 	const struct vsec_feature_dependency *deps;
+	u32 (*acpi_disc)[4];
+	enum intel_vsec_disc_source src;
 	void *priv_data;
 	unsigned long caps;
 	unsigned long quirks;
@@ -124,7 +135,12 @@ struct intel_vsec_platform_info {
  * struct intel_vsec_device - Auxbus specific device information
  * @auxdev:        auxbus device struct for auxbus access
  * @dev:           struct device associated with the device
- * @resource:      any resources shared by the parent
+ * @resource:      PCI discovery resources (BAR windows), one per discovery
+ *                 instance. Valid only when @src == INTEL_VSEC_DISC_PCI
+ * @acpi_disc:     ACPI discovery tables, each entry is two QWORDs
+ *                 in little-endian format as defined by the PMT ACPI spec.
+ *                 Valid only when @src == INTEL_VSEC_DISC_ACPI.
+ * @src:           source of discovery table data
  * @ida:           id reference
  * @num_resources: number of resources
  * @id:            xarray id
@@ -138,6 +154,8 @@ struct intel_vsec_device {
 	struct auxiliary_device auxdev;
 	struct device *dev;
 	struct resource *resource;
+	u32 (*acpi_disc)[4];
+	enum intel_vsec_disc_source src;
 	struct ida *ida;
 	int num_resources;
 	int id; /* xa */
-- 
cgit v1.2.3


From bb8539e0e60916ef3ed4a92eb2f3cfd8e34061ef Mon Sep 17 00:00:00 2001
From: Qingfang Deng <dqfext@gmail.com>
Date: Mon, 16 Mar 2026 17:28:23 +0800
Subject: ppp: require callers of ppp_dev_name() to hold RCU

ppp_dev_name() holds the RCU read lock internally to protect pch->ppp.
However, as it returns netdev->name to the caller, the caller should
also hold either RCU or RTNL lock to prevent the netdev from being
freed.

The only two references of the function is in the L2TP driver, both of
which already hold RCU. So remove the internal RCU lock and document
that callers must hold RCU.

Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Reviewed-by: Breno Leitao <leitao@debian.org>
Link: https://patch.msgid.link/20260316092824.479149-1-dqfext@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ppp/ppp_generic.c | 3 +--
 include/linux/ppp_channel.h   | 4 +++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index a036ddfe327b..cb29a6968c63 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2969,6 +2969,7 @@ int ppp_unit_number(struct ppp_channel *chan)
 
 /*
  * Return the PPP device interface name of a channel.
+ * Caller must hold RCU read lock.
  */
 char *ppp_dev_name(struct ppp_channel *chan)
 {
@@ -2977,11 +2978,9 @@ char *ppp_dev_name(struct ppp_channel *chan)
 	struct ppp *ppp;
 
 	if (pch) {
-		rcu_read_lock();
 		ppp = rcu_dereference(pch->ppp);
 		if (ppp && ppp->dev)
 			name = ppp->dev->name;
-		rcu_read_unlock();
 	}
 	return name;
 }
diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
index ca8ad03eeef0..2f63e9a6cc88 100644
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -72,7 +72,9 @@ extern int ppp_channel_index(struct ppp_channel *);
 /* Get the unit number associated with a channel, or -1 if none */
 extern int ppp_unit_number(struct ppp_channel *);
 
-/* Get the device name associated with a channel, or NULL if none */
+/* Get the device name associated with a channel, or NULL if none.
+ * Caller must hold RCU read lock.
+ */
 extern char *ppp_dev_name(struct ppp_channel *);
 
 /*
-- 
cgit v1.2.3


From b520c4eef83dd406591431f936de0908c3ed7fb9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 16 Mar 2026 17:11:30 +0100
Subject: block: split bio_alloc_bioset more clearly into a fast and slowpath

bio_alloc_bioset tries non-waiting slab allocations first for the bio and
bvec array, but does so in a somewhat convoluted way.

Restructure the function so that it first open codes these slab
allocations, and then falls back to the mempools with the original
gfp mask.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com> -ck
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://patch.msgid.link/20260316161144.1607877-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 180 +++++++++++++++++++++-------------------------------
 include/linux/bio.h |   3 +-
 2 files changed, 74 insertions(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 6131ccb7284a..5982bf069cef 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -176,43 +176,12 @@ static void bvec_free(struct mempool *pool, struct bio_vec *bv,
  * Make the first allocation restricted and don't dump info on allocation
  * failures, since we'll fall back to the mempool in case of failure.
  */
-static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
+static inline gfp_t try_alloc_gfp(gfp_t gfp)
 {
 	return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
 		__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 }
 
-static struct bio_vec *bvec_alloc(struct mempool *pool, unsigned short *nr_vecs,
-		gfp_t gfp_mask)
-{
-	struct biovec_slab *bvs = biovec_slab(*nr_vecs);
-
-	if (WARN_ON_ONCE(!bvs))
-		return NULL;
-
-	/*
-	 * Upgrade the nr_vecs request to take full advantage of the allocation.
-	 * We also rely on this in the bvec_free path.
-	 */
-	*nr_vecs = bvs->nr_vecs;
-
-	/*
-	 * Try a slab allocation first for all smaller allocations.  If that
-	 * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
-	 * The mempool is sized to handle up to BIO_MAX_VECS entries.
-	 */
-	if (*nr_vecs < BIO_MAX_VECS) {
-		struct bio_vec *bvl;
-
-		bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
-		if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
-			return bvl;
-		*nr_vecs = BIO_MAX_VECS;
-	}
-
-	return mempool_alloc(pool, gfp_mask);
-}
-
 void bio_uninit(struct bio *bio)
 {
 #ifdef CONFIG_BLK_CGROUP
@@ -433,13 +402,31 @@ static void bio_alloc_rescue(struct work_struct *work)
 	}
 }
 
+/*
+ * submit_bio_noacct() converts recursion to iteration; this means if we're
+ * running beneath it, any bios we allocate and submit will not be submitted
+ * (and thus freed) until after we return.
+ *
+ * This exposes us to a potential deadlock if we allocate multiple bios from the
+ * same bio_set while running underneath submit_bio_noacct().  If we were to
+ * allocate multiple bios (say a stacking block driver that was splitting bios),
+ * we would deadlock if we exhausted the mempool's reserve.
+ *
+ * We solve this, and guarantee forward progress by punting the bios on
+ * current->bio_list to a per bio_set rescuer workqueue before blocking to wait
+ * for elements being returned to the mempool.
+ */
 static void punt_bios_to_rescuer(struct bio_set *bs)
 {
 	struct bio_list punt, nopunt;
 	struct bio *bio;
 
-	if (WARN_ON_ONCE(!bs->rescue_workqueue))
+	if (!current->bio_list || !bs->rescue_workqueue)
 		return;
+	if (bio_list_empty(&current->bio_list[0]) &&
+	    bio_list_empty(&current->bio_list[1]))
+		return;
+
 	/*
 	 * In order to guarantee forward progress we must punt only bios that
 	 * were allocated from this bio_set; otherwise, if there was a bio on
@@ -486,9 +473,7 @@ static void bio_alloc_irq_cache_splice(struct bio_alloc_cache *cache)
 	local_irq_restore(flags);
 }
 
-static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
-		unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
-		struct bio_set *bs)
+static struct bio *bio_alloc_percpu_cache(struct bio_set *bs)
 {
 	struct bio_alloc_cache *cache;
 	struct bio *bio;
@@ -506,11 +491,6 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
 	cache->free_list = bio->bi_next;
 	cache->nr--;
 	put_cpu();
-
-	if (nr_vecs)
-		bio_init_inline(bio, bdev, nr_vecs, opf);
-	else
-		bio_init(bio, bdev, NULL, nr_vecs, opf);
 	bio->bi_pool = bs;
 	return bio;
 }
@@ -520,7 +500,7 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
  * @bdev:	block device to allocate the bio for (can be %NULL)
  * @nr_vecs:	number of bvecs to pre-allocate
  * @opf:	operation and flags for bio
- * @gfp_mask:   the GFP_* mask given to the slab allocator
+ * @gfp:	the GFP_* mask given to the slab allocator
  * @bs:		the bio_set to allocate from.
  *
  * Allocate a bio from the mempools in @bs.
@@ -550,91 +530,77 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
  * Returns: Pointer to new bio on success, NULL on failure.
  */
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
-			     blk_opf_t opf, gfp_t gfp_mask,
-			     struct bio_set *bs)
+			     blk_opf_t opf, gfp_t gfp, struct bio_set *bs)
 {
-	gfp_t saved_gfp = gfp_mask;
-	struct bio *bio;
+	struct bio_vec *bvecs = NULL;
+	struct bio *bio = NULL;
+	gfp_t saved_gfp = gfp;
 	void *p;
 
 	/* should not use nobvec bioset for nr_vecs > 0 */
 	if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
 		return NULL;
 
+	gfp = try_alloc_gfp(gfp);
 	if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
-		opf |= REQ_ALLOC_CACHE;
-		bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
-					     gfp_mask, bs);
-		if (bio)
-			return bio;
 		/*
-		 * No cached bio available, bio returned below marked with
-		 * REQ_ALLOC_CACHE to participate in per-cpu alloc cache.
+		 * Set REQ_ALLOC_CACHE even if no cached bio is available to
+		 * return the allocated bio to the percpu cache when done.
 		 */
-	} else
+		opf |= REQ_ALLOC_CACHE;
+		bio = bio_alloc_percpu_cache(bs);
+	} else {
 		opf &= ~REQ_ALLOC_CACHE;
-
-	/*
-	 * submit_bio_noacct() converts recursion to iteration; this means if
-	 * we're running beneath it, any bios we allocate and submit will not be
-	 * submitted (and thus freed) until after we return.
-	 *
-	 * This exposes us to a potential deadlock if we allocate multiple bios
-	 * from the same bio_set() while running underneath submit_bio_noacct().
-	 * If we were to allocate multiple bios (say a stacking block driver
-	 * that was splitting bios), we would deadlock if we exhausted the
-	 * mempool's reserve.
-	 *
-	 * We solve this, and guarantee forward progress, with a rescuer
-	 * workqueue per bio_set. If we go to allocate and there are bios on
-	 * current->bio_list, we first try the allocation without
-	 * __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
-	 * blocking to the rescuer workqueue before we retry with the original
-	 * gfp_flags.
-	 */
-	if (current->bio_list &&
-	    (!bio_list_empty(&current->bio_list[0]) ||
-	     !bio_list_empty(&current->bio_list[1])) &&
-	    bs->rescue_workqueue)
-		gfp_mask &= ~__GFP_DIRECT_RECLAIM;
-
-	p = mempool_alloc(&bs->bio_pool, gfp_mask);
-	if (!p && gfp_mask != saved_gfp) {
-		punt_bios_to_rescuer(bs);
-		gfp_mask = saved_gfp;
-		p = mempool_alloc(&bs->bio_pool, gfp_mask);
+		p = kmem_cache_alloc(bs->bio_slab, gfp);
+		if (p)
+			bio = p + bs->front_pad;
 	}
-	if (unlikely(!p))
-		return NULL;
-	if (!mempool_is_saturated(&bs->bio_pool))
-		opf &= ~REQ_ALLOC_CACHE;
 
-	bio = p + bs->front_pad;
-	if (nr_vecs > BIO_INLINE_VECS) {
-		struct bio_vec *bvl = NULL;
+	if (bio && nr_vecs > BIO_INLINE_VECS) {
+		struct biovec_slab *bvs = biovec_slab(nr_vecs);
 
-		bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
-		if (!bvl && gfp_mask != saved_gfp) {
-			punt_bios_to_rescuer(bs);
-			gfp_mask = saved_gfp;
-			bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
+		/*
+		 * Upgrade nr_vecs to take full advantage of the allocation.
+		 * We also rely on this in bvec_free().
+		 */
+		nr_vecs = bvs->nr_vecs;
+		bvecs = kmem_cache_alloc(bvs->slab, gfp);
+		if (unlikely(!bvecs)) {
+			kmem_cache_free(bs->bio_slab, p);
+			bio = NULL;
 		}
-		if (unlikely(!bvl))
-			goto err_free;
+	}
 
-		bio_init(bio, bdev, bvl, nr_vecs, opf);
-	} else if (nr_vecs) {
-		bio_init_inline(bio, bdev, BIO_INLINE_VECS, opf);
-	} else {
-		bio_init(bio, bdev, NULL, 0, opf);
+	if (unlikely(!bio)) {
+		/*
+		 * Give up if we are not allow to sleep as non-blocking mempool
+		 * allocations just go back to the slab allocation.
+		 */
+		if (!(saved_gfp & __GFP_DIRECT_RECLAIM))
+			return NULL;
+
+		punt_bios_to_rescuer(bs);
+
+		/*
+		 * Don't rob the mempools by returning to the per-CPU cache if
+		 * we're tight on memory.
+		 */
+		opf &= ~REQ_ALLOC_CACHE;
+
+		p = mempool_alloc(&bs->bio_pool, gfp);
+		bio = p + bs->front_pad;
+		if (nr_vecs > BIO_INLINE_VECS) {
+			nr_vecs = BIO_MAX_VECS;
+			bvecs = mempool_alloc(&bs->bvec_pool, gfp);
+		}
 	}
 
+	if (nr_vecs && nr_vecs <= BIO_INLINE_VECS)
+		bio_init_inline(bio, bdev, nr_vecs, opf);
+	else
+		bio_init(bio, bdev, bvecs, nr_vecs, opf);
 	bio->bi_pool = bs;
 	return bio;
-
-err_free:
-	mempool_free(p, &bs->bio_pool);
-	return NULL;
 }
 EXPORT_SYMBOL(bio_alloc_bioset);
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 9693a0d6fefe..984844d2870b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -350,8 +350,7 @@ extern void bioset_exit(struct bio_set *);
 extern int biovec_init_pool(mempool_t *pool, int pool_entries);
 
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
-			     blk_opf_t opf, gfp_t gfp_mask,
-			     struct bio_set *bs);
+			     blk_opf_t opf, gfp_t gfp, struct bio_set *bs);
 struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask);
 extern void bio_put(struct bio *);
 
-- 
cgit v1.2.3


From 10febd397591d93f42adb743c2c664041e7f1bcb Mon Sep 17 00:00:00 2001
From: K Prateek Nayak <kprateek.nayak@amd.com>
Date: Thu, 12 Mar 2026 04:44:30 +0000
Subject: sched/topology: Remove sched_domain_shared allocation with sd_data

Now that "sd->shared" assignments are using the sched_domain_shared
objects allocated with s_data, remove the sd_data based allocations.

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://patch.msgid.link/20260312044434.1974-6-kprateek.nayak@amd.com
---
 include/linux/sched/topology.h |  1 -
 kernel/sched/topology.c        | 19 -------------------
 2 files changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index a1e1032426dc..51c29581f15e 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -172,7 +172,6 @@ typedef int (*sched_domain_flags_f)(void);
 
 struct sd_data {
 	struct sched_domain *__percpu *sd;
-	struct sched_domain_shared *__percpu *sds;
 	struct sched_group *__percpu *sg;
 	struct sched_group_capacity *__percpu *sgc;
 };
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index b19d84f44669..43150591914b 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1609,9 +1609,6 @@ static void claim_allocations(int cpu, struct s_data *d)
 		WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
 		*per_cpu_ptr(sdd->sd, cpu) = NULL;
 
-		if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
-			*per_cpu_ptr(sdd->sds, cpu) = NULL;
-
 		if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 			*per_cpu_ptr(sdd->sg, cpu) = NULL;
 
@@ -2390,10 +2387,6 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 		if (!sdd->sd)
 			return -ENOMEM;
 
-		sdd->sds = alloc_percpu(struct sched_domain_shared *);
-		if (!sdd->sds)
-			return -ENOMEM;
-
 		sdd->sg = alloc_percpu(struct sched_group *);
 		if (!sdd->sg)
 			return -ENOMEM;
@@ -2404,7 +2397,6 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
-			struct sched_domain_shared *sds;
 			struct sched_group *sg;
 			struct sched_group_capacity *sgc;
 
@@ -2415,13 +2407,6 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 			*per_cpu_ptr(sdd->sd, j) = sd;
 
-			sds = kzalloc_node(sizeof(struct sched_domain_shared),
-					GFP_KERNEL, cpu_to_node(j));
-			if (!sds)
-				return -ENOMEM;
-
-			*per_cpu_ptr(sdd->sds, j) = sds;
-
 			sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
 			if (!sg)
@@ -2463,8 +2448,6 @@ static void __sdt_free(const struct cpumask *cpu_map)
 				kfree(*per_cpu_ptr(sdd->sd, j));
 			}
 
-			if (sdd->sds)
-				kfree(*per_cpu_ptr(sdd->sds, j));
 			if (sdd->sg)
 				kfree(*per_cpu_ptr(sdd->sg, j));
 			if (sdd->sgc)
@@ -2472,8 +2455,6 @@ static void __sdt_free(const struct cpumask *cpu_map)
 		}
 		free_percpu(sdd->sd);
 		sdd->sd = NULL;
-		free_percpu(sdd->sds);
-		sdd->sds = NULL;
 		free_percpu(sdd->sg);
 		sdd->sg = NULL;
 		free_percpu(sdd->sgc);
-- 
cgit v1.2.3


From 8ca12326f592f7554acf2788ecb1c5c954dcf31c Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Mon, 16 Mar 2026 00:36:22 +0100
Subject: PM: EM: Switch to rcu_dereference_all() in wakeup path

em_cpu_energy() is part of the EAS (Fair) task wakeup path. Now that
rcu_read_{,un}lock() have been removed from find_energy_efficient_cpu()
switch to rcu_dereference_all() and check for rcu_read_lock_any_held()
in em_cpu_energy() as well.
In EAS (Fair) task wakeup path is a preempt/IRQ disabled region, so
rcu_read_{,un}lock() can be removed.

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://patch.msgid.link/5b1228b7-5949-4a45-9f62-e8ce936de694@arm.com
---
 include/linux/energy_model.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index e7497f804644..c909a8ba22e8 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	struct em_perf_state *ps;
 	int i;
 
-	WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
+	lockdep_assert(rcu_read_lock_any_held());
 
 	if (!sum_util)
 		return 0;
@@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	 * Find the lowest performance state of the Energy Model above the
 	 * requested performance.
 	 */
-	em_table = rcu_dereference(pd->em_table);
+	em_table = rcu_dereference_all(pd->em_table);
 	i = em_pd_get_efficient_state(em_table->state, pd, max_util);
 	ps = &em_table->state[i];
 
-- 
cgit v1.2.3


From b7560798466a07d9c3fb011698e92c335ab28baf Mon Sep 17 00:00:00 2001
From: Devendra K Verma <devendra.verma@amd.com>
Date: Wed, 18 Mar 2026 12:34:03 +0530
Subject: dmaengine: dw-edma: Add non-LL mode

AMD MDB IP supports Linked List (LL) mode as well as non-LL mode.
The current code does not have the mechanisms to enable the
DMA transactions using the non-LL mode. The following two cases
are added with this patch:
- For the AMD (Xilinx) only, when a valid physical base address of
  the device side DDR is not configured, then the IP can still be
  used in non-LL mode. For all the channels DMA transactions will
  be using the non-LL mode only. This, the default non-LL mode,
  is not applicable for Synopsys IP with the current code addition.

- If the default mode is LL-mode, for both AMD (Xilinx) and Synosys,
  and if user wants to use non-LL mode then user can do so via
  configuring the peripheral_config param of dma_slave_config.

Signed-off-by: Devendra K Verma <devendra.verma@amd.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260318070403.1634706-3-devendra.verma@amd.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw-edma/dw-edma-core.c    | 47 ++++++++++++++++++++++++-
 drivers/dma/dw-edma/dw-edma-core.h    |  1 +
 drivers/dma/dw-edma/dw-edma-pcie.c    | 44 +++++++++++++++++-------
 drivers/dma/dw-edma/dw-hdma-v0-core.c | 64 ++++++++++++++++++++++++++++++++++-
 drivers/dma/dw-edma/dw-hdma-v0-regs.h |  1 +
 include/linux/dma/edma.h              |  1 +
 6 files changed, 143 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c
index 94bcbd560143..c2feb3adc79f 100644
--- a/drivers/dma/dw-edma/dw-edma-core.c
+++ b/drivers/dma/dw-edma/dw-edma-core.c
@@ -223,6 +223,43 @@ static int dw_edma_device_config(struct dma_chan *dchan,
 				 struct dma_slave_config *config)
 {
 	struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
+	bool cfg_non_ll;
+	int non_ll = 0;
+
+	chan->non_ll = false;
+	if (chan->dw->chip->mf == EDMA_MF_HDMA_NATIVE) {
+		if (config->peripheral_config &&
+		    config->peripheral_size != sizeof(int)) {
+			dev_err(dchan->device->dev,
+				"config param peripheral size mismatch\n");
+			return -EINVAL;
+		}
+
+		/*
+		 * When there is no valid LLP base address available then the
+		 * default DMA ops will use the non-LL mode.
+		 *
+		 * Cases where LL mode is enabled and client wants to use the
+		 * non-LL mode then also client can do so via providing the
+		 * peripheral_config param.
+		 */
+		cfg_non_ll = chan->dw->chip->cfg_non_ll;
+		if (config->peripheral_config) {
+			non_ll = *(int *)config->peripheral_config;
+
+			if (cfg_non_ll && !non_ll) {
+				dev_err(dchan->device->dev, "invalid configuration\n");
+				return -EINVAL;
+			}
+		}
+
+		if (cfg_non_ll || non_ll)
+			chan->non_ll = true;
+	} else if (config->peripheral_config) {
+		dev_err(dchan->device->dev,
+			"peripheral config param applicable only for HDMA\n");
+		return -EINVAL;
+	}
 
 	memcpy(&chan->config, config, sizeof(*config));
 	chan->configured = true;
@@ -358,6 +395,7 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
 	struct dw_edma_desc *desc;
 	u64 src_addr, dst_addr;
 	size_t fsz = 0;
+	u32 bursts_max;
 	u32 cnt = 0;
 	int i;
 
@@ -415,6 +453,13 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
 		return NULL;
 	}
 
+	/*
+	 * For non-LL mode, only a single burst can be handled
+	 * in a single chunk unlike LL mode where multiple bursts
+	 * can be configured in a single chunk.
+	 */
+	bursts_max = chan->non_ll ? 1 : chan->ll_max;
+
 	desc = dw_edma_alloc_desc(chan);
 	if (unlikely(!desc))
 		goto err_alloc;
@@ -450,7 +495,7 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
 		if (xfer->type == EDMA_XFER_SCATTER_GATHER && !sg)
 			break;
 
-		if (chunk->bursts_alloc == chan->ll_max) {
+		if (chunk->bursts_alloc == bursts_max) {
 			chunk = dw_edma_alloc_chunk(desc);
 			if (unlikely(!chunk))
 				goto err_alloc;
diff --git a/drivers/dma/dw-edma/dw-edma-core.h b/drivers/dma/dw-edma/dw-edma-core.h
index 59b24973fa7d..902574b1ba86 100644
--- a/drivers/dma/dw-edma/dw-edma-core.h
+++ b/drivers/dma/dw-edma/dw-edma-core.h
@@ -86,6 +86,7 @@ struct dw_edma_chan {
 	u8				configured;
 
 	struct dma_slave_config		config;
+	bool				non_ll;
 };
 
 struct dw_edma_irq {
diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c
index 0cb5850ca411..0b30ce138503 100644
--- a/drivers/dma/dw-edma/dw-edma-pcie.c
+++ b/drivers/dma/dw-edma/dw-edma-pcie.c
@@ -295,6 +295,15 @@ static void dw_edma_pcie_get_xilinx_dma_data(struct pci_dev *pdev,
 	pdata->devmem_phys_off = off;
 }
 
+static u64 dw_edma_get_phys_addr(struct pci_dev *pdev,
+				 struct dw_edma_pcie_data *pdata,
+				 enum pci_barno bar)
+{
+	if (pdev->vendor == PCI_VENDOR_ID_XILINX)
+		return pdata->devmem_phys_off;
+	return pci_bus_address(pdev, bar);
+}
+
 static int dw_edma_pcie_probe(struct pci_dev *pdev,
 			      const struct pci_device_id *pid)
 {
@@ -303,6 +312,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
 	struct dw_edma_chip *chip;
 	int err, nr_irqs;
 	int i, mask;
+	bool non_ll = false;
 
 	struct dw_edma_pcie_data *vsec_data __free(kfree) =
 		kmalloc_obj(*vsec_data);
@@ -329,21 +339,24 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
 
 		/*
 		 * There is no valid address found for the LL memory
-		 * space on the device side.
+		 * space on the device side. In the absence of LL base
+		 * address use the non-LL mode or simple mode supported by
+		 * the HDMA IP.
 		 */
 		if (vsec_data->devmem_phys_off == DW_PCIE_XILINX_MDB_INVALID_ADDR)
-			return -ENOMEM;
+			non_ll = true;
 
 		/*
 		 * Configure the channel LL and data blocks if number of
 		 * channels enabled in VSEC capability are more than the
 		 * channels configured in xilinx_mdb_data.
 		 */
-		dw_edma_set_chan_region_offset(vsec_data, BAR_2, 0,
-					       DW_PCIE_XILINX_MDB_LL_OFF_GAP,
-					       DW_PCIE_XILINX_MDB_LL_SIZE,
-					       DW_PCIE_XILINX_MDB_DT_OFF_GAP,
-					       DW_PCIE_XILINX_MDB_DT_SIZE);
+		if (!non_ll)
+			dw_edma_set_chan_region_offset(vsec_data, BAR_2, 0,
+						       DW_PCIE_XILINX_MDB_LL_OFF_GAP,
+						       DW_PCIE_XILINX_MDB_LL_SIZE,
+						       DW_PCIE_XILINX_MDB_DT_OFF_GAP,
+						       DW_PCIE_XILINX_MDB_DT_SIZE);
 	}
 
 	/* Mapping PCI BAR regions */
@@ -391,6 +404,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
 	chip->mf = vsec_data->mf;
 	chip->nr_irqs = nr_irqs;
 	chip->ops = &dw_edma_pcie_plat_ops;
+	chip->cfg_non_ll = non_ll;
 
 	chip->ll_wr_cnt = vsec_data->wr_ch_cnt;
 	chip->ll_rd_cnt = vsec_data->rd_ch_cnt;
@@ -399,7 +413,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
 	if (!chip->reg_base)
 		return -ENOMEM;
 
-	for (i = 0; i < chip->ll_wr_cnt; i++) {
+	for (i = 0; i < chip->ll_wr_cnt && !non_ll; i++) {
 		struct dw_edma_region *ll_region = &chip->ll_region_wr[i];
 		struct dw_edma_region *dt_region = &chip->dt_region_wr[i];
 		struct dw_edma_block *ll_block = &vsec_data->ll_wr[i];
@@ -410,7 +424,8 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
 			return -ENOMEM;
 
 		ll_region->vaddr.io += ll_block->off;
-		ll_region->paddr = pci_bus_address(pdev, ll_block->bar);
+		ll_region->paddr = dw_edma_get_phys_addr(pdev, vsec_data,
+							 ll_block->bar);
 		ll_region->paddr += ll_block->off;
 		ll_region->sz = ll_block->sz;
 
@@ -419,12 +434,13 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
 			return -ENOMEM;
 
 		dt_region->vaddr.io += dt_block->off;
-		dt_region->paddr = pci_bus_address(pdev, dt_block->bar);
+		dt_region->paddr = dw_edma_get_phys_addr(pdev, vsec_data,
+							 dt_block->bar);
 		dt_region->paddr += dt_block->off;
 		dt_region->sz = dt_block->sz;
 	}
 
-	for (i = 0; i < chip->ll_rd_cnt; i++) {
+	for (i = 0; i < chip->ll_rd_cnt && !non_ll; i++) {
 		struct dw_edma_region *ll_region = &chip->ll_region_rd[i];
 		struct dw_edma_region *dt_region = &chip->dt_region_rd[i];
 		struct dw_edma_block *ll_block = &vsec_data->ll_rd[i];
@@ -435,7 +451,8 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
 			return -ENOMEM;
 
 		ll_region->vaddr.io += ll_block->off;
-		ll_region->paddr = pci_bus_address(pdev, ll_block->bar);
+		ll_region->paddr = dw_edma_get_phys_addr(pdev, vsec_data,
+							 ll_block->bar);
 		ll_region->paddr += ll_block->off;
 		ll_region->sz = ll_block->sz;
 
@@ -444,7 +461,8 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
 			return -ENOMEM;
 
 		dt_region->vaddr.io += dt_block->off;
-		dt_region->paddr = pci_bus_address(pdev, dt_block->bar);
+		dt_region->paddr = dw_edma_get_phys_addr(pdev, vsec_data,
+							 dt_block->bar);
 		dt_region->paddr += dt_block->off;
 		dt_region->sz = dt_block->sz;
 	}
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c
index 8f75934e09d0..632abb8b481c 100644
--- a/drivers/dma/dw-edma/dw-hdma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c
@@ -225,7 +225,7 @@ static void dw_hdma_v0_sync_ll_data(struct dw_edma_chunk *chunk)
 		readl(chunk->ll_region.vaddr.io);
 }
 
-static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
+static void dw_hdma_v0_core_ll_start(struct dw_edma_chunk *chunk, bool first)
 {
 	struct dw_edma_chan *chan = chunk->chan;
 	struct dw_edma *dw = chan->dw;
@@ -263,6 +263,68 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
 	SET_CH_32(dw, chan->dir, chan->id, doorbell, HDMA_V0_DOORBELL_START);
 }
 
+static void dw_hdma_v0_core_non_ll_start(struct dw_edma_chunk *chunk)
+{
+	struct dw_edma_chan *chan = chunk->chan;
+	struct dw_edma *dw = chan->dw;
+	struct dw_edma_burst *child;
+	u32 val;
+
+	child = list_first_entry_or_null(&chunk->burst->list,
+					 struct dw_edma_burst, list);
+	if (!child)
+		return;
+
+	SET_CH_32(dw, chan->dir, chan->id, ch_en, HDMA_V0_CH_EN);
+
+	/* Source address */
+	SET_CH_32(dw, chan->dir, chan->id, sar.lsb,
+		  lower_32_bits(child->sar));
+	SET_CH_32(dw, chan->dir, chan->id, sar.msb,
+		  upper_32_bits(child->sar));
+
+	/* Destination address */
+	SET_CH_32(dw, chan->dir, chan->id, dar.lsb,
+		  lower_32_bits(child->dar));
+	SET_CH_32(dw, chan->dir, chan->id, dar.msb,
+		  upper_32_bits(child->dar));
+
+	/* Transfer size */
+	SET_CH_32(dw, chan->dir, chan->id, transfer_size, child->sz);
+
+	/* Interrupt setup */
+	val = GET_CH_32(dw, chan->dir, chan->id, int_setup) |
+			HDMA_V0_STOP_INT_MASK |
+			HDMA_V0_ABORT_INT_MASK |
+			HDMA_V0_LOCAL_STOP_INT_EN |
+			HDMA_V0_LOCAL_ABORT_INT_EN;
+
+	if (!(dw->chip->flags & DW_EDMA_CHIP_LOCAL)) {
+		val |= HDMA_V0_REMOTE_STOP_INT_EN |
+		       HDMA_V0_REMOTE_ABORT_INT_EN;
+	}
+
+	SET_CH_32(dw, chan->dir, chan->id, int_setup, val);
+
+	/* Channel control setup */
+	val = GET_CH_32(dw, chan->dir, chan->id, control1);
+	val &= ~HDMA_V0_LINKLIST_EN;
+	SET_CH_32(dw, chan->dir, chan->id, control1, val);
+
+	SET_CH_32(dw, chan->dir, chan->id, doorbell,
+		  HDMA_V0_DOORBELL_START);
+}
+
+static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
+{
+	struct dw_edma_chan *chan = chunk->chan;
+
+	if (chan->non_ll)
+		dw_hdma_v0_core_non_ll_start(chunk);
+	else
+		dw_hdma_v0_core_ll_start(chunk, first);
+}
+
 static void dw_hdma_v0_core_ch_config(struct dw_edma_chan *chan)
 {
 	struct dw_edma *dw = chan->dw;
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-regs.h b/drivers/dma/dw-edma/dw-hdma-v0-regs.h
index eab5fd7177e5..7759ba9b4850 100644
--- a/drivers/dma/dw-edma/dw-hdma-v0-regs.h
+++ b/drivers/dma/dw-edma/dw-hdma-v0-regs.h
@@ -12,6 +12,7 @@
 #include <linux/dmaengine.h>
 
 #define HDMA_V0_MAX_NR_CH			8
+#define HDMA_V0_CH_EN				BIT(0)
 #define HDMA_V0_LOCAL_ABORT_INT_EN		BIT(6)
 #define HDMA_V0_REMOTE_ABORT_INT_EN		BIT(5)
 #define HDMA_V0_LOCAL_STOP_INT_EN		BIT(4)
diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h
index 9da53c75e49b..1fafd5b0e315 100644
--- a/include/linux/dma/edma.h
+++ b/include/linux/dma/edma.h
@@ -103,6 +103,7 @@ struct dw_edma_chip {
 	enum dw_edma_map_format	mf;
 
 	struct dw_edma		*dw;
+	bool			cfg_non_ll;
 };
 
 /* Export to the platform drivers */
-- 
cgit v1.2.3


From de9e2b3d88af36411301c049a1b049f3e4fe0757 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Tue, 3 Mar 2026 21:24:17 +0200
Subject: uapi: Provide DIV_ROUND_CLOSEST()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently DIV_ROUND_CLOSEST() is only available for the kernel via
include/linux/math.h.

Expose it to userland as well by adding __KERNEL_DIV_ROUND_CLOSEST() as
a common definition in uapi.

Additionally, ensure it allows building ISO C applications by switching
from the 'typeof' GNU extension to the ISO-friendly __typeof__.

Reviewed-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Tested-by: Diederik de Haas <diederik@cknow-tech.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Link: https://patch.msgid.link/20260303-rk3588-bgcolor-v8-1-fee377037ad1@collabora.com
Signed-off-by: Daniel Stone <daniels@collabora.com>
---
 include/linux/math.h       | 18 +-----------------
 include/uapi/linux/const.h | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/math.h b/include/linux/math.h
index 6dc1d1d32fbc..1e8fb3efbc8c 100644
--- a/include/linux/math.h
+++ b/include/linux/math.h
@@ -89,23 +89,7 @@
 }							\
 )
 
-/*
- * Divide positive or negative dividend by positive or negative divisor
- * and round to closest integer. Result is undefined for negative
- * divisors if the dividend variable type is unsigned and for negative
- * dividends if the divisor variable type is unsigned.
- */
-#define DIV_ROUND_CLOSEST(x, divisor)(			\
-{							\
-	typeof(x) __x = x;				\
-	typeof(divisor) __d = divisor;			\
-	(((typeof(x))-1) > 0 ||				\
-	 ((typeof(divisor))-1) > 0 ||			\
-	 (((__x) > 0) == ((__d) > 0))) ?		\
-		(((__x) + ((__d) / 2)) / (__d)) :	\
-		(((__x) - ((__d) / 2)) / (__d));	\
-}							\
-)
+#define DIV_ROUND_CLOSEST __KERNEL_DIV_ROUND_CLOSEST
 /*
  * Same as above but for u64 dividends. divisor must be a 32-bit
  * number.
diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h
index b8f629ef135f..565f309b9df8 100644
--- a/include/uapi/linux/const.h
+++ b/include/uapi/linux/const.h
@@ -50,4 +50,22 @@
 
 #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
 
+/*
+ * Divide positive or negative dividend by positive or negative divisor
+ * and round to closest integer. Result is undefined for negative
+ * divisors if the dividend variable type is unsigned and for negative
+ * dividends if the divisor variable type is unsigned.
+ */
+#define __KERNEL_DIV_ROUND_CLOSEST(x, divisor)		\
+({							\
+	__typeof__(x) __x = x;				\
+	__typeof__(divisor) __d = divisor;		\
+							\
+	(((__typeof__(x))-1) > 0 ||			\
+	 ((__typeof__(divisor))-1) > 0 ||		\
+	 (((__x) > 0) == ((__d) > 0))) ?		\
+		(((__x) + ((__d) / 2)) / (__d)) :	\
+		(((__x) - ((__d) / 2)) / (__d));	\
+})
+
 #endif /* _UAPI_LINUX_CONST_H */
-- 
cgit v1.2.3


From f8a5f6934f30b9ee334256347dd70a7ba0f8be7f Mon Sep 17 00:00:00 2001
From: Aldo Conte <aldocontelk@gmail.com>
Date: Wed, 11 Mar 2026 17:33:20 +0100
Subject: usb: typec: Document priority and mode_selection fields in struct
 typec_altmode

The fields 'priority' and 'mode_selection' in struct typec_altmode are
missing from the kernel-doc comment, which results in warnings when
building the documentation with 'make htmldocs'.

WARNING: ./include/linux/usb/typec_altmode.h:44 struct member 'priority' not described in 'typec_altmode'
WARNING: ./include/linux/usb/typec_altmode.h:44 struct member 'mode_selection' not described in 'typec_altmode'

Document both fields to keep the kernel-doc comment aligned with the
structure definition.

Signed-off-by: Aldo Conte <aldocontelk@gmail.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://patch.msgid.link/20260311163320.61534-1-aldocontelk@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/typec_altmode.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h
index 0513d333b797..b90cc5cfff8d 100644
--- a/include/linux/usb/typec_altmode.h
+++ b/include/linux/usb/typec_altmode.h
@@ -26,6 +26,9 @@ struct typec_altmode_ops;
  * @mode: Index of the Mode
  * @vdo: VDO returned by Discover Modes USB PD command
  * @active: Tells has the mode been entered or not
+ * @priority: Priority used by the automatic alternate mode selection process
+ * @mode_selection: Whether entry to this alternate mode is managed by the
+ * automatic alternate mode selection process or by the specific driver
  * @desc: Optional human readable description of the mode
  * @ops: Operations vector from the driver
  * @cable_ops: Cable operations vector from the driver.
-- 
cgit v1.2.3


From a43dd4f6f91ed1a1d16595cb0c550b283e9b2298 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <badhri@google.com>
Date: Mon, 16 Mar 2026 15:03:00 +0000
Subject: power: supply: Add PD SPR AVS support to USB type enum

Add two new members to the power_supply_usb_type to represent the
USB Power Delivery (PD) Standard Power Range (SPR) Adjustable Voltage
Supply (AVS) charging types:

POWER_SUPPLY_USB_TYPE_PD_SPR_AVS: For devices supporting only the
PD SPR AVS type.

POWER_SUPPLY_USB_TYPE_PD_PPS_SPR_AVS: For devices that support both
PD Programmable Power Supply (PPS) and PD SPR AVS.

Signed-off-by: Badhri Jagan Sridharan <badhri@google.com>
Link: https://patch.msgid.link/20260316150301.3892223-3-badhri@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-class-power | 3 ++-
 drivers/power/supply/power_supply_sysfs.c   | 2 ++
 include/linux/power_supply.h                | 3 +++
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index 4b21d5d23251..32697b926cc8 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -675,7 +675,8 @@ Description:
 
 		Valid values:
 			      "Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD",
-			      "PD_DRP", "PD_PPS", "BrickID"
+			      "PD_DRP", "PD_PPS", "BrickID", "PD_SPR_AVS",
+			      "PD_PPS_SPR_AVS"
 
 **Device Specific Properties**
 
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index dd3a48d72d2b..f30a7b9ccd5e 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -70,6 +70,8 @@ static const char * const POWER_SUPPLY_USB_TYPE_TEXT[] = {
 	[POWER_SUPPLY_USB_TYPE_PD]		= "PD",
 	[POWER_SUPPLY_USB_TYPE_PD_DRP]		= "PD_DRP",
 	[POWER_SUPPLY_USB_TYPE_PD_PPS]		= "PD_PPS",
+	[POWER_SUPPLY_USB_TYPE_PD_SPR_AVS]	= "PD_SPR_AVS",
+	[POWER_SUPPLY_USB_TYPE_PD_PPS_SPR_AVS]	= "PD_PPS_SPR_AVS",
 	[POWER_SUPPLY_USB_TYPE_APPLE_BRICK_ID]	= "BrickID",
 };
 
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 360ffdf272da..7a5e4c3242a0 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -210,6 +210,9 @@ enum power_supply_usb_type {
 	POWER_SUPPLY_USB_TYPE_PD,		/* Power Delivery Port */
 	POWER_SUPPLY_USB_TYPE_PD_DRP,		/* PD Dual Role Port */
 	POWER_SUPPLY_USB_TYPE_PD_PPS,		/* PD Programmable Power Supply */
+	/* PD Standard Power Range Adjustable Voltage Supply */
+	POWER_SUPPLY_USB_TYPE_PD_SPR_AVS,
+	POWER_SUPPLY_USB_TYPE_PD_PPS_SPR_AVS,	/* Supports both PD PPS + SPR AVS */
 	POWER_SUPPLY_USB_TYPE_APPLE_BRICK_ID,	/* Apple Charging Method */
 };
 
-- 
cgit v1.2.3


From d3d959404e6c72e09db8de8893a970edf0ac565d Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <badhri@google.com>
Date: Mon, 16 Mar 2026 15:03:01 +0000
Subject: tcpm: Implement sink support for PD SPR AVS negotiation

Add support to enable TCPM to negotiate with
USB PD Standard Power Range Adjustable Voltage Supply (SPR AVS) when
acting as a power sink.

* Added support to the tcpm power supply properties, allowing userspace
  to enable and control the dynamic limits (voltage and current)
  specific to the SPR AVS contract.
* Implemented tcpm_pd_select_spr_avs_apdo() to select the appropriate
  APDO and validate the requested voltage/current against both the
  Source and Sink capabilities.
* Implemented tcpm_pd_build_spr_avs_request() to construct the
  Request Data Object (RDO) for SPR AVS.
* Added SNK_NEGOTIATE_SPR_AVS_CAPABILITIES state to the state machine to
  handle negotiation for SPR AVS.
* Updated the SNK_TRANSITION_SINK state to implement the SPR
  AVS-specific VBUS transition rules, including reducing current draw to
  PD_I_SNK_STBY_MA for large voltage changes, as required by USB PD spec.

Log stub captured when enabling AVS:
$ echo 3 > /sys/class/power_supply/tcpm-source-psy-1-0025/online
$ cat /d/usb/tcpm-1-0025/log
[  358.895775] request to set AVS online
[  358.895792] AMS POWER_NEGOTIATION start
[  358.895806] state change SNK_READY -> AMS_START [rev3 POWER_NEGOTIATION]
[  358.895850] state change AMS_START -> SNK_NEGOTIATE_SPR_AVS_CAPABILITIES [rev3 POWER_NEGOTIATION]
[  358.895866] SPR AVS src_pdo_index:4 snk_pdo_index:2 req_op_curr_ma roundup:2200 req_out_volt_mv roundup:9000
[  358.895880] Requesting APDO SPR AVS 4: 9000 mV, 2200 mA
[  358.896405] set_auto_vbus_discharge_threshold mode:0 pps_active:n vbus:0 pps_apdo_min_volt:0 ret:0
[  358.896422] PD TX, header: 0x1a82
[  358.900158] PD TX complete, status: 0
[  358.900205] pending state change SNK_NEGOTIATE_SPR_AVS_CAPABILITIES -> HARD_RESET_SEND @ 60 ms [rev3 POWER_NEGOTIATION]
[  358.904832] PD RX, header: 0x1a3 [1]
[  358.904854] state change SNK_NEGOTIATE_SPR_AVS_CAPABILITIES -> SNK_TRANSITION_SINK [rev3 POWER_NEGOTIATION]
[  358.904888] pending state change SNK_TRANSITION_SINK -> HARD_RESET_SEND @ 700 ms [rev3 POWER_NEGOTIATION]
[  359.021530] PD RX, header: 0x3a6 [1]
[  359.021546] Setting voltage/current limit 9000 mV 2200 mA
[  359.023035] set_auto_vbus_discharge_threshold mode:3 pps_active:n vbus:9000 pps_apdo_min_volt:0 ret:0
[  359.023053] state change SNK_TRANSITION_SINK -> SNK_READY [rev3 POWER_NEGOTIATION]
[  359.023090] AMS POWER_NEGOTIATION finished

$ cat /sys/class/power_supply/tcpm-source-psy-1-0025/online
3

Log stub captured when increasing voltage:
$ echo 9100000 > /sys/class/power_supply/tcpm-source-psy-1-0025/voltage_now
$ cat /d/usb/tcpm-1-0025/log

[  632.116714] AMS POWER_NEGOTIATION start
[  632.116728] state change SNK_READY -> AMS_START [rev3 POWER_NEGOTIATION]
[  632.116779] state change AMS_START -> SNK_NEGOTIATE_SPR_AVS_CAPABILITIES [rev3 POWER_NEGOTIATION]
[  632.116798] SPR AVS src_pdo_index:4 snk_pdo_index:2 req_op_curr_ma roundup:2200 req_out_volt_mv roundup:9100
[  632.116811] Requesting APDO SPR AVS 4: 9100 mV, 2200 mA
[  632.117315] set_auto_vbus_discharge_threshold mode:0 pps_active:n vbus:0 pps_apdo_min_volt:0 ret:0
[  632.117328] PD TX, header: 0x1c82
[  632.121007] PD TX complete, status: 0
[  632.121052] pending state change SNK_NEGOTIATE_SPR_AVS_CAPABILITIES -> HARD_RESET_SEND @ 60 ms [rev3 POWER_NEGOTIATION]
[  632.124572] PD RX, header: 0x5a3 [1]
[  632.124594] state change SNK_NEGOTIATE_SPR_AVS_CAPABILITIES -> SNK_TRANSITION_SINK [rev3 POWER_NEGOTIATION]
[  632.124623] pending state change SNK_TRANSITION_SINK -> HARD_RESET_SEND @ 700 ms [rev3 POWER_NEGOTIATION]
[  632.149256] PD RX, header: 0x7a6 [1]
[  632.149271] Setting voltage/current limit 9100 mV 2200 mA
[  632.150770] set_auto_vbus_discharge_threshold mode:3 pps_active:n vbus:9100 pps_apdo_min_volt:0 ret:0
[  632.150787] state change SNK_TRANSITION_SINK -> SNK_READY [rev3 POWER_NEGOTIATION]
[  632.150823] AMS POWER_NEGOTIATION finished

$ cat /sys/class/power_supply/tcpm-source-psy-1-0025/voltage_now
9100000

Signed-off-by: Badhri Jagan Sridharan <badhri@google.com>
Reviewed-by: Amit Sunil Dhamne <amitsd@google.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://patch.msgid.link/20260316150301.3892223-4-badhri@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 611 +++++++++++++++++++++++++++++++++++-------
 include/linux/usb/pd.h        |  32 ++-
 include/linux/usb/tcpm.h      |   2 +-
 3 files changed, 537 insertions(+), 108 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 63a75b94743d..dfbb94ddc98a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -62,6 +62,7 @@
 	S(SNK_WAIT_CAPABILITIES_TIMEOUT),	\
 	S(SNK_NEGOTIATE_CAPABILITIES),		\
 	S(SNK_NEGOTIATE_PPS_CAPABILITIES),	\
+	S(SNK_NEGOTIATE_SPR_AVS_CAPABILITIES),	\
 	S(SNK_TRANSITION_SINK),			\
 	S(SNK_TRANSITION_SINK_VBUS),		\
 	S(SNK_READY),				\
@@ -308,6 +309,51 @@ struct pd_pps_data {
 	bool active;
 };
 
+enum spr_avs_status {
+	SPR_AVS_UNKNOWN,
+	SPR_AVS_NOT_SUPPORTED,
+	SPR_AVS_SUPPORTED
+};
+
+static const char * const spr_avs_status_strings[] = {
+	[SPR_AVS_UNKNOWN]	= "Unknown",
+	[SPR_AVS_SUPPORTED]	= "Supported",
+	[SPR_AVS_NOT_SUPPORTED]	= "Not Supported",
+};
+
+/*
+ * Standard Power Range Adjustable Voltage Supply (SPR - AVS) data
+ * @max_current_ma_9v_to_15v: Max current for 9V to 15V range derived from
+ *                            source cap & sink cap
+ * @max_current_ma_15v_to_20v: Max current for 15V to 20V range derived from
+ *                             source cap & sink cap
+ * @req_op_curr_ma: Requested operating current to the port partner acting as source
+ * @req_out_volt_mv: Requested output voltage to the port partner acting as source
+ * @max_out_volt_mv: Max SPR voltage supported by the port and the port partner
+ * @max_current_ma; MAX SPR current supported by the port and the port partner
+ * @port_partner_src_status: SPR AVS status of port partner acting as source
+ * @port_partner_src_pdo_index: PDO index of SPR AVS cap of the port partner
+ *                              acting as source. Valid only when
+ *                              port_partner_src_status is SPR_AVS_SUPPORTED.
+ * @port_snk_status: SPR AVS status of the local port acting as sink.
+ * @port_snk_pdo_index: PDO index of SPR AVS cap of local port acting as sink
+ * @active: True when the local port acting as the sink has negotiated SPR AVS
+ *          with the partner acting as source.
+ */
+struct pd_spr_avs_data {
+	u32 max_current_ma_9v_to_15v;
+	u32 max_current_ma_15v_to_20v;
+	u32 req_op_curr_ma;
+	u32 req_out_volt_mv;
+	u32 max_out_volt_mv;
+	u32 max_current_ma;
+	enum spr_avs_status port_partner_src_status;
+	unsigned int port_partner_src_pdo_index;
+	enum spr_avs_status port_snk_status;
+	unsigned int port_snk_pdo_index;
+	bool active;
+};
+
 struct pd_data {
 	struct usb_power_delivery *pd;
 	struct usb_power_delivery_capabilities *source_cap;
@@ -376,6 +422,11 @@ struct sink_caps_ext_data {
 	u8 spr_max_pdp;
 };
 
+enum aug_req_type {
+	PD_PPS,
+	PD_SPR_AVS,
+};
+
 struct tcpm_port {
 	struct device *dev;
 
@@ -538,9 +589,14 @@ struct tcpm_port {
 
 	/* PPS */
 	struct pd_pps_data pps_data;
-	struct completion pps_complete;
-	bool pps_pending;
-	int pps_status;
+
+	/* SPR AVS */
+	struct pd_spr_avs_data spr_avs_data;
+
+	/* Augmented supply request - PPS; SPR_AVS */
+	struct completion aug_supply_req_complete;
+	bool aug_supply_req_pending;
+	int aug_supply_req_status;
 
 	/* Alternate mode data */
 	struct pd_mode_data mode_data;
@@ -3285,6 +3341,7 @@ static void tcpm_pd_data_request(struct tcpm_port *port,
 
 	switch (type) {
 	case PD_DATA_SOURCE_CAP:
+		port->spr_avs_data.port_partner_src_status = SPR_AVS_UNKNOWN;
 		for (i = 0; i < cnt; i++)
 			port->source_caps[i] = le32_to_cpu(msg->payload[i]);
 
@@ -3456,12 +3513,12 @@ static void tcpm_pd_data_request(struct tcpm_port *port,
 	}
 }
 
-static void tcpm_pps_complete(struct tcpm_port *port, int result)
+static void tcpm_aug_supply_req_complete(struct tcpm_port *port, int result)
 {
-	if (port->pps_pending) {
-		port->pps_status = result;
-		port->pps_pending = false;
-		complete(&port->pps_complete);
+	if (port->aug_supply_req_pending) {
+		port->aug_supply_req_status = result;
+		port->aug_supply_req_pending = false;
+		complete(&port->aug_supply_req_complete);
 	}
 }
 
@@ -3559,7 +3616,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 			/* Revert data back from any requested PPS updates */
 			port->pps_data.req_out_volt = port->supply_voltage;
 			port->pps_data.req_op_curr = port->current_limit;
-			port->pps_status = (type == PD_CTRL_WAIT ?
+			port->aug_supply_req_status = (type == PD_CTRL_WAIT ?
 					    -EAGAIN : -EOPNOTSUPP);
 
 			/* Threshold was relaxed before sending Request. Restore it back. */
@@ -3567,6 +3624,20 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 							       port->pps_data.active,
 							       port->supply_voltage);
 
+			tcpm_set_state(port, SNK_READY, 0);
+			break;
+		case SNK_NEGOTIATE_SPR_AVS_CAPABILITIES:
+			/* Revert data back from any requested SPR AVS updates */
+			port->spr_avs_data.req_out_volt_mv = port->supply_voltage;
+			port->spr_avs_data.req_op_curr_ma = port->current_limit;
+			port->aug_supply_req_status = (type == PD_CTRL_WAIT ?
+					      -EAGAIN : -EOPNOTSUPP);
+
+			/* Threshold was relaxed before sending Request. Restore it back. */
+			tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_PD,
+							       port->spr_avs_data.active,
+							       port->supply_voltage);
+
 			tcpm_set_state(port, SNK_READY, 0);
 			break;
 		case DR_SWAP_SEND:
@@ -3621,6 +3692,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 		switch (port->state) {
 		case SNK_NEGOTIATE_CAPABILITIES:
 			port->pps_data.active = false;
+			port->spr_avs_data.active = false;
 			tcpm_set_state(port, SNK_TRANSITION_SINK, 0);
 			break;
 		case SNK_NEGOTIATE_PPS_CAPABILITIES:
@@ -3633,6 +3705,13 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 			power_supply_changed(port->psy);
 			tcpm_set_state(port, SNK_TRANSITION_SINK, 0);
 			break;
+		case SNK_NEGOTIATE_SPR_AVS_CAPABILITIES:
+			port->spr_avs_data.active = true;
+			port->req_supply_voltage = port->spr_avs_data.req_out_volt_mv;
+			port->req_current_limit = port->spr_avs_data.req_op_curr_ma;
+			power_supply_changed(port->psy);
+			tcpm_set_state(port, SNK_TRANSITION_SINK, 0);
+			break;
 		case SOFT_RESET_SEND:
 			if (port->ams == SOFT_RESET_AMS)
 				tcpm_ams_finish(port);
@@ -4130,9 +4209,9 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo,
 		case PDO_TYPE_APDO:
 			if (pdo_apdo_type(pdo) == APDO_TYPE_PPS) {
 				port->pps_data.supported = true;
-				port->usb_type =
-					POWER_SUPPLY_USB_TYPE_PD_PPS;
-				power_supply_changed(port->psy);
+			} else if (pdo_apdo_type(pdo) == APDO_TYPE_SPR_AVS) {
+				port->spr_avs_data.port_partner_src_status = SPR_AVS_SUPPORTED;
+				port->spr_avs_data.port_partner_src_pdo_index = i;
 			}
 			continue;
 		default:
@@ -4170,6 +4249,10 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo,
 				min_snk_mv = pdo_min_voltage(pdo);
 				break;
 			case PDO_TYPE_APDO:
+				if (pdo_apdo_type(pdo) == APDO_TYPE_SPR_AVS) {
+					port->spr_avs_data.port_snk_status = SPR_AVS_SUPPORTED;
+					port->spr_avs_data.port_snk_pdo_index = j;
+				}
 				continue;
 			default:
 				tcpm_log(port, "Invalid sink PDO type, ignoring");
@@ -4191,6 +4274,23 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo,
 		}
 	}
 
+	if (port->spr_avs_data.port_snk_status == SPR_AVS_UNKNOWN)
+		port->spr_avs_data.port_snk_status = SPR_AVS_NOT_SUPPORTED;
+
+	if (port->spr_avs_data.port_partner_src_status == SPR_AVS_UNKNOWN)
+		port->spr_avs_data.port_partner_src_status = SPR_AVS_NOT_SUPPORTED;
+
+	if (port->pps_data.supported &&
+	    port->spr_avs_data.port_partner_src_status == SPR_AVS_SUPPORTED)
+		port->usb_type = POWER_SUPPLY_USB_TYPE_PD_PPS_SPR_AVS;
+	else if (port->pps_data.supported)
+		port->usb_type = POWER_SUPPLY_USB_TYPE_PD_PPS;
+	else if (port->spr_avs_data.port_partner_src_status == SPR_AVS_SUPPORTED)
+		port->usb_type = POWER_SUPPLY_USB_TYPE_PD_SPR_AVS;
+
+	if (port->usb_type != POWER_SUPPLY_USB_TYPE_PD)
+		power_supply_changed(port->psy);
+
 	return ret;
 }
 
@@ -4241,6 +4341,88 @@ static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port)
 	return src_pdo;
 }
 
+static int tcpm_pd_select_spr_avs_apdo(struct tcpm_port *port)
+{
+	u32 req_out_volt_mv, req_op_curr_ma, src_max_curr_ma = 0, source_cap;
+	u32 snk_max_curr_ma = 0, src_pdo_index, snk_pdo_index, snk_pdo;
+
+	if (port->spr_avs_data.port_snk_status != SPR_AVS_SUPPORTED ||
+	    port->spr_avs_data.port_partner_src_status !=
+	    SPR_AVS_SUPPORTED) {
+		tcpm_log(port, "SPR AVS not supported. port:%s partner:%s",
+			 spr_avs_status_strings[port->spr_avs_data.port_snk_status],
+			 spr_avs_status_strings[port->spr_avs_data.port_partner_src_status]);
+		return -EOPNOTSUPP;
+	}
+
+	/* Round up to SPR_AVS_VOLT_MV_STEP */
+	req_out_volt_mv = port->spr_avs_data.req_out_volt_mv;
+	if (req_out_volt_mv % SPR_AVS_VOLT_MV_STEP) {
+		req_out_volt_mv += SPR_AVS_VOLT_MV_STEP -
+			(req_out_volt_mv % SPR_AVS_VOLT_MV_STEP);
+		port->spr_avs_data.req_out_volt_mv = req_out_volt_mv;
+	}
+
+	/* Round up to RDO_SPR_AVS_CURR_MA_STEP */
+	req_op_curr_ma = port->spr_avs_data.req_op_curr_ma;
+	if (req_op_curr_ma % RDO_SPR_AVS_CURR_MA_STEP) {
+		req_op_curr_ma += RDO_SPR_AVS_CURR_MA_STEP -
+			(req_op_curr_ma % RDO_SPR_AVS_CURR_MA_STEP);
+		port->spr_avs_data.req_op_curr_ma = req_op_curr_ma;
+	}
+
+	src_pdo_index = port->spr_avs_data.port_partner_src_pdo_index;
+	snk_pdo_index = port->spr_avs_data.port_snk_pdo_index;
+	source_cap = port->source_caps[src_pdo_index];
+	snk_pdo = port->snk_pdo[snk_pdo_index];
+	tcpm_log(port,
+		 "SPR AVS src_pdo_index:%d snk_pdo_index:%d req_op_curr_ma roundup:%u req_out_volt_mv roundup:%u",
+		 src_pdo_index, snk_pdo_index, req_op_curr_ma, req_out_volt_mv);
+
+	if (req_out_volt_mv >= SPR_AVS_TIER1_MIN_VOLT_MV &&
+	    req_out_volt_mv <= SPR_AVS_TIER1_MAX_VOLT_MV) {
+		src_max_curr_ma =
+			pdo_spr_avs_apdo_9v_to_15v_max_current_ma(source_cap);
+		snk_max_curr_ma =
+			pdo_spr_avs_apdo_9v_to_15v_max_current_ma(snk_pdo);
+	} else if (req_out_volt_mv > SPR_AVS_TIER1_MAX_VOLT_MV &&
+		   req_out_volt_mv <= SPR_AVS_TIER2_MAX_VOLT_MV) {
+		src_max_curr_ma =
+			pdo_spr_avs_apdo_15v_to_20v_max_current_ma(source_cap);
+		snk_max_curr_ma =
+			pdo_spr_avs_apdo_15v_to_20v_max_current_ma(snk_pdo);
+	} else {
+		tcpm_log(port, "Invalid SPR AVS req_volt:%umV", req_out_volt_mv);
+		return -EINVAL;
+	}
+
+	if (req_op_curr_ma > src_max_curr_ma ||
+	    req_op_curr_ma > snk_max_curr_ma) {
+		tcpm_log(port,
+			 "Invalid SPR AVS request. req_volt:%umV req_curr:%umA src_max_cur:%umA snk_max_cur:%umA",
+			 req_out_volt_mv, req_op_curr_ma, src_max_curr_ma,
+			 snk_max_curr_ma);
+		return -EINVAL;
+	}
+
+	/* Max SPR voltage based on both the port and the partner caps */
+	if (pdo_spr_avs_apdo_15v_to_20v_max_current_ma(snk_pdo) &&
+	    pdo_spr_avs_apdo_15v_to_20v_max_current_ma(source_cap))
+		port->spr_avs_data.max_out_volt_mv = SPR_AVS_TIER2_MAX_VOLT_MV;
+	else
+		port->spr_avs_data.max_out_volt_mv = SPR_AVS_TIER1_MAX_VOLT_MV;
+
+	/*
+	 * Max SPR AVS curr based on 9V to 15V. This should be higher than or
+	 * equal to 15V to 20V range.
+	 */
+	port->spr_avs_data.max_current_ma =
+		min(pdo_spr_avs_apdo_9v_to_15v_max_current_ma(source_cap),
+		    pdo_spr_avs_apdo_9v_to_15v_max_current_ma(snk_pdo));
+
+	return src_pdo_index;
+}
+
 static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo)
 {
 	unsigned int mv, ma, mw, flags;
@@ -4408,13 +4590,74 @@ static int tcpm_pd_build_pps_request(struct tcpm_port *port, u32 *rdo)
 	return 0;
 }
 
-static int tcpm_pd_send_pps_request(struct tcpm_port *port)
+static int tcpm_pd_build_spr_avs_request(struct tcpm_port *port, u32 *rdo)
+{
+	u32 out_mv, op_ma, flags, snk_pdo_index, source_cap;
+	unsigned int src_power_mw, snk_power_mw;
+	int src_pdo_index;
+	u32 snk_pdo;
+
+	src_pdo_index = tcpm_pd_select_spr_avs_apdo(port);
+	if (src_pdo_index < 0)
+		return src_pdo_index;
+	snk_pdo_index = port->spr_avs_data.port_snk_pdo_index;
+	source_cap = port->source_caps[src_pdo_index];
+	snk_pdo = port->snk_pdo[snk_pdo_index];
+	out_mv = port->spr_avs_data.req_out_volt_mv;
+	op_ma = port->spr_avs_data.req_op_curr_ma;
+
+	flags = RDO_USB_COMM | RDO_NO_SUSPEND;
+
+	/*
+	 * Set capability mismatch when the maximum power needs in the current
+	 * requested AVS voltage tier range is greater than
+	 * port->operating_snk_mw, however, the maximum power offered by the
+	 * source at the current requested AVS voltage tier is less than
+	 * port->operating_sink_mw.
+	 */
+	if (out_mv > SPR_AVS_TIER1_MAX_VOLT_MV) {
+		src_power_mw =
+			pdo_spr_avs_apdo_15v_to_20v_max_current_ma(source_cap) *
+			SPR_AVS_TIER2_MAX_VOLT_MV / 1000;
+		snk_power_mw =
+			pdo_spr_avs_apdo_15v_to_20v_max_current_ma(snk_pdo) *
+			SPR_AVS_TIER2_MAX_VOLT_MV / 1000;
+	} else {
+		src_power_mw =
+			pdo_spr_avs_apdo_9v_to_15v_max_current_ma(source_cap) *
+			SPR_AVS_TIER1_MAX_VOLT_MV / 1000;
+		snk_power_mw =
+			pdo_spr_avs_apdo_9v_to_15v_max_current_ma(snk_pdo) *
+			SPR_AVS_TIER1_MAX_VOLT_MV / 1000;
+	}
+
+	if (snk_power_mw >= port->operating_snk_mw &&
+	    src_power_mw < port->operating_snk_mw)
+		flags |= RDO_CAP_MISMATCH;
+
+	*rdo = RDO_AVS(src_pdo_index + 1, out_mv, op_ma, flags);
+
+	tcpm_log(port, "Requesting APDO SPR AVS %d: %u mV, %u mA",
+		 src_pdo_index, out_mv, op_ma);
+
+	return 0;
+}
+
+static int tcpm_pd_send_aug_supply_request(struct tcpm_port *port,
+					   enum aug_req_type type)
 {
 	struct pd_message msg;
 	int ret;
 	u32 rdo;
 
-	ret = tcpm_pd_build_pps_request(port, &rdo);
+	if (type == PD_PPS) {
+		ret = tcpm_pd_build_pps_request(port, &rdo);
+	} else if (type == PD_SPR_AVS) {
+		ret = tcpm_pd_build_spr_avs_request(port, &rdo);
+	} else {
+		tcpm_log(port, "Invalid aug_req_type %d", type);
+		ret = -EOPNOTSUPP;
+	}
 	if (ret < 0)
 		return ret;
 
@@ -4637,6 +4880,14 @@ static void tcpm_set_partner_usb_comm_capable(struct tcpm_port *port, bool capab
 		port->tcpc->set_partner_usb_comm_capable(port->tcpc, capable);
 }
 
+static void tcpm_partner_source_caps_reset(struct tcpm_port *port)
+{
+	usb_power_delivery_unregister_capabilities(port->partner_source_caps);
+	port->partner_source_caps = NULL;
+	port->spr_avs_data.port_partner_src_status = SPR_AVS_UNKNOWN;
+	port->spr_avs_data.active = false;
+}
+
 static void tcpm_reset_port(struct tcpm_port *port)
 {
 	tcpm_enable_auto_vbus_discharge(port, false);
@@ -4676,8 +4927,7 @@ static void tcpm_reset_port(struct tcpm_port *port)
 
 	usb_power_delivery_unregister_capabilities(port->partner_sink_caps);
 	port->partner_sink_caps = NULL;
-	usb_power_delivery_unregister_capabilities(port->partner_source_caps);
-	port->partner_source_caps = NULL;
+	tcpm_partner_source_caps_reset(port);
 	usb_power_delivery_unregister(port->partner_pd);
 	port->partner_pd = NULL;
 }
@@ -5169,7 +5419,7 @@ static void run_state_machine(struct tcpm_port *port)
 	case SNK_UNATTACHED:
 		if (!port->non_pd_role_swap)
 			tcpm_swap_complete(port, -ENOTCONN);
-		tcpm_pps_complete(port, -ENOTCONN);
+		tcpm_aug_supply_req_complete(port, -ENOTCONN);
 		tcpm_snk_detach(port);
 		if (port->potential_contaminant) {
 			tcpm_set_state(port, CHECK_CONTAMINANT, 0);
@@ -5400,13 +5650,16 @@ static void run_state_machine(struct tcpm_port *port)
 		}
 		break;
 	case SNK_NEGOTIATE_PPS_CAPABILITIES:
-		ret = tcpm_pd_send_pps_request(port);
+	case SNK_NEGOTIATE_SPR_AVS_CAPABILITIES:
+		ret = tcpm_pd_send_aug_supply_request(port, port->state ==
+						      SNK_NEGOTIATE_PPS_CAPABILITIES ?
+						      PD_PPS : PD_SPR_AVS);
 		if (ret < 0) {
 			/* Restore back to the original state */
 			tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_PD,
 							       port->pps_data.active,
 							       port->supply_voltage);
-			port->pps_status = ret;
+			port->aug_supply_req_status = ret;
 			/*
 			 * If this was called due to updates to sink
 			 * capabilities, and pps is no longer valid, we should
@@ -5422,23 +5675,58 @@ static void run_state_machine(struct tcpm_port *port)
 		}
 		break;
 	case SNK_TRANSITION_SINK:
-		/* From the USB PD spec:
-		 * "The Sink Shall transition to Sink Standby before a positive or
-		 * negative voltage transition of VBUS. During Sink Standby
-		 * the Sink Shall reduce its power draw to pSnkStdby."
-		 *
-		 * This is not applicable to PPS though as the port can continue
-		 * to draw negotiated power without switching to standby.
-		 */
-		if (port->supply_voltage != port->req_supply_voltage && !port->pps_data.active &&
-		    port->current_limit * port->supply_voltage / 1000 > PD_P_SNK_STDBY_MW) {
-			u32 stdby_ma = PD_P_SNK_STDBY_MW * 1000 / port->supply_voltage;
+		if (port->spr_avs_data.active) {
+			if (abs(port->req_supply_voltage - port->supply_voltage) >
+			    SPR_AVS_AVS_SMALL_STEP_V * 1000) {
+				/*
+				 * The Sink Shall reduce its current draw to
+				 * iSnkStdby within tSnkStdby. The reduction to
+				 * iSnkStdby is not required if the voltage
+				 * increase is less than or equal to
+				 * vAvsSmallStep.
+				 */
+				tcpm_log(port,
+					 "SPR AVS Setting iSnkstandby. Req vol: %u mV Curr vol: %u mV",
+					 port->req_supply_voltage,
+					 port->supply_voltage);
+				tcpm_set_current_limit(port, PD_I_SNK_STBY_MA,
+						       port->supply_voltage);
+			}
+			/*
+			 * Although tAvsSrcTransSmall is expected to be used
+			 * for voltage transistions smaller than 1V, using
+			 * tAvsSrcTransLarge to be resilient against chargers
+			 * which strictly cannot honor tAvsSrcTransSmall to
+			 * improve interoperability.
+			 */
+			tcpm_set_state(port, hard_reset_state(port),
+				       PD_T_AVS_SRC_TRANS_LARGE);
+			/*
+			 * From the USB PD spec:
+			 * "The Sink Shall transition to Sink Standby before a
+			 * positive ornegative voltage transition of VBUS.
+			 * During Sink Standby the Sink Shall reduce its power
+			 * draw to pSnkStdby."
+			 *
+			 * This is not applicable to PPS though as the port can
+			 * continue to draw negotiated power without switching
+			 * to standby.
+			 */
+		} else if (port->supply_voltage != port->req_supply_voltage &&
+			   !port->pps_data.active &&
+			   (port->current_limit * port->supply_voltage / 1000 >
+			   PD_P_SNK_STDBY_MW)) {
+			u32 stdby_ma = PD_P_SNK_STDBY_MW * 1000 /
+				port->supply_voltage;
 
 			tcpm_log(port, "Setting standby current %u mV @ %u mA",
 				 port->supply_voltage, stdby_ma);
-			tcpm_set_current_limit(port, stdby_ma, port->supply_voltage);
+			tcpm_set_current_limit(port, stdby_ma,
+					       port->supply_voltage);
+			tcpm_set_state(port, hard_reset_state(port),
+				       PD_T_PS_TRANSITION);
 		}
-		fallthrough;
+		break;
 	case SNK_TRANSITION_SINK_VBUS:
 		tcpm_set_state(port, hard_reset_state(port),
 			       PD_T_PS_TRANSITION);
@@ -5458,7 +5746,7 @@ static void run_state_machine(struct tcpm_port *port)
 		tcpm_typec_connect(port);
 		if (port->pd_capable && port->source_caps[0] & PDO_FIXED_DUAL_ROLE)
 			mod_enable_frs_delayed_work(port, 0);
-		tcpm_pps_complete(port, port->pps_status);
+		tcpm_aug_supply_req_complete(port, port->aug_supply_req_status);
 
 		if (port->ams != NONE_AMS)
 			tcpm_ams_finish(port);
@@ -5645,8 +5933,7 @@ static void run_state_machine(struct tcpm_port *port)
 		port->message_id = 0;
 		port->rx_msgid = -1;
 		/* remove existing capabilities */
-		usb_power_delivery_unregister_capabilities(port->partner_source_caps);
-		port->partner_source_caps = NULL;
+		tcpm_partner_source_caps_reset(port);
 		tcpm_pd_send_control(port, PD_CTRL_ACCEPT, TCPC_TX_SOP);
 		tcpm_ams_finish(port);
 		if (port->pwr_role == TYPEC_SOURCE) {
@@ -5679,8 +5966,7 @@ static void run_state_machine(struct tcpm_port *port)
 			port->message_id = 0;
 			port->rx_msgid = -1;
 			/* remove existing capabilities */
-			usb_power_delivery_unregister_capabilities(port->partner_source_caps);
-			port->partner_source_caps = NULL;
+			tcpm_partner_source_caps_reset(port);
 			if (tcpm_pd_send_control(port, PD_CTRL_SOFT_RESET, TCPC_TX_SOP))
 				tcpm_set_state_cond(port, hard_reset_state(port), 0);
 			else
@@ -5817,8 +6103,7 @@ static void run_state_machine(struct tcpm_port *port)
 		break;
 	case PR_SWAP_SNK_SRC_SINK_OFF:
 		/* will be source, remove existing capabilities */
-		usb_power_delivery_unregister_capabilities(port->partner_source_caps);
-		port->partner_source_caps = NULL;
+		tcpm_partner_source_caps_reset(port);
 		/*
 		 * Prevent vbus discharge circuit from turning on during PR_SWAP
 		 * as this is not a disconnect.
@@ -5966,7 +6251,7 @@ static void run_state_machine(struct tcpm_port *port)
 		break;
 	case ERROR_RECOVERY:
 		tcpm_swap_complete(port, -EPROTO);
-		tcpm_pps_complete(port, -EPROTO);
+		tcpm_aug_supply_req_complete(port, -EPROTO);
 		tcpm_set_state(port, PORT_RESET, 0);
 		break;
 	case PORT_RESET:
@@ -6940,7 +7225,7 @@ static int tcpm_try_role(struct typec_port *p, int role)
 	return ret;
 }
 
-static int tcpm_pps_set_op_curr(struct tcpm_port *port, u16 req_op_curr)
+static int tcpm_aug_set_op_curr(struct tcpm_port *port, u16 req_op_curr_ma)
 {
 	unsigned int target_mw;
 	int ret;
@@ -6948,7 +7233,19 @@ static int tcpm_pps_set_op_curr(struct tcpm_port *port, u16 req_op_curr)
 	mutex_lock(&port->swap_lock);
 	mutex_lock(&port->lock);
 
-	if (!port->pps_data.active) {
+	if (port->pps_data.active) {
+		req_op_curr_ma = req_op_curr_ma -
+				 (req_op_curr_ma % RDO_PROG_CURR_MA_STEP);
+		if (req_op_curr_ma > port->pps_data.max_curr) {
+			ret = -EINVAL;
+			goto port_unlock;
+		}
+		target_mw = (req_op_curr_ma * port->supply_voltage) / 1000;
+		if (target_mw < port->operating_snk_mw) {
+			ret = -EINVAL;
+			goto port_unlock;
+		}
+	} else if (!port->spr_avs_data.active) {
 		ret = -EOPNOTSUPP;
 		goto port_unlock;
 	}
@@ -6958,38 +7255,31 @@ static int tcpm_pps_set_op_curr(struct tcpm_port *port, u16 req_op_curr)
 		goto port_unlock;
 	}
 
-	if (req_op_curr > port->pps_data.max_curr) {
-		ret = -EINVAL;
-		goto port_unlock;
-	}
-
-	target_mw = (req_op_curr * port->supply_voltage) / 1000;
-	if (target_mw < port->operating_snk_mw) {
-		ret = -EINVAL;
-		goto port_unlock;
-	}
+	if (port->pps_data.active)
+		port->upcoming_state = SNK_NEGOTIATE_PPS_CAPABILITIES;
+	else
+		port->upcoming_state = SNK_NEGOTIATE_SPR_AVS_CAPABILITIES;
 
-	port->upcoming_state = SNK_NEGOTIATE_PPS_CAPABILITIES;
 	ret = tcpm_ams_start(port, POWER_NEGOTIATION);
 	if (ret == -EAGAIN) {
 		port->upcoming_state = INVALID_STATE;
 		goto port_unlock;
 	}
 
-	/* Round down operating current to align with PPS valid steps */
-	req_op_curr = req_op_curr - (req_op_curr % RDO_PROG_CURR_MA_STEP);
-
-	reinit_completion(&port->pps_complete);
-	port->pps_data.req_op_curr = req_op_curr;
-	port->pps_status = 0;
-	port->pps_pending = true;
+	reinit_completion(&port->aug_supply_req_complete);
+	if (port->pps_data.active)
+		port->pps_data.req_op_curr = req_op_curr_ma;
+	else
+		port->spr_avs_data.req_op_curr_ma = req_op_curr_ma;
+	port->aug_supply_req_status = 0;
+	port->aug_supply_req_pending = true;
 	mutex_unlock(&port->lock);
 
-	if (!wait_for_completion_timeout(&port->pps_complete,
-				msecs_to_jiffies(PD_PPS_CTRL_TIMEOUT)))
+	if (!wait_for_completion_timeout(&port->aug_supply_req_complete,
+					 msecs_to_jiffies(PD_AUG_PSY_CTRL_TIMEOUT)))
 		ret = -ETIMEDOUT;
 	else
-		ret = port->pps_status;
+		ret = port->aug_supply_req_status;
 
 	goto swap_unlock;
 
@@ -7001,7 +7291,7 @@ swap_unlock:
 	return ret;
 }
 
-static int tcpm_pps_set_out_volt(struct tcpm_port *port, u16 req_out_volt)
+static int tcpm_aug_set_out_volt(struct tcpm_port *port, u16 req_out_volt_mv)
 {
 	unsigned int target_mw;
 	int ret;
@@ -7009,7 +7299,16 @@ static int tcpm_pps_set_out_volt(struct tcpm_port *port, u16 req_out_volt)
 	mutex_lock(&port->swap_lock);
 	mutex_lock(&port->lock);
 
-	if (!port->pps_data.active) {
+	if (port->pps_data.active) {
+		req_out_volt_mv = req_out_volt_mv - (req_out_volt_mv %
+						     RDO_PROG_VOLT_MV_STEP);
+		/* Round down output voltage to align with PPS valid steps */
+		target_mw = (port->current_limit * req_out_volt_mv) / 1000;
+		if (target_mw < port->operating_snk_mw) {
+			ret = -EINVAL;
+			goto port_unlock;
+		}
+	} else if (!port->spr_avs_data.active) {
 		ret = -EOPNOTSUPP;
 		goto port_unlock;
 	}
@@ -7019,33 +7318,31 @@ static int tcpm_pps_set_out_volt(struct tcpm_port *port, u16 req_out_volt)
 		goto port_unlock;
 	}
 
-	target_mw = (port->current_limit * req_out_volt) / 1000;
-	if (target_mw < port->operating_snk_mw) {
-		ret = -EINVAL;
-		goto port_unlock;
-	}
+	if (port->pps_data.active)
+		port->upcoming_state = SNK_NEGOTIATE_PPS_CAPABILITIES;
+	else
+		port->upcoming_state = SNK_NEGOTIATE_SPR_AVS_CAPABILITIES;
 
-	port->upcoming_state = SNK_NEGOTIATE_PPS_CAPABILITIES;
 	ret = tcpm_ams_start(port, POWER_NEGOTIATION);
 	if (ret == -EAGAIN) {
 		port->upcoming_state = INVALID_STATE;
 		goto port_unlock;
 	}
 
-	/* Round down output voltage to align with PPS valid steps */
-	req_out_volt = req_out_volt - (req_out_volt % RDO_PROG_VOLT_MV_STEP);
-
-	reinit_completion(&port->pps_complete);
-	port->pps_data.req_out_volt = req_out_volt;
-	port->pps_status = 0;
-	port->pps_pending = true;
+	reinit_completion(&port->aug_supply_req_complete);
+	if (port->pps_data.active)
+		port->pps_data.req_out_volt = req_out_volt_mv;
+	else
+		port->spr_avs_data.req_out_volt_mv = req_out_volt_mv;
+	port->aug_supply_req_status = 0;
+	port->aug_supply_req_pending = true;
 	mutex_unlock(&port->lock);
 
-	if (!wait_for_completion_timeout(&port->pps_complete,
-				msecs_to_jiffies(PD_PPS_CTRL_TIMEOUT)))
+	if (!wait_for_completion_timeout(&port->aug_supply_req_complete,
+					 msecs_to_jiffies(PD_AUG_PSY_CTRL_TIMEOUT)))
 		ret = -ETIMEDOUT;
 	else
-		ret = port->pps_status;
+		ret = port->aug_supply_req_status;
 
 	goto swap_unlock;
 
@@ -7088,9 +7385,9 @@ static int tcpm_pps_activate(struct tcpm_port *port, bool activate)
 		goto port_unlock;
 	}
 
-	reinit_completion(&port->pps_complete);
-	port->pps_status = 0;
-	port->pps_pending = true;
+	reinit_completion(&port->aug_supply_req_complete);
+	port->aug_supply_req_status = 0;
+	port->aug_supply_req_pending = true;
 
 	/* Trigger PPS request or move back to standard PDO contract */
 	if (activate) {
@@ -7099,11 +7396,75 @@ static int tcpm_pps_activate(struct tcpm_port *port, bool activate)
 	}
 	mutex_unlock(&port->lock);
 
-	if (!wait_for_completion_timeout(&port->pps_complete,
-				msecs_to_jiffies(PD_PPS_CTRL_TIMEOUT)))
+	if (!wait_for_completion_timeout(&port->aug_supply_req_complete,
+					 msecs_to_jiffies(PD_AUG_PSY_CTRL_TIMEOUT)))
+		ret = -ETIMEDOUT;
+	else
+		ret = port->aug_supply_req_status;
+
+	goto swap_unlock;
+
+port_unlock:
+	mutex_unlock(&port->lock);
+swap_unlock:
+	mutex_unlock(&port->swap_lock);
+
+	return ret;
+}
+
+static int tcpm_spr_avs_activate(struct tcpm_port *port, bool activate)
+{
+	int ret = 0;
+
+	mutex_lock(&port->swap_lock);
+	mutex_lock(&port->lock);
+
+	if (port->spr_avs_data.port_snk_status == SPR_AVS_NOT_SUPPORTED ||
+	    port->spr_avs_data.port_partner_src_status == SPR_AVS_NOT_SUPPORTED) {
+		tcpm_log(port, "SPR_AVS not supported");
+		ret = -EOPNOTSUPP;
+		goto port_unlock;
+	}
+
+	/* Trying to deactivate SPR AVS when already deactivated so just bail */
+	if (!port->spr_avs_data.active && !activate)
+		goto port_unlock;
+
+	if (port->state != SNK_READY) {
+		tcpm_log(port,
+			 "SPR_AVS cannot be activated. Port not in SNK_READY");
+		ret = -EAGAIN;
+		goto port_unlock;
+	}
+
+	if (activate)
+		port->upcoming_state = SNK_NEGOTIATE_SPR_AVS_CAPABILITIES;
+	else
+		port->upcoming_state = SNK_NEGOTIATE_CAPABILITIES;
+	ret = tcpm_ams_start(port, POWER_NEGOTIATION);
+	if (ret == -EAGAIN) {
+		tcpm_log(port, "SPR_AVS cannot be %s. AMS start failed",
+			 activate ? "activated" : "deactivated");
+		port->upcoming_state = INVALID_STATE;
+		goto port_unlock;
+	}
+
+	reinit_completion(&port->aug_supply_req_complete);
+	port->aug_supply_req_status = 0;
+	port->aug_supply_req_pending = true;
+
+	/* Trigger AVS request or move back to standard PDO contract */
+	if (activate) {
+		port->spr_avs_data.req_out_volt_mv = port->supply_voltage;
+		port->spr_avs_data.req_op_curr_ma = port->current_limit;
+	}
+	mutex_unlock(&port->lock);
+
+	if (!wait_for_completion_timeout(&port->aug_supply_req_complete,
+					 msecs_to_jiffies(PD_AUG_PSY_CTRL_TIMEOUT)))
 		ret = -ETIMEDOUT;
 	else
-		ret = port->pps_status;
+		ret = port->aug_supply_req_status;
 
 	goto swap_unlock;
 
@@ -7259,16 +7620,26 @@ static int tcpm_pd_set(struct typec_port *p, struct usb_power_delivery *pd)
 		break;
 	case SNK_NEGOTIATE_CAPABILITIES:
 	case SNK_NEGOTIATE_PPS_CAPABILITIES:
+	case SNK_NEGOTIATE_SPR_AVS_CAPABILITIES:
 	case SNK_READY:
 	case SNK_TRANSITION_SINK:
 	case SNK_TRANSITION_SINK_VBUS:
-		if (port->pps_data.active)
+		if (port->pps_data.active) {
 			port->upcoming_state = SNK_NEGOTIATE_PPS_CAPABILITIES;
-		else if (port->pd_capable)
+		} else if (port->pd_capable) {
 			port->upcoming_state = SNK_NEGOTIATE_CAPABILITIES;
-		else
+			if (port->spr_avs_data.active) {
+				/*
+				 * De-activate AVS and fallback to PD to
+				 * re-evaluate whether AVS is supported in the
+				 * current sink cap set.
+				 */
+				port->spr_avs_data.active = false;
+				port->spr_avs_data.port_snk_status = SPR_AVS_UNKNOWN;
+			}
+		} else {
 			break;
-
+		}
 		port->update_sink_caps = true;
 
 		ret = tcpm_ams_start(port, POWER_NEGOTIATION);
@@ -7778,7 +8149,8 @@ static void tcpm_fw_get_pd_revision(struct tcpm_port *port, struct fwnode_handle
 enum tcpm_psy_online_states {
 	TCPM_PSY_OFFLINE = 0,
 	TCPM_PSY_FIXED_ONLINE,
-	TCPM_PSY_PROG_ONLINE,
+	TCPM_PSY_PPS_ONLINE,
+	TCPM_PSY_SPR_AVS_ONLINE,
 };
 
 static enum power_supply_property tcpm_psy_props[] = {
@@ -7796,7 +8168,9 @@ static int tcpm_psy_get_online(struct tcpm_port *port,
 {
 	if (port->vbus_charge) {
 		if (port->pps_data.active)
-			val->intval = TCPM_PSY_PROG_ONLINE;
+			val->intval = TCPM_PSY_PPS_ONLINE;
+		else if (port->spr_avs_data.active)
+			val->intval = TCPM_PSY_SPR_AVS_ONLINE;
 		else
 			val->intval = TCPM_PSY_FIXED_ONLINE;
 	} else {
@@ -7811,6 +8185,8 @@ static int tcpm_psy_get_voltage_min(struct tcpm_port *port,
 {
 	if (port->pps_data.active)
 		val->intval = port->pps_data.min_volt * 1000;
+	else if (port->spr_avs_data.active)
+		val->intval = SPR_AVS_TIER1_MIN_VOLT_MV * 1000;
 	else
 		val->intval = port->supply_voltage * 1000;
 
@@ -7822,6 +8198,8 @@ static int tcpm_psy_get_voltage_max(struct tcpm_port *port,
 {
 	if (port->pps_data.active)
 		val->intval = port->pps_data.max_volt * 1000;
+	else if (port->spr_avs_data.active)
+		val->intval = port->spr_avs_data.max_out_volt_mv * 1000;
 	else
 		val->intval = port->supply_voltage * 1000;
 
@@ -7841,6 +8219,8 @@ static int tcpm_psy_get_current_max(struct tcpm_port *port,
 {
 	if (port->pps_data.active)
 		val->intval = port->pps_data.max_curr * 1000;
+	else if (port->spr_avs_data.active)
+		val->intval = port->spr_avs_data.max_current_ma * 1000;
 	else
 		val->intval = port->current_limit * 1000;
 
@@ -7916,17 +8296,41 @@ static int tcpm_psy_get_prop(struct power_supply *psy,
 	return ret;
 }
 
+static int tcpm_disable_pps_avs(struct tcpm_port *port)
+{
+	int ret = 0;
+
+	if (port->pps_data.active)
+		ret = tcpm_pps_activate(port, false);
+	else if (port->spr_avs_data.active)
+		ret = tcpm_spr_avs_activate(port, false);
+
+	return ret;
+}
+
 static int tcpm_psy_set_online(struct tcpm_port *port,
 			       const union power_supply_propval *val)
 {
-	int ret;
+	int ret = 0;
 
 	switch (val->intval) {
 	case TCPM_PSY_FIXED_ONLINE:
-		ret = tcpm_pps_activate(port, false);
+		ret = tcpm_disable_pps_avs(port);
+		break;
+	case TCPM_PSY_PPS_ONLINE:
+		if (port->spr_avs_data.active)
+			ret = tcpm_spr_avs_activate(port, false);
+		if (!ret)
+			ret = tcpm_pps_activate(port, true);
 		break;
-	case TCPM_PSY_PROG_ONLINE:
-		ret = tcpm_pps_activate(port, true);
+	case TCPM_PSY_SPR_AVS_ONLINE:
+		tcpm_log(port, "request to set AVS online");
+		if (port->spr_avs_data.active)
+			return 0;
+		ret = tcpm_disable_pps_avs(port);
+		if (ret)
+			break;
+		ret = tcpm_spr_avs_activate(port, true);
 		break;
 	default:
 		ret = -EINVAL;
@@ -7955,13 +8359,10 @@ static int tcpm_psy_set_prop(struct power_supply *psy,
 		ret = tcpm_psy_set_online(port, val);
 		break;
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-		ret = tcpm_pps_set_out_volt(port, val->intval / 1000);
+		ret = tcpm_aug_set_out_volt(port, val->intval / 1000);
 		break;
 	case POWER_SUPPLY_PROP_CURRENT_NOW:
-		if (val->intval > port->pps_data.max_curr * 1000)
-			ret = -EINVAL;
-		else
-			ret = tcpm_pps_set_op_curr(port, val->intval / 1000);
+		ret = tcpm_aug_set_op_curr(port, val->intval / 1000);
 		break;
 	default:
 		ret = -EINVAL;
@@ -8006,7 +8407,9 @@ static int devm_tcpm_psy_register(struct tcpm_port *port)
 	port->psy_desc.type = POWER_SUPPLY_TYPE_USB;
 	port->psy_desc.usb_types = BIT(POWER_SUPPLY_USB_TYPE_C)  |
 				   BIT(POWER_SUPPLY_USB_TYPE_PD) |
-				   BIT(POWER_SUPPLY_USB_TYPE_PD_PPS);
+				   BIT(POWER_SUPPLY_USB_TYPE_PD_PPS) |
+				   BIT(POWER_SUPPLY_USB_TYPE_PD_PPS_SPR_AVS) |
+				   BIT(POWER_SUPPLY_USB_TYPE_PD_SPR_AVS);
 	port->psy_desc.properties = tcpm_psy_props;
 	port->psy_desc.num_properties = ARRAY_SIZE(tcpm_psy_props);
 	port->psy_desc.get_property = tcpm_psy_get_prop;
@@ -8101,7 +8504,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 
 	init_completion(&port->tx_complete);
 	init_completion(&port->swap_complete);
-	init_completion(&port->pps_complete);
+	init_completion(&port->aug_supply_req_complete);
 	tcpm_debugfs_init(port);
 
 	err = tcpm_fw_get_caps(port, tcpc->fwnode);
diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h
index 5a98983195cb..337a5485af7c 100644
--- a/include/linux/usb/pd.h
+++ b/include/linux/usb/pd.h
@@ -398,9 +398,30 @@ enum pd_apdo_type {
 #define PDO_SPR_AVS_APDO_15V_TO_20V_MAX_CURR	GENMASK(9, 0)	/* 10mA unit */
 
 /* SPR AVS has two different current ranges 9V - 15V, 15V - 20V */
-#define SPR_AVS_TIER1_MIN_VOLT_MV		9000
-#define SPR_AVS_TIER1_MAX_VOLT_MV		15000
-#define SPR_AVS_TIER2_MAX_VOLT_MV		20000
+#define SPR_AVS_TIER1_MIN_VOLT_MV              9000
+#define SPR_AVS_TIER1_MAX_VOLT_MV              15000
+#define SPR_AVS_TIER2_MAX_VOLT_MV              20000
+
+#define SPR_AVS_AVS_SMALL_STEP_V	1
+/* vAvsStep - 100mv */
+#define SPR_AVS_VOLT_MV_STEP		100
+/* SPR AVS RDO Operating Current is in 50mA step */
+#define RDO_SPR_AVS_CURR_MA_STEP	50
+/* SPR AVS RDO Output voltage is in 25mV step */
+#define RDO_SPR_AVS_OUT_VOLT_MV_STEP	25
+
+#define RDO_SPR_AVS_VOLT	GENMASK(20, 9)
+#define RDO_SPR_AVS_CURR	GENMASK(6, 0)
+
+#define RDO_SPR_AVS_OUT_VOLT(mv)					\
+	FIELD_PREP(RDO_SPR_AVS_VOLT, ((mv) / RDO_SPR_AVS_OUT_VOLT_MV_STEP))
+
+#define RDO_SPR_AVS_OP_CURR(ma)						\
+	FIELD_PREP(RDO_SPR_AVS_CURR, ((ma) / RDO_SPR_AVS_CURR_MA_STEP))
+
+#define RDO_AVS(idx, out_mv, op_ma, flags)				\
+	(RDO_OBJ(idx) | (flags) |					\
+	 RDO_SPR_AVS_OUT_VOLT(out_mv) | RDO_SPR_AVS_OP_CURR(op_ma))
 
 static inline enum pd_pdo_type pdo_type(u32 pdo)
 {
@@ -660,6 +681,11 @@ static inline unsigned int rdo_max_power(u32 rdo)
 
 #define PD_P_SNK_STDBY_MW	2500	/* 2500 mW */
 
+#define PD_I_SNK_STBY_MA		500	/* 500 mA */
+
+#define PD_T_AVS_SRC_TRANS_SMALL	50	/* 50 ms */
+#define PD_T_AVS_SRC_TRANS_LARGE	700	/* 700 ms */
+
 #if IS_ENABLED(CONFIG_TYPEC)
 
 struct usb_power_delivery;
diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h
index b22e659f81ba..93079450bba0 100644
--- a/include/linux/usb/tcpm.h
+++ b/include/linux/usb/tcpm.h
@@ -31,7 +31,7 @@ enum typec_cc_polarity {
 /* Time to wait for TCPC to complete transmit */
 #define PD_T_TCPC_TX_TIMEOUT	100		/* in ms	*/
 #define PD_ROLE_SWAP_TIMEOUT	(MSEC_PER_SEC * 10)
-#define PD_PPS_CTRL_TIMEOUT	(MSEC_PER_SEC * 10)
+#define PD_AUG_PSY_CTRL_TIMEOUT	(MSEC_PER_SEC * 10)
 
 enum tcpm_transmit_status {
 	TCPC_TX_SUCCESS = 0,
-- 
cgit v1.2.3


From 16c1e8385b3bb65d412d7a60107f8894587c63fa Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@nvidia.com>
Date: Sat, 14 Mar 2026 15:25:44 -0400
Subject: cpufreq: optimize policy_is_shared()

The switch to cpumask_nth() over cpumask_weight(), as it may return
earlier - as soon as the function counts the required number of CPUs.

Signed-off-by: Yury Norov <ynorov@nvidia.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Zhongqiu Han <zhongqiu.han@oss.qualcomm.com>
Link: https://patch.msgid.link/20260314192544.605914-1-ynorov@nvidia.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index cc894fc38971..8ca2bcb3d7ae 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -232,7 +232,7 @@ static inline bool policy_is_inactive(struct cpufreq_policy *policy)
 
 static inline bool policy_is_shared(struct cpufreq_policy *policy)
 {
-	return cpumask_weight(policy->cpus) > 1;
+	return cpumask_nth(1, policy->cpus) < nr_cpumask_bits;
 }
 
 #ifdef CONFIG_CPU_FREQ
-- 
cgit v1.2.3


From deffe1edba626d474fef38007c03646ca5876a0e Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Fri, 13 Mar 2026 14:48:02 +0100
Subject: module: Fix freeing of charp module parameters when CONFIG_SYSFS=n

When setting a charp module parameter, the param_set_charp() function
allocates memory to store a copy of the input value. Later, when the module
is potentially unloaded, the destroy_params() function is called to free
this allocated memory.

However, destroy_params() is available only when CONFIG_SYSFS=y, otherwise
only a dummy variant is present. In the unlikely case that the kernel is
configured with CONFIG_MODULES=y and CONFIG_SYSFS=n, this results in
a memory leak of charp values when a module is unloaded.

Fix this issue by making destroy_params() always available when
CONFIG_MODULES=y. Rename the function to module_destroy_params() to clarify
that it is intended for use by the module loader.

Fixes: e180a6b7759a ("param: fix charp parameters set via sysfs")
Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 include/linux/moduleparam.h | 11 +++--------
 kernel/module/main.c        |  4 ++--
 kernel/params.c             | 27 ++++++++++++++++++---------
 3 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 7d22d4c4ea2e..8667f72503d9 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -426,14 +426,9 @@ extern char *parse_args(const char *name,
 		      void *arg, parse_unknown_fn unknown);
 
 /* Called by module remove. */
-#ifdef CONFIG_SYSFS
-extern void destroy_params(const struct kernel_param *params, unsigned num);
-#else
-static inline void destroy_params(const struct kernel_param *params,
-				  unsigned num)
-{
-}
-#endif /* !CONFIG_SYSFS */
+#ifdef CONFIG_MODULES
+void module_destroy_params(const struct kernel_param *params, unsigned int num);
+#endif
 
 /* All the helper functions */
 /* The macros to do compile-time type checking stolen from Jakub
diff --git a/kernel/module/main.c b/kernel/module/main.c
index c3ce106c70af..ef2e2130972f 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1408,7 +1408,7 @@ static void free_module(struct module *mod)
 	module_unload_free(mod);
 
 	/* Free any allocated parameters. */
-	destroy_params(mod->kp, mod->num_kp);
+	module_destroy_params(mod->kp, mod->num_kp);
 
 	if (is_livepatch_module(mod))
 		free_module_elf(mod);
@@ -3519,7 +3519,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 	mod_sysfs_teardown(mod);
  coming_cleanup:
 	mod->state = MODULE_STATE_GOING;
-	destroy_params(mod->kp, mod->num_kp);
+	module_destroy_params(mod->kp, mod->num_kp);
 	blocking_notifier_call_chain(&module_notify_list,
 				     MODULE_STATE_GOING, mod);
 	klp_module_going(mod);
diff --git a/kernel/params.c b/kernel/params.c
index 7188a12dbe86..c6a354d54213 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -745,15 +745,6 @@ void module_param_sysfs_remove(struct module *mod)
 }
 #endif
 
-void destroy_params(const struct kernel_param *params, unsigned num)
-{
-	unsigned int i;
-
-	for (i = 0; i < num; i++)
-		if (params[i].ops->free)
-			params[i].ops->free(params[i].arg);
-}
-
 struct module_kobject * __init_or_module
 lookup_or_create_module_kobject(const char *name)
 {
@@ -985,3 +976,21 @@ static int __init param_sysfs_builtin_init(void)
 late_initcall(param_sysfs_builtin_init);
 
 #endif /* CONFIG_SYSFS */
+
+#ifdef CONFIG_MODULES
+
+/*
+ * module_destroy_params - free all parameters for one module
+ * @params: module parameters (array)
+ * @num: number of module parameters
+ */
+void module_destroy_params(const struct kernel_param *params, unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++)
+		if (params[i].ops->free)
+			params[i].ops->free(params[i].arg);
+}
+
+#endif /* CONFIG_MODULES */
-- 
cgit v1.2.3


From 65f535501e2a3378629b8650eca553920de5e5a2 Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Fri, 13 Mar 2026 14:48:03 +0100
Subject: module: Clean up parse_args() arguments

* Use the preferred `unsigned int` over plain `unsigned` for the `num`
  parameter.
* Synchronize the parameter names in moduleparam.h with the ones used by
  the implementation in params.c.

Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 include/linux/moduleparam.h | 8 ++++----
 kernel/params.c             | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 8667f72503d9..604bc6e9f3a1 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -417,12 +417,12 @@ extern bool parameqn(const char *name1, const char *name2, size_t n);
 typedef int (*parse_unknown_fn)(char *param, char *val, const char *doing, void *arg);
 
 /* Called on module insert or kernel boot */
-extern char *parse_args(const char *name,
+extern char *parse_args(const char *doing,
 		      char *args,
 		      const struct kernel_param *params,
-		      unsigned num,
-		      s16 level_min,
-		      s16 level_max,
+		      unsigned int num,
+		      s16 min_level,
+		      s16 max_level,
 		      void *arg, parse_unknown_fn unknown);
 
 /* Called by module remove. */
diff --git a/kernel/params.c b/kernel/params.c
index c6a354d54213..74d620bc2521 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -161,7 +161,7 @@ static int parse_one(char *param,
 char *parse_args(const char *doing,
 		 char *args,
 		 const struct kernel_param *params,
-		 unsigned num,
+		 unsigned int num,
 		 s16 min_level,
 		 s16 max_level,
 		 void *arg, parse_unknown_fn unknown)
-- 
cgit v1.2.3


From 44a063c00fb13cf1f2e8a53a2ab10b232a44954b Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Fri, 13 Mar 2026 14:48:04 +0100
Subject: module: Remove extern keyword from param prototypes

The external function declarations do not need the "extern" keyword. Remove
it to align with the Linux kernel coding style and to silence the
associated checkpatch warnings.

Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 include/linux/moduleparam.h | 89 ++++++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 604bc6e9f3a1..075f28585074 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -317,8 +317,8 @@ struct kparam_array
 			    name, &__param_ops_##name, arg, perm, -1, 0)
 
 #ifdef CONFIG_SYSFS
-extern void kernel_param_lock(struct module *mod);
-extern void kernel_param_unlock(struct module *mod);
+void kernel_param_lock(struct module *mod);
+void kernel_param_unlock(struct module *mod);
 #else
 static inline void kernel_param_lock(struct module *mod)
 {
@@ -398,7 +398,7 @@ static inline void kernel_param_unlock(struct module *mod)
  * Returns: true if the two parameter names are equal.
  * Dashes (-) are considered equal to underscores (_).
  */
-extern bool parameq(const char *name1, const char *name2);
+bool parameq(const char *name1, const char *name2);
 
 /**
  * parameqn - checks if two parameter names match
@@ -412,18 +412,18 @@ extern bool parameq(const char *name1, const char *name2);
  * are equal.
  * Dashes (-) are considered equal to underscores (_).
  */
-extern bool parameqn(const char *name1, const char *name2, size_t n);
+bool parameqn(const char *name1, const char *name2, size_t n);
 
 typedef int (*parse_unknown_fn)(char *param, char *val, const char *doing, void *arg);
 
 /* Called on module insert or kernel boot */
-extern char *parse_args(const char *doing,
-		      char *args,
-		      const struct kernel_param *params,
-		      unsigned int num,
-		      s16 min_level,
-		      s16 max_level,
-		      void *arg, parse_unknown_fn unknown);
+char *parse_args(const char *doing,
+		 char *args,
+		 const struct kernel_param *params,
+		 unsigned int num,
+		 s16 min_level,
+		 s16 max_level,
+		 void *arg, parse_unknown_fn unknown);
 
 /* Called by module remove. */
 #ifdef CONFIG_MODULES
@@ -437,78 +437,77 @@ void module_destroy_params(const struct kernel_param *params, unsigned int num);
 	static inline type __always_unused *__check_##name(void) { return(p); }
 
 extern const struct kernel_param_ops param_ops_byte;
-extern int param_set_byte(const char *val, const struct kernel_param *kp);
-extern int param_get_byte(char *buffer, const struct kernel_param *kp);
+int param_set_byte(const char *val, const struct kernel_param *kp);
+int param_get_byte(char *buffer, const struct kernel_param *kp);
 #define param_check_byte(name, p) __param_check(name, p, unsigned char)
 
 extern const struct kernel_param_ops param_ops_short;
-extern int param_set_short(const char *val, const struct kernel_param *kp);
-extern int param_get_short(char *buffer, const struct kernel_param *kp);
+int param_set_short(const char *val, const struct kernel_param *kp);
+int param_get_short(char *buffer, const struct kernel_param *kp);
 #define param_check_short(name, p) __param_check(name, p, short)
 
 extern const struct kernel_param_ops param_ops_ushort;
-extern int param_set_ushort(const char *val, const struct kernel_param *kp);
-extern int param_get_ushort(char *buffer, const struct kernel_param *kp);
+int param_set_ushort(const char *val, const struct kernel_param *kp);
+int param_get_ushort(char *buffer, const struct kernel_param *kp);
 #define param_check_ushort(name, p) __param_check(name, p, unsigned short)
 
 extern const struct kernel_param_ops param_ops_int;
-extern int param_set_int(const char *val, const struct kernel_param *kp);
-extern int param_get_int(char *buffer, const struct kernel_param *kp);
+int param_set_int(const char *val, const struct kernel_param *kp);
+int param_get_int(char *buffer, const struct kernel_param *kp);
 #define param_check_int(name, p) __param_check(name, p, int)
 
 extern const struct kernel_param_ops param_ops_uint;
-extern int param_set_uint(const char *val, const struct kernel_param *kp);
-extern int param_get_uint(char *buffer, const struct kernel_param *kp);
+int param_set_uint(const char *val, const struct kernel_param *kp);
+int param_get_uint(char *buffer, const struct kernel_param *kp);
 int param_set_uint_minmax(const char *val, const struct kernel_param *kp,
 		unsigned int min, unsigned int max);
 #define param_check_uint(name, p) __param_check(name, p, unsigned int)
 
 extern const struct kernel_param_ops param_ops_long;
-extern int param_set_long(const char *val, const struct kernel_param *kp);
-extern int param_get_long(char *buffer, const struct kernel_param *kp);
+int param_set_long(const char *val, const struct kernel_param *kp);
+int param_get_long(char *buffer, const struct kernel_param *kp);
 #define param_check_long(name, p) __param_check(name, p, long)
 
 extern const struct kernel_param_ops param_ops_ulong;
-extern int param_set_ulong(const char *val, const struct kernel_param *kp);
-extern int param_get_ulong(char *buffer, const struct kernel_param *kp);
+int param_set_ulong(const char *val, const struct kernel_param *kp);
+int param_get_ulong(char *buffer, const struct kernel_param *kp);
 #define param_check_ulong(name, p) __param_check(name, p, unsigned long)
 
 extern const struct kernel_param_ops param_ops_ullong;
-extern int param_set_ullong(const char *val, const struct kernel_param *kp);
-extern int param_get_ullong(char *buffer, const struct kernel_param *kp);
+int param_set_ullong(const char *val, const struct kernel_param *kp);
+int param_get_ullong(char *buffer, const struct kernel_param *kp);
 #define param_check_ullong(name, p) __param_check(name, p, unsigned long long)
 
 extern const struct kernel_param_ops param_ops_hexint;
-extern int param_set_hexint(const char *val, const struct kernel_param *kp);
-extern int param_get_hexint(char *buffer, const struct kernel_param *kp);
+int param_set_hexint(const char *val, const struct kernel_param *kp);
+int param_get_hexint(char *buffer, const struct kernel_param *kp);
 #define param_check_hexint(name, p) param_check_uint(name, p)
 
 extern const struct kernel_param_ops param_ops_charp;
-extern int param_set_charp(const char *val, const struct kernel_param *kp);
-extern int param_get_charp(char *buffer, const struct kernel_param *kp);
-extern void param_free_charp(void *arg);
+int param_set_charp(const char *val, const struct kernel_param *kp);
+int param_get_charp(char *buffer, const struct kernel_param *kp);
+void param_free_charp(void *arg);
 #define param_check_charp(name, p) __param_check(name, p, char *)
 
 /* We used to allow int as well as bool.  We're taking that away! */
 extern const struct kernel_param_ops param_ops_bool;
-extern int param_set_bool(const char *val, const struct kernel_param *kp);
-extern int param_get_bool(char *buffer, const struct kernel_param *kp);
+int param_set_bool(const char *val, const struct kernel_param *kp);
+int param_get_bool(char *buffer, const struct kernel_param *kp);
 #define param_check_bool(name, p) __param_check(name, p, bool)
 
 extern const struct kernel_param_ops param_ops_bool_enable_only;
-extern int param_set_bool_enable_only(const char *val,
-				      const struct kernel_param *kp);
+int param_set_bool_enable_only(const char *val, const struct kernel_param *kp);
 /* getter is the same as for the regular bool */
 #define param_check_bool_enable_only param_check_bool
 
 extern const struct kernel_param_ops param_ops_invbool;
-extern int param_set_invbool(const char *val, const struct kernel_param *kp);
-extern int param_get_invbool(char *buffer, const struct kernel_param *kp);
+int param_set_invbool(const char *val, const struct kernel_param *kp);
+int param_get_invbool(char *buffer, const struct kernel_param *kp);
 #define param_check_invbool(name, p) __param_check(name, p, bool)
 
 /* An int, which can only be set like a bool (though it shows as an int). */
 extern const struct kernel_param_ops param_ops_bint;
-extern int param_set_bint(const char *val, const struct kernel_param *kp);
+int param_set_bint(const char *val, const struct kernel_param *kp);
 #define param_get_bint param_get_int
 #define param_check_bint param_check_int
 
@@ -615,19 +614,19 @@ enum hwparam_type {
 extern const struct kernel_param_ops param_array_ops;
 
 extern const struct kernel_param_ops param_ops_string;
-extern int param_set_copystring(const char *val, const struct kernel_param *);
-extern int param_get_string(char *buffer, const struct kernel_param *kp);
+int param_set_copystring(const char *val, const struct kernel_param *kp);
+int param_get_string(char *buffer, const struct kernel_param *kp);
 
 /* for exporting parameters in /sys/module/.../parameters */
 
 struct module;
 
 #if defined(CONFIG_SYSFS) && defined(CONFIG_MODULES)
-extern int module_param_sysfs_setup(struct module *mod,
-				    const struct kernel_param *kparam,
-				    unsigned int num_params);
+int module_param_sysfs_setup(struct module *mod,
+			     const struct kernel_param *kparam,
+			     unsigned int num_params);
 
-extern void module_param_sysfs_remove(struct module *mod);
+void module_param_sysfs_remove(struct module *mod);
 #else
 static inline int module_param_sysfs_setup(struct module *mod,
 			     const struct kernel_param *kparam,
-- 
cgit v1.2.3


From 306c36a76da2d6d2b5e91db925d41a9a8d77dbfd Mon Sep 17 00:00:00 2001
From: Josh Law <objecting@objecting.org>
Date: Wed, 18 Mar 2026 15:59:12 +0000
Subject: bootconfig: constify xbc_calc_checksum() data parameter

xbc_calc_checksum() only reads the data buffer, so mark the parameter
as const void * and the internal pointer as const unsigned char *.

Link: https://lore.kernel.org/all/20260318155919.78168-7-objecting@objecting.org/

Signed-off-by: Josh Law <objecting@objecting.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 include/linux/bootconfig.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 25df9260d206..23a96c5edcf3 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -36,9 +36,9 @@ bool __init cmdline_has_extra_options(void);
  * The checksum will be used with the BOOTCONFIG_MAGIC and the size for
  * embedding the bootconfig in the initrd image.
  */
-static inline __init uint32_t xbc_calc_checksum(void *data, uint32_t size)
+static inline __init uint32_t xbc_calc_checksum(const void *data, uint32_t size)
 {
-	unsigned char *p = data;
+	const unsigned char *p = data;
 	uint32_t ret = 0;
 
 	while (size--)
-- 
cgit v1.2.3


From 6eb255d019b810614c5cbd99b9ef281b7b9361e3 Mon Sep 17 00:00:00 2001
From: Josh Law <objecting@objecting.org>
Date: Wed, 18 Mar 2026 15:59:19 +0000
Subject: lib/bootconfig: change xbc_node_index() return type to uint16_t

  lib/bootconfig.c:136:21: warning: conversion from 'long int' to
  'int' may change value [-Wconversion]
  lib/bootconfig.c:308:33: warning: conversion from 'int' to 'uint16_t'
  may change value [-Wconversion]
  lib/bootconfig.c:467:37: warning: conversion from 'int' to 'uint16_t'
  may change value [-Wconversion]
  lib/bootconfig.c:469:40: warning: conversion from 'int' to 'uint16_t'
  may change value [-Wconversion]
  lib/bootconfig.c:472:54: warning: conversion from 'int' to 'uint16_t'
  may change value [-Wconversion]
  lib/bootconfig.c:476:45: warning: conversion from 'int' to 'uint16_t'
  may change value [-Wconversion]

xbc_node_index() returns the position of a node in the xbc_nodes array,
which has at most XBC_NODE_MAX (8192) entries, well within uint16_t
range.  Every caller stores the result in a uint16_t field (node->parent,
node->child, node->next, or the keys[] array in compose_key_after), so
the int return type causes narrowing warnings at all six call sites.

Change the return type to uint16_t and add an explicit cast on the
pointer subtraction to match the storage width and eliminate the
warnings.

Link: https://lore.kernel.org/all/20260318155919.78168-14-objecting@objecting.org/

Signed-off-by: Josh Law <objecting@objecting.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 include/linux/bootconfig.h | 2 +-
 lib/bootconfig.c           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 23a96c5edcf3..692a5acc2ffc 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -66,7 +66,7 @@ struct xbc_node {
 
 /* Node tree access raw APIs */
 struct xbc_node * __init xbc_root_node(void);
-int __init xbc_node_index(struct xbc_node *node);
+uint16_t __init xbc_node_index(struct xbc_node *node);
 struct xbc_node * __init xbc_node_get_parent(struct xbc_node *node);
 struct xbc_node * __init xbc_node_get_child(struct xbc_node *node);
 struct xbc_node * __init xbc_node_get_next(struct xbc_node *node);
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 343aa9629464..01966ab9a8b5 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -131,9 +131,9 @@ struct xbc_node * __init xbc_root_node(void)
  *
  * Return the index number of @node in XBC node list.
  */
-int __init xbc_node_index(struct xbc_node *node)
+uint16_t __init xbc_node_index(struct xbc_node *node)
 {
-	return node - &xbc_nodes[0];
+	return (uint16_t)(node - &xbc_nodes[0]);
 }
 
 /**
-- 
cgit v1.2.3


From 9c6b4009da5991db6bf02a47a578885a04a5e3f6 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Mon, 16 Mar 2026 11:04:03 +0100
Subject: net: mdio-gpio: remove linux/mdio-gpio.h

The three defines from the linux/mdio-gpio.h header are only used in the
mdio-gpio module. There's no reason to have them in a public header.
Move them into the driver and remove mdio-gpio.h.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260316-gpio-mdio-hdr-cleanup-v1-1-2df696f74728@oss.qualcomm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/mdio/mdio-gpio.c | 5 ++++-
 include/linux/mdio-gpio.h    | 9 ---------
 2 files changed, 4 insertions(+), 10 deletions(-)
 delete mode 100644 include/linux/mdio-gpio.h

(limited to 'include/linux')

diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c
index 1cfd538b5105..c99310889896 100644
--- a/drivers/net/mdio/mdio-gpio.c
+++ b/drivers/net/mdio/mdio-gpio.c
@@ -20,13 +20,16 @@
 #include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
 #include <linux/mdio-bitbang.h>
-#include <linux/mdio-gpio.h>
 #include <linux/module.h>
 #include <linux/of_mdio.h>
 #include <linux/platform_data/mdio-gpio.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
+#define MDIO_GPIO_MDC	0
+#define MDIO_GPIO_MDIO	1
+#define MDIO_GPIO_MDO	2
+
 struct mdio_gpio_info {
 	struct mdiobb_ctrl ctrl;
 	struct gpio_desc *mdc, *mdio, *mdo;
diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h
deleted file mode 100644
index cea443a672cb..000000000000
--- a/include/linux/mdio-gpio.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_MDIO_GPIO_H
-#define __LINUX_MDIO_GPIO_H
-
-#define MDIO_GPIO_MDC	0
-#define MDIO_GPIO_MDIO	1
-#define MDIO_GPIO_MDO	2
-
-#endif
-- 
cgit v1.2.3


From 356d4fbcf3defaff0f98d2b6b54f3b26f0ff189d Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Mon, 16 Mar 2026 11:04:04 +0100
Subject: net: mdio-gpio: remove linux/platform_data/mdio-gpio.h

Nobody defines struct mdio_gpio_platform_data. Remove platform data
support from mdio-gpio and drop the header.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260316-gpio-mdio-hdr-cleanup-v1-2-2df696f74728@oss.qualcomm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS                             |  1 -
 drivers/net/mdio/mdio-gpio.c            |  7 -------
 include/linux/platform_data/mdio-gpio.h | 14 --------------
 3 files changed, 22 deletions(-)
 delete mode 100644 include/linux/platform_data/mdio-gpio.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 5d477fd592db..7d65f9435950 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9547,7 +9547,6 @@ F:	include/linux/phy_fixed.h
 F:	include/linux/phy_link_topology.h
 F:	include/linux/phylib_stubs.h
 F:	include/linux/platform_data/mdio-bcm-unimac.h
-F:	include/linux/platform_data/mdio-gpio.h
 F:	include/net/phy/
 F:	include/trace/events/mdio.h
 F:	include/uapi/linux/mdio.h
diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c
index c99310889896..958d1c6608ab 100644
--- a/drivers/net/mdio/mdio-gpio.c
+++ b/drivers/net/mdio/mdio-gpio.c
@@ -22,7 +22,6 @@
 #include <linux/mdio-bitbang.h>
 #include <linux/module.h>
 #include <linux/of_mdio.h>
-#include <linux/platform_data/mdio-gpio.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -113,7 +112,6 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev,
 					  struct mdio_gpio_info *bitbang,
 					  int bus_id)
 {
-	struct mdio_gpio_platform_data *pdata = dev_get_platdata(dev);
 	struct mii_bus *new_bus;
 
 	bitbang->ctrl.ops = &mdio_gpio_ops;
@@ -130,11 +128,6 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev,
 	else
 		strscpy(new_bus->id, "gpio", sizeof(new_bus->id));
 
-	if (pdata) {
-		new_bus->phy_mask = pdata->phy_mask;
-		new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask;
-	}
-
 	if (device_is_compatible(dev, "microchip,mdio-smi0")) {
 		bitbang->ctrl.op_c22_read = 0;
 		bitbang->ctrl.op_c22_write = 0;
diff --git a/include/linux/platform_data/mdio-gpio.h b/include/linux/platform_data/mdio-gpio.h
deleted file mode 100644
index 13874fa6e767..000000000000
--- a/include/linux/platform_data/mdio-gpio.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * MDIO-GPIO bus platform data structure
- */
-
-#ifndef __LINUX_MDIO_GPIO_PDATA_H
-#define __LINUX_MDIO_GPIO_PDATA_H
-
-struct mdio_gpio_platform_data {
-	u32 phy_mask;
-	u32 phy_ignore_ta_mask;
-};
-
-#endif /* __LINUX_MDIO_GPIO_PDATA_H */
-- 
cgit v1.2.3


From dc3d720e12f602059490c1ab2bfee84a7465998f Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Tue, 17 Mar 2026 12:18:05 -0700
Subject: net: ethtool: add ethtool COALESCE_RX_CQE_FRAMES/NSECS

Add two parameters for drivers supporting Rx CQE coalescing /
descriptor writeback.

ETHTOOL_A_COALESCE_RX_CQE_FRAMES:
Maximum number of frames that can be coalesced into a CQE or
writeback.

ETHTOOL_A_COALESCE_RX_CQE_NSECS:
Max time in nanoseconds after the first packet arrival in a
coalesced CQE or writeback to be sent.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Link: https://patch.msgid.link/20260317191826.1346111-2-haiyangz@linux.microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ethtool.yaml       |  8 ++++++++
 Documentation/networking/ethtool-netlink.rst   | 11 +++++++++++
 include/linux/ethtool.h                        |  6 +++++-
 include/uapi/linux/ethtool_netlink_generated.h |  2 ++
 net/ethtool/coalesce.c                         | 14 +++++++++++++-
 5 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index a05b5425b76a..5dd4d1b5d94b 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -857,6 +857,12 @@ attribute-sets:
         name: tx-profile
         type: nest
         nested-attributes: profile
+      -
+        name: rx-cqe-frames
+        type: u32
+      -
+        name: rx-cqe-nsecs
+        type: u32
 
   -
     name: pause-stat
@@ -2253,6 +2259,8 @@ operations:
             - tx-aggr-time-usecs
             - rx-profile
             - tx-profile
+            - rx-cqe-frames
+            - rx-cqe-nsecs
       dump: *coalesce-get-op
     -
       name: coalesce-set
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 32179168eb73..e92abf45faf5 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1076,6 +1076,8 @@ Kernel response contents:
   ``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS``    u32     time (us), aggr, Tx
   ``ETHTOOL_A_COALESCE_RX_PROFILE``            nested  profile of DIM, Rx
   ``ETHTOOL_A_COALESCE_TX_PROFILE``            nested  profile of DIM, Tx
+  ``ETHTOOL_A_COALESCE_RX_CQE_FRAMES``         u32     max packets, Rx CQE
+  ``ETHTOOL_A_COALESCE_RX_CQE_NSECS``          u32     delay (ns), Rx CQE
   ===========================================  ======  =======================
 
 Attributes are only included in reply if their value is not zero or the
@@ -1109,6 +1111,13 @@ well with frequent small-sized URBs transmissions.
 to DIM parameters, see `Generic Network Dynamic Interrupt Moderation (Net DIM)
 <https://www.kernel.org/doc/Documentation/networking/net_dim.rst>`_.
 
+Rx CQE coalescing allows multiple received packets to be coalesced into a
+single Completion Queue Entry (CQE) or descriptor writeback.
+``ETHTOOL_A_COALESCE_RX_CQE_FRAMES`` describes the maximum number of
+frames that can be coalesced into a CQE or writeback.
+``ETHTOOL_A_COALESCE_RX_CQE_NSECS`` describes max time in nanoseconds after
+the first packet arrival in a coalesced CQE or writeback to be sent.
+
 COALESCE_SET
 ============
 
@@ -1147,6 +1156,8 @@ Request contents:
   ``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS``    u32     time (us), aggr, Tx
   ``ETHTOOL_A_COALESCE_RX_PROFILE``            nested  profile of DIM, Rx
   ``ETHTOOL_A_COALESCE_TX_PROFILE``            nested  profile of DIM, Tx
+  ``ETHTOOL_A_COALESCE_RX_CQE_FRAMES``         u32     max packets, Rx CQE
+  ``ETHTOOL_A_COALESCE_RX_CQE_NSECS``          u32     delay (ns), Rx CQE
   ===========================================  ======  =======================
 
 Request is rejected if it attributes declared as unsupported by driver (i.e.
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 83c375840835..656d465bcd06 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -332,6 +332,8 @@ struct kernel_ethtool_coalesce {
 	u32 tx_aggr_max_bytes;
 	u32 tx_aggr_max_frames;
 	u32 tx_aggr_time_usecs;
+	u32 rx_cqe_frames;
+	u32 rx_cqe_nsecs;
 };
 
 /**
@@ -380,7 +382,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
 #define ETHTOOL_COALESCE_TX_AGGR_TIME_USECS	BIT(26)
 #define ETHTOOL_COALESCE_RX_PROFILE		BIT(27)
 #define ETHTOOL_COALESCE_TX_PROFILE		BIT(28)
-#define ETHTOOL_COALESCE_ALL_PARAMS		GENMASK(28, 0)
+#define ETHTOOL_COALESCE_RX_CQE_FRAMES		BIT(29)
+#define ETHTOOL_COALESCE_RX_CQE_NSECS		BIT(30)
+#define ETHTOOL_COALESCE_ALL_PARAMS		GENMASK(30, 0)
 
 #define ETHTOOL_COALESCE_USECS						\
 	(ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS)
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index 114b83017297..8134baf7860f 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -371,6 +371,8 @@ enum {
 	ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,
 	ETHTOOL_A_COALESCE_RX_PROFILE,
 	ETHTOOL_A_COALESCE_TX_PROFILE,
+	ETHTOOL_A_COALESCE_RX_CQE_FRAMES,
+	ETHTOOL_A_COALESCE_RX_CQE_NSECS,
 
 	__ETHTOOL_A_COALESCE_CNT,
 	ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1)
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 3e18ca1ccc5e..349bb02c517a 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -118,6 +118,8 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base,
 	       nla_total_size(sizeof(u32)) +	/* _TX_AGGR_MAX_BYTES */
 	       nla_total_size(sizeof(u32)) +	/* _TX_AGGR_MAX_FRAMES */
 	       nla_total_size(sizeof(u32)) +	/* _TX_AGGR_TIME_USECS */
+	       nla_total_size(sizeof(u32)) +	/* _RX_CQE_FRAMES */
+	       nla_total_size(sizeof(u32)) +	/* _RX_CQE_NSECS */
 	       total_modersz * 2;		/* _{R,T}X_PROFILE */
 }
 
@@ -269,7 +271,11 @@ static int coalesce_fill_reply(struct sk_buff *skb,
 	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,
 			     kcoal->tx_aggr_max_frames, supported) ||
 	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,
-			     kcoal->tx_aggr_time_usecs, supported))
+			     kcoal->tx_aggr_time_usecs, supported) ||
+	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_CQE_FRAMES,
+			     kcoal->rx_cqe_frames, supported) ||
+	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_CQE_NSECS,
+			     kcoal->rx_cqe_nsecs, supported))
 		return -EMSGSIZE;
 
 	if (!req_base->dev || !req_base->dev->irq_moder)
@@ -338,6 +344,8 @@ const struct nla_policy ethnl_coalesce_set_policy[] = {
 	[ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 },
 	[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 },
 	[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 },
+	[ETHTOOL_A_COALESCE_RX_CQE_FRAMES] = { .type = NLA_U32 },
+	[ETHTOOL_A_COALESCE_RX_CQE_NSECS] = { .type = NLA_U32 },
 	[ETHTOOL_A_COALESCE_RX_PROFILE] =
 		NLA_POLICY_NESTED(coalesce_profile_policy),
 	[ETHTOOL_A_COALESCE_TX_PROFILE] =
@@ -570,6 +578,10 @@ __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info,
 			 tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES], &mod);
 	ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs,
 			 tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod);
+	ethnl_update_u32(&kernel_coalesce.rx_cqe_frames,
+			 tb[ETHTOOL_A_COALESCE_RX_CQE_FRAMES], &mod);
+	ethnl_update_u32(&kernel_coalesce.rx_cqe_nsecs,
+			 tb[ETHTOOL_A_COALESCE_RX_CQE_NSECS], &mod);
 
 	if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_RX) {
 		ret = ethnl_update_profile(dev, &dev->irq_moder->rx_profile,
-- 
cgit v1.2.3


From a8eed0ba6a4b2f1803ecdfa9f11a4818cf87c474 Mon Sep 17 00:00:00 2001
From: Shardul Bankar <shardul.b@mpiricsoftware.com>
Date: Wed, 18 Mar 2026 13:08:22 +0530
Subject: hfsplus: refactor b-tree map page access and add node-type validation

In HFS+ b-trees, the node allocation bitmap is stored across multiple
records. The first chunk resides in the b-tree Header Node at record
index 2, while all subsequent chunks are stored in dedicated Map Nodes
at record index 0.

This structural quirk forces callers like hfs_bmap_alloc() and
hfs_bmap_free() to duplicate boilerplate code to validate offsets, correct
lengths, and map the underlying pages via kmap_local_page(). There is
also currently no strict node-type validation before reading these
records, leaving the allocator vulnerable if a corrupted image points a
map linkage to an Index or Leaf node.

Introduce a unified bit-level API to encapsulate the map record access:
1. A new `struct hfs_bmap_ctx` to cleanly pass state and safely handle
   page math across all architectures.
2. `hfs_bmap_get_map_page()`: Automatically validates node types
   (HFS_NODE_HEADER vs HFS_NODE_MAP), infers the correct record index,
   handles page-boundary math, and returns the unmapped `struct page *`
   directly to the caller to avoid asymmetric mappings.
3. `hfs_bmap_clear_bit()`: A clean wrapper that internally handles page
   mapping/unmapping for single-bit operations.

Refactor hfs_bmap_alloc() and hfs_bmap_free() to utilize this new API.
This deduplicates the allocator logic, hardens the map traversal against
fuzzed images, and provides the exact abstractions needed for upcoming
mount-time validation checks.

Signed-off-by: Shardul Bankar <shardul.b@mpiricsoftware.com>
Reviewed-by: Viacheslav Dubeyko <slava@dubeyko.com>
Tested-by: Viacheslav Dubeyko <slava@dubeyko.com>
Signed-off-by: Viacheslav Dubeyko <slava@dubeyko.com>
Link: https://lore.kernel.org/r/20260318073823.3933718-2-shardul.b@mpiricsoftware.com
Signed-off-by: Viacheslav Dubeyko <slava@dubeyko.com>
---
 fs/hfsplus/btree.c         | 169 ++++++++++++++++++++++++++++++++-------------
 include/linux/hfs_common.h |   2 +
 2 files changed, 124 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 1220a2f22737..64d168347b4b 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -129,6 +129,95 @@ u32 hfsplus_calc_btree_clump_size(u32 block_size, u32 node_size,
 	return clump_size;
 }
 
+/* Context for iterating b-tree map pages
+ * @page_idx: The index of the page within the b-node's page array
+ * @off: The byte offset within the mapped page
+ * @len: The remaining length of the map record
+ */
+struct hfs_bmap_ctx {
+	unsigned int page_idx;
+	unsigned int off;
+	u16 len;
+};
+
+/*
+ * Finds the specific page containing the requested byte offset within the map
+ * record. Automatically handles the difference between header and map nodes.
+ * Returns the struct page pointer, or an ERR_PTR on failure.
+ * Note: The caller is responsible for mapping/unmapping the returned page.
+ */
+static struct page *hfs_bmap_get_map_page(struct hfs_bnode *node, struct hfs_bmap_ctx *ctx,
+				u32 byte_offset)
+{
+	u16 rec_idx, off16;
+	unsigned int page_off;
+
+	if (node->this == HFSPLUS_TREE_HEAD) {
+		if (node->type != HFS_NODE_HEADER) {
+			pr_err("hfsplus: invalid btree header node\n");
+			return ERR_PTR(-EIO);
+		}
+		rec_idx = HFSPLUS_BTREE_HDR_MAP_REC_INDEX;
+	} else {
+		if (node->type != HFS_NODE_MAP) {
+			pr_err("hfsplus: invalid btree map node\n");
+			return ERR_PTR(-EIO);
+		}
+		rec_idx = HFSPLUS_BTREE_MAP_NODE_REC_INDEX;
+	}
+
+	ctx->len = hfs_brec_lenoff(node, rec_idx, &off16);
+	if (!ctx->len)
+		return ERR_PTR(-ENOENT);
+
+	if (!is_bnode_offset_valid(node, off16))
+		return ERR_PTR(-EIO);
+
+	ctx->len = check_and_correct_requested_length(node, off16, ctx->len);
+
+	if (byte_offset >= ctx->len)
+		return ERR_PTR(-EINVAL);
+
+	page_off = (u32)off16 + node->page_offset + byte_offset;
+	ctx->page_idx = page_off >> PAGE_SHIFT;
+	ctx->off = page_off & ~PAGE_MASK;
+
+	return node->page[ctx->page_idx];
+}
+
+/**
+ * hfs_bmap_clear_bit - clear a bit in the b-tree map
+ * @node: the b-tree node containing the map record
+ * @node_bit_idx: the relative bit index within the node's map record
+ *
+ * Returns 0 on success, -EINVAL if already clear, or negative error code.
+ */
+static int hfs_bmap_clear_bit(struct hfs_bnode *node, u32 node_bit_idx)
+{
+	struct hfs_bmap_ctx ctx;
+	struct page *page;
+	u8 *bmap, mask;
+
+	page = hfs_bmap_get_map_page(node, &ctx, node_bit_idx / BITS_PER_BYTE);
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+
+	bmap = kmap_local_page(page);
+
+	mask = 1 << (7 - (node_bit_idx % BITS_PER_BYTE));
+
+	if (!(bmap[ctx.off] & mask)) {
+		kunmap_local(bmap);
+		return -EINVAL;
+	}
+
+	bmap[ctx.off] &= ~mask;
+	set_page_dirty(page);
+	kunmap_local(bmap);
+
+	return 0;
+}
+
 /* Get a reference to a B*Tree and do some initial checks */
 struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
 {
@@ -374,11 +463,9 @@ int hfs_bmap_reserve(struct hfs_btree *tree, u32 rsvd_nodes)
 struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
 {
 	struct hfs_bnode *node, *next_node;
-	struct page **pagep;
+	struct hfs_bmap_ctx ctx;
+	struct page *page;
 	u32 nidx, idx;
-	unsigned off;
-	u16 off16;
-	u16 len;
 	u8 *data, byte, m;
 	int i, res;
 
@@ -390,30 +477,26 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
 	node = hfs_bnode_find(tree, nidx);
 	if (IS_ERR(node))
 		return node;
-	len = hfs_brec_lenoff(node, 2, &off16);
-	off = off16;
 
-	if (!is_bnode_offset_valid(node, off)) {
+	page = hfs_bmap_get_map_page(node, &ctx, 0);
+	if (IS_ERR(page)) {
+		res = PTR_ERR(page);
 		hfs_bnode_put(node);
-		return ERR_PTR(-EIO);
+		return ERR_PTR(res);
 	}
-	len = check_and_correct_requested_length(node, off, len);
 
-	off += node->page_offset;
-	pagep = node->page + (off >> PAGE_SHIFT);
-	data = kmap_local_page(*pagep);
-	off &= ~PAGE_MASK;
+	data = kmap_local_page(page);
 	idx = 0;
 
 	for (;;) {
-		while (len) {
-			byte = data[off];
+		while (ctx.len) {
+			byte = data[ctx.off];
 			if (byte != 0xff) {
 				for (m = 0x80, i = 0; i < 8; m >>= 1, i++) {
 					if (!(byte & m)) {
 						idx += i;
-						data[off] |= m;
-						set_page_dirty(*pagep);
+						data[ctx.off] |= m;
+						set_page_dirty(page);
 						kunmap_local(data);
 						tree->free_nodes--;
 						mark_inode_dirty(tree->inode);
@@ -423,13 +506,14 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
 					}
 				}
 			}
-			if (++off >= PAGE_SIZE) {
+			if (++ctx.off >= PAGE_SIZE) {
 				kunmap_local(data);
-				data = kmap_local_page(*++pagep);
-				off = 0;
+				page = node->page[++ctx.page_idx];
+				data = kmap_local_page(page);
+				ctx.off = 0;
 			}
 			idx += 8;
-			len--;
+			ctx.len--;
 		}
 		kunmap_local(data);
 		nidx = node->next;
@@ -443,22 +527,22 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
 			return next_node;
 		node = next_node;
 
-		len = hfs_brec_lenoff(node, 0, &off16);
-		off = off16;
-		off += node->page_offset;
-		pagep = node->page + (off >> PAGE_SHIFT);
-		data = kmap_local_page(*pagep);
-		off &= ~PAGE_MASK;
+		page = hfs_bmap_get_map_page(node, &ctx, 0);
+		if (IS_ERR(page)) {
+			res = PTR_ERR(page);
+			hfs_bnode_put(node);
+			return ERR_PTR(res);
+		}
+		data = kmap_local_page(page);
 	}
 }
 
 void hfs_bmap_free(struct hfs_bnode *node)
 {
 	struct hfs_btree *tree;
-	struct page *page;
 	u16 off, len;
 	u32 nidx;
-	u8 *data, byte, m;
+	int res;
 
 	hfs_dbg("node %u\n", node->this);
 	BUG_ON(!node->this);
@@ -495,24 +579,15 @@ void hfs_bmap_free(struct hfs_bnode *node)
 		}
 		len = hfs_brec_lenoff(node, 0, &off);
 	}
-	off += node->page_offset + nidx / 8;
-	page = node->page[off >> PAGE_SHIFT];
-	data = kmap_local_page(page);
-	off &= ~PAGE_MASK;
-	m = 1 << (~nidx & 7);
-	byte = data[off];
-	if (!(byte & m)) {
-		pr_crit("trying to free free bnode "
-				"%u(%d)\n",
-			node->this, node->type);
-		kunmap_local(data);
-		hfs_bnode_put(node);
-		return;
+
+	res = hfs_bmap_clear_bit(node, nidx);
+	if (res == -EINVAL) {
+		pr_crit("trying to free free bnode %u(%d)\n",
+				node->this, node->type);
+	} else if (!res) {
+		tree->free_nodes++;
+		mark_inode_dirty(tree->inode);
 	}
-	data[off] = byte & ~m;
-	set_page_dirty(page);
-	kunmap_local(data);
+
 	hfs_bnode_put(node);
-	tree->free_nodes++;
-	mark_inode_dirty(tree->inode);
 }
diff --git a/include/linux/hfs_common.h b/include/linux/hfs_common.h
index dadb5e0aa8a3..be24c687858e 100644
--- a/include/linux/hfs_common.h
+++ b/include/linux/hfs_common.h
@@ -510,6 +510,8 @@ struct hfs_btree_header_rec {
 #define HFSPLUS_NODE_MXSZ			32768
 #define HFSPLUS_ATTR_TREE_NODE_SIZE		8192
 #define HFSPLUS_BTREE_HDR_NODE_RECS_COUNT	3
+#define HFSPLUS_BTREE_HDR_MAP_REC_INDEX		2	/* Map (bitmap) record in Header node */
+#define HFSPLUS_BTREE_MAP_NODE_REC_INDEX	0	/* Map record in Map Node */
 #define HFSPLUS_BTREE_HDR_USER_BYTES		128
 
 /* btree key type */
-- 
cgit v1.2.3


From c888c4c834c9afc18879fde91ad405be21b7425d Mon Sep 17 00:00:00 2001
From: Svyatoslav Ryhel <clamor95@gmail.com>
Date: Tue, 3 Mar 2026 10:42:28 +0200
Subject: gpu: host1x: convert MIPI to use operation function pointers

Convert existing MIPI code to use operation function pointers, a necessary
step for supporting Tegra20/Tegra30 SoCs. All common MIPI configuration
that is SoC-independent remains in mipi.c, while all SoC-specific code is
moved to tegra114-mipi.c (The naming matches the first SoC generation with
a dedicated calibration block). Shared structures and function calls are
placed into tegra-mipi-cal.h.

Tested-by: Luca Ceresoli <luca.ceresoli@bootlin.com> # tegra20, parallel camera
Signed-off-by: Svyatoslav Ryhel <clamor95@gmail.com>
Acked-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Hans Verkuil <hverkuil+cisco@kernel.org>
---
 drivers/gpu/drm/tegra/dsi.c             |   1 +
 drivers/gpu/host1x/Makefile             |   1 +
 drivers/gpu/host1x/mipi.c               | 592 +++++++-------------------------
 drivers/gpu/host1x/tegra114-mipi.c      | 483 ++++++++++++++++++++++++++
 drivers/staging/media/tegra-video/csi.c |   1 +
 include/linux/host1x.h                  |  10 -
 include/linux/tegra-mipi-cal.h          |  57 +++
 7 files changed, 666 insertions(+), 479 deletions(-)
 create mode 100644 drivers/gpu/host1x/tegra114-mipi.c
 create mode 100644 include/linux/tegra-mipi-cal.h

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index 2c5aefe9621a..7f25c50621c9 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -14,6 +14,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
+#include <linux/tegra-mipi-cal.h>
 
 #include <video/mipi_display.h>
 
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index ee5286ffe08d..fead483af0b4 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -9,6 +9,7 @@ host1x-y = \
 	job.o \
 	debug.o \
 	mipi.o \
+	tegra114-mipi.o \
 	fence.o \
 	hw/host1x01.o \
 	hw/host1x02.o \
diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c
index fea9f491df66..01513b775d89 100644
--- a/drivers/gpu/host1x/mipi.c
+++ b/drivers/gpu/host1x/mipi.c
@@ -1,215 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that copyright
- * notice and this permission notice appear in supporting documentation, and
- * that the name of the copyright holders not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  The copyright holders make no representations
- * about the suitability of this software for any purpose.  It is provided "as
- * is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- * OF THIS SOFTWARE.
+ * Copyright (C) 2025 Svyatoslav Ryhel <clamor95@gmail.com>
  */
 
 #include <linux/clk.h>
-#include <linux/host1x.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/tegra-mipi-cal.h>
 
-#include "dev.h"
-
-#define MIPI_CAL_CTRL			0x00
-#define MIPI_CAL_CTRL_NOISE_FILTER(x)	(((x) & 0xf) << 26)
-#define MIPI_CAL_CTRL_PRESCALE(x)	(((x) & 0x3) << 24)
-#define MIPI_CAL_CTRL_CLKEN_OVR		(1 << 4)
-#define MIPI_CAL_CTRL_START		(1 << 0)
-
-#define MIPI_CAL_AUTOCAL_CTRL		0x01
-
-#define MIPI_CAL_STATUS			0x02
-#define MIPI_CAL_STATUS_DONE		(1 << 16)
-#define MIPI_CAL_STATUS_ACTIVE		(1 <<  0)
-
-#define MIPI_CAL_CONFIG_CSIA		0x05
-#define MIPI_CAL_CONFIG_CSIB		0x06
-#define MIPI_CAL_CONFIG_CSIC		0x07
-#define MIPI_CAL_CONFIG_CSID		0x08
-#define MIPI_CAL_CONFIG_CSIE		0x09
-#define MIPI_CAL_CONFIG_CSIF		0x0a
-#define MIPI_CAL_CONFIG_DSIA		0x0e
-#define MIPI_CAL_CONFIG_DSIB		0x0f
-#define MIPI_CAL_CONFIG_DSIC		0x10
-#define MIPI_CAL_CONFIG_DSID		0x11
-
-#define MIPI_CAL_CONFIG_DSIA_CLK	0x19
-#define MIPI_CAL_CONFIG_DSIB_CLK	0x1a
-#define MIPI_CAL_CONFIG_CSIAB_CLK	0x1b
-#define MIPI_CAL_CONFIG_DSIC_CLK	0x1c
-#define MIPI_CAL_CONFIG_CSICD_CLK	0x1c
-#define MIPI_CAL_CONFIG_DSID_CLK	0x1d
-#define MIPI_CAL_CONFIG_CSIE_CLK	0x1d
-
-/* for data and clock lanes */
-#define MIPI_CAL_CONFIG_SELECT		(1 << 21)
-
-/* for data lanes */
-#define MIPI_CAL_CONFIG_HSPDOS(x)	(((x) & 0x1f) << 16)
-#define MIPI_CAL_CONFIG_HSPUOS(x)	(((x) & 0x1f) <<  8)
-#define MIPI_CAL_CONFIG_TERMOS(x)	(((x) & 0x1f) <<  0)
-
-/* for clock lanes */
-#define MIPI_CAL_CONFIG_HSCLKPDOSD(x)	(((x) & 0x1f) <<  8)
-#define MIPI_CAL_CONFIG_HSCLKPUOSD(x)	(((x) & 0x1f) <<  0)
-
-#define MIPI_CAL_BIAS_PAD_CFG0		0x16
-#define MIPI_CAL_BIAS_PAD_PDVCLAMP	(1 << 1)
-#define MIPI_CAL_BIAS_PAD_E_VCLAMP_REF	(1 << 0)
-
-#define MIPI_CAL_BIAS_PAD_CFG1		0x17
-#define MIPI_CAL_BIAS_PAD_DRV_DN_REF(x) (((x) & 0x7) << 16)
-#define MIPI_CAL_BIAS_PAD_DRV_UP_REF(x) (((x) & 0x7) << 8)
-
-#define MIPI_CAL_BIAS_PAD_CFG2		0x18
-#define MIPI_CAL_BIAS_PAD_VCLAMP(x)	(((x) & 0x7) << 16)
-#define MIPI_CAL_BIAS_PAD_VAUXP(x)	(((x) & 0x7) << 4)
-#define MIPI_CAL_BIAS_PAD_PDVREG	(1 << 1)
-
-struct tegra_mipi_pad {
-	unsigned long data;
-	unsigned long clk;
-};
-
-struct tegra_mipi_soc {
-	bool has_clk_lane;
-	const struct tegra_mipi_pad *pads;
-	unsigned int num_pads;
-
-	bool clock_enable_override;
-	bool needs_vclamp_ref;
-
-	/* bias pad configuration settings */
-	u8 pad_drive_down_ref;
-	u8 pad_drive_up_ref;
+/* only need to support one provider */
+static struct {
+	struct device_node *np;
+	const struct tegra_mipi_ops *ops;
+} provider;
 
-	u8 pad_vclamp_level;
-	u8 pad_vauxp_level;
-
-	/* calibration settings for data lanes */
-	u8 hspdos;
-	u8 hspuos;
-	u8 termos;
-
-	/* calibration settings for clock lanes */
-	u8 hsclkpdos;
-	u8 hsclkpuos;
-};
-
-struct tegra_mipi {
-	const struct tegra_mipi_soc *soc;
-	struct device *dev;
-	void __iomem *regs;
-	struct mutex lock;
-	struct clk *clk;
-
-	unsigned long usage_count;
-};
-
-struct tegra_mipi_device {
-	struct platform_device *pdev;
-	struct tegra_mipi *mipi;
-	struct device *device;
-	unsigned long pads;
-};
-
-static inline u32 tegra_mipi_readl(struct tegra_mipi *mipi,
-				   unsigned long offset)
+/**
+ * tegra_mipi_enable - Enable the Tegra MIPI calibration device.
+ * @device: Handle to the Tegra MIPI calibration device.
+ *
+ * This calls the enable sequence for the Tegra MIPI calibration device.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int tegra_mipi_enable(struct tegra_mipi_device *device)
 {
-	return readl(mipi->regs + (offset << 2));
-}
+	if (device->ops->enable)
+		return device->ops->enable(device);
 
-static inline void tegra_mipi_writel(struct tegra_mipi *mipi, u32 value,
-				     unsigned long offset)
-{
-	writel(value, mipi->regs + (offset << 2));
+	return 0;
 }
+EXPORT_SYMBOL(tegra_mipi_enable);
 
-static int tegra_mipi_power_up(struct tegra_mipi *mipi)
+/**
+ * tegra_mipi_disable - Disable the Tegra MIPI calibration device.
+ * @device: Handle to the Tegra MIPI calibration device.
+ *
+ * This calls the disable sequence for the Tegra MIPI calibration device.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int tegra_mipi_disable(struct tegra_mipi_device *device)
 {
-	u32 value;
-	int err;
-
-	err = clk_enable(mipi->clk);
-	if (err < 0)
-		return err;
-
-	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG0);
-	value &= ~MIPI_CAL_BIAS_PAD_PDVCLAMP;
-
-	if (mipi->soc->needs_vclamp_ref)
-		value |= MIPI_CAL_BIAS_PAD_E_VCLAMP_REF;
-
-	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG0);
-
-	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2);
-	value &= ~MIPI_CAL_BIAS_PAD_PDVREG;
-	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
-
-	clk_disable(mipi->clk);
+	if (device->ops->disable)
+		return device->ops->disable(device);
 
 	return 0;
 }
+EXPORT_SYMBOL(tegra_mipi_disable);
 
-static int tegra_mipi_power_down(struct tegra_mipi *mipi)
+/**
+ * tegra_mipi_start_calibration - Start the Tegra MIPI calibration sequence.
+ * @device: Handle to the Tegra MIPI calibration device.
+ *
+ * This initiates the calibration of CSI/DSI interfaces via the Tegra MIPI
+ * calibration device.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int tegra_mipi_start_calibration(struct tegra_mipi_device *device)
 {
-	u32 value;
-	int err;
-
-	err = clk_enable(mipi->clk);
-	if (err < 0)
-		return err;
-
-	/*
-	 * The MIPI_CAL_BIAS_PAD_PDVREG controls a voltage regulator that
-	 * supplies the DSI pads. This must be kept enabled until none of the
-	 * DSI lanes are used anymore.
-	 */
-	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2);
-	value |= MIPI_CAL_BIAS_PAD_PDVREG;
-	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
-
-	/*
-	 * MIPI_CAL_BIAS_PAD_PDVCLAMP and MIPI_CAL_BIAS_PAD_E_VCLAMP_REF
-	 * control a regulator that supplies current to the pre-driver logic.
-	 * Powering down this regulator causes DSI to fail, so it must remain
-	 * powered on until none of the DSI lanes are used anymore.
-	 */
-	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG0);
+	if (device->ops->start_calibration)
+		return device->ops->start_calibration(device);
 
-	if (mipi->soc->needs_vclamp_ref)
-		value &= ~MIPI_CAL_BIAS_PAD_E_VCLAMP_REF;
+	return 0;
+}
+EXPORT_SYMBOL(tegra_mipi_start_calibration);
 
-	value |= MIPI_CAL_BIAS_PAD_PDVCLAMP;
-	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG0);
+/**
+ * tegra_mipi_finish_calibration - Finish the Tegra MIPI calibration sequence.
+ * @device: Handle to the Tegra MIPI calibration device.
+ *
+ * This completes the calibration of CSI/DSI interfaces via the Tegra MIPI
+ * calibration device.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int tegra_mipi_finish_calibration(struct tegra_mipi_device *device)
+{
+	if (device->ops->finish_calibration)
+		return device->ops->finish_calibration(device);
 
 	return 0;
 }
+EXPORT_SYMBOL(tegra_mipi_finish_calibration);
 
+/**
+ * tegra_mipi_request - Request a Tegra MIPI calibration device.
+ * @device: Handle of the device requesting the MIPI calibration function.
+ * @np: Device node pointer of the device requesting the MIPI calibration
+ *	function.
+ *
+ * This function requests a reference to a Tegra MIPI calibration device.
+ *
+ * Returns a pointer to the Tegra MIPI calibration device on success,
+ * or an ERR_PTR-encoded error code on failure.
+ */
 struct tegra_mipi_device *tegra_mipi_request(struct device *device,
 					     struct device_node *np)
 {
-	struct tegra_mipi_device *dev;
+	struct tegra_mipi_device *mipidev;
 	struct of_phandle_args args;
 	int err;
 
@@ -219,321 +114,80 @@ struct tegra_mipi_device *tegra_mipi_request(struct device *device,
 	if (err < 0)
 		return ERR_PTR(err);
 
-	dev = kzalloc_obj(*dev);
-	if (!dev) {
+	if (provider.np != args.np)
+		return ERR_PTR(-ENODEV);
+
+	mipidev = kzalloc_obj(*mipidev);
+	if (!mipidev) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	dev->pdev = of_find_device_by_node(args.np);
-	if (!dev->pdev) {
+	mipidev->pdev = of_find_device_by_node(args.np);
+	if (!mipidev->pdev) {
 		err = -ENODEV;
 		goto free;
 	}
 
-	dev->mipi = platform_get_drvdata(dev->pdev);
-	if (!dev->mipi) {
-		err = -EPROBE_DEFER;
-		goto put;
-	}
-
 	of_node_put(args.np);
 
-	dev->pads = args.args[0];
-	dev->device = device;
+	mipidev->ops = provider.ops;
+	mipidev->pads = args.args[0];
 
-	return dev;
+	return mipidev;
 
-put:
-	platform_device_put(dev->pdev);
 free:
-	kfree(dev);
+	kfree(mipidev);
 out:
 	of_node_put(args.np);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(tegra_mipi_request);
 
-void tegra_mipi_free(struct tegra_mipi_device *device)
+/**
+ * tegra_mipi_free - Free a Tegra MIPI calibration device.
+ * @mipidev: Handle to the Tegra MIPI calibration device.
+ *
+ * This function releases a reference to a Tegra MIPI calibration device
+ * previously requested by tegra_mipi_request().
+ */
+void tegra_mipi_free(struct tegra_mipi_device *mipidev)
 {
-	platform_device_put(device->pdev);
-	kfree(device);
+	platform_device_put(mipidev->pdev);
+	kfree(mipidev);
 }
 EXPORT_SYMBOL(tegra_mipi_free);
 
-int tegra_mipi_enable(struct tegra_mipi_device *dev)
+static void tegra_mipi_remove_provider(void *data)
 {
-	int err = 0;
-
-	mutex_lock(&dev->mipi->lock);
-
-	if (dev->mipi->usage_count++ == 0)
-		err = tegra_mipi_power_up(dev->mipi);
-
-	mutex_unlock(&dev->mipi->lock);
-
-	return err;
-
-}
-EXPORT_SYMBOL(tegra_mipi_enable);
-
-int tegra_mipi_disable(struct tegra_mipi_device *dev)
-{
-	int err = 0;
-
-	mutex_lock(&dev->mipi->lock);
-
-	if (--dev->mipi->usage_count == 0)
-		err = tegra_mipi_power_down(dev->mipi);
-
-	mutex_unlock(&dev->mipi->lock);
-
-	return err;
-
-}
-EXPORT_SYMBOL(tegra_mipi_disable);
-
-int tegra_mipi_finish_calibration(struct tegra_mipi_device *device)
-{
-	struct tegra_mipi *mipi = device->mipi;
-	void __iomem *status_reg = mipi->regs + (MIPI_CAL_STATUS << 2);
-	u32 value;
-	int err;
-
-	err = readl_relaxed_poll_timeout(status_reg, value,
-					 !(value & MIPI_CAL_STATUS_ACTIVE) &&
-					 (value & MIPI_CAL_STATUS_DONE), 50,
-					 250000);
-	mutex_unlock(&device->mipi->lock);
-	clk_disable(device->mipi->clk);
-
-	return err;
+	provider.np = NULL;
+	provider.ops = NULL;
 }
-EXPORT_SYMBOL(tegra_mipi_finish_calibration);
-
-int tegra_mipi_start_calibration(struct tegra_mipi_device *device)
-{
-	const struct tegra_mipi_soc *soc = device->mipi->soc;
-	unsigned int i;
-	u32 value;
-	int err;
-
-	err = clk_enable(device->mipi->clk);
-	if (err < 0)
-		return err;
-
-	mutex_lock(&device->mipi->lock);
-
-	value = MIPI_CAL_BIAS_PAD_DRV_DN_REF(soc->pad_drive_down_ref) |
-		MIPI_CAL_BIAS_PAD_DRV_UP_REF(soc->pad_drive_up_ref);
-	tegra_mipi_writel(device->mipi, value, MIPI_CAL_BIAS_PAD_CFG1);
-
-	value = tegra_mipi_readl(device->mipi, MIPI_CAL_BIAS_PAD_CFG2);
-	value &= ~MIPI_CAL_BIAS_PAD_VCLAMP(0x7);
-	value &= ~MIPI_CAL_BIAS_PAD_VAUXP(0x7);
-	value |= MIPI_CAL_BIAS_PAD_VCLAMP(soc->pad_vclamp_level);
-	value |= MIPI_CAL_BIAS_PAD_VAUXP(soc->pad_vauxp_level);
-	tegra_mipi_writel(device->mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
-
-	for (i = 0; i < soc->num_pads; i++) {
-		u32 clk = 0, data = 0;
-
-		if (device->pads & BIT(i)) {
-			data = MIPI_CAL_CONFIG_SELECT |
-			       MIPI_CAL_CONFIG_HSPDOS(soc->hspdos) |
-			       MIPI_CAL_CONFIG_HSPUOS(soc->hspuos) |
-			       MIPI_CAL_CONFIG_TERMOS(soc->termos);
-			clk = MIPI_CAL_CONFIG_SELECT |
-			      MIPI_CAL_CONFIG_HSCLKPDOSD(soc->hsclkpdos) |
-			      MIPI_CAL_CONFIG_HSCLKPUOSD(soc->hsclkpuos);
-		}
 
-		tegra_mipi_writel(device->mipi, data, soc->pads[i].data);
-
-		if (soc->has_clk_lane && soc->pads[i].clk != 0)
-			tegra_mipi_writel(device->mipi, clk, soc->pads[i].clk);
-	}
-
-	value = tegra_mipi_readl(device->mipi, MIPI_CAL_CTRL);
-	value &= ~MIPI_CAL_CTRL_NOISE_FILTER(0xf);
-	value &= ~MIPI_CAL_CTRL_PRESCALE(0x3);
-	value |= MIPI_CAL_CTRL_NOISE_FILTER(0xa);
-	value |= MIPI_CAL_CTRL_PRESCALE(0x2);
-
-	if (!soc->clock_enable_override)
-		value &= ~MIPI_CAL_CTRL_CLKEN_OVR;
-	else
-		value |= MIPI_CAL_CTRL_CLKEN_OVR;
-
-	tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL);
-
-	/* clear any pending status bits */
-	value = tegra_mipi_readl(device->mipi, MIPI_CAL_STATUS);
-	tegra_mipi_writel(device->mipi, value, MIPI_CAL_STATUS);
-
-	value = tegra_mipi_readl(device->mipi, MIPI_CAL_CTRL);
-	value |= MIPI_CAL_CTRL_START;
-	tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL);
-
-	/*
-	 * Wait for min 72uS to let calibration logic finish calibration
-	 * sequence codes before waiting for pads idle state to apply the
-	 * results.
-	 */
-	usleep_range(75, 80);
-
-	return 0;
-}
-EXPORT_SYMBOL(tegra_mipi_start_calibration);
-
-static const struct tegra_mipi_pad tegra114_mipi_pads[] = {
-	{ .data = MIPI_CAL_CONFIG_CSIA },
-	{ .data = MIPI_CAL_CONFIG_CSIB },
-	{ .data = MIPI_CAL_CONFIG_CSIC },
-	{ .data = MIPI_CAL_CONFIG_CSID },
-	{ .data = MIPI_CAL_CONFIG_CSIE },
-	{ .data = MIPI_CAL_CONFIG_DSIA },
-	{ .data = MIPI_CAL_CONFIG_DSIB },
-	{ .data = MIPI_CAL_CONFIG_DSIC },
-	{ .data = MIPI_CAL_CONFIG_DSID },
-};
-
-static const struct tegra_mipi_soc tegra114_mipi_soc = {
-	.has_clk_lane = false,
-	.pads = tegra114_mipi_pads,
-	.num_pads = ARRAY_SIZE(tegra114_mipi_pads),
-	.clock_enable_override = true,
-	.needs_vclamp_ref = true,
-	.pad_drive_down_ref = 0x2,
-	.pad_drive_up_ref = 0x0,
-	.pad_vclamp_level = 0x0,
-	.pad_vauxp_level = 0x0,
-	.hspdos = 0x0,
-	.hspuos = 0x4,
-	.termos = 0x5,
-	.hsclkpdos = 0x0,
-	.hsclkpuos = 0x4,
-};
-
-static const struct tegra_mipi_pad tegra124_mipi_pads[] = {
-	{ .data = MIPI_CAL_CONFIG_CSIA, .clk = MIPI_CAL_CONFIG_CSIAB_CLK },
-	{ .data = MIPI_CAL_CONFIG_CSIB, .clk = MIPI_CAL_CONFIG_CSIAB_CLK },
-	{ .data = MIPI_CAL_CONFIG_CSIC, .clk = MIPI_CAL_CONFIG_CSICD_CLK },
-	{ .data = MIPI_CAL_CONFIG_CSID, .clk = MIPI_CAL_CONFIG_CSICD_CLK },
-	{ .data = MIPI_CAL_CONFIG_CSIE, .clk = MIPI_CAL_CONFIG_CSIE_CLK  },
-	{ .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIA_CLK  },
-	{ .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIB_CLK  },
-};
-
-static const struct tegra_mipi_soc tegra124_mipi_soc = {
-	.has_clk_lane = true,
-	.pads = tegra124_mipi_pads,
-	.num_pads = ARRAY_SIZE(tegra124_mipi_pads),
-	.clock_enable_override = true,
-	.needs_vclamp_ref = true,
-	.pad_drive_down_ref = 0x2,
-	.pad_drive_up_ref = 0x0,
-	.pad_vclamp_level = 0x0,
-	.pad_vauxp_level = 0x0,
-	.hspdos = 0x0,
-	.hspuos = 0x0,
-	.termos = 0x0,
-	.hsclkpdos = 0x1,
-	.hsclkpuos = 0x2,
-};
-
-static const struct tegra_mipi_soc tegra132_mipi_soc = {
-	.has_clk_lane = true,
-	.pads = tegra124_mipi_pads,
-	.num_pads = ARRAY_SIZE(tegra124_mipi_pads),
-	.clock_enable_override = false,
-	.needs_vclamp_ref = false,
-	.pad_drive_down_ref = 0x0,
-	.pad_drive_up_ref = 0x3,
-	.pad_vclamp_level = 0x0,
-	.pad_vauxp_level = 0x0,
-	.hspdos = 0x0,
-	.hspuos = 0x0,
-	.termos = 0x0,
-	.hsclkpdos = 0x3,
-	.hsclkpuos = 0x2,
-};
-
-static const struct tegra_mipi_pad tegra210_mipi_pads[] = {
-	{ .data = MIPI_CAL_CONFIG_CSIA, .clk = 0 },
-	{ .data = MIPI_CAL_CONFIG_CSIB, .clk = 0 },
-	{ .data = MIPI_CAL_CONFIG_CSIC, .clk = 0 },
-	{ .data = MIPI_CAL_CONFIG_CSID, .clk = 0 },
-	{ .data = MIPI_CAL_CONFIG_CSIE, .clk = 0 },
-	{ .data = MIPI_CAL_CONFIG_CSIF, .clk = 0 },
-	{ .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIA_CLK },
-	{ .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIB_CLK },
-	{ .data = MIPI_CAL_CONFIG_DSIC, .clk = MIPI_CAL_CONFIG_DSIC_CLK },
-	{ .data = MIPI_CAL_CONFIG_DSID, .clk = MIPI_CAL_CONFIG_DSID_CLK },
-};
-
-static const struct tegra_mipi_soc tegra210_mipi_soc = {
-	.has_clk_lane = true,
-	.pads = tegra210_mipi_pads,
-	.num_pads = ARRAY_SIZE(tegra210_mipi_pads),
-	.clock_enable_override = true,
-	.needs_vclamp_ref = false,
-	.pad_drive_down_ref = 0x0,
-	.pad_drive_up_ref = 0x3,
-	.pad_vclamp_level = 0x1,
-	.pad_vauxp_level = 0x1,
-	.hspdos = 0x0,
-	.hspuos = 0x2,
-	.termos = 0x0,
-	.hsclkpdos = 0x0,
-	.hsclkpuos = 0x2,
-};
-
-static const struct of_device_id tegra_mipi_of_match[] = {
-	{ .compatible = "nvidia,tegra114-mipi", .data = &tegra114_mipi_soc },
-	{ .compatible = "nvidia,tegra124-mipi", .data = &tegra124_mipi_soc },
-	{ .compatible = "nvidia,tegra132-mipi", .data = &tegra132_mipi_soc },
-	{ .compatible = "nvidia,tegra210-mipi", .data = &tegra210_mipi_soc },
-	{ },
-};
-
-static int tegra_mipi_probe(struct platform_device *pdev)
+/**
+ * devm_tegra_mipi_add_provider - Managed registration of a Tegra MIPI
+ *				  calibration function provider.
+ * @device: Handle to the device providing the MIPI calibration function.
+ * @np: Device node pointer of the device providing the MIPI calibration
+ *	function.
+ * @ops: Operations supported by the MIPI calibration device.
+ *
+ * This registers a device that provides MIPI calibration functions.
+ * For Tegra20 and Tegra30, this is the CSI block, while Tegra114 and
+ * newer SoC generations have a dedicated hardware block for these
+ * functions.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int devm_tegra_mipi_add_provider(struct device *device, struct device_node *np,
+				 const struct tegra_mipi_ops *ops)
 {
-	const struct of_device_id *match;
-	struct tegra_mipi *mipi;
-
-	match = of_match_node(tegra_mipi_of_match, pdev->dev.of_node);
-	if (!match)
-		return -ENODEV;
+	if (provider.np)
+		return -EBUSY;
 
-	mipi = devm_kzalloc(&pdev->dev, sizeof(*mipi), GFP_KERNEL);
-	if (!mipi)
-		return -ENOMEM;
+	provider.np = np;
+	provider.ops = ops;
 
-	mipi->soc = match->data;
-	mipi->dev = &pdev->dev;
-
-	mipi->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
-	if (IS_ERR(mipi->regs))
-		return PTR_ERR(mipi->regs);
-
-	mutex_init(&mipi->lock);
-
-	mipi->clk = devm_clk_get_prepared(&pdev->dev, NULL);
-	if (IS_ERR(mipi->clk)) {
-		dev_err(&pdev->dev, "failed to get clock\n");
-		return PTR_ERR(mipi->clk);
-	}
-
-	platform_set_drvdata(pdev, mipi);
-
-	return 0;
+	return devm_add_action_or_reset(device, tegra_mipi_remove_provider, NULL);
 }
-
-struct platform_driver tegra_mipi_driver = {
-	.driver = {
-		.name = "tegra-mipi",
-		.of_match_table = tegra_mipi_of_match,
-	},
-	.probe = tegra_mipi_probe,
-};
+EXPORT_SYMBOL(devm_tegra_mipi_add_provider);
diff --git a/drivers/gpu/host1x/tegra114-mipi.c b/drivers/gpu/host1x/tegra114-mipi.c
new file mode 100644
index 000000000000..c084a09784d1
--- /dev/null
+++ b/drivers/gpu/host1x/tegra114-mipi.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright (C) 2013 NVIDIA Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <linux/clk.h>
+#include <linux/host1x.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/tegra-mipi-cal.h>
+
+#include "dev.h"
+
+#define MIPI_CAL_CTRL			0x00
+#define MIPI_CAL_CTRL_NOISE_FILTER(x)	(((x) & 0xf) << 26)
+#define MIPI_CAL_CTRL_PRESCALE(x)	(((x) & 0x3) << 24)
+#define MIPI_CAL_CTRL_CLKEN_OVR		BIT(4)
+#define MIPI_CAL_CTRL_START		BIT(0)
+
+#define MIPI_CAL_AUTOCAL_CTRL		0x01
+
+#define MIPI_CAL_STATUS			0x02
+#define MIPI_CAL_STATUS_DONE		BIT(16)
+#define MIPI_CAL_STATUS_ACTIVE		BIT(0)
+
+#define MIPI_CAL_CONFIG_CSIA		0x05
+#define MIPI_CAL_CONFIG_CSIB		0x06
+#define MIPI_CAL_CONFIG_CSIC		0x07
+#define MIPI_CAL_CONFIG_CSID		0x08
+#define MIPI_CAL_CONFIG_CSIE		0x09
+#define MIPI_CAL_CONFIG_CSIF		0x0a
+#define MIPI_CAL_CONFIG_DSIA		0x0e
+#define MIPI_CAL_CONFIG_DSIB		0x0f
+#define MIPI_CAL_CONFIG_DSIC		0x10
+#define MIPI_CAL_CONFIG_DSID		0x11
+
+#define MIPI_CAL_CONFIG_DSIA_CLK	0x19
+#define MIPI_CAL_CONFIG_DSIB_CLK	0x1a
+#define MIPI_CAL_CONFIG_CSIAB_CLK	0x1b
+#define MIPI_CAL_CONFIG_DSIC_CLK	0x1c
+#define MIPI_CAL_CONFIG_CSICD_CLK	0x1c
+#define MIPI_CAL_CONFIG_DSID_CLK	0x1d
+#define MIPI_CAL_CONFIG_CSIE_CLK	0x1d
+
+/* for data and clock lanes */
+#define MIPI_CAL_CONFIG_SELECT		BIT(21)
+
+/* for data lanes */
+#define MIPI_CAL_CONFIG_HSPDOS(x)	(((x) & 0x1f) << 16)
+#define MIPI_CAL_CONFIG_HSPUOS(x)	(((x) & 0x1f) <<  8)
+#define MIPI_CAL_CONFIG_TERMOS(x)	(((x) & 0x1f) <<  0)
+
+/* for clock lanes */
+#define MIPI_CAL_CONFIG_HSCLKPDOSD(x)	(((x) & 0x1f) <<  8)
+#define MIPI_CAL_CONFIG_HSCLKPUOSD(x)	(((x) & 0x1f) <<  0)
+
+#define MIPI_CAL_BIAS_PAD_CFG0		0x16
+#define MIPI_CAL_BIAS_PAD_PDVCLAMP	BIT(1)
+#define MIPI_CAL_BIAS_PAD_E_VCLAMP_REF	BIT(0)
+
+#define MIPI_CAL_BIAS_PAD_CFG1		0x17
+#define MIPI_CAL_BIAS_PAD_DRV_DN_REF(x) (((x) & 0x7) << 16)
+#define MIPI_CAL_BIAS_PAD_DRV_UP_REF(x) (((x) & 0x7) << 8)
+
+#define MIPI_CAL_BIAS_PAD_CFG2		0x18
+#define MIPI_CAL_BIAS_PAD_VCLAMP(x)	(((x) & 0x7) << 16)
+#define MIPI_CAL_BIAS_PAD_VAUXP(x)	(((x) & 0x7) << 4)
+#define MIPI_CAL_BIAS_PAD_PDVREG	BIT(1)
+
+struct tegra_mipi_pad {
+	unsigned long data;
+	unsigned long clk;
+};
+
+struct tegra_mipi_soc {
+	bool has_clk_lane;
+	const struct tegra_mipi_pad *pads;
+	unsigned int num_pads;
+
+	bool clock_enable_override;
+	bool needs_vclamp_ref;
+
+	/* bias pad configuration settings */
+	u8 pad_drive_down_ref;
+	u8 pad_drive_up_ref;
+
+	u8 pad_vclamp_level;
+	u8 pad_vauxp_level;
+
+	/* calibration settings for data lanes */
+	u8 hspdos;
+	u8 hspuos;
+	u8 termos;
+
+	/* calibration settings for clock lanes */
+	u8 hsclkpdos;
+	u8 hsclkpuos;
+};
+
+struct tegra_mipi {
+	const struct tegra_mipi_soc *soc;
+	struct device *dev;
+	void __iomem *regs;
+	struct mutex lock; /* for register access */
+	struct clk *clk;
+
+	unsigned long usage_count;
+};
+
+static inline u32 tegra_mipi_readl(struct tegra_mipi *mipi,
+				   unsigned long offset)
+{
+	return readl(mipi->regs + (offset << 2));
+}
+
+static inline void tegra_mipi_writel(struct tegra_mipi *mipi, u32 value,
+				     unsigned long offset)
+{
+	writel(value, mipi->regs + (offset << 2));
+}
+
+static int tegra114_mipi_power_up(struct tegra_mipi *mipi)
+{
+	u32 value;
+	int err;
+
+	err = clk_enable(mipi->clk);
+	if (err < 0)
+		return err;
+
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG0);
+	value &= ~MIPI_CAL_BIAS_PAD_PDVCLAMP;
+
+	if (mipi->soc->needs_vclamp_ref)
+		value |= MIPI_CAL_BIAS_PAD_E_VCLAMP_REF;
+
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG0);
+
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2);
+	value &= ~MIPI_CAL_BIAS_PAD_PDVREG;
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
+
+	clk_disable(mipi->clk);
+
+	return 0;
+}
+
+static int tegra114_mipi_power_down(struct tegra_mipi *mipi)
+{
+	u32 value;
+	int err;
+
+	err = clk_enable(mipi->clk);
+	if (err < 0)
+		return err;
+
+	/*
+	 * The MIPI_CAL_BIAS_PAD_PDVREG controls a voltage regulator that
+	 * supplies the DSI pads. This must be kept enabled until none of the
+	 * DSI lanes are used anymore.
+	 */
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2);
+	value |= MIPI_CAL_BIAS_PAD_PDVREG;
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
+
+	/*
+	 * MIPI_CAL_BIAS_PAD_PDVCLAMP and MIPI_CAL_BIAS_PAD_E_VCLAMP_REF
+	 * control a regulator that supplies current to the pre-driver logic.
+	 * Powering down this regulator causes DSI to fail, so it must remain
+	 * powered on until none of the DSI lanes are used anymore.
+	 */
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG0);
+
+	if (mipi->soc->needs_vclamp_ref)
+		value &= ~MIPI_CAL_BIAS_PAD_E_VCLAMP_REF;
+
+	value |= MIPI_CAL_BIAS_PAD_PDVCLAMP;
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG0);
+
+	return 0;
+}
+
+static int tegra114_mipi_enable(struct tegra_mipi_device *mipidev)
+{
+	struct tegra_mipi *mipi = platform_get_drvdata(mipidev->pdev);
+	int err = 0;
+
+	mutex_lock(&mipi->lock);
+
+	if (mipi->usage_count++ == 0)
+		err = tegra114_mipi_power_up(mipi);
+
+	mutex_unlock(&mipi->lock);
+
+	return err;
+}
+
+static int tegra114_mipi_disable(struct tegra_mipi_device *mipidev)
+{
+	struct tegra_mipi *mipi = platform_get_drvdata(mipidev->pdev);
+	int err = 0;
+
+	mutex_lock(&mipi->lock);
+
+	if (--mipi->usage_count == 0)
+		err = tegra114_mipi_power_down(mipi);
+
+	mutex_unlock(&mipi->lock);
+
+	return err;
+}
+
+static int tegra114_mipi_finish_calibration(struct tegra_mipi_device *mipidev)
+{
+	struct tegra_mipi *mipi = platform_get_drvdata(mipidev->pdev);
+	void __iomem *status_reg = mipi->regs + (MIPI_CAL_STATUS << 2);
+	u32 value;
+	int err;
+
+	err = readl_relaxed_poll_timeout(status_reg, value,
+					 !(value & MIPI_CAL_STATUS_ACTIVE) &&
+					 (value & MIPI_CAL_STATUS_DONE), 50,
+					 250000);
+	mutex_unlock(&mipi->lock);
+	clk_disable(mipi->clk);
+
+	return err;
+}
+
+static int tegra114_mipi_start_calibration(struct tegra_mipi_device *mipidev)
+{
+	struct tegra_mipi *mipi = platform_get_drvdata(mipidev->pdev);
+	const struct tegra_mipi_soc *soc = mipi->soc;
+	unsigned int i;
+	u32 value;
+	int err;
+
+	err = clk_enable(mipi->clk);
+	if (err < 0)
+		return err;
+
+	mutex_lock(&mipi->lock);
+
+	value = MIPI_CAL_BIAS_PAD_DRV_DN_REF(soc->pad_drive_down_ref) |
+		MIPI_CAL_BIAS_PAD_DRV_UP_REF(soc->pad_drive_up_ref);
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG1);
+
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2);
+	value &= ~MIPI_CAL_BIAS_PAD_VCLAMP(0x7);
+	value &= ~MIPI_CAL_BIAS_PAD_VAUXP(0x7);
+	value |= MIPI_CAL_BIAS_PAD_VCLAMP(soc->pad_vclamp_level);
+	value |= MIPI_CAL_BIAS_PAD_VAUXP(soc->pad_vauxp_level);
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
+
+	for (i = 0; i < soc->num_pads; i++) {
+		u32 clk = 0, data = 0;
+
+		if (mipidev->pads & BIT(i)) {
+			data = MIPI_CAL_CONFIG_SELECT |
+			       MIPI_CAL_CONFIG_HSPDOS(soc->hspdos) |
+			       MIPI_CAL_CONFIG_HSPUOS(soc->hspuos) |
+			       MIPI_CAL_CONFIG_TERMOS(soc->termos);
+			clk = MIPI_CAL_CONFIG_SELECT |
+			      MIPI_CAL_CONFIG_HSCLKPDOSD(soc->hsclkpdos) |
+			      MIPI_CAL_CONFIG_HSCLKPUOSD(soc->hsclkpuos);
+		}
+
+		tegra_mipi_writel(mipi, data, soc->pads[i].data);
+
+		if (soc->has_clk_lane && soc->pads[i].clk != 0)
+			tegra_mipi_writel(mipi, clk, soc->pads[i].clk);
+	}
+
+	value = tegra_mipi_readl(mipi, MIPI_CAL_CTRL);
+	value &= ~MIPI_CAL_CTRL_NOISE_FILTER(0xf);
+	value &= ~MIPI_CAL_CTRL_PRESCALE(0x3);
+	value |= MIPI_CAL_CTRL_NOISE_FILTER(0xa);
+	value |= MIPI_CAL_CTRL_PRESCALE(0x2);
+
+	if (!soc->clock_enable_override)
+		value &= ~MIPI_CAL_CTRL_CLKEN_OVR;
+	else
+		value |= MIPI_CAL_CTRL_CLKEN_OVR;
+
+	tegra_mipi_writel(mipi, value, MIPI_CAL_CTRL);
+
+	/* clear any pending status bits */
+	value = tegra_mipi_readl(mipi, MIPI_CAL_STATUS);
+	tegra_mipi_writel(mipi, value, MIPI_CAL_STATUS);
+
+	value = tegra_mipi_readl(mipi, MIPI_CAL_CTRL);
+	value |= MIPI_CAL_CTRL_START;
+	tegra_mipi_writel(mipi, value, MIPI_CAL_CTRL);
+
+	/*
+	 * Wait for min 72uS to let calibration logic finish calibration
+	 * sequence codes before waiting for pads idle state to apply the
+	 * results.
+	 */
+	usleep_range(75, 80);
+
+	return 0;
+}
+
+static const struct tegra_mipi_ops tegra114_mipi_ops = {
+	.enable = tegra114_mipi_enable,
+	.disable = tegra114_mipi_disable,
+	.start_calibration = tegra114_mipi_start_calibration,
+	.finish_calibration = tegra114_mipi_finish_calibration,
+};
+
+static const struct tegra_mipi_pad tegra114_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA },
+	{ .data = MIPI_CAL_CONFIG_CSIB },
+	{ .data = MIPI_CAL_CONFIG_CSIC },
+	{ .data = MIPI_CAL_CONFIG_CSID },
+	{ .data = MIPI_CAL_CONFIG_CSIE },
+	{ .data = MIPI_CAL_CONFIG_DSIA },
+	{ .data = MIPI_CAL_CONFIG_DSIB },
+	{ .data = MIPI_CAL_CONFIG_DSIC },
+	{ .data = MIPI_CAL_CONFIG_DSID },
+};
+
+static const struct tegra_mipi_soc tegra114_mipi_soc = {
+	.has_clk_lane = false,
+	.pads = tegra114_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra114_mipi_pads),
+	.clock_enable_override = true,
+	.needs_vclamp_ref = true,
+	.pad_drive_down_ref = 0x2,
+	.pad_drive_up_ref = 0x0,
+	.pad_vclamp_level = 0x0,
+	.pad_vauxp_level = 0x0,
+	.hspdos = 0x0,
+	.hspuos = 0x4,
+	.termos = 0x5,
+	.hsclkpdos = 0x0,
+	.hsclkpuos = 0x4,
+};
+
+static const struct tegra_mipi_pad tegra124_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA, .clk = MIPI_CAL_CONFIG_CSIAB_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIB, .clk = MIPI_CAL_CONFIG_CSIAB_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIC, .clk = MIPI_CAL_CONFIG_CSICD_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSID, .clk = MIPI_CAL_CONFIG_CSICD_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIE, .clk = MIPI_CAL_CONFIG_CSIE_CLK  },
+	{ .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIA_CLK  },
+	{ .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIB_CLK  },
+};
+
+static const struct tegra_mipi_soc tegra124_mipi_soc = {
+	.has_clk_lane = true,
+	.pads = tegra124_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra124_mipi_pads),
+	.clock_enable_override = true,
+	.needs_vclamp_ref = true,
+	.pad_drive_down_ref = 0x2,
+	.pad_drive_up_ref = 0x0,
+	.pad_vclamp_level = 0x0,
+	.pad_vauxp_level = 0x0,
+	.hspdos = 0x0,
+	.hspuos = 0x0,
+	.termos = 0x0,
+	.hsclkpdos = 0x1,
+	.hsclkpuos = 0x2,
+};
+
+static const struct tegra_mipi_soc tegra132_mipi_soc = {
+	.has_clk_lane = true,
+	.pads = tegra124_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra124_mipi_pads),
+	.clock_enable_override = false,
+	.needs_vclamp_ref = false,
+	.pad_drive_down_ref = 0x0,
+	.pad_drive_up_ref = 0x3,
+	.pad_vclamp_level = 0x0,
+	.pad_vauxp_level = 0x0,
+	.hspdos = 0x0,
+	.hspuos = 0x0,
+	.termos = 0x0,
+	.hsclkpdos = 0x3,
+	.hsclkpuos = 0x2,
+};
+
+static const struct tegra_mipi_pad tegra210_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIB, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIC, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSID, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIE, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIF, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIA_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIB_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSIC, .clk = MIPI_CAL_CONFIG_DSIC_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSID, .clk = MIPI_CAL_CONFIG_DSID_CLK },
+};
+
+static const struct tegra_mipi_soc tegra210_mipi_soc = {
+	.has_clk_lane = true,
+	.pads = tegra210_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra210_mipi_pads),
+	.clock_enable_override = true,
+	.needs_vclamp_ref = false,
+	.pad_drive_down_ref = 0x0,
+	.pad_drive_up_ref = 0x3,
+	.pad_vclamp_level = 0x1,
+	.pad_vauxp_level = 0x1,
+	.hspdos = 0x0,
+	.hspuos = 0x2,
+	.termos = 0x0,
+	.hsclkpdos = 0x0,
+	.hsclkpuos = 0x2,
+};
+
+static const struct of_device_id tegra_mipi_of_match[] = {
+	{ .compatible = "nvidia,tegra114-mipi", .data = &tegra114_mipi_soc },
+	{ .compatible = "nvidia,tegra124-mipi", .data = &tegra124_mipi_soc },
+	{ .compatible = "nvidia,tegra132-mipi", .data = &tegra132_mipi_soc },
+	{ .compatible = "nvidia,tegra210-mipi", .data = &tegra210_mipi_soc },
+	{ },
+};
+
+static int tegra_mipi_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct tegra_mipi *mipi;
+
+	match = of_match_node(tegra_mipi_of_match, pdev->dev.of_node);
+	if (!match)
+		return -ENODEV;
+
+	mipi = devm_kzalloc(&pdev->dev, sizeof(*mipi), GFP_KERNEL);
+	if (!mipi)
+		return -ENOMEM;
+
+	mipi->soc = match->data;
+	mipi->dev = &pdev->dev;
+
+	mipi->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+	if (IS_ERR(mipi->regs))
+		return PTR_ERR(mipi->regs);
+
+	mutex_init(&mipi->lock);
+
+	mipi->clk = devm_clk_get_prepared(&pdev->dev, NULL);
+	if (IS_ERR(mipi->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		return PTR_ERR(mipi->clk);
+	}
+
+	platform_set_drvdata(pdev, mipi);
+
+	return devm_tegra_mipi_add_provider(&pdev->dev, pdev->dev.of_node,
+					    &tegra114_mipi_ops);
+}
+
+struct platform_driver tegra_mipi_driver = {
+	.driver = {
+		.name = "tegra-mipi",
+		.of_match_table = tegra_mipi_of_match,
+	},
+	.probe = tegra_mipi_probe,
+};
diff --git a/drivers/staging/media/tegra-video/csi.c b/drivers/staging/media/tegra-video/csi.c
index f858c05ce6c9..bcaea20c3025 100644
--- a/drivers/staging/media/tegra-video/csi.c
+++ b/drivers/staging/media/tegra-video/csi.c
@@ -12,6 +12,7 @@
 #include <linux/of_graph.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/tegra-mipi-cal.h>
 
 #include <media/v4l2-fwnode.h>
 
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 5e7a63143a4a..1f5f55917d1c 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -453,16 +453,6 @@ void host1x_client_unregister(struct host1x_client *client);
 int host1x_client_suspend(struct host1x_client *client);
 int host1x_client_resume(struct host1x_client *client);
 
-struct tegra_mipi_device;
-
-struct tegra_mipi_device *tegra_mipi_request(struct device *device,
-					     struct device_node *np);
-void tegra_mipi_free(struct tegra_mipi_device *device);
-int tegra_mipi_enable(struct tegra_mipi_device *device);
-int tegra_mipi_disable(struct tegra_mipi_device *device);
-int tegra_mipi_start_calibration(struct tegra_mipi_device *device);
-int tegra_mipi_finish_calibration(struct tegra_mipi_device *device);
-
 /* host1x memory contexts */
 
 struct host1x_memory_context {
diff --git a/include/linux/tegra-mipi-cal.h b/include/linux/tegra-mipi-cal.h
new file mode 100644
index 000000000000..2a540b50f65d
--- /dev/null
+++ b/include/linux/tegra-mipi-cal.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __TEGRA_MIPI_CAL_H_
+#define __TEGRA_MIPI_CAL_H_
+
+struct tegra_mipi_device {
+	const struct tegra_mipi_ops *ops;
+	struct platform_device *pdev;
+	unsigned long pads;
+};
+
+/**
+ * Operations for Tegra MIPI calibration device
+ */
+struct tegra_mipi_ops {
+	/**
+	 * @enable:
+	 *
+	 * Enable MIPI calibration device
+	 */
+	int (*enable)(struct tegra_mipi_device *device);
+
+	/**
+	 * @disable:
+	 *
+	 * Disable MIPI calibration device
+	 */
+	int (*disable)(struct tegra_mipi_device *device);
+
+	/**
+	 * @start_calibration:
+	 *
+	 * Start MIPI calibration
+	 */
+	int (*start_calibration)(struct tegra_mipi_device *device);
+
+	/**
+	 * @finish_calibration:
+	 *
+	 * Finish MIPI calibration
+	 */
+	int (*finish_calibration)(struct tegra_mipi_device *device);
+};
+
+int devm_tegra_mipi_add_provider(struct device *device, struct device_node *np,
+				 const struct tegra_mipi_ops *ops);
+
+struct tegra_mipi_device *tegra_mipi_request(struct device *device,
+					     struct device_node *np);
+void tegra_mipi_free(struct tegra_mipi_device *device);
+
+int tegra_mipi_enable(struct tegra_mipi_device *device);
+int tegra_mipi_disable(struct tegra_mipi_device *device);
+int tegra_mipi_start_calibration(struct tegra_mipi_device *device);
+int tegra_mipi_finish_calibration(struct tegra_mipi_device *device);
+
+#endif /* __TEGRA_MIPI_CAL_H_ */
-- 
cgit v1.2.3


From 7a3ac62473f2bd213557e41aaab7a8f144037dfd Mon Sep 17 00:00:00 2001
From: Lucas Zampieri <lcasmz54@gmail.com>
Date: Sat, 14 Mar 2026 01:05:29 +0000
Subject: HID: input: Introduce struct hid_battery and refactor battery code

Introduce struct hid_battery to encapsulate individual battery state,
preparing for future multi-battery support.

The new structure contains all battery-related fields previously stored
directly in hid_device (capacity, min, max, report_type, report_id,
charge_status, etc.). The hid_device->battery pointer type changes from
struct power_supply* to struct hid_battery*, and all battery functions
are refactored accordingly.

A hid_get_battery() helper is added for external drivers, with
hid-apple.c and hid-magicmouse.c updated to use the new API. The
hid-input-test.c KUnit tests are also updated for the new structure.

No functional changes for single-battery devices.

Signed-off-by: Lucas Zampieri <lcasmz54@gmail.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 drivers/hid/hid-apple.c      |  10 ++--
 drivers/hid/hid-input-test.c |  39 +++++++------
 drivers/hid/hid-input.c      | 131 ++++++++++++++++++++++++-------------------
 drivers/hid/hid-magicmouse.c |  10 ++--
 include/linux/hid.h          |  52 ++++++++++++-----
 5 files changed, 146 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index fc5897a6bb53..8b8b05c8a441 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -623,17 +623,19 @@ static int apple_fetch_battery(struct hid_device *hdev)
 	struct apple_sc *asc = hid_get_drvdata(hdev);
 	struct hid_report_enum *report_enum;
 	struct hid_report *report;
+	struct hid_battery *bat;
 
-	if (!(asc->quirks & APPLE_RDESC_BATTERY) || !hdev->battery)
+	bat = hid_get_battery(hdev);
+	if (!(asc->quirks & APPLE_RDESC_BATTERY) || !bat)
 		return -1;
 
-	report_enum = &hdev->report_enum[hdev->battery_report_type];
-	report = report_enum->report_id_hash[hdev->battery_report_id];
+	report_enum = &hdev->report_enum[bat->report_type];
+	report = report_enum->report_id_hash[bat->report_id];
 
 	if (!report || report->maxfield < 1)
 		return -1;
 
-	if (hdev->battery_capacity == hdev->battery_max)
+	if (bat->capacity == bat->max)
 		return -1;
 
 	hid_hw_request(hdev, report, HID_REQ_GET_REPORT);
diff --git a/drivers/hid/hid-input-test.c b/drivers/hid/hid-input-test.c
index 6f5c71660d82..c92008dafddf 100644
--- a/drivers/hid/hid-input-test.c
+++ b/drivers/hid/hid-input-test.c
@@ -9,54 +9,59 @@
 
 static void hid_test_input_update_battery_charge_status(struct kunit *test)
 {
-	struct hid_device *dev;
+	struct hid_battery *bat;
 	bool handled;
 
-	dev = kunit_kzalloc(test, sizeof(*dev), GFP_KERNEL);
-	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+	bat = kunit_kzalloc(test, sizeof(*bat), GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bat);
 
-	handled = hidinput_update_battery_charge_status(dev, HID_DG_HEIGHT, 0);
+	handled = hidinput_update_battery_charge_status(bat, HID_DG_HEIGHT, 0);
 	KUNIT_EXPECT_FALSE(test, handled);
-	KUNIT_EXPECT_EQ(test, dev->battery_charge_status, POWER_SUPPLY_STATUS_UNKNOWN);
+	KUNIT_EXPECT_EQ(test, bat->charge_status, POWER_SUPPLY_STATUS_UNKNOWN);
 
-	handled = hidinput_update_battery_charge_status(dev, HID_BAT_CHARGING, 0);
+	handled = hidinput_update_battery_charge_status(bat, HID_BAT_CHARGING, 0);
 	KUNIT_EXPECT_TRUE(test, handled);
-	KUNIT_EXPECT_EQ(test, dev->battery_charge_status, POWER_SUPPLY_STATUS_DISCHARGING);
+	KUNIT_EXPECT_EQ(test, bat->charge_status, POWER_SUPPLY_STATUS_DISCHARGING);
 
-	handled = hidinput_update_battery_charge_status(dev, HID_BAT_CHARGING, 1);
+	handled = hidinput_update_battery_charge_status(bat, HID_BAT_CHARGING, 1);
 	KUNIT_EXPECT_TRUE(test, handled);
-	KUNIT_EXPECT_EQ(test, dev->battery_charge_status, POWER_SUPPLY_STATUS_CHARGING);
+	KUNIT_EXPECT_EQ(test, bat->charge_status, POWER_SUPPLY_STATUS_CHARGING);
 }
 
 static void hid_test_input_get_battery_property(struct kunit *test)
 {
 	struct power_supply *psy;
+	struct hid_battery *bat;
 	struct hid_device *dev;
 	union power_supply_propval val;
 	int ret;
 
 	dev = kunit_kzalloc(test, sizeof(*dev), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
-	dev->battery_avoid_query = true;
+
+	bat = kunit_kzalloc(test, sizeof(*bat), GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bat);
+	bat->dev = dev;
+	bat->avoid_query = true;
 
 	psy = kunit_kzalloc(test, sizeof(*psy), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, psy);
-	psy->drv_data = dev;
+	psy->drv_data = bat;
 
-	dev->battery_status = HID_BATTERY_UNKNOWN;
-	dev->battery_charge_status = POWER_SUPPLY_STATUS_CHARGING;
+	bat->status = HID_BATTERY_UNKNOWN;
+	bat->charge_status = POWER_SUPPLY_STATUS_CHARGING;
 	ret = hidinput_get_battery_property(psy, POWER_SUPPLY_PROP_STATUS, &val);
 	KUNIT_EXPECT_EQ(test, ret, 0);
 	KUNIT_EXPECT_EQ(test, val.intval, POWER_SUPPLY_STATUS_UNKNOWN);
 
-	dev->battery_status = HID_BATTERY_REPORTED;
-	dev->battery_charge_status = POWER_SUPPLY_STATUS_CHARGING;
+	bat->status = HID_BATTERY_REPORTED;
+	bat->charge_status = POWER_SUPPLY_STATUS_CHARGING;
 	ret = hidinput_get_battery_property(psy, POWER_SUPPLY_PROP_STATUS, &val);
 	KUNIT_EXPECT_EQ(test, ret, 0);
 	KUNIT_EXPECT_EQ(test, val.intval, POWER_SUPPLY_STATUS_CHARGING);
 
-	dev->battery_status = HID_BATTERY_REPORTED;
-	dev->battery_charge_status = POWER_SUPPLY_STATUS_DISCHARGING;
+	bat->status = HID_BATTERY_REPORTED;
+	bat->charge_status = POWER_SUPPLY_STATUS_DISCHARGING;
 	ret = hidinput_get_battery_property(psy, POWER_SUPPLY_PROP_STATUS, &val);
 	KUNIT_EXPECT_EQ(test, ret, 0);
 	KUNIT_EXPECT_EQ(test, val.intval, POWER_SUPPLY_STATUS_DISCHARGING);
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 7e0f971efe5d..b5d34658b68d 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -416,18 +416,18 @@ static unsigned find_battery_quirk(struct hid_device *hdev)
 	return quirks;
 }
 
-static int hidinput_scale_battery_capacity(struct hid_device *dev,
+static int hidinput_scale_battery_capacity(struct hid_battery *bat,
 					   int value)
 {
-	if (dev->battery_min < dev->battery_max &&
-	    value >= dev->battery_min && value <= dev->battery_max)
-		value = ((value - dev->battery_min) * 100) /
-			(dev->battery_max - dev->battery_min);
+	if (bat->min < bat->max &&
+	    value >= bat->min && value <= bat->max)
+		value = ((value - bat->min) * 100) /
+			(bat->max - bat->min);
 
 	return value;
 }
 
-static int hidinput_query_battery_capacity(struct hid_device *dev)
+static int hidinput_query_battery_capacity(struct hid_battery *bat)
 {
 	int ret;
 
@@ -435,19 +435,20 @@ static int hidinput_query_battery_capacity(struct hid_device *dev)
 	if (!buf)
 		return -ENOMEM;
 
-	ret = hid_hw_raw_request(dev, dev->battery_report_id, buf, 4,
-				 dev->battery_report_type, HID_REQ_GET_REPORT);
+	ret = hid_hw_raw_request(bat->dev, bat->report_id, buf, 4,
+				 bat->report_type, HID_REQ_GET_REPORT);
 	if (ret < 2)
 		return -ENODATA;
 
-	return hidinput_scale_battery_capacity(dev, buf[1]);
+	return hidinput_scale_battery_capacity(bat, buf[1]);
 }
 
 static int hidinput_get_battery_property(struct power_supply *psy,
 					 enum power_supply_property prop,
 					 union power_supply_propval *val)
 {
-	struct hid_device *dev = power_supply_get_drvdata(psy);
+	struct hid_battery *bat = power_supply_get_drvdata(psy);
+	struct hid_device *dev = bat->dev;
 	int value;
 	int ret = 0;
 
@@ -457,17 +458,17 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 		break;
 
 	case POWER_SUPPLY_PROP_PRESENT:
-		val->intval = dev->battery_present;
+		val->intval = bat->present;
 		break;
 
 	case POWER_SUPPLY_PROP_CAPACITY:
-		if (dev->battery_status != HID_BATTERY_REPORTED &&
-		    !dev->battery_avoid_query) {
-			value = hidinput_query_battery_capacity(dev);
+		if (bat->status != HID_BATTERY_REPORTED &&
+		    !bat->avoid_query) {
+			value = hidinput_query_battery_capacity(bat);
 			if (value < 0)
 				return value;
 		} else  {
-			value = dev->battery_capacity;
+			value = bat->capacity;
 		}
 
 		val->intval = value;
@@ -478,20 +479,20 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 		break;
 
 	case POWER_SUPPLY_PROP_STATUS:
-		if (dev->battery_status != HID_BATTERY_REPORTED &&
-		    !dev->battery_avoid_query) {
-			value = hidinput_query_battery_capacity(dev);
+		if (bat->status != HID_BATTERY_REPORTED &&
+		    !bat->avoid_query) {
+			value = hidinput_query_battery_capacity(bat);
 			if (value < 0)
 				return value;
 
-			dev->battery_capacity = value;
-			dev->battery_status = HID_BATTERY_QUERIED;
+			bat->capacity = value;
+			bat->status = HID_BATTERY_QUERIED;
 		}
 
-		if (dev->battery_status == HID_BATTERY_UNKNOWN)
+		if (bat->status == HID_BATTERY_UNKNOWN)
 			val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
 		else
-			val->intval = dev->battery_charge_status;
+			val->intval = bat->charge_status;
 		break;
 
 	case POWER_SUPPLY_PROP_SCOPE:
@@ -509,8 +510,9 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 				  struct hid_field *field, bool is_percentage)
 {
+	struct hid_battery *bat;
 	struct power_supply_desc *psy_desc;
-	struct power_supply_config psy_cfg = { .drv_data = dev, };
+	struct power_supply_config psy_cfg = { 0 };
 	unsigned quirks;
 	s32 min, max;
 	int error;
@@ -526,16 +528,22 @@ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 	if (quirks & HID_BATTERY_QUIRK_IGNORE)
 		return 0;
 
-	psy_desc = devm_kzalloc(&dev->dev, sizeof(*psy_desc), GFP_KERNEL);
-	if (!psy_desc)
+	bat = devm_kzalloc(&dev->dev, sizeof(*bat), GFP_KERNEL);
+	if (!bat)
 		return -ENOMEM;
 
+	psy_desc = devm_kzalloc(&dev->dev, sizeof(*psy_desc), GFP_KERNEL);
+	if (!psy_desc) {
+		error = -ENOMEM;
+		goto err_free_bat;
+	}
+
 	psy_desc->name = devm_kasprintf(&dev->dev, GFP_KERNEL, "hid-%s-battery",
 					strlen(dev->uniq) ?
 						dev->uniq : dev_name(&dev->dev));
 	if (!psy_desc->name) {
 		error = -ENOMEM;
-		goto err_free_mem;
+		goto err_free_desc;
 	}
 
 	psy_desc->type = POWER_SUPPLY_TYPE_BATTERY;
@@ -555,51 +563,57 @@ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 	if (quirks & HID_BATTERY_QUIRK_FEATURE)
 		report_type = HID_FEATURE_REPORT;
 
-	dev->battery_min = min;
-	dev->battery_max = max;
-	dev->battery_report_type = report_type;
-	dev->battery_report_id = field->report->id;
-	dev->battery_charge_status = POWER_SUPPLY_STATUS_DISCHARGING;
+	bat->dev = dev;
+	bat->min = min;
+	bat->max = max;
+	bat->report_type = report_type;
+	bat->report_id = field->report->id;
+	bat->charge_status = POWER_SUPPLY_STATUS_DISCHARGING;
+	bat->status = HID_BATTERY_UNKNOWN;
 
 	/*
 	 * Stylus is normally not connected to the device and thus we
 	 * can't query the device and get meaningful battery strength.
 	 * We have to wait for the device to report it on its own.
 	 */
-	dev->battery_avoid_query = report_type == HID_INPUT_REPORT &&
-				   field->physical == HID_DG_STYLUS;
+	bat->avoid_query = report_type == HID_INPUT_REPORT &&
+			   field->physical == HID_DG_STYLUS;
 
 	if (quirks & HID_BATTERY_QUIRK_AVOID_QUERY)
-		dev->battery_avoid_query = true;
+		bat->avoid_query = true;
 
-	dev->battery_present = (quirks & HID_BATTERY_QUIRK_DYNAMIC) ? false : true;
+	bat->present = (quirks & HID_BATTERY_QUIRK_DYNAMIC) ? false : true;
 
-	dev->battery = devm_power_supply_register(&dev->dev, psy_desc, &psy_cfg);
-	if (IS_ERR(dev->battery)) {
-		error = PTR_ERR(dev->battery);
+	psy_cfg.drv_data = bat;
+	bat->ps = devm_power_supply_register(&dev->dev, psy_desc, &psy_cfg);
+	if (IS_ERR(bat->ps)) {
+		error = PTR_ERR(bat->ps);
 		hid_warn(dev, "can't register power supply: %d\n", error);
 		goto err_free_name;
 	}
 
-	power_supply_powers(dev->battery, &dev->dev);
+	power_supply_powers(bat->ps, &dev->dev);
+	dev->battery = bat;
 	return 0;
 
 err_free_name:
 	devm_kfree(&dev->dev, psy_desc->name);
-err_free_mem:
+err_free_desc:
 	devm_kfree(&dev->dev, psy_desc);
+err_free_bat:
+	devm_kfree(&dev->dev, bat);
 	dev->battery = NULL;
 	return error;
 }
 
-static bool hidinput_update_battery_charge_status(struct hid_device *dev,
+static bool hidinput_update_battery_charge_status(struct hid_battery *bat,
 						  unsigned int usage, int value)
 {
 	switch (usage) {
 	case HID_BAT_CHARGING:
-		dev->battery_charge_status = value ?
-					     POWER_SUPPLY_STATUS_CHARGING :
-					     POWER_SUPPLY_STATUS_DISCHARGING;
+		bat->charge_status = value ?
+				     POWER_SUPPLY_STATUS_CHARGING :
+				     POWER_SUPPLY_STATUS_DISCHARGING;
 		return true;
 	}
 
@@ -609,34 +623,35 @@ static bool hidinput_update_battery_charge_status(struct hid_device *dev,
 static void hidinput_update_battery(struct hid_device *dev, unsigned int usage,
 				    int value)
 {
+	struct hid_battery *bat = dev->battery;
 	int capacity;
 
-	if (!dev->battery)
+	if (!bat)
 		return;
 
-	if (hidinput_update_battery_charge_status(dev, usage, value)) {
-		dev->battery_present = true;
-		power_supply_changed(dev->battery);
+	if (hidinput_update_battery_charge_status(bat, usage, value)) {
+		bat->present = true;
+		power_supply_changed(bat->ps);
 		return;
 	}
 
 	if ((usage & HID_USAGE_PAGE) == HID_UP_DIGITIZER && value == 0)
 		return;
 
-	if (value < dev->battery_min || value > dev->battery_max)
+	if (value < bat->min || value > bat->max)
 		return;
 
-	capacity = hidinput_scale_battery_capacity(dev, value);
+	capacity = hidinput_scale_battery_capacity(bat, value);
 
-	if (dev->battery_status != HID_BATTERY_REPORTED ||
-	    capacity != dev->battery_capacity ||
-	    ktime_after(ktime_get_coarse(), dev->battery_ratelimit_time)) {
-		dev->battery_present = true;
-		dev->battery_capacity = capacity;
-		dev->battery_status = HID_BATTERY_REPORTED;
-		dev->battery_ratelimit_time =
+	if (bat->status != HID_BATTERY_REPORTED ||
+	    capacity != bat->capacity ||
+	    ktime_after(ktime_get_coarse(), bat->ratelimit_time)) {
+		bat->present = true;
+		bat->capacity = capacity;
+		bat->status = HID_BATTERY_REPORTED;
+		bat->ratelimit_time =
 			ktime_add_ms(ktime_get_coarse(), 30 * 1000);
-		power_supply_changed(dev->battery);
+		power_supply_changed(bat->ps);
 	}
 }
 #else  /* !CONFIG_HID_BATTERY_STRENGTH */
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index 9eadf3252d0d..e70bd3dc07ab 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -817,19 +817,21 @@ static int magicmouse_fetch_battery(struct hid_device *hdev)
 #ifdef CONFIG_HID_BATTERY_STRENGTH
 	struct hid_report_enum *report_enum;
 	struct hid_report *report;
+	struct hid_battery *bat;
 
-	if (!hdev->battery ||
+	bat = hid_get_battery(hdev);
+	if (!bat ||
 	    (!is_usb_magicmouse2(hdev->vendor, hdev->product) &&
 	     !is_usb_magictrackpad2(hdev->vendor, hdev->product)))
 		return -1;
 
-	report_enum = &hdev->report_enum[hdev->battery_report_type];
-	report = report_enum->report_id_hash[hdev->battery_report_id];
+	report_enum = &hdev->report_enum[bat->report_type];
+	report = report_enum->report_id_hash[bat->report_id];
 
 	if (!report || report->maxfield < 1)
 		return -1;
 
-	if (hdev->battery_capacity == hdev->battery_max)
+	if (bat->capacity == bat->max)
 		return -1;
 
 	hid_hw_request(hdev, report, HID_REQ_GET_REPORT);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 31324609af4d..e4e2a5643bda 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -634,6 +634,36 @@ enum hid_battery_status {
 	HID_BATTERY_REPORTED,		/* Device sent unsolicited battery strength report */
 };
 
+/**
+ * struct hid_battery - represents a single battery power supply
+ * @dev: pointer to the parent hid_device
+ * @ps: the power supply instance
+ * @min: minimum battery value from HID descriptor
+ * @max: maximum battery value from HID descriptor
+ * @report_type: HID report type (input/feature)
+ * @report_id: HID report ID for this battery
+ * @charge_status: current charging status
+ * @status: battery reporting status
+ * @capacity: current battery capacity (0-100)
+ * @avoid_query: if true, avoid querying battery (e.g., for stylus)
+ * @present: if true, battery is present (may be dynamic)
+ * @ratelimit_time: rate limiting for battery reports
+ */
+struct hid_battery {
+	struct hid_device *dev;
+	struct power_supply *ps;
+	__s32 min;
+	__s32 max;
+	__s32 report_type;
+	__s32 report_id;
+	__s32 charge_status;
+	enum hid_battery_status status;
+	__s32 capacity;
+	bool avoid_query;
+	bool present;
+	ktime_t ratelimit_time;
+};
+
 struct hid_driver;
 struct hid_ll_driver;
 
@@ -670,20 +700,9 @@ struct hid_device {
 #ifdef CONFIG_HID_BATTERY_STRENGTH
 	/*
 	 * Power supply information for HID devices which report
-	 * battery strength. power_supply was successfully registered if
-	 * battery is non-NULL.
+	 * battery strength. battery is non-NULL if successfully registered.
 	 */
-	struct power_supply *battery;
-	__s32 battery_capacity;
-	__s32 battery_min;
-	__s32 battery_max;
-	__s32 battery_report_type;
-	__s32 battery_report_id;
-	__s32 battery_charge_status;
-	enum hid_battery_status battery_status;
-	bool battery_avoid_query;
-	bool battery_present;
-	ktime_t battery_ratelimit_time;
+	struct hid_battery *battery;
 #endif
 
 	unsigned long status;						/* see STAT flags above */
@@ -744,6 +763,13 @@ static inline void hid_set_drvdata(struct hid_device *hdev, void *data)
 	dev_set_drvdata(&hdev->dev, data);
 }
 
+#ifdef CONFIG_HID_BATTERY_STRENGTH
+static inline struct hid_battery *hid_get_battery(struct hid_device *hdev)
+{
+	return hdev->battery;
+}
+#endif
+
 #define HID_GLOBAL_STACK_SIZE 4
 #define HID_COLLECTION_STACK_SIZE 4
 
-- 
cgit v1.2.3


From 4a58ae85c3f9b142ffba023d0f976978ade57d1b Mon Sep 17 00:00:00 2001
From: Lucas Zampieri <lcasmz54@gmail.com>
Date: Sat, 14 Mar 2026 01:05:30 +0000
Subject: HID: input: Add support for multiple batteries per device

Add support for HID devices that report multiple batteries, each
identified by its report ID.

The hid_device->battery pointer is replaced with a batteries list.
Batteries are named using the pattern hid-{uniq}-battery-{report_id}.
The hid_get_battery() helper returns the first battery in the list for
backwards compatibility with single-battery drivers.

Signed-off-by: Lucas Zampieri <lcasmz54@gmail.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 drivers/hid/hid-core.c  |  4 ++++
 drivers/hid/hid-input.c | 44 ++++++++++++++++++++++++++++++--------------
 include/linux/hid.h     | 11 ++++++++---
 3 files changed, 42 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 833df14ef68f..deb6239e375c 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2991,6 +2991,10 @@ struct hid_device *hid_allocate_device(void)
 	mutex_init(&hdev->ll_open_lock);
 	kref_init(&hdev->ref);
 
+#ifdef CONFIG_HID_BATTERY_STRENGTH
+	INIT_LIST_HEAD(&hdev->batteries);
+#endif
+
 	ret = hid_bpf_device_init(hdev);
 	if (ret)
 		goto out_err;
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index b5d34658b68d..8fff185fe0e6 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -507,6 +507,18 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 	return ret;
 }
 
+static struct hid_battery *hidinput_find_battery(struct hid_device *dev,
+						 int report_id)
+{
+	struct hid_battery *bat;
+
+	list_for_each_entry(bat, &dev->batteries, list) {
+		if (bat->report_id == report_id)
+			return bat;
+	}
+	return NULL;
+}
+
 static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 				  struct hid_field *field, bool is_percentage)
 {
@@ -517,13 +529,15 @@ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 	s32 min, max;
 	int error;
 
-	if (dev->battery)
-		return 0;	/* already initialized? */
+	/* Check if battery for this report ID already exists */
+	if (hidinput_find_battery(dev, field->report->id))
+		return 0;
 
 	quirks = find_battery_quirk(dev);
 
-	hid_dbg(dev, "device %x:%x:%x %d quirks %d\n",
-		dev->bus, dev->vendor, dev->product, dev->version, quirks);
+	hid_dbg(dev, "device %x:%x:%x %d quirks %d report_id %d\n",
+		dev->bus, dev->vendor, dev->product, dev->version, quirks,
+		field->report->id);
 
 	if (quirks & HID_BATTERY_QUIRK_IGNORE)
 		return 0;
@@ -538,9 +552,11 @@ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 		goto err_free_bat;
 	}
 
-	psy_desc->name = devm_kasprintf(&dev->dev, GFP_KERNEL, "hid-%s-battery",
+	psy_desc->name = devm_kasprintf(&dev->dev, GFP_KERNEL,
+					"hid-%s-battery-%d",
 					strlen(dev->uniq) ?
-						dev->uniq : dev_name(&dev->dev));
+						dev->uniq : dev_name(&dev->dev),
+					field->report->id);
 	if (!psy_desc->name) {
 		error = -ENOMEM;
 		goto err_free_desc;
@@ -593,7 +609,7 @@ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 	}
 
 	power_supply_powers(bat->ps, &dev->dev);
-	dev->battery = bat;
+	list_add_tail(&bat->list, &dev->batteries);
 	return 0;
 
 err_free_name:
@@ -602,7 +618,6 @@ err_free_desc:
 	devm_kfree(&dev->dev, psy_desc);
 err_free_bat:
 	devm_kfree(&dev->dev, bat);
-	dev->battery = NULL;
 	return error;
 }
 
@@ -620,12 +635,13 @@ static bool hidinput_update_battery_charge_status(struct hid_battery *bat,
 	return false;
 }
 
-static void hidinput_update_battery(struct hid_device *dev, unsigned int usage,
-				    int value)
+static void hidinput_update_battery(struct hid_device *dev, int report_id,
+				    unsigned int usage, int value)
 {
-	struct hid_battery *bat = dev->battery;
+	struct hid_battery *bat;
 	int capacity;
 
+	bat = hidinput_find_battery(dev, report_id);
 	if (!bat)
 		return;
 
@@ -661,8 +677,8 @@ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 	return 0;
 }
 
-static void hidinput_update_battery(struct hid_device *dev, unsigned int usage,
-				    int value)
+static void hidinput_update_battery(struct hid_device *dev, int report_id,
+				    unsigned int usage, int value)
 {
 }
 #endif	/* CONFIG_HID_BATTERY_STRENGTH */
@@ -1546,7 +1562,7 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct
 		return;
 
 	if (usage->type == EV_PWR) {
-		hidinput_update_battery(hid, usage->hid, value);
+		hidinput_update_battery(hid, report->id, usage->hid, value);
 		return;
 	}
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index e4e2a5643bda..fb1a3f3ad9fa 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -648,6 +648,7 @@ enum hid_battery_status {
  * @avoid_query: if true, avoid querying battery (e.g., for stylus)
  * @present: if true, battery is present (may be dynamic)
  * @ratelimit_time: rate limiting for battery reports
+ * @list: list node for linking into hid_device's battery list
  */
 struct hid_battery {
 	struct hid_device *dev;
@@ -662,6 +663,7 @@ struct hid_battery {
 	bool avoid_query;
 	bool present;
 	ktime_t ratelimit_time;
+	struct list_head list;
 };
 
 struct hid_driver;
@@ -700,9 +702,10 @@ struct hid_device {
 #ifdef CONFIG_HID_BATTERY_STRENGTH
 	/*
 	 * Power supply information for HID devices which report
-	 * battery strength. battery is non-NULL if successfully registered.
+	 * battery strength. Each battery is tracked separately in the
+	 * batteries list.
 	 */
-	struct hid_battery *battery;
+	struct list_head batteries;
 #endif
 
 	unsigned long status;						/* see STAT flags above */
@@ -766,7 +769,9 @@ static inline void hid_set_drvdata(struct hid_device *hdev, void *data)
 #ifdef CONFIG_HID_BATTERY_STRENGTH
 static inline struct hid_battery *hid_get_battery(struct hid_device *hdev)
 {
-	return hdev->battery;
+	if (list_empty(&hdev->batteries))
+		return NULL;
+	return list_first_entry(&hdev->batteries, struct hid_battery, list);
 }
 #endif
 
-- 
cgit v1.2.3


From a1e97ce80d9f41d0bb83951d758ff6fe49f3de60 Mon Sep 17 00:00:00 2001
From: Yang Xiuwei <yangxiuwei@kylinos.cn>
Date: Tue, 17 Mar 2026 15:22:25 +0800
Subject: bsg: add io_uring command support to generic layer

Add an io_uring command handler to the generic BSG layer. The new
.uring_cmd file operation validates io_uring features and delegates
handling to a per-queue bsg_uring_cmd_fn callback.

Extend bsg_register_queue() so transport drivers can register both
sg_io and io_uring command handlers.

Signed-off-by: Yang Xiuwei <yangxiuwei@kylinos.cn>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20260317072226.2598233-3-yangxiuwei@kylinos.cn
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bsg-lib.c         |  2 +-
 block/bsg.c             | 33 ++++++++++++++++++++++++++++++++-
 drivers/scsi/scsi_bsg.c | 10 +++++++++-
 include/linux/bsg.h     |  6 +++++-
 4 files changed, 47 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 20cd0ef3c394..fdb4b290ca68 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -393,7 +393,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
 
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
-	bset->bd = bsg_register_queue(q, dev, name, bsg_transport_sg_io_fn);
+	bset->bd = bsg_register_queue(q, dev, name, bsg_transport_sg_io_fn, NULL);
 	if (IS_ERR(bset->bd)) {
 		ret = PTR_ERR(bset->bd);
 		goto out_cleanup_queue;
diff --git a/block/bsg.c b/block/bsg.c
index e0af6206ed28..82aaf3cee582 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -12,6 +12,7 @@
 #include <linux/idr.h>
 #include <linux/bsg.h>
 #include <linux/slab.h>
+#include <linux/io_uring/cmd.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_ioctl.h>
@@ -28,6 +29,7 @@ struct bsg_device {
 	unsigned int timeout;
 	unsigned int reserved_size;
 	bsg_sg_io_fn *sg_io_fn;
+	bsg_uring_cmd_fn *uring_cmd_fn;
 };
 
 static inline struct bsg_device *to_bsg_device(struct inode *inode)
@@ -158,11 +160,38 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	}
 }
 
+static int bsg_check_uring_features(unsigned int issue_flags)
+{
+	/* BSG passthrough requires big SQE/CQE support */
+	if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
+	    (IO_URING_F_SQE128|IO_URING_F_CQE32))
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+static int bsg_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
+{
+	struct bsg_device *bd = to_bsg_device(file_inode(ioucmd->file));
+	bool open_for_write = ioucmd->file->f_mode & FMODE_WRITE;
+	struct request_queue *q = bd->queue;
+	int ret;
+
+	ret = bsg_check_uring_features(issue_flags);
+	if (ret)
+		return ret;
+
+	if (!bd->uring_cmd_fn)
+		return -EOPNOTSUPP;
+
+	return bd->uring_cmd_fn(q, ioucmd, issue_flags, open_for_write);
+}
+
 static const struct file_operations bsg_fops = {
 	.open		=	bsg_open,
 	.release	=	bsg_release,
 	.unlocked_ioctl	=	bsg_ioctl,
 	.compat_ioctl	=	compat_ptr_ioctl,
+	.uring_cmd	=	bsg_uring_cmd,
 	.owner		=	THIS_MODULE,
 	.llseek		=	default_llseek,
 };
@@ -187,7 +216,8 @@ void bsg_unregister_queue(struct bsg_device *bd)
 EXPORT_SYMBOL_GPL(bsg_unregister_queue);
 
 struct bsg_device *bsg_register_queue(struct request_queue *q,
-		struct device *parent, const char *name, bsg_sg_io_fn *sg_io_fn)
+		struct device *parent, const char *name, bsg_sg_io_fn *sg_io_fn,
+		bsg_uring_cmd_fn *uring_cmd_fn)
 {
 	struct bsg_device *bd;
 	int ret;
@@ -199,6 +229,7 @@ struct bsg_device *bsg_register_queue(struct request_queue *q,
 	bd->reserved_size = INT_MAX;
 	bd->queue = q;
 	bd->sg_io_fn = sg_io_fn;
+	bd->uring_cmd_fn = uring_cmd_fn;
 
 	ret = ida_alloc_max(&bsg_minor_ida, BSG_MAX_DEVS - 1, GFP_KERNEL);
 	if (ret < 0) {
diff --git a/drivers/scsi/scsi_bsg.c b/drivers/scsi/scsi_bsg.c
index a9a9ec086a7e..4d57e524e141 100644
--- a/drivers/scsi/scsi_bsg.c
+++ b/drivers/scsi/scsi_bsg.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bsg.h>
+#include <linux/io_uring/cmd.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_ioctl.h>
 #include <scsi/scsi_cmnd.h>
@@ -9,6 +10,12 @@
 
 #define uptr64(val) ((void __user *)(uintptr_t)(val))
 
+static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *ioucmd,
+			       unsigned int issue_flags, bool open_for_write)
+{
+	return -EOPNOTSUPP;
+}
+
 static int scsi_bsg_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
 		bool open_for_write, unsigned int timeout)
 {
@@ -99,5 +106,6 @@ out_put_request:
 struct bsg_device *scsi_bsg_register_queue(struct scsi_device *sdev)
 {
 	return bsg_register_queue(sdev->request_queue, &sdev->sdev_gendev,
-			dev_name(&sdev->sdev_gendev), scsi_bsg_sg_io_fn);
+			dev_name(&sdev->sdev_gendev), scsi_bsg_sg_io_fn,
+			scsi_bsg_uring_cmd);
 }
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index ee2df73edf83..162730bfc2d8 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -7,13 +7,17 @@
 struct bsg_device;
 struct device;
 struct request_queue;
+struct io_uring_cmd;
 
 typedef int (bsg_sg_io_fn)(struct request_queue *, struct sg_io_v4 *hdr,
 		bool open_for_write, unsigned int timeout);
 
+typedef int (bsg_uring_cmd_fn)(struct request_queue *q, struct io_uring_cmd *ioucmd,
+			       unsigned int issue_flags, bool open_for_write);
+
 struct bsg_device *bsg_register_queue(struct request_queue *q,
 		struct device *parent, const char *name,
-		bsg_sg_io_fn *sg_io_fn);
+		bsg_sg_io_fn *sg_io_fn, bsg_uring_cmd_fn *uring_cmd_fn);
 void bsg_unregister_queue(struct bsg_device *bcd);
 
 #endif /* _LINUX_BSG_H */
-- 
cgit v1.2.3


From f656807150e3e1c6f76cab918e5adfad6d881d58 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <Sascha.Bischoff@arm.com>
Date: Thu, 19 Mar 2026 15:52:34 +0000
Subject: KVM: arm64: gic-v5: Detect implemented PPIs on boot

As part of booting the system and initialising KVM, create and
populate a mask of the implemented PPIs. This mask allows future PPI
operations (such as save/restore or state, or syncing back into the
shadow state) to only consider PPIs that are actually implemented on
the host.

The set of implemented virtual PPIs matches the set of implemented
physical PPIs for a GICv5 host. Therefore, this mask represents all
PPIs that could ever by used by a GICv5-based guest on a specific
host, albeit pre-filtered by what we support in KVM (see next
paragraph).

Only architected PPIs are currently supported in KVM with
GICv5. Moreover, as KVM only supports a subset of all possible PPIS
(Timers, PMU, GICv5 SW_PPI) the PPI mask only includes these PPIs, if
present. The timers are always assumed to be present; if we have KVM
we have EL2, which means that we have the EL1 & EL2 Timer PPIs. If we
have a PMU (v3), then the PMUIRQ is present. The GICv5 SW_PPI is
always assumed to be present.

Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260319154937.3619520-12-sascha.bischoff@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-v5.c      | 31 +++++++++++++++++++++++++++++++
 include/kvm/arm_vgic.h             | 13 +++++++++++++
 include/linux/irqchip/arm-gic-v5.h | 22 ++++++++++++++++++++++
 3 files changed, 66 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index 9d9aa5774e63..cf8382a954bb 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -4,10 +4,39 @@
  */
 
 #include <kvm/arm_vgic.h>
+
+#include <linux/bitops.h>
 #include <linux/irqchip/arm-vgic-info.h>
 
 #include "vgic.h"
 
+static struct vgic_v5_ppi_caps ppi_caps;
+
+/*
+ * Not all PPIs are guaranteed to be implemented for GICv5. Deterermine which
+ * ones are, and generate a mask.
+ */
+static void vgic_v5_get_implemented_ppis(void)
+{
+	if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
+		return;
+
+	/*
+	 * If we have KVM, we have EL2, which means that we have support for the
+	 * EL1 and EL2 Physical & Virtual timers.
+	 */
+	__assign_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask, 1);
+	__assign_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask, 1);
+	__assign_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask, 1);
+	__assign_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask, 1);
+
+	/* The SW_PPI should be available */
+	__assign_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask, 1);
+
+	/* The PMUIRQ is available if we have the PMU */
+	__assign_bit(GICV5_ARCH_PPI_PMUIRQ, ppi_caps.impl_ppi_mask, system_supports_pmuv3());
+}
+
 /*
  * Probe for a vGICv5 compatible interrupt controller, returning 0 on success.
  * Currently only supports GICv3-based VMs on a GICv5 host, and hence only
@@ -18,6 +47,8 @@ int vgic_v5_probe(const struct gic_kvm_info *info)
 	u64 ich_vtr_el2;
 	int ret;
 
+	vgic_v5_get_implemented_ppis();
+
 	if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY))
 		return -ENODEV;
 
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index b8011b395796..0fabeabedd6d 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -32,6 +32,14 @@
 #define VGIC_MIN_LPI		8192
 #define KVM_IRQCHIP_NUM_PINS	(1020 - 32)
 
+/*
+ * GICv5 supports 128 PPIs, but only the first 64 are architected. We only
+ * support the timers and PMU in KVM, both of which are architected. Rather than
+ * handling twice the state, we instead opt to only support the architected set
+ * in KVM for now. At a future stage, this can be bumped up to 128, if required.
+ */
+#define VGIC_V5_NR_PRIVATE_IRQS	64
+
 #define is_v5_type(t, i)	(FIELD_GET(GICV5_HWIRQ_TYPE, (i)) == (t))
 
 #define __irq_is_sgi(t, i)						\
@@ -420,6 +428,11 @@ struct vgic_v3_cpu_if {
 	unsigned int used_lrs;
 };
 
+/* What PPI capabilities does a GICv5 host have */
+struct vgic_v5_ppi_caps {
+	DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
+};
+
 struct vgic_cpu {
 	/* CPU vif control registers for world switch */
 	union {
diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h
index b78488df6c98..b1566a7c93ec 100644
--- a/include/linux/irqchip/arm-gic-v5.h
+++ b/include/linux/irqchip/arm-gic-v5.h
@@ -24,6 +24,28 @@
 #define GICV5_HWIRQ_TYPE_LPI		UL(0x2)
 #define GICV5_HWIRQ_TYPE_SPI		UL(0x3)
 
+/*
+ * Architected PPIs
+ */
+#define GICV5_ARCH_PPI_S_DB_PPI		0x0
+#define GICV5_ARCH_PPI_RL_DB_PPI	0x1
+#define GICV5_ARCH_PPI_NS_DB_PPI	0x2
+#define GICV5_ARCH_PPI_SW_PPI		0x3
+#define GICV5_ARCH_PPI_HACDBSIRQ	0xf
+#define GICV5_ARCH_PPI_CNTHVS		0x13
+#define GICV5_ARCH_PPI_CNTHPS		0x14
+#define GICV5_ARCH_PPI_PMBIRQ		0x15
+#define GICV5_ARCH_PPI_COMMIRQ		0x16
+#define GICV5_ARCH_PPI_PMUIRQ		0x17
+#define GICV5_ARCH_PPI_CTIIRQ		0x18
+#define GICV5_ARCH_PPI_GICMNT		0x19
+#define GICV5_ARCH_PPI_CNTHP		0x1a
+#define GICV5_ARCH_PPI_CNTV		0x1b
+#define GICV5_ARCH_PPI_CNTHV		0x1c
+#define GICV5_ARCH_PPI_CNTPS		0x1d
+#define GICV5_ARCH_PPI_CNTP		0x1e
+#define GICV5_ARCH_PPI_TRBIRQ		0x1f
+
 /*
  * Tables attributes
  */
-- 
cgit v1.2.3


From 9b8e3d4ca0e734dd13dc261c5f888b359f8f5983 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <Sascha.Bischoff@arm.com>
Date: Thu, 19 Mar 2026 15:54:08 +0000
Subject: KVM: arm64: gic-v5: Implement GICv5 load/put and save/restore

This change introduces GICv5 load/put. Additionally, it plumbs in
save/restore for:

* PPIs (ICH_PPI_x_EL2 regs)
* ICH_VMCR_EL2
* ICH_APR_EL2
* ICC_ICSR_EL1

A GICv5-specific enable bit is added to struct vgic_vmcr as this
differs from previous GICs. On GICv5-native systems, the VMCR only
contains the enable bit (driven by the guest via ICC_CR0_EL1.EN) and
the priority mask (PCR).

A struct gicv5_vpe is also introduced. This currently only contains a
single field - bool resident - which is used to track if a VPE is
currently running or not, and is used to avoid a case of double load
or double put on the WFI path for a vCPU. This struct will be extended
as additional GICv5 support is merged, specifically for VPE doorbells.

Co-authored-by: Timothy Hayes <timothy.hayes@arm.com>
Signed-off-by: Timothy Hayes <timothy.hayes@arm.com>
Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260319154937.3619520-18-sascha.bischoff@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/hyp/nvhe/switch.c   | 12 +++++++
 arch/arm64/kvm/vgic/vgic-mmio.c    | 40 +++++++++++++++++----
 arch/arm64/kvm/vgic/vgic-v5.c      | 74 ++++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/vgic/vgic.c         | 74 ++++++++++++++++++++++++++++++--------
 arch/arm64/kvm/vgic/vgic.h         |  7 ++++
 include/kvm/arm_vgic.h             |  2 ++
 include/linux/irqchip/arm-gic-v5.h |  5 +++
 7 files changed, 193 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index b41485ce295a..a88da302b6d0 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -113,6 +113,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 /* Save VGICv3 state on non-VHE systems */
 static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
 {
+	if (vgic_is_v5(kern_hyp_va(vcpu->kvm))) {
+		__vgic_v5_save_state(&vcpu->arch.vgic_cpu.vgic_v5);
+		__vgic_v5_save_ppi_state(&vcpu->arch.vgic_cpu.vgic_v5);
+		return;
+	}
+
 	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
 		__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
 		__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
@@ -122,6 +128,12 @@ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
 /* Restore VGICv3 state on non-VHE systems */
 static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
 {
+	if (vgic_is_v5(kern_hyp_va(vcpu->kvm))) {
+		__vgic_v5_restore_state(&vcpu->arch.vgic_cpu.vgic_v5);
+		__vgic_v5_restore_ppi_state(&vcpu->arch.vgic_cpu.vgic_v5);
+		return;
+	}
+
 	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
 		__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
 		__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index a573b1f0c6cb..74d76dec9730 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -842,18 +842,46 @@ vgic_find_mmio_region(const struct vgic_register_region *regions,
 
 void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
 {
-	if (kvm_vgic_global_state.type == VGIC_V2)
-		vgic_v2_set_vmcr(vcpu, vmcr);
-	else
+	const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	switch (dist->vgic_model) {
+	case KVM_DEV_TYPE_ARM_VGIC_V5:
+		vgic_v5_set_vmcr(vcpu, vmcr);
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V3:
 		vgic_v3_set_vmcr(vcpu, vmcr);
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V2:
+		if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+			vgic_v3_set_vmcr(vcpu, vmcr);
+		else
+			vgic_v2_set_vmcr(vcpu, vmcr);
+		break;
+	default:
+		BUG();
+	}
 }
 
 void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
 {
-	if (kvm_vgic_global_state.type == VGIC_V2)
-		vgic_v2_get_vmcr(vcpu, vmcr);
-	else
+	const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	switch (dist->vgic_model) {
+	case KVM_DEV_TYPE_ARM_VGIC_V5:
+		vgic_v5_get_vmcr(vcpu, vmcr);
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V3:
 		vgic_v3_get_vmcr(vcpu, vmcr);
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V2:
+		if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+			vgic_v3_get_vmcr(vcpu, vmcr);
+		else
+			vgic_v2_get_vmcr(vcpu, vmcr);
+		break;
+	default:
+		BUG();
+	}
 }
 
 /*
diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index cf8382a954bb..41317e1d94a2 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -86,3 +86,77 @@ int vgic_v5_probe(const struct gic_kvm_info *info)
 
 	return 0;
 }
+
+void vgic_v5_load(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+
+	/*
+	 * On the WFI path, vgic_load is called a second time. The first is when
+	 * scheduling in the vcpu thread again, and the second is when leaving
+	 * WFI. Skip the second instance as it serves no purpose and just
+	 * restores the same state again.
+	 */
+	if (cpu_if->gicv5_vpe.resident)
+		return;
+
+	kvm_call_hyp(__vgic_v5_restore_vmcr_apr, cpu_if);
+
+	cpu_if->gicv5_vpe.resident = true;
+}
+
+void vgic_v5_put(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+
+	/*
+	 * Do nothing if we're not resident. This can happen in the WFI path
+	 * where we do a vgic_put in the WFI path and again later when
+	 * descheduling the thread. We risk losing VMCR state if we sync it
+	 * twice, so instead return early in this case.
+	 */
+	if (!cpu_if->gicv5_vpe.resident)
+		return;
+
+	kvm_call_hyp(__vgic_v5_save_apr, cpu_if);
+
+	cpu_if->gicv5_vpe.resident = false;
+}
+
+void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	u64 vmcr = cpu_if->vgic_vmcr;
+
+	vmcrp->en = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_EN, vmcr);
+	vmcrp->pmr = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_VPMR, vmcr);
+}
+
+void vgic_v5_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	u64 vmcr;
+
+	vmcr = FIELD_PREP(FEAT_GCIE_ICH_VMCR_EL2_VPMR, vmcrp->pmr) |
+	       FIELD_PREP(FEAT_GCIE_ICH_VMCR_EL2_EN, vmcrp->en);
+
+	cpu_if->vgic_vmcr = vmcr;
+}
+
+void vgic_v5_restore_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+
+	__vgic_v5_restore_state(cpu_if);
+	__vgic_v5_restore_ppi_state(cpu_if);
+	dsb(sy);
+}
+
+void vgic_v5_save_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+
+	__vgic_v5_save_state(cpu_if);
+	__vgic_v5_save_ppi_state(cpu_if);
+	dsb(sy);
+}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 2f3f892cbddc..84199d2df80a 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -1017,7 +1017,10 @@ static inline bool can_access_vgic_from_kernel(void)
 
 static inline void vgic_save_state(struct kvm_vcpu *vcpu)
 {
-	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+	/* No switch statement here. See comment in vgic_restore_state() */
+	if (vgic_is_v5(vcpu->kvm))
+		vgic_v5_save_state(vcpu);
+	else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 		vgic_v2_save_state(vcpu);
 	else
 		__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
@@ -1026,14 +1029,16 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu)
 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-	/* If nesting, emulate the HW effect from L0 to L1 */
-	if (vgic_state_is_nested(vcpu)) {
-		vgic_v3_sync_nested(vcpu);
-		return;
-	}
+	if (vgic_is_v3(vcpu->kvm)) {
+		/* If nesting, emulate the HW effect from L0 to L1 */
+		if (vgic_state_is_nested(vcpu)) {
+			vgic_v3_sync_nested(vcpu);
+			return;
+		}
 
-	if (vcpu_has_nv(vcpu))
-		vgic_v3_nested_update_mi(vcpu);
+		if (vcpu_has_nv(vcpu))
+			vgic_v3_nested_update_mi(vcpu);
+	}
 
 	if (can_access_vgic_from_kernel())
 		vgic_save_state(vcpu);
@@ -1055,7 +1060,18 @@ void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu)
 
 static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
 {
-	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+	/*
+	 * As nice as it would be to restructure this code into a switch
+	 * statement as can be found elsewhere, the logic quickly gets ugly.
+	 *
+	 * __vgic_v3_restore_state() is doing a lot of heavy lifting here. It is
+	 * required for GICv3-on-GICv3, GICv2-on-GICv3, GICv3-on-GICv5, and the
+	 * no-in-kernel-irqchip case on GICv3 hardware. Hence, adding a switch
+	 * here results in much more complex code.
+	 */
+	if (vgic_is_v5(vcpu->kvm))
+		vgic_v5_restore_state(vcpu);
+	else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 		vgic_v2_restore_state(vcpu);
 	else
 		__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
@@ -1109,30 +1125,58 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 void kvm_vgic_load(struct kvm_vcpu *vcpu)
 {
+	const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
 	if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
 		if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 			__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
 		return;
 	}
 
-	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
-		vgic_v2_load(vcpu);
-	else
+	switch (dist->vgic_model) {
+	case KVM_DEV_TYPE_ARM_VGIC_V5:
+		vgic_v5_load(vcpu);
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V3:
 		vgic_v3_load(vcpu);
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V2:
+		if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+			vgic_v3_load(vcpu);
+		else
+			vgic_v2_load(vcpu);
+		break;
+	default:
+		BUG();
+	}
 }
 
 void kvm_vgic_put(struct kvm_vcpu *vcpu)
 {
+	const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
 	if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
 		if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 			__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
 		return;
 	}
 
-	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
-		vgic_v2_put(vcpu);
-	else
+	switch (dist->vgic_model) {
+	case KVM_DEV_TYPE_ARM_VGIC_V5:
+		vgic_v5_put(vcpu);
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V3:
 		vgic_v3_put(vcpu);
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V2:
+		if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+			vgic_v3_put(vcpu);
+		else
+			vgic_v2_put(vcpu);
+		break;
+	default:
+		BUG();
+	}
 }
 
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 7b7eed69d797..cc487a69d038 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -187,6 +187,7 @@ static inline u64 vgic_ich_hcr_trap_bits(void)
  * registers regardless of the hardware backed GIC used.
  */
 struct vgic_vmcr {
+	u32	en; /* GICv5-specific */
 	u32	grpen0;
 	u32	grpen1;
 
@@ -363,6 +364,12 @@ void vgic_debug_init(struct kvm *kvm);
 void vgic_debug_destroy(struct kvm *kvm);
 
 int vgic_v5_probe(const struct gic_kvm_info *info);
+void vgic_v5_load(struct kvm_vcpu *vcpu);
+void vgic_v5_put(struct kvm_vcpu *vcpu);
+void vgic_v5_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_v5_restore_state(struct kvm_vcpu *vcpu);
+void vgic_v5_save_state(struct kvm_vcpu *vcpu);
 
 static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
 {
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 07e394690dcc..b27bfc463a31 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -447,6 +447,8 @@ struct vgic_v5_cpu_if {
 	 * it is the hyp's responsibility to keep the state constistent.
 	 */
 	u64	vgic_icsr;
+
+	struct gicv5_vpe gicv5_vpe;
 };
 
 /* What PPI capabilities does a GICv5 host have */
diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h
index b1566a7c93ec..40d2fce68294 100644
--- a/include/linux/irqchip/arm-gic-v5.h
+++ b/include/linux/irqchip/arm-gic-v5.h
@@ -387,6 +387,11 @@ int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type);
 int gicv5_irs_iste_alloc(u32 lpi);
 void gicv5_irs_syncr(void);
 
+/* Embedded in kvm.arch */
+struct gicv5_vpe {
+	bool			resident;
+};
+
 struct gicv5_its_devtab_cfg {
 	union {
 		struct {
-- 
cgit v1.2.3


From 37a25294682d28ef3bd131566602450a72c4d839 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <Sascha.Bischoff@arm.com>
Date: Thu, 19 Mar 2026 15:58:48 +0000
Subject: KVM: arm64: gic-v5: Introduce kvm_arm_vgic_v5_ops and register them

Only the KVM_DEV_ARM_VGIC_GRP_CTRL->KVM_DEV_ARM_VGIC_CTRL_INIT op is
currently supported. All other ops are stubbed out.

Co-authored-by: Timothy Hayes <timothy.hayes@arm.com>
Signed-off-by: Timothy Hayes <timothy.hayes@arm.com>
Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260319154937.3619520-36-sascha.bischoff@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-kvm-device.c | 74 +++++++++++++++++++++++++++++++++++
 include/linux/kvm_host.h              |  1 +
 2 files changed, 75 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index b12ba99a423e..772da54c1518 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -336,6 +336,10 @@ int kvm_register_vgic_device(unsigned long type)
 			break;
 		ret = kvm_vgic_register_its_device();
 		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V5:
+		ret = kvm_register_device_ops(&kvm_arm_vgic_v5_ops,
+					      KVM_DEV_TYPE_ARM_VGIC_V5);
+		break;
 	}
 
 	return ret;
@@ -715,3 +719,73 @@ struct kvm_device_ops kvm_arm_vgic_v3_ops = {
 	.get_attr = vgic_v3_get_attr,
 	.has_attr = vgic_v3_has_attr,
 };
+
+static int vgic_v5_set_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return -ENXIO;
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return vgic_set_common_attr(dev, attr);
+		default:
+			return -ENXIO;
+		}
+	default:
+		return -ENXIO;
+	}
+
+}
+
+static int vgic_v5_get_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return -ENXIO;
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return vgic_get_common_attr(dev, attr);
+		default:
+			return -ENXIO;
+		}
+	default:
+		return -ENXIO;
+	}
+}
+
+static int vgic_v5_has_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return -ENXIO;
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return 0;
+		default:
+			return -ENXIO;
+		}
+	default:
+		return -ENXIO;
+	}
+}
+
+struct kvm_device_ops kvm_arm_vgic_v5_ops = {
+	.name = "kvm-arm-vgic-v5",
+	.create = vgic_create,
+	.destroy = vgic_destroy,
+	.set_attr = vgic_v5_set_attr,
+	.get_attr = vgic_v5_get_attr,
+	.has_attr = vgic_v5_has_attr,
+};
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6b76e7a6f4c2..779d9ed85cbf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2366,6 +2366,7 @@ void kvm_unregister_device_ops(u32 type);
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
+extern struct kvm_device_ops kvm_arm_vgic_v5_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
-- 
cgit v1.2.3


From 50ff3f404617c5d15832fec3711978104c4c9efd Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 17 Mar 2026 18:17:49 +0200
Subject: vfio: Add support for VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2

Currently, existing VFIO_MIG_GET_PRECOPY_INFO implementations don't
assign info.flags before copy_to_user().

Because they copy the struct in from userspace first, this effectively
echoes userspace-provided flags back as output, preventing the field
from being used to report new reliable data from the drivers.

Add support for a new device feature named
VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2.

On SET, enables the v2 pre_copy_info behaviour, where the
vfio_precopy_info.flags is a valid output field.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20260317161753.18964-3-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex@shazbot.org>
---
 drivers/vfio/vfio_main.c | 21 +++++++++++++++++++++
 include/linux/vfio.h     |  1 +
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 742477546b15..8666f35fb3f0 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -553,6 +553,7 @@ static void vfio_df_device_last_close(struct vfio_device_file *df)
 		vfio_df_iommufd_unbind(df);
 	else
 		vfio_device_group_unuse_iommu(device);
+	device->precopy_info_v2 = 0;
 	module_put(device->dev->driver->owner);
 }
 
@@ -964,6 +965,23 @@ vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device,
 	return 0;
 }
 
+static int
+vfio_ioctl_device_feature_migration_precopy_info_v2(struct vfio_device *device,
+						    u32 flags, size_t argsz)
+{
+	int ret;
+
+	if (!(device->migration_flags & VFIO_MIGRATION_PRE_COPY))
+		return -EINVAL;
+
+	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0);
+	if (ret != 1)
+		return ret;
+
+	device->precopy_info_v2 = 1;
+	return 0;
+}
+
 static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
 					       u32 flags, void __user *arg,
 					       size_t argsz)
@@ -1251,6 +1269,9 @@ static int vfio_ioctl_device_feature(struct vfio_device *device,
 		return vfio_ioctl_device_feature_migration_data_size(
 			device, feature.flags, arg->data,
 			feature.argsz - minsz);
+	case VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2:
+		return vfio_ioctl_device_feature_migration_precopy_info_v2(
+			device, feature.flags, feature.argsz - minsz);
 	default:
 		if (unlikely(!device->ops->device_feature))
 			return -ENOTTY;
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e90859956514..7c1d33283e04 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -52,6 +52,7 @@ struct vfio_device {
 	struct vfio_device_set *dev_set;
 	struct list_head dev_set_list;
 	unsigned int migration_flags;
+	u8 precopy_info_v2;
 	struct kvm *kvm;
 
 	/* Members below here are private, not for driver use */
-- 
cgit v1.2.3


From c995498636c704641c9e809c31b59445b48f7adc Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 17 Mar 2026 18:17:50 +0200
Subject: vfio: Adapt drivers to use the core helper vfio_check_precopy_ioctl

Introduce a core helper function for VFIO_MIG_GET_PRECOPY_INFO and adapt
all drivers to use it.

It centralizes the common code and ensures that output flags are cleared
on entry, in case user opts in to VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2.
This preventing any unintended echoing of userspace data back to
userspace.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20260317161753.18964-4-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex@shazbot.org>
---
 drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 17 ++++-------
 drivers/vfio/pci/mlx5/main.c                   | 18 ++++--------
 drivers/vfio/pci/qat/main.c                    | 17 ++++-------
 drivers/vfio/pci/virtio/migrate.c              | 17 ++++-------
 include/linux/vfio.h                           | 39 ++++++++++++++++++++++++++
 samples/vfio-mdev/mtty.c                       | 16 ++++-------
 6 files changed, 68 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 1d367cff7dcf..bb121f635b9f 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -857,18 +857,12 @@ static long hisi_acc_vf_precopy_ioctl(struct file *filp,
 	struct hisi_acc_vf_core_device *hisi_acc_vdev = migf->hisi_acc_vdev;
 	loff_t *pos = &filp->f_pos;
 	struct vfio_precopy_info info;
-	unsigned long minsz;
 	int ret;
 
-	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
-		return -ENOTTY;
-
-	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
-
-	if (copy_from_user(&info, (void __user *)arg, minsz))
-		return -EFAULT;
-	if (info.argsz < minsz)
-		return -EINVAL;
+	ret = vfio_check_precopy_ioctl(&hisi_acc_vdev->core_device.vdev, cmd,
+				       arg, &info);
+	if (ret)
+		return ret;
 
 	mutex_lock(&hisi_acc_vdev->state_mutex);
 	if (hisi_acc_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY) {
@@ -893,7 +887,8 @@ static long hisi_acc_vf_precopy_ioctl(struct file *filp,
 	mutex_unlock(&migf->lock);
 	mutex_unlock(&hisi_acc_vdev->state_mutex);
 
-	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+	return copy_to_user((void __user *)arg, &info,
+		offsetofend(struct vfio_precopy_info, dirty_bytes)) ? -EFAULT : 0;
 out:
 	mutex_unlock(&migf->lock);
 	mutex_unlock(&hisi_acc_vdev->state_mutex);
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index dbba6173894b..fb541c17c712 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -463,21 +463,14 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
 	struct mlx5_vhca_data_buffer *buf;
 	struct vfio_precopy_info info = {};
 	loff_t *pos = &filp->f_pos;
-	unsigned long minsz;
 	size_t inc_length = 0;
 	bool end_of_data = false;
 	int ret;
 
-	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
-		return -ENOTTY;
-
-	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
-
-	if (copy_from_user(&info, (void __user *)arg, minsz))
-		return -EFAULT;
-
-	if (info.argsz < minsz)
-		return -EINVAL;
+	ret = vfio_check_precopy_ioctl(&mvdev->core_device.vdev, cmd, arg,
+				       &info);
+	if (ret)
+		return ret;
 
 	mutex_lock(&mvdev->state_mutex);
 	if (mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
@@ -545,7 +538,8 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
 
 done:
 	mlx5vf_state_mutex_unlock(mvdev);
-	if (copy_to_user((void __user *)arg, &info, minsz))
+	if (copy_to_user((void __user *)arg, &info,
+			 offsetofend(struct vfio_precopy_info, dirty_bytes)))
 		return -EFAULT;
 	return 0;
 
diff --git a/drivers/vfio/pci/qat/main.c b/drivers/vfio/pci/qat/main.c
index b982d4ae666c..b3a4b7a55696 100644
--- a/drivers/vfio/pci/qat/main.c
+++ b/drivers/vfio/pci/qat/main.c
@@ -121,18 +121,12 @@ static long qat_vf_precopy_ioctl(struct file *filp, unsigned int cmd,
 	struct qat_mig_dev *mig_dev = qat_vdev->mdev;
 	struct vfio_precopy_info info;
 	loff_t *pos = &filp->f_pos;
-	unsigned long minsz;
 	int ret = 0;
 
-	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
-		return -ENOTTY;
-
-	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
-
-	if (copy_from_user(&info, (void __user *)arg, minsz))
-		return -EFAULT;
-	if (info.argsz < minsz)
-		return -EINVAL;
+	ret = vfio_check_precopy_ioctl(&qat_vdev->core_device.vdev, cmd, arg,
+				       &info);
+	if (ret)
+		return ret;
 
 	mutex_lock(&qat_vdev->state_mutex);
 	if (qat_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
@@ -160,7 +154,8 @@ out:
 	mutex_unlock(&qat_vdev->state_mutex);
 	if (ret)
 		return ret;
-	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+	return copy_to_user((void __user *)arg, &info,
+		offsetofend(struct vfio_precopy_info, dirty_bytes)) ? -EFAULT : 0;
 }
 
 static ssize_t qat_vf_save_read(struct file *filp, char __user *buf,
diff --git a/drivers/vfio/pci/virtio/migrate.c b/drivers/vfio/pci/virtio/migrate.c
index 35fa2d6ed611..7e11834ad512 100644
--- a/drivers/vfio/pci/virtio/migrate.c
+++ b/drivers/vfio/pci/virtio/migrate.c
@@ -443,19 +443,13 @@ static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
 	struct vfio_precopy_info info = {};
 	loff_t *pos = &filp->f_pos;
 	bool end_of_data = false;
-	unsigned long minsz;
 	u32 ctx_size = 0;
 	int ret;
 
-	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
-		return -ENOTTY;
-
-	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
-	if (copy_from_user(&info, (void __user *)arg, minsz))
-		return -EFAULT;
-
-	if (info.argsz < minsz)
-		return -EINVAL;
+	ret = vfio_check_precopy_ioctl(&virtvdev->core_device.vdev, cmd, arg,
+				       &info);
+	if (ret)
+		return ret;
 
 	mutex_lock(&virtvdev->state_mutex);
 	if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
@@ -514,7 +508,8 @@ static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
 
 done:
 	virtiovf_state_mutex_unlock(virtvdev);
-	if (copy_to_user((void __user *)arg, &info, minsz))
+	if (copy_to_user((void __user *)arg, &info,
+			 offsetofend(struct vfio_precopy_info, dirty_bytes)))
 		return -EFAULT;
 	return 0;
 
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 7c1d33283e04..50b474334a19 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -16,6 +16,7 @@
 #include <linux/cdev.h>
 #include <uapi/linux/vfio.h>
 #include <linux/iova_bitmap.h>
+#include <linux/uaccess.h>
 
 struct kvm;
 struct iommufd_ctx;
@@ -285,6 +286,44 @@ static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops,
 	return 1;
 }
 
+/**
+ * vfio_check_precopy_ioctl - Validate user input for the VFIO_MIG_GET_PRECOPY_INFO ioctl
+ * @vdev: The vfio device
+ * @cmd: Cmd from the ioctl
+ * @arg: Arg from the ioctl
+ * @info: Driver pointer to hold the userspace input to the ioctl
+ *
+ * For use in a driver's get_precopy_info. Checks that the inputs to the
+ * VFIO_MIG_GET_PRECOPY_INFO ioctl are correct.
+
+ * Returns 0 on success, otherwise errno.
+ */
+
+static inline int
+vfio_check_precopy_ioctl(struct vfio_device *vdev, unsigned int cmd,
+			 unsigned long arg, struct vfio_precopy_info *info)
+{
+	unsigned long minsz;
+
+	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
+		return -ENOTTY;
+
+	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
+
+	if (copy_from_user(info, (void __user *)arg, minsz))
+		return -EFAULT;
+
+	if (info->argsz < minsz)
+		return -EINVAL;
+
+	/* keep v1 behaviour as is for compatibility reasons */
+	if (vdev->precopy_info_v2)
+		/* flags are output, set its initial value to 0 */
+		info->flags = 0;
+
+	return 0;
+}
+
 struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
 				       const struct vfio_device_ops *ops);
 #define vfio_alloc_device(dev_struct, member, dev, ops)				\
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 01a9db84c4ab..69b6d9defbce 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -840,18 +840,11 @@ static long mtty_precopy_ioctl(struct file *filp, unsigned int cmd,
 	struct mdev_state *mdev_state = migf->mdev_state;
 	loff_t *pos = &filp->f_pos;
 	struct vfio_precopy_info info = {};
-	unsigned long minsz;
 	int ret;
 
-	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
-		return -ENOTTY;
-
-	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
-
-	if (copy_from_user(&info, (void __user *)arg, minsz))
-		return -EFAULT;
-	if (info.argsz < minsz)
-		return -EINVAL;
+	ret = vfio_check_precopy_ioctl(&mdev_state->vdev, cmd, arg, &info);
+	if (ret)
+		return ret;
 
 	mutex_lock(&mdev_state->state_mutex);
 	if (mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY &&
@@ -878,7 +871,8 @@ static long mtty_precopy_ioctl(struct file *filp, unsigned int cmd,
 	info.initial_bytes = migf->filled_size - *pos;
 	mutex_unlock(&migf->lock);
 
-	ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+	ret = copy_to_user((void __user *)arg, &info,
+		offsetofend(struct vfio_precopy_info, dirty_bytes)) ? -EFAULT : 0;
 unlock:
 	mtty_state_mutex_unlock(mdev_state);
 	return ret;
-- 
cgit v1.2.3


From 4bee09a5dbd14e3369926b14b4ee14e22ebfc1f6 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 17 Mar 2026 18:17:51 +0200
Subject: net/mlx5: Add IFC bits for migration state

Add the relevant IFC bits for querying an extra migration state from the
device.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20260317161753.18964-5-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex@shazbot.org>
---
 include/linux/mlx5/mlx5_ifc.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 775cb0c56865..1c8922c58c8f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2173,7 +2173,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
 	u8	   sf_eq_usage[0x1];
 	u8	   reserved_at_d3[0x5];
 	u8	   multiplane[0x1];
-	u8	   reserved_at_d9[0x7];
+	u8	   migration_state[0x1];
+	u8	   reserved_at_da[0x6];
 
 	u8	   cross_vhca_object_to_object_supported[0x20];
 
@@ -13280,13 +13281,24 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits {
 	u8         reserved_at_60[0x20];
 };
 
+enum {
+	MLX5_QUERY_VHCA_MIG_STATE_UNINITIALIZED = 0x0,
+	MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_IDLE = 0x1,
+	MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_READY = 0x2,
+	MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_DIRTY = 0x3,
+	MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_INIT = 0x4,
+};
+
 struct mlx5_ifc_query_vhca_migration_state_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x40];
+	u8         reserved_at_40[0x20];
+
+	u8         migration_state[0x4];
+	u8         reserved_at_64[0x1c];
 
 	u8         required_umem_size[0x20];
 
-- 
cgit v1.2.3


From 3fe1dcbc2d20c5dbc581c0bb458e05365bfffcf7 Mon Sep 17 00:00:00 2001
From: Nicholas Sielicki <linux@opensource.nslick.com>
Date: Sat, 7 Mar 2026 03:00:09 -0600
Subject: module: expose imported namespaces via sysfs

Add /sys/module/*/import_ns to expose imported namespaces for
currently loaded modules. The file contains one namespace per line and
only exists for modules that import at least one namespace.

Previously, the only way for userspace to inspect the symbol
namespaces a module imports is to locate the .ko on disk and invoke
modinfo(8) to decompress/parse the metadata. The kernel validated
namespaces at load time, but it was otherwise discarded.

Exposing this data via sysfs provides a runtime mechanism to verify
which namespaces are being used by modules. For example, this allows
userspace to audit driver API access in Android GKI, which uses symbol
namespaces to restrict vendor drivers from using specific kernel
interfaces (e.g., direct filesystem access).

Signed-off-by: Nicholas Sielicki <linux@opensource.nslick.com>
[Sami: Updated the commit message to explain motivation.]
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 Documentation/ABI/testing/sysfs-module |  9 +++++
 include/linux/module.h                 |  1 +
 kernel/module/main.c                   | 69 +++++++++++++++++++++++++++++++++-
 3 files changed, 78 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module
index 6bc9af6229f0..d5b7d19bd310 100644
--- a/Documentation/ABI/testing/sysfs-module
+++ b/Documentation/ABI/testing/sysfs-module
@@ -48,6 +48,15 @@ Contact:	Kay Sievers <kay.sievers@vrfy.org>
 Description:	Show the initialization state(live, coming, going) of
 		the module.
 
+What:		/sys/module/*/import_ns
+Date:		January 2026
+KernelVersion:	7.1
+Contact:	linux-modules@vger.kernel.org
+Description:	List of symbol namespaces imported by this module via
+		MODULE_IMPORT_NS(). Each namespace appears on a separate line.
+		This file only exists for modules that import at least one
+		namespace.
+
 What:		/sys/module/*/taint
 Date:		Jan 2012
 KernelVersion:	3.3
diff --git a/include/linux/module.h b/include/linux/module.h
index 14f391b186c6..60ed1c3e0ed9 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -413,6 +413,7 @@ struct module {
 	struct module_attribute *modinfo_attrs;
 	const char *version;
 	const char *srcversion;
+	const char *imported_namespaces;
 	struct kobject *holders_dir;
 
 	/* Exported symbols */
diff --git a/kernel/module/main.c b/kernel/module/main.c
index ef2e2130972f..fc033137863d 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -607,6 +607,36 @@ static const struct module_attribute modinfo_##field = {              \
 MODINFO_ATTR(version);
 MODINFO_ATTR(srcversion);
 
+static void setup_modinfo_import_ns(struct module *mod, const char *s)
+{
+	mod->imported_namespaces = NULL;
+}
+
+static ssize_t show_modinfo_import_ns(const struct module_attribute *mattr,
+				      struct module_kobject *mk, char *buffer)
+{
+	return sysfs_emit(buffer, "%s\n", mk->mod->imported_namespaces);
+}
+
+static int modinfo_import_ns_exists(struct module *mod)
+{
+	return mod->imported_namespaces != NULL;
+}
+
+static void free_modinfo_import_ns(struct module *mod)
+{
+	kfree(mod->imported_namespaces);
+	mod->imported_namespaces = NULL;
+}
+
+static const struct module_attribute modinfo_import_ns = {
+	.attr = { .name = "import_ns", .mode = 0444 },
+	.show = show_modinfo_import_ns,
+	.setup = setup_modinfo_import_ns,
+	.test = modinfo_import_ns_exists,
+	.free = free_modinfo_import_ns,
+};
+
 static struct {
 	char name[MODULE_NAME_LEN];
 	char taints[MODULE_FLAGS_BUF_SIZE];
@@ -1058,6 +1088,7 @@ const struct module_attribute *const modinfo_attrs[] = {
 	&module_uevent,
 	&modinfo_version,
 	&modinfo_srcversion,
+	&modinfo_import_ns,
 	&modinfo_initstate,
 	&modinfo_coresize,
 #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
@@ -1760,11 +1791,43 @@ static void module_license_taint_check(struct module *mod, const char *license)
 	}
 }
 
+static int copy_modinfo_import_ns(struct module *mod, struct load_info *info)
+{
+	char *ns;
+	size_t len, total_len = 0;
+	char *buf, *p;
+
+	for_each_modinfo_entry(ns, info, "import_ns")
+		total_len += strlen(ns) + 1;
+
+	if (!total_len) {
+		mod->imported_namespaces = NULL;
+		return 0;
+	}
+
+	buf = kmalloc(total_len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	p = buf;
+	for_each_modinfo_entry(ns, info, "import_ns") {
+		len = strlen(ns);
+		memcpy(p, ns, len);
+		p += len;
+		*p++ = '\n';
+	}
+	/* Replace trailing newline with null terminator. */
+	*(p - 1) = '\0';
+
+	mod->imported_namespaces = buf;
+	return 0;
+}
+
 static int setup_modinfo(struct module *mod, struct load_info *info)
 {
 	const struct module_attribute *attr;
 	char *imported_namespace;
-	int i;
+	int i, err;
 
 	for (i = 0; (attr = modinfo_attrs[i]); i++) {
 		if (attr->setup)
@@ -1783,6 +1846,10 @@ static int setup_modinfo(struct module *mod, struct load_info *info)
 		}
 	}
 
+	err = copy_modinfo_import_ns(mod, info);
+	if (err)
+		return err;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From e61af3ca893363838f263f3528476f6e1faed9aa Mon Sep 17 00:00:00 2001
From: Rosen Penev <rosenp@gmail.com>
Date: Wed, 18 Mar 2026 12:10:37 -0700
Subject: hsi: hsi_core: use kzalloc_flex

Simplifies allocations by using a flexible array member in this struct.

Add __counted_by to get extra runtime analysis.

Signed-off-by: Rosen Penev <rosenp@gmail.com>
Link: https://patch.msgid.link/20260318191037.5661-1-rosenp@gmail.com
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/hsi/hsi_core.c  | 37 +++++++++++++++----------------------
 include/linux/hsi/hsi.h |  2 +-
 2 files changed, 16 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hsi/hsi_core.c b/drivers/hsi/hsi_core.c
index 7cb2dcb30fdb..754949f5ebd6 100644
--- a/drivers/hsi/hsi_core.c
+++ b/drivers/hsi/hsi_core.c
@@ -342,7 +342,6 @@ static void hsi_controller_release(struct device *dev)
 {
 	struct hsi_controller *hsi = to_hsi_controller(dev);
 
-	kfree(hsi->port);
 	kfree(hsi);
 }
 
@@ -446,7 +445,7 @@ void hsi_put_controller(struct hsi_controller *hsi)
 		return;
 
 	for (i = 0; i < hsi->num_ports; i++)
-		if (hsi->port && hsi->port[i])
+		if (hsi->port[i])
 			put_device(&hsi->port[i]->device);
 	put_device(&hsi->device);
 }
@@ -462,39 +461,33 @@ EXPORT_SYMBOL_GPL(hsi_put_controller);
 struct hsi_controller *hsi_alloc_controller(unsigned int n_ports, gfp_t flags)
 {
 	struct hsi_controller	*hsi;
-	struct hsi_port		**port;
 	unsigned int		i;
 
 	if (!n_ports)
 		return NULL;
 
-	hsi = kzalloc_obj(*hsi, flags);
+	hsi = kzalloc_flex(*hsi, port, n_ports, flags);
 	if (!hsi)
 		return NULL;
-	port = kzalloc_objs(*port, n_ports, flags);
-	if (!port) {
-		kfree(hsi);
-		return NULL;
-	}
+
 	hsi->num_ports = n_ports;
-	hsi->port = port;
 	hsi->device.release = hsi_controller_release;
 	device_initialize(&hsi->device);
 
 	for (i = 0; i < n_ports; i++) {
-		port[i] = kzalloc_obj(**port, flags);
-		if (port[i] == NULL)
+		hsi->port[i] = kzalloc_obj(**hsi->port, flags);
+		if (hsi->port[i] == NULL)
 			goto out;
-		port[i]->num = i;
-		port[i]->async = hsi_dummy_msg;
-		port[i]->setup = hsi_dummy_cl;
-		port[i]->flush = hsi_dummy_cl;
-		port[i]->start_tx = hsi_dummy_cl;
-		port[i]->stop_tx = hsi_dummy_cl;
-		port[i]->release = hsi_dummy_cl;
-		mutex_init(&port[i]->lock);
-		BLOCKING_INIT_NOTIFIER_HEAD(&port[i]->n_head);
-		dev_set_name(&port[i]->device, "port%d", i);
+		hsi->port[i]->num = i;
+		hsi->port[i]->async = hsi_dummy_msg;
+		hsi->port[i]->setup = hsi_dummy_cl;
+		hsi->port[i]->flush = hsi_dummy_cl;
+		hsi->port[i]->start_tx = hsi_dummy_cl;
+		hsi->port[i]->stop_tx = hsi_dummy_cl;
+		hsi->port[i]->release = hsi_dummy_cl;
+		mutex_init(&hsi->port[i]->lock);
+		BLOCKING_INIT_NOTIFIER_HEAD(&hsi->port[i]->n_head);
+		dev_set_name(&hsi->port[i]->device, "port%d", i);
 		hsi->port[i]->device.release = hsi_port_release;
 		device_initialize(&hsi->port[i]->device);
 	}
diff --git a/include/linux/hsi/hsi.h b/include/linux/hsi/hsi.h
index 6ca92bff02c6..ea6bef9b6012 100644
--- a/include/linux/hsi/hsi.h
+++ b/include/linux/hsi/hsi.h
@@ -271,7 +271,7 @@ struct hsi_controller {
 	struct module		*owner;
 	unsigned int		id;
 	unsigned int		num_ports;
-	struct hsi_port		**port;
+	struct hsi_port		*port[] __counted_by(num_ports);
 };
 
 #define to_hsi_controller(dev) container_of(dev, struct hsi_controller, device)
-- 
cgit v1.2.3


From d9794c0600f95b226b6672c5b364e44c80d660c5 Mon Sep 17 00:00:00 2001
From: Kit Dallege <xaum.io@gmail.com>
Date: Sun, 15 Mar 2026 18:10:01 +0100
Subject: dma-mapping: fix false kernel-doc comment marker

Change /** to /* for the DMA attributes list comment in dma-mapping.h.
The comment is not a kernel-doc structured comment and should not use
the kernel-doc opening marker.

Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Kit Dallege <xaum.io@gmail.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260315171001.66010-1-xaum.io@gmail.com
---
 include/linux/dma-mapping.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 29973baa0581..0c2807e50bdf 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -9,7 +9,7 @@
 #include <linux/bug.h>
 #include <linux/cache.h>
 
-/**
+/*
  * List of possible attributes associated with a DMA mapping. The semantics
  * of each attribute should be defined in Documentation/core-api/dma-attributes.rst.
  */
-- 
cgit v1.2.3


From 763aacf86f1baefb134c70813aa8c72d1675d738 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 17 Mar 2026 10:01:54 +0100
Subject: clocksource: Rewrite watchdog code completely

The clocksource watchdog code has over time reached the state of an
impenetrable maze of duct tape and staples. The original design, which was
made in the context of systems far smaller than today, is based on the
assumption that the to be monitored clocksource (TSC) can be trivially
compared against a known to be stable clocksource (HPET/ACPI-PM timer).

Over the years it turned out that this approach has major flaws:

  - Long delays between watchdog invocations can result in wrap arounds
    of the reference clocksource

  - Scalability of the reference clocksource readout can degrade on large
    multi-socket systems due to interconnect congestion

This was addressed with various heuristics which degraded the accuracy of
the watchdog to the point that it fails to detect actual TSC problems on
older hardware which exposes slow inter CPU drifts due to firmware
manipulating the TSC to hide SMI time.

To address this and bring back sanity to the watchdog, rewrite the code
completely with a different approach:

  1) Restrict the validation against a reference clocksource to the boot
     CPU, which is usually the CPU/Socket closest to the legacy block which
     contains the reference source (HPET/ACPI-PM timer). Validate that the
     reference readout is within a bound latency so that the actual
     comparison against the TSC stays within 500ppm as long as the clocks
     are stable.

  2) Compare the TSCs of the other CPUs in a round robin fashion against
     the boot CPU in the same way the TSC synchronization on CPU hotplug
     works. This still can suffer from delayed reaction of the remote CPU
     to the SMP function call and the latency of the control variable cache
     line. But this latency is not affecting correctness. It only affects
     the accuracy. With low contention the readout latency is in the low
     nanoseconds range, which detects even slight skews between CPUs. Under
     high contention this becomes obviously less accurate, but still
     detects slow skews reliably as it solely relies on subsequent readouts
     being monotonically increasing. It just can take slightly longer to
     detect the issue.

  3) Rewrite the watchdog test so it tests the various mechanisms one by
     one and validating the result against the expectation.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Borislav Petkov (AMD) <bp@alien8.de>
Tested-by: Daniel J Blueman <daniel@quora.org>
Reviewed-by: Jiri Wiesner <jwiesner@suse.de>
Reviewed-by: Daniel J Blueman <daniel@quora.org>
Link: https://patch.msgid.link/20260123231521.926490888@kernel.org
Link: https://patch.msgid.link/87h5qeomm5.ffs@tglx
---
 Documentation/admin-guide/kernel-parameters.txt |   7 +-
 arch/x86/include/asm/time.h                     |   1 -
 arch/x86/kernel/hpet.c                          |   4 +-
 arch/x86/kernel/tsc.c                           |  49 +-
 drivers/clocksource/acpi_pm.c                   |   4 +-
 include/linux/clocksource.h                     |  28 +-
 kernel/time/Kconfig                             |  12 -
 kernel/time/clocksource-wdtest.c                | 268 ++++----
 kernel/time/clocksource.c                       | 793 +++++++++++++-----------
 kernel/time/jiffies.c                           |   1 -
 10 files changed, 584 insertions(+), 583 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb850e5290c2..9b0be127d4a8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7950,12 +7950,7 @@ Kernel parameters
 			(HPET or PM timer) on systems whose TSC frequency was
 			obtained from HW or FW using either an MSR or CPUID(0x15).
 			Warn if the difference is more than 500 ppm.
-			[x86] watchdog: Use TSC as the watchdog clocksource with
-			which to check other HW timers (HPET or PM timer), but
-			only on systems where TSC has been deemed trustworthy.
-			This will be suppressed by an earlier tsc=nowatchdog and
-			can be overridden by a later tsc=nowatchdog.  A console
-			message will flag any such suppression or overriding.
+			[x86] watchdog: Enforce the clocksource watchdog on TSC
 
 	tsc_early_khz=  [X86,EARLY] Skip early TSC calibration and use the given
 			value instead. Useful when the early TSC frequency discovery
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index f360104ed172..459780c3ed1f 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -7,7 +7,6 @@
 
 extern void hpet_time_init(void);
 extern bool pit_timer_init(void);
-extern bool tsc_clocksource_watchdog_disabled(void);
 
 extern struct clock_event_device *global_clock_event;
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 610590e83445..8dc7b710e125 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -854,7 +854,7 @@ static struct clocksource clocksource_hpet = {
 	.rating		= 250,
 	.read		= read_hpet,
 	.mask		= HPET_MASK,
-	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_CALIBRATED,
 	.resume		= hpet_resume_counter,
 };
 
@@ -1082,8 +1082,6 @@ int __init hpet_enable(void)
 	if (!hpet_counting())
 		goto out_nohpet;
 
-	if (tsc_clocksource_watchdog_disabled())
-		clocksource_hpet.flags |= CLOCK_SOURCE_MUST_VERIFY;
 	clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
 
 	if (id & HPET_ID_LEGSUP) {
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 9ccd58c29409..c5110eb554bc 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -322,12 +322,16 @@ int __init notsc_setup(char *str)
 	return 1;
 }
 #endif
-
 __setup("notsc", notsc_setup);
 
+enum {
+	TSC_WATCHDOG_AUTO,
+	TSC_WATCHDOG_OFF,
+	TSC_WATCHDOG_ON,
+};
+
 static int no_sched_irq_time;
-static int no_tsc_watchdog;
-static int tsc_as_watchdog;
+static int tsc_watchdog;
 
 static int __init tsc_setup(char *str)
 {
@@ -337,25 +341,14 @@ static int __init tsc_setup(char *str)
 		no_sched_irq_time = 1;
 	if (!strcmp(str, "unstable"))
 		mark_tsc_unstable("boot parameter");
-	if (!strcmp(str, "nowatchdog")) {
-		no_tsc_watchdog = 1;
-		if (tsc_as_watchdog)
-			pr_alert("%s: Overriding earlier tsc=watchdog with tsc=nowatchdog\n",
-				 __func__);
-		tsc_as_watchdog = 0;
-	}
+	if (!strcmp(str, "nowatchdog"))
+		tsc_watchdog = TSC_WATCHDOG_OFF;
 	if (!strcmp(str, "recalibrate"))
 		tsc_force_recalibrate = 1;
-	if (!strcmp(str, "watchdog")) {
-		if (no_tsc_watchdog)
-			pr_alert("%s: tsc=watchdog overridden by earlier tsc=nowatchdog\n",
-				 __func__);
-		else
-			tsc_as_watchdog = 1;
-	}
+	if (!strcmp(str, "watchdog"))
+		tsc_watchdog = TSC_WATCHDOG_ON;
 	return 1;
 }
-
 __setup("tsc=", tsc_setup);
 
 #define MAX_RETRIES		5
@@ -1175,7 +1168,6 @@ static int tsc_cs_enable(struct clocksource *cs)
 static struct clocksource clocksource_tsc_early = {
 	.name			= "tsc-early",
 	.rating			= 299,
-	.uncertainty_margin	= 32 * NSEC_PER_MSEC,
 	.read			= read_tsc,
 	.mask			= CLOCKSOURCE_MASK(64),
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
@@ -1202,7 +1194,6 @@ static struct clocksource clocksource_tsc = {
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
 				  CLOCK_SOURCE_CAN_INLINE_READ |
 				  CLOCK_SOURCE_MUST_VERIFY |
-				  CLOCK_SOURCE_VERIFY_PERCPU |
 				  CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT,
 	.id			= CSID_X86_TSC,
 	.vdso_clock_mode	= VDSO_CLOCKMODE_TSC,
@@ -1231,16 +1222,12 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
 
 static void __init tsc_disable_clocksource_watchdog(void)
 {
+	if (tsc_watchdog == TSC_WATCHDOG_ON)
+		return;
 	clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
 	clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
 }
 
-bool tsc_clocksource_watchdog_disabled(void)
-{
-	return !(clocksource_tsc.flags & CLOCK_SOURCE_MUST_VERIFY) &&
-	       tsc_as_watchdog && !no_tsc_watchdog;
-}
-
 static void __init check_system_tsc_reliable(void)
 {
 #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
@@ -1395,6 +1382,8 @@ restart:
 		(unsigned long)tsc_khz / 1000,
 		(unsigned long)tsc_khz % 1000);
 
+	clocksource_tsc.flags |= CLOCK_SOURCE_CALIBRATED;
+
 	/* Inform the TSC deadline clockevent devices about the recalibration */
 	lapic_update_tsc_freq();
 
@@ -1470,12 +1459,10 @@ static bool __init determine_cpu_tsc_frequencies(bool early)
 
 	if (early) {
 		cpu_khz = x86_platform.calibrate_cpu();
-		if (tsc_early_khz) {
+		if (tsc_early_khz)
 			tsc_khz = tsc_early_khz;
-		} else {
+		else
 			tsc_khz = x86_platform.calibrate_tsc();
-			clocksource_tsc.freq_khz = tsc_khz;
-		}
 	} else {
 		/* We should not be here with non-native cpu calibration */
 		WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
@@ -1579,7 +1566,7 @@ void __init tsc_init(void)
 		return;
 	}
 
-	if (tsc_clocksource_reliable || no_tsc_watchdog)
+	if (tsc_clocksource_reliable || tsc_watchdog == TSC_WATCHDOG_OFF)
 		tsc_disable_clocksource_watchdog();
 
 	clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index b4330a01a566..67792937242f 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -98,7 +98,7 @@ static struct clocksource clocksource_acpi_pm = {
 	.rating		= 200,
 	.read		= acpi_pm_read,
 	.mask		= (u64)ACPI_PM_MASK,
-	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_CALIBRATED,
 	.suspend	= acpi_pm_suspend,
 	.resume		= acpi_pm_resume,
 };
@@ -243,8 +243,6 @@ static int __init init_acpi_pm_clocksource(void)
 		return -ENODEV;
 	}
 
-	if (tsc_clocksource_watchdog_disabled())
-		clocksource_acpi_pm.flags |= CLOCK_SOURCE_MUST_VERIFY;
 	return clocksource_register_hz(&clocksource_acpi_pm, PMTMR_TICKS_PER_SEC);
 }
 
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 25774fc5b53d..ccf5c0ca26b7 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -44,8 +44,6 @@ struct module;
  * @shift:		Cycle to nanosecond divisor (power of two)
  * @max_idle_ns:	Maximum idle time permitted by the clocksource (nsecs)
  * @maxadj:		Maximum adjustment value to mult (~11%)
- * @uncertainty_margin:	Maximum uncertainty in nanoseconds per half second.
- *			Zero says to use default WATCHDOG_THRESHOLD.
  * @archdata:		Optional arch-specific data
  * @max_cycles:		Maximum safe cycle value which won't overflow on
  *			multiplication
@@ -105,7 +103,6 @@ struct clocksource {
 	u32			shift;
 	u64			max_idle_ns;
 	u32			maxadj;
-	u32			uncertainty_margin;
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
 	struct arch_clocksource_data archdata;
 #endif
@@ -133,6 +130,7 @@ struct clocksource {
 	struct list_head	wd_list;
 	u64			cs_last;
 	u64			wd_last;
+	unsigned int		wd_cpu;
 #endif
 	struct module		*owner;
 };
@@ -142,15 +140,18 @@ struct clocksource {
  */
 #define CLOCK_SOURCE_IS_CONTINUOUS		0x01
 #define CLOCK_SOURCE_MUST_VERIFY		0x02
+#define CLOCK_SOURCE_CALIBRATED			0x04
 
 #define CLOCK_SOURCE_WATCHDOG			0x10
 #define CLOCK_SOURCE_VALID_FOR_HRES		0x20
 #define CLOCK_SOURCE_UNSTABLE			0x40
 #define CLOCK_SOURCE_SUSPEND_NONSTOP		0x80
 #define CLOCK_SOURCE_RESELECT			0x100
-#define CLOCK_SOURCE_VERIFY_PERCPU		0x200
-#define CLOCK_SOURCE_CAN_INLINE_READ		0x400
-#define CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT	0x800
+#define CLOCK_SOURCE_CAN_INLINE_READ		0x200
+#define CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT	0x400
+
+#define CLOCK_SOURCE_WDTEST			0x800
+#define CLOCK_SOURCE_WDTEST_PERCPU		0x1000
 
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
@@ -301,21 +302,6 @@ static inline void timer_probe(void) {}
 #define TIMER_ACPI_DECLARE(name, table_id, fn)		\
 	ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn)
 
-static inline unsigned int clocksource_get_max_watchdog_retry(void)
-{
-	/*
-	 * When system is in the boot phase or under heavy workload, there
-	 * can be random big latencies during the clocksource/watchdog
-	 * read, so allow retries to filter the noise latency. As the
-	 * latency's frequency and maximum value goes up with the number of
-	 * CPUs, scale the number of retries with the number of online
-	 * CPUs.
-	 */
-	return (ilog2(num_online_cpus()) / 2) + 1;
-}
-
-void clocksource_verify_percpu(struct clocksource *cs);
-
 /**
  * struct clocksource_base - hardware abstraction for clock on which a clocksource
  *			is based
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 6d6aace0a693..6a11964377e6 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -212,18 +212,6 @@ config HIGH_RES_TIMERS
 	  hardware is not capable then this option only increases
 	  the size of the kernel image.
 
-config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
-	int "Clocksource watchdog maximum allowable skew (in microseconds)"
-	depends on CLOCKSOURCE_WATCHDOG
-	range 50 1000
-	default 125
-	help
-	  Specify the maximum amount of allowable watchdog skew in
-	  microseconds before reporting the clocksource to be unstable.
-	  The default is based on a half-second clocksource watchdog
-	  interval and NTP's maximum frequency drift of 500 parts
-	  per million.	If the clocksource is good enough for NTP,
-	  it is good enough for the clocksource watchdog!
 endif
 
 config POSIX_AUX_CLOCKS
diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c
index 38dae590b29f..b4cf17b4aeed 100644
--- a/kernel/time/clocksource-wdtest.c
+++ b/kernel/time/clocksource-wdtest.c
@@ -3,202 +3,196 @@
  * Unit test for the clocksource watchdog.
  *
  * Copyright (C) 2021 Facebook, Inc.
+ * Copyright (C) 2026 Intel Corp.
  *
  * Author: Paul E. McKenney <paulmck@kernel.org>
+ * Author: Thomas Gleixner <tglx@kernel.org>
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/device.h>
 #include <linux/clocksource.h>
-#include <linux/init.h>
+#include <linux/delay.h>
 #include <linux/module.h>
-#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
-#include <linux/tick.h>
 #include <linux/kthread.h>
-#include <linux/delay.h>
-#include <linux/prandom.h>
-#include <linux/cpu.h>
 
 #include "tick-internal.h"
+#include "timekeeping_internal.h"
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Clocksource watchdog unit test");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@kernel.org>");
+MODULE_AUTHOR("Thomas Gleixner <tglx@kernel.org>");
+
+enum wdtest_states {
+	WDTEST_INJECT_NONE,
+	WDTEST_INJECT_DELAY,
+	WDTEST_INJECT_POSITIVE,
+	WDTEST_INJECT_NEGATIVE,
+	WDTEST_INJECT_PERCPU	= 0x100,
+};
 
-static int holdoff = IS_BUILTIN(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) ? 10 : 0;
-module_param(holdoff, int, 0444);
-MODULE_PARM_DESC(holdoff, "Time to wait to start test (s).");
+static enum wdtest_states wdtest_state;
+static unsigned long wdtest_test_count;
+static ktime_t wdtest_last_ts, wdtest_offset;
 
-/* Watchdog kthread's task_struct pointer for debug purposes. */
-static struct task_struct *wdtest_task;
+#define SHIFT_4000PPM	8
 
-static u64 wdtest_jiffies_read(struct clocksource *cs)
+static ktime_t wdtest_get_offset(struct clocksource *cs)
 {
-	return (u64)jiffies;
-}
-
-static struct clocksource clocksource_wdtest_jiffies = {
-	.name			= "wdtest-jiffies",
-	.rating			= 1, /* lowest valid rating*/
-	.uncertainty_margin	= TICK_NSEC,
-	.read			= wdtest_jiffies_read,
-	.mask			= CLOCKSOURCE_MASK(32),
-	.flags			= CLOCK_SOURCE_MUST_VERIFY,
-	.mult			= TICK_NSEC << JIFFIES_SHIFT, /* details above */
-	.shift			= JIFFIES_SHIFT,
-	.max_cycles		= 10,
-};
+	if (wdtest_state < WDTEST_INJECT_PERCPU)
+		return wdtest_test_count & 0x1 ? 0 : wdtest_offset >> SHIFT_4000PPM;
 
-static int wdtest_ktime_read_ndelays;
-static bool wdtest_ktime_read_fuzz;
+	/* Only affect the readout of the "remote" CPU */
+	return cs->wd_cpu == smp_processor_id() ? 0 : NSEC_PER_MSEC;
+}
 
 static u64 wdtest_ktime_read(struct clocksource *cs)
 {
-	int wkrn = READ_ONCE(wdtest_ktime_read_ndelays);
-	static int sign = 1;
-	u64 ret;
+	ktime_t now = ktime_get_raw_fast_ns();
+	ktime_t intv = now - wdtest_last_ts;
 
-	if (wkrn) {
-		udelay(cs->uncertainty_margin / 250);
-		WRITE_ONCE(wdtest_ktime_read_ndelays, wkrn - 1);
-	}
-	ret = ktime_get_real_fast_ns();
-	if (READ_ONCE(wdtest_ktime_read_fuzz)) {
-		sign = -sign;
-		ret = ret + sign * 100 * NSEC_PER_MSEC;
+	/*
+	 * Only increment the test counter once per watchdog interval and
+	 * store the interval for the offset calculation of this step. This
+	 * guarantees a consistent behaviour even if the other side needs
+	 * to repeat due to a watchdog read timeout.
+	 */
+	if (intv > (NSEC_PER_SEC / 4)) {
+		WRITE_ONCE(wdtest_test_count, wdtest_test_count + 1);
+		wdtest_last_ts = now;
+		wdtest_offset = intv;
 	}
-	return ret;
-}
 
-static void wdtest_ktime_cs_mark_unstable(struct clocksource *cs)
-{
-	pr_info("--- Marking %s unstable due to clocksource watchdog.\n", cs->name);
+	switch (wdtest_state & ~WDTEST_INJECT_PERCPU) {
+	case WDTEST_INJECT_POSITIVE:
+		return now + wdtest_get_offset(cs);
+	case WDTEST_INJECT_NEGATIVE:
+		return now - wdtest_get_offset(cs);
+	case WDTEST_INJECT_DELAY:
+		udelay(500);
+		return now;
+	default:
+		return now;
+	}
 }
 
-#define KTIME_FLAGS (CLOCK_SOURCE_IS_CONTINUOUS | \
-		     CLOCK_SOURCE_VALID_FOR_HRES | \
-		     CLOCK_SOURCE_MUST_VERIFY | \
-		     CLOCK_SOURCE_VERIFY_PERCPU)
+#define KTIME_FLAGS (CLOCK_SOURCE_IS_CONTINUOUS |	\
+		     CLOCK_SOURCE_CALIBRATED |		\
+		     CLOCK_SOURCE_MUST_VERIFY |		\
+		     CLOCK_SOURCE_WDTEST)
 
 static struct clocksource clocksource_wdtest_ktime = {
 	.name			= "wdtest-ktime",
-	.rating			= 300,
+	.rating			= 10,
 	.read			= wdtest_ktime_read,
 	.mask			= CLOCKSOURCE_MASK(64),
 	.flags			= KTIME_FLAGS,
-	.mark_unstable		= wdtest_ktime_cs_mark_unstable,
 	.list			= LIST_HEAD_INIT(clocksource_wdtest_ktime.list),
 };
 
-/* Reset the clocksource if needed. */
-static void wdtest_ktime_clocksource_reset(void)
+static void wdtest_clocksource_reset(enum wdtest_states which, bool percpu)
+{
+	clocksource_unregister(&clocksource_wdtest_ktime);
+
+	pr_info("Test: State %d percpu %d\n", which, percpu);
+
+	wdtest_state = which;
+	if (percpu)
+		wdtest_state |= WDTEST_INJECT_PERCPU;
+	wdtest_test_count = 0;
+	wdtest_last_ts = 0;
+
+	clocksource_wdtest_ktime.rating = 10;
+	clocksource_wdtest_ktime.flags = KTIME_FLAGS;
+	if (percpu)
+		clocksource_wdtest_ktime.flags |= CLOCK_SOURCE_WDTEST_PERCPU;
+	clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
+}
+
+static bool wdtest_execute(enum wdtest_states which, bool percpu, unsigned int expect,
+			   unsigned long calls)
 {
-	if (clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE) {
-		clocksource_unregister(&clocksource_wdtest_ktime);
-		clocksource_wdtest_ktime.flags = KTIME_FLAGS;
-		schedule_timeout_uninterruptible(HZ / 10);
-		clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
+	wdtest_clocksource_reset(which, percpu);
+
+	for (; READ_ONCE(wdtest_test_count) < calls; msleep(100)) {
+		unsigned int flags = READ_ONCE(clocksource_wdtest_ktime.flags);
+
+		if (kthread_should_stop())
+			return false;
+
+		if (flags & CLOCK_SOURCE_UNSTABLE) {
+			if (expect & CLOCK_SOURCE_UNSTABLE)
+				return true;
+			pr_warn("Fail: Unexpected unstable\n");
+			return false;
+		}
+		if (flags & CLOCK_SOURCE_VALID_FOR_HRES) {
+			if (expect & CLOCK_SOURCE_VALID_FOR_HRES)
+				return true;
+			pr_warn("Fail: Unexpected valid for highres\n");
+			return false;
+		}
 	}
+
+	if (!expect)
+		return true;
+
+	pr_warn("Fail: Timed out\n");
+	return false;
 }
 
-/* Run the specified series of watchdog tests. */
-static int wdtest_func(void *arg)
+static bool wdtest_run(bool percpu)
 {
-	unsigned long j1, j2;
-	int i, max_retries;
-	char *s;
+	if (!wdtest_execute(WDTEST_INJECT_NONE, percpu, CLOCK_SOURCE_VALID_FOR_HRES, 8))
+		return false;
 
-	schedule_timeout_uninterruptible(holdoff * HZ);
+	if (!wdtest_execute(WDTEST_INJECT_DELAY, percpu, 0, 4))
+		return false;
 
-	/*
-	 * Verify that jiffies-like clocksources get the manually
-	 * specified uncertainty margin.
-	 */
-	pr_info("--- Verify jiffies-like uncertainty margin.\n");
-	__clocksource_register(&clocksource_wdtest_jiffies);
-	WARN_ON_ONCE(clocksource_wdtest_jiffies.uncertainty_margin != TICK_NSEC);
+	if (!wdtest_execute(WDTEST_INJECT_POSITIVE, percpu, CLOCK_SOURCE_UNSTABLE, 8))
+		return false;
 
-	j1 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies);
-	schedule_timeout_uninterruptible(HZ);
-	j2 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies);
-	WARN_ON_ONCE(j1 == j2);
+	if (!wdtest_execute(WDTEST_INJECT_NEGATIVE, percpu, CLOCK_SOURCE_UNSTABLE, 8))
+		return false;
 
-	clocksource_unregister(&clocksource_wdtest_jiffies);
+	return true;
+}
 
-	/*
-	 * Verify that tsc-like clocksources are assigned a reasonable
-	 * uncertainty margin.
-	 */
-	pr_info("--- Verify tsc-like uncertainty margin.\n");
+static int wdtest_func(void *arg)
+{
 	clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
-	WARN_ON_ONCE(clocksource_wdtest_ktime.uncertainty_margin < NSEC_PER_USEC);
-
-	j1 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime);
-	udelay(1);
-	j2 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime);
-	pr_info("--- tsc-like times: %lu - %lu = %lu.\n", j2, j1, j2 - j1);
-	WARN_ONCE(time_before(j2, j1 + NSEC_PER_USEC),
-		  "Expected at least 1000ns, got %lu.\n", j2 - j1);
-
-	/* Verify tsc-like stability with various numbers of errors injected. */
-	max_retries = clocksource_get_max_watchdog_retry();
-	for (i = 0; i <= max_retries + 1; i++) {
-		if (i <= 1 && i < max_retries)
-			s = "";
-		else if (i <= max_retries)
-			s = ", expect message";
-		else
-			s = ", expect clock skew";
-		pr_info("--- Watchdog with %dx error injection, %d retries%s.\n", i, max_retries, s);
-		WRITE_ONCE(wdtest_ktime_read_ndelays, i);
-		schedule_timeout_uninterruptible(2 * HZ);
-		WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays));
-		WARN_ON_ONCE((i <= max_retries) !=
-			     !(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
-		wdtest_ktime_clocksource_reset();
+	if (wdtest_run(false)) {
+		if (wdtest_run(true))
+			pr_info("Success: All tests passed\n");
 	}
-
-	/* Verify tsc-like stability with clock-value-fuzz error injection. */
-	pr_info("--- Watchdog clock-value-fuzz error injection, expect clock skew and per-CPU mismatches.\n");
-	WRITE_ONCE(wdtest_ktime_read_fuzz, true);
-	schedule_timeout_uninterruptible(2 * HZ);
-	WARN_ON_ONCE(!(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
-	clocksource_verify_percpu(&clocksource_wdtest_ktime);
-	WRITE_ONCE(wdtest_ktime_read_fuzz, false);
-
 	clocksource_unregister(&clocksource_wdtest_ktime);
 
-	pr_info("--- Done with test.\n");
-	return 0;
-}
+	if (!IS_MODULE(CONFIG_TEST_CLOCKSOURCE_WATCHDOG))
+		return 0;
 
-static void wdtest_print_module_parms(void)
-{
-	pr_alert("--- holdoff=%d\n", holdoff);
+	while (!kthread_should_stop())
+		schedule_timeout_interruptible(3600 * HZ);
+	return 0;
 }
 
-/* Cleanup function. */
-static void clocksource_wdtest_cleanup(void)
-{
-}
+static struct task_struct *wdtest_thread;
 
 static int __init clocksource_wdtest_init(void)
 {
-	int ret = 0;
-
-	wdtest_print_module_parms();
+	struct task_struct *t = kthread_run(wdtest_func, NULL, "wdtest");
 
-	/* Create watchdog-test task. */
-	wdtest_task = kthread_run(wdtest_func, NULL, "wdtest");
-	if (IS_ERR(wdtest_task)) {
-		ret = PTR_ERR(wdtest_task);
-		pr_warn("%s: Failed to create wdtest kthread.\n", __func__);
-		wdtest_task = NULL;
-		return ret;
+	if (IS_ERR(t)) {
+		pr_warn("Failed to create wdtest kthread.\n");
+		return PTR_ERR(t);
 	}
-
+	wdtest_thread = t;
 	return 0;
 }
-
 module_init(clocksource_wdtest_init);
+
+static void clocksource_wdtest_cleanup(void)
+{
+	if (wdtest_thread)
+		kthread_stop(wdtest_thread);
+}
 module_exit(clocksource_wdtest_cleanup);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e753a0632ac8..baee13a1f87f 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -7,15 +7,17 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/device.h>
 #include <linux/clocksource.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/device.h>
 #include <linux/init.h>
-#include <linux/module.h>
-#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
-#include <linux/tick.h>
 #include <linux/kthread.h>
+#include <linux/module.h>
 #include <linux/prandom.h>
-#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/topology.h>
 
 #include "tick-internal.h"
 #include "timekeeping_internal.h"
@@ -107,48 +109,6 @@ static char override_name[CS_NAME_LEN];
 static int finished_booting;
 static u64 suspend_start;
 
-/*
- * Interval: 0.5sec.
- */
-#define WATCHDOG_INTERVAL (HZ >> 1)
-#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ))
-
-/*
- * Threshold: 0.0312s, when doubled: 0.0625s.
- */
-#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5)
-
-/*
- * Maximum permissible delay between two readouts of the watchdog
- * clocksource surrounding a read of the clocksource being validated.
- * This delay could be due to SMIs, NMIs, or to VCPU preemptions.  Used as
- * a lower bound for cs->uncertainty_margin values when registering clocks.
- *
- * The default of 500 parts per million is based on NTP's limits.
- * If a clocksource is good enough for NTP, it is good enough for us!
- *
- * In other words, by default, even if a clocksource is extremely
- * precise (for example, with a sub-nanosecond period), the maximum
- * permissible skew between the clocksource watchdog and the clocksource
- * under test is not permitted to go below the 500ppm minimum defined
- * by MAX_SKEW_USEC.  This 500ppm minimum may be overridden using the
- * CLOCKSOURCE_WATCHDOG_MAX_SKEW_US Kconfig option.
- */
-#ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
-#define MAX_SKEW_USEC	CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
-#else
-#define MAX_SKEW_USEC	(125 * WATCHDOG_INTERVAL / HZ)
-#endif
-
-/*
- * Default for maximum permissible skew when cs->uncertainty_margin is
- * not specified, and the lower bound even when cs->uncertainty_margin
- * is specified.  This is also the default that is used when registering
- * clocks with unspecified cs->uncertainty_margin, so this macro is used
- * even in CONFIG_CLOCKSOURCE_WATCHDOG=n kernels.
- */
-#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)
-
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
 static void clocksource_select(void);
@@ -160,7 +120,42 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
 static DEFINE_SPINLOCK(watchdog_lock);
 static int watchdog_running;
 static atomic_t watchdog_reset_pending;
-static int64_t watchdog_max_interval;
+
+/* Watchdog interval: 0.5sec. */
+#define WATCHDOG_INTERVAL		(HZ >> 1)
+#define WATCHDOG_INTERVAL_NS		(WATCHDOG_INTERVAL * (NSEC_PER_SEC / HZ))
+
+/* Maximum time between two reference watchdog readouts */
+#define WATCHDOG_READOUT_MAX_NS		(50U * NSEC_PER_USEC)
+
+/*
+ * Maximum time between two remote readouts for NUMA=n. On NUMA enabled systems
+ * the timeout is calculated from the numa distance.
+ */
+#define WATCHDOG_DEFAULT_TIMEOUT_NS	(50U * NSEC_PER_USEC)
+
+/*
+ * Remote timeout NUMA distance multiplier. The local distance is 10. The
+ * default remote distance is 20. ACPI tables provide more accurate numbers
+ * which are guaranteed to be greater than the local distance.
+ *
+ * This results in a 5us base value, which is equivalent to the above !NUMA
+ * default.
+ */
+#define WATCHDOG_NUMA_MULTIPLIER_NS	((u64)(WATCHDOG_DEFAULT_TIMEOUT_NS / LOCAL_DISTANCE))
+
+/* Limit the NUMA timeout in case the distance values are insanely big */
+#define WATCHDOG_NUMA_MAX_TIMEOUT_NS	((u64)(500U * NSEC_PER_USEC))
+
+/* Shift values to calculate the approximate $N ppm of a given delta. */
+#define SHIFT_500PPM			11
+#define SHIFT_4000PPM			8
+
+/* Number of attempts to read the watchdog */
+#define WATCHDOG_FREQ_RETRIES		3
+
+/* Five reads local and remote for inter CPU skew detection */
+#define WATCHDOG_REMOTE_MAX_SEQ		10
 
 static inline void clocksource_watchdog_lock(unsigned long *flags)
 {
@@ -241,204 +236,422 @@ void clocksource_mark_unstable(struct clocksource *cs)
 	spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
-static int verify_n_cpus = 8;
-module_param(verify_n_cpus, int, 0644);
+static inline void clocksource_reset_watchdog(void)
+{
+	struct clocksource *cs;
+
+	list_for_each_entry(cs, &watchdog_list, wd_list)
+		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
+}
+
+enum wd_result {
+	WD_SUCCESS,
+	WD_FREQ_NO_WATCHDOG,
+	WD_FREQ_TIMEOUT,
+	WD_FREQ_RESET,
+	WD_FREQ_SKEWED,
+	WD_CPU_TIMEOUT,
+	WD_CPU_SKEWED,
+};
 
-enum wd_read_status {
-	WD_READ_SUCCESS,
-	WD_READ_UNSTABLE,
-	WD_READ_SKIP
+struct watchdog_cpu_data {
+	/* Keep first as it is 32 byte aligned */
+	call_single_data_t	csd;
+	atomic_t		remote_inprogress;
+	enum wd_result		result;
+	u64			cpu_ts[2];
+	struct clocksource	*cs;
+	/* Ensure that the sequence is in a separate cache line */
+	atomic_t		seq ____cacheline_aligned;
+	/* Set by the control CPU according to NUMA distance */
+	u64			timeout_ns;
 };
 
-static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
-{
-	int64_t md = watchdog->uncertainty_margin;
-	unsigned int nretries, max_retries;
-	int64_t wd_delay, wd_seq_delay;
-	u64 wd_end, wd_end2;
-
-	max_retries = clocksource_get_max_watchdog_retry();
-	for (nretries = 0; nretries <= max_retries; nretries++) {
-		local_irq_disable();
-		*wdnow = watchdog->read(watchdog);
-		*csnow = cs->read(cs);
-		wd_end = watchdog->read(watchdog);
-		wd_end2 = watchdog->read(watchdog);
-		local_irq_enable();
-
-		wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end);
-		if (wd_delay <= md + cs->uncertainty_margin) {
-			if (nretries > 1 && nretries >= max_retries) {
-				pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
-					smp_processor_id(), watchdog->name, nretries);
+struct watchdog_data {
+	raw_spinlock_t	lock;
+	enum wd_result	result;
+
+	u64		wd_seq;
+	u64		wd_delta;
+	u64		cs_delta;
+	u64		cpu_ts[2];
+
+	unsigned int	curr_cpu;
+} ____cacheline_aligned_in_smp;
+
+static void watchdog_check_skew_remote(void *unused);
+
+static DEFINE_PER_CPU_ALIGNED(struct watchdog_cpu_data, watchdog_cpu_data) = {
+	.csd	= CSD_INIT(watchdog_check_skew_remote, NULL),
+};
+
+static struct watchdog_data watchdog_data = {
+	.lock	= __RAW_SPIN_LOCK_UNLOCKED(watchdog_data.lock),
+};
+
+static inline void watchdog_set_result(struct watchdog_cpu_data *wd, enum wd_result result)
+{
+	guard(raw_spinlock)(&watchdog_data.lock);
+	if (!wd->result) {
+		atomic_set(&wd->seq, WATCHDOG_REMOTE_MAX_SEQ);
+		WRITE_ONCE(wd->result, result);
+	}
+}
+
+/* Wait for the sequence number to hand over control. */
+static bool watchdog_wait_seq(struct watchdog_cpu_data *wd, u64 start, int seq)
+{
+	for(int cnt = 0; atomic_read(&wd->seq) < seq; cnt++) {
+		/* Bail if the other side set an error result */
+		if (READ_ONCE(wd->result) != WD_SUCCESS)
+			return false;
+
+		/* Prevent endless loops if the other CPU does not react. */
+		if (cnt == 5000) {
+			u64 nsecs = ktime_get_raw_fast_ns();
+
+			if (nsecs - start >=wd->timeout_ns) {
+				watchdog_set_result(wd, WD_CPU_TIMEOUT);
+				return false;
 			}
-			return WD_READ_SUCCESS;
+			cnt = 0;
 		}
+		cpu_relax();
+	}
+	return seq < WATCHDOG_REMOTE_MAX_SEQ;
+}
 
-		/*
-		 * Now compute delay in consecutive watchdog read to see if
-		 * there is too much external interferences that cause
-		 * significant delay in reading both clocksource and watchdog.
-		 *
-		 * If consecutive WD read-back delay > md, report
-		 * system busy, reinit the watchdog and skip the current
-		 * watchdog test.
-		 */
-		wd_seq_delay = cycles_to_nsec_safe(watchdog, wd_end, wd_end2);
-		if (wd_seq_delay > md)
-			goto skip_test;
+static void watchdog_check_skew(struct watchdog_cpu_data *wd, int index)
+{
+	u64 prev, now, delta, start = ktime_get_raw_fast_ns();
+	int local = index, remote = (index + 1) & 0x1;
+	struct clocksource *cs = wd->cs;
+
+	/* Set the local timestamp so that the first iteration works correctly */
+	wd->cpu_ts[local] = cs->read(cs);
+
+	/* Signal arrival */
+	atomic_inc(&wd->seq);
+
+	for (int seq = local + 2; seq < WATCHDOG_REMOTE_MAX_SEQ; seq += 2) {
+		if (!watchdog_wait_seq(wd, start, seq))
+			return;
+
+		/* Capture local timestamp before possible non-local coherency overhead */
+		now = cs->read(cs);
+
+		/* Store local timestamp before reading remote to limit coherency stalls */
+		wd->cpu_ts[local] = now;
+
+		prev = wd->cpu_ts[remote];
+		delta = (now - prev) & cs->mask;
+
+		if (delta > cs->max_raw_delta) {
+			watchdog_set_result(wd, WD_CPU_SKEWED);
+			return;
+		}
+
+		/* Hand over to the remote CPU */
+		atomic_inc(&wd->seq);
 	}
+}
 
-	pr_warn("timekeeping watchdog on CPU%d: wd-%s-wd excessive read-back delay of %lldns vs. limit of %ldns, wd-wd read-back delay only %lldns, attempt %d, marking %s unstable\n",
-		smp_processor_id(), cs->name, wd_delay, WATCHDOG_MAX_SKEW, wd_seq_delay, nretries, cs->name);
-	return WD_READ_UNSTABLE;
+static void watchdog_check_skew_remote(void *unused)
+{
+	struct watchdog_cpu_data *wd = this_cpu_ptr(&watchdog_cpu_data);
 
-skip_test:
-	pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n",
-		smp_processor_id(), watchdog->name, wd_seq_delay);
-	pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n",
-		cs->name, wd_delay);
-	return WD_READ_SKIP;
+	atomic_inc(&wd->remote_inprogress);
+	watchdog_check_skew(wd, 1);
+	atomic_dec(&wd->remote_inprogress);
 }
 
-static u64 csnow_mid;
-static cpumask_t cpus_ahead;
-static cpumask_t cpus_behind;
-static cpumask_t cpus_chosen;
+static inline bool wd_csd_locked(struct watchdog_cpu_data *wd)
+{
+	return READ_ONCE(wd->csd.node.u_flags) & CSD_FLAG_LOCK;
+}
 
-static void clocksource_verify_choose_cpus(void)
+/*
+ * This is only invoked for remote CPUs. See watchdog_check_cpu_skew().
+ */
+static inline u64 wd_get_remote_timeout(unsigned int remote_cpu)
 {
-	int cpu, i, n = verify_n_cpus;
+	unsigned int n1, n2;
+	u64 ns;
+
+	if (nr_node_ids == 1)
+		return WATCHDOG_DEFAULT_TIMEOUT_NS;
+
+	n1 = cpu_to_node(smp_processor_id());
+	n2 = cpu_to_node(remote_cpu);
+	ns = WATCHDOG_NUMA_MULTIPLIER_NS * node_distance(n1, n2);
+	return min(ns, WATCHDOG_NUMA_MAX_TIMEOUT_NS);
+}
 
-	if (n < 0 || n >= num_online_cpus()) {
-		/* Check all of the CPUs. */
-		cpumask_copy(&cpus_chosen, cpu_online_mask);
-		cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
+static void __watchdog_check_cpu_skew(struct clocksource *cs, unsigned int cpu)
+{
+	struct watchdog_cpu_data *wd;
+
+	wd = per_cpu_ptr(&watchdog_cpu_data, cpu);
+	if (atomic_read(&wd->remote_inprogress) || wd_csd_locked(wd)) {
+		watchdog_data.result = WD_CPU_TIMEOUT;
+		return;
+	}
+
+	atomic_set(&wd->seq, 0);
+	wd->result = WD_SUCCESS;
+	wd->cs = cs;
+	/* Store the current CPU ID for the watchdog test unit */
+	cs->wd_cpu = smp_processor_id();
+
+	wd->timeout_ns = wd_get_remote_timeout(cpu);
+
+	/* Kick the remote CPU into the watchdog function */
+	if (WARN_ON_ONCE(smp_call_function_single_async(cpu, &wd->csd))) {
+		watchdog_data.result = WD_CPU_TIMEOUT;
 		return;
 	}
 
-	/* If no checking desired, or no other CPU to check, leave. */
-	cpumask_clear(&cpus_chosen);
-	if (n == 0 || num_online_cpus() <= 1)
+	scoped_guard(irq)
+		watchdog_check_skew(wd, 0);
+
+	scoped_guard(raw_spinlock_irq, &watchdog_data.lock) {
+		watchdog_data.result = wd->result;
+		memcpy(watchdog_data.cpu_ts, wd->cpu_ts, sizeof(wd->cpu_ts));
+	}
+}
+
+static void watchdog_check_cpu_skew(struct clocksource *cs)
+{
+	unsigned int cpu = watchdog_data.curr_cpu;
+
+	cpu = cpumask_next_wrap(cpu, cpu_online_mask);
+	watchdog_data.curr_cpu = cpu;
+
+	/* Skip the current CPU. Handles num_online_cpus() == 1 as well */
+	if (cpu == smp_processor_id())
 		return;
 
-	/* Make sure to select at least one CPU other than the current CPU. */
-	cpu = cpumask_any_but(cpu_online_mask, smp_processor_id());
-	if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
+	/* Don't interfere with the test mechanics */
+	if ((cs->flags & CLOCK_SOURCE_WDTEST) && !(cs->flags & CLOCK_SOURCE_WDTEST_PERCPU))
 		return;
-	cpumask_set_cpu(cpu, &cpus_chosen);
 
-	/* Force a sane value for the boot parameter. */
-	if (n > nr_cpu_ids)
-		n = nr_cpu_ids;
+	__watchdog_check_cpu_skew(cs, cpu);
+}
+
+static bool watchdog_check_freq(struct clocksource *cs, bool reset_pending)
+{
+	unsigned int ppm_shift = SHIFT_4000PPM;
+	u64 wd_ts0, wd_ts1, cs_ts;
+
+	watchdog_data.result = WD_SUCCESS;
+	if (!watchdog) {
+		watchdog_data.result = WD_FREQ_NO_WATCHDOG;
+		return false;
+	}
+
+	if (cs->flags & CLOCK_SOURCE_WDTEST_PERCPU)
+		return true;
 
 	/*
-	 * Randomly select the specified number of CPUs.  If the same
-	 * CPU is selected multiple times, that CPU is checked only once,
-	 * and no replacement CPU is selected.  This gracefully handles
-	 * situations where verify_n_cpus is greater than the number of
-	 * CPUs that are currently online.
+	 * If both the clocksource and the watchdog claim they are
+	 * calibrated use 500ppm limit. Uncalibrated clocksources need a
+	 * larger allowance because thefirmware supplied frequencies can be
+	 * way off.
 	 */
-	for (i = 1; i < n; i++) {
-		cpu = cpumask_random(cpu_online_mask);
-		if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
-			cpumask_set_cpu(cpu, &cpus_chosen);
+	if (watchdog->flags & CLOCK_SOURCE_CALIBRATED && cs->flags & CLOCK_SOURCE_CALIBRATED)
+		ppm_shift = SHIFT_500PPM;
+
+	for (int retries = 0; retries < WATCHDOG_FREQ_RETRIES; retries++) {
+		s64 wd_last, cs_last, wd_seq, wd_delta, cs_delta, max_delta;
+
+		scoped_guard(irq) {
+			wd_ts0 = watchdog->read(watchdog);
+			cs_ts = cs->read(cs);
+			wd_ts1 = watchdog->read(watchdog);
+		}
+
+		wd_last = cs->wd_last;
+		cs_last = cs->cs_last;
+
+		/* Validate the watchdog readout window */
+		wd_seq = cycles_to_nsec_safe(watchdog, wd_ts0, wd_ts1);
+		if (wd_seq > WATCHDOG_READOUT_MAX_NS) {
+			/* Store for printout in case all retries fail */
+			watchdog_data.wd_seq = wd_seq;
+			continue;
+		}
+
+		/* Store for subsequent processing */
+		cs->wd_last = wd_ts0;
+		cs->cs_last = cs_ts;
+
+		/* First round or reset pending? */
+		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) || reset_pending)
+			goto reset;
+
+		/* Calculate the nanosecond deltas from the last invocation */
+		wd_delta = cycles_to_nsec_safe(watchdog, wd_last, wd_ts0);
+		cs_delta = cycles_to_nsec_safe(cs, cs_last, cs_ts);
+
+		watchdog_data.wd_delta = wd_delta;
+		watchdog_data.cs_delta = cs_delta;
+
+		/*
+		 * Ensure that the deltas are within the readout limits of
+		 * the clocksource and the watchdog. Long delays can cause
+		 * clocksources to overflow.
+		 */
+		max_delta = max(wd_delta, cs_delta);
+		if (max_delta > cs->max_idle_ns || max_delta > watchdog->max_idle_ns)
+			goto reset;
+
+		/*
+		 * Calculate and validate the skew against the allowed PPM
+		 * value of the maximum delta plus the watchdog readout
+		 * time.
+		 */
+		if (abs(wd_delta - cs_delta) < (max_delta >> ppm_shift) + wd_seq)
+			return true;
+
+		watchdog_data.result = WD_FREQ_SKEWED;
+		return false;
 	}
 
-	/* Don't verify ourselves. */
-	cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
+	watchdog_data.result = WD_FREQ_TIMEOUT;
+	return false;
+
+reset:
+	cs->flags |= CLOCK_SOURCE_WATCHDOG;
+	watchdog_data.result = WD_FREQ_RESET;
+	return false;
 }
 
-static void clocksource_verify_one_cpu(void *csin)
+/* Synchronization for sched clock */
+static void clocksource_tick_stable(struct clocksource *cs)
 {
-	struct clocksource *cs = (struct clocksource *)csin;
-
-	csnow_mid = cs->read(cs);
+	if (cs == curr_clocksource && cs->tick_stable)
+		cs->tick_stable(cs);
 }
 
-void clocksource_verify_percpu(struct clocksource *cs)
+/* Conditionaly enable high resolution mode */
+static void clocksource_enable_highres(struct clocksource *cs)
 {
-	int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
-	u64 csnow_begin, csnow_end;
-	int cpu, testcpu;
-	s64 delta;
+	if ((cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) ||
+	    !(cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) ||
+	    !watchdog || !(watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS))
+		return;
+
+	/* Mark it valid for high-res. */
+	cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
 
-	if (verify_n_cpus == 0)
+	/*
+	 * Can't schedule work before finished_booting is
+	 * true. clocksource_done_booting will take care of it.
+	 */
+	if (!finished_booting)
 		return;
-	cpumask_clear(&cpus_ahead);
-	cpumask_clear(&cpus_behind);
-	cpus_read_lock();
-	migrate_disable();
-	clocksource_verify_choose_cpus();
-	if (cpumask_empty(&cpus_chosen)) {
-		migrate_enable();
-		cpus_read_unlock();
-		pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
+
+	if (cs->flags & CLOCK_SOURCE_WDTEST)
 		return;
+
+	/*
+	 * If this is not the current clocksource let the watchdog thread
+	 * reselect it. Due to the change to high res this clocksource
+	 * might be preferred now. If it is the current clocksource let the
+	 * tick code know about that change.
+	 */
+	if (cs != curr_clocksource) {
+		cs->flags |= CLOCK_SOURCE_RESELECT;
+		schedule_work(&watchdog_work);
+	} else {
+		tick_clock_notify();
 	}
-	testcpu = smp_processor_id();
-	pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n",
-		cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
-	preempt_disable();
-	for_each_cpu(cpu, &cpus_chosen) {
-		if (cpu == testcpu)
-			continue;
-		csnow_begin = cs->read(cs);
-		smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
-		csnow_end = cs->read(cs);
-		delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
-		if (delta < 0)
-			cpumask_set_cpu(cpu, &cpus_behind);
-		delta = (csnow_end - csnow_mid) & cs->mask;
-		if (delta < 0)
-			cpumask_set_cpu(cpu, &cpus_ahead);
-		cs_nsec = cycles_to_nsec_safe(cs, csnow_begin, csnow_end);
-		if (cs_nsec > cs_nsec_max)
-			cs_nsec_max = cs_nsec;
-		if (cs_nsec < cs_nsec_min)
-			cs_nsec_min = cs_nsec;
+}
+
+static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2);
+
+static void watchdog_print_freq_timeout(struct clocksource *cs)
+{
+	if (!__ratelimit(&ratelimit_state))
+		return;
+	pr_info("Watchdog %s read timed out. Readout sequence took: %lluns\n",
+		watchdog->name, watchdog_data.wd_seq);
+}
+
+static void watchdog_print_freq_skew(struct clocksource *cs)
+{
+	pr_warn("Marking clocksource %s unstable due to frequency skew\n", cs->name);
+	pr_warn("Watchdog    %20s interval: %16lluns\n", watchdog->name, watchdog_data.wd_delta);
+	pr_warn("Clocksource %20s interval: %16lluns\n", cs->name, watchdog_data.cs_delta);
+}
+
+static void watchdog_handle_remote_timeout(struct clocksource *cs)
+{
+	pr_info_once("Watchdog remote CPU %u read timed out\n", watchdog_data.curr_cpu);
+}
+
+static void watchdog_print_remote_skew(struct clocksource *cs)
+{
+	pr_warn("Marking clocksource %s unstable due to inter CPU skew\n", cs->name);
+	if (watchdog_data.cpu_ts[0] < watchdog_data.cpu_ts[1]) {
+		pr_warn("CPU%u %16llu < CPU%u %16llu (cycles)\n", smp_processor_id(),
+			watchdog_data.cpu_ts[0], watchdog_data.curr_cpu, watchdog_data.cpu_ts[1]);
+	} else {
+		pr_warn("CPU%u %16llu < CPU%u %16llu (cycles)\n", watchdog_data.curr_cpu,
+			watchdog_data.cpu_ts[1], smp_processor_id(), watchdog_data.cpu_ts[0]);
 	}
-	preempt_enable();
-	migrate_enable();
-	cpus_read_unlock();
-	if (!cpumask_empty(&cpus_ahead))
-		pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
-			cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
-	if (!cpumask_empty(&cpus_behind))
-		pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
-			cpumask_pr_args(&cpus_behind), testcpu, cs->name);
-	pr_info("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
-		testcpu, cs_nsec_min, cs_nsec_max, cs->name);
-}
-EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
+}
 
-static inline void clocksource_reset_watchdog(void)
+static void watchdog_check_result(struct clocksource *cs)
 {
-	struct clocksource *cs;
+	switch (watchdog_data.result) {
+	case WD_SUCCESS:
+		clocksource_tick_stable(cs);
+		clocksource_enable_highres(cs);
+		return;
 
-	list_for_each_entry(cs, &watchdog_list, wd_list)
+	case WD_FREQ_TIMEOUT:
+		watchdog_print_freq_timeout(cs);
+		/* Try again later and invalidate the reference timestamps. */
 		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
-}
+		return;
 
+	case WD_FREQ_NO_WATCHDOG:
+	case WD_FREQ_RESET:
+		/*
+		 * Nothing to do when the reference timestamps were reset
+		 * or no watchdog clocksource registered.
+		 */
+		return;
+
+	case WD_FREQ_SKEWED:
+		watchdog_print_freq_skew(cs);
+		break;
+
+	case WD_CPU_TIMEOUT:
+		/* Remote check timed out. Try again next cycle. */
+		watchdog_handle_remote_timeout(cs);
+		return;
+
+	case WD_CPU_SKEWED:
+		watchdog_print_remote_skew(cs);
+		break;
+	}
+	__clocksource_unstable(cs);
+}
 
 static void clocksource_watchdog(struct timer_list *unused)
 {
-	int64_t wd_nsec, cs_nsec, interval;
-	u64 csnow, wdnow, cslast, wdlast;
-	int next_cpu, reset_pending;
 	struct clocksource *cs;
-	enum wd_read_status read_ret;
-	unsigned long extra_wait = 0;
-	u32 md;
+	bool reset_pending;
 
-	spin_lock(&watchdog_lock);
+	guard(spinlock)(&watchdog_lock);
 	if (!watchdog_running)
-		goto out;
+		return;
 
 	reset_pending = atomic_read(&watchdog_reset_pending);
 
 	list_for_each_entry(cs, &watchdog_list, wd_list) {
-
 		/* Clocksource already marked unstable? */
 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
 			if (finished_booting)
@@ -446,170 +659,40 @@ static void clocksource_watchdog(struct timer_list *unused)
 			continue;
 		}
 
-		read_ret = cs_watchdog_read(cs, &csnow, &wdnow);
-
-		if (read_ret == WD_READ_UNSTABLE) {
-			/* Clock readout unreliable, so give it up. */
-			__clocksource_unstable(cs);
-			continue;
-		}
-
-		/*
-		 * When WD_READ_SKIP is returned, it means the system is likely
-		 * under very heavy load, where the latency of reading
-		 * watchdog/clocksource is very big, and affect the accuracy of
-		 * watchdog check. So give system some space and suspend the
-		 * watchdog check for 5 minutes.
-		 */
-		if (read_ret == WD_READ_SKIP) {
-			/*
-			 * As the watchdog timer will be suspended, and
-			 * cs->last could keep unchanged for 5 minutes, reset
-			 * the counters.
-			 */
-			clocksource_reset_watchdog();
-			extra_wait = HZ * 300;
-			break;
-		}
-
-		/* Clocksource initialized ? */
-		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
-		    atomic_read(&watchdog_reset_pending)) {
-			cs->flags |= CLOCK_SOURCE_WATCHDOG;
-			cs->wd_last = wdnow;
-			cs->cs_last = csnow;
-			continue;
-		}
-
-		wd_nsec = cycles_to_nsec_safe(watchdog, cs->wd_last, wdnow);
-		cs_nsec = cycles_to_nsec_safe(cs, cs->cs_last, csnow);
-		wdlast = cs->wd_last; /* save these in case we print them */
-		cslast = cs->cs_last;
-		cs->cs_last = csnow;
-		cs->wd_last = wdnow;
-
-		if (atomic_read(&watchdog_reset_pending))
-			continue;
-
-		/*
-		 * The processing of timer softirqs can get delayed (usually
-		 * on account of ksoftirqd not getting to run in a timely
-		 * manner), which causes the watchdog interval to stretch.
-		 * Skew detection may fail for longer watchdog intervals
-		 * on account of fixed margins being used.
-		 * Some clocksources, e.g. acpi_pm, cannot tolerate
-		 * watchdog intervals longer than a few seconds.
-		 */
-		interval = max(cs_nsec, wd_nsec);
-		if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) {
-			if (system_state > SYSTEM_SCHEDULING &&
-			    interval > 2 * watchdog_max_interval) {
-				watchdog_max_interval = interval;
-				pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n",
-					cs_nsec, wd_nsec);
-			}
-			watchdog_timer.expires = jiffies;
-			continue;
-		}
-
-		/* Check the deviation from the watchdog clocksource. */
-		md = cs->uncertainty_margin + watchdog->uncertainty_margin;
-		if (abs(cs_nsec - wd_nsec) > md) {
-			s64 cs_wd_msec;
-			s64 wd_msec;
-			u32 wd_rem;
-
-			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
-				smp_processor_id(), cs->name);
-			pr_warn("                      '%s' wd_nsec: %lld wd_now: %llx wd_last: %llx mask: %llx\n",
-				watchdog->name, wd_nsec, wdnow, wdlast, watchdog->mask);
-			pr_warn("                      '%s' cs_nsec: %lld cs_now: %llx cs_last: %llx mask: %llx\n",
-				cs->name, cs_nsec, csnow, cslast, cs->mask);
-			cs_wd_msec = div_s64_rem(cs_nsec - wd_nsec, 1000 * 1000, &wd_rem);
-			wd_msec = div_s64_rem(wd_nsec, 1000 * 1000, &wd_rem);
-			pr_warn("                      Clocksource '%s' skewed %lld ns (%lld ms) over watchdog '%s' interval of %lld ns (%lld ms)\n",
-				cs->name, cs_nsec - wd_nsec, cs_wd_msec, watchdog->name, wd_nsec, wd_msec);
-			if (curr_clocksource == cs)
-				pr_warn("                      '%s' is current clocksource.\n", cs->name);
-			else if (curr_clocksource)
-				pr_warn("                      '%s' (not '%s') is current clocksource.\n", curr_clocksource->name, cs->name);
-			else
-				pr_warn("                      No current clocksource.\n");
-			__clocksource_unstable(cs);
-			continue;
+		/* Compare against watchdog clocksource if available */
+		if (watchdog_check_freq(cs, reset_pending)) {
+			/* Check for inter CPU skew */
+			watchdog_check_cpu_skew(cs);
 		}
 
-		if (cs == curr_clocksource && cs->tick_stable)
-			cs->tick_stable(cs);
-
-		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
-		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
-		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
-			/* Mark it valid for high-res. */
-			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
-
-			/*
-			 * clocksource_done_booting() will sort it if
-			 * finished_booting is not set yet.
-			 */
-			if (!finished_booting)
-				continue;
-
-			/*
-			 * If this is not the current clocksource let
-			 * the watchdog thread reselect it. Due to the
-			 * change to high res this clocksource might
-			 * be preferred now. If it is the current
-			 * clocksource let the tick code know about
-			 * that change.
-			 */
-			if (cs != curr_clocksource) {
-				cs->flags |= CLOCK_SOURCE_RESELECT;
-				schedule_work(&watchdog_work);
-			} else {
-				tick_clock_notify();
-			}
-		}
+		watchdog_check_result(cs);
 	}
 
-	/*
-	 * We only clear the watchdog_reset_pending, when we did a
-	 * full cycle through all clocksources.
-	 */
+	/* Clear after the full clocksource walk */
 	if (reset_pending)
 		atomic_dec(&watchdog_reset_pending);
 
-	/*
-	 * Cycle through CPUs to check if the CPUs stay synchronized
-	 * to each other.
-	 */
-	next_cpu = cpumask_next_wrap(raw_smp_processor_id(), cpu_online_mask);
-
-	/*
-	 * Arm timer if not already pending: could race with concurrent
-	 * pair clocksource_stop_watchdog() clocksource_start_watchdog().
-	 */
+	/* Could have been rearmed by a stop/start cycle */
 	if (!timer_pending(&watchdog_timer)) {
-		watchdog_timer.expires += WATCHDOG_INTERVAL + extra_wait;
-		add_timer_on(&watchdog_timer, next_cpu);
+		watchdog_timer.expires += WATCHDOG_INTERVAL;
+		add_timer_local(&watchdog_timer);
 	}
-out:
-	spin_unlock(&watchdog_lock);
 }
 
 static inline void clocksource_start_watchdog(void)
 {
-	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
+	if (watchdog_running || list_empty(&watchdog_list))
 		return;
-	timer_setup(&watchdog_timer, clocksource_watchdog, 0);
+	timer_setup(&watchdog_timer, clocksource_watchdog, TIMER_PINNED);
 	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
-	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
+
+	add_timer_on(&watchdog_timer, get_boot_cpu_id());
 	watchdog_running = 1;
 }
 
 static inline void clocksource_stop_watchdog(void)
 {
-	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
+	if (!watchdog_running || !list_empty(&watchdog_list))
 		return;
 	timer_delete(&watchdog_timer);
 	watchdog_running = 0;
@@ -697,12 +780,6 @@ static int __clocksource_watchdog_kthread(void)
 	unsigned long flags;
 	int select = 0;
 
-	/* Do any required per-CPU skew verification. */
-	if (curr_clocksource &&
-	    curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
-	    curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
-		clocksource_verify_percpu(curr_clocksource);
-
 	spin_lock_irqsave(&watchdog_lock, flags);
 	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
@@ -1023,6 +1100,8 @@ static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
 			continue;
 		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
 			continue;
+		if (cs->flags & CLOCK_SOURCE_WDTEST)
+			continue;
 		return cs;
 	}
 	return NULL;
@@ -1047,6 +1126,8 @@ static void __clocksource_select(bool skipcur)
 			continue;
 		if (strcmp(cs->name, override_name) != 0)
 			continue;
+		if (cs->flags & CLOCK_SOURCE_WDTEST)
+			continue;
 		/*
 		 * Check to make sure we don't switch to a non-highres
 		 * capable clocksource if the tick code is in oneshot
@@ -1181,30 +1262,6 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq
 		cs->freq_khz = div_u64((u64)freq * scale, 1000);
 	}
 
-	/*
-	 * If the uncertainty margin is not specified, calculate it.  If
-	 * both scale and freq are non-zero, calculate the clock period, but
-	 * bound below at 2*WATCHDOG_MAX_SKEW, that is, 500ppm by default.
-	 * However, if either of scale or freq is zero, be very conservative
-	 * and take the tens-of-milliseconds WATCHDOG_THRESHOLD value
-	 * for the uncertainty margin.  Allow stupidly small uncertainty
-	 * margins to be specified by the caller for testing purposes,
-	 * but warn to discourage production use of this capability.
-	 *
-	 * Bottom line:  The sum of the uncertainty margins of the
-	 * watchdog clocksource and the clocksource under test will be at
-	 * least 500ppm by default.  For more information, please see the
-	 * comment preceding CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US above.
-	 */
-	if (scale && freq && !cs->uncertainty_margin) {
-		cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq);
-		if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW)
-			cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW;
-	} else if (!cs->uncertainty_margin) {
-		cs->uncertainty_margin = WATCHDOG_THRESHOLD;
-	}
-	WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW);
-
 	/*
 	 * Ensure clocksources that have large 'mult' values don't overflow
 	 * when adjusted.
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a5c7d15fce72..4aebcc80b8e2 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -32,7 +32,6 @@ static u64 jiffies_read(struct clocksource *cs)
 static struct clocksource clocksource_jiffies = {
 	.name			= "jiffies",
 	.rating			= 1, /* lowest valid rating*/
-	.uncertainty_margin	= 32 * NSEC_PER_MSEC,
 	.read			= jiffies_read,
 	.mask			= CLOCKSOURCE_MASK(32),
 	.mult			= TICK_NSEC << JIFFIES_SHIFT, /* details above */
-- 
cgit v1.2.3


From 3d6bb84f6bb3f4c05fc47fd02ce75ce3032a4ce1 Mon Sep 17 00:00:00 2001
From: Yuto Ohnuki <ytohnuki@amazon.com>
Date: Thu, 26 Feb 2026 20:18:58 +0000
Subject: fs: remove stale and duplicate forward declarations

Remove the following unnecessary forward declarations from fs.h, which
improves maintainability.

- struct hd_geometry: became unused in fs.h when
  block_device_operations was moved to blkdev.h in commit 08f858512151
  ("[PATCH] move block_device_operations to blkdev.h"). The forward
  declaration is now added to blkdev.h where it is actually used.

- struct iovec: became unused when aio_read/aio_write were removed in
  commit 8436318205b9 ("->aio_read and ->aio_write removed")

- struct iov_iter: duplicate forward declaration. This removes the
  redundant second declaration, added in commit 293bc9822fa9
  ("new methods: ->read_iter() and ->write_iter()")

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512301303.s7YWTZHA-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202512302139.Wl0soAlz-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202512302105.pmzYfmcV-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202512302125.FNgHwu5z-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202512302108.nIV8r5ES-lkp@intel.com/
Signed-off-by: Yuto Ohnuki <ytohnuki@amazon.com>
Link: https://patch.msgid.link/20260226201857.27310-2-ytohnuki@amazon.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/blkdev.h | 1 +
 include/linux/fs.h     | 3 ---
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 99ef8cd7673c..54cdd71aab07 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -38,6 +38,7 @@ struct blk_flush_queue;
 struct kiocb;
 struct pr_ops;
 struct rq_qos;
+struct hd_geometry;
 struct blk_report_zones_args;
 struct blk_queue_stats;
 struct blk_stat_callback;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a2af5ddd5323..280d43c9f04a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -55,8 +55,6 @@ struct bdi_writeback;
 struct bio;
 struct io_comp_batch;
 struct fiemap_extent_info;
-struct hd_geometry;
-struct iovec;
 struct kiocb;
 struct kobject;
 struct pipe_inode_info;
@@ -1917,7 +1915,6 @@ struct dir_context {
  */
 #define COPY_FILE_SPLICE		(1 << 0)
 
-struct iov_iter;
 struct io_uring_cmd;
 struct offset_ctx;
 
-- 
cgit v1.2.3


From 3414c809777e37855063347f5fbd23ff03e1c9fb Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 11 Mar 2026 22:13:23 -0700
Subject: hwrng: core - avoid kernel-doc warnings

Mark internal fields as "private:" so that kernel-doc comments
are not needed for them, eliminating kernel-doc warnings:

Warning: include/linux/hw_random.h:54 struct member 'list' not described
 in 'hwrng'
Warning: include/linux/hw_random.h:54 struct member 'ref' not described
 in 'hwrng'
Warning: include/linux/hw_random.h:54 struct member 'cleanup_work' not
 described in 'hwrng'
Warning: include/linux/hw_random.h:54 struct member 'cleanup_done' not
 described in 'hwrng'
Warning: include/linux/hw_random.h:54 struct member 'dying' not described
 in 'hwrng'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/hw_random.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index b77bc55a4cf3..1d3c1927986e 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -46,7 +46,7 @@ struct hwrng {
 	unsigned long priv;
 	unsigned short quality;
 
-	/* internal. */
+	/* private: internal. */
 	struct list_head list;
 	struct kref ref;
 	struct work_struct cleanup_work;
-- 
cgit v1.2.3


From b44c7129f1e3cd0e6233c7cb2d88f917d92f213d Mon Sep 17 00:00:00 2001
From: Zongyu Wu <wuzongyu1@huawei.com>
Date: Fri, 13 Mar 2026 17:40:39 +0800
Subject: crypto: hisilicon - add device load query functionality to debugfs

The accelerator device supports usage statistics. This patch enables
obtaining the accelerator's usage through the "dev_usage" file.
The returned number expressed as a percentage as a percentage.

Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/ABI/testing/debugfs-hisi-hpre |  7 ++++
 Documentation/ABI/testing/debugfs-hisi-sec  |  7 ++++
 Documentation/ABI/testing/debugfs-hisi-zip  |  7 ++++
 drivers/crypto/hisilicon/debugfs.c          | 54 +++++++++++++++++++++++++++++
 drivers/crypto/hisilicon/hpre/hpre_main.c   | 18 ++++++++++
 drivers/crypto/hisilicon/sec2/sec_main.c    | 11 ++++++
 drivers/crypto/hisilicon/zip/zip_main.c     | 19 ++++++++++
 include/linux/hisi_acc_qm.h                 | 12 +++++++
 8 files changed, 135 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre
index 29fb7d5ffc69..5a137f701eea 100644
--- a/Documentation/ABI/testing/debugfs-hisi-hpre
+++ b/Documentation/ABI/testing/debugfs-hisi-hpre
@@ -50,6 +50,13 @@ Description:	Dump debug registers from the QM.
 		Available for PF and VF in host. VF in guest currently only
 		has one debug register.
 
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/dev_usage
+Date:		Mar 2026
+Contact:	linux-crypto@vger.kernel.org
+Description:	Query the real-time bandwidth usage of device.
+		Returns the bandwidth usage of each channel on the device.
+		The returned number is in percentage.
+
 What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
 Date:		Sep 2019
 Contact:	linux-crypto@vger.kernel.org
diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec
index 82bf4a0dc7f7..676e2dc2de8d 100644
--- a/Documentation/ABI/testing/debugfs-hisi-sec
+++ b/Documentation/ABI/testing/debugfs-hisi-sec
@@ -24,6 +24,13 @@ Description:	The <bdf> is related the function for PF and VF.
 		1/1000~1000/1000 of total QoS. The driver reading alg_qos to
 		get related QoS in the host and VM, Such as "cat alg_qos".
 
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/dev_usage
+Date:		Mar 2026
+Contact:	linux-crypto@vger.kernel.org
+Description:	Query the real-time bandwidth usage of device.
+		Returns the bandwidth usage of each channel on the device.
+		The returned number is in percentage.
+
 What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
 Date:		Oct 2019
 Contact:	linux-crypto@vger.kernel.org
diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip
index 0abd65d27e9b..46bf47bf6b42 100644
--- a/Documentation/ABI/testing/debugfs-hisi-zip
+++ b/Documentation/ABI/testing/debugfs-hisi-zip
@@ -36,6 +36,13 @@ Description:	The <bdf> is related the function for PF and VF.
 		1/1000~1000/1000 of total QoS. The driver reading alg_qos to
 		get related QoS in the host and VM, Such as "cat alg_qos".
 
+What:		/sys/kernel/debug/hisi_zip/<bdf>/dev_usage
+Date:		Mar 2026
+Contact:	linux-crypto@vger.kernel.org
+Description:	Query the real-time bandwidth usage of device.
+		Returns the bandwidth usage of each channel on the device.
+		The returned number is in percentage.
+
 What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/regs
 Date:		Nov 2018
 Contact:	linux-crypto@vger.kernel.org
diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c
index 32e9f8350289..5d8b4112c543 100644
--- a/drivers/crypto/hisilicon/debugfs.c
+++ b/drivers/crypto/hisilicon/debugfs.c
@@ -1040,6 +1040,57 @@ void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm)
 	}
 }
 
+static int qm_usage_percent(struct hisi_qm *qm, int chan_num)
+{
+	u32 val, used_bw, total_bw;
+
+	val = readl(qm->io_base + QM_CHANNEL_USAGE_OFFSET +
+				chan_num * QM_CHANNEL_ADDR_INTRVL);
+	used_bw = lower_16_bits(val);
+	total_bw = upper_16_bits(val);
+	if (!total_bw)
+		return -EIO;
+
+	if (total_bw <= used_bw)
+		return QM_MAX_DEV_USAGE;
+
+	return (used_bw * QM_DEV_USAGE_RATE) / total_bw;
+}
+
+static int qm_usage_show(struct seq_file *s, void *unused)
+{
+	struct hisi_qm *qm = s->private;
+	bool dev_is_active = true;
+	int i, ret;
+
+	/* If device is in suspended, usage is 0. */
+	ret = hisi_qm_get_dfx_access(qm);
+	if (ret == -EAGAIN) {
+		dev_is_active = false;
+	} else if (ret) {
+		dev_err(&qm->pdev->dev, "failed to get dfx access for usage_show!\n");
+		return ret;
+	}
+
+	ret = 0;
+	for (i = 0; i < qm->channel_data.channel_num; i++) {
+		if (dev_is_active) {
+			ret = qm_usage_percent(qm, i);
+			if (ret < 0) {
+				hisi_qm_put_dfx_access(qm);
+				return ret;
+			}
+		}
+		seq_printf(s, "%s: %d\n", qm->channel_data.channel_name[i], ret);
+	}
+
+	if (dev_is_active)
+		hisi_qm_put_dfx_access(qm);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(qm_usage);
+
 static int qm_diff_regs_show(struct seq_file *s, void *unused)
 {
 	struct hisi_qm *qm = s->private;
@@ -1159,6 +1210,9 @@ void hisi_qm_debug_init(struct hisi_qm *qm)
 		debugfs_create_file("diff_regs", 0444, qm->debug.qm_d,
 					qm, &qm_diff_regs_fops);
 
+	if (qm->ver >= QM_HW_V5)
+		debugfs_create_file("dev_usage", 0444, qm->debug.debug_root, qm, &qm_usage_fops);
+
 	debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
 
 	debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops);
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 884d5d0afaf4..357ab5e5887e 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -121,6 +121,8 @@
 #define HPRE_DFX_COMMON2_LEN		0xE
 #define HPRE_DFX_CORE_LEN		0x43
 
+#define HPRE_MAX_CHANNEL_NUM		2
+
 static const char hpre_name[] = "hisi_hpre";
 static struct dentry *hpre_debugfs_root;
 static const struct pci_device_id hpre_dev_ids[] = {
@@ -370,6 +372,11 @@ static struct dfx_diff_registers hpre_diff_regs[] = {
 	},
 };
 
+static const char *hpre_channel_name[HPRE_MAX_CHANNEL_NUM] = {
+	"RSA",
+	"ECC",
+};
+
 static const struct hisi_qm_err_ini hpre_err_ini;
 
 bool hpre_check_alg_support(struct hisi_qm *qm, u32 alg)
@@ -1234,6 +1241,16 @@ static int hpre_pre_store_cap_reg(struct hisi_qm *qm)
 	return 0;
 }
 
+static void hpre_set_channels(struct hisi_qm *qm)
+{
+	struct qm_channel *channel_data = &qm->channel_data;
+	int i;
+
+	channel_data->channel_num = HPRE_MAX_CHANNEL_NUM;
+	for (i = 0; i < HPRE_MAX_CHANNEL_NUM; i++)
+		channel_data->channel_name[i] = hpre_channel_name[i];
+}
+
 static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 {
 	u64 alg_msk;
@@ -1267,6 +1284,7 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 		return ret;
 	}
 
+	hpre_set_channels(qm);
 	/* Fetch and save the value of capability registers */
 	ret = hpre_pre_store_cap_reg(qm);
 	if (ret) {
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index efda8646fc60..6647b7340827 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -133,6 +133,8 @@
 #define SEC_AEAD_BITMAP			(GENMASK_ULL(7, 6) | GENMASK_ULL(18, 17) | \
 					GENMASK_ULL(45, 43))
 
+#define SEC_MAX_CHANNEL_NUM		1
+
 struct sec_hw_error {
 	u32 int_msk;
 	const char *msg;
@@ -1288,6 +1290,14 @@ static int sec_pre_store_cap_reg(struct hisi_qm *qm)
 	return 0;
 }
 
+static void sec_set_channels(struct hisi_qm *qm)
+{
+	struct qm_channel *channel_data = &qm->channel_data;
+
+	channel_data->channel_num = SEC_MAX_CHANNEL_NUM;
+	channel_data->channel_name[0] = "SEC";
+}
+
 static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 {
 	u64 alg_msk;
@@ -1325,6 +1335,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 		return ret;
 	}
 
+	sec_set_channels(qm);
 	/* Fetch and save the value of capability registers */
 	ret = sec_pre_store_cap_reg(qm);
 	if (ret) {
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 85b26ef17548..44df9c859bd8 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -122,6 +122,8 @@
 #define HZIP_LIT_LEN_EN_OFFSET		0x301204
 #define HZIP_LIT_LEN_EN_EN		BIT(4)
 
+#define HZIP_MAX_CHANNEL_NUM		3
+
 enum {
 	HZIP_HIGH_COMP_RATE,
 	HZIP_HIGH_COMP_PERF,
@@ -359,6 +361,12 @@ static struct dfx_diff_registers hzip_diff_regs[] = {
 	},
 };
 
+static const char *zip_channel_name[HZIP_MAX_CHANNEL_NUM] = {
+	"COMPRESS",
+	"DECOMPRESS",
+	"DAE"
+};
+
 static int hzip_diff_regs_show(struct seq_file *s, void *unused)
 {
 	struct hisi_qm *qm = s->private;
@@ -1400,6 +1408,16 @@ static int zip_pre_store_cap_reg(struct hisi_qm *qm)
 	return 0;
 }
 
+static void zip_set_channels(struct hisi_qm *qm)
+{
+	struct qm_channel *channel_data = &qm->channel_data;
+	int i;
+
+	channel_data->channel_num = HZIP_MAX_CHANNEL_NUM;
+	for (i = 0; i < HZIP_MAX_CHANNEL_NUM; i++)
+		channel_data->channel_name[i] = zip_channel_name[i];
+}
+
 static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 {
 	u64 alg_msk;
@@ -1438,6 +1456,7 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 		return ret;
 	}
 
+	zip_set_channels(qm);
 	/* Fetch and save the value of capability registers */
 	ret = zip_pre_store_cap_reg(qm);
 	if (ret) {
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 51a6dc2b97e9..8a581b5bbbcd 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -102,6 +102,12 @@
 #define QM_MIG_REGION_SEL		0x100198
 #define QM_MIG_REGION_EN		BIT(0)
 
+#define QM_MAX_CHANNEL_NUM		8
+#define QM_CHANNEL_USAGE_OFFSET		0x1100
+#define QM_MAX_DEV_USAGE		100
+#define QM_DEV_USAGE_RATE		100
+#define QM_CHANNEL_ADDR_INTRVL		0x4
+
 /* uacce mode of the driver */
 #define UACCE_MODE_NOUACCE		0 /* don't use uacce */
 #define UACCE_MODE_SVA			1 /* use uacce sva mode */
@@ -359,6 +365,11 @@ struct qm_rsv_buf {
 	struct qm_dma qcdma;
 };
 
+struct qm_channel {
+	int channel_num;
+	const char *channel_name[QM_MAX_CHANNEL_NUM];
+};
+
 struct hisi_qm {
 	enum qm_hw_ver ver;
 	enum qm_fun_type fun_type;
@@ -433,6 +444,7 @@ struct hisi_qm {
 	struct qm_err_isolate isolate_data;
 
 	struct hisi_qm_cap_tables cap_tables;
+	struct qm_channel channel_data;
 };
 
 struct hisi_qp_status {
-- 
cgit v1.2.3


From c8c4a2972f83c8b68ff03b43cecdb898939ff851 Mon Sep 17 00:00:00 2001
From: Daniel Jordan <daniel.m.jordan@oracle.com>
Date: Fri, 13 Mar 2026 11:24:33 -0400
Subject: padata: Put CPU offline callback in ONLINE section to allow failure

syzbot reported the following warning:

    DEAD callback error for CPU1
    WARNING: kernel/cpu.c:1463 at _cpu_down+0x759/0x1020 kernel/cpu.c:1463, CPU#0: syz.0.1960/14614

at commit 4ae12d8bd9a8 ("Merge tag 'kbuild-fixes-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kbuild/linux")
which tglx traced to padata_cpu_dead() given it's the only
sub-CPUHP_TEARDOWN_CPU callback that returns an error.

Failure isn't allowed in hotplug states before CPUHP_TEARDOWN_CPU
so move the CPU offline callback to the ONLINE section where failure is
possible.

Fixes: 894c9ef9780c ("padata: validate cpumask without removed CPU during offline")
Reported-by: syzbot+123e1b70473ce213f3af@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/69af0a05.050a0220.310d8.002f.GAE@google.com/
Debugged-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/cpuhotplug.h |   1 -
 include/linux/padata.h     |   8 +--
 kernel/padata.c            | 120 +++++++++++++++++++++++----------------------
 3 files changed, 65 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 62cd7b35a29c..22ba327ec227 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -92,7 +92,6 @@ enum cpuhp_state {
 	CPUHP_NET_DEV_DEAD,
 	CPUHP_IOMMU_IOVA_DEAD,
 	CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
-	CPUHP_PADATA_DEAD,
 	CPUHP_AP_DTPM_CPU_DEAD,
 	CPUHP_RANDOM_PREPARE,
 	CPUHP_WORKQUEUE_PREP,
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 765f2778e264..b6232bea6edf 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -149,23 +149,23 @@ struct padata_mt_job {
 /**
  * struct padata_instance - The overall control structure.
  *
- * @cpu_online_node: Linkage for CPU online callback.
- * @cpu_dead_node: Linkage for CPU offline callback.
+ * @cpuhp_node: Linkage for CPU hotplug callbacks.
  * @parallel_wq: The workqueue used for parallel work.
  * @serial_wq: The workqueue used for serial work.
  * @pslist: List of padata_shell objects attached to this instance.
  * @cpumask: User supplied cpumasks for parallel and serial works.
+ * @validate_cpumask: Internal cpumask used to validate @cpumask during hotplug.
  * @kobj: padata instance kernel object.
  * @lock: padata instance lock.
  * @flags: padata flags.
  */
 struct padata_instance {
-	struct hlist_node		cpu_online_node;
-	struct hlist_node		cpu_dead_node;
+	struct hlist_node		cpuhp_node;
 	struct workqueue_struct		*parallel_wq;
 	struct workqueue_struct		*serial_wq;
 	struct list_head		pslist;
 	struct padata_cpumask		cpumask;
+	cpumask_var_t			validate_cpumask;
 	struct kobject                   kobj;
 	struct mutex			 lock;
 	u8				 flags;
diff --git a/kernel/padata.c b/kernel/padata.c
index 9e7cfa5ed55b..0d3ea1b68b1f 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -535,7 +535,8 @@ static void padata_init_reorder_list(struct parallel_data *pd)
 }
 
 /* Allocate and initialize the internal cpumask dependend resources. */
-static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
+static struct parallel_data *padata_alloc_pd(struct padata_shell *ps,
+					     int offlining_cpu)
 {
 	struct padata_instance *pinst = ps->pinst;
 	struct parallel_data *pd;
@@ -561,6 +562,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
 
 	cpumask_and(pd->cpumask.pcpu, pinst->cpumask.pcpu, cpu_online_mask);
 	cpumask_and(pd->cpumask.cbcpu, pinst->cpumask.cbcpu, cpu_online_mask);
+	if (offlining_cpu >= 0) {
+		__cpumask_clear_cpu(offlining_cpu, pd->cpumask.pcpu);
+		__cpumask_clear_cpu(offlining_cpu, pd->cpumask.cbcpu);
+	}
 
 	padata_init_reorder_list(pd);
 	padata_init_squeues(pd);
@@ -607,11 +612,11 @@ static void __padata_stop(struct padata_instance *pinst)
 }
 
 /* Replace the internal control structure with a new one. */
-static int padata_replace_one(struct padata_shell *ps)
+static int padata_replace_one(struct padata_shell *ps, int offlining_cpu)
 {
 	struct parallel_data *pd_new;
 
-	pd_new = padata_alloc_pd(ps);
+	pd_new = padata_alloc_pd(ps, offlining_cpu);
 	if (!pd_new)
 		return -ENOMEM;
 
@@ -621,7 +626,7 @@ static int padata_replace_one(struct padata_shell *ps)
 	return 0;
 }
 
-static int padata_replace(struct padata_instance *pinst)
+static int padata_replace(struct padata_instance *pinst, int offlining_cpu)
 {
 	struct padata_shell *ps;
 	int err = 0;
@@ -629,7 +634,7 @@ static int padata_replace(struct padata_instance *pinst)
 	pinst->flags |= PADATA_RESET;
 
 	list_for_each_entry(ps, &pinst->pslist, list) {
-		err = padata_replace_one(ps);
+		err = padata_replace_one(ps, offlining_cpu);
 		if (err)
 			break;
 	}
@@ -646,9 +651,21 @@ static int padata_replace(struct padata_instance *pinst)
 
 /* If cpumask contains no active cpu, we mark the instance as invalid. */
 static bool padata_validate_cpumask(struct padata_instance *pinst,
-				    const struct cpumask *cpumask)
+				    const struct cpumask *cpumask,
+				    int offlining_cpu)
 {
-	if (!cpumask_intersects(cpumask, cpu_online_mask)) {
+	cpumask_copy(pinst->validate_cpumask, cpu_online_mask);
+
+	/*
+	 * @offlining_cpu is still in cpu_online_mask, so remove it here for
+	 * validation.  Using a sub-CPUHP_TEARDOWN_CPU hotplug state where
+	 * @offlining_cpu wouldn't be in the online mask doesn't work because
+	 * padata_cpu_offline() can fail but such a state doesn't allow failure.
+	 */
+	if (offlining_cpu >= 0)
+		__cpumask_clear_cpu(offlining_cpu, pinst->validate_cpumask);
+
+	if (!cpumask_intersects(cpumask, pinst->validate_cpumask)) {
 		pinst->flags |= PADATA_INVALID;
 		return false;
 	}
@@ -664,13 +681,13 @@ static int __padata_set_cpumasks(struct padata_instance *pinst,
 	int valid;
 	int err;
 
-	valid = padata_validate_cpumask(pinst, pcpumask);
+	valid = padata_validate_cpumask(pinst, pcpumask, -1);
 	if (!valid) {
 		__padata_stop(pinst);
 		goto out_replace;
 	}
 
-	valid = padata_validate_cpumask(pinst, cbcpumask);
+	valid = padata_validate_cpumask(pinst, cbcpumask, -1);
 	if (!valid)
 		__padata_stop(pinst);
 
@@ -678,7 +695,7 @@ out_replace:
 	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
 	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 
-	err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst);
+	err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst, -1);
 
 	if (valid)
 		__padata_start(pinst);
@@ -730,26 +747,6 @@ EXPORT_SYMBOL(padata_set_cpumask);
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
-{
-	int err = padata_replace(pinst);
-
-	if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
-	    padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
-		__padata_start(pinst);
-
-	return err;
-}
-
-static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
-{
-	if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
-	    !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
-		__padata_stop(pinst);
-
-	return padata_replace(pinst);
-}
-
 static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
 {
 	return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
@@ -761,27 +758,39 @@ static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
 	struct padata_instance *pinst;
 	int ret;
 
-	pinst = hlist_entry_safe(node, struct padata_instance, cpu_online_node);
+	pinst = hlist_entry_safe(node, struct padata_instance, cpuhp_node);
 	if (!pinst_has_cpu(pinst, cpu))
 		return 0;
 
 	mutex_lock(&pinst->lock);
-	ret = __padata_add_cpu(pinst, cpu);
+
+	ret = padata_replace(pinst, -1);
+
+	if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu, -1) &&
+	    padata_validate_cpumask(pinst, pinst->cpumask.cbcpu, -1))
+		__padata_start(pinst);
+
 	mutex_unlock(&pinst->lock);
 	return ret;
 }
 
-static int padata_cpu_dead(unsigned int cpu, struct hlist_node *node)
+static int padata_cpu_offline(unsigned int cpu, struct hlist_node *node)
 {
 	struct padata_instance *pinst;
 	int ret;
 
-	pinst = hlist_entry_safe(node, struct padata_instance, cpu_dead_node);
+	pinst = hlist_entry_safe(node, struct padata_instance, cpuhp_node);
 	if (!pinst_has_cpu(pinst, cpu))
 		return 0;
 
 	mutex_lock(&pinst->lock);
-	ret = __padata_remove_cpu(pinst, cpu);
+
+	if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu, cpu) ||
+	    !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu, cpu))
+		__padata_stop(pinst);
+
+	ret = padata_replace(pinst, cpu);
+
 	mutex_unlock(&pinst->lock);
 	return ret;
 }
@@ -792,15 +801,14 @@ static enum cpuhp_state hp_online;
 static void __padata_free(struct padata_instance *pinst)
 {
 #ifdef CONFIG_HOTPLUG_CPU
-	cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD,
-					    &pinst->cpu_dead_node);
-	cpuhp_state_remove_instance_nocalls(hp_online, &pinst->cpu_online_node);
+	cpuhp_state_remove_instance_nocalls(hp_online, &pinst->cpuhp_node);
 #endif
 
 	WARN_ON(!list_empty(&pinst->pslist));
 
 	free_cpumask_var(pinst->cpumask.pcpu);
 	free_cpumask_var(pinst->cpumask.cbcpu);
+	free_cpumask_var(pinst->validate_cpumask);
 	destroy_workqueue(pinst->serial_wq);
 	destroy_workqueue(pinst->parallel_wq);
 	kfree(pinst);
@@ -961,10 +969,10 @@ struct padata_instance *padata_alloc(const char *name)
 
 	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
 		goto err_free_serial_wq;
-	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
-		free_cpumask_var(pinst->cpumask.pcpu);
-		goto err_free_serial_wq;
-	}
+	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL))
+		goto err_free_p_mask;
+	if (!alloc_cpumask_var(&pinst->validate_cpumask, GFP_KERNEL))
+		goto err_free_cb_mask;
 
 	INIT_LIST_HEAD(&pinst->pslist);
 
@@ -972,7 +980,7 @@ struct padata_instance *padata_alloc(const char *name)
 	cpumask_copy(pinst->cpumask.cbcpu, cpu_possible_mask);
 
 	if (padata_setup_cpumasks(pinst))
-		goto err_free_masks;
+		goto err_free_v_mask;
 
 	__padata_start(pinst);
 
@@ -981,18 +989,19 @@ struct padata_instance *padata_alloc(const char *name)
 
 #ifdef CONFIG_HOTPLUG_CPU
 	cpuhp_state_add_instance_nocalls_cpuslocked(hp_online,
-						    &pinst->cpu_online_node);
-	cpuhp_state_add_instance_nocalls_cpuslocked(CPUHP_PADATA_DEAD,
-						    &pinst->cpu_dead_node);
+						    &pinst->cpuhp_node);
 #endif
 
 	cpus_read_unlock();
 
 	return pinst;
 
-err_free_masks:
-	free_cpumask_var(pinst->cpumask.pcpu);
+err_free_v_mask:
+	free_cpumask_var(pinst->validate_cpumask);
+err_free_cb_mask:
 	free_cpumask_var(pinst->cpumask.cbcpu);
+err_free_p_mask:
+	free_cpumask_var(pinst->cpumask.pcpu);
 err_free_serial_wq:
 	destroy_workqueue(pinst->serial_wq);
 err_put_cpus:
@@ -1035,7 +1044,7 @@ struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
 	ps->pinst = pinst;
 
 	cpus_read_lock();
-	pd = padata_alloc_pd(ps);
+	pd = padata_alloc_pd(ps, -1);
 	cpus_read_unlock();
 
 	if (!pd)
@@ -1084,31 +1093,24 @@ void __init padata_init(void)
 	int ret;
 
 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
-				      padata_cpu_online, NULL);
+				      padata_cpu_online, padata_cpu_offline);
 	if (ret < 0)
 		goto err;
 	hp_online = ret;
-
-	ret = cpuhp_setup_state_multi(CPUHP_PADATA_DEAD, "padata:dead",
-				      NULL, padata_cpu_dead);
-	if (ret < 0)
-		goto remove_online_state;
 #endif
 
 	possible_cpus = num_possible_cpus();
 	padata_works = kmalloc_objs(struct padata_work, possible_cpus);
 	if (!padata_works)
-		goto remove_dead_state;
+		goto remove_online_state;
 
 	for (i = 0; i < possible_cpus; ++i)
 		list_add(&padata_works[i].pw_list, &padata_free_works);
 
 	return;
 
-remove_dead_state:
-#ifdef CONFIG_HOTPLUG_CPU
-	cpuhp_remove_multi_state(CPUHP_PADATA_DEAD);
 remove_online_state:
+#ifdef CONFIG_HOTPLUG_CPU
 	cpuhp_remove_multi_state(hp_online);
 err:
 #endif
-- 
cgit v1.2.3


From e8b83499b4cbc8b989f7cd6aaa893b669326e93c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 11 Mar 2026 22:13:45 -0700
Subject: iio: st_sensors: correct kernel-doc issues

Use the proper kernel-doc format and struct member names to avoid
kernel-doc warnings:

Warning: include/linux/iio/common/st_sensors.h:184 struct member 'int1'
 not described in 'st_sensor_data_ready_irq'
Warning: ../include/linux/iio/common/st_sensors.h:184 struct member 'int2'
 not described in 'st_sensor_data_ready_irq'
Warning: ../include/linux/iio/common/st_sensors.h:184 struct member
 'stat_drdy' not described in 'st_sensor_data_ready_irq'
Warning: ../include/linux/iio/common/st_sensors.h:184 struct member 'ig1'
 not described in 'st_sensor_data_ready_irq'
Warning: ../include/linux/iio/common/st_sensors.h:219 struct member
 'num_ch' not described in 'st_sensor_settings'
Warning: ../include/linux/iio/common/st_sensors.h:263 struct member
 'num_data_channels' not described in 'st_sensor_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/common/st_sensors.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index f9ae5cdd884f..1ba496f0fea5 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -160,12 +160,12 @@ struct st_sensor_int_drdy {
 
 /**
  * struct st_sensor_data_ready_irq - ST sensor device data-ready interrupt
- * struct int1 - data-ready configuration register for INT1 pin.
- * struct int2 - data-ready configuration register for INT2 pin.
+ * @int1: data-ready configuration register for INT1 pin.
+ * @int2: data-ready configuration register for INT2 pin.
  * @addr_ihl: address to enable/disable active low on the INT lines.
  * @mask_ihl: mask to enable/disable active low on the INT lines.
- * struct stat_drdy - status register of DRDY (data ready) interrupt.
- * struct ig1 - represents the Interrupt Generator 1 of sensors.
+ * @stat_drdy: status register of DRDY (data ready) interrupt.
+ * @ig1: represents the Interrupt Generator 1 of sensors.
  * @en_addr: address of the enable ig1 register.
  * @en_mask: mask to write the on/off value for enable.
  */
@@ -190,6 +190,7 @@ struct st_sensor_data_ready_irq {
  * @wai_addr: The address of WhoAmI register.
  * @sensors_supported: List of supported sensors by struct itself.
  * @ch: IIO channels for the sensor.
+ * @num_ch: Number of IIO channels in @ch
  * @odr: Output data rate register and ODR list available.
  * @pw: Power register of the sensor.
  * @enable_axis: Enable one or more axis of the sensor.
@@ -228,7 +229,7 @@ struct st_sensor_settings {
  * @regmap: Pointer to specific sensor regmap configuration.
  * @enabled: Status of the sensor (false->off, true->on).
  * @odr: Output data rate of the sensor [Hz].
- * num_data_channels: Number of data channels used in buffer.
+ * @num_data_channels: Number of data channels used in buffer.
  * @drdy_int_pin: Redirect DRDY on pin 1 (1) or pin 2 (2).
  * @int_pin_open_drain: Set the interrupt/DRDY to open drain.
  * @irq: the IRQ number.
-- 
cgit v1.2.3


From c0368933dd3d4a8210a07a0c95c471421fbf7523 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Thu, 19 Mar 2026 14:22:10 +0200
Subject: mlx5: Remove redundant iseg base

iseg_base and base_addr both point to BAR0, making iseg_base redundant.
Remove iseg_base and rely on base_addr instead, reducing the size of
struct mlx5_core_dev.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Shay Drori <shayd@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260319122211.27384-2-tariqt@nvidia.com
Reviewed-by: Joe Damato <joe@dama.to>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/mlx5/main.c                       | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c          | 3 +--
 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c | 3 +--
 include/linux/mlx5/driver.h                             | 1 -
 4 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 9fb0629978bd..5b8987ddaa8e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2740,7 +2740,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
 		if (PAGE_SIZE > 4096)
 			return -EOPNOTSUPP;
 
-		pfn = (dev->mdev->iseg_base +
+		pfn = (dev->mdev->bar_addr +
 		       offsetof(struct mlx5_init_seg, internal_timer_h)) >>
 			PAGE_SHIFT;
 		return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b0bc4a7d4a93..661b211eeb95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -950,8 +950,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
 	    pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
 		mlx5_core_dbg(dev, "Enabling pci atomics failed\n");
 
-	dev->iseg_base = dev->bar_addr;
-	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
+	dev->iseg = ioremap(dev->bar_addr, sizeof(*dev->iseg));
 	if (!dev->iseg) {
 		err = -ENOMEM;
 		mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index c45540fe7d9d..4391ef0bab5d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -37,7 +37,6 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
 	mdev->device = &adev->dev;
 	mdev->pdev = sf_dev->parent_mdev->pdev;
 	mdev->bar_addr = sf_dev->bar_base_addr;
-	mdev->iseg_base = sf_dev->bar_base_addr;
 	mdev->coredev_type = MLX5_COREDEV_SF;
 	mdev->priv.parent_mdev = sf_dev->parent_mdev;
 	mdev->priv.adev_idx = adev->id;
@@ -53,7 +52,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
 		goto mdev_err;
 	}
 
-	mdev->iseg = ioremap(mdev->iseg_base, sizeof(*mdev->iseg));
+	mdev->iseg = ioremap(mdev->bar_addr, sizeof(*mdev->iseg));
 	if (!mdev->iseg) {
 		mlx5_core_warn(mdev, "remap error\n");
 		err = -ENOMEM;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 04dcd09f7517..b8b5af78284d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -755,7 +755,6 @@ struct mlx5_core_dev {
 	} caps;
 	struct mlx5_timeouts	*timeouts;
 	u64			sys_image_guid;
-	phys_addr_t		iseg_base;
 	struct mlx5_init_seg __iomem *iseg;
 	phys_addr_t             bar_addr;
 	enum mlx5_device_state	state;
-- 
cgit v1.2.3


From 26469110c750c8179560637dd813e5d65b8148d2 Mon Sep 17 00:00:00 2001
From: Patrisious Haddad <phaddad@nvidia.com>
Date: Thu, 19 Mar 2026 14:22:11 +0200
Subject: net/mlx5: Add vhca_id_type bit to alias context

Add vhca_id_type bit to alias context which allows indicating the
vhca_id_type to be passed at vhca_id_to_be_accessed, which can be either
HW or SW, note that SW_VHCA_ID must be used to allow alias to work
properly after migration.

Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260319122211.27384-3-tariqt@nvidia.com
Reviewed-by: Joe Damato <joe@dama.to>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 8fa4fb3d36cf..2400b4c38c77 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1968,7 +1968,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         reserved_at_360[0x3];
 	u8         log_max_rq[0x5];
-	u8         reserved_at_368[0x3];
+	u8         ft_alias_sw_vhca_id[0x1];
+	u8         reserved_at_369[0x2];
 	u8         log_max_sq[0x5];
 	u8         reserved_at_370[0x3];
 	u8         log_max_tir[0x5];
@@ -6957,7 +6958,9 @@ struct mlx5_ifc_create_match_definer_out_bits {
 
 struct mlx5_ifc_alias_context_bits {
 	u8 vhca_id_to_be_accessed[0x10];
-	u8 reserved_at_10[0xd];
+	u8 reserved_at_10[0xb];
+	u8 vhca_id_type[0x1];
+	u8 reserved_at_1c[0x1];
 	u8 status[0x3];
 	u8 object_id_to_be_accessed[0x20];
 	u8 reserved_at_40[0x40];
-- 
cgit v1.2.3


From 00a5d1e71c928edb1e7de211a82e87858105dd47 Mon Sep 17 00:00:00 2001
From: Tzuyi Chang <tychang@realtek.com>
Date: Tue, 17 Mar 2026 19:54:05 +0800
Subject: pinctrl: pinconf-generic: Add properties
 'input-threshold-voltage-microvolt'

Add a new generic pin configuration parameter PIN_CONFIG_INPUT_VOLTAGE_UV.
This parameter is used to specify the input voltage level of a pin in
microvolts, which corresponds to the 'input-voltage-microvolt' property
in Device Tree.

Reviewed-by: Linus Walleij <linusw@kernel.org>
Signed-off-by: Tzuyi Chang <tychang@realtek.com>
Co-developed-by: Yu-Chun Lin <eleanor.lin@realtek.com>
Signed-off-by: Yu-Chun Lin <eleanor.lin@realtek.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
---
 drivers/pinctrl/pinconf-generic.c       | 2 ++
 include/linux/pinctrl/pinconf-generic.h | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index faa03a7967ee..6daa9729dd13 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -57,6 +57,7 @@ static const struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_SKEW_DELAY, "skew delay", NULL, true),
 	PCONFDUMP(PIN_CONFIG_SKEW_DELAY_INPUT_PS, "input skew delay", "ps", true),
 	PCONFDUMP(PIN_CONFIG_SKEW_DELAY_OUTPUT_PS, "output skew delay", "ps", true),
+	PCONFDUMP(PIN_CONFIG_INPUT_VOLTAGE_UV, "input voltage in microvolt", "uV", true),
 };
 
 static void pinconf_generic_dump_one(struct pinctrl_dev *pctldev,
@@ -203,6 +204,7 @@ static const struct pinconf_generic_params dt_params[] = {
 	{ "skew-delay", PIN_CONFIG_SKEW_DELAY, 0 },
 	{ "skew-delay-input-ps", PIN_CONFIG_SKEW_DELAY_INPUT_PS, 0 },
 	{ "skew-delay-output-ps", PIN_CONFIG_SKEW_DELAY_OUTPUT_PS, 0 },
+	{ "input-threshold-voltage-microvolt", PIN_CONFIG_INPUT_VOLTAGE_UV, 0 },
 };
 
 /**
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 531dc3e9b3f7..a5d4b2d8633a 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -83,6 +83,8 @@ struct pinctrl_map;
  *      schmitt-trigger mode is disabled.
  * @PIN_CONFIG_INPUT_SCHMITT_UV: this will configure an input pin to run in
  *	schmitt-trigger mode. The argument is in uV.
+ * @PIN_CONFIG_INPUT_VOLTAGE_UV: this will configure the input voltage level of
+ * the pin. The argument is specified in microvolts.
  * @PIN_CONFIG_MODE_LOW_POWER: this will configure the pin for low power
  *	operation, if several modes of operation are supported these can be
  *	passed in the argument on a custom form, else just use argument 1
@@ -145,6 +147,7 @@ enum pin_config_param {
 	PIN_CONFIG_INPUT_SCHMITT,
 	PIN_CONFIG_INPUT_SCHMITT_ENABLE,
 	PIN_CONFIG_INPUT_SCHMITT_UV,
+	PIN_CONFIG_INPUT_VOLTAGE_UV,
 	PIN_CONFIG_MODE_LOW_POWER,
 	PIN_CONFIG_MODE_PWM,
 	PIN_CONFIG_LEVEL,
-- 
cgit v1.2.3


From e43dce8a0bc09083ea1145a1a0c61d83cbe72d97 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Mar 2026 08:01:44 +0100
Subject: fs: fix archiecture-specific compat_ftruncate64

The "small" argument to do_sys_ftruncate indicates if > 32-bit size
should be reject, but all the arch-specific compat ftruncate64
implementations get this wrong.  Merge do_sys_ftruncate and
ksys_ftruncate, replace the integer as boolean small flag with a
descriptive one about LFS semantics, and use it correctly in the
architecture-specific ftruncate64 implementations.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Fixes: 3dd681d944f6 ("arm64: 32-bit (compat) applications support")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260323070205.2939118-2-hch@lst.de
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 arch/arm64/kernel/sys32.c       |  2 +-
 arch/mips/kernel/linux32.c      |  2 +-
 arch/parisc/kernel/sys_parisc.c |  4 ++--
 arch/powerpc/kernel/sys_ppc32.c |  2 +-
 arch/sparc/kernel/sys_sparc32.c |  2 +-
 arch/x86/kernel/sys_ia32.c      |  3 ++-
 fs/internal.h                   |  1 -
 fs/open.c                       | 12 ++++++------
 include/linux/syscalls.h        |  8 ++------
 9 files changed, 16 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index 96bcfb907443..12a948f3a504 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -89,7 +89,7 @@ COMPAT_SYSCALL_DEFINE4(aarch32_truncate64, const char __user *, pathname,
 COMPAT_SYSCALL_DEFINE4(aarch32_ftruncate64, unsigned int, fd, u32, __pad,
 		       arg_u32p(length))
 {
-	return ksys_ftruncate(fd, arg_u64(length));
+	return ksys_ftruncate(fd, arg_u64(length), FTRUNCATE_LFS);
 }
 
 COMPAT_SYSCALL_DEFINE5(aarch32_readahead, int, fd, u32, __pad,
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index a0c0a7a654e9..fe9a787db569 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -60,7 +60,7 @@ SYSCALL_DEFINE4(32_truncate64, const char __user *, path,
 SYSCALL_DEFINE4(32_ftruncate64, unsigned long, fd, unsigned long, __dummy,
 	unsigned long, a2, unsigned long, a3)
 {
-	return ksys_ftruncate(fd, merge_64(a2, a3));
+	return ksys_ftruncate(fd, merge_64(a2, a3), FTRUNCATE_LFS);
 }
 
 SYSCALL_DEFINE5(32_llseek, unsigned int, fd, unsigned int, offset_high,
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index b2cdbb8a12b1..fcb0d8069139 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -216,7 +216,7 @@ asmlinkage long parisc_truncate64(const char __user * path,
 asmlinkage long parisc_ftruncate64(unsigned int fd,
 					unsigned int high, unsigned int low)
 {
-	return ksys_ftruncate(fd, (long)high << 32 | low);
+	return ksys_ftruncate(fd, (long)high << 32 | low, FTRUNCATE_LFS);
 }
 
 /* stubs for the benefit of the syscall_table since truncate64 and truncate 
@@ -227,7 +227,7 @@ asmlinkage long sys_truncate64(const char __user * path, unsigned long length)
 }
 asmlinkage long sys_ftruncate64(unsigned int fd, unsigned long length)
 {
-	return ksys_ftruncate(fd, length);
+	return ksys_ftruncate(fd, length, FTRUNCATE_LFS);
 }
 asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index d451a8229223..03fa487f2614 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -101,7 +101,7 @@ PPC32_SYSCALL_DEFINE4(ppc_ftruncate64,
 		       unsigned int, fd, u32, reg4,
 		       unsigned long, len1, unsigned long, len2)
 {
-	return ksys_ftruncate(fd, merge_64(len1, len2));
+	return ksys_ftruncate(fd, merge_64(len1, len2), FTRUNCATE_LFS);
 }
 
 PPC32_SYSCALL_DEFINE6(ppc32_fadvise64,
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index f84a02ab6bf9..04432b82b9e3 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -58,7 +58,7 @@ COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, path, u32, high, u32, lo
 
 COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, u32, high, u32, low)
 {
-	return ksys_ftruncate(fd, ((u64)high << 32) | low);
+	return ksys_ftruncate(fd, ((u64)high << 32) | low, FTRUNCATE_LFS);
 }
 
 static int cp_compat_stat64(struct kstat *stat,
diff --git a/arch/x86/kernel/sys_ia32.c b/arch/x86/kernel/sys_ia32.c
index 6cf65397d225..610a1c2f4519 100644
--- a/arch/x86/kernel/sys_ia32.c
+++ b/arch/x86/kernel/sys_ia32.c
@@ -61,7 +61,8 @@ SYSCALL_DEFINE3(ia32_truncate64, const char __user *, filename,
 SYSCALL_DEFINE3(ia32_ftruncate64, unsigned int, fd,
 		unsigned long, offset_low, unsigned long, offset_high)
 {
-	return ksys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
+	return ksys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low,
+			FTRUNCATE_LFS);
 }
 
 /* warning: next two assume little endian */
diff --git a/fs/internal.h b/fs/internal.h
index cbc384a1aa09..2663823e273a 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -199,7 +199,6 @@ extern int build_open_flags(const struct open_how *how, struct open_flags *op);
 struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
 
 int do_ftruncate(struct file *file, loff_t length, int small);
-int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
 int chmod_common(const struct path *path, umode_t mode);
 int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
 		int flag);
diff --git a/fs/open.c b/fs/open.c
index 91f1139591ab..412d0d6fbaa7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -197,7 +197,7 @@ int do_ftruncate(struct file *file, loff_t length, int small)
 				   ATTR_MTIME | ATTR_CTIME, file);
 }
 
-int do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+int ksys_ftruncate(unsigned int fd, loff_t length, unsigned int flags)
 {
 	if (length < 0)
 		return -EINVAL;
@@ -205,18 +205,18 @@ int do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 	if (fd_empty(f))
 		return -EBADF;
 
-	return do_ftruncate(fd_file(f), length, small);
+	return do_ftruncate(fd_file(f), length, !(flags & FTRUNCATE_LFS));
 }
 
 SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length)
 {
-	return do_sys_ftruncate(fd, length, 1);
+	return ksys_ftruncate(fd, length, 0);
 }
 
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length)
 {
-	return do_sys_ftruncate(fd, length, 1);
+	return ksys_ftruncate(fd, length, 0);
 }
 #endif
 
@@ -229,7 +229,7 @@ SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
 
 SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
 {
-	return do_sys_ftruncate(fd, length, 0);
+	return ksys_ftruncate(fd, length, FTRUNCATE_LFS);
 }
 #endif /* BITS_PER_LONG == 32 */
 
@@ -245,7 +245,7 @@ COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, pathname,
 COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd,
 		       compat_arg_u64_dual(length))
 {
-	return ksys_ftruncate(fd, compat_arg_u64_glue(length));
+	return ksys_ftruncate(fd, compat_arg_u64_glue(length), FTRUNCATE_LFS);
 }
 #endif
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 02bd6ddb6278..8787b3511c86 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1283,12 +1283,8 @@ static inline long ksys_lchown(const char __user *filename, uid_t user,
 			     AT_SYMLINK_NOFOLLOW);
 }
 
-int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
-
-static inline long ksys_ftruncate(unsigned int fd, loff_t length)
-{
-	return do_sys_ftruncate(fd, length, 1);
-}
+#define FTRUNCATE_LFS	(1u << 0)	/* allow truncating > 32-bit */
+int ksys_ftruncate(unsigned int fd, loff_t length, unsigned int flags);
 
 int do_sys_truncate(const char __user *pathname, loff_t length);
 
-- 
cgit v1.2.3


From e8767a3134ca69a307b37f7f58ca088dbee6eb82 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Mar 2026 08:01:46 +0100
Subject: fs: remove do_sys_truncate

do_sys_truncate ist only used to implement ksys_truncate and the native
truncate syscalls.  Merge do_sys_truncate into ksys_truncate and return
int from it as it only returns 0 or negative errnos.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260323070205.2939118-4-hch@lst.de
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/open.c                | 8 ++++----
 include/linux/syscalls.h | 8 +-------
 2 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/open.c b/fs/open.c
index 181c1597e73c..681d405bc61e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -126,7 +126,7 @@ mnt_drop_write_and_out:
 }
 EXPORT_SYMBOL_GPL(vfs_truncate);
 
-int do_sys_truncate(const char __user *pathname, loff_t length)
+int ksys_truncate(const char __user *pathname, loff_t length)
 {
 	unsigned int lookup_flags = LOOKUP_FOLLOW;
 	struct path path;
@@ -151,13 +151,13 @@ retry:
 
 SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
 {
-	return do_sys_truncate(path, length);
+	return ksys_truncate(path, length);
 }
 
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
 {
-	return do_sys_truncate(path, length);
+	return ksys_truncate(path, length);
 }
 #endif
 
@@ -222,7 +222,7 @@ COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length)
 #if BITS_PER_LONG == 32
 SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
 {
-	return do_sys_truncate(path, length);
+	return ksys_truncate(path, length);
 }
 
 SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 8787b3511c86..f5639d5ac331 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1285,13 +1285,7 @@ static inline long ksys_lchown(const char __user *filename, uid_t user,
 
 #define FTRUNCATE_LFS	(1u << 0)	/* allow truncating > 32-bit */
 int ksys_ftruncate(unsigned int fd, loff_t length, unsigned int flags);
-
-int do_sys_truncate(const char __user *pathname, loff_t length);
-
-static inline long ksys_truncate(const char __user *pathname, loff_t length)
-{
-	return do_sys_truncate(pathname, length);
-}
+int ksys_truncate(const char __user *pathname, loff_t length);
 
 static inline unsigned int ksys_personality(unsigned int personality)
 {
-- 
cgit v1.2.3


From a676643709115816d3ce7e50aa5b5a4af1ee6c45 Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@nvidia.com>
Date: Wed, 25 Feb 2026 18:58:32 -0500
Subject: bitmap: drop __find_nth_andnot_bit()

Remove find_nth_andnot_bit() leftovers.

CC: Rasmus Villemoes <linux@rasmusvillemoes.dk>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Fixes: b0c85e99458a ("cpumask: Remove unnecessary cpumask_nth_andnot()")
Signed-off-by: Yury Norov <ynorov@nvidia.com>
---
 include/linux/find.h | 2 --
 lib/find_bit.c       | 7 -------
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/find.h b/include/linux/find.h
index 9d720ad92bc1..6c2be8ca615d 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -22,8 +22,6 @@ extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long si
 unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n);
 unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2,
 				unsigned long size, unsigned long n);
-unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
-					unsigned long size, unsigned long n);
 unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
 					const unsigned long *addr3, unsigned long size,
 					unsigned long n);
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 5a0066c26d9a..5ac52dfce730 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -172,13 +172,6 @@ unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long
 }
 EXPORT_SYMBOL(__find_nth_and_bit);
 
-unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
-				 unsigned long size, unsigned long n)
-{
-	return FIND_NTH_BIT(addr1[idx] & ~addr2[idx], size, n);
-}
-EXPORT_SYMBOL(__find_nth_andnot_bit);
-
 unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1,
 					const unsigned long *addr2,
 					const unsigned long *addr3,
-- 
cgit v1.2.3


From bf31ddc14f8c6bcd4987c31fe2bc9e93e433b41a Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@nvidia.com>
Date: Mon, 22 Dec 2025 14:11:37 -0500
Subject: bitmap: add bitmap_weight_from()

The function calculates a Hamming weight of a bitmap starting from an
arbitrary bit.

Signed-off-by: Yury Norov <ynorov@nvidia.com>
---
 include/linux/bitmap.h | 33 +++++++++++++++++++++++++++++++++
 lib/test_bitmap.c      | 26 ++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index b0395e4ccf90..9c0d1de44350 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -57,6 +57,7 @@ struct device;
  *  bitmap_weight(src, nbits)                   Hamming Weight: number set bits
  *  bitmap_weight_and(src1, src2, nbits)        Hamming Weight of and'ed bitmap
  *  bitmap_weight_andnot(src1, src2, nbits)     Hamming Weight of andnot'ed bitmap
+ *  bitmap_weight_from(src, start, end)         Hamming Weight starting from @start
  *  bitmap_set(dst, pos, nbits)                 Set specified bit area
  *  bitmap_clear(dst, pos, nbits)               Clear specified bit area
  *  bitmap_find_next_zero_area(buf, len, pos, n, mask)  Find bit free area
@@ -479,6 +480,38 @@ unsigned long bitmap_weight_andnot(const unsigned long *src1,
 	return __bitmap_weight_andnot(src1, src2, nbits);
 }
 
+/**
+ * bitmap_weight_from - Hamming weight for a memory region
+ * @bitmap: The base address
+ * @start: The bitnumber to starts weighting
+ * @end: the bitmap size in bits
+ *
+ * Returns the number of set bits in the region. If @start >= @end,
+ * return >= end.
+ */
+static __always_inline
+unsigned long bitmap_weight_from(const unsigned long *bitmap,
+				   unsigned int start, unsigned int end)
+{
+	unsigned long w;
+
+	if (unlikely(start >= end))
+		return end;
+
+	if (small_const_nbits(end))
+		return hweight_long(*bitmap & GENMASK(end - 1, start));
+
+	bitmap += start / BITS_PER_LONG;
+	/* Opencode round_down() to not include math.h */
+	end -= start & ~(BITS_PER_LONG - 1);
+	start %= BITS_PER_LONG;
+	w = bitmap_weight(bitmap, end);
+	if (start)
+		w -= hweight_long(*bitmap & BITMAP_LAST_WORD_MASK(start));
+
+	return w;
+}
+
 static __always_inline
 void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits)
 {
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 1c352c1edfa5..cd4cb36e42a5 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -854,6 +854,31 @@ static void __init test_for_each_set_bit_from(void)
 	}
 }
 
+static void __init test_bitmap_weight(void)
+{
+	unsigned int bit, w1, w2, w;
+	DECLARE_BITMAP(b, 30);
+
+	bitmap_parselist("all:1/2", b, 30);
+
+	/* Test inline implementation */
+	w = bitmap_weight(b, 30);
+	w1 = bitmap_weight(b, 15);
+	w2 = bitmap_weight_from(b, 15, 30);
+
+	expect_eq_uint(15, w);
+	expect_eq_uint(8, w1);
+	expect_eq_uint(7, w2);
+
+	/* Test outline implementation */
+	w = bitmap_weight(exp1, EXP1_IN_BITS);
+	for (bit = 0; bit < EXP1_IN_BITS; bit++) {
+		w1 = bitmap_weight(exp1, bit);
+		w2 = bitmap_weight_from(exp1, bit, EXP1_IN_BITS);
+		expect_eq_uint(w1 + w2, w);
+	}
+}
+
 static void __init test_for_each_clear_bit(void)
 {
 	DECLARE_BITMAP(orig, 500);
@@ -1444,6 +1469,7 @@ static void __init selftest(void)
 	test_bitmap_const_eval();
 	test_bitmap_read_write();
 	test_bitmap_read_perf();
+	test_bitmap_weight();
 	test_bitmap_write_perf();
 
 	test_find_nth_bit();
-- 
cgit v1.2.3


From 18c48899653fa7a04120537c228031b5c7e4e9d6 Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@nvidia.com>
Date: Tue, 10 Mar 2026 16:53:02 -0400
Subject: lib: crypto: fix comments for count_leading_zeros()

count_leading_zeros() is based on fls(), which is defined for x == 0,
contrary to __ffs() family. The comment in crypto/mpi erroneously
states that the function may return undef in such case.

Fix the comment together with the outdated function signature, and now
that COUNT_LEADING_ZEROS_0 is not referenced in the codebase, get rid of
it too.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Yury Norov <ynorov@nvidia.com>
---
 include/linux/count_zeros.h | 4 +---
 lib/crypto/mpi/longlong.h   | 8 ++++----
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/count_zeros.h b/include/linux/count_zeros.h
index 5b8ff5ac660d..4e5680327ece 100644
--- a/include/linux/count_zeros.h
+++ b/include/linux/count_zeros.h
@@ -18,7 +18,7 @@
  *
  * If the MSB of @x is set, the result is 0.
  * If only the LSB of @x is set, then the result is BITS_PER_LONG-1.
- * If @x is 0 then the result is COUNT_LEADING_ZEROS_0.
+ * If @x is 0 then the result is BITS_PER_LONG.
  */
 static inline int count_leading_zeros(unsigned long x)
 {
@@ -28,8 +28,6 @@ static inline int count_leading_zeros(unsigned long x)
 		return BITS_PER_LONG - fls64(x);
 }
 
-#define COUNT_LEADING_ZEROS_0 BITS_PER_LONG
-
 /**
  * count_trailing_zeros - Count the number of zeros from the LSB forwards
  * @x: The value
diff --git a/lib/crypto/mpi/longlong.h b/lib/crypto/mpi/longlong.h
index b6fa1d08fb55..6d31c3a729f1 100644
--- a/lib/crypto/mpi/longlong.h
+++ b/lib/crypto/mpi/longlong.h
@@ -66,12 +66,12 @@
  * denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
  * is rounded towards 0.
  *
- * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ * 5) count_leading_zeros(x) counts the number of zero-bits from the
  * msb to the first non-zero bit in the UWtype X.  This is the number of
- * steps X needs to be shifted left to set the msb.  Undefined for X == 0,
- * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+ * steps X needs to be shifted left to set the msb.
+ * count_leading_zeros(0) == BITS_PER_LONG
  *
- * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ * 6) count_trailing_zeros() like count_leading_zeros(), but counts
  * from the least significant end.
  *
  * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
-- 
cgit v1.2.3


From 4e64c91b813f666dffc3962a815a8a50b7d6f468 Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@nvidia.com>
Date: Thu, 12 Mar 2026 19:08:16 -0400
Subject: lib: count_zeros: fix 32/64-bit inconsistency in
 count_trailing_zeros()

Based on 'sizeof(x) == 4' condition, in 32-bit case the function is wired
to ffs(), while in 64-bit case to __ffs(). The difference is substantial:
ffs(x) == __ffs(x) + 1. Also, ffs(0) == 0, while __ffs(0) is undefined.

The 32-bit behaviour is inconsistent with the function description, so it
needs to get fixed.

There are 9 individual users for the function in 6 different subsystems.
Some arches and drivers are 64-bit only:
 - arch/loongarch/kvm/intc/eiointc.c;
 - drivers/hv/mshv_vtl_main.c;
 - kernel/liveupdate/kexec_handover.c;

The others are:
 - ib_umem_find_best_pgsz(): as per comment, __ffs() should be correct;
 - rzv2m_csi_reg_write_bit(): ARCH_RENESAS only, unclear;
 - lz77_match_len(): CIFS_COMPRESSION only, unclear, experimental;

IB and CIFS are explicitly OK with the change.

The attached patch gets rid of 32-bit explicit support, so that both
32- and 64-bit versions rely on __ffs().

CC: K. Y. Srinivasan <kys@microsoft.com>
CC: Haiyang Zhang <haiyangz@microsoft.com>
CC: Mark Brown <broonie@kernel.org>
CC: Steve French <sfrench@samba.org>
CC: Alexander Graf <graf@amazon.com>
CC: Mike Rapoport <rppt@kernel.org>
CC: Pasha Tatashin <pasha.tatashin@soleen.com>
Acked-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Yury Norov <ynorov@nvidia.com>
---
 include/linux/count_zeros.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/count_zeros.h b/include/linux/count_zeros.h
index 4e5680327ece..5034a30b5c7c 100644
--- a/include/linux/count_zeros.h
+++ b/include/linux/count_zeros.h
@@ -10,6 +10,8 @@
 
 #include <asm/bitops.h>
 
+#define COUNT_TRAILING_ZEROS_0 (-1)
+
 /**
  * count_leading_zeros - Count the number of zeros from the MSB back
  * @x: The value
@@ -40,12 +42,7 @@ static inline int count_leading_zeros(unsigned long x)
  */
 static inline int count_trailing_zeros(unsigned long x)
 {
-#define COUNT_TRAILING_ZEROS_0 (-1)
-
-	if (sizeof(x) == 4)
-		return ffs(x);
-	else
-		return (x != 0) ? __ffs(x) : COUNT_TRAILING_ZEROS_0;
+	return (x != 0) ? __ffs(x) : COUNT_TRAILING_ZEROS_0;
 }
 
 #endif /* _LINUX_BITOPS_COUNT_ZEROS_H_ */
-- 
cgit v1.2.3


From be56db15fcce8bb8bd85f236382276d52ce73d08 Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@nvidia.com>
Date: Mon, 23 Mar 2026 12:17:12 -0400
Subject: lib: count_zeros: unify count_{leading,trailing}_zeros()

The 'leading' helper returns BITS_PER_LONG if x == 0, while 'trailing'
one returns COUNT_TRAILING_ZEROS_0, which turns to be -1.

None of the current users explicitly check the returned value for
COUNT_TRAILING_ZEROS_0, except the loongarch, which tests implicitly
for the '>= 0'.

So, align count_trailing_zeros() with the count_leading_zeros(), and
simplify the loongarch handling.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Signed-off-by: Yury Norov <ynorov@nvidia.com>
---
 arch/loongarch/kvm/intc/eiointc.c | 4 ++--
 include/linux/count_zeros.h       | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c
index d2acb4d09e73..3b5268116727 100644
--- a/arch/loongarch/kvm/intc/eiointc.c
+++ b/arch/loongarch/kvm/intc/eiointc.c
@@ -16,7 +16,7 @@ static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s)
 		ipnum = (s->ipmap >> (irq / 32 * 8)) & 0xff;
 		if (!(s->status & BIT(EIOINTC_ENABLE_INT_ENCODE))) {
 			ipnum = count_trailing_zeros(ipnum);
-			ipnum = (ipnum >= 0 && ipnum < 4) ? ipnum : 0;
+			ipnum = ipnum < 4 ? ipnum : 0;
 		}
 
 		cpuid = ((u8 *)s->coremap)[irq];
@@ -41,7 +41,7 @@ static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level)
 	ipnum = (s->ipmap >> (irq / 32 * 8)) & 0xff;
 	if (!(s->status & BIT(EIOINTC_ENABLE_INT_ENCODE))) {
 		ipnum = count_trailing_zeros(ipnum);
-		ipnum = (ipnum >= 0 && ipnum < 4) ? ipnum : 0;
+		ipnum = ipnum < 4 ? ipnum : 0;
 	}
 
 	cpu = s->sw_coremap[irq];
diff --git a/include/linux/count_zeros.h b/include/linux/count_zeros.h
index 5034a30b5c7c..b72ba3faa036 100644
--- a/include/linux/count_zeros.h
+++ b/include/linux/count_zeros.h
@@ -10,8 +10,6 @@
 
 #include <asm/bitops.h>
 
-#define COUNT_TRAILING_ZEROS_0 (-1)
-
 /**
  * count_leading_zeros - Count the number of zeros from the MSB back
  * @x: The value
@@ -38,11 +36,11 @@ static inline int count_leading_zeros(unsigned long x)
  *
  * If the LSB of @x is set, the result is 0.
  * If only the MSB of @x is set, then the result is BITS_PER_LONG-1.
- * If @x is 0 then the result is COUNT_TRAILING_ZEROS_0.
+ * If @x is 0 then the result is BITS_PER_LONG.
  */
 static inline int count_trailing_zeros(unsigned long x)
 {
-	return (x != 0) ? __ffs(x) : COUNT_TRAILING_ZEROS_0;
+	return x ? __ffs(x) : BITS_PER_LONG;
 }
 
 #endif /* _LINUX_BITOPS_COUNT_ZEROS_H_ */
-- 
cgit v1.2.3


From 7b52b262f8a8cd96dac33721389a884420c18365 Mon Sep 17 00:00:00 2001
From: Kit Dallege <xaum.io@gmail.com>
Date: Sun, 15 Mar 2026 16:34:14 +0100
Subject: bitops: fix kernel-doc parameter name for parity8()

The kernel-doc comment for parity8() documents the parameter as @value
but the actual parameter name is @val. Fix the mismatch.

Assisted-by: Claude <noreply@anthropic.com>
Signed-off-by: Kit Dallege <xaum.io@gmail.com>
Signed-off-by: Yury Norov <ynorov@nvidia.com>
---
 include/linux/bitops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index ea7898cc5903..1fe46703792f 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -230,7 +230,7 @@ static inline int get_count_order_long(unsigned long l)
 
 /**
  * parity8 - get the parity of an u8 value
- * @value: the value to be examined
+ * @val: the value to be examined
  *
  * Determine the parity of the u8 argument.
  *
-- 
cgit v1.2.3


From 473e470f16f98569d59adc11c4a318780fb68fe9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 3 Feb 2026 17:45:29 +0100
Subject: tracing: move __printf() attribute on __ftrace_vbprintk()

The sunrpc change to use trace_printk() for debugging caused
a new warning for every instance of dprintk() in some configurations,
when -Wformat-security is enabled:

fs/nfs/getroot.c: In function 'nfs_get_root':
fs/nfs/getroot.c:90:17: error: format not a string literal and no format arguments [-Werror=format-security]
   90 |                 nfs_errorf(fc, "NFS: Couldn't getattr on root");

I've been slowly chipping away at those warnings over time with the
intention of enabling them by default in the future. While I could not
figure out why this only happens for this one instance, I see that the
__trace_bprintk() function is always called with a local variable as
the format string, rather than a literal.

Move the __printf(2,3) annotation on this function from the declaration
to the caller. As this is can only be validated for literals, the
attribute on the declaration causes the warnings every time, but
removing it entirely introduces a new warning on the __ftrace_vbprintk()
definition.

The format strings still get checked because the underlying literal keeps
getting passed into __trace_printk() in the "else" branch, which is not
taken but still evaluated for compile-time warnings.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Simon Horman <horms@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yury Norov <ynorov@nvidia.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260203164545.3174910-1-arnd@kernel.org
Fixes: ec7d8e68ef0e ("sunrpc: add a Kconfig option to redirect dfprintk() output to trace buffer")
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_printk.h | 1 -
 kernel/trace/trace_printk.c  | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_printk.h b/include/linux/trace_printk.h
index bb5874097f24..2670ec7f4262 100644
--- a/include/linux/trace_printk.h
+++ b/include/linux/trace_printk.h
@@ -107,7 +107,6 @@ do {									\
 		__trace_printk(_THIS_IP_, fmt, ##args);			\
 } while (0)
 
-extern __printf(2, 3)
 int __trace_bprintk(unsigned long ip, const char *fmt, ...);
 
 extern __printf(2, 3)
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 5ea5e0d76f00..3ea17af60169 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -197,6 +197,7 @@ struct notifier_block module_trace_bprintk_format_nb = {
 	.notifier_call = module_trace_bprintk_format_notify,
 };
 
+__printf(2, 3)
 int __trace_bprintk(unsigned long ip, const char *fmt, ...)
 {
 	int ret;
-- 
cgit v1.2.3


From 9a475dc71c38d6abc42ba722ace4a72372876d91 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Wed, 18 Mar 2026 16:06:11 +0000
Subject: net: stmmac: move default_an_inband to plat_stmmacenet_data

Move the default_an_inband flag from struct mdio_bus_data to struct
plat_stmmacenet_data. This is to allow platforms that do not use the
integrated MDIO bus to enable inband mode.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1w2tPP-0000000DYAX-0TKw@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 ++--
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +----
 include/linux/stmmac.h                            | 2 +-
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index f621077c30a4..7898b5075a8b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -298,7 +298,7 @@ static void tgl_get_interfaces(struct stmmac_priv *priv, void *bsp_priv,
 
 	if (FIELD_GET(SERDES_LINK_MODE_MASK, data) == SERDES_LINK_MODE_2G5) {
 		dev_info(priv->device, "Link Speed Mode: 2.5Gbps\n");
-		priv->plat->mdio_bus_data->default_an_inband = false;
+		priv->plat->default_an_inband = false;
 		interface = PHY_INTERFACE_MODE_2500BASEX;
 	} else {
 		interface = PHY_INTERFACE_MODE_SGMII;
@@ -700,7 +700,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII ||
 	    plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) {
 		plat->mdio_bus_data->pcs_mask = BIT_U32(INTEL_MGBE_XPCS_ADDR);
-		plat->mdio_bus_data->default_an_inband = true;
+		plat->default_an_inband = true;
 		plat->select_pcs = intel_mgbe_select_pcs;
 	}
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5062537f79e9..bd0f5d487e0f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1390,7 +1390,6 @@ static int stmmac_init_phy(struct net_device *dev)
 
 static int stmmac_phylink_setup(struct stmmac_priv *priv)
 {
-	struct stmmac_mdio_bus_data *mdio_bus_data;
 	struct phylink_config *config;
 	struct phylink_pcs *pcs;
 	struct phylink *phylink;
@@ -1415,9 +1414,7 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv)
 	priv->tx_lpi_clk_stop = priv->plat->flags &
 				STMMAC_FLAG_EN_TX_LPI_CLOCKGATING;
 
-	mdio_bus_data = priv->plat->mdio_bus_data;
-	if (mdio_bus_data)
-		config->default_an_inband = mdio_bus_data->default_an_inband;
+	config->default_an_inband = priv->plat->default_an_inband;
 
 	/* Get the PHY interface modes (at the PHY end of the link) that
 	 * are supported by the platform.
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 72febd246bdb..565bb394b194 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -89,7 +89,6 @@ struct stmmac_mdio_bus_data {
 	int *irqs;
 	int probed_phy_irq;
 	bool needs_reset;
-	bool default_an_inband;
 };
 
 struct stmmac_dma_cfg {
@@ -250,6 +249,7 @@ struct plat_stmmacenet_data {
 	struct stmmac_dma_cfg *dma_cfg;
 	struct stmmac_safety_feature_cfg *safety_feat_cfg;
 	int clk_csr;
+	bool default_an_inband;
 	bool enh_desc;
 	bool tx_coe;
 	bool bugged_jumbo;
-- 
cgit v1.2.3


From 68cff4fff61fb69ef7bb1f6302d4766822a395cc Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Wed, 18 Mar 2026 16:06:26 +0000
Subject: net: stmmac: add BASE-X support to integrated PCS

The integrated PCS supports 802.3z (BASE-X) modes when the Synopsys
IP is coupled with an appropriate SerDes to provide the electrical
interface. The PCS presents a TBI interface to the SerDes for this.
Thus, the BASE-X related registers are only present when TBI mode is
supported.

dwmac-qcom-ethqos added support for using 2.5G with the integrated PCS
by calling dwmac_ctrl_ane() directly.

Add support for the following to the integrated PCS:
- 1000BASE-X protocol unconditionally.
- 2500BASE-X if the coupled SerDes supports 2.5G speed.
- The above without autonegotiation.
- If the PCS supports TBI, then optional BASE-X autonegotiation for each
  of the above.

Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1w2tPe-0000000DYAp-1qpV@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c | 103 ++++++++++++++++++++++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h |   1 +
 include/linux/stmmac.h                           |   1 +
 3 files changed, 101 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c
index df72f7c5a6a7..df37af5ab837 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c
@@ -26,6 +26,27 @@
 #define GMAC_RGSMII_SPEED_2_5		0
 #define GMAC_RGSMII_LNKSTS		BIT(3)
 
+static unsigned int dwmac_integrated_pcs_inband_caps(struct phylink_pcs *pcs,
+						     phy_interface_t interface)
+{
+	struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs);
+	unsigned int ib_caps;
+
+	if (phy_interface_mode_is_8023z(interface)) {
+		ib_caps = LINK_INBAND_DISABLE;
+
+		/* If the PCS supports TBI/RTBI, then BASE-X negotiation is
+		 * supported.
+		 */
+		if (spcs->support_tbi_rtbi)
+			ib_caps |= LINK_INBAND_ENABLE;
+
+		return ib_caps;
+	}
+
+	return 0;
+}
+
 static int dwmac_integrated_pcs_enable(struct phylink_pcs *pcs)
 {
 	struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs);
@@ -47,12 +68,20 @@ static void dwmac_integrated_pcs_get_state(struct phylink_pcs *pcs,
 					   struct phylink_link_state *state)
 {
 	struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs);
-	u32 status, rgsmii;
+	u32 status, lpa, rgsmii;
 
 	status = readl(spcs->base + GMAC_AN_STATUS);
 
 	if (phy_interface_mode_is_8023z(state->interface)) {
-		state->link = false;
+		/* For BASE-X modes, the PCS block supports the advertisement
+		 * and link partner advertisement registers using standard
+		 * 802.3 format. The status register also has the link status
+		 * and AN complete bits in the same bit location. This will
+		 * only be used when AN is enabled.
+		 */
+		lpa = readl(spcs->base + GMAC_ANE_LPA);
+
+		phylink_mii_c22_pcs_decode_state(state, neg_mode, status, lpa);
 	} else {
 		rgsmii = field_get(spcs->rgsmii_status_mask,
 				   readl(spcs->rgsmii));
@@ -84,6 +113,21 @@ static void dwmac_integrated_pcs_get_state(struct phylink_pcs *pcs,
 	}
 }
 
+static int dwmac_integrated_pcs_config_aneg(struct stmmac_pcs *spcs,
+					    phy_interface_t interface,
+					    const unsigned long *advertising)
+{
+	bool changed = false;
+	u32 adv;
+
+	adv = phylink_mii_c22_pcs_encode_advertisement(interface, advertising);
+	if (readl(spcs->base + GMAC_ANE_ADV) != adv)
+		changed = true;
+	writel(adv, spcs->base + GMAC_ANE_ADV);
+
+	return changed;
+}
+
 static int dwmac_integrated_pcs_config(struct phylink_pcs *pcs,
 				       unsigned int neg_mode,
 				       phy_interface_t interface,
@@ -91,17 +135,46 @@ static int dwmac_integrated_pcs_config(struct phylink_pcs *pcs,
 				       bool permit_pause_to_mac)
 {
 	struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs);
+	bool changed = false, ane = true;
+
+	/* Only configure the advertisement and allow AN in BASE-X mode if
+	 * the core supports TBI/RTBI. AN will be filtered out by via phylink
+	 * and the .pcs_inband_caps() method above.
+	 */
+	if (phy_interface_mode_is_8023z(interface) &&
+	    spcs->support_tbi_rtbi) {
+		ane = neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED;
+
+		changed = dwmac_integrated_pcs_config_aneg(spcs, interface,
+							   advertising);
+	}
 
-	dwmac_ctrl_ane(spcs->base, 0, 1, spcs->priv->hw->reverse_sgmii_enable);
+	dwmac_ctrl_ane(spcs->base, 0, ane,
+		       spcs->priv->hw->reverse_sgmii_enable);
 
-	return 0;
+	return changed;
+}
+
+static void dwmac_integrated_pcs_an_restart(struct phylink_pcs *pcs)
+{
+	struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs);
+	void __iomem *an_control = spcs->base + GMAC_AN_CTRL(0);
+	u32 ctrl;
+
+	/* We can only do AN restart if using TBI/RTBI mode */
+	if (spcs->support_tbi_rtbi) {
+		ctrl = readl(an_control) | GMAC_AN_CTRL_RAN;
+		writel(ctrl, an_control);
+	}
 }
 
 static const struct phylink_pcs_ops dwmac_integrated_pcs_ops = {
+	.pcs_inband_caps = dwmac_integrated_pcs_inband_caps,
 	.pcs_enable = dwmac_integrated_pcs_enable,
 	.pcs_disable = dwmac_integrated_pcs_disable,
 	.pcs_get_state = dwmac_integrated_pcs_get_state,
 	.pcs_config = dwmac_integrated_pcs_config,
+	.pcs_an_restart = dwmac_integrated_pcs_an_restart,
 };
 
 void stmmac_integrated_pcs_irq(struct stmmac_priv *priv, u32 status,
@@ -129,9 +202,18 @@ void stmmac_integrated_pcs_irq(struct stmmac_priv *priv, u32 status,
 int stmmac_integrated_pcs_get_phy_intf_sel(struct phylink_pcs *pcs,
 					   phy_interface_t interface)
 {
+	struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs);
+
 	if (interface == PHY_INTERFACE_MODE_SGMII)
 		return PHY_INTF_SEL_SGMII;
 
+	if (phy_interface_mode_is_8023z(interface)) {
+		if (spcs->support_tbi_rtbi)
+			return PHY_INTF_SEL_TBI;
+		else
+			return PHY_INTF_SEL_SGMII;
+	}
+
 	return -EINVAL;
 }
 
@@ -151,7 +233,20 @@ int stmmac_integrated_pcs_init(struct stmmac_priv *priv,
 	spcs->int_mask = pcs_info->int_mask;
 	spcs->pcs.ops = &dwmac_integrated_pcs_ops;
 
+	/* If the PCS supports extended status, then it supports BASE-X AN
+	 * with a TBI interface to the SerDes. Otherwise, we can support
+	 * BASE-X without AN using SGMII, which is required for qcom-ethqos.
+	 */
+	if (readl(spcs->base + GMAC_AN_STATUS) & BMSR_ESTATEN)
+		spcs->support_tbi_rtbi = true;
+
 	__set_bit(PHY_INTERFACE_MODE_SGMII, spcs->pcs.supported_interfaces);
+	__set_bit(PHY_INTERFACE_MODE_1000BASEX, spcs->pcs.supported_interfaces);
+
+	/* Only allow 2500BASE-X if the SerDes has support. */
+	if (priv->plat->flags & STMMAC_FLAG_SERDES_SUPPORTS_2500M)
+		__set_bit(PHY_INTERFACE_MODE_2500BASEX,
+			  spcs->pcs.supported_interfaces);
 
 	priv->integrated_pcs = spcs;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h
index 09e609f111b1..b2b12d34b3dd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h
@@ -41,6 +41,7 @@ struct stmmac_pcs {
 	u32 rgsmii_status_mask;
 	u32 int_mask;
 	struct phylink_pcs pcs;
+	bool support_tbi_rtbi;
 };
 
 static inline struct stmmac_pcs *
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 565bb394b194..5b2bece81448 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -212,6 +212,7 @@ enum dwmac_core_type {
 #define STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP	BIT(12)
 #define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY	BIT(13)
 #define STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD	BIT(14)
+#define STMMAC_FLAG_SERDES_SUPPORTS_2500M	BIT(15)
 
 struct mac_device_info;
 
-- 
cgit v1.2.3


From eb37011395f12138056a4d124159f1a8436662d3 Mon Sep 17 00:00:00 2001
From: Qingfang Deng <dqfext@gmail.com>
Date: Fri, 20 Mar 2026 15:56:03 +0800
Subject: net: add netdev_from_priv() helper

Add a helper to get netdev from private data pointer, so drivers won't
have to store redundant netdev in priv.

Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Link: https://patch.msgid.link/20260320075605.490832-1-dqfext@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ca01eb3f7d2..6882b41bb3e8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2765,6 +2765,17 @@ static inline void *netdev_priv(const struct net_device *dev)
 	return (void *)dev->priv;
 }
 
+/**
+ * netdev_from_priv() - get network device from priv
+ * @priv: network device private data
+ *
+ * Returns: net_device to which @priv belongs
+ */
+static inline struct net_device *netdev_from_priv(const void *priv)
+{
+	return container_of(priv, struct net_device, priv);
+}
+
 /* Set the sysfs physical device reference for the network logical device
  * if set prior to registration will cause a symlink during initialization.
  */
-- 
cgit v1.2.3


From 9027497a25e3c92b5053b2643e0c18f910865625 Mon Sep 17 00:00:00 2001
From: Qingfang Deng <dqfext@gmail.com>
Date: Fri, 20 Mar 2026 15:56:04 +0800
Subject: team: use netdev_from_priv()

Use the new netdev_from_priv() helper to access the net device from
struct team.

Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Link: https://patch.msgid.link/20260320075605.490832-2-dqfext@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/team/team_core.c | 77 ++++++++++++++++++++++++--------------------
 include/linux/if_team.h      |  3 +-
 2 files changed, 43 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index b7282f5c9632..3a745bfb228a 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -66,7 +66,7 @@ static int team_port_set_orig_dev_addr(struct team_port *port)
 static int team_port_set_team_dev_addr(struct team *team,
 				       struct team_port *port)
 {
-	return __set_port_dev_addr(port->dev, team->dev->dev_addr);
+	return __set_port_dev_addr(port->dev, netdev_from_priv(team)->dev_addr);
 }
 
 int team_modeop_port_enter(struct team *team, struct team_port *port)
@@ -591,7 +591,7 @@ static int __team_change_mode(struct team *team,
 static int team_change_mode(struct team *team, const char *kind)
 {
 	const struct team_mode *new_mode;
-	struct net_device *dev = team->dev;
+	struct net_device *dev = netdev_from_priv(team);
 	int err;
 
 	if (!list_empty(&team->port_list)) {
@@ -642,7 +642,7 @@ static void team_notify_peers_work(struct work_struct *work)
 		rtnl_unlock();
 		return;
 	}
-	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, team->dev);
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev_from_priv(team));
 	rtnl_unlock();
 	if (val)
 		schedule_delayed_work(&team->notify_peers.dw,
@@ -651,7 +651,7 @@ static void team_notify_peers_work(struct work_struct *work)
 
 static void team_notify_peers(struct team *team)
 {
-	if (!team->notify_peers.count || !netif_running(team->dev))
+	if (!team->notify_peers.count || !netif_running(netdev_from_priv(team)))
 		return;
 	atomic_add(team->notify_peers.count, &team->notify_peers.count_pending);
 	schedule_delayed_work(&team->notify_peers.dw, 0);
@@ -688,7 +688,7 @@ static void team_mcast_rejoin_work(struct work_struct *work)
 		rtnl_unlock();
 		return;
 	}
-	call_netdevice_notifiers(NETDEV_RESEND_IGMP, team->dev);
+	call_netdevice_notifiers(NETDEV_RESEND_IGMP, netdev_from_priv(team));
 	rtnl_unlock();
 	if (val)
 		schedule_delayed_work(&team->mcast_rejoin.dw,
@@ -697,7 +697,7 @@ static void team_mcast_rejoin_work(struct work_struct *work)
 
 static void team_mcast_rejoin(struct team *team)
 {
-	if (!team->mcast_rejoin.count || !netif_running(team->dev))
+	if (!team->mcast_rejoin.count || !netif_running(netdev_from_priv(team)))
 		return;
 	atomic_add(team->mcast_rejoin.count, &team->mcast_rejoin.count_pending);
 	schedule_delayed_work(&team->mcast_rejoin.dw, 0);
@@ -756,7 +756,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
 			u64_stats_inc(&pcpu_stats->rx_multicast);
 		u64_stats_update_end(&pcpu_stats->syncp);
 
-		skb->dev = team->dev;
+		skb->dev = netdev_from_priv(team);
 	} else if (res == RX_HANDLER_EXACT) {
 		this_cpu_inc(team->pcpu_stats->rx_nohandler);
 	} else {
@@ -774,7 +774,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
 static int team_queue_override_init(struct team *team)
 {
 	struct list_head *listarr;
-	unsigned int queue_cnt = team->dev->num_tx_queues - 1;
+	unsigned int queue_cnt = netdev_from_priv(team)->num_tx_queues - 1;
 	unsigned int i;
 
 	if (!queue_cnt)
@@ -868,7 +868,7 @@ static void __team_queue_override_enabled_check(struct team *team)
 	}
 	if (enabled == team->queue_override_enabled)
 		return;
-	netdev_dbg(team->dev, "%s queue override\n",
+	netdev_dbg(netdev_from_priv(team), "%s queue override\n",
 		   enabled ? "Enabling" : "Disabling");
 	team->queue_override_enabled = enabled;
 }
@@ -984,11 +984,12 @@ static int team_port_enter(struct team *team, struct team_port *port)
 {
 	int err = 0;
 
-	dev_hold(team->dev);
+	dev_hold(netdev_from_priv(team));
 	if (team->ops.port_enter) {
 		err = team->ops.port_enter(team, port);
 		if (err) {
-			netdev_err(team->dev, "Device %s failed to enter team mode\n",
+			netdev_err(netdev_from_priv(team),
+				   "Device %s failed to enter team mode\n",
 				   port->dev->name);
 			goto err_port_enter;
 		}
@@ -997,7 +998,7 @@ static int team_port_enter(struct team *team, struct team_port *port)
 	return 0;
 
 err_port_enter:
-	dev_put(team->dev);
+	dev_put(netdev_from_priv(team));
 
 	return err;
 }
@@ -1006,7 +1007,7 @@ static void team_port_leave(struct team *team, struct team_port *port)
 {
 	if (team->ops.port_leave)
 		team->ops.port_leave(team, port);
-	dev_put(team->dev);
+	dev_put(netdev_from_priv(team));
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -1030,7 +1031,7 @@ static int __team_port_enable_netpoll(struct team_port *port)
 
 static int team_port_enable_netpoll(struct team_port *port)
 {
-	if (!port->team->dev->npinfo)
+	if (!netdev_from_priv(port->team)->npinfo)
 		return 0;
 
 	return __team_port_enable_netpoll(port);
@@ -1064,8 +1065,8 @@ static int team_upper_dev_link(struct team *team, struct team_port *port,
 
 	lag_upper_info.tx_type = team->mode->lag_tx_type;
 	lag_upper_info.hash_type = NETDEV_LAG_HASH_UNKNOWN;
-	err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
-					   &lag_upper_info, extack);
+	err = netdev_master_upper_dev_link(port->dev, netdev_from_priv(team),
+					   NULL, &lag_upper_info, extack);
 	if (err)
 		return err;
 	port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1074,7 +1075,7 @@ static int team_upper_dev_link(struct team *team, struct team_port *port,
 
 static void team_upper_dev_unlink(struct team *team, struct team_port *port)
 {
-	netdev_upper_dev_unlink(port->dev, team->dev);
+	netdev_upper_dev_unlink(port->dev, netdev_from_priv(team));
 	port->dev->priv_flags &= ~IFF_TEAM_PORT;
 }
 
@@ -1085,7 +1086,7 @@ static int team_dev_type_check_change(struct net_device *dev,
 static int team_port_add(struct team *team, struct net_device *port_dev,
 			 struct netlink_ext_ack *extack)
 {
-	struct net_device *dev = team->dev;
+	struct net_device *dev = netdev_from_priv(team);
 	struct team_port *port;
 	char *portname = port_dev->name;
 	int err;
@@ -1247,7 +1248,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
 	port->index = -1;
 	list_add_tail_rcu(&port->list, &team->port_list);
 	team_port_enable(team, port);
-	netdev_compute_master_upper_features(team->dev, true);
+	netdev_compute_master_upper_features(dev, true);
 	__team_port_change_port_added(port, !!netif_oper_up(port_dev));
 	__team_options_change_check(team);
 
@@ -1292,7 +1293,7 @@ static void __team_port_change_port_removed(struct team_port *port);
 
 static int team_port_del(struct team *team, struct net_device *port_dev, bool unregister)
 {
-	struct net_device *dev = team->dev;
+	struct net_device *dev = netdev_from_priv(team);
 	struct team_port *port;
 	char *portname = port_dev->name;
 
@@ -1337,7 +1338,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev, bool un
 	}
 	kfree_rcu(port, rcu);
 	netdev_info(dev, "Port device %s removed\n", portname);
-	netdev_compute_master_upper_features(team->dev, true);
+	netdev_compute_master_upper_features(dev, true);
 
 	return 0;
 }
@@ -1506,7 +1507,7 @@ static int team_queue_id_option_set(struct team *team,
 
 	if (port->queue_id == new_queue_id)
 		return 0;
-	if (new_queue_id >= team->dev->real_num_tx_queues)
+	if (new_queue_id >= netdev_from_priv(team)->real_num_tx_queues)
 		return -EINVAL;
 	team_queue_override_port_change_queue_id(team, port, new_queue_id);
 	return 0;
@@ -1587,7 +1588,6 @@ static int team_init(struct net_device *dev)
 	int i;
 	int err;
 
-	team->dev = dev;
 	team_set_no_mode(team);
 	team->notifier_ctx = false;
 
@@ -2256,7 +2256,7 @@ static struct team *team_nl_team_get(struct genl_info *info)
 
 static void team_nl_team_put(struct team *team)
 {
-	dev_put(team->dev);
+	dev_put(netdev_from_priv(team));
 }
 
 typedef int team_nl_send_func_t(struct sk_buff *skb,
@@ -2264,7 +2264,7 @@ typedef int team_nl_send_func_t(struct sk_buff *skb,
 
 static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 portid)
 {
-	return genlmsg_unicast(dev_net(team->dev), skb, portid);
+	return genlmsg_unicast(dev_net(netdev_from_priv(team)), skb, portid);
 }
 
 static int team_nl_fill_one_option_get(struct sk_buff *skb, struct team *team,
@@ -2393,7 +2393,8 @@ start_again:
 		return -EMSGSIZE;
 	}
 
-	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
+	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX,
+			netdev_from_priv(team)->ifindex))
 		goto nla_put_failure;
 	option_list = nla_nest_start_noflag(skb, TEAM_ATTR_LIST_OPTION);
 	if (!option_list)
@@ -2681,7 +2682,8 @@ start_again:
 		return -EMSGSIZE;
 	}
 
-	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
+	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX,
+			netdev_from_priv(team)->ifindex))
 		goto nla_put_failure;
 	port_list = nla_nest_start_noflag(skb, TEAM_ATTR_LIST_PORT);
 	if (!port_list)
@@ -2782,7 +2784,8 @@ static struct genl_family team_nl_family __ro_after_init = {
 static int team_nl_send_multicast(struct sk_buff *skb,
 				  struct team *team, u32 portid)
 {
-	return genlmsg_multicast_netns(&team_nl_family, dev_net(team->dev),
+	return genlmsg_multicast_netns(&team_nl_family,
+				       dev_net(netdev_from_priv(team)),
 				       skb, 0, 0, GFP_KERNEL);
 }
 
@@ -2827,7 +2830,8 @@ static void __team_options_change_check(struct team *team)
 	}
 	err = team_nl_send_event_options_get(team, &sel_opt_inst_list);
 	if (err && err != -ESRCH)
-		netdev_warn(team->dev, "Failed to send options change via netlink (err %d)\n",
+		netdev_warn(netdev_from_priv(team),
+			    "Failed to send options change via netlink (err %d)\n",
 			    err);
 }
 
@@ -2856,7 +2860,8 @@ static void __team_port_change_send(struct team_port *port, bool linkup)
 send_event:
 	err = team_nl_send_event_port_get(port->team, port);
 	if (err && err != -ESRCH)
-		netdev_warn(port->team->dev, "Failed to send port change of device %s via netlink (err %d)\n",
+		netdev_warn(netdev_from_priv(port->team),
+			    "Failed to send port change of device %s via netlink (err %d)\n",
 			    port->dev->name, err);
 
 }
@@ -2878,9 +2883,9 @@ static void __team_carrier_check(struct team *team)
 	}
 
 	if (team_linkup)
-		netif_carrier_on(team->dev);
+		netif_carrier_on(netdev_from_priv(team));
 	else
-		netif_carrier_off(team->dev);
+		netif_carrier_off(netdev_from_priv(team));
 }
 
 static void __team_port_change_check(struct team_port *port, bool linkup)
@@ -2939,12 +2944,14 @@ static int team_device_event(struct notifier_block *unused,
 					       !!netif_oper_up(port->dev));
 		break;
 	case NETDEV_UNREGISTER:
-		team_del_slave_on_unregister(port->team->dev, dev);
+		team_del_slave_on_unregister(netdev_from_priv(port->team),
+					     dev);
 		break;
 	case NETDEV_FEAT_CHANGE:
 		if (!port->team->notifier_ctx) {
 			port->team->notifier_ctx = true;
-			netdev_compute_master_upper_features(port->team->dev, true);
+			netdev_compute_master_upper_features(netdev_from_priv(port->team),
+							     true);
 			port->team->notifier_ctx = false;
 		}
 		break;
@@ -2958,7 +2965,7 @@ static int team_device_event(struct notifier_block *unused,
 		return NOTIFY_BAD;
 	case NETDEV_RESEND_IGMP:
 		/* Propagate to master device */
-		call_netdevice_notifiers(event, port->team->dev);
+		call_netdevice_notifiers(event, netdev_from_priv(port->team));
 		break;
 	}
 	return NOTIFY_DONE;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index ce97d891cf72..ccb5327de26d 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -186,7 +186,6 @@ struct team_mode {
 #define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS)
 
 struct team {
-	struct net_device *dev; /* associated netdevice */
 	struct team_pcpu_stats __percpu *pcpu_stats;
 
 	const struct header_ops *header_ops_cache;
@@ -232,7 +231,7 @@ static inline int team_dev_queue_xmit(struct team *team, struct team_port *port,
 	skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);
 
 	skb->dev = port->dev;
-	if (unlikely(netpoll_tx_running(team->dev))) {
+	if (unlikely(netpoll_tx_running(netdev_from_priv(team)))) {
 		team_netpoll_send_skb(port, skb);
 		return 0;
 	}
-- 
cgit v1.2.3


From 0475f9e779b456f934adbc44eeb98e3080a1893f Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn@kernel.org>
Date: Fri, 20 Mar 2026 09:58:21 +0100
Subject: ethtool: Track user-provided RSS indirection table size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Track the number of indirection table entries the user originally
provided (context 0/default as well!).

Replace IFF_RXFH_CONFIGURED with rss_indir_user_size: the flag is
redundant now that user_size captures the same information.

Add ethtool_rxfh_indir_lost() for drivers that must reset the
indirection table.

Convert bnxt and mlx5 to use it.

Signed-off-by: Björn Töpel <bjorn@kernel.org>
Link: https://patch.msgid.link/20260320085826.1957255-2-bjorn@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         |  3 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 21 +++++++++--------
 include/linux/ethtool.h                           |  7 ++++++
 include/linux/netdevice.h                         |  7 +-----
 net/ethtool/common.c                              | 28 +++++++++++++++++++++++
 net/ethtool/ioctl.c                               |  9 ++++----
 net/ethtool/rss.c                                 | 24 ++++++++++++-------
 7 files changed, 70 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 604966a398f5..84eb53b4172b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8118,8 +8118,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
 		    (bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) !=
 		     bnxt_get_nr_rss_ctxs(bp, rx_rings) ||
 		     bnxt_get_max_rss_ring(bp) >= rx_rings)) {
-			netdev_warn(bp->dev, "RSS table entries reverting to default\n");
-			bp->dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+			ethtool_rxfh_indir_lost(bp->dev);
 		}
 	}
 	bp->rx_nr_rings = rx_rings;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3eebdf402129..1238e5356012 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -6480,12 +6480,23 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 
 	/* max number of channels may have changed */
 	max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile);
+
+	/* Locking is required by ethtool_rxfh_indir_lost() (sends
+	 * ETHTOOL_MSG_RSS_NTF) and by netif_set_real_num_*_queues in case
+	 * the netdev has been registered by this point (if this function
+	 * was called in the reload or resume flow).
+	 */
+	if (need_lock) {
+		rtnl_lock();
+		netdev_lock(priv->netdev);
+	}
+
 	if (priv->channels.params.num_channels > max_nch) {
 		mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
 		/* Reducing the number of channels - RXFH has to be reset, and
 		 * mlx5e_num_channels_changed below will build the RQT.
 		 */
-		priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+		ethtool_rxfh_indir_lost(priv->netdev);
 		priv->channels.params.num_channels = max_nch;
 		if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
 			mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n");
@@ -6502,15 +6513,7 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 	/* 1. Set the real number of queues in the kernel the first time.
 	 * 2. Set our default XPS cpumask.
 	 * 3. Build the RQT.
-	 *
-	 * Locking is required by netif_set_real_num_*_queues in case the
-	 * netdev has been registered by this point (if this function was called
-	 * in the reload or resume flow).
 	 */
-	if (need_lock) {
-		rtnl_lock();
-		netdev_lock(priv->netdev);
-	}
 	err = mlx5e_num_channels_changed(priv);
 	if (need_lock) {
 		netdev_unlock(priv->netdev);
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 656d465bcd06..34ca9261de82 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -176,6 +176,8 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  * struct ethtool_rxfh_context - a custom RSS context configuration
  * @indir_size: Number of u32 entries in indirection table
  * @key_size: Size of hash key, in bytes
+ * @indir_user_size: number of user provided entries for the
+ *	indirection table
  * @priv_size: Size of driver private data, in bytes
  * @hfunc: RSS hash function identifier.  One of the %ETH_RSS_HASH_*
  * @input_xfrm: Defines how the input data is transformed. Valid values are one
@@ -186,6 +188,7 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
 struct ethtool_rxfh_context {
 	u32 indir_size;
 	u32 key_size;
+	u32 indir_user_size;
 	u16 priv_size;
 	u8 hfunc;
 	u8 input_xfrm;
@@ -214,6 +217,7 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx)
 }
 
 void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id);
+void ethtool_rxfh_indir_lost(struct net_device *dev);
 
 struct link_mode_info {
 	int	speed;
@@ -1337,12 +1341,15 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
  * @rss_ctx:		XArray of custom RSS contexts
  * @rss_lock:		Protects entries in @rss_ctx.  May be taken from
  *			within RTNL.
+ * @rss_indir_user_size: Number of user provided entries for the default
+ *			 (context 0) indirection table.
  * @wol_enabled:	Wake-on-LAN is enabled
  * @module_fw_flash_in_progress: Module firmware flashing is in progress.
  */
 struct ethtool_netdev_state {
 	struct xarray		rss_ctx;
 	struct mutex		rss_lock;
+	u32			rss_indir_user_size;
 	unsigned		wol_enabled:1;
 	unsigned		module_fw_flash_in_progress:1;
 };
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6882b41bb3e8..e15367373f7c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1716,7 +1716,6 @@ struct net_device_ops {
  * @IFF_OPENVSWITCH: device is a Open vSwitch master
  * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device
  * @IFF_TEAM: device is a team device
- * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured
  * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external
  *	entity (i.e. the master device for bridged veth)
  * @IFF_MACSEC: device is a MACsec device
@@ -1752,7 +1751,6 @@ enum netdev_priv_flags {
 	IFF_OPENVSWITCH			= 1<<20,
 	IFF_L3MDEV_SLAVE		= 1<<21,
 	IFF_TEAM			= 1<<22,
-	IFF_RXFH_CONFIGURED		= 1<<23,
 	IFF_PHONY_HEADROOM		= 1<<24,
 	IFF_MACSEC			= 1<<25,
 	IFF_NO_RX_HANDLER		= 1<<26,
@@ -5580,10 +5578,7 @@ static inline bool netif_is_lag_port(const struct net_device *dev)
 	return netif_is_bond_slave(dev) || netif_is_team_port(dev);
 }
 
-static inline bool netif_is_rxfh_configured(const struct net_device *dev)
-{
-	return dev->priv_flags & IFF_RXFH_CONFIGURED;
-}
+bool netif_is_rxfh_configured(const struct net_device *dev);
 
 static inline bool netif_is_failover(const struct net_device *dev)
 {
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 6a4a3797a812..98e85f393f8c 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -1204,6 +1204,34 @@ void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id)
 }
 EXPORT_SYMBOL(ethtool_rxfh_context_lost);
 
+bool netif_is_rxfh_configured(const struct net_device *dev)
+{
+	return dev->ethtool->rss_indir_user_size;
+}
+EXPORT_SYMBOL(netif_is_rxfh_configured);
+
+/**
+ * ethtool_rxfh_indir_lost - Notify core that the RSS indirection table was lost
+ * @dev: network device
+ *
+ * Drivers should call this when the device can no longer maintain the
+ * user-configured indirection table, typically after a HW fault recovery
+ * that reduced the maximum queue count. Marks the default RSS context
+ * indirection table as unconfigured and sends an %ETHTOOL_MSG_RSS_NTF
+ * notification.
+ */
+void ethtool_rxfh_indir_lost(struct net_device *dev)
+{
+	WARN_ONCE(!rtnl_is_locked() &&
+		  !lockdep_is_held_type(&dev->ethtool->rss_lock, -1),
+		  "RSS context lock assertion failed\n");
+
+	netdev_err(dev, "device error, RSS indirection table lost\n");
+	dev->ethtool->rss_indir_user_size = 0;
+	ethtool_rss_notify(dev, ETHTOOL_MSG_RSS_NTF, 0);
+}
+EXPORT_SYMBOL(ethtool_rxfh_indir_lost);
+
 enum ethtool_link_medium ethtool_str_to_medium(const char *str)
 {
 	int i;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 11dfbf076b6d..3c713a91ad0d 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1405,9 +1405,9 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 
 	/* indicate whether rxfh was set to default */
 	if (user_size == 0)
-		dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+		dev->ethtool->rss_indir_user_size = 0;
 	else
-		dev->priv_flags |= IFF_RXFH_CONFIGURED;
+		dev->ethtool->rss_indir_user_size = rxfh_dev.indir_size;
 
 out_unlock:
 	mutex_unlock(&dev->ethtool->rss_lock);
@@ -1722,9 +1722,9 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 	if (!rxfh_dev.rss_context) {
 		/* indicate whether rxfh was set to default */
 		if (rxfh.indir_size == 0)
-			dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+			dev->ethtool->rss_indir_user_size = 0;
 		else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
-			dev->priv_flags |= IFF_RXFH_CONFIGURED;
+			dev->ethtool->rss_indir_user_size = dev_indir_size;
 	}
 	/* Update rss_ctx tracking */
 	if (rxfh_dev.rss_delete) {
@@ -1737,6 +1737,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 			ctx->indir_configured =
 				rxfh.indir_size &&
 				rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE;
+			ctx->indir_user_size = dev_indir_size;
 		}
 		if (rxfh_dev.key) {
 			memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key,
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 0f4e5cd2ac71..5cf90d73e70b 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -686,7 +686,7 @@ rss_set_prep_indir(struct net_device *dev, struct genl_info *info,
 
 	*mod |= memcmp(rxfh->indir, data->indir_table, data->indir_size);
 
-	return 0;
+	return user_size;
 
 err_free:
 	kfree(rxfh->indir);
@@ -833,6 +833,7 @@ ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info)
 	struct nlattr **tb = info->attrs;
 	struct rss_reply_data data = {};
 	const struct ethtool_ops *ops;
+	u32 indir_user_size;
 	int ret;
 
 	ops = dev->ethtool_ops;
@@ -845,8 +846,9 @@ ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info)
 	rxfh.rss_context = request->rss_context;
 
 	ret = rss_set_prep_indir(dev, info, &data, &rxfh, &indir_reset, &mod);
-	if (ret)
+	if (ret < 0)
 		goto exit_clean_data;
+	indir_user_size = ret;
 	indir_mod = !!tb[ETHTOOL_A_RSS_INDIR];
 
 	rxfh.hfunc = data.hfunc;
@@ -889,12 +891,15 @@ ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info)
 	if (ret)
 		goto exit_unlock;
 
-	if (ctx)
+	if (ctx) {
 		rss_set_ctx_update(ctx, tb, &data, &rxfh);
-	else if (indir_reset)
-		dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
-	else if (indir_mod)
-		dev->priv_flags |= IFF_RXFH_CONFIGURED;
+		if (indir_user_size)
+			ctx->indir_user_size = indir_user_size;
+	} else if (indir_reset) {
+		dev->ethtool->rss_indir_user_size = 0;
+	} else if (indir_mod) {
+		dev->ethtool->rss_indir_user_size = indir_user_size;
+	}
 
 exit_unlock:
 	mutex_unlock(&dev->ethtool->rss_lock);
@@ -999,6 +1004,7 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info)
 	const struct ethtool_ops *ops;
 	struct rss_req_info req = {};
 	struct net_device *dev;
+	u32 indir_user_size;
 	struct sk_buff *rsp;
 	void *hdr;
 	u32 limit;
@@ -1035,8 +1041,9 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info)
 		goto exit_ops;
 
 	ret = rss_set_prep_indir(dev, info, &data, &rxfh, &indir_dflt, &mod);
-	if (ret)
+	if (ret < 0)
 		goto exit_clean_data;
+	indir_user_size = ret;
 
 	ethnl_update_u8(&rxfh.hfunc, tb[ETHTOOL_A_RSS_HFUNC], &mod);
 
@@ -1080,6 +1087,7 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info)
 
 	/* Store the config from rxfh to Xarray.. */
 	rss_set_ctx_update(ctx, tb, &data, &rxfh);
+	ctx->indir_user_size = indir_user_size;
 	/* .. copy from Xarray to data. */
 	__rss_prepare_ctx(dev, &data, ctx);
 
-- 
cgit v1.2.3


From 02bcb20083b2780772cfb66cd426f31940296783 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn@kernel.org>
Date: Fri, 20 Mar 2026 09:58:22 +0100
Subject: ethtool: Add RSS indirection table resize helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The core locks ctx->indir_size when an RSS context is created. Some
NICs (e.g. bnxt) change their indirection table size based on the
channel count, because the hardware table is a shared resource. This
forces drivers to reject channel changes when RSS contexts exist.

Add driver helpers to resize indirection tables:

ethtool_rxfh_indir_can_resize() checks whether the default context
indirection table can be resized.

ethtool_rxfh_indir_resize() resizes the default context table in
place. Folding (shrink) requires the table to be periodic at the new
size; non-periodic tables are rejected. Unfolding (grow) replicates
the existing pattern. Sizes must be multiples of each other.

ethtool_rxfh_ctxs_can_resize() validates all non-default RSS contexts
can be resized.

ethtool_rxfh_ctxs_resize() applies the resize.

Signed-off-by: Björn Töpel <bjorn@kernel.org>
Link: https://patch.msgid.link/20260320085826.1957255-3-bjorn@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h |   6 ++
 net/ethtool/common.c    | 155 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 161 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 34ca9261de82..1cb0740ba331 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -218,6 +218,12 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx)
 
 void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id);
 void ethtool_rxfh_indir_lost(struct net_device *dev);
+bool ethtool_rxfh_indir_can_resize(struct net_device *dev, const u32 *tbl,
+				   u32 old_size, u32 new_size);
+void ethtool_rxfh_indir_resize(struct net_device *dev, u32 *tbl,
+			       u32 old_size, u32 new_size);
+int ethtool_rxfh_ctxs_can_resize(struct net_device *dev, u32 new_indir_size);
+void ethtool_rxfh_ctxs_resize(struct net_device *dev, u32 new_indir_size);
 
 struct link_mode_info {
 	int	speed;
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 98e85f393f8c..84ec88dee05c 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -1232,6 +1232,161 @@ void ethtool_rxfh_indir_lost(struct net_device *dev)
 }
 EXPORT_SYMBOL(ethtool_rxfh_indir_lost);
 
+static bool ethtool_rxfh_is_periodic(const u32 *tbl, u32 old_size, u32 new_size)
+{
+	u32 i;
+
+	for (i = new_size; i < old_size; i++)
+		if (tbl[i] != tbl[i % new_size])
+			return false;
+	return true;
+}
+
+static bool ethtool_rxfh_can_resize(const u32 *tbl, u32 old_size, u32 new_size,
+				    u32 user_size)
+{
+	if (new_size == old_size)
+		return true;
+
+	if (!user_size)
+		return true;
+
+	if (new_size < old_size) {
+		if (new_size < user_size)
+			return false;
+		if (old_size % new_size)
+			return false;
+		if (!ethtool_rxfh_is_periodic(tbl, old_size, new_size))
+			return false;
+		return true;
+	}
+
+	if (new_size % old_size)
+		return false;
+	return true;
+}
+
+/* Resize without validation; caller must have called can_resize first */
+static void ethtool_rxfh_resize(u32 *tbl, u32 old_size, u32 new_size)
+{
+	u32 i;
+
+	/* Grow: replicate existing pattern; shrink is a no-op on the data */
+	for (i = old_size; i < new_size; i++)
+		tbl[i] = tbl[i % old_size];
+}
+
+/**
+ * ethtool_rxfh_indir_can_resize - Check if context 0 indir table can resize
+ * @dev: network device
+ * @tbl: indirection table
+ * @old_size: current number of entries in the table
+ * @new_size: desired number of entries
+ *
+ * Validate that @tbl can be resized from @old_size to @new_size without
+ * data loss. Uses the user_size floor from context 0. When user_size is
+ * zero the table is not user-configured and resize always succeeds.
+ * Read-only; does not modify the table.
+ *
+ * Return: true if resize is possible, false otherwise.
+ */
+bool ethtool_rxfh_indir_can_resize(struct net_device *dev, const u32 *tbl,
+				   u32 old_size, u32 new_size)
+{
+	return ethtool_rxfh_can_resize(tbl, old_size, new_size,
+				       dev->ethtool->rss_indir_user_size);
+}
+EXPORT_SYMBOL(ethtool_rxfh_indir_can_resize);
+
+/**
+ * ethtool_rxfh_indir_resize - Fold or unfold context 0 indirection table
+ * @dev: network device
+ * @tbl: indirection table (must have room for max(old_size, new_size) entries)
+ * @old_size: current number of entries in the table
+ * @new_size: desired number of entries
+ *
+ * Resize the default RSS context indirection table in place. Caller
+ * must have validated with ethtool_rxfh_indir_can_resize() first.
+ */
+void ethtool_rxfh_indir_resize(struct net_device *dev, u32 *tbl,
+			       u32 old_size, u32 new_size)
+{
+	if (!dev->ethtool->rss_indir_user_size)
+		return;
+
+	ethtool_rxfh_resize(tbl, old_size, new_size);
+}
+EXPORT_SYMBOL(ethtool_rxfh_indir_resize);
+
+/**
+ * ethtool_rxfh_ctxs_can_resize - Validate resize for all RSS contexts
+ * @dev: network device
+ * @new_indir_size: new indirection table size
+ *
+ * Validate that the indirection tables of all non-default RSS contexts
+ * can be resized to @new_indir_size. Read-only; does not modify any
+ * context. Intended to be paired with ethtool_rxfh_ctxs_resize().
+ *
+ * Return: 0 if all contexts can be resized, negative errno on failure.
+ */
+int ethtool_rxfh_ctxs_can_resize(struct net_device *dev, u32 new_indir_size)
+{
+	struct ethtool_rxfh_context *ctx;
+	unsigned long context;
+	int ret = 0;
+
+	if (!dev->ethtool_ops->rxfh_indir_space ||
+	    new_indir_size > dev->ethtool_ops->rxfh_indir_space)
+		return -EINVAL;
+
+	mutex_lock(&dev->ethtool->rss_lock);
+	xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
+		u32 *indir = ethtool_rxfh_context_indir(ctx);
+
+		if (!ethtool_rxfh_can_resize(indir, ctx->indir_size,
+					     new_indir_size,
+					     ctx->indir_user_size)) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+	}
+unlock:
+	mutex_unlock(&dev->ethtool->rss_lock);
+	return ret;
+}
+EXPORT_SYMBOL(ethtool_rxfh_ctxs_can_resize);
+
+/**
+ * ethtool_rxfh_ctxs_resize - Resize all RSS context indirection tables
+ * @dev: network device
+ * @new_indir_size: new indirection table size
+ *
+ * Resize the indirection table of every non-default RSS context to
+ * @new_indir_size. Caller must have validated with
+ * ethtool_rxfh_ctxs_can_resize() first. An %ETHTOOL_MSG_RSS_NTF is
+ * sent for each resized context.
+ *
+ * Notifications are sent outside the RSS lock to avoid holding the
+ * mutex during notification delivery.
+ */
+void ethtool_rxfh_ctxs_resize(struct net_device *dev, u32 new_indir_size)
+{
+	struct ethtool_rxfh_context *ctx;
+	unsigned long context;
+
+	mutex_lock(&dev->ethtool->rss_lock);
+	xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
+		ethtool_rxfh_resize(ethtool_rxfh_context_indir(ctx),
+				    ctx->indir_size, new_indir_size);
+		ctx->indir_size = new_indir_size;
+	}
+	mutex_unlock(&dev->ethtool->rss_lock);
+
+	xa_for_each(&dev->ethtool->rss_ctx, context, ctx)
+		ethtool_rss_notify(dev, ETHTOOL_MSG_RSS_NTF, context);
+}
+EXPORT_SYMBOL(ethtool_rxfh_ctxs_resize);
+
 enum ethtool_link_medium ethtool_str_to_medium(const char *str)
 {
 	int i;
-- 
cgit v1.2.3


From e379dce8af11d8d6040b4348316a499bfd174bfb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 23 Mar 2026 10:36:27 +0100
Subject: sched/topology: Fix sched_domain_span()

Commit 8e8e23dea43e ("sched/topology: Compute sd_weight considering
cpuset partitions") ends up relying on the fact that structure
initialization should not touch the flexible array.

However, the official GCC specification for "Arrays of Length Zero"
[*] says:

  Although the size of a zero-length array is zero, an array member of
  this kind may increase the size of the enclosing type as a result of
  tail padding.

Additionally, structure initialization will zero tail padding. With
the end result that since offsetof(*type, member) < sizeof(*type),
array initialization will clobber the flex array.

Luckily, the way flexible array sizes are calculated is:

  sizeof(*type) + count * sizeof(*type->member)

This means we have the complete size of the flex array *outside* of
sizeof(*type), so use that instead of relying on the broken flex array
definition.

[*] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html

Fixes: 8e8e23dea43e ("sched/topology: Compute sd_weight considering cpuset partitions")
Reported-by: Nathan Chancellor <nathan@kernel.org>
Debugged-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Link: https://patch.msgid.link/20260323093627.GY3738010@noisy.programming.kicks-ass.net
---
 include/linux/sched/topology.h | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 51c29581f15e..36553e14866d 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -142,18 +142,30 @@ struct sched_domain {
 
 	unsigned int span_weight;
 	/*
-	 * Span of all CPUs in this domain.
+	 * See sched_domain_span(), on why flex arrays are broken.
 	 *
-	 * NOTE: this field is variable length. (Allocated dynamically
-	 * by attaching extra space to the end of the structure,
-	 * depending on how many CPUs the kernel has booted up with)
-	 */
 	unsigned long span[];
+	 */
 };
 
 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 {
-	return to_cpumask(sd->span);
+	/*
+	 * Turns out that C flexible arrays are fundamentally broken since it
+	 * is allowed for offsetof(*sd, span) < sizeof(*sd), this means that
+	 * structure initialzation *sd = { ... }; which writes every byte
+	 * inside sizeof(*type), will over-write the start of the flexible
+	 * array.
+	 *
+	 * Luckily, the way we allocate sched_domain is by:
+	 *
+	 *   sizeof(*sd) + cpumask_size()
+	 *
+	 * this means that we have sufficient space for the whole flex array
+	 * *outside* of sizeof(*sd). So use that, and avoid using sd->span.
+	 */
+	unsigned long *bitmap = (void *)sd + sizeof(*sd);
+	return to_cpumask(bitmap);
 }
 
 extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
-- 
cgit v1.2.3


From 96b76f7bc575ac6c69090f4642e424b04fb6784c Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Mon, 23 Mar 2026 22:01:10 +0300
Subject: pinctrl: introduce pinctrl_gpio_get_config()

This is a counterpart of pinctrl_gpio_set_config(), which will be used
to implement the ->get() interface in a GPIO driver for SCMI.

This also requires that we create a stub so pin_config_get_for_pin()
can build when CONFIG_PINCONF is disabled.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
---
 drivers/pinctrl/core.c           | 31 +++++++++++++++++++++++++++++++
 drivers/pinctrl/pinconf.h        |  6 ++++++
 include/linux/pinctrl/consumer.h |  9 +++++++++
 3 files changed, 46 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index b5e97689589f..da0a07742460 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -30,6 +30,7 @@
 #include <linux/pinctrl/consumer.h>
 #include <linux/pinctrl/devinfo.h>
 #include <linux/pinctrl/machine.h>
+#include <linux/pinctrl/pinconf.h>
 #include <linux/pinctrl/pinctrl.h>
 
 #include "core.h"
@@ -938,6 +939,36 @@ int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
 }
 EXPORT_SYMBOL_GPL(pinctrl_gpio_set_config);
 
+/**
+ * pinctrl_gpio_get_config() - Get the config for a given GPIO pin
+ * @gc: GPIO chip structure from the GPIO subsystem
+ * @offset: hardware offset of the GPIO relative to the controller
+ * @config: the configuration to query.  On success it holds the result
+ * Return: 0 on success, negative errno otherwise
+ */
+int pinctrl_gpio_get_config(struct gpio_chip *gc, unsigned int offset, unsigned long *config)
+{
+	struct pinctrl_gpio_range *range;
+	struct pinctrl_dev *pctldev;
+	int ret, pin;
+
+	ret = pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range);
+	if (ret)
+		return ret;
+
+	mutex_lock(&pctldev->mutex);
+	pin = gpio_to_pin(range, gc, offset);
+	ret = pin_config_get_for_pin(pctldev, pin, config);
+	mutex_unlock(&pctldev->mutex);
+
+	if (ret)
+		return ret;
+
+	*config = pinconf_to_config_argument(*config);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pinctrl_gpio_get_config);
+
 static struct pinctrl_state *find_state(struct pinctrl *p,
 					const char *name)
 {
diff --git a/drivers/pinctrl/pinconf.h b/drivers/pinctrl/pinconf.h
index 2880adef476e..659a781e2091 100644
--- a/drivers/pinctrl/pinconf.h
+++ b/drivers/pinctrl/pinconf.h
@@ -74,6 +74,12 @@ static inline int pinconf_set_config(struct pinctrl_dev *pctldev, unsigned int p
 	return -ENOTSUPP;
 }
 
+static inline int pin_config_get_for_pin(struct pinctrl_dev *pctldev, unsigned int pin,
+			   unsigned long *config)
+{
+	return -ENOTSUPP;
+}
+
 #endif
 
 #if defined(CONFIG_PINCONF) && defined(CONFIG_DEBUG_FS)
diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index 63ce16191eb9..11b8f0b8da0c 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -35,6 +35,8 @@ int pinctrl_gpio_direction_output(struct gpio_chip *gc,
 				  unsigned int offset);
 int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
 				unsigned long config);
+int pinctrl_gpio_get_config(struct gpio_chip *gc, unsigned int offset,
+			    unsigned long *config);
 
 struct pinctrl * __must_check pinctrl_get(struct device *dev);
 void pinctrl_put(struct pinctrl *p);
@@ -101,6 +103,13 @@ pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset)
 	return 0;
 }
 
+static inline int
+pinctrl_gpio_get_config(struct gpio_chip *gc, unsigned int offset,
+			unsigned long *config)
+{
+	return 0;
+}
+
 static inline int
 pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
 			    unsigned long config)
-- 
cgit v1.2.3


From a21c1e961de28b95099a9ca2c3774b2eee1a33bb Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Thu, 19 Mar 2026 14:52:38 +0100
Subject: compiler: Simplify generic RELOC_HIDE()

When enabling Context Analysis (CONTEXT_ANALYSIS := y) in arch/x86/kvm
code, Clang's Thread Safety Analysis failed to recognize that identical
per_cpu() accesses refer to the same lock:

|   CC [M]  arch/x86/kvm/vmx/posted_intr.o
| arch/x86/kvm/vmx/posted_intr.c:186:2: error: releasing raw_spinlock '__ptr + __per_cpu_offset[vcpu->cpu]' that was not held [-Werror,-Wthread-safety-analysis]
|   186 |         raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
|       |         ^
| ./include/linux/spinlock.h:276:32: note: expanded from macro 'raw_spin_unlock'
|   276 | #define raw_spin_unlock(lock)           _raw_spin_unlock(lock)
|       |                                         ^
| arch/x86/kvm/vmx/posted_intr.c:207:1: error: raw_spinlock '__ptr + __per_cpu_offset[vcpu->cpu]' is still held at the end of function [-Werror,-Wthread-safety-analysis]
|   207 | }
|       | ^
| arch/x86/kvm/vmx/posted_intr.c:182:2: note: raw_spinlock acquired here
|   182 |         raw_spin_lock_nested(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu),
|       |         ^
| ./include/linux/spinlock.h:235:2: note: expanded from macro 'raw_spin_lock_nested'
|   235 |         _raw_spin_lock(((void)(subclass), (lock)))
|       |         ^
| 2 errors generated.

This occurred because the default RELOC_HIDE() implementation (used by
the per-CPU macros) is a statement expression containing an intermediate
'unsigned long' variable (this version appears to predate Git history).

While the analysis strips away inner casts when resolving pointer
aliases, it stops when encountering intermediate non-pointer variables
(this is Thread Safety Analysis specific and irrelevant for codegen).
This prevents the analysis from concluding that the pointers passed to
e.g. raw_spin_lock() and raw_spin_unlock() were identical when per-CPU
accessors are used.

Simplify RELOC_HIDE() to a single expression. This preserves the intent
of obfuscating UB-introducing out-of-bounds pointer calculations from
the compiler via the 'unsigned long' cast, but allows the alias analysis
to successfully resolve the pointers.

Using a recent Clang version, I observe that generated code remains the
same for vmlinux; the intermediate variable was already being optimized
away (for any respectable modern compiler, not doing so would be an
optimizer bug). Note that GCC provides its own version of RELOC_HIDE(),
so this change only affects Clang builds.

Add a test case to lib/test_context-analysis.c to catch any regressions.

Reported-by: Bart Van Assche <bvanassche@acm.org>
Reported-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/all/e3946223-4543-4a76-a328-9c6865e95192@acm.org/
Link: https://patch.msgid.link/20260319135245.1420780-1-elver@google.com
---
 include/linux/compiler.h    |  5 +----
 lib/test_context-analysis.c | 11 +++++++++++
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index af16624b29fd..cb2f6050bdf7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -149,10 +149,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 #endif
 
 #ifndef RELOC_HIDE
-# define RELOC_HIDE(ptr, off)					\
-  ({ unsigned long __ptr;					\
-     __ptr = (unsigned long) (ptr);				\
-    (typeof(ptr)) (__ptr + (off)); })
+# define RELOC_HIDE(ptr, off) ((typeof(ptr))((unsigned long)(ptr) + (off)))
 #endif
 
 #define absolute_pointer(val)	RELOC_HIDE((void *)(val), 0)
diff --git a/lib/test_context-analysis.c b/lib/test_context-analysis.c
index 140efa8a9763..06b4a6a028e0 100644
--- a/lib/test_context-analysis.c
+++ b/lib/test_context-analysis.c
@@ -596,3 +596,14 @@ static void __used test_ww_mutex_lock_ctx(struct test_ww_mutex_data *d)
 
 	ww_mutex_destroy(&d->mtx);
 }
+
+static DEFINE_PER_CPU(raw_spinlock_t, test_per_cpu_lock);
+
+static void __used test_per_cpu(int cpu)
+{
+	raw_spin_lock(&per_cpu(test_per_cpu_lock, cpu));
+	raw_spin_unlock(&per_cpu(test_per_cpu_lock, cpu));
+
+	raw_spin_lock(per_cpu_ptr(&test_per_cpu_lock, cpu));
+	raw_spin_unlock(per_cpu_ptr(&test_per_cpu_lock, cpu));
+}
-- 
cgit v1.2.3


From a02327413acc141a887fe77b89656e88bcc4f412 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Sat, 14 Mar 2026 14:45:55 -0700
Subject: bpf: Remove inclusions of crypto/sha1.h

Since commit 603b44162325 ("bpf: Update the bpf_prog_calc_tag to use
SHA256") made BPF program tags use SHA-256 instead of SHA-1, the header
<crypto/sha1.h> no longer needs to be included.  Remove the relevant
inclusions so that they no longer unnecessarily come up in searches for
which kernel code is still using the obsolete SHA-1 algorithm.

Since net/ipv6/addrconf.c was relying on the transitive inclusion of
<crypto/sha1.h> (for an unrelated purpose) via <linux/filter.h>, make it
include <crypto/sha1.h> explicitly in order to keep that file building.

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Acked-by: Paul Chaignon <paul.chaignon@gmail.com>
Link: https://lore.kernel.org/r/20260314214555.112386-1-ebiggers@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 1 -
 kernel/bpf/core.c      | 1 -
 net/ipv6/addrconf.c    | 1 +
 3 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 44d7ae95ddbc..e40d4071a345 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -21,7 +21,6 @@
 #include <linux/if_vlan.h>
 #include <linux/vmalloc.h>
 #include <linux/sockptr.h>
-#include <crypto/sha1.h>
 #include <linux/u64_stats_sync.h>
 
 #include <net/sch_generic.h>
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 67eb12b637a5..1af5fb3f21d9 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -18,7 +18,6 @@
  */
 
 #include <uapi/linux/btf.h>
-#include <crypto/sha1.h>
 #include <linux/filter.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0e55f139e05d..04c1e856bf7f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -36,6 +36,7 @@
 
 #define pr_fmt(fmt) "IPv6: " fmt
 
+#include <crypto/sha1.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-- 
cgit v1.2.3


From bd882ffdd48a200ca2faa7c3e690ecf765784b16 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Mon, 23 Mar 2026 16:38:32 +0800
Subject: f2fs: call f2fs_handle_critical_error() to set cp_error flag

f2fs_handle_page_eio() is the only left place we set CP_ERROR_FLAG
directly, it missed to update superblock.s_stop_reason, let's
call f2fs_handle_critical_error() instead to fix that.

Introduce STOP_CP_REASON_READ_{META,NODE,DATA} stop_cp_reason enum
variable to indicate which kind of data we failed to read.

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h          | 21 +++++++++++++++++++--
 include/linux/f2fs_fs.h |  3 +++
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8942b2a63cfd..931f8394bb18 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -5070,8 +5070,25 @@ static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi,
 		return;
 
 	if (ofs == sbi->page_eio_ofs[type]) {
-		if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO)
-			set_ckpt_flags(sbi, CP_ERROR_FLAG);
+		if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO) {
+			enum stop_cp_reason stop_reason;
+
+			switch (type) {
+			case META:
+				stop_reason = STOP_CP_REASON_READ_META;
+				break;
+			case NODE:
+				stop_reason = STOP_CP_REASON_READ_NODE;
+				break;
+			case DATA:
+				stop_reason = STOP_CP_REASON_READ_DATA;
+				break;
+			default:
+				f2fs_bug_on(sbi, 1);
+				return;
+			}
+			f2fs_handle_critical_error(sbi, stop_reason);
+		}
 	} else {
 		sbi->page_eio_ofs[type] = ofs;
 		sbi->page_eio_cnt[type] = 0;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index dc41722fcc9d..829a59399dac 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -80,6 +80,9 @@ enum stop_cp_reason {
 	STOP_CP_REASON_NO_SEGMENT,
 	STOP_CP_REASON_CORRUPTED_FREE_BITMAP,
 	STOP_CP_REASON_CORRUPTED_NID,
+	STOP_CP_REASON_READ_META,
+	STOP_CP_REASON_READ_NODE,
+	STOP_CP_REASON_READ_DATA,
 	STOP_CP_REASON_MAX,
 };
 
-- 
cgit v1.2.3


From 8988913aacee82e5401bf3b96839731982dcbde7 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Thu, 5 Mar 2026 10:31:38 +0100
Subject: module: Drop unused signature types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only PKCS#7 signatures are used today.

Remove the unused enum values. As this enum is used in on-disk data,
preserve the numeric value.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Reviewed-by: Nicolas Schier <nsc@kernel.org>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 include/linux/module_signature.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h
index 7eb4b00381ac..820cc1473383 100644
--- a/include/linux/module_signature.h
+++ b/include/linux/module_signature.h
@@ -15,9 +15,7 @@
 #define MODULE_SIG_STRING "~Module signature appended~\n"
 
 enum pkey_id_type {
-	PKEY_ID_PGP,		/* OpenPGP generated key ID */
-	PKEY_ID_X509,		/* X.509 arbitrary subjectKeyIdentifier */
-	PKEY_ID_PKCS7,		/* Signature in PKCS#7 message */
+	PKEY_ID_PKCS7 = 2,	/* Signature in PKCS#7 message */
 };
 
 /*
-- 
cgit v1.2.3


From acd87264af525dba6e9355310e8acdf066a5f6b5 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Thu, 5 Mar 2026 10:31:39 +0100
Subject: module: Give 'enum pkey_id_type' a more specific name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This enum originates in generic cryptographic code and has a very
generic name. Nowadays it is only used for module signatures.

As this enum is going to be exposed in a UAPI header, give it a more
specific name for clarity and consistency.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Reviewed-by: Nicolas Schier <nsc@kernel.org>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 arch/s390/kernel/machine_kexec_file.c | 2 +-
 include/linux/module_signature.h      | 6 +++---
 kernel/module_signature.c             | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index 1bf59c3f0e2b..667ee9279e23 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -53,7 +53,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
 		return -EKEYREJECTED;
 	kernel_len -= sig_len;
 
-	if (ms->id_type != PKEY_ID_PKCS7)
+	if (ms->id_type != MODULE_SIGNATURE_TYPE_PKCS7)
 		return -EKEYREJECTED;
 
 	if (ms->algo != 0 ||
diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h
index 820cc1473383..c3a05d4cfe67 100644
--- a/include/linux/module_signature.h
+++ b/include/linux/module_signature.h
@@ -14,8 +14,8 @@
 /* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
 #define MODULE_SIG_STRING "~Module signature appended~\n"
 
-enum pkey_id_type {
-	PKEY_ID_PKCS7 = 2,	/* Signature in PKCS#7 message */
+enum module_signature_type {
+	MODULE_SIGNATURE_TYPE_PKCS7 = 2,	/* Signature in PKCS#7 message */
 };
 
 /*
@@ -31,7 +31,7 @@ enum pkey_id_type {
 struct module_signature {
 	u8	algo;		/* Public-key crypto algorithm [0] */
 	u8	hash;		/* Digest algorithm [0] */
-	u8	id_type;	/* Key identifier type [PKEY_ID_PKCS7] */
+	u8	id_type;	/* Key identifier type [enum module_signature_type] */
 	u8	signer_len;	/* Length of signer's name [0] */
 	u8	key_id_len;	/* Length of key identifier [0] */
 	u8	__pad[3];
diff --git a/kernel/module_signature.c b/kernel/module_signature.c
index 00132d12487c..a0eee2fe4368 100644
--- a/kernel/module_signature.c
+++ b/kernel/module_signature.c
@@ -24,7 +24,7 @@ int mod_check_sig(const struct module_signature *ms, size_t file_len,
 	if (be32_to_cpu(ms->sig_len) >= file_len - sizeof(*ms))
 		return -EBADMSG;
 
-	if (ms->id_type != PKEY_ID_PKCS7) {
+	if (ms->id_type != MODULE_SIGNATURE_TYPE_PKCS7) {
 		pr_err("%s: not signed with expected PKCS#7 message\n",
 		       name);
 		return -ENOPKG;
-- 
cgit v1.2.3


From 2ae4ea2d9aaf25cb74fbc23450b1b8f0a5b7aa89 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Thu, 5 Mar 2026 10:31:40 +0100
Subject: module: Give MODULE_SIG_STRING a more descriptive name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The purpose of the constant it is not entirely clear from its name.

As this constant is going to be exposed in a UAPI header, give it a more
specific name for clarity. As all its users call it 'marker', use that
wording in the constant itself.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Reviewed-by: Nicolas Schier <nsc@kernel.org>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 arch/s390/kernel/machine_kexec_file.c | 4 ++--
 include/linux/module_signature.h      | 2 +-
 kernel/module/signing.c               | 4 ++--
 security/integrity/ima/ima_modsig.c   | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index 667ee9279e23..6f0852d5a3a9 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -28,7 +28,7 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
 #ifdef CONFIG_KEXEC_SIG
 int s390_verify_sig(const char *kernel, unsigned long kernel_len)
 {
-	const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
+	const unsigned long marker_len = sizeof(MODULE_SIGNATURE_MARKER) - 1;
 	struct module_signature *ms;
 	unsigned long sig_len;
 	int ret;
@@ -40,7 +40,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
 	if (marker_len > kernel_len)
 		return -EKEYREJECTED;
 
-	if (memcmp(kernel + kernel_len - marker_len, MODULE_SIG_STRING,
+	if (memcmp(kernel + kernel_len - marker_len, MODULE_SIGNATURE_MARKER,
 		   marker_len))
 		return -EKEYREJECTED;
 	kernel_len -= marker_len;
diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h
index c3a05d4cfe67..915549c779dc 100644
--- a/include/linux/module_signature.h
+++ b/include/linux/module_signature.h
@@ -12,7 +12,7 @@
 #include <linux/types.h>
 
 /* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
-#define MODULE_SIG_STRING "~Module signature appended~\n"
+#define MODULE_SIGNATURE_MARKER "~Module signature appended~\n"
 
 enum module_signature_type {
 	MODULE_SIGNATURE_TYPE_PKCS7 = 2,	/* Signature in PKCS#7 message */
diff --git a/kernel/module/signing.c b/kernel/module/signing.c
index a2ff4242e623..590ba29c85ab 100644
--- a/kernel/module/signing.c
+++ b/kernel/module/signing.c
@@ -70,7 +70,7 @@ int mod_verify_sig(const void *mod, struct load_info *info)
 int module_sig_check(struct load_info *info, int flags)
 {
 	int err = -ENODATA;
-	const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+	const unsigned long markerlen = sizeof(MODULE_SIGNATURE_MARKER) - 1;
 	const char *reason;
 	const void *mod = info->hdr;
 	bool mangled_module = flags & (MODULE_INIT_IGNORE_MODVERSIONS |
@@ -81,7 +81,7 @@ int module_sig_check(struct load_info *info, int flags)
 	 */
 	if (!mangled_module &&
 	    info->len > markerlen &&
-	    memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
+	    memcmp(mod + info->len - markerlen, MODULE_SIGNATURE_MARKER, markerlen) == 0) {
 		/* We truncate the module to discard the signature */
 		info->len -= markerlen;
 		err = mod_verify_sig(mod, info);
diff --git a/security/integrity/ima/ima_modsig.c b/security/integrity/ima/ima_modsig.c
index 9aa92fd35a03..632c746fd81e 100644
--- a/security/integrity/ima/ima_modsig.c
+++ b/security/integrity/ima/ima_modsig.c
@@ -40,7 +40,7 @@ struct modsig {
 int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len,
 		    struct modsig **modsig)
 {
-	const size_t marker_len = strlen(MODULE_SIG_STRING);
+	const size_t marker_len = strlen(MODULE_SIGNATURE_MARKER);
 	const struct module_signature *sig;
 	struct modsig *hdr;
 	size_t sig_len;
@@ -51,7 +51,7 @@ int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len,
 		return -ENOENT;
 
 	p = buf + buf_len - marker_len;
-	if (memcmp(p, MODULE_SIG_STRING, marker_len))
+	if (memcmp(p, MODULE_SIGNATURE_MARKER, marker_len))
 		return -ENOENT;
 
 	buf_len -= marker_len;
@@ -105,7 +105,7 @@ void ima_collect_modsig(struct modsig *modsig, const void *buf, loff_t size)
 	 * Provide the file contents (minus the appended sig) so that the PKCS7
 	 * code can calculate the file hash.
 	 */
-	size -= modsig->raw_pkcs7_len + strlen(MODULE_SIG_STRING) +
+	size -= modsig->raw_pkcs7_len + strlen(MODULE_SIGNATURE_MARKER) +
 		sizeof(struct module_signature);
 	rc = pkcs7_supply_detached_data(modsig->pkcs7_msg, buf, size);
 	if (rc)
-- 
cgit v1.2.3


From f9909cf0a2dcc9e99377f3fcc965ccd93e518e34 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Thu, 5 Mar 2026 10:31:41 +0100
Subject: module: Move 'struct module_signature' to UAPI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This structure definition is used outside the kernel proper.
For example in kmod and the kernel build environment.

To allow reuse, move it to a new UAPI header.

While it is not a true UAPI, it is a common practice to have
non-UAPI interface definitions in the kernel's UAPI headers.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Reviewed-by: Nicolas Schier <nsc@kernel.org>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 include/linux/module_signature.h      | 28 +-----------------------
 include/uapi/linux/module_signature.h | 41 +++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 27 deletions(-)
 create mode 100644 include/uapi/linux/module_signature.h

(limited to 'include/linux')

diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h
index 915549c779dc..db335d46787f 100644
--- a/include/linux/module_signature.h
+++ b/include/linux/module_signature.h
@@ -10,33 +10,7 @@
 #define _LINUX_MODULE_SIGNATURE_H
 
 #include <linux/types.h>
-
-/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
-#define MODULE_SIGNATURE_MARKER "~Module signature appended~\n"
-
-enum module_signature_type {
-	MODULE_SIGNATURE_TYPE_PKCS7 = 2,	/* Signature in PKCS#7 message */
-};
-
-/*
- * Module signature information block.
- *
- * The constituents of the signature section are, in order:
- *
- *	- Signer's name
- *	- Key identifier
- *	- Signature data
- *	- Information block
- */
-struct module_signature {
-	u8	algo;		/* Public-key crypto algorithm [0] */
-	u8	hash;		/* Digest algorithm [0] */
-	u8	id_type;	/* Key identifier type [enum module_signature_type] */
-	u8	signer_len;	/* Length of signer's name [0] */
-	u8	key_id_len;	/* Length of key identifier [0] */
-	u8	__pad[3];
-	__be32	sig_len;	/* Length of signature data */
-};
+#include <uapi/linux/module_signature.h>
 
 int mod_check_sig(const struct module_signature *ms, size_t file_len,
 		  const char *name);
diff --git a/include/uapi/linux/module_signature.h b/include/uapi/linux/module_signature.h
new file mode 100644
index 000000000000..634c9f1c8fc2
--- /dev/null
+++ b/include/uapi/linux/module_signature.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Module signature handling.
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _UAPI_LINUX_MODULE_SIGNATURE_H
+#define _UAPI_LINUX_MODULE_SIGNATURE_H
+
+#include <linux/types.h>
+
+/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
+#define MODULE_SIGNATURE_MARKER "~Module signature appended~\n"
+
+enum module_signature_type {
+	MODULE_SIGNATURE_TYPE_PKCS7 = 2,	/* Signature in PKCS#7 message */
+};
+
+/*
+ * Module signature information block.
+ *
+ * The constituents of the signature section are, in order:
+ *
+ *	- Signer's name
+ *	- Key identifier
+ *	- Signature data
+ *	- Information block
+ */
+struct module_signature {
+	__u8	algo;		/* Public-key crypto algorithm [0] */
+	__u8	hash;		/* Digest algorithm [0] */
+	__u8	id_type;	/* Key identifier type [enum module_signature_type] */
+	__u8	signer_len;	/* Length of signer's name [0] */
+	__u8	key_id_len;	/* Length of key identifier [0] */
+	__u8	__pad[3];
+	__be32	sig_len;	/* Length of signature data */
+};
+
+#endif /* _UAPI_LINUX_MODULE_SIGNATURE_H */
-- 
cgit v1.2.3


From c291cfac49a67debdad766eedc450ef613f41b2d Mon Sep 17 00:00:00 2001
From: Kit Dallege <xaum.io@gmail.com>
Date: Sun, 15 Mar 2026 18:09:41 +0100
Subject: entry: Add missing kernel-doc for arch_ptrace_report_syscall
 functions

Document @regs and @step parameters for arch_ptrace_report_syscall_entry()
and arch_ptrace_report_syscall_exit() that were missing from the kernel-doc
comments.

Signed-off-by: Kit Dallege <xaum.io@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Link: https://patch.msgid.link/20260315170941.65913-1-xaum.io@gmail.com
---
 include/linux/entry-common.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index f83ca0abf2cd..d223246401bc 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -48,6 +48,7 @@
 
 /**
  * arch_ptrace_report_syscall_entry - Architecture specific ptrace_report_syscall_entry() wrapper
+ * @regs: Pointer to the register state at syscall entry
  *
  * Invoked from syscall_trace_enter() to wrap ptrace_report_syscall_entry().
  *
@@ -205,6 +206,8 @@ static __always_inline bool report_single_step(unsigned long work)
 
 /**
  * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit()
+ * @regs: Pointer to the register state at syscall exit
+ * @step: Indicates a single-step exit rather than a normal syscall exit
  *
  * This allows architecture specific ptrace_report_syscall_exit()
  * implementations. If not defined by the architecture this falls back to
-- 
cgit v1.2.3


From 8add6d87dc69c0620c7e60bdc6be6b3b0092d9fa Mon Sep 17 00:00:00 2001
From: Xuyang Dong <dongxuyang@eswincomputing.com>
Date: Tue, 3 Mar 2026 16:06:55 +0800
Subject: clk: divider: Add devm_clk_hw_register_divider_parent_data

Add the devres variant of clk_hw_register_divider_parent_data() for
registering a divider clock with parent clk data instead of parent
name.

Reviewed-by: Brian Masney <bmasney@redhat.com>
Signed-off-by: Xuyang Dong <dongxuyang@eswincomputing.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk-provider.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 630705a47129..64967ac1b1df 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -947,6 +947,26 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name,
 				       (parent_hw), NULL, (flags), (reg),     \
 				       (shift), (width), (clk_divider_flags), \
 				       NULL, (lock))
+/**
+ * devm_clk_hw_register_divider_parent_data - register a divider clock with the
+ * clock framework
+ * @dev: device registering this clock
+ * @name: name of this clock
+ * @parent_data: parent clk data
+ * @flags: framework-specific flags
+ * @reg: register address to adjust divider
+ * @shift: number of bits to shift the bitfield
+ * @width: width of the bitfield
+ * @clk_divider_flags: divider-specific flags for this clock
+ * @lock: shared register lock for this clock
+ */
+#define devm_clk_hw_register_divider_parent_data(dev, name, parent_data,       \
+						 flags, reg, shift, width,     \
+						 clk_divider_flags, lock)      \
+	__devm_clk_hw_register_divider((dev), NULL, (name), NULL, NULL,	       \
+				       (parent_data), (flags), (reg), (shift), \
+				       (width), (clk_divider_flags), NULL,     \
+				       (lock))
 /**
  * devm_clk_hw_register_divider_table - register a table based divider clock
  * with the clock framework (devres variant)
-- 
cgit v1.2.3


From 37beb42560165869838e7d91724f3e629db64129 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts@arm.com>
Date: Tue, 3 Mar 2026 15:08:38 +0000
Subject: randomize_kstack: Maintain kstack_offset per task

kstack_offset was previously maintained per-cpu, but this caused a
couple of issues. So let's instead make it per-task.

Issue 1: add_random_kstack_offset() and choose_random_kstack_offset()
expected and required to be called with interrupts and preemption
disabled so that it could manipulate per-cpu state. But arm64, loongarch
and risc-v are calling them with interrupts and preemption enabled. I
don't _think_ this causes any functional issues, but it's certainly
unexpected and could lead to manipulating the wrong cpu's state, which
could cause a minor performance degradation due to bouncing the cache
lines. By maintaining the state per-task those functions can safely be
called in preemptible context.

Issue 2: add_random_kstack_offset() is called before executing the
syscall and expands the stack using a previously chosen random offset.
choose_random_kstack_offset() is called after executing the syscall and
chooses and stores a new random offset for the next syscall. With
per-cpu storage for this offset, an attacker could force cpu migration
during the execution of the syscall and prevent the offset from being
updated for the original cpu such that it is predictable for the next
syscall on that cpu. By maintaining the state per-task, this problem
goes away because the per-task random offset is updated after the
syscall regardless of which cpu it is executing on.

Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
Closes: https://lore.kernel.org/all/dd8c37bc-795f-4c7a-9086-69e584d8ab24@arm.com/
Cc: stable@vger.kernel.org
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Link: https://patch.msgid.link/20260303150840.3789438-2-ryan.roberts@arm.com
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/randomize_kstack.h | 26 +++++++++++++++-----------
 include/linux/sched.h            |  4 ++++
 init/main.c                      |  1 -
 kernel/fork.c                    |  2 ++
 4 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 1d982dbdd0d0..5d3916ca747c 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -9,7 +9,6 @@
 
 DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 			 randomize_kstack_offset);
-DECLARE_PER_CPU(u32, kstack_offset);
 
 /*
  * Do not use this anywhere else in the kernel. This is used here because
@@ -50,15 +49,14 @@ DECLARE_PER_CPU(u32, kstack_offset);
  * add_random_kstack_offset - Increase stack utilization by previously
  *			      chosen random offset
  *
- * This should be used in the syscall entry path when interrupts and
- * preempt are disabled, and after user registers have been stored to
- * the stack. For testing the resulting entropy, please see:
- * tools/testing/selftests/lkdtm/stack-entropy.sh
+ * This should be used in the syscall entry path after user registers have been
+ * stored to the stack. Preemption may be enabled. For testing the resulting
+ * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh
  */
 #define add_random_kstack_offset() do {					\
 	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
 				&randomize_kstack_offset)) {		\
-		u32 offset = raw_cpu_read(kstack_offset);		\
+		u32 offset = current->kstack_offset;			\
 		u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset));	\
 		/* Keep allocation even after "ptr" loses scope. */	\
 		asm volatile("" :: "r"(ptr) : "memory");		\
@@ -69,9 +67,9 @@ DECLARE_PER_CPU(u32, kstack_offset);
  * choose_random_kstack_offset - Choose the random offset for the next
  *				 add_random_kstack_offset()
  *
- * This should only be used during syscall exit when interrupts and
- * preempt are disabled. This position in the syscall flow is done to
- * frustrate attacks from userspace attempting to learn the next offset:
+ * This should only be used during syscall exit. Preemption may be enabled. This
+ * position in the syscall flow is done to frustrate attacks from userspace
+ * attempting to learn the next offset:
  * - Maximize the timing uncertainty visible from userspace: if the
  *   offset is chosen at syscall entry, userspace has much more control
  *   over the timing between choosing offsets. "How long will we be in
@@ -85,14 +83,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
 #define choose_random_kstack_offset(rand) do {				\
 	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
 				&randomize_kstack_offset)) {		\
-		u32 offset = raw_cpu_read(kstack_offset);		\
+		u32 offset = current->kstack_offset;			\
 		offset = ror32(offset, 5) ^ (rand);			\
-		raw_cpu_write(kstack_offset, offset);			\
+		current->kstack_offset = offset;			\
 	}								\
 } while (0)
+
+static inline void random_kstack_task_init(struct task_struct *tsk)
+{
+	tsk->kstack_offset = 0;
+}
 #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
 #define add_random_kstack_offset()		do { } while (0)
 #define choose_random_kstack_offset(rand)	do { } while (0)
+#define random_kstack_task_init(tsk)		do { } while (0)
 #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a7b4a980eb2f..8358e430dd7f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1592,6 +1592,10 @@ struct task_struct {
 	unsigned long			prev_lowest_stack;
 #endif
 
+#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+	u32				kstack_offset;
+#endif
+
 #ifdef CONFIG_X86_MCE
 	void __user			*mce_vaddr;
 	__u64				mce_kflags;
diff --git a/init/main.c b/init/main.c
index 1cb395dd94e4..0a1d8529212e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -833,7 +833,6 @@ static inline void initcall_debug_enable(void)
 #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 			   randomize_kstack_offset);
-DEFINE_PER_CPU(u32, kstack_offset);
 
 static int __init early_randomize_kstack_offset(char *buf)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 65113a304518..5715adeb6adf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -95,6 +95,7 @@
 #include <linux/thread_info.h>
 #include <linux/kstack_erase.h>
 #include <linux/kasan.h>
+#include <linux/randomize_kstack.h>
 #include <linux/scs.h>
 #include <linux/io_uring.h>
 #include <linux/io_uring_types.h>
@@ -2233,6 +2234,7 @@ __latent_entropy struct task_struct *copy_process(
 	if (retval)
 		goto bad_fork_cleanup_io;
 
+	random_kstack_task_init(p);
 	stackleak_task_init(p);
 
 	if (pid != &init_struct_pid) {
-- 
cgit v1.2.3


From a96ef5848cb096226bf6aff31a90d8b136d99b71 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts@arm.com>
Date: Tue, 3 Mar 2026 15:08:39 +0000
Subject: randomize_kstack: Unify random source across arches

Previously different architectures were using random sources of
differing strength and cost to decide the random kstack offset. A number
of architectures (loongarch, powerpc, s390, x86) were using their
timestamp counter, at whatever the frequency happened to be. Other
arches (arm64, riscv) were using entropy from the crng via
get_random_u16().

There have been concerns that in some cases the timestamp counters may
be too weak, because they can be easily guessed or influenced by user
space. And get_random_u16() has been shown to be too costly for the
level of protection kstack offset randomization provides.

So let's use a common, architecture-agnostic source of entropy; a
per-cpu prng, seeded at boot-time from the crng. This has a few
benefits:

  - We can remove choose_random_kstack_offset(); That was only there to
    try to make the timestamp counter value a bit harder to influence
    from user space [*].

  - The architecture code is simplified. All it has to do now is call
    add_random_kstack_offset() in the syscall path.

  - The strength of the randomness can be reasoned about independently
    of the architecture.

  - Arches previously using get_random_u16() now have much faster
    syscall paths, see below results.

[*] Additionally, this gets rid of some redundant work on s390 and x86.
Before this patch, those architectures called
choose_random_kstack_offset() under arch_exit_to_user_mode_prepare(),
which is also called for exception returns to userspace which were *not*
syscalls (e.g. regular interrupts). Getting rid of
choose_random_kstack_offset() avoids a small amount of redundant work
for the non-syscall cases.

In some configurations, add_random_kstack_offset() will now call
instrumentable code, so for a couple of arches, I have moved the call a
bit later to the first point where instrumentation is allowed. This
doesn't impact the efficacy of the mechanism.

There have been some claims that a prng may be less strong than the
timestamp counter if not regularly reseeded. But the prng has a period
of about 2^113. So as long as the prng state remains secret, it should
not be possible to guess. If the prng state can be accessed, we have
bigger problems.

Additionally, we are only consuming 6 bits to randomize the stack, so
there are only 64 possible random offsets. I assert that it would be
trivial for an attacker to brute force by repeating their attack and
waiting for the random stack offset to be the desired one. The prng
approach seems entirely proportional to this level of protection.

Performance data are provided below. The baseline is v6.18 with rndstack
on for each respective arch. (I)/(R) indicate statistically significant
improvement/regression. arm64 platform is AWS Graviton3 (m7g.metal).
x86_64 platform is AWS Sapphire Rapids (m7i.24xlarge):

+-----------------+--------------+---------------+---------------+
| Benchmark       | Result Class |  per-cpu-prng |  per-cpu-prng |
|                 |              | arm64 (metal) |   x86_64 (VM) |
+=================+==============+===============+===============+
| syscall/getpid  | mean (ns)    |    (I) -9.50% |   (I) -17.65% |
|                 | p99 (ns)     |   (I) -59.24% |   (I) -24.41% |
|                 | p99.9 (ns)   |   (I) -59.52% |   (I) -28.52% |
+-----------------+--------------+---------------+---------------+
| syscall/getppid | mean (ns)    |    (I) -9.52% |   (I) -19.24% |
|                 | p99 (ns)     |   (I) -59.25% |   (I) -25.03% |
|                 | p99.9 (ns)   |   (I) -59.50% |   (I) -28.17% |
+-----------------+--------------+---------------+---------------+
| syscall/invalid | mean (ns)    |   (I) -10.31% |   (I) -18.56% |
|                 | p99 (ns)     |   (I) -60.79% |   (I) -20.06% |
|                 | p99.9 (ns)   |   (I) -61.04% |   (I) -25.04% |
+-----------------+--------------+---------------+---------------+

I tested an earlier version of this change on x86 bare metal and it
showed a smaller but still significant improvement. The bare metal
system wasn't available this time around so testing was done in a VM
instance. I'm guessing the cost of rdtsc is higher for VMs.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Link: https://patch.msgid.link/20260303150840.3789438-3-ryan.roberts@arm.com
Signed-off-by: Kees Cook <kees@kernel.org>
---
 arch/Kconfig                         |  5 ++--
 arch/arm64/kernel/syscall.c          | 11 --------
 arch/loongarch/kernel/syscall.c      | 11 --------
 arch/powerpc/kernel/syscall.c        | 16 ++---------
 arch/riscv/kernel/traps.c            | 12 ---------
 arch/s390/include/asm/entry-common.h |  8 ------
 arch/s390/kernel/syscall.c           |  2 +-
 arch/x86/entry/syscall_32.c          |  4 +--
 arch/x86/entry/syscall_64.c          |  2 +-
 arch/x86/include/asm/entry-common.h  | 12 ---------
 include/linux/randomize_kstack.h     | 52 ++++++++++++------------------------
 include/linux/sched.h                |  4 ---
 init/main.c                          |  8 ++++++
 kernel/fork.c                        |  1 -
 14 files changed, 33 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 102ddbd4298e..f134527ace10 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1519,9 +1519,8 @@ config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 	def_bool n
 	help
 	  An arch should select this symbol if it can support kernel stack
-	  offset randomization with calls to add_random_kstack_offset()
-	  during syscall entry and choose_random_kstack_offset() during
-	  syscall exit. Careful removal of -fstack-protector-strong and
+	  offset randomization with a call to add_random_kstack_offset()
+	  during syscall entry. Careful removal of -fstack-protector-strong and
 	  -fstack-protector should also be applied to the entry code and
 	  closely examined, as the artificial stack bump looks like an array
 	  to the compiler, so it will attempt to add canary checks regardless
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index c062badd1a56..358ddfbf1401 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -52,17 +52,6 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
 	}
 
 	syscall_set_return_value(current, regs, 0, ret);
-
-	/*
-	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
-	 * bits. The actual entropy will be further reduced by the compiler
-	 * when applying stack alignment constraints: the AAPCS mandates a
-	 * 16-byte aligned SP at function boundaries, which will remove the
-	 * 4 low bits from any entropy chosen here.
-	 *
-	 * The resulting 6 bits of entropy is seen in SP[9:4].
-	 */
-	choose_random_kstack_offset(get_random_u16());
 }
 
 static inline bool has_syscall_work(unsigned long flags)
diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c
index 1249d82c1cd0..85da7e050d97 100644
--- a/arch/loongarch/kernel/syscall.c
+++ b/arch/loongarch/kernel/syscall.c
@@ -79,16 +79,5 @@ void noinstr __no_stack_protector do_syscall(struct pt_regs *regs)
 					   regs->regs[7], regs->regs[8], regs->regs[9]);
 	}
 
-	/*
-	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
-	 * bits. The actual entropy will be further reduced by the compiler
-	 * when applying stack alignment constraints: 16-bytes (i.e. 4-bits)
-	 * aligned, which will remove the 4 low bits from any entropy chosen
-	 * here.
-	 *
-	 * The resulting 6 bits of entropy is seen in SP[9:4].
-	 */
-	choose_random_kstack_offset(get_cycles());
-
 	syscall_exit_to_user_mode(regs);
 }
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index be159ad4b77b..b762677f8737 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -20,8 +20,6 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 
 	kuap_lock();
 
-	add_random_kstack_offset();
-
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
 
@@ -30,6 +28,8 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 	CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
 	user_exit_irqoff();
 
+	add_random_kstack_offset();
+
 	BUG_ON(regs_is_unrecoverable(regs));
 	BUG_ON(!user_mode(regs));
 	BUG_ON(arch_irq_disabled_regs(regs));
@@ -173,17 +173,5 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 	}
 #endif
 
-	/*
-	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
-	 * so the maximum stack offset is 1k bytes (10 bits).
-	 *
-	 * The actual entropy will be further reduced by the compiler when
-	 * applying stack alignment constraints: the powerpc architecture
-	 * may have two kinds of stack alignment (16-bytes and 8-bytes).
-	 *
-	 * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3].
-	 */
-	choose_random_kstack_offset(mftb());
-
 	return ret;
 }
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 5fb57fad188a..461279a7bd86 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -344,18 +344,6 @@ void do_trap_ecall_u(struct pt_regs *regs)
 			syscall_handler(regs, syscall);
 		}
 
-		/*
-		 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
-		 * so the maximum stack offset is 1k bytes (10 bits).
-		 *
-		 * The actual entropy will be further reduced by the compiler when
-		 * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned
-		 * for RV32I or RV64I.
-		 *
-		 * The resulting 6 bits of entropy is seen in SP[9:4].
-		 */
-		choose_random_kstack_offset(get_random_u16());
-
 		syscall_exit_to_user_mode(regs);
 	} else {
 		irqentry_state_t state = irqentry_nmi_enter(regs);
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 979af986a8fe..35450a485323 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -51,14 +51,6 @@ static __always_inline void arch_exit_to_user_mode(void)
 
 #define arch_exit_to_user_mode arch_exit_to_user_mode
 
-static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
-						  unsigned long ti_work)
-{
-	choose_random_kstack_offset(get_tod_clock_fast());
-}
-
-#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
-
 static __always_inline bool arch_in_rcu_eqs(void)
 {
 	if (IS_ENABLED(CONFIG_KVM))
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index 795b6cca74c9..1cf49af74a1d 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -96,8 +96,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
 {
 	unsigned long nr;
 
-	add_random_kstack_offset();
 	enter_from_user_mode(regs);
+	add_random_kstack_offset();
 	regs->psw = get_lowcore()->svc_old_psw;
 	regs->int_code = get_lowcore()->svc_int_code;
 	update_timer_sys();
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8e829575e12f..31b9492fe851 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -247,7 +247,6 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 {
 	int nr = syscall_32_enter(regs);
 
-	add_random_kstack_offset();
 	/*
 	 * Subtlety here: if ptrace pokes something larger than 2^31-1 into
 	 * orig_ax, the int return value truncates it. This matches
@@ -256,6 +255,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 	nr = syscall_enter_from_user_mode(regs, nr);
 	instrumentation_begin();
 
+	add_random_kstack_offset();
 	do_syscall_32_irqs_on(regs, nr);
 
 	instrumentation_end();
@@ -268,7 +268,6 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 	int nr = syscall_32_enter(regs);
 	int res;
 
-	add_random_kstack_offset();
 	/*
 	 * This cannot use syscall_enter_from_user_mode() as it has to
 	 * fetch EBP before invoking any of the syscall entry work
@@ -277,6 +276,7 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 	enter_from_user_mode(regs);
 
 	instrumentation_begin();
+	add_random_kstack_offset();
 	local_irq_enable();
 	/* Fetch EBP from where the vDSO stashed it. */
 	if (IS_ENABLED(CONFIG_X86_64)) {
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index b6e68ea98b83..71f032504e73 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -86,10 +86,10 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
 /* Returns true to return using SYSRET, or false to use IRET */
 __visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
 {
-	add_random_kstack_offset();
 	nr = syscall_enter_from_user_mode(regs, nr);
 
 	instrumentation_begin();
+	add_random_kstack_offset();
 
 	if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
 		/* Invalid system call, but still a system call. */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index ce3eb6d5fdf9..7535131c711b 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -82,18 +82,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 	current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED);
 #endif
 
-	/*
-	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
-	 * bits. The actual entropy will be further reduced by the compiler
-	 * when applying stack alignment constraints (see cc_stack_align4/8 in
-	 * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
-	 * low bits from any entropy chosen here.
-	 *
-	 * Therefore, final stack offset entropy will be 7 (x86_64) or
-	 * 8 (ia32) bits.
-	 */
-	choose_random_kstack_offset(rdtsc());
-
 	/* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */
 	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
 	    this_cpu_read(x86_ibpb_exit_to_user)) {
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 5d3916ca747c..024fc20e7762 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/jump_label.h>
 #include <linux/percpu-defs.h>
+#include <linux/prandom.h>
 
 DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 			 randomize_kstack_offset);
@@ -45,9 +46,22 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 #define KSTACK_OFFSET_MAX(x)	((x) & 0b1111111100)
 #endif
 
+DECLARE_PER_CPU(struct rnd_state, kstack_rnd_state);
+
+static __always_inline u32 get_kstack_offset(void)
+{
+	struct rnd_state *state;
+	u32 rnd;
+
+	state = &get_cpu_var(kstack_rnd_state);
+	rnd = prandom_u32_state(state);
+	put_cpu_var(kstack_rnd_state);
+
+	return rnd;
+}
+
 /**
- * add_random_kstack_offset - Increase stack utilization by previously
- *			      chosen random offset
+ * add_random_kstack_offset - Increase stack utilization by a random offset.
  *
  * This should be used in the syscall entry path after user registers have been
  * stored to the stack. Preemption may be enabled. For testing the resulting
@@ -56,47 +70,15 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 #define add_random_kstack_offset() do {					\
 	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
 				&randomize_kstack_offset)) {		\
-		u32 offset = current->kstack_offset;			\
+		u32 offset = get_kstack_offset();			\
 		u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset));	\
 		/* Keep allocation even after "ptr" loses scope. */	\
 		asm volatile("" :: "r"(ptr) : "memory");		\
 	}								\
 } while (0)
 
-/**
- * choose_random_kstack_offset - Choose the random offset for the next
- *				 add_random_kstack_offset()
- *
- * This should only be used during syscall exit. Preemption may be enabled. This
- * position in the syscall flow is done to frustrate attacks from userspace
- * attempting to learn the next offset:
- * - Maximize the timing uncertainty visible from userspace: if the
- *   offset is chosen at syscall entry, userspace has much more control
- *   over the timing between choosing offsets. "How long will we be in
- *   kernel mode?" tends to be more difficult to predict than "how long
- *   will we be in user mode?"
- * - Reduce the lifetime of the new offset sitting in memory during
- *   kernel mode execution. Exposure of "thread-local" memory content
- *   (e.g. current, percpu, etc) tends to be easier than arbitrary
- *   location memory exposure.
- */
-#define choose_random_kstack_offset(rand) do {				\
-	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
-				&randomize_kstack_offset)) {		\
-		u32 offset = current->kstack_offset;			\
-		offset = ror32(offset, 5) ^ (rand);			\
-		current->kstack_offset = offset;			\
-	}								\
-} while (0)
-
-static inline void random_kstack_task_init(struct task_struct *tsk)
-{
-	tsk->kstack_offset = 0;
-}
 #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
 #define add_random_kstack_offset()		do { } while (0)
-#define choose_random_kstack_offset(rand)	do { } while (0)
-#define random_kstack_task_init(tsk)		do { } while (0)
 #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8358e430dd7f..a7b4a980eb2f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1592,10 +1592,6 @@ struct task_struct {
 	unsigned long			prev_lowest_stack;
 #endif
 
-#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
-	u32				kstack_offset;
-#endif
-
 #ifdef CONFIG_X86_MCE
 	void __user			*mce_vaddr;
 	__u64				mce_kflags;
diff --git a/init/main.c b/init/main.c
index 0a1d8529212e..c9638a6946dc 100644
--- a/init/main.c
+++ b/init/main.c
@@ -833,6 +833,14 @@ static inline void initcall_debug_enable(void)
 #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 			   randomize_kstack_offset);
+DEFINE_PER_CPU(struct rnd_state, kstack_rnd_state);
+
+static int __init random_kstack_init(void)
+{
+	prandom_seed_full_state(&kstack_rnd_state);
+	return 0;
+}
+late_initcall(random_kstack_init);
 
 static int __init early_randomize_kstack_offset(char *buf)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 5715adeb6adf..1f738c28ca07 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2234,7 +2234,6 @@ __latent_entropy struct task_struct *copy_process(
 	if (retval)
 		goto bad_fork_cleanup_io;
 
-	random_kstack_task_init(p);
 	stackleak_task_init(p);
 
 	if (pid != &init_struct_pid) {
-- 
cgit v1.2.3


From 2c8fe1f14240d75f2002e16b2b69c5c2d27ed41c Mon Sep 17 00:00:00 2001
From: Oliver Collyer <ovcollyer@mac.com>
Date: Fri, 26 Dec 2025 06:57:18 +0000
Subject: media: uvcvideo: Add support for P010 pixel format

Add support for the P010 (10-bit Y/UV 4:2:0) pixel format to the
uvcvideo driver. This format is exposed by USB capture devices such as
the Magewell USB Capture HDMI 4K Pro when capturing HDR10 content.

P010 stores 10-bit Y and interleaved UV samples in 16-bit little-endian
words, with data in the upper 10 bits and zeros in the lower 6 bits.
This requires 2 bytes per sample, so bytesperline is wWidth * 2.

V4L2_PIX_FMT_P010 was added to the V4L2 core in commit 5374d8fb75f3
("media: Add P010 video format").

Based on the community DKMS patch from awawa-dev/P010_for_V4L2.

Link: https://github.com/awawa-dev/P010_for_V4L2
Signed-off-by: Oliver Collyer <ovcollyer@mac.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://patch.msgid.link/20251226065718.95504-1-ovcollyer@mac.com
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil+cisco@kernel.org>
---
 drivers/media/common/uvc.c       | 4 ++++
 drivers/media/usb/uvc/uvc_v4l2.c | 3 +++
 include/linux/usb/uvc.h          | 3 +++
 3 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/media/common/uvc.c b/drivers/media/common/uvc.c
index 1ad4604474ac..0107cedd031e 100644
--- a/drivers/media/common/uvc.c
+++ b/drivers/media/common/uvc.c
@@ -40,6 +40,10 @@ static const struct uvc_format_desc uvc_fmts[] = {
 		.guid		= UVC_GUID_FORMAT_M420,
 		.fcc		= V4L2_PIX_FMT_M420,
 	},
+	{
+		.guid		= UVC_GUID_FORMAT_P010,
+		.fcc		= V4L2_PIX_FMT_P010,
+	},
 	{
 		.guid		= UVC_GUID_FORMAT_UYVY,
 		.fcc		= V4L2_PIX_FMT_UYVY,
diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c
index d5860661c115..cda1697204ea 100644
--- a/drivers/media/usb/uvc/uvc_v4l2.c
+++ b/drivers/media/usb/uvc/uvc_v4l2.c
@@ -230,6 +230,9 @@ static u32 uvc_v4l2_get_bytesperline(const struct uvc_format *format,
 	case V4L2_PIX_FMT_M420:
 		return frame->wWidth;
 
+	case V4L2_PIX_FMT_P010:
+		return frame->wWidth * 2;
+
 	default:
 		return format->bpp * frame->wWidth / 8;
 	}
diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h
index ea92ac623a45..05bfebab42b6 100644
--- a/include/linux/usb/uvc.h
+++ b/include/linux/usb/uvc.h
@@ -138,6 +138,9 @@
 #define UVC_GUID_FORMAT_M420 \
 	{ 'M',  '4',  '2',  '0', 0x00, 0x00, 0x10, 0x00, \
 	 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}
+#define UVC_GUID_FORMAT_P010 \
+	{ 'P',  '0',  '1',  '0', 0x00, 0x00, 0x10, 0x00, \
+	 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}
 
 #define UVC_GUID_FORMAT_H264 \
 	{ 'H',  '2',  '6',  '4', 0x00, 0x00, 0x10, 0x00, \
-- 
cgit v1.2.3


From e93ab401da4b2e2c1b8ef2424de2f238d51c8b2d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 27 Feb 2026 14:22:16 +0100
Subject: quota: Fix race of dquot_scan_active() with quota deactivation

dquot_scan_active() can race with quota deactivation in
quota_release_workfn() like:

  CPU0 (quota_release_workfn)         CPU1 (dquot_scan_active)
  ==============================      ==============================
  spin_lock(&dq_list_lock);
  list_replace_init(
    &releasing_dquots, &rls_head);
    /* dquot X on rls_head,
       dq_count == 0,
       DQ_ACTIVE_B still set */
  spin_unlock(&dq_list_lock);
  synchronize_srcu(&dquot_srcu);
                                      spin_lock(&dq_list_lock);
                                      list_for_each_entry(dquot,
                                          &inuse_list, dq_inuse) {
                                        /* finds dquot X */
                                        dquot_active(X) -> true
                                        atomic_inc(&X->dq_count);
                                      }
                                      spin_unlock(&dq_list_lock);
  spin_lock(&dq_list_lock);
  dquot = list_first_entry(&rls_head);
  WARN_ON_ONCE(atomic_read(&dquot->dq_count));

The problem is not only a cosmetic one as under memory pressure the
caller of dquot_scan_active() can end up working on freed dquot.

Fix the problem by making sure the dquot is removed from releasing list
when we acquire a reference to it.

Fixes: 869b6ea1609f ("quota: Fix slow quotaoff")
Reported-by: Sam Sun <samsun1006219@gmail.com>
Link: https://lore.kernel.org/all/CAEkJfYPTt3uP1vAYnQ5V2ZWn5O9PLhhGi5HbOcAzyP9vbXyjeg@mail.gmail.com
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c         | 38 ++++++++++++++++++++++++++++++--------
 include/linux/quotaops.h |  9 +--------
 2 files changed, 31 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 376739f6420e..64cf42721496 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -363,6 +363,31 @@ static inline int dquot_active(struct dquot *dquot)
 	return test_bit(DQ_ACTIVE_B, &dquot->dq_flags);
 }
 
+static struct dquot *__dqgrab(struct dquot *dquot)
+{
+	lockdep_assert_held(&dq_list_lock);
+	if (!atomic_read(&dquot->dq_count))
+		remove_free_dquot(dquot);
+	atomic_inc(&dquot->dq_count);
+	return dquot;
+}
+
+/*
+ * Get reference to dquot when we got pointer to it by some other means. The
+ * dquot has to be active and the caller has to make sure it cannot get
+ * deactivated under our hands.
+ */
+struct dquot *dqgrab(struct dquot *dquot)
+{
+	spin_lock(&dq_list_lock);
+	WARN_ON_ONCE(!dquot_active(dquot));
+	dquot = __dqgrab(dquot);
+	spin_unlock(&dq_list_lock);
+
+	return dquot;
+}
+EXPORT_SYMBOL_GPL(dqgrab);
+
 static inline int dquot_dirty(struct dquot *dquot)
 {
 	return test_bit(DQ_MOD_B, &dquot->dq_flags);
@@ -641,15 +666,14 @@ int dquot_scan_active(struct super_block *sb,
 			continue;
 		if (dquot->dq_sb != sb)
 			continue;
-		/* Now we have active dquot so we can just increase use count */
-		atomic_inc(&dquot->dq_count);
+		__dqgrab(dquot);
 		spin_unlock(&dq_list_lock);
 		dqput(old_dquot);
 		old_dquot = dquot;
 		/*
 		 * ->release_dquot() can be racing with us. Our reference
-		 * protects us from new calls to it so just wait for any
-		 * outstanding call and recheck the DQ_ACTIVE_B after that.
+		 * protects us from dquot_release() proceeding so just wait for
+		 * any outstanding call and recheck the DQ_ACTIVE_B after that.
 		 */
 		wait_on_dquot(dquot);
 		if (dquot_active(dquot)) {
@@ -717,7 +741,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
 			/* Now we have active dquot from which someone is
  			 * holding reference so we can safely just increase
 			 * use count */
-			dqgrab(dquot);
+			__dqgrab(dquot);
 			spin_unlock(&dq_list_lock);
 			err = dquot_write_dquot(dquot);
 			if (err && !ret)
@@ -963,9 +987,7 @@ we_slept:
 		spin_unlock(&dq_list_lock);
 		dqstats_inc(DQST_LOOKUPS);
 	} else {
-		if (!atomic_read(&dquot->dq_count))
-			remove_free_dquot(dquot);
-		atomic_inc(&dquot->dq_count);
+		__dqgrab(dquot);
 		spin_unlock(&dq_list_lock);
 		dqstats_inc(DQST_CACHE_HITS);
 		dqstats_inc(DQST_LOOKUPS);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index c334f82ed385..f9c0f9d7c9d9 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -44,14 +44,7 @@ int dquot_initialize(struct inode *inode);
 bool dquot_initialize_needed(struct inode *inode);
 void dquot_drop(struct inode *inode);
 struct dquot *dqget(struct super_block *sb, struct kqid qid);
-static inline struct dquot *dqgrab(struct dquot *dquot)
-{
-	/* Make sure someone else has active reference to dquot */
-	WARN_ON_ONCE(!atomic_read(&dquot->dq_count));
-	WARN_ON_ONCE(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags));
-	atomic_inc(&dquot->dq_count);
-	return dquot;
-}
+struct dquot *dqgrab(struct dquot *dquot);
 
 static inline bool dquot_is_busy(struct dquot *dquot)
 {
-- 
cgit v1.2.3


From 239cd6a417b989708da4b39a71f925897ec87287 Mon Sep 17 00:00:00 2001
From: Manikandan Muralidharan <manikandan.m@microchip.com>
Date: Mon, 23 Feb 2026 15:49:17 +0530
Subject: mfd: atmel-hlcdc: Fetch LVDS PLL clock for LVDS display

The XLCDC IP supports parallel RGB, MIPI DSI and LVDS Display.
The LCD Generic clock (sys_clk) is used for Parallel RGB and MIPI
displays, while the LVDS PLL clock (lvds_pll_clk) is used for LVDS
displays.Since both the clocks cannot co-exist together in the DT
for a given display, this patch tries sys_clk first (RGB/MIPI),
fallback to lvds_pll_clk (LVDS).

Signed-off-by: Manikandan Muralidharan <manikandan.m@microchip.com>
Signed-off-by: Dharma Balasubiramani <dharma.b@microchip.com>
Link: https://patch.msgid.link/20260223101920.284697-2-manikandan.m@microchip.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 drivers/mfd/atmel-hlcdc.c       | 13 +++++++++++--
 include/linux/mfd/atmel-hlcdc.h |  1 +
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/atmel-hlcdc.c b/drivers/mfd/atmel-hlcdc.c
index c3f3d39bf584..0b541c0d3b1b 100644
--- a/drivers/mfd/atmel-hlcdc.c
+++ b/drivers/mfd/atmel-hlcdc.c
@@ -108,10 +108,19 @@ static int atmel_hlcdc_probe(struct platform_device *pdev)
 		return PTR_ERR(hlcdc->periph_clk);
 	}
 
+	/*
+	 * Retrieve one of the primary clocks required for LCD operation:
+	 * prefer sys_clk (for RGB/MIPI), and fall back to lvds_pll_clk
+	 * (for LVDS) if needed.
+	 */
 	hlcdc->sys_clk = devm_clk_get(dev, "sys_clk");
 	if (IS_ERR(hlcdc->sys_clk)) {
-		dev_err(dev, "failed to get system clock\n");
-		return PTR_ERR(hlcdc->sys_clk);
+		hlcdc->sys_clk = NULL;
+		hlcdc->lvds_pll_clk = devm_clk_get(dev, "lvds_pll_clk");
+		if (IS_ERR(hlcdc->lvds_pll_clk)) {
+			dev_err(dev, "Failed to obtain both the LCDC (generic) and LVDS PLL clocks\n");
+			return PTR_ERR(hlcdc->lvds_pll_clk);
+		}
 	}
 
 	hlcdc->slow_clk = devm_clk_get(dev, "slow_clk");
diff --git a/include/linux/mfd/atmel-hlcdc.h b/include/linux/mfd/atmel-hlcdc.h
index 80d675a03b39..07c2081867fd 100644
--- a/include/linux/mfd/atmel-hlcdc.h
+++ b/include/linux/mfd/atmel-hlcdc.h
@@ -75,6 +75,7 @@
  */
 struct atmel_hlcdc {
 	struct regmap *regmap;
+	struct clk *lvds_pll_clk;
 	struct clk *periph_clk;
 	struct clk *sys_clk;
 	struct clk *slow_clk;
-- 
cgit v1.2.3


From 0735e3007c1be6cb40372c403a69200d0929c8d7 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Wed, 18 Feb 2026 11:48:01 +0100
Subject: mfd: lpc_ich: Expose the GPIO controller cell's software node

One of the users of this driver - meraki-mx100 - abuses the software
node API by setting up a dummy software node without any logical link to
this GPIO controller and uses the fact that the GPIO core matches the
controller's label against the swnode's name to make the lookup work.

We want to remove this behavior from GPIOLIB in favor of actual matching
of firmware nodes but that would break this user. To facilitate that:
create a software node for the GPIO controller cell and expose its
address in the provided MFD header.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20260218-meraki-swnodes-v2-1-92c521da241c@oss.qualcomm.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 drivers/mfd/lpc_ich.c       | 7 +++++++
 include/linux/mfd/lpc_ich.h | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index 4b7d0cb9340f..5a3d79f339dd 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -45,6 +45,7 @@
 #include <linux/acpi.h>
 #include <linux/pci.h>
 #include <linux/pinctrl/pinctrl.h>
+#include <linux/property.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/lpc_ich.h>
 #include <linux/platform_data/itco_wdt.h>
@@ -125,11 +126,17 @@ static struct mfd_cell lpc_ich_wdt_cell = {
 	.ignore_resource_conflicts = true,
 };
 
+const struct software_node lpc_ich_gpio_swnode = {
+	.name = "gpio_ich",
+};
+EXPORT_SYMBOL_NS(lpc_ich_gpio_swnode, "LPC_ICH");
+
 static struct mfd_cell lpc_ich_gpio_cell = {
 	.name = "gpio_ich",
 	.num_resources = ARRAY_SIZE(gpio_ich_res),
 	.resources = gpio_ich_res,
 	.ignore_resource_conflicts = true,
+	.swnode = &lpc_ich_gpio_swnode,
 };
 
 #define INTEL_GPIO_RESOURCE_SIZE	0x1000
diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h
index 1fbda1f8967d..1819aa743c5c 100644
--- a/include/linux/mfd/lpc_ich.h
+++ b/include/linux/mfd/lpc_ich.h
@@ -37,4 +37,6 @@ struct lpc_ich_info {
 	u8 use_gpio;
 };
 
+extern const struct software_node lpc_ich_gpio_swnode;
+
 #endif
-- 
cgit v1.2.3


From a09506820afa391e0a8ecc4b05c954f21e50b1de Mon Sep 17 00:00:00 2001
From: Akari Tsuyukusa <akkun11.open@gmail.com>
Date: Mon, 2 Mar 2026 23:00:45 +0900
Subject: mfd: mt6397: Properly fix CID of MT6328, MT6331 and MT6332

CIDs set for MT6328, MT6331 and MT6332 are not appropriate.
Many Android downstream kernels define CID as below,

MT6328:

    #define PMIC6328_E1_CID_CODE    0x2810
    #define PMIC6328_E2_CID_CODE    0x2820
    #define PMIC6328_E3_CID_CODE    0x2830

MT6331/MT6332:

    #define PMIC6331_E1_CID_CODE    0x3110
    #define PMIC6331_E2_CID_CODE    0x3120
    #define PMIC6331_E3_CID_CODE    0x3130

    #define PMIC6332_E1_CID_CODE    0x3210
    #define PMIC6332_E2_CID_CODE    0x3220
    #define PMIC6332_E3_CID_CODE    0x3230

The current configuration incorrectly uses the revision code as the CID.
Therefore, the driver cannot detect the same PMIC of different revisions.
(E1/E2 for MT6328, E1/E3 for MT6331/MT6332)
Based on these, the CID of MT6328, MT6331 and MT6332 should be corrected.

Additionally, the incorrect MT6331/MT6332 CID overlaps with the MT6320's
actual CID:

    #define PMIC6320_E1_CID_CODE    0x1020
    #define PMIC6320_E2_CID_CODE    0x2020

This causes a conflict in the switch-case statement of mt6397-irq.c,
this prevents adding support for MT6320.

Signed-off-by: Akari Tsuyukusa <akkun11.open@gmail.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://patch.msgid.link/20260302140045.651727-1-akkun11.open@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 drivers/mfd/mt6397-core.c       | 4 ++--
 include/linux/mfd/mt6397/core.h | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mt6397-core.c b/drivers/mfd/mt6397-core.c
index 3e58d0764c7e..1bdacda9a933 100644
--- a/drivers/mfd/mt6397-core.c
+++ b/drivers/mfd/mt6397-core.c
@@ -297,7 +297,7 @@ static const struct chip_data mt6323_core = {
 
 static const struct chip_data mt6328_core = {
 	.cid_addr = MT6328_HWCID,
-	.cid_shift = 0,
+	.cid_shift = 8,
 	.cells = mt6328_devs,
 	.cell_size = ARRAY_SIZE(mt6328_devs),
 	.irq_init = mt6397_irq_init,
@@ -313,7 +313,7 @@ static const struct chip_data mt6357_core = {
 
 static const struct chip_data mt6331_mt6332_core = {
 	.cid_addr = MT6331_HWCID,
-	.cid_shift = 0,
+	.cid_shift = 8,
 	.cells = mt6331_mt6332_devs,
 	.cell_size = ARRAY_SIZE(mt6331_mt6332_devs),
 	.irq_init = mt6397_irq_init,
diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h
index b774c3a4bb62..340fc72e22aa 100644
--- a/include/linux/mfd/mt6397/core.h
+++ b/include/linux/mfd/mt6397/core.h
@@ -12,9 +12,9 @@
 
 enum chip_id {
 	MT6323_CHIP_ID = 0x23,
-	MT6328_CHIP_ID = 0x30,
-	MT6331_CHIP_ID = 0x20,
-	MT6332_CHIP_ID = 0x20,
+	MT6328_CHIP_ID = 0x28,
+	MT6331_CHIP_ID = 0x31,
+	MT6332_CHIP_ID = 0x32,
 	MT6357_CHIP_ID = 0x57,
 	MT6358_CHIP_ID = 0x58,
 	MT6359_CHIP_ID = 0x59,
-- 
cgit v1.2.3


From ee63402eb41a4ffcac72490b3e93de606de8d394 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 9 Mar 2026 14:42:20 -0700
Subject: mfd: congatec: Fix kernel-doc struct member names

Correct the struct member names to avoid kernel-doc warnings:

Warning: include/linux/mfd/cgbc.h:38 struct member 'version' not
 described in 'cgbc_device_data'
Warning: ../include/linux/mfd/cgbc.h:38 struct member 'lock' not
 described in 'cgbc_device_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260309214223.749088-2-rdunlap@infradead.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/cgbc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cgbc.h b/include/linux/mfd/cgbc.h
index badbec4c7033..91f501e76c8f 100644
--- a/include/linux/mfd/cgbc.h
+++ b/include/linux/mfd/cgbc.h
@@ -26,8 +26,8 @@ struct cgbc_version {
  * @io_cmd:		Pointer to the command IO memory
  * @session:		Session id returned by the Board Controller
  * @dev:		Pointer to kernel device structure
- * @cgbc_version:	Board Controller version structure
- * @mutex:		Board Controller mutex
+ * @version:		Board Controller version structure
+ * @lock:		Board Controller mutex
  */
 struct cgbc_device_data {
 	void __iomem		*io_session;
-- 
cgit v1.2.3


From 69d7fa1b918d0aa0157aef5f71f757916194f099 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 9 Mar 2026 14:42:21 -0700
Subject: mfd: kempld: Fix kernel-doc struct member names

Correct the struct member names to avoid kernel-doc warnings:

Warning: include/linux/mfd/kempld.h:114 struct member 'gpio_base' not
 described in 'kempld_platform_data'
Warning: include/linux/mfd/kempld.h:114 struct member 'get_hardware_mutex'
 not described in 'kempld_platform_data'
Warning: include/linux/mfd/kempld.h:114 struct member
 'release_hardware_mutex' not described in 'kempld_platform_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260309214223.749088-3-rdunlap@infradead.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/kempld.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/kempld.h b/include/linux/mfd/kempld.h
index 643c096b93ac..30fb40325a09 100644
--- a/include/linux/mfd/kempld.h
+++ b/include/linux/mfd/kempld.h
@@ -97,10 +97,10 @@ struct kempld_device_data {
 /**
  * struct kempld_platform_data - PLD hardware configuration structure
  * @pld_clock:			PLD clock frequency
- * @gpio_base			GPIO base pin number
+ * @gpio_base:			GPIO base pin number
  * @ioresource:			IO addresses of the PLD
- * @get_mutex:			PLD specific get_mutex callback
- * @release_mutex:		PLD specific release_mutex callback
+ * @get_hardware_mutex:		PLD specific get_mutex callback
+ * @release_hardware_mutex:	PLD specific release_mutex callback
  * @get_info:			PLD specific get_info callback
  * @register_cells:		PLD specific register_cells callback
  */
-- 
cgit v1.2.3


From 5671125a129e97bdd634cf74137cf109d4420a0b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 9 Mar 2026 14:42:22 -0700
Subject: mfd: rsmu: Remove a empty kernel-doc line

kernel-doc format expects a prototype on the line that immediately
follows the "/**" line, so drop this empty line.

Warning: include/linux/mfd/rsmu.h:21 Cannot find identifier on line: *

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260309214223.749088-4-rdunlap@infradead.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/rsmu.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/rsmu.h b/include/linux/mfd/rsmu.h
index 0379aa207428..2f27386a7122 100644
--- a/include/linux/mfd/rsmu.h
+++ b/include/linux/mfd/rsmu.h
@@ -19,7 +19,6 @@ enum rsmu_type {
 };
 
 /**
- *
  * struct rsmu_ddata - device data structure for sub devices.
  *
  * @dev:    i2c/spi device.
-- 
cgit v1.2.3


From 92601fb9d8f61db2ea254965722e379f53d111b5 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 9 Mar 2026 14:42:23 -0700
Subject: mfd: si476x: Fix kernel-doc warnings

Add kernel-doc entries for missing fields or correct some typos
in names to eliminate kernel-doc warnings:

Warning: include/linux/mfd/si476x-core.h:156 struct member 'regmap' not
 described in 'si476x_core'
Warning: include/linux/mfd/si476x-core.h:156 struct member 'power_state'
 not described in 'si476x_core'
Warning: include/linux/mfd/si476x-core.h:156 struct member 'supplies' not
 described in 'si476x_core'
Warning: include/linux/mfd/si476x-core.h:156 struct member 'is_alive' not
 described in 'si476x_core'
Warning: include/linux/mfd/si476x-core.h:156 struct member 'rds_fifo_depth'
 not described in 'si476x_core'
Warning: include/linux/mfd/si476x-core.h:170 function parameter 'core' not
 described in 'si476x_core_lock'
Warning: include/linux/mfd/si476x-core.h:179 function parameter 'core' not
 described in 'si476x_core_unlock'
Warning: include/linux/mfd/si476x-core.h:259 struct member 'firmware' not
 described in 'si476x_func_info'
Warning: include/linux/mfd/si476x-core.h:335 struct member 'rds' not
 described in 'si476x_rds_status_report'

I don't know what the 'ble' field is so I didn't add a kernel-doc comment
for it:
  Warning: include/linux/mfd/si476x-core.h:335 struct member 'ble' not
    described in 'si476x_rds_status_report'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260309214223.749088-5-rdunlap@infradead.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/si476x-core.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/si476x-core.h b/include/linux/mfd/si476x-core.h
index dd95c37ca134..e913b2cdf77d 100644
--- a/include/linux/mfd/si476x-core.h
+++ b/include/linux/mfd/si476x-core.h
@@ -77,6 +77,7 @@ enum si476x_power_state {
  * underlying "core" device which all the MFD cell-devices use.
  *
  * @client: Actual I2C client used to transfer commands to the chip.
+ * @regmap: Regmap for accessing the device registers
  * @chip_id: Last digit of the chip model(E.g. "1" for SI4761)
  * @cells: MFD cell devices created by this driver.
  * @cmd_lock: Mutex used to serialize all the requests to the core
@@ -100,16 +101,18 @@ enum si476x_power_state {
  * @stc: Similar to @cts, but for the STC bit of the status value.
  * @power_up_parameters: Parameters used as argument for POWER_UP
  * command when the device is started.
- * @state: Current power state of the device.
- * @supplues: Structure containing handles to all power supplies used
+ * @power_state: Current power state of the device.
+ * @supplies: Structure containing handles to all power supplies used
  * by the device (NULL ones are ignored).
  * @gpio_reset: GPIO pin connectet to the RSTB pin of the chip.
  * @pinmux: Chip's configurable pins configuration.
  * @diversity_mode: Chips role when functioning in diversity mode.
+ * @is_alive: Chip is initialized and active.
  * @status_monitor: Polling worker used in polling use case scenarion
  * (when IRQ is not avalible).
  * @revision: Chip's running firmware revision number(Used for correct
  * command set support).
+ * @rds_fifo_depth: RDS FIFO size: 20 for IRQ mode or 5 for polling mode.
  */
 
 struct si476x_core {
@@ -166,6 +169,7 @@ static inline struct si476x_core *i2c_mfd_cell_to_core(struct device *dev)
 /**
  * si476x_core_lock() - lock the core device to get an exclusive access
  * to it.
+ * @core: Core device structure
  */
 static inline void si476x_core_lock(struct si476x_core *core)
 {
@@ -175,6 +179,7 @@ static inline void si476x_core_lock(struct si476x_core *core)
 /**
  * si476x_core_unlock() - unlock the core device to relinquish an
  * exclusive access to it.
+ * @core: Core device structure
  */
 static inline void si476x_core_unlock(struct si476x_core *core)
 {
@@ -246,9 +251,10 @@ static inline int si476x_to_v4l2(struct si476x_core *core, u16 freq)
  * struct si476x_func_info - structure containing result of the
  * FUNC_INFO command.
  *
+ * @firmware: Firmware version numbers.
  * @firmware.major: Firmware major number.
  * @firmware.minor[...]: Firmware minor numbers.
- * @patch_id:
+ * @patch_id: Firmware patch level.
  * @func: Mode tuner is working in.
  */
 struct si476x_func_info {
@@ -318,8 +324,9 @@ enum si476x_smoothmetrics {
  * @tp: Current channel's TP flag.
  * @pty: Current channel's PTY code.
  * @pi: Current channel's PI code.
- * @rdsfifoused: Number of blocks remaining in the RDS FIFO (0 if
- * empty).
+ * @rdsfifoused: Number of blocks remaining in the RDS FIFO (0 if empty).
+ * @ble:
+ * @rds: RDS data descriptor
  */
 struct si476x_rds_status_report {
 	bool rdstpptyint, rdspiint, rdssyncint, rdsfifoint;
-- 
cgit v1.2.3


From fe0e422cbcf4b7ee35cacc463631092b310a6f6a Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.com>
Date: Sat, 7 Mar 2026 00:41:21 +0100
Subject: mfd: bcm2835-pm: Introduce SoC-specific type identifier

Power management blocks across the BCM2835 family share a common
base but require variant-specific handling. For instance, the
BCM2712 lacks ASB register space, yet it manages the power domain
for the V3D graphics block.

Add a hardware type identifier to the driver's private data. This
allows the driver to distinguish between SoC models and implement
custom quirks or features as needed.

Signed-off-by: Phil Elwell <phil@raspberrypi.com>
Co-developed-by: Stanimir Varbanov <svarbanov@suse.de>
Signed-off-by: Stanimir Varbanov <svarbanov@suse.de>
Signed-off-by: Andrea della Porta <andrea.porta@suse.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/c4bb218654e91f312a01b419d3d408e5131f7673.1772839224.git.andrea.porta@suse.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 drivers/mfd/bcm2835-pm.c       | 7 ++++---
 include/linux/mfd/bcm2835-pm.h | 7 +++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/bcm2835-pm.c b/drivers/mfd/bcm2835-pm.c
index 8bed59816e82..2d5dc521b623 100644
--- a/drivers/mfd/bcm2835-pm.c
+++ b/drivers/mfd/bcm2835-pm.c
@@ -81,6 +81,7 @@ static int bcm2835_pm_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, pm);
 
 	pm->dev = dev;
+	pm->soc = (uintptr_t)device_get_match_data(dev);
 
 	ret = bcm2835_pm_get_pdata(pdev, pm);
 	if (ret)
@@ -106,9 +107,9 @@ static int bcm2835_pm_probe(struct platform_device *pdev)
 
 static const struct of_device_id bcm2835_pm_of_match[] = {
 	{ .compatible = "brcm,bcm2835-pm-wdt", },
-	{ .compatible = "brcm,bcm2835-pm", },
-	{ .compatible = "brcm,bcm2711-pm", },
-	{ .compatible = "brcm,bcm2712-pm", },
+	{ .compatible = "brcm,bcm2835-pm", .data = (void *)BCM2835_PM_SOC_BCM2835 },
+	{ .compatible = "brcm,bcm2711-pm", .data = (void *)BCM2835_PM_SOC_BCM2711 },
+	{ .compatible = "brcm,bcm2712-pm", .data = (void *)BCM2835_PM_SOC_BCM2712 },
 	{},
 };
 MODULE_DEVICE_TABLE(of, bcm2835_pm_of_match);
diff --git a/include/linux/mfd/bcm2835-pm.h b/include/linux/mfd/bcm2835-pm.h
index f70a810c55f7..d2e17ab1dbfc 100644
--- a/include/linux/mfd/bcm2835-pm.h
+++ b/include/linux/mfd/bcm2835-pm.h
@@ -5,11 +5,18 @@
 
 #include <linux/regmap.h>
 
+enum bcm2835_soc {
+	BCM2835_PM_SOC_BCM2835,
+	BCM2835_PM_SOC_BCM2711,
+	BCM2835_PM_SOC_BCM2712,
+};
+
 struct bcm2835_pm {
 	struct device *dev;
 	void __iomem *base;
 	void __iomem *asb;
 	void __iomem *rpivid_asb;
+	enum bcm2835_soc soc;
 };
 
 #endif /* BCM2835_MFD_PM_H */
-- 
cgit v1.2.3


From 630bbba45cfd3e4f9247cefd3e2cdc03fe40421b Mon Sep 17 00:00:00 2001
From: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Date: Tue, 24 Mar 2026 16:29:07 +0100
Subject: drbd: use genl pre_doit/post_doit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Every doit handler followed the same pattern: stack-allocate an
adm_ctx, call drbd_adm_prepare() at the top, call drbd_adm_finish()
at the bottom. This duplicated boilerplate across 25 handlers and
made error paths inconsistent, since some handlers could miss sending
the reply skb on early-exit paths.

The generic netlink framework already provides pre_doit/post_doit
hooks for exactly this purpose. An old comment even noted "this
would be a good candidate for a pre_doit hook".

Use them:

- pre_doit heap-allocates adm_ctx, looks up per-command flags from a
  new drbd_genl_cmd_flags[] table, runs drbd_adm_prepare(), and
  stores the context in info->user_ptr[0].
- post_doit sends the reply, drops kref references for
  device/connection/resource, and frees the adm_ctx.
- Handlers just receive adm_ctx from info->user_ptr[0], set
  reply_dh->ret_code, and return. All teardown is in post_doit.
- drbd_adm_finish() is removed, superseded by post_doit.

Signed-off-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Link: https://patch.msgid.link/20260324152907.2840984-1-christoph.boehmwalder@linbit.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_nl.c    | 573 ++++++++++++++++++++++------------------
 include/linux/genl_magic_func.h |   4 +
 2 files changed, 324 insertions(+), 253 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index e201f0087a0f..e18fa260a662 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -75,6 +75,15 @@ int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
 
 #include <linux/drbd_genl_api.h>
 #include "drbd_nla.h"
+
+static int drbd_pre_doit(const struct genl_split_ops *ops,
+			 struct sk_buff *skb, struct genl_info *info);
+static void drbd_post_doit(const struct genl_split_ops *ops,
+			   struct sk_buff *skb, struct genl_info *info);
+
+#define GENL_MAGIC_FAMILY_PRE_DOIT	drbd_pre_doit
+#define GENL_MAGIC_FAMILY_POST_DOIT	drbd_post_doit
+
 #include <linux/genl_magic_func.h>
 
 static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
@@ -144,18 +153,46 @@ static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...)
 	return 0;
 }
 
-/* This would be a good candidate for a "pre_doit" hook,
- * and per-family private info->pointers.
- * But we need to stay compatible with older kernels.
- * If it returns successfully, adm_ctx members are valid.
- *
+/* Flags for drbd_adm_prepare() */
+#define DRBD_ADM_NEED_MINOR	 (1 << 0)
+#define DRBD_ADM_NEED_RESOURCE	 (1 << 1)
+#define DRBD_ADM_NEED_CONNECTION (1 << 2)
+
+/* Per-command flags for drbd_pre_doit() */
+static const unsigned int drbd_genl_cmd_flags[] = {
+	[DRBD_ADM_GET_STATUS]     = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_NEW_MINOR]      = DRBD_ADM_NEED_RESOURCE,
+	[DRBD_ADM_DEL_MINOR]      = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_NEW_RESOURCE]   = 0,
+	[DRBD_ADM_DEL_RESOURCE]   = DRBD_ADM_NEED_RESOURCE,
+	[DRBD_ADM_RESOURCE_OPTS]  = DRBD_ADM_NEED_RESOURCE,
+	[DRBD_ADM_CONNECT]        = DRBD_ADM_NEED_RESOURCE,
+	[DRBD_ADM_CHG_NET_OPTS]   = DRBD_ADM_NEED_CONNECTION,
+	[DRBD_ADM_DISCONNECT]     = DRBD_ADM_NEED_CONNECTION,
+	[DRBD_ADM_ATTACH]         = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_CHG_DISK_OPTS]  = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_RESIZE]         = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_PRIMARY]        = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_SECONDARY]      = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_NEW_C_UUID]     = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_START_OV]       = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_DETACH]         = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_INVALIDATE]     = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_INVAL_PEER]     = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_PAUSE_SYNC]     = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_RESUME_SYNC]    = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_SUSPEND_IO]     = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_RESUME_IO]      = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_OUTDATE]        = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_GET_TIMEOUT_TYPE] = DRBD_ADM_NEED_MINOR,
+	[DRBD_ADM_DOWN]           = DRBD_ADM_NEED_RESOURCE,
+};
+
+/*
  * At this point, we still rely on the global genl_lock().
  * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
  * to add additional synchronization against object destruction/modification.
  */
-#define DRBD_ADM_NEED_MINOR	1
-#define DRBD_ADM_NEED_RESOURCE	2
-#define DRBD_ADM_NEED_CONNECTION 4
 static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
 	struct sk_buff *skb, struct genl_info *info, unsigned flags)
 {
@@ -163,8 +200,6 @@ static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
 	const u8 cmd = info->genlhdr->cmd;
 	int err;
 
-	memset(adm_ctx, 0, sizeof(*adm_ctx));
-
 	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
 	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
 	       return -EPERM;
@@ -300,9 +335,45 @@ fail:
 	return err;
 }
 
-static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
-	struct genl_info *info, int retcode)
+static int drbd_pre_doit(const struct genl_split_ops *ops,
+			 struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context *adm_ctx;
+	u8 cmd = info->genlhdr->cmd;
+	unsigned int flags;
+	int err;
+
+	adm_ctx = kzalloc_obj(*adm_ctx);
+	if (!adm_ctx)
+		return -ENOMEM;
+
+	flags = (cmd < ARRAY_SIZE(drbd_genl_cmd_flags))
+		? drbd_genl_cmd_flags[cmd] : 0;
+
+	err = drbd_adm_prepare(adm_ctx, skb, info, flags);
+	if (err && !adm_ctx->reply_skb) {
+		/* Fatal error before reply_skb was allocated. */
+		kfree(adm_ctx);
+		return err;
+	}
+	if (err)
+		adm_ctx->reply_dh->ret_code = err;
+
+	info->user_ptr[0] = adm_ctx;
+	return 0;
+}
+
+static void drbd_post_doit(const struct genl_split_ops *ops,
+			   struct sk_buff *skb, struct genl_info *info)
+{
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
+
+	if (!adm_ctx)
+		return;
+
+	if (adm_ctx->reply_skb)
+		drbd_adm_send_reply(adm_ctx->reply_skb, info);
+
 	if (adm_ctx->device) {
 		kref_put(&adm_ctx->device->kref, drbd_destroy_device);
 		adm_ctx->device = NULL;
@@ -316,12 +387,7 @@ static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
 		adm_ctx->resource = NULL;
 	}
 
-	if (!adm_ctx->reply_skb)
-		return -ENOMEM;
-
-	adm_ctx->reply_dh->ret_code = retcode;
-	drbd_adm_send_reply(adm_ctx->reply_skb, info);
-	return 0;
+	kfree(adm_ctx);
 }
 
 static void setup_khelper_env(struct drbd_connection *connection, char **envp)
@@ -766,15 +832,15 @@ static const char *from_attrs_err_to_txt(int err)
 
 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct set_role_parms parms;
 	int err;
 	enum drbd_ret_code retcode;
 	enum drbd_state_rv rv;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
@@ -783,24 +849,24 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 		err = set_role_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 			goto out;
 		}
 	}
 	genl_unlock();
-	mutex_lock(&adm_ctx.resource->adm_mutex);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
 
 	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
-		rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
+		rv = drbd_set_role(adm_ctx->device, R_PRIMARY, parms.assume_uptodate);
 	else
-		rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
+		rv = drbd_set_role(adm_ctx->device, R_SECONDARY, 0);
 
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 	genl_lock();
-	drbd_adm_finish(&adm_ctx, info, rv);
+	adm_ctx->reply_dh->ret_code = rv;
 	return 0;
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -1512,7 +1578,7 @@ out:
 
 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	enum drbd_ret_code retcode;
 	struct drbd_device *device;
 	struct disk_conf *new_disk_conf, *old_disk_conf;
@@ -1520,14 +1586,14 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	int err;
 	unsigned int fifo_size;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto finish;
 
-	device = adm_ctx.device;
-	mutex_lock(&adm_ctx.resource->adm_mutex);
+	device = adm_ctx->device;
+	mutex_lock(&adm_ctx->resource->adm_mutex);
 
 	/* we also need a disk
 	 * to change the options on */
@@ -1551,7 +1617,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
 	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 		goto fail_unlock;
 	}
 
@@ -1577,7 +1643,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	if (err) {
 		/* Could be just "busy". Ignore?
 		 * Introduce dedicated error code? */
-		drbd_msg_put_info(adm_ctx.reply_skb,
+		drbd_msg_put_info(adm_ctx->reply_skb,
 			"Try again without changing current al-extents setting");
 		retcode = ERR_NOMEM;
 		goto fail_unlock;
@@ -1640,9 +1706,9 @@ fail_unlock:
 success:
 	put_ldev(device);
  out:
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
  finish:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -1734,7 +1800,7 @@ void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *
 
 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct drbd_device *device;
 	struct drbd_peer_device *peer_device;
 	struct drbd_connection *connection;
@@ -1751,14 +1817,14 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	enum drbd_state_rv rv;
 	struct net_conf *nc;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto finish;
 
-	device = adm_ctx.device;
-	mutex_lock(&adm_ctx.resource->adm_mutex);
+	device = adm_ctx->device;
+	mutex_lock(&adm_ctx->resource->adm_mutex);
 	peer_device = first_peer_device(device);
 	connection = peer_device->connection;
 	conn_reconfig_start(connection);
@@ -1803,7 +1869,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	err = disk_conf_from_attrs(new_disk_conf, info);
 	if (err) {
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
@@ -1954,7 +2020,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 			drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff);
 		} else {
 			drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff);
-			drbd_msg_sprintf_info(adm_ctx.reply_skb,
+			drbd_msg_sprintf_info(adm_ctx->reply_skb,
 				"To-be-attached device has last effective > current size, and is consistent\n"
 				"(%llu > %llu sectors). Refusing to attach.", eff, nsz);
 			retcode = ERR_IMPLICIT_SHRINK;
@@ -2130,8 +2196,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
 	put_ldev(device);
 	conn_reconfig_done(connection);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 
  force_diskless_dec:
@@ -2150,9 +2216,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	kfree(new_disk_conf);
 	lc_destroy(resync_lru);
 	kfree(new_plan);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
  finish:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -2174,14 +2240,14 @@ static int adm_detach(struct drbd_device *device, int force)
  * Only then we have finally detached. */
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	enum drbd_ret_code retcode;
 	struct detach_parms parms = { };
 	int err;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
@@ -2189,16 +2255,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 		err = detach_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 			goto out;
 		}
 	}
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
-	retcode = adm_detach(adm_ctx.device, parms.force_detach);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
+	retcode = adm_detach(adm_ctx->device, parms.force_detach);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -2372,7 +2438,7 @@ static void free_crypto(struct crypto *crypto)
 
 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	enum drbd_ret_code retcode;
 	struct drbd_connection *connection;
 	struct net_conf *old_net_conf, *new_net_conf = NULL;
@@ -2381,14 +2447,14 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 	int rsr; /* re-sync running */
 	struct crypto crypto = { };
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto finish;
 
-	connection = adm_ctx.connection;
-	mutex_lock(&adm_ctx.resource->adm_mutex);
+	connection = adm_ctx->connection;
+	mutex_lock(&adm_ctx->resource->adm_mutex);
 
 	new_net_conf = kzalloc_obj(struct net_conf);
 	if (!new_net_conf) {
@@ -2403,7 +2469,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 	old_net_conf = connection->net_conf;
 
 	if (!old_net_conf) {
-		drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
+		drbd_msg_put_info(adm_ctx->reply_skb, "net conf missing, try connect");
 		retcode = ERR_INVALID_REQUEST;
 		goto fail;
 	}
@@ -2415,7 +2481,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 	err = net_conf_from_attrs_for_change(new_net_conf, info);
 	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
@@ -2485,9 +2551,9 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
  done:
 	conn_reconfig_done(connection);
  out:
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
  finish:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -2516,7 +2582,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	struct connection_info connection_info;
 	enum drbd_notification_type flags;
 	unsigned int peer_devices = 0;
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct drbd_peer_device *peer_device;
 	struct net_conf *old_net_conf, *new_net_conf = NULL;
 	struct crypto crypto = { };
@@ -2527,14 +2593,13 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	int i;
 	int err;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
-
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
-	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
-		drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
+	if (!(adm_ctx->my_addr && adm_ctx->peer_addr)) {
+		drbd_msg_put_info(adm_ctx->reply_skb, "connection endpoint(s) missing");
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
 	}
@@ -2544,15 +2609,15 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	 * concurrent reconfiguration/addition/deletion */
 	for_each_resource(resource, &drbd_resources) {
 		for_each_connection(connection, resource) {
-			if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
-			    !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
+			if (nla_len(adm_ctx->my_addr) == connection->my_addr_len &&
+			    !memcmp(nla_data(adm_ctx->my_addr), &connection->my_addr,
 				    connection->my_addr_len)) {
 				retcode = ERR_LOCAL_ADDR;
 				goto out;
 			}
 
-			if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
-			    !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
+			if (nla_len(adm_ctx->peer_addr) == connection->peer_addr_len &&
+			    !memcmp(nla_data(adm_ctx->peer_addr), &connection->peer_addr,
 				    connection->peer_addr_len)) {
 				retcode = ERR_PEER_ADDR;
 				goto out;
@@ -2560,8 +2625,8 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
-	connection = first_connection(adm_ctx.resource);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
+	connection = first_connection(adm_ctx->resource);
 	conn_reconfig_start(connection);
 
 	if (connection->cstate > C_STANDALONE) {
@@ -2581,7 +2646,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	err = net_conf_from_attrs(new_net_conf, info);
 	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
@@ -2597,11 +2662,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 
 	drbd_flush_workqueue(&connection->sender_work);
 
-	mutex_lock(&adm_ctx.resource->conf_update);
+	mutex_lock(&adm_ctx->resource->conf_update);
 	old_net_conf = connection->net_conf;
 	if (old_net_conf) {
 		retcode = ERR_NET_CONFIGURED;
-		mutex_unlock(&adm_ctx.resource->conf_update);
+		mutex_unlock(&adm_ctx->resource->conf_update);
 		goto fail;
 	}
 	rcu_assign_pointer(connection->net_conf, new_net_conf);
@@ -2612,10 +2677,10 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	connection->csums_tfm = crypto.csums_tfm;
 	connection->verify_tfm = crypto.verify_tfm;
 
-	connection->my_addr_len = nla_len(adm_ctx.my_addr);
-	memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
-	connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
-	memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
+	connection->my_addr_len = nla_len(adm_ctx->my_addr);
+	memcpy(&connection->my_addr, nla_data(adm_ctx->my_addr), connection->my_addr_len);
+	connection->peer_addr_len = nla_len(adm_ctx->peer_addr);
+	memcpy(&connection->peer_addr, nla_data(adm_ctx->peer_addr), connection->peer_addr_len);
 
 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
 		peer_devices++;
@@ -2633,7 +2698,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 		notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags);
 	}
 	mutex_unlock(&notification_mutex);
-	mutex_unlock(&adm_ctx.resource->conf_update);
+	mutex_unlock(&adm_ctx->resource->conf_update);
 
 	rcu_read_lock();
 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
@@ -2646,8 +2711,8 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
 
 	conn_reconfig_done(connection);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
-	drbd_adm_finish(&adm_ctx, info, rv);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
+	adm_ctx->reply_dh->ret_code = rv;
 	return 0;
 
 fail:
@@ -2655,9 +2720,9 @@ fail:
 	kfree(new_net_conf);
 
 	conn_reconfig_done(connection);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -2729,40 +2794,40 @@ repeat:
 
 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct disconnect_parms parms;
 	struct drbd_connection *connection;
 	enum drbd_state_rv rv;
 	enum drbd_ret_code retcode;
 	int err;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto fail;
 
-	connection = adm_ctx.connection;
+	connection = adm_ctx->connection;
 	memset(&parms, 0, sizeof(parms));
 	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
 		err = disconnect_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 			goto fail;
 		}
 	}
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
 	rv = conn_try_disconnect(connection, parms.force_disconnect);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 	if (rv < SS_SUCCESS) {
-		drbd_adm_finish(&adm_ctx, info, rv);
+		adm_ctx->reply_dh->ret_code = rv;
 		return 0;
 	}
 	retcode = NO_ERROR;
  fail:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -2784,7 +2849,7 @@ void resync_after_online_grow(struct drbd_device *device)
 
 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
 	struct resize_parms rs;
 	struct drbd_device *device;
@@ -2795,14 +2860,14 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	sector_t u_size;
 	int err;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto finish;
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
-	device = adm_ctx.device;
+	mutex_lock(&adm_ctx->resource->adm_mutex);
+	device = adm_ctx->device;
 	if (!get_ldev(device)) {
 		retcode = ERR_NO_DISK;
 		goto fail;
@@ -2815,7 +2880,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 		err = resize_parms_from_attrs(&rs, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 			goto fail_ldev;
 		}
 	}
@@ -2907,9 +2972,9 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	}
 
  fail:
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
  finish:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 
  fail_ldev:
@@ -2920,61 +2985,61 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 
 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	enum drbd_ret_code retcode;
 	struct res_opts res_opts;
 	int err;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto fail;
 
-	res_opts = adm_ctx.resource->res_opts;
+	res_opts = adm_ctx->resource->res_opts;
 	if (should_set_defaults(info))
 		set_res_opts_defaults(&res_opts);
 
 	err = res_opts_from_attrs(&res_opts, info);
 	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
-	err = set_resource_options(adm_ctx.resource, &res_opts);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
+	err = set_resource_options(adm_ctx->resource, &res_opts);
 	if (err) {
 		retcode = ERR_INVALID_REQUEST;
 		if (err == -ENOMEM)
 			retcode = ERR_NOMEM;
 	}
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 
 fail:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct drbd_device *device;
 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
-	device = adm_ctx.device;
+	device = adm_ctx->device;
 	if (!get_ldev(device)) {
 		retcode = ERR_NO_DISK;
 		goto out;
 	}
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
 
 	/* If there is still bitmap IO pending, probably because of a previous
 	 * resync just being finished, wait for it before requesting a new resync.
@@ -2997,30 +3062,30 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 	} else
 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
 	drbd_resume_io(device);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 	put_ldev(device);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
 		union drbd_state mask, union drbd_state val)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
-	retcode = drbd_request_state(adm_ctx.device, mask, val);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
+	retcode = drbd_request_state(adm_ctx->device, mask, val);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -3036,23 +3101,23 @@ static int drbd_bmio_set_susp_al(struct drbd_device *device,
 
 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	int retcode; /* drbd_ret_code, drbd_state_rv */
 	struct drbd_device *device;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
-	device = adm_ctx.device;
+	device = adm_ctx->device;
 	if (!get_ldev(device)) {
 		retcode = ERR_NO_DISK;
 		goto out;
 	}
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
 
 	/* If there is still bitmap IO pending, probably because of a previous
 	 * resync just being finished, wait for it before requesting a new resync.
@@ -3078,48 +3143,48 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 	} else
 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
 	drbd_resume_io(device);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 	put_ldev(device);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
-	if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
+	mutex_lock(&adm_ctx->resource->adm_mutex);
+	if (drbd_request_state(adm_ctx->device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
 		retcode = ERR_PAUSE_IS_SET;
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	union drbd_dev_state s;
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
-	if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
-		s = adm_ctx.device->state;
+	mutex_lock(&adm_ctx->resource->adm_mutex);
+	if (drbd_request_state(adm_ctx->device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
+		s = adm_ctx->device->state;
 		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
 			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
 				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
@@ -3127,9 +3192,9 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
 			retcode = ERR_PAUSE_IS_CLEAR;
 		}
 	}
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -3140,18 +3205,18 @@ int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
 
 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct drbd_device *device;
 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
-	device = adm_ctx.device;
+	mutex_lock(&adm_ctx->resource->adm_mutex);
+	device = adm_ctx->device;
 	if (test_bit(NEW_CUR_UUID, &device->flags)) {
 		if (get_ldev_if_state(device, D_ATTACHING)) {
 			drbd_uuid_new_current(device);
@@ -3188,9 +3253,9 @@ int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
 			tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
 	}
 	drbd_resume_io(device);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -3843,23 +3908,24 @@ nla_put_failure:
 
 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	enum drbd_ret_code retcode;
 	int err;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
-	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
+	err = nla_put_status_info(adm_ctx->reply_skb, adm_ctx->device, NULL);
 	if (err) {
-		nlmsg_free(adm_ctx.reply_skb);
+		nlmsg_free(adm_ctx->reply_skb);
+		adm_ctx->reply_skb = NULL;
 		return err;
 	}
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -4046,46 +4112,47 @@ dump:
 
 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	enum drbd_ret_code retcode;
 	struct timeout_parms tp;
 	int err;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
 	tp.timeout_type =
-		adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
-		test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
+		adm_ctx->device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
+		test_bit(USE_DEGR_WFC_T, &adm_ctx->device->flags) ? UT_DEGRADED :
 		UT_DEFAULT;
 
-	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
+	err = timeout_parms_to_priv_skb(adm_ctx->reply_skb, &tp);
 	if (err) {
-		nlmsg_free(adm_ctx.reply_skb);
+		nlmsg_free(adm_ctx->reply_skb);
+		adm_ctx->reply_skb = NULL;
 		return err;
 	}
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct drbd_device *device;
 	enum drbd_ret_code retcode;
 	struct start_ov_parms parms;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
-	device = adm_ctx.device;
+	device = adm_ctx->device;
 
 	/* resume from last known position, if possible */
 	parms.ov_start_sector = device->ov_start_sector;
@@ -4094,11 +4161,11 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 		int err = start_ov_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 			goto out;
 		}
 	}
-	mutex_lock(&adm_ctx.resource->adm_mutex);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
 
 	/* w_make_ov_request expects position to be aligned */
 	device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
@@ -4111,40 +4178,40 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 	retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
 	drbd_resume_io(device);
 
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
 
 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct drbd_device *device;
 	enum drbd_ret_code retcode;
 	int skip_initial_sync = 0;
 	int err;
 	struct new_c_uuid_parms args;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out_nolock;
 
-	device = adm_ctx.device;
+	device = adm_ctx->device;
 	memset(&args, 0, sizeof(args));
 	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
 		err = new_c_uuid_parms_from_attrs(&args, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 			goto out_nolock;
 		}
 	}
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
 	mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
 
 	if (!get_ldev(device)) {
@@ -4189,9 +4256,9 @@ out_dec:
 	put_ldev(device);
 out:
 	mutex_unlock(device->state_mutex);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 out_nolock:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -4224,14 +4291,14 @@ static void resource_to_info(struct resource_info *info,
 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 {
 	struct drbd_connection *connection;
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	enum drbd_ret_code retcode;
 	struct res_opts res_opts;
 	int err;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
@@ -4239,18 +4306,18 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 	err = res_opts_from_attrs(&res_opts, info);
 	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
 		goto out;
 	}
 
-	retcode = drbd_check_resource_name(&adm_ctx);
+	retcode = drbd_check_resource_name(adm_ctx);
 	if (retcode != NO_ERROR)
 		goto out;
 
-	if (adm_ctx.resource) {
+	if (adm_ctx->resource) {
 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
 			retcode = ERR_INVALID_REQUEST;
-			drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
+			drbd_msg_put_info(adm_ctx->reply_skb, "resource exists");
 		}
 		/* else: still NO_ERROR */
 		goto out;
@@ -4258,7 +4325,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 
 	/* not yet safe for genl_family.parallel_ops */
 	mutex_lock(&resources_mutex);
-	connection = conn_create(adm_ctx.resource_name, &res_opts);
+	connection = conn_create(adm_ctx->resource_name, &res_opts);
 	mutex_unlock(&resources_mutex);
 
 	if (connection) {
@@ -4273,7 +4340,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 		retcode = ERR_NOMEM;
 
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -4286,38 +4353,38 @@ static void device_to_info(struct device_info *info,
 
 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct drbd_genlmsghdr *dh = genl_info_userhdr(info);
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
 	if (dh->minor > MINORMASK) {
-		drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
+		drbd_msg_put_info(adm_ctx->reply_skb, "requested minor out of range");
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
 	}
-	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
-		drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
+	if (adm_ctx->volume > DRBD_VOLUME_MAX) {
+		drbd_msg_put_info(adm_ctx->reply_skb, "requested volume id out of range");
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
 	}
 
 	/* drbd_adm_prepare made sure already
 	 * that first_peer_device(device)->connection and device->vnr match the request. */
-	if (adm_ctx.device) {
+	if (adm_ctx->device) {
 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
 			retcode = ERR_MINOR_OR_VOLUME_EXISTS;
 		/* else: still NO_ERROR */
 		goto out;
 	}
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
-	retcode = drbd_create_device(&adm_ctx, dh->minor);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
+	retcode = drbd_create_device(adm_ctx, dh->minor);
 	if (retcode == NO_ERROR) {
 		struct drbd_device *device;
 		struct drbd_peer_device *peer_device;
@@ -4348,9 +4415,9 @@ int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
 		}
 		mutex_unlock(&notification_mutex);
 	}
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -4393,20 +4460,20 @@ static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
 
 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
-	retcode = adm_del_minor(adm_ctx.device);
-	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	mutex_lock(&adm_ctx->resource->adm_mutex);
+	retcode = adm_del_minor(adm_ctx->device);
+	mutex_unlock(&adm_ctx->resource->adm_mutex);
 out:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
@@ -4442,20 +4509,20 @@ static int adm_del_resource(struct drbd_resource *resource)
 
 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct drbd_resource *resource;
 	struct drbd_connection *connection;
 	struct drbd_device *device;
 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
 	unsigned i;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto finish;
 
-	resource = adm_ctx.resource;
+	resource = adm_ctx->resource;
 	mutex_lock(&resource->adm_mutex);
 	/* demote */
 	for_each_connection(connection, resource) {
@@ -4464,14 +4531,14 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 		idr_for_each_entry(&connection->peer_devices, peer_device, i) {
 			retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
 			if (retcode < SS_SUCCESS) {
-				drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
+				drbd_msg_put_info(adm_ctx->reply_skb, "failed to demote");
 				goto out;
 			}
 		}
 
 		retcode = conn_try_disconnect(connection, 0);
 		if (retcode < SS_SUCCESS) {
-			drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
+			drbd_msg_put_info(adm_ctx->reply_skb, "failed to disconnect");
 			goto out;
 		}
 	}
@@ -4480,7 +4547,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 	idr_for_each_entry(&resource->devices, device, i) {
 		retcode = adm_detach(device, 0);
 		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
-			drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
+			drbd_msg_put_info(adm_ctx->reply_skb, "failed to detach");
 			goto out;
 		}
 	}
@@ -4490,7 +4557,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 		retcode = adm_del_minor(device);
 		if (retcode != NO_ERROR) {
 			/* "can not happen" */
-			drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
+			drbd_msg_put_info(adm_ctx->reply_skb, "failed to delete volume");
 			goto out;
 		}
 	}
@@ -4499,28 +4566,28 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 out:
 	mutex_unlock(&resource->adm_mutex);
 finish:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_config_context adm_ctx;
+	struct drbd_config_context *adm_ctx = info->user_ptr[0];
 	struct drbd_resource *resource;
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
-	if (!adm_ctx.reply_skb)
-		return retcode;
+	if (!adm_ctx->reply_skb)
+		return 0;
+	retcode = adm_ctx->reply_dh->ret_code;
 	if (retcode != NO_ERROR)
 		goto finish;
-	resource = adm_ctx.resource;
+	resource = adm_ctx->resource;
 
 	mutex_lock(&resource->adm_mutex);
 	retcode = adm_del_resource(resource);
 	mutex_unlock(&resource->adm_mutex);
 finish:
-	drbd_adm_finish(&adm_ctx, info, retcode);
+	adm_ctx->reply_dh->ret_code = retcode;
 	return 0;
 }
 
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index d4da060b7532..6edcac85155e 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -292,6 +292,10 @@ static struct genl_family ZZZ_genl_family __ro_after_init = {
 #endif
 	.maxattr = ARRAY_SIZE(CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy))-1,
 	.policy	= CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy),
+#ifdef GENL_MAGIC_FAMILY_PRE_DOIT
+	.pre_doit = GENL_MAGIC_FAMILY_PRE_DOIT,
+	.post_doit = GENL_MAGIC_FAMILY_POST_DOIT,
+#endif
 	.ops = ZZZ_genl_ops,
 	.n_ops = ARRAY_SIZE(ZZZ_genl_ops),
 	.mcgrps = ZZZ_genl_mcgrps,
-- 
cgit v1.2.3


From 698753e4a51a5a6670d527a3db084c3616253abc Mon Sep 17 00:00:00 2001
From: Clément Le Goffic <clement.legoffic@foss.st.com>
Date: Tue, 10 Feb 2026 20:05:45 +0100
Subject: bus: firewall: move stm32_firewall header file in include folder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Other driver than RIFSC and ETZPC can implement firewall ops, such as
RCC.
In order for them to have access to the ops and type of this framework,
we need to get the `stm32_firewall.h` file in the include/ folder.

Signed-off-by: Clément Le Goffic <clement.legoffic@foss.st.com>
Signed-off-by: Clément Le Goffic <legoffic.clement@gmail.com>
Acked-by: Gatien Chevallier <gatien.chevallier@foss.st.com>
Link: https://lore.kernel.org/r/20260210-b4-firewall-upstream-v8-1-097c1e47af82@gmail.com
Signed-off-by: Alexandre Torgue <alexandre.torgue@foss.st.com>
---
 drivers/bus/stm32_etzpc.c          |  3 +-
 drivers/bus/stm32_firewall.c       |  3 +-
 drivers/bus/stm32_firewall.h       | 83 --------------------------------------
 drivers/bus/stm32_rifsc.c          |  3 +-
 include/linux/bus/stm32_firewall.h | 83 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 86 insertions(+), 89 deletions(-)
 delete mode 100644 drivers/bus/stm32_firewall.h
 create mode 100644 include/linux/bus/stm32_firewall.h

(limited to 'include/linux')

diff --git a/drivers/bus/stm32_etzpc.c b/drivers/bus/stm32_etzpc.c
index 7fc0f16960be..4918a14e507e 100644
--- a/drivers/bus/stm32_etzpc.c
+++ b/drivers/bus/stm32_etzpc.c
@@ -5,6 +5,7 @@
 
 #include <linux/bitfield.h>
 #include <linux/bits.h>
+#include <linux/bus/stm32_firewall.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -16,8 +17,6 @@
 #include <linux/platform_device.h>
 #include <linux/types.h>
 
-#include "stm32_firewall.h"
-
 /*
  * ETZPC registers
  */
diff --git a/drivers/bus/stm32_firewall.c b/drivers/bus/stm32_firewall.c
index 7e7afe8007db..bbcf42ad668a 100644
--- a/drivers/bus/stm32_firewall.c
+++ b/drivers/bus/stm32_firewall.c
@@ -5,6 +5,7 @@
 
 #include <linux/bitfield.h>
 #include <linux/bits.h>
+#include <linux/bus/stm32_firewall.h>
 #include <linux/bus/stm32_firewall_device.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -18,8 +19,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
-#include "stm32_firewall.h"
-
 /* Corresponds to STM32_FIREWALL_MAX_EXTRA_ARGS + firewall ID */
 #define STM32_FIREWALL_MAX_ARGS		(STM32_FIREWALL_MAX_EXTRA_ARGS + 1)
 
diff --git a/drivers/bus/stm32_firewall.h b/drivers/bus/stm32_firewall.h
deleted file mode 100644
index e5fac85fe346..000000000000
--- a/drivers/bus/stm32_firewall.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2023, STMicroelectronics - All Rights Reserved
- */
-
-#ifndef _STM32_FIREWALL_H
-#define _STM32_FIREWALL_H
-
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/types.h>
-
-/**
- * STM32_PERIPHERAL_FIREWALL:		This type of firewall protects peripherals
- * STM32_MEMORY_FIREWALL:		This type of firewall protects memories/subsets of memory
- *					zones
- * STM32_NOTYPE_FIREWALL:		Undefined firewall type
- */
-
-#define STM32_PERIPHERAL_FIREWALL	BIT(1)
-#define STM32_MEMORY_FIREWALL		BIT(2)
-#define STM32_NOTYPE_FIREWALL		BIT(3)
-
-/**
- * struct stm32_firewall_controller - Information on firewall controller supplying services
- *
- * @name:			Name of the firewall controller
- * @dev:			Device reference of the firewall controller
- * @mmio:			Base address of the firewall controller
- * @entry:			List entry of the firewall controller list
- * @type:			Type of firewall
- * @max_entries:		Number of entries covered by the firewall
- * @grant_access:		Callback used to grant access for a device access against a
- *				firewall controller
- * @release_access:		Callback used to release resources taken by a device when access was
- *				granted
- * @grant_memory_range_access:	Callback used to grant access for a device to a given memory region
- */
-struct stm32_firewall_controller {
-	const char *name;
-	struct device *dev;
-	void __iomem *mmio;
-	struct list_head entry;
-	unsigned int type;
-	unsigned int max_entries;
-
-	int (*grant_access)(struct stm32_firewall_controller *ctrl, u32 id);
-	void (*release_access)(struct stm32_firewall_controller *ctrl, u32 id);
-	int (*grant_memory_range_access)(struct stm32_firewall_controller *ctrl, phys_addr_t paddr,
-					 size_t size);
-};
-
-/**
- * stm32_firewall_controller_register - Register a firewall controller to the STM32 firewall
- *					framework
- * @firewall_controller:	Firewall controller to register
- *
- * Returns 0 in case of success or -ENODEV if no controller was given.
- */
-int stm32_firewall_controller_register(struct stm32_firewall_controller *firewall_controller);
-
-/**
- * stm32_firewall_controller_unregister - Unregister a firewall controller from the STM32
- *					  firewall framework
- * @firewall_controller:	Firewall controller to unregister
- */
-void stm32_firewall_controller_unregister(struct stm32_firewall_controller *firewall_controller);
-
-/**
- * stm32_firewall_populate_bus - Populate device tree nodes that have a correct firewall
- *				 configuration. This is used at boot-time only, as a sanity check
- *				 between device tree and firewalls hardware configurations to
- *				 prevent a kernel crash when a device driver is not granted access
- *
- * @firewall_controller:	Firewall controller which nodes will be populated or not
- *
- * Returns 0 in case of success or appropriate errno code if error occurred.
- */
-int stm32_firewall_populate_bus(struct stm32_firewall_controller *firewall_controller);
-
-#endif /* _STM32_FIREWALL_H */
diff --git a/drivers/bus/stm32_rifsc.c b/drivers/bus/stm32_rifsc.c
index debeaf8ea1bd..65990ae8dd08 100644
--- a/drivers/bus/stm32_rifsc.c
+++ b/drivers/bus/stm32_rifsc.c
@@ -5,6 +5,7 @@
 
 #include <linux/bitfield.h>
 #include <linux/bits.h>
+#include <linux/bus/stm32_firewall.h>
 #include <linux/debugfs.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -17,8 +18,6 @@
 #include <linux/platform_device.h>
 #include <linux/types.h>
 
-#include "stm32_firewall.h"
-
 /*
  * RIFSC offset register
  */
diff --git a/include/linux/bus/stm32_firewall.h b/include/linux/bus/stm32_firewall.h
new file mode 100644
index 000000000000..e5fac85fe346
--- /dev/null
+++ b/include/linux/bus/stm32_firewall.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023, STMicroelectronics - All Rights Reserved
+ */
+
+#ifndef _STM32_FIREWALL_H
+#define _STM32_FIREWALL_H
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+/**
+ * STM32_PERIPHERAL_FIREWALL:		This type of firewall protects peripherals
+ * STM32_MEMORY_FIREWALL:		This type of firewall protects memories/subsets of memory
+ *					zones
+ * STM32_NOTYPE_FIREWALL:		Undefined firewall type
+ */
+
+#define STM32_PERIPHERAL_FIREWALL	BIT(1)
+#define STM32_MEMORY_FIREWALL		BIT(2)
+#define STM32_NOTYPE_FIREWALL		BIT(3)
+
+/**
+ * struct stm32_firewall_controller - Information on firewall controller supplying services
+ *
+ * @name:			Name of the firewall controller
+ * @dev:			Device reference of the firewall controller
+ * @mmio:			Base address of the firewall controller
+ * @entry:			List entry of the firewall controller list
+ * @type:			Type of firewall
+ * @max_entries:		Number of entries covered by the firewall
+ * @grant_access:		Callback used to grant access for a device access against a
+ *				firewall controller
+ * @release_access:		Callback used to release resources taken by a device when access was
+ *				granted
+ * @grant_memory_range_access:	Callback used to grant access for a device to a given memory region
+ */
+struct stm32_firewall_controller {
+	const char *name;
+	struct device *dev;
+	void __iomem *mmio;
+	struct list_head entry;
+	unsigned int type;
+	unsigned int max_entries;
+
+	int (*grant_access)(struct stm32_firewall_controller *ctrl, u32 id);
+	void (*release_access)(struct stm32_firewall_controller *ctrl, u32 id);
+	int (*grant_memory_range_access)(struct stm32_firewall_controller *ctrl, phys_addr_t paddr,
+					 size_t size);
+};
+
+/**
+ * stm32_firewall_controller_register - Register a firewall controller to the STM32 firewall
+ *					framework
+ * @firewall_controller:	Firewall controller to register
+ *
+ * Returns 0 in case of success or -ENODEV if no controller was given.
+ */
+int stm32_firewall_controller_register(struct stm32_firewall_controller *firewall_controller);
+
+/**
+ * stm32_firewall_controller_unregister - Unregister a firewall controller from the STM32
+ *					  firewall framework
+ * @firewall_controller:	Firewall controller to unregister
+ */
+void stm32_firewall_controller_unregister(struct stm32_firewall_controller *firewall_controller);
+
+/**
+ * stm32_firewall_populate_bus - Populate device tree nodes that have a correct firewall
+ *				 configuration. This is used at boot-time only, as a sanity check
+ *				 between device tree and firewalls hardware configurations to
+ *				 prevent a kernel crash when a device driver is not granted access
+ *
+ * @firewall_controller:	Firewall controller which nodes will be populated or not
+ *
+ * Returns 0 in case of success or appropriate errno code if error occurred.
+ */
+int stm32_firewall_populate_bus(struct stm32_firewall_controller *firewall_controller);
+
+#endif /* _STM32_FIREWALL_H */
-- 
cgit v1.2.3


From 4ea96cfc728bbc7c1c3051cce9209bf9a6eeb23e Mon Sep 17 00:00:00 2001
From: Gatien Chevallier <gatien.chevallier@foss.st.com>
Date: Thu, 26 Feb 2026 11:30:20 +0100
Subject: bus: stm32_firewall: add stm32_firewall_get_grant_all_access() API

Add the stm32_firewall_get_grant_all_access() API to be able to fetch
all firewall references in an access-controllers property and try to grant
access to all of them.

Signed-off-by: Gatien Chevallier <gatien.chevallier@foss.st.com>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Link: https://lore.kernel.org/r/20260226-debug_bus-v6-5-5d794697798d@foss.st.com
Signed-off-by: Alexandre Torgue <alexandre.torgue@foss.st.com>
---
 drivers/bus/stm32_firewall.c              | 42 +++++++++++++++++++++++++++++++
 include/linux/bus/stm32_firewall_device.h | 26 +++++++++++++++++++
 2 files changed, 68 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bus/stm32_firewall.c b/drivers/bus/stm32_firewall.c
index e69bc8c1a4e7..e3619dba8b06 100644
--- a/drivers/bus/stm32_firewall.c
+++ b/drivers/bus/stm32_firewall.c
@@ -184,6 +184,48 @@ void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 su
 }
 EXPORT_SYMBOL_GPL(stm32_firewall_release_access_by_id);
 
+int stm32_firewall_get_grant_all_access(struct device *dev, struct stm32_firewall **firewall,
+					int *nb_firewall)
+{
+	struct stm32_firewall *loc_firewall;
+	int err;
+	int i;
+
+	*nb_firewall = of_count_phandle_with_args(dev->of_node, "access-controllers",
+						  "#access-controller-cells");
+	if (*nb_firewall < 0)
+		return *nb_firewall;
+
+	if (!*nb_firewall) {
+		*firewall = NULL;
+		return 0;
+	}
+
+	loc_firewall = devm_kcalloc(dev, *nb_firewall, sizeof(*loc_firewall), GFP_KERNEL);
+	if (!loc_firewall)
+		return -ENOMEM;
+
+	/* Get stm32 firewall information */
+	err = stm32_firewall_get_firewall(dev->of_node, loc_firewall, *nb_firewall);
+	if (err)
+		return err;
+
+	for (i = 0; i < *nb_firewall; i++) {
+		err = stm32_firewall_grant_access(&loc_firewall[i]);
+		if (err) {
+			while (i--)
+				stm32_firewall_release_access(&loc_firewall[i]);
+
+			return err;
+		}
+	}
+
+	*firewall = loc_firewall;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(stm32_firewall_get_grant_all_access);
+
 /* Firewall controller API */
 
 int stm32_firewall_controller_register(struct stm32_firewall_controller *firewall_controller)
diff --git a/include/linux/bus/stm32_firewall_device.h b/include/linux/bus/stm32_firewall_device.h
index eaa7a3f54450..6c878f3ca86f 100644
--- a/include/linux/bus/stm32_firewall_device.h
+++ b/include/linux/bus/stm32_firewall_device.h
@@ -112,6 +112,25 @@ int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, u32 subsy
  */
 void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id);
 
+/**
+ * stm32_firewall_get_grant_all_access - Allocate and get all the firewall(s) associated to given
+ *					 device. Then, try to grant access rights for each element.
+ *					 This function is basically a helper function that wraps
+ *					 both stm32_firewall_get_firewall() and
+ *					 stm32_firewall_grant_access() on all firewall references of
+ *					 a device along with the allocation of the array.
+ *					 Realease access using stm32_firewall_release_access* APIs
+ *					 when done.
+ *
+ * @dev:			Device performing the checks
+ * @firewall:			Pointer to the array of firewall references to be allocated
+ * @nb_firewall:		Number of allocated elements in @firewall
+ *
+ * Returns 0 on success, or appropriate errno code if error occurred.
+ */
+int stm32_firewall_get_grant_all_access(struct device *dev, struct stm32_firewall **firewall,
+					int *nb_firewall);
+
 #else /* CONFIG_STM32_FIREWALL */
 
 static inline int stm32_firewall_get_firewall(struct device_node *np,
@@ -141,5 +160,12 @@ static inline void stm32_firewall_release_access_by_id(struct stm32_firewall *fi
 {
 }
 
+static inline int stm32_firewall_get_grant_all_access(struct device *dev,
+						      struct stm32_firewall **firewall,
+						      int *nb_firewall)
+{
+	return -ENODEV;
+}
+
 #endif /* CONFIG_STM32_FIREWALL */
 #endif /* STM32_FIREWALL_DEVICE_H */
-- 
cgit v1.2.3


From cc5623947f3d86687c39771fcbea641907966d5c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 9 Mar 2026 23:17:26 -0700
Subject: smp: Add missing kernel-doc comments

Add missing kernel-doc comments and rearrange the order of others to
prevent all kernel-doc warnings.

 - add function Returns: sections or format existing comments as kernel-doc
 - add missing function parameter comments
 - use "/**" for smp_call_function_any() and on_each_cpu_cond_mask()
 - correct the commented function name for on_each_cpu_cond_mask()
 - use correct format for function short descriptions
 - add all kernel-doc comments for smp_call_on_cpu()
 - remove kernel-doc comments for raw_smp_processor_id() since there is
   no prototype for it here (other than !SMP)
 - in smp.h, rearrange some lines so that the kernel-doc comments for
   smp_processor_id() are immediately before the macro (to prevent
   kernel-doc warnings)
 - remove "Returns" from smp_call_function() since it doesn't
   return a value

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260310061726.1153764-1-rdunlap@infradead.org
---
 include/linux/smp.h | 38 +++++++++++++++++++++-----------------
 kernel/smp.c        | 38 +++++++++++++++++++++++++-------------
 2 files changed, 46 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 1ebd88026119..6925d15ccaa7 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -73,7 +73,7 @@ static inline void on_each_cpu(smp_call_func_t func, void *info, int wait)
 }
 
 /**
- * on_each_cpu_mask(): Run a function on processors specified by
+ * on_each_cpu_mask() - Run a function on processors specified by
  * cpumask, which may include the local processor.
  * @mask: The set of cpus to run on (only runs on online subset).
  * @func: The function to run. This must be fast and non-blocking.
@@ -239,13 +239,30 @@ static inline int get_boot_cpu_id(void)
 
 #endif /* !SMP */
 
-/**
+/*
  * raw_smp_processor_id() - get the current (unstable) CPU id
  *
- * For then you know what you are doing and need an unstable
+ * raw_smp_processor_id() is arch-specific/arch-defined and
+ * may be a macro or a static inline function.
+ *
+ * For when you know what you are doing and need an unstable
  * CPU id.
  */
 
+/*
+ * Allow the architecture to differentiate between a stable and unstable read.
+ * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a
+ * regular asm read for the stable.
+ */
+#ifndef __smp_processor_id
+#define __smp_processor_id() raw_smp_processor_id()
+#endif
+
+#ifdef CONFIG_DEBUG_PREEMPT
+  extern unsigned int debug_smp_processor_id(void);
+# define smp_processor_id() debug_smp_processor_id()
+
+#else
 /**
  * smp_processor_id() - get the current (stable) CPU id
  *
@@ -258,23 +275,10 @@ static inline int get_boot_cpu_id(void)
  *  - preemption is disabled;
  *  - the task is CPU affine.
  *
- * When CONFIG_DEBUG_PREEMPT; we verify these assumption and WARN
+ * When CONFIG_DEBUG_PREEMPT=y, we verify these assumptions and WARN
  * when smp_processor_id() is used when the CPU id is not stable.
  */
 
-/*
- * Allow the architecture to differentiate between a stable and unstable read.
- * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a
- * regular asm read for the stable.
- */
-#ifndef __smp_processor_id
-#define __smp_processor_id() raw_smp_processor_id()
-#endif
-
-#ifdef CONFIG_DEBUG_PREEMPT
-  extern unsigned int debug_smp_processor_id(void);
-# define smp_processor_id() debug_smp_processor_id()
-#else
 # define smp_processor_id() __smp_processor_id()
 #endif
 
diff --git a/kernel/smp.c b/kernel/smp.c
index f349960f79ca..b179424f6040 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -215,7 +215,7 @@ static atomic_t n_csd_lock_stuck;
 /**
  * csd_lock_is_stuck - Has a CSD-lock acquisition been stuck too long?
  *
- * Returns @true if a CSD-lock acquisition is stuck and has been stuck
+ * Returns: @true if a CSD-lock acquisition is stuck and has been stuck
  * long enough for a "non-responsive CSD lock" message to be printed.
  */
 bool csd_lock_is_stuck(void)
@@ -625,13 +625,14 @@ void flush_smp_call_function_queue(void)
 	local_irq_restore(flags);
 }
 
-/*
+/**
  * smp_call_function_single - Run a function on a specific CPU
+ * @cpu: Specific target CPU for this function.
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
  * @wait: If true, wait until function has completed on other CPUs.
  *
- * Returns 0 on success, else a negative status code.
+ * Returns: %0 on success, else a negative status code.
  */
 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 			     int wait)
@@ -738,18 +739,18 @@ out:
 }
 EXPORT_SYMBOL_GPL(smp_call_function_single_async);
 
-/*
+/**
  * smp_call_function_any - Run a function on any of the given cpus
  * @mask: The mask of cpus it can run on.
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
  * @wait: If true, wait until function has completed.
  *
- * Returns 0 on success, else a negative status code (if no cpus were online).
- *
  * Selection preference:
  *	1) current cpu if in @mask
  *	2) nearest cpu in @mask, based on NUMA topology
+ *
+ * Returns: %0 on success, else a negative status code (if no cpus were online).
  */
 int smp_call_function_any(const struct cpumask *mask,
 			  smp_call_func_t func, void *info, int wait)
@@ -880,7 +881,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 }
 
 /**
- * smp_call_function_many(): Run a function on a set of CPUs.
+ * smp_call_function_many() - Run a function on a set of CPUs.
  * @mask: The set of cpus to run on (only runs on online subset).
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
@@ -902,14 +903,12 @@ void smp_call_function_many(const struct cpumask *mask,
 EXPORT_SYMBOL(smp_call_function_many);
 
 /**
- * smp_call_function(): Run a function on all other CPUs.
+ * smp_call_function() - Run a function on all other CPUs.
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
  * @wait: If true, wait (atomically) until function has completed
  *        on other CPUs.
  *
- * Returns 0.
- *
  * If @wait is true, then returns once @func has returned; otherwise
  * it returns just before the target cpu calls @func.
  *
@@ -1009,8 +1008,8 @@ void __init smp_init(void)
 	smp_cpus_done(setup_max_cpus);
 }
 
-/*
- * on_each_cpu_cond(): Call a function on each processor for which
+/**
+ * on_each_cpu_cond_mask() - Call a function on each processor for which
  * the supplied function cond_func returns true, optionally waiting
  * for all the required CPUs to finish. This may include the local
  * processor.
@@ -1024,6 +1023,7 @@ void __init smp_init(void)
  * @info:	An arbitrary pointer to pass to both functions.
  * @wait:	If true, wait (atomically) until function has
  *		completed on other CPUs.
+ * @mask:	The set of cpus to run on (only runs on online subset).
  *
  * Preemption is disabled to protect against CPUs going offline but not online.
  * CPUs going online during the call will not be seen or sent an IPI.
@@ -1095,7 +1095,7 @@ EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
  * scheduled, for any of the CPUs in the @mask. It does not guarantee
  * correctness as it only provides a racy snapshot.
  *
- * Returns true if there is a pending IPI scheduled and false otherwise.
+ * Returns: true if there is a pending IPI scheduled and false otherwise.
  */
 bool cpus_peek_for_pending_ipi(const struct cpumask *mask)
 {
@@ -1145,6 +1145,18 @@ static void smp_call_on_cpu_callback(struct work_struct *work)
 	complete(&sscs->done);
 }
 
+/**
+ * smp_call_on_cpu() - Call a function on a specific CPU and wait
+ *	for it to return.
+ * @cpu: The CPU to run on.
+ * @func: The function to run
+ * @par: An arbitrary pointer parameter for @func.
+ * @phys: If @true, force to run on physical @cpu. See
+ *	&struct smp_call_on_cpu_struct for more info.
+ *
+ * Returns: %-ENXIO if the @cpu is invalid; otherwise the return value
+ *	from @func.
+ */
 int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
 {
 	struct smp_call_on_cpu_struct sscs = {
-- 
cgit v1.2.3


From 897c2beb4a7799154a67942fa85a9678f885f36b Mon Sep 17 00:00:00 2001
From: Viacheslav Dubeyko <slava@dubeyko.com>
Date: Mon, 23 Mar 2026 17:39:50 -0700
Subject: hfsplus: fix generic/523 test-case failure

The xfstests' test-case generic/523 fails to execute
correctly:

FSTYP -- hfsplus
PLATFORM -- Linux/x86_64 hfsplus-testing-0001 6.15.0-rc4+ #8 SMP PREEMPT_DYNAMIC Thu May 1 16:43:22 PDT 2025
MKFS_OPTIONS -- /dev/loop51
MOUNT_OPTIONS -- /dev/loop51 /mnt/scratch

generic/523 - output mismatch (see xfstests-dev/results//generic/523.out.bad)

The test-case expects to have '/' in the xattr name.
However, HFS+ unicode logic makes conversion of '/'
into ':'. In HFS+, a filename can contain '/' because
':' is the separator. The slash is a valid filename
character on macOS. But on Linux, / is the path separator
and it cannot appear in a filename component. But xattr
name can contain any of these symbols. It means that
this unicode logic conversion doesn't need to be executed
for the case of xattr name.

This patch adds distinguishing the regular and xattr names.
If we have a regular name, then this conversion of special
symbols will be executed. Otherwise, the conversion is skipped
for the case of xattr names.

sudo ./check -g auto
FSTYP         -- hfsplus
PLATFORM      -- Linux/x86_64 hfsplus-testing-0001 7.0.0-rc1+ #24 SMP PREEMPT_DYNAMIC Fri Mar 20 12:36:49 PDT 2026
MKFS_OPTIONS  -- /dev/loop51
MOUNT_OPTIONS -- /dev/loop51 /mnt/scratch

<skipped>
generic/523 33s ...  25s
<skipped>

Closes: https://github.com/hfs-linux-kernel/hfs-linux-kernel/issues/178
cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
cc: Yangtao Li <frank.li@vivo.com>
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Viacheslav Dubeyko <slava@dubeyko.com>
Link: https://lore.kernel.org/r/20260324003949.417048-2-slava@dubeyko.com
Signed-off-by: Viacheslav Dubeyko <slava@dubeyko.com>
---
 fs/hfsplus/attributes.c    |   3 +-
 fs/hfsplus/catalog.c       |   4 +-
 fs/hfsplus/hfsplus_fs.h    |   3 +-
 fs/hfsplus/unicode.c       | 121 ++++++++++++++++++++++++++++++++-------------
 fs/hfsplus/unicode_test.c  |  51 ++++++++++++-------
 include/linux/hfs_common.h |   5 ++
 6 files changed, 132 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c
index 704635c65e9a..fa87496409c9 100644
--- a/fs/hfsplus/attributes.c
+++ b/fs/hfsplus/attributes.c
@@ -57,7 +57,8 @@ int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key,
 	if (name) {
 		int res = hfsplus_asc2uni(sb,
 				(struct hfsplus_unistr *)&key->attr.key_name,
-				HFSPLUS_ATTR_MAX_STRLEN, name, strlen(name));
+				HFSPLUS_ATTR_MAX_STRLEN, name, strlen(name),
+				HFS_XATTR_NAME);
 		if (res)
 			return res;
 		len = be16_to_cpu(key->attr.key_name.length);
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 708046c5dae6..1f7ef61fc318 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -47,7 +47,7 @@ int hfsplus_cat_build_key(struct super_block *sb,
 
 	key->cat.parent = cpu_to_be32(parent);
 	err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
-			str->name, str->len);
+			      str->name, str->len, HFS_REGULAR_NAME);
 	if (unlikely(err < 0))
 		return err;
 
@@ -183,7 +183,7 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
 	entry->thread.reserved = 0;
 	entry->thread.parentID = cpu_to_be32(parentid);
 	err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
-				str->name, str->len);
+				str->name, str->len, HFS_REGULAR_NAME);
 	if (unlikely(err < 0))
 		return err;
 
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index caba698814fe..3545b8dbf11c 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -506,7 +506,8 @@ int hfsplus_uni2asc_xattr_str(struct super_block *sb,
 			      const struct hfsplus_attr_unistr *ustr,
 			      char *astr, int *len_p);
 int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
-		    int max_unistr_len, const char *astr, int len);
+		    int max_unistr_len, const char *astr, int len,
+		    int name_type);
 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str);
 int hfsplus_compare_dentry(const struct dentry *dentry, unsigned int len,
 			   const char *str, const struct qstr *name);
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index d3a142f4518b..008fec186382 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -147,9 +147,44 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
 	return NULL;
 }
 
+/*
+ * In HFS+, a filename can contain / because : is the separator.
+ * The slash is a valid filename character on macOS.
+ * But on Linux, / is the path separator and
+ * it cannot appear in a filename component.
+ * There's a parallel mapping for the NUL character (0 -> U+2400).
+ * NUL terminates strings in C/POSIX but is valid in HFS+ filenames.
+ */
+static inline
+void hfsplus_mac2linux_compatibility_check(u16 symbol, u16 *conversion,
+					   int name_type)
+{
+	*conversion = symbol;
+
+	switch (name_type) {
+	case HFS_XATTR_NAME:
+		/* ignore conversion */
+		return;
+
+	default:
+		/* continue logic */
+		break;
+	}
+
+	switch (symbol) {
+	case 0:
+		*conversion = 0x2400;
+		break;
+	case '/':
+		*conversion = ':';
+		break;
+	}
+}
+
 static int hfsplus_uni2asc(struct super_block *sb,
 			   const struct hfsplus_unistr *ustr,
-			   int max_len, char *astr, int *len_p)
+			   int max_len, char *astr, int *len_p,
+			   int name_type)
 {
 	const hfsplus_unichr *ip;
 	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
@@ -217,14 +252,8 @@ static int hfsplus_uni2asc(struct super_block *sb,
 					hfsplus_compose_table, c1);
 			if (ce1)
 				break;
-			switch (c0) {
-			case 0:
-				c0 = 0x2400;
-				break;
-			case '/':
-				c0 = ':';
-				break;
-			}
+			hfsplus_mac2linux_compatibility_check(c0, &c0,
+							      name_type);
 			res = nls->uni2char(c0, op, len);
 			if (res < 0) {
 				if (res == -ENAMETOOLONG)
@@ -257,16 +286,8 @@ static int hfsplus_uni2asc(struct super_block *sb,
 			}
 		}
 same:
-		switch (c0) {
-		case 0:
-			cc = 0x2400;
-			break;
-		case '/':
-			cc = ':';
-			break;
-		default:
-			cc = c0;
-		}
+		hfsplus_mac2linux_compatibility_check(c0, &cc,
+						      name_type);
 done:
 		res = nls->uni2char(cc, op, len);
 		if (res < 0) {
@@ -288,7 +309,10 @@ inline int hfsplus_uni2asc_str(struct super_block *sb,
 			       const struct hfsplus_unistr *ustr, char *astr,
 			       int *len_p)
 {
-	return hfsplus_uni2asc(sb, ustr, HFSPLUS_MAX_STRLEN, astr, len_p);
+	return hfsplus_uni2asc(sb,
+				ustr, HFSPLUS_MAX_STRLEN,
+				astr, len_p,
+				HFS_REGULAR_NAME);
 }
 EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_str);
 
@@ -297,22 +321,32 @@ inline int hfsplus_uni2asc_xattr_str(struct super_block *sb,
 				     char *astr, int *len_p)
 {
 	return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr,
-			       HFSPLUS_ATTR_MAX_STRLEN, astr, len_p);
+				HFSPLUS_ATTR_MAX_STRLEN, astr, len_p,
+				HFS_XATTR_NAME);
 }
 EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_xattr_str);
 
 /*
- * Convert one or more ASCII characters into a single unicode character.
- * Returns the number of ASCII characters corresponding to the unicode char.
+ * In HFS+, a filename can contain / because : is the separator.
+ * The slash is a valid filename character on macOS.
+ * But on Linux, / is the path separator and
+ * it cannot appear in a filename component.
+ * There's a parallel mapping for the NUL character (0 -> U+2400).
+ * NUL terminates strings in C/POSIX but is valid in HFS+ filenames.
  */
-static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
-			      wchar_t *uc)
+static inline
+void hfsplus_linux2mac_compatibility_check(wchar_t *uc, int name_type)
 {
-	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
-	if (size <= 0) {
-		*uc = '?';
-		size = 1;
+	switch (name_type) {
+	case HFS_XATTR_NAME:
+		/* ignore conversion */
+		return;
+
+	default:
+		/* continue logic */
+		break;
 	}
+
 	switch (*uc) {
 	case 0x2400:
 		*uc = 0;
@@ -321,6 +355,23 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
 		*uc = '/';
 		break;
 	}
+}
+
+/*
+ * Convert one or more ASCII characters into a single unicode character.
+ * Returns the number of ASCII characters corresponding to the unicode char.
+ */
+static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
+			      wchar_t *uc, int name_type)
+{
+	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
+
+	if (size <= 0) {
+		*uc = '?';
+		size = 1;
+	}
+
+	hfsplus_linux2mac_compatibility_check(uc, name_type);
 	return size;
 }
 
@@ -395,7 +446,7 @@ static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
 
 int hfsplus_asc2uni(struct super_block *sb,
 		    struct hfsplus_unistr *ustr, int max_unistr_len,
-		    const char *astr, int len)
+		    const char *astr, int len, int name_type)
 {
 	int size, dsize, decompose;
 	u16 *dstr, outlen = 0;
@@ -404,7 +455,7 @@ int hfsplus_asc2uni(struct super_block *sb,
 
 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 	while (outlen < max_unistr_len && len > 0) {
-		size = asc2unichar(sb, astr, len, &c);
+		size = asc2unichar(sb, astr, len, &c, name_type);
 
 		if (decompose)
 			dstr = decompose_unichar(c, &dsize, dhangul);
@@ -452,7 +503,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
 	len = str->len;
 	while (len > 0) {
 		int dsize;
-		size = asc2unichar(sb, astr, len, &c);
+		size = asc2unichar(sb, astr, len, &c, HFS_REGULAR_NAME);
 		astr += size;
 		len -= size;
 
@@ -510,7 +561,8 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
 
 	while (len1 > 0 && len2 > 0) {
 		if (!dsize1) {
-			size = asc2unichar(sb, astr1, len1, &c);
+			size = asc2unichar(sb, astr1, len1, &c,
+					   HFS_REGULAR_NAME);
 			astr1 += size;
 			len1 -= size;
 
@@ -525,7 +577,8 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
 		}
 
 		if (!dsize2) {
-			size = asc2unichar(sb, astr2, len2, &c);
+			size = asc2unichar(sb, astr2, len2, &c,
+					   HFS_REGULAR_NAME);
 			astr2 += size;
 			len2 -= size;
 
diff --git a/fs/hfsplus/unicode_test.c b/fs/hfsplus/unicode_test.c
index 26145bf88946..83737c9bafa0 100644
--- a/fs/hfsplus/unicode_test.c
+++ b/fs/hfsplus/unicode_test.c
@@ -715,28 +715,32 @@ static void hfsplus_asc2uni_basic_test(struct kunit *test)
 
 	/* Test simple ASCII string conversion */
 	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str1,
-				 HFSPLUS_MAX_STRLEN, "hello", 5);
+				 HFSPLUS_MAX_STRLEN, "hello", 5,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	check_unistr_content(test, &mock_env->str1, "hello");
 
 	/* Test empty string */
 	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str1,
-				 HFSPLUS_MAX_STRLEN, "", 0);
+				 HFSPLUS_MAX_STRLEN, "", 0,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	KUNIT_EXPECT_EQ(test, 0, be16_to_cpu(mock_env->str1.length));
 
 	/* Test single character */
 	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str1,
-				 HFSPLUS_MAX_STRLEN, "A", 1);
+				 HFSPLUS_MAX_STRLEN, "A", 1,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	check_unistr_content(test, &mock_env->str1, "A");
 
 	/* Test null-terminated string with explicit length */
 	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str1,
-				 HFSPLUS_MAX_STRLEN, "test\0extra", 4);
+				 HFSPLUS_MAX_STRLEN, "test\0extra", 4,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	check_unistr_content(test, &mock_env->str1, "test");
@@ -762,7 +766,8 @@ static void hfsplus_asc2uni_special_chars_test(struct kunit *test)
 
 	/* Test colon conversion (should become forward slash) */
 	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str1,
-				 HFSPLUS_MAX_STRLEN, ":", 1);
+				 HFSPLUS_MAX_STRLEN, ":", 1,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	KUNIT_EXPECT_EQ(test, 1, be16_to_cpu(mock_env->str1.length));
@@ -770,7 +775,8 @@ static void hfsplus_asc2uni_special_chars_test(struct kunit *test)
 
 	/* Test string with mixed special characters */
 	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str1,
-				 HFSPLUS_MAX_STRLEN, "a:b", 3);
+				 HFSPLUS_MAX_STRLEN, "a:b", 3,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	KUNIT_EXPECT_EQ(test, 3, be16_to_cpu(mock_env->str1.length));
@@ -780,7 +786,8 @@ static void hfsplus_asc2uni_special_chars_test(struct kunit *test)
 
 	/* Test multiple special characters */
 	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str1,
-				 HFSPLUS_MAX_STRLEN, ":::", 3);
+				 HFSPLUS_MAX_STRLEN, ":::", 3,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	KUNIT_EXPECT_EQ(test, 3, be16_to_cpu(mock_env->str1.length));
@@ -811,7 +818,8 @@ static void hfsplus_asc2uni_buffer_limits_test(struct kunit *test)
 	memset(mock_env->buf, 'a', HFSPLUS_MAX_STRLEN);
 	result = hfsplus_asc2uni(&mock_sb->sb,
 				 &mock_env->str1, HFSPLUS_MAX_STRLEN,
-				 mock_env->buf, HFSPLUS_MAX_STRLEN);
+				 mock_env->buf, HFSPLUS_MAX_STRLEN,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	KUNIT_EXPECT_EQ(test, HFSPLUS_MAX_STRLEN,
@@ -821,7 +829,8 @@ static void hfsplus_asc2uni_buffer_limits_test(struct kunit *test)
 	memset(mock_env->buf, 'a', HFSPLUS_MAX_STRLEN + 5);
 	result = hfsplus_asc2uni(&mock_sb->sb,
 				 &mock_env->str1, HFSPLUS_MAX_STRLEN,
-				 mock_env->buf, HFSPLUS_MAX_STRLEN + 5);
+				 mock_env->buf, HFSPLUS_MAX_STRLEN + 5,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, -ENAMETOOLONG, result);
 	KUNIT_EXPECT_EQ(test, HFSPLUS_MAX_STRLEN,
@@ -829,13 +838,15 @@ static void hfsplus_asc2uni_buffer_limits_test(struct kunit *test)
 
 	/* Test with smaller max_unistr_len */
 	result = hfsplus_asc2uni(&mock_sb->sb,
-				 &mock_env->str1, 5, "toolongstring", 13);
+				 &mock_env->str1, 5, "toolongstring", 13,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, -ENAMETOOLONG, result);
 	KUNIT_EXPECT_EQ(test, 5, be16_to_cpu(mock_env->str1.length));
 
 	/* Test zero max length */
-	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str1, 0, "test", 4);
+	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str1, 0, "test", 4,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, -ENAMETOOLONG, result);
 	KUNIT_EXPECT_EQ(test, 0, be16_to_cpu(mock_env->str1.length));
@@ -859,28 +870,32 @@ static void hfsplus_asc2uni_edge_cases_test(struct kunit *test)
 
 	/* Test zero length input */
 	result = hfsplus_asc2uni(&mock_sb->sb,
-				 &ustr, HFSPLUS_MAX_STRLEN, "test", 0);
+				 &ustr, HFSPLUS_MAX_STRLEN, "test", 0,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	KUNIT_EXPECT_EQ(test, 0, be16_to_cpu(ustr.length));
 
 	/* Test input with length mismatch */
 	result = hfsplus_asc2uni(&mock_sb->sb,
-				 &ustr, HFSPLUS_MAX_STRLEN, "hello", 3);
+				 &ustr, HFSPLUS_MAX_STRLEN, "hello", 3,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	check_unistr_content(test, &ustr, "hel");
 
 	/* Test with various printable ASCII characters */
 	result = hfsplus_asc2uni(&mock_sb->sb,
-				 &ustr, HFSPLUS_MAX_STRLEN, "ABC123!@#", 9);
+				 &ustr, HFSPLUS_MAX_STRLEN, "ABC123!@#", 9,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	check_unistr_content(test, &ustr, "ABC123!@#");
 
 	/* Test null character in the middle */
 	result = hfsplus_asc2uni(&mock_sb->sb,
-				 &ustr, HFSPLUS_MAX_STRLEN, test_str, 3);
+				 &ustr, HFSPLUS_MAX_STRLEN, test_str, 3,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	KUNIT_EXPECT_EQ(test, 3, be16_to_cpu(ustr.length));
@@ -909,7 +924,8 @@ static void hfsplus_asc2uni_decompose_test(struct kunit *test)
 	/* Test with decomposition disabled (default) */
 	clear_bit(HFSPLUS_SB_NODECOMPOSE, &mock_sb->sb_info.flags);
 	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str1,
-				 HFSPLUS_MAX_STRLEN, "test", 4);
+				 HFSPLUS_MAX_STRLEN, "test", 4,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	check_unistr_content(test, &mock_env->str1, "test");
@@ -917,7 +933,8 @@ static void hfsplus_asc2uni_decompose_test(struct kunit *test)
 	/* Test with decomposition enabled */
 	set_bit(HFSPLUS_SB_NODECOMPOSE, &mock_sb->sb_info.flags);
 	result = hfsplus_asc2uni(&mock_sb->sb, &mock_env->str2,
-				 HFSPLUS_MAX_STRLEN, "test", 4);
+				 HFSPLUS_MAX_STRLEN, "test", 4,
+				 HFS_REGULAR_NAME);
 
 	KUNIT_EXPECT_EQ(test, 0, result);
 	check_unistr_content(test, &mock_env->str2, "test");
diff --git a/include/linux/hfs_common.h b/include/linux/hfs_common.h
index be24c687858e..9e71b9a03b60 100644
--- a/include/linux/hfs_common.h
+++ b/include/linux/hfs_common.h
@@ -166,6 +166,11 @@ struct hfsplus_attr_unistr {
 	hfsplus_unichr unicode[HFSPLUS_ATTR_MAX_STRLEN];
 } __packed;
 
+enum {
+	HFS_REGULAR_NAME,
+	HFS_XATTR_NAME,
+};
+
 struct hfs_extent {
 	__be16 block;
 	__be16 count;
-- 
cgit v1.2.3


From 15d6dacdc97dce5ca8a0baf40f5fe8f3dcfef516 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Wed, 18 Mar 2026 14:39:25 +0200
Subject: wifi: ieee80211: Add some missing NAN definitions

Add some missing NAN Device capabilities definitions.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20260318143604.5f6b36d2b208.I7ef571682d5add96eabfcf87f81285893021e851@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211-nan.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211-nan.h b/include/linux/ieee80211-nan.h
index d07959bf8a90..ebf28ea651f9 100644
--- a/include/linux/ieee80211-nan.h
+++ b/include/linux/ieee80211-nan.h
@@ -9,7 +9,7 @@
  * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
  * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright (c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (c) 2018 - 2025 Intel Corporation
+ * Copyright (c) 2018 - 2026 Intel Corporation
  */
 
 #ifndef LINUX_IEEE80211_NAN_H
@@ -23,6 +23,11 @@
 #define NAN_OP_MODE_160MHZ		0x04
 #define NAN_OP_MODE_PNDL_SUPPRTED	0x08
 
+#define NAN_DEV_CAPA_NUM_TX_ANT_POS	0
+#define NAN_DEV_CAPA_NUM_TX_ANT_MASK	0x0f
+#define NAN_DEV_CAPA_NUM_RX_ANT_POS	4
+#define NAN_DEV_CAPA_NUM_RX_ANT_MASK	0xf0
+
 /* NAN Device capabilities, as defined in Wi-Fi Aware (TM) specification
  * Table 79
  */
-- 
cgit v1.2.3


From 7dd6f81f4ef801b57f6ce7b0eee32aef5c488538 Mon Sep 17 00:00:00 2001
From: Benjamin Berg <benjamin.berg@intel.com>
Date: Wed, 25 Mar 2026 21:57:39 +0200
Subject: wifi: mac80211: ignore reserved bits in reconfiguration status

The Link ID Info field in the Reconfiguration Status Duple subfield of
the Reconfiguration Response frame only uses the lower four bits for the
link ID. The upper bits are reserved and should therefore be ignored.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Reviewed-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20260325215404.ab5ccf4bc62e.I9aef8f4fb6f1b06671bb6cf0e2bd4ec6e4c8bda4@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h |  7 +++++++
 net/mac80211/mlme.c       | 14 ++++++++------
 2 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 52db36120314..b5d649db123f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1194,6 +1194,13 @@ struct ieee80211_mgmt {
 
 #define IEEE80211_MIN_ACTION_SIZE(type)	offsetofend(struct ieee80211_mgmt, u.action.type)
 
+/* Link Reconfiguration Status Duple field */
+struct ieee80211_ml_reconf_status {
+	u8 info;
+	__le16 status;
+} __packed;
+
+#define IEEE80211_ML_RECONF_LINK_ID_MASK	0xf
 
 /* Management MIC information element (IEEE 802.11w) for CMAC */
 struct ieee80211_mmie {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 173a60360a45..7fc5616cb244 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -10459,8 +10459,8 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata,
 	pos = mgmt->u.action.ml_reconf_resp.variable;
 	len -= offsetofend(typeof(*mgmt), u.action.ml_reconf_resp);
 
-	/* each status duple is 3 octets */
-	if (len < mgmt->u.action.ml_reconf_resp.count * 3) {
+	if (len < mgmt->u.action.ml_reconf_resp.count *
+				sizeof(struct ieee80211_ml_reconf_status)) {
 		sdata_info(sdata,
 			   "mlo: reconf: unexpected len=%zu, count=%u\n",
 			   len, mgmt->u.action.ml_reconf_resp.count);
@@ -10469,9 +10469,11 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata,
 
 	link_mask = sta_changed_links;
 	for (i = 0; i < mgmt->u.action.ml_reconf_resp.count; i++) {
-		u16 status = get_unaligned_le16(pos + 1);
+		struct ieee80211_ml_reconf_status *reconf_status = (void *)pos;
+		u16 status = le16_to_cpu(reconf_status->status);
 
-		link_id = *pos;
+		link_id = u8_get_bits(reconf_status->info,
+				      IEEE80211_ML_RECONF_LINK_ID_MASK);
 
 		if (!(link_mask & BIT(link_id))) {
 			sdata_info(sdata,
@@ -10506,8 +10508,8 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata,
 			sdata->u.mgd.reconf.added_links &= ~BIT(link_id);
 		}
 
-		pos += 3;
-		len -= 3;
+		pos += sizeof(*reconf_status);
+		len -= sizeof(*reconf_status);
 	}
 
 	if (link_mask) {
-- 
cgit v1.2.3


From 677a3d82b6407522d922705209c311e043a17813 Mon Sep 17 00:00:00 2001
From: "Vineeth Pillai (Google)" <vineeth@bitbyteword.org>
Date: Mon, 23 Mar 2026 12:00:20 -0400
Subject: tracepoint: Add trace_call__##name() API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add trace_call__##name() as a companion to trace_##name().  When a
caller already guards a tracepoint with an explicit enabled check:

  if (trace_foo_enabled() && cond)
      trace_foo(args);

trace_foo() internally repeats the static_branch_unlikely() test, which
the compiler cannot fold since static branches are patched binary
instructions.  This results in two static-branch evaluations for every
guarded call site.

trace_call__##name() calls __do_trace_##name() directly, skipping the
redundant static-branch re-check.  This avoids leaking the internal
__do_trace_##name() symbol into call sites while still eliminating the
double evaluation:

  if (trace_foo_enabled() && cond)
      trace_invoke_foo(args);   /* calls __do_trace_foo() directly */

Three locations are updated:
- __DECLARE_TRACE: invoke form omits static_branch_unlikely, retains
  the LOCKDEP RCU-watching assertion.
- __DECLARE_TRACE_SYSCALL: same, plus retains might_fault().
- !TRACEPOINTS_ENABLED stub: empty no-op so callers compile cleanly
  when tracepoints are compiled out.

Cc: Dmitry Ilvokhin <d@ilvokhin.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Cc: Xin Long <lucien.xin@gmail.com>
Cc: Jon Maloy <jmaloy@redhat.com>
Cc: Aaron Conole <aconole@redhat.com>
Cc: Eelco Chaudron <echaudro@redhat.com>
Cc: Ilya Maximets <i.maximets@ovn.org>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Oded Gabbay <ogabbay@kernel.org>
Cc: Koby Elbaz <koby.elbaz@intel.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: "Gautham R. Shenoy" <gautham.shenoy@amd.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Mario Limonciello <mario.limonciello@amd.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Chanwoo Choi <cw00.choi@samsung.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Eddie James <eajames@linux.ibm.com>
Cc: Andrew Jeffery <andrew@codeconstruct.com.au>
Cc: Joel Stanley <joel@jms.id.au>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <phasta@kernel.org>
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Leo Li <sunpeng.li@amd.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <bentiss@kernel.org>
Cc: Wolfram Sang <wsa+renesas@sang-engineering.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Michael Hennerich <michael.hennerich@analog.com>
Cc: Nuno Sá <nuno.sa@analog.com>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Chris Mason <clm@fb.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://patch.msgid.link/20260323160052.17528-2-vineeth@bitbyteword.org
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
Assisted-by: Claude:claude-sonnet-4-6
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 22ca1c8b54f3..ed969705341f 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -294,6 +294,10 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 			WARN_ONCE(!rcu_is_watching(),			\
 				  "RCU not watching for tracepoint");	\
 		}							\
+	}								\
+	static inline void trace_call__##name(proto)			\
+	{								\
+		__do_trace_##name(args);				\
 	}
 
 #define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto)		\
@@ -313,6 +317,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 			WARN_ONCE(!rcu_is_watching(),			\
 				  "RCU not watching for tracepoint");	\
 		}							\
+	}								\
+	static inline void trace_call__##name(proto)			\
+	{								\
+		might_fault();						\
+		__do_trace_##name(args);				\
 	}
 
 /*
@@ -398,6 +407,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 #define __DECLARE_TRACE_COMMON(name, proto, args, data_proto)		\
 	static inline void trace_##name(proto)				\
 	{ }								\
+	static inline void trace_call__##name(proto)			\
+	{ }								\
 	static inline int						\
 	register_trace_##name(void (*probe)(data_proto),		\
 			      void *data)				\
-- 
cgit v1.2.3


From 5f36c9ca33336036a087b270e68e8236c733f448 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 10:54:08 +0100
Subject: fs: Rename generic_file_fsync() to simple_fsync()

The implementation is now really basic so rename generic_file_fsync()
simple_fsync() and __generic_file_fsync() to simple_fsync_noflush().

Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-56-jack@suse.cz
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/adfs/dir.c      |  2 +-
 fs/adfs/file.c     |  2 +-
 fs/exfat/file.c    |  2 +-
 fs/libfs.c         | 26 ++++++++++++--------------
 fs/omfs/file.c     |  2 +-
 fs/qnx4/dir.c      |  2 +-
 fs/qnx6/dir.c      |  2 +-
 fs/ufs/dir.c       |  2 +-
 fs/ufs/file.c      |  2 +-
 include/linux/fs.h |  4 ++--
 10 files changed, 22 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 493500f37cb9..b8e23e8124ed 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -389,7 +389,7 @@ const struct file_operations adfs_dir_operations = {
 	.read		= generic_read_dir,
 	.llseek		= generic_file_llseek,
 	.iterate_shared	= adfs_iterate,
-	.fsync		= generic_file_fsync,
+	.fsync		= simple_fsync,
 };
 
 static int
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index cd13165fd904..4a1828b3f88f 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -26,7 +26,7 @@ const struct file_operations adfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read_iter	= generic_file_read_iter,
 	.mmap_prepare	= generic_file_mmap_prepare,
-	.fsync		= generic_file_fsync,
+	.fsync		= simple_fsync,
 	.write_iter	= generic_file_write_iter,
 	.splice_read	= filemap_splice_read,
 };
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 90cd540afeaa..4e8d34a75b66 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -577,7 +577,7 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
 		return -EIO;
 
-	err = __generic_file_fsync(filp, start, end, datasync);
+	err = simple_fsync_noflush(filp, start, end, datasync);
 	if (err)
 		return err;
 
diff --git a/fs/libfs.c b/fs/libfs.c
index e67e43c6509a..327d8108ed9f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1538,19 +1538,18 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
 EXPORT_SYMBOL_GPL(generic_fh_to_parent);
 
 /**
- * __generic_file_fsync - generic fsync implementation for simple filesystems
+ * simple_fsync_noflush - generic fsync implementation for simple filesystems
  *
  * @file:	file to synchronize
  * @start:	start offset in bytes
  * @end:	end offset in bytes (inclusive)
  * @datasync:	only synchronize essential metadata if true
  *
- * This is a generic implementation of the fsync method for simple
- * filesystems which track all non-inode metadata in the buffers list
- * hanging off the address_space structure.
+ * This function is an fsync handler for simple filesystems. It writes out
+ * dirty data, inode (if dirty), but does not issue a cache flush.
  */
-int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
-				 int datasync)
+int simple_fsync_noflush(struct file *file, loff_t start, loff_t end,
+			 int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
 	int err;
@@ -1573,30 +1572,29 @@ out:
 		ret = err;
 	return ret;
 }
-EXPORT_SYMBOL(__generic_file_fsync);
+EXPORT_SYMBOL(simple_fsync_noflush);
 
 /**
- * generic_file_fsync - generic fsync implementation for simple filesystems
- *			with flush
+ * simple_fsync - fsync implementation for simple filesystems with flush
  * @file:	file to synchronize
  * @start:	start offset in bytes
  * @end:	end offset in bytes (inclusive)
  * @datasync:	only synchronize essential metadata if true
  *
+ * This function is an fsync handler for simple filesystems. It writes out
+ * dirty data, inode (if dirty), and issues a cache flush.
  */
-
-int generic_file_fsync(struct file *file, loff_t start, loff_t end,
-		       int datasync)
+int simple_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
 	int err;
 
-	err = __generic_file_fsync(file, start, end, datasync);
+	err = simple_fsync_noflush(file, start, end, datasync);
 	if (err)
 		return err;
 	return blkdev_issue_flush(inode->i_sb->s_bdev);
 }
-EXPORT_SYMBOL(generic_file_fsync);
+EXPORT_SYMBOL(simple_fsync);
 
 /**
  * generic_check_addressable - Check addressability of file system
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 49a1de5a827f..28f3b113340e 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -334,7 +334,7 @@ const struct file_operations omfs_file_operations = {
 	.read_iter = generic_file_read_iter,
 	.write_iter = generic_file_write_iter,
 	.mmap_prepare = generic_file_mmap_prepare,
-	.fsync = generic_file_fsync,
+	.fsync = simple_fsync,
 	.splice_read = filemap_splice_read,
 };
 
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 6402715ab377..a9038d231be4 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -71,7 +71,7 @@ const struct file_operations qnx4_dir_operations =
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.iterate_shared	= qnx4_readdir,
-	.fsync		= generic_file_fsync,
+	.fsync		= simple_fsync,
 	.setlease	= generic_setlease,
 };
 
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index ae0c9846833d..135fb42f6936 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -275,7 +275,7 @@ const struct file_operations qnx6_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.iterate_shared	= qnx6_readdir,
-	.fsync		= generic_file_fsync,
+	.fsync		= simple_fsync,
 	.setlease	= generic_setlease,
 };
 
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 43f1578ab866..f611f965cb96 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -652,7 +652,7 @@ const struct file_operations ufs_dir_operations = {
 	.release	= ufs_dir_release,
 	.read		= generic_read_dir,
 	.iterate_shared	= ufs_readdir,
-	.fsync		= generic_file_fsync,
+	.fsync		= simple_fsync,
 	.llseek		= ufs_dir_llseek,
 	.setlease	= generic_setlease,
 };
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 809c7a4603f8..85c509ced7f9 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -41,7 +41,7 @@ const struct file_operations ufs_file_operations = {
 	.write_iter	= generic_file_write_iter,
 	.mmap_prepare	= generic_file_mmap_prepare,
 	.open           = generic_file_open,
-	.fsync		= generic_file_fsync,
+	.fsync		= simple_fsync,
 	.splice_read	= filemap_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.setlease	= generic_setlease,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b3dd145b25e..0fc0cb23000e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3295,8 +3295,8 @@ void simple_offset_destroy(struct offset_ctx *octx);
 
 extern const struct file_operations simple_offset_dir_operations;
 
-extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
-extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
+extern int simple_fsync_noflush(struct file *, loff_t, loff_t, int);
+extern int simple_fsync(struct file *, loff_t, loff_t, int);
 
 extern int generic_check_addressable(unsigned, u64);
 
-- 
cgit v1.2.3


From 972b9dd4e4180fbb2352bf2f0e015b7b63f5cca0 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 10:54:16 +0100
Subject: fs: Ignore inode metadata buffers in inode_lru_isolate()

There are only a few filesystems that use generic tracking of inode
metadata buffer heads. As such the logic to reclaim tracked metadata
buffer heads in inode_lru_isolate() doesn't bring a benefit big enough
to justify intertwining of inode reclaim and metadata buffer head
tracking. Just treat tracked metadata buffer heads as any other metadata
filesystem has to properly clean up on inode eviction and stop handling
it in inode_lru_isolate(). As a result filesystems using generic
tracking of metadata buffer heads may now see dirty metadata buffers in
their .evict methods more often which can slow down inode reclaim but
given these filesystems aren't used in performance demanding setups we
should be fine.

Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-64-jack@suse.cz
Tested-by: syzbot@syzkaller.appspotmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/buffer.c                 | 29 -----------------------------
 fs/inode.c                  | 21 +++++++++------------
 include/linux/buffer_head.h |  3 ---
 3 files changed, 9 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 1bc0f22f3cc2..bd48644e1bf8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -878,35 +878,6 @@ void invalidate_inode_buffers(struct inode *inode)
 }
 EXPORT_SYMBOL(invalidate_inode_buffers);
 
-/*
- * Remove any clean buffers from the inode's buffer list.  This is called
- * when we're trying to free the inode itself.  Those buffers can pin it.
- *
- * Returns true if all buffers were removed.
- */
-int remove_inode_buffers(struct inode *inode)
-{
-	int ret = 1;
-
-	if (inode_has_buffers(inode)) {
-		struct address_space *mapping = &inode->i_data;
-		struct list_head *list = &mapping->i_private_list;
-		struct address_space *buffer_mapping = mapping->i_private_data;
-
-		spin_lock(&buffer_mapping->i_private_lock);
-		while (!list_empty(list)) {
-			struct buffer_head *bh = BH_ENTRY(list->next);
-			if (buffer_dirty(bh)) {
-				ret = 0;
-				break;
-			}
-			__remove_assoc_queue(bh);
-		}
-		spin_unlock(&buffer_mapping->i_private_lock);
-	}
-	return ret;
-}
-
 /*
  * Create the appropriate buffers when given a folio for data area and
  * the size of each buffer.. Use the bh->b_this_page linked list to
diff --git a/fs/inode.c b/fs/inode.c
index cc12b68e021b..4f98a5f04bbd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -17,7 +17,6 @@
 #include <linux/fsverity.h>
 #include <linux/mount.h>
 #include <linux/posix_acl.h>
-#include <linux/buffer_head.h> /* for inode_has_buffers */
 #include <linux/ratelimit.h>
 #include <linux/list_lru.h>
 #include <linux/iversion.h>
@@ -367,7 +366,6 @@ struct inode *alloc_inode(struct super_block *sb)
 
 void __destroy_inode(struct inode *inode)
 {
-	BUG_ON(inode_has_buffers(inode));
 	inode_detach_wb(inode);
 	security_inode_free(inode);
 	fsnotify_inode_delete(inode);
@@ -994,19 +992,18 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
 	 * page cache in order to free up struct inodes: lowmem might
 	 * be under pressure before the cache inside the highmem zone.
 	 */
-	if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
+	if (!mapping_empty(&inode->i_data)) {
+		unsigned long reap;
+
 		inode_pin_lru_isolating(inode);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&lru->lock);
-		if (remove_inode_buffers(inode)) {
-			unsigned long reap;
-			reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
-			if (current_is_kswapd())
-				__count_vm_events(KSWAPD_INODESTEAL, reap);
-			else
-				__count_vm_events(PGINODESTEAL, reap);
-			mm_account_reclaimed_pages(reap);
-		}
+		reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
+		if (current_is_kswapd())
+			__count_vm_events(KSWAPD_INODESTEAL, reap);
+		else
+			__count_vm_events(PGINODESTEAL, reap);
+		mm_account_reclaimed_pages(reap);
 		inode_unpin_lru_isolating(inode);
 		return LRU_RETRY;
 	}
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index b16b88bfbc3e..631bf971efc0 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -517,7 +517,6 @@ void buffer_init(void);
 bool try_to_free_buffers(struct folio *folio);
 int inode_has_buffers(struct inode *inode);
 void invalidate_inode_buffers(struct inode *inode);
-int remove_inode_buffers(struct inode *inode);
 int sync_mapping_buffers(struct address_space *mapping);
 void invalidate_bh_lrus(void);
 void invalidate_bh_lrus_cpu(void);
@@ -528,9 +527,7 @@ extern int buffer_heads_over_limit;
 
 static inline void buffer_init(void) {}
 static inline bool try_to_free_buffers(struct folio *folio) { return true; }
-static inline int inode_has_buffers(struct inode *inode) { return 0; }
 static inline void invalidate_inode_buffers(struct inode *inode) {}
-static inline int remove_inode_buffers(struct inode *inode) { return 1; }
 static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
 static inline void invalidate_bh_lrus(void) {}
 static inline void invalidate_bh_lrus_cpu(void) {}
-- 
cgit v1.2.3


From 2811f2a82fafff40867b318360cc06143b088a7c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 10:54:18 +0100
Subject: hugetlbfs: Stop using i_private_data

Instead of using i_private_data for resv_map pointer add the pointer
into hugetlbfs private part of the inode.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-66-jack@suse.cz
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/hugetlbfs/inode.c    | 11 +++--------
 include/linux/hugetlb.h |  1 +
 mm/hugetlb.c            | 10 +---------
 3 files changed, 5 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3f70c47981de..6ad02493adfd 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -622,13 +622,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
 	trace_hugetlbfs_evict_inode(inode);
 	remove_inode_hugepages(inode, 0, LLONG_MAX);
 
-	/*
-	 * Get the resv_map from the address space embedded in the inode.
-	 * This is the address space which points to any resv_map allocated
-	 * at inode creation time.  If this is a device special inode,
-	 * i_mapping may not point to the original address space.
-	 */
-	resv_map = (struct resv_map *)(&inode->i_data)->i_private_data;
+	resv_map = HUGETLBFS_I(inode)->resv_map;
 	/* Only regular and link inodes have associated reserve maps */
 	if (resv_map)
 		resv_map_release(&resv_map->refs);
@@ -907,6 +901,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
 		simple_inode_init_ts(inode);
 		inode->i_op = &hugetlbfs_dir_inode_operations;
 		inode->i_fop = &simple_dir_operations;
+		HUGETLBFS_I(inode)->resv_map = NULL;
 		/* directory inodes start off with i_nlink == 2 (for "." entry) */
 		inc_nlink(inode);
 		lockdep_annotate_inode_mutex_key(inode);
@@ -950,7 +945,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 				&hugetlbfs_i_mmap_rwsem_key);
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
 		simple_inode_init_ts(inode);
-		inode->i_mapping->i_private_data = resv_map;
+		info->resv_map = resv_map;
 		info->seals = F_SEAL_SEAL;
 		switch (mode & S_IFMT) {
 		default:
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 65910437be1c..fc5462fe943f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -518,6 +518,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
 
 struct hugetlbfs_inode_info {
 	struct inode vfs_inode;
+	struct resv_map *resv_map;
 	unsigned int seals;
 };
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0beb6e22bc26..7ab5c724a711 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1157,15 +1157,7 @@ void resv_map_release(struct kref *ref)
 
 static inline struct resv_map *inode_resv_map(struct inode *inode)
 {
-	/*
-	 * At inode evict time, i_mapping may not point to the original
-	 * address space within the inode.  This original address space
-	 * contains the pointer to the resv_map.  So, always use the
-	 * address space embedded within the inode.
-	 * The VERY common case is inode->mapping == &inode->i_data but,
-	 * this may not be true for device special inodes.
-	 */
-	return (struct resv_map *)(&inode->i_data)->i_private_data;
+	return HUGETLBFS_I(inode)->resv_map;
 }
 
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
-- 
cgit v1.2.3


From cd336f2e275de14866101d3395c7d2be0a0c1b04 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 10:54:20 +0100
Subject: fs: Remove i_private_data

Nobody is using it anymore.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-68-jack@suse.cz
Tested-by: syzbot@syzkaller.appspotmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/inode.c         | 1 -
 include/linux/fs.h | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 4f98a5f04bbd..d5774e627a9c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -283,7 +283,6 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp
 	atomic_set(&mapping->nr_thps, 0);
 #endif
 	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
-	mapping->i_private_data = NULL;
 	mapping->writeback_index = 0;
 	init_rwsem(&mapping->invalidate_lock);
 	lockdep_set_class_and_name(&mapping->invalidate_lock,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0fc0cb23000e..d488459396f4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -465,7 +465,6 @@ extern const struct address_space_operations empty_aops;
  * @wb_err: The most recent error which has occurred.
  * @i_private_lock: For use by the owner of the address_space.
  * @i_private_list: For use by the owner of the address_space.
- * @i_private_data: For use by the owner of the address_space.
  */
 struct address_space {
 	struct inode		*host;
@@ -486,7 +485,6 @@ struct address_space {
 	spinlock_t		i_private_lock;
 	struct list_head	i_private_list;
 	struct rw_semaphore	i_mmap_rwsem;
-	void *			i_private_data;
 } __attribute__((aligned(sizeof(long)))) __randomize_layout;
 	/*
 	 * On most architectures that alignment is already the case; but
-- 
cgit v1.2.3


From 521bea7cec8a79684402d555caab408ed43171d5 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 10:54:24 +0100
Subject: fs: Move metadata bhs tracking to a separate struct

Instead of tracking metadata bhs for a mapping using i_private_list and
i_private_lock create a dedicated mapping_metadata_bhs struct for it.
So far this struct is embedded in address_space but that will be
switched for per-fs private inode parts later in the series. This also
changes the locking from bdev mapping's i_private_lock to a new lock
embedded in mapping_metadata_bhs to untangle the i_private_lock locking
for maintaining lists of metadata bhs and the locking for looking up /
reclaiming bdev's buffer heads. The locking in remove_assoc_map() gets
more complex due to this but overall this looks like a reasonable
tradeoff.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-72-jack@suse.cz
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/buffer.c        | 138 +++++++++++++++++++++++++----------------------------
 fs/inode.c         |   2 +
 include/linux/fs.h |   7 +++
 3 files changed, 74 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index fa3d84084adf..294f9cd07f42 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -469,30 +469,13 @@ EXPORT_SYMBOL(mark_buffer_async_write);
  *
  * The functions mark_buffer_dirty_inode(), fsync_inode_buffers(),
  * inode_has_buffers() and invalidate_inode_buffers() are provided for the
- * management of a list of dependent buffers at ->i_mapping->i_private_list.
- *
- * Locking is a little subtle: try_to_free_buffers() will remove buffers
- * from their controlling inode's queue when they are being freed.  But
- * try_to_free_buffers() will be operating against the *blockdev* mapping
- * at the time, not against the S_ISREG file which depends on those buffers.
- * So the locking for i_private_list is via the i_private_lock in the address_space
- * which backs the buffers.  Which is different from the address_space 
- * against which the buffers are listed.  So for a particular address_space,
- * mapping->i_private_lock does *not* protect mapping->i_private_list!  In fact,
- * mapping->i_private_list will always be protected by the backing blockdev's
- * ->i_private_lock.
- *
- * Which introduces a requirement: all buffers on an address_space's
- * ->i_private_list must be from the same address_space: the blockdev's.
- *
- * address_spaces which do not place buffers at ->i_private_list via these
- * utility functions are free to use i_private_lock and i_private_list for
- * whatever they want.  The only requirement is that list_empty(i_private_list)
- * be true at clear_inode() time.
- *
- * FIXME: clear_inode should not call invalidate_inode_buffers().  The
- * filesystems should do that.  invalidate_inode_buffers() should just go
- * BUG_ON(!list_empty).
+ * management of a list of dependent buffers in mapping_metadata_bhs struct.
+ *
+ * The locking is a little subtle: The list of buffer heads is protected by
+ * the lock in mapping_metadata_bhs so functions coming from bdev mapping
+ * (such as try_to_free_buffers()) need to safely get to mapping_metadata_bhs
+ * using RCU, grab the lock, verify we didn't race with somebody detaching the
+ * bh / moving it to different inode and only then proceeding.
  *
  * FIXME: mark_buffer_dirty_inode() is a data-plane operation.  It should
  * take an address_space, not an inode.  And it should be called
@@ -509,19 +492,45 @@ EXPORT_SYMBOL(mark_buffer_async_write);
  * b_inode back.
  */
 
-/*
- * The buffer's backing address_space's i_private_lock must be held
- */
-static void __remove_assoc_queue(struct buffer_head *bh)
+static void __remove_assoc_queue(struct mapping_metadata_bhs *mmb,
+			         struct buffer_head *bh)
 {
+	lockdep_assert_held(&mmb->lock);
 	list_del_init(&bh->b_assoc_buffers);
 	WARN_ON(!bh->b_assoc_map);
 	bh->b_assoc_map = NULL;
 }
 
+static void remove_assoc_queue(struct buffer_head *bh)
+{
+	struct address_space *mapping;
+	struct mapping_metadata_bhs *mmb;
+
+	/*
+	 * The locking dance is ugly here. We need to acquire the lock
+	 * protecting the metadata bh list while possibly racing with bh
+	 * being removed from the list or moved to a different one.  We
+	 * use RCU to pin mapping_metadata_bhs in memory to
+	 * opportunistically acquire the lock and then recheck the bh
+	 * didn't move under us.
+	 */
+	while (bh->b_assoc_map) {
+		rcu_read_lock();
+		mapping = READ_ONCE(bh->b_assoc_map);
+		if (mapping) {
+			mmb = &mapping->i_metadata_bhs;
+			spin_lock(&mmb->lock);
+			if (bh->b_assoc_map == mapping)
+				__remove_assoc_queue(mmb, bh);
+			spin_unlock(&mmb->lock);
+		}
+		rcu_read_unlock();
+	}
+}
+
 int inode_has_buffers(struct inode *inode)
 {
-	return !list_empty(&inode->i_data.i_private_list);
+	return !list_empty(&inode->i_data.i_metadata_bhs.list);
 }
 EXPORT_SYMBOL_GPL(inode_has_buffers);
 
@@ -529,7 +538,7 @@ EXPORT_SYMBOL_GPL(inode_has_buffers);
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
  *
- * Starts I/O against the buffers at mapping->i_private_list, and waits upon
+ * Starts I/O against the buffers at mapping->i_metadata_bhs and waits upon
  * that I/O. Basically, this is a convenience function for fsync().  @mapping
  * is a file or directory which needs those buffers to be written for a
  * successful fsync().
@@ -548,23 +557,22 @@ EXPORT_SYMBOL_GPL(inode_has_buffers);
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
-	struct address_space *buffer_mapping =
-				mapping->host->i_sb->s_bdev->bd_mapping;
+	struct mapping_metadata_bhs *mmb = &mapping->i_metadata_bhs;
 	struct buffer_head *bh;
 	int err = 0;
 	struct blk_plug plug;
 	LIST_HEAD(tmp);
 
-	if (list_empty(&mapping->i_private_list))
+	if (list_empty(&mmb->list))
 		return 0;
 
 	blk_start_plug(&plug);
 
-	spin_lock(&buffer_mapping->i_private_lock);
-	while (!list_empty(&mapping->i_private_list)) {
-		bh = BH_ENTRY(mapping->i_private_list.next);
+	spin_lock(&mmb->lock);
+	while (!list_empty(&mmb->list)) {
+		bh = BH_ENTRY(mmb->list.next);
 		WARN_ON_ONCE(bh->b_assoc_map != mapping);
-		__remove_assoc_queue(bh);
+		__remove_assoc_queue(mmb, bh);
 		/* Avoid race with mark_buffer_dirty_inode() which does
 		 * a lockless check and we rely on seeing the dirty bit */
 		smp_mb();
@@ -573,7 +581,7 @@ int sync_mapping_buffers(struct address_space *mapping)
 			bh->b_assoc_map = mapping;
 			if (buffer_dirty(bh)) {
 				get_bh(bh);
-				spin_unlock(&buffer_mapping->i_private_lock);
+				spin_unlock(&mmb->lock);
 				/*
 				 * Ensure any pending I/O completes so that
 				 * write_dirty_buffer() actually writes the
@@ -590,35 +598,34 @@ int sync_mapping_buffers(struct address_space *mapping)
 				 * through sync_buffer().
 				 */
 				brelse(bh);
-				spin_lock(&buffer_mapping->i_private_lock);
+				spin_lock(&mmb->lock);
 			}
 		}
 	}
 
-	spin_unlock(&buffer_mapping->i_private_lock);
+	spin_unlock(&mmb->lock);
 	blk_finish_plug(&plug);
-	spin_lock(&buffer_mapping->i_private_lock);
+	spin_lock(&mmb->lock);
 
 	while (!list_empty(&tmp)) {
 		bh = BH_ENTRY(tmp.prev);
 		get_bh(bh);
-		__remove_assoc_queue(bh);
+		__remove_assoc_queue(mmb, bh);
 		/* Avoid race with mark_buffer_dirty_inode() which does
 		 * a lockless check and we rely on seeing the dirty bit */
 		smp_mb();
 		if (buffer_dirty(bh)) {
-			list_add(&bh->b_assoc_buffers,
-				 &mapping->i_private_list);
+			list_add(&bh->b_assoc_buffers, &mmb->list);
 			bh->b_assoc_map = mapping;
 		}
-		spin_unlock(&buffer_mapping->i_private_lock);
+		spin_unlock(&mmb->lock);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh))
 			err = -EIO;
 		brelse(bh);
-		spin_lock(&buffer_mapping->i_private_lock);
+		spin_lock(&mmb->lock);
 	}
-	spin_unlock(&buffer_mapping->i_private_lock);
+	spin_unlock(&mmb->lock);
 	return err;
 }
 EXPORT_SYMBOL(sync_mapping_buffers);
@@ -715,15 +722,14 @@ void write_boundary_block(struct block_device *bdev,
 void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 {
 	struct address_space *mapping = inode->i_mapping;
-	struct address_space *buffer_mapping = bh->b_folio->mapping;
 
 	mark_buffer_dirty(bh);
 	if (!bh->b_assoc_map) {
-		spin_lock(&buffer_mapping->i_private_lock);
+		spin_lock(&mapping->i_metadata_bhs.lock);
 		list_move_tail(&bh->b_assoc_buffers,
-				&mapping->i_private_list);
+				&mapping->i_metadata_bhs.list);
 		bh->b_assoc_map = mapping;
-		spin_unlock(&buffer_mapping->i_private_lock);
+		spin_unlock(&mapping->i_metadata_bhs.lock);
 	}
 }
 EXPORT_SYMBOL(mark_buffer_dirty_inode);
@@ -796,22 +802,16 @@ EXPORT_SYMBOL(block_dirty_folio);
  * Invalidate any and all dirty buffers on a given inode.  We are
  * probably unmounting the fs, but that doesn't mean we have already
  * done a sync().  Just drop the buffers from the inode list.
- *
- * NOTE: we take the inode's blockdev's mapping's i_private_lock.  Which
- * assumes that all the buffers are against the blockdev.
  */
 void invalidate_inode_buffers(struct inode *inode)
 {
 	if (inode_has_buffers(inode)) {
-		struct address_space *mapping = &inode->i_data;
-		struct list_head *list = &mapping->i_private_list;
-		struct address_space *buffer_mapping =
-				mapping->host->i_sb->s_bdev->bd_mapping;
-
-		spin_lock(&buffer_mapping->i_private_lock);
-		while (!list_empty(list))
-			__remove_assoc_queue(BH_ENTRY(list->next));
-		spin_unlock(&buffer_mapping->i_private_lock);
+		struct mapping_metadata_bhs *mmb = &inode->i_data.i_metadata_bhs;
+
+		spin_lock(&mmb->lock);
+		while (!list_empty(&mmb->list))
+			__remove_assoc_queue(mmb, BH_ENTRY(mmb->list.next));
+		spin_unlock(&mmb->lock);
 	}
 }
 EXPORT_SYMBOL(invalidate_inode_buffers);
@@ -1155,14 +1155,7 @@ EXPORT_SYMBOL(__brelse);
 void __bforget(struct buffer_head *bh)
 {
 	clear_buffer_dirty(bh);
-	if (bh->b_assoc_map) {
-		struct address_space *buffer_mapping = bh->b_folio->mapping;
-
-		spin_lock(&buffer_mapping->i_private_lock);
-		list_del_init(&bh->b_assoc_buffers);
-		bh->b_assoc_map = NULL;
-		spin_unlock(&buffer_mapping->i_private_lock);
-	}
+	remove_assoc_queue(bh);
 	__brelse(bh);
 }
 EXPORT_SYMBOL(__bforget);
@@ -2810,8 +2803,7 @@ drop_buffers(struct folio *folio, struct buffer_head **buffers_to_free)
 	do {
 		struct buffer_head *next = bh->b_this_page;
 
-		if (bh->b_assoc_map)
-			__remove_assoc_queue(bh);
+		remove_assoc_queue(bh);
 		bh = next;
 	} while (bh != head);
 	*buffers_to_free = head;
diff --git a/fs/inode.c b/fs/inode.c
index d5774e627a9c..393f586d050a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -483,6 +483,8 @@ static void __address_space_init_once(struct address_space *mapping)
 	init_rwsem(&mapping->i_mmap_rwsem);
 	INIT_LIST_HEAD(&mapping->i_private_list);
 	spin_lock_init(&mapping->i_private_lock);
+	spin_lock_init(&mapping->i_metadata_bhs.lock);
+	INIT_LIST_HEAD(&mapping->i_metadata_bhs.list);
 	mapping->i_mmap = RB_ROOT_CACHED;
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d488459396f4..76360b0040e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -445,6 +445,12 @@ struct address_space_operations {
 
 extern const struct address_space_operations empty_aops;
 
+/* Structure for tracking metadata buffer heads associated with the mapping */
+struct mapping_metadata_bhs {
+	spinlock_t lock;	/* Lock protecting bh list */
+	struct list_head list;	/* The list of bhs (b_assoc_buffers) */
+};
+
 /**
  * struct address_space - Contents of a cacheable, mappable object.
  * @host: Owner, either the inode or the block_device.
@@ -484,6 +490,7 @@ struct address_space {
 	errseq_t		wb_err;
 	spinlock_t		i_private_lock;
 	struct list_head	i_private_list;
+	struct mapping_metadata_bhs i_metadata_bhs;
 	struct rw_semaphore	i_mmap_rwsem;
 } __attribute__((aligned(sizeof(long)))) __randomize_layout;
 	/*
-- 
cgit v1.2.3


From c86f5d25514c2a60fcf5ea0aa11c5d8bd1a313ef Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 10:54:25 +0100
Subject: fs: Make bhs point to mapping_metadata_bhs

Make buffer heads point to mapping_metadata_bhs instead of struct
address_space. This makes the code more self contained. For the (only)
case of IO error handling where we really need to reach struct
address_space add a pointer to the mapping from mapping_metadata_bhs.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-73-jack@suse.cz
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/buffer.c                 | 34 ++++++++++++++++------------------
 fs/inode.c                  |  1 +
 include/linux/buffer_head.h |  4 ++--
 include/linux/fs.h          |  1 +
 4 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 294f9cd07f42..67b3d4624503 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -497,13 +497,12 @@ static void __remove_assoc_queue(struct mapping_metadata_bhs *mmb,
 {
 	lockdep_assert_held(&mmb->lock);
 	list_del_init(&bh->b_assoc_buffers);
-	WARN_ON(!bh->b_assoc_map);
-	bh->b_assoc_map = NULL;
+	WARN_ON(!bh->b_mmb);
+	bh->b_mmb = NULL;
 }
 
 static void remove_assoc_queue(struct buffer_head *bh)
 {
-	struct address_space *mapping;
 	struct mapping_metadata_bhs *mmb;
 
 	/*
@@ -514,13 +513,12 @@ static void remove_assoc_queue(struct buffer_head *bh)
 	 * opportunistically acquire the lock and then recheck the bh
 	 * didn't move under us.
 	 */
-	while (bh->b_assoc_map) {
+	while (bh->b_mmb) {
 		rcu_read_lock();
-		mapping = READ_ONCE(bh->b_assoc_map);
-		if (mapping) {
-			mmb = &mapping->i_metadata_bhs;
+		mmb = READ_ONCE(bh->b_mmb);
+		if (mmb) {
 			spin_lock(&mmb->lock);
-			if (bh->b_assoc_map == mapping)
+			if (bh->b_mmb == mmb)
 				__remove_assoc_queue(mmb, bh);
 			spin_unlock(&mmb->lock);
 		}
@@ -551,9 +549,9 @@ EXPORT_SYMBOL_GPL(inode_has_buffers);
  * Do this in two main stages: first we copy dirty buffers to a
  * temporary inode list, queueing the writes as we go. Then we clean
  * up, waiting for those writes to complete. mark_buffer_dirty_inode()
- * doesn't touch b_assoc_buffers list if b_assoc_map is not NULL so we
- * are sure the buffer stays on our list until IO completes (at which point
- * it can be reaped).
+ * doesn't touch b_assoc_buffers list if b_mmb is not NULL so we are sure the
+ * buffer stays on our list until IO completes (at which point it can be
+ * reaped).
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
@@ -571,14 +569,14 @@ int sync_mapping_buffers(struct address_space *mapping)
 	spin_lock(&mmb->lock);
 	while (!list_empty(&mmb->list)) {
 		bh = BH_ENTRY(mmb->list.next);
-		WARN_ON_ONCE(bh->b_assoc_map != mapping);
+		WARN_ON_ONCE(bh->b_mmb != mmb);
 		__remove_assoc_queue(mmb, bh);
 		/* Avoid race with mark_buffer_dirty_inode() which does
 		 * a lockless check and we rely on seeing the dirty bit */
 		smp_mb();
 		if (buffer_dirty(bh) || buffer_locked(bh)) {
 			list_add(&bh->b_assoc_buffers, &tmp);
-			bh->b_assoc_map = mapping;
+			bh->b_mmb = mmb;
 			if (buffer_dirty(bh)) {
 				get_bh(bh);
 				spin_unlock(&mmb->lock);
@@ -616,7 +614,7 @@ int sync_mapping_buffers(struct address_space *mapping)
 		smp_mb();
 		if (buffer_dirty(bh)) {
 			list_add(&bh->b_assoc_buffers, &mmb->list);
-			bh->b_assoc_map = mapping;
+			bh->b_mmb = mmb;
 		}
 		spin_unlock(&mmb->lock);
 		wait_on_buffer(bh);
@@ -724,11 +722,11 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 	struct address_space *mapping = inode->i_mapping;
 
 	mark_buffer_dirty(bh);
-	if (!bh->b_assoc_map) {
+	if (!bh->b_mmb) {
 		spin_lock(&mapping->i_metadata_bhs.lock);
 		list_move_tail(&bh->b_assoc_buffers,
 				&mapping->i_metadata_bhs.list);
-		bh->b_assoc_map = mapping;
+		bh->b_mmb = &mapping->i_metadata_bhs;
 		spin_unlock(&mapping->i_metadata_bhs.lock);
 	}
 }
@@ -1124,8 +1122,8 @@ void mark_buffer_write_io_error(struct buffer_head *bh)
 	/* FIXME: do we need to set this in both places? */
 	if (bh->b_folio && bh->b_folio->mapping)
 		mapping_set_error(bh->b_folio->mapping, -EIO);
-	if (bh->b_assoc_map)
-		mapping_set_error(bh->b_assoc_map, -EIO);
+	if (bh->b_mmb)
+		mapping_set_error(bh->b_mmb->mapping, -EIO);
 }
 EXPORT_SYMBOL(mark_buffer_write_io_error);
 
diff --git a/fs/inode.c b/fs/inode.c
index 393f586d050a..3874b933abdb 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -276,6 +276,7 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp
 
 	mapping->a_ops = &empty_aops;
 	mapping->host = inode;
+	mapping->i_metadata_bhs.mapping = mapping;
 	mapping->flags = 0;
 	mapping->wb_err = 0;
 	atomic_set(&mapping->i_mmap_writable, 0);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 631bf971efc0..20636599d858 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -73,8 +73,8 @@ struct buffer_head {
 	bh_end_io_t *b_end_io;		/* I/O completion */
  	void *b_private;		/* reserved for b_end_io */
 	struct list_head b_assoc_buffers; /* associated with another mapping */
-	struct address_space *b_assoc_map;	/* mapping this buffer is
-						   associated with */
+	struct mapping_metadata_bhs *b_mmb; /* head of the list of metadata bhs
+					     * this buffer is associated with */
 	atomic_t b_count;		/* users using this buffer_head */
 	spinlock_t b_uptodate_lock;	/* Used by the first bh in a page, to
 					 * serialise IO completion of other
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 76360b0040e0..fa2a812bd718 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -447,6 +447,7 @@ extern const struct address_space_operations empty_aops;
 
 /* Structure for tracking metadata buffer heads associated with the mapping */
 struct mapping_metadata_bhs {
+	struct address_space *mapping;	/* Mapping bhs are associated with */
 	spinlock_t lock;	/* Lock protecting bh list */
 	struct list_head list;	/* The list of bhs (b_assoc_buffers) */
 };
-- 
cgit v1.2.3


From 025c9af1a20c8353f586c9bfd30705dfe4a277de Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 10:54:26 +0100
Subject: fs: Switch inode_has_buffers() to take mapping_metadata_bhs

As part of a move towards placing mapping_metadata_bhs in fs-private
inode part, switch inode_has_buffers() to take mapping_metadata_bhs
and rename the function to mmb_has_buffers().

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-74-jack@suse.cz
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/buffer.c                 | 14 +++++++-------
 fs/ext4/inode.c             |  2 +-
 include/linux/buffer_head.h |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 67b3d4624503..b0436481d0f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -468,7 +468,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
  * written back and waited upon before fsync() returns.
  *
  * The functions mark_buffer_dirty_inode(), fsync_inode_buffers(),
- * inode_has_buffers() and invalidate_inode_buffers() are provided for the
+ * mmb_has_buffers() and invalidate_inode_buffers() are provided for the
  * management of a list of dependent buffers in mapping_metadata_bhs struct.
  *
  * The locking is a little subtle: The list of buffer heads is protected by
@@ -526,11 +526,11 @@ static void remove_assoc_queue(struct buffer_head *bh)
 	}
 }
 
-int inode_has_buffers(struct inode *inode)
+bool mmb_has_buffers(struct mapping_metadata_bhs *mmb)
 {
-	return !list_empty(&inode->i_data.i_metadata_bhs.list);
+	return !list_empty(&mmb->list);
 }
-EXPORT_SYMBOL_GPL(inode_has_buffers);
+EXPORT_SYMBOL_GPL(mmb_has_buffers);
 
 /**
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
@@ -561,7 +561,7 @@ int sync_mapping_buffers(struct address_space *mapping)
 	struct blk_plug plug;
 	LIST_HEAD(tmp);
 
-	if (list_empty(&mmb->list))
+	if (!mmb_has_buffers(mmb))
 		return 0;
 
 	blk_start_plug(&plug);
@@ -803,9 +803,9 @@ EXPORT_SYMBOL(block_dirty_folio);
  */
 void invalidate_inode_buffers(struct inode *inode)
 {
-	if (inode_has_buffers(inode)) {
-		struct mapping_metadata_bhs *mmb = &inode->i_data.i_metadata_bhs;
+	struct mapping_metadata_bhs *mmb = &inode->i_data.i_metadata_bhs;
 
+	if (mmb_has_buffers(mmb)) {
 		spin_lock(&mmb->lock);
 		while (!list_empty(&mmb->list))
 			__remove_assoc_queue(mmb, BH_ENTRY(mmb->list.next));
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6f892abef003..011cb2eb16a2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3436,7 +3436,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
 	}
 
 	/* Any metadata buffers to write? */
-	if (inode_has_buffers(inode))
+	if (mmb_has_buffers(&inode->i_mapping->i_metadata_bhs))
 		return true;
 	return inode_state_read_once(inode) & I_DIRTY_DATASYNC;
 }
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 20636599d858..44094fd476f5 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -515,7 +515,7 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio);
 
 void buffer_init(void);
 bool try_to_free_buffers(struct folio *folio);
-int inode_has_buffers(struct inode *inode);
+bool mmb_has_buffers(struct mapping_metadata_bhs *mmb);
 void invalidate_inode_buffers(struct inode *inode);
 int sync_mapping_buffers(struct address_space *mapping);
 void invalidate_bh_lrus(void);
-- 
cgit v1.2.3


From a8c8122a3dac55d25a1912b8fec9b8cd7366c37a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 10:54:27 +0100
Subject: fs: Provide functions for handling mapping_metadata_bhs directly

As part of transition toward moving mapping_metadata_bhs to fs-private
part of the inode, provide functions for operations on this list
directly instead of going through the inode / mapping.

Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-75-jack@suse.cz
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/buffer.c                 | 110 ++++++++++++++++++++------------------------
 include/linux/buffer_head.h |  44 ++++++++++++++----
 2 files changed, 87 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index b0436481d0f1..cbed175f418b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -467,31 +467,25 @@ EXPORT_SYMBOL(mark_buffer_async_write);
  * a successful fsync().  For example, ext2 indirect blocks need to be
  * written back and waited upon before fsync() returns.
  *
- * The functions mark_buffer_dirty_inode(), fsync_inode_buffers(),
- * mmb_has_buffers() and invalidate_inode_buffers() are provided for the
- * management of a list of dependent buffers in mapping_metadata_bhs struct.
+ * The functions mmb_mark_buffer_dirty(), mmb_sync(), mmb_has_buffers()
+ * and mmb_invalidate() are provided for the management of a list of dependent
+ * buffers in mapping_metadata_bhs struct.
  *
  * The locking is a little subtle: The list of buffer heads is protected by
  * the lock in mapping_metadata_bhs so functions coming from bdev mapping
  * (such as try_to_free_buffers()) need to safely get to mapping_metadata_bhs
  * using RCU, grab the lock, verify we didn't race with somebody detaching the
  * bh / moving it to different inode and only then proceeding.
- *
- * FIXME: mark_buffer_dirty_inode() is a data-plane operation.  It should
- * take an address_space, not an inode.  And it should be called
- * mark_buffer_dirty_fsync() to clearly define why those buffers are being
- * queued up.
- *
- * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
- * list if it is already on a list.  Because if the buffer is on a list,
- * it *must* already be on the right one.  If not, the filesystem is being
- * silly.  This will save a ton of locking.  But first we have to ensure
- * that buffers are taken *off* the old inode's list when they are freed
- * (presumably in truncate).  That requires careful auditing of all
- * filesystems (do it inside bforget()).  It could also be done by bringing
- * b_inode back.
  */
 
+void mmb_init(struct mapping_metadata_bhs *mmb, struct address_space *mapping)
+{
+	spin_lock_init(&mmb->lock);
+	INIT_LIST_HEAD(&mmb->list);
+	mmb->mapping = mapping;
+}
+EXPORT_SYMBOL(mmb_init);
+
 static void __remove_assoc_queue(struct mapping_metadata_bhs *mmb,
 			         struct buffer_head *bh)
 {
@@ -533,12 +527,12 @@ bool mmb_has_buffers(struct mapping_metadata_bhs *mmb)
 EXPORT_SYMBOL_GPL(mmb_has_buffers);
 
 /**
- * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
- * @mapping: the mapping which wants those buffers written
+ * mmb_sync - write out & wait upon all buffers in a list
+ * @mmb: the list of buffers to write
  *
- * Starts I/O against the buffers at mapping->i_metadata_bhs and waits upon
- * that I/O. Basically, this is a convenience function for fsync().  @mapping
- * is a file or directory which needs those buffers to be written for a
+ * Starts I/O against the buffers in the given list and waits upon
+ * that I/O. Basically, this is a convenience function for fsync().  @mmb is
+ * for a file or directory which needs those buffers to be written for a
  * successful fsync().
  *
  * We have conflicting pressures: we want to make sure that all
@@ -553,9 +547,8 @@ EXPORT_SYMBOL_GPL(mmb_has_buffers);
  * buffer stays on our list until IO completes (at which point it can be
  * reaped).
  */
-int sync_mapping_buffers(struct address_space *mapping)
+int mmb_sync(struct mapping_metadata_bhs *mmb)
 {
-	struct mapping_metadata_bhs *mmb = &mapping->i_metadata_bhs;
 	struct buffer_head *bh;
 	int err = 0;
 	struct blk_plug plug;
@@ -626,33 +619,35 @@ int sync_mapping_buffers(struct address_space *mapping)
 	spin_unlock(&mmb->lock);
 	return err;
 }
-EXPORT_SYMBOL(sync_mapping_buffers);
+EXPORT_SYMBOL(mmb_sync);
 
 /**
- * generic_buffers_fsync_noflush - generic buffer fsync implementation
- * for simple filesystems with no inode lock
+ * mmb_fsync_noflush - fsync implementation for simple filesystems with
+ * 		       metadata buffers list
  *
  * @file:	file to synchronize
+ * @mmb:	list of metadata bhs to flush
  * @start:	start offset in bytes
  * @end:	end offset in bytes (inclusive)
  * @datasync:	only synchronize essential metadata if true
  *
- * This is a generic implementation of the fsync method for simple
- * filesystems which track all non-inode metadata in the buffers list
- * hanging off the address_space structure.
+ * This is an implementation of the fsync method for simple filesystems which
+ * track all non-inode metadata in the buffers list hanging off the @mmb
+ * structure.
  */
-int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
-				  bool datasync)
+int mmb_fsync_noflush(struct file *file, struct mapping_metadata_bhs *mmb,
+		      loff_t start, loff_t end, bool datasync)
 {
 	struct inode *inode = file->f_mapping->host;
 	int err;
-	int ret;
+	int ret = 0;
 
 	err = file_write_and_wait_range(file, start, end);
 	if (err)
 		return err;
 
-	ret = sync_mapping_buffers(inode->i_mapping);
+	if (mmb)
+		ret = mmb_sync(mmb);
 	if (!(inode_state_read_once(inode) & I_DIRTY_ALL))
 		goto out;
 	if (datasync && !(inode_state_read_once(inode) & I_DIRTY_DATASYNC))
@@ -669,34 +664,35 @@ out:
 		ret = err;
 	return ret;
 }
-EXPORT_SYMBOL(generic_buffers_fsync_noflush);
+EXPORT_SYMBOL(mmb_fsync_noflush);
 
 /**
- * generic_buffers_fsync - generic buffer fsync implementation
- * for simple filesystems with no inode lock
+ * mmb_fsync - fsync implementation for simple filesystems with metadata
+ * 	       buffers list
  *
  * @file:	file to synchronize
+ * @mmb:	list of metadata bhs to flush
  * @start:	start offset in bytes
  * @end:	end offset in bytes (inclusive)
  * @datasync:	only synchronize essential metadata if true
  *
- * This is a generic implementation of the fsync method for simple
- * filesystems which track all non-inode metadata in the buffers list
- * hanging off the address_space structure. This also makes sure that
- * a device cache flush operation is called at the end.
+ * This is an implementation of the fsync method for simple filesystems which
+ * track all non-inode metadata in the buffers list hanging off the @mmb
+ * structure. This also makes sure that a device cache flush operation is
+ * called at the end.
  */
-int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
-			  bool datasync)
+int mmb_fsync(struct file *file, struct mapping_metadata_bhs *mmb,
+	      loff_t start, loff_t end, bool datasync)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
 
-	ret = generic_buffers_fsync_noflush(file, start, end, datasync);
+	ret = mmb_fsync_noflush(file, mmb, start, end, datasync);
 	if (!ret)
 		ret = blkdev_issue_flush(inode->i_sb->s_bdev);
 	return ret;
 }
-EXPORT_SYMBOL(generic_buffers_fsync);
+EXPORT_SYMBOL(mmb_fsync);
 
 /*
  * Called when we've recently written block `bblock', and it is known that
@@ -717,20 +713,18 @@ void write_boundary_block(struct block_device *bdev,
 	}
 }
 
-void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
+void mmb_mark_buffer_dirty(struct buffer_head *bh,
+			   struct mapping_metadata_bhs *mmb)
 {
-	struct address_space *mapping = inode->i_mapping;
-
 	mark_buffer_dirty(bh);
 	if (!bh->b_mmb) {
-		spin_lock(&mapping->i_metadata_bhs.lock);
-		list_move_tail(&bh->b_assoc_buffers,
-				&mapping->i_metadata_bhs.list);
-		bh->b_mmb = &mapping->i_metadata_bhs;
-		spin_unlock(&mapping->i_metadata_bhs.lock);
+		spin_lock(&mmb->lock);
+		list_move_tail(&bh->b_assoc_buffers, &mmb->list);
+		bh->b_mmb = mmb;
+		spin_unlock(&mmb->lock);
 	}
 }
-EXPORT_SYMBOL(mark_buffer_dirty_inode);
+EXPORT_SYMBOL(mmb_mark_buffer_dirty);
 
 /**
  * block_dirty_folio - Mark a folio as dirty.
@@ -797,14 +791,12 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
 EXPORT_SYMBOL(block_dirty_folio);
 
 /*
- * Invalidate any and all dirty buffers on a given inode.  We are
+ * Invalidate any and all dirty buffers on a given buffers list.  We are
  * probably unmounting the fs, but that doesn't mean we have already
  * done a sync().  Just drop the buffers from the inode list.
  */
-void invalidate_inode_buffers(struct inode *inode)
+void mmb_invalidate(struct mapping_metadata_bhs *mmb)
 {
-	struct mapping_metadata_bhs *mmb = &inode->i_data.i_metadata_bhs;
-
 	if (mmb_has_buffers(mmb)) {
 		spin_lock(&mmb->lock);
 		while (!list_empty(&mmb->list))
@@ -812,7 +804,7 @@ void invalidate_inode_buffers(struct inode *inode)
 		spin_unlock(&mmb->lock);
 	}
 }
-EXPORT_SYMBOL(invalidate_inode_buffers);
+EXPORT_SYMBOL(mmb_invalidate);
 
 /*
  * Create the appropriate buffers when given a folio for data area and
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 44094fd476f5..e207dcca7a25 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -205,12 +205,30 @@ struct buffer_head *create_empty_buffers(struct folio *folio,
 void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
 void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
 
-/* Things to do with buffers at mapping->private_list */
-void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
-int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
-				  bool datasync);
-int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
-			  bool datasync);
+/* Things to do with metadata buffers list */
+void mmb_mark_buffer_dirty(struct buffer_head *bh, struct mapping_metadata_bhs *mmb);
+static inline void mark_buffer_dirty_inode(struct buffer_head *bh,
+					   struct inode *inode)
+{
+	mmb_mark_buffer_dirty(bh, &inode->i_data.i_metadata_bhs);
+}
+int mmb_fsync_noflush(struct file *file, struct mapping_metadata_bhs *mmb,
+		      loff_t start, loff_t end, bool datasync);
+static inline int generic_buffers_fsync_noflush(struct file *file,
+						loff_t start, loff_t end,
+						bool datasync)
+{
+	return mmb_fsync_noflush(file, &file->f_mapping->i_metadata_bhs,
+				 start, end, datasync);
+}
+int mmb_fsync(struct file *file, struct mapping_metadata_bhs *mmb,
+	      loff_t start, loff_t end, bool datasync);
+static inline int generic_buffers_fsync(struct file *file,
+					loff_t start, loff_t end, bool datasync)
+{
+	return mmb_fsync(file, &file->f_mapping->i_metadata_bhs,
+			 start, end, datasync);
+}
 void clean_bdev_aliases(struct block_device *bdev, sector_t block,
 			sector_t len);
 static inline void clean_bdev_bh_alias(struct buffer_head *bh)
@@ -515,9 +533,18 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio);
 
 void buffer_init(void);
 bool try_to_free_buffers(struct folio *folio);
+void mmb_init(struct mapping_metadata_bhs *mmb, struct address_space *mapping);
 bool mmb_has_buffers(struct mapping_metadata_bhs *mmb);
-void invalidate_inode_buffers(struct inode *inode);
-int sync_mapping_buffers(struct address_space *mapping);
+void mmb_invalidate(struct mapping_metadata_bhs *mmb);
+int mmb_sync(struct mapping_metadata_bhs *mmb);
+static inline void invalidate_inode_buffers(struct inode *inode)
+{
+	mmb_invalidate(&inode->i_data.i_metadata_bhs);
+}
+static inline int sync_mapping_buffers(struct address_space *mapping)
+{
+	return mmb_sync(&mapping->i_metadata_bhs);
+}
 void invalidate_bh_lrus(void);
 void invalidate_bh_lrus_cpu(void);
 bool has_bh_in_lru(int cpu, void *dummy);
@@ -527,6 +554,7 @@ extern int buffer_heads_over_limit;
 
 static inline void buffer_init(void) {}
 static inline bool try_to_free_buffers(struct folio *folio) { return true; }
+static inline int mmb_sync(struct mapping_metadata_bhs *mmb) { return 0; }
 static inline void invalidate_inode_buffers(struct inode *inode) {}
 static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
 static inline void invalidate_bh_lrus(void) {}
-- 
cgit v1.2.3


From cb6d109b9ccc374d09812c2387ab826499ee6562 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 10:54:35 +0100
Subject: fs: Drop mapping_metadata_bhs from address space

Nobody uses mapping_metadata_bhs in struct address_space anymore. Just
remove it and with it all helper functions using it.

Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-83-jack@suse.cz
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/inode.c                  |  3 ---
 include/linux/buffer_head.h | 28 ----------------------------
 include/linux/fs.h          |  1 -
 3 files changed, 32 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 3874b933abdb..d5774e627a9c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -276,7 +276,6 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp
 
 	mapping->a_ops = &empty_aops;
 	mapping->host = inode;
-	mapping->i_metadata_bhs.mapping = mapping;
 	mapping->flags = 0;
 	mapping->wb_err = 0;
 	atomic_set(&mapping->i_mmap_writable, 0);
@@ -484,8 +483,6 @@ static void __address_space_init_once(struct address_space *mapping)
 	init_rwsem(&mapping->i_mmap_rwsem);
 	INIT_LIST_HEAD(&mapping->i_private_list);
 	spin_lock_init(&mapping->i_private_lock);
-	spin_lock_init(&mapping->i_metadata_bhs.lock);
-	INIT_LIST_HEAD(&mapping->i_metadata_bhs.list);
 	mapping->i_mmap = RB_ROOT_CACHED;
 }
 
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e207dcca7a25..e4939e33b4b5 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -207,28 +207,10 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
 
 /* Things to do with metadata buffers list */
 void mmb_mark_buffer_dirty(struct buffer_head *bh, struct mapping_metadata_bhs *mmb);
-static inline void mark_buffer_dirty_inode(struct buffer_head *bh,
-					   struct inode *inode)
-{
-	mmb_mark_buffer_dirty(bh, &inode->i_data.i_metadata_bhs);
-}
 int mmb_fsync_noflush(struct file *file, struct mapping_metadata_bhs *mmb,
 		      loff_t start, loff_t end, bool datasync);
-static inline int generic_buffers_fsync_noflush(struct file *file,
-						loff_t start, loff_t end,
-						bool datasync)
-{
-	return mmb_fsync_noflush(file, &file->f_mapping->i_metadata_bhs,
-				 start, end, datasync);
-}
 int mmb_fsync(struct file *file, struct mapping_metadata_bhs *mmb,
 	      loff_t start, loff_t end, bool datasync);
-static inline int generic_buffers_fsync(struct file *file,
-					loff_t start, loff_t end, bool datasync)
-{
-	return mmb_fsync(file, &file->f_mapping->i_metadata_bhs,
-			 start, end, datasync);
-}
 void clean_bdev_aliases(struct block_device *bdev, sector_t block,
 			sector_t len);
 static inline void clean_bdev_bh_alias(struct buffer_head *bh)
@@ -537,14 +519,6 @@ void mmb_init(struct mapping_metadata_bhs *mmb, struct address_space *mapping);
 bool mmb_has_buffers(struct mapping_metadata_bhs *mmb);
 void mmb_invalidate(struct mapping_metadata_bhs *mmb);
 int mmb_sync(struct mapping_metadata_bhs *mmb);
-static inline void invalidate_inode_buffers(struct inode *inode)
-{
-	mmb_invalidate(&inode->i_data.i_metadata_bhs);
-}
-static inline int sync_mapping_buffers(struct address_space *mapping)
-{
-	return mmb_sync(&mapping->i_metadata_bhs);
-}
 void invalidate_bh_lrus(void);
 void invalidate_bh_lrus_cpu(void);
 bool has_bh_in_lru(int cpu, void *dummy);
@@ -555,8 +529,6 @@ extern int buffer_heads_over_limit;
 static inline void buffer_init(void) {}
 static inline bool try_to_free_buffers(struct folio *folio) { return true; }
 static inline int mmb_sync(struct mapping_metadata_bhs *mmb) { return 0; }
-static inline void invalidate_inode_buffers(struct inode *inode) {}
-static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
 static inline void invalidate_bh_lrus(void) {}
 static inline void invalidate_bh_lrus_cpu(void) {}
 static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fa2a812bd718..ccfa696253c8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -491,7 +491,6 @@ struct address_space {
 	errseq_t		wb_err;
 	spinlock_t		i_private_lock;
 	struct list_head	i_private_list;
-	struct mapping_metadata_bhs i_metadata_bhs;
 	struct rw_semaphore	i_mmap_rwsem;
 } __attribute__((aligned(sizeof(long)))) __randomize_layout;
 	/*
-- 
cgit v1.2.3


From f219798ce294e346031022a85670f68eb2dec10e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 10:54:36 +0100
Subject: fs: Drop i_private_list from address_space

Nobody is using i_private_list anymore. Remove it.

Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-84-jack@suse.cz
Tested-by: syzbot@syzkaller.appspotmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/inode.c         | 2 --
 include/linux/fs.h | 2 --
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index d5774e627a9c..a8f019078fab 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -481,7 +481,6 @@ static void __address_space_init_once(struct address_space *mapping)
 {
 	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
 	init_rwsem(&mapping->i_mmap_rwsem);
-	INIT_LIST_HEAD(&mapping->i_private_list);
 	spin_lock_init(&mapping->i_private_lock);
 	mapping->i_mmap = RB_ROOT_CACHED;
 }
@@ -795,7 +794,6 @@ void clear_inode(struct inode *inode)
 	 * nor even WARN_ON(!mapping_empty).
 	 */
 	xa_unlock_irq(&inode->i_data.i_pages);
-	BUG_ON(!list_empty(&inode->i_data.i_private_list));
 	BUG_ON(!(inode_state_read_once(inode) & I_FREEING));
 	BUG_ON(inode_state_read_once(inode) & I_CLEAR);
 	BUG_ON(!list_empty(&inode->i_wb_list));
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ccfa696253c8..a3bed26d066d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -471,7 +471,6 @@ struct mapping_metadata_bhs {
  * @flags: Error bits and flags (AS_*).
  * @wb_err: The most recent error which has occurred.
  * @i_private_lock: For use by the owner of the address_space.
- * @i_private_list: For use by the owner of the address_space.
  */
 struct address_space {
 	struct inode		*host;
@@ -490,7 +489,6 @@ struct address_space {
 	unsigned long		flags;
 	errseq_t		wb_err;
 	spinlock_t		i_private_lock;
-	struct list_head	i_private_list;
 	struct rw_semaphore	i_mmap_rwsem;
 } __attribute__((aligned(sizeof(long)))) __randomize_layout;
 	/*
-- 
cgit v1.2.3


From 935a04923ad293cd89bf6ec23fc4efc9cf1a0142 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Thu, 12 Mar 2026 11:04:36 +0100
Subject: nsproxy: Add FOR_EACH_NS_TYPE() X-macro and CLONE_NS_ALL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce the FOR_EACH_NS_TYPE(X) macro as the single source of truth
for the set of (struct type, CLONE_NEW* flag) pairs that define Linux
namespace types.

Currently, the list of CLONE_NEW* flags is duplicated inline in
multiple call sites and would need another copy in each new consumer.
This makes it easy to miss one when a new namespace type is added.

Derive two things from the X-macro:

- CLONE_NS_ALL: Bitmask of all known CLONE_NEW* flags, usable as a
  validity mask or iteration bound.

- ns_common_type(): Rewritten to use the X-macro via a leading-comma
  _Generic pattern, so the struct-to-flag mapping stays in sync with the
  flag set automatically.

Replace the inline flag enumerations in copy_namespaces(),
unshare_nsproxy_namespaces(), check_setns_flags(), and
ksys_unshare() with CLONE_NS_ALL.

When a new namespace type is added, only FOR_EACH_NS_TYPE needs to
be updated; CLONE_NS_ALL, ns_common_type(), and all the call sites
pick up the change automatically.

Cc: Christian Brauner <brauner@kernel.org>
Cc: Günther Noack <gnoack@google.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Link: https://patch.msgid.link/20260312100444.2609563-4-mic@digikod.net
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/ns/ns_common_types.h | 44 +++++++++++++++++++++++++++++---------
 kernel/fork.c                      |  7 +++---
 kernel/nsproxy.c                   | 13 ++++-------
 3 files changed, 41 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h
index 0014fbc1c626..ea45c54e4435 100644
--- a/include/linux/ns/ns_common_types.h
+++ b/include/linux/ns/ns_common_types.h
@@ -7,6 +7,7 @@
 #include <linux/rbtree.h>
 #include <linux/refcount.h>
 #include <linux/types.h>
+#include <uapi/linux/sched.h>
 
 struct cgroup_namespace;
 struct dentry;
@@ -184,15 +185,38 @@ struct ns_common {
 		struct user_namespace *:   (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations   : NULL), \
 		struct uts_namespace *:    (IS_ENABLED(CONFIG_UTS_NS)  ? &utsns_operations    : NULL))
 
-#define ns_common_type(__ns)                                \
-	_Generic((__ns),                                    \
-		struct cgroup_namespace *: CLONE_NEWCGROUP, \
-		struct ipc_namespace *:    CLONE_NEWIPC,    \
-		struct mnt_namespace *:    CLONE_NEWNS,     \
-		struct net *:              CLONE_NEWNET,    \
-		struct pid_namespace *:    CLONE_NEWPID,    \
-		struct time_namespace *:   CLONE_NEWTIME,   \
-		struct user_namespace *:   CLONE_NEWUSER,   \
-		struct uts_namespace *:    CLONE_NEWUTS)
+/*
+ * FOR_EACH_NS_TYPE - Canonical list of namespace types
+ *
+ * Enumerates all (struct type, CLONE_NEW* flag) pairs.  This is the
+ * single source of truth used to derive ns_common_type() and
+ * CLONE_NS_ALL.  When adding a new namespace type, add a single entry
+ * here; all consumers update automatically.
+ *
+ * @X: Callback macro taking (struct_name, clone_flag) as arguments.
+ */
+#define FOR_EACH_NS_TYPE(X)                  \
+	X(cgroup_namespace, CLONE_NEWCGROUP) \
+	X(ipc_namespace, CLONE_NEWIPC)       \
+	X(mnt_namespace, CLONE_NEWNS)        \
+	X(net, CLONE_NEWNET)                 \
+	X(pid_namespace, CLONE_NEWPID)       \
+	X(time_namespace, CLONE_NEWTIME)     \
+	X(user_namespace, CLONE_NEWUSER)     \
+	X(uts_namespace, CLONE_NEWUTS)
+
+/* Bitmask of all known CLONE_NEW* flags. */
+#define _NS_TYPE_FLAG_OR(struct_name, flag) | (flag)
+#define CLONE_NS_ALL                        (0 FOR_EACH_NS_TYPE(_NS_TYPE_FLAG_OR))
+
+/*
+ * ns_common_type - Map a namespace struct pointer to its CLONE_NEW* flag
+ *
+ * Uses a leading-comma pattern so the FOR_EACH_NS_TYPE expansion
+ * produces ", struct foo *: FLAG" entries without a trailing comma.
+ */
+#define _NS_TYPE_ASSOC(struct_name, flag) , struct struct_name *: (flag)
+
+#define ns_common_type(__ns) _Generic((__ns)FOR_EACH_NS_TYPE(_NS_TYPE_ASSOC))
 
 #endif /* _LINUX_NS_COMMON_TYPES_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 65113a304518..767559acd060 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -46,6 +46,7 @@
 #include <linux/mm_inline.h>
 #include <linux/memblock.h>
 #include <linux/nsproxy.h>
+#include <linux/ns/ns_common_types.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
 #include <linux/cgroup.h>
@@ -3046,11 +3047,9 @@ void __init proc_caches_init(void)
  */
 static int check_unshare_flags(unsigned long unshare_flags)
 {
-	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
-				CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP|
-				CLONE_NEWTIME))
+				CLONE_NS_ALL))
 		return -EINVAL;
 	/*
 	 * Not implemented, but pretend it works if there is nothing
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 259c4b4f1eeb..63b44ee79847 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <linux/nsproxy.h>
+#include <linux/ns/ns_common_types.h>
 #include <linux/init_task.h>
 #include <linux/mnt_namespace.h>
 #include <linux/utsname.h>
@@ -170,9 +171,7 @@ int copy_namespaces(u64 flags, struct task_struct *tsk)
 	struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
 	struct nsproxy *new_ns;
 
-	if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			      CLONE_NEWPID | CLONE_NEWNET |
-			      CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
+	if (likely(!(flags & (CLONE_NS_ALL & ~CLONE_NEWUSER)))) {
 		if ((flags & CLONE_VM) ||
 		    likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
 			get_nsproxy(old_ns);
@@ -214,9 +213,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 	struct user_namespace *user_ns;
 	int err = 0;
 
-	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			       CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
-			       CLONE_NEWTIME)))
+	if (!(unshare_flags & (CLONE_NS_ALL & ~CLONE_NEWUSER)))
 		return 0;
 
 	user_ns = new_cred ? new_cred->user_ns : current_user_ns();
@@ -292,9 +289,7 @@ int exec_task_namespaces(void)
 
 static int check_setns_flags(unsigned long flags)
 {
-	if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-				 CLONE_NEWNET | CLONE_NEWTIME | CLONE_NEWUSER |
-				 CLONE_NEWPID | CLONE_NEWCGROUP)))
+	if (!flags || (flags & ~CLONE_NS_ALL))
 		return -EINVAL;
 
 #ifndef CONFIG_USER_NS
-- 
cgit v1.2.3


From bade44fe546212e142befb69ba22f34944030a99 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 24 Mar 2026 14:01:45 -0400
Subject: tracing: Move snapshot code out of trace.c and into trace_snapshot.c

The trace.c file was a dumping ground for most tracing code. Start
organizing it better by moving various functions out into their own files.
Move all the snapshot code, including the max trace code into its own
trace_snapshot.c file.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20260324140145.36352d6a@gandalf.local.home
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h        |    2 +-
 kernel/trace/Makefile         |    1 +
 kernel/trace/trace.c          | 1274 +++--------------------------------------
 kernel/trace/trace.h          |  105 +++-
 kernel/trace/trace_snapshot.c | 1067 ++++++++++++++++++++++++++++++++++
 5 files changed, 1230 insertions(+), 1219 deletions(-)
 create mode 100644 kernel/trace/trace_snapshot.c

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c242fe49af4c..28b30c6f1031 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -31,7 +31,7 @@
 #define ARCH_SUPPORTS_FTRACE_OPS 0
 #endif
 
-#ifdef CONFIG_TRACING
+#ifdef CONFIG_TRACER_SNAPSHOT
 extern void ftrace_boot_snapshot(void);
 #else
 static inline void ftrace_boot_snapshot(void) { }
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 04096c21d06b..83aeb5c77008 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_TRACING) += trace_seq.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_TRACING) += trace_printk.o
 obj-$(CONFIG_TRACING) += trace_pid.o
+obj-$(CONFIG_TRACER_SNAPSHOT) += trace_snapshot.o
 obj-$(CONFIG_TRACING) += 	pid_list.o
 obj-$(CONFIG_TRACING_MAP) += tracing_map.o
 obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index cf48fe23e71f..850f62032fd2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -47,7 +47,6 @@
 #include <linux/trace.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/rt.h>
-#include <linux/fsnotify.h>
 #include <linux/irq_work.h>
 #include <linux/workqueue.h>
 #include <linux/sort.h>
@@ -219,15 +218,9 @@ static void ftrace_trace_userstack(struct trace_array *tr,
 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 static char *default_bootup_tracer;
 
-static bool allocate_snapshot;
-static bool snapshot_at_boot;
-
 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
 static int boot_instance_index;
 
-static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
-static int boot_snapshot_index;
-
 static int __init set_cmdline_ftrace(char *str)
 {
 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
@@ -276,38 +269,6 @@ static int __init stop_trace_on_warning(char *str)
 }
 __setup("traceoff_on_warning", stop_trace_on_warning);
 
-static int __init boot_alloc_snapshot(char *str)
-{
-	char *slot = boot_snapshot_info + boot_snapshot_index;
-	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
-	int ret;
-
-	if (str[0] == '=') {
-		str++;
-		if (strlen(str) >= left)
-			return -1;
-
-		ret = snprintf(slot, left, "%s\t", str);
-		boot_snapshot_index += ret;
-	} else {
-		allocate_snapshot = true;
-		/* We also need the main ring buffer expanded */
-		trace_set_ring_buffer_expanded(NULL);
-	}
-	return 1;
-}
-__setup("alloc_snapshot", boot_alloc_snapshot);
-
-
-static int __init boot_snapshot(char *str)
-{
-	snapshot_at_boot = true;
-	boot_alloc_snapshot(str);
-	return 1;
-}
-__setup("ftrace_boot_snapshot", boot_snapshot);
-
-
 static int __init boot_instance(char *str)
 {
 	char *slot = boot_instance_info + boot_instance_index;
@@ -807,47 +768,6 @@ void tracing_on(void)
 EXPORT_SYMBOL_GPL(tracing_on);
 
 #ifdef CONFIG_TRACER_SNAPSHOT
-static void tracing_snapshot_instance_cond(struct trace_array *tr,
-					   void *cond_data)
-{
-	unsigned long flags;
-
-	if (in_nmi()) {
-		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
-		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
-		return;
-	}
-
-	if (!tr->allocated_snapshot) {
-		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
-		trace_array_puts(tr, "*** stopping trace here!   ***\n");
-		tracer_tracing_off(tr);
-		return;
-	}
-
-	if (tr->mapped) {
-		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
-		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
-		return;
-	}
-
-	/* Note, snapshot can not be used when the tracer uses it */
-	if (tracer_uses_snapshot(tr->current_trace)) {
-		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
-		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
-		return;
-	}
-
-	local_irq_save(flags);
-	update_max_tr(tr, current, smp_processor_id(), cond_data);
-	local_irq_restore(flags);
-}
-
-void tracing_snapshot_instance(struct trace_array *tr)
-{
-	tracing_snapshot_instance_cond(tr, NULL);
-}
-
 /**
  * tracing_snapshot - take a snapshot of the current buffer.
  *
@@ -870,138 +790,6 @@ void tracing_snapshot(void)
 }
 EXPORT_SYMBOL_GPL(tracing_snapshot);
 
-/**
- * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
- * @tr:		The tracing instance to snapshot
- * @cond_data:	The data to be tested conditionally, and possibly saved
- *
- * This is the same as tracing_snapshot() except that the snapshot is
- * conditional - the snapshot will only happen if the
- * cond_snapshot.update() implementation receiving the cond_data
- * returns true, which means that the trace array's cond_snapshot
- * update() operation used the cond_data to determine whether the
- * snapshot should be taken, and if it was, presumably saved it along
- * with the snapshot.
- */
-void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
-{
-	tracing_snapshot_instance_cond(tr, cond_data);
-}
-EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
-
-/**
- * tracing_cond_snapshot_data - get the user data associated with a snapshot
- * @tr:		The tracing instance
- *
- * When the user enables a conditional snapshot using
- * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
- * with the snapshot.  This accessor is used to retrieve it.
- *
- * Should not be called from cond_snapshot.update(), since it takes
- * the tr->max_lock lock, which the code calling
- * cond_snapshot.update() has already done.
- *
- * Returns the cond_data associated with the trace array's snapshot.
- */
-void *tracing_cond_snapshot_data(struct trace_array *tr)
-{
-	void *cond_data = NULL;
-
-	local_irq_disable();
-	arch_spin_lock(&tr->max_lock);
-
-	if (tr->cond_snapshot)
-		cond_data = tr->cond_snapshot->cond_data;
-
-	arch_spin_unlock(&tr->max_lock);
-	local_irq_enable();
-
-	return cond_data;
-}
-EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
-
-static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
-					struct array_buffer *size_buf, int cpu_id);
-static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
-
-int tracing_alloc_snapshot_instance(struct trace_array *tr)
-{
-	int order;
-	int ret;
-
-	if (!tr->allocated_snapshot) {
-
-		/* Make the snapshot buffer have the same order as main buffer */
-		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
-		ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
-		if (ret < 0)
-			return ret;
-
-		/* allocate spare buffer */
-		ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
-				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
-		if (ret < 0)
-			return ret;
-
-		tr->allocated_snapshot = true;
-	}
-
-	return 0;
-}
-
-static void free_snapshot(struct trace_array *tr)
-{
-	/*
-	 * We don't free the ring buffer. instead, resize it because
-	 * The max_tr ring buffer has some state (e.g. ring->clock) and
-	 * we want preserve it.
-	 */
-	ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
-	ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
-	set_buffer_entries(&tr->snapshot_buffer, 1);
-	tracing_reset_online_cpus(&tr->snapshot_buffer);
-	tr->allocated_snapshot = false;
-}
-
-static int tracing_arm_snapshot_locked(struct trace_array *tr)
-{
-	int ret;
-
-	lockdep_assert_held(&trace_types_lock);
-
-	spin_lock(&tr->snapshot_trigger_lock);
-	if (tr->snapshot == UINT_MAX || tr->mapped) {
-		spin_unlock(&tr->snapshot_trigger_lock);
-		return -EBUSY;
-	}
-
-	tr->snapshot++;
-	spin_unlock(&tr->snapshot_trigger_lock);
-
-	ret = tracing_alloc_snapshot_instance(tr);
-	if (ret) {
-		spin_lock(&tr->snapshot_trigger_lock);
-		tr->snapshot--;
-		spin_unlock(&tr->snapshot_trigger_lock);
-	}
-
-	return ret;
-}
-
-int tracing_arm_snapshot(struct trace_array *tr)
-{
-	guard(mutex)(&trace_types_lock);
-	return tracing_arm_snapshot_locked(tr);
-}
-
-void tracing_disarm_snapshot(struct trace_array *tr)
-{
-	spin_lock(&tr->snapshot_trigger_lock);
-	if (!WARN_ON(!tr->snapshot))
-		tr->snapshot--;
-	spin_unlock(&tr->snapshot_trigger_lock);
-}
-
 /**
  * tracing_alloc_snapshot - allocate snapshot buffer.
  *
@@ -1023,129 +811,12 @@ int tracing_alloc_snapshot(void)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
-
-/**
- * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
- *
- * This is similar to tracing_snapshot(), but it will allocate the
- * snapshot buffer if it isn't already allocated. Use this only
- * where it is safe to sleep, as the allocation may sleep.
- *
- * This causes a swap between the snapshot buffer and the current live
- * tracing buffer. You can use this to take snapshots of the live
- * trace when some condition is triggered, but continue to trace.
- */
-void tracing_snapshot_alloc(void)
-{
-	int ret;
-
-	ret = tracing_alloc_snapshot();
-	if (ret < 0)
-		return;
-
-	tracing_snapshot();
-}
-EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
-
-/**
- * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
- * @tr:		The tracing instance
- * @cond_data:	User data to associate with the snapshot
- * @update:	Implementation of the cond_snapshot update function
- *
- * Check whether the conditional snapshot for the given instance has
- * already been enabled, or if the current tracer is already using a
- * snapshot; if so, return -EBUSY, else create a cond_snapshot and
- * save the cond_data and update function inside.
- *
- * Returns 0 if successful, error otherwise.
- */
-int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
-				 cond_update_fn_t update)
-{
-	struct cond_snapshot *cond_snapshot __free(kfree) =
-		kzalloc_obj(*cond_snapshot);
-	int ret;
-
-	if (!cond_snapshot)
-		return -ENOMEM;
-
-	cond_snapshot->cond_data = cond_data;
-	cond_snapshot->update = update;
-
-	guard(mutex)(&trace_types_lock);
-
-	if (tracer_uses_snapshot(tr->current_trace))
-		return -EBUSY;
-
-	/*
-	 * The cond_snapshot can only change to NULL without the
-	 * trace_types_lock. We don't care if we race with it going
-	 * to NULL, but we want to make sure that it's not set to
-	 * something other than NULL when we get here, which we can
-	 * do safely with only holding the trace_types_lock and not
-	 * having to take the max_lock.
-	 */
-	if (tr->cond_snapshot)
-		return -EBUSY;
-
-	ret = tracing_arm_snapshot_locked(tr);
-	if (ret)
-		return ret;
-
-	local_irq_disable();
-	arch_spin_lock(&tr->max_lock);
-	tr->cond_snapshot = no_free_ptr(cond_snapshot);
-	arch_spin_unlock(&tr->max_lock);
-	local_irq_enable();
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
-
-/**
- * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
- * @tr:		The tracing instance
- *
- * Check whether the conditional snapshot for the given instance is
- * enabled; if so, free the cond_snapshot associated with it,
- * otherwise return -EINVAL.
- *
- * Returns 0 if successful, error otherwise.
- */
-int tracing_snapshot_cond_disable(struct trace_array *tr)
-{
-	int ret = 0;
-
-	local_irq_disable();
-	arch_spin_lock(&tr->max_lock);
-
-	if (!tr->cond_snapshot)
-		ret = -EINVAL;
-	else {
-		kfree(tr->cond_snapshot);
-		tr->cond_snapshot = NULL;
-	}
-
-	arch_spin_unlock(&tr->max_lock);
-	local_irq_enable();
-
-	tracing_disarm_snapshot(tr);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
 #else
 void tracing_snapshot(void)
 {
 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
 }
 EXPORT_SYMBOL_GPL(tracing_snapshot);
-void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
-{
-	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
-}
-EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
 int tracing_alloc_snapshot(void)
 {
 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
@@ -1158,23 +829,6 @@ void tracing_snapshot_alloc(void)
 	tracing_snapshot();
 }
 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
-void *tracing_cond_snapshot_data(struct trace_array *tr)
-{
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
-int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
-{
-	return -ENODEV;
-}
-EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
-int tracing_snapshot_cond_disable(struct trace_array *tr)
-{
-	return false;
-}
-EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
-#define free_snapshot(tr)	do { } while (0)
-#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
 #endif /* CONFIG_TRACER_SNAPSHOT */
 
 void tracer_tracing_off(struct trace_array *tr)
@@ -1487,206 +1141,6 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
 
 unsigned long __read_mostly	tracing_thresh;
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-#ifdef LATENCY_FS_NOTIFY
-static struct workqueue_struct *fsnotify_wq;
-
-static void latency_fsnotify_workfn(struct work_struct *work)
-{
-	struct trace_array *tr = container_of(work, struct trace_array,
-					      fsnotify_work);
-	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
-}
-
-static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
-{
-	struct trace_array *tr = container_of(iwork, struct trace_array,
-					      fsnotify_irqwork);
-	queue_work(fsnotify_wq, &tr->fsnotify_work);
-}
-
-__init static int latency_fsnotify_init(void)
-{
-	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
-				      WQ_UNBOUND | WQ_HIGHPRI, 0);
-	if (!fsnotify_wq) {
-		pr_err("Unable to allocate tr_max_lat_wq\n");
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-late_initcall_sync(latency_fsnotify_init);
-
-void latency_fsnotify(struct trace_array *tr)
-{
-	if (!fsnotify_wq)
-		return;
-	/*
-	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
-	 * possible that we are called from __schedule() or do_idle(), which
-	 * could cause a deadlock.
-	 */
-	irq_work_queue(&tr->fsnotify_irqwork);
-}
-#endif /* !LATENCY_FS_NOTIFY */
-
-static const struct file_operations tracing_max_lat_fops;
-
-static void trace_create_maxlat_file(struct trace_array *tr,
-				     struct dentry *d_tracer)
-{
-#ifdef LATENCY_FS_NOTIFY
-	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
-	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
-#endif
-	tr->d_max_latency = trace_create_file("tracing_max_latency",
-					      TRACE_MODE_WRITE,
-					      d_tracer, tr,
-					      &tracing_max_lat_fops);
-}
-
-/*
- * Copy the new maximum trace into the separate maximum-trace
- * structure. (this way the maximum trace is permanently saved,
- * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
- */
-static void
-__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
-{
-	struct array_buffer *trace_buf = &tr->array_buffer;
-	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
-	struct array_buffer *max_buf = &tr->snapshot_buffer;
-	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
-
-	max_buf->cpu = cpu;
-	max_buf->time_start = data->preempt_timestamp;
-
-	max_data->saved_latency = tr->max_latency;
-	max_data->critical_start = data->critical_start;
-	max_data->critical_end = data->critical_end;
-
-	strscpy(max_data->comm, tsk->comm);
-	max_data->pid = tsk->pid;
-	/*
-	 * If tsk == current, then use current_uid(), as that does not use
-	 * RCU. The irq tracer can be called out of RCU scope.
-	 */
-	if (tsk == current)
-		max_data->uid = current_uid();
-	else
-		max_data->uid = task_uid(tsk);
-
-	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
-	max_data->policy = tsk->policy;
-	max_data->rt_priority = tsk->rt_priority;
-
-	/* record this tasks comm */
-	tracing_record_cmdline(tsk);
-	latency_fsnotify(tr);
-}
-#else
-static inline void trace_create_maxlat_file(struct trace_array *tr,
-					    struct dentry *d_tracer) { }
-static inline void __update_max_tr(struct trace_array *tr,
-				   struct task_struct *tsk, int cpu) { }
-#endif /* CONFIG_TRACER_MAX_TRACE */
-
-#ifdef CONFIG_TRACER_SNAPSHOT
-/**
- * update_max_tr - snapshot all trace buffers from global_trace to max_tr
- * @tr: tracer
- * @tsk: the task with the latency
- * @cpu: The cpu that initiated the trace.
- * @cond_data: User data associated with a conditional snapshot
- *
- * Flip the buffers between the @tr and the max_tr and record information
- * about which task was the cause of this latency.
- */
-void
-update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
-	      void *cond_data)
-{
-	if (tr->stop_count)
-		return;
-
-	WARN_ON_ONCE(!irqs_disabled());
-
-	if (!tr->allocated_snapshot) {
-		/* Only the nop tracer should hit this when disabling */
-		WARN_ON_ONCE(tr->current_trace != &nop_trace);
-		return;
-	}
-
-	arch_spin_lock(&tr->max_lock);
-
-	/* Inherit the recordable setting from array_buffer */
-	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
-		ring_buffer_record_on(tr->snapshot_buffer.buffer);
-	else
-		ring_buffer_record_off(tr->snapshot_buffer.buffer);
-
-	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
-		arch_spin_unlock(&tr->max_lock);
-		return;
-	}
-
-	swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);
-
-	__update_max_tr(tr, tsk, cpu);
-
-	arch_spin_unlock(&tr->max_lock);
-
-	/* Any waiters on the old snapshot buffer need to wake up */
-	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
-}
-
-/**
- * update_max_tr_single - only copy one trace over, and reset the rest
- * @tr: tracer
- * @tsk: task with the latency
- * @cpu: the cpu of the buffer to copy.
- *
- * Flip the trace of a single CPU buffer between the @tr and the max_tr.
- */
-void
-update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
-{
-	int ret;
-
-	if (tr->stop_count)
-		return;
-
-	WARN_ON_ONCE(!irqs_disabled());
-	if (!tr->allocated_snapshot) {
-		/* Only the nop tracer should hit this when disabling */
-		WARN_ON_ONCE(tr->current_trace != &nop_trace);
-		return;
-	}
-
-	arch_spin_lock(&tr->max_lock);
-
-	ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);
-
-	if (ret == -EBUSY) {
-		/*
-		 * We failed to swap the buffer due to a commit taking
-		 * place on this CPU. We fail to record, but we reset
-		 * the max trace buffer (no one writes directly to it)
-		 * and flag that it failed.
-		 * Another reason is resize is in progress.
-		 */
-		trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
-			"Failed to swap buffers due to commit or resize in progress\n");
-	}
-
-	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
-
-	__update_max_tr(tr, tsk, cpu);
-	arch_spin_unlock(&tr->max_lock);
-}
-#endif /* CONFIG_TRACER_SNAPSHOT */
-
 struct pipe_wait {
 	struct trace_iterator		*iter;
 	int				wait_index;
@@ -1995,7 +1449,7 @@ int __init register_tracer(struct tracer *type)
 	return 0;
 }
 
-static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
+void tracing_reset_cpu(struct array_buffer *buf, int cpu)
 {
 	struct trace_buffer *buffer = buf->buffer;
 
@@ -3760,50 +3214,6 @@ static void test_ftrace_alive(struct seq_file *m)
 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
 }
 
-#ifdef CONFIG_TRACER_SNAPSHOT
-static void show_snapshot_main_help(struct seq_file *m)
-{
-	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
-		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
-		    "#                      Takes a snapshot of the main buffer.\n"
-		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
-		    "#                      (Doesn't have to be '2' works with any number that\n"
-		    "#                       is not a '0' or '1')\n");
-}
-
-static void show_snapshot_percpu_help(struct seq_file *m)
-{
-	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
-#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
-	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
-		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
-#else
-	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
-		    "#                     Must use main snapshot file to allocate.\n");
-#endif
-	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
-		    "#                      (Doesn't have to be '2' works with any number that\n"
-		    "#                       is not a '0' or '1')\n");
-}
-
-static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
-{
-	if (iter->tr->allocated_snapshot)
-		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
-	else
-		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
-
-	seq_puts(m, "# Snapshot commands:\n");
-	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
-		show_snapshot_main_help(m);
-	else
-		show_snapshot_percpu_help(m);
-}
-#else
-/* Should never be called */
-static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
-#endif
-
 static int s_show(struct seq_file *m, void *v)
 {
 	struct trace_iterator *iter = v;
@@ -3852,17 +3262,6 @@ static int s_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-/*
- * Should be used after trace_array_get(), trace_types_lock
- * ensures that i_cdev was already initialized.
- */
-static inline int tracing_get_cpu(struct inode *inode)
-{
-	if (inode->i_cdev) /* See trace_create_cpu_file() */
-		return (long)inode->i_cdev - 1;
-	return RING_BUFFER_ALL_CPUS;
-}
-
 static const struct seq_operations tracer_seq_ops = {
 	.start		= s_start,
 	.next		= s_next,
@@ -3889,7 +3288,7 @@ static void free_trace_iter_content(struct trace_iterator *iter)
 	free_cpumask_var(iter->started);
 }
 
-static struct trace_iterator *
+struct trace_iterator *
 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
 {
 	struct trace_array *tr = inode->i_private;
@@ -4069,7 +3468,7 @@ int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
 	return single_release(inode, filp);
 }
 
-static int tracing_release(struct inode *inode, struct file *file)
+int tracing_release(struct inode *inode, struct file *file)
 {
 	struct trace_array *tr = inode->i_private;
 	struct seq_file *m = file->private_data;
@@ -5220,7 +4619,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
 	return t->init(tr);
 }
 
-static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
+void trace_set_buffer_entries(struct array_buffer *buf, unsigned long val)
 {
 	int cpu;
 
@@ -5231,40 +4630,12 @@ static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
 static void update_buffer_entries(struct array_buffer *buf, int cpu)
 {
 	if (cpu == RING_BUFFER_ALL_CPUS) {
-		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
+		trace_set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
 	} else {
 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
 	}
 }
 
-#ifdef CONFIG_TRACER_SNAPSHOT
-/* resize @tr's buffer to the size of @size_tr's entries */
-static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
-					struct array_buffer *size_buf, int cpu_id)
-{
-	int cpu, ret = 0;
-
-	if (cpu_id == RING_BUFFER_ALL_CPUS) {
-		for_each_tracing_cpu(cpu) {
-			ret = ring_buffer_resize(trace_buf->buffer,
-				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
-			if (ret < 0)
-				break;
-			per_cpu_ptr(trace_buf->data, cpu)->entries =
-				per_cpu_ptr(size_buf->data, cpu)->entries;
-		}
-	} else {
-		ret = ring_buffer_resize(trace_buf->buffer,
-				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
-		if (ret == 0)
-			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
-				per_cpu_ptr(size_buf->data, cpu_id)->entries;
-	}
-
-	return ret;
-}
-#endif /* CONFIG_TRACER_SNAPSHOT */
-
 static int __tracing_resize_ring_buffer(struct trace_array *tr,
 					unsigned long size, int cpu)
 {
@@ -5683,9 +5054,8 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 	return ret;
 }
 
-static ssize_t
-tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
-		   size_t cnt, loff_t *ppos)
+ssize_t tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
+			   size_t cnt, loff_t *ppos)
 {
 	char buf[64];
 	int r;
@@ -5697,9 +5067,8 @@ tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
 
-static ssize_t
-tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
-		    size_t cnt, loff_t *ppos)
+ssize_t tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos)
 {
 	unsigned long val;
 	int ret;
@@ -5741,28 +5110,6 @@ tracing_thresh_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-
-static ssize_t
-tracing_max_lat_read(struct file *filp, char __user *ubuf,
-		     size_t cnt, loff_t *ppos)
-{
-	struct trace_array *tr = filp->private_data;
-
-	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
-}
-
-static ssize_t
-tracing_max_lat_write(struct file *filp, const char __user *ubuf,
-		      size_t cnt, loff_t *ppos)
-{
-	struct trace_array *tr = filp->private_data;
-
-	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
-}
-
-#endif
-
 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
 {
 	if (cpu == RING_BUFFER_ALL_CPUS) {
@@ -7067,266 +6414,78 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 	const char *clockstr;
 	int ret;
 
-	if (cnt >= sizeof(buf))
-		return -EINVAL;
-
-	if (copy_from_user(buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-
-	clockstr = strstrip(buf);
-
-	ret = tracing_set_clock(tr, clockstr);
-	if (ret)
-		return ret;
-
-	*fpos += cnt;
-
-	return cnt;
-}
-
-static int tracing_clock_open(struct inode *inode, struct file *file)
-{
-	struct trace_array *tr = inode->i_private;
-	int ret;
-
-	ret = tracing_check_open_get_tr(tr);
-	if (ret)
-		return ret;
-
-	ret = single_open(file, tracing_clock_show, inode->i_private);
-	if (ret < 0)
-		trace_array_put(tr);
-
-	return ret;
-}
-
-static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
-{
-	struct trace_array *tr = m->private;
-
-	guard(mutex)(&trace_types_lock);
-
-	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
-		seq_puts(m, "delta [absolute]\n");
-	else
-		seq_puts(m, "[delta] absolute\n");
-
-	return 0;
-}
-
-static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
-{
-	struct trace_array *tr = inode->i_private;
-	int ret;
-
-	ret = tracing_check_open_get_tr(tr);
-	if (ret)
-		return ret;
-
-	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
-	if (ret < 0)
-		trace_array_put(tr);
-
-	return ret;
-}
-
-u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
-{
-	if (rbe == this_cpu_read(trace_buffered_event))
-		return ring_buffer_time_stamp(buffer);
-
-	return ring_buffer_event_time_stamp(buffer, rbe);
-}
-
-struct ftrace_buffer_info {
-	struct trace_iterator	iter;
-	void			*spare;
-	unsigned int		spare_cpu;
-	unsigned int		spare_size;
-	unsigned int		read;
-};
-
-#ifdef CONFIG_TRACER_SNAPSHOT
-static int tracing_snapshot_open(struct inode *inode, struct file *file)
-{
-	struct trace_array *tr = inode->i_private;
-	struct trace_iterator *iter;
-	struct seq_file *m;
-	int ret;
-
-	ret = tracing_check_open_get_tr(tr);
-	if (ret)
-		return ret;
-
-	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(inode, file, true);
-		if (IS_ERR(iter))
-			ret = PTR_ERR(iter);
-	} else {
-		/* Writes still need the seq_file to hold the private data */
-		ret = -ENOMEM;
-		m = kzalloc_obj(*m);
-		if (!m)
-			goto out;
-		iter = kzalloc_obj(*iter);
-		if (!iter) {
-			kfree(m);
-			goto out;
-		}
-		ret = 0;
-
-		iter->tr = tr;
-		iter->array_buffer = &tr->snapshot_buffer;
-		iter->cpu_file = tracing_get_cpu(inode);
-		m->private = iter;
-		file->private_data = m;
-	}
-out:
-	if (ret < 0)
-		trace_array_put(tr);
-
-	return ret;
-}
-
-static void tracing_swap_cpu_buffer(void *tr)
-{
-	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
-}
-
-static ssize_t
-tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
-		       loff_t *ppos)
-{
-	struct seq_file *m = filp->private_data;
-	struct trace_iterator *iter = m->private;
-	struct trace_array *tr = iter->tr;
-	unsigned long val;
-	int ret;
-
-	ret = tracing_update_buffers(tr);
-	if (ret < 0)
-		return ret;
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
 
-	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
-	if (ret)
-		return ret;
+	if (copy_from_user(buf, ubuf, cnt))
+		return -EFAULT;
 
-	guard(mutex)(&trace_types_lock);
+	buf[cnt] = 0;
 
-	if (tracer_uses_snapshot(tr->current_trace))
-		return -EBUSY;
+	clockstr = strstrip(buf);
 
-	local_irq_disable();
-	arch_spin_lock(&tr->max_lock);
-	if (tr->cond_snapshot)
-		ret = -EBUSY;
-	arch_spin_unlock(&tr->max_lock);
-	local_irq_enable();
+	ret = tracing_set_clock(tr, clockstr);
 	if (ret)
 		return ret;
 
-	switch (val) {
-	case 0:
-		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
-			return -EINVAL;
-		if (tr->allocated_snapshot)
-			free_snapshot(tr);
-		break;
-	case 1:
-/* Only allow per-cpu swap if the ring buffer supports it */
-#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
-		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
-			return -EINVAL;
-#endif
-		if (tr->allocated_snapshot)
-			ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
-					&tr->array_buffer, iter->cpu_file);
+	*fpos += cnt;
 
-		ret = tracing_arm_snapshot_locked(tr);
-		if (ret)
-			return ret;
+	return cnt;
+}
 
-		/* Now, we're going to swap */
-		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
-			local_irq_disable();
-			update_max_tr(tr, current, smp_processor_id(), NULL);
-			local_irq_enable();
-		} else {
-			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
-						 (void *)tr, 1);
-		}
-		tracing_disarm_snapshot(tr);
-		break;
-	default:
-		if (tr->allocated_snapshot) {
-			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
-				tracing_reset_online_cpus(&tr->snapshot_buffer);
-			else
-				tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file);
-		}
-		break;
-	}
+static int tracing_clock_open(struct inode *inode, struct file *file)
+{
+	struct trace_array *tr = inode->i_private;
+	int ret;
 
-	if (ret >= 0) {
-		*ppos += cnt;
-		ret = cnt;
-	}
+	ret = tracing_check_open_get_tr(tr);
+	if (ret)
+		return ret;
+
+	ret = single_open(file, tracing_clock_show, inode->i_private);
+	if (ret < 0)
+		trace_array_put(tr);
 
 	return ret;
 }
 
-static int tracing_snapshot_release(struct inode *inode, struct file *file)
+static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
 {
-	struct seq_file *m = file->private_data;
-	int ret;
-
-	ret = tracing_release(inode, file);
+	struct trace_array *tr = m->private;
 
-	if (file->f_mode & FMODE_READ)
-		return ret;
+	guard(mutex)(&trace_types_lock);
 
-	/* If write only, the seq_file is just a stub */
-	if (m)
-		kfree(m->private);
-	kfree(m);
+	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
+		seq_puts(m, "delta [absolute]\n");
+	else
+		seq_puts(m, "[delta] absolute\n");
 
 	return 0;
 }
 
-static int tracing_buffers_open(struct inode *inode, struct file *filp);
-static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
-				    size_t count, loff_t *ppos);
-static int tracing_buffers_release(struct inode *inode, struct file *file);
-static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
-		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
-
-static int snapshot_raw_open(struct inode *inode, struct file *filp)
+static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
 {
-	struct ftrace_buffer_info *info;
+	struct trace_array *tr = inode->i_private;
 	int ret;
 
-	/* The following checks for tracefs lockdown */
-	ret = tracing_buffers_open(inode, filp);
-	if (ret < 0)
+	ret = tracing_check_open_get_tr(tr);
+	if (ret)
 		return ret;
 
-	info = filp->private_data;
-
-	if (tracer_uses_snapshot(info->iter.trace)) {
-		tracing_buffers_release(inode, filp);
-		return -EBUSY;
-	}
-
-	info->iter.snapshot = true;
-	info->iter.array_buffer = &info->iter.tr->snapshot_buffer;
+	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
+	if (ret < 0)
+		trace_array_put(tr);
 
 	return ret;
 }
 
-#endif /* CONFIG_TRACER_SNAPSHOT */
+u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
+{
+	if (rbe == this_cpu_read(trace_buffered_event))
+		return ring_buffer_time_stamp(buffer);
 
+	return ring_buffer_event_time_stamp(buffer, rbe);
+}
 
 static const struct file_operations tracing_thresh_fops = {
 	.open		= tracing_open_generic,
@@ -7335,16 +6494,6 @@ static const struct file_operations tracing_thresh_fops = {
 	.llseek		= generic_file_llseek,
 };
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-static const struct file_operations tracing_max_lat_fops = {
-	.open		= tracing_open_generic_tr,
-	.read		= tracing_max_lat_read,
-	.write		= tracing_max_lat_write,
-	.llseek		= generic_file_llseek,
-	.release	= tracing_release_generic_tr,
-};
-#endif
-
 static const struct file_operations set_tracer_fops = {
 	.open		= tracing_open_generic_tr,
 	.read		= tracing_set_trace_read,
@@ -7431,24 +6580,6 @@ static const struct file_operations last_boot_fops = {
 	.release	= tracing_seq_release,
 };
 
-#ifdef CONFIG_TRACER_SNAPSHOT
-static const struct file_operations snapshot_fops = {
-	.open		= tracing_snapshot_open,
-	.read		= seq_read,
-	.write		= tracing_snapshot_write,
-	.llseek		= tracing_lseek,
-	.release	= tracing_snapshot_release,
-};
-
-static const struct file_operations snapshot_raw_fops = {
-	.open		= snapshot_raw_open,
-	.read		= tracing_buffers_read,
-	.release	= tracing_buffers_release,
-	.splice_read	= tracing_buffers_splice_read,
-};
-
-#endif /* CONFIG_TRACER_SNAPSHOT */
-
 /*
  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
  * @filp: The active open file structure
@@ -7808,7 +6939,7 @@ static const struct file_operations tracing_err_log_fops = {
 	.release        = tracing_err_log_release,
 };
 
-static int tracing_buffers_open(struct inode *inode, struct file *filp)
+int tracing_buffers_open(struct inode *inode, struct file *filp)
 {
 	struct trace_array *tr = inode->i_private;
 	struct ftrace_buffer_info *info;
@@ -7856,9 +6987,8 @@ tracing_buffers_poll(struct file *filp, poll_table *poll_table)
 	return trace_poll(iter, filp, poll_table);
 }
 
-static ssize_t
-tracing_buffers_read(struct file *filp, char __user *ubuf,
-		     size_t count, loff_t *ppos)
+ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
+			     size_t count, loff_t *ppos)
 {
 	struct ftrace_buffer_info *info = filp->private_data;
 	struct trace_iterator *iter = &info->iter;
@@ -7959,7 +7089,7 @@ static int tracing_buffers_flush(struct file *file, fl_owner_t id)
 	return 0;
 }
 
-static int tracing_buffers_release(struct inode *inode, struct file *file)
+int tracing_buffers_release(struct inode *inode, struct file *file)
 {
 	struct ftrace_buffer_info *info = file->private_data;
 	struct trace_iterator *iter = &info->iter;
@@ -8033,10 +7163,9 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
 	spd->partial[i].private = 0;
 }
 
-static ssize_t
-tracing_buffers_splice_read(struct file *file, loff_t *ppos,
-			    struct pipe_inode_info *pipe, size_t len,
-			    unsigned int flags)
+ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
+				    struct pipe_inode_info *pipe, size_t len,
+				    unsigned int flags)
 {
 	struct ftrace_buffer_info *info = file->private_data;
 	struct trace_iterator *iter = &info->iter;
@@ -8190,44 +7319,6 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned
 	return 0;
 }
 
-#ifdef CONFIG_TRACER_SNAPSHOT
-static int get_snapshot_map(struct trace_array *tr)
-{
-	int err = 0;
-
-	/*
-	 * Called with mmap_lock held. lockdep would be unhappy if we would now
-	 * take trace_types_lock. Instead use the specific
-	 * snapshot_trigger_lock.
-	 */
-	spin_lock(&tr->snapshot_trigger_lock);
-
-	if (tr->snapshot || tr->mapped == UINT_MAX)
-		err = -EBUSY;
-	else
-		tr->mapped++;
-
-	spin_unlock(&tr->snapshot_trigger_lock);
-
-	/* Wait for update_max_tr() to observe iter->tr->mapped */
-	if (tr->mapped == 1)
-		synchronize_rcu();
-
-	return err;
-
-}
-static void put_snapshot_map(struct trace_array *tr)
-{
-	spin_lock(&tr->snapshot_trigger_lock);
-	if (!WARN_ON(!tr->mapped))
-		tr->mapped--;
-	spin_unlock(&tr->snapshot_trigger_lock);
-}
-#else
-static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
-static inline void put_snapshot_map(struct trace_array *tr) { }
-#endif
-
 /*
  * This is called when a VMA is duplicated (e.g., on fork()) to increment
  * the user_mapped counter without remapping pages.
@@ -8408,170 +7499,6 @@ static const struct file_operations tracing_dyn_info_fops = {
 };
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
-#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
-static void
-ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
-		struct trace_array *tr, struct ftrace_probe_ops *ops,
-		void *data)
-{
-	tracing_snapshot_instance(tr);
-}
-
-static void
-ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
-		      struct trace_array *tr, struct ftrace_probe_ops *ops,
-		      void *data)
-{
-	struct ftrace_func_mapper *mapper = data;
-	long *count = NULL;
-
-	if (mapper)
-		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
-
-	if (count) {
-
-		if (*count <= 0)
-			return;
-
-		(*count)--;
-	}
-
-	tracing_snapshot_instance(tr);
-}
-
-static int
-ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
-		      struct ftrace_probe_ops *ops, void *data)
-{
-	struct ftrace_func_mapper *mapper = data;
-	long *count = NULL;
-
-	seq_printf(m, "%ps:", (void *)ip);
-
-	seq_puts(m, "snapshot");
-
-	if (mapper)
-		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
-
-	if (count)
-		seq_printf(m, ":count=%ld\n", *count);
-	else
-		seq_puts(m, ":unlimited\n");
-
-	return 0;
-}
-
-static int
-ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
-		     unsigned long ip, void *init_data, void **data)
-{
-	struct ftrace_func_mapper *mapper = *data;
-
-	if (!mapper) {
-		mapper = allocate_ftrace_func_mapper();
-		if (!mapper)
-			return -ENOMEM;
-		*data = mapper;
-	}
-
-	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
-}
-
-static void
-ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
-		     unsigned long ip, void *data)
-{
-	struct ftrace_func_mapper *mapper = data;
-
-	if (!ip) {
-		if (!mapper)
-			return;
-		free_ftrace_func_mapper(mapper, NULL);
-		return;
-	}
-
-	ftrace_func_mapper_remove_ip(mapper, ip);
-}
-
-static struct ftrace_probe_ops snapshot_probe_ops = {
-	.func			= ftrace_snapshot,
-	.print			= ftrace_snapshot_print,
-};
-
-static struct ftrace_probe_ops snapshot_count_probe_ops = {
-	.func			= ftrace_count_snapshot,
-	.print			= ftrace_snapshot_print,
-	.init			= ftrace_snapshot_init,
-	.free			= ftrace_snapshot_free,
-};
-
-static int
-ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
-			       char *glob, char *cmd, char *param, int enable)
-{
-	struct ftrace_probe_ops *ops;
-	void *count = (void *)-1;
-	char *number;
-	int ret;
-
-	if (!tr)
-		return -ENODEV;
-
-	/* hash funcs only work with set_ftrace_filter */
-	if (!enable)
-		return -EINVAL;
-
-	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
-
-	if (glob[0] == '!') {
-		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
-		if (!ret)
-			tracing_disarm_snapshot(tr);
-
-		return ret;
-	}
-
-	if (!param)
-		goto out_reg;
-
-	number = strsep(&param, ":");
-
-	if (!strlen(number))
-		goto out_reg;
-
-	/*
-	 * We use the callback data field (which is a pointer)
-	 * as our counter.
-	 */
-	ret = kstrtoul(number, 0, (unsigned long *)&count);
-	if (ret)
-		return ret;
-
- out_reg:
-	ret = tracing_arm_snapshot(tr);
-	if (ret < 0)
-		return ret;
-
-	ret = register_ftrace_function_probe(glob, tr, ops, count);
-	if (ret < 0)
-		tracing_disarm_snapshot(tr);
-
-	return ret < 0 ? ret : 0;
-}
-
-static struct ftrace_func_command ftrace_snapshot_cmd = {
-	.name			= "snapshot",
-	.func			= ftrace_trace_snapshot_callback,
-};
-
-static __init int register_snapshot_cmd(void)
-{
-	return register_ftrace_command(&ftrace_snapshot_cmd);
-}
-#else
-static inline __init int register_snapshot_cmd(void) { return 0; }
-#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
-
 static struct dentry *tracing_get_dentry(struct trace_array *tr)
 {
 	/* Top directory uses NULL as the parent */
@@ -9364,8 +8291,7 @@ static void setup_trace_scratch(struct trace_array *tr,
 	memset(tscratch, 0, size);
 }
 
-static int
-allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, unsigned long size)
+int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
 {
 	enum ring_buffer_flags rb_flags;
 	struct trace_scratch *tscratch;
@@ -9404,8 +8330,8 @@ allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, unsigned
 	}
 
 	/* Allocate the first page for all buffers */
-	set_buffer_entries(&tr->array_buffer,
-			   ring_buffer_size(tr->array_buffer.buffer, 0));
+	trace_set_buffer_entries(&tr->array_buffer,
+				 ring_buffer_size(tr->array_buffer.buffer, 0));
 
 	return 0;
 }
@@ -9428,23 +8354,11 @@ static int allocate_trace_buffers(struct trace_array *tr, unsigned long size)
 	if (ret)
 		return ret;
 
-#ifdef CONFIG_TRACER_SNAPSHOT
-	/* Fix mapped buffer trace arrays do not have snapshot buffers */
-	if (tr->range_addr_start)
-		return 0;
-
-	ret = allocate_trace_buffer(tr, &tr->snapshot_buffer,
-				    allocate_snapshot ? size : 1);
-	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
+	ret = trace_allocate_snapshot(tr, size);
+	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n"))
 		free_trace_buffer(&tr->array_buffer);
-		return -ENOMEM;
-	}
-	tr->allocated_snapshot = allocate_snapshot;
-
-	allocate_snapshot = false;
-#endif
 
-	return 0;
+	return ret;
 }
 
 static void free_trace_buffers(struct trace_array *tr)
@@ -10552,47 +9466,6 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
 	return done;
 }
 
-#ifdef CONFIG_TRACER_SNAPSHOT
-__init static bool tr_needs_alloc_snapshot(const char *name)
-{
-	char *test;
-	int len = strlen(name);
-	bool ret;
-
-	if (!boot_snapshot_index)
-		return false;
-
-	if (strncmp(name, boot_snapshot_info, len) == 0 &&
-	    boot_snapshot_info[len] == '\t')
-		return true;
-
-	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
-	if (!test)
-		return false;
-
-	sprintf(test, "\t%s\t", name);
-	ret = strstr(boot_snapshot_info, test) == NULL;
-	kfree(test);
-	return ret;
-}
-
-__init static void do_allocate_snapshot(const char *name)
-{
-	if (!tr_needs_alloc_snapshot(name))
-		return;
-
-	/*
-	 * When allocate_snapshot is set, the next call to
-	 * allocate_trace_buffers() (called by trace_array_get_by_name())
-	 * will allocate the snapshot buffer. That will also clear
-	 * this flag.
-	 */
-	allocate_snapshot = true;
-}
-#else
-static inline void do_allocate_snapshot(const char *name) { }
-#endif
-
 __init static int backup_instance_area(const char *backup,
 				       unsigned long *addr, phys_addr_t *size)
 {
@@ -10742,8 +9615,7 @@ __init static void enable_instances(void)
 			}
 		} else {
 			/* Only non mapped buffers have snapshot buffers */
-			if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT))
-				do_allocate_snapshot(name);
+			do_allocate_snapshot(name);
 		}
 
 		tr = trace_array_create_systems(name, NULL, addr, size);
@@ -10935,24 +9807,6 @@ struct trace_array *trace_get_global_array(void)
 }
 #endif
 
-void __init ftrace_boot_snapshot(void)
-{
-#ifdef CONFIG_TRACER_SNAPSHOT
-	struct trace_array *tr;
-
-	if (!snapshot_at_boot)
-		return;
-
-	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
-		if (!tr->allocated_snapshot)
-			continue;
-
-		tracing_snapshot_instance(tr);
-		trace_array_puts(tr, "** Boot snapshot taken **\n");
-	}
-#endif
-}
-
 void __init early_trace_init(void)
 {
 	if (tracepoint_printk) {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7db78a62f786..a3ea735a9ef6 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -264,6 +264,7 @@ static inline bool still_need_pid_events(int type, struct trace_pid_list *pid_li
 
 typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data);
 
+#ifdef CONFIG_TRACER_SNAPSHOT
 /**
  * struct cond_snapshot - conditional snapshot data and callback
  *
@@ -306,6 +307,7 @@ struct cond_snapshot {
 	void				*cond_data;
 	cond_update_fn_t		update;
 };
+#endif /* CONFIG_TRACER_SNAPSHOT */
 
 /*
  * struct trace_func_repeats - used to keep track of the consecutive
@@ -675,6 +677,7 @@ void tracing_reset_all_online_cpus(void);
 void tracing_reset_all_online_cpus_unlocked(void);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 int tracing_open_generic_tr(struct inode *inode, struct file *filp);
+int tracing_release(struct inode *inode, struct file *file);
 int tracing_release_generic_tr(struct inode *inode, struct file *file);
 int tracing_open_file_tr(struct inode *inode, struct file *filp);
 int tracing_release_file_tr(struct inode *inode, struct file *filp);
@@ -684,12 +687,48 @@ void tracer_tracing_on(struct trace_array *tr);
 void tracer_tracing_off(struct trace_array *tr);
 void tracer_tracing_disable(struct trace_array *tr);
 void tracer_tracing_enable(struct trace_array *tr);
+int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size);
 struct dentry *trace_create_file(const char *name,
 				 umode_t mode,
 				 struct dentry *parent,
 				 void *data,
 				 const struct file_operations *fops);
 
+struct trace_iterator *__tracing_open(struct inode *inode, struct file *file,
+				      bool snapshot);
+int tracing_buffers_open(struct inode *inode, struct file *filp);
+ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
+			     size_t count, loff_t *ppos);
+int tracing_buffers_release(struct inode *inode, struct file *file);
+ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
+		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
+
+ssize_t tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
+			   size_t cnt, loff_t *ppos);
+ssize_t tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos);
+
+void trace_set_buffer_entries(struct array_buffer *buf, unsigned long val);
+
+/*
+ * Should be used after trace_array_get(), trace_types_lock
+ * ensures that i_cdev was already initialized.
+ */
+static inline int tracing_get_cpu(struct inode *inode)
+{
+	if (inode->i_cdev) /* See trace_create_cpu_file() */
+		return (long)inode->i_cdev - 1;
+	return RING_BUFFER_ALL_CPUS;
+}
+void tracing_reset_cpu(struct array_buffer *buf, int cpu);
+
+struct ftrace_buffer_info {
+	struct trace_iterator	iter;
+	void			*spare;
+	unsigned int		spare_cpu;
+	unsigned int		spare_size;
+	unsigned int		read;
+};
 
 /**
  * tracer_tracing_is_on_cpu - show real state of ring buffer enabled on for a cpu
@@ -828,11 +867,15 @@ static inline bool tracer_uses_snapshot(struct tracer *tracer)
 {
 	return tracer->use_max_tr;
 }
+void trace_create_maxlat_file(struct trace_array *tr,
+			      struct dentry *d_tracer);
 #else
 static inline bool tracer_uses_snapshot(struct tracer *tracer)
 {
 	return false;
 }
+static inline void trace_create_maxlat_file(struct trace_array *tr,
+					    struct dentry *d_tracer) { }
 #endif
 
 void trace_last_func_repeats(struct trace_array *tr,
@@ -2140,12 +2183,6 @@ static inline bool event_command_needs_rec(struct event_command *cmd_ops)
 
 extern int trace_event_enable_disable(struct trace_event_file *file,
 				      int enable, int soft_disable);
-extern int tracing_alloc_snapshot(void);
-extern void tracing_snapshot_cond(struct trace_array *tr, void *cond_data);
-extern int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update);
-
-extern int tracing_snapshot_cond_disable(struct trace_array *tr);
-extern void *tracing_cond_snapshot_data(struct trace_array *tr);
 
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
@@ -2233,19 +2270,71 @@ static inline void trace_event_update_all(struct trace_eval_map **map, int len)
 #endif
 
 #ifdef CONFIG_TRACER_SNAPSHOT
+extern const struct file_operations snapshot_fops;
+extern const struct file_operations snapshot_raw_fops;
+
+/* Used when creating instances */
+int trace_allocate_snapshot(struct trace_array *tr, int size);
+
+int tracing_alloc_snapshot(void);
+void tracing_snapshot_cond(struct trace_array *tr, void *cond_data);
+int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update);
+int tracing_snapshot_cond_disable(struct trace_array *tr);
+void *tracing_cond_snapshot_data(struct trace_array *tr);
 void tracing_snapshot_instance(struct trace_array *tr);
 int tracing_alloc_snapshot_instance(struct trace_array *tr);
+int tracing_arm_snapshot_locked(struct trace_array *tr);
 int tracing_arm_snapshot(struct trace_array *tr);
 void tracing_disarm_snapshot(struct trace_array *tr);
-#else
+void free_snapshot(struct trace_array *tr);
+void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter);
+int get_snapshot_map(struct trace_array *tr);
+void put_snapshot_map(struct trace_array *tr);
+int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
+				 struct array_buffer *size_buf, int cpu_id);
+__init void do_allocate_snapshot(const char *name);
+# ifdef CONFIG_DYNAMIC_FTRACE
+__init int register_snapshot_cmd(void);
+# else
+static inline int register_snapshot_cmd(void) { return 0; }
+# endif
+#else /* !CONFIG_TRACER_SNAPSHOT */
+static inline int trace_allocate_snapshot(struct trace_array *tr, int size) { return 0; }
 static inline void tracing_snapshot_instance(struct trace_array *tr) { }
 static inline int tracing_alloc_snapshot_instance(struct trace_array *tr)
 {
 	return 0;
 }
+static inline int tracing_arm_snapshot_locked(struct trace_array *tr) { return -EBUSY; }
 static inline int tracing_arm_snapshot(struct trace_array *tr) { return 0; }
 static inline void tracing_disarm_snapshot(struct trace_array *tr) { }
-#endif
+static inline void free_snapshot(struct trace_array *tr) {}
+static inline void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
+{
+	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
+}
+static inline void *tracing_cond_snapshot_data(struct trace_array *tr)
+{
+	return NULL;
+}
+static inline int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
+{
+	return -ENODEV;
+}
+static inline int tracing_snapshot_cond_disable(struct trace_array *tr)
+{
+	return false;
+}
+static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
+{
+	/* Should never be called */
+	WARN_ONCE(1, "Snapshot print function called without snapshot configured");
+}
+static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
+static inline void put_snapshot_map(struct trace_array *tr) { }
+static inline void do_allocate_snapshot(const char *name) { }
+static inline int register_snapshot_cmd(void) { return 0; }
+#endif /* CONFIG_TRACER_SNAPSHOT */
 
 #ifdef CONFIG_PREEMPT_TRACER
 void tracer_preempt_on(unsigned long a0, unsigned long a1);
diff --git a/kernel/trace/trace_snapshot.c b/kernel/trace/trace_snapshot.c
new file mode 100644
index 000000000000..8865b2ef2264
--- /dev/null
+++ b/kernel/trace/trace_snapshot.c
@@ -0,0 +1,1067 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/fsnotify.h>
+
+#include <asm/setup.h> /* COMMAND_LINE_SIZE */
+
+#include "trace.h"
+
+/* Used if snapshot allocated at boot */
+static bool allocate_snapshot;
+static bool snapshot_at_boot;
+
+static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
+static int boot_snapshot_index;
+
+static int __init boot_alloc_snapshot(char *str)
+{
+	char *slot = boot_snapshot_info + boot_snapshot_index;
+	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
+	int ret;
+
+	if (str[0] == '=') {
+		str++;
+		if (strlen(str) >= left)
+			return -1;
+
+		ret = snprintf(slot, left, "%s\t", str);
+		boot_snapshot_index += ret;
+	} else {
+		allocate_snapshot = true;
+		/* We also need the main ring buffer expanded */
+		trace_set_ring_buffer_expanded(NULL);
+	}
+	return 1;
+}
+__setup("alloc_snapshot", boot_alloc_snapshot);
+
+
+static int __init boot_snapshot(char *str)
+{
+	snapshot_at_boot = true;
+	boot_alloc_snapshot(str);
+	return 1;
+}
+__setup("ftrace_boot_snapshot", boot_snapshot);
+static void tracing_snapshot_instance_cond(struct trace_array *tr,
+					   void *cond_data)
+{
+	unsigned long flags;
+
+	if (in_nmi()) {
+		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
+		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
+		return;
+	}
+
+	if (!tr->allocated_snapshot) {
+		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
+		trace_array_puts(tr, "*** stopping trace here!   ***\n");
+		tracer_tracing_off(tr);
+		return;
+	}
+
+	if (tr->mapped) {
+		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
+		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
+		return;
+	}
+
+	/* Note, snapshot can not be used when the tracer uses it */
+	if (tracer_uses_snapshot(tr->current_trace)) {
+		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
+		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
+		return;
+	}
+
+	local_irq_save(flags);
+	update_max_tr(tr, current, smp_processor_id(), cond_data);
+	local_irq_restore(flags);
+}
+
+void tracing_snapshot_instance(struct trace_array *tr)
+{
+	tracing_snapshot_instance_cond(tr, NULL);
+}
+
+/**
+ * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
+ * @tr:		The tracing instance to snapshot
+ * @cond_data:	The data to be tested conditionally, and possibly saved
+ *
+ * This is the same as tracing_snapshot() except that the snapshot is
+ * conditional - the snapshot will only happen if the
+ * cond_snapshot.update() implementation receiving the cond_data
+ * returns true, which means that the trace array's cond_snapshot
+ * update() operation used the cond_data to determine whether the
+ * snapshot should be taken, and if it was, presumably saved it along
+ * with the snapshot.
+ */
+void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
+{
+	tracing_snapshot_instance_cond(tr, cond_data);
+}
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
+
+/**
+ * tracing_cond_snapshot_data - get the user data associated with a snapshot
+ * @tr:		The tracing instance
+ *
+ * When the user enables a conditional snapshot using
+ * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
+ * with the snapshot.  This accessor is used to retrieve it.
+ *
+ * Should not be called from cond_snapshot.update(), since it takes
+ * the tr->max_lock lock, which the code calling
+ * cond_snapshot.update() has already done.
+ *
+ * Returns the cond_data associated with the trace array's snapshot.
+ */
+void *tracing_cond_snapshot_data(struct trace_array *tr)
+{
+	void *cond_data = NULL;
+
+	local_irq_disable();
+	arch_spin_lock(&tr->max_lock);
+
+	if (tr->cond_snapshot)
+		cond_data = tr->cond_snapshot->cond_data;
+
+	arch_spin_unlock(&tr->max_lock);
+	local_irq_enable();
+
+	return cond_data;
+}
+EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
+
+/* resize @tr's buffer to the size of @size_tr's entries */
+int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
+				 struct array_buffer *size_buf, int cpu_id)
+{
+	int cpu, ret = 0;
+
+	if (cpu_id == RING_BUFFER_ALL_CPUS) {
+		for_each_tracing_cpu(cpu) {
+			ret = ring_buffer_resize(trace_buf->buffer,
+				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
+			if (ret < 0)
+				break;
+			per_cpu_ptr(trace_buf->data, cpu)->entries =
+				per_cpu_ptr(size_buf->data, cpu)->entries;
+		}
+	} else {
+		ret = ring_buffer_resize(trace_buf->buffer,
+				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
+		if (ret == 0)
+			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
+				per_cpu_ptr(size_buf->data, cpu_id)->entries;
+	}
+
+	return ret;
+}
+
+int tracing_alloc_snapshot_instance(struct trace_array *tr)
+{
+	int order;
+	int ret;
+
+	if (!tr->allocated_snapshot) {
+
+		/* Make the snapshot buffer have the same order as main buffer */
+		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
+		ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
+		if (ret < 0)
+			return ret;
+
+		/* allocate spare buffer */
+		ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
+				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
+		if (ret < 0)
+			return ret;
+
+		tr->allocated_snapshot = true;
+	}
+
+	return 0;
+}
+
+void free_snapshot(struct trace_array *tr)
+{
+	/*
+	 * We don't free the ring buffer. instead, resize it because
+	 * The max_tr ring buffer has some state (e.g. ring->clock) and
+	 * we want preserve it.
+	 */
+	ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
+	ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
+	trace_set_buffer_entries(&tr->snapshot_buffer, 1);
+	tracing_reset_online_cpus(&tr->snapshot_buffer);
+	tr->allocated_snapshot = false;
+}
+
+int tracing_arm_snapshot_locked(struct trace_array *tr)
+{
+	int ret;
+
+	lockdep_assert_held(&trace_types_lock);
+
+	spin_lock(&tr->snapshot_trigger_lock);
+	if (tr->snapshot == UINT_MAX || tr->mapped) {
+		spin_unlock(&tr->snapshot_trigger_lock);
+		return -EBUSY;
+	}
+
+	tr->snapshot++;
+	spin_unlock(&tr->snapshot_trigger_lock);
+
+	ret = tracing_alloc_snapshot_instance(tr);
+	if (ret) {
+		spin_lock(&tr->snapshot_trigger_lock);
+		tr->snapshot--;
+		spin_unlock(&tr->snapshot_trigger_lock);
+	}
+
+	return ret;
+}
+
+int tracing_arm_snapshot(struct trace_array *tr)
+{
+	guard(mutex)(&trace_types_lock);
+	return tracing_arm_snapshot_locked(tr);
+}
+
+void tracing_disarm_snapshot(struct trace_array *tr)
+{
+	spin_lock(&tr->snapshot_trigger_lock);
+	if (!WARN_ON(!tr->snapshot))
+		tr->snapshot--;
+	spin_unlock(&tr->snapshot_trigger_lock);
+}
+
+/**
+ * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
+ *
+ * This is similar to tracing_snapshot(), but it will allocate the
+ * snapshot buffer if it isn't already allocated. Use this only
+ * where it is safe to sleep, as the allocation may sleep.
+ *
+ * This causes a swap between the snapshot buffer and the current live
+ * tracing buffer. You can use this to take snapshots of the live
+ * trace when some condition is triggered, but continue to trace.
+ */
+void tracing_snapshot_alloc(void)
+{
+	int ret;
+
+	ret = tracing_alloc_snapshot();
+	if (ret < 0)
+		return;
+
+	tracing_snapshot();
+}
+EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
+
+/**
+ * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
+ * @tr:		The tracing instance
+ * @cond_data:	User data to associate with the snapshot
+ * @update:	Implementation of the cond_snapshot update function
+ *
+ * Check whether the conditional snapshot for the given instance has
+ * already been enabled, or if the current tracer is already using a
+ * snapshot; if so, return -EBUSY, else create a cond_snapshot and
+ * save the cond_data and update function inside.
+ *
+ * Returns 0 if successful, error otherwise.
+ */
+int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
+				 cond_update_fn_t update)
+{
+	struct cond_snapshot *cond_snapshot __free(kfree) =
+		kzalloc_obj(*cond_snapshot);
+	int ret;
+
+	if (!cond_snapshot)
+		return -ENOMEM;
+
+	cond_snapshot->cond_data = cond_data;
+	cond_snapshot->update = update;
+
+	guard(mutex)(&trace_types_lock);
+
+	if (tracer_uses_snapshot(tr->current_trace))
+		return -EBUSY;
+
+	/*
+	 * The cond_snapshot can only change to NULL without the
+	 * trace_types_lock. We don't care if we race with it going
+	 * to NULL, but we want to make sure that it's not set to
+	 * something other than NULL when we get here, which we can
+	 * do safely with only holding the trace_types_lock and not
+	 * having to take the max_lock.
+	 */
+	if (tr->cond_snapshot)
+		return -EBUSY;
+
+	ret = tracing_arm_snapshot_locked(tr);
+	if (ret)
+		return ret;
+
+	local_irq_disable();
+	arch_spin_lock(&tr->max_lock);
+	tr->cond_snapshot = no_free_ptr(cond_snapshot);
+	arch_spin_unlock(&tr->max_lock);
+	local_irq_enable();
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
+
+/**
+ * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
+ * @tr:		The tracing instance
+ *
+ * Check whether the conditional snapshot for the given instance is
+ * enabled; if so, free the cond_snapshot associated with it,
+ * otherwise return -EINVAL.
+ *
+ * Returns 0 if successful, error otherwise.
+ */
+int tracing_snapshot_cond_disable(struct trace_array *tr)
+{
+	int ret = 0;
+
+	local_irq_disable();
+	arch_spin_lock(&tr->max_lock);
+
+	if (!tr->cond_snapshot)
+		ret = -EINVAL;
+	else {
+		kfree(tr->cond_snapshot);
+		tr->cond_snapshot = NULL;
+	}
+
+	arch_spin_unlock(&tr->max_lock);
+	local_irq_enable();
+
+	tracing_disarm_snapshot(tr);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+#ifdef LATENCY_FS_NOTIFY
+static struct workqueue_struct *fsnotify_wq;
+
+static void latency_fsnotify_workfn(struct work_struct *work)
+{
+	struct trace_array *tr = container_of(work, struct trace_array,
+					      fsnotify_work);
+	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
+}
+
+static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
+{
+	struct trace_array *tr = container_of(iwork, struct trace_array,
+					      fsnotify_irqwork);
+	queue_work(fsnotify_wq, &tr->fsnotify_work);
+}
+
+__init static int latency_fsnotify_init(void)
+{
+	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
+				      WQ_UNBOUND | WQ_HIGHPRI, 0);
+	if (!fsnotify_wq) {
+		pr_err("Unable to allocate tr_max_lat_wq\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+late_initcall_sync(latency_fsnotify_init);
+
+void latency_fsnotify(struct trace_array *tr)
+{
+	if (!fsnotify_wq)
+		return;
+	/*
+	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
+	 * possible that we are called from __schedule() or do_idle(), which
+	 * could cause a deadlock.
+	 */
+	irq_work_queue(&tr->fsnotify_irqwork);
+}
+#else
+static inline void latency_fsnotify(struct trace_array *tr) { }
+#endif /* LATENCY_FS_NOTIFY */
+static const struct file_operations tracing_max_lat_fops;
+
+void trace_create_maxlat_file(struct trace_array *tr,
+			      struct dentry *d_tracer)
+{
+#ifdef LATENCY_FS_NOTIFY
+	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
+	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
+#endif
+	tr->d_max_latency = trace_create_file("tracing_max_latency",
+					      TRACE_MODE_WRITE,
+					      d_tracer, tr,
+					      &tracing_max_lat_fops);
+}
+
+/*
+ * Copy the new maximum trace into the separate maximum-trace
+ * structure. (this way the maximum trace is permanently saved,
+ * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
+ */
+static void
+__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
+{
+	struct array_buffer *trace_buf = &tr->array_buffer;
+	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
+	struct array_buffer *max_buf = &tr->snapshot_buffer;
+	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
+
+	max_buf->cpu = cpu;
+	max_buf->time_start = data->preempt_timestamp;
+
+	max_data->saved_latency = tr->max_latency;
+	max_data->critical_start = data->critical_start;
+	max_data->critical_end = data->critical_end;
+
+	strscpy(max_data->comm, tsk->comm);
+	max_data->pid = tsk->pid;
+	/*
+	 * If tsk == current, then use current_uid(), as that does not use
+	 * RCU. The irq tracer can be called out of RCU scope.
+	 */
+	if (tsk == current)
+		max_data->uid = current_uid();
+	else
+		max_data->uid = task_uid(tsk);
+
+	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
+	max_data->policy = tsk->policy;
+	max_data->rt_priority = tsk->rt_priority;
+
+	/* record this tasks comm */
+	tracing_record_cmdline(tsk);
+	latency_fsnotify(tr);
+}
+#else
+static inline void __update_max_tr(struct trace_array *tr,
+				   struct task_struct *tsk, int cpu) { }
+#endif /* CONFIG_TRACER_MAX_TRACE */
+
+/**
+ * update_max_tr - snapshot all trace buffers from global_trace to max_tr
+ * @tr: tracer
+ * @tsk: the task with the latency
+ * @cpu: The cpu that initiated the trace.
+ * @cond_data: User data associated with a conditional snapshot
+ *
+ * Flip the buffers between the @tr and the max_tr and record information
+ * about which task was the cause of this latency.
+ */
+void
+update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
+	      void *cond_data)
+{
+	if (tr->stop_count)
+		return;
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (!tr->allocated_snapshot) {
+		/* Only the nop tracer should hit this when disabling */
+		WARN_ON_ONCE(tr->current_trace != &nop_trace);
+		return;
+	}
+
+	arch_spin_lock(&tr->max_lock);
+
+	/* Inherit the recordable setting from array_buffer */
+	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
+		ring_buffer_record_on(tr->snapshot_buffer.buffer);
+	else
+		ring_buffer_record_off(tr->snapshot_buffer.buffer);
+
+	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
+		arch_spin_unlock(&tr->max_lock);
+		return;
+	}
+
+	swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);
+
+	__update_max_tr(tr, tsk, cpu);
+
+	arch_spin_unlock(&tr->max_lock);
+
+	/* Any waiters on the old snapshot buffer need to wake up */
+	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
+}
+
+/**
+ * update_max_tr_single - only copy one trace over, and reset the rest
+ * @tr: tracer
+ * @tsk: task with the latency
+ * @cpu: the cpu of the buffer to copy.
+ *
+ * Flip the trace of a single CPU buffer between the @tr and the max_tr.
+ */
+void
+update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
+{
+	int ret;
+
+	if (tr->stop_count)
+		return;
+
+	WARN_ON_ONCE(!irqs_disabled());
+	if (!tr->allocated_snapshot) {
+		/* Only the nop tracer should hit this when disabling */
+		WARN_ON_ONCE(tr->current_trace != &nop_trace);
+		return;
+	}
+
+	arch_spin_lock(&tr->max_lock);
+
+	ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);
+
+	if (ret == -EBUSY) {
+		/*
+		 * We failed to swap the buffer due to a commit taking
+		 * place on this CPU. We fail to record, but we reset
+		 * the max trace buffer (no one writes directly to it)
+		 * and flag that it failed.
+		 * Another reason is resize is in progress.
+		 */
+		trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
+			"Failed to swap buffers due to commit or resize in progress\n");
+	}
+
+	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
+
+	__update_max_tr(tr, tsk, cpu);
+	arch_spin_unlock(&tr->max_lock);
+}
+
+static void show_snapshot_main_help(struct seq_file *m)
+{
+	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
+		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
+		    "#                      Takes a snapshot of the main buffer.\n"
+		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
+		    "#                      (Doesn't have to be '2' works with any number that\n"
+		    "#                       is not a '0' or '1')\n");
+}
+
+static void show_snapshot_percpu_help(struct seq_file *m)
+{
+	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
+#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
+	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
+		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
+#else
+	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
+		    "#                     Must use main snapshot file to allocate.\n");
+#endif
+	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
+		    "#                      (Doesn't have to be '2' works with any number that\n"
+		    "#                       is not a '0' or '1')\n");
+}
+
+void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
+{
+	if (iter->tr->allocated_snapshot)
+		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
+	else
+		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
+
+	seq_puts(m, "# Snapshot commands:\n");
+	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
+		show_snapshot_main_help(m);
+	else
+		show_snapshot_percpu_help(m);
+}
+
+static int tracing_snapshot_open(struct inode *inode, struct file *file)
+{
+	struct trace_array *tr = inode->i_private;
+	struct trace_iterator *iter;
+	struct seq_file *m;
+	int ret;
+
+	ret = tracing_check_open_get_tr(tr);
+	if (ret)
+		return ret;
+
+	if (file->f_mode & FMODE_READ) {
+		iter = __tracing_open(inode, file, true);
+		if (IS_ERR(iter))
+			ret = PTR_ERR(iter);
+	} else {
+		/* Writes still need the seq_file to hold the private data */
+		ret = -ENOMEM;
+		m = kzalloc_obj(*m);
+		if (!m)
+			goto out;
+		iter = kzalloc_obj(*iter);
+		if (!iter) {
+			kfree(m);
+			goto out;
+		}
+		ret = 0;
+
+		iter->tr = tr;
+		iter->array_buffer = &tr->snapshot_buffer;
+		iter->cpu_file = tracing_get_cpu(inode);
+		m->private = iter;
+		file->private_data = m;
+	}
+out:
+	if (ret < 0)
+		trace_array_put(tr);
+
+	return ret;
+}
+
+static void tracing_swap_cpu_buffer(void *tr)
+{
+	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
+}
+
+static ssize_t
+tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		       loff_t *ppos)
+{
+	struct seq_file *m = filp->private_data;
+	struct trace_iterator *iter = m->private;
+	struct trace_array *tr = iter->tr;
+	unsigned long val;
+	int ret;
+
+	ret = tracing_update_buffers(tr);
+	if (ret < 0)
+		return ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	guard(mutex)(&trace_types_lock);
+
+	if (tracer_uses_snapshot(tr->current_trace))
+		return -EBUSY;
+
+	local_irq_disable();
+	arch_spin_lock(&tr->max_lock);
+	if (tr->cond_snapshot)
+		ret = -EBUSY;
+	arch_spin_unlock(&tr->max_lock);
+	local_irq_enable();
+	if (ret)
+		return ret;
+
+	switch (val) {
+	case 0:
+		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
+			return -EINVAL;
+		if (tr->allocated_snapshot)
+			free_snapshot(tr);
+		break;
+	case 1:
+/* Only allow per-cpu swap if the ring buffer supports it */
+#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
+		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
+			return -EINVAL;
+#endif
+		if (tr->allocated_snapshot)
+			ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
+					&tr->array_buffer, iter->cpu_file);
+
+		ret = tracing_arm_snapshot_locked(tr);
+		if (ret)
+			return ret;
+
+		/* Now, we're going to swap */
+		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
+			local_irq_disable();
+			update_max_tr(tr, current, smp_processor_id(), NULL);
+			local_irq_enable();
+		} else {
+			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
+						 (void *)tr, 1);
+		}
+		tracing_disarm_snapshot(tr);
+		break;
+	default:
+		if (tr->allocated_snapshot) {
+			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
+				tracing_reset_online_cpus(&tr->snapshot_buffer);
+			else
+				tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file);
+		}
+		break;
+	}
+
+	if (ret >= 0) {
+		*ppos += cnt;
+		ret = cnt;
+	}
+
+	return ret;
+}
+
+static int tracing_snapshot_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	int ret;
+
+	ret = tracing_release(inode, file);
+
+	if (file->f_mode & FMODE_READ)
+		return ret;
+
+	/* If write only, the seq_file is just a stub */
+	if (m)
+		kfree(m->private);
+	kfree(m);
+
+	return 0;
+}
+
+static int snapshot_raw_open(struct inode *inode, struct file *filp)
+{
+	struct ftrace_buffer_info *info;
+	int ret;
+
+	/* The following checks for tracefs lockdown */
+	ret = tracing_buffers_open(inode, filp);
+	if (ret < 0)
+		return ret;
+
+	info = filp->private_data;
+
+	if (tracer_uses_snapshot(info->iter.trace)) {
+		tracing_buffers_release(inode, filp);
+		return -EBUSY;
+	}
+
+	info->iter.snapshot = true;
+	info->iter.array_buffer = &info->iter.tr->snapshot_buffer;
+
+	return ret;
+}
+
+const struct file_operations snapshot_fops = {
+	.open		= tracing_snapshot_open,
+	.read		= seq_read,
+	.write		= tracing_snapshot_write,
+	.llseek		= tracing_lseek,
+	.release	= tracing_snapshot_release,
+};
+
+const struct file_operations snapshot_raw_fops = {
+	.open		= snapshot_raw_open,
+	.read		= tracing_buffers_read,
+	.release	= tracing_buffers_release,
+	.splice_read	= tracing_buffers_splice_read,
+};
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+static ssize_t
+tracing_max_lat_read(struct file *filp, char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	struct trace_array *tr = filp->private_data;
+
+	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
+}
+
+static ssize_t
+tracing_max_lat_write(struct file *filp, const char __user *ubuf,
+		      size_t cnt, loff_t *ppos)
+{
+	struct trace_array *tr = filp->private_data;
+
+	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
+}
+
+static const struct file_operations tracing_max_lat_fops = {
+	.open		= tracing_open_generic_tr,
+	.read		= tracing_max_lat_read,
+	.write		= tracing_max_lat_write,
+	.llseek		= generic_file_llseek,
+	.release	= tracing_release_generic_tr,
+};
+#endif /* CONFIG_TRACER_MAX_TRACE */
+
+int get_snapshot_map(struct trace_array *tr)
+{
+	int err = 0;
+
+	/*
+	 * Called with mmap_lock held. lockdep would be unhappy if we would now
+	 * take trace_types_lock. Instead use the specific
+	 * snapshot_trigger_lock.
+	 */
+	spin_lock(&tr->snapshot_trigger_lock);
+
+	if (tr->snapshot || tr->mapped == UINT_MAX)
+		err = -EBUSY;
+	else
+		tr->mapped++;
+
+	spin_unlock(&tr->snapshot_trigger_lock);
+
+	/* Wait for update_max_tr() to observe iter->tr->mapped */
+	if (tr->mapped == 1)
+		synchronize_rcu();
+
+	return err;
+
+}
+
+void put_snapshot_map(struct trace_array *tr)
+{
+	spin_lock(&tr->snapshot_trigger_lock);
+	if (!WARN_ON(!tr->mapped))
+		tr->mapped--;
+	spin_unlock(&tr->snapshot_trigger_lock);
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static void
+ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
+		struct trace_array *tr, struct ftrace_probe_ops *ops,
+		void *data)
+{
+	tracing_snapshot_instance(tr);
+}
+
+static void
+ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
+		      struct trace_array *tr, struct ftrace_probe_ops *ops,
+		      void *data)
+{
+	struct ftrace_func_mapper *mapper = data;
+	long *count = NULL;
+
+	if (mapper)
+		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
+
+	if (count) {
+
+		if (*count <= 0)
+			return;
+
+		(*count)--;
+	}
+
+	tracing_snapshot_instance(tr);
+}
+
+static int
+ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
+		      struct ftrace_probe_ops *ops, void *data)
+{
+	struct ftrace_func_mapper *mapper = data;
+	long *count = NULL;
+
+	seq_printf(m, "%ps:", (void *)ip);
+
+	seq_puts(m, "snapshot");
+
+	if (mapper)
+		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
+
+	if (count)
+		seq_printf(m, ":count=%ld\n", *count);
+	else
+		seq_puts(m, ":unlimited\n");
+
+	return 0;
+}
+
+static int
+ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
+		     unsigned long ip, void *init_data, void **data)
+{
+	struct ftrace_func_mapper *mapper = *data;
+
+	if (!mapper) {
+		mapper = allocate_ftrace_func_mapper();
+		if (!mapper)
+			return -ENOMEM;
+		*data = mapper;
+	}
+
+	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
+}
+
+static void
+ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
+		     unsigned long ip, void *data)
+{
+	struct ftrace_func_mapper *mapper = data;
+
+	if (!ip) {
+		if (!mapper)
+			return;
+		free_ftrace_func_mapper(mapper, NULL);
+		return;
+	}
+
+	ftrace_func_mapper_remove_ip(mapper, ip);
+}
+
+static struct ftrace_probe_ops snapshot_probe_ops = {
+	.func			= ftrace_snapshot,
+	.print			= ftrace_snapshot_print,
+};
+
+static struct ftrace_probe_ops snapshot_count_probe_ops = {
+	.func			= ftrace_count_snapshot,
+	.print			= ftrace_snapshot_print,
+	.init			= ftrace_snapshot_init,
+	.free			= ftrace_snapshot_free,
+};
+
+static int
+ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
+			       char *glob, char *cmd, char *param, int enable)
+{
+	struct ftrace_probe_ops *ops;
+	void *count = (void *)-1;
+	char *number;
+	int ret;
+
+	if (!tr)
+		return -ENODEV;
+
+	/* hash funcs only work with set_ftrace_filter */
+	if (!enable)
+		return -EINVAL;
+
+	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
+
+	if (glob[0] == '!') {
+		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
+		if (!ret)
+			tracing_disarm_snapshot(tr);
+
+		return ret;
+	}
+
+	if (!param)
+		goto out_reg;
+
+	number = strsep(&param, ":");
+
+	if (!strlen(number))
+		goto out_reg;
+
+	/*
+	 * We use the callback data field (which is a pointer)
+	 * as our counter.
+	 */
+	ret = kstrtoul(number, 0, (unsigned long *)&count);
+	if (ret)
+		return ret;
+
+ out_reg:
+	ret = tracing_arm_snapshot(tr);
+	if (ret < 0)
+		return ret;
+
+	ret = register_ftrace_function_probe(glob, tr, ops, count);
+	if (ret < 0)
+		tracing_disarm_snapshot(tr);
+
+	return ret < 0 ? ret : 0;
+}
+
+static struct ftrace_func_command ftrace_snapshot_cmd = {
+	.name			= "snapshot",
+	.func			= ftrace_trace_snapshot_callback,
+};
+
+__init int register_snapshot_cmd(void)
+{
+	return register_ftrace_command(&ftrace_snapshot_cmd);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+int trace_allocate_snapshot(struct trace_array *tr, int size)
+{
+	int ret;
+
+	/* Fix mapped buffer trace arrays do not have snapshot buffers */
+	if (tr->range_addr_start)
+		return 0;
+
+	/* allocate_snapshot can only be true during system boot */
+	ret = allocate_trace_buffer(tr, &tr->snapshot_buffer,
+				    allocate_snapshot ? size : 1);
+	if (ret < 0)
+		return -ENOMEM;
+
+	tr->allocated_snapshot = allocate_snapshot;
+
+	allocate_snapshot = false;
+	return 0;
+}
+
+__init static bool tr_needs_alloc_snapshot(const char *name)
+{
+	char *test;
+	int len = strlen(name);
+	bool ret;
+
+	if (!boot_snapshot_index)
+		return false;
+
+	if (strncmp(name, boot_snapshot_info, len) == 0 &&
+	    boot_snapshot_info[len] == '\t')
+		return true;
+
+	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
+	if (!test)
+		return false;
+
+	sprintf(test, "\t%s\t", name);
+	ret = strstr(boot_snapshot_info, test) == NULL;
+	kfree(test);
+	return ret;
+}
+
+__init void do_allocate_snapshot(const char *name)
+{
+	if (!tr_needs_alloc_snapshot(name))
+		return;
+
+	/*
+	 * When allocate_snapshot is set, the next call to
+	 * allocate_trace_buffers() (called by trace_array_get_by_name())
+	 * will allocate the snapshot buffer. That will also clear
+	 * this flag.
+	 */
+	allocate_snapshot = true;
+}
+
+void __init ftrace_boot_snapshot(void)
+{
+	struct trace_array *tr;
+
+	if (!snapshot_at_boot)
+		return;
+
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		if (!tr->allocated_snapshot)
+			continue;
+
+		tracing_snapshot_instance(tr);
+		trace_array_puts(tr, "** Boot snapshot taken **\n");
+	}
+}
-- 
cgit v1.2.3


From 5dc9cf835aba73c882348aa4f99be83b6e45ad9b Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Thu, 26 Mar 2026 12:42:30 +0100
Subject: vdso/timens: Move functions to new file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As a preparation of the untangling of time namespaces and the vDSO, move
the glue functions between those subsystems into a new file.

While at it, switch the mutex lock and mmap_read_lock() in the vDSO
namespace code to guard().

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260326-vdso-timens-decoupling-v2-1-c82693a7775f@linutronix.de
---
 MAINTAINERS                      |   2 +
 include/linux/time_namespace.h   |   8 ---
 kernel/time/Makefile             |   2 +-
 kernel/time/namespace.c          | 124 ++-------------------------------
 kernel/time/namespace_internal.h |  13 ++++
 kernel/time/namespace_vdso.c     | 146 +++++++++++++++++++++++++++++++++++++++
 lib/vdso/datastore.c             |  25 -------
 7 files changed, 166 insertions(+), 154 deletions(-)
 create mode 100644 kernel/time/namespace_internal.h
 create mode 100644 kernel/time/namespace_vdso.c

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 77fdfcb55f06..6ad74a5196d1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10768,6 +10768,7 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso
 F:	include/asm-generic/vdso/vsyscall.h
 F:	include/vdso/
+F:	kernel/time/namespace_vdso.c
 F:	kernel/time/vsyscall.c
 F:	lib/vdso/
 F:	tools/testing/selftests/vDSO/
@@ -21000,6 +21001,7 @@ F:	include/trace/events/timer*
 F:	kernel/time/itimer.c
 F:	kernel/time/posix-*
 F:	kernel/time/namespace.c
+F:	kernel/time/namespace_vdso.c
 
 POWER MANAGEMENT CORE
 M:	"Rafael J. Wysocki" <rafael@kernel.org>
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index c514d0e5a45c..0421bf1b13d7 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -38,8 +38,6 @@ static inline struct time_namespace *to_time_ns(struct ns_common *ns)
 	return container_of(ns, struct time_namespace, ns);
 }
 void __init time_ns_init(void);
-extern int vdso_join_timens(struct task_struct *task,
-			    struct time_namespace *ns);
 extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
 
 static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
@@ -117,12 +115,6 @@ static inline void __init time_ns_init(void)
 {
 }
 
-static inline int vdso_join_timens(struct task_struct *task,
-				   struct time_namespace *ns)
-{
-	return 0;
-}
-
 static inline void timens_commit(struct task_struct *tsk,
 				 struct time_namespace *ns)
 {
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index f7d52d9543cc..662bccb3b7f9 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -29,6 +29,6 @@ endif
 obj-$(CONFIG_GENERIC_GETTIMEOFDAY)		+= vsyscall.o
 obj-$(CONFIG_DEBUG_FS)				+= timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)			+= test_udelay.o
-obj-$(CONFIG_TIME_NS)				+= namespace.o
+obj-$(CONFIG_TIME_NS)				+= namespace.o namespace_vdso.o
 obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG)		+= clocksource-wdtest.o
 obj-$(CONFIG_TIME_KUNIT_TEST)			+= time_test.o
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 652744e00eb4..903f55a2dfb2 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -19,7 +19,7 @@
 #include <linux/err.h>
 #include <linux/mm.h>
 
-#include <vdso/datapage.h>
+#include "namespace_internal.h"
 
 ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
 				struct timens_offsets *ns_offsets)
@@ -138,117 +138,7 @@ struct time_namespace *copy_time_ns(u64 flags,
 	return clone_time_ns(user_ns, old_ns);
 }
 
-static struct timens_offset offset_from_ts(struct timespec64 off)
-{
-	struct timens_offset ret;
-
-	ret.sec = off.tv_sec;
-	ret.nsec = off.tv_nsec;
-
-	return ret;
-}
-
-/*
- * A time namespace VVAR page has the same layout as the VVAR page which
- * contains the system wide VDSO data.
- *
- * For a normal task the VVAR pages are installed in the normal ordering:
- *     VVAR
- *     PVCLOCK
- *     HVCLOCK
- *     TIMENS   <- Not really required
- *
- * Now for a timens task the pages are installed in the following order:
- *     TIMENS
- *     PVCLOCK
- *     HVCLOCK
- *     VVAR
- *
- * The check for vdso_clock->clock_mode is in the unlikely path of
- * the seq begin magic. So for the non-timens case most of the time
- * 'seq' is even, so the branch is not taken.
- *
- * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
- * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
- * update to finish and for 'seq' to become even anyway.
- *
- * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
- * enforces the time namespace handling path.
- */
-static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
-					 struct time_namespace *ns)
-{
-	struct timens_offset *offset = vc->offset;
-	struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
-	struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
-
-	vc->seq				= 1;
-	vc->clock_mode			= VDSO_CLOCKMODE_TIMENS;
-	offset[CLOCK_MONOTONIC]		= monotonic;
-	offset[CLOCK_MONOTONIC_RAW]	= monotonic;
-	offset[CLOCK_MONOTONIC_COARSE]	= monotonic;
-	offset[CLOCK_BOOTTIME]		= boottime;
-	offset[CLOCK_BOOTTIME_ALARM]	= boottime;
-}
-
-struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	if (likely(vma->vm_mm == current->mm))
-		return current->nsproxy->time_ns->vvar_page;
-
-	/*
-	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
-	 * through interfaces like /proc/$pid/mem or
-	 * process_vm_{readv,writev}() as long as there's no .access()
-	 * in special_mapping_vmops().
-	 * For more details check_vma_flags() and __access_remote_vm()
-	 */
-
-	WARN(1, "vvar_page accessed remotely");
-
-	return NULL;
-}
-
-/*
- * Protects possibly multiple offsets writers racing each other
- * and tasks entering the namespace.
- */
-static DEFINE_MUTEX(offset_lock);
-
-static void timens_set_vvar_page(struct task_struct *task,
-				struct time_namespace *ns)
-{
-	struct vdso_time_data *vdata;
-	struct vdso_clock *vc;
-	unsigned int i;
-
-	if (ns == &init_time_ns)
-		return;
-
-	/* Fast-path, taken by every task in namespace except the first. */
-	if (likely(ns->frozen_offsets))
-		return;
-
-	mutex_lock(&offset_lock);
-	/* Nothing to-do: vvar_page has been already initialized. */
-	if (ns->frozen_offsets)
-		goto out;
-
-	ns->frozen_offsets = true;
-	vdata = page_address(ns->vvar_page);
-	vc = vdata->clock_data;
-
-	for (i = 0; i < CS_BASES; i++)
-		timens_setup_vdso_clock_data(&vc[i], ns);
-
-	if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
-		for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
-			timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
-	}
-
-out:
-	mutex_unlock(&offset_lock);
-}
+DEFINE_MUTEX(timens_offset_lock);
 
 void free_time_ns(struct time_namespace *ns)
 {
@@ -298,12 +188,6 @@ static void timens_put(struct ns_common *ns)
 	put_time_ns(to_time_ns(ns));
 }
 
-void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
-{
-	timens_set_vvar_page(tsk, ns);
-	vdso_join_timens(tsk, ns);
-}
-
 static int timens_install(struct nsset *nsset, struct ns_common *new)
 {
 	struct nsproxy *nsproxy = nsset->nsproxy;
@@ -428,7 +312,7 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
 			goto out;
 	}
 
-	mutex_lock(&offset_lock);
+	mutex_lock(&timens_offset_lock);
 	if (time_ns->frozen_offsets) {
 		err = -EACCES;
 		goto out_unlock;
@@ -453,7 +337,7 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
 	}
 
 out_unlock:
-	mutex_unlock(&offset_lock);
+	mutex_unlock(&timens_offset_lock);
 out:
 	put_time_ns(time_ns);
 
diff --git a/kernel/time/namespace_internal.h b/kernel/time/namespace_internal.h
new file mode 100644
index 000000000000..e85da11abb4d
--- /dev/null
+++ b/kernel/time/namespace_internal.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TIME_NAMESPACE_INTERNAL_H
+#define _TIME_NAMESPACE_INTERNAL_H
+
+#include <linux/mutex.h>
+
+/*
+ * Protects possibly multiple offsets writers racing each other
+ * and tasks entering the namespace.
+ */
+extern struct mutex timens_offset_lock;
+
+#endif /* _TIME_NAMESPACE_INTERNAL_H */
diff --git a/kernel/time/namespace_vdso.c b/kernel/time/namespace_vdso.c
new file mode 100644
index 000000000000..0e154f901501
--- /dev/null
+++ b/kernel/time/namespace_vdso.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Andrei Vagin <avagin@openvz.org>
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+
+#include <linux/cleanup.h>
+#include <linux/mm.h>
+#include <linux/time_namespace.h>
+#include <linux/time.h>
+#include <linux/vdso_datastore.h>
+
+#include <vdso/clocksource.h>
+#include <vdso/datapage.h>
+
+#include "namespace_internal.h"
+
+static struct timens_offset offset_from_ts(struct timespec64 off)
+{
+	struct timens_offset ret;
+
+	ret.sec = off.tv_sec;
+	ret.nsec = off.tv_nsec;
+
+	return ret;
+}
+
+/*
+ * A time namespace VVAR page has the same layout as the VVAR page which
+ * contains the system wide VDSO data.
+ *
+ * For a normal task the VVAR pages are installed in the normal ordering:
+ *     VVAR
+ *     PVCLOCK
+ *     HVCLOCK
+ *     TIMENS   <- Not really required
+ *
+ * Now for a timens task the pages are installed in the following order:
+ *     TIMENS
+ *     PVCLOCK
+ *     HVCLOCK
+ *     VVAR
+ *
+ * The check for vdso_clock->clock_mode is in the unlikely path of
+ * the seq begin magic. So for the non-timens case most of the time
+ * 'seq' is even, so the branch is not taken.
+ *
+ * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
+ * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
+ * update to finish and for 'seq' to become even anyway.
+ *
+ * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
+ * enforces the time namespace handling path.
+ */
+static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
+					 struct time_namespace *ns)
+{
+	struct timens_offset *offset = vc->offset;
+	struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
+	struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
+
+	vc->seq				= 1;
+	vc->clock_mode			= VDSO_CLOCKMODE_TIMENS;
+	offset[CLOCK_MONOTONIC]		= monotonic;
+	offset[CLOCK_MONOTONIC_RAW]	= monotonic;
+	offset[CLOCK_MONOTONIC_COARSE]	= monotonic;
+	offset[CLOCK_BOOTTIME]		= boottime;
+	offset[CLOCK_BOOTTIME_ALARM]	= boottime;
+}
+
+struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops().
+	 * For more details check_vma_flags() and __access_remote_vm()
+	 */
+
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+
+static void timens_set_vvar_page(struct task_struct *task,
+				struct time_namespace *ns)
+{
+	struct vdso_time_data *vdata;
+	struct vdso_clock *vc;
+	unsigned int i;
+
+	if (ns == &init_time_ns)
+		return;
+
+	/* Fast-path, taken by every task in namespace except the first. */
+	if (likely(ns->frozen_offsets))
+		return;
+
+	guard(mutex)(&timens_offset_lock);
+	/* Nothing to-do: vvar_page has been already initialized. */
+	if (ns->frozen_offsets)
+		return;
+
+	ns->frozen_offsets = true;
+	vdata = page_address(ns->vvar_page);
+	vc = vdata->clock_data;
+
+	for (i = 0; i < CS_BASES; i++)
+		timens_setup_vdso_clock_data(&vc[i], ns);
+
+	if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
+		for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
+			timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
+	}
+}
+
+/*
+ * The vvar page layout depends on whether a task belongs to the root or
+ * non-root time namespace. Whenever a task changes its namespace, the VVAR
+ * page tables are cleared and then they will be re-faulted with a
+ * corresponding layout.
+ * See also the comment near timens_setup_vdso_clock_data() for details.
+ */
+static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	guard(mmap_read_lock)(mm);
+	for_each_vma(vmi, vma) {
+		if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
+			zap_vma_pages(vma);
+	}
+	return 0;
+}
+
+void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
+{
+	timens_set_vvar_page(tsk, ns);
+	vdso_join_timens(tsk, ns);
+}
diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c
index faebf5b7cd6e..cf5d784a4a5a 100644
--- a/lib/vdso/datastore.c
+++ b/lib/vdso/datastore.c
@@ -132,28 +132,3 @@ struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned
 					VM_MIXEDMAP | VM_SEALED_SYSMAP,
 					&vdso_vvar_mapping);
 }
-
-#ifdef CONFIG_TIME_NS
-/*
- * The vvar page layout depends on whether a task belongs to the root or
- * non-root time namespace. Whenever a task changes its namespace, the VVAR
- * page tables are cleared and then they will be re-faulted with a
- * corresponding layout.
- * See also the comment near timens_setup_vdso_clock_data() for details.
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
-	struct mm_struct *mm = task->mm;
-	struct vm_area_struct *vma;
-	VMA_ITERATOR(vmi, mm, 0);
-
-	mmap_read_lock(mm);
-	for_each_vma(vmi, vma) {
-		if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
-			zap_vma_pages(vma);
-	}
-	mmap_read_unlock(mm);
-
-	return 0;
-}
-#endif
-- 
cgit v1.2.3


From 1b6c89285d37114d7efe8ab04102a542581cd7da Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Thu, 26 Mar 2026 12:42:31 +0100
Subject: timens: Remove dependency on the vDSO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, missing time namespace support in the vDSO meant that time
namespaces needed to be disabled globally. This was expressed in a hard
dependency on the generic vDSO library. This also meant that architectures
without any vDSO or only a stub vDSO could not enable time namespaces.
Now that all architectures using a real vDSO are using the generic library,
that dependency is not necessary anymore.

Remove the dependency and let all architectures enable time namespaces.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260326-vdso-timens-decoupling-v2-2-c82693a7775f@linutronix.de
---
 include/linux/time_namespace.h   | 28 ++++++++++++++++------------
 init/Kconfig                     |  4 +++-
 kernel/time/Makefile             |  3 ++-
 kernel/time/namespace.c          |  8 ++++----
 kernel/time/namespace_internal.h | 15 +++++++++++++++
 kernel/time/namespace_vdso.c     | 14 ++++++++++++++
 6 files changed, 54 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 0421bf1b13d7..c1de21a27c34 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -25,7 +25,9 @@ struct time_namespace {
 	struct ucounts		*ucounts;
 	struct ns_common	ns;
 	struct timens_offsets	offsets;
+#ifdef CONFIG_TIME_NS_VDSO
 	struct page		*vvar_page;
+#endif
 	/* If set prevents changing offsets after any task joined namespace. */
 	bool			frozen_offsets;
 } __randomize_layout;
@@ -38,7 +40,6 @@ static inline struct time_namespace *to_time_ns(struct ns_common *ns)
 	return container_of(ns, struct time_namespace, ns);
 }
 void __init time_ns_init(void);
-extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
 
 static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
 {
@@ -51,7 +52,6 @@ struct time_namespace *copy_time_ns(u64 flags,
 				    struct time_namespace *old_ns);
 void free_time_ns(struct time_namespace *ns);
 void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
-struct page *find_timens_vvar_page(struct vm_area_struct *vma);
 
 static inline void put_time_ns(struct time_namespace *ns)
 {
@@ -115,11 +115,6 @@ static inline void __init time_ns_init(void)
 {
 }
 
-static inline void timens_commit(struct task_struct *tsk,
-				 struct time_namespace *ns)
-{
-}
-
 static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
 {
 	return NULL;
@@ -146,11 +141,6 @@ static inline void timens_on_fork(struct nsproxy *nsproxy,
 	return;
 }
 
-static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	return NULL;
-}
-
 static inline void timens_add_monotonic(struct timespec64 *ts) { }
 static inline void timens_add_boottime(struct timespec64 *ts) { }
 
@@ -167,4 +157,18 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
 }
 #endif
 
+#ifdef CONFIG_TIME_NS_VDSO
+extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
+struct page *find_timens_vvar_page(struct vm_area_struct *vma);
+#else /* !CONFIG_TIME_NS_VDSO */
+static inline void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
+{
+}
+
+static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif /* CONFIG_TIME_NS_VDSO */
+
 #endif /* _LINUX_TIMENS_H */
diff --git a/init/Kconfig b/init/Kconfig
index 444ce811ea67..5e710b03a27a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1386,12 +1386,14 @@ config UTS_NS
 
 config TIME_NS
 	bool "TIME namespace"
-	depends on GENERIC_GETTIMEOFDAY
 	default y
 	help
 	  In this namespace boottime and monotonic clocks can be set.
 	  The time will keep going with the same pace.
 
+config TIME_NS_VDSO
+	def_bool TIME_NS && GENERIC_GETTIMEOFDAY
+
 config IPC_NS
 	bool "IPC namespace"
 	depends on (SYSVIPC || POSIX_MQUEUE)
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 662bccb3b7f9..eaf290c972f9 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -29,6 +29,7 @@ endif
 obj-$(CONFIG_GENERIC_GETTIMEOFDAY)		+= vsyscall.o
 obj-$(CONFIG_DEBUG_FS)				+= timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)			+= test_udelay.o
-obj-$(CONFIG_TIME_NS)				+= namespace.o namespace_vdso.o
+obj-$(CONFIG_TIME_NS)				+= namespace.o
+obj-$(CONFIG_TIME_NS_VDSO)			+= namespace_vdso.o
 obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG)		+= clocksource-wdtest.o
 obj-$(CONFIG_TIME_KUNIT_TEST)			+= time_test.o
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 903f55a2dfb2..42302cc3f3fb 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -93,8 +93,8 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
 	if (!ns)
 		goto fail_dec;
 
-	ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
-	if (!ns->vvar_page)
+	err = timens_vdso_alloc_vvar_page(ns);
+	if (err)
 		goto fail_free;
 
 	err = ns_common_init(ns);
@@ -109,7 +109,7 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
 	return ns;
 
 fail_free_page:
-	__free_page(ns->vvar_page);
+	timens_vdso_free_vvar_page(ns);
 fail_free:
 	kfree(ns);
 fail_dec:
@@ -146,7 +146,7 @@ void free_time_ns(struct time_namespace *ns)
 	dec_time_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
 	ns_common_free(ns);
-	__free_page(ns->vvar_page);
+	timens_vdso_free_vvar_page(ns);
 	/* Concurrent nstree traversal depends on a grace period. */
 	kfree_rcu(ns, ns.ns_rcu);
 }
diff --git a/kernel/time/namespace_internal.h b/kernel/time/namespace_internal.h
index e85da11abb4d..b37ba179f43b 100644
--- a/kernel/time/namespace_internal.h
+++ b/kernel/time/namespace_internal.h
@@ -4,10 +4,25 @@
 
 #include <linux/mutex.h>
 
+struct time_namespace;
+
 /*
  * Protects possibly multiple offsets writers racing each other
  * and tasks entering the namespace.
  */
 extern struct mutex timens_offset_lock;
 
+#ifdef CONFIG_TIME_NS_VDSO
+int timens_vdso_alloc_vvar_page(struct time_namespace *ns);
+void timens_vdso_free_vvar_page(struct time_namespace *ns);
+#else /* !CONFIG_TIME_NS_VDSO */
+static inline int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
+{
+	return 0;
+}
+static inline void timens_vdso_free_vvar_page(struct time_namespace *ns)
+{
+}
+#endif /* CONFIG_TIME_NS_VDSO */
+
 #endif /* _TIME_NAMESPACE_INTERNAL_H */
diff --git a/kernel/time/namespace_vdso.c b/kernel/time/namespace_vdso.c
index 0e154f901501..88c075cd16a3 100644
--- a/kernel/time/namespace_vdso.c
+++ b/kernel/time/namespace_vdso.c
@@ -144,3 +144,17 @@ void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
 	timens_set_vvar_page(tsk, ns);
 	vdso_join_timens(tsk, ns);
 }
+
+int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
+{
+	ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!ns->vvar_page)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void timens_vdso_free_vvar_page(struct time_namespace *ns)
+{
+	__free_page(ns->vvar_page);
+}
-- 
cgit v1.2.3


From abdd23c8849d45c6bdef0ab6facbbc63bddebbe1 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 25 Mar 2026 10:00:17 +0100
Subject: of: reserved_mem: remove fdt node from the structure

FDT node is not needed for anything besides the initialization, so it can
be simply passed as an argument to the reserved memory region init
function.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://patch.msgid.link/20260325090023.3175348-2-m.szyprowski@samsung.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/memory/tegra/tegra210-emc-table.c |  3 ++-
 drivers/of/of_reserved_mem.c              | 25 +++++++++++--------------
 include/linux/of_reserved_mem.h           |  4 ++--
 kernel/dma/coherent.c                     |  4 +---
 kernel/dma/contiguous.c                   |  3 +--
 kernel/dma/swiotlb.c                      |  5 ++---
 6 files changed, 19 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/memory/tegra/tegra210-emc-table.c b/drivers/memory/tegra/tegra210-emc-table.c
index 34a8785d2861..ac1d1e13482a 100644
--- a/drivers/memory/tegra/tegra210-emc-table.c
+++ b/drivers/memory/tegra/tegra210-emc-table.c
@@ -75,7 +75,8 @@ static const struct reserved_mem_ops tegra210_emc_table_ops = {
 	.device_release = tegra210_emc_table_device_release,
 };
 
-static int tegra210_emc_table_init(struct reserved_mem *rmem)
+static int tegra210_emc_table_init(unsigned long node,
+				   struct reserved_mem *rmem)
 {
 	pr_debug("Tegra210 EMC table at %pa, size %lu bytes\n", &rmem->base,
 		 (unsigned long)rmem->size);
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 1fd28f805610..6705b7afebf0 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -104,7 +104,8 @@ static void __init alloc_reserved_mem_array(void)
 	reserved_mem = new_array;
 }
 
-static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem);
+static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem,
+					      unsigned long node);
 /*
  * fdt_reserved_mem_save_node() - save fdt node for second pass initialization
  */
@@ -118,13 +119,12 @@ static void __init fdt_reserved_mem_save_node(unsigned long node, const char *un
 		return;
 	}
 
-	rmem->fdt_node = node;
 	rmem->name = uname;
 	rmem->base = base;
 	rmem->size = size;
 
 	/* Call the region specific initialization function */
-	fdt_init_reserved_mem_node(rmem);
+	fdt_init_reserved_mem_node(rmem, node);
 
 	reserved_mem_count++;
 }
@@ -483,7 +483,8 @@ static const struct of_device_id __rmem_of_table_sentinel
 /*
  * __reserved_mem_init_node() - call region specific reserved memory init code
  */
-static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
+static int __init __reserved_mem_init_node(struct reserved_mem *rmem,
+					   unsigned long node)
 {
 	extern const struct of_device_id __reservedmem_of_table[];
 	const struct of_device_id *i;
@@ -493,10 +494,10 @@ static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
 		reservedmem_of_init_fn initfn = i->data;
 		const char *compat = i->compatible;
 
-		if (!of_flat_dt_is_compatible(rmem->fdt_node, compat))
+		if (!of_flat_dt_is_compatible(node, compat))
 			continue;
 
-		ret = initfn(rmem);
+		ret = initfn(node, rmem);
 		if (ret == 0) {
 			pr_info("initialized node %s, compatible id %s\n",
 				rmem->name, compat);
@@ -526,11 +527,6 @@ static int __init __rmem_cmp(const void *a, const void *b)
 	if (ra->size > rb->size)
 		return 1;
 
-	if (ra->fdt_node < rb->fdt_node)
-		return -1;
-	if (ra->fdt_node > rb->fdt_node)
-		return 1;
-
 	return 0;
 }
 
@@ -564,19 +560,20 @@ static void __init __rmem_check_for_overlap(void)
 /**
  * fdt_init_reserved_mem_node() - Initialize a reserved memory region
  * @rmem: reserved_mem struct of the memory region to be initialized.
+ * @node: fdt node of the initialized region
  *
  * This function is used to call the region specific initialization
  * function for a reserved memory region.
  */
-static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem)
+static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem,
+					      unsigned long node)
 {
-	unsigned long node = rmem->fdt_node;
 	int err = 0;
 	bool nomap;
 
 	nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
 
-	err = __reserved_mem_init_node(rmem);
+	err = __reserved_mem_init_node(rmem, node);
 	if (err != 0 && err != -ENOENT) {
 		pr_info("node %s compatible matching fail\n", rmem->name);
 		if (nomap)
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index f573423359f4..5159938bfe03 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -11,7 +11,6 @@ struct resource;
 
 struct reserved_mem {
 	const char			*name;
-	unsigned long			fdt_node;
 	const struct reserved_mem_ops	*ops;
 	phys_addr_t			base;
 	phys_addr_t			size;
@@ -25,7 +24,8 @@ struct reserved_mem_ops {
 				  struct device *dev);
 };
 
-typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
+typedef int (*reservedmem_of_init_fn)(unsigned long node,
+				      struct reserved_mem *rmem);
 
 #ifdef CONFIG_OF_RESERVED_MEM
 
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 1147497bc512..34621acbd3c5 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -367,10 +367,8 @@ static const struct reserved_mem_ops rmem_dma_ops = {
 	.device_release	= rmem_dma_device_release,
 };
 
-static int __init rmem_dma_setup(struct reserved_mem *rmem)
+static int __init rmem_dma_setup(unsigned long node, struct reserved_mem *rmem)
 {
-	unsigned long node = rmem->fdt_node;
-
 	if (of_get_flat_dt_prop(node, "reusable", NULL))
 		return -EINVAL;
 
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index c56004d314dc..81a2fa4971ee 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -475,9 +475,8 @@ static const struct reserved_mem_ops rmem_cma_ops = {
 	.device_release = rmem_cma_device_release,
 };
 
-static int __init rmem_cma_setup(struct reserved_mem *rmem)
+static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
 {
-	unsigned long node = rmem->fdt_node;
 	bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
 	struct cma *cma;
 	int err;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d8e6f1d889d5..f3a12e15a951 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -1860,10 +1860,9 @@ static const struct reserved_mem_ops rmem_swiotlb_ops = {
 	.device_release = rmem_swiotlb_device_release,
 };
 
-static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+static int __init rmem_swiotlb_setup(unsigned long node,
+				     struct reserved_mem *rmem)
 {
-	unsigned long node = rmem->fdt_node;
-
 	if (of_get_flat_dt_prop(node, "reusable", NULL) ||
 	    of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
 	    of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
-- 
cgit v1.2.3


From c640cad6a5382ea08a4e052156cfefc8021c51b7 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 25 Mar 2026 10:00:19 +0100
Subject: of: reserved_mem: switch to ops based OF_DECLARE()

Move init function from OF_DECLARE() argument to the given reserved
memory region ops structure and then pass that structure to the
OF_DECLARE() initializer. This node_init callback is mandatory for the
reserved mem driver. Such change makes it possible in the future to add
more functions called by the generic code before given memory region is
initialized and rmem object is created.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://patch.msgid.link/20260325090023.3175348-4-m.szyprowski@samsung.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/memory/tegra/tegra210-emc-table.c | 16 ++++++++--------
 drivers/of/of_reserved_mem.c              | 17 +++++++++++++----
 include/linux/of_reserved_mem.h           | 13 ++++++-------
 kernel/dma/coherent.c                     | 13 +++++++------
 kernel/dma/contiguous.c                   | 15 ++++++++-------
 kernel/dma/swiotlb.c                      | 14 +++++++-------
 6 files changed, 49 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/memory/tegra/tegra210-emc-table.c b/drivers/memory/tegra/tegra210-emc-table.c
index ac1d1e13482a..4b3c478b2743 100644
--- a/drivers/memory/tegra/tegra210-emc-table.c
+++ b/drivers/memory/tegra/tegra210-emc-table.c
@@ -70,20 +70,20 @@ static void tegra210_emc_table_device_release(struct reserved_mem *rmem,
 	memunmap(timings);
 }
 
-static const struct reserved_mem_ops tegra210_emc_table_ops = {
-	.device_init = tegra210_emc_table_device_init,
-	.device_release = tegra210_emc_table_device_release,
-};
-
 static int tegra210_emc_table_init(unsigned long node,
 				   struct reserved_mem *rmem)
 {
 	pr_debug("Tegra210 EMC table at %pa, size %lu bytes\n", &rmem->base,
 		 (unsigned long)rmem->size);
 
-	rmem->ops = &tegra210_emc_table_ops;
-
 	return 0;
 }
+
+static const struct reserved_mem_ops tegra210_emc_table_ops = {
+	.node_init = tegra210_emc_table_init,
+	.device_init = tegra210_emc_table_device_init,
+	.device_release = tegra210_emc_table_device_release,
+};
+
 RESERVEDMEM_OF_DECLARE(tegra210_emc_table, "nvidia,tegra210-emc-table",
-		       tegra210_emc_table_init);
+		       &tegra210_emc_table_ops);
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 9aff460a0420..4dd0d6f6a4b0 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -480,8 +480,16 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
 static const struct of_device_id __rmem_of_table_sentinel
 	__used __section("__reservedmem_of_table_end");
 
-/*
- * __reserved_mem_init_node() - call region specific reserved memory init code
+/**
+ * __reserved_mem_init_node() - initialize a reserved memory region
+ * @rmem: reserved_mem structure to initialize
+ * @node: FDT node describing the reserved memory region
+ *
+ * This function iterates through the reserved memory drivers and calls the
+ * node_init callback for the compatible entry matching the node. On success,
+ * the operations pointer is stored in the reserved_mem structure.
+ *
+ * Return: 0 on success, -ENODEV if no compatible match found
  */
 static int __init __reserved_mem_init_node(struct reserved_mem *rmem,
 					   unsigned long node)
@@ -492,14 +500,15 @@ static int __init __reserved_mem_init_node(struct reserved_mem *rmem,
 
 	for (i = __reservedmem_of_table; ret == -ENODEV &&
 	     i < &__rmem_of_table_sentinel; i++) {
-		reservedmem_of_init_fn initfn = i->data;
+		const struct reserved_mem_ops *ops = i->data;
 		const char *compat = i->compatible;
 
 		if (!of_flat_dt_is_compatible(node, compat))
 			continue;
 
-		ret = initfn(node, rmem);
+		ret = ops->node_init(node, rmem);
 		if (ret == 0) {
+			rmem->ops = ops;
 			pr_info("initialized node %s, compatible id %s\n",
 				rmem->name, compat);
 			break;
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index 5159938bfe03..747a1e73d5dd 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -18,19 +18,17 @@ struct reserved_mem {
 };
 
 struct reserved_mem_ops {
+	int	(*node_init)(unsigned long fdt_node, struct reserved_mem *rmem);
 	int	(*device_init)(struct reserved_mem *rmem,
 			       struct device *dev);
 	void	(*device_release)(struct reserved_mem *rmem,
 				  struct device *dev);
 };
 
-typedef int (*reservedmem_of_init_fn)(unsigned long node,
-				      struct reserved_mem *rmem);
-
 #ifdef CONFIG_OF_RESERVED_MEM
 
-#define RESERVEDMEM_OF_DECLARE(name, compat, init)			\
-	_OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn)
+#define RESERVEDMEM_OF_DECLARE(name, compat, ops)			\
+	_OF_DECLARE(reservedmem, name, compat, ops, struct reserved_mem_ops *)
 
 int of_reserved_mem_device_init_by_idx(struct device *dev,
 				       struct device_node *np, int idx);
@@ -48,8 +46,9 @@ int of_reserved_mem_region_count(const struct device_node *np);
 
 #else
 
-#define RESERVEDMEM_OF_DECLARE(name, compat, init)			\
-	_OF_DECLARE_STUB(reservedmem, name, compat, init, reservedmem_of_init_fn)
+#define RESERVEDMEM_OF_DECLARE(name, compat, ops)			\
+	_OF_DECLARE_STUB(reservedmem, name, compat, ops,		\
+			 struct reserved_mem_ops *)
 
 static inline int of_reserved_mem_device_init_by_idx(struct device *dev,
 					struct device_node *np, int idx)
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 64f9ba618e19..bcdc0f76d2e8 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -362,10 +362,6 @@ static void rmem_dma_device_release(struct reserved_mem *rmem,
 		dev->dma_mem = NULL;
 }
 
-static const struct reserved_mem_ops rmem_dma_ops = {
-	.device_init	= rmem_dma_device_init,
-	.device_release	= rmem_dma_device_release,
-};
 
 static int __init rmem_dma_setup(unsigned long node, struct reserved_mem *rmem)
 {
@@ -388,7 +384,6 @@ static int __init rmem_dma_setup(unsigned long node, struct reserved_mem *rmem)
 	}
 #endif
 
-	rmem->ops = &rmem_dma_ops;
 	pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
 		&rmem->base, (unsigned long)rmem->size / SZ_1M);
 	return 0;
@@ -405,5 +400,11 @@ static int __init dma_init_reserved_memory(void)
 core_initcall(dma_init_reserved_memory);
 #endif /* CONFIG_DMA_GLOBAL_POOL */
 
-RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup);
+static const struct reserved_mem_ops rmem_dma_ops = {
+	.node_init	= rmem_dma_setup,
+	.device_init	= rmem_dma_device_init,
+	.device_release	= rmem_dma_device_release,
+};
+
+RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", &rmem_dma_ops);
 #endif
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index e6fc6906b5c0..efeebda92537 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -470,11 +470,6 @@ static void rmem_cma_device_release(struct reserved_mem *rmem,
 	dev->cma_area = NULL;
 }
 
-static const struct reserved_mem_ops rmem_cma_ops = {
-	.device_init	= rmem_cma_device_init,
-	.device_release = rmem_cma_device_release,
-};
-
 static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
 {
 	bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
@@ -499,7 +494,6 @@ static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
 	if (default_cma)
 		dma_contiguous_default_area = cma;
 
-	rmem->ops = &rmem_cma_ops;
 	rmem->priv = cma;
 
 	pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
@@ -511,5 +505,12 @@ static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
 
 	return 0;
 }
-RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup);
+
+static const struct reserved_mem_ops rmem_cma_ops = {
+	.node_init	= rmem_cma_setup,
+	.device_init	= rmem_cma_device_init,
+	.device_release = rmem_cma_device_release,
+};
+
+RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", &rmem_cma_ops);
 #endif
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f3a12e15a951..44b566d20e04 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -1855,11 +1855,6 @@ static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
 	dev->dma_io_tlb_mem = &io_tlb_default_mem;
 }
 
-static const struct reserved_mem_ops rmem_swiotlb_ops = {
-	.device_init = rmem_swiotlb_device_init,
-	.device_release = rmem_swiotlb_device_release,
-};
-
 static int __init rmem_swiotlb_setup(unsigned long node,
 				     struct reserved_mem *rmem)
 {
@@ -1869,11 +1864,16 @@ static int __init rmem_swiotlb_setup(unsigned long node,
 	    of_get_flat_dt_prop(node, "no-map", NULL))
 		return -EINVAL;
 
-	rmem->ops = &rmem_swiotlb_ops;
 	pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
 		&rmem->base, (unsigned long)rmem->size / SZ_1M);
 	return 0;
 }
 
-RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+	.node_init = rmem_swiotlb_setup,
+	.device_init = rmem_swiotlb_device_init,
+	.device_release = rmem_swiotlb_device_release,
+};
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", &rmem_swiotlb_ops);
 #endif /* CONFIG_DMA_RESTRICTED_POOL */
-- 
cgit v1.2.3


From 7fd3981202b9aef8862aa06ca0d75496c0f9681f Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 25 Mar 2026 10:00:20 +0100
Subject: of: reserved_mem: replace CMA quirks by generic methods

Add optional reserved memory callbacks to perform region verification and
early fixup, then move all CMA related code in of_reserved_mem.c to them.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://patch.msgid.link/20260325090023.3175348-5-m.szyprowski@samsung.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/of/of_reserved_mem.c    | 118 ++++++++++++++++++++++++++++------------
 include/linux/cma.h             |  10 ----
 include/linux/dma-map-ops.h     |   3 -
 include/linux/of_reserved_mem.h |   3 +
 kernel/dma/contiguous.c         |  70 +++++++++++++++++-------
 5 files changed, 137 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 4dd0d6f6a4b0..5dd585bcf8a8 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -24,8 +24,6 @@
 #include <linux/slab.h>
 #include <linux/memblock.h>
 #include <linux/kmemleak.h>
-#include <linux/cma.h>
-#include <linux/dma-map-ops.h>
 
 #include "of_private.h"
 
@@ -106,6 +104,11 @@ static void __init alloc_reserved_mem_array(void)
 
 static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem,
 					      unsigned long node);
+static int fdt_validate_reserved_mem_node(unsigned long node,
+					  phys_addr_t *align);
+static int fdt_fixup_reserved_mem_node(unsigned long node,
+				       phys_addr_t base, phys_addr_t size);
+
 /*
  * fdt_reserved_mem_save_node() - save fdt node for second pass initialization
  */
@@ -154,21 +157,19 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
 					     const char *uname)
 {
 	phys_addr_t base, size;
-	int i, len;
+	int i, len, err;
 	const __be32 *prop;
-	bool nomap, default_cma;
+	bool nomap;
 
 	prop = of_flat_dt_get_addr_size_prop(node, "reg", &len);
 	if (!prop)
 		return -ENOENT;
 
 	nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
-	default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
 
-	if (default_cma && cma_skip_dt_default_reserved_mem()) {
-		pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n");
-		return -EINVAL;
-	}
+	err = fdt_validate_reserved_mem_node(node, NULL);
+	if (err && err != -ENODEV)
+		return err;
 
 	for (i = 0; i < len; i++) {
 		u64 b, s;
@@ -179,10 +180,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
 		size = s;
 
 		if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) {
-			/* Architecture specific contiguous memory fixup. */
-			if (of_flat_dt_is_compatible(node, "shared-dma-pool") &&
-			    of_get_flat_dt_prop(node, "reusable", NULL))
-				dma_contiguous_early_fixup(base, size);
+			fdt_fixup_reserved_mem_node(node, base, size);
 			pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
 				uname, &base, (unsigned long)(size / SZ_1M));
 		} else {
@@ -253,17 +251,19 @@ void __init fdt_scan_reserved_mem_reg_nodes(void)
 
 	fdt_for_each_subnode(child, fdt, node) {
 		const char *uname;
-		bool default_cma = of_get_flat_dt_prop(child, "linux,cma-default", NULL);
 		u64 b, s;
+		int ret;
 
 		if (!of_fdt_device_is_available(fdt, child))
 			continue;
-		if (default_cma && cma_skip_dt_default_reserved_mem())
-			continue;
 
 		if (!of_flat_dt_get_addr_size(child, "reg", &b, &s))
 			continue;
 
+		ret = fdt_validate_reserved_mem_node(child, NULL);
+		if (ret && ret != -ENODEV)
+			continue;
+
 		base = b;
 		size = s;
 
@@ -397,7 +397,7 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
 	phys_addr_t base = 0, align = 0, size;
 	int i, len;
 	const __be32 *prop;
-	bool nomap, default_cma;
+	bool nomap;
 	int ret;
 
 	prop = of_get_flat_dt_prop(node, "size", &len);
@@ -421,19 +421,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
 	}
 
 	nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
-	default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
-
-	if (default_cma && cma_skip_dt_default_reserved_mem()) {
-		pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n");
-		return -EINVAL;
-	}
 
-	/* Need adjust the alignment to satisfy the CMA requirement */
-	if (IS_ENABLED(CONFIG_CMA)
-	    && of_flat_dt_is_compatible(node, "shared-dma-pool")
-	    && of_get_flat_dt_prop(node, "reusable", NULL)
-	    && !nomap)
-		align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES);
+	ret = fdt_validate_reserved_mem_node(node, &align);
+	if (ret && ret != -ENODEV)
+		return ret;
 
 	prop = of_flat_dt_get_addr_size_prop(node, "alloc-ranges", &len);
 	if (prop) {
@@ -468,18 +459,76 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
 		       uname, (unsigned long)(size / SZ_1M));
 		return -ENOMEM;
 	}
-	/* Architecture specific contiguous memory fixup. */
-	if (of_flat_dt_is_compatible(node, "shared-dma-pool") &&
-	    of_get_flat_dt_prop(node, "reusable", NULL))
-		dma_contiguous_early_fixup(base, size);
+
+	fdt_fixup_reserved_mem_node(node, base, size);
+
 	/* Save region in the reserved_mem array */
 	fdt_reserved_mem_save_node(node, uname, base, size);
 	return 0;
 }
 
+extern const struct of_device_id __reservedmem_of_table[];
 static const struct of_device_id __rmem_of_table_sentinel
 	__used __section("__reservedmem_of_table_end");
 
+/**
+ * fdt_fixup_reserved_mem_node() - call fixup function for a reserved memory node
+ * @node: FDT node to fixup
+ * @base: base address of the reserved memory region
+ * @size: size of the reserved memory region
+ *
+ * This function iterates through the reserved memory drivers and calls
+ * the node_fixup callback for the compatible entry matching the node.
+ *
+ * Return: 0 on success, -ENODEV if no compatible match found
+ */
+static int __init fdt_fixup_reserved_mem_node(unsigned long node,
+					phys_addr_t base, phys_addr_t size)
+{
+	const struct of_device_id *i;
+	int ret = -ENODEV;
+
+	for (i = __reservedmem_of_table; ret == -ENODEV &&
+	     i < &__rmem_of_table_sentinel; i++) {
+		const struct reserved_mem_ops *ops = i->data;
+
+		if (!of_flat_dt_is_compatible(node, i->compatible))
+			continue;
+
+		if (ops->node_fixup)
+			ret = ops->node_fixup(node, base, size);
+	}
+	return ret;
+}
+
+/**
+ * fdt_validate_reserved_mem_node() - validate a reserved memory node
+ * @node: FDT node to validate
+ * @align: pointer to store the validated alignment (may be modified by callback)
+ *
+ * This function iterates through the reserved memory drivers and calls
+ * the node_validate callback for the compatible entry matching the node.
+ *
+ * Return: 0 on success, -ENODEV if no compatible match found
+ */
+static int __init fdt_validate_reserved_mem_node(unsigned long node, phys_addr_t *align)
+{
+	const struct of_device_id *i;
+	int ret = -ENODEV;
+
+	for (i = __reservedmem_of_table; ret == -ENODEV &&
+	     i < &__rmem_of_table_sentinel; i++) {
+		const struct reserved_mem_ops *ops = i->data;
+
+		if (!of_flat_dt_is_compatible(node, i->compatible))
+			continue;
+
+		if (ops->node_validate)
+			ret = ops->node_validate(node, align);
+	}
+	return ret;
+}
+
 /**
  * __reserved_mem_init_node() - initialize a reserved memory region
  * @rmem: reserved_mem structure to initialize
@@ -494,7 +543,6 @@ static const struct of_device_id __rmem_of_table_sentinel
 static int __init __reserved_mem_init_node(struct reserved_mem *rmem,
 					   unsigned long node)
 {
-	extern const struct of_device_id __reservedmem_of_table[];
 	const struct of_device_id *i;
 	int ret = -ENODEV;
 
@@ -511,7 +559,7 @@ static int __init __reserved_mem_init_node(struct reserved_mem *rmem,
 			rmem->ops = ops;
 			pr_info("initialized node %s, compatible id %s\n",
 				rmem->name, compat);
-			break;
+			return ret;
 		}
 	}
 	return ret;
diff --git a/include/linux/cma.h b/include/linux/cma.h
index d0793eaaadaa..8555d38a97b1 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -61,14 +61,4 @@ extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
 extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end);
 
 extern void cma_reserve_pages_on_error(struct cma *cma);
-
-#ifdef CONFIG_DMA_CMA
-extern bool cma_skip_dt_default_reserved_mem(void);
-#else
-static inline bool cma_skip_dt_default_reserved_mem(void)
-{
-	return false;
-}
-#endif
-
 #endif
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 60b63756df82..55ecd2934225 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -147,9 +147,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page,
 {
 	__free_pages(page, get_order(size));
 }
-static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
-{
-}
 #endif /* CONFIG_DMA_CMA*/
 
 #ifdef CONFIG_DMA_DECLARE_COHERENT
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index 747a1e73d5dd..e8b20b29fa68 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -18,6 +18,9 @@ struct reserved_mem {
 };
 
 struct reserved_mem_ops {
+	int	(*node_validate)(unsigned long fdt_node, phys_addr_t *align);
+	int	(*node_fixup)(unsigned long fdt_node, phys_addr_t base,
+			      phys_addr_t size);
 	int	(*node_init)(unsigned long fdt_node, struct reserved_mem *rmem);
 	int	(*device_init)(struct reserved_mem *rmem,
 			       struct device *dev);
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index efeebda92537..65d216663e81 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -91,16 +91,6 @@ static int __init early_cma(char *p)
 }
 early_param("cma", early_cma);
 
-/*
- * cma_skip_dt_default_reserved_mem - This is called from the
- * reserved_mem framework to detect if the default cma region is being
- * set by the "cma=" kernel parameter.
- */
-bool __init cma_skip_dt_default_reserved_mem(void)
-{
-	return size_cmdline != -1;
-}
-
 #ifdef CONFIG_DMA_NUMA_CMA
 
 static struct cma *dma_contiguous_numa_area[MAX_NUMNODES];
@@ -470,25 +460,65 @@ static void rmem_cma_device_release(struct reserved_mem *rmem,
 	dev->cma_area = NULL;
 }
 
+static int __init __rmem_cma_verify_node(unsigned long node)
+{
+	if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
+	    of_get_flat_dt_prop(node, "no-map", NULL))
+		return -ENODEV;
+
+	if (size_cmdline != -1 &&
+	    of_get_flat_dt_prop(node, "linux,cma-default", NULL)) {
+		pr_err("Skipping dt linux,cma-default node in favor for \"cma=\" kernel param.\n");
+		return -EBUSY;
+	}
+	return 0;
+}
+
+static int __init rmem_cma_validate(unsigned long node, phys_addr_t *align)
+{
+	int ret = __rmem_cma_verify_node(node);
+
+	if (ret)
+		return ret;
+
+	if (align)
+		*align = max_t(phys_addr_t, *align, CMA_MIN_ALIGNMENT_BYTES);
+
+	return 0;
+}
+
+static int __init rmem_cma_fixup(unsigned long node, phys_addr_t base,
+				    phys_addr_t size)
+{
+	int ret = __rmem_cma_verify_node(node);
+
+	if (ret)
+		return ret;
+
+	/* Architecture specific contiguous memory fixup. */
+	dma_contiguous_early_fixup(base, size);
+	return 0;
+}
+
 static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
 {
 	bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
 	struct cma *cma;
-	int err;
+	int ret;
 
-	if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
-	    of_get_flat_dt_prop(node, "no-map", NULL))
-		return -ENODEV;
+	ret = __rmem_cma_verify_node(node);
+	if (ret)
+		return ret;
 
 	if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) {
 		pr_err("Reserved memory: incorrect alignment of CMA region\n");
 		return -EINVAL;
 	}
 
-	err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
-	if (err) {
+	ret = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
+	if (ret) {
 		pr_err("Reserved memory: unable to setup CMA region\n");
-		return err;
+		return ret;
 	}
 
 	if (default_cma)
@@ -499,14 +529,16 @@ static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
 	pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
 		&rmem->base, (unsigned long)rmem->size / SZ_1M);
 
-	err = dma_heap_cma_register_heap(cma);
-	if (err)
+	ret = dma_heap_cma_register_heap(cma);
+	if (ret)
 		pr_warn("Couldn't register CMA heap.");
 
 	return 0;
 }
 
 static const struct reserved_mem_ops rmem_cma_ops = {
+	.node_validate  = rmem_cma_validate,
+	.node_fixup	= rmem_cma_fixup,
 	.node_init	= rmem_cma_setup,
 	.device_init	= rmem_cma_device_init,
 	.device_release = rmem_cma_device_release,
-- 
cgit v1.2.3


From edfaa81d5da5fbfe3c73fece3ca0417a04cc4ba2 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Date: Tue, 24 Mar 2026 18:56:24 +0200
Subject: resource: Add __resource_contains_unbound() for internal contains
 checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

__find_resource_space() currently uses resource_contains() but for
tentative resources that are not yet crafted into the resource tree. As
resource_contains() checks that IORESOURCE_UNSET is not set for either of
the resources, the caller has to hack around this problem by clearing the
IORESOURCE_UNSET flag (essentially lying to resource_contains()).

Instead of the hack, introduce __resource_contains_unbound() for cases like
this.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Xifer <xiferdev@gmail.com>
Link: https://patch.msgid.link/20260324165633.4583-2-ilpo.jarvinen@linux.intel.com
---
 include/linux/ioport.h | 20 +++++++++++++++++---
 kernel/resource.c      |  4 ++--
 2 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5533a5debf3f..19d5e04564d9 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -304,14 +304,28 @@ static inline unsigned long resource_ext_type(const struct resource *res)
 {
 	return res->flags & IORESOURCE_EXT_TYPE_BITS;
 }
-/* True iff r1 completely contains r2 */
-static inline bool resource_contains(const struct resource *r1, const struct resource *r2)
+
+/*
+ * For checking if @r1 completely contains @r2 for resources that have real
+ * addresses but are not yet crafted into the resource tree. Normally
+ * resource_contains() should be used instead of this function as it checks
+ * also IORESOURCE_UNSET flag.
+ */
+static inline bool __resource_contains_unbound(const struct resource *r1,
+					       const struct resource *r2)
 {
 	if (resource_type(r1) != resource_type(r2))
 		return false;
+
+	return r1->start <= r2->start && r1->end >= r2->end;
+}
+/* True iff r1 completely contains r2 */
+static inline bool resource_contains(const struct resource *r1, const struct resource *r2)
+{
 	if (r1->flags & IORESOURCE_UNSET || r2->flags & IORESOURCE_UNSET)
 		return false;
-	return r1->start <= r2->start && r1->end >= r2->end;
+
+	return __resource_contains_unbound(r1, r2);
 }
 
 /* True if any part of r1 overlaps r2 */
diff --git a/kernel/resource.c b/kernel/resource.c
index bb966699da31..1e2f1dfc0edd 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -754,7 +754,7 @@ static int __find_resource_space(struct resource *root, struct resource *old,
 		/* Check for overflow after ALIGN() */
 		avail.start = ALIGN(tmp.start, constraint->align);
 		avail.end = tmp.end;
-		avail.flags = new->flags & ~IORESOURCE_UNSET;
+		avail.flags = new->flags;
 		if (avail.start >= tmp.start) {
 			alloc.flags = avail.flags;
 			if (alignf) {
@@ -765,7 +765,7 @@ static int __find_resource_space(struct resource *root, struct resource *old,
 			}
 			alloc.end = alloc.start + size - 1;
 			if (alloc.start <= alloc.end &&
-			    resource_contains(&avail, &alloc)) {
+			    __resource_contains_unbound(&avail, &alloc)) {
 				new->start = alloc.start;
 				new->end = alloc.end;
 				return 0;
-- 
cgit v1.2.3


From f72e77c33e4b5657af35125e75bab249256030f3 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Tue, 17 Mar 2026 09:01:20 -0700
Subject: device property: Make modifications of fwnode "flags" thread safe

In various places in the kernel, we modify the fwnode "flags" member
by doing either:
  fwnode->flags |= SOME_FLAG;
  fwnode->flags &= ~SOME_FLAG;

This type of modification is not thread-safe. If two threads are both
mucking with the flags at the same time then one can clobber the
other.

While flags are often modified while under the "fwnode_link_lock",
this is not universally true.

Create some accessor functions for setting, clearing, and testing the
FWNODE flags and move all users to these accessor functions. New
accessor functions use set_bit() and clear_bit(), which are
thread-safe.

Cc: stable@vger.kernel.org
Fixes: c2c724c868c4 ("driver core: Add fw_devlink_parse_fwtree()")
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
Reviewed-by: Saravana Kannan <saravanak@kernel.org>
Link: https://patch.msgid.link/20260317090112.v2.1.I0a4d03104ecd5103df3d76f66c8d21b1d15a2e38@changeid
[ Fix fwnode_clear_flag() argument alignment, restore dropped blank
  line in fwnode_dev_initialized(), and remove unnecessary parentheses
  around fwnode_test_flag() calls. - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 drivers/base/core.c                 | 24 ++++++++++----------
 drivers/bus/imx-weim.c              |  2 +-
 drivers/i2c/i2c-core-of.c           |  2 +-
 drivers/net/phy/mdio_bus_provider.c |  4 ++--
 drivers/of/base.c                   |  2 +-
 drivers/of/dynamic.c                |  2 +-
 drivers/of/platform.c               |  2 +-
 drivers/spi/spi.c                   |  2 +-
 include/linux/fwnode.h              | 44 +++++++++++++++++++++++++++----------
 9 files changed, 53 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 5933852df837..763e17e9f148 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -182,7 +182,7 @@ void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
 	if (fwnode->dev)
 		return;
 
-	fwnode->flags |= FWNODE_FLAG_NOT_DEVICE;
+	fwnode_set_flag(fwnode, FWNODE_FLAG_NOT_DEVICE);
 	fwnode_links_purge_consumers(fwnode);
 
 	fwnode_for_each_available_child_node(fwnode, child)
@@ -228,7 +228,7 @@ static void __fw_devlink_pickup_dangling_consumers(struct fwnode_handle *fwnode,
 	if (fwnode->dev && fwnode->dev->bus)
 		return;
 
-	fwnode->flags |= FWNODE_FLAG_NOT_DEVICE;
+	fwnode_set_flag(fwnode, FWNODE_FLAG_NOT_DEVICE);
 	__fwnode_links_move_consumers(fwnode, new_sup);
 
 	fwnode_for_each_available_child_node(fwnode, child)
@@ -1012,7 +1012,7 @@ static void device_links_missing_supplier(struct device *dev)
 static bool dev_is_best_effort(struct device *dev)
 {
 	return (fw_devlink_best_effort && dev->can_match) ||
-		(dev->fwnode && (dev->fwnode->flags & FWNODE_FLAG_BEST_EFFORT));
+		(dev->fwnode && fwnode_test_flag(dev->fwnode, FWNODE_FLAG_BEST_EFFORT));
 }
 
 static struct fwnode_handle *fwnode_links_check_suppliers(
@@ -1723,11 +1723,11 @@ bool fw_devlink_is_strict(void)
 
 static void fw_devlink_parse_fwnode(struct fwnode_handle *fwnode)
 {
-	if (fwnode->flags & FWNODE_FLAG_LINKS_ADDED)
+	if (fwnode_test_flag(fwnode, FWNODE_FLAG_LINKS_ADDED))
 		return;
 
 	fwnode_call_int_op(fwnode, add_links);
-	fwnode->flags |= FWNODE_FLAG_LINKS_ADDED;
+	fwnode_set_flag(fwnode, FWNODE_FLAG_LINKS_ADDED);
 }
 
 static void fw_devlink_parse_fwtree(struct fwnode_handle *fwnode)
@@ -1885,7 +1885,7 @@ static bool fwnode_init_without_drv(struct fwnode_handle *fwnode)
 	struct device *dev;
 	bool ret;
 
-	if (!(fwnode->flags & FWNODE_FLAG_INITIALIZED))
+	if (!fwnode_test_flag(fwnode, FWNODE_FLAG_INITIALIZED))
 		return false;
 
 	dev = get_dev_from_fwnode(fwnode);
@@ -2001,10 +2001,10 @@ static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle,
 	 * We aren't trying to find all cycles. Just a cycle between con and
 	 * sup_handle.
 	 */
-	if (sup_handle->flags & FWNODE_FLAG_VISITED)
+	if (fwnode_test_flag(sup_handle, FWNODE_FLAG_VISITED))
 		return false;
 
-	sup_handle->flags |= FWNODE_FLAG_VISITED;
+	fwnode_set_flag(sup_handle, FWNODE_FLAG_VISITED);
 
 	/* Termination condition. */
 	if (sup_handle == con_handle) {
@@ -2074,7 +2074,7 @@ static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle,
 	}
 
 out:
-	sup_handle->flags &= ~FWNODE_FLAG_VISITED;
+	fwnode_clear_flag(sup_handle, FWNODE_FLAG_VISITED);
 	put_device(sup_dev);
 	put_device(con_dev);
 	put_device(par_dev);
@@ -2127,7 +2127,7 @@ static int fw_devlink_create_devlink(struct device *con,
 	 * When such a flag is set, we can't create device links where P is the
 	 * supplier of C as that would delay the probe of C.
 	 */
-	if (sup_handle->flags & FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD &&
+	if (fwnode_test_flag(sup_handle, FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD) &&
 	    fwnode_is_ancestor_of(sup_handle, con->fwnode))
 		return -EINVAL;
 
@@ -2150,7 +2150,7 @@ static int fw_devlink_create_devlink(struct device *con,
 	else
 		flags = FW_DEVLINK_FLAGS_PERMISSIVE;
 
-	if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE)
+	if (fwnode_test_flag(sup_handle, FWNODE_FLAG_NOT_DEVICE))
 		sup_dev = fwnode_get_next_parent_dev(sup_handle);
 	else
 		sup_dev = get_dev_from_fwnode(sup_handle);
@@ -2162,7 +2162,7 @@ static int fw_devlink_create_devlink(struct device *con,
 		 * supplier device indefinitely.
 		 */
 		if (sup_dev->links.status == DL_DEV_NO_DRIVER &&
-		    sup_handle->flags & FWNODE_FLAG_INITIALIZED) {
+		    fwnode_test_flag(sup_handle, FWNODE_FLAG_INITIALIZED)) {
 			dev_dbg(con,
 				"Not linking %pfwf - dev might never probe\n",
 				sup_handle);
diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c
index 83d623d97f5f..f735e0462c55 100644
--- a/drivers/bus/imx-weim.c
+++ b/drivers/bus/imx-weim.c
@@ -332,7 +332,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action,
 			 * fw_devlink doesn't skip adding consumers to this
 			 * device.
 			 */
-			rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE;
+			fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE);
 			if (!of_platform_device_create(rd->dn, NULL, &pdev->dev)) {
 				dev_err(&pdev->dev,
 					"Failed to create child device '%pOF'\n",
diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c
index eb7fb202355f..354a88d0599e 100644
--- a/drivers/i2c/i2c-core-of.c
+++ b/drivers/i2c/i2c-core-of.c
@@ -180,7 +180,7 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action,
 		 * Clear the flag before adding the device so that fw_devlink
 		 * doesn't skip adding consumers to this device.
 		 */
-		rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE;
+		fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE);
 		client = of_i2c_register_device(adap, rd->dn);
 		if (IS_ERR(client)) {
 			dev_err(&adap->dev, "failed to create client for '%pOF'\n",
diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c
index 4b0637405740..fd691c5424ea 100644
--- a/drivers/net/phy/mdio_bus_provider.c
+++ b/drivers/net/phy/mdio_bus_provider.c
@@ -294,8 +294,8 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
 		return -EINVAL;
 
 	if (bus->parent && bus->parent->of_node)
-		bus->parent->of_node->fwnode.flags |=
-					FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD;
+		fwnode_set_flag(&bus->parent->of_node->fwnode,
+				FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD);
 
 	WARN(bus->state != MDIOBUS_ALLOCATED &&
 	     bus->state != MDIOBUS_UNREGISTERED,
diff --git a/drivers/of/base.c b/drivers/of/base.c
index bf4a51887d74..180dbce65b98 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1943,7 +1943,7 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
 		if (name)
 			of_stdout = of_find_node_opts_by_path(name, &of_stdout_options);
 		if (of_stdout)
-			of_stdout->fwnode.flags |= FWNODE_FLAG_BEST_EFFORT;
+			fwnode_set_flag(&of_stdout->fwnode, FWNODE_FLAG_BEST_EFFORT);
 	}
 
 	if (!of_aliases)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 1a06175def37..ade288372101 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -225,7 +225,7 @@ static void __of_attach_node(struct device_node *np)
 	np->sibling = np->parent->child;
 	np->parent->child = np;
 	of_node_clear_flag(np, OF_DETACHED);
-	np->fwnode.flags |= FWNODE_FLAG_NOT_DEVICE;
+	fwnode_set_flag(&np->fwnode, FWNODE_FLAG_NOT_DEVICE);
 
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index ba591fbceb56..7eeaf8e27b5b 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -742,7 +742,7 @@ static int of_platform_notify(struct notifier_block *nb,
 		 * Clear the flag before adding the device so that fw_devlink
 		 * doesn't skip adding consumers to this device.
 		 */
-		rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE;
+		fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE);
 		/* pdev_parent may be NULL when no bus platform device */
 		pdev_parent = of_find_device_by_node(parent);
 		pdev = of_platform_device_create(rd->dn, NULL,
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 53dee314d76a..e686ecaf3dc8 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -4937,7 +4937,7 @@ static int of_spi_notify(struct notifier_block *nb, unsigned long action,
 		 * Clear the flag before adding the device so that fw_devlink
 		 * doesn't skip adding consumers to this device.
 		 */
-		rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE;
+		fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE);
 		spi = of_register_spi_device(ctlr, rd->dn);
 		put_device(&ctlr->dev);
 
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 097be89487bf..80b38fbf2121 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -15,6 +15,7 @@
 #define _LINUX_FWNODE_H_
 
 #include <linux/bits.h>
+#include <linux/bitops.h>
 #include <linux/err.h>
 #include <linux/list.h>
 #include <linux/types.h>
@@ -42,12 +43,12 @@ struct device;
  *		suppliers. Only enforce ordering with suppliers that have
  *		drivers.
  */
-#define FWNODE_FLAG_LINKS_ADDED			BIT(0)
-#define FWNODE_FLAG_NOT_DEVICE			BIT(1)
-#define FWNODE_FLAG_INITIALIZED			BIT(2)
-#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD	BIT(3)
-#define FWNODE_FLAG_BEST_EFFORT			BIT(4)
-#define FWNODE_FLAG_VISITED			BIT(5)
+#define FWNODE_FLAG_LINKS_ADDED			0
+#define FWNODE_FLAG_NOT_DEVICE			1
+#define FWNODE_FLAG_INITIALIZED			2
+#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD	3
+#define FWNODE_FLAG_BEST_EFFORT			4
+#define FWNODE_FLAG_VISITED			5
 
 struct fwnode_handle {
 	struct fwnode_handle *secondary;
@@ -57,7 +58,7 @@ struct fwnode_handle {
 	struct device *dev;
 	struct list_head suppliers;
 	struct list_head consumers;
-	u8 flags;
+	unsigned long flags;
 };
 
 /*
@@ -212,16 +213,37 @@ static inline void fwnode_init(struct fwnode_handle *fwnode,
 	INIT_LIST_HEAD(&fwnode->suppliers);
 }
 
+static inline void fwnode_set_flag(struct fwnode_handle *fwnode,
+				   unsigned int bit)
+{
+	set_bit(bit, &fwnode->flags);
+}
+
+static inline void fwnode_clear_flag(struct fwnode_handle *fwnode,
+				     unsigned int bit)
+{
+	clear_bit(bit, &fwnode->flags);
+}
+
+static inline void fwnode_assign_flag(struct fwnode_handle *fwnode,
+				      unsigned int bit, bool value)
+{
+	assign_bit(bit, &fwnode->flags, value);
+}
+
+static inline bool fwnode_test_flag(struct fwnode_handle *fwnode,
+				    unsigned int bit)
+{
+	return test_bit(bit, &fwnode->flags);
+}
+
 static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode,
 					  bool initialized)
 {
 	if (IS_ERR_OR_NULL(fwnode))
 		return;
 
-	if (initialized)
-		fwnode->flags |= FWNODE_FLAG_INITIALIZED;
-	else
-		fwnode->flags &= ~FWNODE_FLAG_INITIALIZED;
+	fwnode_assign_flag(fwnode, FWNODE_FLAG_INITIALIZED, initialized);
 }
 
 int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup,
-- 
cgit v1.2.3


From a800398e746f8c9010c626a71d92a05b708f7622 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Tue, 24 Mar 2026 10:05:40 +0000
Subject: net: stmmac: remove axi_kbbe, axi_mb and axi_rb members

axi_kbbe, axi_mb and axi_rb are all written, but nothing ever reads
their values. Remove the code that sets these and the struct members.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1w4ydo-0000000Dlpb-34jd@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c | 1 -
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 3 ---
 include/linux/stmmac.h                                | 3 ---
 3 files changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c
index d245546b90db..02c786ce5dd4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c
@@ -231,7 +231,6 @@ motorcomm_default_plat_data(struct pci_dev *pdev)
 
 	plat->axi->axi_wr_osr_lmt	= 1;
 	plat->axi->axi_rd_osr_lmt	= 1;
-	plat->axi->axi_mb		= true;
 	plat->axi->axi_blen_regval	= DMA_AXI_BLEN4 | DMA_AXI_BLEN8 |
 					  DMA_AXI_BLEN16 | DMA_AXI_BLEN32;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 545b8a3425eb..5cae2aa72906 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -109,10 +109,7 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
 
 	axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en");
 	axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm");
-	axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe");
 	axi->axi_fb = of_property_read_bool(np, "snps,fb");
-	axi->axi_mb = of_property_read_bool(np, "snps,mb");
-	axi->axi_rb =  of_property_read_bool(np, "snps,rb");
 
 	if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt))
 		axi->axi_wr_osr_lmt = 1;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 5b2bece81448..eaaee329ef9d 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -133,10 +133,7 @@ struct stmmac_axi {
 	u32 axi_blen_regval;
 	bool axi_lpi_en;
 	bool axi_xit_frm;
-	bool axi_kbbe;
 	bool axi_fb;
-	bool axi_mb;
-	bool axi_rb;
 };
 
 struct stmmac_rxq_cfg {
-- 
cgit v1.2.3


From d134feeb5df33fbf77f482f52a366a44642dba09 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Thu, 19 Mar 2026 10:29:32 +0100
Subject: printk: add print_hex_dump_devel()

Add print_hex_dump_devel() as the hex dump equivalent of pr_devel(),
which emits output only when DEBUG is enabled, but keeps call sites
compiled otherwise.

Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Reviewed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/printk.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 63d516c873b4..54e3c621fec3 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -801,6 +801,19 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type,
 }
 #endif
 
+#if defined(DEBUG)
+#define print_hex_dump_devel(prefix_str, prefix_type, rowsize,		\
+			     groupsize, buf, len, ascii)		\
+	print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize,	\
+		       groupsize, buf, len, ascii)
+#else
+static inline void print_hex_dump_devel(const char *prefix_str, int prefix_type,
+					int rowsize, int groupsize,
+					const void *buf, size_t len, bool ascii)
+{
+}
+#endif
+
 /**
  * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
  * @prefix_str: string to prefix each line with;
-- 
cgit v1.2.3


From 7be18a1fa00eab5283b35c13e26c6b76fcaab9ce Mon Sep 17 00:00:00 2001
From: Pavan Chebbi <pavan.chebbi@broadcom.com>
Date: Sat, 14 Mar 2026 08:16:01 -0700
Subject: fwctl/bnxt_en: Move common definitions to include/linux/bnxt/

We have common definitions that are now going to be used
by more than one component outside of bnxt (bnxt_re and
fwctl)

Move bnxt_ulp.h to include/linux/bnxt/ as ulp.h.

Link: https://patch.msgid.link/r/20260314151605.932749-2-pavan.chebbi@broadcom.com
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Cc: linux-rdma@vger.kernel.org
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/bnxt_re/debugfs.c           |   2 +-
 drivers/infiniband/hw/bnxt_re/main.c              |   2 +-
 drivers/infiniband/hw/bnxt_re/qplib_fp.c          |   2 +-
 drivers/infiniband/hw/bnxt_re/qplib_res.h         |   2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         |   2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c |   2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c |   2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c   |   2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c     |   2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h     | 128 ----------------------
 include/linux/bnxt/ulp.h                          | 128 ++++++++++++++++++++++
 11 files changed, 137 insertions(+), 137 deletions(-)
 delete mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
 create mode 100644 include/linux/bnxt/ulp.h

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c
index a2ad79c3bbd0..5fed2cf66be3 100644
--- a/drivers/infiniband/hw/bnxt_re/debugfs.c
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.c
@@ -10,8 +10,8 @@
 #include <linux/pci.h>
 #include <linux/seq_file.h>
 #include <rdma/ib_addr.h>
+#include <linux/bnxt/ulp.h>
 
-#include "bnxt_ulp.h"
 #include "roce_hsi.h"
 #include "qplib_res.h"
 #include "qplib_sp.h"
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index b576f05e3b26..47afccddf55e 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -55,8 +55,8 @@
 #include <rdma/ib_umem.h>
 #include <rdma/ib_addr.h>
 #include <linux/hashtable.h>
+#include <linux/bnxt/ulp.h>
 
-#include "bnxt_ulp.h"
 #include "roce_hsi.h"
 #include "qplib_res.h"
 #include "qplib_sp.h"
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 2d7932b3c492..b4c7b8f582ba 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -46,6 +46,7 @@
 #include <linux/delay.h>
 #include <linux/prefetch.h>
 #include <linux/if_ether.h>
+#include <linux/bnxt/ulp.h>
 #include <rdma/ib_mad.h>
 
 #include "roce_hsi.h"
@@ -55,7 +56,6 @@
 #include "qplib_sp.h"
 #include "qplib_fp.h"
 #include <rdma/ib_addr.h>
-#include "bnxt_ulp.h"
 #include "bnxt_re.h"
 #include "ib_verbs.h"
 
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 9a5dcf97b6f4..0a4a03efeb0b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -39,7 +39,7 @@
 #ifndef __BNXT_QPLIB_RES_H__
 #define __BNXT_QPLIB_RES_H__
 
-#include "bnxt_ulp.h"
+#include <linux/bnxt/ulp.h>
 
 extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0751c0e4581a..ed56dba327b4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -59,10 +59,10 @@
 #include <net/netdev_rx_queue.h>
 #include <linux/pci-tph.h>
 #include <linux/bnxt/hsi.h>
+#include <linux/bnxt/ulp.h>
 
 #include "bnxt.h"
 #include "bnxt_hwrm.h"
-#include "bnxt_ulp.h"
 #include "bnxt_sriov.h"
 #include "bnxt_ethtool.h"
 #include "bnxt_dcb.h"
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 15de802bbac4..230cd95d30a2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -13,12 +13,12 @@
 #include <net/devlink.h>
 #include <net/netdev_lock.h>
 #include <linux/bnxt/hsi.h>
+#include <linux/bnxt/ulp.h>
 #include "bnxt.h"
 #include "bnxt_hwrm.h"
 #include "bnxt_vfr.h"
 #include "bnxt_devlink.h"
 #include "bnxt_ethtool.h"
-#include "bnxt_ulp.h"
 #include "bnxt_ptp.h"
 #include "bnxt_coredump.h"
 #include "bnxt_nvm_defs.h"
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 28d0ece2e7b1..c78a5b161cb5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -27,9 +27,9 @@
 #include <net/netdev_queues.h>
 #include <net/netlink.h>
 #include <linux/bnxt/hsi.h>
+#include <linux/bnxt/ulp.h>
 #include "bnxt.h"
 #include "bnxt_hwrm.h"
-#include "bnxt_ulp.h"
 #include "bnxt_xdp.h"
 #include "bnxt_ptp.h"
 #include "bnxt_ethtool.h"
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 7f9829287c49..edcc002e4ca3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -17,9 +17,9 @@
 #include <linux/etherdevice.h>
 #include <net/dcbnl.h>
 #include <linux/bnxt/hsi.h>
+#include <linux/bnxt/ulp.h>
 #include "bnxt.h"
 #include "bnxt_hwrm.h"
-#include "bnxt_ulp.h"
 #include "bnxt_sriov.h"
 #include "bnxt_vfr.h"
 #include "bnxt_ethtool.h"
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index e1e82a72cf1b..11ced44ead29 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -22,10 +22,10 @@
 #include <linux/auxiliary_bus.h>
 #include <net/netdev_lock.h>
 #include <linux/bnxt/hsi.h>
+#include <linux/bnxt/ulp.h>
 
 #include "bnxt.h"
 #include "bnxt_hwrm.h"
-#include "bnxt_ulp.h"
 
 static DEFINE_IDA(bnxt_aux_dev_ids);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
deleted file mode 100644
index 3c5b8a53f715..000000000000
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/* Broadcom NetXtreme-C/E network driver.
- *
- * Copyright (c) 2016-2018 Broadcom Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation.
- */
-
-#ifndef BNXT_ULP_H
-#define BNXT_ULP_H
-
-#define BNXT_MIN_ROCE_CP_RINGS	2
-#define BNXT_MIN_ROCE_STAT_CTXS	1
-
-#define BNXT_MAX_ROCE_MSIX_VF		2
-#define BNXT_MAX_ROCE_MSIX_NPAR_PF	5
-#define BNXT_MAX_ROCE_MSIX		64
-
-struct hwrm_async_event_cmpl;
-struct bnxt;
-
-struct bnxt_msix_entry {
-	u32	vector;
-	u32	ring_idx;
-	u32	db_offset;
-};
-
-struct bnxt_ulp_ops {
-	/* async_notifier() cannot sleep (in BH context) */
-	void (*ulp_async_notifier)(void *, struct hwrm_async_event_cmpl *);
-	void (*ulp_irq_stop)(void *, bool);
-	void (*ulp_irq_restart)(void *, struct bnxt_msix_entry *);
-};
-
-struct bnxt_fw_msg {
-	void	*msg;
-	int	msg_len;
-	void	*resp;
-	int	resp_max_len;
-	int	timeout;
-};
-
-struct bnxt_ulp {
-	void		*handle;
-	struct bnxt_ulp_ops __rcu *ulp_ops;
-	unsigned long	*async_events_bmap;
-	u16		max_async_event_id;
-	u16		msix_requested;
-};
-
-struct bnxt_en_dev {
-	struct net_device *net;
-	struct pci_dev *pdev;
-	struct bnxt_msix_entry			msix_entries[BNXT_MAX_ROCE_MSIX];
-	u32 flags;
-	#define BNXT_EN_FLAG_ROCEV1_CAP		0x1
-	#define BNXT_EN_FLAG_ROCEV2_CAP		0x2
-	#define BNXT_EN_FLAG_ROCE_CAP		(BNXT_EN_FLAG_ROCEV1_CAP | \
-						 BNXT_EN_FLAG_ROCEV2_CAP)
-	#define BNXT_EN_FLAG_ULP_STOPPED	0x8
-	#define BNXT_EN_FLAG_VF			0x10
-#define BNXT_EN_VF(edev)	((edev)->flags & BNXT_EN_FLAG_VF)
-	#define BNXT_EN_FLAG_ROCE_VF_RES_MGMT	0x20
-	#define BNXT_EN_FLAG_SW_RES_LMT		0x40
-#define BNXT_EN_SW_RES_LMT(edev) ((edev)->flags & BNXT_EN_FLAG_SW_RES_LMT)
-
-	struct bnxt_ulp			*ulp_tbl;
-	int				l2_db_size;	/* Doorbell BAR size in
-							 * bytes mapped by L2
-							 * driver.
-							 */
-	int				l2_db_size_nc;	/* Doorbell BAR size in
-							 * bytes mapped as non-
-							 * cacheable.
-							 */
-	int				l2_db_offset;	/* Doorbell offset in
-							 * bytes within
-							 * l2_db_size_nc.
-							 */
-	u16				chip_num;
-	u16				hw_ring_stats_size;
-	u16				pf_port_id;
-	unsigned long			en_state;	/* Could be checked in
-							 * RoCE driver suspend
-							 * mode only. Will be
-							 * updated in resume.
-							 */
-	void __iomem                    *bar0;
-
-	u16				ulp_num_msix_vec;
-	u16				ulp_num_ctxs;
-
-					/* serialize ulp operations */
-	struct mutex			en_dev_lock;
-};
-
-static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev)
-{
-	if (edev && rcu_access_pointer(edev->ulp_tbl->ulp_ops))
-		return true;
-	return false;
-}
-
-int bnxt_get_ulp_msix_num(struct bnxt *bp);
-int bnxt_get_ulp_msix_num_in_use(struct bnxt *bp);
-void bnxt_set_ulp_msix_num(struct bnxt *bp, int num);
-int bnxt_get_ulp_stat_ctxs(struct bnxt *bp);
-void bnxt_set_ulp_stat_ctxs(struct bnxt *bp, int num_ctxs);
-int bnxt_get_ulp_stat_ctxs_in_use(struct bnxt *bp);
-void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp);
-void bnxt_ulp_stop(struct bnxt *bp);
-void bnxt_ulp_start(struct bnxt *bp, int err);
-void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);
-void bnxt_ulp_irq_stop(struct bnxt *bp);
-void bnxt_ulp_irq_restart(struct bnxt *bp, int err);
-void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl);
-void bnxt_rdma_aux_device_uninit(struct bnxt *bp);
-void bnxt_rdma_aux_device_del(struct bnxt *bp);
-void bnxt_rdma_aux_device_add(struct bnxt *bp);
-void bnxt_rdma_aux_device_init(struct bnxt *bp);
-int bnxt_register_dev(struct bnxt_en_dev *edev, struct bnxt_ulp_ops *ulp_ops,
-		      void *handle);
-void bnxt_unregister_dev(struct bnxt_en_dev *edev);
-int bnxt_send_msg(struct bnxt_en_dev *edev, struct bnxt_fw_msg *fw_msg);
-void bnxt_register_async_events(struct bnxt_en_dev *edev,
-				unsigned long *events_bmap, u16 max_id);
-#endif
diff --git a/include/linux/bnxt/ulp.h b/include/linux/bnxt/ulp.h
new file mode 100644
index 000000000000..3c5b8a53f715
--- /dev/null
+++ b/include/linux/bnxt/ulp.h
@@ -0,0 +1,128 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2016-2018 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_ULP_H
+#define BNXT_ULP_H
+
+#define BNXT_MIN_ROCE_CP_RINGS	2
+#define BNXT_MIN_ROCE_STAT_CTXS	1
+
+#define BNXT_MAX_ROCE_MSIX_VF		2
+#define BNXT_MAX_ROCE_MSIX_NPAR_PF	5
+#define BNXT_MAX_ROCE_MSIX		64
+
+struct hwrm_async_event_cmpl;
+struct bnxt;
+
+struct bnxt_msix_entry {
+	u32	vector;
+	u32	ring_idx;
+	u32	db_offset;
+};
+
+struct bnxt_ulp_ops {
+	/* async_notifier() cannot sleep (in BH context) */
+	void (*ulp_async_notifier)(void *, struct hwrm_async_event_cmpl *);
+	void (*ulp_irq_stop)(void *, bool);
+	void (*ulp_irq_restart)(void *, struct bnxt_msix_entry *);
+};
+
+struct bnxt_fw_msg {
+	void	*msg;
+	int	msg_len;
+	void	*resp;
+	int	resp_max_len;
+	int	timeout;
+};
+
+struct bnxt_ulp {
+	void		*handle;
+	struct bnxt_ulp_ops __rcu *ulp_ops;
+	unsigned long	*async_events_bmap;
+	u16		max_async_event_id;
+	u16		msix_requested;
+};
+
+struct bnxt_en_dev {
+	struct net_device *net;
+	struct pci_dev *pdev;
+	struct bnxt_msix_entry			msix_entries[BNXT_MAX_ROCE_MSIX];
+	u32 flags;
+	#define BNXT_EN_FLAG_ROCEV1_CAP		0x1
+	#define BNXT_EN_FLAG_ROCEV2_CAP		0x2
+	#define BNXT_EN_FLAG_ROCE_CAP		(BNXT_EN_FLAG_ROCEV1_CAP | \
+						 BNXT_EN_FLAG_ROCEV2_CAP)
+	#define BNXT_EN_FLAG_ULP_STOPPED	0x8
+	#define BNXT_EN_FLAG_VF			0x10
+#define BNXT_EN_VF(edev)	((edev)->flags & BNXT_EN_FLAG_VF)
+	#define BNXT_EN_FLAG_ROCE_VF_RES_MGMT	0x20
+	#define BNXT_EN_FLAG_SW_RES_LMT		0x40
+#define BNXT_EN_SW_RES_LMT(edev) ((edev)->flags & BNXT_EN_FLAG_SW_RES_LMT)
+
+	struct bnxt_ulp			*ulp_tbl;
+	int				l2_db_size;	/* Doorbell BAR size in
+							 * bytes mapped by L2
+							 * driver.
+							 */
+	int				l2_db_size_nc;	/* Doorbell BAR size in
+							 * bytes mapped as non-
+							 * cacheable.
+							 */
+	int				l2_db_offset;	/* Doorbell offset in
+							 * bytes within
+							 * l2_db_size_nc.
+							 */
+	u16				chip_num;
+	u16				hw_ring_stats_size;
+	u16				pf_port_id;
+	unsigned long			en_state;	/* Could be checked in
+							 * RoCE driver suspend
+							 * mode only. Will be
+							 * updated in resume.
+							 */
+	void __iomem                    *bar0;
+
+	u16				ulp_num_msix_vec;
+	u16				ulp_num_ctxs;
+
+					/* serialize ulp operations */
+	struct mutex			en_dev_lock;
+};
+
+static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev)
+{
+	if (edev && rcu_access_pointer(edev->ulp_tbl->ulp_ops))
+		return true;
+	return false;
+}
+
+int bnxt_get_ulp_msix_num(struct bnxt *bp);
+int bnxt_get_ulp_msix_num_in_use(struct bnxt *bp);
+void bnxt_set_ulp_msix_num(struct bnxt *bp, int num);
+int bnxt_get_ulp_stat_ctxs(struct bnxt *bp);
+void bnxt_set_ulp_stat_ctxs(struct bnxt *bp, int num_ctxs);
+int bnxt_get_ulp_stat_ctxs_in_use(struct bnxt *bp);
+void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp);
+void bnxt_ulp_stop(struct bnxt *bp);
+void bnxt_ulp_start(struct bnxt *bp, int err);
+void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);
+void bnxt_ulp_irq_stop(struct bnxt *bp);
+void bnxt_ulp_irq_restart(struct bnxt *bp, int err);
+void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl);
+void bnxt_rdma_aux_device_uninit(struct bnxt *bp);
+void bnxt_rdma_aux_device_del(struct bnxt *bp);
+void bnxt_rdma_aux_device_add(struct bnxt *bp);
+void bnxt_rdma_aux_device_init(struct bnxt *bp);
+int bnxt_register_dev(struct bnxt_en_dev *edev, struct bnxt_ulp_ops *ulp_ops,
+		      void *handle);
+void bnxt_unregister_dev(struct bnxt_en_dev *edev);
+int bnxt_send_msg(struct bnxt_en_dev *edev, struct bnxt_fw_msg *fw_msg);
+void bnxt_register_async_events(struct bnxt_en_dev *edev,
+				unsigned long *events_bmap, u16 max_id);
+#endif
-- 
cgit v1.2.3


From 2c7c85c8c7881d57c5fa1114f4b0dbd7fc53a36f Mon Sep 17 00:00:00 2001
From: Pavan Chebbi <pavan.chebbi@broadcom.com>
Date: Sat, 14 Mar 2026 08:16:02 -0700
Subject: fwctl/bnxt_en: Refactor aux bus functions to be more generic

Up until now there was only one auxiliary device that bnxt
created and that was for RoCE driver. bnxt fwctl is also
going to use an aux bus device that bnxt should create.
This requires some nomenclature changes and refactoring of
the existing bnxt aux dev functions.

Convert 'aux_priv' and 'edev' members of struct bnxt into
arrays where each element contains supported auxbus device's
data. Move struct bnxt_aux_priv from bnxt.h to ulp.h because
that is where it belongs. Make aux bus init/uninit/add/del
functions more generic which will loop through all the aux
device types. Make bnxt_ulp_start/stop functions (the only
other common functions applicable to any aux device) loop
through the aux devices to update their config and states.
Make callers of bnxt_ulp_start() call it only when there
are no errors.

Also, as an improvement in code, bnxt_register_dev() can skip
unnecessary dereferencing of edev from bp, instead use the
edev pointer from the function parameter.

Future patches will reuse these functions to add an aux bus
device for fwctl.

Link: https://patch.msgid.link/r/20260314151605.932749-3-pavan.chebbi@broadcom.com
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         |  47 +--
 drivers/net/ethernet/broadcom/bnxt/bnxt.h         |  19 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c |   8 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c |   2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c     | 339 ++++++++++++++--------
 include/linux/bnxt/ulp.h                          |  25 +-
 6 files changed, 273 insertions(+), 167 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index ed56dba327b4..67e6a082123a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6896,7 +6896,8 @@ vnic_mru:
 #endif
 	if ((bp->flags & BNXT_FLAG_STRIP_VLAN) || def_vlan)
 		req->flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE);
-	if (vnic->vnic_id == BNXT_VNIC_DEFAULT && bnxt_ulp_registered(bp->edev))
+	if (vnic->vnic_id == BNXT_VNIC_DEFAULT &&
+	    bnxt_ulp_registered(bp->edev[BNXT_AUXDEV_RDMA]))
 		req->flags |= cpu_to_le32(bnxt_get_roce_vnic_mode(bp));
 
 	return hwrm_req_send(bp, req);
@@ -8031,6 +8032,7 @@ static int bnxt_get_avail_msix(struct bnxt *bp, int num);
 
 static int __bnxt_reserve_rings(struct bnxt *bp)
 {
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
 	struct bnxt_hw_rings hwr = {0};
 	int rx_rings, old_rx_rings, rc;
 	int cp = bp->cp_nr_rings;
@@ -8041,7 +8043,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
 	if (!bnxt_need_reserve_rings(bp))
 		return 0;
 
-	if (BNXT_NEW_RM(bp) && !bnxt_ulp_registered(bp->edev)) {
+	if (BNXT_NEW_RM(bp) && !bnxt_ulp_registered(edev)) {
 		ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want);
 		if (!ulp_msix)
 			bnxt_set_ulp_stat_ctxs(bp, 0);
@@ -8092,8 +8094,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
 	}
 	rx_rings = min_t(int, rx_rings, hwr.grp);
 	hwr.cp = min_t(int, hwr.cp, bp->cp_nr_rings);
-	if (bnxt_ulp_registered(bp->edev) &&
-	    hwr.stat > bnxt_get_ulp_stat_ctxs(bp))
+	if (bnxt_ulp_registered(edev) && hwr.stat > bnxt_get_ulp_stat_ctxs(bp))
 		hwr.stat -= bnxt_get_ulp_stat_ctxs(bp);
 	hwr.cp = min_t(int, hwr.cp, hwr.stat);
 	rc = bnxt_trim_rings(bp, &rx_rings, &hwr.tx, hwr.cp, sh);
@@ -8137,7 +8138,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
 	    !netif_is_rxfh_configured(bp->dev))
 		bnxt_set_dflt_rss_indir_tbl(bp, NULL);
 
-	if (!bnxt_ulp_registered(bp->edev) && BNXT_NEW_RM(bp)) {
+	if (!bnxt_ulp_registered(edev) && BNXT_NEW_RM(bp)) {
 		int resv_msix, resv_ctx, ulp_ctxs;
 		struct bnxt_hw_resc *hw_resc;
 
@@ -11494,6 +11495,7 @@ static void bnxt_clear_int_mode(struct bnxt *bp)
 
 int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
 {
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
 	bool irq_cleared = false;
 	bool irq_change = false;
 	int tcs = bp->num_tc;
@@ -11503,7 +11505,7 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
 	if (!bnxt_need_reserve_rings(bp))
 		return 0;
 
-	if (BNXT_NEW_RM(bp) && !bnxt_ulp_registered(bp->edev)) {
+	if (BNXT_NEW_RM(bp) && !bnxt_ulp_registered(edev)) {
 		int ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want);
 
 		if (ulp_msix > bp->ulp_num_msix_want)
@@ -14593,7 +14595,7 @@ static void bnxt_fw_echo_reply(struct bnxt *bp)
 static void bnxt_ulp_restart(struct bnxt *bp)
 {
 	bnxt_ulp_stop(bp);
-	bnxt_ulp_start(bp, 0);
+	bnxt_ulp_start(bp);
 }
 
 static void bnxt_sp_task(struct work_struct *work)
@@ -14750,7 +14752,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
 		hwr.cp_p5 = hwr.tx + rx;
 	rc = bnxt_hwrm_check_rings(bp, &hwr);
 	if (!rc && pci_msix_can_alloc_dyn(bp->pdev)) {
-		if (!bnxt_ulp_registered(bp->edev)) {
+		if (!bnxt_ulp_registered(bp->edev[BNXT_AUXDEV_RDMA])) {
 			hwr.cp += bnxt_get_ulp_msix_num(bp);
 			hwr.cp = min_t(int, hwr.cp, bnxt_get_max_func_irqs(bp));
 		}
@@ -15270,7 +15272,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
 			bnxt_dl_health_fw_status_update(bp, true);
 		}
 		netdev_unlock(bp->dev);
-		bnxt_ulp_start(bp, 0);
+		bnxt_ulp_start(bp);
 		bnxt_reenable_sriov(bp);
 		netdev_lock(bp->dev);
 		bnxt_vf_reps_alloc(bp);
@@ -15292,7 +15294,8 @@ fw_reset_abort:
 	bnxt_fw_reset_abort(bp, rc);
 	netdev_unlock(bp->dev);
 ulp_start:
-	bnxt_ulp_start(bp, rc);
+	if (!rc)
+		bnxt_ulp_start(bp);
 }
 
 static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
@@ -16327,12 +16330,13 @@ static void bnxt_remove_one(struct pci_dev *pdev)
 	if (BNXT_PF(bp))
 		__bnxt_sriov_disable(bp);
 
-	bnxt_rdma_aux_device_del(bp);
+	bnxt_aux_devices_del(bp);
 
 	unregister_netdev(dev);
 	bnxt_ptp_clear(bp);
 
-	bnxt_rdma_aux_device_uninit(bp);
+	bnxt_aux_devices_uninit(bp);
+	bnxt_auxdev_id_free(bp, bp->auxdev_id);
 
 	bnxt_free_l2_filters(bp, true);
 	bnxt_free_ntp_fltrs(bp, true);
@@ -16918,7 +16922,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	bnxt_set_tpa_flags(bp);
 	bnxt_init_ring_params(bp);
 	bnxt_set_ring_params(bp);
-	bnxt_rdma_aux_device_init(bp);
+	mutex_init(&bp->auxdev_lock);
+	if (!bnxt_auxdev_id_alloc(bp))
+		bnxt_aux_devices_init(bp);
 	rc = bnxt_set_dflt_rings(bp, true);
 	if (rc) {
 		if (BNXT_VF(bp) && rc == -ENODEV) {
@@ -16983,7 +16989,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	bnxt_dl_fw_reporters_create(bp);
 
-	bnxt_rdma_aux_device_add(bp);
+	bnxt_aux_devices_add(bp);
 
 	bnxt_print_device_info(bp);
 
@@ -16991,7 +16997,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	return 0;
 init_err_cleanup:
-	bnxt_rdma_aux_device_uninit(bp);
+	bnxt_aux_devices_uninit(bp);
+	bnxt_auxdev_id_free(bp, bp->auxdev_id);
 	bnxt_dl_unregister(bp);
 init_err_dl:
 	bnxt_shutdown_tc(bp);
@@ -17125,9 +17132,10 @@ static int bnxt_resume(struct device *device)
 
 resume_exit:
 	netdev_unlock(bp->dev);
-	bnxt_ulp_start(bp, rc);
-	if (!rc)
+	if (!rc) {
+		bnxt_ulp_start(bp);
 		bnxt_reenable_sriov(bp);
+	}
 	return rc;
 }
 
@@ -17307,9 +17315,10 @@ static void bnxt_io_resume(struct pci_dev *pdev)
 		netif_device_attach(netdev);
 
 	netdev_unlock(netdev);
-	bnxt_ulp_start(bp, err);
-	if (!err)
+	if (!err) {
+		bnxt_ulp_start(bp);
 		bnxt_reenable_sriov(bp);
+	}
 }
 
 static const struct pci_error_handlers bnxt_err_handler = {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index a97d651130df..e40490a35626 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -24,12 +24,12 @@
 #include <linux/interrupt.h>
 #include <linux/rhashtable.h>
 #include <linux/crash_dump.h>
-#include <linux/auxiliary_bus.h>
 #include <net/devlink.h>
 #include <net/dst_metadata.h>
 #include <net/xdp.h>
 #include <linux/dim.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/bnxt/ulp.h>
 #ifdef CONFIG_TEE_BNXT_FW
 #include <linux/firmware/broadcom/tee_bnxt_fw.h>
 #endif
@@ -2085,12 +2085,6 @@ struct bnxt_fw_health {
 #define BNXT_FW_IF_RETRY		10
 #define BNXT_FW_SLOT_RESET_RETRY	4
 
-struct bnxt_aux_priv {
-	struct auxiliary_device aux_dev;
-	struct bnxt_en_dev *edev;
-	int id;
-};
-
 enum board_idx {
 	BCM57301,
 	BCM57302,
@@ -2350,8 +2344,8 @@ struct bnxt {
 #define BNXT_CHIP_P5_AND_MINUS(bp)		\
 	(BNXT_CHIP_P3(bp) || BNXT_CHIP_P4(bp) || BNXT_CHIP_P5(bp))
 
-	struct bnxt_aux_priv	*aux_priv;
-	struct bnxt_en_dev	*edev;
+	struct bnxt_aux_priv	*aux_priv[__BNXT_AUXDEV_MAX];
+	struct bnxt_en_dev	*edev[__BNXT_AUXDEV_MAX];
 
 	struct bnxt_napi	**bnapi;
 
@@ -2763,6 +2757,13 @@ struct bnxt {
 	struct bnxt_ctx_pg_info	*fw_crash_mem;
 	u32			fw_crash_len;
 	struct bnxt_bs_trace_info bs_trace[BNXT_TRACE_MAX];
+	int			auxdev_id;
+	/* synchronize validity checks of available aux devices */
+	struct mutex		auxdev_lock;
+	u8			auxdev_state[__BNXT_AUXDEV_MAX];
+#define	BNXT_ADEV_STATE_NONE	0
+#define	BNXT_ADEV_STATE_INIT	1
+#define	BNXT_ADEV_STATE_ADD	2
 };
 
 #define BNXT_NUM_RX_RING_STATS			8
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 230cd95d30a2..835f2b413931 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -440,13 +440,13 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change,
 					   "reload is unsupported while VFs are allocated or being configured");
 			netdev_unlock(bp->dev);
 			rtnl_unlock();
-			bnxt_ulp_start(bp, 0);
+			bnxt_ulp_start(bp);
 			return -EOPNOTSUPP;
 		}
 		if (bp->dev->reg_state == NETREG_UNREGISTERED) {
 			netdev_unlock(bp->dev);
 			rtnl_unlock();
-			bnxt_ulp_start(bp, 0);
+			bnxt_ulp_start(bp);
 			return -ENODEV;
 		}
 		if (netif_running(bp->dev))
@@ -578,8 +578,8 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti
 	}
 	netdev_unlock(bp->dev);
 	rtnl_unlock();
-	if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
-		bnxt_ulp_start(bp, rc);
+	if (!rc && action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
+		bnxt_ulp_start(bp);
 	return rc;
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index c78a5b161cb5..20ab8152dab6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -5212,7 +5212,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
 
 	memset(buf, 0, sizeof(u64) * bp->num_tests);
 	if (etest->flags & ETH_TEST_FL_OFFLINE &&
-	    bnxt_ulp_registered(bp->edev)) {
+	    bnxt_ulp_registered(bp->edev[BNXT_AUXDEV_RDMA])) {
 		etest->flags |= ETH_TEST_FL_FAILED;
 		netdev_warn(dev, "Offline tests cannot be run with RoCE driver loaded\n");
 		return;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 11ced44ead29..3705ef3a75c9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -29,9 +29,32 @@
 
 static DEFINE_IDA(bnxt_aux_dev_ids);
 
+struct bnxt_aux_device {
+	const char *name;
+};
+
+static void bnxt_auxdev_set_state(struct bnxt *bp, int idx, int state)
+{
+	bp->auxdev_state[idx] = state;
+}
+
+static bool bnxt_auxdev_is_init(struct bnxt *bp, int idx)
+{
+	return (bp->auxdev_state[idx] == BNXT_ADEV_STATE_INIT);
+}
+
+static bool bnxt_auxdev_is_active(struct bnxt *bp, int idx)
+{
+	return (bp->auxdev_state[idx] == BNXT_ADEV_STATE_ADD);
+}
+
+static struct bnxt_aux_device bnxt_aux_devices[__BNXT_AUXDEV_MAX] = {{
+	.name		= "rdma",
+}};
+
 static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent)
 {
-	struct bnxt_en_dev *edev = bp->edev;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
 	int num_msix, i;
 
 	if (!edev->ulp_tbl->msix_requested) {
@@ -51,61 +74,75 @@ static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent)
 
 int bnxt_get_ulp_msix_num(struct bnxt *bp)
 {
-	if (bp->edev)
-		return bp->edev->ulp_num_msix_vec;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
+
+	if (edev)
+		return edev->ulp_num_msix_vec;
 	return 0;
 }
 
 void bnxt_set_ulp_msix_num(struct bnxt *bp, int num)
 {
-	if (bp->edev)
-		bp->edev->ulp_num_msix_vec = num;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
+
+	if (edev)
+		edev->ulp_num_msix_vec = num;
 }
 
 int bnxt_get_ulp_msix_num_in_use(struct bnxt *bp)
 {
-	if (bnxt_ulp_registered(bp->edev))
-		return bp->edev->ulp_num_msix_vec;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
+
+	if (bnxt_ulp_registered(edev))
+		return edev->ulp_num_msix_vec;
 	return 0;
 }
 
 int bnxt_get_ulp_stat_ctxs(struct bnxt *bp)
 {
-	if (bp->edev)
-		return bp->edev->ulp_num_ctxs;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
+
+	if (edev)
+		return edev->ulp_num_ctxs;
 	return 0;
 }
 
 void bnxt_set_ulp_stat_ctxs(struct bnxt *bp, int num_ulp_ctx)
 {
-	if (bp->edev)
-		bp->edev->ulp_num_ctxs = num_ulp_ctx;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
+
+	if (edev)
+		edev->ulp_num_ctxs = num_ulp_ctx;
 }
 
 int bnxt_get_ulp_stat_ctxs_in_use(struct bnxt *bp)
 {
-	if (bnxt_ulp_registered(bp->edev))
-		return bp->edev->ulp_num_ctxs;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
+
+	if (bnxt_ulp_registered(edev))
+		return edev->ulp_num_ctxs;
 	return 0;
 }
 
 void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp)
 {
-	if (bp->edev) {
-		bp->edev->ulp_num_ctxs = BNXT_MIN_ROCE_STAT_CTXS;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
+
+	if (edev) {
+		edev->ulp_num_ctxs = BNXT_MIN_ROCE_STAT_CTXS;
 		/* Reserve one additional stat_ctx for PF0 (except
 		 * on 1-port NICs) as it also creates one stat_ctx
 		 * for PF1 in case of RoCE bonding.
 		 */
 		if (BNXT_PF(bp) && !bp->pf.port_id &&
 		    bp->port_count > 1)
-			bp->edev->ulp_num_ctxs++;
+			edev->ulp_num_ctxs++;
 
 		/* Reserve one additional stat_ctx when the device is capable
 		 * of supporting port mirroring on RDMA device.
 		 */
 		if (BNXT_MIRROR_ON_ROCE_CAP(bp))
-			bp->edev->ulp_num_ctxs++;
+			edev->ulp_num_ctxs++;
 	}
 }
 
@@ -141,7 +178,7 @@ int bnxt_register_dev(struct bnxt_en_dev *edev,
 
 	edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp);
 
-	bnxt_fill_msix_vecs(bp, bp->edev->msix_entries);
+	bnxt_fill_msix_vecs(bp, edev->msix_entries);
 exit:
 	mutex_unlock(&edev->en_dev_lock);
 	netdev_unlock(dev);
@@ -227,20 +264,27 @@ EXPORT_SYMBOL(bnxt_send_msg);
 
 void bnxt_ulp_stop(struct bnxt *bp)
 {
-	struct bnxt_aux_priv *aux_priv = bp->aux_priv;
-	struct bnxt_en_dev *edev = bp->edev;
+	int i;
 
-	if (!edev)
-		return;
-
-	mutex_lock(&edev->en_dev_lock);
-	if (!bnxt_ulp_registered(edev) ||
-	    (edev->flags & BNXT_EN_FLAG_ULP_STOPPED))
-		goto ulp_stop_exit;
-
-	edev->flags |= BNXT_EN_FLAG_ULP_STOPPED;
-	if (aux_priv) {
+	mutex_lock(&bp->auxdev_lock);
+	for (i = 0; i < __BNXT_AUXDEV_MAX; i++) {
+		struct bnxt_aux_priv *aux_priv;
 		struct auxiliary_device *adev;
+		struct bnxt_en_dev *edev;
+
+		if (!bnxt_auxdev_is_active(bp, i))
+			continue;
+
+		aux_priv = bp->aux_priv[i];
+		edev = bp->edev[i];
+		mutex_lock(&edev->en_dev_lock);
+		if (!bnxt_ulp_registered(edev) ||
+		    (edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) {
+			mutex_unlock(&edev->en_dev_lock);
+			continue;
+		}
+
+		edev->flags |= BNXT_EN_FLAG_ULP_STOPPED;
 
 		adev = &aux_priv->aux_dev;
 		if (adev->dev.driver) {
@@ -251,29 +295,35 @@ void bnxt_ulp_stop(struct bnxt *bp)
 			edev->en_state = bp->state;
 			adrv->suspend(adev, pm);
 		}
+		mutex_unlock(&edev->en_dev_lock);
 	}
-ulp_stop_exit:
-	mutex_unlock(&edev->en_dev_lock);
+	mutex_unlock(&bp->auxdev_lock);
 }
 
-void bnxt_ulp_start(struct bnxt *bp, int err)
+void bnxt_ulp_start(struct bnxt *bp)
 {
-	struct bnxt_aux_priv *aux_priv = bp->aux_priv;
-	struct bnxt_en_dev *edev = bp->edev;
+	int i;
 
-	if (!edev || err)
-		return;
+	mutex_lock(&bp->auxdev_lock);
+	for (i = 0; i < __BNXT_AUXDEV_MAX; i++) {
+		struct bnxt_aux_priv *aux_priv;
+		struct auxiliary_device *adev;
+		struct bnxt_en_dev *edev;
 
-	mutex_lock(&edev->en_dev_lock);
-	if (!bnxt_ulp_registered(edev) ||
-	    !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED))
-		goto ulp_start_exit;
+		if (!bnxt_auxdev_is_active(bp, i))
+			continue;
 
-	if (edev->ulp_tbl->msix_requested)
-		bnxt_fill_msix_vecs(bp, edev->msix_entries);
+		aux_priv = bp->aux_priv[i];
+		edev = bp->edev[i];
+		mutex_lock(&edev->en_dev_lock);
+		if (!bnxt_ulp_registered(edev) ||
+		    !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) {
+			goto clear_flag_continue;
+		}
+
+		if (edev->ulp_tbl->msix_requested)
+			bnxt_fill_msix_vecs(bp, edev->msix_entries);
 
-	if (aux_priv) {
-		struct auxiliary_device *adev;
 
 		adev = &aux_priv->aux_dev;
 		if (adev->dev.driver) {
@@ -283,22 +333,23 @@ void bnxt_ulp_start(struct bnxt *bp, int err)
 			edev->en_state = bp->state;
 			adrv->resume(adev);
 		}
+clear_flag_continue:
+		edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED;
+		mutex_unlock(&edev->en_dev_lock);
 	}
-ulp_start_exit:
-	edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED;
-	mutex_unlock(&edev->en_dev_lock);
+	mutex_unlock(&bp->auxdev_lock);
 }
 
 void bnxt_ulp_irq_stop(struct bnxt *bp)
 {
-	struct bnxt_en_dev *edev = bp->edev;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
 	struct bnxt_ulp_ops *ops;
 	bool reset = false;
 
 	if (!edev)
 		return;
 
-	if (bnxt_ulp_registered(bp->edev)) {
+	if (bnxt_ulp_registered(edev)) {
 		struct bnxt_ulp *ulp = edev->ulp_tbl;
 
 		if (!ulp->msix_requested)
@@ -315,13 +366,13 @@ void bnxt_ulp_irq_stop(struct bnxt *bp)
 
 void bnxt_ulp_irq_restart(struct bnxt *bp, int err)
 {
-	struct bnxt_en_dev *edev = bp->edev;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
 	struct bnxt_ulp_ops *ops;
 
 	if (!edev)
 		return;
 
-	if (bnxt_ulp_registered(bp->edev)) {
+	if (bnxt_ulp_registered(edev)) {
 		struct bnxt_ulp *ulp = edev->ulp_tbl;
 		struct bnxt_msix_entry *ent = NULL;
 
@@ -346,7 +397,7 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err)
 void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl)
 {
 	u16 event_id = le16_to_cpu(cmpl->event_id);
-	struct bnxt_en_dev *edev = bp->edev;
+	struct bnxt_en_dev *edev = bp->edev[BNXT_AUXDEV_RDMA];
 	struct bnxt_ulp_ops *ops;
 	struct bnxt_ulp *ulp;
 
@@ -387,18 +438,21 @@ void bnxt_register_async_events(struct bnxt_en_dev *edev,
 }
 EXPORT_SYMBOL(bnxt_register_async_events);
 
-void bnxt_rdma_aux_device_uninit(struct bnxt *bp)
+void bnxt_aux_devices_uninit(struct bnxt *bp)
 {
 	struct bnxt_aux_priv *aux_priv;
 	struct auxiliary_device *adev;
-
-	/* Skip if no auxiliary device init was done. */
-	if (!bp->aux_priv)
-		return;
-
-	aux_priv = bp->aux_priv;
-	adev = &aux_priv->aux_dev;
-	auxiliary_device_uninit(adev);
+	int idx;
+
+	mutex_lock(&bp->auxdev_lock);
+	for (idx = 0; idx < __BNXT_AUXDEV_MAX; idx++) {
+		if (bnxt_auxdev_is_init(bp, idx)) {
+			aux_priv = bp->aux_priv[idx];
+			adev = &aux_priv->aux_dev;
+			auxiliary_device_uninit(adev);
+		}
+	}
+	mutex_unlock(&bp->auxdev_lock);
 }
 
 static void bnxt_aux_dev_release(struct device *dev)
@@ -407,20 +461,25 @@ static void bnxt_aux_dev_release(struct device *dev)
 		container_of(dev, struct bnxt_aux_priv, aux_dev.dev);
 	struct bnxt *bp = netdev_priv(aux_priv->edev->net);
 
-	ida_free(&bnxt_aux_dev_ids, aux_priv->id);
 	kfree(aux_priv->edev->ulp_tbl);
-	bp->edev = NULL;
+	bp->edev[aux_priv->id] = NULL;
 	kfree(aux_priv->edev);
+	bp->aux_priv[aux_priv->id] = NULL;
 	kfree(aux_priv);
-	bp->aux_priv = NULL;
 }
 
-void bnxt_rdma_aux_device_del(struct bnxt *bp)
+void bnxt_aux_devices_del(struct bnxt *bp)
 {
-	if (!bp->edev)
-		return;
+	int idx;
 
-	auxiliary_device_delete(&bp->aux_priv->aux_dev);
+	mutex_lock(&bp->auxdev_lock);
+	for (idx = 0; idx < __BNXT_AUXDEV_MAX; idx++) {
+		if (bnxt_auxdev_is_active(bp, idx)) {
+			auxiliary_device_delete(&bp->aux_priv[idx]->aux_dev);
+			bnxt_auxdev_set_state(bp, idx, BNXT_ADEV_STATE_INIT);
+		}
+	}
+	mutex_unlock(&bp->auxdev_lock);
 }
 
 static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp)
@@ -450,83 +509,105 @@ static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp)
 	edev->bar0 = bp->bar0;
 }
 
-void bnxt_rdma_aux_device_add(struct bnxt *bp)
+void bnxt_aux_devices_add(struct bnxt *bp)
 {
 	struct auxiliary_device *aux_dev;
-	int rc;
-
-	if (!bp->edev)
-		return;
-
-	aux_dev = &bp->aux_priv->aux_dev;
-	rc = auxiliary_device_add(aux_dev);
-	if (rc) {
-		netdev_warn(bp->dev, "Failed to add auxiliary device for ROCE\n");
-		auxiliary_device_uninit(aux_dev);
-		bp->flags &= ~BNXT_FLAG_ROCE_CAP;
+	int rc, idx;
+
+	mutex_lock(&bp->auxdev_lock);
+	for (idx = 0; idx < __BNXT_AUXDEV_MAX; idx++) {
+		if (bnxt_auxdev_is_init(bp, idx)) {
+			aux_dev = &bp->aux_priv[idx]->aux_dev;
+			rc = auxiliary_device_add(aux_dev);
+			if (rc) {
+				netdev_warn(bp->dev, "Failed to add auxiliary device for ROCE\n");
+				auxiliary_device_uninit(aux_dev);
+				if (idx == BNXT_AUXDEV_RDMA)
+					bp->flags &= ~BNXT_FLAG_ROCE_CAP;
+				continue;
+			}
+			bnxt_auxdev_set_state(bp, idx, BNXT_ADEV_STATE_ADD);
+		}
 	}
+	mutex_unlock(&bp->auxdev_lock);
 }
 
-void bnxt_rdma_aux_device_init(struct bnxt *bp)
+void bnxt_aux_devices_init(struct bnxt *bp)
 {
 	struct auxiliary_device *aux_dev;
 	struct bnxt_aux_priv *aux_priv;
 	struct bnxt_en_dev *edev;
 	struct bnxt_ulp *ulp;
-	int rc;
+	int rc, idx;
+
+	mutex_lock(&bp->auxdev_lock);
+	for (idx = 0; idx < __BNXT_AUXDEV_MAX; idx++) {
+		bnxt_auxdev_set_state(bp, idx, BNXT_ADEV_STATE_NONE);
+
+		if (idx == BNXT_AUXDEV_RDMA &&
+		    !(bp->flags & BNXT_FLAG_ROCE_CAP))
+			continue;
+
+		aux_priv = kzalloc_obj(*aux_priv);
+		if (!aux_priv)
+			goto next_auxdev;
+
+		aux_dev = &aux_priv->aux_dev;
+		aux_dev->id = bp->auxdev_id;
+		aux_dev->name = bnxt_aux_devices[idx].name;
+		aux_dev->dev.parent = &bp->pdev->dev;
+		aux_dev->dev.release = bnxt_aux_dev_release;
+
+		rc = auxiliary_device_init(aux_dev);
+		if (rc) {
+			kfree(aux_priv);
+			goto next_auxdev;
+		}
+		bp->aux_priv[idx] = aux_priv;
 
-	if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
-		return;
+		/* From this point, all cleanup will happen via the .release
+		 * callback & any error unwinding will need to include a call
+		 * to auxiliary_device_uninit.
+		 */
+		edev = kzalloc_obj(*edev);
+		if (!edev)
+			goto aux_dev_uninit;
 
-	aux_priv = kzalloc_obj(*bp->aux_priv);
-	if (!aux_priv)
-		goto exit;
+		aux_priv->edev = edev;
+		bnxt_set_edev_info(edev, bp);
 
-	aux_priv->id = ida_alloc(&bnxt_aux_dev_ids, GFP_KERNEL);
-	if (aux_priv->id < 0) {
-		netdev_warn(bp->dev,
-			    "ida alloc failed for ROCE auxiliary device\n");
-		kfree(aux_priv);
-		goto exit;
-	}
+		ulp = kzalloc_obj(*ulp);
+		if (!ulp)
+			goto aux_dev_uninit;
 
-	aux_dev = &aux_priv->aux_dev;
-	aux_dev->id = aux_priv->id;
-	aux_dev->name = "rdma";
-	aux_dev->dev.parent = &bp->pdev->dev;
-	aux_dev->dev.release = bnxt_aux_dev_release;
+		edev->ulp_tbl = ulp;
+		bp->edev[idx] = edev;
+		if (idx == BNXT_AUXDEV_RDMA)
+			bp->ulp_num_msix_want = bnxt_set_dflt_ulp_msix(bp);
+		aux_priv->id = idx;
+		bnxt_auxdev_set_state(bp, idx, BNXT_ADEV_STATE_INIT);
 
-	rc = auxiliary_device_init(aux_dev);
-	if (rc) {
-		ida_free(&bnxt_aux_dev_ids, aux_priv->id);
-		kfree(aux_priv);
-		goto exit;
+		continue;
+aux_dev_uninit:
+		auxiliary_device_uninit(aux_dev);
+next_auxdev:
+		if (idx == BNXT_AUXDEV_RDMA)
+			bp->flags &= ~BNXT_FLAG_ROCE_CAP;
 	}
-	bp->aux_priv = aux_priv;
-
-	/* From this point, all cleanup will happen via the .release callback &
-	 * any error unwinding will need to include a call to
-	 * auxiliary_device_uninit.
-	 */
-	edev = kzalloc_obj(*edev);
-	if (!edev)
-		goto aux_dev_uninit;
-
-	aux_priv->edev = edev;
-
-	ulp = kzalloc_obj(*ulp);
-	if (!ulp)
-		goto aux_dev_uninit;
+	mutex_unlock(&bp->auxdev_lock);
+}
 
-	edev->ulp_tbl = ulp;
-	bp->edev = edev;
-	bnxt_set_edev_info(edev, bp);
-	bp->ulp_num_msix_want = bnxt_set_dflt_ulp_msix(bp);
+int bnxt_auxdev_id_alloc(struct bnxt *bp)
+{
+	bp->auxdev_id = ida_alloc(&bnxt_aux_dev_ids, GFP_KERNEL);
+	if (bp->auxdev_id < 0)
+		return bp->auxdev_id;
 
-	return;
+	return 0;
+}
 
-aux_dev_uninit:
-	auxiliary_device_uninit(aux_dev);
-exit:
-	bp->flags &= ~BNXT_FLAG_ROCE_CAP;
+void bnxt_auxdev_id_free(struct bnxt *bp, int id)
+{
+	if (bp->auxdev_id >= 0)
+		ida_free(&bnxt_aux_dev_ids, id);
 }
diff --git a/include/linux/bnxt/ulp.h b/include/linux/bnxt/ulp.h
index 3c5b8a53f715..1a4643c46f86 100644
--- a/include/linux/bnxt/ulp.h
+++ b/include/linux/bnxt/ulp.h
@@ -10,6 +10,8 @@
 #ifndef BNXT_ULP_H
 #define BNXT_ULP_H
 
+#include <linux/auxiliary_bus.h>
+
 #define BNXT_MIN_ROCE_CP_RINGS	2
 #define BNXT_MIN_ROCE_STAT_CTXS	1
 
@@ -20,6 +22,17 @@
 struct hwrm_async_event_cmpl;
 struct bnxt;
 
+enum bnxt_auxdev_type {
+	BNXT_AUXDEV_RDMA = 0,
+	__BNXT_AUXDEV_MAX
+};
+
+struct bnxt_aux_priv {
+	struct auxiliary_device aux_dev;
+	struct bnxt_en_dev *edev;
+	int id;
+};
+
 struct bnxt_msix_entry {
 	u32	vector;
 	u32	ring_idx;
@@ -110,19 +123,21 @@ void bnxt_set_ulp_stat_ctxs(struct bnxt *bp, int num_ctxs);
 int bnxt_get_ulp_stat_ctxs_in_use(struct bnxt *bp);
 void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp);
 void bnxt_ulp_stop(struct bnxt *bp);
-void bnxt_ulp_start(struct bnxt *bp, int err);
+void bnxt_ulp_start(struct bnxt *bp);
 void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);
 void bnxt_ulp_irq_stop(struct bnxt *bp);
 void bnxt_ulp_irq_restart(struct bnxt *bp, int err);
 void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl);
-void bnxt_rdma_aux_device_uninit(struct bnxt *bp);
-void bnxt_rdma_aux_device_del(struct bnxt *bp);
-void bnxt_rdma_aux_device_add(struct bnxt *bp);
-void bnxt_rdma_aux_device_init(struct bnxt *bp);
+void bnxt_aux_devices_uninit(struct bnxt *bp);
+void bnxt_aux_devices_del(struct bnxt *bp);
+void bnxt_aux_devices_add(struct bnxt *bp);
+void bnxt_aux_devices_init(struct bnxt *bp);
 int bnxt_register_dev(struct bnxt_en_dev *edev, struct bnxt_ulp_ops *ulp_ops,
 		      void *handle);
 void bnxt_unregister_dev(struct bnxt_en_dev *edev);
 int bnxt_send_msg(struct bnxt_en_dev *edev, struct bnxt_fw_msg *fw_msg);
 void bnxt_register_async_events(struct bnxt_en_dev *edev,
 				unsigned long *events_bmap, u16 max_id);
+int bnxt_auxdev_id_alloc(struct bnxt *bp);
+void bnxt_auxdev_id_free(struct bnxt *bp, int id);
 #endif
-- 
cgit v1.2.3


From 5102836da8397deb2a3d8b9e574238781ea47390 Mon Sep 17 00:00:00 2001
From: Pavan Chebbi <pavan.chebbi@broadcom.com>
Date: Sat, 14 Mar 2026 08:16:03 -0700
Subject: fwctl/bnxt_en: Create an aux device for fwctl

Create an additional auxiliary device to support fwctl.
The next patch will create bnxt_fwctl and bind to this
device.

Link: https://patch.msgid.link/r/20260314151605.932749-4-pavan.chebbi@broadcom.com
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 12 ++++++++++--
 include/linux/bnxt/ulp.h                      |  1 +
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 3705ef3a75c9..052bf69cfa4c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -50,6 +50,8 @@ static bool bnxt_auxdev_is_active(struct bnxt *bp, int idx)
 
 static struct bnxt_aux_device bnxt_aux_devices[__BNXT_AUXDEV_MAX] = {{
 	.name		= "rdma",
+}, {
+	.name		= "fwctl",
 }};
 
 static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent)
@@ -278,6 +280,11 @@ void bnxt_ulp_stop(struct bnxt *bp)
 		aux_priv = bp->aux_priv[i];
 		edev = bp->edev[i];
 		mutex_lock(&edev->en_dev_lock);
+		if (i == BNXT_AUXDEV_FWCTL) {
+			edev->flags |= BNXT_EN_FLAG_ULP_STOPPED;
+			mutex_unlock(&edev->en_dev_lock);
+			continue;
+		}
 		if (!bnxt_ulp_registered(edev) ||
 		    (edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) {
 			mutex_unlock(&edev->en_dev_lock);
@@ -316,7 +323,7 @@ void bnxt_ulp_start(struct bnxt *bp)
 		aux_priv = bp->aux_priv[i];
 		edev = bp->edev[i];
 		mutex_lock(&edev->en_dev_lock);
-		if (!bnxt_ulp_registered(edev) ||
+		if (i == BNXT_AUXDEV_FWCTL || !bnxt_ulp_registered(edev) ||
 		    !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) {
 			goto clear_flag_continue;
 		}
@@ -520,7 +527,8 @@ void bnxt_aux_devices_add(struct bnxt *bp)
 			aux_dev = &bp->aux_priv[idx]->aux_dev;
 			rc = auxiliary_device_add(aux_dev);
 			if (rc) {
-				netdev_warn(bp->dev, "Failed to add auxiliary device for ROCE\n");
+				netdev_warn(bp->dev, "Failed to add auxiliary device for auxdev type %d\n",
+					    idx);
 				auxiliary_device_uninit(aux_dev);
 				if (idx == BNXT_AUXDEV_RDMA)
 					bp->flags &= ~BNXT_FLAG_ROCE_CAP;
diff --git a/include/linux/bnxt/ulp.h b/include/linux/bnxt/ulp.h
index 1a4643c46f86..0851ad3394b0 100644
--- a/include/linux/bnxt/ulp.h
+++ b/include/linux/bnxt/ulp.h
@@ -24,6 +24,7 @@ struct bnxt;
 
 enum bnxt_auxdev_type {
 	BNXT_AUXDEV_RDMA = 0,
+	BNXT_AUXDEV_FWCTL,
 	__BNXT_AUXDEV_MAX
 };
 
-- 
cgit v1.2.3


From 9100a28c8bb4270744942cf834efcd80f1acda7d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Sun, 1 Mar 2026 23:59:39 -0800
Subject: nvme-auth: add NVME_AUTH_MAX_DIGEST_SIZE constant

Define a NVME_AUTH_MAX_DIGEST_SIZE constant and use it in the
appropriate places.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/common/auth.c | 6 ++----
 drivers/nvme/host/auth.c   | 6 +++---
 include/linux/nvme.h       | 5 +++++
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c
index e07e7d4bf8b6..78d751481fe3 100644
--- a/drivers/nvme/common/auth.c
+++ b/drivers/nvme/common/auth.c
@@ -15,8 +15,6 @@
 #include <linux/nvme.h>
 #include <linux/nvme-auth.h>
 
-#define HKDF_MAX_HASHLEN 64
-
 static u32 nvme_dhchap_seqnum;
 static DEFINE_MUTEX(nvme_dhchap_mutex);
 
@@ -769,7 +767,7 @@ int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len,
 	struct crypto_shash *hmac_tfm;
 	const char *hmac_name;
 	const char *label = "nvme-tls-psk";
-	static const char default_salt[HKDF_MAX_HASHLEN];
+	static const char default_salt[NVME_AUTH_MAX_DIGEST_SIZE];
 	size_t prk_len;
 	const char *ctx;
 	unsigned char *prk, *tls_key;
@@ -798,7 +796,7 @@ int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len,
 		goto out_free_shash;
 	}
 
-	if (WARN_ON(prk_len > HKDF_MAX_HASHLEN)) {
+	if (WARN_ON(prk_len > NVME_AUTH_MAX_DIGEST_SIZE)) {
 		ret = -EINVAL;
 		goto out_free_prk;
 	}
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 405e7c03b1cf..301c858b7c57 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -38,9 +38,9 @@ struct nvme_dhchap_queue_context {
 	u8 hash_id;
 	u8 sc_c;
 	size_t hash_len;
-	u8 c1[64];
-	u8 c2[64];
-	u8 response[64];
+	u8 c1[NVME_AUTH_MAX_DIGEST_SIZE];
+	u8 c2[NVME_AUTH_MAX_DIGEST_SIZE];
+	u8 response[NVME_AUTH_MAX_DIGEST_SIZE];
 	u8 *ctrl_key;
 	u8 *host_key;
 	u8 *sess_key;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 655d194f8e72..edfebbce6745 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1837,6 +1837,11 @@ enum {
 	NVME_AUTH_HASH_INVALID	= 0xff,
 };
 
+/* Maximum digest size for any NVME_AUTH_HASH_* value */
+enum {
+	NVME_AUTH_MAX_DIGEST_SIZE = 64,
+};
+
 /* Defined Diffie-Hellman group identifiers for DH-HMAC-CHAP authentication */
 enum {
 	NVME_AUTH_DHGROUP_NULL		= 0x00,
-- 
cgit v1.2.3


From bf0e2567a639c455110f9be5db8c92032175e222 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Sun, 1 Mar 2026 23:59:41 -0800
Subject: nvme-auth: use proper argument types

For input parameters, use pointer to const.  This makes it easier to
understand which parameters are inputs and which are outputs.

In addition, consistently use char for strings and u8 for binary.  This
makes it easier to understand what is a string and what is binary data.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/common/auth.c  | 47 ++++++++++++++++++++++++---------------------
 drivers/nvme/host/auth.c    |  3 ++-
 drivers/nvme/target/auth.c  |  5 +++--
 drivers/nvme/target/nvmet.h |  2 +-
 include/linux/nvme-auth.h   | 26 ++++++++++++-------------
 5 files changed, 44 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c
index 9e5cee217ff5..d35523d0a017 100644
--- a/drivers/nvme/common/auth.c
+++ b/drivers/nvme/common/auth.c
@@ -159,11 +159,10 @@ u32 nvme_auth_key_struct_size(u32 key_len)
 }
 EXPORT_SYMBOL_GPL(nvme_auth_key_struct_size);
 
-struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
-					      u8 key_hash)
+struct nvme_dhchap_key *nvme_auth_extract_key(const char *secret, u8 key_hash)
 {
 	struct nvme_dhchap_key *key;
-	unsigned char *p;
+	const char *p;
 	u32 crc;
 	int ret, key_len;
 	size_t allocated_len = strlen(secret);
@@ -181,14 +180,14 @@ struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
 		pr_debug("base64 key decoding error %d\n",
 			 key_len);
 		ret = key_len;
-		goto out_free_secret;
+		goto out_free_key;
 	}
 
 	if (key_len != 36 && key_len != 52 &&
 	    key_len != 68) {
 		pr_err("Invalid key len %d\n", key_len);
 		ret = -EINVAL;
-		goto out_free_secret;
+		goto out_free_key;
 	}
 
 	/* The last four bytes is the CRC in little-endian format */
@@ -203,12 +202,12 @@ struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
 		pr_err("key crc mismatch (key %08x, crc %08x)\n",
 		       get_unaligned_le32(key->key + key_len), crc);
 		ret = -EKEYREJECTED;
-		goto out_free_secret;
+		goto out_free_key;
 	}
 	key->len = key_len;
 	key->hash = key_hash;
 	return key;
-out_free_secret:
+out_free_key:
 	nvme_auth_free_key(key);
 	return ERR_PTR(ret);
 }
@@ -236,7 +235,7 @@ void nvme_auth_free_key(struct nvme_dhchap_key *key)
 EXPORT_SYMBOL_GPL(nvme_auth_free_key);
 
 struct nvme_dhchap_key *nvme_auth_transform_key(
-		struct nvme_dhchap_key *key, char *nqn)
+		const struct nvme_dhchap_key *key, const char *nqn)
 {
 	const char *hmac_name;
 	struct crypto_shash *key_tfm;
@@ -302,7 +301,8 @@ out_free_key:
 }
 EXPORT_SYMBOL_GPL(nvme_auth_transform_key);
 
-static int nvme_auth_hash_skey(int hmac_id, u8 *skey, size_t skey_len, u8 *hkey)
+static int nvme_auth_hash_skey(int hmac_id, const u8 *skey, size_t skey_len,
+			       u8 *hkey)
 {
 	const char *digest_name;
 	struct crypto_shash *tfm;
@@ -327,8 +327,8 @@ static int nvme_auth_hash_skey(int hmac_id, u8 *skey, size_t skey_len, u8 *hkey)
 	return ret;
 }
 
-int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len,
-		u8 *challenge, u8 *aug, size_t hlen)
+int nvme_auth_augmented_challenge(u8 hmac_id, const u8 *skey, size_t skey_len,
+				  const u8 *challenge, u8 *aug, size_t hlen)
 {
 	struct crypto_shash *tfm;
 	u8 *hashed_key;
@@ -409,7 +409,7 @@ int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm,
 EXPORT_SYMBOL_GPL(nvme_auth_gen_pubkey);
 
 int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm,
-		u8 *ctrl_key, size_t ctrl_key_len,
+		const u8 *ctrl_key, size_t ctrl_key_len,
 		u8 *sess_key, size_t sess_key_len)
 {
 	struct kpp_request *req;
@@ -436,7 +436,7 @@ int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm,
 }
 EXPORT_SYMBOL_GPL(nvme_auth_gen_shared_secret);
 
-int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key)
+int nvme_auth_generate_key(const char *secret, struct nvme_dhchap_key **ret_key)
 {
 	struct nvme_dhchap_key *key;
 	u8 key_hash;
@@ -484,8 +484,9 @@ EXPORT_SYMBOL_GPL(nvme_auth_generate_key);
  * Returns 0 on success with a valid generated PSK pointer in @ret_psk and
  * the length of @ret_psk in @ret_len, or a negative error number otherwise.
  */
-int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len,
-		u8 *c1, u8 *c2, size_t hash_len, u8 **ret_psk, size_t *ret_len)
+int nvme_auth_generate_psk(u8 hmac_id, const u8 *skey, size_t skey_len,
+			   const u8 *c1, const u8 *c2, size_t hash_len,
+			   u8 **ret_psk, size_t *ret_len)
 {
 	struct crypto_shash *tfm;
 	SHASH_DESC_ON_STACK(shash, tfm);
@@ -582,12 +583,14 @@ EXPORT_SYMBOL_GPL(nvme_auth_generate_psk);
  * Returns 0 on success with a valid digest pointer in @ret_digest, or a
  * negative error number on failure.
  */
-int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len,
-		char *subsysnqn, char *hostnqn, u8 **ret_digest)
+int nvme_auth_generate_digest(u8 hmac_id, const u8 *psk, size_t psk_len,
+			      const char *subsysnqn, const char *hostnqn,
+			      char **ret_digest)
 {
 	struct crypto_shash *tfm;
 	SHASH_DESC_ON_STACK(shash, tfm);
-	u8 *digest, *enc;
+	u8 *digest;
+	char *enc;
 	const char *hmac_name;
 	size_t digest_len, hmac_len;
 	int ret;
@@ -761,16 +764,16 @@ static int hkdf_expand_label(struct crypto_shash *hmac_tfm,
  * Returns 0 on success with a valid psk pointer in @ret_psk or a negative
  * error number otherwise.
  */
-int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len,
-		u8 *psk_digest, u8 **ret_psk)
+int nvme_auth_derive_tls_psk(int hmac_id, const u8 *psk, size_t psk_len,
+			     const char *psk_digest, u8 **ret_psk)
 {
 	struct crypto_shash *hmac_tfm;
 	const char *hmac_name;
 	const char *label = "nvme-tls-psk";
-	static const char default_salt[NVME_AUTH_MAX_DIGEST_SIZE];
+	static const u8 default_salt[NVME_AUTH_MAX_DIGEST_SIZE];
 	size_t prk_len;
 	const char *ctx;
-	unsigned char *prk, *tls_key;
+	u8 *prk, *tls_key;
 	int ret;
 
 	hmac_name = nvme_auth_hmac_name(hmac_id);
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 301c858b7c57..d0d0a9d5a871 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -708,7 +708,8 @@ EXPORT_SYMBOL_GPL(nvme_auth_revoke_tls_key);
 static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl,
 				   struct nvme_dhchap_queue_context *chap)
 {
-	u8 *psk, *digest, *tls_psk;
+	u8 *psk, *tls_psk;
+	char *digest;
 	struct key *tls_key;
 	size_t psk_len;
 	int ret = 0;
diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
index 2eadeb7e06f2..f483e1fd48ac 100644
--- a/drivers/nvme/target/auth.c
+++ b/drivers/nvme/target/auth.c
@@ -531,7 +531,7 @@ int nvmet_auth_ctrl_exponential(struct nvmet_req *req,
 }
 
 int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
-			    u8 *pkey, int pkey_size)
+			    const u8 *pkey, int pkey_size)
 {
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	int ret;
@@ -557,7 +557,8 @@ int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
 void nvmet_auth_insert_psk(struct nvmet_sq *sq)
 {
 	int hash_len = nvme_auth_hmac_hash_len(sq->ctrl->shash_id);
-	u8 *psk, *digest, *tls_psk;
+	u8 *psk, *tls_psk;
+	char *digest;
 	size_t psk_len;
 	int ret;
 #ifdef CONFIG_NVME_TARGET_TCP_TLS
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index b664b584fdc8..986d4c7bd734 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -912,7 +912,7 @@ static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq)
 int nvmet_auth_ctrl_exponential(struct nvmet_req *req,
 				u8 *buf, int buf_size);
 int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
-			    u8 *buf, int buf_size);
+			    const u8 *pkey, int pkey_size);
 void nvmet_auth_insert_psk(struct nvmet_sq *sq);
 #else
 static inline u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl,
diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h
index 60e069a6757f..a4b248c24ccf 100644
--- a/include/linux/nvme-auth.h
+++ b/include/linux/nvme-auth.h
@@ -25,27 +25,27 @@ size_t nvme_auth_hmac_hash_len(u8 hmac_id);
 u8 nvme_auth_hmac_id(const char *hmac_name);
 
 u32 nvme_auth_key_struct_size(u32 key_len);
-struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
-					      u8 key_hash);
+struct nvme_dhchap_key *nvme_auth_extract_key(const char *secret, u8 key_hash);
 void nvme_auth_free_key(struct nvme_dhchap_key *key);
 struct nvme_dhchap_key *nvme_auth_alloc_key(u32 len, u8 hash);
 struct nvme_dhchap_key *nvme_auth_transform_key(
-				struct nvme_dhchap_key *key, char *nqn);
-int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key);
-int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len,
-				  u8 *challenge, u8 *aug, size_t hlen);
+		const struct nvme_dhchap_key *key, const char *nqn);
+int nvme_auth_generate_key(const char *secret, struct nvme_dhchap_key **ret_key);
+int nvme_auth_augmented_challenge(u8 hmac_id, const u8 *skey, size_t skey_len,
+				  const u8 *challenge, u8 *aug, size_t hlen);
 int nvme_auth_gen_privkey(struct crypto_kpp *dh_tfm, u8 dh_gid);
 int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm,
 			 u8 *host_key, size_t host_key_len);
 int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm,
-				u8 *ctrl_key, size_t ctrl_key_len,
+				const u8 *ctrl_key, size_t ctrl_key_len,
 				u8 *sess_key, size_t sess_key_len);
-int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len,
-			   u8 *c1, u8 *c2, size_t hash_len,
+int nvme_auth_generate_psk(u8 hmac_id, const u8 *skey, size_t skey_len,
+			   const u8 *c1, const u8 *c2, size_t hash_len,
 			   u8 **ret_psk, size_t *ret_len);
-int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len,
-		char *subsysnqn, char *hostnqn, u8 **ret_digest);
-int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len,
-		u8 *psk_digest, u8 **ret_psk);
+int nvme_auth_generate_digest(u8 hmac_id, const u8 *psk, size_t psk_len,
+			      const char *subsysnqn, const char *hostnqn,
+			      char **ret_digest);
+int nvme_auth_derive_tls_psk(int hmac_id, const u8 *psk, size_t psk_len,
+			     const char *psk_digest, u8 **ret_psk);
 
 #endif /* _NVME_AUTH_H */
-- 
cgit v1.2.3


From 0beeca72cf21c7c1d9d232148fdeef8e5e242f62 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Sun, 1 Mar 2026 23:59:43 -0800
Subject: nvme-auth: rename nvme_auth_generate_key() to nvme_auth_parse_key()

This function does not generate a key.  It parses the key from the
string that the caller passes in.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/common/auth.c | 4 ++--
 drivers/nvme/host/auth.c   | 7 +++----
 drivers/nvme/host/sysfs.c  | 4 ++--
 include/linux/nvme-auth.h  | 2 +-
 4 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c
index d35523d0a017..2f83c9ddea5e 100644
--- a/drivers/nvme/common/auth.c
+++ b/drivers/nvme/common/auth.c
@@ -436,7 +436,7 @@ int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm,
 }
 EXPORT_SYMBOL_GPL(nvme_auth_gen_shared_secret);
 
-int nvme_auth_generate_key(const char *secret, struct nvme_dhchap_key **ret_key)
+int nvme_auth_parse_key(const char *secret, struct nvme_dhchap_key **ret_key)
 {
 	struct nvme_dhchap_key *key;
 	u8 key_hash;
@@ -459,7 +459,7 @@ int nvme_auth_generate_key(const char *secret, struct nvme_dhchap_key **ret_key)
 	*ret_key = key;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(nvme_auth_generate_key);
+EXPORT_SYMBOL_GPL(nvme_auth_parse_key);
 
 /**
  * nvme_auth_generate_psk - Generate a PSK for TLS
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index d0d0a9d5a871..47a1525e876e 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -1072,12 +1072,11 @@ int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
 	INIT_WORK(&ctrl->dhchap_auth_work, nvme_ctrl_auth_work);
 	if (!ctrl->opts)
 		return 0;
-	ret = nvme_auth_generate_key(ctrl->opts->dhchap_secret,
-			&ctrl->host_key);
+	ret = nvme_auth_parse_key(ctrl->opts->dhchap_secret, &ctrl->host_key);
 	if (ret)
 		return ret;
-	ret = nvme_auth_generate_key(ctrl->opts->dhchap_ctrl_secret,
-			&ctrl->ctrl_key);
+	ret = nvme_auth_parse_key(ctrl->opts->dhchap_ctrl_secret,
+				  &ctrl->ctrl_key);
 	if (ret)
 		goto err_free_dhchap_secret;
 
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 16c6fea4b2db..45422d4274de 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -658,7 +658,7 @@ static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev,
 		struct nvme_dhchap_key *key, *host_key;
 		int ret;
 
-		ret = nvme_auth_generate_key(dhchap_secret, &key);
+		ret = nvme_auth_parse_key(dhchap_secret, &key);
 		if (ret) {
 			kfree(dhchap_secret);
 			return ret;
@@ -716,7 +716,7 @@ static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev,
 		struct nvme_dhchap_key *key, *ctrl_key;
 		int ret;
 
-		ret = nvme_auth_generate_key(dhchap_secret, &key);
+		ret = nvme_auth_parse_key(dhchap_secret, &key);
 		if (ret) {
 			kfree(dhchap_secret);
 			return ret;
diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h
index a4b248c24ccf..02ca9a716256 100644
--- a/include/linux/nvme-auth.h
+++ b/include/linux/nvme-auth.h
@@ -30,7 +30,7 @@ void nvme_auth_free_key(struct nvme_dhchap_key *key);
 struct nvme_dhchap_key *nvme_auth_alloc_key(u32 len, u8 hash);
 struct nvme_dhchap_key *nvme_auth_transform_key(
 		const struct nvme_dhchap_key *key, const char *nqn);
-int nvme_auth_generate_key(const char *secret, struct nvme_dhchap_key **ret_key);
+int nvme_auth_parse_key(const char *secret, struct nvme_dhchap_key **ret_key);
 int nvme_auth_augmented_challenge(u8 hmac_id, const u8 *skey, size_t skey_len,
 				  const u8 *challenge, u8 *aug, size_t hlen);
 int nvme_auth_gen_privkey(struct crypto_kpp *dh_tfm, u8 dh_gid);
-- 
cgit v1.2.3


From 4263ca1cae5cebc09ba95375c4a8927bf4b39d49 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Sun, 1 Mar 2026 23:59:45 -0800
Subject: nvme-auth: common: add HMAC helper functions

Add some helper functions for computing HMAC-SHA256, HMAC-SHA384, or
HMAC-SHA512 values using the crypto library instead of crypto_shash.
These will enable some significant simplifications and performance
improvements in nvme-auth.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/common/Kconfig |  2 ++
 drivers/nvme/common/auth.c  | 66 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nvme-auth.h   | 14 ++++++++++
 3 files changed, 82 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/nvme/common/Kconfig b/drivers/nvme/common/Kconfig
index d19988c13af5..1ec507d1f9b5 100644
--- a/drivers/nvme/common/Kconfig
+++ b/drivers/nvme/common/Kconfig
@@ -13,6 +13,8 @@ config NVME_AUTH
 	select CRYPTO_DH
 	select CRYPTO_DH_RFC7919_GROUPS
 	select CRYPTO_HKDF
+	select CRYPTO_LIB_SHA256
+	select CRYPTO_LIB_SHA512
 
 config NVME_AUTH_KUNIT_TEST
 	tristate "KUnit tests for NVMe authentication" if !KUNIT_ALL_TESTS
diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c
index 9e33fc02cf51..00f21176181f 100644
--- a/drivers/nvme/common/auth.c
+++ b/drivers/nvme/common/auth.c
@@ -12,6 +12,7 @@
 #include <crypto/hash.h>
 #include <crypto/dh.h>
 #include <crypto/hkdf.h>
+#include <crypto/sha2.h>
 #include <linux/nvme.h>
 #include <linux/nvme-auth.h>
 
@@ -234,6 +235,71 @@ void nvme_auth_free_key(struct nvme_dhchap_key *key)
 }
 EXPORT_SYMBOL_GPL(nvme_auth_free_key);
 
+/*
+ * Start computing an HMAC value, given the algorithm ID and raw key.
+ *
+ * The context should be zeroized at the end of its lifetime.  The caller can do
+ * that implicitly by calling nvme_auth_hmac_final(), or explicitly (needed when
+ * a context is abandoned without finalizing it) by calling memzero_explicit().
+ */
+int nvme_auth_hmac_init(struct nvme_auth_hmac_ctx *hmac, u8 hmac_id,
+			const u8 *key, size_t key_len)
+{
+	hmac->hmac_id = hmac_id;
+	switch (hmac_id) {
+	case NVME_AUTH_HASH_SHA256:
+		hmac_sha256_init_usingrawkey(&hmac->sha256, key, key_len);
+		return 0;
+	case NVME_AUTH_HASH_SHA384:
+		hmac_sha384_init_usingrawkey(&hmac->sha384, key, key_len);
+		return 0;
+	case NVME_AUTH_HASH_SHA512:
+		hmac_sha512_init_usingrawkey(&hmac->sha512, key, key_len);
+		return 0;
+	}
+	pr_warn("%s: invalid hash algorithm %d\n", __func__, hmac_id);
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(nvme_auth_hmac_init);
+
+void nvme_auth_hmac_update(struct nvme_auth_hmac_ctx *hmac, const u8 *data,
+			   size_t data_len)
+{
+	switch (hmac->hmac_id) {
+	case NVME_AUTH_HASH_SHA256:
+		hmac_sha256_update(&hmac->sha256, data, data_len);
+		return;
+	case NVME_AUTH_HASH_SHA384:
+		hmac_sha384_update(&hmac->sha384, data, data_len);
+		return;
+	case NVME_AUTH_HASH_SHA512:
+		hmac_sha512_update(&hmac->sha512, data, data_len);
+		return;
+	}
+	/* Unreachable because nvme_auth_hmac_init() validated hmac_id */
+	WARN_ON_ONCE(1);
+}
+EXPORT_SYMBOL_GPL(nvme_auth_hmac_update);
+
+/* Finish computing an HMAC value.  Note that this zeroizes the HMAC context. */
+void nvme_auth_hmac_final(struct nvme_auth_hmac_ctx *hmac, u8 *out)
+{
+	switch (hmac->hmac_id) {
+	case NVME_AUTH_HASH_SHA256:
+		hmac_sha256_final(&hmac->sha256, out);
+		return;
+	case NVME_AUTH_HASH_SHA384:
+		hmac_sha384_final(&hmac->sha384, out);
+		return;
+	case NVME_AUTH_HASH_SHA512:
+		hmac_sha512_final(&hmac->sha512, out);
+		return;
+	}
+	/* Unreachable because nvme_auth_hmac_init() validated hmac_id */
+	WARN_ON_ONCE(1);
+}
+EXPORT_SYMBOL_GPL(nvme_auth_hmac_final);
+
 struct nvme_dhchap_key *nvme_auth_transform_key(
 		const struct nvme_dhchap_key *key, const char *nqn)
 {
diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h
index 02ca9a716256..940d0703eb1d 100644
--- a/include/linux/nvme-auth.h
+++ b/include/linux/nvme-auth.h
@@ -7,6 +7,7 @@
 #define _NVME_AUTH_H
 
 #include <crypto/kpp.h>
+#include <crypto/sha2.h>
 
 struct nvme_dhchap_key {
 	size_t len;
@@ -23,6 +24,19 @@ const char *nvme_auth_hmac_name(u8 hmac_id);
 const char *nvme_auth_digest_name(u8 hmac_id);
 size_t nvme_auth_hmac_hash_len(u8 hmac_id);
 u8 nvme_auth_hmac_id(const char *hmac_name);
+struct nvme_auth_hmac_ctx {
+	u8 hmac_id;
+	union {
+		struct hmac_sha256_ctx sha256;
+		struct hmac_sha384_ctx sha384;
+		struct hmac_sha512_ctx sha512;
+	};
+};
+int nvme_auth_hmac_init(struct nvme_auth_hmac_ctx *hmac, u8 hmac_id,
+			const u8 *key, size_t key_len);
+void nvme_auth_hmac_update(struct nvme_auth_hmac_ctx *hmac, const u8 *data,
+			   size_t data_len);
+void nvme_auth_hmac_final(struct nvme_auth_hmac_ctx *hmac, u8 *out);
 
 u32 nvme_auth_key_struct_size(u32 key_len);
 struct nvme_dhchap_key *nvme_auth_extract_key(const char *secret, u8 key_hash);
-- 
cgit v1.2.3


From 844d950bb2cb1fc5b8973369de59cbfb7eecd94d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Sun, 1 Mar 2026 23:59:57 -0800
Subject: nvme-auth: common: remove nvme_auth_digest_name()

Since nvme_auth_digest_name() is no longer used, remove it and the
associated data from the hash_map array.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/common/auth.c | 12 ------------
 include/linux/nvme-auth.h  |  1 -
 2 files changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c
index 5be86629c2d4..2d325fb93083 100644
--- a/drivers/nvme/common/auth.c
+++ b/drivers/nvme/common/auth.c
@@ -89,22 +89,18 @@ EXPORT_SYMBOL_GPL(nvme_auth_dhgroup_id);
 static const struct nvme_dhchap_hash_map {
 	int len;
 	char hmac[15];
-	char digest[8];
 } hash_map[] = {
 	[NVME_AUTH_HASH_SHA256] = {
 		.len = 32,
 		.hmac = "hmac(sha256)",
-		.digest = "sha256",
 	},
 	[NVME_AUTH_HASH_SHA384] = {
 		.len = 48,
 		.hmac = "hmac(sha384)",
-		.digest = "sha384",
 	},
 	[NVME_AUTH_HASH_SHA512] = {
 		.len = 64,
 		.hmac = "hmac(sha512)",
-		.digest = "sha512",
 	},
 };
 
@@ -116,14 +112,6 @@ const char *nvme_auth_hmac_name(u8 hmac_id)
 }
 EXPORT_SYMBOL_GPL(nvme_auth_hmac_name);
 
-const char *nvme_auth_digest_name(u8 hmac_id)
-{
-	if (hmac_id >= ARRAY_SIZE(hash_map))
-		return NULL;
-	return hash_map[hmac_id].digest;
-}
-EXPORT_SYMBOL_GPL(nvme_auth_digest_name);
-
 u8 nvme_auth_hmac_id(const char *hmac_name)
 {
 	int i;
diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h
index 940d0703eb1d..184a1f9510fa 100644
--- a/include/linux/nvme-auth.h
+++ b/include/linux/nvme-auth.h
@@ -21,7 +21,6 @@ const char *nvme_auth_dhgroup_kpp(u8 dhgroup_id);
 u8 nvme_auth_dhgroup_id(const char *dhgroup_name);
 
 const char *nvme_auth_hmac_name(u8 hmac_id);
-const char *nvme_auth_digest_name(u8 hmac_id);
 size_t nvme_auth_hmac_hash_len(u8 hmac_id);
 u8 nvme_auth_hmac_id(const char *hmac_name);
 struct nvme_auth_hmac_ctx {
-- 
cgit v1.2.3


From ac61e869bef13a43d624893559acbac3a4e2a341 Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 27 Feb 2026 13:23:46 -0700
Subject: nvme: add preferred I/O size fields to struct nvme_id_ns_nvm

A subsequent change will use the NPDGL and NPDAL fields of the NVM
Command Set Specific Identify Namespace structure, so add them (and the
handful of intervening fields) to struct nvme_id_ns_nvm. Add an
assertion that the size is still 4 KB.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index edfebbce6745..ec011dce4a97 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -513,9 +513,16 @@ struct nvme_id_ns_nvm {
 	__u8	pic;
 	__u8	rsvd9[3];
 	__le32	elbaf[64];
-	__u8	rsvd268[3828];
+	__le32	npdgl;
+	__le32	nprg;
+	__le32	npra;
+	__le32	nors;
+	__le32	npdal;
+	__u8	rsvd288[3808];
 };
 
+static_assert(sizeof(struct nvme_id_ns_nvm) == 4096);
+
 enum {
 	NVME_ID_NS_NVM_STS_MASK		= 0x7f,
 	NVME_ID_NS_NVM_GUARD_SHIFT	= 7,
-- 
cgit v1.2.3


From d3c04a6ea5fd7a3d81f7c80880125108df9a4cbd Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 27 Feb 2026 13:23:48 -0700
Subject: nvme: update nvme_id_ns OPTPERF constants

In NVMe verson 2.0 and below, OPTPERF comprises only bit 4 of NSFEAT in
the Identify Namespace structure. Since version 2.1, OPTPERF includes
both bits 4 and 5 of NSFEAT. Replace the NVME_NS_FEAT_IO_OPT constant
with NVME_NS_FEAT_OPTPERF_SHIFT, NVME_NS_FEAT_OPTPERF_MASK, and
NVME_NS_FEAT_OPTPERF_MASK_2_1, representing the first bit, pre-2.1 bit
width, and post-2.1 bit width of OPTPERF.

Update nvme_update_disk_info() to check both OPTPERF bits for
controllers that report version 2.1 or newer, as NPWG and NOWS are
supported even if only bit 5 is set.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/core.c | 8 +++++++-
 include/linux/nvme.h     | 6 +++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index da477b502762..04a8dae12333 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2066,6 +2066,7 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
 	u32 bs = 1U << head->lba_shift;
 	u32 atomic_bs, phys_bs, io_opt = 0;
 	bool valid = true;
+	u8 optperf;
 
 	/*
 	 * The block layer can't support LBA sizes larger than the page size
@@ -2080,7 +2081,12 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
 	phys_bs = bs;
 	atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs);
 
-	if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
+	optperf = id->nsfeat >> NVME_NS_FEAT_OPTPERF_SHIFT;
+	if (ctrl->vs >= NVME_VS(2, 1, 0))
+		optperf &= NVME_NS_FEAT_OPTPERF_MASK_2_1;
+	else
+		optperf &= NVME_NS_FEAT_OPTPERF_MASK;
+	if (optperf) {
 		/* NPWG = Namespace Preferred Write Granularity */
 		phys_bs = bs * (1 + le16_to_cpu(id->npwg));
 		/* NOWS = Namespace Optimal Write Size */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index ec011dce4a97..2b66a86d7da6 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -597,7 +597,11 @@ enum {
 enum {
 	NVME_NS_FEAT_THIN	= 1 << 0,
 	NVME_NS_FEAT_ATOMICS	= 1 << 1,
-	NVME_NS_FEAT_IO_OPT	= 1 << 4,
+	NVME_NS_FEAT_OPTPERF_SHIFT = 4,
+	/* In NVMe version 2.0 and below, OPTPERF is only bit 4 of NSFEAT */
+	NVME_NS_FEAT_OPTPERF_MASK = 0x1,
+	/* Since version 2.1, OPTPERF is bits 4 and 5 of NSFEAT */
+	NVME_NS_FEAT_OPTPERF_MASK_2_1 = 0x3,
 	NVME_NS_ATTR_RO		= 1 << 0,
 	NVME_NS_FLBAS_LBA_MASK	= 0xf,
 	NVME_NS_FLBAS_LBA_UMASK	= 0x60,
-- 
cgit v1.2.3


From 09e8f0f93491c6be867f32d4edc0b16fb5da785e Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@wdc.com>
Date: Fri, 20 Mar 2026 10:20:44 +1000
Subject: nvme: Add the DHCHAP maximum HD IDs

In preperation for using DHCHAP length in upcoming host and target
patches let's add the hash and diffie-hellman ID length macros.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Yunje Shin <ioerts@kookmin.ac.kr>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Chris Leech <cleech@redhat.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2b66a86d7da6..041f30931a90 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -2348,4 +2348,8 @@ enum nvme_pr_change_ptpl {
 
 #define NVME_PR_IGNORE_KEY (1 << 3)
 
+/* Section 8.3.4.5.2 of the NVMe 2.1 */
+#define NVME_AUTH_DHCHAP_MAX_HASH_IDS 30
+#define NVME_AUTH_DHCHAP_MAX_DH_IDS 30
+
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From f699bcc8bcdf99565928a7b1fc7ee656f6c81815 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Date: Tue, 24 Mar 2026 18:56:25 +0200
Subject: resource: Pass full extent of empty space to resource_alignf callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

__find_resource_space() calculates the full extent of empty space but only
passes the aligned space to resource_alignf callback. In some situations,
the callback may choose take advantage of the free space before the
requested alignment.

Pass the full extent of the calculated empty space to resource_alignf
callback as an additional parameter.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Xifer <xiferdev@gmail.com>
Link: https://patch.msgid.link/20260324165633.4583-3-ilpo.jarvinen@linux.intel.com
---
 arch/alpha/kernel/pci.c          | 1 +
 arch/arm/kernel/bios32.c         | 4 +++-
 arch/m68k/kernel/pcibios.c       | 4 +++-
 arch/mips/pci/pci-generic.c      | 3 ++-
 arch/mips/pci/pci-legacy.c       | 1 +
 arch/parisc/kernel/pci.c         | 4 +++-
 arch/powerpc/kernel/pci-common.c | 4 +++-
 arch/s390/pci/pci.c              | 1 +
 arch/sh/drivers/pci/pci.c        | 4 +++-
 arch/x86/pci/i386.c              | 3 ++-
 arch/xtensa/kernel/pci.c         | 1 +
 drivers/pci/setup-res.c          | 3 ++-
 drivers/pcmcia/rsrc_nonstatic.c  | 3 ++-
 include/linux/ioport.h           | 2 ++
 include/linux/pci.h              | 7 ++++---
 kernel/resource.c                | 3 ++-
 16 files changed, 35 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 51a8a4c4572a..11df411b1d18 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -125,6 +125,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
 
 resource_size_t
 pcibios_align_resource(void *data, const struct resource *res,
+		       const struct resource *empty_res,
 		       resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index b5793e8fbdc1..5b9b4fcd0e54 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -560,7 +560,9 @@ char * __init pcibios_setup(char *str)
  * which might be mirrored at 0x0100-0x03ff..
  */
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t align)
+				       const struct resource *empty_res,
+				       resource_size_t size,
+				       resource_size_t align)
 {
 	struct pci_dev *dev = data;
 	resource_size_t start = res->start;
diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c
index e6ab3f9ff5d8..1415f6e4e5ce 100644
--- a/arch/m68k/kernel/pcibios.c
+++ b/arch/m68k/kernel/pcibios.c
@@ -27,7 +27,9 @@
  * which might be mirrored at 0x0100-0x03ff..
  */
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-	resource_size_t size, resource_size_t align)
+				       const struct resource *empty_res,
+				       resource_size_t size,
+				       resource_size_t align)
 {
 	resource_size_t start = res->start;
 
diff --git a/arch/mips/pci/pci-generic.c b/arch/mips/pci/pci-generic.c
index d2d68bac3d25..f4957c26efc7 100644
--- a/arch/mips/pci/pci-generic.c
+++ b/arch/mips/pci/pci-generic.c
@@ -22,7 +22,8 @@
  * which might have be mirrored at 0x0100-0x03ff..
  */
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t align)
+				       const struct resource *empty_res,
+				       resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
 	resource_size_t start = res->start;
diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c
index d04b7c1294b6..817e97402afe 100644
--- a/arch/mips/pci/pci-legacy.c
+++ b/arch/mips/pci/pci-legacy.c
@@ -52,6 +52,7 @@ unsigned long pci_address_to_pio(phys_addr_t address)
  */
 resource_size_t
 pcibios_align_resource(void *data, const struct resource *res,
+		       const struct resource *empty_res,
 		       resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c
index cf285b17a5ae..f99b20795d5a 100644
--- a/arch/parisc/kernel/pci.c
+++ b/arch/parisc/kernel/pci.c
@@ -196,7 +196,9 @@ void __ref pcibios_init_bridge(struct pci_dev *dev)
  * than res->start.
  */
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t alignment)
+				       const struct resource *empty_res,
+				       resource_size_t size,
+				       resource_size_t alignment)
 {
 	resource_size_t mask, align, start = res->start;
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index a7a2fb605971..e7bfa15da043 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1132,7 +1132,9 @@ static int skip_isa_ioresource_align(struct pci_dev *dev)
  * which might have be mirrored at 0x0100-0x03ff..
  */
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t align)
+				       const struct resource *empty_res,
+				       resource_size_t size,
+				       resource_size_t align)
 {
 	struct pci_dev *dev = data;
 	resource_size_t start = res->start;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 2a430722cbe4..39bd2adfc240 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -266,6 +266,7 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
 }
 
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+				       const struct resource *empty_res,
 				       resource_size_t size,
 				       resource_size_t align)
 {
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index a3903304f33f..7a0522316ee3 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -168,7 +168,9 @@ subsys_initcall(pcibios_init);
  * modulo 0x400.
  */
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t align)
+				       const struct resource *empty_res,
+				       resource_size_t size,
+				       resource_size_t align)
 {
 	struct pci_dev *dev = data;
 	struct pci_channel *hose = dev->sysdata;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index c4ec39ad276b..6fbd4b34c3f7 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -153,7 +153,8 @@ skip_isa_ioresource_align(struct pci_dev *dev) {
  */
 resource_size_t
 pcibios_align_resource(void *data, const struct resource *res,
-			resource_size_t size, resource_size_t align)
+		       const struct resource *empty_res,
+		       resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
 	resource_size_t start = res->start;
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 62c900e400d6..64ccb7e0d92f 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -39,6 +39,7 @@
  */
 resource_size_t
 pcibios_align_resource(void *data, const struct resource *res,
+		       const struct resource *empty_res,
 		       resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index bb2aef373d6f..c375e255c509 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -251,10 +251,11 @@ static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev,
  */
 resource_size_t __weak pcibios_align_resource(void *data,
 					      const struct resource *res,
+					      const struct resource *empty_res,
 					      resource_size_t size,
 					      resource_size_t align)
 {
-       return res->start;
+	return res->start;
 }
 
 static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index 0679dd434719..949e69921fe9 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -602,7 +602,8 @@ static resource_size_t pcmcia_common_align(struct pcmcia_align_data *align_data,
 
 static resource_size_t
 pcmcia_align(void *align_data, const struct resource *res,
-	resource_size_t size, resource_size_t align)
+	     const struct resource *empty_res,
+	     resource_size_t size, resource_size_t align)
 {
 	struct pcmcia_align_data *data = align_data;
 	struct resource_map *m;
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 19d5e04564d9..3c73c9c0d4f7 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -202,6 +202,7 @@ enum {
  * typedef resource_alignf - Resource alignment callback
  * @data:	Private data used by the callback
  * @res:	Resource candidate range (an empty resource space)
+ * @empty_res:	Empty resource range without alignment applied
  * @size:	The minimum size of the empty space
  * @align:	Alignment from the constraints
  *
@@ -212,6 +213,7 @@ enum {
  */
 typedef resource_size_t (*resource_alignf)(void *data,
 					   const struct resource *res,
+					   const struct resource *empty_res,
 					   resource_size_t size,
 					   resource_size_t align);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1c270f1d5123..ac332ff9da9f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1206,9 +1206,10 @@ int __must_check pcibios_enable_device(struct pci_dev *, int mask);
 char *pcibios_setup(char *str);
 
 /* Used only when drivers/pci/setup.c is used */
-resource_size_t pcibios_align_resource(void *, const struct resource *,
-				resource_size_t,
-				resource_size_t);
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+				       const struct resource *empty_res,
+				       resource_size_t size,
+				       resource_size_t align);
 
 /* Generic PCI functions used internally */
 
diff --git a/kernel/resource.c b/kernel/resource.c
index 1e2f1dfc0edd..1b8d3101bdc6 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -759,7 +759,8 @@ static int __find_resource_space(struct resource *root, struct resource *old,
 			alloc.flags = avail.flags;
 			if (alignf) {
 				alloc.start = alignf(constraint->alignf_data,
-						     &avail, size, constraint->align);
+						     &avail, &tmp,
+						     size, constraint->align);
 			} else {
 				alloc.start = avail.start;
 			}
-- 
cgit v1.2.3


From 9036bd0efcb6162a77f3bf9bacbafba7686c7275 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Date: Tue, 24 Mar 2026 18:56:32 +0200
Subject: PCI: Align head space better
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a bridge window contains big and small resource(s), the small
resource(s) may not amount to the half of the size of the big resource
which would allow calculate_head_align() to shrink the head alignment.
This results in always placing the small resource(s) after the big
resource.

In general, it would be good to be able to place the small resource(s)
before the big resource to achieve better utilization of the address space.
In the cases where the large resource can only fit at the end of the
window, it is even required.

However, carrying the information over from pbus_size_mem() and
calculate_head_align() to __pci_assign_resource() and
pcibios_align_resource() is not easy with the current data structures.

A somewhat hacky way to move the non-aligning tail part to the head is
possible within pcibios_align_resource(). The free space between the start
of the free space span and the aligned start address can be compared with
the non-aligning remainder of the size. If the free space is larger than
the remainder, placing the remainder before the start address is possible.
This relocation should generally work, because PCI resources consist only
power-of-2 atoms.

Various arch requirements may still need to override the relocation, so the
relocation is only applied selectively in such cases.

Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221205
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Xifer <xiferdev@gmail.com>
Link: https://patch.msgid.link/20260324165633.4583-10-ilpo.jarvinen@linux.intel.com
---
 arch/arm/kernel/bios32.c         |  3 +++
 arch/m68k/kernel/pcibios.c       |  4 ++++
 arch/mips/pci/pci-generic.c      |  3 +++
 arch/mips/pci/pci-legacy.c       |  2 ++
 arch/parisc/kernel/pci.c         |  3 +++
 arch/powerpc/kernel/pci-common.c |  2 ++
 arch/sh/drivers/pci/pci.c        |  2 ++
 arch/x86/pci/i386.c              |  2 ++
 arch/xtensa/kernel/pci.c         |  2 ++
 drivers/pci/setup-res.c          | 39 ++++++++++++++++++++++++++++++++++++++-
 include/linux/pci.h              |  5 +++++
 kernel/resource.c                |  2 +-
 12 files changed, 67 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index cedb83a85dd9..ac0e890510da 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -577,6 +577,9 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 		return host_bridge->align_resource(dev, res,
 				start, size, align);
 
+	if (res->flags & IORESOURCE_MEM)
+		return pci_align_resource(dev, res, empty_res, size, align);
+
 	return start;
 }
 
diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c
index 7e286ee1976b..7a9e60df79c5 100644
--- a/arch/m68k/kernel/pcibios.c
+++ b/arch/m68k/kernel/pcibios.c
@@ -31,11 +31,15 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				       resource_size_t size,
 				       resource_size_t align)
 {
+	struct pci_dev *dev = data;
 	resource_size_t start = res->start;
 
 	if ((res->flags & IORESOURCE_IO) && (start & 0x300))
 		start = (start + 0x3ff) & ~0x3ff;
 
+	if (res->flags & IORESOURCE_MEM)
+		return pci_align_resource(dev, res, empty_res, size, align);
+
 	return start;
 }
 
diff --git a/arch/mips/pci/pci-generic.c b/arch/mips/pci/pci-generic.c
index aaa1d6de8bef..c2e23d0c1d77 100644
--- a/arch/mips/pci/pci-generic.c
+++ b/arch/mips/pci/pci-generic.c
@@ -38,6 +38,9 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 		return host_bridge->align_resource(dev, res,
 				start, size, align);
 
+	if (res->flags & IORESOURCE_MEM)
+		return pci_align_resource(dev, res, empty_res, size, align);
+
 	return start;
 }
 
diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c
index 817e97402afe..dae6dafdd6e0 100644
--- a/arch/mips/pci/pci-legacy.c
+++ b/arch/mips/pci/pci-legacy.c
@@ -70,6 +70,8 @@ pcibios_align_resource(void *data, const struct resource *res,
 		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
 	} else if (res->flags & IORESOURCE_MEM) {
+		start = pci_align_resource(dev, res, empty_res, size, align);
+
 		/* Make sure we start at our min on all hoses */
 		if (start < PCIBIOS_MIN_MEM + hose->mem_resource->start)
 			start = PCIBIOS_MIN_MEM + hose->mem_resource->start;
diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c
index f50be1a63c4c..b8007c7400d4 100644
--- a/arch/parisc/kernel/pci.c
+++ b/arch/parisc/kernel/pci.c
@@ -201,6 +201,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				       resource_size_t size,
 				       resource_size_t alignment)
 {
+	struct pci_dev *dev = data;
 	resource_size_t align, start = res->start;
 
 	DBG_RES("pcibios_align_resource(%s, (%p) [%lx,%lx]/%x, 0x%lx, 0x%lx)\n",
@@ -212,6 +213,8 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 	align = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 	if (align > alignment)
 		start = ALIGN(start, align);
+	else
+		start = pci_align_resource(dev, res, empty_res, size, alignment);
 
 	return start;
 }
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index e7bfa15da043..8efe95a0c4ff 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1144,6 +1144,8 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 			return start;
 		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
+	} else if (res->flags & IORESOURCE_MEM) {
+		start = pci_align_resource(dev, res, empty_res, size, align);
 	}
 
 	return start;
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 7a0522316ee3..878a27a1acfb 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -185,6 +185,8 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 		 */
 		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
+	} else if (res->flags & IORESOURCE_MEM) {
+		start = pci_align_resource(dev, res, empty_res, size, align);
 	}
 
 	return start;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 6fbd4b34c3f7..e2de26b82940 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -165,6 +165,8 @@ pcibios_align_resource(void *data, const struct resource *res,
 		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
 	} else if (res->flags & IORESOURCE_MEM) {
+		start = pci_align_resource(dev, res, empty_res, size, align);
+
 		/* The low 1MB range is reserved for ISA cards */
 		if (start < BIOS_END)
 			start = BIOS_END;
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 64ccb7e0d92f..305031551136 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -54,6 +54,8 @@ pcibios_align_resource(void *data, const struct resource *res,
 
 		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
+	} else if (res->flags & IORESOURCE_MEM) {
+		start = pci_align_resource(dev, res, empty_res, size, align);
 	}
 
 	return start;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index c375e255c509..fbc05cda96ee 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -244,6 +244,41 @@ static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev,
 	return 0;
 }
 
+/*
+ * For mem bridge windows, try to relocate tail remainder space to space
+ * before res->start if there's enough free space there. This enables
+ * tighter packing for resources.
+ */
+resource_size_t pci_align_resource(struct pci_dev *dev,
+				   const struct resource *res,
+				   const struct resource *empty_res,
+				   resource_size_t size,
+				   resource_size_t align)
+{
+	resource_size_t remainder, start_addr;
+
+	if (!(res->flags & IORESOURCE_MEM))
+		return res->start;
+
+	if (IS_ALIGNED(size, align))
+		return res->start;
+
+	remainder = size - ALIGN_DOWN(size, align);
+	/* Don't mess with size that doesn't align with window size granularity */
+	if (!IS_ALIGNED(remainder, pci_min_window_alignment(dev->bus, res->flags)))
+		return res->start;
+	/* Try to place remainder that doesn't fill align before */
+	if (res->start < remainder)
+		return res->start;
+	start_addr = res->start - remainder;
+	if (empty_res->start > start_addr)
+		return res->start;
+
+	pci_dbg(dev, "%pR: moving candidate start address below align to %llx\n",
+		res, (unsigned long long)start_addr);
+	return start_addr;
+}
+
 /*
  * We don't have to worry about legacy ISA devices, so nothing to do here.
  * This is marked as __weak because multiple architectures define it; it should
@@ -255,7 +290,9 @@ resource_size_t __weak pcibios_align_resource(void *data,
 					      resource_size_t size,
 					      resource_size_t align)
 {
-	return res->start;
+	struct pci_dev *dev = data;
+
+	return pci_align_resource(dev, res, empty_res, size, align);
 }
 
 static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ac332ff9da9f..cedf948dc614 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1210,6 +1210,11 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				       const struct resource *empty_res,
 				       resource_size_t size,
 				       resource_size_t align);
+resource_size_t pci_align_resource(struct pci_dev *dev,
+				   const struct resource *res,
+				   const struct resource *empty_res,
+				   resource_size_t size,
+				   resource_size_t align);
 
 /* Generic PCI functions used internally */
 
diff --git a/kernel/resource.c b/kernel/resource.c
index 8c5fcb30fc33..d02a53fb95d8 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -766,7 +766,7 @@ static int __find_resource_space(struct resource *root, struct resource *old,
 			}
 			alloc.end = alloc.start + size - 1;
 			if (alloc.start <= alloc.end &&
-			    __resource_contains_unbound(&avail, &alloc)) {
+			    __resource_contains_unbound(&full_avail, &alloc)) {
 				new->start = alloc.start;
 				new->end = alloc.end;
 				return 0;
-- 
cgit v1.2.3


From 09e61daf8e96b9bdb04dd112bdecf9382fd3f919 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 13 Mar 2026 14:45:51 +0000
Subject: arm_mpam: resctrl: Add boilerplate cpuhp and domain allocation

resctrl has its own data structures to describe its resources. We can't use
these directly as we play tricks with the 'MBA' resource, picking the MPAM
controls or monitors that best apply. We may export the same component as
both L3 and MBA.

Add mpam_resctrl_res[] as the array of class->resctrl mappings we are
exporting, and add the cpuhp hooks that allocated and free the resctrl
domain structures. Only the mpam control feature are considered here and
monitor support will be added later.

While we're here, plumb in a few other obvious things.

CONFIG_ARM_CPU_RESCTRL is used to allow this code to be built even though
it can't yet be linked against resctrl.

Tested-by: Gavin Shan <gshan@redhat.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Zeng Heng <zengheng4@huawei.com>
Tested-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com>
Tested-by: Jesse Chick <jessechick@os.amperecomputing.com>
Reviewed-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Co-developed-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
 drivers/resctrl/Makefile        |   1 +
 drivers/resctrl/mpam_devices.c  |  12 ++
 drivers/resctrl/mpam_internal.h |  21 +++
 drivers/resctrl/mpam_resctrl.c  | 324 ++++++++++++++++++++++++++++++++++++++++
 include/linux/arm_mpam.h        |   3 +
 5 files changed, 361 insertions(+)
 create mode 100644 drivers/resctrl/mpam_resctrl.c

(limited to 'include/linux')

diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile
index 898199dcf80d..40beaf999582 100644
--- a/drivers/resctrl/Makefile
+++ b/drivers/resctrl/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_ARM64_MPAM_DRIVER)			+= mpam.o
 mpam-y						+= mpam_devices.o
+mpam-$(CONFIG_ARM_CPU_RESCTRL)			+= mpam_resctrl.o
 
 ccflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG)	+= -DDEBUG
diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
index d50461d6ff3f..0e525539b7e2 100644
--- a/drivers/resctrl/mpam_devices.c
+++ b/drivers/resctrl/mpam_devices.c
@@ -1612,6 +1612,9 @@ static int mpam_cpu_online(unsigned int cpu)
 			mpam_reprogram_msc(msc);
 	}
 
+	if (mpam_is_enabled())
+		return mpam_resctrl_online_cpu(cpu);
+
 	return 0;
 }
 
@@ -1655,6 +1658,9 @@ static int mpam_cpu_offline(unsigned int cpu)
 {
 	struct mpam_msc *msc;
 
+	if (mpam_is_enabled())
+		mpam_resctrl_offline_cpu(cpu);
+
 	guard(srcu)(&mpam_srcu);
 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
 				 srcu_read_lock_held(&mpam_srcu)) {
@@ -2500,6 +2506,12 @@ static void mpam_enable_once(void)
 	mutex_unlock(&mpam_list_lock);
 	cpus_read_unlock();
 
+	if (!err) {
+		err = mpam_resctrl_setup();
+		if (err)
+			pr_err("Failed to initialise resctrl: %d\n", err);
+	}
+
 	if (err) {
 		mpam_disable_reason = "Failed to enable.";
 		schedule_work(&mpam_broken_work);
diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
index a13fb9880ced..43c8e0f5f7ac 100644
--- a/drivers/resctrl/mpam_internal.h
+++ b/drivers/resctrl/mpam_internal.h
@@ -12,6 +12,7 @@
 #include <linux/jump_label.h>
 #include <linux/llist.h>
 #include <linux/mutex.h>
+#include <linux/resctrl.h>
 #include <linux/spinlock.h>
 #include <linux/srcu.h>
 #include <linux/types.h>
@@ -333,6 +334,16 @@ struct mpam_msc_ris {
 	struct mpam_garbage	garbage;
 };
 
+struct mpam_resctrl_dom {
+	struct mpam_component		*ctrl_comp;
+	struct rdt_ctrl_domain		resctrl_ctrl_dom;
+};
+
+struct mpam_resctrl_res {
+	struct mpam_class	*class;
+	struct rdt_resource	resctrl_res;
+};
+
 static inline int mpam_alloc_csu_mon(struct mpam_class *class)
 {
 	struct mpam_props *cprops = &class->props;
@@ -387,6 +398,16 @@ void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx);
 int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level,
 				   cpumask_t *affinity);
 
+#ifdef CONFIG_RESCTRL_FS
+int mpam_resctrl_setup(void);
+int mpam_resctrl_online_cpu(unsigned int cpu);
+void mpam_resctrl_offline_cpu(unsigned int cpu);
+#else
+static inline int mpam_resctrl_setup(void) { return 0; }
+static inline int mpam_resctrl_online_cpu(unsigned int cpu) { return 0; }
+static inline void mpam_resctrl_offline_cpu(unsigned int cpu) { }
+#endif /* CONFIG_RESCTRL_FS */
+
 /*
  * MPAM MSCs have the following register layout. See:
  * Arm Memory System Resource Partitioning and Monitoring (MPAM) System
diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c
new file mode 100644
index 000000000000..9a3070970414
--- /dev/null
+++ b/drivers/resctrl/mpam_resctrl.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2025 Arm Ltd.
+
+#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
+
+#include <linux/arm_mpam.h>
+#include <linux/cacheinfo.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/resctrl.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/mpam.h>
+
+#include "mpam_internal.h"
+
+/*
+ * The classes we've picked to map to resctrl resources, wrapped
+ * in with their resctrl structure.
+ * Class pointer may be NULL.
+ */
+static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES];
+
+#define for_each_mpam_resctrl_control(res, rid)					\
+	for (rid = 0, res = &mpam_resctrl_controls[rid];			\
+	     rid < RDT_NUM_RESOURCES;						\
+	     rid++, res = &mpam_resctrl_controls[rid])
+
+/* The lock for modifying resctrl's domain lists from cpuhp callbacks. */
+static DEFINE_MUTEX(domain_list_lock);
+
+bool resctrl_arch_alloc_capable(void)
+{
+	struct mpam_resctrl_res *res;
+	enum resctrl_res_level rid;
+
+	for_each_mpam_resctrl_control(res, rid) {
+		if (res->resctrl_res.alloc_capable)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * MSC may raise an error interrupt if it sees an out or range partid/pmg,
+ * and go on to truncate the value. Regardless of what the hardware supports,
+ * only the system wide safe value is safe to use.
+ */
+u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored)
+{
+	return mpam_partid_max + 1;
+}
+
+struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
+{
+	if (l >= RDT_NUM_RESOURCES)
+		return NULL;
+
+	return &mpam_resctrl_controls[l].resctrl_res;
+}
+
+static int mpam_resctrl_control_init(struct mpam_resctrl_res *res)
+{
+	/* TODO: initialise the resctrl resources */
+
+	return 0;
+}
+
+static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp)
+{
+	struct mpam_class *class = comp->class;
+
+	if (class->type == MPAM_CLASS_CACHE)
+		return comp->comp_id;
+
+	/* TODO: repaint domain ids to match the L3 domain ids */
+	/* Otherwise, expose the ID used by the firmware table code. */
+	return comp->comp_id;
+}
+
+static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp,
+					 enum resctrl_res_level rid,
+					 struct rdt_domain_hdr *hdr)
+{
+	lockdep_assert_cpus_held();
+
+	INIT_LIST_HEAD(&hdr->list);
+	hdr->id = mpam_resctrl_pick_domain_id(cpu, comp);
+	hdr->rid = rid;
+	cpumask_set_cpu(cpu, &hdr->cpu_mask);
+}
+
+static void mpam_resctrl_online_domain_hdr(unsigned int cpu,
+					   struct rdt_domain_hdr *hdr)
+{
+	lockdep_assert_cpus_held();
+
+	cpumask_set_cpu(cpu, &hdr->cpu_mask);
+}
+
+/**
+ * mpam_resctrl_offline_domain_hdr() - Update the domain header to remove a CPU.
+ * @cpu:	The CPU to remove from the domain.
+ * @hdr:	The domain's header.
+ *
+ * Removes @cpu from the header mask. If this was the last CPU in the domain,
+ * the domain header is removed from its parent list and true is returned,
+ * indicating the parent structure can be freed.
+ * If there are other CPUs in the domain, returns false.
+ */
+static bool mpam_resctrl_offline_domain_hdr(unsigned int cpu,
+					    struct rdt_domain_hdr *hdr)
+{
+	lockdep_assert_held(&domain_list_lock);
+
+	cpumask_clear_cpu(cpu, &hdr->cpu_mask);
+	if (cpumask_empty(&hdr->cpu_mask)) {
+		list_del_rcu(&hdr->list);
+		synchronize_rcu();
+		return true;
+	}
+
+	return false;
+}
+
+static void mpam_resctrl_domain_insert(struct list_head *list,
+				       struct rdt_domain_hdr *new)
+{
+	struct rdt_domain_hdr *err;
+	struct list_head *pos = NULL;
+
+	lockdep_assert_held(&domain_list_lock);
+
+	err = resctrl_find_domain(list, new->id, &pos);
+	if (WARN_ON_ONCE(err))
+		return;
+
+	list_add_tail_rcu(&new->list, pos);
+}
+
+static struct mpam_resctrl_dom *
+mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res)
+{
+	int err;
+	struct mpam_resctrl_dom *dom;
+	struct rdt_ctrl_domain *ctrl_d;
+	struct mpam_class *class = res->class;
+	struct mpam_component *comp_iter, *ctrl_comp;
+	struct rdt_resource *r = &res->resctrl_res;
+
+	lockdep_assert_held(&domain_list_lock);
+
+	ctrl_comp = NULL;
+	guard(srcu)(&mpam_srcu);
+	list_for_each_entry_srcu(comp_iter, &class->components, class_list,
+				 srcu_read_lock_held(&mpam_srcu)) {
+		if (cpumask_test_cpu(cpu, &comp_iter->affinity)) {
+			ctrl_comp = comp_iter;
+			break;
+		}
+	}
+
+	/* class has no component for this CPU */
+	if (WARN_ON_ONCE(!ctrl_comp))
+		return ERR_PTR(-EINVAL);
+
+	dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu));
+	if (!dom)
+		return ERR_PTR(-ENOMEM);
+
+	if (r->alloc_capable) {
+		dom->ctrl_comp = ctrl_comp;
+
+		ctrl_d = &dom->resctrl_ctrl_dom;
+		mpam_resctrl_domain_hdr_init(cpu, ctrl_comp, r->rid, &ctrl_d->hdr);
+		ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN;
+		err = resctrl_online_ctrl_domain(r, ctrl_d);
+		if (err)
+			goto free_domain;
+
+		mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr);
+	} else {
+		pr_debug("Skipped control domain online - no controls\n");
+	}
+	return dom;
+
+free_domain:
+	kfree(dom);
+	dom = ERR_PTR(err);
+
+	return dom;
+}
+
+static struct mpam_resctrl_dom *
+mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res)
+{
+	struct mpam_resctrl_dom *dom;
+	struct rdt_resource *r = &res->resctrl_res;
+
+	lockdep_assert_cpus_held();
+
+	list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) {
+		if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity))
+			return dom;
+	}
+
+	return NULL;
+}
+
+int mpam_resctrl_online_cpu(unsigned int cpu)
+{
+	struct mpam_resctrl_res *res;
+	enum resctrl_res_level rid;
+
+	guard(mutex)(&domain_list_lock);
+	for_each_mpam_resctrl_control(res, rid) {
+		struct mpam_resctrl_dom *dom;
+		struct rdt_resource *r = &res->resctrl_res;
+
+		if (!res->class)
+			continue;	// dummy_resource;
+
+		dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
+		if (!dom) {
+			dom = mpam_resctrl_alloc_domain(cpu, res);
+			if (IS_ERR(dom))
+				return PTR_ERR(dom);
+		} else {
+			if (r->alloc_capable) {
+				struct rdt_ctrl_domain *ctrl_d = &dom->resctrl_ctrl_dom;
+
+				mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr);
+			}
+		}
+	}
+
+	resctrl_online_cpu(cpu);
+
+	return 0;
+}
+
+void mpam_resctrl_offline_cpu(unsigned int cpu)
+{
+	struct mpam_resctrl_res *res;
+	enum resctrl_res_level rid;
+
+	resctrl_offline_cpu(cpu);
+
+	guard(mutex)(&domain_list_lock);
+	for_each_mpam_resctrl_control(res, rid) {
+		struct mpam_resctrl_dom *dom;
+		struct rdt_ctrl_domain *ctrl_d;
+		bool ctrl_dom_empty;
+		struct rdt_resource *r = &res->resctrl_res;
+
+		if (!res->class)
+			continue;	// dummy resource
+
+		dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
+		if (WARN_ON_ONCE(!dom))
+			continue;
+
+		if (r->alloc_capable) {
+			ctrl_d = &dom->resctrl_ctrl_dom;
+			ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
+			if (ctrl_dom_empty)
+				resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d);
+		} else {
+			ctrl_dom_empty = true;
+		}
+
+		if (ctrl_dom_empty)
+			kfree(dom);
+	}
+}
+
+int mpam_resctrl_setup(void)
+{
+	int err = 0;
+	struct mpam_resctrl_res *res;
+	enum resctrl_res_level rid;
+
+	cpus_read_lock();
+	for_each_mpam_resctrl_control(res, rid) {
+		INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains);
+		res->resctrl_res.rid = rid;
+	}
+
+	/* TODO: pick MPAM classes to map to resctrl resources */
+
+	/* Initialise the resctrl structures from the classes */
+	for_each_mpam_resctrl_control(res, rid) {
+		if (!res->class)
+			continue;	// dummy resource
+
+		err = mpam_resctrl_control_init(res);
+		if (err) {
+			pr_debug("Failed to initialise rid %u\n", rid);
+			break;
+		}
+	}
+	cpus_read_unlock();
+
+	if (err) {
+		pr_debug("Internal error %d - resctrl not supported\n", err);
+		return err;
+	}
+
+	if (!resctrl_arch_alloc_capable()) {
+		pr_debug("No alloc(%u) found - resctrl not supported\n",
+			 resctrl_arch_alloc_capable());
+		return -EOPNOTSUPP;
+	}
+
+	/* TODO: call resctrl_init() */
+
+	return 0;
+}
diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
index 7f00c5285a32..2c7d1413a401 100644
--- a/include/linux/arm_mpam.h
+++ b/include/linux/arm_mpam.h
@@ -49,6 +49,9 @@ static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
 }
 #endif
 
+bool resctrl_arch_alloc_capable(void);
+bool resctrl_arch_mon_capable(void);
+
 /**
  * mpam_register_requestor() - Register a requestor with the MPAM driver
  * @partid_max:		The maximum PARTID value the requestor can generate.
-- 
cgit v1.2.3


From 9d2e1a99fae58ce992f147bdf83b5d9089f70b27 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 13 Mar 2026 14:45:56 +0000
Subject: arm_mpam: resctrl: Add plumbing against arm64 task and cpu hooks

arm64 provides helpers for changing a task's and a cpu's mpam partid/pmg
values.

These are used to back a number of resctrl_arch_ functions. Connect them
up.

Tested-by: Gavin Shan <gshan@redhat.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Zeng Heng <zengheng4@huawei.com>
Tested-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com>
Tested-by: Jesse Chick <jessechick@os.amperecomputing.com>
Reviewed-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Co-developed-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
 drivers/resctrl/mpam_resctrl.c | 58 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/arm_mpam.h       |  5 ++++
 2 files changed, 63 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c
index ea60777934ff..9cde5b7e644c 100644
--- a/drivers/resctrl/mpam_resctrl.c
+++ b/drivers/resctrl/mpam_resctrl.c
@@ -8,6 +8,7 @@
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/errno.h>
+#include <linux/limits.h>
 #include <linux/list.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
@@ -34,6 +35,8 @@ static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES];
 /* The lock for modifying resctrl's domain lists from cpuhp callbacks. */
 static DEFINE_MUTEX(domain_list_lock);
 
+static bool cdp_enabled;
+
 bool resctrl_arch_alloc_capable(void)
 {
 	struct mpam_resctrl_res *res;
@@ -57,6 +60,61 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored)
 	return mpam_partid_max + 1;
 }
 
+void resctrl_arch_sched_in(struct task_struct *tsk)
+{
+	lockdep_assert_preemption_disabled();
+
+	mpam_thread_switch(tsk);
+}
+
+void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid)
+{
+	WARN_ON_ONCE(closid > U16_MAX);
+	WARN_ON_ONCE(rmid > U8_MAX);
+
+	if (!cdp_enabled) {
+		mpam_set_cpu_defaults(cpu, closid, closid, rmid, rmid);
+	} else {
+		/*
+		 * When CDP is enabled, resctrl halves the closid range and we
+		 * use odd/even partid for one closid.
+		 */
+		u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
+		u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
+
+		mpam_set_cpu_defaults(cpu, partid_d, partid_i, rmid, rmid);
+	}
+}
+
+void resctrl_arch_sync_cpu_closid_rmid(void *info)
+{
+	struct resctrl_cpu_defaults *r = info;
+
+	lockdep_assert_preemption_disabled();
+
+	if (r) {
+		resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(),
+							 r->closid, r->rmid);
+	}
+
+	resctrl_arch_sched_in(current);
+}
+
+void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
+{
+	WARN_ON_ONCE(closid > U16_MAX);
+	WARN_ON_ONCE(rmid > U8_MAX);
+
+	if (!cdp_enabled) {
+		mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid);
+	} else {
+		u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
+		u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
+
+		mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid);
+	}
+}
+
 struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
 {
 	if (l >= RDT_NUM_RESOURCES)
diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
index 2c7d1413a401..5a78299ec464 100644
--- a/include/linux/arm_mpam.h
+++ b/include/linux/arm_mpam.h
@@ -52,6 +52,11 @@ static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
 bool resctrl_arch_alloc_capable(void);
 bool resctrl_arch_mon_capable(void);
 
+void resctrl_arch_set_cpu_default_closid(int cpu, u32 closid);
+void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid);
+void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid);
+void resctrl_arch_sched_in(struct task_struct *tsk);
+
 /**
  * mpam_register_requestor() - Register a requestor with the MPAM driver
  * @partid_max:		The maximum PARTID value the requestor can generate.
-- 
cgit v1.2.3


From 6789fb99282c0a8e8e84701b7edf456f4a9e71e2 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 13 Mar 2026 14:45:57 +0000
Subject: arm_mpam: resctrl: Add CDP emulation

Intel RDT's CDP feature allows the cache to use a different control value
depending on whether the accesses was for instruction fetch or a data
access. MPAM's equivalent feature is the other way up: the CPU assigns a
different partid label to traffic depending on whether it was instruction
fetch or a data access, which causes the cache to use a different control
value based solely on the partid.

MPAM can emulate CDP, with the side effect that the alternative partid is
seen by all MSC, it can't be enabled per-MSC.

Add the resctrl hooks to turn this on or off. Add the helpers that match a
closid against a task, which need to be aware that the value written to
hardware is not the same as the one resctrl is using.

Update the 'arm64_mpam_global_default' variable the arch code uses during
context switch to know when the per-cpu value should be used instead. Also,
update these per-cpu values and sync the resulting mpam partid/pmg
configuration to hardware.

resctrl can enable CDP for L2 caches, L3 caches or both. When it is enabled
by one and not the other MPAM globally enabled CDP but hides the effect
on the other cache resource. This hiding is possible as CPOR is the only
supported cache control and that uses a resource bitmap; two partids with
the same bitmap act as one.

Awkwardly, the MB controls don't implement CDP and CDP can't be hidden as
the memory bandwidth control is a maximum per partid which can't be
modelled with more partids. If the total maximum is used for both the data
and instruction partids then then the maximum may be exceeded and if it is
split in two then the one using more bandwidth will hit a lower
limit. Hence, hide the MB controls completely if CDP is enabled for any
resource.

Tested-by: Gavin Shan <gshan@redhat.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Zeng Heng <zengheng4@huawei.com>
Tested-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com>
Tested-by: Jesse Chick <jessechick@os.amperecomputing.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Amit Singh Tomar <amitsinght@marvell.com>
Reviewed-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Co-developed-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
 arch/arm64/include/asm/mpam.h   |   1 +
 drivers/resctrl/mpam_internal.h |   1 +
 drivers/resctrl/mpam_resctrl.c  | 122 ++++++++++++++++++++++++++++++++++++++++
 include/linux/arm_mpam.h        |   2 +
 4 files changed, 126 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h
index 05aa71200f61..70d396e7b6da 100644
--- a/arch/arm64/include/asm/mpam.h
+++ b/arch/arm64/include/asm/mpam.h
@@ -4,6 +4,7 @@
 #ifndef __ASM__MPAM_H
 #define __ASM__MPAM_H
 
+#include <linux/arm_mpam.h>
 #include <linux/bitfield.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
index f063a741aaba..2751eeaba302 100644
--- a/drivers/resctrl/mpam_internal.h
+++ b/drivers/resctrl/mpam_internal.h
@@ -342,6 +342,7 @@ struct mpam_resctrl_dom {
 struct mpam_resctrl_res {
 	struct mpam_class	*class;
 	struct rdt_resource	resctrl_res;
+	bool			cdp_enabled;
 };
 
 static inline int mpam_alloc_csu_mon(struct mpam_class *class)
diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c
index 9cde5b7e644c..2111542f485e 100644
--- a/drivers/resctrl/mpam_resctrl.c
+++ b/drivers/resctrl/mpam_resctrl.c
@@ -35,6 +35,10 @@ static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES];
 /* The lock for modifying resctrl's domain lists from cpuhp callbacks. */
 static DEFINE_MUTEX(domain_list_lock);
 
+/*
+ * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM0_EL1.
+ * This applies globally to all traffic the CPU generates.
+ */
 static bool cdp_enabled;
 
 bool resctrl_arch_alloc_capable(void)
@@ -50,6 +54,74 @@ bool resctrl_arch_alloc_capable(void)
 	return false;
 }
 
+bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
+{
+	return mpam_resctrl_controls[rid].cdp_enabled;
+}
+
+/**
+ * resctrl_reset_task_closids() - Reset the PARTID/PMG values for all tasks.
+ *
+ * At boot, all existing tasks use partid zero for D and I.
+ * To enable/disable CDP emulation, all these tasks need relabelling.
+ */
+static void resctrl_reset_task_closids(void)
+{
+	struct task_struct *p, *t;
+
+	read_lock(&tasklist_lock);
+	for_each_process_thread(p, t) {
+		resctrl_arch_set_closid_rmid(t, RESCTRL_RESERVED_CLOSID,
+					     RESCTRL_RESERVED_RMID);
+	}
+	read_unlock(&tasklist_lock);
+}
+
+int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable)
+{
+	u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID;
+	int cpu;
+
+	/*
+	 * resctrl_arch_set_cdp_enabled() is only called with enable set to
+	 * false on error and unmount.
+	 */
+	cdp_enabled = enable;
+	mpam_resctrl_controls[rid].cdp_enabled = enable;
+
+	/* The mbw_max feature can't hide cdp as it's a per-partid maximum. */
+	if (cdp_enabled && !mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled)
+		mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = false;
+
+	if (mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled &&
+	    mpam_resctrl_controls[RDT_RESOURCE_MBA].class)
+		mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = true;
+
+	if (enable) {
+		if (mpam_partid_max < 1)
+			return -EINVAL;
+
+		partid_d = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_DATA);
+		partid_i = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_CODE);
+	}
+
+	mpam_set_task_partid_pmg(current, partid_d, partid_i, 0, 0);
+	WRITE_ONCE(arm64_mpam_global_default, mpam_get_regval(current));
+
+	resctrl_reset_task_closids();
+
+	for_each_possible_cpu(cpu)
+		mpam_set_cpu_defaults(cpu, partid_d, partid_i, 0, 0);
+	on_each_cpu(resctrl_arch_sync_cpu_closid_rmid, NULL, 1);
+
+	return 0;
+}
+
+static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid)
+{
+	return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid);
+}
+
 /*
  * MSC may raise an error interrupt if it sees an out or range partid/pmg,
  * and go on to truncate the value. Regardless of what the hardware supports,
@@ -115,6 +187,30 @@ void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
 	}
 }
 
+bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
+{
+	u64 regval = mpam_get_regval(tsk);
+	u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
+
+	if (cdp_enabled)
+		tsk_closid >>= 1;
+
+	return tsk_closid == closid;
+}
+
+/* The task's pmg is not unique, the partid must be considered too */
+bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
+{
+	u64 regval = mpam_get_regval(tsk);
+	u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
+	u32 tsk_rmid = FIELD_GET(MPAM0_EL1_PMG_D, regval);
+
+	if (cdp_enabled)
+		tsk_closid >>= 1;
+
+	return (tsk_closid == closid) && (tsk_rmid == rmid);
+}
+
 struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
 {
 	if (l >= RDT_NUM_RESOURCES)
@@ -247,6 +343,14 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 	dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
 	cprops = &res->class->props;
 
+	/*
+	 * When CDP is enabled, but the resource doesn't support it,
+	 * the control is cloned across both partids.
+	 * Pick one at random to read:
+	 */
+	if (mpam_resctrl_hide_cdp(r->rid))
+		type = CDP_DATA;
+
 	partid = resctrl_get_config_index(closid, type);
 	cfg = &dom->ctrl_comp->cfg[partid];
 
@@ -274,6 +378,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 			    u32 closid, enum resctrl_conf_type t, u32 cfg_val)
 {
+	int err;
 	u32 partid;
 	struct mpam_config cfg;
 	struct mpam_props *cprops;
@@ -291,6 +396,9 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 	dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
 	cprops = &res->class->props;
 
+	if (mpam_resctrl_hide_cdp(r->rid))
+		t = CDP_DATA;
+
 	partid = resctrl_get_config_index(closid, t);
 	if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) {
 		pr_debug("Not alloc capable or computed PARTID out of range\n");
@@ -313,6 +421,20 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 		return -EINVAL;
 	}
 
+	/*
+	 * When CDP is enabled, but the resource doesn't support it, we need to
+	 * apply the same configuration to the other partid.
+	 */
+	if (mpam_resctrl_hide_cdp(r->rid)) {
+		partid = resctrl_get_config_index(closid, CDP_CODE);
+		err = mpam_apply_config(dom->ctrl_comp, partid, &cfg);
+		if (err)
+			return err;
+
+		partid = resctrl_get_config_index(closid, CDP_DATA);
+		return mpam_apply_config(dom->ctrl_comp, partid, &cfg);
+	}
+
 	return mpam_apply_config(dom->ctrl_comp, partid, &cfg);
 }
 
diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
index 5a78299ec464..d329b1dc148b 100644
--- a/include/linux/arm_mpam.h
+++ b/include/linux/arm_mpam.h
@@ -56,6 +56,8 @@ void resctrl_arch_set_cpu_default_closid(int cpu, u32 closid);
 void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid);
 void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid);
 void resctrl_arch_sched_in(struct task_struct *tsk);
+bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid);
+bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid);
 
 /**
  * mpam_register_requestor() - Register a requestor with the MPAM driver
-- 
cgit v1.2.3


From 3e9b35823aabcb85cc039960256426e50f1fd601 Mon Sep 17 00:00:00 2001
From: Ben Horgan <ben.horgan@arm.com>
Date: Fri, 13 Mar 2026 14:46:00 +0000
Subject: arm_mpam: resctrl: Add rmid index helpers

Because MPAM's pmg aren't identical to RDT's rmid, resctrl handles some
data structures by index. This allows x86 to map indexes to RMID, and MPAM
to map them to partid-and-pmg.

Add the helpers to do this.

Tested-by: Gavin Shan <gshan@redhat.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Zeng Heng <zengheng4@huawei.com>
Tested-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com>
Tested-by: Jesse Chick <jessechick@os.amperecomputing.com>
Reviewed-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Suggested-by: James Morse <james.morse@arm.com>
Signed-off-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
 drivers/resctrl/mpam_resctrl.c | 16 ++++++++++++++++
 include/linux/arm_mpam.h       |  3 +++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c
index 240a06df2f07..370830ab1119 100644
--- a/drivers/resctrl/mpam_resctrl.c
+++ b/drivers/resctrl/mpam_resctrl.c
@@ -145,6 +145,22 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored)
 	return mpam_partid_max + 1;
 }
 
+u32 resctrl_arch_system_num_rmid_idx(void)
+{
+	return (mpam_pmg_max + 1) * (mpam_partid_max + 1);
+}
+
+u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
+{
+	return closid * (mpam_pmg_max + 1) + rmid;
+}
+
+void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
+{
+	*closid = idx / (mpam_pmg_max + 1);
+	*rmid = idx % (mpam_pmg_max + 1);
+}
+
 void resctrl_arch_sched_in(struct task_struct *tsk)
 {
 	lockdep_assert_preemption_disabled();
diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
index d329b1dc148b..7d23c90f077d 100644
--- a/include/linux/arm_mpam.h
+++ b/include/linux/arm_mpam.h
@@ -58,6 +58,9 @@ void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid);
 void resctrl_arch_sched_in(struct task_struct *tsk);
 bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid);
 bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid);
+u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid);
+void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid);
+u32 resctrl_arch_system_num_rmid_idx(void);
 
 /**
  * mpam_register_requestor() - Register a requestor with the MPAM driver
-- 
cgit v1.2.3


From 2a3c79c61539779a09928893518c8286d7774b54 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 13 Mar 2026 14:46:06 +0000
Subject: arm_mpam: resctrl: Allow resctrl to allocate monitors

When resctrl wants to read a domain's 'QOS_L3_OCCUP', it needs to allocate
a monitor on the corresponding resource. Monitors are allocated by class
instead of component.

Add helpers to allocate a CSU monitor. These helper return an out of range
value for MBM counters.

Allocating a montitor context is expected to block until hardware resources
become available. This only makes sense for QOS_L3_OCCUP as unallocated MBM
counters are losing data.

Tested-by: Gavin Shan <gshan@redhat.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Zeng Heng <zengheng4@huawei.com>
Tested-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com>
Tested-by: Jesse Chick <jessechick@os.amperecomputing.com>
Reviewed-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Co-developed-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
 drivers/resctrl/mpam_internal.h | 14 ++++++++-
 drivers/resctrl/mpam_resctrl.c  | 67 +++++++++++++++++++++++++++++++++++++++++
 include/linux/arm_mpam.h        |  5 +++
 3 files changed, 85 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
index 301cf5c151bd..85b2b9926360 100644
--- a/drivers/resctrl/mpam_internal.h
+++ b/drivers/resctrl/mpam_internal.h
@@ -29,6 +29,14 @@ struct platform_device;
 #define PACKED_FOR_KUNIT
 #endif
 
+/*
+ * This 'mon' values must not alias an actual monitor, so must be larger than
+ * U16_MAX, but not be confused with an errno value, so smaller than
+ * (u32)-SZ_4K.
+ * USE_PRE_ALLOCATED is used to avoid confusion with an actual monitor.
+ */
+#define USE_PRE_ALLOCATED	(U16_MAX + 1)
+
 static inline bool mpam_is_enabled(void)
 {
 	return static_branch_likely(&mpam_enabled);
@@ -216,7 +224,11 @@ enum mon_filter_options {
 };
 
 struct mon_cfg {
-	u16			mon;
+	/*
+	 * mon must be large enough to hold out of range values like
+	 * USE_PRE_ALLOCATED
+	 */
+	u32			mon;
 	u8			pmg;
 	bool			match_pmg;
 	bool			csu_exclude_clean;
diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c
index 07bb20a01b38..9682ffb15184 100644
--- a/drivers/resctrl/mpam_resctrl.c
+++ b/drivers/resctrl/mpam_resctrl.c
@@ -22,6 +22,8 @@
 
 #include "mpam_internal.h"
 
+DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters);
+
 /*
  * The classes we've picked to map to resctrl resources, wrapped
  * in with their resctrl structure.
@@ -289,6 +291,71 @@ struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
 	return &mpam_resctrl_controls[l].resctrl_res;
 }
 
+static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid)
+{
+	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];
+
+	if (!mon->class)
+		return -EINVAL;
+
+	switch (evtid) {
+	case QOS_L3_OCCUP_EVENT_ID:
+		/* With CDP, one monitor gets used for both code/data reads */
+		return mpam_alloc_csu_mon(mon->class);
+	case QOS_L3_MBM_LOCAL_EVENT_ID:
+	case QOS_L3_MBM_TOTAL_EVENT_ID:
+		return USE_PRE_ALLOCATED;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r,
+				 enum resctrl_event_id evtid)
+{
+	DEFINE_WAIT(wait);
+	int *ret;
+
+	ret = kmalloc_obj(*ret);
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
+
+	do {
+		prepare_to_wait(&resctrl_mon_ctx_waiters, &wait,
+				TASK_INTERRUPTIBLE);
+		*ret = resctrl_arch_mon_ctx_alloc_no_wait(evtid);
+		if (*ret == -ENOSPC)
+			schedule();
+	} while (*ret == -ENOSPC && !signal_pending(current));
+	finish_wait(&resctrl_mon_ctx_waiters, &wait);
+
+	return ret;
+}
+
+static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid,
+					      u32 mon_idx)
+{
+	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];
+
+	if (!mon->class)
+		return;
+
+	if (evtid == QOS_L3_OCCUP_EVENT_ID)
+		mpam_free_csu_mon(mon->class, mon_idx);
+
+	wake_up(&resctrl_mon_ctx_waiters);
+}
+
+void resctrl_arch_mon_ctx_free(struct rdt_resource *r,
+			       enum resctrl_event_id evtid, void *arch_mon_ctx)
+{
+	u32 mon_idx = *(u32 *)arch_mon_ctx;
+
+	kfree(arch_mon_ctx);
+
+	resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx);
+}
+
 static bool cache_has_usable_cpor(struct mpam_class *class)
 {
 	struct mpam_props *cprops = &class->props;
diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
index 7d23c90f077d..e1461e32af75 100644
--- a/include/linux/arm_mpam.h
+++ b/include/linux/arm_mpam.h
@@ -5,6 +5,7 @@
 #define __LINUX_ARM_MPAM_H
 
 #include <linux/acpi.h>
+#include <linux/resctrl_types.h>
 #include <linux/types.h>
 
 struct mpam_msc;
@@ -62,6 +63,10 @@ u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid);
 void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid);
 u32 resctrl_arch_system_num_rmid_idx(void);
 
+struct rdt_resource;
+void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, enum resctrl_event_id evtid);
+void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx);
+
 /**
  * mpam_register_requestor() - Register a requestor with the MPAM driver
  * @partid_max:		The maximum PARTID value the requestor can generate.
-- 
cgit v1.2.3


From fb56b29932ca276df268806ad52ed80f40f99a6e Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 13 Mar 2026 14:46:07 +0000
Subject: arm_mpam: resctrl: Add resctrl_arch_rmid_read()

resctrl uses resctrl_arch_rmid_read() to read counters. CDP emulation means
the counter may need reading in three different ways.

The helpers behind the resctrl_arch_ functions will be re-used for the ABMC
equivalent functions.

Add the rounding helper for checking monitor values while we're here.

Tested-by: Gavin Shan <gshan@redhat.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Zeng Heng <zengheng4@huawei.com>
Tested-by: Jesse Chick <jessechick@os.amperecomputing.com>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Co-developed-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
 drivers/resctrl/mpam_resctrl.c | 82 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/arm_mpam.h       |  5 +++
 2 files changed, 87 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c
index 9682ffb15184..9a15ddd340f7 100644
--- a/drivers/resctrl/mpam_resctrl.c
+++ b/drivers/resctrl/mpam_resctrl.c
@@ -356,6 +356,88 @@ void resctrl_arch_mon_ctx_free(struct rdt_resource *r,
 	resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx);
 }
 
+static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
+		      enum mpam_device_features mon_type,
+		      int mon_idx,
+		      enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val)
+{
+	struct mon_cfg cfg;
+
+	if (!mpam_is_enabled())
+		return -EINVAL;
+
+	/* Shift closid to account for CDP */
+	closid = resctrl_get_config_index(closid, cdp_type);
+
+	if (irqs_disabled()) {
+		/* Check if we can access this domain without an IPI */
+		return -EIO;
+	}
+
+	cfg = (struct mon_cfg) {
+		.mon = mon_idx,
+		.match_pmg = true,
+		.partid = closid,
+		.pmg = rmid,
+	};
+
+	return mpam_msmon_read(mon_comp, &cfg, mon_type, val);
+}
+
+static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
+			     enum mpam_device_features mon_type,
+			     int mon_idx, u32 closid, u32 rmid, u64 *val)
+{
+	if (cdp_enabled) {
+		u64 code_val = 0, data_val = 0;
+		int err;
+
+		err = __read_mon(mon, mon_comp, mon_type, mon_idx,
+				 CDP_CODE, closid, rmid, &code_val);
+		if (err)
+			return err;
+
+		err = __read_mon(mon, mon_comp, mon_type, mon_idx,
+				 CDP_DATA, closid, rmid, &data_val);
+		if (err)
+			return err;
+
+		*val += code_val + data_val;
+		return 0;
+	}
+
+	return __read_mon(mon, mon_comp, mon_type, mon_idx,
+			  CDP_NONE, closid, rmid, val);
+}
+
+/* MBWU when not in ABMC mode (not supported), and CSU counters. */
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
+			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
+			   void *arch_priv, u64 *val, void *arch_mon_ctx)
+{
+	struct mpam_resctrl_dom *l3_dom;
+	struct mpam_component *mon_comp;
+	u32 mon_idx = *(u32 *)arch_mon_ctx;
+	enum mpam_device_features mon_type;
+	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid];
+
+	resctrl_arch_rmid_read_context_check();
+
+	if (eventid >= QOS_NUM_EVENTS || !mon->class)
+		return -EINVAL;
+
+	l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr);
+	mon_comp = l3_dom->mon_comp[eventid];
+
+	if (eventid != QOS_L3_OCCUP_EVENT_ID)
+		return -EINVAL;
+
+	mon_type = mpam_feat_msmon_csu;
+
+	return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx,
+				 closid, rmid, val);
+}
+
 static bool cache_has_usable_cpor(struct mpam_class *class)
 {
 	struct mpam_props *cprops = &class->props;
diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
index e1461e32af75..86d5e326d2bd 100644
--- a/include/linux/arm_mpam.h
+++ b/include/linux/arm_mpam.h
@@ -67,6 +67,11 @@ struct rdt_resource;
 void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, enum resctrl_event_id evtid);
 void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx);
 
+static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
+{
+	return val;
+}
+
 /**
  * mpam_register_requestor() - Register a requestor with the MPAM driver
  * @partid_max:		The maximum PARTID value the requestor can generate.
-- 
cgit v1.2.3


From efc775eadce2c6e0921c21d9c29a7b6686022281 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 13 Mar 2026 14:46:09 +0000
Subject: arm_mpam: resctrl: Add empty definitions for assorted resctrl
 functions

A few resctrl features and hooks need to be provided, but aren't needed or
supported on MPAM platforms.

resctrl has individual hooks to separately enable and disable the
closid/partid and rmid/pmg context switching code. For MPAM this is all the
same thing, as the value in struct task_struct is used to cache the value
that should be written to hardware. arm64's context switching code is
enabled once MPAM is usable, but doesn't touch the hardware unless the
value has changed.

For now event configuration is not supported, and can be turned off by
returning 'false' from resctrl_arch_is_evt_configurable().

The new io_alloc feature is not supported either, always return false from
the enable helper to indicate and fail the enable.

Add this, and empty definitions for the other hooks.

Tested-by: Gavin Shan <gshan@redhat.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Zeng Heng <zengheng4@huawei.com>
Tested-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com>
Tested-by: Jesse Chick <jessechick@os.amperecomputing.com>
Reviewed-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Co-developed-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
 drivers/resctrl/mpam_resctrl.c | 65 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/arm_mpam.h       |  9 ++++++
 2 files changed, 74 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c
index f82fff3519df..777ecdc2d0f8 100644
--- a/drivers/resctrl/mpam_resctrl.c
+++ b/drivers/resctrl/mpam_resctrl.c
@@ -91,6 +91,71 @@ bool resctrl_arch_mon_capable(void)
 	return l3->mon_capable;
 }
 
+bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
+{
+	return false;
+}
+
+void resctrl_arch_mon_event_config_read(void *info)
+{
+}
+
+void resctrl_arch_mon_event_config_write(void *info)
+{
+}
+
+void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
+{
+}
+
+void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
+			     u32 closid, u32 rmid, enum resctrl_event_id eventid)
+{
+}
+
+void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
+			     u32 closid, u32 rmid, int cntr_id,
+			     enum resctrl_event_id eventid)
+{
+}
+
+void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
+			      enum resctrl_event_id evtid, u32 rmid, u32 closid,
+			      u32 cntr_id, bool assign)
+{
+}
+
+int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
+			   u32 unused, u32 rmid, int cntr_id,
+			   enum resctrl_event_id eventid, u64 *val)
+{
+	return -EOPNOTSUPP;
+}
+
+bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r)
+{
+	return false;
+}
+
+int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable)
+{
+	return -EINVAL;
+}
+
+int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable)
+{
+	return -EOPNOTSUPP;
+}
+
+bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r)
+{
+	return false;
+}
+
+void resctrl_arch_pre_mount(void)
+{
+}
+
 bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
 {
 	return mpam_resctrl_controls[rid].cdp_enabled;
diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
index 86d5e326d2bd..f92a36187a52 100644
--- a/include/linux/arm_mpam.h
+++ b/include/linux/arm_mpam.h
@@ -67,6 +67,15 @@ struct rdt_resource;
 void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, enum resctrl_event_id evtid);
 void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx);
 
+/*
+ * The CPU configuration for MPAM is cheap to write, and is only written if it
+ * has changed. No need for fine grained enables.
+ */
+static inline void resctrl_arch_enable_mon(void) { }
+static inline void resctrl_arch_disable_mon(void) { }
+static inline void resctrl_arch_enable_alloc(void) { }
+static inline void resctrl_arch_disable_alloc(void) { }
+
 static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
 {
 	return val;
-- 
cgit v1.2.3


From 362a4549f2acfb03390ddfcd91041e65a11a739f Mon Sep 17 00:00:00 2001
From: Koichiro Den <den@valinux.co.jp>
Date: Fri, 6 Mar 2026 12:14:41 +0900
Subject: NTB: core: Add .get_dma_dev() callback to ntb_dev_ops

Some NTB implementations are backed by a PCI function that is not the right
struct device to use with DMA API helpers (e.g. due to IOMMU topology, or
because the NTB device is virtual).

Add an optional .get_dma_dev() callback to struct ntb_dev_ops and provide a
helper, ntb_get_dma_dev(), so NTB clients can use the appropriate struct
device for DMA allocations and mappings.

If the callback is not implemented, ntb_get_dma_dev() returns the current
default (ntb->dev.parent). Drivers that implement .get_dma_dev() must
return a non-NULL device.

Suggested-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Koichiro Den <den@valinux.co.jp>
Signed-off-by: Manivannan Sadhasivam <mani@kernel.org>
[bhelgaas: format doc]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20260306031443.1911860-2-den@valinux.co.jp
---
 include/linux/ntb.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index 8ff9d663096b..879c3e89e026 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -256,6 +256,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
  * @msg_clear_mask:	See ntb_msg_clear_mask().
  * @msg_read:		See ntb_msg_read().
  * @peer_msg_write:	See ntb_peer_msg_write().
+ * @get_dma_dev:	See ntb_get_dma_dev().
  */
 struct ntb_dev_ops {
 	int (*port_number)(struct ntb_dev *ntb);
@@ -329,6 +330,7 @@ struct ntb_dev_ops {
 	int (*msg_clear_mask)(struct ntb_dev *ntb, u64 mask_bits);
 	u32 (*msg_read)(struct ntb_dev *ntb, int *pidx, int midx);
 	int (*peer_msg_write)(struct ntb_dev *ntb, int pidx, int midx, u32 msg);
+	struct device *(*get_dma_dev)(struct ntb_dev *ntb);
 };
 
 static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
@@ -391,6 +393,8 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		/* !ops->msg_clear_mask == !ops->msg_count	&& */
 		!ops->msg_read == !ops->msg_count		&&
 		!ops->peer_msg_write == !ops->msg_count		&&
+
+		/* ops->get_dma_dev is optional */
 		1;
 }
 
@@ -1563,6 +1567,26 @@ static inline int ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx,
 	return ntb->ops->peer_msg_write(ntb, pidx, midx, msg);
 }
 
+/**
+ * ntb_get_dma_dev() - get the device to use for DMA allocations/mappings
+ * @ntb:	NTB device context.
+ *
+ * Return a struct device suitable for DMA API allocations and mappings.
+ * This is typically the parent of the NTB device, but may be overridden by a
+ * driver by implementing .get_dma_dev().
+ *
+ * Drivers that implement .get_dma_dev() must return a non-NULL pointer.
+ *
+ * Return: device pointer to use for DMA operations.
+ */
+static inline struct device *ntb_get_dma_dev(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->get_dma_dev)
+		return ntb->dev.parent;
+
+	return ntb->ops->get_dma_dev(ntb);
+}
+
 /**
  * ntb_peer_resource_idx() - get a resource index for a given peer idx
  * @ntb:	NTB device context.
-- 
cgit v1.2.3


From 33eded29319d41fcba5d0257b126a48b449aad47 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Wed, 25 Mar 2026 12:36:08 -0700
Subject: dm: provide helper to set stacked limits

There are multiple device mappers that set up their stacking limits
exactly the same for the logical, physical and minimum IO queue limits.
Provide a helper for it.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
 drivers/md/dm-crypt.c         |  6 +-----
 drivers/md/dm-integrity.c     |  7 +------
 drivers/md/dm-verity-target.c |  8 +-------
 drivers/md/dm-writecache.c    | 10 +---------
 include/linux/device-mapper.h |  7 +++++++
 5 files changed, 11 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 885208a82c55..60642cee8609 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -3684,11 +3684,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct crypt_config *cc = ti->private;
 
-	limits->logical_block_size =
-		max_t(unsigned int, limits->logical_block_size, cc->sector_size);
-	limits->physical_block_size =
-		max_t(unsigned int, limits->physical_block_size, cc->sector_size);
-	limits->io_min = max_t(unsigned int, limits->io_min, cc->sector_size);
+	dm_stack_bs_limits(limits, cc->sector_size);
 	limits->dma_alignment = limits->logical_block_size - 1;
 
 	/*
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index d64c15c761d0..65c30dec8222 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -4046,12 +4046,7 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
 {
 	struct dm_integrity_c *ic = ti->private;
 
-	limits->logical_block_size = max(limits->logical_block_size,
-			    ic->sectors_per_block << SECTOR_SHIFT);
-	limits->physical_block_size = max(limits->physical_block_size,
-			    ic->sectors_per_block << SECTOR_SHIFT);
-	limits->io_min = max(limits->io_min,
-			    ic->sectors_per_block << SECTOR_SHIFT);
+	dm_stack_bs_limits(limits, ic->sectors_per_block << SECTOR_SHIFT);
 	limits->dma_alignment = limits->logical_block_size - 1;
 	limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT;
 
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index e1d435c79e96..9a9847f94c46 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -1011,13 +1011,7 @@ static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct dm_verity *v = ti->private;
 
-	if (limits->logical_block_size < 1 << v->data_dev_block_bits)
-		limits->logical_block_size = 1 << v->data_dev_block_bits;
-
-	if (limits->physical_block_size < 1 << v->data_dev_block_bits)
-		limits->physical_block_size = 1 << v->data_dev_block_bits;
-
-	limits->io_min = limits->logical_block_size;
+	dm_stack_bs_limits(limits, 1 << v->data_dev_block_bits);
 
 	/*
 	 * Similar to what dm-crypt does, opt dm-verity out of support for
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 98bd945f6da7..493f5202ad04 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1640,17 +1640,9 @@ static void writecache_io_hints(struct dm_target *ti, struct queue_limits *limit
 {
 	struct dm_writecache *wc = ti->private;
 
-	if (limits->logical_block_size < wc->block_size)
-		limits->logical_block_size = wc->block_size;
-
-	if (limits->physical_block_size < wc->block_size)
-		limits->physical_block_size = wc->block_size;
-
-	if (limits->io_min < wc->block_size)
-		limits->io_min = wc->block_size;
+	dm_stack_bs_limits(limits, wc->block_size);
 }
 
-
 static void writecache_writeback_endio(struct bio *bio)
 {
 	struct writeback_struct *wb = container_of(bio, struct writeback_struct, bio);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 38f625af6ab4..cd4faaf5d427 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -755,4 +755,11 @@ static inline unsigned long to_bytes(sector_t n)
 	return (n << SECTOR_SHIFT);
 }
 
+static inline void dm_stack_bs_limits(struct queue_limits *limits, unsigned int bs)
+{
+	limits->logical_block_size = max(limits->logical_block_size, bs);
+	limits->physical_block_size = max(limits->physical_block_size, bs);
+	limits->io_min = max(limits->io_min, bs);
+}
+
 #endif	/* _LINUX_DEVICE_MAPPER_H */
-- 
cgit v1.2.3


From d748047af1355df044faf8df0eedf0ef75f87de8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 25 Mar 2026 13:36:00 -0400
Subject: ptr_ring: disable KCSAN warnings

Eric Dumazet reported KCSAN warnings:

BUG: KCSAN: data-race in pfifo_fast_dequeue / pfifo_fast_enqueue

write to 0xffff88811d5ccc00 of 8 bytes by interrupt on cpu 0:
__ptr_ring_zero_tail include/linux/ptr_ring.h:259 [inline]
__ptr_ring_discard_one include/linux/ptr_ring.h:291 [inline]
__ptr_ring_consume include/linux/ptr_ring.h:311 [inline]
__skb_array_consume include/linux/skb_array.h:98 [inline]
pfifo_fast_dequeue+0x770/0x8f0 net/sched/sch_generic.c:770
dequeue_skb net/sched/sch_generic.c:297 [inline]
qdisc_restart net/sched/sch_generic.c:402 [inline]
__qdisc_run+0x189/0xc80 net/sched/sch_generic.c:420
qdisc_run include/net/pkt_sched.h:120 [inline]
net_tx_action+0x379/0x590 net/core/dev.c:5793
handle_softirqs+0xb9/0x280 kernel/softirq.c:622
do_softirq+0x45/0x60 kernel/softirq.c:523
__local_bh_enable_ip+0x70/0x80 kernel/softirq.c:450
local_bh_enable include/linux/bottom_half.h:33 [inline]
bpf_test_run+0x2db/0x620 net/bpf/test_run.c:426
bpf_prog_test_run_skb+0x9a4/0xef0 net/bpf/test_run.c:1159
bpf_prog_test_run+0x204/0x340 kernel/bpf/syscall.c:4721
__sys_bpf+0x52e/0x7e0 kernel/bpf/syscall.c:6246
__do_sys_bpf kernel/bpf/syscall.c:6341 [inline]
__se_sys_bpf kernel/bpf/syscall.c:6339 [inline]
__x64_sys_bpf+0x41/0x50 kernel/bpf/syscall.c:6339
x64_sys_call+0x10cb/0x3020 arch/x86/include/generated/asm/syscalls_64.h:322
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0x12c/0x370 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f

read to 0xffff88811d5ccc00 of 8 bytes by task 22632 on cpu 1:
__ptr_ring_produce include/linux/ptr_ring.h:106 [inline]
ptr_ring_produce include/linux/ptr_ring.h:129 [inline]
skb_array_produce include/linux/skb_array.h:44 [inline]
pfifo_fast_enqueue+0xd5/0x2c0 net/sched/sch_generic.c:741
dev_qdisc_enqueue net/core/dev.c:4144 [inline]
__dev_xmit_skb net/core/dev.c:4188 [inline]
__dev_queue_xmit+0x6a4/0x1f20 net/core/dev.c:4795
dev_queue_xmit include/linux/netdevice.h:3384 [inline]
__bpf_tx_skb net/core/filter.c:2153 [inline]
__bpf_redirect_common net/core/filter.c:2197 [inline]
__bpf_redirect+0x862/0x990 net/core/filter.c:2204
____bpf_clone_redirect net/core/filter.c:2487 [inline]
bpf_clone_redirect+0x20c/0x290 net/core/filter.c:2450
bpf_prog_53f18857bc887b09+0x22/0x2a
bpf_dispatcher_nop_func include/linux/bpf.h:1402 [inline]
__bpf_prog_run include/linux/filter.h:723 [inline]
bpf_prog_run include/linux/filter.h:730 [inline]
bpf_test_run+0x29d/0x620 net/bpf/test_run.c:423
bpf_prog_test_run_skb+0x9a4/0xef0 net/bpf/test_run.c:1159
bpf_prog_test_run+0x204/0x340 kernel/bpf/syscall.c:4721
__sys_bpf+0x52e/0x7e0 kernel/bpf/syscall.c:6246
__do_sys_bpf kernel/bpf/syscall.c:6341 [inline]
__se_sys_bpf kernel/bpf/syscall.c:6339 [inline]
__x64_sys_bpf+0x41/0x50 kernel/bpf/syscall.c:6339
x64_sys_call+0x10cb/0x3020 arch/x86/include/generated/asm/syscalls_64.h:322
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0x12c/0x370 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f

value changed: 0xffff888104a93a00 -> 0x0000000000000000

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 UID: 0 PID: 22632 Comm: syz.0.4135 Tainted: G W syzkaller #0
PREEMPT(full)
Tainted: [W]=WARN
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 01/24/2026

There is no race on ring accesses: reading/writing a partial pointer
would be fine, because the reading is done by the producer
which merely cares about NULL/non NULL.
Document and disable the warnings using data_race().

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/dd3984b3bce9df3591927f927668cb31cc7ecf34.1774460059.git.mst@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ptr_ring.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 534531807d95..d2c3629bbe45 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -48,7 +48,7 @@ struct ptr_ring {
  */
 static inline bool __ptr_ring_full(struct ptr_ring *r)
 {
-	return r->queue[r->producer];
+	return data_race(r->queue[r->producer]);
 }
 
 static inline bool ptr_ring_full(struct ptr_ring *r)
@@ -103,7 +103,7 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r)
  */
 static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
 {
-	if (unlikely(!r->size) || r->queue[r->producer])
+	if (unlikely(!r->size) || data_race(r->queue[r->producer]))
 		return -ENOSPC;
 
 	/* Make sure the pointer we are storing points to a valid data. */
@@ -194,7 +194,7 @@ static inline void *__ptr_ring_peek(struct ptr_ring *r)
 static inline bool __ptr_ring_empty(struct ptr_ring *r)
 {
 	if (likely(r->size))
-		return !r->queue[READ_ONCE(r->consumer_head)];
+		return !data_race(r->queue[READ_ONCE(r->consumer_head)]);
 	return true;
 }
 
@@ -256,7 +256,7 @@ static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head)
 	 * besides the first one until we write out all entries.
 	 */
 	while (likely(head > r->consumer_tail))
-		r->queue[--head] = NULL;
+		data_race(r->queue[--head] = NULL);
 
 	r->consumer_tail = consumer_head;
 }
-- 
cgit v1.2.3


From 187b00a26679ae58a79f56c0024df1e3dbd7dff0 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Wed, 25 Mar 2026 23:00:02 +0200
Subject: net: stmmac: provide flag to disable EEE

Some platforms have problems when EEE is enabled, and thus need a way
to disable stmmac EEE support. Add a flag before the other LPI related
flags which tells stmmac to avoid populating the phylink LPI
capabilities, which causes phylink to call phy_disable_eee() for any
PHY that is attached to the affected phylink instance.

iMX8MP is an example - the lpi_intr_o signal is wired to an OR gate
along with the main dwmac interrupts. Since lpi_intr_o is synchronous
to the receive clock domain, and takes four clock cycles to clear, this
leads to interrupt storms as the interrupt remains asserted for some
time after the LPI control and status register is read.

This problem becomes worse when the receive clock from the PHY stops
when the receive path enters LPI state - which means that lpi_intr_o
can not deassert until the clock restarts. Since the LPI state of the
receive path depends on the link partner, this is out of our control.
We could disable RX clock stop at the PHY, but that doesn't get around
the slow-to-deassert lpi_intr_o mentioned in the above paragraph.

Previously, iMX8MP worked around this by disabling gigabit EEE, but
this is insufficient - the problem is also visible at 100M speeds,
where the receive clock is slower.

There is extensive discussion and investigation in the thread linked
below, the result of which is summarised in this commit message.

Reported-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Closes: https://lore.kernel.org/r/20251026122905.29028-1-laurent.pinchart@ideasonboard.com
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: Ovidiu Panait <ovidiu.panait.rb@renesas.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
Link: https://patch.msgid.link/20260325210003.2752013-2-laurent.pinchart@ideasonboard.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  7 ++++++-
 include/linux/stmmac.h                            | 13 +++++++------
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9b6b49331639..ce51b9c22129 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1438,7 +1438,12 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv)
 				 config->supported_interfaces,
 				 pcs->supported_interfaces);
 
-	if (priv->dma_cap.eee) {
+	/* Some platforms, e.g. iMX8MP, wire lpi_intr_o to the same interrupt
+	 * used for stmmac's main interrupts, which leads to interrupt storms.
+	 * STMMAC_FLAG_EEE_DISABLE allows EEE to be disabled on such platforms.
+	 */
+	if (priv->dma_cap.eee &&
+	    !(priv->plat->flags & STMMAC_FLAG_EEE_DISABLE)) {
 		/* The GMAC 3.74a databook states that EEE is only supported
 		 * in MII, GMII, and RGMII interfaces.
 		 */
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index eaaee329ef9d..4430b967abde 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -204,12 +204,13 @@ enum dwmac_core_type {
 #define STMMAC_FLAG_MULTI_MSI_EN		BIT(7)
 #define STMMAC_FLAG_EXT_SNAPSHOT_EN		BIT(8)
 #define STMMAC_FLAG_INT_SNAPSHOT_EN		BIT(9)
-#define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI		BIT(10)
-#define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING	BIT(11)
-#define STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP	BIT(12)
-#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY	BIT(13)
-#define STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD	BIT(14)
-#define STMMAC_FLAG_SERDES_SUPPORTS_2500M	BIT(15)
+#define STMMAC_FLAG_EEE_DISABLE			BIT(10)
+#define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI		BIT(11)
+#define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING	BIT(12)
+#define STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP	BIT(13)
+#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY	BIT(14)
+#define STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD	BIT(15)
+#define STMMAC_FLAG_SERDES_SUPPORTS_2500M	BIT(16)
 
 struct mac_device_info;
 
-- 
cgit v1.2.3


From 077ba03600faea5f2aa15afbb83580878cc8b500 Mon Sep 17 00:00:00 2001
From: Mayank Rungta <mrungta@google.com>
Date: Thu, 12 Mar 2026 16:22:05 -0700
Subject: watchdog/hardlockup: improve buddy system detection timeliness

Currently, the buddy system only performs checks every 3rd sample.  With a
4-second interval.  If a check window is missed, the next check occurs 12
seconds later, potentially delaying hard lockup detection for up to 24
seconds.

Modify the buddy system to perform checks at every interval (4s).
Introduce a missed-interrupt threshold to maintain the existing grace
period while reducing the detection window to 8-12 seconds.

Best and worst case detection scenarios:

Before (12s check window):
- Best case: Lockup occurs after first check but just before heartbeat
  interval. Detected in ~8s (8s till next check).
- Worst case: Lockup occurs just after a check.
  Detected in ~24s (missed check + 12s till next check + 12s logic).

After (4s check window with threshold of 3):
- Best case: Lockup occurs just before a check.
  Detected in ~8s (0s till 1st check + 4s till 2nd + 4s till 3rd).
- Worst case: Lockup occurs just after a check.
  Detected in ~12s (4s till 1st check + 4s till 2nd + 4s till 3rd).

Link: https://lkml.kernel.org/r/20260312-hardlockup-watchdog-fixes-v2-4-45bd8a0cc7ed@google.com
Signed-off-by: Mayank Rungta <mrungta@google.com>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Stephane Erainan <eranian@google.com>
Cc: Wang Jinchao <wangjinchao600@gmail.com>
Cc: Yunhui Cui <cuiyunhui@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/nmi.h     |  1 +
 kernel/watchdog.c       | 19 ++++++++++++++++---
 kernel/watchdog_buddy.c |  9 +--------
 3 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 207156f2143c..bc1162895f35 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -21,6 +21,7 @@ void lockup_detector_soft_poweroff(void);
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
+extern int watchdog_hardlockup_miss_thresh;
 
 extern struct cpumask watchdog_cpumask;
 extern unsigned long *watchdog_cpumask_bits;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 431c540bd035..87dd5e0f6968 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -60,6 +60,13 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
 # endif /* CONFIG_SMP */
 
+/*
+ * Number of consecutive missed interrupts before declaring a lockup.
+ * Default to 1 (immediate) for NMI/Perf. Buddy will overwrite this to 3.
+ */
+int __read_mostly watchdog_hardlockup_miss_thresh = 1;
+EXPORT_SYMBOL_GPL(watchdog_hardlockup_miss_thresh);
+
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
  */
@@ -137,6 +144,7 @@ __setup("nmi_watchdog=", hardlockup_panic_setup);
 
 static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts);
 static DEFINE_PER_CPU(int, hrtimer_interrupts_saved);
+static DEFINE_PER_CPU(int, hrtimer_interrupts_missed);
 static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned);
 static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched);
 static unsigned long hard_lockup_nmi_warn;
@@ -159,7 +167,7 @@ void watchdog_hardlockup_touch_cpu(unsigned int cpu)
 	per_cpu(watchdog_hardlockup_touched, cpu) = true;
 }
 
-static void watchdog_hardlockup_update(unsigned int cpu)
+static void watchdog_hardlockup_update_reset(unsigned int cpu)
 {
 	int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu));
 
@@ -169,6 +177,7 @@ static void watchdog_hardlockup_update(unsigned int cpu)
 	 * written/read by a single CPU.
 	 */
 	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+	per_cpu(hrtimer_interrupts_missed, cpu) = 0;
 }
 
 static bool is_hardlockup(unsigned int cpu)
@@ -176,10 +185,14 @@ static bool is_hardlockup(unsigned int cpu)
 	int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu));
 
 	if (per_cpu(hrtimer_interrupts_saved, cpu) != hrint) {
-		watchdog_hardlockup_update(cpu);
+		watchdog_hardlockup_update_reset(cpu);
 		return false;
 	}
 
+	per_cpu(hrtimer_interrupts_missed, cpu)++;
+	if (per_cpu(hrtimer_interrupts_missed, cpu) % watchdog_hardlockup_miss_thresh)
+		return false;
+
 	return true;
 }
 
@@ -198,7 +211,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
 	unsigned long flags;
 
 	if (per_cpu(watchdog_hardlockup_touched, cpu)) {
-		watchdog_hardlockup_update(cpu);
+		watchdog_hardlockup_update_reset(cpu);
 		per_cpu(watchdog_hardlockup_touched, cpu) = false;
 		return;
 	}
diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c
index ee754d767c21..3a1e57080c1c 100644
--- a/kernel/watchdog_buddy.c
+++ b/kernel/watchdog_buddy.c
@@ -21,6 +21,7 @@ static unsigned int watchdog_next_cpu(unsigned int cpu)
 
 int __init watchdog_hardlockup_probe(void)
 {
+	watchdog_hardlockup_miss_thresh = 3;
 	return 0;
 }
 
@@ -86,14 +87,6 @@ void watchdog_buddy_check_hardlockup(int hrtimer_interrupts)
 {
 	unsigned int next_cpu;
 
-	/*
-	 * Test for hardlockups every 3 samples. The sample period is
-	 *  watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
-	 *  watchdog_thresh (over by 20%).
-	 */
-	if (hrtimer_interrupts % 3 != 0)
-		return;
-
 	/* check for a hardlockup on the next CPU */
 	next_cpu = watchdog_next_cpu(smp_processor_id());
 	if (next_cpu >= nr_cpu_ids)
-- 
cgit v1.2.3


From d457072576a6a60ba853b1d815f123da57b48021 Mon Sep 17 00:00:00 2001
From: Ihor Solodrai <ihor.solodrai@linux.dev>
Date: Fri, 27 Mar 2026 13:32:40 -0700
Subject: bpf: Support struct btf_struct_meta via KF_IMPLICIT_ARGS

The following kfuncs currently accept void *meta__ign argument:
  * bpf_obj_new_impl
  * bpf_obj_drop_impl
  * bpf_percpu_obj_new_impl
  * bpf_percpu_obj_drop_impl
  * bpf_refcount_acquire_impl
  * bpf_list_push_back_impl
  * bpf_list_push_front_impl
  * bpf_rbtree_add_impl

The __ign suffix is an indicator for the verifier to skip the argument
in check_kfunc_args(). Then, in fixup_kfunc_call() the verifier may
set the value of this argument to struct btf_struct_meta *
kptr_struct_meta from insn_aux_data.

BPF programs must pass a dummy NULL value when calling these kfuncs.

Additionally, the list and rbtree _impl kfuncs also accept an implicit
u64 argument, which doesn't require __ign suffix because it's a
scalar, and BPF programs explicitly pass 0.

Add new kfuncs with KF_IMPLICIT_ARGS [1], that correspond to each
_impl kfunc accepting meta__ign. The existing _impl kfuncs remain
unchanged for backwards compatibility.

To support this, add "btf_struct_meta" to the list of recognized
implicit argument types in resolve_btfids.

Implement is_kfunc_arg_implicit() in the verifier, that determines
implicit args by inspecting both a non-_impl BTF prototype of the
kfunc.

Update the special_kfunc_list in the verifier and relevant checks to
support both the old _impl and the new KF_IMPLICIT_ARGS variants of
btf_struct_meta users.

[1] https://lore.kernel.org/bpf/20260120222638.3976562-1-ihor.solodrai@linux.dev/

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20260327203241.3365046-1-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/btf_ids.h                            |   2 +-
 kernel/bpf/helpers.c                               | 178 ++++++++++++++++++---
 kernel/bpf/verifier.c                              | 178 +++++++++++++++------
 tools/bpf/resolve_btfids/main.c                    |   1 +
 .../selftests/bpf/progs/percpu_alloc_fail.c        |   4 +-
 5 files changed, 287 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 139bdececdcf..af011db39ab3 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -217,7 +217,7 @@ BTF_SET8_END(name)
 
 #else
 
-#define BTF_ID_LIST(name) static u32 __maybe_unused name[64];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[128];
 #define BTF_ID(prefix, name)
 #define BTF_ID_FLAGS(prefix, name, ...)
 #define BTF_ID_UNUSED
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index cb6d242bd093..2d8538bf4cfa 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2302,9 +2302,20 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
 
 __bpf_kfunc_start_defs();
 
-__bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+/**
+ * bpf_obj_new() - allocate an object described by program BTF
+ * @local_type_id__k: type ID in program BTF
+ * @meta: verifier-supplied struct metadata
+ *
+ * Allocate an object of the type identified by @local_type_id__k and
+ * initialize its special fields. BPF programs can use
+ * bpf_core_type_id_local() to provide @local_type_id__k. The verifier
+ * rewrites @meta; BPF programs do not set it.
+ *
+ * Return: Pointer to the allocated object, or %NULL on failure.
+ */
+__bpf_kfunc void *bpf_obj_new(u64 local_type_id__k, struct btf_struct_meta *meta)
 {
-	struct btf_struct_meta *meta = meta__ign;
 	u64 size = local_type_id__k;
 	void *p;
 
@@ -2313,17 +2324,39 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
 		return NULL;
 	if (meta)
 		bpf_obj_init(meta->record, p);
+
 	return p;
 }
 
-__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+__bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+{
+	return bpf_obj_new(local_type_id__k, meta__ign);
+}
+
+/**
+ * bpf_percpu_obj_new() - allocate a percpu object described by program BTF
+ * @local_type_id__k: type ID in program BTF
+ * @meta: verifier-supplied struct metadata
+ *
+ * Allocate a percpu object of the type identified by @local_type_id__k. BPF
+ * programs can use bpf_core_type_id_local() to provide @local_type_id__k.
+ * The verifier rewrites @meta; BPF programs do not set it.
+ *
+ * Return: Pointer to the allocated percpu object, or %NULL on failure.
+ */
+__bpf_kfunc void *bpf_percpu_obj_new(u64 local_type_id__k, struct btf_struct_meta *meta)
 {
 	u64 size = local_type_id__k;
 
-	/* The verifier has ensured that meta__ign must be NULL */
+	/* The verifier has ensured that meta must be NULL */
 	return bpf_mem_alloc(&bpf_global_percpu_ma, size);
 }
 
+__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+{
+	return bpf_percpu_obj_new(local_type_id__k, meta__ign);
+}
+
 /* Must be called under migrate_disable(), as required by bpf_mem_free */
 void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
 {
@@ -2347,23 +2380,56 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
 	bpf_mem_free_rcu(ma, p);
 }
 
-__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
+/**
+ * bpf_obj_drop() - drop a previously allocated object
+ * @p__alloc: object to free
+ * @meta: verifier-supplied struct metadata
+ *
+ * Destroy special fields in @p__alloc as needed and free the object. The
+ * verifier rewrites @meta; BPF programs do not set it.
+ */
+__bpf_kfunc void bpf_obj_drop(void *p__alloc, struct btf_struct_meta *meta)
 {
-	struct btf_struct_meta *meta = meta__ign;
 	void *p = p__alloc;
 
 	__bpf_obj_drop_impl(p, meta ? meta->record : NULL, false);
 }
 
-__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
+__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
+{
+	return bpf_obj_drop(p__alloc, meta__ign);
+}
+
+/**
+ * bpf_percpu_obj_drop() - drop a previously allocated percpu object
+ * @p__alloc: percpu object to free
+ * @meta: verifier-supplied struct metadata
+ *
+ * Free @p__alloc. The verifier rewrites @meta; BPF programs do not set it.
+ */
+__bpf_kfunc void bpf_percpu_obj_drop(void *p__alloc, struct btf_struct_meta *meta)
 {
-	/* The verifier has ensured that meta__ign must be NULL */
+	/* The verifier has ensured that meta must be NULL */
 	bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
 }
 
-__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
+__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
+{
+	bpf_percpu_obj_drop(p__alloc, meta__ign);
+}
+
+/**
+ * bpf_refcount_acquire() - turn a local kptr into an owning reference
+ * @p__refcounted_kptr: non-owning local kptr
+ * @meta: verifier-supplied struct metadata
+ *
+ * Increment the refcount for @p__refcounted_kptr. The verifier rewrites
+ * @meta; BPF programs do not set it.
+ *
+ * Return: Owning reference to @p__refcounted_kptr, or %NULL on failure.
+ */
+__bpf_kfunc void *bpf_refcount_acquire(void *p__refcounted_kptr, struct btf_struct_meta *meta)
 {
-	struct btf_struct_meta *meta = meta__ign;
 	struct bpf_refcount *ref;
 
 	/* Could just cast directly to refcount_t *, but need some code using
@@ -2379,6 +2445,11 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta
 	return (void *)p__refcounted_kptr;
 }
 
+__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
+{
+	return bpf_refcount_acquire(p__refcounted_kptr, meta__ign);
+}
+
 static int __bpf_list_add(struct bpf_list_node_kern *node,
 			  struct bpf_list_head *head,
 			  bool tail, struct btf_record *rec, u64 off)
@@ -2406,24 +2477,62 @@ static int __bpf_list_add(struct bpf_list_node_kern *node,
 	return 0;
 }
 
+/**
+ * bpf_list_push_front() - add a node to the front of a BPF linked list
+ * @head: list head
+ * @node: node to insert
+ * @meta: verifier-supplied struct metadata
+ * @off: verifier-supplied offset of @node within the containing object
+ *
+ * Insert @node at the front of @head. The verifier rewrites @meta and @off;
+ * BPF programs do not set them.
+ *
+ * Return: 0 on success, or %-EINVAL if @node is already linked.
+ */
+__bpf_kfunc int bpf_list_push_front(struct bpf_list_head *head,
+				    struct bpf_list_node *node,
+				    struct btf_struct_meta *meta,
+				    u64 off)
+{
+	struct bpf_list_node_kern *n = (void *)node;
+
+	return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
+}
+
 __bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
 					 struct bpf_list_node *node,
 					 void *meta__ign, u64 off)
+{
+	return bpf_list_push_front(head, node, meta__ign, off);
+}
+
+/**
+ * bpf_list_push_back() - add a node to the back of a BPF linked list
+ * @head: list head
+ * @node: node to insert
+ * @meta: verifier-supplied struct metadata
+ * @off: verifier-supplied offset of @node within the containing object
+ *
+ * Insert @node at the back of @head. The verifier rewrites @meta and @off;
+ * BPF programs do not set them.
+ *
+ * Return: 0 on success, or %-EINVAL if @node is already linked.
+ */
+__bpf_kfunc int bpf_list_push_back(struct bpf_list_head *head,
+				   struct bpf_list_node *node,
+				   struct btf_struct_meta *meta,
+				   u64 off)
 {
 	struct bpf_list_node_kern *n = (void *)node;
-	struct btf_struct_meta *meta = meta__ign;
 
-	return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
+	return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
 }
 
 __bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
 					struct bpf_list_node *node,
 					void *meta__ign, u64 off)
 {
-	struct bpf_list_node_kern *n = (void *)node;
-	struct btf_struct_meta *meta = meta__ign;
-
-	return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
+	return bpf_list_push_back(head, node, meta__ign, off);
 }
 
 static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
@@ -2535,16 +2644,37 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root,
 	return 0;
 }
 
-__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
-				    bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
-				    void *meta__ign, u64 off)
+/**
+ * bpf_rbtree_add() - add a node to a BPF rbtree
+ * @root: tree root
+ * @node: node to insert
+ * @less: comparator used to order nodes
+ * @meta: verifier-supplied struct metadata
+ * @off: verifier-supplied offset of @node within the containing object
+ *
+ * Insert @node into @root using @less. The verifier rewrites @meta and @off;
+ * BPF programs do not set them.
+ *
+ * Return: 0 on success, or %-EINVAL if @node is already linked in a tree.
+ */
+__bpf_kfunc int bpf_rbtree_add(struct bpf_rb_root *root,
+			       struct bpf_rb_node *node,
+			       bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
+			       struct btf_struct_meta *meta,
+			       u64 off)
 {
-	struct btf_struct_meta *meta = meta__ign;
 	struct bpf_rb_node_kern *n = (void *)node;
 
 	return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
 }
 
+__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
+				    bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
+				    void *meta__ign, u64 off)
+{
+	return bpf_rbtree_add(root, node, less, meta__ign, off);
+}
+
 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
 {
 	struct rb_root_cached *r = (struct rb_root_cached *)root;
@@ -4536,12 +4666,19 @@ BTF_KFUNCS_START(generic_btf_ids)
 #ifdef CONFIG_CRASH_DUMP
 BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
 #endif
+BTF_ID_FLAGS(func, bpf_obj_new, KF_ACQUIRE | KF_RET_NULL | KF_IMPLICIT_ARGS)
 BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_percpu_obj_new, KF_ACQUIRE | KF_RET_NULL | KF_IMPLICIT_ARGS)
 BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_obj_drop, KF_RELEASE | KF_IMPLICIT_ARGS)
 BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_percpu_obj_drop, KF_RELEASE | KF_IMPLICIT_ARGS)
 BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_refcount_acquire, KF_ACQUIRE | KF_RET_NULL | KF_RCU | KF_IMPLICIT_ARGS)
 BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU)
+BTF_ID_FLAGS(func, bpf_list_push_front, KF_IMPLICIT_ARGS)
 BTF_ID_FLAGS(func, bpf_list_push_front_impl)
+BTF_ID_FLAGS(func, bpf_list_push_back, KF_IMPLICIT_ARGS)
 BTF_ID_FLAGS(func, bpf_list_push_back_impl)
 BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
@@ -4550,6 +4687,7 @@ BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_add, KF_IMPLICIT_ARGS)
 BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
 BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 11207e63c94e..8c1cf2eb6cbb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12473,6 +12473,28 @@ static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param
 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID);
 }
 
+/*
+ * A kfunc with KF_IMPLICIT_ARGS has two prototypes in BTF:
+ *   - the _impl prototype with full arg list (meta->func_proto)
+ *   - the BPF API prototype w/o implicit args (func->type in BTF)
+ * To determine whether an argument is implicit, we compare its position
+ * against the number of arguments in the prototype w/o implicit args.
+ */
+static bool is_kfunc_arg_implicit(const struct bpf_kfunc_call_arg_meta *meta, u32 arg_idx)
+{
+	const struct btf_type *func, *func_proto;
+	u32 argn;
+
+	if (!(meta->kfunc_flags & KF_IMPLICIT_ARGS))
+		return false;
+
+	func = btf_type_by_id(meta->btf, meta->func_id);
+	func_proto = btf_type_by_id(meta->btf, func->type);
+	argn = btf_type_vlen(func_proto);
+
+	return argn <= arg_idx;
+}
+
 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
 					const struct btf *btf,
@@ -12539,10 +12561,15 @@ enum kfunc_ptr_arg_type {
 
 enum special_kfunc_type {
 	KF_bpf_obj_new_impl,
+	KF_bpf_obj_new,
 	KF_bpf_obj_drop_impl,
+	KF_bpf_obj_drop,
 	KF_bpf_refcount_acquire_impl,
+	KF_bpf_refcount_acquire,
 	KF_bpf_list_push_front_impl,
+	KF_bpf_list_push_front,
 	KF_bpf_list_push_back_impl,
+	KF_bpf_list_push_back,
 	KF_bpf_list_pop_front,
 	KF_bpf_list_pop_back,
 	KF_bpf_list_front,
@@ -12553,6 +12580,7 @@ enum special_kfunc_type {
 	KF_bpf_rcu_read_unlock,
 	KF_bpf_rbtree_remove,
 	KF_bpf_rbtree_add_impl,
+	KF_bpf_rbtree_add,
 	KF_bpf_rbtree_first,
 	KF_bpf_rbtree_root,
 	KF_bpf_rbtree_left,
@@ -12565,7 +12593,9 @@ enum special_kfunc_type {
 	KF_bpf_dynptr_slice_rdwr,
 	KF_bpf_dynptr_clone,
 	KF_bpf_percpu_obj_new_impl,
+	KF_bpf_percpu_obj_new,
 	KF_bpf_percpu_obj_drop_impl,
+	KF_bpf_percpu_obj_drop,
 	KF_bpf_throw,
 	KF_bpf_wq_set_callback,
 	KF_bpf_preempt_disable,
@@ -12599,10 +12629,15 @@ enum special_kfunc_type {
 
 BTF_ID_LIST(special_kfunc_list)
 BTF_ID(func, bpf_obj_new_impl)
+BTF_ID(func, bpf_obj_new)
 BTF_ID(func, bpf_obj_drop_impl)
+BTF_ID(func, bpf_obj_drop)
 BTF_ID(func, bpf_refcount_acquire_impl)
+BTF_ID(func, bpf_refcount_acquire)
 BTF_ID(func, bpf_list_push_front_impl)
+BTF_ID(func, bpf_list_push_front)
 BTF_ID(func, bpf_list_push_back_impl)
+BTF_ID(func, bpf_list_push_back)
 BTF_ID(func, bpf_list_pop_front)
 BTF_ID(func, bpf_list_pop_back)
 BTF_ID(func, bpf_list_front)
@@ -12613,6 +12648,7 @@ BTF_ID(func, bpf_rcu_read_lock)
 BTF_ID(func, bpf_rcu_read_unlock)
 BTF_ID(func, bpf_rbtree_remove)
 BTF_ID(func, bpf_rbtree_add_impl)
+BTF_ID(func, bpf_rbtree_add)
 BTF_ID(func, bpf_rbtree_first)
 BTF_ID(func, bpf_rbtree_root)
 BTF_ID(func, bpf_rbtree_left)
@@ -12632,7 +12668,9 @@ BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
 BTF_ID(func, bpf_dynptr_clone)
 BTF_ID(func, bpf_percpu_obj_new_impl)
+BTF_ID(func, bpf_percpu_obj_new)
 BTF_ID(func, bpf_percpu_obj_drop_impl)
+BTF_ID(func, bpf_percpu_obj_drop)
 BTF_ID(func, bpf_throw)
 BTF_ID(func, bpf_wq_set_callback)
 BTF_ID(func, bpf_preempt_disable)
@@ -12676,6 +12714,50 @@ BTF_ID(func, bpf_session_is_return)
 BTF_ID(func, bpf_stream_vprintk)
 BTF_ID(func, bpf_stream_print_stack)
 
+static bool is_bpf_obj_new_kfunc(u32 func_id)
+{
+	return func_id == special_kfunc_list[KF_bpf_obj_new] ||
+	       func_id == special_kfunc_list[KF_bpf_obj_new_impl];
+}
+
+static bool is_bpf_percpu_obj_new_kfunc(u32 func_id)
+{
+	return func_id == special_kfunc_list[KF_bpf_percpu_obj_new] ||
+	       func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl];
+}
+
+static bool is_bpf_obj_drop_kfunc(u32 func_id)
+{
+	return func_id == special_kfunc_list[KF_bpf_obj_drop] ||
+	       func_id == special_kfunc_list[KF_bpf_obj_drop_impl];
+}
+
+static bool is_bpf_percpu_obj_drop_kfunc(u32 func_id)
+{
+	return func_id == special_kfunc_list[KF_bpf_percpu_obj_drop] ||
+	       func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl];
+}
+
+static bool is_bpf_refcount_acquire_kfunc(u32 func_id)
+{
+	return func_id == special_kfunc_list[KF_bpf_refcount_acquire] ||
+	       func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
+}
+
+static bool is_bpf_list_push_kfunc(u32 func_id)
+{
+	return func_id == special_kfunc_list[KF_bpf_list_push_front] ||
+	       func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
+	       func_id == special_kfunc_list[KF_bpf_list_push_back] ||
+	       func_id == special_kfunc_list[KF_bpf_list_push_back_impl];
+}
+
+static bool is_bpf_rbtree_add_kfunc(u32 func_id)
+{
+	return func_id == special_kfunc_list[KF_bpf_rbtree_add] ||
+	       func_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
+}
+
 static bool is_task_work_add_kfunc(u32 func_id)
 {
 	return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
@@ -12684,10 +12766,8 @@ static bool is_task_work_add_kfunc(u32 func_id)
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
-	if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
-	    meta->arg_owning_ref) {
+	if (is_bpf_refcount_acquire_kfunc(meta->func_id) && meta->arg_owning_ref)
 		return false;
-	}
 
 	return meta->kfunc_flags & KF_RET_NULL;
 }
@@ -13075,8 +13155,7 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_
 
 static bool is_bpf_list_api_kfunc(u32 btf_id)
 {
-	return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
-	       btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
+	return is_bpf_list_push_kfunc(btf_id) ||
 	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
 	       btf_id == special_kfunc_list[KF_bpf_list_front] ||
@@ -13085,7 +13164,7 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
 
 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
 {
-	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
+	return is_bpf_rbtree_add_kfunc(btf_id) ||
 	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
 	       btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
 	       btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
@@ -13102,8 +13181,9 @@ static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
 
 static bool is_bpf_graph_api_kfunc(u32 btf_id)
 {
-	return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
-	       btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
+	return is_bpf_list_api_kfunc(btf_id) ||
+	       is_bpf_rbtree_api_kfunc(btf_id) ||
+	       is_bpf_refcount_acquire_kfunc(btf_id);
 }
 
 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
@@ -13136,7 +13216,7 @@ static bool kfunc_spin_allowed(u32 btf_id)
 
 static bool is_sync_callback_calling_kfunc(u32 btf_id)
 {
-	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
+	return is_bpf_rbtree_add_kfunc(btf_id);
 }
 
 static bool is_async_callback_calling_kfunc(u32 btf_id)
@@ -13200,12 +13280,11 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
 
 	switch (node_field_type) {
 	case BPF_LIST_NODE:
-		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
-		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
+		ret = is_bpf_list_push_kfunc(kfunc_btf_id);
 		break;
 	case BPF_RB_NODE:
-		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
-		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
+		ret = (is_bpf_rbtree_add_kfunc(kfunc_btf_id) ||
+		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
 		break;
@@ -13422,11 +13501,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 		bool is_ret_buf_sz = false;
 		int kf_arg_type;
 
-		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
-
-		if (is_kfunc_arg_ignore(btf, &args[i]))
-			continue;
-
 		if (is_kfunc_arg_prog_aux(btf, &args[i])) {
 			/* Reject repeated use bpf_prog_aux */
 			if (meta->arg_prog) {
@@ -13438,6 +13512,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			continue;
 		}
 
+		if (is_kfunc_arg_ignore(btf, &args[i]) || is_kfunc_arg_implicit(meta, i))
+			continue;
+
+		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
+
 		if (btf_type_is_scalar(t)) {
 			if (reg->type != SCALAR_VALUE) {
 				verbose(env, "R%d is not a scalar\n", regno);
@@ -13612,13 +13691,13 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			break;
 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
-				if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
-					verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
+				if (!is_bpf_obj_drop_kfunc(meta->func_id)) {
+					verbose(env, "arg#%d expected for bpf_obj_drop()\n", i);
 					return -EINVAL;
 				}
 			} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
-				if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
-					verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
+				if (!is_bpf_percpu_obj_drop_kfunc(meta->func_id)) {
+					verbose(env, "arg#%d expected for bpf_percpu_obj_drop()\n", i);
 					return -EINVAL;
 				}
 			} else {
@@ -13744,7 +13823,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				return ret;
 			break;
 		case KF_ARG_PTR_TO_RB_NODE:
-			if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+			if (is_bpf_rbtree_add_kfunc(meta->func_id)) {
 				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
 					verbose(env, "arg#%d expected pointer to allocated object\n", i);
 					return -EINVAL;
@@ -13981,13 +14060,12 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
 	if (meta->btf != btf_vmlinux)
 		return 0;
 
-	if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
-	    meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+	if (is_bpf_obj_new_kfunc(meta->func_id) || is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
 		struct btf_struct_meta *struct_meta;
 		struct btf *ret_btf;
 		u32 ret_btf_id;
 
-		if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
+		if (is_bpf_obj_new_kfunc(meta->func_id) && !bpf_global_ma_set)
 			return -ENOMEM;
 
 		if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
@@ -14010,7 +14088,7 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
 			return -EINVAL;
 		}
 
-		if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
 			if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
 				verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
 					ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
@@ -14040,7 +14118,7 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
 		}
 
 		struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
-		if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
 			if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
 				verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
 				return -EINVAL;
@@ -14056,12 +14134,12 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
 		regs[BPF_REG_0].btf = ret_btf;
 		regs[BPF_REG_0].btf_id = ret_btf_id;
-		if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
+		if (is_bpf_percpu_obj_new_kfunc(meta->func_id))
 			regs[BPF_REG_0].type |= MEM_PERCPU;
 
 		insn_aux->obj_new_size = ret_t->size;
 		insn_aux->kptr_struct_meta = struct_meta;
-	} else if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
+	} else if (is_bpf_refcount_acquire_kfunc(meta->func_id)) {
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
 		regs[BPF_REG_0].btf = meta->arg_btf;
@@ -14227,7 +14305,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	if (err < 0)
 		return err;
 
-	if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+	if (is_bpf_rbtree_add_kfunc(meta.func_id)) {
 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
 					 set_rbtree_add_callback_state);
 		if (err) {
@@ -14331,9 +14409,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			return err;
 	}
 
-	if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
-	    meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
-	    meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+	if (is_bpf_list_push_kfunc(meta.func_id) || is_bpf_rbtree_add_kfunc(meta.func_id)) {
 		release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
 		insn_aux->insert_off = regs[BPF_REG_2].var_off.value;
 		insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
@@ -14381,11 +14457,10 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
 
 	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
-		/* Only exception is bpf_obj_new_impl */
 		if (meta.btf != btf_vmlinux ||
-		    (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
-		     meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
-		     meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
+		    (!is_bpf_obj_new_kfunc(meta.func_id) &&
+		     !is_bpf_percpu_obj_new_kfunc(meta.func_id) &&
+		     !is_bpf_refcount_acquire_kfunc(meta.func_id))) {
 			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
 			return -EINVAL;
 		}
@@ -14496,8 +14571,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			regs[BPF_REG_0].id = ++env->id_gen;
 	} else if (btf_type_is_void(t)) {
 		if (meta.btf == btf_vmlinux) {
-			if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
-			    meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
+			if (is_bpf_obj_drop_kfunc(meta.func_id) ||
+			    is_bpf_percpu_obj_drop_kfunc(meta.func_id)) {
 				insn_aux->kptr_struct_meta =
 					btf_find_struct_meta(meta.arg_btf,
 							     meta.arg_btf_id);
@@ -23324,13 +23399,12 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	if (!bpf_jit_supports_far_kfunc_call())
 		insn->imm = BPF_CALL_IMM(desc->addr);
 
-	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
-	    desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+	if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
 		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
 
-		if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
+		if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
 			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
 				     insn_idx);
 			return -EFAULT;
@@ -23341,20 +23415,19 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		insn_buf[2] = addr[1];
 		insn_buf[3] = *insn;
 		*cnt = 4;
-	} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
-		   desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
-		   desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
+	} else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
+		   is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
+		   is_bpf_refcount_acquire_kfunc(desc->func_id)) {
 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
 
-		if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
+		if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
 			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
 				     insn_idx);
 			return -EFAULT;
 		}
 
-		if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
-		    !kptr_struct_meta) {
+		if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
 			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
 				     insn_idx);
 			return -EFAULT;
@@ -23364,15 +23437,14 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		insn_buf[1] = addr[1];
 		insn_buf[2] = *insn;
 		*cnt = 3;
-	} else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
-		   desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
-		   desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+	} else if (is_bpf_list_push_kfunc(desc->func_id) ||
+		   is_bpf_rbtree_add_kfunc(desc->func_id)) {
 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
 		int struct_meta_reg = BPF_REG_3;
 		int node_offset_reg = BPF_REG_4;
 
 		/* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
-		if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+		if (is_bpf_rbtree_add_kfunc(desc->func_id)) {
 			struct_meta_reg = BPF_REG_4;
 			node_offset_reg = BPF_REG_5;
 		}
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 5208f650080f..f8a91fa7584f 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -1065,6 +1065,7 @@ static bool is_kf_implicit_arg(const struct btf *btf, const struct btf_param *p)
 {
 	static const char *const kf_implicit_arg_types[] = {
 		"bpf_prog_aux",
+		"btf_struct_meta",
 	};
 	const struct btf_type *t;
 	const char *name;
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
index f2b8eb2ff76f..81813c724fa9 100644
--- a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
@@ -110,7 +110,7 @@ int BPF_PROG(test_array_map_3)
 }
 
 SEC("?fentry.s/bpf_fentry_test1")
-__failure __msg("arg#0 expected for bpf_percpu_obj_drop_impl()")
+__failure __msg("arg#0 expected for bpf_percpu_obj_drop()")
 int BPF_PROG(test_array_map_4)
 {
 	struct val_t __percpu_kptr *p;
@@ -124,7 +124,7 @@ int BPF_PROG(test_array_map_4)
 }
 
 SEC("?fentry.s/bpf_fentry_test1")
-__failure __msg("arg#0 expected for bpf_obj_drop_impl()")
+__failure __msg("arg#0 expected for bpf_obj_drop()")
 int BPF_PROG(test_array_map_5)
 {
 	struct val_t *p;
-- 
cgit v1.2.3


From fde39f7df10b3dc150abb87c4718efba93cbc755 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <fmancera@suse.de>
Date: Wed, 25 Mar 2026 13:08:44 +0100
Subject: ipv6: replace IS_BUILTIN(CONFIG_IPV6) with IS_ENABLED(CONFIG_IPV6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As IPv6 is built-in only, it does not make sense to continue using
IS_BUILTIN(CONFIG_IPV6). Therefore, replace it with IS_ENABLED() when
necessary and drop it if it isn't valid anymore.

Notice that there is still one instance related to ICMPv6, as it
requires more changes it will be handle separately.

Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Tested-by: Ricardo B. Marlière <rbm@suse.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20260325120928.15848-4-fmancera@suse.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/indirect_call_wrapper.h |  2 +-
 include/net/ip6_fib.h                 |  2 +-
 net/core/filter.c                     | 14 +++++++-------
 net/ipv6/ip6_fib.c                    |  2 +-
 net/ipv6/ip6_offload.c                |  4 ++--
 net/ipv6/route.c                      |  6 ------
 6 files changed, 12 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index dc272b514a01..0e4340ecd857 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -57,7 +57,7 @@
  * builtin, this macro simplify dealing with indirect calls with only ipv4/ipv6
  * alternatives
  */
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 #define INDIRECT_CALL_INET(f, f2, f1, ...) \
 	INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
 #elif IS_ENABLED(CONFIG_INET)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 9f8b6814a96a..10f30d158340 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -599,7 +599,7 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
 void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
 			    bool offload, bool trap, bool offload_failed);
 
-#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
+#if IS_ENABLED(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
 struct bpf_iter__ipv6_route {
 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
 	__bpf_md_ptr(struct fib6_info *, rt);
diff --git a/net/core/filter.c b/net/core/filter.c
index 2f023999f046..c56821afaa0f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7590,7 +7590,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
 		ret = __cookie_v4_check((struct iphdr *)iph, th);
 		break;
 
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	case 6:
 		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
 			return -EINVAL;
@@ -7660,7 +7660,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
 		mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
 		break;
 
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	case 6:
 		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
 			return -EINVAL;
@@ -8026,7 +8026,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
 BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
 	   struct tcphdr *, th, u32, th_len)
 {
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
 		sizeof(struct ipv6hdr);
 	u32 cookie;
@@ -8078,7 +8078,7 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
 BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
 	   struct tcphdr *, th)
 {
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (__cookie_v6_check(iph, th) > 0)
 		return 0;
 
@@ -11964,7 +11964,7 @@ BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
 		return (unsigned long)sk;
 #endif
 
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
 		return (unsigned long)sk;
 #endif
@@ -11987,7 +11987,7 @@ BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
 		return (unsigned long)sk;
 #endif
 
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
 		return (unsigned long)sk;
 #endif
@@ -12250,7 +12250,7 @@ __bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct __sk_buff *s, struct sock *sk,
 		ops = &tcp_request_sock_ops;
 		min_mss = 536;
 		break;
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		ops = &tcp6_request_sock_ops;
 		min_mss = IPV6_MIN_MTU - 60;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 388ac88d741a..17f757e9c54a 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2774,7 +2774,7 @@ static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
 	rcu_read_unlock();
 }
 
-#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
+#if defined(CONFIG_BPF_SYSCALL)
 static int ipv6_route_prog_seq_show(struct bpf_prog *prog,
 				    struct bpf_iter_meta *meta,
 				    void *v)
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index bd7f780e37a5..d8072ad6b8c4 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -286,7 +286,7 @@ not_same_flow:
 
 	if (likely(proto == IPPROTO_TCP))
 		pp = tcp6_gro_receive(head, skb);
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	else if (likely(proto == IPPROTO_UDP))
 		pp = udp6_gro_receive(head, skb);
 #endif
@@ -346,7 +346,7 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 
 	if (likely(ops == &net_hotdata.tcpv6_offload))
 		return tcp6_gro_complete(skb, nhoff);
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (ops == &net_hotdata.udpv6_offload)
 		return udp6_gro_complete(skb, nhoff);
 #endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cb521700cee7..08deb18dcc85 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6826,7 +6826,6 @@ void __init ip6_route_init_special_entries(void)
   #endif
 }
 
-#if IS_BUILTIN(CONFIG_IPV6)
 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
 DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
 
@@ -6860,7 +6859,6 @@ static void bpf_iter_unregister(void)
 	bpf_iter_unreg_target(&ipv6_route_reg_info);
 }
 #endif
-#endif
 
 static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = {
 	{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
@@ -6921,12 +6919,10 @@ int __init ip6_route_init(void)
 	if (ret)
 		goto out_register_late_subsys;
 
-#if IS_BUILTIN(CONFIG_IPV6)
 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
 	ret = bpf_iter_register();
 	if (ret)
 		goto out_register_late_subsys;
-#endif
 #endif
 
 	for_each_possible_cpu(cpu) {
@@ -6961,10 +6957,8 @@ out_kmem_cache:
 
 void ip6_route_cleanup(void)
 {
-#if IS_BUILTIN(CONFIG_IPV6)
 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
 	bpf_iter_unregister();
-#endif
 #endif
 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
 	unregister_pernet_subsys(&ip6_route_net_late_ops);
-- 
cgit v1.2.3


From d2042d35f413b7131cc571655bbcb2c049489fe7 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <fmancera@suse.de>
Date: Wed, 25 Mar 2026 13:08:45 +0100
Subject: ipv6: remove dynamic ICMPv6 sender registration infrastructure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As IPv6 is built-in only, there is no need to maintain the sender
registration infrastructure used to allow built-in subsystems to send
ICMPv6 messages when IPv6 was compiled as a module.

Drop the registration mechanism and the __icmpv6_send() sender
implementation. While icmpv6_send() users could be converted to
icmp6_send() that doesn't seems necessary as none of them are using the
force_saddr parameter.

Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Tested-by: Ricardo B. Marlière <rbm@suse.com>
Link: https://patch.msgid.link/20260325120928.15848-5-fmancera@suse.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/icmpv6.h | 29 ++---------------------------
 net/ipv6/icmp.c        |  6 ------
 net/ipv6/ip6_icmp.c    | 46 +++-------------------------------------------
 3 files changed, 5 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index e3b3b0fa2a8f..2bd9f2157e6c 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -15,38 +15,13 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb)
 
 #if IS_ENABLED(CONFIG_IPV6)
 
-typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info,
-			     const struct in6_addr *force_saddr,
-			     const struct inet6_skb_parm *parm);
 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 		const struct in6_addr *force_saddr,
 		const struct inet6_skb_parm *parm);
-#if IS_BUILTIN(CONFIG_IPV6)
-static inline void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
-				 const struct inet6_skb_parm *parm)
-{
-	icmp6_send(skb, type, code, info, NULL, parm);
-}
-static inline int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
-{
-	BUILD_BUG_ON(fn != icmp6_send);
-	return 0;
-}
-static inline int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
-{
-	BUILD_BUG_ON(fn != icmp6_send);
-	return 0;
-}
-#else
-extern void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
-			  const struct inet6_skb_parm *parm);
-extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn);
-extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn);
-#endif
 
 static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 {
-	__icmpv6_send(skb, type, code, info, IP6CB(skb));
+	icmp6_send(skb, type, code, info, NULL, IP6CB(skb));
 }
 
 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
@@ -58,7 +33,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info);
 static inline void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
 {
 	struct inet6_skb_parm parm = { 0 };
-	__icmpv6_send(skb_in, type, code, info, &parm);
+	icmp6_send(skb_in, type, code, info, NULL, &parm);
 }
 #endif
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 813d2e9edb8b..8e8d7bd84a4c 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -1288,13 +1288,8 @@ int __init icmpv6_init(void)
 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
 		goto fail;
 
-	err = inet6_register_icmp_sender(icmp6_send);
-	if (err)
-		goto sender_reg_err;
 	return 0;
 
-sender_reg_err:
-	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 fail:
 	pr_err("Failed to register ICMP6 protocol\n");
 	return err;
@@ -1302,7 +1297,6 @@ fail:
 
 void icmpv6_cleanup(void)
 {
-	inet6_unregister_icmp_sender(icmp6_send);
 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }
 
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 233914b63bdb..e43ea9492332 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -7,47 +7,8 @@
 
 #include <net/ipv6.h>
 
-#if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6) && IS_ENABLED(CONFIG_NF_NAT)
 
-#if !IS_BUILTIN(CONFIG_IPV6)
-
-static ip6_icmp_send_t __rcu *ip6_icmp_send;
-
-int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
-{
-	return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ?
-		0 : -EBUSY;
-}
-EXPORT_SYMBOL(inet6_register_icmp_sender);
-
-int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
-{
-	int ret;
-
-	ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ?
-	      0 : -EINVAL;
-
-	synchronize_net();
-
-	return ret;
-}
-EXPORT_SYMBOL(inet6_unregister_icmp_sender);
-
-void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
-		   const struct inet6_skb_parm *parm)
-{
-	ip6_icmp_send_t *send;
-
-	rcu_read_lock();
-	send = rcu_dereference(ip6_icmp_send);
-	if (send)
-		send(skb, type, code, info, NULL, parm);
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL(__icmpv6_send);
-#endif
-
-#if IS_ENABLED(CONFIG_NF_NAT)
 #include <net/netfilter/nf_conntrack.h>
 void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
 {
@@ -60,7 +21,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
 
 	ct = nf_ct_get(skb_in, &ctinfo);
 	if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) {
-		__icmpv6_send(skb_in, type, code, info, &parm);
+		icmp6_send(skb_in, type, code, info, NULL, &parm);
 		return;
 	}
 
@@ -76,11 +37,10 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
 	orig_ip = ipv6_hdr(skb_in)->saddr;
 	dir = CTINFO2DIR(ctinfo);
 	ipv6_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.in6;
-	__icmpv6_send(skb_in, type, code, info, &parm);
+	icmp6_send(skb_in, type, code, info, NULL, &parm);
 	ipv6_hdr(skb_in)->saddr = orig_ip;
 out:
 	consume_skb(cloned_skb);
 }
 EXPORT_SYMBOL(icmpv6_ndo_send);
 #endif
-#endif
-- 
cgit v1.2.3


From b2c981e7c4653e3c276d5f3a0e012711d3596418 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <fmancera@suse.de>
Date: Wed, 25 Mar 2026 13:08:52 +0100
Subject: netfilter: remove nf_ipv6_ops and use direct function calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As IPv6 is built-in only, nf_ipv6_ops can be removed completely as it is
not longer necessary.

Convert all nf_ipv6_ops usage to direct function calls instead. In
addition, remove the ipv6_netfilter_init/fini() functions as they are
not necessary any longer.

Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Tested-by: Ricardo B. Marlière <rbm@suse.com>
Link: https://patch.msgid.link/20260325120928.15848-12-fmancera@suse.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netfilter_ipv6.h    | 102 +++-----------------------------------
 net/bridge/br_netfilter_hooks.c   |  12 ++---
 net/bridge/br_netfilter_ipv6.c    |   7 ++-
 net/ipv6/af_inet6.c               |   6 ---
 net/ipv6/netfilter.c              |  48 ------------------
 net/netfilter/core.c              |   3 --
 net/netfilter/nf_nat_masquerade.c |  21 +-------
 net/netfilter/nfnetlink_queue.c   |  22 ++++++--
 net/netfilter/utils.c             |   1 -
 9 files changed, 32 insertions(+), 190 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 61aa48f46dd7..5ce45b6d890f 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -34,59 +34,13 @@ struct ip6_rt_info {
 struct nf_queue_entry;
 struct nf_bridge_frag_data;
 
-/*
- * Hook functions for ipv6 to allow xt_* modules to be built-in even
- * if IPv6 is a module.
- */
-struct nf_ipv6_ops {
-#if IS_MODULE(CONFIG_IPV6)
-	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
-			const struct net_device *dev, int strict);
-	int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb);
-	int (*dev_get_saddr)(struct net *net, const struct net_device *dev,
-		       const struct in6_addr *daddr, unsigned int srcprefs,
-		       struct in6_addr *saddr);
-	int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
-		     bool strict);
-	u32 (*cookie_init_sequence)(const struct ipv6hdr *iph,
-				    const struct tcphdr *th, u16 *mssp);
-	int (*cookie_v6_check)(const struct ipv6hdr *iph,
-			       const struct tcphdr *th);
-#endif
-	void (*route_input)(struct sk_buff *skb);
-	int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
-			int (*output)(struct net *, struct sock *, struct sk_buff *));
-	int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
-#if IS_MODULE(CONFIG_IPV6)
-	int (*br_fragment)(struct net *net, struct sock *sk,
-			   struct sk_buff *skb,
-			   struct nf_bridge_frag_data *data,
-			   int (*output)(struct net *, struct sock *sk,
-					 const struct nf_bridge_frag_data *data,
-					 struct sk_buff *));
-#endif
-};
-
 #ifdef CONFIG_NETFILTER
 #include <net/addrconf.h>
 
-extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
-static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void)
-{
-	return rcu_dereference(nf_ipv6_ops);
-}
-
 static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
 				   const struct net_device *dev, int strict)
 {
-#if IS_MODULE(CONFIG_IPV6)
-	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
-	if (!v6_ops)
-		return 1;
-
-	return v6_ops->chk_addr(net, addr, dev, strict);
-#elif IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	return ipv6_chk_addr(net, addr, dev, strict);
 #else
 	return 1;
@@ -99,15 +53,7 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst,
 static inline int nf_ip6_route(struct net *net, struct dst_entry **dst,
 			       struct flowi *fl, bool strict)
 {
-#if IS_MODULE(CONFIG_IPV6)
-	const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
-
-	if (v6ops)
-		return v6ops->route(net, dst, fl, strict);
-
-	return -EHOSTUNREACH;
-#endif
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	return __nf_ip6_route(net, dst, fl, strict);
 #else
 	return -EHOSTUNREACH;
@@ -129,14 +75,7 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
 						   const struct nf_bridge_frag_data *data,
 						   struct sk_buff *))
 {
-#if IS_MODULE(CONFIG_IPV6)
-	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
-	if (!v6_ops)
-		return 1;
-
-	return v6_ops->br_fragment(net, sk, skb, data, output);
-#elif IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	return br_ip6_fragment(net, sk, skb, data, output);
 #else
 	return 1;
@@ -147,14 +86,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb);
 
 static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-#if IS_MODULE(CONFIG_IPV6)
-	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
-	if (!v6_ops)
-		return -EHOSTUNREACH;
-
-	return v6_ops->route_me_harder(net, sk, skb);
-#elif IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	return ip6_route_me_harder(net, sk, skb);
 #else
 	return -EHOSTUNREACH;
@@ -165,15 +97,8 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph,
 					       const struct tcphdr *th,
 					       u16 *mssp)
 {
-#if IS_ENABLED(CONFIG_SYN_COOKIES)
-#if IS_MODULE(CONFIG_IPV6)
-	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
-	if (v6_ops)
-		return v6_ops->cookie_init_sequence(iph, th, mssp);
-#elif IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6) && IS_ENABLED(CONFIG_SYN_COOKIES)
 	return __cookie_v6_init_sequence(iph, th, mssp);
-#endif
 #endif
 	return 0;
 }
@@ -181,15 +106,8 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph,
 static inline int nf_cookie_v6_check(const struct ipv6hdr *iph,
 				     const struct tcphdr *th)
 {
-#if IS_ENABLED(CONFIG_SYN_COOKIES)
-#if IS_MODULE(CONFIG_IPV6)
-	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
-	if (v6_ops)
-		return v6_ops->cookie_v6_check(iph, th);
-#elif IS_BUILTIN(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6) && IS_ENABLED(CONFIG_SYN_COOKIES)
 	return __cookie_v6_check(iph, th);
-#endif
 #endif
 	return 0;
 }
@@ -198,14 +116,6 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
 			unsigned int dataoff, u_int8_t protocol);
 
 int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen);
-
-int ipv6_netfilter_init(void);
-void ipv6_netfilter_fini(void);
-
-#else /* CONFIG_NETFILTER */
-static inline int ipv6_netfilter_init(void) { return 0; }
-static inline void ipv6_netfilter_fini(void) { return; }
-static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) { return NULL; }
 #endif /* CONFIG_NETFILTER */
 
 #endif /*__LINUX_IP6_NETFILTER_H*/
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 083e2fe96441..0ab1c94db4b9 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -32,6 +32,7 @@
 
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/ip6_route.h>
 #include <net/addrconf.h>
 #include <net/dst_metadata.h>
 #include <net/route.h>
@@ -890,7 +891,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
 	}
 	if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) &&
 	    skb->protocol == htons(ETH_P_IPV6)) {
-		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
 		struct brnf_frag_data *data;
 
 		if (br_validate_ipv6(net, skb))
@@ -906,15 +906,9 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
 		skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
 						 data->size);
 
-		if (v6ops) {
-			ret = v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit);
-			local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock);
-			return ret;
-		}
+		ret = ip6_fragment(net, sk, skb, br_nf_push_frag_xmit);
 		local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock);
-
-		kfree_skb(skb);
-		return -EMSGSIZE;
+		return ret;
 	}
 	nf_bridge_info_free(skb);
 	return br_dev_queue_push_xmit(net, sk, skb);
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 76ce70b4e7f3..d8548428929e 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -30,6 +30,7 @@
 
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/ip6_route.h>
 #include <net/addrconf.h>
 #include <net/route.h>
 #include <net/netfilter/br_netfilter.h>
@@ -95,15 +96,13 @@ br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb,
 
 /* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables
  * PREROUTING and continue the bridge PRE_ROUTING hook. See comment
- * for br_nf_pre_routing_finish(), same logic is used here but
- * equivalent IPv6 function ip6_route_input() called indirectly.
+ * for br_nf_pre_routing_finish(), same logic is used here.
  */
 static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 	struct rtable *rt;
 	struct net_device *dev = skb->dev, *br_indev;
-	const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
 
 	br_indev = nf_bridge_get_physindev(skb, net);
 	if (!br_indev) {
@@ -120,7 +119,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
 	nf_bridge->in_prerouting = 0;
 	if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) {
 		skb_dst_drop(skb);
-		v6ops->route_input(skb);
+		ip6_route_input(skb);
 
 		if (skb_dst(skb)->error) {
 			kfree_skb(skb);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 33abd8d8cd7d..ee341a8254bf 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -38,7 +38,6 @@
 #include <linux/inet.h>
 #include <linux/netdevice.h>
 #include <linux/icmpv6.h>
-#include <linux/netfilter_ipv6.h>
 
 #include <net/ip.h>
 #include <net/ipv6.h>
@@ -1066,9 +1065,6 @@ static int __init inet6_init(void)
 	if (err)
 		goto igmp_fail;
 
-	err = ipv6_netfilter_init();
-	if (err)
-		goto netfilter_fail;
 	/* Create /proc/foo6 entries. */
 #ifdef CONFIG_PROC_FS
 	err = -ENOMEM;
@@ -1199,8 +1195,6 @@ proc_misc6_fail:
 	raw6_proc_exit();
 proc_raw6_fail:
 #endif
-	ipv6_netfilter_fini();
-netfilter_fail:
 	igmp6_cleanup();
 igmp_fail:
 	ndisc_cleanup();
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index c3dc90dfab80..6d80f85e55fa 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -86,21 +86,6 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
 }
 EXPORT_SYMBOL(ip6_route_me_harder);
 
-static int nf_ip6_reroute(struct sk_buff *skb,
-			  const struct nf_queue_entry *entry)
-{
-	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
-	if (entry->state.hook == NF_INET_LOCAL_OUT) {
-		const struct ipv6hdr *iph = ipv6_hdr(skb);
-		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
-		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
-		    skb->mark != rt_info->mark)
-			return ip6_route_me_harder(entry->state.net, entry->state.sk, skb);
-	}
-	return 0;
-}
-
 int __nf_ip6_route(struct net *net, struct dst_entry **dst,
 		   struct flowi *fl, bool strict)
 {
@@ -243,36 +228,3 @@ blackhole:
 	return 0;
 }
 EXPORT_SYMBOL_GPL(br_ip6_fragment);
-
-static const struct nf_ipv6_ops ipv6ops = {
-#if IS_MODULE(CONFIG_IPV6)
-	.chk_addr		= ipv6_chk_addr,
-	.route_me_harder	= ip6_route_me_harder,
-	.dev_get_saddr		= ipv6_dev_get_saddr,
-	.route			= __nf_ip6_route,
-#if IS_ENABLED(CONFIG_SYN_COOKIES)
-	.cookie_init_sequence	= __cookie_v6_init_sequence,
-	.cookie_v6_check	= __cookie_v6_check,
-#endif
-#endif
-	.route_input		= ip6_route_input,
-	.fragment		= ip6_fragment,
-	.reroute		= nf_ip6_reroute,
-#if IS_MODULE(CONFIG_IPV6)
-	.br_fragment		= br_ip6_fragment,
-#endif
-};
-
-int __init ipv6_netfilter_init(void)
-{
-	RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
-	return 0;
-}
-
-/* This can be called from inet6_init() on errors, so it cannot
- * be marked __exit. -DaveM
- */
-void ipv6_netfilter_fini(void)
-{
-	RCU_INIT_POINTER(nf_ipv6_ops, NULL);
-}
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index d5df44ea9e7b..675a1034b340 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -27,9 +27,6 @@
 
 #include "nf_internals.h"
 
-const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ipv6_ops);
-
 #ifdef CONFIG_JUMP_LABEL
 struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 EXPORT_SYMBOL(nf_hooks_needed);
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index a5a23c03fda9..4de6e0a51701 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -220,23 +220,6 @@ static struct notifier_block masq_inet_notifier = {
 };
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int
-nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
-		       const struct in6_addr *daddr, unsigned int srcprefs,
-		       struct in6_addr *saddr)
-{
-#ifdef CONFIG_IPV6_MODULE
-	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
-	if (!v6_ops)
-		return -EHOSTUNREACH;
-
-	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
-#else
-	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
-#endif
-}
-
 unsigned int
 nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 		       const struct net_device *out)
@@ -251,8 +234,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
 			 ctinfo == IP_CT_RELATED_REPLY)));
 
-	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
-				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+	if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
+			       &ipv6_hdr(skb)->daddr, 0, &src) < 0)
 		return NF_DROP;
 
 	nat = nf_ct_nat_ext_add(ct);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a91ae07db059..2aa2380d976a 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -356,9 +356,25 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry
 	return 0;
 }
 
+static int nf_ip6_reroute(struct sk_buff *skb,
+			  const struct nf_queue_entry *entry)
+{
+	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+	if (entry->state.hook == NF_INET_LOCAL_OUT) {
+		const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
+		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
+		    skb->mark != rt_info->mark)
+			return nf_ip6_route_me_harder(entry->state.net,
+						      entry->state.sk, skb);
+	}
+	return 0;
+}
+
 static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
 {
-	const struct nf_ipv6_ops *v6ops;
 	int ret = 0;
 
 	switch (entry->state.pf) {
@@ -366,9 +382,7 @@ static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
 		ret = nf_ip_reroute(skb, entry);
 		break;
 	case AF_INET6:
-		v6ops = rcu_dereference(nf_ipv6_ops);
-		if (v6ops)
-			ret = v6ops->reroute(skb, entry);
+		ret = nf_ip6_reroute(skb, entry);
 		break;
 	}
 	return ret;
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 008419db815a..29c4dcc362c7 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -163,7 +163,6 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial);
 int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 	     bool strict, unsigned short family)
 {
-	const struct nf_ipv6_ops *v6ops __maybe_unused;
 	int ret = 0;
 
 	switch (family) {
-- 
cgit v1.2.3


From 55b6dd54c3bcb6edf7ad630a4510759f4b0cf1cd Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Tue, 13 Jan 2026 13:37:39 -0500
Subject: nfsd/sunrpc: add svc_rqst->rq_private pointer and remove
 rq_lease_breaker

rq_lease_breaker has always been a NFSv4 specific layering violation in
svc_rqst. The reason it's there though is that we need a place that is
thread-local, and accessible from the svc_rqst pointer.

Add a new rq_private pointer to struct svc_rqst. This is intended for
use by the threads that are handling the service. sunrpc code doesn't
touch it.

In nfsd, define a new struct nfsd_thread_local_info. nfsd declares one
of these on the stack and puts a pointer to it in rq_private.

Add a new ntli_lease_breaker field to the new struct and convert all of
the places that access rq_lease_breaker to use the new field instead.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Benjamin Coddington <bcodding@hammerspace.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4proc.c         | 3 ++-
 fs/nfsd/nfs4state.c        | 9 ++++++---
 fs/nfsd/nfsd.h             | 4 ++++
 fs/nfsd/nfssvc.c           | 5 +++++
 include/linux/sunrpc/svc.h | 5 ++++-
 5 files changed, 21 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6880c5c520e7..85e94c30285a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -3043,6 +3043,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 	struct svc_fh *current_fh = &cstate->current_fh;
 	struct svc_fh *save_fh = &cstate->save_fh;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct nfsd_thread_local_info *ntli = rqstp->rq_private;
 	__be32		status;
 
 	resp->xdr = &rqstp->rq_res_stream;
@@ -3081,7 +3082,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 	}
 	check_if_stalefh_allowed(args);
 
-	rqstp->rq_lease_breaker = (void **)&cstate->clp;
+	ntli->ntli_lease_breaker = &cstate->clp;
 
 	trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt);
 	while (!status && resp->opcnt < args->opcnt) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6b9c399b89df..d8b0bd8ac842 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5535,13 +5535,15 @@ nfsd_break_deleg_cb(struct file_lease *fl)
 static bool nfsd_breaker_owns_lease(struct file_lease *fl)
 {
 	struct nfs4_delegation *dl = fl->c.flc_owner;
+	struct nfsd_thread_local_info *ntli;
 	struct svc_rqst *rqst;
 	struct nfs4_client *clp;
 
 	rqst = nfsd_current_rqst();
 	if (!nfsd_v4client(rqst))
 		return false;
-	clp = *(rqst->rq_lease_breaker);
+	ntli = rqst->rq_private;
+	clp = *ntli->ntli_lease_breaker;
 	return dl->dl_stid.sc_client == clp;
 }
 
@@ -9348,13 +9350,14 @@ __be32
 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
 			     struct nfs4_delegation **pdp)
 {
-	__be32 status;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct nfsd_thread_local_info *ntli = rqstp->rq_private;
 	struct file_lock_context *ctx;
 	struct nfs4_delegation *dp = NULL;
 	struct file_lease *fl;
 	struct nfs4_cb_fattr *ncf;
 	struct inode *inode = d_inode(dentry);
+	__be32 status;
 
 	ctx = locks_inode_context(inode);
 	if (!ctx)
@@ -9375,7 +9378,7 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
 		break;
 	}
 	if (dp == NULL || dp == NON_NFSD_LEASE ||
-	    dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
+	    dp->dl_recall.cb_clp == *(ntli->ntli_lease_breaker)) {
 		spin_unlock(&ctx->flc_lock);
 		if (dp == NON_NFSD_LEASE) {
 			status = nfserrno(nfsd_open_break_lease(inode,
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index a01d70953358..938906c6d10c 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -82,6 +82,10 @@ extern atomic_t			nfsd_th_cnt;		/* number of available threads */
 
 extern const struct seq_operations nfs_exports_op;
 
+struct nfsd_thread_local_info {
+	struct nfs4_client	**ntli_lease_breaker;
+};
+
 /*
  * Common void argument and result helpers
  */
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 4a04208393b8..fd979e5392a1 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -887,6 +887,7 @@ nfsd(void *vrqstp)
 	struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
 	struct net *net = perm_sock->xpt_net;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct nfsd_thread_local_info ntli = { };
 	bool have_mutex = false;
 
 	/* At this point, the thread shares current->fs
@@ -901,6 +902,10 @@ nfsd(void *vrqstp)
 
 	set_freezable();
 
+	/* use dynamic allocation if ntli should ever become large */
+	static_assert(sizeof(struct nfsd_thread_local_info) < 256);
+	rqstp->rq_private = &ntli;
+
 	/*
 	 * The main request loop
 	 */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4dc14c7a711b..ab8237ba9596 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -175,6 +175,9 @@ static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
 /*
  * The context of a single thread, including the request currently being
  * processed.
+ *
+ * RPC programs are free to use rq_private to stash thread-local information.
+ * The sunrpc layer will not access it.
  */
 struct svc_rqst {
 	struct list_head	rq_all;		/* all threads list */
@@ -251,7 +254,7 @@ struct svc_rqst {
 	unsigned long		bc_to_initval;
 	unsigned int		bc_to_retries;
 	unsigned int		rq_status_counter; /* RPC processing counter */
-	void			**rq_lease_breaker; /* The v4 client breaking a lease */
+	void			*rq_private;	/* For use by the service thread */
 };
 
 /* bits for rq_flags */
-- 
cgit v1.2.3


From 322ecd01bf8ad7e0da21e174679aff1759e68b2c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Tue, 13 Jan 2026 13:37:40 -0500
Subject: nfsd/sunrpc: move rq_cachetype into struct nfsd_thread_local_info

The svc_rqst->rq_cachetype field is only accessed by nfsd. Move it
into the nfsd_thread_local_info instead.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Benjamin Coddington <bcodding@hammerspace.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c          | 3 ++-
 fs/nfsd/nfscache.c         | 3 ++-
 fs/nfsd/nfsd.h             | 1 +
 fs/nfsd/nfssvc.c           | 5 +++--
 include/linux/sunrpc/svc.h | 1 -
 5 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 9d234913100b..690f7a3122ec 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2598,6 +2598,7 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op)
 static bool
 nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 {
+	struct nfsd_thread_local_info *ntli = argp->rqstp->rq_private;
 	struct nfsd4_op *op;
 	bool cachethis = false;
 	int auth_slack= argp->rqstp->rq_auth_slack;
@@ -2690,7 +2691,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	if (argp->minorversion)
 		cachethis = false;
 	svc_reserve_auth(argp->rqstp, max_reply + readbytes);
-	argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
+	ntli->ntli_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
 
 	argp->splice_ok = nfsd_read_splice_ok(argp->rqstp);
 	if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index ab13ee9c7fd8..154468ceccdc 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -467,10 +467,11 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
 		      unsigned int len, struct nfsd_cacherep **cacherep)
 {
 	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct nfsd_thread_local_info *ntli = rqstp->rq_private;
 	struct nfsd_cacherep	*rp, *found;
 	__wsum			csum;
 	struct nfsd_drc_bucket	*b;
-	int type = rqstp->rq_cachetype;
+	int type = ntli->ntli_cachetype;
 	LIST_HEAD(dispose);
 	int rtn = RC_DOIT;
 
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 938906c6d10c..a2e35a4fa105 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -84,6 +84,7 @@ extern const struct seq_operations nfs_exports_op;
 
 struct nfsd_thread_local_info {
 	struct nfs4_client	**ntli_lease_breaker;
+	int			ntli_cachetype;
 };
 
 /*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index fd979e5392a1..4f1ab3222a4d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -972,6 +972,7 @@ nfsd(void *vrqstp)
  */
 int nfsd_dispatch(struct svc_rqst *rqstp)
 {
+	struct nfsd_thread_local_info *ntli = rqstp->rq_private;
 	const struct svc_procedure *proc = rqstp->rq_procinfo;
 	__be32 *statp = rqstp->rq_accept_statp;
 	struct nfsd_cacherep *rp;
@@ -982,7 +983,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
 	 * Give the xdr decoder a chance to change this if it wants
 	 * (necessary in the NFSv4.0 compound case)
 	 */
-	rqstp->rq_cachetype = proc->pc_cachetype;
+	ntli->ntli_cachetype = proc->pc_cachetype;
 
 	/*
 	 * ->pc_decode advances the argument stream past the NFS
@@ -1027,7 +1028,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
 	 */
 	smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
 
-	nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply);
+	nfsd_cache_update(rqstp, rp, ntli->ntli_cachetype, nfs_reply);
 out_cached_reply:
 	return 1;
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index ab8237ba9596..62152e4f3bcc 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -218,7 +218,6 @@ struct svc_rqst {
 	u32			rq_vers;	/* program version */
 	u32			rq_proc;	/* procedure number */
 	u32			rq_prot;	/* IP protocol */
-	int			rq_cachetype;	/* catering to nfsd */
 	unsigned long		rq_flags;	/* flags field */
 	ktime_t			rq_qtime;	/* enqueue time */
 
-- 
cgit v1.2.3


From 153b9e025308417d167332c93e1bcc11174178de Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:23 -0500
Subject: lockd: Relocate and rename nlm_drop_reply

The nlm_drop_reply status code is internal to the kernel's lockd
implementation and must never appear on the wire. Its previous
location in xdr.h grouped it with legitimate NLM protocol status
codes, obscuring this critical distinction.

Relocate the definition to lockd.h with a comment block for internal
status codes, and rename to nlm__int__drop_reply to make its
internal-only nature explicit. This prepares for adding additional
internal status codes in subsequent patches.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svc4proc.c         | 22 ++++++++++++++--------
 fs/lockd/svclock.c          |  4 ++--
 fs/lockd/svcproc.c          | 24 +++++++++++++++---------
 fs/nfsd/lockd.c             |  2 +-
 include/linux/lockd/lockd.h |  6 ++++++
 include/linux/lockd/xdr.h   |  2 --
 6 files changed, 38 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4b6f18d97734..9c756d07223a 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -104,12 +104,13 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Now check for conflicting locks */
 	resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock,
 				       &resp->lock);
-	if (resp->status == nlm_drop_reply)
+	if (resp->status == nlm__int__drop_reply)
 		rc = rpc_drop_reply;
 	else
 		dprintk("lockd: TEST4        status %d\n", ntohl(resp->status));
@@ -140,13 +141,14 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Now try to lock the file */
 	resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
 					argp->block, &argp->cookie,
 					argp->reclaim);
-	if (resp->status == nlm_drop_reply)
+	if (resp->status == nlm__int__drop_reply)
 		rc = rpc_drop_reply;
 	else
 		dprintk("lockd: LOCK         status %d\n", ntohl(resp->status));
@@ -182,7 +184,8 @@ __nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Try to cancel request. */
 	resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock);
@@ -222,7 +225,8 @@ __nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Now try to remove the lock */
 	resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock);
@@ -369,7 +373,8 @@ nlm4svc_proc_share(struct svc_rqst *rqstp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Now try to create the share */
 	resp->status = nlmsvc_share_file(host, file, argp);
@@ -404,7 +409,8 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Now try to lock the file */
 	resp->status = nlmsvc_unshare_file(host, file, argp);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 255a847ca0b6..d86b02153c7c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -463,7 +463,7 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
 		block->b_deferred_req =
 			rqstp->rq_chandle.defer(block->b_cache_req);
 		if (block->b_deferred_req != NULL)
-			status = nlm_drop_reply;
+			status = nlm__int__drop_reply;
 	}
 	dprintk("lockd: nlmsvc_defer_lock_rqst block %p flags %d status %d\n",
 		block, block->b_flags, ntohl(status));
@@ -531,7 +531,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			ret = nlm_lck_denied;
 			goto out;
 		}
-		ret = nlm_drop_reply;
+		ret = nlm__int__drop_reply;
 		goto out;
 	}
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 95c6bf7ab757..2a2e48a9bd12 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -25,7 +25,7 @@ static inline __be32 cast_status(__be32 status)
 	case nlm_lck_denied_nolocks:
 	case nlm_lck_blocked:
 	case nlm_lck_denied_grace_period:
-	case nlm_drop_reply:
+	case nlm__int__drop_reply:
 		break;
 	case nlm4_deadlock:
 		status = nlm_lck_denied;
@@ -122,12 +122,13 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Now check for conflicting locks */
 	resp->status = cast_status(nlmsvc_testlock(rqstp, file, host,
 						   &argp->lock, &resp->lock));
-	if (resp->status == nlm_drop_reply)
+	if (resp->status == nlm__int__drop_reply)
 		rc = rpc_drop_reply;
 	else
 		dprintk("lockd: TEST          status %d vers %d\n",
@@ -159,13 +160,14 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Now try to lock the file */
 	resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock,
 					       argp->block, &argp->cookie,
 					       argp->reclaim));
-	if (resp->status == nlm_drop_reply)
+	if (resp->status == nlm__int__drop_reply)
 		rc = rpc_drop_reply;
 	else
 		dprintk("lockd: LOCK         status %d\n", ntohl(resp->status));
@@ -202,7 +204,8 @@ __nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Try to cancel request. */
 	resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock));
@@ -243,7 +246,8 @@ __nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Now try to remove the lock */
 	resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock));
@@ -400,7 +404,8 @@ nlmsvc_proc_share(struct svc_rqst *rqstp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Now try to create the share */
 	resp->status = cast_status(nlmsvc_share_file(host, file, argp));
@@ -435,7 +440,8 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp)
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+		return resp->status == nlm__int__drop_reply ?
+			rpc_drop_reply : rpc_success;
 
 	/* Now try to unshare the file */
 	resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index c774ce9aa296..8c230ccd6645 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -71,7 +71,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
 		 * to callback when the delegation is returned but might
 		 * not have a proper lock request to block on.
 		 */
-		return nlm_drop_reply;
+		return nlm__int__drop_reply;
 	case nfserr_stale:
 		return nlm_stale_fh;
 	default:
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 330e38776bb2..fdefec39553f 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -38,6 +38,12 @@
  */
 #define LOCKD_DFLT_TIMEO	10
 
+/*
+ * Internal-use status codes, not to be placed on the wire.
+ * Version handlers translate these to appropriate wire values.
+ */
+#define nlm__int__drop_reply	cpu_to_be32(30000)
+
 /*
  * Lockd host handle (used both by the client and server personality).
  */
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 17d53165d9f2..292e4e38d17d 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -33,8 +33,6 @@ struct svc_rqst;
 #define	nlm_lck_blocked		cpu_to_be32(NLM_LCK_BLOCKED)
 #define	nlm_lck_denied_grace_period	cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD)
 
-#define nlm_drop_reply		cpu_to_be32(30000)
-
 /* Lock info passed via NLM */
 struct nlm_lock {
 	char *			caller;
-- 
cgit v1.2.3


From 9e0d0c61940796893e0c2200cdc7be0684218238 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:24 -0500
Subject: lockd: Introduce nlm__int__deadlock

The use of CONFIG_LOCKD_V4 in combination with a later cast_status()
in the NLMv3 code is difficult to reason about. Instead, replace the
use of nlm_deadlock with an implementation-defined status value that
version-specific code translates appropriately.

The new approach establishes a translation boundary: generic lockd
code returns nlm__int__deadlock when posix_lock_file() yields
-EDEADLK. Version-specific handlers (svc4proc.c for NLMv4,
svcproc.c for NLMv3) translate this internal status to the
appropriate wire protocol value. NLMv4 maps to nlm4_deadlock;
NLMv3 maps to nlm_lck_denied (since NLMv3 lacks a deadlock-specific
status code).

Later this modification will also remove the need to include NLMv4
headers in NLMv3 and generic code.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svc4proc.c         | 10 ++++++++--
 fs/lockd/svclock.c          |  8 +-------
 fs/lockd/svcproc.c          |  4 +++-
 include/linux/lockd/lockd.h |  1 +
 4 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 9c756d07223a..55b6dcc56db1 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -148,10 +148,16 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
 	resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
 					argp->block, &argp->cookie,
 					argp->reclaim);
-	if (resp->status == nlm__int__drop_reply)
+	switch (resp->status) {
+	case nlm__int__drop_reply:
 		rc = rpc_drop_reply;
-	else
+		break;
+	case nlm__int__deadlock:
+		resp->status = nlm4_deadlock;
+		fallthrough;
+	default:
 		dprintk("lockd: LOCK         status %d\n", ntohl(resp->status));
+	}
 
 	nlmsvc_release_lockowner(&argp->lock);
 	nlmsvc_release_host(host);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index d86b02153c7c..5edf00751a1e 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -33,12 +33,6 @@
 
 #define NLMDBG_FACILITY		NLMDBG_SVCLOCK
 
-#ifdef CONFIG_LOCKD_V4
-#define nlm_deadlock	nlm4_deadlock
-#else
-#define nlm_deadlock	nlm_lck_denied
-#endif
-
 static void nlmsvc_release_block(struct nlm_block *block);
 static void	nlmsvc_insert_block(struct nlm_block *block, unsigned long);
 static void	nlmsvc_remove_block(struct nlm_block *block);
@@ -589,7 +583,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			goto out;
 		case -EDEADLK:
 			nlmsvc_remove_block(block);
-			ret = nlm_deadlock;
+			ret = nlm__int__deadlock;
 			goto out;
 		default:			/* includes ENOLCK */
 			nlmsvc_remove_block(block);
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 2a2e48a9bd12..27ed71935e45 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -27,7 +27,7 @@ static inline __be32 cast_status(__be32 status)
 	case nlm_lck_denied_grace_period:
 	case nlm__int__drop_reply:
 		break;
-	case nlm4_deadlock:
+	case nlm__int__deadlock:
 		status = nlm_lck_denied;
 		break;
 	default:
@@ -39,6 +39,8 @@ static inline __be32 cast_status(__be32 status)
 #else
 static inline __be32 cast_status(__be32 status)
 {
+	if (status == nlm__int__deadlock)
+		status = nlm_lck_denied;
 	return status;
 }
 #endif
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index fdefec39553f..793691912137 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -43,6 +43,7 @@
  * Version handlers translate these to appropriate wire values.
  */
 #define nlm__int__drop_reply	cpu_to_be32(30000)
+#define nlm__int__deadlock	cpu_to_be32(30001)
 
 /*
  * Lockd host handle (used both by the client and server personality).
-- 
cgit v1.2.3


From 7db001e03d7a668ca6c3789fee42a24236ca90f6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:25 -0500
Subject: lockd: Have nlm_fopen() return errno values

The nlm_fopen() function is part of the API between nfsd and lockd.

Currently its return value is an on-the-wire NLM status code. But
that forces NFSD to include NLM wire protocol definitions despite
having no other dependency on the NLM wire protocol.

In addition, a CONFIG_LOCKD_V4 Kconfig symbol appears in the middle
of NFSD source code.

Refactor: Let's not use on-the-wire values as part of a high-level
API between two Linux kernel modules. That's what we have errno for,
right?

And, instead of simply moving the CONFIG_LOCKD_V4 check, we can get
rid of it entirely and let the decision of what actual NLM status
code goes on the wire to be left up to NLM version-specific code.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svc4proc.c         | 18 +++++++++++++---
 fs/lockd/svcproc.c          | 14 ++++++++++++-
 fs/lockd/svcsubs.c          | 27 ++++++++++++++++++------
 fs/nfsd/lockd.c             | 50 +++++++++++++++++++++++++--------------------
 include/linux/lockd/bind.h  |  8 +++-----
 include/linux/lockd/lockd.h |  2 ++
 6 files changed, 82 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 55b6dcc56db1..4ceb27cc72e4 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -73,9 +73,21 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 no_locks:
 	nlmsvc_release_host(host);
- 	if (error)
-		return error;	
-	return nlm_lck_denied_nolocks;
+	switch (error) {
+	case nlm_granted:
+		return nlm_lck_denied_nolocks;
+	case nlm__int__stale_fh:
+		return nlm4_stale_fh;
+	case nlm__int__failed:
+		return nlm4_failed;
+	default:
+		if (be32_to_cpu(error) >= 30000) {
+			pr_warn_once("lockd: unhandled internal status %u\n",
+				     be32_to_cpu(error));
+			return nlm4_failed;
+		}
+		return error;
+	}
 }
 
 /*
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 27ed71935e45..272c8f36ed2a 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -39,8 +39,20 @@ static inline __be32 cast_status(__be32 status)
 #else
 static inline __be32 cast_status(__be32 status)
 {
-	if (status == nlm__int__deadlock)
+	switch (status) {
+	case nlm__int__deadlock:
 		status = nlm_lck_denied;
+		break;
+	case nlm__int__stale_fh:
+	case nlm__int__failed:
+		status = nlm_lck_denied_nolocks;
+		break;
+	default:
+		if (be32_to_cpu(status) >= 30000)
+			pr_warn_once("lockd: unhandled internal status %u\n",
+				     be32_to_cpu(status));
+		break;
+	}
 	return status;
 }
 #endif
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dd0214dcb695..967739d2aa90 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -87,14 +87,29 @@ static __be32 nlm_do_fopen(struct svc_rqst *rqstp,
 			   struct nlm_file *file, int mode)
 {
 	struct file **fp = &file->f_file[mode];
-	__be32	nfserr;
+	__be32 nlmerr = nlm_granted;
+	int error;
 
 	if (*fp)
-		return 0;
-	nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode);
-	if (nfserr)
-		dprintk("lockd: open failed (error %d)\n", nfserr);
-	return nfserr;
+		return nlmerr;
+
+	error = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode);
+	if (error) {
+		dprintk("lockd: open failed (errno %d)\n", error);
+		switch (error) {
+		case -EWOULDBLOCK:
+			nlmerr = nlm__int__drop_reply;
+			break;
+		case -ESTALE:
+			nlmerr = nlm__int__stale_fh;
+			break;
+		default:
+			nlmerr = nlm__int__failed;
+			break;
+		}
+	}
+
+	return nlmerr;
 }
 
 /*
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 8c230ccd6645..6fe1325815e0 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -14,19 +14,20 @@
 
 #define NFSDDBG_FACILITY		NFSDDBG_LOCKD
 
-#ifdef CONFIG_LOCKD_V4
-#define nlm_stale_fh	nlm4_stale_fh
-#define nlm_failed	nlm4_failed
-#else
-#define nlm_stale_fh	nlm_lck_denied_nolocks
-#define nlm_failed	nlm_lck_denied_nolocks
-#endif
-/*
- * Note: we hold the dentry use count while the file is open.
+/**
+ * nlm_fopen - Open an NFSD file
+ * @rqstp: NLM RPC procedure execution context
+ * @f: NFS file handle to be opened
+ * @filp: OUT: an opened struct file
+ * @flags: the POSIX open flags to use
+ *
+ * nlm_fopen() holds the dentry reference until nlm_fclose() releases it.
+ *
+ * Returns zero on success or a negative errno value if the file
+ * cannot be opened.
  */
-static __be32
-nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
-		int mode)
+static int nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f,
+		     struct file **filp, int flags)
 {
 	__be32		nfserr;
 	int		access;
@@ -47,18 +48,17 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
 	 * if NFSEXP_NOAUTHNLM is set.  Some older clients use AUTH_NULL
 	 * for NLM requests.
 	 */
-	access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
+	access = (flags == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
 	access |= NFSD_MAY_NLM | NFSD_MAY_OWNER_OVERRIDE | NFSD_MAY_BYPASS_GSS;
 	nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
 	fh_put(&fh);
-	/* We return nlm error codes as nlm doesn't know
-	 * about nfsd, but nfsd does know about nlm..
-	 */
+
 	switch (nfserr) {
 	case nfs_ok:
-		return 0;
+		break;
 	case nfserr_jukebox:
-		/* this error can indicate a presence of a conflicting
+		/*
+		 * This error can indicate a presence of a conflicting
 		 * delegation to an NLM lock request. Options are:
 		 * (1) For now, drop this request and make the client
 		 * retry. When delegation is returned, client's lock retry
@@ -66,19 +66,25 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
 		 * (2) NLM4_DENIED as per "spec" signals to the client
 		 * that the lock is unavailable now but client can retry.
 		 * Linux client implementation does not. It treats
-		 * NLM4_DENIED same as NLM4_FAILED and errors the request.
+		 * NLM4_DENIED same as NLM4_FAILED and fails the request.
 		 * (3) For the future, treat this as blocked lock and try
 		 * to callback when the delegation is returned but might
 		 * not have a proper lock request to block on.
 		 */
-		return nlm__int__drop_reply;
+		return -EWOULDBLOCK;
 	case nfserr_stale:
-		return nlm_stale_fh;
+		return -ESTALE;
 	default:
-		return nlm_failed;
+		return -ENOLCK;
 	}
+
+	return 0;
 }
 
+/**
+ * nlm_fclose - Close an NFSD file
+ * @filp: a struct file that was opened by nlm_fopen()
+ */
 static void
 nlm_fclose(struct file *filp)
 {
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index c53c81242e72..2f5dd9e943ee 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -26,11 +26,9 @@ struct rpc_clnt;
  * This is the set of functions for lockd->nfsd communication
  */
 struct nlmsvc_binding {
-	__be32			(*fopen)(struct svc_rqst *,
-						struct nfs_fh *,
-						struct file **,
-						int mode);
-	void			(*fclose)(struct file *);
+	int		(*fopen)(struct svc_rqst *rqstp, struct nfs_fh *f,
+				 struct file **filp, int flags);
+	void		(*fclose)(struct file *filp);
 };
 
 extern const struct nlmsvc_binding *nlmsvc_ops;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 793691912137..195e6ce28f6e 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -44,6 +44,8 @@
  */
 #define nlm__int__drop_reply	cpu_to_be32(30000)
 #define nlm__int__deadlock	cpu_to_be32(30001)
+#define nlm__int__stale_fh	cpu_to_be32(30002)
+#define nlm__int__failed	cpu_to_be32(30003)
 
 /*
  * Lockd host handle (used both by the client and server personality).
-- 
cgit v1.2.3


From efb5b15e3b78f5644dd2d4ddec8880e0c9aa5b5f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:26 -0500
Subject: lockd: Relocate nlmsvc_unlock API declarations

The nlmsvc_unlock_all_by_sb() and nlmsvc_unlock_all_by_ip()
functions are part of lockd's external API, consumed by other
kernel subsystems. Their declarations currently reside in
linux/lockd/lockd.h alongside internal implementation details,
which blurs the boundary between lockd's public interface and
its private internals.

Moving these declarations to linux/lockd/bind.h groups them
with other external API functions and makes the separation
explicit. This clarifies which functions are intended for
external use and reduces the risk of internal implementation
details leaking into the public API surface.

Build-tested with allyesconfig; no functional changes.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfsctl.c            | 2 +-
 include/linux/lockd/bind.h  | 7 +++++++
 include/linux/lockd/lockd.h | 6 ------
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 71aabdaa1d15..0bf01ae411c5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -11,7 +11,7 @@
 #include <linux/fs_context.h>
 
 #include <linux/sunrpc/svcsock.h>
-#include <linux/lockd/lockd.h>
+#include <linux/lockd/bind.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/gss_api.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 2f5dd9e943ee..82eca0a13ccc 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -21,6 +21,7 @@
 struct svc_rqst;
 struct rpc_task;
 struct rpc_clnt;
+struct super_block;
 
 /*
  * This is the set of functions for lockd->nfsd communication
@@ -80,4 +81,10 @@ extern int	nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, vo
 extern int	lockd_up(struct net *net, const struct cred *cred);
 extern void	lockd_down(struct net *net);
 
+/*
+ * Cluster failover support
+ */
+int nlmsvc_unlock_all_by_sb(struct super_block *sb);
+int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
+
 #endif /* LINUX_LOCKD_BIND_H */
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 195e6ce28f6e..0d883f48ec21 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -311,12 +311,6 @@ void		  nlmsvc_mark_resources(struct net *);
 void		  nlmsvc_free_host_resources(struct nlm_host *);
 void		  nlmsvc_invalidate_all(void);
 
-/*
- * Cluster failover support
- */
-int           nlmsvc_unlock_all_by_sb(struct super_block *sb);
-int           nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
-
 static inline struct file *nlmsvc_file_file(const struct nlm_file *file)
 {
 	return file->f_file[O_RDONLY] ?
-- 
cgit v1.2.3


From 840621fd2ff23ada8b9262d90477e75232566e6b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:27 -0500
Subject: NFS: Use nlmclnt_shutdown_rpc_clnt() to safely shut down NLM

A race condition exists in shutdown_store() when writing to the sysfs
"shutdown" file concurrently with nlm_shutdown_hosts_net(). Without
synchronization, the following sequence can occur:

  1. shutdown_store() reads server->nlm_host (non-NULL)
  2. nlm_shutdown_hosts_net() acquires nlm_host_mutex, calls
     rpc_shutdown_client(), sets h_rpcclnt to NULL, and potentially
     frees the host via nlm_gc_hosts()
  3. shutdown_store() dereferences the now-stale or freed host

Introduce nlmclnt_shutdown_rpc_clnt(), which acquires nlm_host_mutex
before accessing h_rpcclnt. This synchronizes with
nlm_shutdown_hosts_net() and ensures the rpc_clnt pointer remains
valid during the shutdown operation.

This change also improves API layering: NFS client code no longer
needs to include the internal lockd header to access nlm_host fields.
The new helper resides in bind.h alongside other public lockd
interfaces.

Reported-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/host.c            | 29 +++++++++++++++++++++++++++++
 fs/nfs/sysfs.c             |  4 ++--
 include/linux/lockd/bind.h |  1 +
 3 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 1a9582a10a86..015900d2d4c2 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -306,6 +306,35 @@ void nlmclnt_release_host(struct nlm_host *host)
 	}
 }
 
+/* Callback for rpc_cancel_tasks() - matches all tasks for cancellation */
+static bool nlmclnt_match_all(const struct rpc_task *task, const void *data)
+{
+	return true;
+}
+
+/**
+ * nlmclnt_shutdown_rpc_clnt - safely shut down NLM client RPC operations
+ * @host: nlm_host to shut down
+ *
+ * Cancels outstanding RPC tasks and marks the client as shut down.
+ * Synchronizes with nlmclnt_release_host() via nlm_host_mutex to prevent
+ * races between shutdown and host destruction. Safe to call if h_rpcclnt
+ * is NULL or already shut down.
+ */
+void nlmclnt_shutdown_rpc_clnt(struct nlm_host *host)
+{
+	struct rpc_clnt *clnt;
+
+	mutex_lock(&nlm_host_mutex);
+	clnt = host->h_rpcclnt;
+	if (clnt) {
+		clnt->cl_shutdown = 1;
+		rpc_cancel_tasks(clnt, -EIO, nlmclnt_match_all, NULL);
+	}
+	mutex_unlock(&nlm_host_mutex);
+}
+EXPORT_SYMBOL_GPL(nlmclnt_shutdown_rpc_clnt);
+
 /**
  * nlmsvc_lookup_host - Find an NLM host handle matching a remote client
  * @rqstp: incoming NLM request
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 7d8921f524a6..051da37770d8 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -12,7 +12,7 @@
 #include <linux/string.h>
 #include <linux/nfs_fs.h>
 #include <linux/rcupdate.h>
-#include <linux/lockd/lockd.h>
+#include <linux/lockd/bind.h>
 
 #include "internal.h"
 #include "nfs4_fs.h"
@@ -285,7 +285,7 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
 		shutdown_client(server->client_acl);
 
 	if (server->nlm_host)
-		shutdown_client(server->nlm_host->h_rpcclnt);
+		nlmclnt_shutdown_rpc_clnt(server->nlm_host);
 out:
 	shutdown_nfs_client(server->nfs_client);
 	return count;
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 82eca0a13ccc..39c124dcb19c 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -57,6 +57,7 @@ struct nlmclnt_initdata {
 extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init);
 extern void	nlmclnt_done(struct nlm_host *host);
 extern struct rpc_clnt *nlmclnt_rpc_clnt(struct nlm_host *host);
+extern void	nlmclnt_shutdown_rpc_clnt(struct nlm_host *host);
 
 /*
  * NLM client operations provide a means to modify RPC processing of NLM
-- 
cgit v1.2.3


From f4d5f8caadd858f11b21e8a9e5c85290fc21a568 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:28 -0500
Subject: lockd: Move xdr4.h from include/linux/lockd/ to fs/lockd/

The xdr4.h header declares NLMv4-specific XDR encoder/decoder
functions and error codes that are used exclusively within the
lockd subsystem. Moving it from include/linux/lockd/ to fs/lockd/
clarifies the intended scope of these declarations and prevents
external code from depending on lockd-internal interfaces.

This change reduces the public API surface of the lockd module
and makes it easier to refactor NLMv4 internals without risk of
breaking out-of-tree consumers. The header's contents are
implementation details of the NLMv4 wire protocol handling, not
a contract with other kernel subsystems.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/clnt4xdr.c         |  2 ++
 fs/lockd/svc4proc.c         |  2 ++
 fs/lockd/xdr4.c             |  1 +
 fs/lockd/xdr4.h             | 34 ++++++++++++++++++++++++++++++++++
 include/linux/lockd/bind.h  |  3 ---
 include/linux/lockd/lockd.h |  7 ++++---
 include/linux/lockd/xdr4.h  | 43 -------------------------------------------
 7 files changed, 43 insertions(+), 49 deletions(-)
 create mode 100644 fs/lockd/xdr4.h
 delete mode 100644 include/linux/lockd/xdr4.h

(limited to 'include/linux')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 527458db4525..23896073c7e5 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -17,6 +17,8 @@
 
 #include <uapi/linux/nfs3.h>
 
+#include "xdr4.h"
+
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
 #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4ceb27cc72e4..51d072a83a49 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -14,6 +14,8 @@
 #include <linux/lockd/share.h>
 #include <linux/sunrpc/svc_xprt.h>
 
+#include "xdr4.h"
+
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
 /*
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index e343c820301f..5b1e15977697 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -19,6 +19,7 @@
 #include <linux/lockd/lockd.h>
 
 #include "svcxdr.h"
+#include "xdr4.h"
 
 static inline s64
 loff_t_to_s64(loff_t offset)
diff --git a/fs/lockd/xdr4.h b/fs/lockd/xdr4.h
new file mode 100644
index 000000000000..7be318c0512b
--- /dev/null
+++ b/fs/lockd/xdr4.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * XDR types for the NLM protocol
+ *
+ * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _LOCKD_XDR4_H
+#define _LOCKD_XDR4_H
+
+/* error codes new to NLMv4 */
+#define	nlm4_deadlock		cpu_to_be32(NLM_DEADLCK)
+#define	nlm4_rofs		cpu_to_be32(NLM_ROFS)
+#define	nlm4_stale_fh		cpu_to_be32(NLM_STALE_FH)
+#define	nlm4_fbig		cpu_to_be32(NLM_FBIG)
+#define	nlm4_failed		cpu_to_be32(NLM_FAILED)
+
+void	nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len);
+bool	nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+bool	nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+#endif /* _LOCKD_XDR4_H */
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 39c124dcb19c..077da0696f12 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -13,9 +13,6 @@
 #include <linux/lockd/nlm.h>
 /* need xdr-encoded error codes too, so... */
 #include <linux/lockd/xdr.h>
-#ifdef CONFIG_LOCKD_V4
-#include <linux/lockd/xdr4.h>
-#endif
 
 /* Dummy declarations */
 struct svc_rqst;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0d883f48ec21..46f244141645 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -22,9 +22,6 @@
 #include <linux/utsname.h>
 #include <linux/lockd/bind.h>
 #include <linux/lockd/xdr.h>
-#ifdef CONFIG_LOCKD_V4
-#include <linux/lockd/xdr4.h>
-#endif
 #include <linux/lockd/debug.h>
 #include <linux/sunrpc/svc.h>
 
@@ -235,6 +232,10 @@ int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *,
 				  struct nlm_rqst *);
 void		  nlmclnt_next_cookie(struct nlm_cookie *);
 
+#ifdef CONFIG_LOCKD_V4
+extern const struct rpc_version nlm_version4;
+#endif
+
 /*
  * Host cache
  */
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
deleted file mode 100644
index 72831e35dca3..000000000000
--- a/include/linux/lockd/xdr4.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/xdr4.h
- *
- * XDR types for the NLM protocol
- *
- * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LOCKD_XDR4_H
-#define LOCKD_XDR4_H
-
-#include <linux/fs.h>
-#include <linux/nfs.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/lockd/xdr.h>
-
-/* error codes new to NLMv4 */
-#define	nlm4_deadlock		cpu_to_be32(NLM_DEADLCK)
-#define	nlm4_rofs		cpu_to_be32(NLM_ROFS)
-#define	nlm4_stale_fh		cpu_to_be32(NLM_STALE_FH)
-#define	nlm4_fbig		cpu_to_be32(NLM_FBIG)
-#define	nlm4_failed		cpu_to_be32(NLM_FAILED)
-
-void	nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len);
-bool	nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-bool	nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-extern const struct rpc_version nlm_version4;
-
-#endif /* LOCKD_XDR4_H */
-- 
cgit v1.2.3


From 4db2f8a016dc9f9b357bfbf5c507c2582bb36730 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:29 -0500
Subject: lockd: Move share.h from include/linux/lockd/ to fs/lockd/

The share.h header defines struct nlm_share and declares the DOS
share management functions used by the NLM server to implement
NLM_SHARE and NLM_UNSHARE operations. These interfaces are used
exclusively within the lockd subsystem. A git grep search confirms
no external code references them.

Relocating this header from include/linux/lockd/ to fs/lockd/
narrows the public API surface of the lockd module. Out-of-tree
code cannot depend on these internal interfaces after this change.
Future refactoring of the share management implementation thus
requires no consideration of external consumers.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/share.h            | 30 ++++++++++++++++++++++++++++++
 fs/lockd/svc4proc.c         |  2 +-
 fs/lockd/svcproc.c          |  3 ++-
 fs/lockd/svcshare.c         |  3 ++-
 fs/lockd/svcsubs.c          |  3 ++-
 include/linux/lockd/lockd.h |  2 ++
 include/linux/lockd/share.h | 32 --------------------------------
 7 files changed, 39 insertions(+), 36 deletions(-)
 create mode 100644 fs/lockd/share.h
 delete mode 100644 include/linux/lockd/share.h

(limited to 'include/linux')

diff --git a/fs/lockd/share.h b/fs/lockd/share.h
new file mode 100644
index 000000000000..d8f4ebd9c278
--- /dev/null
+++ b/fs/lockd/share.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DOS share management for lockd.
+ *
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _LOCKD_SHARE_H
+#define _LOCKD_SHARE_H
+
+/*
+ * DOS share for a specific file
+ */
+struct nlm_share {
+	struct nlm_share *	s_next;		/* linked list */
+	struct nlm_host *	s_host;		/* client host */
+	struct nlm_file *	s_file;		/* shared file */
+	struct xdr_netobj	s_owner;	/* owner handle */
+	u32			s_access;	/* access mode */
+	u32			s_mode;		/* deny mode */
+};
+
+__be32	nlmsvc_share_file(struct nlm_host *, struct nlm_file *,
+					       struct nlm_args *);
+__be32	nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *,
+					       struct nlm_args *);
+void	nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *,
+					       nlm_host_match_fn_t);
+
+#endif /* _LOCKD_SHARE_H */
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 51d072a83a49..da88b638d90d 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -11,9 +11,9 @@
 #include <linux/types.h>
 #include <linux/time.h>
 #include <linux/lockd/lockd.h>
-#include <linux/lockd/share.h>
 #include <linux/sunrpc/svc_xprt.h>
 
+#include "share.h"
 #include "xdr4.h"
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 272c8f36ed2a..8441fabd019f 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -11,9 +11,10 @@
 #include <linux/types.h>
 #include <linux/time.h>
 #include <linux/lockd/lockd.h>
-#include <linux/lockd/share.h>
 #include <linux/sunrpc/svc_xprt.h>
 
+#include "share.h"
+
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
 #ifdef CONFIG_LOCKD_V4
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 88c81ce1148d..8e06840834c6 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -15,7 +15,8 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
-#include <linux/lockd/share.h>
+
+#include "share.h"
 
 static inline int
 nlm_cmp_owner(struct nlm_share *share, struct xdr_netobj *oh)
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 967739d2aa90..ce596a17112c 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -16,11 +16,12 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/lockd/lockd.h>
-#include <linux/lockd/share.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <uapi/linux/nfs2.h>
 
+#include "share.h"
+
 #define NLMDBG_FACILITY		NLMDBG_SVCSUBS
 
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 46f244141645..eebcecd12fae 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -155,6 +155,8 @@ struct nlm_rqst {
 	void *	a_callback_data; /* sent to nlmclnt_operations callbacks */
 };
 
+struct nlm_share;
+
 /*
  * This struct describes a file held open by lockd on behalf of
  * an NFS client.
diff --git a/include/linux/lockd/share.h b/include/linux/lockd/share.h
deleted file mode 100644
index 1f18a9faf645..000000000000
--- a/include/linux/lockd/share.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/share.h
- *
- * DOS share management for lockd.
- *
- * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_SHARE_H
-#define LINUX_LOCKD_SHARE_H
-
-/*
- * DOS share for a specific file
- */
-struct nlm_share {
-	struct nlm_share *	s_next;		/* linked list */
-	struct nlm_host *	s_host;		/* client host */
-	struct nlm_file *	s_file;		/* shared file */
-	struct xdr_netobj	s_owner;	/* owner handle */
-	u32			s_access;	/* access mode */
-	u32			s_mode;		/* deny mode */
-};
-
-__be32	nlmsvc_share_file(struct nlm_host *, struct nlm_file *,
-					       struct nlm_args *);
-__be32	nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *,
-					       struct nlm_args *);
-void	nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *,
-					       nlm_host_match_fn_t);
-
-#endif /* LINUX_LOCKD_SHARE_H */
-- 
cgit v1.2.3


From 2c562c6e6715619ce34bb37d8a0a5e40fdcc7a44 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:30 -0500
Subject: lockd: Relocate include/linux/lockd/lockd.h

Headers placed in include/linux/ form part of the kernel's
internal API and signal to subsystem maintainers that other
parts of the kernel may depend on them. By moving lockd.h
into fs/lockd/, lockd becomes a more self-contained module
whose internal interfaces are clearly distinguished from its
public contract with the rest of the kernel. This relocation
addresses a long-standing XXX comment in the header itself
that acknowledged the file's misplacement. Future changes to
lockd internals can now proceed with confidence that external
consumers are not inadvertently coupled to implementation
details.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/clnt4xdr.c         |   3 +-
 fs/lockd/clntlock.c         |   2 +-
 fs/lockd/clntproc.c         |   2 +-
 fs/lockd/clntxdr.c          |   3 +-
 fs/lockd/host.c             |   2 +-
 fs/lockd/lockd.h            | 395 +++++++++++++++++++++++++++++++++++++++++++
 fs/lockd/mon.c              |   2 +-
 fs/lockd/svc.c              |   2 +-
 fs/lockd/svc4proc.c         |   2 +-
 fs/lockd/svclock.c          |   3 +-
 fs/lockd/svcproc.c          |   2 +-
 fs/lockd/svcshare.c         |   2 +-
 fs/lockd/svcsubs.c          |   2 +-
 fs/lockd/trace.h            |   3 +-
 fs/lockd/xdr.c              |   3 +-
 fs/lockd/xdr4.c             |   2 +-
 include/linux/lockd/lockd.h | 401 --------------------------------------------
 17 files changed, 414 insertions(+), 417 deletions(-)
 create mode 100644 fs/lockd/lockd.h
 delete mode 100644 include/linux/lockd/lockd.h

(limited to 'include/linux')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 23896073c7e5..61ee5fa6dfa4 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -13,7 +13,8 @@
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/stats.h>
-#include <linux/lockd/lockd.h>
+
+#include "lockd.h"
 
 #include <uapi/linux/nfs3.h>
 
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 85bc0f3e91df..8fa30c42c92a 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -15,9 +15,9 @@
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svc_xprt.h>
-#include <linux/lockd/lockd.h>
 #include <linux/kthread.h>
 
+#include "lockd.h"
 #include "trace.h"
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index fb4d0752c9bb..7f211008a5d2 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -18,8 +18,8 @@
 #include <linux/freezer.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
-#include <linux/lockd/lockd.h>
 
+#include "lockd.h"
 #include "trace.h"
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 6ea3448d2d31..65555f5224b1 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -15,7 +15,8 @@
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/stats.h>
-#include <linux/lockd/lockd.h>
+
+#include "lockd.h"
 
 #include <uapi/linux/nfs2.h>
 
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 015900d2d4c2..ea8a8e166f7e 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -16,13 +16,13 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc.h>
-#include <linux/lockd/lockd.h>
 #include <linux/mutex.h>
 
 #include <linux/sunrpc/svc_xprt.h>
 
 #include <net/ipv6.h>
 
+#include "lockd.h"
 #include "netns.h"
 
 #define NLMDBG_FACILITY		NLMDBG_HOSTCACHE
diff --git a/fs/lockd/lockd.h b/fs/lockd/lockd.h
new file mode 100644
index 000000000000..9bcf89765a69
--- /dev/null
+++ b/fs/lockd/lockd.h
@@ -0,0 +1,395 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _LOCKD_LOCKD_H
+#define _LOCKD_LOCKD_H
+
+#include <linux/exportfs.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
+#include <linux/fs.h>
+#include <linux/kref.h>
+#include <linux/refcount.h>
+#include <linux/utsname.h>
+#include <linux/lockd/bind.h>
+#include <linux/lockd/xdr.h>
+#include <linux/lockd/debug.h>
+#include <linux/sunrpc/svc.h>
+
+/*
+ * Version string
+ */
+#define LOCKD_VERSION		"0.5"
+
+/*
+ * Default timeout for RPC calls (seconds)
+ */
+#define LOCKD_DFLT_TIMEO	10
+
+/*
+ * Internal-use status codes, not to be placed on the wire.
+ * Version handlers translate these to appropriate wire values.
+ */
+#define nlm__int__drop_reply	cpu_to_be32(30000)
+#define nlm__int__deadlock	cpu_to_be32(30001)
+#define nlm__int__stale_fh	cpu_to_be32(30002)
+#define nlm__int__failed	cpu_to_be32(30003)
+
+/*
+ * Lockd host handle (used both by the client and server personality).
+ */
+struct nlm_host {
+	struct hlist_node	h_hash;		/* doubly linked list */
+	struct sockaddr_storage	h_addr;		/* peer address */
+	size_t			h_addrlen;
+	struct sockaddr_storage	h_srcaddr;	/* our address (optional) */
+	size_t			h_srcaddrlen;
+	struct rpc_clnt		*h_rpcclnt;	/* RPC client to talk to peer */
+	char			*h_name;		/* remote hostname */
+	u32			h_version;	/* interface version */
+	unsigned short		h_proto;	/* transport proto */
+	unsigned short		h_reclaiming : 1,
+				h_server     : 1, /* server side, not client side */
+				h_noresvport : 1,
+				h_inuse      : 1;
+	wait_queue_head_t	h_gracewait;	/* wait while reclaiming */
+	struct rw_semaphore	h_rwsem;	/* Reboot recovery lock */
+	u32			h_state;	/* pseudo-state counter */
+	u32			h_nsmstate;	/* true remote NSM state */
+	u32			h_pidcount;	/* Pseudopids */
+	refcount_t		h_count;	/* reference count */
+	struct mutex		h_mutex;	/* mutex for pmap binding */
+	unsigned long		h_nextrebind;	/* next portmap call */
+	unsigned long		h_expires;	/* eligible for GC */
+	struct list_head	h_lockowners;	/* Lockowners for the client */
+	spinlock_t		h_lock;
+	struct list_head	h_granted;	/* Locks in GRANTED state */
+	struct list_head	h_reclaim;	/* Locks in RECLAIM state */
+	struct nsm_handle	*h_nsmhandle;	/* NSM status handle */
+	char			*h_addrbuf;	/* address eyecatcher */
+	struct net		*net;		/* host net */
+	const struct cred	*h_cred;
+	char			nodename[UNX_MAXNODENAME + 1];
+	const struct nlmclnt_operations	*h_nlmclnt_ops;	/* Callback ops for NLM users */
+};
+
+/*
+ * The largest string sm_addrbuf should hold is a full-size IPv6 address
+ * (no "::" anywhere) with a scope ID.  The buffer size is computed to
+ * hold eight groups of colon-separated four-hex-digit numbers, a
+ * percent sign, a scope id (at most 32 bits, in decimal), and NUL.
+ */
+#define NSM_ADDRBUF		((8 * 4 + 7) + (1 + 10) + 1)
+
+struct nsm_handle {
+	struct list_head	sm_link;
+	refcount_t		sm_count;
+	char			*sm_mon_name;
+	char			*sm_name;
+	struct sockaddr_storage	sm_addr;
+	size_t			sm_addrlen;
+	unsigned int		sm_monitored : 1,
+				sm_sticky : 1;	/* don't unmonitor */
+	struct nsm_private	sm_priv;
+	char			sm_addrbuf[NSM_ADDRBUF];
+};
+
+/*
+ * Rigorous type checking on sockaddr type conversions
+ */
+static inline struct sockaddr *nlm_addr(const struct nlm_host *host)
+{
+	return (struct sockaddr *)&host->h_addr;
+}
+
+static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host)
+{
+	return (struct sockaddr *)&host->h_srcaddr;
+}
+
+/*
+ * Map an fl_owner_t into a unique 32-bit "pid"
+ */
+struct nlm_lockowner {
+	struct list_head list;
+	refcount_t count;
+
+	struct nlm_host *host;
+	fl_owner_t owner;
+	uint32_t pid;
+};
+
+/*
+ * This is the representation of a blocked client lock.
+ */
+struct nlm_wait {
+	struct list_head	b_list;		/* linked list */
+	wait_queue_head_t	b_wait;		/* where to wait on */
+	struct nlm_host		*b_host;
+	struct file_lock	*b_lock;	/* local file lock */
+	__be32			b_status;	/* grant callback status */
+};
+
+/*
+ * Memory chunk for NLM client RPC request.
+ */
+#define NLMCLNT_OHSIZE		((__NEW_UTS_LEN) + 10u)
+struct nlm_rqst {
+	refcount_t		a_count;
+	unsigned int		a_flags;	/* initial RPC task flags */
+	struct nlm_host *	a_host;		/* host handle */
+	struct nlm_args		a_args;		/* arguments */
+	struct nlm_res		a_res;		/* result */
+	struct nlm_block *	a_block;
+	unsigned int		a_retries;	/* Retry count */
+	u8			a_owner[NLMCLNT_OHSIZE];
+	void *	a_callback_data; /* sent to nlmclnt_operations callbacks */
+};
+
+struct nlm_share;
+
+/*
+ * This struct describes a file held open by lockd on behalf of
+ * an NFS client.
+ */
+struct nlm_file {
+	struct hlist_node	f_list;		/* linked list */
+	struct nfs_fh		f_handle;	/* NFS file handle */
+	struct file *		f_file[2];	/* VFS file pointers,
+						   indexed by O_ flags */
+	struct nlm_share *	f_shares;	/* DOS shares */
+	struct list_head	f_blocks;	/* blocked locks */
+	unsigned int		f_locks;	/* guesstimate # of locks */
+	unsigned int		f_count;	/* reference count */
+	struct mutex		f_mutex;	/* avoid concurrent access */
+};
+
+/*
+ * This is a server block (i.e. a lock requested by some client which
+ * couldn't be granted because of a conflicting lock).
+ */
+#define NLM_NEVER		(~(unsigned long) 0)
+/* timeout on non-blocking call: */
+#define NLM_TIMEOUT		(7 * HZ)
+
+struct nlm_block {
+	struct kref		b_count;	/* Reference count */
+	struct list_head	b_list;		/* linked list of all blocks */
+	struct list_head	b_flist;	/* linked list (per file) */
+	struct nlm_rqst	*	b_call;		/* RPC args & callback info */
+	struct svc_serv *	b_daemon;	/* NLM service */
+	struct nlm_host *	b_host;		/* host handle for RPC clnt */
+	unsigned long		b_when;		/* next re-xmit */
+	unsigned int		b_id;		/* block id */
+	unsigned char		b_granted;	/* VFS granted lock */
+	struct nlm_file *	b_file;		/* file in question */
+	struct cache_req *	b_cache_req;	/* deferred request handling */
+	struct cache_deferred_req * b_deferred_req;
+	unsigned int		b_flags;	/* block flags */
+#define B_QUEUED		1	/* lock queued */
+#define B_GOT_CALLBACK		2	/* got lock or conflicting lock */
+#define B_TIMED_OUT		4	/* filesystem too slow to respond */
+};
+
+/*
+ * Global variables
+ */
+extern const struct rpc_program	nlm_program;
+extern const struct svc_procedure nlmsvc_procedures[24];
+#ifdef CONFIG_LOCKD_V4
+extern const struct svc_procedure nlmsvc_procedures4[24];
+#endif
+extern int			nlmsvc_grace_period;
+extern unsigned long		nlm_timeout;
+extern bool			nsm_use_hostnames;
+extern u32			nsm_local_state;
+
+extern struct timer_list	nlmsvc_retry;
+
+/*
+ * Lockd client functions
+ */
+struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
+int		  nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+int		  nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+void		  nlmclnt_release_call(struct nlm_rqst *);
+void		  nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host,
+					struct file_lock *fl);
+void		  nlmclnt_queue_block(struct nlm_wait *block);
+__be32		  nlmclnt_dequeue_block(struct nlm_wait *block);
+int		  nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
+__be32		  nlmclnt_grant(const struct sockaddr *addr,
+				const struct nlm_lock *lock);
+void		  nlmclnt_recovery(struct nlm_host *);
+int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *,
+				  struct nlm_rqst *);
+void		  nlmclnt_next_cookie(struct nlm_cookie *);
+
+#ifdef CONFIG_LOCKD_V4
+extern const struct rpc_version nlm_version4;
+#endif
+
+/*
+ * Host cache
+ */
+struct nlm_host  *nlmclnt_lookup_host(const struct sockaddr *sap,
+					const size_t salen,
+					const unsigned short protocol,
+					const u32 version,
+					const char *hostname,
+					int noresvport,
+					struct net *net,
+					const struct cred *cred);
+void		  nlmclnt_release_host(struct nlm_host *);
+struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
+					const char *hostname,
+					const size_t hostname_len);
+void		  nlmsvc_release_host(struct nlm_host *);
+struct rpc_clnt * nlm_bind_host(struct nlm_host *);
+void		  nlm_rebind_host(struct nlm_host *);
+struct nlm_host * nlm_get_host(struct nlm_host *);
+void		  nlm_shutdown_hosts(void);
+void		  nlm_shutdown_hosts_net(struct net *net);
+void		  nlm_host_rebooted(const struct net *net,
+					const struct nlm_reboot *);
+
+/*
+ * Host monitoring
+ */
+int		  nsm_monitor(const struct nlm_host *host);
+void		  nsm_unmonitor(const struct nlm_host *host);
+
+struct nsm_handle *nsm_get_handle(const struct net *net,
+					const struct sockaddr *sap,
+					const size_t salen,
+					const char *hostname,
+					const size_t hostname_len);
+struct nsm_handle *nsm_reboot_lookup(const struct net *net,
+					const struct nlm_reboot *info);
+void		  nsm_release(struct nsm_handle *nsm);
+
+/*
+ * This is used in garbage collection and resource reclaim
+ * A return value != 0 means destroy the lock/block/share
+ */
+typedef int	  (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
+
+/*
+ * Server-side lock handling
+ */
+int		  lock_to_openmode(struct file_lock *);
+__be32		  nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
+			      struct nlm_host *, struct nlm_lock *, int,
+			      struct nlm_cookie *, int);
+__be32		  nlmsvc_unlock(struct net *net, struct nlm_file *, struct nlm_lock *);
+__be32		  nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
+			struct nlm_host *host, struct nlm_lock *lock,
+			struct nlm_lock *conflock);
+__be32		  nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
+void		  nlmsvc_retry_blocked(struct svc_rqst *rqstp);
+void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
+					nlm_host_match_fn_t match);
+void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
+void		  nlmsvc_release_call(struct nlm_rqst *);
+void		  nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
+
+/*
+ * File handling for the server personality
+ */
+__be32		  nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
+					struct nlm_lock *);
+void		  nlm_release_file(struct nlm_file *);
+void		  nlmsvc_put_lockowner(struct nlm_lockowner *);
+void		  nlmsvc_release_lockowner(struct nlm_lock *);
+void		  nlmsvc_mark_resources(struct net *);
+void		  nlmsvc_free_host_resources(struct nlm_host *);
+void		  nlmsvc_invalidate_all(void);
+
+static inline struct file *nlmsvc_file_file(const struct nlm_file *file)
+{
+	return file->f_file[O_RDONLY] ?
+	       file->f_file[O_RDONLY] : file->f_file[O_WRONLY];
+}
+
+static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
+{
+	return file_inode(nlmsvc_file_file(file));
+}
+
+static inline bool
+nlmsvc_file_cannot_lock(const struct nlm_file *file)
+{
+	return exportfs_cannot_lock(nlmsvc_file_file(file)->f_path.dentry->d_sb->s_export_op);
+}
+
+static inline int __nlm_privileged_request4(const struct sockaddr *sap)
+{
+	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+
+	if (ntohs(sin->sin_port) > 1023)
+		return 0;
+
+	return ipv4_is_loopback(sin->sin_addr.s_addr);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int __nlm_privileged_request6(const struct sockaddr *sap)
+{
+	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+
+	if (ntohs(sin6->sin6_port) > 1023)
+		return 0;
+
+	if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED)
+		return ipv4_is_loopback(sin6->sin6_addr.s6_addr32[3]);
+
+	return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK;
+}
+#else	/* IS_ENABLED(CONFIG_IPV6) */
+static inline int __nlm_privileged_request6(const struct sockaddr *sap)
+{
+	return 0;
+}
+#endif	/* IS_ENABLED(CONFIG_IPV6) */
+
+/*
+ * Ensure incoming requests are from local privileged callers.
+ *
+ * Return TRUE if sender is local and is connecting via a privileged port;
+ * otherwise return FALSE.
+ */
+static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
+{
+	const struct sockaddr *sap = svc_addr(rqstp);
+
+	switch (sap->sa_family) {
+	case AF_INET:
+		return __nlm_privileged_request4(sap);
+	case AF_INET6:
+		return __nlm_privileged_request6(sap);
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Compare two NLM locks.
+ * When the second lock is of type F_UNLCK, this acts like a wildcard.
+ */
+static inline int nlm_compare_locks(const struct file_lock *fl1,
+				    const struct file_lock *fl2)
+{
+	return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file)
+	     && fl1->c.flc_pid   == fl2->c.flc_pid
+	     && fl1->c.flc_owner == fl2->c.flc_owner
+	     && fl1->fl_start == fl2->fl_start
+	     && fl1->fl_end   == fl2->fl_end
+	     &&(fl1->c.flc_type  == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK);
+}
+
+extern const struct lock_manager_operations nlmsvc_lock_operations;
+
+#endif /* _LOCKD_LOCKD_H */
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index b8fc732e1c67..3d3ee88ca4dc 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -16,10 +16,10 @@
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/xprtsock.h>
 #include <linux/sunrpc/svc.h>
-#include <linux/lockd/lockd.h>
 
 #include <linux/unaligned.h>
 
+#include "lockd.h"
 #include "netns.h"
 
 #define NLMDBG_FACILITY		NLMDBG_MONITOR
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index dcd80c4e74c9..9dd7f8e11544 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -36,9 +36,9 @@
 #include <net/ip.h>
 #include <net/addrconf.h>
 #include <net/ipv6.h>
-#include <linux/lockd/lockd.h>
 #include <linux/nfs.h>
 
+#include "lockd.h"
 #include "netns.h"
 #include "procfs.h"
 #include "netlink.h"
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index da88b638d90d..86dfeb6ce68d 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -10,9 +10,9 @@
 
 #include <linux/types.h>
 #include <linux/time.h>
-#include <linux/lockd/lockd.h>
 #include <linux/sunrpc/svc_xprt.h>
 
+#include "lockd.h"
 #include "share.h"
 #include "xdr4.h"
 
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 5edf00751a1e..1c800fffe69c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -29,7 +29,8 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc_xprt.h>
 #include <linux/lockd/nlm.h>
-#include <linux/lockd/lockd.h>
+
+#include "lockd.h"
 
 #define NLMDBG_FACILITY		NLMDBG_SVCLOCK
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 8441fabd019f..e9a6bcc3bf2e 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -10,9 +10,9 @@
 
 #include <linux/types.h>
 #include <linux/time.h>
-#include <linux/lockd/lockd.h>
 #include <linux/sunrpc/svc_xprt.h>
 
+#include "lockd.h"
 #include "share.h"
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 8e06840834c6..8675ac80ab16 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -14,8 +14,8 @@
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
-#include <linux/lockd/lockd.h>
 
+#include "lockd.h"
 #include "share.h"
 
 static inline int
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index ce596a17112c..71eaec5ed8d7 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -15,11 +15,11 @@
 #include <linux/mutex.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/addr.h>
-#include <linux/lockd/lockd.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <uapi/linux/nfs2.h>
 
+#include "lockd.h"
 #include "share.h"
 
 #define NLMDBG_FACILITY		NLMDBG_SVCSUBS
diff --git a/fs/lockd/trace.h b/fs/lockd/trace.h
index 7461b13b6e74..7214d7e96a42 100644
--- a/fs/lockd/trace.h
+++ b/fs/lockd/trace.h
@@ -8,7 +8,8 @@
 #include <linux/tracepoint.h>
 #include <linux/crc32.h>
 #include <linux/nfs.h>
-#include <linux/lockd/lockd.h>
+
+#include "lockd.h"
 
 #ifdef CONFIG_LOCKD_V4
 #define NLM_STATUS_LIST					\
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index adfcce2bf11b..5aac49d1875a 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -15,13 +15,12 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/stats.h>
-#include <linux/lockd/lockd.h>
 
 #include <uapi/linux/nfs2.h>
 
+#include "lockd.h"
 #include "svcxdr.h"
 
-
 static inline loff_t
 s32_to_loff_t(__s32 offset)
 {
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 5b1e15977697..f57d4881d5f1 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -16,8 +16,8 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/stats.h>
-#include <linux/lockd/lockd.h>
 
+#include "lockd.h"
 #include "svcxdr.h"
 #include "xdr4.h"
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
deleted file mode 100644
index eebcecd12fae..000000000000
--- a/include/linux/lockd/lockd.h
+++ /dev/null
@@ -1,401 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/lockd.h
- *
- * General-purpose lockd include file.
- *
- * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_LOCKD_H
-#define LINUX_LOCKD_LOCKD_H
-
-/* XXX: a lot of this should really be under fs/lockd. */
-
-#include <linux/exportfs.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <net/ipv6.h>
-#include <linux/fs.h>
-#include <linux/kref.h>
-#include <linux/refcount.h>
-#include <linux/utsname.h>
-#include <linux/lockd/bind.h>
-#include <linux/lockd/xdr.h>
-#include <linux/lockd/debug.h>
-#include <linux/sunrpc/svc.h>
-
-/*
- * Version string
- */
-#define LOCKD_VERSION		"0.5"
-
-/*
- * Default timeout for RPC calls (seconds)
- */
-#define LOCKD_DFLT_TIMEO	10
-
-/*
- * Internal-use status codes, not to be placed on the wire.
- * Version handlers translate these to appropriate wire values.
- */
-#define nlm__int__drop_reply	cpu_to_be32(30000)
-#define nlm__int__deadlock	cpu_to_be32(30001)
-#define nlm__int__stale_fh	cpu_to_be32(30002)
-#define nlm__int__failed	cpu_to_be32(30003)
-
-/*
- * Lockd host handle (used both by the client and server personality).
- */
-struct nlm_host {
-	struct hlist_node	h_hash;		/* doubly linked list */
-	struct sockaddr_storage	h_addr;		/* peer address */
-	size_t			h_addrlen;
-	struct sockaddr_storage	h_srcaddr;	/* our address (optional) */
-	size_t			h_srcaddrlen;
-	struct rpc_clnt		*h_rpcclnt;	/* RPC client to talk to peer */
-	char			*h_name;		/* remote hostname */
-	u32			h_version;	/* interface version */
-	unsigned short		h_proto;	/* transport proto */
-	unsigned short		h_reclaiming : 1,
-				h_server     : 1, /* server side, not client side */
-				h_noresvport : 1,
-				h_inuse      : 1;
-	wait_queue_head_t	h_gracewait;	/* wait while reclaiming */
-	struct rw_semaphore	h_rwsem;	/* Reboot recovery lock */
-	u32			h_state;	/* pseudo-state counter */
-	u32			h_nsmstate;	/* true remote NSM state */
-	u32			h_pidcount;	/* Pseudopids */
-	refcount_t		h_count;	/* reference count */
-	struct mutex		h_mutex;	/* mutex for pmap binding */
-	unsigned long		h_nextrebind;	/* next portmap call */
-	unsigned long		h_expires;	/* eligible for GC */
-	struct list_head	h_lockowners;	/* Lockowners for the client */
-	spinlock_t		h_lock;
-	struct list_head	h_granted;	/* Locks in GRANTED state */
-	struct list_head	h_reclaim;	/* Locks in RECLAIM state */
-	struct nsm_handle	*h_nsmhandle;	/* NSM status handle */
-	char			*h_addrbuf;	/* address eyecatcher */
-	struct net		*net;		/* host net */
-	const struct cred	*h_cred;
-	char			nodename[UNX_MAXNODENAME + 1];
-	const struct nlmclnt_operations	*h_nlmclnt_ops;	/* Callback ops for NLM users */
-};
-
-/*
- * The largest string sm_addrbuf should hold is a full-size IPv6 address
- * (no "::" anywhere) with a scope ID.  The buffer size is computed to
- * hold eight groups of colon-separated four-hex-digit numbers, a
- * percent sign, a scope id (at most 32 bits, in decimal), and NUL.
- */
-#define NSM_ADDRBUF		((8 * 4 + 7) + (1 + 10) + 1)
-
-struct nsm_handle {
-	struct list_head	sm_link;
-	refcount_t		sm_count;
-	char			*sm_mon_name;
-	char			*sm_name;
-	struct sockaddr_storage	sm_addr;
-	size_t			sm_addrlen;
-	unsigned int		sm_monitored : 1,
-				sm_sticky : 1;	/* don't unmonitor */
-	struct nsm_private	sm_priv;
-	char			sm_addrbuf[NSM_ADDRBUF];
-};
-
-/*
- * Rigorous type checking on sockaddr type conversions
- */
-static inline struct sockaddr *nlm_addr(const struct nlm_host *host)
-{
-	return (struct sockaddr *)&host->h_addr;
-}
-
-static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host)
-{
-	return (struct sockaddr *)&host->h_srcaddr;
-}
-
-/*
- * Map an fl_owner_t into a unique 32-bit "pid"
- */
-struct nlm_lockowner {
-	struct list_head list;
-	refcount_t count;
-
-	struct nlm_host *host;
-	fl_owner_t owner;
-	uint32_t pid;
-};
-
-/*
- * This is the representation of a blocked client lock.
- */
-struct nlm_wait {
-	struct list_head	b_list;		/* linked list */
-	wait_queue_head_t	b_wait;		/* where to wait on */
-	struct nlm_host		*b_host;
-	struct file_lock	*b_lock;	/* local file lock */
-	__be32			b_status;	/* grant callback status */
-};
-
-/*
- * Memory chunk for NLM client RPC request.
- */
-#define NLMCLNT_OHSIZE		((__NEW_UTS_LEN) + 10u)
-struct nlm_rqst {
-	refcount_t		a_count;
-	unsigned int		a_flags;	/* initial RPC task flags */
-	struct nlm_host *	a_host;		/* host handle */
-	struct nlm_args		a_args;		/* arguments */
-	struct nlm_res		a_res;		/* result */
-	struct nlm_block *	a_block;
-	unsigned int		a_retries;	/* Retry count */
-	u8			a_owner[NLMCLNT_OHSIZE];
-	void *	a_callback_data; /* sent to nlmclnt_operations callbacks */
-};
-
-struct nlm_share;
-
-/*
- * This struct describes a file held open by lockd on behalf of
- * an NFS client.
- */
-struct nlm_file {
-	struct hlist_node	f_list;		/* linked list */
-	struct nfs_fh		f_handle;	/* NFS file handle */
-	struct file *		f_file[2];	/* VFS file pointers,
-						   indexed by O_ flags */
-	struct nlm_share *	f_shares;	/* DOS shares */
-	struct list_head	f_blocks;	/* blocked locks */
-	unsigned int		f_locks;	/* guesstimate # of locks */
-	unsigned int		f_count;	/* reference count */
-	struct mutex		f_mutex;	/* avoid concurrent access */
-};
-
-/*
- * This is a server block (i.e. a lock requested by some client which
- * couldn't be granted because of a conflicting lock).
- */
-#define NLM_NEVER		(~(unsigned long) 0)
-/* timeout on non-blocking call: */
-#define NLM_TIMEOUT		(7 * HZ)
-
-struct nlm_block {
-	struct kref		b_count;	/* Reference count */
-	struct list_head	b_list;		/* linked list of all blocks */
-	struct list_head	b_flist;	/* linked list (per file) */
-	struct nlm_rqst	*	b_call;		/* RPC args & callback info */
-	struct svc_serv *	b_daemon;	/* NLM service */
-	struct nlm_host *	b_host;		/* host handle for RPC clnt */
-	unsigned long		b_when;		/* next re-xmit */
-	unsigned int		b_id;		/* block id */
-	unsigned char		b_granted;	/* VFS granted lock */
-	struct nlm_file *	b_file;		/* file in question */
-	struct cache_req *	b_cache_req;	/* deferred request handling */
-	struct cache_deferred_req * b_deferred_req;
-	unsigned int		b_flags;	/* block flags */
-#define B_QUEUED		1	/* lock queued */
-#define B_GOT_CALLBACK		2	/* got lock or conflicting lock */
-#define B_TIMED_OUT		4	/* filesystem too slow to respond */
-};
-
-/*
- * Global variables
- */
-extern const struct rpc_program	nlm_program;
-extern const struct svc_procedure nlmsvc_procedures[24];
-#ifdef CONFIG_LOCKD_V4
-extern const struct svc_procedure nlmsvc_procedures4[24];
-#endif
-extern int			nlmsvc_grace_period;
-extern unsigned long		nlm_timeout;
-extern bool			nsm_use_hostnames;
-extern u32			nsm_local_state;
-
-extern struct timer_list	nlmsvc_retry;
-
-/*
- * Lockd client functions
- */
-struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
-int		  nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
-int		  nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
-void		  nlmclnt_release_call(struct nlm_rqst *);
-void		  nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host,
-					struct file_lock *fl);
-void		  nlmclnt_queue_block(struct nlm_wait *block);
-__be32		  nlmclnt_dequeue_block(struct nlm_wait *block);
-int		  nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
-__be32		  nlmclnt_grant(const struct sockaddr *addr,
-				const struct nlm_lock *lock);
-void		  nlmclnt_recovery(struct nlm_host *);
-int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *,
-				  struct nlm_rqst *);
-void		  nlmclnt_next_cookie(struct nlm_cookie *);
-
-#ifdef CONFIG_LOCKD_V4
-extern const struct rpc_version nlm_version4;
-#endif
-
-/*
- * Host cache
- */
-struct nlm_host  *nlmclnt_lookup_host(const struct sockaddr *sap,
-					const size_t salen,
-					const unsigned short protocol,
-					const u32 version,
-					const char *hostname,
-					int noresvport,
-					struct net *net,
-					const struct cred *cred);
-void		  nlmclnt_release_host(struct nlm_host *);
-struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
-					const char *hostname,
-					const size_t hostname_len);
-void		  nlmsvc_release_host(struct nlm_host *);
-struct rpc_clnt * nlm_bind_host(struct nlm_host *);
-void		  nlm_rebind_host(struct nlm_host *);
-struct nlm_host * nlm_get_host(struct nlm_host *);
-void		  nlm_shutdown_hosts(void);
-void		  nlm_shutdown_hosts_net(struct net *net);
-void		  nlm_host_rebooted(const struct net *net,
-					const struct nlm_reboot *);
-
-/*
- * Host monitoring
- */
-int		  nsm_monitor(const struct nlm_host *host);
-void		  nsm_unmonitor(const struct nlm_host *host);
-
-struct nsm_handle *nsm_get_handle(const struct net *net,
-					const struct sockaddr *sap,
-					const size_t salen,
-					const char *hostname,
-					const size_t hostname_len);
-struct nsm_handle *nsm_reboot_lookup(const struct net *net,
-					const struct nlm_reboot *info);
-void		  nsm_release(struct nsm_handle *nsm);
-
-/*
- * This is used in garbage collection and resource reclaim
- * A return value != 0 means destroy the lock/block/share
- */
-typedef int	  (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
-
-/*
- * Server-side lock handling
- */
-int		  lock_to_openmode(struct file_lock *);
-__be32		  nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
-			      struct nlm_host *, struct nlm_lock *, int,
-			      struct nlm_cookie *, int);
-__be32		  nlmsvc_unlock(struct net *net, struct nlm_file *, struct nlm_lock *);
-__be32		  nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
-			struct nlm_host *host, struct nlm_lock *lock,
-			struct nlm_lock *conflock);
-__be32		  nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
-void		  nlmsvc_retry_blocked(struct svc_rqst *rqstp);
-void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
-					nlm_host_match_fn_t match);
-void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
-void		  nlmsvc_release_call(struct nlm_rqst *);
-void		  nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
-
-/*
- * File handling for the server personality
- */
-__be32		  nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
-					struct nlm_lock *);
-void		  nlm_release_file(struct nlm_file *);
-void		  nlmsvc_put_lockowner(struct nlm_lockowner *);
-void		  nlmsvc_release_lockowner(struct nlm_lock *);
-void		  nlmsvc_mark_resources(struct net *);
-void		  nlmsvc_free_host_resources(struct nlm_host *);
-void		  nlmsvc_invalidate_all(void);
-
-static inline struct file *nlmsvc_file_file(const struct nlm_file *file)
-{
-	return file->f_file[O_RDONLY] ?
-	       file->f_file[O_RDONLY] : file->f_file[O_WRONLY];
-}
-
-static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
-{
-	return file_inode(nlmsvc_file_file(file));
-}
-
-static inline bool
-nlmsvc_file_cannot_lock(const struct nlm_file *file)
-{
-	return exportfs_cannot_lock(nlmsvc_file_file(file)->f_path.dentry->d_sb->s_export_op);
-}
-
-static inline int __nlm_privileged_request4(const struct sockaddr *sap)
-{
-	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
-
-	if (ntohs(sin->sin_port) > 1023)
-		return 0;
-
-	return ipv4_is_loopback(sin->sin_addr.s_addr);
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static inline int __nlm_privileged_request6(const struct sockaddr *sap)
-{
-	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
-
-	if (ntohs(sin6->sin6_port) > 1023)
-		return 0;
-
-	if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED)
-		return ipv4_is_loopback(sin6->sin6_addr.s6_addr32[3]);
-
-	return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK;
-}
-#else	/* IS_ENABLED(CONFIG_IPV6) */
-static inline int __nlm_privileged_request6(const struct sockaddr *sap)
-{
-	return 0;
-}
-#endif	/* IS_ENABLED(CONFIG_IPV6) */
-
-/*
- * Ensure incoming requests are from local privileged callers.
- *
- * Return TRUE if sender is local and is connecting via a privileged port;
- * otherwise return FALSE.
- */
-static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
-{
-	const struct sockaddr *sap = svc_addr(rqstp);
-
-	switch (sap->sa_family) {
-	case AF_INET:
-		return __nlm_privileged_request4(sap);
-	case AF_INET6:
-		return __nlm_privileged_request6(sap);
-	default:
-		return 0;
-	}
-}
-
-/*
- * Compare two NLM locks.
- * When the second lock is of type F_UNLCK, this acts like a wildcard.
- */
-static inline int nlm_compare_locks(const struct file_lock *fl1,
-				    const struct file_lock *fl2)
-{
-	return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file)
-	     && fl1->c.flc_pid   == fl2->c.flc_pid
-	     && fl1->c.flc_owner == fl2->c.flc_owner
-	     && fl1->fl_start == fl2->fl_start
-	     && fl1->fl_end   == fl2->fl_end
-	     &&(fl1->c.flc_type  == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK);
-}
-
-extern const struct lock_manager_operations nlmsvc_lock_operations;
-
-#endif /* LINUX_LOCKD_LOCKD_H */
-- 
cgit v1.2.3


From 236f3171ac690f632e13d391f47c68c3a8519bd2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:31 -0500
Subject: lockd: Remove lockd/debug.h

The lockd include structure has unnecessary indirection. The header
include/linux/lockd/debug.h is consumed only by fs/lockd/lockd.h,
creating an extra compilation dependency and making the code harder
to navigate.

Fold the debug.h definitions directly into lockd.h and remove the
now-redundant header. This reduces the include tree depth and makes
the debug-related definitions easier to find when working on lockd
internals.

Build-tested with lockd built as module and built-in.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/lockd.h            | 24 +++++++++++++++++++++++-
 include/linux/lockd/debug.h | 40 ----------------------------------------
 2 files changed, 23 insertions(+), 41 deletions(-)
 delete mode 100644 include/linux/lockd/debug.h

(limited to 'include/linux')

diff --git a/fs/lockd/lockd.h b/fs/lockd/lockd.h
index 9bcf89765a69..460ccb701749 100644
--- a/fs/lockd/lockd.h
+++ b/fs/lockd/lockd.h
@@ -16,9 +16,31 @@
 #include <linux/utsname.h>
 #include <linux/lockd/bind.h>
 #include <linux/lockd/xdr.h>
-#include <linux/lockd/debug.h>
+#include <linux/sunrpc/debug.h>
 #include <linux/sunrpc/svc.h>
 
+/*
+ * Enable lockd debugging.
+ * Requires CONFIG_SUNRPC_DEBUG.
+ */
+#undef ifdebug
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define ifdebug(flag)		if (unlikely(nlm_debug & NLMDBG_##flag))
+#else
+# define ifdebug(flag)		if (0)
+#endif
+
+#define NLMDBG_SVC		0x0001
+#define NLMDBG_CLIENT		0x0002
+#define NLMDBG_CLNTLOCK		0x0004
+#define NLMDBG_SVCLOCK		0x0008
+#define NLMDBG_MONITOR		0x0010
+#define NLMDBG_CLNTSUBS		0x0020
+#define NLMDBG_SVCSUBS		0x0040
+#define NLMDBG_HOSTCACHE	0x0080
+#define NLMDBG_XDR		0x0100
+#define NLMDBG_ALL		0x7fff
+
 /*
  * Version string
  */
diff --git a/include/linux/lockd/debug.h b/include/linux/lockd/debug.h
deleted file mode 100644
index eede2ab5246f..000000000000
--- a/include/linux/lockd/debug.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/debug.h
- *
- * Debugging stuff.
- *
- * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_DEBUG_H
-#define LINUX_LOCKD_DEBUG_H
-
-#include <linux/sunrpc/debug.h>
-
-/*
- * Enable lockd debugging.
- * Requires RPC_DEBUG.
- */
-#undef ifdebug
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define ifdebug(flag)		if (unlikely(nlm_debug & NLMDBG_##flag))
-#else
-# define ifdebug(flag)		if (0)
-#endif
-
-/*
- * Debug flags
- */
-#define NLMDBG_SVC		0x0001
-#define NLMDBG_CLIENT		0x0002
-#define NLMDBG_CLNTLOCK		0x0004
-#define NLMDBG_SVCLOCK		0x0008
-#define NLMDBG_MONITOR		0x0010
-#define NLMDBG_CLNTSUBS		0x0020
-#define NLMDBG_SVCSUBS		0x0040
-#define NLMDBG_HOSTCACHE	0x0080
-#define NLMDBG_XDR		0x0100
-#define NLMDBG_ALL		0x7fff
-
-#endif /* LINUX_LOCKD_DEBUG_H */
-- 
cgit v1.2.3


From 615384a24b1e6b0f091ebc1dfbf7ec8b4c27fa81 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:32 -0500
Subject: lockd: Move xdr.h from include/linux/lockd/ to fs/lockd/

The lockd subsystem unnecessarily exposes internal NLM XDR type
definitions through the global include path. These definitions
are not used by any code outside fs/lockd/, making them
inappropriate for include/linux/lockd/.

Moving xdr.h to fs/lockd/ narrows the API surface and clarifies
that these types are internal implementation details. The
comment in linux/lockd/bind.h stating xdr.h was needed for
"xdr-encoded error codes" is stale: no lockd API consumers use
those codes.

Forward declarations for struct nfs_fh and struct file_lock are
added to bind.h because their definitions were previously pulled
in transitively through xdr.h. Additionally, nfs3proc.c and
proc.c need explicit includes of filelock.h for FL_CLOSE and
for accessing struct file_lock members, respectively.

Built and tested with lockd client/server operations. No
functional change.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/lockd.h           |   2 +-
 fs/lockd/xdr.h             | 111 ++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs3proc.c          |   1 +
 fs/nfs/proc.c              |   1 +
 include/linux/lockd/bind.h |   5 +-
 include/linux/lockd/xdr.h  | 113 ---------------------------------------------
 6 files changed, 116 insertions(+), 117 deletions(-)
 create mode 100644 fs/lockd/xdr.h
 delete mode 100644 include/linux/lockd/xdr.h

(limited to 'include/linux')

diff --git a/fs/lockd/lockd.h b/fs/lockd/lockd.h
index 460ccb701749..6f83b9a7257f 100644
--- a/fs/lockd/lockd.h
+++ b/fs/lockd/lockd.h
@@ -15,7 +15,7 @@
 #include <linux/refcount.h>
 #include <linux/utsname.h>
 #include <linux/lockd/bind.h>
-#include <linux/lockd/xdr.h>
+#include "xdr.h"
 #include <linux/sunrpc/debug.h>
 #include <linux/sunrpc/svc.h>
 
diff --git a/fs/lockd/xdr.h b/fs/lockd/xdr.h
new file mode 100644
index 000000000000..af821ecf2a4e
--- /dev/null
+++ b/fs/lockd/xdr.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * XDR types for the NLM protocol
+ *
+ * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _LOCKD_XDR_H
+#define _LOCKD_XDR_H
+
+#include <linux/fs.h>
+#include <linux/filelock.h>
+#include <linux/nfs.h>
+#include <linux/sunrpc/xdr.h>
+
+#define SM_MAXSTRLEN		1024
+#define SM_PRIV_SIZE		16
+
+struct nsm_private {
+	unsigned char		data[SM_PRIV_SIZE];
+};
+
+struct svc_rqst;
+
+#define NLM_MAXCOOKIELEN    	32
+#define NLM_MAXSTRLEN		1024
+
+#define	nlm_granted		cpu_to_be32(NLM_LCK_GRANTED)
+#define	nlm_lck_denied		cpu_to_be32(NLM_LCK_DENIED)
+#define	nlm_lck_denied_nolocks	cpu_to_be32(NLM_LCK_DENIED_NOLOCKS)
+#define	nlm_lck_blocked		cpu_to_be32(NLM_LCK_BLOCKED)
+#define	nlm_lck_denied_grace_period	cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD)
+
+/* Lock info passed via NLM */
+struct nlm_lock {
+	char *			caller;
+	unsigned int		len; 	/* length of "caller" */
+	struct nfs_fh		fh;
+	struct xdr_netobj	oh;
+	u32			svid;
+	u64			lock_start;
+	u64			lock_len;
+	struct file_lock	fl;
+};
+
+/*
+ *	NLM cookies. Technically they can be 1K, but Linux only uses 8 bytes.
+ *	FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to
+ *	32 bytes.
+ */
+
+struct nlm_cookie
+{
+	unsigned char data[NLM_MAXCOOKIELEN];
+	unsigned int len;
+};
+
+/*
+ * Generic lockd arguments for all but sm_notify
+ */
+struct nlm_args {
+	struct nlm_cookie	cookie;
+	struct nlm_lock		lock;
+	u32			block;
+	u32			reclaim;
+	u32			state;
+	u32			monitor;
+	u32			fsm_access;
+	u32			fsm_mode;
+};
+
+/*
+ * Generic lockd result
+ */
+struct nlm_res {
+	struct nlm_cookie	cookie;
+	__be32			status;
+	struct nlm_lock		lock;
+};
+
+/*
+ * statd callback when client has rebooted
+ */
+struct nlm_reboot {
+	char			*mon;
+	unsigned int		len;
+	u32			state;
+	struct nsm_private	priv;
+};
+
+/*
+ * Contents of statd callback when monitored host rebooted
+ */
+#define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
+
+bool	nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+bool	nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+#endif /* _LOCKD_XDR_H */
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index be2aebf62056..95d7cd564b74 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -16,6 +16,7 @@
 #include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
+#include <linux/filelock.h>
 #include <linux/lockd/bind.h>
 #include <linux/nfs_mount.h>
 #include <linux/freezer.h>
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 8c3d2efa2636..70795684b8e8 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -41,6 +41,7 @@
 #include <linux/nfs2.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
+#include <linux/filelock.h>
 #include <linux/lockd/bind.h>
 #include <linux/freezer.h>
 #include "internal.h"
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 077da0696f12..ba9258c96bfd 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -11,10 +11,9 @@
 #define LINUX_LOCKD_BIND_H
 
 #include <linux/lockd/nlm.h>
-/* need xdr-encoded error codes too, so... */
-#include <linux/lockd/xdr.h>
 
-/* Dummy declarations */
+struct file_lock;
+struct nfs_fh;
 struct svc_rqst;
 struct rpc_task;
 struct rpc_clnt;
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
deleted file mode 100644
index 292e4e38d17d..000000000000
--- a/include/linux/lockd/xdr.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/xdr.h
- *
- * XDR types for the NLM protocol
- *
- * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LOCKD_XDR_H
-#define LOCKD_XDR_H
-
-#include <linux/fs.h>
-#include <linux/filelock.h>
-#include <linux/nfs.h>
-#include <linux/sunrpc/xdr.h>
-
-#define SM_MAXSTRLEN		1024
-#define SM_PRIV_SIZE		16
-
-struct nsm_private {
-	unsigned char		data[SM_PRIV_SIZE];
-};
-
-struct svc_rqst;
-
-#define NLM_MAXCOOKIELEN    	32
-#define NLM_MAXSTRLEN		1024
-
-#define	nlm_granted		cpu_to_be32(NLM_LCK_GRANTED)
-#define	nlm_lck_denied		cpu_to_be32(NLM_LCK_DENIED)
-#define	nlm_lck_denied_nolocks	cpu_to_be32(NLM_LCK_DENIED_NOLOCKS)
-#define	nlm_lck_blocked		cpu_to_be32(NLM_LCK_BLOCKED)
-#define	nlm_lck_denied_grace_period	cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD)
-
-/* Lock info passed via NLM */
-struct nlm_lock {
-	char *			caller;
-	unsigned int		len; 	/* length of "caller" */
-	struct nfs_fh		fh;
-	struct xdr_netobj	oh;
-	u32			svid;
-	u64			lock_start;
-	u64			lock_len;
-	struct file_lock	fl;
-};
-
-/*
- *	NLM cookies. Technically they can be 1K, but Linux only uses 8 bytes.
- *	FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to
- *	32 bytes.
- */
-
-struct nlm_cookie
-{
-	unsigned char data[NLM_MAXCOOKIELEN];
-	unsigned int len;
-};
-
-/*
- * Generic lockd arguments for all but sm_notify
- */
-struct nlm_args {
-	struct nlm_cookie	cookie;
-	struct nlm_lock		lock;
-	u32			block;
-	u32			reclaim;
-	u32			state;
-	u32			monitor;
-	u32			fsm_access;
-	u32			fsm_mode;
-};
-
-/*
- * Generic lockd result
- */
-struct nlm_res {
-	struct nlm_cookie	cookie;
-	__be32			status;
-	struct nlm_lock		lock;
-};
-
-/*
- * statd callback when client has rebooted
- */
-struct nlm_reboot {
-	char			*mon;
-	unsigned int		len;
-	u32			state;
-	struct nsm_private	priv;
-};
-
-/*
- * Contents of statd callback when monitored host rebooted
- */
-#define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
-
-bool	nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-bool	nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-#endif /* LOCKD_XDR_H */
-- 
cgit v1.2.3


From 5829352e568d24dd04ae112128a4f44748d073bc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:33 -0500
Subject: lockd: Make linux/lockd/nlm.h an internal header

The NLM protocol constants and status codes in nlm.h are needed
only by lockd's internal implementation. NFS client code and
NFSD interact with lockd through the stable API in bind.h and
have no direct use for protocol-level definitions.

Exposing these definitions globally via bind.h creates unnecessary
coupling between lockd internals and its consumers. Moving nlm.h
from include/linux/lockd/ to fs/lockd/ clarifies the API boundary:
bind.h provides the lockd service interface, while nlm.h remains
available only to code within fs/lockd/ that implements the
protocol.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/lockd.h           |  1 +
 fs/lockd/nlm.h             | 56 ++++++++++++++++++++++++++++++++++++++++++++
 fs/lockd/svclock.c         |  1 -
 include/linux/lockd/bind.h |  2 --
 include/linux/lockd/nlm.h  | 58 ----------------------------------------------
 5 files changed, 57 insertions(+), 61 deletions(-)
 create mode 100644 fs/lockd/nlm.h
 delete mode 100644 include/linux/lockd/nlm.h

(limited to 'include/linux')

diff --git a/fs/lockd/lockd.h b/fs/lockd/lockd.h
index 6f83b9a7257f..e73c6b348154 100644
--- a/fs/lockd/lockd.h
+++ b/fs/lockd/lockd.h
@@ -14,6 +14,7 @@
 #include <linux/kref.h>
 #include <linux/refcount.h>
 #include <linux/utsname.h>
+#include "nlm.h"
 #include <linux/lockd/bind.h>
 #include "xdr.h"
 #include <linux/sunrpc/debug.h>
diff --git a/fs/lockd/nlm.h b/fs/lockd/nlm.h
new file mode 100644
index 000000000000..47be65d0111f
--- /dev/null
+++ b/fs/lockd/nlm.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Declarations for the Network Lock Manager protocol.
+ *
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _LOCKD_NLM_H
+#define _LOCKD_NLM_H
+
+
+/* Maximum file offset in file_lock.fl_end */
+# define NLM_OFFSET_MAX		((s32) 0x7fffffff)
+# define NLM4_OFFSET_MAX	((s64) ((~(u64)0) >> 1))
+
+/* Return states for NLM */
+enum {
+	NLM_LCK_GRANTED			= 0,
+	NLM_LCK_DENIED			= 1,
+	NLM_LCK_DENIED_NOLOCKS		= 2,
+	NLM_LCK_BLOCKED			= 3,
+	NLM_LCK_DENIED_GRACE_PERIOD	= 4,
+#ifdef CONFIG_LOCKD_V4
+	NLM_DEADLCK			= 5,
+	NLM_ROFS			= 6,
+	NLM_STALE_FH			= 7,
+	NLM_FBIG			= 8,
+	NLM_FAILED			= 9,
+#endif
+};
+
+#define NLM_PROGRAM		100021
+
+#define NLMPROC_NULL		0
+#define NLMPROC_TEST		1
+#define NLMPROC_LOCK		2
+#define NLMPROC_CANCEL		3
+#define NLMPROC_UNLOCK		4
+#define NLMPROC_GRANTED		5
+#define NLMPROC_TEST_MSG	6
+#define NLMPROC_LOCK_MSG	7
+#define NLMPROC_CANCEL_MSG	8
+#define NLMPROC_UNLOCK_MSG	9
+#define NLMPROC_GRANTED_MSG	10
+#define NLMPROC_TEST_RES	11
+#define NLMPROC_LOCK_RES	12
+#define NLMPROC_CANCEL_RES	13
+#define NLMPROC_UNLOCK_RES	14
+#define NLMPROC_GRANTED_RES	15
+#define NLMPROC_NSM_NOTIFY	16		/* statd callback */
+#define NLMPROC_SHARE		20
+#define NLMPROC_UNSHARE		21
+#define NLMPROC_NM_LOCK		22
+#define NLMPROC_FREE_ALL	23
+
+#endif /* _LOCKD_NLM_H */
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 1c800fffe69c..e687103e42d1 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -28,7 +28,6 @@
 #include <linux/sched.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc_xprt.h>
-#include <linux/lockd/nlm.h>
 
 #include "lockd.h"
 
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index ba9258c96bfd..b614e0deea72 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -10,8 +10,6 @@
 #ifndef LINUX_LOCKD_BIND_H
 #define LINUX_LOCKD_BIND_H
 
-#include <linux/lockd/nlm.h>
-
 struct file_lock;
 struct nfs_fh;
 struct svc_rqst;
diff --git a/include/linux/lockd/nlm.h b/include/linux/lockd/nlm.h
deleted file mode 100644
index 6e343ef760dc..000000000000
--- a/include/linux/lockd/nlm.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/nlm.h
- *
- * Declarations for the Network Lock Manager protocol.
- *
- * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_NLM_H
-#define LINUX_LOCKD_NLM_H
-
-
-/* Maximum file offset in file_lock.fl_end */
-# define NLM_OFFSET_MAX		((s32) 0x7fffffff)
-# define NLM4_OFFSET_MAX	((s64) ((~(u64)0) >> 1))
-
-/* Return states for NLM */
-enum {
-	NLM_LCK_GRANTED			= 0,
-	NLM_LCK_DENIED			= 1,
-	NLM_LCK_DENIED_NOLOCKS		= 2,
-	NLM_LCK_BLOCKED			= 3,
-	NLM_LCK_DENIED_GRACE_PERIOD	= 4,
-#ifdef CONFIG_LOCKD_V4
-	NLM_DEADLCK			= 5,
-	NLM_ROFS			= 6,
-	NLM_STALE_FH			= 7,
-	NLM_FBIG			= 8,
-	NLM_FAILED			= 9,
-#endif
-};
-
-#define NLM_PROGRAM		100021
-
-#define NLMPROC_NULL		0
-#define NLMPROC_TEST		1
-#define NLMPROC_LOCK		2
-#define NLMPROC_CANCEL		3
-#define NLMPROC_UNLOCK		4
-#define NLMPROC_GRANTED		5
-#define NLMPROC_TEST_MSG	6
-#define NLMPROC_LOCK_MSG	7
-#define NLMPROC_CANCEL_MSG	8
-#define NLMPROC_UNLOCK_MSG	9
-#define NLMPROC_GRANTED_MSG	10
-#define NLMPROC_TEST_RES	11
-#define NLMPROC_LOCK_RES	12
-#define NLMPROC_CANCEL_RES	13
-#define NLMPROC_UNLOCK_RES	14
-#define NLMPROC_GRANTED_RES	15
-#define NLMPROC_NSM_NOTIFY	16		/* statd callback */
-#define NLMPROC_SHARE		20
-#define NLMPROC_UNSHARE		21
-#define NLMPROC_NM_LOCK		22
-#define NLMPROC_FREE_ALL	23
-
-#endif /* LINUX_LOCKD_NLM_H */
-- 
cgit v1.2.3


From adcc59114ccd402259c089b0fea24da5e4974563 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Feb 2026 21:21:50 +0100
Subject: sunrpc: Kill RPC_IFDEBUG()

RPC_IFDEBUG() is used in only two places. In one the user of
the definition is guarded by ifdeffery, in the second one
it's implied due to dprintk() usage. Kill the macro and move
the ifdeffery to the regular condition with the variable defined
inside, while in the second case add the same conditional and
move the respective code there.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfsfh.c                          |  9 ++++++---
 include/linux/sunrpc/debug.h             |  2 --
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 27 ++++++++++++++-------------
 3 files changed, 20 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ed85dd43da18..68b629fbaaeb 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -105,9 +105,12 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
 {
 	/* Check if the request originated from a secure port. */
 	if (rqstp && !nfsd_originating_port_ok(rqstp, cred, exp)) {
-		RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
-		dprintk("nfsd: request from insecure port %s!\n",
-		        svc_print_addr(rqstp, buf, sizeof(buf)));
+		if (IS_ENABLED(CONFIG_SUNRPC_DEBUG)) {
+			char buf[RPC_MAX_ADDRBUFLEN];
+
+			dprintk("nfsd: request from insecure port %s!\n",
+			        svc_print_addr(rqstp, buf, sizeof(buf)));
+		}
 		return nfserr_perm;
 	}
 
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index eb4bd62df319..93d1a11ffbfb 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -49,12 +49,10 @@ do {									\
 	}								\
 } while (0)
 
-# define RPC_IFDEBUG(x)		x
 #else
 # define ifdebug(fac)		if (0)
 # define dfprintk(fac, fmt, ...)	do {} while (0)
 # define dfprintk_rcu(fac, fmt, ...)	do {} while (0)
-# define RPC_IFDEBUG(x)
 #endif
 
 /*
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9b623849723e..f2d72181a6fe 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -414,7 +414,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	struct ib_qp_init_attr qp_attr;
 	struct ib_device *dev;
 	int ret = 0;
-	RPC_IFDEBUG(struct sockaddr *sap);
 
 	listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
 	clear_bit(XPT_CONN, &xprt->xpt_flags);
@@ -560,18 +559,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		goto errout;
 	}
 
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-	dprintk("svcrdma: new connection accepted on device %s:\n", dev->name);
-	sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
-	dprintk("    local address   : %pIS:%u\n", sap, rpc_get_port(sap));
-	sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
-	dprintk("    remote address  : %pIS:%u\n", sap, rpc_get_port(sap));
-	dprintk("    max_sge         : %d\n", newxprt->sc_max_send_sges);
-	dprintk("    sq_depth        : %d\n", newxprt->sc_sq_depth);
-	dprintk("    rdma_rw_ctxs    : %d\n", ctxts);
-	dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
-	dprintk("    ord             : %d\n", conn_param.initiator_depth);
-#endif
+	if (IS_ENABLED(CONFIG_SUNRPC_DEBUG)) {
+		struct sockaddr *sap;
+
+		dprintk("svcrdma: new connection accepted on device %s:\n", dev->name);
+		sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
+		dprintk("    local address   : %pIS:%u\n", sap, rpc_get_port(sap));
+		sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
+		dprintk("    remote address  : %pIS:%u\n", sap, rpc_get_port(sap));
+		dprintk("    max_sge         : %d\n", newxprt->sc_max_send_sges);
+		dprintk("    sq_depth        : %d\n", newxprt->sc_sq_depth);
+		dprintk("    rdma_rw_ctxs    : %d\n", ctxts);
+		dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
+		dprintk("    ord             : %d\n", conn_param.initiator_depth);
+	}
 
 	return &newxprt->sc_xprt;
 
-- 
cgit v1.2.3


From 6f57293abb8d087de830dd3f02e66d94b3e59973 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Feb 2026 21:21:51 +0100
Subject: sunrpc: Fix compilation error (`make W=1`) when dprintk() is no-op

Clang compiler is not happy about set but unused variables:

.../flexfilelayout/flexfilelayoutdev.c:56:9: error: variable 'ret' set but not used [-Werror,-Wunused-but-set-variable]
.../flexfilelayout/flexfilelayout.c:1505:6: error: variable 'err' set but not used [-Werror,-Wunused-but-set-variable]
.../nfs4proc.c:9244:12: error: variable 'ptr' set but not used [-Werror,-Wunused-but-set-variable]

Fix these by forwarding parameters of dprintk() to no_printk().
The positive side-effect is a format-string checker enabled even for the cases
when dprintk() is no-op.

Fixes: d67ae825a59d ("pnfs/flexfiles: Add the FlexFile Layout Driver")
Fixes: fc931582c260 ("nfs41: create_session operation")
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svclock.c           | 5 +++++
 include/linux/sunrpc/debug.h | 8 ++++++--
 include/linux/sunrpc/sched.h | 3 ---
 3 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index e687103e42d1..ee23f5802af1 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -74,6 +74,11 @@ static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
 
 	return buf;
 }
+#else
+static inline const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
+{
+	return "???";
+}
 #endif
 
 /*
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 93d1a11ffbfb..ab61bed2f7af 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -38,6 +38,8 @@ extern unsigned int		nlm_debug;
 do {									\
 	ifdebug(fac)							\
 		__sunrpc_printk(fmt, ##__VA_ARGS__);			\
+	else								\
+		no_printk(fmt, ##__VA_ARGS__);				\
 } while (0)
 
 # define dfprintk_rcu(fac, fmt, ...)					\
@@ -46,13 +48,15 @@ do {									\
 		rcu_read_lock();					\
 		__sunrpc_printk(fmt, ##__VA_ARGS__);			\
 		rcu_read_unlock();					\
+	} else {							\
+		no_printk(fmt, ##__VA_ARGS__);				\
 	}								\
 } while (0)
 
 #else
 # define ifdebug(fac)		if (0)
-# define dfprintk(fac, fmt, ...)	do {} while (0)
-# define dfprintk_rcu(fac, fmt, ...)	do {} while (0)
+# define dfprintk(fac, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
+# define dfprintk_rcu(fac, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /*
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index ccba79ebf893..0dbdf3722537 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -95,10 +95,7 @@ struct rpc_task {
 	int			tk_rpc_status;	/* Result of last RPC operation */
 	unsigned short		tk_flags;	/* misc flags */
 	unsigned short		tk_timeouts;	/* maj timeouts */
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
 	unsigned short		tk_pid;		/* debugging aid */
-#endif
 	unsigned char		tk_priority : 2,/* Task priority */
 				tk_garb_retry : 2,
 				tk_cred_retry : 2;
-- 
cgit v1.2.3


From f52792f484ba2316853736856dde19b7e7458861 Mon Sep 17 00:00:00 2001
From: Dai Ngo <dai.ngo@oracle.com>
Date: Fri, 13 Feb 2026 10:36:30 -0800
Subject: NFSD: Enforce timeout on layout recall and integrate lease manager
 fencing

When a layout conflict triggers a recall, enforcing a timeout is
necessary to prevent excessive nfsd threads from being blocked in
__break_lease ensuring the server continues servicing incoming
requests efficiently.

This patch introduces a new function to lease_manager_operations:

lm_breaker_timedout: Invoked when a lease recall times out and is
about to be disposed of. This function enables the lease manager
to inform the caller whether the file_lease should remain on the
flc_list or be disposed of.

For the NFSD lease manager, this function now handles layout recall
timeouts. If the layout type supports fencing and the client has not
been fenced, a fence operation is triggered to prevent the client
from accessing the block device.

While the fencing operation is in progress, the conflicting file_lease
remains on the flc_list until fencing is complete. This guarantees
that no other clients can access the file, and the client with
exclusive access is properly blocked before disposal.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 .../admin-guide/nfs/pnfs-block-server.rst          |  30 ++++
 Documentation/admin-guide/nfs/pnfs-scsi-server.rst |  31 +++++
 Documentation/filesystems/locking.rst              |   2 +
 fs/locks.c                                         |  26 +++-
 fs/nfsd/blocklayout.c                              |  42 +++++-
 fs/nfsd/nfs4layouts.c                              | 152 ++++++++++++++++++++-
 fs/nfsd/nfs4state.c                                |   1 +
 fs/nfsd/pnfs.h                                     |   5 +-
 fs/nfsd/state.h                                    |   6 +
 include/linux/filelock.h                           |   1 +
 10 files changed, 279 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/nfs/pnfs-block-server.rst b/Documentation/admin-guide/nfs/pnfs-block-server.rst
index 20fe9f5117fe..b4f5997009af 100644
--- a/Documentation/admin-guide/nfs/pnfs-block-server.rst
+++ b/Documentation/admin-guide/nfs/pnfs-block-server.rst
@@ -40,3 +40,33 @@ how to translate the device into a serial number from SCSI EVPD 0x80::
 
 	echo "fencing client ${CLIENT} serial ${EVPD}" >> /var/log/pnfsd-fence.log
 	EOF
+
+If the nfsd server needs to fence a non-responding client and the
+fencing operation fails, the server logs a warning message in the
+system log with the following format:
+
+    FENCE failed client[IP_address] clid[#n] device[dev_name]
+
+    Where:
+
+    IP_address: refers to the IP address of the affected client.
+    #n: indicates the unique client identifier.
+    dev_name: specifies the name of the block device related
+              to the fencing attempt.
+
+The server will repeatedly retry the operation indefinitely. During
+this time, access to the affected file is restricted for all other
+clients. This is to prevent potential data corruption if multiple
+clients access the same file simultaneously.
+
+To restore access to the affected file for other clients, the admin
+needs to take the following actions:
+
+    . shutdown or power off the client being fenced.
+    . manually expire the client to release all its state on the server:
+
+      echo 'expire' > /proc/fs/nfsd/clients/clid/ctl'.
+
+      Where:
+
+      clid: is the unique client identifier displayed in the system log.
diff --git a/Documentation/admin-guide/nfs/pnfs-scsi-server.rst b/Documentation/admin-guide/nfs/pnfs-scsi-server.rst
index b2eec2288329..db34afbf67a9 100644
--- a/Documentation/admin-guide/nfs/pnfs-scsi-server.rst
+++ b/Documentation/admin-guide/nfs/pnfs-scsi-server.rst
@@ -22,3 +22,34 @@ option and the underlying SCSI device support persistent reservations.
 On the client make sure the kernel has the CONFIG_PNFS_BLOCK option
 enabled, and the file system is mounted using the NFSv4.1 protocol
 version (mount -o vers=4.1).
+
+If the nfsd server needs to fence a non-responding client and the
+fencing operation fails, the server logs a warning message in the
+system log with the following format:
+
+    FENCE failed client[IP_address] clid[#n] device[dev_name]
+
+    Where:
+
+    IP_address: refers to the IP address of the affected client.
+    #n: indicates the unique client identifier.
+    dev_name: specifies the name of the block device related
+              to the fencing attempt.
+
+The server will repeatedly retry the operation indefinitely. During
+this time, access to the affected file is restricted for all other
+clients. This is to prevent potential data corruption if multiple
+clients access the same file simultaneously.
+
+To restore access to the affected file for other clients, the admin
+needs to take the following actions:
+
+    . shutdown or power off the client being fenced.
+    . manually expire the client to release all its state on the server:
+
+      echo 'expire' > /proc/fs/nfsd/clients/clid/ctl'.
+
+      Where:
+
+      clid: is the unique client identifier displayed in the system log.
+
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 8025df6e6499..8421ea21bd35 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -398,6 +398,7 @@ prototypes::
 	bool (*lm_breaker_owns_lease)(struct file_lock *);
         bool (*lm_lock_expirable)(struct file_lock *);
         void (*lm_expire_lock)(void);
+        bool (*lm_breaker_timedout)(struct file_lease *);
 
 locking rules:
 
@@ -412,6 +413,7 @@ lm_breaker_owns_lease:	yes     	no			no
 lm_lock_expirable	yes		no			no
 lm_expire_lock		no		no			yes
 lm_open_conflict	yes		no			no
+lm_breaker_timedout     yes             no                      no
 ======================	=============	=================	=========
 
 buffer_head
diff --git a/fs/locks.c b/fs/locks.c
index d13ec930b7bb..8e44b1f6c15a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1534,6 +1534,7 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
 {
 	struct file_lock_context *ctx = inode->i_flctx;
 	struct file_lease *fl, *tmp;
+	bool remove;
 
 	lockdep_assert_held(&ctx->flc_lock);
 
@@ -1541,8 +1542,19 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
 		trace_time_out_leases(inode, fl);
 		if (past_time(fl->fl_downgrade_time))
 			lease_modify(fl, F_RDLCK, dispose);
-		if (past_time(fl->fl_break_time))
-			lease_modify(fl, F_UNLCK, dispose);
+
+		remove = true;
+		if (past_time(fl->fl_break_time)) {
+			/*
+			 * Consult the lease manager when a lease break times
+			 * out to determine whether the lease should be disposed
+			 * of.
+			 */
+			if (fl->fl_lmops && fl->fl_lmops->lm_breaker_timedout)
+				remove = fl->fl_lmops->lm_breaker_timedout(fl);
+			if (remove)
+				lease_modify(fl, F_UNLCK, dispose);
+		}
 	}
 }
 
@@ -1670,9 +1682,13 @@ int __break_lease(struct inode *inode, unsigned int flags)
 restart:
 	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
 	break_time = fl->fl_break_time;
-	if (break_time != 0)
-		break_time -= jiffies;
-	if (break_time == 0)
+	if (break_time != 0) {
+		if (time_after(jiffies, break_time)) {
+			fl->fl_break_time = jiffies + lease_break_time * HZ;
+			break_time = lease_break_time * HZ;
+		} else
+			break_time -= jiffies;
+	} else
 		break_time++;
 	locks_insert_block(&fl->c, &new_fl->c, leases_conflict);
 	trace_break_lease_block(inode, new_fl);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 8b987fca1e60..9d829c84f374 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -297,6 +297,7 @@ static inline int nfsd4_scsi_fence_insert(struct nfs4_client *clp,
 		ret = 0;
 	}
 	xa_unlock(xa);
+	clp->cl_fence_retry_warn = false;
 	return ret;
 }
 
@@ -443,15 +444,33 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
 	return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
 }
 
-static void
+/*
+ * Perform the fence operation to prevent the client from accessing the
+ * block device. If a fence operation is already in progress, wait for
+ * it to complete before checking the NFSD_MDS_PR_FENCED flag. Once the
+ * operation is complete, check the flag. If NFSD_MDS_PR_FENCED is set,
+ * update the layout stateid by setting the ls_fenced flag to indicate
+ * that the client has been fenced.
+ *
+ * The cl_fence_mutex ensures that the fence operation has been fully
+ * completed, rather than just in progress, when returning from this
+ * function.
+ *
+ * Return true if client was fenced otherwise return false.
+ */
+static bool
 nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
 {
 	struct nfs4_client *clp = ls->ls_stid.sc_client;
 	struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
 	int status;
+	bool ret;
 
-	if (nfsd4_scsi_fence_set(clp, bdev->bd_dev))
-		return;
+	mutex_lock(&clp->cl_fence_mutex);
+	if (nfsd4_scsi_fence_set(clp, bdev->bd_dev)) {
+		mutex_unlock(&clp->cl_fence_mutex);
+		return true;
+	}
 
 	status = bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
 			nfsd4_scsi_pr_key(clp),
@@ -470,13 +489,22 @@ nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
 	 * PR_STS_RESERVATION_CONFLICT, which would cause an infinite
 	 * retry loop.
 	 */
-	if (status < 0 ||
-	    status == PR_STS_PATH_FAILED ||
-	    status == PR_STS_PATH_FAST_FAILED ||
-	    status == PR_STS_RETRY_PATH_FAILURE)
+	switch (status) {
+	case 0:
+	case PR_STS_IOERR:
+	case PR_STS_RESERVATION_CONFLICT:
+		ret = true;
+		break;
+	default:
+		/* retry-able and other errors */
+		ret = false;
 		nfsd4_scsi_fence_clear(clp, bdev->bd_dev);
+		break;
+	}
+	mutex_unlock(&clp->cl_fence_mutex);
 
 	trace_nfsd_pnfs_fence(clp, bdev->bd_disk->disk_name, status);
+	return ret;
 }
 
 const struct nfsd4_layout_ops scsi_layout_ops = {
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ad7af8cfcf1f..69e41105efdd 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -27,6 +27,8 @@ static struct kmem_cache *nfs4_layout_stateid_cache;
 static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
 static const struct lease_manager_operations nfsd4_layouts_lm_ops;
 
+static void nfsd4_layout_fence_worker(struct work_struct *work);
+
 const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] =  {
 #ifdef CONFIG_NFSD_FLEXFILELAYOUT
 	[LAYOUT_FLEX_FILES]	= &ff_layout_ops,
@@ -177,6 +179,13 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
 
 	trace_nfsd_layoutstate_free(&ls->ls_stid.sc_stateid);
 
+	spin_lock(&ls->ls_lock);
+	if (delayed_work_pending(&ls->ls_fence_work)) {
+		spin_unlock(&ls->ls_lock);
+		cancel_delayed_work_sync(&ls->ls_fence_work);
+	} else
+		spin_unlock(&ls->ls_lock);
+
 	spin_lock(&clp->cl_lock);
 	list_del_init(&ls->ls_perclnt);
 	spin_unlock(&clp->cl_lock);
@@ -271,6 +280,10 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
 	list_add(&ls->ls_perfile, &fp->fi_lo_states);
 	spin_unlock(&fp->fi_lock);
 
+	ls->ls_fenced = false;
+	ls->ls_fence_delay = 0;
+	INIT_DELAYED_WORK(&ls->ls_fence_work, nfsd4_layout_fence_worker);
+
 	trace_nfsd_layoutstate_alloc(&ls->ls_stid.sc_stateid);
 	return ls;
 }
@@ -747,11 +760,9 @@ static bool
 nfsd4_layout_lm_break(struct file_lease *fl)
 {
 	/*
-	 * We don't want the locks code to timeout the lease for us;
-	 * we'll remove it ourself if a layout isn't returned
-	 * in time:
+	 * Enforce break lease timeout to prevent NFSD
+	 * thread from hanging in __break_lease.
 	 */
-	fl->fl_break_time = 0;
 	nfsd4_recall_file_layout(fl->c.flc_owner);
 	return false;
 }
@@ -782,10 +793,143 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
 	return 0;
 }
 
+static void
+nfsd4_layout_fence_worker(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct nfs4_layout_stateid *ls = container_of(dwork,
+			struct nfs4_layout_stateid, ls_fence_work);
+	struct nfsd_file *nf;
+	struct block_device *bdev;
+	struct nfs4_client *clp;
+	struct nfsd_net *nn;
+
+	/*
+	 * The workqueue clears WORK_STRUCT_PENDING before invoking
+	 * this callback. Re-arm immediately so that
+	 * delayed_work_pending() returns true while the fence
+	 * operation is in progress, preventing
+	 * lm_breaker_timedout() from taking a duplicate reference.
+	 */
+	mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0);
+
+	spin_lock(&ls->ls_lock);
+	if (list_empty(&ls->ls_layouts)) {
+		spin_unlock(&ls->ls_lock);
+dispose:
+		cancel_delayed_work(&ls->ls_fence_work);
+		/* unlock the lease so that tasks waiting on it can proceed */
+		nfsd4_close_layout(ls);
+
+		ls->ls_fenced = true;
+		nfs4_put_stid(&ls->ls_stid);
+		return;
+	}
+	spin_unlock(&ls->ls_lock);
+
+	rcu_read_lock();
+	nf = nfsd_file_get(ls->ls_file);
+	rcu_read_unlock();
+	if (!nf)
+		goto dispose;
+
+	clp = ls->ls_stid.sc_client;
+	nn = net_generic(clp->net, nfsd_net_id);
+	bdev = nf->nf_file->f_path.mnt->mnt_sb->s_bdev;
+	if (nfsd4_layout_ops[ls->ls_layout_type]->fence_client(ls, nf)) {
+		/* fenced ok */
+		nfsd_file_put(nf);
+		pr_warn("%s: FENCED client[%pISpc] clid[%d] to device[%s]\n",
+			__func__, (struct sockaddr *)&clp->cl_addr,
+			clp->cl_clientid.cl_id - nn->clientid_base,
+			bdev->bd_disk->disk_name);
+		goto dispose;
+	}
+	/* fence failed */
+	nfsd_file_put(nf);
+
+	if (!clp->cl_fence_retry_warn) {
+		pr_warn("%s: FENCE failed client[%pISpc] clid[%d] device[%s]\n",
+			__func__, (struct sockaddr *)&clp->cl_addr,
+			clp->cl_clientid.cl_id - nn->clientid_base,
+			bdev->bd_disk->disk_name);
+		clp->cl_fence_retry_warn = true;
+	}
+	/*
+	 * The fence worker retries the fencing operation indefinitely to
+	 * prevent data corruption. The admin needs to take the following
+	 * actions to restore access to the file for other clients:
+	 *
+	 *  . shutdown or power off the client being fenced.
+	 *  . manually expire the client to release all its state on the server;
+	 *    echo 'expire' > /proc/fs/nfsd/clients/clid/ctl'.
+	 *
+	 *    Where:
+	 *
+	 *    clid: is the unique client identifier displayed in
+	 *          the warning message above.
+	 */
+	if (!ls->ls_fence_delay)
+		ls->ls_fence_delay = HZ;
+	else
+		ls->ls_fence_delay = min(ls->ls_fence_delay << 1,
+					 MAX_FENCE_DELAY);
+	mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, ls->ls_fence_delay);
+}
+
+/**
+ * nfsd4_layout_lm_breaker_timedout - The layout recall has timed out.
+ * @fl: file to check
+ *
+ * If the layout type supports a fence operation, schedule a worker to
+ * fence the client from accessing the block device.
+ *
+ * This function runs under the protection of the spin_lock flc_lock.
+ * At this time, the file_lease associated with the layout stateid is
+ * on the flc_list. A reference count is incremented on the layout
+ * stateid to prevent it from being freed while the fence worker is
+ * executing. Once the fence worker finishes its operation, it releases
+ * this reference.
+ *
+ * The fence worker continues to run until either the client has been
+ * fenced or the layout becomes invalid. The layout can become invalid
+ * as a result of a LAYOUTRETURN or when the CB_LAYOUT recall callback
+ * has completed.
+ *
+ * Return true if the file_lease should be disposed of by the caller;
+ * otherwise, return false.
+ */
+static bool
+nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
+{
+	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
+
+	if ((!nfsd4_layout_ops[ls->ls_layout_type]->fence_client) ||
+			ls->ls_fenced)
+		return true;
+	if (delayed_work_pending(&ls->ls_fence_work))
+		return false;
+	/*
+	 * Make sure layout has not been returned yet before
+	 * taking a reference count on the layout stateid.
+	 */
+	spin_lock(&ls->ls_lock);
+	if (list_empty(&ls->ls_layouts) ||
+			!refcount_inc_not_zero(&ls->ls_stid.sc_count)) {
+		spin_unlock(&ls->ls_lock);
+		return true;
+	}
+	spin_unlock(&ls->ls_lock);
+
+	mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0);
+	return false;
+}
+
 static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
 	.lm_break		= nfsd4_layout_lm_break,
 	.lm_change		= nfsd4_layout_lm_change,
 	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
+	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
 };
 
 int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1b4c101ff04b..1d31f2bb2162 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2386,6 +2386,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name,
 #endif
 #ifdef CONFIG_NFSD_SCSILAYOUT
 	xa_init(&clp->cl_dev_fences);
+	mutex_init(&clp->cl_fence_mutex);
 #endif
 	INIT_LIST_HEAD(&clp->async_copies);
 	spin_lock_init(&clp->async_lock);
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index db9af780438b..f7bee4dc5d3d 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -11,6 +11,9 @@
 
 struct xdr_stream;
 
+/* Cap exponential backoff between fence retries at 3 minutes */
+#define	MAX_FENCE_DELAY		((unsigned int)(3 * 60 * HZ))
+
 struct nfsd4_deviceid_map {
 	struct list_head	hash;
 	u64			idx;
@@ -38,7 +41,7 @@ struct nfsd4_layout_ops {
 			struct svc_rqst *rqstp,
 			struct nfsd4_layoutcommit *lcp);
 
-	void (*fence_client)(struct nfs4_layout_stateid *ls,
+	bool (*fence_client)(struct nfs4_layout_stateid *ls,
 			     struct nfsd_file *file);
 };
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 99aeaab9cf2b..ec1c5467012e 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -456,6 +456,7 @@ struct nfs4_client {
 	struct list_head        cl_lru;         /* tail queue */
 #ifdef CONFIG_NFSD_PNFS
 	struct list_head	cl_lo_states;	/* outstanding layout states */
+	bool			cl_fence_retry_warn;
 #endif
 	struct xdr_netobj	cl_name; 	/* id generated by client */
 	nfs4_verifier		cl_verifier; 	/* generated by client */
@@ -529,6 +530,7 @@ struct nfs4_client {
 	time64_t		cl_ra_time;
 #ifdef CONFIG_NFSD_SCSILAYOUT
 	struct xarray		cl_dev_fences;
+	struct mutex		cl_fence_mutex;
 #endif
 };
 
@@ -745,6 +747,10 @@ struct nfs4_layout_stateid {
 	stateid_t			ls_recall_sid;
 	bool				ls_recalled;
 	struct mutex			ls_mutex;
+
+	struct delayed_work		ls_fence_work;
+	unsigned int			ls_fence_delay;
+	bool				ls_fenced;
 };
 
 static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index d2c9740e26a8..5f0a2fb31450 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -50,6 +50,7 @@ struct lease_manager_operations {
 	void (*lm_setup)(struct file_lease *, void **);
 	bool (*lm_breaker_owns_lease)(struct file_lease *);
 	int (*lm_open_conflict)(struct file *, int);
+	bool (*lm_breaker_timedout)(struct file_lease *fl);
 };
 
 struct lock_manager {
-- 
cgit v1.2.3


From 5bc37b759ec0cdde2c652a2637d704f2d6306617 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 17 Feb 2026 17:06:53 -0500
Subject: Documentation: Add the RPC language description of NLM version 4

In order to generate source code to encode and decode NLMv4 protocol
elements, include a copy of the RPC language description of NLMv4
for xdrgen to process. The language description is an amalgam of
RFC 1813 and the Open Group's XNFS specification:

  https://pubs.opengroup.org/onlinepubs/9629799/chap10.htm

The C code committed here was generated from the new nlm4.x file
using tools/net/sunrpc/xdrgen/xdrgen.

The goals of replacing hand-written XDR functions with ones that
are tool-generated are to improve memory safety and make XDR
encoding and decoding less brittle to maintain.

The xdrgen utility derives both the type definitions and the
encode/decode functions directly from protocol specifications,
using names and symbols familiar to anyone who knows those specs.
Unlike hand-written code that can inadvertently diverge from the
specification, xdrgen guarantees that the generated code matches
the specification exactly.

We would eventually like xdrgen to generate Rust code as well,
making the conversion of the kernel's NFS stacks to use Rust just
a little easier for us.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 Documentation/sunrpc/xdr/nlm4.x    | 211 +++++++++++
 fs/lockd/Makefile                  |  30 +-
 fs/lockd/nlm4xdr_gen.c             | 724 +++++++++++++++++++++++++++++++++++++
 fs/lockd/nlm4xdr_gen.h             |  32 ++
 include/linux/sunrpc/xdrgen/nlm4.h | 233 ++++++++++++
 5 files changed, 1229 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/sunrpc/xdr/nlm4.x
 create mode 100644 fs/lockd/nlm4xdr_gen.c
 create mode 100644 fs/lockd/nlm4xdr_gen.h
 create mode 100644 include/linux/sunrpc/xdrgen/nlm4.h

(limited to 'include/linux')

diff --git a/Documentation/sunrpc/xdr/nlm4.x b/Documentation/sunrpc/xdr/nlm4.x
new file mode 100644
index 000000000000..0c44a80ef674
--- /dev/null
+++ b/Documentation/sunrpc/xdr/nlm4.x
@@ -0,0 +1,211 @@
+/*
+ * This file was extracted by hand from
+ * https://www.rfc-editor.org/rfc/rfc1813.html .
+ *
+ * Note that RFC 1813 is Informational. Its official date of
+ * publication (June 1995) is before the IETF required its RFCs to
+ * carry an explicit copyright or other IP ownership notices.
+ *
+ * Note also that RFC 1813 does not specify the whole NLM4 protocol.
+ * In particular, the argument and result types are not present in
+ * that document, and had to be reverse-engineered.
+ */
+
+/*
+ * The NLMv4 protocol
+ */
+
+pragma header nlm4;
+
+/*
+ * The following definitions are missing in RFC 1813,
+ * but can be found in the OpenNetworking Network Lock
+ * Manager protocol:
+ *
+ * https://pubs.opengroup.org/onlinepubs/9629799/chap10.htm
+ */
+
+const LM_MAXSTRLEN = 1024;
+
+const LM_MAXNAMELEN = 1025;
+
+const MAXNETOBJ_SZ = 1024;
+
+typedef opaque netobj<MAXNETOBJ_SZ>;
+
+enum fsh4_mode {
+	fsm_DN = 0,		/* deny none */
+	fsm_DR = 1,		/* deny read */
+	fsm_DW = 2,		/* deny write */
+	fsm_DRW = 3		/* deny read/write */
+};
+
+enum fsh4_access {
+	fsa_NONE = 0,		/* for completeness */
+	fsa_R = 1,		/* read-only */
+	fsa_W = 2,		/* write-only */
+	fsa_RW = 3		/* read/write */
+};
+
+/*
+ * The following definitions come from the OpenNetworking
+ * Network Status Monitor protocol:
+ *
+ * https://pubs.opengroup.org/onlinepubs/9629799/chap11.htm
+ */
+
+const SM_MAXSTRLEN = 1024;
+
+/*
+ * The NLM protocol as extracted from:
+ * https://tools.ietf.org/html/rfc1813 Appendix II
+ */
+
+typedef unsigned hyper uint64;
+
+typedef hyper int64;
+
+typedef unsigned long uint32;
+
+typedef long int32;
+
+enum nlm4_stats {
+	NLM4_GRANTED = 0,
+	NLM4_DENIED = 1,
+	NLM4_DENIED_NOLOCKS = 2,
+	NLM4_BLOCKED = 3,
+	NLM4_DENIED_GRACE_PERIOD = 4,
+	NLM4_DEADLCK = 5,
+	NLM4_ROFS = 6,
+	NLM4_STALE_FH = 7,
+	NLM4_FBIG = 8,
+	NLM4_FAILED = 9
+};
+
+pragma big_endian nlm4_stats;
+
+struct nlm4_holder {
+	bool		exclusive;
+	int32		svid;
+	netobj		oh;
+	uint64		l_offset;
+	uint64		l_len;
+};
+
+union nlm4_testrply switch (nlm4_stats stat) {
+	case NLM4_DENIED:
+		nlm4_holder	holder;
+	default:
+		void;
+};
+
+struct nlm4_stat {
+	nlm4_stats	stat;
+};
+
+struct nlm4_res {
+	netobj		cookie;
+	nlm4_stat	stat;
+};
+
+struct nlm4_testres {
+	netobj		cookie;
+	nlm4_testrply	stat;
+};
+
+struct nlm4_lock {
+	string		caller_name<LM_MAXSTRLEN>;
+	netobj		fh;
+	netobj		oh;
+	int32		svid;
+	uint64		l_offset;
+	uint64		l_len;
+};
+
+struct nlm4_lockargs {
+	netobj		cookie;
+	bool		block;
+	bool		exclusive;
+	nlm4_lock	alock;
+	bool		reclaim;
+	int32		state;
+};
+
+struct nlm4_cancargs {
+	netobj		cookie;
+	bool		block;
+	bool		exclusive;
+	nlm4_lock	alock;
+};
+
+struct nlm4_testargs {
+	netobj		cookie;
+	bool		exclusive;
+	nlm4_lock	alock;
+};
+
+struct nlm4_unlockargs {
+	netobj		cookie;
+	nlm4_lock	alock;
+};
+
+struct nlm4_share {
+	string		caller_name<LM_MAXSTRLEN>;
+	netobj		fh;
+	netobj		oh;
+	fsh4_mode	mode;
+	fsh4_access	access;
+};
+
+struct nlm4_shareargs {
+	netobj		cookie;
+	nlm4_share	share;
+	bool		reclaim;
+};
+
+struct nlm4_shareres {
+	netobj		cookie;
+	nlm4_stats	stat;
+	int32		sequence;
+};
+
+struct nlm4_notify {
+	string		name<LM_MAXNAMELEN>;
+	int32		state;
+};
+
+/*
+ * Argument for the Linux-private SM_NOTIFY procedure
+ */
+const SM_PRIV_SIZE = 16;
+
+struct nlm4_notifyargs {
+	nlm4_notify	notify;
+	opaque		private[SM_PRIV_SIZE];
+};
+
+program NLM4_PROG {
+	version NLM4_VERS {
+		void		NLMPROC4_NULL(void)			= 0;
+		nlm4_testres	NLMPROC4_TEST(nlm4_testargs)		= 1;
+		nlm4_res	NLMPROC4_LOCK(nlm4_lockargs)		= 2;
+		nlm4_res	NLMPROC4_CANCEL(nlm4_cancargs)		= 3;
+		nlm4_res	NLMPROC4_UNLOCK(nlm4_unlockargs)	= 4;
+		nlm4_res	NLMPROC4_GRANTED(nlm4_testargs)		= 5;
+		void		NLMPROC4_TEST_MSG(nlm4_testargs)	= 6;
+		void		NLMPROC4_LOCK_MSG(nlm4_lockargs)	= 7;
+		void		NLMPROC4_CANCEL_MSG(nlm4_cancargs)	= 8;
+		void		NLMPROC4_UNLOCK_MSG(nlm4_unlockargs)	= 9;
+		void		NLMPROC4_GRANTED_MSG(nlm4_testargs)	= 10;
+		void		NLMPROC4_TEST_RES(nlm4_testres)		= 11;
+		void		NLMPROC4_LOCK_RES(nlm4_res)		= 12;
+		void		NLMPROC4_CANCEL_RES(nlm4_res)		= 13;
+		void		NLMPROC4_UNLOCK_RES(nlm4_res)		= 14;
+		void		NLMPROC4_GRANTED_RES(nlm4_res)		= 15;
+		void		NLMPROC4_SM_NOTIFY(nlm4_notifyargs)	= 16;
+		nlm4_shareres	NLMPROC4_SHARE(nlm4_shareargs)		= 20;
+		nlm4_shareres	NLMPROC4_UNSHARE(nlm4_shareargs)	= 21;
+		nlm4_res	NLMPROC4_NM_LOCK(nlm4_lockargs)		= 22;
+		void		NLMPROC4_FREE_ALL(nlm4_notify)		= 23;
+	} = 4;
+} = 100021;
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index 51bbe22d21e3..8e9d18a4348c 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -9,5 +9,33 @@ obj-$(CONFIG_LOCKD) += lockd.o
 
 lockd-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
 	   svcshare.o svcproc.o svcsubs.o mon.o trace.o xdr.o netlink.o
-lockd-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
+lockd-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o nlm4xdr_gen.o
 lockd-$(CONFIG_PROC_FS) += procfs.o
+
+#
+# XDR code generation (requires Python and additional packages)
+#
+# The generated *xdr_gen.{h,c} files are checked into git. Normal kernel
+# builds do not require the xdrgen tool or its Python dependencies.
+#
+# Developers modifying .x files in Documentation/sunrpc/xdr/ should run
+# "make xdrgen" to regenerate the affected files.
+#
+.PHONY: xdrgen
+
+XDRGEN			= ../../tools/net/sunrpc/xdrgen/xdrgen
+
+XDRGEN_DEFINITIONS	= ../../include/linux/sunrpc/xdrgen/nlm4.h
+XDRGEN_DECLARATIONS	= nlm4xdr_gen.h
+XDRGEN_SOURCE		= nlm4xdr_gen.c
+
+xdrgen: $(XDRGEN_DEFINITIONS) $(XDRGEN_DECLARATIONS) $(XDRGEN_SOURCE)
+
+../../include/linux/sunrpc/xdrgen/nlm4.h: ../../Documentation/sunrpc/xdr/nlm4.x
+	$(XDRGEN) definitions $< > $@
+
+nlm4xdr_gen.h: ../../Documentation/sunrpc/xdr/nlm4.x
+	$(XDRGEN) declarations $< > $@
+
+nlm4xdr_gen.c: ../../Documentation/sunrpc/xdr/nlm4.x
+	$(XDRGEN) source --peer server $< > $@
diff --git a/fs/lockd/nlm4xdr_gen.c b/fs/lockd/nlm4xdr_gen.c
new file mode 100644
index 000000000000..1c8c221db456
--- /dev/null
+++ b/fs/lockd/nlm4xdr_gen.c
@@ -0,0 +1,724 @@
+// SPDX-License-Identifier: GPL-2.0
+// Generated by xdrgen. Manual edits will be lost.
+// XDR specification file: ../../Documentation/sunrpc/xdr/nlm4.x
+// XDR specification modification time: Thu Dec 25 13:10:19 2025
+
+#include <linux/sunrpc/svc.h>
+
+#include "nlm4xdr_gen.h"
+
+static bool __maybe_unused
+xdrgen_decode_netobj(struct xdr_stream *xdr, netobj *ptr)
+{
+	return xdrgen_decode_opaque(xdr, ptr, MAXNETOBJ_SZ);
+}
+
+static bool __maybe_unused
+xdrgen_decode_fsh4_mode(struct xdr_stream *xdr, fsh4_mode *ptr)
+{
+	u32 val;
+
+	if (xdr_stream_decode_u32(xdr, &val) < 0)
+		return false;
+	*ptr = val;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_fsh4_access(struct xdr_stream *xdr, fsh4_access *ptr)
+{
+	u32 val;
+
+	if (xdr_stream_decode_u32(xdr, &val) < 0)
+		return false;
+	*ptr = val;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_uint64(struct xdr_stream *xdr, uint64 *ptr)
+{
+	return xdrgen_decode_unsigned_hyper(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_int64(struct xdr_stream *xdr, int64 *ptr)
+{
+	return xdrgen_decode_hyper(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_uint32(struct xdr_stream *xdr, uint32 *ptr)
+{
+	return xdrgen_decode_unsigned_long(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_int32(struct xdr_stream *xdr, int32 *ptr)
+{
+	return xdrgen_decode_long(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_stats(struct xdr_stream *xdr, nlm4_stats *ptr)
+{
+	return xdr_stream_decode_be32(xdr, ptr) == 0;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_holder(struct xdr_stream *xdr, struct nlm4_holder *ptr)
+{
+	if (!xdrgen_decode_bool(xdr, &ptr->exclusive))
+		return false;
+	if (!xdrgen_decode_int32(xdr, &ptr->svid))
+		return false;
+	if (!xdrgen_decode_netobj(xdr, &ptr->oh))
+		return false;
+	if (!xdrgen_decode_uint64(xdr, &ptr->l_offset))
+		return false;
+	if (!xdrgen_decode_uint64(xdr, &ptr->l_len))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_testrply(struct xdr_stream *xdr, struct nlm4_testrply *ptr)
+{
+	if (!xdrgen_decode_nlm4_stats(xdr, &ptr->stat))
+		return false;
+	switch (ptr->stat) {
+	case __constant_cpu_to_be32(NLM4_DENIED):
+		if (!xdrgen_decode_nlm4_holder(xdr, &ptr->u.holder))
+			return false;
+		break;
+	default:
+		break;
+	}
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_stat(struct xdr_stream *xdr, struct nlm4_stat *ptr)
+{
+	if (!xdrgen_decode_nlm4_stats(xdr, &ptr->stat))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_res(struct xdr_stream *xdr, struct nlm4_res *ptr)
+{
+	if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+		return false;
+	if (!xdrgen_decode_nlm4_stat(xdr, &ptr->stat))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_testres(struct xdr_stream *xdr, struct nlm4_testres *ptr)
+{
+	if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+		return false;
+	if (!xdrgen_decode_nlm4_testrply(xdr, &ptr->stat))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_lock(struct xdr_stream *xdr, struct nlm4_lock *ptr)
+{
+	if (!xdrgen_decode_string(xdr, (string *)ptr, LM_MAXSTRLEN))
+		return false;
+	if (!xdrgen_decode_netobj(xdr, &ptr->fh))
+		return false;
+	if (!xdrgen_decode_netobj(xdr, &ptr->oh))
+		return false;
+	if (!xdrgen_decode_int32(xdr, &ptr->svid))
+		return false;
+	if (!xdrgen_decode_uint64(xdr, &ptr->l_offset))
+		return false;
+	if (!xdrgen_decode_uint64(xdr, &ptr->l_len))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_lockargs(struct xdr_stream *xdr, struct nlm4_lockargs *ptr)
+{
+	if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+		return false;
+	if (!xdrgen_decode_bool(xdr, &ptr->block))
+		return false;
+	if (!xdrgen_decode_bool(xdr, &ptr->exclusive))
+		return false;
+	if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock))
+		return false;
+	if (!xdrgen_decode_bool(xdr, &ptr->reclaim))
+		return false;
+	if (!xdrgen_decode_int32(xdr, &ptr->state))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_cancargs(struct xdr_stream *xdr, struct nlm4_cancargs *ptr)
+{
+	if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+		return false;
+	if (!xdrgen_decode_bool(xdr, &ptr->block))
+		return false;
+	if (!xdrgen_decode_bool(xdr, &ptr->exclusive))
+		return false;
+	if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_testargs(struct xdr_stream *xdr, struct nlm4_testargs *ptr)
+{
+	if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+		return false;
+	if (!xdrgen_decode_bool(xdr, &ptr->exclusive))
+		return false;
+	if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_unlockargs(struct xdr_stream *xdr, struct nlm4_unlockargs *ptr)
+{
+	if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+		return false;
+	if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_share(struct xdr_stream *xdr, struct nlm4_share *ptr)
+{
+	if (!xdrgen_decode_string(xdr, (string *)ptr, LM_MAXSTRLEN))
+		return false;
+	if (!xdrgen_decode_netobj(xdr, &ptr->fh))
+		return false;
+	if (!xdrgen_decode_netobj(xdr, &ptr->oh))
+		return false;
+	if (!xdrgen_decode_fsh4_mode(xdr, &ptr->mode))
+		return false;
+	if (!xdrgen_decode_fsh4_access(xdr, &ptr->access))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_shareargs(struct xdr_stream *xdr, struct nlm4_shareargs *ptr)
+{
+	if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+		return false;
+	if (!xdrgen_decode_nlm4_share(xdr, &ptr->share))
+		return false;
+	if (!xdrgen_decode_bool(xdr, &ptr->reclaim))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_shareres(struct xdr_stream *xdr, struct nlm4_shareres *ptr)
+{
+	if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+		return false;
+	if (!xdrgen_decode_nlm4_stats(xdr, &ptr->stat))
+		return false;
+	if (!xdrgen_decode_int32(xdr, &ptr->sequence))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_notify(struct xdr_stream *xdr, struct nlm4_notify *ptr)
+{
+	if (!xdrgen_decode_string(xdr, (string *)ptr, LM_MAXNAMELEN))
+		return false;
+	if (!xdrgen_decode_int32(xdr, &ptr->state))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_notifyargs(struct xdr_stream *xdr, struct nlm4_notifyargs *ptr)
+{
+	if (!xdrgen_decode_nlm4_notify(xdr, &ptr->notify))
+		return false;
+	if (xdr_stream_decode_opaque_fixed(xdr, ptr->private, SM_PRIV_SIZE) < 0)
+		return false;
+	return true;
+}
+
+/**
+ * nlm4_svc_decode_void - Decode a void argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ *   %true: procedure arguments decoded successfully
+ *   %false: decode failed
+ */
+bool nlm4_svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	return xdrgen_decode_void(xdr);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_testargs - Decode a nlm4_testargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ *   %true: procedure arguments decoded successfully
+ *   %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_testargs *argp = rqstp->rq_argp;
+
+	return xdrgen_decode_nlm4_testargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_lockargs - Decode a nlm4_lockargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ *   %true: procedure arguments decoded successfully
+ *   %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_lockargs *argp = rqstp->rq_argp;
+
+	return xdrgen_decode_nlm4_lockargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_cancargs - Decode a nlm4_cancargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ *   %true: procedure arguments decoded successfully
+ *   %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_cancargs *argp = rqstp->rq_argp;
+
+	return xdrgen_decode_nlm4_cancargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_unlockargs - Decode a nlm4_unlockargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ *   %true: procedure arguments decoded successfully
+ *   %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_unlockargs *argp = rqstp->rq_argp;
+
+	return xdrgen_decode_nlm4_unlockargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_testres - Decode a nlm4_testres argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ *   %true: procedure arguments decoded successfully
+ *   %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_testres *argp = rqstp->rq_argp;
+
+	return xdrgen_decode_nlm4_testres(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_res - Decode a nlm4_res argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ *   %true: procedure arguments decoded successfully
+ *   %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_res *argp = rqstp->rq_argp;
+
+	return xdrgen_decode_nlm4_res(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_notifyargs - Decode a nlm4_notifyargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ *   %true: procedure arguments decoded successfully
+ *   %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_notifyargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_notifyargs *argp = rqstp->rq_argp;
+
+	return xdrgen_decode_nlm4_notifyargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_shareargs - Decode a nlm4_shareargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ *   %true: procedure arguments decoded successfully
+ *   %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_shareargs *argp = rqstp->rq_argp;
+
+	return xdrgen_decode_nlm4_shareargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_notify - Decode a nlm4_notify argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ *   %true: procedure arguments decoded successfully
+ *   %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_notify *argp = rqstp->rq_argp;
+
+	return xdrgen_decode_nlm4_notify(xdr, argp);
+}
+
+static bool __maybe_unused
+xdrgen_encode_netobj(struct xdr_stream *xdr, const netobj value)
+{
+	return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
+}
+
+static bool __maybe_unused
+xdrgen_encode_fsh4_mode(struct xdr_stream *xdr, fsh4_mode value)
+{
+	return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_fsh4_access(struct xdr_stream *xdr, fsh4_access value)
+{
+	return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_uint64(struct xdr_stream *xdr, const uint64 value)
+{
+	return xdrgen_encode_unsigned_hyper(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_int64(struct xdr_stream *xdr, const int64 value)
+{
+	return xdrgen_encode_hyper(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_uint32(struct xdr_stream *xdr, const uint32 value)
+{
+	return xdrgen_encode_unsigned_long(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_int32(struct xdr_stream *xdr, const int32 value)
+{
+	return xdrgen_encode_long(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_stats(struct xdr_stream *xdr, nlm4_stats value)
+{
+	return xdr_stream_encode_be32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_holder(struct xdr_stream *xdr, const struct nlm4_holder *value)
+{
+	if (!xdrgen_encode_bool(xdr, value->exclusive))
+		return false;
+	if (!xdrgen_encode_int32(xdr, value->svid))
+		return false;
+	if (!xdrgen_encode_netobj(xdr, value->oh))
+		return false;
+	if (!xdrgen_encode_uint64(xdr, value->l_offset))
+		return false;
+	if (!xdrgen_encode_uint64(xdr, value->l_len))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_testrply(struct xdr_stream *xdr, const struct nlm4_testrply *ptr)
+{
+	if (!xdrgen_encode_nlm4_stats(xdr, ptr->stat))
+		return false;
+	switch (ptr->stat) {
+	case __constant_cpu_to_be32(NLM4_DENIED):
+		if (!xdrgen_encode_nlm4_holder(xdr, &ptr->u.holder))
+			return false;
+		break;
+	default:
+		break;
+	}
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_stat(struct xdr_stream *xdr, const struct nlm4_stat *value)
+{
+	if (!xdrgen_encode_nlm4_stats(xdr, value->stat))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_res(struct xdr_stream *xdr, const struct nlm4_res *value)
+{
+	if (!xdrgen_encode_netobj(xdr, value->cookie))
+		return false;
+	if (!xdrgen_encode_nlm4_stat(xdr, &value->stat))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_testres(struct xdr_stream *xdr, const struct nlm4_testres *value)
+{
+	if (!xdrgen_encode_netobj(xdr, value->cookie))
+		return false;
+	if (!xdrgen_encode_nlm4_testrply(xdr, &value->stat))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_lock(struct xdr_stream *xdr, const struct nlm4_lock *value)
+{
+	if (value->caller_name.len > LM_MAXSTRLEN)
+		return false;
+	if (xdr_stream_encode_opaque(xdr, value->caller_name.data, value->caller_name.len) < 0)
+		return false;
+	if (!xdrgen_encode_netobj(xdr, value->fh))
+		return false;
+	if (!xdrgen_encode_netobj(xdr, value->oh))
+		return false;
+	if (!xdrgen_encode_int32(xdr, value->svid))
+		return false;
+	if (!xdrgen_encode_uint64(xdr, value->l_offset))
+		return false;
+	if (!xdrgen_encode_uint64(xdr, value->l_len))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_lockargs(struct xdr_stream *xdr, const struct nlm4_lockargs *value)
+{
+	if (!xdrgen_encode_netobj(xdr, value->cookie))
+		return false;
+	if (!xdrgen_encode_bool(xdr, value->block))
+		return false;
+	if (!xdrgen_encode_bool(xdr, value->exclusive))
+		return false;
+	if (!xdrgen_encode_nlm4_lock(xdr, &value->alock))
+		return false;
+	if (!xdrgen_encode_bool(xdr, value->reclaim))
+		return false;
+	if (!xdrgen_encode_int32(xdr, value->state))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_cancargs(struct xdr_stream *xdr, const struct nlm4_cancargs *value)
+{
+	if (!xdrgen_encode_netobj(xdr, value->cookie))
+		return false;
+	if (!xdrgen_encode_bool(xdr, value->block))
+		return false;
+	if (!xdrgen_encode_bool(xdr, value->exclusive))
+		return false;
+	if (!xdrgen_encode_nlm4_lock(xdr, &value->alock))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_testargs(struct xdr_stream *xdr, const struct nlm4_testargs *value)
+{
+	if (!xdrgen_encode_netobj(xdr, value->cookie))
+		return false;
+	if (!xdrgen_encode_bool(xdr, value->exclusive))
+		return false;
+	if (!xdrgen_encode_nlm4_lock(xdr, &value->alock))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_unlockargs(struct xdr_stream *xdr, const struct nlm4_unlockargs *value)
+{
+	if (!xdrgen_encode_netobj(xdr, value->cookie))
+		return false;
+	if (!xdrgen_encode_nlm4_lock(xdr, &value->alock))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_share(struct xdr_stream *xdr, const struct nlm4_share *value)
+{
+	if (value->caller_name.len > LM_MAXSTRLEN)
+		return false;
+	if (xdr_stream_encode_opaque(xdr, value->caller_name.data, value->caller_name.len) < 0)
+		return false;
+	if (!xdrgen_encode_netobj(xdr, value->fh))
+		return false;
+	if (!xdrgen_encode_netobj(xdr, value->oh))
+		return false;
+	if (!xdrgen_encode_fsh4_mode(xdr, value->mode))
+		return false;
+	if (!xdrgen_encode_fsh4_access(xdr, value->access))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_shareargs(struct xdr_stream *xdr, const struct nlm4_shareargs *value)
+{
+	if (!xdrgen_encode_netobj(xdr, value->cookie))
+		return false;
+	if (!xdrgen_encode_nlm4_share(xdr, &value->share))
+		return false;
+	if (!xdrgen_encode_bool(xdr, value->reclaim))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_shareres(struct xdr_stream *xdr, const struct nlm4_shareres *value)
+{
+	if (!xdrgen_encode_netobj(xdr, value->cookie))
+		return false;
+	if (!xdrgen_encode_nlm4_stats(xdr, value->stat))
+		return false;
+	if (!xdrgen_encode_int32(xdr, value->sequence))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_notify(struct xdr_stream *xdr, const struct nlm4_notify *value)
+{
+	if (value->name.len > LM_MAXNAMELEN)
+		return false;
+	if (xdr_stream_encode_opaque(xdr, value->name.data, value->name.len) < 0)
+		return false;
+	if (!xdrgen_encode_int32(xdr, value->state))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_notifyargs(struct xdr_stream *xdr, const struct nlm4_notifyargs *value)
+{
+	if (!xdrgen_encode_nlm4_notify(xdr, &value->notify))
+		return false;
+	if (xdr_stream_encode_opaque_fixed(xdr, value->private, SM_PRIV_SIZE) < 0)
+		return false;
+	return true;
+}
+
+/**
+ * nlm4_svc_encode_void - Encode a void result
+ * @rqstp: RPC transaction context
+ * @xdr: target XDR data stream
+ *
+ * Return values:
+ *   %true: procedure results encoded successfully
+ *   %false: encode failed
+ */
+bool nlm4_svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	return xdrgen_encode_void(xdr);
+}
+
+/**
+ * nlm4_svc_encode_nlm4_testres - Encode a nlm4_testres result
+ * @rqstp: RPC transaction context
+ * @xdr: target XDR data stream
+ *
+ * Return values:
+ *   %true: procedure results encoded successfully
+ *   %false: encode failed
+ */
+bool nlm4_svc_encode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_testres *resp = rqstp->rq_resp;
+
+	return xdrgen_encode_nlm4_testres(xdr, resp);
+}
+
+/**
+ * nlm4_svc_encode_nlm4_res - Encode a nlm4_res result
+ * @rqstp: RPC transaction context
+ * @xdr: target XDR data stream
+ *
+ * Return values:
+ *   %true: procedure results encoded successfully
+ *   %false: encode failed
+ */
+bool nlm4_svc_encode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_res *resp = rqstp->rq_resp;
+
+	return xdrgen_encode_nlm4_res(xdr, resp);
+}
+
+/**
+ * nlm4_svc_encode_nlm4_shareres - Encode a nlm4_shareres result
+ * @rqstp: RPC transaction context
+ * @xdr: target XDR data stream
+ *
+ * Return values:
+ *   %true: procedure results encoded successfully
+ *   %false: encode failed
+ */
+bool nlm4_svc_encode_nlm4_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct nlm4_shareres *resp = rqstp->rq_resp;
+
+	return xdrgen_encode_nlm4_shareres(xdr, resp);
+}
diff --git a/fs/lockd/nlm4xdr_gen.h b/fs/lockd/nlm4xdr_gen.h
new file mode 100644
index 000000000000..b6008b296a3e
--- /dev/null
+++ b/fs/lockd/nlm4xdr_gen.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Generated by xdrgen. Manual edits will be lost. */
+/* XDR specification file: ../../Documentation/sunrpc/xdr/nlm4.x */
+/* XDR specification modification time: Thu Dec 25 13:10:19 2025 */
+
+#ifndef _LINUX_XDRGEN_NLM4_DECL_H
+#define _LINUX_XDRGEN_NLM4_DECL_H
+
+#include <linux/types.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/xdrgen/_defs.h>
+#include <linux/sunrpc/xdrgen/_builtins.h>
+#include <linux/sunrpc/xdrgen/nlm4.h>
+
+bool nlm4_svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_notifyargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+bool nlm4_svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_encode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_encode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_encode_nlm4_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+#endif /* _LINUX_XDRGEN_NLM4_DECL_H */
diff --git a/include/linux/sunrpc/xdrgen/nlm4.h b/include/linux/sunrpc/xdrgen/nlm4.h
new file mode 100644
index 000000000000..e95e8f105624
--- /dev/null
+++ b/include/linux/sunrpc/xdrgen/nlm4.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Generated by xdrgen. Manual edits will be lost. */
+/* XDR specification file: ../../Documentation/sunrpc/xdr/nlm4.x */
+/* XDR specification modification time: Thu Dec 25 13:10:19 2025 */
+
+#ifndef _LINUX_XDRGEN_NLM4_DEF_H
+#define _LINUX_XDRGEN_NLM4_DEF_H
+
+#include <linux/types.h>
+#include <linux/sunrpc/xdrgen/_defs.h>
+
+enum { LM_MAXSTRLEN = 1024 };
+
+enum { LM_MAXNAMELEN = 1025 };
+
+enum { MAXNETOBJ_SZ = 1024 };
+
+typedef opaque netobj;
+
+enum fsh4_mode {
+	fsm_DN = 0,
+	fsm_DR = 1,
+	fsm_DW = 2,
+	fsm_DRW = 3,
+};
+
+typedef enum fsh4_mode fsh4_mode;
+
+enum fsh4_access {
+	fsa_NONE = 0,
+	fsa_R = 1,
+	fsa_W = 2,
+	fsa_RW = 3,
+};
+
+typedef enum fsh4_access fsh4_access;
+
+enum { SM_MAXSTRLEN = 1024 };
+
+typedef u64 uint64;
+
+typedef s64 int64;
+
+typedef u32 uint32;
+
+typedef s32 int32;
+
+enum nlm4_stats {
+	NLM4_GRANTED = 0,
+	NLM4_DENIED = 1,
+	NLM4_DENIED_NOLOCKS = 2,
+	NLM4_BLOCKED = 3,
+	NLM4_DENIED_GRACE_PERIOD = 4,
+	NLM4_DEADLCK = 5,
+	NLM4_ROFS = 6,
+	NLM4_STALE_FH = 7,
+	NLM4_FBIG = 8,
+	NLM4_FAILED = 9,
+};
+
+typedef __be32 nlm4_stats;
+
+struct nlm4_holder {
+	bool exclusive;
+	int32 svid;
+	netobj oh;
+	uint64 l_offset;
+	uint64 l_len;
+};
+
+struct nlm4_testrply {
+	nlm4_stats stat;
+	union {
+		struct nlm4_holder holder;
+	} u;
+};
+
+struct nlm4_stat {
+	nlm4_stats stat;
+};
+
+struct nlm4_res {
+	netobj cookie;
+	struct nlm4_stat stat;
+};
+
+struct nlm4_testres {
+	netobj cookie;
+	struct nlm4_testrply stat;
+};
+
+struct nlm4_lock {
+	string caller_name;
+	netobj fh;
+	netobj oh;
+	int32 svid;
+	uint64 l_offset;
+	uint64 l_len;
+};
+
+struct nlm4_lockargs {
+	netobj cookie;
+	bool block;
+	bool exclusive;
+	struct nlm4_lock alock;
+	bool reclaim;
+	int32 state;
+};
+
+struct nlm4_cancargs {
+	netobj cookie;
+	bool block;
+	bool exclusive;
+	struct nlm4_lock alock;
+};
+
+struct nlm4_testargs {
+	netobj cookie;
+	bool exclusive;
+	struct nlm4_lock alock;
+};
+
+struct nlm4_unlockargs {
+	netobj cookie;
+	struct nlm4_lock alock;
+};
+
+struct nlm4_share {
+	string caller_name;
+	netobj fh;
+	netobj oh;
+	fsh4_mode mode;
+	fsh4_access access;
+};
+
+struct nlm4_shareargs {
+	netobj cookie;
+	struct nlm4_share share;
+	bool reclaim;
+};
+
+struct nlm4_shareres {
+	netobj cookie;
+	nlm4_stats stat;
+	int32 sequence;
+};
+
+struct nlm4_notify {
+	string name;
+	int32 state;
+};
+
+enum { SM_PRIV_SIZE = 16 };
+
+struct nlm4_notifyargs {
+	struct nlm4_notify notify;
+	u8 private[SM_PRIV_SIZE];
+};
+
+enum {
+	NLMPROC4_NULL = 0,
+	NLMPROC4_TEST = 1,
+	NLMPROC4_LOCK = 2,
+	NLMPROC4_CANCEL = 3,
+	NLMPROC4_UNLOCK = 4,
+	NLMPROC4_GRANTED = 5,
+	NLMPROC4_TEST_MSG = 6,
+	NLMPROC4_LOCK_MSG = 7,
+	NLMPROC4_CANCEL_MSG = 8,
+	NLMPROC4_UNLOCK_MSG = 9,
+	NLMPROC4_GRANTED_MSG = 10,
+	NLMPROC4_TEST_RES = 11,
+	NLMPROC4_LOCK_RES = 12,
+	NLMPROC4_CANCEL_RES = 13,
+	NLMPROC4_UNLOCK_RES = 14,
+	NLMPROC4_GRANTED_RES = 15,
+	NLMPROC4_SM_NOTIFY = 16,
+	NLMPROC4_SHARE = 20,
+	NLMPROC4_UNSHARE = 21,
+	NLMPROC4_NM_LOCK = 22,
+	NLMPROC4_FREE_ALL = 23,
+};
+
+#ifndef NLM4_PROG
+#define NLM4_PROG (100021)
+#endif
+
+#define NLM4_netobj_sz                  (XDR_unsigned_int + XDR_QUADLEN(MAXNETOBJ_SZ))
+#define NLM4_fsh4_mode_sz               (XDR_int)
+#define NLM4_fsh4_access_sz             (XDR_int)
+#define NLM4_uint64_sz                  \
+	(XDR_unsigned_hyper)
+#define NLM4_int64_sz                   \
+	(XDR_hyper)
+#define NLM4_uint32_sz                  \
+	(XDR_unsigned_long)
+#define NLM4_int32_sz                   \
+	(XDR_long)
+#define NLM4_nlm4_stats_sz              (XDR_int)
+#define NLM4_nlm4_holder_sz             \
+	(XDR_bool + NLM4_int32_sz + NLM4_netobj_sz + NLM4_uint64_sz + NLM4_uint64_sz)
+#define NLM4_nlm4_testrply_sz           \
+	(NLM4_nlm4_stats_sz + NLM4_nlm4_holder_sz)
+#define NLM4_nlm4_stat_sz               \
+	(NLM4_nlm4_stats_sz)
+#define NLM4_nlm4_res_sz                \
+	(NLM4_netobj_sz + NLM4_nlm4_stat_sz)
+#define NLM4_nlm4_testres_sz            \
+	(NLM4_netobj_sz + NLM4_nlm4_testrply_sz)
+#define NLM4_nlm4_lock_sz               \
+	(XDR_unsigned_int + XDR_QUADLEN(LM_MAXSTRLEN) + NLM4_netobj_sz + NLM4_netobj_sz + NLM4_int32_sz + NLM4_uint64_sz + NLM4_uint64_sz)
+#define NLM4_nlm4_lockargs_sz           \
+	(NLM4_netobj_sz + XDR_bool + XDR_bool + NLM4_nlm4_lock_sz + XDR_bool + NLM4_int32_sz)
+#define NLM4_nlm4_cancargs_sz           \
+	(NLM4_netobj_sz + XDR_bool + XDR_bool + NLM4_nlm4_lock_sz)
+#define NLM4_nlm4_testargs_sz           \
+	(NLM4_netobj_sz + XDR_bool + NLM4_nlm4_lock_sz)
+#define NLM4_nlm4_unlockargs_sz         \
+	(NLM4_netobj_sz + NLM4_nlm4_lock_sz)
+#define NLM4_nlm4_share_sz              \
+	(XDR_unsigned_int + XDR_QUADLEN(LM_MAXSTRLEN) + NLM4_netobj_sz + NLM4_netobj_sz + NLM4_fsh4_mode_sz + NLM4_fsh4_access_sz)
+#define NLM4_nlm4_shareargs_sz          \
+	(NLM4_netobj_sz + NLM4_nlm4_share_sz + XDR_bool)
+#define NLM4_nlm4_shareres_sz           \
+	(NLM4_netobj_sz + NLM4_nlm4_stats_sz + NLM4_int32_sz)
+#define NLM4_nlm4_notify_sz             \
+	(XDR_unsigned_int + XDR_QUADLEN(LM_MAXNAMELEN) + NLM4_int32_sz)
+#define NLM4_nlm4_notifyargs_sz         \
+	(NLM4_nlm4_notify_sz + XDR_QUADLEN(SM_PRIV_SIZE))
+#define NLM4_MAX_ARGS_SZ                \
+	(NLM4_nlm4_lockargs_sz)
+
+#endif /* _LINUX_XDRGEN_NLM4_DEF_H */
-- 
cgit v1.2.3


From 17c1d66579ff27a7a8f2f407d1425272ff6fdd8c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 23 Feb 2026 12:09:59 -0500
Subject: sunrpc: convert queue_lock from global spinlock to per-cache-detail
 lock

The global queue_lock serializes all upcall queue operations across
every cache_detail instance. Convert it to a per-cache-detail spinlock
so that different caches (e.g. auth.unix.ip vs nfsd.fh) no longer
contend with each other on queue operations.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/cache.h |  1 +
 net/sunrpc/cache.c           | 47 ++++++++++++++++++++++----------------------
 2 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index e783132e481f..3d32dd1f7b05 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -113,6 +113,7 @@ struct cache_detail {
 
 	/* fields for communication over channel */
 	struct list_head	queue;
+	spinlock_t		queue_lock;
 
 	atomic_t		writers;		/* how many time is /channel open */
 	time64_t		last_close;		/* if no writers, when did last close */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 86b3fd5a429d..1cfaae488c6c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -400,6 +400,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
 {
 	spin_lock_init(&cd->hash_lock);
 	INIT_LIST_HEAD(&cd->queue);
+	spin_lock_init(&cd->queue_lock);
 	spin_lock(&cache_list_lock);
 	cd->nextcheck = 0;
 	cd->entries = 0;
@@ -803,8 +804,6 @@ void cache_clean_deferred(void *owner)
  *
  */
 
-static DEFINE_SPINLOCK(queue_lock);
-
 struct cache_queue {
 	struct list_head	list;
 	int			reader;	/* if 0, then request */
@@ -847,7 +846,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
 	inode_lock(inode); /* protect against multiple concurrent
 			      * readers on this file */
  again:
-	spin_lock(&queue_lock);
+	spin_lock(&cd->queue_lock);
 	/* need to find next request */
 	while (rp->q.list.next != &cd->queue &&
 	       list_entry(rp->q.list.next, struct cache_queue, list)
@@ -856,7 +855,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
 		list_move(&rp->q.list, next);
 	}
 	if (rp->q.list.next == &cd->queue) {
-		spin_unlock(&queue_lock);
+		spin_unlock(&cd->queue_lock);
 		inode_unlock(inode);
 		WARN_ON_ONCE(rp->offset);
 		return 0;
@@ -865,7 +864,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
 	WARN_ON_ONCE(rq->q.reader);
 	if (rp->offset == 0)
 		rq->readers++;
-	spin_unlock(&queue_lock);
+	spin_unlock(&cd->queue_lock);
 
 	if (rq->len == 0) {
 		err = cache_request(cd, rq);
@@ -876,9 +875,9 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
 
 	if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
 		err = -EAGAIN;
-		spin_lock(&queue_lock);
+		spin_lock(&cd->queue_lock);
 		list_move(&rp->q.list, &rq->q.list);
-		spin_unlock(&queue_lock);
+		spin_unlock(&cd->queue_lock);
 	} else {
 		if (rp->offset + count > rq->len)
 			count = rq->len - rp->offset;
@@ -888,26 +887,26 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
 		rp->offset += count;
 		if (rp->offset >= rq->len) {
 			rp->offset = 0;
-			spin_lock(&queue_lock);
+			spin_lock(&cd->queue_lock);
 			list_move(&rp->q.list, &rq->q.list);
-			spin_unlock(&queue_lock);
+			spin_unlock(&cd->queue_lock);
 		}
 		err = 0;
 	}
  out:
 	if (rp->offset == 0) {
 		/* need to release rq */
-		spin_lock(&queue_lock);
+		spin_lock(&cd->queue_lock);
 		rq->readers--;
 		if (rq->readers == 0 &&
 		    !test_bit(CACHE_PENDING, &rq->item->flags)) {
 			list_del(&rq->q.list);
-			spin_unlock(&queue_lock);
+			spin_unlock(&cd->queue_lock);
 			cache_put(rq->item, cd);
 			kfree(rq->buf);
 			kfree(rq);
 		} else
-			spin_unlock(&queue_lock);
+			spin_unlock(&cd->queue_lock);
 	}
 	if (err == -EAGAIN)
 		goto again;
@@ -988,7 +987,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait,
 	if (!rp)
 		return mask;
 
-	spin_lock(&queue_lock);
+	spin_lock(&cd->queue_lock);
 
 	for (cq= &rp->q; &cq->list != &cd->queue;
 	     cq = list_entry(cq->list.next, struct cache_queue, list))
@@ -996,7 +995,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait,
 			mask |= EPOLLIN | EPOLLRDNORM;
 			break;
 		}
-	spin_unlock(&queue_lock);
+	spin_unlock(&cd->queue_lock);
 	return mask;
 }
 
@@ -1011,7 +1010,7 @@ static int cache_ioctl(struct inode *ino, struct file *filp,
 	if (cmd != FIONREAD || !rp)
 		return -EINVAL;
 
-	spin_lock(&queue_lock);
+	spin_lock(&cd->queue_lock);
 
 	/* only find the length remaining in current request,
 	 * or the length of the next request
@@ -1024,7 +1023,7 @@ static int cache_ioctl(struct inode *ino, struct file *filp,
 			len = cr->len - rp->offset;
 			break;
 		}
-	spin_unlock(&queue_lock);
+	spin_unlock(&cd->queue_lock);
 
 	return put_user(len, (int __user *)arg);
 }
@@ -1046,9 +1045,9 @@ static int cache_open(struct inode *inode, struct file *filp,
 		rp->offset = 0;
 		rp->q.reader = 1;
 
-		spin_lock(&queue_lock);
+		spin_lock(&cd->queue_lock);
 		list_add(&rp->q.list, &cd->queue);
-		spin_unlock(&queue_lock);
+		spin_unlock(&cd->queue_lock);
 	}
 	if (filp->f_mode & FMODE_WRITE)
 		atomic_inc(&cd->writers);
@@ -1064,7 +1063,7 @@ static int cache_release(struct inode *inode, struct file *filp,
 	if (rp) {
 		struct cache_request *rq = NULL;
 
-		spin_lock(&queue_lock);
+		spin_lock(&cd->queue_lock);
 		if (rp->offset) {
 			struct cache_queue *cq;
 			for (cq = &rp->q; &cq->list != &cd->queue;
@@ -1086,7 +1085,7 @@ static int cache_release(struct inode *inode, struct file *filp,
 			rp->offset = 0;
 		}
 		list_del(&rp->q.list);
-		spin_unlock(&queue_lock);
+		spin_unlock(&cd->queue_lock);
 
 		if (rq) {
 			cache_put(rq->item, cd);
@@ -1113,7 +1112,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
 	struct cache_request *cr;
 	LIST_HEAD(dequeued);
 
-	spin_lock(&queue_lock);
+	spin_lock(&detail->queue_lock);
 	list_for_each_entry_safe(cq, tmp, &detail->queue, list)
 		if (!cq->reader) {
 			cr = container_of(cq, struct cache_request, q);
@@ -1126,7 +1125,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
 				continue;
 			list_move(&cr->q.list, &dequeued);
 		}
-	spin_unlock(&queue_lock);
+	spin_unlock(&detail->queue_lock);
 	while (!list_empty(&dequeued)) {
 		cr = list_entry(dequeued.next, struct cache_request, q.list);
 		list_del(&cr->q.list);
@@ -1251,7 +1250,7 @@ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 	crq->buf = buf;
 	crq->len = 0;
 	crq->readers = 0;
-	spin_lock(&queue_lock);
+	spin_lock(&detail->queue_lock);
 	if (test_bit(CACHE_PENDING, &h->flags)) {
 		crq->item = cache_get(h);
 		list_add_tail(&crq->q.list, &detail->queue);
@@ -1259,7 +1258,7 @@ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 	} else
 		/* Lost a race, no longer PENDING, so don't enqueue */
 		ret = -EAGAIN;
-	spin_unlock(&queue_lock);
+	spin_unlock(&detail->queue_lock);
 	wake_up(&queue_wait);
 	if (ret == -EAGAIN) {
 		kfree(buf);
-- 
cgit v1.2.3


From 552d0e17ea042fc4f959c4543cbbd0e54de7a8e9 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 23 Feb 2026 12:10:00 -0500
Subject: sunrpc: convert queue_wait from global to per-cache-detail waitqueue

The queue_wait waitqueue is currently a file-scoped global, so a
wake_up for one cache_detail wakes pollers on all caches. Convert it
to a per-cache-detail field so that only pollers on the relevant cache
are woken.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/cache.h | 2 ++
 net/sunrpc/cache.c           | 7 +++----
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 3d32dd1f7b05..031379efba24 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -16,6 +16,7 @@
 #include <linux/atomic.h>
 #include <linux/kstrtox.h>
 #include <linux/proc_fs.h>
+#include <linux/wait.h>
 
 /*
  * Each cache requires:
@@ -114,6 +115,7 @@ struct cache_detail {
 	/* fields for communication over channel */
 	struct list_head	queue;
 	spinlock_t		queue_lock;
+	wait_queue_head_t	queue_wait;
 
 	atomic_t		writers;		/* how many time is /channel open */
 	time64_t		last_close;		/* if no writers, when did last close */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 1cfaae488c6c..fd02dca1f07a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -401,6 +401,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
 	spin_lock_init(&cd->hash_lock);
 	INIT_LIST_HEAD(&cd->queue);
 	spin_lock_init(&cd->queue_lock);
+	init_waitqueue_head(&cd->queue_wait);
 	spin_lock(&cache_list_lock);
 	cd->nextcheck = 0;
 	cd->entries = 0;
@@ -970,8 +971,6 @@ out:
 	return ret;
 }
 
-static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
-
 static __poll_t cache_poll(struct file *filp, poll_table *wait,
 			       struct cache_detail *cd)
 {
@@ -979,7 +978,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait,
 	struct cache_reader *rp = filp->private_data;
 	struct cache_queue *cq;
 
-	poll_wait(filp, &queue_wait, wait);
+	poll_wait(filp, &cd->queue_wait, wait);
 
 	/* alway allow write */
 	mask = EPOLLOUT | EPOLLWRNORM;
@@ -1259,7 +1258,7 @@ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 		/* Lost a race, no longer PENDING, so don't enqueue */
 		ret = -EAGAIN;
 	spin_unlock(&detail->queue_lock);
-	wake_up(&queue_wait);
+	wake_up(&detail->queue_wait);
 	if (ret == -EAGAIN) {
 		kfree(buf);
 		kfree(crq);
-- 
cgit v1.2.3


From facc4e3c80420e3466003ce09b576e005b56a015 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 23 Feb 2026 12:10:01 -0500
Subject: sunrpc: split cache_detail queue into request and reader lists

Replace the single interleaved queue (which mixed cache_request and
cache_reader entries distinguished by a ->reader flag) with two
dedicated lists: cd->requests for upcall requests and cd->readers
for open file handles.

Readers now track their position via a monotonically increasing
sequence number (next_seqno) rather than by their position in the
shared list. Each cache_request is assigned a seqno when enqueued,
and a new cache_next_request() helper finds the next request at or
after a given seqno.

This eliminates the cache_queue wrapper struct entirely, simplifies
the reader-skipping loops in cache_read/cache_poll/cache_ioctl/
cache_release, and makes the data flow easier to reason about.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/cache.h |   4 +-
 net/sunrpc/cache.c           | 143 ++++++++++++++++++-------------------------
 2 files changed, 62 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 031379efba24..b1e595c2615b 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -113,9 +113,11 @@ struct cache_detail {
 	int			entries;
 
 	/* fields for communication over channel */
-	struct list_head	queue;
+	struct list_head	requests;
+	struct list_head	readers;
 	spinlock_t		queue_lock;
 	wait_queue_head_t	queue_wait;
+	u64			next_seqno;
 
 	atomic_t		writers;		/* how many time is /channel open */
 	time64_t		last_close;		/* if no writers, when did last close */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index fd02dca1f07a..7081c1214e6c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -399,9 +399,11 @@ static struct delayed_work cache_cleaner;
 void sunrpc_init_cache_detail(struct cache_detail *cd)
 {
 	spin_lock_init(&cd->hash_lock);
-	INIT_LIST_HEAD(&cd->queue);
+	INIT_LIST_HEAD(&cd->requests);
+	INIT_LIST_HEAD(&cd->readers);
 	spin_lock_init(&cd->queue_lock);
 	init_waitqueue_head(&cd->queue_wait);
+	cd->next_seqno = 0;
 	spin_lock(&cache_list_lock);
 	cd->nextcheck = 0;
 	cd->entries = 0;
@@ -796,29 +798,20 @@ void cache_clean_deferred(void *owner)
  * On read, you get a full request, or block.
  * On write, an update request is processed.
  * Poll works if anything to read, and always allows write.
- *
- * Implemented by linked list of requests.  Each open file has
- * a ->private that also exists in this list.  New requests are added
- * to the end and may wakeup and preceding readers.
- * New readers are added to the head.  If, on read, an item is found with
- * CACHE_UPCALLING clear, we free it from the list.
- *
  */
 
-struct cache_queue {
-	struct list_head	list;
-	int			reader;	/* if 0, then request */
-};
 struct cache_request {
-	struct cache_queue	q;
+	struct list_head	list;
 	struct cache_head	*item;
-	char			* buf;
+	char			*buf;
 	int			len;
 	int			readers;
+	u64			seqno;
 };
 struct cache_reader {
-	struct cache_queue	q;
+	struct list_head	list;
 	int			offset;	/* if non-0, we have a refcnt on next request */
+	u64			next_seqno;
 };
 
 static int cache_request(struct cache_detail *detail,
@@ -833,6 +826,17 @@ static int cache_request(struct cache_detail *detail,
 	return PAGE_SIZE - len;
 }
 
+static struct cache_request *
+cache_next_request(struct cache_detail *cd, u64 seqno)
+{
+	struct cache_request *rq;
+
+	list_for_each_entry(rq, &cd->requests, list)
+		if (rq->seqno >= seqno)
+			return rq;
+	return NULL;
+}
+
 static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
 			  loff_t *ppos, struct cache_detail *cd)
 {
@@ -849,20 +853,13 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
  again:
 	spin_lock(&cd->queue_lock);
 	/* need to find next request */
-	while (rp->q.list.next != &cd->queue &&
-	       list_entry(rp->q.list.next, struct cache_queue, list)
-	       ->reader) {
-		struct list_head *next = rp->q.list.next;
-		list_move(&rp->q.list, next);
-	}
-	if (rp->q.list.next == &cd->queue) {
+	rq = cache_next_request(cd, rp->next_seqno);
+	if (!rq) {
 		spin_unlock(&cd->queue_lock);
 		inode_unlock(inode);
 		WARN_ON_ONCE(rp->offset);
 		return 0;
 	}
-	rq = container_of(rp->q.list.next, struct cache_request, q.list);
-	WARN_ON_ONCE(rq->q.reader);
 	if (rp->offset == 0)
 		rq->readers++;
 	spin_unlock(&cd->queue_lock);
@@ -876,9 +873,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
 
 	if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
 		err = -EAGAIN;
-		spin_lock(&cd->queue_lock);
-		list_move(&rp->q.list, &rq->q.list);
-		spin_unlock(&cd->queue_lock);
+		rp->next_seqno = rq->seqno + 1;
 	} else {
 		if (rp->offset + count > rq->len)
 			count = rq->len - rp->offset;
@@ -888,9 +883,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
 		rp->offset += count;
 		if (rp->offset >= rq->len) {
 			rp->offset = 0;
-			spin_lock(&cd->queue_lock);
-			list_move(&rp->q.list, &rq->q.list);
-			spin_unlock(&cd->queue_lock);
+			rp->next_seqno = rq->seqno + 1;
 		}
 		err = 0;
 	}
@@ -901,7 +894,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
 		rq->readers--;
 		if (rq->readers == 0 &&
 		    !test_bit(CACHE_PENDING, &rq->item->flags)) {
-			list_del(&rq->q.list);
+			list_del(&rq->list);
 			spin_unlock(&cd->queue_lock);
 			cache_put(rq->item, cd);
 			kfree(rq->buf);
@@ -976,7 +969,6 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait,
 {
 	__poll_t mask;
 	struct cache_reader *rp = filp->private_data;
-	struct cache_queue *cq;
 
 	poll_wait(filp, &cd->queue_wait, wait);
 
@@ -988,12 +980,8 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait,
 
 	spin_lock(&cd->queue_lock);
 
-	for (cq= &rp->q; &cq->list != &cd->queue;
-	     cq = list_entry(cq->list.next, struct cache_queue, list))
-		if (!cq->reader) {
-			mask |= EPOLLIN | EPOLLRDNORM;
-			break;
-		}
+	if (cache_next_request(cd, rp->next_seqno))
+		mask |= EPOLLIN | EPOLLRDNORM;
 	spin_unlock(&cd->queue_lock);
 	return mask;
 }
@@ -1004,7 +992,7 @@ static int cache_ioctl(struct inode *ino, struct file *filp,
 {
 	int len = 0;
 	struct cache_reader *rp = filp->private_data;
-	struct cache_queue *cq;
+	struct cache_request *rq;
 
 	if (cmd != FIONREAD || !rp)
 		return -EINVAL;
@@ -1014,14 +1002,9 @@ static int cache_ioctl(struct inode *ino, struct file *filp,
 	/* only find the length remaining in current request,
 	 * or the length of the next request
 	 */
-	for (cq= &rp->q; &cq->list != &cd->queue;
-	     cq = list_entry(cq->list.next, struct cache_queue, list))
-		if (!cq->reader) {
-			struct cache_request *cr =
-				container_of(cq, struct cache_request, q);
-			len = cr->len - rp->offset;
-			break;
-		}
+	rq = cache_next_request(cd, rp->next_seqno);
+	if (rq)
+		len = rq->len - rp->offset;
 	spin_unlock(&cd->queue_lock);
 
 	return put_user(len, (int __user *)arg);
@@ -1042,10 +1025,10 @@ static int cache_open(struct inode *inode, struct file *filp,
 			return -ENOMEM;
 		}
 		rp->offset = 0;
-		rp->q.reader = 1;
+		rp->next_seqno = 0;
 
 		spin_lock(&cd->queue_lock);
-		list_add(&rp->q.list, &cd->queue);
+		list_add(&rp->list, &cd->readers);
 		spin_unlock(&cd->queue_lock);
 	}
 	if (filp->f_mode & FMODE_WRITE)
@@ -1064,26 +1047,21 @@ static int cache_release(struct inode *inode, struct file *filp,
 
 		spin_lock(&cd->queue_lock);
 		if (rp->offset) {
-			struct cache_queue *cq;
-			for (cq = &rp->q; &cq->list != &cd->queue;
-			     cq = list_entry(cq->list.next,
-					     struct cache_queue, list))
-				if (!cq->reader) {
-					struct cache_request *cr =
-						container_of(cq,
-						struct cache_request, q);
-					cr->readers--;
-					if (cr->readers == 0 &&
-					    !test_bit(CACHE_PENDING,
-						      &cr->item->flags)) {
-						list_del(&cr->q.list);
-						rq = cr;
-					}
-					break;
+			struct cache_request *cr;
+
+			cr = cache_next_request(cd, rp->next_seqno);
+			if (cr) {
+				cr->readers--;
+				if (cr->readers == 0 &&
+				    !test_bit(CACHE_PENDING,
+					      &cr->item->flags)) {
+					list_del(&cr->list);
+					rq = cr;
 				}
+			}
 			rp->offset = 0;
 		}
-		list_del(&rp->q.list);
+		list_del(&rp->list);
 		spin_unlock(&cd->queue_lock);
 
 		if (rq) {
@@ -1107,27 +1085,24 @@ static int cache_release(struct inode *inode, struct file *filp,
 
 static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
 {
-	struct cache_queue *cq, *tmp;
-	struct cache_request *cr;
+	struct cache_request *cr, *tmp;
 	LIST_HEAD(dequeued);
 
 	spin_lock(&detail->queue_lock);
-	list_for_each_entry_safe(cq, tmp, &detail->queue, list)
-		if (!cq->reader) {
-			cr = container_of(cq, struct cache_request, q);
-			if (cr->item != ch)
-				continue;
-			if (test_bit(CACHE_PENDING, &ch->flags))
-				/* Lost a race and it is pending again */
-				break;
-			if (cr->readers != 0)
-				continue;
-			list_move(&cr->q.list, &dequeued);
-		}
+	list_for_each_entry_safe(cr, tmp, &detail->requests, list) {
+		if (cr->item != ch)
+			continue;
+		if (test_bit(CACHE_PENDING, &ch->flags))
+			/* Lost a race and it is pending again */
+			break;
+		if (cr->readers != 0)
+			continue;
+		list_move(&cr->list, &dequeued);
+	}
 	spin_unlock(&detail->queue_lock);
 	while (!list_empty(&dequeued)) {
-		cr = list_entry(dequeued.next, struct cache_request, q.list);
-		list_del(&cr->q.list);
+		cr = list_entry(dequeued.next, struct cache_request, list);
+		list_del(&cr->list);
 		cache_put(cr->item, detail);
 		kfree(cr->buf);
 		kfree(cr);
@@ -1245,14 +1220,14 @@ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 		return -EAGAIN;
 	}
 
-	crq->q.reader = 0;
 	crq->buf = buf;
 	crq->len = 0;
 	crq->readers = 0;
 	spin_lock(&detail->queue_lock);
 	if (test_bit(CACHE_PENDING, &h->flags)) {
 		crq->item = cache_get(h);
-		list_add_tail(&crq->q.list, &detail->queue);
+		crq->seqno = detail->next_seqno++;
+		list_add_tail(&crq->list, &detail->requests);
 		trace_cache_entry_upcall(detail, h);
 	} else
 		/* Lost a race, no longer PENDING, so don't enqueue */
-- 
cgit v1.2.3


From ee66b9e3e1c69efc986f3932555f07121c3460a7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 26 Feb 2026 09:47:35 -0500
Subject: SUNRPC: Allocate a separate Reply page array

struct svc_rqst uses a single dynamically-allocated page array
(rq_pages) for both the incoming RPC Call message and the outgoing
RPC Reply message. rq_respages is a sliding pointer into rq_pages
that each transport receive path must compute based on how many
pages the Call consumed. This boundary tracking is a source of
confusion and bugs, and prevents an RPC transaction from having
both a large Call and a large Reply simultaneously.

Allocate rq_respages as its own page array, eliminating the boundary
arithmetic. This decouples Call and Reply buffer lifetimes,
following the precedent set by rq_bvec (a separate dynamically-
allocated array for I/O vectors).

Each svc_rqst now pins twice as many pages as before. For a server
running 16 threads with a 1MB maximum payload, the additional cost
is roughly 16MB of pinned memory. The new dynamic svc thread count
facility keeps this overhead minimal on an idle server. A subsequent
patch in this series limits per-request repopulation to only the
pages released during the previous RPC, avoiding a full-array scan
on each call to svc_alloc_arg().

Note: We've considered several alternatives to maintaining a full
second array. Each alternative reintroduces either boundary logic
complexity or I/O-path allocation pressure.

rq_next_page is initialized in svc_alloc_arg() and svc_process()
during Reply construction, and in svc_rdma_recvfrom() as a
precaution on error paths. Transport receive paths no longer compute
it from the Call size.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h              | 47 ++++++++++++++++-----------------
 net/sunrpc/svc.c                        | 29 ++++++++++++++++----
 net/sunrpc/svc_xprt.c                   | 36 ++++++++++++++++++-------
 net/sunrpc/svcsock.c                    |  6 -----
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 15 +++--------
 5 files changed, 77 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 62152e4f3bcc..3b1a98ab5cba 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -134,25 +134,24 @@ enum {
 extern u32 svc_max_payload(const struct svc_rqst *rqstp);
 
 /*
- * RPC Requests and replies are stored in one or more pages.
- * We maintain an array of pages for each server thread.
- * Requests are copied into these pages as they arrive.  Remaining
- * pages are available to write the reply into.
+ * RPC Call and Reply messages each have their own page array.
+ * rq_pages holds the incoming Call message; rq_respages holds
+ * the outgoing Reply message. Both arrays are sized to
+ * svc_serv_maxpages() entries and are allocated dynamically.
  *
- * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each server thread
- * needs to allocate more to replace those used in sending.  To help keep track
- * of these pages we have a receive list where all pages initialy live, and a
- * send list where pages are moved to when there are to be part of a reply.
+ * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each
+ * server thread needs to allocate more to replace those used in
+ * sending.
  *
- * We use xdr_buf for holding responses as it fits well with NFS
- * read responses (that have a header, and some data pages, and possibly
- * a tail) and means we can share some client side routines.
+ * xdr_buf holds responses; the structure fits NFS read responses
+ * (header, data pages, optional tail) and enables sharing of
+ * client-side routines.
  *
- * The xdr_buf.head kvec always points to the first page in the rq_*pages
- * list.  The xdr_buf.pages pointer points to the second page on that
- * list.  xdr_buf.tail points to the end of the first page.
- * This assumes that the non-page part of an rpc reply will fit
- * in a page - NFSd ensures this.  lockd also has no trouble.
+ * The xdr_buf.head kvec always points to the first page in the
+ * rq_*pages list. The xdr_buf.pages pointer points to the second
+ * page on that list. xdr_buf.tail points to the end of the first
+ * page. This assumes that the non-page part of an rpc reply will
+ * fit in a page - NFSd ensures this. lockd also has no trouble.
  */
 
 /**
@@ -162,10 +161,10 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  * Returns a count of pages or vectors that can hold the maximum
  * size RPC message for @serv.
  *
- * Each request/reply pair can have at most one "payload", plus two
- * pages, one for the request, and one for the reply.
- * nfsd_splice_actor() might need an extra page when a READ payload
- * is not page-aligned.
+ * Each page array can hold at most one payload plus two
+ * overhead pages (one for the RPC header, one for tail data).
+ * nfsd_splice_actor() might need an extra page when a READ
+ * payload is not page-aligned.
  */
 static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
 {
@@ -204,11 +203,11 @@ struct svc_rqst {
 	struct xdr_stream	rq_res_stream;
 	struct folio		*rq_scratch_folio;
 	struct xdr_buf		rq_res;
-	unsigned long		rq_maxpages;	/* num of entries in rq_pages */
-	struct page *		*rq_pages;
-	struct page *		*rq_respages;	/* points into rq_pages */
+	unsigned long		rq_maxpages;	/* entries per page array */
+	struct page *		*rq_pages;	/* Call buffer pages */
+	struct page *		*rq_respages;	/* Reply buffer pages */
 	struct page *		*rq_next_page; /* next reply page to use */
-	struct page *		*rq_page_end;  /* one past the last page */
+	struct page *		*rq_page_end;  /* one past the last reply page */
 
 	struct folio_batch	rq_fbatch;
 	struct bio_vec		*rq_bvec;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index f7ec02457328..9abef638b1e0 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -638,13 +638,23 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node)
 {
 	rqstp->rq_maxpages = svc_serv_maxpages(serv);
 
-	/* rq_pages' last entry is NULL for historical reasons. */
+	/* +1 for a NULL sentinel readable by nfsd_splice_actor() */
 	rqstp->rq_pages = kcalloc_node(rqstp->rq_maxpages + 1,
 				       sizeof(struct page *),
 				       GFP_KERNEL, node);
 	if (!rqstp->rq_pages)
 		return false;
 
+	/* +1 for a NULL sentinel at rq_page_end (see svc_rqst_replace_page) */
+	rqstp->rq_respages = kcalloc_node(rqstp->rq_maxpages + 1,
+					  sizeof(struct page *),
+					  GFP_KERNEL, node);
+	if (!rqstp->rq_respages) {
+		kfree(rqstp->rq_pages);
+		rqstp->rq_pages = NULL;
+		return false;
+	}
+
 	return true;
 }
 
@@ -656,10 +666,19 @@ svc_release_buffer(struct svc_rqst *rqstp)
 {
 	unsigned long i;
 
-	for (i = 0; i < rqstp->rq_maxpages; i++)
-		if (rqstp->rq_pages[i])
-			put_page(rqstp->rq_pages[i]);
-	kfree(rqstp->rq_pages);
+	if (rqstp->rq_pages) {
+		for (i = 0; i < rqstp->rq_maxpages; i++)
+			if (rqstp->rq_pages[i])
+				put_page(rqstp->rq_pages[i]);
+		kfree(rqstp->rq_pages);
+	}
+
+	if (rqstp->rq_respages) {
+		for (i = 0; i < rqstp->rq_maxpages; i++)
+			if (rqstp->rq_respages[i])
+				put_page(rqstp->rq_respages[i]);
+		kfree(rqstp->rq_respages);
+	}
 }
 
 static void
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 56a663b8939f..e027765f4307 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -650,14 +650,13 @@ static void svc_check_conn_limits(struct svc_serv *serv)
 	}
 }
 
-static bool svc_alloc_arg(struct svc_rqst *rqstp)
+static bool svc_fill_pages(struct svc_rqst *rqstp, struct page **pages,
+			   unsigned long npages)
 {
-	struct xdr_buf *arg = &rqstp->rq_arg;
-	unsigned long pages, filled, ret;
+	unsigned long filled, ret;
 
-	pages = rqstp->rq_maxpages;
-	for (filled = 0; filled < pages; filled = ret) {
-		ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages);
+	for (filled = 0; filled < npages; filled = ret) {
+		ret = alloc_pages_bulk(GFP_KERNEL, npages, pages);
 		if (ret > filled)
 			/* Made progress, don't sleep yet */
 			continue;
@@ -667,11 +666,29 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
 			set_current_state(TASK_RUNNING);
 			return false;
 		}
-		trace_svc_alloc_arg_err(pages, ret);
+		trace_svc_alloc_arg_err(npages, ret);
 		memalloc_retry_wait(GFP_KERNEL);
 	}
-	rqstp->rq_page_end = &rqstp->rq_pages[pages];
-	rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */
+	return true;
+}
+
+static bool svc_alloc_arg(struct svc_rqst *rqstp)
+{
+	struct xdr_buf *arg = &rqstp->rq_arg;
+	unsigned long pages;
+
+	pages = rqstp->rq_maxpages;
+
+	if (!svc_fill_pages(rqstp, rqstp->rq_pages, pages))
+		return false;
+	if (!svc_fill_pages(rqstp, rqstp->rq_respages, pages))
+		return false;
+	rqstp->rq_next_page = rqstp->rq_respages;
+	rqstp->rq_page_end = &rqstp->rq_respages[pages];
+	/* svc_rqst_replace_page() dereferences *rq_next_page even
+	 * at rq_page_end; NULL prevents releasing a garbage page.
+	 */
+	rqstp->rq_page_end[0] = NULL;
 
 	/* Make arg->head point to first page and arg->pages point to rest */
 	arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
@@ -1277,7 +1294,6 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
 	rqstp->rq_addrlen     = dr->addrlen;
 	/* Save off transport header len in case we get deferred again */
 	rqstp->rq_daddr       = dr->daddr;
-	rqstp->rq_respages    = rqstp->rq_pages;
 	rqstp->rq_xprt_ctxt   = dr->xprt_ctxt;
 
 	dr->xprt_ctxt = NULL;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f28c6076f7e8..c86f28f720f7 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -351,8 +351,6 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
 
 	for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE)
 		bvec_set_page(&bvec[i], rqstp->rq_pages[i], PAGE_SIZE, 0);
-	rqstp->rq_respages = &rqstp->rq_pages[i];
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
 	iov_iter_bvec(&msg.msg_iter, ITER_DEST, bvec, i, buflen);
 	if (seek) {
@@ -677,13 +675,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 	if (len <= rqstp->rq_arg.head[0].iov_len) {
 		rqstp->rq_arg.head[0].iov_len = len;
 		rqstp->rq_arg.page_len = 0;
-		rqstp->rq_respages = rqstp->rq_pages+1;
 	} else {
 		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
-		rqstp->rq_respages = rqstp->rq_pages + 1 +
-			DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
 	}
-	rqstp->rq_next_page = rqstp->rq_respages+1;
 
 	if (serv->sv_stats)
 		serv->sv_stats->netudpcnt++;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index e7e4a39ca6c6..3081a37a5896 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -861,18 +861,12 @@ static noinline void svc_rdma_read_complete(struct svc_rqst *rqstp,
 	unsigned int i;
 
 	/* Transfer the Read chunk pages into @rqstp.rq_pages, replacing
-	 * the rq_pages that were already allocated for this rqstp.
+	 * the receive buffer pages already allocated for this rqstp.
 	 */
-	release_pages(rqstp->rq_respages, ctxt->rc_page_count);
+	release_pages(rqstp->rq_pages, ctxt->rc_page_count);
 	for (i = 0; i < ctxt->rc_page_count; i++)
 		rqstp->rq_pages[i] = ctxt->rc_pages[i];
 
-	/* Update @rqstp's result send buffer to start after the
-	 * last page in the RDMA Read payload.
-	 */
-	rqstp->rq_respages = &rqstp->rq_pages[ctxt->rc_page_count];
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
-
 	/* Prevent svc_rdma_recv_ctxt_put() from releasing the
 	 * pages in ctxt::rc_pages a second time.
 	 */
@@ -931,10 +925,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	struct svc_rdma_recv_ctxt *ctxt;
 	int ret;
 
-	/* Prevent svc_xprt_release() from releasing pages in rq_pages
-	 * when returning 0 or an error.
+	/* Precaution: a zero page count on error return causes
+	 * svc_rqst_release_pages() to release nothing.
 	 */
-	rqstp->rq_respages = rqstp->rq_pages;
 	rqstp->rq_next_page = rqstp->rq_respages;
 
 	rqstp->rq_xprt_ctxt = NULL;
-- 
cgit v1.2.3


From 7ed7504287a627834f2a35ef04e5dfd26d1c8986 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 26 Feb 2026 09:47:38 -0500
Subject: SUNRPC: Track consumed rq_pages entries

The rq_pages array holds pages allocated for incoming RPC requests.
Two transport receive paths NULL entries in rq_pages to prevent
svc_rqst_release_pages() from freeing pages that the transport has
taken ownership of:

- svc_tcp_save_pages() moves partial request data pages to
  svsk->sk_pages during multi-fragment TCP reassembly.

- svc_rdma_clear_rqst_pages() moves request data pages to
  head->rc_pages because they are targets of active RDMA Read WRs.

A new rq_pages_nfree field in struct svc_rqst records how many
entries were NULLed. svc_alloc_arg() uses it to refill only those
entries rather than scanning the full rq_pages array. In steady
state, the transport NULLs a handful of entries per RPC, so the
allocator visits only those entries instead of the full ~259 slots
(for 1MB messages).

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h        | 10 ++++++++++
 net/sunrpc/svc.c                  |  1 +
 net/sunrpc/svc_xprt.c             | 11 ++++++++---
 net/sunrpc/svcsock.c              |  1 +
 net/sunrpc/xprtrdma/svc_rdma_rw.c |  1 +
 5 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3b1a98ab5cba..c3399cf64524 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -143,6 +143,15 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  * server thread needs to allocate more to replace those used in
  * sending.
  *
+ * rq_pages request page contract:
+ *
+ * Transport receive paths that move request data pages out of
+ * rq_pages -- TCP multi-fragment reassembly (svc_tcp_save_pages)
+ * and RDMA Read I/O (svc_rdma_clear_rqst_pages) -- NULL those
+ * entries to prevent svc_rqst_release_pages() from freeing pages
+ * still in transport use, and set rq_pages_nfree to the count.
+ * svc_alloc_arg() refills only that many rq_pages entries.
+ *
  * xdr_buf holds responses; the structure fits NFS read responses
  * (header, data pages, optional tail) and enables sharing of
  * client-side routines.
@@ -204,6 +213,7 @@ struct svc_rqst {
 	struct folio		*rq_scratch_folio;
 	struct xdr_buf		rq_res;
 	unsigned long		rq_maxpages;	/* entries per page array */
+	unsigned long		rq_pages_nfree;	/* rq_pages entries NULLed by transport */
 	struct page *		*rq_pages;	/* Call buffer pages */
 	struct page *		*rq_respages;	/* Reply buffer pages */
 	struct page *		*rq_next_page; /* next reply page to use */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 0ce16e9abdf6..6e57e35fa6d6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -655,6 +655,7 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node)
 		return false;
 	}
 
+	rqstp->rq_pages_nfree = rqstp->rq_maxpages;
 	return true;
 }
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e027765f4307..795b5729525f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -675,12 +675,17 @@ static bool svc_fill_pages(struct svc_rqst *rqstp, struct page **pages,
 static bool svc_alloc_arg(struct svc_rqst *rqstp)
 {
 	struct xdr_buf *arg = &rqstp->rq_arg;
-	unsigned long pages;
+	unsigned long pages, nfree;
 
 	pages = rqstp->rq_maxpages;
 
-	if (!svc_fill_pages(rqstp, rqstp->rq_pages, pages))
-		return false;
+	nfree = rqstp->rq_pages_nfree;
+	if (nfree) {
+		if (!svc_fill_pages(rqstp, rqstp->rq_pages, nfree))
+			return false;
+		rqstp->rq_pages_nfree = 0;
+	}
+
 	if (!svc_fill_pages(rqstp, rqstp->rq_respages, pages))
 		return false;
 	rqstp->rq_next_page = rqstp->rq_respages;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c86f28f720f7..2ce43f9995f1 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1009,6 +1009,7 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
 		svsk->sk_pages[i] = rqstp->rq_pages[i];
 		rqstp->rq_pages[i] = NULL;
 	}
+	rqstp->rq_pages_nfree = npages;
 }
 
 static void svc_tcp_clear_pages(struct svc_sock *svsk)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 4ec2f9ae06aa..cf4a1762b629 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1107,6 +1107,7 @@ static void svc_rdma_clear_rqst_pages(struct svc_rqst *rqstp,
 		head->rc_pages[i] = rqstp->rq_pages[i];
 		rqstp->rq_pages[i] = NULL;
 	}
+	rqstp->rq_pages_nfree = head->rc_page_count;
 }
 
 /**
-- 
cgit v1.2.3


From d7f3efd9ff474867b04e1ea784690f02450a245b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 26 Feb 2026 09:47:39 -0500
Subject: SUNRPC: Optimize rq_respages allocation in svc_alloc_arg

svc_alloc_arg() invokes alloc_pages_bulk() with the full rq_maxpages
count (~259 for 1MB messages) for the rq_respages array, causing a
full-array scan despite most slots holding valid pages.

svc_rqst_release_pages() NULLs only the range

  [rq_respages, rq_next_page)

after each RPC, so only that range contains NULL entries. Limit the
rq_respages fill in svc_alloc_arg() to that range instead of
scanning the full array.

svc_init_buffer() initializes rq_next_page to span the entire
rq_respages array, so the first svc_alloc_arg() call fills all
slots.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 4 ++++
 net/sunrpc/svc.c           | 1 +
 net/sunrpc/svc_xprt.c      | 8 +++++++-
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index c3399cf64524..669c944eaf7f 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -152,6 +152,10 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  * still in transport use, and set rq_pages_nfree to the count.
  * svc_alloc_arg() refills only that many rq_pages entries.
  *
+ * For rq_respages, svc_rqst_release_pages() NULLs entries in
+ * [rq_respages, rq_next_page) after each RPC. svc_alloc_arg()
+ * refills only that range.
+ *
  * xdr_buf holds responses; the structure fits NFS read responses
  * (header, data pages, optional tail) and enables sharing of
  * client-side routines.
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6e57e35fa6d6..5e0b5ec2fd52 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -656,6 +656,7 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node)
 	}
 
 	rqstp->rq_pages_nfree = rqstp->rq_maxpages;
+	rqstp->rq_next_page = rqstp->rq_respages + rqstp->rq_maxpages;
 	return true;
 }
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 795b5729525f..b16e710926c1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -686,8 +686,14 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
 		rqstp->rq_pages_nfree = 0;
 	}
 
-	if (!svc_fill_pages(rqstp, rqstp->rq_respages, pages))
+	if (WARN_ON_ONCE(rqstp->rq_next_page < rqstp->rq_respages))
 		return false;
+	nfree = rqstp->rq_next_page - rqstp->rq_respages;
+	if (nfree) {
+		if (!svc_fill_pages(rqstp, rqstp->rq_respages, nfree))
+			return false;
+	}
+
 	rqstp->rq_next_page = rqstp->rq_respages;
 	rqstp->rq_page_end = &rqstp->rq_respages[pages];
 	/* svc_rqst_replace_page() dereferences *rq_next_page even
-- 
cgit v1.2.3


From ccc89b9d1ed233349cfe8d87b842e7351b74d8de Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 27 Feb 2026 09:03:28 -0500
Subject: svcrdma: Add fair queuing for Send Queue access

When the Send Queue fills, multiple threads may wait for SQ slots.
The previous implementation had no ordering guarantee, allowing
starvation when one thread repeatedly acquires slots while others
wait indefinitely.

Introduce a ticket-based fair queuing system. Each waiter takes a
ticket number and is served in FIFO order. This ensures forward
progress for all waiters when SQ capacity is constrained.

The implementation has two phases:
1. Fast path: attempt to reserve SQ slots without waiting
2. Slow path: take a ticket, wait for turn, then wait for slots

The ticket system adds two atomic counters to the transport:
- sc_sq_ticket_head: next ticket to issue
- sc_sq_ticket_tail: ticket currently being served

A dedicated wait queue (sc_sq_ticket_wait) handles ticket
ordering, separate from sc_send_wait which handles SQ capacity.
This separation ensures that send completions (the high-frequency
wake source) wake only the current ticket holder rather than all
queued waiters. Ticket handoff wakes only the ticket wait queue,
and each ticket holder that exits via connection close propagates
the wake to the next waiter in line.

When a waiter successfully reserves slots, it advances the tail
counter and wakes the next waiter. This creates an orderly handoff
that prevents starvation while maintaining good throughput on the
fast path when contention is low.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h          |  10 ++
 net/sunrpc/xprtrdma/svc_rdma_rw.c        |  37 ++-----
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 162 +++++++++++++++++++++++--------
 net/sunrpc/xprtrdma/svc_rdma_transport.c |   6 +-
 4 files changed, 146 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 57f4fd94166a..658b8498177e 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -84,6 +84,9 @@ struct svcxprt_rdma {
 
 	atomic_t             sc_sq_avail;	/* SQEs ready to be consumed */
 	unsigned int	     sc_sq_depth;	/* Depth of SQ */
+	atomic_t	     sc_sq_ticket_head;	/* Next ticket to issue */
+	atomic_t	     sc_sq_ticket_tail;	/* Ticket currently serving */
+	wait_queue_head_t    sc_sq_ticket_wait;	/* Ticket ordering waitlist */
 	__be32		     sc_fc_credits;	/* Forward credits */
 	u32		     sc_max_requests;	/* Max requests */
 	u32		     sc_max_bc_requests;/* Backward credits */
@@ -306,6 +309,13 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
 				    struct svc_rdma_recv_ctxt *rctxt,
 				    int status);
 extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail);
+extern int svc_rdma_sq_wait(struct svcxprt_rdma *rdma,
+			    const struct rpc_rdma_cid *cid, int sqecount);
+extern int svc_rdma_post_send_err(struct svcxprt_rdma *rdma,
+				  const struct rpc_rdma_cid *cid,
+				  const struct ib_send_wr *bad_wr,
+				  const struct ib_send_wr *first_wr,
+				  int sqecount, int ret);
 extern int svc_rdma_sendto(struct svc_rqst *);
 extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
 				   unsigned int length);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index cf4a1762b629..97bce806974b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -405,34 +405,17 @@ static int svc_rdma_post_chunk_ctxt(struct svcxprt_rdma *rdma,
 		cqe = NULL;
 	}
 
-	do {
-		if (atomic_sub_return(cc->cc_sqecount,
-				      &rdma->sc_sq_avail) > 0) {
-			cc->cc_posttime = ktime_get();
-			ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
-			if (ret)
-				break;
-			return 0;
-		}
-
-		percpu_counter_inc(&svcrdma_stat_sq_starve);
-		trace_svcrdma_sq_full(rdma, &cc->cc_cid);
-		atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
-		wait_event(rdma->sc_send_wait,
-			   atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
-		trace_svcrdma_sq_retry(rdma, &cc->cc_cid);
-	} while (1);
-
-	trace_svcrdma_sq_post_err(rdma, &cc->cc_cid, ret);
-	svc_xprt_deferred_close(&rdma->sc_xprt);
-
-	/* If even one was posted, there will be a completion. */
-	if (bad_wr != first_wr)
-		return 0;
+	ret = svc_rdma_sq_wait(rdma, &cc->cc_cid, cc->cc_sqecount);
+	if (ret < 0)
+		return ret;
 
-	atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
-	wake_up(&rdma->sc_send_wait);
-	return -ENOTCONN;
+	cc->cc_posttime = ktime_get();
+	ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+	if (ret)
+		return svc_rdma_post_send_err(rdma, &cc->cc_cid, bad_wr,
+					      first_wr, cc->cc_sqecount,
+					      ret);
+	return 0;
 }
 
 /* Build a bvec that covers one kvec in an xdr_buf.
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 17c8429da9d5..02559947272a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -294,6 +294,117 @@ void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail)
 		wake_up(&rdma->sc_send_wait);
 }
 
+/**
+ * svc_rdma_sq_wait - Wait for SQ slots using fair queuing
+ * @rdma: controlling transport
+ * @cid: completion ID for tracing
+ * @sqecount: number of SQ entries needed
+ *
+ * A ticket-based system ensures fair ordering when multiple threads
+ * wait for Send Queue capacity. Each waiter takes a ticket and is
+ * served in order, preventing starvation.
+ *
+ * Protocol invariant: every ticket holder must increment
+ * sc_sq_ticket_tail exactly once, whether the reservation
+ * succeeds or the connection closes. Failing to advance the
+ * tail stalls all subsequent waiters.
+ *
+ * The ticket counters are signed 32-bit atomics. After
+ * wrapping through INT_MAX, the equality check
+ * (tail == ticket) remains correct because both counters
+ * advance monotonically and the comparison uses exact
+ * equality rather than relational operators.
+ *
+ * Return values:
+ *   %0: SQ slots were reserved successfully
+ *   %-ENOTCONN: The connection was lost
+ */
+int svc_rdma_sq_wait(struct svcxprt_rdma *rdma,
+		     const struct rpc_rdma_cid *cid, int sqecount)
+{
+	int ticket;
+
+	/* Fast path: try to reserve SQ slots without waiting.
+	 *
+	 * A failed reservation temporarily understates sc_sq_avail
+	 * until the compensating atomic_add restores it. A Send
+	 * completion arriving in that window sees a lower count
+	 * than reality, but the value self-corrects once the add
+	 * completes. No ordering guarantee is needed here because
+	 * the slow path serializes all contended waiters.
+	 */
+	if (likely(atomic_sub_return(sqecount, &rdma->sc_sq_avail) >= 0))
+		return 0;
+	atomic_add(sqecount, &rdma->sc_sq_avail);
+
+	/* Slow path: take a ticket and wait in line */
+	ticket = atomic_fetch_inc(&rdma->sc_sq_ticket_head);
+
+	percpu_counter_inc(&svcrdma_stat_sq_starve);
+	trace_svcrdma_sq_full(rdma, cid);
+
+	/* Wait until all earlier tickets have been served */
+	wait_event(rdma->sc_sq_ticket_wait,
+		   test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) ||
+		   atomic_read(&rdma->sc_sq_ticket_tail) == ticket);
+	if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
+		goto out_close;
+
+	/* It's our turn. Wait for enough SQ slots to be available. */
+	while (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
+		atomic_add(sqecount, &rdma->sc_sq_avail);
+
+		wait_event(rdma->sc_send_wait,
+			   test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) ||
+			   atomic_read(&rdma->sc_sq_avail) >= sqecount);
+		if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
+			goto out_close;
+	}
+
+	/* Slots reserved successfully. Let the next waiter proceed. */
+	atomic_inc(&rdma->sc_sq_ticket_tail);
+	wake_up(&rdma->sc_sq_ticket_wait);
+	trace_svcrdma_sq_retry(rdma, cid);
+	return 0;
+
+out_close:
+	atomic_inc(&rdma->sc_sq_ticket_tail);
+	wake_up(&rdma->sc_sq_ticket_wait);
+	return -ENOTCONN;
+}
+
+/**
+ * svc_rdma_post_send_err - Handle ib_post_send failure
+ * @rdma: controlling transport
+ * @cid: completion ID for tracing
+ * @bad_wr: first WR that was not posted
+ * @first_wr: first WR in the chain
+ * @sqecount: number of SQ entries that were reserved
+ * @ret: error code from ib_post_send
+ *
+ * Return values:
+ *   %0: At least one WR was posted; a completion handles cleanup
+ *   %-ENOTCONN: No WRs were posted; SQ slots are released
+ */
+int svc_rdma_post_send_err(struct svcxprt_rdma *rdma,
+			   const struct rpc_rdma_cid *cid,
+			   const struct ib_send_wr *bad_wr,
+			   const struct ib_send_wr *first_wr,
+			   int sqecount, int ret)
+{
+	trace_svcrdma_sq_post_err(rdma, cid, ret);
+	svc_xprt_deferred_close(&rdma->sc_xprt);
+
+	/* If even one WR was posted, a Send completion will
+	 * return the reserved SQ slots.
+	 */
+	if (bad_wr != first_wr)
+		return 0;
+
+	svc_rdma_wake_send_waiters(rdma, sqecount);
+	return -ENOTCONN;
+}
+
 /**
  * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
  * @cq: Completion Queue context
@@ -336,11 +447,6 @@ flushed:
  * that these values remain available after the ib_post_send() call.
  * In some error flow cases, svc_rdma_wc_send() releases @ctxt.
  *
- * Note there is potential for starvation when the Send Queue is
- * full because there is no order to when waiting threads are
- * awoken. The transport is typically provisioned with a deep
- * enough Send Queue that SQ exhaustion should be a rare event.
- *
  * Return values:
  *   %0: @ctxt's WR chain was posted successfully
  *   %-ENOTCONN: The connection was lost
@@ -362,42 +468,16 @@ int svc_rdma_post_send(struct svcxprt_rdma *rdma,
 				      send_wr->sg_list[0].length,
 				      DMA_TO_DEVICE);
 
-	/* If the SQ is full, wait until an SQ entry is available */
-	while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
-		if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
-			svc_rdma_wake_send_waiters(rdma, sqecount);
-
-			/* When the transport is torn down, assume
-			 * ib_drain_sq() will trigger enough Send
-			 * completions to wake us. The XPT_CLOSE test
-			 * above should then cause the while loop to
-			 * exit.
-			 */
-			percpu_counter_inc(&svcrdma_stat_sq_starve);
-			trace_svcrdma_sq_full(rdma, &cid);
-			wait_event(rdma->sc_send_wait,
-				   atomic_read(&rdma->sc_sq_avail) > 0);
-			trace_svcrdma_sq_retry(rdma, &cid);
-			continue;
-		}
-
-		trace_svcrdma_post_send(ctxt);
-		ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
-		if (ret) {
-			trace_svcrdma_sq_post_err(rdma, &cid, ret);
-			svc_xprt_deferred_close(&rdma->sc_xprt);
-
-			/* If even one WR was posted, there will be a
-			 * Send completion that bumps sc_sq_avail.
-			 */
-			if (bad_wr == first_wr) {
-				svc_rdma_wake_send_waiters(rdma, sqecount);
-				break;
-			}
-		}
-		return 0;
-	}
-	return -ENOTCONN;
+	ret = svc_rdma_sq_wait(rdma, &cid, sqecount);
+	if (ret < 0)
+		return ret;
+
+	trace_svcrdma_post_send(ctxt);
+	ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+	if (ret)
+		return svc_rdma_post_send_err(rdma, &cid, bad_wr,
+					      first_wr, sqecount, ret);
+	return 0;
 }
 
 /**
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f2d72181a6fe..f18bc60d9f4f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -179,6 +179,7 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
 	init_llist_head(&cma_xprt->sc_recv_ctxts);
 	init_llist_head(&cma_xprt->sc_rw_ctxts);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
+	init_waitqueue_head(&cma_xprt->sc_sq_ticket_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
@@ -477,6 +478,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
 		newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
 	atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
+	atomic_set(&newxprt->sc_sq_ticket_head, 0);
+	atomic_set(&newxprt->sc_sq_ticket_tail, 0);
 
 	newxprt->sc_pd = ib_alloc_pd(dev, 0);
 	if (IS_ERR(newxprt->sc_pd)) {
@@ -649,7 +652,8 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 	 * If there are already waiters on the SQ,
 	 * return false.
 	 */
-	if (waitqueue_active(&rdma->sc_send_wait))
+	if (waitqueue_active(&rdma->sc_send_wait) ||
+	    waitqueue_active(&rdma->sc_sq_ticket_wait))
 		return 0;
 
 	/* Otherwise return true. */
-- 
cgit v1.2.3


From d16f060f3ee297424c0aba047b1d49208adb9318 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 27 Feb 2026 09:03:31 -0500
Subject: svcrdma: Add Write chunk WRs to the RPC's Send WR chain

Previously, Write chunk RDMA Writes were posted via a separate
ib_post_send() call with their own completion handler. Each Write
chunk incurred a doorbell and generated a completion event.

Link Write chunk WRs onto the RPC Reply's Send WR chain so that a
single ib_post_send() call posts both the RDMA Writes and the Send
WR. A single completion event signals that all operations have
finished. This reduces both doorbell rate and completion rate, as
well as eliminating the latency of a round-trip between the Write
chunk completion and the subsequent Send WR posting.

The lifecycle of Write chunk resources changes: previously, the
svc_rdma_write_done() completion handler released Write chunk
resources when RDMA Writes completed. With WR chaining, resources
remain live until the Send completion. A new sc_write_info_list
tracks Write chunk metadata attached to each Send context, and
svc_rdma_write_chunk_release() frees these resources when the
Send context is released.

The svc_rdma_write_done() handler now handles only error cases.
On success it returns immediately since the Send completion handles
resource release. On failure (WR flush), it closes the connection
to signal to the client that the RPC Reply is incomplete.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h       | 13 +++--
 net/sunrpc/xprtrdma/svc_rdma_rw.c     | 94 +++++++++++++++++++++++++++--------
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 10 +++-
 3 files changed, 91 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 658b8498177e..df6e08aaad57 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -216,6 +216,7 @@ struct svc_rdma_recv_ctxt {
  */
 struct svc_rdma_write_info {
 	struct svcxprt_rdma	*wi_rdma;
+	struct list_head	wi_list;
 
 	const struct svc_rdma_chunk	*wi_chunk;
 
@@ -244,7 +245,10 @@ struct svc_rdma_send_ctxt {
 	struct ib_cqe		sc_cqe;
 	struct xdr_buf		sc_hdrbuf;
 	struct xdr_stream	sc_stream;
+
+	struct list_head	sc_write_info_list;
 	struct svc_rdma_write_info sc_reply_info;
+
 	void			*sc_xprt_buf;
 	int			sc_page_count;
 	int			sc_cur_sge_no;
@@ -277,11 +281,14 @@ extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
 extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
 				struct svc_rdma_chunk_ctxt *cc,
 				enum dma_data_direction dir);
+extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
+					 struct svc_rdma_send_ctxt *ctxt);
 extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
 					 struct svc_rdma_send_ctxt *ctxt);
-extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
-				    const struct svc_rdma_recv_ctxt *rctxt,
-				    const struct xdr_buf *xdr);
+extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
+				       const struct svc_rdma_recv_ctxt *rctxt,
+				       struct svc_rdma_send_ctxt *sctxt,
+				       const struct xdr_buf *xdr);
 extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
 					const struct svc_rdma_pcl *write_pcl,
 					const struct svc_rdma_pcl *reply_pcl,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 97bce806974b..ebc90c12c835 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -251,6 +251,28 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
 	queue_work(svcrdma_wq, &info->wi_work);
 }
 
+/**
+ * svc_rdma_write_chunk_release - Release Write chunk I/O resources
+ * @rdma: controlling transport
+ * @ctxt: Send context that is being released
+ *
+ * Write chunk resources remain live until Send completion because
+ * Write WRs are chained to the Send WR. This function releases all
+ * write_info structures accumulated on @ctxt->sc_write_info_list.
+ */
+void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
+				  struct svc_rdma_send_ctxt *ctxt)
+{
+	struct svc_rdma_write_info *info;
+
+	while (!list_empty(&ctxt->sc_write_info_list)) {
+		info = list_first_entry(&ctxt->sc_write_info_list,
+					struct svc_rdma_write_info, wi_list);
+		list_del(&info->wi_list);
+		svc_rdma_write_info_free(info);
+	}
+}
+
 /**
  * svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
  * @rdma: controlling transport
@@ -307,13 +329,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 	struct ib_cqe *cqe = wc->wr_cqe;
 	struct svc_rdma_chunk_ctxt *cc =
 			container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
-	struct svc_rdma_write_info *info =
-			container_of(cc, struct svc_rdma_write_info, wi_cc);
 
 	switch (wc->status) {
 	case IB_WC_SUCCESS:
 		trace_svcrdma_wc_write(&cc->cc_cid);
-		break;
+		return;
 	case IB_WC_WR_FLUSH_ERR:
 		trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
 		break;
@@ -321,12 +341,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 		trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
 	}
 
-	svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
-
-	if (unlikely(wc->status != IB_WC_SUCCESS))
-		svc_xprt_deferred_close(&rdma->sc_xprt);
-
-	svc_rdma_write_info_free(info);
+	/* The RDMA Write has flushed, so the client won't get
+	 * some of the outgoing RPC message. Signal the loss
+	 * to the client by closing the connection.
+	 */
+	svc_xprt_deferred_close(&rdma->sc_xprt);
 }
 
 /**
@@ -600,13 +619,27 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
 	return xdr->len;
 }
 
-static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
-				     const struct svc_rdma_chunk *chunk,
-				     const struct xdr_buf *xdr)
+/*
+ * svc_rdma_prepare_write_chunk - Link Write WRs for @chunk onto @sctxt's chain
+ *
+ * Write WRs are prepended to the Send WR chain so that a single
+ * ib_post_send() posts both RDMA Writes and the final Send. Only
+ * the first WR in each chunk gets a CQE for error detection;
+ * subsequent WRs complete without individual completion events.
+ * The Send WR's signaled completion indicates all chained
+ * operations have finished.
+ */
+static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
+					struct svc_rdma_send_ctxt *sctxt,
+					const struct svc_rdma_chunk *chunk,
+					const struct xdr_buf *xdr)
 {
 	struct svc_rdma_write_info *info;
 	struct svc_rdma_chunk_ctxt *cc;
+	struct ib_send_wr *first_wr;
 	struct xdr_buf payload;
+	struct list_head *pos;
+	struct ib_cqe *cqe;
 	int ret;
 
 	if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
@@ -622,10 +655,25 @@ static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
 	if (ret != payload.len)
 		goto out_err;
 
-	trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
-	ret = svc_rdma_post_chunk_ctxt(rdma, cc);
-	if (ret < 0)
+	ret = -EINVAL;
+	if (unlikely(sctxt->sc_sqecount + cc->cc_sqecount > rdma->sc_sq_depth))
 		goto out_err;
+
+	first_wr = sctxt->sc_wr_chain;
+	cqe = &cc->cc_cqe;
+	list_for_each(pos, &cc->cc_rwctxts) {
+		struct svc_rdma_rw_ctxt *rwc;
+
+		rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
+		first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
+					   rdma->sc_port_num, cqe, first_wr);
+		cqe = NULL;
+	}
+	sctxt->sc_wr_chain = first_wr;
+	sctxt->sc_sqecount += cc->cc_sqecount;
+	list_add(&info->wi_list, &sctxt->sc_write_info_list);
+
+	trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
 	return 0;
 
 out_err:
@@ -634,17 +682,19 @@ out_err:
 }
 
 /**
- * svc_rdma_send_write_list - Send all chunks on the Write list
+ * svc_rdma_prepare_write_list - Construct WR chain for sending Write list
  * @rdma: controlling RDMA transport
  * @rctxt: Write list provisioned by the client
+ * @sctxt: Send WR resources
  * @xdr: xdr_buf containing an RPC Reply message
  *
- * Returns zero on success, or a negative errno if one or more
- * Write chunks could not be sent.
+ * Returns zero on success, or a negative errno if WR chain
+ * construction fails for one or more Write chunks.
  */
-int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
-			     const struct svc_rdma_recv_ctxt *rctxt,
-			     const struct xdr_buf *xdr)
+int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
+				const struct svc_rdma_recv_ctxt *rctxt,
+				struct svc_rdma_send_ctxt *sctxt,
+				const struct xdr_buf *xdr)
 {
 	struct svc_rdma_chunk *chunk;
 	int ret;
@@ -652,7 +702,7 @@ int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
 	pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
 		if (!chunk->ch_payload_length)
 			break;
-		ret = svc_rdma_send_write_chunk(rdma, chunk, xdr);
+		ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index bef68efa7034..8b3f0c8c14b2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -150,6 +150,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
 	ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
 	ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
 	ctxt->sc_cqe.done = svc_rdma_wc_send;
+	INIT_LIST_HEAD(&ctxt->sc_write_info_list);
 	ctxt->sc_xprt_buf = buffer;
 	xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
 		     rdma->sc_max_req_size);
@@ -237,6 +238,7 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
 	struct ib_device *device = rdma->sc_cm_id->device;
 	unsigned int i;
 
+	svc_rdma_write_chunk_release(rdma, ctxt);
 	svc_rdma_reply_chunk_release(rdma, ctxt);
 
 	if (ctxt->sc_page_count)
@@ -1054,6 +1056,12 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
 	sctxt->sc_send_wr.num_sge = 1;
 	sctxt->sc_send_wr.opcode = IB_WR_SEND;
 	sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
+
+	/* Ensure only the error message is posted, not any previously
+	 * prepared Write chunk WRs.
+	 */
+	sctxt->sc_wr_chain = &sctxt->sc_send_wr;
+	sctxt->sc_sqecount = 1;
 	if (svc_rdma_post_send(rdma, sctxt))
 		goto put_ctxt;
 	return;
@@ -1101,7 +1109,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	if (!p)
 		goto put_ctxt;
 
-	ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res);
+	ret = svc_rdma_prepare_write_list(rdma, rctxt, sctxt, &rqstp->rq_res);
 	if (ret < 0)
 		goto put_ctxt;
 
-- 
cgit v1.2.3


From 3603bf99062c6d563df4fba3848f829d5401d959 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 28 Feb 2026 14:09:22 -0800
Subject: SUNRPC: xdr.h: fix all kernel-doc warnings

Correct a function parameter name (s/page/folio/) and add function
return value sections for multiple functions to eliminate
kernel-doc warnings:

Warning: include/linux/sunrpc/xdr.h:298 function parameter 'folio' not
 described in 'xdr_set_scratch_folio'
Warning: include/linux/sunrpc/xdr.h:337 No description found for return
 value of 'xdr_stream_remaining'
Warning: include/linux/sunrpc/xdr.h:357 No description found for return
 value of 'xdr_align_size'
Warning: include/linux/sunrpc/xdr.h:374 No description found for return
 value of 'xdr_pad_size'
Warning: include/linux/sunrpc/xdr.h:387 No description found for return
 value of 'xdr_stream_encode_item_present'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/xdr.h | 48 +++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 152597750f55..b639a6fafcbc 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -290,7 +290,7 @@ xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
 /**
  * xdr_set_scratch_folio - Attach a scratch buffer for decoding data
  * @xdr: pointer to xdr_stream struct
- * @page: an anonymous folio
+ * @folio: an anonymous folio
  *
  * See xdr_set_scratch_buffer().
  */
@@ -330,7 +330,7 @@ static inline void xdr_commit_encode(struct xdr_stream *xdr)
  * xdr_stream_remaining - Return the number of bytes remaining in the stream
  * @xdr: pointer to struct xdr_stream
  *
- * Return value:
+ * Returns:
  *   Number of bytes remaining in @xdr before xdr->end
  */
 static inline size_t
@@ -350,7 +350,7 @@ ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor,
  * xdr_align_size - Calculate padded size of an object
  * @n: Size of an object being XDR encoded (in bytes)
  *
- * Return value:
+ * Returns:
  *   Size (in bytes) of the object including xdr padding
  */
 static inline size_t
@@ -368,7 +368,7 @@ xdr_align_size(size_t n)
  * This implementation avoids the need for conditional
  * branches or modulo division.
  *
- * Return value:
+ * Returns:
  *   Size (in bytes) of the needed XDR pad
  */
 static inline size_t xdr_pad_size(size_t n)
@@ -380,7 +380,7 @@ static inline size_t xdr_pad_size(size_t n)
  * xdr_stream_encode_item_present - Encode a "present" list item
  * @xdr: pointer to xdr_stream
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -399,7 +399,7 @@ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr)
  * xdr_stream_encode_item_absent - Encode a "not present" list item
  * @xdr: pointer to xdr_stream
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -419,7 +419,7 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr)
  * @p: address in a buffer into which to encode
  * @n: boolean value to encode
  *
- * Return value:
+ * Returns:
  *   Address of item following the encoded boolean
  */
 static inline __be32 *xdr_encode_bool(__be32 *p, u32 n)
@@ -433,7 +433,7 @@ static inline __be32 *xdr_encode_bool(__be32 *p, u32 n)
  * @xdr: pointer to xdr_stream
  * @n: boolean value to encode
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -453,7 +453,7 @@ static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n)
  * @xdr: pointer to xdr_stream
  * @n: integer to encode
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -474,7 +474,7 @@ xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n)
  * @xdr: pointer to xdr_stream
  * @n: integer to encode
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -495,7 +495,7 @@ xdr_stream_encode_be32(struct xdr_stream *xdr, __be32 n)
  * @xdr: pointer to xdr_stream
  * @n: 64-bit integer to encode
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -517,7 +517,7 @@ xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n)
  * @ptr: pointer to void pointer
  * @len: size of object
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -542,7 +542,7 @@ xdr_stream_encode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t len)
  * @ptr: pointer to opaque data object
  * @len: size of object pointed to by @ptr
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -563,7 +563,7 @@ xdr_stream_encode_opaque_fixed(struct xdr_stream *xdr, const void *ptr, size_t l
  * @ptr: pointer to opaque data object
  * @len: size of object pointed to by @ptr
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -585,7 +585,7 @@ xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len)
  * @array: array of integers
  * @array_size: number of elements in @array
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -608,7 +608,7 @@ xdr_stream_encode_uint32_array(struct xdr_stream *xdr,
  * xdr_item_is_absent - symbolically handle XDR discriminators
  * @p: pointer to undecoded discriminator
  *
- * Return values:
+ * Returns:
  *   %true if the following XDR item is absent
  *   %false if the following XDR item is present
  */
@@ -621,7 +621,7 @@ static inline bool xdr_item_is_absent(const __be32 *p)
  * xdr_item_is_present - symbolically handle XDR discriminators
  * @p: pointer to undecoded discriminator
  *
- * Return values:
+ * Returns:
  *   %true if the following XDR item is present
  *   %false if the following XDR item is absent
  */
@@ -635,7 +635,7 @@ static inline bool xdr_item_is_present(const __be32 *p)
  * @xdr: pointer to xdr_stream
  * @ptr: pointer to a u32 in which to store the result
  *
- * Return values:
+ * Returns:
  *   %0 on success
  *   %-EBADMSG on XDR buffer overflow
  */
@@ -656,7 +656,7 @@ xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr)
  * @xdr: pointer to xdr_stream
  * @ptr: location to store integer
  *
- * Return values:
+ * Returns:
  *   %0 on success
  *   %-EBADMSG on XDR buffer overflow
  */
@@ -677,7 +677,7 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr)
  * @xdr: pointer to xdr_stream
  * @ptr: location to store integer
  *
- * Return values:
+ * Returns:
  *   %0 on success
  *   %-EBADMSG on XDR buffer overflow
  */
@@ -698,7 +698,7 @@ xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr)
  * @xdr: pointer to xdr_stream
  * @ptr: location to store 64-bit integer
  *
- * Return values:
+ * Returns:
  *   %0 on success
  *   %-EBADMSG on XDR buffer overflow
  */
@@ -720,7 +720,7 @@ xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr)
  * @ptr: location to store data
  * @len: size of buffer pointed to by @ptr
  *
- * Return values:
+ * Returns:
  *   %0 on success
  *   %-EBADMSG on XDR buffer overflow
  */
@@ -746,7 +746,7 @@ xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len)
  * on @xdr. It is therefore expected that the object it points to should
  * be processed immediately.
  *
- * Return values:
+ * Returns:
  *   On success, returns size of object stored in *@ptr
  *   %-EBADMSG on XDR buffer overflow
  *   %-EMSGSIZE if the size of the object would exceed @maxlen
@@ -777,7 +777,7 @@ xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxle
  * @array: location to store the integer array or NULL
  * @array_size: number of elements to store
  *
- * Return values:
+ * Returns:
  *   On success, returns number of elements stored in @array
  *   %-EBADMSG on XDR buffer overflow
  *   %-EMSGSIZE if the size of the array exceeds @array_size
-- 
cgit v1.2.3


From f995fc377ac7d3757e1d94e6403940c4b8f3d76e Mon Sep 17 00:00:00 2001
From: "Tycho Andersen (AMD)" <tycho@kernel.org>
Date: Tue, 24 Mar 2026 10:13:00 -0600
Subject: crypto/ccp: Implement SNP x86 shutdown

The SEV firmware has support to disable SNP during an SNP_SHUTDOWN_EX command.
Verify that this support is available and set the flag so that SNP is disabled
when it is not being used.

In cases where SNP is disabled, skip the call to amd_iommu_snp_disable(), as
all of the IOMMU pages have already been made shared. Also skip the panic
case, since snp_shutdown() does IPIs.

Signed-off-by: Tycho Andersen (AMD) <tycho@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Link: https://patch.msgid.link/20260324161301.1353976-7-tycho@kernel.org
---
 drivers/crypto/ccp/sev-dev.c | 41 ++++++++++++++++++++++++-----------------
 include/linux/psp-sev.h      |  5 ++++-
 2 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 0d0c09a3a0af..cc5c5b3ad66d 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -2039,6 +2039,8 @@ static int __sev_snp_shutdown_locked(int *error, bool panic)
 	memset(&data, 0, sizeof(data));
 	data.len = sizeof(data);
 	data.iommu_snp_shutdown = 1;
+	if (sev->snp_feat_info_0.ecx & SNP_X86_SHUTDOWN_SUPPORTED)
+		data.x86_snp_shutdown = 1;
 
 	/*
 	 * If invoked during panic handling, local interrupts are disabled
@@ -2072,23 +2074,28 @@ static int __sev_snp_shutdown_locked(int *error, bool panic)
 		return ret;
 	}
 
-	/*
-	 * SNP_SHUTDOWN_EX with IOMMU_SNP_SHUTDOWN set to 1 disables SNP
-	 * enforcement by the IOMMU and also transitions all pages
-	 * associated with the IOMMU to the Reclaim state.
-	 * Firmware was transitioning the IOMMU pages to Hypervisor state
-	 * before version 1.53. But, accounting for the number of assigned
-	 * 4kB pages in a 2M page was done incorrectly by not transitioning
-	 * to the Reclaim state. This resulted in RMP #PF when later accessing
-	 * the 2M page containing those pages during kexec boot. Hence, the
-	 * firmware now transitions these pages to Reclaim state and hypervisor
-	 * needs to transition these pages to shared state. SNP Firmware
-	 * version 1.53 and above are needed for kexec boot.
-	 */
-	ret = amd_iommu_snp_disable();
-	if (ret) {
-		dev_err(sev->dev, "SNP IOMMU shutdown failed\n");
-		return ret;
+	if (data.x86_snp_shutdown) {
+		if (!panic)
+			snp_shutdown();
+	} else {
+		/*
+		 * SNP_SHUTDOWN_EX with IOMMU_SNP_SHUTDOWN set to 1 disables SNP
+		 * enforcement by the IOMMU and also transitions all pages
+		 * associated with the IOMMU to the Reclaim state.
+		 * Firmware was transitioning the IOMMU pages to Hypervisor state
+		 * before version 1.53. But, accounting for the number of assigned
+		 * 4kB pages in a 2M page was done incorrectly by not transitioning
+		 * to the Reclaim state. This resulted in RMP #PF when later accessing
+		 * the 2M page containing those pages during kexec boot. Hence, the
+		 * firmware now transitions these pages to Reclaim state and hypervisor
+		 * needs to transition these pages to shared state. SNP Firmware
+		 * version 1.53 and above are needed for kexec boot.
+		 */
+		ret = amd_iommu_snp_disable();
+		if (ret) {
+			dev_err(sev->dev, "SNP IOMMU shutdown failed\n");
+			return ret;
+		}
 	}
 
 	snp_leak_hv_fixed_pages();
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 69ffa4b4d1fa..d5099a2baca5 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -829,12 +829,14 @@ struct sev_data_range_list {
  *
  * @len: length of the command buffer read by the PSP
  * @iommu_snp_shutdown: Disable enforcement of SNP in the IOMMU
+ * @x86_snp_shutdown: Disable SNP on all cores
  * @rsvd1: reserved
  */
 struct sev_data_snp_shutdown_ex {
 	u32 len;
 	u32 iommu_snp_shutdown:1;
-	u32 rsvd1:31;
+	u32 x86_snp_shutdown:1;
+	u32 rsvd1:30;
 } __packed;
 
 /**
@@ -891,6 +893,7 @@ struct snp_feature_info {
 } __packed;
 
 /* Feature bits in ECX */
+#define SNP_X86_SHUTDOWN_SUPPORTED		BIT(1)
 #define SNP_RAPL_DISABLE_SUPPORTED		BIT(2)
 #define SNP_CIPHER_TEXT_HIDING_SUPPORTED	BIT(3)
 #define SNP_AES_256_XTS_POLICY_SUPPORTED	BIT(4)
-- 
cgit v1.2.3


From f3b536878a3cf47e5193a96176a3ca2aaf0d848f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 11 Mar 2026 22:14:44 -0700
Subject: powercap: correct kernel-doc function parameter names

Use the correct function parameter names in kernel-doc comments to
avoid these warnings:

Warning: include/linux/powercap.h:254 function parameter 'name' not
 described in 'powercap_register_control_type'
Warning: include/linux/powercap.h:298 function parameter 'nr_constraints'
 not described in 'powercap_register_zone'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260312051444.685136-1-rdunlap@infradead.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/powercap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/powercap.h b/include/linux/powercap.h
index 3d557bbcd2c7..603419db924c 100644
--- a/include/linux/powercap.h
+++ b/include/linux/powercap.h
@@ -238,7 +238,7 @@ static inline void *powercap_get_zone_data(struct powercap_zone *power_zone)
 *			Advantage of this parameter is that client can embed
 *			this data in its data structures and allocate in a
 *			single call, preventing multiple allocations.
-* @control_type_name:	The Name of this control_type, which will be shown
+* @name:		The Name of this control_type, which will be shown
 *			in the sysfs Interface.
 * @ops:			Callbacks for control type. This parameter is optional.
 *
@@ -277,7 +277,7 @@ int powercap_unregister_control_type(struct powercap_control_type *instance);
 * @name:	A name for this zone.
 * @parent:	A pointer to the parent power zone instance if any or NULL
 * @ops:		Pointer to zone operation callback structure.
-* @no_constraints: Number of constraints for this zone
+* @nr_constraints: Number of constraints for this zone
 * @const_ops:	Pointer to constraint callback structure
 *
 * Register a power zone under a given control type. A power zone must register
-- 
cgit v1.2.3


From 8765715b4e8a1dd24ab5d507c42fc0bcd3d83f5c Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Fri, 13 Mar 2026 11:53:27 -0700
Subject: powercap: intel_rapl: Remove unused AVERAGE_POWER primitive

The AVERAGE_POWER primitive and RAPL_PRIMITIVE_DERIVED flag are not
used anywhere in the code.

Remove them to simplify the primitive handling logic.

No functional changes.

Co-developed-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://patch.msgid.link/20260313185333.2370733-2-sathyanarayanan.kuppuswamy@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/intel_rapl_common.c | 13 -------------
 include/linux/intel_rapl.h           |  1 -
 2 files changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 6a2153039f73..e099514e6c56 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -90,7 +90,6 @@
 #define TPMI_INFO_MAX_TIME_WIN_MASK	GENMASK_ULL(60, 54)
 
 /* Non HW constants */
-#define RAPL_PRIMITIVE_DERIVED		BIT(1)	/* not from raw data */
 #define RAPL_PRIMITIVE_DUMMY		BIT(2)
 
 #define ENERGY_UNIT_SCALE		1000	/* scale from driver unit to powercap unit */
@@ -703,9 +702,6 @@ static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
 						      19, RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
 	[PSYS_TIME_WINDOW2]	= PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK,
 						      51, RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
-	/* non-hardware */
-	[AVERAGE_POWER]		= PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
-						      RAPL_PRIMITIVE_DERIVED),
 };
 
 /* RAPL primitives for TPMI I/F */
@@ -745,9 +741,6 @@ static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = {
 						      54, RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
 	[THROTTLED_TIME]	= PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK,
 						      0, RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
-	/* non-hardware */
-	[AVERAGE_POWER]		= PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
-						      RAPL_PRIMITIVE_DERIVED),
 };
 
 static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim)
@@ -841,12 +834,6 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
 	if (!ra.reg.val)
 		return -EINVAL;
 
-	/* non-hardware data are collected by the polling thread */
-	if (rpi->flag & RAPL_PRIMITIVE_DERIVED) {
-		*data = rd->rdd.primitives[prim];
-		return 0;
-	}
-
 	ra.mask = rpi->mask;
 
 	if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, pmu_ctx)) {
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index 6d694099a3ad..9e6bd654be1f 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -77,7 +77,6 @@ enum rapl_primitives {
 	PSYS_TIME_WINDOW1,
 	PSYS_TIME_WINDOW2,
 	/* below are not raw primitive data */
-	AVERAGE_POWER,
 	NR_RAPL_PRIMITIVES,
 };
 
-- 
cgit v1.2.3


From 59eb73b98ae0b12fc9b39c08f0f5a5552cb02d1e Mon Sep 17 00:00:00 2001
From: John Groves <John@Groves.net>
Date: Fri, 27 Mar 2026 21:04:22 +0000
Subject: dax: Factor out dax_folio_reset_order() helper

Both fs/dax.c:dax_folio_put() and drivers/dax/fsdev.c:
fsdev_clear_folio_state() (the latter coming in the next commit after this
one) contain nearly identical code to reset a compound DAX folio back to
order-0 pages. Factor this out into a shared helper function.

The new dax_folio_reset_order() function:
- Clears the folio's mapping and share count
- Resets compound folio state via folio_reset_order()
- Clears PageHead and compound_head for each sub-page
- Restores the pgmap pointer for each resulting order-0 folio
- Returns the original folio order (for callers that need to advance by
  that many pages)

Two intentional differences from the original dax_folio_put() logic:

1. folio->share is cleared unconditionally. This is correct because the DAX
   subsystem maintains the invariant that share != 0 only when
   mapping == NULL (enforced by dax_folio_make_shared()). dax_folio_put()
   ensures share has reached zero before calling this helper, so the
   unconditional clear is safe.

2. folio->pgmap is now explicitly restored for order-0 folios. For the
   dax_folio_put() caller this is a no-op (reads and writes back the same
   field). It is intentional for the upcoming fsdev_clear_folio_state()
   caller, which converts previously-compound folios and needs pgmap
   re-established for all pages regardless of order.

This simplifies fsdev_clear_folio_state() from ~50 lines to ~15 lines.

Suggested-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: John Groves <john@groves.net>
Link: https://patch.msgid.link/0100019d311cc6b9-5be7428a-7f16-4774-8f90-a44b88ac5660-000000@email.amazonses.com
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 fs/dax.c            | 73 ++++++++++++++++++++++++++++++++++++++++-------------
 include/linux/dax.h |  1 +
 2 files changed, 56 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index 289e6254aa30..87bed6de920d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -378,6 +378,58 @@ static void dax_folio_make_shared(struct folio *folio)
 	folio->share = 1;
 }
 
+/**
+ * dax_folio_reset_order - Reset a compound DAX folio to order-0 pages
+ * @folio: The folio to reset
+ *
+ * Splits a compound folio back into individual order-0 pages,
+ * clearing compound state and restoring pgmap pointers.
+ *
+ * Returns: the original folio order (0 if already order-0)
+ */
+int dax_folio_reset_order(struct folio *folio)
+{
+	struct dev_pagemap *pgmap = page_pgmap(&folio->page);
+	int order = folio_order(folio);
+
+	/*
+	 * DAX maintains the invariant that folio->share != 0 only when
+	 * folio->mapping == NULL (enforced by dax_folio_make_shared()).
+	 * Equivalently: folio->mapping != NULL implies folio->share == 0.
+	 * Callers ensure share has been decremented to zero before
+	 * calling here, so unconditionally clearing both fields is
+	 * correct.
+	 */
+	folio->mapping = NULL;
+	folio->share = 0;
+
+	if (!order) {
+		/*
+		 * Restore pgmap explicitly even for order-0 folios. For the
+		 * dax_folio_put() caller this is a no-op (same value), but
+		 * fsdev_clear_folio_state() may call this on folios that
+		 * were previously compound and need pgmap re-established.
+		 */
+		folio->pgmap = pgmap;
+		return 0;
+	}
+
+	folio_reset_order(folio);
+
+	for (int i = 0; i < (1UL << order); i++) {
+		struct page *page = folio_page(folio, i);
+		struct folio *f = (struct folio *)page;
+
+		ClearPageHead(page);
+		clear_compound_head(page);
+		f->mapping = NULL;
+		f->share = 0;
+		f->pgmap = pgmap;
+	}
+
+	return order;
+}
+
 static inline unsigned long dax_folio_put(struct folio *folio)
 {
 	unsigned long ref;
@@ -391,28 +443,13 @@ static inline unsigned long dax_folio_put(struct folio *folio)
 	if (ref)
 		return ref;
 
-	folio->mapping = NULL;
-	order = folio_order(folio);
-	if (!order)
-		return 0;
-	folio_reset_order(folio);
+	order = dax_folio_reset_order(folio);
 
+	/* Debug check: verify refcounts are zero for all sub-folios */
 	for (i = 0; i < (1UL << order); i++) {
-		struct dev_pagemap *pgmap = page_pgmap(&folio->page);
 		struct page *page = folio_page(folio, i);
-		struct folio *new_folio = (struct folio *)page;
-
-		ClearPageHead(page);
-		clear_compound_head(page);
 
-		new_folio->mapping = NULL;
-		/*
-		 * Reset pgmap which was over-written by
-		 * prep_compound_page().
-		 */
-		new_folio->pgmap = pgmap;
-		new_folio->share = 0;
-		WARN_ON_ONCE(folio_ref_count(new_folio));
+		WARN_ON_ONCE(folio_ref_count((struct folio *)page));
 	}
 
 	return ref;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index bf103f317cac..73cfc1a7c8f1 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -153,6 +153,7 @@ static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
 #if IS_ENABLED(CONFIG_FS_DAX)
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct dax_device *dax_dev, struct writeback_control *wbc);
+int dax_folio_reset_order(struct folio *folio);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
 struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
-- 
cgit v1.2.3


From 700ecbc1f5aa02ba9ad68d7be1ef7a9c8eae07e9 Mon Sep 17 00:00:00 2001
From: John Groves <John@Groves.net>
Date: Fri, 27 Mar 2026 21:05:03 +0000
Subject: dax: Add dax_set_ops() for setting dax_operations at bind time

Add a new dax_set_ops() function that allows drivers to set the
dax_operations after the dax_device has been allocated. This is needed
for fsdev_dax where the operations need to be set during probe and
cleared during unbind.

The fsdev driver uses devm_add_action_or_reset() for cleanup consistency,
avoiding the complexity of mixing devm-managed resources with manual
cleanup in a remove() callback. This ensures cleanup happens automatically
in the correct reverse order when the device is unbound.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: John Groves <john@groves.net>
Link: https://patch.msgid.link/0100019d311d65a0-b9c1419e-f3a0-4afd-b0bd-848f18ff5950-000000@email.amazonses.com
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/dax/fsdev.c | 16 ++++++++++++++++
 drivers/dax/super.c | 38 +++++++++++++++++++++++++++++++++++++-
 include/linux/dax.h |  1 +
 3 files changed, 54 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dax/fsdev.c b/drivers/dax/fsdev.c
index 30f57c74c979..4499d9621f33 100644
--- a/drivers/dax/fsdev.c
+++ b/drivers/dax/fsdev.c
@@ -117,6 +117,13 @@ static void fsdev_kill(void *dev_dax)
 	kill_dev_dax(dev_dax);
 }
 
+static void fsdev_clear_ops(void *data)
+{
+	struct dev_dax *dev_dax = data;
+
+	dax_set_ops(dev_dax->dax_dev, NULL);
+}
+
 /*
  * Page map operations for FS-DAX mode
  * Similar to fsdax_pagemap_ops in drivers/nvdimm/pmem.c
@@ -303,6 +310,15 @@ static int fsdev_dax_probe(struct dev_dax *dev_dax)
 	if (rc)
 		return rc;
 
+	/* Set the dax operations for fs-dax access path */
+	rc = dax_set_ops(dax_dev, &dev_dax_ops);
+	if (rc)
+		return rc;
+
+	rc = devm_add_action_or_reset(dev, fsdev_clear_ops, dev_dax);
+	if (rc)
+		return rc;
+
 	run_dax(dax_dev);
 	return devm_add_action_or_reset(dev, fsdev_kill, dev_dax);
 }
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index c00b9dff4a06..ba0b4cd18a77 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -157,6 +157,9 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 	if (!dax_alive(dax_dev))
 		return -ENXIO;
 
+	if (!dax_dev->ops)
+		return -EOPNOTSUPP;
+
 	if (nr_pages < 0)
 		return -EINVAL;
 
@@ -207,6 +210,10 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
 
 	if (!dax_alive(dax_dev))
 		return -ENXIO;
+
+	if (!dax_dev->ops)
+		return -EOPNOTSUPP;
+
 	/*
 	 * There are no callers that want to zero more than one page as of now.
 	 * Once users are there, this check can be removed after the
@@ -223,7 +230,7 @@ EXPORT_SYMBOL_GPL(dax_zero_page_range);
 size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
 		void *addr, size_t bytes, struct iov_iter *iter)
 {
-	if (!dax_dev->ops->recovery_write)
+	if (!dax_dev->ops || !dax_dev->ops->recovery_write)
 		return 0;
 	return dax_dev->ops->recovery_write(dax_dev, pgoff, addr, bytes, iter);
 }
@@ -307,6 +314,35 @@ void set_dax_nomc(struct dax_device *dax_dev)
 }
 EXPORT_SYMBOL_GPL(set_dax_nomc);
 
+/**
+ * dax_set_ops - set the dax_operations for a dax_device
+ * @dax_dev: the dax_device to configure
+ * @ops: the operations to set (may be NULL to clear)
+ *
+ * This allows drivers to set the dax_operations after the dax_device
+ * has been allocated. This is needed when the device is created before
+ * the driver that needs specific ops is bound (e.g., fsdev_dax binding
+ * to a dev_dax created by hmem).
+ *
+ * When setting non-NULL ops, fails if ops are already set (returns -EBUSY).
+ * When clearing ops (NULL), always succeeds.
+ *
+ * Return: 0 on success, -EBUSY if ops already set
+ */
+int dax_set_ops(struct dax_device *dax_dev, const struct dax_operations *ops)
+{
+	if (ops) {
+		/* Setting ops: fail if already set */
+		if (cmpxchg(&dax_dev->ops, NULL, ops) != NULL)
+			return -EBUSY;
+	} else {
+		/* Clearing ops: always allowed */
+		dax_dev->ops = NULL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dax_set_ops);
+
 bool dax_alive(struct dax_device *dax_dev)
 {
 	lockdep_assert_held(&dax_srcu);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 73cfc1a7c8f1..b19bfe0c2fd1 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -243,6 +243,7 @@ static inline void dax_break_layout_final(struct inode *inode)
 
 bool dax_alive(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
+int dax_set_ops(struct dax_device *dax_dev, const struct dax_operations *ops);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 		enum dax_access_mode mode, void **kaddr, unsigned long *pfn);
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
-- 
cgit v1.2.3


From eec38f5d86d27535509c99f02ccc642ceb0c3e2a Mon Sep 17 00:00:00 2001
From: John Groves <john@groves.net>
Date: Fri, 27 Mar 2026 21:05:12 +0000
Subject: dax: Add fs_dax_get() func to prepare dax for fs-dax usage

The fs_dax_get() function should be called by fs-dax file systems after
opening a fsdev dax device. This adds holder_operations, which provides
a memory failure callback path and effects exclusivity between callers
of fs_dax_get().

fs_dax_get() is specific to fsdev_dax, so it checks the driver type
(which required touching bus.[ch]). fs_dax_get() fails if fsdev_dax is
not bound to the memory.

This function serves the same role as fs_dax_get_by_bdev(), which dax
file systems call after opening the pmem block device.

This can't be located in fsdev.c because struct dax_device is opaque
there.

This will be called by fs/fuse/famfs.c in a subsequent commit.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: John Groves <john@groves.net>
Link: https://patch.msgid.link/0100019d311d8750-75395c22-031b-4d5f-aebe-790dca656b87-000000@email.amazonses.com
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/dax/bus.c   |  2 --
 drivers/dax/bus.h   |  2 ++
 drivers/dax/super.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/dax.h | 16 +++++++++----
 4 files changed, 79 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 1b412264bb36..32f7b7702c28 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -39,8 +39,6 @@ static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
 	return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
 }
 
-#define to_dax_drv(__drv)	container_of_const(__drv, struct dax_device_driver, drv)
-
 static struct dax_id *__dax_match_id(const struct dax_device_driver *dax_drv,
 		const char *dev_name)
 {
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index 880bdf7e72d7..dc6f112ac4a4 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -42,6 +42,8 @@ struct dax_device_driver {
 	void (*remove)(struct dev_dax *dev);
 };
 
+#define to_dax_drv(__drv) container_of_const(__drv, struct dax_device_driver, drv)
+
 int __dax_driver_register(struct dax_device_driver *dax_drv,
 		struct module *module, const char *mod_name);
 #define dax_driver_register(driver) \
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index ba0b4cd18a77..d4ab60c406bf 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -14,6 +14,7 @@
 #include <linux/fs.h>
 #include <linux/cacheinfo.h>
 #include "dax-private.h"
+#include "bus.h"
 
 /**
  * struct dax_device - anchor object for dax services
@@ -111,6 +112,10 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off,
 }
 EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
 
+#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
+
+#if IS_ENABLED(CONFIG_FS_DAX)
+
 void fs_put_dax(struct dax_device *dax_dev, void *holder)
 {
 	if (dax_dev && holder &&
@@ -119,7 +124,66 @@ void fs_put_dax(struct dax_device *dax_dev, void *holder)
 	put_dax(dax_dev);
 }
 EXPORT_SYMBOL_GPL(fs_put_dax);
-#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
+
+/**
+ * fs_dax_get() - get ownership of a devdax via holder/holder_ops
+ *
+ * fs-dax file systems call this function to prepare to use a devdax device for
+ * fsdax. This is like fs_dax_get_by_bdev(), but the caller already has struct
+ * dev_dax (and there is no bdev). The holder makes this exclusive.
+ *
+ * @dax_dev: dev to be prepared for fs-dax usage
+ * @holder: filesystem or mapped device inside the dax_device
+ * @hops: operations for the inner holder
+ *
+ * Returns: 0 on success, <0 on failure
+ */
+int fs_dax_get(struct dax_device *dax_dev, void *holder,
+	const struct dax_holder_operations *hops)
+{
+	struct dev_dax *dev_dax;
+	struct dax_device_driver *dax_drv;
+	int id;
+
+	id = dax_read_lock();
+	if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode)) {
+		dax_read_unlock(id);
+		return -ENODEV;
+	}
+	dax_read_unlock(id);
+
+	/* Verify the device is bound to fsdev_dax driver */
+	dev_dax = dax_get_private(dax_dev);
+	if (!dev_dax) {
+		iput(&dax_dev->inode);
+		return -ENODEV;
+	}
+
+	device_lock(&dev_dax->dev);
+	if (!dev_dax->dev.driver) {
+		device_unlock(&dev_dax->dev);
+		iput(&dax_dev->inode);
+		return -ENODEV;
+	}
+	dax_drv = to_dax_drv(dev_dax->dev.driver);
+	if (dax_drv->type != DAXDRV_FSDEV_TYPE) {
+		device_unlock(&dev_dax->dev);
+		iput(&dax_dev->inode);
+		return -EOPNOTSUPP;
+	}
+	device_unlock(&dev_dax->dev);
+
+	if (cmpxchg(&dax_dev->holder_data, NULL, holder)) {
+		iput(&dax_dev->inode);
+		return -EBUSY;
+	}
+
+	dax_dev->holder_ops = hops;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fs_dax_get);
+#endif /* CONFIG_FS_DAX */
 
 enum dax_device_flags {
 	/* !alive + rcu grace period == no new operations / mappings */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index b19bfe0c2fd1..a85e270bfb3c 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -130,7 +130,6 @@ int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
 void dax_remove_host(struct gendisk *disk);
 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off,
 		void *holder, const struct dax_holder_operations *ops);
-void fs_put_dax(struct dax_device *dax_dev, void *holder);
 #else
 static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
 {
@@ -145,12 +144,12 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
 {
 	return NULL;
 }
-static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
-{
-}
 #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
 
 #if IS_ENABLED(CONFIG_FS_DAX)
+void fs_put_dax(struct dax_device *dax_dev, void *holder);
+int fs_dax_get(struct dax_device *dax_dev, void *holder,
+	       const struct dax_holder_operations *hops);
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct dax_device *dax_dev, struct writeback_control *wbc);
 int dax_folio_reset_order(struct folio *folio);
@@ -164,6 +163,15 @@ dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
 void dax_unlock_mapping_entry(struct address_space *mapping,
 		unsigned long index, dax_entry_t cookie);
 #else
+static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
+{
+}
+
+static inline int fs_dax_get(struct dax_device *dax_dev, void *holder,
+			     const struct dax_holder_operations *hops)
+{
+	return -EOPNOTSUPP;
+}
 static inline struct page *dax_layout_busy_page(struct address_space *mapping)
 {
 	return NULL;
-- 
cgit v1.2.3


From 2ae624d5a555d47a735fb3f4d850402859a4db77 Mon Sep 17 00:00:00 2001
From: John Groves <john@groves.net>
Date: Fri, 27 Mar 2026 21:05:21 +0000
Subject: dax: export dax_dev_get()

famfs needs to look up a dax_device by dev_t when resolving fmap
entries that reference character dax devices.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: John Groves <john@groves.net>
Link: https://patch.msgid.link/0100019d311daab5-bb212f0b-4e05-4668-bf53-d76fab56be68-000000@email.amazonses.com
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/dax/super.c | 3 ++-
 include/linux/dax.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index d4ab60c406bf..25cf99dd9360 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -521,7 +521,7 @@ static int dax_set(struct inode *inode, void *data)
 	return 0;
 }
 
-static struct dax_device *dax_dev_get(dev_t devt)
+struct dax_device *dax_dev_get(dev_t devt)
 {
 	struct dax_device *dax_dev;
 	struct inode *inode;
@@ -544,6 +544,7 @@ static struct dax_device *dax_dev_get(dev_t devt)
 
 	return dax_dev;
 }
+EXPORT_SYMBOL_GPL(dax_dev_get);
 
 struct dax_device *alloc_dax(void *private, const struct dax_operations *ops)
 {
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a85e270bfb3c..9ef95b136bb8 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -54,6 +54,7 @@ struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
 void *dax_holder(struct dax_device *dax_dev);
 void put_dax(struct dax_device *dax_dev);
 void kill_dax(struct dax_device *dax_dev);
+struct dax_device *dax_dev_get(dev_t devt);
 void dax_write_cache(struct dax_device *dax_dev, bool wc);
 bool dax_write_cache_enabled(struct dax_device *dax_dev);
 bool dax_synchronous(struct dax_device *dax_dev);
-- 
cgit v1.2.3


From 698f54d4eb9034bb4366985659bd77ae471b6c4e Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Wed, 25 Mar 2026 15:55:20 +0100
Subject: usb: translate ENOSPC for user space

In case of insufficient bandwidth usb_submit_urb()
returns -ENOSPC. Translating this to -EIO is not
optimal. There are insufficient resources not
an error. EBUSY is a better fit.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Link: https://patch.msgid.link/20260325145537.372993-1-oneukum@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 04277af4bb9d..815f2212936e 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -2075,6 +2075,8 @@ static inline int usb_translate_errors(int error_code)
 	case -ENODEV:
 	case -EOPNOTSUPP:
 		return error_code;
+	case -ENOSPC:
+		return -EBUSY;
 	default:
 		return -EIO;
 	}
-- 
cgit v1.2.3


From b422f7c072ac8d9b83c3d22e03709b92626ca88a Mon Sep 17 00:00:00 2001
From: Amit Sunil Dhamne <amitsd@google.com>
Date: Wed, 25 Mar 2026 22:22:24 +0000
Subject: mfd: max77759: add register bitmasks and modify irq configs for
 charger
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add register bitmasks for charger function.
In addition split the charger IRQs further such that each bit represents
an IRQ downstream of charger regmap irq chip. In addition populate the
ack_base to offload irq ack to the regmap irq chip framework.

Signed-off-by: Amit Sunil Dhamne <amitsd@google.com>
Reviewed-by: André Draszik <andre.draszik@linaro.org>
Link: https://patch.msgid.link/20260325-max77759-charger-v9-3-4486dd297adc@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/max77759.c       |  95 ++++++++++++++++++++++---
 include/linux/mfd/max77759.h | 166 +++++++++++++++++++++++++++++++++++--------
 2 files changed, 222 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max77759.c b/drivers/mfd/max77759.c
index a7efe233ec8c..9fa6027a92c4 100644
--- a/drivers/mfd/max77759.c
+++ b/drivers/mfd/max77759.c
@@ -201,8 +201,24 @@ static const struct regmap_config max77759_regmap_config_charger = {
  *         - SYSUVLO_INT
  *         - FSHIP_NOT_RD
  *     - CHGR_INT: charger
- *       - CHG_INT
- *       - CHG_INT2
+ *       - INT1
+ *         - AICL
+ *         - CHGIN
+ *         - WCIN
+ *         - CHG
+ *         - BAT
+ *         - INLIM
+ *         - THM2
+ *         - BYP
+ *       - INT2
+ *         - INSEL
+ *         - SYS_UVLO1
+ *         - SYS_UVLO2
+ *         - BAT_OILO
+ *         - CHG_STA_CC
+ *         - CHG_STA_CV
+ *         - CHG_STA_TO
+ *         - CHG_STA_DONE
  */
 enum {
 	MAX77759_INT_MAXQ,
@@ -228,8 +244,22 @@ enum {
 };
 
 enum {
-	MAX77759_CHARGER_INT_1,
-	MAX77759_CHARGER_INT_2,
+	MAX77759_CHGR_INT1_AICL,
+	MAX77759_CHGR_INT1_CHGIN,
+	MAX77759_CHGR_INT1_WCIN,
+	MAX77759_CHGR_INT1_CHG,
+	MAX77759_CHGR_INT1_BAT,
+	MAX77759_CHGR_INT1_INLIM,
+	MAX77759_CHGR_INT1_THM2,
+	MAX77759_CHGR_INT1_BYP,
+	MAX77759_CHGR_INT2_INSEL,
+	MAX77759_CHGR_INT2_SYS_UVLO1,
+	MAX77759_CHGR_INT2_SYS_UVLO2,
+	MAX77759_CHGR_INT2_BAT_OILO,
+	MAX77759_CHGR_INT2_CHG_STA_CC,
+	MAX77759_CHGR_INT2_CHG_STA_CV,
+	MAX77759_CHGR_INT2_CHG_STA_TO,
+	MAX77759_CHGR_INT2_CHG_STA_DONE,
 };
 
 static const struct regmap_irq max77759_pmic_irqs[] = {
@@ -256,8 +286,38 @@ static const struct regmap_irq max77759_topsys_irqs[] = {
 };
 
 static const struct regmap_irq max77759_chgr_irqs[] = {
-	REGMAP_IRQ_REG(MAX77759_CHARGER_INT_1, 0, GENMASK(7, 0)),
-	REGMAP_IRQ_REG(MAX77759_CHARGER_INT_2, 1, GENMASK(7, 0)),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT1_AICL, 0,
+		       MAX77759_CHGR_REG_CHG_INT_AICL),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT1_CHGIN, 0,
+		       MAX77759_CHGR_REG_CHG_INT_CHGIN),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT1_WCIN, 0,
+		       MAX77759_CHGR_REG_CHG_INT_WCIN),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT1_CHG, 0,
+		       MAX77759_CHGR_REG_CHG_INT_CHG),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT1_BAT, 0,
+		       MAX77759_CHGR_REG_CHG_INT_BAT),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT1_INLIM, 0,
+		       MAX77759_CHGR_REG_CHG_INT_INLIM),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT1_THM2, 0,
+		       MAX77759_CHGR_REG_CHG_INT_THM2),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT1_BYP, 0,
+		       MAX77759_CHGR_REG_CHG_INT_BYP),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT2_INSEL, 1,
+		       MAX77759_CHGR_REG_CHG_INT2_INSEL),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT2_SYS_UVLO1, 1,
+		       MAX77759_CHGR_REG_CHG_INT2_SYS_UVLO1),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT2_SYS_UVLO2, 1,
+		       MAX77759_CHGR_REG_CHG_INT2_SYS_UVLO2),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT2_BAT_OILO, 1,
+		       MAX77759_CHGR_REG_CHG_INT2_BAT_OILO),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT2_CHG_STA_CC, 1,
+		       MAX77759_CHGR_REG_CHG_INT2_CHG_STA_CC),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT2_CHG_STA_CV, 1,
+		       MAX77759_CHGR_REG_CHG_INT2_CHG_STA_CV),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT2_CHG_STA_TO, 1,
+		       MAX77759_CHGR_REG_CHG_INT2_CHG_STA_TO),
+	REGMAP_IRQ_REG(MAX77759_CHGR_INT2_CHG_STA_DONE, 1,
+		       MAX77759_CHGR_REG_CHG_INT2_CHG_STA_DONE),
 };
 
 static const struct regmap_irq_chip max77759_pmic_irq_chip = {
@@ -297,11 +357,12 @@ static const struct regmap_irq_chip max77759_topsys_irq_chip = {
 	.num_irqs = ARRAY_SIZE(max77759_topsys_irqs),
 };
 
-static const struct regmap_irq_chip max77759_chrg_irq_chip = {
+static const struct regmap_irq_chip max77759_chgr_irq_chip = {
 	.name = "max77759-chgr",
 	.domain_suffix = "CHGR",
 	.status_base = MAX77759_CHGR_REG_CHG_INT,
 	.mask_base = MAX77759_CHGR_REG_CHG_INT_MASK,
+	.ack_base = MAX77759_CHGR_REG_CHG_INT,
 	.num_regs = 2,
 	.irqs = max77759_chgr_irqs,
 	.num_irqs = ARRAY_SIZE(max77759_chgr_irqs),
@@ -325,8 +386,22 @@ static const struct resource max77759_gpio_resources[] = {
 };
 
 static const struct resource max77759_charger_resources[] = {
-	DEFINE_RES_IRQ_NAMED(MAX77759_CHARGER_INT_1, "INT1"),
-	DEFINE_RES_IRQ_NAMED(MAX77759_CHARGER_INT_2, "INT2"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT1_AICL,         "AICL"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT1_CHGIN,        "CHGIN"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT1_WCIN,         "WCIN"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT1_CHG,          "CHG"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT1_BAT,          "BAT"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT1_INLIM,        "INLIM"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT1_THM2,         "THM2"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT1_BYP,          "BYP"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT2_INSEL,        "INSEL"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT2_SYS_UVLO1,    "SYS_UVLO1"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT2_SYS_UVLO2,    "SYS_UVLO2"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT2_BAT_OILO,     "BAT_OILO"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT2_CHG_STA_CC,   "CHG_STA_CC"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT2_CHG_STA_CV,   "CHG_STA_CV"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT2_CHG_STA_TO,   "CHG_STA_TO"),
+	DEFINE_RES_IRQ_NAMED(MAX77759_CHGR_INT2_CHG_STA_DONE, "CHG_STA_DONE"),
 };
 
 static const struct mfd_cell max77759_cells[] = {
@@ -567,7 +642,7 @@ static int max77759_add_chained_charger(struct i2c_client *client,
 					    max77759->regmap_charger,
 					    MAX77759_INT_CHGR,
 					    parent,
-					    &max77759_chrg_irq_chip,
+					    &max77759_chgr_irq_chip,
 					    &irq_chip_data);
 	if (ret)
 		return ret;
diff --git a/include/linux/mfd/max77759.h b/include/linux/mfd/max77759.h
index c6face34e385..ad1aa4c2b779 100644
--- a/include/linux/mfd/max77759.h
+++ b/include/linux/mfd/max77759.h
@@ -59,35 +59,65 @@
 #define MAX77759_MAXQ_REG_AP_DATAIN0            0xb1
 #define MAX77759_MAXQ_REG_UIC_SWRST             0xe0
 
-#define MAX77759_CHGR_REG_CHG_INT               0xb0
-#define MAX77759_CHGR_REG_CHG_INT2              0xb1
-#define MAX77759_CHGR_REG_CHG_INT_MASK          0xb2
-#define MAX77759_CHGR_REG_CHG_INT2_MASK         0xb3
-#define MAX77759_CHGR_REG_CHG_INT_OK            0xb4
-#define MAX77759_CHGR_REG_CHG_DETAILS_00        0xb5
-#define MAX77759_CHGR_REG_CHG_DETAILS_01        0xb6
-#define MAX77759_CHGR_REG_CHG_DETAILS_02        0xb7
-#define MAX77759_CHGR_REG_CHG_DETAILS_03        0xb8
-#define MAX77759_CHGR_REG_CHG_CNFG_00           0xb9
-#define MAX77759_CHGR_REG_CHG_CNFG_01           0xba
-#define MAX77759_CHGR_REG_CHG_CNFG_02           0xbb
-#define MAX77759_CHGR_REG_CHG_CNFG_03           0xbc
-#define MAX77759_CHGR_REG_CHG_CNFG_04           0xbd
-#define MAX77759_CHGR_REG_CHG_CNFG_05           0xbe
-#define MAX77759_CHGR_REG_CHG_CNFG_06           0xbf
-#define MAX77759_CHGR_REG_CHG_CNFG_07           0xc0
-#define MAX77759_CHGR_REG_CHG_CNFG_08           0xc1
-#define MAX77759_CHGR_REG_CHG_CNFG_09           0xc2
-#define MAX77759_CHGR_REG_CHG_CNFG_10           0xc3
-#define MAX77759_CHGR_REG_CHG_CNFG_11           0xc4
-#define MAX77759_CHGR_REG_CHG_CNFG_12           0xc5
-#define MAX77759_CHGR_REG_CHG_CNFG_13           0xc6
-#define MAX77759_CHGR_REG_CHG_CNFG_14           0xc7
-#define MAX77759_CHGR_REG_CHG_CNFG_15           0xc8
-#define MAX77759_CHGR_REG_CHG_CNFG_16           0xc9
-#define MAX77759_CHGR_REG_CHG_CNFG_17           0xca
-#define MAX77759_CHGR_REG_CHG_CNFG_18           0xcb
-#define MAX77759_CHGR_REG_CHG_CNFG_19           0xcc
+#define MAX77759_CHGR_REG_CHG_INT                      0xb0
+#define   MAX77759_CHGR_REG_CHG_INT_AICL               BIT(7)
+#define   MAX77759_CHGR_REG_CHG_INT_CHGIN              BIT(6)
+#define   MAX77759_CHGR_REG_CHG_INT_WCIN               BIT(5)
+#define   MAX77759_CHGR_REG_CHG_INT_CHG                BIT(4)
+#define   MAX77759_CHGR_REG_CHG_INT_BAT                BIT(3)
+#define   MAX77759_CHGR_REG_CHG_INT_INLIM              BIT(2)
+#define   MAX77759_CHGR_REG_CHG_INT_THM2               BIT(1)
+#define   MAX77759_CHGR_REG_CHG_INT_BYP                BIT(0)
+#define MAX77759_CHGR_REG_CHG_INT2                     0xb1
+#define   MAX77759_CHGR_REG_CHG_INT2_INSEL             BIT(7)
+#define   MAX77759_CHGR_REG_CHG_INT2_SYS_UVLO1         BIT(6)
+#define   MAX77759_CHGR_REG_CHG_INT2_SYS_UVLO2         BIT(5)
+#define   MAX77759_CHGR_REG_CHG_INT2_BAT_OILO          BIT(4)
+#define   MAX77759_CHGR_REG_CHG_INT2_CHG_STA_CC        BIT(3)
+#define   MAX77759_CHGR_REG_CHG_INT2_CHG_STA_CV        BIT(2)
+#define   MAX77759_CHGR_REG_CHG_INT2_CHG_STA_TO        BIT(1)
+#define   MAX77759_CHGR_REG_CHG_INT2_CHG_STA_DONE      BIT(0)
+#define MAX77759_CHGR_REG_CHG_INT_MASK                 0xb2
+#define MAX77759_CHGR_REG_CHG_INT2_MASK                0xb3
+#define MAX77759_CHGR_REG_CHG_INT_OK                   0xb4
+#define MAX77759_CHGR_REG_CHG_DETAILS_00               0xb5
+#define   MAX77759_CHGR_REG_CHG_DETAILS_00_CHGIN_DTLS  GENMASK(6, 5)
+#define MAX77759_CHGR_REG_CHG_DETAILS_01               0xb6
+#define   MAX77759_CHGR_REG_CHG_DETAILS_01_BAT_DTLS    GENMASK(6, 4)
+#define   MAX77759_CHGR_REG_CHG_DETAILS_01_CHG_DTLS    GENMASK(3, 0)
+#define MAX77759_CHGR_REG_CHG_DETAILS_02               0xb7
+#define   MAX77759_CHGR_REG_CHG_DETAILS_02_CHGIN_STS   BIT(5)
+#define MAX77759_CHGR_REG_CHG_DETAILS_03               0xb8
+#define MAX77759_CHGR_REG_CHG_CNFG_00                  0xb9
+#define   MAX77759_CHGR_REG_CHG_CNFG_00_MODE           GENMASK(3, 0)
+#define MAX77759_CHGR_REG_CHG_CNFG_01                  0xba
+#define MAX77759_CHGR_REG_CHG_CNFG_02                  0xbb
+#define   MAX77759_CHGR_REG_CHG_CNFG_02_CHGCC          GENMASK(5, 0)
+#define MAX77759_CHGR_REG_CHG_CNFG_03                  0xbc
+#define MAX77759_CHGR_REG_CHG_CNFG_04                  0xbd
+#define   MAX77759_CHGR_REG_CHG_CNFG_04_CHG_CV_PRM     GENMASK(5, 0)
+#define MAX77759_CHGR_REG_CHG_CNFG_05                  0xbe
+#define MAX77759_CHGR_REG_CHG_CNFG_06                  0xbf
+#define   MAX77759_CHGR_REG_CHG_CNFG_06_CHGPROT        GENMASK(3, 2)
+#define MAX77759_CHGR_REG_CHG_CNFG_07                  0xc0
+#define MAX77759_CHGR_REG_CHG_CNFG_08                  0xc1
+#define MAX77759_CHGR_REG_CHG_CNFG_09                  0xc2
+#define   MAX77759_CHGR_REG_CHG_CNFG_09_CHGIN_ILIM     GENMASK(6, 0)
+#define MAX77759_CHGR_REG_CHG_CNFG_10                  0xc3
+#define MAX77759_CHGR_REG_CHG_CNFG_11                  0xc4
+#define MAX77759_CHGR_REG_CHG_CNFG_12                  0xc5
+/* Wireless Charging input channel select */
+#define   MAX77759_CHGR_REG_CHG_CNFG_12_WCINSEL        BIT(6)
+/* CHGIN/USB input channel select */
+#define   MAX77759_CHGR_REG_CHG_CNFG_12_CHGINSEL       BIT(5)
+#define MAX77759_CHGR_REG_CHG_CNFG_13                  0xc6
+#define MAX77759_CHGR_REG_CHG_CNFG_14                  0xc7
+#define MAX77759_CHGR_REG_CHG_CNFG_15                  0xc8
+#define MAX77759_CHGR_REG_CHG_CNFG_16                  0xc9
+#define MAX77759_CHGR_REG_CHG_CNFG_17                  0xca
+#define MAX77759_CHGR_REG_CHG_CNFG_18                  0xcb
+#define   MAX77759_CHGR_REG_CHG_CNFG_18_WDTEN          BIT(0)
+#define MAX77759_CHGR_REG_CHG_CNFG_19                  0xcc
 
 /* MaxQ opcodes for max77759_maxq_command() */
 #define MAX77759_MAXQ_OPCODE_MAXLENGTH (MAX77759_MAXQ_REG_AP_DATAOUT32 - \
@@ -101,6 +131,84 @@
 #define MAX77759_MAXQ_OPCODE_USER_SPACE_READ     0x81
 #define MAX77759_MAXQ_OPCODE_USER_SPACE_WRITE    0x82
 
+/*
+ * enum max77759_chgr_chgin_dtls_status - Charger Input Status
+ * @MAX77759_CHGR_CHGIN_DTLS_VBUS_UNDERVOLTAGE:
+ *     Charger input voltage (Vchgin) < Under Voltage Threshold (Vuvlo)
+ * @MAX77759_CHGR_CHGIN_DTLS_VBUS_MARGINAL_VOLTAGE: Vchgin > Vuvlo and
+ *     Vchgin < (Battery Voltage (Vbatt) + system voltage (Vsys))
+ * @MAX77759_CHGR_CHGIN_DTLS_VBUS_OVERVOLTAGE:
+ *     Vchgin > Over Voltage threshold (Vovlo)
+ * @MAX77759_CHGR_CHGIN_DTLS_VBUS_VALID:
+ *     Vchgin > Vuvlo, Vchgin < Vovlo and Vchgin > (Vsys + Vbatt)
+ */
+enum max77759_chgr_chgin_dtls_status {
+	MAX77759_CHGR_CHGIN_DTLS_VBUS_UNDERVOLTAGE,
+	MAX77759_CHGR_CHGIN_DTLS_VBUS_MARGINAL_VOLTAGE,
+	MAX77759_CHGR_CHGIN_DTLS_VBUS_OVERVOLTAGE,
+	MAX77759_CHGR_CHGIN_DTLS_VBUS_VALID,
+};
+
+/*
+ * enum max77759_chgr_bat_dtls_states - Battery Details
+ * @MAX77759_CHGR_BAT_DTLS_NO_BATT_CHG_SUSP:	No battery and the charger suspended
+ * @MAX77759_CHGR_BAT_DTLS_DEAD_BATTERY:	Vbatt < Vtrickle
+ * @MAX77759_CHGR_BAT_DTLS_BAT_CHG_TIMER_FAULT:	Charging suspended due to timer fault
+ * @MAX77759_CHGR_BAT_DTLS_BAT_OKAY:		Battery okay and Vbatt > Min Sys Voltage (Vsysmin)
+ * @MAX77759_CHGR_BAT_DTLS_BAT_UNDERVOLTAGE:	Battery is okay. Vtrickle < Vbatt < Vsysmin
+ * @MAX77759_CHGR_BAT_DTLS_BAT_OVERVOLTAGE:	Battery voltage > Overvoltage threshold
+ * @MAX77759_CHGR_BAT_DTLS_BAT_OVERCURRENT:	Battery current exceeds overcurrent threshold
+ * @MAX77759_CHGR_BAT_DTLS_BAT_ONLY_MODE:	Battery only mode and battery level not available
+ */
+enum max77759_chgr_bat_dtls_states {
+	MAX77759_CHGR_BAT_DTLS_NO_BATT_CHG_SUSP,
+	MAX77759_CHGR_BAT_DTLS_DEAD_BATTERY,
+	MAX77759_CHGR_BAT_DTLS_BAT_CHG_TIMER_FAULT,
+	MAX77759_CHGR_BAT_DTLS_BAT_OKAY,
+	MAX77759_CHGR_BAT_DTLS_BAT_UNDERVOLTAGE,
+	MAX77759_CHGR_BAT_DTLS_BAT_OVERVOLTAGE,
+	MAX77759_CHGR_BAT_DTLS_BAT_OVERCURRENT,
+	MAX77759_CHGR_BAT_DTLS_BAT_ONLY_MODE,
+};
+
+/*
+ * enum max77759_chgr_chg_dtls_states - Charger Details
+ * @MAX77759_CHGR_CHG_DTLS_PREQUAL:		Charger in prequalification mode
+ * @MAX77759_CHGR_CHG_DTLS_CC:			Charger in fast charge const curr mode
+ * @MAX77759_CHGR_CHG_DTLS_CV:			Charger in fast charge const voltage mode
+ * @MAX77759_CHGR_CHG_DTLS_TO:			Charger is in top off mode
+ * @MAX77759_CHGR_CHG_DTLS_DONE:		Charger is done
+ * @MAX77759_CHGR_CHG_DTLS_RSVD_1:		Reserved
+ * @MAX77759_CHGR_CHG_DTLS_TIMER_FAULT:		Charger is in timer fault mode
+ * @MAX77759_CHGR_CHG_DTLS_SUSP_BATT_THM:	Charger is suspended as battery removal detected
+ * @MAX77759_CHGR_CHG_DTLS_OFF:			Charger is off. Input invalid or charger disabled
+ * @MAX77759_CHGR_CHG_DTLS_RSVD_2:		Reserved
+ * @MAX77759_CHGR_CHG_DTLS_RSVD_3:		Reserved
+ * @MAX77759_CHGR_CHG_DTLS_OFF_WDOG_TIMER:	Charger is off as watchdog timer expired
+ * @MAX77759_CHGR_CHG_DTLS_SUSP_JEITA:		Charger is in JEITA control mode
+ */
+enum max77759_chgr_chg_dtls_states {
+	MAX77759_CHGR_CHG_DTLS_PREQUAL,
+	MAX77759_CHGR_CHG_DTLS_CC,
+	MAX77759_CHGR_CHG_DTLS_CV,
+	MAX77759_CHGR_CHG_DTLS_TO,
+	MAX77759_CHGR_CHG_DTLS_DONE,
+	MAX77759_CHGR_CHG_DTLS_RSVD_1,
+	MAX77759_CHGR_CHG_DTLS_TIMER_FAULT,
+	MAX77759_CHGR_CHG_DTLS_SUSP_BATT_THM,
+	MAX77759_CHGR_CHG_DTLS_OFF,
+	MAX77759_CHGR_CHG_DTLS_RSVD_2,
+	MAX77759_CHGR_CHG_DTLS_RSVD_3,
+	MAX77759_CHGR_CHG_DTLS_OFF_WDOG_TIMER,
+	MAX77759_CHGR_CHG_DTLS_SUSP_JEITA,
+};
+
+enum max77759_chgr_mode {
+	MAX77759_CHGR_MODE_OFF,
+	MAX77759_CHGR_MODE_CHG_BUCK_ON = 0x5,
+	MAX77759_CHGR_MODE_OTG_BOOST_ON = 0xA,
+};
+
 /**
  * struct max77759 - core max77759 internal data structure
  *
-- 
cgit v1.2.3


From f23388d0f6523cc3a72edf6e78cb11931a07da10 Mon Sep 17 00:00:00 2001
From: Amit Sunil Dhamne <amitsd@google.com>
Date: Wed, 25 Mar 2026 22:22:25 +0000
Subject: lib/linear_ranges: Add linear_range_get_selector_high_array

Add a helper function to find the selector for a given value in a linear
range array. The selector should be such that the value it represents
should be higher or equal to the given value.

Signed-off-by: Amit Sunil Dhamne <amitsd@google.com>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20260325-max77759-charger-v9-4-4486dd297adc@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/linear_range.h |  3 +++
 lib/linear_ranges.c          | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/linear_range.h b/include/linux/linear_range.h
index 2e4f4c3539c0..0f3037f1a94f 100644
--- a/include/linux/linear_range.h
+++ b/include/linux/linear_range.h
@@ -57,5 +57,8 @@ void linear_range_get_selector_within(const struct linear_range *r,
 int linear_range_get_selector_low_array(const struct linear_range *r,
 					int ranges, unsigned int val,
 					unsigned int *selector, bool *found);
+int linear_range_get_selector_high_array(const struct linear_range *r,
+					 int ranges, unsigned int val,
+					 unsigned int *selector, bool *found);
 
 #endif
diff --git a/lib/linear_ranges.c b/lib/linear_ranges.c
index a1a7dfa881de..c85583678f6b 100644
--- a/lib/linear_ranges.c
+++ b/lib/linear_ranges.c
@@ -241,6 +241,42 @@ int linear_range_get_selector_high(const struct linear_range *r,
 }
 EXPORT_SYMBOL_GPL(linear_range_get_selector_high);
 
+/**
+ * linear_range_get_selector_high_array - return linear range selector for value
+ * @r:		pointer to array of linear ranges where selector is looked from
+ * @ranges:	amount of ranges to scan from array
+ * @val:	value for which the selector is searched
+ * @selector:	address where found selector value is updated
+ * @found:	flag to indicate that given value was in the range
+ *
+ * Scan array of ranges for selector for which range value matches given
+ * input value. Value is matching if it is equal or higher than given value
+ * If given value is found to be in a range scanning is stopped and @found is
+ * set true. If a range with values greater than given value is found
+ * but the range min is being greater than given value, then the range's
+ * lowest selector is updated to @selector and scanning is stopped.
+ *
+ * Return: 0 on success, -EINVAL if range array is invalid or does not contain
+ * range with a value greater or equal to given value
+ */
+int linear_range_get_selector_high_array(const struct linear_range *r,
+					 int ranges, unsigned int val,
+					 unsigned int *selector, bool *found)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < ranges; i++) {
+		ret = linear_range_get_selector_high(&r[i], val, selector,
+						     found);
+		if (!ret)
+			return 0;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(linear_range_get_selector_high_array);
+
 /**
  * linear_range_get_selector_within - return linear range selector for value
  * @r:		pointer to linear range where selector is looked from
-- 
cgit v1.2.3


From 68a66a44af6e196ca426d1250104d3018ed9e74b Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Date: Sun, 25 Jan 2026 13:30:03 +0200
Subject: soc: qcom: ubwc: add helper to get min_acc length

MDSS and GPU drivers use different approaches to get min_acc length.
Add helper function that can be used by all the drivers.

The helper reflects our current best guess, it blindly copies the
approach adopted by the MDSS drivers and it matches current values
selected by the GPU driver.

Reviewed-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Acked-by: Bjorn Andersson <andersson@kernel.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Reviewed-by: Dikshita Agarwal <dikshita.agarwal@oss.qualcomm.com>
Tested-by: Wangao Wang <wangao.wang@oss.qualcomm.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260125-iris-ubwc-v4-1-1ff30644ac81@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/soc/qcom/ubwc.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h
index f052e241736c..5bdeca18d54d 100644
--- a/include/linux/soc/qcom/ubwc.h
+++ b/include/linux/soc/qcom/ubwc.h
@@ -74,4 +74,14 @@ static inline bool qcom_ubwc_get_ubwc_mode(const struct qcom_ubwc_cfg_data *cfg)
 	return ret;
 }
 
+/*
+ * This is the best guess, based on the MDSS driver, which worked so far.
+ */
+static inline bool qcom_ubwc_min_acc_length_64b(const struct qcom_ubwc_cfg_data *cfg)
+{
+	return cfg->ubwc_enc_version == UBWC_1_0 &&
+		(cfg->ubwc_dec_version == UBWC_2_0 ||
+		 cfg->ubwc_dec_version == UBWC_3_0);
+}
+
 #endif /* __QCOM_UBWC_H__ */
-- 
cgit v1.2.3


From b2571ef8d4ec9bb636889a9132090bcc3449792e Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Date: Sun, 25 Jan 2026 13:30:04 +0200
Subject: soc: qcom: ubwc: add helpers to get programmable values

Currently the database stores macrotile_mode in the data. However it
can be derived from the rest of the data: it should be used for UBWC
encoding >= 3.0 except for several corner cases (SM8150 and SC8180X).

The ubwc_bank_spread field seems to be based on the impreside data we
had for the MDSS and DPU programming. In some cases UBWC engine inside
the display controller doesn't need to program it, although bank spread
is to be enabled.

Bank swizzle is also currently stored as is, but it is almost standard
(banks 1-3 for UBWC 1.0 and 2-3 for other versions), the only exception
being Lemans (it uses only bank 3).

Add helpers returning values from the config for now. They will be
rewritten later, in a separate series, but having the helper now
simplifies refacroring the code later.

Tested-by: Wangao Wang <wangao.wang@oss.qualcomm.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260125-iris-ubwc-v4-2-1ff30644ac81@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/soc/qcom/ubwc.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h
index 5bdeca18d54d..f5d0e2341261 100644
--- a/include/linux/soc/qcom/ubwc.h
+++ b/include/linux/soc/qcom/ubwc.h
@@ -84,4 +84,19 @@ static inline bool qcom_ubwc_min_acc_length_64b(const struct qcom_ubwc_cfg_data
 		 cfg->ubwc_dec_version == UBWC_3_0);
 }
 
+static inline bool qcom_ubwc_macrotile_mode(const struct qcom_ubwc_cfg_data *cfg)
+{
+	return cfg->macrotile_mode;
+}
+
+static inline bool qcom_ubwc_bank_spread(const struct qcom_ubwc_cfg_data *cfg)
+{
+	return cfg->ubwc_bank_spread;
+}
+
+static inline u32 qcom_ubwc_swizzle(const struct qcom_ubwc_cfg_data *cfg)
+{
+	return cfg->ubwc_swizzle;
+}
+
 #endif /* __QCOM_UBWC_H__ */
-- 
cgit v1.2.3


From c6f4e552e1eae4a5726230254108213b085e1ae3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 15 Nov 2025 19:07:41 -0800
Subject: rcutorture: Add a textbook-style trivial preemptible RCU

This commit adds a trivial textbook implementation of preemptible RCU
to rcutorture ("torture_type=trivial-preempt"), similar in spirit to the
existing "torture_type=trivial" textbook implementation of non-preemptible
RCU.  Neither trivial RCU implementation has any value for production use,
and are intended only to keep Paul honest in his introductory writings
and presentations.

[ paulmck: Apply kernel test robot feedback. ]

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
 include/linux/sched.h                              |  4 ++
 kernel/rcu/Kconfig.debug                           | 11 +++++
 kernel/rcu/rcu.h                                   |  4 ++
 kernel/rcu/rcutorture.c                            | 57 +++++++++++++++++++++-
 kernel/rcu/update.c                                | 22 +++++++++
 .../rcutorture/configs/rcu/TRIVIAL-PREEMPT         | 12 +++++
 .../rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot    |  3 ++
 7 files changed, 112 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT
 create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a5d3dbc9cdf..ffb2ad9716f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -949,6 +949,10 @@ struct task_struct {
 	struct srcu_ctr __percpu	*trc_reader_scp;
 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
 
+#ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+	int				rcu_trivial_preempt_nesting;
+#endif /* #ifdef CONFIG_TRIVIAL_PREEMPT_RCU */
+
 	struct sched_info		sched_info;
 
 	struct list_head		tasks;
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 625d75392647..e078e988773d 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -228,4 +228,15 @@ config RCU_DYNTICKS_TORTURE
 
 	  This has no value for production and is only for testing.
 
+config TRIVIAL_PREEMPT_RCU
+	bool "Textbook trivial preemptible RCU in rcutorture"
+	depends on RCU_EXPERT && RCU_TORTURE_TEST
+	default n
+	help
+	  This option enables a textbook preemptible RCU that is
+	  implemented in rcutorture.  Its sole purpose is to validate
+	  code used in books, papers, and presentations.
+
+	  This has no value for production and is only for testing.
+
 endmenu # "RCU Debugging"
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 9b10b57b79ad..fa6d30ce73d1 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -691,4 +691,8 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v);
 static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; }
 #endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
 
+#ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+void synchronize_rcu_trivial_preempt(void);
+#endif // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
 #endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 8a9282a0245c..3c272413666b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1061,6 +1061,61 @@ static struct rcu_torture_ops trivial_ops = {
 	.name		= "trivial"
 };
 
+#ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
+/*
+ * Definitions for trivial CONFIG_PREEMPT=y torture testing.  This
+ * implementation does not work well with large numbers of tasks or with
+ * long-term preemption.  Either or both get you RCU CPU stall warnings.
+ */
+
+static void rcu_sync_torture_init_trivial_preempt(void)
+{
+	rcu_sync_torture_init();
+	if (WARN_ONCE(onoff_interval || shuffle_interval, "%s: Non-zero onoff_interval (%d) or shuffle_interval (%d) breaks trivial RCU, resetting to zero", __func__, onoff_interval, shuffle_interval)) {
+		onoff_interval = 0;
+		shuffle_interval = 0;
+	}
+}
+
+static int rcu_torture_read_lock_trivial_preempt(void)
+{
+	struct task_struct *t = current;
+
+	WRITE_ONCE(t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting + 1);
+	smp_mb();
+	return 0;
+}
+
+static void rcu_torture_read_unlock_trivial_preempt(int idx)
+{
+	struct task_struct *t = current;
+
+	smp_store_release(&t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting - 1);
+}
+
+static struct rcu_torture_ops trivial_preempt_ops = {
+	.ttype		= RCU_TRIVIAL_FLAVOR,
+	.init		= rcu_sync_torture_init_trivial_preempt,
+	.readlock	= rcu_torture_read_lock_trivial_preempt,
+	.read_delay	= rcu_read_delay,  // just reuse rcu's version.
+	.readunlock	= rcu_torture_read_unlock_trivial_preempt,
+	.readlock_held	= torture_readlock_not_held,
+	.get_gp_seq	= rcu_no_completed,
+	.sync		= synchronize_rcu_trivial_preempt,
+	.exp_sync	= synchronize_rcu_trivial_preempt,
+	.irq_capable	= 0, // In theory it should be, but let's keep it trivial.
+	.name		= "trivial-preempt"
+};
+
+#define TRIVIAL_PREEMPT_OPS &trivial_preempt_ops,
+
+#else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
+#define TRIVIAL_PREEMPT_OPS
+
+#endif // #else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
 #ifdef CONFIG_TASKS_RCU
 
 /*
@@ -4449,7 +4504,7 @@ rcu_torture_init(void)
 	static struct rcu_torture_ops *torture_ops[] = {
 		&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops,
 		TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
-		&trivial_ops,
+		&trivial_ops, TRIVIAL_PREEMPT_OPS
 	};
 
 	if (!torture_init_begin(torture_type, verbose))
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index d98a5c38e19c..b62735a67884 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -538,6 +538,28 @@ long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool do
 EXPORT_SYMBOL_GPL(torture_sched_setaffinity);
 #endif
 
+#if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU)
+// Trivial and stupid grace-period wait.  Defined here so that lockdep
+// kernels can find tasklist_lock.
+void synchronize_rcu_trivial_preempt(void)
+{
+	struct task_struct *g;
+	struct task_struct *t;
+
+	smp_mb(); // Order prior accesses before grace-period start.
+	rcu_read_lock(); // Protect task list.
+	for_each_process_thread(g, t) {
+		if (t == current)
+			continue;  // Don't deadlock on ourselves!
+		// Order later rcu_read_lock() on other tasks after QS.
+		while (smp_load_acquire(&t->rcu_trivial_preempt_nesting))
+			continue;
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_trivial_preempt);
+#endif // #if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU)
+
 int rcu_cpu_stall_notifiers __read_mostly; // !0 = provide stall notifiers (rarely useful)
 EXPORT_SYMBOL_GPL(rcu_cpu_stall_notifiers);
 
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT
new file mode 100644
index 000000000000..8230b14bfe68
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT
@@ -0,0 +1,12 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_TRIVIAL_PREEMPT_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot
new file mode 100644
index 000000000000..299cd3a12df6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot
@@ -0,0 +1,3 @@
+rcutorture.torture_type=trivial-preempt
+rcutorture.onoff_interval=0
+rcutorture.shuffle_interval=0
-- 
cgit v1.2.3


From d978d3fc0488691f3b10919594d1d7d465fa568b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 27 Jan 2026 15:24:24 -0800
Subject: srcu: Fix SRCU read flavor macro comments

The SRCU_READ_FLAVOR_FAST and SRCU_READ_FLAVOR_FAST_UPDOWN comments
need repair.  The former fails to not that SRCU-fast can be used in NMI
handlers, and the latter says that it goes with srcu_read_lock_fast()
when it really goes with srcu_read_lock_fast_updown().  This commit
therefore fixes both comments.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
 include/linux/srcu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index bb44a0bd7696..81b1938512d5 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -69,8 +69,8 @@ int init_srcu_struct_fast_updown(struct srcu_struct *ssp);
 #define SRCU_READ_FLAVOR_NORMAL		0x1		// srcu_read_lock().
 #define SRCU_READ_FLAVOR_NMI		0x2		// srcu_read_lock_nmisafe().
 //					0x4		// SRCU-lite is no longer with us.
-#define SRCU_READ_FLAVOR_FAST		0x4		// srcu_read_lock_fast().
-#define SRCU_READ_FLAVOR_FAST_UPDOWN	0x8		// srcu_read_lock_fast().
+#define SRCU_READ_FLAVOR_FAST		0x4		// srcu_read_lock_fast(), also NMI-safe.
+#define SRCU_READ_FLAVOR_FAST_UPDOWN	0x8		// srcu_read_lock_fast_updown().
 #define SRCU_READ_FLAVOR_ALL		(SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \
 					 SRCU_READ_FLAVOR_FAST | SRCU_READ_FLAVOR_FAST_UPDOWN)
 						// All of the above.
-- 
cgit v1.2.3


From 4968907016c2a54800a67273b92b3b66245bd372 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 6 Jan 2026 10:28:10 -0800
Subject: srcu: Fix s/they disables/they disable/ typo in
 srcu_read_unlock_fast()

Typo fix in srcu_read_unlock_fast() header comment.

Reported-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
 include/linux/srcutree.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index be76fa4fc170..fd1a9270cb9a 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -260,7 +260,7 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss
  * srcu_read_unlock_fast().
  *
  * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
- * critical sections either because they disables interrupts, because
+ * critical sections either because they disable interrupts, because
  * they are a single instruction, or because they are read-modify-write
  * atomic operations, depending on the whims of the architecture.
  * This matters because the SRCU-fast grace-period mechanism uses either
-- 
cgit v1.2.3


From ad6ef775cbefffd6c614dfc57429c364192b5de0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 14 Jan 2026 16:18:30 -0800
Subject: rcu-tasks: Document that RCU Tasks Trace grace periods now imply RCU
 grace periods

Now that RCU Tasks Trace is implemented in terms of SRCU-fast, the fact
that each SRCU-fast grace period implies at least two RCU grace periods
in turn means that each RCU Tasks Trace grace period implies at least
two grace periods.  This commit therefore updates the documentation
accordingly.

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
 Documentation/RCU/Design/Requirements/Requirements.rst | 7 +++++++
 include/linux/rcupdate.h                               | 9 +++------
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index b5cdbba3ec2e..4d886e7c7a95 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -2787,6 +2787,13 @@ which avoids the read-side memory barriers, at least for architectures
 that apply noinstr to kernel entry/exit code (or that build with
 ``CONFIG_TASKS_TRACE_RCU_NO_MB=y``.
 
+Now that the implementation is based on SRCU-fast, a call
+to synchronize_rcu_tasks_trace() implies at least one call to
+synchronize_rcu(), that is, every Tasks Trace RCU grace period contains
+at least one plain vanilla RCU grace period.  Should there ever
+be a synchronize_rcu_tasks_trace_expedited(), this guarantee would
+*not* necessarily apply to this hypothetical API member.
+
 The tasks-trace-RCU API is also reasonably compact,
 consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(),
 rcu_read_lock_trace_held(), call_rcu_tasks_trace(),
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 04f3f86a4145..18a85c30fd4f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -208,12 +208,9 @@ static inline void exit_tasks_rcu_finish(void) { }
 /**
  * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period?
  *
- * As an accident of implementation, an RCU Tasks Trace grace period also
- * acts as an RCU grace period.  However, this could change at any time.
- * Code relying on this accident must call this function to verify that
- * this accident is still happening.
- *
- * You have been warned!
+ * Now that RCU Tasks Trace is implemented in terms of SRCU-fast, a
+ * call to synchronize_rcu_tasks_trace() is guaranteed to imply at least
+ * one call to synchronize_rcu().
  */
 static inline bool rcu_trace_implies_rcu_gp(void) { return true; }
 
-- 
cgit v1.2.3


From 6e39ba4e5a82aa5469b2ac517b74a71accb0540f Mon Sep 17 00:00:00 2001
From: Pierre Gondois <pierre.gondois@arm.com>
Date: Thu, 26 Mar 2026 21:44:01 +0100
Subject: cpufreq: Add boost_freq_req QoS request

The Power Management Quality of Service (PM QoS) allows to
aggregate constraints from multiple entities. It is currently
used to manage the min/max frequency of a given policy.

Frequency constraints can come for instance from:
 - Thermal framework: acpi_thermal_cpufreq_init()
 - Firmware: _PPC objects: acpi_processor_ppc_init()
 - User: by setting policyX/scaling_[min|max]_freq
The minimum of the max frequency constraints is used to compute
the resulting maximum allowed frequency.

When enabling boost frequencies, the same frequency request object
(policy->max_freq_req) as to handle requests from users is used.
As a result, when setting:
 - scaling_max_freq
 - boost
The last sysfs file used overwrites the request from the other
sysfs file.

To avoid this, create a per-policy boost_freq_req to save the boost
constraints instead of overwriting the last scaling_max_freq
constraint.

policy_set_boost() calls the cpufreq set_boost callback.
Update the newly added boost_freq_req request from there:
 - whenever boost is toggled
 - to cover all possible paths

In the existing .set_boost() callbacks:
 - Don't update policy->max as this is done through the qos notifier
   cpufreq_notifier_max() which calls cpufreq_set_policy().
 - Remove freq_qos_update_request() calls as the qos request is now
   done in policy_set_boost() and updates the new boost_freq_req

$ ## Init state
scaling_max_freq:1000000
cpuinfo_max_freq:1000000

$ echo 700000 > scaling_max_freq
scaling_max_freq:700000
cpuinfo_max_freq:1000000

$ echo 1 > ../boost
scaling_max_freq:1200000
cpuinfo_max_freq:1200000

$ echo 800000 > scaling_max_freq
scaling_max_freq:800000
cpuinfo_max_freq:1200000

$ ## Final step:
$ ## Without the patches:
$ echo 0 > ../boost
scaling_max_freq:1000000
cpuinfo_max_freq:1000000

$ ## With the patches:
$ echo 0 > ../boost
scaling_max_freq:800000
cpuinfo_max_freq:1000000

Note:
cpufreq_frequency_table_cpuinfo() updates policy->min
and max from:
A.
cpufreq_boost_set_sw()
\-cpufreq_frequency_table_cpuinfo()
B.
cpufreq_policy_online()
\-cpufreq_table_validate_and_sort()
  \-cpufreq_frequency_table_cpuinfo()
Keep these updates as some drivers expect policy->min and
max to be set through B.

Reviewed-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://patch.msgid.link/20260326204404.1401849-3-pierre.gondois@arm.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/amd-pstate.c   |  2 --
 drivers/cpufreq/cppc_cpufreq.c | 10 ++-------
 drivers/cpufreq/cpufreq.c      | 46 ++++++++++++++++++++++++++++--------------
 include/linux/cpufreq.h        |  1 +
 4 files changed, 34 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 5aa9fcd80cf5..d0675d6a19fe 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -769,8 +769,6 @@ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
 	else if (policy->cpuinfo.max_freq > nominal_freq)
 		policy->cpuinfo.max_freq = nominal_freq;
 
-	policy->max = policy->cpuinfo.max_freq;
-
 	if (cppc_state == AMD_PSTATE_PASSIVE) {
 		ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
 		if (ret < 0)
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 011f35cb47b9..f4f574fbe547 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -807,17 +807,11 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
 {
 	struct cppc_cpudata *cpu_data = policy->driver_data;
 	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
-	int ret;
 
 	if (state)
-		policy->max = cppc_perf_to_khz(caps, caps->highest_perf);
+		policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->highest_perf);
 	else
-		policy->max = cppc_perf_to_khz(caps, caps->nominal_perf);
-	policy->cpuinfo.max_freq = policy->max;
-
-	ret = freq_qos_update_request(policy->max_freq_req, policy->max);
-	if (ret < 0)
-		return ret;
+		policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf);
 
 	return 0;
 }
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b127f5cb682c..c0aa970c7a67 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -609,10 +609,19 @@ static int policy_set_boost(struct cpufreq_policy *policy, bool enable)
 	policy->boost_enabled = enable;
 
 	ret = cpufreq_driver->set_boost(policy, enable);
-	if (ret)
+	if (ret) {
 		policy->boost_enabled = !policy->boost_enabled;
+		return ret;
+	}
 
-	return ret;
+	ret = freq_qos_update_request(policy->boost_freq_req, policy->cpuinfo.max_freq);
+	if (ret < 0) {
+		policy->boost_enabled = !policy->boost_enabled;
+		cpufreq_driver->set_boost(policy, policy->boost_enabled);
+		return ret;
+	}
+
+	return 0;
 }
 
 static ssize_t store_local_boost(struct cpufreq_policy *policy,
@@ -1377,6 +1386,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 	}
 
 	freq_qos_remove_request(policy->min_freq_req);
+	freq_qos_remove_request(policy->boost_freq_req);
 	kfree(policy->min_freq_req);
 
 	cpufreq_policy_put_kobj(policy);
@@ -1442,26 +1452,38 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy,
 	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
 
 	if (new_policy) {
+		unsigned int count;
+
 		for_each_cpu(j, policy->related_cpus) {
 			per_cpu(cpufreq_cpu_data, j) = policy;
 			add_cpu_dev_symlink(policy, j, get_cpu_device(j));
 		}
 
-		policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req),
+		count = policy->boost_supported ? 3 : 2;
+		policy->min_freq_req = kzalloc(count * sizeof(*policy->min_freq_req),
 					       GFP_KERNEL);
 		if (!policy->min_freq_req) {
 			ret = -ENOMEM;
 			goto out_destroy_policy;
 		}
 
+		if (policy->boost_supported) {
+			policy->boost_freq_req = policy->min_freq_req + 2;
+
+			ret = freq_qos_add_request(&policy->constraints,
+						   policy->boost_freq_req,
+						   FREQ_QOS_MAX,
+						   policy->cpuinfo.max_freq);
+			if (ret < 0) {
+				policy->boost_freq_req = NULL;
+				goto out_destroy_policy;
+			}
+		}
+
 		ret = freq_qos_add_request(&policy->constraints,
 					   policy->min_freq_req, FREQ_QOS_MIN,
 					   FREQ_QOS_MIN_DEFAULT_VALUE);
 		if (ret < 0) {
-			/*
-			 * So we don't call freq_qos_remove_request() for an
-			 * uninitialized request.
-			 */
 			kfree(policy->min_freq_req);
 			policy->min_freq_req = NULL;
 			goto out_destroy_policy;
@@ -2785,16 +2807,10 @@ int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state)
 		return -ENXIO;
 
 	ret = cpufreq_frequency_table_cpuinfo(policy);
-	if (ret) {
+	if (ret)
 		pr_err("%s: Policy frequency update failed\n", __func__);
-		return ret;
-	}
-
-	ret = freq_qos_update_request(policy->max_freq_req, policy->max);
-	if (ret < 0)
-		return ret;
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(cpufreq_boost_set_sw);
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 8ca2bcb3d7ae..b6f6c7d06912 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -81,6 +81,7 @@ struct cpufreq_policy {
 	struct freq_constraints	constraints;
 	struct freq_qos_request	*min_freq_req;
 	struct freq_qos_request	*max_freq_req;
+	struct freq_qos_request *boost_freq_req;
 
 	struct cpufreq_frequency_table	*freq_table;
 	enum cpufreq_table_sorting freq_table_sorted;
-- 
cgit v1.2.3


From 5de7bcaadf160c1716b20a263cf8f5b06f658959 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 30 Mar 2026 13:11:07 -0700
Subject: x86: rename and clean up __copy_from_user_inatomic_nocache()

Similarly to the previous commit, this renames the somewhat confusingly
named function.  But in this case, it was at least less confusing: the
__copy_from_user_inatomic_nocache is indeed copying from user memory,
and it is indeed ok to be used in an atomic context, so it will not warn
about it.

But the previous commit also removed the NTB mis-use of the
__copy_from_user_inatomic_nocache() function, and as a result every
call-site is now _actually_ doing a real user copy.  That means that we
can now do the proper user pointer verification too.

End result: add proper address checking, remove the double underscores,
and change the "nocache" to "nontemporal" to more accurately describe
what this x86-only function actually does.  It might be worth noting
that only the target is non-temporal: the actual user accesses are
normal memory accesses.

Also worth noting is that non-x86 targets (and on older 32-bit x86 CPU's
before XMM2 in the Pentium III) we end up just falling back on a regular
user copy, so nothing can actually depend on the non-temporal semantics,
but that has always been true.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/uaccess.h    |  2 +-
 arch/x86/include/asm/uaccess_32.h |  8 +-------
 arch/x86/include/asm/uaccess_64.h |  3 ++-
 arch/x86/lib/usercopy_32.c        |  9 +++++----
 drivers/gpu/drm/i915/i915_gem.c   |  2 +-
 drivers/gpu/drm/qxl/qxl_ioctl.c   |  2 +-
 include/linux/uaccess.h           | 11 ++++++++---
 lib/iov_iter.c                    |  2 +-
 8 files changed, 20 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 367297b188c3..3a0dd3c2b233 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -507,7 +507,7 @@ extern struct movsl_mask {
 } ____cacheline_aligned_in_smp movsl_mask;
 #endif
 
-#define ARCH_HAS_NOCACHE_UACCESS 1
+#define ARCH_HAS_NONTEMPORAL_UACCESS 1
 
 /*
  * The "unsafe" user accesses aren't really "unsafe", but the naming
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 40379a1adbb8..fff19e73ccb3 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -26,13 +26,7 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
 	return __copy_user_ll(to, (__force const void *)from, n);
 }
 
-static __always_inline unsigned long
-__copy_from_user_inatomic_nocache(void *to, const void __user *from,
-				  unsigned long n)
-{
-       return __copy_from_user_ll_nocache_nozero(to, from, n);
-}
-
+unsigned long __must_check copy_from_user_inatomic_nontemporal(void *, const void __user *, unsigned long n);
 unsigned long __must_check clear_user(void __user *mem, unsigned long len);
 unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
 
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index c4b45673e9b8..859d99c31c29 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -152,11 +152,12 @@ extern size_t copy_to_nontemporal(void *dst, const void *src, size_t size);
 extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size);
 
 static inline int
-__copy_from_user_inatomic_nocache(void *dst, const void __user *src,
+copy_from_user_inatomic_nontemporal(void *dst, const void __user *src,
 				  unsigned size)
 {
 	long ret;
 	kasan_check_write(dst, size);
+	src = mask_user_address(src);
 	stac();
 	ret = copy_to_nontemporal(dst, (__force const void *)src, size);
 	clac();
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index f6f436f1d573..ac27e39fc993 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -322,10 +322,11 @@ unsigned long __copy_user_ll(void *to, const void *from, unsigned long n)
 }
 EXPORT_SYMBOL(__copy_user_ll);
 
-unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
+unsigned long copy_from_user_inatomic_nontemporal(void *to, const void __user *from,
 					unsigned long n)
 {
-	__uaccess_begin_nospec();
+	if (!user_access_begin(from, n))
+		return n;
 #ifdef CONFIG_X86_INTEL_USERCOPY
 	if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
 		n = __copy_user_intel_nocache(to, from, n);
@@ -334,7 +335,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
 #else
 	__copy_user(to, from, n);
 #endif
-	__uaccess_end();
+	user_access_end();
 	return n;
 }
-EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
+EXPORT_SYMBOL(copy_from_user_inatomic_nontemporal);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4c82c9544b93..72fe91ed1c74 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -520,7 +520,7 @@ ggtt_write(struct io_mapping *mapping,
 
 	/* We can use the cpu mem copy function because this is X86. */
 	vaddr = io_mapping_map_atomic_wc(mapping, base);
-	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
+	unwritten = copy_from_user_inatomic_nontemporal((void __force *)vaddr + offset,
 						      user_data, length);
 	io_mapping_unmap_atomic(vaddr);
 	if (unwritten) {
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 336cbff26089..26545a08cdf7 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -184,7 +184,7 @@ static int qxl_process_single_command(struct qxl_device *qdev,
 
 	/* TODO copy slow path code from i915 */
 	fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_MASK));
-	unwritten = __copy_from_user_inatomic_nocache
+	unwritten = copy_from_user_inatomic_nontemporal
 		(fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_MASK),
 		 u64_to_user_ptr(cmd->command), cmd->command_size);
 
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 1f3804245c06..4c7d0b815093 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -331,16 +331,21 @@ static inline size_t probe_subpage_writeable(char __user *uaddr, size_t size)
 
 #endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */
 
-#ifndef ARCH_HAS_NOCACHE_UACCESS
+#ifndef ARCH_HAS_NONTEMPORAL_UACCESS
 
 static inline __must_check unsigned long
-__copy_from_user_inatomic_nocache(void *to, const void __user *from,
+copy_from_user_inatomic_nontemporal(void *to, const void __user *from,
 				  unsigned long n)
 {
+	if (can_do_masked_user_access())
+		from = mask_user_address(from);
+	else
+		if (!access_ok(from, n))
+			return n;
 	return __copy_from_user_inatomic(to, from, n);
 }
 
-#endif		/* ARCH_HAS_NOCACHE_UACCESS */
+#endif		/* ARCH_HAS_NONTEMPORAL_UACCESS */
 
 extern __must_check int check_zeroed_user(const void __user *from, size_t size);
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 896760bad455..b3a7642ced4f 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -277,7 +277,7 @@ static __always_inline
 size_t copy_from_user_iter_nocache(void __user *iter_from, size_t progress,
 				   size_t len, void *to, void *priv2)
 {
-	return __copy_from_user_inatomic_nocache(to + progress, iter_from, len);
+	return copy_from_user_inatomic_nontemporal(to + progress, iter_from, len);
 }
 
 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
-- 
cgit v1.2.3


From 555aa178f8d22261d71da74df6267e6e6e97f95a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 27 Mar 2026 17:55:08 +0100
Subject: vfio: unhide vdev->debug_root

When debugfs is disabled, the hisilicon driver now fails to build:

drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c: In function 'hisi_acc_vfio_debug_init':
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:1671:62: error: 'struct vfio_device' has no member named 'debug_root'
 1671 |         vfio_dev_migration = debugfs_lookup("migration", vdev->debug_root);
      |                                                              ^~

The driver otherwise relies on dead-code elimination, but this reference
fails. The single struct member is not going to make much of a difference
for memory consumption, so just keep this visible unconditionally.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: b398f91779b8 ("hisi_acc_vfio_pci: register debugfs for hisilicon migration driver")
Link: https://lore.kernel.org/r/20260327165521.3779707-1-arnd@kernel.org
Signed-off-by: Alex Williamson <alex@shazbot.org>
---
 include/linux/vfio.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 50b474334a19..31b826efba00 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -74,13 +74,11 @@ struct vfio_device {
 	u8 iommufd_attached:1;
 #endif
 	u8 cdev_opened:1;
-#ifdef CONFIG_DEBUG_FS
 	/*
 	 * debug_root is a static property of the vfio_device
 	 * which must be set prior to registering the vfio_device.
 	 */
 	struct dentry *debug_root;
-#endif
 };
 
 /**
-- 
cgit v1.2.3


From 331e5fd5bfd7aae3ab4eb947367b9d609ebb3fb3 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Thu, 26 Mar 2026 10:30:00 +0100
Subject: hwmon: (ina2xx) drop unused platform data

Nobody defines struct ina2xx_platform_data. Remove platform data support
from the drivers which still have it (it's effectively dead code) and
remove the header.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Acked-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://lore.kernel.org/r/20260326-drop-ina2xx-pdata-v1-1-c159437bb2df@oss.qualcomm.com
[groeck: Fixed continuation line alignment]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ina209.c               | 11 ++---------
 drivers/iio/adc/ina2xx-adc.c         | 14 ++------------
 include/linux/platform_data/ina2xx.h | 16 ----------------
 3 files changed, 4 insertions(+), 37 deletions(-)
 delete mode 100644 include/linux/platform_data/ina2xx.h

(limited to 'include/linux')

diff --git a/drivers/hwmon/ina209.c b/drivers/hwmon/ina209.c
index bd7b3380d847..a116f1600e81 100644
--- a/drivers/hwmon/ina209.c
+++ b/drivers/hwmon/ina209.c
@@ -27,8 +27,6 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 
-#include <linux/platform_data/ina2xx.h>
-
 /* register definitions */
 #define INA209_CONFIGURATION		0x00
 #define INA209_STATUS			0x01
@@ -487,7 +485,6 @@ static void ina209_restore_conf(struct i2c_client *client,
 static int ina209_init_client(struct i2c_client *client,
 			      struct ina209_data *data)
 {
-	struct ina2xx_platform_data *pdata = dev_get_platdata(&client->dev);
 	u32 shunt;
 	int reg;
 
@@ -501,12 +498,8 @@ static int ina209_init_client(struct i2c_client *client,
 		return reg;
 	data->config_orig = reg;
 
-	if (pdata) {
-		if (pdata->shunt_uohms <= 0)
-			return -EINVAL;
-		shunt = pdata->shunt_uohms;
-	} else if (!of_property_read_u32(client->dev.of_node, "shunt-resistor",
-					 &shunt)) {
+	if (!of_property_read_u32(client->dev.of_node, "shunt-resistor",
+				  &shunt)) {
 		if (shunt == 0)
 			return -EINVAL;
 	} else {
diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c
index 857e1b69d6cd..c6cded508738 100644
--- a/drivers/iio/adc/ina2xx-adc.c
+++ b/drivers/iio/adc/ina2xx-adc.c
@@ -33,8 +33,6 @@
 #include <linux/sched/task.h>
 #include <linux/util_macros.h>
 
-#include <linux/platform_data/ina2xx.h>
-
 /* INA2XX registers definition */
 #define INA2XX_CONFIG                   0x00
 #define INA2XX_SHUNT_VOLTAGE            0x01	/* readonly */
@@ -980,16 +978,8 @@ static int ina2xx_probe(struct i2c_client *client)
 
 	mutex_init(&chip->state_lock);
 
-	if (of_property_read_u32(client->dev.of_node,
-				 "shunt-resistor", &val) < 0) {
-		struct ina2xx_platform_data *pdata =
-		    dev_get_platdata(&client->dev);
-
-		if (pdata)
-			val = pdata->shunt_uohms;
-		else
-			val = INA2XX_RSHUNT_DEFAULT;
-	}
+	if (of_property_read_u32(client->dev.of_node, "shunt-resistor", &val) < 0)
+		val = INA2XX_RSHUNT_DEFAULT;
 
 	ret = set_shunt_resistor(chip, val);
 	if (ret)
diff --git a/include/linux/platform_data/ina2xx.h b/include/linux/platform_data/ina2xx.h
deleted file mode 100644
index 2aa5ee9a9050..000000000000
--- a/include/linux/platform_data/ina2xx.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Driver for Texas Instruments INA219, INA226 power monitor chips
- *
- * Copyright (C) 2012 Lothar Felten <lothar.felten@gmail.com>
- *
- * For further information, see the Documentation/hwmon/ina2xx.rst file.
- */
-
-/**
- * struct ina2xx_platform_data - ina2xx info
- * @shunt_uohms		shunt resistance in microohms
- */
-struct ina2xx_platform_data {
-	long shunt_uohms;
-};
-- 
cgit v1.2.3


From e7fef85039ccdba67d97b2a09f313aceeb6691c8 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
Date: Thu, 26 Mar 2026 13:36:29 +0530
Subject: serdev: Convert to_serdev_*() helpers to macros and use
 container_of_const()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If these helpers receive the 'const struct device' pointer, then the const
qualifier will get dropped, leading to below warning:

warning: passing argument 1 of ‘to_serdev_device_driver’ discards 'const'
qualifier from pointer target type [-Wdiscarded-qualifiers]

This is not an issue as of now, but with the future commits adding serdev
device based driver matching, this warning will get triggered. Hence,
convert these helpers to macros so that the qualifier get preserved and
also use container_of_const() as container_of() is deprecated.

Tested-by: Hans de Goede <johannes.goede@oss.qualcomm.com> # ThinkPad T14s gen6 (arm64)
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://patch.msgid.link/20260326-pci-m2-e-v7-1-43324a7866e6@oss.qualcomm.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
 include/linux/serdev.h | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index 5654c58eb73c..0c7d3c27d1f8 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -49,10 +49,7 @@ struct serdev_device {
 	struct mutex write_lock;
 };
 
-static inline struct serdev_device *to_serdev_device(struct device *d)
-{
-	return container_of(d, struct serdev_device, dev);
-}
+#define to_serdev_device(d) container_of_const(d, struct serdev_device, dev)
 
 /**
  * struct serdev_device_driver - serdev slave device driver
@@ -68,10 +65,7 @@ struct serdev_device_driver {
 	void	(*shutdown)(struct serdev_device *);
 };
 
-static inline struct serdev_device_driver *to_serdev_device_driver(struct device_driver *d)
-{
-	return container_of(d, struct serdev_device_driver, driver);
-}
+#define to_serdev_device_driver(d) container_of_const(d, struct serdev_device_driver, driver)
 
 enum serdev_parity {
 	SERDEV_PARITY_NONE,
@@ -112,10 +106,7 @@ struct serdev_controller {
 	const struct serdev_controller_ops *ops;
 };
 
-static inline struct serdev_controller *to_serdev_controller(struct device *d)
-{
-	return container_of(d, struct serdev_controller, dev);
-}
+#define to_serdev_controller(d) container_of_const(d, struct serdev_controller, dev)
 
 static inline void *serdev_device_get_drvdata(const struct serdev_device *serdev)
 {
-- 
cgit v1.2.3


From a2b4814190af5944b276c5fd708d95ea146106b3 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
Date: Thu, 26 Mar 2026 13:36:30 +0530
Subject: serdev: Add an API to find the serdev controller associated with the
 devicetree node

Add of_find_serdev_controller_by_node() API to find the serdev controller
device associated with the devicetree node.

Tested-by: Hans de Goede <johannes.goede@oss.qualcomm.com> # ThinkPad T14s gen6 (arm64)
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://patch.msgid.link/20260326-pci-m2-e-v7-2-43324a7866e6@oss.qualcomm.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
 drivers/tty/serdev/core.c | 19 +++++++++++++++++++
 include/linux/serdev.h    |  9 +++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
index 8f25510f89b6..bf88b95f7458 100644
--- a/drivers/tty/serdev/core.c
+++ b/drivers/tty/serdev/core.c
@@ -514,6 +514,25 @@ err_free:
 }
 EXPORT_SYMBOL_GPL(serdev_controller_alloc);
 
+#ifdef CONFIG_OF
+/**
+ * of_find_serdev_controller_by_node() - Find the serdev controller associated
+ *					 with the devicetree node
+ * @node:	Devicetree node
+ *
+ * Return: Pointer to the serdev controller associated with the node. NULL if
+ * the controller is not found. Caller is responsible for calling
+ * serdev_controller_put() to drop the reference.
+ */
+struct serdev_controller *of_find_serdev_controller_by_node(struct device_node *node)
+{
+	struct device *dev = bus_find_device_by_of_node(&serdev_bus_type, node);
+
+	return (dev && dev->type == &serdev_ctrl_type) ? to_serdev_controller(dev) : NULL;
+}
+EXPORT_SYMBOL_GPL(of_find_serdev_controller_by_node);
+#endif
+
 static int of_serdev_register_devices(struct serdev_controller *ctrl)
 {
 	struct device_node *node;
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index 0c7d3c27d1f8..188c0ba62d50 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -334,4 +334,13 @@ static inline bool serdev_acpi_get_uart_resource(struct acpi_resource *ares,
 }
 #endif /* CONFIG_ACPI */
 
+#ifdef CONFIG_OF
+struct serdev_controller *of_find_serdev_controller_by_node(struct device_node *node);
+#else
+static inline struct serdev_controller *of_find_serdev_controller_by_node(struct device_node *node)
+{
+	return NULL;
+}
+#endif /* CONFIG_OF */
+
 #endif /*_LINUX_SERDEV_H */
-- 
cgit v1.2.3


From 25bd73562941b04cfba1a278d8c84f2b1c69b8e9 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Tue, 31 Mar 2026 12:00:10 +0200
Subject: dma: contiguous: Turn heap registration logic around

The CMA heap instantiation was initially developed by having the
contiguous DMA code call into the CMA heap to create a new instance
every time a reserved memory area is probed.

Turning the CMA heap into a module would create a dependency of the
kernel on a module, which doesn't work.

Let's turn the logic around and do the opposite: store all the reserved
memory CMA regions into the contiguous DMA code, and provide an iterator
for the heap to use when it probes.

Signed-off-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260331-dma-buf-heaps-as-modules-v4-1-e18fda504419@kernel.org
---
 drivers/dma-buf/heaps/cma_heap.c  | 19 ++------------
 include/linux/dma-buf/heaps/cma.h | 16 ------------
 include/linux/dma-map-ops.h       |  5 ++++
 kernel/dma/contiguous.c           | 55 +++++++++++++++++++++++++++++++++++----
 4 files changed, 57 insertions(+), 38 deletions(-)
 delete mode 100644 include/linux/dma-buf/heaps/cma.h

(limited to 'include/linux')

diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
index bd3370b9a3f6..33cac626da11 100644
--- a/drivers/dma-buf/heaps/cma_heap.c
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -14,7 +14,6 @@
 
 #include <linux/cma.h>
 #include <linux/dma-buf.h>
-#include <linux/dma-buf/heaps/cma.h>
 #include <linux/dma-heap.h>
 #include <linux/dma-map-ops.h>
 #include <linux/err.h>
@@ -30,19 +29,6 @@
 
 #define DEFAULT_CMA_NAME "default_cma_region"
 
-static struct cma *dma_areas[MAX_CMA_AREAS] __initdata;
-static unsigned int dma_areas_num __initdata;
-
-int __init dma_heap_cma_register_heap(struct cma *cma)
-{
-	if (dma_areas_num >= ARRAY_SIZE(dma_areas))
-		return -EINVAL;
-
-	dma_areas[dma_areas_num++] = cma;
-
-	return 0;
-}
-
 struct cma_heap {
 	struct dma_heap *heap;
 	struct cma *cma;
@@ -414,6 +400,7 @@ static int __init __add_cma_heap(struct cma *cma, const char *name)
 static int __init add_cma_heaps(void)
 {
 	struct cma *default_cma = dev_get_cma_area(NULL);
+	struct cma *cma;
 	unsigned int i;
 	int ret;
 
@@ -423,9 +410,7 @@ static int __init add_cma_heaps(void)
 			return ret;
 	}
 
-	for (i = 0; i < dma_areas_num; i++) {
-		struct cma *cma = dma_areas[i];
-
+	for (i = 0; (cma = dma_contiguous_get_area_by_idx(i)) != NULL; i++) {
 		ret = __add_cma_heap(cma, cma_get_name(cma));
 		if (ret) {
 			pr_warn("Failed to add CMA heap %s", cma_get_name(cma));
diff --git a/include/linux/dma-buf/heaps/cma.h b/include/linux/dma-buf/heaps/cma.h
deleted file mode 100644
index e751479e21e7..000000000000
--- a/include/linux/dma-buf/heaps/cma.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef DMA_BUF_HEAP_CMA_H_
-#define DMA_BUF_HEAP_CMA_H_
-
-struct cma;
-
-#ifdef CONFIG_DMABUF_HEAPS_CMA
-int dma_heap_cma_register_heap(struct cma *cma);
-#else
-static inline int dma_heap_cma_register_heap(struct cma *cma)
-{
-	return 0;
-}
-#endif // CONFIG_DMABUF_HEAPS_CMA
-
-#endif // DMA_BUF_HEAP_CMA_H_
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 60b63756df82..c4c93c72ff6f 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -99,6 +99,7 @@ static inline struct cma *dev_get_cma_area(struct device *dev)
 		return dev->cma_area;
 	return dma_contiguous_default_area;
 }
+struct cma *dma_contiguous_get_area_by_idx(unsigned int idx);
 
 void dma_contiguous_reserve(phys_addr_t addr_limit);
 int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
@@ -117,6 +118,10 @@ static inline struct cma *dev_get_cma_area(struct device *dev)
 {
 	return NULL;
 }
+static inline struct cma *dma_contiguous_get_area_by_idx(unsigned int idx)
+{
+	return NULL;
+}
 static inline void dma_contiguous_reserve(phys_addr_t limit)
 {
 }
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index c56004d314dc..afa9fd313040 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -42,7 +42,6 @@
 #include <linux/memblock.h>
 #include <linux/err.h>
 #include <linux/sizes.h>
-#include <linux/dma-buf/heaps/cma.h>
 #include <linux/dma-map-ops.h>
 #include <linux/cma.h>
 #include <linux/nospec.h>
@@ -53,6 +52,37 @@
 #define CMA_SIZE_MBYTES 0
 #endif
 
+static struct cma *dma_contiguous_areas[MAX_CMA_AREAS];
+static unsigned int dma_contiguous_areas_num;
+
+static int dma_contiguous_insert_area(struct cma *cma)
+{
+	if (dma_contiguous_areas_num >= ARRAY_SIZE(dma_contiguous_areas))
+		return -EINVAL;
+
+	dma_contiguous_areas[dma_contiguous_areas_num++] = cma;
+
+	return 0;
+}
+
+/**
+ * dma_contiguous_get_area_by_idx() - Get contiguous area at given index
+ * @idx: index of the area we query
+ *
+ * Queries for the contiguous area located at index @idx.
+ *
+ * Returns:
+ * A pointer to the requested contiguous area, or NULL otherwise.
+ */
+struct cma *dma_contiguous_get_area_by_idx(unsigned int idx)
+{
+	if (idx >= dma_contiguous_areas_num)
+		return NULL;
+
+	return dma_contiguous_areas[idx];
+}
+EXPORT_SYMBOL_GPL(dma_contiguous_get_area_by_idx);
+
 struct cma *dma_contiguous_default_area;
 
 /*
@@ -264,9 +294,24 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
 		if (ret)
 			return;
 
-		ret = dma_heap_cma_register_heap(dma_contiguous_default_area);
+		/*
+		 * We need to insert the new area in our list to avoid
+		 * any inconsistencies between having the default area
+		 * listed in the DT or not.
+		 *
+		 * The DT case is handled by rmem_cma_setup() and will
+		 * always insert all its areas in our list. However, if
+		 * it didn't run (because OF_RESERVED_MEM isn't set, or
+		 * there's no DT region specified), then we don't have a
+		 * default area yet, and no area in our list.
+		 *
+		 * This block creates the default area in such a case,
+		 * but we also need to insert it in our list to avoid
+		 * having a default area but an empty list.
+		 */
+		ret = dma_contiguous_insert_area(dma_contiguous_default_area);
 		if (ret)
-			pr_warn("Couldn't register default CMA heap.");
+			pr_warn("Couldn't queue default CMA region for heap creation.");
 	}
 }
 
@@ -506,9 +551,9 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
 	pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
 		&rmem->base, (unsigned long)rmem->size / SZ_1M);
 
-	err = dma_heap_cma_register_heap(cma);
+	err = dma_contiguous_insert_area(cma);
 	if (err)
-		pr_warn("Couldn't register CMA heap.");
+		pr_warn("Couldn't store CMA reserved area.");
 
 	return 0;
 }
-- 
cgit v1.2.3


From b3707be95f045c4e526e419435af29dc9dd1c267 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Tue, 31 Mar 2026 12:00:11 +0200
Subject: dma: contiguous: Make dev_get_cma_area() a proper function

As we try to enable dma-buf heaps, and the CMA one in particular, to
compile as modules, we need to export dev_get_cma_area(). It's currently
implemented as an inline function that returns either the content of
device->cma_area or dma_contiguous_default_area.

Thus, it means we need to export dma_contiguous_default_area, which
isn't really something we want any module to have access to.

Instead, let's make dev_get_cma_area() a proper function we will be able
to export so we can avoid exporting dma_contiguous_default_area.

Signed-off-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260331-dma-buf-heaps-as-modules-v4-2-e18fda504419@kernel.org
---
 include/linux/dma-map-ops.h | 7 +------
 kernel/dma/contiguous.c     | 8 ++++++++
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index c4c93c72ff6f..8604106c0c01 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -93,12 +93,7 @@ static inline void set_dma_ops(struct device *dev,
 #ifdef CONFIG_DMA_CMA
 extern struct cma *dma_contiguous_default_area;
 
-static inline struct cma *dev_get_cma_area(struct device *dev)
-{
-	if (dev && dev->cma_area)
-		return dev->cma_area;
-	return dma_contiguous_default_area;
-}
+struct cma *dev_get_cma_area(struct device *dev);
 struct cma *dma_contiguous_get_area_by_idx(unsigned int idx);
 
 void dma_contiguous_reserve(phys_addr_t addr_limit);
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index afa9fd313040..40a0ead24979 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -131,6 +131,14 @@ bool __init cma_skip_dt_default_reserved_mem(void)
 	return size_cmdline != -1;
 }
 
+struct cma *dev_get_cma_area(struct device *dev)
+{
+	if (dev && dev->cma_area)
+		return dev->cma_area;
+
+	return dma_contiguous_default_area;
+}
+
 #ifdef CONFIG_DMA_NUMA_CMA
 
 static struct cma *dma_contiguous_numa_area[MAX_NUMNODES];
-- 
cgit v1.2.3


From 633040f853467a490437ace26d6a5413e64c0dd0 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Tue, 31 Mar 2026 12:00:12 +0200
Subject: dma: contiguous: Make dma_contiguous_default_area static

Now that dev_get_cma_area() is no longer inline, we don't have any user
of dma_contiguous_default_area() outside of contiguous.c so we can make
it static.

Signed-off-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260331-dma-buf-heaps-as-modules-v4-3-e18fda504419@kernel.org
---
 include/linux/dma-map-ops.h | 2 --
 kernel/dma/contiguous.c     | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 8604106c0c01..bef279ebeae7 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -91,8 +91,6 @@ static inline void set_dma_ops(struct device *dev,
 #endif /* CONFIG_ARCH_HAS_DMA_OPS */
 
 #ifdef CONFIG_DMA_CMA
-extern struct cma *dma_contiguous_default_area;
-
 struct cma *dev_get_cma_area(struct device *dev);
 struct cma *dma_contiguous_get_area_by_idx(unsigned int idx);
 
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 40a0ead24979..fd8d3518a232 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -83,7 +83,7 @@ struct cma *dma_contiguous_get_area_by_idx(unsigned int idx)
 }
 EXPORT_SYMBOL_GPL(dma_contiguous_get_area_by_idx);
 
-struct cma *dma_contiguous_default_area;
+static struct cma *dma_contiguous_default_area;
 
 /*
  * Default global CMA area size can be defined in kernel's .config.
-- 
cgit v1.2.3


From 499d2d2f4cf9f16634db47b06dee9676611b897f Mon Sep 17 00:00:00 2001
From: Milan Broz <gmazyland@gmail.com>
Date: Tue, 10 Mar 2026 10:53:49 +0100
Subject: sed-opal: Add STACK_RESET command

The TCG Opal device could enter a state where no new session can be
created, blocking even Discovery or PSID reset. While a power cycle
or waiting for the timeout should work, there is another possibility
for recovery: using the Stack Reset command.

The Stack Reset command is defined in the TCG Storage Architecture Core
Specification and is mandatory for all Opal devices (see Section 3.3.6
of the Opal SSC specification).

This patch implements the Stack Reset command. Sending it should clear
all active sessions immediately, allowing subsequent commands to run
successfully. While it is a TCG transport layer command, the Linux
kernel implements only Opal ioctls, so it makes sense to use the
IOC_OPAL ioctl interface.

The Stack Reset takes no arguments; the response can be success or pending.
If the command reports a pending state, userspace can try to repeat it;
in this case, the code returns -EBUSY.

Signed-off-by: Milan Broz <gmazyland@gmail.com>
Reviewed-by: Ondrej Kozina <okozina@redhat.com>
Link: https://patch.msgid.link/20260310095349.411287-1-gmazyland@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/opal_proto.h            | 20 ++++++++++++++++++
 block/sed-opal.c              | 47 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/sed-opal.h      |  1 +
 include/uapi/linux/sed-opal.h |  1 +
 4 files changed, 69 insertions(+)

(limited to 'include/linux')

diff --git a/block/opal_proto.h b/block/opal_proto.h
index d138785b8198..7c24247aa186 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -19,6 +19,7 @@
 enum {
 	TCG_SECP_00 = 0,
 	TCG_SECP_01,
+	TCG_SECP_02,
 };
 
 /*
@@ -273,6 +274,25 @@ struct opal_header {
 	struct opal_data_subpacket subpkt;
 };
 
+/*
+ * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
+ * Section: 3.3.4.7.5 STACK_RESET
+ */
+#define OPAL_STACK_RESET 0x0002
+
+struct opal_stack_reset {
+	u8 extendedComID[4];
+	__be32 request_code;
+};
+
+struct opal_stack_reset_response {
+	u8 extendedComID[4];
+	__be32 request_code;
+	u8 reserved0[2];
+	__be16 data_length;
+	__be32 response;
+};
+
 #define FC_TPER       0x0001
 #define FC_LOCKING    0x0002
 #define FC_GEOMETRY   0x0003
diff --git a/block/sed-opal.c b/block/sed-opal.c
index c34d19e91201..79b290d9458a 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -3545,6 +3545,50 @@ static int opal_get_sum_ranges(struct opal_dev *dev, struct opal_sum_ranges *opa
 	return ret;
 }
 
+static int opal_stack_reset(struct opal_dev *dev)
+{
+	struct opal_stack_reset *req;
+	struct opal_stack_reset_response *resp;
+	int ret;
+
+	mutex_lock(&dev->dev_lock);
+
+	memset(dev->cmd, 0, IO_BUFFER_LENGTH);
+	req = (struct opal_stack_reset *)dev->cmd;
+	req->extendedComID[0] = dev->comid >> 8;
+	req->extendedComID[1] = dev->comid & 0xFF;
+	req->request_code = cpu_to_be32(OPAL_STACK_RESET);
+
+	ret = dev->send_recv(dev->data, dev->comid, TCG_SECP_02,
+			     dev->cmd, IO_BUFFER_LENGTH, true);
+	if (ret) {
+		pr_debug("Error sending stack reset: %d\n", ret);
+		goto out;
+	}
+
+	memset(dev->resp, 0, IO_BUFFER_LENGTH);
+	ret = dev->send_recv(dev->data, dev->comid, TCG_SECP_02,
+			     dev->resp, IO_BUFFER_LENGTH, false);
+	if (ret) {
+		pr_debug("Error receiving stack reset response: %d\n", ret);
+		goto out;
+	}
+
+	resp = (struct opal_stack_reset_response *)dev->resp;
+	if (be16_to_cpu(resp->data_length) != 4) {
+		pr_debug("Stack reset pending\n");
+		ret = -EBUSY;
+		goto out;
+	}
+	if (be32_to_cpu(resp->response) != 0) {
+		pr_debug("Stack reset failed: %u\n", be32_to_cpu(resp->response));
+		ret = -EIO;
+	}
+out:
+	mutex_unlock(&dev->dev_lock);
+	return ret;
+}
+
 int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 {
 	void *p;
@@ -3642,6 +3686,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 	case IOC_OPAL_GET_SUM_STATUS:
 		ret = opal_get_sum_ranges(dev, p, arg);
 		break;
+	case IOC_OPAL_STACK_RESET:
+		ret = opal_stack_reset(dev);
+		break;
 
 	default:
 		break;
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index aa006edb612b..0630430cc01a 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -57,6 +57,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
 	case IOC_OPAL_LR_SET_START_LEN:
 	case IOC_OPAL_ENABLE_DISABLE_LR:
 	case IOC_OPAL_GET_SUM_STATUS:
+	case IOC_OPAL_STACK_RESET:
 		return true;
 	}
 	return false;
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h
index 9830298ec51c..ef4d3be6ca7f 100644
--- a/include/uapi/linux/sed-opal.h
+++ b/include/uapi/linux/sed-opal.h
@@ -245,5 +245,6 @@ struct opal_revert_lsp {
 #define IOC_OPAL_LR_SET_START_LEN   _IOW('p', 243, struct opal_user_lr_setup)
 #define IOC_OPAL_ENABLE_DISABLE_LR  _IOW('p', 244, struct opal_user_lr_setup)
 #define IOC_OPAL_GET_SUM_STATUS     _IOW('p', 245, struct opal_sum_ranges)
+#define IOC_OPAL_STACK_RESET        _IO('p', 246)
 
 #endif /* _UAPI_SED_OPAL_H */
-- 
cgit v1.2.3


From f5587d1b6ec938afb2f74fe399a68020d66923e4 Mon Sep 17 00:00:00 2001
From: Gabriele Monaco <gmonaco@redhat.com>
Date: Mon, 30 Mar 2026 13:10:00 +0200
Subject: rv: Add Hybrid Automata monitor type

Deterministic automata define which events are allowed in every state,
but cannot define more sophisticated constraint taking into account the
system's environment (e.g. time or other states not producing events).

Add the Hybrid Automata monitor type as an extension of Deterministic
automata where each state transition is validating a constraint on a
finite number of environment variables.
Hybrid automata can be used to implement timed automata, where the
environment variables are clocks.

Also implement the necessary functionality to handle clock constraints
(ns or jiffy granularity) on state and events.

Reviewed-by: Nam Cao <namcao@linutronix.de>
Link: https://lore.kernel.org/r/20260330111010.153663-3-gmonaco@redhat.com
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
---
 include/linux/rv.h         |  38 ++++
 include/rv/da_monitor.h    |  73 ++++++-
 include/rv/ha_monitor.h    | 475 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/rv/Kconfig    |  13 ++
 kernel/trace/rv/rv_trace.h |  63 ++++++
 5 files changed, 658 insertions(+), 4 deletions(-)
 create mode 100644 include/rv/ha_monitor.h

(limited to 'include/linux')

diff --git a/include/linux/rv.h b/include/linux/rv.h
index 58774eb3aecf..0aef9e3c785c 100644
--- a/include/linux/rv.h
+++ b/include/linux/rv.h
@@ -81,11 +81,49 @@ struct ltl_monitor {};
 
 #endif /* CONFIG_RV_LTL_MONITOR */
 
+#ifdef CONFIG_RV_HA_MONITOR
+/*
+ * In the future, hybrid automata may rely on multiple
+ * environment variables, e.g. different clocks started at
+ * different times or running at different speed.
+ * For now we support only 1 variable.
+ */
+#define MAX_HA_ENV_LEN 1
+
+/*
+ * Monitors can pick the preferred timer implementation:
+ * No timer: if monitors don't have state invariants.
+ * Timer wheel: lightweight invariants check but far less precise.
+ * Hrtimer: accurate invariants check with higher overhead.
+ */
+#define HA_TIMER_NONE 0
+#define HA_TIMER_WHEEL 1
+#define HA_TIMER_HRTIMER 2
+
+/*
+ * Hybrid automaton per-object variables.
+ */
+struct ha_monitor {
+	struct da_monitor da_mon;
+	u64 env_store[MAX_HA_ENV_LEN];
+	union {
+		struct hrtimer hrtimer;
+		struct timer_list timer;
+	};
+};
+
+#else
+
+struct ha_monitor { };
+
+#endif /* CONFIG_RV_HA_MONITOR */
+
 #define RV_PER_TASK_MONITOR_INIT	(CONFIG_RV_PER_TASK_MONITORS)
 
 union rv_task_monitor {
 	struct da_monitor	da_mon;
 	struct ltl_monitor	ltl_mon;
+	struct ha_monitor	ha_mon;
 };
 
 #ifdef CONFIG_RV_REACTORS
diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h
index 89a0b81d4b3e..ab5fe0896a46 100644
--- a/include/rv/da_monitor.h
+++ b/include/rv/da_monitor.h
@@ -3,9 +3,9 @@
  * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org>
  *
  * Deterministic automata (DA) monitor functions, to be used together
- * with automata models in C generated by the dot2k tool.
+ * with automata models in C generated by the rvgen tool.
  *
- * The dot2k tool is available at tools/verification/dot2k/
+ * The rvgen tool is available at tools/verification/rvgen/
  *
  * For further information, see:
  *   Documentation/trace/rv/monitor_synthesis.rst
@@ -28,6 +28,33 @@
 
 static struct rv_monitor rv_this;
 
+/*
+ * Hook to allow the implementation of hybrid automata: define it with a
+ * function that takes curr_state, event and next_state and returns true if the
+ * environment constraints (e.g. timing) are satisfied, false otherwise.
+ */
+#ifndef da_monitor_event_hook
+#define da_monitor_event_hook(...) true
+#endif
+
+/*
+ * Hook to allow the implementation of hybrid automata: define it with a
+ * function that takes the da_monitor and performs further initialisation
+ * (e.g. reset set up timers).
+ */
+#ifndef da_monitor_init_hook
+#define da_monitor_init_hook(da_mon)
+#endif
+
+/*
+ * Hook to allow the implementation of hybrid automata: define it with a
+ * function that takes the da_monitor and performs further reset (e.g. reset
+ * all clocks).
+ */
+#ifndef da_monitor_reset_hook
+#define da_monitor_reset_hook(da_mon)
+#endif
+
 /*
  * Type for the target id, default to int but can be overridden.
  */
@@ -49,6 +76,7 @@ static void react(enum states curr_state, enum events event)
  */
 static inline void da_monitor_reset(struct da_monitor *da_mon)
 {
+	da_monitor_reset_hook(da_mon);
 	da_mon->monitoring = 0;
 	da_mon->curr_state = model_get_initial_state();
 }
@@ -63,6 +91,7 @@ static inline void da_monitor_start(struct da_monitor *da_mon)
 {
 	da_mon->curr_state = model_get_initial_state();
 	da_mon->monitoring = 1;
+	da_monitor_init_hook(da_mon);
 }
 
 /*
@@ -142,7 +171,10 @@ static inline int da_monitor_init(void)
 /*
  * da_monitor_destroy - destroy the monitor
  */
-static inline void da_monitor_destroy(void) { }
+static inline void da_monitor_destroy(void)
+{
+	da_monitor_reset_all();
+}
 
 #elif RV_MON_TYPE == RV_MON_PER_CPU
 /*
@@ -188,7 +220,10 @@ static inline int da_monitor_init(void)
 /*
  * da_monitor_destroy - destroy the monitor
  */
-static inline void da_monitor_destroy(void) { }
+static inline void da_monitor_destroy(void)
+{
+	da_monitor_reset_all();
+}
 
 #elif RV_MON_TYPE == RV_MON_PER_TASK
 /*
@@ -209,6 +244,24 @@ static inline struct da_monitor *da_get_monitor(struct task_struct *tsk)
 	return &tsk->rv[task_mon_slot].da_mon;
 }
 
+/*
+ * da_get_task - return the task associated to the monitor
+ */
+static inline struct task_struct *da_get_task(struct da_monitor *da_mon)
+{
+	return container_of(da_mon, struct task_struct, rv[task_mon_slot].da_mon);
+}
+
+/*
+ * da_get_id - return the id associated to the monitor
+ *
+ * For per-task monitors, the id is the task's PID.
+ */
+static inline da_id_type da_get_id(struct da_monitor *da_mon)
+{
+	return da_get_task(da_mon)->pid;
+}
+
 static void da_monitor_reset_all(void)
 {
 	struct task_struct *g, *p;
@@ -253,6 +306,8 @@ static inline void da_monitor_destroy(void)
 	}
 	rv_put_task_monitor_slot(task_mon_slot);
 	task_mon_slot = RV_PER_TASK_MONITOR_INIT;
+
+	da_monitor_reset_all();
 }
 #endif /* RV_MON_TYPE */
 
@@ -279,6 +334,14 @@ static inline void da_trace_error(struct da_monitor *da_mon,
 	CONCATENATE(trace_error_, MONITOR_NAME)(curr_state, event);
 }
 
+/*
+ * da_get_id - unused for implicit monitors
+ */
+static inline da_id_type da_get_id(struct da_monitor *da_mon)
+{
+	return 0;
+}
+
 #elif RV_MON_TYPE == RV_MON_PER_TASK
 /*
  * Trace events for per_task monitors, report the PID of the task.
@@ -323,6 +386,8 @@ static inline bool da_event(struct da_monitor *da_mon, enum events event, da_id_
 			return false;
 		}
 		if (likely(try_cmpxchg(&da_mon->curr_state, &curr_state, next_state))) {
+			if (!da_monitor_event_hook(da_mon, curr_state, event, next_state, id))
+				return false;
 			da_trace_event(da_mon, model_get_state_name(curr_state),
 				       model_get_event_name(event),
 				       model_get_state_name(next_state),
diff --git a/include/rv/ha_monitor.h b/include/rv/ha_monitor.h
new file mode 100644
index 000000000000..b6cf3b2ba989
--- /dev/null
+++ b/include/rv/ha_monitor.h
@@ -0,0 +1,475 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025-2028 Red Hat, Inc. Gabriele Monaco <gmonaco@redhat.com>
+ *
+ * Hybrid automata (HA) monitor functions, to be used together
+ * with automata models in C generated by the rvgen tool.
+ *
+ * This type of monitors extends the Deterministic automata (DA) class by
+ * adding a set of environment variables (e.g. clocks) that can be used to
+ * constraint the valid transitions.
+ *
+ * The rvgen tool is available at tools/verification/rvgen/
+ *
+ * For further information, see:
+ *   Documentation/trace/rv/monitor_synthesis.rst
+ */
+
+#ifndef _RV_HA_MONITOR_H
+#define _RV_HA_MONITOR_H
+
+#include <rv/automata.h>
+
+#ifndef da_id_type
+#define da_id_type int
+#endif
+
+static inline void ha_monitor_init_env(struct da_monitor *da_mon);
+static inline void ha_monitor_reset_env(struct da_monitor *da_mon);
+static inline void ha_setup_timer(struct ha_monitor *ha_mon);
+static inline bool ha_cancel_timer(struct ha_monitor *ha_mon);
+static bool ha_monitor_handle_constraint(struct da_monitor *da_mon,
+					 enum states curr_state,
+					 enum events event,
+					 enum states next_state,
+					 da_id_type id);
+#define da_monitor_event_hook ha_monitor_handle_constraint
+#define da_monitor_init_hook ha_monitor_init_env
+#define da_monitor_reset_hook ha_monitor_reset_env
+
+#include <rv/da_monitor.h>
+#include <linux/seq_buf.h>
+
+/* This simplifies things since da_mon and ha_mon coexist in the same union */
+_Static_assert(offsetof(struct ha_monitor, da_mon) == 0,
+	       "da_mon must be the first element in an ha_mon!");
+#define to_ha_monitor(da) container_of(da, struct ha_monitor, da_mon)
+
+#define ENV_MAX CONCATENATE(env_max_, MONITOR_NAME)
+#define ENV_MAX_STORED CONCATENATE(env_max_stored_, MONITOR_NAME)
+#define envs CONCATENATE(envs_, MONITOR_NAME)
+
+/* Environment storage before being reset */
+#define ENV_INVALID_VALUE U64_MAX
+/* Error with no event occurs only on timeouts */
+#define EVENT_NONE EVENT_MAX
+#define EVENT_NONE_LBL "none"
+#define ENV_BUFFER_SIZE 64
+
+#ifdef CONFIG_RV_REACTORS
+
+/*
+ * ha_react - trigger the reaction after a failed environment constraint
+ *
+ * The transition from curr_state with event is otherwise valid, but the
+ * environment constraint is false. This function can be called also with no
+ * event from a timer (state constraints only).
+ */
+static void ha_react(enum states curr_state, enum events event, char *env)
+{
+	rv_react(&rv_this,
+		 "rv: monitor %s does not allow event %s on state %s with env %s\n",
+		 __stringify(MONITOR_NAME),
+		 event == EVENT_NONE ? EVENT_NONE_LBL : model_get_event_name(event),
+		 model_get_state_name(curr_state), env);
+}
+
+#else /* CONFIG_RV_REACTOR */
+
+static void ha_react(enum states curr_state, enum events event, char *env) { }
+#endif
+
+/*
+ * model_get_state_name - return the (string) name of the given state
+ */
+static char *model_get_env_name(enum envs env)
+{
+	if ((env < 0) || (env >= ENV_MAX))
+		return "INVALID";
+
+	return RV_AUTOMATON_NAME.env_names[env];
+}
+
+/*
+ * Monitors requiring a timer implementation need to request it explicitly.
+ */
+#ifndef HA_TIMER_TYPE
+#define HA_TIMER_TYPE HA_TIMER_NONE
+#endif
+
+#if HA_TIMER_TYPE == HA_TIMER_WHEEL
+static void ha_monitor_timer_callback(struct timer_list *timer);
+#elif HA_TIMER_TYPE == HA_TIMER_HRTIMER
+static enum hrtimer_restart ha_monitor_timer_callback(struct hrtimer *hrtimer);
+#endif
+
+/*
+ * ktime_get_ns is expensive, since we usually don't require precise accounting
+ * of changes within the same event, cache the current time at the beginning of
+ * the constraint handler and use the cache for subsequent calls.
+ * Monitors without ns clocks automatically skip this.
+ */
+#ifdef HA_CLK_NS
+#define ha_get_ns() ktime_get_ns()
+#else
+#define ha_get_ns() 0
+#endif /* HA_CLK_NS */
+
+/* Should be supplied by the monitor */
+static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs env, u64 time_ns);
+static bool ha_verify_constraint(struct ha_monitor *ha_mon,
+				 enum states curr_state,
+				 enum events event,
+				 enum states next_state,
+				 u64 time_ns);
+
+/*
+ * ha_monitor_reset_all_stored - reset all environment variables in the monitor
+ */
+static inline void ha_monitor_reset_all_stored(struct ha_monitor *ha_mon)
+{
+	for (int i = 0; i < ENV_MAX_STORED; i++)
+		WRITE_ONCE(ha_mon->env_store[i], ENV_INVALID_VALUE);
+}
+
+/*
+ * ha_monitor_init_env - setup timer and reset all environment
+ *
+ * Called from a hook in the DA start functions, it supplies the da_mon
+ * corresponding to the current ha_mon.
+ * Not all hybrid automata require the timer, still set it for simplicity.
+ */
+static inline void ha_monitor_init_env(struct da_monitor *da_mon)
+{
+	struct ha_monitor *ha_mon = to_ha_monitor(da_mon);
+
+	ha_monitor_reset_all_stored(ha_mon);
+	ha_setup_timer(ha_mon);
+}
+
+/*
+ * ha_monitor_reset_env - stop timer and reset all environment
+ *
+ * Called from a hook in the DA reset functions, it supplies the da_mon
+ * corresponding to the current ha_mon.
+ * Not all hybrid automata require the timer, still clear it for simplicity.
+ */
+static inline void ha_monitor_reset_env(struct da_monitor *da_mon)
+{
+	struct ha_monitor *ha_mon = to_ha_monitor(da_mon);
+
+	/* Initialisation resets the monitor before initialising the timer */
+	if (likely(da_monitoring(da_mon)))
+		ha_cancel_timer(ha_mon);
+}
+
+/*
+ * ha_monitor_env_invalid - return true if env has not been initialised
+ */
+static inline bool ha_monitor_env_invalid(struct ha_monitor *ha_mon, enum envs env)
+{
+	return READ_ONCE(ha_mon->env_store[env]) == ENV_INVALID_VALUE;
+}
+
+static inline void ha_get_env_string(struct seq_buf *s,
+				     struct ha_monitor *ha_mon, u64 time_ns)
+{
+	const char *format_str = "%s=%llu";
+
+	for (int i = 0; i < ENV_MAX; i++) {
+		seq_buf_printf(s, format_str, model_get_env_name(i),
+			       ha_get_env(ha_mon, i, time_ns));
+		format_str = ",%s=%llu";
+	}
+}
+
+#if RV_MON_TYPE == RV_MON_GLOBAL || RV_MON_TYPE == RV_MON_PER_CPU
+static inline void ha_trace_error_env(struct ha_monitor *ha_mon,
+				      char *curr_state, char *event, char *env,
+				      da_id_type id)
+{
+	CONCATENATE(trace_error_env_, MONITOR_NAME)(curr_state, event, env);
+}
+#elif RV_MON_TYPE == RV_MON_PER_TASK
+static inline void ha_trace_error_env(struct ha_monitor *ha_mon,
+				      char *curr_state, char *event, char *env,
+				      da_id_type id)
+{
+	CONCATENATE(trace_error_env_, MONITOR_NAME)(id, curr_state, event, env);
+}
+#endif /* RV_MON_TYPE */
+
+/*
+ * ha_get_monitor - return the current monitor
+ */
+#define ha_get_monitor(...) to_ha_monitor(da_get_monitor(__VA_ARGS__))
+
+/*
+ * ha_monitor_handle_constraint - handle the constraint on the current transition
+ *
+ * If the monitor implementation defines a constraint in the transition from
+ * curr_state to event, react and trace appropriately as well as return false.
+ * This function is called from the hook in the DA event handle function and
+ * triggers a failure in the monitor.
+ */
+static bool ha_monitor_handle_constraint(struct da_monitor *da_mon,
+					 enum states curr_state,
+					 enum events event,
+					 enum states next_state,
+					 da_id_type id)
+{
+	struct ha_monitor *ha_mon = to_ha_monitor(da_mon);
+	u64 time_ns = ha_get_ns();
+	DECLARE_SEQ_BUF(env_string, ENV_BUFFER_SIZE);
+
+	if (ha_verify_constraint(ha_mon, curr_state, event, next_state, time_ns))
+		return true;
+
+	ha_get_env_string(&env_string, ha_mon, time_ns);
+	ha_react(curr_state, event, env_string.buffer);
+	ha_trace_error_env(ha_mon,
+			   model_get_state_name(curr_state),
+			   model_get_event_name(event),
+			   env_string.buffer, id);
+	return false;
+}
+
+static inline void __ha_monitor_timer_callback(struct ha_monitor *ha_mon)
+{
+	enum states curr_state = READ_ONCE(ha_mon->da_mon.curr_state);
+	DECLARE_SEQ_BUF(env_string, ENV_BUFFER_SIZE);
+	u64 time_ns = ha_get_ns();
+
+	ha_get_env_string(&env_string, ha_mon, time_ns);
+	ha_react(curr_state, EVENT_NONE, env_string.buffer);
+	ha_trace_error_env(ha_mon, model_get_state_name(curr_state),
+			   EVENT_NONE_LBL, env_string.buffer,
+			   da_get_id(&ha_mon->da_mon));
+
+	da_monitor_reset(&ha_mon->da_mon);
+}
+
+/*
+ * The clock variables have 2 different representations in the env_store:
+ * - The guard representation is the timestamp of the last reset
+ * - The invariant representation is the timestamp when the invariant expires
+ * As the representations are incompatible, care must be taken when switching
+ * between them: the invariant representation can only be used when starting a
+ * timer when the previous representation was guard (e.g. no other invariant
+ * started since the last reset operation).
+ * Likewise, switching from invariant to guard representation without a reset
+ * can be done only by subtracting the exact value used to start the invariant.
+ *
+ * Reading the environment variable (ha_get_clk) also reflects this difference
+ * any reads in states that have an invariant return the (possibly negative)
+ * time since expiration, other reads return the time since last reset.
+ */
+
+/*
+ * Helper functions for env variables describing clocks with ns granularity
+ */
+static inline u64 ha_get_clk_ns(struct ha_monitor *ha_mon, enum envs env, u64 time_ns)
+{
+	return time_ns - READ_ONCE(ha_mon->env_store[env]);
+}
+static inline void ha_reset_clk_ns(struct ha_monitor *ha_mon, enum envs env, u64 time_ns)
+{
+	WRITE_ONCE(ha_mon->env_store[env], time_ns);
+}
+static inline void ha_set_invariant_ns(struct ha_monitor *ha_mon, enum envs env,
+				       u64 value, u64 time_ns)
+{
+	WRITE_ONCE(ha_mon->env_store[env], time_ns + value);
+}
+static inline bool ha_check_invariant_ns(struct ha_monitor *ha_mon,
+					 enum envs env, u64 time_ns)
+{
+	return READ_ONCE(ha_mon->env_store[env]) >= time_ns;
+}
+/*
+ * ha_invariant_passed_ns - prepare the invariant and return the time since reset
+ */
+static inline u64 ha_invariant_passed_ns(struct ha_monitor *ha_mon, enum envs env,
+				   u64 expire, u64 time_ns)
+{
+	u64 passed = 0;
+
+	if (env < 0 || env >= ENV_MAX_STORED)
+		return 0;
+	if (ha_monitor_env_invalid(ha_mon, env))
+		return 0;
+	passed = ha_get_env(ha_mon, env, time_ns);
+	ha_set_invariant_ns(ha_mon, env, expire - passed, time_ns);
+	return passed;
+}
+
+/*
+ * Helper functions for env variables describing clocks with jiffy granularity
+ */
+static inline u64 ha_get_clk_jiffy(struct ha_monitor *ha_mon, enum envs env)
+{
+	return get_jiffies_64() - READ_ONCE(ha_mon->env_store[env]);
+}
+static inline void ha_reset_clk_jiffy(struct ha_monitor *ha_mon, enum envs env)
+{
+	WRITE_ONCE(ha_mon->env_store[env], get_jiffies_64());
+}
+static inline void ha_set_invariant_jiffy(struct ha_monitor *ha_mon,
+					  enum envs env, u64 value)
+{
+	WRITE_ONCE(ha_mon->env_store[env], get_jiffies_64() + value);
+}
+static inline bool ha_check_invariant_jiffy(struct ha_monitor *ha_mon,
+					    enum envs env, u64 time_ns)
+{
+	return time_after64(READ_ONCE(ha_mon->env_store[env]), get_jiffies_64());
+
+}
+/*
+ * ha_invariant_passed_jiffy - prepare the invariant and return the time since reset
+ */
+static inline u64 ha_invariant_passed_jiffy(struct ha_monitor *ha_mon, enum envs env,
+				      u64 expire, u64 time_ns)
+{
+	u64 passed = 0;
+
+	if (env < 0 || env >= ENV_MAX_STORED)
+		return 0;
+	if (ha_monitor_env_invalid(ha_mon, env))
+		return 0;
+	passed = ha_get_env(ha_mon, env, time_ns);
+	ha_set_invariant_jiffy(ha_mon, env, expire - passed);
+	return passed;
+}
+
+/*
+ * Retrieve the last reset time (guard representation) from the invariant
+ * representation (expiration).
+ * It the caller's responsibility to make sure the storage was actually in the
+ * invariant representation (e.g. the current state has an invariant).
+ * The provided value must be the same used when starting the invariant.
+ *
+ * This function's access to the storage is NOT atomic, due to the rarity when
+ * this is used. If a monitor allows writes concurrent to this, likely
+ * other things are broken and need rethinking the model or additional locking.
+ */
+static inline void ha_inv_to_guard(struct ha_monitor *ha_mon, enum envs env,
+				   u64 value, u64 time_ns)
+{
+	WRITE_ONCE(ha_mon->env_store[env], READ_ONCE(ha_mon->env_store[env]) - value);
+}
+
+#if HA_TIMER_TYPE == HA_TIMER_WHEEL
+/*
+ * Helper functions to handle the monitor timer.
+ * Not all monitors require a timer, in such case the timer will be set up but
+ * never armed.
+ * Timers start since the last reset of the supplied env or from now if env is
+ * not an environment variable. If env was not initialised no timer starts.
+ * Timers can expire on any CPU unless the monitor is per-cpu,
+ * where we assume every event occurs on the local CPU.
+ */
+static void ha_monitor_timer_callback(struct timer_list *timer)
+{
+	struct ha_monitor *ha_mon = container_of(timer, struct ha_monitor, timer);
+
+	__ha_monitor_timer_callback(ha_mon);
+}
+static inline void ha_setup_timer(struct ha_monitor *ha_mon)
+{
+	int mode = 0;
+
+	if (RV_MON_TYPE == RV_MON_PER_CPU)
+		mode |= TIMER_PINNED;
+	timer_setup(&ha_mon->timer, ha_monitor_timer_callback, mode);
+}
+static inline void ha_start_timer_jiffy(struct ha_monitor *ha_mon, enum envs env,
+					u64 expire, u64 time_ns)
+{
+	u64 passed = ha_invariant_passed_jiffy(ha_mon, env, expire, time_ns);
+
+	mod_timer(&ha_mon->timer, get_jiffies_64() + expire - passed);
+}
+static inline void ha_start_timer_ns(struct ha_monitor *ha_mon, enum envs env,
+				     u64 expire, u64 time_ns)
+{
+	u64 passed = ha_invariant_passed_ns(ha_mon, env, expire, time_ns);
+
+	ha_start_timer_jiffy(ha_mon, ENV_MAX_STORED,
+			     nsecs_to_jiffies(expire - passed + TICK_NSEC - 1), time_ns);
+}
+/*
+ * ha_cancel_timer - Cancel the timer
+ *
+ * Returns:
+ *  *  1 when the timer was active
+ *  *  0 when the timer was not active or running a callback
+ */
+static inline bool ha_cancel_timer(struct ha_monitor *ha_mon)
+{
+	return timer_delete(&ha_mon->timer);
+}
+#elif HA_TIMER_TYPE == HA_TIMER_HRTIMER
+/*
+ * Helper functions to handle the monitor timer.
+ * Not all monitors require a timer, in such case the timer will be set up but
+ * never armed.
+ * Timers start since the last reset of the supplied env or from now if env is
+ * not an environment variable. If env was not initialised no timer starts.
+ * Timers can expire on any CPU unless the monitor is per-cpu,
+ * where we assume every event occurs on the local CPU.
+ */
+static enum hrtimer_restart ha_monitor_timer_callback(struct hrtimer *hrtimer)
+{
+	struct ha_monitor *ha_mon = container_of(hrtimer, struct ha_monitor, hrtimer);
+
+	__ha_monitor_timer_callback(ha_mon);
+	return HRTIMER_NORESTART;
+}
+static inline void ha_setup_timer(struct ha_monitor *ha_mon)
+{
+	hrtimer_setup(&ha_mon->hrtimer, ha_monitor_timer_callback,
+		      CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
+}
+static inline void ha_start_timer_ns(struct ha_monitor *ha_mon, enum envs env,
+				     u64 expire, u64 time_ns)
+{
+	int mode = HRTIMER_MODE_REL_HARD;
+	u64 passed = ha_invariant_passed_ns(ha_mon, env, expire, time_ns);
+
+	if (RV_MON_TYPE == RV_MON_PER_CPU)
+		mode |= HRTIMER_MODE_PINNED;
+	hrtimer_start(&ha_mon->hrtimer, ns_to_ktime(expire - passed), mode);
+}
+static inline void ha_start_timer_jiffy(struct ha_monitor *ha_mon, enum envs env,
+					u64 expire, u64 time_ns)
+{
+	u64 passed = ha_invariant_passed_jiffy(ha_mon, env, expire, time_ns);
+
+	ha_start_timer_ns(ha_mon, ENV_MAX_STORED,
+			  jiffies_to_nsecs(expire - passed), time_ns);
+}
+/*
+ * ha_cancel_timer - Cancel the timer
+ *
+ * Returns:
+ *  *  1 when the timer was active
+ *  *  0 when the timer was not active or running a callback
+ */
+static inline bool ha_cancel_timer(struct ha_monitor *ha_mon)
+{
+	return hrtimer_try_to_cancel(&ha_mon->hrtimer) == 1;
+}
+#else /* HA_TIMER_NONE */
+/*
+ * Start function is intentionally not defined, monitors using timers must
+ * set HA_TIMER_TYPE to either HA_TIMER_WHEEL or HA_TIMER_HRTIMER.
+ */
+static inline void ha_setup_timer(struct ha_monitor *ha_mon) { }
+static inline bool ha_cancel_timer(struct ha_monitor *ha_mon)
+{
+	return false;
+}
+#endif
+
+#endif
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
index 5b4be87ba59d..4ad392dfc57f 100644
--- a/kernel/trace/rv/Kconfig
+++ b/kernel/trace/rv/Kconfig
@@ -23,6 +23,19 @@ config LTL_MON_EVENTS_ID
 config RV_LTL_MONITOR
 	bool
 
+config RV_HA_MONITOR
+	bool
+
+config HA_MON_EVENTS_IMPLICIT
+	select DA_MON_EVENTS_IMPLICIT
+	select RV_HA_MONITOR
+	bool
+
+config HA_MON_EVENTS_ID
+	select DA_MON_EVENTS_ID
+	select RV_HA_MONITOR
+	bool
+
 menuconfig RV
 	bool "Runtime Verification"
 	select TRACING
diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h
index 4a6faddac614..7c598967bc0e 100644
--- a/kernel/trace/rv/rv_trace.h
+++ b/kernel/trace/rv/rv_trace.h
@@ -65,6 +65,36 @@ DECLARE_EVENT_CLASS(error_da_monitor,
 #include <monitors/opid/opid_trace.h>
 // Add new monitors based on CONFIG_DA_MON_EVENTS_IMPLICIT here
 
+#ifdef CONFIG_HA_MON_EVENTS_IMPLICIT
+/* For simplicity this class is marked as DA although relevant only for HA */
+DECLARE_EVENT_CLASS(error_env_da_monitor,
+
+	TP_PROTO(char *state, char *event, char *env),
+
+	TP_ARGS(state, event, env),
+
+	TP_STRUCT__entry(
+		__string(	state,	state	)
+		__string(	event,	event	)
+		__string(	env,	env	)
+	),
+
+	TP_fast_assign(
+		__assign_str(state);
+		__assign_str(event);
+		__assign_str(env);
+	),
+
+	TP_printk("event %s not expected in the state %s with env %s",
+		__get_str(event),
+		__get_str(state),
+		__get_str(env))
+);
+
+// Add new monitors based on CONFIG_HA_MON_EVENTS_IMPLICIT here
+
+#endif
+
 #endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */
 
 #ifdef CONFIG_DA_MON_EVENTS_ID
@@ -128,6 +158,39 @@ DECLARE_EVENT_CLASS(error_da_monitor_id,
 #include <monitors/sssw/sssw_trace.h>
 // Add new monitors based on CONFIG_DA_MON_EVENTS_ID here
 
+#ifdef CONFIG_HA_MON_EVENTS_ID
+/* For simplicity this class is marked as DA although relevant only for HA */
+DECLARE_EVENT_CLASS(error_env_da_monitor_id,
+
+	TP_PROTO(int id, char *state, char *event, char *env),
+
+	TP_ARGS(id, state, event, env),
+
+	TP_STRUCT__entry(
+		__field(	int,	id	)
+		__string(	state,	state	)
+		__string(	event,	event	)
+		__string(	env,	env	)
+	),
+
+	TP_fast_assign(
+		__assign_str(state);
+		__assign_str(event);
+		__assign_str(env);
+		__entry->id	= id;
+	),
+
+	TP_printk("%d: event %s not expected in the state %s with env %s",
+		__entry->id,
+		__get_str(event),
+		__get_str(state),
+		__get_str(env))
+);
+
+// Add new monitors based on CONFIG_HA_MON_EVENTS_ID here
+
+#endif
+
 #endif /* CONFIG_DA_MON_EVENTS_ID */
 #ifdef CONFIG_LTL_MON_EVENTS_ID
 DECLARE_EVENT_CLASS(event_ltl_monitor_id,
-- 
cgit v1.2.3


From 4a24127bd6cbf03fb17de8b43f2d8db3f55ca333 Mon Sep 17 00:00:00 2001
From: Gabriele Monaco <gmonaco@redhat.com>
Date: Mon, 30 Mar 2026 13:10:06 +0200
Subject: rv: Add support for per-object monitors in DA/HA

RV deterministic and hybrid automata currently only support global,
per-cpu and per-task monitors. It isn't possible to write a model that
would follow some different type of object, like a deadline entity or a
lock.

Define the generic per-object monitor implementation which shares part
of the implementation with the per-task monitors.
The user needs to provide an id for the object (e.g. pid for tasks) and
define the data type for the monitor_target (e.g. struct task_struct *
for tasks). Both are supplied to the event handlers, as the id may not
be easily available in the target.

The monitor storage (e.g. the rv monitor, pointer to the target, etc.)
is stored in a hash table indexed by id. Monitor storage objects are
automatically allocated unless specified otherwise (e.g. if the creation
context is unsafe for allocation).

Reviewed-by: Nam Cao <namcao@linutronix.de>
Link: https://lore.kernel.org/r/20260330111010.153663-9-gmonaco@redhat.com
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
---
 include/linux/rv.h      |   1 +
 include/rv/da_monitor.h | 300 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/rv/ha_monitor.h |   5 +-
 3 files changed, 300 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rv.h b/include/linux/rv.h
index 0aef9e3c785c..541ba404926a 100644
--- a/include/linux/rv.h
+++ b/include/linux/rv.h
@@ -13,6 +13,7 @@
 #define RV_MON_GLOBAL   0
 #define RV_MON_PER_CPU  1
 #define RV_MON_PER_TASK 2
+#define RV_MON_PER_OBJ  3
 
 #ifdef CONFIG_RV
 #include <linux/array_size.h>
diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h
index ab5fe0896a46..39765ff6f098 100644
--- a/include/rv/da_monitor.h
+++ b/include/rv/da_monitor.h
@@ -19,6 +19,8 @@
 #include <linux/stringify.h>
 #include <linux/bug.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/hashtable.h>
 
 /*
  * Per-cpu variables require a unique name although static in some
@@ -57,6 +59,9 @@ static struct rv_monitor rv_this;
 
 /*
  * Type for the target id, default to int but can be overridden.
+ * A long type can work as hash table key (PER_OBJ) but will be downgraded to
+ * int in the event tracepoint.
+ * Unused for implicit monitors.
  */
 #ifndef da_id_type
 #define da_id_type int
@@ -245,9 +250,9 @@ static inline struct da_monitor *da_get_monitor(struct task_struct *tsk)
 }
 
 /*
- * da_get_task - return the task associated to the monitor
+ * da_get_target - return the task associated to the monitor
  */
-static inline struct task_struct *da_get_task(struct da_monitor *da_mon)
+static inline struct task_struct *da_get_target(struct da_monitor *da_mon)
 {
 	return container_of(da_mon, struct task_struct, rv[task_mon_slot].da_mon);
 }
@@ -259,7 +264,7 @@ static inline struct task_struct *da_get_task(struct da_monitor *da_mon)
  */
 static inline da_id_type da_get_id(struct da_monitor *da_mon)
 {
-	return da_get_task(da_mon)->pid;
+	return da_get_target(da_mon)->pid;
 }
 
 static void da_monitor_reset_all(void)
@@ -309,6 +314,221 @@ static inline void da_monitor_destroy(void)
 
 	da_monitor_reset_all();
 }
+
+#elif RV_MON_TYPE == RV_MON_PER_OBJ
+/*
+ * Functions to define, init and get a per-object monitor.
+ */
+
+struct da_monitor_storage {
+	da_id_type id;
+	monitor_target target;
+	union rv_task_monitor rv;
+	struct hlist_node node;
+	struct rcu_head rcu;
+};
+
+#ifndef DA_MONITOR_HT_BITS
+#define DA_MONITOR_HT_BITS 10
+#endif
+static DEFINE_HASHTABLE(da_monitor_ht, DA_MONITOR_HT_BITS);
+
+/*
+ * da_create_empty_storage - pre-allocate an empty storage
+ */
+static inline struct da_monitor_storage *da_create_empty_storage(da_id_type id)
+{
+	struct da_monitor_storage *mon_storage;
+
+	mon_storage = kmalloc_nolock(sizeof(struct da_monitor_storage),
+				     __GFP_ZERO, NUMA_NO_NODE);
+	if (!mon_storage)
+		return NULL;
+
+	hash_add_rcu(da_monitor_ht, &mon_storage->node, id);
+	mon_storage->id = id;
+	return mon_storage;
+}
+
+/*
+ * da_create_storage - create the per-object storage
+ *
+ * The caller is responsible to synchronise writers, either with locks or
+ * implicitly. For instance, if da_create_storage is only called from a single
+ * event for target (e.g. sched_switch), it's safe to call this without locks.
+ */
+static inline struct da_monitor *da_create_storage(da_id_type id,
+						   monitor_target target,
+						   struct da_monitor *da_mon)
+{
+	struct da_monitor_storage *mon_storage;
+
+	if (da_mon)
+		return da_mon;
+
+	mon_storage = da_create_empty_storage(id);
+	if (!mon_storage)
+		return NULL;
+
+	mon_storage->target = target;
+	return &mon_storage->rv.da_mon;
+}
+
+/*
+ * __da_get_mon_storage - get the monitor storage from the hash table
+ */
+static inline struct da_monitor_storage *__da_get_mon_storage(da_id_type id)
+{
+	struct da_monitor_storage *mon_storage;
+
+	lockdep_assert_in_rcu_read_lock();
+	hash_for_each_possible_rcu(da_monitor_ht, mon_storage, node, id) {
+		if (mon_storage->id == id)
+			return mon_storage;
+	}
+
+	return NULL;
+}
+
+/*
+ * da_get_monitor - return the monitor for target
+ */
+static struct da_monitor *da_get_monitor(da_id_type id, monitor_target target)
+{
+	struct da_monitor_storage *mon_storage;
+
+	mon_storage = __da_get_mon_storage(id);
+	return mon_storage ? &mon_storage->rv.da_mon : NULL;
+}
+
+/*
+ * da_get_target - return the object associated to the monitor
+ */
+static inline monitor_target da_get_target(struct da_monitor *da_mon)
+{
+	return container_of(da_mon, struct da_monitor_storage, rv.da_mon)->target;
+}
+
+/*
+ * da_get_id - return the id associated to the monitor
+ */
+static inline da_id_type da_get_id(struct da_monitor *da_mon)
+{
+	return container_of(da_mon, struct da_monitor_storage, rv.da_mon)->id;
+}
+
+/*
+ * da_create_or_get - create the per-object storage if not already there
+ *
+ * This needs a lookup so should be guarded by RCU, the condition is checked
+ * directly in da_create_storage()
+ */
+static inline void da_create_or_get(da_id_type id, monitor_target target)
+{
+	guard(rcu)();
+	da_create_storage(id, target, da_get_monitor(id, target));
+}
+
+/*
+ * da_fill_empty_storage - store the target in a pre-allocated storage
+ *
+ * Can be used as a substitute of da_create_storage when starting a monitor in
+ * an environment where allocation is unsafe.
+ */
+static inline struct da_monitor *da_fill_empty_storage(da_id_type id,
+						       monitor_target target,
+						       struct da_monitor *da_mon)
+{
+	if (unlikely(da_mon && !da_get_target(da_mon)))
+		container_of(da_mon, struct da_monitor_storage, rv.da_mon)->target = target;
+	return da_mon;
+}
+
+/*
+ * da_get_target_by_id - return the object associated to the id
+ */
+static inline monitor_target da_get_target_by_id(da_id_type id)
+{
+	struct da_monitor_storage *mon_storage;
+
+	guard(rcu)();
+	mon_storage = __da_get_mon_storage(id);
+
+	if (unlikely(!mon_storage))
+		return NULL;
+	return mon_storage->target;
+}
+
+/*
+ * da_destroy_storage - destroy the per-object storage
+ *
+ * The caller is responsible to synchronise writers, either with locks or
+ * implicitly. For instance, if da_destroy_storage is called at sched_exit and
+ * da_create_storage can never occur after that, it's safe to call this without
+ * locks.
+ * This function includes an RCU read-side critical section to synchronise
+ * against da_monitor_destroy().
+ */
+static inline void da_destroy_storage(da_id_type id)
+{
+	struct da_monitor_storage *mon_storage;
+
+	guard(rcu)();
+	mon_storage = __da_get_mon_storage(id);
+
+	if (!mon_storage)
+		return;
+	da_monitor_reset_hook(&mon_storage->rv.da_mon);
+	hash_del_rcu(&mon_storage->node);
+	kfree_rcu(mon_storage, rcu);
+}
+
+static void da_monitor_reset_all(void)
+{
+	struct da_monitor_storage *mon_storage;
+	int bkt;
+
+	rcu_read_lock();
+	hash_for_each_rcu(da_monitor_ht, bkt, mon_storage, node)
+		da_monitor_reset(&mon_storage->rv.da_mon);
+	rcu_read_unlock();
+}
+
+static inline int da_monitor_init(void)
+{
+	hash_init(da_monitor_ht);
+	return 0;
+}
+
+static inline void da_monitor_destroy(void)
+{
+	struct da_monitor_storage *mon_storage;
+	struct hlist_node *tmp;
+	int bkt;
+
+	/*
+	 * This function is called after all probes are disabled, we need only
+	 * worry about concurrency against old events.
+	 */
+	synchronize_rcu();
+	hash_for_each_safe(da_monitor_ht, bkt, tmp, mon_storage, node) {
+		da_monitor_reset_hook(&mon_storage->rv.da_mon);
+		hash_del_rcu(&mon_storage->node);
+		kfree(mon_storage);
+	}
+}
+
+/*
+ * Allow the per-object monitors to run allocation manually, necessary if the
+ * start condition is in a context problematic for allocation (e.g. scheduling).
+ * In such case, if the storage was pre-allocated without a target, set it now.
+ */
+#ifdef DA_SKIP_AUTO_ALLOC
+#define da_prepare_storage da_fill_empty_storage
+#else
+#define da_prepare_storage da_create_storage
+#endif /* DA_SKIP_AUTO_ALLOC */
+
 #endif /* RV_MON_TYPE */
 
 #if RV_MON_TYPE == RV_MON_GLOBAL || RV_MON_TYPE == RV_MON_PER_CPU
@@ -342,9 +562,9 @@ static inline da_id_type da_get_id(struct da_monitor *da_mon)
 	return 0;
 }
 
-#elif RV_MON_TYPE == RV_MON_PER_TASK
+#elif RV_MON_TYPE == RV_MON_PER_TASK || RV_MON_TYPE == RV_MON_PER_OBJ
 /*
- * Trace events for per_task monitors, report the PID of the task.
+ * Trace events for per_task/per_object monitors, report the target id.
  */
 
 static inline void da_trace_event(struct da_monitor *da_mon,
@@ -525,6 +745,76 @@ static inline bool da_handle_start_run_event(struct task_struct *tsk,
 {
 	return __da_handle_start_run_event(da_get_monitor(tsk), event, tsk->pid);
 }
+
+#elif RV_MON_TYPE == RV_MON_PER_OBJ
+/*
+ * Handle event for per object.
+ */
+
+/*
+ * da_handle_event - handle an event
+ */
+static inline void da_handle_event(da_id_type id, monitor_target target, enum events event)
+{
+	struct da_monitor *da_mon;
+
+	guard(rcu)();
+	da_mon = da_get_monitor(id, target);
+	if (likely(da_mon))
+		__da_handle_event(da_mon, event, id);
+}
+
+/*
+ * da_handle_start_event - start monitoring or handle event
+ *
+ * This function is used to notify the monitor that the system is returning
+ * to the initial state, so the monitor can start monitoring in the next event.
+ * Thus:
+ *
+ * If the monitor already started, handle the event.
+ * If the monitor did not start yet, start the monitor but skip the event.
+ */
+static inline bool da_handle_start_event(da_id_type id, monitor_target target,
+					 enum events event)
+{
+	struct da_monitor *da_mon;
+
+	guard(rcu)();
+	da_mon = da_get_monitor(id, target);
+	da_mon = da_prepare_storage(id, target, da_mon);
+	if (unlikely(!da_mon))
+		return 0;
+	return __da_handle_start_event(da_mon, event, id);
+}
+
+/*
+ * da_handle_start_run_event - start monitoring and handle event
+ *
+ * This function is used to notify the monitor that the system is in the
+ * initial state, so the monitor can start monitoring and handling event.
+ */
+static inline bool da_handle_start_run_event(da_id_type id, monitor_target target,
+					     enum events event)
+{
+	struct da_monitor *da_mon;
+
+	guard(rcu)();
+	da_mon = da_get_monitor(id, target);
+	da_mon = da_prepare_storage(id, target, da_mon);
+	if (unlikely(!da_mon))
+		return 0;
+	return __da_handle_start_run_event(da_mon, event, id);
+}
+
+static inline void da_reset(da_id_type id, monitor_target target)
+{
+	struct da_monitor *da_mon;
+
+	guard(rcu)();
+	da_mon = da_get_monitor(id, target);
+	if (likely(da_mon))
+		da_monitor_reset(da_mon);
+}
 #endif /* RV_MON_TYPE */
 
 #endif
diff --git a/include/rv/ha_monitor.h b/include/rv/ha_monitor.h
index b6cf3b2ba989..d59507e8cb30 100644
--- a/include/rv/ha_monitor.h
+++ b/include/rv/ha_monitor.h
@@ -190,7 +190,10 @@ static inline void ha_trace_error_env(struct ha_monitor *ha_mon,
 {
 	CONCATENATE(trace_error_env_, MONITOR_NAME)(curr_state, event, env);
 }
-#elif RV_MON_TYPE == RV_MON_PER_TASK
+#elif RV_MON_TYPE == RV_MON_PER_TASK || RV_MON_TYPE == RV_MON_PER_OBJ
+
+#define ha_get_target(ha_mon) da_get_target(&ha_mon->da_mon)
+
 static inline void ha_trace_error_env(struct ha_monitor *ha_mon,
 				      char *curr_state, char *event, char *env,
 				      da_id_type id)
-- 
cgit v1.2.3


From c85dbddad705babfbddfef182495994f7f5262c9 Mon Sep 17 00:00:00 2001
From: Gabriele Monaco <gmonaco@redhat.com>
Date: Mon, 30 Mar 2026 13:10:09 +0200
Subject: sched/deadline: Move some utility functions to deadline.h

Some utility functions on sched_dl_entity can be useful outside of
deadline.c , for instance for modelling, without relying on raw
structure fields.

Move functions like dl_task_of and dl_is_implicit to deadline.h to make
them available outside.

Acked-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://lore.kernel.org/r/20260330111010.153663-12-gmonaco@redhat.com
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
---
 include/linux/sched/deadline.h | 27 +++++++++++++++++++++++++++
 kernel/sched/deadline.c        | 28 +---------------------------
 2 files changed, 28 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index c40115d4e34d..1198138cb839 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -37,4 +37,31 @@ extern void dl_clear_root_domain_cpu(int cpu);
 extern u64 dl_cookie;
 extern bool dl_bw_visited(int cpu, u64 cookie);
 
+static inline bool dl_server(struct sched_dl_entity *dl_se)
+{
+	return dl_se->dl_server;
+}
+
+static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
+{
+	BUG_ON(dl_server(dl_se));
+	return container_of(dl_se, struct task_struct, dl);
+}
+
+/*
+ * Regarding the deadline, a task with implicit deadline has a relative
+ * deadline == relative period. A task with constrained deadline has a
+ * relative deadline <= relative period.
+ *
+ * We support constrained deadline tasks. However, there are some restrictions
+ * applied only for tasks which do not have an implicit deadline. See
+ * update_dl_entity() to know more about such restrictions.
+ *
+ * The dl_is_implicit() returns true if the task has an implicit deadline.
+ */
+static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
+{
+	return dl_se->dl_deadline == dl_se->dl_period;
+}
+
 #endif /* _LINUX_SCHED_DEADLINE_H */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e511e36916bd..c10415c1aa4a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -18,6 +18,7 @@
 
 #include <linux/cpuset.h>
 #include <linux/sched/clock.h>
+#include <linux/sched/deadline.h>
 #include <uapi/linux/sched/types.h>
 #include "sched.h"
 #include "pelt.h"
@@ -57,17 +58,6 @@ static int __init sched_dl_sysctl_init(void)
 late_initcall(sched_dl_sysctl_init);
 #endif /* CONFIG_SYSCTL */
 
-static bool dl_server(struct sched_dl_entity *dl_se)
-{
-	return dl_se->dl_server;
-}
-
-static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
-{
-	BUG_ON(dl_server(dl_se));
-	return container_of(dl_se, struct task_struct, dl);
-}
-
 static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
 {
 	return container_of(dl_rq, struct rq, dl);
@@ -990,22 +980,6 @@ update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
 	dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
 }
 
-/*
- * Regarding the deadline, a task with implicit deadline has a relative
- * deadline == relative period. A task with constrained deadline has a
- * relative deadline <= relative period.
- *
- * We support constrained deadline tasks. However, there are some restrictions
- * applied only for tasks which do not have an implicit deadline. See
- * update_dl_entity() to know more about such restrictions.
- *
- * The dl_is_implicit() returns true if the task has an implicit deadline.
- */
-static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
-{
-	return dl_se->dl_deadline == dl_se->dl_period;
-}
-
 /*
  * When a deadline entity is placed in the runqueue, its runtime and deadline
  * might need to be updated. This is done by a CBS wake up rule. There are two
-- 
cgit v1.2.3


From 62ba6d66b22356a6a78fd015c37fd108710dfc32 Mon Sep 17 00:00:00 2001
From: Rosen Penev <rosenp@gmail.com>
Date: Thu, 26 Mar 2026 19:48:28 -0700
Subject: EDAC/mc: Use kzalloc_flex()

Convert struct mem_ctl_info to use flex array and use the new flex array
helpers to enable runtime bounds checking, including annotating the array
length member with __counted_by() for extra runtime analysis when requested.

Move memcpy() after the counter assignment so that it is initialized before
the first reference to the flex array, as the new attribute requires.

  [ bp: Heavily massage commit message. ]

Signed-off-by: Rosen Penev <rosenp@gmail.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Link: https://patch.msgid.link/20260327024828.7377-1-rosenp@gmail.com
---
 drivers/edac/edac_mc.c | 10 +++-------
 include/linux/edac.h   | 23 ++++++++++++-----------
 2 files changed, 15 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 29e9828422bb..07d3f73bcd23 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -203,7 +203,6 @@ static void mci_release(struct device *dev)
 		kfree(mci->csrows);
 	}
 	kfree(mci->pvt_info);
-	kfree(mci->layers);
 	kfree(mci);
 }
 
@@ -361,13 +360,12 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
 			per_rank = true;
 	}
 
-	mci = kzalloc_obj(struct mem_ctl_info);
+	mci = kzalloc_flex(*mci, layers, n_layers);
 	if (!mci)
 		return NULL;
 
-	mci->layers = kzalloc_objs(struct edac_mc_layer, n_layers);
-	if (!mci->layers)
-		goto error;
+	mci->n_layers = n_layers;
+	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
 
 	mci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL);
 	if (!mci->pvt_info)
@@ -379,8 +377,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
 	/* setup index and various internal pointers */
 	mci->mc_idx = mc_num;
 	mci->tot_dimms = tot_dimms;
-	mci->n_layers = n_layers;
-	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
 	mci->nr_csrows = tot_csrows;
 	mci->num_cschannel = tot_channels;
 	mci->csbased = per_rank;
diff --git a/include/linux/edac.h b/include/linux/edac.h
index fa32f2aca22f..deba46b3ee25 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -541,17 +541,6 @@ struct mem_ctl_info {
 	struct csrow_info **csrows;
 	unsigned int nr_csrows, num_cschannel;
 
-	/*
-	 * Memory Controller hierarchy
-	 *
-	 * There are basically two types of memory controller: the ones that
-	 * sees memory sticks ("dimms"), and the ones that sees memory ranks.
-	 * All old memory controllers enumerate memories per rank, but most
-	 * of the recent drivers enumerate memories per DIMM, instead.
-	 * When the memory controller is per rank, csbased is true.
-	 */
-	unsigned int n_layers;
-	struct edac_mc_layer *layers;
 	bool csbased;
 
 	/*
@@ -609,6 +598,18 @@ struct mem_ctl_info {
 	u8 fake_inject_layer[EDAC_MAX_LAYERS];
 	bool fake_inject_ue;
 	u16 fake_inject_count;
+
+	/*
+	 * Memory Controller hierarchy
+	 *
+	 * There are basically two types of memory controller: the ones that
+	 * sees memory sticks ("dimms"), and the ones that sees memory ranks.
+	 * All old memory controllers enumerate memories per rank, but most
+	 * of the recent drivers enumerate memories per DIMM, instead.
+	 * When the memory controller is per rank, csbased is true.
+	 */
+	unsigned int n_layers;
+	struct edac_mc_layer layers[] __counted_by(n_layers);
 };
 
 #define mci_for_each_dimm(mci, dimm)				\
-- 
cgit v1.2.3


From 00247cbf173a9e1e2304db8e3f9172d36366b255 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Tue, 31 Mar 2026 09:37:19 -0700
Subject: refcount: Remove unused __signed_wrap function annotations

With CONFIG_UBSAN_INTEGER_WRAP being replaced by Overflow Behavior
Types, remove the __signed_wrap function annotation as it is already
unused, and any future work here will use OBT annotations instead.

Link: https://patch.msgid.link/20260331163725.2765789-1-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/compiler_types.h |  9 +--------
 include/linux/refcount.h       | 10 +++++-----
 2 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 890076d0974b..e8fd77593b68 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -432,18 +432,11 @@ struct ftrace_likely_data {
 #define at_least
 #endif
 
-/* Do not trap wrapping arithmetic within an annotated function. */
-#ifdef CONFIG_UBSAN_INTEGER_WRAP
-# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow")))
-#else
-# define __signed_wrap
-#endif
-
 /* Section for code which can't be instrumented at all */
 #define __noinstr_section(section)					\
 	noinline notrace __attribute((__section__(section)))		\
 	__no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \
-	__no_sanitize_memory __signed_wrap
+	__no_sanitize_memory
 
 #define noinstr __noinstr_section(".noinstr.text")
 
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 3da377ffb0c2..ba7657ced281 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -170,7 +170,7 @@ static inline unsigned int refcount_read(const refcount_t *r)
 	return atomic_read(&r->refs);
 }
 
-static inline __must_check __signed_wrap
+static inline __must_check
 bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
 {
 	int old = refcount_read(r);
@@ -212,7 +212,7 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
 	return __refcount_add_not_zero(i, r, NULL);
 }
 
-static inline __must_check __signed_wrap
+static inline __must_check
 bool __refcount_add_not_zero_limited_acquire(int i, refcount_t *r, int *oldp,
 					     int limit)
 {
@@ -244,7 +244,7 @@ __refcount_inc_not_zero_limited_acquire(refcount_t *r, int *oldp, int limit)
 	return __refcount_add_not_zero_limited_acquire(1, r, oldp, limit);
 }
 
-static inline __must_check __signed_wrap
+static inline __must_check
 bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp)
 {
 	return __refcount_add_not_zero_limited_acquire(i, r, oldp, INT_MAX);
@@ -277,7 +277,7 @@ static inline __must_check bool refcount_add_not_zero_acquire(int i, refcount_t
 	return __refcount_add_not_zero_acquire(i, r, NULL);
 }
 
-static inline __signed_wrap
+static inline
 void __refcount_add(int i, refcount_t *r, int *oldp)
 {
 	int old = atomic_fetch_add_relaxed(i, &r->refs);
@@ -383,7 +383,7 @@ static inline void refcount_inc(refcount_t *r)
 	__refcount_inc(r, NULL);
 }
 
-static inline __must_check __signed_wrap
+static inline __must_check
 bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
 {
 	int old = atomic_fetch_sub_release(i, &r->refs);
-- 
cgit v1.2.3


From 10a4eb5882ba16164ece86d99486084f02f148bb Mon Sep 17 00:00:00 2001
From: Siddharth Nayyar <sidnayyar@google.com>
Date: Thu, 26 Mar 2026 21:25:02 +0000
Subject: module: define ksym_flags enumeration to represent kernel symbol
 flags

The core architectural issue with kernel symbol flags is our reliance on
splitting the main symbol table, ksymtab. To handle a single boolean
property, such as GPL-only, all exported symbols are split across two
separate tables: __ksymtab and __ksymtab_gpl.

This design forces the module loader to perform a separate search on
each of these tables for every symbol it needs, for vmlinux and for all
previously loaded modules.

This approach is fundamentally not scalable. If we were to introduce a
second flag, we would need four distinct symbol tables. For n boolean
flags, this model requires an exponential growth to 2^n tables,
dramatically increasing complexity.

Another consequence of this fragmentation is degraded performance. For
example, a binary search on the symbol table of vmlinux, that would take
only 14 comparison steps (assuming ~2^14 or 16K symbols) in a unified
table, can require up to 26 steps when spread across two tables
(assuming both tables have ~2^13 symbols). This performance penalty
worsens as more flags are added.

To address this, symbol flags is an enumeration used to represent flags
as a bitset, for example a flag to tell if a symbol is GPL only.

The said bitset is introduced in subsequent patches and will contain
values of kernel symbol flags. These bitset will then be used to infer
flag values rather than fragmenting ksymtab for separating symbols with
different flag values, thereby eliminating the need to fragment the
ksymtab.

Link: https://lore.kernel.org/r/20260326-kflagstab-v5-0-fa0796fe88d9@google.com
Signed-off-by: Siddharth Nayyar <sidnayyar@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
[Sami: Updated the commit message to explain the use case for the series.]
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 include/linux/module_symbol.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/module_symbol.h b/include/linux/module_symbol.h
index 77c9895b9ddb..574609aced99 100644
--- a/include/linux/module_symbol.h
+++ b/include/linux/module_symbol.h
@@ -2,6 +2,11 @@
 #ifndef _LINUX_MODULE_SYMBOL_H
 #define _LINUX_MODULE_SYMBOL_H
 
+/* Kernel symbol flags bitset. */
+enum ksym_flags {
+	KSYM_FLAG_GPL_ONLY	= 1 << 0,
+};
+
 /* This ignores the intensely annoying "mapping symbols" found in ELF files. */
 static inline bool is_mapping_symbol(const char *str)
 {
-- 
cgit v1.2.3


From 16d0e04f546ffba78c74bbfeb57d93147bcaf2c5 Mon Sep 17 00:00:00 2001
From: Siddharth Nayyar <sidnayyar@google.com>
Date: Thu, 26 Mar 2026 21:25:04 +0000
Subject: module: populate kflagstab in modpost

This patch adds the ability to create entries for kernel symbol flag
bitsets in kflagstab. Modpost populates only the GPL-only flag for now.

Signed-off-by: Siddharth Nayyar <sidnayyar@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 include/linux/export-internal.h | 7 +++++++
 scripts/mod/modpost.c           | 8 ++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h
index d445705ac13c..4123c7592404 100644
--- a/include/linux/export-internal.h
+++ b/include/linux/export-internal.h
@@ -69,4 +69,11 @@
 	    ".long " #crc					"\n" \
 	    ".previous"						"\n")
 
+#define SYMBOL_FLAGS(sym, flags)					\
+	asm("	.section \"___kflagstab+" #sym "\",\"a\""	"\n"	\
+	    "__flags_" #sym ":"					"\n"	\
+	    "	.byte " #flags					"\n"	\
+	    "	.previous"					"\n"	\
+	)
+
 #endif /* __LINUX_EXPORT_INTERNAL_H__ */
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 0c25b5ad497b..1d721fe67caf 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -244,6 +244,11 @@ static struct symbol *alloc_symbol(const char *name)
 	return s;
 }
 
+static uint8_t get_symbol_flags(const struct symbol *sym)
+{
+	return sym->is_gpl_only ? KSYM_FLAG_GPL_ONLY : 0;
+}
+
 /* For the hash of exported symbols */
 static void hash_add_symbol(struct symbol *sym)
 {
@@ -1874,6 +1879,9 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod)
 		buf_printf(buf, "KSYMTAB_%s(%s, \"%s\", \"%s\");\n",
 			   sym->is_func ? "FUNC" : "DATA", sym->name,
 			   sym->is_gpl_only ? "_gpl" : "", sym->namespace);
+
+		buf_printf(buf, "SYMBOL_FLAGS(%s, 0x%02x);\n",
+			   sym->name, get_symbol_flags(sym));
 	}
 
 	if (!modversions)
-- 
cgit v1.2.3


From 55fcb926b6d8b5cfb40873e4840a69961db1bb69 Mon Sep 17 00:00:00 2001
From: Siddharth Nayyar <sidnayyar@google.com>
Date: Thu, 26 Mar 2026 21:25:05 +0000
Subject: module: use kflagstab instead of *_gpl sections

Read kflagstab section for vmlinux and modules to determine whether
kernel symbols are GPL only.

This patch eliminates the need for fragmenting the ksymtab for infering
the value of GPL-only symbol flag, henceforth stop populating *_gpl
versions of the ksymtab and kcrctab in modpost.

Signed-off-by: Siddharth Nayyar <sidnayyar@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 include/linux/export-internal.h | 21 ++++++++--------
 include/linux/module.h          |  1 +
 kernel/module/internal.h        |  1 +
 kernel/module/main.c            | 55 ++++++++++++++++++++++-------------------
 scripts/mod/modpost.c           |  8 +++---
 5 files changed, 46 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h
index 4123c7592404..726054614752 100644
--- a/include/linux/export-internal.h
+++ b/include/linux/export-internal.h
@@ -37,14 +37,14 @@
  * section flag requires it. Use '%progbits' instead of '@progbits' since the
  * former apparently works on all arches according to the binutils source.
  */
-#define __KSYMTAB(name, sym, sec, ns)						\
+#define __KSYMTAB(name, sym, ns)						\
 	asm("	.section \"__ksymtab_strings\",\"aMS\",%progbits,1"	"\n"	\
 	    "__kstrtab_" #name ":"					"\n"	\
 	    "	.asciz \"" #name "\""					"\n"	\
 	    "__kstrtabns_" #name ":"					"\n"	\
 	    "	.asciz \"" ns "\""					"\n"	\
 	    "	.previous"						"\n"	\
-	    "	.section \"___ksymtab" sec "+" #name "\", \"a\""	"\n"	\
+	    "	.section \"___ksymtab+" #name "\", \"a\""		"\n"	\
 		__KSYM_ALIGN						"\n"	\
 	    "__ksymtab_" #name ":"					"\n"	\
 		__KSYM_REF(sym)						"\n"	\
@@ -59,15 +59,16 @@
 #define KSYM_FUNC(name)		name
 #endif
 
-#define KSYMTAB_FUNC(name, sec, ns)	__KSYMTAB(name, KSYM_FUNC(name), sec, ns)
-#define KSYMTAB_DATA(name, sec, ns)	__KSYMTAB(name, name, sec, ns)
+#define KSYMTAB_FUNC(name, ns)	__KSYMTAB(name, KSYM_FUNC(name), ns)
+#define KSYMTAB_DATA(name, ns)	__KSYMTAB(name, name, ns)
 
-#define SYMBOL_CRC(sym, crc, sec)   \
-	asm(".section \"___kcrctab" sec "+" #sym "\",\"a\""	"\n" \
-	    ".balign 4"						"\n" \
-	    "__crc_" #sym ":"					"\n" \
-	    ".long " #crc					"\n" \
-	    ".previous"						"\n")
+#define SYMBOL_CRC(sym, crc)					\
+	asm("	.section \"___kcrctab+" #sym "\",\"a\""	"\n"	\
+	    "	.balign 4"				"\n"	\
+	    "__crc_" #sym ":"				"\n"	\
+	    "	.long " #crc				"\n"	\
+	    "	.previous"				"\n"	\
+	)
 
 #define SYMBOL_FLAGS(sym, flags)					\
 	asm("	.section \"___kflagstab+" #sym "\",\"a\""	"\n"	\
diff --git a/include/linux/module.h b/include/linux/module.h
index 60ed1c3e0ed9..917b29332e15 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -419,6 +419,7 @@ struct module {
 	/* Exported symbols */
 	const struct kernel_symbol *syms;
 	const u32 *crcs;
+	const u8 *flagstab;
 	unsigned int num_syms;
 
 #ifdef CONFIG_ARCH_USES_CFI_TRAPS
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index 618202578b42..69b84510e097 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -57,6 +57,7 @@ extern const struct kernel_symbol __start___ksymtab_gpl[];
 extern const struct kernel_symbol __stop___ksymtab_gpl[];
 extern const u32 __start___kcrctab[];
 extern const u32 __start___kcrctab_gpl[];
+extern const u8 __start___kflagstab[];
 
 #define KMOD_PATH_LEN 256
 extern char modprobe_path[];
diff --git a/kernel/module/main.c b/kernel/module/main.c
index fc033137863d..c243d6b79cdd 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -11,6 +11,7 @@
 #include <linux/extable.h>
 #include <linux/moduleloader.h>
 #include <linux/module_signature.h>
+#include <linux/module_symbol.h>
 #include <linux/trace_events.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
@@ -87,7 +88,7 @@ struct mod_tree_root mod_tree __cacheline_aligned = {
 struct symsearch {
 	const struct kernel_symbol *start, *stop;
 	const u32 *crcs;
-	enum mod_license license;
+	const u8 *flagstab;
 };
 
 /*
@@ -364,19 +365,21 @@ static bool find_exported_symbol_in_section(const struct symsearch *syms,
 					    struct find_symbol_arg *fsa)
 {
 	struct kernel_symbol *sym;
-
-	if (!fsa->gplok && syms->license == GPL_ONLY)
-		return false;
+	u8 sym_flags;
 
 	sym = bsearch(fsa->name, syms->start, syms->stop - syms->start,
 			sizeof(struct kernel_symbol), cmp_name);
 	if (!sym)
 		return false;
 
+	sym_flags = *(syms->flagstab + (sym - syms->start));
+	if (!fsa->gplok && (sym_flags & KSYM_FLAG_GPL_ONLY))
+		return false;
+
 	fsa->owner = owner;
 	fsa->crc = symversion(syms->crcs, sym - syms->start);
 	fsa->sym = sym;
-	fsa->license = syms->license;
+	fsa->license = (sym_flags & KSYM_FLAG_GPL_ONLY) ? GPL_ONLY : NOT_GPL_ONLY;
 
 	return true;
 }
@@ -387,36 +390,31 @@ static bool find_exported_symbol_in_section(const struct symsearch *syms,
  */
 bool find_symbol(struct find_symbol_arg *fsa)
 {
-	static const struct symsearch arr[] = {
-		{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
-		  NOT_GPL_ONLY },
-		{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
-		  __start___kcrctab_gpl,
-		  GPL_ONLY },
+	const struct symsearch syms = {
+		.start		= __start___ksymtab,
+		.stop		= __stop___ksymtab,
+		.crcs		= __start___kcrctab,
+		.flagstab	= __start___kflagstab,
 	};
 	struct module *mod;
-	unsigned int i;
 
-	for (i = 0; i < ARRAY_SIZE(arr); i++)
-		if (find_exported_symbol_in_section(&arr[i], NULL, fsa))
-			return true;
+	if (find_exported_symbol_in_section(&syms, NULL, fsa))
+		return true;
 
 	list_for_each_entry_rcu(mod, &modules, list,
 				lockdep_is_held(&module_mutex)) {
-		struct symsearch arr[] = {
-			{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
-			  NOT_GPL_ONLY },
-			{ mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
-			  mod->gpl_crcs,
-			  GPL_ONLY },
+		const struct symsearch syms = {
+			.start		= mod->syms,
+			.stop		= mod->syms + mod->num_syms,
+			.crcs		= mod->crcs,
+			.flagstab	= mod->flagstab,
 		};
 
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
 
-		for (i = 0; i < ARRAY_SIZE(arr); i++)
-			if (find_exported_symbol_in_section(&arr[i], mod, fsa))
-				return true;
+		if (find_exported_symbol_in_section(&syms, mod, fsa))
+			return true;
 	}
 
 	pr_debug("Failed to find symbol %s\n", fsa->name);
@@ -2681,6 +2679,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
 				     sizeof(*mod->gpl_syms),
 				     &mod->num_gpl_syms);
 	mod->gpl_crcs = section_addr(info, "__kcrctab_gpl");
+	mod->flagstab = section_addr(info, "__kflagstab");
 
 #ifdef CONFIG_CONSTRUCTORS
 	mod->ctors = section_objs(info, ".ctors",
@@ -2884,8 +2883,12 @@ out_err:
 	return ret;
 }
 
-static int check_export_symbol_versions(struct module *mod)
+static int check_export_symbol_sections(struct module *mod)
 {
+	if (mod->num_syms && !mod->flagstab) {
+		pr_err("%s: no flags for exported symbols\n", mod->name);
+		return -ENOEXEC;
+	}
 #ifdef CONFIG_MODVERSIONS
 	if ((mod->num_syms && !mod->crcs) ||
 	    (mod->num_gpl_syms && !mod->gpl_crcs)) {
@@ -3501,7 +3504,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 	if (err)
 		goto free_unload;
 
-	err = check_export_symbol_versions(mod);
+	err = check_export_symbol_sections(mod);
 	if (err)
 		goto free_unload;
 
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 1d721fe67caf..9d96acce60a8 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1876,9 +1876,9 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod)
 		if (trim_unused_exports && !sym->used)
 			continue;
 
-		buf_printf(buf, "KSYMTAB_%s(%s, \"%s\", \"%s\");\n",
+		buf_printf(buf, "KSYMTAB_%s(%s, \"%s\");\n",
 			   sym->is_func ? "FUNC" : "DATA", sym->name,
-			   sym->is_gpl_only ? "_gpl" : "", sym->namespace);
+			   sym->namespace);
 
 		buf_printf(buf, "SYMBOL_FLAGS(%s, 0x%02x);\n",
 			   sym->name, get_symbol_flags(sym));
@@ -1899,8 +1899,8 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod)
 			     sym->name, mod->name, mod->is_vmlinux ? "" : ".ko",
 			     sym->name);
 
-		buf_printf(buf, "SYMBOL_CRC(%s, 0x%08x, \"%s\");\n",
-			   sym->name, sym->crc, sym->is_gpl_only ? "_gpl" : "");
+		buf_printf(buf, "SYMBOL_CRC(%s, 0x%08x);\n",
+			   sym->name, sym->crc);
 	}
 }
 
-- 
cgit v1.2.3


From b4760ff2a5e4351c59d185967735f59c0b0bd7f6 Mon Sep 17 00:00:00 2001
From: Siddharth Nayyar <sidnayyar@google.com>
Date: Thu, 26 Mar 2026 21:25:06 +0000
Subject: module: deprecate usage of *_gpl sections in module loader

The *_gpl section are not being used populated by modpost anymore. Hence
the module loader doesn't need to find and process these sections in
modules.

This patch also simplifies symbol finding logic in module loader since
*_gpl sections don't have to be searched anymore.

Signed-off-by: Siddharth Nayyar <sidnayyar@google.com>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 include/linux/module.h   |  3 ---
 kernel/module/internal.h |  3 ---
 kernel/module/main.c     | 46 ++++++++++++++++++----------------------------
 3 files changed, 18 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 917b29332e15..7566815fabbe 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -435,9 +435,6 @@ struct module {
 	unsigned int num_kp;
 
 	/* GPL-only exported symbols. */
-	unsigned int num_gpl_syms;
-	const struct kernel_symbol *gpl_syms;
-	const u32 *gpl_crcs;
 	bool using_gplonly_symbols;
 
 #ifdef CONFIG_MODULE_SIG
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index 69b84510e097..061161cc79d9 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -53,10 +53,7 @@ extern const size_t modinfo_attrs_count;
 /* Provided by the linker */
 extern const struct kernel_symbol __start___ksymtab[];
 extern const struct kernel_symbol __stop___ksymtab[];
-extern const struct kernel_symbol __start___ksymtab_gpl[];
-extern const struct kernel_symbol __stop___ksymtab_gpl[];
 extern const u32 __start___kcrctab[];
-extern const u32 __start___kcrctab_gpl[];
 extern const u8 __start___kflagstab[];
 
 #define KMOD_PATH_LEN 256
diff --git a/kernel/module/main.c b/kernel/module/main.c
index c243d6b79cdd..c4f768953516 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1495,29 +1495,17 @@ EXPORT_SYMBOL_GPL(__symbol_get);
  */
 static int verify_exported_symbols(struct module *mod)
 {
-	unsigned int i;
 	const struct kernel_symbol *s;
-	struct {
-		const struct kernel_symbol *sym;
-		unsigned int num;
-	} arr[] = {
-		{ mod->syms, mod->num_syms },
-		{ mod->gpl_syms, mod->num_gpl_syms },
-	};
-
-	for (i = 0; i < ARRAY_SIZE(arr); i++) {
-		for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
-			struct find_symbol_arg fsa = {
-				.name	= kernel_symbol_name(s),
-				.gplok	= true,
-			};
-			if (find_symbol(&fsa)) {
-				pr_err("%s: exports duplicate symbol %s"
-				       " (owned by %s)\n",
-				       mod->name, kernel_symbol_name(s),
-				       module_name(fsa.owner));
-				return -ENOEXEC;
-			}
+	for (s = mod->syms; s < mod->syms + mod->num_syms; s++) {
+		struct find_symbol_arg fsa = {
+			.name	= kernel_symbol_name(s),
+			.gplok	= true,
+		};
+		if (find_symbol(&fsa)) {
+			pr_err("%s: exports duplicate symbol %s (owned by %s)\n",
+				mod->name, kernel_symbol_name(s),
+				module_name(fsa.owner));
+			return -ENOEXEC;
 		}
 	}
 	return 0;
@@ -2675,12 +2663,15 @@ static int find_module_sections(struct module *mod, struct load_info *info)
 	mod->syms = section_objs(info, "__ksymtab",
 				 sizeof(*mod->syms), &mod->num_syms);
 	mod->crcs = section_addr(info, "__kcrctab");
-	mod->gpl_syms = section_objs(info, "__ksymtab_gpl",
-				     sizeof(*mod->gpl_syms),
-				     &mod->num_gpl_syms);
-	mod->gpl_crcs = section_addr(info, "__kcrctab_gpl");
 	mod->flagstab = section_addr(info, "__kflagstab");
 
+	if (section_addr(info, "__ksymtab_gpl"))
+		pr_warn("%s: ignoring obsolete section __ksymtab_gpl\n",
+			mod->name);
+	if (section_addr(info, "__kcrctab_gpl"))
+		pr_warn("%s: ignoring obsolete section __kcrctab_gpl\n",
+			mod->name);
+
 #ifdef CONFIG_CONSTRUCTORS
 	mod->ctors = section_objs(info, ".ctors",
 				  sizeof(*mod->ctors), &mod->num_ctors);
@@ -2890,8 +2881,7 @@ static int check_export_symbol_sections(struct module *mod)
 		return -ENOEXEC;
 	}
 #ifdef CONFIG_MODVERSIONS
-	if ((mod->num_syms && !mod->crcs) ||
-	    (mod->num_gpl_syms && !mod->gpl_crcs)) {
+	if (mod->num_syms && !mod->crcs) {
 		return try_to_force_load(mod,
 					 "no versions for exported symbols");
 	}
-- 
cgit v1.2.3


From 8b7b85384fad6e21e8a28628e7ebacb5a6329de4 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Mon, 23 Mar 2026 09:20:42 +0200
Subject: memblock: move reserve_bootmem_range() to memblock.c and make it
 static

reserve_bootmem_region() is only called from
memmap_init_reserved_pages() and it was in mm/mm_init.c because of its
dependecies on static init_deferred_page().

Since init_deferred_page() is not static anymore, move
reserve_bootmem_region(), rename it to memmap_init_reserved_range() and
make it static.

Update the comment describing it to better reflect what the function
does and drop bogus comment about reserved pages in free_bootmem_page().

Update memblock test stubs to reflect the core changes.

Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Link: https://patch.msgid.link/20260323072042.3651061-1-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
 include/linux/bootmem_info.h      |  4 ----
 include/linux/mm.h                |  3 ---
 mm/memblock.c                     | 31 ++++++++++++++++++++++++++++---
 mm/mm_init.c                      | 25 -------------------------
 tools/include/linux/mm.h          |  2 --
 tools/testing/memblock/internal.h |  9 +++++++++
 tools/testing/memblock/mmzone.c   |  4 ----
 7 files changed, 37 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
index 4c506e76a808..492ceeb1cdf8 100644
--- a/include/linux/bootmem_info.h
+++ b/include/linux/bootmem_info.h
@@ -44,10 +44,6 @@ static inline void free_bootmem_page(struct page *page)
 {
 	enum bootmem_type type = bootmem_type(page);
 
-	/*
-	 * The reserve_bootmem_region sets the reserved flag on bootmem
-	 * pages.
-	 */
 	VM_BUG_ON_PAGE(page_ref_count(page) != 2, page);
 
 	if (type == SECTION_INFO || type == MIX_SECTION_INFO)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index abb4963c1f06..764d10fdfb5d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3686,9 +3686,6 @@ extern unsigned long free_reserved_area(void *start, void *end,
 
 extern void adjust_managed_page_count(struct page *page, long count);
 
-extern void reserve_bootmem_region(phys_addr_t start,
-				   phys_addr_t end, int nid);
-
 /* Free the reserved page into the buddy system, so it gets managed. */
 void free_reserved_page(struct page *page);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 57d96f2484cc..eaaa6110bcc1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -974,7 +974,7 @@ __init void memmap_init_kho_scratch_pages(void)
 	/*
 	 * Initialize struct pages for free scratch memory.
 	 * The struct pages for reserved scratch memory will be set up in
-	 * reserve_bootmem_region()
+	 * memmap_init_reserved_pages()
 	 */
 	__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
 			     MEMBLOCK_KHO_SCRATCH, &start, &end, &nid) {
@@ -2241,6 +2241,31 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
 	return end_pfn - start_pfn;
 }
 
+/*
+ * Initialised pages do not have PageReserved set. This function is called
+ * for each reserved range and marks the pages PageReserved.
+ * When deferred initialization of struct pages is enabled it also ensures
+ * that struct pages are properly initialised.
+ */
+static void __init memmap_init_reserved_range(phys_addr_t start,
+					      phys_addr_t end, int nid)
+{
+	unsigned long pfn;
+
+	for_each_valid_pfn(pfn, PFN_DOWN(start), PFN_UP(end)) {
+		struct page *page = pfn_to_page(pfn);
+
+		init_deferred_page(pfn, nid);
+
+		/*
+		 * no need for atomic set_bit because the struct
+		 * page is not visible yet so nobody should
+		 * access it yet.
+		 */
+		__SetPageReserved(page);
+	}
+}
+
 static void __init memmap_init_reserved_pages(void)
 {
 	struct memblock_region *region;
@@ -2260,7 +2285,7 @@ repeat:
 		end = start + region->size;
 
 		if (memblock_is_nomap(region))
-			reserve_bootmem_region(start, end, nid);
+			memmap_init_reserved_range(start, end, nid);
 
 		memblock_set_node(start, region->size, &memblock.reserved, nid);
 	}
@@ -2285,7 +2310,7 @@ repeat:
 			if (!numa_valid_node(nid))
 				nid = early_pfn_to_nid(PFN_DOWN(start));
 
-			reserve_bootmem_region(start, end, nid);
+			memmap_init_reserved_range(start, end, nid);
 		}
 	}
 }
diff --git a/mm/mm_init.c b/mm/mm_init.c
index df34797691bd..ea8d3de43470 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -772,31 +772,6 @@ void __meminit init_deferred_page(unsigned long pfn, int nid)
 	__init_deferred_page(pfn, nid);
 }
 
-/*
- * Initialised pages do not have PageReserved set. This function is
- * called for each range allocated by the bootmem allocator and
- * marks the pages PageReserved. The remaining valid pages are later
- * sent to the buddy page allocator.
- */
-void __meminit reserve_bootmem_region(phys_addr_t start,
-				      phys_addr_t end, int nid)
-{
-	unsigned long pfn;
-
-	for_each_valid_pfn(pfn, PFN_DOWN(start), PFN_UP(end)) {
-		struct page *page = pfn_to_page(pfn);
-
-		__init_deferred_page(pfn, nid);
-
-		/*
-		 * no need for atomic set_bit because the struct
-		 * page is not visible yet so nobody should
-		 * access it yet.
-		 */
-		__SetPageReserved(page);
-	}
-}
-
 /* If zone is ZONE_MOVABLE but memory is mirrored, it is an overlapped init */
 static bool __meminit
 overlap_memmap_init(unsigned long zone, unsigned long *pfn)
diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h
index 028f3faf46e7..74cbd51dbea2 100644
--- a/tools/include/linux/mm.h
+++ b/tools/include/linux/mm.h
@@ -32,8 +32,6 @@ static inline phys_addr_t virt_to_phys(volatile void *address)
 	return (phys_addr_t)address;
 }
 
-void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid);
-
 static inline void totalram_pages_inc(void)
 {
 }
diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h
index 009b97bbdd22..eb02d5771f4c 100644
--- a/tools/testing/memblock/internal.h
+++ b/tools/testing/memblock/internal.h
@@ -29,4 +29,13 @@ static inline unsigned long free_reserved_area(void *start, void *end,
 	return 0;
 }
 
+#define for_each_valid_pfn(pfn, start_pfn, end_pfn)                     \
+       for ((pfn) = (start_pfn); (pfn) < (end_pfn); (pfn)++)
+
+static inline void init_deferred_page(unsigned long pfn, int nid)
+{
+}
+
+#define __SetPageReserved(p)	((void)(p))
+
 #endif
diff --git a/tools/testing/memblock/mmzone.c b/tools/testing/memblock/mmzone.c
index d3d58851864e..e719450f81cb 100644
--- a/tools/testing/memblock/mmzone.c
+++ b/tools/testing/memblock/mmzone.c
@@ -11,10 +11,6 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
 	return NULL;
 }
 
-void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid)
-{
-}
-
 void atomic_long_set(atomic_long_t *v, long i)
 {
 }
-- 
cgit v1.2.3


From 87ce9e83ab8be5daf64351cd481ffa6537778e6b Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Mon, 23 Mar 2026 09:48:35 +0200
Subject: memblock, treewide: make memblock_free() handle late freeing

It shouldn't be responsibility of memblock users to detect if they free
memory allocated from memblock late and should use memblock_free_late().

Make memblock_free() and memblock_phys_free() take care of late memory
freeing and drop memblock_free_late().

Link: https://patch.msgid.link/20260323074836.3653702-9-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
 arch/sparc/kernel/mdesc.c               |  4 +--
 arch/x86/kernel/setup.c                 |  2 +-
 arch/x86/platform/efi/memmap.c          |  5 +---
 arch/x86/platform/efi/quirks.c          |  2 +-
 drivers/firmware/efi/apple-properties.c |  2 +-
 drivers/of/kexec.c                      |  2 +-
 include/linux/memblock.h                |  2 --
 kernel/dma/swiotlb.c                    |  6 ++--
 lib/bootconfig.c                        |  2 +-
 mm/kfence/core.c                        |  4 +--
 mm/memblock.c                           | 49 +++++++++++++--------------------
 11 files changed, 31 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 30f171b7b00c..ecd6c8ae49c7 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -183,14 +183,12 @@ static struct mdesc_handle * __init mdesc_memblock_alloc(unsigned int mdesc_size
 static void __init mdesc_memblock_free(struct mdesc_handle *hp)
 {
 	unsigned int alloc_size;
-	unsigned long start;
 
 	BUG_ON(refcount_read(&hp->refcnt) != 0);
 	BUG_ON(!list_empty(&hp->list));
 
 	alloc_size = PAGE_ALIGN(hp->handle_size);
-	start = __pa(hp);
-	memblock_free_late(start, alloc_size);
+	memblock_free(hp, alloc_size);
 }
 
 static struct mdesc_mem_ops memblock_mdesc_ops = {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index eebcc9db1a1b..46882ce79c3a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -426,7 +426,7 @@ int __init ima_free_kexec_buffer(void)
 	if (!ima_kexec_buffer_size)
 		return -ENOENT;
 
-	memblock_free_late(ima_kexec_buffer_phys,
+	memblock_phys_free(ima_kexec_buffer_phys,
 			   ima_kexec_buffer_size);
 
 	ima_kexec_buffer_phys = 0;
diff --git a/arch/x86/platform/efi/memmap.c b/arch/x86/platform/efi/memmap.c
index 023697c88910..697a9a26a005 100644
--- a/arch/x86/platform/efi/memmap.c
+++ b/arch/x86/platform/efi/memmap.c
@@ -34,10 +34,7 @@ static
 void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags)
 {
 	if (flags & EFI_MEMMAP_MEMBLOCK) {
-		if (slab_is_available())
-			memblock_free_late(phys, size);
-		else
-			memblock_phys_free(phys, size);
+		memblock_phys_free(phys, size);
 	} else if (flags & EFI_MEMMAP_SLAB) {
 		struct page *p = pfn_to_page(PHYS_PFN(phys));
 		unsigned int order = get_order(size);
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 35caa5746115..a560bbcaa006 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -372,7 +372,7 @@ void __init efi_reserve_boot_services(void)
 		 * doesn't make sense as far as the firmware is
 		 * concerned, but it does provide us with a way to tag
 		 * those regions that must not be paired with
-		 * memblock_free_late().
+		 * memblock_phys_free().
 		 */
 		md->attribute |= EFI_MEMORY_RUNTIME;
 	}
diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
index 13ac28754c03..2e525e17fba7 100644
--- a/drivers/firmware/efi/apple-properties.c
+++ b/drivers/firmware/efi/apple-properties.c
@@ -226,7 +226,7 @@ static int __init map_properties(void)
 		 */
 		data->len = 0;
 		memunmap(data);
-		memblock_free_late(pa_data + sizeof(*data), data_len);
+		memblock_phys_free(pa_data + sizeof(*data), data_len);
 
 		return ret;
 	}
diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
index c4cf3552c018..512d9be9d513 100644
--- a/drivers/of/kexec.c
+++ b/drivers/of/kexec.c
@@ -175,7 +175,7 @@ int __init ima_free_kexec_buffer(void)
 	if (ret)
 		return ret;
 
-	memblock_free_late(addr, size);
+	memblock_phys_free(addr, size);
 	return 0;
 }
 #endif
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6ec5e9ac0699..6f6c5b5c4a4b 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -172,8 +172,6 @@ void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags,
 			  struct memblock_type *type_b, phys_addr_t *out_start,
 			  phys_addr_t *out_end, int *out_nid);
 
-void memblock_free_late(phys_addr_t base, phys_addr_t size);
-
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
 static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
 					phys_addr_t *out_start,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d8e6f1d889d5..e44e039e00d3 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -546,10 +546,10 @@ void __init swiotlb_exit(void)
 		free_pages(tbl_vaddr, get_order(tbl_size));
 		free_pages((unsigned long)mem->slots, get_order(slots_size));
 	} else {
-		memblock_free_late(__pa(mem->areas),
+		memblock_free(mem->areas,
 			array_size(sizeof(*mem->areas), mem->nareas));
-		memblock_free_late(mem->start, tbl_size);
-		memblock_free_late(__pa(mem->slots), slots_size);
+		memblock_phys_free(mem->start, tbl_size);
+		memblock_free(mem->slots, slots_size);
 	}
 
 	memset(mem, 0, sizeof(*mem));
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 2da049216fe0..9225fa057c1e 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -64,7 +64,7 @@ static inline void __init xbc_free_mem(void *addr, size_t size, bool early)
 	if (early)
 		memblock_free(addr, size);
 	else if (addr)
-		memblock_free_late(__pa(addr), size);
+		memblock_free(addr, size);
 }
 
 #else /* !__KERNEL__ */
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 7393957f9a20..5c8268af533e 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -731,10 +731,10 @@ static bool __init kfence_init_pool_early(void)
 	 * fails for the first page, and therefore expect addr==__kfence_pool in
 	 * most failure cases.
 	 */
-	memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
+	memblock_free((void *)addr, KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
 	__kfence_pool = NULL;
 
-	memblock_free_late(__pa(kfence_metadata_init), KFENCE_METADATA_SIZE);
+	memblock_free(kfence_metadata_init, KFENCE_METADATA_SIZE);
 	kfence_metadata_init = NULL;
 
 	return false;
diff --git a/mm/memblock.c b/mm/memblock.c
index dee18c40d928..df4e3475fe39 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -385,26 +385,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
  */
 void __init memblock_discard(void)
 {
-	phys_addr_t addr, size;
+	phys_addr_t size;
+	void *addr;
 
 	if (memblock.reserved.regions != memblock_reserved_init_regions) {
-		addr = __pa(memblock.reserved.regions);
+		addr = memblock.reserved.regions;
 		size = PAGE_ALIGN(sizeof(struct memblock_region) *
 				  memblock.reserved.max);
 		if (memblock_reserved_in_slab)
-			kfree(memblock.reserved.regions);
+			kfree(addr);
 		else
-			memblock_free_late(addr, size);
+			memblock_free(addr, size);
 	}
 
 	if (memblock.memory.regions != memblock_memory_init_regions) {
-		addr = __pa(memblock.memory.regions);
+		addr = memblock.memory.regions;
 		size = PAGE_ALIGN(sizeof(struct memblock_region) *
 				  memblock.memory.max);
 		if (memblock_memory_in_slab)
-			kfree(memblock.memory.regions);
+			kfree(addr);
 		else
-			memblock_free_late(addr, size);
+			memblock_free(addr, size);
 	}
 
 	memblock_memory = NULL;
@@ -962,7 +963,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
  * @size: size of the boot memory block in bytes
  *
  * Free boot memory block previously allocated by memblock_alloc_xx() API.
- * The freeing memory will not be released to the buddy allocator.
+ * If called after the buddy allocator is available, the memory is released to
+ * the buddy allocator.
  */
 void __init_memblock memblock_free(void *ptr, size_t size)
 {
@@ -976,17 +978,24 @@ void __init_memblock memblock_free(void *ptr, size_t size)
  * @size: size of the boot memory block in bytes
  *
  * Free boot memory block previously allocated by memblock_phys_alloc_xx() API.
- * The freeing memory will not be released to the buddy allocator.
+ * If called after the buddy allocator is available, the memory is released to
+ * the buddy allocator.
  */
 int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size)
 {
 	phys_addr_t end = base + size - 1;
+	int ret;
 
 	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
 		     &base, &end, (void *)_RET_IP_);
 
 	kmemleak_free_part_phys(base, size);
-	return memblock_remove_range(&memblock.reserved, base, size);
+	ret = memblock_remove_range(&memblock.reserved, base, size);
+
+	if (slab_is_available())
+		__free_reserved_area(base, base + size, -1);
+
+	return ret;
 }
 
 int __init_memblock __memblock_reserve(phys_addr_t base, phys_addr_t size,
@@ -1814,26 +1823,6 @@ void *__init __memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align,
 	return addr;
 }
 
-/**
- * memblock_free_late - free pages directly to buddy allocator
- * @base: phys starting address of the  boot memory block
- * @size: size of the boot memory block in bytes
- *
- * This is only useful when the memblock allocator has already been torn
- * down, but we are still initializing the system.  Pages are released directly
- * to the buddy allocator.
- */
-void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
-{
-	phys_addr_t end = base + size - 1;
-
-	memblock_dbg("%s: [%pa-%pa] %pS\n",
-		     __func__, &base, &end, (void *)_RET_IP_);
-
-	kmemleak_free_part_phys(base, size);
-	__free_reserved_area(base, base + size, -1);
-}
-
 /*
  * Remaining API functions
  */
-- 
cgit v1.2.3


From 36776b7f8a8955b4e75b5d490a75fee0c7a2a7ef Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 31 Mar 2026 17:21:43 +0200
Subject: lib/hexdump: print_hex_dump_bytes() calls print_hex_dump_debug()

print_hex_dump_bytes() claims to be a simple wrapper around
print_hex_dump(), but it actally calls print_hex_dump_debug(), which
means no output is printed if (dynamic) DEBUG is disabled.

Update the documentation to match the implementation.

Fixes: 091cb0994edd20d6 ("lib/hexdump: make print_hex_dump_bytes() a nop on !DEBUG builds")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://patch.msgid.link/3d5c3069fd9102ecaf81d044b750cd613eb72a08.1774970392.git.geert+renesas@glider.be
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/printk.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 45c663124c9b..dc02e217a9d1 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -803,7 +803,8 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type,
 #endif
 
 /**
- * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
+ * print_hex_dump_bytes - shorthand form of print_hex_dump_debug() with default
+ *                        params
  * @prefix_str: string to prefix each line with;
  *  caller supplies trailing spaces for alignment if desired
  * @prefix_type: controls whether prefix of an offset, address, or none
@@ -811,7 +812,7 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type,
  * @buf: data blob to dump
  * @len: number of bytes in the @buf
  *
- * Calls print_hex_dump(), with log level of KERN_DEBUG,
+ * Calls print_hex_dump_debug(), with log level of KERN_DEBUG,
  * rowsize of 16, groupsize of 1, and ASCII output included.
  */
 #define print_hex_dump_bytes(prefix_str, prefix_type, buf, len)	\
-- 
cgit v1.2.3


From f67866701d74c4362b7ea74e7015922ad338375b Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 19 Jan 2026 15:31:15 +0100
Subject: pmdomain: core: Extend statistics for domain idle states with s2idle
 data

To allow user space to monitor the selection of the domain idle state
during s2idle for a CPU PM domain, let's extend the debugfs support in
genpd with this information.

Suggested-by: Dhruva Gole <d-gole@ti.com>
Reviewed-by: Dhruva Gole <d-gole@ti.com>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/pmdomain/core.c   | 13 ++++++++++---
 include/linux/pm_domain.h |  1 +
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c
index 4e9088ce3a25..4d32fc676aaf 100644
--- a/drivers/pmdomain/core.c
+++ b/drivers/pmdomain/core.c
@@ -1438,6 +1438,13 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
 		return;
 	} else {
 		genpd->states[genpd->state_idx].usage++;
+
+		/*
+		 * The ->system_power_down_ok() callback is currently used only
+		 * for s2idle. Use it to know when to update the usage counter.
+		 */
+		if (genpd->gov && genpd->gov->system_power_down_ok)
+			genpd->states[genpd->state_idx].usage_s2idle++;
 	}
 
 	genpd->status = GENPD_STATE_OFF;
@@ -3772,7 +3779,7 @@ static int idle_states_show(struct seq_file *s, void *data)
 	if (ret)
 		return -ERESTARTSYS;
 
-	seq_puts(s, "State  Time(ms)       Usage      Rejected   Above      Below\n");
+	seq_puts(s, "State  Time(ms)       Usage      Rejected   Above      Below      S2idle\n");
 
 	for (i = 0; i < genpd->state_count; i++) {
 		struct genpd_power_state *state = &genpd->states[i];
@@ -3790,9 +3797,9 @@ static int idle_states_show(struct seq_file *s, void *data)
 
 		snprintf(state_name, ARRAY_SIZE(state_name), "S%-5d", i);
 		do_div(idle_time, NSEC_PER_MSEC);
-		seq_printf(s, "%-6s %-14llu %-10llu %-10llu %-10llu %llu\n",
+		seq_printf(s, "%-6s %-14llu %-10llu %-10llu %-10llu %-10llu %llu\n",
 			   state_name, idle_time, state->usage, state->rejected,
-			   state->above, state->below);
+			   state->above, state->below, state->usage_s2idle);
 	}
 
 	genpd_unlock(genpd);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 93ba0143ca47..f6f6d494f728 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -183,6 +183,7 @@ struct genpd_power_state {
 	u64 rejected;
 	u64 above;
 	u64 below;
+	u64 usage_s2idle;
 	struct fwnode_handle *fwnode;
 	u64 idle_time;
 	void *data;
-- 
cgit v1.2.3


From 1877d3f258cbb57d64e275754fb9b18b089ce72d Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Date: Sun, 1 Feb 2026 12:48:59 +0200
Subject: PM: domains: De-constify fields in struct dev_pm_domain_attach_data

It doesn't really make sense to keep u32 fields to be marked as const.
Having the const fields prevents their modification in the driver. Instead
the whole struct can be defined as const, if it is constant.

Fixes: 161e16a5e50a ("PM: domains: Add helper functions to attach/detach multiple PM domains")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/pm_domain.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 93ba0143ca47..38d1814ab8a5 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -49,8 +49,8 @@
 
 struct dev_pm_domain_attach_data {
 	const char * const *pd_names;
-	const u32 num_pd_names;
-	const u32 pd_flags;
+	u32 num_pd_names;
+	u32 pd_flags;
 };
 
 struct dev_pm_domain_list {
-- 
cgit v1.2.3


From 9266b4da051a410d9e6c5c0b0ef0c877855aa1b8 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 31 Mar 2026 10:33:46 +0530
Subject: cpufreq: Allocate QoS freq_req objects with policy

A recent change exposed a bug in the error path: if
freq_qos_add_request(boost_freq_req) fails, min_freq_req may remain a
valid pointer even though it was never successfully added. During policy
teardown, this leads to an unconditional call to
freq_qos_remove_request(), triggering a WARN.

The current design allocates all three freq_req objects together, making
the lifetime rules unclear and error handling fragile.

Simplify this by allocating the QoS freq_req objects at policy
allocation time. The policy itself is dynamically allocated, and two of
the three requests are always needed anyway. This ensures consistent
lifetime management and eliminates the inconsistent state in failure
paths.

Reported-by: Zhongqiu Han <zhongqiu.han@oss.qualcomm.com>
Fixes: 6e39ba4e5a82 ("cpufreq: Add boost_freq_req QoS request")
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Tested-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Zhongqiu Han <zhongqiu.han@oss.qualcomm.com>
Link: https://patch.msgid.link/a293f29d841b86c51f34699c6e717e01858d8ada.1774933424.git.viresh.kumar@linaro.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 53 +++++++++++++----------------------------------
 include/linux/cpufreq.h   |  6 +++---
 2 files changed, 17 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c0aa970c7a67..f4a949f1e48f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -614,7 +614,7 @@ static int policy_set_boost(struct cpufreq_policy *policy, bool enable)
 		return ret;
 	}
 
-	ret = freq_qos_update_request(policy->boost_freq_req, policy->cpuinfo.max_freq);
+	ret = freq_qos_update_request(&policy->boost_freq_req, policy->cpuinfo.max_freq);
 	if (ret < 0) {
 		policy->boost_enabled = !policy->boost_enabled;
 		cpufreq_driver->set_boost(policy, policy->boost_enabled);
@@ -769,7 +769,7 @@ static ssize_t store_##file_name					\
 	if (ret)							\
 		return ret;						\
 									\
-	ret = freq_qos_update_request(policy->object##_freq_req, val);\
+	ret = freq_qos_update_request(&policy->object##_freq_req, val);	\
 	return ret >= 0 ? count : ret;					\
 }
 
@@ -1374,7 +1374,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 	/* Cancel any pending policy->update work before freeing the policy. */
 	cancel_work_sync(&policy->update);
 
-	if (policy->max_freq_req) {
+	if (freq_qos_request_active(&policy->max_freq_req)) {
 		/*
 		 * Remove max_freq_req after sending CPUFREQ_REMOVE_POLICY
 		 * notification, since CPUFREQ_CREATE_POLICY notification was
@@ -1382,12 +1382,13 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 		 */
 		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 					     CPUFREQ_REMOVE_POLICY, policy);
-		freq_qos_remove_request(policy->max_freq_req);
+		freq_qos_remove_request(&policy->max_freq_req);
 	}
 
-	freq_qos_remove_request(policy->min_freq_req);
-	freq_qos_remove_request(policy->boost_freq_req);
-	kfree(policy->min_freq_req);
+	if (freq_qos_request_active(&policy->min_freq_req))
+		freq_qos_remove_request(&policy->min_freq_req);
+	if (freq_qos_request_active(&policy->boost_freq_req))
+		freq_qos_remove_request(&policy->boost_freq_req);
 
 	cpufreq_policy_put_kobj(policy);
 	free_cpumask_var(policy->real_cpus);
@@ -1452,57 +1453,31 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy,
 	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
 
 	if (new_policy) {
-		unsigned int count;
-
 		for_each_cpu(j, policy->related_cpus) {
 			per_cpu(cpufreq_cpu_data, j) = policy;
 			add_cpu_dev_symlink(policy, j, get_cpu_device(j));
 		}
 
-		count = policy->boost_supported ? 3 : 2;
-		policy->min_freq_req = kzalloc(count * sizeof(*policy->min_freq_req),
-					       GFP_KERNEL);
-		if (!policy->min_freq_req) {
-			ret = -ENOMEM;
-			goto out_destroy_policy;
-		}
-
 		if (policy->boost_supported) {
-			policy->boost_freq_req = policy->min_freq_req + 2;
-
 			ret = freq_qos_add_request(&policy->constraints,
-						   policy->boost_freq_req,
+						   &policy->boost_freq_req,
 						   FREQ_QOS_MAX,
 						   policy->cpuinfo.max_freq);
-			if (ret < 0) {
-				policy->boost_freq_req = NULL;
+			if (ret < 0)
 				goto out_destroy_policy;
-			}
 		}
 
 		ret = freq_qos_add_request(&policy->constraints,
-					   policy->min_freq_req, FREQ_QOS_MIN,
+					   &policy->min_freq_req, FREQ_QOS_MIN,
 					   FREQ_QOS_MIN_DEFAULT_VALUE);
-		if (ret < 0) {
-			kfree(policy->min_freq_req);
-			policy->min_freq_req = NULL;
+		if (ret < 0)
 			goto out_destroy_policy;
-		}
-
-		/*
-		 * This must be initialized right here to avoid calling
-		 * freq_qos_remove_request() on uninitialized request in case
-		 * of errors.
-		 */
-		policy->max_freq_req = policy->min_freq_req + 1;
 
 		ret = freq_qos_add_request(&policy->constraints,
-					   policy->max_freq_req, FREQ_QOS_MAX,
+					   &policy->max_freq_req, FREQ_QOS_MAX,
 					   FREQ_QOS_MAX_DEFAULT_VALUE);
-		if (ret < 0) {
-			policy->max_freq_req = NULL;
+		if (ret < 0)
 			goto out_destroy_policy;
-		}
 
 		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				CPUFREQ_CREATE_POLICY, policy);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index b6f6c7d06912..9b10eb486ece 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -79,9 +79,9 @@ struct cpufreq_policy {
 					 * called, but you're in IRQ context */
 
 	struct freq_constraints	constraints;
-	struct freq_qos_request	*min_freq_req;
-	struct freq_qos_request	*max_freq_req;
-	struct freq_qos_request *boost_freq_req;
+	struct freq_qos_request	min_freq_req;
+	struct freq_qos_request	max_freq_req;
+	struct freq_qos_request boost_freq_req;
 
 	struct cpufreq_frequency_table	*freq_table;
 	enum cpufreq_table_sorting freq_table_sorted;
-- 
cgit v1.2.3


From 04bcbed4cd33495d05ba98857a748e416ab603b7 Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Tue, 31 Mar 2026 14:19:46 -0700
Subject: powercap: intel_rapl: Move primitive info to header for interface
 drivers

RAPL primitive information varies across different RAPL interfaces
(MSR, TPMI, MMIO). Keeping them in the common code adds no benefit, but
requires interface-specific handling logic and makes the common layer
unnecessarily complex.

Move the primitive info infrastructure to the shared header to allow
interface drivers to configure RAPL primitives. Specific changes:

 1. Move struct rapl_primitive_info, enum unit_type, and
    PRIMITIVE_INFO_INIT macro to intel_rapl.h.
 2. Change the @rpi field in struct rapl_if_priv from void * to
    struct rapl_primitive_info * to improve type safety and eliminate
    unnecessary casts.

No functional changes. This is a preparatory refactoring to allow
interface drivers to supply their own RAPL primitive settings.

Co-developed-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Link: https://patch.msgid.link/20260331211950.3329932-4-sathyanarayanan.kuppuswamy@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/intel_rapl_common.c | 32 ++------------------------------
 include/linux/intel_rapl.h           | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index f2637cc2cc6a..ffc9d0378257 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -100,13 +100,6 @@
 
 #define RAPL_EVENT_MASK			GENMASK(7, 0)
 
-enum unit_type {
-	ARBITRARY_UNIT,		/* no translation */
-	POWER_UNIT,
-	ENERGY_UNIT,
-	TIME_UNIT,
-};
-
 static const char *pl_names[NR_POWER_LIMITS] = {
 	[POWER_LIMIT1] = "long_term",
 	[POWER_LIMIT2] = "short_term",
@@ -208,27 +201,6 @@ static const struct rapl_defaults *get_defaults(struct rapl_package *rp)
 	return rp->priv->defaults;
 }
 
-/* per domain data. used to describe individual knobs such that access function
- * can be consolidated into one instead of many inline functions.
- */
-struct rapl_primitive_info {
-	const char *name;
-	u64 mask;
-	int shift;
-	enum rapl_domain_reg_id id;
-	enum unit_type unit;
-	u32 flag;
-};
-
-#define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) {	\
-		.name = #p,			\
-		.mask = m,			\
-		.shift = s,			\
-		.id = i,			\
-		.unit = u,			\
-		.flag = f			\
-	}
-
 static void rapl_init_domains(struct rapl_package *rp);
 static int rapl_read_data_raw(struct rapl_domain *rd,
 			      enum rapl_primitives prim,
@@ -748,10 +720,10 @@ static int rapl_config(struct rapl_package *rp)
 	/* MMIO I/F shares the same register layout as MSR registers */
 	case RAPL_IF_MMIO:
 	case RAPL_IF_MSR:
-		rp->priv->rpi = (void *)rpi_msr;
+		rp->priv->rpi = rpi_msr;
 		break;
 	case RAPL_IF_TPMI:
-		rp->priv->rpi = (void *)rpi_tpmi;
+		rp->priv->rpi = rpi_tpmi;
 		break;
 	default:
 		return -EINVAL;
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index 9e6bd654be1f..01f290de3586 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -137,6 +137,34 @@ struct rapl_defaults {
 	bool spr_psys_bits;
 };
 
+#define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) {	\
+		.name = #p,			\
+		.mask = m,			\
+		.shift = s,			\
+		.id = i,			\
+		.unit = u,			\
+		.flag = f			\
+	}
+
+enum unit_type {
+	ARBITRARY_UNIT,		/* no translation */
+	POWER_UNIT,
+	ENERGY_UNIT,
+	TIME_UNIT,
+};
+
+/* per domain data. used to describe individual knobs such that access function
+ * can be consolidated into one instead of many inline functions.
+ */
+struct rapl_primitive_info {
+	const char *name;
+	u64 mask;
+	int shift;
+	enum rapl_domain_reg_id id;
+	enum unit_type unit;
+	u32 flag;
+};
+
 /**
  * struct rapl_if_priv: private data for different RAPL interfaces
  * @control_type:		Each RAPL interface must have its own powercap
@@ -152,7 +180,7 @@ struct rapl_defaults {
  * @write_raw:			Callback for writing RAPL interface specific
  *				registers.
  * @defaults:			pointer to default settings
- * @rpi:			internal pointer to interface primitive info
+ * @rpi:			pointer to interface primitive info
  */
 struct rapl_if_priv {
 	enum rapl_if_type type;
@@ -164,7 +192,7 @@ struct rapl_if_priv {
 	int (*read_raw)(int id, struct reg_action *ra, bool pmu_ctx);
 	int (*write_raw)(int id, struct reg_action *ra);
 	const struct rapl_defaults *defaults;
-	void *rpi;
+	struct rapl_primitive_info *rpi;
 };
 
 #ifdef CONFIG_PERF_EVENTS
-- 
cgit v1.2.3


From c3bb8d4f5d802ec1a16f018e82030bccb7a053a4 Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Tue, 31 Mar 2026 14:19:50 -0700
Subject: powercap: intel_rapl: Consolidate PL4 and PMU support flags into
 rapl_defaults

Currently, PL4 and MSR-based RAPL PMU support are detected using
separate CPU ID tables (pl4_support_ids and pmu_support_ids) in the
MSR driver probe path. This creates a maintenance burden since adding
a new CPU requires updates in two places: the rapl_ids table and one
or both of these capability tables.

Consolidate PL4 and PMU capability information directly into
struct rapl_defaults by adding msr_pl4_support and msr_pmu_support
flags. This allows per-CPU capability to be expressed in a single
place alongside other per-CPU defaults, eliminating the duplicate
CPU ID tables entirely.

No functional changes are intended.

Co-developed-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Link: https://patch.msgid.link/20260331211950.3329932-8-sathyanarayanan.kuppuswamy@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/intel_rapl_msr.c | 83 +++++++++++++++++----------------------
 include/linux/intel_rapl.h        |  2 +
 2 files changed, 38 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index cfb35973f0b5..a34543e66446 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -216,33 +216,6 @@ static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
 	return ra->err;
 }
 
-/* List of verified CPUs. */
-static const struct x86_cpu_id pl4_support_ids[] = {
-	X86_MATCH_VFM(INTEL_ICELAKE_L, NULL),
-	X86_MATCH_VFM(INTEL_TIGERLAKE_L, NULL),
-	X86_MATCH_VFM(INTEL_ALDERLAKE, NULL),
-	X86_MATCH_VFM(INTEL_ALDERLAKE_L, NULL),
-	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, NULL),
-	X86_MATCH_VFM(INTEL_RAPTORLAKE, NULL),
-	X86_MATCH_VFM(INTEL_RAPTORLAKE_P, NULL),
-	X86_MATCH_VFM(INTEL_METEORLAKE, NULL),
-	X86_MATCH_VFM(INTEL_METEORLAKE_L, NULL),
-	X86_MATCH_VFM(INTEL_ARROWLAKE_U, NULL),
-	X86_MATCH_VFM(INTEL_ARROWLAKE_H, NULL),
-	X86_MATCH_VFM(INTEL_PANTHERLAKE_L, NULL),
-	X86_MATCH_VFM(INTEL_WILDCATLAKE_L, NULL),
-	X86_MATCH_VFM(INTEL_NOVALAKE, NULL),
-	X86_MATCH_VFM(INTEL_NOVALAKE_L, NULL),
-	{}
-};
-
-/* List of MSR-based RAPL PMU support CPUs */
-static const struct x86_cpu_id pmu_support_ids[] = {
-	X86_MATCH_VFM(INTEL_PANTHERLAKE_L, NULL),
-	X86_MATCH_VFM(INTEL_WILDCATLAKE_L, NULL),
-	{}
-};
-
 static int rapl_check_unit_atom(struct rapl_domain *rd)
 {
 	struct reg_action ra;
@@ -420,6 +393,23 @@ static const struct rapl_defaults rapl_defaults_amd = {
 	.check_unit = rapl_default_check_unit,
 };
 
+static const struct rapl_defaults rapl_defaults_core_pl4 = {
+	.floor_freq_reg_addr = 0,
+	.check_unit = rapl_default_check_unit,
+	.set_floor_freq = rapl_default_set_floor_freq,
+	.compute_time_window = rapl_default_compute_time_window,
+	.msr_pl4_support = 1,
+};
+
+static const struct rapl_defaults rapl_defaults_core_pl4_pmu = {
+	.floor_freq_reg_addr = 0,
+	.check_unit = rapl_default_check_unit,
+	.set_floor_freq = rapl_default_set_floor_freq,
+	.compute_time_window = rapl_default_compute_time_window,
+	.msr_pl4_support = 1,
+	.msr_pmu_support = 1,
+};
+
 static const struct x86_cpu_id rapl_ids[]  = {
 	X86_MATCH_VFM(INTEL_SANDYBRIDGE,		&rapl_defaults_core),
 	X86_MATCH_VFM(INTEL_SANDYBRIDGE_X,		&rapl_defaults_core),
@@ -443,35 +433,35 @@ static const struct x86_cpu_id rapl_ids[]  = {
 	X86_MATCH_VFM(INTEL_KABYLAKE_L,			&rapl_defaults_core),
 	X86_MATCH_VFM(INTEL_KABYLAKE,			&rapl_defaults_core),
 	X86_MATCH_VFM(INTEL_CANNONLAKE_L,		&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_ICELAKE_L,			&rapl_defaults_core),
+	X86_MATCH_VFM(INTEL_ICELAKE_L,			&rapl_defaults_core_pl4),
 	X86_MATCH_VFM(INTEL_ICELAKE,			&rapl_defaults_core),
 	X86_MATCH_VFM(INTEL_ICELAKE_NNPI,		&rapl_defaults_core),
 	X86_MATCH_VFM(INTEL_ICELAKE_X,			&rapl_defaults_hsw_server),
 	X86_MATCH_VFM(INTEL_ICELAKE_D,			&rapl_defaults_hsw_server),
 	X86_MATCH_VFM(INTEL_COMETLAKE_L,		&rapl_defaults_core),
 	X86_MATCH_VFM(INTEL_COMETLAKE,			&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_TIGERLAKE_L,		&rapl_defaults_core),
+	X86_MATCH_VFM(INTEL_TIGERLAKE_L,		&rapl_defaults_core_pl4),
 	X86_MATCH_VFM(INTEL_TIGERLAKE,			&rapl_defaults_core),
 	X86_MATCH_VFM(INTEL_ROCKETLAKE,			&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_ALDERLAKE,			&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_ALDERLAKE_L,		&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT,		&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_RAPTORLAKE,			&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_RAPTORLAKE_P,		&rapl_defaults_core),
+	X86_MATCH_VFM(INTEL_ALDERLAKE,			&rapl_defaults_core_pl4),
+	X86_MATCH_VFM(INTEL_ALDERLAKE_L,		&rapl_defaults_core_pl4),
+	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT,		&rapl_defaults_core_pl4),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE,			&rapl_defaults_core_pl4),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE_P,		&rapl_defaults_core_pl4),
 	X86_MATCH_VFM(INTEL_RAPTORLAKE_S,		&rapl_defaults_core),
 	X86_MATCH_VFM(INTEL_BARTLETTLAKE,		&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_METEORLAKE,			&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_METEORLAKE_L,		&rapl_defaults_core),
+	X86_MATCH_VFM(INTEL_METEORLAKE,			&rapl_defaults_core_pl4),
+	X86_MATCH_VFM(INTEL_METEORLAKE_L,		&rapl_defaults_core_pl4),
 	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X,		&rapl_defaults_spr_server),
 	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X,		&rapl_defaults_spr_server),
 	X86_MATCH_VFM(INTEL_LUNARLAKE_M,		&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_PANTHERLAKE_L,		&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_WILDCATLAKE_L,		&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_NOVALAKE,			&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_NOVALAKE_L,			&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_ARROWLAKE_H,		&rapl_defaults_core),
+	X86_MATCH_VFM(INTEL_PANTHERLAKE_L,		&rapl_defaults_core_pl4_pmu),
+	X86_MATCH_VFM(INTEL_WILDCATLAKE_L,		&rapl_defaults_core_pl4_pmu),
+	X86_MATCH_VFM(INTEL_NOVALAKE,			&rapl_defaults_core_pl4),
+	X86_MATCH_VFM(INTEL_NOVALAKE_L,			&rapl_defaults_core_pl4),
+	X86_MATCH_VFM(INTEL_ARROWLAKE_H,		&rapl_defaults_core_pl4),
 	X86_MATCH_VFM(INTEL_ARROWLAKE,			&rapl_defaults_core),
-	X86_MATCH_VFM(INTEL_ARROWLAKE_U,		&rapl_defaults_core),
+	X86_MATCH_VFM(INTEL_ARROWLAKE_U,		&rapl_defaults_core_pl4),
 	X86_MATCH_VFM(INTEL_LAKEFIELD,			&rapl_defaults_core),
 
 	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT,		&rapl_defaults_byt),
@@ -498,7 +488,6 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
 
 static int rapl_msr_probe(struct platform_device *pdev)
 {
-	const struct x86_cpu_id *id = x86_match_cpu(pl4_support_ids);
 	int ret;
 
 	switch (boot_cpu_data.x86_vendor) {
@@ -518,16 +507,16 @@ static int rapl_msr_probe(struct platform_device *pdev)
 	rapl_msr_priv->defaults = (const struct rapl_defaults *)pdev->dev.platform_data;
 	rapl_msr_priv->rpi = rpi_msr;
 
-	if (id) {
+	if (rapl_msr_priv->defaults->msr_pl4_support) {
 		rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] |= BIT(POWER_LIMIT4);
 		rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4].msr =
 			MSR_VR_CURRENT_CONFIG;
-		pr_info("PL4 support detected.\n");
+		pr_info("PL4 support detected (updated).\n");
 	}
 
-	if (x86_match_cpu(pmu_support_ids)) {
+	if (rapl_msr_priv->defaults->msr_pmu_support) {
 		rapl_msr_pmu = true;
-		pr_info("MSR-based RAPL PMU support enabled\n");
+		pr_info("MSR-based RAPL PMU support enabled (updated)\n");
 	}
 
 	rapl_msr_priv->control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index 01f290de3586..328004f605c3 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -135,6 +135,8 @@ struct rapl_defaults {
 	unsigned int dram_domain_energy_unit;
 	unsigned int psys_domain_energy_unit;
 	bool spr_psys_bits;
+	bool msr_pl4_support;
+	bool msr_pmu_support;
 };
 
 #define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) {	\
-- 
cgit v1.2.3


From c2de5a5be4d60af5f928a2dd2b0f73e17358e346 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Mon, 30 Mar 2026 09:07:55 +0200
Subject: timens: Add a __free() wrapper for put_time_ns()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The wrapper will be used to simplify cleanups of 'struct time_namespace'.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260330-timens-cleanup-v1-1-936e91c9dd30@linutronix.de
---
 include/linux/time_namespace.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index c1de21a27c34..58bd9728df58 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -8,6 +8,7 @@
 #include <linux/ns_common.h>
 #include <linux/err.h>
 #include <linux/time64.h>
+#include <linux/cleanup.h>
 
 struct user_namespace;
 extern struct user_namespace init_user_ns;
@@ -171,4 +172,6 @@ static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
 }
 #endif /* CONFIG_TIME_NS_VDSO */
 
+DEFINE_FREE(time_ns, struct time_namespace *, if (_T) put_time_ns(_T))
+
 #endif /* _LINUX_TIMENS_H */
-- 
cgit v1.2.3


From c064abc68e009d2cc18416e7132d9c25e03125b6 Mon Sep 17 00:00:00 2001
From: "Mario Limonciello (AMD)" <superm1@kernel.org>
Date: Sat, 7 Mar 2026 08:10:20 -0600
Subject: firmware: dmi: Correct an indexing error in dmi.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The entries later in enum dmi_entry_type don't match the SMBIOS
specification¹.

The entry for type 33: `64-Bit Memory Error Information` is not present and
thus the index for all later entries is incorrect.

Add it.

Also, add missing entry types 43-46, while at it.

  ¹ Search for "System Management BIOS (SMBIOS) Reference Specification"

  [ bp: Drop the flaky SMBIOS spec URL. ]

Fixes: 93c890dbe5287 ("firmware: Add DMI entry types to the headers")
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Link: https://patch.msgid.link/20260307141024.819807-2-superm1@kernel.org
---
 include/linux/dmi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index 927f8a8b7a1d..2eedf44e6801 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -60,6 +60,7 @@ enum dmi_entry_type {
 	DMI_ENTRY_OOB_REMOTE_ACCESS,
 	DMI_ENTRY_BIS_ENTRY,
 	DMI_ENTRY_SYSTEM_BOOT,
+	DMI_ENTRY_64_MEM_ERROR,
 	DMI_ENTRY_MGMT_DEV,
 	DMI_ENTRY_MGMT_DEV_COMPONENT,
 	DMI_ENTRY_MGMT_DEV_THRES,
@@ -69,6 +70,10 @@ enum dmi_entry_type {
 	DMI_ENTRY_ADDITIONAL,
 	DMI_ENTRY_ONBOARD_DEV_EXT,
 	DMI_ENTRY_MGMT_CONTROLLER_HOST,
+	DMI_ENTRY_TPM_DEVICE,
+	DMI_ENTRY_PROCESSOR_ADDITIONAL,
+	DMI_ENTRY_FIRMWARE_INVENTORY,
+	DMI_ENTRY_STRING_PROPERTY,
 	DMI_ENTRY_INACTIVE = 126,
 	DMI_ENTRY_END_OF_TABLE = 127,
 };
-- 
cgit v1.2.3


From bc91133e260c8113c1119073c03b93c12aa41738 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Sat, 7 Mar 2026 08:10:24 -0600
Subject: x86/CPU/AMD: Print AGESA string from DMI additional information entry

Type 40 entries (Additional Information) are summarized in section 7.41 as
part of the SMBIOS specification.  Generally, these entries aren't interesting
to save.

However on some AMD Zen systems, the AGESA version is stored here. This is
useful to save to the kernel message logs for debugging. It can be used to
cross-reference issues.

Implement an iterator for the Additional Information entries. Use this to find
and print the AGESA string. Do so in AMD code, since the use case is
AMD-specific.

  [ bp: Match only "AGESA". ]

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Co-developed-by: "Mario Limonciello (AMD)" <superm1@kernel.org>
Signed-off-by: "Mario Limonciello (AMD)" <superm1@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Link: https://patch.msgid.link/20260307141024.819807-6-superm1@kernel.org
---
 arch/x86/kernel/cpu/amd.c   | 49 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/firmware/dmi_scan.c |  3 ++-
 include/linux/dmi.h         | 18 +++++++++++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 09de584e4c8f..33b740c26bef 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/export.h>
 #include <linux/bitops.h>
+#include <linux/dmi.h>
 #include <linux/elf.h>
 #include <linux/mm.h>
 #include <linux/kvm_types.h>
@@ -1380,3 +1381,51 @@ static __init int print_s5_reset_status_mmio(void)
 	return 0;
 }
 late_initcall(print_s5_reset_status_mmio);
+
+static void __init dmi_scan_additional(const struct dmi_header *d, void *p)
+{
+	struct dmi_a_info *info = (struct dmi_a_info *)d;
+	void *next, *end;
+
+	if (!IS_ENABLED(CONFIG_DMI))
+		return;
+
+	if (info->header.type != DMI_ENTRY_ADDITIONAL ||
+	    info->header.length < DMI_A_INFO_MIN_SIZE ||
+	    info->count < 1)
+		return;
+
+	next = (void *)(info + 1);
+	end  = (void *)info + info->header.length;
+
+	do {
+		struct dmi_a_info_entry *entry;
+		const char *string_ptr;
+
+		entry = (struct dmi_a_info_entry *)next;
+
+		/*
+		 * Not much can be done to validate data. At least the entry
+		 * length shouldn't be 0.
+		 */
+		if (!entry->length)
+			return;
+
+		string_ptr = dmi_string_nosave(&info->header, entry->str_num);
+
+		/* Sample string: AGESA!V9 StrixKrackanPI-FP8 1.1.0.0c */
+		if (!strncmp(string_ptr, "AGESA", 5)) {
+			pr_info("AGESA: %s\n", string_ptr);
+			break;
+		}
+
+		next += entry->length;
+	} while (end - next >= DMI_A_INFO_ENT_MIN_SIZE);
+}
+
+static __init int print_dmi_agesa(void)
+{
+	dmi_walk(dmi_scan_additional, NULL);
+	return 0;
+}
+late_initcall(print_dmi_agesa);
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index ed6235ac576b..a3f7dabd4955 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -47,7 +47,7 @@ static struct dmi_memdev_info {
 static int dmi_memdev_nr;
 static int dmi_memdev_populated_nr __initdata;
 
-static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s)
+const char *dmi_string_nosave(const struct dmi_header *dm, u8 s)
 {
 	const u8 *bp = ((u8 *) dm) + dm->length;
 	const u8 *nsp;
@@ -66,6 +66,7 @@ static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s)
 
 	return dmi_empty_string;
 }
+EXPORT_SYMBOL_GPL(dmi_string_nosave);
 
 static const char * __init dmi_string(const struct dmi_header *dm, u8 s)
 {
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index 2eedf44e6801..c8700e6a694d 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -91,6 +91,21 @@ struct dmi_device {
 	void *device_data;	/* Type specific data */
 };
 
+#define DMI_A_INFO_ENT_MIN_SIZE 0x6
+struct dmi_a_info_entry {
+	u8 length;
+	u16 handle;
+	u8 offset;
+	u8 str_num;
+	u8 value[];
+} __packed;
+
+#define DMI_A_INFO_MIN_SIZE	0xB
+struct dmi_a_info {
+	struct dmi_header header;
+	u8 count;
+} __packed;
+
 #ifdef CONFIG_DMI
 
 struct dmi_dev_onboard {
@@ -120,6 +135,7 @@ extern void dmi_memdev_name(u16 handle, const char **bank, const char **device);
 extern u64 dmi_memdev_size(u16 handle);
 extern u8 dmi_memdev_type(u16 handle);
 extern u16 dmi_memdev_handle(int slot);
+const char *dmi_string_nosave(const struct dmi_header *dm, u8 s);
 
 #else
 
@@ -153,6 +169,8 @@ static inline u8 dmi_memdev_type(u16 handle) { return 0x0; }
 static inline u16 dmi_memdev_handle(int slot) { return 0xffff; }
 static inline const struct dmi_system_id *
 	dmi_first_match(const struct dmi_system_id *list) { return NULL; }
+static inline const char *
+	dmi_string_nosave(const struct dmi_header *dm, u8 s) { return ""; }
 
 #endif
 
-- 
cgit v1.2.3


From 9bf092c97b86af63694d9902b9e14047214ba76d Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Wed, 1 Apr 2026 17:20:41 +0200
Subject: sched: update task_struct->comm comment

Since commit 3a3f61ce5e0b ("exec: Make sure task->comm is always
NUL-terminated"), __set_task_comm() is unlocked and no longer uses
strscpy_pad() - update the stale comment accordingly.

Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Link: https://patch.msgid.link/20260401152039.724811-4-thorsten.blum@linux.dev
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/sched.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 074ad4ef3d81..43dfbcf1babe 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1158,12 +1158,9 @@ struct task_struct {
 	/*
 	 * executable name, excluding path.
 	 *
-	 * - normally initialized begin_new_exec()
-	 * - set it with set_task_comm()
-	 *   - strscpy_pad() to ensure it is always NUL-terminated and
-	 *     zero-padded
-	 *   - task_lock() to ensure the operation is atomic and the name is
-	 *     fully updated.
+	 * - normally initialized by begin_new_exec()
+	 * - set it with set_task_comm() to ensure it is always
+	 *   NUL-terminated and zero-padded
 	 */
 	char				comm[TASK_COMM_LEN];
 
-- 
cgit v1.2.3


From 9dc42c9070282c81058a875fea5acae057610980 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Wed, 1 Apr 2026 06:03:52 -0700
Subject: workqueue: fix typo in WQ_AFFN_SMT comment

Fix "poer" -> "per" in the WQ_AFFN_SMT enum comment.

Signed-off-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 9f971912c6be..75634a09576a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -131,7 +131,7 @@ struct rcu_work {
 enum wq_affn_scope {
 	WQ_AFFN_DFL,			/* use system default */
 	WQ_AFFN_CPU,			/* one pod per CPU */
-	WQ_AFFN_SMT,			/* one pod poer SMT */
+	WQ_AFFN_SMT,			/* one pod per SMT */
 	WQ_AFFN_CACHE,			/* one pod per LLC */
 	WQ_AFFN_NUMA,			/* one pod per NUMA node */
 	WQ_AFFN_SYSTEM,			/* one pod across the whole system */
-- 
cgit v1.2.3


From 5920d046f7ae3bf9cf51b9d915c1fff13d299d84 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Wed, 1 Apr 2026 06:03:53 -0700
Subject: workqueue: add WQ_AFFN_CACHE_SHARD affinity scope

On systems where many CPUs share one LLC, unbound workqueues using
WQ_AFFN_CACHE collapse to a single worker pool, causing heavy spinlock
contention on pool->lock. For example, Chuck Lever measured 39% of
cycles lost to native_queued_spin_lock_slowpath on a 12-core shared-L3
NFS-over-RDMA system.

The existing affinity hierarchy (cpu, smt, cache, numa, system) offers
no intermediate option between per-LLC and per-SMT-core granularity.

Add WQ_AFFN_CACHE_SHARD, which subdivides each LLC into groups of at
most wq_cache_shard_size cores (default 8, tunable via boot parameter).
Shards are always split on core (SMT group) boundaries so that
Hyper-Threading siblings are never placed in different pods. Cores are
distributed across shards as evenly as possible -- for example, 36 cores
in a single LLC with max shard size 8 produces 5 shards of 8+7+7+7+7
cores.

The implementation follows the same comparator pattern as other affinity
scopes: precompute_cache_shard_ids() pre-fills the cpu_shard_id[] array
from the already-initialized WQ_AFFN_CACHE and WQ_AFFN_SMT topology,
and cpus_share_cache_shard() is passed to init_pod_type().

Benchmark on NVIDIA Grace (72 CPUs, single LLC, 50k items/thread), show
cache_shard delivers ~5x the throughput and ~6.5x lower p50 latency
compared to cache scope on this 72-core single-LLC system.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |   1 +
 kernel/workqueue.c        | 183 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 184 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 75634a09576a..ab6cb70ca1a5 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -133,6 +133,7 @@ enum wq_affn_scope {
 	WQ_AFFN_CPU,			/* one pod per CPU */
 	WQ_AFFN_SMT,			/* one pod per SMT */
 	WQ_AFFN_CACHE,			/* one pod per LLC */
+	WQ_AFFN_CACHE_SHARD,		/* synthetic sub-LLC shards */
 	WQ_AFFN_NUMA,			/* one pod per NUMA node */
 	WQ_AFFN_SYSTEM,			/* one pod across the whole system */
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 18c3fe90daca..f4a23e1418a7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -131,6 +131,14 @@ enum wq_internal_consts {
 	WORKER_ID_LEN		= 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */
 };
 
+/* Layout of shards within one LLC pod */
+struct llc_shard_layout {
+	int nr_large_shards;	/* number of large shards (cores_per_shard + 1) */
+	int cores_per_shard;	/* base number of cores per default shard */
+	int nr_shards;		/* total number of shards */
+	/* nr_default shards = (nr_shards - nr_large_shards) */
+};
+
 /*
  * We don't want to trap softirq for too long. See MAX_SOFTIRQ_TIME and
  * MAX_SOFTIRQ_RESTART in kernel/softirq.c. These are macros because
@@ -410,6 +418,7 @@ static const char * const wq_affn_names[WQ_AFFN_NR_TYPES] = {
 	[WQ_AFFN_CPU]		= "cpu",
 	[WQ_AFFN_SMT]		= "smt",
 	[WQ_AFFN_CACHE]		= "cache",
+	[WQ_AFFN_CACHE_SHARD]	= "cache_shard",
 	[WQ_AFFN_NUMA]		= "numa",
 	[WQ_AFFN_SYSTEM]	= "system",
 };
@@ -432,6 +441,9 @@ module_param_named(cpu_intensive_warning_thresh, wq_cpu_intensive_warning_thresh
 static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
 module_param_named(power_efficient, wq_power_efficient, bool, 0444);
 
+static unsigned int wq_cache_shard_size = 8;
+module_param_named(cache_shard_size, wq_cache_shard_size, uint, 0444);
+
 static bool wq_online;			/* can kworkers be created yet? */
 static bool wq_topo_initialized __read_mostly = false;
 
@@ -8155,6 +8167,175 @@ static bool __init cpus_share_numa(int cpu0, int cpu1)
 	return cpu_to_node(cpu0) == cpu_to_node(cpu1);
 }
 
+/* Maps each CPU to its shard index within the LLC pod it belongs to */
+static int cpu_shard_id[NR_CPUS] __initdata;
+
+/**
+ * llc_count_cores - count distinct cores (SMT groups) within an LLC pod
+ * @pod_cpus:  the cpumask of CPUs in the LLC pod
+ * @smt_pods:  the SMT pod type, used to identify sibling groups
+ *
+ * A core is represented by the lowest-numbered CPU in its SMT group. Returns
+ * the number of distinct cores found in @pod_cpus.
+ */
+static int __init llc_count_cores(const struct cpumask *pod_cpus,
+				  struct wq_pod_type *smt_pods)
+{
+	const struct cpumask *sibling_cpus;
+	int nr_cores = 0, c;
+
+	/*
+	 * Count distinct cores by only counting the first CPU in each
+	 * SMT sibling group.
+	 */
+	for_each_cpu(c, pod_cpus) {
+		sibling_cpus = smt_pods->pod_cpus[smt_pods->cpu_pod[c]];
+		if (cpumask_first(sibling_cpus) == c)
+			nr_cores++;
+	}
+
+	return nr_cores;
+}
+
+/*
+ * llc_shard_size - number of cores in a given shard
+ *
+ * Cores are spread as evenly as possible. The first @nr_large_shards shards are
+ * "large shards" with (cores_per_shard + 1) cores; the rest are "default
+ * shards" with cores_per_shard cores.
+ */
+static int __init llc_shard_size(int shard_id, int cores_per_shard, int nr_large_shards)
+{
+	/* The first @nr_large_shards shards are large shards */
+	if (shard_id < nr_large_shards)
+		return cores_per_shard + 1;
+
+	/* The remaining shards are default shards */
+	return cores_per_shard;
+}
+
+/*
+ * llc_calc_shard_layout - compute the shard layout for an LLC pod
+ * @nr_cores:  number of distinct cores in the LLC pod
+ *
+ * Chooses the number of shards that keeps average shard size closest to
+ * wq_cache_shard_size. Returns a struct describing the total number of shards,
+ * the base size of each, and how many are large shards.
+ */
+static struct llc_shard_layout __init llc_calc_shard_layout(int nr_cores)
+{
+	struct llc_shard_layout layout;
+
+	/* Ensure at least one shard; pick the count closest to the target size */
+	layout.nr_shards = max(1, DIV_ROUND_CLOSEST(nr_cores, wq_cache_shard_size));
+	layout.cores_per_shard = nr_cores / layout.nr_shards;
+	layout.nr_large_shards = nr_cores % layout.nr_shards;
+
+	return layout;
+}
+
+/*
+ * llc_shard_is_full - check whether a shard has reached its core capacity
+ * @cores_in_shard: number of cores already assigned to this shard
+ * @shard_id:       index of the shard being checked
+ * @layout:         the shard layout computed by llc_calc_shard_layout()
+ *
+ * Returns true if @cores_in_shard equals the expected size for @shard_id.
+ */
+static bool __init llc_shard_is_full(int cores_in_shard, int shard_id,
+				     const struct llc_shard_layout *layout)
+{
+	return cores_in_shard == llc_shard_size(shard_id, layout->cores_per_shard,
+						layout->nr_large_shards);
+}
+
+/**
+ * llc_populate_cpu_shard_id - populate cpu_shard_id[] for each CPU in an LLC pod
+ * @pod_cpus:  the cpumask of CPUs in the LLC pod
+ * @smt_pods:  the SMT pod type, used to identify sibling groups
+ * @nr_cores:  number of distinct cores in @pod_cpus (from llc_count_cores())
+ *
+ * Walks @pod_cpus in order. At each SMT group leader, advances to the next
+ * shard once the current shard is full. Results are written to cpu_shard_id[].
+ */
+static void __init llc_populate_cpu_shard_id(const struct cpumask *pod_cpus,
+					     struct wq_pod_type *smt_pods,
+					     int nr_cores)
+{
+	struct llc_shard_layout layout = llc_calc_shard_layout(nr_cores);
+	const struct cpumask *sibling_cpus;
+	/* Count the number of cores in the current shard_id */
+	int cores_in_shard = 0;
+	/* This is a cursor for the shards. Go from zero to nr_shards - 1*/
+	int shard_id = 0;
+	int c;
+
+	/* Iterate at every CPU for a given LLC pod, and assign it a shard */
+	for_each_cpu(c, pod_cpus) {
+		sibling_cpus = smt_pods->pod_cpus[smt_pods->cpu_pod[c]];
+		if (cpumask_first(sibling_cpus) == c) {
+			/* This is the CPU leader for the siblings */
+			if (llc_shard_is_full(cores_in_shard, shard_id, &layout)) {
+				shard_id++;
+				cores_in_shard = 0;
+			}
+			cores_in_shard++;
+			cpu_shard_id[c] = shard_id;
+		} else {
+			/*
+			 * The siblings' shard MUST be the same as the leader.
+			 * never split threads in the same core.
+			 */
+			cpu_shard_id[c] = cpu_shard_id[cpumask_first(sibling_cpus)];
+		}
+	}
+
+	WARN_ON_ONCE(shard_id != (layout.nr_shards - 1));
+}
+
+/**
+ * precompute_cache_shard_ids - assign each CPU its shard index within its LLC
+ *
+ * Iterates over all LLC pods. For each pod, counts distinct cores then assigns
+ * shard indices to all CPUs in the pod. Must be called after WQ_AFFN_CACHE and
+ * WQ_AFFN_SMT have been initialized.
+ */
+static void __init precompute_cache_shard_ids(void)
+{
+	struct wq_pod_type *llc_pods = &wq_pod_types[WQ_AFFN_CACHE];
+	struct wq_pod_type *smt_pods = &wq_pod_types[WQ_AFFN_SMT];
+	const struct cpumask *cpus_sharing_llc;
+	int nr_cores;
+	int pod;
+
+	if (!wq_cache_shard_size) {
+		pr_warn("workqueue: cache_shard_size must be > 0, setting to 1\n");
+		wq_cache_shard_size = 1;
+	}
+
+	for (pod = 0; pod < llc_pods->nr_pods; pod++) {
+		cpus_sharing_llc = llc_pods->pod_cpus[pod];
+
+		/* Number of cores in this given LLC */
+		nr_cores = llc_count_cores(cpus_sharing_llc, smt_pods);
+		llc_populate_cpu_shard_id(cpus_sharing_llc, smt_pods, nr_cores);
+	}
+}
+
+/*
+ * cpus_share_cache_shard - test whether two CPUs belong to the same cache shard
+ *
+ * Two CPUs share a cache shard if they are in the same LLC and have the same
+ * shard index. Used as the pod affinity callback for WQ_AFFN_CACHE_SHARD.
+ */
+static bool __init cpus_share_cache_shard(int cpu0, int cpu1)
+{
+	if (!cpus_share_cache(cpu0, cpu1))
+		return false;
+
+	return cpu_shard_id[cpu0] == cpu_shard_id[cpu1];
+}
+
 /**
  * workqueue_init_topology - initialize CPU pods for unbound workqueues
  *
@@ -8170,6 +8351,8 @@ void __init workqueue_init_topology(void)
 	init_pod_type(&wq_pod_types[WQ_AFFN_CPU], cpus_dont_share);
 	init_pod_type(&wq_pod_types[WQ_AFFN_SMT], cpus_share_smt);
 	init_pod_type(&wq_pod_types[WQ_AFFN_CACHE], cpus_share_cache);
+	precompute_cache_shard_ids();
+	init_pod_type(&wq_pod_types[WQ_AFFN_CACHE_SHARD], cpus_share_cache_shard);
 	init_pod_type(&wq_pod_types[WQ_AFFN_NUMA], cpus_share_numa);
 
 	wq_topo_initialized = true;
-- 
cgit v1.2.3


From d57e74f10461b80c77d1678f646720f616fb8553 Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@nvidia.com>
Date: Sun, 1 Mar 2026 20:11:55 -0500
Subject: bitmap: introduce bitmap_weighted_xor()

The function helps to XOR bitmaps and calculate Hamming weight of
the result in one pass.

Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Yury Norov <ynorov@nvidia.com>
---
 include/linux/bitmap.h | 15 +++++++++++++++
 lib/bitmap.c           |  7 +++++++
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 9c0d1de44350..b007d54a9036 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -46,6 +46,7 @@ struct device;
  *  bitmap_and(dst, src1, src2, nbits)          *dst = *src1 & *src2
  *  bitmap_or(dst, src1, src2, nbits)           *dst = *src1 | *src2
  *  bitmap_weighted_or(dst, src1, src2, nbits)	*dst = *src1 | *src2. Returns Hamming Weight of dst
+ *  bitmap_weighted_xor(dst, src1, src2, nbits)	*dst = *src1 ^ *src2. Returns Hamming Weight of dst
  *  bitmap_xor(dst, src1, src2, nbits)          *dst = *src1 ^ *src2
  *  bitmap_andnot(dst, src1, src2, nbits)       *dst = *src1 & ~(*src2)
  *  bitmap_complement(dst, src, nbits)          *dst = ~(*src)
@@ -169,6 +170,8 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 		 const unsigned long *bitmap2, unsigned int nbits);
 unsigned int __bitmap_weighted_or(unsigned long *dst, const unsigned long *bitmap1,
 				  const unsigned long *bitmap2, unsigned int nbits);
+unsigned int __bitmap_weighted_xor(unsigned long *dst, const unsigned long *bitmap1,
+				  const unsigned long *bitmap2, unsigned int nbits);
 void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 		  const unsigned long *bitmap2, unsigned int nbits);
 bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
@@ -353,6 +356,18 @@ unsigned int bitmap_weighted_or(unsigned long *dst, const unsigned long *src1,
 	}
 }
 
+static __always_inline
+unsigned int bitmap_weighted_xor(unsigned long *dst, const unsigned long *src1,
+				const unsigned long *src2, unsigned int nbits)
+{
+	if (small_const_nbits(nbits)) {
+		*dst = *src1 ^ *src2;
+		return hweight_long(*dst & BITMAP_LAST_WORD_MASK(nbits));
+	} else {
+		return __bitmap_weighted_xor(dst, src1, src2, nbits);
+	}
+}
+
 static __always_inline
 void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 		const unsigned long *src2, unsigned int nbits)
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 1b897f94e0ff..b9bfa157e095 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -363,6 +363,13 @@ unsigned int __bitmap_weighted_or(unsigned long *dst, const unsigned long *bitma
 }
 EXPORT_SYMBOL(__bitmap_weighted_or);
 
+unsigned int __bitmap_weighted_xor(unsigned long *dst, const unsigned long *bitmap1,
+				  const unsigned long *bitmap2, unsigned int bits)
+{
+	return BITMAP_WEIGHT(({dst[idx] = bitmap1[idx] ^ bitmap2[idx]; dst[idx]; }), bits);
+}
+EXPORT_SYMBOL(__bitmap_weighted_xor);
+
 void __bitmap_set(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
-- 
cgit v1.2.3


From f0548044a02630402d374df195ed3af4cc5e4711 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Wed, 25 Mar 2026 20:23:51 +0100
Subject: dma-mapping: introduce DMA_ATTR_CC_SHARED for shared memory

Current CC designs don't place a vIOMMU in front of untrusted devices.
Instead, the DMA API forces all untrusted device DMA through swiotlb
bounce buffers (is_swiotlb_force_bounce()) which copies data into
shared memory on behalf of the device.

When a caller has already arranged for the memory to be shared
via set_memory_decrypted(), the DMA API needs to know so it can map
directly using the unencrypted physical address rather than bounce
buffering. Following the pattern of DMA_ATTR_MMIO, add
DMA_ATTR_CC_SHARED for this purpose. Like the MMIO case, only the
caller knows what kind of memory it has and must inform the DMA API
for it to work correctly.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260325192352.437608-2-jiri@resnulli.us
---
 include/linux/dma-mapping.h | 10 ++++++++++
 include/trace/events/dma.h  |  3 ++-
 kernel/dma/direct.h         | 14 +++++++++++---
 kernel/dma/mapping.c        | 13 +++++++++++--
 4 files changed, 34 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 677c51ab7510..db8ab24a54f4 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -92,6 +92,16 @@
  * flushing.
  */
 #define DMA_ATTR_REQUIRE_COHERENT	(1UL << 12)
+/*
+ * DMA_ATTR_CC_SHARED: Indicates the DMA mapping is shared (decrypted) for
+ * confidential computing guests. For normal system memory the caller must have
+ * called set_memory_decrypted(), and pgprot_decrypted must be used when
+ * creating CPU PTEs for the mapping. The same shared semantic may be passed
+ * to the vIOMMU when it sets up the IOPTE. For MMIO use together with
+ * DMA_ATTR_MMIO to indicate shared MMIO. Unless DMA_ATTR_MMIO is provided
+ * a struct page is required.
+ */
+#define DMA_ATTR_CC_SHARED	(1UL << 13)
 
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 63597b004424..31c9ddf72c9d 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -34,7 +34,8 @@ TRACE_DEFINE_ENUM(DMA_NONE);
 		{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
 		{ DMA_ATTR_MMIO, "MMIO" }, \
 		{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \
-		{ DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" })
+		{ DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }, \
+		{ DMA_ATTR_CC_SHARED, "CC_SHARED" })
 
 DECLARE_EVENT_CLASS(dma_map,
 	TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index b86ff65496fc..7140c208c123 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -89,16 +89,24 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
 	dma_addr_t dma_addr;
 
 	if (is_swiotlb_force_bounce(dev)) {
-		if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
-			return DMA_MAPPING_ERROR;
+		if (!(attrs & DMA_ATTR_CC_SHARED)) {
+			if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
+				return DMA_MAPPING_ERROR;
 
-		return swiotlb_map(dev, phys, size, dir, attrs);
+			return swiotlb_map(dev, phys, size, dir, attrs);
+		}
+	} else if (attrs & DMA_ATTR_CC_SHARED) {
+		return DMA_MAPPING_ERROR;
 	}
 
 	if (attrs & DMA_ATTR_MMIO) {
 		dma_addr = phys;
 		if (unlikely(!dma_capable(dev, dma_addr, size, false)))
 			goto err_overflow;
+	} else if (attrs & DMA_ATTR_CC_SHARED) {
+		dma_addr = phys_to_dma_unencrypted(dev, phys);
+		if (unlikely(!dma_capable(dev, dma_addr, size, false)))
+			goto err_overflow;
 	} else {
 		dma_addr = phys_to_dma(dev, phys);
 		if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index df3eccc7d4ca..23ed8eb9233e 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -157,6 +157,7 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 	bool is_mmio = attrs & DMA_ATTR_MMIO;
+	bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
 	dma_addr_t addr = DMA_MAPPING_ERROR;
 
 	BUG_ON(!valid_dma_direction(dir));
@@ -168,8 +169,11 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
 		return DMA_MAPPING_ERROR;
 
 	if (dma_map_direct(dev, ops) ||
-	    (!is_mmio && arch_dma_map_phys_direct(dev, phys + size)))
+	    (!is_mmio && !is_cc_shared &&
+	     arch_dma_map_phys_direct(dev, phys + size)))
 		addr = dma_direct_map_phys(dev, phys, size, dir, attrs, true);
+	else if (is_cc_shared)
+		return DMA_MAPPING_ERROR;
 	else if (use_dma_iommu(dev))
 		addr = iommu_dma_map_phys(dev, phys, size, dir, attrs);
 	else if (ops->map_phys)
@@ -206,11 +210,16 @@ void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size,
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 	bool is_mmio = attrs & DMA_ATTR_MMIO;
+	bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
 
 	BUG_ON(!valid_dma_direction(dir));
+
 	if (dma_map_direct(dev, ops) ||
-	    (!is_mmio && arch_dma_unmap_phys_direct(dev, addr + size)))
+	    (!is_mmio && !is_cc_shared &&
+	     arch_dma_unmap_phys_direct(dev, addr + size)))
 		dma_direct_unmap_phys(dev, addr, size, dir, attrs, true);
+	else if (is_cc_shared)
+		return;
 	else if (use_dma_iommu(dev))
 		iommu_dma_unmap_phys(dev, addr, size, dir, attrs);
 	else if (ops->unmap_phys)
-- 
cgit v1.2.3


From 3c7df5079cfc6133d01ae144ae76a980276cc726 Mon Sep 17 00:00:00 2001
From: Amit Sunil Dhamne <amitsd@google.com>
Date: Wed, 1 Apr 2026 21:02:08 +0000
Subject: mfd: max77759: fix comment style for enums

Fix comment style for enums so they're kernel-doc compliant.

Signed-off-by: Amit Sunil Dhamne <amitsd@google.com>
Link: https://patch.msgid.link/20260401-fix-mfd-max77759-usb-next-v1-1-174ec23ad824@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mfd/max77759.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max77759.h b/include/linux/mfd/max77759.h
index ad1aa4c2b779..ec19be952877 100644
--- a/include/linux/mfd/max77759.h
+++ b/include/linux/mfd/max77759.h
@@ -131,12 +131,12 @@
 #define MAX77759_MAXQ_OPCODE_USER_SPACE_READ     0x81
 #define MAX77759_MAXQ_OPCODE_USER_SPACE_WRITE    0x82
 
-/*
+/**
  * enum max77759_chgr_chgin_dtls_status - Charger Input Status
  * @MAX77759_CHGR_CHGIN_DTLS_VBUS_UNDERVOLTAGE:
  *     Charger input voltage (Vchgin) < Under Voltage Threshold (Vuvlo)
- * @MAX77759_CHGR_CHGIN_DTLS_VBUS_MARGINAL_VOLTAGE: Vchgin > Vuvlo and
- *     Vchgin < (Battery Voltage (Vbatt) + system voltage (Vsys))
+ * @MAX77759_CHGR_CHGIN_DTLS_VBUS_MARGINAL_VOLTAGE:
+ *     Vchgin > Vuvlo and Vchgin < (Battery Voltage (Vbatt) + system voltage (Vsys))
  * @MAX77759_CHGR_CHGIN_DTLS_VBUS_OVERVOLTAGE:
  *     Vchgin > Over Voltage threshold (Vovlo)
  * @MAX77759_CHGR_CHGIN_DTLS_VBUS_VALID:
@@ -149,7 +149,7 @@ enum max77759_chgr_chgin_dtls_status {
 	MAX77759_CHGR_CHGIN_DTLS_VBUS_VALID,
 };
 
-/*
+/**
  * enum max77759_chgr_bat_dtls_states - Battery Details
  * @MAX77759_CHGR_BAT_DTLS_NO_BATT_CHG_SUSP:	No battery and the charger suspended
  * @MAX77759_CHGR_BAT_DTLS_DEAD_BATTERY:	Vbatt < Vtrickle
@@ -171,7 +171,7 @@ enum max77759_chgr_bat_dtls_states {
 	MAX77759_CHGR_BAT_DTLS_BAT_ONLY_MODE,
 };
 
-/*
+/**
  * enum max77759_chgr_chg_dtls_states - Charger Details
  * @MAX77759_CHGR_CHG_DTLS_PREQUAL:		Charger in prequalification mode
  * @MAX77759_CHGR_CHG_DTLS_CC:			Charger in fast charge const curr mode
-- 
cgit v1.2.3


From 7b7f2dd913829e06705035dfc41ca25fa6ec68d3 Mon Sep 17 00:00:00 2001
From: Pawel Laszczak <pawell@cadence.com>
Date: Tue, 31 Mar 2026 10:19:11 +0200
Subject: usb: cdnsp: Add support for device-only configuration

This patch introduces support for operating the Cadence USBSSP (cdnsp)
controller in a peripheral-only mode, bypassing the Dual-Role Device (DRD)
logic.

The change in BAR indexing (from BAR 2 to BAR 1) is a direct
consequence of switching from 64-bit to 32-bit addressing in the
Peripheral-only configuration.

Tested on PCI platform with Device-only configuration. Platform-side
changes are included to support the PCI glue layer's property injection.

Signed-off-by: Pawel Laszczak <pawell@cadence.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com> # pci_ids.h
Link: https://patch.msgid.link/20260331-device_only-v1-1-00378b80365c@cadence.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/cdns3/cdns3-plat.c | 24 ++++++++++++---------
 drivers/usb/cdns3/cdnsp-pci.c  | 47 ++++++++++++++++++++++++++++++++++--------
 drivers/usb/cdns3/core.c       |  3 ++-
 drivers/usb/cdns3/core.h       |  5 ++++-
 drivers/usb/cdns3/drd.c        | 16 ++++++++++++--
 include/linux/pci_ids.h        |  1 +
 6 files changed, 73 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/cdns3/cdns3-plat.c b/drivers/usb/cdns3/cdns3-plat.c
index 71c612e27b73..33746e672cda 100644
--- a/drivers/usb/cdns3/cdns3-plat.c
+++ b/drivers/usb/cdns3/cdns3-plat.c
@@ -75,6 +75,7 @@ static int cdns3_plat_probe(struct platform_device *pdev)
 	if (cdns->pdata && cdns->pdata->override_apb_timeout)
 		cdns->override_apb_timeout = cdns->pdata->override_apb_timeout;
 
+	cdns->no_drd = device_property_read_bool(dev, "no_drd");
 	platform_set_drvdata(pdev, cdns);
 
 	ret = platform_get_irq_byname(pdev, "host");
@@ -107,21 +108,23 @@ static int cdns3_plat_probe(struct platform_device *pdev)
 
 	cdns->dev_regs	= regs;
 
-	cdns->otg_irq = platform_get_irq_byname(pdev, "otg");
-	if (cdns->otg_irq < 0)
-		return dev_err_probe(dev, cdns->otg_irq,
-				     "Failed to get otg IRQ\n");
+	if (!cdns->no_drd) {
+		cdns->otg_irq = platform_get_irq_byname(pdev, "otg");
+		if (cdns->otg_irq < 0)
+			return dev_err_probe(dev, cdns->otg_irq,
+					     "Failed to get otg IRQ\n");
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "otg");
-	if (!res) {
-		dev_err(dev, "couldn't get otg resource\n");
-		return -ENXIO;
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "otg");
+		if (!res) {
+			dev_err(dev, "couldn't get otg resource\n");
+			return -ENXIO;
+		}
+
+		cdns->otg_res = *res;
 	}
 
 	cdns->phyrst_a_enable = device_property_read_bool(dev, "cdns,phyrst-a-enable");
 
-	cdns->otg_res = *res;
-
 	cdns->wakeup_irq = platform_get_irq_byname_optional(pdev, "wakeup");
 	if (cdns->wakeup_irq == -EPROBE_DEFER)
 		return cdns->wakeup_irq;
@@ -158,6 +161,7 @@ static int cdns3_plat_probe(struct platform_device *pdev)
 		goto err_cdns_init;
 
 	cdns->gadget_init = cdns3_plat_gadget_init;
+
 	ret = cdns_core_init_role(cdns);
 	if (ret)
 		goto err_cdns_init;
diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c
index 432007cfe695..e20c59ceb8a4 100644
--- a/drivers/usb/cdns3/cdnsp-pci.c
+++ b/drivers/usb/cdns3/cdnsp-pci.c
@@ -19,6 +19,7 @@
 
 struct cdnsp_wrap {
 	struct platform_device *plat_dev;
+	struct property_entry prop[3];
 	struct resource dev_res[6];
 	int devfn;
 };
@@ -29,10 +30,15 @@ struct cdnsp_wrap {
 #define RES_HOST_ID		3
 #define RES_DEV_ID		4
 #define RES_DRD_ID		5
-
+/* DRD PCI configuration - 64-bit addressing */
+/* First PCI function */
 #define PCI_BAR_HOST		0
-#define PCI_BAR_OTG		0
 #define PCI_BAR_DEV		2
+/* Second PCI function */
+#define PCI_BAR_OTG		0
+/* Device only PCI configuration - 32-bit addressing */
+/* First PCI function */
+#define PCI_BAR_ONLY_DEV	1
 
 #define PCI_DEV_FN_HOST_DEVICE	0
 #define PCI_DEV_FN_OTG		1
@@ -65,6 +71,7 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 	struct cdnsp_wrap *wrap;
 	struct resource *res;
 	struct pci_dev *func;
+	bool no_drd = false;
 	int ret = 0;
 
 	/*
@@ -75,11 +82,14 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 		    pdev->devfn != PCI_DEV_FN_OTG))
 		return -EINVAL;
 
+	if (pdev->device == PCI_DEVICE_ID_CDNS_UDC_USBSSP)
+		no_drd = true;
+
 	func = cdnsp_get_second_fun(pdev);
-	if (!func)
+	if (!func && !no_drd)
 		return -EINVAL;
 
-	if (func->class == PCI_CLASS_SERIAL_USB_XHCI ||
+	if ((func && func->class == PCI_CLASS_SERIAL_USB_XHCI) ||
 	    pdev->class == PCI_CLASS_SERIAL_USB_XHCI) {
 		ret = -EINVAL;
 		goto put_pci;
@@ -93,7 +103,7 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 
 	pci_set_master(pdev);
 
-	if (pci_is_enabled(func)) {
+	if (func && pci_is_enabled(func)) {
 		wrap = pci_get_drvdata(func);
 	} else {
 		wrap = kzalloc_obj(*wrap);
@@ -106,10 +116,13 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 	res = wrap->dev_res;
 
 	if (pdev->devfn == PCI_DEV_FN_HOST_DEVICE) {
+		int bar_dev = no_drd ? PCI_BAR_ONLY_DEV : PCI_BAR_DEV;
+
 		/* Function 0: host(BAR_0) + device(BAR_2). */
 		dev_dbg(&pdev->dev, "Initialize Device resources\n");
-		res[RES_DEV_ID].start = pci_resource_start(pdev, PCI_BAR_DEV);
-		res[RES_DEV_ID].end = pci_resource_end(pdev, PCI_BAR_DEV);
+
+		res[RES_DEV_ID].start = pci_resource_start(pdev, bar_dev);
+		res[RES_DEV_ID].end = pci_resource_end(pdev, bar_dev);
 		res[RES_DEV_ID].name = "dev";
 		res[RES_DEV_ID].flags = IORESOURCE_MEM;
 		dev_dbg(&pdev->dev, "USBSSP-DEV physical base addr: %pa\n",
@@ -145,9 +158,20 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 		wrap->dev_res[RES_IRQ_OTG_ID].flags = IORESOURCE_IRQ;
 	}
 
-	if (pci_is_enabled(func)) {
+	if (no_drd || pci_is_enabled(func)) {
+		u8 idx = 0;
+
 		/* set up platform device info */
 		pdata.override_apb_timeout = CHICKEN_APB_TIMEOUT_VALUE;
+		if (no_drd) {
+			wrap->prop[idx++] = PROPERTY_ENTRY_STRING("dr_mode", "peripheral");
+			wrap->prop[idx++] = PROPERTY_ENTRY_BOOL("no_drd");
+		} else {
+			wrap->prop[idx++] = PROPERTY_ENTRY_STRING("dr_mode", "otg");
+			wrap->prop[idx++] = PROPERTY_ENTRY_BOOL("usb-role-switch");
+		}
+
+		wrap->prop[idx] = (struct property_entry){ };
 		memset(&plat_info, 0, sizeof(plat_info));
 		plat_info.parent = &pdev->dev;
 		plat_info.fwnode = pdev->dev.fwnode;
@@ -158,6 +182,7 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 		plat_info.dma_mask = pdev->dma_mask;
 		plat_info.data = &pdata;
 		plat_info.size_data = sizeof(pdata);
+		plat_info.properties = wrap->prop;
 		wrap->devfn = pdev->devfn;
 		/* register platform device */
 		wrap->plat_dev = platform_device_register_full(&plat_info);
@@ -185,13 +210,17 @@ static void cdnsp_pci_remove(struct pci_dev *pdev)
 	if (wrap->devfn == pdev->devfn)
 		platform_device_unregister(wrap->plat_dev);
 
-	if (!pci_is_enabled(func))
+	if (!func || !pci_is_enabled(func))
 		kfree(wrap);
 
 	pci_dev_put(func);
 }
 
 static const struct pci_device_id cdnsp_pci_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CDNS, PCI_DEVICE_ID_CDNS_UDC_USBSSP),
+	  .class = PCI_CLASS_SERIAL_USB_DEVICE },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CDNS, PCI_DEVICE_ID_CDNS_UDC_USBSSP),
+	  .class = PCI_CLASS_SERIAL_USB_CDNS },
 	{ PCI_DEVICE(PCI_VENDOR_ID_CDNS, PCI_DEVICE_ID_CDNS_USBSSP),
 	  .class = PCI_CLASS_SERIAL_USB_DEVICE },
 	{ PCI_DEVICE(PCI_VENDOR_ID_CDNS, PCI_DEVICE_ID_CDNS_USBSSP),
diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c
index 10f00b6c3c83..72f7acba6258 100644
--- a/drivers/usb/cdns3/core.c
+++ b/drivers/usb/cdns3/core.c
@@ -71,7 +71,8 @@ static void cdns_role_stop(struct cdns *cdns)
 static void cdns_exit_roles(struct cdns *cdns)
 {
 	cdns_role_stop(cdns);
-	cdns_drd_exit(cdns);
+	if (!cdns->no_drd)
+		cdns_drd_exit(cdns);
 }
 
 /**
diff --git a/drivers/usb/cdns3/core.h b/drivers/usb/cdns3/core.h
index dc8c4137de15..6abe231f4559 100644
--- a/drivers/usb/cdns3/core.h
+++ b/drivers/usb/cdns3/core.h
@@ -80,9 +80,11 @@ struct cdns3_platform_data {
  * @pdata: platform data from glue layer
  * @lock: spinlock structure
  * @xhci_plat_data: xhci private data structure pointer
+ * @gadget_init: pointer to gadget initialization function
  * @override_apb_timeout: hold value of APB timeout. For value 0 the default
  *                        value in CHICKEN_BITS_3 will be preserved.
- * @gadget_init: pointer to gadget initialization function
+ * @no_drd: DRD register block is inaccessible - driver handles only
+ *          device mode.
  */
 struct cdns {
 	struct device			*dev;
@@ -122,6 +124,7 @@ struct cdns {
 	struct xhci_plat_priv		*xhci_plat_data;
 	int (*gadget_init)(struct cdns *cdns);
 	u32                             override_apb_timeout;
+	bool				no_drd;
 };
 
 int cdns_hw_role_switch(struct cdns *cdns);
diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c
index 84fb38a5723a..38f3051c2188 100644
--- a/drivers/usb/cdns3/drd.c
+++ b/drivers/usb/cdns3/drd.c
@@ -107,7 +107,7 @@ void cdns_clear_vbus(struct cdns *cdns)
 {
 	u32 reg;
 
-	if (cdns->version != CDNSP_CONTROLLER_V2)
+	if (cdns->version != CDNSP_CONTROLLER_V2 || cdns->no_drd)
 		return;
 
 	reg = readl(&cdns->otg_cdnsp_regs->override);
@@ -120,7 +120,7 @@ void cdns_set_vbus(struct cdns *cdns)
 {
 	u32 reg;
 
-	if (cdns->version != CDNSP_CONTROLLER_V2)
+	if (cdns->version != CDNSP_CONTROLLER_V2 || cdns->no_drd)
 		return;
 
 	reg = readl(&cdns->otg_cdnsp_regs->override);
@@ -234,6 +234,9 @@ int cdns_drd_gadget_on(struct cdns *cdns)
 	u32 ready_bit;
 	int ret, val;
 
+	if (cdns->no_drd)
+		return 0;
+
 	/* switch OTG core */
 	writel(OTGCMD_DEV_BUS_REQ | reg, &cdns->otg_regs->cmd);
 
@@ -265,6 +268,9 @@ void cdns_drd_gadget_off(struct cdns *cdns)
 {
 	u32 val;
 
+	if (cdns->no_drd)
+		return;
+
 	/*
 	 * Driver should wait at least 10us after disabling Device
 	 * before turning-off Device (DEV_BUS_DROP).
@@ -392,6 +398,12 @@ int cdns_drd_init(struct cdns *cdns)
 	u32 state, reg;
 	int ret;
 
+	if (cdns->no_drd) {
+		cdns->version  = CDNSP_CONTROLLER_V2;
+		cdns->dr_mode = USB_DR_MODE_PERIPHERAL;
+		return 0;
+	}
+
 	regs = devm_ioremap_resource(cdns->dev, &cdns->otg_res);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 406abf629be2..a931fb201402 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2424,6 +2424,7 @@
 #define PCI_DEVICE_ID_CDNS_USBSS	0x0100
 #define PCI_DEVICE_ID_CDNS_USB		0x0120
 #define PCI_DEVICE_ID_CDNS_USBSSP	0x0200
+#define PCI_DEVICE_ID_CDNS_UDC_USBSSP	0x0400
 
 #define PCI_VENDOR_ID_ARECA		0x17d3
 #define PCI_DEVICE_ID_ARECA_1110	0x1110
-- 
cgit v1.2.3


From 408d8af01f3a4d666620029a85e741906ff96f47 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 24 Jan 2026 17:58:48 -0500
Subject: for_each_alias(): helper macro for iterating through dentries of
 given inode

Most of the places using d_alias are loops iterating through all aliases for
given inode; introduce a helper macro (for_each_alias(dentry, inode))
and convert open-coded instances of such loop to it.

They are easier to read that way and it reduces the noise on the next steps.

You _must_ hold inode->i_lock over that thing.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/porting.rst | 10 ++++++++++
 fs/affs/amigaffs.c                    |  2 +-
 fs/ceph/mds_client.c                  |  2 +-
 fs/dcache.c                           |  6 +++---
 fs/exportfs/expfs.c                   |  2 +-
 fs/nfs/dir.c                          |  2 +-
 fs/notify/fsnotify.c                  |  2 +-
 fs/ocfs2/dcache.c                     |  2 +-
 fs/overlayfs/dir.c                    |  2 +-
 fs/smb/client/inode.c                 |  2 +-
 include/linux/dcache.h                |  4 ++++
 11 files changed, 25 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 52ff1d19405b..9a9babd9ec48 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1361,3 +1361,13 @@ to match what strlen() would return if it was ran on the string.
 
 However, if the string is freely accessible for the duration of inode's
 lifetime, consider using inode_set_cached_link() instead.
+
+---
+
+**recommended**
+
+If you really need to iterate through dentries for given inode, use
+for_each_alias(dentry, inode) instead of hlist_for_each_entry; better
+yet, see if any of the exported primitives could be used instead of
+the entire loop.  You still need to hold ->i_lock of the inode over
+either form of manual loop.
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index fd669daa4e7b..91966b1f41f6 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -126,7 +126,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino)
 {
 	struct dentry *dentry;
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
+	for_each_alias(dentry, inode) {
 		if (entry_ino == (u32)(long)dentry->d_fsdata) {
 			dentry->d_fsdata = (void *)inode->i_ino;
 			break;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b1746273f186..f839109fb66f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4614,7 +4614,7 @@ static struct dentry* d_find_primary(struct inode *inode)
 		goto out_unlock;
 	}
 
-	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+	for_each_alias(alias, inode) {
 		spin_lock(&alias->d_lock);
 		if (!d_unhashed(alias) &&
 		    (ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 7ba1801d8132..e069b6ec4ec0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -790,7 +790,7 @@ void d_mark_dontcache(struct inode *inode)
 	struct dentry *de;
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(de, &inode->i_dentry, d_u.d_alias) {
+	for_each_alias(de, inode) {
 		spin_lock(&de->d_lock);
 		de->d_flags |= DCACHE_DONTCACHE;
 		spin_unlock(&de->d_lock);
@@ -1040,7 +1040,7 @@ static struct dentry *__d_find_alias(struct inode *inode)
 	if (S_ISDIR(inode->i_mode))
 		return __d_find_any_alias(inode);
 
-	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+	for_each_alias(alias, inode) {
 		spin_lock(&alias->d_lock);
  		if (!d_unhashed(alias)) {
 			dget_dlock(alias);
@@ -1133,7 +1133,7 @@ void d_prune_aliases(struct inode *inode)
 	struct dentry *dentry;
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias)
+	for_each_alias(dentry, inode)
 		d_dispose_if_unused(dentry, &dispose);
 	spin_unlock(&inode->i_lock);
 	shrink_dentry_list(&dispose);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 6c9be60a3e48..f67b3ce672fc 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -52,7 +52,7 @@ find_acceptable_alias(struct dentry *result,
 
 	inode = result->d_inode;
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
+	for_each_alias(dentry, inode) {
 		dget(dentry);
 		spin_unlock(&inode->i_lock);
 		if (toput)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2402f57c8e7d..5a0bd8113e3a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1471,7 +1471,7 @@ static void nfs_clear_verifier_file(struct inode *inode)
 	struct dentry *alias;
 	struct inode *dir;
 
-	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+	for_each_alias(alias, inode) {
 		spin_lock(&alias->d_lock);
 		dir = d_inode_rcu(alias->d_parent);
 		if (!dir ||
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9995de1710e5..b7198c4744e3 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -76,7 +76,7 @@ void fsnotify_set_children_dentry_flags(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	/* run all of the dentries associated with this inode.  Since this is a
 	 * directory, there damn well better only be one item on this list */
-	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+	for_each_alias(alias, inode) {
 		struct dentry *child;
 
 		/* run all of the children of the original inode and fix their
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index c4ba968e778b..e06774fd89d8 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -145,7 +145,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 	struct dentry *dentry;
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
+	for_each_alias(dentry, inode) {
 		spin_lock(&dentry->d_lock);
 		if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
 			trace_ocfs2_find_local_alias(dentry->d_name.len,
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index ff3dbd1ca61f..f8dfa534b566 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -904,7 +904,7 @@ static void ovl_drop_nlink(struct dentry *dentry)
 
 	/* Try to find another, hashed alias */
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+	for_each_alias(alias, inode) {
 		if (alias != dentry && !d_unhashed(alias))
 			break;
 	}
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 888f9e35f14b..e2b4ad9bd0bd 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1595,7 +1595,7 @@ inode_has_hashed_dentries(struct inode *inode)
 	struct dentry *dentry;
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
+	for_each_alias(dentry, inode) {
 		if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
 			spin_unlock(&inode->i_lock);
 			return true;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 898c60d21c92..7f1dbc7121d7 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -615,4 +615,8 @@ void set_default_d_op(struct super_block *, const struct dentry_operations *);
 struct dentry *d_make_persistent(struct dentry *, struct inode *);
 void d_make_discardable(struct dentry *dentry);
 
+/* inode->i_lock must be held over that */
+#define for_each_alias(dentry, inode) \
+	hlist_for_each_entry(dentry, &(inode)->i_dentry, d_u.d_alias)
+
 #endif	/* __LINUX_DCACHE_H */
-- 
cgit v1.2.3


From 2420067cecacb1d1bf6dc39294d0c9f04066ff98 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 27 Jan 2026 22:51:37 -0500
Subject: struct dentry: make ->d_u anonymous

Making ->d_rcu and (then) ->d_child overlapping dates back to
2006; anon unions support had been added to gcc only in 4.6
(2011) and the minimal gcc version hadn't been bumped to that
until 4.19 (2018).

These days there's no reason not to keep that union named.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ceph/mds_client.c   |  2 +-
 fs/dcache.c            | 48 ++++++++++++++++++++++++------------------------
 fs/inode.c             |  2 +-
 fs/nfs/dir.c           |  2 +-
 fs/nfs/getroot.c       |  2 +-
 include/linux/dcache.h |  4 ++--
 6 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f839109fb66f..a5eb99c3c36b 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4608,7 +4608,7 @@ static struct dentry* d_find_primary(struct inode *inode)
 		goto out_unlock;
 
 	if (S_ISDIR(inode->i_mode)) {
-		alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
+		alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
 		if (!IS_ROOT(alias))
 			dn = dget(alias);
 		goto out_unlock;
diff --git a/fs/dcache.c b/fs/dcache.c
index e069b6ec4ec0..4378eb8a00bb 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -40,7 +40,7 @@
 /*
  * Usage:
  * dcache->d_inode->i_lock protects:
- *   - i_dentry, d_u.d_alias, d_inode of aliases
+ *   - i_dentry, d_alias, d_inode of aliases
  * dcache_hash_bucket lock protects:
  *   - the dcache hash table
  * s_roots bl list spinlock protects:
@@ -55,7 +55,7 @@
  *   - d_unhashed()
  *   - d_parent and d_chilren
  *   - childrens' d_sib and d_parent
- *   - d_u.d_alias, d_inode
+ *   - d_alias, d_inode
  *
  * Ordering:
  * dentry->d_inode->i_lock
@@ -341,14 +341,14 @@ static inline struct external_name *external_name(struct dentry *dentry)
 
 static void __d_free(struct rcu_head *head)
 {
-	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
+	struct dentry *dentry = container_of(head, struct dentry, d_rcu);
 
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
 static void __d_free_external(struct rcu_head *head)
 {
-	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
+	struct dentry *dentry = container_of(head, struct dentry, d_rcu);
 	kfree(external_name(dentry));
 	kmem_cache_free(dentry_cache, dentry);
 }
@@ -428,19 +428,19 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
 
 static void dentry_free(struct dentry *dentry)
 {
-	WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
+	WARN_ON(!hlist_unhashed(&dentry->d_alias));
 	if (unlikely(dname_external(dentry))) {
 		struct external_name *p = external_name(dentry);
 		if (likely(atomic_dec_and_test(&p->count))) {
-			call_rcu(&dentry->d_u.d_rcu, __d_free_external);
+			call_rcu(&dentry->d_rcu, __d_free_external);
 			return;
 		}
 	}
 	/* if dentry was never visible to RCU, immediate free is OK */
 	if (dentry->d_flags & DCACHE_NORCU)
-		__d_free(&dentry->d_u.d_rcu);
+		__d_free(&dentry->d_rcu);
 	else
-		call_rcu(&dentry->d_u.d_rcu, __d_free);
+		call_rcu(&dentry->d_rcu, __d_free);
 }
 
 /*
@@ -455,7 +455,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
 
 	raw_write_seqcount_begin(&dentry->d_seq);
 	__d_clear_type_and_inode(dentry);
-	hlist_del_init(&dentry->d_u.d_alias);
+	hlist_del_init(&dentry->d_alias);
 	raw_write_seqcount_end(&dentry->d_seq);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&inode->i_lock);
@@ -1010,7 +1010,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
 
 	if (hlist_empty(&inode->i_dentry))
 		return NULL;
-	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
+	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
 	lockref_get(&alias->d_lockref);
 	return alias;
 }
@@ -1093,9 +1093,9 @@ struct dentry *d_find_alias_rcu(struct inode *inode)
 	// used without having I_FREEING set, which means no aliases left
 	if (likely(!(inode_state_read(inode) & I_FREEING) && !hlist_empty(l))) {
 		if (S_ISDIR(inode->i_mode)) {
-			de = hlist_entry(l->first, struct dentry, d_u.d_alias);
+			de = hlist_entry(l->first, struct dentry, d_alias);
 		} else {
-			hlist_for_each_entry(de, l, d_u.d_alias)
+			hlist_for_each_entry(de, l, d_alias)
 				if (!d_unhashed(de))
 					break;
 		}
@@ -1787,7 +1787,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	INIT_HLIST_BL_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_HLIST_HEAD(&dentry->d_children);
-	INIT_HLIST_NODE(&dentry->d_u.d_alias);
+	INIT_HLIST_NODE(&dentry->d_alias);
 	INIT_HLIST_NODE(&dentry->d_sib);
 
 	if (dentry->d_op && dentry->d_op->d_init) {
@@ -1980,7 +1980,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 	if ((dentry->d_flags &
 	     (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
 		this_cpu_dec(nr_dentry_negative);
-	hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+	hlist_add_head(&dentry->d_alias, &inode->i_dentry);
 	raw_write_seqcount_begin(&dentry->d_seq);
 	__d_set_inode_and_type(dentry, inode, add_flags);
 	raw_write_seqcount_end(&dentry->d_seq);
@@ -2004,7 +2004,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
  
 void d_instantiate(struct dentry *entry, struct inode * inode)
 {
-	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
+	BUG_ON(!hlist_unhashed(&entry->d_alias));
 	if (inode) {
 		security_d_instantiate(entry, inode);
 		spin_lock(&inode->i_lock);
@@ -2024,7 +2024,7 @@ EXPORT_SYMBOL(d_instantiate);
  */
 void d_instantiate_new(struct dentry *entry, struct inode *inode)
 {
-	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
+	BUG_ON(!hlist_unhashed(&entry->d_alias));
 	BUG_ON(!inode);
 	lockdep_annotate_inode_mutex_key(inode);
 	security_d_instantiate(entry, inode);
@@ -2087,7 +2087,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
 
 		spin_lock(&new->d_lock);
 		__d_set_inode_and_type(new, inode, add_flags);
-		hlist_add_head(&new->d_u.d_alias, &inode->i_dentry);
+		hlist_add_head(&new->d_alias, &inode->i_dentry);
 		if (!disconnected) {
 			hlist_bl_lock(&sb->s_roots);
 			hlist_bl_add_head(&new->d_hash, &sb->s_roots);
@@ -2658,7 +2658,7 @@ retry:
 	 * we unlock the chain.  All fields are stable in everything
 	 * we encounter.
 	 */
-	hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) {
+	hlist_bl_for_each_entry(dentry, node, b, d_in_lookup_hash) {
 		if (dentry->d_name.hash != hash)
 			continue;
 		if (dentry->d_parent != parent)
@@ -2700,7 +2700,7 @@ retry:
 	}
 	rcu_read_unlock();
 	new->d_wait = wq;
-	hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b);
+	hlist_bl_add_head(&new->d_in_lookup_hash, b);
 	hlist_bl_unlock(b);
 	return new;
 mismatch:
@@ -2725,11 +2725,11 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
 	b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash);
 	hlist_bl_lock(b);
 	dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
-	__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
+	__hlist_bl_del(&dentry->d_in_lookup_hash);
 	d_wait = dentry->d_wait;
 	dentry->d_wait = NULL;
 	hlist_bl_unlock(b);
-	INIT_HLIST_NODE(&dentry->d_u.d_alias);
+	INIT_HLIST_NODE(&dentry->d_alias);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	return d_wait;
 }
@@ -2760,7 +2760,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode,
 		d_set_d_op(dentry, ops);
 	if (inode) {
 		unsigned add_flags = d_flags_for_inode(inode);
-		hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+		hlist_add_head(&dentry->d_alias, &inode->i_dentry);
 		raw_write_seqcount_begin(&dentry->d_seq);
 		__d_set_inode_and_type(dentry, inode, add_flags);
 		raw_write_seqcount_end(&dentry->d_seq);
@@ -2795,7 +2795,7 @@ EXPORT_SYMBOL(d_add);
 
 struct dentry *d_make_persistent(struct dentry *dentry, struct inode *inode)
 {
-	WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
+	WARN_ON(!hlist_unhashed(&dentry->d_alias));
 	WARN_ON(!inode);
 	security_d_instantiate(dentry, inode);
 	spin_lock(&inode->i_lock);
@@ -3185,7 +3185,7 @@ void d_mark_tmpfile(struct file *file, struct inode *inode)
 	struct dentry *dentry = file->f_path.dentry;
 
 	BUG_ON(dname_external(dentry) ||
-		!hlist_unhashed(&dentry->d_u.d_alias) ||
+		!hlist_unhashed(&dentry->d_alias) ||
 		!d_unlinked(dentry));
 	spin_lock(&dentry->d_parent->d_lock);
 	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
diff --git a/fs/inode.c b/fs/inode.c
index cc12b68e021b..9e1ab333d382 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -754,7 +754,7 @@ void dump_mapping(const struct address_space *mapping)
 		return;
 	}
 
-	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
+	dentry_ptr = container_of(dentry_first, struct dentry, d_alias);
 	if (get_kernel_nofault(dentry, dentry_ptr) ||
 	    !dentry.d_parent || !dentry.d_name.name) {
 		pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5a0bd8113e3a..f2f1b036f2f1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1490,7 +1490,7 @@ static void nfs_clear_verifier_directory(struct inode *dir)
 	if (hlist_empty(&dir->i_dentry))
 		return;
 	this_parent =
-		hlist_entry(dir->i_dentry.first, struct dentry, d_u.d_alias);
+		hlist_entry(dir->i_dentry.first, struct dentry, d_alias);
 
 	spin_lock(&this_parent->d_lock);
 	nfs_unset_verifier_delegated(&this_parent->d_time);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index f13d25d95b85..eef0736beb67 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -54,7 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
 		 */
 		spin_lock(&d_inode(sb->s_root)->i_lock);
 		spin_lock(&sb->s_root->d_lock);
-		hlist_del_init(&sb->s_root->d_u.d_alias);
+		hlist_del_init(&sb->s_root->d_alias);
 		spin_unlock(&sb->s_root->d_lock);
 		spin_unlock(&d_inode(sb->s_root)->i_lock);
 	}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 7f1dbc7121d7..f939d2ed10a3 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -128,7 +128,7 @@ struct dentry {
 		struct hlist_node d_alias;	/* inode alias list */
 		struct hlist_bl_node d_in_lookup_hash;	/* only for in-lookup ones */
 	 	struct rcu_head d_rcu;
-	} d_u;
+	};
 };
 
 /*
@@ -617,6 +617,6 @@ void d_make_discardable(struct dentry *dentry);
 
 /* inode->i_lock must be held over that */
 #define for_each_alias(dentry, inode) \
-	hlist_for_each_entry(dentry, &(inode)->i_dentry, d_u.d_alias)
+	hlist_for_each_entry(dentry, &(inode)->i_dentry, d_alias)
 
 #endif	/* __LINUX_DCACHE_H */
-- 
cgit v1.2.3


From 241cb8dee0f83856c728f4fe2c29e331386c92f2 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Fri, 30 Jan 2026 16:47:26 +0000
Subject: comedi: add comedi_check_request_region()

There is an existing comedi_request_region(dev, start, len) function
used by COMEDI drivers for legacy devices to request an I/O port region
starting at a specified base address (which must be non-zero) and with a
specified length.  It uses request_region().  On success, it sets
dev->iobase and dev->iolen and returns 0.  There is a alternative
function __comedi_request_region(dev, start, len) which does the same
thing without setting dev->iobase and dev->iolen.

Most hardware devices have restrictions on the allowed I/O port base
address and alignment, so add new functions
comedi_check_request_region(dev, start, len, minstart, maxend, minalign)
and __comedi_check_request_region(dev, start, len, minstart, maxend,
minalign) to perform these additional checks.  Turn the original
functions into static inline wrapper functions that call the new
functions.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Link: https://patch.msgid.link/20260130170416.49994-2-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/comedi/drivers.c         | 46 +++++++++++++++++++++++++---------
 include/linux/comedi/comedidev.h | 53 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 84 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c
index db225a3bf012..5b02107bb6bf 100644
--- a/drivers/comedi/drivers.c
+++ b/drivers/comedi/drivers.c
@@ -933,19 +933,24 @@ int comedi_load_firmware(struct comedi_device *dev,
 EXPORT_SYMBOL_GPL(comedi_load_firmware);
 
 /**
- * __comedi_request_region() - Request an I/O region for a legacy driver
+ * __comedi_check_request_region() - Request an I/O region for a legacy driver
  * @dev: COMEDI device.
  * @start: Base address of the I/O region.
  * @len: Length of the I/O region.
+ * @minstart: Minimum allowed start address of region.
+ * @maxend: Maximum allowed region end address of region.
+ * @minalign: Required alignment for base address.
  *
  * Requests the specified I/O port region which must start at a non-zero
- * address.
+ * address, must fall within specified bounds, and must be correctly aligned.
  *
  * Returns 0 on success, -EINVAL if @start is 0, or -EIO if the request
  * fails.
  */
-int __comedi_request_region(struct comedi_device *dev,
-			    unsigned long start, unsigned long len)
+int __comedi_check_request_region(struct comedi_device *dev,
+				  unsigned long start, unsigned long len,
+				  unsigned long minstart, unsigned long maxend,
+				  unsigned long minalign)
 {
 	if (!start) {
 		dev_warn(dev->class_dev,
@@ -954,6 +959,19 @@ int __comedi_request_region(struct comedi_device *dev,
 		return -EINVAL;
 	}
 
+	if (start < minstart || start > maxend || maxend - start < len - 1) {
+		dev_warn(dev->class_dev,
+			 "%s: I/O base address or length out of range\n",
+			 dev->board_name);
+		return -EINVAL;
+	}
+	if (!IS_ALIGNED(start, minalign)) {
+		dev_warn(dev->class_dev,
+			 "%s: I/O base address not correctly aligned\n",
+			 dev->board_name);
+		return -EINVAL;
+	}
+
 	if (!request_region(start, len, dev->board_name)) {
 		dev_warn(dev->class_dev, "%s: I/O port conflict (%#lx,%lu)\n",
 			 dev->board_name, start, len);
@@ -962,16 +980,19 @@ int __comedi_request_region(struct comedi_device *dev,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(__comedi_request_region);
+EXPORT_SYMBOL_GPL(__comedi_check_request_region);
 
 /**
- * comedi_request_region() - Request an I/O region for a legacy driver
+ * comedi_check_request_region() - Request an I/O region for a legacy driver
  * @dev: COMEDI device.
  * @start: Base address of the I/O region.
  * @len: Length of the I/O region.
+ * @minstart: Minimum allowed start address of region.
+ * @maxend: Maximum allowed region end address of region.
+ * @minalign: Required alignment for base address.
  *
  * Requests the specified I/O port region which must start at a non-zero
- * address.
+ * address, must fall within specified bounds, and must be correctly aligned.
  *
  * On success, @dev->iobase is set to the base address of the region and
  * @dev->iolen is set to its length.
@@ -979,12 +1000,15 @@ EXPORT_SYMBOL_GPL(__comedi_request_region);
  * Returns 0 on success, -EINVAL if @start is 0, or -EIO if the request
  * fails.
  */
-int comedi_request_region(struct comedi_device *dev,
-			  unsigned long start, unsigned long len)
+int comedi_check_request_region(struct comedi_device *dev,
+				unsigned long start, unsigned long len,
+				unsigned long minstart, unsigned long maxend,
+				unsigned long minalign)
 {
 	int ret;
 
-	ret = __comedi_request_region(dev, start, len);
+	ret = __comedi_check_request_region(dev, start, len, minstart, maxend,
+					    minalign);
 	if (ret == 0) {
 		dev->iobase = start;
 		dev->iolen = len;
@@ -992,7 +1016,7 @@ int comedi_request_region(struct comedi_device *dev,
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(comedi_request_region);
+EXPORT_SYMBOL_GPL(comedi_check_request_region);
 
 /**
  * comedi_legacy_detach() - A generic (*detach) function for legacy drivers
diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h
index 35fdc41845ce..577a08f37aee 100644
--- a/include/linux/comedi/comedidev.h
+++ b/include/linux/comedi/comedidev.h
@@ -1026,10 +1026,55 @@ int comedi_load_firmware(struct comedi_device *dev, struct device *hw_dev,
 				   unsigned long context),
 			 unsigned long context);
 
-int __comedi_request_region(struct comedi_device *dev,
-			    unsigned long start, unsigned long len);
-int comedi_request_region(struct comedi_device *dev,
-			  unsigned long start, unsigned long len);
+int __comedi_check_request_region(struct comedi_device *dev,
+				  unsigned long start, unsigned long len,
+				  unsigned long minstart, unsigned long maxend,
+				  unsigned long minalign);
+int comedi_check_request_region(struct comedi_device *dev,
+				unsigned long start, unsigned long len,
+				unsigned long minstart, unsigned long maxend,
+				unsigned long minalign);
+
+/**
+ * __comedi_request_region() - Request an I/O region for a legacy driver
+ * @dev: COMEDI device.
+ * @start: Base address of the I/O region.
+ * @len: Length of the I/O region.
+ *
+ * Requests the specified I/O port region which must start at a non-zero
+ * address.
+ *
+ * Returns 0 on success, -EINVAL if @start is 0, or -EIO if the request
+ * fails.
+ */
+static inline int __comedi_request_region(struct comedi_device *dev,
+					  unsigned long start,
+					  unsigned long len)
+{
+	return __comedi_check_request_region(dev, start, len, 0, ~0ul, 1);
+}
+
+/**
+ * comedi_request_region() - Request an I/O region for a legacy driver
+ * @dev: COMEDI device.
+ * @start: Base address of the I/O region.
+ * @len: Length of the I/O region.
+ *
+ * Requests the specified I/O port region which must start at a non-zero
+ * address.
+ *
+ * On success, @dev->iobase is set to the base address of the region and
+ * @dev->iolen is set to its length.
+ *
+ * Returns 0 on success, -EINVAL if @start is 0, or -EIO if the request
+ * fails.
+ */
+static inline int comedi_request_region(struct comedi_device *dev,
+					unsigned long start, unsigned long len)
+{
+	return comedi_check_request_region(dev, start, len, 0, ~0ul, 1);
+}
+
 void comedi_legacy_detach(struct comedi_device *dev);
 
 int comedi_auto_config(struct device *hardware_device,
-- 
cgit v1.2.3


From 6c561848ee5246f083770aaf39b2666f940d60dd Mon Sep 17 00:00:00 2001
From: Rosen Penev <rosenp@gmail.com>
Date: Wed, 11 Mar 2026 16:24:59 -0700
Subject: comedi: isadma: use kzalloc_flex

Switched struct pointer member to a flexible array member to get rid of
kzalloc_objs as there's no need for them to be separately allocated.

AAdded __counted_by for extra runtime analysis.

Signed-off-by: Rosen Penev <rosenp@gmail.com>
Link: https://patch.msgid.link/20260311232459.18407-1-rosenp@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/comedi/drivers/comedi_isadma.c | 21 +++++++--------------
 include/linux/comedi/comedi_isadma.h   |  2 +-
 2 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/comedi/drivers/comedi_isadma.c b/drivers/comedi/drivers/comedi_isadma.c
index f480640413f0..b346079b9b6b 100644
--- a/drivers/comedi/drivers/comedi_isadma.c
+++ b/drivers/comedi/drivers/comedi_isadma.c
@@ -161,14 +161,10 @@ struct comedi_isadma *comedi_isadma_alloc(struct comedi_device *dev,
 	if (n_desc < 1 || n_desc > 2)
 		goto no_dma;
 
-	dma = kzalloc_obj(*dma);
+	dma = kzalloc_flex(*dma, desc, n_desc);
 	if (!dma)
 		goto no_dma;
 
-	desc = kzalloc_objs(*desc, n_desc);
-	if (!desc)
-		goto no_dma;
-	dma->desc = desc;
 	dma->n_desc = n_desc;
 	if (dev->hw_dev) {
 		dma->dev = dev->hw_dev;
@@ -231,15 +227,12 @@ void comedi_isadma_free(struct comedi_isadma *dma)
 	if (!dma)
 		return;
 
-	if (dma->desc) {
-		for (i = 0; i < dma->n_desc; i++) {
-			desc = &dma->desc[i];
-			if (desc->virt_addr)
-				dma_free_coherent(dma->dev, desc->maxsize,
-						  desc->virt_addr,
-						  desc->hw_addr);
-		}
-		kfree(dma->desc);
+	for (i = 0; i < dma->n_desc; i++) {
+		desc = &dma->desc[i];
+		if (desc->virt_addr)
+			dma_free_coherent(dma->dev, desc->maxsize,
+					  desc->virt_addr,
+					  desc->hw_addr);
 	}
 	if (dma->chan2 && dma->chan2 != dma->chan)
 		free_dma(dma->chan2);
diff --git a/include/linux/comedi/comedi_isadma.h b/include/linux/comedi/comedi_isadma.h
index 9d2b12db7e6e..7514ce222fa6 100644
--- a/include/linux/comedi/comedi_isadma.h
+++ b/include/linux/comedi/comedi_isadma.h
@@ -48,11 +48,11 @@ struct comedi_isadma_desc {
  */
 struct comedi_isadma {
 	struct device *dev;
-	struct comedi_isadma_desc *desc;
 	int n_desc;
 	int cur_dma;
 	unsigned int chan;
 	unsigned int chan2;
+	struct comedi_isadma_desc desc[] __counted_by(n_desc);
 };
 
 #if IS_ENABLED(CONFIG_ISA_DMA_API)
-- 
cgit v1.2.3


From 25e531b422dc2ac90cdae3b6e74b5cdeb081440d Mon Sep 17 00:00:00 2001
From: Michal Pecio <michal.pecio@gmail.com>
Date: Thu, 2 Apr 2026 16:13:42 +0300
Subject: usb: xhci: Make usb_host_endpoint.hcpriv survive endpoint_disable()

xHCI hardware maintains its endpoint state between add_endpoint()
and drop_endpoint() calls followed by successful check_bandwidth().
So does the driver.

Core may call endpoint_disable() during xHCI endpoint life, so don't
clear host_ep->hcpriv then, because this breaks endpoint_reset().

If a driver calls usb_set_interface(), submits URBs which make host
sequence state non-zero and calls usb_clear_halt(), the device clears
its sequence state but xhci_endpoint_reset() bails out. The next URB
malfunctions: USB2 loses one packet, USB3 gets Transaction Error or
may not complete at all on some (buggy?) HCs from ASMedia and AMD.
This is triggered by uvcvideo on bulk video devices.

The code was copied from ehci_endpoint_disable() but it isn't needed
here - hcpriv should only be NULL on emulated root hub endpoints.
It might prevent resetting and inadvertently enabling a disabled and
dropped endpoint, but core shouldn't try to reset dropped endpoints.

Document xhci requirements regarding hcpriv. They are currently met.

Fixes: 18b74067ac78 ("xhci: Fix use-after-free regression in xhci clear hub TT implementation")
Cc: stable@vger.kernel.org
Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://patch.msgid.link/20260402131342.2628648-26-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 1 -
 include/linux/usb.h     | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 6d27c471d4da..a54f5b57f205 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3292,7 +3292,6 @@ rescan:
 		xhci_dbg(xhci, "endpoint disable with ep_state 0x%x\n",
 			 ep->ep_state);
 done:
-	host_ep->hcpriv = NULL;
 	spin_unlock_irqrestore(&xhci->lock, flags);
 }
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 815f2212936e..779bbfdfa0c7 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -54,7 +54,8 @@ struct ep_device;
  * @eusb2_isoc_ep_comp: eUSB2 isoc companion descriptor for this endpoint
  * @urb_list: urbs queued to this endpoint; maintained by usbcore
  * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH)
- *	with one or more transfer descriptors (TDs) per urb
+ *	with one or more transfer descriptors (TDs) per urb; must be preserved
+ *	by core while BW is allocated for the endpoint
  * @ep_dev: ep_device for sysfs info
  * @extra: descriptors following this endpoint in the configuration
  * @extralen: how many bytes of "extra" are valid
-- 
cgit v1.2.3


From 2d7ce8eb59ec880774c7500ac949f0100acba521 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 25 Feb 2026 21:12:07 -0800
Subject: misc: apds990x: fix all kernel-doc warnings

Move a #define so that it is not between kernel-doc and its struct
declaration.
Spell one struct member correctly.

Warning: include/linux/platform_data/apds990x.h:33 #define
 APDS_PARAM_SCALE 4096; error: Cannot parse struct or union!
Warning: include/linux/platform_data/apds990x.h:62 struct member
 'pdrive' not described in 'apds990x_platform_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260226051207.547152-1-rdunlap@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/platform_data/apds990x.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/apds990x.h b/include/linux/platform_data/apds990x.h
index 94dfbaa365e1..37684f68c04f 100644
--- a/include/linux/platform_data/apds990x.h
+++ b/include/linux/platform_data/apds990x.h
@@ -31,7 +31,6 @@
  * itself. If the GA is zero, driver will use uncovered sensor default values
  * format: decimal value * APDS_PARAM_SCALE except df which is plain integer.
  */
-#define APDS_PARAM_SCALE 4096
 struct apds990x_chip_factors {
 	int ga;
 	int cf1;
@@ -40,11 +39,12 @@ struct apds990x_chip_factors {
 	int irf2;
 	int df;
 };
+#define APDS_PARAM_SCALE 4096
 
 /**
  * struct apds990x_platform_data - platform data for apsd990x.c driver
  * @cf: chip factor data
- * @pddrive: IR-led driving current
+ * @pdrive: IR-led driving current
  * @ppcount: number of IR pulses used for proximity estimation
  * @setup_resources: interrupt line setup call back function
  * @release_resources: interrupt line release call back function
-- 
cgit v1.2.3


From c03791085adcd61fa9b766ab303c7d0941d7378d Mon Sep 17 00:00:00 2001
From: K Prateek Nayak <kprateek.nayak@amd.com>
Date: Mon, 16 Mar 2026 08:18:49 +0000
Subject: cpufreq: Pass the policy to cpufreq_driver->adjust_perf()

cpufreq_cpu_get() can sleep on PREEMPT_RT in presence of concurrent
writer(s), however amd-pstate depends on fetching the cpudata via the
policy's driver data which necessitates grabbing the reference.

Since schedutil governor can call "cpufreq_driver->update_perf()"
during sched_tick/enqueue/dequeue with rq_lock held and IRQs disabled,
fetching the policy object using the cpufreq_cpu_get() helper in the
scheduler fast-path leads to "BUG: scheduling while atomic" on
PREEMPT_RT [1].

Pass the cached cpufreq policy object in sg_policy to the update_perf()
instead of just the CPU. The CPU can be inferred using "policy->cpu".

The lifetime of cpufreq_policy object outlasts that of the governor and
the cpufreq driver (allocated when the CPU is onlined and only reclaimed
when the CPU is offlined / the CPU device is removed) which makes it
safe to be referenced throughout the governor's lifetime.

Closes:https://lore.kernel.org/all/20250731092316.3191-1-spasswolf@web.de/ [1]

Fixes: 1d215f0319c2 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State")
Reported-by: Bert Karwatzki <spasswolf@web.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Acked-by: Gary Guo <gary@garyguo.net> # Rust
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Zhongqiu Han <zhongqiu.han@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260316081849.19368-3-kprateek.nayak@amd.com
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
---
 drivers/cpufreq/amd-pstate.c     |  3 +--
 drivers/cpufreq/cpufreq.c        |  6 +++---
 drivers/cpufreq/intel_pstate.c   |  4 ++--
 include/linux/cpufreq.h          |  4 ++--
 kernel/sched/cpufreq_schedutil.c |  5 +++--
 rust/kernel/cpufreq.rs           | 13 ++++++-------
 6 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 2ea4d27fe020..c825fab0bf5c 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -788,13 +788,12 @@ static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
 	return policy->cur;
 }
 
-static void amd_pstate_adjust_perf(unsigned int cpu,
+static void amd_pstate_adjust_perf(struct cpufreq_policy *policy,
 				   unsigned long _min_perf,
 				   unsigned long target_perf,
 				   unsigned long capacity)
 {
 	u8 max_perf, min_perf, des_perf, cap_perf;
-	struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
 	struct amd_cpudata *cpudata;
 	union perf_cached perf;
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 277884d91913..90e939069cde 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2231,7 +2231,7 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
 
 /**
  * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
- * @cpu: Target CPU.
+ * @policy: cpufreq policy object of the target CPU.
  * @min_perf: Minimum (required) performance level (units of @capacity).
  * @target_perf: Target (desired) performance level (units of @capacity).
  * @capacity: Capacity of the target CPU.
@@ -2250,12 +2250,12 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
  * parallel with either ->target() or ->target_index() or ->fast_switch() for
  * the same CPU.
  */
-void cpufreq_driver_adjust_perf(unsigned int cpu,
+void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
 				 unsigned long min_perf,
 				 unsigned long target_perf,
 				 unsigned long capacity)
 {
-	cpufreq_driver->adjust_perf(cpu, min_perf, target_perf, capacity);
+	cpufreq_driver->adjust_perf(policy, min_perf, target_perf, capacity);
 }
 
 /**
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 11c58af41900..0f50034e4b68 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3239,12 +3239,12 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
 	return target_pstate * cpu->pstate.scaling;
 }
 
-static void intel_cpufreq_adjust_perf(unsigned int cpunum,
+static void intel_cpufreq_adjust_perf(struct cpufreq_policy *policy,
 				      unsigned long min_perf,
 				      unsigned long target_perf,
 				      unsigned long capacity)
 {
-	struct cpudata *cpu = all_cpu_data[cpunum];
+	struct cpudata *cpu = all_cpu_data[policy->cpu];
 	u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
 	int old_pstate = cpu->pstate.current_pstate;
 	int cap_pstate, min_pstate, max_pstate, target_pstate;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index cc894fc38971..4317c5a312bd 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -372,7 +372,7 @@ struct cpufreq_driver {
 	 * conditions) scale invariance can be disabled, which causes the
 	 * schedutil governor to fall back to the latter.
 	 */
-	void		(*adjust_perf)(unsigned int cpu,
+	void		(*adjust_perf)(struct cpufreq_policy *policy,
 				       unsigned long min_perf,
 				       unsigned long target_perf,
 				       unsigned long capacity);
@@ -617,7 +617,7 @@ struct cpufreq_governor {
 /* Pass a target to the cpufreq driver */
 unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
 					unsigned int target_freq);
-void cpufreq_driver_adjust_perf(unsigned int cpu,
+void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
 				unsigned long min_perf,
 				unsigned long target_perf,
 				unsigned long capacity);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 153232dd8276..ae9fd211cec1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -461,6 +461,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
 				     unsigned int flags)
 {
 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	unsigned long prev_util = sg_cpu->util;
 	unsigned long max_cap;
 
@@ -482,10 +483,10 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
 	if (sugov_hold_freq(sg_cpu) && sg_cpu->util < prev_util)
 		sg_cpu->util = prev_util;
 
-	cpufreq_driver_adjust_perf(sg_cpu->cpu, sg_cpu->bw_min,
+	cpufreq_driver_adjust_perf(sg_policy->policy, sg_cpu->bw_min,
 				   sg_cpu->util, max_cap);
 
-	sg_cpu->sg_policy->last_freq_update_time = time;
+	sg_policy->last_freq_update_time = time;
 }
 
 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs
index f5adee48d40c..d8d26870bea2 100644
--- a/rust/kernel/cpufreq.rs
+++ b/rust/kernel/cpufreq.rs
@@ -1257,18 +1257,17 @@ impl<T: Driver> Registration<T> {
     /// # Safety
     ///
     /// - This function may only be called from the cpufreq C infrastructure.
+    /// - The pointer arguments must be valid pointers.
     unsafe extern "C" fn adjust_perf_callback(
-        cpu: c_uint,
+        ptr: *mut bindings::cpufreq_policy,
         min_perf: c_ulong,
         target_perf: c_ulong,
         capacity: c_ulong,
     ) {
-        // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number.
-        let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) };
-
-        if let Ok(mut policy) = PolicyCpu::from_cpu(cpu_id) {
-            T::adjust_perf(&mut policy, min_perf, target_perf, capacity);
-        }
+        // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
+        // lifetime of `policy`.
+        let policy = unsafe { Policy::from_raw_mut(ptr) };
+        T::adjust_perf(policy, min_perf, target_perf, capacity);
     }
 
     /// Driver's `get_intermediate` callback.
-- 
cgit v1.2.3


From e417ac73d24ae68b8dd6a1b02f9db03a7a5c184b Mon Sep 17 00:00:00 2001
From: Nicolai Buchwitz <nb@tipi-net.de>
Date: Wed, 1 Apr 2026 14:38:44 +0200
Subject: net: phy: microchip: add downshift tunable support for LAN88xx

Implement the standard ETHTOOL_PHY_DOWNSHIFT tunable for the LAN88xx
PHY. This allows runtime configuration of the auto-downshift feature
via ethtool:

  ethtool --set-phy-tunable eth0 downshift on count 3

The LAN88xx PHY supports downshifting from 1000BASE-T to 100BASE-TX
after 2-5 failed auto-negotiation attempts. Valid count values are
2, 3, 4 and 5.

This is based on an earlier downstream implementation by Phil Elwell.

Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/20260401123848.696766-2-nb@tipi-net.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/microchip.c  | 64 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/microchipphy.h |  5 ++++
 2 files changed, 69 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
index dc8634e7bcbe..bc293d2dd130 100644
--- a/drivers/net/phy/microchip.c
+++ b/drivers/net/phy/microchip.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2015 Microchip Technology
  */
+#include <linux/bitfield.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mii.h>
@@ -193,6 +194,67 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 		phydev_warn(phydev, "Failed to Set Register[0x1686]\n");
 }
 
+static int lan88xx_get_downshift(struct phy_device *phydev, u8 *data)
+{
+	int val;
+
+	val = phy_read_paged(phydev, 1, LAN78XX_PHY_CTRL3);
+	if (val < 0)
+		return val;
+
+	if (!(val & LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT)) {
+		*data = DOWNSHIFT_DEV_DISABLE;
+		return 0;
+	}
+
+	*data = FIELD_GET(LAN78XX_PHY_CTRL3_DOWNSHIFT_CTRL_MASK, val) + 2;
+
+	return 0;
+}
+
+static int lan88xx_set_downshift(struct phy_device *phydev, u8 cnt)
+{
+	u32 mask = LAN78XX_PHY_CTRL3_DOWNSHIFT_CTRL_MASK |
+		   LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT;
+
+	if (cnt == DOWNSHIFT_DEV_DISABLE)
+		return phy_modify_paged(phydev, 1, LAN78XX_PHY_CTRL3,
+					LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT, 0);
+
+	if (cnt == DOWNSHIFT_DEV_DEFAULT_COUNT)
+		cnt = 2;
+
+	if (cnt < 2 || cnt > 5)
+		return -EINVAL;
+
+	return phy_modify_paged(phydev, 1, LAN78XX_PHY_CTRL3, mask,
+				FIELD_PREP(LAN78XX_PHY_CTRL3_DOWNSHIFT_CTRL_MASK,
+					   cnt - 2) |
+				LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT);
+}
+
+static int lan88xx_get_tunable(struct phy_device *phydev,
+			       struct ethtool_tunable *tuna, void *data)
+{
+	switch (tuna->id) {
+	case ETHTOOL_PHY_DOWNSHIFT:
+		return lan88xx_get_downshift(phydev, data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int lan88xx_set_tunable(struct phy_device *phydev,
+			       struct ethtool_tunable *tuna, const void *data)
+{
+	switch (tuna->id) {
+	case ETHTOOL_PHY_DOWNSHIFT:
+		return lan88xx_set_downshift(phydev, *(const u8 *)data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int lan88xx_probe(struct phy_device *phydev)
 {
 	struct device *dev = &phydev->mdio.dev;
@@ -499,6 +561,8 @@ static struct phy_driver microchip_phy_driver[] = {
 	.set_wol	= lan88xx_set_wol,
 	.read_page	= lan88xx_read_page,
 	.write_page	= lan88xx_write_page,
+	.get_tunable	= lan88xx_get_tunable,
+	.set_tunable	= lan88xx_set_tunable,
 },
 {
 	PHY_ID_MATCH_MODEL(PHY_ID_LAN937X_TX),
diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h
index 517288da19fd..7da956c666a0 100644
--- a/include/linux/microchipphy.h
+++ b/include/linux/microchipphy.h
@@ -61,6 +61,11 @@
 /* Registers specific to the LAN7800/LAN7850 embedded phy */
 #define LAN78XX_PHY_LED_MODE_SELECT		(0x1D)
 
+/* PHY Control 3 register (page 1) */
+#define LAN78XX_PHY_CTRL3			(0x14)
+#define  LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT	BIT(4)
+#define  LAN78XX_PHY_CTRL3_DOWNSHIFT_CTRL_MASK	GENMASK(3, 2)
+
 /* DSP registers */
 #define PHY_ARDENNES_MMD_DEV_3_PHY_CFG		(0x806A)
 #define PHY_ARDENNES_MMD_DEV_3_PHY_CFG_ZD_DLY_EN_	(0x2000)
-- 
cgit v1.2.3


From ec1d77cb0ee98249142dcd0376d76e7a48ba0b31 Mon Sep 17 00:00:00 2001
From: Paul Chaignon <paul.chaignon@gmail.com>
Date: Thu, 2 Apr 2026 17:09:15 +0200
Subject: bpf: Use bpf_verifier_env buffers for reg_set_min_max

In a subsequent patch, the regs_refine_cond_op and reg_bounds_sync
functions will be called in is_branch_taken instead of reg_set_min_max,
to simulate each branch's outcome. Since they will run before we branch
out, these two functions will need to work on temporary registers for
the two branches.

This refactoring patch prepares for that change, by introducing the
temporary registers on bpf_verifier_env and using them in
reg_set_min_max.

This change also allows us to save one fake_reg slot as we don't need to
allocate an additional temporary buffer in case of a BPF_K condition.

Finally, you may notice that this patch removes the check for
"false_reg1 == false_reg2" in reg_set_min_max. That check was introduced
in commit d43ad9da8052 ("bpf: Skip bounds adjustment for conditional
jumps on same scalar register") to avoid an invariant violation. Given
that "env->false_reg1 == env->false_reg2" doesn't make sense and
invariant violations are addressed in a subsequent commit, this patch
just removes the check.

Suggested-by: Eduard Zingerman <eddyz87@gmail.com>
Co-developed-by: Harishankar Vishwanathan <harishankar.vishwanathan@gmail.com>
Signed-off-by: Harishankar Vishwanathan <harishankar.vishwanathan@gmail.com>
Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/260b0270052944a420e1c56e6a92df4d43cadf03.1775142354.git.paul.chaignon@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  4 ++-
 kernel/bpf/verifier.c        | 64 ++++++++++++++++----------------------------
 2 files changed, 26 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 090aa26d1c98..b129e0aaee20 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -837,7 +837,9 @@ struct bpf_verifier_env {
 	u64 scratched_stack_slots;
 	u64 prev_log_pos, prev_insn_print_pos;
 	/* buffer used to temporary hold constants as scalar registers */
-	struct bpf_reg_state fake_reg[2];
+	struct bpf_reg_state fake_reg[1];
+	/* buffers used to save updated reg states while simulating branches */
+	struct bpf_reg_state true_reg1, true_reg2, false_reg1, false_reg2;
 	/* buffer used to generate temporary string representations,
 	 * e.g., in reg_type_str() to generate reg_type string
 	 */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 760cd137bf3f..15defae1d7ed 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17253,10 +17253,6 @@ static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state
  * but we don't support that right now.
  */
 static int reg_set_min_max(struct bpf_verifier_env *env,
-			   struct bpf_reg_state *true_reg1,
-			   struct bpf_reg_state *true_reg2,
-			   struct bpf_reg_state *false_reg1,
-			   struct bpf_reg_state *false_reg2,
 			   u8 opcode, bool is_jmp32)
 {
 	int err;
@@ -17265,30 +17261,23 @@ static int reg_set_min_max(struct bpf_verifier_env *env,
 	 * variable offset from the compare (unless they were a pointer into
 	 * the same object, but we don't bother with that).
 	 */
-	if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE)
-		return 0;
-
-	/* We compute branch direction for same SCALAR_VALUE registers in
-	 * is_scalar_branch_taken(). For unknown branch directions (e.g., BPF_JSET)
-	 * on the same registers, we don't need to adjust the min/max values.
-	 */
-	if (false_reg1 == false_reg2)
+	if (env->false_reg1.type != SCALAR_VALUE || env->false_reg2.type != SCALAR_VALUE)
 		return 0;
 
 	/* fallthrough (FALSE) branch */
-	regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32);
-	reg_bounds_sync(false_reg1);
-	reg_bounds_sync(false_reg2);
+	regs_refine_cond_op(&env->false_reg1, &env->false_reg2, rev_opcode(opcode), is_jmp32);
+	reg_bounds_sync(&env->false_reg1);
+	reg_bounds_sync(&env->false_reg2);
 
 	/* jump (TRUE) branch */
-	regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32);
-	reg_bounds_sync(true_reg1);
-	reg_bounds_sync(true_reg2);
-
-	err = reg_bounds_sanity_check(env, true_reg1, "true_reg1");
-	err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2");
-	err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1");
-	err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2");
+	regs_refine_cond_op(&env->true_reg1, &env->true_reg2, opcode, is_jmp32);
+	reg_bounds_sync(&env->true_reg1);
+	reg_bounds_sync(&env->true_reg2);
+
+	err = reg_bounds_sanity_check(env, &env->true_reg1, "true_reg1");
+	err = err ?: reg_bounds_sanity_check(env, &env->true_reg2, "true_reg2");
+	err = err ?: reg_bounds_sanity_check(env, &env->false_reg1, "false_reg1");
+	err = err ?: reg_bounds_sanity_check(env, &env->false_reg2, "false_reg2");
 	return err;
 }
 
@@ -17672,6 +17661,10 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	}
 
 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
+	copy_register_state(&env->false_reg1, dst_reg);
+	copy_register_state(&env->false_reg2, src_reg);
+	copy_register_state(&env->true_reg1, dst_reg);
+	copy_register_state(&env->true_reg2, src_reg);
 	pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32);
 	if (pred >= 0) {
 		/* If we get here with a dst_reg pointer type it is because
@@ -17736,27 +17729,16 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		return PTR_ERR(other_branch);
 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
 
-	if (BPF_SRC(insn->code) == BPF_X) {
-		err = reg_set_min_max(env,
-				      &other_branch_regs[insn->dst_reg],
-				      &other_branch_regs[insn->src_reg],
-				      dst_reg, src_reg, opcode, is_jmp32);
-	} else /* BPF_SRC(insn->code) == BPF_K */ {
-		/* reg_set_min_max() can mangle the fake_reg. Make a copy
-		 * so that these are two different memory locations. The
-		 * src_reg is not used beyond here in context of K.
-		 */
-		memcpy(&env->fake_reg[1], &env->fake_reg[0],
-		       sizeof(env->fake_reg[0]));
-		err = reg_set_min_max(env,
-				      &other_branch_regs[insn->dst_reg],
-				      &env->fake_reg[0],
-				      dst_reg, &env->fake_reg[1],
-				      opcode, is_jmp32);
-	}
+	err = reg_set_min_max(env, opcode, is_jmp32);
 	if (err)
 		return err;
 
+	copy_register_state(dst_reg, &env->false_reg1);
+	copy_register_state(src_reg, &env->false_reg2);
+	copy_register_state(&other_branch_regs[insn->dst_reg], &env->true_reg1);
+	if (BPF_SRC(insn->code) == BPF_X)
+		copy_register_state(&other_branch_regs[insn->src_reg], &env->true_reg2);
+
 	if (BPF_SRC(insn->code) == BPF_X &&
 	    src_reg->type == SCALAR_VALUE && src_reg->id &&
 	    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
-- 
cgit v1.2.3


From 54e20be48fd4bc1df5f6fbca552b5be8c47dbd18 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 27 Mar 2026 07:16:40 +0100
Subject: xor: split xor.h

Keep xor.h for the public API, and split the struct xor_block_template
definition that is only needed by the xor.ko core and
architecture-specific optimizations into a separate xor_impl.h header.

Link: https://lkml.kernel.org/r/20260327061704.3707577-9-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Tested-by: Eric Biggers <ebiggers@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: David Sterba <dsterba@suse.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Li Nan <linan122@huawei.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/arm/lib/xor-neon.c       |  1 +
 arch/arm64/lib/xor-neon.c     |  1 +
 arch/s390/lib/xor.c           |  2 +-
 include/linux/raid/xor.h      | 22 +---------------------
 include/linux/raid/xor_impl.h | 25 +++++++++++++++++++++++++
 lib/raid/xor/xor-core.c       |  1 +
 6 files changed, 30 insertions(+), 22 deletions(-)
 create mode 100644 include/linux/raid/xor_impl.h

(limited to 'include/linux')

diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
index cf57fca97908..282980b9bf2a 100644
--- a/arch/arm/lib/xor-neon.c
+++ b/arch/arm/lib/xor-neon.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/raid/xor.h>
+#include <linux/raid/xor_impl.h>
 #include <linux/module.h>
 
 MODULE_DESCRIPTION("NEON accelerated XOR implementation");
diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
index 8fffebfa17b2..351aba92d932 100644
--- a/arch/arm64/lib/xor-neon.c
+++ b/arch/arm64/lib/xor-neon.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/raid/xor.h>
+#include <linux/raid/xor_impl.h>
 #include <linux/module.h>
 #include <asm/neon-intrinsics.h>
 
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
index 5363e4c2462d..3bbe21b40e66 100644
--- a/arch/s390/lib/xor.c
+++ b/arch/s390/lib/xor.c
@@ -8,7 +8,7 @@
 
 #include <linux/types.h>
 #include <linux/export.h>
-#include <linux/raid/xor.h>
+#include <linux/raid/xor_impl.h>
 #include <asm/xor.h>
 
 static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index 51b811b62322..02bda8d99534 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -7,24 +7,4 @@
 extern void xor_blocks(unsigned int count, unsigned int bytes,
 	void *dest, void **srcs);
 
-struct xor_block_template {
-        struct xor_block_template *next;
-        const char *name;
-        int speed;
-	void (*do_2)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_3)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_4)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_5)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
-};
-
-#endif
+#endif /* _XOR_H */
diff --git a/include/linux/raid/xor_impl.h b/include/linux/raid/xor_impl.h
new file mode 100644
index 000000000000..a1890cd66812
--- /dev/null
+++ b/include/linux/raid/xor_impl.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _XOR_IMPL_H
+#define _XOR_IMPL_H
+
+struct xor_block_template {
+	struct xor_block_template *next;
+	const char *name;
+	int speed;
+	void (*do_2)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_3)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_4)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_5)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
+};
+
+#endif /* _XOR_IMPL_H */
diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c
index e6e593e404fb..db1824011a12 100644
--- a/lib/raid/xor/xor-core.c
+++ b/lib/raid/xor/xor-core.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/gfp.h>
 #include <linux/raid/xor.h>
+#include <linux/raid/xor_impl.h>
 #include <linux/jiffies.h>
 #include <linux/preempt.h>
 #include <asm/xor.h>
-- 
cgit v1.2.3


From 35ebc4de105989034f1250e40eb6dbf5e136b04e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 27 Mar 2026 07:16:41 +0100
Subject: xor: remove macro abuse for XOR implementation registrations

Drop the pretty confusing historic XOR_TRY_TEMPLATES and
XOR_SELECT_TEMPLATE, and instead let the architectures provide a
arch_xor_init that calls either xor_register to register candidates or
xor_force to force a specific implementation.

Link: https://lkml.kernel.org/r/20260327061704.3707577-10-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Tested-by: Eric Biggers <ebiggers@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: David Sterba <dsterba@suse.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Li Nan <linan122@huawei.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/alpha/include/asm/xor.h     | 29 ++++++++++++++-------------
 arch/arm/include/asm/xor.h       | 25 ++++++++++++------------
 arch/arm64/include/asm/xor.h     | 18 ++++++++---------
 arch/loongarch/include/asm/xor.h | 42 +++++++++++++++-------------------------
 arch/powerpc/include/asm/xor.h   | 31 +++++++++++++----------------
 arch/riscv/include/asm/xor.h     | 19 +++++++++---------
 arch/s390/include/asm/xor.h      | 12 +++++-------
 arch/sparc/include/asm/xor_32.h  | 14 +++++++-------
 arch/sparc/include/asm/xor_64.h  | 31 ++++++++++++++---------------
 arch/x86/include/asm/xor.h       |  3 ---
 arch/x86/include/asm/xor_32.h    | 36 ++++++++++++++++++----------------
 arch/x86/include/asm/xor_64.h    | 18 ++++++++++-------
 arch/x86/include/asm/xor_avx.h   |  9 ---------
 include/asm-generic/xor.h        |  8 --------
 include/linux/raid/xor_impl.h    |  5 +++++
 lib/raid/xor/xor-core.c          | 41 +++++++++++++++++++++++++++++----------
 16 files changed, 168 insertions(+), 173 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/xor.h b/arch/alpha/include/asm/xor.h
index e0de0c233ab9..4c8085711df1 100644
--- a/arch/alpha/include/asm/xor.h
+++ b/arch/alpha/include/asm/xor.h
@@ -851,16 +851,19 @@ static struct xor_block_template xor_block_alpha_prefetch = {
 /* For grins, also test the generic routines.  */
 #include <asm-generic/xor.h>
 
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES				\
-	do {						\
-		xor_speed(&xor_block_8regs);		\
-		xor_speed(&xor_block_32regs);		\
-		xor_speed(&xor_block_alpha);		\
-		xor_speed(&xor_block_alpha_prefetch);	\
-	} while (0)
-
-/* Force the use of alpha_prefetch if EV6, as it is significantly
-   faster in the cold cache case.  */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
-	(implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST)
+/*
+ * Force the use of alpha_prefetch if EV6, as it is significantly faster in the
+ * cold cache case.
+ */
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	if (implver() == IMPLVER_EV6) {
+		xor_force(&xor_block_alpha_prefetch);
+	} else {
+		xor_register(&xor_block_8regs);
+		xor_register(&xor_block_32regs);
+		xor_register(&xor_block_alpha);
+		xor_register(&xor_block_alpha_prefetch);
+	}
+}
diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h
index bca2a6514746..b2dcd49186e2 100644
--- a/arch/arm/include/asm/xor.h
+++ b/arch/arm/include/asm/xor.h
@@ -138,15 +138,6 @@ static struct xor_block_template xor_block_arm4regs = {
 	.do_5	= xor_arm4regs_5,
 };
 
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES			\
-	do {					\
-		xor_speed(&xor_block_arm4regs);	\
-		xor_speed(&xor_block_8regs);	\
-		xor_speed(&xor_block_32regs);	\
-		NEON_TEMPLATES;			\
-	} while (0)
-
 #ifdef CONFIG_KERNEL_MODE_NEON
 
 extern struct xor_block_template const xor_block_neon_inner;
@@ -201,8 +192,16 @@ static struct xor_block_template xor_block_neon = {
 	.do_5	= xor_neon_5
 };
 
-#define NEON_TEMPLATES	\
-	do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0)
-#else
-#define NEON_TEMPLATES
+#endif /* CONFIG_KERNEL_MODE_NEON */
+
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_arm4regs);
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_32regs);
+#ifdef CONFIG_KERNEL_MODE_NEON
+	if (cpu_has_neon())
+		xor_register(&xor_block_neon);
 #endif
+}
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
index bb7428d4ebc6..3cee1eb86371 100644
--- a/arch/arm64/include/asm/xor.h
+++ b/arch/arm64/include/asm/xor.h
@@ -60,14 +60,14 @@ static struct xor_block_template xor_block_arm64 = {
 	.do_4   = xor_neon_4,
 	.do_5	= xor_neon_5
 };
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES           \
-	do {        \
-		xor_speed(&xor_block_8regs);    \
-		xor_speed(&xor_block_32regs);    \
-		if (cpu_has_neon()) { \
-			xor_speed(&xor_block_arm64);\
-		} \
-	} while (0)
+
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_32regs);
+	if (cpu_has_neon())
+		xor_register(&xor_block_arm64);
+}
 
 #endif /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h
index 12467fffee46..d17c0e3b047f 100644
--- a/arch/loongarch/include/asm/xor.h
+++ b/arch/loongarch/include/asm/xor.h
@@ -16,14 +16,6 @@ static struct xor_block_template xor_block_lsx = {
 	.do_4 = xor_lsx_4,
 	.do_5 = xor_lsx_5,
 };
-
-#define XOR_SPEED_LSX()					\
-	do {						\
-		if (cpu_has_lsx)			\
-			xor_speed(&xor_block_lsx);	\
-	} while (0)
-#else /* CONFIG_CPU_HAS_LSX */
-#define XOR_SPEED_LSX()
 #endif /* CONFIG_CPU_HAS_LSX */
 
 #ifdef CONFIG_CPU_HAS_LASX
@@ -34,14 +26,6 @@ static struct xor_block_template xor_block_lasx = {
 	.do_4 = xor_lasx_4,
 	.do_5 = xor_lasx_5,
 };
-
-#define XOR_SPEED_LASX()					\
-	do {							\
-		if (cpu_has_lasx)				\
-			xor_speed(&xor_block_lasx);		\
-	} while (0)
-#else /* CONFIG_CPU_HAS_LASX */
-#define XOR_SPEED_LASX()
 #endif /* CONFIG_CPU_HAS_LASX */
 
 /*
@@ -54,15 +38,21 @@ static struct xor_block_template xor_block_lasx = {
  */
 #include <asm-generic/xor.h>
 
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES				\
-do {							\
-	xor_speed(&xor_block_8regs);			\
-	xor_speed(&xor_block_8regs_p);			\
-	xor_speed(&xor_block_32regs);			\
-	xor_speed(&xor_block_32regs_p);			\
-	XOR_SPEED_LSX();				\
-	XOR_SPEED_LASX();				\
-} while (0)
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_8regs_p);
+	xor_register(&xor_block_32regs);
+	xor_register(&xor_block_32regs_p);
+#ifdef CONFIG_CPU_HAS_LSX
+	if (cpu_has_lsx)
+		xor_register(&xor_block_lsx);
+#endif
+#ifdef CONFIG_CPU_HAS_LASX
+	if (cpu_has_lasx)
+		xor_register(&xor_block_lasx);
+#endif
+}
 
 #endif /* _ASM_LOONGARCH_XOR_H */
diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h
index 37d05c11d09c..30224c5279c4 100644
--- a/arch/powerpc/include/asm/xor.h
+++ b/arch/powerpc/include/asm/xor.h
@@ -21,27 +21,22 @@ static struct xor_block_template xor_block_altivec = {
 	.do_4 = xor_altivec_4,
 	.do_5 = xor_altivec_5,
 };
-
-#define XOR_SPEED_ALTIVEC()				\
-	do {						\
-		if (cpu_has_feature(CPU_FTR_ALTIVEC))	\
-			xor_speed(&xor_block_altivec);	\
-	} while (0)
-#else
-#define XOR_SPEED_ALTIVEC()
-#endif
+#endif /* CONFIG_ALTIVEC */
 
 /* Also try the generic routines. */
 #include <asm-generic/xor.h>
 
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES				\
-do {							\
-	xor_speed(&xor_block_8regs);			\
-	xor_speed(&xor_block_8regs_p);			\
-	xor_speed(&xor_block_32regs);			\
-	xor_speed(&xor_block_32regs_p);			\
-	XOR_SPEED_ALTIVEC();				\
-} while (0)
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_8regs_p);
+	xor_register(&xor_block_32regs);
+	xor_register(&xor_block_32regs_p);
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		xor_register(&xor_block_altivec);
+#endif
+}
 
 #endif /* _ASM_POWERPC_XOR_H */
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
index 96011861e46b..ed5f27903efc 100644
--- a/arch/riscv/include/asm/xor.h
+++ b/arch/riscv/include/asm/xor.h
@@ -55,14 +55,15 @@ static struct xor_block_template xor_block_rvv = {
 	.do_4 = xor_vector_4,
 	.do_5 = xor_vector_5
 };
+#endif /* CONFIG_RISCV_ISA_V */
 
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES           \
-	do {        \
-		xor_speed(&xor_block_8regs);    \
-		xor_speed(&xor_block_32regs);    \
-		if (has_vector()) { \
-			xor_speed(&xor_block_rvv);\
-		} \
-	} while (0)
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_32regs);
+#ifdef CONFIG_RISCV_ISA_V
+	if (has_vector())
+		xor_register(&xor_block_rvv);
 #endif
+}
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
index 857d6759b67f..4e2233f64da9 100644
--- a/arch/s390/include/asm/xor.h
+++ b/arch/s390/include/asm/xor.h
@@ -10,12 +10,10 @@
 
 extern struct xor_block_template xor_block_xc;
 
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES				\
-do {							\
-	xor_speed(&xor_block_xc);			\
-} while (0)
-
-#define XOR_SELECT_TEMPLATE(FASTEST)	(&xor_block_xc)
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_force(&xor_block_xc);
+}
 
 #endif /* _ASM_S390_XOR_H */
diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h
index 0351813cf3af..8fbf0c07ec28 100644
--- a/arch/sparc/include/asm/xor_32.h
+++ b/arch/sparc/include/asm/xor_32.h
@@ -259,10 +259,10 @@ static struct xor_block_template xor_block_SPARC = {
 /* For grins, also test the generic routines.  */
 #include <asm-generic/xor.h>
 
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES				\
-	do {						\
-		xor_speed(&xor_block_8regs);		\
-		xor_speed(&xor_block_32regs);		\
-		xor_speed(&xor_block_SPARC);		\
-	} while (0)
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_32regs);
+	xor_register(&xor_block_SPARC);
+}
diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h
index caaddea8ad79..e0482ecc0a68 100644
--- a/arch/sparc/include/asm/xor_64.h
+++ b/arch/sparc/include/asm/xor_64.h
@@ -60,20 +60,17 @@ static struct xor_block_template xor_block_niagara = {
         .do_5	= xor_niagara_5,
 };
 
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES				\
-	do {						\
-		xor_speed(&xor_block_VIS);		\
-		xor_speed(&xor_block_niagara);		\
-	} while (0)
-
-/* For VIS for everything except Niagara.  */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
-	((tlb_type == hypervisor && \
-	  (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \
-	   sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || \
-	   sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || \
-	   sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || \
-	   sun4v_chip_type == SUN4V_CHIP_NIAGARA5)) ? \
-	 &xor_block_niagara : \
-	 &xor_block_VIS)
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	/* Force VIS for everything except Niagara.  */
+	if (tlb_type == hypervisor &&
+	    (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
+	     sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+	     sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
+	     sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
+	     sun4v_chip_type == SUN4V_CHIP_NIAGARA5))
+		xor_force(&xor_block_niagara);
+	else
+		xor_force(&xor_block_VIS);
+}
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 7b0307acc410..33f5620d8d69 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -496,7 +496,4 @@ static struct xor_block_template xor_block_sse_pf64 = {
 # include <asm/xor_64.h>
 #endif
 
-#define XOR_SELECT_TEMPLATE(FASTEST) \
-	AVX_SELECT(FASTEST)
-
 #endif /* _ASM_X86_XOR_H */
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 7a6b9474591e..ee32d08c27bc 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -552,22 +552,24 @@ static struct xor_block_template xor_block_pIII_sse = {
 /* We force the use of the SSE xor block because it can write around L2.
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched.  */
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES				\
-do {							\
-	AVX_XOR_SPEED;					\
-	if (boot_cpu_has(X86_FEATURE_XMM)) {				\
-		xor_speed(&xor_block_pIII_sse);		\
-		xor_speed(&xor_block_sse_pf64);		\
-	} else if (boot_cpu_has(X86_FEATURE_MMX)) {	\
-		xor_speed(&xor_block_pII_mmx);		\
-		xor_speed(&xor_block_p5_mmx);		\
-	} else {					\
-		xor_speed(&xor_block_8regs);		\
-		xor_speed(&xor_block_8regs_p);		\
-		xor_speed(&xor_block_32regs);		\
-		xor_speed(&xor_block_32regs_p);		\
-	}						\
-} while (0)
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	if (boot_cpu_has(X86_FEATURE_AVX) &&
+	    boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+		xor_force(&xor_block_avx);
+	} else if (boot_cpu_has(X86_FEATURE_XMM)) {
+		xor_register(&xor_block_pIII_sse);
+		xor_register(&xor_block_sse_pf64);
+	} else if (boot_cpu_has(X86_FEATURE_MMX)) {
+		xor_register(&xor_block_pII_mmx);
+		xor_register(&xor_block_p5_mmx);
+	} else {
+		xor_register(&xor_block_8regs);
+		xor_register(&xor_block_8regs_p);
+		xor_register(&xor_block_32regs);
+		xor_register(&xor_block_32regs_p);
+	}
+}
 
 #endif /* _ASM_X86_XOR_32_H */
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 0307e4ec5044..2d2ceb241866 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -17,12 +17,16 @@ static struct xor_block_template xor_block_sse = {
 /* We force the use of the SSE xor block because it can write around L2.
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched.  */
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES			\
-do {						\
-	AVX_XOR_SPEED;				\
-	xor_speed(&xor_block_sse_pf64);		\
-	xor_speed(&xor_block_sse);		\
-} while (0)
+#define arch_xor_init arch_xor_init
+static __always_inline void __init arch_xor_init(void)
+{
+	if (boot_cpu_has(X86_FEATURE_AVX) &&
+	    boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+		xor_force(&xor_block_avx);
+	} else {
+		xor_register(&xor_block_sse_pf64);
+		xor_register(&xor_block_sse);
+	}
+}
 
 #endif /* _ASM_X86_XOR_64_H */
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 7f81dd5897f4..c600888436bb 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -166,13 +166,4 @@ static struct xor_block_template xor_block_avx = {
 	.do_5 = xor_avx_5,
 };
 
-#define AVX_XOR_SPEED \
-do { \
-	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
-		xor_speed(&xor_block_avx); \
-} while (0)
-
-#define AVX_SELECT(FASTEST) \
-	(boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)
-
 #endif
diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h
index 44509d48fca2..79c0096aa9d9 100644
--- a/include/asm-generic/xor.h
+++ b/include/asm-generic/xor.h
@@ -728,11 +728,3 @@ static struct xor_block_template xor_block_32regs_p __maybe_unused = {
 	.do_4 = xor_32regs_p_4,
 	.do_5 = xor_32regs_p_5,
 };
-
-#define XOR_TRY_TEMPLATES			\
-	do {					\
-		xor_speed(&xor_block_8regs);	\
-		xor_speed(&xor_block_8regs_p);	\
-		xor_speed(&xor_block_32regs);	\
-		xor_speed(&xor_block_32regs_p);	\
-	} while (0)
diff --git a/include/linux/raid/xor_impl.h b/include/linux/raid/xor_impl.h
index a1890cd66812..6ed4c445ab24 100644
--- a/include/linux/raid/xor_impl.h
+++ b/include/linux/raid/xor_impl.h
@@ -2,6 +2,8 @@
 #ifndef _XOR_IMPL_H
 #define _XOR_IMPL_H
 
+#include <linux/init.h>
+
 struct xor_block_template {
 	struct xor_block_template *next;
 	const char *name;
@@ -22,4 +24,7 @@ struct xor_block_template {
 		     const unsigned long * __restrict);
 };
 
+void __init xor_register(struct xor_block_template *tmpl);
+void __init xor_force(struct xor_block_template *tmpl);
+
 #endif /* _XOR_IMPL_H */
diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c
index db1824011a12..93608b5fece9 100644
--- a/lib/raid/xor/xor-core.c
+++ b/lib/raid/xor/xor-core.c
@@ -14,10 +14,6 @@
 #include <linux/preempt.h>
 #include <asm/xor.h>
 
-#ifndef XOR_SELECT_TEMPLATE
-#define XOR_SELECT_TEMPLATE(x) (x)
-#endif
-
 /* The xor routines to use.  */
 static struct xor_block_template *active_template;
 
@@ -55,12 +51,33 @@ EXPORT_SYMBOL(xor_blocks);
 static struct xor_block_template *__initdata template_list;
 static bool __initdata xor_forced = false;
 
-static void __init do_xor_register(struct xor_block_template *tmpl)
+/**
+ * xor_register - register a XOR template
+ * @tmpl:	template to register
+ *
+ * Register a XOR implementation with the core.  Registered implementations
+ * will be measured by a trivial benchmark, and the fastest one is chosen
+ * unless an implementation is forced using xor_force().
+ */
+void __init xor_register(struct xor_block_template *tmpl)
 {
 	tmpl->next = template_list;
 	template_list = tmpl;
 }
 
+/**
+ * xor_force - force use of a XOR template
+ * @tmpl:	template to register
+ *
+ * Register a XOR implementation with the core and force using it.  Forcing
+ * an implementation will make the core ignore any template registered using
+ * xor_register(), or any previous implementation forced using xor_force().
+ */
+void __init xor_force(struct xor_block_template *tmpl)
+{
+	active_template = tmpl;
+}
+
 #define BENCH_SIZE	4096
 #define REPS		800U
 
@@ -126,11 +143,19 @@ static int __init calibrate_xor_blocks(void)
 
 static int __init xor_init(void)
 {
+#ifdef arch_xor_init
+	arch_xor_init();
+#else
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_8regs_p);
+	xor_register(&xor_block_32regs);
+	xor_register(&xor_block_32regs_p);
+#endif
+
 	/*
 	 * If this arch/cpu has a short-circuited selection, don't loop through
 	 * all the possible functions, just use the best one.
 	 */
-	active_template = XOR_SELECT_TEMPLATE(NULL);
 	if (active_template) {
 		pr_info("xor: automatically using best checksumming function   %-10s\n",
 			active_template->name);
@@ -138,10 +163,6 @@ static int __init xor_init(void)
 		return 0;
 	}
 
-#define xor_speed	do_xor_register
-	XOR_TRY_TEMPLATES;
-#undef xor_speed
-
 #ifdef MODULE
 	return calibrate_xor_blocks();
 #else
-- 
cgit v1.2.3


From e20043b4765cdf7ec8e963d706bb91469cba8cb8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 27 Mar 2026 07:16:53 +0100
Subject: xor: make xor.ko self-contained in lib/raid/

Move the asm/xor.h headers to lib/raid/xor/$(SRCARCH)/xor_arch.h and
include/linux/raid/xor_impl.h to lib/raid/xor/xor_impl.h so that the
xor.ko module implementation is self-contained in lib/raid/.

As this remove the asm-generic mechanism a new kconfig symbol is added to
indicate that a architecture-specific implementations exists, and
xor_arch.h should be included.

Link: https://lkml.kernel.org/r/20260327061704.3707577-22-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Tested-by: Eric Biggers <ebiggers@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: David Sterba <dsterba@suse.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Li Nan <linan122@huawei.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/alpha/include/asm/xor.h           | 24 -------------------
 arch/arm/include/asm/xor.h             | 21 ----------------
 arch/arm64/include/asm/xor.h           | 24 -------------------
 arch/loongarch/include/asm/xor.h       | 40 -------------------------------
 arch/powerpc/include/asm/xor.h         | 29 ----------------------
 arch/riscv/include/asm/xor.h           | 19 ---------------
 arch/s390/include/asm/xor.h            | 19 ---------------
 arch/sparc/include/asm/xor.h           | 44 ----------------------------------
 arch/um/include/asm/xor.h              |  8 -------
 arch/x86/include/asm/xor.h             | 43 ---------------------------------
 include/asm-generic/Kbuild             |  1 -
 include/asm-generic/xor.h              | 11 ---------
 include/linux/raid/xor_impl.h          | 30 -----------------------
 lib/raid/Kconfig                       | 15 ++++++++++++
 lib/raid/xor/Makefile                  |  6 +++++
 lib/raid/xor/alpha/xor.c               |  4 ++--
 lib/raid/xor/alpha/xor_arch.h          | 22 +++++++++++++++++
 lib/raid/xor/arm/xor-neon-glue.c       |  4 ++--
 lib/raid/xor/arm/xor-neon.c            |  2 +-
 lib/raid/xor/arm/xor.c                 |  4 ++--
 lib/raid/xor/arm/xor_arch.h            | 19 +++++++++++++++
 lib/raid/xor/arm64/xor-neon-glue.c     |  4 ++--
 lib/raid/xor/arm64/xor-neon.c          |  4 ++--
 lib/raid/xor/arm64/xor_arch.h          | 21 ++++++++++++++++
 lib/raid/xor/loongarch/xor_arch.h      | 33 +++++++++++++++++++++++++
 lib/raid/xor/loongarch/xor_simd_glue.c |  4 ++--
 lib/raid/xor/powerpc/xor_arch.h        | 22 +++++++++++++++++
 lib/raid/xor/powerpc/xor_vmx_glue.c    |  4 ++--
 lib/raid/xor/riscv/xor-glue.c          |  4 ++--
 lib/raid/xor/riscv/xor_arch.h          | 17 +++++++++++++
 lib/raid/xor/s390/xor.c                |  4 ++--
 lib/raid/xor/s390/xor_arch.h           | 13 ++++++++++
 lib/raid/xor/sparc/xor-sparc32.c       |  4 ++--
 lib/raid/xor/sparc/xor-sparc64-glue.c  |  4 ++--
 lib/raid/xor/sparc/xor_arch.h          | 35 +++++++++++++++++++++++++++
 lib/raid/xor/um/xor_arch.h             |  2 ++
 lib/raid/xor/x86/xor-avx.c             |  4 ++--
 lib/raid/xor/x86/xor-mmx.c             |  4 ++--
 lib/raid/xor/x86/xor-sse.c             |  4 ++--
 lib/raid/xor/x86/xor_arch.h            | 36 ++++++++++++++++++++++++++++
 lib/raid/xor/xor-32regs-prefetch.c     |  3 +--
 lib/raid/xor/xor-32regs.c              |  3 +--
 lib/raid/xor/xor-8regs-prefetch.c      |  3 +--
 lib/raid/xor/xor-8regs.c               |  3 +--
 lib/raid/xor/xor-core.c                | 18 ++++++++------
 lib/raid/xor/xor_impl.h                | 36 ++++++++++++++++++++++++++++
 46 files changed, 321 insertions(+), 357 deletions(-)
 delete mode 100644 arch/alpha/include/asm/xor.h
 delete mode 100644 arch/arm/include/asm/xor.h
 delete mode 100644 arch/arm64/include/asm/xor.h
 delete mode 100644 arch/loongarch/include/asm/xor.h
 delete mode 100644 arch/powerpc/include/asm/xor.h
 delete mode 100644 arch/riscv/include/asm/xor.h
 delete mode 100644 arch/s390/include/asm/xor.h
 delete mode 100644 arch/sparc/include/asm/xor.h
 delete mode 100644 arch/um/include/asm/xor.h
 delete mode 100644 arch/x86/include/asm/xor.h
 delete mode 100644 include/asm-generic/xor.h
 delete mode 100644 include/linux/raid/xor_impl.h
 create mode 100644 lib/raid/xor/alpha/xor_arch.h
 create mode 100644 lib/raid/xor/arm/xor_arch.h
 create mode 100644 lib/raid/xor/arm64/xor_arch.h
 create mode 100644 lib/raid/xor/loongarch/xor_arch.h
 create mode 100644 lib/raid/xor/powerpc/xor_arch.h
 create mode 100644 lib/raid/xor/riscv/xor_arch.h
 create mode 100644 lib/raid/xor/s390/xor_arch.h
 create mode 100644 lib/raid/xor/sparc/xor_arch.h
 create mode 100644 lib/raid/xor/um/xor_arch.h
 create mode 100644 lib/raid/xor/x86/xor_arch.h
 create mode 100644 lib/raid/xor/xor_impl.h

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/xor.h b/arch/alpha/include/asm/xor.h
deleted file mode 100644
index e517be577a09..000000000000
--- a/arch/alpha/include/asm/xor.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-
-#include <asm/special_insns.h>
-#include <asm-generic/xor.h>
-
-extern struct xor_block_template xor_block_alpha;
-extern struct xor_block_template xor_block_alpha_prefetch;
-
-/*
- * Force the use of alpha_prefetch if EV6, as it is significantly faster in the
- * cold cache case.
- */
-#define arch_xor_init arch_xor_init
-static __always_inline void __init arch_xor_init(void)
-{
-	if (implver() == IMPLVER_EV6) {
-		xor_force(&xor_block_alpha_prefetch);
-	} else {
-		xor_register(&xor_block_8regs);
-		xor_register(&xor_block_32regs);
-		xor_register(&xor_block_alpha);
-		xor_register(&xor_block_alpha_prefetch);
-	}
-}
diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h
deleted file mode 100644
index 989c55872ef6..000000000000
--- a/arch/arm/include/asm/xor.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2001 Russell King
- */
-#include <asm-generic/xor.h>
-#include <asm/neon.h>
-
-extern struct xor_block_template xor_block_arm4regs;
-extern struct xor_block_template xor_block_neon;
-
-#define arch_xor_init arch_xor_init
-static __always_inline void __init arch_xor_init(void)
-{
-	xor_register(&xor_block_arm4regs);
-	xor_register(&xor_block_8regs);
-	xor_register(&xor_block_32regs);
-#ifdef CONFIG_KERNEL_MODE_NEON
-	if (cpu_has_neon())
-		xor_register(&xor_block_neon);
-#endif
-}
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
deleted file mode 100644
index 4782c760bcac..000000000000
--- a/arch/arm64/include/asm/xor.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Authors: Jackie Liu <liuyun01@kylinos.cn>
- * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
- */
-
-#include <asm-generic/xor.h>
-#include <asm/simd.h>
-
-extern struct xor_block_template xor_block_neon;
-extern struct xor_block_template xor_block_eor3;
-
-#define arch_xor_init arch_xor_init
-static __always_inline void __init arch_xor_init(void)
-{
-	xor_register(&xor_block_8regs);
-	xor_register(&xor_block_32regs);
-	if (cpu_has_neon()) {
-		if (cpu_have_named_feature(SHA3))
-			xor_register(&xor_block_eor3);
-		else
-			xor_register(&xor_block_neon);
-	}
-}
diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h
deleted file mode 100644
index 7e32f72f8b03..000000000000
--- a/arch/loongarch/include/asm/xor.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
- */
-#ifndef _ASM_LOONGARCH_XOR_H
-#define _ASM_LOONGARCH_XOR_H
-
-#include <asm/cpu-features.h>
-
-/*
- * For grins, also test the generic routines.
- *
- * More importantly: it cannot be ruled out at this point of time, that some
- * future (maybe reduced) models could run the vector algorithms slower than
- * the scalar ones, maybe for errata or micro-op reasons. It may be
- * appropriate to revisit this after one or two more uarch generations.
- */
-#include <asm-generic/xor.h>
-
-extern struct xor_block_template xor_block_lsx;
-extern struct xor_block_template xor_block_lasx;
-
-#define arch_xor_init arch_xor_init
-static __always_inline void __init arch_xor_init(void)
-{
-	xor_register(&xor_block_8regs);
-	xor_register(&xor_block_8regs_p);
-	xor_register(&xor_block_32regs);
-	xor_register(&xor_block_32regs_p);
-#ifdef CONFIG_CPU_HAS_LSX
-	if (cpu_has_lsx)
-		xor_register(&xor_block_lsx);
-#endif
-#ifdef CONFIG_CPU_HAS_LASX
-	if (cpu_has_lasx)
-		xor_register(&xor_block_lasx);
-#endif
-}
-
-#endif /* _ASM_LOONGARCH_XOR_H */
diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h
deleted file mode 100644
index 3293ac87181c..000000000000
--- a/arch/powerpc/include/asm/xor.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- *
- * Copyright (C) IBM Corporation, 2012
- *
- * Author: Anton Blanchard <anton@au.ibm.com>
- */
-#ifndef _ASM_POWERPC_XOR_H
-#define _ASM_POWERPC_XOR_H
-
-#include <asm/cpu_has_feature.h>
-#include <asm-generic/xor.h>
-
-extern struct xor_block_template xor_block_altivec;
-
-#define arch_xor_init arch_xor_init
-static __always_inline void __init arch_xor_init(void)
-{
-	xor_register(&xor_block_8regs);
-	xor_register(&xor_block_8regs_p);
-	xor_register(&xor_block_32regs);
-	xor_register(&xor_block_32regs_p);
-#ifdef CONFIG_ALTIVEC
-	if (cpu_has_feature(CPU_FTR_ALTIVEC))
-		xor_register(&xor_block_altivec);
-#endif
-}
-
-#endif /* _ASM_POWERPC_XOR_H */
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
deleted file mode 100644
index 614d9209d078..000000000000
--- a/arch/riscv/include/asm/xor.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2021 SiFive
- */
-#include <asm/vector.h>
-#include <asm-generic/xor.h>
-
-extern struct xor_block_template xor_block_rvv;
-
-#define arch_xor_init arch_xor_init
-static __always_inline void __init arch_xor_init(void)
-{
-	xor_register(&xor_block_8regs);
-	xor_register(&xor_block_32regs);
-#ifdef CONFIG_RISCV_ISA_V
-	if (has_vector())
-		xor_register(&xor_block_rvv);
-#endif
-}
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
deleted file mode 100644
index 4e2233f64da9..000000000000
--- a/arch/s390/include/asm/xor.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Optimited xor routines
- *
- * Copyright IBM Corp. 2016
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-#ifndef _ASM_S390_XOR_H
-#define _ASM_S390_XOR_H
-
-extern struct xor_block_template xor_block_xc;
-
-#define arch_xor_init arch_xor_init
-static __always_inline void __init arch_xor_init(void)
-{
-	xor_force(&xor_block_xc);
-}
-
-#endif /* _ASM_S390_XOR_H */
diff --git a/arch/sparc/include/asm/xor.h b/arch/sparc/include/asm/xor.h
deleted file mode 100644
index f923b009fc24..000000000000
--- a/arch/sparc/include/asm/xor.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
- * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
- */
-#ifndef ___ASM_SPARC_XOR_H
-#define ___ASM_SPARC_XOR_H
-
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/spitfire.h>
-
-extern struct xor_block_template xor_block_VIS;
-extern struct xor_block_template xor_block_niagara;
-
-#define arch_xor_init arch_xor_init
-static __always_inline void __init arch_xor_init(void)
-{
-	/* Force VIS for everything except Niagara.  */
-	if (tlb_type == hypervisor &&
-	    (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
-	     sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
-	     sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
-	     sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
-	     sun4v_chip_type == SUN4V_CHIP_NIAGARA5))
-		xor_force(&xor_block_niagara);
-	else
-		xor_force(&xor_block_VIS);
-}
-#else /* sparc64 */
-
-/* For grins, also test the generic routines.  */
-#include <asm-generic/xor.h>
-
-extern struct xor_block_template xor_block_SPARC;
-
-#define arch_xor_init arch_xor_init
-static __always_inline void __init arch_xor_init(void)
-{
-	xor_register(&xor_block_8regs);
-	xor_register(&xor_block_32regs);
-	xor_register(&xor_block_SPARC);
-}
-#endif /* !sparc64 */
-#endif /* ___ASM_SPARC_XOR_H */
diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h
deleted file mode 100644
index 99e5c7e1f475..000000000000
--- a/arch/um/include/asm/xor.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_UM_XOR_H
-#define _ASM_UM_XOR_H
-
-#include <asm/cpufeature.h>
-#include <../../x86/include/asm/xor.h>
-
-#endif
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
deleted file mode 100644
index d1aab8275908..000000000000
--- a/arch/x86/include/asm/xor.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ASM_X86_XOR_H
-#define _ASM_X86_XOR_H
-
-#include <asm/cpufeature.h>
-#include <asm-generic/xor.h>
-
-extern struct xor_block_template xor_block_pII_mmx;
-extern struct xor_block_template xor_block_p5_mmx;
-extern struct xor_block_template xor_block_sse;
-extern struct xor_block_template xor_block_sse_pf64;
-extern struct xor_block_template xor_block_avx;
-
-/*
- * When SSE is available, use it as it can write around L2.  We may also be able
- * to load into the L1 only depending on how the cpu deals with a load to a line
- * that is being prefetched.
- *
- * When AVX2 is available, force using it as it is better by all measures.
- *
- * 32-bit without MMX can fall back to the generic routines.
- */
-#define arch_xor_init arch_xor_init
-static __always_inline void __init arch_xor_init(void)
-{
-	if (boot_cpu_has(X86_FEATURE_AVX) &&
-	    boot_cpu_has(X86_FEATURE_OSXSAVE)) {
-		xor_force(&xor_block_avx);
-	} else if (IS_ENABLED(CONFIG_X86_64) || boot_cpu_has(X86_FEATURE_XMM)) {
-		xor_register(&xor_block_sse);
-		xor_register(&xor_block_sse_pf64);
-	} else if (boot_cpu_has(X86_FEATURE_MMX)) {
-		xor_register(&xor_block_pII_mmx);
-		xor_register(&xor_block_p5_mmx);
-	} else {
-		xor_register(&xor_block_8regs);
-		xor_register(&xor_block_8regs_p);
-		xor_register(&xor_block_32regs);
-		xor_register(&xor_block_32regs_p);
-	}
-}
-
-#endif /* _ASM_X86_XOR_H */
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 9aff61e7b8f2..2c53a1e0b760 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -65,4 +65,3 @@ mandatory-y += vermagic.h
 mandatory-y += vga.h
 mandatory-y += video.h
 mandatory-y += word-at-a-time.h
-mandatory-y += xor.h
diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h
deleted file mode 100644
index fc151fdc45ab..000000000000
--- a/include/asm-generic/xor.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * include/asm-generic/xor.h
- *
- * Generic optimized RAID-5 checksumming functions.
- */
-
-extern struct xor_block_template xor_block_8regs;
-extern struct xor_block_template xor_block_32regs;
-extern struct xor_block_template xor_block_8regs_p;
-extern struct xor_block_template xor_block_32regs_p;
diff --git a/include/linux/raid/xor_impl.h b/include/linux/raid/xor_impl.h
deleted file mode 100644
index 6ed4c445ab24..000000000000
--- a/include/linux/raid/xor_impl.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _XOR_IMPL_H
-#define _XOR_IMPL_H
-
-#include <linux/init.h>
-
-struct xor_block_template {
-	struct xor_block_template *next;
-	const char *name;
-	int speed;
-	void (*do_2)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_3)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_4)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_5)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
-};
-
-void __init xor_register(struct xor_block_template *tmpl);
-void __init xor_force(struct xor_block_template *tmpl);
-
-#endif /* _XOR_IMPL_H */
diff --git a/lib/raid/Kconfig b/lib/raid/Kconfig
index 01b73a1c303f..81cb3f9c0a7b 100644
--- a/lib/raid/Kconfig
+++ b/lib/raid/Kconfig
@@ -2,3 +2,18 @@
 
 config XOR_BLOCKS
 	tristate
+
+# selected by architectures that provide an optimized XOR implementation
+config XOR_BLOCKS_ARCH
+	depends on XOR_BLOCKS
+	default y if ALPHA
+	default y if ARM
+	default y if ARM64
+	default y if CPU_HAS_LSX		# loongarch
+	default y if ALTIVEC			# powerpc
+	default y if RISCV_ISA_V
+	default y if SPARC
+	default y if S390
+	default y if X86_32
+	default y if X86_64
+	bool
diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile
index 05aca96041b3..df55823c4d82 100644
--- a/lib/raid/xor/Makefile
+++ b/lib/raid/xor/Makefile
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
+ccflags-y			+= -I $(src)
+
 obj-$(CONFIG_XOR_BLOCKS)	+= xor.o
 
 xor-y				+= xor-core.o
@@ -8,6 +10,10 @@ xor-y				+= xor-32regs.o
 xor-y				+= xor-8regs-prefetch.o
 xor-y				+= xor-32regs-prefetch.o
 
+ifeq ($(CONFIG_XOR_BLOCKS_ARCH),y)
+CFLAGS_xor-core.o		+= -I$(src)/$(SRCARCH)
+endif
+
 xor-$(CONFIG_ALPHA)		+= alpha/xor.o
 xor-$(CONFIG_ARM)		+= arm/xor.o
 ifeq ($(CONFIG_ARM),y)
diff --git a/lib/raid/xor/alpha/xor.c b/lib/raid/xor/alpha/xor.c
index 0964ac420604..90694cc47395 100644
--- a/lib/raid/xor/alpha/xor.c
+++ b/lib/raid/xor/alpha/xor.c
@@ -2,8 +2,8 @@
 /*
  * Optimized XOR parity functions for alpha EV5 and EV6
  */
-#include <linux/raid/xor_impl.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 
 extern void
 xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1,
diff --git a/lib/raid/xor/alpha/xor_arch.h b/lib/raid/xor/alpha/xor_arch.h
new file mode 100644
index 000000000000..0dcfea578a48
--- /dev/null
+++ b/lib/raid/xor/alpha/xor_arch.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <asm/special_insns.h>
+
+extern struct xor_block_template xor_block_alpha;
+extern struct xor_block_template xor_block_alpha_prefetch;
+
+/*
+ * Force the use of alpha_prefetch if EV6, as it is significantly faster in the
+ * cold cache case.
+ */
+static __always_inline void __init arch_xor_init(void)
+{
+	if (implver() == IMPLVER_EV6) {
+		xor_force(&xor_block_alpha_prefetch);
+	} else {
+		xor_register(&xor_block_8regs);
+		xor_register(&xor_block_32regs);
+		xor_register(&xor_block_alpha);
+		xor_register(&xor_block_alpha_prefetch);
+	}
+}
diff --git a/lib/raid/xor/arm/xor-neon-glue.c b/lib/raid/xor/arm/xor-neon-glue.c
index c7b162b383a2..7afd6294464b 100644
--- a/lib/raid/xor/arm/xor-neon-glue.c
+++ b/lib/raid/xor/arm/xor-neon-glue.c
@@ -2,8 +2,8 @@
 /*
  *  Copyright (C) 2001 Russell King
  */
-#include <linux/raid/xor_impl.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 
 extern struct xor_block_template const xor_block_neon_inner;
 
diff --git a/lib/raid/xor/arm/xor-neon.c b/lib/raid/xor/arm/xor-neon.c
index c9d4378b0f0e..806a42c5952c 100644
--- a/lib/raid/xor/arm/xor-neon.c
+++ b/lib/raid/xor/arm/xor-neon.c
@@ -3,7 +3,7 @@
  * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
  */
 
-#include <linux/raid/xor_impl.h>
+#include "xor_impl.h"
 
 #ifndef __ARM_NEON__
 #error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon'
diff --git a/lib/raid/xor/arm/xor.c b/lib/raid/xor/arm/xor.c
index 2263341dbbcd..5bd5f048bbe9 100644
--- a/lib/raid/xor/arm/xor.c
+++ b/lib/raid/xor/arm/xor.c
@@ -2,8 +2,8 @@
 /*
  *  Copyright (C) 2001 Russell King
  */
-#include <linux/raid/xor_impl.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 
 #define __XOR(a1, a2) a1 ^= a2
 
diff --git a/lib/raid/xor/arm/xor_arch.h b/lib/raid/xor/arm/xor_arch.h
new file mode 100644
index 000000000000..5a7eedb48fbb
--- /dev/null
+++ b/lib/raid/xor/arm/xor_arch.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  Copyright (C) 2001 Russell King
+ */
+#include <asm/neon.h>
+
+extern struct xor_block_template xor_block_arm4regs;
+extern struct xor_block_template xor_block_neon;
+
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_arm4regs);
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_32regs);
+#ifdef CONFIG_KERNEL_MODE_NEON
+	if (cpu_has_neon())
+		xor_register(&xor_block_neon);
+#endif
+}
diff --git a/lib/raid/xor/arm64/xor-neon-glue.c b/lib/raid/xor/arm64/xor-neon-glue.c
index 08c3e3573388..3db0a318cf5b 100644
--- a/lib/raid/xor/arm64/xor-neon-glue.c
+++ b/lib/raid/xor/arm64/xor-neon-glue.c
@@ -4,9 +4,9 @@
  * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
  */
 
-#include <linux/raid/xor_impl.h>
 #include <asm/simd.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 #include "xor-neon.h"
 
 #define XOR_TEMPLATE(_name)						\
diff --git a/lib/raid/xor/arm64/xor-neon.c b/lib/raid/xor/arm64/xor-neon.c
index 61194c292917..61f00c4fee49 100644
--- a/lib/raid/xor/arm64/xor-neon.c
+++ b/lib/raid/xor/arm64/xor-neon.c
@@ -4,10 +4,10 @@
  * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
  */
 
-#include <linux/raid/xor_impl.h>
 #include <linux/cache.h>
 #include <asm/neon-intrinsics.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 #include "xor-neon.h"
 
 void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
diff --git a/lib/raid/xor/arm64/xor_arch.h b/lib/raid/xor/arm64/xor_arch.h
new file mode 100644
index 000000000000..5dbb40319501
--- /dev/null
+++ b/lib/raid/xor/arm64/xor_arch.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Authors: Jackie Liu <liuyun01@kylinos.cn>
+ * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
+ */
+#include <asm/simd.h>
+
+extern struct xor_block_template xor_block_neon;
+extern struct xor_block_template xor_block_eor3;
+
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_32regs);
+	if (cpu_has_neon()) {
+		if (cpu_have_named_feature(SHA3))
+			xor_register(&xor_block_eor3);
+		else
+			xor_register(&xor_block_neon);
+	}
+}
diff --git a/lib/raid/xor/loongarch/xor_arch.h b/lib/raid/xor/loongarch/xor_arch.h
new file mode 100644
index 000000000000..fe5e8244fd0e
--- /dev/null
+++ b/lib/raid/xor/loongarch/xor_arch.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#include <asm/cpu-features.h>
+
+/*
+ * For grins, also test the generic routines.
+ *
+ * More importantly: it cannot be ruled out at this point of time, that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+
+extern struct xor_block_template xor_block_lsx;
+extern struct xor_block_template xor_block_lasx;
+
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_8regs_p);
+	xor_register(&xor_block_32regs);
+	xor_register(&xor_block_32regs_p);
+#ifdef CONFIG_CPU_HAS_LSX
+	if (cpu_has_lsx)
+		xor_register(&xor_block_lsx);
+#endif
+#ifdef CONFIG_CPU_HAS_LASX
+	if (cpu_has_lasx)
+		xor_register(&xor_block_lasx);
+#endif
+}
diff --git a/lib/raid/xor/loongarch/xor_simd_glue.c b/lib/raid/xor/loongarch/xor_simd_glue.c
index 11fa3b47ba83..b387aa0213b4 100644
--- a/lib/raid/xor/loongarch/xor_simd_glue.c
+++ b/lib/raid/xor/loongarch/xor_simd_glue.c
@@ -6,9 +6,9 @@
  */
 
 #include <linux/sched.h>
-#include <linux/raid/xor_impl.h>
 #include <asm/fpu.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 #include "xor_simd.h"
 
 #define MAKE_XOR_GLUE_2(flavor)							\
diff --git a/lib/raid/xor/powerpc/xor_arch.h b/lib/raid/xor/powerpc/xor_arch.h
new file mode 100644
index 000000000000..3b00a4a2fd67
--- /dev/null
+++ b/lib/raid/xor/powerpc/xor_arch.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/cpu_has_feature.h>
+
+extern struct xor_block_template xor_block_altivec;
+
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_8regs_p);
+	xor_register(&xor_block_32regs);
+	xor_register(&xor_block_32regs_p);
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		xor_register(&xor_block_altivec);
+#endif
+}
diff --git a/lib/raid/xor/powerpc/xor_vmx_glue.c b/lib/raid/xor/powerpc/xor_vmx_glue.c
index c41e38340700..56e99ddfb64f 100644
--- a/lib/raid/xor/powerpc/xor_vmx_glue.c
+++ b/lib/raid/xor/powerpc/xor_vmx_glue.c
@@ -7,9 +7,9 @@
 
 #include <linux/preempt.h>
 #include <linux/sched.h>
-#include <linux/raid/xor_impl.h>
 #include <asm/switch_to.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 #include "xor_vmx.h"
 
 static void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
diff --git a/lib/raid/xor/riscv/xor-glue.c b/lib/raid/xor/riscv/xor-glue.c
index 11666a4b6b68..060e5f22ebcc 100644
--- a/lib/raid/xor/riscv/xor-glue.c
+++ b/lib/raid/xor/riscv/xor-glue.c
@@ -3,11 +3,11 @@
  * Copyright (C) 2021 SiFive
  */
 
-#include <linux/raid/xor_impl.h>
 #include <asm/vector.h>
 #include <asm/switch_to.h>
 #include <asm/asm-prototypes.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 
 static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
 			 const unsigned long *__restrict p2)
diff --git a/lib/raid/xor/riscv/xor_arch.h b/lib/raid/xor/riscv/xor_arch.h
new file mode 100644
index 000000000000..9240857d760b
--- /dev/null
+++ b/lib/raid/xor/riscv/xor_arch.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#include <asm/vector.h>
+
+extern struct xor_block_template xor_block_rvv;
+
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_32regs);
+#ifdef CONFIG_RISCV_ISA_V
+	if (has_vector())
+		xor_register(&xor_block_rvv);
+#endif
+}
diff --git a/lib/raid/xor/s390/xor.c b/lib/raid/xor/s390/xor.c
index acbd268adfc8..c28cb56fec92 100644
--- a/lib/raid/xor/s390/xor.c
+++ b/lib/raid/xor/s390/xor.c
@@ -7,8 +7,8 @@
  */
 
 #include <linux/types.h>
-#include <linux/raid/xor_impl.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 
 static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
 		     const unsigned long * __restrict p2)
diff --git a/lib/raid/xor/s390/xor_arch.h b/lib/raid/xor/s390/xor_arch.h
new file mode 100644
index 000000000000..4a233ed2b97a
--- /dev/null
+++ b/lib/raid/xor/s390/xor_arch.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Optimited xor routines
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+extern struct xor_block_template xor_block_xc;
+
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_force(&xor_block_xc);
+}
diff --git a/lib/raid/xor/sparc/xor-sparc32.c b/lib/raid/xor/sparc/xor-sparc32.c
index b65a75a6e59d..307c4a84f535 100644
--- a/lib/raid/xor/sparc/xor-sparc32.c
+++ b/lib/raid/xor/sparc/xor-sparc32.c
@@ -5,8 +5,8 @@
  *
  * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
  */
-#include <linux/raid/xor_impl.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 
 static void
 sparc_2(unsigned long bytes, unsigned long * __restrict p1,
diff --git a/lib/raid/xor/sparc/xor-sparc64-glue.c b/lib/raid/xor/sparc/xor-sparc64-glue.c
index 3c67c8c3a0e8..5f90c2460b54 100644
--- a/lib/raid/xor/sparc/xor-sparc64-glue.c
+++ b/lib/raid/xor/sparc/xor-sparc64-glue.c
@@ -8,8 +8,8 @@
  * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
  */
 
-#include <linux/raid/xor_impl.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 
 void xor_vis_2(unsigned long bytes, unsigned long * __restrict p1,
 	       const unsigned long * __restrict p2);
diff --git a/lib/raid/xor/sparc/xor_arch.h b/lib/raid/xor/sparc/xor_arch.h
new file mode 100644
index 000000000000..af288abe4e91
--- /dev/null
+++ b/lib/raid/xor/sparc/xor_arch.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/spitfire.h>
+
+extern struct xor_block_template xor_block_VIS;
+extern struct xor_block_template xor_block_niagara;
+
+static __always_inline void __init arch_xor_init(void)
+{
+	/* Force VIS for everything except Niagara.  */
+	if (tlb_type == hypervisor &&
+	    (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
+	     sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+	     sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
+	     sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
+	     sun4v_chip_type == SUN4V_CHIP_NIAGARA5))
+		xor_force(&xor_block_niagara);
+	else
+		xor_force(&xor_block_VIS);
+}
+#else /* sparc64 */
+
+extern struct xor_block_template xor_block_SPARC;
+
+static __always_inline void __init arch_xor_init(void)
+{
+	xor_register(&xor_block_8regs);
+	xor_register(&xor_block_32regs);
+	xor_register(&xor_block_SPARC);
+}
+#endif /* !sparc64 */
diff --git a/lib/raid/xor/um/xor_arch.h b/lib/raid/xor/um/xor_arch.h
new file mode 100644
index 000000000000..a33e57a26c5e
--- /dev/null
+++ b/lib/raid/xor/um/xor_arch.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <../x86/xor_arch.h>
diff --git a/lib/raid/xor/x86/xor-avx.c b/lib/raid/xor/x86/xor-avx.c
index b49cb5199e70..d411efa1ff43 100644
--- a/lib/raid/xor/x86/xor-avx.c
+++ b/lib/raid/xor/x86/xor-avx.c
@@ -8,9 +8,9 @@
  * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
  */
 #include <linux/compiler.h>
-#include <linux/raid/xor_impl.h>
 #include <asm/fpu/api.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 
 #define BLOCK4(i) \
 		BLOCK(32 * i, 0) \
diff --git a/lib/raid/xor/x86/xor-mmx.c b/lib/raid/xor/x86/xor-mmx.c
index cf0fafea33b7..e48c58f92874 100644
--- a/lib/raid/xor/x86/xor-mmx.c
+++ b/lib/raid/xor/x86/xor-mmx.c
@@ -4,9 +4,9 @@
  *
  * Copyright (C) 1998 Ingo Molnar.
  */
-#include <linux/raid/xor_impl.h>
 #include <asm/fpu/api.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 
 #define LD(x, y)	"       movq   8*("#x")(%1), %%mm"#y"   ;\n"
 #define ST(x, y)	"       movq %%mm"#y",   8*("#x")(%1)   ;\n"
diff --git a/lib/raid/xor/x86/xor-sse.c b/lib/raid/xor/x86/xor-sse.c
index 0e727ced8b00..5993ed688c15 100644
--- a/lib/raid/xor/x86/xor-sse.c
+++ b/lib/raid/xor/x86/xor-sse.c
@@ -12,9 +12,9 @@
  * x86-64 changes / gcc fixes from Andi Kleen.
  * Copyright 2002 Andi Kleen, SuSE Labs.
  */
-#include <linux/raid/xor_impl.h>
 #include <asm/fpu/api.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
 
 #ifdef CONFIG_X86_32
 /* reduce register pressure */
diff --git a/lib/raid/xor/x86/xor_arch.h b/lib/raid/xor/x86/xor_arch.h
new file mode 100644
index 000000000000..99fe85a213c6
--- /dev/null
+++ b/lib/raid/xor/x86/xor_arch.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <asm/cpufeature.h>
+
+extern struct xor_block_template xor_block_pII_mmx;
+extern struct xor_block_template xor_block_p5_mmx;
+extern struct xor_block_template xor_block_sse;
+extern struct xor_block_template xor_block_sse_pf64;
+extern struct xor_block_template xor_block_avx;
+
+/*
+ * When SSE is available, use it as it can write around L2.  We may also be able
+ * to load into the L1 only depending on how the cpu deals with a load to a line
+ * that is being prefetched.
+ *
+ * When AVX2 is available, force using it as it is better by all measures.
+ *
+ * 32-bit without MMX can fall back to the generic routines.
+ */
+static __always_inline void __init arch_xor_init(void)
+{
+	if (boot_cpu_has(X86_FEATURE_AVX) &&
+	    boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+		xor_force(&xor_block_avx);
+	} else if (IS_ENABLED(CONFIG_X86_64) || boot_cpu_has(X86_FEATURE_XMM)) {
+		xor_register(&xor_block_sse);
+		xor_register(&xor_block_sse_pf64);
+	} else if (boot_cpu_has(X86_FEATURE_MMX)) {
+		xor_register(&xor_block_pII_mmx);
+		xor_register(&xor_block_p5_mmx);
+	} else {
+		xor_register(&xor_block_8regs);
+		xor_register(&xor_block_8regs_p);
+		xor_register(&xor_block_32regs);
+		xor_register(&xor_block_32regs_p);
+	}
+}
diff --git a/lib/raid/xor/xor-32regs-prefetch.c b/lib/raid/xor/xor-32regs-prefetch.c
index 8666c287f777..2856a8e50cb8 100644
--- a/lib/raid/xor/xor-32regs-prefetch.c
+++ b/lib/raid/xor/xor-32regs-prefetch.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include <linux/prefetch.h>
-#include <linux/raid/xor_impl.h>
-#include <asm-generic/xor.h>
+#include "xor_impl.h"
 
 static void
 xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
diff --git a/lib/raid/xor/xor-32regs.c b/lib/raid/xor/xor-32regs.c
index 58d4fac43eb4..cc44d64032fa 100644
--- a/lib/raid/xor/xor-32regs.c
+++ b/lib/raid/xor/xor-32regs.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-#include <linux/raid/xor_impl.h>
-#include <asm-generic/xor.h>
+#include "xor_impl.h"
 
 static void
 xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
diff --git a/lib/raid/xor/xor-8regs-prefetch.c b/lib/raid/xor/xor-8regs-prefetch.c
index 67061e35a0a6..1d53aec50d27 100644
--- a/lib/raid/xor/xor-8regs-prefetch.c
+++ b/lib/raid/xor/xor-8regs-prefetch.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include <linux/prefetch.h>
-#include <linux/raid/xor_impl.h>
-#include <asm-generic/xor.h>
+#include "xor_impl.h"
 
 static void
 xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
diff --git a/lib/raid/xor/xor-8regs.c b/lib/raid/xor/xor-8regs.c
index 769f796ab2cf..72a44e898c55 100644
--- a/lib/raid/xor/xor-8regs.c
+++ b/lib/raid/xor/xor-8regs.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-#include <linux/raid/xor_impl.h>
-#include <asm-generic/xor.h>
+#include "xor_impl.h"
 
 static void
 xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c
index 93608b5fece9..de1d2899490a 100644
--- a/lib/raid/xor/xor-core.c
+++ b/lib/raid/xor/xor-core.c
@@ -9,10 +9,9 @@
 #include <linux/module.h>
 #include <linux/gfp.h>
 #include <linux/raid/xor.h>
-#include <linux/raid/xor_impl.h>
 #include <linux/jiffies.h>
 #include <linux/preempt.h>
-#include <asm/xor.h>
+#include "xor_impl.h"
 
 /* The xor routines to use.  */
 static struct xor_block_template *active_template;
@@ -141,16 +140,21 @@ static int __init calibrate_xor_blocks(void)
 	return 0;
 }
 
-static int __init xor_init(void)
-{
-#ifdef arch_xor_init
-	arch_xor_init();
+#ifdef CONFIG_XOR_BLOCKS_ARCH
+#include "xor_arch.h" /* $SRCARCH/xor_arch.h */
 #else
+static void __init arch_xor_init(void)
+{
 	xor_register(&xor_block_8regs);
 	xor_register(&xor_block_8regs_p);
 	xor_register(&xor_block_32regs);
 	xor_register(&xor_block_32regs_p);
-#endif
+}
+#endif /* CONFIG_XOR_BLOCKS_ARCH */
+
+static int __init xor_init(void)
+{
+	arch_xor_init();
 
 	/*
 	 * If this arch/cpu has a short-circuited selection, don't loop through
diff --git a/lib/raid/xor/xor_impl.h b/lib/raid/xor/xor_impl.h
new file mode 100644
index 000000000000..44b6c99e2093
--- /dev/null
+++ b/lib/raid/xor/xor_impl.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _XOR_IMPL_H
+#define _XOR_IMPL_H
+
+#include <linux/init.h>
+
+struct xor_block_template {
+	struct xor_block_template *next;
+	const char *name;
+	int speed;
+	void (*do_2)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_3)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_4)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_5)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
+};
+
+/* generic implementations */
+extern struct xor_block_template xor_block_8regs;
+extern struct xor_block_template xor_block_32regs;
+extern struct xor_block_template xor_block_8regs_p;
+extern struct xor_block_template xor_block_32regs_p;
+
+void __init xor_register(struct xor_block_template *tmpl);
+void __init xor_force(struct xor_block_template *tmpl);
+
+#endif /* _XOR_IMPL_H */
-- 
cgit v1.2.3


From e420f0a88b24b80302f57965ceb7387aa3f12488 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 27 Mar 2026 07:16:54 +0100
Subject: xor: add a better public API

xor_blocks is very annoying to use, because it is limited to 4 + 1 sources
/ destinations, has an odd argument order and is completely undocumented.

Lift the code that loops around it from btrfs and async_tx/async_xor into
common code under the name xor_gen and properly document it.

[hch@lst.de: make xor_blocks less annoying to use]
 Link: https://lkml.kernel.org/r/20260327061704.3707577-24-hch@lst.de
Link: https://lkml.kernel.org/r/20260327061704.3707577-23-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Tested-by: Eric Biggers <ebiggers@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: David Sterba <dsterba@suse.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Li Nan <linan122@huawei.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/raid/xor.h |  2 ++
 lib/raid/xor/xor-core.c  | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index 02bda8d99534..6d9a39fd85dd 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -7,4 +7,6 @@
 extern void xor_blocks(unsigned int count, unsigned int bytes,
 	void *dest, void **srcs);
 
+void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes);
+
 #endif /* _XOR_H */
diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c
index de1d2899490a..2e46b6b83b0a 100644
--- a/lib/raid/xor/xor-core.c
+++ b/lib/raid/xor/xor-core.c
@@ -46,6 +46,40 @@ xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
 }
 EXPORT_SYMBOL(xor_blocks);
 
+/**
+ * xor_gen - generate RAID-style XOR information
+ * @dest:	destination vector
+ * @srcs:	source vectors
+ * @src_cnt:	number of source vectors
+ * @bytes:	length in bytes of each vector
+ *
+ * Performs bit-wise XOR operation into @dest for each of the @src_cnt vectors
+ * in @srcs for a length of @bytes bytes.  @src_cnt must be non-zero, and the
+ * memory pointed to by @dest and each member of @srcs must be at least 64-byte
+ * aligned.  @bytes must be non-zero and a multiple of 512.
+ *
+ * Note: for typical RAID uses, @dest either needs to be zeroed, or filled with
+ * the first disk, which then needs to be removed from @srcs.
+ */
+void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes)
+{
+	unsigned int src_off = 0;
+
+	WARN_ON_ONCE(in_interrupt());
+	WARN_ON_ONCE(bytes == 0);
+	WARN_ON_ONCE(bytes & 511);
+
+	while (src_cnt > 0) {
+		unsigned int this_cnt = min(src_cnt, MAX_XOR_BLOCKS);
+
+		xor_blocks(this_cnt, bytes, dest, srcs + src_off);
+
+		src_cnt -= this_cnt;
+		src_off += this_cnt;
+	}
+}
+EXPORT_SYMBOL(xor_gen);
+
 /* Set of all registered templates.  */
 static struct xor_block_template *__initdata template_list;
 static bool __initdata xor_forced = false;
-- 
cgit v1.2.3


From 80dcf0a7832a5acde0f0701a4dc7b586fc8bcc88 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 27 Mar 2026 07:16:58 +0100
Subject: xor: pass the entire operation to the low-level ops

Currently the high-level xor code chunks up all operations into small
units for only up to 1 + 4 vectors, and passes it to four different
methods.  This means the FPU/vector context is entered and left a lot for
wide stripes, and a lot of indirect expensive indirect calls are
performed.  Switch to passing the entire gen_xor request to the low-level
ops, and provide a macro to dispatch it to the existing helper.

This reduce the number of indirect calls and FPU/vector context switches
by a factor approaching nr_stripes / 4, and also reduces source and binary
code size.

Link: https://lkml.kernel.org/r/20260327061704.3707577-27-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Tested-by: Eric Biggers <ebiggers@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: David Sterba <dsterba@suse.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Li Nan <linan122@huawei.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/raid/xor.h               |  5 ---
 lib/raid/xor/alpha/xor.c               | 19 +++++-----
 lib/raid/xor/arm/xor-neon-glue.c       | 49 +++-----------------------
 lib/raid/xor/arm/xor-neon.c            |  9 ++---
 lib/raid/xor/arm/xor.c                 | 10 +++---
 lib/raid/xor/arm/xor_arch.h            |  3 ++
 lib/raid/xor/arm64/xor-neon-glue.c     | 44 +++--------------------
 lib/raid/xor/arm64/xor-neon.c          | 20 +++++++----
 lib/raid/xor/arm64/xor-neon.h          | 32 +++--------------
 lib/raid/xor/loongarch/xor_simd_glue.c | 62 ++++++--------------------------
 lib/raid/xor/powerpc/xor_vmx.c         | 40 +++++++++++----------
 lib/raid/xor/powerpc/xor_vmx.h         | 16 ++-------
 lib/raid/xor/powerpc/xor_vmx_glue.c    | 49 +++-----------------------
 lib/raid/xor/riscv/xor-glue.c          | 43 ++++-------------------
 lib/raid/xor/s390/xor.c                |  9 +++--
 lib/raid/xor/sparc/xor-sparc32.c       |  9 +++--
 lib/raid/xor/sparc/xor-sparc64-glue.c  | 19 +++++-----
 lib/raid/xor/x86/xor-avx.c             | 29 ++++++---------
 lib/raid/xor/x86/xor-mmx.c             | 64 +++++++++++++---------------------
 lib/raid/xor/x86/xor-sse.c             | 63 ++++++++++++---------------------
 lib/raid/xor/xor-32regs-prefetch.c     | 10 +++---
 lib/raid/xor/xor-32regs.c              |  9 +++--
 lib/raid/xor/xor-8regs-prefetch.c      | 11 +++---
 lib/raid/xor/xor-8regs.c               |  9 +++--
 lib/raid/xor/xor-core.c                | 48 +++----------------------
 lib/raid/xor/xor_impl.h                | 48 +++++++++++++++++--------
 26 files changed, 224 insertions(+), 505 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index 6d9a39fd85dd..870558c9d36e 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -2,11 +2,6 @@
 #ifndef _XOR_H
 #define _XOR_H
 
-#define MAX_XOR_BLOCKS 4
-
-extern void xor_blocks(unsigned int count, unsigned int bytes,
-	void *dest, void **srcs);
-
 void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes);
 
 #endif /* _XOR_H */
diff --git a/lib/raid/xor/alpha/xor.c b/lib/raid/xor/alpha/xor.c
index 90694cc47395..a8f72f2dd3a5 100644
--- a/lib/raid/xor/alpha/xor.c
+++ b/lib/raid/xor/alpha/xor.c
@@ -832,18 +832,17 @@ xor_alpha_prefetch_5:						\n\
 	.end xor_alpha_prefetch_5				\n\
 ");
 
+DO_XOR_BLOCKS(alpha, xor_alpha_2, xor_alpha_3, xor_alpha_4, xor_alpha_5);
+
 struct xor_block_template xor_block_alpha = {
-	.name	= "alpha",
-	.do_2	= xor_alpha_2,
-	.do_3	= xor_alpha_3,
-	.do_4	= xor_alpha_4,
-	.do_5	= xor_alpha_5,
+	.name		= "alpha",
+	.xor_gen	= xor_gen_alpha,
 };
 
+DO_XOR_BLOCKS(alpha_prefetch, xor_alpha_prefetch_2, xor_alpha_prefetch_3,
+		xor_alpha_prefetch_4, xor_alpha_prefetch_5);
+
 struct xor_block_template xor_block_alpha_prefetch = {
-	.name	= "alpha prefetch",
-	.do_2	= xor_alpha_prefetch_2,
-	.do_3	= xor_alpha_prefetch_3,
-	.do_4	= xor_alpha_prefetch_4,
-	.do_5	= xor_alpha_prefetch_5,
+	.name		= "alpha prefetch",
+	.xor_gen	= xor_gen_alpha_prefetch,
 };
diff --git a/lib/raid/xor/arm/xor-neon-glue.c b/lib/raid/xor/arm/xor-neon-glue.c
index 7afd6294464b..cea39e019904 100644
--- a/lib/raid/xor/arm/xor-neon-glue.c
+++ b/lib/raid/xor/arm/xor-neon-glue.c
@@ -5,54 +5,15 @@
 #include "xor_impl.h"
 #include "xor_arch.h"
 
-extern struct xor_block_template const xor_block_neon_inner;
-
-static void
-xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
-	   const unsigned long * __restrict p2)
-{
-	kernel_neon_begin();
-	xor_block_neon_inner.do_2(bytes, p1, p2);
-	kernel_neon_end();
-}
-
-static void
-xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
-	   const unsigned long * __restrict p2,
-	   const unsigned long * __restrict p3)
-{
-	kernel_neon_begin();
-	xor_block_neon_inner.do_3(bytes, p1, p2, p3);
-	kernel_neon_end();
-}
-
-static void
-xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
-	   const unsigned long * __restrict p2,
-	   const unsigned long * __restrict p3,
-	   const unsigned long * __restrict p4)
-{
-	kernel_neon_begin();
-	xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
-	kernel_neon_end();
-}
-
-static void
-xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
-	   const unsigned long * __restrict p2,
-	   const unsigned long * __restrict p3,
-	   const unsigned long * __restrict p4,
-	   const unsigned long * __restrict p5)
+static void xor_gen_neon(void *dest, void **srcs, unsigned int src_cnt,
+		unsigned int bytes)
 {
 	kernel_neon_begin();
-	xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
+	xor_gen_neon_inner(dest, srcs, src_cnt, bytes);
 	kernel_neon_end();
 }
 
 struct xor_block_template xor_block_neon = {
-	.name	= "neon",
-	.do_2	= xor_neon_2,
-	.do_3	= xor_neon_3,
-	.do_4	= xor_neon_4,
-	.do_5	= xor_neon_5
+	.name		= "neon",
+	.xor_gen	= xor_gen_neon,
 };
diff --git a/lib/raid/xor/arm/xor-neon.c b/lib/raid/xor/arm/xor-neon.c
index 806a42c5952c..23147e3a7904 100644
--- a/lib/raid/xor/arm/xor-neon.c
+++ b/lib/raid/xor/arm/xor-neon.c
@@ -4,6 +4,7 @@
  */
 
 #include "xor_impl.h"
+#include "xor_arch.h"
 
 #ifndef __ARM_NEON__
 #error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon'
@@ -22,10 +23,4 @@
 #define NO_TEMPLATE
 #include "../xor-8regs.c"
 
-struct xor_block_template const xor_block_neon_inner = {
-	.name	= "__inner_neon__",
-	.do_2	= xor_8regs_2,
-	.do_3	= xor_8regs_3,
-	.do_4	= xor_8regs_4,
-	.do_5	= xor_8regs_5,
-};
+__DO_XOR_BLOCKS(neon_inner, xor_8regs_2, xor_8regs_3, xor_8regs_4, xor_8regs_5);
diff --git a/lib/raid/xor/arm/xor.c b/lib/raid/xor/arm/xor.c
index 5bd5f048bbe9..45139b6c55ea 100644
--- a/lib/raid/xor/arm/xor.c
+++ b/lib/raid/xor/arm/xor.c
@@ -127,10 +127,10 @@ xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines);
 }
 
+DO_XOR_BLOCKS(arm4regs, xor_arm4regs_2, xor_arm4regs_3, xor_arm4regs_4,
+		xor_arm4regs_5);
+
 struct xor_block_template xor_block_arm4regs = {
-	.name	= "arm4regs",
-	.do_2	= xor_arm4regs_2,
-	.do_3	= xor_arm4regs_3,
-	.do_4	= xor_arm4regs_4,
-	.do_5	= xor_arm4regs_5,
+	.name		= "arm4regs",
+	.xor_gen	= xor_gen_arm4regs,
 };
diff --git a/lib/raid/xor/arm/xor_arch.h b/lib/raid/xor/arm/xor_arch.h
index 5a7eedb48fbb..775ff835df65 100644
--- a/lib/raid/xor/arm/xor_arch.h
+++ b/lib/raid/xor/arm/xor_arch.h
@@ -7,6 +7,9 @@
 extern struct xor_block_template xor_block_arm4regs;
 extern struct xor_block_template xor_block_neon;
 
+void xor_gen_neon_inner(void *dest, void **srcs, unsigned int src_cnt,
+		unsigned int bytes);
+
 static __always_inline void __init arch_xor_init(void)
 {
 	xor_register(&xor_block_arm4regs);
diff --git a/lib/raid/xor/arm64/xor-neon-glue.c b/lib/raid/xor/arm64/xor-neon-glue.c
index 3db0a318cf5b..f0284f86feb4 100644
--- a/lib/raid/xor/arm64/xor-neon-glue.c
+++ b/lib/raid/xor/arm64/xor-neon-glue.c
@@ -10,50 +10,16 @@
 #include "xor-neon.h"
 
 #define XOR_TEMPLATE(_name)						\
-static void								\
-xor_##_name##_2(unsigned long bytes, unsigned long * __restrict p1,	\
-	   const unsigned long * __restrict p2)				\
+static void xor_gen_##_name(void *dest, void **srcs, unsigned int src_cnt, \
+		unsigned int bytes)					\
 {									\
 	scoped_ksimd()							\
-		__xor_##_name##_2(bytes, p1, p2);			\
-}									\
-									\
-static void								\
-xor_##_name##_3(unsigned long bytes, unsigned long * __restrict p1,	\
-	   const unsigned long * __restrict p2,				\
-	   const unsigned long * __restrict p3)				\
-{									\
-	scoped_ksimd()							\
-		__xor_##_name##_3(bytes, p1, p2, p3);			\
-}									\
-									\
-static void								\
-xor_##_name##_4(unsigned long bytes, unsigned long * __restrict p1,	\
-	   const unsigned long * __restrict p2,				\
-	   const unsigned long * __restrict p3,				\
-	   const unsigned long * __restrict p4)				\
-{									\
-	scoped_ksimd()							\
-		__xor_##_name##_4(bytes, p1, p2, p3, p4);		\
-}									\
-									\
-static void								\
-xor_##_name##_5(unsigned long bytes, unsigned long * __restrict p1,	\
-	   const unsigned long * __restrict p2,				\
-	   const unsigned long * __restrict p3,				\
-	   const unsigned long * __restrict p4,				\
-	   const unsigned long * __restrict p5)				\
-{									\
-	scoped_ksimd()							\
-		__xor_##_name##_5(bytes, p1, p2, p3, p4, p5);		\
+		xor_gen_##_name##_inner(dest, srcs, src_cnt, bytes);	\
 }									\
 									\
 struct xor_block_template xor_block_##_name = {				\
-	.name   = __stringify(_name),					\
-	.do_2   = xor_##_name##_2,					\
-	.do_3   = xor_##_name##_3,					\
-	.do_4   = xor_##_name##_4,					\
-	.do_5	= xor_##_name##_5					\
+	.name   	= __stringify(_name),				\
+	.xor_gen	= xor_gen_##_name,				\
 };
 
 XOR_TEMPLATE(neon);
diff --git a/lib/raid/xor/arm64/xor-neon.c b/lib/raid/xor/arm64/xor-neon.c
index 61f00c4fee49..97ef3cb92496 100644
--- a/lib/raid/xor/arm64/xor-neon.c
+++ b/lib/raid/xor/arm64/xor-neon.c
@@ -10,7 +10,7 @@
 #include "xor_arch.h"
 #include "xor-neon.h"
 
-void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
 		const unsigned long * __restrict p2)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
@@ -37,7 +37,7 @@ void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
-void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
 		const unsigned long * __restrict p2,
 		const unsigned long * __restrict p3)
 {
@@ -73,7 +73,7 @@ void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
-void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
 		const unsigned long * __restrict p2,
 		const unsigned long * __restrict p3,
 		const unsigned long * __restrict p4)
@@ -118,7 +118,7 @@ void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
-void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
 		const unsigned long * __restrict p2,
 		const unsigned long * __restrict p3,
 		const unsigned long * __restrict p4,
@@ -172,6 +172,9 @@ void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
+__DO_XOR_BLOCKS(neon_inner, __xor_neon_2, __xor_neon_3, __xor_neon_4,
+		__xor_neon_5);
+
 static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
 {
 	uint64x2_t res;
@@ -182,7 +185,7 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
 	return res;
 }
 
-void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
 		const unsigned long * __restrict p2,
 		const unsigned long * __restrict p3)
 {
@@ -216,7 +219,7 @@ void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
-void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
 		const unsigned long * __restrict p2,
 		const unsigned long * __restrict p3,
 		const unsigned long * __restrict p4)
@@ -259,7 +262,7 @@ void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
-void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
 		const unsigned long * __restrict p2,
 		const unsigned long * __restrict p3,
 		const unsigned long * __restrict p4,
@@ -304,3 +307,6 @@ void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
 		dp5 += 8;
 	} while (--lines > 0);
 }
+
+__DO_XOR_BLOCKS(eor3_inner, __xor_neon_2, __xor_eor3_3, __xor_eor3_4,
+		__xor_eor3_5);
diff --git a/lib/raid/xor/arm64/xor-neon.h b/lib/raid/xor/arm64/xor-neon.h
index cec0ac846fea..514699ba8f5f 100644
--- a/lib/raid/xor/arm64/xor-neon.h
+++ b/lib/raid/xor/arm64/xor-neon.h
@@ -1,30 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 
-void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2);
-void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2,
-		const unsigned long * __restrict p3);
-void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2,
-		const unsigned long * __restrict p3,
-		const unsigned long * __restrict p4);
-void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2,
-		const unsigned long * __restrict p3,
-		const unsigned long * __restrict p4,
-		const unsigned long * __restrict p5);
-
-#define __xor_eor3_2	__xor_neon_2
-void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2,
-		const unsigned long * __restrict p3);
-void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2,
-		const unsigned long * __restrict p3,
-		const unsigned long * __restrict p4);
-void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2,
-		const unsigned long * __restrict p3,
-		const unsigned long * __restrict p4,
-		const unsigned long * __restrict p5);
+void xor_gen_neon_inner(void *dest, void **srcs, unsigned int src_cnt,
+		unsigned int bytes);
+void xor_gen_eor3_inner(void *dest, void **srcs, unsigned int src_cnt,
+		unsigned int bytes);
diff --git a/lib/raid/xor/loongarch/xor_simd_glue.c b/lib/raid/xor/loongarch/xor_simd_glue.c
index b387aa0213b4..7f324d924f87 100644
--- a/lib/raid/xor/loongarch/xor_simd_glue.c
+++ b/lib/raid/xor/loongarch/xor_simd_glue.c
@@ -11,63 +11,23 @@
 #include "xor_arch.h"
 #include "xor_simd.h"
 
-#define MAKE_XOR_GLUE_2(flavor)							\
-static void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,\
-		      const unsigned long * __restrict p2)			\
+#define MAKE_XOR_GLUES(flavor)							\
+DO_XOR_BLOCKS(flavor##_inner, __xor_##flavor##_2, __xor_##flavor##_3,		\
+		__xor_##flavor##_4, __xor_##flavor##_5);			\
+										\
+static void xor_gen_##flavor(void *dest, void **srcs, unsigned int src_cnt,	\
+		unsigned int bytes)						\
 {										\
 	kernel_fpu_begin();							\
-	__xor_##flavor##_2(bytes, p1, p2);					\
+	xor_gen_##flavor##_inner(dest, srcs, src_cnt, bytes);			\
 	kernel_fpu_end();							\
 }										\
-
-#define MAKE_XOR_GLUE_3(flavor)							\
-static void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,\
-		      const unsigned long * __restrict p2,			\
-		      const unsigned long * __restrict p3)			\
-{										\
-	kernel_fpu_begin();							\
-	__xor_##flavor##_3(bytes, p1, p2, p3);					\
-	kernel_fpu_end();							\
-}										\
-
-#define MAKE_XOR_GLUE_4(flavor)							\
-static void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,\
-		      const unsigned long * __restrict p2,			\
-		      const unsigned long * __restrict p3,			\
-		      const unsigned long * __restrict p4)			\
-{										\
-	kernel_fpu_begin();							\
-	__xor_##flavor##_4(bytes, p1, p2, p3, p4);				\
-	kernel_fpu_end();							\
-}										\
-
-#define MAKE_XOR_GLUE_5(flavor)							\
-static void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,\
-		      const unsigned long * __restrict p2,			\
-		      const unsigned long * __restrict p3,			\
-		      const unsigned long * __restrict p4,			\
-		      const unsigned long * __restrict p5)			\
-{										\
-	kernel_fpu_begin();							\
-	__xor_##flavor##_5(bytes, p1, p2, p3, p4, p5);				\
-	kernel_fpu_end();							\
-}										\
-
-#define MAKE_XOR_GLUES(flavor)				\
-	MAKE_XOR_GLUE_2(flavor);			\
-	MAKE_XOR_GLUE_3(flavor);			\
-	MAKE_XOR_GLUE_4(flavor);			\
-	MAKE_XOR_GLUE_5(flavor);			\
-							\
-struct xor_block_template xor_block_##flavor = {	\
-	.name = __stringify(flavor),			\
-	.do_2 = xor_##flavor##_2,			\
-	.do_3 = xor_##flavor##_3,			\
-	.do_4 = xor_##flavor##_4,			\
-	.do_5 = xor_##flavor##_5,			\
+										\
+struct xor_block_template xor_block_##flavor = {				\
+	.name		= __stringify(flavor),					\
+	.xor_gen	= xor_gen_##flavor					\
 }
 
-
 #ifdef CONFIG_CPU_HAS_LSX
 MAKE_XOR_GLUES(lsx);
 #endif /* CONFIG_CPU_HAS_LSX */
diff --git a/lib/raid/xor/powerpc/xor_vmx.c b/lib/raid/xor/powerpc/xor_vmx.c
index aab49d056d18..09bed98c1bc7 100644
--- a/lib/raid/xor/powerpc/xor_vmx.c
+++ b/lib/raid/xor/powerpc/xor_vmx.c
@@ -10,6 +10,7 @@
  * Sparse (as at v0.5.0) gets very, very confused by this file.
  * Make it a bit simpler for it.
  */
+#include "xor_impl.h"
 #if !defined(__CHECKER__)
 #include <altivec.h>
 #else
@@ -49,9 +50,9 @@ typedef vector signed char unative_t;
 		V1##_3 = vec_xor(V1##_3, V2##_3);	\
 	} while (0)
 
-void __xor_altivec_2(unsigned long bytes,
-		     unsigned long * __restrict v1_in,
-		     const unsigned long * __restrict v2_in)
+static void __xor_altivec_2(unsigned long bytes,
+		unsigned long * __restrict v1_in,
+		const unsigned long * __restrict v2_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
@@ -68,10 +69,10 @@ void __xor_altivec_2(unsigned long bytes,
 	} while (--lines > 0);
 }
 
-void __xor_altivec_3(unsigned long bytes,
-		     unsigned long * __restrict v1_in,
-		     const unsigned long * __restrict v2_in,
-		     const unsigned long * __restrict v3_in)
+static void __xor_altivec_3(unsigned long bytes,
+		unsigned long * __restrict v1_in,
+		const unsigned long * __restrict v2_in,
+		const unsigned long * __restrict v3_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
@@ -92,11 +93,11 @@ void __xor_altivec_3(unsigned long bytes,
 	} while (--lines > 0);
 }
 
-void __xor_altivec_4(unsigned long bytes,
-		     unsigned long * __restrict v1_in,
-		     const unsigned long * __restrict v2_in,
-		     const unsigned long * __restrict v3_in,
-		     const unsigned long * __restrict v4_in)
+static void __xor_altivec_4(unsigned long bytes,
+		unsigned long * __restrict v1_in,
+		const unsigned long * __restrict v2_in,
+		const unsigned long * __restrict v3_in,
+		const unsigned long * __restrict v4_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
@@ -121,12 +122,12 @@ void __xor_altivec_4(unsigned long bytes,
 	} while (--lines > 0);
 }
 
-void __xor_altivec_5(unsigned long bytes,
-		     unsigned long * __restrict v1_in,
-		     const unsigned long * __restrict v2_in,
-		     const unsigned long * __restrict v3_in,
-		     const unsigned long * __restrict v4_in,
-		     const unsigned long * __restrict v5_in)
+static void __xor_altivec_5(unsigned long bytes,
+		unsigned long * __restrict v1_in,
+		const unsigned long * __restrict v2_in,
+		const unsigned long * __restrict v3_in,
+		const unsigned long * __restrict v4_in,
+		const unsigned long * __restrict v5_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
@@ -154,3 +155,6 @@ void __xor_altivec_5(unsigned long bytes,
 		v5 += 4;
 	} while (--lines > 0);
 }
+
+__DO_XOR_BLOCKS(altivec_inner, __xor_altivec_2, __xor_altivec_3,
+		__xor_altivec_4, __xor_altivec_5);
diff --git a/lib/raid/xor/powerpc/xor_vmx.h b/lib/raid/xor/powerpc/xor_vmx.h
index 573c41d90dac..1d26c1133a86 100644
--- a/lib/raid/xor/powerpc/xor_vmx.h
+++ b/lib/raid/xor/powerpc/xor_vmx.h
@@ -6,17 +6,5 @@
  * outside of the enable/disable altivec block.
  */
 
-void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
-		     const unsigned long * __restrict p2);
-void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
-		     const unsigned long * __restrict p2,
-		     const unsigned long * __restrict p3);
-void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
-		     const unsigned long * __restrict p2,
-		     const unsigned long * __restrict p3,
-		     const unsigned long * __restrict p4);
-void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
-		     const unsigned long * __restrict p2,
-		     const unsigned long * __restrict p3,
-		     const unsigned long * __restrict p4,
-		     const unsigned long * __restrict p5);
+void xor_gen_altivec_inner(void *dest, void **srcs, unsigned int src_cnt,
+		unsigned int bytes);
diff --git a/lib/raid/xor/powerpc/xor_vmx_glue.c b/lib/raid/xor/powerpc/xor_vmx_glue.c
index 56e99ddfb64f..dbfbb5cadc36 100644
--- a/lib/raid/xor/powerpc/xor_vmx_glue.c
+++ b/lib/raid/xor/powerpc/xor_vmx_glue.c
@@ -12,56 +12,17 @@
 #include "xor_arch.h"
 #include "xor_vmx.h"
 
-static void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2)
+static void xor_gen_altivec(void *dest, void **srcs, unsigned int src_cnt,
+		unsigned int bytes)
 {
 	preempt_disable();
 	enable_kernel_altivec();
-	__xor_altivec_2(bytes, p1, p2);
-	disable_kernel_altivec();
-	preempt_enable();
-}
-
-static void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2,
-		const unsigned long * __restrict p3)
-{
-	preempt_disable();
-	enable_kernel_altivec();
-	__xor_altivec_3(bytes, p1, p2, p3);
-	disable_kernel_altivec();
-	preempt_enable();
-}
-
-static void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2,
-		const unsigned long * __restrict p3,
-		const unsigned long * __restrict p4)
-{
-	preempt_disable();
-	enable_kernel_altivec();
-	__xor_altivec_4(bytes, p1, p2, p3, p4);
-	disable_kernel_altivec();
-	preempt_enable();
-}
-
-static void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
-		const unsigned long * __restrict p2,
-		const unsigned long * __restrict p3,
-		const unsigned long * __restrict p4,
-		const unsigned long * __restrict p5)
-{
-	preempt_disable();
-	enable_kernel_altivec();
-	__xor_altivec_5(bytes, p1, p2, p3, p4, p5);
+	xor_gen_altivec_inner(dest, srcs, src_cnt, bytes);
 	disable_kernel_altivec();
 	preempt_enable();
 }
 
 struct xor_block_template xor_block_altivec = {
-	.name = "altivec",
-	.do_2 = xor_altivec_2,
-	.do_3 = xor_altivec_3,
-	.do_4 = xor_altivec_4,
-	.do_5 = xor_altivec_5,
+	.name		= "altivec",
+	.xor_gen	= xor_gen_altivec,
 };
diff --git a/lib/raid/xor/riscv/xor-glue.c b/lib/raid/xor/riscv/xor-glue.c
index 060e5f22ebcc..2e4c1b05d998 100644
--- a/lib/raid/xor/riscv/xor-glue.c
+++ b/lib/raid/xor/riscv/xor-glue.c
@@ -9,48 +9,17 @@
 #include "xor_impl.h"
 #include "xor_arch.h"
 
-static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
-			 const unsigned long *__restrict p2)
-{
-	kernel_vector_begin();
-	xor_regs_2_(bytes, p1, p2);
-	kernel_vector_end();
-}
-
-static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
-			 const unsigned long *__restrict p2,
-			 const unsigned long *__restrict p3)
-{
-	kernel_vector_begin();
-	xor_regs_3_(bytes, p1, p2, p3);
-	kernel_vector_end();
-}
-
-static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
-			 const unsigned long *__restrict p2,
-			 const unsigned long *__restrict p3,
-			 const unsigned long *__restrict p4)
-{
-	kernel_vector_begin();
-	xor_regs_4_(bytes, p1, p2, p3, p4);
-	kernel_vector_end();
-}
+DO_XOR_BLOCKS(vector_inner, xor_regs_2_, xor_regs_3_, xor_regs_4_, xor_regs_5_);
 
-static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
-			 const unsigned long *__restrict p2,
-			 const unsigned long *__restrict p3,
-			 const unsigned long *__restrict p4,
-			 const unsigned long *__restrict p5)
+static void xor_gen_vector(void *dest, void **srcs, unsigned int src_cnt,
+		unsigned int bytes)
 {
 	kernel_vector_begin();
-	xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+	xor_gen_vector_inner(dest, srcs, src_cnt, bytes);
 	kernel_vector_end();
 }
 
 struct xor_block_template xor_block_rvv = {
-	.name = "rvv",
-	.do_2 = xor_vector_2,
-	.do_3 = xor_vector_3,
-	.do_4 = xor_vector_4,
-	.do_5 = xor_vector_5
+	.name		= "rvv",
+	.xor_gen	= xor_gen_vector,
 };
diff --git a/lib/raid/xor/s390/xor.c b/lib/raid/xor/s390/xor.c
index c28cb56fec92..0c478678a129 100644
--- a/lib/raid/xor/s390/xor.c
+++ b/lib/raid/xor/s390/xor.c
@@ -125,10 +125,9 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
 		: : "0", "cc", "memory");
 }
 
+DO_XOR_BLOCKS(xc, xor_xc_2, xor_xc_3, xor_xc_4, xor_xc_5);
+
 struct xor_block_template xor_block_xc = {
-	.name = "xc",
-	.do_2 = xor_xc_2,
-	.do_3 = xor_xc_3,
-	.do_4 = xor_xc_4,
-	.do_5 = xor_xc_5,
+	.name		= "xc",
+	.xor_gen	= xor_gen_xc,
 };
diff --git a/lib/raid/xor/sparc/xor-sparc32.c b/lib/raid/xor/sparc/xor-sparc32.c
index 307c4a84f535..fb37631e90e6 100644
--- a/lib/raid/xor/sparc/xor-sparc32.c
+++ b/lib/raid/xor/sparc/xor-sparc32.c
@@ -244,10 +244,9 @@ sparc_5(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
+DO_XOR_BLOCKS(sparc32, sparc_2, sparc_3, sparc_4, sparc_5);
+
 struct xor_block_template xor_block_SPARC = {
-	.name	= "SPARC",
-	.do_2	= sparc_2,
-	.do_3	= sparc_3,
-	.do_4	= sparc_4,
-	.do_5	= sparc_5,
+	.name		= "SPARC",
+	.xor_gen	= xor_gen_sparc32,
 };
diff --git a/lib/raid/xor/sparc/xor-sparc64-glue.c b/lib/raid/xor/sparc/xor-sparc64-glue.c
index 5f90c2460b54..a8a686e0d258 100644
--- a/lib/raid/xor/sparc/xor-sparc64-glue.c
+++ b/lib/raid/xor/sparc/xor-sparc64-glue.c
@@ -28,12 +28,11 @@ void xor_vis_5(unsigned long bytes, unsigned long * __restrict p1,
 
 /* XXX Ugh, write cheetah versions... -DaveM */
 
+DO_XOR_BLOCKS(vis, xor_vis_2, xor_vis_3, xor_vis_4, xor_vis_5);
+
 struct xor_block_template xor_block_VIS = {
-        .name	= "VIS",
-        .do_2	= xor_vis_2,
-        .do_3	= xor_vis_3,
-        .do_4	= xor_vis_4,
-        .do_5	= xor_vis_5,
+        .name		= "VIS",
+	.xor_gen	= xor_gen_vis,
 };
 
 void xor_niagara_2(unsigned long bytes, unsigned long * __restrict p1,
@@ -51,10 +50,10 @@ void xor_niagara_5(unsigned long bytes, unsigned long * __restrict p1,
 		   const unsigned long * __restrict p4,
 		   const unsigned long * __restrict p5);
 
+DO_XOR_BLOCKS(niagara, xor_niagara_2, xor_niagara_3, xor_niagara_4,
+		xor_niagara_5);
+
 struct xor_block_template xor_block_niagara = {
-        .name	= "Niagara",
-        .do_2	= xor_niagara_2,
-        .do_3	= xor_niagara_3,
-        .do_4	= xor_niagara_4,
-        .do_5	= xor_niagara_5,
+        .name		= "Niagara",
+	.xor_gen	= xor_gen_niagara,
 };
diff --git a/lib/raid/xor/x86/xor-avx.c b/lib/raid/xor/x86/xor-avx.c
index d411efa1ff43..f7777d7aa269 100644
--- a/lib/raid/xor/x86/xor-avx.c
+++ b/lib/raid/xor/x86/xor-avx.c
@@ -29,8 +29,6 @@ static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
 {
 	unsigned long lines = bytes >> 9;
 
-	kernel_fpu_begin();
-
 	while (lines--) {
 #undef BLOCK
 #define BLOCK(i, reg) \
@@ -47,8 +45,6 @@ do { \
 		p0 = (unsigned long *)((uintptr_t)p0 + 512);
 		p1 = (unsigned long *)((uintptr_t)p1 + 512);
 	}
-
-	kernel_fpu_end();
 }
 
 static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
@@ -57,8 +53,6 @@ static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
 {
 	unsigned long lines = bytes >> 9;
 
-	kernel_fpu_begin();
-
 	while (lines--) {
 #undef BLOCK
 #define BLOCK(i, reg) \
@@ -78,8 +72,6 @@ do { \
 		p1 = (unsigned long *)((uintptr_t)p1 + 512);
 		p2 = (unsigned long *)((uintptr_t)p2 + 512);
 	}
-
-	kernel_fpu_end();
 }
 
 static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
@@ -89,8 +81,6 @@ static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
 {
 	unsigned long lines = bytes >> 9;
 
-	kernel_fpu_begin();
-
 	while (lines--) {
 #undef BLOCK
 #define BLOCK(i, reg) \
@@ -113,8 +103,6 @@ do { \
 		p2 = (unsigned long *)((uintptr_t)p2 + 512);
 		p3 = (unsigned long *)((uintptr_t)p3 + 512);
 	}
-
-	kernel_fpu_end();
 }
 
 static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
@@ -125,8 +113,6 @@ static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
 {
 	unsigned long lines = bytes >> 9;
 
-	kernel_fpu_begin();
-
 	while (lines--) {
 #undef BLOCK
 #define BLOCK(i, reg) \
@@ -152,14 +138,19 @@ do { \
 		p3 = (unsigned long *)((uintptr_t)p3 + 512);
 		p4 = (unsigned long *)((uintptr_t)p4 + 512);
 	}
+}
+
+DO_XOR_BLOCKS(avx_inner, xor_avx_2, xor_avx_3, xor_avx_4, xor_avx_5);
 
+static void xor_gen_avx(void *dest, void **srcs, unsigned int src_cnt,
+			unsigned int bytes)
+{
+	kernel_fpu_begin();
+	xor_gen_avx_inner(dest, srcs, src_cnt, bytes);
 	kernel_fpu_end();
 }
 
 struct xor_block_template xor_block_avx = {
-	.name = "avx",
-	.do_2 = xor_avx_2,
-	.do_3 = xor_avx_3,
-	.do_4 = xor_avx_4,
-	.do_5 = xor_avx_5,
+	.name		= "avx",
+	.xor_gen	= xor_gen_avx,
 };
diff --git a/lib/raid/xor/x86/xor-mmx.c b/lib/raid/xor/x86/xor-mmx.c
index e48c58f92874..63a8b0444fce 100644
--- a/lib/raid/xor/x86/xor-mmx.c
+++ b/lib/raid/xor/x86/xor-mmx.c
@@ -21,8 +21,6 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 7;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i)				\
@@ -55,8 +53,6 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
 	  "+r" (p1), "+r" (p2)
 	:
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -66,8 +62,6 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 7;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i)				\
@@ -105,8 +99,6 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
 	  "+r" (p1), "+r" (p2), "+r" (p3)
 	:
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -117,8 +109,6 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 7;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i)				\
@@ -161,8 +151,6 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
 	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
 	:
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 
@@ -175,8 +163,6 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 7;
 
-	kernel_fpu_begin();
-
 	/* Make sure GCC forgets anything it knows about p4 or p5,
 	   such that it won't pass to the asm volatile below a
 	   register that is shared with any other variable.  That's
@@ -237,8 +223,6 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
 	   Clobber them just to be sure nobody does something stupid
 	   like assuming they have some legal value.  */
 	asm("" : "=r" (p4), "=r" (p5));
-
-	kernel_fpu_end();
 }
 
 #undef LD
@@ -255,8 +239,6 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 6;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 	" .align 32	             ;\n"
 	" 1:                         ;\n"
@@ -293,8 +275,6 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
 	  "+r" (p1), "+r" (p2)
 	:
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -304,8 +284,6 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 6;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 	" .align 32,0x90             ;\n"
 	" 1:                         ;\n"
@@ -351,8 +329,6 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
 	  "+r" (p1), "+r" (p2), "+r" (p3)
 	:
 	: "memory" );
-
-	kernel_fpu_end();
 }
 
 static void
@@ -363,8 +339,6 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 6;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 	" .align 32,0x90             ;\n"
 	" 1:                         ;\n"
@@ -419,8 +393,6 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
 	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
 	:
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -432,8 +404,6 @@ xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 6;
 
-	kernel_fpu_begin();
-
 	/* Make sure GCC forgets anything it knows about p4 or p5,
 	   such that it won't pass to the asm volatile below a
 	   register that is shared with any other variable.  That's
@@ -510,22 +480,36 @@ xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
 	   Clobber them just to be sure nobody does something stupid
 	   like assuming they have some legal value.  */
 	asm("" : "=r" (p4), "=r" (p5));
+}
+
+DO_XOR_BLOCKS(pII_mmx_inner, xor_pII_mmx_2, xor_pII_mmx_3, xor_pII_mmx_4,
+		xor_pII_mmx_5);
 
+static void xor_gen_pII_mmx(void *dest, void **srcs, unsigned int src_cnt,
+		unsigned int bytes)
+{
+	kernel_fpu_begin();
+	xor_gen_pII_mmx_inner(dest, srcs, src_cnt, bytes);
 	kernel_fpu_end();
 }
 
 struct xor_block_template xor_block_pII_mmx = {
-	.name = "pII_mmx",
-	.do_2 = xor_pII_mmx_2,
-	.do_3 = xor_pII_mmx_3,
-	.do_4 = xor_pII_mmx_4,
-	.do_5 = xor_pII_mmx_5,
+	.name		= "pII_mmx",
+	.xor_gen	= xor_gen_pII_mmx,
 };
 
+DO_XOR_BLOCKS(p5_mmx_inner, xor_p5_mmx_2, xor_p5_mmx_3, xor_p5_mmx_4,
+		xor_p5_mmx_5);
+
+static void xor_gen_p5_mmx(void *dest, void **srcs, unsigned int src_cnt,
+		unsigned int bytes)
+{
+	kernel_fpu_begin();
+	xor_gen_p5_mmx_inner(dest, srcs, src_cnt, bytes);
+	kernel_fpu_end();
+}
+
 struct xor_block_template xor_block_p5_mmx = {
-	.name = "p5_mmx",
-	.do_2 = xor_p5_mmx_2,
-	.do_3 = xor_p5_mmx_3,
-	.do_4 = xor_p5_mmx_4,
-	.do_5 = xor_p5_mmx_5,
+	.name		= "p5_mmx",
+	.xor_gen	= xor_gen_p5_mmx,
 };
diff --git a/lib/raid/xor/x86/xor-sse.c b/lib/raid/xor/x86/xor-sse.c
index 5993ed688c15..c6626ecae6ba 100644
--- a/lib/raid/xor/x86/xor-sse.c
+++ b/lib/raid/xor/x86/xor-sse.c
@@ -51,8 +51,6 @@ xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 8;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i)					\
@@ -93,8 +91,6 @@ xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,
 	  [p1] "+r" (p1), [p2] "+r" (p2)
 	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -103,8 +99,6 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 8;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i)			\
@@ -128,8 +122,6 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,
 	  [p1] "+r" (p1), [p2] "+r" (p2)
 	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -139,8 +131,6 @@ xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 8;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i) \
@@ -188,8 +178,6 @@ xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,
 	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
 	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -199,8 +187,6 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 8;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i)			\
@@ -226,8 +212,6 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,
 	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
 	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -238,8 +222,6 @@ xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 8;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i) \
@@ -294,8 +276,6 @@ xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,
 	  [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
 	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -306,8 +286,6 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 8;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i)			\
@@ -335,8 +313,6 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,
 	  [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
 	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -348,8 +324,6 @@ xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 8;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i) \
@@ -411,8 +385,6 @@ xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,
 	  [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
 	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
 	: "memory");
-
-	kernel_fpu_end();
 }
 
 static void
@@ -424,8 +396,6 @@ xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,
 {
 	unsigned long lines = bytes >> 8;
 
-	kernel_fpu_begin();
-
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i)			\
@@ -455,22 +425,35 @@ xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,
 	  [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
 	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
 	: "memory");
+}
+
+DO_XOR_BLOCKS(sse_inner, xor_sse_2, xor_sse_3, xor_sse_4, xor_sse_5);
 
+static void xor_gen_sse(void *dest, void **srcs, unsigned int src_cnt,
+			unsigned int bytes)
+{
+	kernel_fpu_begin();
+	xor_gen_sse_inner(dest, srcs, src_cnt, bytes);
 	kernel_fpu_end();
 }
 
 struct xor_block_template xor_block_sse = {
-	.name = "sse",
-	.do_2 = xor_sse_2,
-	.do_3 = xor_sse_3,
-	.do_4 = xor_sse_4,
-	.do_5 = xor_sse_5,
+	.name		= "sse",
+	.xor_gen	= xor_gen_sse,
 };
 
+DO_XOR_BLOCKS(sse_pf64_inner, xor_sse_2_pf64, xor_sse_3_pf64, xor_sse_4_pf64,
+		xor_sse_5_pf64);
+
+static void xor_gen_sse_pf64(void *dest, void **srcs, unsigned int src_cnt,
+			unsigned int bytes)
+{
+	kernel_fpu_begin();
+	xor_gen_sse_pf64_inner(dest, srcs, src_cnt, bytes);
+	kernel_fpu_end();
+}
+
 struct xor_block_template xor_block_sse_pf64 = {
-	.name = "prefetch64-sse",
-	.do_2 = xor_sse_2_pf64,
-	.do_3 = xor_sse_3_pf64,
-	.do_4 = xor_sse_4_pf64,
-	.do_5 = xor_sse_5_pf64,
+	.name		= "prefetch64-sse",
+	.xor_gen	= xor_gen_sse_pf64,
 };
diff --git a/lib/raid/xor/xor-32regs-prefetch.c b/lib/raid/xor/xor-32regs-prefetch.c
index 2856a8e50cb8..ade2a7d8cbe2 100644
--- a/lib/raid/xor/xor-32regs-prefetch.c
+++ b/lib/raid/xor/xor-32regs-prefetch.c
@@ -258,10 +258,10 @@ xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
 		goto once_more;
 }
 
+DO_XOR_BLOCKS(32regs_p, xor_32regs_p_2, xor_32regs_p_3, xor_32regs_p_4,
+		xor_32regs_p_5);
+
 struct xor_block_template xor_block_32regs_p = {
-	.name = "32regs_prefetch",
-	.do_2 = xor_32regs_p_2,
-	.do_3 = xor_32regs_p_3,
-	.do_4 = xor_32regs_p_4,
-	.do_5 = xor_32regs_p_5,
+	.name		= "32regs_prefetch",
+	.xor_gen	= xor_gen_32regs_p,
 };
diff --git a/lib/raid/xor/xor-32regs.c b/lib/raid/xor/xor-32regs.c
index cc44d64032fa..acb4a10d1e95 100644
--- a/lib/raid/xor/xor-32regs.c
+++ b/lib/raid/xor/xor-32regs.c
@@ -209,10 +209,9 @@ xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
+DO_XOR_BLOCKS(32regs, xor_32regs_2, xor_32regs_3, xor_32regs_4, xor_32regs_5);
+
 struct xor_block_template xor_block_32regs = {
-	.name = "32regs",
-	.do_2 = xor_32regs_2,
-	.do_3 = xor_32regs_3,
-	.do_4 = xor_32regs_4,
-	.do_5 = xor_32regs_5,
+	.name		= "32regs",
+	.xor_gen	= xor_gen_32regs,
 };
diff --git a/lib/raid/xor/xor-8regs-prefetch.c b/lib/raid/xor/xor-8regs-prefetch.c
index 1d53aec50d27..451527a951b1 100644
--- a/lib/raid/xor/xor-8regs-prefetch.c
+++ b/lib/raid/xor/xor-8regs-prefetch.c
@@ -136,10 +136,11 @@ xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
 		goto once_more;
 }
 
+
+DO_XOR_BLOCKS(8regs_p, xor_8regs_p_2, xor_8regs_p_3, xor_8regs_p_4,
+		xor_8regs_p_5);
+
 struct xor_block_template xor_block_8regs_p = {
-	.name = "8regs_prefetch",
-	.do_2 = xor_8regs_p_2,
-	.do_3 = xor_8regs_p_3,
-	.do_4 = xor_8regs_p_4,
-	.do_5 = xor_8regs_p_5,
+	.name		= "8regs_prefetch",
+	.xor_gen	= xor_gen_8regs_p,
 };
diff --git a/lib/raid/xor/xor-8regs.c b/lib/raid/xor/xor-8regs.c
index 72a44e898c55..1edaed8acffe 100644
--- a/lib/raid/xor/xor-8regs.c
+++ b/lib/raid/xor/xor-8regs.c
@@ -94,11 +94,10 @@ xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
 }
 
 #ifndef NO_TEMPLATE
+DO_XOR_BLOCKS(8regs, xor_8regs_2, xor_8regs_3, xor_8regs_4, xor_8regs_5);
+
 struct xor_block_template xor_block_8regs = {
-	.name = "8regs",
-	.do_2 = xor_8regs_2,
-	.do_3 = xor_8regs_3,
-	.do_4 = xor_8regs_4,
-	.do_5 = xor_8regs_5,
+	.name		= "8regs",
+	.xor_gen	= xor_gen_8regs,
 };
 #endif /* NO_TEMPLATE */
diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c
index 2e46b6b83b0a..9e043d8c3a7a 100644
--- a/lib/raid/xor/xor-core.c
+++ b/lib/raid/xor/xor-core.c
@@ -13,39 +13,9 @@
 #include <linux/preempt.h>
 #include "xor_impl.h"
 
-/* The xor routines to use.  */
+/* The xor routine to use.  */
 static struct xor_block_template *active_template;
 
-void
-xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
-{
-	unsigned long *p1, *p2, *p3, *p4;
-
-	WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count());
-
-	p1 = (unsigned long *) srcs[0];
-	if (src_count == 1) {
-		active_template->do_2(bytes, dest, p1);
-		return;
-	}
-
-	p2 = (unsigned long *) srcs[1];
-	if (src_count == 2) {
-		active_template->do_3(bytes, dest, p1, p2);
-		return;
-	}
-
-	p3 = (unsigned long *) srcs[2];
-	if (src_count == 3) {
-		active_template->do_4(bytes, dest, p1, p2, p3);
-		return;
-	}
-
-	p4 = (unsigned long *) srcs[3];
-	active_template->do_5(bytes, dest, p1, p2, p3, p4);
-}
-EXPORT_SYMBOL(xor_blocks);
-
 /**
  * xor_gen - generate RAID-style XOR information
  * @dest:	destination vector
@@ -63,20 +33,11 @@ EXPORT_SYMBOL(xor_blocks);
  */
 void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes)
 {
-	unsigned int src_off = 0;
-
-	WARN_ON_ONCE(in_interrupt());
+	WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count());
 	WARN_ON_ONCE(bytes == 0);
 	WARN_ON_ONCE(bytes & 511);
 
-	while (src_cnt > 0) {
-		unsigned int this_cnt = min(src_cnt, MAX_XOR_BLOCKS);
-
-		xor_blocks(this_cnt, bytes, dest, srcs + src_off);
-
-		src_cnt -= this_cnt;
-		src_off += this_cnt;
-	}
+	active_template->xor_gen(dest, srcs, src_cnt, bytes);
 }
 EXPORT_SYMBOL(xor_gen);
 
@@ -120,6 +81,7 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
 	int speed;
 	unsigned long reps;
 	ktime_t min, start, t0;
+	void *srcs[1] = { b2 };
 
 	preempt_disable();
 
@@ -130,7 +92,7 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
 		cpu_relax();
 	do {
 		mb(); /* prevent loop optimization */
-		tmpl->do_2(BENCH_SIZE, b1, b2);
+		tmpl->xor_gen(b1, srcs, 1, BENCH_SIZE);
 		mb();
 	} while (reps++ < REPS || (t0 = ktime_get()) == start);
 	min = ktime_sub(t0, start);
diff --git a/lib/raid/xor/xor_impl.h b/lib/raid/xor/xor_impl.h
index 44b6c99e2093..09ae2916f71e 100644
--- a/lib/raid/xor/xor_impl.h
+++ b/lib/raid/xor/xor_impl.h
@@ -3,27 +3,47 @@
 #define _XOR_IMPL_H
 
 #include <linux/init.h>
+#include <linux/minmax.h>
 
 struct xor_block_template {
 	struct xor_block_template *next;
 	const char *name;
 	int speed;
-	void (*do_2)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_3)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_4)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_5)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
+	void (*xor_gen)(void *dest, void **srcs, unsigned int src_cnt,
+			unsigned int bytes);
 };
 
+#define __DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4)	\
+void								\
+xor_gen_##_name(void *dest, void **srcs, unsigned int src_cnt,		\
+		unsigned int bytes)					\
+{									\
+	unsigned int src_off = 0;					\
+									\
+	while (src_cnt > 0) {						\
+		unsigned int this_cnt = min(src_cnt, 4);		\
+									\
+		if (this_cnt == 1)					\
+			_handle1(bytes, dest, srcs[src_off]);		\
+		else if (this_cnt == 2)					\
+			_handle2(bytes, dest, srcs[src_off],		\
+				srcs[src_off + 1]);			\
+		else if (this_cnt == 3)					\
+			_handle3(bytes, dest, srcs[src_off],		\
+				srcs[src_off + 1], srcs[src_off + 2]);	\
+		else							\
+			_handle4(bytes, dest, srcs[src_off],		\
+				srcs[src_off + 1], srcs[src_off + 2],	\
+				srcs[src_off + 3]);			\
+									\
+		src_cnt -= this_cnt;					\
+		src_off += this_cnt;					\
+	}								\
+}
+
+#define DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4)	\
+	static __DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4)
+
 /* generic implementations */
 extern struct xor_block_template xor_block_8regs;
 extern struct xor_block_template xor_block_32regs;
-- 
cgit v1.2.3


From fe74eb289163d10b34f8ee571cdc3257306f343f Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu@redhat.com>
Date: Wed, 25 Feb 2026 14:03:45 +0800
Subject: crash: align the declaration of crash_load_dm_crypt_keys with
 CONFIG_CRASH_DM_CRYPT

This will prevent a compilation failure when CONFIG_CRASH_DUMP is enabled
but CONFIG_CRASH_DM_CRYPT is disabled,

       arch/powerpc/kexec/elf_64.c: In function 'elf64_load':
    >> arch/powerpc/kexec/elf_64.c:82:23: error: implicit declaration of function 'crash_load_dm_crypt_keys' [-Werror=implicit-function-declaration]
          82 |                 ret = crash_load_dm_crypt_keys(image);
             |                       ^~~~~~~~~~~~~~~~~~~~~~~~
       cc1: some warnings being treated as errors

Link: https://lkml.kernel.org/r/20260225060347.718905-3-coxu@redhat.com
Signed-off-by: Coiby Xu <coxu@redhat.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202602120648.RgQALnnI-lkp@intel.com/
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Arnaud Lefebvre <arnaud.lefebvre@clever-cloud.com>
Cc: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Kairui Song <ryncsn@gmail.com>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Pingfan Liu <kernelfans@gmail.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sourabh Jain <sourabhjain@linux.ibm.com>
Cc: Thomas Staudt <tstaudt@de.ibm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/crash_core.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index d35726d6a415..c1dee3f971a9 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -34,13 +34,6 @@ static inline void arch_kexec_protect_crashkres(void) { }
 static inline void arch_kexec_unprotect_crashkres(void) { }
 #endif
 
-#ifdef CONFIG_CRASH_DM_CRYPT
-int crash_load_dm_crypt_keys(struct kimage *image);
-ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos);
-#else
-static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; }
-#endif
-
 #ifndef arch_crash_handle_hotplug_event
 static inline void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { }
 #endif
@@ -96,4 +89,11 @@ static inline void crash_save_cpu(struct pt_regs *regs, int cpu) {};
 static inline int kimage_crash_copy_vmcoreinfo(struct kimage *image) { return 0; };
 #endif /* CONFIG_CRASH_DUMP*/
 
+#ifdef CONFIG_CRASH_DM_CRYPT
+int crash_load_dm_crypt_keys(struct kimage *image);
+ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos);
+#else
+static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; }
+#endif
+
 #endif /* LINUX_CRASH_CORE_H */
-- 
cgit v1.2.3


From 49eac82653e13243cec5f8c25e35161b922a9c80 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 3 Apr 2026 14:03:37 +0200
Subject: Revert "usb: cdnsp: Add support for device-only configuration"

This reverts commit 7b7f2dd913829e06705035dfc41ca25fa6ec68d3.

There was some problems with an earlier cdns3 change, so this one needs
to be backed out as well.

Cc: Pawel Laszczak <pawell@cadence.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Reported-by: Peter Chen <peter.chen@kernel.org>
Link: https://lore.kernel.org/r/ac+LEWMCQpLSnfoD@nchen-desktop
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/cdns3/cdns3-plat.c | 24 +++++++++------------
 drivers/usb/cdns3/cdnsp-pci.c  | 47 ++++++++----------------------------------
 drivers/usb/cdns3/core.c       |  3 +--
 drivers/usb/cdns3/core.h       |  5 +----
 drivers/usb/cdns3/drd.c        | 16 ++------------
 include/linux/pci_ids.h        |  1 -
 6 files changed, 23 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/cdns3/cdns3-plat.c b/drivers/usb/cdns3/cdns3-plat.c
index 33746e672cda..71c612e27b73 100644
--- a/drivers/usb/cdns3/cdns3-plat.c
+++ b/drivers/usb/cdns3/cdns3-plat.c
@@ -75,7 +75,6 @@ static int cdns3_plat_probe(struct platform_device *pdev)
 	if (cdns->pdata && cdns->pdata->override_apb_timeout)
 		cdns->override_apb_timeout = cdns->pdata->override_apb_timeout;
 
-	cdns->no_drd = device_property_read_bool(dev, "no_drd");
 	platform_set_drvdata(pdev, cdns);
 
 	ret = platform_get_irq_byname(pdev, "host");
@@ -108,23 +107,21 @@ static int cdns3_plat_probe(struct platform_device *pdev)
 
 	cdns->dev_regs	= regs;
 
-	if (!cdns->no_drd) {
-		cdns->otg_irq = platform_get_irq_byname(pdev, "otg");
-		if (cdns->otg_irq < 0)
-			return dev_err_probe(dev, cdns->otg_irq,
-					     "Failed to get otg IRQ\n");
+	cdns->otg_irq = platform_get_irq_byname(pdev, "otg");
+	if (cdns->otg_irq < 0)
+		return dev_err_probe(dev, cdns->otg_irq,
+				     "Failed to get otg IRQ\n");
 
-		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "otg");
-		if (!res) {
-			dev_err(dev, "couldn't get otg resource\n");
-			return -ENXIO;
-		}
-
-		cdns->otg_res = *res;
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "otg");
+	if (!res) {
+		dev_err(dev, "couldn't get otg resource\n");
+		return -ENXIO;
 	}
 
 	cdns->phyrst_a_enable = device_property_read_bool(dev, "cdns,phyrst-a-enable");
 
+	cdns->otg_res = *res;
+
 	cdns->wakeup_irq = platform_get_irq_byname_optional(pdev, "wakeup");
 	if (cdns->wakeup_irq == -EPROBE_DEFER)
 		return cdns->wakeup_irq;
@@ -161,7 +158,6 @@ static int cdns3_plat_probe(struct platform_device *pdev)
 		goto err_cdns_init;
 
 	cdns->gadget_init = cdns3_plat_gadget_init;
-
 	ret = cdns_core_init_role(cdns);
 	if (ret)
 		goto err_cdns_init;
diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c
index e20c59ceb8a4..432007cfe695 100644
--- a/drivers/usb/cdns3/cdnsp-pci.c
+++ b/drivers/usb/cdns3/cdnsp-pci.c
@@ -19,7 +19,6 @@
 
 struct cdnsp_wrap {
 	struct platform_device *plat_dev;
-	struct property_entry prop[3];
 	struct resource dev_res[6];
 	int devfn;
 };
@@ -30,15 +29,10 @@ struct cdnsp_wrap {
 #define RES_HOST_ID		3
 #define RES_DEV_ID		4
 #define RES_DRD_ID		5
-/* DRD PCI configuration - 64-bit addressing */
-/* First PCI function */
+
 #define PCI_BAR_HOST		0
-#define PCI_BAR_DEV		2
-/* Second PCI function */
 #define PCI_BAR_OTG		0
-/* Device only PCI configuration - 32-bit addressing */
-/* First PCI function */
-#define PCI_BAR_ONLY_DEV	1
+#define PCI_BAR_DEV		2
 
 #define PCI_DEV_FN_HOST_DEVICE	0
 #define PCI_DEV_FN_OTG		1
@@ -71,7 +65,6 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 	struct cdnsp_wrap *wrap;
 	struct resource *res;
 	struct pci_dev *func;
-	bool no_drd = false;
 	int ret = 0;
 
 	/*
@@ -82,14 +75,11 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 		    pdev->devfn != PCI_DEV_FN_OTG))
 		return -EINVAL;
 
-	if (pdev->device == PCI_DEVICE_ID_CDNS_UDC_USBSSP)
-		no_drd = true;
-
 	func = cdnsp_get_second_fun(pdev);
-	if (!func && !no_drd)
+	if (!func)
 		return -EINVAL;
 
-	if ((func && func->class == PCI_CLASS_SERIAL_USB_XHCI) ||
+	if (func->class == PCI_CLASS_SERIAL_USB_XHCI ||
 	    pdev->class == PCI_CLASS_SERIAL_USB_XHCI) {
 		ret = -EINVAL;
 		goto put_pci;
@@ -103,7 +93,7 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 
 	pci_set_master(pdev);
 
-	if (func && pci_is_enabled(func)) {
+	if (pci_is_enabled(func)) {
 		wrap = pci_get_drvdata(func);
 	} else {
 		wrap = kzalloc_obj(*wrap);
@@ -116,13 +106,10 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 	res = wrap->dev_res;
 
 	if (pdev->devfn == PCI_DEV_FN_HOST_DEVICE) {
-		int bar_dev = no_drd ? PCI_BAR_ONLY_DEV : PCI_BAR_DEV;
-
 		/* Function 0: host(BAR_0) + device(BAR_2). */
 		dev_dbg(&pdev->dev, "Initialize Device resources\n");
-
-		res[RES_DEV_ID].start = pci_resource_start(pdev, bar_dev);
-		res[RES_DEV_ID].end = pci_resource_end(pdev, bar_dev);
+		res[RES_DEV_ID].start = pci_resource_start(pdev, PCI_BAR_DEV);
+		res[RES_DEV_ID].end = pci_resource_end(pdev, PCI_BAR_DEV);
 		res[RES_DEV_ID].name = "dev";
 		res[RES_DEV_ID].flags = IORESOURCE_MEM;
 		dev_dbg(&pdev->dev, "USBSSP-DEV physical base addr: %pa\n",
@@ -158,20 +145,9 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 		wrap->dev_res[RES_IRQ_OTG_ID].flags = IORESOURCE_IRQ;
 	}
 
-	if (no_drd || pci_is_enabled(func)) {
-		u8 idx = 0;
-
+	if (pci_is_enabled(func)) {
 		/* set up platform device info */
 		pdata.override_apb_timeout = CHICKEN_APB_TIMEOUT_VALUE;
-		if (no_drd) {
-			wrap->prop[idx++] = PROPERTY_ENTRY_STRING("dr_mode", "peripheral");
-			wrap->prop[idx++] = PROPERTY_ENTRY_BOOL("no_drd");
-		} else {
-			wrap->prop[idx++] = PROPERTY_ENTRY_STRING("dr_mode", "otg");
-			wrap->prop[idx++] = PROPERTY_ENTRY_BOOL("usb-role-switch");
-		}
-
-		wrap->prop[idx] = (struct property_entry){ };
 		memset(&plat_info, 0, sizeof(plat_info));
 		plat_info.parent = &pdev->dev;
 		plat_info.fwnode = pdev->dev.fwnode;
@@ -182,7 +158,6 @@ static int cdnsp_pci_probe(struct pci_dev *pdev,
 		plat_info.dma_mask = pdev->dma_mask;
 		plat_info.data = &pdata;
 		plat_info.size_data = sizeof(pdata);
-		plat_info.properties = wrap->prop;
 		wrap->devfn = pdev->devfn;
 		/* register platform device */
 		wrap->plat_dev = platform_device_register_full(&plat_info);
@@ -210,17 +185,13 @@ static void cdnsp_pci_remove(struct pci_dev *pdev)
 	if (wrap->devfn == pdev->devfn)
 		platform_device_unregister(wrap->plat_dev);
 
-	if (!func || !pci_is_enabled(func))
+	if (!pci_is_enabled(func))
 		kfree(wrap);
 
 	pci_dev_put(func);
 }
 
 static const struct pci_device_id cdnsp_pci_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_CDNS, PCI_DEVICE_ID_CDNS_UDC_USBSSP),
-	  .class = PCI_CLASS_SERIAL_USB_DEVICE },
-	{ PCI_DEVICE(PCI_VENDOR_ID_CDNS, PCI_DEVICE_ID_CDNS_UDC_USBSSP),
-	  .class = PCI_CLASS_SERIAL_USB_CDNS },
 	{ PCI_DEVICE(PCI_VENDOR_ID_CDNS, PCI_DEVICE_ID_CDNS_USBSSP),
 	  .class = PCI_CLASS_SERIAL_USB_DEVICE },
 	{ PCI_DEVICE(PCI_VENDOR_ID_CDNS, PCI_DEVICE_ID_CDNS_USBSSP),
diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c
index 72f7acba6258..10f00b6c3c83 100644
--- a/drivers/usb/cdns3/core.c
+++ b/drivers/usb/cdns3/core.c
@@ -71,8 +71,7 @@ static void cdns_role_stop(struct cdns *cdns)
 static void cdns_exit_roles(struct cdns *cdns)
 {
 	cdns_role_stop(cdns);
-	if (!cdns->no_drd)
-		cdns_drd_exit(cdns);
+	cdns_drd_exit(cdns);
 }
 
 /**
diff --git a/drivers/usb/cdns3/core.h b/drivers/usb/cdns3/core.h
index 6abe231f4559..dc8c4137de15 100644
--- a/drivers/usb/cdns3/core.h
+++ b/drivers/usb/cdns3/core.h
@@ -80,11 +80,9 @@ struct cdns3_platform_data {
  * @pdata: platform data from glue layer
  * @lock: spinlock structure
  * @xhci_plat_data: xhci private data structure pointer
- * @gadget_init: pointer to gadget initialization function
  * @override_apb_timeout: hold value of APB timeout. For value 0 the default
  *                        value in CHICKEN_BITS_3 will be preserved.
- * @no_drd: DRD register block is inaccessible - driver handles only
- *          device mode.
+ * @gadget_init: pointer to gadget initialization function
  */
 struct cdns {
 	struct device			*dev;
@@ -124,7 +122,6 @@ struct cdns {
 	struct xhci_plat_priv		*xhci_plat_data;
 	int (*gadget_init)(struct cdns *cdns);
 	u32                             override_apb_timeout;
-	bool				no_drd;
 };
 
 int cdns_hw_role_switch(struct cdns *cdns);
diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c
index 38f3051c2188..84fb38a5723a 100644
--- a/drivers/usb/cdns3/drd.c
+++ b/drivers/usb/cdns3/drd.c
@@ -107,7 +107,7 @@ void cdns_clear_vbus(struct cdns *cdns)
 {
 	u32 reg;
 
-	if (cdns->version != CDNSP_CONTROLLER_V2 || cdns->no_drd)
+	if (cdns->version != CDNSP_CONTROLLER_V2)
 		return;
 
 	reg = readl(&cdns->otg_cdnsp_regs->override);
@@ -120,7 +120,7 @@ void cdns_set_vbus(struct cdns *cdns)
 {
 	u32 reg;
 
-	if (cdns->version != CDNSP_CONTROLLER_V2 || cdns->no_drd)
+	if (cdns->version != CDNSP_CONTROLLER_V2)
 		return;
 
 	reg = readl(&cdns->otg_cdnsp_regs->override);
@@ -234,9 +234,6 @@ int cdns_drd_gadget_on(struct cdns *cdns)
 	u32 ready_bit;
 	int ret, val;
 
-	if (cdns->no_drd)
-		return 0;
-
 	/* switch OTG core */
 	writel(OTGCMD_DEV_BUS_REQ | reg, &cdns->otg_regs->cmd);
 
@@ -268,9 +265,6 @@ void cdns_drd_gadget_off(struct cdns *cdns)
 {
 	u32 val;
 
-	if (cdns->no_drd)
-		return;
-
 	/*
 	 * Driver should wait at least 10us after disabling Device
 	 * before turning-off Device (DEV_BUS_DROP).
@@ -398,12 +392,6 @@ int cdns_drd_init(struct cdns *cdns)
 	u32 state, reg;
 	int ret;
 
-	if (cdns->no_drd) {
-		cdns->version  = CDNSP_CONTROLLER_V2;
-		cdns->dr_mode = USB_DR_MODE_PERIPHERAL;
-		return 0;
-	}
-
 	regs = devm_ioremap_resource(cdns->dev, &cdns->otg_res);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a931fb201402..406abf629be2 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2424,7 +2424,6 @@
 #define PCI_DEVICE_ID_CDNS_USBSS	0x0100
 #define PCI_DEVICE_ID_CDNS_USB		0x0120
 #define PCI_DEVICE_ID_CDNS_USBSSP	0x0200
-#define PCI_DEVICE_ID_CDNS_UDC_USBSSP	0x0400
 
 #define PCI_VENDOR_ID_ARECA		0x17d3
 #define PCI_DEVICE_ID_ARECA_1110	0x1110
-- 
cgit v1.2.3


From fa4a1ff8ab235a308d8c983827657a69649185fd Mon Sep 17 00:00:00 2001
From: John Stultz <jstultz@google.com>
Date: Tue, 24 Mar 2026 19:13:19 +0000
Subject: locking: Add task::blocked_lock to serialize blocked_on state

So far, we have been able to utilize the mutex::wait_lock
for serializing the blocked_on state, but when we move to
proxying across runqueues, we will need to add more state
and a way to serialize changes to this state in contexts
where we don't hold the mutex::wait_lock.

So introduce the task::blocked_lock, which nests under the
mutex::wait_lock in the locking order, and rework the locking
to use it.

Signed-off-by: John Stultz <jstultz@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://patch.msgid.link/20260324191337.1841376-5-jstultz@google.com
---
 include/linux/sched.h        | 48 ++++++++++++++++----------------------------
 init/init_task.c             |  1 +
 kernel/fork.c                |  1 +
 kernel/locking/mutex-debug.c |  4 ++--
 kernel/locking/mutex.c       | 40 +++++++++++++++++++++++-------------
 kernel/locking/mutex.h       |  6 ++++++
 kernel/locking/ww_mutex.h    |  4 ++--
 kernel/sched/core.c          |  4 +++-
 8 files changed, 58 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a5d3dbc9cdf..2eef9bc6daaa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1238,6 +1238,7 @@ struct task_struct {
 #endif
 
 	struct mutex			*blocked_on;	/* lock we're blocked on */
+	raw_spinlock_t			blocked_lock;
 
 #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
 	/*
@@ -2181,57 +2182,42 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock) __must_hold(lock);
 #ifndef CONFIG_PREEMPT_RT
 static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
 {
-	struct mutex *m = p->blocked_on;
-
-	if (m)
-		lockdep_assert_held_once(&m->wait_lock);
-	return m;
+	lockdep_assert_held_once(&p->blocked_lock);
+	return p->blocked_on;
 }
 
 static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
 {
-	struct mutex *blocked_on = READ_ONCE(p->blocked_on);
-
 	WARN_ON_ONCE(!m);
 	/* The task should only be setting itself as blocked */
 	WARN_ON_ONCE(p != current);
-	/* Currently we serialize blocked_on under the mutex::wait_lock */
-	lockdep_assert_held_once(&m->wait_lock);
+	/* Currently we serialize blocked_on under the task::blocked_lock */
+	lockdep_assert_held_once(&p->blocked_lock);
 	/*
 	 * Check ensure we don't overwrite existing mutex value
 	 * with a different mutex. Note, setting it to the same
 	 * lock repeatedly is ok.
 	 */
-	WARN_ON_ONCE(blocked_on && blocked_on != m);
-	WRITE_ONCE(p->blocked_on, m);
-}
-
-static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
-{
-	guard(raw_spinlock_irqsave)(&m->wait_lock);
-	__set_task_blocked_on(p, m);
+	WARN_ON_ONCE(p->blocked_on && p->blocked_on != m);
+	p->blocked_on = m;
 }
 
 static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
 {
-	if (m) {
-		struct mutex *blocked_on = READ_ONCE(p->blocked_on);
-
-		/* Currently we serialize blocked_on under the mutex::wait_lock */
-		lockdep_assert_held_once(&m->wait_lock);
-		/*
-		 * There may be cases where we re-clear already cleared
-		 * blocked_on relationships, but make sure we are not
-		 * clearing the relationship with a different lock.
-		 */
-		WARN_ON_ONCE(blocked_on && blocked_on != m);
-	}
-	WRITE_ONCE(p->blocked_on, NULL);
+	/* Currently we serialize blocked_on under the task::blocked_lock */
+	lockdep_assert_held_once(&p->blocked_lock);
+	/*
+	 * There may be cases where we re-clear already cleared
+	 * blocked_on relationships, but make sure we are not
+	 * clearing the relationship with a different lock.
+	 */
+	WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
+	p->blocked_on = NULL;
 }
 
 static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
 {
-	guard(raw_spinlock_irqsave)(&m->wait_lock);
+	guard(raw_spinlock_irqsave)(&p->blocked_lock);
 	__clear_task_blocked_on(p, m);
 }
 #else
diff --git a/init/init_task.c b/init/init_task.c
index 5c838757fc10..b5f48ebdc2b6 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -169,6 +169,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
 	.journal_info	= NULL,
 	INIT_CPU_TIMERS(init_task)
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
+	.blocked_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.blocked_lock),
 	.timer_slack_ns = 50000, /* 50 usec default slack */
 	.thread_pid	= &init_struct_pid,
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
diff --git a/kernel/fork.c b/kernel/fork.c
index bc2bf58b93b6..079802cb6100 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2076,6 +2076,7 @@ __latent_entropy struct task_struct *copy_process(
 	ftrace_graph_init_task(p);
 
 	rt_mutex_init_task(p);
+	raw_spin_lock_init(&p->blocked_lock);
 
 	lockdep_assert_irqs_enabled();
 #ifdef CONFIG_PROVE_LOCKING
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 2c6b02d4699b..cc6aa9c6e981 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -54,13 +54,13 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 	lockdep_assert_held(&lock->wait_lock);
 
 	/* Current thread can't be already blocked (since it's executing!) */
-	DEBUG_LOCKS_WARN_ON(__get_task_blocked_on(task));
+	DEBUG_LOCKS_WARN_ON(get_task_blocked_on(task));
 }
 
 void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 			 struct task_struct *task)
 {
-	struct mutex *blocked_on = __get_task_blocked_on(task);
+	struct mutex *blocked_on = get_task_blocked_on(task);
 
 	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
 	DEBUG_LOCKS_WARN_ON(waiter->task != task);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2a1d165b3167..4aa79bcab08c 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -656,6 +656,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 			goto err_early_kill;
 	}
 
+	raw_spin_lock(&current->blocked_lock);
 	__set_task_blocked_on(current, lock);
 	set_current_state(state);
 	trace_contention_begin(lock, LCB_F_MUTEX);
@@ -669,8 +670,9 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 		 * the handoff.
 		 */
 		if (__mutex_trylock(lock))
-			goto acquired;
+			break;
 
+		raw_spin_unlock(&current->blocked_lock);
 		/*
 		 * Check for signals and kill conditions while holding
 		 * wait_lock. This ensures the lock cancellation is ordered
@@ -693,12 +695,14 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 
 		first = __mutex_waiter_is_first(lock, &waiter);
 
+		raw_spin_lock_irqsave(&lock->wait_lock, flags);
+		raw_spin_lock(&current->blocked_lock);
 		/*
 		 * As we likely have been woken up by task
 		 * that has cleared our blocked_on state, re-set
 		 * it to the lock we are trying to acquire.
 		 */
-		set_task_blocked_on(current, lock);
+		__set_task_blocked_on(current, lock);
 		set_current_state(state);
 		/*
 		 * Here we order against unlock; we must either see it change
@@ -709,25 +713,33 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 			break;
 
 		if (first) {
-			trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
+			bool opt_acquired;
+
 			/*
 			 * mutex_optimistic_spin() can call schedule(), so
-			 * clear blocked on so we don't become unselectable
+			 * we need to release these locks before calling it,
+			 * and clear blocked on so we don't become unselectable
 			 * to run.
 			 */
-			clear_task_blocked_on(current, lock);
-			if (mutex_optimistic_spin(lock, ww_ctx, &waiter))
+			__clear_task_blocked_on(current, lock);
+			raw_spin_unlock(&current->blocked_lock);
+			raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+			trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
+			opt_acquired = mutex_optimistic_spin(lock, ww_ctx, &waiter);
+
+			raw_spin_lock_irqsave(&lock->wait_lock, flags);
+			raw_spin_lock(&current->blocked_lock);
+			__set_task_blocked_on(current, lock);
+
+			if (opt_acquired)
 				break;
-			set_task_blocked_on(current, lock);
 			trace_contention_begin(lock, LCB_F_MUTEX);
 		}
-
-		raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	}
-	raw_spin_lock_irqsave(&lock->wait_lock, flags);
-acquired:
 	__clear_task_blocked_on(current, lock);
 	__set_current_state(TASK_RUNNING);
+	raw_spin_unlock(&current->blocked_lock);
 
 	if (ww_ctx) {
 		/*
@@ -756,11 +768,11 @@ skip_wait:
 	return 0;
 
 err:
-	__clear_task_blocked_on(current, lock);
+	clear_task_blocked_on(current, lock);
 	__set_current_state(TASK_RUNNING);
 	__mutex_remove_waiter(lock, &waiter);
 err_early_kill:
-	WARN_ON(__get_task_blocked_on(current));
+	WARN_ON(get_task_blocked_on(current));
 	trace_contention_end(lock, ret);
 	raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
 	debug_mutex_free_waiter(&waiter);
@@ -971,7 +983,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 		next = waiter->task;
 
 		debug_mutex_wake_waiter(lock, waiter);
-		__clear_task_blocked_on(next, lock);
+		clear_task_blocked_on(next, lock);
 		wake_q_add(&wake_q, next);
 	}
 
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 9ad4da8cea00..7a8ba13fee94 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -47,6 +47,12 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock)
 	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS);
 }
 
+static inline struct mutex *get_task_blocked_on(struct task_struct *p)
+{
+	guard(raw_spinlock_irqsave)(&p->blocked_lock);
+	return __get_task_blocked_on(p);
+}
+
 #ifdef CONFIG_DEBUG_MUTEXES
 extern void debug_mutex_lock_common(struct mutex *lock,
 				    struct mutex_waiter *waiter);
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index 31a785afee6c..e4a81790ea7d 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -289,7 +289,7 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
 		 * blocked_on pointer. Otherwise we can see circular
 		 * blocked_on relationships that can't resolve.
 		 */
-		__clear_task_blocked_on(waiter->task, lock);
+		clear_task_blocked_on(waiter->task, lock);
 		wake_q_add(wake_q, waiter->task);
 	}
 
@@ -347,7 +347,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
 			 * are waking the mutex owner, who may be currently
 			 * blocked on a different mutex.
 			 */
-			__clear_task_blocked_on(owner, NULL);
+			clear_task_blocked_on(owner, NULL);
 			wake_q_add(wake_q, owner);
 		}
 		return true;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5b7f378af042..1913dbc68eb9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6584,6 +6584,7 @@ static struct task_struct *proxy_deactivate(struct rq *rq, struct task_struct *d
  *   p->pi_lock
  *     rq->lock
  *       mutex->wait_lock
+ *         p->blocked_lock
  *
  * Returns the task that is going to be used as execution context (the one
  * that is actually going to be run on cpu_of(rq)).
@@ -6603,8 +6604,9 @@ find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
 		 * and ensure @owner sticks around.
 		 */
 		guard(raw_spinlock)(&mutex->wait_lock);
+		guard(raw_spinlock)(&p->blocked_lock);
 
-		/* Check again that p is blocked with wait_lock held */
+		/* Check again that p is blocked with blocked_lock held */
 		if (mutex != __get_task_blocked_on(p)) {
 			/*
 			 * Something changed in the blocked_on chain and
-- 
cgit v1.2.3


From 2d7622669836dcbbb449741b4e6c503ffe005c25 Mon Sep 17 00:00:00 2001
From: John Stultz <jstultz@google.com>
Date: Tue, 24 Mar 2026 19:13:21 +0000
Subject: sched/locking: Add special p->blocked_on==PROXY_WAKING value for
 proxy return-migration

As we add functionality to proxy execution, we may migrate a
donor task to a runqueue where it can't run due to cpu affinity.
Thus, we must be careful to ensure we return-migrate the task
back to a cpu in its cpumask when it becomes unblocked.

Peter helpfully provided the following example with pictures:
"Suppose we have a ww_mutex cycle:

                  ,-+-* Mutex-1 <-.
        Task-A ---' |             | ,-- Task-B
                    `-> Mutex-2 *-+-'

Where Task-A holds Mutex-1 and tries to acquire Mutex-2, and
where Task-B holds Mutex-2 and tries to acquire Mutex-1.

Then the blocked_on->owner chain will go in circles.

        Task-A  -> Mutex-2
          ^          |
          |          v
        Mutex-1 <- Task-B

We need two things:

 - find_proxy_task() to stop iterating the circle;

 - the woken task to 'unblock' and run, such that it can
   back-off and re-try the transaction.

Now, the current code [without this patch] does:
        __clear_task_blocked_on();
        wake_q_add();

And surely clearing ->blocked_on is sufficient to break the
cycle.

Suppose it is Task-B that is made to back-off, then we have:

  Task-A -> Mutex-2 -> Task-B (no further blocked_on)

and it would attempt to run Task-B. Or worse, it could directly
pick Task-B and run it, without ever getting into
find_proxy_task().

Now, here is a problem because Task-B might not be runnable on
the CPU it is currently on; and because !task_is_blocked() we
don't get into the proxy paths, so nobody is going to fix this
up.

Ideally we would have dequeued Task-B alongside of clearing
->blocked_on, but alas, [the lock ordering prevents us from
getting the task_rq_lock() and] spoils things."

Thus we need more than just a binary concept of the task being
blocked on a mutex or not.

So allow setting blocked_on to PROXY_WAKING as a special value
which specifies the task is no longer blocked, but needs to
be evaluated for return migration *before* it can be run.

This will then be used in a later patch to handle proxy
return-migration.

Signed-off-by: John Stultz <jstultz@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://patch.msgid.link/20260324191337.1841376-7-jstultz@google.com
---
 include/linux/sched.h     | 51 +++++++++++++++++++++++++++++++++++++++++++++--
 kernel/locking/mutex.c    |  2 +-
 kernel/locking/ww_mutex.h | 16 +++++++--------
 kernel/sched/core.c       | 16 +++++++++++++++
 4 files changed, 74 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2eef9bc6daaa..8ec3b6d7d718 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2180,10 +2180,20 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock) __must_hold(lock);
 })
 
 #ifndef CONFIG_PREEMPT_RT
+
+/*
+ * With proxy exec, if a task has been proxy-migrated, it may be a donor
+ * on a cpu that it can't actually run on. Thus we need a special state
+ * to denote that the task is being woken, but that it needs to be
+ * evaluated for return-migration before it is run. So if the task is
+ * blocked_on PROXY_WAKING, return migrate it before running it.
+ */
+#define PROXY_WAKING ((struct mutex *)(-1L))
+
 static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
 {
 	lockdep_assert_held_once(&p->blocked_lock);
-	return p->blocked_on;
+	return p->blocked_on == PROXY_WAKING ? NULL : p->blocked_on;
 }
 
 static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
@@ -2211,7 +2221,7 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *
 	 * blocked_on relationships, but make sure we are not
 	 * clearing the relationship with a different lock.
 	 */
-	WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
+	WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m && p->blocked_on != PROXY_WAKING);
 	p->blocked_on = NULL;
 }
 
@@ -2220,6 +2230,35 @@ static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
 	guard(raw_spinlock_irqsave)(&p->blocked_lock);
 	__clear_task_blocked_on(p, m);
 }
+
+static inline void __set_task_blocked_on_waking(struct task_struct *p, struct mutex *m)
+{
+	/* Currently we serialize blocked_on under the task::blocked_lock */
+	lockdep_assert_held_once(&p->blocked_lock);
+
+	if (!sched_proxy_exec()) {
+		__clear_task_blocked_on(p, m);
+		return;
+	}
+
+	/* Don't set PROXY_WAKING if blocked_on was already cleared */
+	if (!p->blocked_on)
+		return;
+	/*
+	 * There may be cases where we set PROXY_WAKING on tasks that were
+	 * already set to waking, but make sure we are not changing
+	 * the relationship with a different lock.
+	 */
+	WARN_ON_ONCE(m && p->blocked_on != m && p->blocked_on != PROXY_WAKING);
+	p->blocked_on = PROXY_WAKING;
+}
+
+static inline void set_task_blocked_on_waking(struct task_struct *p, struct mutex *m)
+{
+	guard(raw_spinlock_irqsave)(&p->blocked_lock);
+	__set_task_blocked_on_waking(p, m);
+}
+
 #else
 static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
 {
@@ -2228,6 +2267,14 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mute
 static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
 {
 }
+
+static inline void __set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m)
+{
+}
+
+static inline void set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m)
+{
+}
 #endif /* !CONFIG_PREEMPT_RT */
 
 static __always_inline bool need_resched(void)
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 4aa79bcab08c..7d359647156d 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -983,7 +983,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 		next = waiter->task;
 
 		debug_mutex_wake_waiter(lock, waiter);
-		clear_task_blocked_on(next, lock);
+		set_task_blocked_on_waking(next, lock);
 		wake_q_add(&wake_q, next);
 	}
 
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index e4a81790ea7d..5cd9dfa4b31e 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -285,11 +285,11 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
 		debug_mutex_wake_waiter(lock, waiter);
 #endif
 		/*
-		 * When waking up the task to die, be sure to clear the
-		 * blocked_on pointer. Otherwise we can see circular
-		 * blocked_on relationships that can't resolve.
+		 * When waking up the task to die, be sure to set the
+		 * blocked_on to PROXY_WAKING. Otherwise we can see
+		 * circular blocked_on relationships that can't resolve.
 		 */
-		clear_task_blocked_on(waiter->task, lock);
+		set_task_blocked_on_waking(waiter->task, lock);
 		wake_q_add(wake_q, waiter->task);
 	}
 
@@ -339,15 +339,15 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
 		 */
 		if (owner != current) {
 			/*
-			 * When waking up the task to wound, be sure to clear the
-			 * blocked_on pointer. Otherwise we can see circular
-			 * blocked_on relationships that can't resolve.
+			 * When waking up the task to wound, be sure to set the
+			 * blocked_on to PROXY_WAKING. Otherwise we can see
+			 * circular blocked_on relationships that can't resolve.
 			 *
 			 * NOTE: We pass NULL here instead of lock, because we
 			 * are waking the mutex owner, who may be currently
 			 * blocked on a different mutex.
 			 */
-			clear_task_blocked_on(owner, NULL);
+			set_task_blocked_on_waking(owner, NULL);
 			wake_q_add(wake_q, owner);
 		}
 		return true;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bf4338f71667..c997d516441d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4239,6 +4239,13 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 		ttwu_queue(p, cpu, wake_flags);
 	}
 out:
+	/*
+	 * For now, if we've been woken up, clear the task->blocked_on
+	 * regardless if it was set to a mutex or PROXY_WAKING so the
+	 * task can run. We will need to be more careful later when
+	 * properly handling proxy migration
+	 */
+	clear_task_blocked_on(p, NULL);
 	if (success)
 		ttwu_stat(p, task_cpu(p), wake_flags);
 
@@ -6600,6 +6607,10 @@ find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
 
 	/* Follow blocked_on chain. */
 	for (p = donor; (mutex = p->blocked_on); p = owner) {
+		/* if its PROXY_WAKING, resched_idle so ttwu can complete */
+		if (mutex == PROXY_WAKING)
+			return proxy_resched_idle(rq);
+
 		/*
 		 * By taking mutex->wait_lock we hold off concurrent mutex_unlock()
 		 * and ensure @owner sticks around.
@@ -6620,6 +6631,11 @@ find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
 
 		owner = __mutex_owner(mutex);
 		if (!owner) {
+			/*
+			 * If there is no owner, clear blocked_on
+			 * and return p so it can run and try to
+			 * acquire the lock
+			 */
 			__clear_task_blocked_on(p, mutex);
 			return p;
 		}
-- 
cgit v1.2.3


From 4e2866b2baaddfff6069a2f18fc134c1d5a08f2b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 11 Mar 2026 12:18:54 -0400
Subject: SUNRPC: Add svc_rqst_page_release() helper

svc_rqst_replace_page() releases displaced pages through a
per-rqst folio batch, but exposes the add-or-flush sequence
directly. svc_tcp_restore_pages() releases displaced pages
individually with put_page().

Introduce svc_rqst_page_release() to encapsulate the
batched release mechanism. Convert svc_rqst_replace_page()
and svc_tcp_restore_pages() to use it. The latter now
benefits from the same batched release that
svc_rqst_replace_page() already uses.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 15 +++++++++++++++
 net/sunrpc/svc.c           |  7 ++-----
 net/sunrpc/svcsock.c       |  2 +-
 3 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 669c944eaf7f..1ebd9c7efa70 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -498,6 +498,21 @@ int		   svc_generic_rpcbind_set(struct net *net,
 
 #define	RPC_MAX_ADDRBUFLEN	(63U)
 
+/**
+ * svc_rqst_page_release - release a page associated with an RPC transaction
+ * @rqstp: RPC transaction context
+ * @page: page to release
+ *
+ * Released pages are batched and freed together, reducing
+ * allocator pressure under heavy RPC workloads.
+ */
+static inline void svc_rqst_page_release(struct svc_rqst *rqstp,
+					 struct page *page)
+{
+	if (!folio_batch_add(&rqstp->rq_fbatch, page_folio(page)))
+		__folio_batch_release(&rqstp->rq_fbatch);
+}
+
 /*
  * When we want to reduce the size of the reserved space in the response
  * buffer, we need to take into account the size of any checksum data that
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 5e0b5ec2fd52..576fa42e7abf 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -976,11 +976,8 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
 		return false;
 	}
 
-	if (*rqstp->rq_next_page) {
-		if (!folio_batch_add(&rqstp->rq_fbatch,
-				page_folio(*rqstp->rq_next_page)))
-			__folio_batch_release(&rqstp->rq_fbatch);
-	}
+	if (*rqstp->rq_next_page)
+		svc_rqst_page_release(rqstp, *rqstp->rq_next_page);
 
 	get_page(page);
 	*(rqstp->rq_next_page++) = page;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2ce43f9995f1..7be3de1a1aed 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -988,7 +988,7 @@ static size_t svc_tcp_restore_pages(struct svc_sock *svsk,
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		if (rqstp->rq_pages[i] != NULL)
-			put_page(rqstp->rq_pages[i]);
+			svc_rqst_page_release(rqstp, rqstp->rq_pages[i]);
 		BUG_ON(svsk->sk_pages[i] == NULL);
 		rqstp->rq_pages[i] = svsk->sk_pages[i];
 		svsk->sk_pages[i] = NULL;
-- 
cgit v1.2.3


From e6898ec751e4d8577b210f8e816ea9f8c2a7158a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 2 Apr 2026 19:44:17 -0700
Subject: bpf: Sort subprogs in topological order after check_cfg()

Add a pass that sorts subprogs in topological order so that iterating
subprog_topo_order[] walks leaf subprogs first, then their callers.
This is computed as a DFS post-order traversal of the CFG.

The pass runs after check_cfg() to ensure the CFG has been validated
before traversing and after postorder has been computed to avoid
walking dead code.

Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260403024422.87231-3-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h                       |  2 +
 kernel/bpf/verifier.c                              | 92 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/verifier_loops1.c  |  3 +-
 tools/testing/selftests/bpf/verifier/calls.c       |  6 +-
 4 files changed, 98 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index b129e0aaee20..d21541f96ee9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -787,6 +787,8 @@ struct bpf_verifier_env {
 	const struct bpf_line_info *prev_linfo;
 	struct bpf_verifier_log log;
 	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */
+	/* subprog indices sorted in topological order: leaves first, callers last */
+	int subprog_topo_order[BPF_MAX_SUBPROGS + 2];
 	union {
 		struct bpf_idmap idmap_scratch;
 		struct bpf_idset idset_scratch;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9de49d43c21d..f457235c874c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3770,6 +3770,94 @@ next:
 	return 0;
 }
 
+/*
+ * Sort subprogs in topological order so that leaf subprogs come first and
+ * their callers come later. This is a DFS post-order traversal of the call
+ * graph. Scan only reachable instructions (those in the computed postorder) of
+ * the current subprog to discover callees (direct subprogs and sync
+ * callbacks).
+ */
+static int sort_subprogs_topo(struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *si = env->subprog_info;
+	int *insn_postorder = env->cfg.insn_postorder;
+	struct bpf_insn *insn = env->prog->insnsi;
+	int cnt = env->subprog_cnt;
+	int *dfs_stack = NULL;
+	int top = 0, order = 0;
+	int i, ret = 0;
+	u8 *color = NULL;
+
+	color = kvzalloc_objs(*color, cnt, GFP_KERNEL_ACCOUNT);
+	dfs_stack = kvmalloc_objs(*dfs_stack, cnt, GFP_KERNEL_ACCOUNT);
+	if (!color || !dfs_stack) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * DFS post-order traversal.
+	 * Color values: 0 = unvisited, 1 = on stack, 2 = done.
+	 */
+	for (i = 0; i < cnt; i++) {
+		if (color[i])
+			continue;
+		color[i] = 1;
+		dfs_stack[top++] = i;
+
+		while (top > 0) {
+			int cur = dfs_stack[top - 1];
+			int po_start = si[cur].postorder_start;
+			int po_end = si[cur + 1].postorder_start;
+			bool pushed = false;
+			int j;
+
+			for (j = po_start; j < po_end; j++) {
+				int idx = insn_postorder[j];
+				int callee;
+
+				if (!bpf_pseudo_call(&insn[idx]) && !bpf_pseudo_func(&insn[idx]))
+					continue;
+				callee = find_subprog(env, idx + insn[idx].imm + 1);
+				if (callee < 0) {
+					ret = -EFAULT;
+					goto out;
+				}
+				if (color[callee] == 2)
+					continue;
+				if (color[callee] == 1) {
+					if (bpf_pseudo_func(&insn[idx]))
+						continue;
+					verbose(env, "recursive call from %s() to %s()\n",
+						subprog_name(env, cur),
+						subprog_name(env, callee));
+					ret = -EINVAL;
+					goto out;
+				}
+				color[callee] = 1;
+				dfs_stack[top++] = callee;
+				pushed = true;
+				break;
+			}
+
+			if (!pushed) {
+				color[cur] = 2;
+				env->subprog_topo_order[order++] = cur;
+				top--;
+			}
+		}
+	}
+
+	if (env->log.level & BPF_LOG_LEVEL2)
+		for (i = 0; i < cnt; i++)
+			verbose(env, "topo_order[%d] = %s\n",
+				i, subprog_name(env, env->subprog_topo_order[i]));
+out:
+	kvfree(dfs_stack);
+	kvfree(color);
+	return ret;
+}
+
 static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 				    int spi, int nr_slots)
 {
@@ -26320,6 +26408,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret)
 		goto skip_full_check;
 
+	ret = sort_subprogs_topo(env);
+	if (ret < 0)
+		goto skip_full_check;
+
 	ret = compute_scc(env);
 	if (ret < 0)
 		goto skip_full_check;
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
index fbdde80e7b90..d248ce877f14 100644
--- a/tools/testing/selftests/bpf/progs/verifier_loops1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -138,8 +138,7 @@ l0_%=:	exit;						\
 SEC("tracepoint")
 __description("bounded recursion")
 __failure
-/* verifier limitation in detecting max stack depth */
-__msg("the call stack of 8 frames is too deep !")
+__msg("recursive call from")
 __naked void bounded_recursion(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 29e57f0e56c3..c3164b9b2be5 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -455,7 +455,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "the call stack of 9 frames is too deep",
+	.errstr = "recursive call",
 	.result = REJECT,
 },
 {
@@ -812,7 +812,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "the call stack of 9 frames is too deep",
+	.errstr = "recursive call",
 	.result = REJECT,
 },
 {
@@ -824,7 +824,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "the call stack of 9 frames is too deep",
+	.errstr = "recursive call",
 	.result = REJECT,
 },
 {
-- 
cgit v1.2.3


From f1606dd0ac49230f5a5fa1a279210fdf0249c20f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 2 Apr 2026 19:44:19 -0700
Subject: bpf: Add bpf_compute_const_regs() and bpf_prune_dead_branches()
 passes

Add two passes before the main verifier pass:

bpf_compute_const_regs() is a forward dataflow analysis that tracks
register values in R0-R9 across the program using fixed-point
iteration in reverse postorder. Each register is tracked with
a six-state lattice:

  UNVISITED -> CONST(val) / MAP_PTR(map_index) /
               MAP_VALUE(map_index, offset) / SUBPROG(num) -> UNKNOWN

At merge points, if two paths produce the same state and value for
a register, it stays; otherwise it becomes UNKNOWN.

The analysis handles:
 - MOV, ADD, SUB, AND with immediate or register operands
 - LD_IMM64 for plain constants, map FDs, map values, and subprogs
 - LDX from read-only maps: constant-folds the load by reading the
   map value directly via bpf_map_direct_read()

Results that fit in 32 bits are stored per-instruction in
insn_aux_data and bitmasks.

bpf_prune_dead_branches() uses the computed constants to evaluate
conditional branches. When both operands of a conditional jump are
known constants, the branch outcome is determined statically and the
instruction is rewritten to an unconditional jump.
The CFG postorder is then recomputed to reflect new control flow.
This eliminates dead edges so that subsequent liveness analysis
doesn't propagate through dead code.

Also add runtime sanity check to validate that precomputed
constants match the verifier's tracked state.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260403024422.87231-5-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h                       |  23 ++
 kernel/bpf/Makefile                                |   2 +-
 kernel/bpf/const_fold.c                            | 396 +++++++++++++++++++++
 kernel/bpf/verifier.c                              |  75 ++--
 .../selftests/bpf/progs/verifier_scalar_ids.c      |  20 +-
 .../testing/selftests/bpf/progs/verifier_unpriv.c  |   6 +-
 6 files changed, 490 insertions(+), 32 deletions(-)
 create mode 100644 kernel/bpf/const_fold.c

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d21541f96ee9..c5e65cdb6328 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -595,6 +595,18 @@ struct bpf_insn_aux_data {
 	u32 scc;
 	/* registers alive before this instruction. */
 	u16 live_regs_before;
+	/*
+	 * Bitmask of R0-R9 that hold known values at this instruction.
+	 * const_reg_mask: scalar constants that fit in 32 bits.
+	 * const_reg_map_mask: map pointers, val is map_index into used_maps[].
+	 * const_reg_subprog_mask: subprog pointers, val is subprog number.
+	 * const_reg_vals[i] holds the 32-bit value for register i.
+	 * Populated by compute_const_regs() pre-pass.
+	 */
+	u16 const_reg_mask;
+	u16 const_reg_map_mask;
+	u16 const_reg_subprog_mask;
+	u32 const_reg_vals[10];
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -945,6 +957,10 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab);
 
 int mark_chain_precision(struct bpf_verifier_env *env, int regno);
 
+bool bpf_map_is_rdonly(const struct bpf_map *map);
+int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
+			bool is_ldsx);
+
 #define BPF_BASE_TYPE_MASK	GENMASK(BPF_BASE_TYPE_BITS - 1, 0)
 
 /* extract base type from bpf_{arg, return, reg}_type. */
@@ -1088,6 +1104,13 @@ struct bpf_iarray *bpf_insn_successors(struct bpf_verifier_env *env, u32 idx);
 void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask);
 bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx);
 
+int bpf_find_subprog(struct bpf_verifier_env *env, int off);
+int bpf_compute_const_regs(struct bpf_verifier_env *env);
+int bpf_prune_dead_branches(struct bpf_verifier_env *env);
+int bpf_compute_postorder(struct bpf_verifier_env *env);
+bool bpf_insn_is_cond_jump(u8 code);
+bool bpf_is_may_goto_insn(struct bpf_insn *insn);
+
 int bpf_stack_liveness_init(struct bpf_verifier_env *env);
 void bpf_stack_liveness_free(struct bpf_verifier_env *env);
 int bpf_update_live_stack(struct bpf_verifier_env *env);
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 79cf22860a99..b8ae7b0988a4 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
 endif
 CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
 
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o const_fold.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o
diff --git a/kernel/bpf/const_fold.c b/kernel/bpf/const_fold.c
new file mode 100644
index 000000000000..db73c4740b1e
--- /dev/null
+++ b/kernel/bpf/const_fold.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf_verifier.h>
+
+/*
+ * Forward dataflow analysis to determine constant register values at every
+ * instruction. Tracks 64-bit constant values in R0-R9 through the program,
+ * using a fixed-point iteration in reverse postorder. Records which registers
+ * hold known constants and their values in
+ * env->insn_aux_data[].{const_reg_mask, const_reg_vals}.
+ */
+
+enum const_arg_state {
+	CONST_ARG_UNVISITED,	/* instruction not yet reached */
+	CONST_ARG_UNKNOWN,	/* register value not a known constant */
+	CONST_ARG_CONST,	/* register holds a known 64-bit constant */
+	CONST_ARG_MAP_PTR,	/* register holds a map pointer, map_index is set */
+	CONST_ARG_MAP_VALUE,	/* register points to map value data, val is offset */
+	CONST_ARG_SUBPROG,	/* register holds a subprog pointer, val is subprog number */
+};
+
+struct const_arg_info {
+	enum const_arg_state state;
+	u32 map_index;
+	u64 val;
+};
+
+static bool ci_is_unvisited(const struct const_arg_info *ci)
+{
+	return ci->state == CONST_ARG_UNVISITED;
+}
+
+static bool ci_is_unknown(const struct const_arg_info *ci)
+{
+	return ci->state == CONST_ARG_UNKNOWN;
+}
+
+static bool ci_is_const(const struct const_arg_info *ci)
+{
+	return ci->state == CONST_ARG_CONST;
+}
+
+static bool ci_is_map_value(const struct const_arg_info *ci)
+{
+	return ci->state == CONST_ARG_MAP_VALUE;
+}
+
+/* Transfer function: compute output register state from instruction. */
+static void const_reg_xfer(struct bpf_verifier_env *env, struct const_arg_info *ci_out,
+			   struct bpf_insn *insn, struct bpf_insn *insns, int idx)
+{
+	struct const_arg_info unknown = { .state = CONST_ARG_UNKNOWN, .val = 0 };
+	struct const_arg_info *dst = &ci_out[insn->dst_reg];
+	struct const_arg_info *src = &ci_out[insn->src_reg];
+	u8 class = BPF_CLASS(insn->code);
+	u8 mode = BPF_MODE(insn->code);
+	u8 opcode = BPF_OP(insn->code) | BPF_SRC(insn->code);
+	int r;
+
+	switch (class) {
+	case BPF_ALU:
+	case BPF_ALU64:
+		switch (opcode) {
+		case BPF_MOV | BPF_K:
+			dst->state = CONST_ARG_CONST;
+			dst->val = (s64)insn->imm;
+			break;
+		case BPF_MOV | BPF_X:
+			*dst = *src;
+			if (!insn->off)
+				break;
+			if (!ci_is_const(dst)) {
+				*dst = unknown;
+				break;
+			}
+			switch (insn->off) {
+			case 8:  dst->val = (s8)dst->val; break;
+			case 16: dst->val = (s16)dst->val; break;
+			case 32: dst->val = (s32)dst->val; break;
+			default: *dst = unknown; break;
+			}
+			break;
+		case BPF_ADD | BPF_K:
+			if (!ci_is_const(dst) && !ci_is_map_value(dst)) {
+				*dst = unknown;
+				break;
+			}
+			dst->val += insn->imm;
+			break;
+		case BPF_SUB | BPF_K:
+			if (!ci_is_const(dst) && !ci_is_map_value(dst)) {
+				*dst = unknown;
+				break;
+			}
+			dst->val -= insn->imm;
+			break;
+		case BPF_AND | BPF_K:
+			if (!ci_is_const(dst)) {
+				if (!insn->imm) {
+					dst->state = CONST_ARG_CONST;
+					dst->val = 0;
+				} else {
+					*dst = unknown;
+				}
+				break;
+			}
+			dst->val &= (s64)insn->imm;
+			break;
+		case BPF_AND | BPF_X:
+			if (ci_is_const(dst) && dst->val == 0)
+				break; /* 0 & x == 0 */
+			if (ci_is_const(src) && src->val == 0) {
+				dst->state = CONST_ARG_CONST;
+				dst->val = 0;
+				break;
+			}
+			if (!ci_is_const(dst) || !ci_is_const(src)) {
+				*dst = unknown;
+				break;
+			}
+			dst->val &= src->val;
+			break;
+		default:
+			*dst = unknown;
+			break;
+		}
+		if (class == BPF_ALU) {
+			if (ci_is_const(dst))
+				dst->val = (u32)dst->val;
+			else if (!ci_is_unknown(dst))
+				*dst = unknown;
+		}
+		break;
+	case BPF_LD:
+		if (mode == BPF_ABS || mode == BPF_IND)
+			goto process_call;
+		if (mode != BPF_IMM || BPF_SIZE(insn->code) != BPF_DW)
+			break;
+		if (insn->src_reg == BPF_PSEUDO_FUNC) {
+			int subprog = bpf_find_subprog(env, idx + insn->imm + 1);
+
+			if (subprog >= 0) {
+				dst->state = CONST_ARG_SUBPROG;
+				dst->val = subprog;
+			} else {
+				*dst = unknown;
+			}
+		} else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
+			   insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
+			dst->state = CONST_ARG_MAP_VALUE;
+			dst->map_index = env->insn_aux_data[idx].map_index;
+			dst->val = env->insn_aux_data[idx].map_off;
+		} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
+			   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
+			dst->state = CONST_ARG_MAP_PTR;
+			dst->map_index = env->insn_aux_data[idx].map_index;
+		} else if (insn->src_reg == 0) {
+			dst->state = CONST_ARG_CONST;
+			dst->val = (u64)(u32)insn->imm | ((u64)(u32)insns[idx + 1].imm << 32);
+		} else {
+			*dst = unknown;
+		}
+		break;
+	case BPF_LDX:
+		if (!ci_is_map_value(src)) {
+			*dst = unknown;
+			break;
+		}
+		struct bpf_map *map = env->used_maps[src->map_index];
+		int size = bpf_size_to_bytes(BPF_SIZE(insn->code));
+		bool is_ldsx = mode == BPF_MEMSX;
+		int off = src->val + insn->off;
+		u64 val = 0;
+
+		if (!bpf_map_is_rdonly(map) || !map->ops->map_direct_value_addr ||
+		    map->map_type == BPF_MAP_TYPE_INSN_ARRAY ||
+		    off < 0 || off + size > map->value_size ||
+		    bpf_map_direct_read(map, off, size, &val, is_ldsx)) {
+			*dst = unknown;
+			break;
+		}
+		dst->state = CONST_ARG_CONST;
+		dst->val = val;
+		break;
+	case BPF_JMP:
+		if (opcode != BPF_CALL)
+			break;
+process_call:
+		for (r = BPF_REG_0; r <= BPF_REG_5; r++)
+			ci_out[r] = unknown;
+		break;
+	case BPF_STX:
+		if (mode != BPF_ATOMIC)
+			break;
+		if (insn->imm == BPF_CMPXCHG)
+			ci_out[BPF_REG_0] = unknown;
+		else if (insn->imm == BPF_LOAD_ACQ)
+			*dst = unknown;
+		else if (insn->imm & BPF_FETCH)
+			*src = unknown;
+		break;
+	}
+}
+
+/* Join function: merge output state into a successor's input state. */
+static bool const_reg_join(struct const_arg_info *ci_target,
+			   struct const_arg_info *ci_out)
+{
+	bool changed = false;
+	int r;
+
+	for (r = 0; r < MAX_BPF_REG; r++) {
+		struct const_arg_info *old = &ci_target[r];
+		struct const_arg_info *new = &ci_out[r];
+
+		if (ci_is_unvisited(old) && !ci_is_unvisited(new)) {
+			ci_target[r] = *new;
+			changed = true;
+		} else if (!ci_is_unknown(old) && !ci_is_unvisited(old) &&
+			   (new->state != old->state || new->val != old->val ||
+			    new->map_index != old->map_index)) {
+			old->state = CONST_ARG_UNKNOWN;
+			changed = true;
+		}
+	}
+	return changed;
+}
+
+int bpf_compute_const_regs(struct bpf_verifier_env *env)
+{
+	struct const_arg_info unknown = { .state = CONST_ARG_UNKNOWN, .val = 0 };
+	struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
+	struct bpf_insn *insns = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	struct const_arg_info (*ci_in)[MAX_BPF_REG];
+	struct const_arg_info ci_out[MAX_BPF_REG];
+	struct bpf_iarray *succ;
+	bool changed;
+	int i, r;
+
+	/* kvzalloc zeroes memory, so all entries start as CONST_ARG_UNVISITED (0) */
+	ci_in = kvzalloc_objs(*ci_in, insn_cnt, GFP_KERNEL_ACCOUNT);
+	if (!ci_in)
+		return -ENOMEM;
+
+	/* Subprogram entries (including main at subprog 0): all registers unknown */
+	for (i = 0; i < env->subprog_cnt; i++) {
+		int start = env->subprog_info[i].start;
+
+		for (r = 0; r < MAX_BPF_REG; r++)
+			ci_in[start][r] = unknown;
+	}
+
+redo:
+	changed = false;
+	for (i = env->cfg.cur_postorder - 1; i >= 0; i--) {
+		int idx = env->cfg.insn_postorder[i];
+		struct bpf_insn *insn = &insns[idx];
+		struct const_arg_info *ci = ci_in[idx];
+
+		memcpy(ci_out, ci, sizeof(ci_out));
+
+		const_reg_xfer(env, ci_out, insn, insns, idx);
+
+		succ = bpf_insn_successors(env, idx);
+		for (int s = 0; s < succ->cnt; s++)
+			changed |= const_reg_join(ci_in[succ->items[s]], ci_out);
+	}
+	if (changed)
+		goto redo;
+
+	/* Save computed constants into insn_aux[] if they fit into 32-bit */
+	for (i = 0; i < insn_cnt; i++) {
+		u16 mask = 0, map_mask = 0, subprog_mask = 0;
+		struct bpf_insn_aux_data *aux = &insn_aux[i];
+		struct const_arg_info *ci = ci_in[i];
+
+		for (r = BPF_REG_0; r < ARRAY_SIZE(aux->const_reg_vals); r++) {
+			struct const_arg_info *c = &ci[r];
+
+			switch (c->state) {
+			case CONST_ARG_CONST: {
+				u64 val = c->val;
+
+				if (val != (u32)val)
+					break;
+				mask |= BIT(r);
+				aux->const_reg_vals[r] = val;
+				break;
+			}
+			case CONST_ARG_MAP_PTR:
+				map_mask |= BIT(r);
+				aux->const_reg_vals[r] = c->map_index;
+				break;
+			case CONST_ARG_SUBPROG:
+				subprog_mask |= BIT(r);
+				aux->const_reg_vals[r] = c->val;
+				break;
+			default:
+				break;
+			}
+		}
+		aux->const_reg_mask = mask;
+		aux->const_reg_map_mask = map_mask;
+		aux->const_reg_subprog_mask = subprog_mask;
+	}
+
+	kvfree(ci_in);
+	return 0;
+}
+
+static int eval_const_branch(u8 opcode, u64 dst_val, u64 src_val)
+{
+	switch (BPF_OP(opcode)) {
+	case BPF_JEQ:	return dst_val == src_val;
+	case BPF_JNE:	return dst_val != src_val;
+	case BPF_JGT:	return dst_val > src_val;
+	case BPF_JGE:	return dst_val >= src_val;
+	case BPF_JLT:	return dst_val < src_val;
+	case BPF_JLE:	return dst_val <= src_val;
+	case BPF_JSGT:	return (s64)dst_val > (s64)src_val;
+	case BPF_JSGE:	return (s64)dst_val >= (s64)src_val;
+	case BPF_JSLT:	return (s64)dst_val < (s64)src_val;
+	case BPF_JSLE:	return (s64)dst_val <= (s64)src_val;
+	case BPF_JSET:	return (bool)(dst_val & src_val);
+	default:	return -1;
+	}
+}
+
+/*
+ * Rewrite conditional branches with constant outcomes into unconditional
+ * jumps using register values resolved by bpf_compute_const_regs() pass.
+ * This eliminates dead edges from the CFG so that compute_live_registers()
+ * doesn't propagate liveness through dead code.
+ */
+int bpf_prune_dead_branches(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
+	struct bpf_insn *insns = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	bool changed = false;
+	int i;
+
+	for (i = 0; i < insn_cnt; i++) {
+		struct bpf_insn_aux_data *aux = &insn_aux[i];
+		struct bpf_insn *insn = &insns[i];
+		u8 class = BPF_CLASS(insn->code);
+		u64 dst_val, src_val;
+		int taken;
+
+		if (!bpf_insn_is_cond_jump(insn->code))
+			continue;
+		if (bpf_is_may_goto_insn(insn))
+			continue;
+
+		if (!(aux->const_reg_mask & BIT(insn->dst_reg)))
+			continue;
+		dst_val = aux->const_reg_vals[insn->dst_reg];
+
+		if (BPF_SRC(insn->code) == BPF_K) {
+			src_val = insn->imm;
+		} else {
+			if (!(aux->const_reg_mask & BIT(insn->src_reg)))
+				continue;
+			src_val = aux->const_reg_vals[insn->src_reg];
+		}
+
+		if (class == BPF_JMP32) {
+			/*
+			 * The (s32) cast maps the 32-bit range into two u64 sub-ranges:
+			 * [0x00000000, 0x7FFFFFFF] -> [0x0000000000000000, 0x000000007FFFFFFF]
+			 * [0x80000000, 0xFFFFFFFF] -> [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+			 * The ordering is preserved within each sub-range, and
+			 * the second sub-range is above the first as u64.
+			 */
+			dst_val = (s32)dst_val;
+			src_val = (s32)src_val;
+		}
+
+		taken = eval_const_branch(insn->code, dst_val, src_val);
+		if (taken < 0) {
+			bpf_log(&env->log, "Unknown conditional jump %x\n", insn->code);
+			return -EFAULT;
+		}
+		*insn = BPF_JMP_A(taken ? insn->off : 0);
+		changed = true;
+	}
+
+	if (!changed)
+		return 0;
+	/* recompute postorder, since CFG has changed */
+	kvfree(env->cfg.insn_postorder);
+	env->cfg.insn_postorder = NULL;
+	return bpf_compute_postorder(env);
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f457235c874c..8d9f7e4574ec 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -595,14 +595,14 @@ static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn
 	return false;
 }
 
-static bool is_may_goto_insn(struct bpf_insn *insn)
+bool bpf_is_may_goto_insn(struct bpf_insn *insn)
 {
 	return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
 }
 
 static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
 {
-	return is_may_goto_insn(&env->prog->insnsi[insn_idx]);
+	return bpf_is_may_goto_insn(&env->prog->insnsi[insn_idx]);
 }
 
 static bool is_storage_get_function(enum bpf_func_id func_id)
@@ -3110,7 +3110,7 @@ struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *en
 }
 
 /* Find subprogram that starts exactly at 'off' */
-static int find_subprog(struct bpf_verifier_env *env, int off)
+int bpf_find_subprog(struct bpf_verifier_env *env, int off)
 {
 	struct bpf_subprog_info *p;
 
@@ -3129,7 +3129,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
 		verbose(env, "call to invalid destination\n");
 		return -EINVAL;
 	}
-	ret = find_subprog(env, off);
+	ret = bpf_find_subprog(env, off);
 	if (ret >= 0)
 		return ret;
 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
@@ -3818,7 +3818,7 @@ static int sort_subprogs_topo(struct bpf_verifier_env *env)
 
 				if (!bpf_pseudo_call(&insn[idx]) && !bpf_pseudo_func(&insn[idx]))
 					continue;
-				callee = find_subprog(env, idx + insn[idx].imm + 1);
+				callee = bpf_find_subprog(env, idx + insn[idx].imm + 1);
 				if (callee < 0) {
 					ret = -EFAULT;
 					goto out;
@@ -4624,7 +4624,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 			int subprog_insn_idx, subprog;
 
 			subprog_insn_idx = idx + insn->imm + 1;
-			subprog = find_subprog(env, subprog_insn_idx);
+			subprog = bpf_find_subprog(env, subprog_insn_idx);
 			if (subprog < 0)
 				return -EFAULT;
 
@@ -6956,7 +6956,7 @@ continue_func:
 
 		/* find the callee */
 		next_insn = i + insn[i].imm + 1;
-		sidx = find_subprog(env, next_insn);
+		sidx = bpf_find_subprog(env, next_insn);
 		if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
 			return -EFAULT;
 		if (subprog[sidx].is_async_cb) {
@@ -7091,7 +7091,7 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
 {
 	int start = idx + insn->imm + 1, subprog;
 
-	subprog = find_subprog(env, start);
+	subprog = bpf_find_subprog(env, start);
 	if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
 		return -EFAULT;
 	return env->subprog_info[subprog].stack_depth;
@@ -7338,7 +7338,7 @@ out:
 	set_sext32_default_val(reg, size);
 }
 
-static bool bpf_map_is_rdonly(const struct bpf_map *map)
+bool bpf_map_is_rdonly(const struct bpf_map *map)
 {
 	/* A map is considered read-only if the following condition are true:
 	 *
@@ -7358,8 +7358,8 @@ static bool bpf_map_is_rdonly(const struct bpf_map *map)
 	       !bpf_map_write_active(map);
 }
 
-static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
-			       bool is_ldsx)
+int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
+			bool is_ldsx)
 {
 	void *ptr;
 	u64 addr;
@@ -11049,7 +11049,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	int err, subprog, target_insn;
 
 	target_insn = *insn_idx + insn->imm + 1;
-	subprog = find_subprog(env, target_insn);
+	subprog = bpf_find_subprog(env, target_insn);
 	if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
 			    target_insn))
 		return -EFAULT;
@@ -17980,8 +17980,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 
 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
 		struct bpf_prog_aux *aux = env->prog->aux;
-		u32 subprogno = find_subprog(env,
-					     env->insn_idx + insn->imm + 1);
+		u32 subprogno = bpf_find_subprog(env,
+						 env->insn_idx + insn->imm + 1);
 
 		if (!aux->func_info) {
 			verbose(env, "missing btf func_info\n");
@@ -19177,7 +19177,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
 	default:
 		/* conditional jump with two edges */
 		mark_prune_point(env, t);
-		if (is_may_goto_insn(insn))
+		if (bpf_is_may_goto_insn(insn))
 			mark_force_checkpoint(env, t);
 
 		ret = push_insn(t, t + 1, FALLTHROUGH, env);
@@ -19284,7 +19284,7 @@ err_free:
  * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start)
  * with indices of 'i' instructions in postorder.
  */
-static int compute_postorder(struct bpf_verifier_env *env)
+int bpf_compute_postorder(struct bpf_verifier_env *env)
 {
 	u32 cur_postorder, i, top, stack_sz, s;
 	int *stack = NULL, *postorder = NULL, *state = NULL;
@@ -21593,6 +21593,27 @@ static int do_check(struct bpf_verifier_env *env)
 		sanitize_mark_insn_seen(env);
 		prev_insn_idx = env->insn_idx;
 
+		/* Sanity check: precomputed constants must match verifier state */
+		if (!state->speculative && insn_aux->const_reg_mask) {
+			struct bpf_reg_state *regs = cur_regs(env);
+			u16 mask = insn_aux->const_reg_mask;
+
+			for (int r = 0; r < ARRAY_SIZE(insn_aux->const_reg_vals); r++) {
+				u32 cval = insn_aux->const_reg_vals[r];
+
+				if (!(mask & BIT(r)))
+					continue;
+				if (regs[r].type != SCALAR_VALUE)
+					continue;
+				if (!tnum_is_const(regs[r].var_off))
+					continue;
+				if (verifier_bug_if((u32)regs[r].var_off.value != cval,
+						    env, "const R%d: %u != %llu",
+						    r, cval, regs[r].var_off.value))
+					return -EFAULT;
+			}
+		}
+
 		/* Reduce verification complexity by stopping speculative path
 		 * verification when a nospec is encountered.
 		 */
@@ -22582,7 +22603,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
 	}
 }
 
-static bool insn_is_cond_jump(u8 code)
+bool bpf_insn_is_cond_jump(u8 code)
 {
 	u8 op;
 
@@ -22605,7 +22626,7 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
 	int i;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
-		if (!insn_is_cond_jump(insn->code))
+		if (!bpf_insn_is_cond_jump(insn->code))
 			continue;
 
 		if (!aux_data[i + 1].seen)
@@ -23101,7 +23122,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		 * need a hard reject of the program. Thus -EFAULT is
 		 * propagated in any case.
 		 */
-		subprog = find_subprog(env, i + insn->imm + 1);
+		subprog = bpf_find_subprog(env, i + insn->imm + 1);
 		if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
 				    i + insn->imm + 1))
 			return -EFAULT;
@@ -23316,7 +23337,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		if (!bpf_pseudo_call(insn))
 			continue;
 		insn->off = env->insn_aux_data[i].call_imm;
-		subprog = find_subprog(env, i + insn->off + 1);
+		subprog = bpf_find_subprog(env, i + insn->off + 1);
 		insn->imm = subprog;
 	}
 
@@ -23927,7 +23948,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto next_insn;
 		}
 
-		if (is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
+		if (bpf_is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
 			int stack_off_cnt = -stack_depth - 16;
 
 			/*
@@ -23970,7 +23991,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			env->prog = prog = new_prog;
 			insn = new_prog->insnsi + i + delta;
 			goto next_insn;
-		} else if (is_may_goto_insn(insn)) {
+		} else if (bpf_is_may_goto_insn(insn)) {
 			int stack_off = -stack_depth - 8;
 
 			stack_depth_extra = 8;
@@ -26396,7 +26417,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret < 0)
 		goto skip_full_check;
 
-	ret = compute_postorder(env);
+	ret = bpf_compute_postorder(env);
 	if (ret < 0)
 		goto skip_full_check;
 
@@ -26408,6 +26429,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret)
 		goto skip_full_check;
 
+	ret = bpf_compute_const_regs(env);
+	if (ret < 0)
+		goto skip_full_check;
+
+	ret = bpf_prune_dead_branches(env);
+	if (ret < 0)
+		goto skip_full_check;
+
 	ret = sort_subprogs_topo(env);
 	if (ret < 0)
 		goto skip_full_check;
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
index 58c7704d61cd..a5b8753ce52c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -592,10 +592,10 @@ __naked void check_ids_in_regsafe_2(void)
  */
 SEC("socket")
 __success __log_level(2)
-__msg("11: (1d) if r3 == r4 goto pc+0")
+__msg("14: (1d) if r3 == r4 goto pc+0")
 __msg("frame 0: propagating r3,r4")
-__msg("11: safe")
-__msg("processed 15 insns")
+__msg("14: safe")
+__msg("processed 18 insns")
 __flag(BPF_F_TEST_STATE_FREQ)
 __naked void no_scalar_id_for_const(void)
 {
@@ -605,6 +605,7 @@ __naked void no_scalar_id_for_const(void)
 	"if r0 > 7 goto l0_%=;"
 	/* possibly generate same scalar ids for r3 and r4 */
 	"r1 = 0;"
+	"r1 ^= r1;" /* prevent bpf_prune_dead_branches from folding the branch */
 	"r1 = r1;"
 	"r3 = r1;"
 	"r4 = r1;"
@@ -612,7 +613,9 @@ __naked void no_scalar_id_for_const(void)
 "l0_%=:"
 	/* possibly generate different scalar ids for r3 and r4 */
 	"r1 = 0;"
+	"r1 ^= r1;"
 	"r2 = 0;"
+	"r2 ^= r2;"
 	"r3 = r1;"
 	"r4 = r2;"
 "l1_%=:"
@@ -628,10 +631,10 @@ __naked void no_scalar_id_for_const(void)
 /* Same as no_scalar_id_for_const() but for 32-bit values */
 SEC("socket")
 __success __log_level(2)
-__msg("11: (1e) if w3 == w4 goto pc+0")
+__msg("14: (1e) if w3 == w4 goto pc+0")
 __msg("frame 0: propagating r3,r4")
-__msg("11: safe")
-__msg("processed 15 insns")
+__msg("14: safe")
+__msg("processed 18 insns")
 __flag(BPF_F_TEST_STATE_FREQ)
 __naked void no_scalar_id_for_const32(void)
 {
@@ -641,6 +644,7 @@ __naked void no_scalar_id_for_const32(void)
 	"if r0 > 7 goto l0_%=;"
 	/* possibly generate same scalar ids for r3 and r4 */
 	"w1 = 0;"
+	"w1 ^= w1;" /* prevent bpf_prune_dead_branches from folding the branch */
 	"w1 = w1;"
 	"w3 = w1;"
 	"w4 = w1;"
@@ -648,11 +652,13 @@ __naked void no_scalar_id_for_const32(void)
 "l0_%=:"
 	/* possibly generate different scalar ids for r3 and r4 */
 	"w1 = 0;"
+	"w1 ^= w1;"
 	"w2 = 0;"
+	"w2 ^= w2;"
 	"w3 = w1;"
 	"w4 = w2;"
 "l1_%=:"
-	/* predictable jump, marks r1 and r2 precise */
+	/* predictable jump, marks r3 and r4 precise */
 	"if w3 == w4 goto +0;"
 	"r0 = 0;"
 	"exit;"
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index 8ee1243e62a8..c16f8382cf17 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -584,7 +584,7 @@ __naked void alu32_mov_u32_const(void)
 {
 	asm volatile ("					\
 	w7 = 0;						\
-	w7 &= 1;					\
+	w7 ^= w7;					\
 	w0 = w7;					\
 	if r0 == 0 goto l0_%=;				\
 	r0 = *(u64*)(r7 + 0);				\
@@ -894,7 +894,9 @@ __naked void unpriv_spectre_v1_and_v4_simple(void)
 {
 	asm volatile ("					\
 	r8 = 0;						\
+	r8 ^= r8;					\
 	r9 = 0;						\
+	r9 ^= r9;					\
 	r0 = r10;					\
 	r1 = 0;						\
 	r2 = r10;					\
@@ -932,7 +934,9 @@ __naked void unpriv_ldimm64_spectre_v1_and_v4_simple(void)
 {
 	asm volatile ("					\
 	r8 = 0;						\
+	r8 ^= r8;					\
 	r9 = 0;						\
+	r9 ^= r9;					\
 	r0 = r10;					\
 	r1 = 0;						\
 	r2 = r10;					\
-- 
cgit v1.2.3


From 19dbb1347481105e8aabc7479af35c09a65333a9 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 2 Apr 2026 19:44:20 -0700
Subject: bpf: Move verifier helpers to header

Move several helpers to header as preparation for
the subsequent stack liveness patches.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260403024422.87231-6-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 28 ++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        | 44 ++++++++++----------------------------------
 2 files changed, 38 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c5e65cdb6328..7bd32a8a45f6 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -879,6 +879,30 @@ static inline struct bpf_subprog_info *subprog_info(struct bpf_verifier_env *env
 	return &env->subprog_info[subprog];
 }
 
+struct bpf_call_summary {
+	u8 num_params;
+	bool is_void;
+	bool fastcall;
+};
+
+static inline bool bpf_helper_call(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_JMP | BPF_CALL) &&
+	       insn->src_reg == 0;
+}
+
+static inline bool bpf_pseudo_call(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_JMP | BPF_CALL) &&
+	       insn->src_reg == BPF_PSEUDO_CALL;
+}
+
+static inline bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_JMP | BPF_CALL) &&
+	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
+}
+
 __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
 				      const char *fmt, va_list args);
 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
@@ -1111,6 +1135,10 @@ int bpf_compute_postorder(struct bpf_verifier_env *env);
 bool bpf_insn_is_cond_jump(u8 code);
 bool bpf_is_may_goto_insn(struct bpf_insn *insn);
 
+void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn);
+bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
+			  struct bpf_call_summary *cs);
+
 int bpf_stack_liveness_init(struct bpf_verifier_env *env);
 void bpf_stack_liveness_free(struct bpf_verifier_env *env);
 int bpf_update_live_stack(struct bpf_verifier_env *env);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8d9f7e4574ec..7d4d0f7e2ca1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -256,24 +256,6 @@ static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
 }
 
-static bool bpf_helper_call(const struct bpf_insn *insn)
-{
-	return insn->code == (BPF_JMP | BPF_CALL) &&
-	       insn->src_reg == 0;
-}
-
-static bool bpf_pseudo_call(const struct bpf_insn *insn)
-{
-	return insn->code == (BPF_JMP | BPF_CALL) &&
-	       insn->src_reg == BPF_PSEUDO_CALL;
-}
-
-static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
-{
-	return insn->code == (BPF_JMP | BPF_CALL) &&
-	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
-}
-
 struct bpf_map_desc {
 	struct bpf_map *ptr;
 	int uid;
@@ -4297,7 +4279,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
 	return btf_name_by_offset(desc_btf, func->name_off);
 }
 
-static void verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
+void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
 	const struct bpf_insn_cbs cbs = {
 		.cb_call	= disasm_kfunc_name,
@@ -4521,7 +4503,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 		bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
 		verbose(env, "stack=%s before ", env->tmp_str_buf);
 		verbose(env, "%d: ", idx);
-		verbose_insn(env, insn);
+		bpf_verbose_insn(env, insn);
 	}
 
 	/* If there is a history record that some registers gained range at this insn,
@@ -18582,17 +18564,11 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
 	}
 }
 
-struct call_summary {
-	u8 num_params;
-	bool is_void;
-	bool fastcall;
-};
-
 /* If @call is a kfunc or helper call, fills @cs and returns true,
  * otherwise returns false.
  */
-static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
-			     struct call_summary *cs)
+bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
+			  struct bpf_call_summary *cs)
 {
 	struct bpf_kfunc_call_arg_meta meta;
 	const struct bpf_func_proto *fn;
@@ -18713,12 +18689,12 @@ static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
 	struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
 	struct bpf_insn *call = &env->prog->insnsi[insn_idx];
 	u32 clobbered_regs_mask;
-	struct call_summary cs;
+	struct bpf_call_summary cs;
 	u32 expected_regs_mask;
 	s16 off;
 	int i;
 
-	if (!get_call_summary(env, call, &cs))
+	if (!bpf_get_call_summary(env, call, &cs))
 		return;
 
 	/* A bitmask specifying which caller saved registers are clobbered
@@ -21578,7 +21554,7 @@ static int do_check(struct bpf_verifier_env *env)
 			verbose_linfo(env, env->insn_idx, "; ");
 			env->prev_log_pos = env->log.end_pos;
 			verbose(env, "%d: ", env->insn_idx);
-			verbose_insn(env, insn);
+			bpf_verbose_insn(env, insn);
 			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
 			env->prev_log_pos = env->log.end_pos;
 		}
@@ -25885,7 +25861,7 @@ static void compute_insn_live_regs(struct bpf_verifier_env *env,
 				   struct bpf_insn *insn,
 				   struct insn_live_regs *info)
 {
-	struct call_summary cs;
+	struct bpf_call_summary cs;
 	u8 class = BPF_CLASS(insn->code);
 	u8 code = BPF_OP(insn->code);
 	u8 mode = BPF_MODE(insn->code);
@@ -26000,7 +25976,7 @@ static void compute_insn_live_regs(struct bpf_verifier_env *env,
 		case BPF_CALL:
 			def = ALL_CALLER_SAVED_REGS;
 			use = def & ~BIT(BPF_REG_0);
-			if (get_call_summary(env, insn, &cs))
+			if (bpf_get_call_summary(env, insn, &cs))
 				use = GENMASK(cs.num_params, 1);
 			break;
 		default:
@@ -26100,7 +26076,7 @@ static int compute_live_registers(struct bpf_verifier_env *env)
 				else
 					verbose(env, ".");
 			verbose(env, " ");
-			verbose_insn(env, &insns[i]);
+			bpf_verbose_insn(env, &insns[i]);
 			if (bpf_is_ldimm64(&insns[i]))
 				i++;
 		}
-- 
cgit v1.2.3


From 1a1cadbd5d50b31ae1340c2a9938947719696ca0 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 2 Apr 2026 19:44:21 -0700
Subject: bpf: Add helper and kfunc stack access size resolution

The static stack liveness analysis needs to know how many bytes a
helper or kfunc accesses through a stack pointer argument, so it can
precisely mark the affected stack slots as stack 'def' or 'use'.

Add bpf_helper_stack_access_bytes() and bpf_kfunc_stack_access_bytes()
which resolve the access size for a given call argument.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260403024422.87231-7-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   6 ++
 kernel/bpf/verifier.c        | 188 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 194 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7bd32a8a45f6..36bfd96d4563 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -1138,6 +1138,12 @@ bool bpf_is_may_goto_insn(struct bpf_insn *insn);
 void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn);
 bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
 			  struct bpf_call_summary *cs);
+s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env,
+				  struct bpf_insn *insn, int arg,
+				  int insn_idx);
+s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env,
+				 struct bpf_insn *insn, int arg,
+				 int insn_idx);
 
 int bpf_stack_liveness_init(struct bpf_verifier_env *env);
 void bpf_stack_liveness_free(struct bpf_verifier_env *env);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7d4d0f7e2ca1..84699a428077 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14132,6 +14132,194 @@ static int fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
 	return 0;
 }
 
+/*
+ * Determine how many bytes a helper accesses through a stack pointer at
+ * argument position @arg (0-based, corresponding to R1-R5).
+ *
+ * Returns:
+ *   > 0   known read access size in bytes
+ *     0   doesn't read anything directly
+ * S64_MIN unknown
+ *   < 0   known write access of (-return) bytes
+ */
+s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
+				  int arg, int insn_idx)
+{
+	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+	const struct bpf_func_proto *fn;
+	enum bpf_arg_type at;
+	s64 size;
+
+	if (get_helper_proto(env, insn->imm, &fn) < 0)
+		return S64_MIN;
+
+	at = fn->arg_type[arg];
+
+	switch (base_type(at)) {
+	case ARG_PTR_TO_MAP_KEY:
+	case ARG_PTR_TO_MAP_VALUE: {
+		bool is_key = base_type(at) == ARG_PTR_TO_MAP_KEY;
+		u64 val;
+		int i, map_reg;
+
+		for (i = 0; i < arg; i++) {
+			if (base_type(fn->arg_type[i]) == ARG_CONST_MAP_PTR)
+				break;
+		}
+		if (i >= arg)
+			goto scan_all_maps;
+
+		map_reg = BPF_REG_1 + i;
+
+		if (!(aux->const_reg_map_mask & BIT(map_reg)))
+			goto scan_all_maps;
+
+		i = aux->const_reg_vals[map_reg];
+		if (i < env->used_map_cnt) {
+			size = is_key ? env->used_maps[i]->key_size
+				      : env->used_maps[i]->value_size;
+			goto out;
+		}
+scan_all_maps:
+		/*
+		 * Map pointer is not known at this call site (e.g. different
+		 * maps on merged paths).  Conservatively return the largest
+		 * key_size or value_size across all maps used by the program.
+		 */
+		val = 0;
+		for (i = 0; i < env->used_map_cnt; i++) {
+			struct bpf_map *map = env->used_maps[i];
+			u32 sz = is_key ? map->key_size : map->value_size;
+
+			if (sz > val)
+				val = sz;
+			if (map->inner_map_meta) {
+				sz = is_key ? map->inner_map_meta->key_size
+					    : map->inner_map_meta->value_size;
+				if (sz > val)
+					val = sz;
+			}
+		}
+		if (!val)
+			return S64_MIN;
+		size = val;
+		goto out;
+	}
+	case ARG_PTR_TO_MEM:
+		if (at & MEM_FIXED_SIZE) {
+			size = fn->arg_size[arg];
+			goto out;
+		}
+		if (arg + 1 < ARRAY_SIZE(fn->arg_type) &&
+		    arg_type_is_mem_size(fn->arg_type[arg + 1])) {
+			int size_reg = BPF_REG_1 + arg + 1;
+
+			if (aux->const_reg_mask & BIT(size_reg)) {
+				size = (s64)aux->const_reg_vals[size_reg];
+				goto out;
+			}
+			/*
+			 * Size arg is const on each path but differs across merged
+			 * paths. MAX_BPF_STACK is a safe upper bound for reads.
+			 */
+			if (at & MEM_UNINIT)
+				return 0;
+			return MAX_BPF_STACK;
+		}
+		return S64_MIN;
+	case ARG_PTR_TO_DYNPTR:
+		size = BPF_DYNPTR_SIZE;
+		break;
+	case ARG_PTR_TO_STACK:
+		/*
+		 * Only used by bpf_calls_callback() helpers. The helper itself
+		 * doesn't access stack. The callback subprog does and it's
+		 * analyzed separately.
+		 */
+		return 0;
+	default:
+		return S64_MIN;
+	}
+out:
+	/*
+	 * MEM_UNINIT args are write-only: the helper initializes the
+	 * buffer without reading it.
+	 */
+	if (at & MEM_UNINIT)
+		return -size;
+	return size;
+}
+
+/*
+ * Determine how many bytes a kfunc accesses through a stack pointer at
+ * argument position @arg (0-based, corresponding to R1-R5).
+ *
+ * Returns:
+ *   > 0      known read access size in bytes
+ *     0      doesn't access memory through that argument (ex: not a pointer)
+ *   S64_MIN  unknown
+ *   < 0      known write access of (-return) bytes
+ */
+s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
+				 int arg, int insn_idx)
+{
+	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+	struct bpf_kfunc_call_arg_meta meta;
+	const struct btf_param *args;
+	const struct btf_type *t, *ref_t;
+	const struct btf *btf;
+	u32 nargs, type_size;
+	s64 size;
+
+	if (fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0)
+		return S64_MIN;
+
+	btf = meta.btf;
+	args = btf_params(meta.func_proto);
+	nargs = btf_type_vlen(meta.func_proto);
+	if (arg >= nargs)
+		return 0;
+
+	t = btf_type_skip_modifiers(btf, args[arg].type, NULL);
+	if (!btf_type_is_ptr(t))
+		return 0;
+
+	/* dynptr: fixed 16-byte on-stack representation */
+	if (is_kfunc_arg_dynptr(btf, &args[arg])) {
+		size = BPF_DYNPTR_SIZE;
+		goto out;
+	}
+
+	/* ptr + __sz/__szk pair: size is in the next register */
+	if (arg + 1 < nargs &&
+	    (btf_param_match_suffix(btf, &args[arg + 1], "__sz") ||
+	     btf_param_match_suffix(btf, &args[arg + 1], "__szk"))) {
+		int size_reg = BPF_REG_1 + arg + 1;
+
+		if (aux->const_reg_mask & BIT(size_reg)) {
+			size = (s64)aux->const_reg_vals[size_reg];
+			goto out;
+		}
+		return MAX_BPF_STACK;
+	}
+
+	/* fixed-size pointed-to type: resolve via BTF */
+	ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
+	if (!IS_ERR(btf_resolve_size(btf, ref_t, &type_size))) {
+		size = type_size;
+		goto out;
+	}
+
+	return S64_MIN;
+out:
+	/* KF_ITER_NEW kfuncs initialize the iterator state at arg 0 */
+	if (arg == 0 && meta.kfunc_flags & KF_ITER_NEW)
+		return -size;
+	if (is_kfunc_arg_uninit(btf, &args[arg]))
+		return -size;
+	return size;
+}
+
 /* check special kfuncs and return:
  *  1  - not fall-through to 'else' branch, continue verification
  *  0  - fall-through to 'else' branch
-- 
cgit v1.2.3


From 624bf3440d7214b62c22d698a0a294323f331d5d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 10 Mar 2026 16:48:12 -0700
Subject: KVM: SEV: Disallow LAUNCH_FINISH if vCPUs are actively being created

Reject LAUNCH_FINISH for SEV-ES and SNP VMs if KVM is actively creating
one or more vCPUs, as KVM needs to process and encrypt each vCPU's VMSA.
Letting userspace create vCPUs while LAUNCH_FINISH is in-progress is
"fine", at least in the current code base, as kvm_for_each_vcpu() operates
on online_vcpus, LAUNCH_FINISH (all SEV+ sub-ioctls) holds kvm->mutex, and
fully onlining a vCPU in kvm_vm_ioctl_create_vcpu() is done under
kvm->mutex.  I.e. there's no difference between an in-progress vCPU and a
vCPU that is created entirely after LAUNCH_FINISH.

However, given that concurrent LAUNCH_FINISH and vCPU creation can't
possibly work (for any reasonable definition of "work"), since userspace
can't guarantee whether a particular vCPU will be encrypted or not,
disallow the combination as a hardening measure, to reduce the probability
of introducing bugs in the future, and to avoid having to reason about the
safety of future changes related to LAUNCH_FINISH.

Cc: Jethro Beekman <jethro@fortanix.com>
Closes: https://lore.kernel.org/all/b31f7c6e-2807-4662-bcdd-eea2c1e132fa@fortanix.com
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260310234829.2608037-5-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/svm/sev.c   | 10 ++++++++--
 include/linux/kvm_host.h |  7 +++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 9265ebd9aa18..10b12db7f902 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1032,6 +1032,9 @@ static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (!sev_es_guest(kvm))
 		return -ENOTTY;
 
+	if (kvm_is_vcpu_creation_in_progress(kvm))
+		return -EBUSY;
+
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		ret = mutex_lock_killable(&vcpu->mutex);
 		if (ret)
@@ -2052,8 +2055,8 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
 	struct kvm_vcpu *src_vcpu;
 	unsigned long i;
 
-	if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
-	    dst->created_vcpus != atomic_read(&dst->online_vcpus))
+	if (kvm_is_vcpu_creation_in_progress(src) ||
+	    kvm_is_vcpu_creation_in_progress(dst))
 		return -EBUSY;
 
 	if (!sev_es_guest(src))
@@ -2452,6 +2455,9 @@ static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	unsigned long i;
 	int ret;
 
+	if (kvm_is_vcpu_creation_in_progress(kvm))
+		return -EBUSY;
+
 	data.gctx_paddr = __psp_pa(sev->snp_context);
 	data.page_type = SNP_PAGE_TYPE_VMSA;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 34759a262b28..3c7f8557f7af 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1029,6 +1029,13 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 	return NULL;
 }
 
+static inline bool kvm_is_vcpu_creation_in_progress(struct kvm *kvm)
+{
+	lockdep_assert_held(&kvm->lock);
+
+	return kvm->created_vcpus != atomic_read(&kvm->online_vcpus);
+}
+
 void kvm_destroy_vcpus(struct kvm *kvm);
 
 int kvm_trylock_all_vcpus(struct kvm *kvm);
-- 
cgit v1.2.3


From 9617b5b62c7cf4284740ba5efdbf083aa5a87e5f Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Date: Thu, 2 Apr 2026 16:15:02 +0200
Subject: kernel: ksysfs: initialize kernel_kobj earlier

Software nodes depend on kernel_kobj which is initialized pretty late
into the boot process - as a core_initcall(). Ahead of moving the
software node initialization to driver_init() we must first make
kernel_kobj available before it.

Make ksysfs_init() visible in a new header - ksysfs.h - and call it in
do_basic_setup() right before driver_init().

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Link: https://patch.msgid.link/20260402-nokia770-gpio-swnodes-v5-1-d730db3dd299@oss.qualcomm.com
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 MAINTAINERS            | 1 +
 include/linux/ksysfs.h | 8 ++++++++
 init/main.c            | 2 ++
 kernel/ksysfs.c        | 9 ++++-----
 4 files changed, 15 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/ksysfs.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 2695f321bfd7..fe7516481a44 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7805,6 +7805,7 @@ F:	include/linux/debugfs.h
 F:	include/linux/device.h
 F:	include/linux/fwnode.h
 F:	include/linux/kobj*
+F:	include/linux/ksysfs.h
 F:	include/linux/property.h
 F:	include/linux/sysfs.h
 F:	kernel/ksysfs.c
diff --git a/include/linux/ksysfs.h b/include/linux/ksysfs.h
new file mode 100644
index 000000000000..c7dc6e18f28e
--- /dev/null
+++ b/include/linux/ksysfs.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _KSYSFS_H_
+#define _KSYSFS_H_
+
+void ksysfs_init(void);
+
+#endif /* _KSYSFS_H_ */
diff --git a/init/main.c b/init/main.c
index 1cb395dd94e4..2c9cce0bfe86 100644
--- a/init/main.c
+++ b/init/main.c
@@ -36,6 +36,7 @@
 #include <linux/kmod.h>
 #include <linux/kprobes.h>
 #include <linux/kmsan.h>
+#include <linux/ksysfs.h>
 #include <linux/vmalloc.h>
 #include <linux/kernel_stat.h>
 #include <linux/start_kernel.h>
@@ -1473,6 +1474,7 @@ static void __init do_initcalls(void)
 static void __init do_basic_setup(void)
 {
 	cpuset_init_smp();
+	ksysfs_init();
 	driver_init();
 	init_irq_proc();
 	do_ctors();
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index a9e6354d9e25..f45ade718054 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -8,6 +8,7 @@
 
 #include <asm/byteorder.h>
 #include <linux/kobject.h>
+#include <linux/ksysfs.h>
 #include <linux/string.h>
 #include <linux/sysfs.h>
 #include <linux/export.h>
@@ -213,7 +214,7 @@ static const struct attribute_group kernel_attr_group = {
 	.attrs = kernel_attrs,
 };
 
-static int __init ksysfs_init(void)
+void __init ksysfs_init(void)
 {
 	int error;
 
@@ -234,14 +235,12 @@ static int __init ksysfs_init(void)
 			goto group_exit;
 	}
 
-	return 0;
+	return;
 
 group_exit:
 	sysfs_remove_group(kernel_kobj, &kernel_attr_group);
 kset_exit:
 	kobject_put(kernel_kobj);
 exit:
-	return error;
+	pr_err("failed to initialize the kernel kobject: %d\n", error);
 }
-
-core_initcall(ksysfs_init);
-- 
cgit v1.2.3


From 6af36aeb147a06dea47c49859cd6ca5659aeb987 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Fri, 19 Dec 2025 13:18:22 -0500
Subject: lsm: add backing_file LSM hooks

Stacked filesystems such as overlayfs do not currently provide the
necessary mechanisms for LSMs to properly enforce access controls on the
mmap() and mprotect() operations.  In order to resolve this gap, a LSM
security blob is being added to the backing_file struct and the following
new LSM hooks are being created:

 security_backing_file_alloc()
 security_backing_file_free()
 security_mmap_backing_file()

The first two hooks are to manage the lifecycle of the LSM security blob
in the backing_file struct, while the third provides a new mmap() access
control point for the underlying backing file.  It is also expected that
LSMs will likely want to update their security_file_mprotect() callback
to address issues with their mprotect() controls, but that does not
require a change to the security_file_mprotect() LSM hook.

There are a three other small changes to support these new LSM hooks:
* Pass the user file associated with a backing file down to
alloc_empty_backing_file() so it can be included in the
security_backing_file_alloc() hook.
* Add getter and setter functions for the backing_file struct LSM blob
as the backing_file struct remains private to fs/file_table.c.
* Constify the file struct field in the LSM common_audit_data struct to
better support LSMs that need to pass a const file struct pointer into
the common LSM audit code.

Thanks to Arnd Bergmann for identifying the missing EXPORT_SYMBOL_GPL()
and supplying a fixup.

Cc: stable@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-unionfs@vger.kernel.org
Cc: linux-erofs@lists.ozlabs.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 fs/backing-file.c             |  18 +++++---
 fs/erofs/ishare.c             |  10 ++++-
 fs/file_table.c               |  27 +++++++++--
 fs/fuse/passthrough.c         |   2 +-
 fs/internal.h                 |   3 +-
 fs/overlayfs/dir.c            |   2 +-
 fs/overlayfs/file.c           |   2 +-
 include/linux/backing-file.h  |   4 +-
 include/linux/fs.h            |  13 ++++++
 include/linux/lsm_audit.h     |   2 +-
 include/linux/lsm_hook_defs.h |   5 +++
 include/linux/lsm_hooks.h     |   1 +
 include/linux/security.h      |  22 +++++++++
 security/lsm.h                |   1 +
 security/lsm_init.c           |   9 ++++
 security/security.c           | 102 ++++++++++++++++++++++++++++++++++++++++++
 16 files changed, 206 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/backing-file.c b/fs/backing-file.c
index 45da8600d564..1f3bbfc75882 100644
--- a/fs/backing-file.c
+++ b/fs/backing-file.c
@@ -12,6 +12,7 @@
 #include <linux/backing-file.h>
 #include <linux/splice.h>
 #include <linux/mm.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -29,14 +30,15 @@
  * returned file into a container structure that also stores the stacked
  * file's path, which can be retrieved using backing_file_user_path().
  */
-struct file *backing_file_open(const struct path *user_path, int flags,
+struct file *backing_file_open(const struct file *user_file, int flags,
 			       const struct path *real_path,
 			       const struct cred *cred)
 {
+	const struct path *user_path = &user_file->f_path;
 	struct file *f;
 	int error;
 
-	f = alloc_empty_backing_file(flags, cred);
+	f = alloc_empty_backing_file(flags, cred, user_file);
 	if (IS_ERR(f))
 		return f;
 
@@ -52,15 +54,16 @@ struct file *backing_file_open(const struct path *user_path, int flags,
 }
 EXPORT_SYMBOL_GPL(backing_file_open);
 
-struct file *backing_tmpfile_open(const struct path *user_path, int flags,
+struct file *backing_tmpfile_open(const struct file *user_file, int flags,
 				  const struct path *real_parentpath,
 				  umode_t mode, const struct cred *cred)
 {
 	struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
+	const struct path *user_path = &user_file->f_path;
 	struct file *f;
 	int error;
 
-	f = alloc_empty_backing_file(flags, cred);
+	f = alloc_empty_backing_file(flags, cred, user_file);
 	if (IS_ERR(f))
 		return f;
 
@@ -336,8 +339,13 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
 
 	vma_set_file(vma, file);
 
-	scoped_with_creds(ctx->cred)
+	scoped_with_creds(ctx->cred) {
+		ret = security_mmap_backing_file(vma, file, user_file);
+		if (ret)
+			return ret;
+
 		ret = vfs_mmap(vma->vm_file, vma);
+	}
 
 	if (ctx->accessed)
 		ctx->accessed(user_file);
diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c
index ce980320a8b9..a1efc46d5987 100644
--- a/fs/erofs/ishare.c
+++ b/fs/erofs/ishare.c
@@ -4,6 +4,7 @@
  */
 #include <linux/xxhash.h>
 #include <linux/mount.h>
+#include <linux/security.h>
 #include "internal.h"
 #include "xattr.h"
 
@@ -102,7 +103,8 @@ static int erofs_ishare_file_open(struct inode *inode, struct file *file)
 
 	if (file->f_flags & O_DIRECT)
 		return -EINVAL;
-	realfile = alloc_empty_backing_file(O_RDONLY|O_NOATIME, current_cred());
+	realfile = alloc_empty_backing_file(O_RDONLY|O_NOATIME, current_cred(),
+					    file);
 	if (IS_ERR(realfile))
 		return PTR_ERR(realfile);
 	ihold(sharedinode);
@@ -146,8 +148,14 @@ static ssize_t erofs_ishare_file_read_iter(struct kiocb *iocb,
 static int erofs_ishare_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct file *realfile = file->private_data;
+	int err;
 
 	vma_set_file(vma, realfile);
+
+	err = security_mmap_backing_file(vma, realfile, file);
+	if (err)
+		return err;
+
 	return generic_file_readonly_mmap(file, vma);
 }
 
diff --git a/fs/file_table.c b/fs/file_table.c
index 3b3792903185..d19d879b6efc 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -50,6 +50,9 @@ struct backing_file {
 		struct path user_path;
 		freeptr_t bf_freeptr;
 	};
+#ifdef CONFIG_SECURITY
+	void *security;
+#endif
 };
 
 #define backing_file(f) container_of(f, struct backing_file, file)
@@ -66,8 +69,21 @@ void backing_file_set_user_path(struct file *f, const struct path *path)
 }
 EXPORT_SYMBOL_GPL(backing_file_set_user_path);
 
+#ifdef CONFIG_SECURITY
+void *backing_file_security(const struct file *f)
+{
+	return backing_file(f)->security;
+}
+
+void backing_file_set_security(struct file *f, void *security)
+{
+	backing_file(f)->security = security;
+}
+#endif /* CONFIG_SECURITY */
+
 static inline void backing_file_free(struct backing_file *ff)
 {
+	security_backing_file_free(&ff->file);
 	path_put(&ff->user_path);
 	kmem_cache_free(bfilp_cachep, ff);
 }
@@ -288,10 +304,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
 	return f;
 }
 
-static int init_backing_file(struct backing_file *ff)
+static int init_backing_file(struct backing_file *ff,
+			     const struct file *user_file)
 {
 	memset(&ff->user_path, 0, sizeof(ff->user_path));
-	return 0;
+	backing_file_set_security(&ff->file, NULL);
+	return security_backing_file_alloc(&ff->file, user_file);
 }
 
 /*
@@ -301,7 +319,8 @@ static int init_backing_file(struct backing_file *ff)
  * This is only for kernel internal use, and the allocate file must not be
  * installed into file tables or such.
  */
-struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
+				      const struct file *user_file)
 {
 	struct backing_file *ff;
 	int error;
@@ -318,7 +337,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
 
 	/* The f_mode flags must be set before fput(). */
 	ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
-	error = init_backing_file(ff);
+	error = init_backing_file(ff, user_file);
 	if (unlikely(error)) {
 		fput(&ff->file);
 		return ERR_PTR(error);
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
index 72de97c03d0e..f2d08ac2459b 100644
--- a/fs/fuse/passthrough.c
+++ b/fs/fuse/passthrough.c
@@ -167,7 +167,7 @@ struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id)
 		goto out;
 
 	/* Allocate backing file per fuse file to store fuse path */
-	backing_file = backing_file_open(&file->f_path, file->f_flags,
+	backing_file = backing_file_open(file, file->f_flags,
 					 &fb->file->f_path, fb->cred);
 	err = PTR_ERR(backing_file);
 	if (IS_ERR(backing_file)) {
diff --git a/fs/internal.h b/fs/internal.h
index cbc384a1aa09..77e90e4124e0 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,7 +106,8 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
  */
 struct file *alloc_empty_file(int flags, const struct cred *cred);
 struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
-struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
+				      const struct file *user_file);
 void backing_file_set_user_path(struct file *f, const struct path *path);
 
 static inline void file_put_write_access(struct file *file)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index ff3dbd1ca61f..f2f20a611af3 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -1374,7 +1374,7 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
 				return PTR_ERR(cred);
 
 			ovl_path_upper(dentry->d_parent, &realparentpath);
-			realfile = backing_tmpfile_open(&file->f_path, flags, &realparentpath,
+			realfile = backing_tmpfile_open(file, flags, &realparentpath,
 							mode, current_cred());
 			err = PTR_ERR_OR_ZERO(realfile);
 			pr_debug("tmpfile/open(%pd2, 0%o) = %i\n", realparentpath.dentry, mode, err);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 97bed2286030..27cc07738f33 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -48,7 +48,7 @@ static struct file *ovl_open_realfile(const struct file *file,
 			if (!inode_owner_or_capable(real_idmap, realinode))
 				flags &= ~O_NOATIME;
 
-			realfile = backing_file_open(file_user_path(file),
+			realfile = backing_file_open(file,
 						     flags, realpath, current_cred());
 		}
 	}
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
index 1476a6ed1bfd..c939cd222730 100644
--- a/include/linux/backing-file.h
+++ b/include/linux/backing-file.h
@@ -18,10 +18,10 @@ struct backing_file_ctx {
 	void (*end_write)(struct kiocb *iocb, ssize_t);
 };
 
-struct file *backing_file_open(const struct path *user_path, int flags,
+struct file *backing_file_open(const struct file *user_file, int flags,
 			       const struct path *real_path,
 			       const struct cred *cred);
-struct file *backing_tmpfile_open(const struct path *user_path, int flags,
+struct file *backing_tmpfile_open(const struct file *user_file, int flags,
 				  const struct path *real_parentpath,
 				  umode_t mode, const struct cred *cred);
 ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b3dd145b25e..d0d0e8f55589 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2475,6 +2475,19 @@ struct file *dentry_create(struct path *path, int flags, umode_t mode,
 			   const struct cred *cred);
 const struct path *backing_file_user_path(const struct file *f);
 
+#ifdef CONFIG_SECURITY
+void *backing_file_security(const struct file *f);
+void backing_file_set_security(struct file *f, void *security);
+#else
+static inline void *backing_file_security(const struct file *f)
+{
+	return NULL;
+}
+static inline void backing_file_set_security(struct file *f, void *security)
+{
+}
+#endif /* CONFIG_SECURITY */
+
 /*
  * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
  * stored in ->vm_file is a backing file whose f_inode is on the underlying
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 382c56a97bba..584db296e43b 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -94,7 +94,7 @@ struct common_audit_data {
 #endif
 		char *kmod_name;
 		struct lsm_ioctlop_audit *op;
-		struct file *file;
+		const struct file *file;
 		struct lsm_ibpkey_audit *ibpkey;
 		struct lsm_ibendport_audit *ibendport;
 		int reason;
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 8c42b4bde09c..b4958167e381 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -191,6 +191,9 @@ LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
 LSM_HOOK(int, 0, file_alloc_security, struct file *file)
 LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file)
 LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
+LSM_HOOK(int, 0, backing_file_alloc, struct file *backing_file,
+	 const struct file *user_file)
+LSM_HOOK(void, LSM_RET_VOID, backing_file_free, struct file *backing_file)
 LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
 	 unsigned long arg)
 LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
@@ -198,6 +201,8 @@ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
 LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
 LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
 	 unsigned long prot, unsigned long flags)
+LSM_HOOK(int, 0, mmap_backing_file, struct vm_area_struct *vma,
+	 struct file *backing_file, struct file *user_file)
 LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma,
 	 unsigned long reqprot, unsigned long prot)
 LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index d48bf0ad26f4..b4f8cad53ddb 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -104,6 +104,7 @@ struct security_hook_list {
 struct lsm_blob_sizes {
 	unsigned int lbs_cred;
 	unsigned int lbs_file;
+	unsigned int lbs_backing_file;
 	unsigned int lbs_ib;
 	unsigned int lbs_inode;
 	unsigned int lbs_sock;
diff --git a/include/linux/security.h b/include/linux/security.h
index 83a646d72f6f..ad99b35891a6 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -471,11 +471,17 @@ int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
 void security_file_release(struct file *file);
 void security_file_free(struct file *file);
+int security_backing_file_alloc(struct file *backing_file,
+				const struct file *user_file);
+void security_backing_file_free(struct file *backing_file);
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 int security_file_ioctl_compat(struct file *file, unsigned int cmd,
 			       unsigned long arg);
 int security_mmap_file(struct file *file, unsigned long prot,
 			unsigned long flags);
+int security_mmap_backing_file(struct vm_area_struct *vma,
+			       struct file *backing_file,
+			       struct file *user_file);
 int security_mmap_addr(unsigned long addr);
 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
 			   unsigned long prot);
@@ -1140,6 +1146,15 @@ static inline void security_file_release(struct file *file)
 static inline void security_file_free(struct file *file)
 { }
 
+static inline int security_backing_file_alloc(struct file *backing_file,
+					      const struct file *user_file)
+{
+	return 0;
+}
+
+static inline void security_backing_file_free(struct file *backing_file)
+{ }
+
 static inline int security_file_ioctl(struct file *file, unsigned int cmd,
 				      unsigned long arg)
 {
@@ -1159,6 +1174,13 @@ static inline int security_mmap_file(struct file *file, unsigned long prot,
 	return 0;
 }
 
+static inline int security_mmap_backing_file(struct vm_area_struct *vma,
+					     struct file *backing_file,
+					     struct file *user_file)
+{
+	return 0;
+}
+
 static inline int security_mmap_addr(unsigned long addr)
 {
 	return cap_mmap_addr(addr);
diff --git a/security/lsm.h b/security/lsm.h
index db77cc83e158..32f808ad4335 100644
--- a/security/lsm.h
+++ b/security/lsm.h
@@ -29,6 +29,7 @@ extern struct lsm_blob_sizes blob_sizes;
 
 /* LSM blob caches */
 extern struct kmem_cache *lsm_file_cache;
+extern struct kmem_cache *lsm_backing_file_cache;
 extern struct kmem_cache *lsm_inode_cache;
 
 /* LSM blob allocators */
diff --git a/security/lsm_init.c b/security/lsm_init.c
index 573e2a7250c4..7c0fd17f1601 100644
--- a/security/lsm_init.c
+++ b/security/lsm_init.c
@@ -293,6 +293,8 @@ static void __init lsm_prepare(struct lsm_info *lsm)
 	blobs = lsm->blobs;
 	lsm_blob_size_update(&blobs->lbs_cred, &blob_sizes.lbs_cred);
 	lsm_blob_size_update(&blobs->lbs_file, &blob_sizes.lbs_file);
+	lsm_blob_size_update(&blobs->lbs_backing_file,
+			     &blob_sizes.lbs_backing_file);
 	lsm_blob_size_update(&blobs->lbs_ib, &blob_sizes.lbs_ib);
 	/* inode blob gets an rcu_head in addition to LSM blobs. */
 	if (blobs->lbs_inode && blob_sizes.lbs_inode == 0)
@@ -441,6 +443,8 @@ int __init security_init(void)
 	if (lsm_debug) {
 		lsm_pr("blob(cred) size %d\n", blob_sizes.lbs_cred);
 		lsm_pr("blob(file) size %d\n", blob_sizes.lbs_file);
+		lsm_pr("blob(backing_file) size %d\n",
+		       blob_sizes.lbs_backing_file);
 		lsm_pr("blob(ib) size %d\n", blob_sizes.lbs_ib);
 		lsm_pr("blob(inode) size %d\n", blob_sizes.lbs_inode);
 		lsm_pr("blob(ipc) size %d\n", blob_sizes.lbs_ipc);
@@ -462,6 +466,11 @@ int __init security_init(void)
 		lsm_file_cache = kmem_cache_create("lsm_file_cache",
 						   blob_sizes.lbs_file, 0,
 						   SLAB_PANIC, NULL);
+	if (blob_sizes.lbs_backing_file)
+		lsm_backing_file_cache = kmem_cache_create(
+						   "lsm_backing_file_cache",
+						   blob_sizes.lbs_backing_file,
+						   0, SLAB_PANIC, NULL);
 	if (blob_sizes.lbs_inode)
 		lsm_inode_cache = kmem_cache_create("lsm_inode_cache",
 						    blob_sizes.lbs_inode, 0,
diff --git a/security/security.c b/security/security.c
index 67af9228c4e9..680113625451 100644
--- a/security/security.c
+++ b/security/security.c
@@ -81,6 +81,7 @@ const struct lsm_id *lsm_idlist[MAX_LSM_COUNT];
 struct lsm_blob_sizes blob_sizes;
 
 struct kmem_cache *lsm_file_cache;
+struct kmem_cache *lsm_backing_file_cache;
 struct kmem_cache *lsm_inode_cache;
 
 #define SECURITY_HOOK_ACTIVE_KEY(HOOK, IDX) security_hook_active_##HOOK##_##IDX
@@ -172,6 +173,30 @@ static int lsm_file_alloc(struct file *file)
 	return 0;
 }
 
+/**
+ * lsm_backing_file_alloc - allocate a composite backing file blob
+ * @backing_file: the backing file
+ *
+ * Allocate the backing file blob for all the modules.
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+static int lsm_backing_file_alloc(struct file *backing_file)
+{
+	void *blob;
+
+	if (!lsm_backing_file_cache) {
+		backing_file_set_security(backing_file, NULL);
+		return 0;
+	}
+
+	blob = kmem_cache_zalloc(lsm_backing_file_cache, GFP_KERNEL);
+	backing_file_set_security(backing_file, blob);
+	if (!blob)
+		return -ENOMEM;
+	return 0;
+}
+
 /**
  * lsm_blob_alloc - allocate a composite blob
  * @dest: the destination for the blob
@@ -2417,6 +2442,57 @@ void security_file_free(struct file *file)
 	}
 }
 
+/**
+ * security_backing_file_alloc() - Allocate and setup a backing file blob
+ * @backing_file: the backing file
+ * @user_file: the associated user visible file
+ *
+ * Allocate a backing file LSM blob and perform any necessary initialization of
+ * the LSM blob.  There will be some operations where the LSM will not have
+ * access to @user_file after this point, so any important state associated
+ * with @user_file that is important to the LSM should be captured in the
+ * backing file's LSM blob.
+ *
+ * LSM's should avoid taking a reference to @user_file in this hook as it will
+ * result in problems later when the system attempts to drop/put the file
+ * references due to a circular dependency.
+ *
+ * Return: Return 0 if the hook is successful, negative values otherwise.
+ */
+int security_backing_file_alloc(struct file *backing_file,
+				const struct file *user_file)
+{
+	int rc;
+
+	rc = lsm_backing_file_alloc(backing_file);
+	if (rc)
+		return rc;
+	rc = call_int_hook(backing_file_alloc, backing_file, user_file);
+	if (unlikely(rc))
+		security_backing_file_free(backing_file);
+
+	return rc;
+}
+
+/**
+ * security_backing_file_free() - Free a backing file blob
+ * @backing_file: the backing file
+ *
+ * Free any LSM state associate with a backing file's LSM blob, including the
+ * blob itself.
+ */
+void security_backing_file_free(struct file *backing_file)
+{
+	void *blob = backing_file_security(backing_file);
+
+	call_void_hook(backing_file_free, backing_file);
+
+	if (blob) {
+		backing_file_set_security(backing_file, NULL);
+		kmem_cache_free(lsm_backing_file_cache, blob);
+	}
+}
+
 /**
  * security_file_ioctl() - Check if an ioctl is allowed
  * @file: associated file
@@ -2505,6 +2581,32 @@ int security_mmap_file(struct file *file, unsigned long prot,
 			     flags);
 }
 
+/**
+ * security_mmap_backing_file - Check if mmap'ing a backing file is allowed
+ * @vma: the vm_area_struct for the mmap'd region
+ * @backing_file: the backing file being mmap'd
+ * @user_file: the user file being mmap'd
+ *
+ * Check permissions for a mmap operation on a stacked filesystem.  This hook
+ * is called after the security_mmap_file() and is responsible for authorizing
+ * the mmap on @backing_file.  It is important to note that the mmap operation
+ * on @user_file has already been authorized and the @vma->vm_file has been
+ * set to @backing_file.
+ *
+ * Return: Returns 0 if permission is granted.
+ */
+int security_mmap_backing_file(struct vm_area_struct *vma,
+			       struct file *backing_file,
+			       struct file *user_file)
+{
+	/* recommended by the stackable filesystem devs */
+	if (WARN_ON_ONCE(!(backing_file->f_mode & FMODE_BACKING)))
+		return -EIO;
+
+	return call_int_hook(mmap_backing_file, vma, backing_file, user_file);
+}
+EXPORT_SYMBOL_GPL(security_mmap_backing_file);
+
 /**
  * security_mmap_addr() - Check if mmap'ing an address is allowed
  * @addr: address
-- 
cgit v1.2.3


From a9b460225e47a3d98296eba71c62ff0ad58a2032 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Apr 2026 15:26:54 +0000
Subject: net: always inline some skb helpers

Some performance critical helpers from include/linux/skbuff.h
are not inlined by clang.

Use __always_inline hint for:

 - __skb_fill_netmem_desc()
 - __skb_fill_page_desc()
 - skb_fill_netmem_desc()
 - skb_fill_page_desc()
 - __skb_pull()
 - pskb_may_pull_reason()
 - pskb_may_pull()
 - pskb_pull()
 - pskb_trim()
 - skb_orphan()
 - skb_postpull_rcsum()
 - skb_header_pointer()
 - skb_clear_delivery_time()
 - skb_tstamp_cond()
 - skb_warn_if_lro()

This increases performance and saves ~1200 bytes of text.

$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 4/24 grow/shrink: 66/12 up/down: 4104/-5306 (-1202)
Function                                     old     new   delta
ip_multipath_l3_keys                           -     303    +303
tcp_sendmsg_locked                          4560    4848    +288
xfrm_input                                  6240    6455    +215
esp_output_head                             1516    1711    +195
skb_try_coalesce                             696     866    +170
bpf_prog_test_run_skb                       1951    2091    +140
tls_strp_read_copy                           528     667    +139
gue_udp_recv                                 738     871    +133
__ip6_append_data                           4159    4279    +120
__bond_xmit_hash                            1019    1122    +103
ip6_multipath_l3_keys                        394     495    +101
bpf_lwt_seg6_action                         1096    1197    +101
input_action_end_dx2                         344     442     +98
vxlan_remcsum                                487     581     +94
udpv6_queue_rcv_skb                          393     480     +87
udp_queue_rcv_skb                            385     471     +86
gue_remcsum                                  453     539     +86
udp_lib_checksum_complete                     84     168     +84
vxlan_xmit                                  2777    2857     +80
nf_reset_ct                                  456     532     +76
igmp_rcv                                    1902    1978     +76
mpls_forward                                1097    1169     +72
tcp_add_backlog                             1226    1292     +66
nfulnl_log_packet                           3091    3156     +65
tcp_rcv_established                         1966    2026     +60
__strp_recv                                 1547    1603     +56
eth_type_trans                               357     411     +54
bond_flow_ip                                 392     444     +52
__icmp_send                                 1584    1630     +46
ip_defrag                                   1636    1681     +45
tpacket_rcv                                 2793    2837     +44
refcount_add                                 132     176     +44
nf_ct_frag6_gather                          1959    2003     +44
napi_skb_free_stolen_head                    199     240     +41
__pskb_trim                                    -      41     +41
napi_reuse_skb                               319     358     +39
icmpv6_rcv                                  1877    1916     +39
br_handle_frame_finish                      1672    1711     +39
ip_rcv_core                                  841     879     +38
ip_check_defrag                              377     415     +38
br_stp_rcv                                   909     947     +38
qdisc_pkt_len_segs_init                      366     399     +33
mld_query_work                              2945    2975     +30
bpf_sk_assign_tcp_reqsk                      607     637     +30
udp_gro_receive                             1657    1686     +29
ip6_rcv_core                                1170    1193     +23
ah_input                                    1176    1197     +21
tun_get_user                                5174    5194     +20
llc_rcv                                      815     834     +19
__pfx_udp_lib_checksum_complete               16      32     +16
__pfx_refcount_add                            48      64     +16
__pfx_nf_reset_ct                             96     112     +16
__pfx_ip_multipath_l3_keys                     -      16     +16
__pfx___pskb_trim                              -      16     +16
packet_sendmsg                              5771    5781     +10
esp_output_tail                             1460    1470     +10
alloc_skb_with_frags                         433     443     +10
xsk_generic_xmit                            3477    3486      +9
mptcp_sendmsg_frag                          2250    2259      +9
__ip_append_data                            4166    4175      +9
__ip6_tnl_rcv                               1159    1168      +9
skb_zerocopy                                1215    1220      +5
gre_parse_header                            1358    1362      +4
__iptunnel_pull_header                       405     407      +2
skb_vlan_untag                               692     693      +1
psp_dev_rcv                                  701     702      +1
netkit_xmit                                 1263    1264      +1
gre_rcv                                     2776    2777      +1
gre_gso_segment                             1521    1522      +1
bpf_skb_net_hdr_pop                          535     536      +1
udp6_ufo_fragment                            888     884      -4
br_multicast_rcv                            9154    9148      -6
snap_rcv                                     312     305      -7
skb_copy_ubufs                              1841    1834      -7
__pfx_skb_tstamp_cond                         16       -     -16
__pfx_skb_clear_delivery_time                 16       -     -16
__pfx_pskb_trim                               16       -     -16
__pfx_pskb_pull                               16       -     -16
ipv6_gso_segment                            1400    1383     -17
ipv6_frag_rcv                               2511    2492     -19
erspan_xmit                                 1221    1190     -31
__pfx_skb_warn_if_lro                         32       -     -32
__pfx___skb_fill_page_desc                    32       -     -32
skb_tstamp_cond                               42       -     -42
pskb_trim                                     46       -     -46
__pfx_skb_postpull_rcsum                      48       -     -48
tcp_gso_segment                             1524    1475     -49
skb_clear_delivery_time                       54       -     -54
__pfx_skb_fill_page_desc                      64       -     -64
__pfx_skb_header_pointer                      80       -     -80
pskb_pull                                     91       -     -91
skb_warn_if_lro                              110       -    -110
tcp_v6_rcv                                  3288    3170    -118
__pfx___skb_pull                             128       -    -128
__pfx_skb_orphan                             144       -    -144
__pfx_pskb_may_pull                          160       -    -160
tcp_v4_rcv                                  3334    3153    -181
__skb_fill_page_desc                         231       -    -231
udp_rcv                                     1809    1553    -256
skb_postpull_rcsum                           318       -    -318
skb_header_pointer                           367       -    -367
fib_multipath_hash                          3399    3018    -381
skb_orphan                                   513       -    -513
skb_fill_page_desc                           534       -    -534
__skb_pull                                   568       -    -568
pskb_may_pull                                604       -    -604
Total: Before=29652698, After=29651496, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260402152654.1720627-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fbfa9852e82a..26fe18bcfad8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2605,8 +2605,9 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
  *
  * Does not take any additional reference on the fragment.
  */
-static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i,
-					  netmem_ref netmem, int off, int size)
+static __always_inline void
+__skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem,
+		       int off, int size)
 {
 	struct page *page;
 
@@ -2628,14 +2629,16 @@ static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i,
 		skb->pfmemalloc = true;
 }
 
-static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
-					struct page *page, int off, int size)
+static __always_inline void
+__skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
+		     int off, int size)
 {
 	__skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
 }
 
-static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i,
-					netmem_ref netmem, int off, int size)
+static __always_inline void
+skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem,
+		     int off, int size)
 {
 	__skb_fill_netmem_desc(skb, i, netmem, off, size);
 	skb_shinfo(skb)->nr_frags = i + 1;
@@ -2655,8 +2658,9 @@ static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i,
  *
  * Does not take any additional reference on the fragment.
  */
-static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
-				      struct page *page, int off, int size)
+static __always_inline void
+skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
+		   int off, int size)
 {
 	skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
 }
@@ -2828,7 +2832,7 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len)
 }
 
 void *skb_pull(struct sk_buff *skb, unsigned int len);
-static inline void *__skb_pull(struct sk_buff *skb, unsigned int len)
+static __always_inline void *__skb_pull(struct sk_buff *skb, unsigned int len)
 {
 	DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
 
@@ -2853,7 +2857,7 @@ void *skb_pull_data(struct sk_buff *skb, size_t len);
 
 void *__pskb_pull_tail(struct sk_buff *skb, int delta);
 
-static inline enum skb_drop_reason
+static __always_inline enum skb_drop_reason
 pskb_may_pull_reason(struct sk_buff *skb, unsigned int len)
 {
 	DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
@@ -2871,12 +2875,13 @@ pskb_may_pull_reason(struct sk_buff *skb, unsigned int len)
 	return SKB_NOT_DROPPED_YET;
 }
 
-static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len)
+static __always_inline bool
+pskb_may_pull(struct sk_buff *skb, unsigned int len)
 {
 	return pskb_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET;
 }
 
-static inline void *pskb_pull(struct sk_buff *skb, unsigned int len)
+static __always_inline void *pskb_pull(struct sk_buff *skb, unsigned int len)
 {
 	if (!pskb_may_pull(skb, len))
 		return NULL;
@@ -3337,7 +3342,7 @@ static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
 	return 0;
 }
 
-static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
+static __always_inline int pskb_trim(struct sk_buff *skb, unsigned int len)
 {
 	skb_might_realloc(skb);
 	return (len < skb->len) ? __pskb_trim(skb, len) : 0;
@@ -3380,7 +3385,7 @@ static inline int __skb_grow(struct sk_buff *skb, unsigned int len)
  *	destructor function and make the @skb unowned. The buffer continues
  *	to exist but is no longer charged to its former owner.
  */
-static inline void skb_orphan(struct sk_buff *skb)
+static __always_inline void skb_orphan(struct sk_buff *skb)
 {
 	if (skb->destructor) {
 		skb->destructor(skb);
@@ -4044,8 +4049,8 @@ __skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
  *	update the CHECKSUM_COMPLETE checksum, or set ip_summed to
  *	CHECKSUM_NONE so that it can be recomputed from scratch.
  */
-static inline void skb_postpull_rcsum(struct sk_buff *skb,
-				      const void *start, unsigned int len)
+static __always_inline void
+skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len)
 {
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->csum = wsum_negate(csum_partial(start, len,
@@ -4304,7 +4309,7 @@ __skb_header_pointer(const struct sk_buff *skb, int offset, int len,
 	return buffer;
 }
 
-static inline void * __must_check
+static __always_inline void * __must_check
 skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer)
 {
 	return __skb_header_pointer(skb, offset, len, skb->data,
@@ -4476,7 +4481,7 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key);
 /* It is used in the ingress path to clear the delivery_time.
  * If needed, set the skb->tstamp to the (rcv) timestamp.
  */
-static inline void skb_clear_delivery_time(struct sk_buff *skb)
+static __always_inline void skb_clear_delivery_time(struct sk_buff *skb)
 {
 	if (skb->tstamp_type) {
 		skb->tstamp_type = SKB_CLOCK_REALTIME;
@@ -4503,7 +4508,8 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb)
 	return skb->tstamp;
 }
 
-static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond)
+static __always_inline ktime_t
+skb_tstamp_cond(const struct sk_buff *skb, bool cond)
 {
 	if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp)
 		return skb->tstamp;
@@ -5293,7 +5299,7 @@ static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo,
 
 void __skb_warn_lro_forwarding(const struct sk_buff *skb);
 
-static inline bool skb_warn_if_lro(const struct sk_buff *skb)
+static __always_inline bool skb_warn_if_lro(const struct sk_buff *skb)
 {
 	/* LRO sets gso_size but not gso_type, whereas if GSO is really
 	 * wanted then gso_type will be set. */
-- 
cgit v1.2.3


From 10a4206a24013be4d558d476010cbf2eb4c9fa64 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@kernel.org>
Date: Tue, 24 Mar 2026 01:59:09 +0100
Subject: PCI: use generic driver_override infrastructure

When a driver is probed through __driver_attach(), the bus' match()
callback is called without the device lock held, thus accessing the
driver_override field without a lock, which can cause a UAF.

Fix this by using the driver-core driver_override infrastructure taking
care of proper locking internally.

Note that calling match() from __driver_attach() without the device lock
held is intentional. [1]

Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Reported-by: Gui-Dong Han <hanguidong02@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789
Fixes: 782a985d7af2 ("PCI: Introduce new device binding path using pci_dev.driver_override")
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Alex Williamson <alex@shazbot.org>
Tested-by: Gui-Dong Han <hanguidong02@gmail.com>
Reviewed-by: Gui-Dong Han <hanguidong02@gmail.com>
Link: https://patch.msgid.link/20260324005919.2408620-6-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 drivers/pci/pci-driver.c           | 11 +++++++----
 drivers/pci/pci-sysfs.c            | 28 ----------------------------
 drivers/pci/probe.c                |  1 -
 drivers/vfio/pci/vfio_pci_core.c   |  5 ++---
 drivers/xen/xen-pciback/pci_stub.c |  6 ++++--
 include/linux/pci.h                |  6 ------
 6 files changed, 13 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index dd9075403987..d10ece0889f0 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -138,9 +138,11 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
 {
 	struct pci_dynid *dynid;
 	const struct pci_device_id *found_id = NULL, *ids;
+	int ret;
 
 	/* When driver_override is set, only bind to the matching driver */
-	if (dev->driver_override && strcmp(dev->driver_override, drv->name))
+	ret = device_match_driver_override(&dev->dev, &drv->driver);
+	if (ret == 0)
 		return NULL;
 
 	/* Look at the dynamic ids first, before the static ones */
@@ -164,7 +166,7 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
 		 * matching.
 		 */
 		if (found_id->override_only) {
-			if (dev->driver_override)
+			if (ret > 0)
 				return found_id;
 		} else {
 			return found_id;
@@ -172,7 +174,7 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
 	}
 
 	/* driver_override will always match, send a dummy id */
-	if (dev->driver_override)
+	if (ret > 0)
 		return &pci_device_id_any;
 	return NULL;
 }
@@ -452,7 +454,7 @@ static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev)
 static inline bool pci_device_can_probe(struct pci_dev *pdev)
 {
 	return (!pdev->is_virtfn || pdev->physfn->sriov->drivers_autoprobe ||
-		pdev->driver_override);
+		device_has_driver_override(&pdev->dev));
 }
 #else
 static inline bool pci_device_can_probe(struct pci_dev *pdev)
@@ -1722,6 +1724,7 @@ static const struct cpumask *pci_device_irq_get_affinity(struct device *dev,
 
 const struct bus_type pci_bus_type = {
 	.name		= "pci",
+	.driver_override = true,
 	.match		= pci_bus_match,
 	.uevent		= pci_uevent,
 	.probe		= pci_device_probe,
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 16eaaf749ba9..a9006cf4e9c8 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -615,33 +615,6 @@ static ssize_t devspec_show(struct device *dev,
 static DEVICE_ATTR_RO(devspec);
 #endif
 
-static ssize_t driver_override_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	int ret;
-
-	ret = driver_set_override(dev, &pdev->driver_override, buf, count);
-	if (ret)
-		return ret;
-
-	return count;
-}
-
-static ssize_t driver_override_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	ssize_t len;
-
-	device_lock(dev);
-	len = sysfs_emit(buf, "%s\n", pdev->driver_override);
-	device_unlock(dev);
-	return len;
-}
-static DEVICE_ATTR_RW(driver_override);
-
 static struct attribute *pci_dev_attrs[] = {
 	&dev_attr_power_state.attr,
 	&dev_attr_resource.attr,
@@ -669,7 +642,6 @@ static struct attribute *pci_dev_attrs[] = {
 #ifdef CONFIG_OF
 	&dev_attr_devspec.attr,
 #endif
-	&dev_attr_driver_override.attr,
 	&dev_attr_ari_enabled.attr,
 	NULL,
 };
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index bccc7a4bdd79..b4707640e102 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2488,7 +2488,6 @@ static void pci_release_dev(struct device *dev)
 	pci_release_of_node(pci_dev);
 	pcibios_release_device(pci_dev);
 	pci_bus_put(pci_dev->bus);
-	kfree(pci_dev->driver_override);
 	bitmap_free(pci_dev->dma_alias_mask);
 	dev_dbg(dev, "device released\n");
 	kfree(pci_dev);
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index d43745fe4c84..460852f79f29 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1987,9 +1987,8 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb,
 	    pdev->is_virtfn && physfn == vdev->pdev) {
 		pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",
 			 pci_name(pdev));
-		pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
-						  vdev->vdev.ops->name);
-		WARN_ON(!pdev->driver_override);
+		WARN_ON(device_set_driver_override(&pdev->dev,
+						   vdev->vdev.ops->name));
 	} else if (action == BUS_NOTIFY_BOUND_DRIVER &&
 		   pdev->is_virtfn && physfn == vdev->pdev) {
 		struct pci_driver *drv = pci_dev_driver(pdev);
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index e4b27aecbf05..79a2b5dfd694 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -598,6 +598,8 @@ static int pcistub_seize(struct pci_dev *dev,
 	return err;
 }
 
+static struct pci_driver xen_pcibk_pci_driver;
+
 /* Called when 'bind'. This means we must _NOT_ call pci_reset_function or
  * other functions that take the sysfs lock. */
 static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id)
@@ -609,8 +611,8 @@ static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id)
 
 	match = pcistub_match(dev);
 
-	if ((dev->driver_override &&
-	     !strcmp(dev->driver_override, PCISTUB_DRIVER_NAME)) ||
+	if (device_match_driver_override(&dev->dev,
+					 &xen_pcibk_pci_driver.driver) > 0 ||
 	    match) {
 
 		if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1c270f1d5123..57e9463e4347 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -575,12 +575,6 @@ struct pci_dev {
 	u8		supported_speeds; /* Supported Link Speeds Vector */
 	phys_addr_t	rom;		/* Physical address if not from BAR */
 	size_t		romlen;		/* Length if not from BAR */
-	/*
-	 * Driver name to force a match.  Do not set directly, because core
-	 * frees it.  Use driver_set_override() to set or clear it.
-	 */
-	const char	*driver_override;
-
 	unsigned long	priv_flags;	/* Private flags for the PCI driver */
 
 	/* These methods index pci_reset_fn_methods[] */
-- 
cgit v1.2.3


From 8a700b1fc94df4d847a04f14ebc7f8532592b367 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@kernel.org>
Date: Tue, 24 Mar 2026 01:59:10 +0100
Subject: platform/wmi: use generic driver_override infrastructure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a driver is probed through __driver_attach(), the bus' match()
callback is called without the device lock held, thus accessing the
driver_override field without a lock, which can cause a UAF.

Fix this by using the driver-core driver_override infrastructure taking
care of proper locking internally.

Note that calling match() from __driver_attach() without the device lock
held is intentional. [1]

Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Reported-by: Gui-Dong Han <hanguidong02@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789
Fixes: 12046f8c77e0 ("platform/x86: wmi: Add driver_override support")
Reviewed-by: Armin Wolf <W_Armin@gmx.de>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20260324005919.2408620-7-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 drivers/platform/wmi/core.c | 36 +++++-------------------------------
 include/linux/wmi.h         |  4 ----
 2 files changed, 5 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/wmi/core.c b/drivers/platform/wmi/core.c
index b8e6b9a421c6..750e3619724e 100644
--- a/drivers/platform/wmi/core.c
+++ b/drivers/platform/wmi/core.c
@@ -842,39 +842,11 @@ static ssize_t expensive_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(expensive);
 
-static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr,
-				    char *buf)
-{
-	struct wmi_device *wdev = to_wmi_device(dev);
-	ssize_t ret;
-
-	device_lock(dev);
-	ret = sysfs_emit(buf, "%s\n", wdev->driver_override);
-	device_unlock(dev);
-
-	return ret;
-}
-
-static ssize_t driver_override_store(struct device *dev, struct device_attribute *attr,
-				     const char *buf, size_t count)
-{
-	struct wmi_device *wdev = to_wmi_device(dev);
-	int ret;
-
-	ret = driver_set_override(dev, &wdev->driver_override, buf, count);
-	if (ret < 0)
-		return ret;
-
-	return count;
-}
-static DEVICE_ATTR_RW(driver_override);
-
 static struct attribute *wmi_attrs[] = {
 	&dev_attr_modalias.attr,
 	&dev_attr_guid.attr,
 	&dev_attr_instance_count.attr,
 	&dev_attr_expensive.attr,
-	&dev_attr_driver_override.attr,
 	NULL
 };
 ATTRIBUTE_GROUPS(wmi);
@@ -943,7 +915,6 @@ static void wmi_dev_release(struct device *dev)
 {
 	struct wmi_block *wblock = dev_to_wblock(dev);
 
-	kfree(wblock->dev.driver_override);
 	kfree(wblock);
 }
 
@@ -952,10 +923,12 @@ static int wmi_dev_match(struct device *dev, const struct device_driver *driver)
 	const struct wmi_driver *wmi_driver = to_wmi_driver(driver);
 	struct wmi_block *wblock = dev_to_wblock(dev);
 	const struct wmi_device_id *id = wmi_driver->id_table;
+	int ret;
 
 	/* When driver_override is set, only bind to the matching driver */
-	if (wblock->dev.driver_override)
-		return !strcmp(wblock->dev.driver_override, driver->name);
+	ret = device_match_driver_override(dev, driver);
+	if (ret >= 0)
+		return ret;
 
 	if (id == NULL)
 		return 0;
@@ -1076,6 +1049,7 @@ static struct class wmi_bus_class = {
 static const struct bus_type wmi_bus_type = {
 	.name = "wmi",
 	.dev_groups = wmi_groups,
+	.driver_override = true,
 	.match = wmi_dev_match,
 	.uevent = wmi_dev_uevent,
 	.probe = wmi_dev_probe,
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 75cb0c7cfe57..14fb644e1701 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -18,16 +18,12 @@
  * struct wmi_device - WMI device structure
  * @dev: Device associated with this WMI device
  * @setable: True for devices implementing the Set Control Method
- * @driver_override: Driver name to force a match; do not set directly,
- *		     because core frees it; use driver_set_override() to
- *		     set or clear it.
  *
  * This represents WMI devices discovered by the WMI driver core.
  */
 struct wmi_device {
 	struct device dev;
 	bool setable;
-	const char *driver_override;
 };
 
 /**
-- 
cgit v1.2.3


From 85bb534ff12aab6916058897b39c748940a7a4c6 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@kernel.org>
Date: Tue, 24 Mar 2026 01:59:12 +0100
Subject: vdpa: use generic driver_override infrastructure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a driver is probed through __driver_attach(), the bus' match()
callback is called without the device lock held, thus accessing the
driver_override field without a lock, which can cause a UAF.

Fix this by using the driver-core driver_override infrastructure taking
care of proper locking internally.

Note that calling match() from __driver_attach() without the device lock
held is intentional. [1]

Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Reported-by: Gui-Dong Han <hanguidong02@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789
Fixes: 539fec78edb4 ("vdpa: add driver_override support")
Acked-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://patch.msgid.link/20260324005919.2408620-9-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 drivers/vdpa/vdpa.c  | 48 +++++-------------------------------------------
 include/linux/vdpa.h |  4 ----
 2 files changed, 5 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 34874beb0152..caf0ee5d6856 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -67,57 +67,20 @@ static void vdpa_dev_remove(struct device *d)
 
 static int vdpa_dev_match(struct device *dev, const struct device_driver *drv)
 {
-	struct vdpa_device *vdev = dev_to_vdpa(dev);
+	int ret;
 
 	/* Check override first, and if set, only use the named driver */
-	if (vdev->driver_override)
-		return strcmp(vdev->driver_override, drv->name) == 0;
+	ret = device_match_driver_override(dev, drv);
+	if (ret >= 0)
+		return ret;
 
 	/* Currently devices must be supported by all vDPA bus drivers */
 	return 1;
 }
 
-static ssize_t driver_override_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
-{
-	struct vdpa_device *vdev = dev_to_vdpa(dev);
-	int ret;
-
-	ret = driver_set_override(dev, &vdev->driver_override, buf, count);
-	if (ret)
-		return ret;
-
-	return count;
-}
-
-static ssize_t driver_override_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	struct vdpa_device *vdev = dev_to_vdpa(dev);
-	ssize_t len;
-
-	device_lock(dev);
-	len = sysfs_emit(buf, "%s\n", vdev->driver_override);
-	device_unlock(dev);
-
-	return len;
-}
-static DEVICE_ATTR_RW(driver_override);
-
-static struct attribute *vdpa_dev_attrs[] = {
-	&dev_attr_driver_override.attr,
-	NULL,
-};
-
-static const struct attribute_group vdpa_dev_group = {
-	.attrs  = vdpa_dev_attrs,
-};
-__ATTRIBUTE_GROUPS(vdpa_dev);
-
 static const struct bus_type vdpa_bus = {
 	.name  = "vdpa",
-	.dev_groups = vdpa_dev_groups,
+	.driver_override = true,
 	.match = vdpa_dev_match,
 	.probe = vdpa_dev_probe,
 	.remove = vdpa_dev_remove,
@@ -132,7 +95,6 @@ static void vdpa_release_dev(struct device *d)
 		ops->free(vdev);
 
 	ida_free(&vdpa_index_ida, vdev->index);
-	kfree(vdev->driver_override);
 	kfree(vdev);
 }
 
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 2bfe3baa63f4..782c42d25db1 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -72,9 +72,6 @@ struct vdpa_mgmt_dev;
  * struct vdpa_device - representation of a vDPA device
  * @dev: underlying device
  * @vmap: the metadata passed to upper layer to be used for mapping
- * @driver_override: driver name to force a match; do not set directly,
- *                   because core frees it; use driver_set_override() to
- *                   set or clear it.
  * @config: the configuration ops for this device.
  * @map: the map ops for this device
  * @cf_lock: Protects get and set access to configuration layout.
@@ -90,7 +87,6 @@ struct vdpa_mgmt_dev;
 struct vdpa_device {
 	struct device dev;
 	union virtio_map vmap;
-	const char *driver_override;
 	const struct vdpa_config_ops *config;
 	const struct virtio_map_ops *map;
 	struct rw_semaphore cf_lock; /* Protects get/set config */
-- 
cgit v1.2.3


From 15ed91aa84ea7bacef3c24286d5136055b4335a8 Mon Sep 17 00:00:00 2001
From: Ivan Vecera <ivecera@redhat.com>
Date: Thu, 2 Apr 2026 20:40:56 +0200
Subject: dpll: add frequency monitoring callback ops

Add new callback operations for a dpll device:
- freq_monitor_get(..) - to obtain current state of frequency monitor
  feature from dpll device,
- freq_monitor_set(..) - to allow feature configuration.

Add new callback operation for a dpll pin:
- measured_freq_get(..) - to obtain the measured frequency in mHz.

Obtain the feature state value using the get callback and provide it to
the user if the device driver implements callbacks. The measured_freq_get
pin callback is only invoked when the frequency monitor is enabled.
The freq_monitor_get device callback is required when measured_freq_get
is provided by the driver.

Execute the set callback upon user requests.

Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Link: https://patch.msgid.link/20260402184057.1890514-3-ivecera@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/dpll/dpll_core.c    |  5 ++-
 drivers/dpll/dpll_netlink.c | 90 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dpll.h        | 10 +++++
 3 files changed, 104 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
index 3f54754cdec4..cbb635db4321 100644
--- a/drivers/dpll/dpll_core.c
+++ b/drivers/dpll/dpll_core.c
@@ -876,7 +876,10 @@ dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
 
 	if (WARN_ON(!ops) ||
 	    WARN_ON(!ops->state_on_dpll_get) ||
-	    WARN_ON(!ops->direction_get))
+	    WARN_ON(!ops->direction_get) ||
+	    WARN_ON(ops->measured_freq_get &&
+		    (!dpll_device_ops(dpll)->freq_monitor_get ||
+		     !dpll_device_ops(dpll)->freq_monitor_set)))
 		return -EINVAL;
 
 	mutex_lock(&dpll_lock);
diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
index 83cbd64abf5a..af7ce62ec55c 100644
--- a/drivers/dpll/dpll_netlink.c
+++ b/drivers/dpll/dpll_netlink.c
@@ -175,6 +175,26 @@ dpll_msg_add_phase_offset_monitor(struct sk_buff *msg, struct dpll_device *dpll,
 	return 0;
 }
 
+static int
+dpll_msg_add_freq_monitor(struct sk_buff *msg, struct dpll_device *dpll,
+			  struct netlink_ext_ack *extack)
+{
+	const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+	enum dpll_feature_state state;
+	int ret;
+
+	if (ops->freq_monitor_set && ops->freq_monitor_get) {
+		ret = ops->freq_monitor_get(dpll, dpll_priv(dpll),
+					    &state, extack);
+		if (ret)
+			return ret;
+		if (nla_put_u32(msg, DPLL_A_FREQUENCY_MONITOR, state))
+			return -EMSGSIZE;
+	}
+
+	return 0;
+}
+
 static int
 dpll_msg_add_phase_offset_avg_factor(struct sk_buff *msg,
 				     struct dpll_device *dpll,
@@ -400,6 +420,38 @@ static int dpll_msg_add_ffo(struct sk_buff *msg, struct dpll_pin *pin,
 			    ffo);
 }
 
+static int dpll_msg_add_measured_freq(struct sk_buff *msg, struct dpll_pin *pin,
+				      struct dpll_pin_ref *ref,
+				      struct netlink_ext_ack *extack)
+{
+	const struct dpll_device_ops *dev_ops = dpll_device_ops(ref->dpll);
+	const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+	struct dpll_device *dpll = ref->dpll;
+	enum dpll_feature_state state;
+	u64 measured_freq;
+	int ret;
+
+	if (!ops->measured_freq_get)
+		return 0;
+	ret = dev_ops->freq_monitor_get(dpll, dpll_priv(dpll),
+					&state, extack);
+	if (ret)
+		return ret;
+	if (state == DPLL_FEATURE_STATE_DISABLE)
+		return 0;
+	ret = ops->measured_freq_get(pin, dpll_pin_on_dpll_priv(dpll, pin),
+				    dpll, dpll_priv(dpll), &measured_freq,
+				    extack);
+	if (ret)
+		return ret;
+	if (nla_put_64bit(msg, DPLL_A_PIN_MEASURED_FREQUENCY,
+			  sizeof(measured_freq), &measured_freq,
+			  DPLL_A_PIN_PAD))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int
 dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin,
 		      struct dpll_pin_ref *ref, struct netlink_ext_ack *extack)
@@ -670,6 +722,9 @@ dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin,
 	if (ret)
 		return ret;
 	ret = dpll_msg_add_ffo(msg, pin, ref, extack);
+	if (ret)
+		return ret;
+	ret = dpll_msg_add_measured_freq(msg, pin, ref, extack);
 	if (ret)
 		return ret;
 	ret = dpll_msg_add_pin_esync(msg, pin, ref, extack);
@@ -722,6 +777,9 @@ dpll_device_get_one(struct dpll_device *dpll, struct sk_buff *msg,
 	if (ret)
 		return ret;
 	ret = dpll_msg_add_phase_offset_avg_factor(msg, dpll, extack);
+	if (ret)
+		return ret;
+	ret = dpll_msg_add_freq_monitor(msg, dpll, extack);
 	if (ret)
 		return ret;
 
@@ -948,6 +1006,32 @@ dpll_phase_offset_avg_factor_set(struct dpll_device *dpll, struct nlattr *a,
 						extack);
 }
 
+static int
+dpll_freq_monitor_set(struct dpll_device *dpll, struct nlattr *a,
+		      struct netlink_ext_ack *extack)
+{
+	const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+	enum dpll_feature_state state = nla_get_u32(a), old_state;
+	int ret;
+
+	if (!(ops->freq_monitor_set && ops->freq_monitor_get)) {
+		NL_SET_ERR_MSG_ATTR(extack, a,
+				    "dpll device not capable of frequency monitor");
+		return -EOPNOTSUPP;
+	}
+	ret = ops->freq_monitor_get(dpll, dpll_priv(dpll), &old_state,
+				    extack);
+	if (ret) {
+		NL_SET_ERR_MSG(extack,
+			       "unable to get current state of frequency monitor");
+		return ret;
+	}
+	if (state == old_state)
+		return 0;
+
+	return ops->freq_monitor_set(dpll, dpll_priv(dpll), state, extack);
+}
+
 static int
 dpll_pin_freq_set(struct dpll_pin *pin, struct nlattr *a,
 		  struct netlink_ext_ack *extack)
@@ -1878,6 +1962,12 @@ dpll_set_from_nlattr(struct dpll_device *dpll, struct genl_info *info)
 			if (ret)
 				return ret;
 			break;
+		case DPLL_A_FREQUENCY_MONITOR:
+			ret = dpll_freq_monitor_set(dpll, a,
+						    info->extack);
+			if (ret)
+				return ret;
+			break;
 		}
 	}
 
diff --git a/include/linux/dpll.h b/include/linux/dpll.h
index 2ce295b46b8c..b7277a8b484d 100644
--- a/include/linux/dpll.h
+++ b/include/linux/dpll.h
@@ -52,6 +52,12 @@ struct dpll_device_ops {
 	int (*phase_offset_avg_factor_get)(const struct dpll_device *dpll,
 					   void *dpll_priv, u32 *factor,
 					   struct netlink_ext_ack *extack);
+	int (*freq_monitor_set)(const struct dpll_device *dpll, void *dpll_priv,
+				enum dpll_feature_state state,
+				struct netlink_ext_ack *extack);
+	int (*freq_monitor_get)(const struct dpll_device *dpll, void *dpll_priv,
+				enum dpll_feature_state *state,
+				struct netlink_ext_ack *extack);
 };
 
 struct dpll_pin_ops {
@@ -110,6 +116,10 @@ struct dpll_pin_ops {
 	int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv,
 		       const struct dpll_device *dpll, void *dpll_priv,
 		       s64 *ffo, struct netlink_ext_ack *extack);
+	int (*measured_freq_get)(const struct dpll_pin *pin, void *pin_priv,
+				 const struct dpll_device *dpll,
+				 void *dpll_priv, u64 *measured_freq,
+				 struct netlink_ext_ack *extack);
 	int (*esync_set)(const struct dpll_pin *pin, void *pin_priv,
 			 const struct dpll_device *dpll, void *dpll_priv,
 			 u64 freq, struct netlink_ext_ack *extack);
-- 
cgit v1.2.3


From 14a51045e10d3087b8374deef02a9d3a694132d6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 21 Jan 2026 18:17:12 -0500
Subject: get rid of busy-waiting in shrink_dcache_tree()

If shrink_dcache_tree() runs into a potential victim that is already
dying, it must wait for that dentry to go away.  To avoid busy-waiting
we need some object to wait on and a way for dentry_unlist() to see that
we need to be notified.

The obvious place for the object to wait on would be on our stack frame.
We will store a pointer to that object (struct completion_list) in victim
dentry; if there's more than one thread wanting to wait for the same
dentry to finish dying, we'll have their instances linked into a list,
with reference in dentry pointing to the head of that list.

* new object - struct completion_list.  A pair of struct completion and
pointer to the next instance.  That's what shrink_dcache_tree() will wait
on if needed.

* add a new member (->waiters, opaque pointer to struct completion_list)
to struct dentry.  It is defined for negative live dentries that are
not in-lookup ones and it will remain NULL for almost all of them.

It does not conflict with ->d_rcu (defined for killed dentries), ->d_alias
(defined for positive dentries, all live) or ->d_in_lookup_hash (defined
for in-lookup dentries, all live negative).  That allows to colocate
all four members.

* make sure that all places where dentry enters the state where ->waiters
is defined (live, negative, not-in-lookup) initialize ->waiters to NULL.

* if select_collect2() runs into a dentry that is already dying, have
its caller insert a local instance of struct completion_list into the
head of the list hanging off dentry->waiters and wait for completion.

* if dentry_unlist() sees non-NULL ->waiters, have it carefully walk
through the completion_list instances in that list, calling complete()
for each.

For now struct completion_list is local to fs/dcache.c; it's obviously
dentry-agnostic, and it can be trivially lifted into linux/completion.h
if somebody finds a reason to do so...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 79 ++++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/dcache.h | 18 ++++++++++--
 2 files changed, 88 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 616a445ec720..0c8faeee02e2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -456,6 +456,15 @@ static void dentry_unlink_inode(struct dentry * dentry)
 	raw_write_seqcount_begin(&dentry->d_seq);
 	__d_clear_type_and_inode(dentry);
 	hlist_del_init(&dentry->d_alias);
+	/*
+	 * dentry becomes negative, so the space occupied by ->d_alias
+	 * belongs to ->waiters now; we could use __hlist_del() instead
+	 * of hlist_del_init(), if not for the stunt pulled by nfs
+	 * dummy root dentries - positive dentry *not* included into
+	 * the alias list of its inode.  Open-coding hlist_del_init()
+	 * and removing zeroing would be too clumsy...
+	 */
+	dentry->waiters = NULL;
 	raw_write_seqcount_end(&dentry->d_seq);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&inode->i_lock);
@@ -605,6 +614,44 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
+struct completion_list {
+	struct completion_list *next;
+	struct completion completion;
+};
+
+/*
+ *  shrink_dcache_tree() needs to be notified when dentry in process of
+ *  being evicted finally gets unlisted.  Such dentries are
+ *	already with negative ->d_count
+ *	already negative
+ *	already not in in-lookup hash
+ *	reachable only via ->d_sib.
+ *
+ *  Use ->waiters for a single-linked list of struct completion_list of
+ *  waiters.
+ */
+static inline void d_add_waiter(struct dentry *dentry, struct completion_list *p)
+{
+	struct completion_list *v = dentry->waiters;
+	init_completion(&p->completion);
+	p->next = v;
+	dentry->waiters = p;
+}
+
+static inline void d_complete_waiters(struct dentry *dentry)
+{
+	struct completion_list *v = dentry->waiters;
+	if (unlikely(v)) {
+		/* some shrink_dcache_tree() instances are waiting */
+		dentry->waiters = NULL;
+		while (v) {
+			struct completion *r = &v->completion;
+			v = v->next;
+			complete(r);
+		}
+	}
+}
+
 static inline void dentry_unlist(struct dentry *dentry)
 {
 	struct dentry *next;
@@ -613,6 +660,7 @@ static inline void dentry_unlist(struct dentry *dentry)
 	 * attached to the dentry tree
 	 */
 	dentry->d_flags |= DCACHE_DENTRY_KILLED;
+	d_complete_waiters(dentry);
 	if (unlikely(hlist_unhashed(&dentry->d_sib)))
 		return;
 	__hlist_del(&dentry->d_sib);
@@ -1569,6 +1617,10 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
 			return D_WALK_QUIT;
 		}
 		to_shrink_list(dentry, &data->dispose);
+	} else if (dentry->d_lockref.count < 0) {
+		rcu_read_lock();
+		data->victim = dentry;
+		return D_WALK_QUIT;
 	}
 	/*
 	 * We can return to the caller if we have found some (this
@@ -1608,12 +1660,27 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
 		data.victim = NULL;
 		d_walk(parent, &data, select_collect2);
 		if (data.victim) {
-			spin_lock(&data.victim->d_lock);
-			if (!lock_for_kill(data.victim)) {
-				spin_unlock(&data.victim->d_lock);
+			struct dentry *v = data.victim;
+
+			spin_lock(&v->d_lock);
+			if (v->d_lockref.count < 0 &&
+			    !(v->d_flags & DCACHE_DENTRY_KILLED)) {
+				struct completion_list wait;
+				// It's busy dying; have it notify us once
+				// it becomes invisible to d_walk().
+				d_add_waiter(v, &wait);
+				spin_unlock(&v->d_lock);
+				rcu_read_unlock();
+				if (!list_empty(&data.dispose))
+					shrink_dentry_list(&data.dispose);
+				wait_for_completion(&wait.completion);
+				continue;
+			}
+			if (!lock_for_kill(v)) {
+				spin_unlock(&v->d_lock);
 				rcu_read_unlock();
 			} else {
-				shrink_kill(data.victim);
+				shrink_kill(v);
 			}
 		}
 		if (!list_empty(&data.dispose))
@@ -1787,7 +1854,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	INIT_HLIST_BL_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_HLIST_HEAD(&dentry->d_children);
-	INIT_HLIST_NODE(&dentry->d_alias);
+	dentry->waiters = NULL;
 	INIT_HLIST_NODE(&dentry->d_sib);
 
 	if (dentry->d_op && dentry->d_op->d_init) {
@@ -2729,7 +2796,7 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
 	d_wait = dentry->d_wait;
 	dentry->d_wait = NULL;
 	hlist_bl_unlock(b);
-	INIT_HLIST_NODE(&dentry->d_alias);
+	dentry->waiters = NULL;
 	INIT_LIST_HEAD(&dentry->d_lru);
 	return d_wait;
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f939d2ed10a3..19098253f2dd 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -88,6 +88,7 @@ union shortname_store {
 
 #define d_lock	d_lockref.lock
 #define d_iname d_shortname.string
+struct completion_list;
 
 struct dentry {
 	/* RCU lookup touched fields */
@@ -122,12 +123,23 @@ struct dentry {
 	struct hlist_node d_sib;	/* child of parent list */
 	struct hlist_head d_children;	/* our children */
 	/*
-	 * d_alias and d_rcu can share memory
+	 * the following members can share memory - their uses are
+	 * mutually exclusive.
 	 */
 	union {
-		struct hlist_node d_alias;	/* inode alias list */
-		struct hlist_bl_node d_in_lookup_hash;	/* only for in-lookup ones */
+		/* positives: inode alias list */
+		struct hlist_node d_alias;
+		/* in-lookup ones (all negative, live): hash chain */
+		struct hlist_bl_node d_in_lookup_hash;
+		/* killed ones: (already negative) used to schedule freeing */
 	 	struct rcu_head d_rcu;
+		/*
+		 * live non-in-lookup negatives: used if shrink_dcache_tree()
+		 * races with eviction by another thread and needs to wait for
+		 * this dentry to get killed .  Remains NULL for almost all
+		 * negative dentries.
+		 */
+		struct completion_list *waiters;
 	};
 };
 
-- 
cgit v1.2.3


From 5f352433ea39171e19fbb3a7e18d983510176854 Mon Sep 17 00:00:00 2001
From: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Date: Tue, 24 Mar 2026 13:38:54 +0530
Subject: PCI: endpoint: Add reserved region type for MSI-X Table and PBA

Add PCI_EPC_BAR_RSVD_MSIX_TBL_RAM and PCI_EPC_BAR_RSVD_MSIX_PBA_RAM to
enum pci_epc_bar_rsvd_region_type so that Endpoint controllers can
describe hardware-owned MSI-X Table and PBA (Pending Bit Array) regions
behind a BAR_RESERVED BAR.

Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Signed-off-by: Manivannan Sadhasivam <mani@kernel.org>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
Link: https://patch.msgid.link/20260324080857.916263-2-mmaddireddy@nvidia.com
---
 include/linux/pci-epc.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 334c2b7578d0..1eca1264815b 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -211,6 +211,8 @@ enum pci_epc_bar_type {
 /**
  * enum pci_epc_bar_rsvd_region_type - type of a fixed subregion behind a BAR
  * @PCI_EPC_BAR_RSVD_DMA_CTRL_MMIO: Integrated DMA controller MMIO window
+ * @PCI_EPC_BAR_RSVD_MSIX_TBL_RAM: MSI-X table structure
+ * @PCI_EPC_BAR_RSVD_MSIX_PBA_RAM: MSI-X PBA structure
  *
  * BARs marked BAR_RESERVED are owned by the SoC/EPC hardware and must not be
  * reprogrammed by EPF drivers. Some of them still expose fixed subregions that
@@ -218,6 +220,8 @@ enum pci_epc_bar_type {
  */
 enum pci_epc_bar_rsvd_region_type {
 	PCI_EPC_BAR_RSVD_DMA_CTRL_MMIO = 0,
+	PCI_EPC_BAR_RSVD_MSIX_TBL_RAM,
+	PCI_EPC_BAR_RSVD_MSIX_PBA_RAM,
 };
 
 /**
-- 
cgit v1.2.3


From 8b155f2e4a91f3507951e6ace4b413688ac28b96 Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 3 Apr 2026 12:48:51 -0600
Subject: block: remove unused BVEC_ITER_ALL_INIT

This macro no longer has any users, so remove it.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Link: https://patch.msgid.link/20260403184852.2140919-1-csander@purestorage.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bvec.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 06fb60471aaf..d36dd476feda 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -203,15 +203,6 @@ static inline void bvec_iter_advance_single(const struct bio_vec *bv,
 		((bvl = mp_bvec_iter_bvec((bio_vec), (iter))), 1);	\
 	     bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len))
 
-/* for iterating one bio from start to end */
-#define BVEC_ITER_ALL_INIT (struct bvec_iter)				\
-{									\
-	.bi_sector	= 0,						\
-	.bi_size	= UINT_MAX,					\
-	.bi_idx		= 0,						\
-	.bi_bvec_done	= 0,						\
-}
-
 static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all)
 {
 	iter_all->done = 0;
-- 
cgit v1.2.3


From 6c8dfb0362732bf1e4829867a2a5239fedc592d0 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@kernel.org>
Date: Tue, 24 Mar 2026 01:59:06 +0100
Subject: bus: fsl-mc: use generic driver_override infrastructure

When a driver is probed through __driver_attach(), the bus' match()
callback is called without the device lock held, thus accessing the
driver_override field without a lock, which can cause a UAF.

Fix this by using the driver-core driver_override infrastructure taking
care of proper locking internally.

Note that calling match() from __driver_attach() without the device lock
held is intentional. [1]

Tested-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Reported-by: Gui-Dong Han <hanguidong02@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789
Fixes: 1f86a00c1159 ("bus/fsl-mc: add support for 'driver_override' in the mc-bus")
Link: https://patch.msgid.link/20260324005919.2408620-3-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 drivers/bus/fsl-mc/fsl-mc-bus.c   | 43 +++++++--------------------------------
 drivers/vfio/fsl-mc/vfio_fsl_mc.c |  4 +---
 include/linux/fsl/mc.h            |  4 ----
 3 files changed, 8 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index c117745cf206..221146e4860b 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -86,12 +86,16 @@ static int fsl_mc_bus_match(struct device *dev, const struct device_driver *drv)
 	struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
 	const struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(drv);
 	bool found = false;
+	int ret;
 
 	/* When driver_override is set, only bind to the matching driver */
-	if (mc_dev->driver_override) {
-		found = !strcmp(mc_dev->driver_override, mc_drv->driver.name);
+	ret = device_match_driver_override(dev, drv);
+	if (ret > 0) {
+		found = true;
 		goto out;
 	}
+	if (ret == 0)
+		goto out;
 
 	if (!mc_drv->match_id_table)
 		goto out;
@@ -210,39 +214,8 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(modalias);
 
-static ssize_t driver_override_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
-{
-	struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
-	int ret;
-
-	if (WARN_ON(dev->bus != &fsl_mc_bus_type))
-		return -EINVAL;
-
-	ret = driver_set_override(dev, &mc_dev->driver_override, buf, count);
-	if (ret)
-		return ret;
-
-	return count;
-}
-
-static ssize_t driver_override_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
-	ssize_t len;
-
-	device_lock(dev);
-	len = sysfs_emit(buf, "%s\n", mc_dev->driver_override);
-	device_unlock(dev);
-	return len;
-}
-static DEVICE_ATTR_RW(driver_override);
-
 static struct attribute *fsl_mc_dev_attrs[] = {
 	&dev_attr_modalias.attr,
-	&dev_attr_driver_override.attr,
 	NULL,
 };
 
@@ -345,6 +318,7 @@ ATTRIBUTE_GROUPS(fsl_mc_bus);
 
 const struct bus_type fsl_mc_bus_type = {
 	.name = "fsl-mc",
+	.driver_override = true,
 	.match = fsl_mc_bus_match,
 	.uevent = fsl_mc_bus_uevent,
 	.probe = fsl_mc_probe,
@@ -910,9 +884,6 @@ static struct notifier_block fsl_mc_nb;
  */
 void fsl_mc_device_remove(struct fsl_mc_device *mc_dev)
 {
-	kfree(mc_dev->driver_override);
-	mc_dev->driver_override = NULL;
-
 	/*
 	 * The device-specific remove callback will get invoked by device_del()
 	 */
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index 462fae1aa538..b4c3958201b2 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -424,9 +424,7 @@ static int vfio_fsl_mc_bus_notifier(struct notifier_block *nb,
 
 	if (action == BUS_NOTIFY_ADD_DEVICE &&
 	    vdev->mc_dev == mc_cont) {
-		mc_dev->driver_override = kasprintf(GFP_KERNEL, "%s",
-						    vfio_fsl_mc_ops.name);
-		if (!mc_dev->driver_override)
+		if (device_set_driver_override(dev, vfio_fsl_mc_ops.name))
 			dev_warn(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s failed\n",
 				 dev_name(&mc_cont->dev));
 		else
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index 897d6211c163..1da63f2d7040 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -178,9 +178,6 @@ struct fsl_mc_obj_desc {
  * @regions: pointer to array of MMIO region entries
  * @irqs: pointer to array of pointers to interrupts allocated to this device
  * @resource: generic resource associated with this MC object device, if any.
- * @driver_override: driver name to force a match; do not set directly,
- *                   because core frees it; use driver_set_override() to
- *                   set or clear it.
  *
  * Generic device object for MC object devices that are "attached" to a
  * MC bus.
@@ -214,7 +211,6 @@ struct fsl_mc_device {
 	struct fsl_mc_device_irq **irqs;
 	struct fsl_mc_resource *resource;
 	struct device_link *consumer_link;
-	const char *driver_override;
 };
 
 #define to_fsl_mc_device(_dev) \
-- 
cgit v1.2.3


From 9ec1e972c3de3106140c18d2a1c7c74795d85a69 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Fri, 30 Jan 2026 15:59:16 -0500
Subject: maple_tree: introduce maple_copy node and use it in
 mas_spanning_rebalance()

Introduce an internal-memory only node type called maple_copy to
facilitate internal copy operations.  Use it in mas_spanning_rebalance()
for just the leaf nodes.  Initially, the maple_copy node is used to
configure the source nodes and copy the data into the big_node.

The maple_copy contains a list of source entries with start and end
offsets.  One of the maple_copy entries can be itself with an offset of 0
to 2, representing the data where the store partially overwrites entries,
or fully overwrites the entry.  The side effect is that the source nodes
no longer have to worry about partially copying the existing offset if it
is not fully overwritten.

This is in preparation of removal of the maple big_node, but for the time
being the data is copied to the big node to limit the change size.

Link: https://lkml.kernel.org/r/20260130205935.2559335-12-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrew Ballance <andrewjballance@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Kujau <lists@nerdbynature.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/maple_tree.h |  26 +++++++++
 lib/maple_tree.c           | 140 ++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 157 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 7b8aad47121e..9bc7fa89bc2e 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -139,6 +139,7 @@ enum maple_type {
 	maple_leaf_64,
 	maple_range_64,
 	maple_arange_64,
+	maple_copy,
 };
 
 enum store_type {
@@ -154,6 +155,30 @@ enum store_type {
 	wr_slot_store,
 };
 
+struct maple_copy {
+	struct {
+		struct maple_node *node;
+		unsigned long max;
+		unsigned char start;
+		unsigned char end;
+		enum maple_type mt;
+	} src[4];
+	/* Simulated node */
+	void __rcu *slot[3];
+	unsigned long min;
+	union {
+		unsigned long pivot[3];
+		struct {
+			void *_pad[2];
+			unsigned long max;
+		};
+	};
+	unsigned char end;
+
+	/*Avoid passing these around */
+	unsigned char s_count;
+};
+
 /**
  * DOC: Maple tree flags
  *
@@ -299,6 +324,7 @@ struct maple_node {
 		};
 		struct maple_range_64 mr64;
 		struct maple_arange_64 ma64;
+		struct maple_copy cp;
 	};
 };
 
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index caac936bd8d4..554fdffd6c5b 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -605,6 +605,8 @@ static inline unsigned long *ma_pivots(struct maple_node *node,
 	case maple_range_64:
 	case maple_leaf_64:
 		return node->mr64.pivot;
+	case maple_copy:
+		return node->cp.pivot;
 	case maple_dense:
 		return NULL;
 	}
@@ -624,6 +626,7 @@ static inline unsigned long *ma_gaps(struct maple_node *node,
 	switch (type) {
 	case maple_arange_64:
 		return node->ma64.gap;
+	case maple_copy:
 	case maple_range_64:
 	case maple_leaf_64:
 	case maple_dense:
@@ -690,6 +693,7 @@ static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
 	case maple_arange_64:
 		node->ma64.pivot[piv] = val;
 		break;
+	case maple_copy:
 	case maple_dense:
 		break;
 	}
@@ -711,6 +715,8 @@ static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
 	case maple_range_64:
 	case maple_leaf_64:
 		return mn->mr64.slot;
+	case maple_copy:
+		return mn->cp.slot;
 	case maple_dense:
 		return mn->slot;
 	}
@@ -2595,6 +2601,110 @@ dead_node:
 	return NULL;
 }
 
+/*
+ * cp_leaf_init() - Initialize a maple_copy node for the leaf level of a
+ * spanning store
+ * @cp: The maple copy node
+ * @mas: The maple state
+ * @l_wr_mas: The left write state of the spanning store
+ * @r_wr_mas: The right write state of the spanning store
+ */
+static inline void cp_leaf_init(struct maple_copy *cp,
+		struct ma_state *mas, struct ma_wr_state *l_wr_mas,
+		struct ma_wr_state *r_wr_mas)
+{
+	unsigned char end = 0;
+
+	/*
+	 * WARNING: The use of RCU_INIT_POINTER() makes it extremely important
+	 * to not expose the maple_copy node to any readers.  Exposure may
+	 * result in buggy code when a compiler reorders the instructions.
+	 */
+
+	/* Create entries to insert including split entries to left and right */
+	if (l_wr_mas->r_min < mas->index) {
+		end++;
+		RCU_INIT_POINTER(cp->slot[0], l_wr_mas->content);
+		cp->pivot[0] = mas->index - 1;
+	}
+	RCU_INIT_POINTER(cp->slot[end], l_wr_mas->entry);
+	cp->pivot[end] = mas->last;
+
+	if (r_wr_mas->end_piv > mas->last) {
+		end++;
+		RCU_INIT_POINTER(cp->slot[end],
+				 r_wr_mas->slots[r_wr_mas->offset_end]);
+		cp->pivot[end] = r_wr_mas->end_piv;
+	}
+
+	cp->min = l_wr_mas->r_min;
+	cp->max = cp->pivot[end];
+	cp->end = end;
+}
+
+static inline void append_wr_mas_cp(struct maple_copy *cp,
+	struct ma_wr_state *wr_mas, unsigned char start, unsigned char end)
+{
+	unsigned char count;
+
+	count = cp->s_count;
+	cp->src[count].node = wr_mas->node;
+	cp->src[count].mt = wr_mas->type;
+	if (wr_mas->mas->end <= end)
+		cp->src[count].max = wr_mas->mas->max;
+	else
+		cp->src[count].max = wr_mas->pivots[end];
+
+	cp->src[count].start = start;
+	cp->src[count].end = end;
+	cp->s_count++;
+}
+
+static inline void init_cp_src(struct maple_copy *cp)
+{
+	cp->src[cp->s_count].node = ma_mnode_ptr(cp);
+	cp->src[cp->s_count].mt = maple_copy;
+	cp->src[cp->s_count].max = cp->max;
+	cp->src[cp->s_count].start = 0;
+	cp->src[cp->s_count].end = cp->end;
+	cp->s_count++;
+}
+
+static inline
+void cp_data_write(struct maple_copy *cp, struct maple_big_node *b_node)
+{
+	struct maple_node *src;
+	unsigned char s;
+	unsigned char src_end, s_offset;
+	unsigned long *b_pivots, *cp_pivots;
+	void __rcu **b_slots, **cp_slots;
+	enum maple_type s_mt;
+
+	b_node->b_end = 0;
+
+	s = 0;
+	b_pivots = b_node->pivot;
+	b_slots = (void __rcu **)b_node->slot;
+	do {
+		unsigned char size;
+
+		src = cp->src[s].node;
+		s_mt = cp->src[s].mt;
+		s_offset = cp->src[s].start;
+		src_end = cp->src[s].end;
+		size = src_end - s_offset + 1;
+		cp_pivots = ma_pivots(src, s_mt) + s_offset;
+		cp_slots = ma_slots(src, s_mt) + s_offset;
+		memcpy(b_slots, cp_slots, size * sizeof(void __rcu *));
+		if (size > 1)
+			memcpy(b_pivots, cp_pivots, (size - 1) * sizeof(unsigned long));
+		b_pivots[size - 1] = cp->src[s].max;
+		b_pivots += size;
+		b_slots += size;
+		b_node->b_end += size;
+	} while (++s < cp->s_count);
+}
+
 static void mas_spanning_rebalance_loop(struct ma_state *mas,
 		struct maple_subtree_state *mast, unsigned char count)
 {
@@ -2750,10 +2860,11 @@ static void mas_spanning_rebalance(struct ma_state *mas,
 
 
 static noinline void mas_wr_spanning_rebalance(struct ma_state *mas,
-		struct ma_wr_state *wr_mas, struct ma_wr_state *r_wr_mas)
+		struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas)
 {
 	struct maple_subtree_state mast;
 	struct maple_big_node b_node;
+	struct maple_copy cp;
 	unsigned char height;
 	MA_STATE(l_mas, mas->tree, mas->index, mas->index);
 	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
@@ -2765,15 +2876,26 @@ static noinline void mas_wr_spanning_rebalance(struct ma_state *mas,
 	mast.orig_l = &mast_l_mas;
 	mast.orig_r = r_wr_mas->mas;
 	memset(&b_node, 0, sizeof(struct maple_big_node));
-	/* Copy l_mas and store the value in b_node. */
-	mas_store_b_node(wr_mas, &b_node, mast.orig_l->end);
-	/* Copy r_mas into b_node if there is anything to copy. */
-	if (mast.orig_r->max > mast.orig_r->last)
-		mas_mab_cp(mast.orig_r, mast.orig_r->offset,
-			   mast.orig_r->end, &b_node, b_node.b_end + 1);
-	else
-		b_node.b_end++;
+	cp.s_count = 0;
+	cp_leaf_init(&cp, mas, l_wr_mas, r_wr_mas);
+	/* Copy left 0 - offset */
+	if (l_wr_mas->mas->offset) {
+		unsigned char off = l_wr_mas->mas->offset - 1;
+
+		append_wr_mas_cp(&cp, l_wr_mas, 0, off);
+		cp.src[cp.s_count - 1].max = cp.min - 1;
+	}
+
+	init_cp_src(&cp);
+
+	/* Copy right from offset_end + 1 to end */
+	if (r_wr_mas->mas->end != r_wr_mas->offset_end)
+		append_wr_mas_cp(&cp, r_wr_mas, r_wr_mas->offset_end + 1,
+			       r_wr_mas->mas->end);
+
 
+	b_node.type = l_wr_mas->type;
+	cp_data_write(&cp, &b_node);
 	/* Stop spanning searches by searching for just index. */
 	mast.orig_l->last = mas->index;
 
-- 
cgit v1.2.3


From 6953038cab845f3720ec8d83915f4f083861e195 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Fri, 30 Jan 2026 15:59:19 -0500
Subject: maple_tree: change initial big node setup in
 mas_wr_spanning_rebalance()

Instead of copying the data into the big node and finding out that the
data may need to be moved or appended to, calculate the data space up
front (in the maple copy node) and set up another source for the copy.

The additional copy source is tracked in the maple state sib (short for
sibling), and is put into the maple write states for future operations
after the data is in the big node.

To facilitate the newly moved node, some initial setup of the maple
subtree state are relocated after the potential shift caused by the new
way of rebalancing against a sibling.

Link: https://lkml.kernel.org/r/20260130205935.2559335-15-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrew Ballance <andrewjballance@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Kujau <lists@nerdbynature.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/maple_tree.h |   1 +
 lib/maple_tree.c           | 175 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 153 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 9bc7fa89bc2e..e99e16ac1c6d 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -177,6 +177,7 @@ struct maple_copy {
 
 	/*Avoid passing these around */
 	unsigned char s_count;
+	unsigned char data;
 };
 
 /**
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index a9b7e398c7db..0d6f810a4a1f 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -1304,6 +1304,18 @@ static inline unsigned char mas_data_end(struct ma_state *mas)
 	return mt_pivots[type];
 }
 
+static inline
+void wr_mas_setup(struct ma_wr_state *wr_mas, struct ma_state *mas)
+{
+	wr_mas->node = mas_mn(mas);
+	wr_mas->type = mte_node_type(mas->node);
+	wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
+	wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
+	wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, mas->offset);
+	wr_mas->r_max = mas_safe_pivot(mas, wr_mas->pivots, mas->offset,
+				       wr_mas->type);
+}
+
 /*
  * mas_leaf_max_gap() - Returns the largest gap in a leaf node
  * @mas: the maple state
@@ -2258,6 +2270,44 @@ static inline void mte_mid_split_check(struct maple_enode **l,
 	*split = mid_split;
 }
 
+static inline
+void spanning_sib(struct ma_wr_state *l_wr_mas,
+		struct ma_wr_state *r_wr_mas, struct ma_state *nneighbour)
+{
+	struct ma_state l_tmp = *l_wr_mas->mas;
+	struct ma_state r_tmp = *r_wr_mas->mas;
+	unsigned char depth = 0;
+
+	do {
+		mas_ascend(&r_tmp);
+		mas_ascend(&l_tmp);
+		depth++;
+		if (r_tmp.offset < mas_data_end(&r_tmp)) {
+			r_tmp.offset++;
+			mas_descend(&r_tmp);
+			r_tmp.offset = 0;
+			while (--depth)
+				mas_descend(&r_tmp);
+
+			r_tmp.end = mas_data_end(&r_tmp);
+			*nneighbour = r_tmp;
+			return;
+		} else if (l_tmp.offset) {
+			l_tmp.offset--;
+			do {
+				mas_descend(&l_tmp);
+				l_tmp.offset = mas_data_end(&l_tmp);
+			} while (--depth);
+
+			l_tmp.end = l_tmp.offset;
+			*nneighbour = l_tmp;
+			return;
+		}
+	} while (!mte_is_root(r_tmp.node));
+
+	WARN_ON_ONCE(1);
+}
+
 /*
  * mast_set_split_parents() - Helper function to set three nodes parents.  Slot
  * is taken from @mast->l.
@@ -2642,6 +2692,49 @@ static inline void cp_leaf_init(struct maple_copy *cp,
 	cp->end = end;
 }
 
+/*
+ * cp_data_calc() - Calculate the size of the data (1 indexed).
+ * @cp: The maple copy struct with the new data populated.
+ * @l_wr_mas: The maple write state containing the data to the left of the write
+ * @r_wr_mas: The maple write state containing the data to the right of the
+ * write
+ *
+ * cp->data is a size (not indexed by 0).
+ */
+static inline void cp_data_calc(struct maple_copy *cp,
+		struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas)
+{
+
+	/* Add 1 every time for the 0th element */
+	cp->data = l_wr_mas->mas->offset;
+	/* Add the new data and any partial overwrites */
+	cp->data += cp->end + 1;
+	/* Data from right (offset + 1 to end), +1 for zero */
+	cp->data += r_wr_mas->mas->end - r_wr_mas->offset_end;
+}
+
+static inline void append_mas_cp(struct maple_copy *cp,
+	struct ma_state *mas, unsigned char start, unsigned char end)
+{
+	struct maple_node *node;
+	enum maple_type mt;
+	unsigned char count;
+
+	count = cp->s_count;
+	node = mas_mn(mas);
+	mt = mte_node_type(mas->node);
+	cp->src[count].node = node;
+	cp->src[count].mt = mt;
+	if (mas->end <= end)
+		cp->src[count].max = mas->max;
+	else
+		cp->src[count].max = ma_pivots(node, mt)[end];
+
+	cp->src[count].start = start;
+	cp->src[count].end = end;
+	cp->s_count++;
+}
+
 static inline void append_wr_mas_cp(struct maple_copy *cp,
 	struct ma_wr_state *wr_mas, unsigned char start, unsigned char end)
 {
@@ -2670,6 +2763,42 @@ static inline void init_cp_src(struct maple_copy *cp)
 	cp->s_count++;
 }
 
+/*
+ * multi_src_setup() - Set the @cp node up with multiple sources to copy from.
+ * @cp: The maple copy node
+ * @l_wr_mas: The left write maple state
+ * @r_wr_mas: The right write maple state
+ * @sib: The sibling maple state
+ *
+ * Note: @sib->end == 0 indicates no sibling will be used.
+ */
+static inline
+void multi_src_setup(struct maple_copy *cp, struct ma_wr_state *l_wr_mas,
+		struct ma_wr_state *r_wr_mas, struct ma_state *sib)
+{
+	cp->s_count = 0;
+	if (sib->end && sib->max < l_wr_mas->mas->min)
+		append_mas_cp(cp, sib, 0, sib->end);
+
+	/* Copy left 0 - offset */
+	if (l_wr_mas->mas->offset) {
+		unsigned char off = l_wr_mas->mas->offset - 1;
+
+		append_wr_mas_cp(cp, l_wr_mas, 0, off);
+		cp->src[cp->s_count - 1].max = cp->min - 1;
+	}
+
+	init_cp_src(cp);
+
+	/* Copy right either from offset or offset + 1 pending on r_max */
+	if (r_wr_mas->mas->end != r_wr_mas->offset_end)
+		append_wr_mas_cp(cp, r_wr_mas, r_wr_mas->offset_end + 1,
+			       r_wr_mas->mas->end);
+
+	if (sib->end && sib->min > r_wr_mas->mas->max)
+		append_mas_cp(cp, sib, 0, sib->end);
+}
+
 static inline
 void cp_data_write(struct maple_copy *cp, struct maple_big_node *b_node)
 {
@@ -2873,36 +3002,42 @@ static noinline void mas_wr_spanning_rebalance(struct ma_state *mas,
 	struct maple_big_node b_node;
 	struct maple_copy cp;
 	unsigned char height;
+	struct ma_state sib;
 	MA_STATE(l_mas, mas->tree, mas->index, mas->index);
 	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
 	MA_STATE(m_mas, mas->tree, mas->index, mas->index);
 	MA_STATE(mast_l_mas, NULL, 0, 0);
 
 
-	mast_l_mas = *mas;
-	mast.orig_l = &mast_l_mas;
-	mast.orig_r = r_wr_mas->mas;
 	memset(&b_node, 0, sizeof(struct maple_big_node));
+	mast_l_mas = *mas;
 	cp.s_count = 0;
 	cp_leaf_init(&cp, mas, l_wr_mas, r_wr_mas);
-	/* Copy left 0 - offset */
-	if (l_wr_mas->mas->offset) {
-		unsigned char off = l_wr_mas->mas->offset - 1;
-
-		append_wr_mas_cp(&cp, l_wr_mas, 0, off);
-		cp.src[cp.s_count - 1].max = cp.min - 1;
+	cp_data_calc(&cp, l_wr_mas, r_wr_mas);
+	if (((l_wr_mas->mas->min != 0) || (r_wr_mas->mas->max != ULONG_MAX)) &&
+	    (cp.data <= mt_min_slots[l_wr_mas->type])) {
+		spanning_sib(l_wr_mas, r_wr_mas, &sib);
+		cp.data += sib.end + 1;
+	} else {
+		sib.end = 0;
 	}
 
-	init_cp_src(&cp);
-
-	/* Copy right from offset_end + 1 to end */
-	if (r_wr_mas->mas->end != r_wr_mas->offset_end)
-		append_wr_mas_cp(&cp, r_wr_mas, r_wr_mas->offset_end + 1,
-			       r_wr_mas->mas->end);
-
-
+	multi_src_setup(&cp, l_wr_mas, r_wr_mas, &sib);
 	b_node.type = l_wr_mas->type;
 	cp_data_write(&cp, &b_node);
+	if (sib.end) {
+		if (sib.max < l_wr_mas->mas->min) {
+			*l_wr_mas->mas = sib;
+			wr_mas_setup(l_wr_mas, &sib);
+			mast_l_mas = sib;
+		} else {
+			*r_wr_mas->mas = sib;
+			wr_mas_setup(r_wr_mas, &sib);
+		}
+	}
+
+	mast.orig_l = &mast_l_mas;
+	mast.orig_r = r_wr_mas->mas;
 	/* Stop spanning searches by searching for just index. */
 	mast.orig_l->last = mas->index;
 
@@ -2917,12 +3052,6 @@ static noinline void mas_wr_spanning_rebalance(struct ma_state *mas,
 	mast.m = &m_mas;
 	mast.r = &r_mas;
 	l_mas.status = r_mas.status = m_mas.status = ma_none;
-
-	/* Check if this is not root and has sufficient data.  */
-	if (((mast.orig_l->min != 0) || (mast.orig_r->max != ULONG_MAX)) &&
-	    unlikely(mast.bn->b_end <= mt_min_slots[mast.bn->type]))
-		mast_spanning_rebalance(&mast);
-
 	height = mas_mt_height(mas) + 1;
 
 	/*
-- 
cgit v1.2.3


From 20b20162e1f3b7e60cf0e79116fb2f3bdef3dc5e Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Fri, 30 Jan 2026 15:59:21 -0500
Subject: maple_tree: add gap support, slot and pivot sizes for maple copy

Add plumbing work for using maple copy as a normal node for a source of
copy operations.  This is needed later.

Link: https://lkml.kernel.org/r/20260130205935.2559335-17-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrew Ballance <andrewjballance@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Kujau <lists@nerdbynature.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/maple_tree.h | 1 +
 lib/maple_tree.c           | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index e99e16ac1c6d..db6a02788902 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -165,6 +165,7 @@ struct maple_copy {
 	} src[4];
 	/* Simulated node */
 	void __rcu *slot[3];
+	unsigned long gap[3];
 	unsigned long min;
 	union {
 		unsigned long pivot[3];
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 499cae720251..9c701ee7412c 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -101,6 +101,7 @@ static const unsigned long mt_max[] = {
 	[maple_leaf_64]		= ULONG_MAX,
 	[maple_range_64]	= ULONG_MAX,
 	[maple_arange_64]	= ULONG_MAX,
+	[maple_copy]		= ULONG_MAX,
 };
 #define mt_node_max(x) mt_max[mte_node_type(x)]
 #endif
@@ -110,6 +111,7 @@ static const unsigned char mt_slots[] = {
 	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
 	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
 	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
+	[maple_copy]		= 3,
 };
 #define mt_slot_count(x) mt_slots[mte_node_type(x)]
 
@@ -118,6 +120,7 @@ static const unsigned char mt_pivots[] = {
 	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
 	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
 	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
+	[maple_copy]		= 3,
 };
 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)]
 
@@ -126,6 +129,7 @@ static const unsigned char mt_min_slots[] = {
 	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
 	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
 	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
+	[maple_copy]		= 1, /* Should never be used */
 };
 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]
 
@@ -627,6 +631,7 @@ static inline unsigned long *ma_gaps(struct maple_node *node,
 	case maple_arange_64:
 		return node->ma64.gap;
 	case maple_copy:
+		return node->cp.gap;
 	case maple_range_64:
 	case maple_leaf_64:
 	case maple_dense:
-- 
cgit v1.2.3


From a9c6716e088a1d4badd4fa6797469506bb99ec8b Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Fri, 30 Jan 2026 15:59:22 -0500
Subject: maple_tree: start using maple copy node for destination

Stop using the maple subtree state and big node in favour of using three
destinations in the maple copy node.  That is, expand the way leaves were
handled to all levels of the tree and use the maple copy node to track the
new nodes.

Extract out the sibling init into the data calculation since this is where
the insufficient data can be detected.  The remainder of the sibling code
to shift the next iteration is moved to the spanning_ascend() function,
since it is not always needed.

Next introduce the dst_setup() function which will decide how many nodes
are needed to contain the data at this level.  Using the destination
count, populate the copy node's dst array with the new nodes and set
d_count to the correct value.  Note that this can be tricky in the case of
a leaf node with exactly enough room because of the rule against NULLs at
the end of leaves.

Once the destinations are ready, copy the data by altering the
cp_data_write() function to copy from the sources to the destinations
directly.  This eliminates the use of the big node in this code path.  On
node completion, node_finalise() will zero out the remaining area and set
the metadata, if necessary.

spanning_ascend() is used to decide if the operation is complete.  It may
create a new root, converge into one destination, or continue upwards by
ascending the left and right write maple states.

One test case setup needed to be tweaked so that the targeted node was
surrounded by full nodes.

[akpm@linux-foundation.org: coding-style cleanups]
Link: https://lkml.kernel.org/r/20260130205935.2559335-18-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrew Ballance <andrewjballance@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Kujau <lists@nerdbynature.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/maple_tree.h       |  14 +
 lib/maple_tree.c                 | 624 +++++++++++++++++++++++++++------------
 tools/testing/radix-tree/maple.c |   2 +-
 3 files changed, 458 insertions(+), 182 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index db6a02788902..0c464eade1d6 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -156,6 +156,17 @@ enum store_type {
 };
 
 struct maple_copy {
+	/*
+	 * min, max, and pivots are values
+	 * start, end, split are indexes into arrays
+	 * data is a size
+	 */
+
+	struct {
+		struct maple_node *node;
+		unsigned long max;
+		enum maple_type mt;
+	} dst[3];
 	struct {
 		struct maple_node *node;
 		unsigned long max;
@@ -178,7 +189,10 @@ struct maple_copy {
 
 	/*Avoid passing these around */
 	unsigned char s_count;
+	unsigned char d_count;
+	unsigned char split;
 	unsigned char data;
+	unsigned char height;
 };
 
 /**
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 9c701ee7412c..4d9e7f00f5c8 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -353,6 +353,13 @@ static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
 			(type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
 }
 
+static inline void ma_init_slot(void __rcu **slot, const struct maple_node *mn,
+				const enum maple_type mt)
+{
+	/* WARNING: this is unsafe if the slot is exposed to readers. */
+	RCU_INIT_POINTER(*slot, (void *)mt_mk_node(mn, mt));
+}
+
 static inline void *mte_mk_root(const struct maple_enode *node)
 {
 	return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
@@ -1320,6 +1327,21 @@ void wr_mas_setup(struct ma_wr_state *wr_mas, struct ma_state *mas)
 	wr_mas->r_max = mas_safe_pivot(mas, wr_mas->pivots, mas->offset,
 				       wr_mas->type);
 }
+
+static inline
+void wr_mas_ascend(struct ma_wr_state *wr_mas)
+{
+	struct ma_state *mas = wr_mas->mas;
+
+	mas_ascend(mas);
+	wr_mas_setup(wr_mas, mas);
+	mas->end = ma_data_end(wr_mas->node, wr_mas->type, wr_mas->pivots,
+			       mas->max);
+	/* Careful, this may be wrong.. */
+	wr_mas->end_piv = wr_mas->r_max;
+	wr_mas->offset_end = mas->offset;
+}
+
 static inline unsigned long ma_leaf_max_gap(struct maple_node *mn,
 		enum maple_type mt, unsigned long min, unsigned long max,
 		unsigned long *pivots, void __rcu **slots)
@@ -2507,6 +2529,112 @@ static inline void mas_wmb_replace(struct ma_state *mas,
 	mas_update_gap(mas);
 }
 
+/*
+ * node_copy() - Copy from one node to another.
+ *
+ * @mas: The maple state
+ * @src: The source node
+ * @start: The offset into the src to start copying
+ * @size: The size to copy (non-zero)
+ * @s_max: The source node max
+ * @s_mt: The source maple node type
+ * @dst: The destination
+ * @d_start: The start location in the destination node
+ * @d_mt: The destination maple node type
+ */
+static inline
+unsigned long node_copy(struct ma_state *mas, struct maple_node *src,
+	unsigned char start, unsigned char size, unsigned long s_max,
+	enum maple_type s_mt, struct maple_node *dst, unsigned char d_start,
+	enum maple_type d_mt)
+{
+	unsigned long *s_pivots, *d_pivots;
+	void __rcu **s_slots, **d_slots;
+	unsigned long *s_gaps, *d_gaps;
+	unsigned long d_max;
+
+	d_slots = ma_slots(dst, d_mt) + d_start;
+	d_pivots = ma_pivots(dst, d_mt) + d_start;
+	s_slots = ma_slots(src, s_mt) + start;
+	s_pivots = ma_pivots(src, s_mt) + start;
+	memcpy(d_slots, s_slots, size * sizeof(void __rcu *));
+	if (!ma_is_leaf(d_mt) && s_mt == maple_copy) {
+		struct maple_enode *edst = mt_mk_node(dst, d_mt);
+
+
+		for (int i = 0; i < size; i++)
+			mas_set_parent(mas,
+				       mt_slot_locked(mas->tree, d_slots, i),
+				       edst, d_start + i);
+	}
+
+	d_gaps = ma_gaps(dst, d_mt);
+	if (d_gaps) {
+		s_gaps = ma_gaps(src, s_mt) + start;
+		d_gaps += d_start;
+		memcpy(d_gaps, s_gaps, size * sizeof(unsigned long));
+	}
+
+	if (start + size - 1 < mt_pivots[s_mt])
+		d_max = s_pivots[size - 1];
+	else
+		d_max = s_max;
+
+	if (d_start + size <= mt_pivots[d_mt])
+		d_pivots[size - 1] = d_max;
+
+	size--;
+	if (size)
+		memcpy(d_pivots, s_pivots, size * sizeof(unsigned long));
+
+	return d_max;
+}
+
+/*
+ * node_finalise() - Zero out unused area and populate metadata
+ * @node: The maple node
+ * @mt: The maple node type
+ * @end: The end of the used area
+ */
+static inline
+void node_finalise(struct maple_node *node, enum maple_type mt,
+		   unsigned char end)
+{
+	unsigned char max_end = mt_slots[mt];
+	unsigned char size;
+	unsigned long *gaps;
+	unsigned char gap_slot;
+
+	gaps = ma_gaps(node, mt);
+	if (end < max_end - 1) {
+		size = max_end - end;
+		memset(ma_slots(node, mt) + end, 0, size * sizeof(void *));
+
+		if (gaps)
+			memset(gaps + end, 0, size * sizeof(unsigned long));
+
+		if (--size)
+			memset(ma_pivots(node, mt) + end, 0, size * sizeof(unsigned long));
+	}
+
+	gap_slot = 0;
+	if (gaps && !ma_is_leaf(mt)) {
+		unsigned long max_gap;
+
+		max_gap = 0;
+		for (int i = 0; i <= end; i++)
+			if (gaps[i] > max_gap) {
+				gap_slot = i;
+				max_gap = gaps[i];
+			}
+	}
+
+	if (mt == maple_arange_64)
+		ma_set_meta(node, mt, gap_slot, end - 1);
+	else if (end <= max_end - 1)
+		ma_set_meta(node, mt, gap_slot, end - 1);
+}
+
 /*
  * mast_cp_to_nodes() - Copy data out to nodes.
  * @mast: The maple subtree state
@@ -2684,6 +2812,7 @@ static inline void cp_leaf_init(struct maple_copy *cp,
 	 * result in buggy code when a compiler reorders the instructions.
 	 */
 
+	cp->height = 1;
 	/* Create entries to insert including split entries to left and right */
 	if (l_wr_mas->r_min < mas->index) {
 		end++;
@@ -2726,6 +2855,100 @@ static inline void cp_data_calc(struct maple_copy *cp,
 	cp->data += r_wr_mas->mas->end - r_wr_mas->offset_end;
 }
 
+/*
+ * spanning_data() - Calculate the @cp data and populate @sib if insufficient
+ * @cp: The maple copy node
+ * @l_wr_mas: The left write maple state
+ * @r_wr_mas: The right write maple state
+ * @sib: The maple state of the sibling.
+ *
+ * Note: @cp->data is a size and not indexed by 0. @sib->end may be set to 0 to
+ * indicate it will not be used.
+ */
+static inline void spanning_data(struct maple_copy *cp,
+		struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas,
+		struct ma_state *sib)
+{
+	cp_data_calc(cp, l_wr_mas, r_wr_mas);
+	if (((l_wr_mas->mas->min != 0) || (r_wr_mas->mas->max != ULONG_MAX)) &&
+	    (cp->data <= mt_min_slots[l_wr_mas->type])) {
+		spanning_sib(l_wr_mas, r_wr_mas, sib);
+		cp->data += sib->end + 1;
+	} else {
+		sib->end = 0;
+	}
+}
+
+/*
+ * dst_setup() - Set up one or more destinations for the new data.
+ * @cp: The maple copy node
+ * @mas: The maple state
+ * @mt: The source node type
+ */
+static inline
+void dst_setup(struct maple_copy *cp, struct ma_state *mas, enum maple_type mt)
+{
+	/* Data is 1 indexed, every src has +1 added.  */
+
+	if (cp->data <= mt_slots[mt]) {
+		cp->split = cp->data - 1;
+		cp->d_count = 1;
+		goto node_setup;
+	}
+
+	cp->split = (cp->data - 1) / 2;
+	cp->d_count = 2;
+	if (cp->data < mt_slots[mt] * 2)
+		goto node_setup;
+
+	if (cp->data == mt_slots[mt] * 2) {
+		unsigned char off;
+		unsigned char s;
+
+		if (!ma_is_leaf(mt))
+			goto node_setup;
+
+		/*
+		 * Leaf nodes are a bit tricky because we cannot assume the data
+		 * can fit due to the NULL limitation on node ends.
+		 */
+		off = cp->split;
+		for (s = 0; s < cp->s_count; s++) {
+			unsigned char s_off;
+
+			s_off = cp->src[s].end - cp->src[s].start;
+			if (s_off >= off)
+				break;
+
+			s_off++;
+			off -= s_off;
+		}
+
+		off += cp->src[s].start;
+		if (ma_slots(cp->src[s].node, cp->src[s].mt)[off])
+			goto node_setup;
+
+		cp->split++;
+		if (cp->split < mt_slots[mt])
+			goto node_setup;
+
+		cp->split -= 2;
+		if (cp->data - 2 - cp->split < mt_slots[mt])
+			goto node_setup;
+
+	}
+
+	/* No other choice but to 3-way split the data */
+	cp->split = (cp->data + 2) / 3;
+	cp->d_count = 3;
+
+node_setup:
+	for (int i = 0; i < cp->d_count; i++) {
+		cp->dst[i].mt = mt;
+		cp->dst[i].node = ma_mnode_ptr(mas_pop_node(mas));
+	}
+}
+
 static inline void append_mas_cp(struct maple_copy *cp,
 	struct ma_state *mas, unsigned char start, unsigned char end)
 {
@@ -2813,38 +3036,153 @@ void multi_src_setup(struct maple_copy *cp, struct ma_wr_state *l_wr_mas,
 }
 
 static inline
-void cp_data_write(struct maple_copy *cp, struct maple_big_node *b_node)
+void cp_data_write(struct maple_copy *cp, struct ma_state *mas)
 {
-	struct maple_node *src;
-	unsigned char s;
+	struct maple_node *dst, *src;
+	unsigned char s, d;
+	unsigned char dst_offset;
+	unsigned char data_offset;
 	unsigned char src_end, s_offset;
-	unsigned long *b_pivots, *cp_pivots;
-	void __rcu **b_slots, **cp_slots;
-	enum maple_type s_mt;
+	unsigned char split;
+	unsigned long s_max, d_max;
+	unsigned char dst_size;
+	enum maple_type s_mt, d_mt;
+
+	data_offset = 0;
+	s = d = 0;
+	/* Readability help */
+	src = cp->src[s].node;
+	dst = cp->dst[d].node;
+	s_offset = cp->src[s].start;
+	src_end = cp->src[s].end;
+	split = cp->split;
+	s_max = cp->src[s].max;
+	s_mt = cp->src[s].mt;
+	d_mt = cp->dst[d].mt;
+	do {
+		dst_offset = 0;
+		d_max = 0;
+		dst = cp->dst[d].node;
+		d_mt = cp->dst[d].mt;
+		dst_size = split + 1;
 
-	b_node->b_end = 0;
+		while (dst_size) {
+			unsigned char size;
 
-	s = 0;
-	b_pivots = b_node->pivot;
-	b_slots = (void __rcu **)b_node->slot;
-	do {
-		unsigned char size;
-
-		src = cp->src[s].node;
-		s_mt = cp->src[s].mt;
-		s_offset = cp->src[s].start;
-		src_end = cp->src[s].end;
-		size = src_end - s_offset + 1;
-		cp_pivots = ma_pivots(src, s_mt) + s_offset;
-		cp_slots = ma_slots(src, s_mt) + s_offset;
-		memcpy(b_slots, cp_slots, size * sizeof(void __rcu *));
-		if (size > 1)
-			memcpy(b_pivots, cp_pivots, (size - 1) * sizeof(unsigned long));
-		b_pivots[size - 1] = cp->src[s].max;
-		b_pivots += size;
-		b_slots += size;
-		b_node->b_end += size;
-	} while (++s < cp->s_count);
+			if (src_end - s_offset + 1 < dst_size)
+				size = src_end - s_offset + 1;
+			else
+				size = dst_size;
+
+			d_max = node_copy(mas, src, s_offset, size, s_max, s_mt,
+					  dst, dst_offset, d_mt);
+
+			dst_offset += size;
+			s_offset += size;
+			if (s_offset > src_end) {
+				/* This source is exhausted */
+				s++;
+				if (s >= cp->s_count) {
+					cp->dst[d].max = d_max;
+					node_finalise(dst, d_mt, dst_offset);
+					return;
+				}
+				/* Reset local src */
+				src = cp->src[s].node;
+				s_offset = cp->src[s].start;
+				src_end = cp->src[s].end;
+				s_max = cp->src[s].max;
+				s_mt = cp->src[s].mt;
+			}
+
+			dst_size -= size;
+			data_offset += size;
+		}
+
+		split = cp->split;
+		cp->dst[d].max = d_max;
+		/* Handle null entries */
+		if (cp->dst[d].max != ULONG_MAX &&
+		    !ma_slots(dst, d_mt)[dst_offset - 1]) {
+			if (s_offset == cp->src[s].start) {
+				s--;
+				src = cp->src[s].node;
+				src_end = cp->src[s].end;
+				s_max = cp->src[s].max;
+				s_mt = cp->src[s].mt;
+				s_offset = src_end;
+			} else {
+				s_offset--;
+			}
+			/* Set dst max and clear pivot */
+			split++;
+			data_offset--;
+			dst_offset--;
+			cp->dst[d].max = ma_pivots(dst, d_mt)[dst_offset - 1];
+		}
+
+		node_finalise(dst, d_mt, dst_offset);
+		++d; /* Next destination */
+		if (d == cp->d_count - 1)
+			split = cp->data - data_offset;
+
+		if (d >= cp->d_count) {
+			WARN_ON(data_offset < cp->data);
+			return;
+		}
+
+	} while (data_offset <= cp->data);
+}
+
+/*
+ * cp_dst_to_slots() - Migrate the maple copy destination to the maple copy
+ * slots
+ * @cp: The maple copy node
+ * @min: The minimal value represented
+ * @max: The maximum value represented
+ * @mas: The maple state
+ */
+static inline void cp_dst_to_slots(struct maple_copy *cp, unsigned long min,
+		unsigned long max, struct ma_state *mas)
+{
+	unsigned char d;
+	unsigned long slot_min = min;
+
+	for (d = 0; d < cp->d_count; d++) {
+		struct maple_node *mn = cp->dst[d].node;
+		enum maple_type mt = cp->dst[d].mt;
+		unsigned long slot_max = cp->dst[d].max;
+
+		/*
+		 * Warning, see cp_leaf_init() comment and rcu_assign_pointer()
+		 * documentation.  Since these are new nodes, there are no
+		 * read-side operations that can view them until they are
+		 * inserted into the tree after an rcu_assign_pointer() call.
+		 */
+		ma_init_slot(&cp->slot[d], mn, mt);
+		cp->pivot[d] = slot_max;
+		if (mt_is_alloc(mas->tree)) {
+			if (ma_is_leaf(mt)) {
+				cp->gap[d] = ma_leaf_max_gap(mn, mt, slot_min,
+						 slot_max, ma_pivots(mn, mt),
+						 ma_slots(mn, mt));
+			} else {
+				unsigned long *gaps = ma_gaps(mn, mt);
+
+				if (gaps) {
+					unsigned char gap_slot;
+
+					gap_slot = ma_meta_gap(mn);
+					cp->gap[d] = gaps[gap_slot];
+				}
+			}
+		}
+		slot_min = slot_max + 1;
+	}
+
+	cp->end = cp->d_count - 1;
+	cp->min = min;
+	cp->max = max;
 }
 
 static void mas_spanning_rebalance_loop(struct ma_state *mas,
@@ -3000,173 +3338,97 @@ static void mas_spanning_rebalance(struct ma_state *mas,
 	mas_spanning_rebalance_loop(mas, mast, count);
 }
 
-
-static noinline void mas_wr_spanning_rebalance(struct ma_state *mas,
-		struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas)
+/*
+ * spanning_ascend() - See if a spanning store operation has to keep walking up
+ * the tree
+ * @cp: The maple_copy node
+ * @l_wr_mas: The left maple write state
+ * @r_wr_mas: The right maple write state
+ * @sib: the maple state of the sibling
+ *
+ * Returns: True if another iteration is necessary.
+ */
+static bool spanning_ascend(struct maple_copy *cp, struct ma_state *mas,
+			    struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas,
+			    struct ma_state *sib)
 {
-
-	unsigned char split, mid_split;
-	unsigned char slot = 0;
-	unsigned char new_height = 0; /* used if node is a new root */
-	struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
-	struct maple_enode *old_enode;
-
-	struct maple_subtree_state mast;
-	struct maple_big_node b_node;
-	struct maple_copy cp;
-	unsigned char height;
-	struct ma_state sib;
-	MA_STATE(l_mas, mas->tree, mas->index, mas->index);
-	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
-	MA_STATE(m_mas, mas->tree, mas->index, mas->index);
-	MA_STATE(mast_l_mas, NULL, 0, 0);
-
-
-	memset(&b_node, 0, sizeof(struct maple_big_node));
-	mast_l_mas = *mas;
-	cp.s_count = 0;
-	cp_leaf_init(&cp, mas, l_wr_mas, r_wr_mas);
-	cp_data_calc(&cp, l_wr_mas, r_wr_mas);
-	if (((l_wr_mas->mas->min != 0) || (r_wr_mas->mas->max != ULONG_MAX)) &&
-	    (cp.data <= mt_min_slots[l_wr_mas->type])) {
-		spanning_sib(l_wr_mas, r_wr_mas, &sib);
-		cp.data += sib.end + 1;
-	} else {
-		sib.end = 0;
-	}
-
-	multi_src_setup(&cp, l_wr_mas, r_wr_mas, &sib);
-	b_node.type = l_wr_mas->type;
-	cp_data_write(&cp, &b_node);
-	if (sib.end) {
-		if (sib.max < l_wr_mas->mas->min) {
-			*l_wr_mas->mas = sib;
-			wr_mas_setup(l_wr_mas, &sib);
-			mast_l_mas = sib;
-		} else {
-			*r_wr_mas->mas = sib;
-			wr_mas_setup(r_wr_mas, &sib);
-		}
+	if (sib->end) {
+		if (sib->max < l_wr_mas->mas->min)
+			*l_wr_mas->mas = *sib;
+		else
+			*r_wr_mas->mas = *sib;
 	}
 
-	mast.orig_l = &mast_l_mas;
-	mast.orig_r = r_wr_mas->mas;
-	/* Stop spanning searches by searching for just index. */
-	mast.orig_l->last = mas->index;
+	cp_dst_to_slots(cp, l_wr_mas->mas->min, r_wr_mas->mas->max, mas);
+	if (!cp->min && cp->max == ULONG_MAX) {
+		/* New root */
+		if (cp->d_count != 1) {
+			enum maple_type mt = maple_arange_64;
 
-	mast.bn = &b_node;
-	/* Combine l_mas and r_mas and split them up evenly again. */
+			if (!mt_is_alloc(mas->tree))
+				mt = maple_range_64;
 
-	/*
-	 * The tree needs to be rebalanced and leaves need to be kept at the same level.
-	 * Rebalancing is done by use of the ``struct maple_topiary``.
-	 */
-	mast.l = &l_mas;
-	mast.m = &m_mas;
-	mast.r = &r_mas;
-	l_mas.status = r_mas.status = m_mas.status = ma_none;
-	height = mas_mt_height(mas) + 1;
-
-	/*
-	 * Each level of the tree is examined and balanced, pushing data to the left or
-	 * right, or rebalancing against left or right nodes is employed to avoid
-	 * rippling up the tree to limit the amount of churn.  Once a new sub-section of
-	 * the tree is created, there may be a mix of new and old nodes.  The old nodes
-	 * will have the incorrect parent pointers and currently be in two trees: the
-	 * original tree and the partially new tree.  To remedy the parent pointers in
-	 * the old tree, the new data is swapped into the active tree and a walk down
-	 * the tree is performed and the parent pointers are updated.
-	 * See mas_topiary_replace() for more information.
-	 */
-	while (height--) {
-		mast.bn->b_end--;
-		mast.bn->type = mte_node_type(mast.orig_l->node);
-		split = mas_mab_to_node(mas, mast.bn, &left, &right, &middle,
-					&mid_split);
-		mast_set_split_parents(&mast, left, middle, right, split,
-				       mid_split);
-		mast_cp_to_nodes(&mast, left, middle, right, split, mid_split);
-		new_height++;
-
-		/*
-		 * Copy data from next level in the tree to mast.bn from next
-		 * iteration
-		 */
-		memset(mast.bn, 0, sizeof(struct maple_big_node));
-		mast.bn->type = mte_node_type(left);
-
-		/* Root already stored in l->node. */
-		if (mas_is_root_limits(mast.l))
-			goto new_root;
-
-		mast_ascend(&mast);
-		mast_combine_cp_left(&mast);
-		mast.l->offset = mast.bn->b_end;
-		mab_set_b_end(mast.bn, mast.l, left);
-		mab_set_b_end(mast.bn, mast.m, middle);
-		mab_set_b_end(mast.bn, mast.r, right);
-
-		/* Copy anything necessary out of the right node. */
-		mast_combine_cp_right(&mast);
-		mast.orig_l->last = mast.orig_l->max;
-
-		if (mast_sufficient(&mast)) {
-			if (mast_overflow(&mast))
-				continue;
-
-			if (mast.orig_l->node == mast.orig_r->node) {
-			       /*
-				* The data in b_node should be stored in one
-				* node and in the tree
-				*/
-				slot = mast.l->offset;
-				break;
-			}
-
-			continue;
+			cp->data = cp->d_count;
+			cp->s_count = 0;
+			dst_setup(cp, mas, mt);
+			init_cp_src(cp);
+			node_copy(mas, cp->src[0].node, 0, cp->data, cp->max, maple_copy,
+				  cp->dst[0].node, 0, mt);
+			node_finalise(cp->dst[0].node, mt, cp->end + 1);
+			/*
+			 * Warning, see cp_leaf_init() comment and rcu_assign_pointer()
+			 * documentation.  Since this is a new root, there are no
+			 * read-side operations that can view it until it is insert into
+			 * the tree after an rcu_assign_pointer() call.
+			 */
+			ma_init_slot(&cp->slot[0], cp->dst[0].node, mt);
+			cp->height++;
 		}
-
-		/* May be a new root stored in mast.bn */
-		if (mas_is_root_limits(mast.orig_l))
-			break;
-
-		mast_spanning_rebalance(&mast);
-
-		/* rebalancing from other nodes may require another loop. */
-		if (!height)
-			height++;
+		WARN_ON_ONCE(cp->dst[0].node != mte_to_node(
+				mt_slot_locked(mas->tree, cp->slot, 0)));
+		cp->dst[0].node->parent = ma_parent_ptr(mas_tree_parent(mas));
+		mas->min = 0;
+		mas->max = ULONG_MAX;
+		mas->depth = 0;
+		mas->node = mas_root_locked(mas);
+		return false;
 	}
 
-	mast.l->node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
-				mte_node_type(mast.orig_l->node));
+	/* Converged and has a single destination */
+	if ((cp->d_count == 1) &&
+	    (l_wr_mas->mas->node == r_wr_mas->mas->node)) {
+		cp->dst[0].node->parent = ma_parent_ptr(mas_mn(mas)->parent);
+		return false;
+	}
 
-	mab_mas_cp(mast.bn, 0, mt_slots[mast.bn->type] - 1, mast.l, true);
-	new_height++;
-	mas_set_parent(mas, left, mast.l->node, slot);
-	if (middle)
-		mas_set_parent(mas, middle, mast.l->node, ++slot);
+	cp->height++;
+	wr_mas_ascend(l_wr_mas);
+	wr_mas_ascend(r_wr_mas);
+	return true;
+}
 
-	if (right)
-		mas_set_parent(mas, right, mast.l->node, ++slot);
+static noinline void mas_wr_spanning_rebalance(struct ma_state *mas,
+		struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas)
+{
 
-	if (mas_is_root_limits(mast.l)) {
-new_root:
-		mas_mn(mast.l)->parent = ma_parent_ptr(mas_tree_parent(mas));
-		while (!mte_is_root(mast.orig_l->node))
-			mast_ascend(&mast);
-	} else {
-		mas_mn(mast.l)->parent = mas_mn(mast.orig_l)->parent;
-	}
+	struct maple_enode *old_enode;
+	struct maple_copy cp;
+	struct ma_state sib;
 
-	old_enode = mast.orig_l->node;
-	mas->depth = mast.l->depth;
-	mas->node = mast.l->node;
-	mas->min = mast.l->min;
-	mas->max = mast.l->max;
-	mas->offset = mast.l->offset;
-	mas_wmb_replace(mas, old_enode, new_height);
+	cp_leaf_init(&cp, mas, l_wr_mas, r_wr_mas);
+	do {
+		spanning_data(&cp, l_wr_mas, r_wr_mas, &sib);
+		multi_src_setup(&cp, l_wr_mas, r_wr_mas, &sib);
+		dst_setup(&cp, mas, l_wr_mas->type);
+		cp_data_write(&cp, mas);
+	} while (spanning_ascend(&cp, mas, l_wr_mas, r_wr_mas, &sib));
+
+	old_enode = mas->node;
+	mas->node = mt_slot_locked(mas->tree, cp.slot, 0);
+	mas_wmb_replace(mas, old_enode, cp.height);
 	mtree_range_walk(mas);
 }
+
 /*
  * mas_rebalance() - Rebalance a given node.
  * @mas: The maple state
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 85fb5616c133..dfd7099f0d8e 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -35508,7 +35508,7 @@ static noinline void __init check_spanning_write(struct maple_tree *mt)
 	/* Store a value across a node boundary that causes a 3 way split */
 
 	if (MAPLE_32BIT)
-		i = 49590; /* 0xc1b6 */
+		i = 49430; /* 0xc116 */
 	else
 		i = 49670; /* 0xC206 */
 
-- 
cgit v1.2.3


From e4f4fc7aa8b720d934a0bfcea7f8aae4271d308f Mon Sep 17 00:00:00 2001
From: "JP Kobryn (Meta)" <jp.kobryn@linux.dev>
Date: Thu, 19 Feb 2026 15:58:46 -0800
Subject: mm: move pgscan, pgsteal, pgrefill to node stats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are situations where reclaim kicks in on a system with free memory.
One possible cause is a NUMA imbalance scenario where one or more nodes
are under pressure.  It would help if we could easily identify such nodes.

Move the pgscan, pgsteal, and pgrefill counters from vm_event_item to
node_stat_item to provide per-node reclaim visibility.  With these
counters as node stats, the values are now displayed in the per-node
section of /proc/zoneinfo, which allows for quick identification of the
affected nodes.

/proc/vmstat continues to report the same counters, aggregated across all
nodes.  But the ordering of these items within the readout changes as they
move from the vm events section to the node stats section.

Memcg accounting of these counters is preserved.  The relocated counters
remain visible in memory.stat alongside the existing aggregate pgscan and
pgsteal counters.

However, this change affects how the global counters are accumulated.
Previously, the global event count update was gated on !cgroup_reclaim(),
excluding memcg-based reclaim from /proc/vmstat.  Now that
mod_lruvec_state() is being used to update the counters, the global
counters will include all reclaim.  This is consistent with how pgdemote
counters are already tracked.

Finally, the virtio_balloon driver is updated to use
global_node_page_state() to fetch the counters, as they are no longer
accessible through the vm_events array.

Link: https://lkml.kernel.org/r/20260219235846.161910-1-jp.kobryn@linux.dev
Signed-off-by: JP Kobryn <jp.kobryn@linux.dev>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Eugenio Pérez <eperezma@redhat.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mathew Brost <matthew.brost@intel.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/virtio/virtio_balloon.c |  8 +++---
 include/linux/mmzone.h          | 13 ++++++++++
 include/linux/vm_event_item.h   | 13 ----------
 mm/memcontrol.c                 | 56 ++++++++++++++++++++++++++++-------------
 mm/vmscan.c                     | 39 ++++++++++------------------
 mm/vmstat.c                     | 26 +++++++++----------
 6 files changed, 82 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index d1fbc8fe8470..7f15bf162e88 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -369,13 +369,13 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall);
 
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_SCAN,
-		    pages_to_bytes(events[PGSCAN_KSWAPD]));
+		    pages_to_bytes(global_node_page_state(PGSCAN_KSWAPD)));
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_SCAN,
-		    pages_to_bytes(events[PGSCAN_DIRECT]));
+		    pages_to_bytes(global_node_page_state(PGSCAN_DIRECT)));
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_RECLAIM,
-		    pages_to_bytes(events[PGSTEAL_KSWAPD]));
+		    pages_to_bytes(global_node_page_state(PGSTEAL_KSWAPD)));
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_RECLAIM,
-		    pages_to_bytes(events[PGSTEAL_DIRECT]));
+		    pages_to_bytes(global_node_page_state(PGSTEAL_DIRECT)));
 
 #ifdef CONFIG_HUGETLB_PAGE
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3e51190a55e4..546bca95ca40 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -255,6 +255,19 @@ enum node_stat_item {
 	PGDEMOTE_DIRECT,
 	PGDEMOTE_KHUGEPAGED,
 	PGDEMOTE_PROACTIVE,
+	PGSTEAL_KSWAPD,
+	PGSTEAL_DIRECT,
+	PGSTEAL_KHUGEPAGED,
+	PGSTEAL_PROACTIVE,
+	PGSTEAL_ANON,
+	PGSTEAL_FILE,
+	PGSCAN_KSWAPD,
+	PGSCAN_DIRECT,
+	PGSCAN_KHUGEPAGED,
+	PGSCAN_PROACTIVE,
+	PGSCAN_ANON,
+	PGSCAN_FILE,
+	PGREFILL,
 #ifdef CONFIG_HUGETLB_PAGE
 	NR_HUGETLB,
 #endif
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 22a139f82d75..03fe95f5a020 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -38,21 +38,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
 		PGFAULT, PGMAJFAULT,
 		PGLAZYFREED,
-		PGREFILL,
 		PGREUSE,
-		PGSTEAL_KSWAPD,
-		PGSTEAL_DIRECT,
-		PGSTEAL_KHUGEPAGED,
-		PGSTEAL_PROACTIVE,
-		PGSCAN_KSWAPD,
-		PGSCAN_DIRECT,
-		PGSCAN_KHUGEPAGED,
-		PGSCAN_PROACTIVE,
 		PGSCAN_DIRECT_THROTTLE,
-		PGSCAN_ANON,
-		PGSCAN_FILE,
-		PGSTEAL_ANON,
-		PGSTEAL_FILE,
 #ifdef CONFIG_NUMA
 		PGSCAN_ZONE_RECLAIM_SUCCESS,
 		PGSCAN_ZONE_RECLAIM_FAILED,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 772bac21d155..af75f10150a8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -330,6 +330,19 @@ static const unsigned int memcg_node_stat_items[] = {
 	PGDEMOTE_DIRECT,
 	PGDEMOTE_KHUGEPAGED,
 	PGDEMOTE_PROACTIVE,
+	PGSTEAL_KSWAPD,
+	PGSTEAL_DIRECT,
+	PGSTEAL_KHUGEPAGED,
+	PGSTEAL_PROACTIVE,
+	PGSTEAL_ANON,
+	PGSTEAL_FILE,
+	PGSCAN_KSWAPD,
+	PGSCAN_DIRECT,
+	PGSCAN_KHUGEPAGED,
+	PGSCAN_PROACTIVE,
+	PGSCAN_ANON,
+	PGSCAN_FILE,
+	PGREFILL,
 #ifdef CONFIG_HUGETLB_PAGE
 	NR_HUGETLB,
 #endif
@@ -443,17 +456,8 @@ static const unsigned int memcg_vm_event_stat[] = {
 #endif
 	PSWPIN,
 	PSWPOUT,
-	PGSCAN_KSWAPD,
-	PGSCAN_DIRECT,
-	PGSCAN_KHUGEPAGED,
-	PGSCAN_PROACTIVE,
-	PGSTEAL_KSWAPD,
-	PGSTEAL_DIRECT,
-	PGSTEAL_KHUGEPAGED,
-	PGSTEAL_PROACTIVE,
 	PGFAULT,
 	PGMAJFAULT,
-	PGREFILL,
 	PGACTIVATE,
 	PGDEACTIVATE,
 	PGLAZYFREE,
@@ -1400,6 +1404,15 @@ static const struct memory_stat memory_stats[] = {
 	{ "pgdemote_direct",		PGDEMOTE_DIRECT		},
 	{ "pgdemote_khugepaged",	PGDEMOTE_KHUGEPAGED	},
 	{ "pgdemote_proactive",		PGDEMOTE_PROACTIVE	},
+	{ "pgsteal_kswapd",		PGSTEAL_KSWAPD		},
+	{ "pgsteal_direct",		PGSTEAL_DIRECT		},
+	{ "pgsteal_khugepaged",		PGSTEAL_KHUGEPAGED	},
+	{ "pgsteal_proactive",		PGSTEAL_PROACTIVE	},
+	{ "pgscan_kswapd",		PGSCAN_KSWAPD		},
+	{ "pgscan_direct",		PGSCAN_DIRECT		},
+	{ "pgscan_khugepaged",		PGSCAN_KHUGEPAGED	},
+	{ "pgscan_proactive",		PGSCAN_PROACTIVE	},
+	{ "pgrefill",			PGREFILL		},
 #ifdef CONFIG_NUMA_BALANCING
 	{ "pgpromote_success",		PGPROMOTE_SUCCESS	},
 #endif
@@ -1443,6 +1456,15 @@ static int memcg_page_state_output_unit(int item)
 	case PGDEMOTE_DIRECT:
 	case PGDEMOTE_KHUGEPAGED:
 	case PGDEMOTE_PROACTIVE:
+	case PGSTEAL_KSWAPD:
+	case PGSTEAL_DIRECT:
+	case PGSTEAL_KHUGEPAGED:
+	case PGSTEAL_PROACTIVE:
+	case PGSCAN_KSWAPD:
+	case PGSCAN_DIRECT:
+	case PGSCAN_KHUGEPAGED:
+	case PGSCAN_PROACTIVE:
+	case PGREFILL:
 #ifdef CONFIG_NUMA_BALANCING
 	case PGPROMOTE_SUCCESS:
 #endif
@@ -1514,15 +1536,15 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
 
 	/* Accumulated memory events */
 	seq_buf_printf(s, "pgscan %lu\n",
-		       memcg_events(memcg, PGSCAN_KSWAPD) +
-		       memcg_events(memcg, PGSCAN_DIRECT) +
-		       memcg_events(memcg, PGSCAN_PROACTIVE) +
-		       memcg_events(memcg, PGSCAN_KHUGEPAGED));
+		       memcg_page_state(memcg, PGSCAN_KSWAPD) +
+		       memcg_page_state(memcg, PGSCAN_DIRECT) +
+		       memcg_page_state(memcg, PGSCAN_PROACTIVE) +
+		       memcg_page_state(memcg, PGSCAN_KHUGEPAGED));
 	seq_buf_printf(s, "pgsteal %lu\n",
-		       memcg_events(memcg, PGSTEAL_KSWAPD) +
-		       memcg_events(memcg, PGSTEAL_DIRECT) +
-		       memcg_events(memcg, PGSTEAL_PROACTIVE) +
-		       memcg_events(memcg, PGSTEAL_KHUGEPAGED));
+		       memcg_page_state(memcg, PGSTEAL_KSWAPD) +
+		       memcg_page_state(memcg, PGSTEAL_DIRECT) +
+		       memcg_page_state(memcg, PGSTEAL_PROACTIVE) +
+		       memcg_page_state(memcg, PGSTEAL_KHUGEPAGED));
 
 	for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
 #ifdef CONFIG_MEMCG_V1
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0fc9373e8251..031c5c035a82 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1984,7 +1984,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 	unsigned long nr_taken;
 	struct reclaim_stat stat;
 	bool file = is_file_lru(lru);
-	enum vm_event_item item;
+	enum node_stat_item item;
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 	bool stalled = false;
 
@@ -2010,10 +2010,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 
 	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
 	item = PGSCAN_KSWAPD + reclaimer_offset(sc);
-	if (!cgroup_reclaim(sc))
-		__count_vm_events(item, nr_scanned);
-	count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
-	__count_vm_events(PGSCAN_ANON + file, nr_scanned);
+	mod_lruvec_state(lruvec, item, nr_scanned);
+	mod_lruvec_state(lruvec, PGSCAN_ANON + file, nr_scanned);
 
 	spin_unlock_irq(&lruvec->lru_lock);
 
@@ -2030,10 +2028,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 					stat.nr_demoted);
 	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
 	item = PGSTEAL_KSWAPD + reclaimer_offset(sc);
-	if (!cgroup_reclaim(sc))
-		__count_vm_events(item, nr_reclaimed);
-	count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
-	__count_vm_events(PGSTEAL_ANON + file, nr_reclaimed);
+	mod_lruvec_state(lruvec, item, nr_reclaimed);
+	mod_lruvec_state(lruvec, PGSTEAL_ANON + file, nr_reclaimed);
 
 	lru_note_cost_unlock_irq(lruvec, file, stat.nr_pageout,
 					nr_scanned - nr_reclaimed);
@@ -2120,9 +2116,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
 	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
 
-	if (!cgroup_reclaim(sc))
-		__count_vm_events(PGREFILL, nr_scanned);
-	count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
+	mod_lruvec_state(lruvec, PGREFILL, nr_scanned);
 
 	spin_unlock_irq(&lruvec->lru_lock);
 
@@ -4543,7 +4537,7 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 {
 	int i;
 	int gen;
-	enum vm_event_item item;
+	enum node_stat_item item;
 	int sorted = 0;
 	int scanned = 0;
 	int isolated = 0;
@@ -4551,7 +4545,6 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 	int scan_batch = min(nr_to_scan, MAX_LRU_BATCH);
 	int remaining = scan_batch;
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
-	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 
 	VM_WARN_ON_ONCE(!list_empty(list));
 
@@ -4602,13 +4595,9 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 	}
 
 	item = PGSCAN_KSWAPD + reclaimer_offset(sc);
-	if (!cgroup_reclaim(sc)) {
-		__count_vm_events(item, isolated);
-		__count_vm_events(PGREFILL, sorted);
-	}
-	count_memcg_events(memcg, item, isolated);
-	count_memcg_events(memcg, PGREFILL, sorted);
-	__count_vm_events(PGSCAN_ANON + type, isolated);
+	mod_lruvec_state(lruvec, item, isolated);
+	mod_lruvec_state(lruvec, PGREFILL, sorted);
+	mod_lruvec_state(lruvec, PGSCAN_ANON + type, isolated);
 	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, scan_batch,
 				scanned, skipped, isolated,
 				type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
@@ -4693,7 +4682,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 	LIST_HEAD(clean);
 	struct folio *folio;
 	struct folio *next;
-	enum vm_event_item item;
+	enum node_stat_item item;
 	struct reclaim_stat stat;
 	struct lru_gen_mm_walk *walk;
 	bool skip_retry = false;
@@ -4757,10 +4746,8 @@ retry:
 					stat.nr_demoted);
 
 	item = PGSTEAL_KSWAPD + reclaimer_offset(sc);
-	if (!cgroup_reclaim(sc))
-		__count_vm_events(item, reclaimed);
-	count_memcg_events(memcg, item, reclaimed);
-	__count_vm_events(PGSTEAL_ANON + type, reclaimed);
+	mod_lruvec_state(lruvec, item, reclaimed);
+	mod_lruvec_state(lruvec, PGSTEAL_ANON + type, reclaimed);
 
 	spin_unlock_irq(&lruvec->lru_lock);
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 86b14b0f77b5..44bbb7752f11 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1276,6 +1276,19 @@ const char * const vmstat_text[] = {
 	[I(PGDEMOTE_DIRECT)]			= "pgdemote_direct",
 	[I(PGDEMOTE_KHUGEPAGED)]		= "pgdemote_khugepaged",
 	[I(PGDEMOTE_PROACTIVE)]			= "pgdemote_proactive",
+	[I(PGSTEAL_KSWAPD)]			= "pgsteal_kswapd",
+	[I(PGSTEAL_DIRECT)]			= "pgsteal_direct",
+	[I(PGSTEAL_KHUGEPAGED)]			= "pgsteal_khugepaged",
+	[I(PGSTEAL_PROACTIVE)]			= "pgsteal_proactive",
+	[I(PGSTEAL_ANON)]			= "pgsteal_anon",
+	[I(PGSTEAL_FILE)]			= "pgsteal_file",
+	[I(PGSCAN_KSWAPD)]			= "pgscan_kswapd",
+	[I(PGSCAN_DIRECT)]			= "pgscan_direct",
+	[I(PGSCAN_KHUGEPAGED)]			= "pgscan_khugepaged",
+	[I(PGSCAN_PROACTIVE)]			= "pgscan_proactive",
+	[I(PGSCAN_ANON)]			= "pgscan_anon",
+	[I(PGSCAN_FILE)]			= "pgscan_file",
+	[I(PGREFILL)]				= "pgrefill",
 #ifdef CONFIG_HUGETLB_PAGE
 	[I(NR_HUGETLB)]				= "nr_hugetlb",
 #endif
@@ -1318,21 +1331,8 @@ const char * const vmstat_text[] = {
 	[I(PGMAJFAULT)]				= "pgmajfault",
 	[I(PGLAZYFREED)]			= "pglazyfreed",
 
-	[I(PGREFILL)]				= "pgrefill",
 	[I(PGREUSE)]				= "pgreuse",
-	[I(PGSTEAL_KSWAPD)]			= "pgsteal_kswapd",
-	[I(PGSTEAL_DIRECT)]			= "pgsteal_direct",
-	[I(PGSTEAL_KHUGEPAGED)]			= "pgsteal_khugepaged",
-	[I(PGSTEAL_PROACTIVE)]			= "pgsteal_proactive",
-	[I(PGSCAN_KSWAPD)]			= "pgscan_kswapd",
-	[I(PGSCAN_DIRECT)]			= "pgscan_direct",
-	[I(PGSCAN_KHUGEPAGED)]			= "pgscan_khugepaged",
-	[I(PGSCAN_PROACTIVE)]			= "pgscan_proactive",
 	[I(PGSCAN_DIRECT_THROTTLE)]		= "pgscan_direct_throttle",
-	[I(PGSCAN_ANON)]			= "pgscan_anon",
-	[I(PGSCAN_FILE)]			= "pgscan_file",
-	[I(PGSTEAL_ANON)]			= "pgsteal_anon",
-	[I(PGSTEAL_FILE)]			= "pgsteal_file",
 
 #ifdef CONFIG_NUMA
 	[I(PGSCAN_ZONE_RECLAIM_SUCCESS)]	= "zone_reclaim_success",
-- 
cgit v1.2.3


From 0d6af9bcf383bcdf601e670bb605861b01e318e7 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Wed, 18 Feb 2026 04:06:34 +0800
Subject: mm, swap: use the swap table to track the swap count

Now all the infrastructures are ready, switch to using the swap table
only.  This is unfortunately a large patch because the whole old counting
mechanism, especially SWP_CONTINUED, has to be gone and switch to the new
mechanism together, with no intermediate steps available.

The swap table is capable of holding up to SWP_TB_COUNT_MAX - 1 counts in
the higher bits of each table entry, so using that, the swap_map can be
completely dropped.

swap_map also had a limit of SWAP_CONT_MAX.  Any value beyond that limit
will require a COUNT_CONTINUED page.  COUNT_CONTINUED is a bit complex to
maintain, so for the swap table, a simpler approach is used: when the
count goes beyond SWP_TB_COUNT_MAX - 1, the cluster will have an
extend_table allocated, which is a swap cluster-sized array of unsigned
int.  The counting is basically offloaded there until the count drops
below SWP_TB_COUNT_MAX again.

Both the swap table and the extend table are cluster-based, so they
exhibit good performance and sparsity.

To make the switch from swap_map to swap table clean, this commit cleans
up and introduces a new set of functions based on the swap table design,
for manipulating swap counts:

- __swap_cluster_dup_entry, __swap_cluster_put_entry,
  __swap_cluster_alloc_entry, __swap_cluster_free_entry:

  Increase/decrease the count of a swap slot, or alloc / free a swap
  slot. This is the internal routine that does the counting work based
  on the swap table and handles all the complexities. The caller will
  need to lock the cluster before calling them.

  All swap count-related update operations are wrapped by these four
  helpers.

- swap_dup_entries_cluster, swap_put_entries_cluster:

  Increase/decrease the swap count of one or a set of swap slots in the
  same cluster range. These two helpers serve as the common routines for
  folio_dup_swap & swap_dup_entry_direct, or
  folio_put_swap & swap_put_entries_direct.

And use these helpers to replace all existing callers. This helps to
simplify the count tracking by a lot, and the swap_map is gone.

[ryncsn@gmail.com: fix build]
  Link: https://lkml.kernel.org/r/aZWuLZi-vYi3vAWe@KASONG-MC4
Link: https://lkml.kernel.org/r/20260218-swap-table-p3-v3-9-f4e34be021a7@tencent.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Suggested-by: Chris Li <chrisl@kernel.org>
Acked-by: Chris Li <chrisl@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <ryncsn@gmail.com>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h |  28 +-
 mm/memory.c          |   2 +-
 mm/swap.h            |  14 +-
 mm/swap_state.c      |  53 ++--
 mm/swap_table.h      |   5 +
 mm/swapfile.c        | 790 +++++++++++++++++++--------------------------------
 6 files changed, 334 insertions(+), 558 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 62fc7499b408..0effe3cc50f5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -208,7 +208,6 @@ enum {
 	SWP_DISCARDABLE = (1 << 2),	/* blkdev support discard */
 	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
 	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
-	SWP_CONTINUED	= (1 << 5),	/* swap_map has count continuation */
 	SWP_BLKDEV	= (1 << 6),	/* its a block device */
 	SWP_ACTIVATED	= (1 << 7),	/* set after swap_activate success */
 	SWP_FS_OPS	= (1 << 8),	/* swapfile operations go through fs */
@@ -223,16 +222,6 @@ enum {
 #define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
 #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
 
-/* Bit flag in swap_map */
-#define COUNT_CONTINUED	0x80	/* Flag swap_map continuation for full count */
-
-/* Special value in first swap_map */
-#define SWAP_MAP_MAX	0x3e	/* Max count */
-#define SWAP_MAP_BAD	0x3f	/* Note page is bad */
-
-/* Special value in each swap_map continuation */
-#define SWAP_CONT_MAX	0x7f	/* Max count */
-
 /*
  * The first page in the swap file is the swap header, which is always marked
  * bad to prevent it from being allocated as an entry. This also prevents the
@@ -264,8 +253,7 @@ struct swap_info_struct {
 	signed short	prio;		/* swap priority of this type */
 	struct plist_node list;		/* entry in swap_active_head */
 	signed char	type;		/* strange name for an index */
-	unsigned int	max;		/* extent of the swap_map */
-	unsigned char *swap_map;	/* vmalloc'ed array of usage counts */
+	unsigned int	max;		/* size of this swap device */
 	unsigned long *zeromap;		/* kvmalloc'ed bitmap to track zero pages */
 	struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
 	struct list_head free_clusters; /* free clusters list */
@@ -284,18 +272,14 @@ struct swap_info_struct {
 	struct completion comp;		/* seldom referenced */
 	spinlock_t lock;		/*
 					 * protect map scan related fields like
-					 * swap_map, inuse_pages and all cluster
-					 * lists. other fields are only changed
+					 * inuse_pages and all cluster lists.
+					 * Other fields are only changed
 					 * at swapon/swapoff, so are protected
 					 * by swap_lock. changing flags need
 					 * hold this lock and swap_lock. If
 					 * both locks need hold, hold swap_lock
 					 * first.
 					 */
-	spinlock_t cont_lock;		/*
-					 * protect swap count continuation page
-					 * list.
-					 */
 	struct work_struct discard_work; /* discard worker */
 	struct work_struct reclaim_work; /* reclaim worker */
 	struct list_head discard_clusters; /* discard clusters list */
@@ -451,7 +435,6 @@ static inline long get_nr_swap_pages(void)
 }
 
 extern void si_swapinfo(struct sysinfo *);
-extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 int swap_type_of(dev_t device, sector_t offset);
 int find_first_swap(dev_t *device);
 extern unsigned int count_swap_pages(int, int);
@@ -517,11 +500,6 @@ static inline void free_swap_cache(struct folio *folio)
 {
 }
 
-static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
-{
-	return 0;
-}
-
 static inline int swap_dup_entry_direct(swp_entry_t ent)
 {
 	return 0;
diff --git a/mm/memory.c b/mm/memory.c
index 2f815a34d924..7084c426f933 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1346,7 +1346,7 @@ again:
 
 	if (ret == -EIO) {
 		VM_WARN_ON_ONCE(!entry.val);
-		if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) {
+		if (swap_retry_table_alloc(entry, GFP_KERNEL) < 0) {
 			ret = -ENOMEM;
 			goto out;
 		}
diff --git a/mm/swap.h b/mm/swap.h
index bfafa637c458..0a91e21e92b1 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -37,6 +37,7 @@ struct swap_cluster_info {
 	u8 flags;
 	u8 order;
 	atomic_long_t __rcu *table;	/* Swap table entries, see mm/swap_table.h */
+	unsigned int *extend_table;	/* For large swap count, protected by ci->lock */
 	struct list_head list;
 };
 
@@ -183,6 +184,8 @@ static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
 	spin_unlock_irq(&ci->lock);
 }
 
+extern int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp);
+
 /*
  * Below are the core routines for doing swap for a folio.
  * All helpers requires the folio to be locked, and a locked folio
@@ -206,9 +209,9 @@ int folio_dup_swap(struct folio *folio, struct page *subpage);
 void folio_put_swap(struct folio *folio, struct page *subpage);
 
 /* For internal use */
-extern void swap_entries_free(struct swap_info_struct *si,
-			      struct swap_cluster_info *ci,
-			      unsigned long offset, unsigned int nr_pages);
+extern void __swap_cluster_free_entries(struct swap_info_struct *si,
+					struct swap_cluster_info *ci,
+					unsigned int ci_off, unsigned int nr_pages);
 
 /* linux/mm/page_io.c */
 int sio_pool_init(void);
@@ -446,6 +449,11 @@ static inline int swap_writeout(struct folio *folio,
 	return 0;
 }
 
+static inline int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp)
+{
+	return -EINVAL;
+}
+
 static inline bool swap_cache_has_folio(swp_entry_t entry)
 {
 	return false;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e213ee35c1d2..e7618ffe6d70 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -140,21 +140,20 @@ void *swap_cache_get_shadow(swp_entry_t entry)
 void __swap_cache_add_folio(struct swap_cluster_info *ci,
 			    struct folio *folio, swp_entry_t entry)
 {
-	unsigned long new_tb;
-	unsigned int ci_start, ci_off, ci_end;
+	unsigned int ci_off = swp_cluster_offset(entry), ci_end;
 	unsigned long nr_pages = folio_nr_pages(folio);
+	unsigned long pfn = folio_pfn(folio);
+	unsigned long old_tb;
 
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_swapbacked(folio), folio);
 
-	new_tb = folio_to_swp_tb(folio, 0);
-	ci_start = swp_cluster_offset(entry);
-	ci_off = ci_start;
-	ci_end = ci_start + nr_pages;
+	ci_end = ci_off + nr_pages;
 	do {
-		VM_WARN_ON_ONCE(swp_tb_is_folio(__swap_table_get(ci, ci_off)));
-		__swap_table_set(ci, ci_off, new_tb);
+		old_tb = __swap_table_get(ci, ci_off);
+		VM_WARN_ON_ONCE(swp_tb_is_folio(old_tb));
+		__swap_table_set(ci, ci_off, pfn_to_swp_tb(pfn, __swp_tb_get_count(old_tb)));
 	} while (++ci_off < ci_end);
 
 	folio_ref_add(folio, nr_pages);
@@ -183,14 +182,13 @@ static int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
 	unsigned long old_tb;
 	struct swap_info_struct *si;
 	struct swap_cluster_info *ci;
-	unsigned int ci_start, ci_off, ci_end, offset;
+	unsigned int ci_start, ci_off, ci_end;
 	unsigned long nr_pages = folio_nr_pages(folio);
 
 	si = __swap_entry_to_info(entry);
 	ci_start = swp_cluster_offset(entry);
 	ci_end = ci_start + nr_pages;
 	ci_off = ci_start;
-	offset = swp_offset(entry);
 	ci = swap_cluster_lock(si, swp_offset(entry));
 	if (unlikely(!ci->table)) {
 		err = -ENOENT;
@@ -202,13 +200,12 @@ static int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
 			err = -EEXIST;
 			goto failed;
 		}
-		if (unlikely(!__swap_count(swp_entry(swp_type(entry), offset)))) {
+		if (unlikely(!__swp_tb_get_count(old_tb))) {
 			err = -ENOENT;
 			goto failed;
 		}
 		if (swp_tb_is_shadow(old_tb))
 			shadow = swp_tb_to_shadow(old_tb);
-		offset++;
 	} while (++ci_off < ci_end);
 	__swap_cache_add_folio(ci, folio, entry);
 	swap_cluster_unlock(ci);
@@ -237,8 +234,9 @@ failed:
 void __swap_cache_del_folio(struct swap_cluster_info *ci, struct folio *folio,
 			    swp_entry_t entry, void *shadow)
 {
+	int count;
+	unsigned long old_tb;
 	struct swap_info_struct *si;
-	unsigned long old_tb, new_tb;
 	unsigned int ci_start, ci_off, ci_end;
 	bool folio_swapped = false, need_free = false;
 	unsigned long nr_pages = folio_nr_pages(folio);
@@ -249,20 +247,20 @@ void __swap_cache_del_folio(struct swap_cluster_info *ci, struct folio *folio,
 	VM_WARN_ON_ONCE_FOLIO(folio_test_writeback(folio), folio);
 
 	si = __swap_entry_to_info(entry);
-	new_tb = shadow_to_swp_tb(shadow, 0);
 	ci_start = swp_cluster_offset(entry);
 	ci_end = ci_start + nr_pages;
 	ci_off = ci_start;
 	do {
-		/* If shadow is NULL, we sets an empty shadow */
-		old_tb = __swap_table_xchg(ci, ci_off, new_tb);
+		old_tb = __swap_table_get(ci, ci_off);
 		WARN_ON_ONCE(!swp_tb_is_folio(old_tb) ||
 			     swp_tb_to_folio(old_tb) != folio);
-		if (__swap_count(swp_entry(si->type,
-				 swp_offset(entry) + ci_off - ci_start)))
+		count = __swp_tb_get_count(old_tb);
+		if (count)
 			folio_swapped = true;
 		else
 			need_free = true;
+		/* If shadow is NULL, we sets an empty shadow. */
+		__swap_table_set(ci, ci_off, shadow_to_swp_tb(shadow, count));
 	} while (++ci_off < ci_end);
 
 	folio->swap.val = 0;
@@ -271,13 +269,13 @@ void __swap_cache_del_folio(struct swap_cluster_info *ci, struct folio *folio,
 	lruvec_stat_mod_folio(folio, NR_SWAPCACHE, -nr_pages);
 
 	if (!folio_swapped) {
-		swap_entries_free(si, ci, swp_offset(entry), nr_pages);
+		__swap_cluster_free_entries(si, ci, ci_start, nr_pages);
 	} else if (need_free) {
+		ci_off = ci_start;
 		do {
-			if (!__swap_count(entry))
-				swap_entries_free(si, ci, swp_offset(entry), 1);
-			entry.val++;
-		} while (--nr_pages);
+			if (!__swp_tb_get_count(__swap_table_get(ci, ci_off)))
+				__swap_cluster_free_entries(si, ci, ci_off, 1);
+		} while (++ci_off < ci_end);
 	}
 }
 
@@ -324,17 +322,18 @@ void __swap_cache_replace_folio(struct swap_cluster_info *ci,
 	unsigned long nr_pages = folio_nr_pages(new);
 	unsigned int ci_off = swp_cluster_offset(entry);
 	unsigned int ci_end = ci_off + nr_pages;
-	unsigned long old_tb, new_tb;
+	unsigned long pfn = folio_pfn(new);
+	unsigned long old_tb;
 
 	VM_WARN_ON_ONCE(!folio_test_swapcache(old) || !folio_test_swapcache(new));
 	VM_WARN_ON_ONCE(!folio_test_locked(old) || !folio_test_locked(new));
 	VM_WARN_ON_ONCE(!entry.val);
 
 	/* Swap cache still stores N entries instead of a high-order entry */
-	new_tb = folio_to_swp_tb(new, 0);
 	do {
-		old_tb = __swap_table_xchg(ci, ci_off, new_tb);
+		old_tb = __swap_table_get(ci, ci_off);
 		WARN_ON_ONCE(!swp_tb_is_folio(old_tb) || swp_tb_to_folio(old_tb) != old);
+		__swap_table_set(ci, ci_off, pfn_to_swp_tb(pfn, __swp_tb_get_count(old_tb)));
 	} while (++ci_off < ci_end);
 
 	/*
@@ -368,7 +367,7 @@ void __swap_cache_clear_shadow(swp_entry_t entry, int nr_ents)
 	ci_end = ci_off + nr_ents;
 	do {
 		old = __swap_table_xchg(ci, ci_off, null_to_swp_tb());
-		WARN_ON_ONCE(swp_tb_is_folio(old));
+		WARN_ON_ONCE(swp_tb_is_folio(old) || swp_tb_get_count(old));
 	} while (++ci_off < ci_end);
 }
 
diff --git a/mm/swap_table.h b/mm/swap_table.h
index 10762ac5f4f5..8415ffbe2b9c 100644
--- a/mm/swap_table.h
+++ b/mm/swap_table.h
@@ -191,6 +191,11 @@ static inline int swp_tb_get_count(unsigned long swp_tb)
 	return -EINVAL;
 }
 
+static inline unsigned long __swp_tb_mk_count(unsigned long swp_tb, int count)
+{
+	return ((swp_tb & ~SWP_TB_COUNT_MASK) | __count_to_swp_tb(count));
+}
+
 /*
  * Helpers for accessing or modifying the swap table of a cluster,
  * the swap cluster must be locked.
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 54a19ebce540..cf976ecae8a8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -51,15 +51,8 @@
 #include "swap_table.h"
 #include "swap.h"
 
-static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
-				 unsigned char);
-static void free_swap_count_continuations(struct swap_info_struct *);
 static void swap_range_alloc(struct swap_info_struct *si,
 			     unsigned int nr_entries);
-static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr);
-static void swap_put_entry_locked(struct swap_info_struct *si,
-				  struct swap_cluster_info *ci,
-				  unsigned long offset);
 static bool folio_swapcache_freeable(struct folio *folio);
 static void move_cluster(struct swap_info_struct *si,
 			 struct swap_cluster_info *ci, struct list_head *list,
@@ -182,22 +175,19 @@ static long swap_usage_in_pages(struct swap_info_struct *si)
 /* Reclaim the swap entry if swap is getting full */
 #define TTRS_FULL		0x4
 
-static bool swap_only_has_cache(struct swap_info_struct *si,
-				struct swap_cluster_info *ci,
+static bool swap_only_has_cache(struct swap_cluster_info *ci,
 				unsigned long offset, int nr_pages)
 {
 	unsigned int ci_off = offset % SWAPFILE_CLUSTER;
-	unsigned char *map = si->swap_map + offset;
-	unsigned char *map_end = map + nr_pages;
+	unsigned int ci_end = ci_off + nr_pages;
 	unsigned long swp_tb;
 
 	do {
 		swp_tb = __swap_table_get(ci, ci_off);
 		VM_WARN_ON_ONCE(!swp_tb_is_folio(swp_tb));
-		if (*map)
+		if (swp_tb_get_count(swp_tb))
 			return false;
-		++ci_off;
-	} while (++map < map_end);
+	} while (++ci_off < ci_end);
 
 	return true;
 }
@@ -256,7 +246,7 @@ again:
 	 * reference or pending writeback, and can't be allocated to others.
 	 */
 	ci = swap_cluster_lock(si, offset);
-	need_reclaim = swap_only_has_cache(si, ci, offset, nr_pages);
+	need_reclaim = swap_only_has_cache(ci, offset, nr_pages);
 	swap_cluster_unlock(ci);
 	if (!need_reclaim)
 		goto out_unlock;
@@ -479,6 +469,7 @@ static void swap_cluster_assert_empty(struct swap_cluster_info *ci,
 	} while (++ci_off < ci_end);
 
 	WARN_ON_ONCE(bad_slots != (swapoff ? ci->count : 0));
+	WARN_ON_ONCE(nr == SWAPFILE_CLUSTER && ci->extend_table);
 }
 
 static void swap_cluster_free_table(struct swap_cluster_info *ci)
@@ -807,7 +798,6 @@ static int swap_cluster_setup_bad_slot(struct swap_info_struct *si,
 		pr_warn("Duplicated bad slot offset %d\n", offset);
 		ret = -EINVAL;
 	} else {
-		si->swap_map[offset] = SWAP_MAP_BAD;
 		ci->count++;
 	}
 	spin_unlock(&ci->lock);
@@ -829,18 +819,16 @@ static bool cluster_reclaim_range(struct swap_info_struct *si,
 {
 	unsigned int nr_pages = 1 << order;
 	unsigned long offset = start, end = start + nr_pages;
-	unsigned char *map = si->swap_map;
 	unsigned long swp_tb;
 
 	spin_unlock(&ci->lock);
 	do {
-		if (READ_ONCE(map[offset]))
-			break;
 		swp_tb = swap_table_get(ci, offset % SWAPFILE_CLUSTER);
-		if (swp_tb_is_folio(swp_tb)) {
+		if (swp_tb_get_count(swp_tb))
+			break;
+		if (swp_tb_is_folio(swp_tb))
 			if (__try_to_reclaim_swap(si, offset, TTRS_ANYWAY) < 0)
 				break;
-		}
 	} while (++offset < end);
 	spin_lock(&ci->lock);
 
@@ -864,7 +852,7 @@ static bool cluster_reclaim_range(struct swap_info_struct *si,
 	 */
 	for (offset = start; offset < end; offset++) {
 		swp_tb = __swap_table_get(ci, offset % SWAPFILE_CLUSTER);
-		if (map[offset] || !swp_tb_is_null(swp_tb))
+		if (!swp_tb_is_null(swp_tb))
 			return false;
 	}
 
@@ -876,37 +864,35 @@ static bool cluster_scan_range(struct swap_info_struct *si,
 			       unsigned long offset, unsigned int nr_pages,
 			       bool *need_reclaim)
 {
-	unsigned long end = offset + nr_pages;
-	unsigned char *map = si->swap_map;
+	unsigned int ci_off = offset % SWAPFILE_CLUSTER;
+	unsigned int ci_end = ci_off + nr_pages;
 	unsigned long swp_tb;
 
-	if (cluster_is_empty(ci))
-		return true;
-
 	do {
-		if (map[offset])
-			return false;
-		swp_tb = __swap_table_get(ci, offset % SWAPFILE_CLUSTER);
-		if (swp_tb_is_folio(swp_tb)) {
+		swp_tb = __swap_table_get(ci, ci_off);
+		if (swp_tb_is_null(swp_tb))
+			continue;
+		if (swp_tb_is_folio(swp_tb) && !__swp_tb_get_count(swp_tb)) {
 			if (!vm_swap_full())
 				return false;
 			*need_reclaim = true;
-		} else {
-			/* A entry with no count and no cache must be null */
-			VM_WARN_ON_ONCE(!swp_tb_is_null(swp_tb));
+			continue;
 		}
-	} while (++offset < end);
+		/* Slot with zero count can only be NULL or folio */
+		VM_WARN_ON(!swp_tb_get_count(swp_tb));
+		return false;
+	} while (++ci_off < ci_end);
 
 	return true;
 }
 
-static bool cluster_alloc_range(struct swap_info_struct *si,
-				struct swap_cluster_info *ci,
-				struct folio *folio,
-				unsigned int offset)
+static bool __swap_cluster_alloc_entries(struct swap_info_struct *si,
+					 struct swap_cluster_info *ci,
+					 struct folio *folio,
+					 unsigned int ci_off)
 {
-	unsigned long nr_pages;
 	unsigned int order;
+	unsigned long nr_pages;
 
 	lockdep_assert_held(&ci->lock);
 
@@ -925,14 +911,15 @@ static bool cluster_alloc_range(struct swap_info_struct *si,
 	if (likely(folio)) {
 		order = folio_order(folio);
 		nr_pages = 1 << order;
-		swap_cluster_assert_empty(ci, offset % SWAPFILE_CLUSTER, nr_pages, false);
-		__swap_cache_add_folio(ci, folio, swp_entry(si->type, offset));
+		swap_cluster_assert_empty(ci, ci_off, nr_pages, false);
+		__swap_cache_add_folio(ci, folio, swp_entry(si->type,
+							    ci_off + cluster_offset(si, ci)));
 	} else if (IS_ENABLED(CONFIG_HIBERNATION)) {
 		order = 0;
 		nr_pages = 1;
-		WARN_ON_ONCE(si->swap_map[offset]);
-		si->swap_map[offset] = 1;
-		swap_cluster_assert_empty(ci, offset % SWAPFILE_CLUSTER, 1, false);
+		swap_cluster_assert_empty(ci, ci_off, 1, false);
+		/* Sets a fake shadow as placeholder */
+		__swap_table_set(ci, ci_off, shadow_to_swp_tb(NULL, 1));
 	} else {
 		/* Allocation without folio is only possible with hibernation */
 		WARN_ON_ONCE(1);
@@ -983,7 +970,7 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
 			if (!ret)
 				continue;
 		}
-		if (!cluster_alloc_range(si, ci, folio, offset))
+		if (!__swap_cluster_alloc_entries(si, ci, folio, offset % SWAPFILE_CLUSTER))
 			break;
 		found = offset;
 		offset += nr_pages;
@@ -1030,7 +1017,7 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
 	long to_scan = 1;
 	unsigned long offset, end;
 	struct swap_cluster_info *ci;
-	unsigned char *map = si->swap_map;
+	unsigned long swp_tb;
 	int nr_reclaim;
 
 	if (force)
@@ -1042,8 +1029,8 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
 		to_scan--;
 
 		while (offset < end) {
-			if (!READ_ONCE(map[offset]) &&
-			    swp_tb_is_folio(swap_table_get(ci, offset % SWAPFILE_CLUSTER))) {
+			swp_tb = swap_table_get(ci, offset % SWAPFILE_CLUSTER);
+			if (swp_tb_is_folio(swp_tb) && !__swp_tb_get_count(swp_tb)) {
 				spin_unlock(&ci->lock);
 				nr_reclaim = __try_to_reclaim_swap(si, offset,
 								   TTRS_ANYWAY);
@@ -1452,40 +1439,127 @@ start_over:
 	return false;
 }
 
+static int swap_extend_table_alloc(struct swap_info_struct *si,
+				   struct swap_cluster_info *ci, gfp_t gfp)
+{
+	void *table;
+
+	table = kzalloc(sizeof(ci->extend_table[0]) * SWAPFILE_CLUSTER, gfp);
+	if (!table)
+		return -ENOMEM;
+
+	spin_lock(&ci->lock);
+	if (!ci->extend_table)
+		ci->extend_table = table;
+	else
+		kfree(table);
+	spin_unlock(&ci->lock);
+	return 0;
+}
+
+int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp)
+{
+	int ret;
+	struct swap_info_struct *si;
+	struct swap_cluster_info *ci;
+	unsigned long offset = swp_offset(entry);
+
+	si = get_swap_device(entry);
+	if (!si)
+		return 0;
+
+	ci = __swap_offset_to_cluster(si, offset);
+	ret = swap_extend_table_alloc(si, ci, gfp);
+
+	put_swap_device(si);
+	return ret;
+}
+
+static void swap_extend_table_try_free(struct swap_cluster_info *ci)
+{
+	unsigned long i;
+	bool can_free = true;
+
+	if (!ci->extend_table)
+		return;
+
+	for (i = 0; i < SWAPFILE_CLUSTER; i++) {
+		if (ci->extend_table[i])
+			can_free = false;
+	}
+
+	if (can_free) {
+		kfree(ci->extend_table);
+		ci->extend_table = NULL;
+	}
+}
+
+/* Decrease the swap count of one slot, without freeing it */
+static void __swap_cluster_put_entry(struct swap_cluster_info *ci,
+				    unsigned int ci_off)
+{
+	int count;
+	unsigned long swp_tb;
+
+	lockdep_assert_held(&ci->lock);
+	swp_tb = __swap_table_get(ci, ci_off);
+	count = __swp_tb_get_count(swp_tb);
+
+	VM_WARN_ON_ONCE(count <= 0);
+	VM_WARN_ON_ONCE(count > SWP_TB_COUNT_MAX);
+
+	if (count == SWP_TB_COUNT_MAX) {
+		count = ci->extend_table[ci_off];
+		/* Overflow starts with SWP_TB_COUNT_MAX */
+		VM_WARN_ON_ONCE(count < SWP_TB_COUNT_MAX);
+		count--;
+		if (count == (SWP_TB_COUNT_MAX - 1)) {
+			ci->extend_table[ci_off] = 0;
+			__swap_table_set(ci, ci_off, __swp_tb_mk_count(swp_tb, count));
+			swap_extend_table_try_free(ci);
+		} else {
+			ci->extend_table[ci_off] = count;
+		}
+	} else {
+		__swap_table_set(ci, ci_off, __swp_tb_mk_count(swp_tb, --count));
+	}
+}
+
 /**
- * swap_put_entries_cluster - Decrease the swap count of a set of slots.
+ * swap_put_entries_cluster - Decrease the swap count of slots within one cluster
  * @si: The swap device.
- * @start: start offset of slots.
+ * @offset: start offset of slots.
  * @nr: number of slots.
- * @reclaim_cache: if true, also reclaim the swap cache.
+ * @reclaim_cache: if true, also reclaim the swap cache if slots are freed.
  *
  * This helper decreases the swap count of a set of slots and tries to
  * batch free them. Also reclaims the swap cache if @reclaim_cache is true.
- * Context: The caller must ensure that all slots belong to the same
- * cluster and their swap count doesn't go underflow.
+ *
+ * Context: The specified slots must be pinned by existing swap count or swap
+ * cache reference, so they won't be released until this helper returns.
  */
 static void swap_put_entries_cluster(struct swap_info_struct *si,
-				     unsigned long start, int nr,
+				     pgoff_t offset, int nr,
 				     bool reclaim_cache)
 {
-	unsigned long offset = start, end = start + nr;
-	unsigned long batch_start = SWAP_ENTRY_INVALID;
 	struct swap_cluster_info *ci;
+	unsigned int ci_off, ci_end;
+	pgoff_t end = offset + nr;
 	bool need_reclaim = false;
 	unsigned int nr_reclaimed;
 	unsigned long swp_tb;
-	unsigned int count;
+	int ci_batch = -1;
 
 	ci = swap_cluster_lock(si, offset);
+	ci_off = offset % SWAPFILE_CLUSTER;
+	ci_end = ci_off + nr;
 	do {
-		swp_tb = __swap_table_get(ci, offset % SWAPFILE_CLUSTER);
-		count = si->swap_map[offset];
-		VM_WARN_ON(count < 1 || count == SWAP_MAP_BAD);
-		if (count == 1) {
+		swp_tb = __swap_table_get(ci, ci_off);
+		if (swp_tb_get_count(swp_tb) == 1) {
 			/* count == 1 and non-cached slots will be batch freed. */
 			if (!swp_tb_is_folio(swp_tb)) {
-				if (!batch_start)
-					batch_start = offset;
+				if (ci_batch == -1)
+					ci_batch = ci_off;
 				continue;
 			}
 			/* count will be 0 after put, slot can be reclaimed */
@@ -1497,21 +1571,20 @@ static void swap_put_entries_cluster(struct swap_info_struct *si,
 		 * slots will be freed when folio is removed from swap cache
 		 * (__swap_cache_del_folio).
 		 */
-		swap_put_entry_locked(si, ci, offset);
-		if (batch_start) {
-			swap_entries_free(si, ci, batch_start, offset - batch_start);
-			batch_start = SWAP_ENTRY_INVALID;
+		__swap_cluster_put_entry(ci, ci_off);
+		if (ci_batch != -1) {
+			__swap_cluster_free_entries(si, ci, ci_batch, ci_off - ci_batch);
+			ci_batch = -1;
 		}
-	} while (++offset < end);
+	} while (++ci_off < ci_end);
 
-	if (batch_start)
-		swap_entries_free(si, ci, batch_start, offset - batch_start);
+	if (ci_batch != -1)
+		__swap_cluster_free_entries(si, ci, ci_batch, ci_off - ci_batch);
 	swap_cluster_unlock(ci);
 
 	if (!need_reclaim || !reclaim_cache)
 		return;
 
-	offset = start;
 	do {
 		nr_reclaimed = __try_to_reclaim_swap(si, offset,
 						     TTRS_UNMAPPED | TTRS_FULL);
@@ -1521,6 +1594,92 @@ static void swap_put_entries_cluster(struct swap_info_struct *si,
 	} while (offset < end);
 }
 
+/* Increase the swap count of one slot. */
+static int __swap_cluster_dup_entry(struct swap_cluster_info *ci,
+				    unsigned int ci_off)
+{
+	int count;
+	unsigned long swp_tb;
+
+	lockdep_assert_held(&ci->lock);
+	swp_tb = __swap_table_get(ci, ci_off);
+	/* Bad or special slots can't be handled */
+	if (WARN_ON_ONCE(swp_tb_is_bad(swp_tb)))
+		return -EINVAL;
+	count = __swp_tb_get_count(swp_tb);
+	/* Must be either cached or have a count already */
+	if (WARN_ON_ONCE(!count && !swp_tb_is_folio(swp_tb)))
+		return -ENOENT;
+
+	if (likely(count < (SWP_TB_COUNT_MAX - 1))) {
+		__swap_table_set(ci, ci_off, __swp_tb_mk_count(swp_tb, count + 1));
+		VM_WARN_ON_ONCE(ci->extend_table && ci->extend_table[ci_off]);
+	} else if (count == (SWP_TB_COUNT_MAX - 1)) {
+		if (ci->extend_table) {
+			VM_WARN_ON_ONCE(ci->extend_table[ci_off]);
+			ci->extend_table[ci_off] = SWP_TB_COUNT_MAX;
+			__swap_table_set(ci, ci_off, __swp_tb_mk_count(swp_tb, SWP_TB_COUNT_MAX));
+		} else {
+			return -ENOMEM;
+		}
+	} else if (count == SWP_TB_COUNT_MAX) {
+		VM_WARN_ON_ONCE(ci->extend_table[ci_off] >=
+                               type_max(typeof(ci->extend_table[0])));
+		++ci->extend_table[ci_off];
+	} else {
+		/* Never happens unless counting went wrong */
+		WARN_ON_ONCE(1);
+	}
+
+	return 0;
+}
+
+/**
+ * swap_dup_entries_cluster: Increase the swap count of slots within one cluster.
+ * @si: The swap device.
+ * @offset: start offset of slots.
+ * @nr: number of slots.
+ *
+ * Context: The specified slots must be pinned by existing swap count or swap
+ * cache reference, so they won't be released until this helper returns.
+ * Return: 0 on success. -ENOMEM if the swap count maxed out (SWP_TB_COUNT_MAX)
+ * and failed to allocate an extended table, -EINVAL if any entry is bad entry.
+ */
+static int swap_dup_entries_cluster(struct swap_info_struct *si,
+				    pgoff_t offset, int nr)
+{
+	int err;
+	struct swap_cluster_info *ci;
+	unsigned int ci_start, ci_off, ci_end;
+
+	ci_start = offset % SWAPFILE_CLUSTER;
+	ci_end = ci_start + nr;
+	ci_off = ci_start;
+	ci = swap_cluster_lock(si, offset);
+restart:
+	do {
+		err = __swap_cluster_dup_entry(ci, ci_off);
+		if (unlikely(err)) {
+			if (err == -ENOMEM) {
+				spin_unlock(&ci->lock);
+				err = swap_extend_table_alloc(si, ci, GFP_ATOMIC);
+				spin_lock(&ci->lock);
+				if (!err)
+					goto restart;
+			}
+			goto failed;
+		}
+	} while (++ci_off < ci_end);
+	swap_cluster_unlock(ci);
+	return 0;
+failed:
+	while (ci_off-- > ci_start)
+		__swap_cluster_put_entry(ci, ci_off);
+	swap_extend_table_try_free(ci);
+	swap_cluster_unlock(ci);
+	return err;
+}
+
 /**
  * folio_alloc_swap - allocate swap space for a folio
  * @folio: folio we want to move to swap
@@ -1589,13 +1748,10 @@ again:
  * Context: Caller must ensure the folio is locked and in the swap cache.
  * NOTE: The caller also has to ensure there is no raced call to
  * swap_put_entries_direct on its swap entry before this helper returns, or
- * the swap map may underflow. Currently, we only accept @subpage == NULL
- * for shmem due to the limitation of swap continuation: shmem always
- * duplicates the swap entry only once, so there is no such issue for it.
+ * the swap count may underflow.
  */
 int folio_dup_swap(struct folio *folio, struct page *subpage)
 {
-	int err = 0;
 	swp_entry_t entry = folio->swap;
 	unsigned long nr_pages = folio_nr_pages(folio);
 
@@ -1607,10 +1763,8 @@ int folio_dup_swap(struct folio *folio, struct page *subpage)
 		nr_pages = 1;
 	}
 
-	while (!err && __swap_duplicate(entry, 1, nr_pages) == -ENOMEM)
-		err = add_swap_count_continuation(entry, GFP_ATOMIC);
-
-	return err;
+	return swap_dup_entries_cluster(swap_entry_to_info(entry),
+					swp_offset(entry), nr_pages);
 }
 
 /**
@@ -1639,28 +1793,6 @@ void folio_put_swap(struct folio *folio, struct page *subpage)
 	swap_put_entries_cluster(si, swp_offset(entry), nr_pages, false);
 }
 
-static void swap_put_entry_locked(struct swap_info_struct *si,
-				  struct swap_cluster_info *ci,
-				  unsigned long offset)
-{
-	unsigned char count;
-
-	count = si->swap_map[offset];
-	if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
-		if (count == COUNT_CONTINUED) {
-			if (swap_count_continued(si, offset, count))
-				count = SWAP_MAP_MAX | COUNT_CONTINUED;
-			else
-				count = SWAP_MAP_MAX;
-		} else
-			count--;
-	}
-
-	WRITE_ONCE(si->swap_map[offset], count);
-	if (!count && !swp_tb_is_folio(__swap_table_get(ci, offset % SWAPFILE_CLUSTER)))
-		swap_entries_free(si, ci, offset, 1);
-}
-
 /*
  * When we get a swap entry, if there aren't some other ways to
  * prevent swapoff, such as the folio in swap cache is locked, RCU
@@ -1727,31 +1859,30 @@ put_out:
 }
 
 /*
- * Drop the last ref of swap entries, caller have to ensure all entries
- * belong to the same cgroup and cluster.
+ * Free a set of swap slots after their swap count dropped to zero, or will be
+ * zero after putting the last ref (saves one __swap_cluster_put_entry call).
  */
-void swap_entries_free(struct swap_info_struct *si,
-		       struct swap_cluster_info *ci,
-		       unsigned long offset, unsigned int nr_pages)
+void __swap_cluster_free_entries(struct swap_info_struct *si,
+				 struct swap_cluster_info *ci,
+				 unsigned int ci_start, unsigned int nr_pages)
 {
-	swp_entry_t entry = swp_entry(si->type, offset);
-	unsigned char *map = si->swap_map + offset;
-	unsigned char *map_end = map + nr_pages;
+	unsigned long old_tb;
+	unsigned int ci_off = ci_start, ci_end = ci_start + nr_pages;
+	unsigned long offset = cluster_offset(si, ci) + ci_start;
 
-	/* It should never free entries across different clusters */
-	VM_BUG_ON(ci != __swap_offset_to_cluster(si, offset + nr_pages - 1));
-	VM_BUG_ON(cluster_is_empty(ci));
-	VM_BUG_ON(ci->count < nr_pages);
+	VM_WARN_ON(ci->count < nr_pages);
 
 	ci->count -= nr_pages;
 	do {
-		VM_WARN_ON(*map > 1);
-		*map = 0;
-	} while (++map < map_end);
+		old_tb = __swap_table_get(ci, ci_off);
+		/* Release the last ref, or after swap cache is dropped */
+		VM_WARN_ON(!swp_tb_is_shadow(old_tb) || __swp_tb_get_count(old_tb) > 1);
+		__swap_table_set(ci, ci_off, null_to_swp_tb());
+	} while (++ci_off < ci_end);
 
-	mem_cgroup_uncharge_swap(entry, nr_pages);
+	mem_cgroup_uncharge_swap(swp_entry(si->type, offset), nr_pages);
 	swap_range_free(si, offset, nr_pages);
-	swap_cluster_assert_empty(ci, offset % SWAPFILE_CLUSTER, nr_pages, false);
+	swap_cluster_assert_empty(ci, ci_start, nr_pages, false);
 
 	if (!ci->count)
 		free_cluster(si, ci);
@@ -1761,10 +1892,10 @@ void swap_entries_free(struct swap_info_struct *si,
 
 int __swap_count(swp_entry_t entry)
 {
-	struct swap_info_struct *si = __swap_entry_to_info(entry);
-	pgoff_t offset = swp_offset(entry);
+	struct swap_cluster_info *ci = __swap_entry_to_cluster(entry);
+	unsigned int ci_off = swp_cluster_offset(entry);
 
-	return si->swap_map[offset];
+	return swp_tb_get_count(__swap_table_get(ci, ci_off));
 }
 
 /**
@@ -1776,81 +1907,62 @@ bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry)
 {
 	pgoff_t offset = swp_offset(entry);
 	struct swap_cluster_info *ci;
-	int count;
+	unsigned long swp_tb;
 
 	ci = swap_cluster_lock(si, offset);
-	count = si->swap_map[offset];
+	swp_tb = swap_table_get(ci, offset % SWAPFILE_CLUSTER);
 	swap_cluster_unlock(ci);
 
-	return count && count != SWAP_MAP_BAD;
+	return swp_tb_get_count(swp_tb) > 0;
 }
 
 /*
  * How many references to @entry are currently swapped out?
- * This considers COUNT_CONTINUED so it returns exact answer.
+ * This returns exact answer.
  */
 int swp_swapcount(swp_entry_t entry)
 {
-	int count, tmp_count, n;
 	struct swap_info_struct *si;
 	struct swap_cluster_info *ci;
-	struct page *page;
-	pgoff_t offset;
-	unsigned char *map;
+	unsigned long swp_tb;
+	int count;
 
 	si = get_swap_device(entry);
 	if (!si)
 		return 0;
 
-	offset = swp_offset(entry);
-
-	ci = swap_cluster_lock(si, offset);
-
-	count = si->swap_map[offset];
-	if (!(count & COUNT_CONTINUED))
-		goto out;
-
-	count &= ~COUNT_CONTINUED;
-	n = SWAP_MAP_MAX + 1;
-
-	page = vmalloc_to_page(si->swap_map + offset);
-	offset &= ~PAGE_MASK;
-	VM_BUG_ON(page_private(page) != SWP_CONTINUED);
-
-	do {
-		page = list_next_entry(page, lru);
-		map = kmap_local_page(page);
-		tmp_count = map[offset];
-		kunmap_local(map);
-
-		count += (tmp_count & ~COUNT_CONTINUED) * n;
-		n *= (SWAP_CONT_MAX + 1);
-	} while (tmp_count & COUNT_CONTINUED);
-out:
+	ci = swap_cluster_lock(si, swp_offset(entry));
+	swp_tb = __swap_table_get(ci, swp_cluster_offset(entry));
+	count = swp_tb_get_count(swp_tb);
+	if (count == SWP_TB_COUNT_MAX)
+		count = ci->extend_table[swp_cluster_offset(entry)];
 	swap_cluster_unlock(ci);
 	put_swap_device(si);
-	return count;
+
+	return count < 0 ? 0 : count;
 }
 
 static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
 					 swp_entry_t entry, int order)
 {
 	struct swap_cluster_info *ci;
-	unsigned char *map = si->swap_map;
 	unsigned int nr_pages = 1 << order;
 	unsigned long roffset = swp_offset(entry);
 	unsigned long offset = round_down(roffset, nr_pages);
+	unsigned int ci_off;
 	int i;
 	bool ret = false;
 
 	ci = swap_cluster_lock(si, offset);
 	if (nr_pages == 1) {
-		if (map[roffset])
+		ci_off = roffset % SWAPFILE_CLUSTER;
+		if (swp_tb_get_count(__swap_table_get(ci, ci_off)))
 			ret = true;
 		goto unlock_out;
 	}
 	for (i = 0; i < nr_pages; i++) {
-		if (map[offset + i]) {
+		ci_off = (offset + i) % SWAPFILE_CLUSTER;
+		if (swp_tb_get_count(__swap_table_get(ci, ci_off))) {
 			ret = true;
 			break;
 		}
@@ -2016,7 +2128,8 @@ void swap_free_hibernation_slot(swp_entry_t entry)
 		return;
 
 	ci = swap_cluster_lock(si, offset);
-	swap_put_entry_locked(si, ci, offset);
+	__swap_cluster_put_entry(ci, offset % SWAPFILE_CLUSTER);
+	__swap_cluster_free_entries(si, ci, offset % SWAPFILE_CLUSTER, 1);
 	swap_cluster_unlock(ci);
 
 	/* In theory readahead might add it to the swap cache by accident */
@@ -2242,13 +2355,10 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned int type)
 {
 	pte_t *pte = NULL;
-	struct swap_info_struct *si;
 
-	si = swap_info[type];
 	do {
 		struct folio *folio;
-		unsigned long offset;
-		unsigned char swp_count;
+		unsigned long swp_tb;
 		softleaf_t entry;
 		int ret;
 		pte_t ptent;
@@ -2267,7 +2377,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		if (swp_type(entry) != type)
 			continue;
 
-		offset = swp_offset(entry);
 		pte_unmap(pte);
 		pte = NULL;
 
@@ -2284,8 +2393,9 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 						&vmf);
 		}
 		if (!folio) {
-			swp_count = READ_ONCE(si->swap_map[offset]);
-			if (swp_count == 0 || swp_count == SWAP_MAP_BAD)
+			swp_tb = swap_table_get(__swap_entry_to_cluster(entry),
+						swp_cluster_offset(entry));
+			if (swp_tb_get_count(swp_tb) <= 0)
 				continue;
 			return -ENOMEM;
 		}
@@ -2413,7 +2523,7 @@ unlock:
 }
 
 /*
- * Scan swap_map from current position to next entry still in use.
+ * Scan swap table from current position to next entry still in use.
  * Return 0 if there are no inuse entries after prev till end of
  * the map.
  */
@@ -2422,7 +2532,6 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 {
 	unsigned int i;
 	unsigned long swp_tb;
-	unsigned char count;
 
 	/*
 	 * No need for swap_lock here: we're just looking
@@ -2431,12 +2540,9 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 	 * allocations from this area (while holding swap_lock).
 	 */
 	for (i = prev + 1; i < si->max; i++) {
-		count = READ_ONCE(si->swap_map[i]);
 		swp_tb = swap_table_get(__swap_offset_to_cluster(si, i),
 					i % SWAPFILE_CLUSTER);
-		if (count == SWAP_MAP_BAD)
-			continue;
-		if (count || swp_tb_is_folio(swp_tb))
+		if (!swp_tb_is_null(swp_tb) && !swp_tb_is_bad(swp_tb))
 			break;
 		if ((i % LATENCY_LIMIT) == 0)
 			cond_resched();
@@ -2796,7 +2902,6 @@ static void flush_percpu_swap_cluster(struct swap_info_struct *si)
 SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 {
 	struct swap_info_struct *p = NULL;
-	unsigned char *swap_map;
 	unsigned long *zeromap;
 	struct swap_cluster_info *cluster_info;
 	struct file *swap_file, *victim;
@@ -2874,8 +2979,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	flush_percpu_swap_cluster(p);
 
 	destroy_swap_extents(p, p->swap_file);
-	if (p->flags & SWP_CONTINUED)
-		free_swap_count_continuations(p);
 
 	if (!(p->flags & SWP_SOLIDSTATE))
 		atomic_dec(&nr_rotate_swap);
@@ -2887,8 +2990,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 
 	swap_file = p->swap_file;
 	p->swap_file = NULL;
-	swap_map = p->swap_map;
-	p->swap_map = NULL;
 	zeromap = p->zeromap;
 	p->zeromap = NULL;
 	maxpages = p->max;
@@ -2902,7 +3003,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	mutex_unlock(&swapon_mutex);
 	kfree(p->global_cluster);
 	p->global_cluster = NULL;
-	vfree(swap_map);
 	kvfree(zeromap);
 	free_swap_cluster_info(cluster_info, maxpages);
 	/* Destroy swap account information */
@@ -3122,7 +3222,6 @@ static struct swap_info_struct *alloc_swap_info(void)
 		kvfree(defer);
 	}
 	spin_lock_init(&p->lock);
-	spin_lock_init(&p->cont_lock);
 	atomic_long_set(&p->inuse_pages, SWAP_USAGE_OFFLIST_BIT);
 	init_completion(&p->comp);
 
@@ -3249,19 +3348,6 @@ static unsigned long read_swap_header(struct swap_info_struct *si,
 	return maxpages;
 }
 
-static int setup_swap_map(struct swap_info_struct *si,
-			  union swap_header *swap_header,
-			  unsigned long maxpages)
-{
-	unsigned char *swap_map;
-
-	swap_map = vzalloc(maxpages);
-	si->swap_map = swap_map;
-	if (!swap_map)
-		return -ENOMEM;
-	return 0;
-}
-
 static int setup_swap_clusters_info(struct swap_info_struct *si,
 				    union swap_header *swap_header,
 				    unsigned long maxpages)
@@ -3446,11 +3532,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
 	maxpages = si->max;
 
-	/* Setup the swap map and apply bad block */
-	error = setup_swap_map(si, swap_header, maxpages);
-	if (error)
-		goto bad_swap_unlock_inode;
-
 	/* Set up the swap cluster info */
 	error = setup_swap_clusters_info(si, swap_header, maxpages);
 	if (error)
@@ -3571,8 +3652,6 @@ bad_swap:
 	inode = NULL;
 	destroy_swap_extents(si, swap_file);
 	swap_cgroup_swapoff(si->type);
-	vfree(si->swap_map);
-	si->swap_map = NULL;
 	free_swap_cluster_info(si->cluster_info, si->max);
 	si->cluster_info = NULL;
 	kvfree(si->zeromap);
@@ -3613,322 +3692,29 @@ void si_swapinfo(struct sysinfo *val)
 	spin_unlock(&swap_lock);
 }
 
-/*
- * Verify that nr swap entries are valid and increment their swap map counts.
- *
- * Returns error code in following case.
- * - success -> 0
- * - swp_entry is invalid -> EINVAL
- * - swap-mapped reference is requested but the entry is not used. -> ENOENT
- * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
- */
-static int swap_dup_entries(struct swap_info_struct *si,
-			    struct swap_cluster_info *ci,
-			    unsigned long offset,
-			    unsigned char usage, int nr)
-{
-	int i;
-	unsigned char count;
-
-	for (i = 0; i < nr; i++) {
-		count = si->swap_map[offset + i];
-		/*
-		 * For swapin out, allocator never allocates bad slots. for
-		 * swapin, readahead is guarded by swap_entry_swapped.
-		 */
-		if (WARN_ON(count == SWAP_MAP_BAD))
-			return -ENOENT;
-		/*
-		 * Swap count duplication must be guarded by either swap cache folio (from
-		 * folio_dup_swap) or external lock of existing entry (from swap_dup_entry_direct).
-		 */
-		if (WARN_ON(!count &&
-			    !swp_tb_is_folio(__swap_table_get(ci, offset % SWAPFILE_CLUSTER))))
-			return -ENOENT;
-		if (WARN_ON((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX))
-			return -EINVAL;
-	}
-
-	for (i = 0; i < nr; i++) {
-		count = si->swap_map[offset + i];
-		if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
-			count += usage;
-		else if (swap_count_continued(si, offset + i, count))
-			count = COUNT_CONTINUED;
-		else {
-			/*
-			 * Don't need to rollback changes, because if
-			 * usage == 1, there must be nr == 1.
-			 */
-			return -ENOMEM;
-		}
-
-		WRITE_ONCE(si->swap_map[offset + i], count);
-	}
-
-	return 0;
-}
-
-static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
-{
-	int err;
-	struct swap_info_struct *si;
-	struct swap_cluster_info *ci;
-	unsigned long offset = swp_offset(entry);
-
-	si = swap_entry_to_info(entry);
-	if (WARN_ON_ONCE(!si)) {
-		pr_err("%s%08lx\n", Bad_file, entry.val);
-		return -EINVAL;
-	}
-
-	VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
-	ci = swap_cluster_lock(si, offset);
-	err = swap_dup_entries(si, ci, offset, usage, nr);
-	swap_cluster_unlock(ci);
-	return err;
-}
-
 /*
  * swap_dup_entry_direct() - Increase reference count of a swap entry by one.
  * @entry: first swap entry from which we want to increase the refcount.
  *
- * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required
- * but could not be atomically allocated.  Returns 0, just as if it succeeded,
- * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which
- * might occur if a page table entry has got corrupted.
+ * Returns 0 for success, or -ENOMEM if the extend table is required
+ * but could not be atomically allocated.  Returns -EINVAL if the swap
+ * entry is invalid, which might occur if a page table entry has got
+ * corrupted.
  *
  * Context: Caller must ensure there is no race condition on the reference
  * owner. e.g., locking the PTL of a PTE containing the entry being increased.
  */
 int swap_dup_entry_direct(swp_entry_t entry)
-{
-	int err = 0;
-	while (!err && __swap_duplicate(entry, 1, 1) == -ENOMEM)
-		err = add_swap_count_continuation(entry, GFP_ATOMIC);
-	return err;
-}
-
-/*
- * add_swap_count_continuation - called when a swap count is duplicated
- * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
- * page of the original vmalloc'ed swap_map, to hold the continuation count
- * (for that entry and for its neighbouring PAGE_SIZE swap entries).  Called
- * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc.
- *
- * These continuation pages are seldom referenced: the common paths all work
- * on the original swap_map, only referring to a continuation page when the
- * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX.
- *
- * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding
- * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL)
- * can be called after dropping locks.
- */
-int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
 {
 	struct swap_info_struct *si;
-	struct swap_cluster_info *ci;
-	struct page *head;
-	struct page *page;
-	struct page *list_page;
-	pgoff_t offset;
-	unsigned char count;
-	int ret = 0;
-
-	/*
-	 * When debugging, it's easier to use __GFP_ZERO here; but it's better
-	 * for latency not to zero a page while GFP_ATOMIC and holding locks.
-	 */
-	page = alloc_page(gfp_mask | __GFP_HIGHMEM);
-
-	si = get_swap_device(entry);
-	if (!si) {
-		/*
-		 * An acceptable race has occurred since the failing
-		 * __swap_duplicate(): the swap device may be swapoff
-		 */
-		goto outer;
-	}
-
-	offset = swp_offset(entry);
-
-	ci = swap_cluster_lock(si, offset);
-
-	count = si->swap_map[offset];
-
-	if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
-		/*
-		 * The higher the swap count, the more likely it is that tasks
-		 * will race to add swap count continuation: we need to avoid
-		 * over-provisioning.
-		 */
-		goto out;
-	}
-
-	if (!page) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	head = vmalloc_to_page(si->swap_map + offset);
-	offset &= ~PAGE_MASK;
-
-	spin_lock(&si->cont_lock);
-	/*
-	 * Page allocation does not initialize the page's lru field,
-	 * but it does always reset its private field.
-	 */
-	if (!page_private(head)) {
-		BUG_ON(count & COUNT_CONTINUED);
-		INIT_LIST_HEAD(&head->lru);
-		set_page_private(head, SWP_CONTINUED);
-		si->flags |= SWP_CONTINUED;
-	}
-
-	list_for_each_entry(list_page, &head->lru, lru) {
-		unsigned char *map;
-
-		/*
-		 * If the previous map said no continuation, but we've found
-		 * a continuation page, free our allocation and use this one.
-		 */
-		if (!(count & COUNT_CONTINUED))
-			goto out_unlock_cont;
-
-		map = kmap_local_page(list_page) + offset;
-		count = *map;
-		kunmap_local(map);
-
-		/*
-		 * If this continuation count now has some space in it,
-		 * free our allocation and use this one.
-		 */
-		if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
-			goto out_unlock_cont;
-	}
 
-	list_add_tail(&page->lru, &head->lru);
-	page = NULL;			/* now it's attached, don't free it */
-out_unlock_cont:
-	spin_unlock(&si->cont_lock);
-out:
-	swap_cluster_unlock(ci);
-	put_swap_device(si);
-outer:
-	if (page)
-		__free_page(page);
-	return ret;
-}
-
-/*
- * swap_count_continued - when the original swap_map count is incremented
- * from SWAP_MAP_MAX, check if there is already a continuation page to carry
- * into, carry if so, or else fail until a new continuation page is allocated;
- * when the original swap_map count is decremented from 0 with continuation,
- * borrow from the continuation and report whether it still holds more.
- * Called while __swap_duplicate() or caller of swap_put_entry_locked()
- * holds cluster lock.
- */
-static bool swap_count_continued(struct swap_info_struct *si,
-				 pgoff_t offset, unsigned char count)
-{
-	struct page *head;
-	struct page *page;
-	unsigned char *map;
-	bool ret;
-
-	head = vmalloc_to_page(si->swap_map + offset);
-	if (page_private(head) != SWP_CONTINUED) {
-		BUG_ON(count & COUNT_CONTINUED);
-		return false;		/* need to add count continuation */
-	}
-
-	spin_lock(&si->cont_lock);
-	offset &= ~PAGE_MASK;
-	page = list_next_entry(head, lru);
-	map = kmap_local_page(page) + offset;
-
-	if (count == SWAP_MAP_MAX)	/* initial increment from swap_map */
-		goto init_map;		/* jump over SWAP_CONT_MAX checks */
-
-	if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */
-		/*
-		 * Think of how you add 1 to 999
-		 */
-		while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) {
-			kunmap_local(map);
-			page = list_next_entry(page, lru);
-			BUG_ON(page == head);
-			map = kmap_local_page(page) + offset;
-		}
-		if (*map == SWAP_CONT_MAX) {
-			kunmap_local(map);
-			page = list_next_entry(page, lru);
-			if (page == head) {
-				ret = false;	/* add count continuation */
-				goto out;
-			}
-			map = kmap_local_page(page) + offset;
-init_map:		*map = 0;		/* we didn't zero the page */
-		}
-		*map += 1;
-		kunmap_local(map);
-		while ((page = list_prev_entry(page, lru)) != head) {
-			map = kmap_local_page(page) + offset;
-			*map = COUNT_CONTINUED;
-			kunmap_local(map);
-		}
-		ret = true;			/* incremented */
-
-	} else {				/* decrementing */
-		/*
-		 * Think of how you subtract 1 from 1000
-		 */
-		BUG_ON(count != COUNT_CONTINUED);
-		while (*map == COUNT_CONTINUED) {
-			kunmap_local(map);
-			page = list_next_entry(page, lru);
-			BUG_ON(page == head);
-			map = kmap_local_page(page) + offset;
-		}
-		BUG_ON(*map == 0);
-		*map -= 1;
-		if (*map == 0)
-			count = 0;
-		kunmap_local(map);
-		while ((page = list_prev_entry(page, lru)) != head) {
-			map = kmap_local_page(page) + offset;
-			*map = SWAP_CONT_MAX | count;
-			count = COUNT_CONTINUED;
-			kunmap_local(map);
-		}
-		ret = count == COUNT_CONTINUED;
+	si = swap_entry_to_info(entry);
+	if (WARN_ON_ONCE(!si)) {
+		pr_err("%s%08lx\n", Bad_file, entry.val);
+		return -EINVAL;
 	}
-out:
-	spin_unlock(&si->cont_lock);
-	return ret;
-}
 
-/*
- * free_swap_count_continuations - swapoff free all the continuation pages
- * appended to the swap_map, after swap_map is quiesced, before vfree'ing it.
- */
-static void free_swap_count_continuations(struct swap_info_struct *si)
-{
-	pgoff_t offset;
-
-	for (offset = 0; offset < si->max; offset += PAGE_SIZE) {
-		struct page *head;
-		head = vmalloc_to_page(si->swap_map + offset);
-		if (page_private(head)) {
-			struct page *page, *next;
-
-			list_for_each_entry_safe(page, next, &head->lru, lru) {
-				list_del(&page->lru);
-				__free_page(page);
-			}
-		}
-	}
+	return swap_dup_entries_cluster(si, swp_offset(entry), 1);
 }
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-- 
cgit v1.2.3


From 1beb9b7223d2a1f1872f76a3d29b0a4a3cee4171 Mon Sep 17 00:00:00 2001
From: "Pratyush Yadav (Google)" <pratyush@kernel.org>
Date: Mon, 16 Feb 2026 19:59:32 +0100
Subject: memfd: export memfd_{add,get}_seals()

Patch series "mm: memfd_luo: preserve file seals", v2.

This series adds support for preserving file seals when preserving a memfd
using LUO.  Patch 1 exports some memfd seal manipulation functions and
patch 2 adds support for preserving them.  Since it makes changes to the
serialized data structure for memfd, it also bumps the version number.


This patch (of 2):

Support for preserving file seals will be added to memfd preservation
using the Live Update Orchestrator (LUO).  Export memfd_{add,get}_seals)()
so memfd_luo can use them to manipulate the seals.

Link: https://lkml.kernel.org/r/20260216185946.1215770-1-pratyush@kernel.org
Link: https://lkml.kernel.org/r/20260216185946.1215770-2-pratyush@kernel.org
Signed-off-by: Pratyush Yadav (Google) <pratyush@kernel.org>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Tested-by: Samiullah Khawaja <skhawaja@google.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memfd.h | 12 ++++++++++++
 mm/memfd.c            |  4 ++--
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memfd.h b/include/linux/memfd.h
index c328a7b356d0..b4fda09dab9f 100644
--- a/include/linux/memfd.h
+++ b/include/linux/memfd.h
@@ -18,6 +18,8 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx);
  */
 int memfd_check_seals_mmap(struct file *file, vm_flags_t *vm_flags_ptr);
 struct file *memfd_alloc_file(const char *name, unsigned int flags);
+int memfd_get_seals(struct file *file);
+int memfd_add_seals(struct file *file, unsigned int seals);
 #else
 static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a)
 {
@@ -37,6 +39,16 @@ static inline struct file *memfd_alloc_file(const char *name, unsigned int flags
 {
 	return ERR_PTR(-EINVAL);
 }
+
+static inline int memfd_get_seals(struct file *file)
+{
+	return -EINVAL;
+}
+
+static inline int memfd_add_seals(struct file *file, unsigned int seals)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif /* __LINUX_MEMFD_H */
diff --git a/mm/memfd.c b/mm/memfd.c
index 919c2a53eb96..fb425f4e315f 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -227,7 +227,7 @@ static unsigned int *memfd_file_seals_ptr(struct file *file)
 		     F_SEAL_WRITE | \
 		     F_SEAL_FUTURE_WRITE)
 
-static int memfd_add_seals(struct file *file, unsigned int seals)
+int memfd_add_seals(struct file *file, unsigned int seals)
 {
 	struct inode *inode = file_inode(file);
 	unsigned int *file_seals;
@@ -309,7 +309,7 @@ unlock:
 	return error;
 }
 
-static int memfd_get_seals(struct file *file)
+int memfd_get_seals(struct file *file)
 {
 	unsigned int *seals = memfd_file_seals_ptr(file);
 
-- 
cgit v1.2.3


From 8a552d68a86ef0e6fb2ff4af13031a5e82c0f1d0 Mon Sep 17 00:00:00 2001
From: "Pratyush Yadav (Google)" <pratyush@kernel.org>
Date: Mon, 16 Feb 2026 19:59:33 +0100
Subject: mm: memfd_luo: preserve file seals

File seals are used on memfd for making shared memory communication with
untrusted peers safer and simpler.  Seals provide a guarantee that certain
operations won't be allowed on the file such as writes or truncations.
Maintaining these guarantees across a live update will help keeping such
use cases secure.

These guarantees will also be needed for IOMMUFD preservation with LUO.
Normally when IOMMUFD maps a memfd, it pins all its pages to make sure any
truncation operations on the memfd don't lead to IOMMUFD using freed
memory.  This doesn't work with LUO since the preserved memfd might have
completely different pages after a live update, and mapping them back to
the IOMMUFD will cause all sorts of problems.  Using and preserving the
seals allows IOMMUFD preservation logic to trust the memfd.

Since the uABI defines seals as an int, preserve them by introducing a new
u32 field.  There are currently only 6 possible seals, so the extra bits
are unused and provide room for future expansion.  Since the seals are
uABI, it is safe to use them directly in the ABI.  While at it, also add a
u32 flags field.  It makes sure the struct is nicely aligned, and can be
used later to support things like MFD_CLOEXEC.

Since the serialization structure is changed, bump the version number to
"memfd-v2".

It is important to note that the memfd-v2 version only supports seals that
existed when this version was defined.  This set is defined by
MEMFD_LUO_ALL_SEALS.  Any new seal might bring a completely different
semantic with it and the parser for memfd-v2 cannot be expected to deal
with that.  If there are any future seals added, they will need another
version bump.

Link: https://lkml.kernel.org/r/20260216185946.1215770-3-pratyush@kernel.org
Signed-off-by: Pratyush Yadav (Google) <pratyush@kernel.org>
Tested-by: Samiullah Khawaja <skhawaja@google.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kho/abi/memfd.h | 18 +++++++++++++++++-
 mm/memfd_luo.c                | 35 +++++++++++++++++++++++++++++++++--
 2 files changed, 50 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kho/abi/memfd.h b/include/linux/kho/abi/memfd.h
index 68cb6303b846..08b10fea2afc 100644
--- a/include/linux/kho/abi/memfd.h
+++ b/include/linux/kho/abi/memfd.h
@@ -56,10 +56,24 @@ struct memfd_luo_folio_ser {
 	u64 index;
 } __packed;
 
+/*
+ * The set of seals this version supports preserving. If support for any new
+ * seals is needed, add it here and bump version.
+ */
+#define MEMFD_LUO_ALL_SEALS (F_SEAL_SEAL | \
+			     F_SEAL_SHRINK | \
+			     F_SEAL_GROW | \
+			     F_SEAL_WRITE | \
+			     F_SEAL_FUTURE_WRITE | \
+			     F_SEAL_EXEC)
+
 /**
  * struct memfd_luo_ser - Main serialization structure for a memfd.
  * @pos:       The file's current position (f_pos).
  * @size:      The total size of the file in bytes (i_size).
+ * @seals:     The seals present on the memfd. The seals are uABI so it is safe
+ *             to directly use them in the ABI.
+ * @flags:     Flags for the file. Unused flag bits must be set to 0.
  * @nr_folios: Number of folios in the folios array.
  * @folios:    KHO vmalloc descriptor pointing to the array of
  *             struct memfd_luo_folio_ser.
@@ -67,11 +81,13 @@ struct memfd_luo_folio_ser {
 struct memfd_luo_ser {
 	u64 pos;
 	u64 size;
+	u32 seals;
+	u32 flags;
 	u64 nr_folios;
 	struct kho_vmalloc folios;
 } __packed;
 
 /* The compatibility string for memfd file handler */
-#define MEMFD_LUO_FH_COMPATIBLE	"memfd-v1"
+#define MEMFD_LUO_FH_COMPATIBLE	"memfd-v2"
 
 #endif /* _LINUX_KHO_ABI_MEMFD_H */
diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
index b8edb9f981d7..bc7f4f045edf 100644
--- a/mm/memfd_luo.c
+++ b/mm/memfd_luo.c
@@ -79,6 +79,8 @@
 #include <linux/shmem_fs.h>
 #include <linux/vmalloc.h>
 #include <linux/memfd.h>
+#include <uapi/linux/memfd.h>
+
 #include "internal.h"
 
 static int memfd_luo_preserve_folios(struct file *file,
@@ -259,7 +261,7 @@ static int memfd_luo_preserve(struct liveupdate_file_op_args *args)
 	struct memfd_luo_folio_ser *folios_ser;
 	struct memfd_luo_ser *ser;
 	u64 nr_folios;
-	int err = 0;
+	int err = 0, seals;
 
 	inode_lock(inode);
 	shmem_freeze(inode, true);
@@ -271,8 +273,21 @@ static int memfd_luo_preserve(struct liveupdate_file_op_args *args)
 		goto err_unlock;
 	}
 
+	seals = memfd_get_seals(args->file);
+	if (seals < 0) {
+		err = seals;
+		goto err_free_ser;
+	}
+
+	/* Make sure the file only has the seals supported by this version. */
+	if (seals & ~MEMFD_LUO_ALL_SEALS) {
+		err = -EOPNOTSUPP;
+		goto err_free_ser;
+	}
+
 	ser->pos = args->file->f_pos;
 	ser->size = i_size_read(inode);
+	ser->seals = seals;
 
 	err = memfd_luo_preserve_folios(args->file, &ser->folios,
 					&folios_ser, &nr_folios);
@@ -486,13 +501,29 @@ static int memfd_luo_retrieve(struct liveupdate_file_op_args *args)
 	if (!ser)
 		return -EINVAL;
 
-	file = memfd_alloc_file("", 0);
+	/* Make sure the file only has seals supported by this version. */
+	if (ser->seals & ~MEMFD_LUO_ALL_SEALS) {
+		err = -EOPNOTSUPP;
+		goto free_ser;
+	}
+
+	/*
+	 * The seals are preserved. Allow sealing here so they can be added
+	 * later.
+	 */
+	file = memfd_alloc_file("", MFD_ALLOW_SEALING);
 	if (IS_ERR(file)) {
 		pr_err("failed to setup file: %pe\n", file);
 		err = PTR_ERR(file);
 		goto free_ser;
 	}
 
+	err = memfd_add_seals(file, ser->seals);
+	if (err) {
+		pr_err("failed to add seals: %pe\n", ERR_PTR(err));
+		goto put_file;
+	}
+
 	vfs_setpos(file, ser->pos, MAX_LFS_FILESIZE);
 	file->f_inode->i_size = ser->size;
 
-- 
cgit v1.2.3


From c9cb94c6b85a2854ae03c874331b0880ee735441 Mon Sep 17 00:00:00 2001
From: Asier Gutierrez <gutierrez.asier@huawei-partners.com>
Date: Fri, 13 Feb 2026 14:50:32 +0000
Subject: mm/damon: remove unused target param of get_scheme_score()

damon_target is not used by get_scheme_score operations, nor with virtual
neither with physical addresses.

Link: https://lkml.kernel.org/r/20260213145032.1740407-1-gutierrez.asier@huawei-partners.com
Signed-off-by: Asier Gutierrez <gutierrez.asier@huawei-partners.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Quanmin Yan <yanquanmin1@huawei.com>
Cc: ze zuo <zuoze1@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h |  3 +--
 mm/damon/core.c       | 10 +++++-----
 mm/damon/paddr.c      |  3 +--
 mm/damon/vaddr.c      |  3 +--
 4 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index be3d198043ff..60e6da3012fa 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -647,8 +647,7 @@ struct damon_operations {
 	void (*prepare_access_checks)(struct damon_ctx *context);
 	unsigned int (*check_accesses)(struct damon_ctx *context);
 	int (*get_scheme_score)(struct damon_ctx *context,
-			struct damon_target *t, struct damon_region *r,
-			struct damos *scheme);
+			struct damon_region *r, struct damos *scheme);
 	unsigned long (*apply_scheme)(struct damon_ctx *context,
 			struct damon_target *t, struct damon_region *r,
 			struct damos *scheme, unsigned long *sz_filter_passed);
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 3e1890d64d06..0e5ada441b05 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1689,15 +1689,15 @@ static bool __damos_valid_target(struct damon_region *r, struct damos *s)
 		r->age <= s->pattern.max_age_region;
 }
 
-static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
-		struct damon_region *r, struct damos *s)
+static bool damos_valid_target(struct damon_ctx *c, struct damon_region *r,
+		struct damos *s)
 {
 	bool ret = __damos_valid_target(r, s);
 
 	if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
 		return ret;
 
-	return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
+	return c->ops.get_scheme_score(c, r, s) >= s->quota.min_score;
 }
 
 /*
@@ -2021,7 +2021,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 				s->max_nr_snapshots <= s->stat.nr_snapshots)
 			continue;
 
-		if (damos_valid_target(c, t, r, s))
+		if (damos_valid_target(c, r, s))
 			damos_apply_scheme(c, t, r, s);
 
 		if (damon_is_last_region(r, t))
@@ -2319,7 +2319,7 @@ static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
 		damon_for_each_region(r, t) {
 			if (!__damos_valid_target(r, s))
 				continue;
-			score = c->ops.get_scheme_score(c, t, r, s);
+			score = c->ops.get_scheme_score(c, r, s);
 			c->regions_score_histogram[score] +=
 				damon_sz_region(r);
 			if (score > max_score)
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 9bfe48826840..5cdcc5037cbc 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -343,8 +343,7 @@ static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx,
 }
 
 static int damon_pa_scheme_score(struct damon_ctx *context,
-		struct damon_target *t, struct damon_region *r,
-		struct damos *scheme)
+		struct damon_region *r, struct damos *scheme)
 {
 	switch (scheme->action) {
 	case DAMOS_PAGEOUT:
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 729b7ffd3565..4d6d8251d419 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -985,8 +985,7 @@ static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
 }
 
 static int damon_va_scheme_score(struct damon_ctx *context,
-		struct damon_target *t, struct damon_region *r,
-		struct damos *scheme)
+		struct damon_region *r, struct damos *scheme)
 {
 
 	switch (scheme->action) {
-- 
cgit v1.2.3


From 5ad41a38c36474ff59545cb514801d90719555de Mon Sep 17 00:00:00 2001
From: Jiayuan Chen <jiayuan.chen@shopee.com>
Date: Fri, 13 Feb 2026 15:18:22 +0800
Subject: mm: zswap: add per-memcg stat for incompressible pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "mm: zswap: add per-memcg stat for incompressible pages", v3.

In containerized environments, knowing which cgroup is contributing
incompressible pages to zswap is essential for effective resource
management.  This series adds a new per-memcg stat 'zswap_incomp' to track
incompressible pages, along with a selftest.


This patch (of 2):

The global zswap_stored_incompressible_pages counter was added in commit
dca4437a5861 ("mm/zswap: store <PAGE_SIZE compression failed page as-is")
to track how many pages are stored in raw (uncompressed) form in zswap.
However, in containerized environments, knowing which cgroup is
contributing incompressible pages is essential for effective resource
management [1].

Add a new memcg stat 'zswap_incomp' to track incompressible pages per
cgroup.  This helps administrators and orchestrators to:

1. Identify workloads that produce incompressible data (e.g., encrypted
   data, already-compressed media, random data) and may not benefit from
   zswap.

2. Make informed decisions about workload placement - moving
   incompressible workloads to nodes with larger swap backing devices
   rather than relying on zswap.

3. Debug zswap efficiency issues at the cgroup level without needing to
   correlate global stats with individual cgroups.

While the compression ratio can be estimated from existing stats (zswap /
zswapped * PAGE_SIZE), this doesn't distinguish between "uniformly poor
compression" and "a few completely incompressible pages mixed with highly
compressible ones".  The zswap_incomp stat provides direct visibility into
the latter case.

Link: https://lkml.kernel.org/r/20260213071827.5688-1-jiayuan.chen@linux.dev
Link: https://lkml.kernel.org/r/20260213071827.5688-2-jiayuan.chen@linux.dev
Link: https://lore.kernel.org/linux-mm/CAF8kJuONDFj4NAksaR4j_WyDbNwNGYLmTe-o76rqU17La=nkOw@mail.gmail.com/ [1]
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Acked-by: Nhat Pham <nphamcs@gmail.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Reviewed-by: SeongJae Park <sj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/cgroup-v2.rst | 5 +++++
 include/linux/memcontrol.h              | 1 +
 mm/memcontrol.c                         | 6 ++++++
 3 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 91beaa6798ce..8ad0b2781317 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1734,6 +1734,11 @@ The following nested keys are defined.
 	  zswpwb
 		Number of pages written from zswap to swap.
 
+	  zswap_incomp
+		Number of incompressible pages currently stored in zswap
+		without compression. These pages could not be compressed to
+		a size smaller than PAGE_SIZE, so they are stored as-is.
+
 	  thp_fault_alloc (npn)
 		Number of transparent hugepages which were allocated to satisfy
 		a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 70b685a85bf4..5695776f32c8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -39,6 +39,7 @@ enum memcg_stat_item {
 	MEMCG_KMEM,
 	MEMCG_ZSWAP_B,
 	MEMCG_ZSWAPPED,
+	MEMCG_ZSWAP_INCOMP,
 	MEMCG_NR_STAT,
 };
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 823ac6a05bf3..75df24ffdf25 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -356,6 +356,7 @@ static const unsigned int memcg_stat_items[] = {
 	MEMCG_KMEM,
 	MEMCG_ZSWAP_B,
 	MEMCG_ZSWAPPED,
+	MEMCG_ZSWAP_INCOMP,
 };
 
 #define NR_MEMCG_NODE_STAT_ITEMS ARRAY_SIZE(memcg_node_stat_items)
@@ -1368,6 +1369,7 @@ static const struct memory_stat memory_stats[] = {
 #ifdef CONFIG_ZSWAP
 	{ "zswap",			MEMCG_ZSWAP_B			},
 	{ "zswapped",			MEMCG_ZSWAPPED			},
+	{ "zswap_incomp",		MEMCG_ZSWAP_INCOMP		},
 #endif
 	{ "file_mapped",		NR_FILE_MAPPED			},
 	{ "file_dirty",			NR_FILE_DIRTY			},
@@ -5520,6 +5522,8 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
 	memcg = obj_cgroup_memcg(objcg);
 	mod_memcg_state(memcg, MEMCG_ZSWAP_B, size);
 	mod_memcg_state(memcg, MEMCG_ZSWAPPED, 1);
+	if (size == PAGE_SIZE)
+		mod_memcg_state(memcg, MEMCG_ZSWAP_INCOMP, 1);
 	rcu_read_unlock();
 }
 
@@ -5543,6 +5547,8 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
 	memcg = obj_cgroup_memcg(objcg);
 	mod_memcg_state(memcg, MEMCG_ZSWAP_B, -size);
 	mod_memcg_state(memcg, MEMCG_ZSWAPPED, -1);
+	if (size == PAGE_SIZE)
+		mod_memcg_state(memcg, MEMCG_ZSWAP_INCOMP, -1);
 	rcu_read_unlock();
 }
 
-- 
cgit v1.2.3


From c5c48345135ff04e039377020df23294d59aa59a Mon Sep 17 00:00:00 2001
From: Gregory Price <gourry@gourry.net>
Date: Wed, 11 Feb 2026 16:54:47 -0500
Subject: mm: name the anonymous MMOP enum as enum mmop

Give the MMOP enum (MMOP_OFFLINE, MMOP_ONLINE, etc) a proper type name so
the compiler can help catch invalid values being assigned to variables of
this type.

Leave the existing functions returning int alone to allow for
value-or-error pattern to remain unchanged without churn.

mmop_default_online_type is left as int because it uses the -1 sentinal
value to signal it hasn't been initialized yet.

Keep the uint8_t buffer in offline_and_remove_memory() as-is for space
efficiency, with an explicit cast when we consume the value.

Move the enum definition before the CONFIG_MEMORY_HOTPLUG guard so it is
unconditionally available for struct memory_block in memory.h.

No functional change.

Link: https://lore.kernel.org/linux-mm/3424eba7-523b-4351-abd0-3a888a3e5e61@kernel.org/
Link: https://lkml.kernel.org/r/20260211215447.2194189-1-gourry@gourry.net
Signed-off-by: Gregory Price <gourry@gourry.net>
Suggested-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Suggested-by: "David Hildenbrand (arm)" <david@kernel.org>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/base/memory.c          |  2 +-
 include/linux/memory.h         |  3 ++-
 include/linux/memory_hotplug.h | 16 ++++++++--------
 mm/memory_hotplug.c            | 10 +++++-----
 4 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index a3091924918b..5380050b16b7 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -452,7 +452,7 @@ static ssize_t phys_device_show(struct device *dev,
 static int print_allowed_zone(char *buf, int len, int nid,
 			      struct memory_group *group,
 			      unsigned long start_pfn, unsigned long nr_pages,
-			      int online_type, struct zone *default_zone)
+			      enum mmop online_type, struct zone *default_zone)
 {
 	struct zone *zone;
 
diff --git a/include/linux/memory.h b/include/linux/memory.h
index faeaa921e55b..5bb5599c6b2b 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -19,6 +19,7 @@
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/mutex.h>
+#include <linux/memory_hotplug.h>
 
 #define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)
 
@@ -77,7 +78,7 @@ enum memory_block_state {
 struct memory_block {
 	unsigned long start_section_nr;
 	enum memory_block_state state;	/* serialized by the dev->lock */
-	int online_type;		/* for passing data to online routine */
+	enum mmop online_type;	/* for passing data to online routine */
 	int nid;			/* NID for this memory block */
 	/*
 	 * The single zone of this memory block if all PFNs of this memory block
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index f2f16cdd73ee..e77ef3d7ff73 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -16,11 +16,8 @@ struct resource;
 struct vmem_altmap;
 struct dev_pagemap;
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-struct page *pfn_to_online_page(unsigned long pfn);
-
 /* Types for control the zone type of onlined and offlined memory */
-enum {
+enum mmop {
 	/* Offline the memory. */
 	MMOP_OFFLINE = 0,
 	/* Online the memory. Zone depends, see default_zone_for_pfn(). */
@@ -31,6 +28,9 @@ enum {
 	MMOP_ONLINE_MOVABLE,
 };
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+struct page *pfn_to_online_page(unsigned long pfn);
+
 /* Flags for add_memory() and friends to specify memory hotplug details. */
 typedef int __bitwise mhp_t;
 
@@ -286,8 +286,8 @@ static inline void __remove_memory(u64 start, u64 size) {}
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /* Default online_type (MMOP_*) when new memory blocks are added. */
-extern int mhp_get_default_online_type(void);
-extern void mhp_set_default_online_type(int online_type);
+extern enum mmop mhp_get_default_online_type(void);
+extern void mhp_set_default_online_type(enum mmop online_type);
 extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat);
 extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
 extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
@@ -310,8 +310,8 @@ extern void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
 				  struct vmem_altmap *altmap);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
-extern struct zone *zone_for_pfn_range(int online_type, int nid,
-		struct memory_group *group, unsigned long start_pfn,
+extern struct zone *zone_for_pfn_range(enum mmop online_type,
+		int nid, struct memory_group *group, unsigned long start_pfn,
 		unsigned long nr_pages);
 extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
 				      struct mhp_params *params);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index bc805029da51..a602310bdf33 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -221,7 +221,7 @@ void put_online_mems(void)
 bool movable_node_enabled = false;
 
 static int mhp_default_online_type = -1;
-int mhp_get_default_online_type(void)
+enum mmop mhp_get_default_online_type(void)
 {
 	if (mhp_default_online_type >= 0)
 		return mhp_default_online_type;
@@ -240,7 +240,7 @@ int mhp_get_default_online_type(void)
 	return mhp_default_online_type;
 }
 
-void mhp_set_default_online_type(int online_type)
+void mhp_set_default_online_type(enum mmop online_type)
 {
 	mhp_default_online_type = online_type;
 }
@@ -1046,7 +1046,7 @@ static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn
 	return movable_node_enabled ? movable_zone : kernel_zone;
 }
 
-struct zone *zone_for_pfn_range(int online_type, int nid,
+struct zone *zone_for_pfn_range(enum mmop online_type, int nid,
 		struct memory_group *group, unsigned long start_pfn,
 		unsigned long nr_pages)
 {
@@ -2305,7 +2305,7 @@ EXPORT_SYMBOL_GPL(remove_memory);
 
 static int try_offline_memory_block(struct memory_block *mem, void *arg)
 {
-	uint8_t online_type = MMOP_ONLINE_KERNEL;
+	enum mmop online_type = MMOP_ONLINE_KERNEL;
 	uint8_t **online_types = arg;
 	struct page *page;
 	int rc;
@@ -2338,7 +2338,7 @@ static int try_reonline_memory_block(struct memory_block *mem, void *arg)
 	int rc;
 
 	if (**online_types != MMOP_OFFLINE) {
-		mem->online_type = **online_types;
+		mem->online_type = (enum mmop)**online_types;
 		rc = device_online(&mem->dev);
 		if (rc < 0)
 			pr_warn("%s: Failed to re-online memory: %d",
-- 
cgit v1.2.3


From 652d12bc74a075f345f228f8945e05517a38874d Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Wed, 11 Feb 2026 12:31:38 +0200
Subject: mm: don't special case !MMU for is_zero_pfn() and my_zero_pfn()

Patch series "arch, mm: consolidate empty_zero_page", v3.

These patches cleanup handling of ZERO_PAGE() and zero_pfn.


This patch (of 4):

nommu architectures have empty_zero_page and define ZERO_PAGE() and
although they don't really use it to populate page tables, there is no
reason to hardwire !MMU implementation of is_zero_pfn() and my_zero_pfn()
to 0.

Drop #ifdef CONFIG_MMU around implementations of is_zero_pfn() and
my_zero_pfn() and remove !MMU version.

While on it, make zero_pfn __ro_after_init.

Link: https://lkml.kernel.org/r/20260211103141.3215197-1-rppt@kernel.org
Link: https://lkml.kernel.org/r/20260211103141.3215197-2-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Cc: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 14 +-------------
 mm/memory.c             | 13 -------------
 mm/mm_init.c            | 10 ++++++++++
 3 files changed, 11 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index a50df42a893f..5e772599d9a5 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1917,7 +1917,6 @@ static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot)
 	pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot);
 }
 
-#ifdef CONFIG_MMU
 #ifdef __HAVE_COLOR_ZERO_PAGE
 static inline int is_zero_pfn(unsigned long pfn)
 {
@@ -1940,18 +1939,7 @@ static inline unsigned long my_zero_pfn(unsigned long addr)
 	extern unsigned long zero_pfn;
 	return zero_pfn;
 }
-#endif
-#else
-static inline int is_zero_pfn(unsigned long pfn)
-{
-	return 0;
-}
-
-static inline unsigned long my_zero_pfn(unsigned long addr)
-{
-	return 0;
-}
-#endif /* CONFIG_MMU */
+#endif /* __HAVE_COLOR_ZERO_PAGE */
 
 #ifdef CONFIG_MMU
 
diff --git a/mm/memory.c b/mm/memory.c
index 7084c426f933..6b504fc5e815 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -162,21 +162,8 @@ static int __init disable_randmaps(char *s)
 }
 __setup("norandmaps", disable_randmaps);
 
-unsigned long zero_pfn __read_mostly;
-EXPORT_SYMBOL(zero_pfn);
-
 unsigned long highest_memmap_pfn __read_mostly;
 
-/*
- * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
- */
-static int __init init_zero_pfn(void)
-{
-	zero_pfn = page_to_pfn(ZERO_PAGE(0));
-	return 0;
-}
-early_initcall(init_zero_pfn);
-
 void mm_trace_rss_stat(struct mm_struct *mm, int member)
 {
 	trace_rss_stat(mm, member);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index df34797691bd..f3755a66b9d0 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -53,6 +53,9 @@ EXPORT_SYMBOL(mem_map);
 void *high_memory;
 EXPORT_SYMBOL(high_memory);
 
+unsigned long zero_pfn __ro_after_init;
+EXPORT_SYMBOL(zero_pfn);
+
 #ifdef CONFIG_DEBUG_MEMORY_INIT
 int __meminitdata mminit_loglevel;
 
@@ -2672,6 +2675,13 @@ static void __init mem_init_print_info(void)
 		);
 }
 
+static int __init init_zero_pfn(void)
+{
+	zero_pfn = page_to_pfn(ZERO_PAGE(0));
+	return 0;
+}
+early_initcall(init_zero_pfn);
+
 void __init __weak arch_mm_preinit(void)
 {
 }
-- 
cgit v1.2.3


From 9a1d0c738b45ea8da4e6897099c708e89f43daad Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Wed, 11 Feb 2026 12:31:39 +0200
Subject: mm: rename my_zero_pfn() to zero_pfn()

my_zero_pfn() is a silly name.

Rename zero_pfn variable to zero_page_pfn and my_zero_pfn() function to
zero_pfn().

While on it, move extern declarations of zero_page_pfn outside the
functions that use it and add a comment about what ZERO_PAGE is.

Link: https://lkml.kernel.org/r/20260211103141.3215197-3-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/x86/kvm/mmu/spte.h |  2 +-
 fs/dax.c                |  2 +-
 fs/proc/vmcore.c        |  2 +-
 include/linux/pgtable.h | 28 ++++++++++++++++++++--------
 mm/huge_memory.c        |  2 +-
 mm/memory.c             |  2 +-
 mm/migrate.c            |  2 +-
 mm/mm_init.c            | 10 +++++-----
 mm/userfaultfd.c        |  4 ++--
 9 files changed, 33 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 91ce29fd6f1b..8c0ffa2cded6 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -248,7 +248,7 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
 
 static inline hpa_t kvm_mmu_get_dummy_root(void)
 {
-	return my_zero_pfn(0) << PAGE_SHIFT;
+	return zero_pfn(0) << PAGE_SHIFT;
 }
 
 static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page)
diff --git a/fs/dax.c b/fs/dax.c
index 289e6254aa30..b78cff9c91b3 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1360,7 +1360,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 {
 	struct inode *inode = iter->inode;
 	unsigned long vaddr = vmf->address;
-	unsigned long pfn = my_zero_pfn(vaddr);
+	unsigned long pfn = zero_pfn(vaddr);
 	vm_fault_t ret;
 
 	*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index f188bd900eb2..44d15436439f 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -525,7 +525,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
 {
 	unsigned long map_size;
 	unsigned long pos_start, pos_end, pos;
-	unsigned long zeropage_pfn = my_zero_pfn(0);
+	unsigned long zeropage_pfn = zero_pfn(0);
 	size_t len = 0;
 
 	pos_start = pfn;
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 5e772599d9a5..c3a56f6b1ea5 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1917,27 +1917,39 @@ static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot)
 	pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot);
 }
 
+/*
+ * ZERO_PAGE() is global shared page(s) that is always zero. It is used for
+ * zero-mapped memory areas, CoW etc.
+ *
+ * On architectures that __HAVE_COLOR_ZERO_PAGE there are several such pages
+ * for different ranges in the virtual address space.
+ *
+ * zero_page_pfn identifies the first (or the only) pfn for these pages.
+ */
 #ifdef __HAVE_COLOR_ZERO_PAGE
 static inline int is_zero_pfn(unsigned long pfn)
 {
-	extern unsigned long zero_pfn;
-	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
+	extern unsigned long zero_page_pfn;
+	unsigned long offset_from_zero_pfn = pfn - zero_page_pfn;
+
 	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
 }
 
-#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
+#define zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
 
 #else
 static inline int is_zero_pfn(unsigned long pfn)
 {
-	extern unsigned long zero_pfn;
-	return pfn == zero_pfn;
+	extern unsigned long zero_page_pfn;
+
+	return pfn == zero_page_pfn;
 }
 
-static inline unsigned long my_zero_pfn(unsigned long addr)
+static inline unsigned long zero_pfn(unsigned long addr)
 {
-	extern unsigned long zero_pfn;
-	return zero_pfn;
+	extern unsigned long zero_page_pfn;
+
+	return zero_page_pfn;
 }
 #endif /* __HAVE_COLOR_ZERO_PAGE */
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b298cba853ab..a132fb98ed5d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2972,7 +2972,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 	for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
 		pte_t entry;
 
-		entry = pfn_pte(my_zero_pfn(addr), vma->vm_page_prot);
+		entry = pfn_pte(zero_pfn(addr), vma->vm_page_prot);
 		entry = pte_mkspecial(entry);
 		if (pmd_uffd_wp(old_pmd))
 			entry = pte_mkuffd_wp(entry);
diff --git a/mm/memory.c b/mm/memory.c
index 6b504fc5e815..af26a697562b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5224,7 +5224,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	/* Use the zero-page for reads */
 	if (!(vmf->flags & FAULT_FLAG_WRITE) &&
 			!mm_forbids_zeropage(vma->vm_mm)) {
-		entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
+		entry = pte_mkspecial(pfn_pte(zero_pfn(vmf->address),
 						vma->vm_page_prot));
 		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
 				vmf->address, &vmf->ptl);
diff --git a/mm/migrate.c b/mm/migrate.c
index 2c3d489ecf51..6cc654858da6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -321,7 +321,7 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
 	if (!pages_identical(page, ZERO_PAGE(0)))
 		return false;
 
-	newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
+	newpte = pte_mkspecial(pfn_pte(zero_pfn(pvmw->address),
 					pvmw->vma->vm_page_prot));
 
 	if (pte_swp_soft_dirty(old_pte))
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f3755a66b9d0..ab6578516dd6 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -53,8 +53,8 @@ EXPORT_SYMBOL(mem_map);
 void *high_memory;
 EXPORT_SYMBOL(high_memory);
 
-unsigned long zero_pfn __ro_after_init;
-EXPORT_SYMBOL(zero_pfn);
+unsigned long zero_page_pfn __ro_after_init;
+EXPORT_SYMBOL(zero_page_pfn);
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
 int __meminitdata mminit_loglevel;
@@ -2675,12 +2675,12 @@ static void __init mem_init_print_info(void)
 		);
 }
 
-static int __init init_zero_pfn(void)
+static int __init init_zero_page_pfn(void)
 {
-	zero_pfn = page_to_pfn(ZERO_PAGE(0));
+	zero_page_pfn = page_to_pfn(ZERO_PAGE(0));
 	return 0;
 }
-early_initcall(init_zero_pfn);
+early_initcall(init_zero_page_pfn);
 
 void __init __weak arch_mm_preinit(void)
 {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 927086bb4a3c..e19872e51878 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -357,7 +357,7 @@ static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
 	if (mm_forbids_zeropage(dst_vma->vm_mm))
 		return mfill_atomic_pte_zeroed_folio(dst_pmd, dst_vma, dst_addr);
 
-	_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
+	_dst_pte = pte_mkspecial(pfn_pte(zero_pfn(dst_addr),
 					 dst_vma->vm_page_prot));
 	ret = -EAGAIN;
 	dst_pte = pte_offset_map_lock(dst_vma->vm_mm, dst_pmd, dst_addr, &ptl);
@@ -1229,7 +1229,7 @@ static int move_zeropage_pte(struct mm_struct *mm,
 		return -EAGAIN;
 	}
 
-	zero_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
+	zero_pte = pte_mkspecial(pfn_pte(zero_pfn(dst_addr),
 					 dst_vma->vm_page_prot));
 	ptep_clear_flush(src_vma, src_addr, src_pte);
 	set_pte_at(mm, dst_addr, dst_pte, zero_pte);
-- 
cgit v1.2.3


From 6215d9f4470fbb48245ffdfade821685e2728c65 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Wed, 11 Feb 2026 12:31:40 +0200
Subject: arch, mm: consolidate empty_zero_page

Reduce 22 declarations of empty_zero_page to 3 and 23 declarations of
ZERO_PAGE() to 4.

Every architecture defines empty_zero_page that way or another, but for the
most of them it is always a page aligned page in BSS and most definitions
of ZERO_PAGE do virt_to_page(empty_zero_page).

Move Linus vetted x86 definition of empty_zero_page and ZERO_PAGE() to the
core MM and drop these definitions in architectures that do not implement
colored zero page (MIPS and s390).

ZERO_PAGE() remains a macro because turning it to a wrapper for a static
inline causes severe pain in header dependencies.

For the most part the change is mechanical, with these being noteworthy:

* alpha: aliased empty_zero_page with ZERO_PGE that was also used for boot
  parameters. Switching to a generic empty_zero_page removes the aliasing
  and keeps ZERO_PGE for boot parameters only
* arm64: uses __pa_symbol() in ZERO_PAGE() so that definition of
  ZERO_PAGE() is kept intact.
* m68k/parisc/um: allocated empty_zero_page from memblock,
  although they do not support zero page coloring and having it in BSS
  will work fine.
* sparc64 can have empty_zero_page in BSS rather allocate it, but it
  can't use virt_to_page() for BSS. Keep it's definition of ZERO_PAGE()
  but instead of allocating it, make mem_map_zero point to
  empty_zero_page.
* sh: used empty_zero_page for boot parameters at the very early boot.
  Rename the parameters page to boot_params_page and let sh use the generic
  empty_zero_page.
* hexagon: had an amusing comment about empty_zero_page

	/* A handy thing to have if one has the RAM. Declared in head.S */

  that unfortunately had to go :)

Link: https://lkml.kernel.org/r/20260211103141.3215197-4-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Helge Deller <deller@gmx.de>		[parisc]
Tested-by: Helge Deller <deller@gmx.de>		[parisc]
Reviewed-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Magnus Lindholm <linmag7@gmail.com>	[alpha]
Acked-by: Dinh Nguyen <dinguyen@kernel.org>	[nios2]
Acked-by: Andreas Larsson <andreas@gaisler.com>	[sparc]
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/alpha/include/asm/pgtable.h          |  6 ------
 arch/arc/include/asm/pgtable.h            |  3 ---
 arch/arc/mm/init.c                        |  2 --
 arch/arm/include/asm/pgtable.h            |  9 ---------
 arch/arm/mm/mmu.c                         |  7 -------
 arch/arm/mm/nommu.c                       |  7 -------
 arch/arm64/include/asm/pgtable.h          |  1 -
 arch/arm64/mm/mmu.c                       |  7 -------
 arch/csky/include/asm/pgtable.h           |  3 ---
 arch/csky/mm/init.c                       |  3 ---
 arch/hexagon/include/asm/pgtable.h        |  6 ------
 arch/hexagon/kernel/head.S                |  5 -----
 arch/hexagon/kernel/hexagon_ksyms.c       |  1 -
 arch/loongarch/include/asm/pgtable.h      |  9 ---------
 arch/loongarch/mm/init.c                  |  3 ---
 arch/m68k/include/asm/pgtable_mm.h        |  9 ---------
 arch/m68k/include/asm/pgtable_no.h        |  7 -------
 arch/m68k/mm/init.c                       |  9 ---------
 arch/m68k/mm/mcfmmu.c                     |  2 --
 arch/m68k/mm/motorola.c                   |  6 ------
 arch/m68k/mm/sun3mmu.c                    |  2 --
 arch/microblaze/include/asm/pgtable.h     | 10 ----------
 arch/microblaze/kernel/head.S             |  4 ----
 arch/microblaze/kernel/microblaze_ksyms.c |  2 --
 arch/nios2/include/asm/pgtable.h          |  7 -------
 arch/nios2/kernel/head.S                  | 10 ----------
 arch/nios2/kernel/nios2_ksyms.c           |  1 -
 arch/openrisc/include/asm/pgtable.h       |  4 ----
 arch/openrisc/kernel/head.S               |  3 ---
 arch/openrisc/kernel/or32_ksyms.c         |  1 -
 arch/openrisc/mm/init.c                   |  3 ---
 arch/parisc/include/asm/pgtable.h         | 11 -----------
 arch/parisc/mm/init.c                     |  6 ------
 arch/powerpc/include/asm/pgtable.h        |  6 ------
 arch/powerpc/mm/mem.c                     |  3 ---
 arch/riscv/include/asm/pgtable.h          |  7 -------
 arch/riscv/mm/init.c                      |  4 ----
 arch/sh/include/asm/pgtable.h             |  8 --------
 arch/sh/include/asm/setup.h               |  3 ++-
 arch/sh/kernel/head_32.S                  |  4 ++--
 arch/sh/kernel/sh_ksyms_32.c              |  1 -
 arch/sh/mm/init.c                         |  1 -
 arch/sparc/include/asm/pgtable_32.h       |  8 --------
 arch/sparc/include/asm/setup.h            |  2 --
 arch/sparc/kernel/head_32.S               |  7 -------
 arch/sparc/mm/init_32.c                   |  4 ----
 arch/sparc/mm/init_64.c                   | 11 ++++-------
 arch/um/include/asm/pgtable.h             |  9 ---------
 arch/um/include/shared/kern_util.h        |  1 -
 arch/um/kernel/mem.c                      | 16 ----------------
 arch/um/kernel/um_arch.c                  |  1 -
 arch/x86/include/asm/pgtable.h            |  8 --------
 arch/x86/kernel/head_32.S                 |  4 ----
 arch/x86/kernel/head_64.S                 |  7 -------
 arch/xtensa/include/asm/pgtable.h         |  4 ----
 arch/xtensa/kernel/head.S                 |  3 ---
 arch/xtensa/kernel/xtensa_ksyms.c         |  2 --
 include/linux/pgtable.h                   | 10 ++++++++++
 mm/mm_init.c                              |  5 +++++
 59 files changed, 23 insertions(+), 285 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index e9368c54be45..268ddde33617 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -126,12 +126,6 @@ struct vm_area_struct;
  */
 #define pgprot_noncached(prot)	(prot)
 
-/*
- * ZERO_PAGE is a global shared page that is always zero:  used
- * for zero-mapped memory areas etc..
- */
-#define ZERO_PAGE(vaddr)	(virt_to_page(ZERO_PGE))
-
 /*
  * On certain platforms whose physical address space can overlap KSEG,
  * namely EV6 and above, we must re-twiddle the physaddr to restore the
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index bd580e2b62d7..0fdaea81b5fa 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -21,9 +21,6 @@
 
 #ifndef __ASSEMBLER__
 
-extern char empty_zero_page[PAGE_SIZE];
-#define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))
-
 extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
 
 /* to cope with aliasing VIPT cache */
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index a5e92f46e5d1..d6b5c27a0098 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -19,8 +19,6 @@
 #include <asm/arcregs.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
-char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE);
-EXPORT_SYMBOL(empty_zero_page);
 
 static const unsigned long low_mem_start = CONFIG_LINUX_RAM_BASE;
 static unsigned long low_mem_sz;
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 6fa9acd6a7f5..982795cf4563 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -10,15 +10,6 @@
 #include <linux/const.h>
 #include <asm/proc-fns.h>
 
-#ifndef __ASSEMBLY__
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))
-#endif
-
 #include <asm-generic/pgtable-nopud.h>
 
 #ifndef CONFIG_MMU
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 518def8314e7..23b87b5ef7f1 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -41,13 +41,6 @@
 
 extern unsigned long __atags_pointer;
 
-/*
- * empty_zero_page is a special page that is used for
- * zero-initialized data and COW.
- */
-unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
-EXPORT_SYMBOL(empty_zero_page);
-
 /*
  * The pmd table for the upper-most set of pages.
  */
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 7e42d8accec6..040ea43cce32 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -27,13 +27,6 @@
 
 unsigned long vectors_base;
 
-/*
- * empty_zero_page is a special page that is used for
- * zero-initialized data and COW.
- */
-unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
-EXPORT_SYMBOL(empty_zero_page);
-
 #ifdef CONFIG_ARM_MPU
 struct mpu_rgn_info mpu_rgn_info;
 #endif
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b3e58735c49b..769570e43c18 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -110,7 +110,6 @@ static inline void arch_leave_lazy_mmu_mode(void)
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define ZERO_PAGE(vaddr)	phys_to_page(__pa_symbol(empty_zero_page))
 
 #define pte_ERROR(e)	\
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index a12ea8776c32..ec932f6ccddc 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -64,13 +64,6 @@ static bool rodata_is_rw __ro_after_init = true;
  */
 long __section(".mmuoff.data.write") __early_cpu_boot_status;
 
-/*
- * Empty_zero_page is a special page that is used for zero-initialized data
- * and COW.
- */
-unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
-EXPORT_SYMBOL(empty_zero_page);
-
 static DEFINE_SPINLOCK(swapper_pgdir_lock);
 static DEFINE_MUTEX(fixmap_lock);
 
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index d606afbabce1..bafcd5823531 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -76,9 +76,6 @@
 #define MAX_SWAPFILES_CHECK() \
 		BUILD_BUG_ON(MAX_SWAPFILES_SHIFT != 5)
 
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))
-
 extern void load_pgd(unsigned long pg_dir);
 extern pte_t invalid_pte_table[PTRS_PER_PTE];
 
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index 573da66b2543..fa16015ea1c0 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -38,9 +38,6 @@ pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
 pte_t kernel_pte_tables[PTRS_KERN_TABLE] __page_aligned_bss;
 
 EXPORT_SYMBOL(invalid_pte_table);
-unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
-						__page_aligned_bss;
-EXPORT_SYMBOL(empty_zero_page);
 
 void free_initmem(void)
 {
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index fbf24d1d1ca6..27b269e2870d 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -14,9 +14,6 @@
 #include <asm/page.h>
 #include <asm-generic/pgtable-nopmd.h>
 
-/* A handy thing to have if one has the RAM. Declared in head.S */
-extern unsigned long empty_zero_page;
-
 /*
  * The PTE model described here is that of the Hexagon Virtual Machine,
  * which autonomously walks 2-level page tables.  At a lower level, we
@@ -348,9 +345,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 	return (unsigned long)__va(pmd_val(pmd) & PAGE_MASK);
 }
 
-/* ZERO_PAGE - returns the globally shared zero page */
-#define ZERO_PAGE(vaddr) (virt_to_page(&empty_zero_page))
-
 /*
  * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
  * are !pte_none() && !pte_present().
diff --git a/arch/hexagon/kernel/head.S b/arch/hexagon/kernel/head.S
index 0b016308cc79..908ffece9132 100644
--- a/arch/hexagon/kernel/head.S
+++ b/arch/hexagon/kernel/head.S
@@ -216,8 +216,3 @@ __head_s_vaddr_target:
 .p2align PAGE_SHIFT
 ENTRY(external_cmdline_buffer)
         .fill _PAGE_SIZE,1,0
-
-.data
-.p2align PAGE_SHIFT
-ENTRY(empty_zero_page)
-        .fill _PAGE_SIZE,1,0
diff --git a/arch/hexagon/kernel/hexagon_ksyms.c b/arch/hexagon/kernel/hexagon_ksyms.c
index 36a80e31d187..81bc6f81e200 100644
--- a/arch/hexagon/kernel/hexagon_ksyms.c
+++ b/arch/hexagon/kernel/hexagon_ksyms.c
@@ -17,7 +17,6 @@ EXPORT_SYMBOL(raw_copy_to_user);
 EXPORT_SYMBOL(__vmgetie);
 EXPORT_SYMBOL(__vmsetie);
 EXPORT_SYMBOL(__vmyield);
-EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
 
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index c33b3bcb733e..a244de27a03e 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -74,15 +74,6 @@
 struct mm_struct;
 struct vm_area_struct;
 
-/*
- * ZERO_PAGE is a global shared page that is always zero; used
- * for zero-mapped memory areas etc..
- */
-
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-
-#define ZERO_PAGE(vaddr)	virt_to_page(empty_zero_page)
-
 #ifdef CONFIG_32BIT
 
 #define VMALLOC_START	(vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index c331bf69d2ec..00f3822b6e47 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -36,9 +36,6 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
-unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
-EXPORT_SYMBOL(empty_zero_page);
-
 void copy_user_highpage(struct page *to, struct page *from,
 	unsigned long vaddr, struct vm_area_struct *vma)
 {
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index bba64a9c49ac..7501ff030c63 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -110,15 +110,6 @@ extern unsigned long m68k_vmalloc_end;
 #define VMALLOC_END KMAP_START
 #endif
 
-/* zero page used for uninitialized stuff */
-extern void *empty_zero_page;
-
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-#define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))
-
 extern void kernel_set_cachemode(void *addr, unsigned long size, int cmode);
 
 /*
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index 1a86c15b9008..11751807a3f3 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -30,13 +30,6 @@
 
 #define swapper_pg_dir ((pgd_t *) 0)
 
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern void *empty_zero_page;
-#define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))
-
 /*
  * All 32bit addresses are effectively valid for vmalloc...
  * Sort of meaningless for non-VM targets.
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 53b71f786c27..3b88c0dd1616 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -33,13 +33,6 @@
 #include <asm/sections.h>
 #include <asm/tlb.h>
 
-/*
- * ZERO_PAGE is a special page that is used for zero-initialized
- * data and COW.
- */
-void *empty_zero_page;
-EXPORT_SYMBOL(empty_zero_page);
-
 void __init arch_zone_limits_init(unsigned long *max_zone_pfns)
 {
 	max_zone_pfns[ZONE_DMA] = PFN_DOWN(memblock_end_of_DRAM());
@@ -71,8 +64,6 @@ void __init paging_init(void)
 	unsigned long end_mem = memory_end & PAGE_MASK;
 
 	high_memory = (void *) end_mem;
-
-	empty_zero_page = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
 }
 
 #endif /* CONFIG_MMU */
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 3418fd864237..4924f2ff8ef8 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -41,8 +41,6 @@ void __init paging_init(void)
 	unsigned long next_pgtable;
 	int i;
 
-	empty_zero_page = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
-
 	pg_dir = swapper_pg_dir;
 	memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
 
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 127a3fa69f4c..b30aa69a73a6 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -498,12 +498,6 @@ void __init paging_init(void)
 
 	early_memtest(min_addr, max_addr);
 
-	/*
-	 * initialize the bad page table and bad page to point
-	 * to a couple of allocated pages
-	 */
-	empty_zero_page = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
-
 	/*
 	 * Set up SFC/DFC registers
 	 */
diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c
index c801677f7df8..f139cc15753a 100644
--- a/arch/m68k/mm/sun3mmu.c
+++ b/arch/m68k/mm/sun3mmu.c
@@ -43,8 +43,6 @@ void __init paging_init(void)
 	unsigned long bootmem_end;
 	unsigned long size;
 
-	empty_zero_page = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
-
 	address = PAGE_OFFSET;
 	pg_dir = swapper_pg_dir;
 	memset (swapper_pg_dir, 0, sizeof (swapper_pg_dir));
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 4eb76de6be4a..ea72291de553 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -207,16 +207,6 @@ extern pte_t *va_to_pte(unsigned long address);
  * Also, write permissions imply read permissions.
  */
 
-#ifndef __ASSEMBLER__
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[1024];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
-#endif /* __ASSEMBLER__ */
-
 #define pte_none(pte)		((pte_val(pte) & ~_PTE_NONE_MASK) == 0)
 #define pte_present(pte)	(pte_val(pte) & _PAGE_PRESENT)
 #define pte_clear(mm, addr, ptep) \
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index ec2fcb545e64..808019c3b7ac 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -39,10 +39,6 @@
 #include <asm/processor.h>
 
 .section .data
-.global empty_zero_page
-.align 12
-empty_zero_page:
-	.space	PAGE_SIZE
 .global swapper_pg_dir
 swapper_pg_dir:
 	.space	PAGE_SIZE
diff --git a/arch/microblaze/kernel/microblaze_ksyms.c b/arch/microblaze/kernel/microblaze_ksyms.c
index a8553f54152b..ad7596d7ba07 100644
--- a/arch/microblaze/kernel/microblaze_ksyms.c
+++ b/arch/microblaze/kernel/microblaze_ksyms.c
@@ -33,8 +33,6 @@ EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memmove);
 #endif
 
-EXPORT_SYMBOL(empty_zero_page);
-
 EXPORT_SYMBOL(mbc);
 
 extern void __divsi3(void);
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index 844dce55569f..d389aa9ca57c 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -65,13 +65,6 @@ struct mm_struct;
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))
-
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern pte_t invalid_pte_table[PAGE_SIZE/sizeof(pte_t)];
 
diff --git a/arch/nios2/kernel/head.S b/arch/nios2/kernel/head.S
index 372ce4a33018..613212e1a63a 100644
--- a/arch/nios2/kernel/head.S
+++ b/arch/nios2/kernel/head.S
@@ -23,16 +23,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/asm-macros.h>
 
-/*
- * ZERO_PAGE is a special page that is used for zero-initialized
- * data and COW.
- */
-.data
-.global empty_zero_page
-.align 12
-empty_zero_page:
-	.space	PAGE_SIZE
-
 /*
  * This global variable is used as an extension to the nios'
  * STATUS register to emulate a user/supervisor mode.
diff --git a/arch/nios2/kernel/nios2_ksyms.c b/arch/nios2/kernel/nios2_ksyms.c
index 54f7b23df1bf..c40aa39e8658 100644
--- a/arch/nios2/kernel/nios2_ksyms.c
+++ b/arch/nios2/kernel/nios2_ksyms.c
@@ -20,7 +20,6 @@ EXPORT_SYMBOL(memmove);
 
 /* memory management */
 
-EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(flush_icache_range);
 
 /*
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index b218050e2f6d..6b89996d0b62 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -179,10 +179,6 @@ extern void paging_init(void);
 	__pgprot(_PAGE_ALL | _PAGE_SRE | _PAGE_SWE \
 		 | _PAGE_SHARED | _PAGE_DIRTY | _PAGE_EXEC | _PAGE_CI)
 
-/* zero page used for uninitialized stuff */
-extern unsigned long empty_zero_page[2048];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
 #define pte_none(x)	(!pte_val(x))
 #define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
 #define pte_clear(mm, addr, xp)	do { pte_val(*(xp)) = 0; } while (0)
diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S
index bd760066f1cd..45890393947d 100644
--- a/arch/openrisc/kernel/head.S
+++ b/arch/openrisc/kernel/head.S
@@ -1563,9 +1563,6 @@ _string_nl:
  */
 	.section .data,"aw"
 	.align	8192
-	.global  empty_zero_page
-empty_zero_page:
-	.space  8192
 
 	.global  swapper_pg_dir
 swapper_pg_dir:
diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c
index 212e5f85004c..84a937a64e2a 100644
--- a/arch/openrisc/kernel/or32_ksyms.c
+++ b/arch/openrisc/kernel/or32_ksyms.c
@@ -40,7 +40,6 @@ DECLARE_EXPORT(__ashldi3);
 DECLARE_EXPORT(__lshrdi3);
 DECLARE_EXPORT(__ucmpdi2);
 
-EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(__copy_tofrom_user);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(memset);
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index 78fb0734cdbc..89d8c6df8855 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -188,9 +188,6 @@ void __init mem_init(void)
 {
 	BUG_ON(!mem_map);
 
-	/* clear the zero-page */
-	memset((void *)empty_zero_page, 0, PAGE_SIZE);
-
 	printk("mem_init_done ...........................................\n");
 	mem_init_done = 1;
 	return;
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 17afe7a59edf..f6fb99cb94d9 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -262,17 +262,6 @@ extern pgd_t swapper_pg_dir[]; /* declared in init_task.c */
 
 extern pte_t pg0[];
 
-/* zero page used for uninitialized stuff */
-
-extern unsigned long *empty_zero_page;
-
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
 #define pte_none(x)     (pte_val(x) == 0)
 #define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
 #define pte_user(x)	(pte_val(x) & _PAGE_USER)
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 6a39e031e5ff..be3380c9bcda 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -604,9 +604,6 @@ void __init mem_init(void)
 #endif
 }
 
-unsigned long *empty_zero_page __ro_after_init;
-EXPORT_SYMBOL(empty_zero_page);
-
 /*
  * pagetable_init() sets up the page tables
  *
@@ -639,9 +636,6 @@ static void __init pagetable_init(void)
 			  initrd_end - initrd_start, PAGE_KERNEL, 0);
 	}
 #endif
-
-	empty_zero_page = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
-
 }
 
 static void __init gateway_init(void)
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index dcd3a88caaf6..b27d94c06d0e 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -76,12 +76,6 @@ static inline const void *pmd_page_vaddr(pmd_t pmd)
 }
 #define pmd_page_vaddr pmd_page_vaddr
 #endif
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 
 extern pgd_t swapper_pg_dir[];
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index b7982d0243d4..648d0c5602ec 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -38,9 +38,6 @@
 
 unsigned long long memory_limit __initdata;
 
-unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
-EXPORT_SYMBOL(empty_zero_page);
-
 pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size,
 				pgprot_t vma_prot)
 {
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 08d1ca047104..ab4ce1cc9d9c 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -1284,13 +1284,6 @@ extern u64 satp_mode;
 void paging_init(void);
 void misc_mem_init(void);
 
-/*
- * ZERO_PAGE is a global shared page that is always zero,
- * used for zero-mapped memory areas, etc.
- */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
 /*
  * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
  * TLB flush will be required as a result of the "set". For example, use
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 811e03786c56..017bad735d47 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -69,10 +69,6 @@ unsigned long vmemmap_start_pfn __ro_after_init;
 EXPORT_SYMBOL(vmemmap_start_pfn);
 #endif
 
-unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
-							__page_aligned_bss;
-EXPORT_SYMBOL(empty_zero_page);
-
 extern char _start[];
 void *_dtb_early_va __initdata;
 uintptr_t _dtb_early_pa __initdata;
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 10fa8f2bb8d1..d5ce0950a323 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -20,14 +20,6 @@
 #ifndef __ASSEMBLER__
 #include <asm/addrspace.h>
 #include <asm/fixmap.h>
-
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
 #endif /* !__ASSEMBLER__ */
 
 /*
diff --git a/arch/sh/include/asm/setup.h b/arch/sh/include/asm/setup.h
index 84bb23a771f3..63c9efc06348 100644
--- a/arch/sh/include/asm/setup.h
+++ b/arch/sh/include/asm/setup.h
@@ -7,7 +7,8 @@
 /*
  * This is set up by the setup-routine at boot-time
  */
-#define PARAM	((unsigned char *)empty_zero_page)
+extern unsigned char *boot_params_page;
+#define PARAM boot_params_page
 
 #define MOUNT_ROOT_RDONLY (*(unsigned long *) (PARAM+0x000))
 #define RAMDISK_FLAGS (*(unsigned long *) (PARAM+0x004))
diff --git a/arch/sh/kernel/head_32.S b/arch/sh/kernel/head_32.S
index b603b7968b38..0b91bb85d40a 100644
--- a/arch/sh/kernel/head_32.S
+++ b/arch/sh/kernel/head_32.S
@@ -26,7 +26,7 @@
 #endif
 
 	.section	.empty_zero_page, "aw"
-ENTRY(empty_zero_page)
+ENTRY(boot_params_page)
 	.long	1		/* MOUNT_ROOT_RDONLY */
 	.long	0		/* RAMDISK_FLAGS */
 	.long	0x0200		/* ORIG_ROOT_DEV */
@@ -39,7 +39,7 @@ ENTRY(empty_zero_page)
 	.long	0x53453f00 + 29	/* "SE?" = 29 bit */
 #endif
 1:
-	.skip	PAGE_SIZE - empty_zero_page - 1b
+	.skip	PAGE_SIZE - boot_params_page - 1b
 
 	__HEAD
 
diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 5858936cb431..041191002e2e 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -20,7 +20,6 @@ EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(csum_partial_copy_generic);
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(empty_zero_page);
 #ifdef CONFIG_FLATMEM
 /* need in pfn_valid macro */
 EXPORT_SYMBOL(min_low_pfn);
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 464a3a63e2fa..4e40d5e96be9 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -332,7 +332,6 @@ void __init mem_init(void)
 	cpu_cache_init();
 
 	/* clear the zero-page */
-	memset(empty_zero_page, 0, PAGE_SIZE);
 	__flush_wback_region(empty_zero_page, PAGE_SIZE);
 
 	vsyscall_init();
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index a9f802d1dd64..f89b1250661d 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -71,14 +71,6 @@ extern unsigned long ptr_in_current_pgd;
 extern unsigned long phys_base;
 extern unsigned long pfn_base;
 
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
 /*
  * In general all page table modifications should use the V8 atomic
  * swap instruction.  This insures the mmu and the cpu are in sync
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 72205684e51e..21bed5514028 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -17,8 +17,6 @@ extern char reboot_command[];
  */
 extern unsigned char boot_cpu_id;
 
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-
 extern int serial_console;
 static inline int con_is_present(void)
 {
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
index 38345460d542..8c320fa25a67 100644
--- a/arch/sparc/kernel/head_32.S
+++ b/arch/sparc/kernel/head_32.S
@@ -57,13 +57,6 @@ sun4e_notsup:
 
 	.align PAGE_SIZE
 
-/* This was the only reasonable way I could think of to properly align
- * these page-table data structures.
- */
-	.globl empty_zero_page
-empty_zero_page:	.skip PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
-
 	.global root_flags
 	.global ram_flags
 	.global root_dev
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index fdc93dd12c3e..e0e66f91ceeb 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -246,10 +246,6 @@ void __init arch_mm_preinit(void)
 		prom_halt();
 	}
 
-
-	/* Saves us work later. */
-	memset((void *)empty_zero_page, 0, PAGE_SIZE);
-
 	i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5);
 	i += 1;
 	sparc_valid_addr_bitmap = (unsigned long *)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f46394c46a76..748790998ff5 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2492,6 +2492,9 @@ static void __init register_page_bootmem_info(void)
 }
 void __init mem_init(void)
 {
+	phys_addr_t zero_page_pa = kern_base +
+		((unsigned long)&empty_zero_page[0] - KERNBASE);
+
 	/*
 	 * Must be done after boot memory is put on freelist, because here we
 	 * might set fields in deferred struct pages that have not yet been
@@ -2504,13 +2507,7 @@ void __init mem_init(void)
 	 * Set up the zero page, mark it reserved, so that page count
 	 * is not manipulated when freeing the page from user ptes.
 	 */
-	mem_map_zero = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
-	if (mem_map_zero == NULL) {
-		prom_printf("paging_init: Cannot alloc zero page.\n");
-		prom_halt();
-	}
-	mark_page_reserved(mem_map_zero);
-
+	mem_map_zero = pfn_to_page(PHYS_PFN(zero_page_pa));
 
 	if (tlb_type == cheetah || tlb_type == cheetah_plus)
 		cheetah_ecache_flush_init();
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 3b42b0f45bf6..19e0608fb649 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -34,9 +34,6 @@
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
-/* zero page used for uninitialized stuff */
-extern unsigned long *empty_zero_page;
-
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that
@@ -74,12 +71,6 @@ extern unsigned long *empty_zero_page;
  * get..
  */
 
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
-
 #define pte_clear(mm, addr, xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEEDSYNC))
 
 #define pmd_none(x)	(!((unsigned long)pmd_val(x) & ~_PAGE_NEEDSYNC))
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index 38321188c04c..9812efd14ec0 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -38,7 +38,6 @@ extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs
 extern void uml_pm_wake(void);
 
 extern int start_uml(void);
-extern void paging_init(void);
 
 extern void uml_cleanup(void);
 extern void do_uml_exitcalls(void);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 89c8c8b94a79..1eef0e42ef5d 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -44,10 +44,6 @@ __section(".kasan_init") __used
 = kasan_init;
 #endif
 
-/* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
-unsigned long *empty_zero_page = NULL;
-EXPORT_SYMBOL(empty_zero_page);
-
 /*
  * Initialized during boot, and readonly for initializing page tables
  * afterwards
@@ -65,9 +61,6 @@ void __init arch_mm_preinit(void)
 	/* Safe to call after jump_label_init(). Enables KASAN. */
 	kasan_init_generic();
 
-	/* clear the zero-page */
-	memset(empty_zero_page, 0, PAGE_SIZE);
-
 	/* Map in the area just after the brk now that kmalloc is about
 	 * to be turned on.
 	 */
@@ -89,15 +82,6 @@ void __init arch_zone_limits_init(unsigned long *max_zone_pfns)
 	max_zone_pfns[ZONE_NORMAL] = high_physmem >> PAGE_SHIFT;
 }
 
-void __init paging_init(void)
-{
-	empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE,
-							       PAGE_SIZE);
-	if (!empty_zero_page)
-		panic("%s: Failed to allocate %lu bytes align=%lx\n",
-		      __func__, PAGE_SIZE, PAGE_SIZE);
-}
-
 /*
  * This can't do anything because nothing in the kernel image can be freed
  * since it's not in kernel physical memory.
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index e2b24e1ecfa6..2141f5f1f5a2 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -413,7 +413,6 @@ void __init setup_arch(char **cmdline_p)
 	uml_dtb_init();
 	read_initrd();
 
-	paging_init();
 	strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 	setup_hostinfo(host_info, sizeof host_info);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1662c5a8f445..54289f4587a4 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -47,14 +47,6 @@ void ptdump_walk_user_pgd_level_checkwx(void);
 #define debug_checkwx_user()	do { } while (0)
 #endif
 
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
-	__visible;
-#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
-
 extern spinlock_t pgd_lock;
 extern struct list_head pgd_list;
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 80ef5d386b03..5171cb746444 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -441,10 +441,6 @@ initial_pg_fixmap:
 swapper_pg_dir:
 	.fill 1024,4,0
 	.fill PTI_USER_PGD_FILL,4,0
-.globl empty_zero_page
-empty_zero_page:
-	.fill 4096,1,0
-EXPORT_SYMBOL(empty_zero_page)
 
 /*
  * This starts the data section.
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 85d4a5094f6b..7ed5520dd52e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -684,10 +684,3 @@ SYM_PIC_ALIAS(phys_base);
 EXPORT_SYMBOL(phys_base)
 
 #include "../xen/xen-head.S"
-
-	__PAGE_ALIGNED_BSS
-SYM_DATA_START_PAGE_ALIGNED(empty_zero_page)
-	.skip PAGE_SIZE
-SYM_DATA_END(empty_zero_page)
-EXPORT_SYMBOL(empty_zero_page)
-
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 50a136213b2b..61f07d981a94 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -209,10 +209,6 @@
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd entry %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
-extern unsigned long empty_zero_page[1024];
-
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
 #ifdef CONFIG_MMU
 extern pgd_t swapper_pg_dir[PAGE_SIZE/sizeof(pgd_t)];
 extern void paging_init(void);
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index 8484294bc623..4b0c5c5e685a 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -381,6 +381,3 @@ ENTRY(swapper_pg_dir)
 	.fill	PAGE_SIZE, 1, 0
 END(swapper_pg_dir)
 #endif
-ENTRY(empty_zero_page)
-	.fill	PAGE_SIZE, 1, 0
-END(empty_zero_page)
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index 62d81e76e18e..ced335b4df5f 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -15,8 +15,6 @@
 #include <linux/module.h>
 #include <asm/pgtable.h>
 
-EXPORT_SYMBOL(empty_zero_page);
-
 unsigned int __sync_fetch_and_and_4(volatile void *p, unsigned int v)
 {
 	BUG();
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index c3a56f6b1ea5..2a05c3885f85 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1925,6 +1925,9 @@ static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot)
  * for different ranges in the virtual address space.
  *
  * zero_page_pfn identifies the first (or the only) pfn for these pages.
+ *
+ * For architectures that don't __HAVE_COLOR_ZERO_PAGE the zero page lives in
+ * empty_zero_page in BSS.
  */
 #ifdef __HAVE_COLOR_ZERO_PAGE
 static inline int is_zero_pfn(unsigned long pfn)
@@ -1951,6 +1954,13 @@ static inline unsigned long zero_pfn(unsigned long addr)
 
 	return zero_page_pfn;
 }
+
+extern uint8_t empty_zero_page[PAGE_SIZE];
+
+#ifndef ZERO_PAGE
+#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
+#endif
+
 #endif /* __HAVE_COLOR_ZERO_PAGE */
 
 #ifdef CONFIG_MMU
diff --git a/mm/mm_init.c b/mm/mm_init.c
index ab6578516dd6..a0472d496c91 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -56,6 +56,11 @@ EXPORT_SYMBOL(high_memory);
 unsigned long zero_page_pfn __ro_after_init;
 EXPORT_SYMBOL(zero_page_pfn);
 
+#ifndef __HAVE_COLOR_ZERO_PAGE
+uint8_t empty_zero_page[PAGE_SIZE] __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
+#endif
+
 #ifdef CONFIG_DEBUG_MEMORY_INIT
 int __meminitdata mminit_loglevel;
 
-- 
cgit v1.2.3


From 26513781d1b3a1e8b4b576ed62751d604a69b374 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Wed, 11 Feb 2026 12:31:41 +0200
Subject: mm: cache struct page for empty_zero_page and return it from
 ZERO_PAGE()

For most architectures every invocation of ZERO_PAGE() does
virt_to_page(empty_zero_page).  But empty_zero_page is in BSS and it is
enough to get its struct page once at initialization time and then use it
whenever a zero page should be accessed.

Add yet another __zero_page variable that will be initialized as
virt_to_page(empty_zero_page) for most architectures in a weak
arch_setup_zero_pages() function.

For architectures that use colored zero pages (MIPS and s390) rename their
setup_zero_pages() to arch_setup_zero_pages() and make it global rather
than static.

For architectures that cannot use virt_to_page() for BSS (arm64 and
sparc64) add override of arch_setup_zero_pages().

Link: https://lkml.kernel.org/r/20260211103141.3215197-5-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/arm64/include/asm/pgtable.h    |  6 ------
 arch/arm64/mm/init.c                |  5 +++++
 arch/mips/mm/init.c                 | 11 +----------
 arch/s390/mm/init.c                 |  4 +---
 arch/sparc/include/asm/pgtable_64.h |  3 ---
 arch/sparc/mm/init_64.c             | 17 +++++++----------
 include/linux/pgtable.h             | 11 ++++++++---
 mm/mm_init.c                        | 21 +++++++++++++++++----
 8 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 769570e43c18..aa4b13da6371 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -106,12 +106,6 @@ static inline void arch_leave_lazy_mmu_mode(void)
 #define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp)	\
 	local_flush_tlb_page_nonotify(vma, address)
 
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-#define ZERO_PAGE(vaddr)	phys_to_page(__pa_symbol(empty_zero_page))
-
 #define pte_ERROR(e)	\
 	pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 96711b8578fd..417ec7efe569 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -328,6 +328,11 @@ void __init bootmem_init(void)
 	memblock_dump_all();
 }
 
+void __init arch_setup_zero_pages(void)
+{
+	__zero_page = phys_to_page(__pa_symbol(empty_zero_page));
+}
+
 void __init arch_mm_preinit(void)
 {
 	unsigned int flags = SWIOTLB_VERBOSE;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 4f6449ad02ca..55b25e85122a 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -56,10 +56,7 @@ unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL_GPL(empty_zero_page);
 EXPORT_SYMBOL(zero_page_mask);
 
-/*
- * Not static inline because used by IP27 special magic initialization code
- */
-static void __init setup_zero_pages(void)
+void __init arch_setup_zero_pages(void)
 {
 	unsigned int order;
 
@@ -450,7 +447,6 @@ void __init arch_mm_preinit(void)
 	BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT));
 
 	maar_init();
-	setup_zero_pages();	/* Setup zeroed pages.  */
 	highmem_init();
 
 #ifdef CONFIG_64BIT
@@ -461,11 +457,6 @@ void __init arch_mm_preinit(void)
 				0x80000000 - 4, KCORE_TEXT);
 #endif
 }
-#else  /* CONFIG_NUMA */
-void __init arch_mm_preinit(void)
-{
-	setup_zero_pages();	/* This comes from node 0 */
-}
 #endif /* !CONFIG_NUMA */
 
 void free_init_pages(const char *what, unsigned long begin, unsigned long end)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 3c20475cbee2..1f72efc2a579 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -69,7 +69,7 @@ unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(zero_page_mask);
 
-static void __init setup_zero_pages(void)
+void __init arch_setup_zero_pages(void)
 {
 	unsigned long total_pages = memblock_estimated_nr_free_pages();
 	unsigned int order;
@@ -159,8 +159,6 @@ void __init arch_mm_preinit(void)
 	cpumask_set_cpu(0, mm_cpumask(&init_mm));
 
 	pv_init();
-
-	setup_zero_pages();	/* Setup zeroed pages. */
 }
 
 unsigned long memory_block_size_bytes(void)
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 615f460c50af..74ede706fb32 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -210,9 +210,6 @@ extern unsigned long _PAGE_CACHE;
 extern unsigned long pg_iobits;
 extern unsigned long _PAGE_ALL_SZ_BITS;
 
-extern struct page *mem_map_zero;
-#define ZERO_PAGE(vaddr)	(mem_map_zero)
-
 /* PFNs are real physical page numbers.  However, mem_map only begins to record
  * per-page information starting at pfn_base.  This is to handle systems where
  * the first physical page in the machine is at some huge physical address,
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 748790998ff5..3aa47f2b6c6e 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -177,9 +177,6 @@ extern unsigned long sparc_ramdisk_image64;
 extern unsigned int sparc_ramdisk_image;
 extern unsigned int sparc_ramdisk_size;
 
-struct page *mem_map_zero __read_mostly;
-EXPORT_SYMBOL(mem_map_zero);
-
 unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;
 
 unsigned long sparc64_kern_pri_context __read_mostly;
@@ -2490,11 +2487,17 @@ static void __init register_page_bootmem_info(void)
 			register_page_bootmem_info_node(NODE_DATA(i));
 #endif
 }
-void __init mem_init(void)
+
+void __init arch_setup_zero_pages(void)
 {
 	phys_addr_t zero_page_pa = kern_base +
 		((unsigned long)&empty_zero_page[0] - KERNBASE);
 
+	__zero_page = phys_to_page(zero_page_pa);
+}
+
+void __init mem_init(void)
+{
 	/*
 	 * Must be done after boot memory is put on freelist, because here we
 	 * might set fields in deferred struct pages that have not yet been
@@ -2503,12 +2506,6 @@ void __init mem_init(void)
 	 */
 	register_page_bootmem_info();
 
-	/*
-	 * Set up the zero page, mark it reserved, so that page count
-	 * is not manipulated when freeing the page from user ptes.
-	 */
-	mem_map_zero = pfn_to_page(PHYS_PFN(zero_page_pa));
-
 	if (tlb_type == cheetah || tlb_type == cheetah_plus)
 		cheetah_ecache_flush_init();
 }
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 2a05c3885f85..776993d4567b 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1929,6 +1929,8 @@ static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot)
  * For architectures that don't __HAVE_COLOR_ZERO_PAGE the zero page lives in
  * empty_zero_page in BSS.
  */
+void arch_setup_zero_pages(void);
+
 #ifdef __HAVE_COLOR_ZERO_PAGE
 static inline int is_zero_pfn(unsigned long pfn)
 {
@@ -1956,10 +1958,13 @@ static inline unsigned long zero_pfn(unsigned long addr)
 }
 
 extern uint8_t empty_zero_page[PAGE_SIZE];
+extern struct page *__zero_page;
 
-#ifndef ZERO_PAGE
-#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
-#endif
+static inline struct page *_zero_page(unsigned long addr)
+{
+	return __zero_page;
+}
+#define ZERO_PAGE(vaddr) _zero_page(vaddr)
 
 #endif /* __HAVE_COLOR_ZERO_PAGE */
 
diff --git a/mm/mm_init.c b/mm/mm_init.c
index a0472d496c91..f903747ca854 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -59,7 +59,10 @@ EXPORT_SYMBOL(zero_page_pfn);
 #ifndef __HAVE_COLOR_ZERO_PAGE
 uint8_t empty_zero_page[PAGE_SIZE] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
-#endif
+
+struct page *__zero_page __ro_after_init;
+EXPORT_SYMBOL(__zero_page);
+#endif /* __HAVE_COLOR_ZERO_PAGE */
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
 int __meminitdata mminit_loglevel;
@@ -2680,12 +2683,21 @@ static void __init mem_init_print_info(void)
 		);
 }
 
-static int __init init_zero_page_pfn(void)
+#ifndef __HAVE_COLOR_ZERO_PAGE
+/*
+ * architectures that __HAVE_COLOR_ZERO_PAGE must define this function
+ */
+void __init __weak arch_setup_zero_pages(void)
+{
+	__zero_page = virt_to_page(empty_zero_page);
+}
+#endif
+
+static void __init init_zero_page_pfn(void)
 {
+	arch_setup_zero_pages();
 	zero_page_pfn = page_to_pfn(ZERO_PAGE(0));
-	return 0;
 }
-early_initcall(init_zero_page_pfn);
 
 void __init __weak arch_mm_preinit(void)
 {
@@ -2709,6 +2721,7 @@ void __init mm_core_init_early(void)
 void __init mm_core_init(void)
 {
 	arch_mm_preinit();
+	init_zero_page_pfn();
 
 	/* Initializations relying on SMP setup */
 	BUILD_BUG_ON(MAX_ZONELISTS > 2);
-- 
cgit v1.2.3


From 6cc153f90b7cf07db2b49469dfd79141b145036a Mon Sep 17 00:00:00 2001
From: Vernon Yang <yanglincheng@kylinos.cn>
Date: Sat, 21 Feb 2026 17:39:17 +0800
Subject: mm: add folio_test_lazyfree helper

Add folio_test_lazyfree() function to identify lazy-free folios to improve
code readability.

Link: https://lkml.kernel.org/r/20260221093918.1456187-4-vernon2gm@gmail.com
Signed-off-by: Vernon Yang <yanglincheng@kylinos.cn>
Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 5 +++++
 mm/rmap.c                  | 2 +-
 mm/vmscan.c                | 5 ++---
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f7a0e4af0c73..415e9f2ef616 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -724,6 +724,11 @@ static __always_inline bool folio_test_anon(const struct folio *folio)
 	return ((unsigned long)folio->mapping & FOLIO_MAPPING_ANON) != 0;
 }
 
+static __always_inline bool folio_test_lazyfree(const struct folio *folio)
+{
+	return folio_test_anon(folio) && !folio_test_swapbacked(folio);
+}
+
 static __always_inline bool PageAnonNotKsm(const struct page *page)
 {
 	unsigned long flags = (unsigned long)page_folio(page)->mapping;
diff --git a/mm/rmap.c b/mm/rmap.c
index 8f08090d7eb9..5fd22ba59d35 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2060,7 +2060,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 		}
 
 		if (!pvmw.pte) {
-			if (folio_test_anon(folio) && !folio_test_swapbacked(folio)) {
+			if (folio_test_lazyfree(folio)) {
 				if (unmap_huge_pmd_locked(vma, pvmw.address, pvmw.pmd, folio))
 					goto walk_done;
 				/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 031c5c035a82..d531040a3593 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -963,8 +963,7 @@ static void folio_check_dirty_writeback(struct folio *folio,
 	 * They could be mistakenly treated as file lru. So further anon
 	 * test is needed.
 	 */
-	if (!folio_is_file_lru(folio) ||
-	    (folio_test_anon(folio) && !folio_test_swapbacked(folio))) {
+	if (!folio_is_file_lru(folio) || folio_test_lazyfree(folio)) {
 		*dirty = false;
 		*writeback = false;
 		return;
@@ -1508,7 +1507,7 @@ retry:
 			}
 		}
 
-		if (folio_test_anon(folio) && !folio_test_swapbacked(folio)) {
+		if (folio_test_lazyfree(folio)) {
 			/* follow __remove_mapping for reference */
 			if (!folio_ref_freeze(folio, 1))
 				goto keep_locked;
-- 
cgit v1.2.3


From 3f2ad90060f65d6f66414b8a67c569154bafec7b Mon Sep 17 00:00:00 2001
From: Jason Miu <jasonmiu@google.com>
Date: Thu, 5 Feb 2026 18:14:27 -0800
Subject: kho: adopt radix tree for preserved memory tracking

Patch series "Make KHO Stateless", v9.

This series transitions KHO from an xarray-based metadata tracking system
with serialization to a radix tree data structure that can be passed
directly to the next kernel.

The key motivations for this change are to:
- Eliminate the need for data serialization before kexec.
- Remove the KHO finalize state.
- Pass preservation metadata more directly to the next kernel via the FDT.

The new approach uses a radix tree to mark preserved pages.  A page's
physical address and its order are encoded into a single value.  The tree
is composed of multiple levels of page-sized tables, with leaf nodes being
bitmaps where each set bit represents a preserved page.  The physical
address of the radix tree's root is passed in the FDT, allowing the next
kernel to reconstruct the preserved memory map.

This series is broken down into the following patches:

1.  kho: Adopt radix tree for preserved memory tracking:
    Replaces the xarray-based tracker with the new radix tree
    implementation and increments the ABI version.

2.  kho: Remove finalize state and clients:
    Removes the now-obsolete kho_finalize() function and its usage
    from client code and debugfs.


This patch (of 2):

Introduce a radix tree implementation for tracking preserved memory pages
and switch the KHO memory tracking mechanism to use it.  This lays the
groundwork for a stateless KHO implementation that eliminates the need for
serialization and the associated "finalize" state.

This patch introduces the core radix tree data structures and constants to
the KHO ABI.  It adds the radix tree node and leaf structures, along with
documentation for the radix tree key encoding scheme that combines a
page's physical address and order.

To support broader use by other kernel subsystems, such as hugetlb
preservation, the core radix tree manipulation functions are exported as a
public API.

The xarray-based memory tracking is replaced with this new radix tree
implementation.  The core KHO preservation and unpreservation functions
are wired up to use the radix tree helpers.  On boot, the second kernel
restores the preserved memory map by walking the radix tree whose root
physical address is passed via the FDT.

The ABI `compatible` version is bumped to "kho-v2" to reflect the
structural changes in the preserved memory map and sub-FDT property names.
This includes renaming "fdt" to "preserved-data" to better reflect that
preserved state may use formats other than FDT.

[ran.xiaokai@zte.com.cn: fix child node parsing for debugfs in/sub_fdts]
  Link: https://lkml.kernel.org/r/20260309033530.244508-1-ranxiaokai627@163.com
Link: https://lkml.kernel.org/r/20260206021428.3386442-1-jasonmiu@google.com
Link: https://lkml.kernel.org/r/20260206021428.3386442-2-jasonmiu@google.com
Signed-off-by: Jason Miu <jasonmiu@google.com>
Signed-off-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Changyuan Lyu <changyuanl@google.com>
Cc: David Matlack <dmatlack@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Pratyush Yadav <pratyush@kernel.org>
Cc: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/core-api/kho/abi.rst         |   6 +
 Documentation/core-api/kho/index.rst       |  12 +
 include/linux/kho/abi/kexec_handover.h     | 144 ++++++-
 include/linux/kho_radix_tree.h             |  70 ++++
 kernel/liveupdate/kexec_handover.c         | 619 +++++++++++++++--------------
 kernel/liveupdate/kexec_handover_debugfs.c |   3 +-
 6 files changed, 547 insertions(+), 307 deletions(-)
 create mode 100644 include/linux/kho_radix_tree.h

(limited to 'include/linux')

diff --git a/Documentation/core-api/kho/abi.rst b/Documentation/core-api/kho/abi.rst
index 2e63be3486cf..799d743105a6 100644
--- a/Documentation/core-api/kho/abi.rst
+++ b/Documentation/core-api/kho/abi.rst
@@ -22,6 +22,12 @@ memblock preservation ABI
 .. kernel-doc:: include/linux/kho/abi/memblock.h
    :doc: memblock kexec handover ABI
 
+KHO persistent memory tracker ABI
+=================================
+
+.. kernel-doc:: include/linux/kho/abi/kexec_handover.h
+  :doc: KHO persistent memory tracker
+
 See Also
 ========
 
diff --git a/Documentation/core-api/kho/index.rst b/Documentation/core-api/kho/index.rst
index dcc6a36cc134..002bdf0beb2e 100644
--- a/Documentation/core-api/kho/index.rst
+++ b/Documentation/core-api/kho/index.rst
@@ -83,6 +83,18 @@ called serialization. When the FDT is generated, some properties
 of the system may become immutable because they are already written down
 in the FDT. That state is called the KHO finalization phase.
 
+Kexec Handover Radix Tree
+=========================
+
+.. kernel-doc:: include/linux/kho_radix_tree.h
+  :doc: Kexec Handover Radix Tree
+
+Public API
+==========
+
+.. kernel-doc:: kernel/liveupdate/kexec_handover.c
+  :export:
+
 See Also
 ========
 
diff --git a/include/linux/kho/abi/kexec_handover.h b/include/linux/kho/abi/kexec_handover.h
index 2201a0d2c159..6b7d8ef550f9 100644
--- a/include/linux/kho/abi/kexec_handover.h
+++ b/include/linux/kho/abi/kexec_handover.h
@@ -10,8 +10,13 @@
 #ifndef _LINUX_KHO_ABI_KEXEC_HANDOVER_H
 #define _LINUX_KHO_ABI_KEXEC_HANDOVER_H
 
+#include <linux/bits.h>
+#include <linux/log2.h>
+#include <linux/math.h>
 #include <linux/types.h>
 
+#include <asm/page.h>
+
 /**
  * DOC: Kexec Handover ABI
  *
@@ -29,32 +34,32 @@
  * compatibility is only guaranteed for kernels supporting the same ABI version.
  *
  * FDT Structure Overview:
- *   The FDT serves as a central registry for physical
- *   addresses of preserved data structures and sub-FDTs. The first kernel
- *   populates this FDT with references to memory regions and other FDTs that
- *   need to persist across the kexec transition. The subsequent kernel then
- *   parses this FDT to locate and restore the preserved data.::
+ *   The FDT serves as a central registry for physical addresses of preserved
+ *   data structures. The first kernel populates this FDT with references to
+ *   memory regions and other metadata that need to persist across the kexec
+ *   transition. The subsequent kernel then parses this FDT to locate and
+ *   restore the preserved data.::
  *
  *     / {
- *         compatible = "kho-v1";
+ *         compatible = "kho-v2";
  *
  *         preserved-memory-map = <0x...>;
  *
  *         <subnode-name-1> {
- *             fdt = <0x...>;
+ *             preserved-data = <0x...>;
  *         };
  *
  *         <subnode-name-2> {
- *             fdt = <0x...>;
+ *             preserved-data = <0x...>;
  *         };
  *               ... ...
  *         <subnode-name-N> {
- *             fdt = <0x...>;
+ *             preserved-data = <0x...>;
  *         };
  *     };
  *
  *   Root KHO Node (/):
- *     - compatible: "kho-v1"
+ *     - compatible: "kho-v2"
  *
  *       Indentifies the overall KHO ABI version.
  *
@@ -69,20 +74,20 @@
  *     is provided by the subsystem that uses KHO for preserving its
  *     data.
  *
- *     - fdt: u64
+ *     - preserved-data: u64
  *
- *       Physical address pointing to a subnode FDT blob that is also
+ *       Physical address pointing to a subnode data blob that is also
  *       being preserved.
  */
 
 /* The compatible string for the KHO FDT root node. */
-#define KHO_FDT_COMPATIBLE "kho-v1"
+#define KHO_FDT_COMPATIBLE "kho-v2"
 
 /* The FDT property for the preserved memory map. */
 #define KHO_FDT_MEMORY_MAP_PROP_NAME "preserved-memory-map"
 
-/* The FDT property for sub-FDTs. */
-#define KHO_FDT_SUB_TREE_PROP_NAME "fdt"
+/* The FDT property for preserved data blobs. */
+#define KHO_FDT_SUB_TREE_PROP_NAME "preserved-data"
 
 /**
  * DOC: Kexec Handover ABI for vmalloc Preservation
@@ -160,4 +165,113 @@ struct kho_vmalloc {
 	unsigned short order;
 };
 
+/**
+ * DOC: KHO persistent memory tracker
+ *
+ * KHO tracks preserved memory using a radix tree data structure. Each node of
+ * the tree is exactly a single page. The leaf nodes are bitmaps where each set
+ * bit is a preserved page of any order. The intermediate nodes are tables of
+ * physical addresses that point to a lower level node.
+ *
+ * The tree hierarchy is shown below::
+ *
+ *   root
+ *   +-------------------+
+ *   |     Level 5       | (struct kho_radix_node)
+ *   +-------------------+
+ *     |
+ *     v
+ *   +-------------------+
+ *   |     Level 4       | (struct kho_radix_node)
+ *   +-------------------+
+ *     |
+ *     | ... (intermediate levels)
+ *     |
+ *     v
+ *   +-------------------+
+ *   |      Level 0      | (struct kho_radix_leaf)
+ *   +-------------------+
+ *
+ * The tree is traversed using a key that encodes the page's physical address
+ * (pa) and its order into a single unsigned long value. The encoded key value
+ * is composed of two parts: the 'order bit' in the upper part and the
+ * 'shifted physical address' in the lower part.::
+ *
+ *   +------------+-----------------------------+--------------------------+
+ *   | Page Order | Order Bit                   | Shifted Physical Address |
+ *   +------------+-----------------------------+--------------------------+
+ *   | 0          | ...000100 ... (at bit 52)   | pa >> (PAGE_SHIFT + 0)   |
+ *   | 1          | ...000010 ... (at bit 51)   | pa >> (PAGE_SHIFT + 1)   |
+ *   | 2          | ...000001 ... (at bit 50)   | pa >> (PAGE_SHIFT + 2)   |
+ *   | ...        | ...                         | ...                      |
+ *   +------------+-----------------------------+--------------------------+
+ *
+ * Shifted Physical Address:
+ * The 'shifted physical address' is the physical address normalized for its
+ * order. It effectively represents the PFN shifted right by the order.
+ *
+ * Order Bit:
+ * The 'order bit' encodes the page order by setting a single bit at a
+ * specific position. The position of this bit itself represents the order.
+ *
+ * For instance, on a 64-bit system with 4KB pages (PAGE_SHIFT = 12), the
+ * maximum range for the shifted physical address (for order 0) is 52 bits
+ * (64 - 12). This address occupies bits [0-51]. For order 0, the order bit is
+ * set at position 52.
+ *
+ * The following diagram illustrates how the encoded key value is split into
+ * indices for the tree levels, with PAGE_SIZE of 4KB::
+ *
+ *        63:60   59:51    50:42    41:33    32:24    23:15         14:0
+ *   +---------+--------+--------+--------+--------+--------+-----------------+
+ *   |    0    |  Lv 5  |  Lv 4  |  Lv 3  |  Lv 2  |  Lv 1  |  Lv 0 (bitmap)  |
+ *   +---------+--------+--------+--------+--------+--------+-----------------+
+ *
+ * The radix tree stores pages of all orders in a single 6-level hierarchy. It
+ * efficiently shares higher tree levels, especially due to common zero top
+ * address bits, allowing a single, efficient algorithm to manage all
+ * pages. This bitmap approach also offers memory efficiency; for example, a
+ * 512KB bitmap can cover a 16GB memory range for 0-order pages with PAGE_SIZE =
+ * 4KB.
+ *
+ * The data structures defined here are part of the KHO ABI. Any modification
+ * to these structures that breaks backward compatibility must be accompanied by
+ * an update to the "compatible" string. This ensures that a newer kernel can
+ * correctly interpret the data passed by an older kernel.
+ */
+
+/*
+ * Defines constants for the KHO radix tree structure, used to track preserved
+ * memory. These constants govern the indexing, sizing, and depth of the tree.
+ */
+enum kho_radix_consts {
+	/*
+	 * The bit position of the order bit (and also the length of the
+	 * shifted physical address) for an order-0 page.
+	 */
+	KHO_ORDER_0_LOG2 = 64 - PAGE_SHIFT,
+
+	/* Size of the table in kho_radix_node, in log2 */
+	KHO_TABLE_SIZE_LOG2 = const_ilog2(PAGE_SIZE / sizeof(phys_addr_t)),
+
+	/* Number of bits in the kho_radix_leaf bitmap, in log2 */
+	KHO_BITMAP_SIZE_LOG2 = PAGE_SHIFT + const_ilog2(BITS_PER_BYTE),
+
+	/*
+	 * The total tree depth is the number of intermediate levels
+	 * and 1 bitmap level.
+	 */
+	KHO_TREE_MAX_DEPTH =
+		DIV_ROUND_UP(KHO_ORDER_0_LOG2 - KHO_BITMAP_SIZE_LOG2,
+			     KHO_TABLE_SIZE_LOG2) + 1,
+};
+
+struct kho_radix_node {
+	u64 table[1 << KHO_TABLE_SIZE_LOG2];
+};
+
+struct kho_radix_leaf {
+	DECLARE_BITMAP(bitmap, 1 << KHO_BITMAP_SIZE_LOG2);
+};
+
 #endif	/* _LINUX_KHO_ABI_KEXEC_HANDOVER_H */
diff --git a/include/linux/kho_radix_tree.h b/include/linux/kho_radix_tree.h
new file mode 100644
index 000000000000..84e918b96e53
--- /dev/null
+++ b/include/linux/kho_radix_tree.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_KHO_RADIX_TREE_H
+#define _LINUX_KHO_RADIX_TREE_H
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/mutex_types.h>
+#include <linux/types.h>
+
+/**
+ * DOC: Kexec Handover Radix Tree
+ *
+ * This is a radix tree implementation for tracking physical memory pages
+ * across kexec transitions. It was developed for the KHO mechanism but is
+ * designed for broader use by any subsystem that needs to preserve pages.
+ *
+ * The radix tree is a multi-level tree where leaf nodes are bitmaps
+ * representing individual pages. To allow pages of different sizes (orders)
+ * to be stored efficiently in a single tree, it uses a unique key encoding
+ * scheme. Each key is an unsigned long that combines a page's physical
+ * address and its order.
+ *
+ * Client code is responsible for allocating the root node of the tree,
+ * initializing the mutex lock, and managing its lifecycle. It must use the
+ * tree data structures defined in the KHO ABI,
+ * `include/linux/kho/abi/kexec_handover.h`.
+ */
+
+struct kho_radix_node;
+
+struct kho_radix_tree {
+	struct kho_radix_node *root;
+	struct mutex lock; /* protects the tree's structure and root pointer */
+};
+
+typedef int (*kho_radix_tree_walk_callback_t)(phys_addr_t phys,
+					      unsigned int order);
+
+#ifdef CONFIG_KEXEC_HANDOVER
+
+int kho_radix_add_page(struct kho_radix_tree *tree, unsigned long pfn,
+		       unsigned int order);
+
+void kho_radix_del_page(struct kho_radix_tree *tree, unsigned long pfn,
+			unsigned int order);
+
+int kho_radix_walk_tree(struct kho_radix_tree *tree,
+			kho_radix_tree_walk_callback_t cb);
+
+#else  /* #ifdef CONFIG_KEXEC_HANDOVER */
+
+static inline int kho_radix_add_page(struct kho_radix_tree *tree, long pfn,
+				     unsigned int order)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void kho_radix_del_page(struct kho_radix_tree *tree,
+				      unsigned long pfn, unsigned int order) { }
+
+static inline int kho_radix_walk_tree(struct kho_radix_tree *tree,
+				      kho_radix_tree_walk_callback_t cb)
+{
+	return -EOPNOTSUPP;
+}
+
+#endif /* #ifdef CONFIG_KEXEC_HANDOVER */
+
+#endif	/* _LINUX_KHO_RADIX_TREE_H */
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 4356f277b462..ad877926f3f6 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <rppt@kernel.org>
  * Copyright (C) 2025 Google LLC, Changyuan Lyu <changyuanl@google.com>
  * Copyright (C) 2025 Pasha Tatashin <pasha.tatashin@soleen.com>
+ * Copyright (C) 2026 Google LLC, Jason Miu <jasonmiu@google.com>
  */
 
 #define pr_fmt(fmt) "KHO: " fmt
@@ -15,6 +16,7 @@
 #include <linux/count_zeros.h>
 #include <linux/kexec.h>
 #include <linux/kexec_handover.h>
+#include <linux/kho_radix_tree.h>
 #include <linux/kho/abi/kexec_handover.h>
 #include <linux/libfdt.h>
 #include <linux/list.h>
@@ -64,156 +66,308 @@ static int __init kho_parse_enable(char *p)
 }
 early_param("kho", kho_parse_enable);
 
-/*
- * Keep track of memory that is to be preserved across KHO.
- *
- * The serializing side uses two levels of xarrays to manage chunks of per-order
- * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order
- * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0
- * allocations each bitmap will cover 128M of address space. Thus, for 16G of
- * memory at most 512K of bitmap memory will be needed for order 0.
- *
- * This approach is fully incremental, as the serialization progresses folios
- * can continue be aggregated to the tracker. The final step, immediately prior
- * to kexec would serialize the xarray information into a linked list for the
- * successor kernel to parse.
- */
-
-#define PRESERVE_BITS (PAGE_SIZE * 8)
-
-struct kho_mem_phys_bits {
-	DECLARE_BITMAP(preserve, PRESERVE_BITS);
-};
-
-static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE);
-
-struct kho_mem_phys {
-	/*
-	 * Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized
-	 * to order.
-	 */
-	struct xarray phys_bits;
-};
-
-struct kho_mem_track {
-	/* Points to kho_mem_phys, each order gets its own bitmap tree */
-	struct xarray orders;
-};
-
-struct khoser_mem_chunk;
-
 struct kho_out {
 	void *fdt;
 	bool finalized;
 	struct mutex lock; /* protects KHO FDT finalization */
 
-	struct kho_mem_track track;
+	struct kho_radix_tree radix_tree;
 	struct kho_debugfs dbg;
 };
 
 static struct kho_out kho_out = {
 	.lock = __MUTEX_INITIALIZER(kho_out.lock),
-	.track = {
-		.orders = XARRAY_INIT(kho_out.track.orders, 0),
+	.radix_tree = {
+		.lock = __MUTEX_INITIALIZER(kho_out.radix_tree.lock),
 	},
 	.finalized = false,
 };
 
-static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
+/**
+ * kho_radix_encode_key - Encodes a physical address and order into a radix key.
+ * @phys: The physical address of the page.
+ * @order: The order of the page.
+ *
+ * This function combines a page's physical address and its order into a
+ * single unsigned long, which is used as a key for all radix tree
+ * operations.
+ *
+ * Return: The encoded unsigned long radix key.
+ */
+static unsigned long kho_radix_encode_key(phys_addr_t phys, unsigned int order)
 {
-	void *res = xa_load(xa, index);
+	/* Order bits part */
+	unsigned long h = 1UL << (KHO_ORDER_0_LOG2 - order);
+	/* Shifted physical address part */
+	unsigned long l = phys >> (PAGE_SHIFT + order);
 
-	if (res)
-		return res;
+	return h | l;
+}
 
-	void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
+/**
+ * kho_radix_decode_key - Decodes a radix key back into a physical address and order.
+ * @key: The unsigned long key to decode.
+ * @order: An output parameter, a pointer to an unsigned int where the decoded
+ *         page order will be stored.
+ *
+ * This function reverses the encoding performed by kho_radix_encode_key(),
+ * extracting the original physical address and page order from a given key.
+ *
+ * Return: The decoded physical address.
+ */
+static phys_addr_t kho_radix_decode_key(unsigned long key, unsigned int *order)
+{
+	unsigned int order_bit = fls64(key);
+	phys_addr_t phys;
 
-	if (!elm)
-		return ERR_PTR(-ENOMEM);
+	/* order_bit is numbered starting at 1 from fls64 */
+	*order = KHO_ORDER_0_LOG2 - order_bit + 1;
+	/* The order is discarded by the shift */
+	phys = key << (PAGE_SHIFT + *order);
 
-	if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE)))
-		return ERR_PTR(-EINVAL);
+	return phys;
+}
 
-	res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
-	if (xa_is_err(res))
-		return ERR_PTR(xa_err(res));
-	else if (res)
-		return res;
+static unsigned long kho_radix_get_bitmap_index(unsigned long key)
+{
+	return key % (1 << KHO_BITMAP_SIZE_LOG2);
+}
+
+static unsigned long kho_radix_get_table_index(unsigned long key,
+					       unsigned int level)
+{
+	int s;
 
-	return no_free_ptr(elm);
+	s = ((level - 1) * KHO_TABLE_SIZE_LOG2) + KHO_BITMAP_SIZE_LOG2;
+	return (key >> s) % (1 << KHO_TABLE_SIZE_LOG2);
 }
 
-static void __kho_unpreserve_order(struct kho_mem_track *track, unsigned long pfn,
-				   unsigned int order)
+/**
+ * kho_radix_add_page - Marks a page as preserved in the radix tree.
+ * @tree: The KHO radix tree.
+ * @pfn: The page frame number of the page to preserve.
+ * @order: The order of the page.
+ *
+ * This function traverses the radix tree based on the key derived from @pfn
+ * and @order. It sets the corresponding bit in the leaf bitmap to mark the
+ * page for preservation. If intermediate nodes do not exist along the path,
+ * they are allocated and added to the tree.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+int kho_radix_add_page(struct kho_radix_tree *tree,
+		       unsigned long pfn, unsigned int order)
 {
-	struct kho_mem_phys_bits *bits;
-	struct kho_mem_phys *physxa;
-	const unsigned long pfn_high = pfn >> order;
+	/* Newly allocated nodes for error cleanup */
+	struct kho_radix_node *intermediate_nodes[KHO_TREE_MAX_DEPTH] = { 0 };
+	unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order);
+	struct kho_radix_node *anchor_node = NULL;
+	struct kho_radix_node *node = tree->root;
+	struct kho_radix_node *new_node;
+	unsigned int i, idx, anchor_idx;
+	struct kho_radix_leaf *leaf;
+	int err = 0;
 
-	physxa = xa_load(&track->orders, order);
-	if (WARN_ON_ONCE(!physxa))
-		return;
+	if (WARN_ON_ONCE(!tree->root))
+		return -EINVAL;
 
-	bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
-	if (WARN_ON_ONCE(!bits))
-		return;
+	might_sleep();
+
+	guard(mutex)(&tree->lock);
+
+	/* Go from high levels to low levels */
+	for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) {
+		idx = kho_radix_get_table_index(key, i);
+
+		if (node->table[idx]) {
+			node = phys_to_virt(node->table[idx]);
+			continue;
+		}
+
+		/* Next node is empty, create a new node for it */
+		new_node = (struct kho_radix_node *)get_zeroed_page(GFP_KERNEL);
+		if (!new_node) {
+			err = -ENOMEM;
+			goto err_free_nodes;
+		}
+
+		node->table[idx] = virt_to_phys(new_node);
 
-	clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+		/*
+		 * Capture the node where the new branch starts for cleanup
+		 * if allocation fails.
+		 */
+		if (!anchor_node) {
+			anchor_node = node;
+			anchor_idx = idx;
+		}
+		intermediate_nodes[i] = new_node;
+
+		node = new_node;
+	}
+
+	/* Handle the leaf level bitmap (level 0) */
+	idx = kho_radix_get_bitmap_index(key);
+	leaf = (struct kho_radix_leaf *)node;
+	__set_bit(idx, leaf->bitmap);
+
+	return 0;
+
+err_free_nodes:
+	for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) {
+		if (intermediate_nodes[i])
+			free_page((unsigned long)intermediate_nodes[i]);
+	}
+	if (anchor_node)
+		anchor_node->table[anchor_idx] = 0;
+
+	return err;
 }
+EXPORT_SYMBOL_GPL(kho_radix_add_page);
 
-static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
-			     unsigned long end_pfn)
+/**
+ * kho_radix_del_page - Removes a page's preservation status from the radix tree.
+ * @tree: The KHO radix tree.
+ * @pfn: The page frame number of the page to unpreserve.
+ * @order: The order of the page.
+ *
+ * This function traverses the radix tree and clears the bit corresponding to
+ * the page, effectively removing its "preserved" status. It does not free
+ * the tree's intermediate nodes, even if they become empty.
+ */
+void kho_radix_del_page(struct kho_radix_tree *tree, unsigned long pfn,
+			unsigned int order)
 {
-	unsigned int order;
+	unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order);
+	struct kho_radix_node *node = tree->root;
+	struct kho_radix_leaf *leaf;
+	unsigned int i, idx;
 
-	while (pfn < end_pfn) {
-		order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+	if (WARN_ON_ONCE(!tree->root))
+		return;
+
+	might_sleep();
 
-		__kho_unpreserve_order(track, pfn, order);
+	guard(mutex)(&tree->lock);
 
-		pfn += 1 << order;
+	/* Go from high levels to low levels */
+	for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) {
+		idx = kho_radix_get_table_index(key, i);
+
+		/*
+		 * Attempting to delete a page that has not been preserved,
+		 * return with a warning.
+		 */
+		if (WARN_ON(!node->table[idx]))
+			return;
+
+		node = phys_to_virt(node->table[idx]);
 	}
+
+	/* Handle the leaf level bitmap (level 0) */
+	leaf = (struct kho_radix_leaf *)node;
+	idx = kho_radix_get_bitmap_index(key);
+	__clear_bit(idx, leaf->bitmap);
 }
+EXPORT_SYMBOL_GPL(kho_radix_del_page);
 
-static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
-				unsigned int order)
+static int kho_radix_walk_leaf(struct kho_radix_leaf *leaf,
+			       unsigned long key,
+			       kho_radix_tree_walk_callback_t cb)
 {
-	struct kho_mem_phys_bits *bits;
-	struct kho_mem_phys *physxa, *new_physxa;
-	const unsigned long pfn_high = pfn >> order;
+	unsigned long *bitmap = (unsigned long *)leaf;
+	unsigned int order;
+	phys_addr_t phys;
+	unsigned int i;
+	int err;
 
-	might_sleep();
-	physxa = xa_load(&track->orders, order);
-	if (!physxa) {
-		int err;
+	for_each_set_bit(i, bitmap, PAGE_SIZE * BITS_PER_BYTE) {
+		phys = kho_radix_decode_key(key | i, &order);
+		err = cb(phys, order);
+		if (err)
+			return err;
+	}
 
-		new_physxa = kzalloc_obj(*physxa);
-		if (!new_physxa)
-			return -ENOMEM;
+	return 0;
+}
+
+static int __kho_radix_walk_tree(struct kho_radix_node *root,
+				 unsigned int level, unsigned long start,
+				 kho_radix_tree_walk_callback_t cb)
+{
+	struct kho_radix_node *node;
+	struct kho_radix_leaf *leaf;
+	unsigned long key, i;
+	unsigned int shift;
+	int err;
+
+	for (i = 0; i < PAGE_SIZE / sizeof(phys_addr_t); i++) {
+		if (!root->table[i])
+			continue;
 
-		xa_init(&new_physxa->phys_bits);
-		physxa = xa_cmpxchg(&track->orders, order, NULL, new_physxa,
-				    GFP_KERNEL);
+		shift = ((level - 1) * KHO_TABLE_SIZE_LOG2) +
+			KHO_BITMAP_SIZE_LOG2;
+		key = start | (i << shift);
 
-		err = xa_err(physxa);
-		if (err || physxa) {
-			xa_destroy(&new_physxa->phys_bits);
-			kfree(new_physxa);
+		node = phys_to_virt(root->table[i]);
 
-			if (err)
-				return err;
+		if (level == 1) {
+			/*
+			 * we are at level 1,
+			 * node is pointing to the level 0 bitmap.
+			 */
+			leaf = (struct kho_radix_leaf *)node;
+			err = kho_radix_walk_leaf(leaf, key, cb);
 		} else {
-			physxa = new_physxa;
+			err  = __kho_radix_walk_tree(node, level - 1,
+						     key, cb);
 		}
+
+		if (err)
+			return err;
 	}
 
-	bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
-	if (IS_ERR(bits))
-		return PTR_ERR(bits);
+	return 0;
+}
+
+/**
+ * kho_radix_walk_tree - Traverses the radix tree and calls a callback for each preserved page.
+ * @tree: A pointer to the KHO radix tree to walk.
+ * @cb: A callback function of type kho_radix_tree_walk_callback_t that will be
+ *      invoked for each preserved page found in the tree. The callback receives
+ *      the physical address and order of the preserved page.
+ *
+ * This function walks the radix tree, searching from the specified top level
+ * down to the lowest level (level 0). For each preserved page found, it invokes
+ * the provided callback, passing the page's physical address and order.
+ *
+ * Return: 0 if the walk completed the specified tree, or the non-zero return
+ *         value from the callback that stopped the walk.
+ */
+int kho_radix_walk_tree(struct kho_radix_tree *tree,
+			kho_radix_tree_walk_callback_t cb)
+{
+	if (WARN_ON_ONCE(!tree->root))
+		return -EINVAL;
 
-	set_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+	guard(mutex)(&tree->lock);
 
-	return 0;
+	return __kho_radix_walk_tree(tree->root, KHO_TREE_MAX_DEPTH - 1, 0, cb);
+}
+EXPORT_SYMBOL_GPL(kho_radix_walk_tree);
+
+static void __kho_unpreserve(struct kho_radix_tree *tree,
+			     unsigned long pfn, unsigned long end_pfn)
+{
+	unsigned int order;
+
+	while (pfn < end_pfn) {
+		order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+
+		kho_radix_del_page(tree, pfn, order);
+
+		pfn += 1 << order;
+	}
 }
 
 /* For physically contiguous 0-order pages. */
@@ -318,161 +472,24 @@ struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages)
 }
 EXPORT_SYMBOL_GPL(kho_restore_pages);
 
-/* Serialize and deserialize struct kho_mem_phys across kexec
- *
- * Record all the bitmaps in a linked list of pages for the next kernel to
- * process. Each chunk holds bitmaps of the same order and each block of bitmaps
- * starts at a given physical address. This allows the bitmaps to be sparse. The
- * xarray is used to store them in a tree while building up the data structure,
- * but the KHO successor kernel only needs to process them once in order.
- *
- * All of this memory is normal kmalloc() memory and is not marked for
- * preservation. The successor kernel will remain isolated to the scratch space
- * until it completes processing this list. Once processed all the memory
- * storing these ranges will be marked as free.
- */
-
-struct khoser_mem_bitmap_ptr {
-	phys_addr_t phys_start;
-	DECLARE_KHOSER_PTR(bitmap, struct kho_mem_phys_bits *);
-};
-
-struct khoser_mem_chunk_hdr {
-	DECLARE_KHOSER_PTR(next, struct khoser_mem_chunk *);
-	unsigned int order;
-	unsigned int num_elms;
-};
-
-#define KHOSER_BITMAP_SIZE                                   \
-	((PAGE_SIZE - sizeof(struct khoser_mem_chunk_hdr)) / \
-	 sizeof(struct khoser_mem_bitmap_ptr))
-
-struct khoser_mem_chunk {
-	struct khoser_mem_chunk_hdr hdr;
-	struct khoser_mem_bitmap_ptr bitmaps[KHOSER_BITMAP_SIZE];
-};
-
-static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE);
-
-static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
-					  unsigned long order)
-{
-	struct khoser_mem_chunk *chunk __free(free_page) = NULL;
-
-	chunk = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!chunk)
-		return ERR_PTR(-ENOMEM);
-
-	if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE)))
-		return ERR_PTR(-EINVAL);
-
-	chunk->hdr.order = order;
-	if (cur_chunk)
-		KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
-	return no_free_ptr(chunk);
-}
-
-static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
-{
-	struct khoser_mem_chunk *chunk = first_chunk;
-
-	while (chunk) {
-		struct khoser_mem_chunk *tmp = chunk;
-
-		chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
-		free_page((unsigned long)tmp);
-	}
-}
-
-/*
- *  Update memory map property, if old one is found discard it via
- *  kho_mem_ser_free().
- */
-static void kho_update_memory_map(struct khoser_mem_chunk *first_chunk)
-{
-	void *ptr;
-	u64 phys;
-
-	ptr = fdt_getprop_w(kho_out.fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, NULL);
-
-	/* Check and discard previous memory map */
-	phys = get_unaligned((u64 *)ptr);
-	if (phys)
-		kho_mem_ser_free((struct khoser_mem_chunk *)phys_to_virt(phys));
-
-	/* Update with the new value */
-	phys = first_chunk ? (u64)virt_to_phys(first_chunk) : 0;
-	put_unaligned(phys, (u64 *)ptr);
-}
-
-static int kho_mem_serialize(struct kho_out *kho_out)
+static int __init kho_preserved_memory_reserve(phys_addr_t phys,
+					       unsigned int order)
 {
-	struct khoser_mem_chunk *first_chunk = NULL;
-	struct khoser_mem_chunk *chunk = NULL;
-	struct kho_mem_phys *physxa;
-	unsigned long order;
-	int err = -ENOMEM;
-
-	xa_for_each(&kho_out->track.orders, order, physxa) {
-		struct kho_mem_phys_bits *bits;
-		unsigned long phys;
-
-		chunk = new_chunk(chunk, order);
-		if (IS_ERR(chunk)) {
-			err = PTR_ERR(chunk);
-			goto err_free;
-		}
-
-		if (!first_chunk)
-			first_chunk = chunk;
-
-		xa_for_each(&physxa->phys_bits, phys, bits) {
-			struct khoser_mem_bitmap_ptr *elm;
-
-			if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
-				chunk = new_chunk(chunk, order);
-				if (IS_ERR(chunk)) {
-					err = PTR_ERR(chunk);
-					goto err_free;
-				}
-			}
+	union kho_page_info info;
+	struct page *page;
+	u64 sz;
 
-			elm = &chunk->bitmaps[chunk->hdr.num_elms];
-			chunk->hdr.num_elms++;
-			elm->phys_start = (phys * PRESERVE_BITS)
-					  << (order + PAGE_SHIFT);
-			KHOSER_STORE_PTR(elm->bitmap, bits);
-		}
-	}
+	sz = 1 << (order + PAGE_SHIFT);
+	page = phys_to_page(phys);
 
-	kho_update_memory_map(first_chunk);
+	/* Reserve the memory preserved in KHO in memblock */
+	memblock_reserve(phys, sz);
+	memblock_reserved_mark_noinit(phys, sz);
+	info.magic = KHO_PAGE_MAGIC;
+	info.order = order;
+	page->private = info.page_private;
 
 	return 0;
-
-err_free:
-	kho_mem_ser_free(first_chunk);
-	return err;
-}
-
-static void __init deserialize_bitmap(unsigned int order,
-				      struct khoser_mem_bitmap_ptr *elm)
-{
-	struct kho_mem_phys_bits *bitmap = KHOSER_LOAD_PTR(elm->bitmap);
-	unsigned long bit;
-
-	for_each_set_bit(bit, bitmap->preserve, PRESERVE_BITS) {
-		int sz = 1 << (order + PAGE_SHIFT);
-		phys_addr_t phys =
-			elm->phys_start + (bit << (order + PAGE_SHIFT));
-		struct page *page = phys_to_page(phys);
-		union kho_page_info info;
-
-		memblock_reserve(phys, sz);
-		memblock_reserved_mark_noinit(phys, sz);
-		info.magic = KHO_PAGE_MAGIC;
-		info.order = order;
-		page->private = info.page_private;
-	}
 }
 
 /* Returns physical address of the preserved memory map from FDT */
@@ -483,25 +500,13 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
 
 	mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
 	if (!mem_ptr || len != sizeof(u64)) {
-		pr_err("failed to get preserved memory bitmaps\n");
+		pr_err("failed to get preserved memory map\n");
 		return 0;
 	}
 
 	return get_unaligned((const u64 *)mem_ptr);
 }
 
-static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
-{
-	while (chunk) {
-		unsigned int i;
-
-		for (i = 0; i != chunk->hdr.num_elms; i++)
-			deserialize_bitmap(chunk->hdr.order,
-					   &chunk->bitmaps[i]);
-		chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
-	}
-}
-
 /*
  * With KHO enabled, memory can become fragmented because KHO regions may
  * be anywhere in physical address space. The scratch regions give us a
@@ -812,14 +817,14 @@ EXPORT_SYMBOL_GPL(kho_remove_subtree);
  */
 int kho_preserve_folio(struct folio *folio)
 {
+	struct kho_radix_tree *tree = &kho_out.radix_tree;
 	const unsigned long pfn = folio_pfn(folio);
 	const unsigned int order = folio_order(folio);
-	struct kho_mem_track *track = &kho_out.track;
 
 	if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
 		return -EINVAL;
 
-	return __kho_preserve_order(track, pfn, order);
+	return kho_radix_add_page(tree, pfn, order);
 }
 EXPORT_SYMBOL_GPL(kho_preserve_folio);
 
@@ -833,11 +838,11 @@ EXPORT_SYMBOL_GPL(kho_preserve_folio);
  */
 void kho_unpreserve_folio(struct folio *folio)
 {
+	struct kho_radix_tree *tree = &kho_out.radix_tree;
 	const unsigned long pfn = folio_pfn(folio);
 	const unsigned int order = folio_order(folio);
-	struct kho_mem_track *track = &kho_out.track;
 
-	__kho_unpreserve_order(track, pfn, order);
+	kho_radix_del_page(tree, pfn, order);
 }
 EXPORT_SYMBOL_GPL(kho_unpreserve_folio);
 
@@ -853,7 +858,7 @@ EXPORT_SYMBOL_GPL(kho_unpreserve_folio);
  */
 int kho_preserve_pages(struct page *page, unsigned long nr_pages)
 {
-	struct kho_mem_track *track = &kho_out.track;
+	struct kho_radix_tree *tree = &kho_out.radix_tree;
 	const unsigned long start_pfn = page_to_pfn(page);
 	const unsigned long end_pfn = start_pfn + nr_pages;
 	unsigned long pfn = start_pfn;
@@ -869,7 +874,7 @@ int kho_preserve_pages(struct page *page, unsigned long nr_pages)
 		const unsigned int order =
 			min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
 
-		err = __kho_preserve_order(track, pfn, order);
+		err = kho_radix_add_page(tree, pfn, order);
 		if (err) {
 			failed_pfn = pfn;
 			break;
@@ -879,7 +884,7 @@ int kho_preserve_pages(struct page *page, unsigned long nr_pages)
 	}
 
 	if (err)
-		__kho_unpreserve(track, start_pfn, failed_pfn);
+		__kho_unpreserve(tree, start_pfn, failed_pfn);
 
 	return err;
 }
@@ -897,11 +902,11 @@ EXPORT_SYMBOL_GPL(kho_preserve_pages);
  */
 void kho_unpreserve_pages(struct page *page, unsigned long nr_pages)
 {
-	struct kho_mem_track *track = &kho_out.track;
+	struct kho_radix_tree *tree = &kho_out.radix_tree;
 	const unsigned long start_pfn = page_to_pfn(page);
 	const unsigned long end_pfn = start_pfn + nr_pages;
 
-	__kho_unpreserve(track, start_pfn, end_pfn);
+	__kho_unpreserve(tree, start_pfn, end_pfn);
 }
 EXPORT_SYMBOL_GPL(kho_unpreserve_pages);
 
@@ -960,14 +965,14 @@ err_free:
 static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk,
 					 unsigned short order)
 {
-	struct kho_mem_track *track = &kho_out.track;
+	struct kho_radix_tree *tree = &kho_out.radix_tree;
 	unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
 
-	__kho_unpreserve(track, pfn, pfn + 1);
+	__kho_unpreserve(tree, pfn, pfn + 1);
 
 	for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) {
 		pfn = PHYS_PFN(chunk->phys[i]);
-		__kho_unpreserve(track, pfn, pfn + (1 << order));
+		__kho_unpreserve(tree, pfn, pfn + (1 << order));
 	}
 }
 
@@ -1238,16 +1243,10 @@ EXPORT_SYMBOL_GPL(kho_restore_free);
 
 int kho_finalize(void)
 {
-	int ret;
-
 	if (!kho_enable)
 		return -EOPNOTSUPP;
 
 	guard(mutex)(&kho_out.lock);
-	ret = kho_mem_serialize(&kho_out);
-	if (ret)
-		return ret;
-
 	kho_out.finalized = true;
 
 	return 0;
@@ -1262,7 +1261,6 @@ bool kho_finalized(void)
 struct kho_in {
 	phys_addr_t fdt_phys;
 	phys_addr_t scratch_phys;
-	phys_addr_t mem_map_phys;
 	struct kho_debugfs dbg;
 };
 
@@ -1330,18 +1328,46 @@ int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
 }
 EXPORT_SYMBOL_GPL(kho_retrieve_subtree);
 
+static int __init kho_mem_retrieve(const void *fdt)
+{
+	struct kho_radix_tree tree;
+	const phys_addr_t *mem;
+	int len;
+
+	/* Retrieve the KHO radix tree from passed-in FDT. */
+	mem = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
+
+	if (!mem || len != sizeof(*mem)) {
+		pr_err("failed to get preserved KHO memory tree\n");
+		return -ENOENT;
+	}
+
+	if (!*mem)
+		return -EINVAL;
+
+	tree.root = phys_to_virt(*mem);
+	mutex_init(&tree.lock);
+	return kho_radix_walk_tree(&tree, kho_preserved_memory_reserve);
+}
+
 static __init int kho_out_fdt_setup(void)
 {
+	struct kho_radix_tree *tree = &kho_out.radix_tree;
 	void *root = kho_out.fdt;
-	u64 empty_mem_map = 0;
+	u64 preserved_mem_tree_pa;
 	int err;
 
 	err = fdt_create(root, PAGE_SIZE);
 	err |= fdt_finish_reservemap(root);
 	err |= fdt_begin_node(root, "");
 	err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE);
-	err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME, &empty_mem_map,
-			    sizeof(empty_mem_map));
+
+	preserved_mem_tree_pa = virt_to_phys(tree->root);
+
+	err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME,
+			    &preserved_mem_tree_pa,
+			    sizeof(preserved_mem_tree_pa));
+
 	err |= fdt_end_node(root);
 	err |= fdt_finish(root);
 
@@ -1350,16 +1376,23 @@ static __init int kho_out_fdt_setup(void)
 
 static __init int kho_init(void)
 {
+	struct kho_radix_tree *tree = &kho_out.radix_tree;
 	const void *fdt = kho_get_fdt();
 	int err = 0;
 
 	if (!kho_enable)
 		return 0;
 
+	tree->root = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!tree->root) {
+		err = -ENOMEM;
+		goto err_free_scratch;
+	}
+
 	kho_out.fdt = kho_alloc_preserve(PAGE_SIZE);
 	if (IS_ERR(kho_out.fdt)) {
 		err = PTR_ERR(kho_out.fdt);
-		goto err_free_scratch;
+		goto err_free_kho_radix_tree_root;
 	}
 
 	err = kho_debugfs_init();
@@ -1405,6 +1438,9 @@ static __init int kho_init(void)
 
 err_free_fdt:
 	kho_unpreserve_free(kho_out.fdt);
+err_free_kho_radix_tree_root:
+	kfree(tree->root);
+	tree->root = NULL;
 err_free_scratch:
 	kho_out.fdt = NULL;
 	for (int i = 0; i < kho_scratch_cnt; i++) {
@@ -1444,10 +1480,12 @@ static void __init kho_release_scratch(void)
 
 void __init kho_memory_init(void)
 {
-	if (kho_in.mem_map_phys) {
+	if (kho_in.scratch_phys) {
 		kho_scratch = phys_to_virt(kho_in.scratch_phys);
 		kho_release_scratch();
-		kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys));
+
+		if (kho_mem_retrieve(kho_get_fdt()))
+			kho_in.fdt_phys = 0;
 	} else {
 		kho_reserve_scratch();
 	}
@@ -1525,7 +1563,6 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 
 	kho_in.fdt_phys = fdt_phys;
 	kho_in.scratch_phys = scratch_phys;
-	kho_in.mem_map_phys = mem_map_phys;
 	kho_scratch_cnt = scratch_cnt;
 
 	populated = true;
diff --git a/kernel/liveupdate/kexec_handover_debugfs.c b/kernel/liveupdate/kexec_handover_debugfs.c
index 2f93939168ab..548033fd8a62 100644
--- a/kernel/liveupdate/kexec_handover_debugfs.c
+++ b/kernel/liveupdate/kexec_handover_debugfs.c
@@ -13,6 +13,7 @@
 #include <linux/io.h>
 #include <linux/libfdt.h>
 #include <linux/mm.h>
+#include <linux/kho/abi/kexec_handover.h>
 #include "kexec_handover_internal.h"
 
 static struct dentry *debugfs_root;
@@ -139,7 +140,7 @@ __init void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt)
 		const char *name = fdt_get_name(fdt, child, NULL);
 		const u64 *fdt_phys;
 
-		fdt_phys = fdt_getprop(fdt, child, "fdt", &len);
+		fdt_phys = fdt_getprop(fdt, child, KHO_FDT_SUB_TREE_PROP_NAME, &len);
 		if (!fdt_phys)
 			continue;
 		if (len != sizeof(*fdt_phys)) {
-- 
cgit v1.2.3


From b9ec0ed907062a67a7cca2d04e7652aec06a0c35 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 23 Feb 2026 11:01:06 -0500
Subject: mm: vmalloc: streamline vmalloc memory accounting

Use a vmstat counter instead of a custom, open-coded atomic. This has
the added benefit of making the data available per-node, and prepares
for cleaning up the memcg accounting as well.

Link: https://lkml.kernel.org/r/20260223160147.3792777-1-hannes@cmpxchg.org
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/proc/meminfo.c       |  3 ++-
 include/linux/mmzone.h  |  1 +
 include/linux/vmalloc.h |  3 ---
 mm/vmalloc.c            | 19 ++++++++++---------
 mm/vmstat.c             |  1 +
 5 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index a458f1e112fd..549793f44726 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -126,7 +126,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	show_val_kb(m, "Committed_AS:   ", committed);
 	seq_printf(m, "VmallocTotal:   %8lu kB\n",
 		   (unsigned long)VMALLOC_TOTAL >> 10);
-	show_val_kb(m, "VmallocUsed:    ", vmalloc_nr_pages());
+	show_val_kb(m, "VmallocUsed:    ",
+		    global_node_page_state(NR_VMALLOC));
 	show_val_kb(m, "VmallocChunk:   ", 0ul);
 	show_val_kb(m, "Percpu:         ", pcpu_nr_pages());
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 546bca95ca40..db41b18a919d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -220,6 +220,7 @@ enum node_stat_item {
 	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
 	NR_FOLL_PIN_ACQUIRED,	/* via: pin_user_page(), gup flag: FOLL_PIN */
 	NR_FOLL_PIN_RELEASED,	/* pages returned via unpin_user_page() */
+	NR_VMALLOC,
 	NR_KERNEL_STACK_KB,	/* measured in KiB */
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
 	NR_KERNEL_SCS_KB,	/* measured in KiB */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index e8e94f90d686..3b02c0c6b371 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -286,8 +286,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb);
 #ifdef CONFIG_MMU
 #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
 
-unsigned long vmalloc_nr_pages(void);
-
 int vm_area_map_pages(struct vm_struct *area, unsigned long start,
 		      unsigned long end, struct page **pages);
 void vm_area_unmap_pages(struct vm_struct *area, unsigned long start,
@@ -304,7 +302,6 @@ static inline void set_vm_flush_reset_perms(void *addr)
 #else  /* !CONFIG_MMU */
 #define VMALLOC_TOTAL 0UL
 
-static inline unsigned long vmalloc_nr_pages(void) { return 0; }
 static inline void set_vm_flush_reset_perms(void *addr) {}
 #endif /* CONFIG_MMU */
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 61caa55a4402..e9d7c2a8c753 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1068,14 +1068,8 @@ static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
 static void drain_vmap_area_work(struct work_struct *work);
 static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
 
-static __cacheline_aligned_in_smp atomic_long_t nr_vmalloc_pages;
 static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
 
-unsigned long vmalloc_nr_pages(void)
-{
-	return atomic_long_read(&nr_vmalloc_pages);
-}
-
 static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
 {
 	struct rb_node *n = root->rb_node;
@@ -3476,11 +3470,11 @@ void vfree(const void *addr)
 		 * High-order allocs for huge vmallocs are split, so
 		 * can be freed as an array of order-0 allocations
 		 */
+		if (!(vm->flags & VM_MAP_PUT_PAGES))
+			dec_node_page_state(page, NR_VMALLOC);
 		__free_page(page);
 		cond_resched();
 	}
-	if (!(vm->flags & VM_MAP_PUT_PAGES))
-		atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
 	kvfree(vm->pages);
 	kfree(vm);
 }
@@ -3668,6 +3662,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			continue;
 		}
 
+		mod_node_page_state(page_pgdat(page), NR_VMALLOC, 1 << large_order);
+
 		split_page(page, large_order);
 		for (i = 0; i < (1U << large_order); i++)
 			pages[nr_allocated + i] = page + i;
@@ -3688,6 +3684,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 	if (!order) {
 		while (nr_allocated < nr_pages) {
 			unsigned int nr, nr_pages_request;
+			int i;
 
 			/*
 			 * A maximum allowed request is hard-coded and is 100
@@ -3711,6 +3708,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 							nr_pages_request,
 							pages + nr_allocated);
 
+			for (i = nr_allocated; i < nr_allocated + nr; i++)
+				inc_node_page_state(pages[i], NR_VMALLOC);
+
 			nr_allocated += nr;
 
 			/*
@@ -3735,6 +3735,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 		if (unlikely(!page))
 			break;
 
+		mod_node_page_state(page_pgdat(page), NR_VMALLOC, 1 << order);
+
 		/*
 		 * High-order allocations must be able to be treated as
 		 * independent small pages by callers (as they can with
@@ -3877,7 +3879,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			vmalloc_gfp_adjust(gfp_mask, page_order), node,
 			page_order, nr_small_pages, area->pages);
 
-	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
 	/* All pages of vm should be charged to same memcg, so use first one. */
 	if (gfp_mask & __GFP_ACCOUNT && area->nr_pages)
 		mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 667474773dbc..2370c6fb1fcd 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1255,6 +1255,7 @@ const char * const vmstat_text[] = {
 	[I(NR_KERNEL_MISC_RECLAIMABLE)]		= "nr_kernel_misc_reclaimable",
 	[I(NR_FOLL_PIN_ACQUIRED)]		= "nr_foll_pin_acquired",
 	[I(NR_FOLL_PIN_RELEASED)]		= "nr_foll_pin_released",
+	[I(NR_VMALLOC)]				= "nr_vmalloc",
 	[I(NR_KERNEL_STACK_KB)]			= "nr_kernel_stack",
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
 	[I(NR_KERNEL_SCS_KB)]			= "nr_shadow_call_stack",
-- 
cgit v1.2.3


From c466412c73c339e33e83b68770e5b556457c03de Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 23 Feb 2026 11:01:07 -0500
Subject: mm: memcontrol: switch to native NR_VMALLOC vmstat counter

Eliminates the custom memcg counter and results in a single, consolidated
accounting call in vmalloc code.

Link: https://lkml.kernel.org/r/20260223160147.3792777-2-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h |  1 -
 mm/memcontrol.c            |  4 ++--
 mm/vmalloc.c               | 16 ++++------------
 3 files changed, 6 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5695776f32c8..5173a9f16721 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,7 +35,6 @@ enum memcg_stat_item {
 	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
 	MEMCG_SOCK,
 	MEMCG_PERCPU_B,
-	MEMCG_VMALLOC,
 	MEMCG_KMEM,
 	MEMCG_ZSWAP_B,
 	MEMCG_ZSWAPPED,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 75df24ffdf25..eb54cdf99624 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -317,6 +317,7 @@ static const unsigned int memcg_node_stat_items[] = {
 	NR_SHMEM_THPS,
 	NR_FILE_THPS,
 	NR_ANON_THPS,
+	NR_VMALLOC,
 	NR_KERNEL_STACK_KB,
 	NR_PAGETABLE,
 	NR_SECONDARY_PAGETABLE,
@@ -352,7 +353,6 @@ static const unsigned int memcg_stat_items[] = {
 	MEMCG_SWAP,
 	MEMCG_SOCK,
 	MEMCG_PERCPU_B,
-	MEMCG_VMALLOC,
 	MEMCG_KMEM,
 	MEMCG_ZSWAP_B,
 	MEMCG_ZSWAPPED,
@@ -1364,7 +1364,7 @@ static const struct memory_stat memory_stats[] = {
 	{ "sec_pagetables",		NR_SECONDARY_PAGETABLE		},
 	{ "percpu",			MEMCG_PERCPU_B			},
 	{ "sock",			MEMCG_SOCK			},
-	{ "vmalloc",			MEMCG_VMALLOC			},
+	{ "vmalloc",			NR_VMALLOC			},
 	{ "shmem",			NR_SHMEM			},
 #ifdef CONFIG_ZSWAP
 	{ "zswap",			MEMCG_ZSWAP_B			},
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e9d7c2a8c753..6dda97c3799e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3459,9 +3459,6 @@ void vfree(const void *addr)
 
 	if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
 		vm_reset_perms(vm);
-	/* All pages of vm should be charged to same memcg, so use first one. */
-	if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
-		mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
 	for (i = 0; i < vm->nr_pages; i++) {
 		struct page *page = vm->pages[i];
 
@@ -3471,7 +3468,7 @@ void vfree(const void *addr)
 		 * can be freed as an array of order-0 allocations
 		 */
 		if (!(vm->flags & VM_MAP_PUT_PAGES))
-			dec_node_page_state(page, NR_VMALLOC);
+			mod_lruvec_page_state(page, NR_VMALLOC, -1);
 		__free_page(page);
 		cond_resched();
 	}
@@ -3662,7 +3659,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			continue;
 		}
 
-		mod_node_page_state(page_pgdat(page), NR_VMALLOC, 1 << large_order);
+		mod_lruvec_page_state(page, NR_VMALLOC, 1 << large_order);
 
 		split_page(page, large_order);
 		for (i = 0; i < (1U << large_order); i++)
@@ -3709,7 +3706,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 							pages + nr_allocated);
 
 			for (i = nr_allocated; i < nr_allocated + nr; i++)
-				inc_node_page_state(pages[i], NR_VMALLOC);
+				mod_lruvec_page_state(pages[i], NR_VMALLOC, 1);
 
 			nr_allocated += nr;
 
@@ -3735,7 +3732,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 		if (unlikely(!page))
 			break;
 
-		mod_node_page_state(page_pgdat(page), NR_VMALLOC, 1 << order);
+		mod_lruvec_page_state(page, NR_VMALLOC, 1 << order);
 
 		/*
 		 * High-order allocations must be able to be treated as
@@ -3879,11 +3876,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			vmalloc_gfp_adjust(gfp_mask, page_order), node,
 			page_order, nr_small_pages, area->pages);
 
-	/* All pages of vm should be charged to same memcg, so use first one. */
-	if (gfp_mask & __GFP_ACCOUNT && area->nr_pages)
-		mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC,
-				     area->nr_pages);
-
 	/*
 	 * If not enough pages were obtained to accomplish an
 	 * allocation request, free them via vfree() if any.
-- 
cgit v1.2.3


From 2b8acf8450f577d3785dacfd616630b76dc8f88d Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 25 Feb 2026 16:13:58 +0000
Subject: mm: introduce vm_mmap_shadow_stack() as a helper for VM_SHADOW_STACK
 mappings

Patch series "mm: arch/shstk: Common shadow stack mapping helper and
VM_NOHUGEPAGE", v2.

A series to extract the common shadow stack mmap into a separate helper
for arm64, riscv and x86.


This patch (of 5):

arm64, riscv and x86 use a similar pattern for mapping the user shadow
stack (cloned from x86).  Extract this into a helper to facilitate code
reuse.

The call to do_mmap() from the new helper uses PROT_READ|PROT_WRITE prot
bits instead of the PROT_READ with an explicit VM_WRITE vm_flag.  The x86
intent was to avoid PROT_WRITE implying normal write since the shadow
stack is not writable by normal stores.  However, from a kernel
perspective, the vma is writeable.  Functionally there is no difference.

Link: https://lkml.kernel.org/r/20260225161404.3157851-1-catalin.marinas@arm.com
Link: https://lkml.kernel.org/r/20260225161404.3157851-2-catalin.marinas@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Deepak Gupta <debug@rivosinc.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: "Edgecombe, Rick P" <rick.p.edgecombe@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Paul Walmsley <pjw@kernel.org>
Cc: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h |  2 ++
 mm/util.c          | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index abb4963c1f06..bb0cfe38ca19 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3903,6 +3903,8 @@ extern int vm_munmap(unsigned long, size_t);
 extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
+extern unsigned long __must_check vm_mmap_shadow_stack(unsigned long addr,
+		unsigned long len, unsigned long flags);
 
 struct vm_unmapped_area_info {
 #define VM_UNMAPPED_AREA_TOPDOWN 1
diff --git a/mm/util.c b/mm/util.c
index b05ab6f97e11..51f7f417e91f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -618,6 +618,31 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_mmap);
 
+#ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK
+/*
+ * Perform a userland memory mapping for a shadow stack into the current
+ * process address space. This is intended to be used by architectures that
+ * support user shadow stacks.
+ */
+unsigned long vm_mmap_shadow_stack(unsigned long addr, unsigned long len,
+		unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long ret, unused;
+
+	flags |= MAP_ANONYMOUS | MAP_PRIVATE;
+	if (addr)
+		flags |= MAP_FIXED_NOREPLACE;
+
+	mmap_write_lock(mm);
+	ret = do_mmap(NULL, addr, len, PROT_READ | PROT_WRITE, flags,
+		      VM_SHADOW_STACK, 0, &unused, NULL);
+	mmap_write_unlock(mm);
+
+	return ret;
+}
+#endif /* CONFIG_ARCH_HAS_USER_SHADOW_STACK */
+
 /**
  * __vmalloc_array - allocate memory for a virtually contiguous array.
  * @n: number of elements.
-- 
cgit v1.2.3


From cbf56f9981014ee48ae9b9e2254f31d1642b8f8f Mon Sep 17 00:00:00 2001
From: Tal Zussman <tz2294@columbia.edu>
Date: Wed, 25 Feb 2026 18:44:25 -0500
Subject: mm: remove stray references to struct pagevec

Patch series "mm: Remove stray references to pagevec", v2.

struct pagevec was removed in commit 1e0877d58b1e ("mm: remove struct
pagevec").  Remove any stray references to it and rename relevant files
and macros accordingly.

While at it, remove unnecessary #includes of pagevec.h (now folio_batch.h)
in .c files.  There are probably more of these that could be removed in .h
files, but those are more complex to verify.


This patch (of 4):

struct pagevec was removed in commit 1e0877d58b1e ("mm: remove struct
pagevec").  Remove remaining forward declarations and change
__folio_batch_release()'s declaration to match its definition.

Link: https://lkml.kernel.org/r/20260225-pagevec_cleanup-v2-0-716868cc2d11@columbia.edu
Link: https://lkml.kernel.org/r/20260225-pagevec_cleanup-v2-1-716868cc2d11@columbia.edu
Signed-off-by: Tal Zussman <tz2294@columbia.edu>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Chris Li <chrisl@kernel.org>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/afs/internal.h       | 1 -
 fs/f2fs/f2fs.h          | 2 --
 include/linux/pagevec.h | 2 +-
 include/linux/swap.h    | 2 --
 4 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 009064b8d661..599353c33337 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -31,7 +31,6 @@
 
 #define AFS_CELL_MAX_ADDRS 15
 
-struct pagevec;
 struct afs_call;
 struct afs_vnode;
 struct afs_server_probe;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index bb34e864d0ef..d9e8531a5301 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -28,8 +28,6 @@
 #include <linux/fscrypt.h>
 #include <linux/fsverity.h>
 
-struct pagevec;
-
 #ifdef CONFIG_F2FS_CHECK_FS
 #define f2fs_bug_on(sbi, condition)	BUG_ON(condition)
 #else
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 63be5a451627..007affabf335 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -93,7 +93,7 @@ static inline struct folio *folio_batch_next(struct folio_batch *fbatch)
 	return fbatch->folios[fbatch->i++];
 }
 
-void __folio_batch_release(struct folio_batch *pvec);
+void __folio_batch_release(struct folio_batch *fbatch);
 
 static inline void folio_batch_release(struct folio_batch *fbatch)
 {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0effe3cc50f5..4b1f13b5bbad 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -20,8 +20,6 @@ struct notifier_block;
 
 struct bio;
 
-struct pagevec;
-
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_DISCARD	0x10000 /* enable discard for swap */
-- 
cgit v1.2.3


From 4e1d77a8f382a0ef4dd7732bb1986c8143600def Mon Sep 17 00:00:00 2001
From: Tal Zussman <tz2294@columbia.edu>
Date: Wed, 25 Feb 2026 18:44:27 -0500
Subject: folio_batch: rename pagevec.h to folio_batch.h

struct pagevec was removed in commit 1e0877d58b1e ("mm: remove struct
pagevec").  Rename include/linux/pagevec.h to reflect reality and update
includes tree-wide.  Add the new filename to MAINTAINERS explicitly, as it
no longer matches the "include/linux/page[-_]*" pattern in MEMORY
MANAGEMENT - CORE.

Link: https://lkml.kernel.org/r/20260225-pagevec_cleanup-v2-3-716868cc2d11@columbia.edu
Signed-off-by: Tal Zussman <tz2294@columbia.edu>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 MAINTAINERS                               |   1 +
 drivers/gpu/drm/drm_gem.c                 |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |   2 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h       |   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c     |   2 +-
 fs/btrfs/compression.c                    |   2 +-
 fs/btrfs/extent_io.c                      |   2 +-
 fs/btrfs/tests/extent-io-tests.c          |   2 +-
 fs/buffer.c                               |   2 +-
 fs/ceph/addr.c                            |   2 +-
 fs/ext4/inode.c                           |   2 +-
 fs/f2fs/checkpoint.c                      |   2 +-
 fs/f2fs/compress.c                        |   2 +-
 fs/f2fs/data.c                            |   2 +-
 fs/f2fs/node.c                            |   2 +-
 fs/gfs2/aops.c                            |   2 +-
 fs/hugetlbfs/inode.c                      |   2 +-
 fs/nilfs2/btree.c                         |   2 +-
 fs/nilfs2/page.c                          |   2 +-
 fs/nilfs2/segment.c                       |   2 +-
 fs/ramfs/file-nommu.c                     |   2 +-
 include/linux/folio_batch.h               | 105 ++++++++++++++++++++++++++++++
 include/linux/folio_queue.h               |   2 +-
 include/linux/iomap.h                     |   2 +-
 include/linux/pagevec.h                   | 105 ------------------------------
 include/linux/sunrpc/svc.h                |   2 +-
 include/linux/writeback.h                 |   2 +-
 mm/filemap.c                              |   2 +-
 mm/gup.c                                  |   2 +-
 mm/memcontrol.c                           |   2 +-
 mm/mlock.c                                |   2 +-
 mm/page-writeback.c                       |   2 +-
 mm/page_alloc.c                           |   2 +-
 mm/shmem.c                                |   2 +-
 mm/swap.c                                 |   2 +-
 mm/swap_state.c                           |   2 +-
 mm/truncate.c                             |   2 +-
 mm/vmscan.c                               |   2 +-
 38 files changed, 141 insertions(+), 140 deletions(-)
 create mode 100644 include/linux/folio_batch.h
 delete mode 100644 include/linux/pagevec.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 7049d85c586e..7a1b94a4aea2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16653,6 +16653,7 @@ L:	linux-mm@kvack.org
 S:	Maintained
 W:	http://www.linux-mm.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F:	include/linux/folio_batch.h
 F:	include/linux/gfp.h
 F:	include/linux/gfp_types.h
 F:	include/linux/highmem.h
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 891c3bff5ae0..dc4534fb175c 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -38,7 +38,7 @@
 #include <linux/mman.h>
 #include <linux/module.h>
 #include <linux/pagemap.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/sched/mm.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 720a9ad39aa2..06543ae60706 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -3,7 +3,7 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/shmem_fs.h>
 #include <linux/swap.h>
 #include <linux/uio.h>
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 9d3a3ad567a0..b54ee4f25af1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -19,7 +19,7 @@
 #include <linux/io-mapping.h>
 #include <linux/kref.h>
 #include <linux/mm.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a99b4e45d26c..ffe5f24594c9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -31,7 +31,7 @@
 #include <linux/debugfs.h>
 #include <linux/highmem.h>
 #include <linux/nmi.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/scatterlist.h>
 #include <linux/string_helpers.h>
 #include <linux/utsname.h>
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 85199944c1eb..de40b8934725 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -8,7 +8,7 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/highmem.h>
 #include <linux/kthread.h>
 #include <linux/time.h>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5f97a3d2a8d7..89649ef5107a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -11,7 +11,7 @@
 #include <linux/blkdev.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/prefetch.h>
 #include <linux/fsverity.h>
 #include "extent_io.h"
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index a0187d6163df..b2aacf846c8b 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -4,7 +4,7 @@
  */
 
 #include <linux/pagemap.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/sizes.h>
diff --git a/fs/buffer.c b/fs/buffer.c
index 22b43642ba57..f3122160ee2d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -45,7 +45,7 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/sched/mm.h>
 #include <trace/events/block.h>
 #include <linux/fscrypt.h>
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 2090fc78529c..bbeafbc777ee 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -7,7 +7,7 @@
 #include <linux/swap.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/signal.h>
 #include <linux/iversion.h>
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 396dc3a5d16b..58f982885187 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -29,7 +29,7 @@
 #include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/mpage.h>
 #include <linux/rmap.h>
 #include <linux/namei.h>
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6dd39b7de11a..0143365c07dc 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -11,7 +11,7 @@
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/f2fs_fs.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/swap.h>
 #include <linux/kthread.h>
 #include <linux/delayacct.h>
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 8c76400ba631..614e00b8ffdc 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -13,7 +13,7 @@
 #include <linux/lzo.h>
 #include <linux/lz4.h>
 #include <linux/zstd.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 
 #include "f2fs.h"
 #include "node.h"
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 338df7a2aea6..90e8ef625d82 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -10,7 +10,7 @@
 #include <linux/sched/mm.h>
 #include <linux/mpage.h>
 #include <linux/writeback.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/blk-crypto.h>
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 74992fd9c9b6..ba0272314528 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -10,7 +10,7 @@
 #include <linux/mpage.h>
 #include <linux/sched/mm.h>
 #include <linux/blkdev.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/swap.h>
 
 #include "f2fs.h"
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index e79ad087512a..dae3dc4ee6f7 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -10,7 +10,7 @@
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/mpage.h>
 #include <linux/fs.h>
 #include <linux/writeback.h>
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 22c799000edb..2ec3e4231252 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -25,7 +25,7 @@
 #include <linux/ctype.h>
 #include <linux/backing-dev.h>
 #include <linux/hugetlb.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/fs_parser.h>
 #include <linux/mman.h>
 #include <linux/slab.h>
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index dd0c8e560ef6..b400cfcdc803 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -10,7 +10,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/errno.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include "nilfs.h"
 #include "page.h"
 #include "btnode.h"
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 56c4da417b6a..a9d8aa65416f 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -14,7 +14,7 @@
 #include <linux/page-flags.h>
 #include <linux/list.h>
 #include <linux/highmem.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/gfp.h>
 #include "nilfs.h"
 #include "page.h"
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 098a3bd103e0..6d62de64a309 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -19,7 +19,7 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/crc32.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/slab.h>
 #include <linux/sched/signal.h>
 
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 0f8e838ece07..2f79bcb89d2e 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -14,7 +14,7 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/ramfs.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/mman.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
diff --git a/include/linux/folio_batch.h b/include/linux/folio_batch.h
new file mode 100644
index 000000000000..a2f3d3043f7e
--- /dev/null
+++ b/include/linux/folio_batch.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * include/linux/folio_batch.h
+ *
+ * In many places it is efficient to batch an operation up against multiple
+ * folios.  A folio_batch is a container which is used for that.
+ */
+
+#ifndef _LINUX_FOLIO_BATCH_H
+#define _LINUX_FOLIO_BATCH_H
+
+#include <linux/types.h>
+
+/* 31 pointers + header align the folio_batch structure to a power of two */
+#define PAGEVEC_SIZE	31
+
+struct folio;
+
+/**
+ * struct folio_batch - A collection of folios.
+ *
+ * The folio_batch is used to amortise the cost of retrieving and
+ * operating on a set of folios.  The order of folios in the batch may be
+ * significant (eg delete_from_page_cache_batch()).  Some users of the
+ * folio_batch store "exceptional" entries in it which can be removed
+ * by calling folio_batch_remove_exceptionals().
+ */
+struct folio_batch {
+	unsigned char nr;
+	unsigned char i;
+	bool percpu_pvec_drained;
+	struct folio *folios[PAGEVEC_SIZE];
+};
+
+/**
+ * folio_batch_init() - Initialise a batch of folios
+ * @fbatch: The folio batch.
+ *
+ * A freshly initialised folio_batch contains zero folios.
+ */
+static inline void folio_batch_init(struct folio_batch *fbatch)
+{
+	fbatch->nr = 0;
+	fbatch->i = 0;
+	fbatch->percpu_pvec_drained = false;
+}
+
+static inline void folio_batch_reinit(struct folio_batch *fbatch)
+{
+	fbatch->nr = 0;
+	fbatch->i = 0;
+}
+
+static inline unsigned int folio_batch_count(const struct folio_batch *fbatch)
+{
+	return fbatch->nr;
+}
+
+static inline unsigned int folio_batch_space(const struct folio_batch *fbatch)
+{
+	return PAGEVEC_SIZE - fbatch->nr;
+}
+
+/**
+ * folio_batch_add() - Add a folio to a batch.
+ * @fbatch: The folio batch.
+ * @folio: The folio to add.
+ *
+ * The folio is added to the end of the batch.
+ * The batch must have previously been initialised using folio_batch_init().
+ *
+ * Return: The number of slots still available.
+ */
+static inline unsigned folio_batch_add(struct folio_batch *fbatch,
+		struct folio *folio)
+{
+	fbatch->folios[fbatch->nr++] = folio;
+	return folio_batch_space(fbatch);
+}
+
+/**
+ * folio_batch_next - Return the next folio to process.
+ * @fbatch: The folio batch being processed.
+ *
+ * Use this function to implement a queue of folios.
+ *
+ * Return: The next folio in the queue, or NULL if the queue is empty.
+ */
+static inline struct folio *folio_batch_next(struct folio_batch *fbatch)
+{
+	if (fbatch->i == fbatch->nr)
+		return NULL;
+	return fbatch->folios[fbatch->i++];
+}
+
+void __folio_batch_release(struct folio_batch *fbatch);
+
+static inline void folio_batch_release(struct folio_batch *fbatch)
+{
+	if (folio_batch_count(fbatch))
+		__folio_batch_release(fbatch);
+}
+
+void folio_batch_remove_exceptionals(struct folio_batch *fbatch);
+#endif /* _LINUX_FOLIO_BATCH_H */
diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h
index adab609c972e..0d3765fa9d1d 100644
--- a/include/linux/folio_queue.h
+++ b/include/linux/folio_queue.h
@@ -14,7 +14,7 @@
 #ifndef _LINUX_FOLIO_QUEUE_H
 #define _LINUX_FOLIO_QUEUE_H
 
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/mm.h>
 
 /*
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 99b7209dabd7..4551613cea2f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -9,7 +9,7 @@
 #include <linux/types.h>
 #include <linux/mm_types.h>
 #include <linux/blkdev.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 
 struct address_space;
 struct fiemap_extent_info;
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
deleted file mode 100644
index 007affabf335..000000000000
--- a/include/linux/pagevec.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * include/linux/pagevec.h
- *
- * In many places it is efficient to batch an operation up against multiple
- * folios.  A folio_batch is a container which is used for that.
- */
-
-#ifndef _LINUX_PAGEVEC_H
-#define _LINUX_PAGEVEC_H
-
-#include <linux/types.h>
-
-/* 31 pointers + header align the folio_batch structure to a power of two */
-#define PAGEVEC_SIZE	31
-
-struct folio;
-
-/**
- * struct folio_batch - A collection of folios.
- *
- * The folio_batch is used to amortise the cost of retrieving and
- * operating on a set of folios.  The order of folios in the batch may be
- * significant (eg delete_from_page_cache_batch()).  Some users of the
- * folio_batch store "exceptional" entries in it which can be removed
- * by calling folio_batch_remove_exceptionals().
- */
-struct folio_batch {
-	unsigned char nr;
-	unsigned char i;
-	bool percpu_pvec_drained;
-	struct folio *folios[PAGEVEC_SIZE];
-};
-
-/**
- * folio_batch_init() - Initialise a batch of folios
- * @fbatch: The folio batch.
- *
- * A freshly initialised folio_batch contains zero folios.
- */
-static inline void folio_batch_init(struct folio_batch *fbatch)
-{
-	fbatch->nr = 0;
-	fbatch->i = 0;
-	fbatch->percpu_pvec_drained = false;
-}
-
-static inline void folio_batch_reinit(struct folio_batch *fbatch)
-{
-	fbatch->nr = 0;
-	fbatch->i = 0;
-}
-
-static inline unsigned int folio_batch_count(const struct folio_batch *fbatch)
-{
-	return fbatch->nr;
-}
-
-static inline unsigned int folio_batch_space(const struct folio_batch *fbatch)
-{
-	return PAGEVEC_SIZE - fbatch->nr;
-}
-
-/**
- * folio_batch_add() - Add a folio to a batch.
- * @fbatch: The folio batch.
- * @folio: The folio to add.
- *
- * The folio is added to the end of the batch.
- * The batch must have previously been initialised using folio_batch_init().
- *
- * Return: The number of slots still available.
- */
-static inline unsigned folio_batch_add(struct folio_batch *fbatch,
-		struct folio *folio)
-{
-	fbatch->folios[fbatch->nr++] = folio;
-	return folio_batch_space(fbatch);
-}
-
-/**
- * folio_batch_next - Return the next folio to process.
- * @fbatch: The folio batch being processed.
- *
- * Use this function to implement a queue of folios.
- *
- * Return: The next folio in the queue, or NULL if the queue is empty.
- */
-static inline struct folio *folio_batch_next(struct folio_batch *fbatch)
-{
-	if (fbatch->i == fbatch->nr)
-		return NULL;
-	return fbatch->folios[fbatch->i++];
-}
-
-void __folio_batch_release(struct folio_batch *fbatch);
-
-static inline void folio_batch_release(struct folio_batch *fbatch)
-{
-	if (folio_batch_count(fbatch))
-		__folio_batch_release(fbatch);
-}
-
-void folio_batch_remove_exceptionals(struct folio_batch *fbatch);
-#endif /* _LINUX_PAGEVEC_H */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4dc14c7a711b..a11acf5cd63b 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -20,7 +20,7 @@
 #include <linux/lwq.h>
 #include <linux/wait.h>
 #include <linux/mm.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/kthread.h>
 
 /*
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index e530112c4b3a..62552a2ce5b9 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,7 +11,7 @@
 #include <linux/flex_proportions.h>
 #include <linux/backing-dev-defs.h>
 #include <linux/blk_types.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 
 struct bio;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 406cef06b684..7cc6607dc28f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -31,7 +31,7 @@
 #include <linux/hash.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/security.h>
 #include <linux/cpuset.h>
 #include <linux/hugetlb.h>
diff --git a/mm/gup.c b/mm/gup.c
index 8e7dc2c6ee73..ad9ded39609c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -18,7 +18,7 @@
 #include <linux/hugetlb.h>
 #include <linux/migrate.h>
 #include <linux/mm_inline.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/sched/mm.h>
 #include <linux/shmem_fs.h>
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index eb54cdf99624..87614cfc4a3e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -34,7 +34,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/vm_event_item.h>
 #include <linux/smp.h>
 #include <linux/page-flags.h>
diff --git a/mm/mlock.c b/mm/mlock.c
index 2f699c3497a5..1a92d16f3684 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -13,7 +13,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/pagemap.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/pagewalk.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 601a5e048d12..1009bb042ba4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -33,7 +33,7 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/timer.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/signal.h>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d88c8c67ac0b..74b603872f34 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -31,7 +31,7 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmstat.h>
diff --git a/mm/shmem.c b/mm/shmem.c
index cfed6c3ff853..149fdb051170 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -61,7 +61,7 @@ static struct vfsmount *shm_mnt __ro_after_init;
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
 #include <linux/writeback.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/percpu_counter.h>
 #include <linux/falloc.h>
 #include <linux/splice.h>
diff --git a/mm/swap.c b/mm/swap.c
index bb19ccbece46..2e517ede6561 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -20,7 +20,7 @@
 #include <linux/swap.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/mm_inline.h>
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 32d9d877bda8..a0c64db2b275 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -15,7 +15,7 @@
 #include <linux/leafops.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 #include <linux/migrate.h>
diff --git a/mm/truncate.c b/mm/truncate.c
index 12467c1bd711..df0b7a7e6aff 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -17,7 +17,7 @@
 #include <linux/export.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/shmem_fs.h>
 #include <linux/rmap.h>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2c954d370048..4ab461f8c65a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -44,7 +44,7 @@
 #include <linux/sysctl.h>
 #include <linux/memory-tiers.h>
 #include <linux/oom.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/prefetch.h>
 #include <linux/printk.h>
 #include <linux/dax.h>
-- 
cgit v1.2.3


From 511f04aac469a3ae04f7f2588101020aebb19c90 Mon Sep 17 00:00:00 2001
From: Tal Zussman <tz2294@columbia.edu>
Date: Wed, 25 Feb 2026 18:44:28 -0500
Subject: folio_batch: rename PAGEVEC_SIZE to FOLIO_BATCH_SIZE

struct pagevec no longer exists.  Rename the macro appropriately.

Link: https://lkml.kernel.org/r/20260225-pagevec_cleanup-v2-4-716868cc2d11@columbia.edu
Signed-off-by: Tal Zussman <tz2294@columbia.edu>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/btrfs/extent_io.c        | 4 ++--
 include/linux/folio_batch.h | 6 +++---
 include/linux/folio_queue.h | 6 +++---
 mm/shmem.c                  | 4 ++--
 mm/swap.c                   | 2 +-
 mm/swap_state.c             | 2 +-
 mm/truncate.c               | 6 +++---
 7 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 89649ef5107a..070c8759b0b4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2095,13 +2095,13 @@ static void buffer_tree_tag_for_writeback(struct btrfs_fs_info *fs_info,
 struct eb_batch {
 	unsigned int nr;
 	unsigned int cur;
-	struct extent_buffer *ebs[PAGEVEC_SIZE];
+	struct extent_buffer *ebs[FOLIO_BATCH_SIZE];
 };
 
 static inline bool eb_batch_add(struct eb_batch *batch, struct extent_buffer *eb)
 {
 	batch->ebs[batch->nr++] = eb;
-	return (batch->nr < PAGEVEC_SIZE);
+	return (batch->nr < FOLIO_BATCH_SIZE);
 }
 
 static inline void eb_batch_init(struct eb_batch *batch)
diff --git a/include/linux/folio_batch.h b/include/linux/folio_batch.h
index a2f3d3043f7e..b45946adc50b 100644
--- a/include/linux/folio_batch.h
+++ b/include/linux/folio_batch.h
@@ -12,7 +12,7 @@
 #include <linux/types.h>
 
 /* 31 pointers + header align the folio_batch structure to a power of two */
-#define PAGEVEC_SIZE	31
+#define FOLIO_BATCH_SIZE	31
 
 struct folio;
 
@@ -29,7 +29,7 @@ struct folio_batch {
 	unsigned char nr;
 	unsigned char i;
 	bool percpu_pvec_drained;
-	struct folio *folios[PAGEVEC_SIZE];
+	struct folio *folios[FOLIO_BATCH_SIZE];
 };
 
 /**
@@ -58,7 +58,7 @@ static inline unsigned int folio_batch_count(const struct folio_batch *fbatch)
 
 static inline unsigned int folio_batch_space(const struct folio_batch *fbatch)
 {
-	return PAGEVEC_SIZE - fbatch->nr;
+	return FOLIO_BATCH_SIZE - fbatch->nr;
 }
 
 /**
diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h
index 0d3765fa9d1d..f6d5f1f127c9 100644
--- a/include/linux/folio_queue.h
+++ b/include/linux/folio_queue.h
@@ -29,12 +29,12 @@
  */
 struct folio_queue {
 	struct folio_batch	vec;		/* Folios in the queue segment */
-	u8			orders[PAGEVEC_SIZE]; /* Order of each folio */
+	u8			orders[FOLIO_BATCH_SIZE]; /* Order of each folio */
 	struct folio_queue	*next;		/* Next queue segment or NULL */
 	struct folio_queue	*prev;		/* Previous queue segment of NULL */
 	unsigned long		marks;		/* 1-bit mark per folio */
 	unsigned long		marks2;		/* Second 1-bit mark per folio */
-#if PAGEVEC_SIZE > BITS_PER_LONG
+#if FOLIO_BATCH_SIZE > BITS_PER_LONG
 #error marks is not big enough
 #endif
 	unsigned int		rreq_id;
@@ -70,7 +70,7 @@ static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id)
  */
 static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq)
 {
-	return PAGEVEC_SIZE;
+	return FOLIO_BATCH_SIZE;
 }
 
 /**
diff --git a/mm/shmem.c b/mm/shmem.c
index 149fdb051170..5e7dcf5bc5d3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1113,7 +1113,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, uoff_t lend,
 	pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	pgoff_t end = (lend + 1) >> PAGE_SHIFT;
 	struct folio_batch fbatch;
-	pgoff_t indices[PAGEVEC_SIZE];
+	pgoff_t indices[FOLIO_BATCH_SIZE];
 	struct folio *folio;
 	bool same_folio;
 	long nr_swaps_freed = 0;
@@ -1510,7 +1510,7 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type)
 	struct address_space *mapping = inode->i_mapping;
 	pgoff_t start = 0;
 	struct folio_batch fbatch;
-	pgoff_t indices[PAGEVEC_SIZE];
+	pgoff_t indices[FOLIO_BATCH_SIZE];
 	int ret = 0;
 
 	do {
diff --git a/mm/swap.c b/mm/swap.c
index 2e517ede6561..78b4aa811fc6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1018,7 +1018,7 @@ EXPORT_SYMBOL(folios_put_refs);
 void release_pages(release_pages_arg arg, int nr)
 {
 	struct folio_batch fbatch;
-	int refs[PAGEVEC_SIZE];
+	int refs[FOLIO_BATCH_SIZE];
 	struct encoded_page **encoded = arg.encoded_pages;
 	int i;
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index a0c64db2b275..6313b59d7eab 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -385,7 +385,7 @@ void free_folio_and_swap_cache(struct folio *folio)
 void free_pages_and_swap_cache(struct encoded_page **pages, int nr)
 {
 	struct folio_batch folios;
-	unsigned int refs[PAGEVEC_SIZE];
+	unsigned int refs[FOLIO_BATCH_SIZE];
 
 	folio_batch_init(&folios);
 	for (int i = 0; i < nr; i++) {
diff --git a/mm/truncate.c b/mm/truncate.c
index df0b7a7e6aff..2931d66c16d0 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -369,7 +369,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	pgoff_t		start;		/* inclusive */
 	pgoff_t		end;		/* exclusive */
 	struct folio_batch fbatch;
-	pgoff_t		indices[PAGEVEC_SIZE];
+	pgoff_t		indices[FOLIO_BATCH_SIZE];
 	pgoff_t		index;
 	int		i;
 	struct folio	*folio;
@@ -534,7 +534,7 @@ EXPORT_SYMBOL(truncate_inode_pages_final);
 unsigned long mapping_try_invalidate(struct address_space *mapping,
 		pgoff_t start, pgoff_t end, unsigned long *nr_failed)
 {
-	pgoff_t indices[PAGEVEC_SIZE];
+	pgoff_t indices[FOLIO_BATCH_SIZE];
 	struct folio_batch fbatch;
 	pgoff_t index = start;
 	unsigned long ret;
@@ -672,7 +672,7 @@ failed:
 int invalidate_inode_pages2_range(struct address_space *mapping,
 				  pgoff_t start, pgoff_t end)
 {
-	pgoff_t indices[PAGEVEC_SIZE];
+	pgoff_t indices[FOLIO_BATCH_SIZE];
 	struct folio_batch fbatch;
 	pgoff_t index;
 	int i;
-- 
cgit v1.2.3


From a2c77ec320a99581e8272868ccfa53a7d7a7b168 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:39 +0000
Subject: mm: move MAX_FOLIO_ORDER definition to mmzone.h

Patch series "mm: Eliminate fake head pages from vmemmap optimization",
v7.

This series removes "fake head pages" from the HugeTLB vmemmap
optimization (HVO) by changing how tail pages encode their relationship to
the head page.

It simplifies compound_head() and page_ref_add_unless().  Both are in the
hot path.

Background
==========

HVO reduces memory overhead by freeing vmemmap pages for HugeTLB pages and
remapping the freed virtual addresses to a single physical page.
Previously, all tail page vmemmap entries were remapped to the first
vmemmap page (containing the head struct page), creating "fake heads" -
tail pages that appear to have PG_head set when accessed through the
deduplicated vmemmap.

This required special handling in compound_head() to detect and work
around fake heads, adding complexity and overhead to a very hot path.

New Approach
============

For architectures/configs where sizeof(struct page) is a power of 2 (the
common case), this series changes how position of the head page is encoded
in the tail pages.

Instead of storing a pointer to the head page, the ->compound_info
(renamed from ->compound_head) now stores a mask.

The mask can be applied to any tail page's virtual address to compute the
head page address.  Critically, all tail pages of the same order now have
identical compound_info values, regardless of which compound page they
belong to.

The key insight is that all tail pages of the same order now have
identical compound_info values, regardless of which compound page they
belong to.

In v7, these shared tail pages are allocated per-zone.  This ensures that
zone information (stored in page->flags) is correct even for shared tail
pages, removing the need for the special-casing in page_zonenum() proposed
in earlier versions.

To support per-zone shared pages for boot-allocated gigantic pages, the
vmemmap population is deferred until zones are initialized.  This
simplifies the logic significantly and allows the removal of
vmemmap_undo_hvo().

Benefits
========

1. Simplified compound_head(): No fake head detection needed, can be
   implemented in a branchless manner.

2. Simplified page_ref_add_unless(): RCU protection removed since there's
   no race with fake head remapping.

3. Cleaner architecture: The shared tail pages are truly read-only and
   contain valid tail page metadata.

If sizeof(struct page) is not power-of-2, there are no functional changes.
HVO is not supported in this configuration.

I had hoped to see performance improvement, but my testing thus far has
shown either no change or only a slight improvement within the noise.

Series Organization
===================

Patch 1: Move MAX_FOLIO_ORDER definition to mmzone.h.
Patches 2-4: Refactoring of field names and interfaces.
Patches 5-6: Architecture alignment for LoongArch and RISC-V.
Patch 7: Mask-based compound_head() implementation.
Patch 8: Add memmap alignment checks.
Patch 9: Branchless compound_head() optimization.
Patch 10: Defer vmemmap population for bootmem hugepages.
Patch 11: Refactor vmemmap_walk.
Patch 12: x86 vDSO build fix.
Patch 13: Eliminate fake heads with per-zone shared tail pages.
Patches 14-16: Cleanup of fake head infrastructure.
Patch 17: Documentation update.
Patch 18: Use compound_head() in page_slab().


This patch (of 17):

Move MAX_FOLIO_ORDER definition from mm.h to mmzone.h.

This is preparation for adding the vmemmap_tails array to struct zone,
which requires MAX_FOLIO_ORDER to be available in mmzone.h.

Link: https://lkml.kernel.org/r/20260227194302.274384-1-kas@kernel.org
Link: https://lkml.kernel.org/r/20260227194302.274384-2-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
Acked-by: Zi Yan <ziy@nvidia.com>
Acked-by: Muchun Song <muchun.song@linux.dev>
Acked-by: Usama Arif <usamaarif642@gmail.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: WANG Xuerui <kernel@xen0n.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h     | 31 -------------------------------
 include/linux/mmzone.h | 31 +++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bb0cfe38ca19..4e999c21d89a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -27,7 +27,6 @@
 #include <linux/page-flags.h>
 #include <linux/page_ref.h>
 #include <linux/overflow.h>
-#include <linux/sizes.h>
 #include <linux/sched.h>
 #include <linux/pgtable.h>
 #include <linux/kasan.h>
@@ -2479,36 +2478,6 @@ static inline unsigned long folio_nr_pages(const struct folio *folio)
 	return folio_large_nr_pages(folio);
 }
 
-#if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS)
-/*
- * We don't expect any folios that exceed buddy sizes (and consequently
- * memory sections).
- */
-#define MAX_FOLIO_ORDER		MAX_PAGE_ORDER
-#elif defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
-/*
- * Only pages within a single memory section are guaranteed to be
- * contiguous. By limiting folios to a single memory section, all folio
- * pages are guaranteed to be contiguous.
- */
-#define MAX_FOLIO_ORDER		PFN_SECTION_SHIFT
-#elif defined(CONFIG_HUGETLB_PAGE)
-/*
- * There is no real limit on the folio size. We limit them to the maximum we
- * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect
- * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit.
- */
-#define MAX_FOLIO_ORDER		get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G)
-#else
-/*
- * Without hugetlb, gigantic folios that are bigger than a single PUD are
- * currently impossible.
- */
-#define MAX_FOLIO_ORDER		PUD_ORDER
-#endif
-
-#define MAX_FOLIO_NR_PAGES	(1UL << MAX_FOLIO_ORDER)
-
 /*
  * compound_nr() returns the number of pages in this potentially compound
  * page.  compound_nr() can be called on a tail page, and is defined to
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index db41b18a919d..4c481ec77da9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -23,6 +23,7 @@
 #include <linux/page-flags.h>
 #include <linux/local_lock.h>
 #include <linux/zswap.h>
+#include <linux/sizes.h>
 #include <asm/page.h>
 
 /* Free memory management - zoned buddy allocator.  */
@@ -61,6 +62,36 @@
  */
 #define PAGE_ALLOC_COSTLY_ORDER 3
 
+#if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS)
+/*
+ * We don't expect any folios that exceed buddy sizes (and consequently
+ * memory sections).
+ */
+#define MAX_FOLIO_ORDER		MAX_PAGE_ORDER
+#elif defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+/*
+ * Only pages within a single memory section are guaranteed to be
+ * contiguous. By limiting folios to a single memory section, all folio
+ * pages are guaranteed to be contiguous.
+ */
+#define MAX_FOLIO_ORDER		PFN_SECTION_SHIFT
+#elif defined(CONFIG_HUGETLB_PAGE)
+/*
+ * There is no real limit on the folio size. We limit them to the maximum we
+ * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect
+ * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit.
+ */
+#define MAX_FOLIO_ORDER		get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G)
+#else
+/*
+ * Without hugetlb, gigantic folios that are bigger than a single PUD are
+ * currently impossible.
+ */
+#define MAX_FOLIO_ORDER		PUD_ORDER
+#endif
+
+#define MAX_FOLIO_NR_PAGES	(1UL << MAX_FOLIO_ORDER)
+
 enum migratetype {
 	MIGRATE_UNMOVABLE,
 	MIGRATE_MOVABLE,
-- 
cgit v1.2.3


From f0369fb13619569ba8564ce8d4fc9d385bbee8a2 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:40 +0000
Subject: mm: change the interface of prep_compound_tail()

Instead of passing down the head page and tail page index, pass the tail
and head pages directly, as well as the order of the compound page.

This is a preparation for changing how the head position is encoded in the
tail page.

Link: https://lkml.kernel.org/r/20260227194302.274384-3-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: David Hildenbrand (arm) <david@kernel.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h |  5 +++--
 mm/hugetlb.c               |  8 +++++---
 mm/internal.h              | 11 +++++------
 mm/mm_init.c               |  2 +-
 mm/page_alloc.c            |  2 +-
 5 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 415e9f2ef616..7729a4a28b44 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -870,9 +870,10 @@ static inline bool folio_test_large(const struct folio *folio)
 	return folio_test_head(folio);
 }
 
-static __always_inline void set_compound_head(struct page *page, struct page *head)
+static __always_inline void set_compound_head(struct page *tail,
+		const struct page *head, unsigned int order)
 {
-	WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
+	WRITE_ONCE(tail->compound_head, (unsigned long)head + 1);
 }
 
 static __always_inline void clear_compound_head(struct page *page)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 327eaa4074d3..1d41fa3dd43e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3168,6 +3168,7 @@ found:
 
 /* Initialize [start_page:end_page_number] tail struct pages of a hugepage */
 static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
+					struct hstate *h,
 					unsigned long start_page_number,
 					unsigned long end_page_number)
 {
@@ -3176,6 +3177,7 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
 	struct page *page = folio_page(folio, start_page_number);
 	unsigned long head_pfn = folio_pfn(folio);
 	unsigned long pfn, end_pfn = head_pfn + end_page_number;
+	unsigned int order = huge_page_order(h);
 
 	/*
 	 * As we marked all tail pages with memblock_reserved_mark_noinit(),
@@ -3183,7 +3185,7 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
 	 */
 	for (pfn = head_pfn + start_page_number; pfn < end_pfn; page++, pfn++) {
 		__init_single_page(page, pfn, zone, nid);
-		prep_compound_tail((struct page *)folio, pfn - head_pfn);
+		prep_compound_tail(page, &folio->page, order);
 		set_page_count(page, 0);
 	}
 }
@@ -3203,7 +3205,7 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
 	__folio_set_head(folio);
 	ret = folio_ref_freeze(folio, 1);
 	VM_BUG_ON(!ret);
-	hugetlb_folio_init_tail_vmemmap(folio, 1, nr_pages);
+	hugetlb_folio_init_tail_vmemmap(folio, h, 1, nr_pages);
 	prep_compound_head(&folio->page, huge_page_order(h));
 }
 
@@ -3260,7 +3262,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
 			 * time as this is early in boot and there should
 			 * be no contention.
 			 */
-			hugetlb_folio_init_tail_vmemmap(folio,
+			hugetlb_folio_init_tail_vmemmap(folio, h,
 					HUGETLB_VMEMMAP_RESERVE_PAGES,
 					pages_per_huge_page(h));
 		}
diff --git a/mm/internal.h b/mm/internal.h
index 2daa6a744172..9cfbd8e41914 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -897,13 +897,12 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
 		INIT_LIST_HEAD(&folio->_deferred_list);
 }
 
-static inline void prep_compound_tail(struct page *head, int tail_idx)
+static inline void prep_compound_tail(struct page *tail,
+		const struct page *head, unsigned int order)
 {
-	struct page *p = head + tail_idx;
-
-	p->mapping = TAIL_MAPPING;
-	set_compound_head(p, head);
-	set_page_private(p, 0);
+	tail->mapping = TAIL_MAPPING;
+	set_compound_head(tail, head, order);
+	set_page_private(tail, 0);
 }
 
 void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f903747ca854..5b261f86ba6f 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1110,7 +1110,7 @@ static void __ref memmap_init_compound(struct page *head,
 		struct page *page = pfn_to_page(pfn);
 
 		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
-		prep_compound_tail(head, pfn - head_pfn);
+		prep_compound_tail(page, head, order);
 		set_page_count(page, 0);
 	}
 	prep_compound_head(head, order);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 74b603872f34..11f9a0525a3a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -744,7 +744,7 @@ void prep_compound_page(struct page *page, unsigned int order)
 
 	__SetPageHead(page);
 	for (i = 1; i < nr_pages; i++)
-		prep_compound_tail(page, i);
+		prep_compound_tail(page + i, page, order);
 
 	prep_compound_head(page, order);
 }
-- 
cgit v1.2.3


From d50569612c29215c5d1c64f47a65604ed265d2e9 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:41 +0000
Subject: mm: rename the 'compound_head' field in the 'struct page' to
 'compound_info'

The 'compound_head' field in the 'struct page' encodes whether the page is
a tail and where to locate the head page.  Bit 0 is set if the page is a
tail, and the remaining bits in the field point to the head page.

As preparation for changing how the field encodes information about the
head page, rename the field to 'compound_info'.

Link: https://lkml.kernel.org/r/20260227194302.274384-4-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: David Hildenbrand (arm) <david@kernel.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/kdump/vmcoreinfo.rst |  2 +-
 Documentation/mm/vmemmap_dedup.rst             |  6 +++---
 include/linux/mm_types.h                       | 20 ++++++++++----------
 include/linux/page-flags.h                     | 18 +++++++++---------
 include/linux/types.h                          |  2 +-
 kernel/vmcore_info.c                           |  2 +-
 mm/page_alloc.c                                |  2 +-
 mm/slab.h                                      |  2 +-
 mm/util.c                                      |  2 +-
 9 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index 404a15f6782c..7663c610fe90 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -141,7 +141,7 @@ nodemask_t
 The size of a nodemask_t type. Used to compute the number of online
 nodes.
 
-(page, flags|_refcount|mapping|lru|_mapcount|private|compound_order|compound_head)
+(page, flags|_refcount|mapping|lru|_mapcount|private|compound_order|compound_info)
 ----------------------------------------------------------------------------------
 
 User-space tools compute their values based on the offset of these
diff --git a/Documentation/mm/vmemmap_dedup.rst b/Documentation/mm/vmemmap_dedup.rst
index b4a55b6569fa..1863d88d2dcb 100644
--- a/Documentation/mm/vmemmap_dedup.rst
+++ b/Documentation/mm/vmemmap_dedup.rst
@@ -24,7 +24,7 @@ For each base page, there is a corresponding ``struct page``.
 Within the HugeTLB subsystem, only the first 4 ``struct page`` are used to
 contain unique information about a HugeTLB page. ``__NR_USED_SUBPAGE`` provides
 this upper limit. The only 'useful' information in the remaining ``struct page``
-is the compound_head field, and this field is the same for all tail pages.
+is the compound_info field, and this field is the same for all tail pages.
 
 By removing redundant ``struct page`` for HugeTLB pages, memory can be returned
 to the buddy allocator for other uses.
@@ -124,10 +124,10 @@ Here is how things look before optimization::
  |           |
  +-----------+
 
-The value of page->compound_head is the same for all tail pages. The first
+The value of page->compound_info is the same for all tail pages. The first
 page of ``struct page`` (page 0) associated with the HugeTLB page contains the 4
 ``struct page`` necessary to describe the HugeTLB. The only use of the remaining
-pages of ``struct page`` (page 1 to page 7) is to point to page->compound_head.
+pages of ``struct page`` (page 1 to page 7) is to point to page->compound_info.
 Therefore, we can remap pages 1 to 7 to page 0. Only 1 page of ``struct page``
 will be used for each HugeTLB page. This will allow us to free the remaining
 7 pages to the buddy allocator.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3cc8ae722886..7bc82a2b889f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -126,14 +126,14 @@ struct page {
 			atomic_long_t pp_ref_count;
 		};
 		struct {	/* Tail pages of compound page */
-			unsigned long compound_head;	/* Bit zero is set */
+			unsigned long compound_info;	/* Bit zero is set */
 		};
 		struct {	/* ZONE_DEVICE pages */
 			/*
-			 * The first word is used for compound_head or folio
+			 * The first word is used for compound_info or folio
 			 * pgmap
 			 */
-			void *_unused_pgmap_compound_head;
+			void *_unused_pgmap_compound_info;
 			void *zone_device_data;
 			/*
 			 * ZONE_DEVICE private pages are counted as being
@@ -409,7 +409,7 @@ struct folio {
 	/* private: avoid cluttering the output */
 				/* For the Unevictable "LRU list" slot */
 				struct {
-					/* Avoid compound_head */
+					/* Avoid compound_info */
 					void *__filler;
 	/* public: */
 					unsigned int mlock_count;
@@ -510,7 +510,7 @@ struct folio {
 FOLIO_MATCH(flags, flags);
 FOLIO_MATCH(lru, lru);
 FOLIO_MATCH(mapping, mapping);
-FOLIO_MATCH(compound_head, lru);
+FOLIO_MATCH(compound_info, lru);
 FOLIO_MATCH(__folio_index, index);
 FOLIO_MATCH(private, private);
 FOLIO_MATCH(_mapcount, _mapcount);
@@ -529,7 +529,7 @@ FOLIO_MATCH(_last_cpupid, _last_cpupid);
 	static_assert(offsetof(struct folio, fl) ==			\
 			offsetof(struct page, pg) + sizeof(struct page))
 FOLIO_MATCH(flags, _flags_1);
-FOLIO_MATCH(compound_head, _head_1);
+FOLIO_MATCH(compound_info, _head_1);
 FOLIO_MATCH(_mapcount, _mapcount_1);
 FOLIO_MATCH(_refcount, _refcount_1);
 #undef FOLIO_MATCH
@@ -537,13 +537,13 @@ FOLIO_MATCH(_refcount, _refcount_1);
 	static_assert(offsetof(struct folio, fl) ==			\
 			offsetof(struct page, pg) + 2 * sizeof(struct page))
 FOLIO_MATCH(flags, _flags_2);
-FOLIO_MATCH(compound_head, _head_2);
+FOLIO_MATCH(compound_info, _head_2);
 #undef FOLIO_MATCH
 #define FOLIO_MATCH(pg, fl)						\
 	static_assert(offsetof(struct folio, fl) ==			\
 			offsetof(struct page, pg) + 3 * sizeof(struct page))
 FOLIO_MATCH(flags, _flags_3);
-FOLIO_MATCH(compound_head, _head_3);
+FOLIO_MATCH(compound_info, _head_3);
 #undef FOLIO_MATCH
 
 /**
@@ -609,8 +609,8 @@ struct ptdesc {
 #define TABLE_MATCH(pg, pt)						\
 	static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt))
 TABLE_MATCH(flags, pt_flags);
-TABLE_MATCH(compound_head, pt_list);
-TABLE_MATCH(compound_head, _pt_pad_1);
+TABLE_MATCH(compound_info, pt_list);
+TABLE_MATCH(compound_info, _pt_pad_1);
 TABLE_MATCH(mapping, __page_mapping);
 TABLE_MATCH(__folio_index, pt_index);
 TABLE_MATCH(rcu_head, pt_rcu_head);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 7729a4a28b44..265a798295ff 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -213,7 +213,7 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page
 	/*
 	 * Only addresses aligned with PAGE_SIZE of struct page may be fake head
 	 * struct page. The alignment check aims to avoid access the fields (
-	 * e.g. compound_head) of the @page[1]. It can avoid touch a (possibly)
+	 * e.g. compound_info) of the @page[1]. It can avoid touch a (possibly)
 	 * cold cacheline in some cases.
 	 */
 	if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) &&
@@ -223,7 +223,7 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page
 		 * because the @page is a compound page composed with at least
 		 * two contiguous pages.
 		 */
-		unsigned long head = READ_ONCE(page[1].compound_head);
+		unsigned long head = READ_ONCE(page[1].compound_info);
 
 		if (likely(head & 1))
 			return (const struct page *)(head - 1);
@@ -281,7 +281,7 @@ static __always_inline int page_is_fake_head(const struct page *page)
 
 static __always_inline unsigned long _compound_head(const struct page *page)
 {
-	unsigned long head = READ_ONCE(page->compound_head);
+	unsigned long head = READ_ONCE(page->compound_info);
 
 	if (unlikely(head & 1))
 		return head - 1;
@@ -320,13 +320,13 @@ static __always_inline unsigned long _compound_head(const struct page *page)
 
 static __always_inline int PageTail(const struct page *page)
 {
-	return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page);
+	return READ_ONCE(page->compound_info) & 1 || page_is_fake_head(page);
 }
 
 static __always_inline int PageCompound(const struct page *page)
 {
 	return test_bit(PG_head, &page->flags.f) ||
-	       READ_ONCE(page->compound_head) & 1;
+	       READ_ONCE(page->compound_info) & 1;
 }
 
 #define	PAGE_POISON_PATTERN	-1l
@@ -348,7 +348,7 @@ static const unsigned long *const_folio_flags(const struct folio *folio,
 {
 	const struct page *page = &folio->page;
 
-	VM_BUG_ON_PGFLAGS(page->compound_head & 1, page);
+	VM_BUG_ON_PGFLAGS(page->compound_info & 1, page);
 	VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page);
 	return &page[n].flags.f;
 }
@@ -357,7 +357,7 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
 {
 	struct page *page = &folio->page;
 
-	VM_BUG_ON_PGFLAGS(page->compound_head & 1, page);
+	VM_BUG_ON_PGFLAGS(page->compound_info & 1, page);
 	VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page);
 	return &page[n].flags.f;
 }
@@ -873,12 +873,12 @@ static inline bool folio_test_large(const struct folio *folio)
 static __always_inline void set_compound_head(struct page *tail,
 		const struct page *head, unsigned int order)
 {
-	WRITE_ONCE(tail->compound_head, (unsigned long)head + 1);
+	WRITE_ONCE(tail->compound_info, (unsigned long)head + 1);
 }
 
 static __always_inline void clear_compound_head(struct page *page)
 {
-	WRITE_ONCE(page->compound_head, 0);
+	WRITE_ONCE(page->compound_info, 0);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/types.h b/include/linux/types.h
index 7e71d260763c..608050dbca6a 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -239,7 +239,7 @@ struct ustat {
  *
  * This guarantee is important for few reasons:
  *  - future call_rcu_lazy() will make use of lower bits in the pointer;
- *  - the structure shares storage space in struct page with @compound_head,
+ *  - the structure shares storage space in struct page with @compound_info,
  *    which encode PageTail() in bit 0. The guarantee is needed to avoid
  *    false-positive PageTail().
  */
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index 8d82913223a1..94e4ef75b1b2 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -198,7 +198,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_OFFSET(page, lru);
 	VMCOREINFO_OFFSET(page, _mapcount);
 	VMCOREINFO_OFFSET(page, private);
-	VMCOREINFO_OFFSET(page, compound_head);
+	VMCOREINFO_OFFSET(page, compound_info);
 	VMCOREINFO_OFFSET(pglist_data, node_zones);
 	VMCOREINFO_OFFSET(pglist_data, nr_zones);
 #ifdef CONFIG_FLATMEM
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 11f9a0525a3a..f4f9a98bb425 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -731,7 +731,7 @@ static inline bool pcp_allowed_order(unsigned int order)
  * The first PAGE_SIZE page is called the "head page" and have PG_head set.
  *
  * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded
- * in bit 0 of page->compound_head. The rest of bits is pointer to head page.
+ * in bit 0 of page->compound_info. The rest of bits is pointer to head page.
  *
  * The first tail page's ->compound_order holds the order of allocation.
  * This usage means that zero-order pages may not be compound.
diff --git a/mm/slab.h b/mm/slab.h
index e9ab292acd22..0653cf5fd93a 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -94,7 +94,7 @@ struct slab {
 #define SLAB_MATCH(pg, sl)						\
 	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
 SLAB_MATCH(flags, flags);
-SLAB_MATCH(compound_head, slab_cache);	/* Ensure bit 0 is clear */
+SLAB_MATCH(compound_info, slab_cache);	/* Ensure bit 0 is clear */
 SLAB_MATCH(_refcount, __page_refcount);
 #ifdef CONFIG_MEMCG
 SLAB_MATCH(memcg_data, obj_exts);
diff --git a/mm/util.c b/mm/util.c
index 419cb81ab353..52400a3c5eb4 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1276,7 +1276,7 @@ void snapshot_page(struct page_snapshot *ps, const struct page *page)
 again:
 	memset(&ps->folio_snapshot, 0, sizeof(struct folio));
 	memcpy(&ps->page_snapshot, page, sizeof(*page));
-	head = ps->page_snapshot.compound_head;
+	head = ps->page_snapshot.compound_info;
 	if ((head & 1) == 0) {
 		ps->idx = 0;
 		foliop = (struct folio *)&ps->page_snapshot;
-- 
cgit v1.2.3


From 67c79a5af051f57339ecf383d3f67e200741ce20 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:42 +0000
Subject: mm: move set/clear_compound_head() next to compound_head()

Move set_compound_head() and clear_compound_head() to be adjacent to the
compound_head() function in page-flags.h.

These functions encode and decode the same compound_info field, so keeping
them together makes it easier to verify their logic is consistent,
especially when the encoding changes.

Link: https://lkml.kernel.org/r/20260227194302.274384-5-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: David Hildenbrand (arm) <david@kernel.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 265a798295ff..5c469d38dd69 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -290,6 +290,17 @@ static __always_inline unsigned long _compound_head(const struct page *page)
 
 #define compound_head(page)	((typeof(page))_compound_head(page))
 
+static __always_inline void set_compound_head(struct page *tail,
+		const struct page *head, unsigned int order)
+{
+	WRITE_ONCE(tail->compound_info, (unsigned long)head + 1);
+}
+
+static __always_inline void clear_compound_head(struct page *page)
+{
+	WRITE_ONCE(page->compound_info, 0);
+}
+
 /**
  * page_folio - Converts from page to folio.
  * @p: The page.
@@ -870,17 +881,6 @@ static inline bool folio_test_large(const struct folio *folio)
 	return folio_test_head(folio);
 }
 
-static __always_inline void set_compound_head(struct page *tail,
-		const struct page *head, unsigned int order)
-{
-	WRITE_ONCE(tail->compound_info, (unsigned long)head + 1);
-}
-
-static __always_inline void clear_compound_head(struct page *page)
-{
-	WRITE_ONCE(page->compound_info, 0);
-}
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline void ClearPageCompound(struct page *page)
 {
-- 
cgit v1.2.3


From 476849b0fba4450f5adf22196bcff9c24c673bc4 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:43 +0000
Subject: riscv/mm: align vmemmap to maximal folio size

The upcoming change to the HugeTLB vmemmap optimization (HVO) requires
struct pages of the head page to be naturally aligned with regard to the
folio size.

Align vmemmap to the newly introduced MAX_FOLIO_VMEMMAP_ALIGN.

Link: https://lkml.kernel.org/r/20260227194302.274384-6-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Hildenbrand (arm) <david@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/riscv/mm/init.c   |  3 ++-
 include/linux/mmzone.h | 11 +++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 017bad735d47..b5c50956bb8a 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -63,7 +63,8 @@ phys_addr_t phys_ram_base __ro_after_init;
 EXPORT_SYMBOL(phys_ram_base);
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-#define VMEMMAP_ADDR_ALIGN	(1ULL << SECTION_SIZE_BITS)
+#define VMEMMAP_ADDR_ALIGN	max(1ULL << SECTION_SIZE_BITS, \
+				    MAX_FOLIO_VMEMMAP_ALIGN)
 
 unsigned long vmemmap_start_pfn __ro_after_init;
 EXPORT_SYMBOL(vmemmap_start_pfn);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4c481ec77da9..0bef68e41f19 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -92,6 +92,17 @@
 
 #define MAX_FOLIO_NR_PAGES	(1UL << MAX_FOLIO_ORDER)
 
+/*
+ * HugeTLB Vmemmap Optimization (HVO) requires struct pages of the head page to
+ * be naturally aligned with regard to the folio size.
+ *
+ * HVO which is only active if the size of struct page is a power of 2.
+ */
+#define MAX_FOLIO_VMEMMAP_ALIGN \
+	(IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) && \
+	 is_power_of_2(sizeof(struct page)) ? \
+	 MAX_FOLIO_NR_PAGES * sizeof(struct page) : 0)
+
 enum migratetype {
 	MIGRATE_UNMOVABLE,
 	MIGRATE_MOVABLE,
-- 
cgit v1.2.3


From 8c846c879e226c312c2c7a7bc1e323779903530f Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:45 +0000
Subject: mm: rework compound_head() for power-of-2 sizeof(struct page)

For tail pages, the kernel uses the 'compound_info' field to get to the
head page.  The bit 0 of the field indicates whether the page is a tail
page, and if set, the remaining bits represent a pointer to the head page.

For cases when size of struct page is power-of-2, change the encoding of
compound_info to store a mask that can be applied to the virtual address
of the tail page in order to access the head page.  It is possible because
struct page of the head page is naturally aligned with regards to order of
the page.

The significant impact of this modification is that all tail pages of the
same order will now have identical 'compound_info', regardless of the
compound page they are associated with.  This paves the way for
eliminating fake heads.

The HugeTLB Vmemmap Optimization (HVO) creates fake heads and it is only
applied when the sizeof(struct page) is power-of-2.  Having identical tail
pages allows the same page to be mapped into the vmemmap of all pages,
maintaining memory savings without fake heads.

If sizeof(struct page) is not power-of-2, there is no functional changes.

Limit mask usage to HugeTLB vmemmap optimization (HVO) where it makes a
difference.  The approach with mask would work in the wider set of
conditions, but it requires validating that struct pages are naturally
aligned for all orders up to the MAX_FOLIO_ORDER, which can be tricky.

Link: https://lkml.kernel.org/r/20260227194302.274384-8-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Usama Arif <usamaarif642@gmail.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: WANG Xuerui <kernel@xen0n.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 81 +++++++++++++++++++++++++++++++++++++++++-----
 mm/slab.h                  | 16 ++++++---
 mm/util.c                  | 16 ++++++---
 3 files changed, 97 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5c469d38dd69..43876b108f0a 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -198,6 +198,29 @@ enum pageflags {
 
 #ifndef __GENERATING_BOUNDS_H
 
+/*
+ * For tail pages, if the size of struct page is power-of-2 ->compound_info
+ * encodes the mask that converts the address of the tail page address to
+ * the head page address.
+ *
+ * Otherwise, ->compound_info has direct pointer to head pages.
+ */
+static __always_inline bool compound_info_has_mask(void)
+{
+	/*
+	 * Limit mask usage to HugeTLB vmemmap optimization (HVO) where it
+	 * makes a difference.
+	 *
+	 * The approach with mask would work in the wider set of conditions,
+	 * but it requires validating that struct pages are naturally aligned
+	 * for all orders up to the MAX_FOLIO_ORDER, which can be tricky.
+	 */
+	if (!IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP))
+		return false;
+
+	return is_power_of_2(sizeof(struct page));
+}
+
 #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
 
@@ -207,6 +230,10 @@ DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
  */
 static __always_inline const struct page *page_fixed_fake_head(const struct page *page)
 {
+	/* Fake heads only exists if compound_info_has_mask() is true */
+	if (!compound_info_has_mask())
+		return page;
+
 	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
 		return page;
 
@@ -223,10 +250,14 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page
 		 * because the @page is a compound page composed with at least
 		 * two contiguous pages.
 		 */
-		unsigned long head = READ_ONCE(page[1].compound_info);
+		unsigned long info = READ_ONCE(page[1].compound_info);
+
+		/* See set_compound_head() */
+		if (likely(info & 1)) {
+			unsigned long p = (unsigned long)page;
 
-		if (likely(head & 1))
-			return (const struct page *)(head - 1);
+			return (const struct page *)(p & info);
+		}
 	}
 	return page;
 }
@@ -281,11 +312,26 @@ static __always_inline int page_is_fake_head(const struct page *page)
 
 static __always_inline unsigned long _compound_head(const struct page *page)
 {
-	unsigned long head = READ_ONCE(page->compound_info);
+	unsigned long info = READ_ONCE(page->compound_info);
 
-	if (unlikely(head & 1))
-		return head - 1;
-	return (unsigned long)page_fixed_fake_head(page);
+	/* Bit 0 encodes PageTail() */
+	if (!(info & 1))
+		return (unsigned long)page_fixed_fake_head(page);
+
+	/*
+	 * If compound_info_has_mask() is false, the rest of compound_info is
+	 * the pointer to the head page.
+	 */
+	if (!compound_info_has_mask())
+		return info - 1;
+
+	/*
+	 * If compound_info_has_mask() is true the rest of the info encodes
+	 * the mask that converts the address of the tail page to the head page.
+	 *
+	 * No need to clear bit 0 in the mask as 'page' always has it clear.
+	 */
+	return (unsigned long)page & info;
 }
 
 #define compound_head(page)	((typeof(page))_compound_head(page))
@@ -293,7 +339,26 @@ static __always_inline unsigned long _compound_head(const struct page *page)
 static __always_inline void set_compound_head(struct page *tail,
 		const struct page *head, unsigned int order)
 {
-	WRITE_ONCE(tail->compound_info, (unsigned long)head + 1);
+	unsigned int shift;
+	unsigned long mask;
+
+	if (!compound_info_has_mask()) {
+		WRITE_ONCE(tail->compound_info, (unsigned long)head | 1);
+		return;
+	}
+
+	/*
+	 * If the size of struct page is power-of-2, bits [shift:0] of the
+	 * virtual address of compound head are zero.
+	 *
+	 * Calculate mask that can be applied to the virtual address of
+	 * the tail page to get address of the head page.
+	 */
+	shift = order + order_base_2(sizeof(struct page));
+	mask = GENMASK(BITS_PER_LONG - 1, shift);
+
+	/* Bit 0 encodes PageTail() */
+	WRITE_ONCE(tail->compound_info, mask | 1);
 }
 
 static __always_inline void clear_compound_head(struct page *page)
diff --git a/mm/slab.h b/mm/slab.h
index 0653cf5fd93a..ccbdbed18c05 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -131,11 +131,19 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(struct freelist
  */
 static inline struct slab *page_slab(const struct page *page)
 {
-	unsigned long head;
+	unsigned long info;
+
+	info = READ_ONCE(page->compound_info);
+	if (info & 1) {
+		/* See compound_head() */
+		if (compound_info_has_mask()) {
+			unsigned long p = (unsigned long)page;
+			page = (struct page *)(p & info);
+		} else {
+			page = (struct page *)(info - 1);
+		}
+	}
 
-	head = READ_ONCE(page->compound_head);
-	if (head & 1)
-		page = (struct page *)(head - 1);
 	if (data_race(page->page_type >> 24) != PGTY_slab)
 		page = NULL;
 
diff --git a/mm/util.c b/mm/util.c
index 52400a3c5eb4..ce7ae80047cf 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1266,7 +1266,7 @@ static void set_ps_flags(struct page_snapshot *ps, const struct folio *folio,
  */
 void snapshot_page(struct page_snapshot *ps, const struct page *page)
 {
-	unsigned long head, nr_pages = 1;
+	unsigned long info, nr_pages = 1;
 	struct folio *foliop;
 	int loops = 5;
 
@@ -1276,8 +1276,8 @@ void snapshot_page(struct page_snapshot *ps, const struct page *page)
 again:
 	memset(&ps->folio_snapshot, 0, sizeof(struct folio));
 	memcpy(&ps->page_snapshot, page, sizeof(*page));
-	head = ps->page_snapshot.compound_info;
-	if ((head & 1) == 0) {
+	info = ps->page_snapshot.compound_info;
+	if (!(info & 1)) {
 		ps->idx = 0;
 		foliop = (struct folio *)&ps->page_snapshot;
 		if (!folio_test_large(foliop)) {
@@ -1288,7 +1288,15 @@ again:
 		}
 		foliop = (struct folio *)page;
 	} else {
-		foliop = (struct folio *)(head - 1);
+		/* See compound_head() */
+		if (compound_info_has_mask()) {
+			unsigned long p = (unsigned long)page;
+
+			foliop = (struct folio *)(p & info);
+		} else {
+			foliop = (struct folio *)(info - 1);
+		}
+
 		ps->idx = folio_page_idx(foliop, page);
 	}
 
-- 
cgit v1.2.3


From 209e6d9eb13aaf1b6e0fc6f76afc00d055e5ba12 Mon Sep 17 00:00:00 2001
From: "Kiryl Shutsemau (Meta)" <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:47 +0000
Subject: mm/hugetlb: defer vmemmap population for bootmem hugepages

Currently, the vmemmap for bootmem-allocated gigantic pages is populated
early in hugetlb_vmemmap_init_early().  However, the zone information is
only available after zones are initialized.  If it is later discovered
that a page spans multiple zones, the HVO mapping must be undone and
replaced with a normal mapping using vmemmap_undo_hvo().

Defer the actual vmemmap population to hugetlb_vmemmap_init_late().  At
this stage, zones are already initialized, so it can be checked if the
page is valid for HVO before deciding how to populate the vmemmap.

This allows us to remove vmemmap_undo_hvo() and the complex logic required
to rollback HVO mappings.

In hugetlb_vmemmap_init_late(), if HVO population fails or if the zones
are invalid, fall back to a normal vmemmap population.

Postponing population until hugetlb_vmemmap_init_late() also makes zone
information available from within vmemmap_populate_hvo().

Link: https://lkml.kernel.org/r/20260227194302.274384-10-kas@kernel.org
Signed-off-by: Kiryl Shutsemau (Meta) <kas@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h   |  2 --
 mm/hugetlb_vmemmap.c | 37 ++++++++++++++++++------------------
 mm/sparse-vmemmap.c  | 53 ----------------------------------------------------
 3 files changed, 18 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4e999c21d89a..d7e53532a109 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4481,8 +4481,6 @@ int vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap);
 int vmemmap_populate_hvo(unsigned long start, unsigned long end, int node,
 			 unsigned long headsize);
-int vmemmap_undo_hvo(unsigned long start, unsigned long end, int node,
-		     unsigned long headsize);
 void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
 			  unsigned long headsize);
 void vmemmap_populate_print_last(void);
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index a9280259e12a..935ec5829be9 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -790,7 +790,6 @@ void __init hugetlb_vmemmap_init_early(int nid)
 {
 	unsigned long psize, paddr, section_size;
 	unsigned long ns, i, pnum, pfn, nr_pages;
-	unsigned long start, end;
 	struct huge_bootmem_page *m = NULL;
 	void *map;
 
@@ -808,14 +807,6 @@ void __init hugetlb_vmemmap_init_early(int nid)
 		paddr = virt_to_phys(m);
 		pfn = PHYS_PFN(paddr);
 		map = pfn_to_page(pfn);
-		start = (unsigned long)map;
-		end = start + nr_pages * sizeof(struct page);
-
-		if (vmemmap_populate_hvo(start, end, nid,
-					HUGETLB_VMEMMAP_RESERVE_SIZE) < 0)
-			continue;
-
-		memmap_boot_pages_add(HUGETLB_VMEMMAP_RESERVE_SIZE / PAGE_SIZE);
 
 		pnum = pfn_to_section_nr(pfn);
 		ns = psize / section_size;
@@ -850,28 +841,36 @@ void __init hugetlb_vmemmap_init_late(int nid)
 		h = m->hstate;
 		pfn = PHYS_PFN(phys);
 		nr_pages = pages_per_huge_page(h);
+		map = pfn_to_page(pfn);
+		start = (unsigned long)map;
+		end = start + nr_pages * sizeof(struct page);
 
 		if (!hugetlb_bootmem_page_zones_valid(nid, m)) {
 			/*
 			 * Oops, the hugetlb page spans multiple zones.
-			 * Remove it from the list, and undo HVO.
+			 * Remove it from the list, and populate it normally.
 			 */
 			list_del(&m->list);
 
-			map = pfn_to_page(pfn);
-
-			start = (unsigned long)map;
-			end = start + nr_pages * sizeof(struct page);
-
-			vmemmap_undo_hvo(start, end, nid,
-					 HUGETLB_VMEMMAP_RESERVE_SIZE);
-			nr_mmap = end - start - HUGETLB_VMEMMAP_RESERVE_SIZE;
+			vmemmap_populate(start, end, nid, NULL);
+			nr_mmap = end - start;
 			memmap_boot_pages_add(DIV_ROUND_UP(nr_mmap, PAGE_SIZE));
 
 			memblock_phys_free(phys, huge_page_size(h));
 			continue;
-		} else
+		}
+
+		if (vmemmap_populate_hvo(start, end, nid,
+					 HUGETLB_VMEMMAP_RESERVE_SIZE) < 0) {
+			/* Fallback if HVO population fails */
+			vmemmap_populate(start, end, nid, NULL);
+			nr_mmap = end - start;
+		} else {
 			m->flags |= HUGE_BOOTMEM_ZONES_VALID;
+			nr_mmap = HUGETLB_VMEMMAP_RESERVE_SIZE;
+		}
+
+		memmap_boot_pages_add(DIV_ROUND_UP(nr_mmap, PAGE_SIZE));
 	}
 }
 #endif
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 37522d6cb398..032a81450838 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -302,59 +302,6 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
 	return vmemmap_populate_range(start, end, node, altmap, -1, 0);
 }
 
-/*
- * Undo populate_hvo, and replace it with a normal base page mapping.
- * Used in memory init in case a HVO mapping needs to be undone.
- *
- * This can happen when it is discovered that a memblock allocated
- * hugetlb page spans multiple zones, which can only be verified
- * after zones have been initialized.
- *
- * We know that:
- * 1) The first @headsize / PAGE_SIZE vmemmap pages were individually
- *    allocated through memblock, and mapped.
- *
- * 2) The rest of the vmemmap pages are mirrors of the last head page.
- */
-int __meminit vmemmap_undo_hvo(unsigned long addr, unsigned long end,
-				      int node, unsigned long headsize)
-{
-	unsigned long maddr, pfn;
-	pte_t *pte;
-	int headpages;
-
-	/*
-	 * Should only be called early in boot, so nothing will
-	 * be accessing these page structures.
-	 */
-	WARN_ON(!early_boot_irqs_disabled);
-
-	headpages = headsize >> PAGE_SHIFT;
-
-	/*
-	 * Clear mirrored mappings for tail page structs.
-	 */
-	for (maddr = addr + headsize; maddr < end; maddr += PAGE_SIZE) {
-		pte = virt_to_kpte(maddr);
-		pte_clear(&init_mm, maddr, pte);
-	}
-
-	/*
-	 * Clear and free mappings for head page and first tail page
-	 * structs.
-	 */
-	for (maddr = addr; headpages-- > 0; maddr += PAGE_SIZE) {
-		pte = virt_to_kpte(maddr);
-		pfn = pte_pfn(ptep_get(pte));
-		pte_clear(&init_mm, maddr, pte);
-		memblock_phys_free(PFN_PHYS(pfn), PAGE_SIZE);
-	}
-
-	flush_tlb_kernel_range(addr, end);
-
-	return vmemmap_populate(addr, end, node, NULL);
-}
-
 /*
  * Write protect the mirrored tail page structs for HVO. This will be
  * called from the hugetlb code when gathering and initializing the
-- 
cgit v1.2.3


From 622026e87c4019e609010811757e31193cc23847 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:50 +0000
Subject: mm/hugetlb: remove fake head pages

HugeTLB Vmemmap Optimization (HVO) reduces memory usage by freeing most
vmemmap pages for huge pages and remapping the freed range to a single
page containing the struct page metadata.

With the new mask-based compound_info encoding (for power-of-2 struct page
sizes), all tail pages of the same order are now identical regardless of
which compound page they belong to.  This means the tail pages can be
truly shared without fake heads.

Allocate a single page of initialized tail struct pages per zone per order
in the vmemmap_tails[] array in struct zone.  All huge pages of that order
in the zone share this tail page, mapped read-only into their vmemmap.
The head page remains unique per huge page.

Redefine MAX_FOLIO_ORDER using ilog2().  The define has to produce a
compile-constant as it is used to specify vmemmap_tail array size.  For
some reason, compiler is not able to solve get_order() at compile-time,
but ilog2() works.

Avoid PUD_ORDER to define MAX_FOLIO_ORDER as it adds dependency to
<linux/pgtable.h> which generates hard-to-break include loop.

This eliminates fake heads while maintaining the same memory savings, and
simplifies compound_head() by removing fake head detection.

Link: https://lkml.kernel.org/r/20260227194302.274384-13-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Reviewed-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h     |  3 ++-
 include/linux/mmzone.h | 19 +++++++++++--
 mm/hugetlb_vmemmap.c   | 73 +++++++++++++++++++++++++++++++++++++++++++++++---
 mm/internal.h          |  9 +++++++
 mm/sparse-vmemmap.c    | 55 +++++++++++++++++++++++++++++++------
 5 files changed, 145 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d7e53532a109..19619e5efeba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4479,7 +4479,8 @@ int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
 			       int node, struct vmem_altmap *altmap);
 int vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap);
-int vmemmap_populate_hvo(unsigned long start, unsigned long end, int node,
+int vmemmap_populate_hvo(unsigned long start, unsigned long end,
+			 unsigned int order, struct zone *zone,
 			 unsigned long headsize);
 void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
 			  unsigned long headsize);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0bef68e41f19..5c3ae0348754 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -81,13 +81,17 @@
  * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect
  * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit.
  */
-#define MAX_FOLIO_ORDER		get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G)
+#ifdef CONFIG_64BIT
+#define MAX_FOLIO_ORDER		(ilog2(SZ_16G) - PAGE_SHIFT)
+#else
+#define MAX_FOLIO_ORDER		(ilog2(SZ_1G) - PAGE_SHIFT)
+#endif
 #else
 /*
  * Without hugetlb, gigantic folios that are bigger than a single PUD are
  * currently impossible.
  */
-#define MAX_FOLIO_ORDER		PUD_ORDER
+#define MAX_FOLIO_ORDER		(PUD_SHIFT - PAGE_SHIFT)
 #endif
 
 #define MAX_FOLIO_NR_PAGES	(1UL << MAX_FOLIO_ORDER)
@@ -103,6 +107,14 @@
 	 is_power_of_2(sizeof(struct page)) ? \
 	 MAX_FOLIO_NR_PAGES * sizeof(struct page) : 0)
 
+/*
+ * vmemmap optimization (like HVO) is only possible for page orders that fill
+ * two or more pages with struct pages.
+ */
+#define VMEMMAP_TAIL_MIN_ORDER (ilog2(2 * PAGE_SIZE / sizeof(struct page)))
+#define __NR_VMEMMAP_TAILS (MAX_FOLIO_ORDER - VMEMMAP_TAIL_MIN_ORDER + 1)
+#define NR_VMEMMAP_TAILS (__NR_VMEMMAP_TAILS > 0 ? __NR_VMEMMAP_TAILS : 0)
+
 enum migratetype {
 	MIGRATE_UNMOVABLE,
 	MIGRATE_MOVABLE,
@@ -1113,6 +1125,9 @@ struct zone {
 	/* Zone statistics */
 	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
 	atomic_long_t		vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+	struct page *vmemmap_tails[NR_VMEMMAP_TAILS];
+#endif
 } ____cacheline_internodealigned_in_smp;
 
 enum pgdat_flags {
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 3628fb5b2a28..92330f172eb7 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -19,6 +19,7 @@
 
 #include <asm/tlbflush.h>
 #include "hugetlb_vmemmap.h"
+#include "internal.h"
 
 /**
  * struct vmemmap_remap_walk - walk vmemmap page table
@@ -505,6 +506,32 @@ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *
 	return true;
 }
 
+static struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
+{
+	const unsigned int idx = order - VMEMMAP_TAIL_MIN_ORDER;
+	struct page *tail, *p;
+	int node = zone_to_nid(zone);
+
+	tail = READ_ONCE(zone->vmemmap_tails[idx]);
+	if (likely(tail))
+		return tail;
+
+	tail = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+	if (!tail)
+		return NULL;
+
+	p = page_to_virt(tail);
+	for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
+		init_compound_tail(p + i, NULL, order, zone);
+
+	if (cmpxchg(&zone->vmemmap_tails[idx], NULL, tail)) {
+		__free_page(tail);
+		tail = READ_ONCE(zone->vmemmap_tails[idx]);
+	}
+
+	return tail;
+}
+
 static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 					    struct folio *folio,
 					    struct list_head *vmemmap_pages,
@@ -520,6 +547,11 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 	if (!vmemmap_should_optimize_folio(h, folio))
 		return ret;
 
+	nid = folio_nid(folio);
+	vmemmap_tail = vmemmap_get_tail(h->order, folio_zone(folio));
+	if (!vmemmap_tail)
+		return -ENOMEM;
+
 	static_branch_inc(&hugetlb_optimize_vmemmap_key);
 
 	if (flags & VMEMMAP_SYNCHRONIZE_RCU)
@@ -537,7 +569,6 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 	 */
 	folio_set_hugetlb_vmemmap_optimized(folio);
 
-	nid = folio_nid(folio);
 	vmemmap_head = alloc_pages_node(nid, GFP_KERNEL, 0);
 	if (!vmemmap_head) {
 		ret = -ENOMEM;
@@ -548,7 +579,6 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 	list_add(&vmemmap_head->lru, vmemmap_pages);
 	memmap_pages_add(1);
 
-	vmemmap_tail	= vmemmap_head;
 	vmemmap_start	= (unsigned long)&folio->page;
 	vmemmap_end	= vmemmap_start + hugetlb_vmemmap_size(h);
 
@@ -776,11 +806,26 @@ void __init hugetlb_vmemmap_init_early(int nid)
 	}
 }
 
+static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn)
+{
+	struct zone *zone;
+	enum zone_type zone_type;
+
+	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
+		zone = &NODE_DATA(nid)->node_zones[zone_type];
+		if (zone_spans_pfn(zone, pfn))
+			return zone;
+	}
+
+	return NULL;
+}
+
 void __init hugetlb_vmemmap_init_late(int nid)
 {
 	struct huge_bootmem_page *m, *tm;
 	unsigned long phys, nr_pages, start, end;
 	unsigned long pfn, nr_mmap;
+	struct zone *zone = NULL;
 	struct hstate *h;
 	void *map;
 
@@ -814,7 +859,12 @@ void __init hugetlb_vmemmap_init_late(int nid)
 			continue;
 		}
 
-		if (vmemmap_populate_hvo(start, end, nid,
+		if (!zone || !zone_spans_pfn(zone, pfn))
+			zone = pfn_to_zone(nid, pfn);
+		if (WARN_ON_ONCE(!zone))
+			continue;
+
+		if (vmemmap_populate_hvo(start, end, huge_page_order(h), zone,
 					 HUGETLB_VMEMMAP_RESERVE_SIZE) < 0) {
 			/* Fallback if HVO population fails */
 			vmemmap_populate(start, end, nid, NULL);
@@ -842,10 +892,27 @@ static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
 static int __init hugetlb_vmemmap_init(void)
 {
 	const struct hstate *h;
+	struct zone *zone;
 
 	/* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */
 	BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES);
 
+	for_each_zone(zone) {
+		for (int i = 0; i < NR_VMEMMAP_TAILS; i++) {
+			struct page *tail, *p;
+			unsigned int order;
+
+			tail = zone->vmemmap_tails[i];
+			if (!tail)
+				continue;
+
+			order = i + VMEMMAP_TAIL_MIN_ORDER;
+			p = page_to_virt(tail);
+			for (int j = 0; j < PAGE_SIZE / sizeof(struct page); j++)
+				init_compound_tail(p + j, NULL, order, zone);
+		}
+	}
+
 	for_each_hstate(h) {
 		if (hugetlb_vmemmap_optimizable(h)) {
 			register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
diff --git a/mm/internal.h b/mm/internal.h
index 9cfbd8e41914..84167b0570c9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -905,6 +905,15 @@ static inline void prep_compound_tail(struct page *tail,
 	set_page_private(tail, 0);
 }
 
+static inline void init_compound_tail(struct page *tail,
+		const struct page *head, unsigned int order, struct zone *zone)
+{
+	atomic_set(&tail->_mapcount, -1);
+	set_page_node(tail, zone_to_nid(zone));
+	set_page_zone(tail, zone_idx(zone));
+	prep_compound_tail(tail, head, order);
+}
+
 void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags);
 extern bool free_pages_prepare(struct page *page, unsigned int order);
 
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 032a81450838..842ed2f0bce6 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -325,16 +325,54 @@ void vmemmap_wrprotect_hvo(unsigned long addr, unsigned long end,
 	}
 }
 
-/*
- * Populate vmemmap pages HVO-style. The first page contains the head
- * page and needed tail pages, the other ones are mirrors of the first
- * page.
- */
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
+{
+	struct page *p, *tail;
+	unsigned int idx;
+	int node = zone_to_nid(zone);
+
+	if (WARN_ON_ONCE(order < VMEMMAP_TAIL_MIN_ORDER))
+		return NULL;
+	if (WARN_ON_ONCE(order > MAX_FOLIO_ORDER))
+		return NULL;
+
+	idx = order - VMEMMAP_TAIL_MIN_ORDER;
+	tail = zone->vmemmap_tails[idx];
+	if (tail)
+		return tail;
+
+	/*
+	 * Only allocate the page, but do not initialize it.
+	 *
+	 * Any initialization done here will be overwritten by memmap_init().
+	 *
+	 * hugetlb_vmemmap_init() will take care of initialization after
+	 * memmap_init().
+	 */
+
+	p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
+	if (!p)
+		return NULL;
+
+	tail = virt_to_page(p);
+	zone->vmemmap_tails[idx] = tail;
+
+	return tail;
+}
+
 int __meminit vmemmap_populate_hvo(unsigned long addr, unsigned long end,
-				       int node, unsigned long headsize)
+				       unsigned int order, struct zone *zone,
+				       unsigned long headsize)
 {
-	pte_t *pte;
 	unsigned long maddr;
+	struct page *tail;
+	pte_t *pte;
+	int node = zone_to_nid(zone);
+
+	tail = vmemmap_get_tail(order, zone);
+	if (!tail)
+		return -ENOMEM;
 
 	for (maddr = addr; maddr < addr + headsize; maddr += PAGE_SIZE) {
 		pte = vmemmap_populate_address(maddr, node, NULL, -1, 0);
@@ -346,8 +384,9 @@ int __meminit vmemmap_populate_hvo(unsigned long addr, unsigned long end,
 	 * Reuse the last page struct page mapped above for the rest.
 	 */
 	return vmemmap_populate_range(maddr, end, node, NULL,
-					pte_pfn(ptep_get(pte)), 0);
+				      page_to_pfn(tail), 0);
 }
+#endif
 
 void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
 				      unsigned long addr, unsigned long next)
-- 
cgit v1.2.3


From 32c440d67e6cd96a715007d0e62eb970b0c49abc Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:51 +0000
Subject: mm: drop fake head checks

With fake head pages eliminated in the previous commit, remove the
supporting infrastructure:

  - page_fixed_fake_head(): no longer needed to detect fake heads;
  - page_is_fake_head(): no longer needed;
  - page_count_writable(): no longer needed for RCU protection;
  - RCU read_lock in page_ref_add_unless(): no longer needed;

This substantially simplifies compound_head() and page_ref_add_unless(),
removing both branches and RCU overhead from these hot paths.

RCU was required to serialize allocation of hugetlb page against
get_page_unless_zero() and prevent writing to read-only fake head.  It is
redundant without fake heads.

See bd225530a4c7 ("mm/hugetlb_vmemmap: fix race with speculative PFN
walkers") for more details.

synchronize_rcu() in mm/hugetlb_vmemmap.c will be removed by a separate
patch.

Link: https://lkml.kernel.org/r/20260227194302.274384-14-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 93 ++--------------------------------------------
 include/linux/page_ref.h   |  8 +---
 2 files changed, 4 insertions(+), 97 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 43876b108f0a..b8eef2181598 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -221,102 +221,15 @@ static __always_inline bool compound_info_has_mask(void)
 	return is_power_of_2(sizeof(struct page));
 }
 
-#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
 
-/*
- * Return the real head page struct iff the @page is a fake head page, otherwise
- * return the @page itself. See Documentation/mm/vmemmap_dedup.rst.
- */
-static __always_inline const struct page *page_fixed_fake_head(const struct page *page)
-{
-	/* Fake heads only exists if compound_info_has_mask() is true */
-	if (!compound_info_has_mask())
-		return page;
-
-	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
-		return page;
-
-	/*
-	 * Only addresses aligned with PAGE_SIZE of struct page may be fake head
-	 * struct page. The alignment check aims to avoid access the fields (
-	 * e.g. compound_info) of the @page[1]. It can avoid touch a (possibly)
-	 * cold cacheline in some cases.
-	 */
-	if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) &&
-	    test_bit(PG_head, &page->flags.f)) {
-		/*
-		 * We can safely access the field of the @page[1] with PG_head
-		 * because the @page is a compound page composed with at least
-		 * two contiguous pages.
-		 */
-		unsigned long info = READ_ONCE(page[1].compound_info);
-
-		/* See set_compound_head() */
-		if (likely(info & 1)) {
-			unsigned long p = (unsigned long)page;
-
-			return (const struct page *)(p & info);
-		}
-	}
-	return page;
-}
-
-static __always_inline bool page_count_writable(const struct page *page, int u)
-{
-	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
-		return true;
-
-	/*
-	 * The refcount check is ordered before the fake-head check to prevent
-	 * the following race:
-	 *   CPU 1 (HVO)                     CPU 2 (speculative PFN walker)
-	 *
-	 *   page_ref_freeze()
-	 *   synchronize_rcu()
-	 *                                   rcu_read_lock()
-	 *                                   page_is_fake_head() is false
-	 *   vmemmap_remap_pte()
-	 *   XXX: struct page[] becomes r/o
-	 *
-	 *   page_ref_unfreeze()
-	 *                                   page_ref_count() is not zero
-	 *
-	 *                                   atomic_add_unless(&page->_refcount)
-	 *                                   XXX: try to modify r/o struct page[]
-	 *
-	 * The refcount check also prevents modification attempts to other (r/o)
-	 * tail pages that are not fake heads.
-	 */
-	if (atomic_read_acquire(&page->_refcount) == u)
-		return false;
-
-	return page_fixed_fake_head(page) == page;
-}
-#else
-static inline const struct page *page_fixed_fake_head(const struct page *page)
-{
-	return page;
-}
-
-static inline bool page_count_writable(const struct page *page, int u)
-{
-	return true;
-}
-#endif
-
-static __always_inline int page_is_fake_head(const struct page *page)
-{
-	return page_fixed_fake_head(page) != page;
-}
-
 static __always_inline unsigned long _compound_head(const struct page *page)
 {
 	unsigned long info = READ_ONCE(page->compound_info);
 
 	/* Bit 0 encodes PageTail() */
 	if (!(info & 1))
-		return (unsigned long)page_fixed_fake_head(page);
+		return (unsigned long)page;
 
 	/*
 	 * If compound_info_has_mask() is false, the rest of compound_info is
@@ -396,7 +309,7 @@ static __always_inline void clear_compound_head(struct page *page)
 
 static __always_inline int PageTail(const struct page *page)
 {
-	return READ_ONCE(page->compound_info) & 1 || page_is_fake_head(page);
+	return READ_ONCE(page->compound_info) & 1;
 }
 
 static __always_inline int PageCompound(const struct page *page)
@@ -928,7 +841,7 @@ static __always_inline bool folio_test_head(const struct folio *folio)
 static __always_inline int PageHead(const struct page *page)
 {
 	PF_POISONED_CHECK(page);
-	return test_bit(PG_head, &page->flags.f) && !page_is_fake_head(page);
+	return test_bit(PG_head, &page->flags.f);
 }
 
 __SETPAGEFLAG(Head, head, PF_ANY)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 544150d1d5fd..490d0ad6e56d 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -230,13 +230,7 @@ static inline int folio_ref_dec_return(struct folio *folio)
 
 static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 {
-	bool ret = false;
-
-	rcu_read_lock();
-	/* avoid writing to the vmemmap area being remapped */
-	if (page_count_writable(page, u))
-		ret = atomic_add_unless(&page->_refcount, nr, u);
-	rcu_read_unlock();
+	bool ret = atomic_add_unless(&page->_refcount, nr, u);
 
 	if (page_ref_tracepoint_active(page_ref_mod_unless))
 		__page_ref_mod_unless(page, nr, ret);
-- 
cgit v1.2.3


From da3e2d1ca43de56a83a806237b6be7e91cf07052 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:53 +0000
Subject: mm/hugetlb: remove hugetlb_optimize_vmemmap_key static key

The hugetlb_optimize_vmemmap_key static key was used to guard fake head
detection in compound_head() and related functions.  It allowed skipping
the fake head checks entirely when HVO was not in use.

With fake heads eliminated and the detection code removed, the static key
serves no purpose.  Remove its definition and all increment/decrement
calls.

Link: https://lkml.kernel.org/r/20260227194302.274384-16-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h |  2 --
 mm/hugetlb_vmemmap.c       | 14 ++------------
 2 files changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b8eef2181598..f361bd6c814c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -221,8 +221,6 @@ static __always_inline bool compound_info_has_mask(void)
 	return is_power_of_2(sizeof(struct page));
 }
 
-DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
-
 static __always_inline unsigned long _compound_head(const struct page *page)
 {
 	unsigned long info = READ_ONCE(page->compound_info);
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index fd1d5d5d12b4..4a077d231d3a 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -385,9 +385,6 @@ static int vmemmap_remap_alloc(unsigned long start, unsigned long end,
 	return vmemmap_remap_range(start, end, &walk);
 }
 
-DEFINE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
-EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key);
-
 static bool vmemmap_optimize_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON);
 static int __init hugetlb_vmemmap_optimize_param(char *buf)
 {
@@ -419,10 +416,8 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
 	 * discarded vmemmap pages must be allocated and remapping.
 	 */
 	ret = vmemmap_remap_alloc(vmemmap_start, vmemmap_end, flags);
-	if (!ret) {
+	if (!ret)
 		folio_clear_hugetlb_vmemmap_optimized(folio);
-		static_branch_dec(&hugetlb_optimize_vmemmap_key);
-	}
 
 	return ret;
 }
@@ -544,8 +539,6 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 	if (!vmemmap_tail)
 		return -ENOMEM;
 
-	static_branch_inc(&hugetlb_optimize_vmemmap_key);
-
 	/*
 	 * Very Subtle
 	 * If VMEMMAP_REMAP_NO_TLB_FLUSH is set, TLB flushing is not performed
@@ -581,10 +574,8 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 				 vmemmap_head, vmemmap_tail,
 				 vmemmap_pages, flags);
 out:
-	if (ret) {
-		static_branch_dec(&hugetlb_optimize_vmemmap_key);
+	if (ret)
 		folio_clear_hugetlb_vmemmap_optimized(folio);
-	}
 
 	return ret;
 }
@@ -650,7 +641,6 @@ static void __hugetlb_vmemmap_optimize_folios(struct hstate *h,
 			register_page_bootmem_memmap(pfn_to_section_nr(spfn),
 					&folio->page,
 					HUGETLB_VMEMMAP_RESERVE_SIZE);
-			static_branch_inc(&hugetlb_optimize_vmemmap_key);
 			continue;
 		}
 
-- 
cgit v1.2.3


From 66b2a3d9ae460934fef5fd588077730f483e8c8c Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas@kernel.org>
Date: Fri, 27 Feb 2026 19:42:54 +0000
Subject: mm: remove the branch from compound_head()

The compound_head() function is a hot path.  For example, the zap path
calls it for every leaf page table entry.

Rewrite the helper function in a branchless manner to eliminate the risk
of CPU branch misprediction.

Link: https://lkml.kernel.org/r/20260227194302.274384-17-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f361bd6c814c..7223f6f4e2b4 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -224,25 +224,32 @@ static __always_inline bool compound_info_has_mask(void)
 static __always_inline unsigned long _compound_head(const struct page *page)
 {
 	unsigned long info = READ_ONCE(page->compound_info);
+	unsigned long mask;
 
-	/* Bit 0 encodes PageTail() */
-	if (!(info & 1))
-		return (unsigned long)page;
+	if (!compound_info_has_mask()) {
+		/* Bit 0 encodes PageTail() */
+		if (info & 1)
+			return info - 1;
 
-	/*
-	 * If compound_info_has_mask() is false, the rest of compound_info is
-	 * the pointer to the head page.
-	 */
-	if (!compound_info_has_mask())
-		return info - 1;
+		return (unsigned long)page;
+	}
 
 	/*
 	 * If compound_info_has_mask() is true the rest of the info encodes
 	 * the mask that converts the address of the tail page to the head page.
 	 *
 	 * No need to clear bit 0 in the mask as 'page' always has it clear.
+	 *
+	 * Let's do it in a branchless manner.
 	 */
-	return (unsigned long)page & info;
+
+	/* Non-tail: -1UL, Tail: 0 */
+	mask = (info & 1) - 1;
+
+	/* Non-tail: -1UL, Tail: info */
+	mask |= info;
+
+	return (unsigned long)page & mask;
 }
 
 #define compound_head(page)	((typeof(page))_compound_head(page))
-- 
cgit v1.2.3


From 99573ef4ac30d4eae7a7937f0c9ea351991e3ccc Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 27 Feb 2026 22:29:52 +0100
Subject: mm/pagewalk: drop FW_MIGRATION

We removed the last user of FW_MIGRATION in commit 912aa825957f ("Revert
"mm/ksm: convert break_ksm() from walk_page_range_vma() to folio_walk"").

So let's remove FW_MIGRATION and assign FW_ZEROPAGE bit 0.  Including
leafops.h is no longer required.

While at it, convert "expose_page" to "zeropage", as zeropages are now the
only remaining use case for not exposing a page.

Link: https://lkml.kernel.org/r/20260227212952.190691-1-david@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagewalk.h |  8 +-------
 mm/pagewalk.c            | 40 ++++++++--------------------------------
 2 files changed, 9 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index 88e18615dd72..b41d7265c01b 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -148,14 +148,8 @@ int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
 
 typedef int __bitwise folio_walk_flags_t;
 
-/*
- * Walk migration entries as well. Careful: a large folio might get split
- * concurrently.
- */
-#define FW_MIGRATION			((__force folio_walk_flags_t)BIT(0))
-
 /* Walk shared zeropages (small + huge) as well. */
-#define FW_ZEROPAGE			((__force folio_walk_flags_t)BIT(1))
+#define FW_ZEROPAGE			((__force folio_walk_flags_t)BIT(0))
 
 enum folio_walk_level {
 	FW_LEVEL_PTE,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index a94c401ab2cf..cb358558807c 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -5,7 +5,6 @@
 #include <linux/hugetlb.h>
 #include <linux/mmu_context.h>
 #include <linux/swap.h>
-#include <linux/leafops.h>
 
 #include <asm/tlbflush.h>
 
@@ -841,9 +840,6 @@ int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
  * VM as documented by vm_normal_page(). If requested, zeropages will be
  * returned as well.
  *
- * As default, this function only considers present page table entries.
- * If requested, it will also consider migration entries.
- *
  * If this function returns NULL it might either indicate "there is nothing" or
  * "there is nothing suitable".
  *
@@ -854,11 +850,10 @@ int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
  * that call.
  *
  * @fw->page will correspond to the page that is effectively referenced by
- * @addr. However, for migration entries and shared zeropages @fw->page is
- * set to NULL. Note that large folios might be mapped by multiple page table
- * entries, and this function will always only lookup a single entry as
- * specified by @addr, which might or might not cover more than a single page of
- * the returned folio.
+ * @addr. However, for shared zeropages @fw->page is set to NULL. Note that
+ * large folios might be mapped by multiple page table entries, and this
+ * function will always only lookup a single entry as specified by @addr, which
+ * might or might not cover more than a single page of the returned folio.
  *
  * This function must *not* be used as a naive replacement for
  * get_user_pages() / pin_user_pages(), especially not to perform DMA or
@@ -885,7 +880,7 @@ struct folio *folio_walk_start(struct folio_walk *fw,
 		folio_walk_flags_t flags)
 {
 	unsigned long entry_size;
-	bool expose_page = true;
+	bool zeropage = false;
 	struct page *page;
 	pud_t *pudp, pud;
 	pmd_t *pmdp, pmd;
@@ -933,10 +928,6 @@ struct folio *folio_walk_start(struct folio_walk *fw,
 			if (page)
 				goto found;
 		}
-		/*
-		 * TODO: FW_MIGRATION support for PUD migration entries
-		 * once there are relevant users.
-		 */
 		spin_unlock(ptl);
 		goto not_found;
 	}
@@ -970,16 +961,9 @@ pmd_table:
 			} else if ((flags & FW_ZEROPAGE) &&
 				    is_huge_zero_pmd(pmd)) {
 				page = pfn_to_page(pmd_pfn(pmd));
-				expose_page = false;
+				zeropage = true;
 				goto found;
 			}
-		} else if ((flags & FW_MIGRATION) &&
-			   pmd_is_migration_entry(pmd)) {
-			const softleaf_t entry = softleaf_from_pmd(pmd);
-
-			page = softleaf_to_page(entry);
-			expose_page = false;
-			goto found;
 		}
 		spin_unlock(ptl);
 		goto not_found;
@@ -1004,15 +988,7 @@ pte_table:
 		if ((flags & FW_ZEROPAGE) &&
 		    is_zero_pfn(pte_pfn(pte))) {
 			page = pfn_to_page(pte_pfn(pte));
-			expose_page = false;
-			goto found;
-		}
-	} else if (!pte_none(pte)) {
-		const softleaf_t entry = softleaf_from_pte(pte);
-
-		if ((flags & FW_MIGRATION) && softleaf_is_migration(entry)) {
-			page = softleaf_to_page(entry);
-			expose_page = false;
+			zeropage = true;
 			goto found;
 		}
 	}
@@ -1021,7 +997,7 @@ not_found:
 	vma_pgtable_walk_end(vma);
 	return NULL;
 found:
-	if (expose_page)
+	if (!zeropage)
 		/* Note: Offset from the mapped page, not the folio start. */
 		fw->page = page + ((addr & (entry_size - 1)) >> PAGE_SHIFT);
 	else
-- 
cgit v1.2.3


From 3d56d7317b271a1a5030ebb135c58aedc4c0fd36 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Fri, 27 Feb 2026 04:03:00 +0000
Subject: mm: replace READ_ONCE() in pud_trans_unstable()

Replace READ_ONCE() with the existing standard page table accessor for PUD
aka pudp_get() in pud_trans_unstable().  This does not create any
functional change for platforms that do not override pudp_get(), which
still defaults to READ_ONCE().

Link: https://lkml.kernel.org/r/20260227040300.2091901-1-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mike Rapoport <rppt@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 776993d4567b..d2767a4c027b 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -2004,7 +2004,7 @@ static inline int pud_trans_unstable(pud_t *pud)
 {
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
 	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
-	pud_t pudval = READ_ONCE(*pud);
+	pud_t pudval = pudp_get(pud);
 
 	if (pud_none(pudval) || pud_trans_huge(pudval))
 		return 1;
-- 
cgit v1.2.3


From 28266ac94a50e585c267a79d9ef5c2803d4dcd7a Mon Sep 17 00:00:00 2001
From: Gladyshev Ilya <gladyshev.ilya1@h-partners.com>
Date: Sun, 1 Mar 2026 13:19:39 +0000
Subject: mm: make ref_unless functions unless_zero only

There are no users of (folio/page)_ref_add_unless(page, nr, u) with u != 0
[1] and all current users are "internal" for page refcounting API.  This
allows us to safely drop this parameter and reduce function semantics to
the "unless zero" cases only.

If needed, these functions for the u!=0 cases can be trivially
reintroduced later using the same atomic_add_unless operations as before.

[1]: The last user was dropped in v5.18 kernel, commit 27674ef6c73f ("mm:
remove the extra ZONE_DEVICE struct page refcount").  There is no trace of
discussion as to why this cleanup wasn't done earlier.

Link: https://lkml.kernel.org/r/a0c89b49d38c671a0bdd35069d15ee13e08314d2.1772370066.git.gladyshev.ilya1@h-partners.com
Co-developed-by: Gorbunov Ivan <gorbunov.ivan@h-partners.com>
Signed-off-by: Gorbunov Ivan <gorbunov.ivan@h-partners.com>
Signed-off-by: Gladyshev Ilya <gladyshev.ilya1@h-partners.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Kiryl Shutsemau <kas@kernel.org>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h       |  2 +-
 include/linux/page_ref.h | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 19619e5efeba..08b743aab92a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1506,7 +1506,7 @@ static inline int folio_put_testzero(struct folio *folio)
  */
 static inline bool get_page_unless_zero(struct page *page)
 {
-	return page_ref_add_unless(page, 1, 0);
+	return page_ref_add_unless_zero(page, 1);
 }
 
 static inline struct folio *folio_get_nontail_page(struct page *page)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 490d0ad6e56d..94d3f0e71c06 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -228,18 +228,18 @@ static inline int folio_ref_dec_return(struct folio *folio)
 	return page_ref_dec_return(&folio->page);
 }
 
-static inline bool page_ref_add_unless(struct page *page, int nr, int u)
+static inline bool page_ref_add_unless_zero(struct page *page, int nr)
 {
-	bool ret = atomic_add_unless(&page->_refcount, nr, u);
+	bool ret = atomic_add_unless(&page->_refcount, nr, 0);
 
 	if (page_ref_tracepoint_active(page_ref_mod_unless))
 		__page_ref_mod_unless(page, nr, ret);
 	return ret;
 }
 
-static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
+static inline bool folio_ref_add_unless_zero(struct folio *folio, int nr)
 {
-	return page_ref_add_unless(&folio->page, nr, u);
+	return page_ref_add_unless_zero(&folio->page, nr);
 }
 
 /**
@@ -255,12 +255,12 @@ static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
  */
 static inline bool folio_try_get(struct folio *folio)
 {
-	return folio_ref_add_unless(folio, 1, 0);
+	return folio_ref_add_unless_zero(folio, 1);
 }
 
 static inline bool folio_ref_try_add(struct folio *folio, int count)
 {
-	return folio_ref_add_unless(folio, count, 0);
+	return folio_ref_add_unless_zero(folio, count);
 }
 
 static inline int page_ref_freeze(struct page *page, int count)
-- 
cgit v1.2.3


From de008c9ba5684f14e83bcf86cd45fb0e4e6c4d82 Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 27 Feb 2026 21:08:33 +0100
Subject: mm/memory: remove "zap_details" parameter from
 zap_page_range_single()

Nobody except memory.c should really set that parameter to non-NULL.  So
let's just drop it and make unmap_mapping_range_vma() use
zap_page_range_single_batched() instead.

[david@kernel.org: format on a single line]
  Link: https://lkml.kernel.org/r/8a27e9ac-2025-4724-a46d-0a7c90894ba7@kernel.org
Link: https://lkml.kernel.org/r/20260227200848.114019-3-david@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Puranjay Mohan <puranjay@kernel.org>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Arve <arve@android.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Carlos Llamas <cmllamas@google.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Daniel Borkman <daniel@iogearbox.net>
Cc: Dave Airlie <airlied@gmail.com>
Cc: David Ahern <dsahern@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ian Abbott <abbotti@mev.co.uk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jakub Kacinski <kuba@kernel.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Namhyung kim <namhyung@kernel.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Todd Kjos <tkjos@android.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/s390/mm/gmap_helpers.c    |  2 +-
 drivers/android/binder_alloc.c |  2 +-
 include/linux/mm.h             |  5 ++---
 kernel/bpf/arena.c             |  3 +--
 kernel/events/core.c           |  2 +-
 mm/madvise.c                   |  3 +--
 mm/memory.c                    | 16 ++++++++++------
 net/ipv4/tcp.c                 |  5 ++---
 rust/kernel/mm/virt.rs         |  4 +---
 9 files changed, 20 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index dea83e3103e5..ae2d59a19313 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -89,7 +89,7 @@ void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned lo
 		if (!vma)
 			return;
 		if (!is_vm_hugetlb_page(vma))
-			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
+			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr);
 		vmaddr = vma->vm_end;
 	}
 }
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 241f16a9b63d..dd2046bd5cde 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -1185,7 +1185,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
 	if (vma) {
 		trace_binder_unmap_user_start(alloc, index);
 
-		zap_page_range_single(vma, page_addr, PAGE_SIZE, NULL);
+		zap_page_range_single(vma, page_addr, PAGE_SIZE);
 
 		trace_binder_unmap_user_end(alloc, index);
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 08b743aab92a..6512d70c5852 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2804,11 +2804,10 @@ struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr,
 void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		  unsigned long size);
 void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
-			   unsigned long size, struct zap_details *details);
+			   unsigned long size);
 static inline void zap_vma_pages(struct vm_area_struct *vma)
 {
-	zap_page_range_single(vma, vma->vm_start,
-			      vma->vm_end - vma->vm_start, NULL);
+	zap_page_range_single(vma, vma->vm_start, vma->vm_end - vma->vm_start);
 }
 struct mmu_notifier_range;
 
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index f355cf1c1a16..19cca936eb9d 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -656,8 +656,7 @@ static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
 	guard(mutex)(&arena->lock);
 	/* iterate link list under lock */
 	list_for_each_entry(vml, &arena->vma_list, head)
-		zap_page_range_single(vml->vma, uaddr,
-				      PAGE_SIZE * page_cnt, NULL);
+		zap_page_range_single(vml->vma, uaddr, PAGE_SIZE * page_cnt);
 }
 
 static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt, bool sleepable)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 89b40e439717..2ecdaabf1b4d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7213,7 +7213,7 @@ static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma)
 #ifdef CONFIG_MMU
 	/* Clear any partial mappings on error. */
 	if (err)
-		zap_page_range_single(vma, vma->vm_start, nr_pages * PAGE_SIZE, NULL);
+		zap_page_range_single(vma, vma->vm_start, nr_pages * PAGE_SIZE);
 #endif
 
 	return err;
diff --git a/mm/madvise.c b/mm/madvise.c
index 1313166c5514..e4a2728593a8 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1193,8 +1193,7 @@ static long madvise_guard_install(struct madvise_behavior *madv_behavior)
 		 * OK some of the range have non-guard pages mapped, zap
 		 * them. This leaves existing guard pages in place.
 		 */
-		zap_page_range_single(vma, range->start,
-				range->end - range->start, NULL);
+		zap_page_range_single(vma, range->start, range->end - range->start);
 	}
 
 	/*
diff --git a/mm/memory.c b/mm/memory.c
index f78ab3869f8d..fbd02d5bd520 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2203,17 +2203,16 @@ void zap_page_range_single_batched(struct mmu_gather *tlb,
  * @vma: vm_area_struct holding the applicable pages
  * @address: starting address of pages to zap
  * @size: number of bytes to zap
- * @details: details of shared cache invalidation
  *
  * The range must fit into one VMA.
  */
 void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
-		unsigned long size, struct zap_details *details)
+		unsigned long size)
 {
 	struct mmu_gather tlb;
 
 	tlb_gather_mmu(&tlb, vma->vm_mm);
-	zap_page_range_single_batched(&tlb, vma, address, size, details);
+	zap_page_range_single_batched(&tlb, vma, address, size, NULL);
 	tlb_finish_mmu(&tlb);
 }
 
@@ -2235,7 +2234,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 	    		!(vma->vm_flags & VM_PFNMAP))
 		return;
 
-	zap_page_range_single(vma, address, size, NULL);
+	zap_page_range_single(vma, address, size);
 }
 EXPORT_SYMBOL_GPL(zap_vma_ptes);
 
@@ -3003,7 +3002,7 @@ static int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long add
 	 * maintain page reference counts, and callers may free
 	 * pages due to the error. So zap it early.
 	 */
-	zap_page_range_single(vma, addr, size, NULL);
+	zap_page_range_single(vma, addr, size);
 	return error;
 }
 
@@ -4226,7 +4225,12 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 		unsigned long start_addr, unsigned long end_addr,
 		struct zap_details *details)
 {
-	zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
+	struct mmu_gather tlb;
+
+	tlb_gather_mmu(&tlb, vma->vm_mm);
+	zap_page_range_single_batched(&tlb, vma, start_addr,
+				      end_addr - start_addr, details);
+	tlb_finish_mmu(&tlb);
 }
 
 static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 202a4e57a218..89c962672e51 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2105,7 +2105,7 @@ static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma,
 		maybe_zap_len = total_bytes_to_map -  /* All bytes to map */
 				*length + /* Mapped or pending */
 				(pages_remaining * PAGE_SIZE); /* Failed map. */
-		zap_page_range_single(vma, *address, maybe_zap_len, NULL);
+		zap_page_range_single(vma, *address, maybe_zap_len);
 		err = 0;
 	}
 
@@ -2270,8 +2270,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
 	if (total_bytes_to_map) {
 		if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT))
-			zap_page_range_single(vma, address, total_bytes_to_map,
-					      NULL);
+			zap_page_range_single(vma, address, total_bytes_to_map);
 		zc->length = total_bytes_to_map;
 		zc->recv_skip_hint = 0;
 	} else {
diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs
index da21d65ccd20..6bfd91cfa1f4 100644
--- a/rust/kernel/mm/virt.rs
+++ b/rust/kernel/mm/virt.rs
@@ -123,9 +123,7 @@ impl VmaRef {
         // SAFETY: By the type invariants, the caller has read access to this VMA, which is
         // sufficient for this method call. This method has no requirements on the vma flags. The
         // address range is checked to be within the vma.
-        unsafe {
-            bindings::zap_page_range_single(self.as_ptr(), address, size, core::ptr::null_mut())
-        };
+        unsafe { bindings::zap_page_range_single(self.as_ptr(), address, size) };
     }
 
     /// If the [`VM_MIXEDMAP`] flag is set, returns a [`VmaMixedMap`] to this VMA, otherwise
-- 
cgit v1.2.3


From 599a59e6037838ea7cd6264d7980ea63de244994 Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 27 Feb 2026 21:08:35 +0100
Subject: mm/memory: simplify calculation in unmap_mapping_range_tree()

Let's simplify the calculation a bit further to make it easier to get,
reusing vma_last_pgoff() which we move from interval_tree.c to mm.h.

Link: https://lkml.kernel.org/r/20260227200848.114019-5-david@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Arve <arve@android.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Carlos Llamas <cmllamas@google.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Daniel Borkman <daniel@iogearbox.net>
Cc: Dave Airlie <airlied@gmail.com>
Cc: David Ahern <dsahern@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ian Abbott <abbotti@mev.co.uk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jakub Kacinski <kuba@kernel.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Namhyung kim <namhyung@kernel.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Todd Kjos <tkjos@android.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h |  5 +++++
 mm/interval_tree.c |  5 -----
 mm/memory.c        | 12 +++++-------
 3 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6512d70c5852..771d021b7948 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3969,6 +3969,11 @@ static inline unsigned long vma_pages(const struct vm_area_struct *vma)
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
+static inline unsigned long vma_last_pgoff(struct vm_area_struct *vma)
+{
+	return vma->vm_pgoff + vma_pages(vma) - 1;
+}
+
 static inline unsigned long vma_desc_size(const struct vm_area_desc *desc)
 {
 	return desc->end - desc->start;
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
index 32e390c42c53..32bcfbfcf15f 100644
--- a/mm/interval_tree.c
+++ b/mm/interval_tree.c
@@ -15,11 +15,6 @@ static inline unsigned long vma_start_pgoff(struct vm_area_struct *v)
 	return v->vm_pgoff;
 }
 
-static inline unsigned long vma_last_pgoff(struct vm_area_struct *v)
-{
-	return v->vm_pgoff + vma_pages(v) - 1;
-}
-
 INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.rb,
 		     unsigned long, shared.rb_subtree_last,
 		     vma_start_pgoff, vma_last_pgoff, /* empty */, vma_interval_tree)
diff --git a/mm/memory.c b/mm/memory.c
index f1c5d6b01a62..24b768885379 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4227,17 +4227,15 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
 					    struct zap_details *details)
 {
 	struct vm_area_struct *vma;
-	pgoff_t vba, vea, zba, zea;
 	unsigned long start, size;
 	struct mmu_gather tlb;
 
 	vma_interval_tree_foreach(vma, root, first_index, last_index) {
-		vba = vma->vm_pgoff;
-		vea = vba + vma_pages(vma) - 1;
-		zba = max(first_index, vba);
-		zea = min(last_index, vea);
-		start = ((zba - vba) << PAGE_SHIFT) + vma->vm_start;
-		size = (zea - zba + 1) << PAGE_SHIFT;
+		const pgoff_t start_idx = max(first_index, vma->vm_pgoff);
+		const pgoff_t end_idx = min(last_index, vma_last_pgoff(vma)) + 1;
+
+		start = vma->vm_start + ((start_idx - vma->vm_pgoff) << PAGE_SHIFT);
+		size = (end_idx - start_idx) << PAGE_SHIFT;
 
 		tlb_gather_mmu(&tlb, vma->vm_mm);
 		zap_page_range_single_batched(&tlb, vma, start, size, details);
-- 
cgit v1.2.3


From a97bc13d15f472c7f8ede1b38660fb55b6dab68d Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 27 Feb 2026 21:08:40 +0100
Subject: mm/memory: convert details->even_cows into details->skip_cows

The current semantics are confusing: simply because someone specifies an
empty zap_detail struct suddenly makes should_zap_cows() behave
differently.  The default should be to also zap CoW'ed anonymous pages.

Really only unmap_mapping_pages() and friends want to skip zapping of
these anon folios.

So let's invert the meaning; turn the confusing "reclaim_pt" check that
overrides other properties in should_zap_cows() into a safety check.

Note that the only caller that sets reclaim_pt=true is
madvise_dontneed_single_vma(), which wants to zap any pages.

Link: https://lkml.kernel.org/r/20260227200848.114019-10-david@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Arve <arve@android.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Carlos Llamas <cmllamas@google.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Daniel Borkman <daniel@iogearbox.net>
Cc: Dave Airlie <airlied@gmail.com>
Cc: David Ahern <dsahern@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ian Abbott <abbotti@mev.co.uk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jakub Kacinski <kuba@kernel.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Namhyung kim <namhyung@kernel.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Todd Kjos <tkjos@android.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h |  2 +-
 mm/madvise.c       |  1 -
 mm/memory.c        | 12 ++++++------
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 771d021b7948..cb4f5fbccaf0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2767,7 +2767,7 @@ extern void pagefault_out_of_memory(void);
  */
 struct zap_details {
 	struct folio *single_folio;	/* Locked folio to be unmapped */
-	bool even_cows;			/* Zap COWed private pages too? */
+	bool skip_cows;			/* Do not zap COWed private pages */
 	bool reclaim_pt;		/* Need reclaim page tables? */
 	zap_flags_t zap_flags;		/* Extra flags for zapping */
 };
diff --git a/mm/madvise.c b/mm/madvise.c
index e4a2728593a8..e86228682842 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -853,7 +853,6 @@ static long madvise_dontneed_single_vma(struct madvise_behavior *madv_behavior)
 	struct madvise_behavior_range *range = &madv_behavior->range;
 	struct zap_details details = {
 		.reclaim_pt = true,
-		.even_cows = true,
 	};
 
 	zap_page_range_single_batched(
diff --git a/mm/memory.c b/mm/memory.c
index 7e5d52534ee9..c66b7b8b47eb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1554,11 +1554,13 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
 static inline bool should_zap_cows(struct zap_details *details)
 {
 	/* By default, zap all pages */
-	if (!details || details->reclaim_pt)
+	if (!details)
 		return true;
 
+	VM_WARN_ON_ONCE(details->skip_cows && details->reclaim_pt);
+
 	/* Or, we zap COWed pages only if the caller wants to */
-	return details->even_cows;
+	return !details->skip_cows;
 }
 
 /* Decides whether we should zap this folio with the folio pointer specified */
@@ -2149,8 +2151,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
 	struct mmu_notifier_range range;
 	struct zap_details details = {
 		.zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
-		/* Careful - we need to zap private pages too! */
-		.even_cows = true,
 	};
 
 	vma = unmap->first;
@@ -4282,7 +4282,7 @@ void unmap_mapping_folio(struct folio *folio)
 	first_index = folio->index;
 	last_index = folio_next_index(folio) - 1;
 
-	details.even_cows = false;
+	details.skip_cows = true;
 	details.single_folio = folio;
 	details.zap_flags = ZAP_FLAG_DROP_MARKER;
 
@@ -4312,7 +4312,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 	pgoff_t	first_index = start;
 	pgoff_t	last_index = start + nr - 1;
 
-	details.even_cows = even_cows;
+	details.skip_cows = !even_cows;
 	if (last_index < first_index)
 		last_index = ULONG_MAX;
 
-- 
cgit v1.2.3


From 5f10cbbddc2bd80a5944f1c783830f7ebf648ad2 Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 27 Feb 2026 21:08:41 +0100
Subject: mm/memory: use __zap_vma_range() in zap_vma_for_reaping()

Let's call __zap_vma_range() instead of unmap_page_range() to prepare for
further cleanups.

To keep the existing behavior, whereby we do not call uprobe_munmap()
which could block, add a new "reaping" member to zap_details and use it.

Likely we should handle the possible blocking in uprobe_munmap()
differently, but for now keep it unchanged.

Link: https://lkml.kernel.org/r/20260227200848.114019-11-david@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Arve <arve@android.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Carlos Llamas <cmllamas@google.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Daniel Borkman <daniel@iogearbox.net>
Cc: Dave Airlie <airlied@gmail.com>
Cc: David Ahern <dsahern@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ian Abbott <abbotti@mev.co.uk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jakub Kacinski <kuba@kernel.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Namhyung kim <namhyung@kernel.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Todd Kjos <tkjos@android.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h |  1 +
 mm/memory.c        | 13 +++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cb4f5fbccaf0..488a144c9161 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2769,6 +2769,7 @@ struct zap_details {
 	struct folio *single_folio;	/* Locked folio to be unmapped */
 	bool skip_cows;			/* Do not zap COWed private pages */
 	bool reclaim_pt;		/* Need reclaim page tables? */
+	bool reaping;			/* Reaping, do not block. */
 	zap_flags_t zap_flags;		/* Extra flags for zapping */
 };
 
diff --git a/mm/memory.c b/mm/memory.c
index c66b7b8b47eb..d1fd3cdd677a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2079,14 +2079,18 @@ static void __zap_vma_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		unsigned long start, unsigned long end,
 		struct zap_details *details)
 {
+	const bool reaping = details && details->reaping;
+
 	VM_WARN_ON_ONCE(start >= end || !range_in_vma(vma, start, end));
 
-	if (vma->vm_file)
+	/* uprobe_munmap() might sleep, so skip it when reaping. */
+	if (vma->vm_file && !reaping)
 		uprobe_munmap(vma, start, end);
 
 	if (unlikely(is_vm_hugetlb_page(vma))) {
 		zap_flags_t zap_flags = details ? details->zap_flags : 0;
 
+		VM_WARN_ON_ONCE(reaping);
 		/*
 		 * vm_file will be NULL when we fail early while instantiating
 		 * a new mapping. In this case, no pages were mapped yet and
@@ -2111,11 +2115,12 @@ static void __zap_vma_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
  */
 int zap_vma_for_reaping(struct vm_area_struct *vma)
 {
+	struct zap_details details = {
+		.reaping = true,
+	};
 	struct mmu_notifier_range range;
 	struct mmu_gather tlb;
 
-	VM_WARN_ON_ONCE(is_vm_hugetlb_page(vma));
-
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
 				vma->vm_start, vma->vm_end);
 	tlb_gather_mmu(&tlb, vma->vm_mm);
@@ -2123,7 +2128,7 @@ int zap_vma_for_reaping(struct vm_area_struct *vma)
 		tlb_finish_mmu(&tlb);
 		return -EBUSY;
 	}
-	unmap_page_range(&tlb, vma, range.start, range.end, NULL);
+	__zap_vma_range(&tlb, vma, range.start, range.end, &details);
 	mmu_notifier_invalidate_range_end(&range);
 	tlb_finish_mmu(&tlb);
 	return 0;
-- 
cgit v1.2.3


From 32bc7fe4a6f4d359b6de96cbc106d2cac695154e Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 27 Feb 2026 21:08:43 +0100
Subject: mm: rename zap_vma_pages() to zap_vma()

Let's rename it to an even simpler name.  While at it, add some simplistic
kernel doc.

Link: https://lkml.kernel.org/r/20260227200848.114019-13-david@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Arve <arve@android.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Carlos Llamas <cmllamas@google.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Daniel Borkman <daniel@iogearbox.net>
Cc: Dave Airlie <airlied@gmail.com>
Cc: David Ahern <dsahern@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ian Abbott <abbotti@mev.co.uk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jakub Kacinski <kuba@kernel.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Namhyung kim <namhyung@kernel.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Todd Kjos <tkjos@android.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/powerpc/platforms/book3s/vas-api.c | 2 +-
 arch/powerpc/platforms/pseries/vas.c    | 2 +-
 include/linux/mm.h                      | 6 +++++-
 lib/vdso/datastore.c                    | 2 +-
 mm/page-writeback.c                     | 2 +-
 5 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index ea4ffa63f043..e96d79db69fe 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -414,7 +414,7 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
 	/*
 	 * When the LPAR lost credits due to core removal or during
 	 * migration, invalidate the existing mapping for the current
-	 * paste addresses and set windows in-active (zap_vma_pages in
+	 * paste addresses and set windows in-active (zap_vma() in
 	 * reconfig_close_windows()).
 	 * New mapping will be done later after migration or new credits
 	 * available. So continue to receive faults if the user space
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index ceb0a8788c0a..fa05f04364fe 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -807,7 +807,7 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
 		 * is done before the original mmap() and after the ioctl.
 		 */
 		if (vma)
-			zap_vma_pages(vma);
+			zap_vma(vma);
 
 		mutex_unlock(&task_ref->mmap_mutex);
 		mmap_write_unlock(task_ref->mm);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 488a144c9161..60c13d40c65c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2806,7 +2806,11 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		  unsigned long size);
 void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
 			   unsigned long size);
-static inline void zap_vma_pages(struct vm_area_struct *vma)
+/**
+ * zap_vma - zap all page table entries in a vma
+ * @vma: The vma to zap.
+ */
+static inline void zap_vma(struct vm_area_struct *vma)
 {
 	zap_page_range_single(vma, vma->vm_start, vma->vm_end - vma->vm_start);
 }
diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c
index a565c30c71a0..222c143aebf7 100644
--- a/lib/vdso/datastore.c
+++ b/lib/vdso/datastore.c
@@ -121,7 +121,7 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 	mmap_read_lock(mm);
 	for_each_vma(vmi, vma) {
 		if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
-			zap_vma_pages(vma);
+			zap_vma(vma);
 	}
 	mmap_read_unlock(mm);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1009bb042ba4..8dc47b59ca18 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2645,7 +2645,7 @@ void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb)
  * while this function is in progress, although it may have been truncated
  * before this function is called.  Most callers have the folio locked.
  * A few have the folio blocked from truncation through other means (e.g.
- * zap_vma_pages() has it mapped and is holding the page table lock).
+ * zap_vma() has it mapped and is holding the page table lock).
  * When called from mark_buffer_dirty(), the filesystem should hold a
  * reference to the buffer_head that is being marked dirty, which causes
  * try_to_free_buffers() to fail.
-- 
cgit v1.2.3


From 0326440c3545c86b6501c7c636fcf018d6e87b8c Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 27 Feb 2026 21:08:45 +0100
Subject: mm: rename zap_page_range_single() to zap_vma_range()

Let's rename it to make it better match our new naming scheme.

While at it, polish the kerneldoc.

[akpm@linux-foundation.org: fix rustfmtcheck]
Link: https://lkml.kernel.org/r/20260227200848.114019-15-david@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Puranjay Mohan <puranjay@kernel.org>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Arve <arve@android.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Carlos Llamas <cmllamas@google.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Daniel Borkman <daniel@iogearbox.net>
Cc: Dave Airlie <airlied@gmail.com>
Cc: David Ahern <dsahern@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ian Abbott <abbotti@mev.co.uk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jakub Kacinski <kuba@kernel.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Namhyung kim <namhyung@kernel.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Todd Kjos <tkjos@android.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/s390/mm/gmap_helpers.c          |  2 +-
 drivers/android/binder/page_range.rs |  4 ++--
 drivers/android/binder_alloc.c       |  2 +-
 include/linux/mm.h                   |  4 ++--
 kernel/bpf/arena.c                   |  2 +-
 kernel/events/core.c                 |  2 +-
 mm/madvise.c                         |  4 ++--
 mm/memory.c                          | 14 +++++++-------
 net/ipv4/tcp.c                       |  6 +++---
 rust/kernel/mm/virt.rs               |  4 ++--
 10 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index ae2d59a19313..f8789ffcc05c 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -89,7 +89,7 @@ void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned lo
 		if (!vma)
 			return;
 		if (!is_vm_hugetlb_page(vma))
-			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr);
+			zap_vma_range(vma, vmaddr, min(end, vma->vm_end) - vmaddr);
 		vmaddr = vma->vm_end;
 	}
 }
diff --git a/drivers/android/binder/page_range.rs b/drivers/android/binder/page_range.rs
index 9dfc154e5dd4..8882fd18d9f3 100644
--- a/drivers/android/binder/page_range.rs
+++ b/drivers/android/binder/page_range.rs
@@ -130,7 +130,7 @@ pub(crate) struct ShrinkablePageRange {
     pid: Pid,
     /// The mm for the relevant process.
     mm: ARef<Mm>,
-    /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
+    /// Used to synchronize calls to `vm_insert_page` and `zap_vma_range`.
     #[pin]
     mm_lock: Mutex<()>,
     /// Spinlock protecting changes to pages.
@@ -762,7 +762,7 @@ unsafe extern "C" fn rust_shrink_free_page(
     if let Some(unchecked_vma) = mmap_read.vma_lookup(vma_addr) {
         if let Some(vma) = check_vma(unchecked_vma, range_ptr) {
             let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
-            vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
+            vma.zap_vma_range(user_page_addr, PAGE_SIZE);
         }
     }
 
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index dd2046bd5cde..e4488ad86a65 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -1185,7 +1185,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
 	if (vma) {
 		trace_binder_unmap_user_start(alloc, index);
 
-		zap_page_range_single(vma, page_addr, PAGE_SIZE);
+		zap_vma_range(vma, page_addr, PAGE_SIZE);
 
 		trace_binder_unmap_user_end(alloc, index);
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60c13d40c65c..10a5b9ba4eeb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2804,7 +2804,7 @@ struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr,
 
 void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		  unsigned long size);
-void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
+void zap_vma_range(struct vm_area_struct *vma, unsigned long address,
 			   unsigned long size);
 /**
  * zap_vma - zap all page table entries in a vma
@@ -2812,7 +2812,7 @@ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
  */
 static inline void zap_vma(struct vm_area_struct *vma)
 {
-	zap_page_range_single(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+	zap_vma_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);
 }
 struct mmu_notifier_range;
 
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 19cca936eb9d..08d008cc471e 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -656,7 +656,7 @@ static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
 	guard(mutex)(&arena->lock);
 	/* iterate link list under lock */
 	list_for_each_entry(vml, &arena->vma_list, head)
-		zap_page_range_single(vml->vma, uaddr, PAGE_SIZE * page_cnt);
+		zap_vma_range(vml->vma, uaddr, PAGE_SIZE * page_cnt);
 }
 
 static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt, bool sleepable)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2ecdaabf1b4d..d5b21077e829 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7213,7 +7213,7 @@ static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma)
 #ifdef CONFIG_MMU
 	/* Clear any partial mappings on error. */
 	if (err)
-		zap_page_range_single(vma, vma->vm_start, nr_pages * PAGE_SIZE);
+		zap_vma_range(vma, vma->vm_start, nr_pages * PAGE_SIZE);
 #endif
 
 	return err;
diff --git a/mm/madvise.c b/mm/madvise.c
index a50ec5f90e3e..afe0f01765c4 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -832,7 +832,7 @@ static int madvise_free_single_vma(struct madvise_behavior *madv_behavior)
  * Application no longer needs these pages.  If the pages are dirty,
  * it's OK to just throw them away.  The app will be more careful about
  * data it wants to keep.  Be sure to free swap resources too.  The
- * zap_page_range_single call sets things up for shrink_active_list to actually
+ * zap_vma_range call sets things up for shrink_active_list to actually
  * free these pages later if no one else has touched them in the meantime,
  * although we could add these pages to a global reuse list for
  * shrink_active_list to pick up before reclaiming other pages.
@@ -1191,7 +1191,7 @@ static long madvise_guard_install(struct madvise_behavior *madv_behavior)
 		 * OK some of the range have non-guard pages mapped, zap
 		 * them. This leaves existing guard pages in place.
 		 */
-		zap_page_range_single(vma, range->start, range->end - range->start);
+		zap_vma_range(vma, range->start, range->end - range->start);
 	}
 
 	/*
diff --git a/mm/memory.c b/mm/memory.c
index 879858e466ef..dd80fbf6473a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2215,14 +2215,14 @@ void zap_vma_range_batched(struct mmu_gather *tlb,
 }
 
 /**
- * zap_page_range_single - remove user pages in a given range
- * @vma: vm_area_struct holding the applicable pages
- * @address: starting address of pages to zap
+ * zap_vma_range - zap all page table entries in a vma range
+ * @vma: the vma covering the range to zap
+ * @address: starting address of the range to zap
  * @size: number of bytes to zap
  *
- * The range must fit into one VMA.
+ * The provided address range must be fully contained within @vma.
  */
-void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
+void zap_vma_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size)
 {
 	struct mmu_gather tlb;
@@ -2250,7 +2250,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 	    		!(vma->vm_flags & VM_PFNMAP))
 		return;
 
-	zap_page_range_single(vma, address, size);
+	zap_vma_range(vma, address, size);
 }
 EXPORT_SYMBOL_GPL(zap_vma_ptes);
 
@@ -3018,7 +3018,7 @@ static int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long add
 	 * maintain page reference counts, and callers may free
 	 * pages due to the error. So zap it early.
 	 */
-	zap_page_range_single(vma, addr, size);
+	zap_vma_range(vma, addr, size);
 	return error;
 }
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 89c962672e51..9573ce9b0ac1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2105,7 +2105,7 @@ static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma,
 		maybe_zap_len = total_bytes_to_map -  /* All bytes to map */
 				*length + /* Mapped or pending */
 				(pages_remaining * PAGE_SIZE); /* Failed map. */
-		zap_page_range_single(vma, *address, maybe_zap_len);
+		zap_vma_range(vma, *address, maybe_zap_len);
 		err = 0;
 	}
 
@@ -2113,7 +2113,7 @@ static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma,
 		unsigned long leftover_pages = pages_remaining;
 		int bytes_mapped;
 
-		/* We called zap_page_range_single, try to reinsert. */
+		/* We called zap_vma_range, try to reinsert. */
 		err = vm_insert_pages(vma, *address,
 				      pending_pages,
 				      &pages_remaining);
@@ -2270,7 +2270,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
 	if (total_bytes_to_map) {
 		if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT))
-			zap_page_range_single(vma, address, total_bytes_to_map);
+			zap_vma_range(vma, address, total_bytes_to_map);
 		zc->length = total_bytes_to_map;
 		zc->recv_skip_hint = 0;
 	} else {
diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs
index 6bfd91cfa1f4..63eb730b0b05 100644
--- a/rust/kernel/mm/virt.rs
+++ b/rust/kernel/mm/virt.rs
@@ -113,7 +113,7 @@ impl VmaRef {
     /// kernel goes further in freeing unused page tables, but for the purposes of this operation
     /// we must only assume that the leaf level is cleared.
     #[inline]
-    pub fn zap_page_range_single(&self, address: usize, size: usize) {
+    pub fn zap_vma_range(&self, address: usize, size: usize) {
         let (end, did_overflow) = address.overflowing_add(size);
         if did_overflow || address < self.start() || self.end() < end {
             // TODO: call WARN_ONCE once Rust version of it is added
@@ -123,7 +123,7 @@ impl VmaRef {
         // SAFETY: By the type invariants, the caller has read access to this VMA, which is
         // sufficient for this method call. This method has no requirements on the vma flags. The
         // address range is checked to be within the vma.
-        unsafe { bindings::zap_page_range_single(self.as_ptr(), address, size) };
+        unsafe { bindings::zap_vma_range(self.as_ptr(), address, size) };
     }
 
     /// If the [`VM_MIXEDMAP`] flag is set, returns a [`VmaMixedMap`] to this VMA, otherwise
-- 
cgit v1.2.3


From 52a9e9cd181fab8b03cf4e982533224697669976 Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 27 Feb 2026 21:08:46 +0100
Subject: mm: rename zap_vma_ptes() to zap_special_vma_range()

zap_vma_ptes() is the only zapping function we export to modules.

It's essentially a wrapper around zap_vma_range(), however, with some
safety checks:
* That the passed range fits fully into the VMA
* That it's only used for VM_PFNMAP

We will add support for VM_MIXEDMAP next, so use the more-generic term
"special vma", although "special" is a bit overloaded.  Maybe we'll later
just support any VM_SPECIAL flag.

While at it, improve the kerneldoc.

Link: https://lkml.kernel.org/r/20260227200848.114019-16-david@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Leon Romanovsky <leon@kernel.org>	[drivers/infiniband]
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Arve <arve@android.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Carlos Llamas <cmllamas@google.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Daniel Borkman <daniel@iogearbox.net>
Cc: Dave Airlie <airlied@gmail.com>
Cc: David Ahern <dsahern@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ian Abbott <abbotti@mev.co.uk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jakub Kacinski <kuba@kernel.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Namhyung kim <namhyung@kernel.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Todd Kjos <tkjos@android.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/x86/kernel/cpu/sgx/encl.c        |  2 +-
 drivers/comedi/comedi_fops.c          |  2 +-
 drivers/gpu/drm/i915/i915_mm.c        |  4 ++--
 drivers/infiniband/core/uverbs_main.c |  6 +++---
 drivers/misc/sgi-gru/grumain.c        |  2 +-
 include/linux/mm.h                    |  2 +-
 mm/memory.c                           | 16 +++++++---------
 7 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index ac60ebde5d9b..3f0222d10f6e 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -1220,7 +1220,7 @@ void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
 
 			ret = sgx_encl_find(encl_mm->mm, addr, &vma);
 			if (!ret && encl == vma->vm_private_data)
-				zap_vma_ptes(vma, addr, PAGE_SIZE);
+				zap_special_vma_range(vma, addr, PAGE_SIZE);
 
 			mmap_read_unlock(encl_mm->mm);
 
diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c
index 48a8a607a84c..b91e0b5ac394 100644
--- a/drivers/comedi/comedi_fops.c
+++ b/drivers/comedi/comedi_fops.c
@@ -2588,7 +2588,7 @@ static int comedi_mmap(struct file *file, struct vm_area_struct *vma)
 	 * remap_pfn_range() because we call remap_pfn_range() in a loop.
 	 */
 	if (retval)
-		zap_vma_ptes(vma, vma->vm_start, size);
+		zap_special_vma_range(vma, vma->vm_start, size);
 #endif
 
 	if (retval == 0) {
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index c33bd3d83069..fd89e7c7d8d6 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -108,7 +108,7 @@ int remap_io_mapping(struct vm_area_struct *vma,
 
 	err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r);
 	if (unlikely(err)) {
-		zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT);
+		zap_special_vma_range(vma, addr, (r.pfn - pfn) << PAGE_SHIFT);
 		return err;
 	}
 
@@ -156,7 +156,7 @@ int remap_io_sg(struct vm_area_struct *vma,
 
 	err = apply_to_page_range(r.mm, addr, size, remap_sg, &r);
 	if (unlikely(err)) {
-		zap_vma_ptes(vma, addr, r.pfn << PAGE_SHIFT);
+		zap_special_vma_range(vma, addr, r.pfn << PAGE_SHIFT);
 		return err;
 	}
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 7b68967a6301..f5837da47299 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -756,7 +756,7 @@ out_zap:
 	 * point, so zap it.
 	 */
 	vma->vm_private_data = NULL;
-	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+	zap_special_vma_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);
 }
 
 static void rdma_umap_close(struct vm_area_struct *vma)
@@ -782,7 +782,7 @@ static void rdma_umap_close(struct vm_area_struct *vma)
 }
 
 /*
- * Once the zap_vma_ptes has been called touches to the VMA will come here and
+ * Once the zap_special_vma_range has been called touches to the VMA will come here and
  * we return a dummy writable zero page for all the pfns.
  */
 static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
@@ -878,7 +878,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 				continue;
 			list_del_init(&priv->list);
 
-			zap_vma_ptes(vma, vma->vm_start,
+			zap_special_vma_range(vma, vma->vm_start,
 				     vma->vm_end - vma->vm_start);
 
 			if (priv->entry) {
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index 8d749f345246..278b76cbd281 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -542,7 +542,7 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate)
 	int ctxnum = gts->ts_ctxnum;
 
 	if (!is_kernel_context(gts))
-		zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
+		zap_special_vma_range(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
 	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
 
 	gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n",
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 10a5b9ba4eeb..c516d5177211 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2802,7 +2802,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr,
 		pud_t pud);
 
-void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
+void zap_special_vma_range(struct vm_area_struct *vma, unsigned long address,
 		  unsigned long size);
 void zap_vma_range(struct vm_area_struct *vma, unsigned long address,
 			   unsigned long size);
diff --git a/mm/memory.c b/mm/memory.c
index dd80fbf6473a..3dc4664c9af7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2233,17 +2233,15 @@ void zap_vma_range(struct vm_area_struct *vma, unsigned long address,
 }
 
 /**
- * zap_vma_ptes - remove ptes mapping the vma
- * @vma: vm_area_struct holding ptes to be zapped
- * @address: starting address of pages to zap
+ * zap_special_vma_range - zap all page table entries in a special vma range
+ * @vma: the vma covering the range to zap
+ * @address: starting address of the range to zap
  * @size: number of bytes to zap
  *
- * This function only unmaps ptes assigned to VM_PFNMAP vmas.
- *
- * The entire address range must be fully contained within the vma.
- *
+ * This function does nothing when the provided address range is not fully
+ * contained in @vma, or when the @vma is not VM_PFNMAP.
  */
-void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
+void zap_special_vma_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size)
 {
 	if (!range_in_vma(vma, address, address + size) ||
@@ -2252,7 +2250,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 
 	zap_vma_range(vma, address, size);
 }
-EXPORT_SYMBOL_GPL(zap_vma_ptes);
+EXPORT_SYMBOL_GPL(zap_special_vma_range);
 
 static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr)
 {
-- 
cgit v1.2.3


From 5a970006786a3b10577e762a9a6c0b9353b4e8a4 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Fri, 6 Mar 2026 14:43:37 +0800
Subject: mm: use inline helper functions instead of ugly macros

Patch series "support batched checking of the young flag for MGLRU", v3.

This is a follow-up to the previous work [1], to support batched checking
of the young flag for MGLRU.

Similarly, batched checking of young flag for large folios can improve
performance during large-folio reclamation when MGLRU is enabled.  I
observed noticeable performance improvements (see patch 5) on an Arm64
machine that supports contiguous PTEs.  All mm-selftests are passed.

Patch 1 - 3: cleanup patches.
Patch 4: add a new generic batched PTE helper: test_and_clear_young_ptes().
Patch 5: support batched young flag checking for MGLRU.
Patch 6: implement the Arm64 arch-specific test_and_clear_young_ptes().


This patch (of 6):

People have already complained that these *_clear_young_notify() related
macros are very ugly, so let's use inline helpers to make them more
readable.

In addition, we cannot implement these inline helper functions in the
mmu_notifier.h file, because some arch-specific files will include the
mmu_notifier.h, which introduces header compilation dependencies and
causes build errors (e.g., arch/arm64/include/asm/tlbflush.h).  Moreover,
since these functions are only used in the mm, implementing these inline
helpers in the mm/internal.h header seems reasonable.

Link: https://lkml.kernel.org/r/cover.1772778858.git.baolin.wang@linux.alibaba.com
Link: https://lkml.kernel.org/r/ea14af84e7967ccebb25082c28a8669d6da8fe57.1772778858.git.baolin.wang@linux.alibaba.com
Link: https://lore.kernel.org/all/cover.1770645603.git.baolin.wang@linux.alibaba.com/ [1]
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Xu <weixugc@google.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Alistair Popple <apopple@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 54 --------------------------------------------
 mm/internal.h                | 52 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 8450e18a87c2..3705d350c863 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -516,55 +516,6 @@ static inline void mmu_notifier_range_init_owner(
 	range->owner = owner;
 }
 
-#define clear_flush_young_ptes_notify(__vma, __address, __ptep, __nr)	\
-({									\
-	int __young;							\
-	struct vm_area_struct *___vma = __vma;				\
-	unsigned long ___address = __address;				\
-	unsigned int ___nr = __nr;					\
-	__young = clear_flush_young_ptes(___vma, ___address, __ptep, ___nr);	\
-	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
-						  ___address,		\
-						  ___address +		\
-						  ___nr * PAGE_SIZE);	\
-	__young;							\
-})
-
-#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)		\
-({									\
-	int __young;							\
-	struct vm_area_struct *___vma = __vma;				\
-	unsigned long ___address = __address;				\
-	__young = pmdp_clear_flush_young(___vma, ___address, __pmdp);	\
-	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
-						  ___address,		\
-						  ___address +		\
-							PMD_SIZE);	\
-	__young;							\
-})
-
-#define ptep_clear_young_notify(__vma, __address, __ptep)		\
-({									\
-	int __young;							\
-	struct vm_area_struct *___vma = __vma;				\
-	unsigned long ___address = __address;				\
-	__young = ptep_test_and_clear_young(___vma, ___address, __ptep);\
-	__young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,	\
-					    ___address + PAGE_SIZE);	\
-	__young;							\
-})
-
-#define pmdp_clear_young_notify(__vma, __address, __pmdp)		\
-({									\
-	int __young;							\
-	struct vm_area_struct *___vma = __vma;				\
-	unsigned long ___address = __address;				\
-	__young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\
-	__young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,	\
-					    ___address + PMD_SIZE);	\
-	__young;							\
-})
-
 #else /* CONFIG_MMU_NOTIFIER */
 
 struct mmu_notifier_range {
@@ -652,11 +603,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
 
 #define mmu_notifier_range_update_to_read_only(r) false
 
-#define clear_flush_young_ptes_notify clear_flush_young_ptes
-#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
-#define ptep_clear_young_notify ptep_test_and_clear_young
-#define pmdp_clear_young_notify pmdp_test_and_clear_young
-
 static inline void mmu_notifier_synchronize(void)
 {
 }
diff --git a/mm/internal.h b/mm/internal.h
index 6e1162e13289..321b8019de9f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -11,6 +11,7 @@
 #include <linux/khugepaged.h>
 #include <linux/mm.h>
 #include <linux/mm_inline.h>
+#include <linux/mmu_notifier.h>
 #include <linux/pagemap.h>
 #include <linux/pagewalk.h>
 #include <linux/rmap.h>
@@ -1796,4 +1797,55 @@ static inline int io_remap_pfn_range_complete(struct vm_area_struct *vma,
 	return remap_pfn_range_complete(vma, addr, pfn, size, prot);
 }
 
+#ifdef CONFIG_MMU_NOTIFIER
+static inline int clear_flush_young_ptes_notify(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+	int young;
+
+	young = clear_flush_young_ptes(vma, addr, ptep, nr);
+	young |= mmu_notifier_clear_flush_young(vma->vm_mm, addr,
+						addr + nr * PAGE_SIZE);
+	return young;
+}
+
+static inline int pmdp_clear_flush_young_notify(struct vm_area_struct *vma,
+		unsigned long addr, pmd_t *pmdp)
+{
+	int young;
+
+	young = pmdp_clear_flush_young(vma, addr, pmdp);
+	young |= mmu_notifier_clear_flush_young(vma->vm_mm, addr, addr + PMD_SIZE);
+	return young;
+}
+
+static inline int ptep_clear_young_notify(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
+{
+	int young;
+
+	young = ptep_test_and_clear_young(vma, addr, ptep);
+	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
+	return young;
+}
+
+static inline int pmdp_clear_young_notify(struct vm_area_struct *vma,
+		unsigned long addr, pmd_t *pmdp)
+{
+	int young;
+
+	young = pmdp_test_and_clear_young(vma, addr, pmdp);
+	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PMD_SIZE);
+	return young;
+}
+
+#else /* CONFIG_MMU_NOTIFIER */
+
+#define clear_flush_young_ptes_notify	clear_flush_young_ptes
+#define pmdp_clear_flush_young_notify	pmdp_clear_flush_young
+#define ptep_clear_young_notify	ptep_test_and_clear_young
+#define pmdp_clear_young_notify	pmdp_test_and_clear_young
+
+#endif /* CONFIG_MMU_NOTIFIER */
+
 #endif	/* __MM_INTERNAL_H */
-- 
cgit v1.2.3


From 6d7237dda44f24bb0dec5dbd2a0ed6be77bf6ef6 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Fri, 6 Mar 2026 14:43:40 +0800
Subject: mm: add a batched helper to clear the young flag for large folios

Currently, MGLRU will call ptep_test_and_clear_young_notify() to check and
clear the young flag for each PTE sequentially, which is inefficient for
large folios reclamation.

Moreover, on Arm64 architecture, which supports contiguous PTEs, the
Arm64- specific ptep_test_and_clear_young() already implements an
optimization to clear the young flags for PTEs within a contiguous range.
However, this is not sufficient.  Similar to the Arm64 specific
clear_flush_young_ptes(), we can extend this to perform batched operations
for the entire large folio (which might exceed the contiguous range:
CONT_PTE_SIZE).

Thus, we can introduce a new batched helper: test_and_clear_young_ptes()
and its wrapper test_and_clear_young_ptes_notify() which are consistent
with the existing functions, to perform batched checking of the young
flags for large folios, which can help improve performance during large
folio reclamation when MGLRU is enabled.  And it will be overridden by the
architecture that implements a more efficient batch operation in the
following patches.

Link: https://lkml.kernel.org/r/23ec671bfcc06cd24ee0fbff8e329402742274a0.1772778858.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand (Arm) <david@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Xu <weixugc@google.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 37 +++++++++++++++++++++++++++++++++++++
 mm/internal.h           | 16 +++++++++++-----
 2 files changed, 48 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index d2767a4c027b..17d961c612fc 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1103,6 +1103,43 @@ static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
 }
 #endif
 
+#ifndef test_and_clear_young_ptes
+/**
+ * test_and_clear_young_ptes - Mark PTEs that map consecutive pages of the same
+ *			       folio as old
+ * @vma: The virtual memory area the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear access bit.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_test_and_clear_young().
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock.  The PTEs map consecutive
+ * pages that belong to the same folio.  The PTEs are all in the same PMD.
+ *
+ * Returns: whether any PTE was young.
+ */
+static inline int test_and_clear_young_ptes(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+	int young = 0;
+
+	for (;;) {
+		young |= ptep_test_and_clear_young(vma, addr, ptep);
+		if (--nr == 0)
+			break;
+		ptep++;
+		addr += PAGE_SIZE;
+	}
+
+	return young;
+}
+#endif
+
 /*
  * On some architectures hardware does not set page access bit when accessing
  * memory page, it is responsibility of software setting this bit. It brings
diff --git a/mm/internal.h b/mm/internal.h
index 1b718fdb074e..1357dc04f065 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1819,13 +1819,13 @@ static inline int pmdp_clear_flush_young_notify(struct vm_area_struct *vma,
 	return young;
 }
 
-static inline int ptep_test_and_clear_young_notify(struct vm_area_struct *vma,
-		unsigned long addr, pte_t *ptep)
+static inline int test_and_clear_young_ptes_notify(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
 	int young;
 
-	young = ptep_test_and_clear_young(vma, addr, ptep);
-	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
+	young = test_and_clear_young_ptes(vma, addr, ptep, nr);
+	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + nr * PAGE_SIZE);
 	return young;
 }
 
@@ -1843,9 +1843,15 @@ static inline int pmdp_test_and_clear_young_notify(struct vm_area_struct *vma,
 
 #define clear_flush_young_ptes_notify	clear_flush_young_ptes
 #define pmdp_clear_flush_young_notify	pmdp_clear_flush_young
-#define ptep_test_and_clear_young_notify	ptep_test_and_clear_young
+#define test_and_clear_young_ptes_notify	test_and_clear_young_ptes
 #define pmdp_test_and_clear_young_notify	pmdp_test_and_clear_young
 
 #endif /* CONFIG_MMU_NOTIFIER */
 
+static inline int ptep_test_and_clear_young_notify(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
+{
+	return test_and_clear_young_ptes_notify(vma, addr, ptep, 1);
+}
+
 #endif	/* __MM_INTERNAL_H */
-- 
cgit v1.2.3


From 56e5b60b2114dee967c971f08dd29ef193bd3a2d Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Fri, 6 Mar 2026 14:43:41 +0800
Subject: mm: support batched checking of the young flag for MGLRU

Use the batched helper test_and_clear_young_ptes_notify() to check and
clear the young flag to improve the performance during large folio
reclamation when MGLRU is enabled.

Meanwhile, we can also support batched checking the young and dirty flag
when MGLRU walks the mm's pagetable to update the folios' generation
counter.  Since MGLRU also checks the PTE dirty bit, use
folio_pte_batch_flags() with FPB_MERGE_YOUNG_DIRTY set to detect batches
of PTEs for a large folio.

Then we can remove the ptep_test_and_clear_young_notify() since it has no
users now.

Note that we also update the 'young' counter and 'mm_stats[MM_LEAF_YOUNG]'
counter with the batched count in the lru_gen_look_around() and
walk_pte_range().  However, the batched operations may inflate these two
counters, because in a large folio not all PTEs may have been accessed.
(Additionally, tracking how many PTEs have been accessed within a large
folio is not very meaningful, since the mm core actually tracks
access/dirty on a per-folio basis, not per page).  The impact analysis is
as follows:

1. The 'mm_stats[MM_LEAF_YOUNG]' counter has no functional impact and
   is mainly for debugging.

2. The 'young' counter is used to decide whether to place the current
   PMD entry into the bloom filters by suitable_to_scan() (so that next
   time we can check whether it has been accessed again), which may set
   the hash bit in the bloom filters for a PMD entry that hasn't seen much
   access.  However, bloom filters inherently allow some error, so this
   effect appears negligible.

Link: https://lkml.kernel.org/r/378f4acf7d07410aa7c2e4b49d56bb165918eb34.1772778858.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Xu <weixugc@google.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h |  5 +++--
 mm/internal.h          |  6 ------
 mm/rmap.c              | 28 ++++++++++++++--------------
 mm/vmscan.c            | 43 ++++++++++++++++++++++++++++++++-----------
 4 files changed, 49 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5c3ae0348754..3f651baf7e2b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -684,7 +684,7 @@ struct lru_gen_memcg {
 
 void lru_gen_init_pgdat(struct pglist_data *pgdat);
 void lru_gen_init_lruvec(struct lruvec *lruvec);
-bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
+bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw, unsigned int nr);
 
 void lru_gen_init_memcg(struct mem_cgroup *memcg);
 void lru_gen_exit_memcg(struct mem_cgroup *memcg);
@@ -703,7 +703,8 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
 {
 }
 
-static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw,
+		unsigned int nr)
 {
 	return false;
 }
diff --git a/mm/internal.h b/mm/internal.h
index 1357dc04f065..4ab833b8bcdf 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1848,10 +1848,4 @@ static inline int pmdp_test_and_clear_young_notify(struct vm_area_struct *vma,
 
 #endif /* CONFIG_MMU_NOTIFIER */
 
-static inline int ptep_test_and_clear_young_notify(struct vm_area_struct *vma,
-		unsigned long addr, pte_t *ptep)
-{
-	return test_and_clear_young_ptes_notify(vma, addr, ptep, 1);
-}
-
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/rmap.c b/mm/rmap.c
index cd48f34f11b5..abe4712a220c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -965,25 +965,20 @@ static bool folio_referenced_one(struct folio *folio,
 			return false;
 		}
 
+		if (pvmw.pte && folio_test_large(folio)) {
+			const unsigned long end_addr = pmd_addr_end(address, vma->vm_end);
+			const unsigned int max_nr = (end_addr - address) >> PAGE_SHIFT;
+			pte_t pteval = ptep_get(pvmw.pte);
+
+			nr = folio_pte_batch(folio, pvmw.pte, pteval, max_nr);
+		}
+
 		if (lru_gen_enabled() && pvmw.pte) {
-			if (lru_gen_look_around(&pvmw))
+			if (lru_gen_look_around(&pvmw, nr))
 				referenced++;
 		} else if (pvmw.pte) {
-			if (folio_test_large(folio)) {
-				unsigned long end_addr = pmd_addr_end(address, vma->vm_end);
-				unsigned int max_nr = (end_addr - address) >> PAGE_SHIFT;
-				pte_t pteval = ptep_get(pvmw.pte);
-
-				nr = folio_pte_batch(folio, pvmw.pte,
-						     pteval, max_nr);
-			}
-
-			ptes += nr;
 			if (clear_flush_young_ptes_notify(vma, address, pvmw.pte, nr))
 				referenced++;
-			/* Skip the batched PTEs */
-			pvmw.pte += nr - 1;
-			pvmw.address += (nr - 1) * PAGE_SIZE;
 		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
 			if (pmdp_clear_flush_young_notify(vma, address,
 						pvmw.pmd))
@@ -993,6 +988,7 @@ static bool folio_referenced_one(struct folio *folio,
 			WARN_ON_ONCE(1);
 		}
 
+		ptes += nr;
 		pra->mapcount -= nr;
 		/*
 		 * If we are sure that we batched the entire folio,
@@ -1002,6 +998,10 @@ static bool folio_referenced_one(struct folio *folio,
 			page_vma_mapped_walk_done(&pvmw);
 			break;
 		}
+
+		/* Skip the batched PTEs */
+		pvmw.pte += nr - 1;
+		pvmw.address += (nr - 1) * PAGE_SIZE;
 	}
 
 	if (referenced)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7ab9e1cdccd2..3a4a0a81c871 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3499,6 +3499,7 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
 	struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
 	DEFINE_MAX_SEQ(walk->lruvec);
 	int gen = lru_gen_from_seq(max_seq);
+	unsigned int nr;
 	pmd_t pmdval;
 
 	pte = pte_offset_map_rw_nolock(args->mm, pmd, start & PMD_MASK, &pmdval, &ptl);
@@ -3517,11 +3518,13 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
 
 	lazy_mmu_mode_enable();
 restart:
-	for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) {
+	for (i = pte_index(start), addr = start; addr != end; i += nr, addr += nr * PAGE_SIZE) {
 		unsigned long pfn;
 		struct folio *folio;
-		pte_t ptent = ptep_get(pte + i);
+		pte_t *cur_pte = pte + i;
+		pte_t ptent = ptep_get(cur_pte);
 
+		nr = 1;
 		total++;
 		walk->mm_stats[MM_LEAF_TOTAL]++;
 
@@ -3533,7 +3536,16 @@ restart:
 		if (!folio)
 			continue;
 
-		if (!ptep_test_and_clear_young_notify(args->vma, addr, pte + i))
+		if (folio_test_large(folio)) {
+			const unsigned int max_nr = (end - addr) >> PAGE_SHIFT;
+
+			nr = folio_pte_batch_flags(folio, NULL, cur_pte, &ptent,
+						   max_nr, FPB_MERGE_YOUNG_DIRTY);
+			total += nr - 1;
+			walk->mm_stats[MM_LEAF_TOTAL] += nr - 1;
+		}
+
+		if (!test_and_clear_young_ptes_notify(args->vma, addr, cur_pte, nr))
 			continue;
 
 		if (last != folio) {
@@ -3546,8 +3558,8 @@ restart:
 		if (pte_dirty(ptent))
 			dirty = true;
 
-		young++;
-		walk->mm_stats[MM_LEAF_YOUNG]++;
+		young += nr;
+		walk->mm_stats[MM_LEAF_YOUNG] += nr;
 	}
 
 	walk_update_folio(walk, last, gen, dirty);
@@ -4191,7 +4203,7 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
  * the PTE table to the Bloom filter. This forms a feedback loop between the
  * eviction and the aging.
  */
-bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw, unsigned int nr)
 {
 	int i;
 	bool dirty;
@@ -4214,7 +4226,7 @@ bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 	lockdep_assert_held(pvmw->ptl);
 	VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio);
 
-	if (!ptep_test_and_clear_young_notify(vma, addr, pte))
+	if (!test_and_clear_young_ptes_notify(vma, addr, pte, nr))
 		return false;
 
 	if (spin_is_contended(pvmw->ptl))
@@ -4248,10 +4260,12 @@ bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 
 	pte -= (addr - start) / PAGE_SIZE;
 
-	for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
+	for (i = 0, addr = start; addr != end;
+	     i += nr, pte += nr, addr += nr * PAGE_SIZE) {
 		unsigned long pfn;
-		pte_t ptent = ptep_get(pte + i);
+		pte_t ptent = ptep_get(pte);
 
+		nr = 1;
 		pfn = get_pte_pfn(ptent, vma, addr, pgdat);
 		if (pfn == -1)
 			continue;
@@ -4260,7 +4274,14 @@ bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 		if (!folio)
 			continue;
 
-		if (!ptep_test_and_clear_young_notify(vma, addr, pte + i))
+		if (folio_test_large(folio)) {
+			const unsigned int max_nr = (end - addr) >> PAGE_SHIFT;
+
+			nr = folio_pte_batch_flags(folio, NULL, pte, &ptent,
+						   max_nr, FPB_MERGE_YOUNG_DIRTY);
+		}
+
+		if (!test_and_clear_young_ptes_notify(vma, addr, pte, nr))
 			continue;
 
 		if (last != folio) {
@@ -4273,7 +4294,7 @@ bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 		if (pte_dirty(ptent))
 			dirty = true;
 
-		young++;
+		young += nr;
 	}
 
 	walk_update_folio(walk, last, gen, dirty);
-- 
cgit v1.2.3


From 417607de1f4e6280f646aa42cad5ed84e9228c01 Mon Sep 17 00:00:00 2001
From: Yuvraj Sakshith <yuvraj.sakshith@oss.qualcomm.com>
Date: Tue, 3 Mar 2026 03:30:28 -0800
Subject: mm/page_reporting: add PAGE_REPORTING_ORDER_UNSPECIFIED
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "Allow order zero pages in page reporting", v4.

Today, page reporting sets page_reporting_order in two ways:

(1) page_reporting.page_reporting_order cmdline parameter
(2) Driver can pass order while registering itself.

In both cases, order zero is ignored by free page reporting because it is
used to set page_reporting_order to a default value, like MAX_PAGE_ORDER.

In some cases we might want page_reporting_order to be zero.

For instance, when virtio-balloon runs inside a guest with tiny memory
(say, 16MB), it might not be able to find a order 1 page (or in the worst
case order MAX_PAGE_ORDER page) after some uptime.  Page reporting should
be able to return order zero pages back for optimal memory relinquishment.

This patch changes the default fallback value from '0' to '-1' in all
possible clients of free page reporting (hv_balloon and virtio-balloon)
together with allowing '0' as a valid order in page_reporting_register().


This patch (of 5):

Drivers can pass order of pages to be reported while registering itself.
Today, this is a magic number, 0.

Label this with PAGE_REPORTING_ORDER_UNSPECIFIED and check for it when the
driver is being registered.

This macro will be used in relevant drivers next.

[akpm@linux-foundation.org: tweak whitespace, per David]
Link: https://lkml.kernel.org/r/20260303113032.3008371-1-yuvraj.sakshith@oss.qualcomm.com
Link: https://lkml.kernel.org/r/20260303113032.3008371-2-yuvraj.sakshith@oss.qualcomm.com
Signed-off-by: Yuvraj Sakshith <yuvraj.sakshith@oss.qualcomm.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Eugenio Pérez <eperezma@redhat.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Long Li <longli@microsoft.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page_reporting.h | 1 +
 mm/page_reporting.c            | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h
index fe648dfa3a7c..d1886c657285 100644
--- a/include/linux/page_reporting.h
+++ b/include/linux/page_reporting.h
@@ -7,6 +7,7 @@
 
 /* This value should always be a power of 2, see page_reporting_cycle() */
 #define PAGE_REPORTING_CAPACITY		32
+#define PAGE_REPORTING_ORDER_UNSPECIFIED	0
 
 struct page_reporting_dev_info {
 	/* function that alters pages to make them "reported" */
diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index f0042d5743af..a2da5bf3a065 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -370,7 +370,8 @@ int page_reporting_register(struct page_reporting_dev_info *prdev)
 	 */
 
 	if (page_reporting_order == -1) {
-		if (prdev->order > 0 && prdev->order <= MAX_PAGE_ORDER)
+		if (prdev->order != PAGE_REPORTING_ORDER_UNSPECIFIED &&
+		    prdev->order <= MAX_PAGE_ORDER)
 			page_reporting_order = prdev->order;
 		else
 			page_reporting_order = pageblock_order;
-- 
cgit v1.2.3


From 5467c292d07ffcd55a7a66af2259855f49e1dd06 Mon Sep 17 00:00:00 2001
From: Yuvraj Sakshith <yuvraj.sakshith@oss.qualcomm.com>
Date: Tue, 3 Mar 2026 03:30:31 -0800
Subject: mm/page_reporting: change PAGE_REPORTING_ORDER_UNSPECIFIED to -1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PAGE_REPORTING_ORDER_UNSPECIFIED is now set to zero.  This means, pages of
order zero cannot be reported to a client/driver -- as zero is used to
signal a fallback to MAX_PAGE_ORDER.

Change PAGE_REPORTING_ORDER_UNSPECIFIED to (-1), so that zero can be used
as a valid order with which pages can be reported.

Link: https://lkml.kernel.org/r/20260303113032.3008371-5-yuvraj.sakshith@oss.qualcomm.com
Signed-off-by: Yuvraj Sakshith <yuvraj.sakshith@oss.qualcomm.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Eugenio Pérez <eperezma@redhat.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Long Li <longli@microsoft.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page_reporting.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h
index d1886c657285..9d4ca5c218a0 100644
--- a/include/linux/page_reporting.h
+++ b/include/linux/page_reporting.h
@@ -7,7 +7,7 @@
 
 /* This value should always be a power of 2, see page_reporting_cycle() */
 #define PAGE_REPORTING_CAPACITY		32
-#define PAGE_REPORTING_ORDER_UNSPECIFIED	0
+#define PAGE_REPORTING_ORDER_UNSPECIFIED	-1
 
 struct page_reporting_dev_info {
 	/* function that alters pages to make them "reported" */
-- 
cgit v1.2.3


From e650bb30ca532901da6def04c7d1de72ae59ea4e Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Thu, 5 Mar 2026 10:50:14 +0000
Subject: mm: rename VMA flag helpers to be more readable

Patch series "mm: vma flag tweaks".

The ongoing work around introducing non-system word VMA flags has
introduced a number of helper functions and macros to make life easier
when working with these flags and to make conversions from the legacy use
of VM_xxx flags more straightforward.

This series improves these to reduce confusion as to what they do and to
improve consistency and readability.

Firstly the series renames vma_flags_test() to vma_flags_test_any() to
make it abundantly clear that this function tests whether any of the flags
are set (as opposed to vma_flags_test_all()).

It then renames vma_desc_test_flags() to vma_desc_test_any() for the same
reason.  Note that we drop the 'flags' suffix here, as
vma_desc_test_any_flags() would be cumbersome and 'test' implies a flag
test.

Similarly, we rename vma_test_all_flags() to vma_test_all() for
consistency.

Next, we have a couple of instances (erofs, zonefs) where we are now
testing for vma_desc_test_any(desc, VMA_SHARED_BIT) &&
vma_desc_test_any(desc, VMA_MAYWRITE_BIT).

This is silly, so this series introduces vma_desc_test_all() so these
callers can instead invoke vma_desc_test_all(desc, VMA_SHARED_BIT,
VMA_MAYWRITE_BIT).

We then observe that quite a few instances of vma_flags_test_any() and
vma_desc_test_any() are in fact only testing against a single flag.

Using the _any() variant here is just confusing - 'any' of single item
reads strangely and is liable to cause confusion.

So in these instances the series reintroduces vma_flags_test() and
vma_desc_test() as helpers which test against a single flag.

The fact that vma_flags_t is a struct and that vma_flag_t utilises sparse
to avoid confusion with vm_flags_t makes it impossible for a user to
misuse these helpers without it getting flagged somewhere.

The series also updates __mk_vma_flags() and functions invoked by it to
explicitly mark them always inline to match expectation and to be
consistent with other VMA flag helpers.

It also renames vma_flag_set() to vma_flags_set_flag() (a function only
used by __mk_vma_flags()) to be consistent with other VMA flag helpers.

Finally it updates the VMA tests for each of these changes, and introduces
explicit tests for vma_flags_test() and vma_desc_test() to assert that
they behave as expected.


This patch (of 6):

On reflection, it's confusing to have vma_flags_test() and
vma_desc_test_flags() test whether any comma-separated VMA flag bit is
set, while also having vma_flags_test_all() and vma_test_all_flags()
separately test whether all flags are set.

Firstly, rename vma_flags_test() to vma_flags_test_any() to eliminate this
confusion.

Secondly, since the VMA descriptor flag functions are becoming rather
cumbersome, prefer vma_desc_test*() to vma_desc_test_flags*(), and also
rename vma_desc_test_flags() to vma_desc_test_any().

Finally, rename vma_test_all_flags() to vma_test_all() to keep the
VMA-specific helper consistent with the VMA descriptor naming convention
and to help avoid confusion vs.  vma_flags_test_all().

While we're here, also update whitespace to be consistent in helper
functions.

Link: https://lkml.kernel.org/r/cover.1772704455.git.ljs@kernel.org
Link: https://lkml.kernel.org/r/0f9cb3c511c478344fac0b3b3b0300bb95be95e9.1772704455.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Suggested-by: Pedro Falcato <pfalcato@suse.de>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Babu Moger <babu.moger@amd.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Chatre, Reinette <reinette.chatre@intel.com>
Cc: Chunhai Guo <guochunhai@vivo.com>
Cc: Damien Le Maol <dlemoal@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hongbo Li <lihongbo22@huawei.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jeffle Xu <jefflexu@linux.alibaba.com>
Cc: Johannes Thumshirn <jth@kernel.org>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naohiro Aota <naohiro.aota@wdc.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sandeep Dhavale <dhavale@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Yue Hu <zbestahu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/char/mem.c              |  2 +-
 drivers/dax/device.c            |  2 +-
 fs/erofs/data.c                 |  4 ++--
 fs/hugetlbfs/inode.c            |  2 +-
 fs/ntfs3/file.c                 |  2 +-
 fs/resctrl/pseudo_lock.c        |  2 +-
 fs/zonefs/file.c                |  4 ++--
 include/linux/dax.h             |  4 ++--
 include/linux/hugetlb_inline.h  |  2 +-
 include/linux/mm.h              | 48 +++++++++++++++++++++--------------------
 mm/hugetlb.c                    | 14 ++++++------
 mm/memory.c                     |  2 +-
 mm/secretmem.c                  |  2 +-
 mm/shmem.c                      |  4 ++--
 tools/testing/vma/include/dup.h | 20 ++++++++---------
 tools/testing/vma/tests/vma.c   | 28 ++++++++++++------------
 16 files changed, 72 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index cca4529431f8..5118787d0954 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -520,7 +520,7 @@ static int mmap_zero_prepare(struct vm_area_desc *desc)
 #ifndef CONFIG_MMU
 	return -ENOSYS;
 #endif
-	if (vma_desc_test_flags(desc, VMA_SHARED_BIT))
+	if (vma_desc_test_any(desc, VMA_SHARED_BIT))
 		return shmem_zero_setup_desc(desc);
 
 	desc->action.success_hook = mmap_zero_private_success;
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 528e81240c4d..381021c2e031 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -24,7 +24,7 @@ static int __check_vma(struct dev_dax *dev_dax, vma_flags_t flags,
 		return -ENXIO;
 
 	/* prevent private mappings from being established */
-	if (!vma_flags_test(&flags, VMA_MAYSHARE_BIT)) {
+	if (!vma_flags_test_any(&flags, VMA_MAYSHARE_BIT)) {
 		dev_info_ratelimited(dev,
 				"%s: %s: fail, attempted private mapping\n",
 				current->comm, func);
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index f79ee80627d9..6774d9b5ee82 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -473,8 +473,8 @@ static int erofs_file_mmap_prepare(struct vm_area_desc *desc)
 	if (!IS_DAX(file_inode(desc->file)))
 		return generic_file_readonly_mmap_prepare(desc);
 
-	if (vma_desc_test_flags(desc, VMA_SHARED_BIT) &&
-	    vma_desc_test_flags(desc, VMA_MAYWRITE_BIT))
+	if (vma_desc_test_any(desc, VMA_SHARED_BIT) &&
+	    vma_desc_test_any(desc, VMA_MAYWRITE_BIT))
 		return -EINVAL;
 
 	desc->vm_ops = &erofs_dax_vm_ops;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2ec3e4231252..079ffaaf1f6c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -164,7 +164,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
 		goto out;
 
 	ret = 0;
-	if (vma_desc_test_flags(desc, VMA_WRITE_BIT) && inode->i_size < len)
+	if (vma_desc_test_any(desc, VMA_WRITE_BIT) && inode->i_size < len)
 		i_size_write(inode, len);
 out:
 	inode_unlock(inode);
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 7eecf1e01f74..c5e2181f9f02 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -276,7 +276,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc)
 	struct file *file = desc->file;
 	struct inode *inode = file_inode(file);
 	struct ntfs_inode *ni = ntfs_i(inode);
-	const bool rw = vma_desc_test_flags(desc, VMA_WRITE_BIT);
+	const bool rw = vma_desc_test_any(desc, VMA_WRITE_BIT);
 	int err;
 
 	/* Avoid any operation if inode is bad. */
diff --git a/fs/resctrl/pseudo_lock.c b/fs/resctrl/pseudo_lock.c
index fa3687d69ebd..79a006c6f26c 100644
--- a/fs/resctrl/pseudo_lock.c
+++ b/fs/resctrl/pseudo_lock.c
@@ -1044,7 +1044,7 @@ static int pseudo_lock_dev_mmap_prepare(struct vm_area_desc *desc)
 	 * Ensure changes are carried directly to the memory being mapped,
 	 * do not allow copy-on-write mapping.
 	 */
-	if (!vma_desc_test_flags(desc, VMA_SHARED_BIT)) {
+	if (!vma_desc_test_any(desc, VMA_SHARED_BIT)) {
 		mutex_unlock(&rdtgroup_mutex);
 		return -EINVAL;
 	}
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 8a7161fc49e5..9f9273ecf71a 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -333,8 +333,8 @@ static int zonefs_file_mmap_prepare(struct vm_area_desc *desc)
 	 * ordering between msync() and page cache writeback.
 	 */
 	if (zonefs_inode_is_seq(file_inode(file)) &&
-	    vma_desc_test_flags(desc, VMA_SHARED_BIT) &&
-	    vma_desc_test_flags(desc, VMA_MAYWRITE_BIT))
+	    vma_desc_test_any(desc, VMA_SHARED_BIT) &&
+	    vma_desc_test_any(desc, VMA_MAYWRITE_BIT))
 		return -EINVAL;
 
 	file_accessed(file);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index bf103f317cac..535019001577 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -69,7 +69,7 @@ static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc,
 					    const struct inode *inode,
 					    struct dax_device *dax_dev)
 {
-	if (!vma_desc_test_flags(desc, VMA_SYNC_BIT))
+	if (!vma_desc_test_any(desc, VMA_SYNC_BIT))
 		return true;
 	if (!IS_DAX(inode))
 		return false;
@@ -115,7 +115,7 @@ static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc,
 					    const struct inode *inode,
 					    struct dax_device *dax_dev)
 {
-	return !vma_desc_test_flags(desc, VMA_SYNC_BIT);
+	return !vma_desc_test_any(desc, VMA_SYNC_BIT);
 }
 static inline size_t dax_recovery_write(struct dax_device *dax_dev,
 		pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h
index 593f5d4e108b..84afc3c3e2e4 100644
--- a/include/linux/hugetlb_inline.h
+++ b/include/linux/hugetlb_inline.h
@@ -13,7 +13,7 @@ static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
 
 static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
 {
-	return vma_flags_test(flags, VMA_HUGETLB_BIT);
+	return vma_flags_test_any(flags, VMA_HUGETLB_BIT);
 }
 
 #else
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c516d5177211..ee7671d6c5eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1062,7 +1062,7 @@ static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
 					 (const vma_flag_t []){__VA_ARGS__})
 
 /*  Test each of to_test flags in flags, non-atomically. */
-static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
+static __always_inline bool vma_flags_test_any_mask(const vma_flags_t *flags,
 		vma_flags_t to_test)
 {
 	const unsigned long *bitmap = flags->__vma_flags;
@@ -1074,10 +1074,10 @@ static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
 /*
  * Test whether any specified VMA flag is set, e.g.:
  *
- * if (vma_flags_test(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
+ * if (vma_flags_test_any(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
  */
-#define vma_flags_test(flags, ...) \
-	vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))
+#define vma_flags_test_any(flags, ...) \
+	vma_flags_test_any_mask(flags, mk_vma_flags(__VA_ARGS__))
 
 /* Test that ALL of the to_test flags are set, non-atomically. */
 static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
@@ -1098,7 +1098,8 @@ static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
 	vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
 
 /* Set each of the to_set flags in flags, non-atomically. */
-static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
+static __always_inline void vma_flags_set_mask(vma_flags_t *flags,
+		vma_flags_t to_set)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 	const unsigned long *bitmap_to_set = to_set.__vma_flags;
@@ -1115,7 +1116,8 @@ static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t t
 	vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))
 
 /* Clear all of the to-clear flags in flags, non-atomically. */
-static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear)
+static __always_inline void vma_flags_clear_mask(vma_flags_t *flags,
+		vma_flags_t to_clear)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 	const unsigned long *bitmap_to_clear = to_clear.__vma_flags;
@@ -1137,8 +1139,8 @@ static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t
  * Note: appropriate locks must be held, this function does not acquire them for
  * you.
  */
-static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
-					   vma_flags_t flags)
+static inline bool vma_test_all_mask(const struct vm_area_struct *vma,
+		vma_flags_t flags)
 {
 	return vma_flags_test_all_mask(&vma->flags, flags);
 }
@@ -1146,10 +1148,10 @@ static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
 /*
  * Helper macro for checking that ALL specified flags are set in a VMA, e.g.:
  *
- * if (vma_test_all_flags(vma, VMA_READ_BIT, VMA_MAYREAD_BIT) { ... }
+ * if (vma_test_all(vma, VMA_READ_BIT, VMA_MAYREAD_BIT) { ... }
  */
-#define vma_test_all_flags(vma, ...) \
-	vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
+#define vma_test_all(vma, ...) \
+	vma_test_all_mask(vma, mk_vma_flags(__VA_ARGS__))
 
 /*
  * Helper to set all VMA flags in a VMA.
@@ -1158,7 +1160,7 @@ static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
  * you.
  */
 static inline void vma_set_flags_mask(struct vm_area_struct *vma,
-				      vma_flags_t flags)
+		vma_flags_t flags)
 {
 	vma_flags_set_mask(&vma->flags, flags);
 }
@@ -1176,25 +1178,25 @@ static inline void vma_set_flags_mask(struct vm_area_struct *vma,
 	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
 
 /* Helper to test all VMA flags in a VMA descriptor. */
-static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
-					    vma_flags_t flags)
+static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
+		vma_flags_t flags)
 {
-	return vma_flags_test_mask(&desc->vma_flags, flags);
+	return vma_flags_test_any_mask(&desc->vma_flags, flags);
 }
 
 /*
  * Helper macro for testing VMA flags for an input pointer to a struct
  * vm_area_desc object describing a proposed VMA, e.g.:
  *
- * if (vma_desc_test_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT,
+ * if (vma_desc_test_any(desc, VMA_IO_BIT, VMA_PFNMAP_BIT,
  *		VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... }
  */
-#define vma_desc_test_flags(desc, ...) \
-	vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
+#define vma_desc_test_any(desc, ...) \
+	vma_desc_test_any_mask(desc, mk_vma_flags(__VA_ARGS__))
 
 /* Helper to set all VMA flags in a VMA descriptor. */
 static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
-					   vma_flags_t flags)
+		vma_flags_t flags)
 {
 	vma_flags_set_mask(&desc->vma_flags, flags);
 }
@@ -1211,7 +1213,7 @@ static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
 
 /* Helper to clear all VMA flags in a VMA descriptor. */
 static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
-					     vma_flags_t flags)
+		vma_flags_t flags)
 {
 	vma_flags_clear_mask(&desc->vma_flags, flags);
 }
@@ -1936,8 +1938,8 @@ static inline bool vma_desc_is_cow_mapping(struct vm_area_desc *desc)
 {
 	const vma_flags_t *flags = &desc->vma_flags;
 
-	return vma_flags_test(flags, VMA_MAYWRITE_BIT) &&
-		!vma_flags_test(flags, VMA_SHARED_BIT);
+	return vma_flags_test_any(flags, VMA_MAYWRITE_BIT) &&
+		!vma_flags_test_any(flags, VMA_SHARED_BIT);
 }
 
 #ifndef CONFIG_MMU
@@ -1956,7 +1958,7 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags)
 
 static inline bool is_nommu_shared_vma_flags(const vma_flags_t *flags)
 {
-	return vma_flags_test(flags, VMA_MAYSHARE_BIT, VMA_MAYOVERLAY_BIT);
+	return vma_flags_test_any(flags, VMA_MAYSHARE_BIT, VMA_MAYOVERLAY_BIT);
 }
 #endif
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1d41fa3dd43e..fbbe74f94426 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1194,7 +1194,7 @@ static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
 static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *map)
 {
 	VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
-	VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT));
+	VM_WARN_ON_ONCE(vma_desc_test_any(desc, VMA_MAYSHARE_BIT));
 
 	desc->private_data = map;
 }
@@ -1202,7 +1202,7 @@ static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *ma
 static void set_vma_desc_resv_flags(struct vm_area_desc *desc, unsigned long flags)
 {
 	VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
-	VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT));
+	VM_WARN_ON_ONCE(vma_desc_test_any(desc, VMA_MAYSHARE_BIT));
 
 	desc->private_data = (void *)((unsigned long)desc->private_data | flags);
 }
@@ -6593,7 +6593,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	 * attempt will be made for VM_NORESERVE to allocate a page
 	 * without using reserves
 	 */
-	if (vma_flags_test(&vma_flags, VMA_NORESERVE_BIT))
+	if (vma_flags_test_any(&vma_flags, VMA_NORESERVE_BIT))
 		return 0;
 
 	/*
@@ -6602,7 +6602,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	 * to reserve the full area even if read-only as mprotect() may be
 	 * called to make the mapping read-write. Assume !desc is a shm mapping
 	 */
-	if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) {
+	if (!desc || vma_desc_test_any(desc, VMA_MAYSHARE_BIT)) {
 		/*
 		 * resv_map can not be NULL as hugetlb_reserve_pages is only
 		 * called for inodes for which resv_maps were created (see
@@ -6636,7 +6636,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	if (err < 0)
 		goto out_err;
 
-	if (desc && !vma_desc_test_flags(desc, VMA_MAYSHARE_BIT) && h_cg) {
+	if (desc && !vma_desc_test_any(desc, VMA_MAYSHARE_BIT) && h_cg) {
 		/* For private mappings, the hugetlb_cgroup uncharge info hangs
 		 * of the resv_map.
 		 */
@@ -6673,7 +6673,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	 * consumed reservations are stored in the map. Hence, nothing
 	 * else has to be done for private mappings here
 	 */
-	if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) {
+	if (!desc || vma_desc_test_any(desc, VMA_MAYSHARE_BIT)) {
 		add = region_add(resv_map, from, to, regions_needed, h, h_cg);
 
 		if (unlikely(add < 0)) {
@@ -6737,7 +6737,7 @@ out_uncharge_cgroup:
 	hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
 					    chg * pages_per_huge_page(h), h_cg);
 out_err:
-	if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT))
+	if (!desc || vma_desc_test_any(desc, VMA_MAYSHARE_BIT))
 		/* Only call region_abort if the region_chg succeeded but the
 		 * region_add failed or didn't run.
 		 */
diff --git a/mm/memory.c b/mm/memory.c
index b1c062bf5fc1..f21c804b50bf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2982,7 +2982,7 @@ static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long ad
 	if (WARN_ON_ONCE(!PAGE_ALIGNED(addr)))
 		return -EINVAL;
 
-	VM_WARN_ON_ONCE(!vma_test_all_flags_mask(vma, VMA_REMAP_FLAGS));
+	VM_WARN_ON_ONCE(!vma_test_all_mask(vma, VMA_REMAP_FLAGS));
 
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 11a779c812a7..5f57ac4720d3 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -122,7 +122,7 @@ static int secretmem_mmap_prepare(struct vm_area_desc *desc)
 {
 	const unsigned long len = vma_desc_size(desc);
 
-	if (!vma_desc_test_flags(desc, VMA_SHARED_BIT, VMA_MAYSHARE_BIT))
+	if (!vma_desc_test_any(desc, VMA_SHARED_BIT, VMA_MAYSHARE_BIT))
 		return -EINVAL;
 
 	vma_desc_set_flags(desc, VMA_LOCKED_BIT, VMA_DONTDUMP_BIT);
diff --git a/mm/shmem.c b/mm/shmem.c
index 5e7dcf5bc5d3..965a8908200b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3086,7 +3086,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
 	spin_lock_init(&info->lock);
 	atomic_set(&info->stop_eviction, 0);
 	info->seals = F_SEAL_SEAL;
-	info->flags = vma_flags_test(&flags, VMA_NORESERVE_BIT)
+	info->flags = vma_flags_test_any(&flags, VMA_NORESERVE_BIT)
 		? SHMEM_F_NORESERVE : 0;
 	info->i_crtime = inode_get_mtime(inode);
 	info->fsflags = (dir == NULL) ? 0 :
@@ -5827,7 +5827,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
 				       unsigned int i_flags)
 {
 	const unsigned long shmem_flags =
-		vma_flags_test(&flags, VMA_NORESERVE_BIT) ? SHMEM_F_NORESERVE : 0;
+		vma_flags_test_any(&flags, VMA_NORESERVE_BIT) ? SHMEM_F_NORESERVE : 0;
 	struct inode *inode;
 	struct file *res;
 
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 3078ff1487d3..c46b523e428d 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -843,7 +843,7 @@ static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits);
 #define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
 					 (const vma_flag_t []){__VA_ARGS__})
 
-static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
+static __always_inline bool vma_flags_test_any_mask(const vma_flags_t *flags,
 		vma_flags_t to_test)
 {
 	const unsigned long *bitmap = flags->__vma_flags;
@@ -852,8 +852,8 @@ static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
 	return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
 }
 
-#define vma_flags_test(flags, ...) \
-	vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))
+#define vma_flags_test_any(flags, ...) \
+	vma_flags_test_any_mask(flags, mk_vma_flags(__VA_ARGS__))
 
 static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
 		vma_flags_t to_test)
@@ -889,14 +889,14 @@ static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t
 #define vma_flags_clear(flags, ...) \
 	vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
 
-static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
+static inline bool vma_test_all_mask(const struct vm_area_struct *vma,
 					   vma_flags_t flags)
 {
 	return vma_flags_test_all_mask(&vma->flags, flags);
 }
 
-#define vma_test_all_flags(vma, ...) \
-	vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
+#define vma_test_all(vma, ...) \
+	vma_test_all_mask(vma, mk_vma_flags(__VA_ARGS__))
 
 static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
 {
@@ -913,14 +913,14 @@ static inline void vma_set_flags_mask(struct vm_area_struct *vma,
 #define vma_set_flags(vma, ...) \
 	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
 
-static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
+static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
 					    vma_flags_t flags)
 {
-	return vma_flags_test_mask(&desc->vma_flags, flags);
+	return vma_flags_test_any_mask(&desc->vma_flags, flags);
 }
 
-#define vma_desc_test_flags(desc, ...) \
-	vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
+#define vma_desc_test_any(desc, ...) \
+	vma_desc_test_any_mask(desc, mk_vma_flags(__VA_ARGS__))
 
 static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
 					   vma_flags_t flags)
diff --git a/tools/testing/vma/tests/vma.c b/tools/testing/vma/tests/vma.c
index c54ffc954f11..f031e6dfb474 100644
--- a/tools/testing/vma/tests/vma.c
+++ b/tools/testing/vma/tests/vma.c
@@ -159,8 +159,8 @@ static bool test_vma_flags_word(void)
 	return true;
 }
 
-/* Ensure that vma_flags_test() and friends works correctly. */
-static bool test_vma_flags_test(void)
+/* Ensure that vma_flags_test_any() and friends works correctly. */
+static bool test_vma_flags_test_any(void)
 {
 	const vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT,
 					       VMA_EXEC_BIT, 64, 65);
@@ -171,16 +171,16 @@ static bool test_vma_flags_test(void)
 	desc.vma_flags = flags;
 
 #define do_test(...)						\
-	ASSERT_TRUE(vma_flags_test(&flags, __VA_ARGS__));	\
-	ASSERT_TRUE(vma_desc_test_flags(&desc, __VA_ARGS__))
+	ASSERT_TRUE(vma_flags_test_any(&flags, __VA_ARGS__));	\
+	ASSERT_TRUE(vma_desc_test_any(&desc, __VA_ARGS__))
 
 #define do_test_all_true(...)					\
 	ASSERT_TRUE(vma_flags_test_all(&flags, __VA_ARGS__));	\
-	ASSERT_TRUE(vma_test_all_flags(&vma, __VA_ARGS__))
+	ASSERT_TRUE(vma_test_all(&vma, __VA_ARGS__))
 
 #define do_test_all_false(...)					\
 	ASSERT_FALSE(vma_flags_test_all(&flags, __VA_ARGS__));	\
-	ASSERT_FALSE(vma_test_all_flags(&vma, __VA_ARGS__))
+	ASSERT_FALSE(vma_test_all(&vma, __VA_ARGS__))
 
 	/*
 	 * Testing for some flags that are present, some that are not - should
@@ -200,7 +200,7 @@ static bool test_vma_flags_test(void)
 	 * Check _mask variant. We don't need to test extensively as macro
 	 * helper is the equivalent.
 	 */
-	ASSERT_TRUE(vma_flags_test_mask(&flags, flags));
+	ASSERT_TRUE(vma_flags_test_any_mask(&flags, flags));
 	ASSERT_TRUE(vma_flags_test_all_mask(&flags, flags));
 
 	/* Single bits. */
@@ -268,9 +268,9 @@ static bool test_vma_flags_clear(void)
 	vma_flags_clear_mask(&flags, mask);
 	vma_flags_clear_mask(&vma.flags, mask);
 	vma_desc_clear_flags_mask(&desc, mask);
-	ASSERT_FALSE(vma_flags_test(&flags, VMA_EXEC_BIT, 64));
-	ASSERT_FALSE(vma_flags_test(&vma.flags, VMA_EXEC_BIT, 64));
-	ASSERT_FALSE(vma_desc_test_flags(&desc, VMA_EXEC_BIT, 64));
+	ASSERT_FALSE(vma_flags_test_any(&flags, VMA_EXEC_BIT, 64));
+	ASSERT_FALSE(vma_flags_test_any(&vma.flags, VMA_EXEC_BIT, 64));
+	ASSERT_FALSE(vma_desc_test_any(&desc, VMA_EXEC_BIT, 64));
 	/* Reset. */
 	vma_flags_set(&flags, VMA_EXEC_BIT, 64);
 	vma_set_flags(&vma, VMA_EXEC_BIT, 64);
@@ -284,9 +284,9 @@ static bool test_vma_flags_clear(void)
 	vma_flags_clear(&flags, __VA_ARGS__);			\
 	vma_flags_clear(&vma.flags, __VA_ARGS__);		\
 	vma_desc_clear_flags(&desc, __VA_ARGS__);		\
-	ASSERT_FALSE(vma_flags_test(&flags, __VA_ARGS__));	\
-	ASSERT_FALSE(vma_flags_test(&vma.flags, __VA_ARGS__));	\
-	ASSERT_FALSE(vma_desc_test_flags(&desc, __VA_ARGS__));	\
+	ASSERT_FALSE(vma_flags_test_any(&flags, __VA_ARGS__));	\
+	ASSERT_FALSE(vma_flags_test_any(&vma.flags, __VA_ARGS__));	\
+	ASSERT_FALSE(vma_desc_test_any(&desc, __VA_ARGS__));	\
 	vma_flags_set(&flags, __VA_ARGS__);			\
 	vma_set_flags(&vma, __VA_ARGS__);			\
 	vma_desc_set_flags(&desc, __VA_ARGS__)
@@ -334,6 +334,6 @@ static void run_vma_tests(int *num_tests, int *num_fail)
 	TEST(vma_flags_unchanged);
 	TEST(vma_flags_cleared);
 	TEST(vma_flags_word);
-	TEST(vma_flags_test);
+	TEST(vma_flags_test_any);
 	TEST(vma_flags_clear);
 }
-- 
cgit v1.2.3


From 0b3ed2a495b5c10296d9371502d70ce4398f0c58 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Thu, 5 Mar 2026 10:50:15 +0000
Subject: mm: add vma_desc_test_all() and use it

erofs and zonefs are using vma_desc_test_any() twice to check whether all
of VMA_SHARED_BIT and VMA_MAYWRITE_BIT are set, this is silly, so add
vma_desc_test_all() to test all flags and update erofs and zonefs to use
it.

While we're here, update the helper function comments to be more
consistent.

Also add the same to the VMA test headers.

Link: https://lkml.kernel.org/r/568c8f8d6a84ff64014f997517cba7a629f7eed6.1772704455.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Babu Moger <babu.moger@amd.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Chatre, Reinette <reinette.chatre@intel.com>
Cc: Chunhai Guo <guochunhai@vivo.com>
Cc: Damien Le Maol <dlemoal@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hongbo Li <lihongbo22@huawei.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jeffle Xu <jefflexu@linux.alibaba.com>
Cc: Johannes Thumshirn <jth@kernel.org>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naohiro Aota <naohiro.aota@wdc.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sandeep Dhavale <dhavale@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Yue Hu <zbestahu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/erofs/data.c                 |  3 +--
 fs/zonefs/file.c                |  3 +--
 include/linux/mm.h              | 24 ++++++++++++++++++++----
 tools/testing/vma/include/dup.h |  9 +++++++++
 4 files changed, 31 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 6774d9b5ee82..b33dd4d8710e 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -473,8 +473,7 @@ static int erofs_file_mmap_prepare(struct vm_area_desc *desc)
 	if (!IS_DAX(file_inode(desc->file)))
 		return generic_file_readonly_mmap_prepare(desc);
 
-	if (vma_desc_test_any(desc, VMA_SHARED_BIT) &&
-	    vma_desc_test_any(desc, VMA_MAYWRITE_BIT))
+	if (vma_desc_test_all(desc, VMA_SHARED_BIT, VMA_MAYWRITE_BIT))
 		return -EINVAL;
 
 	desc->vm_ops = &erofs_dax_vm_ops;
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 9f9273ecf71a..5ada33f70bb4 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -333,8 +333,7 @@ static int zonefs_file_mmap_prepare(struct vm_area_desc *desc)
 	 * ordering between msync() and page cache writeback.
 	 */
 	if (zonefs_inode_is_seq(file_inode(file)) &&
-	    vma_desc_test_any(desc, VMA_SHARED_BIT) &&
-	    vma_desc_test_any(desc, VMA_MAYWRITE_BIT))
+	    vma_desc_test_all(desc, VMA_SHARED_BIT, VMA_MAYWRITE_BIT))
 		return -EINVAL;
 
 	file_accessed(file);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ee7671d6c5eb..f964e4050583 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1177,7 +1177,7 @@ static inline void vma_set_flags_mask(struct vm_area_struct *vma,
 #define vma_set_flags(vma, ...) \
 	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
 
-/* Helper to test all VMA flags in a VMA descriptor. */
+/* Helper to test any VMA flags in a VMA descriptor. */
 static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
 		vma_flags_t flags)
 {
@@ -1185,8 +1185,8 @@ static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
 }
 
 /*
- * Helper macro for testing VMA flags for an input pointer to a struct
- * vm_area_desc object describing a proposed VMA, e.g.:
+ * Helper macro for testing whether any VMA flags are set in a VMA descriptor,
+ * e.g.:
  *
  * if (vma_desc_test_any(desc, VMA_IO_BIT, VMA_PFNMAP_BIT,
  *		VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... }
@@ -1194,6 +1194,22 @@ static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
 #define vma_desc_test_any(desc, ...) \
 	vma_desc_test_any_mask(desc, mk_vma_flags(__VA_ARGS__))
 
+/* Helper to test all VMA flags in a VMA descriptor. */
+static inline bool vma_desc_test_all_mask(const struct vm_area_desc *desc,
+		vma_flags_t flags)
+{
+	return vma_flags_test_all_mask(&desc->vma_flags, flags);
+}
+
+/*
+ * Helper macro for testing whether ALL VMA flags are set in a VMA descriptor,
+ * e.g.:
+ *
+ * if (vma_desc_test_all(desc, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
+ */
+#define vma_desc_test_all(desc, ...) \
+	vma_desc_test_all_mask(desc, mk_vma_flags(__VA_ARGS__))
+
 /* Helper to set all VMA flags in a VMA descriptor. */
 static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
 		vma_flags_t flags)
@@ -1206,7 +1222,7 @@ static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
  * vm_area_desc object describing a proposed VMA, e.g.:
  *
  * vma_desc_set_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
- * 		VMA_DONTDUMP_BIT);
+ *		VMA_DONTDUMP_BIT);
  */
 #define vma_desc_set_flags(desc, ...) \
 	vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index c46b523e428d..59788bc14d75 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -922,6 +922,15 @@ static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
 #define vma_desc_test_any(desc, ...) \
 	vma_desc_test_any_mask(desc, mk_vma_flags(__VA_ARGS__))
 
+static inline bool vma_desc_test_all_mask(const struct vm_area_desc *desc,
+		vma_flags_t flags)
+{
+	return vma_flags_test_all_mask(&desc->vma_flags, flags);
+}
+
+#define vma_desc_test_all(desc, ...) \
+	vma_desc_test_all_mask(desc, mk_vma_flags(__VA_ARGS__))
+
 static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
 					   vma_flags_t flags)
 {
-- 
cgit v1.2.3


From a5eee1128de526ba199bd4c7be39b849223e5001 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Thu, 5 Mar 2026 10:50:16 +0000
Subject: mm: always inline __mk_vma_flags() and invoked functions

Be explicit about __mk_vma_flags() (which is used by the mk_vma_flags()
macro) always being inline, as we rely on the compiler to evaluate the
loop in this function and determine that it can replace the code with the
an equivalent constant value, e.g. that:

__mk_vma_flags(2, (const vma_flag_t []){ VMA_WRITE_BIT, VMA_EXEC_BIT });

Can be replaced with:

(1UL << VMA_WRITE_BIT) | (1UL << VMA_EXEC_BIT)

= (1UL << 1) | (1UL << 2) = 6

Most likely an 'inline' will suffice for this, but be explicit as we can
be.

Also update all of the functions __mk_vma_flags() ultimately invokes to be
always inline too.

Note that test_bitmap_const_eval() asserts that the relevant bitmap
functions result in build time constant values.

Additionally, vma_flag_set() operates on a vma_flags_t type, so it is
inconsistently named versus other VMA flags functions.

We only use vma_flag_set() in __mk_vma_flags() so we don't need to worry
about its new name being rather cumbersome, so rename it to
vma_flags_set_flag() to disambiguate it from vma_flags_set().

Also update the VMA test headers to reflect the changes.

Link: https://lkml.kernel.org/r/241f49c52074d436edbb9c6a6662a8dc142a8f43.1772704455.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Babu Moger <babu.moger@amd.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Chatre, Reinette <reinette.chatre@intel.com>
Cc: Chunhai Guo <guochunhai@vivo.com>
Cc: Damien Le Maol <dlemoal@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hongbo Li <lihongbo22@huawei.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jeffle Xu <jefflexu@linux.alibaba.com>
Cc: Johannes Thumshirn <jth@kernel.org>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naohiro Aota <naohiro.aota@wdc.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sandeep Dhavale <dhavale@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Yue Hu <zbestahu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h                 | 8 +++++---
 include/linux/mm_types.h           | 2 +-
 tools/testing/vma/include/custom.h | 5 +++--
 tools/testing/vma/include/dup.h    | 5 +++--
 4 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f964e4050583..9dcdf13570fb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1030,21 +1030,23 @@ static inline bool vma_test_atomic_flag(struct vm_area_struct *vma, vma_flag_t b
 }
 
 /* Set an individual VMA flag in flags, non-atomically. */
-static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
+static __always_inline void vma_flags_set_flag(vma_flags_t *flags,
+		vma_flag_t bit)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 
 	__set_bit((__force int)bit, bitmap);
 }
 
-static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
+static __always_inline vma_flags_t __mk_vma_flags(size_t count,
+		const vma_flag_t *bits)
 {
 	vma_flags_t flags;
 	int i;
 
 	vma_flags_clear_all(&flags);
 	for (i = 0; i < count; i++)
-		vma_flag_set(&flags, bits[i]);
+		vma_flags_set_flag(&flags, bits[i]);
 	return flags;
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7bc82a2b889f..f22aecb047b7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1056,7 +1056,7 @@ struct vm_area_struct {
 } __randomize_layout;
 
 /* Clears all bits in the VMA flags bitmap, non-atomically. */
-static inline void vma_flags_clear_all(vma_flags_t *flags)
+static __always_inline void vma_flags_clear_all(vma_flags_t *flags)
 {
 	bitmap_zero(flags->__vma_flags, NUM_VMA_FLAG_BITS);
 }
diff --git a/tools/testing/vma/include/custom.h b/tools/testing/vma/include/custom.h
index 802a76317245..833ff4d7f799 100644
--- a/tools/testing/vma/include/custom.h
+++ b/tools/testing/vma/include/custom.h
@@ -102,7 +102,8 @@ static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
 		refcount_set(&vma->vm_refcnt, 0);
 }
 
-static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
+static __always_inline vma_flags_t __mk_vma_flags(size_t count,
+		const vma_flag_t *bits)
 {
 	vma_flags_t flags;
 	int i;
@@ -114,6 +115,6 @@ static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
 	vma_flags_clear_all(&flags);
 	for (i = 0; i < count; i++)
 		if (bits[i] < NUM_VMA_FLAG_BITS)
-			vma_flag_set(&flags, bits[i]);
+			vma_flags_set_flag(&flags, bits[i]);
 	return flags;
 }
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 59788bc14d75..ef6b9d963acc 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -780,12 +780,13 @@ static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
 	*bitmap &= ~value;
 }
 
-static inline void vma_flags_clear_all(vma_flags_t *flags)
+static __always_inline void vma_flags_clear_all(vma_flags_t *flags)
 {
 	bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
 }
 
-static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
+static __always_inline void vma_flags_set_flag(vma_flags_t *flags,
+		vma_flag_t bit)
 {
 	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
 
-- 
cgit v1.2.3


From 5e6d45d720ca299cc82d84948c4ba622fff64f22 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Thu, 5 Mar 2026 10:50:17 +0000
Subject: mm: reintroduce vma_flags_test() as a singular flag test

Since we've now renamed vma_flags_test() to vma_flags_test_any() to be
very clear as to what we are in fact testing, we now have the opportunity
to bring vma_flags_test() back, but for explicitly testing a single VMA
flag.

This is useful, as often flag tests are against a single flag, and
vma_flags_test_any(flags, VMA_READ_BIT) reads oddly and potentially causes
confusion.

We use sparse to enforce that users won't accidentally pass vm_flags_t to
this function without it being flagged so this should make it harder to
get this wrong.

Of course, passing vma_flags_t to the function is impossible, as it is a
struct.

Also update the VMA tests to reflect this change.

Link: https://lkml.kernel.org/r/f33f8d7f16c3f3d286a1dc2cba12c23683073134.1772704455.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Babu Moger <babu.moger@amd.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Chatre, Reinette <reinette.chatre@intel.com>
Cc: Chunhai Guo <guochunhai@vivo.com>
Cc: Damien Le Maol <dlemoal@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hongbo Li <lihongbo22@huawei.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jeffle Xu <jefflexu@linux.alibaba.com>
Cc: Johannes Thumshirn <jth@kernel.org>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naohiro Aota <naohiro.aota@wdc.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sandeep Dhavale <dhavale@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Yue Hu <zbestahu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h              | 17 +++++++++++++++--
 mm/hugetlb.c                    |  2 +-
 mm/shmem.c                      |  4 ++--
 tools/testing/vma/include/dup.h |  8 ++++++++
 4 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9dcdf13570fb..9392723a5c50 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1050,6 +1050,19 @@ static __always_inline vma_flags_t __mk_vma_flags(size_t count,
 	return flags;
 }
 
+/*
+ * Test whether a specific VMA flag is set, e.g.:
+ *
+ * if (vma_flags_test(flags, VMA_READ_BIT)) { ... }
+ */
+static __always_inline bool vma_flags_test(const vma_flags_t *flags,
+		vma_flag_t bit)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+
+	return test_bit((__force int)bit, bitmap);
+}
+
 /*
  * Helper macro which bitwise-or combines the specified input flags into a
  * vma_flags_t bitmap value. E.g.:
@@ -1956,8 +1969,8 @@ static inline bool vma_desc_is_cow_mapping(struct vm_area_desc *desc)
 {
 	const vma_flags_t *flags = &desc->vma_flags;
 
-	return vma_flags_test_any(flags, VMA_MAYWRITE_BIT) &&
-		!vma_flags_test_any(flags, VMA_SHARED_BIT);
+	return vma_flags_test(flags, VMA_MAYWRITE_BIT) &&
+		!vma_flags_test(flags, VMA_SHARED_BIT);
 }
 
 #ifndef CONFIG_MMU
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fbbe74f94426..9363b6072c0a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6593,7 +6593,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	 * attempt will be made for VM_NORESERVE to allocate a page
 	 * without using reserves
 	 */
-	if (vma_flags_test_any(&vma_flags, VMA_NORESERVE_BIT))
+	if (vma_flags_test(&vma_flags, VMA_NORESERVE_BIT))
 		return 0;
 
 	/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 965a8908200b..5e7dcf5bc5d3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3086,7 +3086,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
 	spin_lock_init(&info->lock);
 	atomic_set(&info->stop_eviction, 0);
 	info->seals = F_SEAL_SEAL;
-	info->flags = vma_flags_test_any(&flags, VMA_NORESERVE_BIT)
+	info->flags = vma_flags_test(&flags, VMA_NORESERVE_BIT)
 		? SHMEM_F_NORESERVE : 0;
 	info->i_crtime = inode_get_mtime(inode);
 	info->fsflags = (dir == NULL) ? 0 :
@@ -5827,7 +5827,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
 				       unsigned int i_flags)
 {
 	const unsigned long shmem_flags =
-		vma_flags_test_any(&flags, VMA_NORESERVE_BIT) ? SHMEM_F_NORESERVE : 0;
+		vma_flags_test(&flags, VMA_NORESERVE_BIT) ? SHMEM_F_NORESERVE : 0;
 	struct inode *inode;
 	struct file *res;
 
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index ef6b9d963acc..630478f0d583 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -844,6 +844,14 @@ static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits);
 #define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
 					 (const vma_flag_t []){__VA_ARGS__})
 
+static __always_inline bool vma_flags_test(const vma_flags_t *flags,
+		vma_flag_t bit)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+
+	return test_bit((__force int)bit, bitmap);
+}
+
 static __always_inline bool vma_flags_test_any_mask(const vma_flags_t *flags,
 		vma_flags_t to_test)
 {
-- 
cgit v1.2.3


From 0c2aa6635716a5aa19576deef062efab5322072f Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Thu, 5 Mar 2026 10:50:18 +0000
Subject: mm: reintroduce vma_desc_test() as a singular flag test

Similar to vma_flags_test(), we have previously renamed vma_desc_test() to
vma_desc_test_any().  Now that is in place, we can reintroduce
vma_desc_test() to explicitly check for a single VMA flag.

As with vma_flags_test(), this is useful as often flag tests are against a
single flag, and vma_desc_test_any(flags, VMA_READ_BIT) reads oddly and
potentially causes confusion.

As with vma_flags_test() a combination of sparse and vma_flags_t being a
struct means that users cannot misuse this function without it getting
flagged.

Also update the VMA tests to reflect this change.

Link: https://lkml.kernel.org/r/3a65ca23defb05060333f0586428fe279a484564.1772704455.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Babu Moger <babu.moger@amd.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Chatre, Reinette <reinette.chatre@intel.com>
Cc: Chunhai Guo <guochunhai@vivo.com>
Cc: Damien Le Maol <dlemoal@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hongbo Li <lihongbo22@huawei.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jeffle Xu <jefflexu@linux.alibaba.com>
Cc: Johannes Thumshirn <jth@kernel.org>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naohiro Aota <naohiro.aota@wdc.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sandeep Dhavale <dhavale@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Yue Hu <zbestahu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/char/mem.c              |  2 +-
 fs/hugetlbfs/inode.c            |  2 +-
 fs/ntfs3/file.c                 |  2 +-
 fs/resctrl/pseudo_lock.c        |  2 +-
 include/linux/dax.h             |  4 ++--
 include/linux/mm.h              | 11 +++++++++++
 mm/hugetlb.c                    | 12 ++++++------
 tools/testing/vma/include/dup.h |  6 ++++++
 8 files changed, 29 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 5118787d0954..5fd421e48c04 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -520,7 +520,7 @@ static int mmap_zero_prepare(struct vm_area_desc *desc)
 #ifndef CONFIG_MMU
 	return -ENOSYS;
 #endif
-	if (vma_desc_test_any(desc, VMA_SHARED_BIT))
+	if (vma_desc_test(desc, VMA_SHARED_BIT))
 		return shmem_zero_setup_desc(desc);
 
 	desc->action.success_hook = mmap_zero_private_success;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 079ffaaf1f6c..cd6b22f6e2b1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -164,7 +164,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
 		goto out;
 
 	ret = 0;
-	if (vma_desc_test_any(desc, VMA_WRITE_BIT) && inode->i_size < len)
+	if (vma_desc_test(desc, VMA_WRITE_BIT) && inode->i_size < len)
 		i_size_write(inode, len);
 out:
 	inode_unlock(inode);
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index c5e2181f9f02..fbdfaf989a31 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -276,7 +276,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc)
 	struct file *file = desc->file;
 	struct inode *inode = file_inode(file);
 	struct ntfs_inode *ni = ntfs_i(inode);
-	const bool rw = vma_desc_test_any(desc, VMA_WRITE_BIT);
+	const bool rw = vma_desc_test(desc, VMA_WRITE_BIT);
 	int err;
 
 	/* Avoid any operation if inode is bad. */
diff --git a/fs/resctrl/pseudo_lock.c b/fs/resctrl/pseudo_lock.c
index 79a006c6f26c..d1cb0986006e 100644
--- a/fs/resctrl/pseudo_lock.c
+++ b/fs/resctrl/pseudo_lock.c
@@ -1044,7 +1044,7 @@ static int pseudo_lock_dev_mmap_prepare(struct vm_area_desc *desc)
 	 * Ensure changes are carried directly to the memory being mapped,
 	 * do not allow copy-on-write mapping.
 	 */
-	if (!vma_desc_test_any(desc, VMA_SHARED_BIT)) {
+	if (!vma_desc_test(desc, VMA_SHARED_BIT)) {
 		mutex_unlock(&rdtgroup_mutex);
 		return -EINVAL;
 	}
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 535019001577..10a7cc79aea5 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -69,7 +69,7 @@ static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc,
 					    const struct inode *inode,
 					    struct dax_device *dax_dev)
 {
-	if (!vma_desc_test_any(desc, VMA_SYNC_BIT))
+	if (!vma_desc_test(desc, VMA_SYNC_BIT))
 		return true;
 	if (!IS_DAX(inode))
 		return false;
@@ -115,7 +115,7 @@ static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc,
 					    const struct inode *inode,
 					    struct dax_device *dax_dev)
 {
-	return !vma_desc_test_any(desc, VMA_SYNC_BIT);
+	return !vma_desc_test(desc, VMA_SYNC_BIT);
 }
 static inline size_t dax_recovery_write(struct dax_device *dax_dev,
 		pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9392723a5c50..63d1f619260e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1192,6 +1192,17 @@ static inline void vma_set_flags_mask(struct vm_area_struct *vma,
 #define vma_set_flags(vma, ...) \
 	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
 
+/*
+ * Test whether a specific VMA flag is set in a VMA descriptor, e.g.:
+ *
+ * if (vma_desc_test(desc, VMA_READ_BIT)) { ... }
+ */
+static __always_inline bool vma_desc_test(const struct vm_area_desc *desc,
+		vma_flag_t bit)
+{
+	return vma_flags_test(&desc->vma_flags, bit);
+}
+
 /* Helper to test any VMA flags in a VMA descriptor. */
 static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
 		vma_flags_t flags)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9363b6072c0a..992c1632d26a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1194,7 +1194,7 @@ static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
 static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *map)
 {
 	VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
-	VM_WARN_ON_ONCE(vma_desc_test_any(desc, VMA_MAYSHARE_BIT));
+	VM_WARN_ON_ONCE(vma_desc_test(desc, VMA_MAYSHARE_BIT));
 
 	desc->private_data = map;
 }
@@ -1202,7 +1202,7 @@ static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *ma
 static void set_vma_desc_resv_flags(struct vm_area_desc *desc, unsigned long flags)
 {
 	VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
-	VM_WARN_ON_ONCE(vma_desc_test_any(desc, VMA_MAYSHARE_BIT));
+	VM_WARN_ON_ONCE(vma_desc_test(desc, VMA_MAYSHARE_BIT));
 
 	desc->private_data = (void *)((unsigned long)desc->private_data | flags);
 }
@@ -6602,7 +6602,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	 * to reserve the full area even if read-only as mprotect() may be
 	 * called to make the mapping read-write. Assume !desc is a shm mapping
 	 */
-	if (!desc || vma_desc_test_any(desc, VMA_MAYSHARE_BIT)) {
+	if (!desc || vma_desc_test(desc, VMA_MAYSHARE_BIT)) {
 		/*
 		 * resv_map can not be NULL as hugetlb_reserve_pages is only
 		 * called for inodes for which resv_maps were created (see
@@ -6636,7 +6636,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	if (err < 0)
 		goto out_err;
 
-	if (desc && !vma_desc_test_any(desc, VMA_MAYSHARE_BIT) && h_cg) {
+	if (desc && !vma_desc_test(desc, VMA_MAYSHARE_BIT) && h_cg) {
 		/* For private mappings, the hugetlb_cgroup uncharge info hangs
 		 * of the resv_map.
 		 */
@@ -6673,7 +6673,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	 * consumed reservations are stored in the map. Hence, nothing
 	 * else has to be done for private mappings here
 	 */
-	if (!desc || vma_desc_test_any(desc, VMA_MAYSHARE_BIT)) {
+	if (!desc || vma_desc_test(desc, VMA_MAYSHARE_BIT)) {
 		add = region_add(resv_map, from, to, regions_needed, h, h_cg);
 
 		if (unlikely(add < 0)) {
@@ -6737,7 +6737,7 @@ out_uncharge_cgroup:
 	hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
 					    chg * pages_per_huge_page(h), h_cg);
 out_err:
-	if (!desc || vma_desc_test_any(desc, VMA_MAYSHARE_BIT))
+	if (!desc || vma_desc_test(desc, VMA_MAYSHARE_BIT))
 		/* Only call region_abort if the region_chg succeeded but the
 		 * region_add failed or didn't run.
 		 */
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 630478f0d583..5eb313beb43d 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -922,6 +922,12 @@ static inline void vma_set_flags_mask(struct vm_area_struct *vma,
 #define vma_set_flags(vma, ...) \
 	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
 
+static __always_inline bool vma_desc_test(const struct vm_area_desc *desc,
+		vma_flag_t bit)
+{
+	return vma_flags_test(&desc->vma_flags, bit);
+}
+
 static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
 					    vma_flags_t flags)
 {
-- 
cgit v1.2.3


From db359fccf212e7fa3136e6edbed6228475646fd7 Mon Sep 17 00:00:00 2001
From: Byungchul Park <byungchul@sk.com>
Date: Tue, 24 Feb 2026 14:13:47 +0900
Subject: mm: introduce a new page type for page pool in page type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, the condition 'page->pp_magic == PP_SIGNATURE' is used to
determine if a page belongs to a page pool.  However, with the planned
removal of @pp_magic, we should instead leverage the page_type in struct
page, such as PGTY_netpp, for this purpose.

Introduce and use the page type APIs e.g.  PageNetpp(), __SetPageNetpp(),
and __ClearPageNetpp() instead, and remove the existing APIs accessing
@pp_magic e.g.  page_pool_page_is_pp(), netmem_or_pp_magic(), and
netmem_clear_pp_magic().

Plus, add @page_type to struct net_iov at the same offset as struct page
so as to use the page_type APIs for struct net_iov as well.  While at it,
reorder @type and @owner in struct net_iov to avoid a hole and increasing
the struct size.

This work was inspired by the following link:

  https://lore.kernel.org/all/582f41c0-2742-4400-9c81-0d46bf4e8314@gmail.com/

While at it, move the sanity check for page pool to on the free path.

[byungchul@sk.com: gate the sanity check, per Johannes]
  Link: https://lkml.kernel.org/r/20260316223113.20097-1-byungchul@sk.com
Link: https://lkml.kernel.org/r/20260224051347.19621-1-byungchul@sk.com
Co-developed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Byungchul Park <byungchul@sk.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: David Wei <dw@davidwei.uk>
Cc: Dragos Tatulea <dtatulea@nvidia.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Mark Bloch <mbloch@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>
Cc: Simon Horman <horms@kernel.org>
Cc: Stanislav Fomichev <sdf@fomichev.me>
Cc: Stehen Rothwell <sfr@canb.auug.org.au>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Taehee Yoo <ap420073@gmail.com>
Cc: Tariq Toukan <tariqt@nvidia.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c |  2 +-
 include/linux/mm.h                               | 27 +++---------------------
 include/linux/page-flags.h                       |  6 ++++++
 include/net/netmem.h                             | 15 +++++++++++--
 mm/page_alloc.c                                  | 13 ++++++++----
 net/core/netmem_priv.h                           | 23 +++++++++-----------
 net/core/page_pool.c                             | 24 +++++++++++++++++++--
 7 files changed, 64 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 80f9fc10877a..7d90d2485c78 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -707,7 +707,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 				xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
 				page = xdpi.page.page;
 
-				/* No need to check page_pool_page_is_pp() as we
+				/* No need to check PageNetpp() as we
 				 * know this is a page_pool page.
 				 */
 				page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63d1f619260e..c758f4e68727 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4840,10 +4840,9 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
  * DMA mapping IDs for page_pool
  *
  * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and
- * stashes it in the upper bits of page->pp_magic. We always want to be able to
- * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP
- * pages can have arbitrary kernel pointers stored in the same field as pp_magic
- * (since it overlaps with page->lru.next), so we must ensure that we cannot
+ * stashes it in the upper bits of page->pp_magic. Non-PP pages can have
+ * arbitrary kernel pointers stored in the same field as pp_magic (since
+ * it overlaps with page->lru.next), so we must ensure that we cannot
  * mistake a valid kernel pointer with any of the values we write into this
  * field.
  *
@@ -4878,26 +4877,6 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
 #define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
 				  PP_DMA_INDEX_SHIFT)
 
-/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
- * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
- * the head page of compound page and bit 1 for pfmemalloc page, as well as the
- * bits used for the DMA index. page_is_pfmemalloc() is checked in
- * __page_pool_put_page() to avoid recycling the pfmemalloc page.
- */
-#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL)
-
-#ifdef CONFIG_PAGE_POOL
-static inline bool page_pool_page_is_pp(const struct page *page)
-{
-	return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
-}
-#else
-static inline bool page_pool_page_is_pp(const struct page *page)
-{
-	return false;
-}
-#endif
-
 #define PAGE_SNAPSHOT_FAITHFUL (1 << 0)
 #define PAGE_SNAPSHOT_PG_BUDDY (1 << 1)
 #define PAGE_SNAPSHOT_PG_IDLE  (1 << 2)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 7223f6f4e2b4..0e03d816e8b9 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -923,6 +923,7 @@ enum pagetype {
 	PGTY_zsmalloc		= 0xf6,
 	PGTY_unaccepted		= 0xf7,
 	PGTY_large_kmalloc	= 0xf8,
+	PGTY_netpp		= 0xf9,
 
 	PGTY_mapcount_underflow = 0xff
 };
@@ -1055,6 +1056,11 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
 PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted)
 PAGE_TYPE_OPS(LargeKmalloc, large_kmalloc, large_kmalloc)
 
+/*
+ * Marks page_pool allocated pages.
+ */
+PAGE_TYPE_OPS(Netpp, netpp, netpp)
+
 /**
  * PageHuge - Determine if the page belongs to hugetlbfs
  * @page: The page to test.
diff --git a/include/net/netmem.h b/include/net/netmem.h
index a96b3e5e5574..85e3b26ec547 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -110,10 +110,21 @@ struct net_iov {
 			atomic_long_t pp_ref_count;
 		};
 	};
-	struct net_iov_area *owner;
+
+	unsigned int page_type;
 	enum net_iov_type type;
+	struct net_iov_area *owner;
 };
 
+/* Make sure 'the offset of page_type in struct page == the offset of
+ * type in struct net_iov'.
+ */
+#define NET_IOV_ASSERT_OFFSET(pg, iov)			\
+	static_assert(offsetof(struct page, pg) ==	\
+		      offsetof(struct net_iov, iov))
+NET_IOV_ASSERT_OFFSET(page_type, page_type);
+#undef NET_IOV_ASSERT_OFFSET
+
 struct net_iov_area {
 	/* Array of net_iovs for this area. */
 	struct net_iov *niovs;
@@ -256,7 +267,7 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem)
  */
 #define pp_page_to_nmdesc(p)						\
 ({									\
-	DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p));		\
+	DEBUG_NET_WARN_ON_ONCE(!PageNetpp(p));				\
 	__pp_page_to_nmdesc(p);						\
 })
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f11f38ba2e12..fdcc2fde565b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1043,7 +1043,6 @@ static inline bool page_expected_state(struct page *page,
 #ifdef CONFIG_MEMCG
 			page->memcg_data |
 #endif
-			page_pool_page_is_pp(page) |
 			(page->flags.f & check_flags)))
 		return false;
 
@@ -1070,8 +1069,6 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
 	if (unlikely(page->memcg_data))
 		bad_reason = "page still charged to cgroup";
 #endif
-	if (unlikely(page_pool_page_is_pp(page)))
-		bad_reason = "page_pool leak";
 	return bad_reason;
 }
 
@@ -1380,9 +1377,17 @@ __always_inline bool __free_pages_prepare(struct page *page,
 		mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
 		folio->mapping = NULL;
 	}
-	if (unlikely(page_has_type(page)))
+	if (unlikely(page_has_type(page))) {
+		/* networking expects to clear its page type before releasing */
+		if (is_check_pages_enabled()) {
+			if (unlikely(PageNetpp(page))) {
+				bad_page(page, "page_pool leak");
+				return false;
+			}
+		}
 		/* Reset the page_type (which overlays _mapcount) */
 		page->page_type = UINT_MAX;
+	}
 
 	if (is_check_pages_enabled()) {
 		if (free_page_is_bad(page))
diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
index 23175cb2bd86..3e6fde8f1726 100644
--- a/net/core/netmem_priv.h
+++ b/net/core/netmem_priv.h
@@ -8,21 +8,18 @@ static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
 	return netmem_to_nmdesc(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
 }
 
-static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
-{
-	netmem_to_nmdesc(netmem)->pp_magic |= pp_magic;
-}
-
-static inline void netmem_clear_pp_magic(netmem_ref netmem)
-{
-	WARN_ON_ONCE(netmem_to_nmdesc(netmem)->pp_magic & PP_DMA_INDEX_MASK);
-
-	netmem_to_nmdesc(netmem)->pp_magic = 0;
-}
-
 static inline bool netmem_is_pp(netmem_ref netmem)
 {
-	return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE;
+	struct page *page;
+
+	/* XXX: Now that the offset of page_type is shared between
+	 * struct page and net_iov, just cast the netmem to struct page
+	 * unconditionally by clearing NET_IOV if any, no matter whether
+	 * it comes from struct net_iov or struct page.  This should be
+	 * adjusted once the offset is no longer shared.
+	 */
+	page = (struct page *)((__force unsigned long)netmem & ~NET_IOV);
+	return PageNetpp(page);
 }
 
 static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 265a729431bb..877bbf7a1938 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -702,8 +702,18 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict)
 
 void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
 {
+	struct page *page;
+
 	netmem_set_pp(netmem, pool);
-	netmem_or_pp_magic(netmem, PP_SIGNATURE);
+
+	/* XXX: Now that the offset of page_type is shared between
+	 * struct page and net_iov, just cast the netmem to struct page
+	 * unconditionally by clearing NET_IOV if any, no matter whether
+	 * it comes from struct net_iov or struct page.  This should be
+	 * adjusted once the offset is no longer shared.
+	 */
+	page = (struct page *)((__force unsigned long)netmem & ~NET_IOV);
+	__SetPageNetpp(page);
 
 	/* Ensuring all pages have been split into one fragment initially:
 	 * page_pool_set_pp_info() is only called once for every page when it
@@ -718,7 +728,17 @@ void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
 
 void page_pool_clear_pp_info(netmem_ref netmem)
 {
-	netmem_clear_pp_magic(netmem);
+	struct page *page;
+
+	/* XXX: Now that the offset of page_type is shared between
+	 * struct page and net_iov, just cast the netmem to struct page
+	 * unconditionally by clearing NET_IOV if any, no matter whether
+	 * it comes from struct net_iov or struct page.  This should be
+	 * adjusted once the offset is no longer shared.
+	 */
+	page = (struct page *)((__force unsigned long)netmem & ~NET_IOV);
+	__ClearPageNetpp(page);
+
 	netmem_set_pp(netmem, NULL);
 }
 
-- 
cgit v1.2.3


From 3802e1d98e92ca6abdd25446b802f405fef83da0 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Sat, 7 Mar 2026 11:53:52 -0800
Subject: mm/damon: document non-zero length damon_region assumption

DAMON regions are assumed to always be non-zero length.  There was a
confusion [1] about it, probably due to lack of the documentation.
Document it.

Link: https://lkml.kernel.org/r/20260307195356.203753-5-sj@kernel.org
Link: https://lore.kernel.org/20251231070029.79682-1-sj@kernel.org/ [1]
Signed-off-by: SeongJae Park <sj@kernel.org>
Acked-by: wang lian <lianux.mm@gmail.com>
Cc: Brendan Higgins <brendan.higgins@linux.dev>
Cc: David Gow <davidgow@google.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 60e6da3012fa..7d0265d02954 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -55,6 +55,8 @@ struct damon_size_range {
  * @list:		List head for siblings.
  * @age:		Age of this region.
  *
+ * For any use case, @ar should be non-zero positive size.
+ *
  * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be
  * increased for every &damon_attrs->sample_interval if an access to the region
  * during the last sampling interval is found.  The update of this field should
-- 
cgit v1.2.3


From 341ffe82a7a3a1e0756b58999405b6df0c2b3e8d Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Mon, 9 Mar 2026 16:18:58 +0100
Subject: mm: move vma_kernel_pagesize() from hugetlb to mm.h

Patch series "mm: move vma_(kernel|mmu)_pagesize() out of hugetlb.c", v2.

Looking into vma_(kernel|mmu)_pagesize(), I realized that there is one
scenario where DAX would not do the right thing when the kernel is not
compiled with hugetlb support.

Without hugetlb support, vma_(kernel|mmu)_pagesize() will always return
PAGE_SIZE instead of using the ->pagesize() result provided by dax-device
code.

Fix that by moving vma_kernel_pagesize() to core MM code, where it
belongs.  I don't think this is stable material, but am not 100% sure.

Also, move vma_mmu_pagesize() while at it.  Remove the unnecessary
hugetlb.h inclusion from KVM code.


This patch (of 4):

In the past, only hugetlb had special "vma_kernel_pagesize()"
requirements, so it provided its own implementation.

In commit 05ea88608d4e ("mm, hugetlbfs: introduce ->pagesize() to
vm_operations_struct") we generalized that approach by providing a
vm_ops->pagesize() callback to be used by device-dax.

Once device-dax started using that callback in commit c1d53b92b95c
("device-dax: implement ->pagesize() for smaps to report MMUPageSize") it
was missed that CONFIG_DEV_DAX does not depend on hugetlb support.

So building a kernel with CONFIG_DEV_DAX but without CONFIG_HUGETLBFS
would not pick up that value.

Fix it by moving vma_kernel_pagesize() to mm.h, providing only a single
implementation.  While at it, improve the kerneldoc a bit.

Ideally, we'd move vma_mmu_pagesize() as well to the header.  However, its
__weak symbol might be overwritten by a PPC variant in hugetlb code.  So
let's leave it in there for now, as it really only matters for some
hugetlb oddities.

This was found by code inspection.

Link: https://lkml.kernel.org/r/20260309151901.123947-1-david@kernel.org
Link: https://lkml.kernel.org/r/20260309151901.123947-2-david@kernel.org
Fixes: c1d53b92b95c ("device-dax: implement ->pagesize() for smaps to report MMUPageSize")
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: "Christophe Leroy (CS GROUP)" <chleroy@kernel.org>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h |  7 -------
 include/linux/mm.h      | 20 ++++++++++++++++++++
 mm/hugetlb.c            | 17 -----------------
 3 files changed, 20 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 65910437be1c..44c1848a2c21 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -777,8 +777,6 @@ static inline unsigned long huge_page_size(const struct hstate *h)
 	return (unsigned long)PAGE_SIZE << h->order;
 }
 
-extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);
-
 extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);
 
 static inline unsigned long huge_page_mask(struct hstate *h)
@@ -1177,11 +1175,6 @@ static inline unsigned long huge_page_mask(struct hstate *h)
 	return PAGE_MASK;
 }
 
-static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
-{
-	return PAGE_SIZE;
-}
-
 static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 {
 	return PAGE_SIZE;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c758f4e68727..e62cea754b0e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1351,6 +1351,26 @@ static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma)
 	return is_shared_maywrite(&vma->flags);
 }
 
+/**
+ * vma_kernel_pagesize - Default page size granularity for this VMA.
+ * @vma: The user mapping.
+ *
+ * The kernel page size specifies in which granularity VMA modifications
+ * can be performed. Folios in this VMA will be aligned to, and at least
+ * the size of the number of bytes returned by this function.
+ *
+ * The default kernel page size is not affected by Transparent Huge Pages
+ * being in effect.
+ *
+ * Return: The default page size granularity for this VMA.
+ */
+static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
+{
+	if (unlikely(vma->vm_ops && vma->vm_ops->pagesize))
+		return vma->vm_ops->pagesize(vma);
+	return PAGE_SIZE;
+}
+
 static inline
 struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 992c1632d26a..66761ae5ce71 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1017,23 +1017,6 @@ static pgoff_t vma_hugecache_offset(struct hstate *h,
 			(vma->vm_pgoff >> huge_page_order(h));
 }
 
-/**
- * vma_kernel_pagesize - Page size granularity for this VMA.
- * @vma: The user mapping.
- *
- * Folios in this VMA will be aligned to, and at least the size of the
- * number of bytes returned by this function.
- *
- * Return: The default size of the folios allocated when backing a VMA.
- */
-unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
-{
-	if (vma->vm_ops && vma->vm_ops->pagesize)
-		return vma->vm_ops->pagesize(vma);
-	return PAGE_SIZE;
-}
-EXPORT_SYMBOL_GPL(vma_kernel_pagesize);
-
 /*
  * Return the page size being used by the MMU to back a VMA. In the majority
  * of cases, the page size used by the kernel matches the MMU size. On
-- 
cgit v1.2.3


From a9496e9e4b7c5785e82000a26b1118b4a1fd85c7 Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Mon, 9 Mar 2026 16:18:59 +0100
Subject: mm: move vma_mmu_pagesize() from hugetlb to vma.c

vma_mmu_pagesize() is also queried on non-hugetlb VMAs and does not really
belong into hugetlb.c.

PPC64 provides a custom overwrite with CONFIG_HUGETLB_PAGE, see
arch/powerpc/mm/book3s64/slice.c, so we cannot easily make this a static
inline function.

So let's move it to vma.c and add some proper kerneldoc.

To make vma tests happy, add a simple vma_kernel_pagesize() stub in
tools/testing/vma/include/custom.h.

Link: https://lkml.kernel.org/r/20260309151901.123947-3-david@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: "Christophe Leroy (CS GROUP)" <chleroy@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h            |  7 -------
 include/linux/mm.h                 |  2 ++
 mm/hugetlb.c                       | 11 -----------
 mm/vma.c                           | 21 +++++++++++++++++++++
 tools/testing/vma/include/custom.h |  5 +++++
 5 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 44c1848a2c21..aaf3d472e6b5 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -777,8 +777,6 @@ static inline unsigned long huge_page_size(const struct hstate *h)
 	return (unsigned long)PAGE_SIZE << h->order;
 }
 
-extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);
-
 static inline unsigned long huge_page_mask(struct hstate *h)
 {
 	return h->mask;
@@ -1175,11 +1173,6 @@ static inline unsigned long huge_page_mask(struct hstate *h)
 	return PAGE_MASK;
 }
 
-static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
-{
-	return PAGE_SIZE;
-}
-
 static inline unsigned int huge_page_order(struct hstate *h)
 {
 	return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e62cea754b0e..efb8be5d259c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1371,6 +1371,8 @@ static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
 	return PAGE_SIZE;
 }
 
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);
+
 static inline
 struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 66761ae5ce71..a786034ac95c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1017,17 +1017,6 @@ static pgoff_t vma_hugecache_offset(struct hstate *h,
 			(vma->vm_pgoff >> huge_page_order(h));
 }
 
-/*
- * Return the page size being used by the MMU to back a VMA. In the majority
- * of cases, the page size used by the kernel matches the MMU size. On
- * architectures where it differs, an architecture-specific 'strong'
- * version of this symbol is required.
- */
-__weak unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
-{
-	return vma_kernel_pagesize(vma);
-}
-
 /*
  * Flags for MAP_PRIVATE reservations.  These are stored in the bottom
  * bits of the reservation map pointer, which are always clear due to
diff --git a/mm/vma.c b/mm/vma.c
index be64f781a3aa..e95fd5a5fe5c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -3300,3 +3300,24 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 
 	return 0;
 }
+
+/**
+ * vma_mmu_pagesize - Default MMU page size granularity for this VMA.
+ * @vma: The user mapping.
+ *
+ * In the common case, the default page size used by the MMU matches the
+ * default page size used by the kernel (see vma_kernel_pagesize()). On
+ * architectures where it differs, an architecture-specific 'strong' version
+ * of this symbol is required.
+ *
+ * The default MMU page size is not affected by Transparent Huge Pages
+ * being in effect, or any usage of larger MMU page sizes (either through
+ * architectural huge-page mappings or other explicit/implicit coalescing of
+ * virtual ranges performed by the MMU).
+ *
+ * Return: The default MMU page size granularity for this VMA.
+ */
+__weak unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+	return vma_kernel_pagesize(vma);
+}
diff --git a/tools/testing/vma/include/custom.h b/tools/testing/vma/include/custom.h
index 833ff4d7f799..7150e09122b2 100644
--- a/tools/testing/vma/include/custom.h
+++ b/tools/testing/vma/include/custom.h
@@ -118,3 +118,8 @@ static __always_inline vma_flags_t __mk_vma_flags(size_t count,
 			vma_flags_set_flag(&flags, bits[i]);
 	return flags;
 }
+
+static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
+{
+	return PAGE_SIZE;
+}
-- 
cgit v1.2.3


From d239462787b072c78eb19fc1f155c3d411256282 Mon Sep 17 00:00:00 2001
From: Anthony Yznaga <anthony.yznaga@oracle.com>
Date: Tue, 10 Mar 2026 08:58:20 -0700
Subject: mm: prevent droppable mappings from being locked

Droppable mappings must not be lockable.  There is a check for VMAs with
VM_DROPPABLE set in mlock_fixup() along with checks for other types of
unlockable VMAs which ensures this when calling mlock()/mlock2().

For mlockall(MCL_FUTURE), the check for unlockable VMAs is different.  In
apply_mlockall_flags(), if the flags parameter has MCL_FUTURE set, the
current task's mm's default VMA flag field mm->def_flags has VM_LOCKED
applied to it.  VM_LOCKONFAULT is also applied if MCL_ONFAULT is also set.
When these flags are set as default in this manner they are cleared in
__mmap_complete() for new mappings that do not support mlock.  A check for
VM_DROPPABLE in __mmap_complete() is missing resulting in droppable
mappings created with VM_LOCKED set.  To fix this and reduce that chance
of similar bugs in the future, introduce and use vma_supports_mlock().

Link: https://lkml.kernel.org/r/20260310155821.17869-1-anthony.yznaga@oracle.com
Fixes: 9651fcedf7b9 ("mm: add MAP_DROPPABLE for designating always lazily freeable mappings")
Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
Suggested-by: David Hildenbrand <david@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Tested-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Jann Horn <jannh@google.com>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb_inline.h    |  2 +-
 mm/internal.h                     | 10 ++++++++++
 mm/mlock.c                        | 10 ++++++----
 mm/vma.c                          |  4 +---
 tools/testing/vma/include/stubs.h |  5 +++++
 5 files changed, 23 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h
index 84afc3c3e2e4..565b473fd135 100644
--- a/include/linux/hugetlb_inline.h
+++ b/include/linux/hugetlb_inline.h
@@ -30,7 +30,7 @@ static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
 
 #endif
 
-static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
+static inline bool is_vm_hugetlb_page(const struct vm_area_struct *vma)
 {
 	return is_vm_hugetlb_flags(vma->vm_flags);
 }
diff --git a/mm/internal.h b/mm/internal.h
index 4ab833b8bcdf..ebb68ad10d5c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1243,6 +1243,16 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
 	}
 	return fpin;
 }
+
+static inline bool vma_supports_mlock(const struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_SPECIAL | VM_DROPPABLE))
+		return false;
+	if (vma_is_dax(vma) || is_vm_hugetlb_page(vma))
+		return false;
+	return vma != get_gate_vma(current->mm);
+}
+
 #else /* !CONFIG_MMU */
 static inline void unmap_mapping_folio(struct folio *folio) { }
 static inline void mlock_new_folio(struct folio *folio) { }
diff --git a/mm/mlock.c b/mm/mlock.c
index 1a92d16f3684..fd648138bc72 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -472,10 +472,12 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	int ret = 0;
 	vm_flags_t oldflags = vma->vm_flags;
 
-	if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
-	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
-	    vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & VM_DROPPABLE))
-		/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
+	if (newflags == oldflags || vma_is_secretmem(vma) ||
+	    !vma_supports_mlock(vma))
+		/*
+		 * Don't set VM_LOCKED or VM_LOCKONFAULT and don't count.
+		 * For secretmem, don't allow the memory to be unlocked.
+		 */
 		goto out;
 
 	vma = vma_modify_flags(vmi, *prev, vma, start, end, &newflags);
diff --git a/mm/vma.c b/mm/vma.c
index e95fd5a5fe5c..b7055c264b5d 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2589,9 +2589,7 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
 
 	vm_stat_account(mm, vma->vm_flags, map->pglen);
 	if (vm_flags & VM_LOCKED) {
-		if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
-					is_vm_hugetlb_page(vma) ||
-					vma == get_gate_vma(mm))
+		if (!vma_supports_mlock(vma))
 			vm_flags_clear(vma, VM_LOCKED_MASK);
 		else
 			mm->locked_vm += map->pglen;
diff --git a/tools/testing/vma/include/stubs.h b/tools/testing/vma/include/stubs.h
index 947a3a0c2566..416bb93f5005 100644
--- a/tools/testing/vma/include/stubs.h
+++ b/tools/testing/vma/include/stubs.h
@@ -426,3 +426,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
 }
 
 static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
+
+static inline bool vma_supports_mlock(const struct vm_area_struct *vma)
+{
+	return false;
+}
-- 
cgit v1.2.3


From 8719c59c4b928fc9ad8d8f45ecbdf859660c904c Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Mon, 9 Mar 2026 18:05:17 -0700
Subject: mm/damon/core: introduce damos_quota_goal_tuner

Patch series "mm/damon: support multiple goal-based quota tuning
algorithms".

Aim-oriented DAMOS quota auto-tuning uses a single tuning algorithm.  The
algorithm is designed to find a quota value that should be consistently
kept for achieving the aimed goal for long term.  It is useful and
reliable at automatically operating systems that have dynamic environments
in the long term.

As always, however, no single algorithm fits all.  When the environment
has static characteristics or there are control towers in not only the
kernel space but also the user space, the algorithm shows some
limitations.  In such environments, users want kernel work in a more short
term deterministic way.  Actually there were at least two reports [1,2] of
such cases.

Extend DAMOS quotas goal to support multiple quota tuning algorithms that
users can select.  Keep the current algorithm as the default one, to not
break the old users.  Also give it a name, "consist", as it is designed to
"consistently" apply the DAMOS action.  And introduce a new tuning
algorithm, namely "temporal".  It is designed to apply the DAMOS action
only temporally, in a deterministic way.  In more detail, as long as the
goal is under-achieved, it uses the maximum quota available.  Once the
goal is over-achieved, it sets the quota zero.

Tests
=====

I confirmed the feature is working as expected using the latest version of
DAMON user-space tool, like below.

    $ # start DAMOS for reclaiming memory aiming 30% free memory
    $ sudo ./damo/damo start --damos_action pageout \
            --damos_quota_goal_tuner temporal \
            --damos_quota_goal node_mem_free_bp 30% 0 \
            --damos_quota_interval 1s \
            --damos_quota_space 100M

Note that >=3.1.8 version of DAMON user-space tool supports this feature
(--damos_quota_goal_tuner).  As expected, DAMOS stops reclaiming memory as
soon as the goal amount of free memory is made.  When 'consist' tuner is
used, the reclamation was continued even after the goal amount of free
memory is made, resulting in more than goal amount of free memory, as
expected.

Patch Sequence
==============

First four patches implement the features.  Patch 1 extends core API to
allow multiple tuners and make the current tuner as the default and only
available tuner, namely 'consist'.  Patch 2 allows future tuners setting
zero effective quota.  Patch 3 introduces the second tuner, namely
'temporal'.  Patch 4 further extends DAMON sysfs API to let users use
that.

Three following patches (patches 5-7) update design, usage, and ABI
documents, respectively.

Final four patches (patches 8-11) are for adding tests.  The eighth patch
(patch 8) extends the kunit test for online parameters commit for
validating the goal_tuner.  The ninth and the tenth patches (patches 9-10)
extend the testing-purpose DAMON sysfs control helper and DAMON status
dumping tool to support the newly added feature.  The final eleventh one
(patch 11) extends the existing online commit selftest to cover the new
feature.

This patch (of 11):

DAMOS quota goal feature utilizes a single feedback loop based algorithm
for automatic tuning of the effective quota.  It is useful in dynamic
environments that operate systems with only kernels in the long term.
But, no one fits all.  It is not very easy to control in environments
having more controlled characteristics and user-space control towers.  We
actually got multiple reports [1,2] of use cases that the algorithm is not
optimal.

Introduce a new field of 'struct damos_quotas', namely 'goal_tuner'.  It
specifies what tuning algorithm the given scheme should use, and allows
DAMON API callers to set it as they want.  Nonetheless, this commit
introduces no new tuning algorithm but only the interface.  This commit
hence makes no behavioral change.  A new algorithm will be added by the
following commit.

Link: https://lkml.kernel.org/r/20260310010529.91162-2-sj@kernel.org
Link: https://lore.kernel.org/CALa+Y17__d=ZsM1yX+MXx0ozVdsXnFqF4p0g+kATEitrWyZFfg@mail.gmail.com [1]
Link: https://lore.kernel.org/20260204022537.814-1-yunjeong.mun@sk.com [2]
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Brendan Higgins <brendan.higgins@linux.dev>
Cc: David Gow <davidgow@google.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 10 ++++++++++
 mm/damon/core.c       |  1 +
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 7d0265d02954..24de35a8395a 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -215,12 +215,21 @@ struct damos_quota_goal {
 	struct list_head list;
 };
 
+/**
+ * enum damos_quota_goal_tuner - Goal-based quota tuning logic.
+ * @DAMOS_QUOTA_GOAL_TUNER_CONSIST:	Aim long term consistent quota.
+ */
+enum damos_quota_goal_tuner {
+	DAMOS_QUOTA_GOAL_TUNER_CONSIST,
+};
+
 /**
  * struct damos_quota - Controls the aggressiveness of the given scheme.
  * @reset_interval:	Charge reset interval in milliseconds.
  * @ms:			Maximum milliseconds that the scheme can use.
  * @sz:			Maximum bytes of memory that the action can be applied.
  * @goals:		Head of quota tuning goals (&damos_quota_goal) list.
+ * @goal_tuner:		Goal-based @esz tuning algorithm to use.
  * @esz:		Effective size quota in bytes.
  *
  * @weight_sz:		Weight of the region's size for prioritization.
@@ -262,6 +271,7 @@ struct damos_quota {
 	unsigned long ms;
 	unsigned long sz;
 	struct list_head goals;
+	enum damos_quota_goal_tuner goal_tuner;
 	unsigned long esz;
 
 	unsigned int weight_sz;
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 2d2332f3d377..16905bf35c40 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -912,6 +912,7 @@ static int damos_commit_quota(struct damos_quota *dst, struct damos_quota *src)
 	err = damos_commit_quota_goals(dst, src);
 	if (err)
 		return err;
+	dst->goal_tuner = src->goal_tuner;
 	dst->weight_sz = src->weight_sz;
 	dst->weight_nr_accesses = src->weight_nr_accesses;
 	dst->weight_age = src->weight_age;
-- 
cgit v1.2.3


From af738a6a00c1febb0d543ba6a1400413f824ecf1 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Mon, 9 Mar 2026 18:05:19 -0700
Subject: mm/damon/core: introduce DAMOS_QUOTA_GOAL_TUNER_TEMPORAL

Introduce a new goal-based DAMOS quota auto-tuning algorithm, namely
DAMOS_QUOTA_GOAL_TUNER_TEMPORAL (temporal in short).  The algorithm aims
to trigger the DAMOS action only for a temporal time, to achieve the goal
as soon as possible.  For the temporal period, it uses as much quota as
allowed.  Once the goal is achieved, it sets the quota zero, so
effectively makes the scheme be deactivated.

Link: https://lkml.kernel.org/r/20260310010529.91162-4-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h |  2 ++
 mm/damon/core.c       | 29 ++++++++++++++++++++++++-----
 2 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 24de35a8395a..e44e2132ccaf 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -218,9 +218,11 @@ struct damos_quota_goal {
 /**
  * enum damos_quota_goal_tuner - Goal-based quota tuning logic.
  * @DAMOS_QUOTA_GOAL_TUNER_CONSIST:	Aim long term consistent quota.
+ * @DAMOS_QUOTA_GOAL_TUNER_TEMPORAL:	Aim zero quota asap.
  */
 enum damos_quota_goal_tuner {
 	DAMOS_QUOTA_GOAL_TUNER_CONSIST,
+	DAMOS_QUOTA_GOAL_TUNER_TEMPORAL,
 };
 
 /**
diff --git a/mm/damon/core.c b/mm/damon/core.c
index db3c59b70e49..b543d1202c9d 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -2347,6 +2347,26 @@ static unsigned long damos_quota_score(struct damos_quota *quota)
 	return highest_score;
 }
 
+static void damos_goal_tune_esz_bp_consist(struct damos_quota *quota)
+{
+	unsigned long score = damos_quota_score(quota);
+
+	quota->esz_bp = damon_feed_loop_next_input(
+			max(quota->esz_bp, 10000UL), score);
+}
+
+static void damos_goal_tune_esz_bp_temporal(struct damos_quota *quota)
+{
+	unsigned long score = damos_quota_score(quota);
+
+	if (score >= 10000)
+		quota->esz_bp = 0;
+	else if (quota->sz)
+		quota->esz_bp = quota->sz * 10000;
+	else
+		quota->esz_bp = ULONG_MAX;
+}
+
 /*
  * Called only if quota->ms, or quota->sz are set, or quota->goals is not empty
  */
@@ -2361,11 +2381,10 @@ static void damos_set_effective_quota(struct damos_quota *quota)
 	}
 
 	if (!list_empty(&quota->goals)) {
-		unsigned long score = damos_quota_score(quota);
-
-		quota->esz_bp = damon_feed_loop_next_input(
-				max(quota->esz_bp, 10000UL),
-				score);
+		if (quota->goal_tuner == DAMOS_QUOTA_GOAL_TUNER_CONSIST)
+			damos_goal_tune_esz_bp_consist(quota);
+		else if (quota->goal_tuner == DAMOS_QUOTA_GOAL_TUNER_TEMPORAL)
+			damos_goal_tune_esz_bp_temporal(quota);
 		esz = quota->esz_bp / 10000;
 	}
 
-- 
cgit v1.2.3


From d4e981b280454f4368950db6269c6077d66453cf Mon Sep 17 00:00:00 2001
From: Kexin Sun <kexinsun@smail.nju.edu.cn>
Date: Thu, 12 Mar 2026 13:38:12 +0800
Subject: kasan: update outdated comment

kmalloc_large() was renamed kmalloc_large_noprof() by commit 7bd230a26648
("mm/slab: enable slab allocation tagging for kmalloc and friends"), and
subsequently renamed __kmalloc_large_noprof() by commit a0a44d9175b3 ("mm,
slab: don't wrap internal functions with alloc_hooks()"), making it an
internal implementation detail.

Large kmalloc allocations are now performed through the public kmalloc()
interface directly, making the reference to KMALLOC_MAX_SIZE also stale
(KMALLOC_MAX_CACHE_SIZE would be more accurate).  Remove the references to
kmalloc_large() and KMALLOC_MAX_SIZE, and rephrase the description for
large kmalloc allocations.

Link: https://lkml.kernel.org/r/20260312053812.1365-1-kexinsun@smail.nju.edu.cn
Signed-off-by: Kexin Sun <kexinsun@smail.nju.edu.cn>
Suggested-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Assisted-by: unnamed:deepseek-v3.2 coccinelle
Reviewed-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kasan.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 338a1921a50a..bf233bde68c7 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -352,8 +352,8 @@ bool __kasan_mempool_poison_object(void *ptr, unsigned long ip);
  * kasan_mempool_unpoison_object().
  *
  * This function operates on all slab allocations including large kmalloc
- * allocations (the ones returned by kmalloc_large() or by kmalloc() with the
- * size > KMALLOC_MAX_SIZE).
+ * allocations (i.e. the ones backed directly by the buddy allocator rather
+ * than kmalloc slab caches).
  *
  * Return: true if the allocation can be safely reused; false otherwise.
  */
@@ -381,8 +381,8 @@ void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip);
  * original tags based on the pointer value.
  *
  * This function operates on all slab allocations including large kmalloc
- * allocations (the ones returned by kmalloc_large() or by kmalloc() with the
- * size > KMALLOC_MAX_SIZE).
+ * allocations (i.e. the ones backed directly by the buddy allocator rather
+ * than kmalloc slab caches).
  */
 static __always_inline void kasan_mempool_unpoison_object(void *ptr,
 							  size_t size)
-- 
cgit v1.2.3


From 2d1e54aab6fd01f7502af20e125312e06a15bf9c Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Wed, 11 Mar 2026 17:24:37 +0000
Subject: mm: abstract reading sysctl_max_map_count, and READ_ONCE()

Concurrent reads and writes of sysctl_max_map_count are possible, so we
should READ_ONCE() and WRITE_ONCE().

The sysctl procfs logic already enforces WRITE_ONCE(), so abstract the
read side with get_sysctl_max_map_count().

While we're here, also move the field to mm/internal.h and add the getter
there since only mm interacts with it, there's no need for anybody else to
have access.

Finally, update the VMA userland tests to reflect the change.

Link: https://lkml.kernel.org/r/0715259eb37cbdfde4f9e5db92a20ec7110a1ce5.1773249037.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Cc: Jann Horn <jannh@google.com>
Cc: Jianzhou Zhao <luckd0g@163.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h                 | 2 --
 mm/internal.h                      | 6 ++++++
 mm/mmap.c                          | 2 +-
 mm/mremap.c                        | 4 ++--
 mm/nommu.c                         | 2 +-
 mm/vma.c                           | 6 +++---
 tools/testing/vma/include/custom.h | 3 ---
 tools/testing/vma/include/dup.h    | 9 +++++++++
 tools/testing/vma/main.c           | 2 ++
 9 files changed, 24 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index efb8be5d259c..25ba5816e02b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -207,8 +207,6 @@ static inline void __mm_zero_struct_page(struct page *page)
 #define MAPCOUNT_ELF_CORE_MARGIN	(5)
 #define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
 
-extern int sysctl_max_map_count;
-
 extern unsigned long sysctl_user_reserve_kbytes;
 extern unsigned long sysctl_admin_reserve_kbytes;
 
diff --git a/mm/internal.h b/mm/internal.h
index f50a0376b87e..62d80fd37ae1 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1863,4 +1863,10 @@ static inline int pmdp_test_and_clear_young_notify(struct vm_area_struct *vma,
 
 #endif /* CONFIG_MMU_NOTIFIER */
 
+extern int sysctl_max_map_count;
+static inline int get_sysctl_max_map_count(void)
+{
+	return READ_ONCE(sysctl_max_map_count);
+}
+
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index 843160946aa5..79544d893411 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -375,7 +375,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 		return -EOVERFLOW;
 
 	/* Too many mappings? */
-	if (mm->map_count > sysctl_max_map_count)
+	if (mm->map_count > get_sysctl_max_map_count())
 		return -ENOMEM;
 
 	/*
diff --git a/mm/mremap.c b/mm/mremap.c
index e8c3021dd841..ba6c690f6c1b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1045,7 +1045,7 @@ static unsigned long prep_move_vma(struct vma_remap_struct *vrm)
 	 * which may not merge, then (if MREMAP_DONTUNMAP is not set) unmap the
 	 * source, which may split, causing a net increase of 2 mappings.
 	 */
-	if (current->mm->map_count + 2 > sysctl_max_map_count)
+	if (current->mm->map_count + 2 > get_sysctl_max_map_count())
 		return -ENOMEM;
 
 	if (vma->vm_ops && vma->vm_ops->may_split) {
@@ -1813,7 +1813,7 @@ static unsigned long check_mremap_params(struct vma_remap_struct *vrm)
 	 * net increased map count of 2. In move_vma() we check for headroom of
 	 * 2 additional mappings, so check early to avoid bailing out then.
 	 */
-	if (current->mm->map_count + 4 > sysctl_max_map_count)
+	if (current->mm->map_count + 4 > get_sysctl_max_map_count())
 		return -ENOMEM;
 
 	return 0;
diff --git a/mm/nommu.c b/mm/nommu.c
index c3a23b082adb..ed3934bc2de4 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1317,7 +1317,7 @@ static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	mm = vma->vm_mm;
-	if (mm->map_count >= sysctl_max_map_count)
+	if (mm->map_count >= get_sysctl_max_map_count())
 		return -ENOMEM;
 
 	region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL);
diff --git a/mm/vma.c b/mm/vma.c
index b7055c264b5d..4d21e7d8e93c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -590,7 +590,7 @@ out_free_vma:
 static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		     unsigned long addr, int new_below)
 {
-	if (vma->vm_mm->map_count >= sysctl_max_map_count)
+	if (vma->vm_mm->map_count >= get_sysctl_max_map_count())
 		return -ENOMEM;
 
 	return __split_vma(vmi, vma, addr, new_below);
@@ -1394,7 +1394,7 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
 		 * its limit temporarily, to help free resources as expected.
 		 */
 		if (vms->end < vms->vma->vm_end &&
-		    vms->vma->vm_mm->map_count >= sysctl_max_map_count) {
+		    vms->vma->vm_mm->map_count >= get_sysctl_max_map_count()) {
 			error = -ENOMEM;
 			goto map_count_exceeded;
 		}
@@ -2868,7 +2868,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
-	if (mm->map_count > sysctl_max_map_count)
+	if (mm->map_count > get_sysctl_max_map_count())
 		return -ENOMEM;
 
 	if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
diff --git a/tools/testing/vma/include/custom.h b/tools/testing/vma/include/custom.h
index 7150e09122b2..6c62a38a2f6f 100644
--- a/tools/testing/vma/include/custom.h
+++ b/tools/testing/vma/include/custom.h
@@ -21,9 +21,6 @@ extern unsigned long dac_mmap_min_addr;
 #define VM_BUG_ON(_expr) (BUG_ON(_expr))
 #define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
 
-/* We hardcode this for now. */
-#define sysctl_max_map_count 0x1000000UL
-
 #define TASK_SIZE ((1ul << 47)-PAGE_SIZE)
 
 /*
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 5eb313beb43d..8865ffe046d8 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -419,6 +419,9 @@ struct vma_iterator {
 
 #define EMPTY_VMA_FLAGS ((vma_flags_t){ })
 
+#define MAPCOUNT_ELF_CORE_MARGIN	(5)
+#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+
 /* What action should be taken after an .mmap_prepare call is complete? */
 enum mmap_action_type {
 	MMAP_NOTHING,		/* Mapping is complete, no further action. */
@@ -1342,3 +1345,9 @@ static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
 	swap(vma->vm_file, file);
 	fput(file);
 }
+
+extern int sysctl_max_map_count;
+static inline int get_sysctl_max_map_count(void)
+{
+	return READ_ONCE(sysctl_max_map_count);
+}
diff --git a/tools/testing/vma/main.c b/tools/testing/vma/main.c
index 49b09e97a51f..18338f5d29e0 100644
--- a/tools/testing/vma/main.c
+++ b/tools/testing/vma/main.c
@@ -14,6 +14,8 @@
 #include "tests/mmap.c"
 #include "tests/vma.c"
 
+int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
+
 /* Helper functions which utilise static kernel functions. */
 
 struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg)
-- 
cgit v1.2.3


From eabc2eddb2767e0ed90f98a65744bf4c8e287db7 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 10 Mar 2026 22:29:24 -0700
Subject: mm/damon/core: receive addr_unit on
 damon_set_region_biggest_system_ram_default()

damon_find_biggest_system_ram() was not supporting addr_unit in the past.
Hence, its caller, damon_set_region_biggest_system_ram_default(), was also
not supporting addr_unit.  The previous commit has updated the inner
function to support addr_unit.  There is no more reason to not support
addr_unit on damon_set_region_biggest_system_ram_default().  Rather, it
makes unnecessary inconsistency on support of addr_unit.  Update it to
receive addr_unit and handle it inside.

Link: https://lkml.kernel.org/r/20260311052927.93921-4-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Yang yingliang <yangyingliang@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 1 +
 mm/damon/core.c       | 7 ++++---
 mm/damon/lru_sort.c   | 1 +
 mm/damon/reclaim.c    | 1 +
 4 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index e44e2132ccaf..d9a3babbafc1 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -994,6 +994,7 @@ int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control);
 
 int damon_set_region_biggest_system_ram_default(struct damon_target *t,
 				unsigned long *start, unsigned long *end,
+				unsigned long addr_unit,
 				unsigned long min_region_sz);
 
 #endif	/* CONFIG_DAMON */
diff --git a/mm/damon/core.c b/mm/damon/core.c
index f5f46ba5d537..01c892a1dcd2 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -3110,6 +3110,7 @@ static bool damon_find_biggest_system_ram(unsigned long *start,
  * @t:		The monitoring target to set the region.
  * @start:	The pointer to the start address of the region.
  * @end:	The pointer to the end address of the region.
+ * @addr_unit:	The address unit for the damon_ctx of @t.
  * @min_region_sz:	Minimum region size.
  *
  * This function sets the region of @t as requested by @start and @end.  If the
@@ -3122,7 +3123,7 @@ static bool damon_find_biggest_system_ram(unsigned long *start,
  */
 int damon_set_region_biggest_system_ram_default(struct damon_target *t,
 			unsigned long *start, unsigned long *end,
-			unsigned long min_region_sz)
+			unsigned long addr_unit, unsigned long min_region_sz)
 {
 	struct damon_addr_range addr_range;
 
@@ -3130,12 +3131,12 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t,
 		return -EINVAL;
 
 	if (!*start && !*end &&
-		!damon_find_biggest_system_ram(start, end, 1))
+			!damon_find_biggest_system_ram(start, end, addr_unit))
 		return -EINVAL;
 
 	addr_range.start = *start;
 	addr_range.end = *end;
-	return damon_set_regions(t, &addr_range, 1, min_region_sz);
+	return damon_set_regions(t, &addr_range, addr_unit, min_region_sz);
 }
 
 /*
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index 7bc5c0b2aea3..133ea17e258d 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -345,6 +345,7 @@ static int damon_lru_sort_apply_parameters(void)
 	err = damon_set_region_biggest_system_ram_default(param_target,
 					&monitor_region_start,
 					&monitor_region_end,
+					param_ctx->addr_unit,
 					param_ctx->min_region_sz);
 	if (err)
 		goto out;
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index 43d76f5bed44..01f2f6cdbcdf 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -251,6 +251,7 @@ static int damon_reclaim_apply_parameters(void)
 	err = damon_set_region_biggest_system_ram_default(param_target,
 					&monitor_region_start,
 					&monitor_region_end,
+					param_ctx->addr_unit,
 					param_ctx->min_region_sz);
 	if (err)
 		goto out;
-- 
cgit v1.2.3


From 0217c7fb4de4a40cee667eb21901f3204effe5ac Mon Sep 17 00:00:00 2001
From: Jianhui Zhou <jianhuizzzzz@gmail.com>
Date: Tue, 10 Mar 2026 19:05:26 +0800
Subject: mm/userfaultfd: fix hugetlb fault mutex hash calculation

In mfill_atomic_hugetlb(), linear_page_index() is used to calculate the
page index for hugetlb_fault_mutex_hash().  However, linear_page_index()
returns the index in PAGE_SIZE units, while hugetlb_fault_mutex_hash()
expects the index in huge page units.  This mismatch means that different
addresses within the same huge page can produce different hash values,
leading to the use of different mutexes for the same huge page.  This can
cause races between faulting threads, which can corrupt the reservation
map and trigger the BUG_ON in resv_map_release().

Fix this by introducing hugetlb_linear_page_index(), which returns the
page index in huge page granularity, and using it in place of
linear_page_index().

Link: https://lkml.kernel.org/r/20260310110526.335749-1-jianhuizzzzz@gmail.com
Fixes: a08c7193e4f1 ("mm/filemap: remove hugetlb special casing in filemap.c")
Signed-off-by: Jianhui Zhou <jianhuizzzzz@gmail.com>
Reported-by: syzbot+f525fd79634858f478e7@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=f525fd79634858f478e7
Acked-by: SeongJae Park <sj@kernel.org>
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: JonasZhou <JonasZhou@zhaoxin.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 17 +++++++++++++++++
 mm/userfaultfd.c        |  2 +-
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index aaf3d472e6b5..9c098a02a09e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -792,6 +792,23 @@ static inline unsigned huge_page_shift(struct hstate *h)
 	return h->order + PAGE_SHIFT;
 }
 
+/**
+ * hugetlb_linear_page_index() - linear_page_index() but in hugetlb
+ *				 page size granularity.
+ * @vma: the hugetlb VMA
+ * @address: the virtual address within the VMA
+ *
+ * Return: the page offset within the mapping in huge page units.
+ */
+static inline pgoff_t hugetlb_linear_page_index(struct vm_area_struct *vma,
+		unsigned long address)
+{
+	struct hstate *h = hstate_vma(vma);
+
+	return ((address - vma->vm_start) >> huge_page_shift(h)) +
+		(vma->vm_pgoff >> huge_page_order(h));
+}
+
 static inline bool order_is_gigantic(unsigned int order)
 {
 	return order > MAX_PAGE_ORDER;
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index e19872e51878..2c565c7134b6 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -573,7 +573,7 @@ retry:
 		 * in the case of shared pmds.  fault mutex prevents
 		 * races with other faulting threads.
 		 */
-		idx = linear_page_index(dst_vma, dst_addr);
+		idx = hugetlb_linear_page_index(dst_vma, dst_addr);
 		mapping = dst_vma->vm_file->f_mapping;
 		hash = hugetlb_fault_mutex_hash(mapping, idx);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
-- 
cgit v1.2.3


From a91fd9f710490a89713823be3e7790ac59a085f8 Mon Sep 17 00:00:00 2001
From: Nico Pache <npache@redhat.com>
Date: Wed, 25 Mar 2026 05:40:18 -0600
Subject: mm: consolidate anonymous folio PTE mapping into helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "mm: khugepaged cleanups and mTHP prerequisites", v4.

The following series contains cleanups and prerequisites for my work on
khugepaged mTHP support [1].  These have been separated out to ease
review.

The first patch in the series refactors the page fault folio to pte
mapping and follows a similar convention as defined by
map_anon_folio_pmd_(no)pf().  This not only cleans up the current
implementation of do_anonymous_page(), but will allow for reuse later in
the khugepaged mTHP implementation.

The second patch adds a small is_pmd_order() helper to check if an order
is the PMD order.  This check is open-coded in a number of places.  This
patch aims to clean this up and will be used more in the khugepaged mTHP
work.  The third patch also adds a small DEFINE for (HPAGE_PMD_NR - 1)
which is used often across the khugepaged code.

The fourth and fifth patch come from the khugepaged mTHP patchset [1].
These two patches include the rename of function prefixes, and the
unification of khugepaged and madvise_collapse via a new
collapse_single_pmd function.

Patch 1:     refactor do_anonymous_page into map_anon_folio_pte_(no)pf
Patch 2:     add is_pmd_order helper
Patch 3:     Add define for (HPAGE_PMD_NR - 1)
Patch 4:     Refactor/rename hpage_collapse
Patch 5:     Refactoring to combine madvise_collapse and khugepaged

A big thanks to everyone that has reviewed, tested, and participated in
the development process.


This patch (of 5):

The anonymous page fault handler in do_anonymous_page() open-codes the
sequence to map a newly allocated anonymous folio at the PTE level:

	- construct the PTE entry
	- add rmap
	- add to LRU
	- set the PTEs
	- update the MMU cache.

Introduce two helpers to consolidate this duplicated logic, mirroring the
existing map_anon_folio_pmd_nopf() pattern for PMD-level mappings:

map_anon_folio_pte_nopf(): constructs the PTE entry, takes folio
references, adds anon rmap and LRU.  This function also handles the
uffd_wp that can occur in the pf variant.  The future khugepaged mTHP code
calls this to handle mapping the new collapsed mTHP to its folio.

map_anon_folio_pte_pf(): extends the nopf variant to handle MM_ANONPAGES
counter updates, and mTHP fault allocation statistics for the page fault
path.

The zero-page read path in do_anonymous_page() is also untangled from the
shared setpte label, since it does not allocate a folio and should not
share the same mapping sequence as the write path.  We can now leave
nr_pages undeclared at the function intialization, and use the single page
update_mmu_cache function to handle the zero page update.

This refactoring will also help reduce code duplication between
mm/memory.c and mm/khugepaged.c, and provides a clean API for PTE-level
anonymous folio mapping that can be reused by future callers (like
khugpeaged mTHP support)

Link: https://lkml.kernel.org/r/20260325114022.444081-1-npache@redhat.com
Link: https://lkml.kernel.org/r/20260325114022.444081-2-npache@redhat.com
Link: https://lore.kernel.org/all/20260122192841.128719-1-npache@redhat.com
Signed-off-by: Nico Pache <npache@redhat.com>
Suggested-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nanyong Sun <sunnanyong@huawei.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: Rafael Aquini <raquini@redhat.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shivank Garg <shivankg@amd.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Takashi Iwai (SUSE) <tiwai@suse.de>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <yang@os.amperecomputing.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h |  4 ++++
 mm/memory.c        | 61 ++++++++++++++++++++++++++++++++++++------------------
 2 files changed, 45 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 25ba5816e02b..16a1ad9a3397 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4916,4 +4916,8 @@ static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps)
 
 void snapshot_page(struct page_snapshot *ps, const struct page *page);
 
+void map_anon_folio_pte_nopf(struct folio *folio, pte_t *pte,
+		struct vm_area_struct *vma, unsigned long addr,
+		bool uffd_wp);
+
 #endif /* _LINUX_MM_H */
diff --git a/mm/memory.c b/mm/memory.c
index f21c804b50bf..7c350a38fecf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5197,6 +5197,37 @@ fallback:
 	return folio_prealloc(vma->vm_mm, vma, vmf->address, true);
 }
 
+void map_anon_folio_pte_nopf(struct folio *folio, pte_t *pte,
+		struct vm_area_struct *vma, unsigned long addr,
+		bool uffd_wp)
+{
+	const unsigned int nr_pages = folio_nr_pages(folio);
+	pte_t entry = folio_mk_pte(folio, vma->vm_page_prot);
+
+	entry = pte_sw_mkyoung(entry);
+
+	if (vma->vm_flags & VM_WRITE)
+		entry = pte_mkwrite(pte_mkdirty(entry), vma);
+	if (uffd_wp)
+		entry = pte_mkuffd_wp(entry);
+
+	folio_ref_add(folio, nr_pages - 1);
+	folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
+	folio_add_lru_vma(folio, vma);
+	set_ptes(vma->vm_mm, addr, pte, entry, nr_pages);
+	update_mmu_cache_range(NULL, vma, addr, pte, nr_pages);
+}
+
+static void map_anon_folio_pte_pf(struct folio *folio, pte_t *pte,
+		struct vm_area_struct *vma, unsigned long addr, bool uffd_wp)
+{
+	const unsigned int order = folio_order(folio);
+
+	map_anon_folio_pte_nopf(folio, pte, vma, addr, uffd_wp);
+	add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1L << order);
+	count_mthp_stat(order, MTHP_STAT_ANON_FAULT_ALLOC);
+}
+
 /*
  * We enter with non-exclusive mmap_lock (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
@@ -5208,7 +5239,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	unsigned long addr = vmf->address;
 	struct folio *folio;
 	vm_fault_t ret = 0;
-	int nr_pages = 1;
+	int nr_pages;
 	pte_t entry;
 
 	/* File mapping without ->vm_ops ? */
@@ -5243,7 +5274,13 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			return handle_userfault(vmf, VM_UFFD_MISSING);
 		}
-		goto setpte;
+		if (vmf_orig_pte_uffd_wp(vmf))
+			entry = pte_mkuffd_wp(entry);
+		set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
+
+		/* No need to invalidate - it was non-present before */
+		update_mmu_cache(vma, addr, vmf->pte);
+		goto unlock;
 	}
 
 	/* Allocate our own private page. */
@@ -5267,11 +5304,6 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	 */
 	__folio_mark_uptodate(folio);
 
-	entry = folio_mk_pte(folio, vma->vm_page_prot);
-	entry = pte_sw_mkyoung(entry);
-	if (vma->vm_flags & VM_WRITE)
-		entry = pte_mkwrite(pte_mkdirty(entry), vma);
-
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
 	if (!vmf->pte)
 		goto release;
@@ -5293,19 +5325,8 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 		folio_put(folio);
 		return handle_userfault(vmf, VM_UFFD_MISSING);
 	}
-
-	folio_ref_add(folio, nr_pages - 1);
-	add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages);
-	count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_FAULT_ALLOC);
-	folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
-	folio_add_lru_vma(folio, vma);
-setpte:
-	if (vmf_orig_pte_uffd_wp(vmf))
-		entry = pte_mkuffd_wp(entry);
-	set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages);
-
-	/* No need to invalidate - it was non-present before */
-	update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr_pages);
+	map_anon_folio_pte_pf(folio, vmf->pte, vma, addr,
+			      vmf_orig_pte_uffd_wp(vmf));
 unlock:
 	if (vmf->pte)
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
-- 
cgit v1.2.3


From b90c453d2664ba445383956560581f9db708584f Mon Sep 17 00:00:00 2001
From: Nico Pache <npache@redhat.com>
Date: Wed, 25 Mar 2026 05:40:19 -0600
Subject: mm: introduce is_pmd_order helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to add mTHP support to khugepaged, we will often be checking if a
given order is (or is not) a PMD order.  Some places in the kernel already
use this check, so lets create a simple helper function to keep the code
clean and readable.

Link: https://lkml.kernel.org/r/20260325114022.444081-3-npache@redhat.com
Signed-off-by: Nico Pache <npache@redhat.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nanyong Sun <sunnanyong@huawei.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Rafael Aquini <raquini@redhat.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shivank Garg <shivankg@amd.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Takashi Iwai (SUSE) <tiwai@suse.de>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <yang@os.amperecomputing.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 5 +++++
 mm/huge_memory.c        | 2 +-
 mm/khugepaged.c         | 6 +++---
 mm/memory.c             | 2 +-
 mm/mempolicy.c          | 2 +-
 mm/page_alloc.c         | 4 ++--
 mm/shmem.c              | 3 +--
 7 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a4d9f964dfde..bd7f0e1d8094 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -771,6 +771,11 @@ static inline bool pmd_is_huge(pmd_t pmd)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static inline bool is_pmd_order(unsigned int order)
+{
+	return order == HPAGE_PMD_ORDER;
+}
+
 static inline int split_folio_to_list_to_order(struct folio *folio,
 		struct list_head *list, int new_order)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9fea52ccad56..1c1a7cf7b209 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4159,7 +4159,7 @@ out_unlock:
 		i_mmap_unlock_read(mapping);
 out:
 	xas_destroy(&xas);
-	if (old_order == HPAGE_PMD_ORDER)
+	if (is_pmd_order(old_order))
 		count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
 	count_mthp_stat(old_order, !ret ? MTHP_STAT_SPLIT : MTHP_STAT_SPLIT_FAILED);
 	return ret;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f972a9a65e3a..c6a5d9d1f252 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1540,7 +1540,7 @@ static enum scan_result try_collapse_pte_mapped_thp(struct mm_struct *mm, unsign
 	if (IS_ERR(folio))
 		return SCAN_PAGE_NULL;
 
-	if (folio_order(folio) != HPAGE_PMD_ORDER) {
+	if (!is_pmd_order(folio_order(folio))) {
 		result = SCAN_PAGE_COMPOUND;
 		goto drop_folio;
 	}
@@ -2023,7 +2023,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 		 * we locked the first folio, then a THP might be there already.
 		 * This will be discovered on the first iteration.
 		 */
-		if (folio_order(folio) == HPAGE_PMD_ORDER) {
+		if (is_pmd_order(folio_order(folio))) {
 			result = SCAN_PTE_MAPPED_HUGEPAGE;
 			goto out_unlock;
 		}
@@ -2351,7 +2351,7 @@ static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm,
 			continue;
 		}
 
-		if (folio_order(folio) == HPAGE_PMD_ORDER) {
+		if (is_pmd_order(folio_order(folio))) {
 			result = SCAN_PTE_MAPPED_HUGEPAGE;
 			/*
 			 * PMD-sized THP implies that we can only try
diff --git a/mm/memory.c b/mm/memory.c
index 7c350a38fecf..6d54e5ec82f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5435,7 +5435,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *pa
 	if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
 		return ret;
 
-	if (folio_order(folio) != HPAGE_PMD_ORDER)
+	if (!is_pmd_order(folio_order(folio)))
 		return ret;
 	page = &folio->page;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0e5175f1c767..e5528c35bbb8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2449,7 +2449,7 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
 
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
 	    /* filter "hugepage" allocation, unless from alloc_pages() */
-	    order == HPAGE_PMD_ORDER && ilx != NO_INTERLEAVE_INDEX) {
+	    is_pmd_order(order) && ilx != NO_INTERLEAVE_INDEX) {
 		/*
 		 * For hugepage allocation and non-interleave policy which
 		 * allows the current node (or other explicitly preferred
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 937e9b850709..cdde59e56a55 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -651,7 +651,7 @@ static inline unsigned int order_to_pindex(int migratetype, int order)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	bool movable;
 	if (order > PAGE_ALLOC_COSTLY_ORDER) {
-		VM_BUG_ON(order != HPAGE_PMD_ORDER);
+		VM_BUG_ON(!is_pmd_order(order));
 
 		movable = migratetype == MIGRATE_MOVABLE;
 
@@ -683,7 +683,7 @@ static inline bool pcp_allowed_order(unsigned int order)
 	if (order <= PAGE_ALLOC_COSTLY_ORDER)
 		return true;
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	if (order == HPAGE_PMD_ORDER)
+	if (is_pmd_order(order))
 		return true;
 #endif
 	return false;
diff --git a/mm/shmem.c b/mm/shmem.c
index 5e7dcf5bc5d3..6fa1e8340c93 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -5558,8 +5558,7 @@ static ssize_t thpsize_shmem_enabled_store(struct kobject *kobj,
 		spin_unlock(&huge_shmem_orders_lock);
 	} else if (sysfs_streq(buf, "inherit")) {
 		/* Do not override huge allocation policy with non-PMD sized mTHP */
-		if (shmem_huge == SHMEM_HUGE_FORCE &&
-		    order != HPAGE_PMD_ORDER)
+		if (shmem_huge == SHMEM_HUGE_FORCE && !is_pmd_order(order))
 			return -EINVAL;
 
 		spin_lock(&huge_shmem_orders_lock);
-- 
cgit v1.2.3


From 22688ade3b54b2f4f2887c7dad75db6d588ae07c Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 20 Mar 2026 23:13:41 +0100
Subject: mm/sparse: remove sparse_decode_mem_map()

section_deactivate() applies to CONFIG_SPARSEMEM_VMEMMAP only.  So we can
just use pfn_to_page() (after making sure we have the start PFN of the
section), and remove sparse_decode_mem_map().

Link: https://lkml.kernel.org/r/20260320-sparsemem_cleanups-v2-9-096addc8800d@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  2 --
 mm/sparse.c                    | 16 +---------------
 2 files changed, 1 insertion(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index e77ef3d7ff73..815e908c4135 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -308,8 +308,6 @@ extern int sparse_add_section(int nid, unsigned long pfn,
 		struct dev_pagemap *pgmap);
 extern void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
 				  struct vmem_altmap *altmap);
-extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
-					  unsigned long pnum);
 extern struct zone *zone_for_pfn_range(enum mmop online_type,
 		int nid, struct memory_group *group, unsigned long start_pfn,
 		unsigned long nr_pages);
diff --git a/mm/sparse.c b/mm/sparse.c
index 875f718a4c79..b5825c9ee2f2 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -274,18 +274,6 @@ static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long p
 	return coded_mem_map;
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-/*
- * Decode mem_map from the coded memmap
- */
-struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
-{
-	/* mask off the extra low bits of information */
-	coded_mem_map &= SECTION_MAP_MASK;
-	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
-}
-#endif /* CONFIG_MEMORY_HOTPLUG */
-
 static void __meminit sparse_init_one_section(struct mem_section *ms,
 		unsigned long pnum, struct page *mem_map,
 		struct mem_section_usage *usage, unsigned long flags)
@@ -754,8 +742,6 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
 
 	empty = is_subsection_map_empty(ms);
 	if (empty) {
-		unsigned long section_nr = pfn_to_section_nr(pfn);
-
 		/*
 		 * Mark the section invalid so that valid_section()
 		 * return false. This prevents code from dereferencing
@@ -774,7 +760,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
 			kfree_rcu(ms->usage, rcu);
 			WRITE_ONCE(ms->usage, NULL);
 		}
-		memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+		memmap = pfn_to_page(SECTION_ALIGN_DOWN(pfn));
 	}
 
 	/*
-- 
cgit v1.2.3


From fead6dcff83b02f8d6dc3c1ebbe4e09c05c54ee5 Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 20 Mar 2026 23:13:43 +0100
Subject: mm: prepare to move subsection_map_init() to mm/sparse-vmemmap.c

We want to move subsection_map_init() to mm/sparse-vmemmap.c.

To prepare for getting rid of subsection_map_init() in mm/sparse.c
completely, use a static inline function for !CONFIG_SPARSEMEM_VMEMMAP.

While at it, move the declaration to internal.h and rename it to
"sparse_init_subsection_map()".

Link: https://lkml.kernel.org/r/20260320-sparsemem_cleanups-v2-11-096addc8800d@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h |  3 ---
 mm/internal.h          | 12 ++++++++++++
 mm/mm_init.c           |  2 +-
 mm/sparse.c            |  6 +-----
 4 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3f651baf7e2b..7cf4a194aea2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1982,8 +1982,6 @@ struct mem_section_usage {
 	unsigned long pageblock_flags[0];
 };
 
-void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
-
 struct page;
 struct page_ext;
 struct mem_section {
@@ -2376,7 +2374,6 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
 #define sparse_vmemmap_init_nid_early(_nid) do {} while (0)
 #define sparse_vmemmap_init_nid_late(_nid) do {} while (0)
 #define pfn_in_present_section pfn_valid
-#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
 /*
diff --git a/mm/internal.h b/mm/internal.h
index 62d80fd37ae1..11b0c91b6d9d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -959,12 +959,24 @@ void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
 		unsigned long, enum meminit_context, struct vmem_altmap *, int,
 		bool);
 
+/*
+ * mm/sparse.c
+ */
 #ifdef CONFIG_SPARSEMEM
 void sparse_init(void);
 #else
 static inline void sparse_init(void) {}
 #endif /* CONFIG_SPARSEMEM */
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+void sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages);
+#else
+static inline void sparse_init_subsection_map(unsigned long pfn,
+		unsigned long nr_pages)
+{
+}
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 /*
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5b261f86ba6f..4324b93ccebd 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1896,7 +1896,7 @@ static void __init free_area_init(void)
 		pr_info("  node %3d: [mem %#018Lx-%#018Lx]\n", nid,
 			(u64)start_pfn << PAGE_SHIFT,
 			((u64)end_pfn << PAGE_SHIFT) - 1);
-		subsection_map_init(start_pfn, end_pfn - start_pfn);
+		sparse_init_subsection_map(start_pfn, end_pfn - start_pfn);
 	}
 
 	/* Initialise every node */
diff --git a/mm/sparse.c b/mm/sparse.c
index e2048b1fbf5f..c96ac5e70c22 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -185,7 +185,7 @@ static void subsection_mask_set(unsigned long *map, unsigned long pfn,
 	bitmap_set(map, idx, end - idx + 1);
 }
 
-void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
+void __init sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages)
 {
 	int end_sec_nr = pfn_to_section_nr(pfn + nr_pages - 1);
 	unsigned long nr, start_sec_nr = pfn_to_section_nr(pfn);
@@ -207,10 +207,6 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
 		nr_pages -= pfns;
 	}
 }
-#else
-void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
-{
-}
 #endif
 
 /* Record a memory area against a node. */
-- 
cgit v1.2.3


From f62a3bf227c95a105fccb5a2062367387cd49430 Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 20 Mar 2026 23:13:45 +0100
Subject: mm/sparse: move sparse_init_one_section() to internal.h

While at it, convert the BUG_ON to a VM_WARN_ON_ONCE, avoid long lines,
and merge sparse_encode_mem_map() into its only caller
sparse_init_one_section().

Clarify the comment a bit, pointing at page_to_pfn().

[david@kernel.org: s/VM_WARN_ON/VM_WARN_ON_ONCE/]
  Link: https://lkml.kernel.org/r/6b04c1a1-74e7-42e8-8523-a40802e5dacc@kernel.org
Link: https://lkml.kernel.org/r/20260320-sparsemem_cleanups-v2-13-096addc8800d@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h |  2 +-
 mm/internal.h          | 22 ++++++++++++++++++++++
 mm/sparse.c            | 24 ------------------------
 3 files changed, 23 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7cf4a194aea2..ed335567d64e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1988,7 +1988,7 @@ struct mem_section {
 	/*
 	 * This is, logically, a pointer to an array of struct
 	 * pages.  However, it is stored with some other magic.
-	 * (see sparse.c::sparse_init_one_section())
+	 * (see sparse_init_one_section())
 	 *
 	 * Additionally during early boot we encode node id of
 	 * the location of the section here to guide allocation.
diff --git a/mm/internal.h b/mm/internal.h
index 11b0c91b6d9d..e14f58527688 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -964,6 +964,28 @@ void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
  */
 #ifdef CONFIG_SPARSEMEM
 void sparse_init(void);
+
+static inline void sparse_init_one_section(struct mem_section *ms,
+		unsigned long pnum, struct page *mem_map,
+		struct mem_section_usage *usage, unsigned long flags)
+{
+	unsigned long coded_mem_map;
+
+	BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT);
+
+	/*
+	 * We encode the start PFN of the section into the mem_map such that
+	 * page_to_pfn() on !CONFIG_SPARSEMEM_VMEMMAP can simply subtract it
+	 * from the page pointer to obtain the PFN.
+	 */
+	coded_mem_map = (unsigned long)(mem_map - section_nr_to_pfn(pnum));
+	VM_WARN_ON_ONCE(coded_mem_map & ~SECTION_MAP_MASK);
+
+	ms->section_mem_map &= ~SECTION_MAP_MASK;
+	ms->section_mem_map |= coded_mem_map;
+	ms->section_mem_map |= flags | SECTION_HAS_MEM_MAP;
+	ms->usage = usage;
+}
 #else
 static inline void sparse_init(void) {}
 #endif /* CONFIG_SPARSEMEM */
diff --git a/mm/sparse.c b/mm/sparse.c
index 5c9cad390282..ed5de1a25f04 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -256,30 +256,6 @@ static void __init memblocks_present(void)
 		memory_present(nid, start, end);
 }
 
-/*
- * Subtle, we encode the real pfn into the mem_map such that
- * the identity pfn - section_mem_map will return the actual
- * physical page frame number.
- */
-static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
-{
-	unsigned long coded_mem_map =
-		(unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
-	BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT);
-	BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
-	return coded_mem_map;
-}
-
-static void __meminit sparse_init_one_section(struct mem_section *ms,
-		unsigned long pnum, struct page *mem_map,
-		struct mem_section_usage *usage, unsigned long flags)
-{
-	ms->section_mem_map &= ~SECTION_MAP_MASK;
-	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum)
-		| SECTION_HAS_MEM_MAP | flags;
-	ms->usage = usage;
-}
-
 static unsigned long usemap_size(void)
 {
 	return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
-- 
cgit v1.2.3


From 738de20c4fafe64290c5086d683254f60e837db6 Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Fri, 20 Mar 2026 23:13:47 +0100
Subject: mm/sparse: move memory hotplug bits to sparse-vmemmap.c

Let's move all memory hoptplug related code to sparse-vmemmap.c.

We only have to expose sparse_index_init().  While at it, drop the
definition of sparse_index_init() for !CONFIG_SPARSEMEM, which is unused,
and place the declaration in internal.h.

Link: https://lkml.kernel.org/r/20260320-sparsemem_cleanups-v2-15-096addc8800d@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h |   1 -
 mm/internal.h          |   4 +
 mm/sparse-vmemmap.c    | 304 ++++++++++++++++++++++++++++++++++++++++++++++++
 mm/sparse.c            | 310 +------------------------------------------------
 4 files changed, 310 insertions(+), 309 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ed335567d64e..4a20df132258 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2370,7 +2370,6 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
 #endif
 
 #else
-#define sparse_index_init(_sec, _nid)  do {} while (0)
 #define sparse_vmemmap_init_nid_early(_nid) do {} while (0)
 #define sparse_vmemmap_init_nid_late(_nid) do {} while (0)
 #define pfn_in_present_section pfn_valid
diff --git a/mm/internal.h b/mm/internal.h
index 4e753bbf00ae..9ae0ee6c34f9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -964,6 +964,7 @@ void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
  */
 #ifdef CONFIG_SPARSEMEM
 void sparse_init(void);
+int sparse_index_init(unsigned long section_nr, int nid);
 
 static inline void sparse_init_one_section(struct mem_section *ms,
 		unsigned long pnum, struct page *mem_map,
@@ -999,6 +1000,9 @@ static inline void __section_mark_present(struct mem_section *ms,
 static inline void sparse_init(void) {}
 #endif /* CONFIG_SPARSEMEM */
 
+/*
+ * mm/sparse-vmemmap.c
+ */
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 void sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages);
 #else
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 842ed2f0bce6..24a37676cecb 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -591,3 +591,307 @@ void __init sparse_vmemmap_init_nid_late(int nid)
 	hugetlb_vmemmap_init_late(nid);
 }
 #endif
+
+static void subsection_mask_set(unsigned long *map, unsigned long pfn,
+		unsigned long nr_pages)
+{
+	int idx = subsection_map_index(pfn);
+	int end = subsection_map_index(pfn + nr_pages - 1);
+
+	bitmap_set(map, idx, end - idx + 1);
+}
+
+void __init sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+	int end_sec_nr = pfn_to_section_nr(pfn + nr_pages - 1);
+	unsigned long nr, start_sec_nr = pfn_to_section_nr(pfn);
+
+	for (nr = start_sec_nr; nr <= end_sec_nr; nr++) {
+		struct mem_section *ms;
+		unsigned long pfns;
+
+		pfns = min(nr_pages, PAGES_PER_SECTION
+				- (pfn & ~PAGE_SECTION_MASK));
+		ms = __nr_to_section(nr);
+		subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
+
+		pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
+				pfns, subsection_map_index(pfn),
+				subsection_map_index(pfn + pfns - 1));
+
+		pfn += pfns;
+		nr_pages -= pfns;
+	}
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+/* Mark all memory sections within the pfn range as online */
+void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		unsigned long section_nr = pfn_to_section_nr(pfn);
+		struct mem_section *ms = __nr_to_section(section_nr);
+
+		ms->section_mem_map |= SECTION_IS_ONLINE;
+	}
+}
+
+/* Mark all memory sections within the pfn range as offline */
+void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		unsigned long section_nr = pfn_to_section_nr(pfn);
+		struct mem_section *ms = __nr_to_section(section_nr);
+
+		ms->section_mem_map &= ~SECTION_IS_ONLINE;
+	}
+}
+
+static struct page * __meminit populate_section_memmap(unsigned long pfn,
+		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
+		struct dev_pagemap *pgmap)
+{
+	return __populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
+}
+
+static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
+		struct vmem_altmap *altmap)
+{
+	unsigned long start = (unsigned long) pfn_to_page(pfn);
+	unsigned long end = start + nr_pages * sizeof(struct page);
+
+	vmemmap_free(start, end, altmap);
+}
+static void free_map_bootmem(struct page *memmap)
+{
+	unsigned long start = (unsigned long)memmap;
+	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
+
+	vmemmap_free(start, end, NULL);
+}
+
+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
+	DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
+	struct mem_section *ms = __pfn_to_section(pfn);
+	unsigned long *subsection_map = ms->usage
+		? &ms->usage->subsection_map[0] : NULL;
+
+	subsection_mask_set(map, pfn, nr_pages);
+	if (subsection_map)
+		bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
+
+	if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
+				"section already deactivated (%#lx + %ld)\n",
+				pfn, nr_pages))
+		return -EINVAL;
+
+	bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
+	return 0;
+}
+
+static bool is_subsection_map_empty(struct mem_section *ms)
+{
+	return bitmap_empty(&ms->usage->subsection_map[0],
+			    SUBSECTIONS_PER_SECTION);
+}
+
+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+	struct mem_section *ms = __pfn_to_section(pfn);
+	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
+	unsigned long *subsection_map;
+	int rc = 0;
+
+	subsection_mask_set(map, pfn, nr_pages);
+
+	subsection_map = &ms->usage->subsection_map[0];
+
+	if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
+		rc = -EINVAL;
+	else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
+		rc = -EEXIST;
+	else
+		bitmap_or(subsection_map, map, subsection_map,
+				SUBSECTIONS_PER_SECTION);
+
+	return rc;
+}
+
+/*
+ * To deactivate a memory region, there are 3 cases to handle:
+ *
+ * 1. deactivation of a partial hot-added section:
+ *      a) section was present at memory init.
+ *      b) section was hot-added post memory init.
+ * 2. deactivation of a complete hot-added section.
+ * 3. deactivation of a complete section from memory init.
+ *
+ * For 1, when subsection_map does not empty we will not be freeing the
+ * usage map, but still need to free the vmemmap range.
+ */
+static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
+		struct vmem_altmap *altmap)
+{
+	struct mem_section *ms = __pfn_to_section(pfn);
+	bool section_is_early = early_section(ms);
+	struct page *memmap = NULL;
+	bool empty;
+
+	if (clear_subsection_map(pfn, nr_pages))
+		return;
+
+	empty = is_subsection_map_empty(ms);
+	if (empty) {
+		/*
+		 * Mark the section invalid so that valid_section()
+		 * return false. This prevents code from dereferencing
+		 * ms->usage array.
+		 */
+		ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
+
+		/*
+		 * When removing an early section, the usage map is kept (as the
+		 * usage maps of other sections fall into the same page). It
+		 * will be re-used when re-adding the section - which is then no
+		 * longer an early section. If the usage map is PageReserved, it
+		 * was allocated during boot.
+		 */
+		if (!PageReserved(virt_to_page(ms->usage))) {
+			kfree_rcu(ms->usage, rcu);
+			WRITE_ONCE(ms->usage, NULL);
+		}
+		memmap = pfn_to_page(SECTION_ALIGN_DOWN(pfn));
+	}
+
+	/*
+	 * The memmap of early sections is always fully populated. See
+	 * section_activate() and pfn_valid() .
+	 */
+	if (!section_is_early) {
+		memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)));
+		depopulate_section_memmap(pfn, nr_pages, altmap);
+	} else if (memmap) {
+		memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page),
+							  PAGE_SIZE)));
+		free_map_bootmem(memmap);
+	}
+
+	if (empty)
+		ms->section_mem_map = (unsigned long)NULL;
+}
+
+static struct page * __meminit section_activate(int nid, unsigned long pfn,
+		unsigned long nr_pages, struct vmem_altmap *altmap,
+		struct dev_pagemap *pgmap)
+{
+	struct mem_section *ms = __pfn_to_section(pfn);
+	struct mem_section_usage *usage = NULL;
+	struct page *memmap;
+	int rc;
+
+	if (!ms->usage) {
+		usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
+		if (!usage)
+			return ERR_PTR(-ENOMEM);
+		ms->usage = usage;
+	}
+
+	rc = fill_subsection_map(pfn, nr_pages);
+	if (rc) {
+		if (usage)
+			ms->usage = NULL;
+		kfree(usage);
+		return ERR_PTR(rc);
+	}
+
+	/*
+	 * The early init code does not consider partially populated
+	 * initial sections, it simply assumes that memory will never be
+	 * referenced.  If we hot-add memory into such a section then we
+	 * do not need to populate the memmap and can simply reuse what
+	 * is already there.
+	 */
+	if (nr_pages < PAGES_PER_SECTION && early_section(ms))
+		return pfn_to_page(pfn);
+
+	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
+	if (!memmap) {
+		section_deactivate(pfn, nr_pages, altmap);
+		return ERR_PTR(-ENOMEM);
+	}
+	memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE));
+
+	return memmap;
+}
+
+/**
+ * sparse_add_section - add a memory section, or populate an existing one
+ * @nid: The node to add section on
+ * @start_pfn: start pfn of the memory range
+ * @nr_pages: number of pfns to add in the section
+ * @altmap: alternate pfns to allocate the memmap backing store
+ * @pgmap: alternate compound page geometry for devmap mappings
+ *
+ * This is only intended for hotplug.
+ *
+ * Note that only VMEMMAP supports sub-section aligned hotplug,
+ * the proper alignment and size are gated by check_pfn_span().
+ *
+ *
+ * Return:
+ * * 0		- On success.
+ * * -EEXIST	- Section has been present.
+ * * -ENOMEM	- Out of memory.
+ */
+int __meminit sparse_add_section(int nid, unsigned long start_pfn,
+		unsigned long nr_pages, struct vmem_altmap *altmap,
+		struct dev_pagemap *pgmap)
+{
+	unsigned long section_nr = pfn_to_section_nr(start_pfn);
+	struct mem_section *ms;
+	struct page *memmap;
+	int ret;
+
+	ret = sparse_index_init(section_nr, nid);
+	if (ret < 0)
+		return ret;
+
+	memmap = section_activate(nid, start_pfn, nr_pages, altmap, pgmap);
+	if (IS_ERR(memmap))
+		return PTR_ERR(memmap);
+
+	/*
+	 * Poison uninitialized struct pages in order to catch invalid flags
+	 * combinations.
+	 */
+	page_init_poison(memmap, sizeof(struct page) * nr_pages);
+
+	ms = __nr_to_section(section_nr);
+	__section_mark_present(ms, section_nr);
+
+	/* Align memmap to section boundary in the subsection case */
+	if (section_nr_to_pfn(section_nr) != start_pfn)
+		memmap = pfn_to_page(section_nr_to_pfn(section_nr));
+	sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);
+
+	return 0;
+}
+
+void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
+			   struct vmem_altmap *altmap)
+{
+	struct mem_section *ms = __pfn_to_section(pfn);
+
+	if (WARN_ON_ONCE(!valid_section(ms)))
+		return;
+
+	section_deactivate(pfn, nr_pages, altmap);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/mm/sparse.c b/mm/sparse.c
index ecd4c41c0ff0..007fd52c621e 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -79,7 +79,7 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid)
 	return section;
 }
 
-static int __meminit sparse_index_init(unsigned long section_nr, int nid)
+int __meminit sparse_index_init(unsigned long section_nr, int nid)
 {
 	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
 	struct mem_section *section;
@@ -103,7 +103,7 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid)
 	return 0;
 }
 #else /* !SPARSEMEM_EXTREME */
-static inline int sparse_index_init(unsigned long section_nr, int nid)
+int sparse_index_init(unsigned long section_nr, int nid)
 {
 	return 0;
 }
@@ -167,40 +167,6 @@ static inline unsigned long first_present_section_nr(void)
 	return next_present_section_nr(-1);
 }
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-static void subsection_mask_set(unsigned long *map, unsigned long pfn,
-		unsigned long nr_pages)
-{
-	int idx = subsection_map_index(pfn);
-	int end = subsection_map_index(pfn + nr_pages - 1);
-
-	bitmap_set(map, idx, end - idx + 1);
-}
-
-void __init sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages)
-{
-	int end_sec_nr = pfn_to_section_nr(pfn + nr_pages - 1);
-	unsigned long nr, start_sec_nr = pfn_to_section_nr(pfn);
-
-	for (nr = start_sec_nr; nr <= end_sec_nr; nr++) {
-		struct mem_section *ms;
-		unsigned long pfns;
-
-		pfns = min(nr_pages, PAGES_PER_SECTION
-				- (pfn & ~PAGE_SECTION_MASK));
-		ms = __nr_to_section(nr);
-		subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
-
-		pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
-				pfns, subsection_map_index(pfn),
-				subsection_map_index(pfn + pfns - 1));
-
-		pfn += pfns;
-		nr_pages -= pfns;
-	}
-}
-#endif
-
 /* Record a memory area against a node. */
 static void __init memory_present(int nid, unsigned long start, unsigned long end)
 {
@@ -482,275 +448,3 @@ void __init sparse_init(void)
 	sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
 	vmemmap_populate_print_last();
 }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-
-/* Mark all memory sections within the pfn range as online */
-void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
-{
-	unsigned long pfn;
-
-	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-		unsigned long section_nr = pfn_to_section_nr(pfn);
-		struct mem_section *ms = __nr_to_section(section_nr);
-
-		ms->section_mem_map |= SECTION_IS_ONLINE;
-	}
-}
-
-/* Mark all memory sections within the pfn range as offline */
-void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
-{
-	unsigned long pfn;
-
-	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-		unsigned long section_nr = pfn_to_section_nr(pfn);
-		struct mem_section *ms = __nr_to_section(section_nr);
-
-		ms->section_mem_map &= ~SECTION_IS_ONLINE;
-	}
-}
-
-static struct page * __meminit populate_section_memmap(unsigned long pfn,
-		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
-		struct dev_pagemap *pgmap)
-{
-	return __populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
-}
-
-static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
-		struct vmem_altmap *altmap)
-{
-	unsigned long start = (unsigned long) pfn_to_page(pfn);
-	unsigned long end = start + nr_pages * sizeof(struct page);
-
-	vmemmap_free(start, end, altmap);
-}
-static void free_map_bootmem(struct page *memmap)
-{
-	unsigned long start = (unsigned long)memmap;
-	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
-
-	vmemmap_free(start, end, NULL);
-}
-
-static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
-{
-	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
-	DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
-	struct mem_section *ms = __pfn_to_section(pfn);
-	unsigned long *subsection_map = ms->usage
-		? &ms->usage->subsection_map[0] : NULL;
-
-	subsection_mask_set(map, pfn, nr_pages);
-	if (subsection_map)
-		bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
-
-	if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
-				"section already deactivated (%#lx + %ld)\n",
-				pfn, nr_pages))
-		return -EINVAL;
-
-	bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
-	return 0;
-}
-
-static bool is_subsection_map_empty(struct mem_section *ms)
-{
-	return bitmap_empty(&ms->usage->subsection_map[0],
-			    SUBSECTIONS_PER_SECTION);
-}
-
-static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
-{
-	struct mem_section *ms = __pfn_to_section(pfn);
-	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
-	unsigned long *subsection_map;
-	int rc = 0;
-
-	subsection_mask_set(map, pfn, nr_pages);
-
-	subsection_map = &ms->usage->subsection_map[0];
-
-	if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
-		rc = -EINVAL;
-	else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
-		rc = -EEXIST;
-	else
-		bitmap_or(subsection_map, map, subsection_map,
-				SUBSECTIONS_PER_SECTION);
-
-	return rc;
-}
-
-/*
- * To deactivate a memory region, there are 3 cases to handle:
- *
- * 1. deactivation of a partial hot-added section:
- *      a) section was present at memory init.
- *      b) section was hot-added post memory init.
- * 2. deactivation of a complete hot-added section.
- * 3. deactivation of a complete section from memory init.
- *
- * For 1, when subsection_map does not empty we will not be freeing the
- * usage map, but still need to free the vmemmap range.
- */
-static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
-		struct vmem_altmap *altmap)
-{
-	struct mem_section *ms = __pfn_to_section(pfn);
-	bool section_is_early = early_section(ms);
-	struct page *memmap = NULL;
-	bool empty;
-
-	if (clear_subsection_map(pfn, nr_pages))
-		return;
-
-	empty = is_subsection_map_empty(ms);
-	if (empty) {
-		/*
-		 * Mark the section invalid so that valid_section()
-		 * return false. This prevents code from dereferencing
-		 * ms->usage array.
-		 */
-		ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
-
-		/*
-		 * When removing an early section, the usage map is kept (as the
-		 * usage maps of other sections fall into the same page). It
-		 * will be re-used when re-adding the section - which is then no
-		 * longer an early section. If the usage map is PageReserved, it
-		 * was allocated during boot.
-		 */
-		if (!PageReserved(virt_to_page(ms->usage))) {
-			kfree_rcu(ms->usage, rcu);
-			WRITE_ONCE(ms->usage, NULL);
-		}
-		memmap = pfn_to_page(SECTION_ALIGN_DOWN(pfn));
-	}
-
-	/*
-	 * The memmap of early sections is always fully populated. See
-	 * section_activate() and pfn_valid() .
-	 */
-	if (!section_is_early) {
-		memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)));
-		depopulate_section_memmap(pfn, nr_pages, altmap);
-	} else if (memmap) {
-		memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page),
-							  PAGE_SIZE)));
-		free_map_bootmem(memmap);
-	}
-
-	if (empty)
-		ms->section_mem_map = (unsigned long)NULL;
-}
-
-static struct page * __meminit section_activate(int nid, unsigned long pfn,
-		unsigned long nr_pages, struct vmem_altmap *altmap,
-		struct dev_pagemap *pgmap)
-{
-	struct mem_section *ms = __pfn_to_section(pfn);
-	struct mem_section_usage *usage = NULL;
-	struct page *memmap;
-	int rc;
-
-	if (!ms->usage) {
-		usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
-		if (!usage)
-			return ERR_PTR(-ENOMEM);
-		ms->usage = usage;
-	}
-
-	rc = fill_subsection_map(pfn, nr_pages);
-	if (rc) {
-		if (usage)
-			ms->usage = NULL;
-		kfree(usage);
-		return ERR_PTR(rc);
-	}
-
-	/*
-	 * The early init code does not consider partially populated
-	 * initial sections, it simply assumes that memory will never be
-	 * referenced.  If we hot-add memory into such a section then we
-	 * do not need to populate the memmap and can simply reuse what
-	 * is already there.
-	 */
-	if (nr_pages < PAGES_PER_SECTION && early_section(ms))
-		return pfn_to_page(pfn);
-
-	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
-	if (!memmap) {
-		section_deactivate(pfn, nr_pages, altmap);
-		return ERR_PTR(-ENOMEM);
-	}
-	memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE));
-
-	return memmap;
-}
-
-/**
- * sparse_add_section - add a memory section, or populate an existing one
- * @nid: The node to add section on
- * @start_pfn: start pfn of the memory range
- * @nr_pages: number of pfns to add in the section
- * @altmap: alternate pfns to allocate the memmap backing store
- * @pgmap: alternate compound page geometry for devmap mappings
- *
- * This is only intended for hotplug.
- *
- * Note that only VMEMMAP supports sub-section aligned hotplug,
- * the proper alignment and size are gated by check_pfn_span().
- *
- *
- * Return:
- * * 0		- On success.
- * * -EEXIST	- Section has been present.
- * * -ENOMEM	- Out of memory.
- */
-int __meminit sparse_add_section(int nid, unsigned long start_pfn,
-		unsigned long nr_pages, struct vmem_altmap *altmap,
-		struct dev_pagemap *pgmap)
-{
-	unsigned long section_nr = pfn_to_section_nr(start_pfn);
-	struct mem_section *ms;
-	struct page *memmap;
-	int ret;
-
-	ret = sparse_index_init(section_nr, nid);
-	if (ret < 0)
-		return ret;
-
-	memmap = section_activate(nid, start_pfn, nr_pages, altmap, pgmap);
-	if (IS_ERR(memmap))
-		return PTR_ERR(memmap);
-
-	/*
-	 * Poison uninitialized struct pages in order to catch invalid flags
-	 * combinations.
-	 */
-	page_init_poison(memmap, sizeof(struct page) * nr_pages);
-
-	ms = __nr_to_section(section_nr);
-	__section_mark_present(ms, section_nr);
-
-	/* Align memmap to section boundary in the subsection case */
-	if (section_nr_to_pfn(section_nr) != start_pfn)
-		memmap = pfn_to_page(section_nr_to_pfn(section_nr));
-	sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);
-
-	return 0;
-}
-
-void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
-			   struct vmem_altmap *altmap)
-{
-	struct mem_section *ms = __pfn_to_section(pfn);
-
-	if (WARN_ON_ONCE(!valid_section(ms)))
-		return;
-
-	section_deactivate(pfn, nr_pages, altmap);
-}
-#endif /* CONFIG_MEMORY_HOTPLUG */
-- 
cgit v1.2.3


From 6ebf98d71f9b509e833e0af00795ad3723d2f410 Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Thu, 19 Mar 2026 09:19:41 +0100
Subject: mm: introduce CONFIG_NUMA_MIGRATION and simplify CONFIG_MIGRATION

CONFIG_MEMORY_HOTREMOVE, CONFIG_COMPACTION and CONFIG_CMA all select
CONFIG_MIGRATION, because they require it to work (users).

Only CONFIG_NUMA_BALANCING and CONFIG_BALLOON_MIGRATION depend on
CONFIG_MIGRATION.  CONFIG_BALLOON_MIGRATION is not an actual user, but an
implementation of migration support, so the dependency is correct
(CONFIG_BALLOON_MIGRATION does not make any sense without
CONFIG_MIGRATION).

However, kconfig-language.rst clearly states "In general use select only
for non-visible symbols".  So far CONFIG_MIGRATION is user-visible ...
and the dependencies rather confusing.

The whole reason why CONFIG_MIGRATION is user-visible is because of
CONFIG_NUMA: some users might want CONFIG_NUMA but not page migration
support.

Let's clean all that up by introducing a dedicated CONFIG_NUMA_MIGRATION
config option for that purpose only.  Make CONFIG_NUMA_BALANCING that so
far depended on CONFIG_NUMA && CONFIG_MIGRATION to depend on
CONFIG_MIGRATION instead.  CONFIG_NUMA_MIGRATION will depend on
CONFIG_NUMA && CONFIG_MMU.

CONFIG_NUMA_MIGRATION is user-visible and will default to "y".  We use
that default so new configs will automatically enable it, just like it was
the case with CONFIG_MIGRATION.  The downside is that some configs that
used to have CONFIG_MIGRATION=n might get it re-enabled by
CONFIG_NUMA_MIGRATION=y, which shouldn't be a problem.

CONFIG_MIGRATION is now a non-visible config option.  Any code that select
CONFIG_MIGRATION (as before) must depend directly or indirectly on
CONFIG_MMU.

CONFIG_NUMA_MIGRATION is responsible for any NUMA migration code, which is
mempolicy migration code, memory-tiering code, and move_pages() code in
migrate.c.  CONFIG_NUMA_BALANCING uses its functionality.

Note that this implies that with CONFIG_NUMA_MIGRATION=n, move_pages()
will not be available even though CONFIG_MIGRATION=y, which is an expected
change.

In migrate.c, we can remove the CONFIG_NUMA check as both
CONFIG_NUMA_MIGRATION and CONFIG_NUMA_BALANCING depend on it.

With this change, CONFIG_MIGRATION is an internal config, all users of
migration selects CONFIG_MIGRATION, and only CONFIG_BALLOON_MIGRATION
depends on it.

Link: https://lkml.kernel.org/r/20260319-config_migration-v1-2-42270124966f@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memory-tiers.h |  2 +-
 init/Kconfig                 |  2 +-
 mm/Kconfig                   | 26 +++++++++++++-------------
 mm/memory-tiers.c            | 12 ++++++------
 mm/mempolicy.c               |  2 +-
 mm/migrate.c                 |  5 ++---
 6 files changed, 24 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 96987d9d95a8..7999c58629ee 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -52,7 +52,7 @@ int mt_perf_to_adistance(struct access_coordinate *perf, int *adist);
 struct memory_dev_type *mt_find_alloc_memory_type(int adist,
 						  struct list_head *memory_types);
 void mt_put_memory_types(struct list_head *memory_types);
-#ifdef CONFIG_MIGRATION
+#ifdef CONFIG_NUMA_MIGRATION
 int next_demotion_node(int node, const nodemask_t *allowed_mask);
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
 bool node_is_toptier(int node);
diff --git a/init/Kconfig b/init/Kconfig
index 444ce811ea67..3648e401b78b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -997,7 +997,7 @@ config NUMA_BALANCING
 	bool "Memory placement aware NUMA scheduler"
 	depends on ARCH_SUPPORTS_NUMA_BALANCING
 	depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
-	depends on SMP && NUMA && MIGRATION && !PREEMPT_RT
+	depends on SMP && NUMA_MIGRATION && !PREEMPT_RT
 	help
 	  This option adds support for automatic NUMA aware memory/task placement.
 	  The mechanism is quite primitive and is based on migrating memory when
diff --git a/mm/Kconfig b/mm/Kconfig
index b2e21d873d3f..bd283958d675 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -627,20 +627,20 @@ config PAGE_REPORTING
 	  those pages to another entity, such as a hypervisor, so that the
 	  memory can be freed within the host for other uses.
 
-#
-# support for page migration
-#
-config MIGRATION
-	bool "Page migration"
+config NUMA_MIGRATION
+	bool "NUMA page migration"
 	default y
-	depends on (NUMA || MEMORY_HOTREMOVE || COMPACTION || CMA) && MMU
-	help
-	  Allows the migration of the physical location of pages of processes
-	  while the virtual addresses are not changed. This is useful in
-	  two situations. The first is on NUMA systems to put pages nearer
-	  to the processors accessing. The second is when allocating huge
-	  pages as migration can relocate pages to satisfy a huge page
-	  allocation instead of reclaiming.
+	depends on NUMA && MMU
+	select MIGRATION
+	help
+	  Support the migration of pages to other NUMA nodes, available to
+	  user space through interfaces like migrate_pages(), move_pages(),
+	  and mbind(). Selecting this option also enables support for page
+	  demotion for memory tiering.
+
+config MIGRATION
+	bool
+	depends on MMU
 
 config DEVICE_MIGRATION
 	def_bool MIGRATION && ZONE_DEVICE
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 986f809376eb..54851d8a195b 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -69,7 +69,7 @@ bool folio_use_access_time(struct folio *folio)
 }
 #endif
 
-#ifdef CONFIG_MIGRATION
+#ifdef CONFIG_NUMA_MIGRATION
 static int top_tier_adistance;
 /*
  * node_demotion[] examples:
@@ -129,7 +129,7 @@ static int top_tier_adistance;
  *
  */
 static struct demotion_nodes *node_demotion __read_mostly;
-#endif /* CONFIG_MIGRATION */
+#endif /* CONFIG_NUMA_MIGRATION */
 
 static BLOCKING_NOTIFIER_HEAD(mt_adistance_algorithms);
 
@@ -273,7 +273,7 @@ static struct memory_tier *__node_get_memory_tier(int node)
 				     lockdep_is_held(&memory_tier_lock));
 }
 
-#ifdef CONFIG_MIGRATION
+#ifdef CONFIG_NUMA_MIGRATION
 bool node_is_toptier(int node)
 {
 	bool toptier;
@@ -519,7 +519,7 @@ static void establish_demotion_targets(void)
 
 #else
 static inline void establish_demotion_targets(void) {}
-#endif /* CONFIG_MIGRATION */
+#endif /* CONFIG_NUMA_MIGRATION */
 
 static inline void __init_node_memory_type(int node, struct memory_dev_type *memtype)
 {
@@ -911,7 +911,7 @@ static int __init memory_tier_init(void)
 	if (ret)
 		panic("%s() failed to register memory tier subsystem\n", __func__);
 
-#ifdef CONFIG_MIGRATION
+#ifdef CONFIG_NUMA_MIGRATION
 	node_demotion = kzalloc_objs(struct demotion_nodes, nr_node_ids);
 	WARN_ON(!node_demotion);
 #endif
@@ -938,7 +938,7 @@ subsys_initcall(memory_tier_init);
 
 bool numa_demotion_enabled = false;
 
-#ifdef CONFIG_MIGRATION
+#ifdef CONFIG_NUMA_MIGRATION
 #ifdef CONFIG_SYSFS
 static ssize_t demotion_enabled_show(struct kobject *kobj,
 				     struct kobj_attribute *attr, char *buf)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e5528c35bbb8..fd08771e2057 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1239,7 +1239,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	return err;
 }
 
-#ifdef CONFIG_MIGRATION
+#ifdef CONFIG_NUMA_MIGRATION
 static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
 				unsigned long flags)
 {
diff --git a/mm/migrate.c b/mm/migrate.c
index 3323fc96b1cd..4241eb6eca00 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2222,8 +2222,7 @@ struct folio *alloc_migration_target(struct folio *src, unsigned long private)
 	return __folio_alloc(gfp_mask, order, nid, mtc->nmask);
 }
 
-#ifdef CONFIG_NUMA
-
+#ifdef CONFIG_NUMA_MIGRATION
 static int store_status(int __user *status, int start, int value, int nr)
 {
 	while (nr-- > 0) {
@@ -2622,6 +2621,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
 {
 	return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
 }
+#endif /* CONFIG_NUMA_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
 /*
@@ -2764,4 +2764,3 @@ int migrate_misplaced_folio(struct folio *folio, int node)
 	return nr_remaining ? -EAGAIN : 0;
 }
 #endif /* CONFIG_NUMA_BALANCING */
-#endif /* CONFIG_NUMA */
-- 
cgit v1.2.3


From a6a8c087dce00eac0c6d03e560b0fa3d529afa5f Mon Sep 17 00:00:00 2001
From: Leno Hou <lenohou@gmail.com>
Date: Thu, 19 Mar 2026 00:30:49 +0800
Subject: mm/mglru: fix cgroup OOM during MGLRU state switching

When the Multi-Gen LRU (MGLRU) state is toggled dynamically, a race
condition exists between the state switching and the memory reclaim path.
This can lead to unexpected cgroup OOM kills, even when plenty of
reclaimable memory is available.

Problem Description
==================
The issue arises from a "reclaim vacuum" during the transition.

1. When disabling MGLRU, lru_gen_change_state() sets lrugen->enabled to
   false before the pages are drained from MGLRU lists back to traditional
   LRU lists.
2. Concurrent reclaimers in shrink_lruvec() see lrugen->enabled as false
   and skip the MGLRU path.
3. However, these pages might not have reached the traditional LRU lists
   yet, or the changes are not yet visible to all CPUs due to a lack
   of synchronization.
4. get_scan_count() subsequently finds traditional LRU lists empty,
   concludes there is no reclaimable memory, and triggers an OOM kill.

A similar race can occur during enablement, where the reclaimer sees the
new state but the MGLRU lists haven't been populated via fill_evictable()
yet.

Solution
========
Introduce a 'switching' state (`lru_switch`) to bridge the transition.
When transitioning, the system enters this intermediate state where
the reclaimer is forced to attempt both MGLRU and traditional reclaim
paths sequentially. This ensures that folios remain visible to at least
one reclaim mechanism until the transition is fully materialized across
all CPUs.

Race & Mitigation
================
A race window exists between checking the 'draining' state and performing
the actual list operations. For instance, a reclaimer might observe the
draining state as false just before it changes, leading to a suboptimal
reclaim path decision.

However, this impact is effectively mitigated by the kernel's reclaim
retry mechanism (e.g., in do_try_to_free_pages). If a reclaimer pass fails
to find eligible folios due to a state transition race, subsequent retries
in the loop will observe the updated state and correctly direct the scan
to the appropriate LRU lists. This ensures the transient inconsistency
does not escalate into a terminal OOM kill.

This effectively reduce the race window that previously triggered OOMs
under high memory pressure.

This fix has been verified on v7.0.0-rc1; dynamic toggling of MGLRU
functions correctly without triggering unexpected OOM kills.

Link: https://lkml.kernel.org/r/20260319-b4-switch-mglru-v2-v5-1-8898491e5f17@gmail.com
Signed-off-by: Leno Hou <lenohou@gmail.com>
Acked-by: Yafang Shao <laoar.shao@gmail.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Jialing Wang <wjl.linux@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Kairui Song <ryncsn@gmail.com>
Cc: Bingfang Guo <bfguo@icloud.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_inline.h | 11 +++++++++++
 mm/rmap.c                 |  7 ++++++-
 mm/vmscan.c               | 33 ++++++++++++++++++++++++---------
 3 files changed, 41 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index fa2d6ba811b5..2aedcff6a2c1 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -102,6 +102,12 @@ static __always_inline enum lru_list folio_lru_list(const struct folio *folio)
 
 #ifdef CONFIG_LRU_GEN
 
+static inline bool lru_gen_switching(void)
+{
+	DECLARE_STATIC_KEY_FALSE(lru_switch);
+
+	return static_branch_unlikely(&lru_switch);
+}
 #ifdef CONFIG_LRU_GEN_ENABLED
 static inline bool lru_gen_enabled(void)
 {
@@ -316,6 +322,11 @@ static inline bool lru_gen_enabled(void)
 	return false;
 }
 
+static inline bool lru_gen_switching(void)
+{
+	return false;
+}
+
 static inline bool lru_gen_in_fault(void)
 {
 	return false;
diff --git a/mm/rmap.c b/mm/rmap.c
index abe4712a220c..78b7fb5f367c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -973,7 +973,12 @@ static bool folio_referenced_one(struct folio *folio,
 			nr = folio_pte_batch(folio, pvmw.pte, pteval, max_nr);
 		}
 
-		if (lru_gen_enabled() && pvmw.pte) {
+		/*
+		 * When LRU is switching, we don’t know where the surrounding folios
+		 * are. —they could be on active/inactive lists or on MGLRU. So the
+		 * simplest approach is to disable this look-around optimization.
+		 */
+		if (lru_gen_enabled() && !lru_gen_switching() && pvmw.pte) {
 			if (lru_gen_look_around(&pvmw, nr))
 				referenced++;
 		} else if (pvmw.pte) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 641a6063f375..42f834c508bc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -905,7 +905,7 @@ static enum folio_references folio_check_references(struct folio *folio,
 	if (referenced_ptes == -1)
 		return FOLIOREF_KEEP;
 
-	if (lru_gen_enabled()) {
+	if (lru_gen_enabled() && !lru_gen_switching()) {
 		if (!referenced_ptes)
 			return FOLIOREF_RECLAIM;
 
@@ -2308,7 +2308,7 @@ static void prepare_scan_control(pg_data_t *pgdat, struct scan_control *sc)
 	unsigned long file;
 	struct lruvec *target_lruvec;
 
-	if (lru_gen_enabled())
+	if (lru_gen_enabled() && !lru_gen_switching())
 		return;
 
 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
@@ -2647,6 +2647,7 @@ static bool can_age_anon_pages(struct lruvec *lruvec,
 
 #ifdef CONFIG_LRU_GEN
 
+DEFINE_STATIC_KEY_FALSE(lru_switch);
 #ifdef CONFIG_LRU_GEN_ENABLED
 DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS);
 #define get_cap(cap)	static_branch_likely(&lru_gen_caps[cap])
@@ -5181,6 +5182,8 @@ static void lru_gen_change_state(bool enabled)
 	if (enabled == lru_gen_enabled())
 		goto unlock;
 
+	static_branch_enable_cpuslocked(&lru_switch);
+
 	if (enabled)
 		static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
 	else
@@ -5211,6 +5214,9 @@ static void lru_gen_change_state(bool enabled)
 
 		cond_resched();
 	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+
+	static_branch_disable_cpuslocked(&lru_switch);
+
 unlock:
 	mutex_unlock(&state_mutex);
 	put_online_mems();
@@ -5783,9 +5789,12 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 	bool proportional_reclaim;
 	struct blk_plug plug;
 
-	if (lru_gen_enabled() && !root_reclaim(sc)) {
+	if ((lru_gen_enabled() || lru_gen_switching()) && !root_reclaim(sc)) {
 		lru_gen_shrink_lruvec(lruvec, sc);
-		return;
+
+		if (!lru_gen_switching())
+			return;
+
 	}
 
 	get_scan_count(lruvec, sc, nr);
@@ -6045,10 +6054,13 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 	struct lruvec *target_lruvec;
 	bool reclaimable = false;
 
-	if (lru_gen_enabled() && root_reclaim(sc)) {
+	if ((lru_gen_enabled() || lru_gen_switching()) && root_reclaim(sc)) {
 		memset(&sc->nr, 0, sizeof(sc->nr));
 		lru_gen_shrink_node(pgdat, sc);
-		return;
+
+		if (!lru_gen_switching())
+			return;
+
 	}
 
 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
@@ -6318,7 +6330,7 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat)
 	struct lruvec *target_lruvec;
 	unsigned long refaults;
 
-	if (lru_gen_enabled())
+	if (lru_gen_enabled() && !lru_gen_switching())
 		return;
 
 	target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
@@ -6708,9 +6720,12 @@ static void kswapd_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 	struct mem_cgroup *memcg;
 	struct lruvec *lruvec;
 
-	if (lru_gen_enabled()) {
+	if (lru_gen_enabled() || lru_gen_switching()) {
 		lru_gen_age_node(pgdat, sc);
-		return;
+
+		if (!lru_gen_switching())
+			return;
+
 	}
 
 	lruvec = mem_cgroup_lruvec(NULL, pgdat);
-- 
cgit v1.2.3


From a62ca3f40feaaaf0dfc4db1f2edeca5a70f4123d Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Sat, 21 Mar 2026 14:42:49 +0800
Subject: mm: change to return bool for ptep_test_and_clear_young()

Patch series "change young flag check functions to return bool", v2.

This is a cleanup patchset to change all young flag check functions to
return bool, as discussed with David in the previous thread[1].  Since
callers only care about whether the young flag was set, returning bool
makes the intention clearer.  No functional changes intended.


This patch (of 6):

Callers use ptep_test_and_clear_young() to clear the young flag and check
whether it was set.  Change the return type to bool to make the intention
clearer.

Link: https://lkml.kernel.org/r/cover.1774075004.git.baolin.wang@linux.alibaba.com
Link: https://lkml.kernel.org/r/57e70efa9703d43959aa645246ea3cbdba14fa17.1774075004.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/arm64/include/asm/pgtable.h             | 16 +++++++---------
 arch/arm64/mm/contpte.c                      |  7 +++----
 arch/microblaze/include/asm/pgtable.h        |  2 +-
 arch/parisc/include/asm/pgtable.h            |  7 ++++---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  4 ++--
 arch/powerpc/include/asm/book3s/64/pgtable.h |  6 +++---
 arch/powerpc/include/asm/nohash/pgtable.h    |  4 ++--
 arch/riscv/include/asm/pgtable.h             |  4 ++--
 arch/riscv/mm/pgtable.c                      |  7 +++----
 arch/s390/include/asm/pgtable.h              |  4 ++--
 arch/x86/include/asm/pgtable.h               |  4 ++--
 arch/x86/mm/pgtable.c                        |  6 +++---
 arch/xtensa/include/asm/pgtable.h            |  9 ++++-----
 include/linux/pgtable.h                      | 16 ++++++++--------
 14 files changed, 46 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ab451d20e4c5..79596cc05dcb 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1282,9 +1282,8 @@ static inline void __pte_clear(struct mm_struct *mm,
 	__set_pte(ptep, __pte(0));
 }
 
-static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
-					      unsigned long address,
-					      pte_t *ptep)
+static inline bool __ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep)
 {
 	pte_t old_pte, pte;
 
@@ -1646,7 +1645,7 @@ extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
 extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
 				unsigned long addr, pte_t *ptep,
 				unsigned int nr, int full);
-int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
+bool contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
 				unsigned long addr, pte_t *ptep, unsigned int nr);
 int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
 				unsigned long addr, pte_t *ptep, unsigned int nr);
@@ -1813,9 +1812,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 }
 
 #define test_and_clear_young_ptes test_and_clear_young_ptes
-static inline int test_and_clear_young_ptes(struct vm_area_struct *vma,
-					    unsigned long addr, pte_t *ptep,
-					    unsigned int nr)
+static inline bool test_and_clear_young_ptes(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
 	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
 		return __ptep_test_and_clear_young(vma, addr, ptep);
@@ -1824,8 +1822,8 @@ static inline int test_and_clear_young_ptes(struct vm_area_struct *vma,
 }
 
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
-				unsigned long addr, pte_t *ptep)
+static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
 {
 	return test_and_clear_young_ptes(vma, addr, ptep, 1);
 }
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index 1519d090d5ea..a31cae78f712 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -508,9 +508,8 @@ pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
 }
 EXPORT_SYMBOL_GPL(contpte_get_and_clear_full_ptes);
 
-int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
-					unsigned long addr, pte_t *ptep,
-					unsigned int nr)
+bool contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
 	/*
 	 * ptep_clear_flush_young() technically requires us to clear the access
@@ -525,7 +524,7 @@ int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
 	 */
 
 	unsigned long end = addr + nr * PAGE_SIZE;
-	int young = 0;
+	bool young = false;
 
 	ptep = contpte_align_addr_ptep(&addr, &end, ptep, nr);
 	for (; addr != end; ptep++, addr += PAGE_SIZE)
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index ea72291de553..7678c040a2fd 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -318,7 +318,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
 
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 struct vm_area_struct;
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
 		unsigned long address, pte_t *ptep)
 {
 	return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0;
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index f6fb99cb94d9..7097c785f690 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -438,16 +438,17 @@ static inline pte_t ptep_get(pte_t *ptep)
 }
 #define ptep_get ptep_get
 
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
 {
 	pte_t pte;
 
 	pte = ptep_get(ptep);
 	if (!pte_young(pte)) {
-		return 0;
+		return false;
 	}
 	set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
-	return 1;
+	return true;
 }
 
 int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 001e28f9eabc..4a271318dee8 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -295,8 +295,8 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
  * for our hash-based implementation, we fix that up here.
  */
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pte_t *ptep)
+static inline bool __ptep_test_and_clear_young(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
 	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 1a91762b455d..c049a2e26e25 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -349,13 +349,13 @@ static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
  * For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
  * function for both hash and radix.
  */
-static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pte_t *ptep)
+static inline bool __ptep_test_and_clear_young(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
 
 	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
-		return 0;
+		return false;
 	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
 	return (old & _PAGE_ACCESSED) != 0;
 }
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index e6da5eaccff6..3a6f20a1c800 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -101,8 +101,8 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
 }
 #endif
 
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
-					    unsigned long addr, pte_t *ptep)
+static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
 
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index ab4ce1cc9d9c..643d12481b02 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -659,8 +659,8 @@ static inline void pte_clear(struct mm_struct *mm,
 extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 				 pte_t *ptep, pte_t entry, int dirty);
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG	/* defined in mm/pgtable.c */
-extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address,
-				     pte_t *ptep);
+bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep);
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index b1ed2f14dc3a..9c4427d0b187 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -29,12 +29,11 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	return true;
 }
 
-int ptep_test_and_clear_young(struct vm_area_struct *vma,
-			      unsigned long address,
-			      pte_t *ptep)
+bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep)
 {
 	if (!pte_young(ptep_get(ptep)))
-		return 0;
+		return false;
 	return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
 }
 EXPORT_SYMBOL_GPL(ptep_test_and_clear_young);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 1c3c3be93be9..ef4748ee3a2b 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1164,8 +1164,8 @@ pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
 pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
 
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
-					    unsigned long addr, pte_t *ptep)
+static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = *ptep;
 
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 54289f4587a4..1d86fb33239f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1232,8 +1232,8 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma,
 				 pte_t entry, int dirty);
 
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
-				     unsigned long addr, pte_t *ptep);
+bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep);
 
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 extern int ptep_clear_flush_young(struct vm_area_struct *vma,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 2e5ecfdce73c..5ee38dda9124 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -443,10 +443,10 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 }
 #endif
 
-int ptep_test_and_clear_young(struct vm_area_struct *vma,
-			      unsigned long addr, pte_t *ptep)
+bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
 {
-	int ret = 0;
+	bool ret = false;
 
 	if (pte_young(*ptep))
 		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 61f07d981a94..f00a879dc298 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -304,15 +304,14 @@ set_pmd(pmd_t *pmdp, pmd_t pmdval)
 
 struct vm_area_struct;
 
-static inline int
-ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr,
-			  pte_t *ptep)
+static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = *ptep;
 	if (!pte_young(pte))
-		return 0;
+		return false;
 	update_pte(ptep, pte_mkold(pte));
-	return 1;
+	return true;
 }
 
 static inline pte_t
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 17d961c612fc..8e75dc9f7932 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -491,17 +491,17 @@ static inline pgd_t pgdp_get(pgd_t *pgdp)
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
-					    unsigned long address,
-					    pte_t *ptep)
+static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep)
 {
 	pte_t pte = ptep_get(ptep);
-	int r = 1;
+	bool young = true;
+
 	if (!pte_young(pte))
-		r = 0;
+		young = false;
 	else
 		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
-	return r;
+	return young;
 }
 #endif
 
@@ -1123,10 +1123,10 @@ static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
  *
  * Returns: whether any PTE was young.
  */
-static inline int test_and_clear_young_ptes(struct vm_area_struct *vma,
+static inline bool test_and_clear_young_ptes(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
-	int young = 0;
+	bool young = false;
 
 	for (;;) {
 		young |= ptep_test_and_clear_young(vma, addr, ptep);
-- 
cgit v1.2.3


From 06c4dfa3ced61635895d0e258da8dc63da539f42 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Sat, 21 Mar 2026 14:42:50 +0800
Subject: mm: change to return bool for
 ptep_clear_flush_young()/clear_flush_young_ptes()

The ptep_clear_flush_young() and clear_flush_young_ptes() are used to
clear the young flag and flush the TLB, returning whether the young flag
was set.  Change the return type to bool to make the intention clearer.

Link: https://lkml.kernel.org/r/24af5144b96103631594501f77d4525f2475c1be.1774075004.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/arm64/include/asm/pgtable.h             | 17 ++++++++---------
 arch/arm64/mm/contpte.c                      |  7 +++----
 arch/parisc/include/asm/pgtable.h            |  2 +-
 arch/parisc/kernel/cache.c                   |  8 ++++----
 arch/powerpc/include/asm/nohash/64/pgtable.h |  2 +-
 arch/riscv/include/asm/pgtable.h             |  4 ++--
 arch/s390/include/asm/pgtable.h              |  4 ++--
 arch/x86/include/asm/pgtable.h               |  4 ++--
 arch/x86/mm/pgtable.c                        |  4 ++--
 include/linux/pgtable.h                      |  8 ++++----
 mm/pgtable-generic.c                         |  7 ++++---
 11 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 79596cc05dcb..1009f719b157 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1298,10 +1298,10 @@ static inline bool __ptep_test_and_clear_young(struct vm_area_struct *vma,
 	return pte_young(pte);
 }
 
-static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
-					 unsigned long address, pte_t *ptep)
+static inline bool __ptep_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep)
 {
-	int young = __ptep_test_and_clear_young(vma, address, ptep);
+	bool young = __ptep_test_and_clear_young(vma, address, ptep);
 
 	if (young) {
 		/*
@@ -1647,7 +1647,7 @@ extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
 				unsigned int nr, int full);
 bool contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
 				unsigned long addr, pte_t *ptep, unsigned int nr);
-int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
+bool contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
 				unsigned long addr, pte_t *ptep, unsigned int nr);
 extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
 				pte_t *ptep, unsigned int nr);
@@ -1829,8 +1829,8 @@ static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
 }
 
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
-				unsigned long addr, pte_t *ptep)
+static inline bool ptep_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
 {
 	pte_t orig_pte = __ptep_get(ptep);
 
@@ -1841,9 +1841,8 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 }
 
 #define clear_flush_young_ptes clear_flush_young_ptes
-static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
-					 unsigned long addr, pte_t *ptep,
-					 unsigned int nr)
+static inline bool clear_flush_young_ptes(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
 	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
 		return __ptep_clear_flush_young(vma, addr, ptep);
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index a31cae78f712..2dc1b8ad71e8 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -534,11 +534,10 @@ bool contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
 }
 EXPORT_SYMBOL_GPL(contpte_test_and_clear_young_ptes);
 
-int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
-				unsigned long addr, pte_t *ptep,
-				unsigned int nr)
+bool contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
-	int young;
+	bool young;
 
 	young = contpte_test_and_clear_young_ptes(vma, addr, ptep, nr);
 
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 7097c785f690..467b8547ac8b 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -451,7 +451,7 @@ static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
 	return true;
 }
 
-int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
+bool ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
 pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
 
 struct mm_struct;
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index b189265785dc..0170b69a21d3 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -781,18 +781,18 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned lon
 	__flush_cache_page(vma, vmaddr, PFN_PHYS(page_to_pfn(page)));
 }
 
-int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr,
-			   pte_t *ptep)
+bool ptep_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = ptep_get(ptep);
 
 	if (!pte_young(pte))
-		return 0;
+		return false;
 	set_pte(ptep, pte_mkold(pte));
 #if CONFIG_FLUSH_PAGE_ACCESSED
 	__flush_cache_page(vma, addr, PFN_PHYS(pte_pfn(pte)));
 #endif
-	return 1;
+	return true;
 }
 
 /*
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 2deb955b7bc8..661eb3820d12 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -155,7 +155,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 #define ptep_clear_flush_young(__vma, __address, __ptep)		\
 ({									\
-	int __young = ptep_test_and_clear_young(__vma, __address, __ptep);\
+	bool __young = ptep_test_and_clear_young(__vma, __address, __ptep);\
 	__young;							\
 })
 
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 643d12481b02..b9dacfc280b1 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -695,8 +695,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
 }
 
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
-					 unsigned long address, pte_t *ptep)
+static inline bool ptep_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep)
 {
 	/*
 	 * This comment is borrowed from x86, but applies equally to RISC-V:
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index ef4748ee3a2b..ac74b5076d8f 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1174,8 +1174,8 @@ static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
 }
 
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
-					 unsigned long address, pte_t *ptep)
+static inline bool ptep_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep)
 {
 	return ptep_test_and_clear_young(vma, address, ptep);
 }
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1d86fb33239f..3993657e0a35 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1236,8 +1236,8 @@ bool ptep_test_and_clear_young(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *ptep);
 
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-extern int ptep_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pte_t *ptep);
+bool ptep_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep);
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5ee38dda9124..1348384a3bb9 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -483,8 +483,8 @@ int pudp_test_and_clear_young(struct vm_area_struct *vma,
 }
 #endif
 
-int ptep_clear_flush_young(struct vm_area_struct *vma,
-			   unsigned long address, pte_t *ptep)
+bool ptep_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep)
 {
 	/*
 	 * On x86 CPUs, clearing the accessed bit without a TLB flush
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 8e75dc9f7932..99450a3b0705 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -531,8 +531,8 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-int ptep_clear_flush_young(struct vm_area_struct *vma,
-			   unsigned long address, pte_t *ptep);
+bool ptep_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep);
 #endif
 
 #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
@@ -1086,10 +1086,10 @@ static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
  * Context: The caller holds the page table lock.  The PTEs map consecutive
  * pages that belong to the same folio.  The PTEs are all in the same PMD.
  */
-static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
+static inline bool clear_flush_young_ptes(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
-	int young = 0;
+	bool young = false;
 
 	for (;;) {
 		young |= ptep_clear_flush_young(vma, addr, ptep);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index af7966169d69..db0ee918b08a 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -81,10 +81,11 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-int ptep_clear_flush_young(struct vm_area_struct *vma,
-			   unsigned long address, pte_t *ptep)
+bool ptep_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep)
 {
-	int young;
+	bool young;
+
 	young = ptep_test_and_clear_young(vma, address, ptep);
 	if (young)
 		flush_tlb_page(vma, address);
-- 
cgit v1.2.3


From 42e26354c4ef28772398b1d71b7477834037305c Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Sat, 21 Mar 2026 14:42:51 +0800
Subject: mm: change to return bool for pmdp_test_and_clear_young()

Callers use pmdp_test_and_clear_young() to clear the young flag and check
whether it was set for this PMD entry.  Change the return type to bool to
make the intention clearer.

Link: https://lkml.kernel.org/r/f1d31307a13365d3d0fed5809727dcc2dd59631b.1774075004.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/arm64/include/asm/pgtable.h             |  5 ++---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 +++++-----
 arch/powerpc/mm/book3s64/pgtable.c           |  4 ++--
 arch/riscv/include/asm/pgtable.h             |  4 ++--
 arch/s390/include/asm/pgtable.h              |  4 ++--
 arch/x86/include/asm/pgtable.h               |  4 ++--
 arch/x86/mm/pgtable.c                        |  6 +++---
 include/linux/pgtable.h                      | 19 +++++++++----------
 8 files changed, 27 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 1009f719b157..52bafe79c10a 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1320,9 +1320,8 @@ static inline bool __ptep_clear_flush_young(struct vm_area_struct *vma,
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-					    unsigned long address,
-					    pmd_t *pmdp)
+static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp)
 {
 	/* Operation applies to PMD table entry only if FEAT_HAFT is enabled */
 	VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft());
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index c049a2e26e25..8b354e81ab22 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1161,13 +1161,13 @@ pud_hugepage_update(struct mm_struct *mm, unsigned long addr, pud_t *pudp,
  * For radix we should always find H_PAGE_HASHPTE zero. Hence
  * the below will work for radix too
  */
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pmd_t *pmdp)
+static inline bool __pmdp_test_and_clear_young(struct mm_struct *mm,
+		unsigned long addr, pmd_t *pmdp)
 {
 	unsigned long old;
 
 	if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
-		return 0;
+		return false;
 	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
 	return ((old & _PAGE_ACCESSED) != 0);
 }
@@ -1300,8 +1300,8 @@ extern int pudp_set_access_flags(struct vm_area_struct *vma,
 				 pud_t entry, int dirty);
 
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-				     unsigned long address, pmd_t *pmdp);
+bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp);
 #define __HAVE_ARCH_PUDP_TEST_AND_CLEAR_YOUNG
 extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
 				     unsigned long address, pud_t *pudp);
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 4b09c04654a8..c584321e3d41 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -98,8 +98,8 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 }
 
 
-int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-			      unsigned long address, pmd_t *pmdp)
+bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp)
 {
 	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
 }
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index b9dacfc280b1..67e7746e3fbe 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -1015,8 +1015,8 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
 }
 
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-					unsigned long address, pmd_t *pmdp)
+static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp)
 {
 	return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
 }
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index ac74b5076d8f..87a5082da28e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1683,8 +1683,8 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
 }
 
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-					    unsigned long addr, pmd_t *pmdp)
+static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long addr, pmd_t *pmdp)
 {
 	pmd_t pmd = *pmdp;
 
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 3993657e0a35..ba867bac6096 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1295,8 +1295,8 @@ extern int pudp_set_access_flags(struct vm_area_struct *vma,
 				 pud_t entry, int dirty);
 
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-				     unsigned long addr, pmd_t *pmdp);
+bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long addr, pmd_t *pmdp);
 extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
 				     unsigned long addr, pud_t *pudp);
 
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1348384a3bb9..b09e8c5dadf9 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -456,10 +456,10 @@ bool ptep_test_and_clear_young(struct vm_area_struct *vma,
 }
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
-int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-			      unsigned long addr, pmd_t *pmdp)
+bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long addr, pmd_t *pmdp)
 {
-	int ret = 0;
+	bool ret = false;
 
 	if (pmd_young(*pmdp))
 		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 99450a3b0705..6db900a5d38b 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -507,25 +507,24 @@ static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
 
 #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
-static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-					    unsigned long address,
-					    pmd_t *pmdp)
+static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp)
 {
 	pmd_t pmd = *pmdp;
-	int r = 1;
+	bool young = true;
+
 	if (!pmd_young(pmd))
-		r = 0;
+		young = false;
 	else
 		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
-	return r;
+	return young;
 }
 #else
-static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-					    unsigned long address,
-					    pmd_t *pmdp)
+static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp)
 {
 	BUILD_BUG();
-	return 0;
+	return false;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
 #endif
-- 
cgit v1.2.3


From 2d46a397472191a10b0df294d64da542bfd1de57 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Sat, 21 Mar 2026 14:42:52 +0800
Subject: mm: change to return bool for pmdp_clear_flush_young()

The pmdp_clear_flush_young() is used to clear the young flag and flush the
TLB, returning whether the young flag was set for this PMD entry.  Change
the return type to bool to make the intention clearer.

Link: https://lkml.kernel.org/r/a668b9a974c0d675e7a41f6973bcbe3336e8b373.1774075004.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/s390/include/asm/pgtable.h |  4 ++--
 arch/x86/include/asm/pgtable.h  |  4 ++--
 arch/x86/mm/pgtable.c           |  6 +++---
 include/linux/pgtable.h         | 10 +++++-----
 mm/pgtable-generic.c            |  7 ++++---
 5 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 87a5082da28e..40a6fb19dd1d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1693,8 +1693,8 @@ static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
 }
 
 #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
-					 unsigned long addr, pmd_t *pmdp)
+static inline bool pmdp_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long addr, pmd_t *pmdp)
 {
 	VM_BUG_ON(addr & ~HPAGE_MASK);
 	return pmdp_test_and_clear_young(vma, addr, pmdp);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index ba867bac6096..6c8f2b17d3f9 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1301,8 +1301,8 @@ extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
 				     unsigned long addr, pud_t *pudp);
 
 #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmdp);
+bool pmdp_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp);
 
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index b09e8c5dadf9..fc1c996c5b2d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -503,10 +503,10 @@ bool ptep_clear_flush_young(struct vm_area_struct *vma,
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-int pmdp_clear_flush_young(struct vm_area_struct *vma,
-			   unsigned long address, pmd_t *pmdp)
+bool pmdp_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp)
 {
-	int young;
+	bool young;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 6db900a5d38b..cdd68ed3ae1a 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -536,18 +536,18 @@ bool ptep_clear_flush_young(struct vm_area_struct *vma,
 
 #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmdp);
+bool pmdp_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp);
 #else
 /*
  * Despite relevant to THP only, this API is called from generic rmap code
  * under PageTransHuge(), hence needs a dummy implementation for !THP
  */
-static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
-					 unsigned long address, pmd_t *pmdp)
+static inline bool pmdp_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp)
 {
 	BUILD_BUG();
-	return 0;
+	return false;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index db0ee918b08a..b91b1a98029c 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -124,10 +124,11 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
 #endif
 
 #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-int pmdp_clear_flush_young(struct vm_area_struct *vma,
-			   unsigned long address, pmd_t *pmdp)
+bool pmdp_clear_flush_young(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp)
 {
-	int young;
+	bool young;
+
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 	young = pmdp_test_and_clear_young(vma, address, pmdp);
 	if (young)
-- 
cgit v1.2.3


From 1fc7dc675e26c43f3219d70a09b9f0c4aa43a13a Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Sat, 21 Mar 2026 14:42:54 +0800
Subject: mm: change to return bool for the MMU notifier's young flag check

The MMU notifier young flag check related functions only return whether
the young flag was set.  Change the return type to bool to make the
intention clearer.

Link: https://lkml.kernel.org/r/a9ad3fe938002d87358e7bfca264f753ab602561.1774075004.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 76 ++++++++++++++++++++------------------------
 mm/internal.h                | 16 +++++-----
 mm/mmu_notifier.c            | 23 +++++++-------
 virt/kvm/kvm_main.c          | 31 +++++++-----------
 4 files changed, 66 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 3705d350c863..17f2cdc77dd5 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -97,20 +97,20 @@ struct mmu_notifier_ops {
 	 * Start-end is necessary in case the secondary MMU is mapping the page
 	 * at a smaller granularity than the primary MMU.
 	 */
-	int (*clear_flush_young)(struct mmu_notifier *subscription,
-				 struct mm_struct *mm,
-				 unsigned long start,
-				 unsigned long end);
+	bool (*clear_flush_young)(struct mmu_notifier *subscription,
+				  struct mm_struct *mm,
+				  unsigned long start,
+				  unsigned long end);
 
 	/*
 	 * clear_young is a lightweight version of clear_flush_young. Like the
 	 * latter, it is supposed to test-and-clear the young/accessed bitflag
 	 * in the secondary pte, but it may omit flushing the secondary tlb.
 	 */
-	int (*clear_young)(struct mmu_notifier *subscription,
-			   struct mm_struct *mm,
-			   unsigned long start,
-			   unsigned long end);
+	bool (*clear_young)(struct mmu_notifier *subscription,
+			    struct mm_struct *mm,
+			    unsigned long start,
+			    unsigned long end);
 
 	/*
 	 * test_young is called to check the young/accessed bitflag in
@@ -118,9 +118,9 @@ struct mmu_notifier_ops {
 	 * frequently used without actually clearing the flag or tearing
 	 * down the secondary mapping on the page.
 	 */
-	int (*test_young)(struct mmu_notifier *subscription,
-			  struct mm_struct *mm,
-			  unsigned long address);
+	bool (*test_young)(struct mmu_notifier *subscription,
+			   struct mm_struct *mm,
+			   unsigned long address);
 
 	/*
 	 * invalidate_range_start() and invalidate_range_end() must be
@@ -376,14 +376,12 @@ mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub,
 
 extern void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
-extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long start,
-					  unsigned long end);
-extern int __mmu_notifier_clear_young(struct mm_struct *mm,
-				      unsigned long start,
-				      unsigned long end);
-extern int __mmu_notifier_test_young(struct mm_struct *mm,
-				     unsigned long address);
+bool __mmu_notifier_clear_flush_young(struct mm_struct *mm,
+		unsigned long start, unsigned long end);
+bool __mmu_notifier_clear_young(struct mm_struct *mm,
+		unsigned long start, unsigned long end);
+bool __mmu_notifier_test_young(struct mm_struct *mm,
+		unsigned long address);
 extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r);
 extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r);
 extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
@@ -403,30 +401,28 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
 		__mmu_notifier_release(mm);
 }
 
-static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long start,
-					  unsigned long end)
+static inline bool mmu_notifier_clear_flush_young(struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	if (mm_has_notifiers(mm))
 		return __mmu_notifier_clear_flush_young(mm, start, end);
-	return 0;
+	return false;
 }
 
-static inline int mmu_notifier_clear_young(struct mm_struct *mm,
-					   unsigned long start,
-					   unsigned long end)
+static inline bool mmu_notifier_clear_young(struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	if (mm_has_notifiers(mm))
 		return __mmu_notifier_clear_young(mm, start, end);
-	return 0;
+	return false;
 }
 
-static inline int mmu_notifier_test_young(struct mm_struct *mm,
-					  unsigned long address)
+static inline bool mmu_notifier_test_young(struct mm_struct *mm,
+		unsigned long address)
 {
 	if (mm_has_notifiers(mm))
 		return __mmu_notifier_test_young(mm, address);
-	return 0;
+	return false;
 }
 
 static inline void
@@ -552,24 +548,22 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
 {
 }
 
-static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long start,
-					  unsigned long end)
+static inline bool mmu_notifier_clear_flush_young(struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
-	return 0;
+	return false;
 }
 
-static inline int mmu_notifier_clear_young(struct mm_struct *mm,
-					   unsigned long start,
-					   unsigned long end)
+static inline bool mmu_notifier_clear_young(struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
-	return 0;
+	return false;
 }
 
-static inline int mmu_notifier_test_young(struct mm_struct *mm,
-					  unsigned long address)
+static inline bool mmu_notifier_test_young(struct mm_struct *mm,
+		unsigned long address)
 {
-	return 0;
+	return false;
 }
 
 static inline void
diff --git a/mm/internal.h b/mm/internal.h
index 9ae0ee6c34f9..3d3fa35e5fd1 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1860,10 +1860,10 @@ static inline int io_remap_pfn_range_complete(struct vm_area_struct *vma,
 }
 
 #ifdef CONFIG_MMU_NOTIFIER
-static inline int clear_flush_young_ptes_notify(struct vm_area_struct *vma,
+static inline bool clear_flush_young_ptes_notify(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
-	int young;
+	bool young;
 
 	young = clear_flush_young_ptes(vma, addr, ptep, nr);
 	young |= mmu_notifier_clear_flush_young(vma->vm_mm, addr,
@@ -1871,30 +1871,30 @@ static inline int clear_flush_young_ptes_notify(struct vm_area_struct *vma,
 	return young;
 }
 
-static inline int pmdp_clear_flush_young_notify(struct vm_area_struct *vma,
+static inline bool pmdp_clear_flush_young_notify(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmdp)
 {
-	int young;
+	bool young;
 
 	young = pmdp_clear_flush_young(vma, addr, pmdp);
 	young |= mmu_notifier_clear_flush_young(vma->vm_mm, addr, addr + PMD_SIZE);
 	return young;
 }
 
-static inline int test_and_clear_young_ptes_notify(struct vm_area_struct *vma,
+static inline bool test_and_clear_young_ptes_notify(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
-	int young;
+	bool young;
 
 	young = test_and_clear_young_ptes(vma, addr, ptep, nr);
 	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + nr * PAGE_SIZE);
 	return young;
 }
 
-static inline int pmdp_test_and_clear_young_notify(struct vm_area_struct *vma,
+static inline bool pmdp_test_and_clear_young_notify(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmdp)
 {
-	int young;
+	bool young;
 
 	young = pmdp_test_and_clear_young(vma, addr, pmdp);
 	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PMD_SIZE);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 2502474b83b6..dc6f78d559f7 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -364,12 +364,12 @@ void __mmu_notifier_release(struct mm_struct *mm)
  * unmap the address and return 1 or 0 depending if the mapping previously
  * existed or not.
  */
-int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					unsigned long start,
-					unsigned long end)
+bool __mmu_notifier_clear_flush_young(struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	struct mmu_notifier *subscription;
-	int young = 0, id;
+	bool young = false;
+	int id;
 
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_srcu(subscription,
@@ -384,12 +384,12 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	return young;
 }
 
-int __mmu_notifier_clear_young(struct mm_struct *mm,
-			       unsigned long start,
-			       unsigned long end)
+bool __mmu_notifier_clear_young(struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	struct mmu_notifier *subscription;
-	int young = 0, id;
+	bool young = false;
+	int id;
 
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_srcu(subscription,
@@ -404,11 +404,12 @@ int __mmu_notifier_clear_young(struct mm_struct *mm,
 	return young;
 }
 
-int __mmu_notifier_test_young(struct mm_struct *mm,
-			      unsigned long address)
+bool __mmu_notifier_test_young(struct mm_struct *mm,
+		unsigned long address)
 {
 	struct mmu_notifier *subscription;
-	int young = 0, id;
+	bool young = false;
+	int id;
 
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_srcu(subscription,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d0ab29672c71..82433f46c438 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -646,11 +646,9 @@ mmu_unlock:
 	return r;
 }
 
-static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
-						unsigned long start,
-						unsigned long end,
-						gfn_handler_t handler,
-						bool flush_on_ret)
+static __always_inline bool kvm_age_hva_range(struct mmu_notifier *mn,
+		unsigned long start, unsigned long end, gfn_handler_t handler,
+		bool flush_on_ret)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
 	const struct kvm_mmu_notifier_range range = {
@@ -666,10 +664,8 @@ static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
 	return kvm_handle_hva_range(kvm, &range).ret;
 }
 
-static __always_inline int kvm_age_hva_range_no_flush(struct mmu_notifier *mn,
-						      unsigned long start,
-						      unsigned long end,
-						      gfn_handler_t handler)
+static __always_inline bool kvm_age_hva_range_no_flush(struct mmu_notifier *mn,
+		unsigned long start, unsigned long end, gfn_handler_t handler)
 {
 	return kvm_age_hva_range(mn, start, end, handler, false);
 }
@@ -829,10 +825,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 		rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
 }
 
-static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
-					      struct mm_struct *mm,
-					      unsigned long start,
-					      unsigned long end)
+static bool kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
+		struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	trace_kvm_age_hva(start, end);
 
@@ -840,10 +834,8 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 				 !IS_ENABLED(CONFIG_KVM_ELIDE_TLB_FLUSH_IF_YOUNG));
 }
 
-static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
-					struct mm_struct *mm,
-					unsigned long start,
-					unsigned long end)
+static bool kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
+		struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	trace_kvm_age_hva(start, end);
 
@@ -863,9 +855,8 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
 	return kvm_age_hva_range_no_flush(mn, start, end, kvm_age_gfn);
 }
 
-static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
-				       struct mm_struct *mm,
-				       unsigned long address)
+static bool kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
+		struct mm_struct *mm, unsigned long address)
 {
 	trace_kvm_test_age_hva(address);
 
-- 
cgit v1.2.3


From 54fdcbfe1cbd1d8f06d0c57c8cc43ddcc1cd421c Mon Sep 17 00:00:00 2001
From: Ye Liu <liuye@kylinos.cn>
Date: Mon, 23 Mar 2026 17:03:04 +0800
Subject: mm: remove unused page_is_file_lru() function

The page_is_file_lru() wrapper function is no longer used.  The kernel has
moved to folio-based APIs, and all callers should use folio_is_file_lru()
instead.

Remove the obsolete page-based wrapper function.

Link: https://lkml.kernel.org/r/20260323090305.798057-1-ye.liu@linux.dev
Signed-off-by: Ye Liu <liuye@kylinos.cn>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_inline.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 2aedcff6a2c1..7fc2ced00f8f 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -30,11 +30,6 @@ static inline int folio_is_file_lru(const struct folio *folio)
 	return !folio_test_swapbacked(folio);
 }
 
-static inline int page_is_file_lru(struct page *page)
-{
-	return folio_is_file_lru(page_folio(page));
-}
-
 static __always_inline void __update_lru_size(struct lruvec *lruvec,
 				enum lru_list lru, enum zone_type zid,
 				long nr_pages)
-- 
cgit v1.2.3


From 4ff07459db888054f68575646d7fe04f31f1e56d Mon Sep 17 00:00:00 2001
From: Jinjiang Tu <tujinjiang@huawei.com>
Date: Thu, 19 Mar 2026 09:25:41 +0800
Subject: mm/huge_memory: fix folio isn't locked in softleaf_to_folio()

On arm64 server, we found folio that get from migration entry isn't locked
in softleaf_to_folio().  This issue triggers when mTHP splitting and
zap_nonpresent_ptes() races, and the root cause is lack of memory barrier
in softleaf_to_folio().  The race is as follows:

	CPU0                                             CPU1

deferred_split_scan()                              zap_nonpresent_ptes()
  lock folio
  split_folio()
    unmap_folio()
      change ptes to migration entries
    __split_folio_to_order()                         softleaf_to_folio()
      set flags(including PG_locked) for tail pages    folio = pfn_folio(softleaf_to_pfn(entry))
      smp_wmb()                                        VM_WARN_ON_ONCE(!folio_test_locked(folio))
      prep_compound_page() for tail pages

In __split_folio_to_order(), smp_wmb() guarantees page flags of tail pages
are visible before the tail page becomes non-compound.  smp_wmb() should
be paired with smp_rmb() in softleaf_to_folio(), which is missed.  As a
result, if zap_nonpresent_ptes() accesses migration entry that stores tail
pfn, softleaf_to_folio() may see the updated compound_head of tail page
before page->flags.

This issue will trigger VM_WARN_ON_ONCE() in pfn_swap_entry_folio()
because of the race between folio split and zap_nonpresent_ptes()
leading to a folio incorrectly undergoing modification without a folio
lock being held.

This is a BUG_ON() before commit 93976a20345b ("mm: eliminate further
swapops predicates"), which in merged in v6.19-rc1.

To fix it, add missing smp_rmb() if the softleaf entry is migration entry
in softleaf_to_folio() and softleaf_to_page().

[tujinjiang@huawei.com: update function name and comments]
  Link: https://lkml.kernel.org/r/20260321075214.3305564-1-tujinjiang@huawei.com
Link: https://lkml.kernel.org/r/20260319012541.4158561-1-tujinjiang@huawei.com
Fixes: e9b61f19858a ("thp: reintroduce split_huge_page()")
Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Barry Song <baohua@kernel.org>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nanyong Sun <sunnanyong@huawei.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/leafops.h | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/leafops.h b/include/linux/leafops.h
index a9ff94b744f2..05673d3529e7 100644
--- a/include/linux/leafops.h
+++ b/include/linux/leafops.h
@@ -363,6 +363,23 @@ static inline unsigned long softleaf_to_pfn(softleaf_t entry)
 	return swp_offset(entry) & SWP_PFN_MASK;
 }
 
+static inline void softleaf_migration_sync(softleaf_t entry,
+		struct folio *folio)
+{
+	/*
+	 * Ensure we do not race with split, which might alter tail pages into new
+	 * folios and thus result in observing an unlocked folio.
+	 * This matches the write barrier in __split_folio_to_order().
+	 */
+	smp_rmb();
+
+	/*
+	 * Any use of migration entries may only occur while the
+	 * corresponding page is locked
+	 */
+	VM_WARN_ON_ONCE(!folio_test_locked(folio));
+}
+
 /**
  * softleaf_to_page() - Obtains struct page for PFN encoded within leaf entry.
  * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true.
@@ -374,11 +391,8 @@ static inline struct page *softleaf_to_page(softleaf_t entry)
 	struct page *page = pfn_to_page(softleaf_to_pfn(entry));
 
 	VM_WARN_ON_ONCE(!softleaf_has_pfn(entry));
-	/*
-	 * Any use of migration entries may only occur while the
-	 * corresponding page is locked
-	 */
-	VM_WARN_ON_ONCE(softleaf_is_migration(entry) && !PageLocked(page));
+	if (softleaf_is_migration(entry))
+		softleaf_migration_sync(entry, page_folio(page));
 
 	return page;
 }
@@ -394,12 +408,8 @@ static inline struct folio *softleaf_to_folio(softleaf_t entry)
 	struct folio *folio = pfn_folio(softleaf_to_pfn(entry));
 
 	VM_WARN_ON_ONCE(!softleaf_has_pfn(entry));
-	/*
-	 * Any use of migration entries may only occur while the
-	 * corresponding folio is locked.
-	 */
-	VM_WARN_ON_ONCE(softleaf_is_migration(entry) &&
-			!folio_test_locked(folio));
+	if (softleaf_is_migration(entry))
+		softleaf_migration_sync(entry, folio);
 
 	return folio;
 }
-- 
cgit v1.2.3


From 6bc0987d0b508b3768808efafa1e90041713526b Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:18 +0000
Subject: mm/vma: add vma_flags_empty(), vma_flags_and(), vma_flags_diff_pair()

Patch series "mm/vma: convert vm_flags_t to vma_flags_t in vma code", v4.

This series converts a lot of the existing use of the legacy vm_flags_t
data type to the new vma_flags_t type which replaces it.

In order to do so it adds a number of additional helpers:

* vma_flags_empty() - Determines whether a vma_flags_t value has no bits
  set.

* vma_flags_and() - Performs a bitwise AND between two vma_flags_t values.

* vma_flags_diff_pair() - Determines which flags are not shared between a
  pair of VMA flags (typically non-constant values)

* append_vma_flags() - Similar to mk_vma_flags(), but allows a vma_flags_t
  value to be specified (typically a constant value) which will be copied
  and appended to to create a new vma_flags_t value, with additional flags
  specified to append to it.

* vma_flags_same() - Determines if a vma_flags_t value is exactly equal to
  a set of VMA flags.

* vma_flags_same_mask() - Determines if a vma_flags_t value is eactly equal
  to another vma_flags_t value (typically constant).

* vma_flags_same_pair() - Determines if a pair of vma_flags_t values are
  exactly equal to one another (typically both non-constant).

* vma_flags_to_legacy() - Converts a vma_flags_t value to a vm_flags_t
  value, used to enable more iterative introduction of the use of
  vma_flags_t.

* legacy_to_vma_flags() - Converts a vm_flags_t value to a vma_flags-t
  value, for the same purpose.

* vma_flags_test_single_mask() - Tests whether a vma_flags_t value contain
  the single flag specified in an input vma_flags_t flag mask, or if that
  flag mask is empty, is defined to return false. Useful for
  config-predicated VMA flag mask defines.

* vma_test() - Tests whether a VMA's flags contain a specific singular VMA
  flag.

* vma_test_any() - Tests whether a VMA's flags contain any of a set of VMA
  flags.

* vma_test_any_mask() - Tests whether a VMA's flags contain any of the
  flags specified in another, typically constant, vma_flags_t value.

* vma_test_single_mask() - Tests whether a VMA's flags contain the single
  flag specified in an input vma_flags_t flag mask, or if that flag mask is
  empty, is defined to return false. Useful for config-predicated VMA flag
  mask defines.

* vma_clear_flags() - Clears a specific set of VMA flags from a vma_flags_t
  value.

* vma_clear_flags_mask() - Clears those flag set in a vma_flags_t value
  (typically constant) from a (typically not constant) vma_flags_t value.

The series mostly focuses on the the VMA specific code, especially that
contained in mm/vma.c and mm/vma.h.

It updates both brk() and mmap() logic to utils vma_flags_t values as much
as is practiaclly possible at this point, changing surrounding logic to be
able to do so.

It also updates the vma_modify_xxx() functions where they interact with VMA
flags directly to use vm_flags_t values where possible.

There is extensive testing added in the VMA userland tests to assert that
all of these new VMA flag functions work correctly.


This patch (of 25):

Firstly, add the ability to determine if VMA flags are empty, that is no
flags are set in a vma_flags_t value.

Next, add the ability to obtain the equivalent of the bitwise and of two
vma_flags_t values, via vma_flags_and_mask().

Next, add the ability to obtain the difference between two sets of VMA
flags, that is the equivalent to the exclusive bitwise OR of the two sets
of flags, via vma_flags_diff_pair().

vma_flags_xxx_mask() typically operates on a pointer to a vma_flags_t
value, which is assumed to be an lvalue of some kind (such as a field in a
struct or a stack variable) and an rvalue of some kind (typically a
constant set of VMA flags obtained e.g.  via mk_vma_flags() or
equivalent).

However vma_flags_diff_pair() is intended to operate on two lvalues, so
use the _pair() suffix to make this clear.

Finally, update VMA userland tests to add these helpers.

We also port bitmap_xor() and __bitmap_xor() to the tools/ headers and
source to allow the tests to work with vma_flags_diff_pair().

Link: https://lkml.kernel.org/r/cover.1774034900.git.ljs@kernel.org
Link: https://lkml.kernel.org/r/53ab55b7da91425775e42c03177498ad6de88ef4.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h              | 60 ++++++++++++++++++++++++++++++++++-------
 include/linux/mm_types.h        |  8 ++++++
 tools/include/linux/bitmap.h    | 13 +++++++++
 tools/lib/bitmap.c              | 10 +++++++
 tools/testing/vma/include/dup.h | 36 ++++++++++++++++++++++++-
 5 files changed, 117 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 16a1ad9a3397..7954a7a2b811 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1048,6 +1048,19 @@ static __always_inline vma_flags_t __mk_vma_flags(size_t count,
 	return flags;
 }
 
+/*
+ * Helper macro which bitwise-or combines the specified input flags into a
+ * vma_flags_t bitmap value. E.g.:
+ *
+ * vma_flags_t flags = mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT,
+ *              VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT);
+ *
+ * The compiler cleverly optimises away all of the work and this ends up being
+ * equivalent to aggregating the values manually.
+ */
+#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
+					 (const vma_flag_t []){__VA_ARGS__})
+
 /*
  * Test whether a specific VMA flag is set, e.g.:
  *
@@ -1062,17 +1075,30 @@ static __always_inline bool vma_flags_test(const vma_flags_t *flags,
 }
 
 /*
- * Helper macro which bitwise-or combines the specified input flags into a
- * vma_flags_t bitmap value. E.g.:
- *
- * vma_flags_t flags = mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT,
- * 		VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT);
+ * Obtain a set of VMA flags which contain the overlapping flags contained
+ * within flags and to_and.
+ */
+static __always_inline vma_flags_t vma_flags_and_mask(const vma_flags_t *flags,
+						      vma_flags_t to_and)
+{
+	vma_flags_t dst;
+	unsigned long *bitmap_dst = dst.__vma_flags;
+	const unsigned long *bitmap = flags->__vma_flags;
+	const unsigned long *bitmap_to_and = to_and.__vma_flags;
+
+	bitmap_and(bitmap_dst, bitmap, bitmap_to_and, NUM_VMA_FLAG_BITS);
+	return dst;
+}
+
+/*
+ * Obtain a set of VMA flags which contains the specified overlapping flags,
+ * e.g.:
  *
- * The compiler cleverly optimises away all of the work and this ends up being
- * equivalent to aggregating the values manually.
+ * vma_flags_t read_flags = vma_flags_and(&flags, VMA_READ_BIT,
+ *                                        VMA_MAY_READ_BIT);
  */
-#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
-					 (const vma_flag_t []){__VA_ARGS__})
+#define vma_flags_and(flags, ...)				\
+	vma_flags_and_mask(flags, mk_vma_flags(__VA_ARGS__))
 
 /*  Test each of to_test flags in flags, non-atomically. */
 static __always_inline bool vma_flags_test_any_mask(const vma_flags_t *flags,
@@ -1146,6 +1172,22 @@ static __always_inline void vma_flags_clear_mask(vma_flags_t *flags,
 #define vma_flags_clear(flags, ...) \
 	vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
 
+/*
+ * Obtain a VMA flags value containing those flags that are present in flags or
+ * flags_other but not in both.
+ */
+static __always_inline vma_flags_t vma_flags_diff_pair(const vma_flags_t *flags,
+		const vma_flags_t *flags_other)
+{
+	vma_flags_t dst;
+	const unsigned long *bitmap_other = flags_other->__vma_flags;
+	const unsigned long *bitmap = flags->__vma_flags;
+	unsigned long *bitmap_dst = dst.__vma_flags;
+
+	bitmap_xor(bitmap_dst, bitmap, bitmap_other, NUM_VMA_FLAG_BITS);
+	return dst;
+}
+
 /*
  * Helper to test that ALL specified flags are set in a VMA.
  *
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f22aecb047b7..321aa150c1ee 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -870,6 +870,14 @@ typedef struct {
 
 #define EMPTY_VMA_FLAGS ((vma_flags_t){ })
 
+/* Are no flags set in the specified VMA flags? */
+static __always_inline bool vma_flags_empty(const vma_flags_t *flags)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+
+	return bitmap_empty(bitmap, NUM_VMA_FLAG_BITS);
+}
+
 /*
  * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
  * manipulate mutable fields which will cause those fields to be updated in the
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 250883090a5d..845eda759f67 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -28,6 +28,8 @@ bool __bitmap_subset(const unsigned long *bitmap1,
 		     const unsigned long *bitmap2, unsigned int nbits);
 bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 		    const unsigned long *bitmap2, unsigned int nbits);
+void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+		  const unsigned long *bitmap2, unsigned int nbits);
 
 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
 #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
@@ -209,4 +211,15 @@ static inline void bitmap_clear(unsigned long *map, unsigned int start,
 	else
 		__bitmap_clear(map, start, nbits);
 }
+
+static __always_inline
+void bitmap_xor(unsigned long *dst, const unsigned long *src1,
+		const unsigned long *src2, unsigned int nbits)
+{
+	if (small_const_nbits(nbits))
+		*dst = *src1 ^ *src2;
+	else
+		__bitmap_xor(dst, src1, src2, nbits);
+}
+
 #endif /* _TOOLS_LINUX_BITMAP_H */
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c
index aa83d22c45e3..fedc9070f0e4 100644
--- a/tools/lib/bitmap.c
+++ b/tools/lib/bitmap.c
@@ -169,3 +169,13 @@ bool __bitmap_subset(const unsigned long *bitmap1,
 			return false;
 	return true;
 }
+
+void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+				const unsigned long *bitmap2, unsigned int bits)
+{
+	unsigned int k;
+	unsigned int nr = BITS_TO_LONGS(bits);
+
+	for (k = 0; k < nr; k++)
+		dst[k] = bitmap1[k] ^ bitmap2[k];
+}
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 8865ffe046d8..8091a5caaeb8 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -422,6 +422,13 @@ struct vma_iterator {
 #define MAPCOUNT_ELF_CORE_MARGIN	(5)
 #define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
 
+static __always_inline bool vma_flags_empty(const vma_flags_t *flags)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+
+	return bitmap_empty(bitmap, NUM_VMA_FLAG_BITS);
+}
+
 /* What action should be taken after an .mmap_prepare call is complete? */
 enum mmap_action_type {
 	MMAP_NOTHING,		/* Mapping is complete, no further action. */
@@ -855,6 +862,21 @@ static __always_inline bool vma_flags_test(const vma_flags_t *flags,
 	return test_bit((__force int)bit, bitmap);
 }
 
+static __always_inline vma_flags_t vma_flags_and_mask(const vma_flags_t *flags,
+						      vma_flags_t to_and)
+{
+	vma_flags_t dst;
+	unsigned long *bitmap_dst = dst.__vma_flags;
+	const unsigned long *bitmap = flags->__vma_flags;
+	const unsigned long *bitmap_to_and = to_and.__vma_flags;
+
+	bitmap_and(bitmap_dst, bitmap, bitmap_to_and, NUM_VMA_FLAG_BITS);
+	return dst;
+}
+
+#define vma_flags_and(flags, ...)		\
+	vma_flags_and_mask(flags, mk_vma_flags(__VA_ARGS__))
+
 static __always_inline bool vma_flags_test_any_mask(const vma_flags_t *flags,
 		vma_flags_t to_test)
 {
@@ -901,8 +923,20 @@ static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t
 #define vma_flags_clear(flags, ...) \
 	vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
 
+static __always_inline vma_flags_t vma_flags_diff_pair(const vma_flags_t *flags,
+		const vma_flags_t *flags_other)
+{
+	vma_flags_t dst;
+	const unsigned long *bitmap_other = flags_other->__vma_flags;
+	const unsigned long *bitmap = flags->__vma_flags;
+	unsigned long *bitmap_dst = dst.__vma_flags;
+
+	bitmap_xor(bitmap_dst, bitmap, bitmap_other, NUM_VMA_FLAG_BITS);
+	return dst;
+}
+
 static inline bool vma_test_all_mask(const struct vm_area_struct *vma,
-					   vma_flags_t flags)
+				     vma_flags_t flags)
 {
 	return vma_flags_test_all_mask(&vma->flags, flags);
 }
-- 
cgit v1.2.3


From 8228e42b5f88aa68708ced277399ee3b59748627 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:20 +0000
Subject: mm/vma: add further vma_flags_t unions

In order to utilise the new vma_flags_t type, we currently place it in
union with legacy vm_flags fields of type vm_flags_t to make the
transition smoother.

Add vma_flags_t union entries for mm->def_flags and vmg->vm_flags -
mm->def_vma_flags and vmg->vma_flags respectively.

Once the conversion is complete, these will be replaced with vma_flags_t
entries alone.

Also update the VMA tests to reflect the change.

Link: https://lkml.kernel.org/r/d507d542c089ba132e9da53f2ff7f80ca117c3b4.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h        | 6 +++++-
 mm/vma.h                        | 6 +++++-
 tools/testing/vma/include/dup.h | 5 ++++-
 3 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 321aa150c1ee..8ef84849953f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1249,7 +1249,11 @@ struct mm_struct {
 		unsigned long data_vm;	   /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
 		unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
 		unsigned long stack_vm;	   /* VM_STACK */
-		vm_flags_t def_flags;
+		union {
+			/* Temporary while VMA flags are being converted. */
+			vm_flags_t def_flags;
+			vma_flags_t def_vma_flags;
+		};
 
 		/**
 		 * @write_protect_seq: Locked when any thread is write
diff --git a/mm/vma.h b/mm/vma.h
index eba388c61ef4..cf8926558bf6 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -98,7 +98,11 @@ struct vma_merge_struct {
 	unsigned long end;
 	pgoff_t pgoff;
 
-	vm_flags_t vm_flags;
+	union {
+		/* Temporary while VMA flags are being converted. */
+		vm_flags_t vm_flags;
+		vma_flags_t vma_flags;
+	};
 	struct file *file;
 	struct anon_vma *anon_vma;
 	struct mempolicy *policy;
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 8091a5caaeb8..58e063b1ee27 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -33,7 +33,10 @@ struct mm_struct {
 	unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
 	unsigned long stack_vm;	   /* VM_STACK */
 
-	unsigned long def_flags;
+	union {
+		vm_flags_t def_flags;
+		vma_flags_t def_vma_flags;
+	};
 
 	mm_flags_t flags; /* Must use mm_flags_* helpers to access */
 };
-- 
cgit v1.2.3


From 7ec1885a7e283caaf6566aedc1eea5988d545f97 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:22 +0000
Subject: mm/vma: use new VMA flags for sticky flags logic

Use the new vma_flags_t flags implementation to perform the logic around
sticky flags and what flags are ignored on VMA merge.

We make use of the new vma_flags_empty(), vma_flags_diff_pair(), and
vma_flags_and_mask() functionality.

Also update the VMA tests accordingly.

Link: https://lkml.kernel.org/r/369574f06360ffa44707047e3b58eb4897345fba.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h                 | 32 ++++++++++++++-----------
 mm/vma.c                           | 48 ++++++++++++++++++++++++++++----------
 tools/testing/vma/include/custom.h |  5 ----
 tools/testing/vma/include/dup.h    |  9 +++++--
 4 files changed, 62 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7954a7a2b811..d7e647e31742 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -540,6 +540,7 @@ enum {
 
 /* VMA basic access permission flags */
 #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
+#define VMA_ACCESS_FLAGS mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT)
 
 /*
  * Special vmas that are non-mergable, non-mlock()able.
@@ -585,27 +586,32 @@ enum {
  * possesses it but the other does not, the merged VMA should nonetheless have
  * applied to it:
  *
- *   VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its
- *                  references cleared via /proc/$pid/clear_refs, any merged VMA
- *                  should be considered soft-dirty also as it operates at a VMA
- *                  granularity.
+ *   VMA_SOFTDIRTY_BIT - if a VMA is marked soft-dirty, that is has not had its
+ *                       references cleared via /proc/$pid/clear_refs, any
+ *                       merged VMA should be considered soft-dirty also as it
+ *                       operates at a VMA granularity.
  *
- * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that
- *                  mapped page tables may contain metadata not described by the
- *                  VMA and thus any merged VMA may also contain this metadata,
- *                  and thus we must make this flag sticky.
+ * VMA_MAYBE_GUARD_BIT - If a VMA may have guard regions in place it implies
+ *                       that mapped page tables may contain metadata not
+ *                       described by the VMA and thus any merged VMA may also
+ *                       contain this metadata, and thus we must make this flag
+ *                       sticky.
  */
-#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define VMA_STICKY_FLAGS mk_vma_flags(VMA_SOFTDIRTY_BIT, VMA_MAYBE_GUARD_BIT)
+#else
+#define VMA_STICKY_FLAGS mk_vma_flags(VMA_MAYBE_GUARD_BIT)
+#endif
 
 /*
  * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one
  * of these flags and the other not does not preclude a merge.
  *
- *    VM_STICKY - When merging VMAs, VMA flags must match, unless they are
- *                'sticky'. If any sticky flags exist in either VMA, we simply
- *                set all of them on the merged VMA.
+ *    VMA_STICKY_FLAGS - When merging VMAs, VMA flags must match, unless they
+ *                       are 'sticky'. If any sticky flags exist in either VMA,
+ *                       we simply set all of them on the merged VMA.
  */
-#define VM_IGNORE_MERGE VM_STICKY
+#define VMA_IGNORE_MERGE_FLAGS VMA_STICKY_FLAGS
 
 /*
  * Flags which should result in page tables being copied on fork. These are
diff --git a/mm/vma.c b/mm/vma.c
index 4d21e7d8e93c..6af26619e020 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -86,10 +86,15 @@ static bool vma_is_fork_child(struct vm_area_struct *vma)
 static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_next)
 {
 	struct vm_area_struct *vma = merge_next ? vmg->next : vmg->prev;
+	vma_flags_t diff;
 
 	if (!mpol_equal(vmg->policy, vma_policy(vma)))
 		return false;
-	if ((vma->vm_flags ^ vmg->vm_flags) & ~VM_IGNORE_MERGE)
+
+	diff = vma_flags_diff_pair(&vma->flags, &vmg->vma_flags);
+	vma_flags_clear_mask(&diff, VMA_IGNORE_MERGE_FLAGS);
+
+	if (!vma_flags_empty(&diff))
 		return false;
 	if (vma->vm_file != vmg->file)
 		return false;
@@ -805,7 +810,8 @@ static bool can_merge_remove_vma(struct vm_area_struct *vma)
 static __must_check struct vm_area_struct *vma_merge_existing_range(
 		struct vma_merge_struct *vmg)
 {
-	vm_flags_t sticky_flags = vmg->vm_flags & VM_STICKY;
+	vma_flags_t sticky_flags = vma_flags_and_mask(&vmg->vma_flags,
+						      VMA_STICKY_FLAGS);
 	struct vm_area_struct *middle = vmg->middle;
 	struct vm_area_struct *prev = vmg->prev;
 	struct vm_area_struct *next;
@@ -898,15 +904,22 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
 	vma_start_write(middle);
 
 	if (merge_right) {
+		vma_flags_t next_sticky;
+
 		vma_start_write(next);
 		vmg->target = next;
-		sticky_flags |= (next->vm_flags & VM_STICKY);
+		next_sticky = vma_flags_and_mask(&next->flags, VMA_STICKY_FLAGS);
+		vma_flags_set_mask(&sticky_flags, next_sticky);
 	}
 
 	if (merge_left) {
+		vma_flags_t prev_sticky;
+
 		vma_start_write(prev);
 		vmg->target = prev;
-		sticky_flags |= (prev->vm_flags & VM_STICKY);
+
+		prev_sticky = vma_flags_and_mask(&prev->flags, VMA_STICKY_FLAGS);
+		vma_flags_set_mask(&sticky_flags, prev_sticky);
 	}
 
 	if (merge_both) {
@@ -976,7 +989,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
 	if (err || commit_merge(vmg))
 		goto abort;
 
-	vm_flags_set(vmg->target, sticky_flags);
+	vma_set_flags_mask(vmg->target, sticky_flags);
 	khugepaged_enter_vma(vmg->target, vmg->vm_flags);
 	vmg->state = VMA_MERGE_SUCCESS;
 	return vmg->target;
@@ -1154,12 +1167,16 @@ int vma_expand(struct vma_merge_struct *vmg)
 	struct vm_area_struct *target = vmg->target;
 	struct vm_area_struct *next = vmg->next;
 	bool remove_next = false;
-	vm_flags_t sticky_flags;
+	vma_flags_t sticky_flags =
+		vma_flags_and_mask(&vmg->vma_flags, VMA_STICKY_FLAGS);
+	vma_flags_t target_sticky;
 	int ret = 0;
 
 	mmap_assert_write_locked(vmg->mm);
 	vma_start_write(target);
 
+	target_sticky = vma_flags_and_mask(&target->flags, VMA_STICKY_FLAGS);
+
 	if (next && target != next && vmg->end == next->vm_end)
 		remove_next = true;
 
@@ -1174,10 +1191,7 @@ int vma_expand(struct vma_merge_struct *vmg)
 	VM_WARN_ON_VMG(target->vm_start < vmg->start ||
 		       target->vm_end > vmg->end, vmg);
 
-	sticky_flags = vmg->vm_flags & VM_STICKY;
-	sticky_flags |= target->vm_flags & VM_STICKY;
-	if (remove_next)
-		sticky_flags |= next->vm_flags & VM_STICKY;
+	vma_flags_set_mask(&sticky_flags, target_sticky);
 
 	/*
 	 * If we are removing the next VMA or copying from a VMA
@@ -1194,13 +1208,18 @@ int vma_expand(struct vma_merge_struct *vmg)
 		return ret;
 
 	if (remove_next) {
+		vma_flags_t next_sticky;
+
 		vma_start_write(next);
 		vmg->__remove_next = true;
+
+		next_sticky = vma_flags_and_mask(&next->flags, VMA_STICKY_FLAGS);
+		vma_flags_set_mask(&sticky_flags, next_sticky);
 	}
 	if (commit_merge(vmg))
 		goto nomem;
 
-	vm_flags_set(target, sticky_flags);
+	vma_set_flags_mask(target, sticky_flags);
 	return 0;
 
 nomem:
@@ -1950,10 +1969,15 @@ out:
  */
 static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
 {
+	vma_flags_t diff = vma_flags_diff_pair(&a->flags, &b->flags);
+
+	vma_flags_clear_mask(&diff, VMA_ACCESS_FLAGS);
+	vma_flags_clear_mask(&diff, VMA_IGNORE_MERGE_FLAGS);
+
 	return a->vm_end == b->vm_start &&
 		mpol_equal(vma_policy(a), vma_policy(b)) &&
 		a->vm_file == b->vm_file &&
-		!((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_IGNORE_MERGE)) &&
+		vma_flags_empty(&diff) &&
 		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
 }
 
diff --git a/tools/testing/vma/include/custom.h b/tools/testing/vma/include/custom.h
index 6200f938e586..7cdd0f60600a 100644
--- a/tools/testing/vma/include/custom.h
+++ b/tools/testing/vma/include/custom.h
@@ -134,8 +134,3 @@ static __always_inline bool vma_flags_same_mask(vma_flags_t *flags,
 	vma_flags_same_mask(flags, mk_vma_flags(__VA_ARGS__))
 #define VMA_SPECIAL_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_DONTEXPAND_BIT, \
 				       VMA_PFNMAP_BIT, VMA_MIXEDMAP_BIT)
-#ifdef CONFIG_MEM_SOFT_DIRTY
-#define VMA_STICKY_FLAGS mk_vma_flags(VMA_SOFTDIRTY_BIT, VMA_MAYBE_GUARD_BIT)
-#else
-#define VMA_STICKY_FLAGS mk_vma_flags(VMA_MAYBE_GUARD_BIT)
-#endif
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 1dee78c34872..65134303b645 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -338,6 +338,7 @@ enum {
 
 /* VMA basic access permission flags */
 #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
+#define VMA_ACCESS_FLAGS mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT)
 
 /*
  * Special vmas that are non-mergable, non-mlock()able.
@@ -363,9 +364,13 @@ enum {
 
 #define CAP_IPC_LOCK         14
 
-#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define VMA_STICKY_FLAGS mk_vma_flags(VMA_SOFTDIRTY_BIT, VMA_MAYBE_GUARD_BIT)
+#else
+#define VMA_STICKY_FLAGS mk_vma_flags(VMA_MAYBE_GUARD_BIT)
+#endif
 
-#define VM_IGNORE_MERGE VM_STICKY
+#define VMA_IGNORE_MERGE_FLAGS VMA_STICKY_FLAGS
 
 #define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
 
-- 
cgit v1.2.3


From e8d464f4a94ccbcae8c9d3137ac5621b57ddd8a1 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:24 +0000
Subject: mm/vma: add append_vma_flags() helper

In order to be able to efficiently combine VMA flag masks with additional
VMA flag bits we need to extend the concept introduced in mk_vma_flags()
and __mk_vma_flags() by allowing the specification of a VMA flag mask to
append VMA flag bits to.

Update __mk_vma_flags() to allow for this and update mk_vma_flags()
accordingly, and also provide append_vma_flags() to allow for the caller
to specify which VMA flags mask to append to.

Finally, update the VMA flags tests to reflect the change.

Link: https://lkml.kernel.org/r/9f928cd4688270002f2c0c3777fcc9b49cc7a8ea.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h              | 20 ++++++++++++++------
 tools/testing/vma/include/dup.h | 14 +++++++-------
 2 files changed, 21 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d7e647e31742..26cfb2fbe4db 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1042,13 +1042,11 @@ static __always_inline void vma_flags_set_flag(vma_flags_t *flags,
 	__set_bit((__force int)bit, bitmap);
 }
 
-static __always_inline vma_flags_t __mk_vma_flags(size_t count,
-		const vma_flag_t *bits)
+static __always_inline vma_flags_t __mk_vma_flags(vma_flags_t flags,
+		size_t count, const vma_flag_t *bits)
 {
-	vma_flags_t flags;
 	int i;
 
-	vma_flags_clear_all(&flags);
 	for (i = 0; i < count; i++)
 		vma_flags_set_flag(&flags, bits[i]);
 	return flags;
@@ -1064,8 +1062,18 @@ static __always_inline vma_flags_t __mk_vma_flags(size_t count,
  * The compiler cleverly optimises away all of the work and this ends up being
  * equivalent to aggregating the values manually.
  */
-#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
-					 (const vma_flag_t []){__VA_ARGS__})
+#define mk_vma_flags(...) __mk_vma_flags(EMPTY_VMA_FLAGS,			\
+		COUNT_ARGS(__VA_ARGS__), (const vma_flag_t []){__VA_ARGS__})
+
+/*
+ * Helper macro which acts like mk_vma_flags, only appending to a copy of the
+ * specified flags rather than establishing new flags. E.g.:
+ *
+ * vma_flags_t flags = append_vma_flags(VMA_STACK_DEFAULT_FLAGS, VMA_STACK_BIT,
+ *              VMA_ACCOUNT_BIT);
+ */
+#define append_vma_flags(flags, ...) __mk_vma_flags(flags,			\
+		COUNT_ARGS(__VA_ARGS__), (const vma_flag_t []){__VA_ARGS__})
 
 /*
  * Test whether a specific VMA flag is set, e.g.:
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 3005e33d1ede..a2f311b5ea82 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -854,21 +854,21 @@ static inline void vm_flags_clear(struct vm_area_struct *vma,
 	vma_flags_clear_word(&vma->flags, flags);
 }
 
-static __always_inline vma_flags_t __mk_vma_flags(size_t count,
-		const vma_flag_t *bits)
+static __always_inline vma_flags_t __mk_vma_flags(vma_flags_t flags,
+		size_t count, const vma_flag_t *bits)
 {
-	vma_flags_t flags;
 	int i;
 
-	vma_flags_clear_all(&flags);
 	for (i = 0; i < count; i++)
 		vma_flags_set_flag(&flags, bits[i]);
-
 	return flags;
 }
 
-#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__),	\
-		(const vma_flag_t []){__VA_ARGS__})
+#define mk_vma_flags(...) __mk_vma_flags(EMPTY_VMA_FLAGS,			\
+		COUNT_ARGS(__VA_ARGS__), (const vma_flag_t []){__VA_ARGS__})
+
+#define append_vma_flags(flags, ...) __mk_vma_flags(flags,			\
+		COUNT_ARGS(__VA_ARGS__), (const vma_flag_t []){__VA_ARGS__})
 
 static __always_inline bool vma_flags_test(const vma_flags_t *flags,
 		vma_flag_t bit)
-- 
cgit v1.2.3


From 5fb55e951cf591c5e2d45273ceadbdcd0c44932c Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:26 +0000
Subject: mm: unexport vm_brk_flags() and eliminate vm_flags parameter

This function is only used by elf_load(), and that is a static function
that doesn't need an exported symbol to invoke an internal function, so
un-EXPORT_SYMBOLS() it.

Also, the vm_flags parameter is unnecessary, as we only ever set VM_EXEC,
so simply make this parameter a boolean.

While we're here, clean up the mm.h definitions for the various vm_xxx()
helpers so we actually specify parameter names and elide the redundant
extern's.

Link: https://lkml.kernel.org/r/7bada48ddf3f9dbd3e6c4fc50ec2f4de97706f52.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/binfmt_elf.c    |  3 +--
 include/linux/mm.h | 12 ++++++------
 mm/mmap.c          |  8 ++------
 3 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fb857faaf0d6..16a56b6b3f6c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -453,14 +453,13 @@ static unsigned long elf_load(struct file *filep, unsigned long addr,
 		zero_end = ELF_PAGEALIGN(zero_end);
 
 		error = vm_brk_flags(zero_start, zero_end - zero_start,
-				     prot & PROT_EXEC ? VM_EXEC : 0);
+				     prot & PROT_EXEC);
 		if (error)
 			map_addr = error;
 	}
 	return map_addr;
 }
 
-
 static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
 {
 	elf_addr_t min_addr = -1;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 26cfb2fbe4db..5b85ffc2760c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3991,12 +3991,12 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {}
 #endif
 
 /* This takes the mm semaphore itself */
-extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
-extern int vm_munmap(unsigned long, size_t);
-extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
-        unsigned long, unsigned long,
-        unsigned long, unsigned long);
-extern unsigned long __must_check vm_mmap_shadow_stack(unsigned long addr,
+int __must_check vm_brk_flags(unsigned long addr, unsigned long request, bool is_exec);
+int vm_munmap(unsigned long start, size_t len);
+unsigned long __must_check vm_mmap(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long prot,
+		unsigned long flag, unsigned long offset);
+unsigned long __must_check vm_mmap_shadow_stack(unsigned long addr,
 		unsigned long len, unsigned long flags);
 
 struct vm_unmapped_area_info {
diff --git a/mm/mmap.c b/mm/mmap.c
index 79544d893411..2d2b814978bf 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1201,8 +1201,9 @@ out:
 	return ret;
 }
 
-int vm_brk_flags(unsigned long addr, unsigned long request, vm_flags_t vm_flags)
+int vm_brk_flags(unsigned long addr, unsigned long request, bool is_exec)
 {
+	const vm_flags_t vm_flags = is_exec ? VM_EXEC : 0;
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = NULL;
 	unsigned long len;
@@ -1217,10 +1218,6 @@ int vm_brk_flags(unsigned long addr, unsigned long request, vm_flags_t vm_flags)
 	if (!len)
 		return 0;
 
-	/* Until we need other flags, refuse anything except VM_EXEC. */
-	if ((vm_flags & (~VM_EXEC)) != 0)
-		return -EINVAL;
-
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
@@ -1246,7 +1243,6 @@ limits_failed:
 	mmap_write_unlock(mm);
 	return ret;
 }
-EXPORT_SYMBOL(vm_brk_flags);
 
 static
 unsigned long tear_down_vmas(struct mm_struct *mm, struct vma_iterator *vmi,
-- 
cgit v1.2.3


From 3ee584538259c356c66146ac46f2e4fd2ba28bee Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:27 +0000
Subject: mm/vma: introduce vma_flags_same[_mask/_pair]()

Add helpers to determine if two sets of VMA flags are precisely the same,
that is - that every flag set one is set in another, and neither contain
any flags not set in the other.

We also introduce vma_flags_same_pair() for cases where we want to compare
two sets of VMA flags which are both non-const values.

Also update the VMA tests to reflect the change, we already implicitly
test that this functions correctly having used it for testing purposes
previously.

Link: https://lkml.kernel.org/r/4f764bf619e77205837c7c819b62139ef6337ca3.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h                 | 28 ++++++++++++++++++++++++++++
 tools/testing/vma/include/custom.h | 11 -----------
 tools/testing/vma/include/dup.h    | 21 +++++++++++++++++++++
 3 files changed, 49 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5b85ffc2760c..1f3e9100164d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1202,6 +1202,34 @@ static __always_inline vma_flags_t vma_flags_diff_pair(const vma_flags_t *flags,
 	return dst;
 }
 
+/* Determine if flags and flags_other have precisely the same flags set. */
+static __always_inline bool vma_flags_same_pair(const vma_flags_t *flags,
+						const vma_flags_t *flags_other)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+	const unsigned long *bitmap_other = flags_other->__vma_flags;
+
+	return bitmap_equal(bitmap, bitmap_other, NUM_VMA_FLAG_BITS);
+}
+
+/* Determine if flags and flags_other have precisely the same flags set.  */
+static __always_inline bool vma_flags_same_mask(const vma_flags_t *flags,
+						vma_flags_t flags_other)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+	const unsigned long *bitmap_other = flags_other.__vma_flags;
+
+	return bitmap_equal(bitmap, bitmap_other, NUM_VMA_FLAG_BITS);
+}
+
+/*
+ * Helper macro to determine if only the specific flags are set, e.g.:
+ *
+ * if (vma_flags_same(&flags, VMA_WRITE_BIT) { ... }
+ */
+#define vma_flags_same(flags, ...) \
+	vma_flags_same_mask(flags, mk_vma_flags(__VA_ARGS__))
+
 /*
  * Helper to test that ALL specified flags are set in a VMA.
  *
diff --git a/tools/testing/vma/include/custom.h b/tools/testing/vma/include/custom.h
index 8f33df02816a..2c498e713fbd 100644
--- a/tools/testing/vma/include/custom.h
+++ b/tools/testing/vma/include/custom.h
@@ -102,16 +102,5 @@ static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
 	return PAGE_SIZE;
 }
 
-/* Place here until needed in the kernel code. */
-static __always_inline bool vma_flags_same_mask(vma_flags_t *flags,
-						vma_flags_t flags_other)
-{
-	const unsigned long *bitmap = flags->__vma_flags;
-	const unsigned long *bitmap_other = flags_other.__vma_flags;
-
-	return bitmap_equal(bitmap, bitmap_other, NUM_VMA_FLAG_BITS);
-}
-#define vma_flags_same(flags, ...) \
-	vma_flags_same_mask(flags, mk_vma_flags(__VA_ARGS__))
 #define VMA_SPECIAL_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_DONTEXPAND_BIT, \
 				       VMA_PFNMAP_BIT, VMA_MIXEDMAP_BIT)
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 802b3d97b627..65f630923461 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -954,6 +954,27 @@ static __always_inline vma_flags_t vma_flags_diff_pair(const vma_flags_t *flags,
 	return dst;
 }
 
+static __always_inline bool vma_flags_same_pair(const vma_flags_t *flags,
+						const vma_flags_t *flags_other)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+	const unsigned long *bitmap_other = flags_other->__vma_flags;
+
+	return bitmap_equal(bitmap, bitmap_other, NUM_VMA_FLAG_BITS);
+}
+
+static __always_inline bool vma_flags_same_mask(const vma_flags_t *flags,
+						vma_flags_t flags_other)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+	const unsigned long *bitmap_other = flags_other.__vma_flags;
+
+	return bitmap_equal(bitmap, bitmap_other, NUM_VMA_FLAG_BITS);
+}
+
+#define vma_flags_same(flags, ...) \
+	vma_flags_same_mask(flags, mk_vma_flags(__VA_ARGS__))
+
 static inline bool vma_test_all_mask(const struct vm_area_struct *vma,
 				     vma_flags_t flags)
 {
-- 
cgit v1.2.3


From c8555bc95d6222aa729b3a1195e07e566707ec02 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:28 +0000
Subject: mm/vma: introduce [vma_flags,legacy]_to_[legacy,vma_flags]() helpers

While we are still converting VMA flags from vma_flags_t to vm_flags_t,
introduce helpers to convert between the two to allow for iterative
development without having to 'change the world' in a single commit'.

Also update VMA flags tests to reflect the change.

Finally, refresh vma_flags_overwrite_word(),
vma_flag_overwrite_word_once(), vma_flags_set_word() and
vma_flags_clear_word() in the VMA tests to reflect current kernel
implementations - this should make no functional difference, but keeps the
logic consistent between the two.

Link: https://lkml.kernel.org/r/d3569470dbb3ae79134ca7c3eb3fc4df7086e874.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h        | 26 ++++++++++++++++++++++++++
 tools/testing/vma/include/dup.h | 36 ++++++++++++++++++++++++++++++++----
 2 files changed, 58 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8ef84849953f..1da8fb04133f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1069,6 +1069,18 @@ static __always_inline void vma_flags_clear_all(vma_flags_t *flags)
 	bitmap_zero(flags->__vma_flags, NUM_VMA_FLAG_BITS);
 }
 
+/*
+ * Helper function which converts a vma_flags_t value to a legacy vm_flags_t
+ * value. This is only valid if the input flags value can be expressed in a
+ * system word.
+ *
+ * Will be removed once the conversion to VMA flags is complete.
+ */
+static __always_inline vm_flags_t vma_flags_to_legacy(vma_flags_t flags)
+{
+	return (vm_flags_t)flags.__vma_flags[0];
+}
+
 /*
  * Copy value to the first system word of VMA flags, non-atomically.
  *
@@ -1082,6 +1094,20 @@ static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long va
 	bitmap[0] = value;
 }
 
+/*
+ * Helper function which converts a legacy vm_flags_t value to a vma_flags_t
+ * value.
+ *
+ * Will be removed once the conversion to VMA flags is complete.
+ */
+static __always_inline vma_flags_t legacy_to_vma_flags(vm_flags_t flags)
+{
+	vma_flags_t ret = EMPTY_VMA_FLAGS;
+
+	vma_flags_overwrite_word(&ret, flags);
+	return ret;
+}
+
 /*
  * Copy value to the first system word of VMA flags ONCE, non-atomically.
  *
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 65f630923461..f49af21319ba 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -766,7 +766,9 @@ static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
  */
 static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
 {
-	*ACCESS_PRIVATE(flags, __vma_flags) = value;
+	unsigned long *bitmap = flags->__vma_flags;
+
+	bitmap[0] = value;
 }
 
 /*
@@ -777,7 +779,7 @@ static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long va
  */
 static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
 {
-	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+	unsigned long *bitmap = flags->__vma_flags;
 
 	WRITE_ONCE(*bitmap, value);
 }
@@ -785,7 +787,7 @@ static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned lo
 /* Update the first system word of VMA flags setting bits, non-atomically. */
 static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
 {
-	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+	unsigned long *bitmap = flags->__vma_flags;
 
 	*bitmap |= value;
 }
@@ -793,7 +795,7 @@ static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
 /* Update the first system word of VMA flags clearing bits, non-atomically. */
 static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
 {
-	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+	unsigned long *bitmap = flags->__vma_flags;
 
 	*bitmap &= ~value;
 }
@@ -803,6 +805,32 @@ static __always_inline void vma_flags_clear_all(vma_flags_t *flags)
 	bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
 }
 
+/*
+ * Helper function which converts a vma_flags_t value to a legacy vm_flags_t
+ * value. This is only valid if the input flags value can be expressed in a
+ * system word.
+ *
+ * Will be removed once the conversion to VMA flags is complete.
+ */
+static __always_inline vm_flags_t vma_flags_to_legacy(vma_flags_t flags)
+{
+	return (vm_flags_t)flags.__vma_flags[0];
+}
+
+/*
+ * Helper function which converts a legacy vm_flags_t value to a vma_flags_t
+ * value.
+ *
+ * Will be removed once the conversion to VMA flags is complete.
+ */
+static __always_inline vma_flags_t legacy_to_vma_flags(vm_flags_t flags)
+{
+	vma_flags_t ret = EMPTY_VMA_FLAGS;
+
+	vma_flags_overwrite_word(&ret, flags);
+	return ret;
+}
+
 static __always_inline void vma_flags_set_flag(vma_flags_t *flags,
 		vma_flag_t bit)
 {
-- 
cgit v1.2.3


From fb67bba5d9b8561f433695c8916c097910193561 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:30 +0000
Subject: mm/vma: introduce vma_test[_any[_mask]](), and make inlining
 consistent

Introduce helper functions and macros to make it convenient to test flags
and flag masks for VMAs, specifically:

* vma_test() - determine if a single VMA flag is set in a VMA.
* vma_test_any_mask() - determine if any flags in a vma_flags_t value are
			set in a VMA.
* vma_test_any() - Helper macro to test if any of specific flags are set.

Also, there are a mix of 'inline's and '__always_inline's in VMA helper
function declarations, update to consistently use __always_inline.

Finally, update the VMA tests to reflect the changes.

Link: https://lkml.kernel.org/r/be1d71f08307d747a82232cbd8664a88c0f41419.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h              | 49 +++++++++++++++++++++++++++------
 include/linux/mm_types.h        | 12 +++++---
 tools/testing/vma/include/dup.h | 61 +++++++++++++++++++++++++++--------------
 3 files changed, 88 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1f3e9100164d..f704d7cf2871 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -994,7 +994,8 @@ static inline void vm_flags_mod(struct vm_area_struct *vma,
 	__vm_flags_mod(vma, set, clear);
 }
 
-static inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, vma_flag_t bit)
+static __always_inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma,
+		vma_flag_t bit)
 {
 	const vm_flags_t mask = BIT((__force int)bit);
 
@@ -1009,7 +1010,8 @@ static inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, vma_flag_
  * Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific
  * valid flags are allowed to do this.
  */
-static inline void vma_set_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit)
+static __always_inline void vma_set_atomic_flag(struct vm_area_struct *vma,
+		vma_flag_t bit)
 {
 	unsigned long *bitmap = vma->flags.__vma_flags;
 
@@ -1025,7 +1027,8 @@ static inline void vma_set_atomic_flag(struct vm_area_struct *vma, vma_flag_t bi
  * This is necessarily racey, so callers must ensure that serialisation is
  * achieved through some other means, or that races are permissible.
  */
-static inline bool vma_test_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit)
+static __always_inline bool vma_test_atomic_flag(struct vm_area_struct *vma,
+		vma_flag_t bit)
 {
 	if (__vma_atomic_valid_flag(vma, bit))
 		return test_bit((__force int)bit, &vma->vm_flags);
@@ -1230,13 +1233,41 @@ static __always_inline bool vma_flags_same_mask(const vma_flags_t *flags,
 #define vma_flags_same(flags, ...) \
 	vma_flags_same_mask(flags, mk_vma_flags(__VA_ARGS__))
 
+/*
+ * Test whether a specific flag in the VMA is set, e.g.:
+ *
+ * if (vma_test(vma, VMA_READ_BIT)) { ... }
+ */
+static __always_inline bool vma_test(const struct vm_area_struct *vma,
+		vma_flag_t bit)
+{
+	return vma_flags_test(&vma->flags, bit);
+}
+
+/* Helper to test any VMA flags in a VMA . */
+static __always_inline bool vma_test_any_mask(const struct vm_area_struct *vma,
+		vma_flags_t flags)
+{
+	return vma_flags_test_any_mask(&vma->flags, flags);
+}
+
+/*
+ * Helper macro for testing whether any VMA flags are set in a VMA,
+ * e.g.:
+ *
+ * if (vma_test_any(vma, VMA_IO_BIT, VMA_PFNMAP_BIT,
+ *		VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... }
+ */
+#define vma_test_any(vma, ...) \
+	vma_test_any_mask(vma, mk_vma_flags(__VA_ARGS__))
+
 /*
  * Helper to test that ALL specified flags are set in a VMA.
  *
  * Note: appropriate locks must be held, this function does not acquire them for
  * you.
  */
-static inline bool vma_test_all_mask(const struct vm_area_struct *vma,
+static __always_inline bool vma_test_all_mask(const struct vm_area_struct *vma,
 		vma_flags_t flags)
 {
 	return vma_flags_test_all_mask(&vma->flags, flags);
@@ -1256,7 +1287,7 @@ static inline bool vma_test_all_mask(const struct vm_area_struct *vma,
  * Note: appropriate locks must be held, this function does not acquire them for
  * you.
  */
-static inline void vma_set_flags_mask(struct vm_area_struct *vma,
+static __always_inline void vma_set_flags_mask(struct vm_area_struct *vma,
 		vma_flags_t flags)
 {
 	vma_flags_set_mask(&vma->flags, flags);
@@ -1286,7 +1317,7 @@ static __always_inline bool vma_desc_test(const struct vm_area_desc *desc,
 }
 
 /* Helper to test any VMA flags in a VMA descriptor. */
-static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
+static __always_inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
 		vma_flags_t flags)
 {
 	return vma_flags_test_any_mask(&desc->vma_flags, flags);
@@ -1303,7 +1334,7 @@ static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
 	vma_desc_test_any_mask(desc, mk_vma_flags(__VA_ARGS__))
 
 /* Helper to test all VMA flags in a VMA descriptor. */
-static inline bool vma_desc_test_all_mask(const struct vm_area_desc *desc,
+static __always_inline bool vma_desc_test_all_mask(const struct vm_area_desc *desc,
 		vma_flags_t flags)
 {
 	return vma_flags_test_all_mask(&desc->vma_flags, flags);
@@ -1319,7 +1350,7 @@ static inline bool vma_desc_test_all_mask(const struct vm_area_desc *desc,
 	vma_desc_test_all_mask(desc, mk_vma_flags(__VA_ARGS__))
 
 /* Helper to set all VMA flags in a VMA descriptor. */
-static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
+static __always_inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
 		vma_flags_t flags)
 {
 	vma_flags_set_mask(&desc->vma_flags, flags);
@@ -1336,7 +1367,7 @@ static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
 	vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
 
 /* Helper to clear all VMA flags in a VMA descriptor. */
-static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
+static __always_inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
 		vma_flags_t flags)
 {
 	vma_flags_clear_mask(&desc->vma_flags, flags);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1da8fb04133f..38fe6b915024 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1087,7 +1087,8 @@ static __always_inline vm_flags_t vma_flags_to_legacy(vma_flags_t flags)
  * IMPORTANT: This does not overwrite bytes past the first system word. The
  * caller must account for this.
  */
-static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
+static __always_inline void vma_flags_overwrite_word(vma_flags_t *flags,
+		unsigned long value)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 
@@ -1114,7 +1115,8 @@ static __always_inline vma_flags_t legacy_to_vma_flags(vm_flags_t flags)
  * IMPORTANT: This does not overwrite bytes past the first system word. The
  * caller must account for this.
  */
-static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
+static __always_inline void vma_flags_overwrite_word_once(vma_flags_t *flags,
+		unsigned long value)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 
@@ -1122,7 +1124,8 @@ static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned lo
 }
 
 /* Update the first system word of VMA flags setting bits, non-atomically. */
-static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
+static __always_inline void vma_flags_set_word(vma_flags_t *flags,
+		unsigned long value)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 
@@ -1130,7 +1133,8 @@ static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
 }
 
 /* Update the first system word of VMA flags clearing bits, non-atomically. */
-static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
+static __always_inline void vma_flags_clear_word(vma_flags_t *flags,
+		unsigned long value)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index f49af21319ba..f9fe07a8a443 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -764,7 +764,8 @@ static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
  * IMPORTANT: This does not overwrite bytes past the first system word. The
  * caller must account for this.
  */
-static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
+static __always_inline void vma_flags_overwrite_word(vma_flags_t *flags,
+		unsigned long value)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 
@@ -777,7 +778,8 @@ static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long va
  * IMPORTANT: This does not overwrite bytes past the first system word. The
  * caller must account for this.
  */
-static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
+static __always_inline void vma_flags_overwrite_word_once(vma_flags_t *flags,
+		unsigned long value)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 
@@ -785,7 +787,8 @@ static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned lo
 }
 
 /* Update the first system word of VMA flags setting bits, non-atomically. */
-static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
+static __always_inline void vma_flags_set_word(vma_flags_t *flags,
+		unsigned long value)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 
@@ -793,7 +796,8 @@ static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
 }
 
 /* Update the first system word of VMA flags clearing bits, non-atomically. */
-static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
+static __always_inline void vma_flags_clear_word(vma_flags_t *flags,
+		unsigned long value)
 {
 	unsigned long *bitmap = flags->__vma_flags;
 
@@ -1003,23 +1007,32 @@ static __always_inline bool vma_flags_same_mask(const vma_flags_t *flags,
 #define vma_flags_same(flags, ...) \
 	vma_flags_same_mask(flags, mk_vma_flags(__VA_ARGS__))
 
-static inline bool vma_test_all_mask(const struct vm_area_struct *vma,
-				     vma_flags_t flags)
+static __always_inline bool vma_test(const struct vm_area_struct *vma,
+		vma_flag_t bit)
 {
-	return vma_flags_test_all_mask(&vma->flags, flags);
+	return vma_flags_test(&vma->flags, bit);
 }
 
-#define vma_test_all(vma, ...) \
-	vma_test_all_mask(vma, mk_vma_flags(__VA_ARGS__))
+static __always_inline bool vma_test_any_mask(const struct vm_area_struct *vma,
+		vma_flags_t flags)
+{
+	return vma_flags_test_any_mask(&vma->flags, flags);
+}
 
-static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
+#define vma_test_any(vma, ...) \
+	vma_test_any_mask(vma, mk_vma_flags(__VA_ARGS__))
+
+static __always_inline bool vma_test_all_mask(const struct vm_area_struct *vma,
+		vma_flags_t flags)
 {
-	return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
-		(VM_SHARED | VM_MAYWRITE);
+	return vma_flags_test_all_mask(&vma->flags, flags);
 }
 
-static inline void vma_set_flags_mask(struct vm_area_struct *vma,
-				      vma_flags_t flags)
+#define vma_test_all(vma, ...) \
+	vma_test_all_mask(vma, mk_vma_flags(__VA_ARGS__))
+
+static __always_inline void vma_set_flags_mask(struct vm_area_struct *vma,
+		vma_flags_t flags)
 {
 	vma_flags_set_mask(&vma->flags, flags);
 }
@@ -1033,8 +1046,8 @@ static __always_inline bool vma_desc_test(const struct vm_area_desc *desc,
 	return vma_flags_test(&desc->vma_flags, bit);
 }
 
-static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
-					    vma_flags_t flags)
+static __always_inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
+		vma_flags_t flags)
 {
 	return vma_flags_test_any_mask(&desc->vma_flags, flags);
 }
@@ -1042,7 +1055,7 @@ static inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
 #define vma_desc_test_any(desc, ...) \
 	vma_desc_test_any_mask(desc, mk_vma_flags(__VA_ARGS__))
 
-static inline bool vma_desc_test_all_mask(const struct vm_area_desc *desc,
+static __always_inline bool vma_desc_test_all_mask(const struct vm_area_desc *desc,
 		vma_flags_t flags)
 {
 	return vma_flags_test_all_mask(&desc->vma_flags, flags);
@@ -1051,8 +1064,8 @@ static inline bool vma_desc_test_all_mask(const struct vm_area_desc *desc,
 #define vma_desc_test_all(desc, ...) \
 	vma_desc_test_all_mask(desc, mk_vma_flags(__VA_ARGS__))
 
-static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
-					   vma_flags_t flags)
+static __always_inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
+		vma_flags_t flags)
 {
 	vma_flags_set_mask(&desc->vma_flags, flags);
 }
@@ -1060,8 +1073,8 @@ static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
 #define vma_desc_set_flags(desc, ...) \
 	vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
 
-static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
-					     vma_flags_t flags)
+static __always_inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
+		vma_flags_t flags)
 {
 	vma_flags_clear_mask(&desc->vma_flags, flags);
 }
@@ -1069,6 +1082,12 @@ static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
 #define vma_desc_clear_flags(desc, ...) \
 	vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
 
+static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
+{
+	return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
+		(VM_SHARED | VM_MAYWRITE);
+}
+
 static inline bool is_shared_maywrite(const vma_flags_t *flags)
 {
 	return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
-- 
cgit v1.2.3


From e79d1c500f52506b9eab39e81017e30b76f2864d Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:32 +0000
Subject: mm: introduce vma_flags_count() and vma[_flags]_test_single_mask()

vma_flags_count() determines how many bits are set in VMA flags, using
bitmap_weight().

vma_flags_test_single_mask() determines if a vma_flags_t set of flags
contains a single flag specified as another vma_flags_t value, or if the
sought flag mask is empty, it is defined to return false.

This is useful when we want to declare a VMA flag as optionally a single
flag in a mask or empty depending on kernel configuration.

This allows us to have VM_NONE-like semantics when checking whether the
flag is set.

In a subsequent patch, we introduce the use of VMA_DROPPABLE of type
vma_flags_t using precisely these semantics.

It would be actively confusing to use vma_flags_test_any_single_mask() for
this (and vma_flags_test_all_mask() is not correct to use here, as it
trivially returns true when tested against an empty vma flags mask).

We introduce vma_flags_count() to be able to assert that the compared flag
mask is singular or empty, checked when CONFIG_DEBUG_VM is enabled.

Also update the VMA tests as part of this change.

Link: https://lkml.kernel.org/r/cd778dd02b9f2a01eb54d25a49dea8ec2ddf7753.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h                 | 46 ++++++++++++++++++++++++++++++++++++++
 tools/testing/vma/include/custom.h |  6 -----
 tools/testing/vma/include/dup.h    | 21 +++++++++++++++++
 tools/testing/vma/vma_internal.h   |  6 +++++
 4 files changed, 73 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f704d7cf2871..de72382efac2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1078,6 +1078,14 @@ static __always_inline vma_flags_t __mk_vma_flags(vma_flags_t flags,
 #define append_vma_flags(flags, ...) __mk_vma_flags(flags,			\
 		COUNT_ARGS(__VA_ARGS__), (const vma_flag_t []){__VA_ARGS__})
 
+/* Calculates the number of set bits in the specified VMA flags. */
+static __always_inline int vma_flags_count(const vma_flags_t *flags)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+
+	return bitmap_weight(bitmap, NUM_VMA_FLAG_BITS);
+}
+
 /*
  * Test whether a specific VMA flag is set, e.g.:
  *
@@ -1153,6 +1161,26 @@ static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
 #define vma_flags_test_all(flags, ...) \
 	vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
 
+/*
+ * Helper to test that a flag mask of type vma_flags_t has a SINGLE flag set
+ * (returning false if flagmask has no flags set).
+ *
+ * This is defined to make the semantics clearer when testing an optionally
+ * defined VMA flags mask, e.g.:
+ *
+ * if (vma_flags_test_single_mask(&flags, VMA_DROPPABLE)) { ... }
+ *
+ * When VMA_DROPPABLE is defined if available, or set to EMPTY_VMA_FLAGS
+ * otherwise.
+ */
+static __always_inline bool vma_flags_test_single_mask(const vma_flags_t *flags,
+		vma_flags_t flagmask)
+{
+	VM_WARN_ON_ONCE(vma_flags_count(&flagmask) > 1);
+
+	return vma_flags_test_any_mask(flags, flagmask);
+}
+
 /* Set each of the to_set flags in flags, non-atomically. */
 static __always_inline void vma_flags_set_mask(vma_flags_t *flags,
 		vma_flags_t to_set)
@@ -1281,6 +1309,24 @@ static __always_inline bool vma_test_all_mask(const struct vm_area_struct *vma,
 #define vma_test_all(vma, ...) \
 	vma_test_all_mask(vma, mk_vma_flags(__VA_ARGS__))
 
+/*
+ * Helper to test that a flag mask of type vma_flags_t has a SINGLE flag set
+ * (returning false if flagmask has no flags set).
+ *
+ * This is useful when a flag needs to be either defined or not depending upon
+ * kernel configuration, e.g.:
+ *
+ * if (vma_test_single_mask(vma, VMA_DROPPABLE)) { ... }
+ *
+ * When VMA_DROPPABLE is defined if available, or set to EMPTY_VMA_FLAGS
+ * otherwise.
+ */
+static __always_inline bool
+vma_test_single_mask(const struct vm_area_struct *vma, vma_flags_t flagmask)
+{
+	return vma_flags_test_single_mask(&vma->flags, flagmask);
+}
+
 /*
  * Helper to set all VMA flags in a VMA.
  *
diff --git a/tools/testing/vma/include/custom.h b/tools/testing/vma/include/custom.h
index 2c498e713fbd..b7d9eb0a44e4 100644
--- a/tools/testing/vma/include/custom.h
+++ b/tools/testing/vma/include/custom.h
@@ -15,12 +15,6 @@ extern unsigned long dac_mmap_min_addr;
 #define dac_mmap_min_addr	0UL
 #endif
 
-#define VM_WARN_ON(_expr) (WARN_ON(_expr))
-#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr))
-#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr))
-#define VM_BUG_ON(_expr) (BUG_ON(_expr))
-#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
-
 #define TASK_SIZE ((1ul << 47)-PAGE_SIZE)
 
 /*
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index f9fe07a8a443..244ee02dc21d 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -905,6 +905,13 @@ static __always_inline vma_flags_t __mk_vma_flags(vma_flags_t flags,
 #define append_vma_flags(flags, ...) __mk_vma_flags(flags,			\
 		COUNT_ARGS(__VA_ARGS__), (const vma_flag_t []){__VA_ARGS__})
 
+static __always_inline int vma_flags_count(const vma_flags_t *flags)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+
+	return bitmap_weight(bitmap, NUM_VMA_FLAG_BITS);
+}
+
 static __always_inline bool vma_flags_test(const vma_flags_t *flags,
 		vma_flag_t bit)
 {
@@ -952,6 +959,14 @@ static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
 #define vma_flags_test_all(flags, ...) \
 	vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
 
+static __always_inline bool vma_flags_test_single_mask(const vma_flags_t *flags,
+						vma_flags_t flagmask)
+{
+	VM_WARN_ON_ONCE(vma_flags_count(&flagmask) > 1);
+
+	return vma_flags_test_any_mask(flags, flagmask);
+}
+
 static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
 {
 	unsigned long *bitmap = flags->__vma_flags;
@@ -1031,6 +1046,12 @@ static __always_inline bool vma_test_all_mask(const struct vm_area_struct *vma,
 #define vma_test_all(vma, ...) \
 	vma_test_all_mask(vma, mk_vma_flags(__VA_ARGS__))
 
+static __always_inline bool
+vma_test_single_mask(const struct vm_area_struct *vma, vma_flags_t flagmask)
+{
+	return vma_flags_test_single_mask(&vma->flags, flagmask);
+}
+
 static __always_inline void vma_set_flags_mask(struct vm_area_struct *vma,
 		vma_flags_t flags)
 {
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 0e1121e2ef23..e12ab2c80f95 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -51,6 +51,12 @@ typedef unsigned long	pgprotval_t;
 typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
 typedef __bitwise unsigned int vm_fault_t;
 
+#define VM_WARN_ON(_expr) (WARN_ON(_expr))
+#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr))
+#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr))
+#define VM_BUG_ON(_expr) (BUG_ON(_expr))
+#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
+
 #include "include/stubs.h"
 #include "include/dup.h"
 #include "include/custom.h"
-- 
cgit v1.2.3


From 3a6455d56bd7c4cfb1ea35ddae052943065e338e Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:34 +0000
Subject: mm: convert do_brk_flags() to use vma_flags_t

In order to be able to do this, we need to change VM_DATA_DEFAULT_FLAGS
and friends and update the architecture-specific definitions also.

We then have to update some KSM logic to handle VMA flags, and introduce
VMA_STACK_FLAGS to define the vma_flags_t equivalent of VM_STACK_FLAGS.

We also introduce two helper functions for use during the time we are
converting legacy flags to vma_flags_t values - vma_flags_to_legacy() and
legacy_to_vma_flags().

This enables us to iteratively make changes to break these changes up into
separate parts.

We use these explicitly here to keep VM_STACK_FLAGS around for certain
users which need to maintain the legacy vm_flags_t values for the time
being.

We are no longer able to rely on the simple VM_xxx being set to zero if
the feature is not enabled, so in the case of VM_DROPPABLE we introduce
VMA_DROPPABLE as the vma_flags_t equivalent, which is set to
EMPTY_VMA_FLAGS if the droppable flag is not available.

While we're here, we make the description of do_brk_flags() into a kdoc
comment, as it almost was already.

We use vma_flags_to_legacy() to not need to update the vm_get_page_prot()
logic as this time.

Note that in create_init_stack_vma() we have to replace the BUILD_BUG_ON()
with a VM_WARN_ON_ONCE() as the tested values are no longer build time
available.

We also update mprotect_fixup() to use VMA flags where possible, though we
have to live with a little duplication between vm_flags_t and vma_flags_t
values for the time being until further conversions are made.

While we're here, update VM_SPECIAL to be defined in terms of
VMA_SPECIAL_FLAGS now we have vma_flags_to_legacy().

Finally, we update the VMA tests to reflect these changes.

Link: https://lkml.kernel.org/r/d02e3e45d9a33d7904b149f5604904089fd640ae.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>	[SELinux]
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/arc/include/asm/page.h        |  2 +-
 arch/arm/include/asm/page.h        |  2 +-
 arch/arm64/include/asm/page.h      |  7 +++++-
 arch/hexagon/include/asm/page.h    |  2 +-
 arch/loongarch/include/asm/page.h  |  2 +-
 arch/mips/include/asm/page.h       |  2 +-
 arch/nios2/include/asm/page.h      |  2 +-
 arch/powerpc/include/asm/page.h    |  4 ++--
 arch/powerpc/include/asm/page_32.h |  2 +-
 arch/powerpc/include/asm/page_64.h | 12 +++++-----
 arch/riscv/include/asm/page.h      |  2 +-
 arch/s390/include/asm/page.h       |  2 +-
 arch/x86/include/asm/page_types.h  |  2 +-
 arch/x86/um/asm/vm-flags.h         |  4 ++--
 include/linux/ksm.h                | 10 ++++----
 include/linux/mm.h                 | 49 +++++++++++++++++++++++---------------
 mm/internal.h                      |  3 +++
 mm/ksm.c                           | 43 +++++++++++++++++----------------
 mm/mmap.c                          | 13 ++++++----
 mm/mprotect.c                      | 46 +++++++++++++++++++++--------------
 mm/mremap.c                        |  6 ++---
 mm/vma.c                           | 34 ++++++++++++++------------
 mm/vma.h                           | 14 ++++++++---
 mm/vma_exec.c                      |  5 ++--
 security/selinux/hooks.c           |  4 +++-
 tools/testing/vma/include/custom.h |  3 ---
 tools/testing/vma/include/dup.h    | 42 +++++++++++++++++---------------
 tools/testing/vma/include/stubs.h  |  9 +++----
 tools/testing/vma/tests/merge.c    |  3 +--
 29 files changed, 191 insertions(+), 140 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 38214e126c6d..facc7a03b250 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -131,7 +131,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
 #define virt_addr_valid(kaddr)  pfn_valid(virt_to_pfn(kaddr))
 
 /* Default Permissions for stack/heaps pages (Non Executable) */
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_NON_EXEC
 
 #define WANT_PAGE_VIRTUAL   1
 
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index ef11b721230e..fa4c1225dde5 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -184,7 +184,7 @@ extern int pfn_valid(unsigned long);
 
 #include <asm/memory.h>
 
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_TSK_EXEC
 
 #include <asm-generic/getorder.h>
 #include <asm-generic/memory_model.h>
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index b39cc1127e1f..e25d0d18f6d7 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -46,7 +46,12 @@ int pfn_is_map_memory(unsigned long pfn);
 
 #endif /* !__ASSEMBLER__ */
 
-#define VM_DATA_DEFAULT_FLAGS	(VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED)
+#ifdef CONFIG_ARM64_MTE
+#define VMA_DATA_DEFAULT_FLAGS	append_vma_flags(VMA_DATA_FLAGS_TSK_EXEC, \
+						 VMA_MTE_ALLOWED_BIT)
+#else
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_TSK_EXEC
+#endif
 
 #include <asm-generic/getorder.h>
 
diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h
index f0aed3ed812b..6d82572a7f21 100644
--- a/arch/hexagon/include/asm/page.h
+++ b/arch/hexagon/include/asm/page.h
@@ -90,7 +90,7 @@ struct page;
 #define virt_to_page(kaddr) pfn_to_page(PFN_DOWN(__pa(kaddr)))
 
 /* Default vm area behavior is non-executable.  */
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_NON_EXEC
 
 #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 327bf0bc92bf..79235f4fc399 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -104,7 +104,7 @@ struct page *tlb_virt_to_page(unsigned long kaddr);
 extern int __virt_addr_valid(volatile void *kaddr);
 #define virt_addr_valid(kaddr)	__virt_addr_valid((volatile void *)(kaddr))
 
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_TSK_EXEC
 
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index 5ec428fcc887..50a382a0d8f6 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -213,7 +213,7 @@ extern bool __virt_addr_valid(const volatile void *kaddr);
 #define virt_addr_valid(kaddr)						\
 	__virt_addr_valid((const volatile void *) (kaddr))
 
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_TSK_EXEC
 
 extern unsigned long __kaslr_offset;
 static inline unsigned long kaslr_offset(void)
diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h
index 722956ac0bf8..71eb7c1b67d4 100644
--- a/arch/nios2/include/asm/page.h
+++ b/arch/nios2/include/asm/page.h
@@ -85,7 +85,7 @@ extern struct page *mem_map;
 # define virt_to_page(vaddr)	pfn_to_page(PFN_DOWN(virt_to_phys(vaddr)))
 # define virt_addr_valid(vaddr)	pfn_valid(PFN_DOWN(virt_to_phys(vaddr)))
 
-# define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC
+# define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_NON_EXEC
 
 #include <asm-generic/memory_model.h>
 
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index f2bb1f98eebe..281f25e071a3 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -240,8 +240,8 @@ static inline const void *pfn_to_kaddr(unsigned long pfn)
  * and needs to be executable.  This means the whole heap ends
  * up being executable.
  */
-#define VM_DATA_DEFAULT_FLAGS32	VM_DATA_FLAGS_TSK_EXEC
-#define VM_DATA_DEFAULT_FLAGS64	VM_DATA_FLAGS_NON_EXEC
+#define VMA_DATA_DEFAULT_FLAGS32	VMA_DATA_FLAGS_TSK_EXEC
+#define VMA_DATA_DEFAULT_FLAGS64	VMA_DATA_FLAGS_NON_EXEC
 
 #ifdef __powerpc64__
 #include <asm/page_64.h>
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index 25482405a811..1fd8c21f0a42 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -10,7 +10,7 @@
 #endif
 #endif
 
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS32
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_DEFAULT_FLAGS32
 
 #if defined(CONFIG_PPC_256K_PAGES) || \
     (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES))
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 0f564a06bf68..d96c984d023b 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -84,9 +84,9 @@ extern u64 ppc64_pft_size;
 
 #endif /* __ASSEMBLER__ */
 
-#define VM_DATA_DEFAULT_FLAGS \
+#define VMA_DATA_DEFAULT_FLAGS \
 	(is_32bit_task() ? \
-	 VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
+	 VMA_DATA_DEFAULT_FLAGS32 : VMA_DATA_DEFAULT_FLAGS64)
 
 /*
  * This is the default if a program doesn't have a PT_GNU_STACK
@@ -94,12 +94,12 @@ extern u64 ppc64_pft_size;
  * stack by default, so in the absence of a PT_GNU_STACK program header
  * we turn execute permission off.
  */
-#define VM_STACK_DEFAULT_FLAGS32	VM_DATA_FLAGS_EXEC
-#define VM_STACK_DEFAULT_FLAGS64	VM_DATA_FLAGS_NON_EXEC
+#define VMA_STACK_DEFAULT_FLAGS32	VMA_DATA_FLAGS_EXEC
+#define VMA_STACK_DEFAULT_FLAGS64	VMA_DATA_FLAGS_NON_EXEC
 
-#define VM_STACK_DEFAULT_FLAGS \
+#define VMA_STACK_DEFAULT_FLAGS \
 	(is_32bit_task() ? \
-	 VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
+	 VMA_STACK_DEFAULT_FLAGS32 : VMA_STACK_DEFAULT_FLAGS64)
 
 #include <asm-generic/getorder.h>
 
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 187aad0a7b03..c78017061b17 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -204,7 +204,7 @@ static __always_inline void *pfn_to_kaddr(unsigned long pfn)
 	(unsigned long)(_addr) >= PAGE_OFFSET && pfn_valid(virt_to_pfn(_addr));	\
 })
 
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_NON_EXEC
 
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index f339258135f7..56da819a79e6 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -277,7 +277,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
 
 #define virt_addr_valid(kaddr)	pfn_valid(phys_to_pfn(__pa_nodebug((unsigned long)(kaddr))))
 
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_NON_EXEC
 
 #endif /* !__ASSEMBLER__ */
 
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 018a8d906ca3..3e0801a0f782 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -26,7 +26,7 @@
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_TSK_EXEC
 
 /* Physical address where kernel should be loaded. */
 #define LOAD_PHYSICAL_ADDR	__ALIGN_KERNEL_MASK(CONFIG_PHYSICAL_START, CONFIG_PHYSICAL_ALIGN - 1)
diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h
index df7a3896f5dd..622d36d6ddff 100644
--- a/arch/x86/um/asm/vm-flags.h
+++ b/arch/x86/um/asm/vm-flags.h
@@ -9,11 +9,11 @@
 
 #ifdef CONFIG_X86_32
 
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC
+#define VMA_DATA_DEFAULT_FLAGS	VMA_DATA_FLAGS_TSK_EXEC
 
 #else
 
-#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_DATA_FLAGS_EXEC)
+#define VMA_STACK_DEFAULT_FLAGS append_vma_flags(VMA_DATA_FLAGS_EXEC, VMA_GROWSDOWN_BIT)
 
 #endif
 #endif
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index c982694c987b..d39d0d5483a2 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -17,8 +17,8 @@
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, vm_flags_t *vm_flags);
-vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
-			 vm_flags_t vm_flags);
+vma_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
+			  vma_flags_t vma_flags);
 int ksm_enable_merge_any(struct mm_struct *mm);
 int ksm_disable_merge_any(struct mm_struct *mm);
 int ksm_disable(struct mm_struct *mm);
@@ -103,10 +103,10 @@ bool ksm_process_mergeable(struct mm_struct *mm);
 
 #else  /* !CONFIG_KSM */
 
-static inline vm_flags_t ksm_vma_flags(struct mm_struct *mm,
-		const struct file *file, vm_flags_t vm_flags)
+static inline vma_flags_t ksm_vma_flags(struct mm_struct *mm,
+		const struct file *file, vma_flags_t vma_flags)
 {
-	return vm_flags;
+	return vma_flags;
 }
 
 static inline int ksm_disable(struct mm_struct *mm)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index de72382efac2..4042a584671e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -346,9 +346,9 @@ enum {
 	 * if KVM does not lock down the memory type.
 	 */
 	DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
-#ifdef CONFIG_PPC32
+#if defined(CONFIG_PPC32)
 	DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
-#else
+#elif defined(CONFIG_64BIT)
 	DECLARE_VMA_BIT(DROPPABLE, 40),
 #endif
 	DECLARE_VMA_BIT(UFFD_MINOR, 41),
@@ -503,31 +503,42 @@ enum {
 #endif
 #if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
 #define VM_DROPPABLE		INIT_VM_FLAG(DROPPABLE)
+#define VMA_DROPPABLE		mk_vma_flags(VMA_DROPPABLE_BIT)
 #else
 #define VM_DROPPABLE		VM_NONE
+#define VMA_DROPPABLE		EMPTY_VMA_FLAGS
 #endif
 
 /* Bits set in the VMA until the stack is in its final location */
 #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
 
-#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+#define TASK_EXEC_BIT ((current->personality & READ_IMPLIES_EXEC) ? \
+		       VMA_EXEC_BIT : VMA_READ_BIT)
 
 /* Common data flag combinations */
-#define VM_DATA_FLAGS_TSK_EXEC	(VM_READ | VM_WRITE | TASK_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_FLAGS_NON_EXEC	(VM_READ | VM_WRITE | VM_MAYREAD | \
-				 VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_FLAGS_EXEC	(VM_READ | VM_WRITE | VM_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#ifndef VM_DATA_DEFAULT_FLAGS		/* arch can override this */
-#define VM_DATA_DEFAULT_FLAGS  VM_DATA_FLAGS_EXEC
+#define VMA_DATA_FLAGS_TSK_EXEC	mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \
+		TASK_EXEC_BIT, VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT,	  \
+		VMA_MAYEXEC_BIT)
+#define VMA_DATA_FLAGS_NON_EXEC	mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \
+		VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT, VMA_MAYEXEC_BIT)
+#define VMA_DATA_FLAGS_EXEC	mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \
+		VMA_EXEC_BIT, VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT,	  \
+		VMA_MAYEXEC_BIT)
+
+#ifndef VMA_DATA_DEFAULT_FLAGS		/* arch can override this */
+#define VMA_DATA_DEFAULT_FLAGS  VMA_DATA_FLAGS_EXEC
 #endif
 
-#ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
-#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
+#ifndef VMA_STACK_DEFAULT_FLAGS		/* arch can override this */
+#define VMA_STACK_DEFAULT_FLAGS VMA_DATA_DEFAULT_FLAGS
 #endif
 
+#define VMA_STACK_FLAGS	append_vma_flags(VMA_STACK_DEFAULT_FLAGS,	\
+		VMA_STACK_BIT, VMA_ACCOUNT_BIT)
+
+/* Temporary until VMA flags conversion complete. */
+#define VM_STACK_FLAGS vma_flags_to_legacy(VMA_STACK_FLAGS)
+
 #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
 
 #ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS
@@ -536,8 +547,6 @@ enum {
 #define VM_SEALED_SYSMAP	VM_NONE
 #endif
 
-#define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
-
 /* VMA basic access permission flags */
 #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
 #define VMA_ACCESS_FLAGS mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT)
@@ -545,7 +554,10 @@ enum {
 /*
  * Special vmas that are non-mergable, non-mlock()able.
  */
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+
+#define VMA_SPECIAL_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_DONTEXPAND_BIT, \
+				       VMA_PFNMAP_BIT, VMA_MIXEDMAP_BIT)
+#define VM_SPECIAL vma_flags_to_legacy(VMA_SPECIAL_FLAGS)
 
 /*
  * Physically remapped pages are special. Tell the
@@ -1407,7 +1419,7 @@ static __always_inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
  * vm_area_desc object describing a proposed VMA, e.g.:
  *
  * vma_desc_set_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
- *		VMA_DONTDUMP_BIT);
+ * 		VMA_DONTDUMP_BIT);
  */
 #define vma_desc_set_flags(desc, ...) \
 	vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
@@ -4045,7 +4057,6 @@ extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
 extern struct file *get_task_exe_file(struct task_struct *task);
 
-extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages);
 extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages);
 
 extern bool vma_is_special_mapping(const struct vm_area_struct *vma,
diff --git a/mm/internal.h b/mm/internal.h
index 3d3fa35e5fd1..ce954bab8a37 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1916,4 +1916,7 @@ static inline int get_sysctl_max_map_count(void)
 	return READ_ONCE(sysctl_max_map_count);
 }
 
+bool may_expand_vm(struct mm_struct *mm, const vma_flags_t *vma_flags,
+		   unsigned long npages);
+
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/ksm.c b/mm/ksm.c
index 2a2f2f005fc3..7d5b76478f0b 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -735,21 +735,24 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr,
 	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
 }
 
-static bool ksm_compatible(const struct file *file, vm_flags_t vm_flags)
+static bool ksm_compatible(const struct file *file, vma_flags_t vma_flags)
 {
-	if (vm_flags & (VM_SHARED  | VM_MAYSHARE | VM_SPECIAL |
-			VM_HUGETLB | VM_DROPPABLE))
-		return false;		/* just ignore the advice */
-
+	/* Just ignore the advice. */
+	if (vma_flags_test_any(&vma_flags, VMA_SHARED_BIT, VMA_MAYSHARE_BIT,
+			       VMA_HUGETLB_BIT))
+		return false;
+	if (vma_flags_test_single_mask(&vma_flags, VMA_DROPPABLE))
+		return false;
+	if (vma_flags_test_any_mask(&vma_flags, VMA_SPECIAL_FLAGS))
+		return false;
 	if (file_is_dax(file))
 		return false;
-
 #ifdef VM_SAO
-	if (vm_flags & VM_SAO)
+	if (vma_flags_test(&vma_flags, VMA_SAO_BIT))
 		return false;
 #endif
 #ifdef VM_SPARC_ADI
-	if (vm_flags & VM_SPARC_ADI)
+	if (vma_flags_test(&vma_flags, VMA_SPARC_ADI_BIT))
 		return false;
 #endif
 
@@ -758,7 +761,7 @@ static bool ksm_compatible(const struct file *file, vm_flags_t vm_flags)
 
 static bool vma_ksm_compatible(struct vm_area_struct *vma)
 {
-	return ksm_compatible(vma->vm_file, vma->vm_flags);
+	return ksm_compatible(vma->vm_file, vma->flags);
 }
 
 static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
@@ -2825,17 +2828,17 @@ static int ksm_scan_thread(void *nothing)
 	return 0;
 }
 
-static bool __ksm_should_add_vma(const struct file *file, vm_flags_t vm_flags)
+static bool __ksm_should_add_vma(const struct file *file, vma_flags_t vma_flags)
 {
-	if (vm_flags & VM_MERGEABLE)
+	if (vma_flags_test(&vma_flags, VMA_MERGEABLE_BIT))
 		return false;
 
-	return ksm_compatible(file, vm_flags);
+	return ksm_compatible(file, vma_flags);
 }
 
 static void __ksm_add_vma(struct vm_area_struct *vma)
 {
-	if (__ksm_should_add_vma(vma->vm_file, vma->vm_flags))
+	if (__ksm_should_add_vma(vma->vm_file, vma->flags))
 		vm_flags_set(vma, VM_MERGEABLE);
 }
 
@@ -2860,16 +2863,16 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
  *
  * @mm:       Proposed VMA's mm_struct
  * @file:     Proposed VMA's file-backed mapping, if any.
- * @vm_flags: Proposed VMA"s flags.
+ * @vma_flags: Proposed VMA"s flags.
  *
- * Returns: @vm_flags possibly updated to mark mergeable.
+ * Returns: @vma_flags possibly updated to mark mergeable.
  */
-vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
-			 vm_flags_t vm_flags)
+vma_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
+			  vma_flags_t vma_flags)
 {
 	if (mm_flags_test(MMF_VM_MERGE_ANY, mm) &&
-	    __ksm_should_add_vma(file, vm_flags)) {
-		vm_flags |= VM_MERGEABLE;
+	    __ksm_should_add_vma(file, vma_flags)) {
+		vma_flags_set(&vma_flags, VMA_MERGEABLE_BIT);
 		/*
 		 * Generally, the flags here always include MMF_VM_MERGEABLE.
 		 * However, in rare cases, this flag may be cleared by ksmd who
@@ -2879,7 +2882,7 @@ vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
 			__ksm_enter(mm);
 	}
 
-	return vm_flags;
+	return vma_flags;
 }
 
 static void ksm_add_vmas(struct mm_struct *mm)
diff --git a/mm/mmap.c b/mm/mmap.c
index 2d2b814978bf..5754d1c36462 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -192,7 +192,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
 	brkvma = vma_prev_limit(&vmi, mm->start_brk);
 	/* Ok, looks good - let it rip. */
-	if (do_brk_flags(&vmi, brkvma, oldbrk, newbrk - oldbrk, 0) < 0)
+	if (do_brk_flags(&vmi, brkvma, oldbrk, newbrk - oldbrk,
+			 EMPTY_VMA_FLAGS) < 0)
 		goto out;
 
 	mm->brk = brk;
@@ -1203,7 +1204,8 @@ out:
 
 int vm_brk_flags(unsigned long addr, unsigned long request, bool is_exec)
 {
-	const vm_flags_t vm_flags = is_exec ? VM_EXEC : 0;
+	const vma_flags_t vma_flags = is_exec ?
+		mk_vma_flags(VMA_EXEC_BIT) : EMPTY_VMA_FLAGS;
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = NULL;
 	unsigned long len;
@@ -1230,7 +1232,7 @@ int vm_brk_flags(unsigned long addr, unsigned long request, bool is_exec)
 		goto munmap_failed;
 
 	vma = vma_prev(&vmi);
-	ret = do_brk_flags(&vmi, vma, addr, len, vm_flags);
+	ret = do_brk_flags(&vmi, vma, addr, len, vma_flags);
 	populate = ((mm->def_flags & VM_LOCKED) != 0);
 	mmap_write_unlock(mm);
 	userfaultfd_unmap_complete(mm, &uf);
@@ -1328,12 +1330,13 @@ destroy:
  * Return true if the calling process may expand its vm space by the passed
  * number of pages
  */
-bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
+bool may_expand_vm(struct mm_struct *mm, const vma_flags_t *vma_flags,
+		   unsigned long npages)
 {
 	if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
 		return false;
 
-	if (is_data_mapping(flags) &&
+	if (is_data_mapping_vma_flags(vma_flags) &&
 	    mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
 		/* Workaround for Valgrind */
 		if (rlimit(RLIMIT_DATA) == 0 &&
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 9681f055b9fc..eaa724b99908 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -697,7 +697,8 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
 	       unsigned long start, unsigned long end, vm_flags_t newflags)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	vm_flags_t oldflags = READ_ONCE(vma->vm_flags);
+	const vma_flags_t old_vma_flags = READ_ONCE(vma->flags);
+	vma_flags_t new_vma_flags = legacy_to_vma_flags(newflags);
 	long nrpages = (end - start) >> PAGE_SHIFT;
 	unsigned int mm_cp_flags = 0;
 	unsigned long charged = 0;
@@ -706,7 +707,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
 	if (vma_is_sealed(vma))
 		return -EPERM;
 
-	if (newflags == oldflags) {
+	if (vma_flags_same_pair(&old_vma_flags, &new_vma_flags)) {
 		*pprev = vma;
 		return 0;
 	}
@@ -717,8 +718,9 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
 	 * uncommon case, so doesn't need to be very optimized.
 	 */
 	if (arch_has_pfn_modify_check() &&
-	    (oldflags & (VM_PFNMAP|VM_MIXEDMAP)) &&
-	    (newflags & VM_ACCESS_FLAGS) == 0) {
+	    vma_flags_test_any(&old_vma_flags, VMA_PFNMAP_BIT,
+			       VMA_MIXEDMAP_BIT) &&
+	    !vma_flags_test_any_mask(&new_vma_flags, VMA_ACCESS_FLAGS)) {
 		pgprot_t new_pgprot = vm_get_page_prot(newflags);
 
 		error = walk_page_range(current->mm, start, end,
@@ -736,28 +738,31 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
 	 * hugetlb mapping were accounted for even if read-only so there is
 	 * no need to account for them here.
 	 */
-	if (newflags & VM_WRITE) {
+	if (vma_flags_test(&new_vma_flags, VMA_WRITE_BIT)) {
 		/* Check space limits when area turns into data. */
-		if (!may_expand_vm(mm, newflags, nrpages) &&
-				may_expand_vm(mm, oldflags, nrpages))
+		if (!may_expand_vm(mm, &new_vma_flags, nrpages) &&
+		    may_expand_vm(mm, &old_vma_flags, nrpages))
 			return -ENOMEM;
-		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
-						VM_SHARED|VM_NORESERVE))) {
+		if (!vma_flags_test_any(&old_vma_flags,
+				VMA_ACCOUNT_BIT, VMA_WRITE_BIT, VMA_HUGETLB_BIT,
+				VMA_SHARED_BIT, VMA_NORESERVE_BIT)) {
 			charged = nrpages;
 			if (security_vm_enough_memory_mm(mm, charged))
 				return -ENOMEM;
-			newflags |= VM_ACCOUNT;
+			vma_flags_set(&new_vma_flags, VMA_ACCOUNT_BIT);
 		}
-	} else if ((oldflags & VM_ACCOUNT) && vma_is_anonymous(vma) &&
-		   !vma->anon_vma) {
-		newflags &= ~VM_ACCOUNT;
+	} else if (vma_flags_test(&old_vma_flags, VMA_ACCOUNT_BIT) &&
+		   vma_is_anonymous(vma) && !vma->anon_vma) {
+		vma_flags_clear(&new_vma_flags, VMA_ACCOUNT_BIT);
 	}
 
+	newflags = vma_flags_to_legacy(new_vma_flags);
 	vma = vma_modify_flags(vmi, *pprev, vma, start, end, &newflags);
 	if (IS_ERR(vma)) {
 		error = PTR_ERR(vma);
 		goto fail;
 	}
+	new_vma_flags = legacy_to_vma_flags(newflags);
 
 	*pprev = vma;
 
@@ -773,19 +778,24 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
 
 	change_protection(tlb, vma, start, end, mm_cp_flags);
 
-	if ((oldflags & VM_ACCOUNT) && !(newflags & VM_ACCOUNT))
+	if (vma_flags_test(&old_vma_flags, VMA_ACCOUNT_BIT) &&
+	    !vma_flags_test(&new_vma_flags, VMA_ACCOUNT_BIT))
 		vm_unacct_memory(nrpages);
 
 	/*
 	 * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
 	 * fault on access.
 	 */
-	if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED &&
-			(newflags & VM_WRITE)) {
-		populate_vma_page_range(vma, start, end, NULL);
+	if (vma_flags_test(&new_vma_flags, VMA_WRITE_BIT)) {
+		const vma_flags_t mask =
+			vma_flags_and(&old_vma_flags, VMA_WRITE_BIT,
+				      VMA_SHARED_BIT, VMA_LOCKED_BIT);
+
+		if (vma_flags_same(&mask, VMA_LOCKED_BIT))
+			populate_vma_page_range(vma, start, end, NULL);
 	}
 
-	vm_stat_account(mm, oldflags, -nrpages);
+	vm_stat_account(mm, vma_flags_to_legacy(old_vma_flags), -nrpages);
 	vm_stat_account(mm, newflags, nrpages);
 	perf_event_mmap(vma);
 	return 0;
diff --git a/mm/mremap.c b/mm/mremap.c
index 36b3f1caebad..e9c8b1d05832 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1472,10 +1472,10 @@ static unsigned long mremap_to(struct vma_remap_struct *vrm)
 
 	/* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */
 	if (vrm->flags & MREMAP_DONTUNMAP) {
-		vm_flags_t vm_flags = vrm->vma->vm_flags;
+		vma_flags_t vma_flags = vrm->vma->flags;
 		unsigned long pages = vrm->old_len >> PAGE_SHIFT;
 
-		if (!may_expand_vm(mm, vm_flags, pages))
+		if (!may_expand_vm(mm, &vma_flags, pages))
 			return -ENOMEM;
 	}
 
@@ -1813,7 +1813,7 @@ static int check_prep_vma(struct vma_remap_struct *vrm)
 	if (!mlock_future_ok(mm, vma->vm_flags & VM_LOCKED, vrm->delta))
 		return -EAGAIN;
 
-	if (!may_expand_vm(mm, vma->vm_flags, vrm->delta >> PAGE_SHIFT))
+	if (!may_expand_vm(mm, &vma->flags, vrm->delta >> PAGE_SHIFT))
 		return -ENOMEM;
 
 	return 0;
diff --git a/mm/vma.c b/mm/vma.c
index 6af26619e020..9362860389ae 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2385,7 +2385,7 @@ static void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
 
 static void update_ksm_flags(struct mmap_state *map)
 {
-	map->vm_flags = ksm_vma_flags(map->mm, map->file, map->vm_flags);
+	map->vma_flags = ksm_vma_flags(map->mm, map->file, map->vma_flags);
 }
 
 static void set_desc_from_map(struct vm_area_desc *desc,
@@ -2446,7 +2446,7 @@ static int __mmap_setup(struct mmap_state *map, struct vm_area_desc *desc,
 	}
 
 	/* Check against address space limit. */
-	if (!may_expand_vm(map->mm, map->vm_flags, map->pglen - vms->nr_pages))
+	if (!may_expand_vm(map->mm, &map->vma_flags, map->pglen - vms->nr_pages))
 		return -ENOMEM;
 
 	/* Private writable mapping: check memory availability. */
@@ -2866,20 +2866,22 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	return ret;
 }
 
-/*
+/**
  * do_brk_flags() - Increase the brk vma if the flags match.
  * @vmi: The vma iterator
  * @addr: The start address
  * @len: The length of the increase
  * @vma: The vma,
- * @vm_flags: The VMA Flags
+ * @vma_flags: The VMA Flags
  *
  * Extend the brk VMA from addr to addr + len.  If the VMA is NULL or the flags
  * do not match then create a new anonymous VMA.  Eventually we may be able to
  * do some brk-specific accounting here.
+ *
+ * Returns: %0 on success, or otherwise an error.
  */
 int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
-		 unsigned long addr, unsigned long len, vm_flags_t vm_flags)
+		 unsigned long addr, unsigned long len, vma_flags_t vma_flags)
 {
 	struct mm_struct *mm = current->mm;
 
@@ -2887,9 +2889,12 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	 * Check against address space limits by the changed size
 	 * Note: This happens *after* clearing old mappings in some code paths.
 	 */
-	vm_flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
-	vm_flags = ksm_vma_flags(mm, NULL, vm_flags);
-	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT))
+	vma_flags_set_mask(&vma_flags, VMA_DATA_DEFAULT_FLAGS);
+	vma_flags_set(&vma_flags, VMA_ACCOUNT_BIT);
+	vma_flags_set_mask(&vma_flags, mm->def_vma_flags);
+
+	vma_flags = ksm_vma_flags(mm, NULL, vma_flags);
+	if (!may_expand_vm(mm, &vma_flags, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
 	if (mm->map_count > get_sysctl_max_map_count())
@@ -2903,7 +2908,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	 * occur after forking, so the expand will only happen on new VMAs.
 	 */
 	if (vma && vma->vm_end == addr) {
-		VMG_STATE(vmg, mm, vmi, addr, addr + len, vm_flags, PHYS_PFN(addr));
+		VMG_STATE(vmg, mm, vmi, addr, addr + len, vma_flags, PHYS_PFN(addr));
 
 		vmg.prev = vma;
 		/* vmi is positioned at prev, which this mode expects. */
@@ -2924,8 +2929,8 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 
 	vma_set_anonymous(vma);
 	vma_set_range(vma, addr, addr + len, addr >> PAGE_SHIFT);
-	vm_flags_init(vma, vm_flags);
-	vma->vm_page_prot = vm_get_page_prot(vm_flags);
+	vma->flags = vma_flags;
+	vma->vm_page_prot = vm_get_page_prot(vma_flags_to_legacy(vma_flags));
 	vma_start_write(vma);
 	if (vma_iter_store_gfp(vmi, vma, GFP_KERNEL))
 		goto mas_store_fail;
@@ -2936,10 +2941,10 @@ out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
 	mm->data_vm += len >> PAGE_SHIFT;
-	if (vm_flags & VM_LOCKED)
+	if (vma_flags_test(&vma_flags, VMA_LOCKED_BIT))
 		mm->locked_vm += (len >> PAGE_SHIFT);
 	if (pgtable_supports_soft_dirty())
-		vm_flags_set(vma, VM_SOFTDIRTY);
+		vma_set_flags(vma, VMA_SOFTDIRTY_BIT);
 	return 0;
 
 mas_store_fail:
@@ -3070,7 +3075,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
 	unsigned long new_start;
 
 	/* address space limit tests */
-	if (!may_expand_vm(mm, vma->vm_flags, grow))
+	if (!may_expand_vm(mm, &vma->flags, grow))
 		return -ENOMEM;
 
 	/* Stack limit test */
@@ -3289,7 +3294,6 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	unsigned long charged = vma_pages(vma);
 
-
 	if (find_vma_intersection(mm, vma->vm_start, vma->vm_end))
 		return -ENOMEM;
 
diff --git a/mm/vma.h b/mm/vma.h
index cf8926558bf6..1f2de6cb3b97 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -237,13 +237,13 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
 	return vma->vm_pgoff + PHYS_PFN(addr - vma->vm_start);
 }
 
-#define VMG_STATE(name, mm_, vmi_, start_, end_, vm_flags_, pgoff_)	\
+#define VMG_STATE(name, mm_, vmi_, start_, end_, vma_flags_, pgoff_)	\
 	struct vma_merge_struct name = {				\
 		.mm = mm_,						\
 		.vmi = vmi_,						\
 		.start = start_,					\
 		.end = end_,						\
-		.vm_flags = vm_flags_,					\
+		.vma_flags = vma_flags_,				\
 		.pgoff = pgoff_,					\
 		.state = VMA_MERGE_START,				\
 	}
@@ -465,7 +465,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		struct list_head *uf);
 
 int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma,
-		 unsigned long addr, unsigned long request, unsigned long flags);
+		 unsigned long addr, unsigned long request,
+		 vma_flags_t vma_flags);
 
 unsigned long unmapped_area(struct vm_unmapped_area_info *info);
 unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);
@@ -527,6 +528,13 @@ static inline bool is_data_mapping(vm_flags_t flags)
 	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
 }
 
+static inline bool is_data_mapping_vma_flags(const vma_flags_t *vma_flags)
+{
+	const vma_flags_t mask = vma_flags_and(vma_flags,
+			VMA_WRITE_BIT, VMA_SHARED_BIT, VMA_STACK_BIT);
+
+	return vma_flags_same(&mask, VMA_WRITE_BIT);
+}
 
 static inline void vma_iter_config(struct vma_iterator *vmi,
 		unsigned long index, unsigned long last)
diff --git a/mm/vma_exec.c b/mm/vma_exec.c
index 8134e1afca68..5cee8b7efa0f 100644
--- a/mm/vma_exec.c
+++ b/mm/vma_exec.c
@@ -36,7 +36,8 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
 	unsigned long new_start = old_start - shift;
 	unsigned long new_end = old_end - shift;
 	VMA_ITERATOR(vmi, mm, new_start);
-	VMG_STATE(vmg, mm, &vmi, new_start, old_end, 0, vma->vm_pgoff);
+	VMG_STATE(vmg, mm, &vmi, new_start, old_end, EMPTY_VMA_FLAGS,
+		  vma->vm_pgoff);
 	struct vm_area_struct *next;
 	struct mmu_gather tlb;
 	PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length);
@@ -135,7 +136,7 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
 	 * use STACK_TOP because that can depend on attributes which aren't
 	 * configured yet.
 	 */
-	BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
+	VM_WARN_ON_ONCE(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
 	vma->vm_end = STACK_TOP_MAX;
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
 	if (pgtable_supports_soft_dirty())
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d8224ea113d1..903303e084c2 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -7713,6 +7713,8 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
 
 static __init int selinux_init(void)
 {
+	vma_flags_t data_default_flags = VMA_DATA_DEFAULT_FLAGS;
+
 	pr_info("SELinux:  Initializing.\n");
 
 	memset(&selinux_state, 0, sizeof(selinux_state));
@@ -7729,7 +7731,7 @@ static __init int selinux_init(void)
 		      AUDIT_CFG_LSM_SECCTX_SUBJECT |
 		      AUDIT_CFG_LSM_SECCTX_OBJECT);
 
-	default_noexec = !(VM_DATA_DEFAULT_FLAGS & VM_EXEC);
+	default_noexec = !vma_flags_test(&data_default_flags, VMA_EXEC_BIT);
 	if (!default_noexec)
 		pr_notice("SELinux:  virtual memory is executable by default\n");
 
diff --git a/tools/testing/vma/include/custom.h b/tools/testing/vma/include/custom.h
index b7d9eb0a44e4..744fe874c168 100644
--- a/tools/testing/vma/include/custom.h
+++ b/tools/testing/vma/include/custom.h
@@ -95,6 +95,3 @@ static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
 {
 	return PAGE_SIZE;
 }
-
-#define VMA_SPECIAL_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_DONTEXPAND_BIT, \
-				       VMA_PFNMAP_BIT, VMA_MIXEDMAP_BIT)
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 244ee02dc21d..36373b81ad24 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -314,27 +314,33 @@ enum {
 /* Bits set in the VMA until the stack is in its final location */
 #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
 
-#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+#define TASK_EXEC_BIT ((current->personality & READ_IMPLIES_EXEC) ? \
+		       VM_EXEC_BIT : VM_READ_BIT)
 
 /* Common data flag combinations */
-#define VM_DATA_FLAGS_TSK_EXEC	(VM_READ | VM_WRITE | TASK_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_FLAGS_NON_EXEC	(VM_READ | VM_WRITE | VM_MAYREAD | \
-				 VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_FLAGS_EXEC	(VM_READ | VM_WRITE | VM_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#ifndef VM_DATA_DEFAULT_FLAGS		/* arch can override this */
-#define VM_DATA_DEFAULT_FLAGS  VM_DATA_FLAGS_EXEC
+#define VMA_DATA_FLAGS_TSK_EXEC	mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \
+		TASK_EXEC_BIT, VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT,	  \
+		VMA_MAYEXEC_BIT)
+#define VMA_DATA_FLAGS_NON_EXEC	mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \
+		VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT, VMA_MAYEXEC_BIT)
+#define VMA_DATA_FLAGS_EXEC	mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \
+		VMA_EXEC_BIT, VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT,	  \
+		VMA_MAYEXEC_BIT)
+
+#ifndef VMA_DATA_DEFAULT_FLAGS		/* arch can override this */
+#define VMA_DATA_DEFAULT_FLAGS  VMA_DATA_FLAGS_EXEC
 #endif
 
-#ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
-#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
+#ifndef VMA_STACK_DEFAULT_FLAGS		/* arch can override this */
+#define VMA_STACK_DEFAULT_FLAGS VMA_DATA_DEFAULT_FLAGS
 #endif
 
-#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+#define VMA_STACK_FLAGS	append_vma_flags(VMA_STACK_DEFAULT_FLAGS,	\
+		VMA_STACK_BIT, VMA_ACCOUNT_BIT)
+/* Temporary until VMA flags conversion complete. */
+#define VM_STACK_FLAGS vma_flags_to_legacy(VMA_STACK_FLAGS)
 
-#define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
 
 /* VMA basic access permission flags */
 #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
@@ -345,6 +351,9 @@ enum {
  */
 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
 
+#define VMA_SPECIAL_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_DONTEXPAND_BIT, \
+				       VMA_PFNMAP_BIT, VMA_MIXEDMAP_BIT)
+
 #define VMA_REMAP_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT,	\
 				     VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)
 
@@ -357,11 +366,6 @@ enum {
 /* This mask represents all the VMA flag bits used by mlock */
 #define VM_LOCKED_MASK	(VM_LOCKED | VM_LOCKONFAULT)
 
-#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
-
-#define VM_DATA_FLAGS_TSK_EXEC	(VM_READ | VM_WRITE | TASK_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
 #define RLIMIT_STACK		3	/* max stack size */
 #define RLIMIT_MEMLOCK		8	/* max locked-in-memory address space */
 
diff --git a/tools/testing/vma/include/stubs.h b/tools/testing/vma/include/stubs.h
index 416bb93f5005..b5dced3b0bd4 100644
--- a/tools/testing/vma/include/stubs.h
+++ b/tools/testing/vma/include/stubs.h
@@ -101,10 +101,10 @@ static inline bool shmem_file(struct file *file)
 	return false;
 }
 
-static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
-		const struct file *file, vm_flags_t vm_flags)
+static inline vma_flags_t ksm_vma_flags(struct mm_struct *mm,
+		const struct file *file, vma_flags_t vma_flags)
 {
-	return vm_flags;
+	return vma_flags;
 }
 
 static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
@@ -239,7 +239,8 @@ static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
 	return 0;
 }
 
-static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
+static inline bool may_expand_vm(struct mm_struct *mm,
+				 const vma_flags_t *vma_flags,
 				 unsigned long npages)
 {
 	return true;
diff --git a/tools/testing/vma/tests/merge.c b/tools/testing/vma/tests/merge.c
index d3e725dc0000..44e3977e3fc0 100644
--- a/tools/testing/vma/tests/merge.c
+++ b/tools/testing/vma/tests/merge.c
@@ -1429,11 +1429,10 @@ static bool test_expand_only_mode(void)
 {
 	vma_flags_t vma_flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT,
 					     VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT);
-	vm_flags_t legacy_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
 	struct mm_struct mm = {};
 	VMA_ITERATOR(vmi, &mm, 0);
 	struct vm_area_struct *vma_prev, *vma;
-	VMG_STATE(vmg, &mm, &vmi, 0x5000, 0x9000, legacy_flags, 5);
+	VMG_STATE(vmg, &mm, &vmi, 0x5000, 0x9000, vma_flags, 5);
 
 	/*
 	 * Place a VMA prior to the one we're expanding so we assert that we do
-- 
cgit v1.2.3


From d720b81d01b137dfc23e07461b05b76f822af6ab Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:36 +0000
Subject: mm/vma: introduce vma_clear_flags[_mask]()

Introduce a helper function and helper macro to easily clear a VMA's flags
using the new vma_flags_t vma->flags field:

* vma_clear_flags_mask() - Clears all of the flags in a specified mask in
			   the VMA's flags field.
* vma_clear_flags() - Clears all of the specified individual VMA flag bits
		      in a VMA's flags field.

Also update the VMA tests to reflect the change.

Link: https://lkml.kernel.org/r/9bd15da35c2c90e7441265adf01b5c2d3b5c6d41.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h              | 16 ++++++++++++++++
 tools/testing/vma/include/dup.h |  9 +++++++++
 2 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4042a584671e..6b614f8af045 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1363,6 +1363,22 @@ static __always_inline void vma_set_flags_mask(struct vm_area_struct *vma,
 #define vma_set_flags(vma, ...) \
 	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
 
+/* Helper to clear all VMA flags in a VMA. */
+static __always_inline void vma_clear_flags_mask(struct vm_area_struct *vma,
+		vma_flags_t flags)
+{
+	vma_flags_clear_mask(&vma->flags, flags);
+}
+
+/*
+ * Helper macro for clearing VMA flags, e.g.:
+ *
+ * vma_clear_flags(vma, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
+ * 		VMA_DONTDUMP_BIT);
+ */
+#define vma_clear_flags(vma, ...) \
+	vma_clear_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
+
 /*
  * Test whether a specific VMA flag is set in a VMA descriptor, e.g.:
  *
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 36373b81ad24..93ea600d0895 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -1065,6 +1065,15 @@ static __always_inline void vma_set_flags_mask(struct vm_area_struct *vma,
 #define vma_set_flags(vma, ...) \
 	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
 
+static __always_inline void vma_clear_flags_mask(struct vm_area_struct *vma,
+		vma_flags_t flags)
+{
+	vma_flags_clear_mask(&vma->flags, flags);
+}
+
+#define vma_clear_flags(vma, ...) \
+	vma_clear_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
+
 static __always_inline bool vma_desc_test(const struct vm_area_desc *desc,
 		vma_flag_t bit)
 {
-- 
cgit v1.2.3


From 769669bd9ca4cbae2562d57fe753efdcf17a196d Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:38 +0000
Subject: mm/vma: convert as much as we can in mm/vma.c to vma_flags_t

Now we have established a good foundation for vm_flags_t to vma_flags_t
changes, update mm/vma.c to utilise vma_flags_t wherever possible.

We are able to convert VM_STARTGAP_FLAGS entirely as this is only used in
mm/vma.c, and to account for the fact we can't use VM_NONE to make life
easier, place the definition of this within existing #ifdef's to be
cleaner.

Generally the remaining changes are mechanical.

Also update the VMA tests to reflect the changes.

Link: https://lkml.kernel.org/r/5fdeaf8af9a12c2a5d68497495f52fa627d05a5b.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h                |  6 ++-
 mm/vma.c                          | 89 ++++++++++++++++++++++-----------------
 tools/testing/vma/include/dup.h   |  4 ++
 tools/testing/vma/include/stubs.h |  2 +-
 4 files changed, 59 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6b614f8af045..c6b40dc88918 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -463,8 +463,10 @@ enum {
 #if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS) || \
 	defined(CONFIG_RISCV_USER_CFI)
 #define VM_SHADOW_STACK	INIT_VM_FLAG(SHADOW_STACK)
+#define VMA_STARTGAP_FLAGS mk_vma_flags(VMA_GROWSDOWN_BIT, VMA_SHADOW_STACK_BIT)
 #else
 #define VM_SHADOW_STACK	VM_NONE
+#define VMA_STARTGAP_FLAGS mk_vma_flags(VMA_GROWSDOWN_BIT)
 #endif
 #if defined(CONFIG_PPC64)
 #define VM_SAO		INIT_VM_FLAG(SAO)
@@ -539,8 +541,6 @@ enum {
 /* Temporary until VMA flags conversion complete. */
 #define VM_STACK_FLAGS vma_flags_to_legacy(VMA_STACK_FLAGS)
 
-#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
-
 #ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS
 #define VM_SEALED_SYSMAP	VM_SEALED
 #else
@@ -584,6 +584,8 @@ enum {
 /* This mask represents all the VMA flag bits used by mlock */
 #define VM_LOCKED_MASK	(VM_LOCKED | VM_LOCKONFAULT)
 
+#define VMA_LOCKED_MASK	mk_vma_flags(VMA_LOCKED_BIT, VMA_LOCKONFAULT_BIT)
+
 /* These flags can be updated atomically via VMA/mmap read lock. */
 #define VM_ATOMIC_SET_ALLOWED VM_MAYBE_GUARD
 
diff --git a/mm/vma.c b/mm/vma.c
index 9362860389ae..9d194f8e7acb 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -185,7 +185,7 @@ static void init_multi_vma_prep(struct vma_prepare *vp,
 }
 
 /*
- * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
+ * Return true if we can merge this (vma_flags,anon_vma,file,vm_pgoff)
  * in front of (at a lower virtual address and file offset than) the vma.
  *
  * We cannot merge two vmas if they have differently assigned (non-NULL)
@@ -211,7 +211,7 @@ static bool can_vma_merge_before(struct vma_merge_struct *vmg)
 }
 
 /*
- * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
+ * Return true if we can merge this (vma_flags,anon_vma,file,vm_pgoff)
  * beyond (at a higher virtual address and file offset than) the vma.
  *
  * We cannot merge two vmas if they have differently assigned (non-NULL)
@@ -850,7 +850,8 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
 	 * furthermost left or right side of the VMA, then we have no chance of
 	 * merging and should abort.
 	 */
-	if (vmg->vm_flags & VM_SPECIAL || (!left_side && !right_side))
+	if (vma_flags_test_any_mask(&vmg->vma_flags, VMA_SPECIAL_FLAGS) ||
+	    (!left_side && !right_side))
 		return NULL;
 
 	if (left_side)
@@ -1072,7 +1073,8 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
 	vmg->state = VMA_MERGE_NOMERGE;
 
 	/* Special VMAs are unmergeable, also if no prev/next. */
-	if ((vmg->vm_flags & VM_SPECIAL) || (!prev && !next))
+	if (vma_flags_test_any_mask(&vmg->vma_flags, VMA_SPECIAL_FLAGS) ||
+	    (!prev && !next))
 		return NULL;
 
 	can_merge_left = can_vma_merge_left(vmg);
@@ -1459,17 +1461,17 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
 		nrpages = vma_pages(next);
 
 		vms->nr_pages += nrpages;
-		if (next->vm_flags & VM_LOCKED)
+		if (vma_test(next, VMA_LOCKED_BIT))
 			vms->locked_vm += nrpages;
 
-		if (next->vm_flags & VM_ACCOUNT)
+		if (vma_test(next, VMA_ACCOUNT_BIT))
 			vms->nr_accounted += nrpages;
 
 		if (is_exec_mapping(next->vm_flags))
 			vms->exec_vm += nrpages;
 		else if (is_stack_mapping(next->vm_flags))
 			vms->stack_vm += nrpages;
-		else if (is_data_mapping(next->vm_flags))
+		else if (is_data_mapping_vma_flags(&next->flags))
 			vms->data_vm += nrpages;
 
 		if (vms->uf) {
@@ -2065,14 +2067,13 @@ static bool vm_ops_needs_writenotify(const struct vm_operations_struct *vm_ops)
 
 static bool vma_is_shared_writable(struct vm_area_struct *vma)
 {
-	return (vma->vm_flags & (VM_WRITE | VM_SHARED)) ==
-		(VM_WRITE | VM_SHARED);
+	return vma_test_all(vma, VMA_WRITE_BIT, VMA_SHARED_BIT);
 }
 
 static bool vma_fs_can_writeback(struct vm_area_struct *vma)
 {
 	/* No managed pages to writeback. */
-	if (vma->vm_flags & VM_PFNMAP)
+	if (vma_test(vma, VMA_PFNMAP_BIT))
 		return false;
 
 	return vma->vm_file && vma->vm_file->f_mapping &&
@@ -2338,8 +2339,11 @@ void mm_drop_all_locks(struct mm_struct *mm)
  * We account for memory if it's a private writeable mapping,
  * not hugepages and VM_NORESERVE wasn't set.
  */
-static bool accountable_mapping(struct file *file, vm_flags_t vm_flags)
+static bool accountable_mapping(struct mmap_state *map)
 {
+	const struct file *file = map->file;
+	vma_flags_t mask;
+
 	/*
 	 * hugetlb has its own accounting separate from the core VM
 	 * VM_HUGETLB may not be set yet so we cannot check for that flag.
@@ -2347,7 +2351,9 @@ static bool accountable_mapping(struct file *file, vm_flags_t vm_flags)
 	if (file && is_file_hugepages(file))
 		return false;
 
-	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
+	mask = vma_flags_and(&map->vma_flags, VMA_NORESERVE_BIT, VMA_SHARED_BIT,
+			     VMA_WRITE_BIT);
+	return vma_flags_same(&mask, VMA_WRITE_BIT);
 }
 
 /*
@@ -2450,7 +2456,7 @@ static int __mmap_setup(struct mmap_state *map, struct vm_area_desc *desc,
 		return -ENOMEM;
 
 	/* Private writable mapping: check memory availability. */
-	if (accountable_mapping(map->file, map->vm_flags)) {
+	if (accountable_mapping(map)) {
 		map->charged = map->pglen;
 		map->charged -= vms->nr_accounted;
 		if (map->charged) {
@@ -2460,7 +2466,7 @@ static int __mmap_setup(struct mmap_state *map, struct vm_area_desc *desc,
 		}
 
 		vms->nr_accounted = 0;
-		map->vm_flags |= VM_ACCOUNT;
+		vma_flags_set(&map->vma_flags, VMA_ACCOUNT_BIT);
 	}
 
 	/*
@@ -2508,12 +2514,12 @@ static int __mmap_new_file_vma(struct mmap_state *map,
 	 * Drivers should not permit writability when previously it was
 	 * disallowed.
 	 */
-	VM_WARN_ON_ONCE(map->vm_flags != vma->vm_flags &&
-			!(map->vm_flags & VM_MAYWRITE) &&
-			(vma->vm_flags & VM_MAYWRITE));
+	VM_WARN_ON_ONCE(!vma_flags_same_pair(&map->vma_flags, &vma->flags) &&
+			!vma_flags_test(&map->vma_flags, VMA_MAYWRITE_BIT) &&
+			vma_test(vma, VMA_MAYWRITE_BIT));
 
 	map->file = vma->vm_file;
-	map->vm_flags = vma->vm_flags;
+	map->vma_flags = vma->flags;
 
 	return 0;
 }
@@ -2544,7 +2550,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
 
 	vma_iter_config(vmi, map->addr, map->end);
 	vma_set_range(vma, map->addr, map->end, map->pgoff);
-	vm_flags_init(vma, map->vm_flags);
+	vma->flags = map->vma_flags;
 	vma->vm_page_prot = map->page_prot;
 
 	if (vma_iter_prealloc(vmi, vma)) {
@@ -2554,7 +2560,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
 
 	if (map->file)
 		error = __mmap_new_file_vma(map, vma);
-	else if (map->vm_flags & VM_SHARED)
+	else if (vma_flags_test(&map->vma_flags, VMA_SHARED_BIT))
 		error = shmem_zero_setup(vma);
 	else
 		vma_set_anonymous(vma);
@@ -2564,7 +2570,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
 
 	if (!map->check_ksm_early) {
 		update_ksm_flags(map);
-		vm_flags_init(vma, map->vm_flags);
+		vma->flags = map->vma_flags;
 	}
 
 #ifdef CONFIG_SPARC64
@@ -2604,7 +2610,6 @@ free_vma:
 static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
 {
 	struct mm_struct *mm = map->mm;
-	vm_flags_t vm_flags = vma->vm_flags;
 
 	perf_event_mmap(vma);
 
@@ -2612,9 +2617,9 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
 	vms_complete_munmap_vmas(&map->vms, &map->mas_detach);
 
 	vm_stat_account(mm, vma->vm_flags, map->pglen);
-	if (vm_flags & VM_LOCKED) {
+	if (vma_test(vma, VMA_LOCKED_BIT)) {
 		if (!vma_supports_mlock(vma))
-			vm_flags_clear(vma, VM_LOCKED_MASK);
+			vma_clear_flags_mask(vma, VMA_LOCKED_MASK);
 		else
 			mm->locked_vm += map->pglen;
 	}
@@ -2630,7 +2635,7 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
 	 * a completely new data area).
 	 */
 	if (pgtable_supports_soft_dirty())
-		vm_flags_set(vma, VM_SOFTDIRTY);
+		vma_set_flags(vma, VMA_SOFTDIRTY_BIT);
 
 	vma_set_page_prot(vma);
 }
@@ -2993,7 +2998,8 @@ retry:
 	gap = vma_iter_addr(&vmi) + info->start_gap;
 	gap += (info->align_offset - gap) & info->align_mask;
 	tmp = vma_next(&vmi);
-	if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
+	/* Avoid prev check if possible */
+	if (tmp && vma_test_any_mask(tmp, VMA_STARTGAP_FLAGS)) {
 		if (vm_start_gap(tmp) < gap + length - 1) {
 			low_limit = tmp->vm_end;
 			vma_iter_reset(&vmi);
@@ -3045,7 +3051,8 @@ retry:
 	gap -= (gap - info->align_offset) & info->align_mask;
 	gap_end = vma_iter_end(&vmi);
 	tmp = vma_next(&vmi);
-	if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
+	 /* Avoid prev check if possible */
+	if (tmp && vma_test_any_mask(tmp, VMA_STARTGAP_FLAGS)) {
 		if (vm_start_gap(tmp) < gap_end) {
 			high_limit = vm_start_gap(tmp);
 			vma_iter_reset(&vmi);
@@ -3083,12 +3090,16 @@ static int acct_stack_growth(struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	/* mlock limit tests */
-	if (!mlock_future_ok(mm, vma->vm_flags & VM_LOCKED, grow << PAGE_SHIFT))
+	if (!mlock_future_ok(mm, vma_test(vma, VMA_LOCKED_BIT),
+			     grow << PAGE_SHIFT))
 		return -ENOMEM;
 
 	/* Check to ensure the stack will not grow into a hugetlb-only region */
-	new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
-			vma->vm_end - size;
+	new_start = vma->vm_end - size;
+#ifdef CONFIG_STACK_GROWSUP
+	if (vma_test(vma, VMA_GROWSUP_BIT))
+		new_start = vma->vm_start;
+#endif
 	if (is_hugepage_only_range(vma->vm_mm, new_start, size))
 		return -EFAULT;
 
@@ -3102,7 +3113,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
 	return 0;
 }
 
-#if defined(CONFIG_STACK_GROWSUP)
+#ifdef CONFIG_STACK_GROWSUP
 /*
  * PA-RISC uses this for its stack.
  * vma is the last one with address > vma->vm_end.  Have to extend vma.
@@ -3115,7 +3126,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 	int error = 0;
 	VMA_ITERATOR(vmi, mm, vma->vm_start);
 
-	if (!(vma->vm_flags & VM_GROWSUP))
+	if (!vma_test(vma, VMA_GROWSUP_BIT))
 		return -EFAULT;
 
 	mmap_assert_write_locked(mm);
@@ -3135,7 +3146,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 
 	next = find_vma_intersection(mm, vma->vm_end, gap_addr);
 	if (next && vma_is_accessible(next)) {
-		if (!(next->vm_flags & VM_GROWSUP))
+		if (!vma_test(next, VMA_GROWSUP_BIT))
 			return -ENOMEM;
 		/* Check that both stack segments have the same anon_vma? */
 	}
@@ -3169,7 +3180,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
 			error = acct_stack_growth(vma, size, grow);
 			if (!error) {
-				if (vma->vm_flags & VM_LOCKED)
+				if (vma_test(vma, VMA_LOCKED_BIT))
 					mm->locked_vm += grow;
 				vm_stat_account(mm, vma->vm_flags, grow);
 				anon_vma_interval_tree_pre_update_vma(vma);
@@ -3200,7 +3211,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 	int error = 0;
 	VMA_ITERATOR(vmi, mm, vma->vm_start);
 
-	if (!(vma->vm_flags & VM_GROWSDOWN))
+	if (!vma_test(vma, VMA_GROWSDOWN_BIT))
 		return -EFAULT;
 
 	mmap_assert_write_locked(mm);
@@ -3213,7 +3224,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 	prev = vma_prev(&vmi);
 	/* Check that both stack segments have the same anon_vma? */
 	if (prev) {
-		if (!(prev->vm_flags & VM_GROWSDOWN) &&
+		if (!vma_test(prev, VMA_GROWSDOWN_BIT) &&
 		    vma_is_accessible(prev) &&
 		    (address - prev->vm_end < stack_guard_gap))
 			return -ENOMEM;
@@ -3248,7 +3259,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 		if (grow <= vma->vm_pgoff) {
 			error = acct_stack_growth(vma, size, grow);
 			if (!error) {
-				if (vma->vm_flags & VM_LOCKED)
+				if (vma_test(vma, VMA_LOCKED_BIT))
 					mm->locked_vm += grow;
 				vm_stat_account(mm, vma->vm_flags, grow);
 				anon_vma_interval_tree_pre_update_vma(vma);
@@ -3297,7 +3308,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 	if (find_vma_intersection(mm, vma->vm_start, vma->vm_end))
 		return -ENOMEM;
 
-	if ((vma->vm_flags & VM_ACCOUNT) &&
+	if (vma_test(vma, VMA_ACCOUNT_BIT) &&
 	     security_vm_enough_memory_mm(mm, charged))
 		return -ENOMEM;
 
@@ -3319,7 +3330,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 	}
 
 	if (vma_link(mm, vma)) {
-		if (vma->vm_flags & VM_ACCOUNT)
+		if (vma_test(vma, VMA_ACCOUNT_BIT))
 			vm_unacct_memory(charged);
 		return -ENOMEM;
 	}
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 93ea600d0895..58a621ec389f 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -267,8 +267,10 @@ enum {
 #endif /* CONFIG_ARCH_HAS_PKEYS */
 #if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
 #define VM_SHADOW_STACK	INIT_VM_FLAG(SHADOW_STACK)
+#define VMA_STARTGAP_FLAGS mk_vma_flags(VMA_GROWSDOWN_BIT, VMA_SHADOW_STACK_BIT)
 #else
 #define VM_SHADOW_STACK	VM_NONE
+#define VMA_STARTGAP_FLAGS mk_vma_flags(VMA_GROWSDOWN_BIT)
 #endif
 #if defined(CONFIG_PPC64)
 #define VM_SAO		INIT_VM_FLAG(SAO)
@@ -366,6 +368,8 @@ enum {
 /* This mask represents all the VMA flag bits used by mlock */
 #define VM_LOCKED_MASK	(VM_LOCKED | VM_LOCKONFAULT)
 
+#define VMA_LOCKED_MASK	mk_vma_flags(VMA_LOCKED_BIT, VMA_LOCKONFAULT_BIT)
+
 #define RLIMIT_STACK		3	/* max stack size */
 #define RLIMIT_MEMLOCK		8	/* max locked-in-memory address space */
 
diff --git a/tools/testing/vma/include/stubs.h b/tools/testing/vma/include/stubs.h
index b5dced3b0bd4..5afb0afe2d48 100644
--- a/tools/testing/vma/include/stubs.h
+++ b/tools/testing/vma/include/stubs.h
@@ -229,7 +229,7 @@ static inline bool signal_pending(void *p)
 	return false;
 }
 
-static inline bool is_file_hugepages(struct file *file)
+static inline bool is_file_hugepages(const struct file *file)
 {
 	return false;
 }
-- 
cgit v1.2.3


From a06eb2f8279e0b2b42799d42041f144377f5a086 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:40 +0000
Subject: mm/vma: convert vma_modify_flags[_uffd]() to use vma_flags_t

Update the vma_modify_flags() and vma_modify_flags_uffd() functions to
accept a vma_flags_t parameter rather than a vm_flags_t one, and propagate
the changes as needed to implement this change.

Also add vma_flags_reset_once() in replacement of vm_flags_reset_once(). We
still need to be careful here because we need to avoid tearing, so maintain
the assumption that the first system word set of flags are the only ones
that require protection from tearing, and retain this functionality.

We can copy the remainder of VMA flags above 64 bits normally. But
hopefully by the time that happens, we will have replaced the logic that
requires these WRITE_ONCE()'s with something else.

We also replace instances of vm_flags_reset() with a simple write of VMA
flags. We are no longer perform a number of checks, most notable of all the
VMA flags asserts becase:

1. We might be operating on a VMA that is not yet added to the tree.

2. We might be operating on a VMA that is now detached.

3. Really in all but core code, you should be using vma_desc_xxx().

4. Other VMA fields are manipulated with no such checks.

5. It'd be egregious to have to add variants of flag functions just to
   account for cases such as the above, especially when we don't do so for
   other VMA fields. Drivers are the problematic cases and why it was
   especially important (and also for debug as VMA locks were introduced),
   the mmap_prepare work is solving this generally.

Additionally, we can fairly safely assume by this point the soft dirty
flags are being set correctly, so it's reasonable to drop this also.

Finally, update the VMA tests to reflect this.

Link: https://lkml.kernel.org/r/51afbb2b8c3681003cc7926647e37335d793836e.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h              | 22 ++++++++++------------
 include/linux/userfaultfd_k.h   |  3 +++
 mm/madvise.c                    | 10 ++++++----
 mm/mlock.c                      | 38 +++++++++++++++++++++-----------------
 mm/mprotect.c                   |  7 +++----
 mm/mseal.c                      | 11 +++++++----
 mm/userfaultfd.c                | 21 ++++++++++++++-------
 mm/vma.c                        | 15 ++++++++-------
 mm/vma.h                        | 15 +++++++--------
 tools/testing/vma/include/dup.h | 22 +++++++++++++---------
 tools/testing/vma/tests/merge.c |  3 +--
 11 files changed, 93 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c6b40dc88918..72bc5016094b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -954,22 +954,20 @@ static inline void vm_flags_reset(struct vm_area_struct *vma,
 	vm_flags_init(vma, flags);
 }
 
-static inline void vm_flags_reset_once(struct vm_area_struct *vma,
-				       vm_flags_t flags)
+static inline void vma_flags_reset_once(struct vm_area_struct *vma,
+					vma_flags_t *flags)
 {
-	vma_assert_write_locked(vma);
-	/*
-	 * If VMA flags exist beyond the first system word, also clear these. It
-	 * is assumed the write once behaviour is required only for the first
-	 * system word.
-	 */
+	const unsigned long word = flags->__vma_flags[0];
+
+	/* It is assumed only the first system word must be written once. */
+	vma_flags_overwrite_word_once(&vma->flags, word);
+	/* The remainder can be copied normally. */
 	if (NUM_VMA_FLAG_BITS > BITS_PER_LONG) {
-		unsigned long *bitmap = vma->flags.__vma_flags;
+		unsigned long *dst = &vma->flags.__vma_flags[1];
+		const unsigned long *src = &flags->__vma_flags[1];
 
-		bitmap_zero(&bitmap[1], NUM_VMA_FLAG_BITS - BITS_PER_LONG);
+		bitmap_copy(dst, src, NUM_VMA_FLAG_BITS - BITS_PER_LONG);
 	}
-
-	vma_flags_overwrite_word_once(&vma->flags, flags);
 }
 
 static inline void vm_flags_set(struct vm_area_struct *vma,
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index fd5f42765497..d83e349900a3 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -23,6 +23,9 @@
 /* The set of all possible UFFD-related VM flags. */
 #define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR)
 
+#define __VMA_UFFD_FLAGS mk_vma_flags(VMA_UFFD_MISSING_BIT, VMA_UFFD_WP_BIT, \
+				      VMA_UFFD_MINOR_BIT)
+
 /*
  * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
  * new flags, since they might collide with O_* ones. We want
diff --git a/mm/madvise.c b/mm/madvise.c
index afe0f01765c4..69708e953cf5 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -151,13 +151,15 @@ static int madvise_update_vma(vm_flags_t new_flags,
 		struct madvise_behavior *madv_behavior)
 {
 	struct vm_area_struct *vma = madv_behavior->vma;
+	vma_flags_t new_vma_flags = legacy_to_vma_flags(new_flags);
 	struct madvise_behavior_range *range = &madv_behavior->range;
 	struct anon_vma_name *anon_name = madv_behavior->anon_name;
 	bool set_new_anon_name = madv_behavior->behavior == __MADV_SET_ANON_VMA_NAME;
 	VMA_ITERATOR(vmi, madv_behavior->mm, range->start);
 
-	if (new_flags == vma->vm_flags && (!set_new_anon_name ||
-			anon_vma_name_eq(anon_vma_name(vma), anon_name)))
+	if (vma_flags_same_mask(&vma->flags, new_vma_flags) &&
+	    (!set_new_anon_name ||
+	     anon_vma_name_eq(anon_vma_name(vma), anon_name)))
 		return 0;
 
 	if (set_new_anon_name)
@@ -165,7 +167,7 @@ static int madvise_update_vma(vm_flags_t new_flags,
 			range->start, range->end, anon_name);
 	else
 		vma = vma_modify_flags(&vmi, madv_behavior->prev, vma,
-			range->start, range->end, &new_flags);
+			range->start, range->end, &new_vma_flags);
 
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
@@ -174,7 +176,7 @@ static int madvise_update_vma(vm_flags_t new_flags,
 
 	/* vm_flags is protected by the mmap_lock held in write mode. */
 	vma_start_write(vma);
-	vm_flags_reset(vma, new_flags);
+	vma->flags = new_vma_flags;
 	if (set_new_anon_name)
 		return replace_anon_vma_name(vma, anon_name);
 
diff --git a/mm/mlock.c b/mm/mlock.c
index fd648138bc72..fdbd1434a35f 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -415,13 +415,14 @@ out:
  * @vma - vma containing range to be mlock()ed or munlock()ed
  * @start - start address in @vma of the range
  * @end - end of range in @vma
- * @newflags - the new set of flags for @vma.
+ * @new_vma_flags - the new set of flags for @vma.
  *
  * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
  * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
  */
 static void mlock_vma_pages_range(struct vm_area_struct *vma,
-	unsigned long start, unsigned long end, vm_flags_t newflags)
+	unsigned long start, unsigned long end,
+	vma_flags_t *new_vma_flags)
 {
 	static const struct mm_walk_ops mlock_walk_ops = {
 		.pmd_entry = mlock_pte_range,
@@ -439,18 +440,18 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
 	 * combination should not be visible to other mmap_lock users;
 	 * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
 	 */
-	if (newflags & VM_LOCKED)
-		newflags |= VM_IO;
+	if (vma_flags_test(new_vma_flags, VMA_LOCKED_BIT))
+		vma_flags_set(new_vma_flags, VMA_IO_BIT);
 	vma_start_write(vma);
-	vm_flags_reset_once(vma, newflags);
+	vma_flags_reset_once(vma, new_vma_flags);
 
 	lru_add_drain();
 	walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL);
 	lru_add_drain();
 
-	if (newflags & VM_IO) {
-		newflags &= ~VM_IO;
-		vm_flags_reset_once(vma, newflags);
+	if (vma_flags_test(new_vma_flags, VMA_IO_BIT)) {
+		vma_flags_clear(new_vma_flags, VMA_IO_BIT);
+		vma_flags_reset_once(vma, new_vma_flags);
 	}
 }
 
@@ -467,20 +468,22 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	       struct vm_area_struct **prev, unsigned long start,
 	       unsigned long end, vm_flags_t newflags)
 {
+	vma_flags_t new_vma_flags = legacy_to_vma_flags(newflags);
+	const vma_flags_t old_vma_flags = vma->flags;
 	struct mm_struct *mm = vma->vm_mm;
 	int nr_pages;
 	int ret = 0;
-	vm_flags_t oldflags = vma->vm_flags;
 
-	if (newflags == oldflags || vma_is_secretmem(vma) ||
-	    !vma_supports_mlock(vma))
+	if (vma_flags_same_pair(&old_vma_flags, &new_vma_flags) ||
+	    vma_is_secretmem(vma) || !vma_supports_mlock(vma)) {
 		/*
 		 * Don't set VM_LOCKED or VM_LOCKONFAULT and don't count.
 		 * For secretmem, don't allow the memory to be unlocked.
 		 */
 		goto out;
+	}
 
-	vma = vma_modify_flags(vmi, *prev, vma, start, end, &newflags);
+	vma = vma_modify_flags(vmi, *prev, vma, start, end, &new_vma_flags);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out;
@@ -490,9 +493,9 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	 * Keep track of amount of locked VM.
 	 */
 	nr_pages = (end - start) >> PAGE_SHIFT;
-	if (!(newflags & VM_LOCKED))
+	if (!vma_flags_test(&new_vma_flags, VMA_LOCKED_BIT))
 		nr_pages = -nr_pages;
-	else if (oldflags & VM_LOCKED)
+	else if (vma_flags_test(&old_vma_flags, VMA_LOCKED_BIT))
 		nr_pages = 0;
 	mm->locked_vm += nr_pages;
 
@@ -501,12 +504,13 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	 * It's okay if try_to_unmap_one unmaps a page just after we
 	 * set VM_LOCKED, populate_vma_page_range will bring it back.
 	 */
-	if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
+	if (vma_flags_test(&new_vma_flags, VMA_LOCKED_BIT) &&
+	    vma_flags_test(&old_vma_flags, VMA_LOCKED_BIT)) {
 		/* No work to do, and mlocking twice would be wrong */
 		vma_start_write(vma);
-		vm_flags_reset(vma, newflags);
+		vma->flags = new_vma_flags;
 	} else {
-		mlock_vma_pages_range(vma, start, end, newflags);
+		mlock_vma_pages_range(vma, start, end, &new_vma_flags);
 	}
 out:
 	*prev = vma;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index eaa724b99908..941f1211da0d 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -756,13 +756,11 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
 		vma_flags_clear(&new_vma_flags, VMA_ACCOUNT_BIT);
 	}
 
-	newflags = vma_flags_to_legacy(new_vma_flags);
-	vma = vma_modify_flags(vmi, *pprev, vma, start, end, &newflags);
+	vma = vma_modify_flags(vmi, *pprev, vma, start, end, &new_vma_flags);
 	if (IS_ERR(vma)) {
 		error = PTR_ERR(vma);
 		goto fail;
 	}
-	new_vma_flags = legacy_to_vma_flags(newflags);
 
 	*pprev = vma;
 
@@ -771,7 +769,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
 	 * held in write mode.
 	 */
 	vma_start_write(vma);
-	vm_flags_reset_once(vma, newflags);
+	vma_flags_reset_once(vma, &new_vma_flags);
 	if (vma_wants_manual_pte_write_upgrade(vma))
 		mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
 	vma_set_page_prot(vma);
@@ -796,6 +794,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
 	}
 
 	vm_stat_account(mm, vma_flags_to_legacy(old_vma_flags), -nrpages);
+	newflags = vma_flags_to_legacy(new_vma_flags);
 	vm_stat_account(mm, newflags, nrpages);
 	perf_event_mmap(vma);
 	return 0;
diff --git a/mm/mseal.c b/mm/mseal.c
index ac58643181f7..e2093ae3d25c 100644
--- a/mm/mseal.c
+++ b/mm/mseal.c
@@ -68,14 +68,17 @@ static int mseal_apply(struct mm_struct *mm,
 		const unsigned long curr_start = MAX(vma->vm_start, start);
 		const unsigned long curr_end = MIN(vma->vm_end, end);
 
-		if (!(vma->vm_flags & VM_SEALED)) {
-			vm_flags_t vm_flags = vma->vm_flags | VM_SEALED;
+		if (!vma_test(vma, VMA_SEALED_BIT)) {
+			vma_flags_t vma_flags = vma->flags;
+
+			vma_flags_set(&vma_flags, VMA_SEALED_BIT);
 
 			vma = vma_modify_flags(&vmi, prev, vma, curr_start,
-					       curr_end, &vm_flags);
+					       curr_end, &vma_flags);
 			if (IS_ERR(vma))
 				return PTR_ERR(vma);
-			vm_flags_set(vma, VM_SEALED);
+			vma_start_write(vma);
+			vma_set_flags(vma, VMA_SEALED_BIT);
 		}
 
 		prev = vma;
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 2c565c7134b6..89879c3ba344 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1976,6 +1976,9 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi,
 {
 	struct vm_area_struct *ret;
 	bool give_up_on_oom = false;
+	vma_flags_t new_vma_flags = vma->flags;
+
+	vma_flags_clear_mask(&new_vma_flags, __VMA_UFFD_FLAGS);
 
 	/*
 	 * If we are modifying only and not splitting, just give up on the merge
@@ -1989,8 +1992,8 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi,
 		uffd_wp_range(vma, start, end - start, false);
 
 	ret = vma_modify_flags_uffd(vmi, prev, vma, start, end,
-				    vma->vm_flags & ~__VM_UFFD_FLAGS,
-				    NULL_VM_UFFD_CTX, give_up_on_oom);
+				    &new_vma_flags, NULL_VM_UFFD_CTX,
+				    give_up_on_oom);
 
 	/*
 	 * In the vma_merge() successful mprotect-like case 8:
@@ -2010,10 +2013,11 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
 			       unsigned long start, unsigned long end,
 			       bool wp_async)
 {
+	vma_flags_t vma_flags = legacy_to_vma_flags(vm_flags);
 	VMA_ITERATOR(vmi, ctx->mm, start);
 	struct vm_area_struct *prev = vma_prev(&vmi);
 	unsigned long vma_end;
-	vm_flags_t new_flags;
+	vma_flags_t new_vma_flags;
 
 	if (vma->vm_start < start)
 		prev = vma;
@@ -2024,23 +2028,26 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
 		VM_WARN_ON_ONCE(!vma_can_userfault(vma, vm_flags, wp_async));
 		VM_WARN_ON_ONCE(vma->vm_userfaultfd_ctx.ctx &&
 				vma->vm_userfaultfd_ctx.ctx != ctx);
-		VM_WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE));
+		VM_WARN_ON_ONCE(!vma_test(vma, VMA_MAYWRITE_BIT));
 
 		/*
 		 * Nothing to do: this vma is already registered into this
 		 * userfaultfd and with the right tracking mode too.
 		 */
 		if (vma->vm_userfaultfd_ctx.ctx == ctx &&
-		    (vma->vm_flags & vm_flags) == vm_flags)
+		    vma_test_all_mask(vma, vma_flags))
 			goto skip;
 
 		if (vma->vm_start > start)
 			start = vma->vm_start;
 		vma_end = min(end, vma->vm_end);
 
-		new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
+		new_vma_flags = vma->flags;
+		vma_flags_clear_mask(&new_vma_flags, __VMA_UFFD_FLAGS);
+		vma_flags_set_mask(&new_vma_flags, vma_flags);
+
 		vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end,
-					    new_flags,
+					    &new_vma_flags,
 					    (struct vm_userfaultfd_ctx){ctx},
 					    /* give_up_on_oom = */false);
 		if (IS_ERR(vma))
diff --git a/mm/vma.c b/mm/vma.c
index 9d194f8e7acb..16a1d708c978 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1710,13 +1710,13 @@ static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg)
 struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
 		struct vm_area_struct *prev, struct vm_area_struct *vma,
 		unsigned long start, unsigned long end,
-		vm_flags_t *vm_flags_ptr)
+		vma_flags_t *vma_flags_ptr)
 {
 	VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
-	const vm_flags_t vm_flags = *vm_flags_ptr;
+	const vma_flags_t vma_flags = *vma_flags_ptr;
 	struct vm_area_struct *ret;
 
-	vmg.vm_flags = vm_flags;
+	vmg.vma_flags = vma_flags;
 
 	ret = vma_modify(&vmg);
 	if (IS_ERR(ret))
@@ -1728,7 +1728,7 @@ struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
 	 * them to the caller.
 	 */
 	if (vmg.state == VMA_MERGE_SUCCESS)
-		*vm_flags_ptr = ret->vm_flags;
+		*vma_flags_ptr = ret->flags;
 	return ret;
 }
 
@@ -1758,12 +1758,13 @@ struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi,
 
 struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi,
 		struct vm_area_struct *prev, struct vm_area_struct *vma,
-		unsigned long start, unsigned long end, vm_flags_t vm_flags,
-		struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom)
+		unsigned long start, unsigned long end,
+		const vma_flags_t *vma_flags, struct vm_userfaultfd_ctx new_ctx,
+		bool give_up_on_oom)
 {
 	VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
 
-	vmg.vm_flags = vm_flags;
+	vmg.vma_flags = *vma_flags;
 	vmg.uffd_ctx = new_ctx;
 	if (give_up_on_oom)
 		vmg.give_up_on_oom = true;
diff --git a/mm/vma.h b/mm/vma.h
index 1f2de6cb3b97..270008e5babc 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -342,24 +342,23 @@ void unmap_region(struct unmap_desc *unmap);
  * @vma: The VMA containing the range @start to @end to be updated.
  * @start: The start of the range to update. May be offset within @vma.
  * @end: The exclusive end of the range to update, may be offset within @vma.
- * @vm_flags_ptr: A pointer to the VMA flags that the @start to @end range is
+ * @vma_flags_ptr: A pointer to the VMA flags that the @start to @end range is
  * about to be set to. On merge, this will be updated to include sticky flags.
  *
  * IMPORTANT: The actual modification being requested here is NOT applied,
  * rather the VMA is perhaps split, perhaps merged to accommodate the change,
  * and the caller is expected to perform the actual modification.
  *
- * In order to account for sticky VMA flags, the @vm_flags_ptr parameter points
+ * In order to account for sticky VMA flags, the @vma_flags_ptr parameter points
  * to the requested flags which are then updated so the caller, should they
  * overwrite any existing flags, correctly retains these.
  *
  * Returns: A VMA which contains the range @start to @end ready to have its
- * flags altered to *@vm_flags.
+ * flags altered to *@vma_flags.
  */
 __must_check struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
 		struct vm_area_struct *prev, struct vm_area_struct *vma,
-		unsigned long start, unsigned long end,
-		vm_flags_t *vm_flags_ptr);
+		unsigned long start, unsigned long end, vma_flags_t *vma_flags_ptr);
 
 /**
  * vma_modify_name() - Perform any necessary split/merge in preparation for
@@ -418,7 +417,7 @@ __must_check struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi,
  * @vma: The VMA containing the range @start to @end to be updated.
  * @start: The start of the range to update. May be offset within @vma.
  * @end: The exclusive end of the range to update, may be offset within @vma.
- * @vm_flags: The VMA flags that the @start to @end range is about to be set to.
+ * @vma_flags: The VMA flags that the @start to @end range is about to be set to.
  * @new_ctx: The userfaultfd context that the @start to @end range is about to
  * be set to.
  * @give_up_on_oom: If an out of memory condition occurs on merge, simply give
@@ -429,11 +428,11 @@ __must_check struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi,
  * and the caller is expected to perform the actual modification.
  *
  * Returns: A VMA which contains the range @start to @end ready to have its VMA
- * flags changed to @vm_flags and its userfaultfd context changed to @new_ctx.
+ * flags changed to @vma_flags and its userfaultfd context changed to @new_ctx.
  */
 __must_check struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi,
 		struct vm_area_struct *prev, struct vm_area_struct *vma,
-		unsigned long start, unsigned long end, vm_flags_t vm_flags,
+		unsigned long start, unsigned long end, const vma_flags_t *vma_flags,
 		struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom);
 
 __must_check struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg);
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 58a621ec389f..9dd57f50ea6d 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -871,16 +871,20 @@ static inline void vm_flags_reset(struct vm_area_struct *vma,
 	vm_flags_init(vma, flags);
 }
 
-static inline void vm_flags_reset_once(struct vm_area_struct *vma,
-				       vm_flags_t flags)
+static inline void vma_flags_reset_once(struct vm_area_struct *vma,
+					vma_flags_t *flags)
 {
-	vma_assert_write_locked(vma);
-	/*
-	 * The user should only be interested in avoiding reordering of
-	 * assignment to the first word.
-	 */
-	vma_flags_clear_all(&vma->flags);
-	vma_flags_overwrite_word_once(&vma->flags, flags);
+	const unsigned long word = flags->__vma_flags[0];
+
+	/* It is assumed only the first system word must be written once. */
+	vma_flags_overwrite_word_once(&vma->flags, word);
+	/* The remainder can be copied normally. */
+	if (NUM_VMA_FLAG_BITS > BITS_PER_LONG) {
+		unsigned long *dst = &vma->flags.__vma_flags[1];
+		const unsigned long *src = &flags->__vma_flags[1];
+
+		bitmap_copy(dst, src, NUM_VMA_FLAG_BITS - BITS_PER_LONG);
+	}
 }
 
 static inline void vm_flags_set(struct vm_area_struct *vma,
diff --git a/tools/testing/vma/tests/merge.c b/tools/testing/vma/tests/merge.c
index 44e3977e3fc0..03b6f9820e0a 100644
--- a/tools/testing/vma/tests/merge.c
+++ b/tools/testing/vma/tests/merge.c
@@ -132,7 +132,6 @@ static bool test_simple_modify(void)
 	struct vm_area_struct *vma;
 	vma_flags_t vma_flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, VMA_MAYREAD_BIT,
 					     VMA_MAYWRITE_BIT);
-	vm_flags_t legacy_flags = VM_READ | VM_WRITE;
 	struct mm_struct mm = {};
 	struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, vma_flags);
 	VMA_ITERATOR(vmi, &mm, 0x1000);
@@ -144,7 +143,7 @@ static bool test_simple_modify(void)
 	 * performs the merge/split only.
 	 */
 	vma = vma_modify_flags(&vmi, init_vma, init_vma,
-			       0x1000, 0x2000, &legacy_flags);
+			       0x1000, 0x2000, &vma_flags);
 	ASSERT_NE(vma, NULL);
 	/* We modify the provided VMA, and on split allocate new VMAs. */
 	ASSERT_EQ(vma, init_vma);
-- 
cgit v1.2.3


From 90cb921c4d7bf92854344d3e76561f48784c613e Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 19:38:41 +0000
Subject: mm/vma: convert __mmap_region() to use vma_flags_t

Update the mmap() implementation logic implemented in __mmap_region() and
functions invoked by it.  The mmap_region() function converts its input
vm_flags_t parameter to a vma_flags_t value which it then passes to
__mmap_region() which uses the vma_flags_t value consistently from then
on.

As part of the change, we convert map_deny_write_exec() to using
vma_flags_t (it was incorrectly using unsigned long before), and place it
in vma.h, as it is only used internal to mm.

With this change, we eliminate the legacy is_shared_maywrite_vm_flags()
helper function which is now no longer required.

We are also able to update the MMAP_STATE() and VMG_MMAP_STATE() macros to
use the vma_flags_t value.

Finally, we update the VMA tests to reflect the change.

Link: https://lkml.kernel.org/r/1fc33a404c962f02da778da100387cc19bd62153.1774034900.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h              | 18 ++++++++++-----
 include/linux/mman.h            | 49 ---------------------------------------
 mm/mprotect.c                   |  4 +++-
 mm/vma.c                        | 25 ++++++++++----------
 mm/vma.h                        | 51 +++++++++++++++++++++++++++++++++++++++++
 tools/testing/vma/include/dup.h | 34 ++++++---------------------
 tools/testing/vma/tests/mmap.c  | 18 +++++----------
 7 files changed, 92 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 72bc5016094b..9472b3c9a22b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1522,12 +1522,6 @@ static inline bool vma_is_accessible(const struct vm_area_struct *vma)
 	return vma->vm_flags & VM_ACCESS_FLAGS;
 }
 
-static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
-{
-	return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
-		(VM_SHARED | VM_MAYWRITE);
-}
-
 static inline bool is_shared_maywrite(const vma_flags_t *flags)
 {
 	return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
@@ -4335,12 +4329,24 @@ static inline bool range_in_vma(const struct vm_area_struct *vma,
 
 #ifdef CONFIG_MMU
 pgprot_t vm_get_page_prot(vm_flags_t vm_flags);
+
+static inline pgprot_t vma_get_page_prot(vma_flags_t vma_flags)
+{
+	const vm_flags_t vm_flags = vma_flags_to_legacy(vma_flags);
+
+	return vm_get_page_prot(vm_flags);
+}
+
 void vma_set_page_prot(struct vm_area_struct *vma);
 #else
 static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
 {
 	return __pgprot(0);
 }
+static inline pgprot_t vma_get_page_prot(vma_flags_t vma_flags)
+{
+	return __pgprot(0);
+}
 static inline void vma_set_page_prot(struct vm_area_struct *vma)
 {
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 0ba8a7e8b90a..389521594c69 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -170,53 +170,4 @@ static inline bool arch_memory_deny_write_exec_supported(void)
 }
 #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
 #endif
-
-/*
- * Denies creating a writable executable mapping or gaining executable permissions.
- *
- * This denies the following:
- *
- * 	a)	mmap(PROT_WRITE | PROT_EXEC)
- *
- *	b)	mmap(PROT_WRITE)
- *		mprotect(PROT_EXEC)
- *
- *	c)	mmap(PROT_WRITE)
- *		mprotect(PROT_READ)
- *		mprotect(PROT_EXEC)
- *
- * But allows the following:
- *
- *	d)	mmap(PROT_READ | PROT_EXEC)
- *		mmap(PROT_READ | PROT_EXEC | PROT_BTI)
- *
- * This is only applicable if the user has set the Memory-Deny-Write-Execute
- * (MDWE) protection mask for the current process.
- *
- * @old specifies the VMA flags the VMA originally possessed, and @new the ones
- * we propose to set.
- *
- * Return: false if proposed change is OK, true if not ok and should be denied.
- */
-static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
-{
-	/* If MDWE is disabled, we have nothing to deny. */
-	if (!mm_flags_test(MMF_HAS_MDWE, current->mm))
-		return false;
-
-	/* If the new VMA is not executable, we have nothing to deny. */
-	if (!(new & VM_EXEC))
-		return false;
-
-	/* Under MDWE we do not accept newly writably executable VMAs... */
-	if (new & VM_WRITE)
-		return true;
-
-	/* ...nor previously non-executable VMAs becoming executable. */
-	if (!(old & VM_EXEC))
-		return true;
-
-	return false;
-}
-
 #endif /* _LINUX_MMAN_H */
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 941f1211da0d..007d9a72b2f0 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -882,6 +882,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 	tmp = vma->vm_start;
 	for_each_vma_range(vmi, vma, end) {
 		vm_flags_t mask_off_old_flags;
+		vma_flags_t new_vma_flags;
 		vm_flags_t newflags;
 		int new_vma_pkey;
 
@@ -904,6 +905,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 		new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey);
 		newflags = calc_vm_prot_bits(prot, new_vma_pkey);
 		newflags |= (vma->vm_flags & ~mask_off_old_flags);
+		new_vma_flags = legacy_to_vma_flags(newflags);
 
 		/* newflags >> 4 shift VM_MAY% in place of VM_% */
 		if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) {
@@ -911,7 +913,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 			break;
 		}
 
-		if (map_deny_write_exec(vma->vm_flags, newflags)) {
+		if (map_deny_write_exec(&vma->flags, &new_vma_flags)) {
 			error = -EACCES;
 			break;
 		}
diff --git a/mm/vma.c b/mm/vma.c
index 16a1d708c978..c335f989586f 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -44,7 +44,7 @@ struct mmap_state {
 	bool file_doesnt_need_get :1;
 };
 
-#define MMAP_STATE(name, mm_, vmi_, addr_, len_, pgoff_, vm_flags_, file_) \
+#define MMAP_STATE(name, mm_, vmi_, addr_, len_, pgoff_, vma_flags_, file_) \
 	struct mmap_state name = {					\
 		.mm = mm_,						\
 		.vmi = vmi_,						\
@@ -52,9 +52,9 @@ struct mmap_state {
 		.end = (addr_) + (len_),				\
 		.pgoff = pgoff_,					\
 		.pglen = PHYS_PFN(len_),				\
-		.vm_flags = vm_flags_,					\
+		.vma_flags = vma_flags_,				\
 		.file = file_,						\
-		.page_prot = vm_get_page_prot(vm_flags_),		\
+		.page_prot = vma_get_page_prot(vma_flags_),		\
 	}
 
 #define VMG_MMAP_STATE(name, map_, vma_)				\
@@ -63,7 +63,7 @@ struct mmap_state {
 		.vmi = (map_)->vmi,					\
 		.start = (map_)->addr,					\
 		.end = (map_)->end,					\
-		.vm_flags = (map_)->vm_flags,				\
+		.vma_flags = (map_)->vma_flags,				\
 		.pgoff = (map_)->pgoff,					\
 		.file = (map_)->file,					\
 		.prev = (map_)->prev,					\
@@ -2746,14 +2746,14 @@ static int call_action_complete(struct mmap_state *map,
 }
 
 static unsigned long __mmap_region(struct file *file, unsigned long addr,
-		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-		struct list_head *uf)
+		unsigned long len, vma_flags_t vma_flags,
+		unsigned long pgoff, struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = NULL;
 	bool have_mmap_prepare = file && file->f_op->mmap_prepare;
 	VMA_ITERATOR(vmi, mm, addr);
-	MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file);
+	MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vma_flags, file);
 	struct vm_area_desc desc = {
 		.mm = mm,
 		.file = file,
@@ -2837,16 +2837,17 @@ abort_munmap:
  * been performed.
  */
 unsigned long mmap_region(struct file *file, unsigned long addr,
-			  unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-			  struct list_head *uf)
+			  unsigned long len, vm_flags_t vm_flags,
+			  unsigned long pgoff, struct list_head *uf)
 {
 	unsigned long ret;
 	bool writable_file_mapping = false;
+	const vma_flags_t vma_flags = legacy_to_vma_flags(vm_flags);
 
 	mmap_assert_write_locked(current->mm);
 
 	/* Check to see if MDWE is applicable. */
-	if (map_deny_write_exec(vm_flags, vm_flags))
+	if (map_deny_write_exec(&vma_flags, &vma_flags))
 		return -EACCES;
 
 	/* Allow architectures to sanity-check the vm_flags. */
@@ -2854,7 +2855,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		return -EINVAL;
 
 	/* Map writable and ensure this isn't a sealed memfd. */
-	if (file && is_shared_maywrite_vm_flags(vm_flags)) {
+	if (file && is_shared_maywrite(&vma_flags)) {
 		int error = mapping_map_writable(file->f_mapping);
 
 		if (error)
@@ -2862,7 +2863,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		writable_file_mapping = true;
 	}
 
-	ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
+	ret = __mmap_region(file, addr, len, vma_flags, pgoff, uf);
 
 	/* Clear our write mapping regardless of error. */
 	if (writable_file_mapping)
diff --git a/mm/vma.h b/mm/vma.h
index 270008e5babc..adc18f7dd9f1 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -704,4 +704,55 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
 int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift);
 #endif
 
+#ifdef CONFIG_MMU
+/*
+ * Denies creating a writable executable mapping or gaining executable permissions.
+ *
+ * This denies the following:
+ *
+ *	a)	mmap(PROT_WRITE | PROT_EXEC)
+ *
+ *	b)	mmap(PROT_WRITE)
+ *		mprotect(PROT_EXEC)
+ *
+ *	c)	mmap(PROT_WRITE)
+ *		mprotect(PROT_READ)
+ *		mprotect(PROT_EXEC)
+ *
+ * But allows the following:
+ *
+ *	d)	mmap(PROT_READ | PROT_EXEC)
+ *		mmap(PROT_READ | PROT_EXEC | PROT_BTI)
+ *
+ * This is only applicable if the user has set the Memory-Deny-Write-Execute
+ * (MDWE) protection mask for the current process.
+ *
+ * @old specifies the VMA flags the VMA originally possessed, and @new the ones
+ * we propose to set.
+ *
+ * Return: false if proposed change is OK, true if not ok and should be denied.
+ */
+static inline bool map_deny_write_exec(const vma_flags_t *old,
+				       const vma_flags_t *new)
+{
+	/* If MDWE is disabled, we have nothing to deny. */
+	if (!mm_flags_test(MMF_HAS_MDWE, current->mm))
+		return false;
+
+	/* If the new VMA is not executable, we have nothing to deny. */
+	if (!vma_flags_test(new, VMA_EXEC_BIT))
+		return false;
+
+	/* Under MDWE we do not accept newly writably executable VMAs... */
+	if (vma_flags_test(new, VMA_WRITE_BIT))
+		return true;
+
+	/* ...nor previously non-executable VMAs becoming executable. */
+	if (!vma_flags_test(old, VMA_EXEC_BIT))
+		return true;
+
+	return false;
+}
+#endif
+
 #endif	/* __MM_VMA_H */
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 9dd57f50ea6d..ab92358b082c 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -1124,12 +1124,6 @@ static __always_inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
 #define vma_desc_clear_flags(desc, ...) \
 	vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
 
-static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
-{
-	return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
-		(VM_SHARED | VM_MAYWRITE);
-}
-
 static inline bool is_shared_maywrite(const vma_flags_t *flags)
 {
 	return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
@@ -1446,27 +1440,6 @@ static inline bool mlock_future_ok(const struct mm_struct *mm,
 	return locked_pages <= limit_pages;
 }
 
-static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
-{
-	/* If MDWE is disabled, we have nothing to deny. */
-	if (mm_flags_test(MMF_HAS_MDWE, current->mm))
-		return false;
-
-	/* If the new VMA is not executable, we have nothing to deny. */
-	if (!(new & VM_EXEC))
-		return false;
-
-	/* Under MDWE we do not accept newly writably executable VMAs... */
-	if (new & VM_WRITE)
-		return true;
-
-	/* ...nor previously non-executable VMAs becoming executable. */
-	if (!(old & VM_EXEC))
-		return true;
-
-	return false;
-}
-
 static inline int mapping_map_writable(struct address_space *mapping)
 {
 	return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
@@ -1518,3 +1491,10 @@ static inline int get_sysctl_max_map_count(void)
 #ifndef pgtable_supports_soft_dirty
 #define pgtable_supports_soft_dirty()	IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
 #endif
+
+static inline pgprot_t vma_get_page_prot(vma_flags_t vma_flags)
+{
+	const vm_flags_t vm_flags = vma_flags_to_legacy(vma_flags);
+
+	return vm_get_page_prot(vm_flags);
+}
diff --git a/tools/testing/vma/tests/mmap.c b/tools/testing/vma/tests/mmap.c
index bded4ecbe5db..c85bc000d1cb 100644
--- a/tools/testing/vma/tests/mmap.c
+++ b/tools/testing/vma/tests/mmap.c
@@ -2,6 +2,8 @@
 
 static bool test_mmap_region_basic(void)
 {
+	const vma_flags_t vma_flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT,
+			VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT);
 	struct mm_struct mm = {};
 	unsigned long addr;
 	struct vm_area_struct *vma;
@@ -10,27 +12,19 @@ static bool test_mmap_region_basic(void)
 	current->mm = &mm;
 
 	/* Map at 0x300000, length 0x3000. */
-	addr = __mmap_region(NULL, 0x300000, 0x3000,
-			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
-			     0x300, NULL);
+	addr = __mmap_region(NULL, 0x300000, 0x3000, vma_flags, 0x300, NULL);
 	ASSERT_EQ(addr, 0x300000);
 
 	/* Map at 0x250000, length 0x3000. */
-	addr = __mmap_region(NULL, 0x250000, 0x3000,
-			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
-			     0x250, NULL);
+	addr = __mmap_region(NULL, 0x250000, 0x3000, vma_flags, 0x250, NULL);
 	ASSERT_EQ(addr, 0x250000);
 
 	/* Map at 0x303000, merging to 0x300000 of length 0x6000. */
-	addr = __mmap_region(NULL, 0x303000, 0x3000,
-			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
-			     0x303, NULL);
+	addr = __mmap_region(NULL, 0x303000, 0x3000, vma_flags, 0x303, NULL);
 	ASSERT_EQ(addr, 0x303000);
 
 	/* Map at 0x24d000, merging to 0x250000 of length 0x6000. */
-	addr = __mmap_region(NULL, 0x24d000, 0x3000,
-			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
-			     0x24d, NULL);
+	addr = __mmap_region(NULL, 0x24d000, 0x3000, vma_flags, 0x24d, NULL);
 	ASSERT_EQ(addr, 0x24d000);
 
 	ASSERT_EQ(mm.map_count, 2);
-- 
cgit v1.2.3


From 3e4bb2706817710d9461394da8b75be79981586b Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 22:39:27 +0000
Subject: mm: various small mmap_prepare cleanups

Patch series "mm: expand mmap_prepare functionality and usage", v4.

This series expands the mmap_prepare functionality, which is intended to
replace the deprecated f_op->mmap hook which has been the source of bugs
and security issues for some time.

This series starts with some cleanup of existing mmap_prepare logic, then
adds documentation for the mmap_prepare call to make it easier for
filesystem and driver writers to understand how it works.

It then importantly adds a vm_ops->mapped hook, a key feature that was
missing from mmap_prepare previously - this is invoked when a driver which
specifies mmap_prepare has successfully been mapped but not merged with
another VMA.

mmap_prepare is invoked prior to a merge being attempted, so you cannot
manipulate state such as reference counts as if it were a new mapping.

The vm_ops->mapped hook allows a driver to perform tasks required at this
stage, and provides symmetry against subsequent vm_ops->open,close calls.

The series uses this to correct the afs implementation which wrongly
manipulated reference count at mmap_prepare time.

It then adds an mmap_prepare equivalent of vm_iomap_memory() -
mmap_action_simple_ioremap(), then uses this to update a number of drivers.

It then splits out the mmap_prepare compatibility layer (which allows for
invocation of mmap_prepare hooks in an mmap() hook) in such a way as to
allow for more incremental implementation of mmap_prepare hooks.

It then uses this to extend mmap_prepare usage in drivers.

Finally it adds an mmap_prepare equivalent of vm_map_pages(), which lays
the foundation for future work which will extend mmap_prepare to DMA
coherent mappings.


This patch (of 21):

Rather than passing arbitrary fields, pass a vm_area_desc pointer to mmap
prepare functions to mmap prepare, and an action and vma pointer to mmap
complete in order to put all the action-specific logic in the function
actually doing the work.

Additionally, allow mmap prepare functions to return an error so we can
error out as soon as possible if there is something logically incorrect in
the input.

Update remap_pfn_range_prepare() to properly check the input range for the
CoW case.

Also remove io_remap_pfn_range_complete(), as we can simply set up the
fields correctly in io_remap_pfn_range_prepare() and use
remap_pfn_range_complete() for this.

While we're here, make remap_pfn_range_prepare_vma() a little neater, and
pass mmap_action directly to call_action_complete().

Then, update compat_vma_mmap() to perform its logic directly, as
__compat_vma_map() is not used by anything so we don't need to export it.

Also update compat_vma_mmap() to use vfs_mmap_prepare() rather than
calling the mmap_prepare op directly.

Finally, update the VMA userland tests to reflect the changes.

Link: https://lkml.kernel.org/r/cover.1774045440.git.ljs@kernel.org
Link: https://lkml.kernel.org/r/99f408e4694f44ab12bdc55fe0bd9685d3bd1117.1774045440.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bodo Stroesser <bostroesser@gmail.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Long Li <longli@microsoft.com>
Cc: Marc Dionne <marc.dionne@auristor.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/fs.h                |   2 -
 include/linux/mm.h                |   7 +--
 mm/internal.h                     |  32 +++++-----
 mm/memory.c                       |  45 +++++++++-----
 mm/util.c                         | 121 +++++++++++++++++---------------------
 mm/vma.c                          |  24 ++++----
 tools/testing/vma/include/dup.h   |   7 ++-
 tools/testing/vma/include/stubs.h |   8 +--
 8 files changed, 126 insertions(+), 120 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b3dd145b25e..a2628a12bd2b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2058,8 +2058,6 @@ static inline bool can_mmap_file(struct file *file)
 	return true;
 }
 
-int __compat_vma_mmap(const struct file_operations *f_op,
-		struct file *file, struct vm_area_struct *vma);
 int compat_vma_mmap(struct file *file, struct vm_area_struct *vma);
 
 static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9472b3c9a22b..6ca2fc5ae83f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4304,10 +4304,9 @@ static inline void mmap_action_ioremap_full(struct vm_area_desc *desc,
 	mmap_action_ioremap(desc, desc->start, start_pfn, vma_desc_size(desc));
 }
 
-void mmap_action_prepare(struct mmap_action *action,
-			 struct vm_area_desc *desc);
-int mmap_action_complete(struct mmap_action *action,
-			 struct vm_area_struct *vma);
+int mmap_action_prepare(struct vm_area_desc *desc);
+int mmap_action_complete(struct vm_area_struct *vma,
+			 struct mmap_action *action);
 
 /* Look up the first VMA which exactly match the interval vm_start ... vm_end */
 static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
diff --git a/mm/internal.h b/mm/internal.h
index 9c690f8635da..4dddd89153d4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1839,26 +1839,28 @@ int walk_page_range_debug(struct mm_struct *mm, unsigned long start,
 void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm);
 int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm);
 
-void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn);
-int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
-		unsigned long pfn, unsigned long size, pgprot_t pgprot);
+int remap_pfn_range_prepare(struct vm_area_desc *desc);
+int remap_pfn_range_complete(struct vm_area_struct *vma,
+			     struct mmap_action *action);
 
-static inline void io_remap_pfn_range_prepare(struct vm_area_desc *desc,
-		unsigned long orig_pfn, unsigned long size)
+static inline int io_remap_pfn_range_prepare(struct vm_area_desc *desc)
 {
+	struct mmap_action *action = &desc->action;
+	const unsigned long orig_pfn = action->remap.start_pfn;
+	const pgprot_t orig_pgprot = action->remap.pgprot;
+	const unsigned long size = action->remap.size;
 	const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size);
+	int err;
 
-	return remap_pfn_range_prepare(desc, pfn);
-}
+	action->remap.start_pfn = pfn;
+	action->remap.pgprot = pgprot_decrypted(orig_pgprot);
+	err = remap_pfn_range_prepare(desc);
+	if (err)
+		return err;
 
-static inline int io_remap_pfn_range_complete(struct vm_area_struct *vma,
-		unsigned long addr, unsigned long orig_pfn, unsigned long size,
-		pgprot_t orig_prot)
-{
-	const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size);
-	const pgprot_t prot = pgprot_decrypted(orig_prot);
-
-	return remap_pfn_range_complete(vma, addr, pfn, size, prot);
+	/* Remap does the actual work. */
+	action->type = MMAP_REMAP_PFN;
+	return 0;
 }
 
 #ifdef CONFIG_MMU_NOTIFIER
diff --git a/mm/memory.c b/mm/memory.c
index 425e852a2eb7..10a61dd81f97 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3099,26 +3099,34 @@ static int do_remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 }
 #endif
 
-void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
+int remap_pfn_range_prepare(struct vm_area_desc *desc)
 {
-	/*
-	 * We set addr=VMA start, end=VMA end here, so this won't fail, but we
-	 * check it again on complete and will fail there if specified addr is
-	 * invalid.
-	 */
-	get_remap_pgoff(vma_desc_is_cow_mapping(desc), desc->start, desc->end,
-			desc->start, desc->end, pfn, &desc->pgoff);
+	const struct mmap_action *action = &desc->action;
+	const unsigned long start = action->remap.start;
+	const unsigned long end = start + action->remap.size;
+	const unsigned long pfn = action->remap.start_pfn;
+	const bool is_cow = vma_desc_is_cow_mapping(desc);
+	int err;
+
+	err = get_remap_pgoff(is_cow, start, end, desc->start, desc->end, pfn,
+			      &desc->pgoff);
+	if (err)
+		return err;
+
 	vma_desc_set_flags_mask(desc, VMA_REMAP_FLAGS);
+	return 0;
 }
 
-static int remap_pfn_range_prepare_vma(struct vm_area_struct *vma, unsigned long addr,
-		unsigned long pfn, unsigned long size)
+static int remap_pfn_range_prepare_vma(struct vm_area_struct *vma,
+				       unsigned long addr, unsigned long pfn,
+				       unsigned long size)
 {
-	unsigned long end = addr + PAGE_ALIGN(size);
+	const unsigned long end = addr + PAGE_ALIGN(size);
+	const bool is_cow = is_cow_mapping(vma->vm_flags);
 	int err;
 
-	err = get_remap_pgoff(is_cow_mapping(vma->vm_flags), addr, end,
-			      vma->vm_start, vma->vm_end, pfn, &vma->vm_pgoff);
+	err = get_remap_pgoff(is_cow, addr, end, vma->vm_start, vma->vm_end,
+			      pfn, &vma->vm_pgoff);
 	if (err)
 		return err;
 
@@ -3151,10 +3159,15 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(remap_pfn_range);
 
-int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
-		unsigned long pfn, unsigned long size, pgprot_t prot)
+int remap_pfn_range_complete(struct vm_area_struct *vma,
+			     struct mmap_action *action)
 {
-	return do_remap_pfn_range(vma, addr, pfn, size, prot);
+	const unsigned long start = action->remap.start;
+	const unsigned long pfn = action->remap.start_pfn;
+	const unsigned long size = action->remap.size;
+	const pgprot_t prot = action->remap.pgprot;
+
+	return do_remap_pfn_range(vma, start, pfn, size, prot);
 }
 
 /**
diff --git a/mm/util.c b/mm/util.c
index ce7ae80047cf..73c97a748d8e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1163,43 +1163,6 @@ void flush_dcache_folio(struct folio *folio)
 EXPORT_SYMBOL(flush_dcache_folio);
 #endif
 
-/**
- * __compat_vma_mmap() - See description for compat_vma_mmap()
- * for details. This is the same operation, only with a specific file operations
- * struct which may or may not be the same as vma->vm_file->f_op.
- * @f_op: The file operations whose .mmap_prepare() hook is specified.
- * @file: The file which backs or will back the mapping.
- * @vma: The VMA to apply the .mmap_prepare() hook to.
- * Returns: 0 on success or error.
- */
-int __compat_vma_mmap(const struct file_operations *f_op,
-		struct file *file, struct vm_area_struct *vma)
-{
-	struct vm_area_desc desc = {
-		.mm = vma->vm_mm,
-		.file = file,
-		.start = vma->vm_start,
-		.end = vma->vm_end,
-
-		.pgoff = vma->vm_pgoff,
-		.vm_file = vma->vm_file,
-		.vma_flags = vma->flags,
-		.page_prot = vma->vm_page_prot,
-
-		.action.type = MMAP_NOTHING, /* Default */
-	};
-	int err;
-
-	err = f_op->mmap_prepare(&desc);
-	if (err)
-		return err;
-
-	mmap_action_prepare(&desc.action, &desc);
-	set_vma_from_desc(vma, &desc);
-	return mmap_action_complete(&desc.action, vma);
-}
-EXPORT_SYMBOL(__compat_vma_mmap);
-
 /**
  * compat_vma_mmap() - Apply the file's .mmap_prepare() hook to an
  * existing VMA and execute any requested actions.
@@ -1228,7 +1191,31 @@ EXPORT_SYMBOL(__compat_vma_mmap);
  */
 int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	return __compat_vma_mmap(file->f_op, file, vma);
+	struct vm_area_desc desc = {
+		.mm = vma->vm_mm,
+		.file = file,
+		.start = vma->vm_start,
+		.end = vma->vm_end,
+
+		.pgoff = vma->vm_pgoff,
+		.vm_file = vma->vm_file,
+		.vma_flags = vma->flags,
+		.page_prot = vma->vm_page_prot,
+
+		.action.type = MMAP_NOTHING, /* Default */
+	};
+	int err;
+
+	err = vfs_mmap_prepare(file, &desc);
+	if (err)
+		return err;
+
+	err = mmap_action_prepare(&desc);
+	if (err)
+		return err;
+
+	set_vma_from_desc(vma, &desc);
+	return mmap_action_complete(vma, &desc.action);
 }
 EXPORT_SYMBOL(compat_vma_mmap);
 
@@ -1320,8 +1307,8 @@ again:
 	}
 }
 
-static int mmap_action_finish(struct mmap_action *action,
-		const struct vm_area_struct *vma, int err)
+static int mmap_action_finish(struct vm_area_struct *vma,
+			      struct mmap_action *action, int err)
 {
 	/*
 	 * If an error occurs, unmap the VMA altogether and return an error. We
@@ -1353,37 +1340,38 @@ static int mmap_action_finish(struct mmap_action *action,
 /**
  * mmap_action_prepare - Perform preparatory setup for an VMA descriptor
  * action which need to be performed.
- * @desc: The VMA descriptor to prepare for @action.
- * @action: The action to perform.
+ * @desc: The VMA descriptor to prepare for its @desc->action.
+ *
+ * Returns: %0 on success, otherwise error.
  */
-void mmap_action_prepare(struct mmap_action *action,
-			 struct vm_area_desc *desc)
+int mmap_action_prepare(struct vm_area_desc *desc)
 {
-	switch (action->type) {
+	switch (desc->action.type) {
 	case MMAP_NOTHING:
-		break;
+		return 0;
 	case MMAP_REMAP_PFN:
-		remap_pfn_range_prepare(desc, action->remap.start_pfn);
-		break;
+		return remap_pfn_range_prepare(desc);
 	case MMAP_IO_REMAP_PFN:
-		io_remap_pfn_range_prepare(desc, action->remap.start_pfn,
-					   action->remap.size);
-		break;
+		return io_remap_pfn_range_prepare(desc);
 	}
+
+	WARN_ON_ONCE(1);
+	return -EINVAL;
 }
 EXPORT_SYMBOL(mmap_action_prepare);
 
 /**
  * mmap_action_complete - Execute VMA descriptor action.
- * @action: The action to perform.
  * @vma: The VMA to perform the action upon.
+ * @action: The action to perform.
  *
  * Similar to mmap_action_prepare().
  *
  * Return: 0 on success, or error, at which point the VMA will be unmapped.
  */
-int mmap_action_complete(struct mmap_action *action,
-			 struct vm_area_struct *vma)
+int mmap_action_complete(struct vm_area_struct *vma,
+			 struct mmap_action *action)
+
 {
 	int err = 0;
 
@@ -1391,25 +1379,22 @@ int mmap_action_complete(struct mmap_action *action,
 	case MMAP_NOTHING:
 		break;
 	case MMAP_REMAP_PFN:
-		err = remap_pfn_range_complete(vma, action->remap.start,
-				action->remap.start_pfn, action->remap.size,
-				action->remap.pgprot);
+		err = remap_pfn_range_complete(vma, action);
 		break;
 	case MMAP_IO_REMAP_PFN:
-		err = io_remap_pfn_range_complete(vma, action->remap.start,
-				action->remap.start_pfn, action->remap.size,
-				action->remap.pgprot);
+		/* Should have been delegated. */
+		WARN_ON_ONCE(1);
+		err = -EINVAL;
 		break;
 	}
 
-	return mmap_action_finish(action, vma, err);
+	return mmap_action_finish(vma, action, err);
 }
 EXPORT_SYMBOL(mmap_action_complete);
 #else
-void mmap_action_prepare(struct mmap_action *action,
-			struct vm_area_desc *desc)
+int mmap_action_prepare(struct vm_area_desc *desc)
 {
-	switch (action->type) {
+	switch (desc->action.type) {
 	case MMAP_NOTHING:
 		break;
 	case MMAP_REMAP_PFN:
@@ -1417,11 +1402,13 @@ void mmap_action_prepare(struct mmap_action *action,
 		WARN_ON_ONCE(1); /* nommu cannot handle these. */
 		break;
 	}
+
+	return 0;
 }
 EXPORT_SYMBOL(mmap_action_prepare);
 
-int mmap_action_complete(struct mmap_action *action,
-			struct vm_area_struct *vma)
+int mmap_action_complete(struct vm_area_struct *vma,
+			 struct mmap_action *action)
 {
 	int err = 0;
 
@@ -1436,7 +1423,7 @@ int mmap_action_complete(struct mmap_action *action,
 		break;
 	}
 
-	return mmap_action_finish(action, vma, err);
+	return mmap_action_finish(vma, action, err);
 }
 EXPORT_SYMBOL(mmap_action_complete);
 #endif
diff --git a/mm/vma.c b/mm/vma.c
index a4b30a069153..1e2996a12d7f 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2640,15 +2640,18 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
 	vma_set_page_prot(vma);
 }
 
-static void call_action_prepare(struct mmap_state *map,
-				struct vm_area_desc *desc)
+static int call_action_prepare(struct mmap_state *map,
+			       struct vm_area_desc *desc)
 {
-	struct mmap_action *action = &desc->action;
+	int err;
 
-	mmap_action_prepare(action, desc);
+	err = mmap_action_prepare(desc);
+	if (err)
+		return err;
 
-	if (action->hide_from_rmap_until_complete)
+	if (desc->action.hide_from_rmap_until_complete)
 		map->hold_file_rmap_lock = true;
+	return 0;
 }
 
 /*
@@ -2672,7 +2675,9 @@ static int call_mmap_prepare(struct mmap_state *map,
 	if (err)
 		return err;
 
-	call_action_prepare(map, desc);
+	err = call_action_prepare(map, desc);
+	if (err)
+		return err;
 
 	/* Update fields permitted to be changed. */
 	map->pgoff = desc->pgoff;
@@ -2727,13 +2732,12 @@ static bool can_set_ksm_flags_early(struct mmap_state *map)
 }
 
 static int call_action_complete(struct mmap_state *map,
-				struct vm_area_desc *desc,
+				struct mmap_action *action,
 				struct vm_area_struct *vma)
 {
-	struct mmap_action *action = &desc->action;
 	int ret;
 
-	ret = mmap_action_complete(action, vma);
+	ret = mmap_action_complete(vma, action);
 
 	/* If we held the file rmap we need to release it. */
 	if (map->hold_file_rmap_lock) {
@@ -2795,7 +2799,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
 	__mmap_complete(&map, vma);
 
 	if (have_mmap_prepare && allocated_new) {
-		error = call_action_complete(&map, &desc, vma);
+		error = call_action_complete(&map, &desc.action, vma);
 
 		if (error)
 			return error;
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index ab92358b082c..e7581efaf470 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -1277,9 +1277,12 @@ static inline int __compat_vma_mmap(const struct file_operations *f_op,
 	if (err)
 		return err;
 
-	mmap_action_prepare(&desc.action, &desc);
+	err = mmap_action_prepare(&desc);
+	if (err)
+		return err;
+
 	set_vma_from_desc(vma, &desc);
-	return mmap_action_complete(&desc.action, vma);
+	return mmap_action_complete(vma, &desc.action);
 }
 
 static inline int compat_vma_mmap(struct file *file,
diff --git a/tools/testing/vma/include/stubs.h b/tools/testing/vma/include/stubs.h
index 5afb0afe2d48..a30b8bc84955 100644
--- a/tools/testing/vma/include/stubs.h
+++ b/tools/testing/vma/include/stubs.h
@@ -81,13 +81,13 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma)
 {
 }
 
-static inline void mmap_action_prepare(struct mmap_action *action,
-					   struct vm_area_desc *desc)
+static inline int mmap_action_prepare(struct vm_area_desc *desc)
 {
+	return 0;
 }
 
-static inline int mmap_action_complete(struct mmap_action *action,
-					   struct vm_area_struct *vma)
+static inline int mmap_action_complete(struct vm_area_struct *vma,
+				       struct mmap_action *action)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 827e97cf4bf59e9a72bcec37944bcebb3139a457 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 22:39:29 +0000
Subject: mm: document vm_operations_struct->open the same as close()

Describe when the operation is invoked and the context in which it is
invoked, matching the description already added for vm_op->close().

While we're here, update all outdated references to an 'area' field for
VMAs to the more consistent 'vma'.

Link: https://lkml.kernel.org/r/7d0ca833c12014320f0fa00f816f95e6e10076f2.1774045440.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bodo Stroesser <bostroesser@gmail.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Long Li <longli@microsoft.com>
Cc: Marc Dionne <marc.dionne@auristor.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h              | 15 ++++++++++-----
 tools/testing/vma/include/dup.h | 15 ++++++++++-----
 2 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6ca2fc5ae83f..21a2eef5f8fe 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -764,15 +764,20 @@ struct vm_fault {
  * to the functions called when a no-page or a wp-page exception occurs.
  */
 struct vm_operations_struct {
-	void (*open)(struct vm_area_struct * area);
+	/**
+	 * @open: Called when a VMA is remapped, split or forked. Not called
+	 * upon first mapping a VMA.
+	 * Context: User context.  May sleep.  Caller holds mmap_lock.
+	 */
+	void (*open)(struct vm_area_struct *vma);
 	/**
 	 * @close: Called when the VMA is being removed from the MM.
 	 * Context: User context.  May sleep.  Caller holds mmap_lock.
 	 */
-	void (*close)(struct vm_area_struct * area);
+	void (*close)(struct vm_area_struct *vma);
 	/* Called any time before splitting to check if it's allowed */
-	int (*may_split)(struct vm_area_struct *area, unsigned long addr);
-	int (*mremap)(struct vm_area_struct *area);
+	int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
+	int (*mremap)(struct vm_area_struct *vma);
 	/*
 	 * Called by mprotect() to make driver-specific permission
 	 * checks before mprotect() is finalised.   The VMA must not
@@ -784,7 +789,7 @@ struct vm_operations_struct {
 	vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
 	vm_fault_t (*map_pages)(struct vm_fault *vmf,
 			pgoff_t start_pgoff, pgoff_t end_pgoff);
-	unsigned long (*pagesize)(struct vm_area_struct * area);
+	unsigned long (*pagesize)(struct vm_area_struct *vma);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index e7581efaf470..5bc04c801504 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -632,15 +632,20 @@ struct vm_area_struct {
 } __randomize_layout;
 
 struct vm_operations_struct {
-	void (*open)(struct vm_area_struct * area);
+	/**
+	 * @open: Called when a VMA is remapped, split or forked. Not called
+	 * upon first mapping a VMA.
+	 * Context: User context.  May sleep.  Caller holds mmap_lock.
+	 */
+	void (*open)(struct vm_area_struct *vma);
 	/**
 	 * @close: Called when the VMA is being removed from the MM.
 	 * Context: User context.  May sleep.  Caller holds mmap_lock.
 	 */
-	void (*close)(struct vm_area_struct * area);
+	void (*close)(struct vm_area_struct *vma);
 	/* Called any time before splitting to check if it's allowed */
-	int (*may_split)(struct vm_area_struct *area, unsigned long addr);
-	int (*mremap)(struct vm_area_struct *area);
+	int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
+	int (*mremap)(struct vm_area_struct *vma);
 	/*
 	 * Called by mprotect() to make driver-specific permission
 	 * checks before mprotect() is finalised.   The VMA must not
@@ -652,7 +657,7 @@ struct vm_operations_struct {
 	vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
 	vm_fault_t (*map_pages)(struct vm_fault *vmf,
 			pgoff_t start_pgoff, pgoff_t end_pgoff);
-	unsigned long (*pagesize)(struct vm_area_struct * area);
+	unsigned long (*pagesize)(struct vm_area_struct *vma);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
-- 
cgit v1.2.3


From c50ca15dd4962bdf834945c2fa29b904042f366a Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 22:39:34 +0000
Subject: mm: add vm_ops->mapped hook

Previously, when a driver needed to do something like establish a
reference count, it could do so in the mmap hook in the knowledge that the
mapping would succeed.

With the introduction of f_op->mmap_prepare this is no longer the case, as
it is invoked prior to actually establishing the mapping.

mmap_prepare is not appropriate for this kind of thing as it is called
before any merge might take place, and after which an error might occur
meaning resources could be leaked.

To take this into account, introduce a new vm_ops->mapped callback which
is invoked when the VMA is first mapped (though notably - not when it is
merged - which is correct and mirrors existing mmap/open/close behaviour).

We do better that vm_ops->open() here, as this callback can return an
error, at which point the VMA will be unmapped.

Note that vm_ops->mapped() is invoked after any mmap action is complete
(such as I/O remapping).

We intentionally do not expose the VMA at this point, exposing only the
fields that could be used, and an output parameter in case the operation
needs to update the vma->vm_private_data field.

In order to deal with stacked filesystems which invoke inner filesystem's
mmap() invocations, add __compat_vma_mapped() and invoke it on vfs_mmap()
(via compat_vma_mmap()) to ensure that the mapped callback is handled when
an mmap() caller invokes a nested filesystem's mmap_prepare() callback.

Update the mmap_prepare documentation to describe the mapped hook and make
it clear what its intended use is.

The vm_ops->mapped() call is handled by the mmap complete logic to ensure
the same code paths are handled by both the compatibility and VMA layers.

Additionally, update VMA userland test headers to reflect the change.

Link: https://lkml.kernel.org/r/4c5e98297eb0aae9565c564e1c296a112702f144.1774045440.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bodo Stroesser <bostroesser@gmail.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Long Li <longli@microsoft.com>
Cc: Marc Dionne <marc.dionne@auristor.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/filesystems/mmap_prepare.rst | 15 +++++
 include/linux/fs.h                         |  9 ++-
 include/linux/mm.h                         | 17 ++++++
 mm/util.c                                  | 90 +++++++++++++++++++++---------
 mm/vma.c                                   |  1 -
 tools/testing/vma/include/dup.h            | 17 ++++++
 6 files changed, 120 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/mmap_prepare.rst b/Documentation/filesystems/mmap_prepare.rst
index ae484d371861..f14b35ee11d5 100644
--- a/Documentation/filesystems/mmap_prepare.rst
+++ b/Documentation/filesystems/mmap_prepare.rst
@@ -25,6 +25,21 @@ That is - no resources should be allocated nor state updated to reflect that a
 mapping has been established, as the mapping may either be merged, or fail to be
 mapped after the callback is complete.
 
+Mapped callback
+---------------
+
+If resources need to be allocated per-mapping, or state such as a reference
+count needs to be manipulated, this should be done using the ``vm_ops->mapped``
+hook, which itself should be set by the >mmap_prepare hook.
+
+This callback is only invoked if a new mapping has been established and was not
+merged with any other, and is invoked at a point where no error may occur before
+the mapping is established.
+
+You may return an error to the callback itself, which will cause the mapping to
+become unmapped and an error returned to the mmap() caller. This is useful if
+resources need to be allocated, and that allocation might fail.
+
 How To Use
 ==========
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a2628a12bd2b..c390f5c667e3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2059,13 +2059,20 @@ static inline bool can_mmap_file(struct file *file)
 }
 
 int compat_vma_mmap(struct file *file, struct vm_area_struct *vma);
+int __vma_check_mmap_hook(struct vm_area_struct *vma);
 
 static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	int err;
+
 	if (file->f_op->mmap_prepare)
 		return compat_vma_mmap(file, vma);
 
-	return file->f_op->mmap(file, vma);
+	err = file->f_op->mmap(file, vma);
+	if (err)
+		return err;
+
+	return __vma_check_mmap_hook(vma);
 }
 
 static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 21a2eef5f8fe..81fbcfed44dd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -775,6 +775,23 @@ struct vm_operations_struct {
 	 * Context: User context.  May sleep.  Caller holds mmap_lock.
 	 */
 	void (*close)(struct vm_area_struct *vma);
+	/**
+	 * @mapped: Called when the VMA is first mapped in the MM. Not called if
+	 * the new VMA is merged with an adjacent VMA.
+	 *
+	 * The @vm_private_data field is an output field allowing the user to
+	 * modify vma->vm_private_data as necessary.
+	 *
+	 * ONLY valid if set from f_op->mmap_prepare. Will result in an error if
+	 * set from f_op->mmap.
+	 *
+	 * Returns %0 on success, or an error otherwise. On error, the VMA will
+	 * be unmapped.
+	 *
+	 * Context: User context.  May sleep.  Caller holds mmap_lock.
+	 */
+	int (*mapped)(unsigned long start, unsigned long end, pgoff_t pgoff,
+		      const struct file *file, void **vm_private_data);
 	/* Called any time before splitting to check if it's allowed */
 	int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
 	int (*mremap)(struct vm_area_struct *vma);
diff --git a/mm/util.c b/mm/util.c
index e272efca8c0e..98fe67e59ec3 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1163,33 +1163,7 @@ void flush_dcache_folio(struct folio *folio)
 EXPORT_SYMBOL(flush_dcache_folio);
 #endif
 
-/**
- * compat_vma_mmap() - Apply the file's .mmap_prepare() hook to an
- * existing VMA and execute any requested actions.
- * @file: The file which possesss an f_op->mmap_prepare() hook.
- * @vma: The VMA to apply the .mmap_prepare() hook to.
- *
- * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain
- * stacked filesystems invoke a nested mmap hook of an underlying file.
- *
- * Until all filesystems are converted to use .mmap_prepare(), we must be
- * conservative and continue to invoke these stacked filesystems using the
- * deprecated .mmap() hook.
- *
- * However we have a problem if the underlying file system possesses an
- * .mmap_prepare() hook, as we are in a different context when we invoke the
- * .mmap() hook, already having a VMA to deal with.
- *
- * compat_vma_mmap() is a compatibility function that takes VMA state,
- * establishes a struct vm_area_desc descriptor, passes to the underlying
- * .mmap_prepare() hook and applies any changes performed by it.
- *
- * Once the conversion of filesystems is complete this function will no longer
- * be required and will be removed.
- *
- * Returns: 0 on success or error.
- */
-int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
+static int __compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct vm_area_desc desc = {
 		.mm = vma->vm_mm,
@@ -1221,8 +1195,49 @@ int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
 	set_vma_from_desc(vma, &desc);
 	return mmap_action_complete(vma, action);
 }
+
+/**
+ * compat_vma_mmap() - Apply the file's .mmap_prepare() hook to an
+ * existing VMA and execute any requested actions.
+ * @file: The file which possesss an f_op->mmap_prepare() hook.
+ * @vma: The VMA to apply the .mmap_prepare() hook to.
+ *
+ * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain
+ * stacked filesystems invoke a nested mmap hook of an underlying file.
+ *
+ * Until all filesystems are converted to use .mmap_prepare(), we must be
+ * conservative and continue to invoke these stacked filesystems using the
+ * deprecated .mmap() hook.
+ *
+ * However we have a problem if the underlying file system possesses an
+ * .mmap_prepare() hook, as we are in a different context when we invoke the
+ * .mmap() hook, already having a VMA to deal with.
+ *
+ * compat_vma_mmap() is a compatibility function that takes VMA state,
+ * establishes a struct vm_area_desc descriptor, passes to the underlying
+ * .mmap_prepare() hook and applies any changes performed by it.
+ *
+ * Once the conversion of filesystems is complete this function will no longer
+ * be required and will be removed.
+ *
+ * Returns: 0 on success or error.
+ */
+int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	return __compat_vma_mmap(file, vma);
+}
 EXPORT_SYMBOL(compat_vma_mmap);
 
+int __vma_check_mmap_hook(struct vm_area_struct *vma)
+{
+	/* vm_ops->mapped is not valid if mmap() is specified. */
+	if (vma->vm_ops && WARN_ON_ONCE(vma->vm_ops->mapped))
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(__vma_check_mmap_hook);
+
 static void set_ps_flags(struct page_snapshot *ps, const struct folio *folio,
 			 const struct page *page)
 {
@@ -1311,11 +1326,32 @@ again:
 	}
 }
 
+static int call_vma_mapped(struct vm_area_struct *vma)
+{
+	const struct vm_operations_struct *vm_ops = vma->vm_ops;
+	void *vm_private_data = vma->vm_private_data;
+	int err;
+
+	if (!vm_ops || !vm_ops->mapped)
+		return 0;
+
+	err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff,
+			     vma->vm_file, &vm_private_data);
+	if (err)
+		return err;
+
+	if (vm_private_data != vma->vm_private_data)
+		vma->vm_private_data = vm_private_data;
+	return 0;
+}
+
 static int mmap_action_finish(struct vm_area_struct *vma,
 			      struct mmap_action *action, int err)
 {
 	size_t len;
 
+	if (!err)
+		err = call_vma_mapped(vma);
 	if (!err && action->success_hook)
 		err = action->success_hook(vma);
 
diff --git a/mm/vma.c b/mm/vma.c
index e1950ae048e2..a43f3c5d4b3d 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2781,7 +2781,6 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
 
 	if (have_mmap_prepare && allocated_new) {
 		error = mmap_action_complete(vma, &desc.action);
-
 		if (error)
 			return error;
 	}
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index a95a4b07f68b..1fb7bcae4f31 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -643,6 +643,23 @@ struct vm_operations_struct {
 	 * Context: User context.  May sleep.  Caller holds mmap_lock.
 	 */
 	void (*close)(struct vm_area_struct *vma);
+	/**
+	 * @mapped: Called when the VMA is first mapped in the MM. Not called if
+	 * the new VMA is merged with an adjacent VMA.
+	 *
+	 * The @vm_private_data field is an output field allowing the user to
+	 * modify vma->vm_private_data as necessary.
+	 *
+	 * ONLY valid if set from f_op->mmap_prepare. Will result in an error if
+	 * set from f_op->mmap.
+	 *
+	 * Returns %0 on success, or an error otherwise. On error, the VMA will
+	 * be unmapped.
+	 *
+	 * Context: User context.  May sleep.  Caller holds mmap_lock.
+	 */
+	int (*mapped)(unsigned long start, unsigned long end, pgoff_t pgoff,
+		      const struct file *file, void **vm_private_data);
 	/* Called any time before splitting to check if it's allowed */
 	int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
 	int (*mremap)(struct vm_area_struct *vma);
-- 
cgit v1.2.3


From a1b7fb40cb71a33c68a609fcee0946425d698415 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 22:39:37 +0000
Subject: mm: add mmap_action_simple_ioremap()

Currently drivers use vm_iomap_memory() as a simple helper function for
I/O remapping memory over a range starting at a specified physical address
over a specified length.

In order to utilise this from mmap_prepare, separate out the core logic
into __simple_ioremap_prep(), update vm_iomap_memory() to use it, and add
simple_ioremap_prepare() to do the same with a VMA descriptor object.

We also add MMAP_SIMPLE_IO_REMAP and relevant fields to the struct
mmap_action type to permit this operation also.

We use mmap_action_ioremap() to set up the actual I/O remap operation once
we have checked and figured out the parameters, which makes
simple_ioremap_prepare() easy to implement.

We then add mmap_action_simple_ioremap() to allow drivers to make use of
this mode.

We update the mmap_prepare documentation to describe this mode.  Finally,
we update the VMA tests to reflect this change.

Link: https://lkml.kernel.org/r/a08ef1c4542202684da63bb37f459d5dbbeddd91.1774045440.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bodo Stroesser <bostroesser@gmail.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Long Li <longli@microsoft.com>
Cc: Marc Dionne <marc.dionne@auristor.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/filesystems/mmap_prepare.rst |  3 ++
 include/linux/mm.h                         | 24 ++++++++-
 include/linux/mm_types.h                   |  6 ++-
 mm/internal.h                              |  1 +
 mm/memory.c                                | 85 +++++++++++++++++++++---------
 mm/util.c                                  |  5 ++
 tools/testing/vma/include/dup.h            |  6 ++-
 7 files changed, 102 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/mmap_prepare.rst b/Documentation/filesystems/mmap_prepare.rst
index f14b35ee11d5..14bb057be564 100644
--- a/Documentation/filesystems/mmap_prepare.rst
+++ b/Documentation/filesystems/mmap_prepare.rst
@@ -153,5 +153,8 @@ pointer. These are:
 * mmap_action_ioremap_full() - Same as mmap_action_ioremap(), only remaps
   the entire mapping from ``start_pfn`` onward.
 
+* mmap_action_simple_ioremap() - Sets up an I/O remap from a specified
+  physical address and over a specified length.
+
 **NOTE:** The ``action`` field should never normally be manipulated directly,
 rather you ought to use one of these helpers.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 81fbcfed44dd..53b21de40f87 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4321,11 +4321,33 @@ static inline void mmap_action_ioremap(struct vm_area_desc *desc,
  * @start_pfn: The first PFN in the range to remap.
  */
 static inline void mmap_action_ioremap_full(struct vm_area_desc *desc,
-					  unsigned long start_pfn)
+					    unsigned long start_pfn)
 {
 	mmap_action_ioremap(desc, desc->start, start_pfn, vma_desc_size(desc));
 }
 
+/**
+ * mmap_action_simple_ioremap - helper for mmap_prepare hook to specify that the
+ * physical range in [start_phys_addr, start_phys_addr + size) should be I/O
+ * remapped.
+ * @desc: The VMA descriptor for the VMA requiring remap.
+ * @start_phys_addr: Start of the physical memory to be mapped.
+ * @size: Size of the area to map.
+ *
+ * NOTE: Some drivers might want to tweak desc->page_prot for purposes of
+ * write-combine or similar.
+ */
+static inline void mmap_action_simple_ioremap(struct vm_area_desc *desc,
+					      phys_addr_t start_phys_addr,
+					      unsigned long size)
+{
+	struct mmap_action *action = &desc->action;
+
+	action->simple_ioremap.start_phys_addr = start_phys_addr;
+	action->simple_ioremap.size = size;
+	action->type = MMAP_SIMPLE_IO_REMAP;
+}
+
 int mmap_action_prepare(struct vm_area_desc *desc);
 int mmap_action_complete(struct vm_area_struct *vma,
 			 struct mmap_action *action);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 38fe6b915024..91a3db174d78 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -814,6 +814,7 @@ enum mmap_action_type {
 	MMAP_NOTHING,		/* Mapping is complete, no further action. */
 	MMAP_REMAP_PFN,		/* Remap PFN range. */
 	MMAP_IO_REMAP_PFN,	/* I/O remap PFN range. */
+	MMAP_SIMPLE_IO_REMAP,	/* I/O remap with guardrails. */
 };
 
 /*
@@ -822,13 +823,16 @@ enum mmap_action_type {
  */
 struct mmap_action {
 	union {
-		/* Remap range. */
 		struct {
 			unsigned long start;
 			unsigned long start_pfn;
 			unsigned long size;
 			pgprot_t pgprot;
 		} remap;
+		struct {
+			phys_addr_t start_phys_addr;
+			unsigned long size;
+		} simple_ioremap;
 	};
 	enum mmap_action_type type;
 
diff --git a/mm/internal.h b/mm/internal.h
index 241510e21f4b..c693646e5b3f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1842,6 +1842,7 @@ int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm);
 int remap_pfn_range_prepare(struct vm_area_desc *desc);
 int remap_pfn_range_complete(struct vm_area_struct *vma,
 			     struct mmap_action *action);
+int simple_ioremap_prepare(struct vm_area_desc *desc);
 
 static inline int io_remap_pfn_range_prepare(struct vm_area_desc *desc)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 10a61dd81f97..c1c323512939 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3170,6 +3170,58 @@ int remap_pfn_range_complete(struct vm_area_struct *vma,
 	return do_remap_pfn_range(vma, start, pfn, size, prot);
 }
 
+static int __simple_ioremap_prep(unsigned long vm_len, pgoff_t vm_pgoff,
+				 phys_addr_t start_phys, unsigned long size,
+				 unsigned long *pfnp)
+{
+	unsigned long pfn, pages;
+
+	/* Check that the physical memory area passed in looks valid */
+	if (start_phys + size < start_phys)
+		return -EINVAL;
+	/*
+	 * You *really* shouldn't map things that aren't page-aligned,
+	 * but we've historically allowed it because IO memory might
+	 * just have smaller alignment.
+	 */
+	size += start_phys & ~PAGE_MASK;
+	pfn = start_phys >> PAGE_SHIFT;
+	pages = (size + ~PAGE_MASK) >> PAGE_SHIFT;
+	if (pfn + pages < pfn)
+		return -EINVAL;
+
+	/* We start the mapping 'vm_pgoff' pages into the area */
+	if (vm_pgoff > pages)
+		return -EINVAL;
+	pfn += vm_pgoff;
+	pages -= vm_pgoff;
+
+	/* Can we fit all of the mapping? */
+	if ((vm_len >> PAGE_SHIFT) > pages)
+		return -EINVAL;
+
+	*pfnp = pfn;
+	return 0;
+}
+
+int simple_ioremap_prepare(struct vm_area_desc *desc)
+{
+	struct mmap_action *action = &desc->action;
+	const phys_addr_t start = action->simple_ioremap.start_phys_addr;
+	const unsigned long size = action->simple_ioremap.size;
+	unsigned long pfn;
+	int err;
+
+	err = __simple_ioremap_prep(vma_desc_size(desc), desc->pgoff,
+				    start, size, &pfn);
+	if (err)
+		return err;
+
+	/* The I/O remap logic does the heavy lifting. */
+	mmap_action_ioremap_full(desc, pfn);
+	return io_remap_pfn_range_prepare(desc);
+}
+
 /**
  * vm_iomap_memory - remap memory to userspace
  * @vma: user vma to map to
@@ -3187,32 +3239,15 @@ int remap_pfn_range_complete(struct vm_area_struct *vma,
  */
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len)
 {
-	unsigned long vm_len, pfn, pages;
-
-	/* Check that the physical memory area passed in looks valid */
-	if (start + len < start)
-		return -EINVAL;
-	/*
-	 * You *really* shouldn't map things that aren't page-aligned,
-	 * but we've historically allowed it because IO memory might
-	 * just have smaller alignment.
-	 */
-	len += start & ~PAGE_MASK;
-	pfn = start >> PAGE_SHIFT;
-	pages = (len + ~PAGE_MASK) >> PAGE_SHIFT;
-	if (pfn + pages < pfn)
-		return -EINVAL;
-
-	/* We start the mapping 'vm_pgoff' pages into the area */
-	if (vma->vm_pgoff > pages)
-		return -EINVAL;
-	pfn += vma->vm_pgoff;
-	pages -= vma->vm_pgoff;
+	const unsigned long vm_start = vma->vm_start;
+	const unsigned long vm_end = vma->vm_end;
+	const unsigned long vm_len = vm_end - vm_start;
+	unsigned long pfn;
+	int err;
 
-	/* Can we fit all of the mapping? */
-	vm_len = vma->vm_end - vma->vm_start;
-	if (vm_len >> PAGE_SHIFT > pages)
-		return -EINVAL;
+	err = __simple_ioremap_prep(vm_len, vma->vm_pgoff, start, len, &pfn);
+	if (err)
+		return err;
 
 	/* Ok, let it rip */
 	return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot);
diff --git a/mm/util.c b/mm/util.c
index 98fe67e59ec3..9a27d33273fd 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1393,6 +1393,8 @@ int mmap_action_prepare(struct vm_area_desc *desc)
 		return remap_pfn_range_prepare(desc);
 	case MMAP_IO_REMAP_PFN:
 		return io_remap_pfn_range_prepare(desc);
+	case MMAP_SIMPLE_IO_REMAP:
+		return simple_ioremap_prepare(desc);
 	}
 
 	WARN_ON_ONCE(1);
@@ -1421,6 +1423,7 @@ int mmap_action_complete(struct vm_area_struct *vma,
 		err = remap_pfn_range_complete(vma, action);
 		break;
 	case MMAP_IO_REMAP_PFN:
+	case MMAP_SIMPLE_IO_REMAP:
 		/* Should have been delegated. */
 		WARN_ON_ONCE(1);
 		err = -EINVAL;
@@ -1438,6 +1441,7 @@ int mmap_action_prepare(struct vm_area_desc *desc)
 		break;
 	case MMAP_REMAP_PFN:
 	case MMAP_IO_REMAP_PFN:
+	case MMAP_SIMPLE_IO_REMAP:
 		WARN_ON_ONCE(1); /* nommu cannot handle these. */
 		break;
 	}
@@ -1456,6 +1460,7 @@ int mmap_action_complete(struct vm_area_struct *vma,
 		break;
 	case MMAP_REMAP_PFN:
 	case MMAP_IO_REMAP_PFN:
+	case MMAP_SIMPLE_IO_REMAP:
 		WARN_ON_ONCE(1); /* nommu cannot handle this. */
 
 		err = -EINVAL;
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 1fb7bcae4f31..b31207bbe10d 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -453,6 +453,7 @@ enum mmap_action_type {
 	MMAP_NOTHING,		/* Mapping is complete, no further action. */
 	MMAP_REMAP_PFN,		/* Remap PFN range. */
 	MMAP_IO_REMAP_PFN,	/* I/O remap PFN range. */
+	MMAP_SIMPLE_IO_REMAP,	/* I/O remap with guardrails. */
 };
 
 /*
@@ -461,13 +462,16 @@ enum mmap_action_type {
  */
 struct mmap_action {
 	union {
-		/* Remap range. */
 		struct {
 			unsigned long start;
 			unsigned long start_pfn;
 			unsigned long size;
 			pgprot_t pgprot;
 		} remap;
+		struct {
+			phys_addr_t start_phys_addr;
+			unsigned long size;
+		} simple_ioremap;
 	};
 	enum mmap_action_type type;
 
-- 
cgit v1.2.3


From 668937b7b2256f4b2a982e8f69b07d9ee8f81d36 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 22:39:43 +0000
Subject: mm: allow handling of stacked mmap_prepare hooks in more drivers

While the conversion of mmap hooks to mmap_prepare is underway, we will
encounter situations where mmap hooks need to invoke nested mmap_prepare
hooks.

The nesting of mmap hooks is termed 'stacking'.  In order to flexibly
facilitate the conversion of custom mmap hooks in drivers which stack, we
must split up the existing __compat_vma_mmap() function into two separate
functions:

* compat_set_desc_from_vma() - This allows the setting of a vm_area_desc
  object's fields to the relevant fields of a VMA.

* __compat_vma_mmap() - Once an mmap_prepare hook has been executed upon a
  vm_area_desc object, this function performs any mmap actions specified by
  the mmap_prepare hook and then invokes its vm_ops->mapped() hook if any
  were specified.

In ordinary cases, where a file's f_op->mmap_prepare() hook simply needs
to be invoked in a stacked mmap() hook, compat_vma_mmap() can be used.

However some drivers define their own nested hooks, which are invoked in
turn by another hook.

A concrete example is vmbus_channel->mmap_ring_buffer(), which is invoked
in turn by bin_attribute->mmap():

vmbus_channel->mmap_ring_buffer() has a signature of:

int (*mmap_ring_buffer)(struct vmbus_channel *channel,
			struct vm_area_struct *vma);

And bin_attribute->mmap() has a signature of:

	int (*mmap)(struct file *, struct kobject *,
		    const struct bin_attribute *attr,
		    struct vm_area_struct *vma);

And so compat_vma_mmap() cannot be used here for incremental conversion of
hooks from mmap() to mmap_prepare().

There are many such instances like this, where conversion to mmap_prepare
would otherwise cascade to a huge change set due to nesting of this kind.

The changes in this patch mean we could now instead convert
vmbus_channel->mmap_ring_buffer() to
vmbus_channel->mmap_prepare_ring_buffer(), and implement something like:

	struct vm_area_desc desc;
	int err;

	compat_set_desc_from_vma(&desc, file, vma);
	err = channel->mmap_prepare_ring_buffer(channel, &desc);
	if (err)
		return err;

	return __compat_vma_mmap(&desc, vma);

Allowing us to incrementally update this logic, and other logic like it.

Unfortunately, as part of this change, we need to be able to flexibly
assign to the VMA descriptor, so have to remove some of the const
declarations within the structure.

Also update the VMA tests to reflect the changes.

Link: https://lkml.kernel.org/r/24aac3019dd34740e788d169fccbe3c62781e648.1774045440.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bodo Stroesser <bostroesser@gmail.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Long Li <longli@microsoft.com>
Cc: Marc Dionne <marc.dionne@auristor.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/fs.h              |   3 +
 include/linux/mm_types.h        |   4 +-
 mm/util.c                       | 119 +++++++++++++++++++++++++++++-----------
 mm/vma.h                        |   2 +-
 tools/testing/vma/include/dup.h |  68 +++++++++++++++--------
 5 files changed, 136 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c390f5c667e3..0bdccfa70b44 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2058,6 +2058,9 @@ static inline bool can_mmap_file(struct file *file)
 	return true;
 }
 
+void compat_set_desc_from_vma(struct vm_area_desc *desc, const struct file *file,
+			      const struct vm_area_struct *vma);
+int __compat_vma_mmap(struct vm_area_desc *desc, struct vm_area_struct *vma);
 int compat_vma_mmap(struct file *file, struct vm_area_struct *vma);
 int __vma_check_mmap_hook(struct vm_area_struct *vma);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 91a3db174d78..b702c63bf0e0 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -891,8 +891,8 @@ static __always_inline bool vma_flags_empty(const vma_flags_t *flags)
  */
 struct vm_area_desc {
 	/* Immutable state. */
-	const struct mm_struct *const mm;
-	struct file *const file; /* May vary from vm_file in stacked callers. */
+	struct mm_struct *mm;
+	struct file *file; /* May vary from vm_file in stacked callers. */
 	unsigned long start;
 	unsigned long end;
 
diff --git a/mm/util.c b/mm/util.c
index 9a27d33273fd..5ae20876ef2c 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1163,38 +1163,78 @@ void flush_dcache_folio(struct folio *folio)
 EXPORT_SYMBOL(flush_dcache_folio);
 #endif
 
-static int __compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct vm_area_desc desc = {
-		.mm = vma->vm_mm,
-		.file = file,
-		.start = vma->vm_start,
-		.end = vma->vm_end,
-
-		.pgoff = vma->vm_pgoff,
-		.vm_file = vma->vm_file,
-		.vma_flags = vma->flags,
-		.page_prot = vma->vm_page_prot,
-
-		.action.type = MMAP_NOTHING, /* Default */
-	};
-	struct mmap_action *action = &desc.action;
-	int err;
+/**
+ * compat_set_desc_from_vma() - assigns VMA descriptor @desc fields from a VMA.
+ * @desc: A VMA descriptor whose fields need to be set.
+ * @file: The file object describing the file being mmap()'d.
+ * @vma: The VMA whose fields we wish to assign to @desc.
+ *
+ * This is a compatibility function to allow an mmap() hook to call
+ * mmap_prepare() hooks when drivers nest these. This function specifically
+ * allows the construction of a vm_area_desc value, @desc, from a VMA @vma for
+ * the purposes of doing this.
+ *
+ * Once the conversion of drivers is complete this function will no longer be
+ * required and will be removed.
+ */
+void compat_set_desc_from_vma(struct vm_area_desc *desc,
+			      const struct file *file,
+			      const struct vm_area_struct *vma)
+{
+	memset(desc, 0, sizeof(*desc));
 
-	err = vfs_mmap_prepare(file, &desc);
-	if (err)
-		return err;
+	desc->mm = vma->vm_mm;
+	desc->file = (struct file *)file;
+	desc->start = vma->vm_start;
+	desc->end = vma->vm_end;
 
-	err = mmap_action_prepare(&desc);
-	if (err)
-		return err;
+	desc->pgoff = vma->vm_pgoff;
+	desc->vm_file = vma->vm_file;
+	desc->vma_flags = vma->flags;
+	desc->page_prot = vma->vm_page_prot;
 
-	/* being invoked from .mmap means we don't have to enforce this. */
-	action->hide_from_rmap_until_complete = false;
+	/* Default. */
+	desc->action.type = MMAP_NOTHING;
+}
+EXPORT_SYMBOL(compat_set_desc_from_vma);
+
+/**
+ * __compat_vma_mmap() - Similar to compat_vma_mmap(), only it allows
+ * flexibility as to how the mmap_prepare callback is invoked, which is useful
+ * for drivers which invoke nested mmap_prepare callbacks in an mmap() hook.
+ * @desc: A VMA descriptor upon which an mmap_prepare() hook has already been
+ * executed.
+ * @vma: The VMA to which @desc should be applied.
+ *
+ * The function assumes that you have obtained a VMA descriptor @desc from
+ * compat_set_desc_from_vma(), and already executed the mmap_prepare() hook upon
+ * it.
+ *
+ * It then performs any specified mmap actions, and invokes the vm_ops->mapped()
+ * hook if one is present.
+ *
+ * See the description of compat_vma_mmap() for more details.
+ *
+ * Once the conversion of drivers is complete this function will no longer be
+ * required and will be removed.
+ *
+ * Returns: 0 on success or error.
+ */
+int __compat_vma_mmap(struct vm_area_desc *desc,
+		      struct vm_area_struct *vma)
+{
+	int err;
 
-	set_vma_from_desc(vma, &desc);
-	return mmap_action_complete(vma, action);
+	/* Perform any preparatory tasks for mmap action. */
+	err = mmap_action_prepare(desc);
+	if (err)
+		return err;
+	/* Update the VMA from the descriptor. */
+	compat_set_vma_from_desc(vma, desc);
+	/* Complete any specified mmap actions. */
+	return mmap_action_complete(vma, &desc->action);
 }
+EXPORT_SYMBOL(__compat_vma_mmap);
 
 /**
  * compat_vma_mmap() - Apply the file's .mmap_prepare() hook to an
@@ -1203,10 +1243,10 @@ static int __compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
  * @vma: The VMA to apply the .mmap_prepare() hook to.
  *
  * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain
- * stacked filesystems invoke a nested mmap hook of an underlying file.
+ * stacked drivers invoke a nested mmap hook of an underlying file.
  *
- * Until all filesystems are converted to use .mmap_prepare(), we must be
- * conservative and continue to invoke these stacked filesystems using the
+ * Until all drivers are converted to use .mmap_prepare(), we must be
+ * conservative and continue to invoke these stacked drivers using the
  * deprecated .mmap() hook.
  *
  * However we have a problem if the underlying file system possesses an
@@ -1217,14 +1257,27 @@ static int __compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
  * establishes a struct vm_area_desc descriptor, passes to the underlying
  * .mmap_prepare() hook and applies any changes performed by it.
  *
- * Once the conversion of filesystems is complete this function will no longer
- * be required and will be removed.
+ * Once the conversion of drivers is complete this function will no longer be
+ * required and will be removed.
  *
  * Returns: 0 on success or error.
  */
 int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	return __compat_vma_mmap(file, vma);
+	struct vm_area_desc desc;
+	struct mmap_action *action;
+	int err;
+
+	compat_set_desc_from_vma(&desc, file, vma);
+	err = vfs_mmap_prepare(file, &desc);
+	if (err)
+		return err;
+	action = &desc.action;
+
+	/* being invoked from .mmmap means we don't have to enforce this. */
+	action->hide_from_rmap_until_complete = false;
+
+	return __compat_vma_mmap(&desc, vma);
 }
 EXPORT_SYMBOL(compat_vma_mmap);
 
diff --git a/mm/vma.h b/mm/vma.h
index 1bfe7e47f6be..8e4b61a7304c 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -300,7 +300,7 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
  * f_op->mmap() but which might have an underlying file system which implements
  * f_op->mmap_prepare().
  */
-static inline void set_vma_from_desc(struct vm_area_struct *vma,
+static inline void compat_set_vma_from_desc(struct vm_area_struct *vma,
 		struct vm_area_desc *desc)
 {
 	/*
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index b31207bbe10d..ecd47d0f7d17 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -519,8 +519,8 @@ enum vma_operation {
  */
 struct vm_area_desc {
 	/* Immutable state. */
-	const struct mm_struct *const mm;
-	struct file *const file; /* May vary from vm_file in stacked callers. */
+	struct mm_struct *mm;
+	struct file *file; /* May vary from vm_file in stacked callers. */
 	unsigned long start;
 	unsigned long end;
 
@@ -1278,50 +1278,70 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
 }
 
 /* Declared in vma.h. */
-static inline void set_vma_from_desc(struct vm_area_struct *vma,
+static inline void compat_set_vma_from_desc(struct vm_area_struct *vma,
 		struct vm_area_desc *desc);
 
-static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
+static inline void compat_set_desc_from_vma(struct vm_area_desc *desc,
+			      const struct file *file,
+			      const struct vm_area_struct *vma)
 {
-	return file->f_op->mmap_prepare(desc);
+	memset(desc, 0, sizeof(*desc));
+
+	desc->mm = vma->vm_mm;
+	desc->file = (struct file *)file;
+	desc->start = vma->vm_start;
+	desc->end = vma->vm_end;
+
+	desc->pgoff = vma->vm_pgoff;
+	desc->vm_file = vma->vm_file;
+	desc->vma_flags = vma->flags;
+	desc->page_prot = vma->vm_page_prot;
+
+	/* Default. */
+	desc->action.type = MMAP_NOTHING;
 }
 
-static inline unsigned long vma_pages(struct vm_area_struct *vma)
+static inline unsigned long vma_pages(const struct vm_area_struct *vma)
 {
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
-static inline int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
 {
-	struct vm_area_desc desc = {
-		.mm = vma->vm_mm,
-		.file = file,
-		.start = vma->vm_start,
-		.end = vma->vm_end,
-
-		.pgoff = vma->vm_pgoff,
-		.vm_file = vma->vm_file,
-		.vma_flags = vma->flags,
-		.page_prot = vma->vm_page_prot,
+	return file->f_op->mmap_prepare(desc);
+}
 
-		.action.type = MMAP_NOTHING, /* Default */
-	};
-	struct mmap_action *action = &desc.action;
+static inline int __compat_vma_mmap(struct vm_area_desc *desc,
+		struct vm_area_struct *vma)
+{
 	int err;
 
-	err = vfs_mmap_prepare(file, &desc);
+	/* Perform any preparatory tasks for mmap action. */
+	err = mmap_action_prepare(desc);
 	if (err)
 		return err;
+	/* Update the VMA from the descriptor. */
+	compat_set_vma_from_desc(vma, desc);
+	/* Complete any specified mmap actions. */
+	return mmap_action_complete(vma, &desc->action);
+}
 
-	err = mmap_action_prepare(&desc);
+static inline int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct vm_area_desc desc;
+	struct mmap_action *action;
+	int err;
+
+	compat_set_desc_from_vma(&desc, file, vma);
+	err = vfs_mmap_prepare(file, &desc);
 	if (err)
 		return err;
+	action = &desc.action;
 
 	/* being invoked from .mmmap means we don't have to enforce this. */
 	action->hide_from_rmap_until_complete = false;
 
-	set_vma_from_desc(vma, &desc);
-	return mmap_action_complete(vma, action);
+	return __compat_vma_mmap(&desc, vma);
 }
 
 static inline void vma_iter_init(struct vma_iterator *vmi,
-- 
cgit v1.2.3


From f98cb7ca4aa44645347771c2c2a9724bc210c49e Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 22:39:44 +0000
Subject: drivers: hv: vmbus: replace deprecated mmap hook with mmap_prepare

The f_op->mmap interface is deprecated, so update the vmbus driver to use
its successor, mmap_prepare.

This updates all callbacks which referenced the function pointer
hv_mmap_ring_buffer to instead reference hv_mmap_prepare_ring_buffer,
utilising the newly introduced compat_set_desc_from_vma() and
__compat_vma_mmap() to be able to implement this change.

The UIO HV generic driver is the only user of hv_create_ring_sysfs(),
which is the only function which references
vmbus_channel->mmap_prepare_ring_buffer which, in turn, is the only
external interface to hv_mmap_prepare_ring_buffer.

This patch therefore updates this caller to use mmap_prepare instead,
which also previously used vm_iomap_memory(), so this change replaces it
with its mmap_prepare equivalent, mmap_action_simple_ioremap().

[akpm@linux-foundation.org: restore struct vmbus_channel comment, per Michael Kelley]
Link: https://lkml.kernel.org/r/05467cb62267d750e5c770147517d4df0246cda6.1774045440.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bodo Stroesser <bostroesser@gmail.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Long Li <longli@microsoft.com>
Cc: Marc Dionne <marc.dionne@auristor.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/hv/hyperv_vmbus.h    |  4 ++--
 drivers/hv/vmbus_drv.c       | 31 +++++++++++++++++++------------
 drivers/uio/uio_hv_generic.c | 11 ++++++-----
 include/linux/hyperv.h       |  4 ++--
 4 files changed, 29 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 7bd8f8486e85..31f576464f18 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -545,8 +545,8 @@ static inline int hv_debug_add_dev_dir(struct hv_device *dev)
 
 /* Create and remove sysfs entry for memory mapped ring buffers for a channel */
 int hv_create_ring_sysfs(struct vmbus_channel *channel,
-			 int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
-						    struct vm_area_struct *vma));
+			 int (*hv_mmap_prepare_ring_buffer)(struct vmbus_channel *channel,
+							    struct vm_area_desc *desc));
 int hv_remove_ring_sysfs(struct vmbus_channel *channel);
 
 #endif /* _HYPERV_VMBUS_H */
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index bc4fc1951ae1..45625487ba36 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1951,12 +1951,19 @@ static int hv_mmap_ring_buffer_wrapper(struct file *filp, struct kobject *kobj,
 				       struct vm_area_struct *vma)
 {
 	struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj);
+	struct vm_area_desc desc;
+	int err;
 
 	/*
-	 * hv_(create|remove)_ring_sysfs implementation ensures that mmap_ring_buffer
-	 * is not NULL.
+	 * hv_(create|remove)_ring_sysfs implementation ensures that
+	 * mmap_prepare_ring_buffer is not NULL.
 	 */
-	return channel->mmap_ring_buffer(channel, vma);
+	compat_set_desc_from_vma(&desc, filp, vma);
+	err = channel->mmap_prepare_ring_buffer(channel, &desc);
+	if (err)
+		return err;
+
+	return __compat_vma_mmap(&desc, vma);
 }
 
 static struct bin_attribute chan_attr_ring_buffer = {
@@ -2048,13 +2055,13 @@ static const struct kobj_type vmbus_chan_ktype = {
 /**
  * hv_create_ring_sysfs() - create "ring" sysfs entry corresponding to ring buffers for a channel.
  * @channel: Pointer to vmbus_channel structure
- * @hv_mmap_ring_buffer: function pointer for initializing the function to be called on mmap of
+ * @hv_mmap_prepare_ring_buffer: function pointer for initializing the function to be called on mmap
  *                       channel's "ring" sysfs node, which is for the ring buffer of that channel.
  *                       Function pointer is of below type:
- *                       int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
- *                                                  struct vm_area_struct *vma))
- *                       This has a pointer to the channel and a pointer to vm_area_struct,
- *                       used for mmap, as arguments.
+ *                       int (*hv_mmap_prepare_ring_buffer)(struct vmbus_channel *channel,
+ *                                                          struct vm_area_desc *desc))
+ *                       This has a pointer to the channel and a pointer to vm_area_desc,
+ *                       used for mmap_prepare, as arguments.
  *
  * Sysfs node for ring buffer of a channel is created along with other fields, however its
  * visibility is disabled by default. Sysfs creation needs to be controlled when the use-case
@@ -2071,12 +2078,12 @@ static const struct kobj_type vmbus_chan_ktype = {
  * Returns 0 on success or error code on failure.
  */
 int hv_create_ring_sysfs(struct vmbus_channel *channel,
-			 int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
-						    struct vm_area_struct *vma))
+			 int (*hv_mmap_prepare_ring_buffer)(struct vmbus_channel *channel,
+							    struct vm_area_desc *desc))
 {
 	struct kobject *kobj = &channel->kobj;
 
-	channel->mmap_ring_buffer = hv_mmap_ring_buffer;
+	channel->mmap_prepare_ring_buffer = hv_mmap_prepare_ring_buffer;
 	channel->ring_sysfs_visible = true;
 
 	return sysfs_update_group(kobj, &vmbus_chan_group);
@@ -2098,7 +2105,7 @@ int hv_remove_ring_sysfs(struct vmbus_channel *channel)
 
 	channel->ring_sysfs_visible = false;
 	ret = sysfs_update_group(kobj, &vmbus_chan_group);
-	channel->mmap_ring_buffer = NULL;
+	channel->mmap_prepare_ring_buffer = NULL;
 	return ret;
 }
 EXPORT_SYMBOL_GPL(hv_remove_ring_sysfs);
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 3f8e2e27697f..29ec2d15ada8 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -154,15 +154,16 @@ static void hv_uio_rescind(struct vmbus_channel *channel)
  * The ring buffer is allocated as contiguous memory by vmbus_open
  */
 static int
-hv_uio_ring_mmap(struct vmbus_channel *channel, struct vm_area_struct *vma)
+hv_uio_ring_mmap_prepare(struct vmbus_channel *channel, struct vm_area_desc *desc)
 {
 	void *ring_buffer = page_address(channel->ringbuffer_page);
 
 	if (channel->state != CHANNEL_OPENED_STATE)
 		return -ENODEV;
 
-	return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
-			       channel->ringbuffer_pagecount << PAGE_SHIFT);
+	mmap_action_simple_ioremap(desc, virt_to_phys(ring_buffer),
+			channel->ringbuffer_pagecount << PAGE_SHIFT);
+	return 0;
 }
 
 /* Callback from VMBUS subsystem when new channel created. */
@@ -183,7 +184,7 @@ hv_uio_new_channel(struct vmbus_channel *new_sc)
 	}
 
 	set_channel_read_mode(new_sc, HV_CALL_ISR);
-	ret = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap);
+	ret = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap_prepare);
 	if (ret) {
 		dev_err(device, "sysfs create ring bin file failed; %d\n", ret);
 		vmbus_close(new_sc);
@@ -366,7 +367,7 @@ hv_uio_probe(struct hv_device *dev,
 	 * or decoupled from uio_hv_generic probe. Userspace programs can make use of inotify
 	 * APIs to make sure that ring is created.
 	 */
-	hv_create_ring_sysfs(channel, hv_uio_ring_mmap);
+	hv_create_ring_sysfs(channel, hv_uio_ring_mmap_prepare);
 
 	hv_set_drvdata(dev, pdata);
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index dfc516c1c719..a26fb8e7cedf 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1015,8 +1015,8 @@ struct vmbus_channel {
 	/* The max size of a packet on this channel */
 	u32 max_pkt_size;
 
-	/* function to mmap ring buffer memory to the channel's sysfs ring attribute */
-	int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma);
+	/*  function to mmap ring buffer memory to the channel's sysfs ring attribute */
+	int (*mmap_prepare_ring_buffer)(struct vmbus_channel *channel, struct vm_area_desc *desc);
 
 	/* boolean to control visibility of sysfs for ring buffer */
 	bool ring_sysfs_visible;
-- 
cgit v1.2.3


From 933f05f58ac6014eaac387d22a76ace8606891d1 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 22:39:45 +0000
Subject: uio: replace deprecated mmap hook with mmap_prepare in uio_info

The f_op->mmap interface is deprecated, so update uio_info to use its
successor, mmap_prepare.

Therefore, replace the uio_info->mmap hook with a new
uio_info->mmap_prepare hook, and update its one user, target_core_user,
to both specify this new mmap_prepare hook and also to use the new
vm_ops->mapped() hook to continue to maintain a correct udev->kref
refcount.

Then update uio_mmap() to utilise the mmap_prepare compatibility layer to
invoke this callback from the uio mmap invocation.

Link: https://lkml.kernel.org/r/157583e4477705b496896c7acd4ac88a937b8fa6.1774045440.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bodo Stroesser <bostroesser@gmail.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Long Li <longli@microsoft.com>
Cc: Marc Dionne <marc.dionne@auristor.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/target/target_core_user.c | 26 ++++++++++++++++++--------
 drivers/uio/uio.c                 | 10 ++++++++--
 include/linux/uio_driver.h        |  4 ++--
 3 files changed, 28 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index af95531ddd35..edc2afd5f4ee 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1860,6 +1860,17 @@ static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi)
 	return NULL;
 }
 
+static int tcmu_vma_mapped(unsigned long start, unsigned long end, pgoff_t pgoff,
+			   const struct file *file, void **vm_private_data)
+{
+	struct tcmu_dev *udev = *vm_private_data;
+
+	pr_debug("vma_mapped\n");
+
+	kref_get(&udev->kref);
+	return 0;
+}
+
 static void tcmu_vma_open(struct vm_area_struct *vma)
 {
 	struct tcmu_dev *udev = vma->vm_private_data;
@@ -1919,26 +1930,25 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
 }
 
 static const struct vm_operations_struct tcmu_vm_ops = {
+	.mapped = tcmu_vma_mapped,
 	.open = tcmu_vma_open,
 	.close = tcmu_vma_close,
 	.fault = tcmu_vma_fault,
 };
 
-static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
+static int tcmu_mmap_prepare(struct uio_info *info, struct vm_area_desc *desc)
 {
 	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
 
-	vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
-	vma->vm_ops = &tcmu_vm_ops;
+	vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT);
+	desc->vm_ops = &tcmu_vm_ops;
 
-	vma->vm_private_data = udev;
+	desc->private_data = udev;
 
 	/* Ensure the mmap is exactly the right size */
-	if (vma_pages(vma) != udev->mmap_pages)
+	if (vma_desc_pages(desc) != udev->mmap_pages)
 		return -EINVAL;
 
-	tcmu_vma_open(vma);
-
 	return 0;
 }
 
@@ -2253,7 +2263,7 @@ static int tcmu_configure_device(struct se_device *dev)
 	info->irqcontrol = tcmu_irqcontrol;
 	info->irq = UIO_IRQ_CUSTOM;
 
-	info->mmap = tcmu_mmap;
+	info->mmap_prepare = tcmu_mmap_prepare;
 	info->open = tcmu_open;
 	info->release = tcmu_release;
 
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 5a4998e2caf8..1e4ade78ed84 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -850,8 +850,14 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
 		goto out;
 	}
 
-	if (idev->info->mmap) {
-		ret = idev->info->mmap(idev->info, vma);
+	if (idev->info->mmap_prepare) {
+		struct vm_area_desc desc;
+
+		compat_set_desc_from_vma(&desc, filep, vma);
+		ret = idev->info->mmap_prepare(idev->info, &desc);
+		if (ret)
+			goto out;
+		ret = __compat_vma_mmap(&desc, vma);
 		goto out;
 	}
 
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 334641e20fb1..02eaac47ac44 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -97,7 +97,7 @@ struct uio_device {
  * @irq_flags:		flags for request_irq()
  * @priv:		optional private data
  * @handler:		the device's irq handler
- * @mmap:		mmap operation for this uio device
+ * @mmap_prepare:	mmap_prepare operation for this uio device
  * @open:		open operation for this uio device
  * @release:		release operation for this uio device
  * @irqcontrol:		disable/enable irqs when 0/1 is written to /dev/uioX
@@ -112,7 +112,7 @@ struct uio_info {
 	unsigned long		irq_flags;
 	void			*priv;
 	irqreturn_t (*handler)(int irq, struct uio_info *dev_info);
-	int (*mmap)(struct uio_info *info, struct vm_area_struct *vma);
+	int (*mmap_prepare)(struct uio_info *info, struct vm_area_desc *desc);
 	int (*open)(struct uio_info *info, struct inode *inode);
 	int (*release)(struct uio_info *info, struct inode *inode);
 	int (*irqcontrol)(struct uio_info *info, s32 irq_on);
-- 
cgit v1.2.3


From 62c65fd740e979a3967db08971b93aefcec510d4 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 22:39:46 +0000
Subject: mm: add mmap_action_map_kernel_pages[_full]()

A user can invoke mmap_action_map_kernel_pages() to specify that the
mapping should map kernel pages starting from desc->start of a specified
number of pages specified in an array.

In order to implement this, adjust mmap_action_prepare() to be able to
return an error code, as it makes sense to assert that the specified
parameters are valid as quickly as possible as well as updating the VMA
flags to include VMA_MIXEDMAP_BIT as necessary.

This provides an mmap_prepare equivalent of vm_insert_pages().  We
additionally update the existing vm_insert_pages() code to use
range_in_vma() and add a new range_in_vma_desc() helper function for the
mmap_prepare case, sharing the code between the two in range_is_subset().

We add both mmap_action_map_kernel_pages() and
mmap_action_map_kernel_pages_full() to allow for both partial and full VMA
mappings.

We update the documentation to reflect the new features.

Finally, we update the VMA tests accordingly to reflect the changes.

Link: https://lkml.kernel.org/r/926ac961690d856e67ec847bee2370ab3c6b9046.1774045440.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bodo Stroesser <bostroesser@gmail.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Long Li <longli@microsoft.com>
Cc: Marc Dionne <marc.dionne@auristor.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/filesystems/mmap_prepare.rst |  8 +++
 include/linux/mm.h                         | 95 +++++++++++++++++++++++++++++-
 include/linux/mm_types.h                   |  7 +++
 mm/memory.c                                | 42 +++++++++++--
 mm/util.c                                  |  7 +++
 tools/testing/vma/include/dup.h            |  7 +++
 6 files changed, 160 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/mmap_prepare.rst b/Documentation/filesystems/mmap_prepare.rst
index 14bb057be564..82c99c95ad85 100644
--- a/Documentation/filesystems/mmap_prepare.rst
+++ b/Documentation/filesystems/mmap_prepare.rst
@@ -156,5 +156,13 @@ pointer. These are:
 * mmap_action_simple_ioremap() - Sets up an I/O remap from a specified
   physical address and over a specified length.
 
+* mmap_action_map_kernel_pages() - Maps a specified array of `struct page`
+  pointers in the VMA from a specific offset.
+
+* mmap_action_map_kernel_pages_full() - Maps a specified array of `struct
+  page` pointers over the entire VMA. The caller must ensure there are
+  sufficient entries in the page array to cover the entire range of the
+  described VMA.
+
 **NOTE:** The ``action`` field should never normally be manipulated directly,
 rather you ought to use one of these helpers.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 53b21de40f87..61dff7f03554 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2905,7 +2905,7 @@ static inline bool folio_maybe_mapped_shared(struct folio *folio)
  * The caller must add any reference (e.g., from folio_try_get()) it might be
  * holding itself to the result.
  *
- * Returns the expected folio refcount.
+ * Returns: the expected folio refcount.
  */
 static inline int folio_expected_ref_count(const struct folio *folio)
 {
@@ -4348,6 +4348,45 @@ static inline void mmap_action_simple_ioremap(struct vm_area_desc *desc,
 	action->type = MMAP_SIMPLE_IO_REMAP;
 }
 
+/**
+ * mmap_action_map_kernel_pages - helper for mmap_prepare hook to specify that
+ * @num kernel pages contained in the @pages array should be mapped to userland
+ * starting at virtual address @start.
+ * @desc: The VMA descriptor for the VMA requiring kernel pags to be mapped.
+ * @start: The virtual address from which to map them.
+ * @pages: An array of struct page pointers describing the memory to map.
+ * @nr_pages: The number of entries in the @pages aray.
+ */
+static inline void mmap_action_map_kernel_pages(struct vm_area_desc *desc,
+		unsigned long start, struct page **pages,
+		unsigned long nr_pages)
+{
+	struct mmap_action *action = &desc->action;
+
+	action->type = MMAP_MAP_KERNEL_PAGES;
+	action->map_kernel.start = start;
+	action->map_kernel.pages = pages;
+	action->map_kernel.nr_pages = nr_pages;
+	action->map_kernel.pgoff = desc->pgoff;
+}
+
+/**
+ * mmap_action_map_kernel_pages_full - helper for mmap_prepare hook to specify that
+ * kernel pages contained in the @pages array should be mapped to userland
+ * from @desc->start to @desc->end.
+ * @desc: The VMA descriptor for the VMA requiring kernel pags to be mapped.
+ * @pages: An array of struct page pointers describing the memory to map.
+ *
+ * The caller must ensure that @pages contains sufficient entries to cover the
+ * entire range described by @desc.
+ */
+static inline void mmap_action_map_kernel_pages_full(struct vm_area_desc *desc,
+		struct page **pages)
+{
+	mmap_action_map_kernel_pages(desc, desc->start, pages,
+				     vma_desc_pages(desc));
+}
+
 int mmap_action_prepare(struct vm_area_desc *desc);
 int mmap_action_complete(struct vm_area_struct *vma,
 			 struct mmap_action *action);
@@ -4364,10 +4403,59 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
 	return vma;
 }
 
+/**
+ * range_is_subset - Is the specified inner range a subset of the outer range?
+ * @outer_start: The start of the outer range.
+ * @outer_end: The exclusive end of the outer range.
+ * @inner_start: The start of the inner range.
+ * @inner_end: The exclusive end of the inner range.
+ *
+ * Returns: %true if [inner_start, inner_end) is a subset of [outer_start,
+ * outer_end), otherwise %false.
+ */
+static inline bool range_is_subset(unsigned long outer_start,
+				   unsigned long outer_end,
+				   unsigned long inner_start,
+				   unsigned long inner_end)
+{
+	return outer_start <= inner_start && inner_end <= outer_end;
+}
+
+/**
+ * range_in_vma - is the specified [@start, @end) range a subset of the VMA?
+ * @vma: The VMA against which we want to check [@start, @end).
+ * @start: The start of the range we wish to check.
+ * @end: The exclusive end of the range we wish to check.
+ *
+ * Returns: %true if [@start, @end) is a subset of [@vma->vm_start,
+ * @vma->vm_end), %false otherwise.
+ */
 static inline bool range_in_vma(const struct vm_area_struct *vma,
 				unsigned long start, unsigned long end)
 {
-	return (vma && vma->vm_start <= start && end <= vma->vm_end);
+	if (!vma)
+		return false;
+
+	return range_is_subset(vma->vm_start, vma->vm_end, start, end);
+}
+
+/**
+ * range_in_vma_desc - is the specified [@start, @end) range a subset of the VMA
+ * described by @desc, a VMA descriptor?
+ * @desc: The VMA descriptor against which we want to check [@start, @end).
+ * @start: The start of the range we wish to check.
+ * @end: The exclusive end of the range we wish to check.
+ *
+ * Returns: %true if [@start, @end) is a subset of [@desc->start, @desc->end),
+ * %false otherwise.
+ */
+static inline bool range_in_vma_desc(const struct vm_area_desc *desc,
+				     unsigned long start, unsigned long end)
+{
+	if (!desc)
+		return false;
+
+	return range_is_subset(desc->start, desc->end, start, end);
 }
 
 #ifdef CONFIG_MMU
@@ -4411,6 +4499,9 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
 int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
 			struct page **pages, unsigned long *num);
+int map_kernel_pages_prepare(struct vm_area_desc *desc);
+int map_kernel_pages_complete(struct vm_area_struct *vma,
+			      struct mmap_action *action);
 int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
 				unsigned long num);
 int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b702c63bf0e0..a308e2c23b82 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -815,6 +815,7 @@ enum mmap_action_type {
 	MMAP_REMAP_PFN,		/* Remap PFN range. */
 	MMAP_IO_REMAP_PFN,	/* I/O remap PFN range. */
 	MMAP_SIMPLE_IO_REMAP,	/* I/O remap with guardrails. */
+	MMAP_MAP_KERNEL_PAGES,	/* Map kernel page range from array. */
 };
 
 /*
@@ -833,6 +834,12 @@ struct mmap_action {
 			phys_addr_t start_phys_addr;
 			unsigned long size;
 		} simple_ioremap;
+		struct {
+			unsigned long start;
+			struct page **pages;
+			unsigned long nr_pages;
+			pgoff_t pgoff;
+		} map_kernel;
 	};
 	enum mmap_action_type type;
 
diff --git a/mm/memory.c b/mm/memory.c
index c1c323512939..5d032b5293c6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2484,13 +2484,14 @@ out:
 int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
 			struct page **pages, unsigned long *num)
 {
-	const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1;
+	const unsigned long nr_pages = *num;
+	const unsigned long end = addr + PAGE_SIZE * nr_pages;
 
-	if (addr < vma->vm_start || end_addr >= vma->vm_end)
+	if (!range_in_vma(vma, addr, end))
 		return -EFAULT;
 	if (!(vma->vm_flags & VM_MIXEDMAP)) {
-		BUG_ON(mmap_read_trylock(vma->vm_mm));
-		BUG_ON(vma->vm_flags & VM_PFNMAP);
+		VM_WARN_ON_ONCE(mmap_read_trylock(vma->vm_mm));
+		VM_WARN_ON_ONCE(vma->vm_flags & VM_PFNMAP);
 		vm_flags_set(vma, VM_MIXEDMAP);
 	}
 	/* Defer page refcount checking till we're about to map that page. */
@@ -2498,6 +2499,39 @@ int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_insert_pages);
 
+int map_kernel_pages_prepare(struct vm_area_desc *desc)
+{
+	const struct mmap_action *action = &desc->action;
+	const unsigned long addr = action->map_kernel.start;
+	unsigned long nr_pages, end;
+
+	if (!vma_desc_test(desc, VMA_MIXEDMAP_BIT)) {
+		VM_WARN_ON_ONCE(mmap_read_trylock(desc->mm));
+		VM_WARN_ON_ONCE(vma_desc_test(desc, VMA_PFNMAP_BIT));
+		vma_desc_set_flags(desc, VMA_MIXEDMAP_BIT);
+	}
+
+	nr_pages = action->map_kernel.nr_pages;
+	end = addr + PAGE_SIZE * nr_pages;
+	if (!range_in_vma_desc(desc, addr, end))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(map_kernel_pages_prepare);
+
+int map_kernel_pages_complete(struct vm_area_struct *vma,
+			      struct mmap_action *action)
+{
+	unsigned long nr_pages;
+
+	nr_pages = action->map_kernel.nr_pages;
+	return insert_pages(vma, action->map_kernel.start,
+			    action->map_kernel.pages,
+			    &nr_pages, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(map_kernel_pages_complete);
+
 /**
  * vm_insert_page - insert single page into user vma
  * @vma: user vma to map to
diff --git a/mm/util.c b/mm/util.c
index 5ae20876ef2c..f063fd4de1e8 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1448,6 +1448,8 @@ int mmap_action_prepare(struct vm_area_desc *desc)
 		return io_remap_pfn_range_prepare(desc);
 	case MMAP_SIMPLE_IO_REMAP:
 		return simple_ioremap_prepare(desc);
+	case MMAP_MAP_KERNEL_PAGES:
+		return map_kernel_pages_prepare(desc);
 	}
 
 	WARN_ON_ONCE(1);
@@ -1475,6 +1477,9 @@ int mmap_action_complete(struct vm_area_struct *vma,
 	case MMAP_REMAP_PFN:
 		err = remap_pfn_range_complete(vma, action);
 		break;
+	case MMAP_MAP_KERNEL_PAGES:
+		err = map_kernel_pages_complete(vma, action);
+		break;
 	case MMAP_IO_REMAP_PFN:
 	case MMAP_SIMPLE_IO_REMAP:
 		/* Should have been delegated. */
@@ -1495,6 +1500,7 @@ int mmap_action_prepare(struct vm_area_desc *desc)
 	case MMAP_REMAP_PFN:
 	case MMAP_IO_REMAP_PFN:
 	case MMAP_SIMPLE_IO_REMAP:
+	case MMAP_MAP_KERNEL_PAGES:
 		WARN_ON_ONCE(1); /* nommu cannot handle these. */
 		break;
 	}
@@ -1514,6 +1520,7 @@ int mmap_action_complete(struct vm_area_struct *vma,
 	case MMAP_REMAP_PFN:
 	case MMAP_IO_REMAP_PFN:
 	case MMAP_SIMPLE_IO_REMAP:
+	case MMAP_MAP_KERNEL_PAGES:
 		WARN_ON_ONCE(1); /* nommu cannot handle this. */
 
 		err = -EINVAL;
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index ecd47d0f7d17..b4864aad2db0 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -454,6 +454,7 @@ enum mmap_action_type {
 	MMAP_REMAP_PFN,		/* Remap PFN range. */
 	MMAP_IO_REMAP_PFN,	/* I/O remap PFN range. */
 	MMAP_SIMPLE_IO_REMAP,	/* I/O remap with guardrails. */
+	MMAP_MAP_KERNEL_PAGES,	/* Map kernel page range from an array. */
 };
 
 /*
@@ -472,6 +473,12 @@ struct mmap_action {
 			phys_addr_t start_phys_addr;
 			unsigned long size;
 		} simple_ioremap;
+		struct {
+			unsigned long start;
+			struct page **pages;
+			unsigned long nr_pages;
+			pgoff_t pgoff;
+		} map_kernel;
 	};
 	enum mmap_action_type type;
 
-- 
cgit v1.2.3


From c0ea52c18c78c33c68c350eb9d3dcdf8c513254d Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 18:07:18 +0000
Subject: mm/huge_memory: simplify vma_is_specal_huge()

Patch series "mm/huge_memory: refactor zap_huge_pmd()", v3.

zap_huge_pmd() is overly complicated, clean it up and also add an assert
in the case that we encounter a buggy PMD entry that doesn't match
expectations.

This is motivated by a bug discovered [0] where the PMD entry was none of:

* A non-DAX, PFN or mixed map.
* The huge zero folio
* A present PMD entry
* A softleaf entry

In zap_huge_pmd(), but due to the bug we manged to reach this code.

It is useful to explicitly call this out rather than have an arbitrary
NULL pointer dereference happen, which also improves understanding of
what's going on.

The series goes further to make use of vm_normal_folio_pmd() rather than
implementing custom logic for retrieving the folio, and extends softleaf
functionality to provide and use an equivalent softleaf function.


This patch (of 13):

This function is confused - it overloads the term 'special' yet again,
checks for DAX but in many cases the code explicitly excludes DAX before
invoking the predicate.

It also unnecessarily checks for vma->vm_file - this has to be present for
a driver to have set VMA_MIXEDMAP_BIT or VMA_PFNMAP_BIT.

In fact, a far simpler form of this is to reverse the DAX predicate and
return false if DAX is set.

This makes sense from the point of view of 'special' as in
vm_normal_page(), as DAX actually does potentially have retrievable
folios.

Also there's no need to have this in mm.h so move it to huge_memory.c.

No functional change intended.

Link: https://lkml.kernel.org/r/cover.1774029655.git.ljs@kernel.org
Link: https://lkml.kernel.org/r/d2b65883dc4895f197c4b4a69fbf27a063463412.1774029655.git.ljs@kernel.org
Link: https://lore.kernel.org/all/6b3d7ad7-49e1-407a-903d-3103704160d8@lucifer.local/ [0]
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h |  4 ++--
 include/linux/mm.h      | 16 ----------------
 mm/huge_memory.c        | 30 +++++++++++++++++++++++-------
 3 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index bd7f0e1d8094..61fda1672b29 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -83,7 +83,7 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr;
  * file is never split and the MAX_PAGECACHE_ORDER limit does not apply to
  * it.  Same to PFNMAPs where there's neither page* nor pagecache.
  */
-#define THP_ORDERS_ALL_SPECIAL		\
+#define THP_ORDERS_ALL_SPECIAL_DAX	\
 	(BIT(PMD_ORDER) | BIT(PUD_ORDER))
 #define THP_ORDERS_ALL_FILE_DEFAULT	\
 	((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0))
@@ -92,7 +92,7 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr;
  * Mask of all large folio orders supported for THP.
  */
 #define THP_ORDERS_ALL	\
-	(THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT)
+	(THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL_DAX | THP_ORDERS_ALL_FILE_DEFAULT)
 
 enum tva_type {
 	TVA_SMAPS,		/* Exposing "THPeligible:" in smaps. */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 61dff7f03554..8260e28205e9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -5068,22 +5068,6 @@ long copy_folio_from_user(struct folio *dst_folio,
 			   const void __user *usr_src,
 			   bool allow_pagefault);
 
-/**
- * vma_is_special_huge - Are transhuge page-table entries considered special?
- * @vma: Pointer to the struct vm_area_struct to consider
- *
- * Whether transhuge page-table entries are considered "special" following
- * the definition in vm_normal_page().
- *
- * Return: true if transhuge page-table entries should be considered special,
- * false otherwise.
- */
-static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
-{
-	return vma_is_dax(vma) || (vma->vm_file &&
-				   (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
-}
-
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
 #if MAX_NUMNODES > 1
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1c1a7cf7b209..db390b0098d9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -100,6 +100,14 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma)
 	return !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode);
 }
 
+/* If returns true, we are unable to access the VMA's folios. */
+static bool vma_is_special_huge(const struct vm_area_struct *vma)
+{
+	if (vma_is_dax(vma))
+		return false;
+	return vma_test_any(vma, VMA_PFNMAP_BIT, VMA_MIXEDMAP_BIT);
+}
+
 unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 					 vm_flags_t vm_flags,
 					 enum tva_type type,
@@ -113,8 +121,8 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 	/* Check the intersection of requested and supported orders. */
 	if (vma_is_anonymous(vma))
 		supported_orders = THP_ORDERS_ALL_ANON;
-	else if (vma_is_special_huge(vma))
-		supported_orders = THP_ORDERS_ALL_SPECIAL;
+	else if (vma_is_dax(vma) || vma_is_special_huge(vma))
+		supported_orders = THP_ORDERS_ALL_SPECIAL_DAX;
 	else
 		supported_orders = THP_ORDERS_ALL_FILE_DEFAULT;
 
@@ -2415,7 +2423,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 						tlb->fullmm);
 	arch_check_zapped_pmd(vma, orig_pmd);
 	tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
-	if (!vma_is_dax(vma) && vma_is_special_huge(vma)) {
+	if (vma_is_special_huge(vma)) {
 		if (arch_needs_pgtable_deposit())
 			zap_deposited_table(tlb->mm, pmd);
 		spin_unlock(ptl);
@@ -2917,7 +2925,7 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	orig_pud = pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm);
 	arch_check_zapped_pud(vma, orig_pud);
 	tlb_remove_pud_tlb_entry(tlb, pud, addr);
-	if (!vma_is_dax(vma) && vma_is_special_huge(vma)) {
+	if (vma_is_special_huge(vma)) {
 		spin_unlock(ptl);
 		/* No zero page support yet */
 	} else {
@@ -3068,7 +3076,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		 */
 		if (arch_needs_pgtable_deposit())
 			zap_deposited_table(mm, pmd);
-		if (!vma_is_dax(vma) && vma_is_special_huge(vma))
+		if (vma_is_special_huge(vma))
 			return;
 		if (unlikely(pmd_is_migration_entry(old_pmd))) {
 			const softleaf_t old_entry = softleaf_from_pmd(old_pmd);
@@ -4629,8 +4637,16 @@ next:
 
 static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)
 {
-	return vma_is_special_huge(vma) || (vma->vm_flags & VM_IO) ||
-		    is_vm_hugetlb_page(vma);
+	if (vma_is_dax(vma))
+		return true;
+	if (vma_is_special_huge(vma))
+		return true;
+	if (vma_test(vma, VMA_IO_BIT))
+		return true;
+	if (is_vm_hugetlb_page(vma))
+		return true;
+
+	return false;
 }
 
 static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
-- 
cgit v1.2.3


From b92b9d4f699ce1f0ae746ebc69bca329adc07293 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 18:07:20 +0000
Subject: mm/huge_memory: have zap_huge_pmd return a boolean, add kdoc

There's no need to use the ancient approach of returning an integer here,
just return a boolean.

Also update flush_needed to be a boolean, similarly.

Also add a kdoc comment describing the function.

No functional change intended.

Link: https://lkml.kernel.org/r/132274566cd49d2960a2294c36dd2450593dfc55.1774029655.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h |  4 ++--
 mm/huge_memory.c        | 23 ++++++++++++++++-------
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 61fda1672b29..2949e5acff35 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -27,8 +27,8 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
 bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			   pmd_t *pmd, unsigned long addr, unsigned long next);
-int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
-		 unsigned long addr);
+bool zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
+		  unsigned long addr);
 int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud,
 		 unsigned long addr);
 bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4dfffd6a1bbe..65e554afdf16 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2402,11 +2402,20 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
 	mm_dec_nr_ptes(mm);
 }
 
-int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
+/**
+ * zap_huge_pmd - Zap a huge THP which is of PMD size.
+ * @tlb: The MMU gather TLB state associated with the operation.
+ * @vma: The VMA containing the range to zap.
+ * @pmd: A pointer to the leaf PMD entry.
+ * @addr: The virtual address for the range to zap.
+ *
+ * Returns: %true on success, %false otherwise.
+ */
+bool zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 pmd_t *pmd, unsigned long addr)
 {
 	struct folio *folio = NULL;
-	int flush_needed = 1;
+	bool flush_needed = true;
 	spinlock_t *ptl;
 	pmd_t orig_pmd;
 
@@ -2414,7 +2423,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
 	ptl = __pmd_trans_huge_lock(pmd, vma);
 	if (!ptl)
-		return 0;
+		return false;
 	/*
 	 * For architectures like ppc64 we look at deposited pgtable
 	 * when calling pmdp_huge_get_and_clear. So do the
@@ -2429,13 +2438,13 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		if (arch_needs_pgtable_deposit())
 			zap_deposited_table(tlb->mm, pmd);
 		spin_unlock(ptl);
-		return 1;
+		return true;
 	}
 	if (is_huge_zero_pmd(orig_pmd)) {
 		if (!vma_is_dax(vma) || arch_needs_pgtable_deposit())
 			zap_deposited_table(tlb->mm, pmd);
 		spin_unlock(ptl);
-		return 1;
+		return true;
 	}
 
 	if (pmd_present(orig_pmd)) {
@@ -2449,7 +2458,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		const softleaf_t entry = softleaf_from_pmd(orig_pmd);
 
 		folio = softleaf_to_folio(entry);
-		flush_needed = 0;
+		flush_needed = false;
 
 		if (!thp_migration_supported())
 			WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
@@ -2483,7 +2492,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	if (flush_needed)
 		tlb_remove_page_size(tlb, &folio->page, HPAGE_PMD_SIZE);
 
-	return 1;
+	return true;
 }
 
 #ifndef pmd_move_must_withdraw
-- 
cgit v1.2.3


From 64b7d889d03ce94940d6dd9440c4e74c1108ac78 Mon Sep 17 00:00:00 2001
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
Date: Fri, 20 Mar 2026 18:07:28 +0000
Subject: mm: add softleaf_is_valid_pmd_entry(), pmd_to_softleaf_folio()

Separate pmd_is_valid_softleaf() into separate components, then use the
pmd_is_valid_softleaf() predicate to implement pmd_to_softleaf_folio().

This returns the folio associated with a softleaf entry at PMD level. It
expects this to be valid for a PMD entry.

If CONFIG_DEBUG_VM is set, then assert on this being an invalid entry, and
either way return NULL in this case.

This lays the ground for further refactorings.

Link: https://lkml.kernel.org/r/b677592596274fa3fd701890497948e4b0e07cec.1774029655.git.ljs@kernel.org
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nico Pache <npache@redhat.com>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/leafops.h | 39 +++++++++++++++++++++++++++++++++++----
 1 file changed, 35 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/leafops.h b/include/linux/leafops.h
index 05673d3529e7..992cd8bd8ed0 100644
--- a/include/linux/leafops.h
+++ b/include/linux/leafops.h
@@ -607,7 +607,20 @@ static inline bool pmd_is_migration_entry(pmd_t pmd)
 }
 
 /**
- * pmd_is_valid_softleaf() - Is this PMD entry a valid leaf entry?
+ * softleaf_is_valid_pmd_entry() - Is the specified softleaf entry obtained from
+ * a PMD one that we support at PMD level?
+ * @entry: Entry to check.
+ * Returns: true if the softleaf entry is valid at PMD, otherwise false.
+ */
+static inline bool softleaf_is_valid_pmd_entry(softleaf_t entry)
+{
+	/* Only device private, migration entries valid for PMD. */
+	return softleaf_is_device_private(entry) ||
+		softleaf_is_migration(entry);
+}
+
+/**
+ * pmd_is_valid_softleaf() - Is this PMD entry a valid softleaf entry?
  * @pmd: PMD entry.
  *
  * PMD leaf entries are valid only if they are device private or migration
@@ -620,9 +633,27 @@ static inline bool pmd_is_valid_softleaf(pmd_t pmd)
 {
 	const softleaf_t entry = softleaf_from_pmd(pmd);
 
-	/* Only device private, migration entries valid for PMD. */
-	return softleaf_is_device_private(entry) ||
-		softleaf_is_migration(entry);
+	return softleaf_is_valid_pmd_entry(entry);
+}
+
+/**
+ * pmd_to_softleaf_folio() - Convert the PMD entry to a folio.
+ * @pmd: PMD entry.
+ *
+ * The PMD entry is expected to be a valid PMD softleaf entry.
+ *
+ * Returns: the folio the softleaf entry references if this is a valid softleaf
+ * entry, otherwise NULL.
+ */
+static inline struct folio *pmd_to_softleaf_folio(pmd_t pmd)
+{
+	const softleaf_t entry = softleaf_from_pmd(pmd);
+
+	if (!softleaf_is_valid_pmd_entry(entry)) {
+		VM_WARN_ON_ONCE(true);
+		return NULL;
+	}
+	return softleaf_to_folio(entry);
 }
 
 #endif  /* CONFIG_MMU */
-- 
cgit v1.2.3


From 3031b76d65e14a946cfb5000d79b642f58ffac5c Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Sun, 5 Apr 2026 14:23:25 +0300
Subject: mei: bus: add mei_cldev_uuid

Add mei_cldev_uuid API on mei bus to allow client
to query what UUID it bound to.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Link: https://patch.msgid.link/20260405112326.1535208-2-alexander.usyskin@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus.c     | 13 +++++++++++++
 include/linux/mei_cl_bus.h |  1 +
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index f739dbcdb04c..fcde082eb5e3 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -601,6 +601,19 @@ void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data)
 }
 EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata);
 
+/**
+ * mei_cldev_uuid - return uuid of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client uuid
+ */
+const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev)
+{
+	return mei_me_cl_uuid(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_uuid);
+
 /**
  * mei_cldev_ver - return protocol version of the underlying me client
  *
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index a82755e1fc40..5bdbd9e1d460 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -112,6 +112,7 @@ int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb);
 int mei_cldev_register_notif_cb(struct mei_cl_device *cldev,
 				mei_cldev_cb_t notif_cb);
 
+const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev);
 u8 mei_cldev_ver(const struct mei_cl_device *cldev);
 size_t mei_cldev_mtu(const struct mei_cl_device *cldev);
 
-- 
cgit v1.2.3


From 4251dab9d176212afdf4ced263b59bc0d5292c7f Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Date: Tue, 17 Mar 2026 13:36:50 +0100
Subject: remoteproc: mtk_scp_ipi: Constify buffer passed to scp_ipi_send()

scp_ipi_send() should only send the passed buffer, without modifying its
contents, so mark pointer 'buf' as pointer to const.

Acked-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260317-rpmsg-send-const-v3-1-4d7fd27f037f@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 drivers/remoteproc/mtk_scp_ipi.c   | 2 +-
 include/linux/remoteproc/mtk_scp.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/mtk_scp_ipi.c b/drivers/remoteproc/mtk_scp_ipi.c
index 7a37e273b3af..ee2f1121411f 100644
--- a/drivers/remoteproc/mtk_scp_ipi.c
+++ b/drivers/remoteproc/mtk_scp_ipi.c
@@ -156,7 +156,7 @@ EXPORT_SYMBOL_GPL(scp_ipi_unlock);
  *
  * Return: 0 if sending data successfully, -error on error.
  **/
-int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len,
+int scp_ipi_send(struct mtk_scp *scp, u32 id, const void *buf, unsigned int len,
 		 unsigned int wait)
 {
 	struct mtk_share_obj __iomem *send_obj = scp->send_buf;
diff --git a/include/linux/remoteproc/mtk_scp.h b/include/linux/remoteproc/mtk_scp.h
index 344ff41c22c7..4070537d6542 100644
--- a/include/linux/remoteproc/mtk_scp.h
+++ b/include/linux/remoteproc/mtk_scp.h
@@ -58,7 +58,7 @@ int scp_ipi_register(struct mtk_scp *scp, u32 id, scp_ipi_handler_t handler,
 		     void *priv);
 void scp_ipi_unregister(struct mtk_scp *scp, u32 id);
 
-int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len,
+int scp_ipi_send(struct mtk_scp *scp, u32 id, const void *buf, unsigned int len,
 		 unsigned int wait);
 
 unsigned int scp_get_vdec_hw_capa(struct mtk_scp *scp);
-- 
cgit v1.2.3


From 90dacbf4bf13410c727ffaca8fe3ce3276ae58c2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Date: Tue, 17 Mar 2026 13:36:51 +0100
Subject: remoteproc: mtk_scp: Constify buffer passed to scp_send_ipi()

scp_send_ipi() should only send the passed buffer, without modifying its
contents, so mark pointer 'buf' as pointer to const.

Acked-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260317-rpmsg-send-const-v3-2-4d7fd27f037f@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 drivers/remoteproc/mtk_scp.c    | 2 +-
 include/linux/rpmsg/mtk_rpmsg.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c
index 4651311aeb07..b5c052c72a1b 100644
--- a/drivers/remoteproc/mtk_scp.c
+++ b/drivers/remoteproc/mtk_scp.c
@@ -1078,7 +1078,7 @@ static void scp_unregister_ipi(struct platform_device *pdev, u32 id)
 	scp_ipi_unregister(scp, id);
 }
 
-static int scp_send_ipi(struct platform_device *pdev, u32 id, void *buf,
+static int scp_send_ipi(struct platform_device *pdev, u32 id, const void *buf,
 			unsigned int len, unsigned int wait)
 {
 	struct mtk_scp *scp = platform_get_drvdata(pdev);
diff --git a/include/linux/rpmsg/mtk_rpmsg.h b/include/linux/rpmsg/mtk_rpmsg.h
index 363b60178040..badcbc89917f 100644
--- a/include/linux/rpmsg/mtk_rpmsg.h
+++ b/include/linux/rpmsg/mtk_rpmsg.h
@@ -25,7 +25,7 @@ struct mtk_rpmsg_info {
 			    ipi_handler_t handler, void *priv);
 	void (*unregister_ipi)(struct platform_device *pdev, u32 id);
 	int (*send_ipi)(struct platform_device *pdev, u32 id,
-			void *buf, unsigned int len, unsigned int wait);
+			const void *buf, unsigned int len, unsigned int wait);
 	int ns_ipi_id;
 };
 
-- 
cgit v1.2.3


From b8077b4da2e89917ec4c632b66e60d49089bbda3 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Date: Tue, 17 Mar 2026 13:36:52 +0100
Subject: rpmsg: Constify buffer passed to send API

The rpmsg_send(), rpmsg_sendto() and other variants of sending
interfaces should only send the passed data, without modifying its
contents, so mark pointer 'data' as pointer to const.  All users of this
interface already follow this approach, so only the function
declarations have to be updated.

Acked-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260317-rpmsg-send-const-v3-3-4d7fd27f037f@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 drivers/rpmsg/mtk_rpmsg.c         |  4 ++--
 drivers/rpmsg/qcom_glink_native.c | 13 ++++++++-----
 drivers/rpmsg/qcom_smd.c          | 10 ++++++----
 drivers/rpmsg/rpmsg_core.c        |  8 ++++----
 drivers/rpmsg/rpmsg_internal.h    |  8 ++++----
 drivers/rpmsg/virtio_rpmsg_bus.c  | 24 +++++++++++++-----------
 include/linux/rpmsg.h             | 17 +++++++++--------
 7 files changed, 46 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rpmsg/mtk_rpmsg.c b/drivers/rpmsg/mtk_rpmsg.c
index 0e03c5336609..1b670ed54cfa 100644
--- a/drivers/rpmsg/mtk_rpmsg.c
+++ b/drivers/rpmsg/mtk_rpmsg.c
@@ -135,7 +135,7 @@ static void mtk_rpmsg_destroy_ept(struct rpmsg_endpoint *ept)
 	kref_put(&ept->refcount, __mtk_ept_release);
 }
 
-static int mtk_rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
+static int mtk_rpmsg_send(struct rpmsg_endpoint *ept, const void *data, int len)
 {
 	struct mtk_rpmsg_rproc_subdev *mtk_subdev =
 		to_mtk_rpmsg_endpoint(ept)->mtk_subdev;
@@ -144,7 +144,7 @@ static int mtk_rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
 					  len, 0);
 }
 
-static int mtk_rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len)
+static int mtk_rpmsg_trysend(struct rpmsg_endpoint *ept, const void *data, int len)
 {
 	struct mtk_rpmsg_rproc_subdev *mtk_subdev =
 		to_mtk_rpmsg_endpoint(ept)->mtk_subdev;
diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c
index 9ef17c2e45b0..401a4ece0c97 100644
--- a/drivers/rpmsg/qcom_glink_native.c
+++ b/drivers/rpmsg/qcom_glink_native.c
@@ -1474,7 +1474,7 @@ unlock:
 }
 
 static int __qcom_glink_send(struct glink_channel *channel,
-			     void *data, int len, bool wait)
+			     const void *data, int len, bool wait)
 {
 	struct qcom_glink *glink = channel->glink;
 	struct glink_core_rx_intent *intent = NULL;
@@ -1553,28 +1553,31 @@ static int __qcom_glink_send(struct glink_channel *channel,
 	return 0;
 }
 
-static int qcom_glink_send(struct rpmsg_endpoint *ept, void *data, int len)
+static int qcom_glink_send(struct rpmsg_endpoint *ept, const void *data, int len)
 {
 	struct glink_channel *channel = to_glink_channel(ept);
 
 	return __qcom_glink_send(channel, data, len, true);
 }
 
-static int qcom_glink_trysend(struct rpmsg_endpoint *ept, void *data, int len)
+static int qcom_glink_trysend(struct rpmsg_endpoint *ept, const void *data,
+			      int len)
 {
 	struct glink_channel *channel = to_glink_channel(ept);
 
 	return __qcom_glink_send(channel, data, len, false);
 }
 
-static int qcom_glink_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
+static int qcom_glink_sendto(struct rpmsg_endpoint *ept, const void *data,
+			     int len, u32 dst)
 {
 	struct glink_channel *channel = to_glink_channel(ept);
 
 	return __qcom_glink_send(channel, data, len, true);
 }
 
-static int qcom_glink_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
+static int qcom_glink_trysendto(struct rpmsg_endpoint *ept, const void *data,
+				int len, u32 dst)
 {
 	struct glink_channel *channel = to_glink_channel(ept);
 
diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c
index e1eb450f4fea..3ac863f400ec 100644
--- a/drivers/rpmsg/qcom_smd.c
+++ b/drivers/rpmsg/qcom_smd.c
@@ -960,28 +960,30 @@ static void qcom_smd_destroy_ept(struct rpmsg_endpoint *ept)
 	kref_put(&ept->refcount, __ept_release);
 }
 
-static int qcom_smd_send(struct rpmsg_endpoint *ept, void *data, int len)
+static int qcom_smd_send(struct rpmsg_endpoint *ept, const void *data, int len)
 {
 	struct qcom_smd_endpoint *qsept = to_smd_endpoint(ept);
 
 	return __qcom_smd_send(qsept->qsch, data, len, true);
 }
 
-static int qcom_smd_trysend(struct rpmsg_endpoint *ept, void *data, int len)
+static int qcom_smd_trysend(struct rpmsg_endpoint *ept, const void *data, int len)
 {
 	struct qcom_smd_endpoint *qsept = to_smd_endpoint(ept);
 
 	return __qcom_smd_send(qsept->qsch, data, len, false);
 }
 
-static int qcom_smd_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
+static int qcom_smd_sendto(struct rpmsg_endpoint *ept, const void *data, int len,
+			   u32 dst)
 {
 	struct qcom_smd_endpoint *qsept = to_smd_endpoint(ept);
 
 	return __qcom_smd_send(qsept->qsch, data, len, true);
 }
 
-static int qcom_smd_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
+static int qcom_smd_trysendto(struct rpmsg_endpoint *ept, const void *data,
+			      int len, u32 dst)
 {
 	struct qcom_smd_endpoint *qsept = to_smd_endpoint(ept);
 
diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index 948541656950..e7f7831d37f8 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -153,7 +153,7 @@ EXPORT_SYMBOL(rpmsg_destroy_ept);
  *
  * Return: 0 on success and an appropriate error value on failure.
  */
-int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
+int rpmsg_send(struct rpmsg_endpoint *ept, const void *data, int len)
 {
 	if (WARN_ON(!ept))
 		return -EINVAL;
@@ -182,7 +182,7 @@ EXPORT_SYMBOL(rpmsg_send);
  *
  * Return: 0 on success and an appropriate error value on failure.
  */
-int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
+int rpmsg_sendto(struct rpmsg_endpoint *ept, const void *data, int len, u32 dst)
 {
 	if (WARN_ON(!ept))
 		return -EINVAL;
@@ -210,7 +210,7 @@ EXPORT_SYMBOL(rpmsg_sendto);
  *
  * Return: 0 on success and an appropriate error value on failure.
  */
-int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len)
+int rpmsg_trysend(struct rpmsg_endpoint *ept, const void *data, int len)
 {
 	if (WARN_ON(!ept))
 		return -EINVAL;
@@ -238,7 +238,7 @@ EXPORT_SYMBOL(rpmsg_trysend);
  *
  * Return: 0 on success and an appropriate error value on failure.
  */
-int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
+int rpmsg_trysendto(struct rpmsg_endpoint *ept, const void *data, int len, u32 dst)
 {
 	if (WARN_ON(!ept))
 		return -EINVAL;
diff --git a/drivers/rpmsg/rpmsg_internal.h b/drivers/rpmsg/rpmsg_internal.h
index 397e4926bd02..a8b7065fd165 100644
--- a/drivers/rpmsg/rpmsg_internal.h
+++ b/drivers/rpmsg/rpmsg_internal.h
@@ -63,11 +63,11 @@ struct rpmsg_device_ops {
 struct rpmsg_endpoint_ops {
 	void (*destroy_ept)(struct rpmsg_endpoint *ept);
 
-	int (*send)(struct rpmsg_endpoint *ept, void *data, int len);
-	int (*sendto)(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
+	int (*send)(struct rpmsg_endpoint *ept, const void *data, int len);
+	int (*sendto)(struct rpmsg_endpoint *ept, const void *data, int len, u32 dst);
 
-	int (*trysend)(struct rpmsg_endpoint *ept, void *data, int len);
-	int (*trysendto)(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
+	int (*trysend)(struct rpmsg_endpoint *ept, const void *data, int len);
+	int (*trysendto)(struct rpmsg_endpoint *ept, const void *data, int len, u32 dst);
 	__poll_t (*poll)(struct rpmsg_endpoint *ept, struct file *filp,
 			     poll_table *wait);
 	int (*set_flow_control)(struct rpmsg_endpoint *ept, bool pause, u32 dst);
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 8d9e2b4dc7c1..5ae15111fb4f 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -136,11 +136,12 @@ struct virtio_rpmsg_channel {
 #define RPMSG_RESERVED_ADDRESSES	(1024)
 
 static void virtio_rpmsg_destroy_ept(struct rpmsg_endpoint *ept);
-static int virtio_rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len);
-static int virtio_rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len,
-			       u32 dst);
-static int virtio_rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len);
-static int virtio_rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data,
+static int virtio_rpmsg_send(struct rpmsg_endpoint *ept, const void *data, int len);
+static int virtio_rpmsg_sendto(struct rpmsg_endpoint *ept, const void *data,
+			       int len, u32 dst);
+static int virtio_rpmsg_trysend(struct rpmsg_endpoint *ept, const void *data,
+				int len);
+static int virtio_rpmsg_trysendto(struct rpmsg_endpoint *ept, const void *data,
 				  int len, u32 dst);
 static __poll_t virtio_rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp,
 				  poll_table *wait);
@@ -490,7 +491,7 @@ static void *get_a_tx_buf(struct virtproc_info *vrp)
  */
 static int rpmsg_send_offchannel_raw(struct rpmsg_device *rpdev,
 				     u32 src, u32 dst,
-				     void *data, int len, bool wait)
+				     const void *data, int len, bool wait)
 {
 	struct virtio_rpmsg_channel *vch = to_virtio_rpmsg_channel(rpdev);
 	struct virtproc_info *vrp = vch->vrp;
@@ -580,7 +581,7 @@ out:
 	return err;
 }
 
-static int virtio_rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
+static int virtio_rpmsg_send(struct rpmsg_endpoint *ept, const void *data, int len)
 {
 	struct rpmsg_device *rpdev = ept->rpdev;
 	u32 src = ept->addr, dst = rpdev->dst;
@@ -588,8 +589,8 @@ static int virtio_rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
 	return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true);
 }
 
-static int virtio_rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len,
-			       u32 dst)
+static int virtio_rpmsg_sendto(struct rpmsg_endpoint *ept, const void *data,
+			       int len, u32 dst)
 {
 	struct rpmsg_device *rpdev = ept->rpdev;
 	u32 src = ept->addr;
@@ -597,7 +598,8 @@ static int virtio_rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len,
 	return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true);
 }
 
-static int virtio_rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len)
+static int virtio_rpmsg_trysend(struct rpmsg_endpoint *ept, const void *data,
+				int len)
 {
 	struct rpmsg_device *rpdev = ept->rpdev;
 	u32 src = ept->addr, dst = rpdev->dst;
@@ -605,7 +607,7 @@ static int virtio_rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len)
 	return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false);
 }
 
-static int virtio_rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data,
+static int virtio_rpmsg_trysendto(struct rpmsg_endpoint *ept, const void *data,
 				  int len, u32 dst)
 {
 	struct rpmsg_device *rpdev = ept->rpdev;
diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
index fb7ab9165645..83266ce14642 100644
--- a/include/linux/rpmsg.h
+++ b/include/linux/rpmsg.h
@@ -182,11 +182,11 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *,
 					rpmsg_rx_cb_t cb, void *priv,
 					struct rpmsg_channel_info chinfo);
 
-int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len);
-int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
+int rpmsg_send(struct rpmsg_endpoint *ept, const void *data, int len);
+int rpmsg_sendto(struct rpmsg_endpoint *ept, const void *data, int len, u32 dst);
 
-int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len);
-int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
+int rpmsg_trysend(struct rpmsg_endpoint *ept, const void *data, int len);
+int rpmsg_trysendto(struct rpmsg_endpoint *ept, const void *data, int len, u32 dst);
 
 __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp,
 			poll_table *wait);
@@ -249,7 +249,7 @@ static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev
 	return NULL;
 }
 
-static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
+static inline int rpmsg_send(struct rpmsg_endpoint *ept, const void *data, int len)
 {
 	/* This shouldn't be possible */
 	WARN_ON(1);
@@ -257,7 +257,7 @@ static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
 	return -ENXIO;
 }
 
-static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len,
+static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, const void *data, int len,
 			       u32 dst)
 {
 	/* This shouldn't be possible */
@@ -267,7 +267,8 @@ static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len,
 
 }
 
-static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len)
+static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, const void *data,
+				int len)
 {
 	/* This shouldn't be possible */
 	WARN_ON(1);
@@ -275,7 +276,7 @@ static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len)
 	return -ENXIO;
 }
 
-static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data,
+static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, const void *data,
 				  int len, u32 dst)
 {
 	/* This shouldn't be possible */
-- 
cgit v1.2.3


From 66ec83627902d2585e14911692b317496731767a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Date: Tue, 17 Mar 2026 13:36:53 +0100
Subject: ASoC: qcom: Constify GPR packet being send over GPR interface

gpr_send_pkt() and pkt_router_send_svc_pkt() only send the GPR packet
they receive, without any need to actually modify it, so mark the
pointer to GPR packet as pointer to const for code safety and code
self-documentation.  Several users of this interface can follow up and
also operate on pointer to const.

Acked-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260317-rpmsg-send-const-v3-4-4d7fd27f037f@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 drivers/soc/qcom/apr.c            | 8 ++++----
 include/linux/soc/qcom/apr.h      | 4 ++--
 sound/soc/qcom/qdsp6/audioreach.c | 6 +++---
 sound/soc/qcom/qdsp6/audioreach.h | 4 ++--
 sound/soc/qcom/qdsp6/q6apm.c      | 3 ++-
 sound/soc/qcom/qdsp6/q6apm.h      | 2 +-
 6 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c
index 78e72379a6e0..ea7f83916d8d 100644
--- a/drivers/soc/qcom/apr.c
+++ b/drivers/soc/qcom/apr.c
@@ -123,10 +123,10 @@ gpr_port_t *gpr_alloc_port(struct apr_device *gdev, struct device *dev,
 }
 EXPORT_SYMBOL_GPL(gpr_alloc_port);
 
-static int pkt_router_send_svc_pkt(struct pkt_router_svc *svc, struct gpr_pkt *pkt)
+static int pkt_router_send_svc_pkt(struct pkt_router_svc *svc, const struct gpr_pkt *pkt)
 {
 	struct packet_router *pr = svc->pr;
-	struct gpr_hdr *hdr;
+	const struct gpr_hdr *hdr;
 	unsigned long flags;
 	int ret;
 
@@ -139,13 +139,13 @@ static int pkt_router_send_svc_pkt(struct pkt_router_svc *svc, struct gpr_pkt *p
 	return ret ? ret : hdr->pkt_size;
 }
 
-int gpr_send_pkt(struct apr_device *gdev, struct gpr_pkt *pkt)
+int gpr_send_pkt(struct apr_device *gdev, const struct gpr_pkt *pkt)
 {
 	return pkt_router_send_svc_pkt(&gdev->svc, pkt);
 }
 EXPORT_SYMBOL_GPL(gpr_send_pkt);
 
-int gpr_send_port_pkt(gpr_port_t *port, struct gpr_pkt *pkt)
+int gpr_send_port_pkt(gpr_port_t *port, const struct gpr_pkt *pkt)
 {
 	return pkt_router_send_svc_pkt(port, pkt);
 }
diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h
index 6e1b1202e818..58fa1df96347 100644
--- a/include/linux/soc/qcom/apr.h
+++ b/include/linux/soc/qcom/apr.h
@@ -191,7 +191,7 @@ int apr_send_pkt(struct apr_device *adev, struct apr_pkt *pkt);
 gpr_port_t *gpr_alloc_port(gpr_device_t *gdev, struct device *dev,
 				gpr_port_cb cb, void *priv);
 void gpr_free_port(gpr_port_t *port);
-int gpr_send_port_pkt(gpr_port_t *port, struct gpr_pkt *pkt);
-int gpr_send_pkt(gpr_device_t *gdev, struct gpr_pkt *pkt);
+int gpr_send_port_pkt(gpr_port_t *port, const struct gpr_pkt *pkt);
+int gpr_send_pkt(gpr_device_t *gdev, const struct gpr_pkt *pkt);
 
 #endif /* __QCOM_APR_H_ */
diff --git a/sound/soc/qcom/qdsp6/audioreach.c b/sound/soc/qcom/qdsp6/audioreach.c
index 241c3b4479c6..c84e098230c6 100644
--- a/sound/soc/qcom/qdsp6/audioreach.c
+++ b/sound/soc/qcom/qdsp6/audioreach.c
@@ -579,10 +579,10 @@ EXPORT_SYMBOL_GPL(audioreach_alloc_graph_pkt);
 int audioreach_send_cmd_sync(struct device *dev, gpr_device_t *gdev,
 			     struct gpr_ibasic_rsp_result_t *result, struct mutex *cmd_lock,
 			     gpr_port_t *port, wait_queue_head_t *cmd_wait,
-			     struct gpr_pkt *pkt, uint32_t rsp_opcode)
+			     const struct gpr_pkt *pkt, uint32_t rsp_opcode)
 {
 
-	struct gpr_hdr *hdr = &pkt->hdr;
+	const struct gpr_hdr *hdr = &pkt->hdr;
 	int rc;
 
 	mutex_lock(cmd_lock);
@@ -622,7 +622,7 @@ err:
 }
 EXPORT_SYMBOL_GPL(audioreach_send_cmd_sync);
 
-int audioreach_graph_send_cmd_sync(struct q6apm_graph *graph, struct gpr_pkt *pkt,
+int audioreach_graph_send_cmd_sync(struct q6apm_graph *graph, const struct gpr_pkt *pkt,
 				   uint32_t rsp_opcode)
 {
 
diff --git a/sound/soc/qcom/qdsp6/audioreach.h b/sound/soc/qcom/qdsp6/audioreach.h
index 89f172aab8c0..6262b9251440 100644
--- a/sound/soc/qcom/qdsp6/audioreach.h
+++ b/sound/soc/qcom/qdsp6/audioreach.h
@@ -844,8 +844,8 @@ int audioreach_map_memory_regions(struct q6apm_graph *graph,
 				  bool is_contiguous);
 int audioreach_send_cmd_sync(struct device *dev, gpr_device_t *gdev, struct gpr_ibasic_rsp_result_t *result,
 			     struct mutex *cmd_lock, gpr_port_t *port, wait_queue_head_t *cmd_wait,
-			     struct gpr_pkt *pkt, uint32_t rsp_opcode);
-int audioreach_graph_send_cmd_sync(struct q6apm_graph *graph, struct gpr_pkt *pkt,
+			     const struct gpr_pkt *pkt, uint32_t rsp_opcode);
+int audioreach_graph_send_cmd_sync(struct q6apm_graph *graph, const struct gpr_pkt *pkt,
 				   uint32_t rsp_opcode);
 int audioreach_set_media_format(struct q6apm_graph *graph,
 				const struct audioreach_module *module,
diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c
index 44841fde3856..3527ad1acbca 100644
--- a/sound/soc/qcom/qdsp6/q6apm.c
+++ b/sound/soc/qcom/qdsp6/q6apm.c
@@ -29,7 +29,8 @@ struct apm_graph_mgmt_cmd {
 
 static struct q6apm *g_apm;
 
-int q6apm_send_cmd_sync(struct q6apm *apm, struct gpr_pkt *pkt, uint32_t rsp_opcode)
+int q6apm_send_cmd_sync(struct q6apm *apm, const struct gpr_pkt *pkt,
+			uint32_t rsp_opcode)
 {
 	gpr_device_t *gdev = apm->gdev;
 
diff --git a/sound/soc/qcom/qdsp6/q6apm.h b/sound/soc/qcom/qdsp6/q6apm.h
index 7ce08b401e31..a39f6046f886 100644
--- a/sound/soc/qcom/qdsp6/q6apm.h
+++ b/sound/soc/qcom/qdsp6/q6apm.h
@@ -138,7 +138,7 @@ int q6apm_map_memory_regions(struct q6apm_graph *graph,
 int q6apm_unmap_memory_regions(struct q6apm_graph *graph,
 			       unsigned int dir);
 /* Helpers */
-int q6apm_send_cmd_sync(struct q6apm *apm, struct gpr_pkt *pkt,
+int q6apm_send_cmd_sync(struct q6apm *apm, const struct gpr_pkt *pkt,
 			uint32_t rsp_opcode);
 
 /* Callback for graph specific */
-- 
cgit v1.2.3


From ad5fd5aeb65a4426635cf55ef06c96e60a66e648 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 1 Apr 2026 09:11:40 +0200
Subject: hwspinlock: remove now unused pdata from header file

The last user turned out to be obsolete and was removed. Remove the
unused struct now, too.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Acked-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Link: https://lore.kernel.org/r/20260401071141.4718-3-wsa+renesas@sang-engineering.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/hwspinlock.h | 28 ----------------------------
 1 file changed, 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index f35b42e8c5de..74b91244fe0e 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -25,34 +25,6 @@ struct hwspinlock;
 struct hwspinlock_device;
 struct hwspinlock_ops;
 
-/**
- * struct hwspinlock_pdata - platform data for hwspinlock drivers
- * @base_id: base id for this hwspinlock device
- *
- * hwspinlock devices provide system-wide hardware locks that are used
- * by remote processors that have no other way to achieve synchronization.
- *
- * To achieve that, each physical lock must have a system-wide id number
- * that is agreed upon, otherwise remote processors can't possibly assume
- * they're using the same hardware lock.
- *
- * Usually boards have a single hwspinlock device, which provides several
- * hwspinlocks, and in this case, they can be trivially numbered 0 to
- * (num-of-locks - 1).
- *
- * In case boards have several hwspinlocks devices, a different base id
- * should be used for each hwspinlock device (they can't all use 0 as
- * a starting id!).
- *
- * This platform data structure should be used to provide the base id
- * for each device (which is trivially 0 when only a single hwspinlock
- * device exists). It can be shared between different platforms, hence
- * its location.
- */
-struct hwspinlock_pdata {
-	int base_id;
-};
-
 #ifdef CONFIG_HWSPINLOCK
 
 int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev,
-- 
cgit v1.2.3


From f652d0a4e13c5f5416da15ba791b99c5d1ac9b18 Mon Sep 17 00:00:00 2001
From: Chengwen Feng <fengchengwen@huawei.com>
Date: Wed, 1 Apr 2026 16:16:37 +0800
Subject: ACPI: Centralize acpi_get_cpu_uid() declaration in
 include/linux/acpi.h

Centralize acpi_get_cpu_uid() in include/linux/acpi.h (global scope) and
remove arch-specific declarations from arm64/loongarch/riscv/x86
asm/acpi.h. This unifies the interface across architectures and
simplifies maintenance by eliminating duplicate prototypes.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260401081640.26875-6-fengchengwen@huawei.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm64/include/asm/acpi.h     |  1 -
 arch/loongarch/include/asm/acpi.h |  1 -
 arch/riscv/include/asm/acpi.h     |  1 -
 arch/x86/include/asm/acpi.h       |  2 --
 include/linux/acpi.h              | 11 +++++++++++
 5 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 2219a3301e72..bdb0ecf95b5c 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -118,7 +118,6 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
 {
 	return	acpi_cpu_get_madt_gicc(cpu)->uid;
 }
-int acpi_get_cpu_uid(unsigned int cpu, u32 *uid);
 int get_cpu_for_acpi_id(u32 uid);
 
 static inline void arch_fix_phys_package_id(int num, u32 slot) { }
diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index 8bb101b4557e..7376840fa9f7 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -44,7 +44,6 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
 {
 	return acpi_core_pic[cpu_logical_map(cpu)].processor_id;
 }
-int acpi_get_cpu_uid(unsigned int cpu, u32 *uid);
 
 #endif /* !CONFIG_ACPI */
 
diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h
index f3520cc85af3..6e13695120bc 100644
--- a/arch/riscv/include/asm/acpi.h
+++ b/arch/riscv/include/asm/acpi.h
@@ -65,7 +65,6 @@ static inline u32 get_acpi_id_for_cpu(int cpu)
 {
 	return acpi_cpu_get_madt_rintc(cpu)->uid;
 }
-int acpi_get_cpu_uid(unsigned int cpu, u32 *uid);
 
 int acpi_get_riscv_isa(struct acpi_table_header *table,
 		       unsigned int cpu, const char **isa);
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 92b5c27c4fea..a03aa6f999d1 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -157,8 +157,6 @@ static inline bool acpi_has_cpu_in_madt(void)
 	return !!acpi_lapic;
 }
 
-int acpi_get_cpu_uid(unsigned int cpu, u32 *uid);
-
 #define ACPI_HAVE_ARCH_SET_ROOT_POINTER
 static __always_inline void acpi_arch_set_root_pointer(u64 addr)
 {
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4d2f0bed7a06..74a73f0e5944 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -324,6 +324,17 @@ int acpi_unmap_cpu(int cpu);
 
 acpi_handle acpi_get_processor_handle(int cpu);
 
+/**
+ * acpi_get_cpu_uid() - Get ACPI Processor UID of from MADT table
+ * @cpu: Logical CPU number (0-based)
+ * @uid: Pointer to store ACPI Processor UID
+ *
+ * Return: 0 on success (ACPI Processor ID stored in *uid);
+ *         -EINVAL if CPU number is invalid or out of range;
+ *         -ENODEV if ACPI Processor UID for the CPU is not found.
+ */
+int acpi_get_cpu_uid(unsigned int cpu, u32 *uid);
+
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
 #endif
-- 
cgit v1.2.3


From abdd2a86535b59c76d14da2547160bc83e059c03 Mon Sep 17 00:00:00 2001
From: Chengwen Feng <fengchengwen@huawei.com>
Date: Wed, 1 Apr 2026 16:16:40 +0800
Subject: PCI/TPH: Pass ACPI Processor UID to Cache Locality _DSM

pcie_tph_get_cpu_st() uses the Query Cache Locality Features _DSM [1]
to retrieve the TPH Steering Tag for memory associated with the CPU
identified by its "cpu_uid" parameter, a Linux logical CPU ID.

The _DSM requires an ACPI Processor UID, which pcie_tph_get_cpu_st()
previously assumed was the same as the Linux logical CPU ID. This is
true on x86 but not on arm64, so pcie_tph_get_cpu_st() returned the
wrong Steering Tag, resulting in incorrect TPH functionality on arm64.

Convert the Linux logical CPU ID to the ACPI Processor UID with
acpi_get_cpu_uid() before passing it to the _DSM. Additionally, rename
the pcie_tph_get_cpu_st() parameter from "cpu_uid" to "cpu" to reflect
that it represents a logical CPU ID (not an ACPI Processor UID).

[1] According to ECN_TPH-ST_Revision_20200924
    (https://members.pcisig.com/wg/PCI-SIG/document/15470), the input
    is defined as: "If the target is a processor, then this field
    represents the ACPI Processor UID of the processor as specified in
    the MADT. If the target is a processor container, then this field
    represents the ACPI Processor UID of the processor container as
    specified in the PPTT."

Fixes: d2e8a34876ce ("PCI/TPH: Add Steering Tag support")
Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20260401081640.26875-9-fengchengwen@huawei.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/PCI/tph.rst |  4 ++--
 drivers/pci/tph.c         | 16 +++++++++++-----
 include/linux/pci-tph.h   |  4 ++--
 3 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/PCI/tph.rst b/Documentation/PCI/tph.rst
index e8993be64fd6..b6cf22b9bd90 100644
--- a/Documentation/PCI/tph.rst
+++ b/Documentation/PCI/tph.rst
@@ -79,10 +79,10 @@ To retrieve a Steering Tag for a target memory associated with a specific
 CPU, use the following function::
 
   int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type type,
-                          unsigned int cpu_uid, u16 *tag);
+                          unsigned int cpu, u16 *tag);
 
 The `type` argument is used to specify the memory type, either volatile
-or persistent, of the target memory. The `cpu_uid` argument specifies the
+or persistent, of the target memory. The `cpu` argument specifies the
 CPU where the memory is associated to.
 
 After the ST value is retrieved, the device driver can use the following
diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c
index ca4f97be7538..b67c9ad14bda 100644
--- a/drivers/pci/tph.c
+++ b/drivers/pci/tph.c
@@ -236,21 +236,27 @@ static int write_tag_to_st_table(struct pci_dev *pdev, int index, u16 tag)
  * with a specific CPU
  * @pdev: PCI device
  * @mem_type: target memory type (volatile or persistent RAM)
- * @cpu_uid: associated CPU id
+ * @cpu: associated CPU id
  * @tag: Steering Tag to be returned
  *
  * Return the Steering Tag for a target memory that is associated with a
- * specific CPU as indicated by cpu_uid.
+ * specific CPU as indicated by cpu.
  *
  * Return: 0 if success, otherwise negative value (-errno)
  */
 int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type mem_type,
-			unsigned int cpu_uid, u16 *tag)
+			unsigned int cpu, u16 *tag)
 {
 #ifdef CONFIG_ACPI
 	struct pci_dev *rp;
 	acpi_handle rp_acpi_handle;
 	union st_info info;
+	u32 cpu_uid;
+	int ret;
+
+	ret = acpi_get_cpu_uid(cpu, &cpu_uid);
+	if (ret != 0)
+		return ret;
 
 	rp = pcie_find_root_port(pdev);
 	if (!rp || !rp->bus || !rp->bus->bridge)
@@ -265,9 +271,9 @@ int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type mem_type,
 
 	*tag = tph_extract_tag(mem_type, pdev->tph_req_type, &info);
 
-	pci_dbg(pdev, "get steering tag: mem_type=%s, cpu_uid=%d, tag=%#04x\n",
+	pci_dbg(pdev, "get steering tag: mem_type=%s, cpu=%d, tag=%#04x\n",
 		(mem_type == TPH_MEM_TYPE_VM) ? "volatile" : "persistent",
-		cpu_uid, *tag);
+		cpu, *tag);
 
 	return 0;
 #else
diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h
index ba28140ce670..be68cd17f2f8 100644
--- a/include/linux/pci-tph.h
+++ b/include/linux/pci-tph.h
@@ -25,7 +25,7 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev,
 			  unsigned int index, u16 tag);
 int pcie_tph_get_cpu_st(struct pci_dev *dev,
 			enum tph_mem_type mem_type,
-			unsigned int cpu_uid, u16 *tag);
+			unsigned int cpu, u16 *tag);
 void pcie_disable_tph(struct pci_dev *pdev);
 int pcie_enable_tph(struct pci_dev *pdev, int mode);
 u16 pcie_tph_get_st_table_size(struct pci_dev *pdev);
@@ -36,7 +36,7 @@ static inline int pcie_tph_set_st_entry(struct pci_dev *pdev,
 { return -EINVAL; }
 static inline int pcie_tph_get_cpu_st(struct pci_dev *dev,
 				      enum tph_mem_type mem_type,
-				      unsigned int cpu_uid, u16 *tag)
+				      unsigned int cpu, u16 *tag)
 { return -EINVAL; }
 static inline void pcie_disable_tph(struct pci_dev *pdev) { }
 static inline int pcie_enable_tph(struct pci_dev *pdev, int mode)
-- 
cgit v1.2.3


From 18474aed5d0d382f8057ceed7811a735134d28b9 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 30 Mar 2026 16:38:18 -0600
Subject: bpf: Avoid -Wflex-array-members-not-at-end warnings

Apparently, struct bpf_empty_prog_array exists entirely to populate a
single element of "items" in a global variable. "null_prog" is only
used during the initializer.

None of this is needed; globals will be correctly sized with an array
initializer of a flexible-array member.

So, remove struct bpf_empty_prog_array and adjust the rest of the code,
accordingly.

With these changes, fix the following warnings:

./include/linux/bpf.h:2369:31: warning: structure containing a flexible
array member is not at the end of another structure [-Wflex-array-member-not-at-end]

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Link: https://lore.kernel.org/r/acr7Whmn0br3xeBP@kspp
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf-cgroup.h |  2 +-
 include/linux/bpf.h        |  7 +------
 kernel/bpf/core.c          | 12 +++++++-----
 3 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 2f535331f926..b2e79c2b41d5 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -184,7 +184,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
 	struct bpf_prog_array *array;
 
 	array = rcu_access_pointer(cgrp->bpf.effective[type]);
-	return array != &bpf_empty_prog_array.hdr;
+	return array != &bpf_empty_prog_array;
 }
 
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 35b1e25bd104..30d35d5fe40b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2369,18 +2369,13 @@ struct bpf_prog_array {
 	struct bpf_prog_array_item items[];
 };
 
-struct bpf_empty_prog_array {
-	struct bpf_prog_array hdr;
-	struct bpf_prog *null_prog;
-};
-
 /* to avoid allocating empty bpf_prog_array for cgroups that
  * don't have bpf program attached use one global 'bpf_empty_prog_array'
  * It will not be modified the caller of bpf_prog_array_alloc()
  * (since caller requested prog_cnt == 0)
  * that pointer should be 'freed' by bpf_prog_array_free()
  */
-extern struct bpf_empty_prog_array bpf_empty_prog_array;
+extern struct bpf_prog_array bpf_empty_prog_array;
 
 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
 void bpf_prog_array_free(struct bpf_prog_array *progs);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 57811c07aa84..ee632f41bd83 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2614,8 +2614,10 @@ static struct bpf_prog_dummy {
 	},
 };
 
-struct bpf_empty_prog_array bpf_empty_prog_array = {
-	.null_prog = NULL,
+struct bpf_prog_array bpf_empty_prog_array = {
+	.items = {
+		{ .prog = NULL },
+	},
 };
 EXPORT_SYMBOL(bpf_empty_prog_array);
 
@@ -2626,14 +2628,14 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
 	if (prog_cnt)
 		p = kzalloc_flex(*p, items, prog_cnt + 1, flags);
 	else
-		p = &bpf_empty_prog_array.hdr;
+		p = &bpf_empty_prog_array;
 
 	return p;
 }
 
 void bpf_prog_array_free(struct bpf_prog_array *progs)
 {
-	if (!progs || progs == &bpf_empty_prog_array.hdr)
+	if (!progs || progs == &bpf_empty_prog_array)
 		return;
 	kfree_rcu(progs, rcu);
 }
@@ -2654,7 +2656,7 @@ static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
 
 void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
 {
-	if (!progs || progs == &bpf_empty_prog_array.hdr)
+	if (!progs || progs == &bpf_empty_prog_array)
 		return;
 	call_rcu_tasks_trace(&progs->rcu, __bpf_prog_array_free_sleepable_cb);
 }
-- 
cgit v1.2.3


From a9c4b1d37622ed01b75f94a4f68cf55f33153a31 Mon Sep 17 00:00:00 2001
From: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Date: Fri, 3 Apr 2026 15:29:53 +0200
Subject: drbd: remove DRBD_GENLA_F_MANDATORY flag handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DRBD used a custom mechanism to mark netlink attributes as "mandatory":
bit 14 of nla_type was repurposed as DRBD_GENLA_F_MANDATORY. Attributes
sent from userspace that had this bit present and that were unknown
to the kernel would lead to an error.

Since commit ef6243acb478 ("genetlink: optionally validate strictly/dumps"),
the generic netlink layer rejects unknown top-level attributes when
strict validation is enabled. DRBD never opted out of strict
validation, so unknown top-level attributes are already rejected by
the netlink core.

The mandatory flag mechanism was required for nested attributes, because
these are parsed liberally, silently dropping attributes unknown to the
kernel.

This prepares for the move to a new YNL-based family, which will use the
now-default strict parsing.
The current family is not expected to gain any new attributes, which
makes this change safe.

Old userspace that still sets bit 14 is unaffected: nla_type()
strips it before __nla_validate_parse() performs attribute validation,
so the bit never reaches DRBD.

Remove all references to the mandatory flag in DRBD.

Cc: Johannes Berg <johannes.berg@intel.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Link: https://patch.msgid.link/20260403132953.2248751-1-christoph.boehmwalder@linbit.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/Makefile       |   1 -
 drivers/block/drbd/drbd_nl.c      |  19 ++--
 drivers/block/drbd/drbd_nla.c     |  56 ----------
 drivers/block/drbd/drbd_nla.h     |   9 --
 include/linux/drbd_genl.h         | 208 +++++++++++++++++++-------------------
 include/linux/genl_magic_func.h   |   3 +-
 include/linux/genl_magic_struct.h |  15 +--
 7 files changed, 114 insertions(+), 197 deletions(-)
 delete mode 100644 drivers/block/drbd/drbd_nla.c
 delete mode 100644 drivers/block/drbd/drbd_nla.h

(limited to 'include/linux')

diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index 67a8b352a1d5..187eaf81f0f8 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -3,7 +3,6 @@ drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o
 drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
 drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
 drbd-y += drbd_interval.o drbd_state.o
-drbd-y += drbd_nla.o
 drbd-$(CONFIG_DEBUG_FS) += drbd_debugfs.o
 
 obj-$(CONFIG_BLK_DEV_DRBD)     += drbd.o
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 1f8ffdf9b24e..d997d274092c 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -74,7 +74,6 @@ int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
 int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);
 
 #include <linux/drbd_genl_api.h>
-#include "drbd_nla.h"
 
 static int drbd_pre_doit(const struct genl_split_ops *ops,
 			 struct sk_buff *skb, struct genl_info *info);
@@ -239,14 +238,14 @@ static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
 			goto fail;
 
 		/* and assign stuff to the adm_ctx */
-		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
+		nla = nested_attr_tb[T_ctx_volume];
 		if (nla)
 			adm_ctx->volume = nla_get_u32(nla);
-		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
+		nla = nested_attr_tb[T_ctx_resource_name];
 		if (nla)
 			adm_ctx->resource_name = nla_data(nla);
-		adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
-		adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
+		adm_ctx->my_addr = nested_attr_tb[T_ctx_my_addr];
+		adm_ctx->peer_addr = nested_attr_tb[T_ctx_peer_addr];
 		if ((adm_ctx->my_addr &&
 		     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
 		    (adm_ctx->peer_addr &&
@@ -825,7 +824,6 @@ out:
 static const char *from_attrs_err_to_txt(int err)
 {
 	return	err == -ENOMSG ? "required attribute missing" :
-		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
 		err == -EEXIST ? "can not change invariant setting" :
 		"invalid attribute value";
 }
@@ -3303,14 +3301,13 @@ nla_put_failure:
 static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr)
 {
 	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
-	const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
 	struct nlattr *nla;
 
 	nla = nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen),
 		       DRBD_NLA_CFG_CONTEXT);
 	if (!nla)
 		return NULL;
-	return drbd_nla_find_nested(maxtype, nla, __nla_type(attr));
+	return nla_find_nested(nla, attr);
 }
 
 static void resource_to_info(struct resource_info *, struct drbd_resource *);
@@ -4068,7 +4065,6 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
 	struct nlattr *nla;
 	const char *resource_name;
 	struct drbd_resource *resource;
-	int maxtype;
 
 	/* Is this a followup call? */
 	if (cb->args[0]) {
@@ -4088,10 +4084,7 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
 	/* No explicit context given.  Dump all. */
 	if (!nla)
 		goto dump;
-	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
-	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
-	if (IS_ERR(nla))
-		return PTR_ERR(nla);
+	nla = nla_find_nested(nla, T_ctx_resource_name);
 	/* context given, but no name present? */
 	if (!nla)
 		return -EINVAL;
diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c
deleted file mode 100644
index df0d241d3f6a..000000000000
--- a/drivers/block/drbd/drbd_nla.c
+++ /dev/null
@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/kernel.h>
-#include <net/netlink.h>
-#include <linux/drbd_genl_api.h>
-#include "drbd_nla.h"
-
-static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
-{
-	struct nlattr *head = nla_data(nla);
-	int len = nla_len(nla);
-	int rem;
-
-	/*
-	 * validate_nla (called from nla_parse_nested) ignores attributes
-	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
-	 * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
-	 * flag set also, check and remove that flag before calling
-	 * nla_parse_nested.
-	 */
-
-	nla_for_each_attr(nla, head, len, rem) {
-		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
-			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
-			if (nla_type(nla) > maxtype)
-				return -EOPNOTSUPP;
-		}
-	}
-	return 0;
-}
-
-int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
-			  const struct nla_policy *policy)
-{
-	int err;
-
-	err = drbd_nla_check_mandatory(maxtype, nla);
-	if (!err)
-		err = nla_parse_nested_deprecated(tb, maxtype, nla, policy,
-						  NULL);
-
-	return err;
-}
-
-struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
-{
-	int err;
-	/*
-	 * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and
-	 * we don't know about that attribute, reject all the nested
-	 * attributes.
-	 */
-	err = drbd_nla_check_mandatory(maxtype, nla);
-	if (err)
-		return ERR_PTR(err);
-	return nla_find_nested(nla, attrtype);
-}
diff --git a/drivers/block/drbd/drbd_nla.h b/drivers/block/drbd/drbd_nla.h
deleted file mode 100644
index d3555df0d353..000000000000
--- a/drivers/block/drbd/drbd_nla.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef __DRBD_NLA_H
-#define __DRBD_NLA_H
-
-extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
-				 const struct nla_policy *policy);
-extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);
-
-#endif  /* __DRBD_NLA_H */
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 53f44b8cd75f..f53c534aba0c 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -87,7 +87,7 @@
  */
 GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
 		/* "arbitrary" size strings, nla_policy.len = 0 */
-	__str_field(1, DRBD_GENLA_F_MANDATORY,	info_text, 0)
+	__str_field(1, 0,	info_text, 0)
 )
 
 /* Configuration requests typically need a context to operate on.
@@ -96,10 +96,10 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
  * and/or the replication group (aka resource) name,
  * and the volume id within the resource. */
 GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
-	__u32_field(1, DRBD_GENLA_F_MANDATORY,	ctx_volume)
-	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_resource_name, 128)
-	__bin_field(3, DRBD_GENLA_F_MANDATORY,	ctx_my_addr, 128)
-	__bin_field(4, DRBD_GENLA_F_MANDATORY,	ctx_peer_addr, 128)
+	__u32_field(1, 0,	ctx_volume)
+	__str_field(2, 0,	ctx_resource_name, 128)
+	__bin_field(3, 0,	ctx_my_addr, 128)
+	__bin_field(4, 0,	ctx_peer_addr, 128)
 )
 
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
@@ -108,86 +108,86 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev_idx)
 
 	/* use the resize command to try and change the disk_size */
-	__u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	disk_size)
+	__u64_field(4, DRBD_F_INVARIANT,	disk_size)
 	/* we could change the max_bio_bvecs,
 	 * but it won't propagate through the stack */
-	__u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	max_bio_bvecs)
-
-	__u32_field_def(6, DRBD_GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
-	__u32_field_def(7, DRBD_GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
-
-	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
-	__s32_field_def(9,	DRBD_GENLA_F_MANDATORY,	resync_after, DRBD_MINOR_NUMBER_DEF)
-	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
-	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
-	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
-	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	c_fill_target, DRBD_C_FILL_TARGET_DEF)
-	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
-	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
-	__u32_field_def(20,     DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF)
+	__u32_field(5, DRBD_F_INVARIANT,	max_bio_bvecs)
+
+	__u32_field_def(6, 0,	on_io_error, DRBD_ON_IO_ERROR_DEF)
+	__u32_field_def(7, 0,	fencing, DRBD_FENCING_DEF)
+
+	__u32_field_def(8,	0,	resync_rate, DRBD_RESYNC_RATE_DEF)
+	__s32_field_def(9,	0,	resync_after, DRBD_MINOR_NUMBER_DEF)
+	__u32_field_def(10,	0,	al_extents, DRBD_AL_EXTENTS_DEF)
+	__u32_field_def(11,	0,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
+	__u32_field_def(12,	0,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
+	__u32_field_def(13,	0,	c_fill_target, DRBD_C_FILL_TARGET_DEF)
+	__u32_field_def(14,	0,	c_max_rate, DRBD_C_MAX_RATE_DEF)
+	__u32_field_def(15,	0,	c_min_rate, DRBD_C_MIN_RATE_DEF)
+	__u32_field_def(20,     0, disk_timeout, DRBD_DISK_TIMEOUT_DEF)
 	__u32_field_def(21,     0 /* OPTIONAL */,       read_balancing, DRBD_READ_BALANCING_DEF)
 	__u32_field_def(25,     0 /* OPTIONAL */,       rs_discard_granularity, DRBD_RS_DISCARD_GRANULARITY_DEF)
 
-	__flg_field_def(16, DRBD_GENLA_F_MANDATORY,	disk_barrier, DRBD_DISK_BARRIER_DEF)
-	__flg_field_def(17, DRBD_GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
-	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
-	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
+	__flg_field_def(16, 0,	disk_barrier, DRBD_DISK_BARRIER_DEF)
+	__flg_field_def(17, 0,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
+	__flg_field_def(18, 0,	disk_drain, DRBD_DISK_DRAIN_DEF)
+	__flg_field_def(19, 0,	md_flushes, DRBD_MD_FLUSHES_DEF)
 	__flg_field_def(23,     0 /* OPTIONAL */,	al_updates, DRBD_AL_UPDATES_DEF)
 	__flg_field_def(24,     0 /* OPTIONAL */,	discard_zeroes_if_aligned, DRBD_DISCARD_ZEROES_IF_ALIGNED_DEF)
 	__flg_field_def(26,     0 /* OPTIONAL */,	disable_write_same, DRBD_DISABLE_WRITE_SAME_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
-	__str_field_def(1,	DRBD_GENLA_F_MANDATORY,	cpu_mask,       DRBD_CPU_MASK_SIZE)
-	__u32_field_def(2,	DRBD_GENLA_F_MANDATORY,	on_no_data, DRBD_ON_NO_DATA_DEF)
+	__str_field_def(1,	0,	cpu_mask,       DRBD_CPU_MASK_SIZE)
+	__u32_field_def(2,	0,	on_no_data, DRBD_ON_NO_DATA_DEF)
 )
 
 GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
-	__str_field_def(1,	DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
+	__str_field_def(1,	DRBD_F_SENSITIVE,
 						shared_secret,	SHARED_SECRET_MAX)
-	__str_field_def(2,	DRBD_GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	__str_field_def(3,	DRBD_GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
-	__str_field_def(4,	DRBD_GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
-	__str_field_def(5,	DRBD_GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
-	__u32_field_def(6,	DRBD_GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
-	__u32_field_def(7,	DRBD_GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
-	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
-	__u32_field_def(9,	DRBD_GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
-	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
-	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
-	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
-	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
-	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
-	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
-	__u32_field_def(16,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
-	__u32_field_def(17,	DRBD_GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
-	__u32_field_def(18,	DRBD_GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
-	__u32_field_def(19,	DRBD_GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
-	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
-	__u32_field_def(21,	DRBD_GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
-	__u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
-	__u32_field_def(23,	DRBD_GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
-	__flg_field_def(24, DRBD_GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
-	__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
-	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
-	__flg_field_def(27, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
-	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	tentative)
-	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
-	/* 9: __u32_field_def(30,	DRBD_GENLA_F_MANDATORY,	fencing_policy, DRBD_FENCING_DEF) */
-	/* 9: __str_field_def(31,     DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */
+	__str_field_def(2,	0,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field_def(3,	0,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field_def(4,	0,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field_def(5,	0,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field_def(6,	0,	wire_protocol, DRBD_PROTOCOL_DEF)
+	__u32_field_def(7,	0,	connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(8,	0,	timeout, DRBD_TIMEOUT_DEF)
+	__u32_field_def(9,	0,	ping_int, DRBD_PING_INT_DEF)
+	__u32_field_def(10,	0,	ping_timeo, DRBD_PING_TIMEO_DEF)
+	__u32_field_def(11,	0,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
+	__u32_field_def(12,	0,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
+	__u32_field_def(13,	0,	ko_count, DRBD_KO_COUNT_DEF)
+	__u32_field_def(14,	0,	max_buffers, DRBD_MAX_BUFFERS_DEF)
+	__u32_field_def(15,	0,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
+	__u32_field_def(16,	0,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
+	__u32_field_def(17,	0,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
+	__u32_field_def(18,	0,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
+	__u32_field_def(19,	0,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
+	__u32_field_def(20,	0,	rr_conflict, DRBD_RR_CONFLICT_DEF)
+	__u32_field_def(21,	0,	on_congestion, DRBD_ON_CONGESTION_DEF)
+	__u32_field_def(22,	0,	cong_fill, DRBD_CONG_FILL_DEF)
+	__u32_field_def(23,	0,	cong_extents, DRBD_CONG_EXTENTS_DEF)
+	__flg_field_def(24, 0,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
+	__flg_field(25, DRBD_F_INVARIANT,	discard_my_data)
+	__flg_field_def(26, 0,	tcp_cork, DRBD_TCP_CORK_DEF)
+	__flg_field_def(27, 0,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
+	__flg_field(28, DRBD_F_INVARIANT,	tentative)
+	__flg_field_def(29,	0,	use_rle, DRBD_USE_RLE_DEF)
+	/* 9: __u32_field_def(30,	0,	fencing_policy, DRBD_FENCING_DEF) */
+	/* 9: __str_field_def(31,     0, name, SHARED_SECRET_MAX) */
 	/* 9: __u32_field(32,         DRBD_F_REQUIRED | DRBD_F_INVARIANT,     peer_node_id) */
 	__flg_field_def(33, 0 /* OPTIONAL */,	csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF)
 	__u32_field_def(34, 0 /* OPTIONAL */, sock_check_timeo, DRBD_SOCKET_CHECK_TIMEO_DEF)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
-	__flg_field(1, DRBD_GENLA_F_MANDATORY,	assume_uptodate)
+	__flg_field(1, 0,	assume_uptodate)
 )
 
 GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
-	__u64_field(1, DRBD_GENLA_F_MANDATORY,	resize_size)
-	__flg_field(2, DRBD_GENLA_F_MANDATORY,	resize_force)
-	__flg_field(3, DRBD_GENLA_F_MANDATORY,	no_resync)
+	__u64_field(1, 0,	resize_size)
+	__flg_field(2, 0,	resize_force)
+	__flg_field(3, 0,	no_resync)
 	__u32_field_def(4, 0 /* OPTIONAL */, al_stripes, DRBD_AL_STRIPES_DEF)
 	__u32_field_def(5, 0 /* OPTIONAL */, al_stripe_size, DRBD_AL_STRIPE_SIZE_DEF)
 )
@@ -195,31 +195,31 @@ GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
 GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
 	/* the reason of the broadcast,
 	 * if this is an event triggered broadcast. */
-	__u32_field(1, DRBD_GENLA_F_MANDATORY,	sib_reason)
+	__u32_field(1, 0,	sib_reason)
 	__u32_field(2, DRBD_F_REQUIRED,	current_state)
-	__u64_field(3, DRBD_GENLA_F_MANDATORY,	capacity)
-	__u64_field(4, DRBD_GENLA_F_MANDATORY,	ed_uuid)
+	__u64_field(3, 0,	capacity)
+	__u64_field(4, 0,	ed_uuid)
 
 	/* These are for broadcast from after state change work.
 	 * prev_state and new_state are from the moment the state change took
 	 * place, new_state is not neccessarily the same as current_state,
 	 * there may have been more state changes since.  Which will be
 	 * broadcasted soon, in their respective after state change work.  */
-	__u32_field(5, DRBD_GENLA_F_MANDATORY,	prev_state)
-	__u32_field(6, DRBD_GENLA_F_MANDATORY,	new_state)
+	__u32_field(5, 0,	prev_state)
+	__u32_field(6, 0,	new_state)
 
 	/* if we have a local disk: */
-	__bin_field(7, DRBD_GENLA_F_MANDATORY,	uuids, (UI_SIZE*sizeof(__u64)))
-	__u32_field(8, DRBD_GENLA_F_MANDATORY,	disk_flags)
-	__u64_field(9, DRBD_GENLA_F_MANDATORY,	bits_total)
-	__u64_field(10, DRBD_GENLA_F_MANDATORY,	bits_oos)
+	__bin_field(7, 0,	uuids, (UI_SIZE*sizeof(__u64)))
+	__u32_field(8, 0,	disk_flags)
+	__u64_field(9, 0,	bits_total)
+	__u64_field(10, 0,	bits_oos)
 	/* and in case resync or online verify is active */
-	__u64_field(11, DRBD_GENLA_F_MANDATORY,	bits_rs_total)
-	__u64_field(12, DRBD_GENLA_F_MANDATORY,	bits_rs_failed)
+	__u64_field(11, 0,	bits_rs_total)
+	__u64_field(12, 0,	bits_rs_failed)
 
 	/* for pre and post notifications of helper execution */
-	__str_field(13, DRBD_GENLA_F_MANDATORY,	helper, 32)
-	__u32_field(14, DRBD_GENLA_F_MANDATORY,	helper_exit_code)
+	__str_field(13, 0,	helper, 32)
+	__u32_field(14, 0,	helper_exit_code)
 
 	__u64_field(15,                      0, send_cnt)
 	__u64_field(16,                      0, recv_cnt)
@@ -233,12 +233,12 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
 )
 
 GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
-	__u64_field(1, DRBD_GENLA_F_MANDATORY,	ov_start_sector)
-	__u64_field(2, DRBD_GENLA_F_MANDATORY,	ov_stop_sector)
+	__u64_field(1, 0,	ov_start_sector)
+	__u64_field(2, 0,	ov_stop_sector)
 )
 
 GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
-	__flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm)
+	__flg_field(1, 0, clear_bm)
 )
 
 GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
@@ -246,11 +246,11 @@ GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
 )
 
 GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
-	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_disconnect)
+	__flg_field(1, 0,	force_disconnect)
 )
 
 GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
-	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_detach)
+	__flg_field(1, 0,	force_detach)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_INFO, 15, resource_info,
@@ -315,12 +315,12 @@ GENL_struct(DRBD_NLA_PEER_DEVICE_STATISTICS, 22, peer_device_statistics,
 )
 
 GENL_struct(DRBD_NLA_NOTIFICATION_HEADER, 23, drbd_notification_header,
-	__u32_field(1, DRBD_GENLA_F_MANDATORY, nh_type)
+	__u32_field(1, 0, nh_type)
 )
 
 GENL_struct(DRBD_NLA_HELPER, 24, drbd_helper_info,
-	__str_field(1, DRBD_GENLA_F_MANDATORY, helper_name, 32)
-	__u32_field(2, DRBD_GENLA_F_MANDATORY, helper_status)
+	__str_field(1, 0, helper_name, 32)
+	__u32_field(2, 0, helper_status)
 )
 
 /*
@@ -333,9 +333,9 @@ GENL_notification(
 	DRBD_EVENT, 1, events,
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
 	GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY)
-	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY)
-	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, 0)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, 0)
+	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, 0)
 )
 
 	/* query kernel for specific or all info */
@@ -349,7 +349,7 @@ GENL_op(
 	),
 	/* To select the object .doit.
 	 * Or a subset of objects in .dumpit. */
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0)
 )
 
 	/* add DRBD minor devices as volumes to resources */
@@ -367,7 +367,7 @@ GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource),
 GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
 	GENL_doit(drbd_adm_resource_opts),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, 0)
 )
 
 GENL_op(
@@ -403,7 +403,7 @@ GENL_op(
 	DRBD_ADM_RESIZE, 13,
 	GENL_doit(drbd_adm_resize),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, 0)
 )
 
 GENL_op(
@@ -424,18 +424,18 @@ GENL_op(
 	DRBD_ADM_NEW_C_UUID, 16,
 	GENL_doit(drbd_adm_new_c_uuid),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, 0)
 )
 
 GENL_op(
 	DRBD_ADM_START_OV, 17,
 	GENL_doit(drbd_adm_start_ov),
-	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, 0)
 )
 
 GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY))
+	GENL_tla_expected(DRBD_NLA_DETACH_PARMS, 0))
 
 GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
@@ -460,36 +460,36 @@ GENL_op(DRBD_ADM_GET_RESOURCES, 30,
 	 GENL_op_init(
 		 .dumpit = drbd_adm_dump_resources,
 	 ),
-	 GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
-	 GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_GENLA_F_MANDATORY)
-	 GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_GENLA_F_MANDATORY))
+	 GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0)
+	 GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, 0)
+	 GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, 0))
 
 GENL_op(DRBD_ADM_GET_DEVICES, 31,
 	 GENL_op_init(
 		 .dumpit = drbd_adm_dump_devices,
 		 .done = drbd_adm_dump_devices_done,
 	 ),
-	 GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
-	 GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_GENLA_F_MANDATORY)
-	 GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY))
+	 GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0)
+	 GENL_tla_expected(DRBD_NLA_DEVICE_INFO, 0)
+	 GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, 0))
 
 GENL_op(DRBD_ADM_GET_CONNECTIONS, 32,
 	 GENL_op_init(
 		 .dumpit = drbd_adm_dump_connections,
 		 .done = drbd_adm_dump_connections_done,
 	 ),
-	 GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
-	 GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_GENLA_F_MANDATORY)
-	 GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_GENLA_F_MANDATORY))
+	 GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0)
+	 GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, 0)
+	 GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, 0))
 
 GENL_op(DRBD_ADM_GET_PEER_DEVICES, 33,
 	 GENL_op_init(
 		 .dumpit = drbd_adm_dump_peer_devices,
 		 .done = drbd_adm_dump_peer_devices_done,
 	 ),
-	 GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
-	 GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_GENLA_F_MANDATORY)
-	 GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY))
+	 GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0)
+	 GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, 0)
+	 GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, 0))
 
 GENL_notification(
 	DRBD_RESOURCE_STATE, 34, events,
@@ -524,7 +524,7 @@ GENL_op(
 	GENL_op_init(
 	        .dumpit = drbd_adm_get_initial_state,
 	),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, 0))
 
 GENL_notification(
 	DRBD_HELPER, 40, events,
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 6edcac85155e..a7d36c9ea924 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -149,7 +149,8 @@ static int __ ## s_name ## _from_attrs(struct s_name *s,		\
 	if (!tla)							\
 		return -ENOMSG;						\
 	DPRINT_TLA(#s_name, "<=-", #tag_name);				\
-	err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy);	\
+	err = nla_parse_nested_deprecated(ntb, maxtype, tla,			\
+					  s_name ## _nl_policy, NULL);	\
 	if (err)							\
 		return err;						\
 									\
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index 621b87a87d74..2200cedd160a 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -25,16 +25,6 @@ extern void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void);
  * Extension of genl attribute validation policies			{{{2
  */
 
-/*
- * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not
- * know about.  This flag can be set in nlattr->nla_type to indicate that this
- * attribute must not be ignored.
- *
- * We check and remove this flag in drbd_nla_check_mandatory() before
- * validating the attribute types and lengths via nla_parse_nested().
- */
-#define DRBD_GENLA_F_MANDATORY (1 << 14)
-
 /*
  * Flags specific to drbd and not visible at the netlink layer, used in
  * <struct>_from_attrs and <struct>_to_skb:
@@ -52,7 +42,6 @@ extern void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void);
 #define DRBD_F_SENSITIVE (1 << 1)
 #define DRBD_F_INVARIANT (1 << 2)
 
-#define __nla_type(x)	((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY))
 
 /*									}}}1
  * MAGIC
@@ -158,12 +147,12 @@ enum {								\
 #undef __field
 #define __field(attr_nr, attr_flag, name, nla_type, type,	\
 		__get, __put, __is_signed)			\
-	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
+	T_ ## name = (__u16)(attr_nr),
 
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, type,	\
 		maxlen, __get, __put, __is_signed)		\
-	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
+	T_ ## name = (__u16)(attr_nr),
 
 #include GENL_MAGIC_INCLUDE_FILE
 
-- 
cgit v1.2.3


From a4c6c18e93a1d24df9ab95794cef471c89daefe4 Mon Sep 17 00:00:00 2001
From: Huisong Li <lihuisong@huawei.com>
Date: Tue, 7 Apr 2026 16:11:40 +0800
Subject: cpuidle: Extract and export no-lock variants of
 cpuidle_unregister_device()

The cpuidle_unregister_device() function always acquires the internal
cpuidle_lock (or pause/resume idle) during their execution.

However, in some power notification scenarios (e.g., when old idle
states may become unavailable), it is necessary to efficiently disable
cpuidle first, then remove and re-create all cpuidle devices for all
CPUs. To avoid frequent lock overhead and ensure atomicity across the
entire batch operation, the caller needs to hold the cpuidle_lock once
outside the loop.

To address this, extract the core logic into the new function
cpuidle_unregister_device_no_lock() and export it.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
[ rjw: Added missing "inline", subject and changelog tweaks ]
Link: https://patch.msgid.link/20260407081141.2493581-2-lihuisong@huawei.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c | 22 +++++++++++++++-------
 include/linux/cpuidle.h   |  2 ++
 2 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index c7876e9e024f..1a55542efead 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -714,16 +714,12 @@ out_unregister:
 
 EXPORT_SYMBOL_GPL(cpuidle_register_device);
 
-/**
- * cpuidle_unregister_device - unregisters a CPU's idle PM feature
- * @dev: the cpu
- */
-void cpuidle_unregister_device(struct cpuidle_device *dev)
+void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev)
 {
 	if (!dev || dev->registered == 0)
 		return;
 
-	cpuidle_pause_and_lock();
+	lockdep_assert_held(&cpuidle_lock);
 
 	cpuidle_disable_device(dev);
 
@@ -732,10 +728,22 @@ void cpuidle_unregister_device(struct cpuidle_device *dev)
 	__cpuidle_unregister_device(dev);
 
 	cpuidle_coupled_unregister_device(dev);
+}
+EXPORT_SYMBOL_GPL(cpuidle_unregister_device_no_lock);
+
+/**
+ * cpuidle_unregister_device - unregisters a CPU's idle PM feature
+ * @dev: the cpu
+ */
+void cpuidle_unregister_device(struct cpuidle_device *dev)
+{
+	if (!dev || dev->registered == 0)
+		return;
 
+	cpuidle_pause_and_lock();
+	cpuidle_unregister_device_no_lock(dev);
 	cpuidle_resume_and_unlock();
 }
-
 EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
 
 /**
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 4073690504a7..a2485348def3 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -188,6 +188,7 @@ extern void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx,
 extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
 extern int cpuidle_register_device(struct cpuidle_device *dev);
 extern void cpuidle_unregister_device(struct cpuidle_device *dev);
+extern void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev);
 extern int cpuidle_register(struct cpuidle_driver *drv,
 			    const struct cpumask *const coupled_cpus);
 extern void cpuidle_unregister(struct cpuidle_driver *drv);
@@ -226,6 +227,7 @@ static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { }
 static inline int cpuidle_register_device(struct cpuidle_device *dev)
 {return -ENODEV; }
 static inline void cpuidle_unregister_device(struct cpuidle_device *dev) { }
+static inline void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev) {}
 static inline int cpuidle_register(struct cpuidle_driver *drv,
 				   const struct cpumask *const coupled_cpus)
 {return -ENODEV; }
-- 
cgit v1.2.3


From 8ea6b92faebe4bad0e271cb9a8d819b8955ed476 Mon Sep 17 00:00:00 2001
From: Benjamin Berg <benjamin.berg@intel.com>
Date: Thu, 26 Mar 2026 12:14:32 +0200
Subject: wifi: ieee80211: add more NAN definitions

These will be needed to implement NAN synchronization in mac80211_hwsim.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20260326121156.ebb52db4c1eb.Ie8142cf92fc8c97c744a7c8b0a94ce3da6ff75ec@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211-nan.h | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/ieee80211.h     |  1 +
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211-nan.h b/include/linux/ieee80211-nan.h
index ebf28ea651f9..455033955e54 100644
--- a/include/linux/ieee80211-nan.h
+++ b/include/linux/ieee80211-nan.h
@@ -37,4 +37,41 @@
 #define NAN_DEV_CAPA_NDPE_SUPPORTED		0x08
 #define NAN_DEV_CAPA_S3_SUPPORTED		0x10
 
+/* NAN attributes, as defined in Wi-Fi Aware (TM) specification 4.0 Table 42 */
+#define NAN_ATTR_MASTER_INDICATION		0x00
+#define NAN_ATTR_CLUSTER_INFO			0x01
+
+struct ieee80211_nan_attr {
+	u8 attr;
+	__le16 length;
+	u8 data[];
+} __packed;
+
+struct ieee80211_nan_master_indication {
+	u8 master_pref;
+	u8 random_factor;
+} __packed;
+
+struct ieee80211_nan_anchor_master_info {
+	union {
+		__le64 master_rank;
+		struct {
+			u8 master_addr[ETH_ALEN];
+			u8 random_factor;
+			u8 master_pref;
+		} __packed;
+	} __packed;
+	u8 hop_count;
+	__le32 ambtt;
+} __packed;
+
+#define for_each_nan_attr(_attr, _data, _datalen)			\
+	for (_attr = (const struct ieee80211_nan_attr *)(_data);	\
+	     (const u8 *)(_data) + (_datalen) - (const u8 *)_attr >=	\
+		(int)sizeof(*_attr) &&					\
+	     (const u8 *)(_data) + (_datalen) - (const u8 *)_attr >=	\
+		(int)sizeof(*_attr) + le16_to_cpu(_attr->length);	\
+	     _attr = (const struct ieee80211_nan_attr *)		\
+		(_attr->data + le16_to_cpu(_attr->length)))
+
 #endif /* LINUX_IEEE80211_NAN_H */
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index b5d649db123f..ffa8f9f77efe 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2240,6 +2240,7 @@ struct ieee80211_multiple_bssid_configuration {
 
 #define WLAN_OUI_WFA			0x506f9a
 #define WLAN_OUI_TYPE_WFA_P2P		9
+#define WLAN_OUI_TYPE_WFA_NAN		0x13
 #define WLAN_OUI_TYPE_WFA_DPP		0x1A
 #define WLAN_OUI_MICROSOFT		0x0050f2
 #define WLAN_OUI_TYPE_MICROSOFT_WPA	1
-- 
cgit v1.2.3


From 6fa747550e35f0a74e649b19d97055988a25b2e4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 7 Apr 2026 16:05:26 +0200
Subject: block: factor out a bio_await helper

Add a new helper to wait for a bio and anything chained off it to
complete synchronously after submitting it.  This factors common code out
of submit_bio_wait and bio_await_chain and will also be useful for
file system code and thus is exported.

Note that this will now set REQ_SYNC also for the bio_await case for
consistency.  Nothing should look at the flag in the end_io handler,
but if something does having the flag set makes more sense.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://patch.msgid.link/20260407140538.633364-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 53 +++++++++++++++++++++++++++++++++++++----------------
 include/linux/bio.h |  2 ++
 2 files changed, 39 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 434e41182c05..61d65c544bcc 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1468,17 +1468,20 @@ static void bio_wait_end_io(struct bio *bio)
 }
 
 /**
- * submit_bio_wait - submit a bio, and wait until it completes
- * @bio: The &struct bio which describes the I/O
+ * bio_await - call a function on a bio, and wait until it completes
+ * @bio:	the bio which describes the I/O
+ * @submit:	function called to submit the bio
+ * @priv:	private data passed to @submit
  *
- * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
- * bio_endio() on failure.
+ * Wait for the bio as well as any bio chained off it after executing the
+ * passed in callback @submit.  The wait for the bio is set up before calling
+ * @submit to ensure that the completion is captured.  If @submit is %NULL,
+ * submit_bio() is used instead to submit the bio.
  *
- * WARNING: Unlike to how submit_bio() is usually used, this function does not
- * result in bio reference to be consumed. The caller must drop the reference
- * on his own.
+ * Note: this overrides the bi_private and bi_end_io fields in the bio.
  */
-int submit_bio_wait(struct bio *bio)
+void bio_await(struct bio *bio, void *priv,
+	       void (*submit)(struct bio *bio, void *priv))
 {
 	DECLARE_COMPLETION_ONSTACK_MAP(done,
 			bio->bi_bdev->bd_disk->lockdep_map);
@@ -1486,13 +1489,37 @@ int submit_bio_wait(struct bio *bio)
 	bio->bi_private = &done;
 	bio->bi_end_io = bio_wait_end_io;
 	bio->bi_opf |= REQ_SYNC;
-	submit_bio(bio);
+	if (submit)
+		submit(bio, priv);
+	else
+		submit_bio(bio);
 	blk_wait_io(&done);
+}
+EXPORT_SYMBOL_GPL(bio_await);
 
+/**
+ * submit_bio_wait - submit a bio, and wait until it completes
+ * @bio: The &struct bio which describes the I/O
+ *
+ * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
+ * bio_endio() on failure.
+ *
+ * WARNING: Unlike to how submit_bio() is usually used, this function does not
+ * result in bio reference to be consumed. The caller must drop the reference
+ * on his own.
+ */
+int submit_bio_wait(struct bio *bio)
+{
+	bio_await(bio, NULL, NULL);
 	return blk_status_to_errno(bio->bi_status);
 }
 EXPORT_SYMBOL(submit_bio_wait);
 
+static void bio_endio_cb(struct bio *bio, void *priv)
+{
+	bio_endio(bio);
+}
+
 /**
  * bdev_rw_virt - synchronously read into / write from kernel mapping
  * @bdev:	block device to access
@@ -1528,13 +1555,7 @@ EXPORT_SYMBOL_GPL(bdev_rw_virt);
  */
 void bio_await_chain(struct bio *bio)
 {
-	DECLARE_COMPLETION_ONSTACK_MAP(done,
-			bio->bi_bdev->bd_disk->lockdep_map);
-
-	bio->bi_private = &done;
-	bio->bi_end_io = bio_wait_end_io;
-	bio_endio(bio);
-	blk_wait_io(&done);
+	bio_await(bio, NULL, bio_endio_cb);
 	bio_put(bio);
 }
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 984844d2870b..97d747320b35 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -432,6 +432,8 @@ extern void bio_uninit(struct bio *);
 void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf);
 void bio_reuse(struct bio *bio, blk_opf_t opf);
 void bio_chain(struct bio *, struct bio *);
+void bio_await(struct bio *bio, void *priv,
+	       void (*submit)(struct bio *bio, void *priv));
 
 int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len,
 			      unsigned off);
-- 
cgit v1.2.3


From c713b96427ce5c4a74b8babe14137451ac3ffe54 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 7 Apr 2026 11:23:13 +0200
Subject: vt: Implement helpers for struct vc_font in source file

Move the helpers vc_font_pitch() and vc_font_size() from the VT
header file into source file. They are not called very often, so
there's no benefit in keeping them in the headers. Also avoids
including <linux/math.h> from the header.

v2:
- fix typo in commit description

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/tty/vt/vt.c            | 35 +++++++++++++++++++++++++++++++++++
 include/linux/console_struct.h | 30 ++----------------------------
 2 files changed, 37 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index c1f152d8b03b..3c90856488cd 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -71,6 +71,7 @@
  * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
  */
 
+#include <linux/math.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/sched/signal.h>
@@ -230,6 +231,40 @@ enum {
 	blank_vesa_wait,
 };
 
+/*
+ * struct vc_font
+ */
+
+/**
+ * vc_font_pitch - Calculates the number of bytes between two adjacent scanlines
+ * @font: The VC font
+ *
+ * Returns:
+ * The number of bytes between two adjacent scanlines in the font data
+ */
+unsigned int vc_font_pitch(const struct vc_font *font)
+{
+	return DIV_ROUND_UP(font->width, 8);
+}
+EXPORT_SYMBOL_GPL(vc_font_pitch);
+
+/**
+ * vc_font_size - Calculates the size of the font data in bytes
+ * @font: The VC font
+ *
+ * vc_font_size() calculates the number of bytes of font data in the
+ * font specified by @font. The function calculates the size from the
+ * font parameters.
+ *
+ * Returns:
+ * The size of the font data in bytes.
+ */
+unsigned int vc_font_size(const struct vc_font *font)
+{
+	return font->height * vc_font_pitch(font) * font->charcount;
+}
+EXPORT_SYMBOL_GPL(vc_font_size);
+
 /*
  * /sys/class/tty/tty0/
  *
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 771cba16cb54..fe4733c1ae4c 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -13,7 +13,6 @@
 #ifndef _LINUX_CONSOLE_STRUCT_H
 #define _LINUX_CONSOLE_STRUCT_H
 
-#include <linux/math.h>
 #include <linux/vt.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
@@ -83,33 +82,8 @@ struct vc_font {
 	const unsigned char *data;
 };
 
-/**
- * vc_font_pitch - Calculates the number of bytes between two adjacent scanlines
- * @font: The VC font
- *
- * Returns:
- * The number of bytes between two adjacent scanlines in the font data
- */
-static inline unsigned int vc_font_pitch(const struct vc_font *font)
-{
-	return DIV_ROUND_UP(font->width, 8);
-}
-
-/**
- * vc_font_size - Calculates the size of the font data in bytes
- * @font: The VC font
- *
- * vc_font_size() calculates the number of bytes of font data in the
- * font specified by @font. The function calculates the size from the
- * font parameters.
- *
- * Returns:
- * The size of the font data in bytes.
- */
-static inline unsigned int vc_font_size(const struct vc_font *font)
-{
-	return font->height * vc_font_pitch(font) * font->charcount;
-}
+unsigned int vc_font_pitch(const struct vc_font *font);
+unsigned int vc_font_size(const struct vc_font *font);
 
 /*
  * Example: vc_data of a console that was scrolled 3 lines down.
-- 
cgit v1.2.3


From 97df8960240afc47c2349d008b0993e7727bbda5 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 7 Apr 2026 11:23:14 +0200
Subject: lib/fonts: Provide helpers for calculating glyph pitch and size

Implement pitch and size calculation for a single font glyph in the
new helpers font_glyph_pitch() and font_glyph_size(). Replace the
instances where the calculations are open-coded.

Note that in the case of fbcon console rotation, the parameters for
a glyph's width and height might be reversed. This is intentional.

v2:
- fix typos in commit message

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/tty/vt/vt.c                     |  5 ++---
 drivers/video/fbdev/core/fbcon_ccw.c    | 11 ++++-----
 drivers/video/fbdev/core/fbcon_cw.c     | 11 ++++-----
 drivers/video/fbdev/core/fbcon_rotate.c |  6 ++---
 drivers/video/fbdev/core/fbcon_ud.c     |  7 +++---
 include/linux/font.h                    | 40 +++++++++++++++++++++++++++++++++
 lib/fonts/fonts.c                       |  2 +-
 7 files changed, 61 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 3c90856488cd..5c9fec4f99a0 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -71,7 +71,6 @@
  * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
  */
 
-#include <linux/math.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/sched/signal.h>
@@ -244,7 +243,7 @@ enum {
  */
 unsigned int vc_font_pitch(const struct vc_font *font)
 {
-	return DIV_ROUND_UP(font->width, 8);
+	return font_glyph_pitch(font->width);
 }
 EXPORT_SYMBOL_GPL(vc_font_pitch);
 
@@ -261,7 +260,7 @@ EXPORT_SYMBOL_GPL(vc_font_pitch);
  */
 unsigned int vc_font_size(const struct vc_font *font)
 {
-	return font->height * vc_font_pitch(font) * font->charcount;
+	return font_glyph_size(font->width, font->height) * font->charcount;
 }
 EXPORT_SYMBOL_GPL(vc_font_size);
 
diff --git a/drivers/video/fbdev/core/fbcon_ccw.c b/drivers/video/fbdev/core/fbcon_ccw.c
index 2f394b5a17f7..96ef449ee6ac 100644
--- a/drivers/video/fbdev/core/fbcon_ccw.c
+++ b/drivers/video/fbdev/core/fbcon_ccw.c
@@ -26,7 +26,7 @@ static void ccw_update_attr(u8 *dst, u8 *src, int attribute,
 				  struct vc_data *vc)
 {
 	int i, j, offset = (vc->vc_font.height < 10) ? 1 : 2;
-	int width = (vc->vc_font.height + 7) >> 3;
+	int width = font_glyph_pitch(vc->vc_font.height);
 	int mod = vc->vc_font.height % 8;
 	u8 c, msk = ~(0xff << offset), msk1 = 0;
 
@@ -101,7 +101,7 @@ static inline void ccw_putcs_aligned(struct vc_data *vc, struct fb_info *info,
 {
 	struct fbcon_par *par = info->fbcon_par;
 	u16 charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;
-	u32 idx = (vc->vc_font.height + 7) >> 3;
+	u32 idx = font_glyph_pitch(vc->vc_font.height);
 	u8 *src;
 
 	while (cnt--) {
@@ -131,7 +131,7 @@ static void ccw_putcs(struct vc_data *vc, struct fb_info *info,
 {
 	struct fb_image image;
 	struct fbcon_par *par = info->fbcon_par;
-	u32 width = (vc->vc_font.height + 7)/8;
+	u32 width = font_glyph_pitch(vc->vc_font.height);
 	u32 cellsize = width * vc->vc_font.width;
 	u32 maxcnt = info->pixmap.size/cellsize;
 	u32 scan_align = info->pixmap.scan_align - 1;
@@ -223,7 +223,8 @@ static void ccw_cursor(struct vc_data *vc, struct fb_info *info, bool enable,
 	struct fb_cursor cursor;
 	struct fbcon_par *par = info->fbcon_par;
 	unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;
-	int w = (vc->vc_font.height + 7) >> 3, c;
+	int w = font_glyph_pitch(vc->vc_font.height);
+	int c;
 	int y = real_y(par->p, vc->state.y);
 	int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
 	int err = 1, dx, dy;
@@ -297,7 +298,7 @@ static void ccw_cursor(struct vc_data *vc, struct fb_info *info, bool enable,
 		char *tmp, *mask = kmalloc_array(w, vc->vc_font.width,
 						 GFP_ATOMIC);
 		int cur_height, size, i = 0;
-		int width = (vc->vc_font.width + 7)/8;
+		int width = font_glyph_pitch(vc->vc_font.width);
 
 		if (!mask)
 			return;
diff --git a/drivers/video/fbdev/core/fbcon_cw.c b/drivers/video/fbdev/core/fbcon_cw.c
index 3c3ad3471ec4..ea712654edae 100644
--- a/drivers/video/fbdev/core/fbcon_cw.c
+++ b/drivers/video/fbdev/core/fbcon_cw.c
@@ -26,7 +26,7 @@ static void cw_update_attr(u8 *dst, u8 *src, int attribute,
 				  struct vc_data *vc)
 {
 	int i, j, offset = (vc->vc_font.height < 10) ? 1 : 2;
-	int width = (vc->vc_font.height + 7) >> 3;
+	int width = font_glyph_pitch(vc->vc_font.height);
 	u8 c, msk = ~(0xff >> offset);
 
 	for (i = 0; i < vc->vc_font.width; i++) {
@@ -86,7 +86,7 @@ static inline void cw_putcs_aligned(struct vc_data *vc, struct fb_info *info,
 {
 	struct fbcon_par *par = info->fbcon_par;
 	u16 charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;
-	u32 idx = (vc->vc_font.height + 7) >> 3;
+	u32 idx = font_glyph_pitch(vc->vc_font.height);
 	u8 *src;
 
 	while (cnt--) {
@@ -116,7 +116,7 @@ static void cw_putcs(struct vc_data *vc, struct fb_info *info,
 {
 	struct fb_image image;
 	struct fbcon_par *par = info->fbcon_par;
-	u32 width = (vc->vc_font.height + 7)/8;
+	u32 width = font_glyph_pitch(vc->vc_font.height);
 	u32 cellsize = width * vc->vc_font.width;
 	u32 maxcnt = info->pixmap.size/cellsize;
 	u32 scan_align = info->pixmap.scan_align - 1;
@@ -206,7 +206,8 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, bool enable,
 	struct fb_cursor cursor;
 	struct fbcon_par *par = info->fbcon_par;
 	unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;
-	int w = (vc->vc_font.height + 7) >> 3, c;
+	int w = font_glyph_pitch(vc->vc_font.height);
+	int c;
 	int y = real_y(par->p, vc->state.y);
 	int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
 	int err = 1, dx, dy;
@@ -280,7 +281,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, bool enable,
 		char *tmp, *mask = kmalloc_array(w, vc->vc_font.width,
 						 GFP_ATOMIC);
 		int cur_height, size, i = 0;
-		int width = (vc->vc_font.width + 7)/8;
+		int width = font_glyph_pitch(vc->vc_font.width);
 
 		if (!mask)
 			return;
diff --git a/drivers/video/fbdev/core/fbcon_rotate.c b/drivers/video/fbdev/core/fbcon_rotate.c
index 5348f6c6f57c..18575c5182db 100644
--- a/drivers/video/fbdev/core/fbcon_rotate.c
+++ b/drivers/video/fbdev/core/fbcon_rotate.c
@@ -33,14 +33,12 @@ int fbcon_rotate_font(struct fb_info *info, struct vc_data *vc)
 	src = par->fontdata = vc->vc_font.data;
 	par->cur_rotate = par->p->con_rotate;
 	len = vc->vc_font.charcount;
-	s_cellsize = ((vc->vc_font.width + 7)/8) *
-		vc->vc_font.height;
+	s_cellsize = font_glyph_size(vc->vc_font.width, vc->vc_font.height);
 	d_cellsize = s_cellsize;
 
 	if (par->rotate == FB_ROTATE_CW ||
 	    par->rotate == FB_ROTATE_CCW)
-		d_cellsize = ((vc->vc_font.height + 7)/8) *
-			vc->vc_font.width;
+		d_cellsize = font_glyph_size(vc->vc_font.height, vc->vc_font.width);
 
 	if (info->fbops->fb_sync)
 		info->fbops->fb_sync(info);
diff --git a/drivers/video/fbdev/core/fbcon_ud.c b/drivers/video/fbdev/core/fbcon_ud.c
index 6fc30cad5b19..f7cd89c42b01 100644
--- a/drivers/video/fbdev/core/fbcon_ud.c
+++ b/drivers/video/fbdev/core/fbcon_ud.c
@@ -26,7 +26,7 @@ static void ud_update_attr(u8 *dst, u8 *src, int attribute,
 				  struct vc_data *vc)
 {
 	int i, offset = (vc->vc_font.height < 10) ? 1 : 2;
-	int width = (vc->vc_font.width + 7) >> 3;
+	int width = font_glyph_pitch(vc->vc_font.width);
 	unsigned int cellsize = vc->vc_font.height * width;
 	u8 c;
 
@@ -153,7 +153,7 @@ static void ud_putcs(struct vc_data *vc, struct fb_info *info,
 {
 	struct fb_image image;
 	struct fbcon_par *par = info->fbcon_par;
-	u32 width = (vc->vc_font.width + 7)/8;
+	u32 width = font_glyph_pitch(vc->vc_font.width);
 	u32 cellsize = width * vc->vc_font.height;
 	u32 maxcnt = info->pixmap.size/cellsize;
 	u32 scan_align = info->pixmap.scan_align - 1;
@@ -253,7 +253,8 @@ static void ud_cursor(struct vc_data *vc, struct fb_info *info, bool enable,
 	struct fb_cursor cursor;
 	struct fbcon_par *par = info->fbcon_par;
 	unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;
-	int w = (vc->vc_font.width + 7) >> 3, c;
+	int w = font_glyph_pitch(vc->vc_font.width);
+	int c;
 	int y = real_y(par->p, vc->state.y);
 	int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
 	int err = 1, dx, dy;
diff --git a/include/linux/font.h b/include/linux/font.h
index 5401f07dd6ce..3bd49d914b22 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -11,10 +11,50 @@
 #ifndef _VIDEO_FONT_H
 #define _VIDEO_FONT_H
 
+#include <linux/math.h>
 #include <linux/types.h>
 
 struct console_font;
 
+/*
+ * Glyphs
+ */
+
+/**
+ * font_glyph_pitch - Calculates the number of bytes per scanline
+ * @width: The glyph width in bits per scanline
+ *
+ * A glyph's pitch is the number of bytes in a single scanline, rounded
+ * up to the next full byte. The parameter @width receives the number
+ * of visible bits per scanline. For example, if width is 14 bytes per
+ * scanline, the pitch is 2 bytes per scanline. If width is 8 bits per
+ * scanline, the pitch is 1 byte per scanline.
+ *
+ * Returns:
+ * The number of bytes in a single scanline of the glyph
+ */
+static inline unsigned int font_glyph_pitch(unsigned int width)
+{
+	return DIV_ROUND_UP(width, 8);
+}
+
+/**
+ * font_glyph_size - Calculates the number of bytes per glyph
+ * @width: The glyph width in bits per scanline
+ * @vpitch: The number of scanlines in the glyph
+ *
+ * The number of bytes in a glyph depends on the pitch and the number
+ * of scanlines. font_glyph_size automatically calculates the pitch
+ * from the given width. The parameter @vpitch gives the number of
+ * scanlines, which is usually the glyph's height in scanlines. Fonts
+ * coming from user space can sometimes have a different vertical pitch
+ * with empty scanlines between two adjacent glyphs.
+ */
+static inline unsigned int font_glyph_size(unsigned int width, unsigned int vpitch)
+{
+	return font_glyph_pitch(width) * vpitch;
+}
+
 /*
  * font_data_t and helpers
  */
diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
index 5938f542906b..f5d5333450a0 100644
--- a/lib/fonts/fonts.c
+++ b/lib/fonts/fonts.c
@@ -26,7 +26,7 @@
 
 #include "font.h"
 
-#define console_font_pitch(font) DIV_ROUND_UP((font)->width, 8)
+#define console_font_pitch(font) font_glyph_pitch((font)->width)
 
 /*
  * Helpers for font_data_t
-- 
cgit v1.2.3


From bdfd943231347ce57133d1ba2d93ed87f1050e81 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 7 Apr 2026 11:23:16 +0200
Subject: lib/fonts: Implement glyph rotation

Move the glyph rotation helpers from fbcon to the font library. Wrap them
behind clean interfaces. Also clear the output memory to zero. Previously,
the implementation relied on the caller to do that.

Go through the fbcon code and callers of the glyph-rotation helpers. In
addition to the font rotation, there's also the cursor code, which uses
the rotation helpers.

The font-rotation relied on a single memset to zero for the whole font.
This is now multiple memsets on each glyph. This will be sorted out when
the font library also implements font rotation.

Building glyph rotation in the font library still depends on
CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y. If we get more users of the code,
we can still add a dedicated Kconfig symbol to the font library.

No changes have been made to the actual implementation of the rotate_*()
and pattern_*() functions. These will be refactored as separate changes.

v2:
- fix typos

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/core/fbcon_ccw.c    |   4 +-
 drivers/video/fbdev/core/fbcon_cw.c     |   4 +-
 drivers/video/fbdev/core/fbcon_rotate.c |  12 +--
 drivers/video/fbdev/core/fbcon_rotate.h |  71 ---------------
 include/linux/font.h                    |   8 ++
 lib/fonts/Makefile                      |   1 +
 lib/fonts/font_rotate.c                 | 150 ++++++++++++++++++++++++++++++++
 7 files changed, 167 insertions(+), 83 deletions(-)
 create mode 100644 lib/fonts/font_rotate.c

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fbcon_ccw.c b/drivers/video/fbdev/core/fbcon_ccw.c
index 96ef449ee6ac..72453a2aaca8 100644
--- a/drivers/video/fbdev/core/fbcon_ccw.c
+++ b/drivers/video/fbdev/core/fbcon_ccw.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/fb.h>
+#include <linux/font.h>
 #include <linux/vt_kern.h>
 #include <linux/console.h>
 #include <asm/types.h>
@@ -344,8 +345,7 @@ static void ccw_cursor(struct vc_data *vc, struct fb_info *info, bool enable,
 		size = cur_height * width;
 		while (size--)
 			tmp[i++] = 0xff;
-		memset(mask, 0, w * vc->vc_font.width);
-		rotate_ccw(tmp, mask, vc->vc_font.width, vc->vc_font.height);
+		font_glyph_rotate_270(tmp, vc->vc_font.width, vc->vc_font.height, mask);
 		kfree(tmp);
 	}
 
diff --git a/drivers/video/fbdev/core/fbcon_cw.c b/drivers/video/fbdev/core/fbcon_cw.c
index ea712654edae..5690fc1d7854 100644
--- a/drivers/video/fbdev/core/fbcon_cw.c
+++ b/drivers/video/fbdev/core/fbcon_cw.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/fb.h>
+#include <linux/font.h>
 #include <linux/vt_kern.h>
 #include <linux/console.h>
 #include <asm/types.h>
@@ -327,8 +328,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, bool enable,
 		size = cur_height * width;
 		while (size--)
 			tmp[i++] = 0xff;
-		memset(mask, 0, w * vc->vc_font.width);
-		rotate_cw(tmp, mask, vc->vc_font.width, vc->vc_font.height);
+		font_glyph_rotate_90(tmp, vc->vc_font.width, vc->vc_font.height, mask);
 		kfree(tmp);
 	}
 
diff --git a/drivers/video/fbdev/core/fbcon_rotate.c b/drivers/video/fbdev/core/fbcon_rotate.c
index 18575c5182db..588dc9d6758a 100644
--- a/drivers/video/fbdev/core/fbcon_rotate.c
+++ b/drivers/video/fbdev/core/fbcon_rotate.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/fb.h>
+#include <linux/font.h>
 #include <linux/vt_kern.h>
 #include <linux/console.h>
 #include <asm/types.h>
@@ -60,30 +61,25 @@ int fbcon_rotate_font(struct fb_info *info, struct vc_data *vc)
 	}
 
 	dst = par->fontbuffer;
-	memset(dst, 0, par->fd_size);
 
 	switch (par->rotate) {
 	case FB_ROTATE_UD:
 		for (i = len; i--; ) {
-			rotate_ud(src, dst, vc->vc_font.width,
-				  vc->vc_font.height);
-
+			font_glyph_rotate_180(src, vc->vc_font.width, vc->vc_font.height, dst);
 			src += s_cellsize;
 			dst += d_cellsize;
 		}
 		break;
 	case FB_ROTATE_CW:
 		for (i = len; i--; ) {
-			rotate_cw(src, dst, vc->vc_font.width,
-				  vc->vc_font.height);
+			font_glyph_rotate_90(src, vc->vc_font.width, vc->vc_font.height, dst);
 			src += s_cellsize;
 			dst += d_cellsize;
 		}
 		break;
 	case FB_ROTATE_CCW:
 		for (i = len; i--; ) {
-			rotate_ccw(src, dst, vc->vc_font.width,
-				   vc->vc_font.height);
+			font_glyph_rotate_270(src, vc->vc_font.width, vc->vc_font.height, dst);
 			src += s_cellsize;
 			dst += d_cellsize;
 		}
diff --git a/drivers/video/fbdev/core/fbcon_rotate.h b/drivers/video/fbdev/core/fbcon_rotate.h
index 8cb019e8a9c0..725bcae2df61 100644
--- a/drivers/video/fbdev/core/fbcon_rotate.h
+++ b/drivers/video/fbdev/core/fbcon_rotate.h
@@ -19,77 +19,6 @@
         (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE || !(i)->fix.xpanstep) ? \
         (i)->var.xres : (i)->var.xres_virtual; })
 
-
-static inline int pattern_test_bit(u32 x, u32 y, u32 pitch, const char *pat)
-{
-	u32 tmp = (y * pitch) + x, index = tmp / 8,  bit = tmp % 8;
-
-	pat +=index;
-	return (*pat) & (0x80 >> bit);
-}
-
-static inline void pattern_set_bit(u32 x, u32 y, u32 pitch, char *pat)
-{
-	u32 tmp = (y * pitch) + x, index = tmp / 8, bit = tmp % 8;
-
-	pat += index;
-
-	(*pat) |= 0x80 >> bit;
-}
-
-static inline void rotate_ud(const char *in, char *out, u32 width, u32 height)
-{
-	int i, j;
-	int shift = (8 - (width % 8)) & 7;
-
-	width = (width + 7) & ~7;
-
-	for (i = 0; i < height; i++) {
-		for (j = 0; j < width - shift; j++) {
-			if (pattern_test_bit(j, i, width, in))
-				pattern_set_bit(width - (1 + j + shift),
-						height - (1 + i),
-						width, out);
-		}
-
-	}
-}
-
-static inline void rotate_cw(const char *in, char *out, u32 width, u32 height)
-{
-	int i, j, h = height, w = width;
-	int shift = (8 - (height % 8)) & 7;
-
-	width = (width + 7) & ~7;
-	height = (height + 7) & ~7;
-
-	for (i = 0; i < h; i++) {
-		for (j = 0; j < w; j++) {
-			if (pattern_test_bit(j, i, width, in))
-				pattern_set_bit(height - 1 - i - shift, j,
-						height, out);
-
-		}
-	}
-}
-
-static inline void rotate_ccw(const char *in, char *out, u32 width, u32 height)
-{
-	int i, j, h = height, w = width;
-	int shift = (8 - (width % 8)) & 7;
-
-	width = (width + 7) & ~7;
-	height = (height + 7) & ~7;
-
-	for (i = 0; i < h; i++) {
-		for (j = 0; j < w; j++) {
-			if (pattern_test_bit(j, i, width, in))
-				pattern_set_bit(i, width - 1 - j - shift,
-						height, out);
-		}
-	}
-}
-
 int fbcon_rotate_font(struct fb_info *info, struct vc_data *vc);
 
 #if defined(CONFIG_FRAMEBUFFER_CONSOLE_ROTATION)
diff --git a/include/linux/font.h b/include/linux/font.h
index 3bd49d914b22..0a240dd70422 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -104,6 +104,14 @@ unsigned int font_data_size(font_data_t *fd);
 bool font_data_is_equal(font_data_t *lhs, font_data_t *rhs);
 int font_data_export(font_data_t *fd, struct console_font *font, unsigned int vpitch);
 
+/* font_rotate.c */
+void font_glyph_rotate_90(const unsigned char *glyph, unsigned int width, unsigned int height,
+			  unsigned char *out);
+void font_glyph_rotate_180(const unsigned char *glyph, unsigned int width, unsigned int height,
+			   unsigned char *out);
+void font_glyph_rotate_270(const unsigned char *glyph, unsigned int width, unsigned int height,
+			   unsigned char *out);
+
 /*
  * Font description
  */
diff --git a/lib/fonts/Makefile b/lib/fonts/Makefile
index b176af53d53e..7202a70a56ef 100644
--- a/lib/fonts/Makefile
+++ b/lib/fonts/Makefile
@@ -2,6 +2,7 @@
 # Font handling
 
 font-y := fonts.o
+font-$(CONFIG_FRAMEBUFFER_CONSOLE_ROTATION) += font_rotate.o
 
 # Built-in fonts; sorted by Family-Size in ascending order
 font-$(CONFIG_FONT_6x8)       += font_6x8.o
diff --git a/lib/fonts/font_rotate.c b/lib/fonts/font_rotate.c
new file mode 100644
index 000000000000..d107a8d0a2b0
--- /dev/null
+++ b/lib/fonts/font_rotate.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Font rotation
+ *
+ *    Copyright (C) 2005 Antonino Daplas <adaplas @pol.net>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+#include <linux/export.h>
+#include <linux/math.h>
+#include <linux/string.h>
+
+#include "font.h"
+
+static inline int pattern_test_bit(u32 x, u32 y, u32 pitch, const char *pat)
+{
+	u32 tmp = (y * pitch) + x, index = tmp / 8,  bit = tmp % 8;
+
+	pat += index;
+	return (*pat) & (0x80 >> bit);
+}
+
+static inline void pattern_set_bit(u32 x, u32 y, u32 pitch, char *pat)
+{
+	u32 tmp = (y * pitch) + x, index = tmp / 8, bit = tmp % 8;
+
+	pat += index;
+
+	(*pat) |= 0x80 >> bit;
+}
+
+static inline void rotate_cw(const char *in, char *out, u32 width, u32 height)
+{
+	int i, j, h = height, w = width;
+	int shift = (8 - (height % 8)) & 7;
+
+	width = (width + 7) & ~7;
+	height = (height + 7) & ~7;
+
+	for (i = 0; i < h; i++) {
+		for (j = 0; j < w; j++) {
+			if (pattern_test_bit(j, i, width, in))
+				pattern_set_bit(height - 1 - i - shift, j,
+						height, out);
+		}
+	}
+}
+
+/**
+ * font_glyph_rotate_90 - Rotate a glyph pattern by 90° in clockwise direction
+ * @glyph: The glyph to rotate
+ * @width: The glyph width in bits per scanline
+ * @height: The number of scanlines in the glyph
+ * @out: The rotated glyph bitmap
+ *
+ * The parameters @width and @height refer to the input glyph given in @glyph.
+ * The caller has to provide the output buffer @out of sufficient size to hold
+ * the rotated glyph. Rotating by 90° flips the width and height for the output
+ * glyph. Depending on the glyph pitch, the size of the output glyph can be
+ * different than the size of the input. Callers have to take this into account
+ * when allocating the output memory.
+ */
+void font_glyph_rotate_90(const unsigned char *glyph, unsigned int width, unsigned int height,
+			  unsigned char *out)
+{
+	memset(out, 0, font_glyph_size(height, width)); /* flip width/height */
+
+	rotate_cw(glyph, out, width, height);
+}
+EXPORT_SYMBOL_GPL(font_glyph_rotate_90);
+
+static inline void rotate_ud(const char *in, char *out, u32 width, u32 height)
+{
+	int i, j;
+	int shift = (8 - (width % 8)) & 7;
+
+	width = (width + 7) & ~7;
+
+	for (i = 0; i < height; i++) {
+		for (j = 0; j < width - shift; j++) {
+			if (pattern_test_bit(j, i, width, in))
+				pattern_set_bit(width - (1 + j + shift),
+						height - (1 + i),
+						width, out);
+		}
+	}
+}
+
+/**
+ * font_glyph_rotate_180 - Rotate a glyph pattern by 180°
+ * @glyph: The glyph to rotate
+ * @width: The glyph width in bits per scanline
+ * @height: The number of scanlines in the glyph
+ * @out: The rotated glyph bitmap
+ *
+ * The parameters @width and @height refer to the input glyph given in @glyph.
+ * The caller has to provide the output buffer @out of sufficient size to hold
+ * the rotated glyph.
+ */
+void font_glyph_rotate_180(const unsigned char *glyph, unsigned int width, unsigned int height,
+			   unsigned char *out)
+{
+	memset(out, 0, font_glyph_size(width, height));
+
+	rotate_ud(glyph, out, width, height);
+}
+EXPORT_SYMBOL_GPL(font_glyph_rotate_180);
+
+static inline void rotate_ccw(const char *in, char *out, u32 width, u32 height)
+{
+	int i, j, h = height, w = width;
+	int shift = (8 - (width % 8)) & 7;
+
+	width = (width + 7) & ~7;
+	height = (height + 7) & ~7;
+
+	for (i = 0; i < h; i++) {
+		for (j = 0; j < w; j++) {
+			if (pattern_test_bit(j, i, width, in))
+				pattern_set_bit(i, width - 1 - j - shift,
+						height, out);
+		}
+	}
+}
+
+/**
+ * font_glyph_rotate_270 - Rotate a glyph pattern by 270° in clockwise direction
+ * @glyph: The glyph to rotate
+ * @width: The glyph width in bits per scanline
+ * @height: The number of scanlines in the glyph
+ * @out: The rotated glyph bitmap
+ *
+ * The parameters @width and @height refer to the input glyph given in @glyph.
+ * The caller has to provide the output buffer @out of sufficient size to hold
+ * the rotated glyph. Rotating by 270° flips the width and height for the output
+ * glyph. Depending on the glyph pitch, the size of the output glyph can be
+ * different than the size of the input. Callers have to take this into account
+ * when allocating the output memory.
+ */
+void font_glyph_rotate_270(const unsigned char *glyph, unsigned int width, unsigned int height,
+			   unsigned char *out)
+{
+	memset(out, 0, font_glyph_size(height, width)); /* flip width/height */
+
+	rotate_ccw(glyph, out, width, height);
+}
+EXPORT_SYMBOL_GPL(font_glyph_rotate_270);
-- 
cgit v1.2.3


From cfa72955a029cd79433694cac6b5630788609cd4 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 7 Apr 2026 11:23:19 +0200
Subject: lib/fonts: Implement font rotation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the core of fbcon's font-rotation code to the font library as
the new helper font_data_rotate(). The code can rotate in steps of
90°. For completeness, it also copies the glyph data for multiples
of 360°.

Bring back the memset optimization. A memset to 0 again clears the
whole glyph output buffer. Then use the internal rotation helpers on
the cleared output. Fbcon's original implementation worked like this,
but lost it during refactoring.

Replace fbcon's font-rotation code with the new implementations.
All that's left to do for fbcon is to maintain its internal fbcon
state.

v2:
- fix typos

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/core/fbcon.h        |   2 +-
 drivers/video/fbdev/core/fbcon_rotate.c |  78 ++++++------------------
 include/linux/font.h                    |   3 +
 lib/fonts/font_rotate.c                 | 103 ++++++++++++++++++++++++++++++++
 4 files changed, 126 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h
index 1e3c1ef84762..1793f34a6c84 100644
--- a/drivers/video/fbdev/core/fbcon.h
+++ b/drivers/video/fbdev/core/fbcon.h
@@ -86,7 +86,7 @@ struct fbcon_par {
 	const u8    *fontdata;
 	u8    *cursor_src;
 	u32    cursor_size;
-	u32    fd_size;
+	size_t fd_size;
 
 	const struct fbcon_bitops *bitops;
 };
diff --git a/drivers/video/fbdev/core/fbcon_rotate.c b/drivers/video/fbdev/core/fbcon_rotate.c
index 588dc9d6758a..74206f5a6e98 100644
--- a/drivers/video/fbdev/core/fbcon_rotate.c
+++ b/drivers/video/fbdev/core/fbcon_rotate.c
@@ -8,84 +8,44 @@
  *  more details.
  */
 
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/string.h>
+#include <linux/errno.h>
 #include <linux/fb.h>
 #include <linux/font.h>
-#include <linux/vt_kern.h>
-#include <linux/console.h>
-#include <asm/types.h>
+
 #include "fbcon.h"
 #include "fbcon_rotate.h"
 
 int fbcon_rotate_font(struct fb_info *info, struct vc_data *vc)
 {
 	struct fbcon_par *par = info->fbcon_par;
-	int len, err = 0;
-	int s_cellsize, d_cellsize, i;
-	const u8 *src;
-	u8 *dst;
+	unsigned char *fontbuffer;
+	int ret;
 
 	if (vc->vc_font.data == par->fontdata &&
 	    par->p->con_rotate == par->cur_rotate)
-		goto finished;
+		return 0;
 
-	src = par->fontdata = vc->vc_font.data;
+	par->fontdata = vc->vc_font.data;
 	par->cur_rotate = par->p->con_rotate;
-	len = vc->vc_font.charcount;
-	s_cellsize = font_glyph_size(vc->vc_font.width, vc->vc_font.height);
-	d_cellsize = s_cellsize;
-
-	if (par->rotate == FB_ROTATE_CW ||
-	    par->rotate == FB_ROTATE_CCW)
-		d_cellsize = font_glyph_size(vc->vc_font.height, vc->vc_font.width);
 
 	if (info->fbops->fb_sync)
 		info->fbops->fb_sync(info);
 
-	if (par->fd_size < d_cellsize * len) {
-		kfree(par->fontbuffer);
-		par->fontbuffer = NULL;
-		par->fd_size = 0;
-
-		dst = kmalloc_array(len, d_cellsize, GFP_KERNEL);
-
-		if (dst == NULL) {
-			err = -ENOMEM;
-			goto finished;
-		}
-
-		par->fd_size = d_cellsize * len;
-		par->fontbuffer = dst;
+	fontbuffer = font_data_rotate(par->p->fontdata, vc->vc_font.width,
+				      vc->vc_font.height, vc->vc_font.charcount,
+				      par->rotate, par->fontbuffer, &par->fd_size);
+	if (IS_ERR(fontbuffer)) {
+		ret = PTR_ERR(fontbuffer);
+		goto err_kfree;
 	}
 
-	dst = par->fontbuffer;
+	par->fontbuffer = fontbuffer;
 
-	switch (par->rotate) {
-	case FB_ROTATE_UD:
-		for (i = len; i--; ) {
-			font_glyph_rotate_180(src, vc->vc_font.width, vc->vc_font.height, dst);
-			src += s_cellsize;
-			dst += d_cellsize;
-		}
-		break;
-	case FB_ROTATE_CW:
-		for (i = len; i--; ) {
-			font_glyph_rotate_90(src, vc->vc_font.width, vc->vc_font.height, dst);
-			src += s_cellsize;
-			dst += d_cellsize;
-		}
-		break;
-	case FB_ROTATE_CCW:
-		for (i = len; i--; ) {
-			font_glyph_rotate_270(src, vc->vc_font.width, vc->vc_font.height, dst);
-			src += s_cellsize;
-			dst += d_cellsize;
-		}
-		break;
-	}
+	return 0;
+
+err_kfree:
+	kfree(par->fontbuffer);
+	par->fontbuffer = NULL; /* clear here to avoid output */
 
-finished:
-	return err;
+	return ret;
 }
diff --git a/include/linux/font.h b/include/linux/font.h
index 0a240dd70422..6845f02d739a 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -111,6 +111,9 @@ void font_glyph_rotate_180(const unsigned char *glyph, unsigned int width, unsig
 			   unsigned char *out);
 void font_glyph_rotate_270(const unsigned char *glyph, unsigned int width, unsigned int height,
 			   unsigned char *out);
+unsigned char *font_data_rotate(font_data_t *fd, unsigned int width, unsigned int height,
+				unsigned int charcount, unsigned int steps,
+				unsigned char *buf, size_t *bufsize);
 
 /*
  * Font description
diff --git a/lib/fonts/font_rotate.c b/lib/fonts/font_rotate.c
index 09f6218e036f..065e0fc0667b 100644
--- a/lib/fonts/font_rotate.c
+++ b/lib/fonts/font_rotate.c
@@ -9,8 +9,11 @@
  * more details.
  */
 
+#include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/math.h>
+#include <linux/overflow.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 
 #include "font.h"
@@ -170,3 +173,103 @@ void font_glyph_rotate_270(const unsigned char *glyph, unsigned int width, unsig
 	__font_glyph_rotate_270(glyph, width, height, out);
 }
 EXPORT_SYMBOL_GPL(font_glyph_rotate_270);
+
+/**
+ * font_data_rotate - Rotate font data by multiples of 90°
+ * @fd: The font data to rotate
+ * @width: The glyph width in bits per scanline
+ * @height: The number of scanlines in the glyph
+ * @charcount: The number of glyphs in the font
+ * @steps: Number of rotation steps of 90°
+ * @buf: Preallocated output buffer; can be NULL
+ * @bufsize: The size of @buf in bytes; can be NULL
+ *
+ * The parameters @width and @height refer to the visible number of pixels
+ * and scanlines in a single glyph. The number of glyphs is given in @charcount.
+ * Rotation happens in steps of 90°. The @steps parameter can have any value,
+ * but only 0 to 3 produce distinct results. With 4 or higher, a full rotation
+ * has been performed. You can pass any value for @steps and the helper will
+ * perform the appropriate rotation. Note that the returned buffer is not
+ * compatible with font_data_t. It only contains glyph data in the same format
+ * as returned by font_data_buf(). Callers are responsible to free the returned
+ * buffer with kfree(). Font rotation typically happens when displays get
+ * re-oriented. To avoid unnecessary re-allocation of the memory buffer, the
+ * caller can pass in an earlier result buffer in @buf for reuse. The old and
+ * new buffer sizes are given and retrieved by the caller in @bufsize. The
+ * allocation semantics are compatible with krealloc().
+ *
+ * Returns:
+ * A buffer with rotated glyphs on success, or an error pointer otherwise
+ */
+unsigned char *font_data_rotate(font_data_t *fd, unsigned int width, unsigned int height,
+				unsigned int charcount, unsigned int steps,
+				unsigned char *buf, size_t *bufsize)
+{
+	const unsigned char *src = font_data_buf(fd);
+	unsigned int s_cellsize = font_glyph_size(width, height);
+	unsigned int d_cellsize, i;
+	unsigned char *dst;
+	size_t size;
+
+	steps %= 4;
+
+	switch (steps) {
+	case 0:
+	case 2:
+		d_cellsize = s_cellsize;
+		break;
+	case 1:
+	case 3:
+		d_cellsize = font_glyph_size(height, width); /* flip width/height */
+		break;
+	}
+
+	if (check_mul_overflow(charcount, d_cellsize, &size))
+		return ERR_PTR(-EINVAL);
+
+	if (!buf || !bufsize || size > *bufsize) {
+		dst = kmalloc_array(charcount, d_cellsize, GFP_KERNEL);
+		if (!dst)
+			return ERR_PTR(-ENOMEM);
+
+		kfree(buf);
+		buf = dst;
+		if (bufsize)
+			*bufsize = size;
+	} else {
+		dst = buf;
+	}
+
+	switch (steps) {
+	case 0:
+		memcpy(dst, src, size);
+		break;
+	case 1:
+		memset(dst, 0, size);
+		for (i = 0; i < charcount; ++i) {
+			__font_glyph_rotate_90(src, width, height, dst);
+			src += s_cellsize;
+			dst += d_cellsize;
+		}
+		break;
+	case 2:
+		memset(dst, 0, size);
+		for (i = 0; i < charcount; ++i) {
+			__font_glyph_rotate_180(src, width, height, dst);
+			src += s_cellsize;
+			dst += d_cellsize;
+		}
+		break;
+	case 3:
+		memset(dst, 0, size);
+		for (i = 0; i < charcount; ++i) {
+			__font_glyph_rotate_270(src, width, height, dst);
+			src += s_cellsize;
+			dst += d_cellsize;
+		}
+		break;
+	}
+
+	return buf;
+}
+EXPORT_SYMBOL_GPL(font_data_rotate);
-- 
cgit v1.2.3


From eb25e202b3d60cdc239f14e0e5f6f7465fcc506c Mon Sep 17 00:00:00 2001
From: Justin Suess <utilityemal77@gmail.com>
Date: Fri, 27 Mar 2026 17:48:26 +0100
Subject: lsm: Add LSM hook security_unix_find
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an LSM hook security_unix_find.

This hook is called to check the path of a named UNIX socket before a
connection is initiated. The peer socket may be inspected as well.

Why existing hooks are unsuitable:

Existing socket hooks, security_unix_stream_connect(),
security_unix_may_send(), and security_socket_connect() don't provide
TOCTOU-free / namespace independent access to the paths of sockets.

(1) We cannot resolve the path from the struct sockaddr in existing hooks.
This requires another path lookup. A change in the path between the
two lookups will cause a TOCTOU bug.

(2) We cannot use the struct path from the listening socket, because it
may be bound to a path in a different namespace than the caller,
resulting in a path that cannot be referenced at policy creation time.

Consumers of the hook wishing to reference @other are responsible
for acquiring the unix_state_lock and checking for the SOCK_DEAD flag
therein, ensuring the socket hasn't died since lookup.

Cc: Günther Noack <gnoack3000@gmail.com>
Cc: Tingmao Wang <m@maowtm.org>
Cc: Mickaël Salaün <mic@digikod.net>
Cc: Paul Moore <paul@paul-moore.com>
Signed-off-by: Justin Suess <utilityemal77@gmail.com>
Signed-off-by: Günther Noack <gnoack3000@gmail.com>
Reviewed-by: Georgia Garcia <georgia.garcia@canonical.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/r/20260327164838.38231-2-gnoack3000@gmail.com
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 include/linux/lsm_hook_defs.h |  5 +++++
 include/linux/security.h      | 11 +++++++++++
 net/unix/af_unix.c            | 10 +++++++---
 security/security.c           | 20 ++++++++++++++++++++
 4 files changed, 43 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 8c42b4bde09c..7a0fd3dbfa29 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -317,6 +317,11 @@ LSM_HOOK(int, 0, post_notification, const struct cred *w_cred,
 LSM_HOOK(int, 0, watch_key, struct key *key)
 #endif /* CONFIG_SECURITY && CONFIG_KEY_NOTIFICATIONS */
 
+#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH)
+LSM_HOOK(int, 0, unix_find, const struct path *path, struct sock *other,
+	 int flags)
+#endif /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
+
 #ifdef CONFIG_SECURITY_NETWORK
 LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other,
 	 struct sock *newsk)
diff --git a/include/linux/security.h b/include/linux/security.h
index ee88dd2d2d1f..c2d665cbfcfb 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1932,6 +1932,17 @@ static inline int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
 }
 #endif	/* CONFIG_SECURITY_NETWORK */
 
+#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH)
+
+int security_unix_find(const struct path *path, struct sock *other, int flags);
+
+#else /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
+static inline int security_unix_find(const struct path *path, struct sock *other, int flags)
+{
+	return 0;
+}
+#endif /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
+
 #ifdef CONFIG_SECURITY_INFINIBAND
 int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey);
 int security_ib_endport_manage_subnet(void *sec, const char *name, u8 port_num);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b23c33df8b46..a5a55a49058d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1231,11 +1231,15 @@ static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
 		goto path_put;
 
 	err = -EPROTOTYPE;
-	if (sk->sk_type == type)
-		touch_atime(&path);
-	else
+	if (sk->sk_type != type)
 		goto sock_put;
 
+	err = security_unix_find(&path, sk, flags);
+	if (err)
+		goto sock_put;
+
+	touch_atime(&path);
+
 	path_put(&path);
 
 	return sk;
diff --git a/security/security.c b/security/security.c
index a26c1474e2e4..687fc486de9d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -4732,6 +4732,26 @@ int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
 
 #endif	/* CONFIG_SECURITY_NETWORK */
 
+#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH)
+/**
+ * security_unix_find() - Check if a named AF_UNIX socket can connect
+ * @path: path of the socket being connected to
+ * @other: peer sock
+ * @flags: flags associated with the socket
+ *
+ * This hook is called to check permissions before connecting to a named
+ * AF_UNIX socket. The caller does not hold any locks on @other.
+ *
+ * Return: Returns 0 if permission is granted.
+ */
+int security_unix_find(const struct path *path, struct sock *other, int flags)
+{
+	return call_int_hook(unix_find, path, other, flags);
+}
+EXPORT_SYMBOL(security_unix_find);
+
+#endif	/* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
+
 #ifdef CONFIG_SECURITY_INFINIBAND
 /**
  * security_ib_pkey_access() - Check if access to an IB pkey is allowed
-- 
cgit v1.2.3


From 57b23c0f612dcfa1aae99c9422d6d36ced1670d4 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Tue, 7 Apr 2026 18:22:33 +0200
Subject: bpf: Retire rcu_trace_implies_rcu_gp()

RCU Tasks Trace grace period implies RCU grace period, and this
guarantee is expected to remain in the future. Only BPF is the user of
this predicate, hence retire the API and clean up all in-tree users.

RCU Tasks Trace is now implemented on SRCU-fast and its grace period
mechanism always has at least one call to synchronize_rcu() as it is
required for SRCU-fast's correctness (it replaces the smp_mb() that
SRCU-fast readers skip). So, RCU-tt GP will always imply RCU GP.

Reviewed-by: Puranjay Mohan <puranjay@kernel.org>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260407162234.785270-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/rcupdate.h | 12 ------------
 kernel/bpf/core.c        | 10 ++++------
 kernel/bpf/helpers.c     |  2 +-
 kernel/bpf/memalloc.c    | 33 ++++++++++-----------------------
 kernel/bpf/syscall.c     | 22 ++++------------------
 5 files changed, 19 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 04f3f86a4145..bfa765132de8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -205,18 +205,6 @@ static inline void exit_tasks_rcu_start(void) { }
 static inline void exit_tasks_rcu_finish(void) { }
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
 
-/**
- * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period?
- *
- * As an accident of implementation, an RCU Tasks Trace grace period also
- * acts as an RCU grace period.  However, this could change at any time.
- * Code relying on this accident must call this function to verify that
- * this accident is still happening.
- *
- * You have been warned!
- */
-static inline bool rcu_trace_implies_rcu_gp(void) { return true; }
-
 /**
  * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
  *
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ee632f41bd83..89b89f55415c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2644,14 +2644,12 @@ static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
 {
 	struct bpf_prog_array *progs;
 
-	/* If RCU Tasks Trace grace period implies RCU grace period, there is
-	 * no need to call kfree_rcu(), just call kfree() directly.
+	/*
+	 * RCU Tasks Trace grace period implies RCU grace period, there is no
+	 * need to call kfree_rcu(), just call kfree() directly.
 	 */
 	progs = container_of(rcu, struct bpf_prog_array, rcu);
-	if (rcu_trace_implies_rcu_gp())
-		kfree(progs);
-	else
-		kfree_rcu(progs, rcu);
+	kfree(progs);
 }
 
 void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 8352b7ee0f4d..bb95e287b0dc 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1272,7 +1272,7 @@ static void bpf_async_cb_rcu_tasks_trace_free(struct rcu_head *rcu)
 		return;
 	}
 
-	/* rcu_trace_implies_rcu_gp() is true and will remain so */
+	/* RCU Tasks Trace grace period implies RCU grace period. */
 	bpf_async_cb_rcu_free(rcu);
 }
 
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 682a9f34214b..e9662db7198f 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -284,17 +284,6 @@ static void __free_rcu(struct rcu_head *head)
 	atomic_set(&c->call_rcu_ttrace_in_progress, 0);
 }
 
-static void __free_rcu_tasks_trace(struct rcu_head *head)
-{
-	/* If RCU Tasks Trace grace period implies RCU grace period,
-	 * there is no need to invoke call_rcu().
-	 */
-	if (rcu_trace_implies_rcu_gp())
-		__free_rcu(head);
-	else
-		call_rcu(head, __free_rcu);
-}
-
 static void enque_to_free(struct bpf_mem_cache *c, void *obj)
 {
 	struct llist_node *llnode = obj;
@@ -326,12 +315,12 @@ static void do_call_rcu_ttrace(struct bpf_mem_cache *c)
 		return;
 	}
 
-	/* Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
-	 * If RCU Tasks Trace grace period implies RCU grace period, free
-	 * these elements directly, else use call_rcu() to wait for normal
-	 * progs to finish and finally do free_one() on each element.
+	/*
+	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
+	 * RCU Tasks Trace grace period implies RCU grace period, so pass
+	 * __free_rcu directly as the callback.
 	 */
-	call_rcu_tasks_trace(&c->rcu_ttrace, __free_rcu_tasks_trace);
+	call_rcu_tasks_trace(&c->rcu_ttrace, __free_rcu);
 }
 
 static void free_bulk(struct bpf_mem_cache *c)
@@ -696,20 +685,18 @@ static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma)
 
 static void free_mem_alloc(struct bpf_mem_alloc *ma)
 {
-	/* waiting_for_gp[_ttrace] lists were drained, but RCU callbacks
+	/*
+	 * waiting_for_gp[_ttrace] lists were drained, but RCU callbacks
 	 * might still execute. Wait for them.
 	 *
 	 * rcu_barrier_tasks_trace() doesn't imply synchronize_rcu_tasks_trace(),
 	 * but rcu_barrier_tasks_trace() and rcu_barrier() below are only used
-	 * to wait for the pending __free_rcu_tasks_trace() and __free_rcu(),
-	 * so if call_rcu(head, __free_rcu) is skipped due to
-	 * rcu_trace_implies_rcu_gp(), it will be OK to skip rcu_barrier() by
-	 * using rcu_trace_implies_rcu_gp() as well.
+	 * to wait for the pending __free_by_rcu(), and __free_rcu(). RCU Tasks
+	 * Trace grace period implies RCU grace period, so all __free_rcu don't
+	 * need extra call_rcu() (and thus extra rcu_barrier() here).
 	 */
 	rcu_barrier(); /* wait for __free_by_rcu */
 	rcu_barrier_tasks_trace(); /* wait for __free_rcu */
-	if (!rcu_trace_implies_rcu_gp())
-		rcu_barrier();
 	free_mem_alloc_no_barrier(ma);
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f1044ab9b03b..b73b25c63073 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -941,14 +941,6 @@ static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
 	bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu));
 }
 
-static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu)
-{
-	if (rcu_trace_implies_rcu_gp())
-		bpf_map_free_rcu_gp(rcu);
-	else
-		call_rcu(rcu, bpf_map_free_rcu_gp);
-}
-
 /* decrement map refcnt and schedule it for freeing via workqueue
  * (underlying map implementation ops->map_free() might sleep)
  */
@@ -959,8 +951,9 @@ void bpf_map_put(struct bpf_map *map)
 		bpf_map_free_id(map);
 
 		WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
+		/* RCU tasks trace grace period implies RCU grace period. */
 		if (READ_ONCE(map->free_after_mult_rcu_gp))
-			call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
+			call_rcu_tasks_trace(&map->rcu, bpf_map_free_rcu_gp);
 		else if (READ_ONCE(map->free_after_rcu_gp))
 			call_rcu(&map->rcu, bpf_map_free_rcu_gp);
 		else
@@ -3273,14 +3266,6 @@ static bool bpf_link_is_tracepoint(struct bpf_link *link)
 	       (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP);
 }
 
-static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
-{
-	if (rcu_trace_implies_rcu_gp())
-		bpf_link_defer_dealloc_rcu_gp(rcu);
-	else
-		call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
-}
-
 /* bpf_link_free is guaranteed to be called from process context */
 static void bpf_link_free(struct bpf_link *link)
 {
@@ -3306,7 +3291,8 @@ static void bpf_link_free(struct bpf_link *link)
 		 * faultable case, since it exclusively uses RCU Tasks Trace.
 		 */
 		if (link->sleepable || (link->prog && link->prog->sleepable))
-			call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+			/* RCU Tasks Trace grace period implies RCU grace period. */
+			call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
 		/* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */
 		else if (bpf_link_is_tracepoint(link))
 			call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
-- 
cgit v1.2.3


From d79dc408deb6c192adbad7893ee0c22d50826511 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 3 Apr 2026 00:18:15 +0200
Subject: PCI: Remove no_pci_devices()

After having removed the last usage of no_pci_devices(), this function
can be removed.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/b0ce592d-c34c-4e0b-b389-4e346b3a0c44@gmail.com
---
 drivers/pci/probe.c | 17 -----------------
 include/linux/pci.h |  3 ---
 2 files changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index bccc7a4bdd79..19d73f6132fb 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -67,23 +67,6 @@ static struct resource *get_pci_domain_busn_res(int domain_nr)
 	return &r->res;
 }
 
-/*
- * Some device drivers need know if PCI is initiated.
- * Basically, we think PCI is not initiated when there
- * is no device to be found on the pci_bus_type.
- */
-int no_pci_devices(void)
-{
-	struct device *dev;
-	int no_devices;
-
-	dev = bus_find_next_device(&pci_bus_type, NULL);
-	no_devices = (dev == NULL);
-	put_device(dev);
-	return no_devices;
-}
-EXPORT_SYMBOL(no_pci_devices);
-
 /*
  * PCI Bus Class
  */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1c270f1d5123..482dd8460dd9 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1193,8 +1193,6 @@ extern const struct bus_type pci_bus_type;
 /* Do NOT directly access these two variables, unless you are arch-specific PCI
  * code, or PCI core code. */
 extern struct list_head pci_root_buses;	/* List of all known PCI buses */
-/* Some device drivers need know if PCI is initiated */
-int no_pci_devices(void);
 
 void pcibios_resource_survey_bus(struct pci_bus *bus);
 void pcibios_bus_add_device(struct pci_dev *pdev);
@@ -2132,7 +2130,6 @@ static inline struct pci_dev *pci_get_base_class(unsigned int class,
 static inline int pci_dev_present(const struct pci_device_id *ids)
 { return 0; }
 
-#define no_pci_devices()	(1)
 #define pci_dev_put(dev)	do { } while (0)
 
 static inline void pci_set_master(struct pci_dev *dev) { }
-- 
cgit v1.2.3


From 2232ba9c7931d5c1061f7f4e897b944ea39c3aa9 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 24 Feb 2026 12:06:18 +1000
Subject: mm: add gpu active/reclaim per-node stat counters (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While discussing memcg intergration with gpu memory allocations,
it was pointed out that there was no numa/system counters for
GPU memory allocations.

With more integrated memory GPU server systems turning up, and
more requirements for memory tracking it seems we should start
closing the gap.

Add two counters to track GPU per-node system memory allocations.

The first is currently allocated to GPU objects, and the second
is for memory that is stored in GPU page pools that can be reclaimed,
by the shrinker.

Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Zi Yan <ziy@nvidia.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 Documentation/filesystems/proc.rst | 8 ++++++++
 drivers/base/node.c                | 5 +++++
 fs/proc/meminfo.c                  | 6 ++++++
 include/linux/mmzone.h             | 2 ++
 mm/show_mem.c                      | 6 +++++-
 mm/vmstat.c                        | 2 ++
 6 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index b0c0d1b45b99..3acfdf785465 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -1089,6 +1089,8 @@ Example output. You may not have all of these fields.
     CmaFree:               0 kB
     Unaccepted:            0 kB
     Balloon:               0 kB
+    GPUActive:             0 kB
+    GPUReclaim:            0 kB
     HugePages_Total:       0
     HugePages_Free:        0
     HugePages_Rsvd:        0
@@ -1269,6 +1271,12 @@ Unaccepted
               Memory that has not been accepted by the guest
 Balloon
               Memory returned to Host by VM Balloon Drivers
+GPUActive
+              System memory allocated to active GPU objects
+GPUReclaim
+              System memory stored in GPU pools for reuse. This memory is not
+              counted in GPUActive. It is shrinker reclaimable memory kept in a reuse
+              pool because it has non-standard page table attributes, like WC or UC.
 HugePages_Total, HugePages_Free, HugePages_Rsvd, HugePages_Surp, Hugepagesize, Hugetlb
               See Documentation/admin-guide/mm/hugetlbpage.rst.
 DirectMap4k, DirectMap2M, DirectMap1G
diff --git a/drivers/base/node.c b/drivers/base/node.c
index d7647d077b66..126f66aa2c3e 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -523,6 +523,8 @@ static ssize_t node_read_meminfo(struct device *dev,
 #ifdef CONFIG_UNACCEPTED_MEMORY
 			     "Node %d Unaccepted:     %8lu kB\n"
 #endif
+			     "Node %d GPUActive:      %8lu kB\n"
+			     "Node %d GPUReclaim:     %8lu kB\n"
 			     ,
 			     nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
 			     nid, K(node_page_state(pgdat, NR_WRITEBACK)),
@@ -556,6 +558,9 @@ static ssize_t node_read_meminfo(struct device *dev,
 			     ,
 			     nid, K(sum_zone_node_page_state(nid, NR_UNACCEPTED))
 #endif
+			     ,
+			     nid, K(node_page_state(pgdat, NR_GPU_ACTIVE)),
+			     nid, K(node_page_state(pgdat, NR_GPU_RECLAIM))
 			    );
 	len += hugetlb_report_node_meminfo(buf, len, nid);
 	return len;
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index a458f1e112fd..65ba49ec3a63 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -163,6 +163,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	show_val_kb(m, "Balloon:        ",
 		    global_node_page_state(NR_BALLOON_PAGES));
 
+	show_val_kb(m, "GPUActive:      ",
+		    global_node_page_state(NR_GPU_ACTIVE));
+
+	show_val_kb(m, "GPUReclaim:     ",
+		    global_node_page_state(NR_GPU_RECLAIM));
+
 	hugetlb_report_meminfo(m);
 
 	arch_report_meminfo(m);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3e51190a55e4..841b40031833 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -260,6 +260,8 @@ enum node_stat_item {
 #endif
 	NR_BALLOON_PAGES,
 	NR_KERNEL_FILE_PAGES,
+	NR_GPU_ACTIVE,	/* Pages assigned to GPU objects */
+	NR_GPU_RECLAIM,	/* Pages in shrinkable GPU pools */
 	NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/mm/show_mem.c b/mm/show_mem.c
index 24078ac3e6bc..43aca5a2ac99 100644
--- a/mm/show_mem.c
+++ b/mm/show_mem.c
@@ -254,6 +254,8 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
 			" sec_pagetables:%lukB"
 			" all_unreclaimable? %s"
 			" Balloon:%lukB"
+			" gpu_active:%lukB"
+			" gpu_reclaim:%lukB"
 			"\n",
 			pgdat->node_id,
 			K(node_page_state(pgdat, NR_ACTIVE_ANON)),
@@ -279,7 +281,9 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
 			K(node_page_state(pgdat, NR_PAGETABLE)),
 			K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
 			str_yes_no(kswapd_test_hopeless(pgdat)),
-			K(node_page_state(pgdat, NR_BALLOON_PAGES)));
+			K(node_page_state(pgdat, NR_BALLOON_PAGES)),
+			K(node_page_state(pgdat, NR_GPU_ACTIVE)),
+			K(node_page_state(pgdat, NR_GPU_RECLAIM)));
 	}
 
 	for_each_populated_zone(zone) {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 86b14b0f77b5..ac9affbe48b7 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1281,6 +1281,8 @@ const char * const vmstat_text[] = {
 #endif
 	[I(NR_BALLOON_PAGES)]			= "nr_balloon_pages",
 	[I(NR_KERNEL_FILE_PAGES)]		= "nr_kernel_file_pages",
+	[I(NR_GPU_ACTIVE)]			= "nr_gpu_active",
+	[I(NR_GPU_RECLAIM)]			= "nr_gpu_reclaim",
 #undef I
 
 	/* system-wide enum vm_stat_item counters */
-- 
cgit v1.2.3


From 6e6f2b9b3375cc0e6b8567d31ae7d3b2d910582f Mon Sep 17 00:00:00 2001
From: Sun Jian <sun.jian.kdev@gmail.com>
Date: Tue, 3 Mar 2026 18:15:25 +0800
Subject: netfilter: use function typedefs for __rcu NAT helper hook pointers

After commit 07919126ecfc ("netfilter: annotate NAT helper hook pointers
with __rcu"), sparse can warn about type/address-space mismatches when
RCU-dereferencing NAT helper hook function pointers.

The hooks are __rcu-annotated and accessed via rcu_dereference(), but the
combination of complex function pointer declarators and the WRITE_ONCE()
machinery used by RCU_INIT_POINTER()/rcu_assign_pointer() can confuse
sparse and trigger false positives.

Introduce typedefs for the NAT helper function types, so __rcu applies to
a simple "fn_t __rcu *" pointer form. Also replace local typeof(hook)
variables with "fn_t *" to avoid propagating __rcu address space into
temporaries.

No functional change intended.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202603022359.3dGE9fwI-lkp@intel.com/
Signed-off-by: Sun Jian <sun.jian.kdev@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/linux/netfilter/nf_conntrack_amanda.h | 15 +++++++++------
 include/linux/netfilter/nf_conntrack_ftp.h    | 17 ++++++++++-------
 include/linux/netfilter/nf_conntrack_irc.h    | 15 +++++++++------
 include/linux/netfilter/nf_conntrack_snmp.h   | 11 +++++++----
 include/linux/netfilter/nf_conntrack_tftp.h   |  9 ++++++---
 net/netfilter/nf_conntrack_amanda.c           | 10 ++--------
 net/netfilter/nf_conntrack_ftp.c              | 10 ++--------
 net/netfilter/nf_conntrack_irc.c              | 10 ++--------
 net/netfilter/nf_conntrack_snmp.c             |  7 ++-----
 net/netfilter/nf_conntrack_tftp.c             |  7 ++-----
 10 files changed, 51 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_amanda.h b/include/linux/netfilter/nf_conntrack_amanda.h
index dfe89f38d1f7..1719987e8fd8 100644
--- a/include/linux/netfilter/nf_conntrack_amanda.h
+++ b/include/linux/netfilter/nf_conntrack_amanda.h
@@ -7,10 +7,13 @@
 #include <linux/skbuff.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 
-extern unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb,
-					  enum ip_conntrack_info ctinfo,
-					  unsigned int protoff,
-					  unsigned int matchoff,
-					  unsigned int matchlen,
-					  struct nf_conntrack_expect *exp);
+typedef unsigned int
+nf_nat_amanda_hook_fn(struct sk_buff *skb,
+		      enum ip_conntrack_info ctinfo,
+		      unsigned int protoff,
+		      unsigned int matchoff,
+		      unsigned int matchlen,
+		      struct nf_conntrack_expect *exp);
+
+extern nf_nat_amanda_hook_fn __rcu *nf_nat_amanda_hook;
 #endif /* _NF_CONNTRACK_AMANDA_H */
diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h
index f31292642035..7b62446ccec4 100644
--- a/include/linux/netfilter/nf_conntrack_ftp.h
+++ b/include/linux/netfilter/nf_conntrack_ftp.h
@@ -26,11 +26,14 @@ struct nf_ct_ftp_master {
 
 /* For NAT to hook in when we find a packet which describes what other
  * connection we should expect. */
-extern unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb,
-				       enum ip_conntrack_info ctinfo,
-				       enum nf_ct_ftp_type type,
-				       unsigned int protoff,
-				       unsigned int matchoff,
-				       unsigned int matchlen,
-				       struct nf_conntrack_expect *exp);
+typedef unsigned int
+nf_nat_ftp_hook_fn(struct sk_buff *skb,
+		   enum ip_conntrack_info ctinfo,
+		   enum nf_ct_ftp_type type,
+		   unsigned int protoff,
+		   unsigned int matchoff,
+		   unsigned int matchlen,
+		   struct nf_conntrack_expect *exp);
+
+extern nf_nat_ftp_hook_fn __rcu *nf_nat_ftp_hook;
 #endif /* _NF_CONNTRACK_FTP_H */
diff --git a/include/linux/netfilter/nf_conntrack_irc.h b/include/linux/netfilter/nf_conntrack_irc.h
index 4f3ca5621998..ce07250afb4e 100644
--- a/include/linux/netfilter/nf_conntrack_irc.h
+++ b/include/linux/netfilter/nf_conntrack_irc.h
@@ -8,11 +8,14 @@
 
 #define IRC_PORT	6667
 
-extern unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb,
-				       enum ip_conntrack_info ctinfo,
-				       unsigned int protoff,
-				       unsigned int matchoff,
-				       unsigned int matchlen,
-				       struct nf_conntrack_expect *exp);
+typedef unsigned int
+nf_nat_irc_hook_fn(struct sk_buff *skb,
+		   enum ip_conntrack_info ctinfo,
+		   unsigned int protoff,
+		   unsigned int matchoff,
+		   unsigned int matchlen,
+		   struct nf_conntrack_expect *exp);
+
+extern nf_nat_irc_hook_fn __rcu *nf_nat_irc_hook;
 
 #endif /* _NF_CONNTRACK_IRC_H */
diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h
index 99107e4f5234..bb39f04a9977 100644
--- a/include/linux/netfilter/nf_conntrack_snmp.h
+++ b/include/linux/netfilter/nf_conntrack_snmp.h
@@ -5,9 +5,12 @@
 #include <linux/netfilter.h>
 #include <linux/skbuff.h>
 
-extern int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb,
-				unsigned int protoff,
-				struct nf_conn *ct,
-				enum ip_conntrack_info ctinfo);
+typedef int
+nf_nat_snmp_hook_fn(struct sk_buff *skb,
+		    unsigned int protoff,
+		    struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo);
+
+extern nf_nat_snmp_hook_fn __rcu *nf_nat_snmp_hook;
 
 #endif /* _NF_CONNTRACK_SNMP_H */
diff --git a/include/linux/netfilter/nf_conntrack_tftp.h b/include/linux/netfilter/nf_conntrack_tftp.h
index 1490b68dd7d1..90b334bbce3c 100644
--- a/include/linux/netfilter/nf_conntrack_tftp.h
+++ b/include/linux/netfilter/nf_conntrack_tftp.h
@@ -19,8 +19,11 @@ struct tftphdr {
 #define TFTP_OPCODE_ACK		4
 #define TFTP_OPCODE_ERROR	5
 
-extern unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb,
-				        enum ip_conntrack_info ctinfo,
-				        struct nf_conntrack_expect *exp);
+typedef unsigned int
+nf_nat_tftp_hook_fn(struct sk_buff *skb,
+		    enum ip_conntrack_info ctinfo,
+		    struct nf_conntrack_expect *exp);
+
+extern nf_nat_tftp_hook_fn __rcu *nf_nat_tftp_hook;
 
 #endif /* _NF_CONNTRACK_TFTP_H */
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index c0132559f6af..d2c09e8dd872 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -37,13 +37,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
 module_param(ts_algo, charp, 0400);
 MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
 
-unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb,
-					 enum ip_conntrack_info ctinfo,
-					 unsigned int protoff,
-					 unsigned int matchoff,
-					 unsigned int matchlen,
-					 struct nf_conntrack_expect *exp)
-					 __read_mostly;
+nf_nat_amanda_hook_fn __rcu *nf_nat_amanda_hook __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_amanda_hook);
 
 enum amanda_strings {
@@ -98,7 +92,7 @@ static int amanda_help(struct sk_buff *skb,
 	u_int16_t len;
 	__be16 port;
 	int ret = NF_ACCEPT;
-	typeof(nf_nat_amanda_hook) nf_nat_amanda;
+	nf_nat_amanda_hook_fn *nf_nat_amanda;
 
 	/* Only look at packets from the Amanda server */
 	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 5e00f9123c38..de83bf9e6c61 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -43,13 +43,7 @@ module_param_array(ports, ushort, &ports_c, 0400);
 static bool loose;
 module_param(loose, bool, 0600);
 
-unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb,
-				      enum ip_conntrack_info ctinfo,
-				      enum nf_ct_ftp_type type,
-				      unsigned int protoff,
-				      unsigned int matchoff,
-				      unsigned int matchlen,
-				      struct nf_conntrack_expect *exp);
+nf_nat_ftp_hook_fn __rcu *nf_nat_ftp_hook;
 EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
 
 static int try_rfc959(const char *, size_t, struct nf_conntrack_man *,
@@ -385,7 +379,7 @@ static int help(struct sk_buff *skb,
 	struct nf_conntrack_man cmd = {};
 	unsigned int i;
 	int found = 0, ends_in_nl;
-	typeof(nf_nat_ftp_hook) nf_nat_ftp;
+	nf_nat_ftp_hook_fn *nf_nat_ftp;
 
 	/* Until there's been traffic both ways, don't look in packets. */
 	if (ctinfo != IP_CT_ESTABLISHED &&
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index b8e6d724acd1..522183b9a604 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -30,13 +30,7 @@ static unsigned int dcc_timeout __read_mostly = 300;
 static char *irc_buffer;
 static DEFINE_SPINLOCK(irc_buffer_lock);
 
-unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb,
-				      enum ip_conntrack_info ctinfo,
-				      unsigned int protoff,
-				      unsigned int matchoff,
-				      unsigned int matchlen,
-				      struct nf_conntrack_expect *exp)
-				      __read_mostly;
+nf_nat_irc_hook_fn __rcu *nf_nat_irc_hook __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_irc_hook);
 
 #define HELPER_NAME "irc"
@@ -122,7 +116,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 	__be16 port;
 	int i, ret = NF_ACCEPT;
 	char *addr_beg_p, *addr_end_p;
-	typeof(nf_nat_irc_hook) nf_nat_irc;
+	nf_nat_irc_hook_fn *nf_nat_irc;
 	unsigned int datalen;
 
 	/* If packet is coming from IRC server */
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
index 387dd6e58f88..7b7eed43c54f 100644
--- a/net/netfilter/nf_conntrack_snmp.c
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -25,17 +25,14 @@ static unsigned int timeout __read_mostly = 30;
 module_param(timeout, uint, 0400);
 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
 
-int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb,
-			      unsigned int protoff,
-			      struct nf_conn *ct,
-			      enum ip_conntrack_info ctinfo);
+nf_nat_snmp_hook_fn __rcu *nf_nat_snmp_hook;
 EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
 
 static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
 			       struct nf_conn *ct,
 			       enum ip_conntrack_info ctinfo)
 {
-	typeof(nf_nat_snmp_hook) nf_nat_snmp;
+	nf_nat_snmp_hook_fn *nf_nat_snmp;
 
 	nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout);
 
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index 89e9914e5d03..a2e6833a0bf7 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -32,10 +32,7 @@ static unsigned int ports_c;
 module_param_array(ports, ushort, &ports_c, 0400);
 MODULE_PARM_DESC(ports, "Port numbers of TFTP servers");
 
-unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb,
-				       enum ip_conntrack_info ctinfo,
-				       struct nf_conntrack_expect *exp)
-				       __read_mostly;
+nf_nat_tftp_hook_fn __rcu *nf_nat_tftp_hook __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_tftp_hook);
 
 static int tftp_help(struct sk_buff *skb,
@@ -48,7 +45,7 @@ static int tftp_help(struct sk_buff *skb,
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple *tuple;
 	unsigned int ret = NF_ACCEPT;
-	typeof(nf_nat_tftp_hook) nf_nat_tftp;
+	nf_nat_tftp_hook_fn *nf_nat_tftp;
 
 	tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr),
 				 sizeof(_tftph), &_tftph);
-- 
cgit v1.2.3


From 613c83766884503f0f6bfdc45964c84b5286091c Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Tue, 7 Apr 2026 20:06:47 -0700
Subject: wifi: mac80211, cfg80211: Export michael_mic() and move it to
 cfg80211

Export michael_mic() so that the ath11k and ath12k drivers can call it.
In addition, move it from mac80211 to cfg80211 so that the ipw2x00
drivers, which depend on cfg80211 but not mac80211, can also call it.

Currently these drivers have their own local implementations of
michael_mic() based on crypto_shash, which is redundant and inefficient.
By consolidating all the Michael MIC code into cfg80211, we'll be able
to remove the duplicate Michael MIC code in the crypto/ directory.

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Link: https://patch.msgid.link/20260408030651.80336-3-ebiggers@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  |  5 +++
 net/mac80211/Makefile      |  1 -
 net/mac80211/michael.c     | 83 --------------------------------------------
 net/mac80211/michael.h     | 22 ------------
 net/mac80211/wpa.c         |  1 -
 net/wireless/Makefile      |  2 +-
 net/wireless/michael-mic.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 92 insertions(+), 108 deletions(-)
 delete mode 100644 net/mac80211/michael.c
 delete mode 100644 net/mac80211/michael.h
 create mode 100644 net/wireless/michael-mic.c

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ffa8f9f77efe..23f9df9be837 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1921,6 +1921,11 @@ enum ieee80211_radio_measurement_actioncode {
 #define PMK_MAX_LEN			64
 #define SAE_PASSWORD_MAX_LEN		128
 
+#define MICHAEL_MIC_LEN			8
+
+void michael_mic(const u8 *key, struct ieee80211_hdr *hdr,
+		 const u8 *data, size_t data_len, u8 *mic);
+
 /* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */
 enum ieee80211_pub_actioncode {
 	WLAN_PUB_ACTION_20_40_BSS_COEX = 0,
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index abf46c951299..20c3135b73ea 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -18,7 +18,6 @@ mac80211-y := \
 	iface.o \
 	link.o \
 	rate.o \
-	michael.o \
 	tkip.o \
 	aes_cmac.o \
 	aes_gmac.o \
diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
deleted file mode 100644
index 8a1afc93e749..000000000000
--- a/net/mac80211/michael.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Michael MIC implementation - optimized for TKIP MIC operations
- * Copyright 2002-2003, Instant802 Networks, Inc.
- */
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <linux/ieee80211.h>
-#include <linux/unaligned.h>
-
-#include "michael.h"
-
-static void michael_block(struct michael_mic_ctx *mctx, u32 val)
-{
-	mctx->l ^= val;
-	mctx->r ^= rol32(mctx->l, 17);
-	mctx->l += mctx->r;
-	mctx->r ^= ((mctx->l & 0xff00ff00) >> 8) |
-		   ((mctx->l & 0x00ff00ff) << 8);
-	mctx->l += mctx->r;
-	mctx->r ^= rol32(mctx->l, 3);
-	mctx->l += mctx->r;
-	mctx->r ^= ror32(mctx->l, 2);
-	mctx->l += mctx->r;
-}
-
-static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key,
-			    struct ieee80211_hdr *hdr)
-{
-	u8 *da, *sa, tid;
-
-	da = ieee80211_get_DA(hdr);
-	sa = ieee80211_get_SA(hdr);
-	if (ieee80211_is_data_qos(hdr->frame_control))
-		tid = ieee80211_get_tid(hdr);
-	else
-		tid = 0;
-
-	mctx->l = get_unaligned_le32(key);
-	mctx->r = get_unaligned_le32(key + 4);
-
-	/*
-	 * A pseudo header (DA, SA, Priority, 0, 0, 0) is used in Michael MIC
-	 * calculation, but it is _not_ transmitted
-	 */
-	michael_block(mctx, get_unaligned_le32(da));
-	michael_block(mctx, get_unaligned_le16(&da[4]) |
-			    (get_unaligned_le16(sa) << 16));
-	michael_block(mctx, get_unaligned_le32(&sa[2]));
-	michael_block(mctx, tid);
-}
-
-void michael_mic(const u8 *key, struct ieee80211_hdr *hdr,
-		 const u8 *data, size_t data_len, u8 *mic)
-{
-	u32 val;
-	size_t block, blocks, left;
-	struct michael_mic_ctx mctx;
-
-	michael_mic_hdr(&mctx, key, hdr);
-
-	/* Real data */
-	blocks = data_len / 4;
-	left = data_len % 4;
-
-	for (block = 0; block < blocks; block++)
-		michael_block(&mctx, get_unaligned_le32(&data[block * 4]));
-
-	/* Partial block of 0..3 bytes and padding: 0x5a + 4..7 zeros to make
-	 * total length a multiple of 4. */
-	val = 0x5a;
-	while (left > 0) {
-		val <<= 8;
-		left--;
-		val |= data[blocks * 4 + left];
-	}
-
-	michael_block(&mctx, val);
-	michael_block(&mctx, 0);
-
-	put_unaligned_le32(mctx.l, mic);
-	put_unaligned_le32(mctx.r, mic + 4);
-}
diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h
deleted file mode 100644
index a7fdb8e84615..000000000000
--- a/net/mac80211/michael.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Michael MIC implementation - optimized for TKIP MIC operations
- * Copyright 2002-2003, Instant802 Networks, Inc.
- */
-
-#ifndef MICHAEL_H
-#define MICHAEL_H
-
-#include <linux/types.h>
-#include <linux/ieee80211.h>
-
-#define MICHAEL_MIC_LEN 8
-
-struct michael_mic_ctx {
-	u32 l, r;
-};
-
-void michael_mic(const u8 *key, struct ieee80211_hdr *hdr,
-		 const u8 *data, size_t data_len, u8 *mic);
-
-#endif /* MICHAEL_H */
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 64a57475ce50..724ec831a885 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -18,7 +18,6 @@
 #include <crypto/utils.h>
 
 #include "ieee80211_i.h"
-#include "michael.h"
 #include "tkip.h"
 #include "aes_ccm.h"
 #include "aes_cmac.h"
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 62a83faf0e07..a77fd5ba6368 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_WEXT_PRIV) += wext-priv.o
 
 cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o
 cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o
-cfg80211-y += pmsr.o
+cfg80211-y += michael-mic.o pmsr.o
 cfg80211-$(CONFIG_OF) += of.o
 cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o
 cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o
diff --git a/net/wireless/michael-mic.c b/net/wireless/michael-mic.c
new file mode 100644
index 000000000000..50cdb67f0503
--- /dev/null
+++ b/net/wireless/michael-mic.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Michael MIC implementation - optimized for TKIP MIC operations
+ * Copyright 2002-2003, Instant802 Networks, Inc.
+ */
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/ieee80211.h>
+#include <linux/unaligned.h>
+
+struct michael_mic_ctx {
+	u32 l, r;
+};
+
+static void michael_block(struct michael_mic_ctx *mctx, u32 val)
+{
+	mctx->l ^= val;
+	mctx->r ^= rol32(mctx->l, 17);
+	mctx->l += mctx->r;
+	mctx->r ^= ((mctx->l & 0xff00ff00) >> 8) |
+		   ((mctx->l & 0x00ff00ff) << 8);
+	mctx->l += mctx->r;
+	mctx->r ^= rol32(mctx->l, 3);
+	mctx->l += mctx->r;
+	mctx->r ^= ror32(mctx->l, 2);
+	mctx->l += mctx->r;
+}
+
+static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key,
+			    struct ieee80211_hdr *hdr)
+{
+	u8 *da, *sa, tid;
+
+	da = ieee80211_get_DA(hdr);
+	sa = ieee80211_get_SA(hdr);
+	if (ieee80211_is_data_qos(hdr->frame_control))
+		tid = ieee80211_get_tid(hdr);
+	else
+		tid = 0;
+
+	mctx->l = get_unaligned_le32(key);
+	mctx->r = get_unaligned_le32(key + 4);
+
+	/*
+	 * A pseudo header (DA, SA, Priority, 0, 0, 0) is used in Michael MIC
+	 * calculation, but it is _not_ transmitted
+	 */
+	michael_block(mctx, get_unaligned_le32(da));
+	michael_block(mctx, get_unaligned_le16(&da[4]) |
+			    (get_unaligned_le16(sa) << 16));
+	michael_block(mctx, get_unaligned_le32(&sa[2]));
+	michael_block(mctx, tid);
+}
+
+void michael_mic(const u8 *key, struct ieee80211_hdr *hdr,
+		 const u8 *data, size_t data_len, u8 *mic)
+{
+	u32 val;
+	size_t block, blocks, left;
+	struct michael_mic_ctx mctx;
+
+	michael_mic_hdr(&mctx, key, hdr);
+
+	/* Real data */
+	blocks = data_len / 4;
+	left = data_len % 4;
+
+	for (block = 0; block < blocks; block++)
+		michael_block(&mctx, get_unaligned_le32(&data[block * 4]));
+
+	/* Partial block of 0..3 bytes and padding: 0x5a + 4..7 zeros to make
+	 * total length a multiple of 4. */
+	val = 0x5a;
+	while (left > 0) {
+		val <<= 8;
+		left--;
+		val |= data[blocks * 4 + left];
+	}
+
+	michael_block(&mctx, val);
+	michael_block(&mctx, 0);
+
+	put_unaligned_le32(mctx.l, mic);
+	put_unaligned_le32(mctx.r, mic + 4);
+}
+EXPORT_SYMBOL_GPL(michael_mic);
-- 
cgit v1.2.3


From 1f0d117cd6ca8e74e70e415e89b059fce37674c6 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 7 Apr 2026 14:16:41 +0100
Subject: entry: Fix stale comment for irqentry_enter()

The kerneldoc comment for irqentry_enter() refers to idtentry_exit(),
which is an accidental holdover from the x86 entry code that the generic
irqentry code was based on.

Correct this to refer to irqentry_exit().

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Jinjie Ruan <ruanjinjie@huawei.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260407131650.3813777-2-mark.rutland@arm.com
---
 include/linux/irq-entry-common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
index d26d1b1bcbfb..3cf4d21168ba 100644
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -394,7 +394,7 @@ typedef struct irqentry_state {
  * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
  * would not be possible.
  *
- * Returns: An opaque object that must be passed to idtentry_exit()
+ * Returns: An opaque object that must be passed to irqentry_exit()
  */
 irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
 
-- 
cgit v1.2.3


From 22f66e7ef4ce9414b4bd18abe50ead4a1284b01a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 7 Apr 2026 14:16:42 +0100
Subject: entry: Remove local_irq_{enable,disable}_exit_to_user()

local_irq_enable_exit_to_user() and local_irq_disable_exit_to_user() are
never overridden by architecture code, and are always equivalent to
local_irq_enable() and local_irq_disable().

These functions were added on the assumption that arm64 would override
them to manage 'DAIF' exception masking, as described by Thomas Gleixner
in these threads:

  https://lore.kernel.org/all/20190919150809.340471236@linutronix.de/
  https://lore.kernel.org/all/alpine.DEB.2.21.1910240119090.1852@nanos.tec.linutronix.de/

In practice arm64 did not need to override either. Prior to moving to
the generic irqentry code, arm64's management of DAIF was reworked in
commit:

  97d935faacde ("arm64: Unmask Debug + SError in do_notify_resume()")

Since that commit, arm64 only masks interrupts during the 'prepare' step
when returning to user mode, and masks other DAIF exceptions later.
Within arm64_exit_to_user_mode(), the arm64 entry code is as follows:

	local_irq_disable();
	exit_to_user_mode_prepare_legacy(regs);
	local_daif_mask();
	mte_check_tfsr_exit();
	exit_to_user_mode();

Remove the unnecessary local_irq_enable_exit_to_user() and
local_irq_disable_exit_to_user() functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Jinjie Ruan <ruanjinjie@huawei.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260407131650.3813777-3-mark.rutland@arm.com
---
 include/linux/entry-common.h     |  2 +-
 include/linux/irq-entry-common.h | 31 -------------------------------
 kernel/entry/common.c            |  4 ++--
 3 files changed, 3 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index f83ca0abf2cd..dbaa153100f4 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -321,7 +321,7 @@ static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
 {
 	instrumentation_begin();
 	syscall_exit_to_user_mode_work(regs);
-	local_irq_disable_exit_to_user();
+	local_irq_disable();
 	syscall_exit_to_user_mode_prepare(regs);
 	instrumentation_end();
 	exit_to_user_mode();
diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
index 3cf4d21168ba..93b4b551f7ae 100644
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -100,37 +100,6 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs)
 	instrumentation_end();
 }
 
-/**
- * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
- * @ti_work:	Cached TIF flags gathered with interrupts disabled
- *
- * Defaults to local_irq_enable(). Can be supplied by architecture specific
- * code.
- */
-static inline void local_irq_enable_exit_to_user(unsigned long ti_work);
-
-#ifndef local_irq_enable_exit_to_user
-static __always_inline void local_irq_enable_exit_to_user(unsigned long ti_work)
-{
-	local_irq_enable();
-}
-#endif
-
-/**
- * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable()
- *
- * Defaults to local_irq_disable(). Can be supplied by architecture specific
- * code.
- */
-static inline void local_irq_disable_exit_to_user(void);
-
-#ifndef local_irq_disable_exit_to_user
-static __always_inline void local_irq_disable_exit_to_user(void)
-{
-	local_irq_disable();
-}
-#endif
-
 /**
  * arch_exit_to_user_mode_work - Architecture specific TIF work for exit
  *				 to user mode.
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 9ef63e414791..b5e05d87ba39 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -47,7 +47,7 @@ static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *re
 	 */
 	while (ti_work & EXIT_TO_USER_MODE_WORK_LOOP) {
 
-		local_irq_enable_exit_to_user(ti_work);
+		local_irq_enable();
 
 		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {
 			if (!rseq_grant_slice_extension(ti_work & TIF_SLICE_EXT_DENY))
@@ -74,7 +74,7 @@ static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *re
 		 * might have changed while interrupts and preemption was
 		 * enabled above.
 		 */
-		local_irq_disable_exit_to_user();
+		local_irq_disable();
 
 		/* Check if any of the above work has queued a deferred wakeup */
 		tick_nohz_user_enter_prepare();
-- 
cgit v1.2.3


From eb1b51afde506a8e38976190e518990d69ef5382 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 7 Apr 2026 14:16:43 +0100
Subject: entry: Move irqentry_enter() prototype later

Subsequent patches will rework the irqentry_*() functions. The end
result (and the intermediate diffs) will be much clearer if the
prototype for the irqentry_enter() function is moved later, immediately
before the prototype of the irqentry_exit() function.

Move the prototype later.

This is purely a move; there should be no functional change as a result
of this change.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Jinjie Ruan <ruanjinjie@huawei.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260407131650.3813777-4-mark.rutland@arm.com
---
 include/linux/irq-entry-common.h | 44 ++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
index 93b4b551f7ae..d1e8591a5919 100644
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -334,6 +334,28 @@ typedef struct irqentry_state {
 } irqentry_state_t;
 #endif
 
+/**
+ * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
+ *
+ * Conditional reschedule with additional sanity checks.
+ */
+void raw_irqentry_exit_cond_resched(void);
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+#define irqentry_exit_cond_resched_dynamic_enabled	raw_irqentry_exit_cond_resched
+#define irqentry_exit_cond_resched_dynamic_disabled	NULL
+DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
+#define irqentry_exit_cond_resched()	static_call(irqentry_exit_cond_resched)()
+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+void dynamic_irqentry_exit_cond_resched(void);
+#define irqentry_exit_cond_resched()	dynamic_irqentry_exit_cond_resched()
+#endif
+#else /* CONFIG_PREEMPT_DYNAMIC */
+#define irqentry_exit_cond_resched()	raw_irqentry_exit_cond_resched()
+#endif /* CONFIG_PREEMPT_DYNAMIC */
+
 /**
  * irqentry_enter - Handle state tracking on ordinary interrupt entries
  * @regs:	Pointer to pt_regs of interrupted context
@@ -367,28 +389,6 @@ typedef struct irqentry_state {
  */
 irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
 
-/**
- * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
- *
- * Conditional reschedule with additional sanity checks.
- */
-void raw_irqentry_exit_cond_resched(void);
-
-#ifdef CONFIG_PREEMPT_DYNAMIC
-#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-#define irqentry_exit_cond_resched_dynamic_enabled	raw_irqentry_exit_cond_resched
-#define irqentry_exit_cond_resched_dynamic_disabled	NULL
-DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
-#define irqentry_exit_cond_resched()	static_call(irqentry_exit_cond_resched)()
-#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
-DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
-void dynamic_irqentry_exit_cond_resched(void);
-#define irqentry_exit_cond_resched()	dynamic_irqentry_exit_cond_resched()
-#endif
-#else /* CONFIG_PREEMPT_DYNAMIC */
-#define irqentry_exit_cond_resched()	raw_irqentry_exit_cond_resched()
-#endif /* CONFIG_PREEMPT_DYNAMIC */
-
 /**
  * irqentry_exit - Handle return from exception that used irqentry_enter()
  * @regs:	Pointer to pt_regs (exception entry regs)
-- 
cgit v1.2.3


From c5538d0141b383808f440186fcd0bc2799af2853 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 7 Apr 2026 14:16:44 +0100
Subject: entry: Split kernel mode logic from irqentry_{enter,exit}()

The generic irqentry code has entry/exit functions specifically for
exceptions taken from user mode, but doesn't have entry/exit functions
specifically for exceptions taken from kernel mode.

It would be helpful to have separate entry/exit functions specifically
for exceptions taken from kernel mode. This would make the structure of
the entry code more consistent, and would make it easier for
architectures to manage logic specific to exceptions taken from kernel
mode.

Move the logic specific to kernel mode out of irqentry_enter() and
irqentry_exit() into new irqentry_enter_from_kernel_mode() and
irqentry_exit_to_kernel_mode() functions. These are marked
__always_inline and placed in irq-entry-common.h, as with
irqentry_enter_from_user_mode() and irqentry_exit_to_user_mode(), so
that they can be inlined into architecture-specific wrappers. The
existing out-of-line irqentry_enter() and irqentry_exit() functions
retained as callers of the new functions.

The lockdep assertion from irqentry_exit() is moved into
irqentry_exit_to_user_mode() and irqentry_exit_to_kernel_mode(). This
was previously missing from irqentry_exit_to_user_mode() when called
directly, and any new lockdep assertion failure relating from this
change is a latent bug.

Aside from the lockdep change noted above, there should be no functional
change as a result of this change.

[ tglx: Updated kernel doc ]

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Jinjie Ruan <ruanjinjie@huawei.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260407131650.3813777-5-mark.rutland@arm.com
---
 include/linux/irq-entry-common.h | 134 +++++++++++++++++++++++++++++++++++++++
 kernel/entry/common.c            | 103 +++---------------------------
 2 files changed, 142 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
index d1e8591a5919..66bc168bc6b5 100644
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -304,6 +304,8 @@ static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
  */
 static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs)
 {
+	lockdep_assert_irqs_disabled();
+
 	instrumentation_begin();
 	irqentry_exit_to_user_mode_prepare(regs);
 	instrumentation_end();
@@ -356,6 +358,138 @@ void dynamic_irqentry_exit_cond_resched(void);
 #define irqentry_exit_cond_resched()	raw_irqentry_exit_cond_resched()
 #endif /* CONFIG_PREEMPT_DYNAMIC */
 
+/**
+ * irqentry_enter_from_kernel_mode - Establish state before invoking the irq handler
+ * @regs:	Pointer to currents pt_regs
+ *
+ * Invoked from architecture specific entry code with interrupts disabled.
+ * Can only be called when the interrupt entry came from kernel mode. The
+ * calling code must be non-instrumentable.  When the function returns all
+ * state is correct and the subsequent functions can be instrumented.
+ *
+ * The function establishes state (lockdep, RCU (context tracking), tracing) and
+ * is provided for architectures which require a strict split between entry from
+ * kernel and user mode and therefore cannot use irqentry_enter() which handles
+ * both entry modes.
+ *
+ * Returns: An opaque object that must be passed to irqentry_exit_to_kernel_mode().
+ */
+static __always_inline irqentry_state_t irqentry_enter_from_kernel_mode(struct pt_regs *regs)
+{
+	irqentry_state_t ret = {
+		.exit_rcu = false,
+	};
+
+	/*
+	 * If this entry hit the idle task invoke ct_irq_enter() whether
+	 * RCU is watching or not.
+	 *
+	 * Interrupts can nest when the first interrupt invokes softirq
+	 * processing on return which enables interrupts.
+	 *
+	 * Scheduler ticks in the idle task can mark quiescent state and
+	 * terminate a grace period, if and only if the timer interrupt is
+	 * not nested into another interrupt.
+	 *
+	 * Checking for rcu_is_watching() here would prevent the nesting
+	 * interrupt to invoke ct_irq_enter(). If that nested interrupt is
+	 * the tick then rcu_flavor_sched_clock_irq() would wrongfully
+	 * assume that it is the first interrupt and eventually claim
+	 * quiescent state and end grace periods prematurely.
+	 *
+	 * Unconditionally invoke ct_irq_enter() so RCU state stays
+	 * consistent.
+	 *
+	 * TINY_RCU does not support EQS, so let the compiler eliminate
+	 * this part when enabled.
+	 */
+	if (!IS_ENABLED(CONFIG_TINY_RCU) &&
+	    (is_idle_task(current) || arch_in_rcu_eqs())) {
+		/*
+		 * If RCU is not watching then the same careful
+		 * sequence vs. lockdep and tracing is required
+		 * as in irqentry_enter_from_user_mode().
+		 */
+		lockdep_hardirqs_off(CALLER_ADDR0);
+		ct_irq_enter();
+		instrumentation_begin();
+		kmsan_unpoison_entry_regs(regs);
+		trace_hardirqs_off_finish();
+		instrumentation_end();
+
+		ret.exit_rcu = true;
+		return ret;
+	}
+
+	/*
+	 * If RCU is watching then RCU only wants to check whether it needs
+	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
+	 * already contains a warning when RCU is not watching, so no point
+	 * in having another one here.
+	 */
+	lockdep_hardirqs_off(CALLER_ADDR0);
+	instrumentation_begin();
+	kmsan_unpoison_entry_regs(regs);
+	rcu_irq_enter_check_tick();
+	trace_hardirqs_off_finish();
+	instrumentation_end();
+
+	return ret;
+}
+
+/**
+ * irqentry_exit_to_kernel_mode - Run preempt checks and establish state after
+ *				  invoking the interrupt handler
+ * @regs:	Pointer to current's pt_regs
+ * @state:	Return value from matching call to irqentry_enter_from_kernel_mode()
+ *
+ * This is the counterpart of irqentry_enter_from_kernel_mode() and runs the
+ * necessary preemption check if possible and required. It returns to the caller
+ * with interrupts disabled and the correct state vs. tracing, lockdep and RCU
+ * required to return to the interrupted context.
+ *
+ * It is the last action before returning to the low level ASM code which just
+ * needs to return.
+ */
+static __always_inline void irqentry_exit_to_kernel_mode(struct pt_regs *regs,
+							 irqentry_state_t state)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (!regs_irqs_disabled(regs)) {
+		/*
+		 * If RCU was not watching on entry this needs to be done
+		 * carefully and needs the same ordering of lockdep/tracing
+		 * and RCU as the return to user mode path.
+		 */
+		if (state.exit_rcu) {
+			instrumentation_begin();
+			/* Tell the tracer that IRET will enable interrupts */
+			trace_hardirqs_on_prepare();
+			lockdep_hardirqs_on_prepare();
+			instrumentation_end();
+			ct_irq_exit();
+			lockdep_hardirqs_on(CALLER_ADDR0);
+			return;
+		}
+
+		instrumentation_begin();
+		if (IS_ENABLED(CONFIG_PREEMPTION))
+			irqentry_exit_cond_resched();
+
+		/* Covers both tracing and lockdep */
+		trace_hardirqs_on();
+		instrumentation_end();
+	} else {
+		/*
+		 * IRQ flags state is correct already. Just tell RCU if it
+		 * was not watching on entry.
+		 */
+		if (state.exit_rcu)
+			ct_irq_exit();
+	}
+}
+
 /**
  * irqentry_enter - Handle state tracking on ordinary interrupt entries
  * @regs:	Pointer to pt_regs of interrupted context
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index b5e05d87ba39..1034be02eae8 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -105,70 +105,16 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 
 noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
 {
-	irqentry_state_t ret = {
-		.exit_rcu = false,
-	};
-
 	if (user_mode(regs)) {
-		irqentry_enter_from_user_mode(regs);
-		return ret;
-	}
+		irqentry_state_t ret = {
+			.exit_rcu = false,
+		};
 
-	/*
-	 * If this entry hit the idle task invoke ct_irq_enter() whether
-	 * RCU is watching or not.
-	 *
-	 * Interrupts can nest when the first interrupt invokes softirq
-	 * processing on return which enables interrupts.
-	 *
-	 * Scheduler ticks in the idle task can mark quiescent state and
-	 * terminate a grace period, if and only if the timer interrupt is
-	 * not nested into another interrupt.
-	 *
-	 * Checking for rcu_is_watching() here would prevent the nesting
-	 * interrupt to invoke ct_irq_enter(). If that nested interrupt is
-	 * the tick then rcu_flavor_sched_clock_irq() would wrongfully
-	 * assume that it is the first interrupt and eventually claim
-	 * quiescent state and end grace periods prematurely.
-	 *
-	 * Unconditionally invoke ct_irq_enter() so RCU state stays
-	 * consistent.
-	 *
-	 * TINY_RCU does not support EQS, so let the compiler eliminate
-	 * this part when enabled.
-	 */
-	if (!IS_ENABLED(CONFIG_TINY_RCU) &&
-	    (is_idle_task(current) || arch_in_rcu_eqs())) {
-		/*
-		 * If RCU is not watching then the same careful
-		 * sequence vs. lockdep and tracing is required
-		 * as in irqentry_enter_from_user_mode().
-		 */
-		lockdep_hardirqs_off(CALLER_ADDR0);
-		ct_irq_enter();
-		instrumentation_begin();
-		kmsan_unpoison_entry_regs(regs);
-		trace_hardirqs_off_finish();
-		instrumentation_end();
-
-		ret.exit_rcu = true;
+		irqentry_enter_from_user_mode(regs);
 		return ret;
 	}
 
-	/*
-	 * If RCU is watching then RCU only wants to check whether it needs
-	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
-	 * already contains a warning when RCU is not watching, so no point
-	 * in having another one here.
-	 */
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	instrumentation_begin();
-	kmsan_unpoison_entry_regs(regs);
-	rcu_irq_enter_check_tick();
-	trace_hardirqs_off_finish();
-	instrumentation_end();
-
-	return ret;
+	return irqentry_enter_from_kernel_mode(regs);
 }
 
 /**
@@ -212,43 +158,10 @@ void dynamic_irqentry_exit_cond_resched(void)
 
 noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
 {
-	lockdep_assert_irqs_disabled();
-
-	/* Check whether this returns to user mode */
-	if (user_mode(regs)) {
+	if (user_mode(regs))
 		irqentry_exit_to_user_mode(regs);
-	} else if (!regs_irqs_disabled(regs)) {
-		/*
-		 * If RCU was not watching on entry this needs to be done
-		 * carefully and needs the same ordering of lockdep/tracing
-		 * and RCU as the return to user mode path.
-		 */
-		if (state.exit_rcu) {
-			instrumentation_begin();
-			/* Tell the tracer that IRET will enable interrupts */
-			trace_hardirqs_on_prepare();
-			lockdep_hardirqs_on_prepare();
-			instrumentation_end();
-			ct_irq_exit();
-			lockdep_hardirqs_on(CALLER_ADDR0);
-			return;
-		}
-
-		instrumentation_begin();
-		if (IS_ENABLED(CONFIG_PREEMPTION))
-			irqentry_exit_cond_resched();
-
-		/* Covers both tracing and lockdep */
-		trace_hardirqs_on();
-		instrumentation_end();
-	} else {
-		/*
-		 * IRQ flags state is correct already. Just tell RCU if it
-		 * was not watching on entry.
-		 */
-		if (state.exit_rcu)
-			ct_irq_exit();
-	}
+	else
+		irqentry_exit_to_kernel_mode(regs, state);
 }
 
 irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
-- 
cgit v1.2.3


From 041aa7a85390c99b1de86dc28eddcff0890d8186 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 7 Apr 2026 14:16:45 +0100
Subject: entry: Split preemption from irqentry_exit_to_kernel_mode()

Some architecture-specific work needs to be performed between the state
management for exception entry/exit and the "real" work to handle the
exception. For example, arm64 needs to manipulate a number of exception
masking bits, with different exceptions requiring different masking.

Generally this can all be hidden in the architecture code, but for arm64
the current structure of irqentry_exit_to_kernel_mode() makes this
particularly difficult to handle in a way that is correct, maintainable,
and efficient.

The gory details are described in the thread surrounding:

  https://lore.kernel.org/lkml/acPAzdtjK5w-rNqC@J2N7QTR9R3/

The summary is:

* Currently, irqentry_exit_to_kernel_mode() handles both involuntary
  preemption AND state management necessary for exception return.

* When scheduling (including involuntary preemption), arm64 needs to
  have all arm64-specific exceptions unmasked, though regular interrupts
  must be masked.

* Prior to the state management for exception return, arm64 needs to
  mask a number of arm64-specific exceptions, and perform some work with
  these exceptions masked (with RCU watching, etc).

While in theory it is possible to handle this with a new arch_*() hook
called somewhere under irqentry_exit_to_kernel_mode(), this is fragile
and complicated, and doesn't match the flow used for exception return to
user mode, which has a separate 'prepare' step (where preemption can
occur) prior to the state management.

To solve this, refactor irqentry_exit_to_kernel_mode() to match the
style of {irqentry,syscall}_exit_to_user_mode(), moving preemption logic
into a new irqentry_exit_to_kernel_mode_preempt() function, and moving
state management in a new irqentry_exit_to_kernel_mode_after_preempt()
function. The existing irqentry_exit_to_kernel_mode() is left as a
caller of both of these, avoiding the need to modify existing callers.

There should be no functional change as a result of this change.

[ tglx: Updated kernel doc ]

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Jinjie Ruan <ruanjinjie@huawei.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260407131650.3813777-6-mark.rutland@arm.com
---
 include/linux/irq-entry-common.h | 73 ++++++++++++++++++++++++++++++++--------
 1 file changed, 59 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
index 66bc168bc6b5..384520217bfe 100644
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -438,24 +438,46 @@ static __always_inline irqentry_state_t irqentry_enter_from_kernel_mode(struct p
 }
 
 /**
- * irqentry_exit_to_kernel_mode - Run preempt checks and establish state after
- *				  invoking the interrupt handler
+ * irqentry_exit_to_kernel_mode_preempt - Run preempt checks on return to kernel mode
  * @regs:	Pointer to current's pt_regs
  * @state:	Return value from matching call to irqentry_enter_from_kernel_mode()
  *
- * This is the counterpart of irqentry_enter_from_kernel_mode() and runs the
- * necessary preemption check if possible and required. It returns to the caller
- * with interrupts disabled and the correct state vs. tracing, lockdep and RCU
- * required to return to the interrupted context.
+ * This is to be invoked before irqentry_exit_to_kernel_mode_after_preempt() to
+ * allow kernel preemption on return from interrupt.
+ *
+ * Must be invoked with interrupts disabled and CPU state which allows kernel
+ * preemption.
  *
- * It is the last action before returning to the low level ASM code which just
- * needs to return.
+ * After returning from this function, the caller can modify CPU state before
+ * invoking irqentry_exit_to_kernel_mode_after_preempt(), which is required to
+ * re-establish the tracing, lockdep and RCU state for returning to the
+ * interrupted context.
  */
-static __always_inline void irqentry_exit_to_kernel_mode(struct pt_regs *regs,
-							 irqentry_state_t state)
+static inline void irqentry_exit_to_kernel_mode_preempt(struct pt_regs *regs,
+							irqentry_state_t state)
 {
-	lockdep_assert_irqs_disabled();
+	if (regs_irqs_disabled(regs) || state.exit_rcu)
+		return;
+
+	if (IS_ENABLED(CONFIG_PREEMPTION))
+		irqentry_exit_cond_resched();
+}
 
+/**
+ * irqentry_exit_to_kernel_mode_after_preempt - Establish trace, lockdep and RCU state
+ * @regs:	Pointer to current's pt_regs
+ * @state:	Return value from matching call to irqentry_enter_from_kernel_mode()
+ *
+ * This is to be invoked after irqentry_exit_to_kernel_mode_preempt() and before
+ * actually returning to the interrupted context.
+ *
+ * There are no requirements for the CPU state other than being able to complete
+ * the tracing, lockdep and RCU state transitions. After this function returns
+ * the caller must return directly to the interrupted context.
+ */
+static __always_inline void
+irqentry_exit_to_kernel_mode_after_preempt(struct pt_regs *regs, irqentry_state_t state)
+{
 	if (!regs_irqs_disabled(regs)) {
 		/*
 		 * If RCU was not watching on entry this needs to be done
@@ -474,9 +496,6 @@ static __always_inline void irqentry_exit_to_kernel_mode(struct pt_regs *regs,
 		}
 
 		instrumentation_begin();
-		if (IS_ENABLED(CONFIG_PREEMPTION))
-			irqentry_exit_cond_resched();
-
 		/* Covers both tracing and lockdep */
 		trace_hardirqs_on();
 		instrumentation_end();
@@ -490,6 +509,32 @@ static __always_inline void irqentry_exit_to_kernel_mode(struct pt_regs *regs,
 	}
 }
 
+/**
+ * irqentry_exit_to_kernel_mode - Run preempt checks and establish state after
+ *				  invoking the interrupt handler
+ * @regs:	Pointer to current's pt_regs
+ * @state:	Return value from matching call to irqentry_enter_from_kernel_mode()
+ *
+ * This is the counterpart of irqentry_enter_from_kernel_mode() and combines
+ * the calls to irqentry_exit_to_kernel_mode_preempt() and
+ * irqentry_exit_to_kernel_mode_after_preempt().
+ *
+ * The requirement for the CPU state is that it can schedule. After the function
+ * returns the tracing, lockdep and RCU state transitions are completed and the
+ * caller must return directly to the interrupted context.
+ */
+static __always_inline void irqentry_exit_to_kernel_mode(struct pt_regs *regs,
+							 irqentry_state_t state)
+{
+	lockdep_assert_irqs_disabled();
+
+	instrumentation_begin();
+	irqentry_exit_to_kernel_mode_preempt(regs, state);
+	instrumentation_end();
+
+	irqentry_exit_to_kernel_mode_after_preempt(regs, state);
+}
+
 /**
  * irqentry_enter - Handle state tracking on ordinary interrupt entries
  * @regs:	Pointer to pt_regs of interrupted context
-- 
cgit v1.2.3


From d33e89d12295087afd108a42f5e240ff53820461 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 7 Apr 2026 16:09:21 +0200
Subject: thermal: core: Suspend thermal zones later and resume them earlier

To avoid some undesirable interactions between thermal zone suspend
and resume with user space that is running when those operations are
carried out, move them closer to the suspend and resume of devices,
respectively, by updating dpm_prepare() to carry out thermal zone
suspend and dpm_complete() to start thermal zone resume (that will
continue asynchronously).

This also makes the code easier to follow by removing one, arguably
redundant, level of indirection represented by the thermal PM notifier.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/2036875.PYKUYFuaPT@rafael.j.wysocki
---
 drivers/base/power/main.c      |  5 ++++
 drivers/thermal/thermal_core.c | 60 +++++++++++++-----------------------------
 include/linux/thermal.h        |  6 +++++
 3 files changed, 29 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 189de5250f25..e1b550664bab 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -33,6 +33,7 @@
 #include <trace/events/power.h>
 #include <linux/cpufreq.h>
 #include <linux/devfreq.h>
+#include <linux/thermal.h>
 #include <linux/timer.h>
 #include <linux/nmi.h>
 
@@ -1282,6 +1283,8 @@ void dpm_complete(pm_message_t state)
 	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
 
+	/* Start resuming thermal control */
+	thermal_pm_complete();
 	/* Allow device probing and trigger re-probing of deferred devices */
 	device_unblock_probing();
 	trace_suspend_resume(TPS("dpm_complete"), state.event, false);
@@ -2225,6 +2228,8 @@ int dpm_prepare(pm_message_t state)
 	 * instead. The normal behavior will be restored in dpm_complete().
 	 */
 	device_block_probing();
+	/* Suspend thermal control. */
+	thermal_pm_prepare();
 
 	mutex_lock(&dpm_list_mtx);
 	while (!list_empty(&dpm_list) && !error) {
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index f0f3a628c00c..2f4e2dc46b8f 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1823,7 +1823,7 @@ static void thermal_zone_pm_prepare(struct thermal_zone_device *tz)
 	cancel_delayed_work(&tz->poll_queue);
 }
 
-static void thermal_pm_notify_prepare(void)
+static void __thermal_pm_prepare(void)
 {
 	struct thermal_zone_device *tz;
 
@@ -1835,6 +1835,19 @@ static void thermal_pm_notify_prepare(void)
 		thermal_zone_pm_prepare(tz);
 }
 
+void thermal_pm_prepare(void)
+{
+	if (thermal_class_unavailable)
+		return;
+
+	__thermal_pm_prepare();
+	/*
+	 * Allow any leftover thermal work items already on the worqueue to
+	 * complete so they don't get in the way later.
+	 */
+	flush_workqueue(thermal_wq);
+}
+
 static void thermal_zone_pm_complete(struct thermal_zone_device *tz)
 {
 	guard(thermal_zone)(tz);
@@ -1851,10 +1864,13 @@ static void thermal_zone_pm_complete(struct thermal_zone_device *tz)
 	mod_delayed_work(thermal_wq, &tz->poll_queue, 0);
 }
 
-static void thermal_pm_notify_complete(void)
+void thermal_pm_complete(void)
 {
 	struct thermal_zone_device *tz;
 
+	if (thermal_class_unavailable)
+		return;
+
 	guard(mutex)(&thermal_list_lock);
 
 	thermal_pm_suspended = false;
@@ -1863,41 +1879,6 @@ static void thermal_pm_notify_complete(void)
 		thermal_zone_pm_complete(tz);
 }
 
-static int thermal_pm_notify(struct notifier_block *nb,
-			     unsigned long mode, void *_unused)
-{
-	switch (mode) {
-	case PM_HIBERNATION_PREPARE:
-	case PM_RESTORE_PREPARE:
-	case PM_SUSPEND_PREPARE:
-		thermal_pm_notify_prepare();
-		/*
-		 * Allow any leftover thermal work items already on the
-		 * worqueue to complete so they don't get in the way later.
-		 */
-		flush_workqueue(thermal_wq);
-		break;
-	case PM_POST_HIBERNATION:
-	case PM_POST_RESTORE:
-	case PM_POST_SUSPEND:
-		thermal_pm_notify_complete();
-		break;
-	default:
-		break;
-	}
-	return 0;
-}
-
-static struct notifier_block thermal_pm_nb = {
-	.notifier_call = thermal_pm_notify,
-	/*
-	 * Run at the lowest priority to avoid interference between the thermal
-	 * zone resume work items spawned by thermal_pm_notify() and the other
-	 * PM notifiers.
-	 */
-	.priority = INT_MIN,
-};
-
 static int __init thermal_init(void)
 {
 	int result;
@@ -1924,11 +1905,6 @@ static int __init thermal_init(void)
 
 	thermal_class_unavailable = false;
 
-	result = register_pm_notifier(&thermal_pm_nb);
-	if (result)
-		pr_warn("Thermal: Can not register suspend notifier, return %d\n",
-			result);
-
 	return 0;
 
 unregister_governors:
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 0b5ed6821080..0ddc77aeeca2 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -273,6 +273,9 @@ bool thermal_trip_is_bound_to_cdev(struct thermal_zone_device *tz,
 int thermal_zone_device_enable(struct thermal_zone_device *tz);
 int thermal_zone_device_disable(struct thermal_zone_device *tz);
 void thermal_zone_device_critical(struct thermal_zone_device *tz);
+
+void thermal_pm_prepare(void);
+void thermal_pm_complete(void);
 #else
 static inline struct thermal_zone_device *thermal_zone_device_register_with_trips(
 					const char *type,
@@ -350,6 +353,9 @@ static inline int thermal_zone_device_enable(struct thermal_zone_device *tz)
 
 static inline int thermal_zone_device_disable(struct thermal_zone_device *tz)
 { return -ENODEV; }
+
+static inline void thermal_pm_prepare(void) {}
+static inline void thermal_pm_complete(void) {}
 #endif /* CONFIG_THERMAL */
 
 #endif /* __THERMAL_H__ */
-- 
cgit v1.2.3


From 408df6213f56f467675dc0ecf156a8bd1984555e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 6 Apr 2026 21:36:48 -0700
Subject: dma-fence: correct kernel-doc function parameter @flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'make htmldocs' complains that dma_fence_unlock_irqrestore() is missing
a description of its @flags parameter. The description is there but it is
missing a ':' sign. Add that and correct the possessive form of "its".

WARNING: ../include/linux/dma-fence.h:414 function parameter 'flags' not described in 'dma_fence_unlock_irqrestore'

Fixes: 3e5067931b5d ("dma-buf: abstract fence locking v2")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20260407043649.2015894-1-rdunlap@infradead.org
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 include/linux/dma-fence.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 3dc93f068bf6..b52ab692b22e 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -408,9 +408,9 @@ static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
 /**
  * dma_fence_unlock_irqrestore - unlock the fence and irqrestore
  * @fence: the fence to unlock
- * @flags the CPU flags to restore
+ * @flags: the CPU flags to restore
  *
- * Unlock the fence, allowing it to change it's state to signaled again.
+ * Unlock the fence, allowing it to change its state to signaled again.
  */
 #define dma_fence_unlock_irqrestore(fence, flags)	\
 	spin_unlock_irqrestore(dma_fence_spinlock(fence), flags)
-- 
cgit v1.2.3


From 732af3aa6337fd56025c0548a9e54d6231052144 Mon Sep 17 00:00:00 2001
From: Viacheslav Dubeyko <slava@dubeyko.com>
Date: Fri, 3 Apr 2026 16:05:55 -0700
Subject: hfsplus: rework logic of map nodes creation in xattr b-tree

In hfsplus_init_header_node() when node_count > 63488
(header bitmap capacity), the code calculates map_nodes,
subtracts them from free_nodes, and marks their positions
used in the bitmap. However, it doesn't write the actual
map node structure (type, record offsets, bitmap) for
those physical positions, only node 0 is written.

This patch reworks hfsplus_create_attributes_file()
logic by introducing a specialized method of
hfsplus_init_map_node() and writing the allocated
map b-tree's nodes by means of
hfsplus_write_attributes_file_node() method.

cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
cc: Yangtao Li <frank.li@vivo.com>
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Viacheslav Dubeyko <slava@dubeyko.com>
Link: https://lore.kernel.org/r/20260403230556.614171-5-slava@dubeyko.com
Signed-off-by: Viacheslav Dubeyko <slava@dubeyko.com>
---
 fs/hfsplus/xattr.c         | 127 +++++++++++++++++++++++++++++++++++++--------
 include/linux/hfs_common.h |   2 +
 2 files changed, 106 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 89e2e7e46e96..75cb74466738 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -50,7 +50,7 @@ static bool is_known_namespace(const char *name)
 	return true;
 }
 
-static void hfsplus_init_header_node(struct inode *attr_file,
+static u32 hfsplus_init_header_node(struct inode *attr_file,
 					u32 clump_size,
 					char *buf, u16 node_size)
 {
@@ -59,6 +59,7 @@ static void hfsplus_init_header_node(struct inode *attr_file,
 	u16 offset;
 	__be16 *rec_offsets;
 	u32 hdr_node_map_rec_bits;
+	u32 map_nodes = 0;
 	char *bmp;
 	u32 used_nodes;
 	u32 used_bmp_bytes;
@@ -93,7 +94,6 @@ static void hfsplus_init_header_node(struct inode *attr_file,
 	hdr_node_map_rec_bits = 8 * (node_size - offset - (4 * sizeof(u16)));
 	if (be32_to_cpu(head->node_count) > hdr_node_map_rec_bits) {
 		u32 map_node_bits;
-		u32 map_nodes;
 
 		desc->next = cpu_to_be32(be32_to_cpu(head->leaf_tail) + 1);
 		map_node_bits = 8 * (node_size - sizeof(struct hfs_bnode_desc) -
@@ -116,21 +116,100 @@ static void hfsplus_init_header_node(struct inode *attr_file,
 	*bmp = ~(0xFF >> used_nodes);
 	offset += hdr_node_map_rec_bits / 8;
 	*--rec_offsets = cpu_to_be16(offset);
+
+	return map_nodes;
+}
+
+/*
+ * Initialize a map node buffer. Map nodes have a single bitmap record.
+ */
+static void hfsplus_init_map_node(u8 *buf, u16 node_size, u32 next_node)
+{
+	struct hfs_bnode_desc *desc;
+	__be16 *rec_offsets;
+	size_t rec_size = sizeof(__be16);
+	u16 offset;
+
+	memset(buf, 0, node_size);
+
+	desc = (struct hfs_bnode_desc *)buf;
+	desc->type = HFS_NODE_MAP;
+	desc->num_recs = cpu_to_be16(1);
+	desc->next = cpu_to_be32(next_node);
+
+	/*
+	 * A map node consists of the node descriptor and a single
+	 * map record. The map record is a continuation of the map
+	 * record contained in the header node. The size of the map
+	 * record is the size of the node, minus the size of
+	 * the node descriptor (14 bytes), minus the size of two offsets
+	 * (4 bytes), minus two bytes of free space. That is, the size of
+	 * the map record is the size of the node minus 20 bytes;
+	 * this keeps the length of the map record an even multiple of 4 bytes.
+	 * The start of the map record is not aligned to a 4-byte boundary:
+	 * it starts immediately after the node descriptor
+	 * (at an offset of 14 bytes).
+	 *
+	 * Two record offsets stored at the end of the node:
+	 *   record[1] = start of record 0 -> sizeof(hfs_bnode_desc)
+	 *   record[2] = start of free space
+	 */
+	rec_offsets = (__be16 *)(buf + node_size);
+
+	/* record #1 */
+	offset = sizeof(struct hfs_bnode_desc);
+	*--rec_offsets = cpu_to_be16(offset);
+
+	/* record #2 */
+	offset = node_size;
+	offset -= (u16)HFSPLUS_BTREE_MAP_NODE_RECS_COUNT * rec_size;
+	offset -= HFSPLUS_BTREE_MAP_NODE_RESERVED_BYTES;
+	*--rec_offsets = cpu_to_be16(offset);
+}
+
+static inline
+int hfsplus_write_attributes_file_node(struct inode *attr_file, char *buf,
+					u16 node_size, int *index)
+{
+	struct address_space *mapping;
+	struct page *page;
+	void *kaddr;
+	u32 written = 0;
+
+	mapping = attr_file->i_mapping;
+
+	for (; written < node_size; (*index)++, written += PAGE_SIZE) {
+		page = read_mapping_page(mapping, *index, NULL);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		kaddr = kmap_local_page(page);
+		memcpy(kaddr, buf + written,
+			min_t(size_t, PAGE_SIZE, node_size - written));
+		kunmap_local(kaddr);
+
+		set_page_dirty(page);
+		put_page(page);
+	}
+
+	return 0;
 }
 
 static int hfsplus_create_attributes_file(struct super_block *sb)
 {
-	int err = 0;
 	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	struct inode *attr_file;
 	struct hfsplus_inode_info *hip;
+	struct hfs_bnode_desc *desc;
 	u32 clump_size;
 	u16 node_size = HFSPLUS_ATTR_TREE_NODE_SIZE;
+	u32 next_node;
+	u32 map_node_idx;
+	u32 map_nodes;
 	char *buf;
-	int index, written;
-	struct address_space *mapping;
-	struct page *page;
+	int index;
 	int old_state = HFSPLUS_EMPTY_ATTR_TREE;
+	int err = 0;
 
 	hfs_dbg("ino %d\n", HFSPLUS_ATTR_CNID);
 
@@ -195,7 +274,7 @@ check_attr_tree_state_again:
 	}
 
 	while (hip->alloc_blocks < hip->clump_blocks) {
-		err = hfsplus_file_extend(attr_file, false);
+		err = hfsplus_file_extend(attr_file, true);
 		if (unlikely(err)) {
 			pr_err("failed to extend attributes file\n");
 			goto end_attr_file_creation;
@@ -212,28 +291,30 @@ check_attr_tree_state_again:
 		goto end_attr_file_creation;
 	}
 
-	hfsplus_init_header_node(attr_file, clump_size, buf, node_size);
+	map_nodes = hfsplus_init_header_node(attr_file, clump_size, buf, node_size);
 
-	mapping = attr_file->i_mapping;
+	desc = (struct hfs_bnode_desc *)buf;
+	next_node = be32_to_cpu(desc->next);
 
 	index = 0;
-	written = 0;
-	for (; written < node_size; index++, written += PAGE_SIZE) {
-		void *kaddr;
 
-		page = read_mapping_page(mapping, index, NULL);
-		if (IS_ERR(page)) {
-			err = PTR_ERR(page);
-			goto failed_header_node_init;
-		}
+	err = hfsplus_write_attributes_file_node(attr_file, buf,
+						 node_size, &index);
+	if (unlikely(err))
+		goto failed_header_node_init;
 
-		kaddr = kmap_atomic(page);
-		memcpy(kaddr, buf + written,
-			min_t(size_t, PAGE_SIZE, node_size - written));
-		kunmap_atomic(kaddr);
+	for (map_node_idx = 0; map_node_idx < map_nodes; map_node_idx++) {
+		if (next_node >= map_nodes)
+			next_node = 0;
 
-		set_page_dirty(page);
-		put_page(page);
+		hfsplus_init_map_node(buf, node_size, next_node);
+
+		err = hfsplus_write_attributes_file_node(attr_file, buf,
+							 node_size, &index);
+		if (unlikely(err))
+			goto failed_header_node_init;
+
+		next_node++;
 	}
 
 	hfsplus_mark_inode_dirty(HFSPLUS_ATTR_TREE_I(sb), HFSPLUS_I_ATTR_DIRTY);
diff --git a/include/linux/hfs_common.h b/include/linux/hfs_common.h
index 9e71b9a03b60..07dfc39630ab 100644
--- a/include/linux/hfs_common.h
+++ b/include/linux/hfs_common.h
@@ -518,6 +518,8 @@ struct hfs_btree_header_rec {
 #define HFSPLUS_BTREE_HDR_MAP_REC_INDEX		2	/* Map (bitmap) record in Header node */
 #define HFSPLUS_BTREE_MAP_NODE_REC_INDEX	0	/* Map record in Map Node */
 #define HFSPLUS_BTREE_HDR_USER_BYTES		128
+#define HFSPLUS_BTREE_MAP_NODE_RECS_COUNT	2
+#define HFSPLUS_BTREE_MAP_NODE_RESERVED_BYTES	2
 
 /* btree key type */
 #define HFSPLUS_KEY_CASEFOLDING		0xCF	/* case-insensitive */
-- 
cgit v1.2.3


From fbb98834a9221de850a3b1afd78a25473685f9b5 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Wed, 8 Apr 2026 04:13:53 +0200
Subject: bpf: Extract bpf_get_linfo_file_line

Extract bpf_get_linfo_file_line as its own function so that the logic to
obtain the file, line, and line number for a given program can be shared
in subsequent patches.

Reviewed-by: Puranjay Mohan <puranjay@kernel.org>
Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260408021359.3786905-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h |  2 ++
 kernel/bpf/core.c   | 29 +++++++++++++++++++++--------
 2 files changed, 23 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 30d35d5fe40b..d8fb9d61f5ce 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3945,6 +3945,8 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog)
 	return prog->aux->func_idx != 0;
 }
 
+void bpf_get_linfo_file_line(struct btf *btf, const struct bpf_line_info *linfo,
+			     const char **filep, const char **linep, int *nump);
 int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep,
 			   const char **linep, int *nump);
 struct bpf_prog *bpf_prog_find_from_stack(void);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 89b89f55415c..ada76f997177 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -3315,6 +3315,26 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);
 
 #ifdef CONFIG_BPF_SYSCALL
 
+void bpf_get_linfo_file_line(struct btf *btf, const struct bpf_line_info *linfo,
+			     const char **filep, const char **linep, int *nump)
+{
+	/* Get base component of the file path. */
+	if (filep) {
+		*filep = btf_name_by_offset(btf, linfo->file_name_off);
+		*filep = kbasename(*filep);
+	}
+
+	/* Obtain the source line, and strip whitespace in prefix. */
+	if (linep) {
+		*linep = btf_name_by_offset(btf, linfo->line_off);
+		while (isspace(**linep))
+			*linep += 1;
+	}
+
+	if (nump)
+		*nump = BPF_LINE_INFO_LINE_NUM(linfo->line_col);
+}
+
 int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep,
 			   const char **linep, int *nump)
 {
@@ -3349,14 +3369,7 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char *
 	if (idx == -1)
 		return -ENOENT;
 
-	/* Get base component of the file path. */
-	*filep = btf_name_by_offset(btf, linfo[idx].file_name_off);
-	*filep = kbasename(*filep);
-	/* Obtain the source line, and strip whitespace in prefix. */
-	*linep = btf_name_by_offset(btf, linfo[idx].line_off);
-	while (isspace(**linep))
-		*linep += 1;
-	*nump = BPF_LINE_INFO_LINE_NUM(linfo[idx].line_col);
+	bpf_get_linfo_file_line(btf, &linfo[idx], filep, linep, nump);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 4f64d5b66418b7f5967b7f7614d6107bb1fba705 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Wed, 8 Apr 2026 04:13:54 +0200
Subject: bpf: Make find_linfo widely available

Move find_linfo() as bpf_find_linfo() into core.c to allow for its use
in the verifier in subsequent patches.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Link: https://lore.kernel.org/r/20260408021359.3786905-4-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h |  1 +
 kernel/bpf/core.c   | 37 +++++++++++++++++++++++++++++++++++++
 kernel/bpf/log.c    | 43 +------------------------------------------
 3 files changed, 39 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d8fb9d61f5ce..0136a108d083 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3945,6 +3945,7 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog)
 	return prog->aux->func_idx != 0;
 }
 
+const struct bpf_line_info *bpf_find_linfo(const struct bpf_prog *prog, u32 insn_off);
 void bpf_get_linfo_file_line(struct btf *btf, const struct bpf_line_info *linfo,
 			     const char **filep, const char **linep, int *nump);
 int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ada76f997177..066b86e7233c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -3335,6 +3335,43 @@ void bpf_get_linfo_file_line(struct btf *btf, const struct bpf_line_info *linfo,
 		*nump = BPF_LINE_INFO_LINE_NUM(linfo->line_col);
 }
 
+const struct bpf_line_info *bpf_find_linfo(const struct bpf_prog *prog, u32 insn_off)
+{
+	const struct bpf_line_info *linfo;
+	u32 nr_linfo;
+	int l, r, m;
+
+	nr_linfo = prog->aux->nr_linfo;
+	if (!nr_linfo || insn_off >= prog->len)
+		return NULL;
+
+	linfo = prog->aux->linfo;
+	/* Loop invariant: linfo[l].insn_off <= insns_off.
+	 * linfo[0].insn_off == 0 which always satisfies above condition.
+	 * Binary search is searching for rightmost linfo entry that satisfies
+	 * the above invariant, giving us the desired record that covers given
+	 * instruction offset.
+	 */
+	l = 0;
+	r = nr_linfo - 1;
+	while (l < r) {
+		/* (r - l + 1) / 2 means we break a tie to the right, so if:
+		 * l=1, r=2, linfo[l].insn_off <= insn_off, linfo[r].insn_off > insn_off,
+		 * then m=2, we see that linfo[m].insn_off > insn_off, and so
+		 * r becomes 1 and we exit the loop with correct l==1.
+		 * If the tie was broken to the left, m=1 would end us up in
+		 * an endless loop where l and m stay at 1 and r stays at 2.
+		 */
+		m = l + (r - l + 1) / 2;
+		if (linfo[m].insn_off <= insn_off)
+			l = m;
+		else
+			r = m - 1;
+	}
+
+	return &linfo[l];
+}
+
 int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep,
 			   const char **linep, int *nump)
 {
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 37d72b052192..6fd030fd6eeb 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -329,47 +329,6 @@ __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
 }
 EXPORT_SYMBOL_GPL(bpf_log);
 
-static const struct bpf_line_info *
-find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
-{
-	const struct bpf_line_info *linfo;
-	const struct bpf_prog *prog;
-	u32 nr_linfo;
-	int l, r, m;
-
-	prog = env->prog;
-	nr_linfo = prog->aux->nr_linfo;
-
-	if (!nr_linfo || insn_off >= prog->len)
-		return NULL;
-
-	linfo = prog->aux->linfo;
-	/* Loop invariant: linfo[l].insn_off <= insns_off.
-	 * linfo[0].insn_off == 0 which always satisfies above condition.
-	 * Binary search is searching for rightmost linfo entry that satisfies
-	 * the above invariant, giving us the desired record that covers given
-	 * instruction offset.
-	 */
-	l = 0;
-	r = nr_linfo - 1;
-	while (l < r) {
-		/* (r - l + 1) / 2 means we break a tie to the right, so if:
-		 * l=1, r=2, linfo[l].insn_off <= insn_off, linfo[r].insn_off > insn_off,
-		 * then m=2, we see that linfo[m].insn_off > insn_off, and so
-		 * r becomes 1 and we exit the loop with correct l==1.
-		 * If the tie was broken to the left, m=1 would end us up in
-		 * an endless loop where l and m stay at 1 and r stays at 2.
-		 */
-		m = l + (r - l + 1) / 2;
-		if (linfo[m].insn_off <= insn_off)
-			l = m;
-		else
-			r = m - 1;
-	}
-
-	return &linfo[l];
-}
-
 static const char *ltrim(const char *s)
 {
 	while (isspace(*s))
@@ -390,7 +349,7 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
 		return;
 
 	prev_linfo = env->prev_linfo;
-	linfo = find_linfo(env, insn_off);
+	linfo = bpf_find_linfo(env->prog, insn_off);
 	if (!linfo || linfo == prev_linfo)
 		return;
 
-- 
cgit v1.2.3


From b773b9935239e9bec86b96ce91b6ba2252c20b44 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 7 Apr 2026 00:21:55 +0300
Subject: net: dsa: remove struct platform_data

This is not used anywhere in the kernel.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20260406212158.721806-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/dsa/dsa.rst |  5 -----
 include/linux/platform_data/dsa.h    | 17 -----------------
 2 files changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index 5c79740a533b..fd3c254ced1d 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -383,11 +383,6 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as
   well as various properties of its ports: names/labels, and finally a routing
   table indication (when cascading switches)
 
-- ``dsa_platform_data``: platform device configuration data which can reference
-  a collection of dsa_chip_data structures if multiple switches are cascaded,
-  the conduit network device this switch tree is attached to needs to be
-  referenced
-
 - ``dsa_switch_tree``: structure assigned to the conduit network device under
   ``dsa_ptr``, this structure references a dsa_platform_data structure as well as
   the tagging protocol supported by the switch tree, and which receive/transmit
diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h
index d4d9bf2060a6..fec1ae5bddb9 100644
--- a/include/linux/platform_data/dsa.h
+++ b/include/linux/platform_data/dsa.h
@@ -48,21 +48,4 @@ struct dsa_chip_data {
 	s8		rtable[DSA_MAX_SWITCHES];
 };
 
-struct dsa_platform_data {
-	/*
-	 * Reference to a Linux network interface that connects
-	 * to the root switch chip of the tree.
-	 */
-	struct device	*netdev;
-	struct net_device *of_netdev;
-
-	/*
-	 * Info structs describing each of the switch chips
-	 * connected via this network interface.
-	 */
-	int		nr_chips;
-	struct dsa_chip_data	*chip;
-};
-
-
 #endif /* __DSA_PDATA_H */
-- 
cgit v1.2.3


From dc915f375e545cf72421d66ede983d88e298228f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 7 Apr 2026 00:21:56 +0300
Subject: net: dsa: clean up struct dsa_chip_data

This has accumulated some fields which are no longer parsed by the core
or set by any driver. Remove them.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20260406212158.721806-3-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/platform_data/dsa.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h
index fec1ae5bddb9..031f4cf83ae2 100644
--- a/include/linux/platform_data/dsa.h
+++ b/include/linux/platform_data/dsa.h
@@ -10,12 +10,6 @@ struct net_device;
 #define DSA_RTABLE_NONE		-1
 
 struct dsa_chip_data {
-	/*
-	 * How to access the switch configuration registers.
-	 */
-	struct device	*host_dev;
-	int		sw_addr;
-
 	/*
 	 * Reference to network devices
 	 */
@@ -24,12 +18,6 @@ struct dsa_chip_data {
 	/* set to size of eeprom if supported by the switch */
 	int		eeprom_len;
 
-	/* Device tree node pointer for this specific switch chip
-	 * used during switch setup in case additional properties
-	 * and resources needs to be used
-	 */
-	struct device_node *of_node;
-
 	/*
 	 * The names of the switch's ports.  Use "cpu" to
 	 * designate the switch port that the cpu is connected to,
@@ -38,14 +26,6 @@ struct dsa_chip_data {
 	 * or any other string to indicate this is a physical port.
 	 */
 	char		*port_names[DSA_MAX_PORTS];
-	struct device_node *port_dn[DSA_MAX_PORTS];
-
-	/*
-	 * An array of which element [a] indicates which port on this
-	 * switch should be used to send packets to that are destined
-	 * for switch a. Can be NULL if there is only one switch chip.
-	 */
-	s8		rtable[DSA_MAX_SWITCHES];
 };
 
 #endif /* __DSA_PDATA_H */
-- 
cgit v1.2.3


From c3b09190e658d3f1c3cd595df3a931962662f8f0 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 7 Apr 2026 00:21:57 +0300
Subject: net: dsa: remove unused platform_data definitions

Pretty self-explanatory, nobody needs these.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20260406212158.721806-4-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/platform_data/dsa.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h
index 031f4cf83ae2..77424bb24723 100644
--- a/include/linux/platform_data/dsa.h
+++ b/include/linux/platform_data/dsa.h
@@ -3,11 +3,8 @@
 #define __DSA_PDATA_H
 
 struct device;
-struct net_device;
 
-#define DSA_MAX_SWITCHES	4
 #define DSA_MAX_PORTS		12
-#define DSA_RTABLE_NONE		-1
 
 struct dsa_chip_data {
 	/*
-- 
cgit v1.2.3


From da9008674d9658de1e9f45d386ff6627313f39f7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 7 Apr 2026 00:21:58 +0300
Subject: net: dsa: eliminate <linux/dsa/loop.h>

There is no reason at all to export these data types to the global
include directory.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20260406212158.721806-5-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/dsa_loop.c | 35 ++++++++++++++++++++++++++++++++++-
 include/linux/dsa/loop.h   | 42 ------------------------------------------
 2 files changed, 34 insertions(+), 43 deletions(-)
 delete mode 100644 include/linux/dsa/loop.h

(limited to 'include/linux')

diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index b41254b3ac42..7058faf23592 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -14,13 +14,46 @@
 #include <linux/workqueue.h>
 #include <linux/module.h>
 #include <linux/if_bridge.h>
-#include <linux/dsa/loop.h>
+#include <linux/if_vlan.h>
+#include <linux/types.h>
 #include <net/dsa.h>
 
 #define DSA_LOOP_NUM_PORTS	6
 #define DSA_LOOP_CPU_PORT	(DSA_LOOP_NUM_PORTS - 1)
 #define NUM_FIXED_PHYS		(DSA_LOOP_NUM_PORTS - 2)
 
+struct dsa_loop_vlan {
+	u16 members;
+	u16 untagged;
+};
+
+struct dsa_loop_mib_entry {
+	char name[ETH_GSTRING_LEN];
+	unsigned long val;
+};
+
+enum dsa_loop_mib_counters {
+	DSA_LOOP_PHY_READ_OK,
+	DSA_LOOP_PHY_READ_ERR,
+	DSA_LOOP_PHY_WRITE_OK,
+	DSA_LOOP_PHY_WRITE_ERR,
+	__DSA_LOOP_CNT_MAX,
+};
+
+struct dsa_loop_port {
+	struct dsa_loop_mib_entry mib[__DSA_LOOP_CNT_MAX];
+	u16 pvid;
+	int mtu;
+};
+
+struct dsa_loop_priv {
+	struct mii_bus	*bus;
+	unsigned int	port_base;
+	struct dsa_loop_vlan vlans[VLAN_N_VID];
+	struct net_device *netdev;
+	struct dsa_loop_port ports[DSA_MAX_PORTS];
+};
+
 struct dsa_loop_pdata {
 	/* Must be first, such that dsa_register_switch() can access this
 	 * without gory pointer manipulations
diff --git a/include/linux/dsa/loop.h b/include/linux/dsa/loop.h
deleted file mode 100644
index b8fef35591aa..000000000000
--- a/include/linux/dsa/loop.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef DSA_LOOP_H
-#define DSA_LOOP_H
-
-#include <linux/if_vlan.h>
-#include <linux/types.h>
-#include <linux/ethtool.h>
-#include <net/dsa.h>
-
-struct dsa_loop_vlan {
-	u16 members;
-	u16 untagged;
-};
-
-struct dsa_loop_mib_entry {
-	char name[ETH_GSTRING_LEN];
-	unsigned long val;
-};
-
-enum dsa_loop_mib_counters {
-	DSA_LOOP_PHY_READ_OK,
-	DSA_LOOP_PHY_READ_ERR,
-	DSA_LOOP_PHY_WRITE_OK,
-	DSA_LOOP_PHY_WRITE_ERR,
-	__DSA_LOOP_CNT_MAX,
-};
-
-struct dsa_loop_port {
-	struct dsa_loop_mib_entry mib[__DSA_LOOP_CNT_MAX];
-	u16 pvid;
-	int mtu;
-};
-
-struct dsa_loop_priv {
-	struct mii_bus	*bus;
-	unsigned int	port_base;
-	struct dsa_loop_vlan vlans[VLAN_N_VID];
-	struct net_device *netdev;
-	struct dsa_loop_port ports[DSA_MAX_PORTS];
-};
-
-#endif /* DSA_LOOP_H */
-- 
cgit v1.2.3


From f9e3bd43d55f24331e5ea65f667dbb33716e7d6b Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Fri, 3 Apr 2026 12:00:27 +0300
Subject: net/mlx5: Rename MLX5_PF page counter type to MLX5_SELF

The MLX5_PF enum value in mlx5_func_type is used to track firmware
page allocations for the page manager function itself, which is either
the ECPF on SmartNIC systems or the host PF when there is no ECPF.

Rename it to MLX5_SELF to accurately reflect that this counter tracks
pages allocated by the manager for its own use, regardless of whether
it is a PF or ECPF.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260403090028.137783-2-tariqt@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 3 ++-
 include/linux/mlx5/driver.h                         | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 5ccb3ce98acb..77ffa31cc505 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -77,7 +77,8 @@ static u32 get_function(u16 func_id, bool ec_function)
 static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_function)
 {
 	if (!func_id)
-		return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF;
+		return mlx5_core_is_ecpf(dev) && !ec_function ?
+			MLX5_HOST_PF : MLX5_SELF;
 
 	if (func_id <= max(mlx5_core_max_vfs(dev), mlx5_core_max_ec_vfs(dev))) {
 		if (ec_function)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b8b5af78284d..10bc913591d5 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -550,7 +550,7 @@ struct mlx5_debugfs_entries {
 };
 
 enum mlx5_func_type {
-	MLX5_PF,
+	MLX5_SELF,
 	MLX5_VF,
 	MLX5_SF,
 	MLX5_HOST_PF,
-- 
cgit v1.2.3


From a1bac8b70ede332a05487081c7512d2947f3a912 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Fri, 3 Apr 2026 12:00:28 +0300
Subject: net/mlx5: Add icm_mng_function_id_mode cap bit

Introduce the capability bit icm_mng_function_id_mode to indicate that
the device firmware uses vhca_id instead of function_id as the effective
identifier for the firmware commands MANAGE_PAGES, QUERY_PAGES, and page
request event.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Akiva Goldberger <agoldberger@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260403090028.137783-3-tariqt@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 2400b4c38c77..007f5138db2b 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1654,6 +1654,11 @@ enum {
 	MLX5_STEERING_FORMAT_CONNECTX_8   = 3,
 };
 
+enum {
+	MLX5_ID_MODE_FUNCTION_INDEX   = 0,
+	MLX5_ID_MODE_FUNCTION_VHCA_ID = 1,
+};
+
 struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_0[0x6];
 	u8         page_request_disable[0x1];
@@ -1916,7 +1921,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_280[0x10];
 	u8         max_wqe_sz_sq[0x10];
 
-	u8         reserved_at_2a0[0x7];
+	u8         icm_mng_function_id_mode[0x1];
+	u8         reserved_at_2a1[0x6];
 	u8         mkey_pcie_tph[0x1];
 	u8         reserved_at_2a8[0x1];
 	u8         tis_tir_td_order[0x1];
-- 
cgit v1.2.3


From 5267f6ef49cb5fba426f2d286817b1355fde31da Mon Sep 17 00:00:00 2001
From: Li Chen <me@linux.beauty>
Date: Fri, 6 Mar 2026 16:56:39 +0800
Subject: jbd2: add jinode dirty range accessors

Provide a helper to fetch jinode dirty ranges in bytes. This lets
filesystem callbacks avoid depending on the internal representation,
preparing for a later conversion to page units.

Suggested-by: Andreas Dilger <adilger@dilger.ca>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Li Chen <me@linux.beauty>
Link: https://patch.msgid.link/20260306085643.465275-2-me@linux.beauty
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/jbd2.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a53a00d36228..64392baf5f4b 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -445,6 +445,20 @@ struct jbd2_inode {
 	loff_t i_dirty_end;
 };
 
+static inline bool jbd2_jinode_get_dirty_range(const struct jbd2_inode *jinode,
+					       loff_t *start, loff_t *end)
+{
+	loff_t start_byte = jinode->i_dirty_start;
+	loff_t end_byte = jinode->i_dirty_end;
+
+	if (!end_byte)
+		return false;
+
+	*start = start_byte;
+	*end = end_byte;
+	return true;
+}
+
 struct jbd2_revoke_table_s;
 
 /**
-- 
cgit v1.2.3


From 4edafa81a1d6020272d0c6eb68faeb810dd083c1 Mon Sep 17 00:00:00 2001
From: Li Chen <me@linux.beauty>
Date: Fri, 6 Mar 2026 16:56:42 +0800
Subject: jbd2: store jinode dirty range in PAGE_SIZE units

jbd2_inode fields are updated under journal->j_list_lock, but some paths
read them without holding the lock (e.g. fast commit helpers and ordered
truncate helpers).

READ_ONCE() alone is not sufficient for the dirty range fields when they
are stored as loff_t because 32-bit platforms can observe torn loads.
Store the dirty range in PAGE_SIZE units as pgoff_t instead.

Represent the dirty range end as an exclusive end page. This avoids a
special sentinel value and keeps MAX_LFS_FILESIZE on 32-bit representable.

Publish a new dirty range by updating end_page before start_page, and
treat start_page >= end_page as empty in the accessor for robustness.

Use READ_ONCE() on the read side and WRITE_ONCE() on the write side for the
dirty range and i_flags to match the existing lockless access pattern.

Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Li Chen <me@linux.beauty>
Link: https://patch.msgid.link/20260306085643.465275-5-me@linux.beauty
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/commit.c      | 55 +++++++++++++++++++++++++++++++++++++++------------
 fs/jbd2/journal.c     |  5 ++---
 fs/jbd2/transaction.c | 21 +++++++++++++-------
 include/linux/jbd2.h  | 34 ++++++++++++++++++++-----------
 4 files changed, 80 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7203d2d2624d..8cf61e7185c4 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -180,7 +180,13 @@ static int journal_wait_on_commit_record(journal_t *journal,
 /* Send all the data buffers related to an inode */
 int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
 {
-	if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
+	unsigned long flags;
+
+	if (!jinode)
+		return 0;
+
+	flags = READ_ONCE(jinode->i_flags);
+	if (!(flags & JI_WRITE_DATA))
 		return 0;
 
 	trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -191,12 +197,30 @@ EXPORT_SYMBOL(jbd2_submit_inode_data);
 
 int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
 {
-	if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
-		!jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
+	struct address_space *mapping;
+	struct inode *inode;
+	unsigned long flags;
+	loff_t start_byte, end_byte;
+
+	if (!jinode)
+		return 0;
+
+	flags = READ_ONCE(jinode->i_flags);
+	if (!(flags & JI_WAIT_DATA))
+		return 0;
+
+	inode = jinode->i_vfs_inode;
+	if (!inode)
+		return 0;
+
+	mapping = inode->i_mapping;
+	if (!mapping)
+		return 0;
+
+	if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte))
 		return 0;
 	return filemap_fdatawait_range_keep_errors(
-		jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
-		jinode->i_dirty_end);
+		mapping, start_byte, end_byte);
 }
 EXPORT_SYMBOL(jbd2_wait_inode_data);
 
@@ -218,7 +242,8 @@ static int journal_submit_data_buffers(journal_t *journal,
 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
 		if (!(jinode->i_flags & JI_WRITE_DATA))
 			continue;
-		jinode->i_flags |= JI_COMMIT_RUNNING;
+		WRITE_ONCE(jinode->i_flags,
+			   jinode->i_flags | JI_COMMIT_RUNNING);
 		spin_unlock(&journal->j_list_lock);
 		/* submit the inode data buffers. */
 		trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -229,7 +254,8 @@ static int journal_submit_data_buffers(journal_t *journal,
 		}
 		spin_lock(&journal->j_list_lock);
 		J_ASSERT(jinode->i_transaction == commit_transaction);
-		jinode->i_flags &= ~JI_COMMIT_RUNNING;
+		WRITE_ONCE(jinode->i_flags,
+			   jinode->i_flags & ~JI_COMMIT_RUNNING);
 		smp_mb();
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
@@ -240,10 +266,13 @@ static int journal_submit_data_buffers(journal_t *journal,
 int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
 {
 	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
+	loff_t start_byte, end_byte;
+
+	if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte))
+		return 0;
 
 	return filemap_fdatawait_range_keep_errors(mapping,
-						   jinode->i_dirty_start,
-						   jinode->i_dirty_end);
+						   start_byte, end_byte);
 }
 
 /*
@@ -262,7 +291,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
 		if (!(jinode->i_flags & JI_WAIT_DATA))
 			continue;
-		jinode->i_flags |= JI_COMMIT_RUNNING;
+		WRITE_ONCE(jinode->i_flags, jinode->i_flags | JI_COMMIT_RUNNING);
 		spin_unlock(&journal->j_list_lock);
 		/* wait for the inode data buffers writeout. */
 		if (journal->j_finish_inode_data_buffers) {
@@ -272,7 +301,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 		}
 		cond_resched();
 		spin_lock(&journal->j_list_lock);
-		jinode->i_flags &= ~JI_COMMIT_RUNNING;
+		WRITE_ONCE(jinode->i_flags, jinode->i_flags & ~JI_COMMIT_RUNNING);
 		smp_mb();
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
@@ -288,8 +317,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 				&jinode->i_transaction->t_inode_list);
 		} else {
 			jinode->i_transaction = NULL;
-			jinode->i_dirty_start = 0;
-			jinode->i_dirty_end = 0;
+			WRITE_ONCE(jinode->i_dirty_start_page, 0);
+			WRITE_ONCE(jinode->i_dirty_end_page, 0);
 		}
 	}
 	spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index cb2c529a8f1b..609c8d965f12 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -3018,8 +3018,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
 	jinode->i_next_transaction = NULL;
 	jinode->i_vfs_inode = inode;
 	jinode->i_flags = 0;
-	jinode->i_dirty_start = 0;
-	jinode->i_dirty_end = 0;
+	jinode->i_dirty_start_page = 0;
+	jinode->i_dirty_end_page = 0;
 	INIT_LIST_HEAD(&jinode->i_list);
 }
 
@@ -3176,4 +3176,3 @@ MODULE_DESCRIPTION("Generic filesystem journal-writing module");
 MODULE_LICENSE("GPL");
 module_init(journal_init);
 module_exit(journal_exit);
-
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 02cb87dc6fa8..495f00129844 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2694,6 +2694,7 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal;
+	pgoff_t start_page, end_page;
 	int err = 0;
 	int abort_transaction = 0;
 
@@ -2704,15 +2705,21 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
 	jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
 			transaction->t_tid);
 
+	start_page = (pgoff_t)(start_byte >> PAGE_SHIFT);
+	end_page = (pgoff_t)(end_byte >> PAGE_SHIFT) + 1;
+
 	spin_lock(&journal->j_list_lock);
-	jinode->i_flags |= flags;
+	WRITE_ONCE(jinode->i_flags, jinode->i_flags | flags);
 
-	if (jinode->i_dirty_end) {
-		jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
-		jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
+	if (jinode->i_dirty_start_page != jinode->i_dirty_end_page) {
+		WRITE_ONCE(jinode->i_dirty_start_page,
+			   min(jinode->i_dirty_start_page, start_page));
+		WRITE_ONCE(jinode->i_dirty_end_page,
+			   max(jinode->i_dirty_end_page, end_page));
 	} else {
-		jinode->i_dirty_start = start_byte;
-		jinode->i_dirty_end = end_byte;
+		/* Publish a new non-empty range by making end visible first. */
+		WRITE_ONCE(jinode->i_dirty_end_page, end_page);
+		WRITE_ONCE(jinode->i_dirty_start_page, start_page);
 	}
 
 	/* Is inode already attached where we need it? */
@@ -2802,7 +2809,7 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
 	int ret = 0;
 
 	/* This is a quick check to avoid locking if not necessary */
-	if (!jinode->i_transaction)
+	if (!READ_ONCE(jinode->i_transaction))
 		goto out;
 	/* Locks are here just to force reading of recent values, it is
 	 * enough that the transaction was not committing before we started
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 64392baf5f4b..7e785aa6d35d 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -429,33 +429,43 @@ struct jbd2_inode {
 	unsigned long i_flags;
 
 	/**
-	 * @i_dirty_start:
+	 * @i_dirty_start_page:
+	 *
+	 * Dirty range start in PAGE_SIZE units.
+	 *
+	 * The dirty range is empty if @i_dirty_start_page is greater than or
+	 * equal to @i_dirty_end_page.
 	 *
-	 * Offset in bytes where the dirty range for this inode starts.
 	 * [j_list_lock]
 	 */
-	loff_t i_dirty_start;
+	pgoff_t i_dirty_start_page;
 
 	/**
-	 * @i_dirty_end:
+	 * @i_dirty_end_page:
+	 *
+	 * Dirty range end in PAGE_SIZE units (exclusive).
 	 *
-	 * Inclusive offset in bytes where the dirty range for this inode
-	 * ends. [j_list_lock]
+	 * [j_list_lock]
 	 */
-	loff_t i_dirty_end;
+	pgoff_t i_dirty_end_page;
 };
 
+/*
+ * Lockless readers treat start_page >= end_page as an empty range.
+ * Writers publish a new non-empty range by storing i_dirty_end_page before
+ * i_dirty_start_page.
+ */
 static inline bool jbd2_jinode_get_dirty_range(const struct jbd2_inode *jinode,
 					       loff_t *start, loff_t *end)
 {
-	loff_t start_byte = jinode->i_dirty_start;
-	loff_t end_byte = jinode->i_dirty_end;
+	pgoff_t start_page = READ_ONCE(jinode->i_dirty_start_page);
+	pgoff_t end_page = READ_ONCE(jinode->i_dirty_end_page);
 
-	if (!end_byte)
+	if (start_page >= end_page)
 		return false;
 
-	*start = start_byte;
-	*end = end_byte;
+	*start = (loff_t)start_page << PAGE_SHIFT;
+	*end = ((loff_t)end_page << PAGE_SHIFT) - 1;
 	return true;
 }
 
-- 
cgit v1.2.3


From a142d0ae9f2ceb0fc7417e19ecfafc8179282e35 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Wed, 25 Feb 2026 13:39:48 +0100
Subject: memblock: Permit existing reserved regions to be marked RSRV_KERN

Permit existing memblock reservations to be marked as RSRV_KERN. This
will be used by the EFI code on x86 to distinguish between reservations
of boot services data regions that have actual significance to the
kernel and regions that are reserved temporarily to work around buggy
firmware.

Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/memblock.h |  1 +
 mm/memblock.c            | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6ec5e9ac0699..9eac4f268359 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -155,6 +155,7 @@ int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size);
+int memblock_reserved_mark_kern(phys_addr_t base, phys_addr_t size);
 int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size);
 int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index b3ddfdec7a80..2505ce8b319c 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1115,6 +1115,21 @@ int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
 				    MEMBLOCK_RSRV_NOINIT);
 }
 
+/**
+ * memblock_reserved_mark_kern - Mark a reserved memory region with flag
+ * MEMBLOCK_RSRV_KERN
+ *
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_reserved_mark_kern(phys_addr_t base, phys_addr_t size)
+{
+	return memblock_setclr_flag(&memblock.reserved, base, size, 1,
+				    MEMBLOCK_RSRV_KERN);
+}
+
 /**
  * memblock_mark_kho_scratch - Mark a memory region as MEMBLOCK_KHO_SCRATCH.
  * @base: the base phys addr of the region
-- 
cgit v1.2.3


From 592a22338e5acfcd10983699cae8ea02ecd42935 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 7 Apr 2026 17:14:31 +0200
Subject: bitops: Update kernel-doc for sign_extendXX()

The sign_extendXX() lack of Return section and have other style
issues. Address that by updating kernel-doc accordingly.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Yury Norov <ynorov@nvidia.com>
---
 include/linux/bitops.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 1fe46703792f..657eab2725ce 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -179,9 +179,11 @@ static inline __u8 ror8(__u8 word, unsigned int shift)
 /**
  * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit
  * @value: value to sign extend
- * @index: 0 based bit index (0<=index<32) to sign bit
+ * @index: 0 based bit index (0 <= index < 32) to sign bit
  *
  * This is safe to use for 16- and 8-bit types as well.
+ *
+ * Return: 32-bit sign extended value
  */
 static __always_inline __s32 sign_extend32(__u32 value, int index)
 {
@@ -192,7 +194,11 @@ static __always_inline __s32 sign_extend32(__u32 value, int index)
 /**
  * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit
  * @value: value to sign extend
- * @index: 0 based bit index (0<=index<64) to sign bit
+ * @index: 0 based bit index (0 <= index < 64) to sign bit
+ *
+ * This is safe to use for 32-, 16- and 8-bit types as well.
+ *
+ * Return: 64-bit sign extended value
  */
 static __always_inline __s64 sign_extend64(__u64 value, int index)
 {
-- 
cgit v1.2.3


From d04686d9bc86432ea3008d5f358373d8466d1943 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 3 Apr 2026 01:10:19 +0200
Subject: net: Implement netdev_nl_queue_create_doit

Implement netdev_nl_queue_create_doit which creates a new rx queue in a
virtual netdev and then leases it to a rx queue in a physical netdev.

Example with ynl client:

  # ynl --family netdev --output-json --do queue-create \
        --json '{"ifindex": 8, "type": "rx", "lease": {"ifindex": 4, "queue": {"type": "rx", "id": 15}}}'
  {'id': 1}

Note that the netdevice locking order is always from the virtual to
the physical device.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Co-developed-by: David Wei <dw@davidwei.uk>
Signed-off-by: David Wei <dw@davidwei.uk>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260402231031.447597-3-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/netdevices.rst |   6 ++
 include/linux/netdevice.h               |   9 +-
 include/net/netdev_queues.h             |  19 +++-
 include/net/netdev_rx_queue.h           |  15 ++-
 net/core/dev.c                          |   8 ++
 net/core/dev.h                          |   5 +
 net/core/netdev-genl.c                  | 164 +++++++++++++++++++++++++++++++-
 net/core/netdev_queues.c                |  62 ++++++++++++
 net/core/netdev_rx_queue.c              |  46 ++++++++-
 9 files changed, 323 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index 35704d115312..83e28b96884f 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -329,6 +329,12 @@ by setting ``request_ops_lock`` to true. Code comments and docs refer
 to drivers which have ops called under the instance lock as "ops locked".
 See also the documentation of the ``lock`` member of struct net_device.
 
+There is also a case of taking two per-netdev locks in sequence when netdev
+queues are leased, that is, the netdev-scope lock is taken for both the
+virtual and the physical device. To prevent deadlocks, the virtual device's
+lock must always be acquired before the physical device's (see
+``netdev_nl_queue_create_doit``).
+
 In the future, there will be an option for individual
 drivers to opt out of using ``rtnl_lock`` and instead perform their control
 operations directly under the netdev instance lock.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e15367373f7c..e8aa9cc4075d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2561,7 +2561,14 @@ struct net_device {
 	 * Also protects some fields in:
 	 *	struct napi_struct, struct netdev_queue, struct netdev_rx_queue
 	 *
-	 * Ordering: take after rtnl_lock.
+	 * Ordering:
+	 *
+	 * - take after rtnl_lock
+	 *
+	 * - for the case of netdev queue leasing, the netdev-scope lock is
+	 *   taken for both the virtual and the physical device; to prevent
+	 *   deadlocks, the virtual device's lock must always be acquired
+	 *   before the physical device's (see netdev_nl_queue_create_doit)
 	 */
 	struct mutex		lock;
 
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 95ed28212f4e..748b70552ed1 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -150,6 +150,11 @@ enum {
  *			When NIC-wide config is changed the callback will
  *			be invoked for all queues.
  *
+ * @ndo_queue_create:	Create a new RX queue on a virtual device that will
+ *			be paired with a physical device's queue via leasing.
+ *			Return the new queue id on success, negative error
+ *			on failure.
+ *
  * @supported_params:	Bitmask of supported parameters, see QCFG_*.
  *
  * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
@@ -178,6 +183,8 @@ struct netdev_queue_mgmt_ops {
 				     struct netlink_ext_ack *extack);
 	struct device *	(*ndo_queue_get_dma_dev)(struct net_device *dev,
 						 int idx);
+	int	(*ndo_queue_create)(struct net_device *dev,
+				    struct netlink_ext_ack *extack);
 
 	unsigned int supported_params;
 };
@@ -185,7 +192,7 @@ struct netdev_queue_mgmt_ops {
 void netdev_queue_config(struct net_device *dev, int rxq,
 			 struct netdev_queue_config *qcfg);
 
-bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx);
+bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx);
 
 /**
  * DOC: Lockless queue stopping / waking helpers.
@@ -374,5 +381,11 @@ static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq)
 	})
 
 struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx);
-
-#endif
+bool netdev_can_create_queue(const struct net_device *dev,
+			     struct netlink_ext_ack *extack);
+bool netdev_can_lease_queue(const struct net_device *dev,
+			    struct netlink_ext_ack *extack);
+bool netdev_queue_busy(struct net_device *dev, unsigned int idx,
+		       enum netdev_queue_type type,
+		       struct netlink_ext_ack *extack);
+#endif /* _LINUX_NET_QUEUES_H */
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index 08f81329fc11..1d41c253f0a3 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -31,6 +31,14 @@ struct netdev_rx_queue {
 	struct napi_struct		*napi;
 	struct netdev_queue_config	qcfg;
 	struct pp_memory_provider_params mp_params;
+
+	/* If a queue is leased, then the lease pointer is always
+	 * valid. From the physical device it points to the virtual
+	 * queue, and from the virtual device it points to the
+	 * physical queue.
+	 */
+	struct netdev_rx_queue		*lease;
+	netdevice_tracker		lease_tracker;
 } ____cacheline_aligned_in_smp;
 
 /*
@@ -60,5 +68,8 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
 }
 
 int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
-
-#endif
+void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst,
+			   struct netdev_rx_queue *rxq_src);
+void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
+			     struct netdev_rx_queue *rxq_src);
+#endif /* _LINUX_NETDEV_RX_QUEUE_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 5a31f9d2128c..cc7bcac892af 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1121,6 +1121,14 @@ netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex)
 	return __netdev_put_lock_ops_compat(dev, net);
 }
 
+struct net_device *
+netdev_put_lock(struct net_device *dev, struct net *net,
+		netdevice_tracker *tracker)
+{
+	netdev_tracker_free(dev, tracker);
+	return __netdev_put_lock(dev, net);
+}
+
 struct net_device *
 netdev_xa_find_lock(struct net *net, struct net_device *dev,
 		    unsigned long *index)
diff --git a/net/core/dev.h b/net/core/dev.h
index 781619e76b3e..6516ce2b5517 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -31,6 +31,8 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
 struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 
 struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net);
+struct net_device *netdev_put_lock(struct net_device *dev, struct net *net,
+				   netdevice_tracker *tracker);
 struct net_device *
 netdev_xa_find_lock(struct net *net, struct net_device *dev,
 		    unsigned long *index);
@@ -96,6 +98,9 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx,
 				 struct netdev_queue_config *qcfg,
 				 struct netlink_ext_ack *extack);
 
+bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx);
+bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx);
+
 /* netdev management, shared between various uAPI entry points */
 struct netdev_name_node {
 	struct hlist_node hlist;
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index aae75431858d..5d5e5b9a8af0 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -1122,7 +1122,169 @@ err_genlmsg_free:
 
 int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info)
 {
-	return -EOPNOTSUPP;
+	const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
+	const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1;
+	int err, ifindex, ifindex_lease, queue_id, queue_id_lease;
+	struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
+	struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)];
+	struct netdev_rx_queue *rxq, *rxq_lease;
+	struct net_device *dev, *dev_lease;
+	netdevice_tracker dev_tracker;
+	s32 netns_lease = -1;
+	struct nlattr *nest;
+	struct sk_buff *rsp;
+	struct net *net;
+	void *hdr;
+
+	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) ||
+	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
+	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE))
+		return -EINVAL;
+	if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) !=
+	    NETDEV_QUEUE_TYPE_RX) {
+		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]);
+		return -EINVAL;
+	}
+
+	ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
+
+	nest = info->attrs[NETDEV_A_QUEUE_LEASE];
+	err = nla_parse_nested(ltb, lmaxtype, nest,
+			       netdev_lease_nl_policy, info->extack);
+	if (err < 0)
+		return err;
+	if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) ||
+	    NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE))
+		return -EINVAL;
+	if (ltb[NETDEV_A_LEASE_NETNS_ID]) {
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		netns_lease = nla_get_s32(ltb[NETDEV_A_LEASE_NETNS_ID]);
+	}
+
+	ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]);
+
+	nest = ltb[NETDEV_A_LEASE_QUEUE];
+	err = nla_parse_nested(qtb, qmaxtype, nest,
+			       netdev_queue_id_nl_policy, info->extack);
+	if (err < 0)
+		return err;
+	if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) ||
+	    NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE))
+		return -EINVAL;
+	if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
+		NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]);
+		return -EINVAL;
+	}
+
+	queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]);
+
+	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!rsp)
+		return -ENOMEM;
+
+	hdr = genlmsg_iput(rsp, info);
+	if (!hdr) {
+		err = -EMSGSIZE;
+		goto err_genlmsg_free;
+	}
+
+	/* Locking order is always from the virtual to the physical device
+	 * since this is also the same order when applications open the
+	 * memory provider later on.
+	 */
+	dev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
+	if (!dev) {
+		err = -ENODEV;
+		goto err_genlmsg_free;
+	}
+	if (!netdev_can_create_queue(dev, info->extack)) {
+		err = -EINVAL;
+		goto err_unlock_dev;
+	}
+
+	net = genl_info_net(info);
+	if (netns_lease >= 0) {
+		net = get_net_ns_by_id(net, netns_lease);
+		if (!net) {
+			err = -ENONET;
+			goto err_unlock_dev;
+		}
+	}
+
+	dev_lease = netdev_get_by_index(net, ifindex_lease, &dev_tracker,
+					GFP_KERNEL);
+	if (!dev_lease) {
+		err = -ENODEV;
+		goto err_put_netns;
+	}
+	if (!netdev_can_lease_queue(dev_lease, info->extack)) {
+		netdev_put(dev_lease, &dev_tracker);
+		err = -EINVAL;
+		goto err_put_netns;
+	}
+
+	dev_lease = netdev_put_lock(dev_lease, net, &dev_tracker);
+	if (!dev_lease) {
+		err = -ENODEV;
+		goto err_put_netns;
+	}
+	if (queue_id_lease >= dev_lease->real_num_rx_queues) {
+		err = -ERANGE;
+		NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]);
+		goto err_unlock_dev_lease;
+	}
+	if (netdev_queue_busy(dev_lease, queue_id_lease, NETDEV_QUEUE_TYPE_RX,
+			      info->extack)) {
+		err = -EBUSY;
+		goto err_unlock_dev_lease;
+	}
+
+	rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease);
+	rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1);
+
+	/* Leasing queues from different physical devices is currently
+	 * not supported. Capabilities such as XDP features and DMA
+	 * device may differ between physical devices, and computing
+	 * a correct intersection for the virtual device is not yet
+	 * implemented.
+	 */
+	if (rxq->lease && rxq->lease->dev != dev_lease) {
+		err = -EOPNOTSUPP;
+		NL_SET_ERR_MSG(info->extack,
+			       "Leasing queues from different devices not supported");
+		goto err_unlock_dev_lease;
+	}
+
+	queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev, info->extack);
+	if (queue_id < 0) {
+		err = queue_id;
+		goto err_unlock_dev_lease;
+	}
+	rxq = __netif_get_rx_queue(dev, queue_id);
+
+	netdev_rx_queue_lease(rxq, rxq_lease);
+
+	nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id);
+	genlmsg_end(rsp, hdr);
+
+	netdev_unlock(dev_lease);
+	netdev_unlock(dev);
+	if (netns_lease >= 0)
+		put_net(net);
+
+	return genlmsg_reply(rsp, info);
+
+err_unlock_dev_lease:
+	netdev_unlock(dev_lease);
+err_put_netns:
+	if (netns_lease >= 0)
+		put_net(net);
+err_unlock_dev:
+	netdev_unlock(dev);
+err_genlmsg_free:
+	nlmsg_free(rsp);
+	return err;
 }
 
 void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
diff --git a/net/core/netdev_queues.c b/net/core/netdev_queues.c
index 251f27a8307f..177401828e79 100644
--- a/net/core/netdev_queues.c
+++ b/net/core/netdev_queues.c
@@ -1,6 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+#include <net/xdp_sock_drv.h>
+
+#include "dev.h"
 
 /**
  * netdev_queue_get_dma_dev() - get dma device for zero-copy operations
@@ -25,3 +29,61 @@ struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx)
 	return dma_dev && dma_dev->dma_mask ? dma_dev : NULL;
 }
 
+bool netdev_can_create_queue(const struct net_device *dev,
+			     struct netlink_ext_ack *extack)
+{
+	if (dev->dev.parent) {
+		NL_SET_ERR_MSG(extack, "Device is not a virtual device");
+		return false;
+	}
+	if (!dev->queue_mgmt_ops ||
+	    !dev->queue_mgmt_ops->ndo_queue_create) {
+		NL_SET_ERR_MSG(extack, "Device does not support queue creation");
+		return false;
+	}
+	if (dev->real_num_rx_queues < 1 ||
+	    dev->real_num_tx_queues < 1) {
+		NL_SET_ERR_MSG(extack, "Device must have at least one real queue");
+		return false;
+	}
+	return true;
+}
+
+bool netdev_can_lease_queue(const struct net_device *dev,
+			    struct netlink_ext_ack *extack)
+{
+	if (!dev->dev.parent) {
+		NL_SET_ERR_MSG(extack, "Lease device is a virtual device");
+		return false;
+	}
+	if (!netif_device_present(dev)) {
+		NL_SET_ERR_MSG(extack, "Lease device has been removed from the system");
+		return false;
+	}
+	if (!dev->queue_mgmt_ops) {
+		NL_SET_ERR_MSG(extack, "Lease device does not support queue management operations");
+		return false;
+	}
+	return true;
+}
+
+bool netdev_queue_busy(struct net_device *dev, unsigned int idx,
+		       enum netdev_queue_type type,
+		       struct netlink_ext_ack *extack)
+{
+	if (xsk_get_pool_from_qid(dev, idx)) {
+		NL_SET_ERR_MSG(extack, "Device queue in use by AF_XDP");
+		return true;
+	}
+	if (type == NETDEV_QUEUE_TYPE_TX)
+		return false;
+	if (netif_rxq_is_leased(dev, idx)) {
+		NL_SET_ERR_MSG(extack, "Device queue in use due to queue leasing");
+		return true;
+	}
+	if (netif_rxq_has_mp(dev, idx)) {
+		NL_SET_ERR_MSG(extack, "Device queue in use by memory provider");
+		return true;
+	}
+	return false;
+}
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index 668a90658f25..a1f23c2c96d4 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -10,15 +10,53 @@
 #include "dev.h"
 #include "page_pool_priv.h"
 
-/* See also page_pool_is_unreadable() */
-bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx)
+void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst,
+			   struct netdev_rx_queue *rxq_src)
 {
-	struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx);
+	netdev_assert_locked(rxq_src->dev);
+	netdev_assert_locked(rxq_dst->dev);
+
+	netdev_hold(rxq_src->dev, &rxq_src->lease_tracker, GFP_KERNEL);
 
-	return !!rxq->mp_params.mp_ops;
+	WRITE_ONCE(rxq_src->lease, rxq_dst);
+	WRITE_ONCE(rxq_dst->lease, rxq_src);
+}
+
+void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
+			     struct netdev_rx_queue *rxq_src)
+{
+	netdev_assert_locked(rxq_dst->dev);
+	netdev_assert_locked(rxq_src->dev);
+
+	WRITE_ONCE(rxq_src->lease, NULL);
+	WRITE_ONCE(rxq_dst->lease, NULL);
+
+	netdev_put(rxq_src->dev, &rxq_src->lease_tracker);
+}
+
+bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx)
+{
+	if (rxq_idx < dev->real_num_rx_queues)
+		return READ_ONCE(__netif_get_rx_queue(dev, rxq_idx)->lease);
+	return false;
+}
+
+/* See also page_pool_is_unreadable() */
+bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx)
+{
+	if (rxq_idx < dev->real_num_rx_queues)
+		return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_ops;
+	return false;
 }
 EXPORT_SYMBOL(netif_rxq_has_unreadable_mp);
 
+bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx)
+{
+	if (rxq_idx < dev->real_num_rx_queues)
+		return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_priv;
+	return false;
+}
+
 static int netdev_rx_queue_reconfig(struct net_device *dev,
 				    unsigned int rxq_idx,
 				    struct netdev_queue_config *qcfg_old,
-- 
cgit v1.2.3


From 25444470570b44da61366e307b3e54be653bf595 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 3 Apr 2026 01:10:29 +0200
Subject: netkit: Add netkit notifier to check for unregistering devices

Add a netdevice notifier in netkit to watch for NETDEV_UNREGISTER events.
If the target device is indeed NETREG_UNREGISTERING and previously leased
a queue to a netkit device, then collect the related netkit devices and
batch-unregister_netdevice_many() them.

If this were not done, then the netkit device would hold a reference on
the physical device preventing it from going away. However, in case of
both io_uring zero-copy as well as AF_XDP this situation is handled
gracefully and the allocated resources are torn down.

In the case where mentioned infra is used through netkit, the applications
have a reference on netkit, and netkit in turn holds a reference on the
physical device. In order to have netkit release the reference on the
physical device, we need such watcher to then unregister the netkit ones.

This is generally quite similar to the dependency handling in case of
tunnels (e.g. vxlan bound to a underlying netdev) where the tunnel device
gets removed along with the physical device.

  # ip a
  [...]
  4: enp10s0f0np0: <BROADCAST,MULTICAST> mtu 1500 qdisc mq state DOWN group default qlen 1000
      link/ether e8:eb:d3:a3:43:f6 brd ff:ff:ff:ff:ff:ff
      inet 10.0.0.2/24 scope global enp10s0f0np0
         valid_lft forever preferred_lft forever
  [...]
  8: nk@NONE: <BROADCAST,MULTICAST,NOARP> mtu 1500 qdisc noop state DOWN group default qlen 1000
      link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
  [...]

  # rmmod mlx5_ib
  # rmmod mlx5_core
  [...]
  [  309.261822] mlx5_core 0000:0a:00.0 mlx5_0: Port: 1 Link DOWN
  [  344.235236] mlx5_core 0000:0a:00.1: E-Switch: Unload vfs: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
  [  344.246948] mlx5_core 0000:0a:00.1: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
  [  344.463754] mlx5_core 0000:0a:00.1: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
  [  344.770155] mlx5_core 0000:0a:00.1: E-Switch: cleanup
  [...]

  # ip a
  [...]
  [ both enp10s0f0np0 and nk gone ]
  [...]

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Co-developed-by: David Wei <dw@davidwei.uk>
Signed-off-by: David Wei <dw@davidwei.uk>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260402231031.447597-13-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/netkit.c      | 69 +++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            |  6 +++++
 3 files changed, 75 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index b22bd0b6508a..1ec21aef348f 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -983,7 +983,15 @@ static void netkit_del_link(struct net_device *dev, struct list_head *head)
 	if (peer) {
 		nk = netkit_priv(peer);
 		RCU_INIT_POINTER(nk->peer, NULL);
-		unregister_netdevice_queue(peer, head);
+		/* Guard against the peer already being in an unregister
+		 * list (e.g. same-namespace teardown where the peer is
+		 * in the caller's dev_kill_list). list_move_tail() on an
+		 * already-queued device would otherwise corrupt that
+		 * list's iteration. This situation can occur via netkit
+		 * notifier, hence guard against this scenario.
+		 */
+		if (!unregister_netdevice_queued(peer))
+			unregister_netdevice_queue(peer, head);
 	}
 }
 
@@ -1051,6 +1059,50 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
 	return 0;
 }
 
+static void netkit_check_lease_unregister(struct net_device *dev)
+{
+	LIST_HEAD(list_kill);
+	u32 q_idx;
+
+	if (READ_ONCE(dev->reg_state) != NETREG_UNREGISTERING ||
+	    !dev->dev.parent)
+		return;
+
+	netdev_lock_ops(dev);
+	for (q_idx = 0; q_idx < dev->real_num_rx_queues; q_idx++) {
+		struct net_device *tmp = dev;
+		struct netdev_rx_queue *rxq;
+		u32 tmp_q_idx = q_idx;
+
+		rxq = __netif_get_rx_queue_lease(&tmp, &tmp_q_idx,
+						 NETIF_PHYS_TO_VIRT);
+		if (rxq && tmp != dev &&
+		    tmp->netdev_ops == &netkit_netdev_ops) {
+			/* A single phys device can have multiple queues leased
+			 * to one netkit device. We can only queue that netkit
+			 * device once to the list_kill. Queues of that phys
+			 * device can be leased with different individual netkit
+			 * devices, hence we batch via list_kill.
+			 */
+			if (unregister_netdevice_queued(tmp))
+				continue;
+			netkit_del_link(tmp, &list_kill);
+		}
+	}
+	netdev_unlock_ops(dev);
+	unregister_netdevice_many(&list_kill);
+}
+
+static int netkit_notifier(struct notifier_block *this,
+			   unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	if (event == NETDEV_UNREGISTER)
+		netkit_check_lease_unregister(dev);
+	return NOTIFY_DONE;
+}
+
 static size_t netkit_get_size(const struct net_device *dev)
 {
 	return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
@@ -1127,18 +1179,31 @@ static struct rtnl_link_ops netkit_link_ops = {
 	.maxtype	= IFLA_NETKIT_MAX,
 };
 
+static struct notifier_block netkit_netdev_notifier = {
+	.notifier_call	= netkit_notifier,
+};
+
 static __init int netkit_mod_init(void)
 {
+	int ret;
+
 	BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT ||
 		     (int)NETKIT_PASS != (int)TCX_PASS ||
 		     (int)NETKIT_DROP != (int)TCX_DROP ||
 		     (int)NETKIT_REDIRECT != (int)TCX_REDIRECT);
 
-	return rtnl_link_register(&netkit_link_ops);
+	ret = rtnl_link_register(&netkit_link_ops);
+	if (ret)
+		return ret;
+	ret = register_netdevice_notifier(&netkit_netdev_notifier);
+	if (ret)
+		rtnl_link_unregister(&netkit_link_ops);
+	return ret;
 }
 
 static __exit void netkit_mod_exit(void)
 {
+	unregister_netdevice_notifier(&netkit_netdev_notifier);
 	rtnl_link_unregister(&netkit_link_ops);
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e8aa9cc4075d..47417b2d48a4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3420,6 +3420,8 @@ static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
 int register_netdevice(struct net_device *dev);
 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
 void unregister_netdevice_many(struct list_head *head);
+bool unregister_netdevice_queued(const struct net_device *dev);
+
 static inline void unregister_netdevice(struct net_device *dev)
 {
 	unregister_netdevice_queue(dev, NULL);
diff --git a/net/core/dev.c b/net/core/dev.c
index 2df8a2a5ecf5..e7bc95cbd1fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -12384,6 +12384,12 @@ static void netif_close_many_and_unlock_cond(struct list_head *close_head)
 #endif
 }
 
+bool unregister_netdevice_queued(const struct net_device *dev)
+{
+	ASSERT_RTNL();
+	return !list_empty(&dev->unreg_list);
+}
+
 void unregister_netdevice_many_notify(struct list_head *head,
 				      u32 portid, const struct nlmsghdr *nlh)
 {
-- 
cgit v1.2.3


From 8ad7f3b265a87cd4e5052677545f90f14c855b10 Mon Sep 17 00:00:00 2001
From: Pei Xiao <xiaopei01@kylinos.cn>
Date: Fri, 10 Apr 2026 10:29:24 +0800
Subject: regmap: i3c: Add non-devm regmap_init_i3c() helper

Add __regmap_init_i3c() and the corresponding regmap_init_i3c() macro to
allow creating a regmap for I3C devices without using the device-managed
version. This mirrors the pattern already established for other buses
such as I2C, SPI and so on, giving drivers more flexibility when
the regmap lifetime is not directly tied to the device.

Signed-off-by: Pei Xiao <xiaopei01@kylinos.cn>
Link: https://patch.msgid.link/a81256a8866b163979a20406abf01df7d7440104.1775788105.git.xiaopei01@kylinos.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-i3c.c | 10 ++++++++++
 include/linux/regmap.h           | 17 +++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap-i3c.c b/drivers/base/regmap/regmap-i3c.c
index 863b348704dc..5af583d472dd 100644
--- a/drivers/base/regmap/regmap-i3c.c
+++ b/drivers/base/regmap/regmap-i3c.c
@@ -46,6 +46,16 @@ static const struct regmap_bus regmap_i3c = {
 	.read = regmap_i3c_read,
 };
 
+struct regmap *__regmap_init_i3c(struct i3c_device *i3c,
+				 const struct regmap_config *config,
+				 struct lock_class_key *lock_key,
+				 const char *lock_name)
+{
+	return __regmap_init(&i3c->dev, &regmap_i3c, &i3c->dev, config,
+				  lock_key, lock_name);
+}
+EXPORT_SYMBOL_GPL(__regmap_init_i3c);
+
 struct regmap *__devm_regmap_init_i3c(struct i3c_device *i3c,
 				      const struct regmap_config *config,
 				      struct lock_class_key *lock_key,
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index f1c5cb63c171..df44cb30f53b 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -693,6 +693,10 @@ struct regmap *__regmap_init_sdw_mbq(struct device *dev, struct sdw_slave *sdw,
 				     const struct regmap_sdw_mbq_cfg *mbq_config,
 				     struct lock_class_key *lock_key,
 				     const char *lock_name);
+struct regmap *__regmap_init_i3c(struct i3c_device *i3c,
+				 const struct regmap_config *config,
+				 struct lock_class_key *lock_key,
+				 const char *lock_name);
 struct regmap *__regmap_init_spi_avmm(struct spi_device *spi,
 				      const struct regmap_config *config,
 				      struct lock_class_key *lock_key,
@@ -999,6 +1003,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 	__regmap_lockdep_wrapper(__regmap_init_sdw_mbq, #config,	\
 				dev, sdw, config, mbq_config)
 
+/**
+ * regmap_init_i3c() - Initialise register map
+ *
+ * @i3c: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.
+ */
+#define regmap_init_i3c(i3c, config)					\
+	__regmap_lockdep_wrapper(__regmap_init_i3c, #config,		\
+				i3c, config)
+
 /**
  * regmap_init_spi_avmm() - Initialize register map for Intel SPI Slave
  * to AVMM Bus Bridge
-- 
cgit v1.2.3


From 39237e3208209d1bb35d939d6fee1f36b642f562 Mon Sep 17 00:00:00 2001
From: Marco Nenciarini <mnencia@kcore.it>
Date: Wed, 1 Apr 2026 22:36:36 +0200
Subject: platform/x86: int3472: Rename pled to led in LED registration code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename the privacy LED type, struct member, and functions from "pled"
to "led" in preparation for supporting additional LED types beyond
just the privacy LED.

No functional change.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Signed-off-by: Marco Nenciarini <mnencia@kcore.it>
Link: https://patch.msgid.link/20260401203638.1601661-3-mnencia@kcore.it
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/x86/intel/int3472/discrete.c |  4 ++--
 drivers/platform/x86/intel/int3472/led.c      | 14 +++++++-------
 include/linux/platform_data/x86/int3472.h     |  8 ++++----
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c
index 1c65ce87cde0..b03f61dab25f 100644
--- a/drivers/platform/x86/intel/int3472/discrete.c
+++ b/drivers/platform/x86/intel/int3472/discrete.c
@@ -354,7 +354,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
 
 			break;
 		case INT3472_GPIO_TYPE_PRIVACY_LED:
-			ret = skl_int3472_register_pled(int3472, gpio);
+			ret = skl_int3472_register_led(int3472, gpio);
 			if (ret)
 				err_msg = "Failed to register LED\n";
 
@@ -429,7 +429,7 @@ void int3472_discrete_cleanup(struct int3472_discrete_device *int3472)
 	gpiod_remove_lookup_table(&int3472->gpios);
 
 	skl_int3472_unregister_clock(int3472);
-	skl_int3472_unregister_pled(int3472);
+	skl_int3472_unregister_led(int3472);
 	skl_int3472_unregister_regulator(int3472);
 }
 EXPORT_SYMBOL_NS_GPL(int3472_discrete_cleanup, "INTEL_INT3472_DISCRETE");
diff --git a/drivers/platform/x86/intel/int3472/led.c b/drivers/platform/x86/intel/int3472/led.c
index 35abad900bf3..fe412cb938cf 100644
--- a/drivers/platform/x86/intel/int3472/led.c
+++ b/drivers/platform/x86/intel/int3472/led.c
@@ -6,17 +6,17 @@
 #include <linux/leds.h>
 #include <linux/platform_data/x86/int3472.h>
 
-static int int3472_pled_set(struct led_classdev *led_cdev, enum led_brightness brightness)
+static int int3472_led_set(struct led_classdev *led_cdev, enum led_brightness brightness)
 {
-	struct int3472_pled *led = container_of(led_cdev, struct int3472_pled, classdev);
+	struct int3472_led *led = container_of(led_cdev, struct int3472_led, classdev);
 
 	gpiod_set_value_cansleep(led->gpio, brightness);
 	return 0;
 }
 
-int skl_int3472_register_pled(struct int3472_discrete_device *int3472, struct gpio_desc *gpio)
+int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpio_desc *gpio)
 {
-	struct int3472_pled *led = &int3472->pled;
+	struct int3472_led *led = &int3472->led;
 	char *p;
 	int ret;
 
@@ -34,7 +34,7 @@ int skl_int3472_register_pled(struct int3472_discrete_device *int3472, struct gp
 
 	led->classdev.name = led->name;
 	led->classdev.max_brightness = 1;
-	led->classdev.brightness_set_blocking = int3472_pled_set;
+	led->classdev.brightness_set_blocking = int3472_led_set;
 
 	ret = led_classdev_register(int3472->dev, &led->classdev);
 	if (ret)
@@ -48,9 +48,9 @@ int skl_int3472_register_pled(struct int3472_discrete_device *int3472, struct gp
 	return 0;
 }
 
-void skl_int3472_unregister_pled(struct int3472_discrete_device *int3472)
+void skl_int3472_unregister_led(struct int3472_discrete_device *int3472)
 {
-	struct int3472_pled *led = &int3472->pled;
+	struct int3472_led *led = &int3472->led;
 
 	if (IS_ERR_OR_NULL(led->classdev.dev))
 		return;
diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
index dbe745dc88d5..39a1938d77e1 100644
--- a/include/linux/platform_data/x86/int3472.h
+++ b/include/linux/platform_data/x86/int3472.h
@@ -122,12 +122,12 @@ struct int3472_discrete_device {
 		u8 imgclk_index;
 	} clock;
 
-	struct int3472_pled {
+	struct int3472_led {
 		struct led_classdev classdev;
 		struct led_lookup_data lookup;
 		char name[INT3472_LED_MAX_NAME_LEN];
 		struct gpio_desc *gpio;
-	} pled;
+	} led;
 
 	struct int3472_discrete_quirks quirks;
 
@@ -161,7 +161,7 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
 				   const char *second_sensor);
 void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472);
 
-int skl_int3472_register_pled(struct int3472_discrete_device *int3472, struct gpio_desc *gpio);
-void skl_int3472_unregister_pled(struct int3472_discrete_device *int3472);
+int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpio_desc *gpio);
+void skl_int3472_unregister_led(struct int3472_discrete_device *int3472);
 
 #endif
-- 
cgit v1.2.3


From 218d3c44f5f0a3cc1647bc61a4e4eac663b37aa5 Mon Sep 17 00:00:00 2001
From: Marco Nenciarini <mnencia@kcore.it>
Date: Wed, 1 Apr 2026 22:36:37 +0200
Subject: platform/x86: int3472: Parameterize LED con_id in registration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a con_id parameter to skl_int3472_register_led() to allow callers
to specify both the LED name suffix and lookup con_id instead of
hardcoding "privacy". This prepares for registering additional LED
types with different names.

While at it, rename the privacy LED's GPIO con_id from "privacy-led"
to "privacy" in int3472_get_con_id_and_polarity() and pass it
directly to skl_int3472_register_led(), reducing churn when adding
new LED types.

No functional change.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Signed-off-by: Marco Nenciarini <mnencia@kcore.it>
Link: https://patch.msgid.link/20260401203638.1601661-4-mnencia@kcore.it
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/x86/intel/int3472/discrete.c | 4 ++--
 drivers/platform/x86/intel/int3472/led.c      | 7 ++++---
 include/linux/platform_data/x86/int3472.h     | 3 ++-
 3 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c
index b03f61dab25f..637e3821b496 100644
--- a/drivers/platform/x86/intel/int3472/discrete.c
+++ b/drivers/platform/x86/intel/int3472/discrete.c
@@ -212,7 +212,7 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3
 		*gpio_flags = GPIO_ACTIVE_HIGH;
 		break;
 	case INT3472_GPIO_TYPE_PRIVACY_LED:
-		*con_id = "privacy-led";
+		*con_id = "privacy";
 		*gpio_flags = GPIO_ACTIVE_HIGH;
 		break;
 	case INT3472_GPIO_TYPE_HOTPLUG_DETECT:
@@ -354,7 +354,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
 
 			break;
 		case INT3472_GPIO_TYPE_PRIVACY_LED:
-			ret = skl_int3472_register_led(int3472, gpio);
+			ret = skl_int3472_register_led(int3472, gpio, con_id);
 			if (ret)
 				err_msg = "Failed to register LED\n";
 
diff --git a/drivers/platform/x86/intel/int3472/led.c b/drivers/platform/x86/intel/int3472/led.c
index fe412cb938cf..22d0d6c5e6ce 100644
--- a/drivers/platform/x86/intel/int3472/led.c
+++ b/drivers/platform/x86/intel/int3472/led.c
@@ -14,7 +14,8 @@ static int int3472_led_set(struct led_classdev *led_cdev, enum led_brightness br
 	return 0;
 }
 
-int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpio_desc *gpio)
+int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpio_desc *gpio,
+			     const char *con_id)
 {
 	struct int3472_led *led = &int3472->led;
 	char *p;
@@ -27,7 +28,7 @@ int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpi
 
 	/* Generate the name, replacing the ':' in the ACPI devname with '_' */
 	snprintf(led->name, sizeof(led->name),
-		 "%s::privacy_led", acpi_dev_name(int3472->sensor));
+		 "%s::%s_led", acpi_dev_name(int3472->sensor), con_id);
 	p = strchr(led->name, ':');
 	if (p)
 		*p = '_';
@@ -42,7 +43,7 @@ int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpi
 
 	led->lookup.provider = led->name;
 	led->lookup.dev_id = int3472->sensor_name;
-	led->lookup.con_id = "privacy";
+	led->lookup.con_id = con_id;
 	led_add_lookup(&led->lookup);
 
 	return 0;
diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
index 39a1938d77e1..ebf4d0637624 100644
--- a/include/linux/platform_data/x86/int3472.h
+++ b/include/linux/platform_data/x86/int3472.h
@@ -161,7 +161,8 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
 				   const char *second_sensor);
 void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472);
 
-int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpio_desc *gpio);
+int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpio_desc *gpio,
+			     const char *con_id);
 void skl_int3472_unregister_led(struct int3472_discrete_device *int3472);
 
 #endif
-- 
cgit v1.2.3


From cde32a92d4562b686f730fc08d4d558ecc99d516 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Tue, 24 Feb 2026 00:13:18 -0600
Subject: mmc: sdio: add MediaTek MT7902 SDIO device ID

Add SDIO device ID (0x790a) for MediaTek MT7902 to sdio_ids.h.

Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/linux/mmc/sdio_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 673cbdf43453..dce89c110691 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -111,6 +111,7 @@
 #define SDIO_VENDOR_ID_MEDIATEK			0x037a
 #define SDIO_DEVICE_ID_MEDIATEK_MT7663		0x7663
 #define SDIO_DEVICE_ID_MEDIATEK_MT7668		0x7668
+#define SDIO_DEVICE_ID_MEDIATEK_MT7902		0x790a
 #define SDIO_DEVICE_ID_MEDIATEK_MT7961		0x7961
 
 #define SDIO_VENDOR_ID_MICROCHIP_WILC		0x0296
-- 
cgit v1.2.3


From 30a59dddd688bbd75f54e96b174a7aac914774d2 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@manguebit.org>
Date: Tue, 7 Apr 2026 16:58:09 -0300
Subject: vfs: introduce d_mark_tmpfile_name()

CIFS requires O_TMPFILE dentries to have names of newly created
delete-on-close files in the server so it can build full pathnames
from the root of the share when performing operations on them.

Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: David Howells <dhowells@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-cifs@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/dcache.c            | 19 +++++++++++++++++++
 include/linux/dcache.h |  1 +
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 7ba1801d8132..fcd5a40cce94 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3196,6 +3196,25 @@ void d_mark_tmpfile(struct file *file, struct inode *inode)
 }
 EXPORT_SYMBOL(d_mark_tmpfile);
 
+void d_mark_tmpfile_name(struct file *file, const struct qstr *name)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	char *dname = dentry->d_shortname.string;
+
+	BUG_ON(dname_external(dentry));
+	BUG_ON(d_really_is_positive(dentry));
+	BUG_ON(!d_unlinked(dentry));
+	BUG_ON(name->len > DNAME_INLINE_LEN - 1);
+	spin_lock(&dentry->d_parent->d_lock);
+	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+	dentry->__d_name.len = name->len;
+	memcpy(dname, name->name, name->len);
+	dname[name->len] = '\0';
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&dentry->d_parent->d_lock);
+}
+EXPORT_SYMBOL(d_mark_tmpfile_name);
+
 void d_tmpfile(struct file *file, struct inode *inode)
 {
 	struct dentry *dentry = file->f_path.dentry;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 898c60d21c92..f60819dcfebd 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -264,6 +264,7 @@ extern void d_invalidate(struct dentry *);
 extern struct dentry * d_make_root(struct inode *);
 
 extern void d_mark_tmpfile(struct file *, struct inode *);
+void d_mark_tmpfile_name(struct file *file, const struct qstr *name);
 extern void d_tmpfile(struct file *, struct inode *);
 
 extern struct dentry *d_find_alias(struct inode *);
-- 
cgit v1.2.3


From 7cd9a5d7d4b75802b97aa89f6f53375a6d84d1d5 Mon Sep 17 00:00:00 2001
From: Cheng-Yang Chou <yphbchou0911@gmail.com>
Date: Fri, 10 Apr 2026 07:54:06 -1000
Subject: sched_ext: Remove runtime kfunc mask enforcement

Now that scx_kfunc_context_filter enforces context-sensitive kfunc
restrictions at BPF load time, the per-task runtime enforcement via
scx_kf_mask is redundant. Remove it entirely:

 - Delete enum scx_kf_mask, the kf_mask field on sched_ext_entity, and
   the scx_kf_allow()/scx_kf_disallow()/scx_kf_allowed() helpers along
   with the higher_bits()/highest_bit() helpers they used.
 - Strip the @mask parameter (and the BUILD_BUG_ON checks) from the
   SCX_CALL_OP[_RET]/SCX_CALL_OP_TASK[_RET]/SCX_CALL_OP_2TASKS_RET
   macros and update every call site. Reflow call sites that were
   wrapped only to fit the old 5-arg form and now collapse onto a single
   line under ~100 cols.
 - Remove the in-kfunc scx_kf_allowed() runtime checks from
   scx_dsq_insert_preamble(), scx_dsq_move(), scx_bpf_dispatch_nr_slots(),
   scx_bpf_dispatch_cancel(), scx_bpf_dsq_move_to_local___v2(),
   scx_bpf_sub_dispatch(), scx_bpf_reenqueue_local(), and the per-call
   guard inside select_cpu_from_kfunc().

scx_bpf_task_cgroup() and scx_kf_allowed_on_arg_tasks() were already
cleaned up in the "drop redundant rq-locked check" patch.
scx_kf_allowed_if_unlocked() was rewritten in the preceding "decouple"
patch. No further changes to those helpers here.

Co-developed-by: Juntong Deng <juntong.deng@outlook.com>
Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 include/linux/sched/ext.h |  28 ------
 kernel/sched/ext.c        | 244 +++++++++++-----------------------------------
 kernel/sched/ext_idle.c   |   4 +-
 3 files changed, 58 insertions(+), 218 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 602dc83cab36..1a3af2ea2a79 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -147,33 +147,6 @@ enum scx_ent_dsq_flags {
 	SCX_TASK_DSQ_ON_PRIQ	= 1 << 0, /* task is queued on the priority queue of a dsq */
 };
 
-/*
- * Mask bits for scx_entity.kf_mask. Not all kfuncs can be called from
- * everywhere and the following bits track which kfunc sets are currently
- * allowed for %current. This simple per-task tracking works because SCX ops
- * nest in a limited way. BPF will likely implement a way to allow and disallow
- * kfuncs depending on the calling context which will replace this manual
- * mechanism. See scx_kf_allow().
- */
-enum scx_kf_mask {
-	SCX_KF_UNLOCKED		= 0,	  /* sleepable and not rq locked */
-	/* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */
-	SCX_KF_CPU_RELEASE	= 1 << 0, /* ops.cpu_release() */
-	/*
-	 * ops.dispatch() may release rq lock temporarily and thus ENQUEUE and
-	 * SELECT_CPU may be nested inside. ops.dequeue (in REST) may also be
-	 * nested inside DISPATCH.
-	 */
-	SCX_KF_DISPATCH		= 1 << 1, /* ops.dispatch() */
-	SCX_KF_ENQUEUE		= 1 << 2, /* ops.enqueue() and ops.select_cpu() */
-	SCX_KF_SELECT_CPU	= 1 << 3, /* ops.select_cpu() */
-	SCX_KF_REST		= 1 << 4, /* other rq-locked operations */
-
-	__SCX_KF_RQ_LOCKED	= SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH |
-				  SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
-	__SCX_KF_TERMINAL	= SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
-};
-
 enum scx_dsq_lnode_flags {
 	SCX_DSQ_LNODE_ITER_CURSOR = 1 << 0,
 
@@ -221,7 +194,6 @@ struct sched_ext_entity {
 	s32			sticky_cpu;
 	s32			holding_cpu;
 	s32			selected_cpu;
-	u32			kf_mask;	/* see scx_kf_mask above */
 	struct task_struct	*kf_tasks[2];	/* see SCX_CALL_OP_TASK() */
 
 	struct list_head	runnable_node;	/* rq->scx.runnable_list */
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index edf51d91bab2..d37418a684e9 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -229,19 +229,6 @@ static long jiffies_delta_msecs(unsigned long at, unsigned long now)
 		return -(long)jiffies_to_msecs(now - at);
 }
 
-/* if the highest set bit is N, return a mask with bits [N+1, 31] set */
-static u32 higher_bits(u32 flags)
-{
-	return ~((1 << fls(flags)) - 1);
-}
-
-/* return the mask with only the highest bit set */
-static u32 highest_bit(u32 flags)
-{
-	int bit = fls(flags);
-	return ((u64)1 << bit) >> 1;
-}
-
 static bool u32_before(u32 a, u32 b)
 {
 	return (s32)(a - b) < 0;
@@ -462,30 +449,6 @@ static bool rq_is_open(struct rq *rq, u64 enq_flags)
 	return false;
 }
 
-/*
- * scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX
- * ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate
- * the allowed kfuncs and those kfuncs should use scx_kf_allowed() to check
- * whether it's running from an allowed context.
- *
- * @mask is constant, always inline to cull the mask calculations.
- */
-static __always_inline void scx_kf_allow(u32 mask)
-{
-	/* nesting is allowed only in increasing scx_kf_mask order */
-	WARN_ONCE((mask | higher_bits(mask)) & current->scx.kf_mask,
-		  "invalid nesting current->scx.kf_mask=0x%x mask=0x%x\n",
-		  current->scx.kf_mask, mask);
-	current->scx.kf_mask |= mask;
-	barrier();
-}
-
-static void scx_kf_disallow(u32 mask)
-{
-	barrier();
-	current->scx.kf_mask &= ~mask;
-}
-
 /*
  * Track the rq currently locked.
  *
@@ -506,34 +469,22 @@ static inline void update_locked_rq(struct rq *rq)
 	__this_cpu_write(scx_locked_rq_state, rq);
 }
 
-#define SCX_CALL_OP(sch, mask, op, rq, args...)					\
+#define SCX_CALL_OP(sch, op, rq, args...)					\
 do {										\
 	if (rq)									\
 		update_locked_rq(rq);						\
-	if (mask) {								\
-		scx_kf_allow(mask);						\
-		(sch)->ops.op(args);						\
-		scx_kf_disallow(mask);						\
-	} else {								\
-		(sch)->ops.op(args);						\
-	}									\
+	(sch)->ops.op(args);							\
 	if (rq)									\
 		update_locked_rq(NULL);						\
 } while (0)
 
-#define SCX_CALL_OP_RET(sch, mask, op, rq, args...)				\
+#define SCX_CALL_OP_RET(sch, op, rq, args...)					\
 ({										\
 	__typeof__((sch)->ops.op(args)) __ret;					\
 										\
 	if (rq)									\
 		update_locked_rq(rq);						\
-	if (mask) {								\
-		scx_kf_allow(mask);						\
-		__ret = (sch)->ops.op(args);					\
-		scx_kf_disallow(mask);						\
-	} else {								\
-		__ret = (sch)->ops.op(args);					\
-	}									\
+	__ret = (sch)->ops.op(args);						\
 	if (rq)									\
 		update_locked_rq(NULL);						\
 	__ret;									\
@@ -553,67 +504,33 @@ do {										\
  *
  * These macros only work for non-nesting ops since kf_tasks[] is not stacked.
  */
-#define SCX_CALL_OP_TASK(sch, mask, op, rq, task, args...)			\
+#define SCX_CALL_OP_TASK(sch, op, rq, task, args...)				\
 do {										\
-	BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL);				\
 	current->scx.kf_tasks[0] = task;					\
-	SCX_CALL_OP((sch), mask, op, rq, task, ##args);				\
+	SCX_CALL_OP((sch), op, rq, task, ##args);				\
 	current->scx.kf_tasks[0] = NULL;					\
 } while (0)
 
-#define SCX_CALL_OP_TASK_RET(sch, mask, op, rq, task, args...)			\
+#define SCX_CALL_OP_TASK_RET(sch, op, rq, task, args...)			\
 ({										\
 	__typeof__((sch)->ops.op(task, ##args)) __ret;				\
-	BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL);				\
 	current->scx.kf_tasks[0] = task;					\
-	__ret = SCX_CALL_OP_RET((sch), mask, op, rq, task, ##args);		\
+	__ret = SCX_CALL_OP_RET((sch), op, rq, task, ##args);			\
 	current->scx.kf_tasks[0] = NULL;					\
 	__ret;									\
 })
 
-#define SCX_CALL_OP_2TASKS_RET(sch, mask, op, rq, task0, task1, args...)	\
+#define SCX_CALL_OP_2TASKS_RET(sch, op, rq, task0, task1, args...)		\
 ({										\
 	__typeof__((sch)->ops.op(task0, task1, ##args)) __ret;			\
-	BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL);				\
 	current->scx.kf_tasks[0] = task0;					\
 	current->scx.kf_tasks[1] = task1;					\
-	__ret = SCX_CALL_OP_RET((sch), mask, op, rq, task0, task1, ##args);	\
+	__ret = SCX_CALL_OP_RET((sch), op, rq, task0, task1, ##args);		\
 	current->scx.kf_tasks[0] = NULL;					\
 	current->scx.kf_tasks[1] = NULL;					\
 	__ret;									\
 })
 
-/* @mask is constant, always inline to cull unnecessary branches */
-static __always_inline bool scx_kf_allowed(struct scx_sched *sch, u32 mask)
-{
-	if (unlikely(!(current->scx.kf_mask & mask))) {
-		scx_error(sch, "kfunc with mask 0x%x called from an operation only allowing 0x%x",
-			  mask, current->scx.kf_mask);
-		return false;
-	}
-
-	/*
-	 * Enforce nesting boundaries. e.g. A kfunc which can be called from
-	 * DISPATCH must not be called if we're running DEQUEUE which is nested
-	 * inside ops.dispatch(). We don't need to check boundaries for any
-	 * blocking kfuncs as the verifier ensures they're only called from
-	 * sleepable progs.
-	 */
-	if (unlikely(highest_bit(mask) == SCX_KF_CPU_RELEASE &&
-		     (current->scx.kf_mask & higher_bits(SCX_KF_CPU_RELEASE)))) {
-		scx_error(sch, "cpu_release kfunc called from a nested operation");
-		return false;
-	}
-
-	if (unlikely(highest_bit(mask) == SCX_KF_DISPATCH &&
-		     (current->scx.kf_mask & higher_bits(SCX_KF_DISPATCH)))) {
-		scx_error(sch, "dispatch kfunc called from a nested operation");
-		return false;
-	}
-
-	return true;
-}
-
 /* see SCX_CALL_OP_TASK() */
 static __always_inline bool scx_kf_allowed_on_arg_tasks(struct scx_sched *sch,
 							struct task_struct *p)
@@ -1461,7 +1378,7 @@ static void call_task_dequeue(struct scx_sched *sch, struct rq *rq,
 		return;
 
 	if (SCX_HAS_OP(sch, dequeue))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, dequeue, rq, p, deq_flags);
+		SCX_CALL_OP_TASK(sch, dequeue, rq, p, deq_flags);
 
 	p->scx.flags &= ~SCX_TASK_IN_CUSTODY;
 }
@@ -1920,7 +1837,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 	WARN_ON_ONCE(*ddsp_taskp);
 	*ddsp_taskp = p;
 
-	SCX_CALL_OP_TASK(sch, SCX_KF_ENQUEUE, enqueue, rq, p, enq_flags);
+	SCX_CALL_OP_TASK(sch, enqueue, rq, p, enq_flags);
 
 	*ddsp_taskp = NULL;
 	if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
@@ -2024,7 +1941,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int core_enq_
 	add_nr_running(rq, 1);
 
 	if (SCX_HAS_OP(sch, runnable) && !task_on_rq_migrating(p))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, runnable, rq, p, enq_flags);
+		SCX_CALL_OP_TASK(sch, runnable, rq, p, enq_flags);
 
 	if (enq_flags & SCX_ENQ_WAKEUP)
 		touch_core_sched(rq, p);
@@ -2141,11 +2058,11 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int core_deq_
 	 */
 	if (SCX_HAS_OP(sch, stopping) && task_current(rq, p)) {
 		update_curr_scx(rq);
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, stopping, rq, p, false);
+		SCX_CALL_OP_TASK(sch, stopping, rq, p, false);
 	}
 
 	if (SCX_HAS_OP(sch, quiescent) && !task_on_rq_migrating(p))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, quiescent, rq, p, deq_flags);
+		SCX_CALL_OP_TASK(sch, quiescent, rq, p, deq_flags);
 
 	if (deq_flags & SCX_DEQ_SLEEP)
 		p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP;
@@ -2167,7 +2084,7 @@ static void yield_task_scx(struct rq *rq)
 	struct scx_sched *sch = scx_task_sched(p);
 
 	if (SCX_HAS_OP(sch, yield))
-		SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq, p, NULL);
+		SCX_CALL_OP_2TASKS_RET(sch, yield, rq, p, NULL);
 	else
 		p->scx.slice = 0;
 }
@@ -2178,8 +2095,7 @@ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
 	struct scx_sched *sch = scx_task_sched(from);
 
 	if (SCX_HAS_OP(sch, yield) && sch == scx_task_sched(to))
-		return SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq,
-					      from, to);
+		return SCX_CALL_OP_2TASKS_RET(sch, yield, rq, from, to);
 	else
 		return false;
 }
@@ -2799,20 +2715,11 @@ scx_dispatch_sched(struct scx_sched *sch, struct rq *rq,
 		dspc->nr_tasks = 0;
 
 		if (nested) {
-			/*
-			 * If nested, don't update kf_mask as the originating
-			 * invocation would already have set it up.
-			 */
-			SCX_CALL_OP(sch, 0, dispatch, rq, cpu,
-				    prev_on_sch ? prev : NULL);
+			SCX_CALL_OP(sch, dispatch, rq, cpu, prev_on_sch ? prev : NULL);
 		} else {
-			/*
-			 * If not nested, stash @prev so that nested invocations
-			 * can access it.
-			 */
+			/* stash @prev so that nested invocations can access it */
 			rq->scx.sub_dispatch_prev = prev;
-			SCX_CALL_OP(sch, SCX_KF_DISPATCH, dispatch, rq, cpu,
-				    prev_on_sch ? prev : NULL);
+			SCX_CALL_OP(sch, dispatch, rq, cpu, prev_on_sch ? prev : NULL);
 			rq->scx.sub_dispatch_prev = NULL;
 		}
 
@@ -2871,7 +2778,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 		 * emitted in switch_class().
 		 */
 		if (SCX_HAS_OP(sch, cpu_acquire))
-			SCX_CALL_OP(sch, SCX_KF_REST, cpu_acquire, rq, cpu, NULL);
+			SCX_CALL_OP(sch, cpu_acquire, rq, cpu, NULL);
 		rq->scx.cpu_released = false;
 	}
 
@@ -2950,7 +2857,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
 
 	/* see dequeue_task_scx() on why we skip when !QUEUED */
 	if (SCX_HAS_OP(sch, running) && (p->scx.flags & SCX_TASK_QUEUED))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, running, rq, p);
+		SCX_CALL_OP_TASK(sch, running, rq, p);
 
 	clr_task_runnable(p, true);
 
@@ -3022,8 +2929,7 @@ static void switch_class(struct rq *rq, struct task_struct *next)
 				.task = next,
 			};
 
-			SCX_CALL_OP(sch, SCX_KF_CPU_RELEASE, cpu_release, rq,
-				    cpu_of(rq), &args);
+			SCX_CALL_OP(sch, cpu_release, rq, cpu_of(rq), &args);
 		}
 		rq->scx.cpu_released = true;
 	}
@@ -3041,7 +2947,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
 
 	/* see dequeue_task_scx() on why we skip when !QUEUED */
 	if (SCX_HAS_OP(sch, stopping) && (p->scx.flags & SCX_TASK_QUEUED))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, stopping, rq, p, true);
+		SCX_CALL_OP_TASK(sch, stopping, rq, p, true);
 
 	if (p->scx.flags & SCX_TASK_QUEUED) {
 		set_task_runnable(rq, p);
@@ -3271,7 +3177,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
 	 */
 	if (sch_a == sch_b && SCX_HAS_OP(sch_a, core_sched_before) &&
 	    !scx_bypassing(sch_a, task_cpu(a)))
-		return SCX_CALL_OP_2TASKS_RET(sch_a, SCX_KF_REST, core_sched_before,
+		return SCX_CALL_OP_2TASKS_RET(sch_a, core_sched_before,
 					      NULL,
 					      (struct task_struct *)a,
 					      (struct task_struct *)b);
@@ -3308,10 +3214,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
 		*ddsp_taskp = p;
 
 		this_rq()->scx.in_select_cpu = true;
-		cpu = SCX_CALL_OP_TASK_RET(sch,
-					   SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
-					   select_cpu, NULL, p, prev_cpu,
-					   wake_flags);
+		cpu = SCX_CALL_OP_TASK_RET(sch, select_cpu, NULL, p, prev_cpu, wake_flags);
 		this_rq()->scx.in_select_cpu = false;
 		p->scx.selected_cpu = cpu;
 		*ddsp_taskp = NULL;
@@ -3361,8 +3264,7 @@ static void set_cpus_allowed_scx(struct task_struct *p,
 	 * designation pointless. Cast it away when calling the operation.
 	 */
 	if (SCX_HAS_OP(sch, set_cpumask))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_cpumask, task_rq(p),
-				 p, (struct cpumask *)p->cpus_ptr);
+		SCX_CALL_OP_TASK(sch, set_cpumask, task_rq(p), p, (struct cpumask *)p->cpus_ptr);
 }
 
 static void handle_hotplug(struct rq *rq, bool online)
@@ -3384,9 +3286,9 @@ static void handle_hotplug(struct rq *rq, bool online)
 		scx_idle_update_selcpu_topology(&sch->ops);
 
 	if (online && SCX_HAS_OP(sch, cpu_online))
-		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cpu_online, NULL, cpu);
+		SCX_CALL_OP(sch, cpu_online, NULL, cpu);
 	else if (!online && SCX_HAS_OP(sch, cpu_offline))
-		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cpu_offline, NULL, cpu);
+		SCX_CALL_OP(sch, cpu_offline, NULL, cpu);
 	else
 		scx_exit(sch, SCX_EXIT_UNREG_KERN,
 			 SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
@@ -3504,7 +3406,7 @@ static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued)
 		curr->scx.slice = 0;
 		touch_core_sched(rq, curr);
 	} else if (SCX_HAS_OP(sch, tick)) {
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, tick, rq, curr);
+		SCX_CALL_OP_TASK(sch, tick, rq, curr);
 	}
 
 	if (!curr->scx.slice)
@@ -3580,8 +3482,7 @@ static int __scx_init_task(struct scx_sched *sch, struct task_struct *p, bool fo
 			.fork = fork,
 		};
 
-		ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, init_task, NULL,
-				      p, &args);
+		ret = SCX_CALL_OP_RET(sch, init_task, NULL, p, &args);
 		if (unlikely(ret)) {
 			ret = ops_sanitize_err(sch, "init_task", ret);
 			return ret;
@@ -3662,11 +3563,10 @@ static void __scx_enable_task(struct scx_sched *sch, struct task_struct *p)
 	p->scx.weight = sched_weight_to_cgroup(weight);
 
 	if (SCX_HAS_OP(sch, enable))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, enable, rq, p);
+		SCX_CALL_OP_TASK(sch, enable, rq, p);
 
 	if (SCX_HAS_OP(sch, set_weight))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_weight, rq,
-				 p, p->scx.weight);
+		SCX_CALL_OP_TASK(sch, set_weight, rq, p, p->scx.weight);
 }
 
 static void scx_enable_task(struct scx_sched *sch, struct task_struct *p)
@@ -3685,7 +3585,7 @@ static void scx_disable_task(struct scx_sched *sch, struct task_struct *p)
 	clear_direct_dispatch(p);
 
 	if (SCX_HAS_OP(sch, disable))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, disable, rq, p);
+		SCX_CALL_OP_TASK(sch, disable, rq, p);
 	scx_set_task_state(p, SCX_TASK_READY);
 
 	/*
@@ -3723,8 +3623,7 @@ static void __scx_disable_and_exit_task(struct scx_sched *sch,
 	}
 
 	if (SCX_HAS_OP(sch, exit_task))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, exit_task, task_rq(p),
-				 p, &args);
+		SCX_CALL_OP_TASK(sch, exit_task, task_rq(p), p, &args);
 }
 
 static void scx_disable_and_exit_task(struct scx_sched *sch,
@@ -3903,8 +3802,7 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p,
 
 	p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight));
 	if (SCX_HAS_OP(sch, set_weight))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_weight, rq,
-				 p, p->scx.weight);
+		SCX_CALL_OP_TASK(sch, set_weight, rq, p, p->scx.weight);
 }
 
 static void prio_changed_scx(struct rq *rq, struct task_struct *p, u64 oldprio)
@@ -3925,8 +3823,7 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
 	 * different scheduler class. Keep the BPF scheduler up-to-date.
 	 */
 	if (SCX_HAS_OP(sch, set_cpumask))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_cpumask, rq,
-				 p, (struct cpumask *)p->cpus_ptr);
+		SCX_CALL_OP_TASK(sch, set_cpumask, rq, p, (struct cpumask *)p->cpus_ptr);
 }
 
 static void switched_from_scx(struct rq *rq, struct task_struct *p)
@@ -4309,7 +4206,7 @@ int scx_tg_online(struct task_group *tg)
 				  .bw_quota_us = tg->scx.bw_quota_us,
 				  .bw_burst_us = tg->scx.bw_burst_us };
 
-			ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init,
+			ret = SCX_CALL_OP_RET(sch, cgroup_init,
 					      NULL, tg->css.cgroup, &args);
 			if (ret)
 				ret = ops_sanitize_err(sch, "cgroup_init", ret);
@@ -4331,8 +4228,7 @@ void scx_tg_offline(struct task_group *tg)
 
 	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_exit) &&
 	    (tg->scx.flags & SCX_TG_INITED))
-		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_exit, NULL,
-			    tg->css.cgroup);
+		SCX_CALL_OP(sch, cgroup_exit, NULL, tg->css.cgroup);
 	tg->scx.flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED);
 }
 
@@ -4361,8 +4257,7 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset)
 			continue;
 
 		if (SCX_HAS_OP(sch, cgroup_prep_move)) {
-			ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED,
-					      cgroup_prep_move, NULL,
+			ret = SCX_CALL_OP_RET(sch, cgroup_prep_move, NULL,
 					      p, from, css->cgroup);
 			if (ret)
 				goto err;
@@ -4377,7 +4272,7 @@ err:
 	cgroup_taskset_for_each(p, css, tset) {
 		if (SCX_HAS_OP(sch, cgroup_cancel_move) &&
 		    p->scx.cgrp_moving_from)
-			SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_cancel_move, NULL,
+			SCX_CALL_OP(sch, cgroup_cancel_move, NULL,
 				    p, p->scx.cgrp_moving_from, css->cgroup);
 		p->scx.cgrp_moving_from = NULL;
 	}
@@ -4398,7 +4293,7 @@ void scx_cgroup_move_task(struct task_struct *p)
 	 */
 	if (SCX_HAS_OP(sch, cgroup_move) &&
 	    !WARN_ON_ONCE(!p->scx.cgrp_moving_from))
-		SCX_CALL_OP_TASK(sch, SCX_KF_REST, cgroup_move, task_rq(p),
+		SCX_CALL_OP_TASK(sch, cgroup_move, task_rq(p),
 				 p, p->scx.cgrp_moving_from,
 				 tg_cgrp(task_group(p)));
 	p->scx.cgrp_moving_from = NULL;
@@ -4416,7 +4311,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset)
 	cgroup_taskset_for_each(p, css, tset) {
 		if (SCX_HAS_OP(sch, cgroup_cancel_move) &&
 		    p->scx.cgrp_moving_from)
-			SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_cancel_move, NULL,
+			SCX_CALL_OP(sch, cgroup_cancel_move, NULL,
 				    p, p->scx.cgrp_moving_from, css->cgroup);
 		p->scx.cgrp_moving_from = NULL;
 	}
@@ -4430,8 +4325,7 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight)
 
 	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) &&
 	    tg->scx.weight != weight)
-		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
-			    tg_cgrp(tg), weight);
+		SCX_CALL_OP(sch, cgroup_set_weight, NULL, tg_cgrp(tg), weight);
 
 	tg->scx.weight = weight;
 
@@ -4445,8 +4339,7 @@ void scx_group_set_idle(struct task_group *tg, bool idle)
 	percpu_down_read(&scx_cgroup_ops_rwsem);
 
 	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_idle))
-		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_idle, NULL,
-			    tg_cgrp(tg), idle);
+		SCX_CALL_OP(sch, cgroup_set_idle, NULL, tg_cgrp(tg), idle);
 
 	/* Update the task group's idle state */
 	tg->scx.idle = idle;
@@ -4465,7 +4358,7 @@ void scx_group_set_bandwidth(struct task_group *tg,
 	    (tg->scx.bw_period_us != period_us ||
 	     tg->scx.bw_quota_us != quota_us ||
 	     tg->scx.bw_burst_us != burst_us))
-		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_bandwidth, NULL,
+		SCX_CALL_OP(sch, cgroup_set_bandwidth, NULL,
 			    tg_cgrp(tg), period_us, quota_us, burst_us);
 
 	tg->scx.bw_period_us = period_us;
@@ -4690,8 +4583,7 @@ static void scx_cgroup_exit(struct scx_sched *sch)
 		if (!sch->ops.cgroup_exit)
 			continue;
 
-		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_exit, NULL,
-			    css->cgroup);
+		SCX_CALL_OP(sch, cgroup_exit, NULL, css->cgroup);
 	}
 }
 
@@ -4722,7 +4614,7 @@ static int scx_cgroup_init(struct scx_sched *sch)
 			continue;
 		}
 
-		ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init, NULL,
+		ret = SCX_CALL_OP_RET(sch, cgroup_init, NULL,
 				      css->cgroup, &args);
 		if (ret) {
 			scx_error(sch, "ops.cgroup_init() failed (%d)", ret);
@@ -5795,12 +5687,12 @@ static void scx_sub_disable(struct scx_sched *sch)
 			.ops = &sch->ops,
 			.cgroup_path = sch->cgrp_path,
 		};
-		SCX_CALL_OP(parent, SCX_KF_UNLOCKED, sub_detach, NULL,
+		SCX_CALL_OP(parent, sub_detach, NULL,
 			    &sub_detach_args);
 	}
 
 	if (sch->ops.exit)
-		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, exit, NULL, sch->exit_info);
+		SCX_CALL_OP(sch, exit, NULL, sch->exit_info);
 	kobject_del(&sch->kobj);
 }
 #else	/* CONFIG_EXT_SUB_SCHED */
@@ -5915,7 +5807,7 @@ static void scx_root_disable(struct scx_sched *sch)
 	}
 
 	if (sch->ops.exit)
-		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, exit, NULL, ei);
+		SCX_CALL_OP(sch, exit, NULL, ei);
 
 	scx_unlink_sched(sch);
 
@@ -6178,7 +6070,7 @@ static void scx_dump_task(struct scx_sched *sch,
 
 	if (SCX_HAS_OP(sch, dump_task)) {
 		ops_dump_init(s, "    ");
-		SCX_CALL_OP(sch, SCX_KF_REST, dump_task, NULL, dctx, p);
+		SCX_CALL_OP(sch, dump_task, NULL, dctx, p);
 		ops_dump_exit();
 	}
 
@@ -6242,7 +6134,7 @@ static void scx_dump_state(struct scx_sched *sch, struct scx_exit_info *ei,
 
 	if (SCX_HAS_OP(sch, dump)) {
 		ops_dump_init(&s, "");
-		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, dump, NULL, &dctx);
+		SCX_CALL_OP(sch, dump, NULL, &dctx);
 		ops_dump_exit();
 	}
 
@@ -6302,7 +6194,7 @@ static void scx_dump_state(struct scx_sched *sch, struct scx_exit_info *ei,
 		used = seq_buf_used(&ns);
 		if (SCX_HAS_OP(sch, dump_cpu)) {
 			ops_dump_init(&ns, "  ");
-			SCX_CALL_OP(sch, SCX_KF_REST, dump_cpu, NULL,
+			SCX_CALL_OP(sch, dump_cpu, NULL,
 				    &dctx, cpu, idle);
 			ops_dump_exit();
 		}
@@ -6748,7 +6640,7 @@ static void scx_root_enable_workfn(struct kthread_work *work)
 	scx_idle_enable(ops);
 
 	if (sch->ops.init) {
-		ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, init, NULL);
+		ret = SCX_CALL_OP_RET(sch, init, NULL);
 		if (ret) {
 			ret = ops_sanitize_err(sch, "init", ret);
 			cpus_read_unlock();
@@ -7020,7 +6912,7 @@ static void scx_sub_enable_workfn(struct kthread_work *work)
 	}
 
 	if (sch->ops.init) {
-		ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, init, NULL);
+		ret = SCX_CALL_OP_RET(sch, init, NULL);
 		if (ret) {
 			ret = ops_sanitize_err(sch, "init", ret);
 			scx_error(sch, "ops.init() failed (%d)", ret);
@@ -7037,7 +6929,7 @@ static void scx_sub_enable_workfn(struct kthread_work *work)
 		.cgroup_path = sch->cgrp_path,
 	};
 
-	ret = SCX_CALL_OP_RET(parent, SCX_KF_UNLOCKED, sub_attach, NULL,
+	ret = SCX_CALL_OP_RET(parent, sub_attach, NULL,
 			      &sub_attach_args);
 	if (ret) {
 		ret = ops_sanitize_err(sch, "sub_attach", ret);
@@ -7891,9 +7783,6 @@ static bool scx_vet_enq_flags(struct scx_sched *sch, u64 dsq_id, u64 *enq_flags)
 static bool scx_dsq_insert_preamble(struct scx_sched *sch, struct task_struct *p,
 				    u64 dsq_id, u64 *enq_flags)
 {
-	if (!scx_kf_allowed(sch, SCX_KF_ENQUEUE | SCX_KF_DISPATCH))
-		return false;
-
 	lockdep_assert_irqs_disabled();
 
 	if (unlikely(!p)) {
@@ -8146,10 +8035,6 @@ static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit,
 	bool in_balance;
 	unsigned long flags;
 
-	if ((scx_locked_rq() || this_rq()->scx.in_select_cpu) &&
-	    !scx_kf_allowed(sch, SCX_KF_DISPATCH))
-		return false;
-
 	if (!scx_vet_enq_flags(sch, dsq_id, &enq_flags))
 		return false;
 
@@ -8244,9 +8129,6 @@ __bpf_kfunc u32 scx_bpf_dispatch_nr_slots(const struct bpf_prog_aux *aux)
 	if (unlikely(!sch))
 		return 0;
 
-	if (!scx_kf_allowed(sch, SCX_KF_DISPATCH))
-		return 0;
-
 	return sch->dsp_max_batch - __this_cpu_read(sch->pcpu->dsp_ctx.cursor);
 }
 
@@ -8268,9 +8150,6 @@ __bpf_kfunc void scx_bpf_dispatch_cancel(const struct bpf_prog_aux *aux)
 	if (unlikely(!sch))
 		return;
 
-	if (!scx_kf_allowed(sch, SCX_KF_DISPATCH))
-		return;
-
 	dspc = &this_cpu_ptr(sch->pcpu)->dsp_ctx;
 
 	if (dspc->cursor > 0)
@@ -8317,9 +8196,6 @@ __bpf_kfunc bool scx_bpf_dsq_move_to_local___v2(u64 dsq_id, u64 enq_flags,
 	if (unlikely(!sch))
 		return false;
 
-	if (!scx_kf_allowed(sch, SCX_KF_DISPATCH))
-		return false;
-
 	if (!scx_vet_enq_flags(sch, SCX_DSQ_LOCAL, &enq_flags))
 		return false;
 
@@ -8473,9 +8349,6 @@ __bpf_kfunc bool scx_bpf_sub_dispatch(u64 cgroup_id, const struct bpf_prog_aux *
 	if (unlikely(!parent))
 		return false;
 
-	if (!scx_kf_allowed(parent, SCX_KF_DISPATCH))
-		return false;
-
 	child = scx_find_sub_sched(cgroup_id);
 
 	if (unlikely(!child))
@@ -8535,9 +8408,6 @@ __bpf_kfunc u32 scx_bpf_reenqueue_local(const struct bpf_prog_aux *aux)
 	if (unlikely(!sch))
 		return 0;
 
-	if (!scx_kf_allowed(sch, SCX_KF_CPU_RELEASE))
-		return 0;
-
 	rq = cpu_rq(smp_processor_id());
 	lockdep_assert_rq_held(rq);
 
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index ec49e0c9892e..443d12a3df67 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -789,7 +789,7 @@ void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
 	 */
 	if (SCX_HAS_OP(sch, update_idle) && do_notify &&
 	    !scx_bypassing(sch, cpu_of(rq)))
-		SCX_CALL_OP(sch, SCX_KF_REST, update_idle, rq, cpu_of(rq), idle);
+		SCX_CALL_OP(sch, update_idle, rq, cpu_of(rq), idle);
 }
 
 static void reset_idle_masks(struct sched_ext_ops *ops)
@@ -937,8 +937,6 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
 	} else if (!scx_locked_rq()) {
 		raw_spin_lock_irqsave(&p->pi_lock, irq_flags);
 		we_locked = true;
-	} else if (!scx_kf_allowed(sch, SCX_KF_ENQUEUE)) {
-		return -EPERM;
 	}
 
 	/*
-- 
cgit v1.2.3


From 33dfc521c20d02375c8696dcace04037d2a865e6 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Fri, 10 Apr 2026 13:55:52 -0700
Subject: bpf: share several utility functions as internal API

Namely:
- bpf_subprog_is_global
- bpf_vlog_alignment

Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260410-patch-set-v4-1-5d4eecb343db@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  2 ++
 kernel/bpf/log.c             |  4 ++--
 kernel/bpf/verifier.c        | 10 +++++-----
 3 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 36bfd96d4563..15f7f9f35be9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -1121,12 +1121,14 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
 			  u32 frameno, bool print_all);
 void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate,
 		      u32 frameno);
+u32 bpf_vlog_alignment(u32 pos);
 
 struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off);
 int bpf_jmp_offset(struct bpf_insn *insn);
 struct bpf_iarray *bpf_insn_successors(struct bpf_verifier_env *env, u32 idx);
 void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask);
 bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx);
+bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog);
 
 int bpf_find_subprog(struct bpf_verifier_env *env, int off);
 int bpf_compute_const_regs(struct bpf_verifier_env *env);
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 6fd030fd6eeb..803f21e61d92 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -806,7 +806,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
 		mark_verifier_state_clean(env);
 }
 
-static inline u32 vlog_alignment(u32 pos)
+u32 bpf_vlog_alignment(u32 pos)
 {
 	return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
 			BPF_LOG_MIN_ALIGNMENT) - pos - 1;
@@ -818,7 +818,7 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st
 	if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) {
 		/* remove new line character */
 		bpf_vlog_reset(&env->log, env->prev_log_pos - 1);
-		verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' ');
+		verbose(env, "%*c;", bpf_vlog_alignment(env->prev_insn_print_pos), ' ');
 	} else {
 		verbose(env, "%d:", env->insn_idx);
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9c1135d373e2..c422f1142b99 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -423,7 +423,7 @@ static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
 	return rec;
 }
 
-static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
+bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog)
 {
 	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
 
@@ -4631,7 +4631,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 			if (subprog < 0)
 				return -EFAULT;
 
-			if (subprog_is_global(env, subprog)) {
+			if (bpf_subprog_is_global(env, subprog)) {
 				/* check that jump history doesn't have any
 				 * extra instructions from subprog; the next
 				 * instruction after call to global subprog
@@ -7032,7 +7032,7 @@ continue_func:
 		if (subprog[idx].has_tail_call)
 			tail_call_reachable = true;
 
-		frame = subprog_is_global(env, idx) ? 0 : frame + 1;
+		frame = bpf_subprog_is_global(env, idx) ? 0 : frame + 1;
 		if (frame >= MAX_CALL_FRAMES) {
 			verbose(env, "the call stack of %d frames is too deep !\n",
 				frame);
@@ -11107,7 +11107,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	err = btf_check_subprog_call(env, subprog, caller->regs);
 	if (err == -EFAULT)
 		return err;
-	if (subprog_is_global(env, subprog)) {
+	if (bpf_subprog_is_global(env, subprog)) {
 		const char *sub_name = subprog_name(env, subprog);
 
 		if (env->cur_state->active_locks) {
@@ -25299,7 +25299,7 @@ static int do_check_subprogs(struct bpf_verifier_env *env)
 again:
 	new_cnt = 0;
 	for (i = 1; i < env->subprog_cnt; i++) {
-		if (!subprog_is_global(env, i))
+		if (!bpf_subprog_is_global(env, i))
 			continue;
 
 		sub_aux = subprog_aux(env, i);
-- 
cgit v1.2.3


From cf3ee1ecf3466ddb978a58df9d5b638e7dff673d Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Fri, 10 Apr 2026 13:55:53 -0700
Subject: bpf: save subprogram name in bpf_subprog_info

Subprogram name can be computed from function info and BTF, but it is
convenient to have the name readily available for logging purposes.
Update comment saying that bpf_subprog_info->start has to be the first
field, this is no longer true, relevant sites access .start field
by it's name.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260410-patch-set-v4-2-5d4eecb343db@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 2 +-
 kernel/bpf/verifier.c        | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 15f7f9f35be9..cec6054d6333 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -664,7 +664,7 @@ enum priv_stack_mode {
 };
 
 struct bpf_subprog_info {
-	/* 'start' has to be the first field otherwise find_subprog() won't work */
+	const char *name; /* name extracted from BTF */
 	u32 start; /* insn idx of function entry point */
 	u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
 	u32 postorder_start; /* The idx to the env->cfg.insn_postorder */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c422f1142b99..de63e5b17c92 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -19797,6 +19797,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
 			goto err_free;
 		}
 
+		env->subprog_info[i].name = btf_name_by_offset(btf, type->name_off);
 		bpfptr_add(&urecord, urec_size);
 	}
 
-- 
cgit v1.2.3


From 2ad45b414b8779ba5c50f746fd767926cccde729 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 10 Apr 2026 13:55:54 -0700
Subject: bpf: Add spis_*() helpers for 4-byte stack slot bitmasks

Add helper functions for manipulating u64[2] bitmasks that represent
4-byte stack slot liveness. The 512-byte BPF stack is divided into
128 4-byte slots, requiring 128 bits (two u64s) to track.

These will be used by the static stack liveness analysis in the
next commit.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260410-patch-set-v4-3-5d4eecb343db@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 67 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index cec6054d6333..f34e7a074a20 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -224,6 +224,73 @@ enum bpf_stack_slot_type {
 
 #define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
 
+/* 4-byte stack slot granularity for liveness analysis */
+#define BPF_HALF_REG_SIZE	4
+#define STACK_SLOTS		(MAX_BPF_STACK / BPF_HALF_REG_SIZE)	/* 128 */
+
+typedef struct {
+	u64 v[2];
+} spis_t;
+
+#define SPIS_ZERO	((spis_t){})
+#define SPIS_ALL	((spis_t){{ U64_MAX, U64_MAX }})
+
+static inline bool spis_is_zero(spis_t s)
+{
+	return s.v[0] == 0 && s.v[1] == 0;
+}
+
+static inline bool spis_equal(spis_t a, spis_t b)
+{
+	return a.v[0] == b.v[0] && a.v[1] == b.v[1];
+}
+
+static inline spis_t spis_or(spis_t a, spis_t b)
+{
+	return (spis_t){{ a.v[0] | b.v[0], a.v[1] | b.v[1] }};
+}
+
+static inline spis_t spis_and(spis_t a, spis_t b)
+{
+	return (spis_t){{ a.v[0] & b.v[0], a.v[1] & b.v[1] }};
+}
+
+static inline spis_t spis_xor(spis_t a, spis_t b)
+{
+	return (spis_t){{ a.v[0] ^ b.v[0], a.v[1] ^ b.v[1] }};
+}
+
+static inline spis_t spis_not(spis_t s)
+{
+	return (spis_t){{ ~s.v[0], ~s.v[1] }};
+}
+
+static inline bool spis_test_bit(spis_t s, u32 slot)
+{
+	return s.v[slot / 64] & BIT_ULL(slot % 64);
+}
+
+static inline void spis_or_range(spis_t *mask, u32 lo, u32 hi)
+{
+	u32 w;
+
+	for (w = lo; w <= hi && w < STACK_SLOTS; w++)
+		mask->v[w / 64] |= BIT_ULL(w % 64);
+}
+
+static inline spis_t spis_one_bit(u32 slot)
+{
+	if (slot < 64)
+		return (spis_t){{ BIT_ULL(slot), 0 }};
+	else
+		return (spis_t){{ 0, BIT_ULL(slot - 64) }};
+}
+
+static inline spis_t spis_single_slot(u32 spi)
+{
+	return spis_or(spis_one_bit(spi * 2), spis_one_bit(spi * 2 + 1));
+}
+
 #define BPF_REGMASK_ARGS ((1 << BPF_REG_1) | (1 << BPF_REG_2) | \
 			  (1 << BPF_REG_3) | (1 << BPF_REG_4) | \
 			  (1 << BPF_REG_5))
-- 
cgit v1.2.3


From 7ca5f68cda073a6c4aa6135e98a27c7b2a731cdd Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Fri, 10 Apr 2026 13:55:55 -0700
Subject: bpf: make liveness.c track stack with 4-byte granularity

Convert liveness bitmask type from u64 to spis_t, doubling the number
of trackable stack slots from 64 to 128 to support 4-byte granularity.

Each 8-byte SPI now maps to two consecutive 4-byte sub-slots in the
bitmask: spi*2 half and spi*2+1 half. In verifier.c,
check_stack_write_fixed_off() now reports 4-byte aligned writes of
4-byte writes as half-slot marks and 8-byte aligned 8-byte writes as
two slots. Similar logic applied in check_stack_read_fixed_off().

Queries (is_live_before) are not yet migrated to half-slot
granularity.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260410-patch-set-v4-4-5d4eecb343db@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   4 +-
 kernel/bpf/liveness.c        | 122 ++++++++++++++++++++++++++++---------------
 kernel/bpf/verifier.c        |  55 +++++++++++--------
 3 files changed, 115 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index f34e7a074a20..7b31d8024c61 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -1217,8 +1217,8 @@ s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env,
 int bpf_stack_liveness_init(struct bpf_verifier_env *env);
 void bpf_stack_liveness_free(struct bpf_verifier_env *env);
 int bpf_update_live_stack(struct bpf_verifier_env *env);
-int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frameno, u32 insn_idx, u64 mask);
-void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frameno, u64 mask);
+int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frameno, u32 insn_idx, spis_t mask);
+void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frameno, spis_t mask);
 int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx);
 int bpf_commit_stack_write_marks(struct bpf_verifier_env *env);
 int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c
index 998986853c61..9e36ea5f2eec 100644
--- a/kernel/bpf/liveness.c
+++ b/kernel/bpf/liveness.c
@@ -93,10 +93,10 @@ struct callchain {
 };
 
 struct per_frame_masks {
-	u64 may_read;		/* stack slots that may be read by this instruction */
-	u64 must_write;		/* stack slots written by this instruction */
-	u64 must_write_acc;	/* stack slots written by this instruction and its successors */
-	u64 live_before;	/* stack slots that may be read by this insn and its successors */
+	spis_t may_read;	/* stack slots that may be read by this instruction */
+	spis_t must_write;	/* stack slots written by this instruction */
+	spis_t must_write_acc;	/* stack slots written by this instruction and its successors */
+	spis_t live_before;	/* stack slots that may be read by this insn and its successors */
 };
 
 /*
@@ -131,7 +131,7 @@ struct bpf_liveness {
 	 * Below fields are used to accumulate stack write marks for instruction at
 	 * @write_insn_idx before submitting the marks to @cur_instance.
 	 */
-	u64 write_masks_acc[MAX_CALL_FRAMES];
+	spis_t write_masks_acc[MAX_CALL_FRAMES];
 	u32 write_insn_idx;
 };
 
@@ -299,23 +299,24 @@ static int ensure_cur_instance(struct bpf_verifier_env *env)
 
 /* Accumulate may_read masks for @frame at @insn_idx */
 static int mark_stack_read(struct bpf_verifier_env *env,
-			   struct func_instance *instance, u32 frame, u32 insn_idx, u64 mask)
+			   struct func_instance *instance, u32 frame, u32 insn_idx, spis_t mask)
 {
 	struct per_frame_masks *masks;
-	u64 new_may_read;
+	spis_t new_may_read;
 
 	masks = alloc_frame_masks(env, instance, frame, insn_idx);
 	if (IS_ERR(masks))
 		return PTR_ERR(masks);
-	new_may_read = masks->may_read | mask;
-	if (new_may_read != masks->may_read &&
-	    ((new_may_read | masks->live_before) != masks->live_before))
+	new_may_read = spis_or(masks->may_read, mask);
+	if (!spis_equal(new_may_read, masks->may_read) &&
+	    !spis_equal(spis_or(new_may_read, masks->live_before),
+				masks->live_before))
 		instance->updated = true;
-	masks->may_read |= mask;
+	masks->may_read = spis_or(masks->may_read, mask);
 	return 0;
 }
 
-int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frame, u32 insn_idx, u64 mask)
+int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frame, u32 insn_idx, spis_t mask)
 {
 	int err;
 
@@ -332,7 +333,7 @@ static void reset_stack_write_marks(struct bpf_verifier_env *env,
 
 	liveness->write_insn_idx = insn_idx;
 	for (i = 0; i <= instance->callchain.curframe; i++)
-		liveness->write_masks_acc[i] = 0;
+		liveness->write_masks_acc[i] = SPIS_ZERO;
 }
 
 int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx)
@@ -348,18 +349,18 @@ int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx)
 	return 0;
 }
 
-void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frame, u64 mask)
+void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frame, spis_t mask)
 {
-	env->liveness->write_masks_acc[frame] |= mask;
+	env->liveness->write_masks_acc[frame] = spis_or(env->liveness->write_masks_acc[frame], mask);
 }
 
 static int commit_stack_write_marks(struct bpf_verifier_env *env,
 				    struct func_instance *instance)
 {
 	struct bpf_liveness *liveness = env->liveness;
-	u32 idx, frame, curframe, old_must_write;
+	u32 idx, frame, curframe;
 	struct per_frame_masks *masks;
-	u64 mask;
+	spis_t mask, old_must_write, dropped;
 
 	if (!instance)
 		return 0;
@@ -369,7 +370,7 @@ static int commit_stack_write_marks(struct bpf_verifier_env *env,
 	for (frame = 0; frame <= curframe; frame++) {
 		mask = liveness->write_masks_acc[frame];
 		/* avoid allocating frames for zero masks */
-		if (mask == 0 && !instance->must_write_set[idx])
+		if (spis_is_zero(mask) && !instance->must_write_set[idx])
 			continue;
 		masks = alloc_frame_masks(env, instance, frame, liveness->write_insn_idx);
 		if (IS_ERR(masks))
@@ -380,12 +381,14 @@ static int commit_stack_write_marks(struct bpf_verifier_env *env,
 		 * to @mask. Otherwise take intersection with the previous value.
 		 */
 		if (instance->must_write_set[idx])
-			mask &= old_must_write;
-		if (old_must_write != mask) {
+			mask = spis_and(mask, old_must_write);
+		if (!spis_equal(old_must_write, mask)) {
 			masks->must_write = mask;
 			instance->updated = true;
 		}
-		if (old_must_write & ~mask)
+		/* dropped = old_must_write & ~mask */
+		dropped = spis_and(old_must_write, spis_not(mask));
+		if (!spis_is_zero(dropped))
 			instance->must_write_dropped = true;
 	}
 	instance->must_write_set[idx] = true;
@@ -415,22 +418,52 @@ static char *fmt_callchain(struct bpf_verifier_env *env, struct callchain *callc
 	return env->tmp_str_buf;
 }
 
+/*
+ * When both halves of an 8-byte SPI are set, print as "-8","-16",...
+ * When only one half is set, print as "-4h","-8h",...
+ */
+static void bpf_fmt_spis_mask(char *buf, ssize_t buf_sz, spis_t spis)
+{
+	bool first = true;
+	int spi, n;
+
+	buf[0] = '\0';
+
+	for (spi = 0; spi < STACK_SLOTS / 2 && buf_sz > 0; spi++) {
+		bool lo = spis_test_bit(spis, spi * 2);
+		bool hi = spis_test_bit(spis, spi * 2 + 1);
+
+		if (!lo && !hi)
+			continue;
+		n = snprintf(buf, buf_sz, "%s%d%s",
+			     first ? "" : ",",
+			     -(spi + 1) * BPF_REG_SIZE + (lo && !hi ? BPF_HALF_REG_SIZE : 0),
+			     lo && hi ? "" : "h");
+		first = false;
+		buf += n;
+		buf_sz -= n;
+	}
+}
+
 static void log_mask_change(struct bpf_verifier_env *env, struct callchain *callchain,
-			    char *pfx, u32 frame, u32 insn_idx, u64 old, u64 new)
+			    char *pfx, u32 frame, u32 insn_idx,
+			    spis_t old, spis_t new)
 {
-	u64 changed_bits = old ^ new;
-	u64 new_ones = new & changed_bits;
-	u64 new_zeros = ~new & changed_bits;
+	spis_t changed_bits, new_ones, new_zeros;
+
+	changed_bits = spis_xor(old, new);
+	new_ones = spis_and(new, changed_bits);
+	new_zeros = spis_and(spis_not(new), changed_bits);
 
-	if (!changed_bits)
+	if (spis_is_zero(changed_bits))
 		return;
 	bpf_log(&env->log, "%s frame %d insn %d ", fmt_callchain(env, callchain), frame, insn_idx);
-	if (new_ones) {
-		bpf_fmt_stack_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_ones);
+	if (!spis_is_zero(new_ones)) {
+		bpf_fmt_spis_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_ones);
 		bpf_log(&env->log, "+%s %s ", pfx, env->tmp_str_buf);
 	}
-	if (new_zeros) {
-		bpf_fmt_stack_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_zeros);
+	if (!spis_is_zero(new_zeros)) {
+		bpf_fmt_spis_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_zeros);
 		bpf_log(&env->log, "-%s %s", pfx, env->tmp_str_buf);
 	}
 	bpf_log(&env->log, "\n");
@@ -562,7 +595,7 @@ static inline bool update_insn(struct bpf_verifier_env *env,
 			       struct func_instance *instance, u32 frame, u32 insn_idx)
 {
 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
-	u64 new_before, new_after, must_write_acc;
+	spis_t new_before, new_after, must_write_acc;
 	struct per_frame_masks *insn, *succ_insn;
 	struct bpf_iarray *succ;
 	u32 s;
@@ -574,28 +607,30 @@ static inline bool update_insn(struct bpf_verifier_env *env,
 
 	changed = false;
 	insn = get_frame_masks(instance, frame, insn_idx);
-	new_before = 0;
-	new_after = 0;
+	new_before = SPIS_ZERO;
+	new_after = SPIS_ZERO;
 	/*
 	 * New "must_write_acc" is an intersection of all "must_write_acc"
 	 * of successors plus all "must_write" slots of instruction itself.
 	 */
-	must_write_acc = U64_MAX;
+	must_write_acc = SPIS_ALL;
 	for (s = 0; s < succ->cnt; ++s) {
 		succ_insn = get_frame_masks(instance, frame, succ->items[s]);
-		new_after |= succ_insn->live_before;
-		must_write_acc &= succ_insn->must_write_acc;
+		new_after = spis_or(new_after, succ_insn->live_before);
+		must_write_acc = spis_and(must_write_acc, succ_insn->must_write_acc);
 	}
-	must_write_acc |= insn->must_write;
+	must_write_acc = spis_or(must_write_acc, insn->must_write);
 	/*
 	 * New "live_before" is a union of all "live_before" of successors
 	 * minus slots written by instruction plus slots read by instruction.
+	 * new_before = (new_after & ~insn->must_write) | insn->may_read
 	 */
-	new_before = (new_after & ~insn->must_write) | insn->may_read;
-	changed |= new_before != insn->live_before;
-	changed |= must_write_acc != insn->must_write_acc;
+	new_before = spis_or(spis_and(new_after, spis_not(insn->must_write)),
+			     insn->may_read);
+	changed |= !spis_equal(new_before, insn->live_before);
+	changed |= !spis_equal(must_write_acc, insn->must_write_acc);
 	if (unlikely(env->log.level & BPF_LOG_LEVEL2) &&
-	    (insn->may_read || insn->must_write ||
+	    (!spis_is_zero(insn->may_read) || !spis_is_zero(insn->must_write) ||
 	     insn_idx == callchain_subprog_start(&instance->callchain) ||
 	     aux[insn_idx].prune_point)) {
 		log_mask_change(env, &instance->callchain, "live",
@@ -631,7 +666,7 @@ static int update_instance(struct bpf_verifier_env *env, struct func_instance *i
 
 			for (i = 0; i < instance->insn_cnt; i++) {
 				insn = get_frame_masks(instance, frame, this_subprog_start + i);
-				insn->must_write_acc = 0;
+				insn->must_write_acc = SPIS_ZERO;
 			}
 		}
 	}
@@ -702,7 +737,8 @@ static bool is_live_before(struct func_instance *instance, u32 insn_idx, u32 fra
 	struct per_frame_masks *masks;
 
 	masks = get_frame_masks(instance, frameno, insn_idx);
-	return masks && (masks->live_before & BIT(spi));
+	return masks && (spis_test_bit(masks->live_before, spi * 2) ||
+			 spis_test_bit(masks->live_before, spi * 2 + 1));
 }
 
 int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index de63e5b17c92..fe17114b643b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -830,7 +830,8 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
 		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
 	}
 
-	bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
+	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi));
+	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi - 1));
 
 	return 0;
 }
@@ -847,7 +848,8 @@ static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_stat
 	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
 	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
 
-	bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
+	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi));
+	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi - 1));
 }
 
 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
@@ -984,7 +986,8 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
 	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
 	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
 
-	bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
+	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi));
+	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi - 1));
 
 	return 0;
 }
@@ -1111,7 +1114,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
 		for (j = 0; j < BPF_REG_SIZE; j++)
 			slot->slot_type[j] = STACK_ITER;
 
-		bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
+		bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi - i));
 		mark_stack_slot_scratched(env, spi - i);
 	}
 
@@ -1140,7 +1143,7 @@ static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
 		for (j = 0; j < BPF_REG_SIZE; j++)
 			slot->slot_type[j] = STACK_INVALID;
 
-		bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
+		bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi - i));
 		mark_stack_slot_scratched(env, spi - i);
 	}
 
@@ -1230,7 +1233,7 @@ static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
 	slot = &state->stack[spi];
 	st = &slot->spilled_ptr;
 
-	bpf_mark_stack_write(env, reg->frameno, BIT(spi));
+	bpf_mark_stack_write(env, reg->frameno, spis_single_slot(spi));
 	__mark_reg_known_zero(st);
 	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
 	st->ref_obj_id = id;
@@ -1286,7 +1289,7 @@ static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_r
 
 	__mark_reg_not_init(env, st);
 
-	bpf_mark_stack_write(env, reg->frameno, BIT(spi));
+	bpf_mark_stack_write(env, reg->frameno, spis_single_slot(spi));
 
 	for (i = 0; i < BPF_REG_SIZE; i++)
 		slot->slot_type[i] = STACK_INVALID;
@@ -3867,7 +3870,8 @@ static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg
 	int err, i;
 
 	for (i = 0; i < nr_slots; i++) {
-		err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi - i));
+		err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx,
+					  spis_single_slot(spi - i));
 		if (err)
 			return err;
 		mark_stack_slot_scratched(env, spi - i);
@@ -5422,17 +5426,15 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	if (err)
 		return err;
 
-	if (!(off % BPF_REG_SIZE) && size == BPF_REG_SIZE) {
-		/* only mark the slot as written if all 8 bytes were written
-		 * otherwise read propagation may incorrectly stop too soon
-		 * when stack slots are partially written.
-		 * This heuristic means that read propagation will be
-		 * conservative, since it will add reg_live_read marks
-		 * to stack slots all the way to first state when programs
-		 * writes+reads less than 8 bytes
-		 */
-		bpf_mark_stack_write(env, state->frameno, BIT(spi));
-	}
+	if (!(off % BPF_REG_SIZE) && size == BPF_REG_SIZE)
+		/* 8-byte aligned, 8-byte write */
+		bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi));
+	else if (!(off % BPF_REG_SIZE) && size == BPF_HALF_REG_SIZE)
+		/* 8-byte aligned, 4-byte write */
+		bpf_mark_stack_write(env, state->frameno, spis_one_bit(spi * 2 + 1));
+	else if (!(off % BPF_HALF_REG_SIZE) && size == BPF_HALF_REG_SIZE)
+		/* 4-byte aligned, 4-byte write */
+		bpf_mark_stack_write(env, state->frameno, spis_one_bit(spi * 2));
 
 	check_fastcall_stack_contract(env, state, insn_idx, off);
 	mark_stack_slot_scratched(env, spi);
@@ -5690,6 +5692,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 	struct bpf_reg_state *reg;
 	u8 *stype, type;
 	int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
+	spis_t mask;
 	int err;
 
 	stype = reg_state->stack[spi].slot_type;
@@ -5697,7 +5700,16 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 
 	mark_stack_slot_scratched(env, spi);
 	check_fastcall_stack_contract(env, state, env->insn_idx, off);
-	err = bpf_mark_stack_read(env, reg_state->frameno, env->insn_idx, BIT(spi));
+	if (!(off % BPF_REG_SIZE) && size == BPF_HALF_REG_SIZE)
+		/* 8-byte aligned, 4-byte read */
+		mask = spis_one_bit(spi * 2 + 1);
+	else if (!(off % BPF_HALF_REG_SIZE) && size == BPF_HALF_REG_SIZE)
+		/* 4-byte aligned, 4-byte read */
+		mask = spis_one_bit(spi * 2);
+	else
+		mask = spis_single_slot(spi);
+
+	err = bpf_mark_stack_read(env, reg_state->frameno, env->insn_idx, mask);
 	if (err)
 		return err;
 
@@ -8532,7 +8544,8 @@ mark:
 		/* reading any byte out of 8-byte 'spill_slot' will cause
 		 * the whole slot to be marked as 'read'
 		 */
-		err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi));
+		err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx,
+					  spis_single_slot(spi));
 		if (err)
 			return err;
 		/* We do not call bpf_mark_stack_write(), as we can not
-- 
cgit v1.2.3


From bf0c571f7feb6fa05a512e2a5e50702501849d61 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Fri, 10 Apr 2026 13:55:58 -0700
Subject: bpf: introduce forward arg-tracking dataflow analysis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The analysis is a basis for static liveness tracking mechanism
introduced by the next two commits.

A forward fixed-point analysis that tracks which frame's FP each
register value is derived from, and at what byte offset. This is
needed because a callee can receive a pointer to its caller's stack
frame (e.g. r1 = fp-16 at the call site), then do *(u64 *)(r1 + 0)
inside the callee — a cross-frame stack access that the callee's local
liveness must attribute to the caller's stack.

Each register holds an arg_track value from a three-level lattice:
- Precise {frame=N, off=[o1,o2,...]} — known frame index and
  up to 4 concrete byte offsets
- Offset-imprecise {frame=N, off_cnt=0} — known frame, unknown offset
- Fully-imprecise {frame=ARG_IMPRECISE, mask=bitmask} — unknown frame,
   mask says which frames might be involved

At CFG merge points the lattice moves toward imprecision (same
frame+offset stays precise, same frame different offsets merges offset
sets or becomes offset-imprecise, different frames become
fully-imprecise with OR'd bitmask).

The analysis also tracks spills/fills to the callee's own stack
(at_stack_in/out), so FP derived values spilled and reloaded.

This pass is run recursively per call site: when subprog A calls B
with specific FP-derived arguments, B is re-analyzed with those entry
args. The recursion follows analyze_subprog -> compute_subprog_args ->
(for each call insn) -> analyze_subprog. Subprogs that receive no
FP-derived args are skipped during recursion and analyzed
independently at depth 0.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260410-patch-set-v4-7-5d4eecb343db@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |    4 +
 kernel/bpf/liveness.c        | 1059 ++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        |    5 +
 3 files changed, 1068 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7b31d8024c61..49b19118c326 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -226,6 +226,7 @@ enum bpf_stack_slot_type {
 
 /* 4-byte stack slot granularity for liveness analysis */
 #define BPF_HALF_REG_SIZE	4
+#define STACK_SLOT_SZ		4
 #define STACK_SLOTS		(MAX_BPF_STACK / BPF_HALF_REG_SIZE)	/* 128 */
 
 typedef struct {
@@ -886,6 +887,8 @@ struct bpf_verifier_env {
 	} cfg;
 	struct backtrack_state bt;
 	struct bpf_jmp_history_entry *cur_hist_ent;
+	/* Per-callsite copy of parent's converged at_stack_in for cross-frame fills. */
+	struct arg_track **callsite_at_stack;
 	u32 pass_cnt; /* number of times do_check() was called */
 	u32 subprog_cnt;
 	/* number of instructions analyzed by the verifier */
@@ -1213,6 +1216,7 @@ s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env,
 s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env,
 				 struct bpf_insn *insn, int arg,
 				 int insn_idx);
+int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env);
 
 int bpf_stack_liveness_init(struct bpf_verifier_env *env);
 void bpf_stack_liveness_free(struct bpf_verifier_env *env);
diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c
index a3af5972520f..c5d6760454d6 100644
--- a/kernel/bpf/liveness.c
+++ b/kernel/bpf/liveness.c
@@ -2,10 +2,13 @@
 /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
 
 #include <linux/bpf_verifier.h>
+#include <linux/btf.h>
 #include <linux/hashtable.h>
 #include <linux/jhash.h>
 #include <linux/slab.h>
 
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
 /*
  * This file implements live stack slots analysis. After accumulating
  * stack usage data, the analysis answers queries about whether a
@@ -107,6 +110,7 @@ struct per_frame_masks {
 struct func_instance {
 	struct hlist_node hl_node;
 	struct callchain callchain;
+	u32 subprog;		/* subprog index */
 	u32 insn_cnt;		/* cached number of insns in the function */
 	bool updated;
 	bool must_write_dropped;
@@ -133,6 +137,7 @@ struct bpf_liveness {
 	 */
 	spis_t write_masks_acc[MAX_CALL_FRAMES];
 	u32 write_insn_idx;
+	u32 subprog_calls;				/* analyze_subprog() invocations */
 };
 
 /* Compute callchain corresponding to state @st at depth @frameno */
@@ -200,11 +205,30 @@ static struct func_instance *__lookup_instance(struct bpf_verifier_env *env,
 		return ERR_PTR(-ENOMEM);
 	}
 	memcpy(&result->callchain, callchain, sizeof(*callchain));
+	result->subprog = subprog - env->subprog_info;
 	result->insn_cnt = subprog_sz;
 	hash_add(liveness->func_instances, &result->hl_node, key);
 	return result;
 }
 
+static struct func_instance *call_instance(struct bpf_verifier_env *env,
+					   struct func_instance *caller,
+					   u32 callsite, int subprog)
+{
+	struct callchain cc;
+
+	if (caller) {
+		cc = caller->callchain;
+		cc.callsites[cc.curframe] = callsite;
+		cc.curframe++;
+	} else {
+		memset(&cc, 0, sizeof(cc));
+	}
+	cc.sp_starts[cc.curframe] = env->subprog_info[subprog].start;
+	cc.callsites[cc.curframe] = cc.sp_starts[cc.curframe];
+	return __lookup_instance(env, &cc);
+}
+
 static struct func_instance *lookup_instance(struct bpf_verifier_env *env,
 					     struct bpf_verifier_state *st,
 					     u32 frameno)
@@ -786,3 +810,1038 @@ bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 half_sp
 
 	return false;
 }
+
+/*
+ * Per-register tracking state for compute_subprog_args().
+ * Tracks which frame's FP a value is derived from
+ * and the byte offset from that frame's FP.
+ *
+ * The .frame field forms a lattice with three levels of precision:
+ *
+ *   precise {frame=N, off=V}      -- known absolute frame index and byte offset
+ *        |
+ *   offset-imprecise {frame=N, off=OFF_IMPRECISE}
+ *        |                        -- known frame identity, unknown offset
+ *   fully-imprecise {frame=ARG_IMPRECISE, mask=bitmask}
+ *                                 -- unknown frame identity; .mask is a
+ *                                    bitmask of which frame indices might be
+ *                                    involved
+ *
+ * At CFG merge points, arg_track_join() moves down the lattice:
+ *   - same frame + same offset  -> precise
+ *   - same frame + different offset -> offset-imprecise
+ *   - different frames          -> fully-imprecise (bitmask OR)
+ *
+ * At memory access sites (LDX/STX/ST), offset-imprecise marks only
+ * the known frame's access mask as SPIS_ALL, while fully-imprecise
+ * iterates bits in the bitmask and routes each frame to its target.
+ */
+#define MAX_ARG_OFFSETS 4
+
+struct arg_track {
+	union {
+		s16 off[MAX_ARG_OFFSETS]; /* byte offsets; off_cnt says how many */
+		u16 mask;	/* arg bitmask when arg == ARG_IMPRECISE */
+	};
+	s8 frame;	/* absolute frame index, or enum arg_track_state */
+	s8 off_cnt;	/* 0 = offset-imprecise, 1-4 = # of precise offsets */
+};
+
+enum arg_track_state {
+	ARG_NONE	= -1,	/* not derived from any argument */
+	ARG_UNVISITED	= -2,	/* not yet reached by dataflow */
+	ARG_IMPRECISE	= -3,	/* lost identity; .mask is arg bitmask */
+};
+
+#define OFF_IMPRECISE	S16_MIN	/* arg identity known but offset unknown */
+
+/* Track callee stack slots fp-8 through fp-512 (64 slots of 8 bytes each) */
+#define MAX_ARG_SPILL_SLOTS 64
+
+static bool arg_is_visited(const struct arg_track *at)
+{
+	return at->frame != ARG_UNVISITED;
+}
+
+static bool arg_is_fp(const struct arg_track *at)
+{
+	return at->frame >= 0 || at->frame == ARG_IMPRECISE;
+}
+
+/*
+ * Clear all tracked callee stack slots overlapping the byte range
+ * [off, off+sz-1] where off is a negative FP-relative offset.
+ */
+static void clear_overlapping_stack_slots(struct arg_track *at_stack, s16 off, u32 sz)
+{
+	struct arg_track none = { .frame = ARG_NONE };
+
+	if (off == OFF_IMPRECISE) {
+		for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++)
+			at_stack[i] = none;
+		return;
+	}
+	for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) {
+		int slot_start = -((i + 1) * 8);
+		int slot_end = slot_start + 8;
+
+		if (slot_start < off + (int)sz && slot_end > off)
+			at_stack[i] = none;
+	}
+}
+
+static void verbose_arg_track(struct bpf_verifier_env *env, struct arg_track *at)
+{
+	int i;
+
+	switch (at->frame) {
+	case ARG_NONE:      verbose(env, "_");                          break;
+	case ARG_UNVISITED: verbose(env, "?");                          break;
+	case ARG_IMPRECISE: verbose(env, "IMP%x", at->mask);            break;
+	default:
+		/* frame >= 0: absolute frame index */
+		if (at->off_cnt == 0) {
+			verbose(env, "fp%d ?", at->frame);
+		} else {
+			for (i = 0; i < at->off_cnt; i++) {
+				if (i)
+					verbose(env, "|");
+				verbose(env, "fp%d%+d", at->frame, at->off[i]);
+			}
+		}
+		break;
+	}
+}
+
+static bool arg_track_eq(const struct arg_track *a, const struct arg_track *b)
+{
+	int i;
+
+	if (a->frame != b->frame)
+		return false;
+	if (a->frame == ARG_IMPRECISE)
+		return a->mask == b->mask;
+	if (a->frame < 0)
+		return true;
+	if (a->off_cnt != b->off_cnt)
+		return false;
+	for (i = 0; i < a->off_cnt; i++)
+		if (a->off[i] != b->off[i])
+			return false;
+	return true;
+}
+
+static struct arg_track arg_single(s8 arg, s16 off)
+{
+	struct arg_track at = {};
+
+	at.frame = arg;
+	at.off[0] = off;
+	at.off_cnt = 1;
+	return at;
+}
+
+/*
+ * Merge two sorted offset arrays, deduplicate.
+ * Returns off_cnt=0 if the result exceeds MAX_ARG_OFFSETS.
+ * Both args must have the same frame and off_cnt > 0.
+ */
+static struct arg_track arg_merge_offsets(struct arg_track a, struct arg_track b)
+{
+	struct arg_track result = { .frame = a.frame };
+	struct arg_track imp = { .frame = a.frame };
+	int i = 0, j = 0, k = 0;
+
+	while (i < a.off_cnt && j < b.off_cnt) {
+		s16 v;
+
+		if (a.off[i] <= b.off[j]) {
+			v = a.off[i++];
+			if (v == b.off[j])
+				j++;
+		} else {
+			v = b.off[j++];
+		}
+		if (k > 0 && result.off[k - 1] == v)
+			continue;
+		if (k >= MAX_ARG_OFFSETS)
+			return imp;
+		result.off[k++] = v;
+	}
+	while (i < a.off_cnt) {
+		if (k >= MAX_ARG_OFFSETS)
+			return imp;
+		result.off[k++] = a.off[i++];
+	}
+	while (j < b.off_cnt) {
+		if (k >= MAX_ARG_OFFSETS)
+			return imp;
+		result.off[k++] = b.off[j++];
+	}
+	result.off_cnt = k;
+	return result;
+}
+
+/*
+ * Merge two arg_tracks into ARG_IMPRECISE, collecting the frame
+ * bits from both operands. Precise frame indices (frame >= 0)
+ * contribute a single bit; existing ARG_IMPRECISE values
+ * contribute their full bitmask.
+ */
+static struct arg_track arg_join_imprecise(struct arg_track a, struct arg_track b)
+{
+	u32 m = 0;
+
+	if (a.frame >= 0)
+		m |= BIT(a.frame);
+	else if (a.frame == ARG_IMPRECISE)
+		m |= a.mask;
+
+	if (b.frame >= 0)
+		m |= BIT(b.frame);
+	else if (b.frame == ARG_IMPRECISE)
+		m |= b.mask;
+
+	return (struct arg_track){ .mask = m, .frame = ARG_IMPRECISE };
+}
+
+/* Join two arg_track values at merge points */
+static struct arg_track __arg_track_join(struct arg_track a, struct arg_track b)
+{
+	if (!arg_is_visited(&b))
+		return a;
+	if (!arg_is_visited(&a))
+		return b;
+	if (a.frame == b.frame && a.frame >= 0) {
+		/* Both offset-imprecise: stay imprecise */
+		if (a.off_cnt == 0 || b.off_cnt == 0)
+			return (struct arg_track){ .frame = a.frame };
+		/* Merge offset sets; falls back to off_cnt=0 if >4 */
+		return arg_merge_offsets(a, b);
+	}
+
+	/*
+	 * args are different, but one of them is known
+	 * arg + none -> arg
+	 * none + arg -> arg
+	 *
+	 * none + none -> none
+	 */
+	if (a.frame == ARG_NONE && b.frame == ARG_NONE)
+		return a;
+	if (a.frame >= 0 && b.frame == ARG_NONE) {
+		/*
+		 * When joining single fp-N add fake fp+0 to
+		 * keep stack_use and prevent stack_def
+		 */
+		if (a.off_cnt == 1)
+			return arg_merge_offsets(a, arg_single(a.frame, 0));
+		return a;
+	}
+	if (b.frame >= 0 && a.frame == ARG_NONE) {
+		if (b.off_cnt == 1)
+			return arg_merge_offsets(b, arg_single(b.frame, 0));
+		return b;
+	}
+
+	return arg_join_imprecise(a, b);
+}
+
+static bool arg_track_join(struct bpf_verifier_env *env, int idx, int target, int r,
+			   struct arg_track *in, struct arg_track out)
+{
+	struct arg_track old = *in;
+	struct arg_track new_val = __arg_track_join(old, out);
+
+	if (arg_track_eq(&new_val, &old))
+		return false;
+
+	*in = new_val;
+	if (!(env->log.level & BPF_LOG_LEVEL2) || !arg_is_visited(&old))
+		return true;
+
+	verbose(env, "arg JOIN insn %d -> %d ", idx, target);
+	if (r >= 0)
+		verbose(env, "r%d: ", r);
+	else
+		verbose(env, "fp%+d: ", r * 8);
+	verbose_arg_track(env, &old);
+	verbose(env, " + ");
+	verbose_arg_track(env, &out);
+	verbose(env, " => ");
+	verbose_arg_track(env, &new_val);
+	verbose(env, "\n");
+	return true;
+}
+
+/*
+ * Compute the result when an ALU op destroys offset precision.
+ * If a single arg is identifiable, preserve it with OFF_IMPRECISE.
+ * If two different args are involved or one is already ARG_IMPRECISE,
+ * the result is fully ARG_IMPRECISE.
+ */
+static void arg_track_alu64(struct arg_track *dst, const struct arg_track *src)
+{
+	WARN_ON_ONCE(!arg_is_visited(dst));
+	WARN_ON_ONCE(!arg_is_visited(src));
+
+	if (dst->frame >= 0 && (src->frame == ARG_NONE || src->frame == dst->frame)) {
+		/*
+		 * rX += rY where rY is not arg derived
+		 * rX += rX
+		 */
+		dst->off_cnt = 0;
+		return;
+	}
+	if (src->frame >= 0 && dst->frame == ARG_NONE) {
+		/*
+		 * rX += rY where rX is not arg derived
+		 * rY identity leaks into rX
+		 */
+		dst->off_cnt = 0;
+		dst->frame = src->frame;
+		return;
+	}
+
+	if (dst->frame == ARG_NONE && src->frame == ARG_NONE)
+		return;
+
+	*dst = arg_join_imprecise(*dst, *src);
+}
+
+static s16 arg_add(s16 off, s64 delta)
+{
+	s64 res;
+
+	if (off == OFF_IMPRECISE)
+		return OFF_IMPRECISE;
+	res = (s64)off + delta;
+	if (res < S16_MIN + 1 || res > S16_MAX)
+		return OFF_IMPRECISE;
+	return res;
+}
+
+static void arg_padd(struct arg_track *at, s64 delta)
+{
+	int i;
+
+	if (at->off_cnt == 0)
+		return;
+	for (i = 0; i < at->off_cnt; i++) {
+		s16 new_off = arg_add(at->off[i], delta);
+
+		if (new_off == OFF_IMPRECISE) {
+			at->off_cnt = 0;
+			return;
+		}
+		at->off[i] = new_off;
+	}
+}
+
+/*
+ * Convert a byte offset from FP to a callee stack slot index.
+ * Returns -1 if out of range or not 8-byte aligned.
+ * Slot 0 = fp-8, slot 1 = fp-16, ..., slot 7 = fp-64, ....
+ */
+static int fp_off_to_slot(s16 off)
+{
+	if (off == OFF_IMPRECISE)
+		return -1;
+	if (off >= 0 || off < -(int)(MAX_ARG_SPILL_SLOTS * 8))
+		return -1;
+	if (off % 8)
+		return -1;
+	return (-off) / 8 - 1;
+}
+
+static struct arg_track fill_from_stack(struct bpf_insn *insn,
+					struct arg_track *at_out, int reg,
+					struct arg_track *at_stack_out,
+					int depth)
+{
+	struct arg_track imp = {
+		.mask = (1u << (depth + 1)) - 1,
+		.frame = ARG_IMPRECISE
+	};
+	struct arg_track result = { .frame = ARG_NONE };
+	int cnt, i;
+
+	if (reg == BPF_REG_FP) {
+		int slot = fp_off_to_slot(insn->off);
+
+		return slot >= 0 ? at_stack_out[slot] : imp;
+	}
+	cnt = at_out[reg].off_cnt;
+	if (cnt == 0)
+		return imp;
+
+	for (i = 0; i < cnt; i++) {
+		s16 fp_off = arg_add(at_out[reg].off[i], insn->off);
+		int slot = fp_off_to_slot(fp_off);
+
+		if (slot < 0)
+			return imp;
+		result = __arg_track_join(result, at_stack_out[slot]);
+	}
+	return result;
+}
+
+/*
+ * Spill @val to all possible stack slots indicated by the FP offsets in @reg.
+ * For an 8-byte store, single candidate slot gets @val. multi-slots are joined.
+ * sub-8-byte store joins with ARG_NONE.
+ * When exact offset is unknown conservatively add reg values to all slots in at_stack_out.
+ */
+static void spill_to_stack(struct bpf_insn *insn, struct arg_track *at_out,
+			   int reg, struct arg_track *at_stack_out,
+			   struct arg_track *val, u32 sz)
+{
+	struct arg_track none = { .frame = ARG_NONE };
+	struct arg_track new_val = sz == 8 ? *val : none;
+	int cnt, i;
+
+	if (reg == BPF_REG_FP) {
+		int slot = fp_off_to_slot(insn->off);
+
+		if (slot >= 0)
+			at_stack_out[slot] = new_val;
+		return;
+	}
+	cnt = at_out[reg].off_cnt;
+	if (cnt == 0) {
+		for (int slot = 0; slot < MAX_ARG_SPILL_SLOTS; slot++)
+			at_stack_out[slot] = __arg_track_join(at_stack_out[slot], new_val);
+		return;
+	}
+	for (i = 0; i < cnt; i++) {
+		s16 fp_off = arg_add(at_out[reg].off[i], insn->off);
+		int slot = fp_off_to_slot(fp_off);
+
+		if (slot < 0)
+			continue;
+		if (cnt == 1)
+			at_stack_out[slot] = new_val;
+		else
+			at_stack_out[slot] = __arg_track_join(at_stack_out[slot], new_val);
+	}
+}
+
+/*
+ * Clear stack slots overlapping all possible FP offsets in @reg.
+ */
+static void clear_stack_for_all_offs(struct bpf_insn *insn,
+				     struct arg_track *at_out, int reg,
+				     struct arg_track *at_stack_out, u32 sz)
+{
+	int cnt, i;
+
+	if (reg == BPF_REG_FP) {
+		clear_overlapping_stack_slots(at_stack_out, insn->off, sz);
+		return;
+	}
+	cnt = at_out[reg].off_cnt;
+	if (cnt == 0) {
+		clear_overlapping_stack_slots(at_stack_out, OFF_IMPRECISE, sz);
+		return;
+	}
+	for (i = 0; i < cnt; i++) {
+		s16 fp_off = arg_add(at_out[reg].off[i], insn->off);
+
+		clear_overlapping_stack_slots(at_stack_out, fp_off, sz);
+	}
+}
+
+static void arg_track_log(struct bpf_verifier_env *env, struct bpf_insn *insn, int idx,
+			  struct arg_track *at_in, struct arg_track *at_stack_in,
+			  struct arg_track *at_out, struct arg_track *at_stack_out)
+{
+	bool printed = false;
+	int i;
+
+	if (!(env->log.level & BPF_LOG_LEVEL2))
+		return;
+	for (i = 0; i < MAX_BPF_REG; i++) {
+		if (arg_track_eq(&at_out[i], &at_in[i]))
+			continue;
+		if (!printed) {
+			verbose(env, "%3d: ", idx);
+			bpf_verbose_insn(env, insn);
+			bpf_vlog_reset(&env->log, env->log.end_pos - 1);
+			printed = true;
+		}
+		verbose(env, "\tr%d: ", i); verbose_arg_track(env, &at_in[i]);
+		verbose(env, " -> "); verbose_arg_track(env, &at_out[i]);
+	}
+	for (i = 0; i < MAX_ARG_SPILL_SLOTS; i++) {
+		if (arg_track_eq(&at_stack_out[i], &at_stack_in[i]))
+			continue;
+		if (!printed) {
+			verbose(env, "%3d: ", idx);
+			bpf_verbose_insn(env, insn);
+			bpf_vlog_reset(&env->log, env->log.end_pos - 1);
+			printed = true;
+		}
+		verbose(env, "\tfp%+d: ", -(i + 1) * 8); verbose_arg_track(env, &at_stack_in[i]);
+		verbose(env, " -> "); verbose_arg_track(env, &at_stack_out[i]);
+	}
+	if (printed)
+		verbose(env, "\n");
+}
+
+/*
+ * Pure dataflow transfer function for arg_track state.
+ * Updates at_out[] based on how the instruction modifies registers.
+ * Tracks spill/fill, but not other memory accesses.
+ */
+static void arg_track_xfer(struct bpf_verifier_env *env, struct bpf_insn *insn,
+			   int insn_idx,
+			   struct arg_track *at_out, struct arg_track *at_stack_out,
+			   struct func_instance *instance,
+			   u32 *callsites)
+{
+	int depth = instance->callchain.curframe;
+	u8 class = BPF_CLASS(insn->code);
+	u8 code = BPF_OP(insn->code);
+	struct arg_track *dst = &at_out[insn->dst_reg];
+	struct arg_track *src = &at_out[insn->src_reg];
+	struct arg_track none = { .frame = ARG_NONE };
+	int r;
+
+	if (class == BPF_ALU64 && BPF_SRC(insn->code) == BPF_K) {
+		if (code == BPF_MOV) {
+			*dst = none;
+		} else if (dst->frame >= 0) {
+			if (code == BPF_ADD)
+				arg_padd(dst, insn->imm);
+			else if (code == BPF_SUB)
+				arg_padd(dst, -(s64)insn->imm);
+			else
+				/* Any other 64-bit alu on the pointer makes it imprecise */
+				dst->off_cnt = 0;
+		} /* else if dst->frame is imprecise it stays so */
+	} else if (class == BPF_ALU64 && BPF_SRC(insn->code) == BPF_X) {
+		if (code == BPF_MOV) {
+			if (insn->off == 0) {
+				*dst = *src;
+			} else {
+				/* addr_space_cast destroys a pointer */
+				*dst = none;
+			}
+		} else {
+			arg_track_alu64(dst, src);
+		}
+	} else if (class == BPF_ALU) {
+		/*
+		 * 32-bit alu destroys the pointer.
+		 * If src was a pointer it cannot leak into dst
+		 */
+		*dst = none;
+	} else if (class == BPF_JMP && code == BPF_CALL) {
+		/*
+		 * at_stack_out[slot] is not cleared by the helper and subprog calls.
+		 * The fill_from_stack() may return the stale spill — which is an FP-derived arg_track
+		 * (the value that was originally spilled there). The loaded register then carries
+		 * a phantom FP-derived identity that doesn't correspond to what's actually in the slot.
+		 * This phantom FP pointer propagates forward, and wherever it's subsequently used
+		 * (as a helper argument, another store, etc.), it sets stack liveness bits.
+		 * Those bits correspond to stack accesses that don't actually happen.
+		 * So the effect is over-reporting stack liveness — marking slots as live that aren't
+		 * actually accessed. The verifier preserves more state than necessary across calls,
+		 * which is conservative.
+		 *
+		 * helpers can scratch stack slots, but they won't make a valid pointer out of it.
+		 * subprogs are allowed to write into parent slots, but they cannot write
+		 * _any_ FP-derived pointer into it (either their own or parent's FP).
+		 */
+		for (r = BPF_REG_0; r <= BPF_REG_5; r++)
+			at_out[r] = none;
+	} else if (class == BPF_LDX) {
+		u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+		bool src_is_local_fp = insn->src_reg == BPF_REG_FP || src->frame == depth ||
+				       (src->frame == ARG_IMPRECISE && (src->mask & BIT(depth)));
+
+		/*
+		 * Reload from callee stack: if src is current-frame FP-derived
+		 * and the load is an 8-byte BPF_MEM, try to restore the spill
+		 * identity.  For imprecise sources fill_from_stack() returns
+		 * ARG_IMPRECISE (off_cnt == 0).
+		 */
+		if (src_is_local_fp && BPF_MODE(insn->code) == BPF_MEM && sz == 8) {
+			*dst = fill_from_stack(insn, at_out, insn->src_reg, at_stack_out, depth);
+		} else if (src->frame >= 0 && src->frame < depth &&
+			   BPF_MODE(insn->code) == BPF_MEM && sz == 8) {
+			struct arg_track *parent_stack =
+				env->callsite_at_stack[callsites[src->frame]];
+
+			*dst = fill_from_stack(insn, at_out, insn->src_reg,
+					       parent_stack, src->frame);
+		} else if (src->frame == ARG_IMPRECISE &&
+			   !(src->mask & BIT(depth)) && src->mask &&
+			   BPF_MODE(insn->code) == BPF_MEM && sz == 8) {
+			/*
+			 * Imprecise src with only parent-frame bits:
+			 * conservative fallback.
+			 */
+			*dst = *src;
+		} else {
+			*dst = none;
+		}
+	} else if (class == BPF_LD && BPF_MODE(insn->code) == BPF_IMM) {
+		*dst = none;
+	} else if (class == BPF_STX) {
+		u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+		bool dst_is_local_fp;
+
+		/* Track spills to current-frame FP-derived callee stack */
+		dst_is_local_fp = insn->dst_reg == BPF_REG_FP || dst->frame == depth;
+		if (dst_is_local_fp && BPF_MODE(insn->code) == BPF_MEM)
+			spill_to_stack(insn, at_out, insn->dst_reg,
+				       at_stack_out, src, sz);
+
+		if (BPF_MODE(insn->code) == BPF_ATOMIC) {
+			if (dst_is_local_fp && insn->imm != BPF_LOAD_ACQ)
+				clear_stack_for_all_offs(insn, at_out, insn->dst_reg,
+							 at_stack_out, sz);
+
+			if (insn->imm == BPF_CMPXCHG)
+				at_out[BPF_REG_0] = none;
+			else if (insn->imm == BPF_LOAD_ACQ)
+				*dst = none;
+			else if (insn->imm & BPF_FETCH)
+				*src = none;
+		}
+	} else if (class == BPF_ST && BPF_MODE(insn->code) == BPF_MEM) {
+		u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+		bool dst_is_local_fp = insn->dst_reg == BPF_REG_FP || dst->frame == depth;
+
+		/* BPF_ST to FP-derived dst: clear overlapping stack slots */
+		if (dst_is_local_fp)
+			clear_stack_for_all_offs(insn, at_out, insn->dst_reg,
+						 at_stack_out, sz);
+	}
+}
+
+/*
+ * For a calls_callback helper, find the callback subprog and determine
+ * which caller register maps to which callback register for FP passthrough.
+ */
+static int find_callback_subprog(struct bpf_verifier_env *env,
+				 struct bpf_insn *insn, int insn_idx,
+				 int *caller_reg, int *callee_reg)
+{
+	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+	int cb_reg = -1;
+
+	*caller_reg = -1;
+	*callee_reg = -1;
+
+	if (!bpf_helper_call(insn))
+		return -1;
+	switch (insn->imm) {
+	case BPF_FUNC_loop:
+		/* bpf_loop(nr, cb, ctx, flags): cb=R2, R3->cb R2 */
+		cb_reg = BPF_REG_2;
+		*caller_reg = BPF_REG_3;
+		*callee_reg = BPF_REG_2;
+		break;
+	case BPF_FUNC_for_each_map_elem:
+		/* for_each_map_elem(map, cb, ctx, flags): cb=R2, R3->cb R4 */
+		cb_reg = BPF_REG_2;
+		*caller_reg = BPF_REG_3;
+		*callee_reg = BPF_REG_4;
+		break;
+	case BPF_FUNC_find_vma:
+		/* find_vma(task, addr, cb, ctx, flags): cb=R3, R4->cb R3 */
+		cb_reg = BPF_REG_3;
+		*caller_reg = BPF_REG_4;
+		*callee_reg = BPF_REG_3;
+		break;
+	case BPF_FUNC_user_ringbuf_drain:
+		/* user_ringbuf_drain(map, cb, ctx, flags): cb=R2, R3->cb R2 */
+		cb_reg = BPF_REG_2;
+		*caller_reg = BPF_REG_3;
+		*callee_reg = BPF_REG_2;
+		break;
+	default:
+		return -1;
+	}
+
+	if (!(aux->const_reg_subprog_mask & BIT(cb_reg)))
+		return -2;
+
+	return aux->const_reg_vals[cb_reg];
+}
+
+/* Per-subprog intermediate state kept alive across analysis phases */
+struct subprog_at_info {
+	struct arg_track (*at_in)[MAX_BPF_REG];
+	int len;
+};
+
+static void print_subprog_arg_access(struct bpf_verifier_env *env,
+				     int subprog,
+				     struct subprog_at_info *info,
+				     struct arg_track (*at_stack_in)[MAX_ARG_SPILL_SLOTS])
+{
+	struct bpf_insn *insns = env->prog->insnsi;
+	int start = env->subprog_info[subprog].start;
+	int len = info->len;
+	int i, r;
+
+	if (!(env->log.level & BPF_LOG_LEVEL2))
+		return;
+
+	verbose(env, "subprog#%d %s:\n", subprog,
+		env->prog->aux->func_info
+		? btf_name_by_offset(env->prog->aux->btf,
+				     btf_type_by_id(env->prog->aux->btf,
+						    env->prog->aux->func_info[subprog].type_id)->name_off)
+		: "");
+	for (i = 0; i < len; i++) {
+		int idx = start + i;
+		bool has_extra = false;
+		u8 cls = BPF_CLASS(insns[idx].code);
+		bool is_ldx_stx_call = cls == BPF_LDX || cls == BPF_STX ||
+				       insns[idx].code == (BPF_JMP | BPF_CALL);
+
+		verbose(env, "%3d: ", idx);
+		bpf_verbose_insn(env, &insns[idx]);
+
+		/* Collect what needs printing */
+		if (is_ldx_stx_call &&
+		    arg_is_visited(&info->at_in[i][0])) {
+			for (r = 0; r < MAX_BPF_REG - 1; r++)
+				if (arg_is_fp(&info->at_in[i][r]))
+					has_extra = true;
+		}
+		if (is_ldx_stx_call) {
+			for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+				if (arg_is_fp(&at_stack_in[i][r]))
+					has_extra = true;
+		}
+
+		if (!has_extra) {
+			if (bpf_is_ldimm64(&insns[idx]))
+				i++;
+			continue;
+		}
+
+		bpf_vlog_reset(&env->log, env->log.end_pos - 1);
+		verbose(env, " //");
+
+		if (is_ldx_stx_call && info->at_in &&
+		    arg_is_visited(&info->at_in[i][0])) {
+			for (r = 0; r < MAX_BPF_REG - 1; r++) {
+				if (!arg_is_fp(&info->at_in[i][r]))
+					continue;
+				verbose(env, " r%d=", r);
+				verbose_arg_track(env, &info->at_in[i][r]);
+			}
+		}
+
+		if (is_ldx_stx_call) {
+			for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++) {
+				if (!arg_is_fp(&at_stack_in[i][r]))
+					continue;
+				verbose(env, " fp%+d=", -(r + 1) * 8);
+				verbose_arg_track(env, &at_stack_in[i][r]);
+			}
+		}
+
+		verbose(env, "\n");
+		if (bpf_is_ldimm64(&insns[idx]))
+			i++;
+	}
+}
+
+/*
+ * Compute arg tracking dataflow for a single subprog.
+ * Runs forward fixed-point with arg_track_xfer(), then records
+ * memory accesses in a single linear pass over converged state.
+ *
+ * @callee_entry: pre-populated entry state for R1-R5
+ *                NULL for main (subprog 0).
+ * @info:         stores at_in, len for debug printing.
+ */
+static int compute_subprog_args(struct bpf_verifier_env *env,
+				struct subprog_at_info *info,
+				struct arg_track *callee_entry,
+				struct func_instance *instance,
+				u32 *callsites)
+{
+	int subprog = instance->subprog;
+	struct bpf_insn *insns = env->prog->insnsi;
+	int depth = instance->callchain.curframe;
+	int start = env->subprog_info[subprog].start;
+	int po_start = env->subprog_info[subprog].postorder_start;
+	int end = env->subprog_info[subprog + 1].start;
+	int po_end = env->subprog_info[subprog + 1].postorder_start;
+	int len = end - start;
+	struct arg_track (*at_in)[MAX_BPF_REG] = NULL;
+	struct arg_track at_out[MAX_BPF_REG];
+	struct arg_track (*at_stack_in)[MAX_ARG_SPILL_SLOTS] = NULL;
+	struct arg_track *at_stack_out = NULL;
+	struct arg_track unvisited = { .frame = ARG_UNVISITED };
+	struct arg_track none = { .frame = ARG_NONE };
+	bool changed;
+	int i, p, r, err = -ENOMEM;
+
+	at_in = kvmalloc_objs(*at_in, len, GFP_KERNEL_ACCOUNT);
+	if (!at_in)
+		goto err_free;
+
+	at_stack_in = kvmalloc_objs(*at_stack_in, len, GFP_KERNEL_ACCOUNT);
+	if (!at_stack_in)
+		goto err_free;
+
+	at_stack_out = kvmalloc_objs(*at_stack_out, MAX_ARG_SPILL_SLOTS, GFP_KERNEL_ACCOUNT);
+	if (!at_stack_out)
+		goto err_free;
+
+	for (i = 0; i < len; i++) {
+		for (r = 0; r < MAX_BPF_REG; r++)
+			at_in[i][r] = unvisited;
+		for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+			at_stack_in[i][r] = unvisited;
+	}
+
+	for (r = 0; r < MAX_BPF_REG; r++)
+		at_in[0][r] = none;
+
+	/* Entry: R10 is always precisely the current frame's FP */
+	at_in[0][BPF_REG_FP] = arg_single(depth, 0);
+
+	/* R1-R5: from caller or ARG_NONE for main */
+	if (callee_entry) {
+		for (r = BPF_REG_1; r <= BPF_REG_5; r++)
+			at_in[0][r] = callee_entry[r];
+	}
+
+	/* Entry: all stack slots are ARG_NONE */
+	for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+		at_stack_in[0][r] = none;
+
+	if (env->log.level & BPF_LOG_LEVEL2)
+		verbose(env, "subprog#%d: analyzing (depth %d)...\n", subprog, depth);
+
+	/* Forward fixed-point iteration in reverse post order */
+redo:
+	changed = false;
+	for (p = po_end - 1; p >= po_start; p--) {
+		int idx = env->cfg.insn_postorder[p];
+		int i = idx - start;
+		struct bpf_insn *insn = &insns[idx];
+		struct bpf_iarray *succ;
+
+		if (!arg_is_visited(&at_in[i][0]) && !arg_is_visited(&at_in[i][1]))
+			continue;
+
+		memcpy(at_out, at_in[i], sizeof(at_out));
+		memcpy(at_stack_out, at_stack_in[i], MAX_ARG_SPILL_SLOTS * sizeof(*at_stack_out));
+
+		arg_track_xfer(env, insn, idx, at_out, at_stack_out, instance, callsites);
+		arg_track_log(env, insn, idx, at_in[i], at_stack_in[i], at_out, at_stack_out);
+
+		/* Propagate to successors within this subprogram */
+		succ = bpf_insn_successors(env, idx);
+		for (int s = 0; s < succ->cnt; s++) {
+			int target = succ->items[s];
+			int ti;
+
+			/* Filter: stay within the subprogram's range */
+			if (target < start || target >= end)
+				continue;
+			ti = target - start;
+
+			for (r = 0; r < MAX_BPF_REG; r++)
+				changed |= arg_track_join(env, idx, target, r,
+							  &at_in[ti][r], at_out[r]);
+
+			for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+				changed |= arg_track_join(env, idx, target, -r - 1,
+							  &at_stack_in[ti][r], at_stack_out[r]);
+		}
+	}
+	if (changed)
+		goto redo;
+
+	info->at_in = at_in;
+	at_in = NULL;
+	info->len = len;
+	print_subprog_arg_access(env, subprog, info, at_stack_in);
+	err = 0;
+
+err_free:
+	kvfree(at_stack_out);
+	kvfree(at_stack_in);
+	kvfree(at_in);
+	return err;
+}
+
+/*
+ * Recursively analyze a subprog with specific 'entry_args'.
+ * Each callee is analyzed with the exact args from its call site.
+ *
+ * Args are recomputed for each call because the dataflow result at_in[]
+ * depends on the entry args and frame depth. Consider: A->C->D and B->C->D
+ * Callsites in A and B pass different args into C, so C is recomputed.
+ * Then within C the same callsite passes different args into D.
+ */
+static int analyze_subprog(struct bpf_verifier_env *env,
+			   struct arg_track *entry_args,
+			   struct subprog_at_info *info,
+			   struct func_instance *instance,
+			   u32 *callsites)
+{
+	int subprog = instance->subprog;
+	int depth = instance->callchain.curframe;
+	struct bpf_insn *insns = env->prog->insnsi;
+	int start = env->subprog_info[subprog].start;
+	int po_start = env->subprog_info[subprog].postorder_start;
+	int po_end = env->subprog_info[subprog + 1].postorder_start;
+	int j, err;
+
+	if (++env->liveness->subprog_calls > 10000) {
+		verbose(env, "liveness analysis exceeded complexity limit (%d calls)\n",
+			env->liveness->subprog_calls);
+		return -E2BIG;
+	}
+
+	if (need_resched())
+		cond_resched();
+
+	/* Free prior analysis if this subprog was already visited */
+	kvfree(info[subprog].at_in);
+	info[subprog].at_in = NULL;
+
+	err = compute_subprog_args(env, &info[subprog], entry_args, instance, callsites);
+	if (err)
+		return err;
+
+	/* For each reachable call site in the subprog, recurse into callees */
+	for (int p = po_start; p < po_end; p++) {
+		int idx = env->cfg.insn_postorder[p];
+		struct arg_track callee_args[BPF_REG_5 + 1];
+		struct arg_track none = { .frame = ARG_NONE };
+		struct bpf_insn *insn = &insns[idx];
+		struct func_instance *callee_instance;
+		int callee, target;
+		int caller_reg, cb_callee_reg;
+
+		j = idx - start; /* relative index within this subprog */
+
+		if (bpf_pseudo_call(insn)) {
+			target = idx + insn->imm + 1;
+			callee = bpf_find_subprog(env, target);
+			if (callee < 0)
+				continue;
+
+			/* Build entry args: R1-R5 from at_in at call site */
+			for (int r = BPF_REG_1; r <= BPF_REG_5; r++)
+				callee_args[r] = info[subprog].at_in[j][r];
+		} else if (bpf_calls_callback(env, idx)) {
+			callee = find_callback_subprog(env, insn, idx, &caller_reg, &cb_callee_reg);
+			if (callee == -2) {
+				/*
+				 * same bpf_loop() calls two different callbacks and passes
+				 * stack pointer to them
+				 */
+				if (info[subprog].at_in[j][caller_reg].frame == ARG_NONE)
+					continue;
+				for (int f = 0; f <= depth; f++) {
+					err = mark_stack_read(instance, f, idx, SPIS_ALL);
+					if (err)
+						return err;
+				}
+				continue;
+			}
+			if (callee < 0)
+				continue;
+
+			for (int r = BPF_REG_1; r <= BPF_REG_5; r++)
+				callee_args[r] = none;
+			callee_args[cb_callee_reg] = info[subprog].at_in[j][caller_reg];
+		} else {
+			continue;
+		}
+
+		if (depth == MAX_CALL_FRAMES - 1)
+			return -EINVAL;
+
+		callee_instance = call_instance(env, instance, idx, callee);
+		if (IS_ERR(callee_instance))
+			return PTR_ERR(callee_instance);
+		callsites[depth] = idx;
+		err = analyze_subprog(env, callee_args, info, callee_instance, callsites);
+		if (err)
+			return err;
+	}
+
+	return update_instance(env, instance);
+}
+
+int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env)
+{
+	u32 callsites[MAX_CALL_FRAMES] = {};
+	int insn_cnt = env->prog->len;
+	struct func_instance *instance;
+	struct subprog_at_info *info;
+	int k, err = 0;
+
+	info = kvzalloc_objs(*info, env->subprog_cnt, GFP_KERNEL_ACCOUNT);
+	if (!info)
+		return -ENOMEM;
+
+	env->callsite_at_stack = kvzalloc_objs(*env->callsite_at_stack, insn_cnt,
+					       GFP_KERNEL_ACCOUNT);
+	if (!env->callsite_at_stack) {
+		kvfree(info);
+		return -ENOMEM;
+	}
+
+	instance = call_instance(env, NULL, 0, 0);
+	if (IS_ERR(instance)) {
+		err = PTR_ERR(instance);
+		goto out;
+	}
+	err = analyze_subprog(env, NULL, info, instance, callsites);
+	if (err)
+		goto out;
+
+	/*
+	 * Subprogs and callbacks that don't receive FP-derived arguments
+	 * cannot access ancestor stack frames, so they were skipped during
+	 * the recursive walk above.  Async callbacks (timer, workqueue) are
+	 * also not reachable from the main program's call graph.  Analyze
+	 * all unvisited subprogs as independent roots at depth 0.
+	 *
+	 * Use reverse topological order (callers before callees) so that
+	 * each subprog is analyzed before its callees, allowing the
+	 * recursive walk inside analyze_subprog() to naturally
+	 * reach nested callees that also lack FP-derived args.
+	 */
+	for (k = env->subprog_cnt - 1; k >= 0; k--) {
+		int sub = env->subprog_topo_order[k];
+
+		if (info[sub].at_in && !bpf_subprog_is_global(env, sub))
+			continue;
+		instance = call_instance(env, NULL, 0, sub);
+		if (IS_ERR(instance)) {
+			err = PTR_ERR(instance);
+			goto out;
+		}
+		err = analyze_subprog(env, NULL, info, instance, callsites);
+		if (err)
+			goto out;
+	}
+
+out:
+	for (k = 0; k < insn_cnt; k++)
+		kvfree(env->callsite_at_stack[k]);
+	kvfree(env->callsite_at_stack);
+	env->callsite_at_stack = NULL;
+	for (k = 0; k < env->subprog_cnt; k++)
+		kvfree(info[k].at_in);
+	kvfree(info);
+	return err;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e5f4bd5f2609..4c3db8e1c7a4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -26414,6 +26414,11 @@ static int compute_live_registers(struct bpf_verifier_env *env)
 	for (i = 0; i < insn_cnt; ++i)
 		compute_insn_live_regs(env, &insns[i], &state[i]);
 
+	/* Forward pass: resolve stack access through FP-derived pointers */
+	err = bpf_compute_subprog_arg_access(env);
+	if (err)
+		goto out;
+
 	changed = true;
 	while (changed) {
 		changed = false;
-- 
cgit v1.2.3


From fed53dbcdb61b0fbb1cf1d5bbd68d10f97aec974 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Fri, 10 Apr 2026 13:55:59 -0700
Subject: bpf: record arg tracking results in bpf_liveness masks

After arg tracking reaches a fixed point, perform a single linear scan
over the converged at_in[] state and translate each memory access into
liveness read/write masks on the func_instance:

- Load/store instructions: FP-derived pointer's frame and offset(s)
  are converted to half-slot masks targeting
  per_frame_masks->{may_read,must_write}

- Helper/kfunc calls: record_call_access() queries
  bpf_helper_stack_access_bytes() / bpf_kfunc_stack_access_bytes()
  for each FP-derived argument to determine access size and direction.
  Unknown access size (S64_MIN) conservatively marks all slots from
  fp_off to fp+0 as read.

- Imprecise pointers (frame == ARG_IMPRECISE): conservatively mark
  all slots in every frame covered by the pointer's frame bitmask
  as fully read.

- Static subprog calls with unresolved arguments: conservatively mark
  all frames as fully read.

Instead of a call to clean_live_states(), start cleaning the current
state continuously as registers and stack become dead since the static
analysis provides complete liveness information. This makes
clean_live_states() and bpf_verifier_state->cleaned unnecessary.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260410-patch-set-v4-8-5d4eecb343db@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   1 -
 kernel/bpf/liveness.c        | 243 +++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        |  64 +-----------
 3 files changed, 245 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 49b19118c326..8e83a5e66fd8 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -492,7 +492,6 @@ struct bpf_verifier_state {
 
 	bool speculative;
 	bool in_sleepable;
-	bool cleaned;
 
 	/* first and last insn idx of this verifier state */
 	u32 first_insn_idx;
diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c
index c5d6760454d6..2729ed965f62 100644
--- a/kernel/bpf/liveness.c
+++ b/kernel/bpf/liveness.c
@@ -1421,6 +1421,215 @@ static void arg_track_xfer(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	}
 }
 
+/*
+ * Record access_bytes from helper/kfunc or load/store insn.
+ *   access_bytes > 0:      stack read
+ *   access_bytes < 0:      stack write
+ *   access_bytes == S64_MIN: unknown   — conservative, mark [0..slot] as read
+ *   access_bytes == 0:      no access
+ *
+ */
+static int record_stack_access_off(struct bpf_verifier_env *env,
+				   struct func_instance *instance, s64 fp_off,
+				   s64 access_bytes, u32 frame, u32 insn_idx)
+{
+	s32 slot_hi, slot_lo;
+	spis_t mask;
+
+	if (fp_off >= 0)
+		/*
+		 * out of bounds stack access doesn't contribute
+		 * into actual stack liveness. It will be rejected
+		 * by the main verifier pass later.
+		 */
+		return 0;
+	if (access_bytes == S64_MIN) {
+		/* helper/kfunc read unknown amount of bytes from fp_off until fp+0 */
+		slot_hi = (-fp_off - 1) / STACK_SLOT_SZ;
+		mask = SPIS_ZERO;
+		spis_or_range(&mask, 0, slot_hi);
+		return mark_stack_read(instance, frame, insn_idx, mask);
+	}
+	if (access_bytes > 0) {
+		/* Mark any touched slot as use */
+		slot_hi = (-fp_off - 1) / STACK_SLOT_SZ;
+		slot_lo = max_t(s32, (-fp_off - access_bytes) / STACK_SLOT_SZ, 0);
+		mask = SPIS_ZERO;
+		spis_or_range(&mask, slot_lo, slot_hi);
+		return mark_stack_read(instance, frame, insn_idx, mask);
+	} else if (access_bytes < 0) {
+		/* Mark only fully covered slots as def */
+		access_bytes = -access_bytes;
+		slot_hi = (-fp_off) / STACK_SLOT_SZ - 1;
+		slot_lo = max_t(s32, (-fp_off - access_bytes + STACK_SLOT_SZ - 1) / STACK_SLOT_SZ, 0);
+		if (slot_lo <= slot_hi) {
+			mask = SPIS_ZERO;
+			spis_or_range(&mask, slot_lo, slot_hi);
+			bpf_mark_stack_write(env, frame, mask);
+		}
+	}
+	return 0;
+}
+
+/*
+ * 'arg' is FP-derived argument to helper/kfunc or load/store that
+ * reads (positive) or writes (negative) 'access_bytes' into 'use' or 'def'.
+ */
+static int record_stack_access(struct bpf_verifier_env *env,
+			       struct func_instance *instance,
+			       const struct arg_track *arg,
+			       s64 access_bytes, u32 frame, u32 insn_idx)
+{
+	int i, err;
+
+	if (access_bytes == 0)
+		return 0;
+	if (arg->off_cnt == 0) {
+		if (access_bytes > 0 || access_bytes == S64_MIN)
+			return mark_stack_read(instance, frame, insn_idx, SPIS_ALL);
+		return 0;
+	}
+	if (access_bytes != S64_MIN && access_bytes < 0 && arg->off_cnt != 1)
+		/* multi-offset write cannot set stack_def */
+		return 0;
+
+	for (i = 0; i < arg->off_cnt; i++) {
+		err = record_stack_access_off(env, instance, arg->off[i], access_bytes, frame, insn_idx);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * When a pointer is ARG_IMPRECISE, conservatively mark every frame in
+ * the bitmask as fully used.
+ */
+static int record_imprecise(struct func_instance *instance, u32 mask, u32 insn_idx)
+{
+	int depth = instance->callchain.curframe;
+	int f, err;
+
+	for (f = 0; mask; f++, mask >>= 1) {
+		if (!(mask & 1))
+			continue;
+		if (f <= depth) {
+			err = mark_stack_read(instance, f, insn_idx, SPIS_ALL);
+			if (err)
+				return err;
+		}
+	}
+	return 0;
+}
+
+/* Record load/store access for a given 'at' state of 'insn'. */
+static int record_load_store_access(struct bpf_verifier_env *env,
+				    struct func_instance *instance,
+				    struct arg_track *at, int insn_idx)
+{
+	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
+	int depth = instance->callchain.curframe;
+	s32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+	u8 class = BPF_CLASS(insn->code);
+	struct arg_track resolved, *ptr;
+	int oi;
+
+	switch (class) {
+	case BPF_LDX:
+		ptr = &at[insn->src_reg];
+		break;
+	case BPF_STX:
+		if (BPF_MODE(insn->code) == BPF_ATOMIC) {
+			if (insn->imm == BPF_STORE_REL)
+				sz = -sz;
+			if (insn->imm == BPF_LOAD_ACQ)
+				ptr = &at[insn->src_reg];
+			else
+				ptr = &at[insn->dst_reg];
+		} else {
+			ptr = &at[insn->dst_reg];
+			sz = -sz;
+		}
+		break;
+	case BPF_ST:
+		ptr = &at[insn->dst_reg];
+		sz = -sz;
+		break;
+	default:
+		return 0;
+	}
+
+	/* Resolve offsets: fold insn->off into arg_track */
+	if (ptr->off_cnt > 0) {
+		resolved.off_cnt = ptr->off_cnt;
+		resolved.frame = ptr->frame;
+		for (oi = 0; oi < ptr->off_cnt; oi++) {
+			resolved.off[oi] = arg_add(ptr->off[oi], insn->off);
+			if (resolved.off[oi] == OFF_IMPRECISE) {
+				resolved.off_cnt = 0;
+				break;
+			}
+		}
+		ptr = &resolved;
+	}
+
+	if (ptr->frame >= 0 && ptr->frame <= depth)
+		return record_stack_access(env, instance, ptr, sz, ptr->frame, insn_idx);
+	if (ptr->frame == ARG_IMPRECISE)
+		return record_imprecise(instance, ptr->mask, insn_idx);
+	/* ARG_NONE: not derived from any frame pointer, skip */
+	return 0;
+}
+
+/* Record stack access for a given 'at' state of helper/kfunc 'insn' */
+static int record_call_access(struct bpf_verifier_env *env,
+			      struct func_instance *instance,
+			      struct arg_track *at,
+			      int insn_idx)
+{
+	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
+	int depth = instance->callchain.curframe;
+	struct bpf_call_summary cs;
+	int r, err = 0, num_params = 5;
+
+	if (bpf_pseudo_call(insn))
+		return 0;
+
+	if (bpf_get_call_summary(env, insn, &cs))
+		num_params = cs.num_params;
+
+	for (r = BPF_REG_1; r < BPF_REG_1 + num_params; r++) {
+		int frame = at[r].frame;
+		s64 bytes;
+
+		if (!arg_is_fp(&at[r]))
+			continue;
+
+		if (bpf_helper_call(insn)) {
+			bytes = bpf_helper_stack_access_bytes(env, insn, r - 1, insn_idx);
+		} else if (bpf_pseudo_kfunc_call(insn)) {
+			bytes = bpf_kfunc_stack_access_bytes(env, insn, r - 1, insn_idx);
+		} else {
+			for (int f = 0; f <= depth; f++) {
+				err = mark_stack_read(instance, f, insn_idx, SPIS_ALL);
+				if (err)
+					return err;
+			}
+			return 0;
+		}
+		if (bytes == 0)
+			continue;
+
+		if (frame >= 0 && frame <= depth)
+			err = record_stack_access(env, instance, &at[r], bytes, frame, insn_idx);
+		else if (frame == ARG_IMPRECISE)
+			err = record_imprecise(instance, at[r].mask, insn_idx);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 /*
  * For a calls_callback helper, find the callback subprog and determine
  * which caller register maps to which callback register for FP passthrough.
@@ -1665,6 +1874,40 @@ redo:
 	if (changed)
 		goto redo;
 
+	/* Record memory accesses using converged at_in (RPO skips dead code) */
+	for (p = po_end - 1; p >= po_start; p--) {
+		int idx = env->cfg.insn_postorder[p];
+		int i = idx - start;
+		struct bpf_insn *insn = &insns[idx];
+
+		reset_stack_write_marks(env, instance);
+		err = record_load_store_access(env, instance, at_in[i], idx);
+		if (err)
+			goto err_free;
+
+		if (insn->code == (BPF_JMP | BPF_CALL)) {
+			err = record_call_access(env, instance, at_in[i], idx);
+			if (err)
+				goto err_free;
+		}
+
+		if (bpf_pseudo_call(insn) || bpf_calls_callback(env, idx)) {
+			kvfree(env->callsite_at_stack[idx]);
+			env->callsite_at_stack[idx] =
+				kvmalloc_objs(*env->callsite_at_stack[idx],
+					      MAX_ARG_SPILL_SLOTS, GFP_KERNEL_ACCOUNT);
+			if (!env->callsite_at_stack[idx]) {
+				err = -ENOMEM;
+				goto err_free;
+			}
+			memcpy(env->callsite_at_stack[idx],
+			       at_stack_in[i], sizeof(struct arg_track) * MAX_ARG_SPILL_SLOTS);
+		}
+		err = commit_stack_write_marks(env, instance, idx);
+		if (err)
+			goto err_free;
+	}
+
 	info->at_in = at_in;
 	at_in = NULL;
 	info->len = len;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4c3db8e1c7a4..85f6514b41f9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1804,7 +1804,6 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 		return err;
 	dst_state->speculative = src->speculative;
 	dst_state->in_sleepable = src->in_sleepable;
-	dst_state->cleaned = src->cleaned;
 	dst_state->curframe = src->curframe;
 	dst_state->branches = src->branches;
 	dst_state->parent = src->parent;
@@ -20254,8 +20253,6 @@ static int clean_verifier_state(struct bpf_verifier_env *env,
 {
 	int i, err;
 
-	if (env->cur_state != st)
-		st->cleaned = true;
 	err = bpf_live_stack_query_init(env, st);
 	if (err)
 		return err;
@@ -20268,37 +20265,6 @@ static int clean_verifier_state(struct bpf_verifier_env *env,
 	return 0;
 }
 
-/* the parentage chains form a tree.
- * the verifier states are added to state lists at given insn and
- * pushed into state stack for future exploration.
- * when the verifier reaches bpf_exit insn some of the verifier states
- * stored in the state lists have their final liveness state already,
- * but a lot of states will get revised from liveness point of view when
- * the verifier explores other branches.
- * Example:
- * 1: *(u64)(r10 - 8) = 1
- * 2: if r1 == 100 goto pc+1
- * 3: *(u64)(r10 - 8) = 2
- * 4: r0 = *(u64)(r10 - 8)
- * 5: exit
- * when the verifier reaches exit insn the stack slot -8 in the state list of
- * insn 2 is not yet marked alive. Then the verifier pops the other_branch
- * of insn 2 and goes exploring further. After the insn 4 read, liveness
- * analysis would propagate read mark for -8 at insn 2.
- *
- * Since the verifier pushes the branch states as it sees them while exploring
- * the program the condition of walking the branch instruction for the second
- * time means that all states below this branch were already explored and
- * their final liveness marks are already propagated.
- * Hence when the verifier completes the search of state list in is_state_visited()
- * we can call this clean_live_states() function to clear dead the registers and stack
- * slots to simplify state merging.
- *
- * Important note here that walking the same branch instruction in the callee
- * doesn't meant that the states are DONE. The verifier has to compare
- * the callsites
- */
-
 /* Find id in idset and increment its count, or add new entry */
 static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
 {
@@ -20362,33 +20328,6 @@ static void clear_singular_ids(struct bpf_verifier_env *env,
 	}));
 }
 
-static int clean_live_states(struct bpf_verifier_env *env, int insn,
-			      struct bpf_verifier_state *cur)
-{
-	struct bpf_verifier_state_list *sl;
-	struct list_head *pos, *head;
-	int err;
-
-	head = explored_state(env, insn);
-	list_for_each(pos, head) {
-		sl = container_of(pos, struct bpf_verifier_state_list, node);
-		if (sl->state.branches)
-			continue;
-		if (sl->state.insn_idx != insn ||
-		    !same_callsites(&sl->state, cur))
-			continue;
-		if (sl->state.cleaned)
-			/* all regs in this state in all frames were already marked */
-			continue;
-		if (incomplete_read_marks(env, &sl->state))
-			continue;
-		err = clean_verifier_state(env, &sl->state);
-		if (err)
-			return err;
-	}
-	return 0;
-}
-
 static bool regs_exact(const struct bpf_reg_state *rold,
 		       const struct bpf_reg_state *rcur,
 		       struct bpf_idmap *idmap)
@@ -21089,7 +21028,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	    env->insn_processed - env->prev_insn_processed >= 8)
 		add_new_state = true;
 
-	err = clean_live_states(env, insn_idx, cur);
+	/* keep cleaning the current state as registers/stack become dead */
+	err = clean_verifier_state(env, cur);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3


From 6762e3a0bce5fce94bca3c34ff13cde6a07b87f3 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Fri, 10 Apr 2026 13:56:00 -0700
Subject: bpf: simplify liveness to use (callsite, depth) keyed func_instances

Rework func_instance identification and remove the dynamic liveness
API, completing the transition to fully static stack liveness analysis.

Replace callchain-based func_instance keys with (callsite, depth)
pairs. The full callchain (all ancestor callsites) is no longer part
of the hash key; only the immediate callsite and the call depth
matter. This does not lose precision in practice and simplifies the
data structure significantly: struct callchain is removed entirely,
func_instance stores just callsite, depth.

Drop must_write_acc propagation. Previously, must_write marks were
accumulated across successors and propagated to the caller via
propagate_to_outer_instance(). Instead, callee entry liveness
(live_before at subprog start) is pulled directly back to the
caller's callsite in analyze_subprog() after each callee returns.

Since (callsite, depth) instances are shared across different call
chains that invoke the same subprog at the same depth, must_write
marks from one call may be stale for another. To handle this,
analyze_subprog() records into a fresh_instance() when the instance
was already visited (must_write_initialized), then merge_instances()
combines the results: may_read is unioned, must_write is intersected.
This ensures only slots written on ALL paths through all call sites
are marked as guaranteed writes.
This replaces commit_stack_write_marks() logic.

Skip recursive descent into callees that receive no FP-derived
arguments (has_fp_args() check). This is needed because global
subprogram calls can push depth beyond MAX_CALL_FRAMES (max depth
is 64 for global calls but only 8 frames are accommodated for FP
passing). It also handles the case where a callback subprog cannot be
determined by argument tracking: such callbacks will be processed by
analyze_subprog() at depth 0 independently.

Update lookup_instance() (used by is_live_before queries) to search
for the func_instance with maximal depth at the corresponding
callsite, walking depth downward from frameno to 0. This accounts for
the fact that instance depth no longer corresponds 1:1 to
bpf_verifier_state->curframe, since skipped non-FP calls create gaps.

Remove the dynamic public liveness API from verifier.c:
  - bpf_mark_stack_{read,write}(), bpf_reset/commit_stack_write_marks()
  - bpf_update_live_stack(), bpf_reset_live_stack_callchain()
  - All call sites in check_stack_{read,write}_fixed_off(),
    check_stack_range_initialized(), mark_stack_slot_obj_read(),
    mark/unmark_stack_slots_{dynptr,iter,irq_flag}()
  - The per-instruction write mark accumulation in do_check()
  - The bpf_update_live_stack() call in prepare_func_exit()

mark_stack_read() and mark_stack_write() become static functions in
liveness.c, called only from the static analysis pass. The
func_instance->updated and must_write_dropped flags are removed.
Remove spis_single_slot(), spis_one_bit() helpers from bpf_verifier.h
as they are no longer used.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Tested-by: Paul Chaignon <paul.chaignon@gmail.com>
Link: https://lore.kernel.org/r/20260410-patch-set-v4-9-5d4eecb343db@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  19 --
 kernel/bpf/liveness.c        | 722 +++++++++++++++----------------------------
 kernel/bpf/verifier.c        |  79 +----
 3 files changed, 254 insertions(+), 566 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 8e83a5e66fd8..d7fbc0e1559b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -279,19 +279,6 @@ static inline void spis_or_range(spis_t *mask, u32 lo, u32 hi)
 		mask->v[w / 64] |= BIT_ULL(w % 64);
 }
 
-static inline spis_t spis_one_bit(u32 slot)
-{
-	if (slot < 64)
-		return (spis_t){{ BIT_ULL(slot), 0 }};
-	else
-		return (spis_t){{ 0, BIT_ULL(slot - 64) }};
-}
-
-static inline spis_t spis_single_slot(u32 spi)
-{
-	return spis_or(spis_one_bit(spi * 2), spis_one_bit(spi * 2 + 1));
-}
-
 #define BPF_REGMASK_ARGS ((1 << BPF_REG_1) | (1 << BPF_REG_2) | \
 			  (1 << BPF_REG_3) | (1 << BPF_REG_4) | \
 			  (1 << BPF_REG_5))
@@ -1219,13 +1206,7 @@ int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env);
 
 int bpf_stack_liveness_init(struct bpf_verifier_env *env);
 void bpf_stack_liveness_free(struct bpf_verifier_env *env);
-int bpf_update_live_stack(struct bpf_verifier_env *env);
-int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frameno, u32 insn_idx, spis_t mask);
-void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frameno, spis_t mask);
-int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx);
-int bpf_commit_stack_write_marks(struct bpf_verifier_env *env);
 int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
 bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi);
-void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env);
 
 #endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c
index 2729ed965f62..e797f8f76bc7 100644
--- a/kernel/bpf/liveness.c
+++ b/kernel/bpf/liveness.c
@@ -9,234 +9,111 @@
 
 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
 
-/*
- * This file implements live stack slots analysis. After accumulating
- * stack usage data, the analysis answers queries about whether a
- * particular stack slot may be read by an instruction or any of it's
- * successors.  This data is consumed by the verifier states caching
- * mechanism to decide which stack slots are important when looking for a
- * visited state corresponding to the current state.
- *
- * The analysis is call chain sensitive, meaning that data is collected
- * and queried for tuples (call chain, subprogram instruction index).
- * Such sensitivity allows identifying if some subprogram call always
- * leads to writes in the caller's stack.
- *
- * The basic idea is as follows:
- * - As the verifier accumulates a set of visited states, the analysis instance
- *   accumulates a conservative estimate of stack slots that can be read
- *   or must be written for each visited tuple (call chain, instruction index).
- * - If several states happen to visit the same instruction with the same
- *   call chain, stack usage information for the corresponding tuple is joined:
- *   - "may_read" set represents a union of all possibly read slots
- *     (any slot in "may_read" set might be read at or after the instruction);
- *   - "must_write" set represents an intersection of all possibly written slots
- *     (any slot in "must_write" set is guaranteed to be written by the instruction).
- * - The analysis is split into two phases:
- *   - read and write marks accumulation;
- *   - read and write marks propagation.
- * - The propagation phase is a textbook live variable data flow analysis:
- *
- *     state[cc, i].live_after = U [state[cc, s].live_before for s in bpf_insn_successors(i)]
- *     state[cc, i].live_before =
- *       (state[cc, i].live_after / state[cc, i].must_write) U state[i].may_read
- *
- *   Where:
- *   - `U`  stands for set union
- *   - `/`  stands for set difference;
- *   - `cc` stands for a call chain;
- *   - `i` and `s` are instruction indexes;
- *
- *   The above equations are computed for each call chain and instruction
- *   index until state stops changing.
- * - Additionally, in order to transfer "must_write" information from a
- *   subprogram to call instructions invoking this subprogram,
- *   the "must_write_acc" set is tracked for each (cc, i) tuple.
- *   A set of stack slots that are guaranteed to be written by this
- *   instruction or any of its successors (within the subprogram).
- *   The equation for "must_write_acc" propagation looks as follows:
- *
- *     state[cc, i].must_write_acc =
- *       ∩ [state[cc, s].must_write_acc for s in bpf_insn_successors(i)]
- *       U state[cc, i].must_write
- *
- *   (An intersection of all "must_write_acc" for instruction successors
- *    plus all "must_write" slots for the instruction itself).
- * - After the propagation phase completes for a subprogram, information from
- *   (cc, 0) tuple (subprogram entry) is transferred to the caller's call chain:
- *   - "must_write_acc" set is intersected with the call site's "must_write" set;
- *   - "may_read" set is added to the call site's "may_read" set.
- * - Any live stack queries must be taken after the propagation phase.
- * - Accumulation and propagation phases can be entered multiple times,
- *   at any point in time:
- *   - "may_read" set only grows;
- *   - "must_write" set only shrinks;
- *   - for each visited verifier state with zero branches, all relevant
- *     read and write marks are already recorded by the analysis instance.
- *
- * Technically, the analysis is facilitated by the following data structures:
- * - Call chain: for given verifier state, the call chain is a tuple of call
- *   instruction indexes leading to the current subprogram plus the subprogram
- *   entry point index.
- * - Function instance: for a given call chain, for each instruction in
- *   the current subprogram, a mapping between instruction index and a
- *   set of "may_read", "must_write" and other marks accumulated for this
- *   instruction.
- * - A hash table mapping call chains to function instances.
- */
-
-struct callchain {
-	u32 callsites[MAX_CALL_FRAMES];	/* instruction pointer for each frame */
-	/* cached subprog_info[*].start for functions owning the frames:
-	 * - sp_starts[curframe] used to get insn relative index within current function;
-	 * - sp_starts[0..current-1] used for fast callchain_frame_up().
-	 */
-	u32 sp_starts[MAX_CALL_FRAMES];
-	u32 curframe;			/* depth of callsites and sp_starts arrays */
-};
-
 struct per_frame_masks {
 	spis_t may_read;	/* stack slots that may be read by this instruction */
 	spis_t must_write;	/* stack slots written by this instruction */
-	spis_t must_write_acc;	/* stack slots written by this instruction and its successors */
 	spis_t live_before;	/* stack slots that may be read by this insn and its successors */
 };
 
 /*
- * A function instance created for a specific callchain.
+ * A function instance keyed by (callsite, depth).
  * Encapsulates read and write marks for each instruction in the function.
- * Marks are tracked for each frame in the callchain.
+ * Marks are tracked for each frame up to @depth.
  */
 struct func_instance {
 	struct hlist_node hl_node;
-	struct callchain callchain;
+	u32 callsite;		/* call insn that invoked this subprog (subprog_start for depth 0) */
+	u32 depth;		/* call depth (0 = entry subprog) */
 	u32 subprog;		/* subprog index */
+	u32 subprog_start;	/* cached env->subprog_info[subprog].start */
 	u32 insn_cnt;		/* cached number of insns in the function */
-	bool updated;
-	bool must_write_dropped;
 	/* Per frame, per instruction masks, frames allocated lazily. */
 	struct per_frame_masks *frames[MAX_CALL_FRAMES];
-	/* For each instruction a flag telling if "must_write" had been initialized for it. */
-	bool *must_write_set;
+	bool must_write_initialized;
 };
 
 struct live_stack_query {
 	struct func_instance *instances[MAX_CALL_FRAMES]; /* valid in range [0..curframe] */
+	u32 callsites[MAX_CALL_FRAMES]; /* callsite[i] = insn calling frame i+1 */
 	u32 curframe;
 	u32 insn_idx;
 };
 
 struct bpf_liveness {
-	DECLARE_HASHTABLE(func_instances, 8);		/* maps callchain to func_instance */
+	DECLARE_HASHTABLE(func_instances, 8);		/* maps (depth, callsite) to func_instance */
 	struct live_stack_query live_stack_query;	/* cache to avoid repetitive ht lookups */
-	/* Cached instance corresponding to env->cur_state, avoids per-instruction ht lookup */
-	struct func_instance *cur_instance;
-	/*
-	 * Below fields are used to accumulate stack write marks for instruction at
-	 * @write_insn_idx before submitting the marks to @cur_instance.
-	 */
-	spis_t write_masks_acc[MAX_CALL_FRAMES];
-	u32 write_insn_idx;
 	u32 subprog_calls;				/* analyze_subprog() invocations */
 };
 
-/* Compute callchain corresponding to state @st at depth @frameno */
-static void compute_callchain(struct bpf_verifier_env *env, struct bpf_verifier_state *st,
-			      struct callchain *callchain, u32 frameno)
-{
-	struct bpf_subprog_info *subprog_info = env->subprog_info;
-	u32 i;
-
-	memset(callchain, 0, sizeof(*callchain));
-	for (i = 0; i <= frameno; i++) {
-		callchain->sp_starts[i] = subprog_info[st->frame[i]->subprogno].start;
-		if (i < st->curframe)
-			callchain->callsites[i] = st->frame[i + 1]->callsite;
-	}
-	callchain->curframe = frameno;
-	callchain->callsites[callchain->curframe] = callchain->sp_starts[callchain->curframe];
-}
-
-static u32 hash_callchain(struct callchain *callchain)
-{
-	return jhash2(callchain->callsites, callchain->curframe, 0);
-}
-
-static bool same_callsites(struct callchain *a, struct callchain *b)
+/*
+ * Hash/compare key for func_instance: (depth, callsite).
+ * For depth == 0 (entry subprog), @callsite is the subprog start insn.
+ * For depth > 0, @callsite is the call instruction index that invoked the subprog.
+ */
+static u32 instance_hash(u32 callsite, u32 depth)
 {
-	int i;
+	u32 key[2] = { depth, callsite };
 
-	if (a->curframe != b->curframe)
-		return false;
-	for (i = a->curframe; i >= 0; i--)
-		if (a->callsites[i] != b->callsites[i])
-			return false;
-	return true;
+	return jhash2(key, 2, 0);
 }
 
-/*
- * Find existing or allocate new function instance corresponding to @callchain.
- * Instances are accumulated in env->liveness->func_instances and persist
- * until the end of the verification process.
- */
-static struct func_instance *__lookup_instance(struct bpf_verifier_env *env,
-					       struct callchain *callchain)
+static struct func_instance *find_instance(struct bpf_verifier_env *env,
+					   u32 callsite, u32 depth)
 {
 	struct bpf_liveness *liveness = env->liveness;
-	struct bpf_subprog_info *subprog;
-	struct func_instance *result;
-	u32 subprog_sz, size, key;
-
-	key = hash_callchain(callchain);
-	hash_for_each_possible(liveness->func_instances, result, hl_node, key)
-		if (same_callsites(&result->callchain, callchain))
-			return result;
-
-	subprog = bpf_find_containing_subprog(env, callchain->sp_starts[callchain->curframe]);
-	subprog_sz = (subprog + 1)->start - subprog->start;
-	size = sizeof(struct func_instance);
-	result = kvzalloc(size, GFP_KERNEL_ACCOUNT);
-	if (!result)
-		return ERR_PTR(-ENOMEM);
-	result->must_write_set = kvzalloc_objs(*result->must_write_set,
-					       subprog_sz, GFP_KERNEL_ACCOUNT);
-	if (!result->must_write_set) {
-		kvfree(result);
-		return ERR_PTR(-ENOMEM);
-	}
-	memcpy(&result->callchain, callchain, sizeof(*callchain));
-	result->subprog = subprog - env->subprog_info;
-	result->insn_cnt = subprog_sz;
-	hash_add(liveness->func_instances, &result->hl_node, key);
-	return result;
+	struct func_instance *f;
+	u32 key = instance_hash(callsite, depth);
+
+	hash_for_each_possible(liveness->func_instances, f, hl_node, key)
+		if (f->depth == depth && f->callsite == callsite)
+			return f;
+	return NULL;
 }
 
 static struct func_instance *call_instance(struct bpf_verifier_env *env,
 					   struct func_instance *caller,
 					   u32 callsite, int subprog)
 {
-	struct callchain cc;
-
-	if (caller) {
-		cc = caller->callchain;
-		cc.callsites[cc.curframe] = callsite;
-		cc.curframe++;
-	} else {
-		memset(&cc, 0, sizeof(cc));
-	}
-	cc.sp_starts[cc.curframe] = env->subprog_info[subprog].start;
-	cc.callsites[cc.curframe] = cc.sp_starts[cc.curframe];
-	return __lookup_instance(env, &cc);
+	u32 depth = caller ? caller->depth + 1 : 0;
+	u32 subprog_start = env->subprog_info[subprog].start;
+	u32 lookup_key = depth > 0 ? callsite : subprog_start;
+	struct func_instance *f;
+	u32 hash;
+
+	f = find_instance(env, lookup_key, depth);
+	if (f)
+		return f;
+
+	f = kvzalloc(sizeof(*f), GFP_KERNEL_ACCOUNT);
+	if (!f)
+		return ERR_PTR(-ENOMEM);
+	f->callsite = lookup_key;
+	f->depth = depth;
+	f->subprog = subprog;
+	f->subprog_start = subprog_start;
+	f->insn_cnt = (env->subprog_info + subprog + 1)->start - subprog_start;
+	hash = instance_hash(lookup_key, depth);
+	hash_add(env->liveness->func_instances, &f->hl_node, hash);
+	return f;
 }
 
 static struct func_instance *lookup_instance(struct bpf_verifier_env *env,
 					     struct bpf_verifier_state *st,
 					     u32 frameno)
 {
-	struct callchain callchain;
-
-	compute_callchain(env, st, &callchain, frameno);
-	return __lookup_instance(env, &callchain);
+	u32 callsite, subprog_start;
+	struct func_instance *f;
+	u32 key, depth;
+
+	subprog_start = env->subprog_info[st->frame[frameno]->subprogno].start;
+	callsite = frameno > 0 ? st->frame[frameno]->callsite : subprog_start;
+
+	for (depth = frameno; ; depth--) {
+		key = depth > 0 ? callsite : subprog_start;
+		f = find_instance(env, key, depth);
+		if (f || depth == 0)
+			return f;
+	}
 }
 
 int bpf_stack_liveness_init(struct bpf_verifier_env *env)
@@ -257,9 +134,8 @@ void bpf_stack_liveness_free(struct bpf_verifier_env *env)
 	if (!env->liveness)
 		return;
 	hash_for_each_safe(env->liveness->func_instances, bkt, tmp, instance, hl_node) {
-		for (i = 0; i <= instance->callchain.curframe; i++)
+		for (i = 0; i <= instance->depth; i++)
 			kvfree(instance->frames[i]);
-		kvfree(instance->must_write_set);
 		kvfree(instance);
 	}
 	kvfree(env->liveness);
@@ -271,7 +147,7 @@ void bpf_stack_liveness_free(struct bpf_verifier_env *env)
  */
 static int relative_idx(struct func_instance *instance, u32 insn_idx)
 {
-	return insn_idx - instance->callchain.sp_starts[instance->callchain.curframe];
+	return insn_idx - instance->subprog_start;
 }
 
 static struct per_frame_masks *get_frame_masks(struct func_instance *instance,
@@ -298,145 +174,33 @@ static struct per_frame_masks *alloc_frame_masks(struct func_instance *instance,
 	return get_frame_masks(instance, frame, insn_idx);
 }
 
-void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env)
-{
-	env->liveness->cur_instance = NULL;
-}
-
-/* If @env->liveness->cur_instance is null, set it to instance corresponding to @env->cur_state. */
-static int ensure_cur_instance(struct bpf_verifier_env *env)
-{
-	struct bpf_liveness *liveness = env->liveness;
-	struct func_instance *instance;
-
-	if (liveness->cur_instance)
-		return 0;
-
-	instance = lookup_instance(env, env->cur_state, env->cur_state->curframe);
-	if (IS_ERR(instance))
-		return PTR_ERR(instance);
-
-	liveness->cur_instance = instance;
-	return 0;
-}
-
 /* Accumulate may_read masks for @frame at @insn_idx */
 static int mark_stack_read(struct func_instance *instance, u32 frame, u32 insn_idx, spis_t mask)
 {
 	struct per_frame_masks *masks;
-	spis_t new_may_read;
 
 	masks = alloc_frame_masks(instance, frame, insn_idx);
 	if (IS_ERR(masks))
 		return PTR_ERR(masks);
-	new_may_read = spis_or(masks->may_read, mask);
-	if (!spis_equal(new_may_read, masks->may_read) &&
-	    !spis_equal(spis_or(new_may_read, masks->live_before),
-				masks->live_before))
-		instance->updated = true;
 	masks->may_read = spis_or(masks->may_read, mask);
 	return 0;
 }
 
-int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frame, u32 insn_idx, spis_t mask)
+static int mark_stack_write(struct func_instance *instance, u32 frame, u32 insn_idx, spis_t mask)
 {
-	int err;
-
-	err = ensure_cur_instance(env);
-	err = err ?: mark_stack_read(env->liveness->cur_instance, frame, insn_idx, mask);
-	return err;
-}
-
-static void reset_stack_write_marks(struct bpf_verifier_env *env, struct func_instance *instance)
-{
-	struct bpf_liveness *liveness = env->liveness;
-	int i;
-
-	for (i = 0; i <= instance->callchain.curframe; i++)
-		liveness->write_masks_acc[i] = SPIS_ZERO;
-}
-
-int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx)
-{
-	struct bpf_liveness *liveness = env->liveness;
-	int err;
-
-	err = ensure_cur_instance(env);
-	if (err)
-		return err;
-
-	liveness->write_insn_idx = insn_idx;
-	reset_stack_write_marks(env, liveness->cur_instance);
-	return 0;
-}
-
-void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frame, spis_t mask)
-{
-	env->liveness->write_masks_acc[frame] = spis_or(env->liveness->write_masks_acc[frame], mask);
-}
-
-static int commit_stack_write_marks(struct bpf_verifier_env *env,
-				    struct func_instance *instance,
-				    u32 insn_idx)
-{
-	struct bpf_liveness *liveness = env->liveness;
-	u32 idx, frame, curframe;
 	struct per_frame_masks *masks;
-	spis_t mask, old_must_write, dropped;
 
-	if (!instance)
-		return 0;
-
-	curframe = instance->callchain.curframe;
-	idx = relative_idx(instance, insn_idx);
-	for (frame = 0; frame <= curframe; frame++) {
-		mask = liveness->write_masks_acc[frame];
-		/* avoid allocating frames for zero masks */
-		if (spis_is_zero(mask) && !instance->must_write_set[idx])
-			continue;
-		masks = alloc_frame_masks(instance, frame, insn_idx);
-		if (IS_ERR(masks))
-			return PTR_ERR(masks);
-		old_must_write = masks->must_write;
-		/*
-		 * If instruction at this callchain is seen for a first time, set must_write equal
-		 * to @mask. Otherwise take intersection with the previous value.
-		 */
-		if (instance->must_write_set[idx])
-			mask = spis_and(mask, old_must_write);
-		if (!spis_equal(old_must_write, mask)) {
-			masks->must_write = mask;
-			instance->updated = true;
-		}
-		/* dropped = old_must_write & ~mask */
-		dropped = spis_and(old_must_write, spis_not(mask));
-		if (!spis_is_zero(dropped))
-			instance->must_write_dropped = true;
-	}
-	instance->must_write_set[idx] = true;
-	liveness->write_insn_idx = 0;
+	masks = alloc_frame_masks(instance, frame, insn_idx);
+	if (IS_ERR(masks))
+		return PTR_ERR(masks);
+	masks->must_write = spis_or(masks->must_write, mask);
 	return 0;
 }
 
-/*
- * Merge stack writes marks in @env->liveness->write_masks_acc
- * with information already in @env->liveness->cur_instance.
- */
-int bpf_commit_stack_write_marks(struct bpf_verifier_env *env)
-{
-	return commit_stack_write_marks(env, env->liveness->cur_instance, env->liveness->write_insn_idx);
-}
-
-static char *fmt_callchain(struct bpf_verifier_env *env, struct callchain *callchain)
+static char *fmt_instance(struct bpf_verifier_env *env, struct func_instance *instance)
 {
-	char *buf_end = env->tmp_str_buf + sizeof(env->tmp_str_buf);
-	char *buf = env->tmp_str_buf;
-	int i;
-
-	buf += snprintf(buf, buf_end - buf, "(");
-	for (i = 0; i <= callchain->curframe; i++)
-		buf += snprintf(buf, buf_end - buf, "%s%d", i ? "," : "", callchain->callsites[i]);
-	snprintf(buf, buf_end - buf, ")");
+	snprintf(env->tmp_str_buf, sizeof(env->tmp_str_buf),
+		 "(d%d,cs%d)", instance->depth, instance->callsite);
 	return env->tmp_str_buf;
 }
 
@@ -467,7 +231,7 @@ static void bpf_fmt_spis_mask(char *buf, ssize_t buf_sz, spis_t spis)
 	}
 }
 
-static void log_mask_change(struct bpf_verifier_env *env, struct callchain *callchain,
+static void log_mask_change(struct bpf_verifier_env *env, struct func_instance *instance,
 			    char *pfx, u32 frame, u32 insn_idx,
 			    spis_t old, spis_t new)
 {
@@ -479,7 +243,7 @@ static void log_mask_change(struct bpf_verifier_env *env, struct callchain *call
 
 	if (spis_is_zero(changed_bits))
 		return;
-	bpf_log(&env->log, "%s frame %d insn %d ", fmt_callchain(env, callchain), frame, insn_idx);
+	bpf_log(&env->log, "%s frame %d insn %d ", fmt_instance(env, instance), frame, insn_idx);
 	if (!spis_is_zero(new_ones)) {
 		bpf_fmt_spis_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_ones);
 		bpf_log(&env->log, "+%s %s ", pfx, env->tmp_str_buf);
@@ -562,61 +326,12 @@ bpf_insn_successors(struct bpf_verifier_env *env, u32 idx)
 
 __diag_pop();
 
-static struct func_instance *get_outer_instance(struct bpf_verifier_env *env,
-						struct func_instance *instance)
-{
-	struct callchain callchain = instance->callchain;
-
-	/* Adjust @callchain to represent callchain one frame up */
-	callchain.callsites[callchain.curframe] = 0;
-	callchain.sp_starts[callchain.curframe] = 0;
-	callchain.curframe--;
-	callchain.callsites[callchain.curframe] = callchain.sp_starts[callchain.curframe];
-	return __lookup_instance(env, &callchain);
-}
-
-static u32 callchain_subprog_start(struct callchain *callchain)
-{
-	return callchain->sp_starts[callchain->curframe];
-}
-
-/*
- * Transfer @may_read and @must_write_acc marks from the first instruction of @instance,
- * to the call instruction in function instance calling @instance.
- */
-static int propagate_to_outer_instance(struct bpf_verifier_env *env,
-				       struct func_instance *instance)
-{
-	struct callchain *callchain = &instance->callchain;
-	u32 this_subprog_start, callsite, frame;
-	struct func_instance *outer_instance;
-	struct per_frame_masks *insn;
-	int err;
-
-	this_subprog_start = callchain_subprog_start(callchain);
-	outer_instance = get_outer_instance(env, instance);
-	if (IS_ERR(outer_instance))
-		return PTR_ERR(outer_instance);
-	callsite = callchain->callsites[callchain->curframe - 1];
-	reset_stack_write_marks(env, outer_instance);
-	for (frame = 0; frame < callchain->curframe; frame++) {
-		insn = get_frame_masks(instance, frame, this_subprog_start);
-		if (!insn)
-			continue;
-		bpf_mark_stack_write(env, frame, insn->must_write_acc);
-		err = mark_stack_read(outer_instance, frame, callsite, insn->live_before);
-		if (err)
-			return err;
-	}
-	commit_stack_write_marks(env, outer_instance, callsite);
-	return 0;
-}
 
 static inline bool update_insn(struct bpf_verifier_env *env,
 			       struct func_instance *instance, u32 frame, u32 insn_idx)
 {
 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
-	spis_t new_before, new_after, must_write_acc;
+	spis_t new_before, new_after;
 	struct per_frame_masks *insn, *succ_insn;
 	struct bpf_iarray *succ;
 	u32 s;
@@ -630,17 +345,10 @@ static inline bool update_insn(struct bpf_verifier_env *env,
 	insn = get_frame_masks(instance, frame, insn_idx);
 	new_before = SPIS_ZERO;
 	new_after = SPIS_ZERO;
-	/*
-	 * New "must_write_acc" is an intersection of all "must_write_acc"
-	 * of successors plus all "must_write" slots of instruction itself.
-	 */
-	must_write_acc = SPIS_ALL;
 	for (s = 0; s < succ->cnt; ++s) {
 		succ_insn = get_frame_masks(instance, frame, succ->items[s]);
 		new_after = spis_or(new_after, succ_insn->live_before);
-		must_write_acc = spis_and(must_write_acc, succ_insn->must_write_acc);
 	}
-	must_write_acc = spis_or(must_write_acc, insn->must_write);
 	/*
 	 * New "live_before" is a union of all "live_before" of successors
 	 * minus slots written by instruction plus slots read by instruction.
@@ -649,53 +357,27 @@ static inline bool update_insn(struct bpf_verifier_env *env,
 	new_before = spis_or(spis_and(new_after, spis_not(insn->must_write)),
 			     insn->may_read);
 	changed |= !spis_equal(new_before, insn->live_before);
-	changed |= !spis_equal(must_write_acc, insn->must_write_acc);
 	if (unlikely(env->log.level & BPF_LOG_LEVEL2) &&
 	    (!spis_is_zero(insn->may_read) || !spis_is_zero(insn->must_write) ||
-	     insn_idx == callchain_subprog_start(&instance->callchain) ||
+	     insn_idx == instance->subprog_start ||
 	     aux[insn_idx].prune_point)) {
-		log_mask_change(env, &instance->callchain, "live",
+		log_mask_change(env, instance, "live",
 				frame, insn_idx, insn->live_before, new_before);
-		log_mask_change(env, &instance->callchain, "written",
-				frame, insn_idx, insn->must_write_acc, must_write_acc);
 	}
 	insn->live_before = new_before;
-	insn->must_write_acc = must_write_acc;
 	return changed;
 }
 
-/* Fixed-point computation of @live_before and @must_write_acc marks */
-static int update_instance(struct bpf_verifier_env *env, struct func_instance *instance)
+/* Fixed-point computation of @live_before marks */
+static void update_instance(struct bpf_verifier_env *env, struct func_instance *instance)
 {
-	u32 i, frame, po_start, po_end, cnt, this_subprog_start;
-	struct callchain *callchain = &instance->callchain;
+	u32 i, frame, po_start, po_end, cnt;
 	int *insn_postorder = env->cfg.insn_postorder;
 	struct bpf_subprog_info *subprog;
-	struct per_frame_masks *insn;
 	bool changed;
-	int err;
 
-	if (!instance->updated)
-		return 0;
-
-	this_subprog_start = callchain_subprog_start(callchain);
-	/*
-	 * If must_write marks were updated must_write_acc needs to be reset
-	 * (to account for the case when new must_write sets became smaller).
-	 */
-	if (instance->must_write_dropped) {
-		for (frame = 0; frame <= callchain->curframe; frame++) {
-			if (!instance->frames[frame])
-				continue;
-
-			for (i = 0; i < instance->insn_cnt; i++) {
-				insn = get_frame_masks(instance, frame, this_subprog_start + i);
-				insn->must_write_acc = SPIS_ZERO;
-			}
-		}
-	}
-
-	subprog = bpf_find_containing_subprog(env, this_subprog_start);
+	instance->must_write_initialized = true;
+	subprog = &env->subprog_info[instance->subprog];
 	po_start = subprog->postorder_start;
 	po_end = (subprog + 1)->postorder_start;
 	cnt = 0;
@@ -703,7 +385,7 @@ static int update_instance(struct bpf_verifier_env *env, struct func_instance *i
 	do {
 		cnt++;
 		changed = false;
-		for (frame = 0; frame <= instance->callchain.curframe; frame++) {
+		for (frame = 0; frame <= instance->depth; frame++) {
 			if (!instance->frames[frame])
 				continue;
 
@@ -714,44 +396,7 @@ static int update_instance(struct bpf_verifier_env *env, struct func_instance *i
 
 	if (env->log.level & BPF_LOG_LEVEL2)
 		bpf_log(&env->log, "%s live stack update done in %d iterations\n",
-			fmt_callchain(env, callchain), cnt);
-
-	/* transfer marks accumulated for outer frames to outer func instance (caller) */
-	if (callchain->curframe > 0) {
-		err = propagate_to_outer_instance(env, instance);
-		if (err)
-			return err;
-	}
-
-	instance->updated = false;
-	instance->must_write_dropped = false;
-	return 0;
-}
-
-/*
- * Prepare all callchains within @env->cur_state for querying.
- * This function should be called after each verifier.c:pop_stack()
- * and whenever verifier.c:do_check_insn() processes subprogram exit.
- * This would guarantee that visited verifier states with zero branches
- * have their bpf_mark_stack_{read,write}() effects propagated in
- * @env->liveness.
- */
-int bpf_update_live_stack(struct bpf_verifier_env *env)
-{
-	struct func_instance *instance;
-	int err, frame;
-
-	bpf_reset_live_stack_callchain(env);
-	for (frame = env->cur_state->curframe; frame >= 0; --frame) {
-		instance = lookup_instance(env, env->cur_state, frame);
-		if (IS_ERR(instance))
-			return PTR_ERR(instance);
-
-		err = update_instance(env, instance);
-		if (err)
-			return err;
-	}
-	return 0;
+			fmt_instance(env, instance), cnt);
 }
 
 static bool is_live_before(struct func_instance *instance, u32 insn_idx, u32 frameno, u32 half_spi)
@@ -771,9 +416,12 @@ int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_
 	memset(q, 0, sizeof(*q));
 	for (frame = 0; frame <= st->curframe; frame++) {
 		instance = lookup_instance(env, st, frame);
-		if (IS_ERR(instance))
-			return PTR_ERR(instance);
-		q->instances[frame] = instance;
+		if (IS_ERR_OR_NULL(instance))
+			q->instances[frame] = NULL;
+		else
+			q->instances[frame] = instance;
+		if (frame < st->curframe)
+			q->callsites[frame] = st->frame[frame + 1]->callsite;
 	}
 	q->curframe = st->curframe;
 	q->insn_idx = st->insn_idx;
@@ -783,27 +431,44 @@ int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_
 bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 half_spi)
 {
 	/*
-	 * Slot is alive if it is read before q->st->insn_idx in current func instance,
+	 * Slot is alive if it is read before q->insn_idx in current func instance,
 	 * or if for some outer func instance:
 	 * - alive before callsite if callsite calls callback, otherwise
 	 * - alive after callsite
 	 */
 	struct live_stack_query *q = &env->liveness->live_stack_query;
 	struct func_instance *instance, *curframe_instance;
-	u32 i, callsite;
-	bool alive;
+	u32 i, callsite, rel;
+	int cur_delta, delta;
+	bool alive = false;
 
 	curframe_instance = q->instances[q->curframe];
-	alive = is_live_before(curframe_instance, q->insn_idx, frameno, half_spi);
+	if (!curframe_instance)
+		return true;
+	cur_delta = (int)curframe_instance->depth - (int)q->curframe;
+	rel = frameno + cur_delta;
+	if (rel <= curframe_instance->depth)
+		alive = is_live_before(curframe_instance, q->insn_idx, rel, half_spi);
+
 	if (alive)
 		return true;
 
 	for (i = frameno; i < q->curframe; i++) {
-		callsite = curframe_instance->callchain.callsites[i];
 		instance = q->instances[i];
+		if (!instance)
+			return true;
+		/* Map actual frameno to frame index within this instance */
+		delta = (int)instance->depth - (int)i;
+		rel = frameno + delta;
+		if (rel > instance->depth)
+			return true;
+
+		/* Get callsite from verifier state, not from instance callchain */
+		callsite = q->callsites[i];
+
 		alive = bpf_calls_callback(env, callsite)
-			? is_live_before(instance, callsite, frameno, half_spi)
-			: is_live_before(instance, callsite + 1, frameno, half_spi);
+			? is_live_before(instance, callsite, rel, half_spi)
+			: is_live_before(instance, callsite + 1, rel, half_spi);
 		if (alive)
 			return true;
 	}
@@ -1299,7 +964,7 @@ static void arg_track_xfer(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			   struct func_instance *instance,
 			   u32 *callsites)
 {
-	int depth = instance->callchain.curframe;
+	int depth = instance->depth;
 	u8 class = BPF_CLASS(insn->code);
 	u8 code = BPF_OP(insn->code);
 	struct arg_track *dst = &at_out[insn->dst_reg];
@@ -1429,8 +1094,7 @@ static void arg_track_xfer(struct bpf_verifier_env *env, struct bpf_insn *insn,
  *   access_bytes == 0:      no access
  *
  */
-static int record_stack_access_off(struct bpf_verifier_env *env,
-				   struct func_instance *instance, s64 fp_off,
+static int record_stack_access_off(struct func_instance *instance, s64 fp_off,
 				   s64 access_bytes, u32 frame, u32 insn_idx)
 {
 	s32 slot_hi, slot_lo;
@@ -1465,7 +1129,7 @@ static int record_stack_access_off(struct bpf_verifier_env *env,
 		if (slot_lo <= slot_hi) {
 			mask = SPIS_ZERO;
 			spis_or_range(&mask, slot_lo, slot_hi);
-			bpf_mark_stack_write(env, frame, mask);
+			return mark_stack_write(instance, frame, insn_idx, mask);
 		}
 	}
 	return 0;
@@ -1475,8 +1139,7 @@ static int record_stack_access_off(struct bpf_verifier_env *env,
  * 'arg' is FP-derived argument to helper/kfunc or load/store that
  * reads (positive) or writes (negative) 'access_bytes' into 'use' or 'def'.
  */
-static int record_stack_access(struct bpf_verifier_env *env,
-			       struct func_instance *instance,
+static int record_stack_access(struct func_instance *instance,
 			       const struct arg_track *arg,
 			       s64 access_bytes, u32 frame, u32 insn_idx)
 {
@@ -1494,7 +1157,7 @@ static int record_stack_access(struct bpf_verifier_env *env,
 		return 0;
 
 	for (i = 0; i < arg->off_cnt; i++) {
-		err = record_stack_access_off(env, instance, arg->off[i], access_bytes, frame, insn_idx);
+		err = record_stack_access_off(instance, arg->off[i], access_bytes, frame, insn_idx);
 		if (err)
 			return err;
 	}
@@ -1507,7 +1170,7 @@ static int record_stack_access(struct bpf_verifier_env *env,
  */
 static int record_imprecise(struct func_instance *instance, u32 mask, u32 insn_idx)
 {
-	int depth = instance->callchain.curframe;
+	int depth = instance->depth;
 	int f, err;
 
 	for (f = 0; mask; f++, mask >>= 1) {
@@ -1528,7 +1191,7 @@ static int record_load_store_access(struct bpf_verifier_env *env,
 				    struct arg_track *at, int insn_idx)
 {
 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
-	int depth = instance->callchain.curframe;
+	int depth = instance->depth;
 	s32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
 	u8 class = BPF_CLASS(insn->code);
 	struct arg_track resolved, *ptr;
@@ -1574,7 +1237,7 @@ static int record_load_store_access(struct bpf_verifier_env *env,
 	}
 
 	if (ptr->frame >= 0 && ptr->frame <= depth)
-		return record_stack_access(env, instance, ptr, sz, ptr->frame, insn_idx);
+		return record_stack_access(instance, ptr, sz, ptr->frame, insn_idx);
 	if (ptr->frame == ARG_IMPRECISE)
 		return record_imprecise(instance, ptr->mask, insn_idx);
 	/* ARG_NONE: not derived from any frame pointer, skip */
@@ -1588,7 +1251,7 @@ static int record_call_access(struct bpf_verifier_env *env,
 			      int insn_idx)
 {
 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
-	int depth = instance->callchain.curframe;
+	int depth = instance->depth;
 	struct bpf_call_summary cs;
 	int r, err = 0, num_params = 5;
 
@@ -1621,7 +1284,7 @@ static int record_call_access(struct bpf_verifier_env *env,
 			continue;
 
 		if (frame >= 0 && frame <= depth)
-			err = record_stack_access(env, instance, &at[r], bytes, frame, insn_idx);
+			err = record_stack_access(instance, &at[r], bytes, frame, insn_idx);
 		else if (frame == ARG_IMPRECISE)
 			err = record_imprecise(instance, at[r].mask, insn_idx);
 		if (err)
@@ -1780,7 +1443,7 @@ static int compute_subprog_args(struct bpf_verifier_env *env,
 {
 	int subprog = instance->subprog;
 	struct bpf_insn *insns = env->prog->insnsi;
-	int depth = instance->callchain.curframe;
+	int depth = instance->depth;
 	int start = env->subprog_info[subprog].start;
 	int po_start = env->subprog_info[subprog].postorder_start;
 	int end = env->subprog_info[subprog + 1].start;
@@ -1880,7 +1543,6 @@ redo:
 		int i = idx - start;
 		struct bpf_insn *insn = &insns[idx];
 
-		reset_stack_write_marks(env, instance);
 		err = record_load_store_access(env, instance, at_in[i], idx);
 		if (err)
 			goto err_free;
@@ -1903,9 +1565,6 @@ redo:
 			memcpy(env->callsite_at_stack[idx],
 			       at_stack_in[i], sizeof(struct arg_track) * MAX_ARG_SPILL_SLOTS);
 		}
-		err = commit_stack_write_marks(env, instance, idx);
-		if (err)
-			goto err_free;
 	}
 
 	info->at_in = at_in;
@@ -1921,6 +1580,76 @@ err_free:
 	return err;
 }
 
+/* Return true if any of R1-R5 is derived from a frame pointer. */
+static bool has_fp_args(struct arg_track *args)
+{
+	for (int r = BPF_REG_1; r <= BPF_REG_5; r++)
+		if (args[r].frame != ARG_NONE)
+			return true;
+	return false;
+}
+
+/*
+ * Merge a freshly analyzed instance into the original.
+ * may_read: union (any pass might read the slot).
+ * must_write: intersection (only slots written on ALL passes are guaranteed).
+ * live_before is recomputed by a subsequent update_instance() on @dst.
+ */
+static void merge_instances(struct func_instance *dst, struct func_instance *src)
+{
+	int f, i;
+
+	for (f = 0; f <= dst->depth; f++) {
+		if (!src->frames[f]) {
+			/* This pass didn't touch frame f — must_write intersects with empty. */
+			if (dst->frames[f])
+				for (i = 0; i < dst->insn_cnt; i++)
+					dst->frames[f][i].must_write = SPIS_ZERO;
+			continue;
+		}
+		if (!dst->frames[f]) {
+			/* Previous pass didn't touch frame f — take src, zero must_write. */
+			dst->frames[f] = src->frames[f];
+			src->frames[f] = NULL;
+			for (i = 0; i < dst->insn_cnt; i++)
+				dst->frames[f][i].must_write = SPIS_ZERO;
+			continue;
+		}
+		for (i = 0; i < dst->insn_cnt; i++) {
+			dst->frames[f][i].may_read =
+				spis_or(dst->frames[f][i].may_read,
+					src->frames[f][i].may_read);
+			dst->frames[f][i].must_write =
+				spis_and(dst->frames[f][i].must_write,
+					 src->frames[f][i].must_write);
+		}
+	}
+}
+
+static struct func_instance *fresh_instance(struct func_instance *src)
+{
+	struct func_instance *f;
+
+	f = kvzalloc_obj(*f, GFP_KERNEL_ACCOUNT);
+	if (!f)
+		return ERR_PTR(-ENOMEM);
+	f->callsite = src->callsite;
+	f->depth = src->depth;
+	f->subprog = src->subprog;
+	f->subprog_start = src->subprog_start;
+	f->insn_cnt = src->insn_cnt;
+	return f;
+}
+
+static void free_instance(struct func_instance *instance)
+{
+	int i;
+
+	for (i = 0; i <= instance->depth; i++)
+		kvfree(instance->frames[i]);
+	kvfree(instance);
+}
+
 /*
  * Recursively analyze a subprog with specific 'entry_args'.
  * Each callee is analyzed with the exact args from its call site.
@@ -1937,11 +1666,12 @@ static int analyze_subprog(struct bpf_verifier_env *env,
 			   u32 *callsites)
 {
 	int subprog = instance->subprog;
-	int depth = instance->callchain.curframe;
+	int depth = instance->depth;
 	struct bpf_insn *insns = env->prog->insnsi;
 	int start = env->subprog_info[subprog].start;
 	int po_start = env->subprog_info[subprog].postorder_start;
 	int po_end = env->subprog_info[subprog + 1].postorder_start;
+	struct func_instance *prev_instance = NULL;
 	int j, err;
 
 	if (++env->liveness->subprog_calls > 10000) {
@@ -1953,13 +1683,28 @@ static int analyze_subprog(struct bpf_verifier_env *env,
 	if (need_resched())
 		cond_resched();
 
+
+	/*
+	 * When an instance is reused (must_write_initialized == true),
+	 * record into a fresh instance and merge afterward.  This avoids
+	 * stale must_write marks for instructions not reached in this pass.
+	 */
+	if (instance->must_write_initialized) {
+		struct func_instance *fresh = fresh_instance(instance);
+
+		if (IS_ERR(fresh))
+			return PTR_ERR(fresh);
+		prev_instance = instance;
+		instance = fresh;
+	}
+
 	/* Free prior analysis if this subprog was already visited */
 	kvfree(info[subprog].at_in);
 	info[subprog].at_in = NULL;
 
 	err = compute_subprog_args(env, &info[subprog], entry_args, instance, callsites);
 	if (err)
-		return err;
+		goto out_free;
 
 	/* For each reachable call site in the subprog, recurse into callees */
 	for (int p = po_start; p < po_end; p++) {
@@ -1994,7 +1739,7 @@ static int analyze_subprog(struct bpf_verifier_env *env,
 				for (int f = 0; f <= depth; f++) {
 					err = mark_stack_read(instance, f, idx, SPIS_ALL);
 					if (err)
-						return err;
+						goto out_free;
 				}
 				continue;
 			}
@@ -2008,19 +1753,52 @@ static int analyze_subprog(struct bpf_verifier_env *env,
 			continue;
 		}
 
-		if (depth == MAX_CALL_FRAMES - 1)
-			return -EINVAL;
+		if (!has_fp_args(callee_args))
+			continue;
+
+		if (depth == MAX_CALL_FRAMES - 1) {
+			err = -EINVAL;
+			goto out_free;
+		}
 
 		callee_instance = call_instance(env, instance, idx, callee);
-		if (IS_ERR(callee_instance))
-			return PTR_ERR(callee_instance);
+		if (IS_ERR(callee_instance)) {
+			err = PTR_ERR(callee_instance);
+			goto out_free;
+		}
 		callsites[depth] = idx;
 		err = analyze_subprog(env, callee_args, info, callee_instance, callsites);
 		if (err)
-			return err;
+			goto out_free;
+
+		/* Pull callee's entry liveness back to caller's callsite */
+		{
+			u32 callee_start = callee_instance->subprog_start;
+			struct per_frame_masks *entry;
+
+			for (int f = 0; f < callee_instance->depth; f++) {
+				entry = get_frame_masks(callee_instance, f, callee_start);
+				if (!entry)
+					continue;
+				err = mark_stack_read(instance, f, idx, entry->live_before);
+				if (err)
+					goto out_free;
+			}
+		}
+	}
+
+	if (prev_instance) {
+		merge_instances(prev_instance, instance);
+		free_instance(instance);
+		instance = prev_instance;
 	}
+	update_instance(env, instance);
+	return 0;
 
-	return update_instance(env, instance);
+out_free:
+	if (prev_instance)
+		free_instance(instance);
+	return err;
 }
 
 int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 85f6514b41f9..36e697e29e44 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -830,9 +830,6 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
 		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
 	}
 
-	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi));
-	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi - 1));
-
 	return 0;
 }
 
@@ -847,9 +844,6 @@ static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_stat
 
 	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
 	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
-
-	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi));
-	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi - 1));
 }
 
 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
@@ -986,9 +980,6 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
 	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
 	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
 
-	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi));
-	bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi - 1));
-
 	return 0;
 }
 
@@ -1114,7 +1105,6 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
 		for (j = 0; j < BPF_REG_SIZE; j++)
 			slot->slot_type[j] = STACK_ITER;
 
-		bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi - i));
 		mark_stack_slot_scratched(env, spi - i);
 	}
 
@@ -1143,7 +1133,6 @@ static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
 		for (j = 0; j < BPF_REG_SIZE; j++)
 			slot->slot_type[j] = STACK_INVALID;
 
-		bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi - i));
 		mark_stack_slot_scratched(env, spi - i);
 	}
 
@@ -1233,7 +1222,6 @@ static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
 	slot = &state->stack[spi];
 	st = &slot->spilled_ptr;
 
-	bpf_mark_stack_write(env, reg->frameno, spis_single_slot(spi));
 	__mark_reg_known_zero(st);
 	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
 	st->ref_obj_id = id;
@@ -1289,8 +1277,6 @@ static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_r
 
 	__mark_reg_not_init(env, st);
 
-	bpf_mark_stack_write(env, reg->frameno, spis_single_slot(spi));
-
 	for (i = 0; i < BPF_REG_SIZE; i++)
 		slot->slot_type[i] = STACK_INVALID;
 
@@ -3866,15 +3852,10 @@ out:
 static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 				    int spi, int nr_slots)
 {
-	int err, i;
+	int i;
 
-	for (i = 0; i < nr_slots; i++) {
-		err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx,
-					  spis_single_slot(spi - i));
-		if (err)
-			return err;
+	for (i = 0; i < nr_slots; i++)
 		mark_stack_slot_scratched(env, spi - i);
-	}
 	return 0;
 }
 
@@ -5425,16 +5406,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	if (err)
 		return err;
 
-	if (!(off % BPF_REG_SIZE) && size == BPF_REG_SIZE)
-		/* 8-byte aligned, 8-byte write */
-		bpf_mark_stack_write(env, state->frameno, spis_single_slot(spi));
-	else if (!(off % BPF_REG_SIZE) && size == BPF_HALF_REG_SIZE)
-		/* 8-byte aligned, 4-byte write */
-		bpf_mark_stack_write(env, state->frameno, spis_one_bit(spi * 2 + 1));
-	else if (!(off % BPF_HALF_REG_SIZE) && size == BPF_HALF_REG_SIZE)
-		/* 4-byte aligned, 4-byte write */
-		bpf_mark_stack_write(env, state->frameno, spis_one_bit(spi * 2));
-
 	check_fastcall_stack_contract(env, state, insn_idx, off);
 	mark_stack_slot_scratched(env, spi);
 	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
@@ -5691,26 +5662,12 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 	struct bpf_reg_state *reg;
 	u8 *stype, type;
 	int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
-	spis_t mask;
-	int err;
 
 	stype = reg_state->stack[spi].slot_type;
 	reg = &reg_state->stack[spi].spilled_ptr;
 
 	mark_stack_slot_scratched(env, spi);
 	check_fastcall_stack_contract(env, state, env->insn_idx, off);
-	if (!(off % BPF_REG_SIZE) && size == BPF_HALF_REG_SIZE)
-		/* 8-byte aligned, 4-byte read */
-		mask = spis_one_bit(spi * 2 + 1);
-	else if (!(off % BPF_HALF_REG_SIZE) && size == BPF_HALF_REG_SIZE)
-		/* 4-byte aligned, 4-byte read */
-		mask = spis_one_bit(spi * 2);
-	else
-		mask = spis_single_slot(spi);
-
-	err = bpf_mark_stack_read(env, reg_state->frameno, env->insn_idx, mask);
-	if (err)
-		return err;
 
 	if (is_spilled_reg(&reg_state->stack[spi])) {
 		u8 spill_size = 1;
@@ -8540,18 +8497,7 @@ static int check_stack_range_initialized(
 		}
 		return -EACCES;
 mark:
-		/* reading any byte out of 8-byte 'spill_slot' will cause
-		 * the whole slot to be marked as 'read'
-		 */
-		err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx,
-					  spis_single_slot(spi));
-		if (err)
-			return err;
-		/* We do not call bpf_mark_stack_write(), as we can not
-		 * be sure that whether stack slot is written to or not. Hence,
-		 * we must still conservatively propagate reads upwards even if
-		 * helper may write to the entire memory range.
-		 */
+		;
 	}
 	return 0;
 }
@@ -11171,8 +11117,6 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	/* and go analyze first insn of the callee */
 	*insn_idx = env->subprog_info[subprog].start - 1;
 
-	bpf_reset_live_stack_callchain(env);
-
 	if (env->log.level & BPF_LOG_LEVEL) {
 		verbose(env, "caller:\n");
 		print_verifier_state(env, state, caller->frameno, true);
@@ -11457,10 +11401,6 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 	bool in_callback_fn;
 	int err;
 
-	err = bpf_update_live_stack(env);
-	if (err)
-		return err;
-
 	callee = state->frame[state->curframe];
 	r0 = &callee->regs[BPF_REG_0];
 	if (r0->type == PTR_TO_STACK) {
@@ -21799,7 +21739,7 @@ static int do_check(struct bpf_verifier_env *env)
 	for (;;) {
 		struct bpf_insn *insn;
 		struct bpf_insn_aux_data *insn_aux;
-		int err, marks_err;
+		int err;
 
 		/* reset current history entry on each new instruction */
 		env->cur_hist_ent = NULL;
@@ -21913,15 +21853,7 @@ static int do_check(struct bpf_verifier_env *env)
 		if (state->speculative && insn_aux->nospec)
 			goto process_bpf_exit;
 
-		err = bpf_reset_stack_write_marks(env, env->insn_idx);
-		if (err)
-			return err;
 		err = do_check_insn(env, &do_print_state);
-		if (err >= 0 || error_recoverable_with_nospec(err)) {
-			marks_err = bpf_commit_stack_write_marks(env);
-			if (marks_err)
-				return marks_err;
-		}
 		if (error_recoverable_with_nospec(err) && state->speculative) {
 			/* Prevent this speculative path from ever reaching the
 			 * insn that would have been unsafe to execute.
@@ -21962,9 +21894,6 @@ static int do_check(struct bpf_verifier_env *env)
 process_bpf_exit:
 			mark_verifier_state_scratched(env);
 			err = update_branch_counts(env, env->cur_state);
-			if (err)
-				return err;
-			err = bpf_update_live_stack(env);
 			if (err)
 				return err;
 			err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
-- 
cgit v1.2.3


From 2c167d91775b0928eba1d2b9b5483ede63ca7b2e Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Fri, 10 Apr 2026 13:56:01 -0700
Subject: bpf: change logging scheme for live stack analysis

Instead of breadcrumbs like:

  (d2,cs15) frame 0 insn 18 +live -16
  (d2,cs15) frame 0 insn 17 +live -16

Print final accumulated stack use/def data per-func_instance
per-instruction. printed func_instance's are ordered by callsite and
depth. For example:

  stack use/def subprog#0 shared_instance_must_write_overwrite (d0,cs0):
    0: (b7) r1 = 1
    1: (7b) *(u64 *)(r10 -8) = r1        ; def: fp0-8
    2: (7b) *(u64 *)(r10 -16) = r1       ; def: fp0-16
    3: (bf) r1 = r10
    4: (07) r1 += -8
    5: (bf) r2 = r10
    6: (07) r2 += -16
    7: (85) call pc+7                    ; use: fp0-8 fp0-16
    8: (bf) r1 = r10
    9: (07) r1 += -16
   10: (bf) r2 = r10
   11: (07) r2 += -8
   12: (85) call pc+2                    ; use: fp0-8 fp0-16
   13: (b7) r0 = 0
   14: (95) exit
  stack use/def subprog#1 forwarding_rw (d1,cs7):
   15: (85) call pc+1                    ; use: fp0-8 fp0-16
   16: (95) exit
  stack use/def subprog#1 forwarding_rw (d1,cs12):
   15: (85) call pc+1                    ; use: fp0-8 fp0-16
   16: (95) exit
  stack use/def subprog#2 write_first_read_second (d2,cs15):
   17: (7a) *(u64 *)(r1 +0) = 42
   18: (79) r0 = *(u64 *)(r2 +0)         ; use: fp0-8 fp0-16
   19: (95) exit

For groups of three or more consecutive stack slots, abbreviate as
follows:

   25: (85) call bpf_loop#181            ; use: fp2-8..-512 fp1-8..-512 fp0-8..-512

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260410-patch-set-v4-10-5d4eecb343db@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   5 -
 kernel/bpf/liveness.c        | 241 +++++++++++++++++++++++++++++--------------
 2 files changed, 165 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d7fbc0e1559b..d3dc46aae2e7 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -256,11 +256,6 @@ static inline spis_t spis_and(spis_t a, spis_t b)
 	return (spis_t){{ a.v[0] & b.v[0], a.v[1] & b.v[1] }};
 }
 
-static inline spis_t spis_xor(spis_t a, spis_t b)
-{
-	return (spis_t){{ a.v[0] ^ b.v[0], a.v[1] ^ b.v[1] }};
-}
-
 static inline spis_t spis_not(spis_t s)
 {
 	return (spis_t){{ ~s.v[0], ~s.v[1] }};
diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c
index e797f8f76bc7..9bb6574d73fe 100644
--- a/kernel/bpf/liveness.c
+++ b/kernel/bpf/liveness.c
@@ -6,6 +6,7 @@
 #include <linux/hashtable.h>
 #include <linux/jhash.h>
 #include <linux/slab.h>
+#include <linux/sort.h>
 
 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
 
@@ -197,64 +198,6 @@ static int mark_stack_write(struct func_instance *instance, u32 frame, u32 insn_
 	return 0;
 }
 
-static char *fmt_instance(struct bpf_verifier_env *env, struct func_instance *instance)
-{
-	snprintf(env->tmp_str_buf, sizeof(env->tmp_str_buf),
-		 "(d%d,cs%d)", instance->depth, instance->callsite);
-	return env->tmp_str_buf;
-}
-
-/*
- * When both halves of an 8-byte SPI are set, print as "-8","-16",...
- * When only one half is set, print as "-4h","-8h",...
- */
-static void bpf_fmt_spis_mask(char *buf, ssize_t buf_sz, spis_t spis)
-{
-	bool first = true;
-	int spi, n;
-
-	buf[0] = '\0';
-
-	for (spi = 0; spi < STACK_SLOTS / 2 && buf_sz > 0; spi++) {
-		bool lo = spis_test_bit(spis, spi * 2);
-		bool hi = spis_test_bit(spis, spi * 2 + 1);
-
-		if (!lo && !hi)
-			continue;
-		n = snprintf(buf, buf_sz, "%s%d%s",
-			     first ? "" : ",",
-			     -(spi + 1) * BPF_REG_SIZE + (lo && !hi ? BPF_HALF_REG_SIZE : 0),
-			     lo && hi ? "" : "h");
-		first = false;
-		buf += n;
-		buf_sz -= n;
-	}
-}
-
-static void log_mask_change(struct bpf_verifier_env *env, struct func_instance *instance,
-			    char *pfx, u32 frame, u32 insn_idx,
-			    spis_t old, spis_t new)
-{
-	spis_t changed_bits, new_ones, new_zeros;
-
-	changed_bits = spis_xor(old, new);
-	new_ones = spis_and(new, changed_bits);
-	new_zeros = spis_and(spis_not(new), changed_bits);
-
-	if (spis_is_zero(changed_bits))
-		return;
-	bpf_log(&env->log, "%s frame %d insn %d ", fmt_instance(env, instance), frame, insn_idx);
-	if (!spis_is_zero(new_ones)) {
-		bpf_fmt_spis_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_ones);
-		bpf_log(&env->log, "+%s %s ", pfx, env->tmp_str_buf);
-	}
-	if (!spis_is_zero(new_zeros)) {
-		bpf_fmt_spis_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_zeros);
-		bpf_log(&env->log, "-%s %s", pfx, env->tmp_str_buf);
-	}
-	bpf_log(&env->log, "\n");
-}
-
 int bpf_jmp_offset(struct bpf_insn *insn)
 {
 	u8 code = insn->code;
@@ -330,7 +273,6 @@ __diag_pop();
 static inline bool update_insn(struct bpf_verifier_env *env,
 			       struct func_instance *instance, u32 frame, u32 insn_idx)
 {
-	struct bpf_insn_aux_data *aux = env->insn_aux_data;
 	spis_t new_before, new_after;
 	struct per_frame_masks *insn, *succ_insn;
 	struct bpf_iarray *succ;
@@ -357,13 +299,6 @@ static inline bool update_insn(struct bpf_verifier_env *env,
 	new_before = spis_or(spis_and(new_after, spis_not(insn->must_write)),
 			     insn->may_read);
 	changed |= !spis_equal(new_before, insn->live_before);
-	if (unlikely(env->log.level & BPF_LOG_LEVEL2) &&
-	    (!spis_is_zero(insn->may_read) || !spis_is_zero(insn->must_write) ||
-	     insn_idx == instance->subprog_start ||
-	     aux[insn_idx].prune_point)) {
-		log_mask_change(env, instance, "live",
-				frame, insn_idx, insn->live_before, new_before);
-	}
 	insn->live_before = new_before;
 	return changed;
 }
@@ -393,10 +328,6 @@ static void update_instance(struct bpf_verifier_env *env, struct func_instance *
 				changed |= update_insn(env, instance, frame, insn_postorder[i]);
 		}
 	} while (changed);
-
-	if (env->log.level & BPF_LOG_LEVEL2)
-		bpf_log(&env->log, "%s live stack update done in %d iterations\n",
-			fmt_instance(env, instance), cnt);
 }
 
 static bool is_live_before(struct func_instance *instance, u32 insn_idx, u32 frameno, u32 half_spi)
@@ -476,6 +407,166 @@ bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 half_sp
 	return false;
 }
 
+static char *fmt_subprog(struct bpf_verifier_env *env, int subprog)
+{
+	const char *name = env->subprog_info[subprog].name;
+
+	snprintf(env->tmp_str_buf, sizeof(env->tmp_str_buf),
+		 "subprog#%d%s%s", subprog, name ? " " : "", name ? name : "");
+	return env->tmp_str_buf;
+}
+
+static char *fmt_instance(struct bpf_verifier_env *env, struct func_instance *instance)
+{
+	snprintf(env->tmp_str_buf, sizeof(env->tmp_str_buf),
+		 "(d%d,cs%d)", instance->depth, instance->callsite);
+	return env->tmp_str_buf;
+}
+
+static int spi_off(int spi)
+{
+	return -(spi + 1) * BPF_REG_SIZE;
+}
+
+/*
+ * When both halves of an 8-byte SPI are set, print as "-8","-16",...
+ * When only one half is set, print as "-4h","-8h",...
+ * Runs of 3+ consecutive fully-set SPIs are collapsed: "fp0-8..-24"
+ */
+static char *fmt_spis_mask(struct bpf_verifier_env *env, int frame, bool first, spis_t spis)
+{
+	int buf_sz = sizeof(env->tmp_str_buf);
+	char *buf = env->tmp_str_buf;
+	int spi, n, run_start;
+
+	buf[0] = '\0';
+
+	for (spi = 0; spi < STACK_SLOTS / 2 && buf_sz > 0; spi++) {
+		bool lo = spis_test_bit(spis, spi * 2);
+		bool hi = spis_test_bit(spis, spi * 2 + 1);
+		const char *space = first ? "" : " ";
+
+		if (!lo && !hi)
+			continue;
+
+		if (!lo || !hi) {
+			/* half-spi */
+			n = scnprintf(buf, buf_sz, "%sfp%d%d%s",
+				      space, frame, spi_off(spi) + (lo ? STACK_SLOT_SZ : 0), "h");
+		} else if (spi + 2 < STACK_SLOTS / 2 &&
+			   spis_test_bit(spis, spi * 2 + 2) &&
+			   spis_test_bit(spis, spi * 2 + 3) &&
+			   spis_test_bit(spis, spi * 2 + 4) &&
+			   spis_test_bit(spis, spi * 2 + 5)) {
+			/* 3+ consecutive full spis */
+			run_start = spi;
+			while (spi + 1 < STACK_SLOTS / 2 &&
+			       spis_test_bit(spis, (spi + 1) * 2) &&
+			       spis_test_bit(spis, (spi + 1) * 2 + 1))
+				spi++;
+			n = scnprintf(buf, buf_sz, "%sfp%d%d..%d",
+				      space, frame, spi_off(run_start), spi_off(spi));
+		} else {
+			/* just a full spi */
+			n = scnprintf(buf, buf_sz, "%sfp%d%d", space, frame, spi_off(spi));
+		}
+		first = false;
+		buf += n;
+		buf_sz -= n;
+	}
+	return env->tmp_str_buf;
+}
+
+static void print_instance(struct bpf_verifier_env *env, struct func_instance *instance)
+{
+	int start = env->subprog_info[instance->subprog].start;
+	struct bpf_insn *insns = env->prog->insnsi;
+	struct per_frame_masks *masks;
+	int len = instance->insn_cnt;
+	int insn_idx, frame, i;
+	bool has_use, has_def;
+	u64 pos, insn_pos;
+
+	if (!(env->log.level & BPF_LOG_LEVEL2))
+		return;
+
+	verbose(env, "stack use/def %s ", fmt_subprog(env, instance->subprog));
+	verbose(env, "%s:\n", fmt_instance(env, instance));
+	for (i = 0; i < len; i++) {
+		insn_idx = start + i;
+		has_use = false;
+		has_def = false;
+		pos = env->log.end_pos;
+		verbose(env, "%3d: ", insn_idx);
+		bpf_verbose_insn(env, &insns[insn_idx]);
+		bpf_vlog_reset(&env->log, env->log.end_pos - 1); /* remove \n */
+		insn_pos = env->log.end_pos;
+		verbose(env, "%*c;", bpf_vlog_alignment(insn_pos - pos), ' ');
+		pos = env->log.end_pos;
+		verbose(env, " use: ");
+		for (frame = instance->depth; frame >= 0; --frame) {
+			masks = get_frame_masks(instance, frame, insn_idx);
+			if (!masks || spis_is_zero(masks->may_read))
+				continue;
+			verbose(env, "%s", fmt_spis_mask(env, frame, !has_use, masks->may_read));
+			has_use = true;
+		}
+		if (!has_use)
+			bpf_vlog_reset(&env->log, pos);
+		pos = env->log.end_pos;
+		verbose(env, " def: ");
+		for (frame = instance->depth; frame >= 0; --frame) {
+			masks = get_frame_masks(instance, frame, insn_idx);
+			if (!masks || spis_is_zero(masks->must_write))
+				continue;
+			verbose(env, "%s", fmt_spis_mask(env, frame, !has_def, masks->must_write));
+			has_def = true;
+		}
+		if (!has_def)
+			bpf_vlog_reset(&env->log, has_use ? pos : insn_pos);
+		verbose(env, "\n");
+		if (bpf_is_ldimm64(&insns[insn_idx]))
+			i++;
+	}
+}
+
+static int cmp_instances(const void *pa, const void *pb)
+{
+	struct func_instance *a = *(struct func_instance **)pa;
+	struct func_instance *b = *(struct func_instance **)pb;
+	int dcallsite = (int)a->callsite - b->callsite;
+	int ddepth = (int)a->depth - b->depth;
+
+	if (dcallsite)
+		return dcallsite;
+	if (ddepth)
+		return ddepth;
+	return 0;
+}
+
+/* print use/def slots for all instances ordered by callsite first, then by depth */
+static int print_instances(struct bpf_verifier_env *env)
+{
+	struct func_instance *instance, **sorted_instances;
+	struct bpf_liveness *liveness = env->liveness;
+	int i, bkt, cnt;
+
+	cnt = 0;
+	hash_for_each(liveness->func_instances, bkt, instance, hl_node)
+		cnt++;
+	sorted_instances = kvmalloc_objs(*sorted_instances, cnt, GFP_KERNEL_ACCOUNT);
+	if (!sorted_instances)
+		return -ENOMEM;
+	cnt = 0;
+	hash_for_each(liveness->func_instances, bkt, instance, hl_node)
+		sorted_instances[cnt++] = instance;
+	sort(sorted_instances, cnt, sizeof(*sorted_instances), cmp_instances, NULL);
+	for (i = 0; i < cnt; i++)
+		print_instance(env, sorted_instances[i]);
+	kvfree(sorted_instances);
+	return 0;
+}
+
 /*
  * Per-register tracking state for compute_subprog_args().
  * Tracks which frame's FP a value is derived from
@@ -1363,12 +1454,7 @@ static void print_subprog_arg_access(struct bpf_verifier_env *env,
 	if (!(env->log.level & BPF_LOG_LEVEL2))
 		return;
 
-	verbose(env, "subprog#%d %s:\n", subprog,
-		env->prog->aux->func_info
-		? btf_name_by_offset(env->prog->aux->btf,
-				     btf_type_by_id(env->prog->aux->btf,
-						    env->prog->aux->func_info[subprog].type_id)->name_off)
-		: "");
+	verbose(env, "%s:\n", fmt_subprog(env, subprog));
 	for (i = 0; i < len; i++) {
 		int idx = start + i;
 		bool has_extra = false;
@@ -1856,6 +1942,9 @@ int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env)
 			goto out;
 	}
 
+	if (env->log.level & BPF_LOG_LEVEL2)
+		err = print_instances(env);
+
 out:
 	for (k = 0; k < insn_cnt; k++)
 		kvfree(env->callsite_at_stack[k]);
-- 
cgit v1.2.3


From 2cb27158adb38f1a78729e99f7469199d71c714a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 10 Apr 2026 13:56:05 -0700
Subject: bpf: poison dead stack slots

As a sanity check poison stack slots that stack liveness determined
to be dead, so that any read from such slots will cause program rejection.
If stack liveness logic is incorrect the poison can cause
valid program to be rejected, but it also will prevent unsafe program
to be accepted.

Allow global subprogs "read" poisoned stack slots.
The static stack liveness determined that subprog doesn't read certain
stack slots, but sizeof(arg_type) based global subprog validation
isn't accurate enough to know which slots will actually be read by
the callee, so it needs to check full sizeof(arg_type) at the caller.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260410-patch-set-v4-14-5d4eecb343db@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h                       |  1 +
 kernel/bpf/log.c                                   |  5 +-
 kernel/bpf/verifier.c                              | 89 ++++++++++++++++------
 .../selftests/bpf/progs/verifier_spill_fill.c      |  2 +
 4 files changed, 70 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d3dc46aae2e7..05b9fe98b8f8 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -220,6 +220,7 @@ enum bpf_stack_slot_type {
 	STACK_DYNPTR,
 	STACK_ITER,
 	STACK_IRQ_FLAG,
+	STACK_POISON,
 };
 
 #define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 803f21e61d92..011e4ec25acd 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -501,7 +501,8 @@ static char slot_type_char[] = {
 	[STACK_ZERO]	= '0',
 	[STACK_DYNPTR]	= 'd',
 	[STACK_ITER]	= 'i',
-	[STACK_IRQ_FLAG] = 'f'
+	[STACK_IRQ_FLAG] = 'f',
+	[STACK_POISON]	= 'p',
 };
 
 #define UNUM_MAX_DECIMAL U16_MAX
@@ -738,7 +739,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
 
 		for (j = 0; j < BPF_REG_SIZE; j++) {
 			slot_type = state->stack[i].slot_type[j];
-			if (slot_type != STACK_INVALID)
+			if (slot_type != STACK_INVALID && slot_type != STACK_POISON)
 				valid = true;
 			types_buf[j] = slot_type_char[slot_type];
 		}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 36e697e29e44..566311dd4fba 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1348,6 +1348,7 @@ static bool is_stack_slot_special(const struct bpf_stack_state *stack)
 	case STACK_IRQ_FLAG:
 		return true;
 	case STACK_INVALID:
+	case STACK_POISON:
 	case STACK_MISC:
 	case STACK_ZERO:
 		return false;
@@ -1377,9 +1378,11 @@ static bool is_spilled_scalar_after(const struct bpf_stack_state *stack, int im)
 	       stack->spilled_ptr.type == SCALAR_VALUE;
 }
 
-/* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
- * case they are equivalent, or it's STACK_ZERO, in which case we preserve
- * more precise STACK_ZERO.
+/*
+ * Mark stack slot as STACK_MISC, unless it is already:
+ * - STACK_INVALID, in which case they are equivalent.
+ * - STACK_ZERO, in which case we preserve more precise STACK_ZERO.
+ * - STACK_POISON, which truly forbids access to the slot.
  * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
  * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
  * unnecessary as both are considered equivalent when loading data and pruning,
@@ -1390,14 +1393,14 @@ static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
 {
 	if (*stype == STACK_ZERO)
 		return;
-	if (*stype == STACK_INVALID)
+	if (*stype == STACK_INVALID || *stype == STACK_POISON)
 		return;
 	*stype = STACK_MISC;
 }
 
 static void scrub_spilled_slot(u8 *stype)
 {
-	if (*stype != STACK_INVALID)
+	if (*stype != STACK_INVALID && *stype != STACK_POISON)
 		*stype = STACK_MISC;
 }
 
@@ -5586,8 +5589,10 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
 		 * For privileged programs, we will accept such reads to slots
 		 * that may or may not be written because, if we're reject
 		 * them, the error would be too confusing.
+		 * Conservatively, treat STACK_POISON in a similar way.
 		 */
-		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
+		if ((*stype == STACK_INVALID || *stype == STACK_POISON) &&
+		    !env->allow_uninit_stack) {
 			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
 					insn_idx, i);
 			return -EINVAL;
@@ -5723,8 +5728,13 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 					}
 					if (type == STACK_INVALID && env->allow_uninit_stack)
 						continue;
-					verbose(env, "invalid read from stack off %d+%d size %d\n",
-						off, i, size);
+					if (type == STACK_POISON) {
+						verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
+							off, i, size);
+					} else {
+						verbose(env, "invalid read from stack off %d+%d size %d\n",
+							off, i, size);
+					}
 					return -EACCES;
 				}
 
@@ -5773,8 +5783,13 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 				continue;
 			if (type == STACK_INVALID && env->allow_uninit_stack)
 				continue;
-			verbose(env, "invalid read from stack off %d+%d size %d\n",
-				off, i, size);
+			if (type == STACK_POISON) {
+				verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
+					off, i, size);
+			} else {
+				verbose(env, "invalid read from stack off %d+%d size %d\n",
+					off, i, size);
+			}
 			return -EACCES;
 		}
 		if (dst_regno >= 0)
@@ -8377,16 +8392,22 @@ static int check_stack_range_initialized(
 	/* Some accesses can write anything into the stack, others are
 	 * read-only.
 	 */
-	bool clobber = false;
+	bool clobber = type == BPF_WRITE;
+	/*
+	 * Negative access_size signals global subprog/kfunc arg check where
+	 * STACK_POISON slots are acceptable. static stack liveness
+	 * might have determined that subprog doesn't read them,
+	 * but BTF based global subprog validation isn't accurate enough.
+	 */
+	bool allow_poison = access_size < 0 || clobber;
+
+	access_size = abs(access_size);
 
 	if (access_size == 0 && !zero_size_allowed) {
 		verbose(env, "invalid zero-sized read\n");
 		return -EACCES;
 	}
 
-	if (type == BPF_WRITE)
-		clobber = true;
-
 	err = check_stack_access_within_bounds(env, regno, off, access_size, type);
 	if (err)
 		return err;
@@ -8485,7 +8506,12 @@ static int check_stack_range_initialized(
 			goto mark;
 		}
 
-		if (tnum_is_const(reg->var_off)) {
+		if (*stype == STACK_POISON) {
+			if (allow_poison)
+				goto mark;
+			verbose(env, "reading from stack R%d off %d+%d size %d, slot poisoned by dead code elimination\n",
+				regno, min_off, i - min_off, access_size);
+		} else if (tnum_is_const(reg->var_off)) {
 			verbose(env, "invalid read from stack R%d off %d+%d size %d\n",
 				regno, min_off, i - min_off, access_size);
 		} else {
@@ -8662,8 +8688,10 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg
 		mark_ptr_not_null_reg(reg);
 	}
 
-	err = check_helper_mem_access(env, regno, mem_size, BPF_READ, true, NULL);
-	err = err ?: check_helper_mem_access(env, regno, mem_size, BPF_WRITE, true, NULL);
+	int size = base_type(reg->type) == PTR_TO_STACK ? -(int)mem_size : mem_size;
+
+	err = check_helper_mem_access(env, regno, size, BPF_READ, true, NULL);
+	err = err ?: check_helper_mem_access(env, regno, size, BPF_WRITE, true, NULL);
 
 	if (may_be_null)
 		*reg = saved_reg;
@@ -20183,7 +20211,7 @@ static void __clean_func_state(struct bpf_verifier_env *env,
 				__mark_reg_not_init(env, spill);
 			}
 			for (j = start; j < end; j++)
-				st->stack[i].slot_type[j] = STACK_INVALID;
+				st->stack[i].slot_type[j] = STACK_POISON;
 		}
 	}
 }
@@ -20452,7 +20480,8 @@ static bool is_stack_misc_after(struct bpf_verifier_env *env,
 
 	for (i = im; i < ARRAY_SIZE(stack->slot_type); ++i) {
 		if ((stack->slot_type[i] == STACK_MISC) ||
-		    (stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack))
+		    ((stack->slot_type[i] == STACK_INVALID || stack->slot_type[i] == STACK_POISON) &&
+		     env->allow_uninit_stack))
 			continue;
 		return false;
 	}
@@ -20488,13 +20517,22 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
 
 		spi = i / BPF_REG_SIZE;
 
-		if (exact == EXACT &&
-		    (i >= cur->allocated_stack ||
-		     old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
-		     cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
-			return false;
+		if (exact == EXACT) {
+			u8 old_type = old->stack[spi].slot_type[i % BPF_REG_SIZE];
+			u8 cur_type = i < cur->allocated_stack ?
+				      cur->stack[spi].slot_type[i % BPF_REG_SIZE] : STACK_INVALID;
+
+			/* STACK_INVALID and STACK_POISON are equivalent for pruning */
+			if (old_type == STACK_POISON)
+				old_type = STACK_INVALID;
+			if (cur_type == STACK_POISON)
+				cur_type = STACK_INVALID;
+			if (i >= cur->allocated_stack || old_type != cur_type)
+				return false;
+		}
 
-		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
+		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID ||
+		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_POISON)
 			continue;
 
 		if (env->allow_uninit_stack &&
@@ -20592,6 +20630,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
 		case STACK_MISC:
 		case STACK_ZERO:
 		case STACK_INVALID:
+		case STACK_POISON:
 			continue;
 		/* Ensure that new unhandled slot types return false by default */
 		default:
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index c6ae64b99cd6..6bc721accbae 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -780,6 +780,8 @@ __naked void stack_load_preserves_const_precision_subreg(void)
 		"r1 += r2;"
 		"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
 
+		"r2 = *(u64 *)(r10 -8);" /* keep slots alive */
+		"r2 = *(u64 *)(r10 -16);"
 		"r0 = 0;"
 		"exit;"
 	:
-- 
cgit v1.2.3


From 70cf146a674c447753ceeb34246ad0afdd0064bb Mon Sep 17 00:00:00 2001
From: Jacob Moroni <jmoroni@google.com>
Date: Thu, 9 Apr 2026 15:01:23 +0000
Subject: PCI/P2PDMA: Add Google SoCs to the P2P DMA host bridge list

All Google SoCs support peer-to-peer DMA between Root Ports, so add a
wildcard rule to the host bridge list.

Signed-off-by: Jacob Moroni <jmoroni@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: David Hu <xuehaohu@google.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Link: https://patch.msgid.link/20260409150123.3538444-2-jmoroni@google.com
---
 drivers/pci/p2pdma.c    | 2 ++
 include/linux/pci_ids.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 25e9358d066e..7c898542af8d 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -548,6 +548,8 @@ static const struct pci_p2pdma_whitelist_entry {
 	{PCI_VENDOR_ID_INTEL,	0x2033, 0},
 	{PCI_VENDOR_ID_INTEL,	0x2020, 0},
 	{PCI_VENDOR_ID_INTEL,	0x09a2, 0},
+	/* Google SoCs. */
+	{PCI_VENDOR_ID_GOOGLE,	PCI_ANY_ID, 0},
 	{}
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 406abf629be2..24cb42f66e4b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2586,6 +2586,8 @@
 
 #define PCI_VENDOR_ID_AZWAVE		0x1a3b
 
+#define PCI_VENDOR_ID_GOOGLE		0x1ae0
+
 #define PCI_VENDOR_ID_REDHAT_QUMRANET    0x1af4
 #define PCI_SUBVENDOR_ID_REDHAT_QUMRANET 0x1af4
 #define PCI_SUBDEVICE_ID_QEMU            0x1100
-- 
cgit v1.2.3


From 5063e775889948c0475ccdf21c74a6191b7b6482 Mon Sep 17 00:00:00 2001
From: Amery Hung <ameryhung@gmail.com>
Date: Fri, 10 Apr 2026 18:54:17 -0700
Subject: bpf: Use kmalloc_nolock() universally in local storage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switch to kmalloc_nolock() universally in local storage. Socket local
storage didn't move to kmalloc_nolock() when BPF memory allocator was
replaced by it for performance reasons. Now that kfree_rcu() supports
freeing memory allocated by kmalloc_nolock(), we can move the remaining
local storages to use kmalloc_nolock() and cleanup the cluttered free
paths.

Use kfree() instead of kfree_nolock() in bpf_selem_free_trace_rcu() and
bpf_local_storage_free_trace_rcu(). Both callbacks run in process context
where spinning is allowed, so kfree_nolock() is unnecessary.

Benchmark:

./bench -p 1 local-storage-create --storage-type socket \
  --batch-size {16,32,64}

The benchmark is a microbenchmark stress-testing how fast local storage
can be created. There is no measurable throughput change for socket local
storage after switching from kzalloc() to kmalloc_nolock().

Socket local storage

                 batch  creation speed              diff
---------------  ----   ------------------          ----
Baseline          16    433.9 ± 0.6 k/s
                  32    434.3 ± 1.4 k/s
                  64    434.2 ± 0.7 k/s

After             16    439.0 ± 1.9 k/s             +1.2%
                  32    437.3 ± 2.0 k/s             +0.7%
                  64    435.8 ± 2.5k/s              +0.4%

Also worth noting that the baseline got a 5% throughput boost when sheaf
replaces percpu partial slab recently [0].

[0] https://lore.kernel.org/bpf/20260123-sheaves-for-all-v4-0-041323d506f7@suse.cz/

Signed-off-by: Amery Hung <ameryhung@gmail.com>
Link: https://lore.kernel.org/r/20260411015419.114016-3-ameryhung@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_local_storage.h |   8 +--
 kernel/bpf/bpf_cgrp_storage.c     |   2 +-
 kernel/bpf/bpf_inode_storage.c    |   2 +-
 kernel/bpf/bpf_local_storage.c    | 130 +++++---------------------------------
 kernel/bpf/bpf_task_storage.c     |   2 +-
 net/core/bpf_sk_storage.c         |   2 +-
 6 files changed, 21 insertions(+), 125 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 8157e8da61d4..dced54e9265f 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -54,7 +54,6 @@ struct bpf_local_storage_map {
 	u32 bucket_log;
 	u16 elem_size;
 	u16 cache_idx;
-	bool use_kmalloc_nolock;
 };
 
 struct bpf_local_storage_data {
@@ -86,8 +85,7 @@ struct bpf_local_storage_elem {
 						 */
 	};
 	atomic_t state;
-	bool use_kmalloc_nolock;
-	/* 3 bytes hole */
+	/* 4 bytes hole */
 	/* The data is stored in another cacheline to minimize
 	 * the number of cachelines access during a cache hit.
 	 */
@@ -104,7 +102,6 @@ struct bpf_local_storage {
 	rqspinlock_t lock;	/* Protect adding/removing from the "list" */
 	u64 mem_charge;		/* Copy of mem charged to owner. Protected by "lock" */
 	refcount_t owner_refcnt;/* Used to pin owner when map_free is uncharging */
-	bool use_kmalloc_nolock;
 };
 
 /* U16_MAX is much more than enough for sk local storage
@@ -137,8 +134,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
 
 struct bpf_map *
 bpf_local_storage_map_alloc(union bpf_attr *attr,
-			    struct bpf_local_storage_cache *cache,
-			    bool use_kmalloc_nolock);
+			    struct bpf_local_storage_cache *cache);
 
 void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
 				      struct bpf_local_storage_map *smap,
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index c2a2ead1f466..d93ac2866748 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -114,7 +114,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
 
 static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
 {
-	return bpf_local_storage_map_alloc(attr, &cgroup_cache, true);
+	return bpf_local_storage_map_alloc(attr, &cgroup_cache);
 }
 
 static void cgroup_storage_map_free(struct bpf_map *map)
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index e86734609f3d..efc8996a4c0a 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -179,7 +179,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
 
 static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
 {
-	return bpf_local_storage_map_alloc(attr, &inode_cache, false);
+	return bpf_local_storage_map_alloc(attr, &inode_cache);
 }
 
 static void inode_storage_map_free(struct bpf_map *map)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index cad17ca8552f..bc687b9d25a9 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -75,18 +75,12 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
 	if (mem_charge(smap, owner, smap->elem_size))
 		return NULL;
 
-	if (smap->use_kmalloc_nolock) {
-		selem = bpf_map_kmalloc_nolock(&smap->map, smap->elem_size,
-					       __GFP_ZERO, NUMA_NO_NODE);
-	} else {
-		selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
-					gfp_flags | __GFP_NOWARN);
-	}
+	selem = bpf_map_kmalloc_nolock(&smap->map, smap->elem_size,
+				       __GFP_ZERO, NUMA_NO_NODE);
 
 	if (selem) {
 		RCU_INIT_POINTER(SDATA(selem)->smap, smap);
 		atomic_set(&selem->state, 0);
-		selem->use_kmalloc_nolock = smap->use_kmalloc_nolock;
 
 		if (value) {
 			/* No need to call check_and_init_map_value as memory is zero init */
@@ -102,8 +96,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
 	return NULL;
 }
 
-/* rcu tasks trace callback for use_kmalloc_nolock == false */
-static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
+static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
 {
 	struct bpf_local_storage *local_storage;
 
@@ -115,47 +108,14 @@ static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
 	kfree(local_storage);
 }
 
-/* Handle use_kmalloc_nolock == false */
-static void __bpf_local_storage_free(struct bpf_local_storage *local_storage,
-				     bool vanilla_rcu)
-{
-	if (vanilla_rcu)
-		kfree_rcu(local_storage, rcu);
-	else
-		call_rcu_tasks_trace(&local_storage->rcu,
-				     __bpf_local_storage_free_trace_rcu);
-}
-
-static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
-{
-	struct bpf_local_storage *local_storage;
-
-	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
-	kfree_nolock(local_storage);
-}
-
-static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
-{
-	/*
-	 * RCU Tasks Trace grace period implies RCU grace period, do
-	 * kfree() directly.
-	 */
-	bpf_local_storage_free_rcu(rcu);
-}
-
 static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
 				   bool reuse_now)
 {
 	if (!local_storage)
 		return;
 
-	if (!local_storage->use_kmalloc_nolock) {
-		__bpf_local_storage_free(local_storage, reuse_now);
-		return;
-	}
-
 	if (reuse_now) {
-		call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
+		kfree_rcu(local_storage, rcu);
 		return;
 	}
 
@@ -163,42 +123,7 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
 			     bpf_local_storage_free_trace_rcu);
 }
 
-/* rcu callback for use_kmalloc_nolock == false */
-static void __bpf_selem_free_rcu(struct rcu_head *rcu)
-{
-	struct bpf_local_storage_elem *selem;
-	struct bpf_local_storage_map *smap;
-
-	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
-	/* bpf_selem_unlink_nofail may have already cleared smap and freed fields. */
-	smap = rcu_dereference_check(SDATA(selem)->smap, 1);
-
-	if (smap)
-		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
-	kfree(selem);
-}
-
-/* rcu tasks trace callback for use_kmalloc_nolock == false */
-static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
-{
-	/*
-	 * RCU Tasks Trace grace period implies RCU grace period, do
-	 * kfree() directly.
-	 */
-	__bpf_selem_free_rcu(rcu);
-}
-
-/* Handle use_kmalloc_nolock == false */
-static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
-			     bool vanilla_rcu)
-{
-	if (vanilla_rcu)
-		call_rcu(&selem->rcu, __bpf_selem_free_rcu);
-	else
-		call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
-}
-
-static void bpf_selem_free_rcu(struct rcu_head *rcu)
+static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
 {
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage_map *smap;
@@ -209,37 +134,24 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
 
 	if (smap)
 		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
-	kfree_nolock(selem);
-}
-
-static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
-{
 	/*
 	 * RCU Tasks Trace grace period implies RCU grace period, do
 	 * kfree() directly.
 	 */
-	bpf_selem_free_rcu(rcu);
+	kfree(selem);
 }
 
 void bpf_selem_free(struct bpf_local_storage_elem *selem,
 		    bool reuse_now)
 {
-	if (!selem->use_kmalloc_nolock) {
-		/*
-		 * No uptr will be unpin even when reuse_now == false since uptr
-		 * is only supported in task local storage, where
-		 * smap->use_kmalloc_nolock == true.
-		 */
-		__bpf_selem_free(selem, reuse_now);
-		return;
-	}
+	struct bpf_local_storage_map *smap;
+
+	smap = rcu_dereference_check(SDATA(selem)->smap, 1);
 
 	if (reuse_now) {
-		/*
-		 * While it is okay to call bpf_obj_free_fields() that unpins uptr when
-		 * reuse_now == true, keep it in bpf_selem_free_rcu() for simplicity.
-		 */
-		call_rcu(&selem->rcu, bpf_selem_free_rcu);
+		if (smap)
+			bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+		kfree_rcu(selem, rcu);
 		return;
 	}
 
@@ -576,12 +488,8 @@ int bpf_local_storage_alloc(void *owner,
 	if (err)
 		return err;
 
-	if (smap->use_kmalloc_nolock)
-		storage = bpf_map_kmalloc_nolock(&smap->map, sizeof(*storage),
-						 __GFP_ZERO, NUMA_NO_NODE);
-	else
-		storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
-					  gfp_flags | __GFP_NOWARN);
+	storage = bpf_map_kmalloc_nolock(&smap->map, sizeof(*storage),
+					 __GFP_ZERO, NUMA_NO_NODE);
 	if (!storage) {
 		err = -ENOMEM;
 		goto uncharge;
@@ -591,7 +499,6 @@ int bpf_local_storage_alloc(void *owner,
 	raw_res_spin_lock_init(&storage->lock);
 	storage->owner = owner;
 	storage->mem_charge = sizeof(*storage);
-	storage->use_kmalloc_nolock = smap->use_kmalloc_nolock;
 	refcount_set(&storage->owner_refcnt, 1);
 
 	bpf_selem_link_storage_nolock(storage, first_selem);
@@ -868,8 +775,7 @@ u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
 
 struct bpf_map *
 bpf_local_storage_map_alloc(union bpf_attr *attr,
-			    struct bpf_local_storage_cache *cache,
-			    bool use_kmalloc_nolock)
+			    struct bpf_local_storage_cache *cache)
 {
 	struct bpf_local_storage_map *smap;
 	unsigned int i;
@@ -901,12 +807,6 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
 	smap->elem_size = offsetof(struct bpf_local_storage_elem,
 				   sdata.data[attr->value_size]);
 
-	/* In PREEMPT_RT, kmalloc(GFP_ATOMIC) is still not safe in non
-	 * preemptible context. Thus, enforce all storages to use
-	 * kmalloc_nolock() when CONFIG_PREEMPT_RT is enabled.
-	 */
-	smap->use_kmalloc_nolock = IS_ENABLED(CONFIG_PREEMPT_RT) ? true : use_kmalloc_nolock;
-
 	smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
 	return &smap->map;
 
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 605506792b5b..55f4f22bb212 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -212,7 +212,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
 
 static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
 {
-	return bpf_local_storage_map_alloc(attr, &task_cache, true);
+	return bpf_local_storage_map_alloc(attr, &task_cache);
 }
 
 static void task_storage_map_free(struct bpf_map *map)
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index f8338acebf07..9fb22e352beb 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -68,7 +68,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
 static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
 {
-	return bpf_local_storage_map_alloc(attr, &sk_cache, false);
+	return bpf_local_storage_map_alloc(attr, &sk_cache);
 }
 
 static int notsupp_get_next_key(struct bpf_map *map, void *key,
-- 
cgit v1.2.3


From 136deea435dc83d7fe2304303bb9bccb54f69bb0 Mon Sep 17 00:00:00 2001
From: Amery Hung <ameryhung@gmail.com>
Date: Fri, 10 Apr 2026 18:54:18 -0700
Subject: bpf: Remove gfp_flags plumbing from bpf_local_storage_update()

Remove the check that rejects sleepable BPF programs from doing
BPF_ANY/BPF_EXIST updates on local storage. This restriction was added
in commit b00fa38a9c1c ("bpf: Enable non-atomic allocations in local
storage") because kzalloc(GFP_KERNEL) could sleep inside
local_storage->lock. This is no longer a concern: all local storage
allocations now use kmalloc_nolock() which never sleeps.

In addition, since kmalloc_nolock() only accepts __GFP_ACCOUNT,
__GFP_ZERO and __GFP_NO_OBJ_EXT, the gfp_flags parameter plumbing from
bpf_*_storage_get() to bpf_local_storage_update() becomes dead code.
Remove gfp_flags from bpf_selem_alloc(), bpf_local_storage_alloc() and
bpf_local_storage_update(). Drop the hidden 5th argument from
bpf_*_storage_get helpers, and remove the verifier patching that
injected GFP_KERNEL/GFP_ATOMIC into the fifth argument.

Signed-off-by: Amery Hung <ameryhung@gmail.com>
Link: https://lore.kernel.org/r/20260411015419.114016-4-ameryhung@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_local_storage.h |  7 +++----
 kernel/bpf/bpf_cgrp_storage.c     |  9 ++++-----
 kernel/bpf/bpf_inode_storage.c    |  9 ++++-----
 kernel/bpf/bpf_local_storage.c    | 16 ++++++----------
 kernel/bpf/bpf_task_storage.c     |  9 ++++-----
 kernel/bpf/verifier.c             | 26 --------------------------
 net/core/bpf_sk_storage.c         | 21 +++++++++------------
 7 files changed, 30 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index dced54e9265f..9e4f5c45c974 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -188,7 +188,7 @@ int bpf_selem_link_map(struct bpf_local_storage_map *smap,
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
-		bool swap_uptrs, gfp_t gfp_flags);
+		bool swap_uptrs);
 
 void bpf_selem_free(struct bpf_local_storage_elem *selem,
 		    bool reuse_now);
@@ -196,12 +196,11 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
 int
 bpf_local_storage_alloc(void *owner,
 			struct bpf_local_storage_map *smap,
-			struct bpf_local_storage_elem *first_selem,
-			gfp_t gfp_flags);
+			struct bpf_local_storage_elem *first_selem);
 
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags);
+			 void *value, u64 map_flags, bool swap_uptrs);
 
 u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map);
 
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index d93ac2866748..c76e9b0fabba 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -76,7 +76,7 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
 		return PTR_ERR(cgroup);
 
 	sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
-					 value, map_flags, false, GFP_ATOMIC);
+					 value, map_flags, false);
 	cgroup_put(cgroup);
 	return PTR_ERR_OR_ZERO(sdata);
 }
@@ -122,9 +122,8 @@ static void cgroup_storage_map_free(struct bpf_map *map)
 	bpf_local_storage_map_free(map, &cgroup_cache);
 }
 
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
-	   void *, value, u64, flags, gfp_t, gfp_flags)
+BPF_CALL_4(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
+	   void *, value, u64, flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -143,7 +142,7 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
 	if (!percpu_ref_is_dying(&cgroup->self.refcnt) &&
 	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
 		sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
-						 value, BPF_NOEXIST, false, gfp_flags);
+						 value, BPF_NOEXIST, false);
 
 out:
 	return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index efc8996a4c0a..0da8d923e39d 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -98,7 +98,7 @@ static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
 
 	sdata = bpf_local_storage_update(file_inode(fd_file(f)),
 					 (struct bpf_local_storage_map *)map,
-					 value, map_flags, false, GFP_ATOMIC);
+					 value, map_flags, false);
 	return PTR_ERR_OR_ZERO(sdata);
 }
 
@@ -122,9 +122,8 @@ static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
 	return inode_storage_delete(file_inode(fd_file(f)), map);
 }
 
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
-	   void *, value, u64, flags, gfp_t, gfp_flags)
+BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+	   void *, value, u64, flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -150,7 +149,7 @@ BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
 		sdata = bpf_local_storage_update(
 			inode, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST, false, gfp_flags);
+			BPF_NOEXIST, false);
 		return IS_ERR(sdata) ? (unsigned long)NULL :
 					     (unsigned long)sdata->data;
 	}
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index bc687b9d25a9..6fc6a4b672b5 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -68,7 +68,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
-		void *value, bool swap_uptrs, gfp_t gfp_flags)
+		void *value, bool swap_uptrs)
 {
 	struct bpf_local_storage_elem *selem;
 
@@ -475,8 +475,7 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
 
 int bpf_local_storage_alloc(void *owner,
 			    struct bpf_local_storage_map *smap,
-			    struct bpf_local_storage_elem *first_selem,
-			    gfp_t gfp_flags)
+			    struct bpf_local_storage_elem *first_selem)
 {
 	struct bpf_local_storage *prev_storage, *storage;
 	struct bpf_local_storage **owner_storage_ptr;
@@ -546,7 +545,7 @@ uncharge:
  */
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags)
+			 void *value, u64 map_flags, bool swap_uptrs)
 {
 	struct bpf_local_storage_data *old_sdata = NULL;
 	struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
@@ -563,9 +562,6 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		     !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
 		return ERR_PTR(-EINVAL);
 
-	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
-		return ERR_PTR(-EINVAL);
-
 	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
 					      bpf_rcu_lock_held());
 	if (!local_storage || hlist_empty(&local_storage->list)) {
@@ -574,11 +570,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		if (err)
 			return ERR_PTR(err);
 
-		selem = bpf_selem_alloc(smap, owner, value, swap_uptrs, gfp_flags);
+		selem = bpf_selem_alloc(smap, owner, value, swap_uptrs);
 		if (!selem)
 			return ERR_PTR(-ENOMEM);
 
-		err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
+		err = bpf_local_storage_alloc(owner, smap, selem);
 		if (err) {
 			bpf_selem_free(selem, true);
 			mem_uncharge(smap, owner, smap->elem_size);
@@ -608,7 +604,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	/* A lookup has just been done before and concluded a new selem is
 	 * needed. The chance of an unnecessary alloc is unlikely.
 	 */
-	alloc_selem = selem = bpf_selem_alloc(smap, owner, value, swap_uptrs, gfp_flags);
+	alloc_selem = selem = bpf_selem_alloc(smap, owner, value, swap_uptrs);
 	if (!alloc_selem)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 55f4f22bb212..4b342be29eac 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -118,7 +118,7 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 
 	sdata = bpf_local_storage_update(
 		task, (struct bpf_local_storage_map *)map, value, map_flags,
-		true, GFP_ATOMIC);
+		true);
 
 	err = PTR_ERR_OR_ZERO(sdata);
 out:
@@ -165,9 +165,8 @@ out:
 	return err;
 }
 
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
-	   task, void *, value, u64, flags, gfp_t, gfp_flags)
+BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+	   task, void *, value, u64, flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -184,7 +183,7 @@ BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST, false, gfp_flags);
+			BPF_NOEXIST, false);
 		return IS_ERR(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
 	}
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 530bc0c2f116..56fcc96dc780 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -589,14 +589,6 @@ static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
 	return bpf_is_may_goto_insn(&env->prog->insnsi[insn_idx]);
 }
 
-static bool is_storage_get_function(enum bpf_func_id func_id)
-{
-	return func_id == BPF_FUNC_sk_storage_get ||
-	       func_id == BPF_FUNC_inode_storage_get ||
-	       func_id == BPF_FUNC_task_storage_get ||
-	       func_id == BPF_FUNC_cgrp_storage_get;
-}
-
 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
 					const struct bpf_map *map)
 {
@@ -24429,24 +24421,6 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto patch_call_imm;
 		}
 
-		if (is_storage_get_function(insn->imm)) {
-			if (env->insn_aux_data[i + delta].non_sleepable)
-				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
-			else
-				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
-			insn_buf[1] = *insn;
-			cnt = 2;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta += cnt - 1;
-			env->prog = prog = new_prog;
-			insn = new_prog->insnsi + i + delta;
-			goto patch_call_imm;
-		}
-
 		/* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
 		if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
 			/* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 9fb22e352beb..14eb7812bda4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -106,7 +106,7 @@ static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
 	if (sock) {
 		sdata = bpf_local_storage_update(
 			sock->sk, (struct bpf_local_storage_map *)map, value,
-			map_flags, false, GFP_ATOMIC);
+			map_flags, false);
 		sockfd_put(sock);
 		return PTR_ERR_OR_ZERO(sdata);
 	}
@@ -137,7 +137,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
 {
 	struct bpf_local_storage_elem *copy_selem;
 
-	copy_selem = bpf_selem_alloc(smap, newsk, NULL, false, GFP_ATOMIC);
+	copy_selem = bpf_selem_alloc(smap, newsk, NULL, false);
 	if (!copy_selem)
 		return NULL;
 
@@ -202,7 +202,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
 			}
 			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
 		} else {
-			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
+			ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
 			if (ret) {
 				bpf_selem_free(copy_selem, true);
 				atomic_sub(smap->elem_size,
@@ -227,9 +227,8 @@ out:
 	return ret;
 }
 
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
-	   void *, value, u64, flags, gfp_t, gfp_flags)
+BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -250,7 +249,7 @@ BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 	    refcount_inc_not_zero(&sk->sk_refcnt)) {
 		sdata = bpf_local_storage_update(
 			sk, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST, false, gfp_flags);
+			BPF_NOEXIST, false);
 		/* sk must be a fullsock (guaranteed by verifier),
 		 * so sock_gen_put() is unnecessary.
 		 */
@@ -383,16 +382,14 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
 	return false;
 }
 
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
-	   void *, value, u64, flags, gfp_t, gfp_flags)
+BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags)
 {
 	WARN_ON_ONCE(!bpf_rcu_lock_held());
 	if (in_hardirq() || in_nmi())
 		return (unsigned long)NULL;
 
-	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
-						     gfp_flags);
+	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
 }
 
 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
-- 
cgit v1.2.3


From f79ee9e4b23244e77b28d176ce99a2d84d813ac5 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 10 Apr 2026 19:41:02 +0200
Subject: spi: spi-mem: Add a packed command operation

Instead of repeating the command opcode twice, some flash devices try to
pack command and address bits. In this case, the second opcode byte
being sent (LSB) is free to be used. The input data must be ANDed to
only provide the relevant bits.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://patch.msgid.link/20260410-winbond-6-19-rc1-oddr-v1-2-2ac4827a3868@bootlin.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi-mem.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 37f709784350..c8e207522223 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -28,6 +28,14 @@
 		.dtr = true,					\
 	}
 
+#define SPI_MEM_DTR_OP_PACKED_CMD(__opcode, __addr, __buswidth)	\
+	{							\
+		.nbytes = 2,					\
+		.opcode = __opcode << 8 | __addr,		\
+		.buswidth = __buswidth,				\
+		.dtr = true,					\
+	}
+
 #define SPI_MEM_OP_ADDR(__nbytes, __val, __buswidth)		\
 	{							\
 		.nbytes = __nbytes,				\
-- 
cgit v1.2.3


From a2225b6e834a838ae3c93709760edc0a169eb2f2 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Mon, 6 Apr 2026 16:22:54 -0700
Subject: driver core: Don't let a device probe until it's ready

The moment we link a "struct device" into the list of devices for the
bus, it's possible probe can happen. This is because another thread
can load the driver at any time and that can cause the device to
probe. This has been seen in practice with a stack crawl that looks
like this [1]:

  really_probe()
  __driver_probe_device()
  driver_probe_device()
  __driver_attach()
  bus_for_each_dev()
  driver_attach()
  bus_add_driver()
  driver_register()
  __platform_driver_register()
  init_module() [some module]
  do_one_initcall()
  do_init_module()
  load_module()
  __arm64_sys_finit_module()
  invoke_syscall()

As a result of the above, it was seen that device_links_driver_bound()
could be called for the device before "dev->fwnode->dev" was
assigned. This prevented __fw_devlink_pickup_dangling_consumers() from
being called which meant that other devices waiting on our driver's
sub-nodes were stuck deferring forever.

It's believed that this problem is showing up suddenly for two
reasons:
1. Android has recently (last ~1 year) implemented an optimization to
   the order it loads modules [2]. When devices opt-in to this faster
   loading, modules are loaded one-after-the-other very quickly. This
   is unlike how other distributions do it. The reproduction of this
   problem has only been seen on devices that opt-in to Android's
   "parallel module loading".
2. Android devices typically opt-in to fw_devlink, and the most
   noticeable issue is the NULL "dev->fwnode->dev" in
   device_links_driver_bound(). fw_devlink is somewhat new code and
   also not in use by all Linux devices.

Even though the specific symptom where "dev->fwnode->dev" wasn't
assigned could be fixed by moving that assignment higher in
device_add(), other parts of device_add() (like the call to
device_pm_add()) are also important to run before probe. Only moving
the "dev->fwnode->dev" assignment would likely fix the current
symptoms but lead to difficult-to-debug problems in the future.

Fix the problem by preventing probe until device_add() has run far
enough that the device is ready to probe. If somehow we end up trying
to probe before we're allowed, __driver_probe_device() will return
-EPROBE_DEFER which will make certain the device is noticed.

In the race condition that was seen with Android's faster module
loading, we will temporarily add the device to the deferred list and
then take it off immediately when device_add() probes the device.

Instead of adding another flag to the bitfields already in "struct
device", instead add a new "flags" field and use that. This allows us
to freely change the bit from different thread without worrying about
corrupting nearby bits (and means threads changing other bit won't
corrupt us).

[1] Captured on a machine running a downstream 6.6 kernel
[2] https://cs.android.com/android/platform/superproject/main/+/main:system/core/libmodprobe/libmodprobe.cpp?q=LoadModulesParallel

Cc: stable@vger.kernel.org
Fixes: 2023c610dc54 ("Driver core: add new device to bus's list before probing")
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
Reviewed-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
Reviewed-by: Danilo Krummrich <dakr@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patch.msgid.link/20260406162231.v5.1.Id750b0fbcc94f23ed04b7aecabcead688d0d8c17@changeid
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 drivers/base/core.c    | 15 +++++++++++++++
 drivers/base/dd.c      | 20 ++++++++++++++++++++
 include/linux/device.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 79 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 09b98f02f559..984d6bfbd6e4 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -3688,6 +3688,21 @@ int device_add(struct device *dev)
 		fw_devlink_link_device(dev);
 	}
 
+	/*
+	 * The moment the device was linked into the bus's "klist_devices" in
+	 * bus_add_device() then it's possible that probe could have been
+	 * attempted in a different thread via userspace loading a driver
+	 * matching the device. "ready_to_probe" being unset would have
+	 * blocked those attempts. Now that all of the above initialization has
+	 * happened, unblock probe. If probe happens through another thread
+	 * after this point but before bus_probe_device() runs then it's fine.
+	 * bus_probe_device() -> device_initial_probe() -> __device_attach()
+	 * will notice (under device_lock) that the device is already bound.
+	 */
+	device_lock(dev);
+	dev_set_ready_to_probe(dev);
+	device_unlock(dev);
+
 	bus_probe_device(dev);
 
 	/*
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 37c7e54e0e4c..ec7ef9c5d62e 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -848,6 +848,26 @@ static int __driver_probe_device(const struct device_driver *drv, struct device
 	if (dev->driver)
 		return -EBUSY;
 
+	/*
+	 * In device_add(), the "struct device" gets linked into the subsystem's
+	 * list of devices and broadcast to userspace (via uevent) before we're
+	 * quite ready to probe. Those open pathways to driver probe before
+	 * we've finished enough of device_add() to reliably support probe.
+	 * Detect this and tell other pathways to try again later. device_add()
+	 * itself will also try to probe immediately after setting
+	 * "ready_to_probe".
+	 */
+	if (!dev_ready_to_probe(dev))
+		return dev_err_probe(dev, -EPROBE_DEFER, "Device not ready to probe\n");
+
+	/*
+	 * Set can_match = true after calling dev_ready_to_probe(), so
+	 * driver_deferred_probe_add() won't actually add the device to the
+	 * deferred probe list when dev_ready_to_probe() returns false.
+	 *
+	 * When dev_ready_to_probe() returns false, it means that device_add()
+	 * will do another probe() attempt for us.
+	 */
 	dev->can_match = true;
 	dev_dbg(dev, "bus: '%s': %s: matched device with driver %s\n",
 		drv->bus->name, __func__, drv->name);
diff --git a/include/linux/device.h b/include/linux/device.h
index e65d564f01cd..f27ed6eb87a9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -458,6 +458,21 @@ struct device_physical_location {
 	bool lid;
 };
 
+/**
+ * enum struct_device_flags - Flags in struct device
+ *
+ * Each flag should have a set of accessor functions created via
+ * __create_dev_flag_accessors() for each access.
+ *
+ * @DEV_FLAG_READY_TO_PROBE: If set then device_add() has finished enough
+ *		initialization that probe could be called.
+ */
+enum struct_device_flags {
+	DEV_FLAG_READY_TO_PROBE = 0,
+
+	DEV_FLAG_COUNT
+};
+
 /**
  * struct device - The basic device structure
  * @parent:	The device's "parent" device, the device to which it is attached.
@@ -553,6 +568,7 @@ struct device_physical_location {
  * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers.
  * @dma_iommu: Device is using default IOMMU implementation for DMA and
  *		doesn't rely on dma_ops structure.
+ * @flags:	DEV_FLAG_XXX flags. Use atomic bitfield operations to modify.
  *
  * At the lowest level, every device in a Linux system is represented by an
  * instance of struct device. The device structure contains the information
@@ -675,8 +691,36 @@ struct device {
 #ifdef CONFIG_IOMMU_DMA
 	bool			dma_iommu:1;
 #endif
+
+	DECLARE_BITMAP(flags, DEV_FLAG_COUNT);
 };
 
+#define __create_dev_flag_accessors(accessor_name, flag_name) \
+static inline bool dev_##accessor_name(const struct device *dev) \
+{ \
+	return test_bit(flag_name, dev->flags); \
+} \
+static inline void dev_set_##accessor_name(struct device *dev) \
+{ \
+	set_bit(flag_name, dev->flags); \
+} \
+static inline void dev_clear_##accessor_name(struct device *dev) \
+{ \
+	clear_bit(flag_name, dev->flags); \
+} \
+static inline void dev_assign_##accessor_name(struct device *dev, bool value) \
+{ \
+	assign_bit(flag_name, dev->flags, value); \
+} \
+static inline bool dev_test_and_set_##accessor_name(struct device *dev) \
+{ \
+	return test_and_set_bit(flag_name, dev->flags); \
+}
+
+__create_dev_flag_accessors(ready_to_probe, DEV_FLAG_READY_TO_PROBE);
+
+#undef __create_dev_flag_accessors
+
 /**
  * struct device_link - Device link representation.
  * @supplier: The device on the supplier end of the link.
-- 
cgit v1.2.3


From 06c42142cf8aaeba3fa3c4336717b87ca4eebf8a Mon Sep 17 00:00:00 2001
From: Chenghai Huang <huangchenghai2@huawei.com>
Date: Mon, 30 Mar 2026 14:25:31 +0800
Subject: crypto: hisilicon - remove unused and non-public APIs for qm and sec

- sec_register_to_crypto() and sec_unregister_from_crypto()
have been removed, the function declarations have not been
removed. Remove them.
- hisi_qm_start_qp and hisi_qm_stop_qp are called internally by the
QM. Therefore, the EXPORT_SYMBOL_GPL declaration of these
non-public interfaces is deleted.

Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c       | 8 ++++----
 drivers/crypto/hisilicon/sec2/sec.h | 2 --
 include/linux/hisi_acc_qm.h         | 2 --
 3 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 2bb51d4d88a6..3ca47e2a9719 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -472,6 +472,8 @@ static struct qm_typical_qos_table shaper_cbs_s[] = {
 
 static void qm_irqs_unregister(struct hisi_qm *qm);
 static int qm_reset_device(struct hisi_qm *qm);
+static void hisi_qm_stop_qp(struct hisi_qp *qp);
+
 int hisi_qm_q_num_set(const char *val, const struct kernel_param *kp,
 		      unsigned int device)
 {
@@ -2262,7 +2264,7 @@ static int qm_start_qp_nolock(struct hisi_qp *qp, unsigned long arg)
  * After this function, qp can receive request from user. Return 0 if
  * successful, negative error code if failed.
  */
-int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg)
+static int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg)
 {
 	struct hisi_qm *qm = qp->qm;
 	int ret;
@@ -2273,7 +2275,6 @@ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(hisi_qm_start_qp);
 
 /**
  * qp_stop_fail_cb() - call request cb.
@@ -2418,13 +2419,12 @@ static void qm_stop_qp_nolock(struct hisi_qp *qp)
  *
  * This function is reverse of hisi_qm_start_qp.
  */
-void hisi_qm_stop_qp(struct hisi_qp *qp)
+static void hisi_qm_stop_qp(struct hisi_qp *qp)
 {
 	down_write(&qp->qm->qps_lock);
 	qm_stop_qp_nolock(qp);
 	up_write(&qp->qm->qps_lock);
 }
-EXPORT_SYMBOL_GPL(hisi_qm_stop_qp);
 
 /**
  * hisi_qp_send() - Queue up a task in the hardware queue.
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 0710977861f3..adf95795dffe 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -285,7 +285,5 @@ enum sec_cap_table_type {
 
 void sec_destroy_qps(struct hisi_qp **qps, int qp_num);
 struct hisi_qp **sec_create_qps(void);
-int sec_register_to_crypto(struct hisi_qm *qm);
-void sec_unregister_from_crypto(struct hisi_qm *qm);
 u64 sec_get_alg_bitmap(struct hisi_qm *qm, u32 high, u32 low);
 #endif
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 8a581b5bbbcd..a6268dc4f7cb 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -558,8 +558,6 @@ int hisi_qm_init(struct hisi_qm *qm);
 void hisi_qm_uninit(struct hisi_qm *qm);
 int hisi_qm_start(struct hisi_qm *qm);
 int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r);
-int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg);
-void hisi_qm_stop_qp(struct hisi_qp *qp);
 int hisi_qp_send(struct hisi_qp *qp, const void *msg);
 void hisi_qm_debug_init(struct hisi_qm *qm);
 void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
-- 
cgit v1.2.3


From 82db77f6fb16d23ea60d0f96dcf2b502a322a28f Mon Sep 17 00:00:00 2001
From: Joe Damato <joe@dama.to>
Date: Wed, 8 Apr 2026 16:05:50 -0700
Subject: net: tso: Introduce tso_dma_map and helpers

Add struct tso_dma_map to tso.h for tracking DMA addresses of mapped
GSO payload data and tso_dma_map_completion_state.

The tso_dma_map combines DMA mapping storage with iterator state, allowing
drivers to walk pre-mapped DMA regions linearly. Includes fields for
the DMA IOVA path (iova_state, iova_offset, total_len) and a fallback
per-region path (linear_dma, frags[], frag_idx, offset).

The tso_dma_map_completion_state makes the IOVA completion state opaque
for drivers. Drivers are expected to allocate this and use the added
helpers to update the completion state.

Adds skb_frag_phys() to skbuff.h, returning the physical address
of a paged fragment's data, which is used by the tso_dma_map helpers
introduced in this commit described below.

The added TSO DMA map helpers are:

tso_dma_map_init(): DMA-maps the linear payload region and all frags
upfront. Prefers the DMA IOVA API for a single contiguous mapping with
one IOTLB sync; falls back to per-region dma_map_phys() otherwise.
Returns 0 on success, cleans up partial mappings on failure.

tso_dma_map_cleanup(): Handles both IOVA and fallback teardown paths.

tso_dma_map_count(): counts how many descriptors the next N bytes of
payload will need. Returns 1 if IOVA is used since the mapping is
contiguous.

tso_dma_map_next(): yields the next (dma_addr, chunk_len) pair.
On the IOVA path, each segment is a single contiguous chunk. On the
fallback path, indicates when a chunk starts a new DMA mapping so the
driver can set dma_unmap_len on that descriptor for completion-time
unmapping.

tso_dma_map_completion_save(): updates the completion state. Drivers
will call this at xmit time.

tso_dma_map_complete(): tears down the mapping at completion time and
returns true if the IOVA path was used. If it was not used, this is a
no-op and returns false.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Joe Damato <joe@dama.to>
Link: https://patch.msgid.link/20260408230607.2019402-2-joe@dama.to
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h |  11 ++
 include/net/tso.h      | 100 ++++++++++++++++++
 net/core/tso.c         | 269 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 380 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 26fe18bcfad8..2bcf78a4de7b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3763,6 +3763,17 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
 	return ptr + skb_frag_off(frag);
 }
 
+/**
+ * skb_frag_phys - gets the physical address of the data in a paged fragment
+ * @frag: the paged fragment buffer
+ *
+ * Returns: the physical address of the data within @frag.
+ */
+static inline phys_addr_t skb_frag_phys(const skb_frag_t *frag)
+{
+	return page_to_phys(skb_frag_page(frag)) + skb_frag_off(frag);
+}
+
 /**
  * skb_frag_page_copy() - sets the page in a fragment from another fragment
  * @fragto: skb fragment where page is set
diff --git a/include/net/tso.h b/include/net/tso.h
index e7e157ae0526..da82aabd1d48 100644
--- a/include/net/tso.h
+++ b/include/net/tso.h
@@ -3,6 +3,7 @@
 #define _TSO_H
 
 #include <linux/skbuff.h>
+#include <linux/dma-mapping.h>
 #include <net/ip.h>
 
 #define TSO_HEADER_SIZE		256
@@ -28,4 +29,103 @@ void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
 void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size);
 int tso_start(struct sk_buff *skb, struct tso_t *tso);
 
+/**
+ * struct tso_dma_map - DMA mapping state for GSO payload
+ * @dev: device used for DMA mapping
+ * @skb: the GSO skb being mapped
+ * @hdr_len: per-segment header length
+ * @iova_state: DMA IOVA state (when IOMMU available)
+ * @iova_offset: global byte offset into IOVA range (IOVA path only)
+ * @total_len: total payload length
+ * @frag_idx: current region (-1 = linear, 0..nr_frags-1 = frag)
+ * @offset: byte offset within current region
+ * @linear_dma: DMA address of the linear payload
+ * @linear_len: length of the linear payload
+ * @nr_frags: number of frags successfully DMA-mapped
+ * @frags: per-frag DMA address and length
+ *
+ * DMA-maps the payload regions of a GSO skb (linear data + frags).
+ * Prefers the DMA IOVA API for a single contiguous mapping with one
+ * IOTLB sync; falls back to per-region dma_map_phys() otherwise.
+ */
+struct tso_dma_map {
+	struct device		*dev;
+	const struct sk_buff	*skb;
+	unsigned int		hdr_len;
+	/* IOVA path */
+	struct dma_iova_state	iova_state;
+	size_t			iova_offset;
+	size_t			total_len;
+	/* Fallback path if IOVA path fails */
+	int			frag_idx;
+	unsigned int		offset;
+	dma_addr_t		linear_dma;
+	unsigned int		linear_len;
+	unsigned int		nr_frags;
+	struct {
+		dma_addr_t	dma;
+		unsigned int	len;
+	} frags[MAX_SKB_FRAGS];
+};
+
+/**
+ * struct tso_dma_map_completion_state - Completion-time cleanup state
+ * @iova_state: DMA IOVA state (when IOMMU available)
+ * @total_len: total payload length of the IOVA mapping
+ *
+ * Drivers store this on their SW ring at xmit time via
+ * tso_dma_map_completion_save(), then call tso_dma_map_complete() at
+ * completion time.
+ */
+struct tso_dma_map_completion_state {
+	struct dma_iova_state iova_state;
+	size_t total_len;
+};
+
+int tso_dma_map_init(struct tso_dma_map *map, struct device *dev,
+		     const struct sk_buff *skb, unsigned int hdr_len);
+void tso_dma_map_cleanup(struct tso_dma_map *map);
+unsigned int tso_dma_map_count(struct tso_dma_map *map, unsigned int len);
+bool tso_dma_map_next(struct tso_dma_map *map, dma_addr_t *addr,
+		      unsigned int *chunk_len, unsigned int *mapping_len,
+		      unsigned int seg_remaining);
+
+/**
+ * tso_dma_map_completion_save - save state needed for completion-time cleanup
+ * @map: the xmit-time DMA map
+ * @cstate: driver-owned storage that persists until completion
+ *
+ * Should be called at xmit time to update the completion state and later passed
+ * to tso_dma_map_complete().
+ */
+static inline void
+tso_dma_map_completion_save(const struct tso_dma_map *map,
+			    struct tso_dma_map_completion_state *cstate)
+{
+	cstate->iova_state = map->iova_state;
+	cstate->total_len = map->total_len;
+}
+
+/**
+ * tso_dma_map_complete - tear down mapping at completion time
+ * @dev: the device that owns the mapping
+ * @cstate: state saved by tso_dma_map_completion_save()
+ *
+ * Return: true if the IOVA path was used and the mapping has been
+ * destroyed; false if the fallback per-region path was used and the
+ * driver must unmap via its normal completion path.
+ */
+static inline bool
+tso_dma_map_complete(struct device *dev,
+		     struct tso_dma_map_completion_state *cstate)
+{
+	if (dma_use_iova(&cstate->iova_state)) {
+		dma_iova_destroy(dev, &cstate->iova_state, cstate->total_len,
+				 DMA_TO_DEVICE, 0);
+		return true;
+	}
+
+	return false;
+}
+
 #endif	/* _TSO_H */
diff --git a/net/core/tso.c b/net/core/tso.c
index 6df997b9076e..347b3856ddb9 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -3,6 +3,7 @@
 #include <linux/if_vlan.h>
 #include <net/ip.h>
 #include <net/tso.h>
+#include <linux/dma-mapping.h>
 #include <linux/unaligned.h>
 
 void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
@@ -87,3 +88,271 @@ int tso_start(struct sk_buff *skb, struct tso_t *tso)
 	return hdr_len;
 }
 EXPORT_SYMBOL(tso_start);
+
+static int tso_dma_iova_try(struct device *dev, struct tso_dma_map *map,
+			    phys_addr_t phys, size_t linear_len,
+			    size_t total_len, size_t *offset)
+{
+	const struct sk_buff *skb;
+	unsigned int nr_frags;
+	int i;
+
+	if (!dma_iova_try_alloc(dev, &map->iova_state, phys, total_len))
+		return 1;
+
+	skb = map->skb;
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
+	if (linear_len) {
+		if (dma_iova_link(dev, &map->iova_state,
+				  phys, *offset, linear_len,
+				  DMA_TO_DEVICE, 0))
+			goto iova_fail;
+		map->linear_len = linear_len;
+		*offset += linear_len;
+	}
+
+	for (i = 0; i < nr_frags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		unsigned int frag_len = skb_frag_size(frag);
+
+		if (dma_iova_link(dev, &map->iova_state,
+				  skb_frag_phys(frag), *offset,
+				  frag_len, DMA_TO_DEVICE, 0)) {
+			map->nr_frags = i;
+			goto iova_fail;
+		}
+		map->frags[i].len = frag_len;
+		*offset += frag_len;
+		map->nr_frags = i + 1;
+	}
+
+	if (dma_iova_sync(dev, &map->iova_state, 0, total_len))
+		goto iova_fail;
+
+	return 0;
+
+iova_fail:
+	dma_iova_destroy(dev, &map->iova_state, *offset,
+			 DMA_TO_DEVICE, 0);
+	memset(&map->iova_state, 0, sizeof(map->iova_state));
+
+	/* reset map state */
+	map->frag_idx = -1;
+	map->offset = 0;
+	map->linear_len = 0;
+	map->nr_frags = 0;
+
+	return 1;
+}
+
+/**
+ * tso_dma_map_init - DMA-map GSO payload regions
+ * @map: map struct to initialize
+ * @dev: device for DMA mapping
+ * @skb: the GSO skb
+ * @hdr_len: per-segment header length in bytes
+ *
+ * DMA-maps the linear payload (after headers) and all frags.
+ * Prefers the DMA IOVA API (one contiguous mapping, one IOTLB sync);
+ * falls back to per-region dma_map_phys() when IOVA is not available.
+ * Positions the iterator at byte 0 of the payload.
+ *
+ * Return: 0 on success, -ENOMEM on DMA mapping failure (partial mappings
+ * are cleaned up internally).
+ */
+int tso_dma_map_init(struct tso_dma_map *map, struct device *dev,
+		     const struct sk_buff *skb, unsigned int hdr_len)
+{
+	unsigned int linear_len = skb_headlen(skb) - hdr_len;
+	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	size_t total_len = skb->len - hdr_len;
+	size_t offset = 0;
+	phys_addr_t phys;
+	int i;
+
+	map->dev = dev;
+	map->skb = skb;
+	map->hdr_len = hdr_len;
+	map->frag_idx = -1;
+	map->offset = 0;
+	map->iova_offset = 0;
+	map->total_len = total_len;
+	map->linear_len = 0;
+	map->nr_frags = 0;
+	memset(&map->iova_state, 0, sizeof(map->iova_state));
+
+	if (!total_len)
+		return 0;
+
+	if (linear_len)
+		phys = virt_to_phys(skb->data + hdr_len);
+	else
+		phys = skb_frag_phys(&skb_shinfo(skb)->frags[0]);
+
+	if (tso_dma_iova_try(dev, map, phys, linear_len, total_len, &offset)) {
+		/* IOVA path failed, map state was reset. Fallback to
+		 * per-region dma_map_phys()
+		 */
+		if (linear_len) {
+			map->linear_dma = dma_map_phys(dev, phys, linear_len,
+						       DMA_TO_DEVICE, 0);
+			if (dma_mapping_error(dev, map->linear_dma))
+				return -ENOMEM;
+			map->linear_len = linear_len;
+		}
+
+		for (i = 0; i < nr_frags; i++) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			unsigned int frag_len = skb_frag_size(frag);
+
+			map->frags[i].len = frag_len;
+			map->frags[i].dma = dma_map_phys(dev, skb_frag_phys(frag),
+							 frag_len, DMA_TO_DEVICE, 0);
+			if (dma_mapping_error(dev, map->frags[i].dma)) {
+				tso_dma_map_cleanup(map);
+				return -ENOMEM;
+			}
+			map->nr_frags = i + 1;
+		}
+	}
+
+	if (linear_len == 0 && nr_frags > 0)
+		map->frag_idx = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL(tso_dma_map_init);
+
+/**
+ * tso_dma_map_cleanup - unmap all DMA regions in a tso_dma_map
+ * @map: the map to clean up
+ *
+ * Handles both IOVA and fallback paths. For IOVA, calls
+ * dma_iova_destroy(). For fallback, unmaps each region individually.
+ */
+void tso_dma_map_cleanup(struct tso_dma_map *map)
+{
+	int i;
+
+	if (dma_use_iova(&map->iova_state)) {
+		dma_iova_destroy(map->dev, &map->iova_state, map->total_len,
+				 DMA_TO_DEVICE, 0);
+		memset(&map->iova_state, 0, sizeof(map->iova_state));
+	} else {
+		if (map->linear_len)
+			dma_unmap_phys(map->dev, map->linear_dma,
+				       map->linear_len, DMA_TO_DEVICE, 0);
+
+		for (i = 0; i < map->nr_frags; i++)
+			dma_unmap_phys(map->dev, map->frags[i].dma,
+				       map->frags[i].len, DMA_TO_DEVICE, 0);
+	}
+
+	map->linear_len = 0;
+	map->nr_frags = 0;
+}
+EXPORT_SYMBOL(tso_dma_map_cleanup);
+
+/**
+ * tso_dma_map_count - count descriptors for a payload range
+ * @map: the payload map
+ * @len: number of payload bytes in this segment
+ *
+ * Counts how many contiguous DMA region chunks the next @len bytes
+ * will span, without advancing the iterator. On the IOVA path this
+ * is always 1 (contiguous). On the fallback path, uses region sizes
+ * from the current position.
+ *
+ * Return: the number of descriptors needed for @len bytes of payload.
+ */
+unsigned int tso_dma_map_count(struct tso_dma_map *map, unsigned int len)
+{
+	unsigned int offset = map->offset;
+	int idx = map->frag_idx;
+	unsigned int count = 0;
+
+	if (!len)
+		return 0;
+
+	if (dma_use_iova(&map->iova_state))
+		return 1;
+
+	while (len > 0) {
+		unsigned int region_len, chunk;
+
+		if (idx == -1)
+			region_len = map->linear_len;
+		else
+			region_len = map->frags[idx].len;
+
+		chunk = min(len, region_len - offset);
+		len -= chunk;
+		count++;
+		offset = 0;
+		idx++;
+	}
+
+	return count;
+}
+EXPORT_SYMBOL(tso_dma_map_count);
+
+/**
+ * tso_dma_map_next - yield the next DMA address range
+ * @map: the payload map
+ * @addr: output DMA address
+ * @chunk_len: output chunk length
+ * @mapping_len: full DMA mapping length when this chunk starts a new
+ *               mapping region, or 0 when continuing a previous one.
+ *               On the IOVA path this is always 0 (driver must not
+ *               do per-region unmaps; use tso_dma_map_cleanup instead).
+ * @seg_remaining: bytes left in current segment
+ *
+ * Yields the next (dma_addr, chunk_len) pair and advances the iterator.
+ * On the IOVA path, the entire payload is contiguous so each segment
+ * is always a single chunk.
+ *
+ * Return: true if a chunk was yielded, false when @seg_remaining is 0.
+ */
+bool tso_dma_map_next(struct tso_dma_map *map, dma_addr_t *addr,
+		      unsigned int *chunk_len, unsigned int *mapping_len,
+		      unsigned int seg_remaining)
+{
+	unsigned int region_len, chunk;
+
+	if (!seg_remaining)
+		return false;
+
+	/* IOVA path: contiguous DMA range, no region boundaries */
+	if (dma_use_iova(&map->iova_state)) {
+		*addr = map->iova_state.addr + map->iova_offset;
+		*chunk_len = seg_remaining;
+		*mapping_len = 0;
+		map->iova_offset += seg_remaining;
+		return true;
+	}
+
+	/* Fallback path: per-region iteration */
+
+	if (map->frag_idx == -1) {
+		region_len = map->linear_len;
+		chunk = min(seg_remaining, region_len - map->offset);
+		*addr = map->linear_dma + map->offset;
+	} else {
+		region_len = map->frags[map->frag_idx].len;
+		chunk = min(seg_remaining, region_len - map->offset);
+		*addr = map->frags[map->frag_idx].dma + map->offset;
+	}
+
+	*mapping_len = (map->offset == 0) ? region_len : 0;
+	*chunk_len = chunk;
+	map->offset += chunk;
+
+	if (map->offset >= region_len) {
+		map->frag_idx++;
+		map->offset = 0;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(tso_dma_map_next);
-- 
cgit v1.2.3


From 7ef629b458018ed01dcab6cbdc644ef26b0d0d83 Mon Sep 17 00:00:00 2001
From: Nicolai Buchwitz <nb@tipi-net.de>
Date: Mon, 6 Apr 2026 09:13:07 +0200
Subject: net: phy: add support for disabling PHY-autonomous EEE

Some PHYs (e.g. Broadcom BCM54xx, Realtek RTL8211F) implement
autonomous EEE where the PHY manages LPI signaling without forwarding
it to the MAC. This conflicts with MAC drivers that implement their own
LPI control.

Add a .disable_autonomous_eee callback to struct phy_driver and call it
from phy_support_eee(). When a MAC driver indicates it supports EEE via
phy_support_eee(), the PHY's autonomous EEE is automatically disabled so
the MAC can manage LPI entry/exit.

Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
Link: https://patch.msgid.link/20260406-devel-autonomous-eee-v1-1-b335e7143711@tipi-net.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/phy_device.c | 22 ++++++++++++++++++++++
 include/linux/phy.h          | 14 ++++++++++++++
 2 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0edff47478c2..cda4abf4e68c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1375,6 +1375,14 @@ int phy_init_hw(struct phy_device *phydev)
 			return ret;
 	}
 
+	/* Re-apply autonomous EEE disable after soft reset */
+	if (phydev->autonomous_eee_disabled &&
+	    phydev->drv->disable_autonomous_eee) {
+		ret = phydev->drv->disable_autonomous_eee(phydev);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(phy_init_hw);
@@ -2898,6 +2906,20 @@ void phy_support_eee(struct phy_device *phydev)
 	linkmode_copy(phydev->advertising_eee, phydev->supported_eee);
 	phydev->eee_cfg.tx_lpi_enabled = true;
 	phydev->eee_cfg.eee_enabled = true;
+
+	/* If the PHY supports autonomous EEE, disable it so the MAC can
+	 * manage LPI signaling instead. The flag is stored so it can be
+	 * re-applied after a PHY soft reset (e.g. suspend/resume).
+	 */
+	if (phydev->drv && phydev->drv->disable_autonomous_eee) {
+		int ret = phydev->drv->disable_autonomous_eee(phydev);
+
+		if (ret)
+			phydev_warn(phydev, "Failed to disable autonomous EEE: %pe\n",
+				    ERR_PTR(ret));
+		else
+			phydev->autonomous_eee_disabled = true;
+	}
 }
 EXPORT_SYMBOL(phy_support_eee);
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5de4b172cd0b..199a7aaa341b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -612,6 +612,8 @@ struct phy_oatc14_sqi_capability {
  * @advertising_eee: Currently advertised EEE linkmodes
  * @enable_tx_lpi: When True, MAC should transmit LPI to PHY
  * @eee_active: phylib private state, indicating that EEE has been negotiated
+ * @autonomous_eee_disabled: Set when autonomous EEE has been disabled,
+ *	used to re-apply after PHY soft reset
  * @eee_cfg: User configuration of EEE
  * @lp_advertising: Current link partner advertised linkmodes
  * @host_interfaces: PHY interface modes supported by host
@@ -739,6 +741,7 @@ struct phy_device {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(eee_disabled_modes);
 	bool enable_tx_lpi;
 	bool eee_active;
+	bool autonomous_eee_disabled;
 	struct eee_config eee_cfg;
 
 	/* Host supported PHY interface types. Should be ignored if empty. */
@@ -1359,6 +1362,17 @@ struct phy_driver {
 	void (*get_stats)(struct phy_device *dev,
 			  struct ethtool_stats *stats, u64 *data);
 
+	/**
+	 * @disable_autonomous_eee: Disable PHY-autonomous EEE
+	 *
+	 * Some PHYs manage EEE autonomously, preventing the MAC from
+	 * controlling LPI signaling. This callback disables autonomous
+	 * EEE at the PHY.
+	 *
+	 * Return: 0 on success, negative errno on failure.
+	 */
+	int (*disable_autonomous_eee)(struct phy_device *dev);
+
 	/* Get and Set PHY tunables */
 	/** @get_tunable: Return the value of a tunable */
 	int (*get_tunable)(struct phy_device *dev,
-- 
cgit v1.2.3


From bcb3e89fc0ecbe7a2b7ce614b72deda39083ac74 Mon Sep 17 00:00:00 2001
From: Nicolai Buchwitz <nb@tipi-net.de>
Date: Mon, 6 Apr 2026 09:13:08 +0200
Subject: net: phy: broadcom: implement .disable_autonomous_eee for BCM54xx

Implement the .disable_autonomous_eee callback for the BCM54210E.

In AutogrEEEn mode the PHY manages EEE autonomously. Clearing the
AutogrEEEn enable bit in MII_BUF_CNTL_0 switches the PHY to Native
EEE mode.

Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/20260406-devel-autonomous-eee-v1-2-b335e7143711@tipi-net.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/broadcom.c | 7 +++++++
 include/linux/brcmphy.h    | 3 +++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index cb306f9e80cc..bf0c6a04481e 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -1452,6 +1452,12 @@ static int bcm54811_read_status(struct phy_device *phydev)
 	return genphy_read_status(phydev);
 }
 
+static int bcm54xx_disable_autonomous_eee(struct phy_device *phydev)
+{
+	return bcm_phy_modify_exp(phydev, BCM54XX_TOP_MISC_MII_BUF_CNTL0,
+				  BCM54XX_MII_BUF_CNTL0_AUTOGREEEN_EN, 0);
+}
+
 static struct phy_driver broadcom_drivers[] = {
 {
 	PHY_ID_MATCH_MODEL(PHY_ID_BCM5411),
@@ -1495,6 +1501,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.get_wol	= bcm54xx_phy_get_wol,
 	.set_wol	= bcm54xx_phy_set_wol,
 	.led_brightness_set	= bcm_phy_led_brightness_set,
+	.disable_autonomous_eee	= bcm54xx_disable_autonomous_eee,
 }, {
 	PHY_ID_MATCH_MODEL(PHY_ID_BCM5461),
 	.name		= "Broadcom BCM5461",
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 115a964f3006..174687c4c80a 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -266,6 +266,9 @@
 #define BCM54XX_TOP_MISC_IDDQ_SD		(1 << 2)
 #define BCM54XX_TOP_MISC_IDDQ_SR		(1 << 3)
 
+#define BCM54XX_TOP_MISC_MII_BUF_CNTL0		(MII_BCM54XX_EXP_SEL_TOP + 0x00)
+#define  BCM54XX_MII_BUF_CNTL0_AUTOGREEEN_EN	BIT(0)
+
 #define BCM54XX_TOP_MISC_LED_CTL		(MII_BCM54XX_EXP_SEL_TOP + 0x0C)
 #define  BCM54XX_LED4_SEL_INTR			BIT(1)
 
-- 
cgit v1.2.3


From 449f08fa59dda5da40317b6976604b877c4ecd63 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sun, 12 Apr 2026 08:29:30 -0700
Subject: bpf: Move fixup/post-processing logic from verifier.c into fixups.c

verifier.c is huge. Split fixup/post-processing logic that runs after
the verifier accepted the program into fixups.c.

Mechanical move. No functional changes.

Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20260412152936.54262-2-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   78 ++
 kernel/bpf/Makefile          |    1 +
 kernel/bpf/fixups.c          | 2457 +++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        | 2955 ++++--------------------------------------
 4 files changed, 2766 insertions(+), 2725 deletions(-)
 create mode 100644 kernel/bpf/fixups.c

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 05b9fe98b8f8..4380ecad485b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -1205,4 +1205,82 @@ void bpf_stack_liveness_free(struct bpf_verifier_env *env);
 int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
 bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi);
 
+#define BPF_MAP_KEY_POISON	(1ULL << 63)
+#define BPF_MAP_KEY_SEEN	(1ULL << 62)
+
+static inline bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
+{
+	return aux->map_ptr_state.poison;
+}
+
+static inline bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
+{
+	return aux->map_ptr_state.unpriv;
+}
+
+static inline bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
+{
+	return aux->map_key_state & BPF_MAP_KEY_POISON;
+}
+
+static inline bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
+{
+	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
+}
+
+static inline u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
+{
+	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
+}
+
+#define MAX_PACKET_OFF 0xffff
+
+enum bpf_reg_arg_type {
+	SRC_OP,		/* register is used as source operand */
+	DST_OP,		/* register is used as destination operand */
+	DST_OP_NO_MARK	/* same as above, check only, don't mark */
+};
+
+#define MAX_KFUNC_DESCS 256
+
+struct bpf_kfunc_desc {
+	struct btf_func_model func_model;
+	u32 func_id;
+	s32 imm;
+	u16 offset;
+	unsigned long addr;
+};
+
+struct bpf_kfunc_desc_tab {
+	/* Sorted by func_id (BTF ID) and offset (fd_array offset) during
+	 * verification. JITs do lookups by bpf_insn, where func_id may not be
+	 * available, therefore at the end of verification do_misc_fixups()
+	 * sorts this by imm and offset.
+	 */
+	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
+	u32 nr_descs;
+};
+
+/* Functions exported from verifier.c, used by fixups.c */
+bool bpf_is_reg64(struct bpf_insn *insn, u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t);
+void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len);
+void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog);
+bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env);
+bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm);
+int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset);
+int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+			 struct bpf_insn *insn_buf, int insn_idx, int *cnt);
+
+/* Functions in fixups.c, called from bpf_check() */
+int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env);
+int bpf_optimize_bpf_loop(struct bpf_verifier_env *env);
+void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env);
+int bpf_opt_remove_dead_code(struct bpf_verifier_env *env);
+int bpf_opt_remove_nops(struct bpf_verifier_env *env);
+int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, const union bpf_attr *attr);
+int bpf_convert_ctx_accesses(struct bpf_verifier_env *env);
+int bpf_jit_subprogs(struct bpf_verifier_env *env);
+int bpf_fixup_call_args(struct bpf_verifier_env *env);
+int bpf_do_misc_fixups(struct bpf_verifier_env *env);
+
 #endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index b8ae7b0988a4..7c1eeee87fda 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += fixups.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c
new file mode 100644
index 000000000000..67c9b28767e1
--- /dev/null
+++ b/kernel/bpf/fixups.c
@@ -0,0 +1,2457 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+#include <linux/vmalloc.h>
+#include <linux/bsearch.h>
+#include <linux/sort.h>
+#include <linux/perf_event.h>
+#include <net/xdp.h>
+#include "disasm.h"
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+static bool is_cmpxchg_insn(const struct bpf_insn *insn)
+{
+	return BPF_CLASS(insn->code) == BPF_STX &&
+	       BPF_MODE(insn->code) == BPF_ATOMIC &&
+	       insn->imm == BPF_CMPXCHG;
+}
+
+/* Return the regno defined by the insn, or -1. */
+static int insn_def_regno(const struct bpf_insn *insn)
+{
+	switch (BPF_CLASS(insn->code)) {
+	case BPF_JMP:
+	case BPF_JMP32:
+	case BPF_ST:
+		return -1;
+	case BPF_STX:
+		if (BPF_MODE(insn->code) == BPF_ATOMIC ||
+		    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
+			if (insn->imm == BPF_CMPXCHG)
+				return BPF_REG_0;
+			else if (insn->imm == BPF_LOAD_ACQ)
+				return insn->dst_reg;
+			else if (insn->imm & BPF_FETCH)
+				return insn->src_reg;
+		}
+		return -1;
+	default:
+		return insn->dst_reg;
+	}
+}
+
+/* Return TRUE if INSN has defined any 32-bit value explicitly. */
+static bool insn_has_def32(struct bpf_insn *insn)
+{
+	int dst_reg = insn_def_regno(insn);
+
+	if (dst_reg == -1)
+		return false;
+
+	return !bpf_is_reg64(insn, dst_reg, NULL, DST_OP);
+}
+
+static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
+{
+	const struct bpf_kfunc_desc *d0 = a;
+	const struct bpf_kfunc_desc *d1 = b;
+
+	if (d0->imm != d1->imm)
+		return d0->imm < d1->imm ? -1 : 1;
+	if (d0->offset != d1->offset)
+		return d0->offset < d1->offset ? -1 : 1;
+	return 0;
+}
+
+const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+			 const struct bpf_insn *insn)
+{
+	const struct bpf_kfunc_desc desc = {
+		.imm = insn->imm,
+		.offset = insn->off,
+	};
+	const struct bpf_kfunc_desc *res;
+	struct bpf_kfunc_desc_tab *tab;
+
+	tab = prog->aux->kfunc_tab;
+	res = bsearch(&desc, tab->descs, tab->nr_descs,
+		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
+
+	return res ? &res->func_model : NULL;
+}
+
+static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
+{
+	unsigned long call_imm;
+
+	if (bpf_jit_supports_far_kfunc_call()) {
+		call_imm = desc->func_id;
+	} else {
+		call_imm = BPF_CALL_IMM(desc->addr);
+		/* Check whether the relative offset overflows desc->imm */
+		if ((unsigned long)(s32)call_imm != call_imm) {
+			verbose(env, "address of kernel func_id %u is out of range\n",
+				desc->func_id);
+			return -EINVAL;
+		}
+	}
+	desc->imm = call_imm;
+	return 0;
+}
+
+static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
+{
+	struct bpf_kfunc_desc_tab *tab;
+	int i, err;
+
+	tab = env->prog->aux->kfunc_tab;
+	if (!tab)
+		return 0;
+
+	for (i = 0; i < tab->nr_descs; i++) {
+		err = set_kfunc_desc_imm(env, &tab->descs[i]);
+		if (err)
+			return err;
+	}
+
+	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+	     kfunc_desc_cmp_by_imm_off, NULL);
+	return 0;
+}
+
+static int add_kfunc_in_insns(struct bpf_verifier_env *env,
+			      struct bpf_insn *insn, int cnt)
+{
+	int i, ret;
+
+	for (i = 0; i < cnt; i++, insn++) {
+		if (bpf_pseudo_kfunc_call(insn)) {
+			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+static int get_callee_stack_depth(struct bpf_verifier_env *env,
+				  const struct bpf_insn *insn, int idx)
+{
+	int start = idx + insn->imm + 1, subprog;
+
+	subprog = bpf_find_subprog(env, start);
+	if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
+		return -EFAULT;
+	return env->subprog_info[subprog].stack_depth;
+}
+#endif
+
+/* single env->prog->insni[off] instruction was replaced with the range
+ * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
+ * [0, off) and [off, end) to new locations, so the patched range stays zero
+ */
+static void adjust_insn_aux_data(struct bpf_verifier_env *env,
+				 struct bpf_prog *new_prog, u32 off, u32 cnt)
+{
+	struct bpf_insn_aux_data *data = env->insn_aux_data;
+	struct bpf_insn *insn = new_prog->insnsi;
+	u32 old_seen = data[off].seen;
+	u32 prog_len;
+	int i;
+
+	/* aux info at OFF always needs adjustment, no matter fast path
+	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
+	 * original insn at old prog.
+	 */
+	data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
+
+	if (cnt == 1)
+		return;
+	prog_len = new_prog->len;
+
+	memmove(data + off + cnt - 1, data + off,
+		sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+	memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
+	for (i = off; i < off + cnt - 1; i++) {
+		/* Expand insni[off]'s seen count to the patched range. */
+		data[i].seen = old_seen;
+		data[i].zext_dst = insn_has_def32(insn + i);
+	}
+}
+
+static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
+{
+	int i;
+
+	if (len == 1)
+		return;
+	/* NOTE: fake 'exit' subprog should be updated as well. */
+	for (i = 0; i <= env->subprog_cnt; i++) {
+		if (env->subprog_info[i].start <= off)
+			continue;
+		env->subprog_info[i].start += len - 1;
+	}
+}
+
+static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
+{
+	int i;
+
+	if (len == 1)
+		return;
+
+	for (i = 0; i < env->insn_array_map_cnt; i++)
+		bpf_insn_array_adjust(env->insn_array_maps[i], off, len);
+}
+
+static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
+{
+	int i;
+
+	for (i = 0; i < env->insn_array_map_cnt; i++)
+		bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len);
+}
+
+static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
+{
+	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
+	int i, sz = prog->aux->size_poke_tab;
+	struct bpf_jit_poke_descriptor *desc;
+
+	for (i = 0; i < sz; i++) {
+		desc = &tab[i];
+		if (desc->insn_idx <= off)
+			continue;
+		desc->insn_idx += len - 1;
+	}
+}
+
+static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
+					    const struct bpf_insn *patch, u32 len)
+{
+	struct bpf_prog *new_prog;
+	struct bpf_insn_aux_data *new_data = NULL;
+
+	if (len > 1) {
+		new_data = vrealloc(env->insn_aux_data,
+				    array_size(env->prog->len + len - 1,
+					       sizeof(struct bpf_insn_aux_data)),
+				    GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+		if (!new_data)
+			return NULL;
+
+		env->insn_aux_data = new_data;
+	}
+
+	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
+	if (IS_ERR(new_prog)) {
+		if (PTR_ERR(new_prog) == -ERANGE)
+			verbose(env,
+				"insn %d cannot be patched due to 16-bit range\n",
+				env->insn_aux_data[off].orig_idx);
+		return NULL;
+	}
+	adjust_insn_aux_data(env, new_prog, off, len);
+	adjust_subprog_starts(env, off, len);
+	adjust_insn_arrays(env, off, len);
+	adjust_poke_descs(new_prog, off, len);
+	return new_prog;
+}
+
+/*
+ * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
+ * jump offset by 'delta'.
+ */
+static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
+{
+	struct bpf_insn *insn = prog->insnsi;
+	u32 insn_cnt = prog->len, i;
+	s32 imm;
+	s16 off;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		u8 code = insn->code;
+
+		if (tgt_idx <= i && i < tgt_idx + delta)
+			continue;
+
+		if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+		    BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
+			continue;
+
+		if (insn->code == (BPF_JMP32 | BPF_JA)) {
+			if (i + 1 + insn->imm != tgt_idx)
+				continue;
+			if (check_add_overflow(insn->imm, delta, &imm))
+				return -ERANGE;
+			insn->imm = imm;
+		} else {
+			if (i + 1 + insn->off != tgt_idx)
+				continue;
+			if (check_add_overflow(insn->off, delta, &off))
+				return -ERANGE;
+			insn->off = off;
+		}
+	}
+	return 0;
+}
+
+static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
+					      u32 off, u32 cnt)
+{
+	int i, j;
+
+	/* find first prog starting at or after off (first to remove) */
+	for (i = 0; i < env->subprog_cnt; i++)
+		if (env->subprog_info[i].start >= off)
+			break;
+	/* find first prog starting at or after off + cnt (first to stay) */
+	for (j = i; j < env->subprog_cnt; j++)
+		if (env->subprog_info[j].start >= off + cnt)
+			break;
+	/* if j doesn't start exactly at off + cnt, we are just removing
+	 * the front of previous prog
+	 */
+	if (env->subprog_info[j].start != off + cnt)
+		j--;
+
+	if (j > i) {
+		struct bpf_prog_aux *aux = env->prog->aux;
+		int move;
+
+		/* move fake 'exit' subprog as well */
+		move = env->subprog_cnt + 1 - j;
+
+		memmove(env->subprog_info + i,
+			env->subprog_info + j,
+			sizeof(*env->subprog_info) * move);
+		env->subprog_cnt -= j - i;
+
+		/* remove func_info */
+		if (aux->func_info) {
+			move = aux->func_info_cnt - j;
+
+			memmove(aux->func_info + i,
+				aux->func_info + j,
+				sizeof(*aux->func_info) * move);
+			aux->func_info_cnt -= j - i;
+			/* func_info->insn_off is set after all code rewrites,
+			 * in adjust_btf_func() - no need to adjust
+			 */
+		}
+	} else {
+		/* convert i from "first prog to remove" to "first to adjust" */
+		if (env->subprog_info[i].start == off)
+			i++;
+	}
+
+	/* update fake 'exit' subprog as well */
+	for (; i <= env->subprog_cnt; i++)
+		env->subprog_info[i].start -= cnt;
+
+	return 0;
+}
+
+static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
+				      u32 cnt)
+{
+	struct bpf_prog *prog = env->prog;
+	u32 i, l_off, l_cnt, nr_linfo;
+	struct bpf_line_info *linfo;
+
+	nr_linfo = prog->aux->nr_linfo;
+	if (!nr_linfo)
+		return 0;
+
+	linfo = prog->aux->linfo;
+
+	/* find first line info to remove, count lines to be removed */
+	for (i = 0; i < nr_linfo; i++)
+		if (linfo[i].insn_off >= off)
+			break;
+
+	l_off = i;
+	l_cnt = 0;
+	for (; i < nr_linfo; i++)
+		if (linfo[i].insn_off < off + cnt)
+			l_cnt++;
+		else
+			break;
+
+	/* First live insn doesn't match first live linfo, it needs to "inherit"
+	 * last removed linfo.  prog is already modified, so prog->len == off
+	 * means no live instructions after (tail of the program was removed).
+	 */
+	if (prog->len != off && l_cnt &&
+	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
+		l_cnt--;
+		linfo[--i].insn_off = off + cnt;
+	}
+
+	/* remove the line info which refer to the removed instructions */
+	if (l_cnt) {
+		memmove(linfo + l_off, linfo + i,
+			sizeof(*linfo) * (nr_linfo - i));
+
+		prog->aux->nr_linfo -= l_cnt;
+		nr_linfo = prog->aux->nr_linfo;
+	}
+
+	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
+	for (i = l_off; i < nr_linfo; i++)
+		linfo[i].insn_off -= cnt;
+
+	/* fix up all subprogs (incl. 'exit') which start >= off */
+	for (i = 0; i <= env->subprog_cnt; i++)
+		if (env->subprog_info[i].linfo_idx > l_off) {
+			/* program may have started in the removed region but
+			 * may not be fully removed
+			 */
+			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
+				env->subprog_info[i].linfo_idx -= l_cnt;
+			else
+				env->subprog_info[i].linfo_idx = l_off;
+		}
+
+	return 0;
+}
+
+/*
+ * Clean up dynamically allocated fields of aux data for instructions [start, ...]
+ */
+void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	struct bpf_insn *insns = env->prog->insnsi;
+	int end = start + len;
+	int i;
+
+	for (i = start; i < end; i++) {
+		if (aux_data[i].jt) {
+			kvfree(aux_data[i].jt);
+			aux_data[i].jt = NULL;
+		}
+
+		if (bpf_is_ldimm64(&insns[i]))
+			i++;
+	}
+}
+
+static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	unsigned int orig_prog_len = env->prog->len;
+	int err;
+
+	if (bpf_prog_is_offloaded(env->prog->aux))
+		bpf_prog_offload_remove_insns(env, off, cnt);
+
+	/* Should be called before bpf_remove_insns, as it uses prog->insnsi */
+	bpf_clear_insn_aux_data(env, off, cnt);
+
+	err = bpf_remove_insns(env->prog, off, cnt);
+	if (err)
+		return err;
+
+	err = adjust_subprog_starts_after_remove(env, off, cnt);
+	if (err)
+		return err;
+
+	err = bpf_adj_linfo_after_remove(env, off, cnt);
+	if (err)
+		return err;
+
+	adjust_insn_arrays_after_remove(env, off, cnt);
+
+	memmove(aux_data + off,	aux_data + off + cnt,
+		sizeof(*aux_data) * (orig_prog_len - off - cnt));
+
+	return 0;
+}
+
+static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
+
+bool bpf_insn_is_cond_jump(u8 code)
+{
+	u8 op;
+
+	op = BPF_OP(code);
+	if (BPF_CLASS(code) == BPF_JMP32)
+		return op != BPF_JA;
+
+	if (BPF_CLASS(code) != BPF_JMP)
+		return false;
+
+	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
+}
+
+void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+	struct bpf_insn *insn = env->prog->insnsi;
+	const int insn_cnt = env->prog->len;
+	int i;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (!bpf_insn_is_cond_jump(insn->code))
+			continue;
+
+		if (!aux_data[i + 1].seen)
+			ja.off = insn->off;
+		else if (!aux_data[i + 1 + insn->off].seen)
+			ja.off = 0;
+		else
+			continue;
+
+		if (bpf_prog_is_offloaded(env->prog->aux))
+			bpf_prog_offload_replace_insn(env, i, &ja);
+
+		memcpy(insn, &ja, sizeof(ja));
+	}
+}
+
+int bpf_opt_remove_dead_code(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	int insn_cnt = env->prog->len;
+	int i, err;
+
+	for (i = 0; i < insn_cnt; i++) {
+		int j;
+
+		j = 0;
+		while (i + j < insn_cnt && !aux_data[i + j].seen)
+			j++;
+		if (!j)
+			continue;
+
+		err = verifier_remove_insns(env, i, j);
+		if (err)
+			return err;
+		insn_cnt = env->prog->len;
+	}
+
+	return 0;
+}
+
+int bpf_opt_remove_nops(struct bpf_verifier_env *env)
+{
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	bool is_may_goto_0, is_ja;
+	int i, err;
+
+	for (i = 0; i < insn_cnt; i++) {
+		is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
+		is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
+
+		if (!is_may_goto_0 && !is_ja)
+			continue;
+
+		err = verifier_remove_insns(env, i, 1);
+		if (err)
+			return err;
+		insn_cnt--;
+		/* Go back one insn to catch may_goto +1; may_goto +0 sequence */
+		i -= (is_may_goto_0 && i > 0) ? 2 : 1;
+	}
+
+	return 0;
+}
+
+int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
+					 const union bpf_attr *attr)
+{
+	struct bpf_insn *patch;
+	/* use env->insn_buf as two independent buffers */
+	struct bpf_insn *zext_patch = env->insn_buf;
+	struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
+	struct bpf_insn_aux_data *aux = env->insn_aux_data;
+	int i, patch_len, delta = 0, len = env->prog->len;
+	struct bpf_insn *insns = env->prog->insnsi;
+	struct bpf_prog *new_prog;
+	bool rnd_hi32;
+
+	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
+	zext_patch[1] = BPF_ZEXT_REG(0);
+	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
+	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
+	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
+	for (i = 0; i < len; i++) {
+		int adj_idx = i + delta;
+		struct bpf_insn insn;
+		int load_reg;
+
+		insn = insns[adj_idx];
+		load_reg = insn_def_regno(&insn);
+		if (!aux[adj_idx].zext_dst) {
+			u8 code, class;
+			u32 imm_rnd;
+
+			if (!rnd_hi32)
+				continue;
+
+			code = insn.code;
+			class = BPF_CLASS(code);
+			if (load_reg == -1)
+				continue;
+
+			/* NOTE: arg "reg" (the fourth one) is only used for
+			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
+			 *       here.
+			 */
+			if (bpf_is_reg64(&insn, load_reg, NULL, DST_OP)) {
+				if (class == BPF_LD &&
+				    BPF_MODE(code) == BPF_IMM)
+					i++;
+				continue;
+			}
+
+			/* ctx load could be transformed into wider load. */
+			if (class == BPF_LDX &&
+			    aux[adj_idx].ptr_type == PTR_TO_CTX)
+				continue;
+
+			imm_rnd = get_random_u32();
+			rnd_hi32_patch[0] = insn;
+			rnd_hi32_patch[1].imm = imm_rnd;
+			rnd_hi32_patch[3].dst_reg = load_reg;
+			patch = rnd_hi32_patch;
+			patch_len = 4;
+			goto apply_patch_buffer;
+		}
+
+		/* Add in an zero-extend instruction if a) the JIT has requested
+		 * it or b) it's a CMPXCHG.
+		 *
+		 * The latter is because: BPF_CMPXCHG always loads a value into
+		 * R0, therefore always zero-extends. However some archs'
+		 * equivalent instruction only does this load when the
+		 * comparison is successful. This detail of CMPXCHG is
+		 * orthogonal to the general zero-extension behaviour of the
+		 * CPU, so it's treated independently of bpf_jit_needs_zext.
+		 */
+		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
+			continue;
+
+		/* Zero-extension is done by the caller. */
+		if (bpf_pseudo_kfunc_call(&insn))
+			continue;
+
+		if (verifier_bug_if(load_reg == -1, env,
+				    "zext_dst is set, but no reg is defined"))
+			return -EFAULT;
+
+		zext_patch[0] = insn;
+		zext_patch[1].dst_reg = load_reg;
+		zext_patch[1].src_reg = load_reg;
+		patch = zext_patch;
+		patch_len = 2;
+apply_patch_buffer:
+		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
+		if (!new_prog)
+			return -ENOMEM;
+		env->prog = new_prog;
+		insns = new_prog->insnsi;
+		aux = env->insn_aux_data;
+		delta += patch_len - 1;
+	}
+
+	return 0;
+}
+
+/* convert load instructions that access fields of a context type into a
+ * sequence of instructions that access fields of the underlying structure:
+ *     struct __sk_buff    -> struct sk_buff
+ *     struct bpf_sock_ops -> struct sock
+ */
+int bpf_convert_ctx_accesses(struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *subprogs = env->subprog_info;
+	const struct bpf_verifier_ops *ops = env->ops;
+	int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
+	const int insn_cnt = env->prog->len;
+	struct bpf_insn *epilogue_buf = env->epilogue_buf;
+	struct bpf_insn *insn_buf = env->insn_buf;
+	struct bpf_insn *insn;
+	u32 target_size, size_default, off;
+	struct bpf_prog *new_prog;
+	enum bpf_access_type type;
+	bool is_narrower_load;
+	int epilogue_idx = 0;
+
+	if (ops->gen_epilogue) {
+		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
+						 -(subprogs[0].stack_depth + 8));
+		if (epilogue_cnt >= INSN_BUF_SIZE) {
+			verifier_bug(env, "epilogue is too long");
+			return -EFAULT;
+		} else if (epilogue_cnt) {
+			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
+			cnt = 0;
+			subprogs[0].stack_depth += 8;
+			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
+						      -subprogs[0].stack_depth);
+			insn_buf[cnt++] = env->prog->insnsi[0];
+			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+			env->prog = new_prog;
+			delta += cnt - 1;
+
+			ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	if (ops->gen_prologue || env->seen_direct_write) {
+		if (!ops->gen_prologue) {
+			verifier_bug(env, "gen_prologue is null");
+			return -EFAULT;
+		}
+		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
+					env->prog);
+		if (cnt >= INSN_BUF_SIZE) {
+			verifier_bug(env, "prologue is too long");
+			return -EFAULT;
+		} else if (cnt) {
+			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			env->prog = new_prog;
+			delta += cnt - 1;
+
+			ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	if (delta)
+		WARN_ON(adjust_jmp_off(env->prog, 0, delta));
+
+	if (bpf_prog_is_offloaded(env->prog->aux))
+		return 0;
+
+	insn = env->prog->insnsi + delta;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		bpf_convert_ctx_access_t convert_ctx_access;
+		u8 mode;
+
+		if (env->insn_aux_data[i + delta].nospec) {
+			WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
+			struct bpf_insn *patch = insn_buf;
+
+			*patch++ = BPF_ST_NOSPEC();
+			*patch++ = *insn;
+			cnt = patch - insn_buf;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			/* This can not be easily merged with the
+			 * nospec_result-case, because an insn may require a
+			 * nospec before and after itself. Therefore also do not
+			 * 'continue' here but potentially apply further
+			 * patching to insn. *insn should equal patch[1] now.
+			 */
+		}
+
+		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
+		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
+		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
+		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
+		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
+		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
+			type = BPF_READ;
+		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
+			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
+			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
+			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
+			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
+			type = BPF_WRITE;
+		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
+			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
+			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
+			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
+			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
+			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
+			env->prog->aux->num_exentries++;
+			continue;
+		} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
+			   epilogue_cnt &&
+			   i + delta < subprogs[1].start) {
+			/* Generate epilogue for the main prog */
+			if (epilogue_idx) {
+				/* jump back to the earlier generated epilogue */
+				insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
+				cnt = 1;
+			} else {
+				memcpy(insn_buf, epilogue_buf,
+				       epilogue_cnt * sizeof(*epilogue_buf));
+				cnt = epilogue_cnt;
+				/* epilogue_idx cannot be 0. It must have at
+				 * least one ctx ptr saving insn before the
+				 * epilogue.
+				 */
+				epilogue_idx = i + delta;
+			}
+			goto patch_insn_buf;
+		} else {
+			continue;
+		}
+
+		if (type == BPF_WRITE &&
+		    env->insn_aux_data[i + delta].nospec_result) {
+			/* nospec_result is only used to mitigate Spectre v4 and
+			 * to limit verification-time for Spectre v1.
+			 */
+			struct bpf_insn *patch = insn_buf;
+
+			*patch++ = *insn;
+			*patch++ = BPF_ST_NOSPEC();
+			cnt = patch - insn_buf;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			continue;
+		}
+
+		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
+		case PTR_TO_CTX:
+			if (!ops->convert_ctx_access)
+				continue;
+			convert_ctx_access = ops->convert_ctx_access;
+			break;
+		case PTR_TO_SOCKET:
+		case PTR_TO_SOCK_COMMON:
+			convert_ctx_access = bpf_sock_convert_ctx_access;
+			break;
+		case PTR_TO_TCP_SOCK:
+			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
+			break;
+		case PTR_TO_XDP_SOCK:
+			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
+			break;
+		case PTR_TO_BTF_ID:
+		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
+		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
+		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
+		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
+		 * any faults for loads into such types. BPF_WRITE is disallowed
+		 * for this case.
+		 */
+		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
+		case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
+			if (type == BPF_READ) {
+				if (BPF_MODE(insn->code) == BPF_MEM)
+					insn->code = BPF_LDX | BPF_PROBE_MEM |
+						     BPF_SIZE((insn)->code);
+				else
+					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
+						     BPF_SIZE((insn)->code);
+				env->prog->aux->num_exentries++;
+			}
+			continue;
+		case PTR_TO_ARENA:
+			if (BPF_MODE(insn->code) == BPF_MEMSX) {
+				if (!bpf_jit_supports_insn(insn, true)) {
+					verbose(env, "sign extending loads from arena are not supported yet\n");
+					return -EOPNOTSUPP;
+				}
+				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
+			} else {
+				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
+			}
+			env->prog->aux->num_exentries++;
+			continue;
+		default:
+			continue;
+		}
+
+		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
+		size = BPF_LDST_BYTES(insn);
+		mode = BPF_MODE(insn->code);
+
+		/* If the read access is a narrower load of the field,
+		 * convert to a 4/8-byte load, to minimum program type specific
+		 * convert_ctx_access changes. If conversion is successful,
+		 * we will apply proper mask to the result.
+		 */
+		is_narrower_load = size < ctx_field_size;
+		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
+		off = insn->off;
+		if (is_narrower_load) {
+			u8 size_code;
+
+			if (type == BPF_WRITE) {
+				verifier_bug(env, "narrow ctx access misconfigured");
+				return -EFAULT;
+			}
+
+			size_code = BPF_H;
+			if (ctx_field_size == 4)
+				size_code = BPF_W;
+			else if (ctx_field_size == 8)
+				size_code = BPF_DW;
+
+			insn->off = off & ~(size_default - 1);
+			insn->code = BPF_LDX | BPF_MEM | size_code;
+		}
+
+		target_size = 0;
+		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
+					 &target_size);
+		if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
+		    (ctx_field_size && !target_size)) {
+			verifier_bug(env, "error during ctx access conversion (%d)", cnt);
+			return -EFAULT;
+		}
+
+		if (is_narrower_load && size < target_size) {
+			u8 shift = bpf_ctx_narrow_access_offset(
+				off, size, size_default) * 8;
+			if (shift && cnt + 1 >= INSN_BUF_SIZE) {
+				verifier_bug(env, "narrow ctx load misconfigured");
+				return -EFAULT;
+			}
+			if (ctx_field_size <= 4) {
+				if (shift)
+					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
+									insn->dst_reg,
+									shift);
+				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
+								(1 << size * 8) - 1);
+			} else {
+				if (shift)
+					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
+									insn->dst_reg,
+									shift);
+				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
+								(1ULL << size * 8) - 1);
+			}
+		}
+		if (mode == BPF_MEMSX)
+			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
+						       insn->dst_reg, insn->dst_reg,
+						       size * 8, 0);
+
+patch_insn_buf:
+		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+		if (!new_prog)
+			return -ENOMEM;
+
+		delta += cnt - 1;
+
+		/* keep walking new program and skip insns we just inserted */
+		env->prog = new_prog;
+		insn      = new_prog->insnsi + i + delta;
+	}
+
+	return 0;
+}
+
+int bpf_jit_subprogs(struct bpf_verifier_env *env)
+{
+	struct bpf_prog *prog = env->prog, **func, *tmp;
+	int i, j, subprog_start, subprog_end = 0, len, subprog;
+	struct bpf_map *map_ptr;
+	struct bpf_insn *insn;
+	void *old_bpf_func;
+	int err, num_exentries;
+	int old_len, subprog_start_adjustment = 0;
+
+	if (env->subprog_cnt <= 1)
+		return 0;
+
+	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
+			continue;
+
+		/* Upon error here we cannot fall back to interpreter but
+		 * need a hard reject of the program. Thus -EFAULT is
+		 * propagated in any case.
+		 */
+		subprog = bpf_find_subprog(env, i + insn->imm + 1);
+		if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
+				    i + insn->imm + 1))
+			return -EFAULT;
+		/* temporarily remember subprog id inside insn instead of
+		 * aux_data, since next loop will split up all insns into funcs
+		 */
+		insn->off = subprog;
+		/* remember original imm in case JIT fails and fallback
+		 * to interpreter will be needed
+		 */
+		env->insn_aux_data[i].call_imm = insn->imm;
+		/* point imm to __bpf_call_base+1 from JITs point of view */
+		insn->imm = 1;
+		if (bpf_pseudo_func(insn)) {
+#if defined(MODULES_VADDR)
+			u64 addr = MODULES_VADDR;
+#else
+			u64 addr = VMALLOC_START;
+#endif
+			/* jit (e.g. x86_64) may emit fewer instructions
+			 * if it learns a u32 imm is the same as a u64 imm.
+			 * Set close enough to possible prog address.
+			 */
+			insn[0].imm = (u32)addr;
+			insn[1].imm = addr >> 32;
+		}
+	}
+
+	err = bpf_prog_alloc_jited_linfo(prog);
+	if (err)
+		goto out_undo_insn;
+
+	err = -ENOMEM;
+	func = kzalloc_objs(prog, env->subprog_cnt);
+	if (!func)
+		goto out_undo_insn;
+
+	for (i = 0; i < env->subprog_cnt; i++) {
+		subprog_start = subprog_end;
+		subprog_end = env->subprog_info[i + 1].start;
+
+		len = subprog_end - subprog_start;
+		/* bpf_prog_run() doesn't call subprogs directly,
+		 * hence main prog stats include the runtime of subprogs.
+		 * subprogs don't have IDs and not reachable via prog_get_next_id
+		 * func[i]->stats will never be accessed and stays NULL
+		 */
+		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
+		if (!func[i])
+			goto out_free;
+		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
+		       len * sizeof(struct bpf_insn));
+		func[i]->type = prog->type;
+		func[i]->len = len;
+		if (bpf_prog_calc_tag(func[i]))
+			goto out_free;
+		func[i]->is_func = 1;
+		func[i]->sleepable = prog->sleepable;
+		func[i]->aux->func_idx = i;
+		/* Below members will be freed only at prog->aux */
+		func[i]->aux->btf = prog->aux->btf;
+		func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment;
+		func[i]->aux->func_info = prog->aux->func_info;
+		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
+		func[i]->aux->poke_tab = prog->aux->poke_tab;
+		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
+		func[i]->aux->main_prog_aux = prog->aux;
+
+		for (j = 0; j < prog->aux->size_poke_tab; j++) {
+			struct bpf_jit_poke_descriptor *poke;
+
+			poke = &prog->aux->poke_tab[j];
+			if (poke->insn_idx < subprog_end &&
+			    poke->insn_idx >= subprog_start)
+				poke->aux = func[i]->aux;
+		}
+
+		func[i]->aux->name[0] = 'F';
+		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
+		if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
+			func[i]->aux->jits_use_priv_stack = true;
+
+		func[i]->jit_requested = 1;
+		func[i]->blinding_requested = prog->blinding_requested;
+		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
+		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
+		func[i]->aux->linfo = prog->aux->linfo;
+		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
+		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
+		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
+		func[i]->aux->arena = prog->aux->arena;
+		func[i]->aux->used_maps = env->used_maps;
+		func[i]->aux->used_map_cnt = env->used_map_cnt;
+		num_exentries = 0;
+		insn = func[i]->insnsi;
+		for (j = 0; j < func[i]->len; j++, insn++) {
+			if (BPF_CLASS(insn->code) == BPF_LDX &&
+			    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+			     BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
+			     BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
+			     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
+				num_exentries++;
+			if ((BPF_CLASS(insn->code) == BPF_STX ||
+			     BPF_CLASS(insn->code) == BPF_ST) &&
+			     BPF_MODE(insn->code) == BPF_PROBE_MEM32)
+				num_exentries++;
+			if (BPF_CLASS(insn->code) == BPF_STX &&
+			     BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
+				num_exentries++;
+		}
+		func[i]->aux->num_exentries = num_exentries;
+		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
+		func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
+		func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
+		func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
+		if (!i)
+			func[i]->aux->exception_boundary = env->seen_exception;
+
+		/*
+		 * To properly pass the absolute subprog start to jit
+		 * all instruction adjustments should be accumulated
+		 */
+		old_len = func[i]->len;
+		func[i] = bpf_int_jit_compile(func[i]);
+		subprog_start_adjustment += func[i]->len - old_len;
+
+		if (!func[i]->jited) {
+			err = -ENOTSUPP;
+			goto out_free;
+		}
+		cond_resched();
+	}
+
+	/* at this point all bpf functions were successfully JITed
+	 * now populate all bpf_calls with correct addresses and
+	 * run last pass of JIT
+	 */
+	for (i = 0; i < env->subprog_cnt; i++) {
+		insn = func[i]->insnsi;
+		for (j = 0; j < func[i]->len; j++, insn++) {
+			if (bpf_pseudo_func(insn)) {
+				subprog = insn->off;
+				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
+				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
+				continue;
+			}
+			if (!bpf_pseudo_call(insn))
+				continue;
+			subprog = insn->off;
+			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
+		}
+
+		/* we use the aux data to keep a list of the start addresses
+		 * of the JITed images for each function in the program
+		 *
+		 * for some architectures, such as powerpc64, the imm field
+		 * might not be large enough to hold the offset of the start
+		 * address of the callee's JITed image from __bpf_call_base
+		 *
+		 * in such cases, we can lookup the start address of a callee
+		 * by using its subprog id, available from the off field of
+		 * the call instruction, as an index for this list
+		 */
+		func[i]->aux->func = func;
+		func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
+		func[i]->aux->real_func_cnt = env->subprog_cnt;
+	}
+	for (i = 0; i < env->subprog_cnt; i++) {
+		old_bpf_func = func[i]->bpf_func;
+		tmp = bpf_int_jit_compile(func[i]);
+		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
+			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
+			err = -ENOTSUPP;
+			goto out_free;
+		}
+		cond_resched();
+	}
+
+	/*
+	 * Cleanup func[i]->aux fields which aren't required
+	 * or can become invalid in future
+	 */
+	for (i = 0; i < env->subprog_cnt; i++) {
+		func[i]->aux->used_maps = NULL;
+		func[i]->aux->used_map_cnt = 0;
+	}
+
+	/* finally lock prog and jit images for all functions and
+	 * populate kallsysm. Begin at the first subprogram, since
+	 * bpf_prog_load will add the kallsyms for the main program.
+	 */
+	for (i = 1; i < env->subprog_cnt; i++) {
+		err = bpf_prog_lock_ro(func[i]);
+		if (err)
+			goto out_free;
+	}
+
+	for (i = 1; i < env->subprog_cnt; i++)
+		bpf_prog_kallsyms_add(func[i]);
+
+	/* Last step: make now unused interpreter insns from main
+	 * prog consistent for later dump requests, so they can
+	 * later look the same as if they were interpreted only.
+	 */
+	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+		if (bpf_pseudo_func(insn)) {
+			insn[0].imm = env->insn_aux_data[i].call_imm;
+			insn[1].imm = insn->off;
+			insn->off = 0;
+			continue;
+		}
+		if (!bpf_pseudo_call(insn))
+			continue;
+		insn->off = env->insn_aux_data[i].call_imm;
+		subprog = bpf_find_subprog(env, i + insn->off + 1);
+		insn->imm = subprog;
+	}
+
+	prog->jited = 1;
+	prog->bpf_func = func[0]->bpf_func;
+	prog->jited_len = func[0]->jited_len;
+	prog->aux->extable = func[0]->aux->extable;
+	prog->aux->num_exentries = func[0]->aux->num_exentries;
+	prog->aux->func = func;
+	prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
+	prog->aux->real_func_cnt = env->subprog_cnt;
+	prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
+	prog->aux->exception_boundary = func[0]->aux->exception_boundary;
+	bpf_prog_jit_attempt_done(prog);
+	return 0;
+out_free:
+	/* We failed JIT'ing, so at this point we need to unregister poke
+	 * descriptors from subprogs, so that kernel is not attempting to
+	 * patch it anymore as we're freeing the subprog JIT memory.
+	 */
+	for (i = 0; i < prog->aux->size_poke_tab; i++) {
+		map_ptr = prog->aux->poke_tab[i].tail_call.map;
+		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
+	}
+	/* At this point we're guaranteed that poke descriptors are not
+	 * live anymore. We can just unlink its descriptor table as it's
+	 * released with the main prog.
+	 */
+	for (i = 0; i < env->subprog_cnt; i++) {
+		if (!func[i])
+			continue;
+		func[i]->aux->poke_tab = NULL;
+		bpf_jit_free(func[i]);
+	}
+	kfree(func);
+out_undo_insn:
+	/* cleanup main prog to be interpreted */
+	prog->jit_requested = 0;
+	prog->blinding_requested = 0;
+	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+		if (!bpf_pseudo_call(insn))
+			continue;
+		insn->off = 0;
+		insn->imm = env->insn_aux_data[i].call_imm;
+	}
+	bpf_prog_jit_attempt_done(prog);
+	return err;
+}
+
+int bpf_fixup_call_args(struct bpf_verifier_env *env)
+{
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+	struct bpf_prog *prog = env->prog;
+	struct bpf_insn *insn = prog->insnsi;
+	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
+	int i, depth;
+#endif
+	int err = 0;
+
+	if (env->prog->jit_requested &&
+	    !bpf_prog_is_offloaded(env->prog->aux)) {
+		err = bpf_jit_subprogs(env);
+		if (err == 0)
+			return 0;
+		if (err == -EFAULT)
+			return err;
+	}
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+	if (has_kfunc_call) {
+		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
+		return -EINVAL;
+	}
+	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
+		/* When JIT fails the progs with bpf2bpf calls and tail_calls
+		 * have to be rejected, since interpreter doesn't support them yet.
+		 */
+		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
+		return -EINVAL;
+	}
+	for (i = 0; i < prog->len; i++, insn++) {
+		if (bpf_pseudo_func(insn)) {
+			/* When JIT fails the progs with callback calls
+			 * have to be rejected, since interpreter doesn't support them yet.
+			 */
+			verbose(env, "callbacks are not allowed in non-JITed programs\n");
+			return -EINVAL;
+		}
+
+		if (!bpf_pseudo_call(insn))
+			continue;
+		depth = get_callee_stack_depth(env, insn, i);
+		if (depth < 0)
+			return depth;
+		bpf_patch_call_args(insn, depth);
+	}
+	err = 0;
+#endif
+	return err;
+}
+
+
+/* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
+static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
+{
+	struct bpf_subprog_info *info = env->subprog_info;
+	int cnt = env->subprog_cnt;
+	struct bpf_prog *prog;
+
+	/* We only reserve one slot for hidden subprogs in subprog_info. */
+	if (env->hidden_subprog_cnt) {
+		verifier_bug(env, "only one hidden subprog supported");
+		return -EFAULT;
+	}
+	/* We're not patching any existing instruction, just appending the new
+	 * ones for the hidden subprog. Hence all of the adjustment operations
+	 * in bpf_patch_insn_data are no-ops.
+	 */
+	prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
+	if (!prog)
+		return -ENOMEM;
+	env->prog = prog;
+	info[cnt + 1].start = info[cnt].start;
+	info[cnt].start = prog->len - len + 1;
+	env->subprog_cnt++;
+	env->hidden_subprog_cnt++;
+	return 0;
+}
+
+/* Do various post-verification rewrites in a single program pass.
+ * These rewrites simplify JIT and interpreter implementations.
+ */
+int bpf_do_misc_fixups(struct bpf_verifier_env *env)
+{
+	struct bpf_prog *prog = env->prog;
+	enum bpf_attach_type eatype = prog->expected_attach_type;
+	enum bpf_prog_type prog_type = resolve_prog_type(prog);
+	struct bpf_insn *insn = prog->insnsi;
+	const struct bpf_func_proto *fn;
+	const int insn_cnt = prog->len;
+	const struct bpf_map_ops *ops;
+	struct bpf_insn_aux_data *aux;
+	struct bpf_insn *insn_buf = env->insn_buf;
+	struct bpf_prog *new_prog;
+	struct bpf_map *map_ptr;
+	int i, ret, cnt, delta = 0, cur_subprog = 0;
+	struct bpf_subprog_info *subprogs = env->subprog_info;
+	u16 stack_depth = subprogs[cur_subprog].stack_depth;
+	u16 stack_depth_extra = 0;
+
+	if (env->seen_exception && !env->exception_callback_subprog) {
+		struct bpf_insn *patch = insn_buf;
+
+		*patch++ = env->prog->insnsi[insn_cnt - 1];
+		*patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
+		*patch++ = BPF_EXIT_INSN();
+		ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
+		if (ret < 0)
+			return ret;
+		prog = env->prog;
+		insn = prog->insnsi;
+
+		env->exception_callback_subprog = env->subprog_cnt - 1;
+		/* Don't update insn_cnt, as add_hidden_subprog always appends insns */
+		bpf_mark_subprog_exc_cb(env, env->exception_callback_subprog);
+	}
+
+	for (i = 0; i < insn_cnt;) {
+		if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
+			if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
+			    (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
+				/* convert to 32-bit mov that clears upper 32-bit */
+				insn->code = BPF_ALU | BPF_MOV | BPF_X;
+				/* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
+				insn->off = 0;
+				insn->imm = 0;
+			} /* cast from as(0) to as(1) should be handled by JIT */
+			goto next_insn;
+		}
+
+		if (env->insn_aux_data[i + delta].needs_zext)
+			/* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
+			insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
+
+		/* Make sdiv/smod divide-by-minus-one exceptions impossible. */
+		if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
+		     insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
+		     insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
+		     insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
+		    insn->off == 1 && insn->imm == -1) {
+			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+			struct bpf_insn *patch = insn_buf;
+
+			if (isdiv)
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+							BPF_NEG | BPF_K, insn->dst_reg,
+							0, 0, 0);
+			else
+				*patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
+
+			cnt = patch - insn_buf;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
+		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
+		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
+		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
+		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
+			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+			bool is_sdiv = isdiv && insn->off == 1;
+			bool is_smod = !isdiv && insn->off == 1;
+			struct bpf_insn *patch = insn_buf;
+
+			if (is_sdiv) {
+				/* [R,W]x sdiv 0 -> 0
+				 * LLONG_MIN sdiv -1 -> LLONG_MIN
+				 * INT_MIN sdiv -1 -> INT_MIN
+				 */
+				*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+							BPF_ADD | BPF_K, BPF_REG_AX,
+							0, 0, 1);
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+							BPF_JGT | BPF_K, BPF_REG_AX,
+							0, 4, 1);
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+							BPF_JEQ | BPF_K, BPF_REG_AX,
+							0, 1, 0);
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+							BPF_MOV | BPF_K, insn->dst_reg,
+							0, 0, 0);
+				/* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+							BPF_NEG | BPF_K, insn->dst_reg,
+							0, 0, 0);
+				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+				*patch++ = *insn;
+				cnt = patch - insn_buf;
+			} else if (is_smod) {
+				/* [R,W]x mod 0 -> [R,W]x */
+				/* [R,W]x mod -1 -> 0 */
+				*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+							BPF_ADD | BPF_K, BPF_REG_AX,
+							0, 0, 1);
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+							BPF_JGT | BPF_K, BPF_REG_AX,
+							0, 3, 1);
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+							BPF_JEQ | BPF_K, BPF_REG_AX,
+							0, 3 + (is64 ? 0 : 1), 1);
+				*patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
+				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+				*patch++ = *insn;
+
+				if (!is64) {
+					*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+					*patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
+				}
+				cnt = patch - insn_buf;
+			} else if (isdiv) {
+				/* [R,W]x div 0 -> 0 */
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+							BPF_JNE | BPF_K, insn->src_reg,
+							0, 2, 0);
+				*patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg);
+				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+				*patch++ = *insn;
+				cnt = patch - insn_buf;
+			} else {
+				/* [R,W]x mod 0 -> [R,W]x */
+				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+							BPF_JEQ | BPF_K, insn->src_reg,
+							0, 1 + (is64 ? 0 : 1), 0);
+				*patch++ = *insn;
+
+				if (!is64) {
+					*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+					*patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
+				}
+				cnt = patch - insn_buf;
+			}
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Make it impossible to de-reference a userspace address */
+		if (BPF_CLASS(insn->code) == BPF_LDX &&
+		    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+		     BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
+			struct bpf_insn *patch = insn_buf;
+			u64 uaddress_limit = bpf_arch_uaddress_limit();
+
+			if (!uaddress_limit)
+				goto next_insn;
+
+			*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+			if (insn->off)
+				*patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
+			*patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
+			*patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
+			*patch++ = *insn;
+			*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+			*patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
+
+			cnt = patch - insn_buf;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
+		if (BPF_CLASS(insn->code) == BPF_LD &&
+		    (BPF_MODE(insn->code) == BPF_ABS ||
+		     BPF_MODE(insn->code) == BPF_IND)) {
+			cnt = env->ops->gen_ld_abs(insn, insn_buf);
+			if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
+				verifier_bug(env, "%d insns generated for ld_abs", cnt);
+				return -EFAULT;
+			}
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
+		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
+		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
+			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
+			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
+			struct bpf_insn *patch = insn_buf;
+			bool issrc, isneg, isimm;
+			u32 off_reg;
+
+			aux = &env->insn_aux_data[i + delta];
+			if (!aux->alu_state ||
+			    aux->alu_state == BPF_ALU_NON_POINTER)
+				goto next_insn;
+
+			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
+			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
+				BPF_ALU_SANITIZE_SRC;
+			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
+
+			off_reg = issrc ? insn->src_reg : insn->dst_reg;
+			if (isimm) {
+				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
+			} else {
+				if (isneg)
+					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
+				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
+				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
+				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
+				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
+				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
+				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
+			}
+			if (!issrc)
+				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
+			insn->src_reg = BPF_REG_AX;
+			if (isneg)
+				insn->code = insn->code == code_add ?
+					     code_sub : code_add;
+			*patch++ = *insn;
+			if (issrc && isneg && !isimm)
+				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
+			cnt = patch - insn_buf;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		if (bpf_is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
+			int stack_off_cnt = -stack_depth - 16;
+
+			/*
+			 * Two 8 byte slots, depth-16 stores the count, and
+			 * depth-8 stores the start timestamp of the loop.
+			 *
+			 * The starting value of count is BPF_MAX_TIMED_LOOPS
+			 * (0xffff).  Every iteration loads it and subs it by 1,
+			 * until the value becomes 0 in AX (thus, 1 in stack),
+			 * after which we call arch_bpf_timed_may_goto, which
+			 * either sets AX to 0xffff to keep looping, or to 0
+			 * upon timeout. AX is then stored into the stack. In
+			 * the next iteration, we either see 0 and break out, or
+			 * continue iterating until the next time value is 0
+			 * after subtraction, rinse and repeat.
+			 */
+			stack_depth_extra = 16;
+			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
+			if (insn->off >= 0)
+				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
+			else
+				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
+			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
+			insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
+			/*
+			 * AX is used as an argument to pass in stack_off_cnt
+			 * (to add to r10/fp), and also as the return value of
+			 * the call to arch_bpf_timed_may_goto.
+			 */
+			insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
+			insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
+			insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
+			cnt = 7;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			goto next_insn;
+		} else if (bpf_is_may_goto_insn(insn)) {
+			int stack_off = -stack_depth - 8;
+
+			stack_depth_extra = 8;
+			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
+			if (insn->off >= 0)
+				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+			else
+				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
+			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
+			insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
+			cnt = 4;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		if (insn->code != (BPF_JMP | BPF_CALL))
+			goto next_insn;
+		if (insn->src_reg == BPF_PSEUDO_CALL)
+			goto next_insn;
+		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+			ret = bpf_fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
+			if (ret)
+				return ret;
+			if (cnt == 0)
+				goto next_insn;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta	 += cnt - 1;
+			env->prog = prog = new_prog;
+			insn	  = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Skip inlining the helper call if the JIT does it. */
+		if (bpf_jit_inlines_helper_call(insn->imm))
+			goto next_insn;
+
+		if (insn->imm == BPF_FUNC_get_route_realm)
+			prog->dst_needed = 1;
+		if (insn->imm == BPF_FUNC_get_prandom_u32)
+			bpf_user_rnd_init_once();
+		if (insn->imm == BPF_FUNC_override_return)
+			prog->kprobe_override = 1;
+		if (insn->imm == BPF_FUNC_tail_call) {
+			/* If we tail call into other programs, we
+			 * cannot make any assumptions since they can
+			 * be replaced dynamically during runtime in
+			 * the program array.
+			 */
+			prog->cb_access = 1;
+			if (!bpf_allow_tail_call_in_subprogs(env))
+				prog->aux->stack_depth = MAX_BPF_STACK;
+			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
+
+			/* mark bpf_tail_call as different opcode to avoid
+			 * conditional branch in the interpreter for every normal
+			 * call and to prevent accidental JITing by JIT compiler
+			 * that doesn't support bpf_tail_call yet
+			 */
+			insn->imm = 0;
+			insn->code = BPF_JMP | BPF_TAIL_CALL;
+
+			aux = &env->insn_aux_data[i + delta];
+			if (env->bpf_capable && !prog->blinding_requested &&
+			    prog->jit_requested &&
+			    !bpf_map_key_poisoned(aux) &&
+			    !bpf_map_ptr_poisoned(aux) &&
+			    !bpf_map_ptr_unpriv(aux)) {
+				struct bpf_jit_poke_descriptor desc = {
+					.reason = BPF_POKE_REASON_TAIL_CALL,
+					.tail_call.map = aux->map_ptr_state.map_ptr,
+					.tail_call.key = bpf_map_key_immediate(aux),
+					.insn_idx = i + delta,
+				};
+
+				ret = bpf_jit_add_poke_descriptor(prog, &desc);
+				if (ret < 0) {
+					verbose(env, "adding tail call poke descriptor failed\n");
+					return ret;
+				}
+
+				insn->imm = ret + 1;
+				goto next_insn;
+			}
+
+			if (!bpf_map_ptr_unpriv(aux))
+				goto next_insn;
+
+			/* instead of changing every JIT dealing with tail_call
+			 * emit two extra insns:
+			 * if (index >= max_entries) goto out;
+			 * index &= array->index_mask;
+			 * to avoid out-of-bounds cpu speculation
+			 */
+			if (bpf_map_ptr_poisoned(aux)) {
+				verbose(env, "tail_call abusing map_ptr\n");
+				return -EINVAL;
+			}
+
+			map_ptr = aux->map_ptr_state.map_ptr;
+			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
+						  map_ptr->max_entries, 2);
+			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
+						    container_of(map_ptr,
+								 struct bpf_array,
+								 map)->index_mask);
+			insn_buf[2] = *insn;
+			cnt = 3;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		if (insn->imm == BPF_FUNC_timer_set_callback) {
+			/* The verifier will process callback_fn as many times as necessary
+			 * with different maps and the register states prepared by
+			 * set_timer_callback_state will be accurate.
+			 *
+			 * The following use case is valid:
+			 *   map1 is shared by prog1, prog2, prog3.
+			 *   prog1 calls bpf_timer_init for some map1 elements
+			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
+			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
+			 *   prog3 calls bpf_timer_start for some map1 elements.
+			 *     Those that were not both bpf_timer_init-ed and
+			 *     bpf_timer_set_callback-ed will return -EINVAL.
+			 */
+			struct bpf_insn ld_addrs[2] = {
+				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
+			};
+
+			insn_buf[0] = ld_addrs[0];
+			insn_buf[1] = ld_addrs[1];
+			insn_buf[2] = *insn;
+			cnt = 3;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto patch_call_imm;
+		}
+
+		/* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
+		if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
+			/* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
+			 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
+			 */
+			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+			insn_buf[1] = *insn;
+			cnt = 2;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			goto patch_call_imm;
+		}
+
+		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
+		 * and other inlining handlers are currently limited to 64 bit
+		 * only.
+		 */
+		if (prog->jit_requested && BITS_PER_LONG == 64 &&
+		    (insn->imm == BPF_FUNC_map_lookup_elem ||
+		     insn->imm == BPF_FUNC_map_update_elem ||
+		     insn->imm == BPF_FUNC_map_delete_elem ||
+		     insn->imm == BPF_FUNC_map_push_elem   ||
+		     insn->imm == BPF_FUNC_map_pop_elem    ||
+		     insn->imm == BPF_FUNC_map_peek_elem   ||
+		     insn->imm == BPF_FUNC_redirect_map    ||
+		     insn->imm == BPF_FUNC_for_each_map_elem ||
+		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
+			aux = &env->insn_aux_data[i + delta];
+			if (bpf_map_ptr_poisoned(aux))
+				goto patch_call_imm;
+
+			map_ptr = aux->map_ptr_state.map_ptr;
+			ops = map_ptr->ops;
+			if (insn->imm == BPF_FUNC_map_lookup_elem &&
+			    ops->map_gen_lookup) {
+				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
+				if (cnt == -EOPNOTSUPP)
+					goto patch_map_ops_generic;
+				if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
+					verifier_bug(env, "%d insns generated for map lookup", cnt);
+					return -EFAULT;
+				}
+
+				new_prog = bpf_patch_insn_data(env, i + delta,
+							       insn_buf, cnt);
+				if (!new_prog)
+					return -ENOMEM;
+
+				delta    += cnt - 1;
+				env->prog = prog = new_prog;
+				insn      = new_prog->insnsi + i + delta;
+				goto next_insn;
+			}
+
+			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
+				     (void *(*)(struct bpf_map *map, void *key))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
+				     (long (*)(struct bpf_map *map, void *key))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
+				     (long (*)(struct bpf_map *map, void *key, void *value,
+					      u64 flags))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
+				     (long (*)(struct bpf_map *map, void *value,
+					      u64 flags))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
+				     (long (*)(struct bpf_map *map, void *value))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
+				     (long (*)(struct bpf_map *map, void *value))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_redirect,
+				     (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
+				     (long (*)(struct bpf_map *map,
+					      bpf_callback_t callback_fn,
+					      void *callback_ctx,
+					      u64 flags))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
+				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
+
+patch_map_ops_generic:
+			switch (insn->imm) {
+			case BPF_FUNC_map_lookup_elem:
+				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
+				goto next_insn;
+			case BPF_FUNC_map_update_elem:
+				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
+				goto next_insn;
+			case BPF_FUNC_map_delete_elem:
+				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
+				goto next_insn;
+			case BPF_FUNC_map_push_elem:
+				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
+				goto next_insn;
+			case BPF_FUNC_map_pop_elem:
+				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
+				goto next_insn;
+			case BPF_FUNC_map_peek_elem:
+				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
+				goto next_insn;
+			case BPF_FUNC_redirect_map:
+				insn->imm = BPF_CALL_IMM(ops->map_redirect);
+				goto next_insn;
+			case BPF_FUNC_for_each_map_elem:
+				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
+				goto next_insn;
+			case BPF_FUNC_map_lookup_percpu_elem:
+				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
+				goto next_insn;
+			}
+
+			goto patch_call_imm;
+		}
+
+		/* Implement bpf_jiffies64 inline. */
+		if (prog->jit_requested && BITS_PER_LONG == 64 &&
+		    insn->imm == BPF_FUNC_jiffies64) {
+			struct bpf_insn ld_jiffies_addr[2] = {
+				BPF_LD_IMM64(BPF_REG_0,
+					     (unsigned long)&jiffies),
+			};
+
+			insn_buf[0] = ld_jiffies_addr[0];
+			insn_buf[1] = ld_jiffies_addr[1];
+			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
+						  BPF_REG_0, 0);
+			cnt = 3;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
+						       cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+		/* Implement bpf_get_smp_processor_id() inline. */
+		if (insn->imm == BPF_FUNC_get_smp_processor_id &&
+		    bpf_verifier_inlines_helper_call(env, insn->imm)) {
+			/* BPF_FUNC_get_smp_processor_id inlining is an
+			 * optimization, so if cpu_number is ever
+			 * changed in some incompatible and hard to support
+			 * way, it's fine to back out this inlining logic
+			 */
+#ifdef CONFIG_SMP
+			insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
+			insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
+			insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
+			cnt = 3;
+#else
+			insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
+			cnt = 1;
+#endif
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */
+		if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) &&
+		    bpf_verifier_inlines_helper_call(env, insn->imm)) {
+			insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&current_task);
+			insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
+			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
+			cnt = 3;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+#endif
+		/* Implement bpf_get_func_arg inline. */
+		if (prog_type == BPF_PROG_TYPE_TRACING &&
+		    insn->imm == BPF_FUNC_get_func_arg) {
+			if (eatype == BPF_TRACE_RAW_TP) {
+				int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
+
+				/* skip 'void *__data' in btf_trace_##name() and save to reg0 */
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
+				cnt = 1;
+			} else {
+				/* Load nr_args from ctx - 8 */
+				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+				cnt = 2;
+			}
+			insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
+			insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
+			insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
+			insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
+			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
+			insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0);
+			insn_buf[cnt++] = BPF_JMP_A(1);
+			insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Implement bpf_get_func_ret inline. */
+		if (prog_type == BPF_PROG_TYPE_TRACING &&
+		    insn->imm == BPF_FUNC_get_func_ret) {
+			if (eatype == BPF_TRACE_FEXIT ||
+			    eatype == BPF_TRACE_FSESSION ||
+			    eatype == BPF_MODIFY_RETURN) {
+				/* Load nr_args from ctx - 8 */
+				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+				insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
+				insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
+				insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
+				insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
+				insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
+				cnt = 7;
+			} else {
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
+				cnt = 1;
+			}
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Implement get_func_arg_cnt inline. */
+		if (prog_type == BPF_PROG_TYPE_TRACING &&
+		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
+			if (eatype == BPF_TRACE_RAW_TP) {
+				int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
+
+				/* skip 'void *__data' in btf_trace_##name() and save to reg0 */
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
+				cnt = 1;
+			} else {
+				/* Load nr_args from ctx - 8 */
+				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+				cnt = 2;
+			}
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Implement bpf_get_func_ip inline. */
+		if (prog_type == BPF_PROG_TYPE_TRACING &&
+		    insn->imm == BPF_FUNC_get_func_ip) {
+			/* Load IP address from ctx - 16 */
+			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
+			if (!new_prog)
+				return -ENOMEM;
+
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Implement bpf_get_branch_snapshot inline. */
+		if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
+		    prog->jit_requested && BITS_PER_LONG == 64 &&
+		    insn->imm == BPF_FUNC_get_branch_snapshot) {
+			/* We are dealing with the following func protos:
+			 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
+			 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
+			 */
+			const u32 br_entry_size = sizeof(struct perf_branch_entry);
+
+			/* struct perf_branch_entry is part of UAPI and is
+			 * used as an array element, so extremely unlikely to
+			 * ever grow or shrink
+			 */
+			BUILD_BUG_ON(br_entry_size != 24);
+
+			/* if (unlikely(flags)) return -EINVAL */
+			insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
+
+			/* Transform size (bytes) into number of entries (cnt = size / 24).
+			 * But to avoid expensive division instruction, we implement
+			 * divide-by-3 through multiplication, followed by further
+			 * division by 8 through 3-bit right shift.
+			 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
+			 * p. 227, chapter "Unsigned Division by 3" for details and proofs.
+			 *
+			 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
+			 */
+			insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
+			insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
+			insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
+
+			/* call perf_snapshot_branch_stack implementation */
+			insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
+			/* if (entry_cnt == 0) return -ENOENT */
+			insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
+			/* return entry_cnt * sizeof(struct perf_branch_entry) */
+			insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
+			insn_buf[7] = BPF_JMP_A(3);
+			/* return -EINVAL; */
+			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
+			insn_buf[9] = BPF_JMP_A(1);
+			/* return -ENOENT; */
+			insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
+			cnt = 11;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Implement bpf_kptr_xchg inline */
+		if (prog->jit_requested && BITS_PER_LONG == 64 &&
+		    insn->imm == BPF_FUNC_kptr_xchg &&
+		    bpf_jit_supports_ptr_xchg()) {
+			insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
+			insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
+			cnt = 2;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+patch_call_imm:
+		fn = env->ops->get_func_proto(insn->imm, env->prog);
+		/* all functions that have prototype and verifier allowed
+		 * programs to call them, must be real in-kernel functions
+		 */
+		if (!fn->func) {
+			verifier_bug(env,
+				     "not inlined functions %s#%d is missing func",
+				     func_id_name(insn->imm), insn->imm);
+			return -EFAULT;
+		}
+		insn->imm = fn->func - __bpf_call_base;
+next_insn:
+		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+			subprogs[cur_subprog].stack_depth += stack_depth_extra;
+			subprogs[cur_subprog].stack_extra = stack_depth_extra;
+
+			stack_depth = subprogs[cur_subprog].stack_depth;
+			if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
+				verbose(env, "stack size %d(extra %d) is too large\n",
+					stack_depth, stack_depth_extra);
+				return -EINVAL;
+			}
+			cur_subprog++;
+			stack_depth = subprogs[cur_subprog].stack_depth;
+			stack_depth_extra = 0;
+		}
+		i++;
+		insn++;
+	}
+
+	env->prog->aux->stack_depth = subprogs[0].stack_depth;
+	for (i = 0; i < env->subprog_cnt; i++) {
+		int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
+		int subprog_start = subprogs[i].start;
+		int stack_slots = subprogs[i].stack_extra / 8;
+		int slots = delta, cnt = 0;
+
+		if (!stack_slots)
+			continue;
+		/* We need two slots in case timed may_goto is supported. */
+		if (stack_slots > slots) {
+			verifier_bug(env, "stack_slots supports may_goto only");
+			return -EFAULT;
+		}
+
+		stack_depth = subprogs[i].stack_depth;
+		if (bpf_jit_supports_timed_may_goto()) {
+			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
+						     BPF_MAX_TIMED_LOOPS);
+			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
+		} else {
+			/* Add ST insn to subprog prologue to init extra stack */
+			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
+						     BPF_MAX_LOOPS);
+		}
+		/* Copy first actual insn to preserve it */
+		insn_buf[cnt++] = env->prog->insnsi[subprog_start];
+
+		new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
+		if (!new_prog)
+			return -ENOMEM;
+		env->prog = prog = new_prog;
+		/*
+		 * If may_goto is a first insn of a prog there could be a jmp
+		 * insn that points to it, hence adjust all such jmps to point
+		 * to insn after BPF_ST that inits may_goto count.
+		 * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
+		 */
+		WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
+	}
+
+	/* Since poke tab is now finalized, publish aux to tracker. */
+	for (i = 0; i < prog->aux->size_poke_tab; i++) {
+		map_ptr = prog->aux->poke_tab[i].tail_call.map;
+		if (!map_ptr->ops->map_poke_track ||
+		    !map_ptr->ops->map_poke_untrack ||
+		    !map_ptr->ops->map_poke_run) {
+			verifier_bug(env, "poke tab is misconfigured");
+			return -EFAULT;
+		}
+
+		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
+		if (ret < 0) {
+			verbose(env, "tracking tail call prog failed\n");
+			return ret;
+		}
+	}
+
+	ret = sort_kfunc_descs_by_imm_off(env);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
+					int position,
+					s32 stack_base,
+					u32 callback_subprogno,
+					u32 *total_cnt)
+{
+	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
+	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
+	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
+	int reg_loop_max = BPF_REG_6;
+	int reg_loop_cnt = BPF_REG_7;
+	int reg_loop_ctx = BPF_REG_8;
+
+	struct bpf_insn *insn_buf = env->insn_buf;
+	struct bpf_prog *new_prog;
+	u32 callback_start;
+	u32 call_insn_offset;
+	s32 callback_offset;
+	u32 cnt = 0;
+
+	/* This represents an inlined version of bpf_iter.c:bpf_loop,
+	 * be careful to modify this code in sync.
+	 */
+
+	/* Return error and jump to the end of the patch if
+	 * expected number of iterations is too big.
+	 */
+	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
+	insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
+	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
+	/* spill R6, R7, R8 to use these as loop vars */
+	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
+	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
+	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
+	/* initialize loop vars */
+	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
+	insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
+	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
+	/* loop header,
+	 * if reg_loop_cnt >= reg_loop_max skip the loop body
+	 */
+	insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
+	/* callback call,
+	 * correct callback offset would be set after patching
+	 */
+	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
+	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
+	insn_buf[cnt++] = BPF_CALL_REL(0);
+	/* increment loop counter */
+	insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
+	/* jump to loop header if callback returned 0 */
+	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
+	/* return value of bpf_loop,
+	 * set R0 to the number of iterations
+	 */
+	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
+	/* restore original values of R6, R7, R8 */
+	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
+	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
+	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
+
+	*total_cnt = cnt;
+	new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
+	if (!new_prog)
+		return new_prog;
+
+	/* callback start is known only after patching */
+	callback_start = env->subprog_info[callback_subprogno].start;
+	/* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
+	call_insn_offset = position + 12;
+	callback_offset = callback_start - call_insn_offset - 1;
+	new_prog->insnsi[call_insn_offset].imm = callback_offset;
+
+	return new_prog;
+}
+
+static bool is_bpf_loop_call(struct bpf_insn *insn)
+{
+	return insn->code == (BPF_JMP | BPF_CALL) &&
+		insn->src_reg == 0 &&
+		insn->imm == BPF_FUNC_loop;
+}
+
+/* For all sub-programs in the program (including main) check
+ * insn_aux_data to see if there are bpf_loop calls that require
+ * inlining. If such calls are found the calls are replaced with a
+ * sequence of instructions produced by `inline_bpf_loop` function and
+ * subprog stack_depth is increased by the size of 3 registers.
+ * This stack space is used to spill values of the R6, R7, R8.  These
+ * registers are used to store the loop bound, counter and context
+ * variables.
+ */
+int bpf_optimize_bpf_loop(struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *subprogs = env->subprog_info;
+	int i, cur_subprog = 0, cnt, delta = 0;
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	u16 stack_depth = subprogs[cur_subprog].stack_depth;
+	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+	u16 stack_depth_extra = 0;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		struct bpf_loop_inline_state *inline_state =
+			&env->insn_aux_data[i + delta].loop_inline_state;
+
+		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
+			struct bpf_prog *new_prog;
+
+			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
+			new_prog = inline_bpf_loop(env,
+						   i + delta,
+						   -(stack_depth + stack_depth_extra),
+						   inline_state->callback_subprogno,
+						   &cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta     += cnt - 1;
+			env->prog  = new_prog;
+			insn       = new_prog->insnsi + i + delta;
+		}
+
+		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+			subprogs[cur_subprog].stack_depth += stack_depth_extra;
+			cur_subprog++;
+			stack_depth = subprogs[cur_subprog].stack_depth;
+			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+			stack_depth_extra = 0;
+		}
+	}
+
+	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
+
+	return 0;
+}
+
+/* Remove unnecessary spill/fill pairs, members of fastcall pattern,
+ * adjust subprograms stack depth when possible.
+ */
+int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *subprog = env->subprog_info;
+	struct bpf_insn_aux_data *aux = env->insn_aux_data;
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	u32 spills_num;
+	bool modified = false;
+	int i, j;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (aux[i].fastcall_spills_num > 0) {
+			spills_num = aux[i].fastcall_spills_num;
+			/* NOPs would be removed by opt_remove_nops() */
+			for (j = 1; j <= spills_num; ++j) {
+				*(insn - j) = NOP;
+				*(insn + j) = NOP;
+			}
+			modified = true;
+		}
+		if ((subprog + 1)->start == i + 1) {
+			if (modified && !subprog->keep_fastcall_stack)
+				subprog->stack_depth = -subprog->fastcall_stack_off;
+			subprog++;
+			modified = false;
+		}
+	}
+
+	return 0;
+}
+
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 967e132f2662..31e03aa6b070 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -195,9 +195,6 @@ struct bpf_verifier_stack_elem {
 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
 #define BPF_COMPLEXITY_LIMIT_STATES	64
 
-#define BPF_MAP_KEY_POISON	(1ULL << 63)
-#define BPF_MAP_KEY_SEEN	(1ULL << 62)
-
 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512
 
 #define BPF_PRIV_STACK_MIN_SIZE		64
@@ -215,16 +212,6 @@ static const char *non_sleepable_context_description(struct bpf_verifier_env *en
 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
 
-static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
-{
-	return aux->map_ptr_state.poison;
-}
-
-static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
-{
-	return aux->map_ptr_state.unpriv;
-}
-
 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
 			      struct bpf_map *map,
 			      bool unpriv, bool poison)
@@ -235,21 +222,6 @@ static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
 	aux->map_ptr_state.map_ptr = map;
 }
 
-static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
-{
-	return aux->map_key_state & BPF_MAP_KEY_POISON;
-}
-
-static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
-{
-	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
-}
-
-static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
-{
-	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
-}
-
 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
 {
 	bool poisoned = bpf_map_key_poisoned(aux);
@@ -464,7 +436,7 @@ static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
 	return btf_type_name(env->prog->aux->btf, info->type_id);
 }
 
-static void mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
+void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
 {
 	struct bpf_subprog_info *info = subprog_info(env, subprog);
 
@@ -604,13 +576,6 @@ static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
 	return ref_obj_uses > 1;
 }
 
-static bool is_cmpxchg_insn(const struct bpf_insn *insn)
-{
-	return BPF_CLASS(insn->code) == BPF_STX &&
-	       BPF_MODE(insn->code) == BPF_ATOMIC &&
-	       insn->imm == BPF_CMPXCHG;
-}
-
 static bool is_atomic_load_insn(const struct bpf_insn *insn)
 {
 	return BPF_CLASS(insn->code) == BPF_STX &&
@@ -3062,12 +3027,6 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
 }
 
 
-enum reg_arg_type {
-	SRC_OP,		/* register is used as source operand */
-	DST_OP,		/* register is used as destination operand */
-	DST_OP_NO_MARK	/* same as above, check only, don't mark */
-};
-
 static int cmp_subprogs(const void *a, const void *b)
 {
 	return ((struct bpf_subprog_info *)a)->start -
@@ -3191,41 +3150,19 @@ static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
 	return ret;
 }
 
-#define MAX_KFUNC_DESCS 256
 #define MAX_KFUNC_BTFS	256
 
-struct bpf_kfunc_desc {
-	struct btf_func_model func_model;
-	u32 func_id;
-	s32 imm;
-	u16 offset;
-	unsigned long addr;
-};
-
 struct bpf_kfunc_btf {
 	struct btf *btf;
 	struct module *module;
 	u16 offset;
 };
 
-struct bpf_kfunc_desc_tab {
-	/* Sorted by func_id (BTF ID) and offset (fd_array offset) during
-	 * verification. JITs do lookups by bpf_insn, where func_id may not be
-	 * available, therefore at the end of verification do_misc_fixups()
-	 * sorts this by imm and offset.
-	 */
-	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
-	u32 nr_descs;
-};
-
 struct bpf_kfunc_btf_tab {
 	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
 	u32 nr_descs;
 };
 
-static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc,
-			    int insn_idx);
-
 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
 {
 	const struct bpf_kfunc_desc *d0 = a;
@@ -3453,7 +3390,7 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
 	return 0;
 }
 
-static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
+int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset)
 {
 	struct bpf_kfunc_btf_tab *btf_tab;
 	struct btf_func_model func_model;
@@ -3548,95 +3485,11 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
 	return 0;
 }
 
-static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
-{
-	const struct bpf_kfunc_desc *d0 = a;
-	const struct bpf_kfunc_desc *d1 = b;
-
-	if (d0->imm != d1->imm)
-		return d0->imm < d1->imm ? -1 : 1;
-	if (d0->offset != d1->offset)
-		return d0->offset < d1->offset ? -1 : 1;
-	return 0;
-}
-
-static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
-{
-	unsigned long call_imm;
-
-	if (bpf_jit_supports_far_kfunc_call()) {
-		call_imm = desc->func_id;
-	} else {
-		call_imm = BPF_CALL_IMM(desc->addr);
-		/* Check whether the relative offset overflows desc->imm */
-		if ((unsigned long)(s32)call_imm != call_imm) {
-			verbose(env, "address of kernel func_id %u is out of range\n",
-				desc->func_id);
-			return -EINVAL;
-		}
-	}
-	desc->imm = call_imm;
-	return 0;
-}
-
-static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
-{
-	struct bpf_kfunc_desc_tab *tab;
-	int i, err;
-
-	tab = env->prog->aux->kfunc_tab;
-	if (!tab)
-		return 0;
-
-	for (i = 0; i < tab->nr_descs; i++) {
-		err = set_kfunc_desc_imm(env, &tab->descs[i]);
-		if (err)
-			return err;
-	}
-
-	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
-	     kfunc_desc_cmp_by_imm_off, NULL);
-	return 0;
-}
-
 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
 {
 	return !!prog->aux->kfunc_tab;
 }
 
-const struct btf_func_model *
-bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
-			 const struct bpf_insn *insn)
-{
-	const struct bpf_kfunc_desc desc = {
-		.imm = insn->imm,
-		.offset = insn->off,
-	};
-	const struct bpf_kfunc_desc *res;
-	struct bpf_kfunc_desc_tab *tab;
-
-	tab = prog->aux->kfunc_tab;
-	res = bsearch(&desc, tab->descs, tab->nr_descs,
-		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
-
-	return res ? &res->func_model : NULL;
-}
-
-static int add_kfunc_in_insns(struct bpf_verifier_env *env,
-			      struct bpf_insn *insn, int cnt)
-{
-	int i, ret;
-
-	for (i = 0; i < cnt; i++, insn++) {
-		if (bpf_pseudo_kfunc_call(insn)) {
-			ret = add_kfunc_call(env, insn->imm, insn->off);
-			if (ret < 0)
-				return ret;
-		}
-	}
-	return 0;
-}
-
 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 {
 	struct bpf_subprog_info *subprog = env->subprog_info;
@@ -3661,7 +3514,7 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
 			ret = add_subprog(env, i + insn->imm + 1);
 		else
-			ret = add_kfunc_call(env, insn->imm, insn->off);
+			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
 
 		if (ret < 0)
 			return ret;
@@ -3683,7 +3536,7 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 			if (env->subprog_info[i].start != ex_cb_insn)
 				continue;
 			env->exception_callback_subprog = i;
-			mark_subprog_exc_cb(env, i);
+			bpf_mark_subprog_exc_cb(env, i);
 			break;
 		}
 	}
@@ -3894,8 +3747,8 @@ static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state
  * code only. It returns TRUE if the source or destination register operates
  * on 64-bit, otherwise return FALSE.
  */
-static bool is_reg64(struct bpf_insn *insn,
-		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
+bool bpf_is_reg64(struct bpf_insn *insn,
+	      u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t)
 {
 	u8 code, class, op;
 
@@ -3980,41 +3833,6 @@ static bool is_reg64(struct bpf_insn *insn,
 	return true;
 }
 
-/* Return the regno defined by the insn, or -1. */
-static int insn_def_regno(const struct bpf_insn *insn)
-{
-	switch (BPF_CLASS(insn->code)) {
-	case BPF_JMP:
-	case BPF_JMP32:
-	case BPF_ST:
-		return -1;
-	case BPF_STX:
-		if (BPF_MODE(insn->code) == BPF_ATOMIC ||
-		    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
-			if (insn->imm == BPF_CMPXCHG)
-				return BPF_REG_0;
-			else if (insn->imm == BPF_LOAD_ACQ)
-				return insn->dst_reg;
-			else if (insn->imm & BPF_FETCH)
-				return insn->src_reg;
-		}
-		return -1;
-	default:
-		return insn->dst_reg;
-	}
-}
-
-/* Return TRUE if INSN has defined any 32-bit value explicitly. */
-static bool insn_has_def32(struct bpf_insn *insn)
-{
-	int dst_reg = insn_def_regno(insn);
-
-	if (dst_reg == -1)
-		return false;
-
-	return !is_reg64(insn, dst_reg, NULL, DST_OP);
-}
-
 static void mark_insn_zext(struct bpf_verifier_env *env,
 			   struct bpf_reg_state *reg)
 {
@@ -4029,7 +3847,7 @@ static void mark_insn_zext(struct bpf_verifier_env *env,
 }
 
 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
-			   enum reg_arg_type t)
+			   enum bpf_reg_arg_type t)
 {
 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
 	struct bpf_reg_state *reg;
@@ -4038,7 +3856,7 @@ static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *r
 	mark_reg_scratched(env, regno);
 
 	reg = &regs[regno];
-	rw64 = is_reg64(insn, regno, reg, t);
+	rw64 = bpf_is_reg64(insn, regno, reg, t);
 	if (t == SRC_OP) {
 		/* check whether register used as source operand can be read */
 		if (reg->type == NOT_INIT) {
@@ -4067,7 +3885,7 @@ static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *r
 }
 
 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
-			 enum reg_arg_type t)
+			 enum bpf_reg_arg_type t)
 {
 	struct bpf_verifier_state *vstate = env->cur_state;
 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
@@ -6407,11 +6225,9 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 	return 0;
 }
 
-#define MAX_PACKET_OFF 0xffff
-
 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
-				       const struct bpf_call_arg_meta *meta,
-				       enum bpf_access_type t)
+			       const struct bpf_call_arg_meta *meta,
+			       enum bpf_access_type t)
 {
 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
 
@@ -7103,19 +6919,6 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
 	return 0;
 }
 
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-static int get_callee_stack_depth(struct bpf_verifier_env *env,
-				  const struct bpf_insn *insn, int idx)
-{
-	int start = idx + insn->imm + 1, subprog;
-
-	subprog = bpf_find_subprog(env, start);
-	if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
-		return -EFAULT;
-	return env->subprog_info[subprog].stack_depth;
-}
-#endif
-
 static int __check_buffer_access(struct bpf_verifier_env *env,
 				 const char *buf_info,
 				 const struct bpf_reg_state *reg,
@@ -10351,7 +10154,7 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
 	return false;
 }
 
-static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
+bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
 {
 	return env->prog->jit_requested &&
 	       bpf_jit_supports_subprog_tailcalls();
@@ -10496,7 +10299,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_FUNC_tail_call:
 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
 			goto error;
-		if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
+		if (env->subprog_cnt > 1 && !bpf_allow_tail_call_in_subprogs(env)) {
 			verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
 			return -EINVAL;
 		}
@@ -18733,7 +18536,7 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
  * replacement patch is presumed to follow bpf_fastcall contract
  * (see mark_fastcall_pattern_for_call() below).
  */
-static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
+bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
 {
 	switch (imm) {
 #ifdef CONFIG_X86_64
@@ -18765,7 +18568,7 @@ bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
 			/* error would be reported later */
 			return false;
 		cs->fastcall = fn->allow_fastcall &&
-			       (verifier_inlines_helper_call(env, call->imm) ||
+			       (bpf_verifier_inlines_helper_call(env, call->imm) ||
 				bpf_jit_inlines_helper_call(call->imm));
 		cs->is_void = fn->ret_type == RET_VOID;
 		cs->num_params = 0;
@@ -22555,53 +22358,6 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
 	}
 }
 
-/* single env->prog->insni[off] instruction was replaced with the range
- * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
- * [0, off) and [off, end) to new locations, so the patched range stays zero
- */
-static void adjust_insn_aux_data(struct bpf_verifier_env *env,
-				 struct bpf_prog *new_prog, u32 off, u32 cnt)
-{
-	struct bpf_insn_aux_data *data = env->insn_aux_data;
-	struct bpf_insn *insn = new_prog->insnsi;
-	u32 old_seen = data[off].seen;
-	u32 prog_len;
-	int i;
-
-	/* aux info at OFF always needs adjustment, no matter fast path
-	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
-	 * original insn at old prog.
-	 */
-	data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
-
-	if (cnt == 1)
-		return;
-	prog_len = new_prog->len;
-
-	memmove(data + off + cnt - 1, data + off,
-		sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
-	memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
-	for (i = off; i < off + cnt - 1; i++) {
-		/* Expand insni[off]'s seen count to the patched range. */
-		data[i].seen = old_seen;
-		data[i].zext_dst = insn_has_def32(insn + i);
-	}
-}
-
-static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
-{
-	int i;
-
-	if (len == 1)
-		return;
-	/* NOTE: fake 'exit' subprog should be updated as well. */
-	for (i = 0; i <= env->subprog_cnt; i++) {
-		if (env->subprog_info[i].start <= off)
-			continue;
-		env->subprog_info[i].start += len - 1;
-	}
-}
-
 static void release_insn_arrays(struct bpf_verifier_env *env)
 {
 	int i;
@@ -22610,281 +22366,7 @@ static void release_insn_arrays(struct bpf_verifier_env *env)
 		bpf_insn_array_release(env->insn_array_maps[i]);
 }
 
-static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
-{
-	int i;
-
-	if (len == 1)
-		return;
-
-	for (i = 0; i < env->insn_array_map_cnt; i++)
-		bpf_insn_array_adjust(env->insn_array_maps[i], off, len);
-}
-
-static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
-{
-	int i;
-
-	for (i = 0; i < env->insn_array_map_cnt; i++)
-		bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len);
-}
-
-static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
-{
-	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
-	int i, sz = prog->aux->size_poke_tab;
-	struct bpf_jit_poke_descriptor *desc;
-
-	for (i = 0; i < sz; i++) {
-		desc = &tab[i];
-		if (desc->insn_idx <= off)
-			continue;
-		desc->insn_idx += len - 1;
-	}
-}
-
-static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
-					    const struct bpf_insn *patch, u32 len)
-{
-	struct bpf_prog *new_prog;
-	struct bpf_insn_aux_data *new_data = NULL;
-
-	if (len > 1) {
-		new_data = vrealloc(env->insn_aux_data,
-				    array_size(env->prog->len + len - 1,
-					       sizeof(struct bpf_insn_aux_data)),
-				    GFP_KERNEL_ACCOUNT | __GFP_ZERO);
-		if (!new_data)
-			return NULL;
-
-		env->insn_aux_data = new_data;
-	}
-
-	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
-	if (IS_ERR(new_prog)) {
-		if (PTR_ERR(new_prog) == -ERANGE)
-			verbose(env,
-				"insn %d cannot be patched due to 16-bit range\n",
-				env->insn_aux_data[off].orig_idx);
-		return NULL;
-	}
-	adjust_insn_aux_data(env, new_prog, off, len);
-	adjust_subprog_starts(env, off, len);
-	adjust_insn_arrays(env, off, len);
-	adjust_poke_descs(new_prog, off, len);
-	return new_prog;
-}
-
-/*
- * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
- * jump offset by 'delta'.
- */
-static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
-{
-	struct bpf_insn *insn = prog->insnsi;
-	u32 insn_cnt = prog->len, i;
-	s32 imm;
-	s16 off;
-
-	for (i = 0; i < insn_cnt; i++, insn++) {
-		u8 code = insn->code;
-
-		if (tgt_idx <= i && i < tgt_idx + delta)
-			continue;
-
-		if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
-		    BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
-			continue;
-
-		if (insn->code == (BPF_JMP32 | BPF_JA)) {
-			if (i + 1 + insn->imm != tgt_idx)
-				continue;
-			if (check_add_overflow(insn->imm, delta, &imm))
-				return -ERANGE;
-			insn->imm = imm;
-		} else {
-			if (i + 1 + insn->off != tgt_idx)
-				continue;
-			if (check_add_overflow(insn->off, delta, &off))
-				return -ERANGE;
-			insn->off = off;
-		}
-	}
-	return 0;
-}
-
-static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
-					      u32 off, u32 cnt)
-{
-	int i, j;
-
-	/* find first prog starting at or after off (first to remove) */
-	for (i = 0; i < env->subprog_cnt; i++)
-		if (env->subprog_info[i].start >= off)
-			break;
-	/* find first prog starting at or after off + cnt (first to stay) */
-	for (j = i; j < env->subprog_cnt; j++)
-		if (env->subprog_info[j].start >= off + cnt)
-			break;
-	/* if j doesn't start exactly at off + cnt, we are just removing
-	 * the front of previous prog
-	 */
-	if (env->subprog_info[j].start != off + cnt)
-		j--;
-
-	if (j > i) {
-		struct bpf_prog_aux *aux = env->prog->aux;
-		int move;
-
-		/* move fake 'exit' subprog as well */
-		move = env->subprog_cnt + 1 - j;
-
-		memmove(env->subprog_info + i,
-			env->subprog_info + j,
-			sizeof(*env->subprog_info) * move);
-		env->subprog_cnt -= j - i;
-
-		/* remove func_info */
-		if (aux->func_info) {
-			move = aux->func_info_cnt - j;
-
-			memmove(aux->func_info + i,
-				aux->func_info + j,
-				sizeof(*aux->func_info) * move);
-			aux->func_info_cnt -= j - i;
-			/* func_info->insn_off is set after all code rewrites,
-			 * in adjust_btf_func() - no need to adjust
-			 */
-		}
-	} else {
-		/* convert i from "first prog to remove" to "first to adjust" */
-		if (env->subprog_info[i].start == off)
-			i++;
-	}
-
-	/* update fake 'exit' subprog as well */
-	for (; i <= env->subprog_cnt; i++)
-		env->subprog_info[i].start -= cnt;
-
-	return 0;
-}
-
-static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
-				      u32 cnt)
-{
-	struct bpf_prog *prog = env->prog;
-	u32 i, l_off, l_cnt, nr_linfo;
-	struct bpf_line_info *linfo;
-
-	nr_linfo = prog->aux->nr_linfo;
-	if (!nr_linfo)
-		return 0;
-
-	linfo = prog->aux->linfo;
-
-	/* find first line info to remove, count lines to be removed */
-	for (i = 0; i < nr_linfo; i++)
-		if (linfo[i].insn_off >= off)
-			break;
-
-	l_off = i;
-	l_cnt = 0;
-	for (; i < nr_linfo; i++)
-		if (linfo[i].insn_off < off + cnt)
-			l_cnt++;
-		else
-			break;
-
-	/* First live insn doesn't match first live linfo, it needs to "inherit"
-	 * last removed linfo.  prog is already modified, so prog->len == off
-	 * means no live instructions after (tail of the program was removed).
-	 */
-	if (prog->len != off && l_cnt &&
-	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
-		l_cnt--;
-		linfo[--i].insn_off = off + cnt;
-	}
-
-	/* remove the line info which refer to the removed instructions */
-	if (l_cnt) {
-		memmove(linfo + l_off, linfo + i,
-			sizeof(*linfo) * (nr_linfo - i));
-
-		prog->aux->nr_linfo -= l_cnt;
-		nr_linfo = prog->aux->nr_linfo;
-	}
-
-	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
-	for (i = l_off; i < nr_linfo; i++)
-		linfo[i].insn_off -= cnt;
-
-	/* fix up all subprogs (incl. 'exit') which start >= off */
-	for (i = 0; i <= env->subprog_cnt; i++)
-		if (env->subprog_info[i].linfo_idx > l_off) {
-			/* program may have started in the removed region but
-			 * may not be fully removed
-			 */
-			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
-				env->subprog_info[i].linfo_idx -= l_cnt;
-			else
-				env->subprog_info[i].linfo_idx = l_off;
-		}
-
-	return 0;
-}
-
-/*
- * Clean up dynamically allocated fields of aux data for instructions [start, ...]
- */
-static void clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
-{
-	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
-	struct bpf_insn *insns = env->prog->insnsi;
-	int end = start + len;
-	int i;
-
-	for (i = start; i < end; i++) {
-		if (aux_data[i].jt) {
-			kvfree(aux_data[i].jt);
-			aux_data[i].jt = NULL;
-		}
-
-		if (bpf_is_ldimm64(&insns[i]))
-			i++;
-	}
-}
-
-static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
-{
-	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
-	unsigned int orig_prog_len = env->prog->len;
-	int err;
-
-	if (bpf_prog_is_offloaded(env->prog->aux))
-		bpf_prog_offload_remove_insns(env, off, cnt);
-
-	/* Should be called before bpf_remove_insns, as it uses prog->insnsi */
-	clear_insn_aux_data(env, off, cnt);
-
-	err = bpf_remove_insns(env->prog, off, cnt);
-	if (err)
-		return err;
-
-	err = adjust_subprog_starts_after_remove(env, off, cnt);
-	if (err)
-		return err;
-
-	err = bpf_adj_linfo_after_remove(env, off, cnt);
-	if (err)
-		return err;
 
-	adjust_insn_arrays_after_remove(env, off, cnt);
-
-	memmove(aux_data + off,	aux_data + off + cnt,
-		sizeof(*aux_data) * (orig_prog_len - off - cnt));
-
-	return 0;
-}
 
 /* The verifier does more data flow analysis than llvm and will not
  * explore branches that are dead at run time. Malicious programs can
@@ -22913,2189 +22395,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
 	}
 }
 
-bool bpf_insn_is_cond_jump(u8 code)
-{
-	u8 op;
-
-	op = BPF_OP(code);
-	if (BPF_CLASS(code) == BPF_JMP32)
-		return op != BPF_JA;
 
-	if (BPF_CLASS(code) != BPF_JMP)
-		return false;
-
-	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
-}
-
-static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
-{
-	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
-	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
-	struct bpf_insn *insn = env->prog->insnsi;
-	const int insn_cnt = env->prog->len;
-	int i;
-
-	for (i = 0; i < insn_cnt; i++, insn++) {
-		if (!bpf_insn_is_cond_jump(insn->code))
-			continue;
-
-		if (!aux_data[i + 1].seen)
-			ja.off = insn->off;
-		else if (!aux_data[i + 1 + insn->off].seen)
-			ja.off = 0;
-		else
-			continue;
-
-		if (bpf_prog_is_offloaded(env->prog->aux))
-			bpf_prog_offload_replace_insn(env, i, &ja);
-
-		memcpy(insn, &ja, sizeof(ja));
-	}
-}
-
-static int opt_remove_dead_code(struct bpf_verifier_env *env)
-{
-	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
-	int insn_cnt = env->prog->len;
-	int i, err;
-
-	for (i = 0; i < insn_cnt; i++) {
-		int j;
-
-		j = 0;
-		while (i + j < insn_cnt && !aux_data[i + j].seen)
-			j++;
-		if (!j)
-			continue;
-
-		err = verifier_remove_insns(env, i, j);
-		if (err)
-			return err;
-		insn_cnt = env->prog->len;
-	}
-
-	return 0;
-}
-
-static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
-static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
-
-static int opt_remove_nops(struct bpf_verifier_env *env)
-{
-	struct bpf_insn *insn = env->prog->insnsi;
-	int insn_cnt = env->prog->len;
-	bool is_may_goto_0, is_ja;
-	int i, err;
-
-	for (i = 0; i < insn_cnt; i++) {
-		is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
-		is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
-
-		if (!is_may_goto_0 && !is_ja)
-			continue;
-
-		err = verifier_remove_insns(env, i, 1);
-		if (err)
-			return err;
-		insn_cnt--;
-		/* Go back one insn to catch may_goto +1; may_goto +0 sequence */
-		i -= (is_may_goto_0 && i > 0) ? 2 : 1;
-	}
-
-	return 0;
-}
-
-static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
-					 const union bpf_attr *attr)
-{
-	struct bpf_insn *patch;
-	/* use env->insn_buf as two independent buffers */
-	struct bpf_insn *zext_patch = env->insn_buf;
-	struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
-	struct bpf_insn_aux_data *aux = env->insn_aux_data;
-	int i, patch_len, delta = 0, len = env->prog->len;
-	struct bpf_insn *insns = env->prog->insnsi;
-	struct bpf_prog *new_prog;
-	bool rnd_hi32;
-
-	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
-	zext_patch[1] = BPF_ZEXT_REG(0);
-	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
-	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
-	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
-	for (i = 0; i < len; i++) {
-		int adj_idx = i + delta;
-		struct bpf_insn insn;
-		int load_reg;
-
-		insn = insns[adj_idx];
-		load_reg = insn_def_regno(&insn);
-		if (!aux[adj_idx].zext_dst) {
-			u8 code, class;
-			u32 imm_rnd;
-
-			if (!rnd_hi32)
-				continue;
-
-			code = insn.code;
-			class = BPF_CLASS(code);
-			if (load_reg == -1)
-				continue;
-
-			/* NOTE: arg "reg" (the fourth one) is only used for
-			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
-			 *       here.
-			 */
-			if (is_reg64(&insn, load_reg, NULL, DST_OP)) {
-				if (class == BPF_LD &&
-				    BPF_MODE(code) == BPF_IMM)
-					i++;
-				continue;
-			}
-
-			/* ctx load could be transformed into wider load. */
-			if (class == BPF_LDX &&
-			    aux[adj_idx].ptr_type == PTR_TO_CTX)
-				continue;
-
-			imm_rnd = get_random_u32();
-			rnd_hi32_patch[0] = insn;
-			rnd_hi32_patch[1].imm = imm_rnd;
-			rnd_hi32_patch[3].dst_reg = load_reg;
-			patch = rnd_hi32_patch;
-			patch_len = 4;
-			goto apply_patch_buffer;
-		}
-
-		/* Add in an zero-extend instruction if a) the JIT has requested
-		 * it or b) it's a CMPXCHG.
-		 *
-		 * The latter is because: BPF_CMPXCHG always loads a value into
-		 * R0, therefore always zero-extends. However some archs'
-		 * equivalent instruction only does this load when the
-		 * comparison is successful. This detail of CMPXCHG is
-		 * orthogonal to the general zero-extension behaviour of the
-		 * CPU, so it's treated independently of bpf_jit_needs_zext.
-		 */
-		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
-			continue;
-
-		/* Zero-extension is done by the caller. */
-		if (bpf_pseudo_kfunc_call(&insn))
-			continue;
-
-		if (verifier_bug_if(load_reg == -1, env,
-				    "zext_dst is set, but no reg is defined"))
-			return -EFAULT;
-
-		zext_patch[0] = insn;
-		zext_patch[1].dst_reg = load_reg;
-		zext_patch[1].src_reg = load_reg;
-		patch = zext_patch;
-		patch_len = 2;
-apply_patch_buffer:
-		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
-		if (!new_prog)
-			return -ENOMEM;
-		env->prog = new_prog;
-		insns = new_prog->insnsi;
-		aux = env->insn_aux_data;
-		delta += patch_len - 1;
-	}
-
-	return 0;
-}
-
-/* convert load instructions that access fields of a context type into a
- * sequence of instructions that access fields of the underlying structure:
- *     struct __sk_buff    -> struct sk_buff
- *     struct bpf_sock_ops -> struct sock
- */
-static int convert_ctx_accesses(struct bpf_verifier_env *env)
-{
-	struct bpf_subprog_info *subprogs = env->subprog_info;
-	const struct bpf_verifier_ops *ops = env->ops;
-	int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
-	const int insn_cnt = env->prog->len;
-	struct bpf_insn *epilogue_buf = env->epilogue_buf;
-	struct bpf_insn *insn_buf = env->insn_buf;
-	struct bpf_insn *insn;
-	u32 target_size, size_default, off;
-	struct bpf_prog *new_prog;
-	enum bpf_access_type type;
-	bool is_narrower_load;
-	int epilogue_idx = 0;
-
-	if (ops->gen_epilogue) {
-		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
-						 -(subprogs[0].stack_depth + 8));
-		if (epilogue_cnt >= INSN_BUF_SIZE) {
-			verifier_bug(env, "epilogue is too long");
-			return -EFAULT;
-		} else if (epilogue_cnt) {
-			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
-			cnt = 0;
-			subprogs[0].stack_depth += 8;
-			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
-						      -subprogs[0].stack_depth);
-			insn_buf[cnt++] = env->prog->insnsi[0];
-			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-			env->prog = new_prog;
-			delta += cnt - 1;
-
-			ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
-			if (ret < 0)
-				return ret;
-		}
-	}
-
-	if (ops->gen_prologue || env->seen_direct_write) {
-		if (!ops->gen_prologue) {
-			verifier_bug(env, "gen_prologue is null");
-			return -EFAULT;
-		}
-		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
-					env->prog);
-		if (cnt >= INSN_BUF_SIZE) {
-			verifier_bug(env, "prologue is too long");
-			return -EFAULT;
-		} else if (cnt) {
-			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			env->prog = new_prog;
-			delta += cnt - 1;
-
-			ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
-			if (ret < 0)
-				return ret;
-		}
-	}
-
-	if (delta)
-		WARN_ON(adjust_jmp_off(env->prog, 0, delta));
-
-	if (bpf_prog_is_offloaded(env->prog->aux))
-		return 0;
-
-	insn = env->prog->insnsi + delta;
-
-	for (i = 0; i < insn_cnt; i++, insn++) {
-		bpf_convert_ctx_access_t convert_ctx_access;
-		u8 mode;
-
-		if (env->insn_aux_data[i + delta].nospec) {
-			WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
-			struct bpf_insn *patch = insn_buf;
-
-			*patch++ = BPF_ST_NOSPEC();
-			*patch++ = *insn;
-			cnt = patch - insn_buf;
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			/* This can not be easily merged with the
-			 * nospec_result-case, because an insn may require a
-			 * nospec before and after itself. Therefore also do not
-			 * 'continue' here but potentially apply further
-			 * patching to insn. *insn should equal patch[1] now.
-			 */
-		}
-
-		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
-		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
-		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
-		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
-		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
-		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
-		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
-			type = BPF_READ;
-		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
-			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
-			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
-			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
-			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
-			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
-			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
-			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
-			type = BPF_WRITE;
-		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
-			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
-			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
-			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
-			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
-			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
-			env->prog->aux->num_exentries++;
-			continue;
-		} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
-			   epilogue_cnt &&
-			   i + delta < subprogs[1].start) {
-			/* Generate epilogue for the main prog */
-			if (epilogue_idx) {
-				/* jump back to the earlier generated epilogue */
-				insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
-				cnt = 1;
-			} else {
-				memcpy(insn_buf, epilogue_buf,
-				       epilogue_cnt * sizeof(*epilogue_buf));
-				cnt = epilogue_cnt;
-				/* epilogue_idx cannot be 0. It must have at
-				 * least one ctx ptr saving insn before the
-				 * epilogue.
-				 */
-				epilogue_idx = i + delta;
-			}
-			goto patch_insn_buf;
-		} else {
-			continue;
-		}
-
-		if (type == BPF_WRITE &&
-		    env->insn_aux_data[i + delta].nospec_result) {
-			/* nospec_result is only used to mitigate Spectre v4 and
-			 * to limit verification-time for Spectre v1.
-			 */
-			struct bpf_insn *patch = insn_buf;
-
-			*patch++ = *insn;
-			*patch++ = BPF_ST_NOSPEC();
-			cnt = patch - insn_buf;
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			continue;
-		}
-
-		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
-		case PTR_TO_CTX:
-			if (!ops->convert_ctx_access)
-				continue;
-			convert_ctx_access = ops->convert_ctx_access;
-			break;
-		case PTR_TO_SOCKET:
-		case PTR_TO_SOCK_COMMON:
-			convert_ctx_access = bpf_sock_convert_ctx_access;
-			break;
-		case PTR_TO_TCP_SOCK:
-			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
-			break;
-		case PTR_TO_XDP_SOCK:
-			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
-			break;
-		case PTR_TO_BTF_ID:
-		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
-		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
-		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
-		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
-		 * any faults for loads into such types. BPF_WRITE is disallowed
-		 * for this case.
-		 */
-		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
-		case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
-			if (type == BPF_READ) {
-				if (BPF_MODE(insn->code) == BPF_MEM)
-					insn->code = BPF_LDX | BPF_PROBE_MEM |
-						     BPF_SIZE((insn)->code);
-				else
-					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
-						     BPF_SIZE((insn)->code);
-				env->prog->aux->num_exentries++;
-			}
-			continue;
-		case PTR_TO_ARENA:
-			if (BPF_MODE(insn->code) == BPF_MEMSX) {
-				if (!bpf_jit_supports_insn(insn, true)) {
-					verbose(env, "sign extending loads from arena are not supported yet\n");
-					return -EOPNOTSUPP;
-				}
-				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
-			} else {
-				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
-			}
-			env->prog->aux->num_exentries++;
-			continue;
-		default:
-			continue;
-		}
-
-		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
-		size = BPF_LDST_BYTES(insn);
-		mode = BPF_MODE(insn->code);
-
-		/* If the read access is a narrower load of the field,
-		 * convert to a 4/8-byte load, to minimum program type specific
-		 * convert_ctx_access changes. If conversion is successful,
-		 * we will apply proper mask to the result.
-		 */
-		is_narrower_load = size < ctx_field_size;
-		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
-		off = insn->off;
-		if (is_narrower_load) {
-			u8 size_code;
-
-			if (type == BPF_WRITE) {
-				verifier_bug(env, "narrow ctx access misconfigured");
-				return -EFAULT;
-			}
-
-			size_code = BPF_H;
-			if (ctx_field_size == 4)
-				size_code = BPF_W;
-			else if (ctx_field_size == 8)
-				size_code = BPF_DW;
-
-			insn->off = off & ~(size_default - 1);
-			insn->code = BPF_LDX | BPF_MEM | size_code;
-		}
-
-		target_size = 0;
-		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
-					 &target_size);
-		if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
-		    (ctx_field_size && !target_size)) {
-			verifier_bug(env, "error during ctx access conversion (%d)", cnt);
-			return -EFAULT;
-		}
-
-		if (is_narrower_load && size < target_size) {
-			u8 shift = bpf_ctx_narrow_access_offset(
-				off, size, size_default) * 8;
-			if (shift && cnt + 1 >= INSN_BUF_SIZE) {
-				verifier_bug(env, "narrow ctx load misconfigured");
-				return -EFAULT;
-			}
-			if (ctx_field_size <= 4) {
-				if (shift)
-					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
-									insn->dst_reg,
-									shift);
-				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
-								(1 << size * 8) - 1);
-			} else {
-				if (shift)
-					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
-									insn->dst_reg,
-									shift);
-				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
-								(1ULL << size * 8) - 1);
-			}
-		}
-		if (mode == BPF_MEMSX)
-			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
-						       insn->dst_reg, insn->dst_reg,
-						       size * 8, 0);
-
-patch_insn_buf:
-		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-		if (!new_prog)
-			return -ENOMEM;
-
-		delta += cnt - 1;
-
-		/* keep walking new program and skip insns we just inserted */
-		env->prog = new_prog;
-		insn      = new_prog->insnsi + i + delta;
-	}
-
-	return 0;
-}
-
-static int jit_subprogs(struct bpf_verifier_env *env)
-{
-	struct bpf_prog *prog = env->prog, **func, *tmp;
-	int i, j, subprog_start, subprog_end = 0, len, subprog;
-	struct bpf_map *map_ptr;
-	struct bpf_insn *insn;
-	void *old_bpf_func;
-	int err, num_exentries;
-	int old_len, subprog_start_adjustment = 0;
-
-	if (env->subprog_cnt <= 1)
-		return 0;
-
-	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
-		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
-			continue;
-
-		/* Upon error here we cannot fall back to interpreter but
-		 * need a hard reject of the program. Thus -EFAULT is
-		 * propagated in any case.
-		 */
-		subprog = bpf_find_subprog(env, i + insn->imm + 1);
-		if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
-				    i + insn->imm + 1))
-			return -EFAULT;
-		/* temporarily remember subprog id inside insn instead of
-		 * aux_data, since next loop will split up all insns into funcs
-		 */
-		insn->off = subprog;
-		/* remember original imm in case JIT fails and fallback
-		 * to interpreter will be needed
-		 */
-		env->insn_aux_data[i].call_imm = insn->imm;
-		/* point imm to __bpf_call_base+1 from JITs point of view */
-		insn->imm = 1;
-		if (bpf_pseudo_func(insn)) {
-#if defined(MODULES_VADDR)
-			u64 addr = MODULES_VADDR;
-#else
-			u64 addr = VMALLOC_START;
-#endif
-			/* jit (e.g. x86_64) may emit fewer instructions
-			 * if it learns a u32 imm is the same as a u64 imm.
-			 * Set close enough to possible prog address.
-			 */
-			insn[0].imm = (u32)addr;
-			insn[1].imm = addr >> 32;
-		}
-	}
-
-	err = bpf_prog_alloc_jited_linfo(prog);
-	if (err)
-		goto out_undo_insn;
-
-	err = -ENOMEM;
-	func = kzalloc_objs(prog, env->subprog_cnt);
-	if (!func)
-		goto out_undo_insn;
-
-	for (i = 0; i < env->subprog_cnt; i++) {
-		subprog_start = subprog_end;
-		subprog_end = env->subprog_info[i + 1].start;
-
-		len = subprog_end - subprog_start;
-		/* bpf_prog_run() doesn't call subprogs directly,
-		 * hence main prog stats include the runtime of subprogs.
-		 * subprogs don't have IDs and not reachable via prog_get_next_id
-		 * func[i]->stats will never be accessed and stays NULL
-		 */
-		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
-		if (!func[i])
-			goto out_free;
-		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
-		       len * sizeof(struct bpf_insn));
-		func[i]->type = prog->type;
-		func[i]->len = len;
-		if (bpf_prog_calc_tag(func[i]))
-			goto out_free;
-		func[i]->is_func = 1;
-		func[i]->sleepable = prog->sleepable;
-		func[i]->aux->func_idx = i;
-		/* Below members will be freed only at prog->aux */
-		func[i]->aux->btf = prog->aux->btf;
-		func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment;
-		func[i]->aux->func_info = prog->aux->func_info;
-		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
-		func[i]->aux->poke_tab = prog->aux->poke_tab;
-		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
-		func[i]->aux->main_prog_aux = prog->aux;
-
-		for (j = 0; j < prog->aux->size_poke_tab; j++) {
-			struct bpf_jit_poke_descriptor *poke;
-
-			poke = &prog->aux->poke_tab[j];
-			if (poke->insn_idx < subprog_end &&
-			    poke->insn_idx >= subprog_start)
-				poke->aux = func[i]->aux;
-		}
-
-		func[i]->aux->name[0] = 'F';
-		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
-		if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
-			func[i]->aux->jits_use_priv_stack = true;
-
-		func[i]->jit_requested = 1;
-		func[i]->blinding_requested = prog->blinding_requested;
-		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
-		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
-		func[i]->aux->linfo = prog->aux->linfo;
-		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
-		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
-		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
-		func[i]->aux->arena = prog->aux->arena;
-		func[i]->aux->used_maps = env->used_maps;
-		func[i]->aux->used_map_cnt = env->used_map_cnt;
-		num_exentries = 0;
-		insn = func[i]->insnsi;
-		for (j = 0; j < func[i]->len; j++, insn++) {
-			if (BPF_CLASS(insn->code) == BPF_LDX &&
-			    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
-			     BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
-			     BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
-			     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
-				num_exentries++;
-			if ((BPF_CLASS(insn->code) == BPF_STX ||
-			     BPF_CLASS(insn->code) == BPF_ST) &&
-			     BPF_MODE(insn->code) == BPF_PROBE_MEM32)
-				num_exentries++;
-			if (BPF_CLASS(insn->code) == BPF_STX &&
-			     BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
-				num_exentries++;
-		}
-		func[i]->aux->num_exentries = num_exentries;
-		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
-		func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
-		func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
-		func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
-		if (!i)
-			func[i]->aux->exception_boundary = env->seen_exception;
-
-		/*
-		 * To properly pass the absolute subprog start to jit
-		 * all instruction adjustments should be accumulated
-		 */
-		old_len = func[i]->len;
-		func[i] = bpf_int_jit_compile(func[i]);
-		subprog_start_adjustment += func[i]->len - old_len;
-
-		if (!func[i]->jited) {
-			err = -ENOTSUPP;
-			goto out_free;
-		}
-		cond_resched();
-	}
-
-	/* at this point all bpf functions were successfully JITed
-	 * now populate all bpf_calls with correct addresses and
-	 * run last pass of JIT
-	 */
-	for (i = 0; i < env->subprog_cnt; i++) {
-		insn = func[i]->insnsi;
-		for (j = 0; j < func[i]->len; j++, insn++) {
-			if (bpf_pseudo_func(insn)) {
-				subprog = insn->off;
-				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
-				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
-				continue;
-			}
-			if (!bpf_pseudo_call(insn))
-				continue;
-			subprog = insn->off;
-			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
-		}
-
-		/* we use the aux data to keep a list of the start addresses
-		 * of the JITed images for each function in the program
-		 *
-		 * for some architectures, such as powerpc64, the imm field
-		 * might not be large enough to hold the offset of the start
-		 * address of the callee's JITed image from __bpf_call_base
-		 *
-		 * in such cases, we can lookup the start address of a callee
-		 * by using its subprog id, available from the off field of
-		 * the call instruction, as an index for this list
-		 */
-		func[i]->aux->func = func;
-		func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
-		func[i]->aux->real_func_cnt = env->subprog_cnt;
-	}
-	for (i = 0; i < env->subprog_cnt; i++) {
-		old_bpf_func = func[i]->bpf_func;
-		tmp = bpf_int_jit_compile(func[i]);
-		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
-			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
-			err = -ENOTSUPP;
-			goto out_free;
-		}
-		cond_resched();
-	}
-
-	/*
-	 * Cleanup func[i]->aux fields which aren't required
-	 * or can become invalid in future
-	 */
-	for (i = 0; i < env->subprog_cnt; i++) {
-		func[i]->aux->used_maps = NULL;
-		func[i]->aux->used_map_cnt = 0;
-	}
-
-	/* finally lock prog and jit images for all functions and
-	 * populate kallsysm. Begin at the first subprogram, since
-	 * bpf_prog_load will add the kallsyms for the main program.
-	 */
-	for (i = 1; i < env->subprog_cnt; i++) {
-		err = bpf_prog_lock_ro(func[i]);
-		if (err)
-			goto out_free;
-	}
-
-	for (i = 1; i < env->subprog_cnt; i++)
-		bpf_prog_kallsyms_add(func[i]);
-
-	/* Last step: make now unused interpreter insns from main
-	 * prog consistent for later dump requests, so they can
-	 * later look the same as if they were interpreted only.
-	 */
-	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
-		if (bpf_pseudo_func(insn)) {
-			insn[0].imm = env->insn_aux_data[i].call_imm;
-			insn[1].imm = insn->off;
-			insn->off = 0;
-			continue;
-		}
-		if (!bpf_pseudo_call(insn))
-			continue;
-		insn->off = env->insn_aux_data[i].call_imm;
-		subprog = bpf_find_subprog(env, i + insn->off + 1);
-		insn->imm = subprog;
-	}
-
-	prog->jited = 1;
-	prog->bpf_func = func[0]->bpf_func;
-	prog->jited_len = func[0]->jited_len;
-	prog->aux->extable = func[0]->aux->extable;
-	prog->aux->num_exentries = func[0]->aux->num_exentries;
-	prog->aux->func = func;
-	prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
-	prog->aux->real_func_cnt = env->subprog_cnt;
-	prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
-	prog->aux->exception_boundary = func[0]->aux->exception_boundary;
-	bpf_prog_jit_attempt_done(prog);
-	return 0;
-out_free:
-	/* We failed JIT'ing, so at this point we need to unregister poke
-	 * descriptors from subprogs, so that kernel is not attempting to
-	 * patch it anymore as we're freeing the subprog JIT memory.
-	 */
-	for (i = 0; i < prog->aux->size_poke_tab; i++) {
-		map_ptr = prog->aux->poke_tab[i].tail_call.map;
-		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
-	}
-	/* At this point we're guaranteed that poke descriptors are not
-	 * live anymore. We can just unlink its descriptor table as it's
-	 * released with the main prog.
-	 */
-	for (i = 0; i < env->subprog_cnt; i++) {
-		if (!func[i])
-			continue;
-		func[i]->aux->poke_tab = NULL;
-		bpf_jit_free(func[i]);
-	}
-	kfree(func);
-out_undo_insn:
-	/* cleanup main prog to be interpreted */
-	prog->jit_requested = 0;
-	prog->blinding_requested = 0;
-	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
-		if (!bpf_pseudo_call(insn))
-			continue;
-		insn->off = 0;
-		insn->imm = env->insn_aux_data[i].call_imm;
-	}
-	bpf_prog_jit_attempt_done(prog);
-	return err;
-}
-
-static int fixup_call_args(struct bpf_verifier_env *env)
-{
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-	struct bpf_prog *prog = env->prog;
-	struct bpf_insn *insn = prog->insnsi;
-	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
-	int i, depth;
-#endif
-	int err = 0;
-
-	if (env->prog->jit_requested &&
-	    !bpf_prog_is_offloaded(env->prog->aux)) {
-		err = jit_subprogs(env);
-		if (err == 0)
-			return 0;
-		if (err == -EFAULT)
-			return err;
-	}
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-	if (has_kfunc_call) {
-		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
-		return -EINVAL;
-	}
-	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
-		/* When JIT fails the progs with bpf2bpf calls and tail_calls
-		 * have to be rejected, since interpreter doesn't support them yet.
-		 */
-		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
-		return -EINVAL;
-	}
-	for (i = 0; i < prog->len; i++, insn++) {
-		if (bpf_pseudo_func(insn)) {
-			/* When JIT fails the progs with callback calls
-			 * have to be rejected, since interpreter doesn't support them yet.
-			 */
-			verbose(env, "callbacks are not allowed in non-JITed programs\n");
-			return -EINVAL;
-		}
-
-		if (!bpf_pseudo_call(insn))
-			continue;
-		depth = get_callee_stack_depth(env, insn, i);
-		if (depth < 0)
-			return depth;
-		bpf_patch_call_args(insn, depth);
-	}
-	err = 0;
-#endif
-	return err;
-}
-
-/* replace a generic kfunc with a specialized version if necessary */
-static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
-{
-	struct bpf_prog *prog = env->prog;
-	bool seen_direct_write;
-	void *xdp_kfunc;
-	bool is_rdonly;
-	u32 func_id = desc->func_id;
-	u16 offset = desc->offset;
-	unsigned long addr = desc->addr;
-
-	if (offset) /* return if module BTF is used */
-		return 0;
-
-	if (bpf_dev_bound_kfunc_id(func_id)) {
-		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
-		if (xdp_kfunc)
-			addr = (unsigned long)xdp_kfunc;
-		/* fallback to default kfunc when not supported by netdev */
-	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
-		seen_direct_write = env->seen_direct_write;
-		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
-
-		if (is_rdonly)
-			addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
-
-		/* restore env->seen_direct_write to its original value, since
-		 * may_access_direct_pkt_data mutates it
-		 */
-		env->seen_direct_write = seen_direct_write;
-	} else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
-		if (bpf_lsm_has_d_inode_locked(prog))
-			addr = (unsigned long)bpf_set_dentry_xattr_locked;
-	} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
-		if (bpf_lsm_has_d_inode_locked(prog))
-			addr = (unsigned long)bpf_remove_dentry_xattr_locked;
-	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
-		if (!env->insn_aux_data[insn_idx].non_sleepable)
-			addr = (unsigned long)bpf_dynptr_from_file_sleepable;
-	} else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
-		if (env->insn_aux_data[insn_idx].non_sleepable)
-			addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
-	} else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
-		if (env->insn_aux_data[insn_idx].non_sleepable)
-			addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
-	}
-	desc->addr = addr;
-	return 0;
-}
-
-static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
-					    u16 struct_meta_reg,
-					    u16 node_offset_reg,
-					    struct bpf_insn *insn,
-					    struct bpf_insn *insn_buf,
-					    int *cnt)
-{
-	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
-	struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
-
-	insn_buf[0] = addr[0];
-	insn_buf[1] = addr[1];
-	insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
-	insn_buf[3] = *insn;
-	*cnt = 4;
-}
-
-static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
-			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
-{
-	struct bpf_kfunc_desc *desc;
-	int err;
-
-	if (!insn->imm) {
-		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
-		return -EINVAL;
-	}
-
-	*cnt = 0;
-
-	/* insn->imm has the btf func_id. Replace it with an offset relative to
-	 * __bpf_call_base, unless the JIT needs to call functions that are
-	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
-	 */
-	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
-	if (!desc) {
-		verifier_bug(env, "kernel function descriptor not found for func_id %u",
-			     insn->imm);
-		return -EFAULT;
-	}
-
-	err = specialize_kfunc(env, desc, insn_idx);
-	if (err)
-		return err;
-
-	if (!bpf_jit_supports_far_kfunc_call())
-		insn->imm = BPF_CALL_IMM(desc->addr);
-
-	if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
-		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
-		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
-		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
-
-		if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
-			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
-				     insn_idx);
-			return -EFAULT;
-		}
-
-		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
-		insn_buf[1] = addr[0];
-		insn_buf[2] = addr[1];
-		insn_buf[3] = *insn;
-		*cnt = 4;
-	} else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
-		   is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
-		   is_bpf_refcount_acquire_kfunc(desc->func_id)) {
-		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
-		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
-
-		if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
-			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
-				     insn_idx);
-			return -EFAULT;
-		}
-
-		if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
-			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
-				     insn_idx);
-			return -EFAULT;
-		}
-
-		insn_buf[0] = addr[0];
-		insn_buf[1] = addr[1];
-		insn_buf[2] = *insn;
-		*cnt = 3;
-	} else if (is_bpf_list_push_kfunc(desc->func_id) ||
-		   is_bpf_rbtree_add_kfunc(desc->func_id)) {
-		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
-		int struct_meta_reg = BPF_REG_3;
-		int node_offset_reg = BPF_REG_4;
-
-		/* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
-		if (is_bpf_rbtree_add_kfunc(desc->func_id)) {
-			struct_meta_reg = BPF_REG_4;
-			node_offset_reg = BPF_REG_5;
-		}
-
-		if (!kptr_struct_meta) {
-			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
-				     insn_idx);
-			return -EFAULT;
-		}
-
-		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
-						node_offset_reg, insn, insn_buf, cnt);
-	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
-		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
-		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
-		*cnt = 1;
-	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
-		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
-		/*
-		 * inline the bpf_session_is_return() for fsession:
-		 *   bool bpf_session_is_return(void *ctx)
-		 *   {
-		 *       return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
-		 *   }
-		 */
-		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
-		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
-		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
-		*cnt = 3;
-	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
-		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
-		/*
-		 * inline bpf_session_cookie() for fsession:
-		 *   __u64 *bpf_session_cookie(void *ctx)
-		 *   {
-		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
-		 *       return &((u64 *)ctx)[-off];
-		 *   }
-		 */
-		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
-		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
-		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
-		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
-		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
-		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
-		*cnt = 6;
-	}
-
-	if (env->insn_aux_data[insn_idx].arg_prog) {
-		u32 regno = env->insn_aux_data[insn_idx].arg_prog;
-		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
-		int idx = *cnt;
-
-		insn_buf[idx++] = ld_addrs[0];
-		insn_buf[idx++] = ld_addrs[1];
-		insn_buf[idx++] = *insn;
-		*cnt = idx;
-	}
-	return 0;
-}
-
-/* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
-static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
-{
-	struct bpf_subprog_info *info = env->subprog_info;
-	int cnt = env->subprog_cnt;
-	struct bpf_prog *prog;
-
-	/* We only reserve one slot for hidden subprogs in subprog_info. */
-	if (env->hidden_subprog_cnt) {
-		verifier_bug(env, "only one hidden subprog supported");
-		return -EFAULT;
-	}
-	/* We're not patching any existing instruction, just appending the new
-	 * ones for the hidden subprog. Hence all of the adjustment operations
-	 * in bpf_patch_insn_data are no-ops.
-	 */
-	prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
-	if (!prog)
-		return -ENOMEM;
-	env->prog = prog;
-	info[cnt + 1].start = info[cnt].start;
-	info[cnt].start = prog->len - len + 1;
-	env->subprog_cnt++;
-	env->hidden_subprog_cnt++;
-	return 0;
-}
-
-/* Do various post-verification rewrites in a single program pass.
- * These rewrites simplify JIT and interpreter implementations.
- */
-static int do_misc_fixups(struct bpf_verifier_env *env)
-{
-	struct bpf_prog *prog = env->prog;
-	enum bpf_attach_type eatype = prog->expected_attach_type;
-	enum bpf_prog_type prog_type = resolve_prog_type(prog);
-	struct bpf_insn *insn = prog->insnsi;
-	const struct bpf_func_proto *fn;
-	const int insn_cnt = prog->len;
-	const struct bpf_map_ops *ops;
-	struct bpf_insn_aux_data *aux;
-	struct bpf_insn *insn_buf = env->insn_buf;
-	struct bpf_prog *new_prog;
-	struct bpf_map *map_ptr;
-	int i, ret, cnt, delta = 0, cur_subprog = 0;
-	struct bpf_subprog_info *subprogs = env->subprog_info;
-	u16 stack_depth = subprogs[cur_subprog].stack_depth;
-	u16 stack_depth_extra = 0;
-
-	if (env->seen_exception && !env->exception_callback_subprog) {
-		struct bpf_insn *patch = insn_buf;
-
-		*patch++ = env->prog->insnsi[insn_cnt - 1];
-		*patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
-		*patch++ = BPF_EXIT_INSN();
-		ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
-		if (ret < 0)
-			return ret;
-		prog = env->prog;
-		insn = prog->insnsi;
-
-		env->exception_callback_subprog = env->subprog_cnt - 1;
-		/* Don't update insn_cnt, as add_hidden_subprog always appends insns */
-		mark_subprog_exc_cb(env, env->exception_callback_subprog);
-	}
-
-	for (i = 0; i < insn_cnt;) {
-		if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
-			if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
-			    (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
-				/* convert to 32-bit mov that clears upper 32-bit */
-				insn->code = BPF_ALU | BPF_MOV | BPF_X;
-				/* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
-				insn->off = 0;
-				insn->imm = 0;
-			} /* cast from as(0) to as(1) should be handled by JIT */
-			goto next_insn;
-		}
-
-		if (env->insn_aux_data[i + delta].needs_zext)
-			/* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
-			insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
-
-		/* Make sdiv/smod divide-by-minus-one exceptions impossible. */
-		if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
-		     insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
-		     insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
-		     insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
-		    insn->off == 1 && insn->imm == -1) {
-			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
-			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
-			struct bpf_insn *patch = insn_buf;
-
-			if (isdiv)
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
-							BPF_NEG | BPF_K, insn->dst_reg,
-							0, 0, 0);
-			else
-				*patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
-
-			cnt = patch - insn_buf;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
-		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
-		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
-		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
-		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
-			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
-			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
-			bool is_sdiv = isdiv && insn->off == 1;
-			bool is_smod = !isdiv && insn->off == 1;
-			struct bpf_insn *patch = insn_buf;
-
-			if (is_sdiv) {
-				/* [R,W]x sdiv 0 -> 0
-				 * LLONG_MIN sdiv -1 -> LLONG_MIN
-				 * INT_MIN sdiv -1 -> INT_MIN
-				 */
-				*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
-							BPF_ADD | BPF_K, BPF_REG_AX,
-							0, 0, 1);
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
-							BPF_JGT | BPF_K, BPF_REG_AX,
-							0, 4, 1);
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
-							BPF_JEQ | BPF_K, BPF_REG_AX,
-							0, 1, 0);
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
-							BPF_MOV | BPF_K, insn->dst_reg,
-							0, 0, 0);
-				/* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
-							BPF_NEG | BPF_K, insn->dst_reg,
-							0, 0, 0);
-				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
-				*patch++ = *insn;
-				cnt = patch - insn_buf;
-			} else if (is_smod) {
-				/* [R,W]x mod 0 -> [R,W]x */
-				/* [R,W]x mod -1 -> 0 */
-				*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
-							BPF_ADD | BPF_K, BPF_REG_AX,
-							0, 0, 1);
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
-							BPF_JGT | BPF_K, BPF_REG_AX,
-							0, 3, 1);
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
-							BPF_JEQ | BPF_K, BPF_REG_AX,
-							0, 3 + (is64 ? 0 : 1), 1);
-				*patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
-				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
-				*patch++ = *insn;
-
-				if (!is64) {
-					*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
-					*patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
-				}
-				cnt = patch - insn_buf;
-			} else if (isdiv) {
-				/* [R,W]x div 0 -> 0 */
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
-							BPF_JNE | BPF_K, insn->src_reg,
-							0, 2, 0);
-				*patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg);
-				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
-				*patch++ = *insn;
-				cnt = patch - insn_buf;
-			} else {
-				/* [R,W]x mod 0 -> [R,W]x */
-				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
-							BPF_JEQ | BPF_K, insn->src_reg,
-							0, 1 + (is64 ? 0 : 1), 0);
-				*patch++ = *insn;
-
-				if (!is64) {
-					*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
-					*patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
-				}
-				cnt = patch - insn_buf;
-			}
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Make it impossible to de-reference a userspace address */
-		if (BPF_CLASS(insn->code) == BPF_LDX &&
-		    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
-		     BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
-			struct bpf_insn *patch = insn_buf;
-			u64 uaddress_limit = bpf_arch_uaddress_limit();
-
-			if (!uaddress_limit)
-				goto next_insn;
-
-			*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
-			if (insn->off)
-				*patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
-			*patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
-			*patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
-			*patch++ = *insn;
-			*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
-			*patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
-
-			cnt = patch - insn_buf;
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
-		if (BPF_CLASS(insn->code) == BPF_LD &&
-		    (BPF_MODE(insn->code) == BPF_ABS ||
-		     BPF_MODE(insn->code) == BPF_IND)) {
-			cnt = env->ops->gen_ld_abs(insn, insn_buf);
-			if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
-				verifier_bug(env, "%d insns generated for ld_abs", cnt);
-				return -EFAULT;
-			}
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
-		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
-		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
-			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
-			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
-			struct bpf_insn *patch = insn_buf;
-			bool issrc, isneg, isimm;
-			u32 off_reg;
-
-			aux = &env->insn_aux_data[i + delta];
-			if (!aux->alu_state ||
-			    aux->alu_state == BPF_ALU_NON_POINTER)
-				goto next_insn;
-
-			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
-			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
-				BPF_ALU_SANITIZE_SRC;
-			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
-
-			off_reg = issrc ? insn->src_reg : insn->dst_reg;
-			if (isimm) {
-				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
-			} else {
-				if (isneg)
-					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
-				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
-				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
-				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
-				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
-				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
-				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
-			}
-			if (!issrc)
-				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
-			insn->src_reg = BPF_REG_AX;
-			if (isneg)
-				insn->code = insn->code == code_add ?
-					     code_sub : code_add;
-			*patch++ = *insn;
-			if (issrc && isneg && !isimm)
-				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
-			cnt = patch - insn_buf;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		if (bpf_is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
-			int stack_off_cnt = -stack_depth - 16;
-
-			/*
-			 * Two 8 byte slots, depth-16 stores the count, and
-			 * depth-8 stores the start timestamp of the loop.
-			 *
-			 * The starting value of count is BPF_MAX_TIMED_LOOPS
-			 * (0xffff).  Every iteration loads it and subs it by 1,
-			 * until the value becomes 0 in AX (thus, 1 in stack),
-			 * after which we call arch_bpf_timed_may_goto, which
-			 * either sets AX to 0xffff to keep looping, or to 0
-			 * upon timeout. AX is then stored into the stack. In
-			 * the next iteration, we either see 0 and break out, or
-			 * continue iterating until the next time value is 0
-			 * after subtraction, rinse and repeat.
-			 */
-			stack_depth_extra = 16;
-			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
-			if (insn->off >= 0)
-				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
-			else
-				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
-			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
-			insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
-			/*
-			 * AX is used as an argument to pass in stack_off_cnt
-			 * (to add to r10/fp), and also as the return value of
-			 * the call to arch_bpf_timed_may_goto.
-			 */
-			insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
-			insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
-			insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
-			cnt = 7;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta += cnt - 1;
-			env->prog = prog = new_prog;
-			insn = new_prog->insnsi + i + delta;
-			goto next_insn;
-		} else if (bpf_is_may_goto_insn(insn)) {
-			int stack_off = -stack_depth - 8;
-
-			stack_depth_extra = 8;
-			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
-			if (insn->off >= 0)
-				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
-			else
-				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
-			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
-			insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
-			cnt = 4;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta += cnt - 1;
-			env->prog = prog = new_prog;
-			insn = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		if (insn->code != (BPF_JMP | BPF_CALL))
-			goto next_insn;
-		if (insn->src_reg == BPF_PSEUDO_CALL)
-			goto next_insn;
-		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
-			ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
-			if (ret)
-				return ret;
-			if (cnt == 0)
-				goto next_insn;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta	 += cnt - 1;
-			env->prog = prog = new_prog;
-			insn	  = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Skip inlining the helper call if the JIT does it. */
-		if (bpf_jit_inlines_helper_call(insn->imm))
-			goto next_insn;
-
-		if (insn->imm == BPF_FUNC_get_route_realm)
-			prog->dst_needed = 1;
-		if (insn->imm == BPF_FUNC_get_prandom_u32)
-			bpf_user_rnd_init_once();
-		if (insn->imm == BPF_FUNC_override_return)
-			prog->kprobe_override = 1;
-		if (insn->imm == BPF_FUNC_tail_call) {
-			/* If we tail call into other programs, we
-			 * cannot make any assumptions since they can
-			 * be replaced dynamically during runtime in
-			 * the program array.
-			 */
-			prog->cb_access = 1;
-			if (!allow_tail_call_in_subprogs(env))
-				prog->aux->stack_depth = MAX_BPF_STACK;
-			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
-
-			/* mark bpf_tail_call as different opcode to avoid
-			 * conditional branch in the interpreter for every normal
-			 * call and to prevent accidental JITing by JIT compiler
-			 * that doesn't support bpf_tail_call yet
-			 */
-			insn->imm = 0;
-			insn->code = BPF_JMP | BPF_TAIL_CALL;
-
-			aux = &env->insn_aux_data[i + delta];
-			if (env->bpf_capable && !prog->blinding_requested &&
-			    prog->jit_requested &&
-			    !bpf_map_key_poisoned(aux) &&
-			    !bpf_map_ptr_poisoned(aux) &&
-			    !bpf_map_ptr_unpriv(aux)) {
-				struct bpf_jit_poke_descriptor desc = {
-					.reason = BPF_POKE_REASON_TAIL_CALL,
-					.tail_call.map = aux->map_ptr_state.map_ptr,
-					.tail_call.key = bpf_map_key_immediate(aux),
-					.insn_idx = i + delta,
-				};
-
-				ret = bpf_jit_add_poke_descriptor(prog, &desc);
-				if (ret < 0) {
-					verbose(env, "adding tail call poke descriptor failed\n");
-					return ret;
-				}
-
-				insn->imm = ret + 1;
-				goto next_insn;
-			}
-
-			if (!bpf_map_ptr_unpriv(aux))
-				goto next_insn;
-
-			/* instead of changing every JIT dealing with tail_call
-			 * emit two extra insns:
-			 * if (index >= max_entries) goto out;
-			 * index &= array->index_mask;
-			 * to avoid out-of-bounds cpu speculation
-			 */
-			if (bpf_map_ptr_poisoned(aux)) {
-				verbose(env, "tail_call abusing map_ptr\n");
-				return -EINVAL;
-			}
-
-			map_ptr = aux->map_ptr_state.map_ptr;
-			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
-						  map_ptr->max_entries, 2);
-			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
-						    container_of(map_ptr,
-								 struct bpf_array,
-								 map)->index_mask);
-			insn_buf[2] = *insn;
-			cnt = 3;
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		if (insn->imm == BPF_FUNC_timer_set_callback) {
-			/* The verifier will process callback_fn as many times as necessary
-			 * with different maps and the register states prepared by
-			 * set_timer_callback_state will be accurate.
-			 *
-			 * The following use case is valid:
-			 *   map1 is shared by prog1, prog2, prog3.
-			 *   prog1 calls bpf_timer_init for some map1 elements
-			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
-			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
-			 *   prog3 calls bpf_timer_start for some map1 elements.
-			 *     Those that were not both bpf_timer_init-ed and
-			 *     bpf_timer_set_callback-ed will return -EINVAL.
-			 */
-			struct bpf_insn ld_addrs[2] = {
-				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
-			};
-
-			insn_buf[0] = ld_addrs[0];
-			insn_buf[1] = ld_addrs[1];
-			insn_buf[2] = *insn;
-			cnt = 3;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto patch_call_imm;
-		}
-
-		/* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
-		if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
-			/* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
-			 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
-			 */
-			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
-			insn_buf[1] = *insn;
-			cnt = 2;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta += cnt - 1;
-			env->prog = prog = new_prog;
-			insn = new_prog->insnsi + i + delta;
-			goto patch_call_imm;
-		}
-
-		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
-		 * and other inlining handlers are currently limited to 64 bit
-		 * only.
-		 */
-		if (prog->jit_requested && BITS_PER_LONG == 64 &&
-		    (insn->imm == BPF_FUNC_map_lookup_elem ||
-		     insn->imm == BPF_FUNC_map_update_elem ||
-		     insn->imm == BPF_FUNC_map_delete_elem ||
-		     insn->imm == BPF_FUNC_map_push_elem   ||
-		     insn->imm == BPF_FUNC_map_pop_elem    ||
-		     insn->imm == BPF_FUNC_map_peek_elem   ||
-		     insn->imm == BPF_FUNC_redirect_map    ||
-		     insn->imm == BPF_FUNC_for_each_map_elem ||
-		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
-			aux = &env->insn_aux_data[i + delta];
-			if (bpf_map_ptr_poisoned(aux))
-				goto patch_call_imm;
-
-			map_ptr = aux->map_ptr_state.map_ptr;
-			ops = map_ptr->ops;
-			if (insn->imm == BPF_FUNC_map_lookup_elem &&
-			    ops->map_gen_lookup) {
-				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
-				if (cnt == -EOPNOTSUPP)
-					goto patch_map_ops_generic;
-				if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
-					verifier_bug(env, "%d insns generated for map lookup", cnt);
-					return -EFAULT;
-				}
-
-				new_prog = bpf_patch_insn_data(env, i + delta,
-							       insn_buf, cnt);
-				if (!new_prog)
-					return -ENOMEM;
-
-				delta    += cnt - 1;
-				env->prog = prog = new_prog;
-				insn      = new_prog->insnsi + i + delta;
-				goto next_insn;
-			}
-
-			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
-				     (void *(*)(struct bpf_map *map, void *key))NULL));
-			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
-				     (long (*)(struct bpf_map *map, void *key))NULL));
-			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
-				     (long (*)(struct bpf_map *map, void *key, void *value,
-					      u64 flags))NULL));
-			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
-				     (long (*)(struct bpf_map *map, void *value,
-					      u64 flags))NULL));
-			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
-				     (long (*)(struct bpf_map *map, void *value))NULL));
-			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
-				     (long (*)(struct bpf_map *map, void *value))NULL));
-			BUILD_BUG_ON(!__same_type(ops->map_redirect,
-				     (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
-			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
-				     (long (*)(struct bpf_map *map,
-					      bpf_callback_t callback_fn,
-					      void *callback_ctx,
-					      u64 flags))NULL));
-			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
-				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
-
-patch_map_ops_generic:
-			switch (insn->imm) {
-			case BPF_FUNC_map_lookup_elem:
-				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
-				goto next_insn;
-			case BPF_FUNC_map_update_elem:
-				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
-				goto next_insn;
-			case BPF_FUNC_map_delete_elem:
-				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
-				goto next_insn;
-			case BPF_FUNC_map_push_elem:
-				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
-				goto next_insn;
-			case BPF_FUNC_map_pop_elem:
-				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
-				goto next_insn;
-			case BPF_FUNC_map_peek_elem:
-				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
-				goto next_insn;
-			case BPF_FUNC_redirect_map:
-				insn->imm = BPF_CALL_IMM(ops->map_redirect);
-				goto next_insn;
-			case BPF_FUNC_for_each_map_elem:
-				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
-				goto next_insn;
-			case BPF_FUNC_map_lookup_percpu_elem:
-				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
-				goto next_insn;
-			}
-
-			goto patch_call_imm;
-		}
-
-		/* Implement bpf_jiffies64 inline. */
-		if (prog->jit_requested && BITS_PER_LONG == 64 &&
-		    insn->imm == BPF_FUNC_jiffies64) {
-			struct bpf_insn ld_jiffies_addr[2] = {
-				BPF_LD_IMM64(BPF_REG_0,
-					     (unsigned long)&jiffies),
-			};
-
-			insn_buf[0] = ld_jiffies_addr[0];
-			insn_buf[1] = ld_jiffies_addr[1];
-			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
-						  BPF_REG_0, 0);
-			cnt = 3;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
-						       cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
-		/* Implement bpf_get_smp_processor_id() inline. */
-		if (insn->imm == BPF_FUNC_get_smp_processor_id &&
-		    verifier_inlines_helper_call(env, insn->imm)) {
-			/* BPF_FUNC_get_smp_processor_id inlining is an
-			 * optimization, so if cpu_number is ever
-			 * changed in some incompatible and hard to support
-			 * way, it's fine to back out this inlining logic
-			 */
-#ifdef CONFIG_SMP
-			insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
-			insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
-			insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
-			cnt = 3;
-#else
-			insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
-			cnt = 1;
-#endif
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */
-		if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) &&
-		    verifier_inlines_helper_call(env, insn->imm)) {
-			insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&current_task);
-			insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
-			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
-			cnt = 3;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-#endif
-		/* Implement bpf_get_func_arg inline. */
-		if (prog_type == BPF_PROG_TYPE_TRACING &&
-		    insn->imm == BPF_FUNC_get_func_arg) {
-			if (eatype == BPF_TRACE_RAW_TP) {
-				int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
-
-				/* skip 'void *__data' in btf_trace_##name() and save to reg0 */
-				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
-				cnt = 1;
-			} else {
-				/* Load nr_args from ctx - 8 */
-				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
-				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
-				cnt = 2;
-			}
-			insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
-			insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
-			insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
-			insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
-			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
-			insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0);
-			insn_buf[cnt++] = BPF_JMP_A(1);
-			insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Implement bpf_get_func_ret inline. */
-		if (prog_type == BPF_PROG_TYPE_TRACING &&
-		    insn->imm == BPF_FUNC_get_func_ret) {
-			if (eatype == BPF_TRACE_FEXIT ||
-			    eatype == BPF_TRACE_FSESSION ||
-			    eatype == BPF_MODIFY_RETURN) {
-				/* Load nr_args from ctx - 8 */
-				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
-				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
-				insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
-				insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
-				insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
-				insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
-				insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
-				cnt = 7;
-			} else {
-				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
-				cnt = 1;
-			}
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Implement get_func_arg_cnt inline. */
-		if (prog_type == BPF_PROG_TYPE_TRACING &&
-		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
-			if (eatype == BPF_TRACE_RAW_TP) {
-				int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
-
-				/* skip 'void *__data' in btf_trace_##name() and save to reg0 */
-				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
-				cnt = 1;
-			} else {
-				/* Load nr_args from ctx - 8 */
-				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
-				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
-				cnt = 2;
-			}
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Implement bpf_get_func_ip inline. */
-		if (prog_type == BPF_PROG_TYPE_TRACING &&
-		    insn->imm == BPF_FUNC_get_func_ip) {
-			/* Load IP address from ctx - 16 */
-			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
-			if (!new_prog)
-				return -ENOMEM;
-
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Implement bpf_get_branch_snapshot inline. */
-		if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
-		    prog->jit_requested && BITS_PER_LONG == 64 &&
-		    insn->imm == BPF_FUNC_get_branch_snapshot) {
-			/* We are dealing with the following func protos:
-			 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
-			 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
-			 */
-			const u32 br_entry_size = sizeof(struct perf_branch_entry);
-
-			/* struct perf_branch_entry is part of UAPI and is
-			 * used as an array element, so extremely unlikely to
-			 * ever grow or shrink
-			 */
-			BUILD_BUG_ON(br_entry_size != 24);
-
-			/* if (unlikely(flags)) return -EINVAL */
-			insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
-
-			/* Transform size (bytes) into number of entries (cnt = size / 24).
-			 * But to avoid expensive division instruction, we implement
-			 * divide-by-3 through multiplication, followed by further
-			 * division by 8 through 3-bit right shift.
-			 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
-			 * p. 227, chapter "Unsigned Division by 3" for details and proofs.
-			 *
-			 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
-			 */
-			insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
-			insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
-			insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
-
-			/* call perf_snapshot_branch_stack implementation */
-			insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
-			/* if (entry_cnt == 0) return -ENOENT */
-			insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
-			/* return entry_cnt * sizeof(struct perf_branch_entry) */
-			insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
-			insn_buf[7] = BPF_JMP_A(3);
-			/* return -EINVAL; */
-			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
-			insn_buf[9] = BPF_JMP_A(1);
-			/* return -ENOENT; */
-			insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
-			cnt = 11;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-
-		/* Implement bpf_kptr_xchg inline */
-		if (prog->jit_requested && BITS_PER_LONG == 64 &&
-		    insn->imm == BPF_FUNC_kptr_xchg &&
-		    bpf_jit_supports_ptr_xchg()) {
-			insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
-			insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
-			cnt = 2;
-
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			goto next_insn;
-		}
-patch_call_imm:
-		fn = env->ops->get_func_proto(insn->imm, env->prog);
-		/* all functions that have prototype and verifier allowed
-		 * programs to call them, must be real in-kernel functions
-		 */
-		if (!fn->func) {
-			verifier_bug(env,
-				     "not inlined functions %s#%d is missing func",
-				     func_id_name(insn->imm), insn->imm);
-			return -EFAULT;
-		}
-		insn->imm = fn->func - __bpf_call_base;
-next_insn:
-		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
-			subprogs[cur_subprog].stack_depth += stack_depth_extra;
-			subprogs[cur_subprog].stack_extra = stack_depth_extra;
-
-			stack_depth = subprogs[cur_subprog].stack_depth;
-			if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
-				verbose(env, "stack size %d(extra %d) is too large\n",
-					stack_depth, stack_depth_extra);
-				return -EINVAL;
-			}
-			cur_subprog++;
-			stack_depth = subprogs[cur_subprog].stack_depth;
-			stack_depth_extra = 0;
-		}
-		i++;
-		insn++;
-	}
-
-	env->prog->aux->stack_depth = subprogs[0].stack_depth;
-	for (i = 0; i < env->subprog_cnt; i++) {
-		int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
-		int subprog_start = subprogs[i].start;
-		int stack_slots = subprogs[i].stack_extra / 8;
-		int slots = delta, cnt = 0;
-
-		if (!stack_slots)
-			continue;
-		/* We need two slots in case timed may_goto is supported. */
-		if (stack_slots > slots) {
-			verifier_bug(env, "stack_slots supports may_goto only");
-			return -EFAULT;
-		}
-
-		stack_depth = subprogs[i].stack_depth;
-		if (bpf_jit_supports_timed_may_goto()) {
-			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
-						     BPF_MAX_TIMED_LOOPS);
-			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
-		} else {
-			/* Add ST insn to subprog prologue to init extra stack */
-			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
-						     BPF_MAX_LOOPS);
-		}
-		/* Copy first actual insn to preserve it */
-		insn_buf[cnt++] = env->prog->insnsi[subprog_start];
-
-		new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
-		if (!new_prog)
-			return -ENOMEM;
-		env->prog = prog = new_prog;
-		/*
-		 * If may_goto is a first insn of a prog there could be a jmp
-		 * insn that points to it, hence adjust all such jmps to point
-		 * to insn after BPF_ST that inits may_goto count.
-		 * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
-		 */
-		WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
-	}
-
-	/* Since poke tab is now finalized, publish aux to tracker. */
-	for (i = 0; i < prog->aux->size_poke_tab; i++) {
-		map_ptr = prog->aux->poke_tab[i].tail_call.map;
-		if (!map_ptr->ops->map_poke_track ||
-		    !map_ptr->ops->map_poke_untrack ||
-		    !map_ptr->ops->map_poke_run) {
-			verifier_bug(env, "poke tab is misconfigured");
-			return -EFAULT;
-		}
-
-		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
-		if (ret < 0) {
-			verbose(env, "tracking tail call prog failed\n");
-			return ret;
-		}
-	}
-
-	ret = sort_kfunc_descs_by_imm_off(env);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
-					int position,
-					s32 stack_base,
-					u32 callback_subprogno,
-					u32 *total_cnt)
-{
-	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
-	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
-	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
-	int reg_loop_max = BPF_REG_6;
-	int reg_loop_cnt = BPF_REG_7;
-	int reg_loop_ctx = BPF_REG_8;
-
-	struct bpf_insn *insn_buf = env->insn_buf;
-	struct bpf_prog *new_prog;
-	u32 callback_start;
-	u32 call_insn_offset;
-	s32 callback_offset;
-	u32 cnt = 0;
-
-	/* This represents an inlined version of bpf_iter.c:bpf_loop,
-	 * be careful to modify this code in sync.
-	 */
-
-	/* Return error and jump to the end of the patch if
-	 * expected number of iterations is too big.
-	 */
-	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
-	insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
-	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
-	/* spill R6, R7, R8 to use these as loop vars */
-	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
-	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
-	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
-	/* initialize loop vars */
-	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
-	insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
-	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
-	/* loop header,
-	 * if reg_loop_cnt >= reg_loop_max skip the loop body
-	 */
-	insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
-	/* callback call,
-	 * correct callback offset would be set after patching
-	 */
-	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
-	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
-	insn_buf[cnt++] = BPF_CALL_REL(0);
-	/* increment loop counter */
-	insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
-	/* jump to loop header if callback returned 0 */
-	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
-	/* return value of bpf_loop,
-	 * set R0 to the number of iterations
-	 */
-	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
-	/* restore original values of R6, R7, R8 */
-	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
-	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
-	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
-
-	*total_cnt = cnt;
-	new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
-	if (!new_prog)
-		return new_prog;
-
-	/* callback start is known only after patching */
-	callback_start = env->subprog_info[callback_subprogno].start;
-	/* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
-	call_insn_offset = position + 12;
-	callback_offset = callback_start - call_insn_offset - 1;
-	new_prog->insnsi[call_insn_offset].imm = callback_offset;
-
-	return new_prog;
-}
-
-static bool is_bpf_loop_call(struct bpf_insn *insn)
-{
-	return insn->code == (BPF_JMP | BPF_CALL) &&
-		insn->src_reg == 0 &&
-		insn->imm == BPF_FUNC_loop;
-}
-
-/* For all sub-programs in the program (including main) check
- * insn_aux_data to see if there are bpf_loop calls that require
- * inlining. If such calls are found the calls are replaced with a
- * sequence of instructions produced by `inline_bpf_loop` function and
- * subprog stack_depth is increased by the size of 3 registers.
- * This stack space is used to spill values of the R6, R7, R8.  These
- * registers are used to store the loop bound, counter and context
- * variables.
- */
-static int optimize_bpf_loop(struct bpf_verifier_env *env)
-{
-	struct bpf_subprog_info *subprogs = env->subprog_info;
-	int i, cur_subprog = 0, cnt, delta = 0;
-	struct bpf_insn *insn = env->prog->insnsi;
-	int insn_cnt = env->prog->len;
-	u16 stack_depth = subprogs[cur_subprog].stack_depth;
-	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
-	u16 stack_depth_extra = 0;
-
-	for (i = 0; i < insn_cnt; i++, insn++) {
-		struct bpf_loop_inline_state *inline_state =
-			&env->insn_aux_data[i + delta].loop_inline_state;
-
-		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
-			struct bpf_prog *new_prog;
-
-			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
-			new_prog = inline_bpf_loop(env,
-						   i + delta,
-						   -(stack_depth + stack_depth_extra),
-						   inline_state->callback_subprogno,
-						   &cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta     += cnt - 1;
-			env->prog  = new_prog;
-			insn       = new_prog->insnsi + i + delta;
-		}
-
-		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
-			subprogs[cur_subprog].stack_depth += stack_depth_extra;
-			cur_subprog++;
-			stack_depth = subprogs[cur_subprog].stack_depth;
-			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
-			stack_depth_extra = 0;
-		}
-	}
-
-	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
-
-	return 0;
-}
-
-/* Remove unnecessary spill/fill pairs, members of fastcall pattern,
- * adjust subprograms stack depth when possible.
- */
-static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
-{
-	struct bpf_subprog_info *subprog = env->subprog_info;
-	struct bpf_insn_aux_data *aux = env->insn_aux_data;
-	struct bpf_insn *insn = env->prog->insnsi;
-	int insn_cnt = env->prog->len;
-	u32 spills_num;
-	bool modified = false;
-	int i, j;
-
-	for (i = 0; i < insn_cnt; i++, insn++) {
-		if (aux[i].fastcall_spills_num > 0) {
-			spills_num = aux[i].fastcall_spills_num;
-			/* NOPs would be removed by opt_remove_nops() */
-			for (j = 1; j <= spills_num; ++j) {
-				*(insn - j) = NOP;
-				*(insn + j) = NOP;
-			}
-			modified = true;
-		}
-		if ((subprog + 1)->start == i + 1) {
-			if (modified && !subprog->keep_fastcall_stack)
-				subprog->stack_depth = -subprog->fastcall_stack_off;
-			subprog++;
-			modified = false;
-		}
-	}
-
-	return 0;
-}
 
 static void free_states(struct bpf_verifier_env *env)
 {
@@ -26592,6 +23892,211 @@ exit:
 	return err;
 }
 
+/* replace a generic kfunc with a specialized version if necessary */
+static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
+{
+	struct bpf_prog *prog = env->prog;
+	bool seen_direct_write;
+	void *xdp_kfunc;
+	bool is_rdonly;
+	u32 func_id = desc->func_id;
+	u16 offset = desc->offset;
+	unsigned long addr = desc->addr;
+
+	if (offset) /* return if module BTF is used */
+		return 0;
+
+	if (bpf_dev_bound_kfunc_id(func_id)) {
+		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
+		if (xdp_kfunc)
+			addr = (unsigned long)xdp_kfunc;
+		/* fallback to default kfunc when not supported by netdev */
+	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
+		seen_direct_write = env->seen_direct_write;
+		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
+
+		if (is_rdonly)
+			addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
+
+		/* restore env->seen_direct_write to its original value, since
+		 * may_access_direct_pkt_data mutates it
+		 */
+		env->seen_direct_write = seen_direct_write;
+	} else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
+		if (bpf_lsm_has_d_inode_locked(prog))
+			addr = (unsigned long)bpf_set_dentry_xattr_locked;
+	} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
+		if (bpf_lsm_has_d_inode_locked(prog))
+			addr = (unsigned long)bpf_remove_dentry_xattr_locked;
+	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
+		if (!env->insn_aux_data[insn_idx].non_sleepable)
+			addr = (unsigned long)bpf_dynptr_from_file_sleepable;
+	} else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
+		if (env->insn_aux_data[insn_idx].non_sleepable)
+			addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
+	} else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
+		if (env->insn_aux_data[insn_idx].non_sleepable)
+			addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
+	}
+	desc->addr = addr;
+	return 0;
+}
+
+static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
+					    u16 struct_meta_reg,
+					    u16 node_offset_reg,
+					    struct bpf_insn *insn,
+					    struct bpf_insn *insn_buf,
+					    int *cnt)
+{
+	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
+	struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
+
+	insn_buf[0] = addr[0];
+	insn_buf[1] = addr[1];
+	insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
+	insn_buf[3] = *insn;
+	*cnt = 4;
+}
+
+int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+		     struct bpf_insn *insn_buf, int insn_idx, int *cnt)
+{
+	struct bpf_kfunc_desc *desc;
+	int err;
+
+	if (!insn->imm) {
+		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
+		return -EINVAL;
+	}
+
+	*cnt = 0;
+
+	/* insn->imm has the btf func_id. Replace it with an offset relative to
+	 * __bpf_call_base, unless the JIT needs to call functions that are
+	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
+	 */
+	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
+	if (!desc) {
+		verifier_bug(env, "kernel function descriptor not found for func_id %u",
+			     insn->imm);
+		return -EFAULT;
+	}
+
+	err = specialize_kfunc(env, desc, insn_idx);
+	if (err)
+		return err;
+
+	if (!bpf_jit_supports_far_kfunc_call())
+		insn->imm = BPF_CALL_IMM(desc->addr);
+
+	if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
+		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
+		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
+		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
+
+		if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
+			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
+				     insn_idx);
+			return -EFAULT;
+		}
+
+		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
+		insn_buf[1] = addr[0];
+		insn_buf[2] = addr[1];
+		insn_buf[3] = *insn;
+		*cnt = 4;
+	} else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
+		   is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
+		   is_bpf_refcount_acquire_kfunc(desc->func_id)) {
+		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
+		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
+
+		if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
+			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
+				     insn_idx);
+			return -EFAULT;
+		}
+
+		if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
+			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
+				     insn_idx);
+			return -EFAULT;
+		}
+
+		insn_buf[0] = addr[0];
+		insn_buf[1] = addr[1];
+		insn_buf[2] = *insn;
+		*cnt = 3;
+	} else if (is_bpf_list_push_kfunc(desc->func_id) ||
+		   is_bpf_rbtree_add_kfunc(desc->func_id)) {
+		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
+		int struct_meta_reg = BPF_REG_3;
+		int node_offset_reg = BPF_REG_4;
+
+		/* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
+		if (is_bpf_rbtree_add_kfunc(desc->func_id)) {
+			struct_meta_reg = BPF_REG_4;
+			node_offset_reg = BPF_REG_5;
+		}
+
+		if (!kptr_struct_meta) {
+			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
+				     insn_idx);
+			return -EFAULT;
+		}
+
+		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
+						node_offset_reg, insn, insn_buf, cnt);
+	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
+		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
+		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
+		*cnt = 1;
+	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
+		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
+		/*
+		 * inline the bpf_session_is_return() for fsession:
+		 *   bool bpf_session_is_return(void *ctx)
+		 *   {
+		 *       return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
+		 *   }
+		 */
+		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
+		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
+		*cnt = 3;
+	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
+		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
+		/*
+		 * inline bpf_session_cookie() for fsession:
+		 *   __u64 *bpf_session_cookie(void *ctx)
+		 *   {
+		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
+		 *       return &((u64 *)ctx)[-off];
+		 *   }
+		 */
+		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
+		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
+		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
+		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
+		*cnt = 6;
+	}
+
+	if (env->insn_aux_data[insn_idx].arg_prog) {
+		u32 regno = env->insn_aux_data[insn_idx].arg_prog;
+		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
+		int idx = *cnt;
+
+		insn_buf[idx++] = ld_addrs[0];
+		insn_buf[idx++] = ld_addrs[1];
+		insn_buf[idx++] = *insn;
+		*cnt = idx;
+	}
+	return 0;
+}
+
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
 {
 	u64 start_time = ktime_get_ns();
@@ -26763,22 +24268,22 @@ skip_full_check:
 	 * allocate additional slots.
 	 */
 	if (ret == 0)
-		ret = remove_fastcall_spills_fills(env);
+		ret = bpf_remove_fastcall_spills_fills(env);
 
 	if (ret == 0)
 		ret = check_max_stack_depth(env);
 
 	/* instruction rewrites happen after this point */
 	if (ret == 0)
-		ret = optimize_bpf_loop(env);
+		ret = bpf_optimize_bpf_loop(env);
 
 	if (is_priv) {
 		if (ret == 0)
-			opt_hard_wire_dead_code_branches(env);
+			bpf_opt_hard_wire_dead_code_branches(env);
 		if (ret == 0)
-			ret = opt_remove_dead_code(env);
+			ret = bpf_opt_remove_dead_code(env);
 		if (ret == 0)
-			ret = opt_remove_nops(env);
+			ret = bpf_opt_remove_nops(env);
 	} else {
 		if (ret == 0)
 			sanitize_dead_code(env);
@@ -26786,22 +24291,22 @@ skip_full_check:
 
 	if (ret == 0)
 		/* program is valid, convert *(u32*)(ctx + off) accesses */
-		ret = convert_ctx_accesses(env);
+		ret = bpf_convert_ctx_accesses(env);
 
 	if (ret == 0)
-		ret = do_misc_fixups(env);
+		ret = bpf_do_misc_fixups(env);
 
 	/* do 32-bit optimization after insn patching has done so those patched
 	 * insns could be handled correctly.
 	 */
 	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
-		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
+		ret = bpf_opt_subreg_zext_lo32_rnd_hi32(env, attr);
 		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
 								     : false;
 	}
 
 	if (ret == 0)
-		ret = fixup_call_args(env);
+		ret = bpf_fixup_call_args(env);
 
 	env->verification_time = ktime_get_ns() - start_time;
 	print_verification_stats(env);
@@ -26883,7 +24388,7 @@ err_release_maps:
 err_unlock:
 	if (!is_priv)
 		mutex_unlock(&bpf_verifier_lock);
-	clear_insn_aux_data(env, 0, env->prog->len);
+	bpf_clear_insn_aux_data(env, 0, env->prog->len);
 	vfree(env->insn_aux_data);
 err_free_env:
 	bpf_stack_liveness_free(env);
-- 
cgit v1.2.3


From fc150cddeea77561fbc94ac8f02cc75b016b09dd Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sun, 12 Apr 2026 08:29:31 -0700
Subject: bpf: Move compute_insn_live_regs() into liveness.c

verifier.c is huge. Move compute_insn_live_regs() into liveness.c.

Mechanical move. No functional changes.

Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20260412152936.54262-3-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   2 +
 kernel/bpf/liveness.c        | 247 ++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        | 250 +------------------------------------------
 3 files changed, 250 insertions(+), 249 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 4380ecad485b..e3f18667e030 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -1204,6 +1204,7 @@ int bpf_stack_liveness_init(struct bpf_verifier_env *env);
 void bpf_stack_liveness_free(struct bpf_verifier_env *env);
 int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
 bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi);
+int bpf_compute_live_registers(struct bpf_verifier_env *env);
 
 #define BPF_MAP_KEY_POISON	(1ULL << 63)
 #define BPF_MAP_KEY_SEEN	(1ULL << 62)
@@ -1234,6 +1235,7 @@ static inline u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
 }
 
 #define MAX_PACKET_OFF 0xffff
+#define CALLER_SAVED_REGS 6
 
 enum bpf_reg_arg_type {
 	SRC_OP,		/* register is used as source operand */
diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c
index 59d990237cbd..1fb4c511db5a 100644
--- a/kernel/bpf/liveness.c
+++ b/kernel/bpf/liveness.c
@@ -1953,3 +1953,250 @@ out:
 	kvfree(info);
 	return err;
 }
+
+/* Each field is a register bitmask */
+struct insn_live_regs {
+	u16 use;	/* registers read by instruction */
+	u16 def;	/* registers written by instruction */
+	u16 in;		/* registers that may be alive before instruction */
+	u16 out;	/* registers that may be alive after instruction */
+};
+
+/* Bitmask with 1s for all caller saved registers */
+#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
+
+/* Compute info->{use,def} fields for the instruction */
+static void compute_insn_live_regs(struct bpf_verifier_env *env,
+				   struct bpf_insn *insn,
+				   struct insn_live_regs *info)
+{
+	struct bpf_call_summary cs;
+	u8 class = BPF_CLASS(insn->code);
+	u8 code = BPF_OP(insn->code);
+	u8 mode = BPF_MODE(insn->code);
+	u16 src = BIT(insn->src_reg);
+	u16 dst = BIT(insn->dst_reg);
+	u16 r0  = BIT(0);
+	u16 def = 0;
+	u16 use = 0xffff;
+
+	switch (class) {
+	case BPF_LD:
+		switch (mode) {
+		case BPF_IMM:
+			if (BPF_SIZE(insn->code) == BPF_DW) {
+				def = dst;
+				use = 0;
+			}
+			break;
+		case BPF_LD | BPF_ABS:
+		case BPF_LD | BPF_IND:
+			/* stick with defaults */
+			break;
+		}
+		break;
+	case BPF_LDX:
+		switch (mode) {
+		case BPF_MEM:
+		case BPF_MEMSX:
+			def = dst;
+			use = src;
+			break;
+		}
+		break;
+	case BPF_ST:
+		switch (mode) {
+		case BPF_MEM:
+			def = 0;
+			use = dst;
+			break;
+		}
+		break;
+	case BPF_STX:
+		switch (mode) {
+		case BPF_MEM:
+			def = 0;
+			use = dst | src;
+			break;
+		case BPF_ATOMIC:
+			switch (insn->imm) {
+			case BPF_CMPXCHG:
+				use = r0 | dst | src;
+				def = r0;
+				break;
+			case BPF_LOAD_ACQ:
+				def = dst;
+				use = src;
+				break;
+			case BPF_STORE_REL:
+				def = 0;
+				use = dst | src;
+				break;
+			default:
+				use = dst | src;
+				if (insn->imm & BPF_FETCH)
+					def = src;
+				else
+					def = 0;
+			}
+			break;
+		}
+		break;
+	case BPF_ALU:
+	case BPF_ALU64:
+		switch (code) {
+		case BPF_END:
+			use = dst;
+			def = dst;
+			break;
+		case BPF_MOV:
+			def = dst;
+			if (BPF_SRC(insn->code) == BPF_K)
+				use = 0;
+			else
+				use = src;
+			break;
+		default:
+			def = dst;
+			if (BPF_SRC(insn->code) == BPF_K)
+				use = dst;
+			else
+				use = dst | src;
+		}
+		break;
+	case BPF_JMP:
+	case BPF_JMP32:
+		switch (code) {
+		case BPF_JA:
+			def = 0;
+			if (BPF_SRC(insn->code) == BPF_X)
+				use = dst;
+			else
+				use = 0;
+			break;
+		case BPF_JCOND:
+			def = 0;
+			use = 0;
+			break;
+		case BPF_EXIT:
+			def = 0;
+			use = r0;
+			break;
+		case BPF_CALL:
+			def = ALL_CALLER_SAVED_REGS;
+			use = def & ~BIT(BPF_REG_0);
+			if (bpf_get_call_summary(env, insn, &cs))
+				use = GENMASK(cs.num_params, 1);
+			break;
+		default:
+			def = 0;
+			if (BPF_SRC(insn->code) == BPF_K)
+				use = dst;
+			else
+				use = dst | src;
+		}
+		break;
+	}
+
+	info->def = def;
+	info->use = use;
+}
+
+/* Compute may-live registers after each instruction in the program.
+ * The register is live after the instruction I if it is read by some
+ * instruction S following I during program execution and is not
+ * overwritten between I and S.
+ *
+ * Store result in env->insn_aux_data[i].live_regs.
+ */
+int bpf_compute_live_registers(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
+	struct bpf_insn *insns = env->prog->insnsi;
+	struct insn_live_regs *state;
+	int insn_cnt = env->prog->len;
+	int err = 0, i, j;
+	bool changed;
+
+	/* Use the following algorithm:
+	 * - define the following:
+	 *   - I.use : a set of all registers read by instruction I;
+	 *   - I.def : a set of all registers written by instruction I;
+	 *   - I.in  : a set of all registers that may be alive before I execution;
+	 *   - I.out : a set of all registers that may be alive after I execution;
+	 *   - insn_successors(I): a set of instructions S that might immediately
+	 *                         follow I for some program execution;
+	 * - associate separate empty sets 'I.in' and 'I.out' with each instruction;
+	 * - visit each instruction in a postorder and update
+	 *   state[i].in, state[i].out as follows:
+	 *
+	 *       state[i].out = U [state[s].in for S in insn_successors(i)]
+	 *       state[i].in  = (state[i].out / state[i].def) U state[i].use
+	 *
+	 *   (where U stands for set union, / stands for set difference)
+	 * - repeat the computation while {in,out} fields changes for
+	 *   any instruction.
+	 */
+	state = kvzalloc_objs(*state, insn_cnt, GFP_KERNEL_ACCOUNT);
+	if (!state) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < insn_cnt; ++i)
+		compute_insn_live_regs(env, &insns[i], &state[i]);
+
+	/* Forward pass: resolve stack access through FP-derived pointers */
+	err = bpf_compute_subprog_arg_access(env);
+	if (err)
+		goto out;
+
+	changed = true;
+	while (changed) {
+		changed = false;
+		for (i = 0; i < env->cfg.cur_postorder; ++i) {
+			int insn_idx = env->cfg.insn_postorder[i];
+			struct insn_live_regs *live = &state[insn_idx];
+			struct bpf_iarray *succ;
+			u16 new_out = 0;
+			u16 new_in = 0;
+
+			succ = bpf_insn_successors(env, insn_idx);
+			for (int s = 0; s < succ->cnt; ++s)
+				new_out |= state[succ->items[s]].in;
+			new_in = (new_out & ~live->def) | live->use;
+			if (new_out != live->out || new_in != live->in) {
+				live->in = new_in;
+				live->out = new_out;
+				changed = true;
+			}
+		}
+	}
+
+	for (i = 0; i < insn_cnt; ++i)
+		insn_aux[i].live_regs_before = state[i].in;
+
+	if (env->log.level & BPF_LOG_LEVEL2) {
+		verbose(env, "Live regs before insn:\n");
+		for (i = 0; i < insn_cnt; ++i) {
+			if (env->insn_aux_data[i].scc)
+				verbose(env, "%3d ", env->insn_aux_data[i].scc);
+			else
+				verbose(env, "    ");
+			verbose(env, "%3d: ", i);
+			for (j = BPF_REG_0; j < BPF_REG_10; ++j)
+				if (insn_aux[i].live_regs_before & BIT(j))
+					verbose(env, "%d", j);
+				else
+					verbose(env, ".");
+			verbose(env, " ");
+			bpf_verbose_insn(env, &insns[i]);
+			if (bpf_is_ldimm64(&insns[i]))
+				i++;
+		}
+	}
+
+out:
+	kvfree(state);
+	return err;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 31e03aa6b070..11f0c5a050b3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2144,7 +2144,6 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 	return &elem->st;
 }
 
-#define CALLER_SAVED_REGS 6
 static const int caller_saved[CALLER_SAVED_REGS] = {
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
@@ -23461,253 +23460,6 @@ static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr,
 	return 0;
 }
 
-/* Each field is a register bitmask */
-struct insn_live_regs {
-	u16 use;	/* registers read by instruction */
-	u16 def;	/* registers written by instruction */
-	u16 in;		/* registers that may be alive before instruction */
-	u16 out;	/* registers that may be alive after instruction */
-};
-
-/* Bitmask with 1s for all caller saved registers */
-#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
-
-/* Compute info->{use,def} fields for the instruction */
-static void compute_insn_live_regs(struct bpf_verifier_env *env,
-				   struct bpf_insn *insn,
-				   struct insn_live_regs *info)
-{
-	struct bpf_call_summary cs;
-	u8 class = BPF_CLASS(insn->code);
-	u8 code = BPF_OP(insn->code);
-	u8 mode = BPF_MODE(insn->code);
-	u16 src = BIT(insn->src_reg);
-	u16 dst = BIT(insn->dst_reg);
-	u16 r0  = BIT(0);
-	u16 def = 0;
-	u16 use = 0xffff;
-
-	switch (class) {
-	case BPF_LD:
-		switch (mode) {
-		case BPF_IMM:
-			if (BPF_SIZE(insn->code) == BPF_DW) {
-				def = dst;
-				use = 0;
-			}
-			break;
-		case BPF_LD | BPF_ABS:
-		case BPF_LD | BPF_IND:
-			/* stick with defaults */
-			break;
-		}
-		break;
-	case BPF_LDX:
-		switch (mode) {
-		case BPF_MEM:
-		case BPF_MEMSX:
-			def = dst;
-			use = src;
-			break;
-		}
-		break;
-	case BPF_ST:
-		switch (mode) {
-		case BPF_MEM:
-			def = 0;
-			use = dst;
-			break;
-		}
-		break;
-	case BPF_STX:
-		switch (mode) {
-		case BPF_MEM:
-			def = 0;
-			use = dst | src;
-			break;
-		case BPF_ATOMIC:
-			switch (insn->imm) {
-			case BPF_CMPXCHG:
-				use = r0 | dst | src;
-				def = r0;
-				break;
-			case BPF_LOAD_ACQ:
-				def = dst;
-				use = src;
-				break;
-			case BPF_STORE_REL:
-				def = 0;
-				use = dst | src;
-				break;
-			default:
-				use = dst | src;
-				if (insn->imm & BPF_FETCH)
-					def = src;
-				else
-					def = 0;
-			}
-			break;
-		}
-		break;
-	case BPF_ALU:
-	case BPF_ALU64:
-		switch (code) {
-		case BPF_END:
-			use = dst;
-			def = dst;
-			break;
-		case BPF_MOV:
-			def = dst;
-			if (BPF_SRC(insn->code) == BPF_K)
-				use = 0;
-			else
-				use = src;
-			break;
-		default:
-			def = dst;
-			if (BPF_SRC(insn->code) == BPF_K)
-				use = dst;
-			else
-				use = dst | src;
-		}
-		break;
-	case BPF_JMP:
-	case BPF_JMP32:
-		switch (code) {
-		case BPF_JA:
-			def = 0;
-			if (BPF_SRC(insn->code) == BPF_X)
-				use = dst;
-			else
-				use = 0;
-			break;
-		case BPF_JCOND:
-			def = 0;
-			use = 0;
-			break;
-		case BPF_EXIT:
-			def = 0;
-			use = r0;
-			break;
-		case BPF_CALL:
-			def = ALL_CALLER_SAVED_REGS;
-			use = def & ~BIT(BPF_REG_0);
-			if (bpf_get_call_summary(env, insn, &cs))
-				use = GENMASK(cs.num_params, 1);
-			break;
-		default:
-			def = 0;
-			if (BPF_SRC(insn->code) == BPF_K)
-				use = dst;
-			else
-				use = dst | src;
-		}
-		break;
-	}
-
-	info->def = def;
-	info->use = use;
-}
-
-/* Compute may-live registers after each instruction in the program.
- * The register is live after the instruction I if it is read by some
- * instruction S following I during program execution and is not
- * overwritten between I and S.
- *
- * Store result in env->insn_aux_data[i].live_regs.
- */
-static int compute_live_registers(struct bpf_verifier_env *env)
-{
-	struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
-	struct bpf_insn *insns = env->prog->insnsi;
-	struct insn_live_regs *state;
-	int insn_cnt = env->prog->len;
-	int err = 0, i, j;
-	bool changed;
-
-	/* Use the following algorithm:
-	 * - define the following:
-	 *   - I.use : a set of all registers read by instruction I;
-	 *   - I.def : a set of all registers written by instruction I;
-	 *   - I.in  : a set of all registers that may be alive before I execution;
-	 *   - I.out : a set of all registers that may be alive after I execution;
-	 *   - insn_successors(I): a set of instructions S that might immediately
-	 *                         follow I for some program execution;
-	 * - associate separate empty sets 'I.in' and 'I.out' with each instruction;
-	 * - visit each instruction in a postorder and update
-	 *   state[i].in, state[i].out as follows:
-	 *
-	 *       state[i].out = U [state[s].in for S in insn_successors(i)]
-	 *       state[i].in  = (state[i].out / state[i].def) U state[i].use
-	 *
-	 *   (where U stands for set union, / stands for set difference)
-	 * - repeat the computation while {in,out} fields changes for
-	 *   any instruction.
-	 */
-	state = kvzalloc_objs(*state, insn_cnt, GFP_KERNEL_ACCOUNT);
-	if (!state) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	for (i = 0; i < insn_cnt; ++i)
-		compute_insn_live_regs(env, &insns[i], &state[i]);
-
-	/* Forward pass: resolve stack access through FP-derived pointers */
-	err = bpf_compute_subprog_arg_access(env);
-	if (err)
-		goto out;
-
-	changed = true;
-	while (changed) {
-		changed = false;
-		for (i = 0; i < env->cfg.cur_postorder; ++i) {
-			int insn_idx = env->cfg.insn_postorder[i];
-			struct insn_live_regs *live = &state[insn_idx];
-			struct bpf_iarray *succ;
-			u16 new_out = 0;
-			u16 new_in = 0;
-
-			succ = bpf_insn_successors(env, insn_idx);
-			for (int s = 0; s < succ->cnt; ++s)
-				new_out |= state[succ->items[s]].in;
-			new_in = (new_out & ~live->def) | live->use;
-			if (new_out != live->out || new_in != live->in) {
-				live->in = new_in;
-				live->out = new_out;
-				changed = true;
-			}
-		}
-	}
-
-	for (i = 0; i < insn_cnt; ++i)
-		insn_aux[i].live_regs_before = state[i].in;
-
-	if (env->log.level & BPF_LOG_LEVEL2) {
-		verbose(env, "Live regs before insn:\n");
-		for (i = 0; i < insn_cnt; ++i) {
-			if (env->insn_aux_data[i].scc)
-				verbose(env, "%3d ", env->insn_aux_data[i].scc);
-			else
-				verbose(env, "    ");
-			verbose(env, "%3d: ", i);
-			for (j = BPF_REG_0; j < BPF_REG_10; ++j)
-				if (insn_aux[i].live_regs_before & BIT(j))
-					verbose(env, "%d", j);
-				else
-					verbose(env, ".");
-			verbose(env, " ");
-			bpf_verbose_insn(env, &insns[i]);
-			if (bpf_is_ldimm64(&insns[i]))
-				i++;
-		}
-	}
-
-out:
-	kvfree(state);
-	return err;
-}
-
 /*
  * Compute strongly connected components (SCCs) on the CFG.
  * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc.
@@ -24247,7 +23999,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret < 0)
 		goto skip_full_check;
 
-	ret = compute_live_registers(env);
+	ret = bpf_compute_live_registers(env);
 	if (ret < 0)
 		goto skip_full_check;
 
-- 
cgit v1.2.3


From f8a8faceab9953ed074cd4125b31cc6a562237d8 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sun, 12 Apr 2026 08:29:32 -0700
Subject: bpf: Move check_cfg() into cfg.c

verifier.c is huge. Move check_cfg(), compute_postorder(),
compute_scc() into cfg.c

Mechanical move. No functional changes.

Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20260412152936.54262-4-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  115 ++++-
 kernel/bpf/Makefile          |    2 +-
 kernel/bpf/cfg.c             |  872 +++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        | 1026 ++----------------------------------------
 4 files changed, 1018 insertions(+), 997 deletions(-)
 create mode 100644 kernel/bpf/cfg.c

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index e3f18667e030..aa92a597bc5c 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -983,6 +983,41 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
 		bpf_log(&env->log, "verifier bug: " fmt "\n", ##args);				\
 	})
 
+static inline void mark_prune_point(struct bpf_verifier_env *env, int idx)
+{
+	env->insn_aux_data[idx].prune_point = true;
+}
+
+static inline bool bpf_is_prune_point(struct bpf_verifier_env *env, int insn_idx)
+{
+	return env->insn_aux_data[insn_idx].prune_point;
+}
+
+static inline void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
+{
+	env->insn_aux_data[idx].force_checkpoint = true;
+}
+
+static inline bool bpf_is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
+{
+	return env->insn_aux_data[insn_idx].force_checkpoint;
+}
+
+static inline void mark_calls_callback(struct bpf_verifier_env *env, int idx)
+{
+	env->insn_aux_data[idx].calls_callback = true;
+}
+
+static inline bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx)
+{
+	return env->insn_aux_data[insn_idx].calls_callback;
+}
+
+static inline void mark_jmp_point(struct bpf_verifier_env *env, int idx)
+{
+	env->insn_aux_data[idx].jmp_point = true;
+}
+
 static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *cur = env->cur_state;
@@ -1179,13 +1214,91 @@ struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *en
 int bpf_jmp_offset(struct bpf_insn *insn);
 struct bpf_iarray *bpf_insn_successors(struct bpf_verifier_env *env, u32 idx);
 void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask);
-bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx);
 bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog);
 
 int bpf_find_subprog(struct bpf_verifier_env *env, int off);
 int bpf_compute_const_regs(struct bpf_verifier_env *env);
 int bpf_prune_dead_branches(struct bpf_verifier_env *env);
+int bpf_check_cfg(struct bpf_verifier_env *env);
 int bpf_compute_postorder(struct bpf_verifier_env *env);
+int bpf_compute_scc(struct bpf_verifier_env *env);
+
+struct bpf_map_desc {
+	struct bpf_map *ptr;
+	int uid;
+};
+
+struct bpf_kfunc_call_arg_meta {
+	/* In parameters */
+	struct btf *btf;
+	u32 func_id;
+	u32 kfunc_flags;
+	const struct btf_type *func_proto;
+	const char *func_name;
+	/* Out parameters */
+	u32 ref_obj_id;
+	u8 release_regno;
+	bool r0_rdonly;
+	u32 ret_btf_id;
+	u64 r0_size;
+	u32 subprogno;
+	struct {
+		u64 value;
+		bool found;
+	} arg_constant;
+
+	/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
+	 * generally to pass info about user-defined local kptr types to later
+	 * verification logic
+	 *   bpf_obj_drop/bpf_percpu_obj_drop
+	 *     Record the local kptr type to be drop'd
+	 *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
+	 *     Record the local kptr type to be refcount_incr'd and use
+	 *     arg_owning_ref to determine whether refcount_acquire should be
+	 *     fallible
+	 */
+	struct btf *arg_btf;
+	u32 arg_btf_id;
+	bool arg_owning_ref;
+	bool arg_prog;
+
+	struct {
+		struct btf_field *field;
+	} arg_list_head;
+	struct {
+		struct btf_field *field;
+	} arg_rbtree_root;
+	struct {
+		enum bpf_dynptr_type type;
+		u32 id;
+		u32 ref_obj_id;
+	} initialized_dynptr;
+	struct {
+		u8 spi;
+		u8 frameno;
+	} iter;
+	struct bpf_map_desc map;
+	u64 mem_size;
+};
+
+int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id,
+			 const struct bpf_func_proto **ptr);
+int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env, s32 func_id,
+			     s16 offset, struct bpf_kfunc_call_arg_meta *meta);
+bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn);
+bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn);
+static inline bool bpf_is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->kfunc_flags & KF_ITER_NEXT;
+}
+
+static inline bool bpf_is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->kfunc_flags & KF_SLEEPABLE;
+}
+bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta);
+struct bpf_iarray *bpf_iarray_realloc(struct bpf_iarray *old, size_t n_elem);
+int bpf_copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off);
 bool bpf_insn_is_cond_jump(u8 code);
 bool bpf_is_may_goto_insn(struct bpf_insn *insn);
 
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 7c1eeee87fda..8649ee9651a9 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
-obj-$(CONFIG_BPF_SYSCALL) += fixups.o
+obj-$(CONFIG_BPF_SYSCALL) += fixups.o cfg.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
new file mode 100644
index 000000000000..998f42a8189a
--- /dev/null
+++ b/kernel/bpf/cfg.c
@@ -0,0 +1,872 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+#include <linux/sort.h>
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+/* non-recursive DFS pseudo code
+ * 1  procedure DFS-iterative(G,v):
+ * 2      label v as discovered
+ * 3      let S be a stack
+ * 4      S.push(v)
+ * 5      while S is not empty
+ * 6            t <- S.peek()
+ * 7            if t is what we're looking for:
+ * 8                return t
+ * 9            for all edges e in G.adjacentEdges(t) do
+ * 10               if edge e is already labelled
+ * 11                   continue with the next edge
+ * 12               w <- G.adjacentVertex(t,e)
+ * 13               if vertex w is not discovered and not explored
+ * 14                   label e as tree-edge
+ * 15                   label w as discovered
+ * 16                   S.push(w)
+ * 17                   continue at 5
+ * 18               else if vertex w is discovered
+ * 19                   label e as back-edge
+ * 20               else
+ * 21                   // vertex w is explored
+ * 22                   label e as forward- or cross-edge
+ * 23           label t as explored
+ * 24           S.pop()
+ *
+ * convention:
+ * 0x10 - discovered
+ * 0x11 - discovered and fall-through edge labelled
+ * 0x12 - discovered and fall-through and branch edges labelled
+ * 0x20 - explored
+ */
+
+enum {
+	DISCOVERED = 0x10,
+	EXPLORED = 0x20,
+	FALLTHROUGH = 1,
+	BRANCH = 2,
+};
+
+
+static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
+{
+	struct bpf_subprog_info *subprog;
+
+	subprog = bpf_find_containing_subprog(env, off);
+	subprog->changes_pkt_data = true;
+}
+
+static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off)
+{
+	struct bpf_subprog_info *subprog;
+
+	subprog = bpf_find_containing_subprog(env, off);
+	subprog->might_sleep = true;
+}
+
+/* 't' is an index of a call-site.
+ * 'w' is a callee entry point.
+ * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED.
+ * Rely on DFS traversal order and absence of recursive calls to guarantee that
+ * callee's change_pkt_data marks would be correct at that moment.
+ */
+static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
+{
+	struct bpf_subprog_info *caller, *callee;
+
+	caller = bpf_find_containing_subprog(env, t);
+	callee = bpf_find_containing_subprog(env, w);
+	caller->changes_pkt_data |= callee->changes_pkt_data;
+	caller->might_sleep |= callee->might_sleep;
+}
+
+enum {
+	DONE_EXPLORING = 0,
+	KEEP_EXPLORING = 1,
+};
+
+/* t, w, e - match pseudo-code above:
+ * t - index of current instruction
+ * w - next instruction
+ * e - edge
+ */
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
+{
+	int *insn_stack = env->cfg.insn_stack;
+	int *insn_state = env->cfg.insn_state;
+
+	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
+		return DONE_EXPLORING;
+
+	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
+		return DONE_EXPLORING;
+
+	if (w < 0 || w >= env->prog->len) {
+		verbose_linfo(env, t, "%d: ", t);
+		verbose(env, "jump out of range from insn %d to %d\n", t, w);
+		return -EINVAL;
+	}
+
+	if (e == BRANCH) {
+		/* mark branch target for state pruning */
+		mark_prune_point(env, w);
+		mark_jmp_point(env, w);
+	}
+
+	if (insn_state[w] == 0) {
+		/* tree-edge */
+		insn_state[t] = DISCOVERED | e;
+		insn_state[w] = DISCOVERED;
+		if (env->cfg.cur_stack >= env->prog->len)
+			return -E2BIG;
+		insn_stack[env->cfg.cur_stack++] = w;
+		return KEEP_EXPLORING;
+	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
+		if (env->bpf_capable)
+			return DONE_EXPLORING;
+		verbose_linfo(env, t, "%d: ", t);
+		verbose_linfo(env, w, "%d: ", w);
+		verbose(env, "back-edge from insn %d to %d\n", t, w);
+		return -EINVAL;
+	} else if (insn_state[w] == EXPLORED) {
+		/* forward- or cross-edge */
+		insn_state[t] = DISCOVERED | e;
+	} else {
+		verifier_bug(env, "insn state internal bug");
+		return -EFAULT;
+	}
+	return DONE_EXPLORING;
+}
+
+static int visit_func_call_insn(int t, struct bpf_insn *insns,
+				struct bpf_verifier_env *env,
+				bool visit_callee)
+{
+	int ret, insn_sz;
+	int w;
+
+	insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
+	ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
+	if (ret)
+		return ret;
+
+	mark_prune_point(env, t + insn_sz);
+	/* when we exit from subprog, we need to record non-linear history */
+	mark_jmp_point(env, t + insn_sz);
+
+	if (visit_callee) {
+		w = t + insns[t].imm + 1;
+		mark_prune_point(env, t);
+		merge_callee_effects(env, t, w);
+		ret = push_insn(t, w, BRANCH, env);
+	}
+	return ret;
+}
+
+struct bpf_iarray *bpf_iarray_realloc(struct bpf_iarray *old, size_t n_elem)
+{
+	size_t new_size = sizeof(struct bpf_iarray) + n_elem * sizeof(old->items[0]);
+	struct bpf_iarray *new;
+
+	new = kvrealloc(old, new_size, GFP_KERNEL_ACCOUNT);
+	if (!new) {
+		/* this is what callers always want, so simplify the call site */
+		kvfree(old);
+		return NULL;
+	}
+
+	new->cnt = n_elem;
+	return new;
+}
+
+static int copy_insn_array(struct bpf_map *map, u32 start, u32 end, u32 *items)
+{
+	struct bpf_insn_array_value *value;
+	u32 i;
+
+	for (i = start; i <= end; i++) {
+		value = map->ops->map_lookup_elem(map, &i);
+		/*
+		 * map_lookup_elem of an array map will never return an error,
+		 * but not checking it makes some static analysers to worry
+		 */
+		if (IS_ERR(value))
+			return PTR_ERR(value);
+		else if (!value)
+			return -EINVAL;
+		items[i - start] = value->xlated_off;
+	}
+	return 0;
+}
+
+static int cmp_ptr_to_u32(const void *a, const void *b)
+{
+	return *(u32 *)a - *(u32 *)b;
+}
+
+static int sort_insn_array_uniq(u32 *items, int cnt)
+{
+	int unique = 1;
+	int i;
+
+	sort(items, cnt, sizeof(items[0]), cmp_ptr_to_u32, NULL);
+
+	for (i = 1; i < cnt; i++)
+		if (items[i] != items[unique - 1])
+			items[unique++] = items[i];
+
+	return unique;
+}
+
+/*
+ * sort_unique({map[start], ..., map[end]}) into off
+ */
+int bpf_copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off)
+{
+	u32 n = end - start + 1;
+	int err;
+
+	err = copy_insn_array(map, start, end, off);
+	if (err)
+		return err;
+
+	return sort_insn_array_uniq(off, n);
+}
+
+/*
+ * Copy all unique offsets from the map
+ */
+static struct bpf_iarray *jt_from_map(struct bpf_map *map)
+{
+	struct bpf_iarray *jt;
+	int err;
+	int n;
+
+	jt = bpf_iarray_realloc(NULL, map->max_entries);
+	if (!jt)
+		return ERR_PTR(-ENOMEM);
+
+	n = bpf_copy_insn_array_uniq(map, 0, map->max_entries - 1, jt->items);
+	if (n < 0) {
+		err = n;
+		goto err_free;
+	}
+	if (n == 0) {
+		err = -EINVAL;
+		goto err_free;
+	}
+	jt->cnt = n;
+	return jt;
+
+err_free:
+	kvfree(jt);
+	return ERR_PTR(err);
+}
+
+/*
+ * Find and collect all maps which fit in the subprog. Return the result as one
+ * combined jump table in jt->items (allocated with kvcalloc)
+ */
+static struct bpf_iarray *jt_from_subprog(struct bpf_verifier_env *env,
+					  int subprog_start, int subprog_end)
+{
+	struct bpf_iarray *jt = NULL;
+	struct bpf_map *map;
+	struct bpf_iarray *jt_cur;
+	int i;
+
+	for (i = 0; i < env->insn_array_map_cnt; i++) {
+		/*
+		 * TODO (when needed): collect only jump tables, not static keys
+		 * or maps for indirect calls
+		 */
+		map = env->insn_array_maps[i];
+
+		jt_cur = jt_from_map(map);
+		if (IS_ERR(jt_cur)) {
+			kvfree(jt);
+			return jt_cur;
+		}
+
+		/*
+		 * This is enough to check one element. The full table is
+		 * checked to fit inside the subprog later in create_jt()
+		 */
+		if (jt_cur->items[0] >= subprog_start && jt_cur->items[0] < subprog_end) {
+			u32 old_cnt = jt ? jt->cnt : 0;
+			jt = bpf_iarray_realloc(jt, old_cnt + jt_cur->cnt);
+			if (!jt) {
+				kvfree(jt_cur);
+				return ERR_PTR(-ENOMEM);
+			}
+			memcpy(jt->items + old_cnt, jt_cur->items, jt_cur->cnt << 2);
+		}
+
+		kvfree(jt_cur);
+	}
+
+	if (!jt) {
+		verbose(env, "no jump tables found for subprog starting at %u\n", subprog_start);
+		return ERR_PTR(-EINVAL);
+	}
+
+	jt->cnt = sort_insn_array_uniq(jt->items, jt->cnt);
+	return jt;
+}
+
+static struct bpf_iarray *
+create_jt(int t, struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *subprog;
+	int subprog_start, subprog_end;
+	struct bpf_iarray *jt;
+	int i;
+
+	subprog = bpf_find_containing_subprog(env, t);
+	subprog_start = subprog->start;
+	subprog_end = (subprog + 1)->start;
+	jt = jt_from_subprog(env, subprog_start, subprog_end);
+	if (IS_ERR(jt))
+		return jt;
+
+	/* Check that the every element of the jump table fits within the given subprogram */
+	for (i = 0; i < jt->cnt; i++) {
+		if (jt->items[i] < subprog_start || jt->items[i] >= subprog_end) {
+			verbose(env, "jump table for insn %d points outside of the subprog [%u,%u]\n",
+					t, subprog_start, subprog_end);
+			kvfree(jt);
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	return jt;
+}
+
+/* "conditional jump with N edges" */
+static int visit_gotox_insn(int t, struct bpf_verifier_env *env)
+{
+	int *insn_stack = env->cfg.insn_stack;
+	int *insn_state = env->cfg.insn_state;
+	bool keep_exploring = false;
+	struct bpf_iarray *jt;
+	int i, w;
+
+	jt = env->insn_aux_data[t].jt;
+	if (!jt) {
+		jt = create_jt(t, env);
+		if (IS_ERR(jt))
+			return PTR_ERR(jt);
+
+		env->insn_aux_data[t].jt = jt;
+	}
+
+	mark_prune_point(env, t);
+	for (i = 0; i < jt->cnt; i++) {
+		w = jt->items[i];
+		if (w < 0 || w >= env->prog->len) {
+			verbose(env, "indirect jump out of range from insn %d to %d\n", t, w);
+			return -EINVAL;
+		}
+
+		mark_jmp_point(env, w);
+
+		/* EXPLORED || DISCOVERED */
+		if (insn_state[w])
+			continue;
+
+		if (env->cfg.cur_stack >= env->prog->len)
+			return -E2BIG;
+
+		insn_stack[env->cfg.cur_stack++] = w;
+		insn_state[w] |= DISCOVERED;
+		keep_exploring = true;
+	}
+
+	return keep_exploring ? KEEP_EXPLORING : DONE_EXPLORING;
+}
+
+/*
+ * Instructions that can abnormally return from a subprog (tail_call
+ * upon success, ld_{abs,ind} upon load failure) have a hidden exit
+ * that the verifier must account for.
+ */
+static int visit_abnormal_return_insn(struct bpf_verifier_env *env, int t)
+{
+	struct bpf_subprog_info *subprog;
+	struct bpf_iarray *jt;
+
+	if (env->insn_aux_data[t].jt)
+		return 0;
+
+	jt = bpf_iarray_realloc(NULL, 2);
+	if (!jt)
+		return -ENOMEM;
+
+	subprog = bpf_find_containing_subprog(env, t);
+	jt->items[0] = t + 1;
+	jt->items[1] = subprog->exit_idx;
+	env->insn_aux_data[t].jt = jt;
+	return 0;
+}
+
+/* Visits the instruction at index t and returns one of the following:
+ *  < 0 - an error occurred
+ *  DONE_EXPLORING - the instruction was fully explored
+ *  KEEP_EXPLORING - there is still work to be done before it is fully explored
+ */
+static int visit_insn(int t, struct bpf_verifier_env *env)
+{
+	struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
+	int ret, off, insn_sz;
+
+	if (bpf_pseudo_func(insn))
+		return visit_func_call_insn(t, insns, env, true);
+
+	/* All non-branch instructions have a single fall-through edge. */
+	if (BPF_CLASS(insn->code) != BPF_JMP &&
+	    BPF_CLASS(insn->code) != BPF_JMP32) {
+		if (BPF_CLASS(insn->code) == BPF_LD &&
+		    (BPF_MODE(insn->code) == BPF_ABS ||
+		     BPF_MODE(insn->code) == BPF_IND)) {
+			ret = visit_abnormal_return_insn(env, t);
+			if (ret)
+				return ret;
+		}
+		insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
+		return push_insn(t, t + insn_sz, FALLTHROUGH, env);
+	}
+
+	switch (BPF_OP(insn->code)) {
+	case BPF_EXIT:
+		return DONE_EXPLORING;
+
+	case BPF_CALL:
+		if (bpf_is_async_callback_calling_insn(insn))
+			/* Mark this call insn as a prune point to trigger
+			 * is_state_visited() check before call itself is
+			 * processed by __check_func_call(). Otherwise new
+			 * async state will be pushed for further exploration.
+			 */
+			mark_prune_point(env, t);
+		/* For functions that invoke callbacks it is not known how many times
+		 * callback would be called. Verifier models callback calling functions
+		 * by repeatedly visiting callback bodies and returning to origin call
+		 * instruction.
+		 * In order to stop such iteration verifier needs to identify when a
+		 * state identical some state from a previous iteration is reached.
+		 * Check below forces creation of checkpoint before callback calling
+		 * instruction to allow search for such identical states.
+		 */
+		if (bpf_is_sync_callback_calling_insn(insn)) {
+			mark_calls_callback(env, t);
+			mark_force_checkpoint(env, t);
+			mark_prune_point(env, t);
+			mark_jmp_point(env, t);
+		}
+		if (bpf_helper_call(insn)) {
+			const struct bpf_func_proto *fp;
+
+			ret = bpf_get_helper_proto(env, insn->imm, &fp);
+			/* If called in a non-sleepable context program will be
+			 * rejected anyway, so we should end up with precise
+			 * sleepable marks on subprogs, except for dead code
+			 * elimination.
+			 */
+			if (ret == 0 && fp->might_sleep)
+				mark_subprog_might_sleep(env, t);
+			if (bpf_helper_changes_pkt_data(insn->imm))
+				mark_subprog_changes_pkt_data(env, t);
+			if (insn->imm == BPF_FUNC_tail_call) {
+				ret = visit_abnormal_return_insn(env, t);
+				if (ret)
+					return ret;
+			}
+		} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+			struct bpf_kfunc_call_arg_meta meta;
+
+			ret = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
+			if (ret == 0 && bpf_is_iter_next_kfunc(&meta)) {
+				mark_prune_point(env, t);
+				/* Checking and saving state checkpoints at iter_next() call
+				 * is crucial for fast convergence of open-coded iterator loop
+				 * logic, so we need to force it. If we don't do that,
+				 * is_state_visited() might skip saving a checkpoint, causing
+				 * unnecessarily long sequence of not checkpointed
+				 * instructions and jumps, leading to exhaustion of jump
+				 * history buffer, and potentially other undesired outcomes.
+				 * It is expected that with correct open-coded iterators
+				 * convergence will happen quickly, so we don't run a risk of
+				 * exhausting memory.
+				 */
+				mark_force_checkpoint(env, t);
+			}
+			/* Same as helpers, if called in a non-sleepable context
+			 * program will be rejected anyway, so we should end up
+			 * with precise sleepable marks on subprogs, except for
+			 * dead code elimination.
+			 */
+			if (ret == 0 && bpf_is_kfunc_sleepable(&meta))
+				mark_subprog_might_sleep(env, t);
+			if (ret == 0 && bpf_is_kfunc_pkt_changing(&meta))
+				mark_subprog_changes_pkt_data(env, t);
+		}
+		return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
+
+	case BPF_JA:
+		if (BPF_SRC(insn->code) == BPF_X)
+			return visit_gotox_insn(t, env);
+
+		if (BPF_CLASS(insn->code) == BPF_JMP)
+			off = insn->off;
+		else
+			off = insn->imm;
+
+		/* unconditional jump with single edge */
+		ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
+		if (ret)
+			return ret;
+
+		mark_prune_point(env, t + off + 1);
+		mark_jmp_point(env, t + off + 1);
+
+		return ret;
+
+	default:
+		/* conditional jump with two edges */
+		mark_prune_point(env, t);
+		if (bpf_is_may_goto_insn(insn))
+			mark_force_checkpoint(env, t);
+
+		ret = push_insn(t, t + 1, FALLTHROUGH, env);
+		if (ret)
+			return ret;
+
+		return push_insn(t, t + insn->off + 1, BRANCH, env);
+	}
+}
+
+/* non-recursive depth-first-search to detect loops in BPF program
+ * loop == back-edge in directed graph
+ */
+int bpf_check_cfg(struct bpf_verifier_env *env)
+{
+	int insn_cnt = env->prog->len;
+	int *insn_stack, *insn_state;
+	int ex_insn_beg, i, ret = 0;
+
+	insn_state = env->cfg.insn_state = kvzalloc_objs(int, insn_cnt,
+							 GFP_KERNEL_ACCOUNT);
+	if (!insn_state)
+		return -ENOMEM;
+
+	insn_stack = env->cfg.insn_stack = kvzalloc_objs(int, insn_cnt,
+							 GFP_KERNEL_ACCOUNT);
+	if (!insn_stack) {
+		kvfree(insn_state);
+		return -ENOMEM;
+	}
+
+	ex_insn_beg = env->exception_callback_subprog
+		      ? env->subprog_info[env->exception_callback_subprog].start
+		      : 0;
+
+	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
+	insn_stack[0] = 0; /* 0 is the first instruction */
+	env->cfg.cur_stack = 1;
+
+walk_cfg:
+	while (env->cfg.cur_stack > 0) {
+		int t = insn_stack[env->cfg.cur_stack - 1];
+
+		ret = visit_insn(t, env);
+		switch (ret) {
+		case DONE_EXPLORING:
+			insn_state[t] = EXPLORED;
+			env->cfg.cur_stack--;
+			break;
+		case KEEP_EXPLORING:
+			break;
+		default:
+			if (ret > 0) {
+				verifier_bug(env, "visit_insn internal bug");
+				ret = -EFAULT;
+			}
+			goto err_free;
+		}
+	}
+
+	if (env->cfg.cur_stack < 0) {
+		verifier_bug(env, "pop stack internal bug");
+		ret = -EFAULT;
+		goto err_free;
+	}
+
+	if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) {
+		insn_state[ex_insn_beg] = DISCOVERED;
+		insn_stack[0] = ex_insn_beg;
+		env->cfg.cur_stack = 1;
+		goto walk_cfg;
+	}
+
+	for (i = 0; i < insn_cnt; i++) {
+		struct bpf_insn *insn = &env->prog->insnsi[i];
+
+		if (insn_state[i] != EXPLORED) {
+			verbose(env, "unreachable insn %d\n", i);
+			ret = -EINVAL;
+			goto err_free;
+		}
+		if (bpf_is_ldimm64(insn)) {
+			if (insn_state[i + 1] != 0) {
+				verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
+				ret = -EINVAL;
+				goto err_free;
+			}
+			i++; /* skip second half of ldimm64 */
+		}
+	}
+	ret = 0; /* cfg looks good */
+	env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data;
+	env->prog->aux->might_sleep = env->subprog_info[0].might_sleep;
+
+err_free:
+	kvfree(insn_state);
+	kvfree(insn_stack);
+	env->cfg.insn_state = env->cfg.insn_stack = NULL;
+	return ret;
+}
+
+/*
+ * For each subprogram 'i' fill array env->cfg.insn_subprogram sub-range
+ * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start)
+ * with indices of 'i' instructions in postorder.
+ */
+int bpf_compute_postorder(struct bpf_verifier_env *env)
+{
+	u32 cur_postorder, i, top, stack_sz, s;
+	int *stack = NULL, *postorder = NULL, *state = NULL;
+	struct bpf_iarray *succ;
+
+	postorder = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
+	state = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
+	stack = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
+	if (!postorder || !state || !stack) {
+		kvfree(postorder);
+		kvfree(state);
+		kvfree(stack);
+		return -ENOMEM;
+	}
+	cur_postorder = 0;
+	for (i = 0; i < env->subprog_cnt; i++) {
+		env->subprog_info[i].postorder_start = cur_postorder;
+		stack[0] = env->subprog_info[i].start;
+		stack_sz = 1;
+		do {
+			top = stack[stack_sz - 1];
+			state[top] |= DISCOVERED;
+			if (state[top] & EXPLORED) {
+				postorder[cur_postorder++] = top;
+				stack_sz--;
+				continue;
+			}
+			succ = bpf_insn_successors(env, top);
+			for (s = 0; s < succ->cnt; ++s) {
+				if (!state[succ->items[s]]) {
+					stack[stack_sz++] = succ->items[s];
+					state[succ->items[s]] |= DISCOVERED;
+				}
+			}
+			state[top] |= EXPLORED;
+		} while (stack_sz);
+	}
+	env->subprog_info[i].postorder_start = cur_postorder;
+	env->cfg.insn_postorder = postorder;
+	env->cfg.cur_postorder = cur_postorder;
+	kvfree(stack);
+	kvfree(state);
+	return 0;
+}
+
+/*
+ * Compute strongly connected components (SCCs) on the CFG.
+ * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc.
+ * If instruction is a sole member of its SCC and there are no self edges,
+ * assign it SCC number of zero.
+ * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation.
+ */
+int bpf_compute_scc(struct bpf_verifier_env *env)
+{
+	const u32 NOT_ON_STACK = U32_MAX;
+
+	struct bpf_insn_aux_data *aux = env->insn_aux_data;
+	const u32 insn_cnt = env->prog->len;
+	int stack_sz, dfs_sz, err = 0;
+	u32 *stack, *pre, *low, *dfs;
+	u32 i, j, t, w;
+	u32 next_preorder_num;
+	u32 next_scc_id;
+	bool assign_scc;
+	struct bpf_iarray *succ;
+
+	next_preorder_num = 1;
+	next_scc_id = 1;
+	/*
+	 * - 'stack' accumulates vertices in DFS order, see invariant comment below;
+	 * - 'pre[t] == p' => preorder number of vertex 't' is 'p';
+	 * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n';
+	 * - 'dfs' DFS traversal stack, used to emulate explicit recursion.
+	 */
+	stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+	pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+	low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+	dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT);
+	if (!stack || !pre || !low || !dfs) {
+		err = -ENOMEM;
+		goto exit;
+	}
+	/*
+	 * References:
+	 * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
+	 * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
+	 *
+	 * The algorithm maintains the following invariant:
+	 * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
+	 * - then, vertex 'u' remains on stack while vertex 'v' is on stack.
+	 *
+	 * Consequently:
+	 * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
+	 *   such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
+	 *   and thus there is an SCC (loop) containing both 'u' and 'v'.
+	 * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
+	 *   and 'v' can be considered the root of some SCC.
+	 *
+	 * Here is a pseudo-code for an explicitly recursive version of the algorithm:
+	 *
+	 *    NOT_ON_STACK = insn_cnt + 1
+	 *    pre = [0] * insn_cnt
+	 *    low = [0] * insn_cnt
+	 *    scc = [0] * insn_cnt
+	 *    stack = []
+	 *
+	 *    next_preorder_num = 1
+	 *    next_scc_id = 1
+	 *
+	 *    def recur(w):
+	 *        nonlocal next_preorder_num
+	 *        nonlocal next_scc_id
+	 *
+	 *        pre[w] = next_preorder_num
+	 *        low[w] = next_preorder_num
+	 *        next_preorder_num += 1
+	 *        stack.append(w)
+	 *        for s in successors(w):
+	 *            # Note: for classic algorithm the block below should look as:
+	 *            #
+	 *            # if pre[s] == 0:
+	 *            #     recur(s)
+	 *            #     low[w] = min(low[w], low[s])
+	 *            # elif low[s] != NOT_ON_STACK:
+	 *            #     low[w] = min(low[w], pre[s])
+	 *            #
+	 *            # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
+	 *            # does not break the invariant and makes iterative version of the algorithm
+	 *            # simpler. See 'Algorithm #3' from [2].
+	 *
+	 *            # 's' not yet visited
+	 *            if pre[s] == 0:
+	 *                recur(s)
+	 *            # if 's' is on stack, pick lowest reachable preorder number from it;
+	 *            # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
+	 *            # so 'min' would be a noop.
+	 *            low[w] = min(low[w], low[s])
+	 *
+	 *        if low[w] == pre[w]:
+	 *            # 'w' is the root of an SCC, pop all vertices
+	 *            # below 'w' on stack and assign same SCC to them.
+	 *            while True:
+	 *                t = stack.pop()
+	 *                low[t] = NOT_ON_STACK
+	 *                scc[t] = next_scc_id
+	 *                if t == w:
+	 *                    break
+	 *            next_scc_id += 1
+	 *
+	 *    for i in range(0, insn_cnt):
+	 *        if pre[i] == 0:
+	 *            recur(i)
+	 *
+	 * Below implementation replaces explicit recursion with array 'dfs'.
+	 */
+	for (i = 0; i < insn_cnt; i++) {
+		if (pre[i])
+			continue;
+		stack_sz = 0;
+		dfs_sz = 1;
+		dfs[0] = i;
+dfs_continue:
+		while (dfs_sz) {
+			w = dfs[dfs_sz - 1];
+			if (pre[w] == 0) {
+				low[w] = next_preorder_num;
+				pre[w] = next_preorder_num;
+				next_preorder_num++;
+				stack[stack_sz++] = w;
+			}
+			/* Visit 'w' successors */
+			succ = bpf_insn_successors(env, w);
+			for (j = 0; j < succ->cnt; ++j) {
+				if (pre[succ->items[j]]) {
+					low[w] = min(low[w], low[succ->items[j]]);
+				} else {
+					dfs[dfs_sz++] = succ->items[j];
+					goto dfs_continue;
+				}
+			}
+			/*
+			 * Preserve the invariant: if some vertex above in the stack
+			 * is reachable from 'w', keep 'w' on the stack.
+			 */
+			if (low[w] < pre[w]) {
+				dfs_sz--;
+				goto dfs_continue;
+			}
+			/*
+			 * Assign SCC number only if component has two or more elements,
+			 * or if component has a self reference, or if instruction is a
+			 * callback calling function (implicit loop).
+			 */
+			assign_scc = stack[stack_sz - 1] != w;	/* two or more elements? */
+			for (j = 0; j < succ->cnt; ++j) {	/* self reference? */
+				if (succ->items[j] == w) {
+					assign_scc = true;
+					break;
+				}
+			}
+			if (bpf_calls_callback(env, w)) /* implicit loop? */
+				assign_scc = true;
+			/* Pop component elements from stack */
+			do {
+				t = stack[--stack_sz];
+				low[t] = NOT_ON_STACK;
+				if (assign_scc)
+					aux[t].scc = next_scc_id;
+			} while (t != w);
+			if (assign_scc)
+				next_scc_id++;
+			dfs_sz--;
+		}
+	}
+	env->scc_info = kvzalloc_objs(*env->scc_info, next_scc_id,
+				      GFP_KERNEL_ACCOUNT);
+	if (!env->scc_info) {
+		err = -ENOMEM;
+		goto exit;
+	}
+	env->scc_cnt = next_scc_id;
+exit:
+	kvfree(stack);
+	kvfree(pre);
+	kvfree(low);
+	kvfree(dfs);
+	return err;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 11f0c5a050b3..00fcd7f9c06b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -230,11 +230,6 @@ static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
 }
 
-struct bpf_map_desc {
-	struct bpf_map *ptr;
-	int uid;
-};
-
 struct bpf_call_arg_meta {
 	struct bpf_map_desc map;
 	bool raw_mode;
@@ -264,59 +259,6 @@ struct bpf_kfunc_meta {
 	s32 id;
 };
 
-struct bpf_kfunc_call_arg_meta {
-	/* In parameters */
-	struct btf *btf;
-	u32 func_id;
-	u32 kfunc_flags;
-	const struct btf_type *func_proto;
-	const char *func_name;
-	/* Out parameters */
-	u32 ref_obj_id;
-	u8 release_regno;
-	bool r0_rdonly;
-	u32 ret_btf_id;
-	u64 r0_size;
-	u32 subprogno;
-	struct {
-		u64 value;
-		bool found;
-	} arg_constant;
-
-	/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
-	 * generally to pass info about user-defined local kptr types to later
-	 * verification logic
-	 *   bpf_obj_drop/bpf_percpu_obj_drop
-	 *     Record the local kptr type to be drop'd
-	 *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
-	 *     Record the local kptr type to be refcount_incr'd and use
-	 *     arg_owning_ref to determine whether refcount_acquire should be
-	 *     fallible
-	 */
-	struct btf *arg_btf;
-	u32 arg_btf_id;
-	bool arg_owning_ref;
-	bool arg_prog;
-
-	struct {
-		struct btf_field *field;
-	} arg_list_head;
-	struct {
-		struct btf_field *field;
-	} arg_rbtree_root;
-	struct {
-		enum bpf_dynptr_type type;
-		u32 id;
-		u32 ref_obj_id;
-	} initialized_dynptr;
-	struct {
-		u8 spi;
-		u8 frameno;
-	} iter;
-	struct bpf_map_desc map;
-	u64 mem_size;
-};
-
 struct btf *btf_vmlinux;
 
 static const char *btf_type_name(const struct btf *btf, u32 id)
@@ -524,13 +466,13 @@ static bool is_callback_calling_function(enum bpf_func_id func_id)
 	       is_async_callback_calling_function(func_id);
 }
 
-static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
+bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn)
 {
 	return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
 	       (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
 }
 
-static bool is_async_callback_calling_insn(struct bpf_insn *insn)
+bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn)
 {
 	return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
 	       (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
@@ -3907,11 +3849,6 @@ static int insn_stack_access_frameno(int insn_flags)
 	return insn_flags & INSN_F_FRAMENO_MASK;
 }
 
-static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
-{
-	env->insn_aux_data[idx].jmp_point = true;
-}
-
 static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
 {
 	return env->insn_aux_data[insn_idx].jmp_point;
@@ -4480,7 +4417,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 					return -EFAULT;
 				return 0;
 			}
-		} else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
+		} else if (bpf_is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
 			/* exit from callback subprog to callback-calling helper or
 			 * kfunc call. Use idx/subseq_idx check to discern it from
 			 * straight line code backtracking.
@@ -8911,10 +8848,6 @@ static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
 	return meta->kfunc_flags & KF_ITER_NEW;
 }
 
-static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
-{
-	return meta->kfunc_flags & KF_ITER_NEXT;
-}
 
 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -10831,7 +10764,7 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
 		return -EFAULT;
 	}
 
-	if (is_async_callback_calling_insn(insn)) {
+	if (bpf_is_async_callback_calling_insn(insn)) {
 		struct bpf_verifier_state *async_cb;
 
 		/* there is no real recursion here. timer and workqueue callbacks are async */
@@ -11594,8 +11527,8 @@ static bool can_elide_value_nullness(enum bpf_map_type type)
 	}
 }
 
-static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
-			    const struct bpf_func_proto **ptr)
+int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id,
+			 const struct bpf_func_proto **ptr)
 {
 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
 		return -ERANGE;
@@ -11646,7 +11579,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 
 	/* find function prototype */
 	func_id = insn->imm;
-	err = get_helper_proto(env, insn->imm, &fn);
+	err = bpf_get_helper_proto(env, insn->imm, &fn);
 	if (err == -ERANGE) {
 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
 		return -EINVAL;
@@ -12177,10 +12110,6 @@ static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
 	return meta->kfunc_flags & KF_RELEASE;
 }
 
-static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
-{
-	return meta->kfunc_flags & KF_SLEEPABLE;
-}
 
 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -12720,7 +12649,7 @@ static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
 }
 
-static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
+bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
 {
 	return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
 }
@@ -13949,10 +13878,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 	return 0;
 }
 
-static int fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
-				s32 func_id,
-				s16 offset,
-				struct bpf_kfunc_call_arg_meta *meta)
+int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
+			     s32 func_id,
+			     s16 offset,
+			     struct bpf_kfunc_call_arg_meta *meta)
 {
 	struct bpf_kfunc_meta kfunc;
 	int err;
@@ -13993,7 +13922,7 @@ s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn
 	enum bpf_arg_type at;
 	s64 size;
 
-	if (get_helper_proto(env, insn->imm, &fn) < 0)
+	if (bpf_get_helper_proto(env, insn->imm, &fn) < 0)
 		return S64_MIN;
 
 	at = fn->arg_type[arg];
@@ -14114,7 +14043,7 @@ s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *
 	u32 nargs, type_size;
 	s64 size;
 
-	if (fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0)
+	if (bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0)
 		return S64_MIN;
 
 	btf = meta.btf;
@@ -14364,7 +14293,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	if (!insn->imm)
 		return 0;
 
-	err = fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
+	err = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
 	if (err == -EACCES && meta.func_name)
 		verbose(env, "calling kernel function %s is not allowed\n", meta.func_name);
 	if (err)
@@ -14373,7 +14302,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	func_name = meta.func_name;
 	insn_aux = &env->insn_aux_data[insn_idx];
 
-	insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
+	insn_aux->is_iter_next = bpf_is_iter_next_kfunc(&meta);
 
 	if (!insn->off &&
 	    (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
@@ -14410,7 +14339,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		return -EACCES;
 	}
 
-	sleepable = is_kfunc_sleepable(&meta);
+	sleepable = bpf_is_kfunc_sleepable(&meta);
 	if (sleepable && !in_sleepable(env)) {
 		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
 		return -EACCES;
@@ -14640,7 +14569,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
 				type |= PTR_UNTRUSTED;
 			else if (is_kfunc_rcu_protected(&meta) ||
-				 (is_iter_next_kfunc(&meta) &&
+				 (bpf_is_iter_next_kfunc(&meta) &&
 				  (get_iter_from_state(env->cur_state, &meta)
 					   ->type & MEM_RCU))) {
 				/*
@@ -14700,7 +14629,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		}
 	}
 
-	if (is_kfunc_pkt_changing(&meta))
+	if (bpf_is_kfunc_pkt_changing(&meta))
 		clear_all_pkt_pointers(env);
 
 	nargs = btf_type_vlen(meta.func_proto);
@@ -14716,7 +14645,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			mark_btf_func_reg_size(env, regno, t->size);
 	}
 
-	if (is_iter_next_kfunc(&meta)) {
+	if (bpf_is_iter_next_kfunc(&meta)) {
 		err = process_iter_next_call(env, insn_idx, &meta);
 		if (err)
 			return err;
@@ -18343,191 +18272,6 @@ static int check_global_subprog_return_code(struct bpf_verifier_env *env)
 	return 0;
 }
 
-static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
-{
-	struct bpf_subprog_info *subprog;
-
-	subprog = bpf_find_containing_subprog(env, off);
-	subprog->changes_pkt_data = true;
-}
-
-static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off)
-{
-	struct bpf_subprog_info *subprog;
-
-	subprog = bpf_find_containing_subprog(env, off);
-	subprog->might_sleep = true;
-}
-
-/* 't' is an index of a call-site.
- * 'w' is a callee entry point.
- * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED.
- * Rely on DFS traversal order and absence of recursive calls to guarantee that
- * callee's change_pkt_data marks would be correct at that moment.
- */
-static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
-{
-	struct bpf_subprog_info *caller, *callee;
-
-	caller = bpf_find_containing_subprog(env, t);
-	callee = bpf_find_containing_subprog(env, w);
-	caller->changes_pkt_data |= callee->changes_pkt_data;
-	caller->might_sleep |= callee->might_sleep;
-}
-
-/* non-recursive DFS pseudo code
- * 1  procedure DFS-iterative(G,v):
- * 2      label v as discovered
- * 3      let S be a stack
- * 4      S.push(v)
- * 5      while S is not empty
- * 6            t <- S.peek()
- * 7            if t is what we're looking for:
- * 8                return t
- * 9            for all edges e in G.adjacentEdges(t) do
- * 10               if edge e is already labelled
- * 11                   continue with the next edge
- * 12               w <- G.adjacentVertex(t,e)
- * 13               if vertex w is not discovered and not explored
- * 14                   label e as tree-edge
- * 15                   label w as discovered
- * 16                   S.push(w)
- * 17                   continue at 5
- * 18               else if vertex w is discovered
- * 19                   label e as back-edge
- * 20               else
- * 21                   // vertex w is explored
- * 22                   label e as forward- or cross-edge
- * 23           label t as explored
- * 24           S.pop()
- *
- * convention:
- * 0x10 - discovered
- * 0x11 - discovered and fall-through edge labelled
- * 0x12 - discovered and fall-through and branch edges labelled
- * 0x20 - explored
- */
-
-enum {
-	DISCOVERED = 0x10,
-	EXPLORED = 0x20,
-	FALLTHROUGH = 1,
-	BRANCH = 2,
-};
-
-static void mark_prune_point(struct bpf_verifier_env *env, int idx)
-{
-	env->insn_aux_data[idx].prune_point = true;
-}
-
-static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
-{
-	return env->insn_aux_data[insn_idx].prune_point;
-}
-
-static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
-{
-	env->insn_aux_data[idx].force_checkpoint = true;
-}
-
-static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
-{
-	return env->insn_aux_data[insn_idx].force_checkpoint;
-}
-
-static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
-{
-	env->insn_aux_data[idx].calls_callback = true;
-}
-
-bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx)
-{
-	return env->insn_aux_data[insn_idx].calls_callback;
-}
-
-enum {
-	DONE_EXPLORING = 0,
-	KEEP_EXPLORING = 1,
-};
-
-/* t, w, e - match pseudo-code above:
- * t - index of current instruction
- * w - next instruction
- * e - edge
- */
-static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
-{
-	int *insn_stack = env->cfg.insn_stack;
-	int *insn_state = env->cfg.insn_state;
-
-	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
-		return DONE_EXPLORING;
-
-	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
-		return DONE_EXPLORING;
-
-	if (w < 0 || w >= env->prog->len) {
-		verbose_linfo(env, t, "%d: ", t);
-		verbose(env, "jump out of range from insn %d to %d\n", t, w);
-		return -EINVAL;
-	}
-
-	if (e == BRANCH) {
-		/* mark branch target for state pruning */
-		mark_prune_point(env, w);
-		mark_jmp_point(env, w);
-	}
-
-	if (insn_state[w] == 0) {
-		/* tree-edge */
-		insn_state[t] = DISCOVERED | e;
-		insn_state[w] = DISCOVERED;
-		if (env->cfg.cur_stack >= env->prog->len)
-			return -E2BIG;
-		insn_stack[env->cfg.cur_stack++] = w;
-		return KEEP_EXPLORING;
-	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
-		if (env->bpf_capable)
-			return DONE_EXPLORING;
-		verbose_linfo(env, t, "%d: ", t);
-		verbose_linfo(env, w, "%d: ", w);
-		verbose(env, "back-edge from insn %d to %d\n", t, w);
-		return -EINVAL;
-	} else if (insn_state[w] == EXPLORED) {
-		/* forward- or cross-edge */
-		insn_state[t] = DISCOVERED | e;
-	} else {
-		verifier_bug(env, "insn state internal bug");
-		return -EFAULT;
-	}
-	return DONE_EXPLORING;
-}
-
-static int visit_func_call_insn(int t, struct bpf_insn *insns,
-				struct bpf_verifier_env *env,
-				bool visit_callee)
-{
-	int ret, insn_sz;
-	int w;
-
-	insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
-	ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
-	if (ret)
-		return ret;
-
-	mark_prune_point(env, t + insn_sz);
-	/* when we exit from subprog, we need to record non-linear history */
-	mark_jmp_point(env, t + insn_sz);
-
-	if (visit_callee) {
-		w = t + insns[t].imm + 1;
-		mark_prune_point(env, t);
-		merge_callee_effects(env, t, w);
-		ret = push_insn(t, w, BRANCH, env);
-	}
-	return ret;
-}
-
 /* Bitmask with 1s for all caller saved registers */
 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
 
@@ -18563,7 +18307,7 @@ bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
 
 	if (bpf_helper_call(call)) {
 
-		if (get_helper_proto(env, call->imm, &fn) < 0)
+		if (bpf_get_helper_proto(env, call->imm, &fn) < 0)
 			/* error would be reported later */
 			return false;
 		cs->fastcall = fn->allow_fastcall &&
@@ -18582,7 +18326,7 @@ bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
 	if (bpf_pseudo_kfunc_call(call)) {
 		int err;
 
-		err = fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
+		err = bpf_fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
 		if (err < 0)
 			/* error would be reported later */
 			return false;
@@ -18784,530 +18528,6 @@ static int mark_fastcall_patterns(struct bpf_verifier_env *env)
 	return 0;
 }
 
-static struct bpf_iarray *iarray_realloc(struct bpf_iarray *old, size_t n_elem)
-{
-	size_t new_size = sizeof(struct bpf_iarray) + n_elem * sizeof(old->items[0]);
-	struct bpf_iarray *new;
-
-	new = kvrealloc(old, new_size, GFP_KERNEL_ACCOUNT);
-	if (!new) {
-		/* this is what callers always want, so simplify the call site */
-		kvfree(old);
-		return NULL;
-	}
-
-	new->cnt = n_elem;
-	return new;
-}
-
-static int copy_insn_array(struct bpf_map *map, u32 start, u32 end, u32 *items)
-{
-	struct bpf_insn_array_value *value;
-	u32 i;
-
-	for (i = start; i <= end; i++) {
-		value = map->ops->map_lookup_elem(map, &i);
-		/*
-		 * map_lookup_elem of an array map will never return an error,
-		 * but not checking it makes some static analysers to worry
-		 */
-		if (IS_ERR(value))
-			return PTR_ERR(value);
-		else if (!value)
-			return -EINVAL;
-		items[i - start] = value->xlated_off;
-	}
-	return 0;
-}
-
-static int cmp_ptr_to_u32(const void *a, const void *b)
-{
-	return *(u32 *)a - *(u32 *)b;
-}
-
-static int sort_insn_array_uniq(u32 *items, int cnt)
-{
-	int unique = 1;
-	int i;
-
-	sort(items, cnt, sizeof(items[0]), cmp_ptr_to_u32, NULL);
-
-	for (i = 1; i < cnt; i++)
-		if (items[i] != items[unique - 1])
-			items[unique++] = items[i];
-
-	return unique;
-}
-
-/*
- * sort_unique({map[start], ..., map[end]}) into off
- */
-static int copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off)
-{
-	u32 n = end - start + 1;
-	int err;
-
-	err = copy_insn_array(map, start, end, off);
-	if (err)
-		return err;
-
-	return sort_insn_array_uniq(off, n);
-}
-
-/*
- * Copy all unique offsets from the map
- */
-static struct bpf_iarray *jt_from_map(struct bpf_map *map)
-{
-	struct bpf_iarray *jt;
-	int err;
-	int n;
-
-	jt = iarray_realloc(NULL, map->max_entries);
-	if (!jt)
-		return ERR_PTR(-ENOMEM);
-
-	n = copy_insn_array_uniq(map, 0, map->max_entries - 1, jt->items);
-	if (n < 0) {
-		err = n;
-		goto err_free;
-	}
-	if (n == 0) {
-		err = -EINVAL;
-		goto err_free;
-	}
-	jt->cnt = n;
-	return jt;
-
-err_free:
-	kvfree(jt);
-	return ERR_PTR(err);
-}
-
-/*
- * Find and collect all maps which fit in the subprog. Return the result as one
- * combined jump table in jt->items (allocated with kvcalloc)
- */
-static struct bpf_iarray *jt_from_subprog(struct bpf_verifier_env *env,
-					  int subprog_start, int subprog_end)
-{
-	struct bpf_iarray *jt = NULL;
-	struct bpf_map *map;
-	struct bpf_iarray *jt_cur;
-	int i;
-
-	for (i = 0; i < env->insn_array_map_cnt; i++) {
-		/*
-		 * TODO (when needed): collect only jump tables, not static keys
-		 * or maps for indirect calls
-		 */
-		map = env->insn_array_maps[i];
-
-		jt_cur = jt_from_map(map);
-		if (IS_ERR(jt_cur)) {
-			kvfree(jt);
-			return jt_cur;
-		}
-
-		/*
-		 * This is enough to check one element. The full table is
-		 * checked to fit inside the subprog later in create_jt()
-		 */
-		if (jt_cur->items[0] >= subprog_start && jt_cur->items[0] < subprog_end) {
-			u32 old_cnt = jt ? jt->cnt : 0;
-			jt = iarray_realloc(jt, old_cnt + jt_cur->cnt);
-			if (!jt) {
-				kvfree(jt_cur);
-				return ERR_PTR(-ENOMEM);
-			}
-			memcpy(jt->items + old_cnt, jt_cur->items, jt_cur->cnt << 2);
-		}
-
-		kvfree(jt_cur);
-	}
-
-	if (!jt) {
-		verbose(env, "no jump tables found for subprog starting at %u\n", subprog_start);
-		return ERR_PTR(-EINVAL);
-	}
-
-	jt->cnt = sort_insn_array_uniq(jt->items, jt->cnt);
-	return jt;
-}
-
-static struct bpf_iarray *
-create_jt(int t, struct bpf_verifier_env *env)
-{
-	struct bpf_subprog_info *subprog;
-	int subprog_start, subprog_end;
-	struct bpf_iarray *jt;
-	int i;
-
-	subprog = bpf_find_containing_subprog(env, t);
-	subprog_start = subprog->start;
-	subprog_end = (subprog + 1)->start;
-	jt = jt_from_subprog(env, subprog_start, subprog_end);
-	if (IS_ERR(jt))
-		return jt;
-
-	/* Check that the every element of the jump table fits within the given subprogram */
-	for (i = 0; i < jt->cnt; i++) {
-		if (jt->items[i] < subprog_start || jt->items[i] >= subprog_end) {
-			verbose(env, "jump table for insn %d points outside of the subprog [%u,%u]\n",
-					t, subprog_start, subprog_end);
-			kvfree(jt);
-			return ERR_PTR(-EINVAL);
-		}
-	}
-
-	return jt;
-}
-
-/* "conditional jump with N edges" */
-static int visit_gotox_insn(int t, struct bpf_verifier_env *env)
-{
-	int *insn_stack = env->cfg.insn_stack;
-	int *insn_state = env->cfg.insn_state;
-	bool keep_exploring = false;
-	struct bpf_iarray *jt;
-	int i, w;
-
-	jt = env->insn_aux_data[t].jt;
-	if (!jt) {
-		jt = create_jt(t, env);
-		if (IS_ERR(jt))
-			return PTR_ERR(jt);
-
-		env->insn_aux_data[t].jt = jt;
-	}
-
-	mark_prune_point(env, t);
-	for (i = 0; i < jt->cnt; i++) {
-		w = jt->items[i];
-		if (w < 0 || w >= env->prog->len) {
-			verbose(env, "indirect jump out of range from insn %d to %d\n", t, w);
-			return -EINVAL;
-		}
-
-		mark_jmp_point(env, w);
-
-		/* EXPLORED || DISCOVERED */
-		if (insn_state[w])
-			continue;
-
-		if (env->cfg.cur_stack >= env->prog->len)
-			return -E2BIG;
-
-		insn_stack[env->cfg.cur_stack++] = w;
-		insn_state[w] |= DISCOVERED;
-		keep_exploring = true;
-	}
-
-	return keep_exploring ? KEEP_EXPLORING : DONE_EXPLORING;
-}
-
-/*
- * Instructions that can abnormally return from a subprog (tail_call
- * upon success, ld_{abs,ind} upon load failure) have a hidden exit
- * that the verifier must account for.
- */
-static int visit_abnormal_return_insn(struct bpf_verifier_env *env, int t)
-{
-	struct bpf_subprog_info *subprog;
-	struct bpf_iarray *jt;
-
-	if (env->insn_aux_data[t].jt)
-		return 0;
-
-	jt = iarray_realloc(NULL, 2);
-	if (!jt)
-		return -ENOMEM;
-
-	subprog = bpf_find_containing_subprog(env, t);
-	jt->items[0] = t + 1;
-	jt->items[1] = subprog->exit_idx;
-	env->insn_aux_data[t].jt = jt;
-	return 0;
-}
-
-/* Visits the instruction at index t and returns one of the following:
- *  < 0 - an error occurred
- *  DONE_EXPLORING - the instruction was fully explored
- *  KEEP_EXPLORING - there is still work to be done before it is fully explored
- */
-static int visit_insn(int t, struct bpf_verifier_env *env)
-{
-	struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
-	int ret, off, insn_sz;
-
-	if (bpf_pseudo_func(insn))
-		return visit_func_call_insn(t, insns, env, true);
-
-	/* All non-branch instructions have a single fall-through edge. */
-	if (BPF_CLASS(insn->code) != BPF_JMP &&
-	    BPF_CLASS(insn->code) != BPF_JMP32) {
-		if (BPF_CLASS(insn->code) == BPF_LD &&
-		    (BPF_MODE(insn->code) == BPF_ABS ||
-		     BPF_MODE(insn->code) == BPF_IND)) {
-			ret = visit_abnormal_return_insn(env, t);
-			if (ret)
-				return ret;
-		}
-		insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
-		return push_insn(t, t + insn_sz, FALLTHROUGH, env);
-	}
-
-	switch (BPF_OP(insn->code)) {
-	case BPF_EXIT:
-		return DONE_EXPLORING;
-
-	case BPF_CALL:
-		if (is_async_callback_calling_insn(insn))
-			/* Mark this call insn as a prune point to trigger
-			 * is_state_visited() check before call itself is
-			 * processed by __check_func_call(). Otherwise new
-			 * async state will be pushed for further exploration.
-			 */
-			mark_prune_point(env, t);
-		/* For functions that invoke callbacks it is not known how many times
-		 * callback would be called. Verifier models callback calling functions
-		 * by repeatedly visiting callback bodies and returning to origin call
-		 * instruction.
-		 * In order to stop such iteration verifier needs to identify when a
-		 * state identical some state from a previous iteration is reached.
-		 * Check below forces creation of checkpoint before callback calling
-		 * instruction to allow search for such identical states.
-		 */
-		if (is_sync_callback_calling_insn(insn)) {
-			mark_calls_callback(env, t);
-			mark_force_checkpoint(env, t);
-			mark_prune_point(env, t);
-			mark_jmp_point(env, t);
-		}
-		if (bpf_helper_call(insn)) {
-			const struct bpf_func_proto *fp;
-
-			ret = get_helper_proto(env, insn->imm, &fp);
-			/* If called in a non-sleepable context program will be
-			 * rejected anyway, so we should end up with precise
-			 * sleepable marks on subprogs, except for dead code
-			 * elimination.
-			 */
-			if (ret == 0 && fp->might_sleep)
-				mark_subprog_might_sleep(env, t);
-			if (bpf_helper_changes_pkt_data(insn->imm))
-				mark_subprog_changes_pkt_data(env, t);
-			if (insn->imm == BPF_FUNC_tail_call) {
-				ret = visit_abnormal_return_insn(env, t);
-				if (ret)
-					return ret;
-			}
-		} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
-			struct bpf_kfunc_call_arg_meta meta;
-
-			ret = fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
-			if (ret == 0 && is_iter_next_kfunc(&meta)) {
-				mark_prune_point(env, t);
-				/* Checking and saving state checkpoints at iter_next() call
-				 * is crucial for fast convergence of open-coded iterator loop
-				 * logic, so we need to force it. If we don't do that,
-				 * is_state_visited() might skip saving a checkpoint, causing
-				 * unnecessarily long sequence of not checkpointed
-				 * instructions and jumps, leading to exhaustion of jump
-				 * history buffer, and potentially other undesired outcomes.
-				 * It is expected that with correct open-coded iterators
-				 * convergence will happen quickly, so we don't run a risk of
-				 * exhausting memory.
-				 */
-				mark_force_checkpoint(env, t);
-			}
-			/* Same as helpers, if called in a non-sleepable context
-			 * program will be rejected anyway, so we should end up
-			 * with precise sleepable marks on subprogs, except for
-			 * dead code elimination.
-			 */
-			if (ret == 0 && is_kfunc_sleepable(&meta))
-				mark_subprog_might_sleep(env, t);
-			if (ret == 0 && is_kfunc_pkt_changing(&meta))
-				mark_subprog_changes_pkt_data(env, t);
-		}
-		return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
-
-	case BPF_JA:
-		if (BPF_SRC(insn->code) == BPF_X)
-			return visit_gotox_insn(t, env);
-
-		if (BPF_CLASS(insn->code) == BPF_JMP)
-			off = insn->off;
-		else
-			off = insn->imm;
-
-		/* unconditional jump with single edge */
-		ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
-		if (ret)
-			return ret;
-
-		mark_prune_point(env, t + off + 1);
-		mark_jmp_point(env, t + off + 1);
-
-		return ret;
-
-	default:
-		/* conditional jump with two edges */
-		mark_prune_point(env, t);
-		if (bpf_is_may_goto_insn(insn))
-			mark_force_checkpoint(env, t);
-
-		ret = push_insn(t, t + 1, FALLTHROUGH, env);
-		if (ret)
-			return ret;
-
-		return push_insn(t, t + insn->off + 1, BRANCH, env);
-	}
-}
-
-/* non-recursive depth-first-search to detect loops in BPF program
- * loop == back-edge in directed graph
- */
-static int check_cfg(struct bpf_verifier_env *env)
-{
-	int insn_cnt = env->prog->len;
-	int *insn_stack, *insn_state;
-	int ex_insn_beg, i, ret = 0;
-
-	insn_state = env->cfg.insn_state = kvzalloc_objs(int, insn_cnt,
-							 GFP_KERNEL_ACCOUNT);
-	if (!insn_state)
-		return -ENOMEM;
-
-	insn_stack = env->cfg.insn_stack = kvzalloc_objs(int, insn_cnt,
-							 GFP_KERNEL_ACCOUNT);
-	if (!insn_stack) {
-		kvfree(insn_state);
-		return -ENOMEM;
-	}
-
-	ex_insn_beg = env->exception_callback_subprog
-		      ? env->subprog_info[env->exception_callback_subprog].start
-		      : 0;
-
-	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
-	insn_stack[0] = 0; /* 0 is the first instruction */
-	env->cfg.cur_stack = 1;
-
-walk_cfg:
-	while (env->cfg.cur_stack > 0) {
-		int t = insn_stack[env->cfg.cur_stack - 1];
-
-		ret = visit_insn(t, env);
-		switch (ret) {
-		case DONE_EXPLORING:
-			insn_state[t] = EXPLORED;
-			env->cfg.cur_stack--;
-			break;
-		case KEEP_EXPLORING:
-			break;
-		default:
-			if (ret > 0) {
-				verifier_bug(env, "visit_insn internal bug");
-				ret = -EFAULT;
-			}
-			goto err_free;
-		}
-	}
-
-	if (env->cfg.cur_stack < 0) {
-		verifier_bug(env, "pop stack internal bug");
-		ret = -EFAULT;
-		goto err_free;
-	}
-
-	if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) {
-		insn_state[ex_insn_beg] = DISCOVERED;
-		insn_stack[0] = ex_insn_beg;
-		env->cfg.cur_stack = 1;
-		goto walk_cfg;
-	}
-
-	for (i = 0; i < insn_cnt; i++) {
-		struct bpf_insn *insn = &env->prog->insnsi[i];
-
-		if (insn_state[i] != EXPLORED) {
-			verbose(env, "unreachable insn %d\n", i);
-			ret = -EINVAL;
-			goto err_free;
-		}
-		if (bpf_is_ldimm64(insn)) {
-			if (insn_state[i + 1] != 0) {
-				verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
-				ret = -EINVAL;
-				goto err_free;
-			}
-			i++; /* skip second half of ldimm64 */
-		}
-	}
-	ret = 0; /* cfg looks good */
-	env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data;
-	env->prog->aux->might_sleep = env->subprog_info[0].might_sleep;
-
-err_free:
-	kvfree(insn_state);
-	kvfree(insn_stack);
-	env->cfg.insn_state = env->cfg.insn_stack = NULL;
-	return ret;
-}
-
-/*
- * For each subprogram 'i' fill array env->cfg.insn_subprogram sub-range
- * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start)
- * with indices of 'i' instructions in postorder.
- */
-int bpf_compute_postorder(struct bpf_verifier_env *env)
-{
-	u32 cur_postorder, i, top, stack_sz, s;
-	int *stack = NULL, *postorder = NULL, *state = NULL;
-	struct bpf_iarray *succ;
-
-	postorder = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
-	state = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
-	stack = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
-	if (!postorder || !state || !stack) {
-		kvfree(postorder);
-		kvfree(state);
-		kvfree(stack);
-		return -ENOMEM;
-	}
-	cur_postorder = 0;
-	for (i = 0; i < env->subprog_cnt; i++) {
-		env->subprog_info[i].postorder_start = cur_postorder;
-		stack[0] = env->subprog_info[i].start;
-		stack_sz = 1;
-		do {
-			top = stack[stack_sz - 1];
-			state[top] |= DISCOVERED;
-			if (state[top] & EXPLORED) {
-				postorder[cur_postorder++] = top;
-				stack_sz--;
-				continue;
-			}
-			succ = bpf_insn_successors(env, top);
-			for (s = 0; s < succ->cnt; ++s) {
-				if (!state[succ->items[s]]) {
-					stack[stack_sz++] = succ->items[s];
-					state[succ->items[s]] |= DISCOVERED;
-				}
-			}
-			state[top] |= EXPLORED;
-		} while (stack_sz);
-	}
-	env->subprog_info[i].postorder_start = cur_postorder;
-	env->cfg.insn_postorder = postorder;
-	env->cfg.cur_postorder = cur_postorder;
-	kvfree(stack);
-	kvfree(state);
-	return 0;
-}
-
 static int check_abnormal_return(struct bpf_verifier_env *env)
 {
 	int i;
@@ -20724,7 +19944,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	int n, err, states_cnt = 0;
 	struct list_head *pos, *tmp, *head;
 
-	force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
+	force_new_state = env->test_state_freq || bpf_is_force_checkpoint(env, insn_idx) ||
 			  /* Avoid accumulating infinitely long jmp history */
 			  cur->jmp_history_cnt > 40;
 
@@ -21004,7 +20224,7 @@ miss:
 		 * Use bigger 'n' for checkpoints because evicting checkpoint states
 		 * too early would hinder iterator convergence.
 		 */
-		n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
+		n = bpf_is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
 		if (sl->miss_cnt > sl->hit_cnt * n + n) {
 			/* the state is unlikely to be useful. Remove it to
 			 * speed up verification
@@ -21307,13 +20527,13 @@ static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *in
 
 	/* Ensure that the buffer is large enough */
 	if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) {
-		env->gotox_tmp_buf = iarray_realloc(env->gotox_tmp_buf,
-						    max_index - min_index + 1);
+		env->gotox_tmp_buf = bpf_iarray_realloc(env->gotox_tmp_buf,
+						        max_index - min_index + 1);
 		if (!env->gotox_tmp_buf)
 			return -ENOMEM;
 	}
 
-	n = copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
+	n = bpf_copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
 	if (n < 0)
 		return n;
 	if (n == 0) {
@@ -21465,7 +20685,7 @@ static int do_check(struct bpf_verifier_env *env)
 		state->last_insn_idx = env->prev_insn_idx;
 		state->insn_idx = env->insn_idx;
 
-		if (is_prune_point(env, env->insn_idx)) {
+		if (bpf_is_prune_point(env, env->insn_idx)) {
 			err = is_state_visited(env, env->insn_idx);
 			if (err < 0)
 				return err;
@@ -23460,190 +22680,6 @@ static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr,
 	return 0;
 }
 
-/*
- * Compute strongly connected components (SCCs) on the CFG.
- * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc.
- * If instruction is a sole member of its SCC and there are no self edges,
- * assign it SCC number of zero.
- * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation.
- */
-static int compute_scc(struct bpf_verifier_env *env)
-{
-	const u32 NOT_ON_STACK = U32_MAX;
-
-	struct bpf_insn_aux_data *aux = env->insn_aux_data;
-	const u32 insn_cnt = env->prog->len;
-	int stack_sz, dfs_sz, err = 0;
-	u32 *stack, *pre, *low, *dfs;
-	u32 i, j, t, w;
-	u32 next_preorder_num;
-	u32 next_scc_id;
-	bool assign_scc;
-	struct bpf_iarray *succ;
-
-	next_preorder_num = 1;
-	next_scc_id = 1;
-	/*
-	 * - 'stack' accumulates vertices in DFS order, see invariant comment below;
-	 * - 'pre[t] == p' => preorder number of vertex 't' is 'p';
-	 * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n';
-	 * - 'dfs' DFS traversal stack, used to emulate explicit recursion.
-	 */
-	stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
-	pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
-	low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
-	dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT);
-	if (!stack || !pre || !low || !dfs) {
-		err = -ENOMEM;
-		goto exit;
-	}
-	/*
-	 * References:
-	 * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
-	 * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
-	 *
-	 * The algorithm maintains the following invariant:
-	 * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
-	 * - then, vertex 'u' remains on stack while vertex 'v' is on stack.
-	 *
-	 * Consequently:
-	 * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
-	 *   such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
-	 *   and thus there is an SCC (loop) containing both 'u' and 'v'.
-	 * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
-	 *   and 'v' can be considered the root of some SCC.
-	 *
-	 * Here is a pseudo-code for an explicitly recursive version of the algorithm:
-	 *
-	 *    NOT_ON_STACK = insn_cnt + 1
-	 *    pre = [0] * insn_cnt
-	 *    low = [0] * insn_cnt
-	 *    scc = [0] * insn_cnt
-	 *    stack = []
-	 *
-	 *    next_preorder_num = 1
-	 *    next_scc_id = 1
-	 *
-	 *    def recur(w):
-	 *        nonlocal next_preorder_num
-	 *        nonlocal next_scc_id
-	 *
-	 *        pre[w] = next_preorder_num
-	 *        low[w] = next_preorder_num
-	 *        next_preorder_num += 1
-	 *        stack.append(w)
-	 *        for s in successors(w):
-	 *            # Note: for classic algorithm the block below should look as:
-	 *            #
-	 *            # if pre[s] == 0:
-	 *            #     recur(s)
-	 *            #	    low[w] = min(low[w], low[s])
-	 *            # elif low[s] != NOT_ON_STACK:
-	 *            #     low[w] = min(low[w], pre[s])
-	 *            #
-	 *            # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
-	 *            # does not break the invariant and makes itartive version of the algorithm
-	 *            # simpler. See 'Algorithm #3' from [2].
-	 *
-	 *            # 's' not yet visited
-	 *            if pre[s] == 0:
-	 *                recur(s)
-	 *            # if 's' is on stack, pick lowest reachable preorder number from it;
-	 *            # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
-	 *            # so 'min' would be a noop.
-	 *            low[w] = min(low[w], low[s])
-	 *
-	 *        if low[w] == pre[w]:
-	 *            # 'w' is the root of an SCC, pop all vertices
-	 *            # below 'w' on stack and assign same SCC to them.
-	 *            while True:
-	 *                t = stack.pop()
-	 *                low[t] = NOT_ON_STACK
-	 *                scc[t] = next_scc_id
-	 *                if t == w:
-	 *                    break
-	 *            next_scc_id += 1
-	 *
-	 *    for i in range(0, insn_cnt):
-	 *        if pre[i] == 0:
-	 *            recur(i)
-	 *
-	 * Below implementation replaces explicit recursion with array 'dfs'.
-	 */
-	for (i = 0; i < insn_cnt; i++) {
-		if (pre[i])
-			continue;
-		stack_sz = 0;
-		dfs_sz = 1;
-		dfs[0] = i;
-dfs_continue:
-		while (dfs_sz) {
-			w = dfs[dfs_sz - 1];
-			if (pre[w] == 0) {
-				low[w] = next_preorder_num;
-				pre[w] = next_preorder_num;
-				next_preorder_num++;
-				stack[stack_sz++] = w;
-			}
-			/* Visit 'w' successors */
-			succ = bpf_insn_successors(env, w);
-			for (j = 0; j < succ->cnt; ++j) {
-				if (pre[succ->items[j]]) {
-					low[w] = min(low[w], low[succ->items[j]]);
-				} else {
-					dfs[dfs_sz++] = succ->items[j];
-					goto dfs_continue;
-				}
-			}
-			/*
-			 * Preserve the invariant: if some vertex above in the stack
-			 * is reachable from 'w', keep 'w' on the stack.
-			 */
-			if (low[w] < pre[w]) {
-				dfs_sz--;
-				goto dfs_continue;
-			}
-			/*
-			 * Assign SCC number only if component has two or more elements,
-			 * or if component has a self reference, or if instruction is a
-			 * callback calling function (implicit loop).
-			 */
-			assign_scc = stack[stack_sz - 1] != w;	/* two or more elements? */
-			for (j = 0; j < succ->cnt; ++j) {	/* self reference? */
-				if (succ->items[j] == w) {
-					assign_scc = true;
-					break;
-				}
-			}
-			if (bpf_calls_callback(env, w)) /* implicit loop? */
-				assign_scc = true;
-			/* Pop component elements from stack */
-			do {
-				t = stack[--stack_sz];
-				low[t] = NOT_ON_STACK;
-				if (assign_scc)
-					aux[t].scc = next_scc_id;
-			} while (t != w);
-			if (assign_scc)
-				next_scc_id++;
-			dfs_sz--;
-		}
-	}
-	env->scc_info = kvzalloc_objs(*env->scc_info, next_scc_id,
-				      GFP_KERNEL_ACCOUNT);
-	if (!env->scc_info) {
-		err = -ENOMEM;
-		goto exit;
-	}
-	env->scc_cnt = next_scc_id;
-exit:
-	kvfree(stack);
-	kvfree(pre);
-	kvfree(low);
-	kvfree(dfs);
-	return err;
-}
-
 /* replace a generic kfunc with a specialized version if necessary */
 static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
 {
@@ -23880,7 +22916,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 		goto err_free_env;
 	for (i = 0; i < len; i++)
 		env->insn_aux_data[i].orig_idx = i;
-	env->succ = iarray_realloc(NULL, 2);
+	env->succ = bpf_iarray_realloc(NULL, 2);
 	if (!env->succ)
 		goto err_free_env;
 	env->prog = *prog;
@@ -23967,7 +23003,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 			goto skip_full_check;
 	}
 
-	ret = check_cfg(env);
+	ret = bpf_check_cfg(env);
 	if (ret < 0)
 		goto skip_full_check;
 
@@ -23995,7 +23031,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret < 0)
 		goto skip_full_check;
 
-	ret = compute_scc(env);
+	ret = bpf_compute_scc(env);
 	if (ret < 0)
 		goto skip_full_check;
 
-- 
cgit v1.2.3


From c82834a5a11f743f2926107d8f8150e80742b814 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sun, 12 Apr 2026 08:29:33 -0700
Subject: bpf: Move state equivalence logic to states.c

verifier.c is huge. Move is_state_visited() to states.c,
so that all state equivalence logic is in one file.

Mechanical move. No functional changes.

Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20260412152936.54262-5-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   67 ++
 kernel/bpf/Makefile          |    2 +-
 kernel/bpf/states.c          | 1563 +++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        | 1879 +++---------------------------------------
 4 files changed, 1765 insertions(+), 1746 deletions(-)
 create mode 100644 kernel/bpf/states.c

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index aa92a597bc5c..669453386282 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -1068,6 +1068,73 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab);
 
 int mark_chain_precision(struct bpf_verifier_env *env, int regno);
 
+int bpf_is_state_visited(struct bpf_verifier_env *env, int insn_idx);
+int bpf_update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
+
+void bpf_clear_jmp_history(struct bpf_verifier_state *state);
+int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state,
+			    const struct bpf_verifier_state *src);
+struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx);
+void bpf_free_verifier_state(struct bpf_verifier_state *state, bool free_self);
+void bpf_free_backedges(struct bpf_scc_visit *visit);
+int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
+			 int insn_flags, u64 linked_regs);
+void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
+			   struct bpf_reg_state *reg);
+void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg);
+void bpf_mark_all_scalars_precise(struct bpf_verifier_env *env,
+				  struct bpf_verifier_state *st);
+void bpf_clear_singular_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
+int bpf_mark_chain_precision(struct bpf_verifier_env *env,
+			     struct bpf_verifier_state *starting_state,
+			     int regno, bool *changed);
+
+static inline int bpf_get_spi(s32 off)
+{
+	return (-off - 1) / BPF_REG_SIZE;
+}
+
+static inline struct bpf_func_state *bpf_func(struct bpf_verifier_env *env,
+					      const struct bpf_reg_state *reg)
+{
+	struct bpf_verifier_state *cur = env->cur_state;
+
+	return cur->frame[reg->frameno];
+}
+
+/* Return IP for a given frame in a call stack */
+static inline u32 bpf_frame_insn_idx(struct bpf_verifier_state *st, u32 frame)
+{
+	return frame == st->curframe
+	       ? st->insn_idx
+	       : st->frame[frame + 1]->callsite;
+}
+
+static inline bool bpf_is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
+{
+	return env->insn_aux_data[insn_idx].jmp_point;
+}
+
+static inline bool bpf_is_spilled_reg(const struct bpf_stack_state *stack)
+{
+	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
+}
+
+static inline bool bpf_register_is_null(struct bpf_reg_state *reg)
+{
+	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
+}
+
+static inline void bpf_bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
+{
+	bt->reg_masks[frame] |= 1 << reg;
+}
+
+static inline void bpf_bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
+{
+	bt->stack_masks[frame] |= 1ull << slot;
+}
+
 bool bpf_map_is_rdonly(const struct bpf_map *map);
 int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
 			bool is_ldsx);
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 8649ee9651a9..3da5dae33827 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
-obj-$(CONFIG_BPF_SYSCALL) += fixups.o cfg.o
+obj-$(CONFIG_BPF_SYSCALL) += fixups.o cfg.o states.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
diff --git a/kernel/bpf/states.c b/kernel/bpf/states.c
new file mode 100644
index 000000000000..8478d2c6ed5b
--- /dev/null
+++ b/kernel/bpf/states.c
@@ -0,0 +1,1563 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+#define BPF_COMPLEXITY_LIMIT_STATES	64
+
+static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
+{
+	return bpf_is_may_goto_insn(&env->prog->insnsi[insn_idx]);
+}
+
+static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
+{
+	return env->insn_aux_data[insn_idx].is_iter_next;
+}
+
+static void update_peak_states(struct bpf_verifier_env *env)
+{
+	u32 cur_states;
+
+	cur_states = env->explored_states_size + env->free_list_size + env->num_backedges;
+	env->peak_states = max(env->peak_states, cur_states);
+}
+
+/* struct bpf_verifier_state->parent refers to states
+ * that are in either of env->{expored_states,free_list}.
+ * In both cases the state is contained in struct bpf_verifier_state_list.
+ */
+static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
+{
+	if (st->parent)
+		return container_of(st->parent, struct bpf_verifier_state_list, state);
+	return NULL;
+}
+
+static bool incomplete_read_marks(struct bpf_verifier_env *env,
+				  struct bpf_verifier_state *st);
+
+/* A state can be freed if it is no longer referenced:
+ * - is in the env->free_list;
+ * - has no children states;
+ */
+static void maybe_free_verifier_state(struct bpf_verifier_env *env,
+				      struct bpf_verifier_state_list *sl)
+{
+	if (!sl->in_free_list
+	    || sl->state.branches != 0
+	    || incomplete_read_marks(env, &sl->state))
+		return;
+	list_del(&sl->node);
+	bpf_free_verifier_state(&sl->state, false);
+	kfree(sl);
+	env->free_list_size--;
+}
+
+/* For state @st look for a topmost frame with frame_insn_idx() in some SCC,
+ * if such frame exists form a corresponding @callchain as an array of
+ * call sites leading to this frame and SCC id.
+ * E.g.:
+ *
+ *    void foo()  { A: loop {... SCC#1 ...}; }
+ *    void bar()  { B: loop { C: foo(); ... SCC#2 ... }
+ *                  D: loop { E: foo(); ... SCC#3 ... } }
+ *    void main() { F: bar(); }
+ *
+ * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
+ * on @st frame call sites being (F,C,A) or (F,E,A).
+ */
+static bool compute_scc_callchain(struct bpf_verifier_env *env,
+				  struct bpf_verifier_state *st,
+				  struct bpf_scc_callchain *callchain)
+{
+	u32 i, scc, insn_idx;
+
+	memset(callchain, 0, sizeof(*callchain));
+	for (i = 0; i <= st->curframe; i++) {
+		insn_idx = bpf_frame_insn_idx(st, i);
+		scc = env->insn_aux_data[insn_idx].scc;
+		if (scc) {
+			callchain->scc = scc;
+			break;
+		} else if (i < st->curframe) {
+			callchain->callsites[i] = insn_idx;
+		} else {
+			return false;
+		}
+	}
+	return true;
+}
+
+/* Check if bpf_scc_visit instance for @callchain exists. */
+static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env,
+					      struct bpf_scc_callchain *callchain)
+{
+	struct bpf_scc_info *info = env->scc_info[callchain->scc];
+	struct bpf_scc_visit *visits = info->visits;
+	u32 i;
+
+	if (!info)
+		return NULL;
+	for (i = 0; i < info->num_visits; i++)
+		if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0)
+			return &visits[i];
+	return NULL;
+}
+
+/* Allocate a new bpf_scc_visit instance corresponding to @callchain.
+ * Allocated instances are alive for a duration of the do_check_common()
+ * call and are freed by free_states().
+ */
+static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env,
+					     struct bpf_scc_callchain *callchain)
+{
+	struct bpf_scc_visit *visit;
+	struct bpf_scc_info *info;
+	u32 scc, num_visits;
+	u64 new_sz;
+
+	scc = callchain->scc;
+	info = env->scc_info[scc];
+	num_visits = info ? info->num_visits : 0;
+	new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1);
+	info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT);
+	if (!info)
+		return NULL;
+	env->scc_info[scc] = info;
+	info->num_visits = num_visits + 1;
+	visit = &info->visits[num_visits];
+	memset(visit, 0, sizeof(*visit));
+	memcpy(&visit->callchain, callchain, sizeof(*callchain));
+	return visit;
+}
+
+/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */
+static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
+{
+	char *buf = env->tmp_str_buf;
+	int i, delta = 0;
+
+	delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "(");
+	for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) {
+		if (!callchain->callsites[i])
+			break;
+		delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,",
+				  callchain->callsites[i]);
+	}
+	delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc);
+	return env->tmp_str_buf;
+}
+
+/* If callchain for @st exists (@st is in some SCC), ensure that
+ * bpf_scc_visit instance for this callchain exists.
+ * If instance does not exist or is empty, assign visit->entry_state to @st.
+ */
+static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+	struct bpf_scc_callchain *callchain = &env->callchain_buf;
+	struct bpf_scc_visit *visit;
+
+	if (!compute_scc_callchain(env, st, callchain))
+		return 0;
+	visit = scc_visit_lookup(env, callchain);
+	visit = visit ?: scc_visit_alloc(env, callchain);
+	if (!visit)
+		return -ENOMEM;
+	if (!visit->entry_state) {
+		visit->entry_state = st;
+		if (env->log.level & BPF_LOG_LEVEL2)
+			verbose(env, "SCC enter %s\n", format_callchain(env, callchain));
+	}
+	return 0;
+}
+
+static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);
+
+/* If callchain for @st exists (@st is in some SCC), make it empty:
+ * - set visit->entry_state to NULL;
+ * - flush accumulated backedges.
+ */
+static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+	struct bpf_scc_callchain *callchain = &env->callchain_buf;
+	struct bpf_scc_visit *visit;
+
+	if (!compute_scc_callchain(env, st, callchain))
+		return 0;
+	visit = scc_visit_lookup(env, callchain);
+	if (!visit) {
+		/*
+		 * If path traversal stops inside an SCC, corresponding bpf_scc_visit
+		 * must exist for non-speculative paths. For non-speculative paths
+		 * traversal stops when:
+		 * a. Verification error is found, maybe_exit_scc() is not called.
+		 * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
+		 *    of any SCC.
+		 * c. A checkpoint is reached and matched. Checkpoints are created by
+		 *    is_state_visited(), which calls maybe_enter_scc(), which allocates
+		 *    bpf_scc_visit instances for checkpoints within SCCs.
+		 * (c) is the only case that can reach this point.
+		 */
+		if (!st->speculative) {
+			verifier_bug(env, "scc exit: no visit info for call chain %s",
+				     format_callchain(env, callchain));
+			return -EFAULT;
+		}
+		return 0;
+	}
+	if (visit->entry_state != st)
+		return 0;
+	if (env->log.level & BPF_LOG_LEVEL2)
+		verbose(env, "SCC exit %s\n", format_callchain(env, callchain));
+	visit->entry_state = NULL;
+	env->num_backedges -= visit->num_backedges;
+	visit->num_backedges = 0;
+	update_peak_states(env);
+	return propagate_backedges(env, visit);
+}
+
+/* Lookup an bpf_scc_visit instance corresponding to @st callchain
+ * and add @backedge to visit->backedges. @st callchain must exist.
+ */
+static int add_scc_backedge(struct bpf_verifier_env *env,
+			    struct bpf_verifier_state *st,
+			    struct bpf_scc_backedge *backedge)
+{
+	struct bpf_scc_callchain *callchain = &env->callchain_buf;
+	struct bpf_scc_visit *visit;
+
+	if (!compute_scc_callchain(env, st, callchain)) {
+		verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
+			     st->insn_idx);
+		return -EFAULT;
+	}
+	visit = scc_visit_lookup(env, callchain);
+	if (!visit) {
+		verifier_bug(env, "add backedge: no visit info for call chain %s",
+			     format_callchain(env, callchain));
+		return -EFAULT;
+	}
+	if (env->log.level & BPF_LOG_LEVEL2)
+		verbose(env, "SCC backedge %s\n", format_callchain(env, callchain));
+	backedge->next = visit->backedges;
+	visit->backedges = backedge;
+	visit->num_backedges++;
+	env->num_backedges++;
+	update_peak_states(env);
+	return 0;
+}
+
+/* bpf_reg_state->live marks for registers in a state @st are incomplete,
+ * if state @st is in some SCC and not all execution paths starting at this
+ * SCC are fully explored.
+ */
+static bool incomplete_read_marks(struct bpf_verifier_env *env,
+				  struct bpf_verifier_state *st)
+{
+	struct bpf_scc_callchain *callchain = &env->callchain_buf;
+	struct bpf_scc_visit *visit;
+
+	if (!compute_scc_callchain(env, st, callchain))
+		return false;
+	visit = scc_visit_lookup(env, callchain);
+	if (!visit)
+		return false;
+	return !!visit->backedges;
+}
+
+int bpf_update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+	struct bpf_verifier_state_list *sl = NULL, *parent_sl;
+	struct bpf_verifier_state *parent;
+	int err;
+
+	while (st) {
+		u32 br = --st->branches;
+
+		/* verifier_bug_if(br > 1, ...) technically makes sense here,
+		 * but see comment in push_stack(), hence:
+		 */
+		verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br);
+		if (br)
+			break;
+		err = maybe_exit_scc(env, st);
+		if (err)
+			return err;
+		parent = st->parent;
+		parent_sl = state_parent_as_list(st);
+		if (sl)
+			maybe_free_verifier_state(env, sl);
+		st = parent;
+		sl = parent_sl;
+	}
+	return 0;
+}
+
+/* check %cur's range satisfies %old's */
+static bool range_within(const struct bpf_reg_state *old,
+			 const struct bpf_reg_state *cur)
+{
+	return old->umin_value <= cur->umin_value &&
+	       old->umax_value >= cur->umax_value &&
+	       old->smin_value <= cur->smin_value &&
+	       old->smax_value >= cur->smax_value &&
+	       old->u32_min_value <= cur->u32_min_value &&
+	       old->u32_max_value >= cur->u32_max_value &&
+	       old->s32_min_value <= cur->s32_min_value &&
+	       old->s32_max_value >= cur->s32_max_value;
+}
+
+/* If in the old state two registers had the same id, then they need to have
+ * the same id in the new state as well.  But that id could be different from
+ * the old state, so we need to track the mapping from old to new ids.
+ * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
+ * regs with old id 5 must also have new id 9 for the new state to be safe.  But
+ * regs with a different old id could still have new id 9, we don't care about
+ * that.
+ * So we look through our idmap to see if this old id has been seen before.  If
+ * so, we require the new id to match; otherwise, we add the id pair to the map.
+ */
+static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
+{
+	struct bpf_id_pair *map = idmap->map;
+	unsigned int i;
+
+	/* either both IDs should be set or both should be zero */
+	if (!!old_id != !!cur_id)
+		return false;
+
+	if (old_id == 0) /* cur_id == 0 as well */
+		return true;
+
+	for (i = 0; i < idmap->cnt; i++) {
+		if (map[i].old == old_id)
+			return map[i].cur == cur_id;
+		if (map[i].cur == cur_id)
+			return false;
+	}
+
+	/* Reached the end of known mappings; haven't seen this id before */
+	if (idmap->cnt < BPF_ID_MAP_SIZE) {
+		map[idmap->cnt].old = old_id;
+		map[idmap->cnt].cur = cur_id;
+		idmap->cnt++;
+		return true;
+	}
+
+	/* We ran out of idmap slots, which should be impossible */
+	WARN_ON_ONCE(1);
+	return false;
+}
+
+/*
+ * Compare scalar register IDs for state equivalence.
+ *
+ * When old_id == 0, the old register is independent - not linked to any
+ * other register. Any linking in the current state only adds constraints,
+ * making it more restrictive. Since the old state didn't rely on any ID
+ * relationships for this register, it's always safe to accept cur regardless
+ * of its ID. Hence, return true immediately.
+ *
+ * When old_id != 0 but cur_id == 0, we need to ensure that different
+ * independent registers in cur don't incorrectly satisfy the ID matching
+ * requirements of linked registers in old.
+ *
+ * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0
+ * and r7.id=0 (both independent), without temp IDs both would map old_id=X
+ * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map
+ * X->temp2, but X is already mapped to temp1, so the check fails correctly.
+ *
+ * When old_id has BPF_ADD_CONST set, the compound id (base | flag) and the
+ * base id (flag stripped) must both map consistently. Example: old has
+ * r2.id=A, r3.id=A|flag (r3 = r2 + delta), cur has r2.id=B, r3.id=C|flag
+ * (r3 derived from unrelated r4). Without the base check, idmap gets two
+ * independent entries A->B and A|flag->C|flag, missing that A->C conflicts
+ * with A->B. The base ID cross-check catches this.
+ */
+static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
+{
+	if (!old_id)
+		return true;
+
+	cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
+
+	if (!check_ids(old_id, cur_id, idmap))
+		return false;
+	if (old_id & BPF_ADD_CONST) {
+		old_id &= ~BPF_ADD_CONST;
+		cur_id &= ~BPF_ADD_CONST;
+		if (!check_ids(old_id, cur_id, idmap))
+			return false;
+	}
+	return true;
+}
+
+static void __clean_func_state(struct bpf_verifier_env *env,
+			       struct bpf_func_state *st,
+			       u16 live_regs, int frame)
+{
+	int i, j;
+
+	for (i = 0; i < BPF_REG_FP; i++) {
+		/* liveness must not touch this register anymore */
+		if (!(live_regs & BIT(i)))
+			/* since the register is unused, clear its state
+			 * to make further comparison simpler
+			 */
+			bpf_mark_reg_not_init(env, &st->regs[i]);
+	}
+
+	/*
+	 * Clean dead 4-byte halves within each SPI independently.
+	 * half_spi 2*i   → lower half: slot_type[0..3] (closer to FP)
+	 * half_spi 2*i+1 → upper half: slot_type[4..7] (farther from FP)
+	 */
+	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
+		bool lo_live = bpf_stack_slot_alive(env, frame, i * 2);
+		bool hi_live = bpf_stack_slot_alive(env, frame, i * 2 + 1);
+
+		if (!hi_live || !lo_live) {
+			int start = !lo_live ? 0 : BPF_REG_SIZE / 2;
+			int end = !hi_live ? BPF_REG_SIZE : BPF_REG_SIZE / 2;
+			u8 stype = st->stack[i].slot_type[7];
+
+			/*
+			 * Don't clear special slots.
+			 * destroy_if_dynptr_stack_slot() needs STACK_DYNPTR to
+			 * detect overwrites and invalidate associated data slices.
+			 * is_iter_reg_valid_uninit() and is_irq_flag_reg_valid_uninit()
+			 * check for their respective slot types to detect double-create.
+			 */
+			if (stype == STACK_DYNPTR || stype == STACK_ITER ||
+			    stype == STACK_IRQ_FLAG)
+				continue;
+
+			/*
+			 * Only destroy spilled_ptr when hi half is dead.
+			 * If hi half is still live with STACK_SPILL, the
+			 * spilled_ptr metadata is needed for correct state
+			 * comparison in stacksafe().
+			 * is_spilled_reg() is using slot_type[7], but
+			 * is_spilled_scalar_after() check either slot_type[0] or [4]
+			 */
+			if (!hi_live) {
+				struct bpf_reg_state *spill = &st->stack[i].spilled_ptr;
+
+				if (lo_live && stype == STACK_SPILL) {
+					u8 val = STACK_MISC;
+
+					/*
+					 * 8 byte spill of scalar 0 where half slot is dead
+					 * should become STACK_ZERO in lo 4 bytes.
+					 */
+					if (bpf_register_is_null(spill))
+						val = STACK_ZERO;
+					for (j = 0; j < 4; j++) {
+						u8 *t = &st->stack[i].slot_type[j];
+
+						if (*t == STACK_SPILL)
+							*t = val;
+					}
+				}
+				bpf_mark_reg_not_init(env, spill);
+			}
+			for (j = start; j < end; j++)
+				st->stack[i].slot_type[j] = STACK_POISON;
+		}
+	}
+}
+
+static int clean_verifier_state(struct bpf_verifier_env *env,
+				 struct bpf_verifier_state *st)
+{
+	int i, err;
+
+	err = bpf_live_stack_query_init(env, st);
+	if (err)
+		return err;
+	for (i = 0; i <= st->curframe; i++) {
+		u32 ip = bpf_frame_insn_idx(st, i);
+		u16 live_regs = env->insn_aux_data[ip].live_regs_before;
+
+		__clean_func_state(env, st->frame[i], live_regs, i);
+	}
+	return 0;
+}
+
+static bool regs_exact(const struct bpf_reg_state *rold,
+		       const struct bpf_reg_state *rcur,
+		       struct bpf_idmap *idmap)
+{
+	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+	       check_ids(rold->id, rcur->id, idmap) &&
+	       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
+}
+
+enum exact_level {
+	NOT_EXACT,
+	EXACT,
+	RANGE_WITHIN
+};
+
+/* Returns true if (rold safe implies rcur safe) */
+static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
+		    enum exact_level exact)
+{
+	if (exact == EXACT)
+		return regs_exact(rold, rcur, idmap);
+
+	if (rold->type == NOT_INIT)
+		/* explored state can't have used this */
+		return true;
+
+	/* Enforce that register types have to match exactly, including their
+	 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
+	 * rule.
+	 *
+	 * One can make a point that using a pointer register as unbounded
+	 * SCALAR would be technically acceptable, but this could lead to
+	 * pointer leaks because scalars are allowed to leak while pointers
+	 * are not. We could make this safe in special cases if root is
+	 * calling us, but it's probably not worth the hassle.
+	 *
+	 * Also, register types that are *not* MAYBE_NULL could technically be
+	 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
+	 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
+	 * to the same map).
+	 * However, if the old MAYBE_NULL register then got NULL checked,
+	 * doing so could have affected others with the same id, and we can't
+	 * check for that because we lost the id when we converted to
+	 * a non-MAYBE_NULL variant.
+	 * So, as a general rule we don't allow mixing MAYBE_NULL and
+	 * non-MAYBE_NULL registers as well.
+	 */
+	if (rold->type != rcur->type)
+		return false;
+
+	switch (base_type(rold->type)) {
+	case SCALAR_VALUE:
+		if (env->explore_alu_limits) {
+			/* explore_alu_limits disables tnum_in() and range_within()
+			 * logic and requires everything to be strict
+			 */
+			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+			       check_scalar_ids(rold->id, rcur->id, idmap);
+		}
+		if (!rold->precise && exact == NOT_EXACT)
+			return true;
+		/*
+		 * Linked register tracking uses rold->id to detect relationships.
+		 * When rold->id == 0, the register is independent and any linking
+		 * in rcur only adds constraints. When rold->id != 0, we must verify
+		 * id mapping and (for BPF_ADD_CONST) offset consistency.
+		 *
+		 * +------------------+-----------+------------------+---------------+
+		 * |                  | rold->id  | rold + ADD_CONST | rold->id == 0 |
+		 * |------------------+-----------+------------------+---------------|
+		 * | rcur->id         | range,ids | false            | range         |
+		 * | rcur + ADD_CONST | false     | range,ids,off    | range         |
+		 * | rcur->id == 0    | range,ids | false            | range         |
+		 * +------------------+-----------+------------------+---------------+
+		 *
+		 * Why check_ids() for scalar registers?
+		 *
+		 * Consider the following BPF code:
+		 *   1: r6 = ... unbound scalar, ID=a ...
+		 *   2: r7 = ... unbound scalar, ID=b ...
+		 *   3: if (r6 > r7) goto +1
+		 *   4: r6 = r7
+		 *   5: if (r6 > X) goto ...
+		 *   6: ... memory operation using r7 ...
+		 *
+		 * First verification path is [1-6]:
+		 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
+		 * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
+		 *   r7 <= X, because r6 and r7 share same id.
+		 * Next verification path is [1-4, 6].
+		 *
+		 * Instruction (6) would be reached in two states:
+		 *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
+		 *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
+		 *
+		 * Use check_ids() to distinguish these states.
+		 * ---
+		 * Also verify that new value satisfies old value range knowledge.
+		 */
+
+		/*
+		 * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and
+		 * BPF_ADD_CONST64 have different linking semantics in
+		 * sync_linked_regs() (alu32 zero-extends, alu64 does not),
+		 * so pruning across different flag types is unsafe.
+		 */
+		if (rold->id &&
+		    (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
+			return false;
+
+		/* Both have offset linkage: offsets must match */
+		if ((rold->id & BPF_ADD_CONST) && rold->delta != rcur->delta)
+			return false;
+
+		if (!check_scalar_ids(rold->id, rcur->id, idmap))
+			return false;
+
+		return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
+	case PTR_TO_MAP_KEY:
+	case PTR_TO_MAP_VALUE:
+	case PTR_TO_MEM:
+	case PTR_TO_BUF:
+	case PTR_TO_TP_BUFFER:
+		/* If the new min/max/var_off satisfy the old ones and
+		 * everything else matches, we are OK.
+		 */
+		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
+		       range_within(rold, rcur) &&
+		       tnum_in(rold->var_off, rcur->var_off) &&
+		       check_ids(rold->id, rcur->id, idmap) &&
+		       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
+	case PTR_TO_PACKET_META:
+	case PTR_TO_PACKET:
+		/* We must have at least as much range as the old ptr
+		 * did, so that any accesses which were safe before are
+		 * still safe.  This is true even if old range < old off,
+		 * since someone could have accessed through (ptr - k), or
+		 * even done ptr -= k in a register, to get a safe access.
+		 */
+		if (rold->range < 0 || rcur->range < 0) {
+			/* special case for [BEYOND|AT]_PKT_END */
+			if (rold->range != rcur->range)
+				return false;
+		} else if (rold->range > rcur->range) {
+			return false;
+		}
+		/* id relations must be preserved */
+		if (!check_ids(rold->id, rcur->id, idmap))
+			return false;
+		/* new val must satisfy old val knowledge */
+		return range_within(rold, rcur) &&
+		       tnum_in(rold->var_off, rcur->var_off);
+	case PTR_TO_STACK:
+		/* two stack pointers are equal only if they're pointing to
+		 * the same stack frame, since fp-8 in foo != fp-8 in bar
+		 */
+		return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
+	case PTR_TO_ARENA:
+		return true;
+	case PTR_TO_INSN:
+		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
+		       range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
+	default:
+		return regs_exact(rold, rcur, idmap);
+	}
+}
+
+static struct bpf_reg_state unbound_reg;
+
+static __init int unbound_reg_init(void)
+{
+	bpf_mark_reg_unknown_imprecise(&unbound_reg);
+	return 0;
+}
+late_initcall(unbound_reg_init);
+
+static bool is_spilled_scalar_after(const struct bpf_stack_state *stack, int im)
+{
+	return stack->slot_type[im] == STACK_SPILL &&
+	       stack->spilled_ptr.type == SCALAR_VALUE;
+}
+
+static bool is_stack_misc_after(struct bpf_verifier_env *env,
+				struct bpf_stack_state *stack, int im)
+{
+	u32 i;
+
+	for (i = im; i < ARRAY_SIZE(stack->slot_type); ++i) {
+		if ((stack->slot_type[i] == STACK_MISC) ||
+		    ((stack->slot_type[i] == STACK_INVALID || stack->slot_type[i] == STACK_POISON) &&
+		     env->allow_uninit_stack))
+			continue;
+		return false;
+	}
+
+	return true;
+}
+
+static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
+						  struct bpf_stack_state *stack, int im)
+{
+	if (is_spilled_scalar_after(stack, im))
+		return &stack->spilled_ptr;
+
+	if (is_stack_misc_after(env, stack, im))
+		return &unbound_reg;
+
+	return NULL;
+}
+
+static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+		      struct bpf_func_state *cur, struct bpf_idmap *idmap,
+		      enum exact_level exact)
+{
+	int i, spi;
+
+	/* walk slots of the explored stack and ignore any additional
+	 * slots in the current stack, since explored(safe) state
+	 * didn't use them
+	 */
+	for (i = 0; i < old->allocated_stack; i++) {
+		struct bpf_reg_state *old_reg, *cur_reg;
+		int im = i % BPF_REG_SIZE;
+
+		spi = i / BPF_REG_SIZE;
+
+		if (exact == EXACT) {
+			u8 old_type = old->stack[spi].slot_type[i % BPF_REG_SIZE];
+			u8 cur_type = i < cur->allocated_stack ?
+				      cur->stack[spi].slot_type[i % BPF_REG_SIZE] : STACK_INVALID;
+
+			/* STACK_INVALID and STACK_POISON are equivalent for pruning */
+			if (old_type == STACK_POISON)
+				old_type = STACK_INVALID;
+			if (cur_type == STACK_POISON)
+				cur_type = STACK_INVALID;
+			if (i >= cur->allocated_stack || old_type != cur_type)
+				return false;
+		}
+
+		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID ||
+		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_POISON)
+			continue;
+
+		if (env->allow_uninit_stack &&
+		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
+			continue;
+
+		/* explored stack has more populated slots than current stack
+		 * and these slots were used
+		 */
+		if (i >= cur->allocated_stack)
+			return false;
+
+		/*
+		 * 64 and 32-bit scalar spills vs MISC/INVALID slots and vice versa.
+		 * Load from MISC/INVALID slots produces unbound scalar.
+		 * Construct a fake register for such stack and call
+		 * regsafe() to ensure scalar ids are compared.
+		 */
+		if (im == 0 || im == 4) {
+			old_reg = scalar_reg_for_stack(env, &old->stack[spi], im);
+			cur_reg = scalar_reg_for_stack(env, &cur->stack[spi], im);
+			if (old_reg && cur_reg) {
+				if (!regsafe(env, old_reg, cur_reg, idmap, exact))
+					return false;
+				i += (im == 0 ? BPF_REG_SIZE - 1 : 3);
+				continue;
+			}
+		}
+
+		/* if old state was safe with misc data in the stack
+		 * it will be safe with zero-initialized stack.
+		 * The opposite is not true
+		 */
+		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
+		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
+			continue;
+		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
+		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
+			/* Ex: old explored (safe) state has STACK_SPILL in
+			 * this stack slot, but current has STACK_MISC ->
+			 * this verifier states are not equivalent,
+			 * return false to continue verification of this path
+			 */
+			return false;
+		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
+			continue;
+		/* Both old and cur are having same slot_type */
+		switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
+		case STACK_SPILL:
+			/* when explored and current stack slot are both storing
+			 * spilled registers, check that stored pointers types
+			 * are the same as well.
+			 * Ex: explored safe path could have stored
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
+			 * but current path has stored:
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
+			 * such verifier states are not equivalent.
+			 * return false to continue verification of this path
+			 */
+			if (!regsafe(env, &old->stack[spi].spilled_ptr,
+				     &cur->stack[spi].spilled_ptr, idmap, exact))
+				return false;
+			break;
+		case STACK_DYNPTR:
+			old_reg = &old->stack[spi].spilled_ptr;
+			cur_reg = &cur->stack[spi].spilled_ptr;
+			if (old_reg->dynptr.type != cur_reg->dynptr.type ||
+			    old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
+			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
+				return false;
+			break;
+		case STACK_ITER:
+			old_reg = &old->stack[spi].spilled_ptr;
+			cur_reg = &cur->stack[spi].spilled_ptr;
+			/* iter.depth is not compared between states as it
+			 * doesn't matter for correctness and would otherwise
+			 * prevent convergence; we maintain it only to prevent
+			 * infinite loop check triggering, see
+			 * iter_active_depths_differ()
+			 */
+			if (old_reg->iter.btf != cur_reg->iter.btf ||
+			    old_reg->iter.btf_id != cur_reg->iter.btf_id ||
+			    old_reg->iter.state != cur_reg->iter.state ||
+			    /* ignore {old_reg,cur_reg}->iter.depth, see above */
+			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
+				return false;
+			break;
+		case STACK_IRQ_FLAG:
+			old_reg = &old->stack[spi].spilled_ptr;
+			cur_reg = &cur->stack[spi].spilled_ptr;
+			if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) ||
+			    old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class)
+				return false;
+			break;
+		case STACK_MISC:
+		case STACK_ZERO:
+		case STACK_INVALID:
+		case STACK_POISON:
+			continue;
+		/* Ensure that new unhandled slot types return false by default */
+		default:
+			return false;
+		}
+	}
+	return true;
+}
+
+static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
+		    struct bpf_idmap *idmap)
+{
+	int i;
+
+	if (old->acquired_refs != cur->acquired_refs)
+		return false;
+
+	if (old->active_locks != cur->active_locks)
+		return false;
+
+	if (old->active_preempt_locks != cur->active_preempt_locks)
+		return false;
+
+	if (old->active_rcu_locks != cur->active_rcu_locks)
+		return false;
+
+	if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
+		return false;
+
+	if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) ||
+	    old->active_lock_ptr != cur->active_lock_ptr)
+		return false;
+
+	for (i = 0; i < old->acquired_refs; i++) {
+		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
+		    old->refs[i].type != cur->refs[i].type)
+			return false;
+		switch (old->refs[i].type) {
+		case REF_TYPE_PTR:
+		case REF_TYPE_IRQ:
+			break;
+		case REF_TYPE_LOCK:
+		case REF_TYPE_RES_LOCK:
+		case REF_TYPE_RES_LOCK_IRQ:
+			if (old->refs[i].ptr != cur->refs[i].ptr)
+				return false;
+			break;
+		default:
+			WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* compare two verifier states
+ *
+ * all states stored in state_list are known to be valid, since
+ * verifier reached 'bpf_exit' instruction through them
+ *
+ * this function is called when verifier exploring different branches of
+ * execution popped from the state stack. If it sees an old state that has
+ * more strict register state and more strict stack state then this execution
+ * branch doesn't need to be explored further, since verifier already
+ * concluded that more strict state leads to valid finish.
+ *
+ * Therefore two states are equivalent if register state is more conservative
+ * and explored stack state is more conservative than the current one.
+ * Example:
+ *       explored                   current
+ * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
+ * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
+ *
+ * In other words if current stack state (one being explored) has more
+ * valid slots than old one that already passed validation, it means
+ * the verifier can stop exploring and conclude that current state is valid too
+ *
+ * Similarly with registers. If explored state has register type as invalid
+ * whereas register type in current state is meaningful, it means that
+ * the current state will reach 'bpf_exit' instruction safely
+ */
+static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
+			      struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
+{
+	u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
+	u16 i;
+
+	if (old->callback_depth > cur->callback_depth)
+		return false;
+
+	for (i = 0; i < MAX_BPF_REG; i++)
+		if (((1 << i) & live_regs) &&
+		    !regsafe(env, &old->regs[i], &cur->regs[i],
+			     &env->idmap_scratch, exact))
+			return false;
+
+	if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
+		return false;
+
+	return true;
+}
+
+static void reset_idmap_scratch(struct bpf_verifier_env *env)
+{
+	struct bpf_idmap *idmap = &env->idmap_scratch;
+
+	idmap->tmp_id_gen = env->id_gen;
+	idmap->cnt = 0;
+}
+
+static bool states_equal(struct bpf_verifier_env *env,
+			 struct bpf_verifier_state *old,
+			 struct bpf_verifier_state *cur,
+			 enum exact_level exact)
+{
+	u32 insn_idx;
+	int i;
+
+	if (old->curframe != cur->curframe)
+		return false;
+
+	reset_idmap_scratch(env);
+
+	/* Verification state from speculative execution simulation
+	 * must never prune a non-speculative execution one.
+	 */
+	if (old->speculative && !cur->speculative)
+		return false;
+
+	if (old->in_sleepable != cur->in_sleepable)
+		return false;
+
+	if (!refsafe(old, cur, &env->idmap_scratch))
+		return false;
+
+	/* for states to be equal callsites have to be the same
+	 * and all frame states need to be equivalent
+	 */
+	for (i = 0; i <= old->curframe; i++) {
+		insn_idx = bpf_frame_insn_idx(old, i);
+		if (old->frame[i]->callsite != cur->frame[i]->callsite)
+			return false;
+		if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
+			return false;
+	}
+	return true;
+}
+
+/* find precise scalars in the previous equivalent state and
+ * propagate them into the current state
+ */
+static int propagate_precision(struct bpf_verifier_env *env,
+			       const struct bpf_verifier_state *old,
+			       struct bpf_verifier_state *cur,
+			       bool *changed)
+{
+	struct bpf_reg_state *state_reg;
+	struct bpf_func_state *state;
+	int i, err = 0, fr;
+	bool first;
+
+	for (fr = old->curframe; fr >= 0; fr--) {
+		state = old->frame[fr];
+		state_reg = state->regs;
+		first = true;
+		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
+			if (state_reg->type != SCALAR_VALUE ||
+			    !state_reg->precise)
+				continue;
+			if (env->log.level & BPF_LOG_LEVEL2) {
+				if (first)
+					verbose(env, "frame %d: propagating r%d", fr, i);
+				else
+					verbose(env, ",r%d", i);
+			}
+			bpf_bt_set_frame_reg(&env->bt, fr, i);
+			first = false;
+		}
+
+		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+			if (!bpf_is_spilled_reg(&state->stack[i]))
+				continue;
+			state_reg = &state->stack[i].spilled_ptr;
+			if (state_reg->type != SCALAR_VALUE ||
+			    !state_reg->precise)
+				continue;
+			if (env->log.level & BPF_LOG_LEVEL2) {
+				if (first)
+					verbose(env, "frame %d: propagating fp%d",
+						fr, (-i - 1) * BPF_REG_SIZE);
+				else
+					verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
+			}
+			bpf_bt_set_frame_slot(&env->bt, fr, i);
+			first = false;
+		}
+		if (!first && (env->log.level & BPF_LOG_LEVEL2))
+			verbose(env, "\n");
+	}
+
+	err = bpf_mark_chain_precision(env, cur, -1, changed);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+#define MAX_BACKEDGE_ITERS 64
+
+/* Propagate read and precision marks from visit->backedges[*].state->equal_state
+ * to corresponding parent states of visit->backedges[*].state until fixed point is reached,
+ * then free visit->backedges.
+ * After execution of this function incomplete_read_marks() will return false
+ * for all states corresponding to @visit->callchain.
+ */
+static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit)
+{
+	struct bpf_scc_backedge *backedge;
+	struct bpf_verifier_state *st;
+	bool changed;
+	int i, err;
+
+	i = 0;
+	do {
+		if (i++ > MAX_BACKEDGE_ITERS) {
+			if (env->log.level & BPF_LOG_LEVEL2)
+				verbose(env, "%s: too many iterations\n", __func__);
+			for (backedge = visit->backedges; backedge; backedge = backedge->next)
+				bpf_mark_all_scalars_precise(env, &backedge->state);
+			break;
+		}
+		changed = false;
+		for (backedge = visit->backedges; backedge; backedge = backedge->next) {
+			st = &backedge->state;
+			err = propagate_precision(env, st->equal_state, st, &changed);
+			if (err)
+				return err;
+		}
+	} while (changed);
+
+	bpf_free_backedges(visit);
+	return 0;
+}
+
+static bool states_maybe_looping(struct bpf_verifier_state *old,
+				 struct bpf_verifier_state *cur)
+{
+	struct bpf_func_state *fold, *fcur;
+	int i, fr = cur->curframe;
+
+	if (old->curframe != fr)
+		return false;
+
+	fold = old->frame[fr];
+	fcur = cur->frame[fr];
+	for (i = 0; i < MAX_BPF_REG; i++)
+		if (memcmp(&fold->regs[i], &fcur->regs[i],
+			   offsetof(struct bpf_reg_state, frameno)))
+			return false;
+	return true;
+}
+
+/* is_state_visited() handles iter_next() (see process_iter_next_call() for
+ * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
+ * states to match, which otherwise would look like an infinite loop. So while
+ * iter_next() calls are taken care of, we still need to be careful and
+ * prevent erroneous and too eager declaration of "infinite loop", when
+ * iterators are involved.
+ *
+ * Here's a situation in pseudo-BPF assembly form:
+ *
+ *   0: again:                          ; set up iter_next() call args
+ *   1:   r1 = &it                      ; <CHECKPOINT HERE>
+ *   2:   call bpf_iter_num_next        ; this is iter_next() call
+ *   3:   if r0 == 0 goto done
+ *   4:   ... something useful here ...
+ *   5:   goto again                    ; another iteration
+ *   6: done:
+ *   7:   r1 = &it
+ *   8:   call bpf_iter_num_destroy     ; clean up iter state
+ *   9:   exit
+ *
+ * This is a typical loop. Let's assume that we have a prune point at 1:,
+ * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
+ * again`, assuming other heuristics don't get in a way).
+ *
+ * When we first time come to 1:, let's say we have some state X. We proceed
+ * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
+ * Now we come back to validate that forked ACTIVE state. We proceed through
+ * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
+ * are converging. But the problem is that we don't know that yet, as this
+ * convergence has to happen at iter_next() call site only. So if nothing is
+ * done, at 1: verifier will use bounded loop logic and declare infinite
+ * looping (and would be *technically* correct, if not for iterator's
+ * "eventual sticky NULL" contract, see process_iter_next_call()). But we
+ * don't want that. So what we do in process_iter_next_call() when we go on
+ * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
+ * a different iteration. So when we suspect an infinite loop, we additionally
+ * check if any of the *ACTIVE* iterator states depths differ. If yes, we
+ * pretend we are not looping and wait for next iter_next() call.
+ *
+ * This only applies to ACTIVE state. In DRAINED state we don't expect to
+ * loop, because that would actually mean infinite loop, as DRAINED state is
+ * "sticky", and so we'll keep returning into the same instruction with the
+ * same state (at least in one of possible code paths).
+ *
+ * This approach allows to keep infinite loop heuristic even in the face of
+ * active iterator. E.g., C snippet below is and will be detected as
+ * infinitely looping:
+ *
+ *   struct bpf_iter_num it;
+ *   int *p, x;
+ *
+ *   bpf_iter_num_new(&it, 0, 10);
+ *   while ((p = bpf_iter_num_next(&t))) {
+ *       x = p;
+ *       while (x--) {} // <<-- infinite loop here
+ *   }
+ *
+ */
+static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
+{
+	struct bpf_reg_state *slot, *cur_slot;
+	struct bpf_func_state *state;
+	int i, fr;
+
+	for (fr = old->curframe; fr >= 0; fr--) {
+		state = old->frame[fr];
+		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+			if (state->stack[i].slot_type[0] != STACK_ITER)
+				continue;
+
+			slot = &state->stack[i].spilled_ptr;
+			if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
+				continue;
+
+			cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
+			if (cur_slot->iter.depth != slot->iter.depth)
+				return true;
+		}
+	}
+	return false;
+}
+
+static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+	struct bpf_func_state *func;
+	struct bpf_reg_state *reg;
+	int i, j;
+
+	for (i = 0; i <= st->curframe; i++) {
+		func = st->frame[i];
+		for (j = 0; j < BPF_REG_FP; j++) {
+			reg = &func->regs[j];
+			if (reg->type != SCALAR_VALUE)
+				continue;
+			reg->precise = false;
+		}
+		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
+			if (!bpf_is_spilled_reg(&func->stack[j]))
+				continue;
+			reg = &func->stack[j].spilled_ptr;
+			if (reg->type != SCALAR_VALUE)
+				continue;
+			reg->precise = false;
+		}
+	}
+}
+
+int bpf_is_state_visited(struct bpf_verifier_env *env, int insn_idx)
+{
+	struct bpf_verifier_state_list *new_sl;
+	struct bpf_verifier_state_list *sl;
+	struct bpf_verifier_state *cur = env->cur_state, *new;
+	bool force_new_state, add_new_state, loop;
+	int n, err, states_cnt = 0;
+	struct list_head *pos, *tmp, *head;
+
+	force_new_state = env->test_state_freq || bpf_is_force_checkpoint(env, insn_idx) ||
+			  /* Avoid accumulating infinitely long jmp history */
+			  cur->jmp_history_cnt > 40;
+
+	/* bpf progs typically have pruning point every 4 instructions
+	 * http://vger.kernel.org/bpfconf2019.html#session-1
+	 * Do not add new state for future pruning if the verifier hasn't seen
+	 * at least 2 jumps and at least 8 instructions.
+	 * This heuristics helps decrease 'total_states' and 'peak_states' metric.
+	 * In tests that amounts to up to 50% reduction into total verifier
+	 * memory consumption and 20% verifier time speedup.
+	 */
+	add_new_state = force_new_state;
+	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
+	    env->insn_processed - env->prev_insn_processed >= 8)
+		add_new_state = true;
+
+	/* keep cleaning the current state as registers/stack become dead */
+	err = clean_verifier_state(env, cur);
+	if (err)
+		return err;
+
+	loop = false;
+	head = bpf_explored_state(env, insn_idx);
+	list_for_each_safe(pos, tmp, head) {
+		sl = container_of(pos, struct bpf_verifier_state_list, node);
+		states_cnt++;
+		if (sl->state.insn_idx != insn_idx)
+			continue;
+
+		if (sl->state.branches) {
+			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
+
+			if (frame->in_async_callback_fn &&
+			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
+				/* Different async_entry_cnt means that the verifier is
+				 * processing another entry into async callback.
+				 * Seeing the same state is not an indication of infinite
+				 * loop or infinite recursion.
+				 * But finding the same state doesn't mean that it's safe
+				 * to stop processing the current state. The previous state
+				 * hasn't yet reached bpf_exit, since state.branches > 0.
+				 * Checking in_async_callback_fn alone is not enough either.
+				 * Since the verifier still needs to catch infinite loops
+				 * inside async callbacks.
+				 */
+				goto skip_inf_loop_check;
+			}
+			/* BPF open-coded iterators loop detection is special.
+			 * states_maybe_looping() logic is too simplistic in detecting
+			 * states that *might* be equivalent, because it doesn't know
+			 * about ID remapping, so don't even perform it.
+			 * See process_iter_next_call() and iter_active_depths_differ()
+			 * for overview of the logic. When current and one of parent
+			 * states are detected as equivalent, it's a good thing: we prove
+			 * convergence and can stop simulating further iterations.
+			 * It's safe to assume that iterator loop will finish, taking into
+			 * account iter_next() contract of eventually returning
+			 * sticky NULL result.
+			 *
+			 * Note, that states have to be compared exactly in this case because
+			 * read and precision marks might not be finalized inside the loop.
+			 * E.g. as in the program below:
+			 *
+			 *     1. r7 = -16
+			 *     2. r6 = bpf_get_prandom_u32()
+			 *     3. while (bpf_iter_num_next(&fp[-8])) {
+			 *     4.   if (r6 != 42) {
+			 *     5.     r7 = -32
+			 *     6.     r6 = bpf_get_prandom_u32()
+			 *     7.     continue
+			 *     8.   }
+			 *     9.   r0 = r10
+			 *    10.   r0 += r7
+			 *    11.   r8 = *(u64 *)(r0 + 0)
+			 *    12.   r6 = bpf_get_prandom_u32()
+			 *    13. }
+			 *
+			 * Here verifier would first visit path 1-3, create a checkpoint at 3
+			 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
+			 * not have read or precision mark for r7 yet, thus inexact states
+			 * comparison would discard current state with r7=-32
+			 * => unsafe memory access at 11 would not be caught.
+			 */
+			if (is_iter_next_insn(env, insn_idx)) {
+				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+					struct bpf_func_state *cur_frame;
+					struct bpf_reg_state *iter_state, *iter_reg;
+					int spi;
+
+					cur_frame = cur->frame[cur->curframe];
+					/* btf_check_iter_kfuncs() enforces that
+					 * iter state pointer is always the first arg
+					 */
+					iter_reg = &cur_frame->regs[BPF_REG_1];
+					/* current state is valid due to states_equal(),
+					 * so we can assume valid iter and reg state,
+					 * no need for extra (re-)validations
+					 */
+					spi = bpf_get_spi(iter_reg->var_off.value);
+					iter_state = &bpf_func(env, iter_reg)->stack[spi].spilled_ptr;
+					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
+						loop = true;
+						goto hit;
+					}
+				}
+				goto skip_inf_loop_check;
+			}
+			if (is_may_goto_insn_at(env, insn_idx)) {
+				if (sl->state.may_goto_depth != cur->may_goto_depth &&
+				    states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+					loop = true;
+					goto hit;
+				}
+			}
+			if (bpf_calls_callback(env, insn_idx)) {
+				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+					loop = true;
+					goto hit;
+				}
+				goto skip_inf_loop_check;
+			}
+			/* attempt to detect infinite loop to avoid unnecessary doomed work */
+			if (states_maybe_looping(&sl->state, cur) &&
+			    states_equal(env, &sl->state, cur, EXACT) &&
+			    !iter_active_depths_differ(&sl->state, cur) &&
+			    sl->state.may_goto_depth == cur->may_goto_depth &&
+			    sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
+				verbose_linfo(env, insn_idx, "; ");
+				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
+				verbose(env, "cur state:");
+				print_verifier_state(env, cur, cur->curframe, true);
+				verbose(env, "old state:");
+				print_verifier_state(env, &sl->state, cur->curframe, true);
+				return -EINVAL;
+			}
+			/* if the verifier is processing a loop, avoid adding new state
+			 * too often, since different loop iterations have distinct
+			 * states and may not help future pruning.
+			 * This threshold shouldn't be too low to make sure that
+			 * a loop with large bound will be rejected quickly.
+			 * The most abusive loop will be:
+			 * r1 += 1
+			 * if r1 < 1000000 goto pc-2
+			 * 1M insn_procssed limit / 100 == 10k peak states.
+			 * This threshold shouldn't be too high either, since states
+			 * at the end of the loop are likely to be useful in pruning.
+			 */
+skip_inf_loop_check:
+			if (!force_new_state &&
+			    env->jmps_processed - env->prev_jmps_processed < 20 &&
+			    env->insn_processed - env->prev_insn_processed < 100)
+				add_new_state = false;
+			goto miss;
+		}
+		/* See comments for mark_all_regs_read_and_precise() */
+		loop = incomplete_read_marks(env, &sl->state);
+		if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
+hit:
+			sl->hit_cnt++;
+
+			/* if previous state reached the exit with precision and
+			 * current state is equivalent to it (except precision marks)
+			 * the precision needs to be propagated back in
+			 * the current state.
+			 */
+			err = 0;
+			if (bpf_is_jmp_point(env, env->insn_idx))
+				err = bpf_push_jmp_history(env, cur, 0, 0);
+			err = err ? : propagate_precision(env, &sl->state, cur, NULL);
+			if (err)
+				return err;
+			/* When processing iterator based loops above propagate_liveness and
+			 * propagate_precision calls are not sufficient to transfer all relevant
+			 * read and precision marks. E.g. consider the following case:
+			 *
+			 *  .-> A --.  Assume the states are visited in the order A, B, C.
+			 *  |   |   |  Assume that state B reaches a state equivalent to state A.
+			 *  |   v   v  At this point, state C is not processed yet, so state A
+			 *  '-- B   C  has not received any read or precision marks from C.
+			 *             Thus, marks propagated from A to B are incomplete.
+			 *
+			 * The verifier mitigates this by performing the following steps:
+			 *
+			 * - Prior to the main verification pass, strongly connected components
+			 *   (SCCs) are computed over the program's control flow graph,
+			 *   intraprocedurally.
+			 *
+			 * - During the main verification pass, `maybe_enter_scc()` checks
+			 *   whether the current verifier state is entering an SCC. If so, an
+			 *   instance of a `bpf_scc_visit` object is created, and the state
+			 *   entering the SCC is recorded as the entry state.
+			 *
+			 * - This instance is associated not with the SCC itself, but with a
+			 *   `bpf_scc_callchain`: a tuple consisting of the call sites leading to
+			 *   the SCC and the SCC id. See `compute_scc_callchain()`.
+			 *
+			 * - When a verification path encounters a `states_equal(...,
+			 *   RANGE_WITHIN)` condition, there exists a call chain describing the
+			 *   current state and a corresponding `bpf_scc_visit` instance. A copy
+			 *   of the current state is created and added to
+			 *   `bpf_scc_visit->backedges`.
+			 *
+			 * - When a verification path terminates, `maybe_exit_scc()` is called
+			 *   from `bpf_update_branch_counts()`. For states with `branches == 0`, it
+			 *   checks whether the state is the entry state of any `bpf_scc_visit`
+			 *   instance. If it is, this indicates that all paths originating from
+			 *   this SCC visit have been explored. `propagate_backedges()` is then
+			 *   called, which propagates read and precision marks through the
+			 *   backedges until a fixed point is reached.
+			 *   (In the earlier example, this would propagate marks from A to B,
+			 *    from C to A, and then again from A to B.)
+			 *
+			 * A note on callchains
+			 * --------------------
+			 *
+			 * Consider the following example:
+			 *
+			 *     void foo() { loop { ... SCC#1 ... } }
+			 *     void main() {
+			 *       A: foo();
+			 *       B: ...
+			 *       C: foo();
+			 *     }
+			 *
+			 * Here, there are two distinct callchains leading to SCC#1:
+			 * - (A, SCC#1)
+			 * - (C, SCC#1)
+			 *
+			 * Each callchain identifies a separate `bpf_scc_visit` instance that
+			 * accumulates backedge states. The `propagate_{liveness,precision}()`
+			 * functions traverse the parent state of each backedge state, which
+			 * means these parent states must remain valid (i.e., not freed) while
+			 * the corresponding `bpf_scc_visit` instance exists.
+			 *
+			 * Associating `bpf_scc_visit` instances directly with SCCs instead of
+			 * callchains would break this invariant:
+			 * - States explored during `C: foo()` would contribute backedges to
+			 *   SCC#1, but SCC#1 would only be exited once the exploration of
+			 *   `A: foo()` completes.
+			 * - By that time, the states explored between `A: foo()` and `C: foo()`
+			 *   (i.e., `B: ...`) may have already been freed, causing the parent
+			 *   links for states from `C: foo()` to become invalid.
+			 */
+			if (loop) {
+				struct bpf_scc_backedge *backedge;
+
+				backedge = kzalloc_obj(*backedge,
+						       GFP_KERNEL_ACCOUNT);
+				if (!backedge)
+					return -ENOMEM;
+				err = bpf_copy_verifier_state(&backedge->state, cur);
+				backedge->state.equal_state = &sl->state;
+				backedge->state.insn_idx = insn_idx;
+				err = err ?: add_scc_backedge(env, &sl->state, backedge);
+				if (err) {
+					bpf_free_verifier_state(&backedge->state, false);
+					kfree(backedge);
+					return err;
+				}
+			}
+			return 1;
+		}
+miss:
+		/* when new state is not going to be added do not increase miss count.
+		 * Otherwise several loop iterations will remove the state
+		 * recorded earlier. The goal of these heuristics is to have
+		 * states from some iterations of the loop (some in the beginning
+		 * and some at the end) to help pruning.
+		 */
+		if (add_new_state)
+			sl->miss_cnt++;
+		/* heuristic to determine whether this state is beneficial
+		 * to keep checking from state equivalence point of view.
+		 * Higher numbers increase max_states_per_insn and verification time,
+		 * but do not meaningfully decrease insn_processed.
+		 * 'n' controls how many times state could miss before eviction.
+		 * Use bigger 'n' for checkpoints because evicting checkpoint states
+		 * too early would hinder iterator convergence.
+		 */
+		n = bpf_is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
+		if (sl->miss_cnt > sl->hit_cnt * n + n) {
+			/* the state is unlikely to be useful. Remove it to
+			 * speed up verification
+			 */
+			sl->in_free_list = true;
+			list_del(&sl->node);
+			list_add(&sl->node, &env->free_list);
+			env->free_list_size++;
+			env->explored_states_size--;
+			maybe_free_verifier_state(env, sl);
+		}
+	}
+
+	if (env->max_states_per_insn < states_cnt)
+		env->max_states_per_insn = states_cnt;
+
+	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
+		return 0;
+
+	if (!add_new_state)
+		return 0;
+
+	/* There were no equivalent states, remember the current one.
+	 * Technically the current state is not proven to be safe yet,
+	 * but it will either reach outer most bpf_exit (which means it's safe)
+	 * or it will be rejected. When there are no loops the verifier won't be
+	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
+	 * again on the way to bpf_exit.
+	 * When looping the sl->state.branches will be > 0 and this state
+	 * will not be considered for equivalence until branches == 0.
+	 */
+	new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT);
+	if (!new_sl)
+		return -ENOMEM;
+	env->total_states++;
+	env->explored_states_size++;
+	update_peak_states(env);
+	env->prev_jmps_processed = env->jmps_processed;
+	env->prev_insn_processed = env->insn_processed;
+
+	/* forget precise markings we inherited, see __mark_chain_precision */
+	if (env->bpf_capable)
+		mark_all_scalars_imprecise(env, cur);
+
+	bpf_clear_singular_ids(env, cur);
+
+	/* add new state to the head of linked list */
+	new = &new_sl->state;
+	err = bpf_copy_verifier_state(new, cur);
+	if (err) {
+		bpf_free_verifier_state(new, false);
+		kfree(new_sl);
+		return err;
+	}
+	new->insn_idx = insn_idx;
+	verifier_bug_if(new->branches != 1, env,
+			"%s:branches_to_explore=%d insn %d",
+			__func__, new->branches, insn_idx);
+	err = maybe_enter_scc(env, new);
+	if (err) {
+		bpf_free_verifier_state(new, false);
+		kfree(new_sl);
+		return err;
+	}
+
+	cur->parent = new;
+	cur->first_insn_idx = insn_idx;
+	cur->dfs_depth = new->dfs_depth + 1;
+	bpf_clear_jmp_history(cur);
+	list_add(&new_sl->node, head);
+	return 0;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 00fcd7f9c06b..33352f28b339 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -498,11 +498,6 @@ bool bpf_is_may_goto_insn(struct bpf_insn *insn)
 	return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
 }
 
-static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
-{
-	return bpf_is_may_goto_insn(&env->prog->insnsi[insn_idx]);
-}
-
 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
 					const struct bpf_map *map)
 {
@@ -532,18 +527,6 @@ static bool is_atomic_fetch_insn(const struct bpf_insn *insn)
 	       (insn->imm & BPF_FETCH);
 }
 
-static int __get_spi(s32 off)
-{
-	return (-off - 1) / BPF_REG_SIZE;
-}
-
-static struct bpf_func_state *func(struct bpf_verifier_env *env,
-				   const struct bpf_reg_state *reg)
-{
-	struct bpf_verifier_state *cur = env->cur_state;
-
-	return cur->frame[reg->frameno];
-}
 
 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
 {
@@ -575,13 +558,13 @@ static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_s
 		return -EINVAL;
 	}
 
-	spi = __get_spi(off);
+	spi = bpf_get_spi(off);
 	if (spi + 1 < nr_slots) {
 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
 		return -EINVAL;
 	}
 
-	if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
+	if (!is_spi_bounds_valid(bpf_func(env, reg), spi, nr_slots))
 		return -ERANGE;
 	return spi;
 }
@@ -650,8 +633,6 @@ static void __mark_dynptr_reg(struct bpf_reg_state *reg,
 			      enum bpf_dynptr_type type,
 			      bool first_slot, int dynptr_id);
 
-static void __mark_reg_not_init(const struct bpf_verifier_env *env,
-				struct bpf_reg_state *reg);
 
 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
 				   struct bpf_reg_state *sreg1,
@@ -677,7 +658,7 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 				   enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	enum bpf_dynptr_type type;
 	int spi, i, err;
 
@@ -741,13 +722,13 @@ static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_stat
 		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
 	}
 
-	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
-	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
+	bpf_mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
+	bpf_mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
 }
 
 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int spi, ref_obj_id, i;
 
 	/*
@@ -806,7 +787,7 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
 	if (!env->allow_ptr_leaks)
-		__mark_reg_not_init(env, reg);
+		bpf_mark_reg_not_init(env, reg);
 	else
 		__mark_reg_unknown(env, reg);
 }
@@ -876,8 +857,8 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
 	/* Do not release reference state, we are destroying dynptr on stack,
 	 * not using some helper to release it. Just reset register.
 	 */
-	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
-	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
+	bpf_mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
+	bpf_mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
 
 	return 0;
 }
@@ -912,7 +893,7 @@ static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_
 
 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int i, spi;
 
 	/* This already represents first slot of initialized bpf_dynptr.
@@ -942,7 +923,7 @@ static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_re
 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 				    enum bpf_arg_type arg_type)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	enum bpf_dynptr_type dynptr_type;
 	int spi;
 
@@ -972,7 +953,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
 				 struct bpf_reg_state *reg, int insn_idx,
 				 struct btf *btf, u32 btf_id, int nr_slots)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int spi, i, j, id;
 
 	spi = iter_get_spi(env, reg, nr_slots);
@@ -1013,7 +994,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
 				   struct bpf_reg_state *reg, int nr_slots)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int spi, i, j;
 
 	spi = iter_get_spi(env, reg, nr_slots);
@@ -1027,7 +1008,7 @@ static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
 		if (i == 0)
 			WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
 
-		__mark_reg_not_init(env, st);
+		bpf_mark_reg_not_init(env, st);
 
 		for (j = 0; j < BPF_REG_SIZE; j++)
 			slot->slot_type[j] = STACK_INVALID;
@@ -1041,7 +1022,7 @@ static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
 				     struct bpf_reg_state *reg, int nr_slots)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int spi, i, j;
 
 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
@@ -1068,7 +1049,7 @@ static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
 static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 				   struct btf *btf, u32 btf_id, int nr_slots)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int spi, i, j;
 
 	spi = iter_get_spi(env, reg, nr_slots);
@@ -1105,7 +1086,7 @@ static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
 				     struct bpf_reg_state *reg, int insn_idx,
 				     int kfunc_class)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	struct bpf_stack_state *slot;
 	struct bpf_reg_state *st;
 	int spi, i, id;
@@ -1136,7 +1117,7 @@ static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
 static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 				      int kfunc_class)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	struct bpf_stack_state *slot;
 	struct bpf_reg_state *st;
 	int spi, i, err;
@@ -1174,7 +1155,7 @@ static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_r
 		return err;
 	}
 
-	__mark_reg_not_init(env, st);
+	bpf_mark_reg_not_init(env, st);
 
 	for (i = 0; i < BPF_REG_SIZE; i++)
 		slot->slot_type[i] = STACK_INVALID;
@@ -1185,7 +1166,7 @@ static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_r
 
 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	struct bpf_stack_state *slot;
 	int spi, i;
 
@@ -1209,7 +1190,7 @@ static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bp
 
 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	struct bpf_stack_state *slot;
 	struct bpf_reg_state *st;
 	int spi, i;
@@ -1260,23 +1241,12 @@ static bool is_stack_slot_special(const struct bpf_stack_state *stack)
 /* The reg state of a pointer or a bounded scalar was saved when
  * it was spilled to the stack.
  */
-static bool is_spilled_reg(const struct bpf_stack_state *stack)
-{
-	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
-}
-
 static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
 {
 	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
 	       stack->spilled_ptr.type == SCALAR_VALUE;
 }
 
-static bool is_spilled_scalar_after(const struct bpf_stack_state *stack, int im)
-{
-	return stack->slot_type[im] == STACK_SPILL &&
-	       stack->spilled_ptr.type == SCALAR_VALUE;
-}
-
 /*
  * Mark stack slot as STACK_MISC, unless it is already:
  * - STACK_INVALID, in which case they are equivalent.
@@ -1588,14 +1558,6 @@ static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *st
 	return NULL;
 }
 
-static void update_peak_states(struct bpf_verifier_env *env)
-{
-	u32 cur_states;
-
-	cur_states = env->explored_states_size + env->free_list_size + env->num_backedges;
-	env->peak_states = max(env->peak_states, cur_states);
-}
-
 static void free_func_state(struct bpf_func_state *state)
 {
 	if (!state)
@@ -1604,15 +1566,15 @@ static void free_func_state(struct bpf_func_state *state)
 	kfree(state);
 }
 
-static void clear_jmp_history(struct bpf_verifier_state *state)
+void bpf_clear_jmp_history(struct bpf_verifier_state *state)
 {
 	kfree(state->jmp_history);
 	state->jmp_history = NULL;
 	state->jmp_history_cnt = 0;
 }
 
-static void free_verifier_state(struct bpf_verifier_state *state,
-				bool free_self)
+void bpf_free_verifier_state(struct bpf_verifier_state *state,
+			    bool free_self)
 {
 	int i;
 
@@ -1621,42 +1583,11 @@ static void free_verifier_state(struct bpf_verifier_state *state,
 		state->frame[i] = NULL;
 	}
 	kfree(state->refs);
-	clear_jmp_history(state);
+	bpf_clear_jmp_history(state);
 	if (free_self)
 		kfree(state);
 }
 
-/* struct bpf_verifier_state->parent refers to states
- * that are in either of env->{expored_states,free_list}.
- * In both cases the state is contained in struct bpf_verifier_state_list.
- */
-static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
-{
-	if (st->parent)
-		return container_of(st->parent, struct bpf_verifier_state_list, state);
-	return NULL;
-}
-
-static bool incomplete_read_marks(struct bpf_verifier_env *env,
-				  struct bpf_verifier_state *st);
-
-/* A state can be freed if it is no longer referenced:
- * - is in the env->free_list;
- * - has no children states;
- */
-static void maybe_free_verifier_state(struct bpf_verifier_env *env,
-				      struct bpf_verifier_state_list *sl)
-{
-	if (!sl->in_free_list
-	    || sl->state.branches != 0
-	    || incomplete_read_marks(env, &sl->state))
-		return;
-	list_del(&sl->node);
-	free_verifier_state(&sl->state, false);
-	kfree(sl);
-	env->free_list_size--;
-}
-
 /* copy verifier state from src to dst growing dst stack space
  * when necessary to accommodate larger src stack
  */
@@ -1667,8 +1598,8 @@ static int copy_func_state(struct bpf_func_state *dst,
 	return copy_stack_state(dst, src);
 }
 
-static int copy_verifier_state(struct bpf_verifier_state *dst_state,
-			       const struct bpf_verifier_state *src)
+int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state,
+			   const struct bpf_verifier_state *src)
 {
 	struct bpf_func_state *dst;
 	int i, err;
@@ -1721,7 +1652,7 @@ static u32 state_htab_size(struct bpf_verifier_env *env)
 	return env->prog->len;
 }
 
-static struct list_head *explored_state(struct bpf_verifier_env *env, int idx)
+struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx)
 {
 	struct bpf_verifier_state *cur = env->cur_state;
 	struct bpf_func_state *state = cur->frame[cur->curframe];
@@ -1743,266 +1674,19 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
 	return true;
 }
 
-/* Return IP for a given frame in a call stack */
-static u32 frame_insn_idx(struct bpf_verifier_state *st, u32 frame)
-{
-	return frame == st->curframe
-	       ? st->insn_idx
-	       : st->frame[frame + 1]->callsite;
-}
-
-/* For state @st look for a topmost frame with frame_insn_idx() in some SCC,
- * if such frame exists form a corresponding @callchain as an array of
- * call sites leading to this frame and SCC id.
- * E.g.:
- *
- *    void foo()  { A: loop {... SCC#1 ...}; }
- *    void bar()  { B: loop { C: foo(); ... SCC#2 ... }
- *                  D: loop { E: foo(); ... SCC#3 ... } }
- *    void main() { F: bar(); }
- *
- * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
- * on @st frame call sites being (F,C,A) or (F,E,A).
- */
-static bool compute_scc_callchain(struct bpf_verifier_env *env,
-				  struct bpf_verifier_state *st,
-				  struct bpf_scc_callchain *callchain)
-{
-	u32 i, scc, insn_idx;
-
-	memset(callchain, 0, sizeof(*callchain));
-	for (i = 0; i <= st->curframe; i++) {
-		insn_idx = frame_insn_idx(st, i);
-		scc = env->insn_aux_data[insn_idx].scc;
-		if (scc) {
-			callchain->scc = scc;
-			break;
-		} else if (i < st->curframe) {
-			callchain->callsites[i] = insn_idx;
-		} else {
-			return false;
-		}
-	}
-	return true;
-}
-
-/* Check if bpf_scc_visit instance for @callchain exists. */
-static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env,
-					      struct bpf_scc_callchain *callchain)
-{
-	struct bpf_scc_info *info = env->scc_info[callchain->scc];
-	struct bpf_scc_visit *visits = info->visits;
-	u32 i;
-
-	if (!info)
-		return NULL;
-	for (i = 0; i < info->num_visits; i++)
-		if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0)
-			return &visits[i];
-	return NULL;
-}
-
-/* Allocate a new bpf_scc_visit instance corresponding to @callchain.
- * Allocated instances are alive for a duration of the do_check_common()
- * call and are freed by free_states().
- */
-static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env,
-					     struct bpf_scc_callchain *callchain)
-{
-	struct bpf_scc_visit *visit;
-	struct bpf_scc_info *info;
-	u32 scc, num_visits;
-	u64 new_sz;
-
-	scc = callchain->scc;
-	info = env->scc_info[scc];
-	num_visits = info ? info->num_visits : 0;
-	new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1);
-	info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT);
-	if (!info)
-		return NULL;
-	env->scc_info[scc] = info;
-	info->num_visits = num_visits + 1;
-	visit = &info->visits[num_visits];
-	memset(visit, 0, sizeof(*visit));
-	memcpy(&visit->callchain, callchain, sizeof(*callchain));
-	return visit;
-}
-
-/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */
-static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
-{
-	char *buf = env->tmp_str_buf;
-	int i, delta = 0;
-
-	delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "(");
-	for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) {
-		if (!callchain->callsites[i])
-			break;
-		delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,",
-				  callchain->callsites[i]);
-	}
-	delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc);
-	return env->tmp_str_buf;
-}
-
-/* If callchain for @st exists (@st is in some SCC), ensure that
- * bpf_scc_visit instance for this callchain exists.
- * If instance does not exist or is empty, assign visit->entry_state to @st.
- */
-static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
-{
-	struct bpf_scc_callchain *callchain = &env->callchain_buf;
-	struct bpf_scc_visit *visit;
-
-	if (!compute_scc_callchain(env, st, callchain))
-		return 0;
-	visit = scc_visit_lookup(env, callchain);
-	visit = visit ?: scc_visit_alloc(env, callchain);
-	if (!visit)
-		return -ENOMEM;
-	if (!visit->entry_state) {
-		visit->entry_state = st;
-		if (env->log.level & BPF_LOG_LEVEL2)
-			verbose(env, "SCC enter %s\n", format_callchain(env, callchain));
-	}
-	return 0;
-}
-
-static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);
-
-/* If callchain for @st exists (@st is in some SCC), make it empty:
- * - set visit->entry_state to NULL;
- * - flush accumulated backedges.
- */
-static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
-{
-	struct bpf_scc_callchain *callchain = &env->callchain_buf;
-	struct bpf_scc_visit *visit;
-
-	if (!compute_scc_callchain(env, st, callchain))
-		return 0;
-	visit = scc_visit_lookup(env, callchain);
-	if (!visit) {
-		/*
-		 * If path traversal stops inside an SCC, corresponding bpf_scc_visit
-		 * must exist for non-speculative paths. For non-speculative paths
-		 * traversal stops when:
-		 * a. Verification error is found, maybe_exit_scc() is not called.
-		 * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
-		 *    of any SCC.
-		 * c. A checkpoint is reached and matched. Checkpoints are created by
-		 *    is_state_visited(), which calls maybe_enter_scc(), which allocates
-		 *    bpf_scc_visit instances for checkpoints within SCCs.
-		 * (c) is the only case that can reach this point.
-		 */
-		if (!st->speculative) {
-			verifier_bug(env, "scc exit: no visit info for call chain %s",
-				     format_callchain(env, callchain));
-			return -EFAULT;
-		}
-		return 0;
-	}
-	if (visit->entry_state != st)
-		return 0;
-	if (env->log.level & BPF_LOG_LEVEL2)
-		verbose(env, "SCC exit %s\n", format_callchain(env, callchain));
-	visit->entry_state = NULL;
-	env->num_backedges -= visit->num_backedges;
-	visit->num_backedges = 0;
-	update_peak_states(env);
-	return propagate_backedges(env, visit);
-}
-
-/* Lookup an bpf_scc_visit instance corresponding to @st callchain
- * and add @backedge to visit->backedges. @st callchain must exist.
- */
-static int add_scc_backedge(struct bpf_verifier_env *env,
-			    struct bpf_verifier_state *st,
-			    struct bpf_scc_backedge *backedge)
-{
-	struct bpf_scc_callchain *callchain = &env->callchain_buf;
-	struct bpf_scc_visit *visit;
-
-	if (!compute_scc_callchain(env, st, callchain)) {
-		verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
-			     st->insn_idx);
-		return -EFAULT;
-	}
-	visit = scc_visit_lookup(env, callchain);
-	if (!visit) {
-		verifier_bug(env, "add backedge: no visit info for call chain %s",
-			     format_callchain(env, callchain));
-		return -EFAULT;
-	}
-	if (env->log.level & BPF_LOG_LEVEL2)
-		verbose(env, "SCC backedge %s\n", format_callchain(env, callchain));
-	backedge->next = visit->backedges;
-	visit->backedges = backedge;
-	visit->num_backedges++;
-	env->num_backedges++;
-	update_peak_states(env);
-	return 0;
-}
-
-/* bpf_reg_state->live marks for registers in a state @st are incomplete,
- * if state @st is in some SCC and not all execution paths starting at this
- * SCC are fully explored.
- */
-static bool incomplete_read_marks(struct bpf_verifier_env *env,
-				  struct bpf_verifier_state *st)
-{
-	struct bpf_scc_callchain *callchain = &env->callchain_buf;
-	struct bpf_scc_visit *visit;
-
-	if (!compute_scc_callchain(env, st, callchain))
-		return false;
-	visit = scc_visit_lookup(env, callchain);
-	if (!visit)
-		return false;
-	return !!visit->backedges;
-}
 
-static void free_backedges(struct bpf_scc_visit *visit)
+void bpf_free_backedges(struct bpf_scc_visit *visit)
 {
 	struct bpf_scc_backedge *backedge, *next;
 
 	for (backedge = visit->backedges; backedge; backedge = next) {
-		free_verifier_state(&backedge->state, false);
+		bpf_free_verifier_state(&backedge->state, false);
 		next = backedge->next;
 		kfree(backedge);
 	}
 	visit->backedges = NULL;
 }
 
-static int update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
-{
-	struct bpf_verifier_state_list *sl = NULL, *parent_sl;
-	struct bpf_verifier_state *parent;
-	int err;
-
-	while (st) {
-		u32 br = --st->branches;
-
-		/* verifier_bug_if(br > 1, ...) technically makes sense here,
-		 * but see comment in push_stack(), hence:
-		 */
-		verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br);
-		if (br)
-			break;
-		err = maybe_exit_scc(env, st);
-		if (err)
-			return err;
-		parent = st->parent;
-		parent_sl = state_parent_as_list(st);
-		if (sl)
-			maybe_free_verifier_state(env, sl);
-		st = parent;
-		sl = parent_sl;
-	}
-	return 0;
-}
-
 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 		     int *insn_idx, bool pop_log)
 {
@@ -2014,7 +1698,7 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 		return -ENOENT;
 
 	if (cur) {
-		err = copy_verifier_state(cur, &head->st);
+		err = bpf_copy_verifier_state(cur, &head->st);
 		if (err)
 			return err;
 	}
@@ -2025,7 +1709,7 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 	if (prev_insn_idx)
 		*prev_insn_idx = head->prev_insn_idx;
 	elem = head->next;
-	free_verifier_state(&head->st, false);
+	bpf_free_verifier_state(&head->st, false);
 	kfree(head);
 	env->head = elem;
 	env->stack_size--;
@@ -2062,7 +1746,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 	elem->log_pos = env->log.end_pos;
 	env->head = elem;
 	env->stack_size++;
-	err = copy_verifier_state(&elem->st, cur);
+	err = bpf_copy_verifier_state(&elem->st, cur);
 	if (err)
 		return ERR_PTR(-ENOMEM);
 	elem->st.speculative |= speculative;
@@ -2792,7 +2476,7 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
 }
 
 /* Mark a register as having a completely unknown (scalar) value. */
-static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
+void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
 {
 	/*
 	 * Clear type, off, and union(map_ptr, range) and
@@ -2814,7 +2498,7 @@ static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
 			       struct bpf_reg_state *reg)
 {
-	__mark_reg_unknown_imprecise(reg);
+	bpf_mark_reg_unknown_imprecise(reg);
 	reg->precise = !env->bpf_capable;
 }
 
@@ -2843,19 +2527,13 @@ static int __mark_reg_s32_range(struct bpf_verifier_env *env,
 	return reg_bounds_sanity_check(env, reg, "s32_range");
 }
 
-static void __mark_reg_not_init(const struct bpf_verifier_env *env,
-				struct bpf_reg_state *reg)
+void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
+			   struct bpf_reg_state *reg)
 {
 	__mark_reg_unknown(env, reg);
 	reg->type = NOT_INIT;
 }
 
-static void mark_reg_not_init(struct bpf_verifier_env *env,
-			      struct bpf_reg_state *regs, u32 regno)
-{
-	__mark_reg_not_init(env, regs + regno);
-}
-
 static int mark_btf_ld_reg(struct bpf_verifier_env *env,
 			   struct bpf_reg_state *regs, u32 regno,
 			   enum bpf_reg_type reg_type,
@@ -2893,7 +2571,7 @@ static void init_reg_state(struct bpf_verifier_env *env,
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++) {
-		mark_reg_not_init(env, regs, i);
+		bpf_mark_reg_not_init(env, &regs[i]);
 		regs[i].subreg_def = DEF_NOT_SUBREG;
 	}
 
@@ -2949,7 +2627,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
 			env->stack_size);
 		return ERR_PTR(-E2BIG);
 	}
-	/* Unlike push_stack() do not copy_verifier_state().
+	/* Unlike push_stack() do not bpf_copy_verifier_state().
 	 * The caller state doesn't matter.
 	 * This is async callback. It starts in a fresh stack.
 	 * Initialize it similar to do_check_common().
@@ -3849,11 +3527,6 @@ static int insn_stack_access_frameno(int insn_flags)
 	return insn_flags & INSN_F_FRAMENO_MASK;
 }
 
-static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
-{
-	return env->insn_aux_data[insn_idx].jmp_point;
-}
-
 #define LR_FRAMENO_BITS	3
 #define LR_SPI_BITS	6
 #define LR_ENTRY_BITS	(LR_SPI_BITS + LR_FRAMENO_BITS + 1)
@@ -3933,8 +3606,8 @@ static void linked_regs_unpack(u64 val, struct linked_regs *s)
 }
 
 /* for any branch, call, exit record the history of jmps in the given state */
-static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
-			    int insn_flags, u64 linked_regs)
+int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
+			int insn_flags, u64 linked_regs)
 {
 	u32 cnt = cur->jmp_history_cnt;
 	struct bpf_jmp_history_entry *p;
@@ -4088,11 +3761,6 @@ static inline int bt_subprog_exit(struct backtrack_state *bt)
 	return 0;
 }
 
-static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
-{
-	bt->reg_masks[frame] |= 1 << reg;
-}
-
 static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
 {
 	bt->reg_masks[frame] &= ~(1 << reg);
@@ -4100,7 +3768,7 @@ static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32
 
 static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
 {
-	bt_set_frame_reg(bt, bt->frame, reg);
+	bpf_bt_set_frame_reg(bt, bt->frame, reg);
 }
 
 static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
@@ -4108,11 +3776,6 @@ static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
 	bt_clear_frame_reg(bt, bt->frame, reg);
 }
 
-static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
-{
-	bt->stack_masks[frame] |= 1ull << slot;
-}
-
 static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
 {
 	bt->stack_masks[frame] &= ~(1ull << slot);
@@ -4222,9 +3885,9 @@ static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_histo
 		struct linked_reg *e = &linked_regs.entries[i];
 
 		if (e->is_reg)
-			bt_set_frame_reg(bt, e->frameno, e->regno);
+			bpf_bt_set_frame_reg(bt, e->frameno, e->regno);
 		else
-			bt_set_frame_slot(bt, e->frameno, e->spi);
+			bpf_bt_set_frame_slot(bt, e->frameno, e->spi);
 	}
 }
 
@@ -4337,7 +4000,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 		 */
 		spi = insn_stack_access_spi(hist->flags);
 		fr = insn_stack_access_frameno(hist->flags);
-		bt_set_frame_slot(bt, fr, spi);
+		bpf_bt_set_frame_slot(bt, fr, spi);
 	} else if (class == BPF_STX || class == BPF_ST) {
 		if (bt_is_reg_set(bt, dreg))
 			/* stx & st shouldn't be using _scalar_ dst_reg
@@ -4410,7 +4073,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 				for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
 					if (bt_is_reg_set(bt, i)) {
 						bt_clear_reg(bt, i);
-						bt_set_frame_reg(bt, bt->frame - 1, i);
+						bpf_bt_set_frame_reg(bt, bt->frame - 1, i);
 					}
 				}
 				if (bt_subprog_exit(bt))
@@ -4596,8 +4259,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
  *
  * For now backtracking falls back into conservative marking.
  */
-static void mark_all_scalars_precise(struct bpf_verifier_env *env,
-				     struct bpf_verifier_state *st)
+void bpf_mark_all_scalars_precise(struct bpf_verifier_env *env,
+				 struct bpf_verifier_state *st)
 {
 	struct bpf_func_state *func;
 	struct bpf_reg_state *reg;
@@ -4628,7 +4291,7 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
 				}
 			}
 			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
-				if (!is_spilled_reg(&func->stack[j]))
+				if (!bpf_is_spilled_reg(&func->stack[j]))
 					continue;
 				reg = &func->stack[j].spilled_ptr;
 				if (reg->type != SCALAR_VALUE || reg->precise)
@@ -4643,33 +4306,8 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
 	}
 }
 
-static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
-{
-	struct bpf_func_state *func;
-	struct bpf_reg_state *reg;
-	int i, j;
-
-	for (i = 0; i <= st->curframe; i++) {
-		func = st->frame[i];
-		for (j = 0; j < BPF_REG_FP; j++) {
-			reg = &func->regs[j];
-			if (reg->type != SCALAR_VALUE)
-				continue;
-			reg->precise = false;
-		}
-		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
-			if (!is_spilled_reg(&func->stack[j]))
-				continue;
-			reg = &func->stack[j].spilled_ptr;
-			if (reg->type != SCALAR_VALUE)
-				continue;
-			reg->precise = false;
-		}
-	}
-}
-
 /*
- * __mark_chain_precision() backtracks BPF program instruction sequence and
+ * bpf_mark_chain_precision() backtracks BPF program instruction sequence and
  * chain of verifier states making sure that register *regno* (if regno >= 0)
  * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
  * SCALARS, as well as any other registers and slots that contribute to
@@ -4755,10 +4393,10 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_
  * mark_all_scalars_imprecise() to hopefully get more permissive and generic
  * finalized states which help in short circuiting more future states.
  */
-static int __mark_chain_precision(struct bpf_verifier_env *env,
-				  struct bpf_verifier_state *starting_state,
-				  int regno,
-				  bool *changed)
+int bpf_mark_chain_precision(struct bpf_verifier_env *env,
+			    struct bpf_verifier_state *starting_state,
+			    int regno,
+			    bool *changed)
 {
 	struct bpf_verifier_state *st = starting_state;
 	struct backtrack_state *bt = &env->bt;
@@ -4841,7 +4479,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env,
 				err = backtrack_insn(env, i, subseq_idx, hist, bt);
 			}
 			if (err == -ENOTSUPP) {
-				mark_all_scalars_precise(env, starting_state);
+				bpf_mark_all_scalars_precise(env, starting_state);
 				bt_reset(bt);
 				return 0;
 			} else if (err) {
@@ -4933,7 +4571,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env,
 	 * fallback to marking all precise
 	 */
 	if (!bt_empty(bt)) {
-		mark_all_scalars_precise(env, starting_state);
+		bpf_mark_all_scalars_precise(env, starting_state);
 		bt_reset(bt);
 	}
 
@@ -4942,7 +4580,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env,
 
 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
 {
-	return __mark_chain_precision(env, env->cur_state, regno, NULL);
+	return bpf_mark_chain_precision(env, env->cur_state, regno, NULL);
 }
 
 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to
@@ -4951,7 +4589,7 @@ int mark_chain_precision(struct bpf_verifier_env *env, int regno)
 static int mark_chain_precision_batch(struct bpf_verifier_env *env,
 				      struct bpf_verifier_state *starting_state)
 {
-	return __mark_chain_precision(env, starting_state, -1, NULL);
+	return bpf_mark_chain_precision(env, starting_state, -1, NULL);
 }
 
 static bool is_spillable_regtype(enum bpf_reg_type type)
@@ -4981,11 +4619,6 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	}
 }
 
-/* Does this register contain a constant zero? */
-static bool register_is_null(struct bpf_reg_state *reg)
-{
-	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
-}
 
 /* check if register is a constant scalar value */
 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
@@ -5125,7 +4758,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	 * so it's aligned access and [off, off + size) are within stack limits
 	 */
 	if (!env->allow_ptr_leaks &&
-	    is_spilled_reg(&state->stack[spi]) &&
+	    bpf_is_spilled_reg(&state->stack[spi]) &&
 	    !is_spilled_scalar_reg(&state->stack[spi]) &&
 	    size != BPF_REG_SIZE) {
 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
@@ -5194,7 +4827,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 		scrub_special_slot(state, spi);
 
 		/* when we zero initialize stack slots mark them as such */
-		if ((reg && register_is_null(reg)) ||
+		if ((reg && bpf_register_is_null(reg)) ||
 		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
 			/* STACK_ZERO case happened because register spill
 			 * wasn't properly aligned at the stack slot boundary,
@@ -5215,7 +4848,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	}
 
 	if (insn_flags)
-		return push_jmp_history(env, env->cur_state, insn_flags, 0);
+		return bpf_push_jmp_history(env, env->cur_state, insn_flags, 0);
 	return 0;
 }
 
@@ -5260,14 +4893,14 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
 	max_off = ptr_reg->smax_value + off + size;
 	if (value_regno >= 0)
 		value_reg = &cur->regs[value_regno];
-	if ((value_reg && register_is_null(value_reg)) ||
+	if ((value_reg && bpf_register_is_null(value_reg)) ||
 	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
 		writing_zero = true;
 
 	for (i = min_off; i < max_off; i++) {
 		int spi;
 
-		spi = __get_spi(i);
+		spi = bpf_get_spi(i);
 		err = destroy_if_dynptr_stack_slot(env, state, spi);
 		if (err)
 			return err;
@@ -5316,7 +4949,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
 
 		/*
 		 * Scrub slots if variable-offset stack write goes over spilled pointers.
-		 * Otherwise is_spilled_reg() may == true && spilled_ptr.type == NOT_INIT
+		 * Otherwise bpf_is_spilled_reg() may == true && spilled_ptr.type == NOT_INIT
 		 * and valid program is rejected by check_stack_read_fixed_off()
 		 * with obscure "invalid size of register fill" message.
 		 */
@@ -5420,7 +5053,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 	mark_stack_slot_scratched(env, spi);
 	check_fastcall_stack_contract(env, state, env->insn_idx, off);
 
-	if (is_spilled_reg(&reg_state->stack[spi])) {
+	if (bpf_is_spilled_reg(&reg_state->stack[spi])) {
 		u8 spill_size = 1;
 
 		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
@@ -5543,7 +5176,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 		insn_flags = 0; /* we are not restoring spilled register */
 	}
 	if (insn_flags)
-		return push_jmp_history(env, env->cur_state, insn_flags, 0);
+		return bpf_push_jmp_history(env, env->cur_state, insn_flags, 0);
 	return 0;
 }
 
@@ -5581,7 +5214,7 @@ static int check_stack_read_var_off(struct bpf_verifier_env *env,
 {
 	/* The state of the source register. */
 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
-	struct bpf_func_state *ptr_state = func(env, reg);
+	struct bpf_func_state *ptr_state = bpf_func(env, reg);
 	int err;
 	int min_off, max_off;
 
@@ -5613,7 +5246,7 @@ static int check_stack_read(struct bpf_verifier_env *env,
 			    int dst_regno)
 {
 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int err;
 	/* Some accesses are only permitted with a static offset. */
 	bool var_off = !tnum_is_const(reg->var_off);
@@ -5669,7 +5302,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
 			     int value_regno, int insn_idx)
 {
 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int err;
 
 	if (tnum_is_const(reg->var_off)) {
@@ -6066,7 +5699,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
 			return ret;
 	} else if (class == BPF_STX) {
 		val_reg = reg_state(env, value_regno);
-		if (!register_is_null(val_reg) &&
+		if (!bpf_register_is_null(val_reg) &&
 		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
 			return -EACCES;
 	} else if (class == BPF_ST) {
@@ -7532,7 +7165,7 @@ static int check_stack_access_within_bounds(
 		enum bpf_access_type type)
 {
 	struct bpf_reg_state *reg = reg_state(env, regno);
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	s64 min_off, max_off;
 	int err;
 	char *err_extra;
@@ -8118,7 +7751,7 @@ static int check_stack_range_initialized(
 		enum bpf_access_type type, struct bpf_call_arg_meta *meta)
 {
 	struct bpf_reg_state *reg = reg_state(env, regno);
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int err, min_off, max_off, i, j, slot, spi;
 	/* Some accesses can write anything into the stack, others are
 	 * read-only.
@@ -8190,7 +7823,7 @@ static int check_stack_range_initialized(
 		for (i = min_off; i < max_off + access_size; i++) {
 			int stack_off = -i - 1;
 
-			spi = __get_spi(i);
+			spi = bpf_get_spi(i);
 			/* raw_mode may write past allocated_stack */
 			if (state->allocated_stack <= stack_off)
 				continue;
@@ -8226,7 +7859,7 @@ static int check_stack_range_initialized(
 			goto mark;
 		}
 
-		if (is_spilled_reg(&state->stack[spi]) &&
+		if (bpf_is_spilled_reg(&state->stack[spi]) &&
 		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
 		     env->allow_ptr_leaks)) {
 			if (clobber) {
@@ -8334,7 +7967,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	default: /* scalar_value or invalid ptr */
 		/* Allow zero-byte read from NULL, regardless of pointer type */
 		if (zero_size_allowed && access_size == 0 &&
-		    register_is_null(reg))
+		    bpf_register_is_null(reg))
 			return 0;
 
 		verbose(env, "R%d type=%s ", regno,
@@ -8407,7 +8040,7 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg
 	struct bpf_reg_state saved_reg;
 	int err;
 
-	if (register_is_null(reg))
+	if (bpf_register_is_null(reg))
 		return 0;
 
 	/* Assuming that the register contains a value check if the memory
@@ -8833,7 +8466,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
 
 static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 
 	return state->stack[spi].spilled_ptr.ref_obj_id;
 }
@@ -8965,7 +8598,7 @@ static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
 	struct list_head *pos, *head;
 
 	/* Explored states are pushed in stack order, most recent states come first */
-	head = explored_state(env, insn_idx);
+	head = bpf_explored_state(env, insn_idx);
 	list_for_each(pos, head) {
 		sl = container_of(pos, struct bpf_verifier_state_list, node);
 		/* If st->branches != 0 state is a part of current DFS verification path,
@@ -8980,11 +8613,6 @@ static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
 	return NULL;
 }
 
-static void reset_idmap_scratch(struct bpf_verifier_env *env);
-static bool regs_exact(const struct bpf_reg_state *rold,
-		       const struct bpf_reg_state *rcur,
-		       struct bpf_idmap *idmap);
-
 /*
  * Check if scalar registers are exact for the purpose of not widening.
  * More lenient than regs_exact()
@@ -9026,8 +8654,8 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env,
 		num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
 				fcur->allocated_stack / BPF_REG_SIZE);
 		for (i = 0; i < num_slots; i++) {
-			if (!is_spilled_reg(&fold->stack[i]) ||
-			    !is_spilled_reg(&fcur->stack[i]))
+			if (!bpf_is_spilled_reg(&fold->stack[i]) ||
+			    !bpf_is_spilled_reg(&fcur->stack[i]))
 				continue;
 
 			maybe_widen_reg(env,
@@ -9620,7 +9248,7 @@ static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
 
 static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int spi;
 
 	if (reg->type == CONST_PTR_TO_DYNPTR)
@@ -9633,7 +9261,7 @@ static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 
 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int spi;
 
 	if (reg->type == CONST_PTR_TO_DYNPTR)
@@ -9647,13 +9275,13 @@ static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state
 static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
 					    struct bpf_reg_state *reg)
 {
-	struct bpf_func_state *state = func(env, reg);
+	struct bpf_func_state *state = bpf_func(env, reg);
 	int spi;
 
 	if (reg->type == CONST_PTR_TO_DYNPTR)
 		return reg->dynptr.type;
 
-	spi = __get_spi(reg->var_off.value);
+	spi = bpf_get_spi(reg->var_off.value);
 	if (spi < 0) {
 		verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
 		return BPF_DYNPTR_TYPE_INVALID;
@@ -9721,7 +9349,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env,
 				u32 key_size,
 				s64 *value)
 {
-	struct bpf_func_state *state = func(env, key);
+	struct bpf_func_state *state = bpf_func(env, key);
 	struct bpf_reg_state *reg;
 	int slot, spi, off;
 	int spill_size = 0;
@@ -9767,7 +9395,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env,
 	/* We are relying on a constant value. So mark as precise
 	 * to prevent pruning on it.
 	 */
-	bt_set_frame_slot(&env->bt, key->frameno, spi);
+	bpf_bt_set_frame_slot(&env->bt, key->frameno, spi);
 	err = mark_chain_precision_batch(env, env->cur_state);
 	if (err < 0)
 		return err;
@@ -9819,7 +9447,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 			return err;
 	}
 
-	if (register_is_null(reg) && type_may_be_null(arg_type))
+	if (bpf_register_is_null(reg) && type_may_be_null(arg_type))
 		/* A NULL register has a SCALAR_VALUE type, so skip
 		 * type checking.
 		 */
@@ -9841,7 +9469,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 skip_type_check:
 	if (arg_type_is_release(arg_type)) {
 		if (arg_type_is_dynptr(arg_type)) {
-			struct bpf_func_state *state = func(env, reg);
+			struct bpf_func_state *state = bpf_func(env, reg);
 			int spi;
 
 			/* Only dynptr created on stack can be released, thus
@@ -9859,7 +9487,7 @@ skip_type_check:
 				verbose(env, "cannot release unowned const bpf_dynptr\n");
 				return -EINVAL;
 			}
-		} else if (!reg->ref_obj_id && !register_is_null(reg)) {
+		} else if (!reg->ref_obj_id && !bpf_register_is_null(reg)) {
 			verbose(env, "R%d must be referenced when passed to release function\n",
 				regno);
 			return -EINVAL;
@@ -9938,7 +9566,7 @@ skip_type_check:
 		}
 		break;
 	case ARG_PTR_TO_MAP_VALUE:
-		if (type_may_be_null(arg_type) && register_is_null(reg))
+		if (type_may_be_null(arg_type) && bpf_register_is_null(reg))
 			return 0;
 
 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
@@ -10543,7 +10171,7 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
 
 	/* after the call registers r0 - r5 were scratched */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		mark_reg_not_init(env, regs, caller_saved[i]);
+		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
 		__check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
 	}
 }
@@ -10682,7 +10310,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
 			struct bpf_call_arg_meta meta;
 			int err;
 
-			if (register_is_null(reg) && type_may_be_null(arg->arg_type))
+			if (bpf_register_is_null(reg) && type_may_be_null(arg->arg_type))
 				continue;
 
 			memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
@@ -10905,7 +10533,7 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
 
 	/* unused */
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 	return 0;
 }
 
@@ -10962,9 +10590,9 @@ static int set_loop_callback_state(struct bpf_verifier_env *env,
 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
 
 	/* unused */
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 
 	callee->in_callback_fn = true;
 	callee->callback_ret_range = retval_range(0, 1);
@@ -10994,8 +10622,8 @@ static int set_timer_callback_state(struct bpf_verifier_env *env,
 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
 
 	/* unused */
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 	callee->in_async_callback_fn = true;
 	callee->callback_ret_range = retval_range(0, 0);
 	return 0;
@@ -11022,8 +10650,8 @@ static int set_find_vma_callback_state(struct bpf_verifier_env *env,
 	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
 
 	/* unused */
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 	callee->in_callback_fn = true;
 	callee->callback_ret_range = retval_range(0, 1);
 	return 0;
@@ -11038,14 +10666,14 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
 	 *			  callback_ctx, u64 flags);
 	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
 	 */
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
 	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
 
 	/* unused */
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 
 	callee->in_callback_fn = true;
 	callee->callback_ret_range = retval_range(0, 1);
@@ -11077,9 +10705,9 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
 	mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
 	ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
 
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 	callee->in_callback_fn = true;
 	callee->callback_ret_range = retval_range(0, 1);
 	return 0;
@@ -11108,8 +10736,8 @@ static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
 
 	/* unused */
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
-	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 	callee->in_async_callback_fn = true;
 	callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
 	return 0;
@@ -11486,7 +11114,7 @@ static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
 {
 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_4);
-	bool reg_is_null = register_is_null(reg);
+	bool reg_is_null = bpf_register_is_null(reg);
 
 	if (reg_is_null)
 		mark_chain_precision(env, BPF_REG_4);
@@ -11682,7 +11310,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			}
 		} else if (meta.ref_obj_id) {
 			err = release_reference(env, meta.ref_obj_id);
-		} else if (register_is_null(&regs[meta.release_regno])) {
+		} else if (bpf_register_is_null(&regs[meta.release_regno])) {
 			/* meta.ref_obj_id can only be 0 if register that is meant to be
 			 * released is NULL, which must be > R0.
 			 */
@@ -11705,7 +11333,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		/* check that flags argument in get_local_storage(map, flags) is 0,
 		 * this is required because get_local_storage() can't return an error.
 		 */
-		if (!register_is_null(&regs[BPF_REG_2])) {
+		if (!bpf_register_is_null(&regs[BPF_REG_2])) {
 			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
 			return -EINVAL;
 		}
@@ -11848,7 +11476,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 
 	/* reset caller saved regs */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		mark_reg_not_init(env, regs, caller_saved[i]);
+		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
 	}
 
@@ -12684,7 +12312,7 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 	if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
 		return KF_ARG_PTR_TO_CTX;
 
-	if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg) &&
+	if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && bpf_register_is_null(reg) &&
 	    !arg_mem_size)
 		return KF_ARG_PTR_TO_NULL;
 
@@ -13425,7 +13053,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			return -EINVAL;
 		}
 
-		if ((register_is_null(reg) || type_may_be_null(reg->type)) &&
+		if ((bpf_register_is_null(reg) || type_may_be_null(reg->type)) &&
 		    !is_kfunc_arg_nullable(meta->btf, &args[i])) {
 			verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
 			return -EACCES;
@@ -13745,7 +13373,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			struct bpf_reg_state *size_reg = &regs[regno + 1];
 			const struct btf_param *size_arg = &args[i + 1];
 
-			if (!register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
+			if (!bpf_register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
 				ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
 				if (ret < 0) {
 					verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
@@ -14320,7 +13948,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
 		/* Clear r0-r5 registers in forked state */
 		for (i = 0; i < CALLER_SAVED_REGS; i++)
-			mark_reg_not_init(env, regs, caller_saved[i]);
+			bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
 
 		mark_reg_unknown(env, regs, BPF_REG_0);
 		err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1);
@@ -14498,7 +14126,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 		u32 regno = caller_saved[i];
 
-		mark_reg_not_init(env, regs, regno);
+		bpf_mark_reg_not_init(env, &regs[regno]);
 		regs[regno].subreg_def = DEF_NOT_SUBREG;
 	}
 
@@ -17498,7 +17126,7 @@ static void collect_linked_regs(struct bpf_verifier_env *env,
 
 	id = id & ~BPF_ADD_CONST;
 	for (i = vstate->curframe; i >= 0; i--) {
-		live_regs = aux[frame_insn_idx(vstate, i)].live_regs_before;
+		live_regs = aux[bpf_frame_insn_idx(vstate, i)].live_regs_before;
 		func = vstate->frame[i];
 		for (j = 0; j < BPF_REG_FP; j++) {
 			if (!(live_regs & BIT(j)))
@@ -17507,7 +17135,7 @@ static void collect_linked_regs(struct bpf_verifier_env *env,
 			__collect_linked_regs(linked_regs, reg, id, i, j, true);
 		}
 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
-			if (!is_spilled_reg(&func->stack[j]))
+			if (!bpf_is_spilled_reg(&func->stack[j]))
 				continue;
 			reg = &func->stack[j].spilled_ptr;
 			__collect_linked_regs(linked_regs, reg, id, i, j, false);
@@ -17652,7 +17280,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	}
 
 	if (insn_flags) {
-		err = push_jmp_history(env, this_branch, insn_flags, 0);
+		err = bpf_push_jmp_history(env, this_branch, insn_flags, 0);
 		if (err)
 			return err;
 	}
@@ -17716,7 +17344,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
 		collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs);
 	if (linked_regs.cnt > 1) {
-		err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
+		err = bpf_push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
 		if (err)
 			return err;
 	}
@@ -17796,7 +17424,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
 	    type_may_be_null(dst_reg->type) &&
 	    ((BPF_SRC(insn->code) == BPF_K && insn->imm == 0) ||
-	     (BPF_SRC(insn->code) == BPF_X && register_is_null(src_reg)))) {
+	     (BPF_SRC(insn->code) == BPF_X && bpf_register_is_null(src_reg)))) {
 		/* Mark all identical registers in each branch as either
 		 * safe or unknown depending R == 0 or R != 0 conditional.
 		 */
@@ -17988,7 +17616,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 
 	/* reset caller saved regs to unreadable */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		mark_reg_not_init(env, regs, caller_saved[i]);
+		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
 	}
 
@@ -18996,197 +18624,6 @@ static int check_btf_info(struct bpf_verifier_env *env,
 	return 0;
 }
 
-/* check %cur's range satisfies %old's */
-static bool range_within(const struct bpf_reg_state *old,
-			 const struct bpf_reg_state *cur)
-{
-	return old->umin_value <= cur->umin_value &&
-	       old->umax_value >= cur->umax_value &&
-	       old->smin_value <= cur->smin_value &&
-	       old->smax_value >= cur->smax_value &&
-	       old->u32_min_value <= cur->u32_min_value &&
-	       old->u32_max_value >= cur->u32_max_value &&
-	       old->s32_min_value <= cur->s32_min_value &&
-	       old->s32_max_value >= cur->s32_max_value;
-}
-
-/* If in the old state two registers had the same id, then they need to have
- * the same id in the new state as well.  But that id could be different from
- * the old state, so we need to track the mapping from old to new ids.
- * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
- * regs with old id 5 must also have new id 9 for the new state to be safe.  But
- * regs with a different old id could still have new id 9, we don't care about
- * that.
- * So we look through our idmap to see if this old id has been seen before.  If
- * so, we require the new id to match; otherwise, we add the id pair to the map.
- */
-static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
-{
-	struct bpf_id_pair *map = idmap->map;
-	unsigned int i;
-
-	/* either both IDs should be set or both should be zero */
-	if (!!old_id != !!cur_id)
-		return false;
-
-	if (old_id == 0) /* cur_id == 0 as well */
-		return true;
-
-	for (i = 0; i < idmap->cnt; i++) {
-		if (map[i].old == old_id)
-			return map[i].cur == cur_id;
-		if (map[i].cur == cur_id)
-			return false;
-	}
-
-	/* Reached the end of known mappings; haven't seen this id before */
-	if (idmap->cnt < BPF_ID_MAP_SIZE) {
-		map[idmap->cnt].old = old_id;
-		map[idmap->cnt].cur = cur_id;
-		idmap->cnt++;
-		return true;
-	}
-
-	/* We ran out of idmap slots, which should be impossible */
-	WARN_ON_ONCE(1);
-	return false;
-}
-
-/*
- * Compare scalar register IDs for state equivalence.
- *
- * When old_id == 0, the old register is independent - not linked to any
- * other register. Any linking in the current state only adds constraints,
- * making it more restrictive. Since the old state didn't rely on any ID
- * relationships for this register, it's always safe to accept cur regardless
- * of its ID. Hence, return true immediately.
- *
- * When old_id != 0 but cur_id == 0, we need to ensure that different
- * independent registers in cur don't incorrectly satisfy the ID matching
- * requirements of linked registers in old.
- *
- * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0
- * and r7.id=0 (both independent), without temp IDs both would map old_id=X
- * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map
- * X->temp2, but X is already mapped to temp1, so the check fails correctly.
- *
- * When old_id has BPF_ADD_CONST set, the compound id (base | flag) and the
- * base id (flag stripped) must both map consistently. Example: old has
- * r2.id=A, r3.id=A|flag (r3 = r2 + delta), cur has r2.id=B, r3.id=C|flag
- * (r3 derived from unrelated r4). Without the base check, idmap gets two
- * independent entries A->B and A|flag->C|flag, missing that A->C conflicts
- * with A->B. The base ID cross-check catches this.
- */
-static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
-{
-	if (!old_id)
-		return true;
-
-	cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
-
-	if (!check_ids(old_id, cur_id, idmap))
-		return false;
-	if (old_id & BPF_ADD_CONST) {
-		old_id &= ~BPF_ADD_CONST;
-		cur_id &= ~BPF_ADD_CONST;
-		if (!check_ids(old_id, cur_id, idmap))
-			return false;
-	}
-	return true;
-}
-
-static void __clean_func_state(struct bpf_verifier_env *env,
-			       struct bpf_func_state *st,
-			       u16 live_regs, int frame)
-{
-	int i, j;
-
-	for (i = 0; i < BPF_REG_FP; i++) {
-		/* liveness must not touch this register anymore */
-		if (!(live_regs & BIT(i)))
-			/* since the register is unused, clear its state
-			 * to make further comparison simpler
-			 */
-			__mark_reg_not_init(env, &st->regs[i]);
-	}
-
-	/*
-	 * Clean dead 4-byte halves within each SPI independently.
-	 * half_spi 2*i   → lower half: slot_type[0..3] (closer to FP)
-	 * half_spi 2*i+1 → upper half: slot_type[4..7] (farther from FP)
-	 */
-	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
-		bool lo_live = bpf_stack_slot_alive(env, frame, i * 2);
-		bool hi_live = bpf_stack_slot_alive(env, frame, i * 2 + 1);
-
-		if (!hi_live || !lo_live) {
-			int start = !lo_live ? 0 : BPF_REG_SIZE / 2;
-			int end = !hi_live ? BPF_REG_SIZE : BPF_REG_SIZE / 2;
-			u8 stype = st->stack[i].slot_type[7];
-
-			/*
-			 * Don't clear special slots.
-			 * destroy_if_dynptr_stack_slot() needs STACK_DYNPTR to
-			 * detect overwrites and invalidate associated data slices.
-			 * is_iter_reg_valid_uninit() and is_irq_flag_reg_valid_uninit()
-			 * check for their respective slot types to detect double-create.
-			 */
-			if (stype == STACK_DYNPTR || stype == STACK_ITER ||
-			    stype == STACK_IRQ_FLAG)
-				continue;
-
-			/*
-			 * Only destroy spilled_ptr when hi half is dead.
-			 * If hi half is still live with STACK_SPILL, the
-			 * spilled_ptr metadata is needed for correct state
-			 * comparison in stacksafe().
-			 * is_spilled_reg() is using slot_type[7], but
-			 * is_spilled_scalar_after() check either slot_type[0] or [4]
-			 */
-			if (!hi_live) {
-				struct bpf_reg_state *spill = &st->stack[i].spilled_ptr;
-
-				if (lo_live && stype == STACK_SPILL) {
-					u8 val = STACK_MISC;
-
-					/*
-					 * 8 byte spill of scalar 0 where half slot is dead
-					 * should become STACK_ZERO in lo 4 bytes.
-					 */
-					if (register_is_null(spill))
-						val = STACK_ZERO;
-					for (j = 0; j < 4; j++) {
-						u8 *t = &st->stack[i].slot_type[j];
-
-						if (*t == STACK_SPILL)
-							*t = val;
-					}
-				}
-				__mark_reg_not_init(env, spill);
-			}
-			for (j = start; j < end; j++)
-				st->stack[i].slot_type[j] = STACK_POISON;
-		}
-	}
-}
-
-static int clean_verifier_state(struct bpf_verifier_env *env,
-				 struct bpf_verifier_state *st)
-{
-	int i, err;
-
-	err = bpf_live_stack_query_init(env, st);
-	if (err)
-		return err;
-	for (i = 0; i <= st->curframe; i++) {
-		u32 ip = frame_insn_idx(st, i);
-		u16 live_regs = env->insn_aux_data[ip].live_regs_before;
-
-		__clean_func_state(env, st->frame[i], live_regs, i);
-	}
-	return 0;
-}
-
 /* Find id in idset and increment its count, or add new entry */
 static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
 {
@@ -19223,8 +18660,8 @@ static u32 idset_cnt_get(struct bpf_idset *idset, u32 id)
  * A register with a non-zero id is called singular if no other register shares
  * the same base id. Such registers can be treated as independent (id=0).
  */
-static void clear_singular_ids(struct bpf_verifier_env *env,
-			       struct bpf_verifier_state *st)
+void bpf_clear_singular_ids(struct bpf_verifier_env *env,
+			    struct bpf_verifier_state *st)
 {
 	struct bpf_idset *idset = &env->idset_scratch;
 	struct bpf_func_state *func;
@@ -19250,1054 +18687,6 @@ static void clear_singular_ids(struct bpf_verifier_env *env,
 	}));
 }
 
-static bool regs_exact(const struct bpf_reg_state *rold,
-		       const struct bpf_reg_state *rcur,
-		       struct bpf_idmap *idmap)
-{
-	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
-	       check_ids(rold->id, rcur->id, idmap) &&
-	       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
-}
-
-enum exact_level {
-	NOT_EXACT,
-	EXACT,
-	RANGE_WITHIN
-};
-
-/* Returns true if (rold safe implies rcur safe) */
-static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
-		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
-		    enum exact_level exact)
-{
-	if (exact == EXACT)
-		return regs_exact(rold, rcur, idmap);
-
-	if (rold->type == NOT_INIT)
-		/* explored state can't have used this */
-		return true;
-
-	/* Enforce that register types have to match exactly, including their
-	 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
-	 * rule.
-	 *
-	 * One can make a point that using a pointer register as unbounded
-	 * SCALAR would be technically acceptable, but this could lead to
-	 * pointer leaks because scalars are allowed to leak while pointers
-	 * are not. We could make this safe in special cases if root is
-	 * calling us, but it's probably not worth the hassle.
-	 *
-	 * Also, register types that are *not* MAYBE_NULL could technically be
-	 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
-	 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
-	 * to the same map).
-	 * However, if the old MAYBE_NULL register then got NULL checked,
-	 * doing so could have affected others with the same id, and we can't
-	 * check for that because we lost the id when we converted to
-	 * a non-MAYBE_NULL variant.
-	 * So, as a general rule we don't allow mixing MAYBE_NULL and
-	 * non-MAYBE_NULL registers as well.
-	 */
-	if (rold->type != rcur->type)
-		return false;
-
-	switch (base_type(rold->type)) {
-	case SCALAR_VALUE:
-		if (env->explore_alu_limits) {
-			/* explore_alu_limits disables tnum_in() and range_within()
-			 * logic and requires everything to be strict
-			 */
-			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
-			       check_scalar_ids(rold->id, rcur->id, idmap);
-		}
-		if (!rold->precise && exact == NOT_EXACT)
-			return true;
-		/*
-		 * Linked register tracking uses rold->id to detect relationships.
-		 * When rold->id == 0, the register is independent and any linking
-		 * in rcur only adds constraints. When rold->id != 0, we must verify
-		 * id mapping and (for BPF_ADD_CONST) offset consistency.
-		 *
-		 * +------------------+-----------+------------------+---------------+
-		 * |                  | rold->id  | rold + ADD_CONST | rold->id == 0 |
-		 * |------------------+-----------+------------------+---------------|
-		 * | rcur->id         | range,ids | false            | range         |
-		 * | rcur + ADD_CONST | false     | range,ids,off    | range         |
-		 * | rcur->id == 0    | range,ids | false            | range         |
-		 * +------------------+-----------+------------------+---------------+
-		 *
-		 * Why check_ids() for scalar registers?
-		 *
-		 * Consider the following BPF code:
-		 *   1: r6 = ... unbound scalar, ID=a ...
-		 *   2: r7 = ... unbound scalar, ID=b ...
-		 *   3: if (r6 > r7) goto +1
-		 *   4: r6 = r7
-		 *   5: if (r6 > X) goto ...
-		 *   6: ... memory operation using r7 ...
-		 *
-		 * First verification path is [1-6]:
-		 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
-		 * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
-		 *   r7 <= X, because r6 and r7 share same id.
-		 * Next verification path is [1-4, 6].
-		 *
-		 * Instruction (6) would be reached in two states:
-		 *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
-		 *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
-		 *
-		 * Use check_ids() to distinguish these states.
-		 * ---
-		 * Also verify that new value satisfies old value range knowledge.
-		 */
-
-		/*
-		 * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and
-		 * BPF_ADD_CONST64 have different linking semantics in
-		 * sync_linked_regs() (alu32 zero-extends, alu64 does not),
-		 * so pruning across different flag types is unsafe.
-		 */
-		if (rold->id &&
-		    (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
-			return false;
-
-		/* Both have offset linkage: offsets must match */
-		if ((rold->id & BPF_ADD_CONST) && rold->delta != rcur->delta)
-			return false;
-
-		if (!check_scalar_ids(rold->id, rcur->id, idmap))
-			return false;
-
-		return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
-	case PTR_TO_MAP_KEY:
-	case PTR_TO_MAP_VALUE:
-	case PTR_TO_MEM:
-	case PTR_TO_BUF:
-	case PTR_TO_TP_BUFFER:
-		/* If the new min/max/var_off satisfy the old ones and
-		 * everything else matches, we are OK.
-		 */
-		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
-		       range_within(rold, rcur) &&
-		       tnum_in(rold->var_off, rcur->var_off) &&
-		       check_ids(rold->id, rcur->id, idmap) &&
-		       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
-	case PTR_TO_PACKET_META:
-	case PTR_TO_PACKET:
-		/* We must have at least as much range as the old ptr
-		 * did, so that any accesses which were safe before are
-		 * still safe.  This is true even if old range < old off,
-		 * since someone could have accessed through (ptr - k), or
-		 * even done ptr -= k in a register, to get a safe access.
-		 */
-		if (rold->range < 0 || rcur->range < 0) {
-			/* special case for [BEYOND|AT]_PKT_END */
-			if (rold->range != rcur->range)
-				return false;
-		} else if (rold->range > rcur->range) {
-			return false;
-		}
-		/* id relations must be preserved */
-		if (!check_ids(rold->id, rcur->id, idmap))
-			return false;
-		/* new val must satisfy old val knowledge */
-		return range_within(rold, rcur) &&
-		       tnum_in(rold->var_off, rcur->var_off);
-	case PTR_TO_STACK:
-		/* two stack pointers are equal only if they're pointing to
-		 * the same stack frame, since fp-8 in foo != fp-8 in bar
-		 */
-		return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
-	case PTR_TO_ARENA:
-		return true;
-	case PTR_TO_INSN:
-		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
-		       range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
-	default:
-		return regs_exact(rold, rcur, idmap);
-	}
-}
-
-static struct bpf_reg_state unbound_reg;
-
-static __init int unbound_reg_init(void)
-{
-	__mark_reg_unknown_imprecise(&unbound_reg);
-	return 0;
-}
-late_initcall(unbound_reg_init);
-
-static bool is_stack_misc_after(struct bpf_verifier_env *env,
-				struct bpf_stack_state *stack, int im)
-{
-	u32 i;
-
-	for (i = im; i < ARRAY_SIZE(stack->slot_type); ++i) {
-		if ((stack->slot_type[i] == STACK_MISC) ||
-		    ((stack->slot_type[i] == STACK_INVALID || stack->slot_type[i] == STACK_POISON) &&
-		     env->allow_uninit_stack))
-			continue;
-		return false;
-	}
-
-	return true;
-}
-
-static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
-						  struct bpf_stack_state *stack, int im)
-{
-	if (is_spilled_scalar_after(stack, im))
-		return &stack->spilled_ptr;
-
-	if (is_stack_misc_after(env, stack, im))
-		return &unbound_reg;
-
-	return NULL;
-}
-
-static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
-		      struct bpf_func_state *cur, struct bpf_idmap *idmap,
-		      enum exact_level exact)
-{
-	int i, spi;
-
-	/* walk slots of the explored stack and ignore any additional
-	 * slots in the current stack, since explored(safe) state
-	 * didn't use them
-	 */
-	for (i = 0; i < old->allocated_stack; i++) {
-		struct bpf_reg_state *old_reg, *cur_reg;
-		int im = i % BPF_REG_SIZE;
-
-		spi = i / BPF_REG_SIZE;
-
-		if (exact == EXACT) {
-			u8 old_type = old->stack[spi].slot_type[i % BPF_REG_SIZE];
-			u8 cur_type = i < cur->allocated_stack ?
-				      cur->stack[spi].slot_type[i % BPF_REG_SIZE] : STACK_INVALID;
-
-			/* STACK_INVALID and STACK_POISON are equivalent for pruning */
-			if (old_type == STACK_POISON)
-				old_type = STACK_INVALID;
-			if (cur_type == STACK_POISON)
-				cur_type = STACK_INVALID;
-			if (i >= cur->allocated_stack || old_type != cur_type)
-				return false;
-		}
-
-		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID ||
-		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_POISON)
-			continue;
-
-		if (env->allow_uninit_stack &&
-		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
-			continue;
-
-		/* explored stack has more populated slots than current stack
-		 * and these slots were used
-		 */
-		if (i >= cur->allocated_stack)
-			return false;
-
-		/*
-		 * 64 and 32-bit scalar spills vs MISC/INVALID slots and vice versa.
-		 * Load from MISC/INVALID slots produces unbound scalar.
-		 * Construct a fake register for such stack and call
-		 * regsafe() to ensure scalar ids are compared.
-		 */
-		if (im == 0 || im == 4) {
-			old_reg = scalar_reg_for_stack(env, &old->stack[spi], im);
-			cur_reg = scalar_reg_for_stack(env, &cur->stack[spi], im);
-			if (old_reg && cur_reg) {
-				if (!regsafe(env, old_reg, cur_reg, idmap, exact))
-					return false;
-				i += (im == 0 ? BPF_REG_SIZE - 1 : 3);
-				continue;
-			}
-		}
-
-		/* if old state was safe with misc data in the stack
-		 * it will be safe with zero-initialized stack.
-		 * The opposite is not true
-		 */
-		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
-		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
-			continue;
-		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
-		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
-			/* Ex: old explored (safe) state has STACK_SPILL in
-			 * this stack slot, but current has STACK_MISC ->
-			 * this verifier states are not equivalent,
-			 * return false to continue verification of this path
-			 */
-			return false;
-		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
-			continue;
-		/* Both old and cur are having same slot_type */
-		switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
-		case STACK_SPILL:
-			/* when explored and current stack slot are both storing
-			 * spilled registers, check that stored pointers types
-			 * are the same as well.
-			 * Ex: explored safe path could have stored
-			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
-			 * but current path has stored:
-			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
-			 * such verifier states are not equivalent.
-			 * return false to continue verification of this path
-			 */
-			if (!regsafe(env, &old->stack[spi].spilled_ptr,
-				     &cur->stack[spi].spilled_ptr, idmap, exact))
-				return false;
-			break;
-		case STACK_DYNPTR:
-			old_reg = &old->stack[spi].spilled_ptr;
-			cur_reg = &cur->stack[spi].spilled_ptr;
-			if (old_reg->dynptr.type != cur_reg->dynptr.type ||
-			    old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
-			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
-				return false;
-			break;
-		case STACK_ITER:
-			old_reg = &old->stack[spi].spilled_ptr;
-			cur_reg = &cur->stack[spi].spilled_ptr;
-			/* iter.depth is not compared between states as it
-			 * doesn't matter for correctness and would otherwise
-			 * prevent convergence; we maintain it only to prevent
-			 * infinite loop check triggering, see
-			 * iter_active_depths_differ()
-			 */
-			if (old_reg->iter.btf != cur_reg->iter.btf ||
-			    old_reg->iter.btf_id != cur_reg->iter.btf_id ||
-			    old_reg->iter.state != cur_reg->iter.state ||
-			    /* ignore {old_reg,cur_reg}->iter.depth, see above */
-			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
-				return false;
-			break;
-		case STACK_IRQ_FLAG:
-			old_reg = &old->stack[spi].spilled_ptr;
-			cur_reg = &cur->stack[spi].spilled_ptr;
-			if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) ||
-			    old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class)
-				return false;
-			break;
-		case STACK_MISC:
-		case STACK_ZERO:
-		case STACK_INVALID:
-		case STACK_POISON:
-			continue;
-		/* Ensure that new unhandled slot types return false by default */
-		default:
-			return false;
-		}
-	}
-	return true;
-}
-
-static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
-		    struct bpf_idmap *idmap)
-{
-	int i;
-
-	if (old->acquired_refs != cur->acquired_refs)
-		return false;
-
-	if (old->active_locks != cur->active_locks)
-		return false;
-
-	if (old->active_preempt_locks != cur->active_preempt_locks)
-		return false;
-
-	if (old->active_rcu_locks != cur->active_rcu_locks)
-		return false;
-
-	if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
-		return false;
-
-	if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) ||
-	    old->active_lock_ptr != cur->active_lock_ptr)
-		return false;
-
-	for (i = 0; i < old->acquired_refs; i++) {
-		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
-		    old->refs[i].type != cur->refs[i].type)
-			return false;
-		switch (old->refs[i].type) {
-		case REF_TYPE_PTR:
-		case REF_TYPE_IRQ:
-			break;
-		case REF_TYPE_LOCK:
-		case REF_TYPE_RES_LOCK:
-		case REF_TYPE_RES_LOCK_IRQ:
-			if (old->refs[i].ptr != cur->refs[i].ptr)
-				return false;
-			break;
-		default:
-			WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type);
-			return false;
-		}
-	}
-
-	return true;
-}
-
-/* compare two verifier states
- *
- * all states stored in state_list are known to be valid, since
- * verifier reached 'bpf_exit' instruction through them
- *
- * this function is called when verifier exploring different branches of
- * execution popped from the state stack. If it sees an old state that has
- * more strict register state and more strict stack state then this execution
- * branch doesn't need to be explored further, since verifier already
- * concluded that more strict state leads to valid finish.
- *
- * Therefore two states are equivalent if register state is more conservative
- * and explored stack state is more conservative than the current one.
- * Example:
- *       explored                   current
- * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
- * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
- *
- * In other words if current stack state (one being explored) has more
- * valid slots than old one that already passed validation, it means
- * the verifier can stop exploring and conclude that current state is valid too
- *
- * Similarly with registers. If explored state has register type as invalid
- * whereas register type in current state is meaningful, it means that
- * the current state will reach 'bpf_exit' instruction safely
- */
-static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
-			      struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
-{
-	u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
-	u16 i;
-
-	if (old->callback_depth > cur->callback_depth)
-		return false;
-
-	for (i = 0; i < MAX_BPF_REG; i++)
-		if (((1 << i) & live_regs) &&
-		    !regsafe(env, &old->regs[i], &cur->regs[i],
-			     &env->idmap_scratch, exact))
-			return false;
-
-	if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
-		return false;
-
-	return true;
-}
-
-static void reset_idmap_scratch(struct bpf_verifier_env *env)
-{
-	struct bpf_idmap *idmap = &env->idmap_scratch;
-
-	idmap->tmp_id_gen = env->id_gen;
-	idmap->cnt = 0;
-}
-
-static bool states_equal(struct bpf_verifier_env *env,
-			 struct bpf_verifier_state *old,
-			 struct bpf_verifier_state *cur,
-			 enum exact_level exact)
-{
-	u32 insn_idx;
-	int i;
-
-	if (old->curframe != cur->curframe)
-		return false;
-
-	reset_idmap_scratch(env);
-
-	/* Verification state from speculative execution simulation
-	 * must never prune a non-speculative execution one.
-	 */
-	if (old->speculative && !cur->speculative)
-		return false;
-
-	if (old->in_sleepable != cur->in_sleepable)
-		return false;
-
-	if (!refsafe(old, cur, &env->idmap_scratch))
-		return false;
-
-	/* for states to be equal callsites have to be the same
-	 * and all frame states need to be equivalent
-	 */
-	for (i = 0; i <= old->curframe; i++) {
-		insn_idx = frame_insn_idx(old, i);
-		if (old->frame[i]->callsite != cur->frame[i]->callsite)
-			return false;
-		if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
-			return false;
-	}
-	return true;
-}
-
-/* find precise scalars in the previous equivalent state and
- * propagate them into the current state
- */
-static int propagate_precision(struct bpf_verifier_env *env,
-			       const struct bpf_verifier_state *old,
-			       struct bpf_verifier_state *cur,
-			       bool *changed)
-{
-	struct bpf_reg_state *state_reg;
-	struct bpf_func_state *state;
-	int i, err = 0, fr;
-	bool first;
-
-	for (fr = old->curframe; fr >= 0; fr--) {
-		state = old->frame[fr];
-		state_reg = state->regs;
-		first = true;
-		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
-			if (state_reg->type != SCALAR_VALUE ||
-			    !state_reg->precise)
-				continue;
-			if (env->log.level & BPF_LOG_LEVEL2) {
-				if (first)
-					verbose(env, "frame %d: propagating r%d", fr, i);
-				else
-					verbose(env, ",r%d", i);
-			}
-			bt_set_frame_reg(&env->bt, fr, i);
-			first = false;
-		}
-
-		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
-			if (!is_spilled_reg(&state->stack[i]))
-				continue;
-			state_reg = &state->stack[i].spilled_ptr;
-			if (state_reg->type != SCALAR_VALUE ||
-			    !state_reg->precise)
-				continue;
-			if (env->log.level & BPF_LOG_LEVEL2) {
-				if (first)
-					verbose(env, "frame %d: propagating fp%d",
-						fr, (-i - 1) * BPF_REG_SIZE);
-				else
-					verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
-			}
-			bt_set_frame_slot(&env->bt, fr, i);
-			first = false;
-		}
-		if (!first && (env->log.level & BPF_LOG_LEVEL2))
-			verbose(env, "\n");
-	}
-
-	err = __mark_chain_precision(env, cur, -1, changed);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-
-#define MAX_BACKEDGE_ITERS 64
-
-/* Propagate read and precision marks from visit->backedges[*].state->equal_state
- * to corresponding parent states of visit->backedges[*].state until fixed point is reached,
- * then free visit->backedges.
- * After execution of this function incomplete_read_marks() will return false
- * for all states corresponding to @visit->callchain.
- */
-static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit)
-{
-	struct bpf_scc_backedge *backedge;
-	struct bpf_verifier_state *st;
-	bool changed;
-	int i, err;
-
-	i = 0;
-	do {
-		if (i++ > MAX_BACKEDGE_ITERS) {
-			if (env->log.level & BPF_LOG_LEVEL2)
-				verbose(env, "%s: too many iterations\n", __func__);
-			for (backedge = visit->backedges; backedge; backedge = backedge->next)
-				mark_all_scalars_precise(env, &backedge->state);
-			break;
-		}
-		changed = false;
-		for (backedge = visit->backedges; backedge; backedge = backedge->next) {
-			st = &backedge->state;
-			err = propagate_precision(env, st->equal_state, st, &changed);
-			if (err)
-				return err;
-		}
-	} while (changed);
-
-	free_backedges(visit);
-	return 0;
-}
-
-static bool states_maybe_looping(struct bpf_verifier_state *old,
-				 struct bpf_verifier_state *cur)
-{
-	struct bpf_func_state *fold, *fcur;
-	int i, fr = cur->curframe;
-
-	if (old->curframe != fr)
-		return false;
-
-	fold = old->frame[fr];
-	fcur = cur->frame[fr];
-	for (i = 0; i < MAX_BPF_REG; i++)
-		if (memcmp(&fold->regs[i], &fcur->regs[i],
-			   offsetof(struct bpf_reg_state, frameno)))
-			return false;
-	return true;
-}
-
-static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
-{
-	return env->insn_aux_data[insn_idx].is_iter_next;
-}
-
-/* is_state_visited() handles iter_next() (see process_iter_next_call() for
- * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
- * states to match, which otherwise would look like an infinite loop. So while
- * iter_next() calls are taken care of, we still need to be careful and
- * prevent erroneous and too eager declaration of "infinite loop", when
- * iterators are involved.
- *
- * Here's a situation in pseudo-BPF assembly form:
- *
- *   0: again:                          ; set up iter_next() call args
- *   1:   r1 = &it                      ; <CHECKPOINT HERE>
- *   2:   call bpf_iter_num_next        ; this is iter_next() call
- *   3:   if r0 == 0 goto done
- *   4:   ... something useful here ...
- *   5:   goto again                    ; another iteration
- *   6: done:
- *   7:   r1 = &it
- *   8:   call bpf_iter_num_destroy     ; clean up iter state
- *   9:   exit
- *
- * This is a typical loop. Let's assume that we have a prune point at 1:,
- * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
- * again`, assuming other heuristics don't get in a way).
- *
- * When we first time come to 1:, let's say we have some state X. We proceed
- * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
- * Now we come back to validate that forked ACTIVE state. We proceed through
- * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
- * are converging. But the problem is that we don't know that yet, as this
- * convergence has to happen at iter_next() call site only. So if nothing is
- * done, at 1: verifier will use bounded loop logic and declare infinite
- * looping (and would be *technically* correct, if not for iterator's
- * "eventual sticky NULL" contract, see process_iter_next_call()). But we
- * don't want that. So what we do in process_iter_next_call() when we go on
- * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
- * a different iteration. So when we suspect an infinite loop, we additionally
- * check if any of the *ACTIVE* iterator states depths differ. If yes, we
- * pretend we are not looping and wait for next iter_next() call.
- *
- * This only applies to ACTIVE state. In DRAINED state we don't expect to
- * loop, because that would actually mean infinite loop, as DRAINED state is
- * "sticky", and so we'll keep returning into the same instruction with the
- * same state (at least in one of possible code paths).
- *
- * This approach allows to keep infinite loop heuristic even in the face of
- * active iterator. E.g., C snippet below is and will be detected as
- * infinitely looping:
- *
- *   struct bpf_iter_num it;
- *   int *p, x;
- *
- *   bpf_iter_num_new(&it, 0, 10);
- *   while ((p = bpf_iter_num_next(&t))) {
- *       x = p;
- *       while (x--) {} // <<-- infinite loop here
- *   }
- *
- */
-static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
-{
-	struct bpf_reg_state *slot, *cur_slot;
-	struct bpf_func_state *state;
-	int i, fr;
-
-	for (fr = old->curframe; fr >= 0; fr--) {
-		state = old->frame[fr];
-		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
-			if (state->stack[i].slot_type[0] != STACK_ITER)
-				continue;
-
-			slot = &state->stack[i].spilled_ptr;
-			if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
-				continue;
-
-			cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
-			if (cur_slot->iter.depth != slot->iter.depth)
-				return true;
-		}
-	}
-	return false;
-}
-
-static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
-{
-	struct bpf_verifier_state_list *new_sl;
-	struct bpf_verifier_state_list *sl;
-	struct bpf_verifier_state *cur = env->cur_state, *new;
-	bool force_new_state, add_new_state, loop;
-	int n, err, states_cnt = 0;
-	struct list_head *pos, *tmp, *head;
-
-	force_new_state = env->test_state_freq || bpf_is_force_checkpoint(env, insn_idx) ||
-			  /* Avoid accumulating infinitely long jmp history */
-			  cur->jmp_history_cnt > 40;
-
-	/* bpf progs typically have pruning point every 4 instructions
-	 * http://vger.kernel.org/bpfconf2019.html#session-1
-	 * Do not add new state for future pruning if the verifier hasn't seen
-	 * at least 2 jumps and at least 8 instructions.
-	 * This heuristics helps decrease 'total_states' and 'peak_states' metric.
-	 * In tests that amounts to up to 50% reduction into total verifier
-	 * memory consumption and 20% verifier time speedup.
-	 */
-	add_new_state = force_new_state;
-	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
-	    env->insn_processed - env->prev_insn_processed >= 8)
-		add_new_state = true;
-
-	/* keep cleaning the current state as registers/stack become dead */
-	err = clean_verifier_state(env, cur);
-	if (err)
-		return err;
-
-	loop = false;
-	head = explored_state(env, insn_idx);
-	list_for_each_safe(pos, tmp, head) {
-		sl = container_of(pos, struct bpf_verifier_state_list, node);
-		states_cnt++;
-		if (sl->state.insn_idx != insn_idx)
-			continue;
-
-		if (sl->state.branches) {
-			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
-
-			if (frame->in_async_callback_fn &&
-			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
-				/* Different async_entry_cnt means that the verifier is
-				 * processing another entry into async callback.
-				 * Seeing the same state is not an indication of infinite
-				 * loop or infinite recursion.
-				 * But finding the same state doesn't mean that it's safe
-				 * to stop processing the current state. The previous state
-				 * hasn't yet reached bpf_exit, since state.branches > 0.
-				 * Checking in_async_callback_fn alone is not enough either.
-				 * Since the verifier still needs to catch infinite loops
-				 * inside async callbacks.
-				 */
-				goto skip_inf_loop_check;
-			}
-			/* BPF open-coded iterators loop detection is special.
-			 * states_maybe_looping() logic is too simplistic in detecting
-			 * states that *might* be equivalent, because it doesn't know
-			 * about ID remapping, so don't even perform it.
-			 * See process_iter_next_call() and iter_active_depths_differ()
-			 * for overview of the logic. When current and one of parent
-			 * states are detected as equivalent, it's a good thing: we prove
-			 * convergence and can stop simulating further iterations.
-			 * It's safe to assume that iterator loop will finish, taking into
-			 * account iter_next() contract of eventually returning
-			 * sticky NULL result.
-			 *
-			 * Note, that states have to be compared exactly in this case because
-			 * read and precision marks might not be finalized inside the loop.
-			 * E.g. as in the program below:
-			 *
-			 *     1. r7 = -16
-			 *     2. r6 = bpf_get_prandom_u32()
-			 *     3. while (bpf_iter_num_next(&fp[-8])) {
-			 *     4.   if (r6 != 42) {
-			 *     5.     r7 = -32
-			 *     6.     r6 = bpf_get_prandom_u32()
-			 *     7.     continue
-			 *     8.   }
-			 *     9.   r0 = r10
-			 *    10.   r0 += r7
-			 *    11.   r8 = *(u64 *)(r0 + 0)
-			 *    12.   r6 = bpf_get_prandom_u32()
-			 *    13. }
-			 *
-			 * Here verifier would first visit path 1-3, create a checkpoint at 3
-			 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
-			 * not have read or precision mark for r7 yet, thus inexact states
-			 * comparison would discard current state with r7=-32
-			 * => unsafe memory access at 11 would not be caught.
-			 */
-			if (is_iter_next_insn(env, insn_idx)) {
-				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
-					struct bpf_func_state *cur_frame;
-					struct bpf_reg_state *iter_state, *iter_reg;
-					int spi;
-
-					cur_frame = cur->frame[cur->curframe];
-					/* btf_check_iter_kfuncs() enforces that
-					 * iter state pointer is always the first arg
-					 */
-					iter_reg = &cur_frame->regs[BPF_REG_1];
-					/* current state is valid due to states_equal(),
-					 * so we can assume valid iter and reg state,
-					 * no need for extra (re-)validations
-					 */
-					spi = __get_spi(iter_reg->var_off.value);
-					iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
-					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
-						loop = true;
-						goto hit;
-					}
-				}
-				goto skip_inf_loop_check;
-			}
-			if (is_may_goto_insn_at(env, insn_idx)) {
-				if (sl->state.may_goto_depth != cur->may_goto_depth &&
-				    states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
-					loop = true;
-					goto hit;
-				}
-			}
-			if (bpf_calls_callback(env, insn_idx)) {
-				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
-					loop = true;
-					goto hit;
-				}
-				goto skip_inf_loop_check;
-			}
-			/* attempt to detect infinite loop to avoid unnecessary doomed work */
-			if (states_maybe_looping(&sl->state, cur) &&
-			    states_equal(env, &sl->state, cur, EXACT) &&
-			    !iter_active_depths_differ(&sl->state, cur) &&
-			    sl->state.may_goto_depth == cur->may_goto_depth &&
-			    sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
-				verbose_linfo(env, insn_idx, "; ");
-				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
-				verbose(env, "cur state:");
-				print_verifier_state(env, cur, cur->curframe, true);
-				verbose(env, "old state:");
-				print_verifier_state(env, &sl->state, cur->curframe, true);
-				return -EINVAL;
-			}
-			/* if the verifier is processing a loop, avoid adding new state
-			 * too often, since different loop iterations have distinct
-			 * states and may not help future pruning.
-			 * This threshold shouldn't be too low to make sure that
-			 * a loop with large bound will be rejected quickly.
-			 * The most abusive loop will be:
-			 * r1 += 1
-			 * if r1 < 1000000 goto pc-2
-			 * 1M insn_procssed limit / 100 == 10k peak states.
-			 * This threshold shouldn't be too high either, since states
-			 * at the end of the loop are likely to be useful in pruning.
-			 */
-skip_inf_loop_check:
-			if (!force_new_state &&
-			    env->jmps_processed - env->prev_jmps_processed < 20 &&
-			    env->insn_processed - env->prev_insn_processed < 100)
-				add_new_state = false;
-			goto miss;
-		}
-		/* See comments for mark_all_regs_read_and_precise() */
-		loop = incomplete_read_marks(env, &sl->state);
-		if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
-hit:
-			sl->hit_cnt++;
-
-			/* if previous state reached the exit with precision and
-			 * current state is equivalent to it (except precision marks)
-			 * the precision needs to be propagated back in
-			 * the current state.
-			 */
-			err = 0;
-			if (is_jmp_point(env, env->insn_idx))
-				err = push_jmp_history(env, cur, 0, 0);
-			err = err ? : propagate_precision(env, &sl->state, cur, NULL);
-			if (err)
-				return err;
-			/* When processing iterator based loops above propagate_liveness and
-			 * propagate_precision calls are not sufficient to transfer all relevant
-			 * read and precision marks. E.g. consider the following case:
-			 *
-			 *  .-> A --.  Assume the states are visited in the order A, B, C.
-			 *  |   |   |  Assume that state B reaches a state equivalent to state A.
-			 *  |   v   v  At this point, state C is not processed yet, so state A
-			 *  '-- B   C  has not received any read or precision marks from C.
-			 *             Thus, marks propagated from A to B are incomplete.
-			 *
-			 * The verifier mitigates this by performing the following steps:
-			 *
-			 * - Prior to the main verification pass, strongly connected components
-			 *   (SCCs) are computed over the program's control flow graph,
-			 *   intraprocedurally.
-			 *
-			 * - During the main verification pass, `maybe_enter_scc()` checks
-			 *   whether the current verifier state is entering an SCC. If so, an
-			 *   instance of a `bpf_scc_visit` object is created, and the state
-			 *   entering the SCC is recorded as the entry state.
-			 *
-			 * - This instance is associated not with the SCC itself, but with a
-			 *   `bpf_scc_callchain`: a tuple consisting of the call sites leading to
-			 *   the SCC and the SCC id. See `compute_scc_callchain()`.
-			 *
-			 * - When a verification path encounters a `states_equal(...,
-			 *   RANGE_WITHIN)` condition, there exists a call chain describing the
-			 *   current state and a corresponding `bpf_scc_visit` instance. A copy
-			 *   of the current state is created and added to
-			 *   `bpf_scc_visit->backedges`.
-			 *
-			 * - When a verification path terminates, `maybe_exit_scc()` is called
-			 *   from `update_branch_counts()`. For states with `branches == 0`, it
-			 *   checks whether the state is the entry state of any `bpf_scc_visit`
-			 *   instance. If it is, this indicates that all paths originating from
-			 *   this SCC visit have been explored. `propagate_backedges()` is then
-			 *   called, which propagates read and precision marks through the
-			 *   backedges until a fixed point is reached.
-			 *   (In the earlier example, this would propagate marks from A to B,
-			 *    from C to A, and then again from A to B.)
-			 *
-			 * A note on callchains
-			 * --------------------
-			 *
-			 * Consider the following example:
-			 *
-			 *     void foo() { loop { ... SCC#1 ... } }
-			 *     void main() {
-			 *       A: foo();
-			 *       B: ...
-			 *       C: foo();
-			 *     }
-			 *
-			 * Here, there are two distinct callchains leading to SCC#1:
-			 * - (A, SCC#1)
-			 * - (C, SCC#1)
-			 *
-			 * Each callchain identifies a separate `bpf_scc_visit` instance that
-			 * accumulates backedge states. The `propagate_{liveness,precision}()`
-			 * functions traverse the parent state of each backedge state, which
-			 * means these parent states must remain valid (i.e., not freed) while
-			 * the corresponding `bpf_scc_visit` instance exists.
-			 *
-			 * Associating `bpf_scc_visit` instances directly with SCCs instead of
-			 * callchains would break this invariant:
-			 * - States explored during `C: foo()` would contribute backedges to
-			 *   SCC#1, but SCC#1 would only be exited once the exploration of
-			 *   `A: foo()` completes.
-			 * - By that time, the states explored between `A: foo()` and `C: foo()`
-			 *   (i.e., `B: ...`) may have already been freed, causing the parent
-			 *   links for states from `C: foo()` to become invalid.
-			 */
-			if (loop) {
-				struct bpf_scc_backedge *backedge;
-
-				backedge = kzalloc_obj(*backedge,
-						       GFP_KERNEL_ACCOUNT);
-				if (!backedge)
-					return -ENOMEM;
-				err = copy_verifier_state(&backedge->state, cur);
-				backedge->state.equal_state = &sl->state;
-				backedge->state.insn_idx = insn_idx;
-				err = err ?: add_scc_backedge(env, &sl->state, backedge);
-				if (err) {
-					free_verifier_state(&backedge->state, false);
-					kfree(backedge);
-					return err;
-				}
-			}
-			return 1;
-		}
-miss:
-		/* when new state is not going to be added do not increase miss count.
-		 * Otherwise several loop iterations will remove the state
-		 * recorded earlier. The goal of these heuristics is to have
-		 * states from some iterations of the loop (some in the beginning
-		 * and some at the end) to help pruning.
-		 */
-		if (add_new_state)
-			sl->miss_cnt++;
-		/* heuristic to determine whether this state is beneficial
-		 * to keep checking from state equivalence point of view.
-		 * Higher numbers increase max_states_per_insn and verification time,
-		 * but do not meaningfully decrease insn_processed.
-		 * 'n' controls how many times state could miss before eviction.
-		 * Use bigger 'n' for checkpoints because evicting checkpoint states
-		 * too early would hinder iterator convergence.
-		 */
-		n = bpf_is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
-		if (sl->miss_cnt > sl->hit_cnt * n + n) {
-			/* the state is unlikely to be useful. Remove it to
-			 * speed up verification
-			 */
-			sl->in_free_list = true;
-			list_del(&sl->node);
-			list_add(&sl->node, &env->free_list);
-			env->free_list_size++;
-			env->explored_states_size--;
-			maybe_free_verifier_state(env, sl);
-		}
-	}
-
-	if (env->max_states_per_insn < states_cnt)
-		env->max_states_per_insn = states_cnt;
-
-	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
-		return 0;
-
-	if (!add_new_state)
-		return 0;
-
-	/* There were no equivalent states, remember the current one.
-	 * Technically the current state is not proven to be safe yet,
-	 * but it will either reach outer most bpf_exit (which means it's safe)
-	 * or it will be rejected. When there are no loops the verifier won't be
-	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
-	 * again on the way to bpf_exit.
-	 * When looping the sl->state.branches will be > 0 and this state
-	 * will not be considered for equivalence until branches == 0.
-	 */
-	new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT);
-	if (!new_sl)
-		return -ENOMEM;
-	env->total_states++;
-	env->explored_states_size++;
-	update_peak_states(env);
-	env->prev_jmps_processed = env->jmps_processed;
-	env->prev_insn_processed = env->insn_processed;
-
-	/* forget precise markings we inherited, see __mark_chain_precision */
-	if (env->bpf_capable)
-		mark_all_scalars_imprecise(env, cur);
-
-	clear_singular_ids(env, cur);
-
-	/* add new state to the head of linked list */
-	new = &new_sl->state;
-	err = copy_verifier_state(new, cur);
-	if (err) {
-		free_verifier_state(new, false);
-		kfree(new_sl);
-		return err;
-	}
-	new->insn_idx = insn_idx;
-	verifier_bug_if(new->branches != 1, env,
-			"%s:branches_to_explore=%d insn %d",
-			__func__, new->branches, insn_idx);
-	err = maybe_enter_scc(env, new);
-	if (err) {
-		free_verifier_state(new, false);
-		kfree(new_sl);
-		return err;
-	}
-
-	cur->parent = new;
-	cur->first_insn_idx = insn_idx;
-	cur->dfs_depth = new->dfs_depth + 1;
-	clear_jmp_history(cur);
-	list_add(&new_sl->node, head);
-	return 0;
-}
-
 /* Return true if it's OK to have the same insn return a different type. */
 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
 {
@@ -20686,7 +19075,7 @@ static int do_check(struct bpf_verifier_env *env)
 		state->insn_idx = env->insn_idx;
 
 		if (bpf_is_prune_point(env, env->insn_idx)) {
-			err = is_state_visited(env, env->insn_idx);
+			err = bpf_is_state_visited(env, env->insn_idx);
 			if (err < 0)
 				return err;
 			if (err == 1) {
@@ -20704,8 +19093,8 @@ static int do_check(struct bpf_verifier_env *env)
 			}
 		}
 
-		if (is_jmp_point(env, env->insn_idx)) {
-			err = push_jmp_history(env, state, 0, 0);
+		if (bpf_is_jmp_point(env, env->insn_idx)) {
+			err = bpf_push_jmp_history(env, state, 0, 0);
 			if (err)
 				return err;
 		}
@@ -20816,7 +19205,7 @@ static int do_check(struct bpf_verifier_env *env)
 				return -EFAULT;
 process_bpf_exit:
 			mark_verifier_state_scratched(env);
-			err = update_branch_counts(env, env->cur_state);
+			err = bpf_update_branch_counts(env, env->cur_state);
 			if (err)
 				return err;
 			err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
@@ -21623,13 +20012,13 @@ static void free_states(struct bpf_verifier_env *env)
 	struct bpf_scc_info *info;
 	int i, j;
 
-	free_verifier_state(env->cur_state, true);
+	bpf_free_verifier_state(env->cur_state, true);
 	env->cur_state = NULL;
 	while (!pop_stack(env, NULL, NULL, false));
 
 	list_for_each_safe(pos, tmp, &env->free_list) {
 		sl = container_of(pos, struct bpf_verifier_state_list, node);
-		free_verifier_state(&sl->state, false);
+		bpf_free_verifier_state(&sl->state, false);
 		kfree(sl);
 	}
 	INIT_LIST_HEAD(&env->free_list);
@@ -21639,7 +20028,7 @@ static void free_states(struct bpf_verifier_env *env)
 		if (!info)
 			continue;
 		for (j = 0; j < info->num_visits; j++)
-			free_backedges(&info->visits[j]);
+			bpf_free_backedges(&info->visits[j]);
 		kvfree(info);
 		env->scc_info[i] = NULL;
 	}
@@ -21652,7 +20041,7 @@ static void free_states(struct bpf_verifier_env *env)
 
 		list_for_each_safe(pos, tmp, head) {
 			sl = container_of(pos, struct bpf_verifier_state_list, node);
-			free_verifier_state(&sl->state, false);
+			bpf_free_verifier_state(&sl->state, false);
 			kfree(sl);
 		}
 		INIT_LIST_HEAD(&env->explored_states[i]);
-- 
cgit v1.2.3


From ed0b9710bd2efbe663d89728cd9c680c31c6a4e3 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sun, 12 Apr 2026 08:29:34 -0700
Subject: bpf: Move backtracking logic to backtrack.c

Move precision propagation and backtracking logic to backtrack.c
to reduce verifier.c size.

No functional changes.

Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20260412152936.54262-6-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  18 +
 kernel/bpf/Makefile          |   2 +-
 kernel/bpf/backtrack.c       | 934 ++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        | 948 +------------------------------------------
 4 files changed, 957 insertions(+), 945 deletions(-)
 create mode 100644 kernel/bpf/backtrack.c

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 669453386282..5389af612696 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -279,6 +279,8 @@ static inline void spis_or_range(spis_t *mask, u32 lo, u32 hi)
 			  (1 << BPF_REG_3) | (1 << BPF_REG_4) | \
 			  (1 << BPF_REG_5))
 
+#define BPF_MAIN_FUNC (-1)
+
 #define BPF_DYNPTR_SIZE		sizeof(struct bpf_dynptr_kern)
 #define BPF_DYNPTR_NR_SLOTS		(BPF_DYNPTR_SIZE / BPF_REG_SIZE)
 
@@ -1079,6 +1081,7 @@ void bpf_free_verifier_state(struct bpf_verifier_state *state, bool free_self);
 void bpf_free_backedges(struct bpf_scc_visit *visit);
 int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
 			 int insn_flags, u64 linked_regs);
+void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist);
 void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
 			   struct bpf_reg_state *reg);
 void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg);
@@ -1120,6 +1123,11 @@ static inline bool bpf_is_spilled_reg(const struct bpf_stack_state *stack)
 	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
 }
 
+static inline bool bpf_is_spilled_scalar_reg(const struct bpf_stack_state *stack)
+{
+	return bpf_is_spilled_reg(stack) && stack->spilled_ptr.type == SCALAR_VALUE;
+}
+
 static inline bool bpf_register_is_null(struct bpf_reg_state *reg)
 {
 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
@@ -1135,6 +1143,16 @@ static inline void bpf_bt_set_frame_slot(struct backtrack_state *bt, u32 frame,
 	bt->stack_masks[frame] |= 1ull << slot;
 }
 
+static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg)
+{
+	return bt->reg_masks[frame] & (1 << reg);
+}
+
+static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
+{
+	return bt->stack_masks[frame] & (1ull << slot);
+}
+
 bool bpf_map_is_rdonly(const struct bpf_map *map);
 int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
 			bool is_ldsx);
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 3da5dae33827..fd1d901b8d3c 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
-obj-$(CONFIG_BPF_SYSCALL) += fixups.o cfg.o states.o
+obj-$(CONFIG_BPF_SYSCALL) += fixups.o cfg.o states.o backtrack.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
diff --git a/kernel/bpf/backtrack.c b/kernel/bpf/backtrack.c
new file mode 100644
index 000000000000..854731dc93fe
--- /dev/null
+++ b/kernel/bpf/backtrack.c
@@ -0,0 +1,934 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+#include <linux/bitmap.h>
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+/* for any branch, call, exit record the history of jmps in the given state */
+int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
+			 int insn_flags, u64 linked_regs)
+{
+	u32 cnt = cur->jmp_history_cnt;
+	struct bpf_jmp_history_entry *p;
+	size_t alloc_size;
+
+	/* combine instruction flags if we already recorded this instruction */
+	if (env->cur_hist_ent) {
+		/* atomic instructions push insn_flags twice, for READ and
+		 * WRITE sides, but they should agree on stack slot
+		 */
+		verifier_bug_if((env->cur_hist_ent->flags & insn_flags) &&
+				(env->cur_hist_ent->flags & insn_flags) != insn_flags,
+				env, "insn history: insn_idx %d cur flags %x new flags %x",
+				env->insn_idx, env->cur_hist_ent->flags, insn_flags);
+		env->cur_hist_ent->flags |= insn_flags;
+		verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env,
+				"insn history: insn_idx %d linked_regs: %#llx",
+				env->insn_idx, env->cur_hist_ent->linked_regs);
+		env->cur_hist_ent->linked_regs = linked_regs;
+		return 0;
+	}
+
+	cnt++;
+	alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
+	p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT);
+	if (!p)
+		return -ENOMEM;
+	cur->jmp_history = p;
+
+	p = &cur->jmp_history[cnt - 1];
+	p->idx = env->insn_idx;
+	p->prev_idx = env->prev_insn_idx;
+	p->flags = insn_flags;
+	p->linked_regs = linked_regs;
+	cur->jmp_history_cnt = cnt;
+	env->cur_hist_ent = p;
+
+	return 0;
+}
+
+static bool is_atomic_load_insn(const struct bpf_insn *insn)
+{
+	return BPF_CLASS(insn->code) == BPF_STX &&
+	       BPF_MODE(insn->code) == BPF_ATOMIC &&
+	       insn->imm == BPF_LOAD_ACQ;
+}
+
+static bool is_atomic_fetch_insn(const struct bpf_insn *insn)
+{
+	return BPF_CLASS(insn->code) == BPF_STX &&
+	       BPF_MODE(insn->code) == BPF_ATOMIC &&
+	       (insn->imm & BPF_FETCH);
+}
+
+static int insn_stack_access_spi(int insn_flags)
+{
+	return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
+}
+
+static int insn_stack_access_frameno(int insn_flags)
+{
+	return insn_flags & INSN_F_FRAMENO_MASK;
+}
+
+/* Backtrack one insn at a time. If idx is not at the top of recorded
+ * history then previous instruction came from straight line execution.
+ * Return -ENOENT if we exhausted all instructions within given state.
+ *
+ * It's legal to have a bit of a looping with the same starting and ending
+ * insn index within the same state, e.g.: 3->4->5->3, so just because current
+ * instruction index is the same as state's first_idx doesn't mean we are
+ * done. If there is still some jump history left, we should keep going. We
+ * need to take into account that we might have a jump history between given
+ * state's parent and itself, due to checkpointing. In this case, we'll have
+ * history entry recording a jump from last instruction of parent state and
+ * first instruction of given state.
+ */
+static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
+			     u32 *history)
+{
+	u32 cnt = *history;
+
+	if (i == st->first_insn_idx) {
+		if (cnt == 0)
+			return -ENOENT;
+		if (cnt == 1 && st->jmp_history[0].idx == i)
+			return -ENOENT;
+	}
+
+	if (cnt && st->jmp_history[cnt - 1].idx == i) {
+		i = st->jmp_history[cnt - 1].prev_idx;
+		(*history)--;
+	} else {
+		i--;
+	}
+	return i;
+}
+
+static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
+						        u32 hist_end, int insn_idx)
+{
+	if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
+		return &st->jmp_history[hist_end - 1];
+	return NULL;
+}
+
+static inline void bt_init(struct backtrack_state *bt, u32 frame)
+{
+	bt->frame = frame;
+}
+
+static inline void bt_reset(struct backtrack_state *bt)
+{
+	struct bpf_verifier_env *env = bt->env;
+
+	memset(bt, 0, sizeof(*bt));
+	bt->env = env;
+}
+
+static inline u32 bt_empty(struct backtrack_state *bt)
+{
+	u64 mask = 0;
+	int i;
+
+	for (i = 0; i <= bt->frame; i++)
+		mask |= bt->reg_masks[i] | bt->stack_masks[i];
+
+	return mask == 0;
+}
+
+static inline int bt_subprog_enter(struct backtrack_state *bt)
+{
+	if (bt->frame == MAX_CALL_FRAMES - 1) {
+		verifier_bug(bt->env, "subprog enter from frame %d", bt->frame);
+		return -EFAULT;
+	}
+	bt->frame++;
+	return 0;
+}
+
+static inline int bt_subprog_exit(struct backtrack_state *bt)
+{
+	if (bt->frame == 0) {
+		verifier_bug(bt->env, "subprog exit from frame 0");
+		return -EFAULT;
+	}
+	bt->frame--;
+	return 0;
+}
+
+static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
+{
+	bt->reg_masks[frame] &= ~(1 << reg);
+}
+
+static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
+{
+	bpf_bt_set_frame_reg(bt, bt->frame, reg);
+}
+
+static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
+{
+	bt_clear_frame_reg(bt, bt->frame, reg);
+}
+
+static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
+{
+	bt->stack_masks[frame] &= ~(1ull << slot);
+}
+
+static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
+{
+	return bt->reg_masks[frame];
+}
+
+static inline u32 bt_reg_mask(struct backtrack_state *bt)
+{
+	return bt->reg_masks[bt->frame];
+}
+
+static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
+{
+	return bt->stack_masks[frame];
+}
+
+static inline u64 bt_stack_mask(struct backtrack_state *bt)
+{
+	return bt->stack_masks[bt->frame];
+}
+
+static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
+{
+	return bt->reg_masks[bt->frame] & (1 << reg);
+}
+
+
+/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
+static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
+{
+	DECLARE_BITMAP(mask, 64);
+	bool first = true;
+	int i, n;
+
+	buf[0] = '\0';
+
+	bitmap_from_u64(mask, reg_mask);
+	for_each_set_bit(i, mask, 32) {
+		n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
+		first = false;
+		buf += n;
+		buf_sz -= n;
+		if (buf_sz < 0)
+			break;
+	}
+}
+/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
+void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
+{
+	DECLARE_BITMAP(mask, 64);
+	bool first = true;
+	int i, n;
+
+	buf[0] = '\0';
+
+	bitmap_from_u64(mask, stack_mask);
+	for_each_set_bit(i, mask, 64) {
+		n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
+		first = false;
+		buf += n;
+		buf_sz -= n;
+		if (buf_sz < 0)
+			break;
+	}
+}
+
+
+/* For given verifier state backtrack_insn() is called from the last insn to
+ * the first insn. Its purpose is to compute a bitmask of registers and
+ * stack slots that needs precision in the parent verifier state.
+ *
+ * @idx is an index of the instruction we are currently processing;
+ * @subseq_idx is an index of the subsequent instruction that:
+ *   - *would be* executed next, if jump history is viewed in forward order;
+ *   - *was* processed previously during backtracking.
+ */
+static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
+			  struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
+{
+	struct bpf_insn *insn = env->prog->insnsi + idx;
+	u8 class = BPF_CLASS(insn->code);
+	u8 opcode = BPF_OP(insn->code);
+	u8 mode = BPF_MODE(insn->code);
+	u32 dreg = insn->dst_reg;
+	u32 sreg = insn->src_reg;
+	u32 spi, i, fr;
+
+	if (insn->code == 0)
+		return 0;
+	if (env->log.level & BPF_LOG_LEVEL2) {
+		fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
+		verbose(env, "mark_precise: frame%d: regs=%s ",
+			bt->frame, env->tmp_str_buf);
+		bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
+		verbose(env, "stack=%s before ", env->tmp_str_buf);
+		verbose(env, "%d: ", idx);
+		bpf_verbose_insn(env, insn);
+	}
+
+	/* If there is a history record that some registers gained range at this insn,
+	 * propagate precision marks to those registers, so that bt_is_reg_set()
+	 * accounts for these registers.
+	 */
+	bpf_bt_sync_linked_regs(bt, hist);
+
+	if (class == BPF_ALU || class == BPF_ALU64) {
+		if (!bt_is_reg_set(bt, dreg))
+			return 0;
+		if (opcode == BPF_END || opcode == BPF_NEG) {
+			/* sreg is reserved and unused
+			 * dreg still need precision before this insn
+			 */
+			return 0;
+		} else if (opcode == BPF_MOV) {
+			if (BPF_SRC(insn->code) == BPF_X) {
+				/* dreg = sreg or dreg = (s8, s16, s32)sreg
+				 * dreg needs precision after this insn
+				 * sreg needs precision before this insn
+				 */
+				bt_clear_reg(bt, dreg);
+				if (sreg != BPF_REG_FP)
+					bt_set_reg(bt, sreg);
+			} else {
+				/* dreg = K
+				 * dreg needs precision after this insn.
+				 * Corresponding register is already marked
+				 * as precise=true in this verifier state.
+				 * No further markings in parent are necessary
+				 */
+				bt_clear_reg(bt, dreg);
+			}
+		} else {
+			if (BPF_SRC(insn->code) == BPF_X) {
+				/* dreg += sreg
+				 * both dreg and sreg need precision
+				 * before this insn
+				 */
+				if (sreg != BPF_REG_FP)
+					bt_set_reg(bt, sreg);
+			} /* else dreg += K
+			   * dreg still needs precision before this insn
+			   */
+		}
+	} else if (class == BPF_LDX ||
+		   is_atomic_load_insn(insn) ||
+		   is_atomic_fetch_insn(insn)) {
+		u32 load_reg = dreg;
+
+		/*
+		 * Atomic fetch operation writes the old value into
+		 * a register (sreg or r0) and if it was tracked for
+		 * precision, propagate to the stack slot like we do
+		 * in regular ldx.
+		 */
+		if (is_atomic_fetch_insn(insn))
+			load_reg = insn->imm == BPF_CMPXCHG ?
+				   BPF_REG_0 : sreg;
+
+		if (!bt_is_reg_set(bt, load_reg))
+			return 0;
+		bt_clear_reg(bt, load_reg);
+
+		/* scalars can only be spilled into stack w/o losing precision.
+		 * Load from any other memory can be zero extended.
+		 * The desire to keep that precision is already indicated
+		 * by 'precise' mark in corresponding register of this state.
+		 * No further tracking necessary.
+		 */
+		if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
+			return 0;
+		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
+		 * that [fp - off] slot contains scalar that needs to be
+		 * tracked with precision
+		 */
+		spi = insn_stack_access_spi(hist->flags);
+		fr = insn_stack_access_frameno(hist->flags);
+		bpf_bt_set_frame_slot(bt, fr, spi);
+	} else if (class == BPF_STX || class == BPF_ST) {
+		if (bt_is_reg_set(bt, dreg))
+			/* stx & st shouldn't be using _scalar_ dst_reg
+			 * to access memory. It means backtracking
+			 * encountered a case of pointer subtraction.
+			 */
+			return -ENOTSUPP;
+		/* scalars can only be spilled into stack */
+		if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
+			return 0;
+		spi = insn_stack_access_spi(hist->flags);
+		fr = insn_stack_access_frameno(hist->flags);
+		if (!bt_is_frame_slot_set(bt, fr, spi))
+			return 0;
+		bt_clear_frame_slot(bt, fr, spi);
+		if (class == BPF_STX)
+			bt_set_reg(bt, sreg);
+	} else if (class == BPF_JMP || class == BPF_JMP32) {
+		if (bpf_pseudo_call(insn)) {
+			int subprog_insn_idx, subprog;
+
+			subprog_insn_idx = idx + insn->imm + 1;
+			subprog = bpf_find_subprog(env, subprog_insn_idx);
+			if (subprog < 0)
+				return -EFAULT;
+
+			if (bpf_subprog_is_global(env, subprog)) {
+				/* check that jump history doesn't have any
+				 * extra instructions from subprog; the next
+				 * instruction after call to global subprog
+				 * should be literally next instruction in
+				 * caller program
+				 */
+				verifier_bug_if(idx + 1 != subseq_idx, env,
+						"extra insn from subprog");
+				/* r1-r5 are invalidated after subprog call,
+				 * so for global func call it shouldn't be set
+				 * anymore
+				 */
+				if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
+					verifier_bug(env, "global subprog unexpected regs %x",
+						     bt_reg_mask(bt));
+					return -EFAULT;
+				}
+				/* global subprog always sets R0 */
+				bt_clear_reg(bt, BPF_REG_0);
+				return 0;
+			} else {
+				/* static subprog call instruction, which
+				 * means that we are exiting current subprog,
+				 * so only r1-r5 could be still requested as
+				 * precise, r0 and r6-r10 or any stack slot in
+				 * the current frame should be zero by now
+				 */
+				if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
+					verifier_bug(env, "static subprog unexpected regs %x",
+						     bt_reg_mask(bt));
+					return -EFAULT;
+				}
+				/* we are now tracking register spills correctly,
+				 * so any instance of leftover slots is a bug
+				 */
+				if (bt_stack_mask(bt) != 0) {
+					verifier_bug(env,
+						     "static subprog leftover stack slots %llx",
+						     bt_stack_mask(bt));
+					return -EFAULT;
+				}
+				/* propagate r1-r5 to the caller */
+				for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
+					if (bt_is_reg_set(bt, i)) {
+						bt_clear_reg(bt, i);
+						bpf_bt_set_frame_reg(bt, bt->frame - 1, i);
+					}
+				}
+				if (bt_subprog_exit(bt))
+					return -EFAULT;
+				return 0;
+			}
+		} else if (bpf_is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
+			/* exit from callback subprog to callback-calling helper or
+			 * kfunc call. Use idx/subseq_idx check to discern it from
+			 * straight line code backtracking.
+			 * Unlike the subprog call handling above, we shouldn't
+			 * propagate precision of r1-r5 (if any requested), as they are
+			 * not actually arguments passed directly to callback subprogs
+			 */
+			if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
+				verifier_bug(env, "callback unexpected regs %x",
+					     bt_reg_mask(bt));
+				return -EFAULT;
+			}
+			if (bt_stack_mask(bt) != 0) {
+				verifier_bug(env, "callback leftover stack slots %llx",
+					     bt_stack_mask(bt));
+				return -EFAULT;
+			}
+			/* clear r1-r5 in callback subprog's mask */
+			for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+				bt_clear_reg(bt, i);
+			if (bt_subprog_exit(bt))
+				return -EFAULT;
+			return 0;
+		} else if (opcode == BPF_CALL) {
+			/* kfunc with imm==0 is invalid and fixup_kfunc_call will
+			 * catch this error later. Make backtracking conservative
+			 * with ENOTSUPP.
+			 */
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
+				return -ENOTSUPP;
+			/* regular helper call sets R0 */
+			bt_clear_reg(bt, BPF_REG_0);
+			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
+				/* if backtracking was looking for registers R1-R5
+				 * they should have been found already.
+				 */
+				verifier_bug(env, "backtracking call unexpected regs %x",
+					     bt_reg_mask(bt));
+				return -EFAULT;
+			}
+			if (insn->src_reg == BPF_REG_0 && insn->imm == BPF_FUNC_tail_call
+			    && subseq_idx - idx != 1) {
+				if (bt_subprog_enter(bt))
+					return -EFAULT;
+			}
+		} else if (opcode == BPF_EXIT) {
+			bool r0_precise;
+
+			/* Backtracking to a nested function call, 'idx' is a part of
+			 * the inner frame 'subseq_idx' is a part of the outer frame.
+			 * In case of a regular function call, instructions giving
+			 * precision to registers R1-R5 should have been found already.
+			 * In case of a callback, it is ok to have R1-R5 marked for
+			 * backtracking, as these registers are set by the function
+			 * invoking callback.
+			 */
+			if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx))
+				for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+					bt_clear_reg(bt, i);
+			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
+				verifier_bug(env, "backtracking exit unexpected regs %x",
+					     bt_reg_mask(bt));
+				return -EFAULT;
+			}
+
+			/* BPF_EXIT in subprog or callback always returns
+			 * right after the call instruction, so by checking
+			 * whether the instruction at subseq_idx-1 is subprog
+			 * call or not we can distinguish actual exit from
+			 * *subprog* from exit from *callback*. In the former
+			 * case, we need to propagate r0 precision, if
+			 * necessary. In the former we never do that.
+			 */
+			r0_precise = subseq_idx - 1 >= 0 &&
+				     bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
+				     bt_is_reg_set(bt, BPF_REG_0);
+
+			bt_clear_reg(bt, BPF_REG_0);
+			if (bt_subprog_enter(bt))
+				return -EFAULT;
+
+			if (r0_precise)
+				bt_set_reg(bt, BPF_REG_0);
+			/* r6-r9 and stack slots will stay set in caller frame
+			 * bitmasks until we return back from callee(s)
+			 */
+			return 0;
+		} else if (BPF_SRC(insn->code) == BPF_X) {
+			if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
+				return 0;
+			/* dreg <cond> sreg
+			 * Both dreg and sreg need precision before
+			 * this insn. If only sreg was marked precise
+			 * before it would be equally necessary to
+			 * propagate it to dreg.
+			 */
+			if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK))
+				bt_set_reg(bt, sreg);
+			if (!hist || !(hist->flags & INSN_F_DST_REG_STACK))
+				bt_set_reg(bt, dreg);
+		} else if (BPF_SRC(insn->code) == BPF_K) {
+			 /* dreg <cond> K
+			  * Only dreg still needs precision before
+			  * this insn, so for the K-based conditional
+			  * there is nothing new to be marked.
+			  */
+		}
+	} else if (class == BPF_LD) {
+		if (!bt_is_reg_set(bt, dreg))
+			return 0;
+		bt_clear_reg(bt, dreg);
+		/* It's ld_imm64 or ld_abs or ld_ind.
+		 * For ld_imm64 no further tracking of precision
+		 * into parent is necessary
+		 */
+		if (mode == BPF_IND || mode == BPF_ABS)
+			/* to be analyzed */
+			return -ENOTSUPP;
+	}
+	/* Propagate precision marks to linked registers, to account for
+	 * registers marked as precise in this function.
+	 */
+	bpf_bt_sync_linked_regs(bt, hist);
+	return 0;
+}
+
+/* the scalar precision tracking algorithm:
+ * . at the start all registers have precise=false.
+ * . scalar ranges are tracked as normal through alu and jmp insns.
+ * . once precise value of the scalar register is used in:
+ *   .  ptr + scalar alu
+ *   . if (scalar cond K|scalar)
+ *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
+ *   backtrack through the verifier states and mark all registers and
+ *   stack slots with spilled constants that these scalar registers
+ *   should be precise.
+ * . during state pruning two registers (or spilled stack slots)
+ *   are equivalent if both are not precise.
+ *
+ * Note the verifier cannot simply walk register parentage chain,
+ * since many different registers and stack slots could have been
+ * used to compute single precise scalar.
+ *
+ * The approach of starting with precise=true for all registers and then
+ * backtrack to mark a register as not precise when the verifier detects
+ * that program doesn't care about specific value (e.g., when helper
+ * takes register as ARG_ANYTHING parameter) is not safe.
+ *
+ * It's ok to walk single parentage chain of the verifier states.
+ * It's possible that this backtracking will go all the way till 1st insn.
+ * All other branches will be explored for needing precision later.
+ *
+ * The backtracking needs to deal with cases like:
+ *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
+ * r9 -= r8
+ * r5 = r9
+ * if r5 > 0x79f goto pc+7
+ *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
+ * r5 += 1
+ * ...
+ * call bpf_perf_event_output#25
+ *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
+ *
+ * and this case:
+ * r6 = 1
+ * call foo // uses callee's r6 inside to compute r0
+ * r0 += r6
+ * if r0 == 0 goto
+ *
+ * to track above reg_mask/stack_mask needs to be independent for each frame.
+ *
+ * Also if parent's curframe > frame where backtracking started,
+ * the verifier need to mark registers in both frames, otherwise callees
+ * may incorrectly prune callers. This is similar to
+ * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
+ *
+ * For now backtracking falls back into conservative marking.
+ */
+void bpf_mark_all_scalars_precise(struct bpf_verifier_env *env,
+				 struct bpf_verifier_state *st)
+{
+	struct bpf_func_state *func;
+	struct bpf_reg_state *reg;
+	int i, j;
+
+	if (env->log.level & BPF_LOG_LEVEL2) {
+		verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
+			st->curframe);
+	}
+
+	/* big hammer: mark all scalars precise in this path.
+	 * pop_stack may still get !precise scalars.
+	 * We also skip current state and go straight to first parent state,
+	 * because precision markings in current non-checkpointed state are
+	 * not needed. See why in the comment in __mark_chain_precision below.
+	 */
+	for (st = st->parent; st; st = st->parent) {
+		for (i = 0; i <= st->curframe; i++) {
+			func = st->frame[i];
+			for (j = 0; j < BPF_REG_FP; j++) {
+				reg = &func->regs[j];
+				if (reg->type != SCALAR_VALUE || reg->precise)
+					continue;
+				reg->precise = true;
+				if (env->log.level & BPF_LOG_LEVEL2) {
+					verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
+						i, j);
+				}
+			}
+			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
+				if (!bpf_is_spilled_reg(&func->stack[j]))
+					continue;
+				reg = &func->stack[j].spilled_ptr;
+				if (reg->type != SCALAR_VALUE || reg->precise)
+					continue;
+				reg->precise = true;
+				if (env->log.level & BPF_LOG_LEVEL2) {
+					verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
+						i, -(j + 1) * 8);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * bpf_mark_chain_precision() backtracks BPF program instruction sequence and
+ * chain of verifier states making sure that register *regno* (if regno >= 0)
+ * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
+ * SCALARS, as well as any other registers and slots that contribute to
+ * a tracked state of given registers/stack slots, depending on specific BPF
+ * assembly instructions (see backtrack_insns() for exact instruction handling
+ * logic). This backtracking relies on recorded jmp_history and is able to
+ * traverse entire chain of parent states. This process ends only when all the
+ * necessary registers/slots and their transitive dependencies are marked as
+ * precise.
+ *
+ * One important and subtle aspect is that precise marks *do not matter* in
+ * the currently verified state (current state). It is important to understand
+ * why this is the case.
+ *
+ * First, note that current state is the state that is not yet "checkpointed",
+ * i.e., it is not yet put into env->explored_states, and it has no children
+ * states as well. It's ephemeral, and can end up either a) being discarded if
+ * compatible explored state is found at some point or BPF_EXIT instruction is
+ * reached or b) checkpointed and put into env->explored_states, branching out
+ * into one or more children states.
+ *
+ * In the former case, precise markings in current state are completely
+ * ignored by state comparison code (see regsafe() for details). Only
+ * checkpointed ("old") state precise markings are important, and if old
+ * state's register/slot is precise, regsafe() assumes current state's
+ * register/slot as precise and checks value ranges exactly and precisely. If
+ * states turn out to be compatible, current state's necessary precise
+ * markings and any required parent states' precise markings are enforced
+ * after the fact with propagate_precision() logic, after the fact. But it's
+ * important to realize that in this case, even after marking current state
+ * registers/slots as precise, we immediately discard current state. So what
+ * actually matters is any of the precise markings propagated into current
+ * state's parent states, which are always checkpointed (due to b) case above).
+ * As such, for scenario a) it doesn't matter if current state has precise
+ * markings set or not.
+ *
+ * Now, for the scenario b), checkpointing and forking into child(ren)
+ * state(s). Note that before current state gets to checkpointing step, any
+ * processed instruction always assumes precise SCALAR register/slot
+ * knowledge: if precise value or range is useful to prune jump branch, BPF
+ * verifier takes this opportunity enthusiastically. Similarly, when
+ * register's value is used to calculate offset or memory address, exact
+ * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
+ * what we mentioned above about state comparison ignoring precise markings
+ * during state comparison, BPF verifier ignores and also assumes precise
+ * markings *at will* during instruction verification process. But as verifier
+ * assumes precision, it also propagates any precision dependencies across
+ * parent states, which are not yet finalized, so can be further restricted
+ * based on new knowledge gained from restrictions enforced by their children
+ * states. This is so that once those parent states are finalized, i.e., when
+ * they have no more active children state, state comparison logic in
+ * is_state_visited() would enforce strict and precise SCALAR ranges, if
+ * required for correctness.
+ *
+ * To build a bit more intuition, note also that once a state is checkpointed,
+ * the path we took to get to that state is not important. This is crucial
+ * property for state pruning. When state is checkpointed and finalized at
+ * some instruction index, it can be correctly and safely used to "short
+ * circuit" any *compatible* state that reaches exactly the same instruction
+ * index. I.e., if we jumped to that instruction from a completely different
+ * code path than original finalized state was derived from, it doesn't
+ * matter, current state can be discarded because from that instruction
+ * forward having a compatible state will ensure we will safely reach the
+ * exit. States describe preconditions for further exploration, but completely
+ * forget the history of how we got here.
+ *
+ * This also means that even if we needed precise SCALAR range to get to
+ * finalized state, but from that point forward *that same* SCALAR register is
+ * never used in a precise context (i.e., it's precise value is not needed for
+ * correctness), it's correct and safe to mark such register as "imprecise"
+ * (i.e., precise marking set to false). This is what we rely on when we do
+ * not set precise marking in current state. If no child state requires
+ * precision for any given SCALAR register, it's safe to dictate that it can
+ * be imprecise. If any child state does require this register to be precise,
+ * we'll mark it precise later retroactively during precise markings
+ * propagation from child state to parent states.
+ *
+ * Skipping precise marking setting in current state is a mild version of
+ * relying on the above observation. But we can utilize this property even
+ * more aggressively by proactively forgetting any precise marking in the
+ * current state (which we inherited from the parent state), right before we
+ * checkpoint it and branch off into new child state. This is done by
+ * mark_all_scalars_imprecise() to hopefully get more permissive and generic
+ * finalized states which help in short circuiting more future states.
+ */
+int bpf_mark_chain_precision(struct bpf_verifier_env *env,
+			    struct bpf_verifier_state *starting_state,
+			    int regno,
+			    bool *changed)
+{
+	struct bpf_verifier_state *st = starting_state;
+	struct backtrack_state *bt = &env->bt;
+	int first_idx = st->first_insn_idx;
+	int last_idx = starting_state->insn_idx;
+	int subseq_idx = -1;
+	struct bpf_func_state *func;
+	bool tmp, skip_first = true;
+	struct bpf_reg_state *reg;
+	int i, fr, err;
+
+	if (!env->bpf_capable)
+		return 0;
+
+	changed = changed ?: &tmp;
+	/* set frame number from which we are starting to backtrack */
+	bt_init(bt, starting_state->curframe);
+
+	/* Do sanity checks against current state of register and/or stack
+	 * slot, but don't set precise flag in current state, as precision
+	 * tracking in the current state is unnecessary.
+	 */
+	func = st->frame[bt->frame];
+	if (regno >= 0) {
+		reg = &func->regs[regno];
+		if (reg->type != SCALAR_VALUE) {
+			verifier_bug(env, "backtracking misuse");
+			return -EFAULT;
+		}
+		bt_set_reg(bt, regno);
+	}
+
+	if (bt_empty(bt))
+		return 0;
+
+	for (;;) {
+		DECLARE_BITMAP(mask, 64);
+		u32 history = st->jmp_history_cnt;
+		struct bpf_jmp_history_entry *hist;
+
+		if (env->log.level & BPF_LOG_LEVEL2) {
+			verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
+				bt->frame, last_idx, first_idx, subseq_idx);
+		}
+
+		if (last_idx < 0) {
+			/* we are at the entry into subprog, which
+			 * is expected for global funcs, but only if
+			 * requested precise registers are R1-R5
+			 * (which are global func's input arguments)
+			 */
+			if (st->curframe == 0 &&
+			    st->frame[0]->subprogno > 0 &&
+			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
+			    bt_stack_mask(bt) == 0 &&
+			    (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
+				bitmap_from_u64(mask, bt_reg_mask(bt));
+				for_each_set_bit(i, mask, 32) {
+					reg = &st->frame[0]->regs[i];
+					bt_clear_reg(bt, i);
+					if (reg->type == SCALAR_VALUE) {
+						reg->precise = true;
+						*changed = true;
+					}
+				}
+				return 0;
+			}
+
+			verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx",
+				     st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
+			return -EFAULT;
+		}
+
+		for (i = last_idx;;) {
+			if (skip_first) {
+				err = 0;
+				skip_first = false;
+			} else {
+				hist = get_jmp_hist_entry(st, history, i);
+				err = backtrack_insn(env, i, subseq_idx, hist, bt);
+			}
+			if (err == -ENOTSUPP) {
+				bpf_mark_all_scalars_precise(env, starting_state);
+				bt_reset(bt);
+				return 0;
+			} else if (err) {
+				return err;
+			}
+			if (bt_empty(bt))
+				/* Found assignment(s) into tracked register in this state.
+				 * Since this state is already marked, just return.
+				 * Nothing to be tracked further in the parent state.
+				 */
+				return 0;
+			subseq_idx = i;
+			i = get_prev_insn_idx(st, i, &history);
+			if (i == -ENOENT)
+				break;
+			if (i >= env->prog->len) {
+				/* This can happen if backtracking reached insn 0
+				 * and there are still reg_mask or stack_mask
+				 * to backtrack.
+				 * It means the backtracking missed the spot where
+				 * particular register was initialized with a constant.
+				 */
+				verifier_bug(env, "backtracking idx %d", i);
+				return -EFAULT;
+			}
+		}
+		st = st->parent;
+		if (!st)
+			break;
+
+		for (fr = bt->frame; fr >= 0; fr--) {
+			func = st->frame[fr];
+			bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
+			for_each_set_bit(i, mask, 32) {
+				reg = &func->regs[i];
+				if (reg->type != SCALAR_VALUE) {
+					bt_clear_frame_reg(bt, fr, i);
+					continue;
+				}
+				if (reg->precise) {
+					bt_clear_frame_reg(bt, fr, i);
+				} else {
+					reg->precise = true;
+					*changed = true;
+				}
+			}
+
+			bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
+			for_each_set_bit(i, mask, 64) {
+				if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE,
+						    env, "stack slot %d, total slots %d",
+						    i, func->allocated_stack / BPF_REG_SIZE))
+					return -EFAULT;
+
+				if (!bpf_is_spilled_scalar_reg(&func->stack[i])) {
+					bt_clear_frame_slot(bt, fr, i);
+					continue;
+				}
+				reg = &func->stack[i].spilled_ptr;
+				if (reg->precise) {
+					bt_clear_frame_slot(bt, fr, i);
+				} else {
+					reg->precise = true;
+					*changed = true;
+				}
+			}
+			if (env->log.level & BPF_LOG_LEVEL2) {
+				fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
+					     bt_frame_reg_mask(bt, fr));
+				verbose(env, "mark_precise: frame%d: parent state regs=%s ",
+					fr, env->tmp_str_buf);
+				bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
+					       bt_frame_stack_mask(bt, fr));
+				verbose(env, "stack=%s: ", env->tmp_str_buf);
+				print_verifier_state(env, st, fr, true);
+			}
+		}
+
+		if (bt_empty(bt))
+			return 0;
+
+		subseq_idx = first_idx;
+		last_idx = st->last_insn_idx;
+		first_idx = st->first_insn_idx;
+	}
+
+	/* if we still have requested precise regs or slots, we missed
+	 * something (e.g., stack access through non-r10 register), so
+	 * fallback to marking all precise
+	 */
+	if (!bt_empty(bt)) {
+		bpf_mark_all_scalars_precise(env, starting_state);
+		bt_reset(bt);
+	}
+
+	return 0;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 33352f28b339..e210dd7205cf 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -513,20 +513,6 @@ static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
 	return ref_obj_uses > 1;
 }
 
-static bool is_atomic_load_insn(const struct bpf_insn *insn)
-{
-	return BPF_CLASS(insn->code) == BPF_STX &&
-	       BPF_MODE(insn->code) == BPF_ATOMIC &&
-	       insn->imm == BPF_LOAD_ACQ;
-}
-
-static bool is_atomic_fetch_insn(const struct bpf_insn *insn)
-{
-	return BPF_CLASS(insn->code) == BPF_STX &&
-	       BPF_MODE(insn->code) == BPF_ATOMIC &&
-	       (insn->imm & BPF_FETCH);
-}
-
 
 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
 {
@@ -1241,11 +1227,6 @@ static bool is_stack_slot_special(const struct bpf_stack_state *stack)
 /* The reg state of a pointer or a bounded scalar was saved when
  * it was spilled to the stack.
  */
-static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
-{
-	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
-	       stack->spilled_ptr.type == SCALAR_VALUE;
-}
 
 /*
  * Mark stack slot as STACK_MISC, unless it is already:
@@ -2590,7 +2571,6 @@ static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
 	return (struct bpf_retval_range){ minval, maxval, false };
 }
 
-#define BPF_MAIN_FUNC (-1)
 static void init_func_state(struct bpf_verifier_env *env,
 			    struct bpf_func_state *state,
 			    int callsite, int frameno, int subprogno)
@@ -3517,16 +3497,6 @@ static int insn_stack_access_flags(int frameno, int spi)
 	return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
 }
 
-static int insn_stack_access_spi(int insn_flags)
-{
-	return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
-}
-
-static int insn_stack_access_frameno(int insn_flags)
-{
-	return insn_flags & INSN_F_FRAMENO_MASK;
-}
-
 #define LR_FRAMENO_BITS	3
 #define LR_SPI_BITS	6
 #define LR_ENTRY_BITS	(LR_SPI_BITS + LR_FRAMENO_BITS + 1)
@@ -3605,91 +3575,6 @@ static void linked_regs_unpack(u64 val, struct linked_regs *s)
 	}
 }
 
-/* for any branch, call, exit record the history of jmps in the given state */
-int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
-			int insn_flags, u64 linked_regs)
-{
-	u32 cnt = cur->jmp_history_cnt;
-	struct bpf_jmp_history_entry *p;
-	size_t alloc_size;
-
-	/* combine instruction flags if we already recorded this instruction */
-	if (env->cur_hist_ent) {
-		/* atomic instructions push insn_flags twice, for READ and
-		 * WRITE sides, but they should agree on stack slot
-		 */
-		verifier_bug_if((env->cur_hist_ent->flags & insn_flags) &&
-				(env->cur_hist_ent->flags & insn_flags) != insn_flags,
-				env, "insn history: insn_idx %d cur flags %x new flags %x",
-				env->insn_idx, env->cur_hist_ent->flags, insn_flags);
-		env->cur_hist_ent->flags |= insn_flags;
-		verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env,
-				"insn history: insn_idx %d linked_regs: %#llx",
-				env->insn_idx, env->cur_hist_ent->linked_regs);
-		env->cur_hist_ent->linked_regs = linked_regs;
-		return 0;
-	}
-
-	cnt++;
-	alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
-	p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT);
-	if (!p)
-		return -ENOMEM;
-	cur->jmp_history = p;
-
-	p = &cur->jmp_history[cnt - 1];
-	p->idx = env->insn_idx;
-	p->prev_idx = env->prev_insn_idx;
-	p->flags = insn_flags;
-	p->linked_regs = linked_regs;
-	cur->jmp_history_cnt = cnt;
-	env->cur_hist_ent = p;
-
-	return 0;
-}
-
-static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
-						        u32 hist_end, int insn_idx)
-{
-	if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
-		return &st->jmp_history[hist_end - 1];
-	return NULL;
-}
-
-/* Backtrack one insn at a time. If idx is not at the top of recorded
- * history then previous instruction came from straight line execution.
- * Return -ENOENT if we exhausted all instructions within given state.
- *
- * It's legal to have a bit of a looping with the same starting and ending
- * insn index within the same state, e.g.: 3->4->5->3, so just because current
- * instruction index is the same as state's first_idx doesn't mean we are
- * done. If there is still some jump history left, we should keep going. We
- * need to take into account that we might have a jump history between given
- * state's parent and itself, due to checkpointing. In this case, we'll have
- * history entry recording a jump from last instruction of parent state and
- * first instruction of given state.
- */
-static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
-			     u32 *history)
-{
-	u32 cnt = *history;
-
-	if (i == st->first_insn_idx) {
-		if (cnt == 0)
-			return -ENOENT;
-		if (cnt == 1 && st->jmp_history[0].idx == i)
-			return -ENOENT;
-	}
-
-	if (cnt && st->jmp_history[cnt - 1].idx == i) {
-		i = st->jmp_history[cnt - 1].prev_idx;
-		(*history)--;
-	} else {
-		i--;
-	}
-	return i;
-}
-
 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
 {
 	const struct btf_type *func;
@@ -3717,148 +3602,10 @@ void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
 }
 
-static inline void bt_init(struct backtrack_state *bt, u32 frame)
-{
-	bt->frame = frame;
-}
-
-static inline void bt_reset(struct backtrack_state *bt)
-{
-	struct bpf_verifier_env *env = bt->env;
-
-	memset(bt, 0, sizeof(*bt));
-	bt->env = env;
-}
-
-static inline u32 bt_empty(struct backtrack_state *bt)
-{
-	u64 mask = 0;
-	int i;
-
-	for (i = 0; i <= bt->frame; i++)
-		mask |= bt->reg_masks[i] | bt->stack_masks[i];
-
-	return mask == 0;
-}
-
-static inline int bt_subprog_enter(struct backtrack_state *bt)
-{
-	if (bt->frame == MAX_CALL_FRAMES - 1) {
-		verifier_bug(bt->env, "subprog enter from frame %d", bt->frame);
-		return -EFAULT;
-	}
-	bt->frame++;
-	return 0;
-}
-
-static inline int bt_subprog_exit(struct backtrack_state *bt)
-{
-	if (bt->frame == 0) {
-		verifier_bug(bt->env, "subprog exit from frame 0");
-		return -EFAULT;
-	}
-	bt->frame--;
-	return 0;
-}
-
-static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
-{
-	bt->reg_masks[frame] &= ~(1 << reg);
-}
-
-static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
-{
-	bpf_bt_set_frame_reg(bt, bt->frame, reg);
-}
-
-static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
-{
-	bt_clear_frame_reg(bt, bt->frame, reg);
-}
-
-static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
-{
-	bt->stack_masks[frame] &= ~(1ull << slot);
-}
-
-static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
-{
-	return bt->reg_masks[frame];
-}
-
-static inline u32 bt_reg_mask(struct backtrack_state *bt)
-{
-	return bt->reg_masks[bt->frame];
-}
-
-static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
-{
-	return bt->stack_masks[frame];
-}
-
-static inline u64 bt_stack_mask(struct backtrack_state *bt)
-{
-	return bt->stack_masks[bt->frame];
-}
-
-static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
-{
-	return bt->reg_masks[bt->frame] & (1 << reg);
-}
-
-static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg)
-{
-	return bt->reg_masks[frame] & (1 << reg);
-}
-
-static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
-{
-	return bt->stack_masks[frame] & (1ull << slot);
-}
-
-/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
-static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
-{
-	DECLARE_BITMAP(mask, 64);
-	bool first = true;
-	int i, n;
-
-	buf[0] = '\0';
-
-	bitmap_from_u64(mask, reg_mask);
-	for_each_set_bit(i, mask, 32) {
-		n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
-		first = false;
-		buf += n;
-		buf_sz -= n;
-		if (buf_sz < 0)
-			break;
-	}
-}
-/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
-void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
-{
-	DECLARE_BITMAP(mask, 64);
-	bool first = true;
-	int i, n;
-
-	buf[0] = '\0';
-
-	bitmap_from_u64(mask, stack_mask);
-	for_each_set_bit(i, mask, 64) {
-		n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
-		first = false;
-		buf += n;
-		buf_sz -= n;
-		if (buf_sz < 0)
-			break;
-	}
-}
-
 /* If any register R in hist->linked_regs is marked as precise in bt,
  * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
  */
-static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
+void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
 {
 	struct linked_regs linked_regs;
 	bool some_precise = false;
@@ -3891,693 +3638,6 @@ static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_histo
 	}
 }
 
-/* For given verifier state backtrack_insn() is called from the last insn to
- * the first insn. Its purpose is to compute a bitmask of registers and
- * stack slots that needs precision in the parent verifier state.
- *
- * @idx is an index of the instruction we are currently processing;
- * @subseq_idx is an index of the subsequent instruction that:
- *   - *would be* executed next, if jump history is viewed in forward order;
- *   - *was* processed previously during backtracking.
- */
-static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
-			  struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
-{
-	struct bpf_insn *insn = env->prog->insnsi + idx;
-	u8 class = BPF_CLASS(insn->code);
-	u8 opcode = BPF_OP(insn->code);
-	u8 mode = BPF_MODE(insn->code);
-	u32 dreg = insn->dst_reg;
-	u32 sreg = insn->src_reg;
-	u32 spi, i, fr;
-
-	if (insn->code == 0)
-		return 0;
-	if (env->log.level & BPF_LOG_LEVEL2) {
-		fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
-		verbose(env, "mark_precise: frame%d: regs=%s ",
-			bt->frame, env->tmp_str_buf);
-		bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
-		verbose(env, "stack=%s before ", env->tmp_str_buf);
-		verbose(env, "%d: ", idx);
-		bpf_verbose_insn(env, insn);
-	}
-
-	/* If there is a history record that some registers gained range at this insn,
-	 * propagate precision marks to those registers, so that bt_is_reg_set()
-	 * accounts for these registers.
-	 */
-	bt_sync_linked_regs(bt, hist);
-
-	if (class == BPF_ALU || class == BPF_ALU64) {
-		if (!bt_is_reg_set(bt, dreg))
-			return 0;
-		if (opcode == BPF_END || opcode == BPF_NEG) {
-			/* sreg is reserved and unused
-			 * dreg still need precision before this insn
-			 */
-			return 0;
-		} else if (opcode == BPF_MOV) {
-			if (BPF_SRC(insn->code) == BPF_X) {
-				/* dreg = sreg or dreg = (s8, s16, s32)sreg
-				 * dreg needs precision after this insn
-				 * sreg needs precision before this insn
-				 */
-				bt_clear_reg(bt, dreg);
-				if (sreg != BPF_REG_FP)
-					bt_set_reg(bt, sreg);
-			} else {
-				/* dreg = K
-				 * dreg needs precision after this insn.
-				 * Corresponding register is already marked
-				 * as precise=true in this verifier state.
-				 * No further markings in parent are necessary
-				 */
-				bt_clear_reg(bt, dreg);
-			}
-		} else {
-			if (BPF_SRC(insn->code) == BPF_X) {
-				/* dreg += sreg
-				 * both dreg and sreg need precision
-				 * before this insn
-				 */
-				if (sreg != BPF_REG_FP)
-					bt_set_reg(bt, sreg);
-			} /* else dreg += K
-			   * dreg still needs precision before this insn
-			   */
-		}
-	} else if (class == BPF_LDX ||
-		   is_atomic_load_insn(insn) ||
-		   is_atomic_fetch_insn(insn)) {
-		u32 load_reg = dreg;
-
-		/*
-		 * Atomic fetch operation writes the old value into
-		 * a register (sreg or r0) and if it was tracked for
-		 * precision, propagate to the stack slot like we do
-		 * in regular ldx.
-		 */
-		if (is_atomic_fetch_insn(insn))
-			load_reg = insn->imm == BPF_CMPXCHG ?
-				   BPF_REG_0 : sreg;
-
-		if (!bt_is_reg_set(bt, load_reg))
-			return 0;
-		bt_clear_reg(bt, load_reg);
-
-		/* scalars can only be spilled into stack w/o losing precision.
-		 * Load from any other memory can be zero extended.
-		 * The desire to keep that precision is already indicated
-		 * by 'precise' mark in corresponding register of this state.
-		 * No further tracking necessary.
-		 */
-		if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
-			return 0;
-		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
-		 * that [fp - off] slot contains scalar that needs to be
-		 * tracked with precision
-		 */
-		spi = insn_stack_access_spi(hist->flags);
-		fr = insn_stack_access_frameno(hist->flags);
-		bpf_bt_set_frame_slot(bt, fr, spi);
-	} else if (class == BPF_STX || class == BPF_ST) {
-		if (bt_is_reg_set(bt, dreg))
-			/* stx & st shouldn't be using _scalar_ dst_reg
-			 * to access memory. It means backtracking
-			 * encountered a case of pointer subtraction.
-			 */
-			return -ENOTSUPP;
-		/* scalars can only be spilled into stack */
-		if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
-			return 0;
-		spi = insn_stack_access_spi(hist->flags);
-		fr = insn_stack_access_frameno(hist->flags);
-		if (!bt_is_frame_slot_set(bt, fr, spi))
-			return 0;
-		bt_clear_frame_slot(bt, fr, spi);
-		if (class == BPF_STX)
-			bt_set_reg(bt, sreg);
-	} else if (class == BPF_JMP || class == BPF_JMP32) {
-		if (bpf_pseudo_call(insn)) {
-			int subprog_insn_idx, subprog;
-
-			subprog_insn_idx = idx + insn->imm + 1;
-			subprog = bpf_find_subprog(env, subprog_insn_idx);
-			if (subprog < 0)
-				return -EFAULT;
-
-			if (bpf_subprog_is_global(env, subprog)) {
-				/* check that jump history doesn't have any
-				 * extra instructions from subprog; the next
-				 * instruction after call to global subprog
-				 * should be literally next instruction in
-				 * caller program
-				 */
-				verifier_bug_if(idx + 1 != subseq_idx, env,
-						"extra insn from subprog");
-				/* r1-r5 are invalidated after subprog call,
-				 * so for global func call it shouldn't be set
-				 * anymore
-				 */
-				if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
-					verifier_bug(env, "global subprog unexpected regs %x",
-						     bt_reg_mask(bt));
-					return -EFAULT;
-				}
-				/* global subprog always sets R0 */
-				bt_clear_reg(bt, BPF_REG_0);
-				return 0;
-			} else {
-				/* static subprog call instruction, which
-				 * means that we are exiting current subprog,
-				 * so only r1-r5 could be still requested as
-				 * precise, r0 and r6-r10 or any stack slot in
-				 * the current frame should be zero by now
-				 */
-				if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
-					verifier_bug(env, "static subprog unexpected regs %x",
-						     bt_reg_mask(bt));
-					return -EFAULT;
-				}
-				/* we are now tracking register spills correctly,
-				 * so any instance of leftover slots is a bug
-				 */
-				if (bt_stack_mask(bt) != 0) {
-					verifier_bug(env,
-						     "static subprog leftover stack slots %llx",
-						     bt_stack_mask(bt));
-					return -EFAULT;
-				}
-				/* propagate r1-r5 to the caller */
-				for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
-					if (bt_is_reg_set(bt, i)) {
-						bt_clear_reg(bt, i);
-						bpf_bt_set_frame_reg(bt, bt->frame - 1, i);
-					}
-				}
-				if (bt_subprog_exit(bt))
-					return -EFAULT;
-				return 0;
-			}
-		} else if (bpf_is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
-			/* exit from callback subprog to callback-calling helper or
-			 * kfunc call. Use idx/subseq_idx check to discern it from
-			 * straight line code backtracking.
-			 * Unlike the subprog call handling above, we shouldn't
-			 * propagate precision of r1-r5 (if any requested), as they are
-			 * not actually arguments passed directly to callback subprogs
-			 */
-			if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
-				verifier_bug(env, "callback unexpected regs %x",
-					     bt_reg_mask(bt));
-				return -EFAULT;
-			}
-			if (bt_stack_mask(bt) != 0) {
-				verifier_bug(env, "callback leftover stack slots %llx",
-					     bt_stack_mask(bt));
-				return -EFAULT;
-			}
-			/* clear r1-r5 in callback subprog's mask */
-			for (i = BPF_REG_1; i <= BPF_REG_5; i++)
-				bt_clear_reg(bt, i);
-			if (bt_subprog_exit(bt))
-				return -EFAULT;
-			return 0;
-		} else if (opcode == BPF_CALL) {
-			/* kfunc with imm==0 is invalid and fixup_kfunc_call will
-			 * catch this error later. Make backtracking conservative
-			 * with ENOTSUPP.
-			 */
-			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
-				return -ENOTSUPP;
-			/* regular helper call sets R0 */
-			bt_clear_reg(bt, BPF_REG_0);
-			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
-				/* if backtracking was looking for registers R1-R5
-				 * they should have been found already.
-				 */
-				verifier_bug(env, "backtracking call unexpected regs %x",
-					     bt_reg_mask(bt));
-				return -EFAULT;
-			}
-			if (insn->src_reg == BPF_REG_0 && insn->imm == BPF_FUNC_tail_call
-			    && subseq_idx - idx != 1) {
-				if (bt_subprog_enter(bt))
-					return -EFAULT;
-			}
-		} else if (opcode == BPF_EXIT) {
-			bool r0_precise;
-
-			/* Backtracking to a nested function call, 'idx' is a part of
-			 * the inner frame 'subseq_idx' is a part of the outer frame.
-			 * In case of a regular function call, instructions giving
-			 * precision to registers R1-R5 should have been found already.
-			 * In case of a callback, it is ok to have R1-R5 marked for
-			 * backtracking, as these registers are set by the function
-			 * invoking callback.
-			 */
-			if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx))
-				for (i = BPF_REG_1; i <= BPF_REG_5; i++)
-					bt_clear_reg(bt, i);
-			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
-				verifier_bug(env, "backtracking exit unexpected regs %x",
-					     bt_reg_mask(bt));
-				return -EFAULT;
-			}
-
-			/* BPF_EXIT in subprog or callback always returns
-			 * right after the call instruction, so by checking
-			 * whether the instruction at subseq_idx-1 is subprog
-			 * call or not we can distinguish actual exit from
-			 * *subprog* from exit from *callback*. In the former
-			 * case, we need to propagate r0 precision, if
-			 * necessary. In the former we never do that.
-			 */
-			r0_precise = subseq_idx - 1 >= 0 &&
-				     bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
-				     bt_is_reg_set(bt, BPF_REG_0);
-
-			bt_clear_reg(bt, BPF_REG_0);
-			if (bt_subprog_enter(bt))
-				return -EFAULT;
-
-			if (r0_precise)
-				bt_set_reg(bt, BPF_REG_0);
-			/* r6-r9 and stack slots will stay set in caller frame
-			 * bitmasks until we return back from callee(s)
-			 */
-			return 0;
-		} else if (BPF_SRC(insn->code) == BPF_X) {
-			if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
-				return 0;
-			/* dreg <cond> sreg
-			 * Both dreg and sreg need precision before
-			 * this insn. If only sreg was marked precise
-			 * before it would be equally necessary to
-			 * propagate it to dreg.
-			 */
-			if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK))
-				bt_set_reg(bt, sreg);
-			if (!hist || !(hist->flags & INSN_F_DST_REG_STACK))
-				bt_set_reg(bt, dreg);
-		} else if (BPF_SRC(insn->code) == BPF_K) {
-			 /* dreg <cond> K
-			  * Only dreg still needs precision before
-			  * this insn, so for the K-based conditional
-			  * there is nothing new to be marked.
-			  */
-		}
-	} else if (class == BPF_LD) {
-		if (!bt_is_reg_set(bt, dreg))
-			return 0;
-		bt_clear_reg(bt, dreg);
-		/* It's ld_imm64 or ld_abs or ld_ind.
-		 * For ld_imm64 no further tracking of precision
-		 * into parent is necessary
-		 */
-		if (mode == BPF_IND || mode == BPF_ABS)
-			/* to be analyzed */
-			return -ENOTSUPP;
-	}
-	/* Propagate precision marks to linked registers, to account for
-	 * registers marked as precise in this function.
-	 */
-	bt_sync_linked_regs(bt, hist);
-	return 0;
-}
-
-/* the scalar precision tracking algorithm:
- * . at the start all registers have precise=false.
- * . scalar ranges are tracked as normal through alu and jmp insns.
- * . once precise value of the scalar register is used in:
- *   .  ptr + scalar alu
- *   . if (scalar cond K|scalar)
- *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
- *   backtrack through the verifier states and mark all registers and
- *   stack slots with spilled constants that these scalar registers
- *   should be precise.
- * . during state pruning two registers (or spilled stack slots)
- *   are equivalent if both are not precise.
- *
- * Note the verifier cannot simply walk register parentage chain,
- * since many different registers and stack slots could have been
- * used to compute single precise scalar.
- *
- * The approach of starting with precise=true for all registers and then
- * backtrack to mark a register as not precise when the verifier detects
- * that program doesn't care about specific value (e.g., when helper
- * takes register as ARG_ANYTHING parameter) is not safe.
- *
- * It's ok to walk single parentage chain of the verifier states.
- * It's possible that this backtracking will go all the way till 1st insn.
- * All other branches will be explored for needing precision later.
- *
- * The backtracking needs to deal with cases like:
- *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
- * r9 -= r8
- * r5 = r9
- * if r5 > 0x79f goto pc+7
- *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
- * r5 += 1
- * ...
- * call bpf_perf_event_output#25
- *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
- *
- * and this case:
- * r6 = 1
- * call foo // uses callee's r6 inside to compute r0
- * r0 += r6
- * if r0 == 0 goto
- *
- * to track above reg_mask/stack_mask needs to be independent for each frame.
- *
- * Also if parent's curframe > frame where backtracking started,
- * the verifier need to mark registers in both frames, otherwise callees
- * may incorrectly prune callers. This is similar to
- * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
- *
- * For now backtracking falls back into conservative marking.
- */
-void bpf_mark_all_scalars_precise(struct bpf_verifier_env *env,
-				 struct bpf_verifier_state *st)
-{
-	struct bpf_func_state *func;
-	struct bpf_reg_state *reg;
-	int i, j;
-
-	if (env->log.level & BPF_LOG_LEVEL2) {
-		verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
-			st->curframe);
-	}
-
-	/* big hammer: mark all scalars precise in this path.
-	 * pop_stack may still get !precise scalars.
-	 * We also skip current state and go straight to first parent state,
-	 * because precision markings in current non-checkpointed state are
-	 * not needed. See why in the comment in __mark_chain_precision below.
-	 */
-	for (st = st->parent; st; st = st->parent) {
-		for (i = 0; i <= st->curframe; i++) {
-			func = st->frame[i];
-			for (j = 0; j < BPF_REG_FP; j++) {
-				reg = &func->regs[j];
-				if (reg->type != SCALAR_VALUE || reg->precise)
-					continue;
-				reg->precise = true;
-				if (env->log.level & BPF_LOG_LEVEL2) {
-					verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
-						i, j);
-				}
-			}
-			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
-				if (!bpf_is_spilled_reg(&func->stack[j]))
-					continue;
-				reg = &func->stack[j].spilled_ptr;
-				if (reg->type != SCALAR_VALUE || reg->precise)
-					continue;
-				reg->precise = true;
-				if (env->log.level & BPF_LOG_LEVEL2) {
-					verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
-						i, -(j + 1) * 8);
-				}
-			}
-		}
-	}
-}
-
-/*
- * bpf_mark_chain_precision() backtracks BPF program instruction sequence and
- * chain of verifier states making sure that register *regno* (if regno >= 0)
- * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
- * SCALARS, as well as any other registers and slots that contribute to
- * a tracked state of given registers/stack slots, depending on specific BPF
- * assembly instructions (see backtrack_insns() for exact instruction handling
- * logic). This backtracking relies on recorded jmp_history and is able to
- * traverse entire chain of parent states. This process ends only when all the
- * necessary registers/slots and their transitive dependencies are marked as
- * precise.
- *
- * One important and subtle aspect is that precise marks *do not matter* in
- * the currently verified state (current state). It is important to understand
- * why this is the case.
- *
- * First, note that current state is the state that is not yet "checkpointed",
- * i.e., it is not yet put into env->explored_states, and it has no children
- * states as well. It's ephemeral, and can end up either a) being discarded if
- * compatible explored state is found at some point or BPF_EXIT instruction is
- * reached or b) checkpointed and put into env->explored_states, branching out
- * into one or more children states.
- *
- * In the former case, precise markings in current state are completely
- * ignored by state comparison code (see regsafe() for details). Only
- * checkpointed ("old") state precise markings are important, and if old
- * state's register/slot is precise, regsafe() assumes current state's
- * register/slot as precise and checks value ranges exactly and precisely. If
- * states turn out to be compatible, current state's necessary precise
- * markings and any required parent states' precise markings are enforced
- * after the fact with propagate_precision() logic, after the fact. But it's
- * important to realize that in this case, even after marking current state
- * registers/slots as precise, we immediately discard current state. So what
- * actually matters is any of the precise markings propagated into current
- * state's parent states, which are always checkpointed (due to b) case above).
- * As such, for scenario a) it doesn't matter if current state has precise
- * markings set or not.
- *
- * Now, for the scenario b), checkpointing and forking into child(ren)
- * state(s). Note that before current state gets to checkpointing step, any
- * processed instruction always assumes precise SCALAR register/slot
- * knowledge: if precise value or range is useful to prune jump branch, BPF
- * verifier takes this opportunity enthusiastically. Similarly, when
- * register's value is used to calculate offset or memory address, exact
- * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
- * what we mentioned above about state comparison ignoring precise markings
- * during state comparison, BPF verifier ignores and also assumes precise
- * markings *at will* during instruction verification process. But as verifier
- * assumes precision, it also propagates any precision dependencies across
- * parent states, which are not yet finalized, so can be further restricted
- * based on new knowledge gained from restrictions enforced by their children
- * states. This is so that once those parent states are finalized, i.e., when
- * they have no more active children state, state comparison logic in
- * is_state_visited() would enforce strict and precise SCALAR ranges, if
- * required for correctness.
- *
- * To build a bit more intuition, note also that once a state is checkpointed,
- * the path we took to get to that state is not important. This is crucial
- * property for state pruning. When state is checkpointed and finalized at
- * some instruction index, it can be correctly and safely used to "short
- * circuit" any *compatible* state that reaches exactly the same instruction
- * index. I.e., if we jumped to that instruction from a completely different
- * code path than original finalized state was derived from, it doesn't
- * matter, current state can be discarded because from that instruction
- * forward having a compatible state will ensure we will safely reach the
- * exit. States describe preconditions for further exploration, but completely
- * forget the history of how we got here.
- *
- * This also means that even if we needed precise SCALAR range to get to
- * finalized state, but from that point forward *that same* SCALAR register is
- * never used in a precise context (i.e., it's precise value is not needed for
- * correctness), it's correct and safe to mark such register as "imprecise"
- * (i.e., precise marking set to false). This is what we rely on when we do
- * not set precise marking in current state. If no child state requires
- * precision for any given SCALAR register, it's safe to dictate that it can
- * be imprecise. If any child state does require this register to be precise,
- * we'll mark it precise later retroactively during precise markings
- * propagation from child state to parent states.
- *
- * Skipping precise marking setting in current state is a mild version of
- * relying on the above observation. But we can utilize this property even
- * more aggressively by proactively forgetting any precise marking in the
- * current state (which we inherited from the parent state), right before we
- * checkpoint it and branch off into new child state. This is done by
- * mark_all_scalars_imprecise() to hopefully get more permissive and generic
- * finalized states which help in short circuiting more future states.
- */
-int bpf_mark_chain_precision(struct bpf_verifier_env *env,
-			    struct bpf_verifier_state *starting_state,
-			    int regno,
-			    bool *changed)
-{
-	struct bpf_verifier_state *st = starting_state;
-	struct backtrack_state *bt = &env->bt;
-	int first_idx = st->first_insn_idx;
-	int last_idx = starting_state->insn_idx;
-	int subseq_idx = -1;
-	struct bpf_func_state *func;
-	bool tmp, skip_first = true;
-	struct bpf_reg_state *reg;
-	int i, fr, err;
-
-	if (!env->bpf_capable)
-		return 0;
-
-	changed = changed ?: &tmp;
-	/* set frame number from which we are starting to backtrack */
-	bt_init(bt, starting_state->curframe);
-
-	/* Do sanity checks against current state of register and/or stack
-	 * slot, but don't set precise flag in current state, as precision
-	 * tracking in the current state is unnecessary.
-	 */
-	func = st->frame[bt->frame];
-	if (regno >= 0) {
-		reg = &func->regs[regno];
-		if (reg->type != SCALAR_VALUE) {
-			verifier_bug(env, "backtracking misuse");
-			return -EFAULT;
-		}
-		bt_set_reg(bt, regno);
-	}
-
-	if (bt_empty(bt))
-		return 0;
-
-	for (;;) {
-		DECLARE_BITMAP(mask, 64);
-		u32 history = st->jmp_history_cnt;
-		struct bpf_jmp_history_entry *hist;
-
-		if (env->log.level & BPF_LOG_LEVEL2) {
-			verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
-				bt->frame, last_idx, first_idx, subseq_idx);
-		}
-
-		if (last_idx < 0) {
-			/* we are at the entry into subprog, which
-			 * is expected for global funcs, but only if
-			 * requested precise registers are R1-R5
-			 * (which are global func's input arguments)
-			 */
-			if (st->curframe == 0 &&
-			    st->frame[0]->subprogno > 0 &&
-			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
-			    bt_stack_mask(bt) == 0 &&
-			    (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
-				bitmap_from_u64(mask, bt_reg_mask(bt));
-				for_each_set_bit(i, mask, 32) {
-					reg = &st->frame[0]->regs[i];
-					bt_clear_reg(bt, i);
-					if (reg->type == SCALAR_VALUE) {
-						reg->precise = true;
-						*changed = true;
-					}
-				}
-				return 0;
-			}
-
-			verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx",
-				     st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
-			return -EFAULT;
-		}
-
-		for (i = last_idx;;) {
-			if (skip_first) {
-				err = 0;
-				skip_first = false;
-			} else {
-				hist = get_jmp_hist_entry(st, history, i);
-				err = backtrack_insn(env, i, subseq_idx, hist, bt);
-			}
-			if (err == -ENOTSUPP) {
-				bpf_mark_all_scalars_precise(env, starting_state);
-				bt_reset(bt);
-				return 0;
-			} else if (err) {
-				return err;
-			}
-			if (bt_empty(bt))
-				/* Found assignment(s) into tracked register in this state.
-				 * Since this state is already marked, just return.
-				 * Nothing to be tracked further in the parent state.
-				 */
-				return 0;
-			subseq_idx = i;
-			i = get_prev_insn_idx(st, i, &history);
-			if (i == -ENOENT)
-				break;
-			if (i >= env->prog->len) {
-				/* This can happen if backtracking reached insn 0
-				 * and there are still reg_mask or stack_mask
-				 * to backtrack.
-				 * It means the backtracking missed the spot where
-				 * particular register was initialized with a constant.
-				 */
-				verifier_bug(env, "backtracking idx %d", i);
-				return -EFAULT;
-			}
-		}
-		st = st->parent;
-		if (!st)
-			break;
-
-		for (fr = bt->frame; fr >= 0; fr--) {
-			func = st->frame[fr];
-			bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
-			for_each_set_bit(i, mask, 32) {
-				reg = &func->regs[i];
-				if (reg->type != SCALAR_VALUE) {
-					bt_clear_frame_reg(bt, fr, i);
-					continue;
-				}
-				if (reg->precise) {
-					bt_clear_frame_reg(bt, fr, i);
-				} else {
-					reg->precise = true;
-					*changed = true;
-				}
-			}
-
-			bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
-			for_each_set_bit(i, mask, 64) {
-				if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE,
-						    env, "stack slot %d, total slots %d",
-						    i, func->allocated_stack / BPF_REG_SIZE))
-					return -EFAULT;
-
-				if (!is_spilled_scalar_reg(&func->stack[i])) {
-					bt_clear_frame_slot(bt, fr, i);
-					continue;
-				}
-				reg = &func->stack[i].spilled_ptr;
-				if (reg->precise) {
-					bt_clear_frame_slot(bt, fr, i);
-				} else {
-					reg->precise = true;
-					*changed = true;
-				}
-			}
-			if (env->log.level & BPF_LOG_LEVEL2) {
-				fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
-					     bt_frame_reg_mask(bt, fr));
-				verbose(env, "mark_precise: frame%d: parent state regs=%s ",
-					fr, env->tmp_str_buf);
-				bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
-					       bt_frame_stack_mask(bt, fr));
-				verbose(env, "stack=%s: ", env->tmp_str_buf);
-				print_verifier_state(env, st, fr, true);
-			}
-		}
-
-		if (bt_empty(bt))
-			return 0;
-
-		subseq_idx = first_idx;
-		last_idx = st->last_insn_idx;
-		first_idx = st->first_insn_idx;
-	}
-
-	/* if we still have requested precise regs or slots, we missed
-	 * something (e.g., stack access through non-r10 register), so
-	 * fallback to marking all precise
-	 */
-	if (!bt_empty(bt)) {
-		bpf_mark_all_scalars_precise(env, starting_state);
-		bt_reset(bt);
-	}
-
-	return 0;
-}
-
 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
 {
 	return bpf_mark_chain_precision(env, env->cur_state, regno, NULL);
@@ -4759,7 +3819,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	 */
 	if (!env->allow_ptr_leaks &&
 	    bpf_is_spilled_reg(&state->stack[spi]) &&
-	    !is_spilled_scalar_reg(&state->stack[spi]) &&
+	    !bpf_is_spilled_scalar_reg(&state->stack[spi]) &&
 	    size != BPF_REG_SIZE) {
 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
 		return -EACCES;
@@ -4938,7 +3998,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
 		 * maintain the spill type.
 		 */
 		if (writing_zero && *stype == STACK_SPILL &&
-		    is_spilled_scalar_reg(&state->stack[spi])) {
+		    bpf_is_spilled_scalar_reg(&state->stack[spi])) {
 			struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
 
 			if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
@@ -9380,7 +8440,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env,
 	}
 
 	/* Check that stack contains a scalar spill of expected size */
-	if (!is_spilled_scalar_reg(&state->stack[spi]))
+	if (!bpf_is_spilled_scalar_reg(&state->stack[spi]))
 		return -EOPNOTSUPP;
 	for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
 		spill_size++;
-- 
cgit v1.2.3


From 99a832a2b5b8a8ecc1a2bdd64017892caf4aa096 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sun, 12 Apr 2026 08:29:35 -0700
Subject: bpf: Move BTF checking logic into check_btf.c

BTF validation logic is independent from the main verifier.
Move it into check_btf.c

Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20260412152936.54262-7-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   5 +
 kernel/bpf/Makefile          |   2 +-
 kernel/bpf/check_btf.c       | 463 +++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        | 459 +-----------------------------------------
 4 files changed, 471 insertions(+), 458 deletions(-)
 create mode 100644 kernel/bpf/check_btf.c

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 5389af612696..53e8664cb566 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -1061,6 +1061,11 @@ static inline void bpf_trampoline_unpack_key(u64 key, u32 *obj_id, u32 *btf_id)
 		*btf_id = key & 0x7FFFFFFF;
 }
 
+int bpf_check_btf_info_early(struct bpf_verifier_env *env,
+			     const union bpf_attr *attr, bpfptr_t uattr);
+int bpf_check_btf_info(struct bpf_verifier_env *env,
+		       const union bpf_attr *attr, bpfptr_t uattr);
+
 int bpf_check_attach_target(struct bpf_verifier_log *log,
 			    const struct bpf_prog *prog,
 			    const struct bpf_prog *tgt_prog,
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index fd1d901b8d3c..399007b67a92 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
-obj-$(CONFIG_BPF_SYSCALL) += fixups.o cfg.o states.o backtrack.o
+obj-$(CONFIG_BPF_SYSCALL) += fixups.o cfg.o states.o backtrack.o check_btf.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
diff --git a/kernel/bpf/check_btf.c b/kernel/bpf/check_btf.c
new file mode 100644
index 000000000000..93bebe6fe12e
--- /dev/null
+++ b/kernel/bpf/check_btf.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+#include <linux/btf.h>
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+static int check_abnormal_return(struct bpf_verifier_env *env)
+{
+	int i;
+
+	for (i = 1; i < env->subprog_cnt; i++) {
+		if (env->subprog_info[i].has_ld_abs) {
+			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
+			return -EINVAL;
+		}
+		if (env->subprog_info[i].has_tail_call) {
+			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/* The minimum supported BTF func info size */
+#define MIN_BPF_FUNCINFO_SIZE	8
+#define MAX_FUNCINFO_REC_SIZE	252
+
+static int check_btf_func_early(struct bpf_verifier_env *env,
+				const union bpf_attr *attr,
+				bpfptr_t uattr)
+{
+	u32 krec_size = sizeof(struct bpf_func_info);
+	const struct btf_type *type, *func_proto;
+	u32 i, nfuncs, urec_size, min_size;
+	struct bpf_func_info *krecord;
+	struct bpf_prog *prog;
+	const struct btf *btf;
+	u32 prev_offset = 0;
+	bpfptr_t urecord;
+	int ret = -ENOMEM;
+
+	nfuncs = attr->func_info_cnt;
+	if (!nfuncs) {
+		if (check_abnormal_return(env))
+			return -EINVAL;
+		return 0;
+	}
+
+	urec_size = attr->func_info_rec_size;
+	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
+	    urec_size > MAX_FUNCINFO_REC_SIZE ||
+	    urec_size % sizeof(u32)) {
+		verbose(env, "invalid func info rec size %u\n", urec_size);
+		return -EINVAL;
+	}
+
+	prog = env->prog;
+	btf = prog->aux->btf;
+
+	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
+	min_size = min_t(u32, krec_size, urec_size);
+
+	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+	if (!krecord)
+		return -ENOMEM;
+
+	for (i = 0; i < nfuncs; i++) {
+		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
+		if (ret) {
+			if (ret == -E2BIG) {
+				verbose(env, "nonzero tailing record in func info");
+				/* set the size kernel expects so loader can zero
+				 * out the rest of the record.
+				 */
+				if (copy_to_bpfptr_offset(uattr,
+							  offsetof(union bpf_attr, func_info_rec_size),
+							  &min_size, sizeof(min_size)))
+					ret = -EFAULT;
+			}
+			goto err_free;
+		}
+
+		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
+			ret = -EFAULT;
+			goto err_free;
+		}
+
+		/* check insn_off */
+		ret = -EINVAL;
+		if (i == 0) {
+			if (krecord[i].insn_off) {
+				verbose(env,
+					"nonzero insn_off %u for the first func info record",
+					krecord[i].insn_off);
+				goto err_free;
+			}
+		} else if (krecord[i].insn_off <= prev_offset) {
+			verbose(env,
+				"same or smaller insn offset (%u) than previous func info record (%u)",
+				krecord[i].insn_off, prev_offset);
+			goto err_free;
+		}
+
+		/* check type_id */
+		type = btf_type_by_id(btf, krecord[i].type_id);
+		if (!type || !btf_type_is_func(type)) {
+			verbose(env, "invalid type id %d in func info",
+				krecord[i].type_id);
+			goto err_free;
+		}
+
+		func_proto = btf_type_by_id(btf, type->type);
+		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
+			/* btf_func_check() already verified it during BTF load */
+			goto err_free;
+
+		prev_offset = krecord[i].insn_off;
+		bpfptr_add(&urecord, urec_size);
+	}
+
+	prog->aux->func_info = krecord;
+	prog->aux->func_info_cnt = nfuncs;
+	return 0;
+
+err_free:
+	kvfree(krecord);
+	return ret;
+}
+
+static int check_btf_func(struct bpf_verifier_env *env,
+			  const union bpf_attr *attr,
+			  bpfptr_t uattr)
+{
+	const struct btf_type *type, *func_proto, *ret_type;
+	u32 i, nfuncs, urec_size;
+	struct bpf_func_info *krecord;
+	struct bpf_func_info_aux *info_aux = NULL;
+	struct bpf_prog *prog;
+	const struct btf *btf;
+	bpfptr_t urecord;
+	bool scalar_return;
+	int ret = -ENOMEM;
+
+	nfuncs = attr->func_info_cnt;
+	if (!nfuncs) {
+		if (check_abnormal_return(env))
+			return -EINVAL;
+		return 0;
+	}
+	if (nfuncs != env->subprog_cnt) {
+		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
+		return -EINVAL;
+	}
+
+	urec_size = attr->func_info_rec_size;
+
+	prog = env->prog;
+	btf = prog->aux->btf;
+
+	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
+
+	krecord = prog->aux->func_info;
+	info_aux = kzalloc_objs(*info_aux, nfuncs,
+				GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+	if (!info_aux)
+		return -ENOMEM;
+
+	for (i = 0; i < nfuncs; i++) {
+		/* check insn_off */
+		ret = -EINVAL;
+
+		if (env->subprog_info[i].start != krecord[i].insn_off) {
+			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
+			goto err_free;
+		}
+
+		/* Already checked type_id */
+		type = btf_type_by_id(btf, krecord[i].type_id);
+		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
+		/* Already checked func_proto */
+		func_proto = btf_type_by_id(btf, type->type);
+
+		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
+		scalar_return =
+			btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
+		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
+			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
+			goto err_free;
+		}
+		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
+			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
+			goto err_free;
+		}
+
+		env->subprog_info[i].name = btf_name_by_offset(btf, type->name_off);
+		bpfptr_add(&urecord, urec_size);
+	}
+
+	prog->aux->func_info_aux = info_aux;
+	return 0;
+
+err_free:
+	kfree(info_aux);
+	return ret;
+}
+
+#define MIN_BPF_LINEINFO_SIZE	offsetofend(struct bpf_line_info, line_col)
+#define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
+
+static int check_btf_line(struct bpf_verifier_env *env,
+			  const union bpf_attr *attr,
+			  bpfptr_t uattr)
+{
+	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
+	struct bpf_subprog_info *sub;
+	struct bpf_line_info *linfo;
+	struct bpf_prog *prog;
+	const struct btf *btf;
+	bpfptr_t ulinfo;
+	int err;
+
+	nr_linfo = attr->line_info_cnt;
+	if (!nr_linfo)
+		return 0;
+	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
+		return -EINVAL;
+
+	rec_size = attr->line_info_rec_size;
+	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
+	    rec_size > MAX_LINEINFO_REC_SIZE ||
+	    rec_size & (sizeof(u32) - 1))
+		return -EINVAL;
+
+	/* Need to zero it in case the userspace may
+	 * pass in a smaller bpf_line_info object.
+	 */
+	linfo = kvzalloc_objs(struct bpf_line_info, nr_linfo,
+			      GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+	if (!linfo)
+		return -ENOMEM;
+
+	prog = env->prog;
+	btf = prog->aux->btf;
+
+	s = 0;
+	sub = env->subprog_info;
+	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
+	expected_size = sizeof(struct bpf_line_info);
+	ncopy = min_t(u32, expected_size, rec_size);
+	for (i = 0; i < nr_linfo; i++) {
+		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
+		if (err) {
+			if (err == -E2BIG) {
+				verbose(env, "nonzero tailing record in line_info");
+				if (copy_to_bpfptr_offset(uattr,
+							  offsetof(union bpf_attr, line_info_rec_size),
+							  &expected_size, sizeof(expected_size)))
+					err = -EFAULT;
+			}
+			goto err_free;
+		}
+
+		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
+			err = -EFAULT;
+			goto err_free;
+		}
+
+		/*
+		 * Check insn_off to ensure
+		 * 1) strictly increasing AND
+		 * 2) bounded by prog->len
+		 *
+		 * The linfo[0].insn_off == 0 check logically falls into
+		 * the later "missing bpf_line_info for func..." case
+		 * because the first linfo[0].insn_off must be the
+		 * first sub also and the first sub must have
+		 * subprog_info[0].start == 0.
+		 */
+		if ((i && linfo[i].insn_off <= prev_offset) ||
+		    linfo[i].insn_off >= prog->len) {
+			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
+				i, linfo[i].insn_off, prev_offset,
+				prog->len);
+			err = -EINVAL;
+			goto err_free;
+		}
+
+		if (!prog->insnsi[linfo[i].insn_off].code) {
+			verbose(env,
+				"Invalid insn code at line_info[%u].insn_off\n",
+				i);
+			err = -EINVAL;
+			goto err_free;
+		}
+
+		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
+		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
+			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
+			err = -EINVAL;
+			goto err_free;
+		}
+
+		if (s != env->subprog_cnt) {
+			if (linfo[i].insn_off == sub[s].start) {
+				sub[s].linfo_idx = i;
+				s++;
+			} else if (sub[s].start < linfo[i].insn_off) {
+				verbose(env, "missing bpf_line_info for func#%u\n", s);
+				err = -EINVAL;
+				goto err_free;
+			}
+		}
+
+		prev_offset = linfo[i].insn_off;
+		bpfptr_add(&ulinfo, rec_size);
+	}
+
+	if (s != env->subprog_cnt) {
+		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
+			env->subprog_cnt - s, s);
+		err = -EINVAL;
+		goto err_free;
+	}
+
+	prog->aux->linfo = linfo;
+	prog->aux->nr_linfo = nr_linfo;
+
+	return 0;
+
+err_free:
+	kvfree(linfo);
+	return err;
+}
+
+#define MIN_CORE_RELO_SIZE	sizeof(struct bpf_core_relo)
+#define MAX_CORE_RELO_SIZE	MAX_FUNCINFO_REC_SIZE
+
+static int check_core_relo(struct bpf_verifier_env *env,
+			   const union bpf_attr *attr,
+			   bpfptr_t uattr)
+{
+	u32 i, nr_core_relo, ncopy, expected_size, rec_size;
+	struct bpf_core_relo core_relo = {};
+	struct bpf_prog *prog = env->prog;
+	const struct btf *btf = prog->aux->btf;
+	struct bpf_core_ctx ctx = {
+		.log = &env->log,
+		.btf = btf,
+	};
+	bpfptr_t u_core_relo;
+	int err;
+
+	nr_core_relo = attr->core_relo_cnt;
+	if (!nr_core_relo)
+		return 0;
+	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
+		return -EINVAL;
+
+	rec_size = attr->core_relo_rec_size;
+	if (rec_size < MIN_CORE_RELO_SIZE ||
+	    rec_size > MAX_CORE_RELO_SIZE ||
+	    rec_size % sizeof(u32))
+		return -EINVAL;
+
+	u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
+	expected_size = sizeof(struct bpf_core_relo);
+	ncopy = min_t(u32, expected_size, rec_size);
+
+	/* Unlike func_info and line_info, copy and apply each CO-RE
+	 * relocation record one at a time.
+	 */
+	for (i = 0; i < nr_core_relo; i++) {
+		/* future proofing when sizeof(bpf_core_relo) changes */
+		err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
+		if (err) {
+			if (err == -E2BIG) {
+				verbose(env, "nonzero tailing record in core_relo");
+				if (copy_to_bpfptr_offset(uattr,
+							  offsetof(union bpf_attr, core_relo_rec_size),
+							  &expected_size, sizeof(expected_size)))
+					err = -EFAULT;
+			}
+			break;
+		}
+
+		if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
+			verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
+				i, core_relo.insn_off, prog->len);
+			err = -EINVAL;
+			break;
+		}
+
+		err = bpf_core_apply(&ctx, &core_relo, i,
+				     &prog->insnsi[core_relo.insn_off / 8]);
+		if (err)
+			break;
+		bpfptr_add(&u_core_relo, rec_size);
+	}
+	return err;
+}
+
+int bpf_check_btf_info_early(struct bpf_verifier_env *env,
+			     const union bpf_attr *attr,
+			     bpfptr_t uattr)
+{
+	struct btf *btf;
+	int err;
+
+	if (!attr->func_info_cnt && !attr->line_info_cnt) {
+		if (check_abnormal_return(env))
+			return -EINVAL;
+		return 0;
+	}
+
+	btf = btf_get_by_fd(attr->prog_btf_fd);
+	if (IS_ERR(btf))
+		return PTR_ERR(btf);
+	if (btf_is_kernel(btf)) {
+		btf_put(btf);
+		return -EACCES;
+	}
+	env->prog->aux->btf = btf;
+
+	err = check_btf_func_early(env, attr, uattr);
+	if (err)
+		return err;
+	return 0;
+}
+
+int bpf_check_btf_info(struct bpf_verifier_env *env,
+		       const union bpf_attr *attr,
+		       bpfptr_t uattr)
+{
+	int err;
+
+	if (!attr->func_info_cnt && !attr->line_info_cnt) {
+		if (check_abnormal_return(env))
+			return -EINVAL;
+		return 0;
+	}
+
+	err = check_btf_func(env, attr, uattr);
+	if (err)
+		return err;
+
+	err = check_btf_line(env, attr, uattr);
+	if (err)
+		return err;
+
+	err = check_core_relo(env, attr, uattr);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e210dd7205cf..817eb7815011 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17216,206 +17216,6 @@ static int mark_fastcall_patterns(struct bpf_verifier_env *env)
 	return 0;
 }
 
-static int check_abnormal_return(struct bpf_verifier_env *env)
-{
-	int i;
-
-	for (i = 1; i < env->subprog_cnt; i++) {
-		if (env->subprog_info[i].has_ld_abs) {
-			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
-			return -EINVAL;
-		}
-		if (env->subprog_info[i].has_tail_call) {
-			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
-			return -EINVAL;
-		}
-	}
-	return 0;
-}
-
-/* The minimum supported BTF func info size */
-#define MIN_BPF_FUNCINFO_SIZE	8
-#define MAX_FUNCINFO_REC_SIZE	252
-
-static int check_btf_func_early(struct bpf_verifier_env *env,
-				const union bpf_attr *attr,
-				bpfptr_t uattr)
-{
-	u32 krec_size = sizeof(struct bpf_func_info);
-	const struct btf_type *type, *func_proto;
-	u32 i, nfuncs, urec_size, min_size;
-	struct bpf_func_info *krecord;
-	struct bpf_prog *prog;
-	const struct btf *btf;
-	u32 prev_offset = 0;
-	bpfptr_t urecord;
-	int ret = -ENOMEM;
-
-	nfuncs = attr->func_info_cnt;
-	if (!nfuncs) {
-		if (check_abnormal_return(env))
-			return -EINVAL;
-		return 0;
-	}
-
-	urec_size = attr->func_info_rec_size;
-	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
-	    urec_size > MAX_FUNCINFO_REC_SIZE ||
-	    urec_size % sizeof(u32)) {
-		verbose(env, "invalid func info rec size %u\n", urec_size);
-		return -EINVAL;
-	}
-
-	prog = env->prog;
-	btf = prog->aux->btf;
-
-	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
-	min_size = min_t(u32, krec_size, urec_size);
-
-	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
-	if (!krecord)
-		return -ENOMEM;
-
-	for (i = 0; i < nfuncs; i++) {
-		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
-		if (ret) {
-			if (ret == -E2BIG) {
-				verbose(env, "nonzero tailing record in func info");
-				/* set the size kernel expects so loader can zero
-				 * out the rest of the record.
-				 */
-				if (copy_to_bpfptr_offset(uattr,
-							  offsetof(union bpf_attr, func_info_rec_size),
-							  &min_size, sizeof(min_size)))
-					ret = -EFAULT;
-			}
-			goto err_free;
-		}
-
-		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
-			ret = -EFAULT;
-			goto err_free;
-		}
-
-		/* check insn_off */
-		ret = -EINVAL;
-		if (i == 0) {
-			if (krecord[i].insn_off) {
-				verbose(env,
-					"nonzero insn_off %u for the first func info record",
-					krecord[i].insn_off);
-				goto err_free;
-			}
-		} else if (krecord[i].insn_off <= prev_offset) {
-			verbose(env,
-				"same or smaller insn offset (%u) than previous func info record (%u)",
-				krecord[i].insn_off, prev_offset);
-			goto err_free;
-		}
-
-		/* check type_id */
-		type = btf_type_by_id(btf, krecord[i].type_id);
-		if (!type || !btf_type_is_func(type)) {
-			verbose(env, "invalid type id %d in func info",
-				krecord[i].type_id);
-			goto err_free;
-		}
-
-		func_proto = btf_type_by_id(btf, type->type);
-		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
-			/* btf_func_check() already verified it during BTF load */
-			goto err_free;
-
-		prev_offset = krecord[i].insn_off;
-		bpfptr_add(&urecord, urec_size);
-	}
-
-	prog->aux->func_info = krecord;
-	prog->aux->func_info_cnt = nfuncs;
-	return 0;
-
-err_free:
-	kvfree(krecord);
-	return ret;
-}
-
-static int check_btf_func(struct bpf_verifier_env *env,
-			  const union bpf_attr *attr,
-			  bpfptr_t uattr)
-{
-	const struct btf_type *type, *func_proto, *ret_type;
-	u32 i, nfuncs, urec_size;
-	struct bpf_func_info *krecord;
-	struct bpf_func_info_aux *info_aux = NULL;
-	struct bpf_prog *prog;
-	const struct btf *btf;
-	bpfptr_t urecord;
-	bool scalar_return;
-	int ret = -ENOMEM;
-
-	nfuncs = attr->func_info_cnt;
-	if (!nfuncs) {
-		if (check_abnormal_return(env))
-			return -EINVAL;
-		return 0;
-	}
-	if (nfuncs != env->subprog_cnt) {
-		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
-		return -EINVAL;
-	}
-
-	urec_size = attr->func_info_rec_size;
-
-	prog = env->prog;
-	btf = prog->aux->btf;
-
-	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
-
-	krecord = prog->aux->func_info;
-	info_aux = kzalloc_objs(*info_aux, nfuncs,
-				GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
-	if (!info_aux)
-		return -ENOMEM;
-
-	for (i = 0; i < nfuncs; i++) {
-		/* check insn_off */
-		ret = -EINVAL;
-
-		if (env->subprog_info[i].start != krecord[i].insn_off) {
-			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
-			goto err_free;
-		}
-
-		/* Already checked type_id */
-		type = btf_type_by_id(btf, krecord[i].type_id);
-		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
-		/* Already checked func_proto */
-		func_proto = btf_type_by_id(btf, type->type);
-
-		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
-		scalar_return =
-			btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
-		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
-			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
-			goto err_free;
-		}
-		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
-			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
-			goto err_free;
-		}
-
-		env->subprog_info[i].name = btf_name_by_offset(btf, type->name_off);
-		bpfptr_add(&urecord, urec_size);
-	}
-
-	prog->aux->func_info_aux = info_aux;
-	return 0;
-
-err_free:
-	kfree(info_aux);
-	return ret;
-}
-
 static void adjust_btf_func(struct bpf_verifier_env *env)
 {
 	struct bpf_prog_aux *aux = env->prog->aux;
@@ -17429,261 +17229,6 @@ static void adjust_btf_func(struct bpf_verifier_env *env)
 		aux->func_info[i].insn_off = env->subprog_info[i].start;
 }
 
-#define MIN_BPF_LINEINFO_SIZE	offsetofend(struct bpf_line_info, line_col)
-#define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
-
-static int check_btf_line(struct bpf_verifier_env *env,
-			  const union bpf_attr *attr,
-			  bpfptr_t uattr)
-{
-	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
-	struct bpf_subprog_info *sub;
-	struct bpf_line_info *linfo;
-	struct bpf_prog *prog;
-	const struct btf *btf;
-	bpfptr_t ulinfo;
-	int err;
-
-	nr_linfo = attr->line_info_cnt;
-	if (!nr_linfo)
-		return 0;
-	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
-		return -EINVAL;
-
-	rec_size = attr->line_info_rec_size;
-	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
-	    rec_size > MAX_LINEINFO_REC_SIZE ||
-	    rec_size & (sizeof(u32) - 1))
-		return -EINVAL;
-
-	/* Need to zero it in case the userspace may
-	 * pass in a smaller bpf_line_info object.
-	 */
-	linfo = kvzalloc_objs(struct bpf_line_info, nr_linfo,
-			      GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
-	if (!linfo)
-		return -ENOMEM;
-
-	prog = env->prog;
-	btf = prog->aux->btf;
-
-	s = 0;
-	sub = env->subprog_info;
-	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
-	expected_size = sizeof(struct bpf_line_info);
-	ncopy = min_t(u32, expected_size, rec_size);
-	for (i = 0; i < nr_linfo; i++) {
-		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
-		if (err) {
-			if (err == -E2BIG) {
-				verbose(env, "nonzero tailing record in line_info");
-				if (copy_to_bpfptr_offset(uattr,
-							  offsetof(union bpf_attr, line_info_rec_size),
-							  &expected_size, sizeof(expected_size)))
-					err = -EFAULT;
-			}
-			goto err_free;
-		}
-
-		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
-			err = -EFAULT;
-			goto err_free;
-		}
-
-		/*
-		 * Check insn_off to ensure
-		 * 1) strictly increasing AND
-		 * 2) bounded by prog->len
-		 *
-		 * The linfo[0].insn_off == 0 check logically falls into
-		 * the later "missing bpf_line_info for func..." case
-		 * because the first linfo[0].insn_off must be the
-		 * first sub also and the first sub must have
-		 * subprog_info[0].start == 0.
-		 */
-		if ((i && linfo[i].insn_off <= prev_offset) ||
-		    linfo[i].insn_off >= prog->len) {
-			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
-				i, linfo[i].insn_off, prev_offset,
-				prog->len);
-			err = -EINVAL;
-			goto err_free;
-		}
-
-		if (!prog->insnsi[linfo[i].insn_off].code) {
-			verbose(env,
-				"Invalid insn code at line_info[%u].insn_off\n",
-				i);
-			err = -EINVAL;
-			goto err_free;
-		}
-
-		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
-		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
-			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
-			err = -EINVAL;
-			goto err_free;
-		}
-
-		if (s != env->subprog_cnt) {
-			if (linfo[i].insn_off == sub[s].start) {
-				sub[s].linfo_idx = i;
-				s++;
-			} else if (sub[s].start < linfo[i].insn_off) {
-				verbose(env, "missing bpf_line_info for func#%u\n", s);
-				err = -EINVAL;
-				goto err_free;
-			}
-		}
-
-		prev_offset = linfo[i].insn_off;
-		bpfptr_add(&ulinfo, rec_size);
-	}
-
-	if (s != env->subprog_cnt) {
-		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
-			env->subprog_cnt - s, s);
-		err = -EINVAL;
-		goto err_free;
-	}
-
-	prog->aux->linfo = linfo;
-	prog->aux->nr_linfo = nr_linfo;
-
-	return 0;
-
-err_free:
-	kvfree(linfo);
-	return err;
-}
-
-#define MIN_CORE_RELO_SIZE	sizeof(struct bpf_core_relo)
-#define MAX_CORE_RELO_SIZE	MAX_FUNCINFO_REC_SIZE
-
-static int check_core_relo(struct bpf_verifier_env *env,
-			   const union bpf_attr *attr,
-			   bpfptr_t uattr)
-{
-	u32 i, nr_core_relo, ncopy, expected_size, rec_size;
-	struct bpf_core_relo core_relo = {};
-	struct bpf_prog *prog = env->prog;
-	const struct btf *btf = prog->aux->btf;
-	struct bpf_core_ctx ctx = {
-		.log = &env->log,
-		.btf = btf,
-	};
-	bpfptr_t u_core_relo;
-	int err;
-
-	nr_core_relo = attr->core_relo_cnt;
-	if (!nr_core_relo)
-		return 0;
-	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
-		return -EINVAL;
-
-	rec_size = attr->core_relo_rec_size;
-	if (rec_size < MIN_CORE_RELO_SIZE ||
-	    rec_size > MAX_CORE_RELO_SIZE ||
-	    rec_size % sizeof(u32))
-		return -EINVAL;
-
-	u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
-	expected_size = sizeof(struct bpf_core_relo);
-	ncopy = min_t(u32, expected_size, rec_size);
-
-	/* Unlike func_info and line_info, copy and apply each CO-RE
-	 * relocation record one at a time.
-	 */
-	for (i = 0; i < nr_core_relo; i++) {
-		/* future proofing when sizeof(bpf_core_relo) changes */
-		err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
-		if (err) {
-			if (err == -E2BIG) {
-				verbose(env, "nonzero tailing record in core_relo");
-				if (copy_to_bpfptr_offset(uattr,
-							  offsetof(union bpf_attr, core_relo_rec_size),
-							  &expected_size, sizeof(expected_size)))
-					err = -EFAULT;
-			}
-			break;
-		}
-
-		if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
-			err = -EFAULT;
-			break;
-		}
-
-		if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
-			verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
-				i, core_relo.insn_off, prog->len);
-			err = -EINVAL;
-			break;
-		}
-
-		err = bpf_core_apply(&ctx, &core_relo, i,
-				     &prog->insnsi[core_relo.insn_off / 8]);
-		if (err)
-			break;
-		bpfptr_add(&u_core_relo, rec_size);
-	}
-	return err;
-}
-
-static int check_btf_info_early(struct bpf_verifier_env *env,
-				const union bpf_attr *attr,
-				bpfptr_t uattr)
-{
-	struct btf *btf;
-	int err;
-
-	if (!attr->func_info_cnt && !attr->line_info_cnt) {
-		if (check_abnormal_return(env))
-			return -EINVAL;
-		return 0;
-	}
-
-	btf = btf_get_by_fd(attr->prog_btf_fd);
-	if (IS_ERR(btf))
-		return PTR_ERR(btf);
-	if (btf_is_kernel(btf)) {
-		btf_put(btf);
-		return -EACCES;
-	}
-	env->prog->aux->btf = btf;
-
-	err = check_btf_func_early(env, attr, uattr);
-	if (err)
-		return err;
-	return 0;
-}
-
-static int check_btf_info(struct bpf_verifier_env *env,
-			  const union bpf_attr *attr,
-			  bpfptr_t uattr)
-{
-	int err;
-
-	if (!attr->func_info_cnt && !attr->line_info_cnt) {
-		if (check_abnormal_return(env))
-			return -EINVAL;
-		return 0;
-	}
-
-	err = check_btf_func(env, attr, uattr);
-	if (err)
-		return err;
-
-	err = check_btf_line(env, attr, uattr);
-	if (err)
-		return err;
-
-	err = check_core_relo(env, attr, uattr);
-	if (err)
-		return err;
-
-	return 0;
-}
-
 /* Find id in idset and increment its count, or add new entry */
 static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
 {
@@ -20426,7 +19971,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 		INIT_LIST_HEAD(&env->explored_states[i]);
 	INIT_LIST_HEAD(&env->free_list);
 
-	ret = check_btf_info_early(env, attr, uattr);
+	ret = bpf_check_btf_info_early(env, attr, uattr);
 	if (ret < 0)
 		goto skip_full_check;
 
@@ -20438,7 +19983,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret < 0)
 		goto skip_full_check;
 
-	ret = check_btf_info(env, attr, uattr);
+	ret = bpf_check_btf_info(env, attr, uattr);
 	if (ret < 0)
 		goto skip_full_check;
 
-- 
cgit v1.2.3


From c78bcbd51976f123909e5c2baf8cebb699453c2f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Apr 2026 14:56:22 +0000
Subject: net: change sk_filter_reason() to return the reason by value

sk_filter_trim_cap will soon return the reason by value,
do the same for sk_filter_reason().

$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-21 (-21)
Function                                     old     new   delta
sock_queue_rcv_skb_reason                    128     126      -2
tun_net_xmit                                1146    1127     -19
Total: Before=29722661, After=29722640, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260409145625.2306224-4-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/tun.c      | 8 +++++---
 include/linux/filter.h | 9 ++++++---
 net/core/sock.c        | 4 ++--
 3 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index c492fda6fc15..b183189f1853 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1031,9 +1031,11 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto drop;
 	}
 
-	if (tfile->socket.sk->sk_filter &&
-	    sk_filter_reason(tfile->socket.sk, skb, &drop_reason))
-		goto drop;
+	if (tfile->socket.sk->sk_filter) {
+		drop_reason = sk_filter_reason(tfile->socket.sk, skb);
+		if (drop_reason)
+			goto drop;
+	}
 
 	len = run_ebpf_filter(tun, skb, len);
 	if (len == 0) {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 44d7ae95ddbc..59931e5810b4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1102,10 +1102,13 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 	return sk_filter_trim_cap(sk, skb, 1, &ignore_reason);
 }
 
-static inline int sk_filter_reason(struct sock *sk, struct sk_buff *skb,
-				   enum skb_drop_reason *reason)
+static inline enum skb_drop_reason
+sk_filter_reason(struct sock *sk, struct sk_buff *skb)
 {
-	return sk_filter_trim_cap(sk, skb, 1, reason);
+	enum skb_drop_reason drop_reason;
+
+	sk_filter_trim_cap(sk, skb, 1, &drop_reason);
+	return drop_reason;
 }
 
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);
diff --git a/net/core/sock.c b/net/core/sock.c
index d39a4d6ccafd..1ffcb15d0fc5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -526,8 +526,8 @@ sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb)
 	enum skb_drop_reason drop_reason;
 	int err;
 
-	err = sk_filter_reason(sk, skb, &drop_reason);
-	if (err)
+	drop_reason = sk_filter_reason(sk, skb);
+	if (drop_reason)
 		return drop_reason;
 
 	err = __sock_queue_rcv_skb(sk, skb);
-- 
cgit v1.2.3


From fb37aea2a00e67ef5264ea39371d350a1d19b24f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Apr 2026 14:56:24 +0000
Subject: net: change sk_filter_trim_cap() to return a drop_reason by value

Current return value can be replaced with the drop_reason,
reducing kernel bloat:

$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/2 grow/shrink: 1/11 up/down: 32/-603 (-571)
Function                                     old     new   delta
tcp_v6_rcv                                  3135    3167     +32
unix_dgram_sendmsg                          1731    1726      -5
netlink_unicast                              957     945     -12
netlink_dump                                1372    1359     -13
sk_filter_trim_cap                           882     858     -24
tcp_v4_rcv                                  3143    3111     -32
__pfx_tcp_filter                              32       -     -32
netlink_broadcast_filtered                  1633    1595     -38
sock_queue_rcv_skb_reason                    126      76     -50
tun_net_xmit                                1127    1074     -53
__sk_receive_skb                             690     632     -58
udpv6_queue_rcv_one_skb                      935     869     -66
udp_queue_rcv_one_skb                        919     853     -66
tcp_filter                                   154       -    -154
Total: Before=29722783, After=29722212, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260409145625.2306224-6-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/filter.h | 14 ++++++--------
 include/net/tcp.h      |  4 +---
 net/core/filter.c      | 31 ++++++++++++++-----------------
 net/core/sock.c        |  5 +++--
 net/ipv4/udp.c         |  3 ++-
 net/ipv6/udp.c         |  3 ++-
 net/rose/rose_in.c     |  3 +--
 7 files changed, 29 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 59931e5810b4..5ac08aa70123 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1092,23 +1092,21 @@ bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 	return set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
 }
 
-int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap,
-		       enum skb_drop_reason *reason);
+enum skb_drop_reason
+sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
-	enum skb_drop_reason ignore_reason;
+	enum skb_drop_reason drop_reason;
 
-	return sk_filter_trim_cap(sk, skb, 1, &ignore_reason);
+	drop_reason = sk_filter_trim_cap(sk, skb, 1);
+	return drop_reason ? -EPERM : 0;
 }
 
 static inline enum skb_drop_reason
 sk_filter_reason(struct sock *sk, struct sk_buff *skb)
 {
-	enum skb_drop_reason drop_reason;
-
-	sk_filter_trim_cap(sk, skb, 1, &drop_reason);
-	return drop_reason;
+	return sk_filter_trim_cap(sk, skb, 1);
 }
 
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 098e52269a04..49f45bcff917 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1687,10 +1687,8 @@ static inline enum skb_drop_reason
 tcp_filter(struct sock *sk, struct sk_buff *skb)
 {
 	const struct tcphdr *th = (const struct tcphdr *)skb->data;
-	enum skb_drop_reason reason;
 
-	sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th), &reason);
-	return reason;
+	return sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th));
 }
 
 void tcp_set_state(struct sock *sk, int state);
diff --git a/net/core/filter.c b/net/core/filter.c
index 5569d83b8be0..bf9c37b27646 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -121,20 +121,20 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
  *	@sk: sock associated with &sk_buff
  *	@skb: buffer to filter
  *	@cap: limit on how short the eBPF program may trim the packet
- *	@reason: record drop reason
  *
  * Run the eBPF program and then cut skb->data to correct size returned by
  * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
  * wrapper to bpf_prog_run. It returns 0 if the packet should
- * be accepted or -EPERM if the packet should be tossed.
+ * be accepted or a drop_reason if the packet should be tossed.
  *
  */
-int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb,
-		       unsigned int cap, enum skb_drop_reason *reason)
+enum skb_drop_reason
+sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
 {
-	int err;
+	enum skb_drop_reason drop_reason;
 	struct sk_filter *filter;
+	int err;
 
 	/*
 	 * If the skb was allocated from pfmemalloc reserves, only
@@ -143,21 +143,17 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb,
 	 */
 	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
-		*reason = SKB_DROP_REASON_PFMEMALLOC;
-		return -ENOMEM;
+		return SKB_DROP_REASON_PFMEMALLOC;
 	}
 	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
-	if (err) {
-		*reason = SKB_DROP_REASON_SOCKET_FILTER;
-		return err;
-	}
+	if (err)
+		return SKB_DROP_REASON_SOCKET_FILTER;
 
 	err = security_sock_rcv_skb(sk, skb);
-	if (err) {
-		*reason = SKB_DROP_REASON_SECURITY_HOOK;
-		return err;
-	}
+	if (err)
+		return SKB_DROP_REASON_SECURITY_HOOK;
 
+	drop_reason = 0;
 	rcu_read_lock();
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
@@ -168,11 +164,12 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb,
 		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
 		skb->sk = save_sk;
 		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
+		if (err)
+			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
 	}
 	rcu_read_unlock();
 
-	*reason = err ? SKB_DROP_REASON_SOCKET_FILTER : 0;
-	return err;
+	return drop_reason;
 }
 EXPORT_SYMBOL(sk_filter_trim_cap);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 1ffcb15d0fc5..367fd7bad4ac 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -544,11 +544,12 @@ EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
 		     const int nested, unsigned int trim_cap, bool refcounted)
 {
-	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
+	enum skb_drop_reason reason;
 	int rc = NET_RX_SUCCESS;
 	int err;
 
-	if (sk_filter_trim_cap(sk, skb, trim_cap, &reason))
+	reason = sk_filter_trim_cap(sk, skb, trim_cap);
+	if (reason)
 		goto discard_and_relse;
 
 	skb->dev = NULL;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ab415de32443..2fddc7b6b717 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2392,7 +2392,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 	    udp_lib_checksum_complete(skb))
 			goto csum_error;
 
-	if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason))
+	drop_reason = sk_filter_trim_cap(sk, skb, sizeof(struct udphdr));
+	if (drop_reason)
 		goto drop;
 
 	udp_csum_pull_header(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d7cf4c9508b2..3fac9cb47ae0 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -853,7 +853,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 	    udp_lib_checksum_complete(skb))
 		goto csum_error;
 
-	if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason))
+	drop_reason = sk_filter_trim_cap(sk, skb, sizeof(struct udphdr));
+	if (drop_reason)
 		goto drop;
 
 	udp_csum_pull_header(skb);
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 0276b393f0e5..3aff3c2d45a9 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -101,7 +101,6 @@ static int rose_state2_machine(struct sock *sk, struct sk_buff *skb, int framety
  */
 static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype, int ns, int nr, int q, int d, int m)
 {
-	enum skb_drop_reason dr; /* ignored */
 	struct rose_sock *rose = rose_sk(sk);
 	int queued = 0;
 
@@ -163,7 +162,7 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety
 		rose_frames_acked(sk, nr);
 		if (ns == rose->vr) {
 			rose_start_idletimer(sk);
-			if (!sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN, &dr) &&
+			if (!sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN) &&
 			    __sock_queue_rcv_skb(sk, skb) == 0) {
 				rose->vr = (rose->vr + 1) % ROSE_MODULUS;
 				queued = 1;
-- 
cgit v1.2.3


From 105369d627b946f6a05f25e9c399167b1674d4bc Mon Sep 17 00:00:00 2001
From: Qingfang Deng <qingfang.deng@linux.dev>
Date: Fri, 10 Apr 2026 13:49:49 +0800
Subject: pppox: remove sk_pppox() helper

The sk member can be directly accessed from struct pppox_sock without
relying on type casting. Remove the sk_pppox() helper and update all
call sites to use po->sk directly.

Signed-off-by: Qingfang Deng <qingfang.deng@linux.dev>
Link: https://patch.msgid.link/20260410054954.114031-1-qingfang.deng@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ppp/pppoe.c  | 10 +++++-----
 drivers/net/ppp/pptp.c   |  6 +++---
 include/linux/if_pppox.h |  5 -----
 3 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 1ac61c273b28..d546a7af0d54 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -231,7 +231,7 @@ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid,
 	struct pppox_sock *po;
 
 	po = __get_item(pn, sid, addr, ifindex);
-	if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt))
+	if (po && !refcount_inc_not_zero(&po->sk.sk_refcnt))
 		po = NULL;
 
 	return po;
@@ -273,7 +273,7 @@ static void pppoe_flush_dev(struct net_device *dev)
 			if (!po)
 				break;
 
-			sk = sk_pppox(po);
+			sk = &po->sk;
 
 			/* We always grab the socket lock, followed by the
 			 * hash_lock, in that order.  Since we should hold the
@@ -413,7 +413,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (!po)
 		goto drop;
 
-	return __sk_receive_skb(sk_pppox(po), skb, 0, 1, false);
+	return __sk_receive_skb(&po->sk, skb, 0, 1, false);
 
 drop:
 	kfree_skb(skb);
@@ -425,7 +425,7 @@ static void pppoe_unbind_sock_work(struct work_struct *work)
 {
 	struct pppox_sock *po = container_of(work, struct pppox_sock,
 					     proto.pppoe.padt_work);
-	struct sock *sk = sk_pppox(po);
+	struct sock *sk = &po->sk;
 
 	lock_sock(sk);
 	if (po->pppoe_dev) {
@@ -469,7 +469,7 @@ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev,
 	po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
 	if (po)
 		if (!schedule_work(&po->proto.pppoe.padt_work))
-			sock_put(sk_pppox(po));
+			sock_put(&po->sk);
 
 abort:
 	kfree_skb(skb);
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index b18acd810561..cc8c102122d8 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -62,7 +62,7 @@ static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr)
 		if (opt->dst_addr.sin_addr.s_addr != s_addr)
 			sock = NULL;
 		else
-			sock_hold(sk_pppox(sock));
+			sock_hold(&sock->sk);
 	}
 	rcu_read_unlock();
 
@@ -164,7 +164,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	struct iphdr  *iph;
 	int    max_headroom;
 
-	if (sk_pppox(po)->sk_state & PPPOX_DEAD)
+	if (po->sk.sk_state & PPPOX_DEAD)
 		goto tx_drop;
 
 	rt = pptp_route_output(po, &fl4);
@@ -375,7 +375,7 @@ static int pptp_rcv(struct sk_buff *skb)
 	if (po) {
 		skb_dst_drop(skb);
 		nf_reset_ct(skb);
-		return sk_receive_skb(sk_pppox(po), skb, 0);
+		return sk_receive_skb(&po->sk, skb, 0);
 	}
 drop:
 	kfree_skb(skb);
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 8bbf676c2a85..636772693f9a 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -57,11 +57,6 @@ static inline struct pppox_sock *pppox_sk(struct sock *sk)
 	return (struct pppox_sock *)sk;
 }
 
-static inline struct sock *sk_pppox(struct pppox_sock *po)
-{
-	return (struct sock *)po;
-}
-
 struct module;
 
 struct pppox_proto {
-- 
cgit v1.2.3


From 6bc78039a77a46d89df987813fbafe333cd81367 Mon Sep 17 00:00:00 2001
From: Qingfang Deng <qingfang.deng@linux.dev>
Date: Fri, 10 Apr 2026 13:49:50 +0800
Subject: pppox: convert pppox_sk() to use container_of()

Use container_of() macro instead of direct pointer casting to get the
pppox_sock from a sock pointer.

Signed-off-by: Qingfang Deng <qingfang.deng@linux.dev>
Link: https://patch.msgid.link/20260410054954.114031-2-qingfang.deng@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/if_pppox.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 636772693f9a..594d6dc3f4c9 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -54,7 +54,7 @@ struct pppox_sock {
 
 static inline struct pppox_sock *pppox_sk(struct sock *sk)
 {
-	return (struct pppox_sock *)sk;
+	return container_of(sk, struct pppox_sock, sk);
 }
 
 struct module;
-- 
cgit v1.2.3


From 0ec7f158dc01e354ba83d808e46346dba826e353 Mon Sep 17 00:00:00 2001
From: Marco Nenciarini <mnencia@kcore.it>
Date: Wed, 1 Apr 2026 22:36:38 +0200
Subject: platform/x86: int3472: Add support for GPIO type 0x02 (IR flood LED)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for GPIO type 0x02, which controls an IR flood LED used
for face authentication on some laptops (e.g. Dell Pro Max 16 Premium).

Without this patch, the kernel logs "GPIO type 0x02 unknown; the sensor
may not work" and IR sensors paired with a flood LED cannot function.

The flood LED is registered through the LED subsystem like the existing
privacy LED, including a lookup entry to allow future consumer drivers
to find and control it via led_get().

To support multiple LEDs per INT3472 device, convert the single led
struct member to an array with a counter.

Signed-off-by: Marco Nenciarini <mnencia@kcore.it>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Link: https://patch.msgid.link/20260401203638.1601661-5-mnencia@kcore.it
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/x86/intel/int3472/discrete.c |  9 ++++++++-
 drivers/platform/x86/intel/int3472/led.c      | 23 ++++++++++++-----------
 include/linux/platform_data/x86/int3472.h     |  7 +++++--
 3 files changed, 25 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c
index 637e3821b496..115bb37577a1 100644
--- a/drivers/platform/x86/intel/int3472/discrete.c
+++ b/drivers/platform/x86/intel/int3472/discrete.c
@@ -215,6 +215,10 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3
 		*con_id = "privacy";
 		*gpio_flags = GPIO_ACTIVE_HIGH;
 		break;
+	case INT3472_GPIO_TYPE_STROBE:
+		*con_id = "ir_flood";
+		*gpio_flags = GPIO_ACTIVE_HIGH;
+		break;
 	case INT3472_GPIO_TYPE_HOTPLUG_DETECT:
 		*con_id = "hpd";
 		*gpio_flags = GPIO_ACTIVE_HIGH;
@@ -252,6 +256,7 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3
  *
  * 0x00 Reset
  * 0x01 Power down
+ * 0x02 Strobe
  * 0x0b Power enable
  * 0x0c Clock enable
  * 0x0d Privacy LED
@@ -336,6 +341,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
 		break;
 	case INT3472_GPIO_TYPE_CLK_ENABLE:
 	case INT3472_GPIO_TYPE_PRIVACY_LED:
+	case INT3472_GPIO_TYPE_STROBE:
 	case INT3472_GPIO_TYPE_POWER_ENABLE:
 	case INT3472_GPIO_TYPE_DOVDD:
 	case INT3472_GPIO_TYPE_HANDSHAKE:
@@ -354,6 +360,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
 
 			break;
 		case INT3472_GPIO_TYPE_PRIVACY_LED:
+		case INT3472_GPIO_TYPE_STROBE:
 			ret = skl_int3472_register_led(int3472, gpio, con_id);
 			if (ret)
 				err_msg = "Failed to register LED\n";
@@ -429,7 +436,7 @@ void int3472_discrete_cleanup(struct int3472_discrete_device *int3472)
 	gpiod_remove_lookup_table(&int3472->gpios);
 
 	skl_int3472_unregister_clock(int3472);
-	skl_int3472_unregister_led(int3472);
+	skl_int3472_unregister_leds(int3472);
 	skl_int3472_unregister_regulator(int3472);
 }
 EXPORT_SYMBOL_NS_GPL(int3472_discrete_cleanup, "INTEL_INT3472_DISCRETE");
diff --git a/drivers/platform/x86/intel/int3472/led.c b/drivers/platform/x86/intel/int3472/led.c
index 22d0d6c5e6ce..9b2573cc347b 100644
--- a/drivers/platform/x86/intel/int3472/led.c
+++ b/drivers/platform/x86/intel/int3472/led.c
@@ -17,13 +17,14 @@ static int int3472_led_set(struct led_classdev *led_cdev, enum led_brightness br
 int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpio_desc *gpio,
 			     const char *con_id)
 {
-	struct int3472_led *led = &int3472->led;
+	struct int3472_led *led;
 	char *p;
 	int ret;
 
-	if (led->classdev.dev)
-		return -EBUSY;
+	if (int3472->n_leds >= INT3472_MAX_LEDS)
+		return -ENOSPC;
 
+	led = &int3472->leds[int3472->n_leds];
 	led->gpio = gpio;
 
 	/* Generate the name, replacing the ':' in the ACPI devname with '_' */
@@ -46,17 +47,17 @@ int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpi
 	led->lookup.con_id = con_id;
 	led_add_lookup(&led->lookup);
 
+	int3472->n_leds++;
 	return 0;
 }
 
-void skl_int3472_unregister_led(struct int3472_discrete_device *int3472)
+void skl_int3472_unregister_leds(struct int3472_discrete_device *int3472)
 {
-	struct int3472_led *led = &int3472->led;
+	for (unsigned int i = 0; i < int3472->n_leds; i++) {
+		struct int3472_led *led = &int3472->leds[i];
 
-	if (IS_ERR_OR_NULL(led->classdev.dev))
-		return;
-
-	led_remove_lookup(&led->lookup);
-	led_classdev_unregister(&led->classdev);
-	gpiod_put(led->gpio);
+		led_remove_lookup(&led->lookup);
+		led_classdev_unregister(&led->classdev);
+		gpiod_put(led->gpio);
+	}
 }
diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
index ebf4d0637624..93f1e1fe09b4 100644
--- a/include/linux/platform_data/x86/int3472.h
+++ b/include/linux/platform_data/x86/int3472.h
@@ -23,6 +23,7 @@
 /* PMIC GPIO Types */
 #define INT3472_GPIO_TYPE_RESET					0x00
 #define INT3472_GPIO_TYPE_POWERDOWN				0x01
+#define INT3472_GPIO_TYPE_STROBE				0x02
 #define INT3472_GPIO_TYPE_POWER_ENABLE				0x0b
 #define INT3472_GPIO_TYPE_CLK_ENABLE				0x0c
 #define INT3472_GPIO_TYPE_PRIVACY_LED				0x0d
@@ -32,6 +33,7 @@
 
 #define INT3472_PDEV_MAX_NAME_LEN				23
 #define INT3472_MAX_SENSOR_GPIOS				3
+#define INT3472_MAX_LEDS					2
 #define INT3472_MAX_REGULATORS					3
 
 /* E.g. "dovdd\0" */
@@ -127,11 +129,12 @@ struct int3472_discrete_device {
 		struct led_lookup_data lookup;
 		char name[INT3472_LED_MAX_NAME_LEN];
 		struct gpio_desc *gpio;
-	} led;
+	} leds[INT3472_MAX_LEDS];
 
 	struct int3472_discrete_quirks quirks;
 
 	unsigned int ngpios; /* how many GPIOs have we seen */
+	unsigned int n_leds; /* how many LEDs have we registered */
 	unsigned int n_sensor_gpios; /* how many have we mapped to sensor */
 	unsigned int n_regulator_gpios; /* how many have we mapped to a regulator */
 	struct gpiod_lookup_table gpios;
@@ -163,6 +166,6 @@ void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472);
 
 int skl_int3472_register_led(struct int3472_discrete_device *int3472, struct gpio_desc *gpio,
 			     const char *con_id);
-void skl_int3472_unregister_led(struct int3472_discrete_device *int3472);
+void skl_int3472_unregister_leds(struct int3472_discrete_device *int3472);
 
 #endif
-- 
cgit v1.2.3


From 7e2d964f417ec13763eecfecc5d2813f63cb8da0 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Mon, 6 Apr 2026 22:32:32 +0200
Subject: platform/wmi: Add wmidev_invoke_procedure()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some WMI methods return no values, so the whole postprocessing
of the result data is not needed for them. Add a special function
for calling such WMI methods to prepare for future changes of
the main wmidev_invoke_method() function.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20260406203237.2970-2-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 Documentation/wmi/driver-development-guide.rst |  3 +-
 drivers/platform/wmi/core.c                    | 44 ++++++++++++++++++++++++++
 include/linux/wmi.h                            |  3 ++
 3 files changed, 49 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/wmi/driver-development-guide.rst b/Documentation/wmi/driver-development-guide.rst
index fbc2d9b12fe9..5b94402874c4 100644
--- a/Documentation/wmi/driver-development-guide.rst
+++ b/Documentation/wmi/driver-development-guide.rst
@@ -106,7 +106,8 @@ WMI method drivers
 
 WMI drivers can call WMI device methods using wmidev_invoke_method(). For each WMI method
 invocation the WMI driver needs to provide the instance number and the method ID, as well as
-a buffer with the method arguments and optionally a buffer for the results.
+a buffer with the method arguments and optionally a buffer for the results. When calling WMI
+methods that do not return any values, wmidev_invoke_procedure() should be used instead.
 
 The layout of said buffers is device-specific and described by the Binary MOF data associated
 with a given WMI device. Said Binary MOF data also describes the method ID of a given WMI method
diff --git a/drivers/platform/wmi/core.c b/drivers/platform/wmi/core.c
index b8e6b9a421c6..7cc5ca11a60d 100644
--- a/drivers/platform/wmi/core.c
+++ b/drivers/platform/wmi/core.c
@@ -427,6 +427,50 @@ int wmidev_invoke_method(struct wmi_device *wdev, u8 instance, u32 method_id,
 }
 EXPORT_SYMBOL_GPL(wmidev_invoke_method);
 
+/**
+ * wmidev_invoke_procedure - Invoke a WMI method that does not return values
+ * @wdev: A wmi bus device from a driver
+ * @instance: Instance index
+ * @method_id: Method ID to call
+ * @in: Mandatory WMI buffer containing input for the method call
+ *
+ * Invoke a WMI method that does not return any values. Use wmidev_invoke_method()
+ * for WMI methods that do return values.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int wmidev_invoke_procedure(struct wmi_device *wdev, u8 instance, u32 method_id,
+			    const struct wmi_buffer *in)
+{
+	struct wmi_block *wblock = container_of(wdev, struct wmi_block, dev);
+	struct acpi_buffer ain;
+	acpi_status status;
+	int ret;
+
+	if (wblock->gblock.flags & ACPI_WMI_STRING) {
+		ret = wmi_marshal_string(in, &ain);
+		if (ret < 0)
+			return ret;
+	} else {
+		if (in->length > U32_MAX)
+			return -E2BIG;
+
+		ain.length = in->length;
+		ain.pointer = in->data;
+	}
+
+	status = wmidev_evaluate_method(wdev, instance, method_id, &ain, NULL);
+
+	if (wblock->gblock.flags & ACPI_WMI_STRING)
+		kfree(ain.pointer);
+
+	if (ACPI_FAILURE(status))
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wmidev_invoke_procedure);
+
 static acpi_status __query_block(struct wmi_block *wblock, u8 instance,
 				 struct acpi_buffer *out)
 {
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 75cb0c7cfe57..b00950dc1231 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -70,6 +70,9 @@ ssize_t wmi_string_from_utf8s(struct wmi_string *str, size_t max_chars, const u8
 int wmidev_invoke_method(struct wmi_device *wdev, u8 instance, u32 method_id,
 			 const struct wmi_buffer *in, struct wmi_buffer *out);
 
+int wmidev_invoke_procedure(struct wmi_device *wdev, u8 instance, u32 method_id,
+			    const struct wmi_buffer *in);
+
 int wmidev_query_block(struct wmi_device *wdev, u8 instance, struct wmi_buffer *out);
 
 int wmidev_set_block(struct wmi_device *wdev, u8 instance, const struct wmi_buffer *in);
-- 
cgit v1.2.3


From 96b1b053e10d89f666a37b52be25ed4294e342be Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Mon, 6 Apr 2026 22:32:35 +0200
Subject: platform/wmi: Extend wmidev_invoke_method() to reject undersized data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

WMI drivers using the buffer-based WMI API are expected to reject
undersized method return values. Extend wmidev_invoke_method() to
enable the WMI driver core to perform this size check internally.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20260406203237.2970-5-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/wmi/core.c             | 23 ++++++++++-------------
 drivers/platform/x86/bitland-mifs-wmi.c | 11 +++--------
 include/linux/wmi.h                     |  2 +-
 3 files changed, 14 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/wmi/core.c b/drivers/platform/wmi/core.c
index 66ec885bffd7..a1a612f33233 100644
--- a/drivers/platform/wmi/core.c
+++ b/drivers/platform/wmi/core.c
@@ -364,20 +364,23 @@ acpi_status wmidev_evaluate_method(struct wmi_device *wdev, u8 instance, u32 met
 EXPORT_SYMBOL_GPL(wmidev_evaluate_method);
 
 /**
- * wmidev_invoke_method - Invoke a WMI method
+ * wmidev_invoke_method - Invoke a WMI method that returns values
  * @wdev: A wmi bus device from a driver
  * @instance: Instance index
  * @method_id: Method ID to call
  * @in: Mandatory WMI buffer containing input for the method call
- * @out: Optional WMI buffer to return the method results
+ * @out: Mandatory WMI buffer to return the method results
+ * @min_size: Minimum size of the method result data in bytes
  *
- * Invoke a WMI method, the caller must free the resulting data inside @out.
- * Said data is guaranteed to be aligned on a 8-byte boundary.
+ * Invoke a WMI method that returns values, the caller must free the resulting
+ * data inside @out using kfree(). Said data is guaranteed to be aligned on a
+ * 8-byte boundary. Use wmidev_invoke_procedure() for WMI methods that
+ * return no values.
  *
  * Return: 0 on success or negative error code on failure.
  */
 int wmidev_invoke_method(struct wmi_device *wdev, u8 instance, u32 method_id,
-			 const struct wmi_buffer *in, struct wmi_buffer *out)
+			 const struct wmi_buffer *in, struct wmi_buffer *out, size_t min_size)
 {
 	struct wmi_block *wblock = container_of(wdev, struct wmi_block, dev);
 	struct acpi_buffer aout = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -398,10 +401,7 @@ int wmidev_invoke_method(struct wmi_device *wdev, u8 instance, u32 method_id,
 		ain.pointer = in->data;
 	}
 
-	if (out)
-		status = wmidev_evaluate_method(wdev, instance, method_id, &ain, &aout);
-	else
-		status = wmidev_evaluate_method(wdev, instance, method_id, &ain, NULL);
+	status = wmidev_evaluate_method(wdev, instance, method_id, &ain, &aout);
 
 	if (wblock->gblock.flags & ACPI_WMI_STRING)
 		kfree(ain.pointer);
@@ -409,9 +409,6 @@ int wmidev_invoke_method(struct wmi_device *wdev, u8 instance, u32 method_id,
 	if (ACPI_FAILURE(status))
 		return -EIO;
 
-	if (!out)
-		return 0;
-
 	obj = aout.pointer;
 	if (!obj) {
 		out->length = 0;
@@ -420,7 +417,7 @@ int wmidev_invoke_method(struct wmi_device *wdev, u8 instance, u32 method_id,
 		return 0;
 	}
 
-	ret = wmi_unmarshal_acpi_object(obj, out, 0);
+	ret = wmi_unmarshal_acpi_object(obj, out, min_size);
 	kfree(obj);
 
 	return ret;
diff --git a/drivers/platform/x86/bitland-mifs-wmi.c b/drivers/platform/x86/bitland-mifs-wmi.c
index cd3cdd087511..78639407d67e 100644
--- a/drivers/platform/x86/bitland-mifs-wmi.c
+++ b/drivers/platform/x86/bitland-mifs-wmi.c
@@ -10,7 +10,6 @@
 #include <linux/acpi.h>
 #include <linux/array_size.h>
 #include <linux/bits.h>
-#include <linux/cleanup.h>
 #include <linux/container_of.h>
 #include <linux/dev_printk.h>
 #include <linux/device.h>
@@ -167,7 +166,6 @@ static int bitland_mifs_wmi_call(struct bitland_mifs_wmi_data *data,
 				 struct bitland_mifs_output *output)
 {
 	struct wmi_buffer in_buf = { .length = sizeof(*input), .data = (void *)input };
-	void *out_data __free(kfree) = NULL;
 	struct wmi_buffer out_buf = { 0 };
 	int ret;
 
@@ -176,15 +174,12 @@ static int bitland_mifs_wmi_call(struct bitland_mifs_wmi_data *data,
 	if (!output)
 		return wmidev_invoke_procedure(data->wdev, 0, 1, &in_buf);
 
-	ret = wmidev_invoke_method(data->wdev, 0, 1, &in_buf, &out_buf);
+	ret = wmidev_invoke_method(data->wdev, 0, 1, &in_buf, &out_buf, sizeof(*output));
 	if (ret)
 		return ret;
 
-	out_data = out_buf.data;
-	if (out_buf.length < sizeof(*output))
-		return -EIO;
-
-	memcpy(output, out_data, sizeof(*output));
+	memcpy(output, out_buf.data, sizeof(*output));
+	kfree(out_buf.data);
 
 	return 0;
 }
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index b00950dc1231..858398beb01a 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -68,7 +68,7 @@ ssize_t wmi_string_from_utf8s(struct wmi_string *str, size_t max_chars, const u8
 			      size_t src_length);
 
 int wmidev_invoke_method(struct wmi_device *wdev, u8 instance, u32 method_id,
-			 const struct wmi_buffer *in, struct wmi_buffer *out);
+			 const struct wmi_buffer *in, struct wmi_buffer *out, size_t min_size);
 
 int wmidev_invoke_procedure(struct wmi_device *wdev, u8 instance, u32 method_id,
 			    const struct wmi_buffer *in);
-- 
cgit v1.2.3


From 1aeded2f55f04fafb07b01e12142fd20c2a3d288 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Mon, 6 Apr 2026 22:32:36 +0200
Subject: platform/wmi: Extend wmidev_query_block() to reject undersized data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

WMI drivers using the buffer-based WMI API are expected to reject
undersized query results. Extend wmidev_query_block() to enable
the WMI driver core to perform this size check internally.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20260406203237.2970-6-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/wmi/core.c                    | 10 ++++++----
 drivers/platform/x86/intel/wmi/sbl-fw-update.c |  7 +------
 drivers/platform/x86/wmi-bmof.c                |  2 +-
 include/linux/wmi.h                            |  3 ++-
 4 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/wmi/core.c b/drivers/platform/wmi/core.c
index a1a612f33233..87b0e54dde5a 100644
--- a/drivers/platform/wmi/core.c
+++ b/drivers/platform/wmi/core.c
@@ -565,13 +565,15 @@ EXPORT_SYMBOL_GPL(wmidev_block_query);
  * @wdev: A wmi bus device from a driver
  * @instance: Instance index
  * @out: WMI buffer to fill
+ * @min_size: Minimum size of the result data in bytes
  *
- * Query a WMI data block, the caller must free the resulting data inside @out.
- * Said data is guaranteed to be aligned on a 8-byte boundary.
+ * Query a WMI data block, the caller must free the resulting data inside @out
+ * using kfree(). Said data is guaranteed to be aligned on a 8-byte boundary.
  *
  * Return: 0 on success or a negative error code on failure.
  */
-int wmidev_query_block(struct wmi_device *wdev, u8 instance, struct wmi_buffer *out)
+int wmidev_query_block(struct wmi_device *wdev, u8 instance, struct wmi_buffer *out,
+		       size_t min_size)
 {
 	union acpi_object *obj;
 	int ret;
@@ -580,7 +582,7 @@ int wmidev_query_block(struct wmi_device *wdev, u8 instance, struct wmi_buffer *
 	if (!obj)
 		return -EIO;
 
-	ret = wmi_unmarshal_acpi_object(obj, out, 0);
+	ret = wmi_unmarshal_acpi_object(obj, out, min_size);
 	kfree(obj);
 
 	return ret;
diff --git a/drivers/platform/x86/intel/wmi/sbl-fw-update.c b/drivers/platform/x86/intel/wmi/sbl-fw-update.c
index 3716ccaaed6a..62c9c7f1842b 100644
--- a/drivers/platform/x86/intel/wmi/sbl-fw-update.c
+++ b/drivers/platform/x86/intel/wmi/sbl-fw-update.c
@@ -28,15 +28,10 @@ static int get_fwu_request(struct device *dev, u32 *out)
 	__le32 *result;
 	int ret;
 
-	ret = wmidev_query_block(to_wmi_device(dev), 0, &buffer);
+	ret = wmidev_query_block(to_wmi_device(dev), 0, &buffer, sizeof(*result));
 	if (ret < 0)
 		return ret;
 
-	if (buffer.length < sizeof(*result)) {
-		kfree(buffer.data);
-		return -ENODATA;
-	}
-
 	result = buffer.data;
 	*out = le32_to_cpu(*result);
 	kfree(result);
diff --git a/drivers/platform/x86/wmi-bmof.c b/drivers/platform/x86/wmi-bmof.c
index e3a126de421b..6623cf60e4b4 100644
--- a/drivers/platform/x86/wmi-bmof.c
+++ b/drivers/platform/x86/wmi-bmof.c
@@ -62,7 +62,7 @@ static int wmi_bmof_probe(struct wmi_device *wdev, const void *context)
 	if (!buffer)
 		return -ENOMEM;
 
-	ret = wmidev_query_block(wdev, 0, buffer);
+	ret = wmidev_query_block(wdev, 0, buffer, 0);
 	if (ret < 0)
 		return ret;
 
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 858398beb01a..da94580572a9 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -73,7 +73,8 @@ int wmidev_invoke_method(struct wmi_device *wdev, u8 instance, u32 method_id,
 int wmidev_invoke_procedure(struct wmi_device *wdev, u8 instance, u32 method_id,
 			    const struct wmi_buffer *in);
 
-int wmidev_query_block(struct wmi_device *wdev, u8 instance, struct wmi_buffer *out);
+int wmidev_query_block(struct wmi_device *wdev, u8 instance, struct wmi_buffer *out,
+		       size_t min_size);
 
 int wmidev_set_block(struct wmi_device *wdev, u8 instance, const struct wmi_buffer *in);
 
-- 
cgit v1.2.3


From 2e2a39149fe37327e0af225f09cad19526a90d48 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Mon, 6 Apr 2026 22:32:37 +0200
Subject: platform/wmi: Replace .no_notify_data with .min_event_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

WMI drivers using the buffer-based WMI API are expected to reject
undersized event payloads. Extend the WMI driver core to allow
such drivers to specify their minimum supported event payload size.
Also remove the now redundant .no_notify_data field.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20260406203237.2970-7-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 Documentation/wmi/driver-development-guide.rst |  8 +++++---
 drivers/platform/wmi/core.c                    | 12 ++++++++----
 drivers/platform/x86/bitland-mifs-wmi.c        |  8 ++------
 drivers/platform/x86/dell/dell-wmi-base.c      |  1 +
 drivers/platform/x86/lenovo/ideapad-laptop.c   |  1 +
 drivers/platform/x86/lenovo/wmi-camera.c       |  1 +
 drivers/platform/x86/lenovo/wmi-events.c       |  1 +
 drivers/platform/x86/lenovo/ymc.c              |  1 +
 drivers/platform/x86/lenovo/yogabook.c         |  2 +-
 drivers/platform/x86/redmi-wmi.c               |  1 +
 drivers/platform/x86/uniwill/uniwill-wmi.c     |  1 +
 drivers/platform/x86/xiaomi-wmi.c              |  1 +
 include/linux/wmi.h                            |  7 +++++--
 13 files changed, 29 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/wmi/driver-development-guide.rst b/Documentation/wmi/driver-development-guide.rst
index 5b94402874c4..387f508d57ad 100644
--- a/Documentation/wmi/driver-development-guide.rst
+++ b/Documentation/wmi/driver-development-guide.rst
@@ -71,7 +71,7 @@ to matching WMI devices using a struct wmi_device_id table:
         .remove = foo_remove,         /* optional, devres is preferred */
         .shutdown = foo_shutdown,     /* optional, called during shutdown */
         .notify_new = foo_notify,     /* optional, for event handling */
-        .no_notify_data = true,       /* optional, enables events containing no additional data */
+        .min_event_size = X,          /* optional, simplifies event payload size verification */
         .no_singleton = true,         /* required for new WMI drivers */
   };
   module_wmi_driver(foo_driver);
@@ -142,8 +142,10 @@ right before and after calling its remove() or shutdown() callback.
 However WMI driver developers should be aware that multiple WMI events can be received concurrently,
 so any locking (if necessary) needs to be provided by the WMI driver itself.
 
-In order to be able to receive WMI events containing no additional event data,
-the ``no_notify_data`` flag inside struct wmi_driver should be set to ``true``.
+The WMI driver can furthermore instruct the WMI driver core to automatically reject WMI events
+that contain a undersized event payload by populating the ``min_event_size`` field inside
+struct wmi_driver. Setting this field to 0 will thus enable the WMI driver to receive WMI events
+without any event payload.
 
 Take a look at drivers/platform/x86/xiaomi-wmi.c for an example WMI event driver.
 
diff --git a/drivers/platform/wmi/core.c b/drivers/platform/wmi/core.c
index 87b0e54dde5a..7df50c8238e5 100644
--- a/drivers/platform/wmi/core.c
+++ b/drivers/platform/wmi/core.c
@@ -1040,7 +1040,7 @@ static int wmi_dev_probe(struct device *dev)
 	}
 
 	if (wdriver->notify || wdriver->notify_new) {
-		if (test_bit(WMI_NO_EVENT_DATA, &wblock->flags) && !wdriver->no_notify_data)
+		if (test_bit(WMI_NO_EVENT_DATA, &wblock->flags) && wdriver->min_event_size)
 			return -ENODEV;
 	}
 
@@ -1398,10 +1398,14 @@ static int wmi_get_notify_data(struct wmi_block *wblock, union acpi_object **obj
 static void wmi_notify_driver(struct wmi_block *wblock, union acpi_object *obj)
 {
 	struct wmi_driver *driver = to_wmi_driver(wblock->dev.dev.driver);
+	struct wmi_buffer dummy = {
+		.length = 0,
+		.data = ZERO_SIZE_PTR,
+	};
 	struct wmi_buffer buffer;
 	int ret;
 
-	if (!obj && !driver->no_notify_data) {
+	if (!obj && driver->min_event_size) {
 		dev_warn(&wblock->dev.dev, "Event contains no event data\n");
 		return;
 	}
@@ -1411,11 +1415,11 @@ static void wmi_notify_driver(struct wmi_block *wblock, union acpi_object *obj)
 
 	if (driver->notify_new) {
 		if (!obj) {
-			driver->notify_new(&wblock->dev, NULL);
+			driver->notify_new(&wblock->dev, &dummy);
 			return;
 		}
 
-		ret = wmi_unmarshal_acpi_object(obj, &buffer, 0);
+		ret = wmi_unmarshal_acpi_object(obj, &buffer, driver->min_event_size);
 		if (ret < 0) {
 			dev_warn(&wblock->dev.dev, "Failed to unmarshal event data: %d\n", ret);
 			return;
diff --git a/drivers/platform/x86/bitland-mifs-wmi.c b/drivers/platform/x86/bitland-mifs-wmi.c
index 78639407d67e..b0d06a80e89e 100644
--- a/drivers/platform/x86/bitland-mifs-wmi.c
+++ b/drivers/platform/x86/bitland-mifs-wmi.c
@@ -734,15 +734,10 @@ static void bitland_mifs_wmi_notify(struct wmi_device *wdev,
 				    const struct wmi_buffer *buffer)
 {
 	struct bitland_mifs_wmi_data *data = dev_get_drvdata(&wdev->dev);
-	const struct bitland_mifs_event *event;
+	const struct bitland_mifs_event *event = buffer->data;
 	struct bitland_fan_notify_data fan_data;
 	u8 brightness;
 
-	if (buffer->length < sizeof(*event))
-		return;
-
-	event = buffer->data;
-
 	/* Validate event type */
 	if (event->event_type != WMI_EVENT_TYPE_HOTKEY)
 		return;
@@ -830,6 +825,7 @@ static struct wmi_driver bitland_mifs_wmi_driver = {
 		.pm = pm_sleep_ptr(&bitland_mifs_wmi_pm_ops),
 	},
 	.id_table = bitland_mifs_wmi_id_table,
+	.min_event_size = sizeof(struct bitland_mifs_event),
 	.probe = bitland_mifs_wmi_probe,
 	.notify_new = bitland_mifs_wmi_notify,
 };
diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c
index e7a411ae9ca1..2a5804efd3ea 100644
--- a/drivers/platform/x86/dell/dell-wmi-base.c
+++ b/drivers/platform/x86/dell/dell-wmi-base.c
@@ -825,6 +825,7 @@ static struct wmi_driver dell_wmi_driver = {
 		.name = "dell-wmi",
 	},
 	.id_table = dell_wmi_id_table,
+	.min_event_size = sizeof(u16),
 	.probe = dell_wmi_probe,
 	.remove = dell_wmi_remove,
 	.notify = dell_wmi_notify,
diff --git a/drivers/platform/x86/lenovo/ideapad-laptop.c b/drivers/platform/x86/lenovo/ideapad-laptop.c
index ae1ebb071fab..4fbc904f1fc3 100644
--- a/drivers/platform/x86/lenovo/ideapad-laptop.c
+++ b/drivers/platform/x86/lenovo/ideapad-laptop.c
@@ -2340,6 +2340,7 @@ static struct wmi_driver ideapad_wmi_driver = {
 		.name = "ideapad_wmi",
 	},
 	.id_table = ideapad_wmi_ids,
+	.min_event_size = sizeof(u32),
 	.probe = ideapad_wmi_probe,
 	.notify = ideapad_wmi_notify,
 };
diff --git a/drivers/platform/x86/lenovo/wmi-camera.c b/drivers/platform/x86/lenovo/wmi-camera.c
index eb60fb9a5b3f..89ecbce60bf4 100644
--- a/drivers/platform/x86/lenovo/wmi-camera.c
+++ b/drivers/platform/x86/lenovo/wmi-camera.c
@@ -134,6 +134,7 @@ static struct wmi_driver lenovo_wmi_driver = {
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},
 	.id_table = lenovo_wmi_id_table,
+	.min_event_size = sizeof(u8),
 	.no_singleton = true,
 	.probe = lenovo_wmi_probe,
 	.notify = lenovo_wmi_notify,
diff --git a/drivers/platform/x86/lenovo/wmi-events.c b/drivers/platform/x86/lenovo/wmi-events.c
index 0994cd7dd504..4a6a2c82413a 100644
--- a/drivers/platform/x86/lenovo/wmi-events.c
+++ b/drivers/platform/x86/lenovo/wmi-events.c
@@ -183,6 +183,7 @@ static struct wmi_driver lwmi_events_driver = {
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},
 	.id_table = lwmi_events_id_table,
+	.min_event_size = sizeof(u32),
 	.probe = lwmi_events_probe,
 	.notify = lwmi_events_notify,
 	.no_singleton = true,
diff --git a/drivers/platform/x86/lenovo/ymc.c b/drivers/platform/x86/lenovo/ymc.c
index 470d53e3c9d2..1b73a55f1b89 100644
--- a/drivers/platform/x86/lenovo/ymc.c
+++ b/drivers/platform/x86/lenovo/ymc.c
@@ -153,6 +153,7 @@ static struct wmi_driver lenovo_ymc_driver = {
 		.name = "lenovo-ymc",
 	},
 	.id_table = lenovo_ymc_wmi_id_table,
+	.min_event_size = sizeof(u32),
 	.probe = lenovo_ymc_probe,
 	.notify = lenovo_ymc_notify,
 };
diff --git a/drivers/platform/x86/lenovo/yogabook.c b/drivers/platform/x86/lenovo/yogabook.c
index 69887de36c9b..1a4b2ab1f35d 100644
--- a/drivers/platform/x86/lenovo/yogabook.c
+++ b/drivers/platform/x86/lenovo/yogabook.c
@@ -411,8 +411,8 @@ static struct wmi_driver yogabook_wmi_driver = {
 		.name = "yogabook-wmi",
 		.pm = pm_sleep_ptr(&yogabook_pm_ops),
 	},
-	.no_notify_data = true,
 	.id_table = yogabook_wmi_id_table,
+	.min_event_size = 0,
 	.probe = yogabook_wmi_probe,
 	.remove = yogabook_wmi_remove,
 	.notify = yogabook_wmi_notify,
diff --git a/drivers/platform/x86/redmi-wmi.c b/drivers/platform/x86/redmi-wmi.c
index e5cb348e3a39..58898630eda6 100644
--- a/drivers/platform/x86/redmi-wmi.c
+++ b/drivers/platform/x86/redmi-wmi.c
@@ -141,6 +141,7 @@ static struct wmi_driver redmi_wmi_driver = {
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},
 	.id_table = redmi_wmi_id_table,
+	.min_event_size = 32,
 	.probe = redmi_wmi_probe,
 	.notify = redmi_wmi_notify,
 	.no_singleton = true,
diff --git a/drivers/platform/x86/uniwill/uniwill-wmi.c b/drivers/platform/x86/uniwill/uniwill-wmi.c
index 31d9c39f14ab..097882f10b1e 100644
--- a/drivers/platform/x86/uniwill/uniwill-wmi.c
+++ b/drivers/platform/x86/uniwill/uniwill-wmi.c
@@ -77,6 +77,7 @@ static struct wmi_driver uniwill_wmi_driver = {
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},
 	.id_table = uniwill_wmi_id_table,
+	.min_event_size = sizeof(u32),
 	.notify = uniwill_wmi_notify,
 	.no_singleton = true,
 };
diff --git a/drivers/platform/x86/xiaomi-wmi.c b/drivers/platform/x86/xiaomi-wmi.c
index badf9e42e015..3874f3336a0d 100644
--- a/drivers/platform/x86/xiaomi-wmi.c
+++ b/drivers/platform/x86/xiaomi-wmi.c
@@ -83,6 +83,7 @@ static struct wmi_driver xiaomi_wmi_driver = {
 		.name = "xiaomi-wmi",
 	},
 	.id_table = xiaomi_wmi_id_table,
+	.min_event_size = 0,
 	.probe = xiaomi_wmi_probe,
 	.notify_new = xiaomi_wmi_notify,
 	.no_singleton = true,
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index da94580572a9..2d242575a8b3 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -91,7 +91,7 @@ u8 wmidev_instance_count(struct wmi_device *wdev);
  * struct wmi_driver - WMI driver structure
  * @driver: Driver model structure
  * @id_table: List of WMI GUIDs supported by this driver
- * @no_notify_data: Driver supports WMI events which provide no event data
+ * @min_event_size: Minimum event payload size supported by this driver
  * @no_singleton: Driver can be instantiated multiple times
  * @probe: Callback for device binding
  * @remove: Callback for device unbinding
@@ -101,11 +101,14 @@ u8 wmidev_instance_count(struct wmi_device *wdev);
  *
  * This represents WMI drivers which handle WMI devices. The data inside the buffer
  * passed to the @notify_new callback is guaranteed to be aligned on a 8-byte boundary.
+ * The minimum supported size for said buffer can be specified using @min_event_size.
+ * WMI drivers that still use the deprecated @notify callback can still set @min_event_size
+ * to 0 in order to signal that they support WMI events which provide no event data.
  */
 struct wmi_driver {
 	struct device_driver driver;
 	const struct wmi_device_id *id_table;
-	bool no_notify_data;
+	size_t min_event_size;
 	bool no_singleton;
 
 	int (*probe)(struct wmi_device *wdev, const void *context);
-- 
cgit v1.2.3


From 3faf0ce6e499dfd32e596bcb5bca2c44d64f4cc1 Mon Sep 17 00:00:00 2001
From: Marc Harvey <marcharvey@google.com>
Date: Thu, 9 Apr 2026 02:59:23 +0000
Subject: net: team: Annotate reads and writes for mixed lock accessed values

The team_port's "index" and the team's "en_port_count" are read in
the hot transmit path, but are only written to when holding the rtnl
lock.

Use READ_ONCE() for all lockless reads of these values, and use
WRITE_ONCE() for all writes.

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Marc Harvey <marcharvey@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-1-f47e7589685d@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/team/team_core.c        | 11 ++++++-----
 drivers/net/team/team_mode_random.c |  2 +-
 include/linux/if_team.h             |  4 ++--
 3 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index 566a5d102c23..becd066279a6 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -938,7 +938,8 @@ static void team_port_enable(struct team *team,
 {
 	if (team_port_enabled(port))
 		return;
-	port->index = team->en_port_count++;
+	WRITE_ONCE(port->index, team->en_port_count);
+	WRITE_ONCE(team->en_port_count, team->en_port_count + 1);
 	hlist_add_head_rcu(&port->hlist,
 			   team_port_index_hash(team, port->index));
 	team_adjust_ops(team);
@@ -958,7 +959,7 @@ static void __reconstruct_port_hlist(struct team *team, int rm_index)
 	for (i = rm_index + 1; i < team->en_port_count; i++) {
 		port = team_get_port_by_index(team, i);
 		hlist_del_rcu(&port->hlist);
-		port->index--;
+		WRITE_ONCE(port->index, port->index - 1);
 		hlist_add_head_rcu(&port->hlist,
 				   team_port_index_hash(team, port->index));
 	}
@@ -973,8 +974,8 @@ static void team_port_disable(struct team *team,
 		team->ops.port_disabled(team, port);
 	hlist_del_rcu(&port->hlist);
 	__reconstruct_port_hlist(team, port->index);
-	port->index = -1;
-	team->en_port_count--;
+	WRITE_ONCE(port->index, -1);
+	WRITE_ONCE(team->en_port_count, team->en_port_count - 1);
 	team_queue_override_port_del(team, port);
 	team_adjust_ops(team);
 	team_lower_state_changed(port);
@@ -1245,7 +1246,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
 		netif_addr_unlock_bh(dev);
 	}
 
-	port->index = -1;
+	WRITE_ONCE(port->index, -1);
 	list_add_tail_rcu(&port->list, &team->port_list);
 	team_port_enable(team, port);
 	netdev_compute_master_upper_features(dev, true);
diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c
index 53d0ce34b8ce..169a7bc865b2 100644
--- a/drivers/net/team/team_mode_random.c
+++ b/drivers/net/team/team_mode_random.c
@@ -16,7 +16,7 @@ static bool rnd_transmit(struct team *team, struct sk_buff *skb)
 	struct team_port *port;
 	int port_index;
 
-	port_index = get_random_u32_below(team->en_port_count);
+	port_index = get_random_u32_below(READ_ONCE(team->en_port_count));
 	port = team_get_port_by_index_rcu(team, port_index);
 	if (unlikely(!port))
 		goto drop;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index ccb5327de26d..06f4d7400c1e 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -77,7 +77,7 @@ static inline struct team_port *team_port_get_rcu(const struct net_device *dev)
 
 static inline bool team_port_enabled(struct team_port *port)
 {
-	return port->index != -1;
+	return READ_ONCE(port->index) != -1;
 }
 
 static inline bool team_port_txable(struct team_port *port)
@@ -272,7 +272,7 @@ static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
 	struct hlist_head *head = team_port_index_hash(team, port_index);
 
 	hlist_for_each_entry_rcu(port, head, hlist)
-		if (port->index == port_index)
+		if (READ_ONCE(port->index) == port_index)
 			return port;
 	return NULL;
 }
-- 
cgit v1.2.3


From 014f249121d73909528df320818fba7693d0ec92 Mon Sep 17 00:00:00 2001
From: Marc Harvey <marcharvey@google.com>
Date: Thu, 9 Apr 2026 02:59:24 +0000
Subject: net: team: Remove unused team_mode_op, port_enabled

This team_mode_op wasn't used by any of the team modes, so remove it.

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Marc Harvey <marcharvey@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-2-f47e7589685d@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/team/team_core.c | 2 --
 include/linux/if_team.h      | 1 -
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index becd066279a6..e54bd21bd068 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -944,8 +944,6 @@ static void team_port_enable(struct team *team,
 			   team_port_index_hash(team, port->index));
 	team_adjust_ops(team);
 	team_queue_override_port_add(team, port);
-	if (team->ops.port_enabled)
-		team->ops.port_enabled(team, port);
 	team_notify_peers(team);
 	team_mcast_rejoin(team);
 	team_lower_state_changed(port);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 06f4d7400c1e..a761f5282bcf 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -121,7 +121,6 @@ struct team_mode_ops {
 	int (*port_enter)(struct team *team, struct team_port *port);
 	void (*port_leave)(struct team *team, struct team_port *port);
 	void (*port_change_dev_addr)(struct team *team, struct team_port *port);
-	void (*port_enabled)(struct team *team, struct team_port *port);
 	void (*port_disabled)(struct team *team, struct team_port *port);
 };
 
-- 
cgit v1.2.3


From cfa477df2cc62ba53cb936669886361152b594a7 Mon Sep 17 00:00:00 2001
From: Marc Harvey <marcharvey@google.com>
Date: Thu, 9 Apr 2026 02:59:25 +0000
Subject: net: team: Rename port_disabled team mode op to port_tx_disabled

This team mode op is only used by the load balance mode, and it only
uses it in the tx path.

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Marc Harvey <marcharvey@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-3-f47e7589685d@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/team/team_core.c             | 4 ++--
 drivers/net/team/team_mode_loadbalance.c | 4 ++--
 include/linux/if_team.h                  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index e54bd21bd068..2ce31999c99f 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -968,8 +968,8 @@ static void team_port_disable(struct team *team,
 {
 	if (!team_port_enabled(port))
 		return;
-	if (team->ops.port_disabled)
-		team->ops.port_disabled(team, port);
+	if (team->ops.port_tx_disabled)
+		team->ops.port_tx_disabled(team, port);
 	hlist_del_rcu(&port->hlist);
 	__reconstruct_port_hlist(team, port->index);
 	WRITE_ONCE(port->index, -1);
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index 684954c2a8de..840f409d250b 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -655,7 +655,7 @@ static void lb_port_leave(struct team *team, struct team_port *port)
 	free_percpu(lb_port_priv->pcpu_stats);
 }
 
-static void lb_port_disabled(struct team *team, struct team_port *port)
+static void lb_port_tx_disabled(struct team *team, struct team_port *port)
 {
 	lb_tx_hash_to_port_mapping_null_port(team, port);
 }
@@ -665,7 +665,7 @@ static const struct team_mode_ops lb_mode_ops = {
 	.exit			= lb_exit,
 	.port_enter		= lb_port_enter,
 	.port_leave		= lb_port_leave,
-	.port_disabled		= lb_port_disabled,
+	.port_tx_disabled	= lb_port_tx_disabled,
 	.receive		= lb_receive,
 	.transmit		= lb_transmit,
 };
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index a761f5282bcf..740cb3100dfc 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -121,7 +121,7 @@ struct team_mode_ops {
 	int (*port_enter)(struct team *team, struct team_port *port);
 	void (*port_leave)(struct team *team, struct team_port *port);
 	void (*port_change_dev_addr)(struct team *team, struct team_port *port);
-	void (*port_disabled)(struct team *team, struct team_port *port);
+	void (*port_tx_disabled)(struct team *team, struct team_port *port);
 };
 
 extern int team_modeop_port_enter(struct team *team, struct team_port *port);
-- 
cgit v1.2.3


From fa6ed31dd913b0f68c75ec80c3f4a324572071fc Mon Sep 17 00:00:00 2001
From: Marc Harvey <marcharvey@google.com>
Date: Thu, 9 Apr 2026 02:59:28 +0000
Subject: net: team: Rename enablement functions and struct members to tx

Add no functional changes, but rename enablement functions, variables
etc. that are used in teaming driver transmit decisions.

Since rx and tx enablement are still coupled, some of the variables
renamed in this patch are still used for the rx path, but that will
change in a follow-up patch.

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Marc Harvey <marcharvey@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-6-f47e7589685d@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/team/team_core.c             | 44 +++++++++++++++---------------
 drivers/net/team/team_mode_loadbalance.c |  2 +-
 drivers/net/team/team_mode_random.c      |  4 +--
 drivers/net/team/team_mode_roundrobin.c  |  2 +-
 include/linux/if_team.h                  | 46 +++++++++++++++++---------------
 5 files changed, 51 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index 2ce31999c99f..826769473878 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -532,13 +532,13 @@ static void team_adjust_ops(struct team *team)
 	 * correct ops are always set.
 	 */
 
-	if (!team->en_port_count || !team_is_mode_set(team) ||
+	if (!team->tx_en_port_count || !team_is_mode_set(team) ||
 	    !team->mode->ops->transmit)
 		team->ops.transmit = team_dummy_transmit;
 	else
 		team->ops.transmit = team->mode->ops->transmit;
 
-	if (!team->en_port_count || !team_is_mode_set(team) ||
+	if (!team->tx_en_port_count || !team_is_mode_set(team) ||
 	    !team->mode->ops->receive)
 		team->ops.receive = team_dummy_receive;
 	else
@@ -831,7 +831,7 @@ static bool team_queue_override_port_has_gt_prio_than(struct team_port *port,
 		return true;
 	if (port->priority > cur->priority)
 		return false;
-	if (port->index < cur->index)
+	if (port->tx_index < cur->tx_index)
 		return true;
 	return false;
 }
@@ -929,7 +929,7 @@ static bool team_port_find(const struct team *team,
 
 /*
  * Enable/disable port by adding to enabled port hashlist and setting
- * port->index (Might be racy so reader could see incorrect ifindex when
+ * port->tx_index (Might be racy so reader could see incorrect ifindex when
  * processing a flying packet, but that is not a problem). Write guarded
  * by RTNL.
  */
@@ -938,10 +938,10 @@ static void team_port_enable(struct team *team,
 {
 	if (team_port_enabled(port))
 		return;
-	WRITE_ONCE(port->index, team->en_port_count);
-	WRITE_ONCE(team->en_port_count, team->en_port_count + 1);
-	hlist_add_head_rcu(&port->hlist,
-			   team_port_index_hash(team, port->index));
+	WRITE_ONCE(port->tx_index, team->tx_en_port_count);
+	WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count + 1);
+	hlist_add_head_rcu(&port->tx_hlist,
+			   team_tx_port_index_hash(team, port->tx_index));
 	team_adjust_ops(team);
 	team_queue_override_port_add(team, port);
 	team_notify_peers(team);
@@ -951,15 +951,17 @@ static void team_port_enable(struct team *team,
 
 static void __reconstruct_port_hlist(struct team *team, int rm_index)
 {
-	int i;
+	struct hlist_head *tx_port_index_hash;
 	struct team_port *port;
+	int i;
 
-	for (i = rm_index + 1; i < team->en_port_count; i++) {
-		port = team_get_port_by_index(team, i);
-		hlist_del_rcu(&port->hlist);
-		WRITE_ONCE(port->index, port->index - 1);
-		hlist_add_head_rcu(&port->hlist,
-				   team_port_index_hash(team, port->index));
+	for (i = rm_index + 1; i < team->tx_en_port_count; i++) {
+		port = team_get_port_by_tx_index(team, i);
+		hlist_del_rcu(&port->tx_hlist);
+		WRITE_ONCE(port->tx_index, port->tx_index - 1);
+		tx_port_index_hash = team_tx_port_index_hash(team,
+							     port->tx_index);
+		hlist_add_head_rcu(&port->tx_hlist, tx_port_index_hash);
 	}
 }
 
@@ -970,10 +972,10 @@ static void team_port_disable(struct team *team,
 		return;
 	if (team->ops.port_tx_disabled)
 		team->ops.port_tx_disabled(team, port);
-	hlist_del_rcu(&port->hlist);
-	__reconstruct_port_hlist(team, port->index);
-	WRITE_ONCE(port->index, -1);
-	WRITE_ONCE(team->en_port_count, team->en_port_count - 1);
+	hlist_del_rcu(&port->tx_hlist);
+	__reconstruct_port_hlist(team, port->tx_index);
+	WRITE_ONCE(port->tx_index, -1);
+	WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count - 1);
 	team_queue_override_port_del(team, port);
 	team_adjust_ops(team);
 	team_lower_state_changed(port);
@@ -1244,7 +1246,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
 		netif_addr_unlock_bh(dev);
 	}
 
-	WRITE_ONCE(port->index, -1);
+	WRITE_ONCE(port->tx_index, -1);
 	list_add_tail_rcu(&port->list, &team->port_list);
 	team_port_enable(team, port);
 	netdev_compute_master_upper_features(dev, true);
@@ -1595,7 +1597,7 @@ static int team_init(struct net_device *dev)
 		return -ENOMEM;
 
 	for (i = 0; i < TEAM_PORT_HASHENTRIES; i++)
-		INIT_HLIST_HEAD(&team->en_port_hlist[i]);
+		INIT_HLIST_HEAD(&team->tx_en_port_hlist[i]);
 	INIT_LIST_HEAD(&team->port_list);
 	err = team_queue_override_init(team);
 	if (err)
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index 840f409d250b..4833fbfe241e 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -120,7 +120,7 @@ static struct team_port *lb_hash_select_tx_port(struct team *team,
 {
 	int port_index = team_num_to_port_index(team, hash);
 
-	return team_get_port_by_index_rcu(team, port_index);
+	return team_get_port_by_tx_index_rcu(team, port_index);
 }
 
 /* Hash to port mapping select tx port */
diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c
index 169a7bc865b2..370e974f3dca 100644
--- a/drivers/net/team/team_mode_random.c
+++ b/drivers/net/team/team_mode_random.c
@@ -16,8 +16,8 @@ static bool rnd_transmit(struct team *team, struct sk_buff *skb)
 	struct team_port *port;
 	int port_index;
 
-	port_index = get_random_u32_below(READ_ONCE(team->en_port_count));
-	port = team_get_port_by_index_rcu(team, port_index);
+	port_index = get_random_u32_below(READ_ONCE(team->tx_en_port_count));
+	port = team_get_port_by_tx_index_rcu(team, port_index);
 	if (unlikely(!port))
 		goto drop;
 	port = team_get_first_port_txable_rcu(team, port);
diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c
index dd405d82c6ac..ecbeef28c221 100644
--- a/drivers/net/team/team_mode_roundrobin.c
+++ b/drivers/net/team/team_mode_roundrobin.c
@@ -27,7 +27,7 @@ static bool rr_transmit(struct team *team, struct sk_buff *skb)
 
 	port_index = team_num_to_port_index(team,
 					    rr_priv(team)->sent_packets++);
-	port = team_get_port_by_index_rcu(team, port_index);
+	port = team_get_port_by_tx_index_rcu(team, port_index);
 	if (unlikely(!port))
 		goto drop;
 	port = team_get_first_port_txable_rcu(team, port);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 740cb3100dfc..c777170ef552 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -27,10 +27,10 @@ struct team;
 
 struct team_port {
 	struct net_device *dev;
-	struct hlist_node hlist; /* node in enabled ports hash list */
+	struct hlist_node tx_hlist; /* node in tx-enabled ports hash list */
 	struct list_head list; /* node in ordinary list */
 	struct team *team;
-	int index; /* index of enabled port. If disabled, it's set to -1 */
+	int tx_index; /* index of tx enabled port. If disabled, -1 */
 
 	bool linkup; /* either state.linkup or user.linkup */
 
@@ -77,7 +77,7 @@ static inline struct team_port *team_port_get_rcu(const struct net_device *dev)
 
 static inline bool team_port_enabled(struct team_port *port)
 {
-	return READ_ONCE(port->index) != -1;
+	return READ_ONCE(port->tx_index) != -1;
 }
 
 static inline bool team_port_txable(struct team_port *port)
@@ -190,10 +190,10 @@ struct team {
 	const struct header_ops *header_ops_cache;
 
 	/*
-	 * List of enabled ports and their count
+	 * List of tx-enabled ports and counts of rx and tx-enabled ports.
 	 */
-	int en_port_count;
-	struct hlist_head en_port_hlist[TEAM_PORT_HASHENTRIES];
+	int tx_en_port_count;
+	struct hlist_head tx_en_port_hlist[TEAM_PORT_HASHENTRIES];
 
 	struct list_head port_list; /* list of all ports */
 
@@ -237,41 +237,43 @@ static inline int team_dev_queue_xmit(struct team *team, struct team_port *port,
 	return dev_queue_xmit(skb);
 }
 
-static inline struct hlist_head *team_port_index_hash(struct team *team,
-						      int port_index)
+static inline struct hlist_head *team_tx_port_index_hash(struct team *team,
+							 int tx_port_index)
 {
-	return &team->en_port_hlist[port_index & (TEAM_PORT_HASHENTRIES - 1)];
+	unsigned int list_entry = tx_port_index & (TEAM_PORT_HASHENTRIES - 1);
+
+	return &team->tx_en_port_hlist[list_entry];
 }
 
-static inline struct team_port *team_get_port_by_index(struct team *team,
-						       int port_index)
+static inline struct team_port *team_get_port_by_tx_index(struct team *team,
+							  int tx_port_index)
 {
+	struct hlist_head *head = team_tx_port_index_hash(team, tx_port_index);
 	struct team_port *port;
-	struct hlist_head *head = team_port_index_hash(team, port_index);
 
-	hlist_for_each_entry(port, head, hlist)
-		if (port->index == port_index)
+	hlist_for_each_entry(port, head, tx_hlist)
+		if (port->tx_index == tx_port_index)
 			return port;
 	return NULL;
 }
 
 static inline int team_num_to_port_index(struct team *team, unsigned int num)
 {
-	int en_port_count = READ_ONCE(team->en_port_count);
+	int tx_en_port_count = READ_ONCE(team->tx_en_port_count);
 
-	if (unlikely(!en_port_count))
+	if (unlikely(!tx_en_port_count))
 		return 0;
-	return num % en_port_count;
+	return num % tx_en_port_count;
 }
 
-static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
-							   int port_index)
+static inline struct team_port *team_get_port_by_tx_index_rcu(struct team *team,
+							      int tx_port_index)
 {
+	struct hlist_head *head = team_tx_port_index_hash(team, tx_port_index);
 	struct team_port *port;
-	struct hlist_head *head = team_port_index_hash(team, port_index);
 
-	hlist_for_each_entry_rcu(port, head, hlist)
-		if (READ_ONCE(port->index) == port_index)
+	hlist_for_each_entry_rcu(port, head, tx_hlist)
+		if (READ_ONCE(port->tx_index) == tx_port_index)
 			return port;
 	return NULL;
 }
-- 
cgit v1.2.3


From 68f0833f279ac209ec865da76568c843dd38c508 Mon Sep 17 00:00:00 2001
From: Marc Harvey <marcharvey@google.com>
Date: Thu, 9 Apr 2026 02:59:29 +0000
Subject: net: team: Track rx enablement separately from tx enablement

Separate the rx and tx enablement/disablement into different
functions so that it is easier to interact with them independently
later.

Although this patch changes receive and transmit paths, the actual
behavior of the teaming driver should remain unchanged, since there
is no option introduced yet to change rx or tx enablement
independently. Those options will be added in follow-up patches.

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Marc Harvey <marcharvey@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-7-f47e7589685d@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/team/team_core.c             | 104 ++++++++++++++++++++++++-------
 drivers/net/team/team_mode_loadbalance.c |   2 +-
 include/linux/if_team.h                  |  16 ++++-
 3 files changed, 95 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index 826769473878..e437099a5a17 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -87,7 +87,7 @@ static void team_lower_state_changed(struct team_port *port)
 	struct netdev_lag_lower_state_info info;
 
 	info.link_up = port->linkup;
-	info.tx_enabled = team_port_enabled(port);
+	info.tx_enabled = team_port_tx_enabled(port);
 	netdev_lower_state_changed(port->dev, &info);
 }
 
@@ -538,7 +538,7 @@ static void team_adjust_ops(struct team *team)
 	else
 		team->ops.transmit = team->mode->ops->transmit;
 
-	if (!team->tx_en_port_count || !team_is_mode_set(team) ||
+	if (!team->rx_en_port_count || !team_is_mode_set(team) ||
 	    !team->mode->ops->receive)
 		team->ops.receive = team_dummy_receive;
 	else
@@ -734,7 +734,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
 
 	port = team_port_get_rcu(skb->dev);
 	team = port->team;
-	if (!team_port_enabled(port)) {
+	if (!team_port_rx_enabled(port)) {
 		if (is_link_local_ether_addr(eth_hdr(skb)->h_dest))
 			/* link-local packets are mostly useful when stack receives them
 			 * with the link they arrive on.
@@ -876,7 +876,7 @@ static void __team_queue_override_enabled_check(struct team *team)
 static void team_queue_override_port_prio_changed(struct team *team,
 						  struct team_port *port)
 {
-	if (!port->queue_id || !team_port_enabled(port))
+	if (!port->queue_id || !team_port_tx_enabled(port))
 		return;
 	__team_queue_override_port_del(team, port);
 	__team_queue_override_port_add(team, port);
@@ -887,7 +887,7 @@ static void team_queue_override_port_change_queue_id(struct team *team,
 						     struct team_port *port,
 						     u16 new_queue_id)
 {
-	if (team_port_enabled(port)) {
+	if (team_port_tx_enabled(port)) {
 		__team_queue_override_port_del(team, port);
 		port->queue_id = new_queue_id;
 		__team_queue_override_port_add(team, port);
@@ -927,26 +927,33 @@ static bool team_port_find(const struct team *team,
 	return false;
 }
 
+static void __team_port_enable_rx(struct team *team,
+				  struct team_port *port)
+{
+	team->rx_en_port_count++;
+	WRITE_ONCE(port->rx_enabled, true);
+}
+
+static void __team_port_disable_rx(struct team *team,
+				   struct team_port *port)
+{
+	team->rx_en_port_count--;
+	WRITE_ONCE(port->rx_enabled, false);
+}
+
 /*
- * Enable/disable port by adding to enabled port hashlist and setting
- * port->tx_index (Might be racy so reader could see incorrect ifindex when
- * processing a flying packet, but that is not a problem). Write guarded
- * by RTNL.
+ * Enable just TX on the port by adding to tx-enabled port hashlist and
+ * setting port->tx_index (Might be racy so reader could see incorrect
+ * ifindex when processing a flying packet, but that is not a problem).
+ * Write guarded by RTNL.
  */
-static void team_port_enable(struct team *team,
-			     struct team_port *port)
+static void __team_port_enable_tx(struct team *team,
+				  struct team_port *port)
 {
-	if (team_port_enabled(port))
-		return;
 	WRITE_ONCE(port->tx_index, team->tx_en_port_count);
 	WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count + 1);
 	hlist_add_head_rcu(&port->tx_hlist,
 			   team_tx_port_index_hash(team, port->tx_index));
-	team_adjust_ops(team);
-	team_queue_override_port_add(team, port);
-	team_notify_peers(team);
-	team_mcast_rejoin(team);
-	team_lower_state_changed(port);
 }
 
 static void __reconstruct_port_hlist(struct team *team, int rm_index)
@@ -965,20 +972,69 @@ static void __reconstruct_port_hlist(struct team *team, int rm_index)
 	}
 }
 
-static void team_port_disable(struct team *team,
-			      struct team_port *port)
+static void __team_port_disable_tx(struct team *team,
+				   struct team_port *port)
 {
-	if (!team_port_enabled(port))
-		return;
 	if (team->ops.port_tx_disabled)
 		team->ops.port_tx_disabled(team, port);
+
 	hlist_del_rcu(&port->tx_hlist);
 	__reconstruct_port_hlist(team, port->tx_index);
+
 	WRITE_ONCE(port->tx_index, -1);
 	WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count - 1);
-	team_queue_override_port_del(team, port);
+}
+
+/*
+ * Enable TX AND RX on the port.
+ */
+static void team_port_enable(struct team *team,
+			     struct team_port *port)
+{
+	bool rx_was_enabled;
+	bool tx_was_enabled;
+
+	if (team_port_enabled(port))
+		return;
+
+	rx_was_enabled = team_port_rx_enabled(port);
+	tx_was_enabled = team_port_tx_enabled(port);
+
+	if (!rx_was_enabled)
+		__team_port_enable_rx(team, port);
+	if (!tx_was_enabled)
+		__team_port_enable_tx(team, port);
+
+	team_adjust_ops(team);
+	if (!tx_was_enabled)
+		team_queue_override_port_add(team, port);
+	team_notify_peers(team);
+	if (!rx_was_enabled)
+		team_mcast_rejoin(team);
+	if (!tx_was_enabled)
+		team_lower_state_changed(port);
+}
+
+static void team_port_disable(struct team *team,
+			      struct team_port *port)
+{
+	bool rx_was_enabled = team_port_rx_enabled(port);
+	bool tx_was_enabled = team_port_tx_enabled(port);
+
+	if (!tx_was_enabled && !rx_was_enabled)
+		return;
+
+	if (tx_was_enabled) {
+		__team_port_disable_tx(team, port);
+		team_queue_override_port_del(team, port);
+	}
+	if (rx_was_enabled)
+		__team_port_disable_rx(team, port);
+
 	team_adjust_ops(team);
-	team_lower_state_changed(port);
+
+	if (tx_was_enabled)
+		team_lower_state_changed(port);
 }
 
 static int team_port_enter(struct team *team, struct team_port *port)
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index 4833fbfe241e..38a459649569 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -380,7 +380,7 @@ static int lb_tx_hash_to_port_mapping_set(struct team *team,
 
 	list_for_each_entry(port, &team->port_list, list) {
 		if (ctx->data.u32_val == port->dev->ifindex &&
-		    team_port_enabled(port)) {
+		    team_port_tx_enabled(port)) {
 			rcu_assign_pointer(LB_HTPM_PORT_BY_HASH(lb_priv, hash),
 					   port);
 			return 0;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index c777170ef552..3d21e06fda67 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -31,6 +31,7 @@ struct team_port {
 	struct list_head list; /* node in ordinary list */
 	struct team *team;
 	int tx_index; /* index of tx enabled port. If disabled, -1 */
+	bool rx_enabled;
 
 	bool linkup; /* either state.linkup or user.linkup */
 
@@ -75,14 +76,24 @@ static inline struct team_port *team_port_get_rcu(const struct net_device *dev)
 	return rcu_dereference(dev->rx_handler_data);
 }
 
-static inline bool team_port_enabled(struct team_port *port)
+static inline bool team_port_rx_enabled(struct team_port *port)
+{
+	return READ_ONCE(port->rx_enabled);
+}
+
+static inline bool team_port_tx_enabled(struct team_port *port)
 {
 	return READ_ONCE(port->tx_index) != -1;
 }
 
+static inline bool team_port_enabled(struct team_port *port)
+{
+	return team_port_rx_enabled(port) && team_port_tx_enabled(port);
+}
+
 static inline bool team_port_txable(struct team_port *port)
 {
-	return port->linkup && team_port_enabled(port);
+	return port->linkup && team_port_tx_enabled(port);
 }
 
 static inline bool team_port_dev_txable(const struct net_device *port_dev)
@@ -193,6 +204,7 @@ struct team {
 	 * List of tx-enabled ports and counts of rx and tx-enabled ports.
 	 */
 	int tx_en_port_count;
+	int rx_en_port_count;
 	struct hlist_head tx_en_port_hlist[TEAM_PORT_HASHENTRIES];
 
 	struct list_head port_list; /* list of all ports */
-- 
cgit v1.2.3


From b025461303d87923abfaae6cc07ba8a83ddfd844 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 7 Apr 2026 17:14:38 -0700
Subject: tcp: update window_clamp when SO_RCVBUF is set

Commit under Fixes moved recomputing the window clamp to
tcp_measure_rcv_mss() (when scaling_ratio changes).
I suspect it missed the fact that we don't recompute the clamp
when rcvbuf is set. Until scaling_ratio changes we are
stuck with the old window clamp which may be based on
the small initial buffer. scaling_ratio may never change.

Inspired by Eric's recent commit d1361840f8c5 ("tcp: fix
SO_RCVLOWAT and RCVBUF autotuning") plumb the user action
thru to TCP and have it update the clamp.

A smaller fix would be to just have tcp_rcvbuf_grow()
adjust the clamp even if SOCK_RCVBUF_LOCK is set.
But IIUC this is what we were trying to get away from
in the first place.

Fixes: a2cbb1603943 ("tcp: Update window clamping condition")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Eric Dumazet <edumaze@google.com>
Link: https://patch.msgid.link/20260408001438.129165-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/net.h | 1 +
 include/net/tcp.h   | 1 +
 net/core/sock.c     | 9 +++++++++
 net/ipv4/af_inet.c  | 1 +
 net/ipv4/tcp.c      | 5 +++++
 net/ipv6/af_inet6.c | 1 +
 6 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index a8e818de95b3..ca6a7bc5c9ae 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -223,6 +223,7 @@ struct proto_ops {
 	int		(*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
 					  size_t size);
 	int		(*set_rcvlowat)(struct sock *sk, int val);
+	void		(*set_rcvbuf)(struct sock *sk, int val);
 };
 
 #define DECLARE_SOCKADDR(type, dst, src)	\
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0f09429ff4cb..dfa52ceefd23 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -516,6 +516,7 @@ void tcp_syn_ack_timeout(const struct request_sock *req);
 int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		int flags);
 int tcp_set_rcvlowat(struct sock *sk, int val);
+void tcp_set_rcvbuf(struct sock *sk, int val);
 int tcp_set_window_clamp(struct sock *sk, int val);
 
 static inline void
diff --git a/net/core/sock.c b/net/core/sock.c
index 367fd7bad4ac..b37b664b6eb9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -966,6 +966,8 @@ EXPORT_SYMBOL(sock_set_keepalive);
 
 static void __sock_set_rcvbuf(struct sock *sk, int val)
 {
+	struct socket *sock = sk->sk_socket;
+
 	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
 	 * as a negative value.
 	 */
@@ -983,6 +985,13 @@ static void __sock_set_rcvbuf(struct sock *sk, int val)
 	 * we actually used in getsockopt is the most desirable behavior.
 	 */
 	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+
+	if (sock) {
+		const struct proto_ops *ops = READ_ONCE(sock->ops);
+
+		if (ops->set_rcvbuf)
+			ops->set_rcvbuf(sk, sk->sk_rcvbuf);
+	}
 }
 
 void sock_set_rcvbuf(struct sock *sk, int val)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f98e46ae3e30..0e62032e76b1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1091,6 +1091,7 @@ const struct proto_ops inet_stream_ops = {
 	.compat_ioctl	   = inet_compat_ioctl,
 #endif
 	.set_rcvlowat	   = tcp_set_rcvlowat,
+	.set_rcvbuf	   = tcp_set_rcvbuf,
 };
 EXPORT_SYMBOL(inet_stream_ops);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e57eaffc007a..1a494d18c5fd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1858,6 +1858,11 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
 	return 0;
 }
 
+void tcp_set_rcvbuf(struct sock *sk, int val)
+{
+	tcp_set_window_clamp(sk, tcp_win_from_space(sk, val));
+}
+
 #ifdef CONFIG_MMU
 static const struct vm_operations_struct tcp_vm_ops = {
 };
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ee341a8254bf..0a88b376141d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -690,6 +690,7 @@ const struct proto_ops inet6_stream_ops = {
 	.compat_ioctl	   = inet6_compat_ioctl,
 #endif
 	.set_rcvlowat	   = tcp_set_rcvlowat,
+	.set_rcvbuf	   = tcp_set_rcvbuf,
 };
 EXPORT_SYMBOL_GPL(inet6_stream_ops);
 
-- 
cgit v1.2.3


From 9c332d7f63401c3ff1765c9998531b3784f3f9a4 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Tue, 24 Mar 2026 13:32:12 -0400
Subject: nfs: update inode ctime after removexattr operation

xfstest generic/728 fails with delegated timestamps. The client does a
removexattr and then a stat to test the ctime, which doesn't change. The
stat() doesn't trigger a GETATTR because of the delegated timestamps, so
it relies on the cached ctime, which is wrong.

The setxattr compound has a trailing GETATTR, which ensures that its
ctime gets updated. Follow the same strategy with removexattr.

Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
Reported-by: Olga Kornievskaia <aglo@umich.edu>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
 fs/nfs/nfs42xdr.c       | 10 ++++++++--
 include/linux/nfs_xdr.h |  3 +++
 3 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 7b3ca68fb4bb..7e5c1172fc11 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1372,11 +1372,15 @@ out_put_src_lock:
 static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
+	__u32 bitmask[NFS_BITMASK_SZ];
 	struct nfs42_removexattrargs args = {
 		.fh = NFS_FH(inode),
+		.bitmask = bitmask,
 		.xattr_name = name,
 	};
-	struct nfs42_removexattrres res;
+	struct nfs42_removexattrres res = {
+		.server = server,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
 		.rpc_argp = &args,
@@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
 	int ret;
 	unsigned long timestamp = jiffies;
 
+	res.fattr = nfs_alloc_fattr();
+	if (!res.fattr)
+		return -ENOMEM;
+
+	nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
+			 inode, NFS_INO_INVALID_CHANGE);
+
 	ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
 	    &res.seq_res, 1);
 	trace_nfs4_removexattr(inode, name, ret);
-	if (!ret)
+	if (!ret) {
 		nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
+		ret = nfs_post_op_update_inode(inode, res.fattr);
+	}
 
+	kfree(res.fattr);
 	return ret;
 }
 
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 5c7452ce6e8a..ec105c62f721 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -263,11 +263,13 @@
 #define NFS4_enc_removexattr_sz		(compound_encode_hdr_maxsz + \
 					 encode_sequence_maxsz + \
 					 encode_putfh_maxsz + \
-					 encode_removexattr_maxsz)
+					 encode_removexattr_maxsz + \
+					 encode_getattr_maxsz)
 #define NFS4_dec_removexattr_sz		(compound_decode_hdr_maxsz + \
 					 decode_sequence_maxsz + \
 					 decode_putfh_maxsz + \
-					 decode_removexattr_maxsz)
+					 decode_removexattr_maxsz + \
+					 decode_getattr_maxsz)
 
 /*
  * These values specify the maximum amount of data that is not
@@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_removexattr(xdr, args->xattr_name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
 		goto out;
 
 	status = decode_removexattr(xdr, &res->cinfo);
+	if (status)
+		goto out;
+	status = decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ff1f12aa73d2..fcbd21b5685f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
 struct nfs42_removexattrargs {
 	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh			*fh;
+	const u32			*bitmask;
 	const char			*xattr_name;
 };
 
 struct nfs42_removexattrres {
 	struct nfs4_sequence_res	seq_res;
 	struct nfs4_change_info		cinfo;
+	struct nfs_fattr		*fattr;
+	const struct nfs_server		*server;
 };
 
 #endif /* CONFIG_NFS_V4_2 */
-- 
cgit v1.2.3


From 765bde47fe7f197dabeb12da76831f40d0b20377 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 6 Mar 2026 16:56:24 -0500
Subject: xprtrdma: Close lost-wakeup race in xprt_rdma_alloc_slot

xprt_rdma_alloc_slot() and xprt_rdma_free_slot() lack serialization
between the buffer pool and the backlog queue.  A buffer freed
after rpcrdma_buffer_get() finds the pool empty but before
rpc_sleep_on() places the task on the backlog is returned to the
pool with no waiter to wake, leaving the task stuck on the backlog
indefinitely.

After joining the backlog, re-check the pool and route any
recovered buffer through xprt_wake_up_backlog(), whose queue lock
serializes with concurrent wakeups and avoids double-assignment
of slots.

Because xprt_rdma_free_slot() does not hold reserve_lock, the
XPRT_CONGESTED double-check in xprt_throttle_congested() is
ineffective: a task can join the backlog through that path after
free_slot has already found it empty and cleared the bit.  Avoid
this by using xprt_add_backlog_noncongested(), which queues the
task without setting XPRT_CONGESTED, so every allocation reaches
xprt_rdma_alloc_slot() and its post-sleep re-check.

Fixes: edb41e61a54e ("xprtrdma: Make rpc_rqst part of rpcrdma_req")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h     |  2 ++
 net/sunrpc/xprt.c               | 16 ++++++++++++++++
 net/sunrpc/xprtrdma/transport.c | 15 ++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index f46d1fb8f71a..a82045804d34 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -404,6 +404,8 @@ struct rpc_xprt *	xprt_alloc(struct net *net, size_t size,
 				unsigned int max_req);
 void			xprt_free(struct rpc_xprt *);
 void			xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task);
+void			xprt_add_backlog_noncongested(struct rpc_xprt *xprt,
+					struct rpc_task *task);
 bool			xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req);
 void			xprt_cleanup_ids(void);
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 4fbb57a29704..48a3618cbb29 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1663,6 +1663,22 @@ void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
 }
 EXPORT_SYMBOL_GPL(xprt_add_backlog);
 
+/**
+ * xprt_add_backlog_noncongested - queue task on backlog
+ * @xprt: transport whose backlog queue receives the task
+ * @task: task to queue
+ *
+ * Like xprt_add_backlog, but does not set XPRT_CONGESTED.
+ * For transports whose free_slot path does not synchronize
+ * with xprt_throttle_congested via reserve_lock.
+ */
+void xprt_add_backlog_noncongested(struct rpc_xprt *xprt,
+				   struct rpc_task *task)
+{
+	rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init);
+}
+EXPORT_SYMBOL_GPL(xprt_add_backlog_noncongested);
+
 static bool __xprt_set_rq(struct rpc_task *task, void *data)
 {
 	struct rpc_rqst *req = data;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ca079439f9cc..61706df5e485 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -511,7 +511,20 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 
 out_sleep:
 	task->tk_status = -EAGAIN;
-	xprt_add_backlog(xprt, task);
+	xprt_add_backlog_noncongested(xprt, task);
+	/* A buffer freed between buffer_get and rpc_sleep_on
+	 * goes back to the pool with no waiter to wake.
+	 * Re-check after joining the backlog to close that gap.
+	 */
+	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
+	if (req) {
+		struct rpc_rqst *rqst = &req->rl_slot;
+
+		if (!xprt_wake_up_backlog(xprt, rqst)) {
+			memset(rqst, 0, sizeof(*rqst));
+			rpcrdma_buffer_put(&r_xprt->rx_buf, req);
+		}
+	}
 }
 
 /**
-- 
cgit v1.2.3


From 67fab22a7adcec0279b9b057eb3dc669e32834f0 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Wed, 8 Apr 2026 03:30:29 -0700
Subject: net: add getsockopt_iter callback to proto_ops

Add a new getsockopt_iter callback to struct proto_ops that uses
sockopt_t, a type-safe wrapper around iov_iter. This provides a clean
interface for socket option operations that works with both user and
kernel buffers.

The sockopt_t type encapsulates an iov_iter and an optlen field.

The optlen field, although not suggested by Linus, serves as both input
(buffer size) and output (returned data size), allowing callbacks to
return random values independent of the bytes written via
copy_to_iter(), so, keep it separated from iov_iter.count.

This is preparatory work for removing the SOL_SOCKET level restriction
from io_uring getsockopt operations.

Keep in mind that both iter_out and iter_in always point to the same
data at all times, and we just have two of them to make the callback
implementation sane.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Breno Leitao <leitao@debian.org>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20260408-getsockopt-v3-1-061bb9cb355d@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/net.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index ca6a7bc5c9ae..f268f395ce47 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -23,9 +23,30 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/sockptr.h>
+#include <linux/uio.h>
 
 #include <uapi/linux/net.h>
 
+/**
+ * struct sockopt - socket option value container
+ * @iter_in: iov_iter for reading optval with the content from the caller.
+ *	     Use copy_from_iter() given this iov direction is ITER_SOURCE
+ * @iter_out: iov_iter for protocols to update optval data to userspace
+ *	      Use _copy_to_iter() given iov direction is ITER_DEST
+ * @optlen: serves as both input (buffer size) and output (returned data size).
+ *
+ * Type-safe wrapper for socket option data that works with both
+ * user and kernel buffers.
+ *
+ * The optlen field allows callbacks to return a specific length value
+ * independent of the bytes written via copy_to_iter().
+ */
+typedef struct sockopt {
+	struct iov_iter iter_in;
+	struct iov_iter iter_out;
+	int optlen;
+} sockopt_t;
+
 struct poll_table_struct;
 struct pipe_inode_info;
 struct inode;
@@ -192,6 +213,8 @@ struct proto_ops {
 				      unsigned int optlen);
 	int		(*getsockopt)(struct socket *sock, int level,
 				      int optname, char __user *optval, int __user *optlen);
+	int		(*getsockopt_iter)(struct socket *sock, int level,
+					   int optname, sockopt_t *opt);
 	void		(*show_fdinfo)(struct seq_file *m, struct socket *sock);
 	int		(*sendmsg)   (struct socket *sock, struct msghdr *m,
 				      size_t total_len);
-- 
cgit v1.2.3


From 0d5acba6331c326f394a677daf49a67f44a0416a Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 9 Apr 2026 14:52:32 -0700
Subject: Drivers: hv: vmbus: Export hv_vmbus_exists() and use it in pci-hyperv

With commit f84b21da3624 ("PCI: hv: Don't load the driver for baremetal root partition"),
the bare metal Linux root partition won't use the pci-hyperv driver, but
when a Linux VM runs on the Linux root partition, pci-hyperv's module_init
function init_hv_pci_drv() can still run, e.g. in the case of
CONFIG_PCI_HYPERV=y, even if the VMBus driver is not used in such a VM
(i.e. the hv_vmbus driver's init function returns -ENODEV due to
vmbus_root_device being NULL).

In such a Linux VM, init_hv_pci_drv() runs with a side effect: the 3
hvpci_block_ops callbacks are set to functions that depend on hv_vmbus.

Later, when the MLX driver in such a VM invokes the callbacks, e.g. in
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c:
mlx5_hv_register_invalidate(), hvpci_block_ops.reg_blk_invalidate() is
hv_register_block_invalidate() rather than a NULL function pointer, and
hv_register_block_invalidate() assumes that it can find a struct
hv_pcibus_device from pdev->bus->sysdata, which is false in such a VM.

Consequently, hv_register_block_invalidate() -> get_pcichild_wslot() ->
spin_lock_irqsave() may hang since it can be accessing an invalid
spinlock pointer.

Fix the issue by exporting hv_vmbus_exists() and using it in pci-hyperv:

    hv_root_partition() is true and hv_nested is false ==>
	hv_vmbus_exists() is false.

    hv_root_partition() is true and hv_nested is true ==>
	hv_vmbus_exists() is true.

    hv_root_partition() is false ==> hv_vmbus_exists() is true.

While at it, rename vmbus_exists() to hv_vmbus_exists() to follow the
convention that all public functions have the hv_ prefix; also change
the return value's type from int to bool to make the code more readable;
also move the two pr_info() calls.

Reported-by: Mukesh Rathor <mrathor@linux.microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/vmbus_drv.c              | 20 ++++++++------------
 drivers/pci/controller/pci-hyperv.c |  2 +-
 include/linux/hyperv.h              |  2 ++
 3 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 3d1a58b667db..24fa0b2443c3 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -101,13 +101,11 @@ struct device *hv_get_vmbus_root_device(void)
 }
 EXPORT_SYMBOL_GPL(hv_get_vmbus_root_device);
 
-static int vmbus_exists(void)
+bool hv_vmbus_exists(void)
 {
-	if (vmbus_root_device == NULL)
-		return -ENODEV;
-
-	return 0;
+	return vmbus_root_device != NULL;
 }
+EXPORT_SYMBOL_GPL(hv_vmbus_exists);
 
 static u8 channel_monitor_group(const struct vmbus_channel *channel)
 {
@@ -1577,11 +1575,10 @@ int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, c
 {
 	int ret;
 
-	pr_info("registering driver %s\n", hv_driver->name);
+	if (!hv_vmbus_exists())
+		return -ENODEV;
 
-	ret = vmbus_exists();
-	if (ret < 0)
-		return ret;
+	pr_info("registering driver %s\n", hv_driver->name);
 
 	hv_driver->driver.name = hv_driver->name;
 	hv_driver->driver.owner = owner;
@@ -1607,9 +1604,8 @@ EXPORT_SYMBOL_GPL(__vmbus_driver_register);
  */
 void vmbus_driver_unregister(struct hv_driver *hv_driver)
 {
-	pr_info("unregistering driver %s\n", hv_driver->name);
-
-	if (!vmbus_exists()) {
+	if (hv_vmbus_exists()) {
+		pr_info("unregistering driver %s\n", hv_driver->name);
 		driver_unregister(&hv_driver->driver);
 		vmbus_free_dynids(hv_driver);
 	}
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 49c0a2d51162..cfc8fa403dad 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -4172,7 +4172,7 @@ static int __init init_hv_pci_drv(void)
 	if (!hv_is_hyperv_initialized())
 		return -ENODEV;
 
-	if (hv_root_partition() && !hv_nested)
+	if (!hv_vmbus_exists())
 		return -ENODEV;
 
 	ret = hv_pci_irqchip_init();
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index dfc516c1c719..5459e776ec17 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1304,6 +1304,8 @@ static inline void *hv_get_drvdata(struct hv_device *dev)
 
 struct device *hv_get_vmbus_root_device(void);
 
+bool hv_vmbus_exists(void);
+
 struct hv_ring_buffer_debug_info {
 	u32 current_interrupt_mask;
 	u32 current_read_index;
-- 
cgit v1.2.3


From 5b484311507b5d403c1f7a45f6aa3778549e268b Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Mon, 13 Apr 2026 19:59:11 -0700
Subject: driver core: Add kernel-doc for DEV_FLAG_COUNT enum value

Even though nobody should use this value (except when declaring the
"flags" bitmap), kernel-doc still gets upset that it's not documented.
It reports:

  WARNING: ../include/linux/device.h:519
  Enum value 'DEV_FLAG_COUNT' not described in enum 'struct_device_flags'

Add the description of DEV_FLAG_COUNT.

Fixes: a2225b6e834a ("driver core: Don't let a device probe until it's ready")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Closes: https://lore.kernel.org/f318cd43-81fd-48b9-abf7-92af85f12f91@infradead.org
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260413195910.1.I23aca74fe2d3636a47df196a80920fecb2643220@changeid
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 include/linux/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index f27ed6eb87a9..ac972e7bead4 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -466,6 +466,7 @@ struct device_physical_location {
  *
  * @DEV_FLAG_READY_TO_PROBE: If set then device_add() has finished enough
  *		initialization that probe could be called.
+ * @DEV_FLAG_COUNT: Number of defined struct_device_flags.
  */
 enum struct_device_flags {
 	DEV_FLAG_READY_TO_PROBE = 0,
-- 
cgit v1.2.3


From 15e9e00a5aa4f56ca1cff7749c166e072d7cb6ac Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@manguebit.org>
Date: Tue, 14 Apr 2026 11:37:21 -0300
Subject: vfs: get rid of BUG_ON() in d_mark_tmpfile_name()

Do proper error handling in d_mark_tmpfile_name() by returning errors
rather than using BUG_ON()'s.

Adjust caller to check for errors from d_mark_tmpfile_name() as well
as clean it up for using return value from scnprintf() in QSTR_LEN()
to make it more obvious where the tmpfile name's length is coming
from.

Link: https://lore.kernel.org/r/CAHk-=wgerpUKCDhdzKH0FEdLyfhj3doc9t+kO9Yb6rSsTp7hdQ@mail.gmail.com
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
CC: linux-fsdevel@vger.kernel.org
Cc: linux-cifs@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/dcache.c            | 14 +++++++++-----
 fs/smb/client/cifsfs.h | 17 ++++++-----------
 fs/smb/client/dir.c    | 25 +++++++++++++++----------
 include/linux/dcache.h |  2 +-
 4 files changed, 31 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index df11bbba0342..dbcbd0affb26 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3196,15 +3196,18 @@ void d_mark_tmpfile(struct file *file, struct inode *inode)
 }
 EXPORT_SYMBOL(d_mark_tmpfile);
 
-void d_mark_tmpfile_name(struct file *file, const struct qstr *name)
+int d_mark_tmpfile_name(struct file *file, const struct qstr *name)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	char *dname = dentry->d_shortname.string;
 
-	BUG_ON(dname_external(dentry));
-	BUG_ON(d_really_is_positive(dentry));
-	BUG_ON(!d_unlinked(dentry));
-	BUG_ON(name->len > DNAME_INLINE_LEN - 1);
+	if (unlikely(dname_external(dentry) ||
+		     d_really_is_positive(dentry) ||
+		     !d_unlinked(dentry)))
+		return -EINVAL;
+	if (unlikely(name->len > DNAME_INLINE_LEN - 1))
+		return -ENAMETOOLONG;
+
 	spin_lock(&dentry->d_parent->d_lock);
 	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 	dentry->__d_name.len = name->len;
@@ -3212,6 +3215,7 @@ void d_mark_tmpfile_name(struct file *file, const struct qstr *name)
 	dname[name->len] = '\0';
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dentry->d_parent->d_lock);
+	return 0;
 }
 EXPORT_SYMBOL(d_mark_tmpfile_name);
 
diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index 18f9f93a01b4..7370b38da938 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -10,6 +10,7 @@
 #define _CIFSFS_H
 
 #include <linux/hash.h>
+#include <linux/dcache.h>
 
 #define ROOT_I 2
 
@@ -149,17 +150,11 @@ struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type, int flags,
 
 char *cifs_silly_fullpath(struct dentry *dentry);
 
-#define CIFS_TMPNAME_PREFIX        ".__smbfile_tmp"
-#define CIFS_TMPNAME_PREFIX_LEN    ((int)sizeof(CIFS_TMPNAME_PREFIX) - 1)
-#define CIFS_TMPNAME_COUNTER_LEN   ((int)sizeof(cifs_tmpcounter) * 2)
-#define CIFS_TMPNAME_LEN \
-	(CIFS_TMPNAME_PREFIX_LEN + CIFS_TMPNAME_COUNTER_LEN)
-
-#define CIFS_SILLYNAME_PREFIX	    ".__smbfile_silly"
-#define CIFS_SILLYNAME_PREFIX_LEN  ((int)sizeof(CIFS_SILLYNAME_PREFIX) - 1)
-#define CIFS_SILLYNAME_COUNTER_LEN ((int)sizeof(cifs_sillycounter) * 2)
-#define CIFS_SILLYNAME_LEN \
-	(CIFS_SILLYNAME_PREFIX_LEN + CIFS_SILLYNAME_COUNTER_LEN)
+#define CIFS_TMPNAME_PREFIX	".__smbfile_tmp"
+#define CIFS_TMPNAME_LEN	(DNAME_INLINE_LEN - 1)
+
+#define CIFS_SILLYNAME_PREFIX	".__smbfile_silly"
+#define CIFS_SILLYNAME_LEN	(DNAME_INLINE_LEN - 1)
 
 #ifdef CONFIG_CIFS_NFSD_EXPORT
 extern const struct export_operations cifs_export_ops;
diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c
index 6ea1ae7f7a46..e4295a5b55b3 100644
--- a/fs/smb/client/dir.c
+++ b/fs/smb/client/dir.c
@@ -1056,9 +1056,9 @@ int cifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(dir);
+	size_t namesize = CIFS_TMPNAME_LEN + 1;
 	char *path __free(kfree) = NULL, *name;
 	unsigned int oflags = file->f_flags;
-	size_t size = CIFS_TMPNAME_LEN + 1;
 	int retries = 0, max_retries = 16;
 	struct TCP_Server_Info *server;
 	struct cifs_pending_open open;
@@ -1070,6 +1070,7 @@ int cifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
 	struct inode *inode;
 	unsigned int xid;
 	__u32 oplock;
+	int namelen;
 	int rc;
 
 	if (unlikely(cifs_forced_shutdown(cifs_sb)))
@@ -1093,7 +1094,7 @@ int cifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
 		server->ops->new_lease_key(&fid);
 	cifs_add_pending_open(&fid, tlink, &open);
 
-	path = alloc_parent_path(dentry, size - 1);
+	path = alloc_parent_path(dentry, namesize - 1);
 	if (IS_ERR(path)) {
 		cifs_del_pending_open(&open);
 		rc = PTR_ERR(path);
@@ -1103,16 +1104,22 @@ int cifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
 
 	name = path + strlen(path);
 	do {
-		scnprintf(name, size,
-			  CIFS_TMPNAME_PREFIX "%0*x",
-			  CIFS_TMPNAME_COUNTER_LEN,
-			  atomic_inc_return(&cifs_tmpcounter));
+		/* Append tmpfile name to @path */
+		namelen = scnprintf(name, namesize, CIFS_TMPNAME_PREFIX "%x",
+				    atomic_inc_return(&cifs_tmpcounter));
 		rc = __cifs_do_create(dir, dentry, path, xid, tlink, oflags,
 				      mode, &oplock, &fid, NULL, &inode);
 		if (!rc) {
+			rc = d_mark_tmpfile_name(file, &QSTR_LEN(name, namelen));
+			if (rc) {
+				cifs_dbg(VFS | ONCE, "%s: failed to set filename in dentry: %d\n",
+					 __func__, rc);
+				rc = -EISDIR;
+				iput(inode);
+				goto err_open;
+			}
 			set_nlink(inode, 0);
 			mark_inode_dirty(inode);
-			d_mark_tmpfile_name(file, &QSTR_LEN(name, size - 1));
 			d_instantiate(dentry, inode);
 			break;
 		}
@@ -1168,9 +1175,7 @@ char *cifs_silly_fullpath(struct dentry *dentry)
 
 	do {
 		dput(sdentry);
-		scnprintf(name, namesize,
-			  CIFS_SILLYNAME_PREFIX "%0*x",
-			  CIFS_SILLYNAME_COUNTER_LEN,
+		scnprintf(name, namesize, CIFS_SILLYNAME_PREFIX "%x",
 			  atomic_inc_return(&cifs_sillycounter));
 		sdentry = lookup_noperm(&QSTR(name), dentry->d_parent);
 		if (IS_ERR(sdentry))
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f60819dcfebd..c5bd5a74baba 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -264,7 +264,7 @@ extern void d_invalidate(struct dentry *);
 extern struct dentry * d_make_root(struct inode *);
 
 extern void d_mark_tmpfile(struct file *, struct inode *);
-void d_mark_tmpfile_name(struct file *file, const struct qstr *name);
+int d_mark_tmpfile_name(struct file *file, const struct qstr *name);
 extern void d_tmpfile(struct file *, struct inode *);
 
 extern struct dentry *d_find_alias(struct inode *);
-- 
cgit v1.2.3


From 1f5ffc672165ff851063a5fd044b727ab2517ae3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 14 Apr 2026 23:03:02 -0700
Subject: Fix mismerge of the arm64 / timer-core interrupt handling changes

Commit c43267e6794a ("Merge tag 'arm64-upstream' of git://...") had a
conflict in the irq entry/exit code due to commit c5538d0141b3 ("entry:
Split kernel mode logic from irqentry_{enter,exit}()") having moved the
core code in irqentry_enter/exit() from kernel/entry/common.c into
helper inline functions in include/linux/irq-entry-common.h.

On the other side of the merge, the timer-core code had introduced
deferred hrtimer rearming infrastructure in commit 0e98eb14814e ("entry:
Prepare for deferred hrtimer rearming"), adding two calls to
hrtimer_rearm_deferred() in irqentry_enter().

When merging the two, moving the two calls to the new location wasn't a
problem, but afterwards I had made the mistake of looking what had
happened in linux-next.  And linux-next had a very different merge
resolution in commit 04f02dc3ea74 ("Merge tag 'entry-for-arm64-26-04-08'
into sched/hrtick"), which had unified the two calls into one single
call-site in irqentry_exit_to_kernel_mode_preempt().

And that merge resolution looked cleverer than the straightforward one I
had done, so I re-did my merge the way it had been done in linux-next.

But it turns out nobody apparently tests linux-next, and the merge in
linux-next was just wrong.

The difference is that hrtimer_rearm_deferred() doesn't get called at
all for the case when state.exit_rcu is true, and the boot will
typically fail due to timers not triggering correctly.

So this undoes the "clever" merge, and does the straightforward one
instead.

Fixes: c43267e6794a ("Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux"
Reported-and-tested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Link: https://lore.kernel.org/all/CAADnVQJ=MoiX4=guPWhL9vtnAELkpNx=GNm8RA1-aV424UFz2A@mail.gmail.com/
Link: https://lore.kernel.org/all/CAHk-=wg8+BER4VyFKG3rnPi2gXxbf-jbHS=EU+xhFqGVQfbutw@mail.gmail.com/
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/irq-entry-common.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
index 7ab41eec549f..167fba7dbf04 100644
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -474,8 +474,6 @@ static inline void irqentry_exit_to_kernel_mode_preempt(struct pt_regs *regs,
 
 	if (IS_ENABLED(CONFIG_PREEMPTION))
 		irqentry_exit_cond_resched();
-
-	hrtimer_rearm_deferred();
 }
 
 /**
@@ -501,6 +499,7 @@ irqentry_exit_to_kernel_mode_after_preempt(struct pt_regs *regs, irqentry_state_
 		 */
 		if (state.exit_rcu) {
 			instrumentation_begin();
+			hrtimer_rearm_deferred();
 			/* Tell the tracer that IRET will enable interrupts */
 			trace_hardirqs_on_prepare();
 			lockdep_hardirqs_on_prepare();
@@ -511,6 +510,7 @@ irqentry_exit_to_kernel_mode_after_preempt(struct pt_regs *regs, irqentry_state_
 		}
 
 		instrumentation_begin();
+		hrtimer_rearm_deferred();
 		/* Covers both tracing and lockdep */
 		trace_hardirqs_on();
 		instrumentation_end();
-- 
cgit v1.2.3


From fbd5d52ebf49595975e24e14e57632d580738091 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 13 Apr 2026 09:01:26 +0200
Subject: ACPI: add acpi_get_cpu_uid() stub helper

When ACPI is disabled, x86 Xen support fails to build:

arch/x86/xen/enlighten_hvm.c: In function 'xen_cpu_up_prepare_hvm':
arch/x86/xen/enlighten_hvm.c:165:13: error: implicit declaration of function 'acpi_get_cpu_uid' [-Wimplicit-function-declaration]
  165 |         if (acpi_get_cpu_uid(cpu, &cpu_uid) == 0)
      |             ^~~~~~~~~~~~~~~~

Add a trivial stub that can be used in place of the real function.

Fixes: f652d0a4e13c ("ACPI: Centralize acpi_get_cpu_uid() declaration in include/linux/acpi.h")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Chengwen Feng <fengchengwen@huawei.com>
Link: https://patch.msgid.link/20260413070132.3828606-1-arnd@kernel.org
Signed-off-by: Rafael J. Wysocki <rjw@rjwysocki.net>
---
 include/linux/acpi.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index bfacb9475aac..67effb91fa98 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -959,6 +959,12 @@ static inline int acpi_table_parse(char *id,
 	return -ENODEV;
 }
 
+static inline int acpi_get_cpu_uid(unsigned int cpu, u32 *uid)
+{
+	*uid = cpu;
+	return 0;
+}
+
 static inline int acpi_nvs_register(__u64 start, __u64 size)
 {
 	return 0;
-- 
cgit v1.2.3


From d3e945223e0158c85dbde23de4f89493a2a817f6 Mon Sep 17 00:00:00 2001
From: Xu Kuohai <xukuohai@huawei.com>
Date: Thu, 16 Apr 2026 06:43:37 +0000
Subject: bpf: Move constants blinding out of arch-specific JITs

During the JIT stage, constants blinding rewrites instructions but only
rewrites the private instruction copy of the JITed subprog, leaving the
global env->prog->insnsi and env->insn_aux_data untouched. This causes a
mismatch between subprog instructions and the global state, making it
difficult to use the global data in the JIT.

To avoid this mismatch, and given that all arch-specific JITs already
support constants blinding, move it to the generic verifier code, and
switch to rewrite the global env->prog->insnsi with the global states
adjusted, as other rewrites in the verifier do.

This removes the constants blinding calls in each JIT, which are largely
duplicated code across architectures.

Since constants blinding is only required for JIT, and there are two
JIT entry functions, jit_subprogs() for BPF programs with multiple
subprogs and bpf_prog_select_runtime() for programs with no subprogs,
move the constants blinding invocation into these two functions.

In the verifier path, bpf_patch_insn_data() is used to keep global
verifier auxiliary data in sync with patched instructions. A key
question is whether this global auxiliary data should be restored
on the failure path.

Besides instructions, bpf_patch_insn_data() adjusts:
  - prog->aux->poke_tab
  - env->insn_array_maps
  - env->subprog_info
  - env->insn_aux_data

For prog->aux->poke_tab, it is only used by JIT or only meaningful after
JIT succeeds, so it does not need to be restored on the failure path.

For env->insn_array_maps, when JIT fails, programs using insn arrays
are rejected by bpf_insn_array_ready() due to missing JIT addresses.
Hence, env->insn_array_maps is only meaningful for JIT and does not need
to be restored.

For subprog_info, if jit_subprogs fails and CONFIG_BPF_JIT_ALWAYS_ON
is not enabled, kernel falls back to interpreter. In this case,
env->subprog_info is used to determine subprogram stack depth. So it
must be restored on failure.

For env->insn_aux_data, it is freed by clear_insn_aux_data() at the
end of bpf_check(). Before freeing, clear_insn_aux_data() loops over
env->insn_aux_data to release jump targets recorded in it. The loop
uses env->prog->len as the array length, but this length no longer
matches the actual size of the adjusted env->insn_aux_data array after
constants blinding.

To address it, a simple approach is to keep insn_aux_data as adjusted
after failure, since it will be freed shortly, and record its actual size
for the loop in clear_insn_aux_data(). But since clear_insn_aux_data()
uses the same index to loop over both env->prog->insnsi and env->insn_aux_data,
this approach results in incorrect index for the insnsi array. So an
alternative approach is adopted: clone the original env->insn_aux_data
before blinding and restore it after failure, similar to env->prog.

For classic BPF programs, constants blinding works as before since it
is still invoked from bpf_prog_select_runtime().

Reviewed-by: Anton Protopopov <a.s.protopopov@gmail.com> # v8
Reviewed-by: Hari Bathini <hbathini@linux.ibm.com> # powerpc jit
Reviewed-by: Pu Lehui <pulehui@huawei.com> # riscv jit
Acked-by: Hengqi Chen <hengqi.chen@gmail.com> # loongarch jit
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Link: https://lore.kernel.org/r/20260416064341.151802-2-xukuohai@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/arc/net/bpf_jit_core.c      |  39 ++++-------
 arch/arm/net/bpf_jit_32.c        |  41 ++---------
 arch/arm64/net/bpf_jit_comp.c    |  72 ++++++-------------
 arch/loongarch/net/bpf_jit.c     |  59 +++++-----------
 arch/mips/net/bpf_jit_comp.c     |  20 +-----
 arch/parisc/net/bpf_jit_core.c   |  73 ++++++++------------
 arch/powerpc/net/bpf_jit_comp.c  |  72 +++++++------------
 arch/riscv/net/bpf_jit_core.c    |  61 ++++++----------
 arch/s390/net/bpf_jit_comp.c     |  59 ++++++----------
 arch/sparc/net/bpf_jit_comp_64.c |  61 ++++++----------
 arch/x86/net/bpf_jit_comp.c      |  43 ++----------
 arch/x86/net/bpf_jit_comp32.c    |  33 ++-------
 include/linux/filter.h           |  33 ++++++++-
 kernel/bpf/core.c                |  69 +++++++++++++++---
 kernel/bpf/fixups.c              | 146 +++++++++++++++++++++++++++++++++------
 15 files changed, 403 insertions(+), 478 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c
index 1421eeced0f5..973ceae48675 100644
--- a/arch/arc/net/bpf_jit_core.c
+++ b/arch/arc/net/bpf_jit_core.c
@@ -79,7 +79,6 @@ struct arc_jit_data {
  * The JIT pertinent context that is used by different functions.
  *
  * prog:		The current eBPF program being handled.
- * orig_prog:		The original eBPF program before any possible change.
  * jit:			The JIT buffer and its length.
  * bpf_header:		The JITed program header. "jit.buf" points inside it.
  * emit:		If set, opcodes are written to memory; else, a dry-run.
@@ -94,12 +93,10 @@ struct arc_jit_data {
  * need_extra_pass:	A forecast if an "extra_pass" will occur.
  * is_extra_pass:	Indicates if the current pass is an extra pass.
  * user_bpf_prog:	True, if VM opcodes come from a real program.
- * blinded:		True if "constant blinding" step returned a new "prog".
  * success:		Indicates if the whole JIT went OK.
  */
 struct jit_context {
 	struct bpf_prog			*prog;
-	struct bpf_prog			*orig_prog;
 	struct jit_buffer		jit;
 	struct bpf_binary_header	*bpf_header;
 	bool				emit;
@@ -114,7 +111,6 @@ struct jit_context {
 	bool				need_extra_pass;
 	bool				is_extra_pass;
 	bool				user_bpf_prog;
-	bool				blinded;
 	bool				success;
 };
 
@@ -161,13 +157,7 @@ static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
 {
 	memset(ctx, 0, sizeof(*ctx));
 
-	ctx->orig_prog = prog;
-
-	/* If constant blinding was requested but failed, scram. */
-	ctx->prog = bpf_jit_blind_constants(prog);
-	if (IS_ERR(ctx->prog))
-		return PTR_ERR(ctx->prog);
-	ctx->blinded = (ctx->prog != ctx->orig_prog);
+	ctx->prog = prog;
 
 	/* If the verifier doesn't zero-extend, then we have to do it. */
 	ctx->do_zext = !ctx->prog->aux->verifier_zext;
@@ -214,14 +204,6 @@ static inline void maybe_free(struct jit_context *ctx, void **mem)
  */
 static void jit_ctx_cleanup(struct jit_context *ctx)
 {
-	if (ctx->blinded) {
-		/* if all went well, release the orig_prog. */
-		if (ctx->success)
-			bpf_jit_prog_release_other(ctx->prog, ctx->orig_prog);
-		else
-			bpf_jit_prog_release_other(ctx->orig_prog, ctx->prog);
-	}
-
 	maybe_free(ctx, (void **)&ctx->bpf2insn);
 	maybe_free(ctx, (void **)&ctx->jit_data);
 
@@ -229,12 +211,19 @@ static void jit_ctx_cleanup(struct jit_context *ctx)
 		ctx->bpf2insn_valid = false;
 
 	/* Freeing "bpf_header" is enough. "jit.buf" is a sub-array of it. */
-	if (!ctx->success && ctx->bpf_header) {
-		bpf_jit_binary_free(ctx->bpf_header);
-		ctx->bpf_header = NULL;
-		ctx->jit.buf    = NULL;
-		ctx->jit.index  = 0;
-		ctx->jit.len    = 0;
+	if (!ctx->success) {
+		if (ctx->bpf_header) {
+			bpf_jit_binary_free(ctx->bpf_header);
+			ctx->bpf_header = NULL;
+			ctx->jit.buf    = NULL;
+			ctx->jit.index  = 0;
+			ctx->jit.len    = 0;
+		}
+		if (ctx->is_extra_pass) {
+			ctx->prog->bpf_func = NULL;
+			ctx->prog->jited = 0;
+			ctx->prog->jited_len = 0;
+		}
 	}
 
 	ctx->emit = false;
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index deeb8f292454..e6b1bb2de627 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -2144,9 +2144,7 @@ bool bpf_jit_needs_zext(void)
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
-	struct bpf_prog *tmp, *orig_prog = prog;
 	struct bpf_binary_header *header;
-	bool tmp_blinded = false;
 	struct jit_ctx ctx;
 	unsigned int tmp_idx;
 	unsigned int image_size;
@@ -2156,20 +2154,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	 * the interpreter.
 	 */
 	if (!prog->jit_requested)
-		return orig_prog;
-
-	/* If constant blinding was enabled and we failed during blinding
-	 * then we must fall back to the interpreter. Otherwise, we save
-	 * the new JITed code.
-	 */
-	tmp = bpf_jit_blind_constants(prog);
-
-	if (IS_ERR(tmp))
-		return orig_prog;
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
+		return prog;
 
 	memset(&ctx, 0, sizeof(ctx));
 	ctx.prog = prog;
@@ -2179,10 +2164,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	 * we must fall back to the interpreter
 	 */
 	ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
-	if (ctx.offsets == NULL) {
-		prog = orig_prog;
-		goto out;
-	}
+	if (ctx.offsets == NULL)
+		return prog;
 
 	/* 1) fake pass to find in the length of the JITed code,
 	 * to compute ctx->offsets and other context variables
@@ -2194,10 +2177,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	 * being successful in the second pass, so just fall back
 	 * to the interpreter.
 	 */
-	if (build_body(&ctx)) {
-		prog = orig_prog;
+	if (build_body(&ctx))
 		goto out_off;
-	}
 
 	tmp_idx = ctx.idx;
 	build_prologue(&ctx);
@@ -2213,10 +2194,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	ctx.idx += ctx.imm_count;
 	if (ctx.imm_count) {
 		ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL);
-		if (ctx.imms == NULL) {
-			prog = orig_prog;
+		if (ctx.imms == NULL)
 			goto out_off;
-		}
 	}
 #else
 	/* there's nothing about the epilogue on ARMv7 */
@@ -2238,10 +2217,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	/* Not able to allocate memory for the structure then
 	 * we must fall back to the interpretation
 	 */
-	if (header == NULL) {
-		prog = orig_prog;
+	if (header == NULL)
 		goto out_imms;
-	}
 
 	/* 2.) Actual pass to generate final JIT code */
 	ctx.target = (u32 *) image_ptr;
@@ -2278,16 +2255,12 @@ out_imms:
 #endif
 out_off:
 	kfree(ctx.offsets);
-out:
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(prog, prog == orig_prog ?
-					   tmp : orig_prog);
+
 	return prog;
 
 out_free:
 	image_ptr = NULL;
 	bpf_jit_binary_free(header);
-	prog = orig_prog;
 	goto out_imms;
 }
 
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 524b67c0867e..d310d1c35192 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2003,14 +2003,12 @@ struct arm64_jit_data {
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	int image_size, prog_size, extable_size, extable_align, extable_offset;
-	struct bpf_prog *tmp, *orig_prog = prog;
 	struct bpf_binary_header *header;
 	struct bpf_binary_header *ro_header = NULL;
 	struct arm64_jit_data *jit_data;
 	void __percpu *priv_stack_ptr = NULL;
 	bool was_classic = bpf_prog_was_classic(prog);
 	int priv_stack_alloc_sz;
-	bool tmp_blinded = false;
 	bool extra_pass = false;
 	struct jit_ctx ctx;
 	u8 *image_ptr;
@@ -2019,26 +2017,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	int exentry_idx;
 
 	if (!prog->jit_requested)
-		return orig_prog;
-
-	tmp = bpf_jit_blind_constants(prog);
-	/* If blinding was requested and we failed during blinding,
-	 * we must fall back to the interpreter.
-	 */
-	if (IS_ERR(tmp))
-		return orig_prog;
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
+		return prog;
 
 	jit_data = prog->aux->jit_data;
 	if (!jit_data) {
 		jit_data = kzalloc_obj(*jit_data);
-		if (!jit_data) {
-			prog = orig_prog;
-			goto out;
-		}
+		if (!jit_data)
+			return prog;
 		prog->aux->jit_data = jit_data;
 	}
 	priv_stack_ptr = prog->aux->priv_stack_ptr;
@@ -2050,10 +2035,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
 				      2 * PRIV_STACK_GUARD_SZ;
 		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
-		if (!priv_stack_ptr) {
-			prog = orig_prog;
+		if (!priv_stack_ptr)
 			goto out_priv_stack;
-		}
 
 		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
 		prog->aux->priv_stack_ptr = priv_stack_ptr;
@@ -2073,10 +2056,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	ctx.prog = prog;
 
 	ctx.offset = kvzalloc_objs(int, prog->len + 1);
-	if (ctx.offset == NULL) {
-		prog = orig_prog;
+	if (ctx.offset == NULL)
 		goto out_off;
-	}
 
 	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
 	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
@@ -2089,15 +2070,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	 * BPF line info needs ctx->offset[i] to be the offset of
 	 * instruction[i] in jited image, so build prologue first.
 	 */
-	if (build_prologue(&ctx, was_classic)) {
-		prog = orig_prog;
+	if (build_prologue(&ctx, was_classic))
 		goto out_off;
-	}
 
-	if (build_body(&ctx, extra_pass)) {
-		prog = orig_prog;
+	if (build_body(&ctx, extra_pass))
 		goto out_off;
-	}
 
 	ctx.epilogue_offset = ctx.idx;
 	build_epilogue(&ctx, was_classic);
@@ -2115,10 +2092,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
 					      sizeof(u64), &header, &image_ptr,
 					      jit_fill_hole);
-	if (!ro_header) {
-		prog = orig_prog;
+	if (!ro_header)
 		goto out_off;
-	}
 
 	/* Pass 2: Determine jited position and result for each instruction */
 
@@ -2146,10 +2121,8 @@ skip_init_ctx:
 	/* Dont write body instructions to memory for now */
 	ctx.write = false;
 
-	if (build_body(&ctx, extra_pass)) {
-		prog = orig_prog;
+	if (build_body(&ctx, extra_pass))
 		goto out_free_hdr;
-	}
 
 	ctx.epilogue_offset = ctx.idx;
 	ctx.exentry_idx = exentry_idx;
@@ -2158,19 +2131,15 @@ skip_init_ctx:
 
 	/* Pass 3: Adjust jump offset and write final image */
 	if (build_body(&ctx, extra_pass) ||
-		WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) {
-		prog = orig_prog;
+		WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset))
 		goto out_free_hdr;
-	}
 
 	build_epilogue(&ctx, was_classic);
 	build_plt(&ctx);
 
 	/* Extra pass to validate JITed code. */
-	if (validate_ctx(&ctx)) {
-		prog = orig_prog;
+	if (validate_ctx(&ctx))
 		goto out_free_hdr;
-	}
 
 	/* update the real prog size */
 	prog_size = sizeof(u32) * ctx.idx;
@@ -2187,16 +2156,13 @@ skip_init_ctx:
 		if (extra_pass && ctx.idx > jit_data->ctx.idx) {
 			pr_err_once("multi-func JIT bug %d > %d\n",
 				    ctx.idx, jit_data->ctx.idx);
-			prog->bpf_func = NULL;
-			prog->jited = 0;
-			prog->jited_len = 0;
 			goto out_free_hdr;
 		}
 		if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
-			/* ro_header has been freed */
+			/* ro_header and header has been freed */
 			ro_header = NULL;
-			prog = orig_prog;
-			goto out_off;
+			header = NULL;
+			goto out_free_hdr;
 		}
 	} else {
 		jit_data->ctx = ctx;
@@ -2233,13 +2199,15 @@ out_priv_stack:
 		kfree(jit_data);
 		prog->aux->jit_data = NULL;
 	}
-out:
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(prog, prog == orig_prog ?
-					   tmp : orig_prog);
+
 	return prog;
 
 out_free_hdr:
+	if (extra_pass) {
+		prog->bpf_func = NULL;
+		prog->jited = 0;
+		prog->jited_len = 0;
+	}
 	if (header) {
 		bpf_arch_text_copy(&ro_header->size, &header->size,
 				   sizeof(header->size));
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 9cb796e16379..fcc8c0c29fb0 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1922,43 +1922,26 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
-	bool tmp_blinded = false, extra_pass = false;
+	bool extra_pass = false;
 	u8 *image_ptr, *ro_image_ptr;
 	int image_size, prog_size, extable_size;
 	struct jit_ctx ctx;
 	struct jit_data *jit_data;
 	struct bpf_binary_header *header;
 	struct bpf_binary_header *ro_header;
-	struct bpf_prog *tmp, *orig_prog = prog;
 
 	/*
 	 * If BPF JIT was not enabled then we must fall back to
 	 * the interpreter.
 	 */
 	if (!prog->jit_requested)
-		return orig_prog;
-
-	tmp = bpf_jit_blind_constants(prog);
-	/*
-	 * If blinding was requested and we failed during blinding,
-	 * we must fall back to the interpreter. Otherwise, we save
-	 * the new JITed code.
-	 */
-	if (IS_ERR(tmp))
-		return orig_prog;
-
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
+		return prog;
 
 	jit_data = prog->aux->jit_data;
 	if (!jit_data) {
 		jit_data = kzalloc_obj(*jit_data);
-		if (!jit_data) {
-			prog = orig_prog;
-			goto out;
-		}
+		if (!jit_data)
+			return prog;
 		prog->aux->jit_data = jit_data;
 	}
 	if (jit_data->ctx.offset) {
@@ -1978,17 +1961,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
 
 	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
-	if (ctx.offset == NULL) {
-		prog = orig_prog;
+	if (ctx.offset == NULL)
 		goto out_offset;
-	}
 
 	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
 	build_prologue(&ctx);
-	if (build_body(&ctx, extra_pass)) {
-		prog = orig_prog;
+	if (build_body(&ctx, extra_pass))
 		goto out_offset;
-	}
 	ctx.epilogue_offset = ctx.idx;
 	build_epilogue(&ctx);
 
@@ -2004,10 +1983,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	/* Now we know the size of the structure to make */
 	ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32),
 					      &header, &image_ptr, jit_fill_hole);
-	if (!ro_header) {
-		prog = orig_prog;
+	if (!ro_header)
 		goto out_offset;
-	}
 
 	/* 2. Now, the actual pass to generate final JIT code */
 	/*
@@ -2027,17 +2004,13 @@ skip_init_ctx:
 	ctx.num_exentries = 0;
 
 	build_prologue(&ctx);
-	if (build_body(&ctx, extra_pass)) {
-		prog = orig_prog;
+	if (build_body(&ctx, extra_pass))
 		goto out_free;
-	}
 	build_epilogue(&ctx);
 
 	/* 3. Extra pass to validate JITed code */
-	if (validate_ctx(&ctx)) {
-		prog = orig_prog;
+	if (validate_ctx(&ctx))
 		goto out_free;
-	}
 
 	/* And we're done */
 	if (bpf_jit_enable > 1)
@@ -2050,9 +2023,9 @@ skip_init_ctx:
 			goto out_free;
 		}
 		if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
-			/* ro_header has been freed */
+			/* ro_header and header have been freed */
 			ro_header = NULL;
-			prog = orig_prog;
+			header = NULL;
 			goto out_free;
 		}
 		/*
@@ -2084,13 +2057,15 @@ out_offset:
 		prog->aux->jit_data = NULL;
 	}
 
-out:
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
-
 	return prog;
 
 out_free:
+	if (extra_pass) {
+		prog->bpf_func = NULL;
+		prog->jited = 0;
+		prog->jited_len = 0;
+	}
+
 	if (header) {
 		bpf_arch_text_copy(&ro_header->size, &header->size, sizeof(header->size));
 		bpf_jit_binary_pack_free(ro_header, header);
diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c
index e355dfca4400..d2b6c955f18e 100644
--- a/arch/mips/net/bpf_jit_comp.c
+++ b/arch/mips/net/bpf_jit_comp.c
@@ -911,10 +911,8 @@ bool bpf_jit_needs_zext(void)
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
-	struct bpf_prog *tmp, *orig_prog = prog;
 	struct bpf_binary_header *header = NULL;
 	struct jit_context ctx;
-	bool tmp_blinded = false;
 	unsigned int tmp_idx;
 	unsigned int image_size;
 	u8 *image_ptr;
@@ -925,19 +923,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	 * the interpreter.
 	 */
 	if (!prog->jit_requested)
-		return orig_prog;
-	/*
-	 * If constant blinding was enabled and we failed during blinding
-	 * then we must fall back to the interpreter. Otherwise, we save
-	 * the new JITed code.
-	 */
-	tmp = bpf_jit_blind_constants(prog);
-	if (IS_ERR(tmp))
-		return orig_prog;
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
+		return prog;
 
 	memset(&ctx, 0, sizeof(ctx));
 	ctx.program = prog;
@@ -1025,14 +1011,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	prog->jited_len = image_size;
 
 out:
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(prog, prog == orig_prog ?
-					   tmp : orig_prog);
 	kfree(ctx.descriptors);
 	return prog;
 
 out_err:
-	prog = orig_prog;
 	if (header)
 		bpf_jit_binary_free(header);
 	goto out;
diff --git a/arch/parisc/net/bpf_jit_core.c b/arch/parisc/net/bpf_jit_core.c
index a5eb6b51e27a..35dca372b5df 100644
--- a/arch/parisc/net/bpf_jit_core.c
+++ b/arch/parisc/net/bpf_jit_core.c
@@ -44,30 +44,19 @@ bool bpf_jit_needs_zext(void)
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	unsigned int prog_size = 0, extable_size = 0;
-	bool tmp_blinded = false, extra_pass = false;
-	struct bpf_prog *tmp, *orig_prog = prog;
+	bool extra_pass = false;
 	int pass = 0, prev_ninsns = 0, prologue_len, i;
 	struct hppa_jit_data *jit_data;
 	struct hppa_jit_context *ctx;
 
 	if (!prog->jit_requested)
-		return orig_prog;
-
-	tmp = bpf_jit_blind_constants(prog);
-	if (IS_ERR(tmp))
-		return orig_prog;
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
+		return prog;
 
 	jit_data = prog->aux->jit_data;
 	if (!jit_data) {
 		jit_data = kzalloc_obj(*jit_data);
-		if (!jit_data) {
-			prog = orig_prog;
-			goto out;
-		}
+		if (!jit_data)
+			return prog;
 		prog->aux->jit_data = jit_data;
 	}
 
@@ -81,10 +70,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
 	ctx->prog = prog;
 	ctx->offset = kzalloc_objs(int, prog->len);
-	if (!ctx->offset) {
-		prog = orig_prog;
-		goto out_offset;
-	}
+	if (!ctx->offset)
+		goto out_err;
 	for (i = 0; i < prog->len; i++) {
 		prev_ninsns += 20;
 		ctx->offset[i] = prev_ninsns;
@@ -93,10 +80,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	for (i = 0; i < NR_JIT_ITERATIONS; i++) {
 		pass++;
 		ctx->ninsns = 0;
-		if (build_body(ctx, extra_pass, ctx->offset)) {
-			prog = orig_prog;
-			goto out_offset;
-		}
+		if (build_body(ctx, extra_pass, ctx->offset))
+			goto out_err;
 		ctx->body_len = ctx->ninsns;
 		bpf_jit_build_prologue(ctx);
 		ctx->prologue_len = ctx->ninsns - ctx->body_len;
@@ -116,10 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 						     &jit_data->image,
 						     sizeof(long),
 						     bpf_fill_ill_insns);
-			if (!jit_data->header) {
-				prog = orig_prog;
-				goto out_offset;
-			}
+			if (!jit_data->header)
+				goto out_err;
 
 			ctx->insns = (u32 *)jit_data->image;
 			/*
@@ -134,8 +117,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
 		if (jit_data->header)
 			bpf_jit_binary_free(jit_data->header);
-		prog = orig_prog;
-		goto out_offset;
+		goto out_err;
 	}
 
 	if (extable_size)
@@ -148,8 +130,7 @@ skip_init_ctx:
 	bpf_jit_build_prologue(ctx);
 	if (build_body(ctx, extra_pass, NULL)) {
 		bpf_jit_binary_free(jit_data->header);
-		prog = orig_prog;
-		goto out_offset;
+		goto out_err;
 	}
 	bpf_jit_build_epilogue(ctx);
 
@@ -160,20 +141,19 @@ skip_init_ctx:
 			{ extern int machine_restart(char *); machine_restart(""); }
 	}
 
+	if (!prog->is_func || extra_pass) {
+		if (bpf_jit_binary_lock_ro(jit_data->header)) {
+			bpf_jit_binary_free(jit_data->header);
+			goto out_err;
+		}
+		bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);
+	}
+
 	prog->bpf_func = (void *)ctx->insns;
 	prog->jited = 1;
 	prog->jited_len = prog_size;
 
-	bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);
-
 	if (!prog->is_func || extra_pass) {
-		if (bpf_jit_binary_lock_ro(jit_data->header)) {
-			bpf_jit_binary_free(jit_data->header);
-			prog->bpf_func = NULL;
-			prog->jited = 0;
-			prog->jited_len = 0;
-			goto out_offset;
-		}
 		prologue_len = ctx->epilogue_offset - ctx->body_len;
 		for (i = 0; i < prog->len; i++)
 			ctx->offset[i] += prologue_len;
@@ -183,14 +163,19 @@ out_offset:
 		kfree(jit_data);
 		prog->aux->jit_data = NULL;
 	}
-out:
+
 	if (HPPA_JIT_REBOOT)
 		{ extern int machine_restart(char *); machine_restart(""); }
 
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(prog, prog == orig_prog ?
-					   tmp : orig_prog);
 	return prog;
+
+out_err:
+	if (extra_pass) {
+		prog->bpf_func = NULL;
+		prog->jited = 0;
+		prog->jited_len = 0;
+	}
+	goto out_offset;
 }
 
 u64 hppa_div64(u64 div, u64 divisor)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 50103b3794fb..2bae4699e78f 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -177,9 +177,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	void __percpu *priv_stack_ptr = NULL;
 	struct bpf_binary_header *fhdr = NULL;
 	struct bpf_binary_header *hdr = NULL;
-	struct bpf_prog *org_fp = fp;
-	struct bpf_prog *tmp_fp = NULL;
-	bool bpf_blinded = false;
 	bool extra_pass = false;
 	u8 *fimage = NULL;
 	u32 *fcode_base = NULL;
@@ -187,24 +184,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	u32 fixup_len;
 
 	if (!fp->jit_requested)
-		return org_fp;
-
-	tmp_fp = bpf_jit_blind_constants(org_fp);
-	if (IS_ERR(tmp_fp))
-		return org_fp;
-
-	if (tmp_fp != org_fp) {
-		bpf_blinded = true;
-		fp = tmp_fp;
-	}
+		return fp;
 
 	jit_data = fp->aux->jit_data;
 	if (!jit_data) {
 		jit_data = kzalloc_obj(*jit_data);
-		if (!jit_data) {
-			fp = org_fp;
-			goto out;
-		}
+		if (!jit_data)
+			return fp;
 		fp->aux->jit_data = jit_data;
 	}
 
@@ -219,10 +205,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
 							2 * PRIV_STACK_GUARD_SZ;
 		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16, GFP_KERNEL);
-		if (!priv_stack_ptr) {
-			fp = org_fp;
+		if (!priv_stack_ptr)
 			goto out_priv_stack;
-		}
 
 		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
 		fp->aux->priv_stack_ptr = priv_stack_ptr;
@@ -249,10 +233,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	}
 
 	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
-	if (addrs == NULL) {
-		fp = org_fp;
-		goto out_addrs;
-	}
+	if (addrs == NULL)
+		goto out_err;
 
 	memset(&cgctx, 0, sizeof(struct codegen_context));
 	bpf_jit_init_reg_mapping(&cgctx);
@@ -279,11 +261,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	}
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
+	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false))
 		/* We hit something illegal or unsupported. */
-		fp = org_fp;
-		goto out_addrs;
-	}
+		goto out_err;
 
 	/*
 	 * If we have seen a tail call, we need a second pass.
@@ -294,10 +274,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	 */
 	if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
 		cgctx.idx = 0;
-		if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
-			fp = org_fp;
-			goto out_addrs;
-		}
+		if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false))
+			goto out_err;
 	}
 
 	bpf_jit_realloc_regs(&cgctx);
@@ -318,10 +296,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 
 	fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
 					      bpf_jit_fill_ill_insns);
-	if (!fhdr) {
-		fp = org_fp;
-		goto out_addrs;
-	}
+	if (!fhdr)
+		goto out_err;
 
 	if (extable_len)
 		fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
@@ -340,8 +316,7 @@ skip_init_ctx:
 				       extra_pass)) {
 			bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
 			bpf_jit_binary_pack_free(fhdr, hdr);
-			fp = org_fp;
-			goto out_addrs;
+			goto out_err;
 		}
 		bpf_jit_build_epilogue(code_base, &cgctx);
 
@@ -363,15 +338,16 @@ skip_init_ctx:
 	((u64 *)image)[1] = local_paca->kernel_toc;
 #endif
 
+	if (!fp->is_func || extra_pass) {
+		if (bpf_jit_binary_pack_finalize(fhdr, hdr))
+			goto out_err;
+	}
+
 	fp->bpf_func = (void *)fimage;
 	fp->jited = 1;
 	fp->jited_len = cgctx.idx * 4 + FUNCTION_DESCR_SIZE;
 
 	if (!fp->is_func || extra_pass) {
-		if (bpf_jit_binary_pack_finalize(fhdr, hdr)) {
-			fp = org_fp;
-			goto out_addrs;
-		}
 		bpf_prog_fill_jited_linfo(fp, addrs);
 		/*
 		 * On ABI V1, executable code starts after the function
@@ -398,11 +374,15 @@ out_priv_stack:
 		jit_data->hdr = hdr;
 	}
 
-out:
-	if (bpf_blinded)
-		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
-
 	return fp;
+
+out_err:
+	if (extra_pass) {
+		fp->bpf_func = NULL;
+		fp->jited = 0;
+		fp->jited_len = 0;
+	}
+	goto out_addrs;
 }
 
 /*
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index f7fd4afc3ca3..36f0aea8096d 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -44,29 +44,19 @@ bool bpf_jit_needs_zext(void)
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	unsigned int prog_size = 0, extable_size = 0;
-	bool tmp_blinded = false, extra_pass = false;
-	struct bpf_prog *tmp, *orig_prog = prog;
+	bool extra_pass = false;
 	int pass = 0, prev_ninsns = 0, i;
 	struct rv_jit_data *jit_data;
 	struct rv_jit_context *ctx;
 
 	if (!prog->jit_requested)
-		return orig_prog;
-
-	tmp = bpf_jit_blind_constants(prog);
-	if (IS_ERR(tmp))
-		return orig_prog;
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
+		return prog;
 
 	jit_data = prog->aux->jit_data;
 	if (!jit_data) {
 		jit_data = kzalloc_obj(*jit_data);
 		if (!jit_data) {
-			prog = orig_prog;
-			goto out;
+			return prog;
 		}
 		prog->aux->jit_data = jit_data;
 	}
@@ -83,15 +73,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	ctx->user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
 	ctx->prog = prog;
 	ctx->offset = kzalloc_objs(int, prog->len);
-	if (!ctx->offset) {
-		prog = orig_prog;
+	if (!ctx->offset)
 		goto out_offset;
-	}
 
-	if (build_body(ctx, extra_pass, NULL)) {
-		prog = orig_prog;
+	if (build_body(ctx, extra_pass, NULL))
 		goto out_offset;
-	}
 
 	for (i = 0; i < prog->len; i++) {
 		prev_ninsns += 32;
@@ -105,10 +91,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		bpf_jit_build_prologue(ctx, bpf_is_subprog(prog));
 		ctx->prologue_len = ctx->ninsns;
 
-		if (build_body(ctx, extra_pass, ctx->offset)) {
-			prog = orig_prog;
+		if (build_body(ctx, extra_pass, ctx->offset))
 			goto out_offset;
-		}
 
 		ctx->epilogue_offset = ctx->ninsns;
 		bpf_jit_build_epilogue(ctx);
@@ -126,10 +110,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 							  &jit_data->ro_image, sizeof(u32),
 							  &jit_data->header, &jit_data->image,
 							  bpf_fill_ill_insns);
-			if (!jit_data->ro_header) {
-				prog = orig_prog;
+			if (!jit_data->ro_header)
 				goto out_offset;
-			}
 
 			/*
 			 * Use the image(RW) for writing the JITed instructions. But also save
@@ -150,7 +132,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
 	if (i == NR_JIT_ITERATIONS) {
 		pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
-		prog = orig_prog;
 		goto out_free_hdr;
 	}
 
@@ -163,26 +144,27 @@ skip_init_ctx:
 	ctx->nexentries = 0;
 
 	bpf_jit_build_prologue(ctx, bpf_is_subprog(prog));
-	if (build_body(ctx, extra_pass, NULL)) {
-		prog = orig_prog;
+	if (build_body(ctx, extra_pass, NULL))
 		goto out_free_hdr;
-	}
 	bpf_jit_build_epilogue(ctx);
 
 	if (bpf_jit_enable > 1)
 		bpf_jit_dump(prog->len, prog_size, pass, ctx->insns);
 
-	prog->bpf_func = (void *)ctx->ro_insns + cfi_get_offset();
-	prog->jited = 1;
-	prog->jited_len = prog_size - cfi_get_offset();
-
 	if (!prog->is_func || extra_pass) {
 		if (WARN_ON(bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header))) {
 			/* ro_header has been freed */
 			jit_data->ro_header = NULL;
-			prog = orig_prog;
-			goto out_offset;
+			jit_data->header = NULL;
+			goto out_free_hdr;
 		}
+	}
+
+	prog->bpf_func = (void *)ctx->ro_insns + cfi_get_offset();
+	prog->jited = 1;
+	prog->jited_len = prog_size - cfi_get_offset();
+
+	if (!prog->is_func || extra_pass) {
 		for (i = 0; i < prog->len; i++)
 			ctx->offset[i] = ninsns_rvoff(ctx->offset[i]);
 		bpf_prog_fill_jited_linfo(prog, ctx->offset);
@@ -191,14 +173,15 @@ out_offset:
 		kfree(jit_data);
 		prog->aux->jit_data = NULL;
 	}
-out:
 
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(prog, prog == orig_prog ?
-					   tmp : orig_prog);
 	return prog;
 
 out_free_hdr:
+	if (extra_pass) {
+		prog->bpf_func = NULL;
+		prog->jited = 0;
+		prog->jited_len = 0;
+	}
 	if (jit_data->header) {
 		bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
 				   sizeof(jit_data->header->size));
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index d08d159b6319..2dfc279b1be2 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2314,36 +2314,20 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
  */
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
-	struct bpf_prog *tmp, *orig_fp = fp;
 	struct bpf_binary_header *header;
 	struct s390_jit_data *jit_data;
-	bool tmp_blinded = false;
 	bool extra_pass = false;
 	struct bpf_jit jit;
 	int pass;
 
 	if (!fp->jit_requested)
-		return orig_fp;
-
-	tmp = bpf_jit_blind_constants(fp);
-	/*
-	 * If blinding was requested and we failed during blinding,
-	 * we must fall back to the interpreter.
-	 */
-	if (IS_ERR(tmp))
-		return orig_fp;
-	if (tmp != fp) {
-		tmp_blinded = true;
-		fp = tmp;
-	}
+		return fp;
 
 	jit_data = fp->aux->jit_data;
 	if (!jit_data) {
 		jit_data = kzalloc_obj(*jit_data);
-		if (!jit_data) {
-			fp = orig_fp;
-			goto out;
-		}
+		if (!jit_data)
+			return fp;
 		fp->aux->jit_data = jit_data;
 	}
 	if (jit_data->ctx.addrs) {
@@ -2356,34 +2340,27 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 
 	memset(&jit, 0, sizeof(jit));
 	jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
-	if (jit.addrs == NULL) {
-		fp = orig_fp;
-		goto free_addrs;
-	}
+	if (jit.addrs == NULL)
+		goto out_err;
 	/*
 	 * Three initial passes:
 	 *   - 1/2: Determine clobbered registers
 	 *   - 3:   Calculate program size and addrs array
 	 */
 	for (pass = 1; pass <= 3; pass++) {
-		if (bpf_jit_prog(&jit, fp, extra_pass)) {
-			fp = orig_fp;
-			goto free_addrs;
-		}
+		if (bpf_jit_prog(&jit, fp, extra_pass))
+			goto out_err;
 	}
 	/*
 	 * Final pass: Allocate and generate program
 	 */
 	header = bpf_jit_alloc(&jit, fp);
-	if (!header) {
-		fp = orig_fp;
-		goto free_addrs;
-	}
+	if (!header)
+		goto out_err;
 skip_init_ctx:
 	if (bpf_jit_prog(&jit, fp, extra_pass)) {
 		bpf_jit_binary_free(header);
-		fp = orig_fp;
-		goto free_addrs;
+		goto out_err;
 	}
 	if (bpf_jit_enable > 1) {
 		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
@@ -2392,8 +2369,7 @@ skip_init_ctx:
 	if (!fp->is_func || extra_pass) {
 		if (bpf_jit_binary_lock_ro(header)) {
 			bpf_jit_binary_free(header);
-			fp = orig_fp;
-			goto free_addrs;
+			goto out_err;
 		}
 	} else {
 		jit_data->header = header;
@@ -2411,11 +2387,16 @@ free_addrs:
 		kfree(jit_data);
 		fp->aux->jit_data = NULL;
 	}
-out:
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(fp, fp == orig_fp ?
-					   tmp : orig_fp);
+
 	return fp;
+
+out_err:
+	if (extra_pass) {
+		fp->bpf_func = NULL;
+		fp->jited = 0;
+		fp->jited_len = 0;
+	}
+	goto free_addrs;
 }
 
 bool bpf_jit_supports_kfunc_call(void)
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index b23d1c645ae5..e83e29137566 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1479,37 +1479,22 @@ struct sparc64_jit_data {
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
-	struct bpf_prog *tmp, *orig_prog = prog;
 	struct sparc64_jit_data *jit_data;
 	struct bpf_binary_header *header;
 	u32 prev_image_size, image_size;
-	bool tmp_blinded = false;
 	bool extra_pass = false;
 	struct jit_ctx ctx;
 	u8 *image_ptr;
 	int pass, i;
 
 	if (!prog->jit_requested)
-		return orig_prog;
-
-	tmp = bpf_jit_blind_constants(prog);
-	/* If blinding was requested and we failed during blinding,
-	 * we must fall back to the interpreter.
-	 */
-	if (IS_ERR(tmp))
-		return orig_prog;
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
+		return prog;
 
 	jit_data = prog->aux->jit_data;
 	if (!jit_data) {
 		jit_data = kzalloc_obj(*jit_data);
-		if (!jit_data) {
-			prog = orig_prog;
-			goto out;
-		}
+		if (!jit_data)
+			return prog;
 		prog->aux->jit_data = jit_data;
 	}
 	if (jit_data->ctx.offset) {
@@ -1527,10 +1512,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	ctx.prog = prog;
 
 	ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL);
-	if (ctx.offset == NULL) {
-		prog = orig_prog;
-		goto out_off;
-	}
+	if (ctx.offset == NULL)
+		goto out_err;
 
 	/* Longest sequence emitted is for bswap32, 12 instructions.  Pre-cook
 	 * the offset array so that we converge faster.
@@ -1543,10 +1526,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		ctx.idx = 0;
 
 		build_prologue(&ctx);
-		if (build_body(&ctx)) {
-			prog = orig_prog;
-			goto out_off;
-		}
+		if (build_body(&ctx))
+			goto out_err;
 		build_epilogue(&ctx);
 
 		if (bpf_jit_enable > 1)
@@ -1569,10 +1550,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	image_size = sizeof(u32) * ctx.idx;
 	header = bpf_jit_binary_alloc(image_size, &image_ptr,
 				      sizeof(u32), jit_fill_hole);
-	if (header == NULL) {
-		prog = orig_prog;
-		goto out_off;
-	}
+	if (header == NULL)
+		goto out_err;
 
 	ctx.image = (u32 *)image_ptr;
 skip_init_ctx:
@@ -1582,8 +1561,7 @@ skip_init_ctx:
 
 	if (build_body(&ctx)) {
 		bpf_jit_binary_free(header);
-		prog = orig_prog;
-		goto out_off;
+		goto out_err;
 	}
 
 	build_epilogue(&ctx);
@@ -1592,8 +1570,7 @@ skip_init_ctx:
 		pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n",
 		       prev_image_size, ctx.idx * 4);
 		bpf_jit_binary_free(header);
-		prog = orig_prog;
-		goto out_off;
+		goto out_err;
 	}
 
 	if (bpf_jit_enable > 1)
@@ -1604,8 +1581,7 @@ skip_init_ctx:
 	if (!prog->is_func || extra_pass) {
 		if (bpf_jit_binary_lock_ro(header)) {
 			bpf_jit_binary_free(header);
-			prog = orig_prog;
-			goto out_off;
+			goto out_err;
 		}
 	} else {
 		jit_data->ctx = ctx;
@@ -1624,9 +1600,14 @@ out_off:
 		kfree(jit_data);
 		prog->aux->jit_data = NULL;
 	}
-out:
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(prog, prog == orig_prog ?
-					   tmp : orig_prog);
+
 	return prog;
+
+out_err:
+	if (extra_pass) {
+		prog->bpf_func = NULL;
+		prog->jited = 0;
+		prog->jited_len = 0;
+	}
+	goto out_off;
 }
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e9b78040d703..77d00a8dec87 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3717,13 +3717,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	struct bpf_binary_header *rw_header = NULL;
 	struct bpf_binary_header *header = NULL;
-	struct bpf_prog *tmp, *orig_prog = prog;
 	void __percpu *priv_stack_ptr = NULL;
 	struct x64_jit_data *jit_data;
 	int priv_stack_alloc_sz;
 	int proglen, oldproglen = 0;
 	struct jit_context ctx = {};
-	bool tmp_blinded = false;
 	bool extra_pass = false;
 	bool padding = false;
 	u8 *rw_image = NULL;
@@ -3733,27 +3731,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	int i;
 
 	if (!prog->jit_requested)
-		return orig_prog;
-
-	tmp = bpf_jit_blind_constants(prog);
-	/*
-	 * If blinding was requested and we failed during blinding,
-	 * we must fall back to the interpreter.
-	 */
-	if (IS_ERR(tmp))
-		return orig_prog;
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
+		return prog;
 
 	jit_data = prog->aux->jit_data;
 	if (!jit_data) {
 		jit_data = kzalloc_obj(*jit_data);
-		if (!jit_data) {
-			prog = orig_prog;
-			goto out;
-		}
+		if (!jit_data)
+			return prog;
 		prog->aux->jit_data = jit_data;
 	}
 	priv_stack_ptr = prog->aux->priv_stack_ptr;
@@ -3765,10 +3749,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) +
 				      2 * PRIV_STACK_GUARD_SZ;
 		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 8, GFP_KERNEL);
-		if (!priv_stack_ptr) {
-			prog = orig_prog;
+		if (!priv_stack_ptr)
 			goto out_priv_stack;
-		}
 
 		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
 		prog->aux->priv_stack_ptr = priv_stack_ptr;
@@ -3786,10 +3768,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		goto skip_init_addrs;
 	}
 	addrs = kvmalloc_objs(*addrs, prog->len + 1);
-	if (!addrs) {
-		prog = orig_prog;
+	if (!addrs)
 		goto out_addrs;
-	}
 
 	/*
 	 * Before first pass, make a rough estimation of addrs[]
@@ -3820,8 +3800,6 @@ out_image:
 						   sizeof(rw_header->size));
 				bpf_jit_binary_pack_free(header, rw_header);
 			}
-			/* Fall back to interpreter mode */
-			prog = orig_prog;
 			if (extra_pass) {
 				prog->bpf_func = NULL;
 				prog->jited = 0;
@@ -3852,10 +3830,8 @@ out_image:
 			header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size,
 							   &image, align, &rw_header, &rw_image,
 							   jit_fill_hole);
-			if (!header) {
-				prog = orig_prog;
+			if (!header)
 				goto out_addrs;
-			}
 			prog->aux->extable = (void *) image + roundup(proglen, align);
 		}
 		oldproglen = proglen;
@@ -3908,8 +3884,6 @@ out_image:
 		prog->bpf_func = (void *)image + cfi_get_offset();
 		prog->jited = 1;
 		prog->jited_len = proglen - cfi_get_offset();
-	} else {
-		prog = orig_prog;
 	}
 
 	if (!image || !prog->is_func || extra_pass) {
@@ -3925,10 +3899,7 @@ out_priv_stack:
 		kfree(jit_data);
 		prog->aux->jit_data = NULL;
 	}
-out:
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(prog, prog == orig_prog ?
-					   tmp : orig_prog);
+
 	return prog;
 }
 
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index dda423025c3d..5f259577614a 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2521,35 +2521,19 @@ bool bpf_jit_needs_zext(void)
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	struct bpf_binary_header *header = NULL;
-	struct bpf_prog *tmp, *orig_prog = prog;
 	int proglen, oldproglen = 0;
 	struct jit_context ctx = {};
-	bool tmp_blinded = false;
 	u8 *image = NULL;
 	int *addrs;
 	int pass;
 	int i;
 
 	if (!prog->jit_requested)
-		return orig_prog;
-
-	tmp = bpf_jit_blind_constants(prog);
-	/*
-	 * If blinding was requested and we failed during blinding,
-	 * we must fall back to the interpreter.
-	 */
-	if (IS_ERR(tmp))
-		return orig_prog;
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
+		return prog;
 
 	addrs = kmalloc_objs(*addrs, prog->len);
-	if (!addrs) {
-		prog = orig_prog;
-		goto out;
-	}
+	if (!addrs)
+		return prog;
 
 	/*
 	 * Before first pass, make a rough estimation of addrs[]
@@ -2574,7 +2558,6 @@ out_image:
 			image = NULL;
 			if (header)
 				bpf_jit_binary_free(header);
-			prog = orig_prog;
 			goto out_addrs;
 		}
 		if (image) {
@@ -2588,10 +2571,8 @@ out_image:
 		if (proglen == oldproglen) {
 			header = bpf_jit_binary_alloc(proglen, &image,
 						      1, jit_fill_hole);
-			if (!header) {
-				prog = orig_prog;
+			if (!header)
 				goto out_addrs;
-			}
 		}
 		oldproglen = proglen;
 		cond_resched();
@@ -2604,16 +2585,10 @@ out_image:
 		prog->bpf_func = (void *)image;
 		prog->jited = 1;
 		prog->jited_len = proglen;
-	} else {
-		prog = orig_prog;
 	}
 
 out_addrs:
 	kfree(addrs);
-out:
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(prog, prog == orig_prog ?
-					   tmp : orig_prog);
 	return prog;
 }
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f552170eacf4..9fa4d4090093 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1184,6 +1184,18 @@ static inline bool bpf_dump_raw_ok(const struct cred *cred)
 
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
+
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
+				     const struct bpf_insn *patch, u32 len);
+#else
+static inline struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
+						   const struct bpf_insn *patch, u32 len)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+#endif /* CONFIG_BPF_SYSCALL */
+
 int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
 
 static inline bool xdp_return_frame_no_direct(void)
@@ -1310,9 +1322,14 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
 
 const char *bpf_jit_get_prog_name(struct bpf_prog *prog);
 
-struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp);
+struct bpf_prog *bpf_jit_blind_constants(struct bpf_verifier_env *env, struct bpf_prog *prog);
 void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
 
+static inline bool bpf_prog_need_blind(const struct bpf_prog *prog)
+{
+	return prog->blinding_requested && !prog->blinded;
+}
+
 static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
 				u32 pass, void *image)
 {
@@ -1451,6 +1468,20 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 {
 }
 
+static inline bool bpf_prog_need_blind(const struct bpf_prog *prog)
+{
+	return false;
+}
+
+static inline
+struct bpf_prog *bpf_jit_blind_constants(struct bpf_verifier_env *env, struct bpf_prog *prog)
+{
+	return prog;
+}
+
+static inline void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
+{
+}
 #endif /* CONFIG_BPF_JIT */
 
 void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 066b86e7233c..fc9fb3c07866 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1508,7 +1508,11 @@ static void adjust_insn_arrays(struct bpf_prog *prog, u32 off, u32 len)
 #endif
 }
 
-struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
+/*
+ * Now this function is used only to blind the main prog and must be invoked only when
+ * bpf_prog_need_blind() returns true.
+ */
+struct bpf_prog *bpf_jit_blind_constants(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	struct bpf_insn insn_buff[16], aux[2];
 	struct bpf_prog *clone, *tmp;
@@ -1516,13 +1520,17 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
 	struct bpf_insn *insn;
 	int i, rewritten;
 
-	if (!prog->blinding_requested || prog->blinded)
-		return prog;
+	if (WARN_ON_ONCE(env && env->prog != prog))
+		return ERR_PTR(-EINVAL);
 
 	clone = bpf_prog_clone_create(prog, GFP_USER);
 	if (!clone)
 		return ERR_PTR(-ENOMEM);
 
+	/* make sure bpf_patch_insn_data() patches the correct prog */
+	if (env)
+		env->prog = clone;
+
 	insn_cnt = clone->len;
 	insn = clone->insnsi;
 
@@ -1550,21 +1558,35 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
 		if (!rewritten)
 			continue;
 
-		tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
-		if (IS_ERR(tmp)) {
+		if (env)
+			tmp = bpf_patch_insn_data(env, i, insn_buff, rewritten);
+		else
+			tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
+
+		if (IS_ERR_OR_NULL(tmp)) {
+			if (env)
+				/* restore the original prog */
+				env->prog = prog;
 			/* Patching may have repointed aux->prog during
 			 * realloc from the original one, so we need to
 			 * fix it up here on error.
 			 */
 			bpf_jit_prog_release_other(prog, clone);
-			return tmp;
+			return IS_ERR(tmp) ? tmp : ERR_PTR(-ENOMEM);
 		}
 
 		clone = tmp;
 		insn_delta = rewritten - 1;
 
-		/* Instructions arrays must be updated using absolute xlated offsets */
-		adjust_insn_arrays(clone, prog->aux->subprog_start + i, rewritten);
+		if (env)
+			env->prog = clone;
+		else
+			/*
+			 * Instructions arrays must be updated using absolute xlated offsets.
+			 * The arrays have already been adjusted by bpf_patch_insn_data() when
+			 * env is not NULL.
+			 */
+			adjust_insn_arrays(clone, i, rewritten);
 
 		/* Walk new program and skip insns we just inserted. */
 		insn = clone->insnsi + i + insn_delta;
@@ -2533,6 +2555,35 @@ static bool bpf_prog_select_interpreter(struct bpf_prog *fp)
 	return select_interpreter;
 }
 
+static struct bpf_prog *bpf_prog_jit_compile(struct bpf_prog *prog)
+{
+#ifdef CONFIG_BPF_JIT
+	struct bpf_prog *orig_prog;
+
+	if (!bpf_prog_need_blind(prog))
+		return bpf_int_jit_compile(prog);
+
+	orig_prog = prog;
+	prog = bpf_jit_blind_constants(NULL, prog);
+	/*
+	 * If blinding was requested and we failed during blinding, we must fall
+	 * back to the interpreter.
+	 */
+	if (IS_ERR(prog))
+		return orig_prog;
+
+	prog = bpf_int_jit_compile(prog);
+	if (prog->jited) {
+		bpf_jit_prog_release_other(prog, orig_prog);
+		return prog;
+	}
+
+	bpf_jit_prog_release_other(orig_prog, prog);
+	prog = orig_prog;
+#endif
+	return prog;
+}
+
 /**
  *	bpf_prog_select_runtime - select exec runtime for BPF program
  *	@fp: bpf_prog populated with BPF program
@@ -2572,7 +2623,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 		if (*err)
 			return fp;
 
-		fp = bpf_int_jit_compile(fp);
+		fp = bpf_prog_jit_compile(fp);
 		bpf_prog_jit_attempt_done(fp);
 		if (!fp->jited && jit_needed) {
 			*err = -ENOTSUPP;
diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c
index dd00a680e4ea..721b830b5ef2 100644
--- a/kernel/bpf/fixups.c
+++ b/kernel/bpf/fixups.c
@@ -232,8 +232,8 @@ static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
 	}
 }
 
-static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
-					    const struct bpf_insn *patch, u32 len)
+struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
+				     const struct bpf_insn *patch, u32 len)
 {
 	struct bpf_prog *new_prog;
 	struct bpf_insn_aux_data *new_data = NULL;
@@ -973,7 +973,47 @@ patch_insn_buf:
 	return 0;
 }
 
-int bpf_jit_subprogs(struct bpf_verifier_env *env)
+static u32 *bpf_dup_subprog_starts(struct bpf_verifier_env *env)
+{
+	u32 *starts = NULL;
+
+	starts = kvmalloc_objs(u32, env->subprog_cnt, GFP_KERNEL_ACCOUNT);
+	if (starts) {
+		for (int i = 0; i < env->subprog_cnt; i++)
+			starts[i] = env->subprog_info[i].start;
+	}
+	return starts;
+}
+
+static void bpf_restore_subprog_starts(struct bpf_verifier_env *env, u32 *orig_starts)
+{
+	for (int i = 0; i < env->subprog_cnt; i++)
+		env->subprog_info[i].start = orig_starts[i];
+	/* restore the start of fake 'exit' subprog as well */
+	env->subprog_info[env->subprog_cnt].start = env->prog->len;
+}
+
+static struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env)
+{
+	size_t size;
+	void *new_aux;
+
+	size = array_size(sizeof(struct bpf_insn_aux_data), env->prog->len);
+	new_aux = __vmalloc(size, GFP_KERNEL_ACCOUNT);
+	if (new_aux)
+		memcpy(new_aux, env->insn_aux_data, size);
+	return new_aux;
+}
+
+static void bpf_restore_insn_aux_data(struct bpf_verifier_env *env,
+				      struct bpf_insn_aux_data *orig_insn_aux)
+{
+	/* the expanded elements are zero-filled, so no special handling is required */
+	vfree(env->insn_aux_data);
+	env->insn_aux_data = orig_insn_aux;
+}
+
+static int jit_subprogs(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog, **func, *tmp;
 	int i, j, subprog_start, subprog_end = 0, len, subprog;
@@ -981,10 +1021,6 @@ int bpf_jit_subprogs(struct bpf_verifier_env *env)
 	struct bpf_insn *insn;
 	void *old_bpf_func;
 	int err, num_exentries;
-	int old_len, subprog_start_adjustment = 0;
-
-	if (env->subprog_cnt <= 1)
-		return 0;
 
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
@@ -1053,10 +1089,11 @@ int bpf_jit_subprogs(struct bpf_verifier_env *env)
 			goto out_free;
 		func[i]->is_func = 1;
 		func[i]->sleepable = prog->sleepable;
+		func[i]->blinded = prog->blinded;
 		func[i]->aux->func_idx = i;
 		/* Below members will be freed only at prog->aux */
 		func[i]->aux->btf = prog->aux->btf;
-		func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment;
+		func[i]->aux->subprog_start = subprog_start;
 		func[i]->aux->func_info = prog->aux->func_info;
 		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
 		func[i]->aux->poke_tab = prog->aux->poke_tab;
@@ -1113,15 +1150,7 @@ int bpf_jit_subprogs(struct bpf_verifier_env *env)
 		func[i]->aux->token = prog->aux->token;
 		if (!i)
 			func[i]->aux->exception_boundary = env->seen_exception;
-
-		/*
-		 * To properly pass the absolute subprog start to jit
-		 * all instruction adjustments should be accumulated
-		 */
-		old_len = func[i]->len;
 		func[i] = bpf_int_jit_compile(func[i]);
-		subprog_start_adjustment += func[i]->len - old_len;
-
 		if (!func[i]->jited) {
 			err = -ENOTSUPP;
 			goto out_free;
@@ -1247,16 +1276,87 @@ out_free:
 	}
 	kfree(func);
 out_undo_insn:
+	bpf_prog_jit_attempt_done(prog);
+	return err;
+}
+
+int bpf_jit_subprogs(struct bpf_verifier_env *env)
+{
+	int err, i;
+	bool blinded = false;
+	struct bpf_insn *insn;
+	struct bpf_prog *prog, *orig_prog;
+	struct bpf_insn_aux_data *orig_insn_aux;
+	u32 *orig_subprog_starts;
+
+	if (env->subprog_cnt <= 1)
+		return 0;
+
+	prog = orig_prog = env->prog;
+	if (bpf_prog_need_blind(prog)) {
+		orig_insn_aux = bpf_dup_insn_aux_data(env);
+		if (!orig_insn_aux) {
+			err = -ENOMEM;
+			goto out_cleanup;
+		}
+		orig_subprog_starts = bpf_dup_subprog_starts(env);
+		if (!orig_subprog_starts) {
+			vfree(orig_insn_aux);
+			err = -ENOMEM;
+			goto out_cleanup;
+		}
+		prog = bpf_jit_blind_constants(env, prog);
+		if (IS_ERR(prog)) {
+			err = -ENOMEM;
+			prog = orig_prog;
+			goto out_restore;
+		}
+		blinded = true;
+	}
+
+	err = jit_subprogs(env);
+	if (err)
+		goto out_jit_err;
+
+	if (blinded) {
+		bpf_jit_prog_release_other(prog, orig_prog);
+		kvfree(orig_subprog_starts);
+		vfree(orig_insn_aux);
+	}
+
+	return 0;
+
+out_jit_err:
+	if (blinded) {
+		bpf_jit_prog_release_other(orig_prog, prog);
+		/* roll back to the clean original prog */
+		prog = env->prog = orig_prog;
+		goto out_restore;
+	} else {
+		if (err != -EFAULT) {
+			/*
+			 * We will fall back to interpreter mode when err is not -EFAULT, before
+			 * that, insn->off and insn->imm should be restored to their original
+			 * values since they were modified by jit_subprogs.
+			 */
+			for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+				if (!bpf_pseudo_call(insn))
+					continue;
+				insn->off = 0;
+				insn->imm = env->insn_aux_data[i].call_imm;
+			}
+		}
+		goto out_cleanup;
+	}
+
+out_restore:
+	bpf_restore_subprog_starts(env, orig_subprog_starts);
+	bpf_restore_insn_aux_data(env, orig_insn_aux);
+	kvfree(orig_subprog_starts);
+out_cleanup:
 	/* cleanup main prog to be interpreted */
 	prog->jit_requested = 0;
 	prog->blinding_requested = 0;
-	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
-		if (!bpf_pseudo_call(insn))
-			continue;
-		insn->off = 0;
-		insn->imm = env->insn_aux_data[i].call_imm;
-	}
-	bpf_prog_jit_attempt_done(prog);
 	return err;
 }
 
-- 
cgit v1.2.3


From d9ef13f72711f2dad64cd4445472ded98fb6c954 Mon Sep 17 00:00:00 2001
From: Xu Kuohai <xukuohai@huawei.com>
Date: Thu, 16 Apr 2026 06:43:38 +0000
Subject: bpf: Pass bpf_verifier_env to JIT

Pass bpf_verifier_env to bpf_int_jit_compile(). The follow-up patch will
use env->insn_aux_data in the JIT stage to detect indirect jump targets.

Since bpf_prog_select_runtime() can be called by cbpf and lib/test_bpf.c
code without verifier, introduce helper __bpf_prog_select_runtime()
to accept the env parameter.

Remove the call to bpf_prog_select_runtime() in bpf_prog_load(), and
switch to call __bpf_prog_select_runtime() in the verifier, with env
variable passed. The original bpf_prog_select_runtime() is preserved for
cbpf and lib/test_bpf.c, where env is NULL.

Now all constants blinding calls are moved into the verifier, except
the cbpf and lib/test_bpf.c cases. The instructions arrays are adjusted
by bpf_patch_insn_data() function for normal cases, so there is no need
to call adjust_insn_arrays() in bpf_jit_blind_constants(). Remove it.

Reviewed-by: Anton Protopopov <a.s.protopopov@gmail.com> # v8
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com> # v12
Acked-by: Hengqi Chen <hengqi.chen@gmail.com> # v14
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Link: https://lore.kernel.org/r/20260416064341.151802-3-xukuohai@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/arc/net/bpf_jit_core.c      |  2 +-
 arch/arm/net/bpf_jit_32.c        |  2 +-
 arch/arm64/net/bpf_jit_comp.c    |  2 +-
 arch/loongarch/net/bpf_jit.c     |  2 +-
 arch/mips/net/bpf_jit_comp.c     |  2 +-
 arch/parisc/net/bpf_jit_core.c   |  2 +-
 arch/powerpc/net/bpf_jit_comp.c  |  2 +-
 arch/riscv/net/bpf_jit_core.c    |  2 +-
 arch/s390/net/bpf_jit_comp.c     |  2 +-
 arch/sparc/net/bpf_jit_comp_64.c |  2 +-
 arch/x86/net/bpf_jit_comp.c      |  2 +-
 arch/x86/net/bpf_jit_comp32.c    |  2 +-
 include/linux/filter.h           | 17 +++++++-
 kernel/bpf/core.c                | 86 ++++++++++++++++++++--------------------
 kernel/bpf/fixups.c              | 10 ++---
 kernel/bpf/syscall.c             |  4 --
 kernel/bpf/verifier.c            | 14 ++++---
 17 files changed, 84 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c
index 973ceae48675..639a2736f029 100644
--- a/arch/arc/net/bpf_jit_core.c
+++ b/arch/arc/net/bpf_jit_core.c
@@ -1400,7 +1400,7 @@ static struct bpf_prog *do_extra_pass(struct bpf_prog *prog)
  * (re)locations involved that their addresses are not known
  * during the first run.
  */
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	vm_dump(prog);
 
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index e6b1bb2de627..1628b6fc70a4 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -2142,7 +2142,7 @@ bool bpf_jit_needs_zext(void)
 	return true;
 }
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	struct bpf_binary_header *header;
 	struct jit_ctx ctx;
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index d310d1c35192..bd8757952507 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2000,7 +2000,7 @@ struct arm64_jit_data {
 	struct jit_ctx ctx;
 };
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	int image_size, prog_size, extable_size, extable_align, extable_offset;
 	struct bpf_binary_header *header;
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index fcc8c0c29fb0..5149ce4cef7e 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1920,7 +1920,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 	return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE;
 }
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	bool extra_pass = false;
 	u8 *image_ptr, *ro_image_ptr;
diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c
index d2b6c955f18e..6ee4abe6a1f7 100644
--- a/arch/mips/net/bpf_jit_comp.c
+++ b/arch/mips/net/bpf_jit_comp.c
@@ -909,7 +909,7 @@ bool bpf_jit_needs_zext(void)
 	return true;
 }
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	struct bpf_binary_header *header = NULL;
 	struct jit_context ctx;
diff --git a/arch/parisc/net/bpf_jit_core.c b/arch/parisc/net/bpf_jit_core.c
index 35dca372b5df..172770132440 100644
--- a/arch/parisc/net/bpf_jit_core.c
+++ b/arch/parisc/net/bpf_jit_core.c
@@ -41,7 +41,7 @@ bool bpf_jit_needs_zext(void)
 	return true;
 }
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	unsigned int prog_size = 0, extable_size = 0;
 	bool extra_pass = false;
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 2bae4699e78f..53ab97ad6074 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -162,7 +162,7 @@ static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size
 	}
 }
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *fp)
 {
 	u32 proglen;
 	u32 alloclen;
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 36f0aea8096d..4365d07aaf54 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -41,7 +41,7 @@ bool bpf_jit_needs_zext(void)
 	return true;
 }
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	unsigned int prog_size = 0, extable_size = 0;
 	bool extra_pass = false;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 2dfc279b1be2..94128fe6be23 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2312,7 +2312,7 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
 /*
  * Compile eBPF program "fp"
  */
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *fp)
 {
 	struct bpf_binary_header *header;
 	struct s390_jit_data *jit_data;
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index e83e29137566..2fa0e9375127 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1477,7 +1477,7 @@ struct sparc64_jit_data {
 	struct jit_ctx ctx;
 };
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	struct sparc64_jit_data *jit_data;
 	struct bpf_binary_header *header;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 77d00a8dec87..72d9a5faa230 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3713,7 +3713,7 @@ struct x64_jit_data {
 #define MAX_PASSES 20
 #define PADDING_PASSES (MAX_PASSES - 5)
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	struct bpf_binary_header *rw_header = NULL;
 	struct bpf_binary_header *header = NULL;
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 5f259577614a..852baf2e4db4 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2518,7 +2518,7 @@ bool bpf_jit_needs_zext(void)
 	return true;
 }
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	struct bpf_binary_header *header = NULL;
 	int proglen, oldproglen = 0;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9fa4d4090093..1ec6d5ba64cc 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1108,6 +1108,8 @@ sk_filter_reason(struct sock *sk, struct sk_buff *skb)
 	return sk_filter_trim_cap(sk, skb, 1);
 }
 
+struct bpf_prog *__bpf_prog_select_runtime(struct bpf_verifier_env *env, struct bpf_prog *fp,
+					   int *err);
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);
 void bpf_prog_free(struct bpf_prog *fp);
 
@@ -1153,7 +1155,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 	((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \
 	 (void *)__bpf_call_base)
 
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
 bool bpf_jit_inlines_helper_call(s32 imm);
@@ -1188,12 +1190,25 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 #ifdef CONFIG_BPF_SYSCALL
 struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
 				     const struct bpf_insn *patch, u32 len);
+struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env);
+void bpf_restore_insn_aux_data(struct bpf_verifier_env *env,
+			       struct bpf_insn_aux_data *orig_insn_aux);
 #else
 static inline struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
 						   const struct bpf_insn *patch, u32 len)
 {
 	return ERR_PTR(-ENOTSUPP);
 }
+
+static inline struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env)
+{
+	return NULL;
+}
+
+static inline void bpf_restore_insn_aux_data(struct bpf_verifier_env *env,
+					     struct bpf_insn_aux_data *orig_insn_aux)
+{
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index fc9fb3c07866..79361aa11757 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1491,23 +1491,6 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
 	bpf_prog_clone_free(fp_other);
 }
 
-static void adjust_insn_arrays(struct bpf_prog *prog, u32 off, u32 len)
-{
-#ifdef CONFIG_BPF_SYSCALL
-	struct bpf_map *map;
-	int i;
-
-	if (len <= 1)
-		return;
-
-	for (i = 0; i < prog->aux->used_map_cnt; i++) {
-		map = prog->aux->used_maps[i];
-		if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY)
-			bpf_insn_array_adjust(map, off, len);
-	}
-#endif
-}
-
 /*
  * Now this function is used only to blind the main prog and must be invoked only when
  * bpf_prog_need_blind() returns true.
@@ -1580,13 +1563,6 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_verifier_env *env, struct bp
 
 		if (env)
 			env->prog = clone;
-		else
-			/*
-			 * Instructions arrays must be updated using absolute xlated offsets.
-			 * The arrays have already been adjusted by bpf_patch_insn_data() when
-			 * env is not NULL.
-			 */
-			adjust_insn_arrays(clone, i, rewritten);
 
 		/* Walk new program and skip insns we just inserted. */
 		insn = clone->insnsi + i + insn_delta;
@@ -2555,47 +2531,55 @@ static bool bpf_prog_select_interpreter(struct bpf_prog *fp)
 	return select_interpreter;
 }
 
-static struct bpf_prog *bpf_prog_jit_compile(struct bpf_prog *prog)
+static struct bpf_prog *bpf_prog_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 #ifdef CONFIG_BPF_JIT
 	struct bpf_prog *orig_prog;
+	struct bpf_insn_aux_data *orig_insn_aux;
 
 	if (!bpf_prog_need_blind(prog))
-		return bpf_int_jit_compile(prog);
+		return bpf_int_jit_compile(env, prog);
+
+	if (env) {
+		/*
+		 * If env is not NULL, we are called from the end of bpf_check(), at this
+		 * point, only insn_aux_data is used after failure, so it should be restored
+		 * on failure.
+		 */
+		orig_insn_aux = bpf_dup_insn_aux_data(env);
+		if (!orig_insn_aux)
+			return prog;
+	}
 
 	orig_prog = prog;
-	prog = bpf_jit_blind_constants(NULL, prog);
+	prog = bpf_jit_blind_constants(env, prog);
 	/*
 	 * If blinding was requested and we failed during blinding, we must fall
 	 * back to the interpreter.
 	 */
 	if (IS_ERR(prog))
-		return orig_prog;
+		goto out_restore;
 
-	prog = bpf_int_jit_compile(prog);
+	prog = bpf_int_jit_compile(env, prog);
 	if (prog->jited) {
 		bpf_jit_prog_release_other(prog, orig_prog);
+		if (env)
+			vfree(orig_insn_aux);
 		return prog;
 	}
 
 	bpf_jit_prog_release_other(orig_prog, prog);
+
+out_restore:
 	prog = orig_prog;
+	if (env)
+		bpf_restore_insn_aux_data(env, orig_insn_aux);
 #endif
 	return prog;
 }
 
-/**
- *	bpf_prog_select_runtime - select exec runtime for BPF program
- *	@fp: bpf_prog populated with BPF program
- *	@err: pointer to error variable
- *
- * Try to JIT eBPF program, if JIT is not available, use interpreter.
- * The BPF program will be executed via bpf_prog_run() function.
- *
- * Return: the &fp argument along with &err set to 0 for success or
- * a negative errno code on failure
- */
-struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
+struct bpf_prog *__bpf_prog_select_runtime(struct bpf_verifier_env *env, struct bpf_prog *fp,
+					   int *err)
 {
 	/* In case of BPF to BPF calls, verifier did all the prep
 	 * work with regards to JITing, etc.
@@ -2623,7 +2607,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 		if (*err)
 			return fp;
 
-		fp = bpf_prog_jit_compile(fp);
+		fp = bpf_prog_jit_compile(env, fp);
 		bpf_prog_jit_attempt_done(fp);
 		if (!fp->jited && jit_needed) {
 			*err = -ENOTSUPP;
@@ -2649,6 +2633,22 @@ finalize:
 
 	return fp;
 }
+
+/**
+ *	bpf_prog_select_runtime - select exec runtime for BPF program
+ *	@fp: bpf_prog populated with BPF program
+ *	@err: pointer to error variable
+ *
+ * Try to JIT eBPF program, if JIT is not available, use interpreter.
+ * The BPF program will be executed via bpf_prog_run() function.
+ *
+ * Return: the &fp argument along with &err set to 0 for success or
+ * a negative errno code on failure
+ */
+struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
+{
+	return __bpf_prog_select_runtime(NULL, fp, err);
+}
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
 static unsigned int __bpf_prog_ret1(const void *ctx,
@@ -3136,7 +3136,7 @@ const struct bpf_func_proto bpf_tail_call_proto = {
  * It is encouraged to implement bpf_int_jit_compile() instead, so that
  * eBPF and implicitly also cBPF can get JITed!
  */
-struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	return prog;
 }
diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c
index 721b830b5ef2..6c86980cc9e8 100644
--- a/kernel/bpf/fixups.c
+++ b/kernel/bpf/fixups.c
@@ -993,7 +993,7 @@ static void bpf_restore_subprog_starts(struct bpf_verifier_env *env, u32 *orig_s
 	env->subprog_info[env->subprog_cnt].start = env->prog->len;
 }
 
-static struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env)
+struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env)
 {
 	size_t size;
 	void *new_aux;
@@ -1005,8 +1005,8 @@ static struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *
 	return new_aux;
 }
 
-static void bpf_restore_insn_aux_data(struct bpf_verifier_env *env,
-				      struct bpf_insn_aux_data *orig_insn_aux)
+void bpf_restore_insn_aux_data(struct bpf_verifier_env *env,
+			       struct bpf_insn_aux_data *orig_insn_aux)
 {
 	/* the expanded elements are zero-filled, so no special handling is required */
 	vfree(env->insn_aux_data);
@@ -1150,7 +1150,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		func[i]->aux->token = prog->aux->token;
 		if (!i)
 			func[i]->aux->exception_boundary = env->seen_exception;
-		func[i] = bpf_int_jit_compile(func[i]);
+		func[i] = bpf_int_jit_compile(env, func[i]);
 		if (!func[i]->jited) {
 			err = -ENOTSUPP;
 			goto out_free;
@@ -1194,7 +1194,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	}
 	for (i = 0; i < env->subprog_cnt; i++) {
 		old_bpf_func = func[i]->bpf_func;
-		tmp = bpf_int_jit_compile(func[i]);
+		tmp = bpf_int_jit_compile(env, func[i]);
 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
 			err = -ENOTSUPP;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b73b25c63073..a3c0214ca934 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3083,10 +3083,6 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 	if (err < 0)
 		goto free_used_maps;
 
-	prog = bpf_prog_select_runtime(prog, &err);
-	if (err < 0)
-		goto free_used_maps;
-
 	err = bpf_prog_mark_insn_arrays_ready(prog);
 	if (err < 0)
 		goto free_used_maps;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9e4980128151..e804e0da3500 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20155,6 +20155,14 @@ skip_full_check:
 
 	adjust_btf_func(env);
 
+	/* extension progs temporarily inherit the attach_type of their targets
+	   for verification purposes, so set it back to zero before returning
+	 */
+	if (env->prog->type == BPF_PROG_TYPE_EXT)
+		env->prog->expected_attach_type = 0;
+
+	env->prog = __bpf_prog_select_runtime(env, env->prog, &ret);
+
 err_release_maps:
 	if (ret)
 		release_insn_arrays(env);
@@ -20166,12 +20174,6 @@ err_release_maps:
 	if (!env->prog->aux->used_btfs)
 		release_btfs(env);
 
-	/* extension progs temporarily inherit the attach_type of their targets
-	   for verification purposes, so set it back to zero before returning
-	 */
-	if (env->prog->type == BPF_PROG_TYPE_EXT)
-		env->prog->expected_attach_type = 0;
-
 	*prog = env->prog;
 
 	module_put(env->attach_btf_mod);
-- 
cgit v1.2.3


From 07ae6c130b46cf5e3e1a7dc5c1889fefe9adc2d3 Mon Sep 17 00:00:00 2001
From: Xu Kuohai <xukuohai@huawei.com>
Date: Thu, 16 Apr 2026 06:43:39 +0000
Subject: bpf: Add helper to detect indirect jump targets

Introduce helper bpf_insn_is_indirect_target to check whether a BPF
instruction is an indirect jump target.

Since the verifier knows which instructions are indirect jump targets,
add a new flag indirect_target to struct bpf_insn_aux_data to mark
them. The verifier sets this flag when verifying an indirect jump target
instruction, and the helper checks the flag to determine whether an
instruction is an indirect jump target.

Reviewed-by: Anton Protopopov <a.s.protopopov@gmail.com> #v8
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com> #v12
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Link: https://lore.kernel.org/r/20260416064341.151802-4-xukuohai@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h          |  2 ++
 include/linux/bpf_verifier.h |  9 +++++----
 kernel/bpf/core.c            |  9 +++++++++
 kernel/bpf/fixups.c          | 12 ++++++++++++
 kernel/bpf/verifier.c        |  7 +++++++
 5 files changed, 35 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0136a108d083..b4b703c90ca9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1541,6 +1541,8 @@ bool bpf_has_frame_pointer(unsigned long ip);
 int bpf_jit_charge_modmem(u32 size);
 void bpf_jit_uncharge_modmem(u32 size);
 bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
+bool bpf_insn_is_indirect_target(const struct bpf_verifier_env *env, const struct bpf_prog *prog,
+				 int insn_idx);
 #else
 static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
 					   struct bpf_trampoline *tr,
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 53e8664cb566..b148f816f25b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -630,16 +630,17 @@ struct bpf_insn_aux_data {
 
 	/* below fields are initialized once */
 	unsigned int orig_idx; /* original instruction index */
-	bool jmp_point;
-	bool prune_point;
+	u32 jmp_point:1;
+	u32 prune_point:1;
 	/* ensure we check state equivalence and save state checkpoint and
 	 * this instruction, regardless of any heuristics
 	 */
-	bool force_checkpoint;
+	u32 force_checkpoint:1;
 	/* true if instruction is a call to a helper function that
 	 * accepts callback function as a parameter.
 	 */
-	bool calls_callback;
+	u32 calls_callback:1;
+	u32 indirect_target:1; /* if it is an indirect jump target */
 	/*
 	 * CFG strongly connected component this instruction belongs to,
 	 * zero if it is a singleton SCC.
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 79361aa11757..8b018ff48875 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1573,6 +1573,15 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_verifier_env *env, struct bp
 	clone->blinded = 1;
 	return clone;
 }
+
+bool bpf_insn_is_indirect_target(const struct bpf_verifier_env *env, const struct bpf_prog *prog,
+				 int insn_idx)
+{
+	if (!env)
+		return false;
+	insn_idx += prog->aux->subprog_start;
+	return env->insn_aux_data[insn_idx].indirect_target;
+}
 #endif /* CONFIG_BPF_JIT */
 
 /* Base function for offset calculation. Needs to go into .text section,
diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c
index 6c86980cc9e8..fba9e8c00878 100644
--- a/kernel/bpf/fixups.c
+++ b/kernel/bpf/fixups.c
@@ -183,6 +183,18 @@ static void adjust_insn_aux_data(struct bpf_verifier_env *env,
 		data[i].seen = old_seen;
 		data[i].zext_dst = insn_has_def32(insn + i);
 	}
+
+	/*
+	 * The indirect_target flag of the original instruction was moved to the last of the
+	 * new instructions by the above memmove and memset, but the indirect jump target is
+	 * actually the first instruction, so move it back. This also matches with the behavior
+	 * of bpf_insn_array_adjust(), which preserves xlated_off to point to the first new
+	 * instruction.
+	 */
+	if (data[off + cnt - 1].indirect_target) {
+		data[off].indirect_target = 1;
+		data[off + cnt - 1].indirect_target = 0;
+	}
 }
 
 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e804e0da3500..1e36b9e91277 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3497,6 +3497,11 @@ static int insn_stack_access_flags(int frameno, int spi)
 	return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
 }
 
+static void mark_indirect_target(struct bpf_verifier_env *env, int idx)
+{
+	env->insn_aux_data[idx].indirect_target = true;
+}
+
 #define LR_FRAMENO_BITS	3
 #define LR_SPI_BITS	6
 #define LR_ENTRY_BITS	(LR_SPI_BITS + LR_FRAMENO_BITS + 1)
@@ -17545,12 +17550,14 @@ static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *in
 	}
 
 	for (i = 0; i < n - 1; i++) {
+		mark_indirect_target(env, env->gotox_tmp_buf->items[i]);
 		other_branch = push_stack(env, env->gotox_tmp_buf->items[i],
 					  env->insn_idx, env->cur_state->speculative);
 		if (IS_ERR(other_branch))
 			return PTR_ERR(other_branch);
 	}
 	env->insn_idx = env->gotox_tmp_buf->items[n-1];
+	mark_indirect_target(env, env->insn_idx);
 	return INSN_IDX_UPDATED;
 }
 
-- 
cgit v1.2.3


From 7b41ff29c8d386257bae62ad557fd6bad8cc6787 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 12 Apr 2026 20:07:21 +0200
Subject: entry: Kill ARCH_SYSCALL_WORK_{ENTER,EXIT}

Nowadays nothing redefines these flags.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Jinjie Ruan <ruanjinjie@huawei.com>
Link: https://patch.msgid.link/advfWWKgOQkFkwp9@redhat.com
---
 include/linux/entry-common.h | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index e04d67e999a1..416a3352261f 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -20,31 +20,21 @@
 /*
  * SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
  */
-#ifndef ARCH_SYSCALL_WORK_ENTER
-# define ARCH_SYSCALL_WORK_ENTER	(0)
-#endif
-
-/*
- * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
- */
-#ifndef ARCH_SYSCALL_WORK_EXIT
-# define ARCH_SYSCALL_WORK_EXIT		(0)
-#endif
-
 #define SYSCALL_WORK_ENTER	(SYSCALL_WORK_SECCOMP |			\
 				 SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
 				 SYSCALL_WORK_SYSCALL_TRACE |		\
 				 SYSCALL_WORK_SYSCALL_EMU |		\
 				 SYSCALL_WORK_SYSCALL_AUDIT |		\
 				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
-				 SYSCALL_WORK_SYSCALL_RSEQ_SLICE |	\
-				 ARCH_SYSCALL_WORK_ENTER)
+				 SYSCALL_WORK_SYSCALL_RSEQ_SLICE)
+/*
+ * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
+ */
 #define SYSCALL_WORK_EXIT	(SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
 				 SYSCALL_WORK_SYSCALL_TRACE |		\
 				 SYSCALL_WORK_SYSCALL_AUDIT |		\
 				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
-				 SYSCALL_WORK_SYSCALL_EXIT_TRAP	|	\
-				 ARCH_SYSCALL_WORK_EXIT)
+				 SYSCALL_WORK_SYSCALL_EXIT_TRAP)
 
 /**
  * arch_ptrace_report_syscall_entry - Architecture specific ptrace_report_syscall_entry() wrapper
-- 
cgit v1.2.3


From 3cade698881eb238f88cbbfec82acc2110440a3f Mon Sep 17 00:00:00 2001
From: Wei Fang <wei.fang@nxp.com>
Date: Wed, 15 Apr 2026 14:08:33 +0800
Subject: net: enetc: fix NTMP DMA use-after-free issue

The AI-generated review reported a potential DMA use-after-free issue
[1]. If netc_xmit_ntmp_cmd() times out and returns an error, the pending
command is not explicitly aborted, while ntmp_free_data_mem()
unconditionally frees the DMA buffer. If the buffer has already been
reallocated elsewhere, this may lead to silent memory corruption. Because
the hardware eventually processes the pending command and perform a DMA
write of the response to the physical address of the freed buffer.

To resolve this issue, this patch does the following modifications:

1. Convert cbdr->ring_lock from a spinlock to a mutex

The lock was originally a spinlock in case NTMP operations might be
invoked from atomic context. After downstream support for all NTMP
tables, no such usage has materialized. A mutex lock is now required
because the driver now needs to reclaim used BDs and release associated
DMA memory within the lock's context, while dma_free_coherent() might
sleep.

2. Introduce software command BD (struct netc_swcbd)

The hardware write-back overwrites the addr and len fields of the BD,
so the driver cannot rely on the hardware BD to free the associated DMA
memory. The driver now maintains a software shadow BD storing the DMA
buffer pointer, DMA address, and size. And netc_xmit_ntmp_cmd() only
reclaims older BDs when the number of used BDs reaches
NETC_CBDR_CLEAN_WORK (16). The software BD enables correct DMA memory
release. With this, struct ntmp_dma_buf and ntmp_free_data_mem() are no
longer needed and are removed.

3. Require callers to hold ring_lock across netc_xmit_ntmp_cmd()

netc_xmit_ntmp_cmd() releases the ring_lock before the caller finishes
consuming the response. At this point, if a concurrent thread submits
a new command, it may trigger ntmp_clean_cbdr() and free the DMA buffer
while it is still in use. Move ring_lock ownership to the caller to
ensure the response buffer cannot be reclaimed prematurely. So the
helpers ntmp_select_and_lock_cbdr() and ntmp_unlock_cbdr() are added.

These changes eliminate the DMA use-after-free condition and ensure safe
and consistent BD reclamation and DMA buffer lifecycle management.

Fixes: 4701073c3deb ("net: enetc: add initial netc-lib driver to support NTMP")
Link: https://lore.kernel.org/netdev/20260403011729.1795413-1-kuba@kernel.org/ # [1]
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Link: https://patch.msgid.link/20260415060833.2303846-3-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/enetc/ntmp.c        | 214 ++++++++++++---------
 .../net/ethernet/freescale/enetc/ntmp_private.h    |   8 +-
 include/linux/fsl/ntmp.h                           |   9 +-
 3 files changed, 134 insertions(+), 97 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/freescale/enetc/ntmp.c b/drivers/net/ethernet/freescale/enetc/ntmp.c
index 0dad38735234..c94a928622fd 100644
--- a/drivers/net/ethernet/freescale/enetc/ntmp.c
+++ b/drivers/net/ethernet/freescale/enetc/ntmp.c
@@ -7,6 +7,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/fsl/netc_global.h>
 #include <linux/iopoll.h>
+#include <linux/vmalloc.h>
 
 #include "ntmp_private.h"
 
@@ -42,6 +43,12 @@ int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev,
 	if (!cbdr->addr_base)
 		return -ENOMEM;
 
+	cbdr->swcbd = vcalloc(cbd_num, sizeof(struct netc_swcbd));
+	if (!cbdr->swcbd) {
+		dma_free_coherent(dev, size, cbdr->addr_base, cbdr->dma_base);
+		return -ENOMEM;
+	}
+
 	cbdr->dma_size = size;
 	cbdr->bd_num = cbd_num;
 	cbdr->regs = *regs;
@@ -52,7 +59,7 @@ int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev,
 	cbdr->addr_base_align = PTR_ALIGN(cbdr->addr_base,
 					  NTMP_BASE_ADDR_ALIGN);
 
-	spin_lock_init(&cbdr->ring_lock);
+	mutex_init(&cbdr->ring_lock);
 
 	cbdr->next_to_use = netc_read(cbdr->regs.pir);
 	cbdr->next_to_clean = netc_read(cbdr->regs.cir) & NETC_CBDRCIR_INDEX;
@@ -71,10 +78,24 @@ int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev,
 }
 EXPORT_SYMBOL_GPL(ntmp_init_cbdr);
 
+static void ntmp_free_data_mem(struct device *dev, struct netc_swcbd *swcbd)
+{
+	if (unlikely(!swcbd->buf))
+		return;
+
+	dma_free_coherent(dev, swcbd->size + NTMP_DATA_ADDR_ALIGN,
+			  swcbd->buf, swcbd->dma);
+}
+
 void ntmp_free_cbdr(struct netc_cbdr *cbdr)
 {
 	/* Disable the Control BD Ring */
 	netc_write(cbdr->regs.mr, 0);
+
+	for (int i = 0; i < cbdr->bd_num; i++)
+		ntmp_free_data_mem(cbdr->dev, &cbdr->swcbd[i]);
+
+	vfree(cbdr->swcbd);
 	dma_free_coherent(cbdr->dev, cbdr->dma_size, cbdr->addr_base,
 			  cbdr->dma_base);
 	memset(cbdr, 0, sizeof(*cbdr));
@@ -94,40 +115,59 @@ static union netc_cbd *ntmp_get_cbd(struct netc_cbdr *cbdr, int index)
 
 static void ntmp_clean_cbdr(struct netc_cbdr *cbdr)
 {
-	union netc_cbd *cbd;
-	int i;
+	int i = cbdr->next_to_clean;
 
-	i = cbdr->next_to_clean;
 	while ((netc_read(cbdr->regs.cir) & NETC_CBDRCIR_INDEX) != i) {
-		cbd = ntmp_get_cbd(cbdr, i);
+		union netc_cbd *cbd = ntmp_get_cbd(cbdr, i);
+		struct netc_swcbd *swcbd = &cbdr->swcbd[i];
+
+		ntmp_free_data_mem(cbdr->dev, swcbd);
+		memset(swcbd, 0, sizeof(*swcbd));
 		memset(cbd, 0, sizeof(*cbd));
 		i = (i + 1) % cbdr->bd_num;
 	}
 
+	dma_wmb();
 	cbdr->next_to_clean = i;
 }
 
-static int netc_xmit_ntmp_cmd(struct ntmp_user *user, union netc_cbd *cbd)
+static void ntmp_select_and_lock_cbdr(struct ntmp_user *user,
+				      struct netc_cbdr **cbdr)
+{
+	/* Currently only ENETC is supported, and it has only one command
+	 * BD ring.
+	 */
+	*cbdr = &user->ring[0];
+
+	mutex_lock(&(*cbdr)->ring_lock);
+}
+
+static void ntmp_unlock_cbdr(struct netc_cbdr *cbdr)
+{
+	mutex_unlock(&cbdr->ring_lock);
+}
+
+static int netc_xmit_ntmp_cmd(struct netc_cbdr *cbdr, union netc_cbd *cbd,
+			      struct netc_swcbd *swcbd)
 {
 	union netc_cbd *cur_cbd;
-	struct netc_cbdr *cbdr;
-	int i, err;
+	int i, err, used_bds;
 	u16 status;
 	u32 val;
 
-	/* Currently only i.MX95 ENETC is supported, and it only has one
-	 * command BD ring
-	 */
-	cbdr = &user->ring[0];
-
-	spin_lock_bh(&cbdr->ring_lock);
-
-	if (unlikely(!ntmp_get_free_cbd_num(cbdr)))
+	used_bds = cbdr->bd_num - ntmp_get_free_cbd_num(cbdr);
+	if (unlikely(used_bds >= NETC_CBDR_CLEAN_WORK)) {
 		ntmp_clean_cbdr(cbdr);
+		if (unlikely(!ntmp_get_free_cbd_num(cbdr))) {
+			ntmp_free_data_mem(cbdr->dev, swcbd);
+			return -EBUSY;
+		}
+	}
 
 	i = cbdr->next_to_use;
 	cur_cbd = ntmp_get_cbd(cbdr, i);
 	*cur_cbd = *cbd;
+	cbdr->swcbd[i] = *swcbd;
 	dma_wmb();
 
 	/* Update producer index of both software and hardware */
@@ -135,17 +175,16 @@ static int netc_xmit_ntmp_cmd(struct ntmp_user *user, union netc_cbd *cbd)
 	cbdr->next_to_use = i;
 	netc_write(cbdr->regs.pir, i);
 
-	err = read_poll_timeout_atomic(netc_read, val,
-				       (val & NETC_CBDRCIR_INDEX) == i,
-				       NETC_CBDR_DELAY_US, NETC_CBDR_TIMEOUT,
-				       true, cbdr->regs.cir);
+	err = read_poll_timeout(netc_read, val,
+				(val & NETC_CBDRCIR_INDEX) == i,
+				NETC_CBDR_DELAY_US, NETC_CBDR_TIMEOUT,
+				true, cbdr->regs.cir);
 	if (unlikely(err))
-		goto cbdr_unlock;
+		return err;
 
 	if (unlikely(val & NETC_CBDRCIR_SBE)) {
-		dev_err(user->dev, "Command BD system bus error\n");
-		err = -EIO;
-		goto cbdr_unlock;
+		dev_err(cbdr->dev, "Command BD system bus error\n");
+		return -EIO;
 	}
 
 	dma_rmb();
@@ -157,40 +196,29 @@ static int netc_xmit_ntmp_cmd(struct ntmp_user *user, union netc_cbd *cbd)
 	/* Check the writeback error status */
 	status = le16_to_cpu(cbd->resp_hdr.error_rr) & NTMP_RESP_ERROR;
 	if (unlikely(status)) {
-		err = -EIO;
-		dev_err(user->dev, "Command BD error: 0x%04x\n", status);
+		dev_err(cbdr->dev, "Command BD error: 0x%04x\n", status);
+		return -EIO;
 	}
 
-	ntmp_clean_cbdr(cbdr);
-	dma_wmb();
-
-cbdr_unlock:
-	spin_unlock_bh(&cbdr->ring_lock);
-
-	return err;
+	return 0;
 }
 
-static int ntmp_alloc_data_mem(struct ntmp_dma_buf *data, void **buf_align)
+static int ntmp_alloc_data_mem(struct device *dev, struct netc_swcbd *swcbd,
+			       void **buf_align)
 {
 	void *buf;
 
-	buf = dma_alloc_coherent(data->dev, data->size + NTMP_DATA_ADDR_ALIGN,
-				 &data->dma, GFP_KERNEL);
+	buf = dma_alloc_coherent(dev, swcbd->size + NTMP_DATA_ADDR_ALIGN,
+				 &swcbd->dma, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
-	data->buf = buf;
+	swcbd->buf = buf;
 	*buf_align = PTR_ALIGN(buf, NTMP_DATA_ADDR_ALIGN);
 
 	return 0;
 }
 
-static void ntmp_free_data_mem(struct ntmp_dma_buf *data)
-{
-	dma_free_coherent(data->dev, data->size + NTMP_DATA_ADDR_ALIGN,
-			  data->buf, data->dma);
-}
-
 static void ntmp_fill_request_hdr(union netc_cbd *cbd, dma_addr_t dma,
 				  int len, int table_id, int cmd,
 				  int access_method)
@@ -241,37 +269,39 @@ static int ntmp_delete_entry_by_id(struct ntmp_user *user, int tbl_id,
 				   u8 tbl_ver, u32 entry_id, u32 req_len,
 				   u32 resp_len)
 {
-	struct ntmp_dma_buf data = {
-		.dev = user->dev,
+	struct netc_swcbd swcbd = {
 		.size = max(req_len, resp_len),
 	};
 	struct ntmp_req_by_eid *req;
+	struct netc_cbdr *cbdr;
 	union netc_cbd cbd;
 	int err;
 
-	err = ntmp_alloc_data_mem(&data, (void **)&req);
+	err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req);
 	if (err)
 		return err;
 
 	ntmp_fill_crd_eid(req, tbl_ver, 0, 0, entry_id);
-	ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(req_len, resp_len),
+	ntmp_fill_request_hdr(&cbd, swcbd.dma, NTMP_LEN(req_len, resp_len),
 			      tbl_id, NTMP_CMD_DELETE, NTMP_AM_ENTRY_ID);
 
-	err = netc_xmit_ntmp_cmd(user, &cbd);
+	ntmp_select_and_lock_cbdr(user, &cbdr);
+	err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd);
 	if (err)
 		dev_err(user->dev,
 			"Failed to delete entry 0x%x of %s, err: %pe",
 			entry_id, ntmp_table_name(tbl_id), ERR_PTR(err));
-
-	ntmp_free_data_mem(&data);
+	ntmp_unlock_cbdr(cbdr);
 
 	return err;
 }
 
-static int ntmp_query_entry_by_id(struct ntmp_user *user, int tbl_id,
-				  u32 len, struct ntmp_req_by_eid *req,
-				  dma_addr_t dma, bool compare_eid)
+static int ntmp_query_entry_by_id(struct netc_cbdr *cbdr, int tbl_id,
+				  struct ntmp_req_by_eid *req,
+				  struct netc_swcbd *swcbd,
+				  bool compare_eid)
 {
+	u32 len = NTMP_LEN(sizeof(*req), swcbd->size);
 	struct ntmp_cmn_resp_query *resp;
 	int cmd = NTMP_CMD_QUERY;
 	union netc_cbd cbd;
@@ -283,10 +313,11 @@ static int ntmp_query_entry_by_id(struct ntmp_user *user, int tbl_id,
 		cmd = NTMP_CMD_QU;
 
 	/* Request header */
-	ntmp_fill_request_hdr(&cbd, dma, len, tbl_id, cmd, NTMP_AM_ENTRY_ID);
-	err = netc_xmit_ntmp_cmd(user, &cbd);
+	ntmp_fill_request_hdr(&cbd, swcbd->dma, len, tbl_id, cmd,
+			      NTMP_AM_ENTRY_ID);
+	err = netc_xmit_ntmp_cmd(cbdr, &cbd, swcbd);
 	if (err) {
-		dev_err(user->dev,
+		dev_err(cbdr->dev,
 			"Failed to query entry 0x%x of %s, err: %pe\n",
 			entry_id, ntmp_table_name(tbl_id), ERR_PTR(err));
 		return err;
@@ -300,7 +331,7 @@ static int ntmp_query_entry_by_id(struct ntmp_user *user, int tbl_id,
 
 	resp = (struct ntmp_cmn_resp_query *)req;
 	if (unlikely(le32_to_cpu(resp->entry_id) != entry_id)) {
-		dev_err(user->dev,
+		dev_err(cbdr->dev,
 			"%s: query EID 0x%x doesn't match response EID 0x%x\n",
 			ntmp_table_name(tbl_id), entry_id, le32_to_cpu(resp->entry_id));
 		return -EIO;
@@ -312,15 +343,15 @@ static int ntmp_query_entry_by_id(struct ntmp_user *user, int tbl_id,
 int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id,
 			struct maft_entry_data *maft)
 {
-	struct ntmp_dma_buf data = {
-		.dev = user->dev,
+	struct netc_swcbd swcbd = {
 		.size = sizeof(struct maft_req_add),
 	};
 	struct maft_req_add *req;
+	struct netc_cbdr *cbdr;
 	union netc_cbd cbd;
 	int err;
 
-	err = ntmp_alloc_data_mem(&data, (void **)&req);
+	err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req);
 	if (err)
 		return err;
 
@@ -329,14 +360,15 @@ int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id,
 	req->keye = maft->keye;
 	req->cfge = maft->cfge;
 
-	ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(data.size, 0),
+	ntmp_fill_request_hdr(&cbd, swcbd.dma, NTMP_LEN(swcbd.size, 0),
 			      NTMP_MAFT_ID, NTMP_CMD_ADD, NTMP_AM_ENTRY_ID);
-	err = netc_xmit_ntmp_cmd(user, &cbd);
+
+	ntmp_select_and_lock_cbdr(user, &cbdr);
+	err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd);
 	if (err)
 		dev_err(user->dev, "Failed to add MAFT entry 0x%x, err: %pe\n",
 			entry_id, ERR_PTR(err));
-
-	ntmp_free_data_mem(&data);
+	ntmp_unlock_cbdr(cbdr);
 
 	return err;
 }
@@ -345,31 +377,31 @@ EXPORT_SYMBOL_GPL(ntmp_maft_add_entry);
 int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id,
 			  struct maft_entry_data *maft)
 {
-	struct ntmp_dma_buf data = {
-		.dev = user->dev,
+	struct netc_swcbd swcbd = {
 		.size = sizeof(struct maft_resp_query),
 	};
 	struct maft_resp_query *resp;
 	struct ntmp_req_by_eid *req;
+	struct netc_cbdr *cbdr;
 	int err;
 
-	err = ntmp_alloc_data_mem(&data, (void **)&req);
+	err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req);
 	if (err)
 		return err;
 
 	ntmp_fill_crd_eid(req, user->tbl.maft_ver, 0, 0, entry_id);
-	err = ntmp_query_entry_by_id(user, NTMP_MAFT_ID,
-				     NTMP_LEN(sizeof(*req), data.size),
-				     req, data.dma, true);
+
+	ntmp_select_and_lock_cbdr(user, &cbdr);
+	err = ntmp_query_entry_by_id(cbdr, NTMP_MAFT_ID, req, &swcbd, true);
 	if (err)
-		goto end;
+		goto unlock_cbdr;
 
 	resp = (struct maft_resp_query *)req;
 	maft->keye = resp->keye;
 	maft->cfge = resp->cfge;
 
-end:
-	ntmp_free_data_mem(&data);
+unlock_cbdr:
+	ntmp_unlock_cbdr(cbdr);
 
 	return err;
 }
@@ -385,8 +417,9 @@ EXPORT_SYMBOL_GPL(ntmp_maft_delete_entry);
 int ntmp_rsst_update_entry(struct ntmp_user *user, const u32 *table,
 			   int count)
 {
-	struct ntmp_dma_buf data = {.dev = user->dev};
 	struct rsst_req_update *req;
+	struct netc_swcbd swcbd;
+	struct netc_cbdr *cbdr;
 	union netc_cbd cbd;
 	int err, i;
 
@@ -394,8 +427,8 @@ int ntmp_rsst_update_entry(struct ntmp_user *user, const u32 *table,
 		/* HW only takes in a full 64 entry table */
 		return -EINVAL;
 
-	data.size = struct_size(req, groups, count);
-	err = ntmp_alloc_data_mem(&data, (void **)&req);
+	swcbd.size = struct_size(req, groups, count);
+	err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req);
 	if (err)
 		return err;
 
@@ -405,15 +438,15 @@ int ntmp_rsst_update_entry(struct ntmp_user *user, const u32 *table,
 	for (i = 0; i < count; i++)
 		req->groups[i] = (u8)(table[i]);
 
-	ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(data.size, 0),
+	ntmp_fill_request_hdr(&cbd, swcbd.dma, NTMP_LEN(swcbd.size, 0),
 			      NTMP_RSST_ID, NTMP_CMD_UPDATE, NTMP_AM_ENTRY_ID);
 
-	err = netc_xmit_ntmp_cmd(user, &cbd);
+	ntmp_select_and_lock_cbdr(user, &cbdr);
+	err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd);
 	if (err)
 		dev_err(user->dev, "Failed to update RSST entry, err: %pe\n",
 			ERR_PTR(err));
-
-	ntmp_free_data_mem(&data);
+	ntmp_unlock_cbdr(cbdr);
 
 	return err;
 }
@@ -421,8 +454,9 @@ EXPORT_SYMBOL_GPL(ntmp_rsst_update_entry);
 
 int ntmp_rsst_query_entry(struct ntmp_user *user, u32 *table, int count)
 {
-	struct ntmp_dma_buf data = {.dev = user->dev};
 	struct ntmp_req_by_eid *req;
+	struct netc_swcbd swcbd;
+	struct netc_cbdr *cbdr;
 	union netc_cbd cbd;
 	int err, i;
 	u8 *group;
@@ -431,21 +465,23 @@ int ntmp_rsst_query_entry(struct ntmp_user *user, u32 *table, int count)
 		/* HW only takes in a full 64 entry table */
 		return -EINVAL;
 
-	data.size = NTMP_ENTRY_ID_SIZE + RSST_STSE_DATA_SIZE(count) +
-		    RSST_CFGE_DATA_SIZE(count);
-	err = ntmp_alloc_data_mem(&data, (void **)&req);
+	swcbd.size = NTMP_ENTRY_ID_SIZE + RSST_STSE_DATA_SIZE(count) +
+		     RSST_CFGE_DATA_SIZE(count);
+	err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req);
 	if (err)
 		return err;
 
 	/* Set the request data buffer */
 	ntmp_fill_crd_eid(req, user->tbl.rsst_ver, 0, 0, 0);
-	ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(sizeof(*req), data.size),
+	ntmp_fill_request_hdr(&cbd, swcbd.dma, NTMP_LEN(sizeof(*req), swcbd.size),
 			      NTMP_RSST_ID, NTMP_CMD_QUERY, NTMP_AM_ENTRY_ID);
-	err = netc_xmit_ntmp_cmd(user, &cbd);
+
+	ntmp_select_and_lock_cbdr(user, &cbdr);
+	err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd);
 	if (err) {
 		dev_err(user->dev, "Failed to query RSST entry, err: %pe\n",
 			ERR_PTR(err));
-		goto end;
+		goto unlock_cbdr;
 	}
 
 	group = (u8 *)req;
@@ -453,8 +489,8 @@ int ntmp_rsst_query_entry(struct ntmp_user *user, u32 *table, int count)
 	for (i = 0; i < count; i++)
 		table[i] = group[i];
 
-end:
-	ntmp_free_data_mem(&data);
+unlock_cbdr:
+	ntmp_unlock_cbdr(cbdr);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/freescale/enetc/ntmp_private.h b/drivers/net/ethernet/freescale/enetc/ntmp_private.h
index 3459cc45b610..f8dff3ba2c28 100644
--- a/drivers/net/ethernet/freescale/enetc/ntmp_private.h
+++ b/drivers/net/ethernet/freescale/enetc/ntmp_private.h
@@ -14,6 +14,7 @@
 #define NETC_CBDR_BD_NUM	256
 #define NETC_CBDRCIR_INDEX	GENMASK(9, 0)
 #define NETC_CBDRCIR_SBE	BIT(31)
+#define NETC_CBDR_CLEAN_WORK	16
 
 union netc_cbd {
 	struct {
@@ -56,13 +57,6 @@ union netc_cbd {
 	} resp_hdr; /* NTMP Response Message Header Format */
 };
 
-struct ntmp_dma_buf {
-	struct device *dev;
-	size_t size;
-	void *buf;
-	dma_addr_t dma;
-};
-
 struct ntmp_cmn_req_data {
 	__le16 update_act;
 	u8 dbg_opt;
diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h
index 916dc4fe7de3..83a449b4d6ec 100644
--- a/include/linux/fsl/ntmp.h
+++ b/include/linux/fsl/ntmp.h
@@ -31,6 +31,12 @@ struct netc_tbl_vers {
 	u8 rsst_ver;
 };
 
+struct netc_swcbd {
+	void *buf;
+	dma_addr_t dma;
+	size_t size;
+};
+
 struct netc_cbdr {
 	struct device *dev;
 	struct netc_cbdr_regs regs;
@@ -44,9 +50,10 @@ struct netc_cbdr {
 	void *addr_base_align;
 	dma_addr_t dma_base;
 	dma_addr_t dma_base_align;
+	struct netc_swcbd *swcbd;
 
 	/* Serialize the order of command BD ring */
-	spinlock_t ring_lock;
+	struct mutex ring_lock;
 };
 
 struct ntmp_user {
-- 
cgit v1.2.3


From 2f5015461984caa8ebf265a60b22f38c94d9c70a Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Wed, 15 Apr 2026 15:08:47 -0600
Subject: t10-pi: reduce ref tag code duplication

t10_pi_ref_tag() and ext_pi_ref_tag() are identical except for the final
truncation of the ref tag to 32 or 48 bits. Factor out a helper
full_pi_ref_tag() to return the untruncated ref tag and use it in
t10_pi_ref_tag() and ext_pi_ref_tag().

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260415210847.1730016-1-csander@purestorage.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/t10-pi.h | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 2c59fe3efcd4..b6c2496866ea 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -4,6 +4,7 @@
 
 #include <linux/types.h>
 #include <linux/blk-mq.h>
+#include <linux/wordpart.h>
 
 /*
  * A T10 PI-capable target device can be formatted with different
@@ -25,6 +26,16 @@ enum t10_dif_type {
 	T10_PI_TYPE3_PROTECTION = 0x3,
 };
 
+static inline u64 full_pi_ref_tag(const struct request *rq)
+{
+	unsigned int shift = ilog2(queue_logical_block_size(rq->q));
+
+	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+	    rq->q->limits.integrity.interval_exp)
+		shift = rq->q->limits.integrity.interval_exp;
+	return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT);
+}
+
 /*
  * T10 Protection Information tuple.
  */
@@ -39,12 +50,7 @@ struct t10_pi_tuple {
 
 static inline u32 t10_pi_ref_tag(struct request *rq)
 {
-	unsigned int shift = ilog2(queue_logical_block_size(rq->q));
-
-	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
-	    rq->q->limits.integrity.interval_exp)
-		shift = rq->q->limits.integrity.interval_exp;
-	return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff;
+	return lower_32_bits(full_pi_ref_tag(rq));
 }
 
 struct crc64_pi_tuple {
@@ -64,12 +70,7 @@ static inline u64 lower_48_bits(u64 n)
 
 static inline u64 ext_pi_ref_tag(struct request *rq)
 {
-	unsigned int shift = ilog2(queue_logical_block_size(rq->q));
-
-	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
-	    rq->q->limits.integrity.interval_exp)
-		shift = rq->q->limits.integrity.interval_exp;
-	return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT));
+	return lower_48_bits(full_pi_ref_tag(rq));
 }
 
 #endif
-- 
cgit v1.2.3


From 3d3544a6c996e88bb793bb6b2665c3e3f674f5eb Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <ljs@kernel.org>
Date: Mon, 13 Apr 2026 11:57:13 +0100
Subject: mm/vma: remove __vma_check_mmap_hook()

Commit c50ca15dd496 ("mm: add vm_ops->mapped hook") introduced
__vma_check_mmap_hook() in order to assert that a driver doesn't
incorrectly implement both an f_op->mmap() and a vm_ops->mapped hook, the
latter of which would not ultimately get invoked.

However, this did not correctly account for stacked drivers (or drivers
that otherwise use the compatibility layer) which might recursively call
an mmap_prepare hook via the compatibility layer.

Thus the nested mmap_prepare() invocation might result in a VMA which has
vm_ops->mapped set with an overlaying mmap() hook, causing the
__vma_check_mmap_hook() to fail in vfs_mmap(), wrongly failing the
operation.

This patch resolves this by simply removing the check, as we can't be
certain that an mmap() hook doesn't at some point invoke the compatibility
layer, and it's not worth trying to track it.

Link: https://lore.kernel.org/20260413105713.92625-1-ljs@kernel.org
Fixes: c50ca15dd496 ("mm: add vm_ops->mapped hook")
Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Closes: https://lore.kernel.org/all/adx2ws5z0NMIe5Yj@shinmob/
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Tested-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/fs.h |  9 +--------
 mm/util.c          | 10 ----------
 2 files changed, 1 insertion(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0bdccfa70b44..f3ca9b841892 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2062,20 +2062,13 @@ void compat_set_desc_from_vma(struct vm_area_desc *desc, const struct file *file
 			      const struct vm_area_struct *vma);
 int __compat_vma_mmap(struct vm_area_desc *desc, struct vm_area_struct *vma);
 int compat_vma_mmap(struct file *file, struct vm_area_struct *vma);
-int __vma_check_mmap_hook(struct vm_area_struct *vma);
 
 static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	int err;
-
 	if (file->f_op->mmap_prepare)
 		return compat_vma_mmap(file, vma);
 
-	err = file->f_op->mmap(file, vma);
-	if (err)
-		return err;
-
-	return __vma_check_mmap_hook(vma);
+	return file->f_op->mmap(file, vma);
 }
 
 static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
diff --git a/mm/util.c b/mm/util.c
index f063fd4de1e8..232c3930a662 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1281,16 +1281,6 @@ int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
 }
 EXPORT_SYMBOL(compat_vma_mmap);
 
-int __vma_check_mmap_hook(struct vm_area_struct *vma)
-{
-	/* vm_ops->mapped is not valid if mmap() is specified. */
-	if (vma->vm_ops && WARN_ON_ONCE(vma->vm_ops->mapped))
-		return -EINVAL;
-
-	return 0;
-}
-EXPORT_SYMBOL(__vma_check_mmap_hook);
-
 static void set_ps_flags(struct page_snapshot *ps, const struct folio *folio,
 			 const struct page *page)
 {
-- 
cgit v1.2.3


From db128b2c6b7d0c9b514327a0873425bbf18e739b Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:21 +0800
Subject: mm: rename unlock_page_lruvec_irq and its variants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is inappropriate to use folio_lruvec_lock() variants in conjunction
with unlock_page_lruvec() variants, as this involves the inconsistent
operation of locking a folio while unlocking a page.  To rectify this, the
functions unlock_page_lruvec{_irq, _irqrestore} are renamed to
lruvec_unlock{_irq,_irqrestore}.

Link: https://lore.kernel.org/4e5e05271a250df4d1812e1832be65636a78c957.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Chen Ridong <chenridong@huawei.com>
Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 10 +++++-----
 mm/compaction.c            | 14 +++++++-------
 mm/huge_memory.c           |  2 +-
 mm/mlock.c                 |  2 +-
 mm/swap.c                  | 12 ++++++------
 mm/vmscan.c                |  4 ++--
 6 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5173a9f16721..6e88288e90d8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1479,17 +1479,17 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
 	return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec));
 }
 
-static inline void unlock_page_lruvec(struct lruvec *lruvec)
+static inline void lruvec_unlock(struct lruvec *lruvec)
 {
 	spin_unlock(&lruvec->lru_lock);
 }
 
-static inline void unlock_page_lruvec_irq(struct lruvec *lruvec)
+static inline void lruvec_unlock_irq(struct lruvec *lruvec)
 {
 	spin_unlock_irq(&lruvec->lru_lock);
 }
 
-static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
+static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec,
 		unsigned long flags)
 {
 	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
@@ -1511,7 +1511,7 @@ static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio,
 		if (folio_matches_lruvec(folio, locked_lruvec))
 			return locked_lruvec;
 
-		unlock_page_lruvec_irq(locked_lruvec);
+		lruvec_unlock_irq(locked_lruvec);
 	}
 
 	return folio_lruvec_lock_irq(folio);
@@ -1525,7 +1525,7 @@ static inline void folio_lruvec_relock_irqsave(struct folio *folio,
 		if (folio_matches_lruvec(folio, *lruvecp))
 			return;
 
-		unlock_page_lruvec_irqrestore(*lruvecp, *flags);
+		lruvec_unlock_irqrestore(*lruvecp, *flags);
 	}
 
 	*lruvecp = folio_lruvec_lock_irqsave(folio, flags);
diff --git a/mm/compaction.c b/mm/compaction.c
index 1e8f8eca318c..c3e338aaa0ff 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -913,7 +913,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		 */
 		if (!(low_pfn % COMPACT_CLUSTER_MAX)) {
 			if (locked) {
-				unlock_page_lruvec_irqrestore(locked, flags);
+				lruvec_unlock_irqrestore(locked, flags);
 				locked = NULL;
 			}
 
@@ -964,7 +964,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			}
 			/* for alloc_contig case */
 			if (locked) {
-				unlock_page_lruvec_irqrestore(locked, flags);
+				lruvec_unlock_irqrestore(locked, flags);
 				locked = NULL;
 			}
 
@@ -1053,7 +1053,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			if (unlikely(page_has_movable_ops(page)) &&
 			    !PageMovableOpsIsolated(page)) {
 				if (locked) {
-					unlock_page_lruvec_irqrestore(locked, flags);
+					lruvec_unlock_irqrestore(locked, flags);
 					locked = NULL;
 				}
 
@@ -1158,7 +1158,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		/* If we already hold the lock, we can skip some rechecking */
 		if (lruvec != locked) {
 			if (locked)
-				unlock_page_lruvec_irqrestore(locked, flags);
+				lruvec_unlock_irqrestore(locked, flags);
 
 			compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
 			locked = lruvec;
@@ -1226,7 +1226,7 @@ isolate_success_no_list:
 isolate_fail_put:
 		/* Avoid potential deadlock in freeing page under lru_lock */
 		if (locked) {
-			unlock_page_lruvec_irqrestore(locked, flags);
+			lruvec_unlock_irqrestore(locked, flags);
 			locked = NULL;
 		}
 		folio_put(folio);
@@ -1242,7 +1242,7 @@ isolate_fail:
 		 */
 		if (nr_isolated) {
 			if (locked) {
-				unlock_page_lruvec_irqrestore(locked, flags);
+				lruvec_unlock_irqrestore(locked, flags);
 				locked = NULL;
 			}
 			putback_movable_pages(&cc->migratepages);
@@ -1274,7 +1274,7 @@ isolate_fail:
 
 isolate_abort:
 	if (locked)
-		unlock_page_lruvec_irqrestore(locked, flags);
+		lruvec_unlock_irqrestore(locked, flags);
 	if (folio) {
 		folio_set_lru(folio);
 		folio_put(folio);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 42c983821c03..958b580c6619 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3994,7 +3994,7 @@ static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int n
 		folio_ref_unfreeze(folio, folio_cache_ref_count(folio) + 1);
 
 		if (do_lru)
-			unlock_page_lruvec(lruvec);
+			lruvec_unlock(lruvec);
 
 		if (ci)
 			swap_cluster_unlock(ci);
diff --git a/mm/mlock.c b/mm/mlock.c
index fdbd1434a35f..8c227fefa2df 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -205,7 +205,7 @@ static void mlock_folio_batch(struct folio_batch *fbatch)
 	}
 
 	if (lruvec)
-		unlock_page_lruvec_irq(lruvec);
+		lruvec_unlock_irq(lruvec);
 	folios_put(fbatch);
 }
 
diff --git a/mm/swap.c b/mm/swap.c
index 78b4aa811fc6..23df893e2ed7 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -91,7 +91,7 @@ static void page_cache_release(struct folio *folio)
 
 	__page_cache_release(folio, &lruvec, &flags);
 	if (lruvec)
-		unlock_page_lruvec_irqrestore(lruvec, flags);
+		lruvec_unlock_irqrestore(lruvec, flags);
 }
 
 void __folio_put(struct folio *folio)
@@ -175,7 +175,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
 	}
 
 	if (lruvec)
-		unlock_page_lruvec_irqrestore(lruvec, flags);
+		lruvec_unlock_irqrestore(lruvec, flags);
 	folios_put(fbatch);
 }
 
@@ -349,7 +349,7 @@ void folio_activate(struct folio *folio)
 
 	lruvec = folio_lruvec_lock_irq(folio);
 	lru_activate(lruvec, folio);
-	unlock_page_lruvec_irq(lruvec);
+	lruvec_unlock_irq(lruvec);
 	folio_set_lru(folio);
 }
 #endif
@@ -963,7 +963,7 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
 
 		if (folio_is_zone_device(folio)) {
 			if (lruvec) {
-				unlock_page_lruvec_irqrestore(lruvec, flags);
+				lruvec_unlock_irqrestore(lruvec, flags);
 				lruvec = NULL;
 			}
 			if (folio_ref_sub_and_test(folio, nr_refs))
@@ -977,7 +977,7 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
 		/* hugetlb has its own memcg */
 		if (folio_test_hugetlb(folio)) {
 			if (lruvec) {
-				unlock_page_lruvec_irqrestore(lruvec, flags);
+				lruvec_unlock_irqrestore(lruvec, flags);
 				lruvec = NULL;
 			}
 			free_huge_folio(folio);
@@ -991,7 +991,7 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
 		j++;
 	}
 	if (lruvec)
-		unlock_page_lruvec_irqrestore(lruvec, flags);
+		lruvec_unlock_irqrestore(lruvec, flags);
 	if (!j) {
 		folio_batch_reinit(folios);
 		return;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4bf091b1c8af..88bb3337e5eb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1831,7 +1831,7 @@ bool folio_isolate_lru(struct folio *folio)
 		folio_get(folio);
 		lruvec = folio_lruvec_lock_irq(folio);
 		lruvec_del_folio(lruvec, folio);
-		unlock_page_lruvec_irq(lruvec);
+		lruvec_unlock_irq(lruvec);
 		ret = true;
 	}
 
@@ -7898,7 +7898,7 @@ void check_move_unevictable_folios(struct folio_batch *fbatch)
 	if (lruvec) {
 		__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
 		__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
-		unlock_page_lruvec_irq(lruvec);
+		lruvec_unlock_irq(lruvec);
 	} else if (pgscanned) {
 		count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
 	}
-- 
cgit v1.2.3


From d5aa8c1d136e7de89defb06f42f8108992967a70 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:25 +0800
Subject: mm: memcontrol: return root object cgroup for root memory cgroup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Memory cgroup functions such as get_mem_cgroup_from_folio() and
get_mem_cgroup_from_mm() return a valid memory cgroup pointer, even for
the root memory cgroup.  In contrast, the situation for object cgroups has
been different.

Previously, the root object cgroup couldn't be returned because it didn't
exist.  Now that a valid root object cgroup exists, for the sake of
consistency, it's necessary to align the behavior of object-cgroup-related
operations with that of memory cgroup APIs.

Link: https://lore.kernel.org/e9c3f40ba7681d9753372d4ee2ac7a0216848b95.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 26 ++++++++++++++++++++------
 mm/memcontrol.c            | 45 ++++++++++++++++++++++++---------------------
 mm/percpu.c                |  2 +-
 3 files changed, 45 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6e88288e90d8..9a015258a2ff 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -332,6 +332,7 @@ struct mem_cgroup {
 #define MEMCG_CHARGE_BATCH 64U
 
 extern struct mem_cgroup *root_mem_cgroup;
+extern struct obj_cgroup *root_obj_cgroup;
 
 enum page_memcg_data_flags {
 	/* page->memcg_data is a pointer to an slabobj_ext vector */
@@ -548,6 +549,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 	return (memcg == root_mem_cgroup);
 }
 
+static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
+{
+	return objcg == root_obj_cgroup;
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
@@ -774,23 +780,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 
 static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
 {
+	if (obj_cgroup_is_root(objcg))
+		return true;
 	return percpu_ref_tryget(&objcg->refcnt);
 }
 
-static inline void obj_cgroup_get(struct obj_cgroup *objcg)
+static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
+				       unsigned long nr)
 {
-	percpu_ref_get(&objcg->refcnt);
+	if (!obj_cgroup_is_root(objcg))
+		percpu_ref_get_many(&objcg->refcnt, nr);
 }
 
-static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
-				       unsigned long nr)
+static inline void obj_cgroup_get(struct obj_cgroup *objcg)
 {
-	percpu_ref_get_many(&objcg->refcnt, nr);
+	obj_cgroup_get_many(objcg, 1);
 }
 
 static inline void obj_cgroup_put(struct obj_cgroup *objcg)
 {
-	if (objcg)
+	if (objcg && !obj_cgroup_is_root(objcg))
 		percpu_ref_put(&objcg->refcnt);
 }
 
@@ -1087,6 +1096,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 	return true;
 }
 
+static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
+{
+	return true;
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
 	return true;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2cb2d66579d3..e7022adcea7f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
 struct mem_cgroup *root_mem_cgroup __read_mostly;
 EXPORT_SYMBOL(root_mem_cgroup);
 
+struct obj_cgroup *root_obj_cgroup __read_mostly;
+
 /* Active memory cgroup to use from an interrupt context */
 DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
@@ -2693,15 +2695,14 @@ struct mem_cgroup *mem_cgroup_from_virt(void *p)
 
 static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
 {
-	struct obj_cgroup *objcg = NULL;
+	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
 
-	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
-		objcg = rcu_dereference(memcg->objcg);
 		if (likely(objcg && obj_cgroup_tryget(objcg)))
-			break;
-		objcg = NULL;
+			return objcg;
 	}
-	return objcg;
+
+	return NULL;
 }
 
 static struct obj_cgroup *current_objcg_update(void)
@@ -2775,18 +2776,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 		 * Objcg reference is kept by the task, so it's safe
 		 * to use the objcg by the current task.
 		 */
-		return objcg;
+		return objcg ? : root_obj_cgroup;
 	}
 
 	memcg = this_cpu_read(int_active_memcg);
 	if (unlikely(memcg))
 		goto from_memcg;
 
-	return NULL;
+	return root_obj_cgroup;
 
 from_memcg:
-	objcg = NULL;
-	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
+	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
 		/*
 		 * Memcg pointer is protected by scope (see set_active_memcg())
 		 * and is pinning the corresponding objcg, so objcg can't go
@@ -2795,10 +2795,10 @@ from_memcg:
 		 */
 		objcg = rcu_dereference_check(memcg->objcg, 1);
 		if (likely(objcg))
-			break;
+			return objcg;
 	}
 
-	return objcg;
+	return root_obj_cgroup;
 }
 
 struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
@@ -2812,14 +2812,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
 		objcg = __folio_objcg(folio);
 		obj_cgroup_get(objcg);
 	} else {
-		struct mem_cgroup *memcg;
-
 		rcu_read_lock();
-		memcg = __folio_memcg(folio);
-		if (memcg)
-			objcg = __get_obj_cgroup_from_memcg(memcg);
-		else
-			objcg = NULL;
+		objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
 		rcu_read_unlock();
 	}
 	return objcg;
@@ -2922,7 +2916,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
 	int ret = 0;
 
 	objcg = current_obj_cgroup();
-	if (objcg) {
+	if (objcg && !obj_cgroup_is_root(objcg)) {
 		ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
 		if (!ret) {
 			obj_cgroup_get(objcg);
@@ -3251,7 +3245,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 	 * obj_cgroup_get() is used to get a permanent reference.
 	 */
 	objcg = current_obj_cgroup();
-	if (!objcg)
+	if (!objcg || obj_cgroup_is_root(objcg))
 		return true;
 
 	/*
@@ -3927,6 +3921,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (!objcg)
 		goto free_shrinker;
 
+	if (unlikely(mem_cgroup_is_root(memcg)))
+		root_obj_cgroup = objcg;
+
 	objcg->memcg = memcg;
 	rcu_assign_pointer(memcg->objcg, objcg);
 	obj_cgroup_get(objcg);
@@ -5551,6 +5548,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		return;
 
+	if (obj_cgroup_is_root(objcg))
+		return;
+
 	VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
 
 	/* PF_MEMALLOC context, charging must succeed */
@@ -5580,6 +5580,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		return;
 
+	if (obj_cgroup_is_root(objcg))
+		return;
+
 	obj_cgroup_uncharge(objcg, size);
 
 	rcu_read_lock();
diff --git a/mm/percpu.c b/mm/percpu.c
index a2107bdebf0b..b0676b8054ed 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1622,7 +1622,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
 		return true;
 
 	objcg = current_obj_cgroup();
-	if (!objcg)
+	if (!objcg || obj_cgroup_is_root(objcg))
 		return true;
 
 	if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))
-- 
cgit v1.2.3


From 49717c7bd6b8e14329c2d04b1e8ec691175b6f4e Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:28 +0800
Subject: writeback: prevent memory cgroup release in writeback module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the near future, a folio will no longer pin its corresponding memory
cgroup.  To ensure safety, it will only be appropriate to hold the rcu
read lock or acquire a reference to the memory cgroup returned by
folio_memcg(), thereby preventing it from being released.

In the current patch, the function get_mem_cgroup_css_from_folio() and the
rcu read lock are employed to safeguard against the release of the memory
cgroup.

This serves as a preparatory measure for the reparenting of the
LRU pages.

Link: https://lore.kernel.org/645f99bc344575417f67def3744f975596df2793.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/fs-writeback.c                | 22 +++++++++++-----------
 include/linux/memcontrol.h       |  9 +++++++--
 include/trace/events/writeback.h |  3 +++
 mm/memcontrol.c                  | 14 ++++++++------
 4 files changed, 29 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7c75ed7e8979..c3442a38450c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -280,15 +280,13 @@ void __inode_attach_wb(struct inode *inode, struct folio *folio)
 	if (inode_cgwb_enabled(inode)) {
 		struct cgroup_subsys_state *memcg_css;
 
-		if (folio) {
-			memcg_css = mem_cgroup_css_from_folio(folio);
-			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
-		} else {
-			/* must pin memcg_css, see wb_get_create() */
+		/* must pin memcg_css, see wb_get_create() */
+		if (folio)
+			memcg_css = get_mem_cgroup_css_from_folio(folio);
+		else
 			memcg_css = task_get_css(current, memory_cgrp_id);
-			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
-			css_put(memcg_css);
-		}
+		wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+		css_put(memcg_css);
 	}
 
 	if (!wb)
@@ -979,16 +977,16 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio
 	if (!wbc->wb || wbc->no_cgroup_owner)
 		return;
 
-	css = mem_cgroup_css_from_folio(folio);
+	css = get_mem_cgroup_css_from_folio(folio);
 	/* dead cgroups shouldn't contribute to inode ownership arbitration */
 	if (!css_is_online(css))
-		return;
+		goto out;
 
 	id = css->id;
 
 	if (id == wbc->wb_id) {
 		wbc->wb_bytes += bytes;
-		return;
+		goto out;
 	}
 
 	if (id == wbc->wb_lcand_id)
@@ -1001,6 +999,8 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio
 		wbc->wb_tcand_bytes += bytes;
 	else
 		wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
+out:
+	css_put(css);
 }
 EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9a015258a2ff..4454f03a4acf 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -894,7 +894,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm,
 	return match;
 }
 
-struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio);
+struct cgroup_subsys_state *get_mem_cgroup_css_from_folio(struct folio *folio);
 ino_t page_cgroup_ino(struct page *page);
 
 static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
@@ -1563,9 +1563,14 @@ static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
 	if (mem_cgroup_disabled())
 		return;
 
+	if (!folio_memcg_charged(folio))
+		return;
+
+	rcu_read_lock();
 	memcg = folio_memcg(folio);
-	if (unlikely(memcg && &memcg->css != wb->memcg_css))
+	if (unlikely(&memcg->css != wb->memcg_css))
 		mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
+	rcu_read_unlock();
 }
 
 void mem_cgroup_flush_foreign(struct bdi_writeback *wb);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 4d3d8c8f3a1b..b849b8cc96b1 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -294,7 +294,10 @@ TRACE_EVENT(track_foreign_dirty,
 		__entry->ino		= inode ? inode->i_ino : 0;
 		__entry->memcg_id	= wb->memcg_css->id;
 		__entry->cgroup_ino	= __trace_wb_assign_cgroup(wb);
+
+		rcu_read_lock();
 		__entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup);
+		rcu_read_unlock();
 	),
 
 	TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu",
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dbcf0d2bf114..d7d4b44c5af5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -243,7 +243,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_bpf_enabled_key);
 EXPORT_SYMBOL(memcg_bpf_enabled_key);
 
 /**
- * mem_cgroup_css_from_folio - css of the memcg associated with a folio
+ * get_mem_cgroup_css_from_folio - acquire a css of the memcg associated with a folio
  * @folio: folio of interest
  *
  * If memcg is bound to the default hierarchy, css of the memcg associated
@@ -253,14 +253,16 @@ EXPORT_SYMBOL(memcg_bpf_enabled_key);
  * If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup
  * is returned.
  */
-struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio)
+struct cgroup_subsys_state *get_mem_cgroup_css_from_folio(struct folio *folio)
 {
-	struct mem_cgroup *memcg = folio_memcg(folio);
+	struct mem_cgroup *memcg;
 
-	if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
-		memcg = root_mem_cgroup;
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		return &root_mem_cgroup->css;
 
-	return &memcg->css;
+	memcg = get_mem_cgroup_from_folio(folio);
+
+	return memcg ? &memcg->css : &root_mem_cgroup->css;
 }
 
 /**
-- 
cgit v1.2.3


From f995da5341c1854e59415c2c2c6f0b6406b498f2 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:29 +0800
Subject: mm: memcontrol: prevent memory cgroup release in
 count_memcg_folio_events()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the near future, a folio will no longer pin its corresponding memory
cgroup.  To ensure safety, it will only be appropriate to hold the rcu
read lock or acquire a reference to the memory cgroup returned by
folio_memcg(), thereby preventing it from being released.

In the current patch, the rcu read lock is employed to safeguard against
the release of the memory cgroup in count_memcg_folio_events().

This serves as a preparatory measure for the reparenting of the
LRU pages.

Link: https://lore.kernel.org/dea6aa0389367f7fd6b715c8837a2cf7506bd889.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4454f03a4acf..ef26ba087844 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -975,10 +975,15 @@ void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 static inline void count_memcg_folio_events(struct folio *folio,
 		enum vm_event_item idx, unsigned long nr)
 {
-	struct mem_cgroup *memcg = folio_memcg(folio);
+	struct mem_cgroup *memcg;
 
-	if (memcg)
-		count_memcg_events(memcg, idx, nr);
+	if (!folio_memcg_charged(folio))
+		return;
+
+	rcu_read_lock();
+	memcg = folio_memcg(folio);
+	count_memcg_events(memcg, idx, nr);
+	rcu_read_unlock();
 }
 
 static inline void count_memcg_events_mm(struct mm_struct *mm,
-- 
cgit v1.2.3


From d14f87858178c64cc94ecd05bb41bba474c1c654 Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:41 +0800
Subject: mm: do not open-code lruvec lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now we have lruvec_unlock(), lruvec_unlock_irq() and
lruvec_unlock_irqrestore(), but no the paired lruvec_lock(),
lruvec_lock_irq() and lruvec_lock_irqsave().

There is currently no use case for lruvec_lock_irqsave(), so only
introduce lruvec_lock_irq(), and change all open-code places to use this
helper function.  This looks cleaner and prepares for reparenting LRU
pages, preventing user from missing RCU lock calls due to open-code lruvec
lock.

Link: https://lore.kernel.org/2d0bafe7564e17ece46dfd58197af22ce57017dc.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Muchun Song <muchun.song@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 +++++
 mm/vmscan.c                | 38 +++++++++++++++++++-------------------
 2 files changed, 24 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ef26ba087844..38f94c7271c1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1498,6 +1498,11 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
 	return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec));
 }
 
+static inline void lruvec_lock_irq(struct lruvec *lruvec)
+{
+	spin_lock_irq(&lruvec->lru_lock);
+}
+
 static inline void lruvec_unlock(struct lruvec *lruvec)
 {
 	spin_unlock(&lruvec->lru_lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6f3f9e20ff67..d4b649abe645 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1998,7 +1998,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 
 	lru_add_drain();
 
-	spin_lock_irq(&lruvec->lru_lock);
+	lruvec_lock_irq(lruvec);
 
 	nr_taken = isolate_lru_folios(nr_to_scan, lruvec, &folio_list,
 				     &nr_scanned, sc, lru);
@@ -2008,7 +2008,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 	mod_lruvec_state(lruvec, item, nr_scanned);
 	mod_lruvec_state(lruvec, PGSCAN_ANON + file, nr_scanned);
 
-	spin_unlock_irq(&lruvec->lru_lock);
+	lruvec_unlock_irq(lruvec);
 
 	if (nr_taken == 0)
 		return 0;
@@ -2025,7 +2025,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 	mod_lruvec_state(lruvec, item, nr_reclaimed);
 	mod_lruvec_state(lruvec, PGSTEAL_ANON + file, nr_reclaimed);
 
-	spin_lock_irq(&lruvec->lru_lock);
+	lruvec_lock_irq(lruvec);
 	lru_note_cost_unlock_irq(lruvec, file, stat.nr_pageout,
 					nr_scanned - nr_reclaimed);
 
@@ -2104,7 +2104,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
 	lru_add_drain();
 
-	spin_lock_irq(&lruvec->lru_lock);
+	lruvec_lock_irq(lruvec);
 
 	nr_taken = isolate_lru_folios(nr_to_scan, lruvec, &l_hold,
 				     &nr_scanned, sc, lru);
@@ -2113,7 +2113,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
 	mod_lruvec_state(lruvec, PGREFILL, nr_scanned);
 
-	spin_unlock_irq(&lruvec->lru_lock);
+	lruvec_unlock_irq(lruvec);
 
 	while (!list_empty(&l_hold)) {
 		struct folio *folio;
@@ -2169,7 +2169,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
 	mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
 
-	spin_lock_irq(&lruvec->lru_lock);
+	lruvec_lock_irq(lruvec);
 	lru_note_cost_unlock_irq(lruvec, file, 0, nr_rotated);
 	trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
 			nr_deactivate, nr_rotated, sc->priority, file);
@@ -3803,9 +3803,9 @@ static void walk_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
 		}
 
 		if (walk->batched) {
-			spin_lock_irq(&lruvec->lru_lock);
+			lruvec_lock_irq(lruvec);
 			reset_batch_size(walk);
-			spin_unlock_irq(&lruvec->lru_lock);
+			lruvec_unlock_irq(lruvec);
 		}
 
 		cond_resched();
@@ -3965,7 +3965,7 @@ restart:
 	if (seq < READ_ONCE(lrugen->max_seq))
 		return false;
 
-	spin_lock_irq(&lruvec->lru_lock);
+	lruvec_lock_irq(lruvec);
 
 	VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
 
@@ -3980,7 +3980,7 @@ restart:
 		if (inc_min_seq(lruvec, type, swappiness))
 			continue;
 
-		spin_unlock_irq(&lruvec->lru_lock);
+		lruvec_unlock_irq(lruvec);
 		cond_resched();
 		goto restart;
 	}
@@ -4015,7 +4015,7 @@ restart:
 	/* make sure preceding modifications appear */
 	smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
 unlock:
-	spin_unlock_irq(&lruvec->lru_lock);
+	lruvec_unlock_irq(lruvec);
 
 	return success;
 }
@@ -4715,7 +4715,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 
-	spin_lock_irq(&lruvec->lru_lock);
+	lruvec_lock_irq(lruvec);
 
 	scanned = isolate_folios(nr_to_scan, lruvec, sc, swappiness, &type, &list);
 
@@ -4724,7 +4724,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 	if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
 		scanned = 0;
 
-	spin_unlock_irq(&lruvec->lru_lock);
+	lruvec_unlock_irq(lruvec);
 
 	if (list_empty(&list))
 		return scanned;
@@ -4762,9 +4762,9 @@ retry:
 	walk = current->reclaim_state->mm_walk;
 	if (walk && walk->batched) {
 		walk->lruvec = lruvec;
-		spin_lock_irq(&lruvec->lru_lock);
+		lruvec_lock_irq(lruvec);
 		reset_batch_size(walk);
-		spin_unlock_irq(&lruvec->lru_lock);
+		lruvec_unlock_irq(lruvec);
 	}
 
 	mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(sc),
@@ -5202,7 +5202,7 @@ static void lru_gen_change_state(bool enabled)
 		for_each_node(nid) {
 			struct lruvec *lruvec = get_lruvec(memcg, nid);
 
-			spin_lock_irq(&lruvec->lru_lock);
+			lruvec_lock_irq(lruvec);
 
 			VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
 			VM_WARN_ON_ONCE(!state_is_valid(lruvec));
@@ -5210,12 +5210,12 @@ static void lru_gen_change_state(bool enabled)
 			lruvec->lrugen.enabled = enabled;
 
 			while (!(enabled ? fill_evictable(lruvec) : drain_evictable(lruvec))) {
-				spin_unlock_irq(&lruvec->lru_lock);
+				lruvec_unlock_irq(lruvec);
 				cond_resched();
-				spin_lock_irq(&lruvec->lru_lock);
+				lruvec_lock_irq(lruvec);
 			}
 
-			spin_unlock_irq(&lruvec->lru_lock);
+			lruvec_unlock_irq(lruvec);
 		}
 
 		cond_resched();
-- 
cgit v1.2.3


From 31b54a5e8916fdd4819880e3aed93f65ecbb47e3 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:42 +0800
Subject: mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The following diagram illustrates how to ensure the safety of the folio
lruvec lock when LRU folios undergo reparenting.

In the folio_lruvec_lock(folio) function:

    rcu_read_lock();
retry:
    lruvec = folio_lruvec(folio);
    /* There is a possibility of folio reparenting at this point. */
    spin_lock(&lruvec->lru_lock);
    if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
        /*
         * The wrong lruvec lock was acquired, and a retry is required.
         * This is because the folio resides on the parent memcg lruvec
         * list.
         */
        spin_unlock(&lruvec->lru_lock);
        goto retry;
    }

    /* Reaching here indicates that folio_memcg() is stable. */


In the memcg_reparent_objcgs(memcg) function:

    spin_lock(&lruvec->lru_lock);
    spin_lock(&lruvec_parent->lru_lock);
    /* Transfer folios from the lruvec list to the parent's. */
    spin_unlock(&lruvec_parent->lru_lock);
    spin_unlock(&lruvec->lru_lock);

After acquiring the lruvec lock, it is necessary to verify whether the
folio has been reparented.  If reparenting has occurred, the new lruvec
lock must be reacquired.  During the LRU folio reparenting process, the
lruvec lock will also be acquired (this will be implemented in a
subsequent patch).  Therefore, folio_memcg() remains unchanged while the
lruvec lock is held.

Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio)
after the lruvec lock is acquired, the lruvec_memcg_debug() check is
redundant.  Hence, it is removed.

This patch serves as a preparation for the reparenting of LRU folios.

Link: https://lore.kernel.org/23f22cbb1419f277a3483018b32158ae2b86c666.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 34 ++++++++++++++---------------
 include/linux/swap.h       |  3 +--
 mm/compaction.c            | 29 +++++++++++++++++++------
 mm/memcontrol.c            | 53 +++++++++++++++++++++++-----------------------
 mm/swap.c                  |  6 +++++-
 5 files changed, 73 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 38f94c7271c1..12982875073e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -741,7 +741,15 @@ out:
  * folio_lruvec - return lruvec for isolating/putting an LRU folio
  * @folio: Pointer to the folio.
  *
- * This function relies on folio->mem_cgroup being stable.
+ * Call with rcu_read_lock() held to ensure the lifetime of the returned lruvec.
+ * Note that this alone will NOT guarantee the stability of the folio->lruvec
+ * association; the folio can be reparented to an ancestor if this races with
+ * cgroup deletion.
+ *
+ * Use folio_lruvec_lock() to ensure both lifetime and stability of the binding.
+ * Once a lruvec is locked, folio_lruvec() can be called on other folios, and
+ * their binding is stable if the returned lruvec matches the one the caller has
+ * locked. Useful for lock batching.
  */
 static inline struct lruvec *folio_lruvec(struct folio *folio)
 {
@@ -764,15 +772,6 @@ struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
 struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 						unsigned long *flags);
 
-#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
-#else
-static inline
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-}
-#endif
-
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -1198,11 +1197,6 @@ static inline struct lruvec *folio_lruvec(struct folio *folio)
 	return &pgdat->__lruvec;
 }
 
-static inline
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-}
-
 static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 {
 	return NULL;
@@ -1261,6 +1255,7 @@ static inline struct lruvec *folio_lruvec_lock(struct folio *folio)
 {
 	struct pglist_data *pgdat = folio_pgdat(folio);
 
+	rcu_read_lock();
 	spin_lock(&pgdat->__lruvec.lru_lock);
 	return &pgdat->__lruvec;
 }
@@ -1269,6 +1264,7 @@ static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
 {
 	struct pglist_data *pgdat = folio_pgdat(folio);
 
+	rcu_read_lock();
 	spin_lock_irq(&pgdat->__lruvec.lru_lock);
 	return &pgdat->__lruvec;
 }
@@ -1278,6 +1274,7 @@ static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 {
 	struct pglist_data *pgdat = folio_pgdat(folio);
 
+	rcu_read_lock();
 	spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
 	return &pgdat->__lruvec;
 }
@@ -1500,23 +1497,26 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
 
 static inline void lruvec_lock_irq(struct lruvec *lruvec)
 {
+	rcu_read_lock();
 	spin_lock_irq(&lruvec->lru_lock);
 }
 
 static inline void lruvec_unlock(struct lruvec *lruvec)
 {
 	spin_unlock(&lruvec->lru_lock);
+	rcu_read_unlock();
 }
 
 static inline void lruvec_unlock_irq(struct lruvec *lruvec)
 {
 	spin_unlock_irq(&lruvec->lru_lock);
+	rcu_read_unlock();
 }
 
-static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec,
-		unsigned long flags)
+static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec, unsigned long flags)
 {
 	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
+	rcu_read_unlock();
 }
 
 /* Test requires a stable folio->memcg binding, see folio_memcg() */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4b1f13b5bbad..ea08e2afa2b4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -310,8 +310,7 @@ extern unsigned long totalreserve_pages;
 
 /* linux/mm/swap.c */
 void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
-		unsigned int nr_io, unsigned int nr_rotated)
-		__releases(lruvec->lru_lock);
+		unsigned int nr_io, unsigned int nr_rotated);
 void lru_note_cost_refault(struct folio *);
 void folio_add_lru(struct folio *);
 void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
diff --git a/mm/compaction.c b/mm/compaction.c
index c3e338aaa0ff..3648ce22c807 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -518,6 +518,24 @@ static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
 	return true;
 }
 
+static struct lruvec *
+compact_folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags,
+				  struct compact_control *cc)
+{
+	struct lruvec *lruvec;
+
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
+	compact_lock_irqsave(&lruvec->lru_lock, flags, cc);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
+		goto retry;
+	}
+
+	return lruvec;
+}
+
 /*
  * Compaction requires the taking of some coarse locks that are potentially
  * very heavily contended. The lock should be periodically unlocked to avoid
@@ -839,7 +857,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 {
 	pg_data_t *pgdat = cc->zone->zone_pgdat;
 	unsigned long nr_scanned = 0, nr_isolated = 0;
-	struct lruvec *lruvec;
+	struct lruvec *lruvec = NULL;
 	unsigned long flags = 0;
 	struct lruvec *locked = NULL;
 	struct folio *folio = NULL;
@@ -1153,18 +1171,17 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		if (!folio_test_clear_lru(folio))
 			goto isolate_fail_put;
 
-		lruvec = folio_lruvec(folio);
+		if (locked)
+			lruvec = folio_lruvec(folio);
 
 		/* If we already hold the lock, we can skip some rechecking */
-		if (lruvec != locked) {
+		if (lruvec != locked || !locked) {
 			if (locked)
 				lruvec_unlock_irqrestore(locked, flags);
 
-			compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
+			lruvec = compact_folio_lruvec_lock_irqsave(folio, &flags, cc);
 			locked = lruvec;
 
-			lruvec_memcg_debug(lruvec, folio);
-
 			/*
 			 * Try get exclusive access under lock. If marked for
 			 * skip, the scan is aborted unless the current context
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 10021cef176b..0d4eaaea2b54 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1206,23 +1206,6 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
 	}
 }
 
-#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-	struct mem_cgroup *memcg;
-
-	if (mem_cgroup_disabled())
-		return;
-
-	memcg = folio_memcg(folio);
-
-	if (!memcg)
-		VM_BUG_ON_FOLIO(!mem_cgroup_is_root(lruvec_memcg(lruvec)), folio);
-	else
-		VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio);
-}
-#endif
-
 /**
  * folio_lruvec_lock - Lock the lruvec for a folio.
  * @folio: Pointer to the folio.
@@ -1232,14 +1215,20 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
  * - folio_test_lru false
  * - folio frozen (refcount of 0)
  *
- * Return: The lruvec this folio is on with its lock held.
+ * Return: The lruvec this folio is on with its lock held and rcu read lock held.
  */
 struct lruvec *folio_lruvec_lock(struct folio *folio)
 {
-	struct lruvec *lruvec = folio_lruvec(folio);
+	struct lruvec *lruvec;
 
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
 	spin_lock(&lruvec->lru_lock);
-	lruvec_memcg_debug(lruvec, folio);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock(&lruvec->lru_lock);
+		goto retry;
+	}
 
 	return lruvec;
 }
@@ -1254,14 +1243,20 @@ struct lruvec *folio_lruvec_lock(struct folio *folio)
  * - folio frozen (refcount of 0)
  *
  * Return: The lruvec this folio is on with its lock held and interrupts
- * disabled.
+ * disabled and rcu read lock held.
  */
 struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
 {
-	struct lruvec *lruvec = folio_lruvec(folio);
+	struct lruvec *lruvec;
 
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
 	spin_lock_irq(&lruvec->lru_lock);
-	lruvec_memcg_debug(lruvec, folio);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock_irq(&lruvec->lru_lock);
+		goto retry;
+	}
 
 	return lruvec;
 }
@@ -1277,15 +1272,21 @@ struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
  * - folio frozen (refcount of 0)
  *
  * Return: The lruvec this folio is on with its lock held and interrupts
- * disabled.
+ * disabled and rcu read lock held.
  */
 struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 		unsigned long *flags)
 {
-	struct lruvec *lruvec = folio_lruvec(folio);
+	struct lruvec *lruvec;
 
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
 	spin_lock_irqsave(&lruvec->lru_lock, *flags);
-	lruvec_memcg_debug(lruvec, folio);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
+		goto retry;
+	}
 
 	return lruvec;
 }
diff --git a/mm/swap.c b/mm/swap.c
index 009b32d6d344..bcd2b52e5def 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -240,6 +240,7 @@ void folio_rotate_reclaimable(struct folio *folio)
 void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
 		unsigned int nr_io, unsigned int nr_rotated)
 		__releases(lruvec->lru_lock)
+		__releases(rcu)
 {
 	unsigned long cost;
 
@@ -253,6 +254,7 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
 	cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated;
 	if (!cost) {
 		spin_unlock_irq(&lruvec->lru_lock);
+		rcu_read_unlock();
 		return;
 	}
 
@@ -285,8 +287,10 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
 
 		spin_unlock_irq(&lruvec->lru_lock);
 		lruvec = parent_lruvec(lruvec);
-		if (!lruvec)
+		if (!lruvec) {
+			rcu_read_unlock();
 			break;
+		}
 		spin_lock_irq(&lruvec->lru_lock);
 	}
 }
-- 
cgit v1.2.3


From 07a6e9a2c199fed361f528781284d56771d0016f Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:43 +0800
Subject: mm: vmscan: prepare for reparenting traditional LRU folios
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To resolve the dying memcg issue, we need to reparent LRU folios of child
memcg to its parent memcg.  For traditional LRU list, each lruvec of every
memcg comprises four LRU lists.  Due to the symmetry of the LRU lists, it
is feasible to transfer the LRU lists from a memcg to its parent memcg
during the reparenting process.

This commit implements the specific function, which will be used during
the reparenting process.

Link: https://lore.kernel.org/a92d217a9fc82bd0c401210204a095caaf615b1c.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Muchun Song <muchun.song@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 21 +++++++++++++++++++++
 mm/swap.c            | 33 +++++++++++++++++++++++++++++++++
 mm/vmscan.c          | 19 -------------------
 3 files changed, 54 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ea08e2afa2b4..d653fe050b8f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -546,6 +546,8 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 
 	return READ_ONCE(memcg->swappiness);
 }
+
+void lru_reparent_memcg(struct mem_cgroup *memcg, struct mem_cgroup *parent, int nid);
 #else
 static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 {
@@ -610,5 +612,24 @@ static inline bool mem_cgroup_swap_full(struct folio *folio)
 }
 #endif
 
+/* for_each_managed_zone_pgdat - helper macro to iterate over all managed zones in a pgdat up to
+ * and including the specified highidx
+ * @zone: The current zone in the iterator
+ * @pgdat: The pgdat which node_zones are being iterated
+ * @idx: The index variable
+ * @highidx: The index of the highest zone to return
+ *
+ * This macro iterates through all managed zones up to and including the specified highidx.
+ * The zone iterator enters an invalid state after macro call and must be reinitialized
+ * before it can be used again.
+ */
+#define for_each_managed_zone_pgdat(zone, pgdat, idx, highidx)	\
+	for ((idx) = 0, (zone) = (pgdat)->node_zones;		\
+	    (idx) <= (highidx);					\
+	    (idx)++, (zone)++)					\
+		if (!managed_zone(zone))			\
+			continue;				\
+		else
+
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff --git a/mm/swap.c b/mm/swap.c
index bcd2b52e5def..5cc44f0de987 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1090,6 +1090,39 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
 	fbatch->nr = j;
 }
 
+#ifdef CONFIG_MEMCG
+static void lruvec_reparent_lru(struct lruvec *child_lruvec,
+				struct lruvec *parent_lruvec,
+				enum lru_list lru, int nid)
+{
+	int zid;
+	struct zone *zone;
+
+	if (lru != LRU_UNEVICTABLE)
+		list_splice_tail_init(&child_lruvec->lists[lru], &parent_lruvec->lists[lru]);
+
+	for_each_managed_zone_pgdat(zone, NODE_DATA(nid), zid, MAX_NR_ZONES - 1) {
+		unsigned long size = mem_cgroup_get_zone_lru_size(child_lruvec, lru, zid);
+
+		mem_cgroup_update_lru_size(parent_lruvec, lru, zid, size);
+	}
+}
+
+void lru_reparent_memcg(struct mem_cgroup *memcg, struct mem_cgroup *parent, int nid)
+{
+	enum lru_list lru;
+	struct lruvec *child_lruvec, *parent_lruvec;
+
+	child_lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+	parent_lruvec = mem_cgroup_lruvec(parent, NODE_DATA(nid));
+	parent_lruvec->anon_cost += child_lruvec->anon_cost;
+	parent_lruvec->file_cost += child_lruvec->file_cost;
+
+	for_each_lru(lru)
+		lruvec_reparent_lru(child_lruvec, parent_lruvec, lru, nid);
+}
+#endif
+
 static const struct ctl_table swap_sysctl_table[] = {
 	{
 		.procname	= "page-cluster",
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d4b649abe645..d225e84b5263 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -269,25 +269,6 @@ static int sc_swappiness(struct scan_control *sc, struct mem_cgroup *memcg)
 }
 #endif
 
-/* for_each_managed_zone_pgdat - helper macro to iterate over all managed zones in a pgdat up to
- * and including the specified highidx
- * @zone: The current zone in the iterator
- * @pgdat: The pgdat which node_zones are being iterated
- * @idx: The index variable
- * @highidx: The index of the highest zone to return
- *
- * This macro iterates through all managed zones up to and including the specified highidx.
- * The zone iterator enters an invalid state after macro call and must be reinitialized
- * before it can be used again.
- */
-#define for_each_managed_zone_pgdat(zone, pgdat, idx, highidx)	\
-	for ((idx) = 0, (zone) = (pgdat)->node_zones;		\
-	    (idx) <= (highidx);					\
-	    (idx)++, (zone)++)					\
-		if (!managed_zone(zone))			\
-			continue;				\
-		else
-
 static void set_task_reclaim_state(struct task_struct *task,
 				   struct reclaim_state *rs)
 {
-- 
cgit v1.2.3


From f304652609eae3814b0e9d11c75c0e0cb62da31f Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:44 +0800
Subject: mm: vmscan: prepare for reparenting MGLRU folios
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Similar to traditional LRU folios, in order to solve the dying memcg
problem, we also need to reparenting MGLRU folios to the parent memcg when
memcg offline.

However, there are the following challenges:

1. Each lruvec has between MIN_NR_GENS and MAX_NR_GENS generations, the
   number of generations of the parent and child memcg may be different,
   so we cannot simply transfer MGLRU folios in the child memcg to the
   parent memcg as we did for traditional LRU folios.
2. The generation information is stored in folio->flags, but we cannot
   traverse these folios while holding the lru lock, otherwise it may
   cause softlockup.
3. In walk_update_folio(), the gen of folio and corresponding lru size
   may be updated, but the folio is not immediately moved to the
   corresponding lru list. Therefore, there may be folios of different
   generations on an LRU list.
4. In lru_gen_del_folio(), the generation to which the folio belongs is
   found based on the generation information in folio->flags, and the
   corresponding LRU size will be updated. Therefore, we need to update
   the lru size correctly during reparenting, otherwise the lru size may
   be updated incorrectly in lru_gen_del_folio().

Finally, this patch chose a compromise method, which is to splice the lru
list in the child memcg to the lru list of the same generation in the
parent memcg during reparenting.  And in order to ensure that the parent
memcg has the same generation, we need to increase the generations in the
parent memcg to the MAX_NR_GENS before reparenting.

Of course, the same generation has different meanings in the parent and
child memcg, this will cause confusion in the hot and cold information of
folios.  But other than that, this method is simple enough, the lru size
is correct, and there is no need to consider some concurrency issues (such
as lru_gen_del_folio()).

To prepare for the above work, this commit implements the specific
functions, which will be used during reparenting.

[zhengqi.arch@bytedance.com: use list_splice_tail_init() to reparent child folios]
  Link: https://lore.kernel.org/20260324114937.28569-1-qi.zheng@linux.dev
Link: https://lore.kernel.org/e75050354cdbc42221a04f7cf133292b61105548.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Suggested-by: Harry Yoo <harry.yoo@oracle.com>
Suggested-by: Imran Khan <imran.f.khan@oracle.com>
Acked-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h |  17 ++++++
 mm/vmscan.c            | 142 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 159 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4a20df132258..20f920dede65 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -692,6 +692,9 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg);
 void lru_gen_offline_memcg(struct mem_cgroup *memcg);
 void lru_gen_release_memcg(struct mem_cgroup *memcg);
 void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid);
+void max_lru_gen_memcg(struct mem_cgroup *memcg, int nid);
+bool recheck_lru_gen_max_memcg(struct mem_cgroup *memcg, int nid);
+void lru_gen_reparent_memcg(struct mem_cgroup *memcg, struct mem_cgroup *parent, int nid);
 
 #else /* !CONFIG_LRU_GEN */
 
@@ -733,6 +736,20 @@ static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
 {
 }
 
+static inline void max_lru_gen_memcg(struct mem_cgroup *memcg, int nid)
+{
+}
+
+static inline bool recheck_lru_gen_max_memcg(struct mem_cgroup *memcg, int nid)
+{
+	return true;
+}
+
+static inline
+void lru_gen_reparent_memcg(struct mem_cgroup *memcg, struct mem_cgroup *parent, int nid)
+{
+}
+
 #endif /* CONFIG_LRU_GEN */
 
 struct lruvec {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d225e84b5263..8472aa4bddd5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4426,6 +4426,148 @@ void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
 		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
 }
 
+bool recheck_lru_gen_max_memcg(struct mem_cgroup *memcg, int nid)
+{
+	struct lruvec *lruvec = get_lruvec(memcg, nid);
+	int type;
+
+	for (type = 0; type < ANON_AND_FILE; type++) {
+		if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
+			return false;
+	}
+
+	return true;
+}
+
+static void try_to_inc_max_seq_nowalk(struct mem_cgroup *memcg,
+				      struct lruvec *lruvec)
+{
+	struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
+	struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
+	int swappiness = mem_cgroup_swappiness(memcg);
+	DEFINE_MAX_SEQ(lruvec);
+	bool success = false;
+
+	/*
+	 * We are not iterating the mm_list here, updating mm_state->seq is just
+	 * to make mm walkers work properly.
+	 */
+	if (mm_state) {
+		spin_lock(&mm_list->lock);
+		VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq);
+		if (max_seq > mm_state->seq) {
+			WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
+			success = true;
+		}
+		spin_unlock(&mm_list->lock);
+	} else {
+		success = true;
+	}
+
+	if (success)
+		inc_max_seq(lruvec, max_seq, swappiness);
+}
+
+/*
+ * We need to ensure that the folios of child memcg can be reparented to the
+ * same gen of the parent memcg, so the gens of the parent memcg needed be
+ * incremented to the MAX_NR_GENS before reparenting.
+ */
+void max_lru_gen_memcg(struct mem_cgroup *memcg, int nid)
+{
+	struct lruvec *lruvec = get_lruvec(memcg, nid);
+	int type;
+
+	for (type = 0; type < ANON_AND_FILE; type++) {
+		while (get_nr_gens(lruvec, type) < MAX_NR_GENS) {
+			try_to_inc_max_seq_nowalk(memcg, lruvec);
+			cond_resched();
+		}
+	}
+}
+
+/*
+ * Compared to traditional LRU, MGLRU faces the following challenges:
+ *
+ * 1. Each lruvec has between MIN_NR_GENS and MAX_NR_GENS generations, the
+ *    number of generations of the parent and child memcg may be different,
+ *    so we cannot simply transfer MGLRU folios in the child memcg to the
+ *    parent memcg as we did for traditional LRU folios.
+ * 2. The generation information is stored in folio->flags, but we cannot
+ *    traverse these folios while holding the lru lock, otherwise it may
+ *    cause softlockup.
+ * 3. In walk_update_folio(), the gen of folio and corresponding lru size
+ *    may be updated, but the folio is not immediately moved to the
+ *    corresponding lru list. Therefore, there may be folios of different
+ *    generations on an LRU list.
+ * 4. In lru_gen_del_folio(), the generation to which the folio belongs is
+ *    found based on the generation information in folio->flags, and the
+ *    corresponding LRU size will be updated. Therefore, we need to update
+ *    the lru size correctly during reparenting, otherwise the lru size may
+ *    be updated incorrectly in lru_gen_del_folio().
+ *
+ * Finally, we choose a compromise method, which is to splice the lru list in
+ * the child memcg to the lru list of the same generation in the parent memcg
+ * during reparenting.
+ *
+ * The same generation has different meanings in the parent and child memcg,
+ * so this compromise method will cause the LRU inversion problem. But as the
+ * system runs, this problem will be fixed automatically.
+ */
+static void __lru_gen_reparent_memcg(struct lruvec *child_lruvec, struct lruvec *parent_lruvec,
+				     int zone, int type)
+{
+	struct lru_gen_folio *child_lrugen, *parent_lrugen;
+	enum lru_list lru = type * LRU_INACTIVE_FILE;
+	int i;
+
+	child_lrugen = &child_lruvec->lrugen;
+	parent_lrugen = &parent_lruvec->lrugen;
+
+	for (i = 0; i < get_nr_gens(child_lruvec, type); i++) {
+		int gen = lru_gen_from_seq(child_lrugen->max_seq - i);
+		long nr_pages = child_lrugen->nr_pages[gen][type][zone];
+		int child_lru_active = lru_gen_is_active(child_lruvec, gen) ? LRU_ACTIVE : 0;
+		int parent_lru_active = lru_gen_is_active(parent_lruvec, gen) ? LRU_ACTIVE : 0;
+
+		/* Assuming that child pages are colder than parent pages */
+		list_splice_tail_init(&child_lrugen->folios[gen][type][zone],
+				      &parent_lrugen->folios[gen][type][zone]);
+
+		WRITE_ONCE(child_lrugen->nr_pages[gen][type][zone], 0);
+		WRITE_ONCE(parent_lrugen->nr_pages[gen][type][zone],
+			   parent_lrugen->nr_pages[gen][type][zone] + nr_pages);
+
+		if (lru_gen_is_active(child_lruvec, gen) != lru_gen_is_active(parent_lruvec, gen)) {
+			__update_lru_size(child_lruvec, lru + child_lru_active, zone, -nr_pages);
+			__update_lru_size(parent_lruvec, lru + parent_lru_active, zone, nr_pages);
+		}
+	}
+}
+
+void lru_gen_reparent_memcg(struct mem_cgroup *memcg, struct mem_cgroup *parent, int nid)
+{
+	struct lruvec *child_lruvec, *parent_lruvec;
+	int type, zid;
+	struct zone *zone;
+	enum lru_list lru;
+
+	child_lruvec = get_lruvec(memcg, nid);
+	parent_lruvec = get_lruvec(parent, nid);
+
+	for_each_managed_zone_pgdat(zone, NODE_DATA(nid), zid, MAX_NR_ZONES - 1)
+		for (type = 0; type < ANON_AND_FILE; type++)
+			__lru_gen_reparent_memcg(child_lruvec, parent_lruvec, zid, type);
+
+	for_each_lru(lru) {
+		for_each_managed_zone_pgdat(zone, NODE_DATA(nid), zid, MAX_NR_ZONES - 1) {
+			unsigned long size = mem_cgroup_get_zone_lru_size(child_lruvec, lru, zid);
+
+			mem_cgroup_update_lru_size(parent_lruvec, lru, zid, size);
+		}
+	}
+}
+
 #endif /* CONFIG_MEMCG */
 
 /******************************************************************************
-- 
cgit v1.2.3


From 7404bd37cfbeb2aa06249418c1788ca94bae2875 Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:46 +0800
Subject: mm: workingset: use lruvec_lru_size() to get the number of lru pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For cgroup v2, count_shadow_nodes() is the only place to read
non-hierarchical stats (lruvec_stats->state_local).  To avoid the need to
consider cgroup v2 during subsequent non-hierarchical stats reparenting,
use lruvec_lru_size() instead of lruvec_page_state_local() to get the
number of lru pages.

For NR_SLAB_RECLAIMABLE_B and NR_SLAB_UNRECLAIMABLE_B cases, it appears
that the statistics here have already been problematic for a while since
slab pages have been reparented.  So just ignore it for now.

Link: https://lore.kernel.org/b1d448c667a8fb377c3390d9aba43bdb7e4d5739.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Muchun Song <muchun.song@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 1 +
 mm/vmscan.c          | 3 +--
 mm/workingset.c      | 5 +++--
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index d653fe050b8f..7a09df6977a5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -352,6 +352,7 @@ extern void swap_setup(void);
 extern unsigned long zone_reclaimable_pages(struct zone *zone);
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
+unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
 
 #define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
 #define MEMCG_RECLAIM_PROACTIVE (1 << 2)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8472aa4bddd5..1ac4f959ec1c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -390,8 +390,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
  * @lru: lru to use
  * @zone_idx: zones to consider (use MAX_NR_ZONES - 1 for the whole LRU list)
  */
-static unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru,
-				     int zone_idx)
+unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
 {
 	unsigned long size = 0;
 	int zid;
diff --git a/mm/workingset.c b/mm/workingset.c
index 95d722a452e1..07e6836d0502 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -691,9 +691,10 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 
 		mem_cgroup_flush_stats_ratelimited(sc->memcg);
 		lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid));
+
 		for (pages = 0, i = 0; i < NR_LRU_LISTS; i++)
-			pages += lruvec_page_state_local(lruvec,
-							 NR_LRU_BASE + i);
+			pages += lruvec_lru_size(lruvec, i, MAX_NR_ZONES - 1);
+
 		pages += lruvec_page_state_local(
 			lruvec, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT;
 		pages += lruvec_page_state_local(
-- 
cgit v1.2.3


From 01b9da291c4969354807b52956f4aae1f41b4924 Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:49 +0800
Subject: mm: memcontrol: convert objcg to be per-memcg per-node type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert objcg to be per-memcg per-node type, so that when reparent LRU
folios later, we can hold the lru lock at the node level, thus avoiding
holding too many lru locks at once.

[zhengqi.arch@bytedance.com: reset pn->orig_objcg to NULL]
  Link: https://lore.kernel.org/20260309112939.31937-1-qi.zheng@linux.dev
[akpm@linux-foundation.org: fix comment typo, per Usama.  Reflow comment to 80 cols]
[devnexen@gmail.com: fix obj_cgroup leak in mem_cgroup_css_online() error path]
  Link: https://lore.kernel.org/20260322193631.45457-1-devnexen@gmail.com
[devnexen@gmail.com: add newline, per Qi Zheng]
  Link: https://lore.kernel.org/20260323063007.7783-1-devnexen@gmail.com
Link: https://lore.kernel.org/56c04b1c5d54f75ccdc12896df6c1ca35403ecc3.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Signed-off-by: David Carlier <devnexen@gmail.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Usama Arif <usama.arif@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 23 ++++++------
 include/linux/sched.h      |  2 +-
 mm/memcontrol.c            | 92 +++++++++++++++++++++++++++++++---------------
 3 files changed, 75 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 12982875073e..3e836b56bfcb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -115,6 +115,16 @@ struct mem_cgroup_per_node {
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 	struct mem_cgroup_reclaim_iter	iter;
 
+	/*
+	 * objcg is wiped out as a part of the objcg repaprenting process.
+	 * orig_objcg preserves a pointer (and a reference) to the original
+	 * objcg until the end of live of memcg.
+	 */
+	struct obj_cgroup __rcu	*objcg;
+	struct obj_cgroup	*orig_objcg;
+	/* list of inherited objcgs, protected by objcg_lock */
+	struct list_head objcg_list;
+
 #ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
 	/* slab stats for nmi context */
 	atomic_t		slab_reclaimable;
@@ -179,6 +189,7 @@ struct obj_cgroup {
 		struct list_head list; /* protected by objcg_lock */
 		struct rcu_head rcu;
 	};
+	bool is_root;
 };
 
 /*
@@ -257,15 +268,6 @@ struct mem_cgroup {
 	seqlock_t		socket_pressure_seqlock;
 #endif
 	int kmemcg_id;
-	/*
-	 * memcg->objcg is wiped out as a part of the objcg repaprenting
-	 * process. memcg->orig_objcg preserves a pointer (and a reference)
-	 * to the original objcg until the end of live of memcg.
-	 */
-	struct obj_cgroup __rcu	*objcg;
-	struct obj_cgroup	*orig_objcg;
-	/* list of inherited objcgs, protected by objcg_lock */
-	struct list_head objcg_list;
 
 	struct memcg_vmstats_percpu __percpu *vmstats_percpu;
 
@@ -332,7 +334,6 @@ struct mem_cgroup {
 #define MEMCG_CHARGE_BATCH 64U
 
 extern struct mem_cgroup *root_mem_cgroup;
-extern struct obj_cgroup *root_obj_cgroup;
 
 enum page_memcg_data_flags {
 	/* page->memcg_data is a pointer to an slabobj_ext vector */
@@ -551,7 +552,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 
 static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
 {
-	return objcg == root_obj_cgroup;
+	return objcg->is_root;
 }
 
 static inline bool mem_cgroup_disabled(void)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a5d3dbc9cdf..0d27775546f8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1533,7 +1533,7 @@ struct task_struct {
 	/* Used by memcontrol for targeted memcg charge: */
 	struct mem_cgroup		*active_memcg;
 
-	/* Cache for current->cgroups->memcg->objcg lookups: */
+	/* Cache for current->cgroups->memcg->nodeinfo[nid]->objcg lookups: */
 	struct obj_cgroup		*objcg;
 #endif
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c9e5ea0d9fc6..1aaa66f729b3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -83,8 +83,6 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
 struct mem_cgroup *root_mem_cgroup __read_mostly;
 EXPORT_SYMBOL(root_mem_cgroup);
 
-struct obj_cgroup *root_obj_cgroup __read_mostly;
-
 /* Active memory cgroup to use from an interrupt context */
 DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
@@ -209,18 +207,21 @@ static struct obj_cgroup *obj_cgroup_alloc(void)
 }
 
 static inline struct obj_cgroup *__memcg_reparent_objcgs(struct mem_cgroup *memcg,
-							 struct mem_cgroup *parent)
+							 struct mem_cgroup *parent,
+							 int nid)
 {
 	struct obj_cgroup *objcg, *iter;
+	struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+	struct mem_cgroup_per_node *parent_pn = parent->nodeinfo[nid];
 
-	objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
+	objcg = rcu_replace_pointer(pn->objcg, NULL, true);
 	/* 1) Ready to reparent active objcg. */
-	list_add(&objcg->list, &memcg->objcg_list);
+	list_add(&objcg->list, &pn->objcg_list);
 	/* 2) Reparent active objcg and already reparented objcgs to parent. */
-	list_for_each_entry(iter, &memcg->objcg_list, list)
+	list_for_each_entry(iter, &pn->objcg_list, list)
 		WRITE_ONCE(iter->memcg, parent);
 	/* 3) Move already reparented objcgs to the parent's list */
-	list_splice(&memcg->objcg_list, &parent->objcg_list);
+	list_splice(&pn->objcg_list, &parent_pn->objcg_list);
 
 	return objcg;
 }
@@ -267,14 +268,17 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg)
 {
 	struct obj_cgroup *objcg;
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+	int nid;
 
-	reparent_locks(memcg, parent);
+	for_each_node(nid) {
+		reparent_locks(memcg, parent);
 
-	objcg = __memcg_reparent_objcgs(memcg, parent);
+		objcg = __memcg_reparent_objcgs(memcg, parent, nid);
 
-	reparent_unlocks(memcg, parent);
+		reparent_unlocks(memcg, parent);
 
-	percpu_ref_kill(&objcg->refcnt);
+		percpu_ref_kill(&objcg->refcnt);
+	}
 }
 
 /*
@@ -2830,8 +2834,10 @@ struct mem_cgroup *mem_cgroup_from_virt(void *p)
 
 static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
 {
+	int nid = numa_node_id();
+
 	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
+		struct obj_cgroup *objcg = rcu_dereference(memcg->nodeinfo[nid]->objcg);
 
 		if (likely(objcg && obj_cgroup_tryget(objcg)))
 			return objcg;
@@ -2895,6 +2901,7 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 {
 	struct mem_cgroup *memcg;
 	struct obj_cgroup *objcg;
+	int nid = numa_node_id();
 
 	if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi())
 		return NULL;
@@ -2911,14 +2918,14 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 		 * Objcg reference is kept by the task, so it's safe
 		 * to use the objcg by the current task.
 		 */
-		return objcg ? : root_obj_cgroup;
+		return objcg ? : rcu_dereference_check(root_mem_cgroup->nodeinfo[nid]->objcg, 1);
 	}
 
 	memcg = this_cpu_read(int_active_memcg);
 	if (unlikely(memcg))
 		goto from_memcg;
 
-	return root_obj_cgroup;
+	return rcu_dereference_check(root_mem_cgroup->nodeinfo[nid]->objcg, 1);
 
 from_memcg:
 	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
@@ -2928,12 +2935,12 @@ from_memcg:
 		 * away and can be used within the scope without any additional
 		 * protection.
 		 */
-		objcg = rcu_dereference_check(memcg->objcg, 1);
+		objcg = rcu_dereference_check(memcg->nodeinfo[nid]->objcg, 1);
 		if (likely(objcg))
 			return objcg;
 	}
 
-	return root_obj_cgroup;
+	return rcu_dereference_check(root_mem_cgroup->nodeinfo[nid]->objcg, 1);
 }
 
 struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
@@ -3876,6 +3883,8 @@ static bool alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn->lruvec_stats_percpu)
 		goto fail;
 
+	INIT_LIST_HEAD(&pn->objcg_list);
+
 	lruvec_init(&pn->lruvec);
 	pn->memcg = memcg;
 
@@ -3890,10 +3899,14 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
 
-	obj_cgroup_put(memcg->orig_objcg);
+	for_each_node(node) {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
+		if (!pn)
+			continue;
 
-	for_each_node(node)
-		free_mem_cgroup_per_node_info(memcg->nodeinfo[node]);
+		obj_cgroup_put(pn->orig_objcg);
+		free_mem_cgroup_per_node_info(pn);
+	}
 	memcg1_free_events(memcg);
 	kfree(memcg->vmstats);
 	free_percpu(memcg->vmstats_percpu);
@@ -3964,7 +3977,6 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
 #endif
 	memcg1_memcg_init(memcg);
 	memcg->kmemcg_id = -1;
-	INIT_LIST_HEAD(&memcg->objcg_list);
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&memcg->cgwb_list);
 	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
@@ -4041,6 +4053,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct obj_cgroup *objcg;
+	int nid;
 
 	memcg_online_kmem(memcg);
 
@@ -4052,17 +4065,19 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (alloc_shrinker_info(memcg))
 		goto offline_kmem;
 
-	objcg = obj_cgroup_alloc();
-	if (!objcg)
-		goto free_shrinker;
+	for_each_node(nid) {
+		objcg = obj_cgroup_alloc();
+		if (!objcg)
+			goto free_objcg;
 
-	if (unlikely(mem_cgroup_is_root(memcg)))
-		root_obj_cgroup = objcg;
+		if (unlikely(mem_cgroup_is_root(memcg)))
+			objcg->is_root = true;
 
-	objcg->memcg = memcg;
-	rcu_assign_pointer(memcg->objcg, objcg);
-	obj_cgroup_get(objcg);
-	memcg->orig_objcg = objcg;
+		objcg->memcg = memcg;
+		rcu_assign_pointer(memcg->nodeinfo[nid]->objcg, objcg);
+		obj_cgroup_get(objcg);
+		memcg->nodeinfo[nid]->orig_objcg = objcg;
+	}
 
 	if (unlikely(mem_cgroup_is_root(memcg)) && !mem_cgroup_disabled())
 		queue_delayed_work(system_dfl_wq, &stats_flush_dwork,
@@ -4086,7 +4101,24 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	xa_store(&mem_cgroup_private_ids, memcg->id.id, memcg, GFP_KERNEL);
 
 	return 0;
-free_shrinker:
+free_objcg:
+	for_each_node(nid) {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+
+		objcg = rcu_replace_pointer(pn->objcg, NULL, true);
+		if (objcg)
+			percpu_ref_kill(&objcg->refcnt);
+
+		if (pn->orig_objcg) {
+			obj_cgroup_put(pn->orig_objcg);
+			/*
+			 * Reset pn->orig_objcg to NULL to prevent
+			 * obj_cgroup_put() from being called again in
+			 * __mem_cgroup_free().
+			 */
+			pn->orig_objcg = NULL;
+		}
+	}
 	free_shrinker_info(memcg);
 offline_kmem:
 	memcg_offline_kmem(memcg);
-- 
cgit v1.2.3


From f1cf8d2f36dc369688bbe61ce064fbd829dbc9e1 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:50 +0800
Subject: mm: memcontrol: eliminate the problem of dying memory cgroup for LRU
 folios
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that everything is set up, switch folio->memcg_data pointers to
objcgs, update the accessors, and execute reparenting on cgroup death.

Finally, folio->memcg_data of LRU folios and kmem folios will always point
to an object cgroup pointer.  The folio->memcg_data of slab folios will
point to an vector of object cgroups.

Link: https://lore.kernel.org/80cb7af198dc6f2173fe616d1207a4c315ece141.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h |  77 ++++++------------
 mm/memcontrol-v1.c         |  15 ++--
 mm/memcontrol.c            | 194 +++++++++++++++++++++++++++------------------
 3 files changed, 151 insertions(+), 135 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3e836b56bfcb..086158969529 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -369,9 +369,6 @@ enum objext_flags {
 #define OBJEXTS_FLAGS_MASK (__NR_OBJEXTS_FLAGS - 1)
 
 #ifdef CONFIG_MEMCG
-
-static inline bool folio_memcg_kmem(struct folio *folio);
-
 /*
  * After the initialization objcg->memcg is always pointing at
  * a valid memcg, but can be atomically swapped to the parent memcg.
@@ -385,43 +382,19 @@ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
 }
 
 /*
- * __folio_memcg - Get the memory cgroup associated with a non-kmem folio
- * @folio: Pointer to the folio.
- *
- * Returns a pointer to the memory cgroup associated with the folio,
- * or NULL. This function assumes that the folio is known to have a
- * proper memory cgroup pointer. It's not safe to call this function
- * against some type of folios, e.g. slab folios or ex-slab folios or
- * kmem folios.
- */
-static inline struct mem_cgroup *__folio_memcg(struct folio *folio)
-{
-	unsigned long memcg_data = folio->memcg_data;
-
-	VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
-	VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio);
-	VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio);
-
-	return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
-}
-
-/*
- * __folio_objcg - get the object cgroup associated with a kmem folio.
+ * folio_objcg - get the object cgroup associated with a folio.
  * @folio: Pointer to the folio.
  *
  * Returns a pointer to the object cgroup associated with the folio,
  * or NULL. This function assumes that the folio is known to have a
- * proper object cgroup pointer. It's not safe to call this function
- * against some type of folios, e.g. slab folios or ex-slab folios or
- * LRU folios.
+ * proper object cgroup pointer.
  */
-static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
+static inline struct obj_cgroup *folio_objcg(struct folio *folio)
 {
 	unsigned long memcg_data = folio->memcg_data;
 
 	VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
 	VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio);
-	VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio);
 
 	return (struct obj_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
 }
@@ -435,21 +408,30 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
  * proper memory cgroup pointer. It's not safe to call this function
  * against some type of folios, e.g. slab folios or ex-slab folios.
  *
- * For a non-kmem folio any of the following ensures folio and memcg binding
- * stability:
+ * For a folio any of the following ensures folio and objcg binding stability:
  *
  * - the folio lock
  * - LRU isolation
  * - exclusive reference
  *
- * For a kmem folio a caller should hold an rcu read lock to protect memcg
- * associated with a kmem folio from being released.
+ * Based on the stable binding of folio and objcg, for a folio any of the
+ * following ensures folio and memcg binding stability:
+ *
+ * - cgroup_mutex
+ * - the lruvec lock
+ *
+ * If the caller only want to ensure that the page counters of memcg are
+ * updated correctly, ensure that the binding stability of folio and objcg
+ * is sufficient.
+ *
+ * Note: The caller should hold an rcu read lock or cgroup_mutex to protect
+ * memcg associated with a folio from being released.
  */
 static inline struct mem_cgroup *folio_memcg(struct folio *folio)
 {
-	if (folio_memcg_kmem(folio))
-		return obj_cgroup_memcg(__folio_objcg(folio));
-	return __folio_memcg(folio);
+	struct obj_cgroup *objcg = folio_objcg(folio);
+
+	return objcg ? obj_cgroup_memcg(objcg) : NULL;
 }
 
 /*
@@ -473,15 +455,10 @@ static inline bool folio_memcg_charged(struct folio *folio)
  * has an associated memory cgroup pointer or an object cgroups vector or
  * an object cgroup.
  *
- * For a non-kmem folio any of the following ensures folio and memcg binding
- * stability:
+ * The page and objcg or memcg binding rules can refer to folio_memcg().
  *
- * - the folio lock
- * - LRU isolation
- * - exclusive reference
- *
- * For a kmem folio a caller should hold an rcu read lock to protect memcg
- * associated with a kmem folio from being released.
+ * A caller should hold an rcu read lock to protect memcg associated with a
+ * page from being released.
  */
 static inline struct mem_cgroup *folio_memcg_check(struct folio *folio)
 {
@@ -490,18 +467,14 @@ static inline struct mem_cgroup *folio_memcg_check(struct folio *folio)
 	 * for slabs, READ_ONCE() should be used here.
 	 */
 	unsigned long memcg_data = READ_ONCE(folio->memcg_data);
+	struct obj_cgroup *objcg;
 
 	if (memcg_data & MEMCG_DATA_OBJEXTS)
 		return NULL;
 
-	if (memcg_data & MEMCG_DATA_KMEM) {
-		struct obj_cgroup *objcg;
-
-		objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
-		return obj_cgroup_memcg(objcg);
-	}
+	objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
 
-	return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
+	return objcg ? obj_cgroup_memcg(objcg) : NULL;
 }
 
 static inline struct mem_cgroup *page_memcg_check(struct page *page)
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 8380adfa0f68..433bba9dfe71 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -613,6 +613,7 @@ void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
 void memcg1_swapout(struct folio *folio, swp_entry_t entry)
 {
 	struct mem_cgroup *memcg, *swap_memcg;
+	struct obj_cgroup *objcg;
 	unsigned int nr_entries;
 
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
@@ -624,12 +625,13 @@ void memcg1_swapout(struct folio *folio, swp_entry_t entry)
 	if (!do_memsw_account())
 		return;
 
-	memcg = folio_memcg(folio);
-
-	VM_WARN_ON_ONCE_FOLIO(!memcg, folio);
-	if (!memcg)
+	objcg = folio_objcg(folio);
+	VM_WARN_ON_ONCE_FOLIO(!objcg, folio);
+	if (!objcg)
 		return;
 
+	rcu_read_lock();
+	memcg = obj_cgroup_memcg(objcg);
 	/*
 	 * In case the memcg owning these pages has been offlined and doesn't
 	 * have an ID allocated to it anymore, charge the closest online
@@ -644,7 +646,7 @@ void memcg1_swapout(struct folio *folio, swp_entry_t entry)
 	folio_unqueue_deferred_split(folio);
 	folio->memcg_data = 0;
 
-	if (!mem_cgroup_is_root(memcg))
+	if (!obj_cgroup_is_root(objcg))
 		page_counter_uncharge(&memcg->memory, nr_entries);
 
 	if (memcg != swap_memcg) {
@@ -665,7 +667,8 @@ void memcg1_swapout(struct folio *folio, swp_entry_t entry)
 	preempt_enable_nested();
 	memcg1_check_events(memcg, folio_nid(folio));
 
-	css_put(&memcg->css);
+	rcu_read_unlock();
+	obj_cgroup_put(objcg);
 }
 
 /*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1aaa66f729b3..b696823b34d0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -254,13 +254,17 @@ static inline void reparent_state_local(struct mem_cgroup *memcg, struct mem_cgr
 }
 #endif
 
-static inline void reparent_locks(struct mem_cgroup *memcg, struct mem_cgroup *parent)
+static inline void reparent_locks(struct mem_cgroup *memcg, struct mem_cgroup *parent, int nid)
 {
 	spin_lock_irq(&objcg_lock);
+	spin_lock_nested(&mem_cgroup_lruvec(memcg, NODE_DATA(nid))->lru_lock, 1);
+	spin_lock_nested(&mem_cgroup_lruvec(parent, NODE_DATA(nid))->lru_lock, 2);
 }
 
-static inline void reparent_unlocks(struct mem_cgroup *memcg, struct mem_cgroup *parent)
+static inline void reparent_unlocks(struct mem_cgroup *memcg, struct mem_cgroup *parent, int nid)
 {
+	spin_unlock(&mem_cgroup_lruvec(parent, NODE_DATA(nid))->lru_lock);
+	spin_unlock(&mem_cgroup_lruvec(memcg, NODE_DATA(nid))->lru_lock);
 	spin_unlock_irq(&objcg_lock);
 }
 
@@ -271,14 +275,31 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg)
 	int nid;
 
 	for_each_node(nid) {
-		reparent_locks(memcg, parent);
+retry:
+		if (lru_gen_enabled())
+			max_lru_gen_memcg(parent, nid);
+
+		reparent_locks(memcg, parent, nid);
+
+		if (lru_gen_enabled()) {
+			if (!recheck_lru_gen_max_memcg(parent, nid)) {
+				reparent_unlocks(memcg, parent, nid);
+				cond_resched();
+				goto retry;
+			}
+			lru_gen_reparent_memcg(memcg, parent, nid);
+		} else {
+			lru_reparent_memcg(memcg, parent, nid);
+		}
 
 		objcg = __memcg_reparent_objcgs(memcg, parent, nid);
 
-		reparent_unlocks(memcg, parent);
+		reparent_unlocks(memcg, parent, nid);
 
 		percpu_ref_kill(&objcg->refcnt);
 	}
+
+	reparent_state_local(memcg, parent);
 }
 
 /*
@@ -823,6 +844,7 @@ static void __mod_memcg_state(struct mem_cgroup *memcg,
 	this_cpu_add(memcg->vmstats_percpu->state[i], val);
 	val = memcg_state_val_in_pages(idx, val);
 	memcg_rstat_updated(memcg, val, cpu);
+
 	trace_mod_memcg_state(memcg, idx, val);
 
 	put_cpu();
@@ -840,7 +862,9 @@ void mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
 	if (mem_cgroup_disabled())
 		return;
 
+	memcg = get_non_dying_memcg_start(memcg);
 	__mod_memcg_state(memcg, idx, val);
+	get_non_dying_memcg_end();
 }
 
 #ifdef CONFIG_MEMCG_V1
@@ -900,11 +924,17 @@ static void mod_memcg_lruvec_state(struct lruvec *lruvec,
 				     enum node_stat_item idx,
 				     int val)
 {
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 	struct mem_cgroup_per_node *pn;
+	struct mem_cgroup *memcg;
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	memcg = get_non_dying_memcg_start(pn->memcg);
+	pn = memcg->nodeinfo[pgdat->node_id];
 
 	__mod_memcg_lruvec_state(pn, idx, val);
+
+	get_non_dying_memcg_end();
 }
 
 /**
@@ -1127,6 +1157,8 @@ again:
 /**
  * get_mem_cgroup_from_folio - Obtain a reference on a given folio's memcg.
  * @folio: folio from which memcg should be extracted.
+ *
+ * See folio_memcg() for folio->objcg/memcg binding rules.
  */
 struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio)
 {
@@ -2722,17 +2754,17 @@ static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	return try_charge_memcg(memcg, gfp_mask, nr_pages);
 }
 
-static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
+static void commit_charge(struct folio *folio, struct obj_cgroup *objcg)
 {
 	VM_BUG_ON_FOLIO(folio_memcg_charged(folio), folio);
 	/*
-	 * Any of the following ensures page's memcg stability:
+	 * Any of the following ensures folio's objcg stability:
 	 *
 	 * - the page lock
 	 * - LRU isolation
 	 * - exclusive reference
 	 */
-	folio->memcg_data = (unsigned long)memcg;
+	folio->memcg_data = (unsigned long)objcg;
 }
 
 #ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
@@ -2846,6 +2878,17 @@ static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
 	return NULL;
 }
 
+static inline struct obj_cgroup *get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
+{
+	struct obj_cgroup *objcg;
+
+	rcu_read_lock();
+	objcg = __get_obj_cgroup_from_memcg(memcg);
+	rcu_read_unlock();
+
+	return objcg;
+}
+
 static struct obj_cgroup *current_objcg_update(void)
 {
 	struct mem_cgroup *memcg;
@@ -2947,17 +2990,10 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
 {
 	struct obj_cgroup *objcg;
 
-	if (!memcg_kmem_online())
-		return NULL;
-
-	if (folio_memcg_kmem(folio)) {
-		objcg = __folio_objcg(folio);
+	objcg = folio_objcg(folio);
+	if (objcg)
 		obj_cgroup_get(objcg);
-	} else {
-		rcu_read_lock();
-		objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
-		rcu_read_unlock();
-	}
+
 	return objcg;
 }
 
@@ -3519,7 +3555,7 @@ void folio_split_memcg_refs(struct folio *folio, unsigned old_order,
 		return;
 
 	new_refs = (1 << (old_order - new_order)) - 1;
-	css_get_many(&__folio_memcg(folio)->css, new_refs);
+	obj_cgroup_get_many(folio_objcg(folio), new_refs);
 }
 
 static void memcg_online_kmem(struct mem_cgroup *memcg)
@@ -4955,16 +4991,20 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
 			gfp_t gfp)
 {
-	int ret;
-
-	ret = try_charge(memcg, gfp, folio_nr_pages(folio));
-	if (ret)
-		goto out;
+	int ret = 0;
+	struct obj_cgroup *objcg;
 
-	css_get(&memcg->css);
-	commit_charge(folio, memcg);
+	objcg = get_obj_cgroup_from_memcg(memcg);
+	/* Do not account at the root objcg level. */
+	if (!obj_cgroup_is_root(objcg))
+		ret = try_charge_memcg(memcg, gfp, folio_nr_pages(folio));
+	if (ret) {
+		obj_cgroup_put(objcg);
+		return ret;
+	}
+	commit_charge(folio, objcg);
 	memcg1_commit_charge(folio, memcg);
-out:
+
 	return ret;
 }
 
@@ -5050,7 +5090,7 @@ int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
 }
 
 struct uncharge_gather {
-	struct mem_cgroup *memcg;
+	struct obj_cgroup *objcg;
 	unsigned long nr_memory;
 	unsigned long pgpgout;
 	unsigned long nr_kmem;
@@ -5064,58 +5104,52 @@ static inline void uncharge_gather_clear(struct uncharge_gather *ug)
 
 static void uncharge_batch(const struct uncharge_gather *ug)
 {
+	struct mem_cgroup *memcg;
+
+	rcu_read_lock();
+	memcg = obj_cgroup_memcg(ug->objcg);
 	if (ug->nr_memory) {
-		memcg_uncharge(ug->memcg, ug->nr_memory);
+		memcg_uncharge(memcg, ug->nr_memory);
 		if (ug->nr_kmem) {
-			mod_memcg_state(ug->memcg, MEMCG_KMEM, -ug->nr_kmem);
-			memcg1_account_kmem(ug->memcg, -ug->nr_kmem);
+			mod_memcg_state(memcg, MEMCG_KMEM, -ug->nr_kmem);
+			memcg1_account_kmem(memcg, -ug->nr_kmem);
 		}
-		memcg1_oom_recover(ug->memcg);
+		memcg1_oom_recover(memcg);
 	}
 
-	memcg1_uncharge_batch(ug->memcg, ug->pgpgout, ug->nr_memory, ug->nid);
+	memcg1_uncharge_batch(memcg, ug->pgpgout, ug->nr_memory, ug->nid);
+	rcu_read_unlock();
 
 	/* drop reference from uncharge_folio */
-	css_put(&ug->memcg->css);
+	obj_cgroup_put(ug->objcg);
 }
 
 static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
 {
 	long nr_pages;
-	struct mem_cgroup *memcg;
 	struct obj_cgroup *objcg;
 
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
 
 	/*
 	 * Nobody should be changing or seriously looking at
-	 * folio memcg or objcg at this point, we have fully
-	 * exclusive access to the folio.
+	 * folio objcg at this point, we have fully exclusive
+	 * access to the folio.
 	 */
-	if (folio_memcg_kmem(folio)) {
-		objcg = __folio_objcg(folio);
-		/*
-		 * This get matches the put at the end of the function and
-		 * kmem pages do not hold memcg references anymore.
-		 */
-		memcg = get_mem_cgroup_from_objcg(objcg);
-	} else {
-		memcg = __folio_memcg(folio);
-	}
-
-	if (!memcg)
+	objcg = folio_objcg(folio);
+	if (!objcg)
 		return;
 
-	if (ug->memcg != memcg) {
-		if (ug->memcg) {
+	if (ug->objcg != objcg) {
+		if (ug->objcg) {
 			uncharge_batch(ug);
 			uncharge_gather_clear(ug);
 		}
-		ug->memcg = memcg;
+		ug->objcg = objcg;
 		ug->nid = folio_nid(folio);
 
-		/* pairs with css_put in uncharge_batch */
-		css_get(&memcg->css);
+		/* pairs with obj_cgroup_put in uncharge_batch */
+		obj_cgroup_get(objcg);
 	}
 
 	nr_pages = folio_nr_pages(folio);
@@ -5123,20 +5157,17 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
 	if (folio_memcg_kmem(folio)) {
 		ug->nr_memory += nr_pages;
 		ug->nr_kmem += nr_pages;
-
-		folio->memcg_data = 0;
-		obj_cgroup_put(objcg);
 	} else {
 		/* LRU pages aren't accounted at the root level */
-		if (!mem_cgroup_is_root(memcg))
+		if (!obj_cgroup_is_root(objcg))
 			ug->nr_memory += nr_pages;
 		ug->pgpgout++;
 
 		WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
-		folio->memcg_data = 0;
 	}
 
-	css_put(&memcg->css);
+	folio->memcg_data = 0;
+	obj_cgroup_put(objcg);
 }
 
 void __mem_cgroup_uncharge(struct folio *folio)
@@ -5160,7 +5191,7 @@ void __mem_cgroup_uncharge_folios(struct folio_batch *folios)
 	uncharge_gather_clear(&ug);
 	for (i = 0; i < folios->nr; i++)
 		uncharge_folio(folios->folios[i], &ug);
-	if (ug.memcg)
+	if (ug.objcg)
 		uncharge_batch(&ug);
 }
 
@@ -5177,6 +5208,7 @@ void __mem_cgroup_uncharge_folios(struct folio_batch *folios)
 void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
 {
 	struct mem_cgroup *memcg;
+	struct obj_cgroup *objcg;
 	long nr_pages = folio_nr_pages(new);
 
 	VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
@@ -5191,21 +5223,24 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
 	if (folio_memcg_charged(new))
 		return;
 
-	memcg = folio_memcg(old);
-	VM_WARN_ON_ONCE_FOLIO(!memcg, old);
-	if (!memcg)
+	objcg = folio_objcg(old);
+	VM_WARN_ON_ONCE_FOLIO(!objcg, old);
+	if (!objcg)
 		return;
 
+	rcu_read_lock();
+	memcg = obj_cgroup_memcg(objcg);
 	/* Force-charge the new page. The old one will be freed soon */
-	if (!mem_cgroup_is_root(memcg)) {
+	if (!obj_cgroup_is_root(objcg)) {
 		page_counter_charge(&memcg->memory, nr_pages);
 		if (do_memsw_account())
 			page_counter_charge(&memcg->memsw, nr_pages);
 	}
 
-	css_get(&memcg->css);
-	commit_charge(new, memcg);
+	obj_cgroup_get(objcg);
+	commit_charge(new, objcg);
 	memcg1_commit_charge(new, memcg);
+	rcu_read_unlock();
 }
 
 /**
@@ -5221,7 +5256,7 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
  */
 void mem_cgroup_migrate(struct folio *old, struct folio *new)
 {
-	struct mem_cgroup *memcg;
+	struct obj_cgroup *objcg;
 
 	VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
 	VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
@@ -5232,18 +5267,18 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
 	if (mem_cgroup_disabled())
 		return;
 
-	memcg = folio_memcg(old);
+	objcg = folio_objcg(old);
 	/*
-	 * Note that it is normal to see !memcg for a hugetlb folio.
+	 * Note that it is normal to see !objcg for a hugetlb folio.
 	 * For e.g, it could have been allocated when memory_hugetlb_accounting
 	 * was not selected.
 	 */
-	VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(old) && !memcg, old);
-	if (!memcg)
+	VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(old) && !objcg, old);
+	if (!objcg)
 		return;
 
-	/* Transfer the charge and the css ref */
-	commit_charge(new, memcg);
+	/* Transfer the charge and the objcg ref */
+	commit_charge(new, objcg);
 
 	/* Warning should never happen, so don't worry about refcount non-0 */
 	WARN_ON_ONCE(folio_unqueue_deferred_split(old));
@@ -5426,22 +5461,27 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
 	unsigned int nr_pages = folio_nr_pages(folio);
 	struct page_counter *counter;
 	struct mem_cgroup *memcg;
+	struct obj_cgroup *objcg;
 
 	if (do_memsw_account())
 		return 0;
 
-	memcg = folio_memcg(folio);
-
-	VM_WARN_ON_ONCE_FOLIO(!memcg, folio);
-	if (!memcg)
+	objcg = folio_objcg(folio);
+	VM_WARN_ON_ONCE_FOLIO(!objcg, folio);
+	if (!objcg)
 		return 0;
 
+	rcu_read_lock();
+	memcg = obj_cgroup_memcg(objcg);
 	if (!entry.val) {
 		memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
+		rcu_read_unlock();
 		return 0;
 	}
 
 	memcg = mem_cgroup_private_id_get_online(memcg, nr_pages);
+	/* memcg is pined by memcg ID. */
+	rcu_read_unlock();
 
 	if (!mem_cgroup_is_root(memcg) &&
 	    !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
-- 
cgit v1.2.3


From 0a98e13963424d7f1f50211c692f46a3b1e8d03f Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Thu, 5 Mar 2026 19:52:51 +0800
Subject: mm: lru: add VM_WARN_ON_ONCE_FOLIO to lru maintenance helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We must ensure the folio is deleted from or added to the correct lruvec
list.  So, add VM_WARN_ON_ONCE_FOLIO() to catch invalid users.  The
VM_BUG_ON_PAGE() in move_pages_to_lru() can be removed as
add_page_to_lru_list() will perform the necessary check.

Link: https://lore.kernel.org/2c90fc006d9d730331a3caeef96f7e5dabe2036d.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_inline.h | 6 ++++++
 mm/vmscan.c               | 1 -
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 7fc2ced00f8f..a171070e15f0 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -348,6 +348,8 @@ void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
 {
 	enum lru_list lru = folio_lru_list(folio);
 
+	VM_WARN_ON_ONCE_FOLIO(!folio_matches_lruvec(folio, lruvec), folio);
+
 	if (lru_gen_add_folio(lruvec, folio, false))
 		return;
 
@@ -362,6 +364,8 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
 {
 	enum lru_list lru = folio_lru_list(folio);
 
+	VM_WARN_ON_ONCE_FOLIO(!folio_matches_lruvec(folio, lruvec), folio);
+
 	if (lru_gen_add_folio(lruvec, folio, true))
 		return;
 
@@ -376,6 +380,8 @@ void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
 {
 	enum lru_list lru = folio_lru_list(folio);
 
+	VM_WARN_ON_ONCE_FOLIO(!folio_matches_lruvec(folio, lruvec), folio);
+
 	if (lru_gen_del_folio(lruvec, folio, false))
 		return;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1ac4f959ec1c..fd120e898c70 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1916,7 +1916,6 @@ static unsigned int move_folios_to_lru(struct list_head *list)
 			continue;
 		}
 
-		VM_BUG_ON_FOLIO(!folio_matches_lruvec(folio, lruvec), folio);
 		lruvec_add_folio(lruvec, folio);
 		nr_pages = folio_nr_pages(folio);
 		nr_moved += nr_pages;
-- 
cgit v1.2.3


From 1c514a2c6e4c3bf2016a1dbbddc36d19fdf52ce5 Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Fri, 27 Mar 2026 18:16:30 +0800
Subject: mm: memcontrol: correct the nr_pages parameter type of
 mem_cgroup_update_lru_size()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The nr_pages parameter of mem_cgroup_update_lru_size() represents a page
count.  During the reparenting of LRU folios, the value passed to it can
potentially exceed the maximum value of a 32-bit integer.  It should be
declared as long instead of int to match the types used in lruvec size
accounting and to prevent possible overflow.

Update the parameter type to long to ensure correctness.

Link: https://lore.kernel.org/fd4140de44fa0a3978e4e2426731187fe8625f0b.1774604356.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 +-
 mm/memcontrol.c            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 086158969529..dc3fa687759b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -878,7 +878,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 }
 
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
-		int zid, int nr_pages);
+		int zid, long nr_pages);
 
 static inline
 unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 685e6dd48ce5..c3d98ab41f1f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1472,7 +1472,7 @@ retry:
  * to or just after a page is removed from an lru list.
  */
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
-				int zid, int nr_pages)
+				int zid, long nr_pages)
 {
 	struct mem_cgroup_per_node *mz;
 	unsigned long *lru_size;
@@ -1489,7 +1489,7 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 
 	size = *lru_size;
 	if (WARN_ONCE(size < 0,
-		"%s(%p, %d, %d): lru_size %ld\n",
+		"%s(%p, %d, %ld): lru_size %ld\n",
 		__func__, lruvec, lru, nr_pages, size)) {
 		VM_BUG_ON(1);
 		*lru_size = 0;
-- 
cgit v1.2.3


From d9e4142e7635f6f7173854667c0695ce5b836bbc Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Mon, 16 Mar 2026 04:54:31 -0700
Subject: kho: add size parameter to kho_add_subtree()

Patch series "kho: history: track previous kernel version and kexec boot
count", v9.

Use Kexec Handover (KHO) to pass the previous kernel's version string and
the number of kexec reboots since the last cold boot to the next kernel,
and print it at boot time.

Example
=======
	[    0.000000] Linux version 6.19.0-rc3-upstream-00047-ge5d992347849
	...
	[    0.000000] KHO: exec from: 6.19.0-rc4-next-20260107upstream-00004-g3071b0dc4498 (count 1)

Motivation
==========

Bugs that only reproduce when kexecing from specific kernel versions are
difficult to diagnose.  These issues occur when a buggy kernel kexecs into
a new kernel, with the bug manifesting only in the second kernel.

Recent examples include:

 * eb2266312507 ("x86/boot: Fix page table access in 5-level to 4-level paging transition")
 * 77d48d39e991 ("efistub/tpm: Use ACPI reclaim memory for event log to avoid corruption")
 * 64b45dd46e15 ("x86/efi: skip memattr table on kexec boot")

As kexec-based reboots become more common, these version-dependent bugs
are appearing more frequently.  At scale, correlating crashes to the
previous kernel version is challenging, especially when issues only occur
in specific transition scenarios.

Some bugs manifest only after multiple consecutive kexec reboots.
Tracking the kexec count helps identify these cases (this metric is
already used by live update sub-system).

KHO provides a reliable mechanism to pass information between kernels.  By
carrying the previous kernel's release string and kexec count forward, we
can print this context at boot time to aid debugging.

The goal of this feature is to have this information being printed in
early boot, so, users can trace back kernel releases in kexec.  Systemd is
not helpful because we cannot assume that the previous kernel has systemd
or even write access to the disk (common when using Linux as bootloaders)


This patch (of 6):

kho_add_subtree() assumes the fdt argument is always an FDT and calls
fdt_totalsize() on it in the debugfs code path.  This assumption will
break if a caller passes arbitrary data instead of an FDT.

When CONFIG_KEXEC_HANDOVER_DEBUGFS is enabled, kho_debugfs_fdt_add() calls
__kho_debugfs_fdt_add(), which executes:

    f->wrapper.size = fdt_totalsize(fdt);

Fix this by adding an explicit size parameter to kho_add_subtree() so
callers specify the blob size.  This allows subtrees to contain arbitrary
data formats, not just FDTs.  Update all callers:

  - memblock.c: use fdt_totalsize(fdt)
  - luo_core.c: use fdt_totalsize(fdt_out)
  - test_kho.c: use fdt_totalsize()
  - kexec_handover.c (root fdt): use fdt_totalsize(kho_out.fdt)

Also update __kho_debugfs_fdt_add() to receive the size explicitly instead
of computing it internally via fdt_totalsize().  In kho_in_debugfs_init(),
pass fdt_totalsize() for the root FDT and sub-blobs since all current
users are FDTs.  A subsequent patch will persist the size in the KHO FDT
so the incoming side can handle non-FDT blobs correctly.

Link: https://lore.kernel.org/20260323110747.193569-1-duanchenghao@kylinos.cn
Link: https://lore.kernel.org/20260316-kho-v9-1-ed6dcd951988@debian.org
Signed-off-by: Breno Leitao <leitao@debian.org>
Suggested-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kexec_handover.h              |  4 ++--
 kernel/liveupdate/kexec_handover.c          |  8 +++++---
 kernel/liveupdate/kexec_handover_debugfs.c  | 15 +++++++++------
 kernel/liveupdate/kexec_handover_internal.h |  5 +++--
 kernel/liveupdate/luo_core.c                |  3 ++-
 lib/test_kho.c                              |  3 ++-
 mm/memblock.c                               |  2 +-
 7 files changed, 24 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index ac4129d1d741..abb1d324f42d 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -32,7 +32,7 @@ void kho_restore_free(void *mem);
 struct folio *kho_restore_folio(phys_addr_t phys);
 struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages);
 void *kho_restore_vmalloc(const struct kho_vmalloc *preservation);
-int kho_add_subtree(const char *name, void *fdt);
+int kho_add_subtree(const char *name, void *fdt, size_t size);
 void kho_remove_subtree(void *fdt);
 int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
 
@@ -97,7 +97,7 @@ static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
 	return NULL;
 }
 
-static inline int kho_add_subtree(const char *name, void *fdt)
+static inline int kho_add_subtree(const char *name, void *fdt, size_t size)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 532f455c5d4f..8cc25e29ff91 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -727,6 +727,7 @@ err_disable_kho:
  * kho_add_subtree - record the physical address of a sub FDT in KHO root tree.
  * @name: name of the sub tree.
  * @fdt: the sub tree blob.
+ * @size: size of the blob in bytes.
  *
  * Creates a new child node named @name in KHO root FDT and records
  * the physical address of @fdt. The pages of @fdt must also be preserved
@@ -738,7 +739,7 @@ err_disable_kho:
  *
  * Return: 0 on success, error code on failure
  */
-int kho_add_subtree(const char *name, void *fdt)
+int kho_add_subtree(const char *name, void *fdt, size_t size)
 {
 	phys_addr_t phys = virt_to_phys(fdt);
 	void *root_fdt = kho_out.fdt;
@@ -763,7 +764,7 @@ int kho_add_subtree(const char *name, void *fdt)
 	if (err < 0)
 		goto out_pack;
 
-	WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false));
+	WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, size, false));
 
 out_pack:
 	fdt_pack(root_fdt);
@@ -1431,7 +1432,8 @@ static __init int kho_init(void)
 	}
 
 	WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, "fdt",
-					 kho_out.fdt, true));
+					 kho_out.fdt,
+					 fdt_totalsize(kho_out.fdt), true));
 
 	return 0;
 
diff --git a/kernel/liveupdate/kexec_handover_debugfs.c b/kernel/liveupdate/kexec_handover_debugfs.c
index acf368222682..ca0153736af1 100644
--- a/kernel/liveupdate/kexec_handover_debugfs.c
+++ b/kernel/liveupdate/kexec_handover_debugfs.c
@@ -25,7 +25,7 @@ struct fdt_debugfs {
 };
 
 static int __kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir,
-				 const char *name, const void *fdt)
+				 const char *name, const void *fdt, size_t size)
 {
 	struct fdt_debugfs *f;
 	struct dentry *file;
@@ -35,7 +35,7 @@ static int __kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir,
 		return -ENOMEM;
 
 	f->wrapper.data = (void *)fdt;
-	f->wrapper.size = fdt_totalsize(fdt);
+	f->wrapper.size = size;
 
 	file = debugfs_create_blob(name, 0400, dir, &f->wrapper);
 	if (IS_ERR(file)) {
@@ -50,7 +50,7 @@ static int __kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir,
 }
 
 int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
-			const void *fdt, bool root)
+			const void *fdt, size_t size, bool root)
 {
 	struct dentry *dir;
 
@@ -59,7 +59,7 @@ int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
 	else
 		dir = dbg->sub_fdt_dir;
 
-	return __kho_debugfs_fdt_add(&dbg->fdt_list, dir, name, fdt);
+	return __kho_debugfs_fdt_add(&dbg->fdt_list, dir, name, fdt, size);
 }
 
 void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt)
@@ -113,7 +113,8 @@ __init void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt)
 		goto err_rmdir;
 	}
 
-	err = __kho_debugfs_fdt_add(&dbg->fdt_list, dir, "fdt", fdt);
+	err = __kho_debugfs_fdt_add(&dbg->fdt_list, dir, "fdt", fdt,
+				    fdt_totalsize(fdt));
 	if (err)
 		goto err_rmdir;
 
@@ -121,6 +122,7 @@ __init void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt)
 		int len = 0;
 		const char *name = fdt_get_name(fdt, child, NULL);
 		const u64 *fdt_phys;
+		void *sub_fdt;
 
 		fdt_phys = fdt_getprop(fdt, child, KHO_FDT_SUB_TREE_PROP_NAME, &len);
 		if (!fdt_phys)
@@ -130,8 +132,9 @@ __init void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt)
 				name, len);
 			continue;
 		}
+		sub_fdt = phys_to_virt(*fdt_phys);
 		err = __kho_debugfs_fdt_add(&dbg->fdt_list, sub_fdt_dir, name,
-					    phys_to_virt(*fdt_phys));
+					    sub_fdt, fdt_totalsize(sub_fdt));
 		if (err) {
 			pr_warn("failed to add fdt %s to debugfs: %pe\n", name,
 				ERR_PTR(err));
diff --git a/kernel/liveupdate/kexec_handover_internal.h b/kernel/liveupdate/kexec_handover_internal.h
index 9a832a35254c..2a28cb8db9b0 100644
--- a/kernel/liveupdate/kexec_handover_internal.h
+++ b/kernel/liveupdate/kexec_handover_internal.h
@@ -27,7 +27,7 @@ int kho_debugfs_init(void);
 void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt);
 int kho_out_debugfs_init(struct kho_debugfs *dbg);
 int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
-			const void *fdt, bool root);
+			const void *fdt, size_t size, bool root);
 void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt);
 #else
 static inline int kho_debugfs_init(void) { return 0; }
@@ -35,7 +35,8 @@ static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
 				       const void *fdt) { }
 static inline int kho_out_debugfs_init(struct kho_debugfs *dbg) { return 0; }
 static inline int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
-				      const void *fdt, bool root) { return 0; }
+				      const void *fdt, size_t size,
+				      bool root) { return 0; }
 static inline void kho_debugfs_fdt_remove(struct kho_debugfs *dbg,
 					  void *fdt) { }
 #endif /* CONFIG_KEXEC_HANDOVER_DEBUGFS */
diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c
index 84ac728d63ba..04d06a0906c0 100644
--- a/kernel/liveupdate/luo_core.c
+++ b/kernel/liveupdate/luo_core.c
@@ -172,7 +172,8 @@ static int __init luo_fdt_setup(void)
 	if (err)
 		goto exit_free;
 
-	err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out);
+	err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out,
+			      fdt_totalsize(fdt_out));
 	if (err)
 		goto exit_free;
 	luo_global.fdt_out = fdt_out;
diff --git a/lib/test_kho.c b/lib/test_kho.c
index 7ef9e4061869..263182437315 100644
--- a/lib/test_kho.c
+++ b/lib/test_kho.c
@@ -143,7 +143,8 @@ static int kho_test_preserve(struct kho_test_state *state)
 	if (err)
 		goto err_unpreserve_data;
 
-	err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt));
+	err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt),
+			      fdt_totalsize(folio_address(state->fdt)));
 	if (err)
 		goto err_unpreserve_data;
 
diff --git a/mm/memblock.c b/mm/memblock.c
index b3ddfdec7a80..91d4162eec63 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2510,7 +2510,7 @@ static int __init prepare_kho_fdt(void)
 	if (err)
 		goto err_unpreserve_fdt;
 
-	err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
+	err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt, fdt_totalsize(fdt));
 	if (err)
 		goto err_unpreserve_fdt;
 
-- 
cgit v1.2.3


From 4916ae386760ad666eafa8afc075957bf479afbc Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Mon, 16 Mar 2026 04:54:32 -0700
Subject: kho: rename fdt parameter to blob in kho_add/remove_subtree()

Since kho_add_subtree() now accepts arbitrary data blobs (not just FDTs),
rename the parameter from 'fdt' to 'blob' to better reflect its purpose.
Apply the same rename to kho_remove_subtree() for consistency.

Also rename kho_debugfs_fdt_add() and kho_debugfs_fdt_remove() to
kho_debugfs_blob_add() and kho_debugfs_blob_remove() respectively, with
the same parameter rename from 'fdt' to 'blob'.

Link: https://lore.kernel.org/20260316-kho-v9-2-ed6dcd951988@debian.org
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/mm/kho.rst        |  2 +-
 include/linux/kexec_handover.h              |  8 +++----
 kernel/liveupdate/kexec_handover.c          | 33 +++++++++++++++--------------
 kernel/liveupdate/kexec_handover_debugfs.c  | 25 +++++++++++-----------
 kernel/liveupdate/kexec_handover_internal.h | 16 +++++++-------
 5 files changed, 43 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/mm/kho.rst b/Documentation/admin-guide/mm/kho.rst
index cb9a20f64920..6a4ddf344046 100644
--- a/Documentation/admin-guide/mm/kho.rst
+++ b/Documentation/admin-guide/mm/kho.rst
@@ -80,5 +80,5 @@ stabilized.
     it finished to interpret their metadata.
 
 ``/sys/kernel/debug/kho/in/sub_fdts/``
-    Similar to ``kho/out/sub_fdts/``, but contains sub FDT blobs
+    Similar to ``kho/out/sub_fdts/``, but contains sub blobs
     of KHO producers passed from the old kernel.
diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index abb1d324f42d..0666cf298c7f 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -32,8 +32,8 @@ void kho_restore_free(void *mem);
 struct folio *kho_restore_folio(phys_addr_t phys);
 struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages);
 void *kho_restore_vmalloc(const struct kho_vmalloc *preservation);
-int kho_add_subtree(const char *name, void *fdt, size_t size);
-void kho_remove_subtree(void *fdt);
+int kho_add_subtree(const char *name, void *blob, size_t size);
+void kho_remove_subtree(void *blob);
 int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
 
 void kho_memory_init(void);
@@ -97,12 +97,12 @@ static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
 	return NULL;
 }
 
-static inline int kho_add_subtree(const char *name, void *fdt, size_t size)
+static inline int kho_add_subtree(const char *name, void *blob, size_t size)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline void kho_remove_subtree(void *fdt) { }
+static inline void kho_remove_subtree(void *blob) { }
 
 static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
 {
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 8cc25e29ff91..711b6c3376e7 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -724,13 +724,13 @@ err_disable_kho:
 }
 
 /**
- * kho_add_subtree - record the physical address of a sub FDT in KHO root tree.
+ * kho_add_subtree - record the physical address of a sub blob in KHO root tree.
  * @name: name of the sub tree.
- * @fdt: the sub tree blob.
+ * @blob: the sub tree blob.
  * @size: size of the blob in bytes.
  *
  * Creates a new child node named @name in KHO root FDT and records
- * the physical address of @fdt. The pages of @fdt must also be preserved
+ * the physical address of @blob. The pages of @blob must also be preserved
  * by KHO for the new kernel to retrieve it after kexec.
  *
  * A debugfs blob entry is also created at
@@ -739,9 +739,9 @@ err_disable_kho:
  *
  * Return: 0 on success, error code on failure
  */
-int kho_add_subtree(const char *name, void *fdt, size_t size)
+int kho_add_subtree(const char *name, void *blob, size_t size)
 {
-	phys_addr_t phys = virt_to_phys(fdt);
+	phys_addr_t phys = virt_to_phys(blob);
 	void *root_fdt = kho_out.fdt;
 	int err = -ENOMEM;
 	int off, fdt_err;
@@ -764,7 +764,8 @@ int kho_add_subtree(const char *name, void *fdt, size_t size)
 	if (err < 0)
 		goto out_pack;
 
-	WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, size, false));
+	WARN_ON_ONCE(kho_debugfs_blob_add(&kho_out.dbg, name, blob,
+					  size, false));
 
 out_pack:
 	fdt_pack(root_fdt);
@@ -773,9 +774,9 @@ out_pack:
 }
 EXPORT_SYMBOL_GPL(kho_add_subtree);
 
-void kho_remove_subtree(void *fdt)
+void kho_remove_subtree(void *blob)
 {
-	phys_addr_t target_phys = virt_to_phys(fdt);
+	phys_addr_t target_phys = virt_to_phys(blob);
 	void *root_fdt = kho_out.fdt;
 	int off;
 	int err;
@@ -797,7 +798,7 @@ void kho_remove_subtree(void *fdt)
 
 		if ((phys_addr_t)*val == target_phys) {
 			fdt_del_node(root_fdt, off);
-			kho_debugfs_fdt_remove(&kho_out.dbg, fdt);
+			kho_debugfs_blob_remove(&kho_out.dbg, blob);
 			break;
 		}
 	}
@@ -1293,11 +1294,11 @@ bool is_kho_boot(void)
 EXPORT_SYMBOL_GPL(is_kho_boot);
 
 /**
- * kho_retrieve_subtree - retrieve a preserved sub FDT by its name.
- * @name: the name of the sub FDT passed to kho_add_subtree().
- * @phys: if found, the physical address of the sub FDT is stored in @phys.
+ * kho_retrieve_subtree - retrieve a preserved sub blob by its name.
+ * @name: the name of the sub blob passed to kho_add_subtree().
+ * @phys: if found, the physical address of the sub blob is stored in @phys.
  *
- * Retrieve a preserved sub FDT named @name and store its physical
+ * Retrieve a preserved sub blob named @name and store its physical
  * address in @phys.
  *
  * Return: 0 on success, error code on failure
@@ -1431,9 +1432,9 @@ static __init int kho_init(void)
 			init_cma_reserved_pageblock(pfn_to_page(pfn));
 	}
 
-	WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, "fdt",
-					 kho_out.fdt,
-					 fdt_totalsize(kho_out.fdt), true));
+	WARN_ON_ONCE(kho_debugfs_blob_add(&kho_out.dbg, "fdt",
+					  kho_out.fdt,
+					  fdt_totalsize(kho_out.fdt), true));
 
 	return 0;
 
diff --git a/kernel/liveupdate/kexec_handover_debugfs.c b/kernel/liveupdate/kexec_handover_debugfs.c
index ca0153736af1..cab923e4f5c8 100644
--- a/kernel/liveupdate/kexec_handover_debugfs.c
+++ b/kernel/liveupdate/kexec_handover_debugfs.c
@@ -24,8 +24,9 @@ struct fdt_debugfs {
 	struct dentry *file;
 };
 
-static int __kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir,
-				 const char *name, const void *fdt, size_t size)
+static int __kho_debugfs_blob_add(struct list_head *list, struct dentry *dir,
+				  const char *name, const void *blob,
+				  size_t size)
 {
 	struct fdt_debugfs *f;
 	struct dentry *file;
@@ -34,7 +35,7 @@ static int __kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir,
 	if (!f)
 		return -ENOMEM;
 
-	f->wrapper.data = (void *)fdt;
+	f->wrapper.data = (void *)blob;
 	f->wrapper.size = size;
 
 	file = debugfs_create_blob(name, 0400, dir, &f->wrapper);
@@ -49,8 +50,8 @@ static int __kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir,
 	return 0;
 }
 
-int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
-			const void *fdt, size_t size, bool root)
+int kho_debugfs_blob_add(struct kho_debugfs *dbg, const char *name,
+			 const void *blob, size_t size, bool root)
 {
 	struct dentry *dir;
 
@@ -59,15 +60,15 @@ int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
 	else
 		dir = dbg->sub_fdt_dir;
 
-	return __kho_debugfs_fdt_add(&dbg->fdt_list, dir, name, fdt, size);
+	return __kho_debugfs_blob_add(&dbg->fdt_list, dir, name, blob, size);
 }
 
-void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt)
+void kho_debugfs_blob_remove(struct kho_debugfs *dbg, void *blob)
 {
 	struct fdt_debugfs *ff;
 
 	list_for_each_entry(ff, &dbg->fdt_list, list) {
-		if (ff->wrapper.data == fdt) {
+		if (ff->wrapper.data == blob) {
 			debugfs_remove(ff->file);
 			list_del(&ff->list);
 			kfree(ff);
@@ -113,8 +114,8 @@ __init void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt)
 		goto err_rmdir;
 	}
 
-	err = __kho_debugfs_fdt_add(&dbg->fdt_list, dir, "fdt", fdt,
-				    fdt_totalsize(fdt));
+	err = __kho_debugfs_blob_add(&dbg->fdt_list, dir, "fdt", fdt,
+				     fdt_totalsize(fdt));
 	if (err)
 		goto err_rmdir;
 
@@ -133,8 +134,8 @@ __init void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt)
 			continue;
 		}
 		sub_fdt = phys_to_virt(*fdt_phys);
-		err = __kho_debugfs_fdt_add(&dbg->fdt_list, sub_fdt_dir, name,
-					    sub_fdt, fdt_totalsize(sub_fdt));
+		err = __kho_debugfs_blob_add(&dbg->fdt_list, sub_fdt_dir, name,
+					     sub_fdt, fdt_totalsize(sub_fdt));
 		if (err) {
 			pr_warn("failed to add fdt %s to debugfs: %pe\n", name,
 				ERR_PTR(err));
diff --git a/kernel/liveupdate/kexec_handover_internal.h b/kernel/liveupdate/kexec_handover_internal.h
index 2a28cb8db9b0..0399ff107775 100644
--- a/kernel/liveupdate/kexec_handover_internal.h
+++ b/kernel/liveupdate/kexec_handover_internal.h
@@ -26,19 +26,19 @@ extern unsigned int kho_scratch_cnt;
 int kho_debugfs_init(void);
 void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt);
 int kho_out_debugfs_init(struct kho_debugfs *dbg);
-int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
-			const void *fdt, size_t size, bool root);
-void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt);
+int kho_debugfs_blob_add(struct kho_debugfs *dbg, const char *name,
+			 const void *blob, size_t size, bool root);
+void kho_debugfs_blob_remove(struct kho_debugfs *dbg, void *blob);
 #else
 static inline int kho_debugfs_init(void) { return 0; }
 static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
 				       const void *fdt) { }
 static inline int kho_out_debugfs_init(struct kho_debugfs *dbg) { return 0; }
-static inline int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
-				      const void *fdt, size_t size,
-				      bool root) { return 0; }
-static inline void kho_debugfs_fdt_remove(struct kho_debugfs *dbg,
-					  void *fdt) { }
+static inline int kho_debugfs_blob_add(struct kho_debugfs *dbg,
+				       const char *name, const void *blob,
+				       size_t size, bool root) { return 0; }
+static inline void kho_debugfs_blob_remove(struct kho_debugfs *dbg,
+					   void *blob) { }
 #endif /* CONFIG_KEXEC_HANDOVER_DEBUGFS */
 
 #ifdef CONFIG_KEXEC_HANDOVER_DEBUG
-- 
cgit v1.2.3


From 85e41392820fcf0f7a3f9784cea907905f921358 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Mon, 16 Mar 2026 04:54:33 -0700
Subject: kho: persist blob size in KHO FDT

kho_add_subtree() accepts a size parameter but only forwards it to
debugfs.  The size is not persisted in the KHO FDT, so it is lost across
kexec.  This makes it impossible for the incoming kernel to determine the
blob size without understanding the blob format.

Store the blob size as a "blob-size" property in the KHO FDT alongside the
"preserved-data" physical address.  This allows the receiving kernel to
recover the size for any blob regardless of format.

Also extend kho_retrieve_subtree() with an optional size output parameter
so callers can learn the blob size without needing to understand the blob
format.  Update all callers to pass NULL for the new parameter.

Link: https://lore.kernel.org/20260316-kho-v9-3-ed6dcd951988@debian.org
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kexec_handover.h             |  5 +++--
 include/linux/kho/abi/kexec_handover.h     | 20 ++++++++++++++++----
 kernel/liveupdate/kexec_handover.c         | 27 ++++++++++++++++++++++-----
 kernel/liveupdate/kexec_handover_debugfs.c |  3 ++-
 kernel/liveupdate/luo_core.c               |  2 +-
 lib/test_kho.c                             |  2 +-
 mm/memblock.c                              |  2 +-
 7 files changed, 46 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 0666cf298c7f..8968c56d2d73 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -34,7 +34,7 @@ struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages);
 void *kho_restore_vmalloc(const struct kho_vmalloc *preservation);
 int kho_add_subtree(const char *name, void *blob, size_t size);
 void kho_remove_subtree(void *blob);
-int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
+int kho_retrieve_subtree(const char *name, phys_addr_t *phys, size_t *size);
 
 void kho_memory_init(void);
 
@@ -104,7 +104,8 @@ static inline int kho_add_subtree(const char *name, void *blob, size_t size)
 
 static inline void kho_remove_subtree(void *blob) { }
 
-static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
+static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys,
+				       size_t *size)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/include/linux/kho/abi/kexec_handover.h b/include/linux/kho/abi/kexec_handover.h
index 6b7d8ef550f9..7e847a2339b0 100644
--- a/include/linux/kho/abi/kexec_handover.h
+++ b/include/linux/kho/abi/kexec_handover.h
@@ -41,25 +41,28 @@
  *   restore the preserved data.::
  *
  *     / {
- *         compatible = "kho-v2";
+ *         compatible = "kho-v3";
  *
  *         preserved-memory-map = <0x...>;
  *
  *         <subnode-name-1> {
  *             preserved-data = <0x...>;
+ *             blob-size = <0x...>;
  *         };
  *
  *         <subnode-name-2> {
  *             preserved-data = <0x...>;
+ *             blob-size = <0x...>;
  *         };
  *               ... ...
  *         <subnode-name-N> {
  *             preserved-data = <0x...>;
+ *             blob-size = <0x...>;
  *         };
  *     };
  *
  *   Root KHO Node (/):
- *     - compatible: "kho-v2"
+ *     - compatible: "kho-v3"
  *
  *       Indentifies the overall KHO ABI version.
  *
@@ -78,16 +81,25 @@
  *
  *       Physical address pointing to a subnode data blob that is also
  *       being preserved.
+ *
+ *     - blob-size: u64
+ *
+ *       Size in bytes of the preserved data blob. This is needed because
+ *       blobs may use arbitrary formats (not just FDT), so the size
+ *       cannot be determined from the blob content alone.
  */
 
 /* The compatible string for the KHO FDT root node. */
-#define KHO_FDT_COMPATIBLE "kho-v2"
+#define KHO_FDT_COMPATIBLE "kho-v3"
 
 /* The FDT property for the preserved memory map. */
 #define KHO_FDT_MEMORY_MAP_PROP_NAME "preserved-memory-map"
 
 /* The FDT property for preserved data blobs. */
-#define KHO_FDT_SUB_TREE_PROP_NAME "preserved-data"
+#define KHO_SUB_TREE_PROP_NAME "preserved-data"
+
+/* The FDT property for the size of preserved data blobs. */
+#define KHO_SUB_TREE_SIZE_PROP_NAME "blob-size"
 
 /**
  * DOC: Kexec Handover ABI for vmalloc Preservation
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 711b6c3376e7..adf6541f70f9 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -743,6 +743,7 @@ int kho_add_subtree(const char *name, void *blob, size_t size)
 {
 	phys_addr_t phys = virt_to_phys(blob);
 	void *root_fdt = kho_out.fdt;
+	u64 size_u64 = size;
 	int err = -ENOMEM;
 	int off, fdt_err;
 
@@ -759,11 +760,16 @@ int kho_add_subtree(const char *name, void *blob, size_t size)
 		goto out_pack;
 	}
 
-	err = fdt_setprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME,
+	err = fdt_setprop(root_fdt, off, KHO_SUB_TREE_PROP_NAME,
 			  &phys, sizeof(phys));
 	if (err < 0)
 		goto out_pack;
 
+	err = fdt_setprop(root_fdt, off, KHO_SUB_TREE_SIZE_PROP_NAME,
+			  &size_u64, sizeof(size_u64));
+	if (err < 0)
+		goto out_pack;
+
 	WARN_ON_ONCE(kho_debugfs_blob_add(&kho_out.dbg, name, blob,
 					  size, false));
 
@@ -792,7 +798,7 @@ void kho_remove_subtree(void *blob)
 		const u64 *val;
 		int len;
 
-		val = fdt_getprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME, &len);
+		val = fdt_getprop(root_fdt, off, KHO_SUB_TREE_PROP_NAME, &len);
 		if (!val || len != sizeof(phys_addr_t))
 			continue;
 
@@ -1297,13 +1303,14 @@ EXPORT_SYMBOL_GPL(is_kho_boot);
  * kho_retrieve_subtree - retrieve a preserved sub blob by its name.
  * @name: the name of the sub blob passed to kho_add_subtree().
  * @phys: if found, the physical address of the sub blob is stored in @phys.
+ * @size: if not NULL and found, the size of the sub blob is stored in @size.
  *
  * Retrieve a preserved sub blob named @name and store its physical
- * address in @phys.
+ * address in @phys and optionally its size in @size.
  *
  * Return: 0 on success, error code on failure
  */
-int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
+int kho_retrieve_subtree(const char *name, phys_addr_t *phys, size_t *size)
 {
 	const void *fdt = kho_get_fdt();
 	const u64 *val;
@@ -1319,12 +1326,22 @@ int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
 	if (offset < 0)
 		return -ENOENT;
 
-	val = fdt_getprop(fdt, offset, KHO_FDT_SUB_TREE_PROP_NAME, &len);
+	val = fdt_getprop(fdt, offset, KHO_SUB_TREE_PROP_NAME, &len);
 	if (!val || len != sizeof(*val))
 		return -EINVAL;
 
 	*phys = (phys_addr_t)*val;
 
+	val = fdt_getprop(fdt, offset, KHO_SUB_TREE_SIZE_PROP_NAME, &len);
+	if (!val || len != sizeof(*val)) {
+		pr_warn("broken KHO subnode '%s': missing or invalid blob-size property\n",
+			name);
+		return -EINVAL;
+	}
+
+	if (size)
+		*size = (size_t)*val;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kho_retrieve_subtree);
diff --git a/kernel/liveupdate/kexec_handover_debugfs.c b/kernel/liveupdate/kexec_handover_debugfs.c
index cab923e4f5c8..b416846810d7 100644
--- a/kernel/liveupdate/kexec_handover_debugfs.c
+++ b/kernel/liveupdate/kexec_handover_debugfs.c
@@ -125,7 +125,8 @@ __init void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt)
 		const u64 *fdt_phys;
 		void *sub_fdt;
 
-		fdt_phys = fdt_getprop(fdt, child, KHO_FDT_SUB_TREE_PROP_NAME, &len);
+		fdt_phys = fdt_getprop(fdt, child,
+					KHO_SUB_TREE_PROP_NAME, &len);
 		if (!fdt_phys)
 			continue;
 		if (len != sizeof(*fdt_phys)) {
diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c
index 04d06a0906c0..48b25c9abeda 100644
--- a/kernel/liveupdate/luo_core.c
+++ b/kernel/liveupdate/luo_core.c
@@ -88,7 +88,7 @@ static int __init luo_early_startup(void)
 	}
 
 	/* Retrieve LUO subtree, and verify its format. */
-	err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys);
+	err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys, NULL);
 	if (err) {
 		if (err != -ENOENT) {
 			pr_err("failed to retrieve FDT '%s' from KHO: %pe\n",
diff --git a/lib/test_kho.c b/lib/test_kho.c
index 263182437315..aa6a0956bb8b 100644
--- a/lib/test_kho.c
+++ b/lib/test_kho.c
@@ -319,7 +319,7 @@ static int __init kho_test_init(void)
 	if (!kho_is_enabled())
 		return 0;
 
-	err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys);
+	err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys, NULL);
 	if (!err) {
 		err = kho_test_restore(fdt_phys);
 		if (err)
diff --git a/mm/memblock.c b/mm/memblock.c
index 91d4162eec63..a1c6dd0f6fad 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2555,7 +2555,7 @@ static void *__init reserve_mem_kho_retrieve_fdt(void)
 	if (fdt)
 		return fdt;
 
-	err = kho_retrieve_subtree(MEMBLOCK_KHO_FDT, &fdt_phys);
+	err = kho_retrieve_subtree(MEMBLOCK_KHO_FDT, &fdt_phys, NULL);
 	if (err) {
 		if (err != -ENOENT)
 			pr_warn("failed to retrieve FDT '%s' from KHO: %d\n",
-- 
cgit v1.2.3


From 76aa46b9e4049247858309c6e3527d477da2b2fe Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Mon, 16 Mar 2026 04:54:35 -0700
Subject: kho: kexec-metadata: track previous kernel chain

Use Kexec Handover (KHO) to pass the previous kernel's version string and
the number of kexec reboots since the last cold boot to the next kernel,
and print it at boot time.

Example output:
    [    0.000000] KHO: exec from: 6.19.0-rc4-next-20260107 (count 1)

Motivation
==========

Bugs that only reproduce when kexecing from specific kernel versions are
difficult to diagnose.  These issues occur when a buggy kernel kexecs into
a new kernel, with the bug manifesting only in the second kernel.

Recent examples include the following commits:

 * commit eb2266312507 ("x86/boot: Fix page table access in
   5-level to 4-level paging transition")
 * commit 77d48d39e991 ("efistub/tpm: Use ACPI reclaim memory
   for event log to avoid corruption")
 * commit 64b45dd46e15 ("x86/efi: skip memattr table on kexec
   boot")

As kexec-based reboots become more common, these version-dependent bugs
are appearing more frequently.  At scale, correlating crashes to the
previous kernel version is challenging, especially when issues only occur
in specific transition scenarios.

Implementation
==============

The kexec metadata is stored as a plain C struct (struct
kho_kexec_metadata) rather than FDT format, for simplicity and direct
field access.  It is registered via kho_add_subtree() as a separate
subtree, keeping it independent from the core KHO ABI.  This design
choice:

 - Keeps the core KHO ABI minimal and stable
 - Allows the metadata format to evolve independently
 - Avoids requiring version bumps for all KHO consumers (LUO, etc.)
   when the metadata format changes

The struct kho_kexec_metadata contains two fields:
 - previous_release: The kernel version that initiated the kexec
 - kexec_count: Number of kexec boots since last cold boot

On cold boot, kexec_count starts at 0 and increments with each kexec.  The
count helps identify issues that only manifest after multiple consecutive
kexec reboots.

[leitao@debian.org: call kho_kexec_metadata_init() for both boot paths]
  Link: https://lore.kernel.org/all/20260309-kho-v8-5-c3abcf4ac750@debian.org/ [1]
  Link: https://lore.kernel.org/20260409-kho_fix_merge_issue-v1-1-710c84ceaa85@debian.org
Link: https://lore.kernel.org/20260316-kho-v9-5-ed6dcd951988@debian.org
Signed-off-by: Breno Leitao <leitao@debian.org>
Acked-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kho/abi/kexec_metadata.h | 46 ++++++++++++++++
 kernel/liveupdate/kexec_handover.c     | 98 ++++++++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+)
 create mode 100644 include/linux/kho/abi/kexec_metadata.h

(limited to 'include/linux')

diff --git a/include/linux/kho/abi/kexec_metadata.h b/include/linux/kho/abi/kexec_metadata.h
new file mode 100644
index 000000000000..e9e3f7e38a7c
--- /dev/null
+++ b/include/linux/kho/abi/kexec_metadata.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/**
+ * DOC: Kexec Metadata ABI
+ *
+ * The "kexec-metadata" subtree stores optional metadata about the kexec chain.
+ * It is registered via kho_add_subtree(), keeping it independent from the core
+ * KHO ABI. This allows the metadata format to evolve without affecting other
+ * KHO consumers.
+ *
+ * The metadata is stored as a plain C struct rather than FDT format for
+ * simplicity and direct field access.
+ *
+ * Copyright (c) 2026 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2026 Breno Leitao <leitao@debian.org>
+ */
+
+#ifndef _LINUX_KHO_ABI_KEXEC_METADATA_H
+#define _LINUX_KHO_ABI_KEXEC_METADATA_H
+
+#include <linux/types.h>
+#include <linux/utsname.h>
+
+#define KHO_KEXEC_METADATA_VERSION 1
+
+/**
+ * struct kho_kexec_metadata - Kexec metadata passed between kernels
+ * @version: ABI version of this struct (must be first field)
+ * @previous_release: Kernel version string that initiated the kexec
+ * @kexec_count: Number of kexec boots since last cold boot
+ *
+ * This structure is preserved across kexec and allows the new kernel to
+ * identify which kernel it was booted from and how many kexec reboots
+ * have occurred.
+ *
+ * __NEW_UTS_LEN is part of uABI, so it safe to use it in here.
+ */
+struct kho_kexec_metadata {
+	u32 version;
+	char previous_release[__NEW_UTS_LEN + 1];
+	u32 kexec_count;
+} __packed;
+
+#define KHO_METADATA_NODE_NAME "kexec-metadata"
+
+#endif /* _LINUX_KHO_ABI_KEXEC_METADATA_H */
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index adf6541f70f9..94762de1fe5f 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -18,7 +18,9 @@
 #include <linux/kexec.h>
 #include <linux/kexec_handover.h>
 #include <linux/kho_radix_tree.h>
+#include <linux/utsname.h>
 #include <linux/kho/abi/kexec_handover.h>
+#include <linux/kho/abi/kexec_metadata.h>
 #include <linux/libfdt.h>
 #include <linux/list.h>
 #include <linux/memblock.h>
@@ -1268,6 +1270,8 @@ EXPORT_SYMBOL_GPL(kho_restore_free);
 struct kho_in {
 	phys_addr_t fdt_phys;
 	phys_addr_t scratch_phys;
+	char previous_release[__NEW_UTS_LEN + 1];
+	u32 kexec_count;
 	struct kho_debugfs dbg;
 };
 
@@ -1392,6 +1396,96 @@ static __init int kho_out_fdt_setup(void)
 	return err;
 }
 
+static void __init kho_in_kexec_metadata(void)
+{
+	struct kho_kexec_metadata *metadata;
+	phys_addr_t metadata_phys;
+	size_t blob_size;
+	int err;
+
+	err = kho_retrieve_subtree(KHO_METADATA_NODE_NAME, &metadata_phys,
+				   &blob_size);
+	if (err)
+		/* This is fine, previous kernel didn't export metadata */
+		return;
+
+	/* Check that, at least, "version" is present */
+	if (blob_size < sizeof(u32)) {
+		pr_warn("kexec-metadata blob too small (%zu bytes)\n",
+			blob_size);
+		return;
+	}
+
+	metadata = phys_to_virt(metadata_phys);
+
+	if (metadata->version != KHO_KEXEC_METADATA_VERSION) {
+		pr_warn("kexec-metadata version %u not supported (expected %u)\n",
+			metadata->version, KHO_KEXEC_METADATA_VERSION);
+		return;
+	}
+
+	if (blob_size < sizeof(*metadata)) {
+		pr_warn("kexec-metadata blob too small for v%u (%zu < %zu)\n",
+			metadata->version, blob_size, sizeof(*metadata));
+		return;
+	}
+
+	/*
+	 * Copy data to the kernel structure that will persist during
+	 * kernel lifetime.
+	 */
+	kho_in.kexec_count = metadata->kexec_count;
+	strscpy(kho_in.previous_release, metadata->previous_release,
+		sizeof(kho_in.previous_release));
+
+	pr_info("exec from: %s (count %u)\n",
+		kho_in.previous_release, kho_in.kexec_count);
+}
+
+/*
+ * Create kexec metadata to pass kernel version and boot count to the
+ * next kernel. This keeps the core KHO ABI minimal and allows the
+ * metadata format to evolve independently.
+ */
+static __init int kho_out_kexec_metadata(void)
+{
+	struct kho_kexec_metadata *metadata;
+	int err;
+
+	metadata = kho_alloc_preserve(sizeof(*metadata));
+	if (IS_ERR(metadata))
+		return PTR_ERR(metadata);
+
+	metadata->version = KHO_KEXEC_METADATA_VERSION;
+	strscpy(metadata->previous_release, init_uts_ns.name.release,
+		sizeof(metadata->previous_release));
+	/* kho_in.kexec_count is set to 0 on cold boot */
+	metadata->kexec_count = kho_in.kexec_count + 1;
+
+	err = kho_add_subtree(KHO_METADATA_NODE_NAME, metadata,
+			      sizeof(*metadata));
+	if (err)
+		kho_unpreserve_free(metadata);
+
+	return err;
+}
+
+static int __init kho_kexec_metadata_init(const void *fdt)
+{
+	int err;
+
+	if (fdt)
+		kho_in_kexec_metadata();
+
+	/* Populate kexec metadata for the possible next kexec */
+	err = kho_out_kexec_metadata();
+	if (err)
+		pr_warn("failed to initialize kexec-metadata subtree: %d\n",
+			err);
+
+	return err;
+}
+
 static __init int kho_init(void)
 {
 	struct kho_radix_tree *tree = &kho_out.radix_tree;
@@ -1425,6 +1519,10 @@ static __init int kho_init(void)
 	if (err)
 		goto err_free_fdt;
 
+	err = kho_kexec_metadata_init(fdt);
+	if (err)
+		goto err_free_fdt;
+
 	if (fdt) {
 		kho_in_debugfs_init(&kho_in.dbg, fdt);
 		return 0;
-- 
cgit v1.2.3


From 00d0b372374f2528394aabf7b1f53f8dafe294de Mon Sep 17 00:00:00 2001
From: Pasha Tatashin <pasha.tatashin@soleen.com>
Date: Thu, 26 Mar 2026 16:39:41 +0000
Subject: liveupdate: prevent double management of files

Patch series "liveupdate: prevent double preservation", v4.

Currently, LUO does not prevent the same file from being managed twice
across different active sessions.

Because LUO preserves files of absolutely different types: memfd, and
upcoming vfiofd [1], iommufd [2], guestmefd (and possible kvmfd/cpufd).
There is no common private data or guarantee on how to prevent that the
same file is not preserved twice beside using inode or some slower and
expensive method like hashtables.


This patch (of 4)

Currently, LUO does not prevent the same file from being managed twice
across different active sessions.

Use a global xarray luo_preserved_files to keep track of file identifiers
being preserved by LUO.  Update luo_preserve_file() to check and insert
the file identifier into this xarray when it is preserved, and erase it in
luo_file_unpreserve_files() when it is released.

To allow handlers to define what constitutes a "unique" file (e.g.,
different struct file objects pointing to the same hardware resource), add
a get_id() callback to struct liveupdate_file_ops.  If not provided, the
default identifier is the struct file pointer itself.

This ensures that the same file (or resource) cannot be managed by
multiple sessions.  If another session attempts to preserve an already
managed file, it will now fail with -EBUSY.

Link: https://lore.kernel.org/20260326163943.574070-1-pasha.tatashin@soleen.com
Link: https://lore.kernel.org/20260326163943.574070-2-pasha.tatashin@soleen.com
Link: https://lore.kernel.org/all/20260129212510.967611-1-dmatlack@google.com [1]
Link: https://lore.kernel.org/all/20260203220948.2176157-1-skhawaja@google.com [2]
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: David Matlack <dmatlack@google.com>
Cc: Pratyush Yadav <pratyush@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/liveupdate.h   |  2 ++
 kernel/liveupdate/luo_file.c | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
index dd11fdc76a5f..61325ad26526 100644
--- a/include/linux/liveupdate.h
+++ b/include/linux/liveupdate.h
@@ -63,6 +63,7 @@ struct liveupdate_file_op_args {
  *                finish, in order to do successful finish calls for all
  *                resources in the session.
  * @finish:       Required. Final cleanup in the new kernel.
+ * @get_id:       Optional. Returns a unique identifier for the file.
  * @owner:        Module reference
  *
  * All operations (except can_preserve) receive a pointer to a
@@ -78,6 +79,7 @@ struct liveupdate_file_ops {
 	int (*retrieve)(struct liveupdate_file_op_args *args);
 	bool (*can_finish)(struct liveupdate_file_op_args *args);
 	void (*finish)(struct liveupdate_file_op_args *args);
+	unsigned long (*get_id)(struct file *file);
 	struct module *owner;
 };
 
diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c
index 5acee4174bf0..09103cf81107 100644
--- a/kernel/liveupdate/luo_file.c
+++ b/kernel/liveupdate/luo_file.c
@@ -108,12 +108,16 @@
 #include <linux/liveupdate.h>
 #include <linux/module.h>
 #include <linux/sizes.h>
+#include <linux/xarray.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include "luo_internal.h"
 
 static LIST_HEAD(luo_file_handler_list);
 
+/* Keep track of files being preserved by LUO */
+static DEFINE_XARRAY(luo_preserved_files);
+
 /* 2 4K pages, give space for 128 files per file_set */
 #define LUO_FILE_PGCNT		2ul
 #define LUO_FILE_MAX							\
@@ -203,6 +207,12 @@ static void luo_free_files_mem(struct luo_file_set *file_set)
 	file_set->files = NULL;
 }
 
+static unsigned long luo_get_id(struct liveupdate_file_handler *fh,
+				struct file *file)
+{
+	return fh->ops->get_id ? fh->ops->get_id(file) : (unsigned long)file;
+}
+
 static bool luo_token_is_used(struct luo_file_set *file_set, u64 token)
 {
 	struct luo_file *iter;
@@ -248,6 +258,7 @@ static bool luo_token_is_used(struct luo_file_set *file_set, u64 token)
  * Context: Can be called from an ioctl handler during normal system operation.
  * Return: 0 on success. Returns a negative errno on failure:
  *         -EEXIST if the token is already used.
+ *         -EBUSY if the file descriptor is already preserved by another session.
  *         -EBADF if the file descriptor is invalid.
  *         -ENOSPC if the file_set is full.
  *         -ENOENT if no compatible handler is found.
@@ -288,10 +299,15 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd)
 	if (err)
 		goto err_free_files_mem;
 
-	err = luo_flb_file_preserve(fh);
+	err = xa_insert(&luo_preserved_files, luo_get_id(fh, file),
+			file, GFP_KERNEL);
 	if (err)
 		goto err_free_files_mem;
 
+	err = luo_flb_file_preserve(fh);
+	if (err)
+		goto err_erase_xa;
+
 	luo_file = kzalloc_obj(*luo_file);
 	if (!luo_file) {
 		err = -ENOMEM;
@@ -320,6 +336,8 @@ err_kfree:
 	kfree(luo_file);
 err_flb_unpreserve:
 	luo_flb_file_unpreserve(fh);
+err_erase_xa:
+	xa_erase(&luo_preserved_files, luo_get_id(fh, file));
 err_free_files_mem:
 	luo_free_files_mem(file_set);
 err_fput:
@@ -363,6 +381,8 @@ void luo_file_unpreserve_files(struct luo_file_set *file_set)
 		luo_file->fh->ops->unpreserve(&args);
 		luo_flb_file_unpreserve(luo_file->fh);
 
+		xa_erase(&luo_preserved_files,
+			 luo_get_id(luo_file->fh, luo_file->file));
 		list_del(&luo_file->list);
 		file_set->count--;
 
@@ -606,6 +626,11 @@ int luo_retrieve_file(struct luo_file_set *file_set, u64 token,
 	luo_file->file = args.file;
 	/* Get reference so we can keep this file in LUO until finish */
 	get_file(luo_file->file);
+
+	WARN_ON(xa_insert(&luo_preserved_files,
+			  luo_get_id(luo_file->fh, luo_file->file),
+			  luo_file->file, GFP_KERNEL));
+
 	*filep = luo_file->file;
 	luo_file->retrieve_status = 1;
 
@@ -701,8 +726,11 @@ int luo_file_finish(struct luo_file_set *file_set)
 
 		luo_file_finish_one(file_set, luo_file);
 
-		if (luo_file->file)
+		if (luo_file->file) {
+			xa_erase(&luo_preserved_files,
+				 luo_get_id(luo_file->fh, luo_file->file));
 			fput(luo_file->file);
+		}
 		list_del(&luo_file->list);
 		file_set->count--;
 		mutex_destroy(&luo_file->mutex);
-- 
cgit v1.2.3


From 6b2b22f7c8cf1596490beaac96a989cbafdfea57 Mon Sep 17 00:00:00 2001
From: Pasha Tatashin <pasha.tatashin@soleen.com>
Date: Fri, 27 Mar 2026 03:33:28 +0000
Subject: liveupdate: protect FLB lists with luo_register_rwlock

Because liveupdate FLB objects will soon drop their persistent module
references when registered, list traversals must be protected against
concurrent module unloading.

To provide this protection, utilize the global luo_register_rwlock.  It
protects the global registry of FLBs and the handler's specific list of
FLB dependencies.

Read locks are used during concurrent list traversals (e.g., during
preservation and serialization).  Write locks are taken during
registration and unregistration.

Link: https://lore.kernel.org/20260327033335.696621-5-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Pratyush Yadav (Google) <pratyush@kernel.org>
Cc: David Matlack <dmatlack@google.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Samiullah Khawaja <skhawaja@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/liveupdate.h  |  1 +
 kernel/liveupdate/luo_flb.c | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
index 61325ad26526..9c761d9bacf8 100644
--- a/include/linux/liveupdate.h
+++ b/include/linux/liveupdate.h
@@ -12,6 +12,7 @@
 #include <linux/kho/abi/luo.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/types.h>
 #include <uapi/linux/liveupdate.h>
 
diff --git a/kernel/liveupdate/luo_flb.c b/kernel/liveupdate/luo_flb.c
index cf4a8f854c83..fdb274410e8f 100644
--- a/kernel/liveupdate/luo_flb.c
+++ b/kernel/liveupdate/luo_flb.c
@@ -245,17 +245,20 @@ int luo_flb_file_preserve(struct liveupdate_file_handler *fh)
 	struct luo_flb_link *iter;
 	int err = 0;
 
+	down_read(&luo_register_rwlock);
 	list_for_each_entry(iter, flb_list, list) {
 		err = luo_flb_file_preserve_one(iter->flb);
 		if (err)
 			goto exit_err;
 	}
+	up_read(&luo_register_rwlock);
 
 	return 0;
 
 exit_err:
 	list_for_each_entry_continue_reverse(iter, flb_list, list)
 		luo_flb_file_unpreserve_one(iter->flb);
+	up_read(&luo_register_rwlock);
 
 	return err;
 }
@@ -277,6 +280,7 @@ void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh)
 	struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list);
 	struct luo_flb_link *iter;
 
+	guard(rwsem_read)(&luo_register_rwlock);
 	list_for_each_entry_reverse(iter, flb_list, list)
 		luo_flb_file_unpreserve_one(iter->flb);
 }
@@ -297,6 +301,7 @@ void luo_flb_file_finish(struct liveupdate_file_handler *fh)
 	struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list);
 	struct luo_flb_link *iter;
 
+	guard(rwsem_read)(&luo_register_rwlock);
 	list_for_each_entry_reverse(iter, flb_list, list)
 		luo_flb_file_finish_one(iter->flb);
 }
@@ -360,6 +365,8 @@ int liveupdate_register_flb(struct liveupdate_file_handler *fh,
 	if (!luo_session_quiesce())
 		return -EBUSY;
 
+	down_write(&luo_register_rwlock);
+
 	/* Check that this FLB is not already linked to this file handler */
 	err = -EEXIST;
 	list_for_each_entry(iter, flb_list, list) {
@@ -401,11 +408,13 @@ int liveupdate_register_flb(struct liveupdate_file_handler *fh,
 	private->users++;
 	link->flb = flb;
 	list_add_tail(&no_free_ptr(link)->list, flb_list);
+	up_write(&luo_register_rwlock);
 	luo_session_resume();
 
 	return 0;
 
 err_resume:
+	up_write(&luo_register_rwlock);
 	luo_session_resume();
 	return err;
 }
@@ -449,6 +458,8 @@ int liveupdate_unregister_flb(struct liveupdate_file_handler *fh,
 	if (!luo_session_quiesce())
 		return -EBUSY;
 
+	down_write(&luo_register_rwlock);
+
 	/* Find and remove the link from the file handler's list */
 	list_for_each_entry(iter, flb_list, list) {
 		if (iter->flb == flb) {
@@ -473,11 +484,13 @@ int liveupdate_unregister_flb(struct liveupdate_file_handler *fh,
 		module_put(flb->ops->owner);
 	}
 
+	up_write(&luo_register_rwlock);
 	luo_session_resume();
 
 	return 0;
 
 err_resume:
+	up_write(&luo_register_rwlock);
 	luo_session_resume();
 	return err;
 }
@@ -643,6 +656,7 @@ void luo_flb_serialize(void)
 	struct liveupdate_flb *gflb;
 	int i = 0;
 
+	guard(rwsem_read)(&luo_register_rwlock);
 	list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) {
 		struct luo_flb_private *private = luo_flb_get_private(gflb);
 
-- 
cgit v1.2.3


From 2ab7207e7ec6cd5af1912d9be5174f114633286b Mon Sep 17 00:00:00 2001
From: Pasha Tatashin <pasha.tatashin@soleen.com>
Date: Fri, 27 Mar 2026 03:33:33 +0000
Subject: liveupdate: make unregister functions return void

Change liveupdate_unregister_file_handler and liveupdate_unregister_flb to
return void instead of an error code.  This follows the design principle
that unregistration during module unload should not fail, as the unload
cannot be stopped at that point.

Link: https://lore.kernel.org/20260327033335.696621-10-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Pratyush Yadav (Google) <pratyush@kernel.org>
Cc: David Matlack <dmatlack@google.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Samiullah Khawaja <skhawaja@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/liveupdate.h   | 14 ++++++--------
 kernel/liveupdate/luo_file.c | 14 ++------------
 kernel/liveupdate/luo_flb.c  | 11 +++--------
 3 files changed, 11 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
index 9c761d9bacf8..30c5a39ff9e9 100644
--- a/include/linux/liveupdate.h
+++ b/include/linux/liveupdate.h
@@ -231,12 +231,12 @@ bool liveupdate_enabled(void);
 int liveupdate_reboot(void);
 
 int liveupdate_register_file_handler(struct liveupdate_file_handler *fh);
-int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh);
+void liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh);
 
 int liveupdate_register_flb(struct liveupdate_file_handler *fh,
 			    struct liveupdate_flb *flb);
-int liveupdate_unregister_flb(struct liveupdate_file_handler *fh,
-			      struct liveupdate_flb *flb);
+void liveupdate_unregister_flb(struct liveupdate_file_handler *fh,
+			       struct liveupdate_flb *flb);
 
 int liveupdate_flb_get_incoming(struct liveupdate_flb *flb, void **objp);
 int liveupdate_flb_get_outgoing(struct liveupdate_flb *flb, void **objp);
@@ -258,9 +258,8 @@ static inline int liveupdate_register_file_handler(struct liveupdate_file_handle
 	return -EOPNOTSUPP;
 }
 
-static inline int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh)
+static inline void liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh)
 {
-	return -EOPNOTSUPP;
 }
 
 static inline int liveupdate_register_flb(struct liveupdate_file_handler *fh,
@@ -269,10 +268,9 @@ static inline int liveupdate_register_flb(struct liveupdate_file_handler *fh,
 	return -EOPNOTSUPP;
 }
 
-static inline int liveupdate_unregister_flb(struct liveupdate_file_handler *fh,
-					    struct liveupdate_flb *flb)
+static inline void liveupdate_unregister_flb(struct liveupdate_file_handler *fh,
+					     struct liveupdate_flb *flb)
 {
-	return -EOPNOTSUPP;
 }
 
 static inline int liveupdate_flb_get_incoming(struct liveupdate_flb *flb,
diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c
index 4060b6064248..0730865711c1 100644
--- a/kernel/liveupdate/luo_file.c
+++ b/kernel/liveupdate/luo_file.c
@@ -912,25 +912,15 @@ err_unlock:
  *
  * Unregisters the file handler from the liveupdate core. This function
  * reverses the operations of liveupdate_register_file_handler().
- *
- * It ensures safe removal by checking that:
- * No FLB registered with this file handler.
- *
- * If the unregistration fails, the internal test state is reverted.
- *
- * Return: 0 Success. -EOPNOTSUPP when live update is not enabled. -EBUSY A live
- * update is in progress, FLB is registred with this file handler.
  */
-int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh)
+void liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh)
 {
 	if (!liveupdate_enabled())
-		return -EOPNOTSUPP;
+		return;
 
 	guard(rwsem_write)(&luo_register_rwlock);
 	luo_flb_unregister_all(fh);
 	list_del(&ACCESS_PRIVATE(fh, list));
 
 	module_put(fh->ops->owner);
-
-	return 0;
 }
diff --git a/kernel/liveupdate/luo_flb.c b/kernel/liveupdate/luo_flb.c
index e069d694163e..00f5494812c4 100644
--- a/kernel/liveupdate/luo_flb.c
+++ b/kernel/liveupdate/luo_flb.c
@@ -475,21 +475,16 @@ int liveupdate_register_flb(struct liveupdate_file_handler *fh,
  * owner module (acquired during registration) is released.
  *
  * Context: It is typically called from a subsystem's module exit function.
- * Return: 0 on success.
- *         -EOPNOTSUPP if live update is disabled.
- *         -ENOENT if the FLB was not found in the file handler's list.
  */
-int liveupdate_unregister_flb(struct liveupdate_file_handler *fh,
-			      struct liveupdate_flb *flb)
+void liveupdate_unregister_flb(struct liveupdate_file_handler *fh,
+			       struct liveupdate_flb *flb)
 {
 	if (!liveupdate_enabled())
-		return -EOPNOTSUPP;
+		return;
 
 	guard(rwsem_write)(&luo_register_rwlock);
 
 	luo_flb_unregister_one(fh, flb);
-
-	return 0;
 }
 
 /**
-- 
cgit v1.2.3


From 6b1842775a460245e97d36d3a67d0cfba7c4ff79 Mon Sep 17 00:00:00 2001
From: Hao Ge <hao.ge@linux.dev>
Date: Tue, 31 Mar 2026 16:13:12 +0800
Subject: mm/alloc_tag: clear codetag for pages allocated before page_ext
 initialization

Due to initialization ordering, page_ext is allocated and initialized
relatively late during boot.  Some pages have already been allocated and
freed before page_ext becomes available, leaving their codetag
uninitialized.

A clear example is in init_section_page_ext(): alloc_page_ext() calls
kmemleak_alloc().  If the slab cache has no free objects, it falls back to
the buddy allocator to allocate memory.  However, at this point page_ext
is not yet fully initialized, so these newly allocated pages have no
codetag set.  These pages may later be reclaimed by KASAN, which causes
the warning to trigger when they are freed because their codetag ref is
still empty.

Use a global array to track pages allocated before page_ext is fully
initialized.  The array size is fixed at 8192 entries, and will emit a
warning if this limit is exceeded.  When page_ext initialization
completes, set their codetag to empty to avoid warnings when they are
freed later.

This warning is only observed with CONFIG_MEM_ALLOC_PROFILING_DEBUG=Y and
mem_profiling_compressed disabled:

[    9.582133] ------------[ cut here ]------------
[    9.582137] alloc_tag was not set
[    9.582139] WARNING: ./include/linux/alloc_tag.h:164 at __pgalloc_tag_sub+0x40f/0x550, CPU#5: systemd/1
[    9.582190] CPU: 5 UID: 0 PID: 1 Comm: systemd Not tainted 7.0.0-rc4 #1 PREEMPT(lazy)
[    9.582192] Hardware name: Red Hat KVM, BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[    9.582194] RIP: 0010:__pgalloc_tag_sub+0x40f/0x550
[    9.582196] Code: 00 00 4c 29 e5 48 8b 05 1f 88 56 05 48 8d 4c ad 00 48 8d 2c c8 e9 87 fd ff ff 0f 0b 0f 0b e9 f3 fe ff ff 48 8d 3d 61 2f ed 03 <67> 48 0f b9 3a e9 b3 fd ff ff 0f 0b eb e4 e8 5e cd 14 02 4c 89 c7
[    9.582197] RSP: 0018:ffffc9000001f940 EFLAGS: 00010246
[    9.582200] RAX: dffffc0000000000 RBX: 1ffff92000003f2b RCX: 1ffff110200d806c
[    9.582201] RDX: ffff8881006c0360 RSI: 0000000000000004 RDI: ffffffff9bc7b460
[    9.582202] RBP: 0000000000000000 R08: 0000000000000000 R09: fffffbfff3a62324
[    9.582203] R10: ffffffff9d311923 R11: 0000000000000000 R12: ffffea0004001b00
[    9.582204] R13: 0000000000002000 R14: ffffea0000000000 R15: ffff8881006c0360
[    9.582206] FS:  00007ffbbcf2d940(0000) GS:ffff888450479000(0000) knlGS:0000000000000000
[    9.582208] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    9.582210] CR2: 000055ee3aa260d0 CR3: 0000000148b67005 CR4: 0000000000770ef0
[    9.582211] PKRU: 55555554
[    9.582212] Call Trace:
[    9.582213]  <TASK>
[    9.582214]  ? __pfx___pgalloc_tag_sub+0x10/0x10
[    9.582216]  ? check_bytes_and_report+0x68/0x140
[    9.582219]  __free_frozen_pages+0x2e4/0x1150
[    9.582221]  ? __free_slab+0xc2/0x2b0
[    9.582224]  qlist_free_all+0x4c/0xf0
[    9.582227]  kasan_quarantine_reduce+0x15d/0x180
[    9.582229]  __kasan_slab_alloc+0x69/0x90
[    9.582232]  kmem_cache_alloc_noprof+0x14a/0x500
[    9.582234]  do_getname+0x96/0x310
[    9.582237]  do_readlinkat+0x91/0x2f0
[    9.582239]  ? __pfx_do_readlinkat+0x10/0x10
[    9.582240]  ? get_random_bytes_user+0x1df/0x2c0
[    9.582244]  __x64_sys_readlinkat+0x96/0x100
[    9.582246]  do_syscall_64+0xce/0x650
[    9.582250]  ? __x64_sys_getrandom+0x13a/0x1e0
[    9.582252]  ? __pfx___x64_sys_getrandom+0x10/0x10
[    9.582254]  ? do_syscall_64+0x114/0x650
[    9.582255]  ? ksys_read+0xfc/0x1d0
[    9.582258]  ? __pfx_ksys_read+0x10/0x10
[    9.582260]  ? do_syscall_64+0x114/0x650
[    9.582262]  ? do_syscall_64+0x114/0x650
[    9.582264]  ? __pfx_fput_close_sync+0x10/0x10
[    9.582266]  ? file_close_fd_locked+0x178/0x2a0
[    9.582268]  ? __x64_sys_faccessat2+0x96/0x100
[    9.582269]  ? __x64_sys_close+0x7d/0xd0
[    9.582271]  ? do_syscall_64+0x114/0x650
[    9.582273]  ? do_syscall_64+0x114/0x650
[    9.582275]  ? clear_bhb_loop+0x50/0xa0
[    9.582277]  ? clear_bhb_loop+0x50/0xa0
[    9.582279]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[    9.582280] RIP: 0033:0x7ffbbda345ee
[    9.582282] Code: 0f 1f 40 00 48 8b 15 29 38 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff c3 0f 1f 40 00 f3 0f 1e fa 49 89 ca b8 0b 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d fa 37 0d 00 f7 d8 64 89 01 48
[    9.582284] RSP: 002b:00007ffe2ad8de58 EFLAGS: 00000202 ORIG_RAX: 000000000000010b
[    9.582286] RAX: ffffffffffffffda RBX: 000055ee3aa25570 RCX: 00007ffbbda345ee
[    9.582287] RDX: 000055ee3aa25570 RSI: 00007ffe2ad8dee0 RDI: 00000000ffffff9c
[    9.582288] RBP: 0000000000001000 R08: 0000000000000003 R09: 0000000000001001
[    9.582289] R10: 0000000000001000 R11: 0000000000000202 R12: 0000000000000033
[    9.582290] R13: 00007ffe2ad8dee0 R14: 00000000ffffff9c R15: 00007ffe2ad8deb0
[    9.582292]  </TASK>
[    9.582293] ---[ end trace 0000000000000000 ]---

Link: https://lore.kernel.org/20260331081312.123719-1-hao.ge@linux.dev
Fixes: dcfe378c81f72 ("lib: introduce support for page allocation tagging")
Signed-off-by: Hao Ge <hao.ge@linux.dev>
Suggested-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Suren Baghdasaryan <surenb@google.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/alloc_tag.h   |   2 +
 include/linux/pgalloc_tag.h |   2 +-
 lib/alloc_tag.c             | 109 ++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c             |  10 +++-
 4 files changed, 121 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index d40ac39bfbe8..02de2ede560f 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -163,9 +163,11 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref)
 {
 	WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
 }
+void alloc_tag_add_early_pfn(unsigned long pfn);
 #else
 static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
 static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
+static inline void alloc_tag_add_early_pfn(unsigned long pfn) {}
 #endif
 
 /* Caller should verify both ref and tag to be valid */
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 38a82d65e58e..951d33362268 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -181,7 +181,7 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page)
 
 	if (get_page_tag_ref(page, &ref, &handle)) {
 		alloc_tag_sub_check(&ref);
-		if (ref.ct)
+		if (ref.ct && !is_codetag_empty(&ref))
 			tag = ct_to_alloc_tag(ref.ct);
 		put_page_tag_ref(handle);
 	}
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 58991ab09d84..ed1bdcf1f8ab 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -6,7 +6,9 @@
 #include <linux/kallsyms.h>
 #include <linux/module.h>
 #include <linux/page_ext.h>
+#include <linux/pgalloc_tag.h>
 #include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
 #include <linux/seq_buf.h>
 #include <linux/seq_file.h>
 #include <linux/string_choices.h>
@@ -758,8 +760,115 @@ static __init bool need_page_alloc_tagging(void)
 	return mem_profiling_support;
 }
 
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+/*
+ * Track page allocations before page_ext is initialized.
+ * Some pages are allocated before page_ext becomes available, leaving
+ * their codetag uninitialized. Track these early PFNs so we can clear
+ * their codetag refs later to avoid warnings when they are freed.
+ *
+ * Early allocations include:
+ *   - Base allocations independent of CPU count
+ *   - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
+ *     such as trace ring buffers, scheduler per-cpu data)
+ *
+ * For simplicity, we fix the size to 8192.
+ * If insufficient, a warning will be triggered to alert the user.
+ *
+ * TODO: Replace fixed-size array with dynamic allocation using
+ * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
+ */
+#define EARLY_ALLOC_PFN_MAX		8192
+
+static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
+static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);
+
+static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
+{
+	int old_idx, new_idx;
+
+	do {
+		old_idx = atomic_read(&early_pfn_count);
+		if (old_idx >= EARLY_ALLOC_PFN_MAX) {
+			pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
+				      EARLY_ALLOC_PFN_MAX);
+			return;
+		}
+		new_idx = old_idx + 1;
+	} while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));
+
+	early_pfns[old_idx] = pfn;
+}
+
+typedef void alloc_tag_add_func(unsigned long pfn);
+static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
+	RCU_INITIALIZER(__alloc_tag_add_early_pfn);
+
+void alloc_tag_add_early_pfn(unsigned long pfn)
+{
+	alloc_tag_add_func *alloc_tag_add;
+
+	if (static_key_enabled(&mem_profiling_compressed))
+		return;
+
+	rcu_read_lock();
+	alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
+	if (alloc_tag_add)
+		alloc_tag_add(pfn);
+	rcu_read_unlock();
+}
+
+static void __init clear_early_alloc_pfn_tag_refs(void)
+{
+	unsigned int i;
+
+	if (static_key_enabled(&mem_profiling_compressed))
+		return;
+
+	rcu_assign_pointer(alloc_tag_add_early_pfn_ptr, NULL);
+	/* Make sure we are not racing with __alloc_tag_add_early_pfn() */
+	synchronize_rcu();
+
+	for (i = 0; i < atomic_read(&early_pfn_count); i++) {
+		unsigned long pfn = early_pfns[i];
+
+		if (pfn_valid(pfn)) {
+			struct page *page = pfn_to_page(pfn);
+			union pgtag_ref_handle handle;
+			union codetag_ref ref;
+
+			if (get_page_tag_ref(page, &ref, &handle)) {
+				/*
+				 * An early-allocated page could be freed and reallocated
+				 * after its page_ext is initialized but before we clear it.
+				 * In that case, it already has a valid tag set.
+				 * We should not overwrite that valid tag with CODETAG_EMPTY.
+				 *
+				 * Note: there is still a small race window between checking
+				 * ref.ct and calling set_codetag_empty(). We accept this
+				 * race as it's unlikely and the extra complexity of atomic
+				 * cmpxchg is not worth it for this debug-only code path.
+				 */
+				if (ref.ct) {
+					put_page_tag_ref(handle);
+					continue;
+				}
+
+				set_codetag_empty(&ref);
+				update_page_tag_ref(handle, &ref);
+				put_page_tag_ref(handle);
+			}
+		}
+
+	}
+}
+#else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+static inline void __init clear_early_alloc_pfn_tag_refs(void) {}
+#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+
 static __init void init_page_alloc_tagging(void)
 {
+	clear_early_alloc_pfn_tag_refs();
 }
 
 struct page_ext_operations page_alloc_tagging_ops = {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 111b54df8a3c..b1c5430cad4e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1252,10 +1252,18 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
 	union pgtag_ref_handle handle;
 	union codetag_ref ref;
 
-	if (get_page_tag_ref(page, &ref, &handle)) {
+	if (likely(get_page_tag_ref(page, &ref, &handle))) {
 		alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr);
 		update_page_tag_ref(handle, &ref);
 		put_page_tag_ref(handle);
+	} else {
+		/*
+		 * page_ext is not available yet, record the pfn so we can
+		 * clear the tag ref later when page_ext is initialized.
+		 */
+		alloc_tag_add_early_pfn(page_to_pfn(page));
+		if (task->alloc_tag)
+			alloc_tag_set_inaccurate(task->alloc_tag);
 	}
 }
 
-- 
cgit v1.2.3


From 55da81663b9642dd046b26dd6f1baddbcf337c1e Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 27 Mar 2026 16:33:14 -0700
Subject: mm/damon/core: fix damon_call() vs kdamond_fn() exit race

Patch series "mm/damon/core: fix damon_call()/damos_walk() vs kdmond exit
race".

damon_call() and damos_walk() can leak memory and/or deadlock when they
race with kdamond terminations.  Fix those.


This patch (of 2);

When kdamond_fn() main loop is finished, the function cancels all
remaining damon_call() requests and unset the damon_ctx->kdamond so that
API callers and API functions themselves can know the context is
terminated.  damon_call() adds the caller's request to the queue first.
After that, it shows if the kdamond of the damon_ctx is still running
(damon_ctx->kdamond is set).  Only if the kdamond is running, damon_call()
starts waiting for the kdamond's handling of the newly added request.

The damon_call() requests registration and damon_ctx->kdamond unset are
protected by different mutexes, though.  Hence, damon_call() could race
with damon_ctx->kdamond unset, and result in deadlocks.

For example, let's suppose kdamond successfully finished the damon_call()
requests cancelling.  Right after that, damon_call() is called for the
context.  It registers the new request, and shows the context is still
running, because damon_ctx->kdamond unset is not yet done.  Hence the
damon_call() caller starts waiting for the handling of the request.
However, the kdamond is already on the termination steps, so it never
handles the new request.  As a result, the damon_call() caller threads
infinitely waits.

Fix this by introducing another damon_ctx field, namely
call_controls_obsolete.  It is protected by the
damon_ctx->call_controls_lock, which protects damon_call() requests
registration.  Initialize (unset) it in kdamond_fn() before letting
damon_start() returns and set it just before the cancelling of remaining
damon_call() requests is executed.  damon_call() reads the obsolete field
under the lock and avoids adding a new request.

After this change, only requests that are guaranteed to be handled or
cancelled are registered.  Hence the after-registration DAMON context
termination check is no longer needed.  Remove it together.

Note that the deadlock will not happen when damon_call() is called for
repeat mode request.  In tis case, damon_call() returns instead of waiting
for the handling when the request registration succeeds and it shows the
kdamond is running.  However, if the request also has dealloc_on_cancel,
the request memory would be leaked.

The issue is found by sashiko [1].

Link: https://lore.kernel.org/20260327233319.3528-1-sj@kernel.org
Link: https://lore.kernel.org/20260327233319.3528-2-sj@kernel.org
Link: https://lore.kernel.org/20260325141956.87144-1-sj@kernel.org [1]
Fixes: 42b7491af14c ("mm/damon/core: introduce damon_call()")
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org> # 6.14.x
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h |  1 +
 mm/damon/core.c       | 45 ++++++++++++++-------------------------------
 2 files changed, 15 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index d9a3babbafc1..5129de70e7b7 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -818,6 +818,7 @@ struct damon_ctx {
 
 	/* lists of &struct damon_call_control */
 	struct list_head call_controls;
+	bool call_controls_obsolete;
 	struct mutex call_controls_lock;
 
 	struct damos_walk_control *walk_control;
diff --git a/mm/damon/core.c b/mm/damon/core.c
index db6c67e52d2b..9bcda2765ac9 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1573,35 +1573,6 @@ int damon_kdamond_pid(struct damon_ctx *ctx)
 	return pid;
 }
 
-/*
- * damon_call_handle_inactive_ctx() - handle DAMON call request that added to
- *				      an inactive context.
- * @ctx:	The inactive DAMON context.
- * @control:	Control variable of the call request.
- *
- * This function is called in a case that @control is added to @ctx but @ctx is
- * not running (inactive).  See if @ctx handled @control or not, and cleanup
- * @control if it was not handled.
- *
- * Returns 0 if @control was handled by @ctx, negative error code otherwise.
- */
-static int damon_call_handle_inactive_ctx(
-		struct damon_ctx *ctx, struct damon_call_control *control)
-{
-	struct damon_call_control *c;
-
-	mutex_lock(&ctx->call_controls_lock);
-	list_for_each_entry(c, &ctx->call_controls, list) {
-		if (c == control) {
-			list_del(&control->list);
-			mutex_unlock(&ctx->call_controls_lock);
-			return -EINVAL;
-		}
-	}
-	mutex_unlock(&ctx->call_controls_lock);
-	return 0;
-}
-
 /**
  * damon_call() - Invoke a given function on DAMON worker thread (kdamond).
  * @ctx:	DAMON context to call the function for.
@@ -1619,6 +1590,10 @@ static int damon_call_handle_inactive_ctx(
  * synchronization.  The return value of the function will be saved in
  * &damon_call_control->return_code.
  *
+ * Note that this function should be called only after damon_start() with the
+ * @ctx has succeeded.  Otherwise, this function could fall into an indefinite
+ * wait.
+ *
  * Return: 0 on success, negative error code otherwise.
  */
 int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
@@ -1629,10 +1604,12 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
 	INIT_LIST_HEAD(&control->list);
 
 	mutex_lock(&ctx->call_controls_lock);
+	if (ctx->call_controls_obsolete) {
+		mutex_unlock(&ctx->call_controls_lock);
+		return -ECANCELED;
+	}
 	list_add_tail(&control->list, &ctx->call_controls);
 	mutex_unlock(&ctx->call_controls_lock);
-	if (!damon_is_running(ctx))
-		return damon_call_handle_inactive_ctx(ctx, control);
 	if (control->repeat)
 		return 0;
 	wait_for_completion(&control->completion);
@@ -2952,6 +2929,9 @@ static int kdamond_fn(void *data)
 
 	pr_debug("kdamond (%d) starts\n", current->pid);
 
+	mutex_lock(&ctx->call_controls_lock);
+	ctx->call_controls_obsolete = false;
+	mutex_unlock(&ctx->call_controls_lock);
 	complete(&ctx->kdamond_started);
 	kdamond_init_ctx(ctx);
 
@@ -3062,6 +3042,9 @@ done:
 	damon_destroy_targets(ctx);
 
 	kfree(ctx->regions_score_histogram);
+	mutex_lock(&ctx->call_controls_lock);
+	ctx->call_controls_obsolete = true;
+	mutex_unlock(&ctx->call_controls_lock);
 	kdamond_call(ctx, true);
 	damos_walk_cancel(ctx);
 
-- 
cgit v1.2.3


From 33c3f6c2b48cd84b441dba1ee3e62290e53930f4 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 27 Mar 2026 16:33:15 -0700
Subject: mm/damon/core: fix damos_walk() vs kdamond_fn() exit race

When kdamond_fn() main loop is finished, the function cancels remaining
damos_walk() request and unset the damon_ctx->kdamond so that API callers
and API functions themselves can show the context is terminated.
damos_walk() adds the caller's request to the queue first.  After that, it
shows if the kdamond of the damon_ctx is still running (damon_ctx->kdamond
is set).  Only if the kdamond is running, damos_walk() starts waiting for
the kdamond's handling of the newly added request.

The damos_walk() requests registration and damon_ctx->kdamond unset are
protected by different mutexes, though.  Hence, damos_walk() could race
with damon_ctx->kdamond unset, and result in deadlocks.

For example, let's suppose kdamond successfully finished the damow_walk()
request cancelling.  Right after that, damos_walk() is called for the
context.  It registers the new request, and shows the context is still
running, because damon_ctx->kdamond unset is not yet done.  Hence the
damos_walk() caller starts waiting for the handling of the request.
However, the kdamond is already on the termination steps, so it never
handles the new request.  As a result, the damos_walk() caller thread
infinitely waits.

Fix this by introducing another damon_ctx field, namely
walk_control_obsolete.  It is protected by the
damon_ctx->walk_control_lock, which protects damos_walk() request
registration.  Initialize (unset) it in kdamond_fn() before letting
damon_start() returns and set it just before the cancelling of the
remaining damos_walk() request is executed.  damos_walk() reads the
obsolete field under the lock and avoids adding a new request.

After this change, only requests that are guaranteed to be handled or
cancelled are registered.  Hence the after-registration DAMON context
termination check is no longer needed.  Remove it together.

The issue is found by sashiko [1].


Link: https://lore.kernel.org/20260327233319.3528-3-sj@kernel.org
Link: https://lore.kernel.org/20260325141956.87144-1-sj@kernel.org [1]
Fixes: bf0eaba0ff9c ("mm/damon/core: implement damos_walk()")
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org> # 6.14.x
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h |  1 +
 mm/damon/core.c       | 21 ++++++++++++++-------
 2 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 5129de70e7b7..f2cdb7c3f5e6 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -822,6 +822,7 @@ struct damon_ctx {
 	struct mutex call_controls_lock;
 
 	struct damos_walk_control *walk_control;
+	bool walk_control_obsolete;
 	struct mutex walk_control_lock;
 
 	/*
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 9bcda2765ac9..ddabb93f2377 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1637,6 +1637,10 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
  * passed at least one &damos->apply_interval_us, kdamond marks the request as
  * completed so that damos_walk() can wakeup and return.
  *
+ * Note that this function should be called only after damon_start() with the
+ * @ctx has succeeded.  Otherwise, this function could fall into an indefinite
+ * wait.
+ *
  * Return: 0 on success, negative error code otherwise.
  */
 int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control)
@@ -1644,19 +1648,16 @@ int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control)
 	init_completion(&control->completion);
 	control->canceled = false;
 	mutex_lock(&ctx->walk_control_lock);
+	if (ctx->walk_control_obsolete) {
+		mutex_unlock(&ctx->walk_control_lock);
+		return -ECANCELED;
+	}
 	if (ctx->walk_control) {
 		mutex_unlock(&ctx->walk_control_lock);
 		return -EBUSY;
 	}
 	ctx->walk_control = control;
 	mutex_unlock(&ctx->walk_control_lock);
-	if (!damon_is_running(ctx)) {
-		mutex_lock(&ctx->walk_control_lock);
-		if (ctx->walk_control == control)
-			ctx->walk_control = NULL;
-		mutex_unlock(&ctx->walk_control_lock);
-		return -EINVAL;
-	}
 	wait_for_completion(&control->completion);
 	if (control->canceled)
 		return -ECANCELED;
@@ -2932,6 +2933,9 @@ static int kdamond_fn(void *data)
 	mutex_lock(&ctx->call_controls_lock);
 	ctx->call_controls_obsolete = false;
 	mutex_unlock(&ctx->call_controls_lock);
+	mutex_lock(&ctx->walk_control_lock);
+	ctx->walk_control_obsolete = false;
+	mutex_unlock(&ctx->walk_control_lock);
 	complete(&ctx->kdamond_started);
 	kdamond_init_ctx(ctx);
 
@@ -3046,6 +3050,9 @@ done:
 	ctx->call_controls_obsolete = true;
 	mutex_unlock(&ctx->call_controls_lock);
 	kdamond_call(ctx, true);
+	mutex_lock(&ctx->walk_control_lock);
+	ctx->walk_control_obsolete = true;
+	mutex_unlock(&ctx->walk_control_lock);
 	damos_walk_cancel(ctx);
 
 	pr_debug("kdamond (%d) finishes\n", current->pid);
-- 
cgit v1.2.3


From a5bb8669872b6b8463b8777a7a259a8305060016 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Thu, 2 Apr 2026 07:11:47 +0300
Subject: userfaultfd: move vma_can_userfault out of line

vma_can_userfault() has grown pretty big and it's not called on
performance critical path.

Move it out of line.

No functional changes.

Link: https://lore.kernel.org/20260402041156.1377214-7-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: David Hildenbrand (Red Hat) <david@kernel.org>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/userfaultfd_k.h | 35 ++---------------------------------
 mm/userfaultfd.c              | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index d83e349900a3..ce0201c3dd82 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -211,39 +211,8 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 	return vma->vm_flags & __VM_UFFD_FLAGS;
 }
 
-static inline bool vma_can_userfault(struct vm_area_struct *vma,
-				     vm_flags_t vm_flags,
-				     bool wp_async)
-{
-	vm_flags &= __VM_UFFD_FLAGS;
-
-	if (vma->vm_flags & VM_DROPPABLE)
-		return false;
-
-	if ((vm_flags & VM_UFFD_MINOR) &&
-	    (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
-		return false;
-
-	/*
-	 * If wp async enabled, and WP is the only mode enabled, allow any
-	 * memory type.
-	 */
-	if (wp_async && (vm_flags == VM_UFFD_WP))
-		return true;
-
-	/*
-	 * If user requested uffd-wp but not enabled pte markers for
-	 * uffd-wp, then shmem & hugetlbfs are not supported but only
-	 * anonymous.
-	 */
-	if (!uffd_supports_wp_marker() && (vm_flags & VM_UFFD_WP) &&
-	    !vma_is_anonymous(vma))
-		return false;
-
-	/* By default, allow any of anon|shmem|hugetlb */
-	return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
-	    vma_is_shmem(vma);
-}
+bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags,
+		       bool wp_async);
 
 static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
 {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 4857be5a7fa2..ebdc6e24a2c7 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -2018,6 +2018,39 @@ out:
 	return moved ? moved : err;
 }
 
+bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags,
+		       bool wp_async)
+{
+	vm_flags &= __VM_UFFD_FLAGS;
+
+	if (vma->vm_flags & VM_DROPPABLE)
+		return false;
+
+	if ((vm_flags & VM_UFFD_MINOR) &&
+	    (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
+		return false;
+
+	/*
+	 * If wp async enabled, and WP is the only mode enabled, allow any
+	 * memory type.
+	 */
+	if (wp_async && (vm_flags == VM_UFFD_WP))
+		return true;
+
+	/*
+	 * If user requested uffd-wp but not enabled pte markers for
+	 * uffd-wp, then shmem & hugetlbfs are not supported but only
+	 * anonymous.
+	 */
+	if (!uffd_supports_wp_marker() && (vm_flags & VM_UFFD_WP) &&
+	    !vma_is_anonymous(vma))
+		return false;
+
+	/* By default, allow any of anon|shmem|hugetlb */
+	return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
+	    vma_is_shmem(vma);
+}
+
 static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
 				     vm_flags_t vm_flags)
 {
-- 
cgit v1.2.3


From 0f48947c4232c934885711dde0b49066f9d8ee87 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Thu, 2 Apr 2026 07:11:48 +0300
Subject: userfaultfd: introduce vm_uffd_ops

Current userfaultfd implementation works only with memory managed by core
MM: anonymous, shmem and hugetlb.

First, there is no fundamental reason to limit userfaultfd support only to
the core memory types and userfaults can be handled similarly to regular
page faults provided a VMA owner implements appropriate callbacks.

Second, historically various code paths were conditioned on
vma_is_anonymous(), vma_is_shmem() and is_vm_hugetlb_page() and some of
these conditions can be expressed as operations implemented by a
particular memory type.

Introduce vm_uffd_ops extension to vm_operations_struct that will delegate
memory type specific operations to a VMA owner.

Operations for anonymous memory are handled internally in userfaultfd
using anon_uffd_ops that implicitly assigned to anonymous VMAs.

Start with a single operation, ->can_userfault() that will verify that a
VMA meets requirements for userfaultfd support at registration time.

Implement that method for anonymous, shmem and hugetlb and move relevant
parts of vma_can_userfault() into the new callbacks.

[rppt@kernel.org: relocate VM_DROPPABLE test, per Tal]
  Link: https://lore.kernel.org/adffgfM5ANxtPIEF@kernel.org
Link: https://lore.kernel.org/20260402041156.1377214-8-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand (Arm) <david@kernel.org>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Cc: Tal Zussman <tz2294@columbia.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h            |  5 +++++
 include/linux/userfaultfd_k.h |  6 ++++++
 mm/hugetlb.c                  | 15 +++++++++++++++
 mm/shmem.c                    | 15 +++++++++++++++
 mm/userfaultfd.c              | 38 ++++++++++++++++++++++++++++----------
 5 files changed, 69 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8260e28205e9..633bbf9a184a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -758,6 +758,8 @@ struct vm_fault {
 					 */
 };
 
+struct vm_uffd_ops;
+
 /*
  * These are the virtual MM functions - opening of an area, closing and
  * unmapping it (needed to keep files on disk up-to-date etc), pointer
@@ -865,6 +867,9 @@ struct vm_operations_struct {
 	struct page *(*find_normal_page)(struct vm_area_struct *vma,
 					 unsigned long addr);
 #endif /* CONFIG_FIND_NORMAL_PAGE */
+#ifdef CONFIG_USERFAULTFD
+	const struct vm_uffd_ops *uffd_ops;
+#endif
 };
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index ce0201c3dd82..6d445dbfe8ff 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -83,6 +83,12 @@ struct userfaultfd_ctx {
 
 extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
+/* VMA userfaultfd operations */
+struct vm_uffd_ops {
+	/* Checks if a VMA can support userfaultfd */
+	bool (*can_userfault)(struct vm_area_struct *vma, vm_flags_t vm_flags);
+};
+
 /* A combined operation mode + behavior flags. */
 typedef unsigned int __bitwise uffd_flags_t;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a786034ac95c..88009cd2a846 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4792,6 +4792,18 @@ static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf)
 	return 0;
 }
 
+#ifdef CONFIG_USERFAULTFD
+static bool hugetlb_can_userfault(struct vm_area_struct *vma,
+				  vm_flags_t vm_flags)
+{
+	return true;
+}
+
+static const struct vm_uffd_ops hugetlb_uffd_ops = {
+	.can_userfault = hugetlb_can_userfault,
+};
+#endif
+
 /*
  * When a new function is introduced to vm_operations_struct and added
  * to hugetlb_vm_ops, please consider adding the function to shm_vm_ops.
@@ -4805,6 +4817,9 @@ const struct vm_operations_struct hugetlb_vm_ops = {
 	.close = hugetlb_vm_op_close,
 	.may_split = hugetlb_vm_op_split,
 	.pagesize = hugetlb_vm_op_pagesize,
+#ifdef CONFIG_USERFAULTFD
+	.uffd_ops = &hugetlb_uffd_ops,
+#endif
 };
 
 static pte_t make_huge_pte(struct vm_area_struct *vma, struct folio *folio,
diff --git a/mm/shmem.c b/mm/shmem.c
index 6fa1e8340c93..389b2d76396e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3288,6 +3288,15 @@ out_unacct_blocks:
 	shmem_inode_unacct_blocks(inode, 1);
 	return ret;
 }
+
+static bool shmem_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags)
+{
+	return true;
+}
+
+static const struct vm_uffd_ops shmem_uffd_ops = {
+	.can_userfault	= shmem_can_userfault,
+};
 #endif /* CONFIG_USERFAULTFD */
 
 #ifdef CONFIG_TMPFS
@@ -5307,6 +5316,9 @@ static const struct vm_operations_struct shmem_vm_ops = {
 	.set_policy     = shmem_set_policy,
 	.get_policy     = shmem_get_policy,
 #endif
+#ifdef CONFIG_USERFAULTFD
+	.uffd_ops	= &shmem_uffd_ops,
+#endif
 };
 
 static const struct vm_operations_struct shmem_anon_vm_ops = {
@@ -5316,6 +5328,9 @@ static const struct vm_operations_struct shmem_anon_vm_ops = {
 	.set_policy     = shmem_set_policy,
 	.get_policy     = shmem_get_policy,
 #endif
+#ifdef CONFIG_USERFAULTFD
+	.uffd_ops	= &shmem_uffd_ops,
+#endif
 };
 
 int shmem_init_fs_context(struct fs_context *fc)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index ebdc6e24a2c7..3a824e034a09 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -34,6 +34,25 @@ struct mfill_state {
 	pmd_t *pmd;
 };
 
+static bool anon_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags)
+{
+	/* anonymous memory does not support MINOR mode */
+	if (vm_flags & VM_UFFD_MINOR)
+		return false;
+	return true;
+}
+
+static const struct vm_uffd_ops anon_uffd_ops = {
+	.can_userfault	= anon_can_userfault,
+};
+
+static const struct vm_uffd_ops *vma_uffd_ops(struct vm_area_struct *vma)
+{
+	if (vma_is_anonymous(vma))
+		return &anon_uffd_ops;
+	return vma->vm_ops ? vma->vm_ops->uffd_ops : NULL;
+}
+
 static __always_inline
 bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end)
 {
@@ -2021,34 +2040,33 @@ out:
 bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags,
 		       bool wp_async)
 {
-	vm_flags &= __VM_UFFD_FLAGS;
+	const struct vm_uffd_ops *ops = vma_uffd_ops(vma);
 
 	if (vma->vm_flags & VM_DROPPABLE)
 		return false;
 
-	if ((vm_flags & VM_UFFD_MINOR) &&
-	    (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
-		return false;
+	vm_flags &= __VM_UFFD_FLAGS;
 
 	/*
-	 * If wp async enabled, and WP is the only mode enabled, allow any
+	 * If WP is the only mode enabled and context is wp async, allow any
 	 * memory type.
 	 */
 	if (wp_async && (vm_flags == VM_UFFD_WP))
 		return true;
 
+	/* For any other mode reject VMAs that don't implement vm_uffd_ops */
+	if (!ops)
+		return false;
+
 	/*
 	 * If user requested uffd-wp but not enabled pte markers for
-	 * uffd-wp, then shmem & hugetlbfs are not supported but only
-	 * anonymous.
+	 * uffd-wp, then only anonymous memory is supported
 	 */
 	if (!uffd_supports_wp_marker() && (vm_flags & VM_UFFD_WP) &&
 	    !vma_is_anonymous(vma))
 		return false;
 
-	/* By default, allow any of anon|shmem|hugetlb */
-	return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
-	    vma_is_shmem(vma);
+	return ops->can_userfault(vma, vm_flags);
 }
 
 static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
-- 
cgit v1.2.3


From dfc4d771820a171bd701d06252fcf920d0ede25c Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Thu, 2 Apr 2026 07:11:49 +0300
Subject: shmem, userfaultfd: use a VMA callback to handle UFFDIO_CONTINUE

When userspace resolves a page fault in a shmem VMA with UFFDIO_CONTINUE
it needs to get a folio that already exists in the pagecache backing that
VMA.

Instead of using shmem_get_folio() for that, add a get_folio_noalloc()
method to 'struct vm_uffd_ops' that will return a folio if it exists in
the VMA's pagecache at given pgoff.

Implement get_folio_noalloc() method for shmem and slightly refactor
userfaultfd's mfill_get_vma() and mfill_atomic_pte_continue() to support
this new API.

Link: https://lore.kernel.org/20260402041156.1377214-9-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: James Houghton <jthoughton@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand (Arm) <david@kernel.org>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/userfaultfd_k.h |  7 +++++++
 mm/shmem.c                    | 15 ++++++++++++++-
 mm/userfaultfd.c              | 34 ++++++++++++++++++----------------
 3 files changed, 39 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 6d445dbfe8ff..4bda632dae88 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -87,6 +87,13 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 struct vm_uffd_ops {
 	/* Checks if a VMA can support userfaultfd */
 	bool (*can_userfault)(struct vm_area_struct *vma, vm_flags_t vm_flags);
+	/*
+	 * Called to resolve UFFDIO_CONTINUE request.
+	 * Should return the folio found at pgoff in the VMA's pagecache if it
+	 * exists or ERR_PTR otherwise.
+	 * The returned folio is locked and with reference held.
+	 */
+	struct folio *(*get_folio_noalloc)(struct inode *inode, pgoff_t pgoff);
 };
 
 /* A combined operation mode + behavior flags. */
diff --git a/mm/shmem.c b/mm/shmem.c
index 389b2d76396e..ed07d0c03312 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3289,13 +3289,26 @@ out_unacct_blocks:
 	return ret;
 }
 
+static struct folio *shmem_get_folio_noalloc(struct inode *inode, pgoff_t pgoff)
+{
+	struct folio *folio;
+	int err;
+
+	err = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
+	if (err)
+		return ERR_PTR(err);
+
+	return folio;
+}
+
 static bool shmem_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags)
 {
 	return true;
 }
 
 static const struct vm_uffd_ops shmem_uffd_ops = {
-	.can_userfault	= shmem_can_userfault,
+	.can_userfault		= shmem_can_userfault,
+	.get_folio_noalloc	= shmem_get_folio_noalloc,
 };
 #endif /* CONFIG_USERFAULTFD */
 
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 3a824e034a09..5b204c3ec986 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -191,6 +191,7 @@ static int mfill_get_vma(struct mfill_state *state)
 	struct userfaultfd_ctx *ctx = state->ctx;
 	uffd_flags_t flags = state->flags;
 	struct vm_area_struct *dst_vma;
+	const struct vm_uffd_ops *ops;
 	int err;
 
 	/*
@@ -232,10 +233,12 @@ static int mfill_get_vma(struct mfill_state *state)
 	if (is_vm_hugetlb_page(dst_vma))
 		return 0;
 
-	if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
+	ops = vma_uffd_ops(dst_vma);
+	if (!ops)
 		goto out_unlock;
-	if (!vma_is_shmem(dst_vma) &&
-	    uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
+
+	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE) &&
+	    !ops->get_folio_noalloc)
 		goto out_unlock;
 
 	return 0;
@@ -575,6 +578,7 @@ out:
 static int mfill_atomic_pte_continue(struct mfill_state *state)
 {
 	struct vm_area_struct *dst_vma = state->vma;
+	const struct vm_uffd_ops *ops = vma_uffd_ops(dst_vma);
 	unsigned long dst_addr = state->dst_addr;
 	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
 	struct inode *inode = file_inode(dst_vma->vm_file);
@@ -584,17 +588,16 @@ static int mfill_atomic_pte_continue(struct mfill_state *state)
 	struct page *page;
 	int ret;
 
-	ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
-	/* Our caller expects us to return -EFAULT if we failed to find folio */
-	if (ret == -ENOENT)
-		ret = -EFAULT;
-	if (ret)
-		goto out;
-	if (!folio) {
-		ret = -EFAULT;
-		goto out;
+	if (!ops) {
+		VM_WARN_ONCE(1, "UFFDIO_CONTINUE for unsupported VMA");
+		return -EOPNOTSUPP;
 	}
 
+	folio = ops->get_folio_noalloc(inode, pgoff);
+	/* Our caller expects us to return -EFAULT if we failed to find folio */
+	if (IS_ERR_OR_NULL(folio))
+		return -EFAULT;
+
 	page = folio_file_page(folio, pgoff);
 	if (PageHWPoison(page)) {
 		ret = -EIO;
@@ -607,13 +610,12 @@ static int mfill_atomic_pte_continue(struct mfill_state *state)
 		goto out_release;
 
 	folio_unlock(folio);
-	ret = 0;
-out:
-	return ret;
+	return 0;
+
 out_release:
 	folio_unlock(folio);
 	folio_put(folio);
-	goto out;
+	return ret;
 }
 
 /* Handles UFFDIO_POISON for all non-hugetlb VMAs. */
-- 
cgit v1.2.3


From ad9ac3081332e955bc4b513018a1e0e86683bfb5 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Thu, 2 Apr 2026 07:11:50 +0300
Subject: userfaultfd: introduce vm_uffd_ops->alloc_folio()

and use it to refactor mfill_atomic_pte_zeroed_folio() and
mfill_atomic_pte_copy().

mfill_atomic_pte_zeroed_folio() and mfill_atomic_pte_copy() perform
almost identical actions:
* allocate a folio
* update folio contents (either copy from userspace of fill with zeros)
* update page tables with the new folio

Split a __mfill_atomic_pte() helper that handles both cases and uses newly
introduced vm_uffd_ops->alloc_folio() to allocate the folio.

Pass the ops structure from the callers to __mfill_atomic_pte() to later
allow using anon_uffd_ops for MAP_PRIVATE mappings of file-backed VMAs.

Note, that the new ops method is called alloc_folio() rather than
folio_alloc() to avoid clash with alloc_tag macro folio_alloc().

Link: https://lore.kernel.org/20260402041156.1377214-10-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: James Houghton <jthoughton@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand (Arm) <david@kernel.org>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/userfaultfd_k.h |  6 +++
 mm/userfaultfd.c              | 92 ++++++++++++++++++++++---------------------
 2 files changed, 54 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 4bda632dae88..0f508c752741 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -94,6 +94,12 @@ struct vm_uffd_ops {
 	 * The returned folio is locked and with reference held.
 	 */
 	struct folio *(*get_folio_noalloc)(struct inode *inode, pgoff_t pgoff);
+	/*
+	 * Called during resolution of UFFDIO_COPY request.
+	 * Should allocate and return a folio or NULL if allocation fails.
+	 */
+	struct folio *(*alloc_folio)(struct vm_area_struct *vma,
+				     unsigned long addr);
 };
 
 /* A combined operation mode + behavior flags. */
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5b204c3ec986..dd191703b320 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -42,8 +42,26 @@ static bool anon_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags)
 	return true;
 }
 
+static struct folio *anon_alloc_folio(struct vm_area_struct *vma,
+				      unsigned long addr)
+{
+	struct folio *folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma,
+					      addr);
+
+	if (!folio)
+		return NULL;
+
+	if (mem_cgroup_charge(folio, vma->vm_mm, GFP_KERNEL)) {
+		folio_put(folio);
+		return NULL;
+	}
+
+	return folio;
+}
+
 static const struct vm_uffd_ops anon_uffd_ops = {
 	.can_userfault	= anon_can_userfault,
+	.alloc_folio	= anon_alloc_folio,
 };
 
 static const struct vm_uffd_ops *vma_uffd_ops(struct vm_area_struct *vma)
@@ -456,7 +474,8 @@ static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio
 	return 0;
 }
 
-static int mfill_atomic_pte_copy(struct mfill_state *state)
+static int __mfill_atomic_pte(struct mfill_state *state,
+			      const struct vm_uffd_ops *ops)
 {
 	unsigned long dst_addr = state->dst_addr;
 	unsigned long src_addr = state->src_addr;
@@ -464,16 +483,12 @@ static int mfill_atomic_pte_copy(struct mfill_state *state)
 	struct folio *folio;
 	int ret;
 
-	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, state->vma, dst_addr);
+	folio = ops->alloc_folio(state->vma, state->dst_addr);
 	if (!folio)
 		return -ENOMEM;
 
-	ret = -ENOMEM;
-	if (mem_cgroup_charge(folio, state->vma->vm_mm, GFP_KERNEL))
-		goto out_release;
-
-	ret = mfill_copy_folio_locked(folio, src_addr);
-	if (unlikely(ret)) {
+	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) {
+		ret = mfill_copy_folio_locked(folio, src_addr);
 		/*
 		 * Fallback to copy_from_user outside mmap_lock.
 		 * If retry is successful, mfill_copy_folio_locked() returns
@@ -481,9 +496,15 @@ static int mfill_atomic_pte_copy(struct mfill_state *state)
 		 * If there was an error, we must mfill_put_vma() anyway and it
 		 * will take care of unlocking if needed.
 		 */
-		ret = mfill_copy_folio_retry(state, folio);
-		if (ret)
-			goto out_release;
+		if (unlikely(ret)) {
+			ret = mfill_copy_folio_retry(state, folio);
+			if (ret)
+				goto err_folio_put;
+		}
+	} else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) {
+		clear_user_highpage(&folio->page, state->dst_addr);
+	} else {
+		VM_WARN_ONCE(1, "Unknown UFFDIO operation, flags: %x", flags);
 	}
 
 	/*
@@ -496,47 +517,30 @@ static int mfill_atomic_pte_copy(struct mfill_state *state)
 	ret = mfill_atomic_install_pte(state->pmd, state->vma, dst_addr,
 				       &folio->page, true, flags);
 	if (ret)
-		goto out_release;
-out:
-	return ret;
-out_release:
+		goto err_folio_put;
+
+	return 0;
+
+err_folio_put:
+	folio_put(folio);
 	/* Don't return -ENOENT so that our caller won't retry */
 	if (ret == -ENOENT)
 		ret = -EFAULT;
-	folio_put(folio);
-	goto out;
+	return ret;
 }
 
-static int mfill_atomic_pte_zeroed_folio(pmd_t *dst_pmd,
-					 struct vm_area_struct *dst_vma,
-					 unsigned long dst_addr)
+static int mfill_atomic_pte_copy(struct mfill_state *state)
 {
-	struct folio *folio;
-	int ret = -ENOMEM;
-
-	folio = vma_alloc_zeroed_movable_folio(dst_vma, dst_addr);
-	if (!folio)
-		return ret;
-
-	if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL))
-		goto out_put;
+	const struct vm_uffd_ops *ops = vma_uffd_ops(state->vma);
 
-	/*
-	 * The memory barrier inside __folio_mark_uptodate makes sure that
-	 * zeroing out the folio become visible before mapping the page
-	 * using set_pte_at(). See do_anonymous_page().
-	 */
-	__folio_mark_uptodate(folio);
+	return __mfill_atomic_pte(state, ops);
+}
 
-	ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
-				       &folio->page, true, 0);
-	if (ret)
-		goto out_put;
+static int mfill_atomic_pte_zeroed_folio(struct mfill_state *state)
+{
+	const struct vm_uffd_ops *ops = vma_uffd_ops(state->vma);
 
-	return 0;
-out_put:
-	folio_put(folio);
-	return ret;
+	return __mfill_atomic_pte(state, ops);
 }
 
 static int mfill_atomic_pte_zeropage(struct mfill_state *state)
@@ -549,7 +553,7 @@ static int mfill_atomic_pte_zeropage(struct mfill_state *state)
 	int ret;
 
 	if (mm_forbids_zeropage(dst_vma->vm_mm))
-		return mfill_atomic_pte_zeroed_folio(dst_pmd, dst_vma, dst_addr);
+		return mfill_atomic_pte_zeroed_folio(state);
 
 	_dst_pte = pte_mkspecial(pfn_pte(zero_pfn(dst_addr),
 					 dst_vma->vm_page_prot));
-- 
cgit v1.2.3


From f74991b4e3836dd38f3adb41b146994b283942a1 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Thu, 2 Apr 2026 07:11:51 +0300
Subject: shmem, userfaultfd: implement shmem uffd operations using vm_uffd_ops

Add filemap_add() and filemap_remove() methods to vm_uffd_ops and use them
in __mfill_atomic_pte() to add shmem folios to page cache and remove them
in case of error.

Implement these methods in shmem along with vm_uffd_ops->alloc_folio() and
drop shmem_mfill_atomic_pte().

Since userfaultfd now does not reference any functions from shmem, drop
include if linux/shmem_fs.h from mm/userfaultfd.c

mfill_atomic_install_pte() is not used anywhere outside of mm/userfaultfd,
make it static.

Link: https://lore.kernel.org/20260402041156.1377214-11-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: James Houghton <jthoughton@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand (Arm) <david@kernel.org>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/shmem_fs.h      |  14 ----
 include/linux/userfaultfd_k.h |  19 ++++--
 mm/shmem.c                    | 148 +++++++++++++++---------------------------
 mm/userfaultfd.c              |  80 +++++++++++------------
 4 files changed, 106 insertions(+), 155 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a8273b32e041..1a345142af7d 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -221,20 +221,6 @@ static inline pgoff_t shmem_fallocend(struct inode *inode, pgoff_t eof)
 
 extern bool shmem_charge(struct inode *inode, long pages);
 
-#ifdef CONFIG_USERFAULTFD
-#ifdef CONFIG_SHMEM
-extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
-				  struct vm_area_struct *dst_vma,
-				  unsigned long dst_addr,
-				  unsigned long src_addr,
-				  uffd_flags_t flags,
-				  struct folio **foliop);
-#else /* !CONFIG_SHMEM */
-#define shmem_mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, \
-			       src_addr, flags, foliop) ({ BUG(); 0; })
-#endif /* CONFIG_SHMEM */
-#endif /* CONFIG_USERFAULTFD */
-
 /*
  * Used space is stored as unsigned 64-bit value in bytes but
  * quota core supports only signed 64-bit values so use that
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 0f508c752741..d2920f98ab86 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -100,6 +100,20 @@ struct vm_uffd_ops {
 	 */
 	struct folio *(*alloc_folio)(struct vm_area_struct *vma,
 				     unsigned long addr);
+	/*
+	 * Called during resolution of UFFDIO_COPY request.
+	 * Should only be called with a folio returned by alloc_folio() above.
+	 * The folio will be set to locked.
+	 * Returns 0 on success, error code on failure.
+	 */
+	int (*filemap_add)(struct folio *folio, struct vm_area_struct *vma,
+			 unsigned long addr);
+	/*
+	 * Called during resolution of UFFDIO_COPY request on the error
+	 * handling path.
+	 * Should revert the operation of ->filemap_add().
+	 */
+	void (*filemap_remove)(struct folio *folio, struct vm_area_struct *vma);
 };
 
 /* A combined operation mode + behavior flags. */
@@ -133,11 +147,6 @@ static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_at
 /* Flags controlling behavior. These behavior changes are mode-independent. */
 #define MFILL_ATOMIC_WP MFILL_ATOMIC_FLAG(0)
 
-extern int mfill_atomic_install_pte(pmd_t *dst_pmd,
-				    struct vm_area_struct *dst_vma,
-				    unsigned long dst_addr, struct page *page,
-				    bool newly_allocated, uffd_flags_t flags);
-
 extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start,
 				 unsigned long src_start, unsigned long len,
 				 uffd_flags_t flags);
diff --git a/mm/shmem.c b/mm/shmem.c
index ed07d0c03312..5aa43657886c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3175,118 +3175,73 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
 #endif /* CONFIG_TMPFS_QUOTA */
 
 #ifdef CONFIG_USERFAULTFD
-int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
-			   struct vm_area_struct *dst_vma,
-			   unsigned long dst_addr,
-			   unsigned long src_addr,
-			   uffd_flags_t flags,
-			   struct folio **foliop)
-{
-	struct inode *inode = file_inode(dst_vma->vm_file);
-	struct shmem_inode_info *info = SHMEM_I(inode);
+static struct folio *shmem_mfill_folio_alloc(struct vm_area_struct *vma,
+					     unsigned long addr)
+{
+	struct inode *inode = file_inode(vma->vm_file);
 	struct address_space *mapping = inode->i_mapping;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	pgoff_t pgoff = linear_page_index(vma, addr);
 	gfp_t gfp = mapping_gfp_mask(mapping);
-	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
-	void *page_kaddr;
 	struct folio *folio;
-	int ret;
-	pgoff_t max_off;
-
-	if (shmem_inode_acct_blocks(inode, 1)) {
-		/*
-		 * We may have got a page, returned -ENOENT triggering a retry,
-		 * and now we find ourselves with -ENOMEM. Release the page, to
-		 * avoid a BUG_ON in our caller.
-		 */
-		if (unlikely(*foliop)) {
-			folio_put(*foliop);
-			*foliop = NULL;
-		}
-		return -ENOMEM;
-	}
 
-	if (!*foliop) {
-		ret = -ENOMEM;
-		folio = shmem_alloc_folio(gfp, 0, info, pgoff);
-		if (!folio)
-			goto out_unacct_blocks;
+	if (unlikely(pgoff >= DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)))
+		return NULL;
 
-		if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) {
-			page_kaddr = kmap_local_folio(folio, 0);
-			/*
-			 * The read mmap_lock is held here.  Despite the
-			 * mmap_lock being read recursive a deadlock is still
-			 * possible if a writer has taken a lock.  For example:
-			 *
-			 * process A thread 1 takes read lock on own mmap_lock
-			 * process A thread 2 calls mmap, blocks taking write lock
-			 * process B thread 1 takes page fault, read lock on own mmap lock
-			 * process B thread 2 calls mmap, blocks taking write lock
-			 * process A thread 1 blocks taking read lock on process B
-			 * process B thread 1 blocks taking read lock on process A
-			 *
-			 * Disable page faults to prevent potential deadlock
-			 * and retry the copy outside the mmap_lock.
-			 */
-			pagefault_disable();
-			ret = copy_from_user(page_kaddr,
-					     (const void __user *)src_addr,
-					     PAGE_SIZE);
-			pagefault_enable();
-			kunmap_local(page_kaddr);
-
-			/* fallback to copy_from_user outside mmap_lock */
-			if (unlikely(ret)) {
-				*foliop = folio;
-				ret = -ENOENT;
-				/* don't free the page */
-				goto out_unacct_blocks;
-			}
+	folio = shmem_alloc_folio(gfp, 0, info, pgoff);
+	if (!folio)
+		return NULL;
 
-			flush_dcache_folio(folio);
-		} else {		/* ZEROPAGE */
-			clear_user_highpage(&folio->page, dst_addr);
-		}
-	} else {
-		folio = *foliop;
-		VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
-		*foliop = NULL;
+	if (mem_cgroup_charge(folio, vma->vm_mm, GFP_KERNEL)) {
+		folio_put(folio);
+		return NULL;
 	}
 
-	VM_BUG_ON(folio_test_locked(folio));
-	VM_BUG_ON(folio_test_swapbacked(folio));
+	return folio;
+}
+
+static int shmem_mfill_filemap_add(struct folio *folio,
+				   struct vm_area_struct *vma,
+				   unsigned long addr)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	struct address_space *mapping = inode->i_mapping;
+	pgoff_t pgoff = linear_page_index(vma, addr);
+	gfp_t gfp = mapping_gfp_mask(mapping);
+	int err;
+
 	__folio_set_locked(folio);
 	__folio_set_swapbacked(folio);
-	__folio_mark_uptodate(folio);
-
-	ret = -EFAULT;
-	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
-	if (unlikely(pgoff >= max_off))
-		goto out_release;
 
-	ret = mem_cgroup_charge(folio, dst_vma->vm_mm, gfp);
-	if (ret)
-		goto out_release;
-	ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL, gfp);
-	if (ret)
-		goto out_release;
+	err = shmem_add_to_page_cache(folio, mapping, pgoff, NULL, gfp);
+	if (err)
+		goto err_unlock;
 
-	ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
-				       &folio->page, true, flags);
-	if (ret)
-		goto out_delete_from_cache;
+	if (shmem_inode_acct_blocks(inode, 1)) {
+		err = -ENOMEM;
+		goto err_delete_from_cache;
+	}
 
+	folio_add_lru(folio);
 	shmem_recalc_inode(inode, 1, 0);
-	folio_unlock(folio);
+
 	return 0;
-out_delete_from_cache:
+
+err_delete_from_cache:
 	filemap_remove_folio(folio);
-out_release:
+err_unlock:
+	folio_unlock(folio);
+	return err;
+}
+
+static void shmem_mfill_filemap_remove(struct folio *folio,
+				       struct vm_area_struct *vma)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+
+	filemap_remove_folio(folio);
+	shmem_recalc_inode(inode, 0, 0);
 	folio_unlock(folio);
-	folio_put(folio);
-out_unacct_blocks:
-	shmem_inode_unacct_blocks(inode, 1);
-	return ret;
 }
 
 static struct folio *shmem_get_folio_noalloc(struct inode *inode, pgoff_t pgoff)
@@ -3309,6 +3264,9 @@ static bool shmem_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags)
 static const struct vm_uffd_ops shmem_uffd_ops = {
 	.can_userfault		= shmem_can_userfault,
 	.get_folio_noalloc	= shmem_get_folio_noalloc,
+	.alloc_folio		= shmem_mfill_folio_alloc,
+	.filemap_add		= shmem_mfill_filemap_add,
+	.filemap_remove		= shmem_mfill_filemap_remove,
 };
 #endif /* CONFIG_USERFAULTFD */
 
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index dd191703b320..8a023d9326c2 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -14,7 +14,6 @@
 #include <linux/userfaultfd_k.h>
 #include <linux/mmu_notifier.h>
 #include <linux/hugetlb.h>
-#include <linux/shmem_fs.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include "internal.h"
@@ -338,10 +337,10 @@ static bool mfill_file_over_size(struct vm_area_struct *dst_vma,
  * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
  * and anon, and for both shared and private VMAs.
  */
-int mfill_atomic_install_pte(pmd_t *dst_pmd,
-			     struct vm_area_struct *dst_vma,
-			     unsigned long dst_addr, struct page *page,
-			     bool newly_allocated, uffd_flags_t flags)
+static int mfill_atomic_install_pte(pmd_t *dst_pmd,
+				    struct vm_area_struct *dst_vma,
+				    unsigned long dst_addr, struct page *page,
+				    uffd_flags_t flags)
 {
 	int ret;
 	struct mm_struct *dst_mm = dst_vma->vm_mm;
@@ -385,9 +384,6 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
 		goto out_unlock;
 
 	if (page_in_cache) {
-		/* Usually, cache pages are already added to LRU */
-		if (newly_allocated)
-			folio_add_lru(folio);
 		folio_add_file_rmap_pte(folio, page, dst_vma);
 	} else {
 		folio_add_new_anon_rmap(folio, dst_vma, dst_addr, RMAP_EXCLUSIVE);
@@ -402,6 +398,9 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
 
 	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 
+	if (page_in_cache)
+		folio_unlock(folio);
+
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(dst_vma, dst_addr, dst_pte);
 	ret = 0;
@@ -514,13 +513,22 @@ static int __mfill_atomic_pte(struct mfill_state *state,
 	 */
 	__folio_mark_uptodate(folio);
 
+	if (ops->filemap_add) {
+		ret = ops->filemap_add(folio, state->vma, state->dst_addr);
+		if (ret)
+			goto err_folio_put;
+	}
+
 	ret = mfill_atomic_install_pte(state->pmd, state->vma, dst_addr,
-				       &folio->page, true, flags);
+				       &folio->page, flags);
 	if (ret)
-		goto err_folio_put;
+		goto err_filemap_remove;
 
 	return 0;
 
+err_filemap_remove:
+	if (ops->filemap_remove)
+		ops->filemap_remove(folio, state->vma);
 err_folio_put:
 	folio_put(folio);
 	/* Don't return -ENOENT so that our caller won't retry */
@@ -533,6 +541,18 @@ static int mfill_atomic_pte_copy(struct mfill_state *state)
 {
 	const struct vm_uffd_ops *ops = vma_uffd_ops(state->vma);
 
+	/*
+	 * The normal page fault path for a MAP_PRIVATE mapping in a
+	 * file-backed VMA will invoke the fault, fill the hole in the file and
+	 * COW it right away. The result generates plain anonymous memory.
+	 * So when we are asked to fill a hole in a MAP_PRIVATE mapping, we'll
+	 * generate anonymous memory directly without actually filling the
+	 * hole. For the MAP_PRIVATE case the robustness check only happens in
+	 * the pagetable (to verify it's still none) and not in the page cache.
+	 */
+	if (!(state->vma->vm_flags & VM_SHARED))
+		ops = &anon_uffd_ops;
+
 	return __mfill_atomic_pte(state, ops);
 }
 
@@ -552,7 +572,8 @@ static int mfill_atomic_pte_zeropage(struct mfill_state *state)
 	spinlock_t *ptl;
 	int ret;
 
-	if (mm_forbids_zeropage(dst_vma->vm_mm))
+	if (mm_forbids_zeropage(dst_vma->vm_mm) ||
+	    (dst_vma->vm_flags & VM_SHARED))
 		return mfill_atomic_pte_zeroed_folio(state);
 
 	_dst_pte = pte_mkspecial(pfn_pte(zero_pfn(dst_addr),
@@ -609,11 +630,10 @@ static int mfill_atomic_pte_continue(struct mfill_state *state)
 	}
 
 	ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
-				       page, false, flags);
+				       page, flags);
 	if (ret)
 		goto out_release;
 
-	folio_unlock(folio);
 	return 0;
 
 out_release:
@@ -836,41 +856,19 @@ extern ssize_t mfill_atomic_hugetlb(struct userfaultfd_ctx *ctx,
 
 static __always_inline ssize_t mfill_atomic_pte(struct mfill_state *state)
 {
-	struct vm_area_struct *dst_vma = state->vma;
-	unsigned long src_addr = state->src_addr;
-	unsigned long dst_addr = state->dst_addr;
-	struct folio **foliop = &state->folio;
 	uffd_flags_t flags = state->flags;
-	pmd_t *dst_pmd = state->pmd;
-	ssize_t err;
 
 	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
 		return mfill_atomic_pte_continue(state);
 	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON))
 		return mfill_atomic_pte_poison(state);
+	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY))
+		return mfill_atomic_pte_copy(state);
+	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE))
+		return mfill_atomic_pte_zeropage(state);
 
-	/*
-	 * The normal page fault path for a shmem will invoke the
-	 * fault, fill the hole in the file and COW it right away. The
-	 * result generates plain anonymous memory. So when we are
-	 * asked to fill an hole in a MAP_PRIVATE shmem mapping, we'll
-	 * generate anonymous memory directly without actually filling
-	 * the hole. For the MAP_PRIVATE case the robustness check
-	 * only happens in the pagetable (to verify it's still none)
-	 * and not in the radix tree.
-	 */
-	if (!(dst_vma->vm_flags & VM_SHARED)) {
-		if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY))
-			err = mfill_atomic_pte_copy(state);
-		else
-			err = mfill_atomic_pte_zeropage(state);
-	} else {
-		err = shmem_mfill_atomic_pte(dst_pmd, dst_vma,
-					     dst_addr, src_addr,
-					     flags, foliop);
-	}
-
-	return err;
+	VM_WARN_ONCE(1, "Unknown UFFDIO operation, flags: %x", flags);
+	return -EOPNOTSUPP;
 }
 
 static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
-- 
cgit v1.2.3


From 77c368f057e17b59b23899a1907ee9d4f4d7a532 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Thu, 2 Apr 2026 18:23:20 +0800
Subject: mm/sparse: fix comment for section map alignment

The comment in mmzone.h currently details exhaustive per-architecture
bit-width lists and explains alignment using min(PAGE_SHIFT,
PFN_SECTION_SHIFT).  Such details risk falling out of date over time and
may inadvertently be left un-updated.

We always expect a single section to cover full pages.  Therefore, we can
safely assume that PFN_SECTION_SHIFT is large enough to accommodate
SECTION_MAP_LAST_BIT.  We use BUILD_BUG_ON() to ensure this.

Update the comment to accurately reflect this consensus, making it clear
that we rely on a single section covering full pages.

Link: https://lore.kernel.org/20260402102320.3617578-1-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Petr Tesarik <ptesarik@suse.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 20f920dede65..07f501a62d67 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2068,21 +2068,16 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
 extern size_t mem_section_usage_size(void);
 
 /*
- * We use the lower bits of the mem_map pointer to store
- * a little bit of information.  The pointer is calculated
- * as mem_map - section_nr_to_pfn(pnum).  The result is
- * aligned to the minimum alignment of the two values:
- *   1. All mem_map arrays are page-aligned.
- *   2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT
- *      lowest bits.  PFN_SECTION_SHIFT is arch-specific
- *      (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the
- *      worst combination is powerpc with 256k pages,
- *      which results in PFN_SECTION_SHIFT equal 6.
- * To sum it up, at least 6 bits are available on all architectures.
- * However, we can exceed 6 bits on some other architectures except
- * powerpc (e.g. 15 bits are available on x86_64, 13 bits are available
- * with the worst case of 64K pages on arm64) if we make sure the
- * exceeded bit is not applicable to powerpc.
+ * We use the lower bits of the mem_map pointer to store a little bit of
+ * information. The pointer is calculated as mem_map - section_nr_to_pfn().
+ * The result is aligned to the minimum alignment of the two values:
+ *
+ * 1. All mem_map arrays are page-aligned.
+ * 2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT lowest bits.
+ *
+ * We always expect a single section to cover full pages. Therefore,
+ * we can safely assume that PFN_SECTION_SHIFT is large enough to
+ * accommodate SECTION_MAP_LAST_BIT. We use BUILD_BUG_ON() to ensure this.
  */
 enum {
 	SECTION_MARKED_PRESENT_BIT,
-- 
cgit v1.2.3


From cc1ff87bce1ccd38410ab10960f576dcd17db679 Mon Sep 17 00:00:00 2001
From: Qingfang Deng <qingfang.deng@linux.dev>
Date: Wed, 15 Apr 2026 10:24:51 +0800
Subject: pppoe: drop PFC frames

RFC 2516 Section 7 states that Protocol Field Compression (PFC) is NOT
RECOMMENDED for PPPoE. In practice, pppd does not support negotiating
PFC for PPPoE sessions, and the current PPPoE driver assumes an
uncompressed (2-byte) protocol field. However, the generic PPP layer
function ppp_input() is not aware of the negotiation result, and still
accepts PFC frames.

If a peer with a broken implementation or an attacker sends a frame with
a compressed (1-byte) protocol field, the subsequent PPP payload is
shifted by one byte. This causes the network header to be 4-byte
misaligned, which may trigger unaligned access exceptions on some
architectures.

To reduce the attack surface, drop PPPoE PFC frames. Introduce
ppp_skb_is_compressed_proto() helper function to be used in both
ppp_generic.c and pppoe.c to avoid open-coding.

Fixes: 7fb1b8ca8fa1 ("ppp: Move PFC decompression to PPP generic layer")
Signed-off-by: Qingfang Deng <qingfang.deng@linux.dev>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260415022456.141758-2-qingfang.deng@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ppp/ppp_generic.c |  2 +-
 drivers/net/ppp/pppoe.c       |  8 +++++++-
 include/linux/ppp_defs.h      | 16 ++++++++++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index b0d3bc49c685..57c68efa5ff8 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2245,7 +2245,7 @@ ppp_do_recv(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
  */
 static void __ppp_decompress_proto(struct sk_buff *skb)
 {
-	if (skb->data[0] & 0x01)
+	if (ppp_skb_is_compressed_proto(skb))
 		*(u8 *)skb_push(skb, 1) = 0x00;
 }
 
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index d546a7af0d54..bdd61c504a1c 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -393,7 +393,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (skb_mac_header_len(skb) < ETH_HLEN)
 		goto drop;
 
-	if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
+	if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
 		goto drop;
 
 	ph = pppoe_hdr(skb);
@@ -403,6 +403,12 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (skb->len < len)
 		goto drop;
 
+	/* skb->data points to the PPP protocol header after skb_pull_rcsum.
+	 * Drop PFC frames.
+	 */
+	if (ppp_skb_is_compressed_proto(skb))
+		goto drop;
+
 	if (pskb_trim_rcsum(skb, len))
 		goto drop;
 
diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h
index b7e57fdbd413..b1d1f46d7d3b 100644
--- a/include/linux/ppp_defs.h
+++ b/include/linux/ppp_defs.h
@@ -8,6 +8,7 @@
 #define _PPP_DEFS_H_
 
 #include <linux/crc-ccitt.h>
+#include <linux/skbuff.h>
 #include <uapi/linux/ppp_defs.h>
 
 #define PPP_FCS(fcs, c) crc_ccitt_byte(fcs, c)
@@ -25,4 +26,19 @@ static inline bool ppp_proto_is_valid(u16 proto)
 	return !!((proto & 0x0101) == 0x0001);
 }
 
+/**
+ * ppp_skb_is_compressed_proto - checks if PPP protocol in a skb is compressed
+ * @skb: skb to check
+ *
+ * Check if the PPP protocol field is compressed (the least significant
+ * bit of the most significant octet is 1). skb->data must point to the PPP
+ * protocol header.
+ *
+ * Return: Whether the PPP protocol field is compressed.
+ */
+static inline bool ppp_skb_is_compressed_proto(const struct sk_buff *skb)
+{
+	return unlikely(skb->data[0] & 0x01);
+}
+
 #endif /* _PPP_DEFS_H_ */
-- 
cgit v1.2.3


From db9e726525e45dbd713c07897a4d20bc18333ccc Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf.kernel@gmail.com>
Date: Thu, 16 Apr 2026 11:56:58 -0700
Subject: net: add address list snapshot and reconciliation infrastructure

Introduce __hw_addr_list_snapshot() and __hw_addr_list_reconcile()
for use by the upcoming ndo_set_rx_mode_async callback.

The async rx_mode path needs to snapshot the device's unicast and
multicast address lists under the addr_lock, hand those snapshots
to the driver (which may sleep), and then propagate any sync_cnt
changes back to the real lists. Two identical snapshots are taken:
a work copy for the driver to pass to __hw_addr_sync_dev() and a
reference copy to compute deltas against.

__hw_addr_list_reconcile() walks the reference snapshot comparing
each entry against the work snapshot to determine what the driver
synced or unsynced. It then applies those deltas to the real list,
handling concurrent modifications:

  - If the real entry was concurrently removed but the driver synced
    it to hardware (delta > 0), re-insert a stale entry so the next
    work run properly unsyncs it from hardware.
  - If the entry still exists, apply the delta normally. An entry
    whose refcount drops to zero is removed.

  # dev_addr_test_snapshot_benchmark: 1024 addrs x 1000 snapshots: 89872802 ns total, 89872 ns/iter
  # dev_addr_test_snapshot_benchmark.speed: slow

Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20260416185712.2155425-2-sdf@fomichev.me
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h      |   7 +
 net/core/dev.h                 |   1 +
 net/core/dev_addr_lists.c      | 109 ++++++++++++-
 net/core/dev_addr_lists_test.c | 363 ++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 477 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7969fcdd5ac4..a84c55488b8c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5004,6 +5004,13 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
 			  int (*unsync)(struct net_device *,
 					const unsigned char *));
 void __hw_addr_init(struct netdev_hw_addr_list *list);
+void __hw_addr_flush(struct netdev_hw_addr_list *list);
+int __hw_addr_list_snapshot(struct netdev_hw_addr_list *snap,
+			    const struct netdev_hw_addr_list *list,
+			    int addr_len);
+void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list,
+			      struct netdev_hw_addr_list *work,
+			      struct netdev_hw_addr_list *ref, int addr_len);
 
 /* Functions used for device addresses handling */
 void dev_addr_mod(struct net_device *dev, unsigned int offset,
diff --git a/net/core/dev.h b/net/core/dev.h
index 628bdaebf0ca..585b6d7e88df 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -78,6 +78,7 @@ void linkwatch_run_queue(void);
 void dev_addr_flush(struct net_device *dev);
 int dev_addr_init(struct net_device *dev);
 void dev_addr_check(struct net_device *dev);
+void __hw_addr_flush(struct netdev_hw_addr_list *list);
 
 #if IS_ENABLED(CONFIG_NET_SHAPER)
 void net_shaper_flush_netdev(struct net_device *dev);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 76c91f224886..bb4851bc55ce 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -11,6 +11,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/export.h>
 #include <linux/list.h>
+#include <kunit/visibility.h>
 
 #include "dev.h"
 
@@ -481,7 +482,7 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
 }
 EXPORT_SYMBOL(__hw_addr_unsync_dev);
 
-static void __hw_addr_flush(struct netdev_hw_addr_list *list)
+void __hw_addr_flush(struct netdev_hw_addr_list *list)
 {
 	struct netdev_hw_addr *ha, *tmp;
 
@@ -492,6 +493,7 @@ static void __hw_addr_flush(struct netdev_hw_addr_list *list)
 	}
 	list->count = 0;
 }
+EXPORT_SYMBOL_IF_KUNIT(__hw_addr_flush);
 
 void __hw_addr_init(struct netdev_hw_addr_list *list)
 {
@@ -501,6 +503,111 @@ void __hw_addr_init(struct netdev_hw_addr_list *list)
 }
 EXPORT_SYMBOL(__hw_addr_init);
 
+/**
+ *  __hw_addr_list_snapshot - create a snapshot copy of an address list
+ *  @snap: destination snapshot list (needs to be __hw_addr_init-initialized)
+ *  @list: source address list to snapshot
+ *  @addr_len: length of addresses
+ *
+ *  Creates a copy of @list with individually allocated entries suitable
+ *  for use with __hw_addr_sync_dev() and other list manipulation helpers.
+ *  Each entry is allocated with GFP_ATOMIC; must be called under a spinlock.
+ *
+ *  Return: 0 on success, -errno on failure.
+ */
+int __hw_addr_list_snapshot(struct netdev_hw_addr_list *snap,
+			    const struct netdev_hw_addr_list *list,
+			    int addr_len)
+{
+	struct netdev_hw_addr *ha, *entry;
+
+	list_for_each_entry(ha, &list->list, list) {
+		entry = __hw_addr_create(ha->addr, addr_len, ha->type,
+					 false, false);
+		if (!entry) {
+			__hw_addr_flush(snap);
+			return -ENOMEM;
+		}
+		entry->sync_cnt = ha->sync_cnt;
+		entry->refcount = ha->refcount;
+
+		list_add_tail(&entry->list, &snap->list);
+		__hw_addr_insert(snap, entry, addr_len);
+		snap->count++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(__hw_addr_list_snapshot);
+
+/**
+ *  __hw_addr_list_reconcile - sync snapshot changes back and free snapshots
+ *  @real_list: the real address list to update
+ *  @work: the working snapshot (modified by driver via __hw_addr_sync_dev)
+ *  @ref: the reference snapshot (untouched copy of original state)
+ *  @addr_len: length of addresses
+ *
+ *  Walks the reference snapshot and compares each entry against the work
+ *  snapshot to compute sync_cnt deltas. Applies those deltas to @real_list.
+ *  Frees both snapshots when done.
+ *  Caller must hold netif_addr_lock_bh.
+ */
+void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list,
+			      struct netdev_hw_addr_list *work,
+			      struct netdev_hw_addr_list *ref, int addr_len)
+{
+	struct netdev_hw_addr *ref_ha, *tmp, *work_ha, *real_ha;
+	int delta;
+
+	list_for_each_entry_safe(ref_ha, tmp, &ref->list, list) {
+		work_ha = __hw_addr_lookup(work, ref_ha->addr, addr_len,
+					   ref_ha->type);
+		if (work_ha)
+			delta = work_ha->sync_cnt - ref_ha->sync_cnt;
+		else
+			delta = -1;
+
+		if (delta == 0)
+			continue;
+
+		real_ha = __hw_addr_lookup(real_list, ref_ha->addr, addr_len,
+					   ref_ha->type);
+		if (!real_ha) {
+			/* The real entry was concurrently removed. If the
+			 * driver synced this addr to hardware (delta > 0),
+			 * re-insert it as a stale entry so the next work
+			 * run unsyncs it from hardware.
+			 */
+			if (delta > 0) {
+				rb_erase(&ref_ha->node, &ref->tree);
+				list_del(&ref_ha->list);
+				ref->count--;
+				ref_ha->sync_cnt = delta;
+				ref_ha->refcount = delta;
+				list_add_tail_rcu(&ref_ha->list,
+						  &real_list->list);
+				__hw_addr_insert(real_list, ref_ha,
+						 addr_len);
+				real_list->count++;
+			}
+			continue;
+		}
+
+		real_ha->sync_cnt += delta;
+		real_ha->refcount += delta;
+		if (!real_ha->refcount) {
+			rb_erase(&real_ha->node, &real_list->tree);
+			list_del_rcu(&real_ha->list);
+			kfree_rcu(real_ha, rcu_head);
+			real_list->count--;
+		}
+	}
+
+	__hw_addr_flush(work);
+	__hw_addr_flush(ref);
+}
+EXPORT_SYMBOL_IF_KUNIT(__hw_addr_list_reconcile);
+
 /*
  * Device addresses handling functions
  */
diff --git a/net/core/dev_addr_lists_test.c b/net/core/dev_addr_lists_test.c
index 8e1dba825e94..fba926d5ec0d 100644
--- a/net/core/dev_addr_lists_test.c
+++ b/net/core/dev_addr_lists_test.c
@@ -2,22 +2,31 @@
 
 #include <kunit/test.h>
 #include <linux/etherdevice.h>
+#include <linux/math64.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 
 static const struct net_device_ops dummy_netdev_ops = {
 };
 
+#define ADDR_A	1
+#define ADDR_B	2
+#define ADDR_C	3
+
 struct dev_addr_test_priv {
 	u32 addr_seen;
+	u32 addr_synced;
+	u32 addr_unsynced;
 };
 
 static int dev_addr_test_sync(struct net_device *netdev, const unsigned char *a)
 {
 	struct dev_addr_test_priv *datp = netdev_priv(netdev);
 
-	if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN))
+	if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN)) {
 		datp->addr_seen |= 1 << a[0];
+		datp->addr_synced |= 1 << a[0];
+	}
 	return 0;
 }
 
@@ -26,11 +35,22 @@ static int dev_addr_test_unsync(struct net_device *netdev,
 {
 	struct dev_addr_test_priv *datp = netdev_priv(netdev);
 
-	if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN))
+	if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN)) {
 		datp->addr_seen &= ~(1 << a[0]);
+		datp->addr_unsynced |= 1 << a[0];
+	}
 	return 0;
 }
 
+static void dev_addr_test_reset(struct net_device *netdev)
+{
+	struct dev_addr_test_priv *datp = netdev_priv(netdev);
+
+	datp->addr_seen = 0;
+	datp->addr_synced = 0;
+	datp->addr_unsynced = 0;
+}
+
 static int dev_addr_test_init(struct kunit *test)
 {
 	struct dev_addr_test_priv *datp;
@@ -225,6 +245,339 @@ static void dev_addr_test_add_excl(struct kunit *test)
 	rtnl_unlock();
 }
 
+/* Snapshot test: basic sync with no concurrent modifications.
+ * Add one address, snapshot, driver syncs it, reconcile propagates
+ * sync_cnt delta back to real list.
+ */
+static void dev_addr_test_snapshot_sync(struct kunit *test)
+{
+	struct net_device *netdev = test->priv;
+	struct netdev_hw_addr_list snap, ref;
+	struct dev_addr_test_priv *datp;
+	struct netdev_hw_addr *ha;
+	u8 addr[ETH_ALEN];
+
+	datp = netdev_priv(netdev);
+
+	rtnl_lock();
+
+	memset(addr, ADDR_A, sizeof(addr));
+	KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr));
+
+	/* Snapshot: ADDR_A has sync_cnt=0, refcount=1 (new) */
+	netif_addr_lock_bh(netdev);
+	__hw_addr_init(&snap);
+	__hw_addr_init(&ref);
+	KUNIT_EXPECT_EQ(test, 0,
+			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+	KUNIT_EXPECT_EQ(test, 0,
+			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+	netif_addr_unlock_bh(netdev);
+
+	/* Driver syncs ADDR_A to hardware */
+	dev_addr_test_reset(netdev);
+	__hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync,
+			   dev_addr_test_unsync);
+	KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced);
+	KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced);
+
+	/* Reconcile: delta=+1 applied to real entry */
+	netif_addr_lock_bh(netdev);
+	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+	netif_addr_unlock_bh(netdev);
+
+	/* Real entry should now reflect the sync: sync_cnt=1, refcount=2 */
+	KUNIT_EXPECT_EQ(test, 1, netdev->uc.count);
+	ha = list_first_entry(&netdev->uc.list, struct netdev_hw_addr, list);
+	KUNIT_EXPECT_MEMEQ(test, ha->addr, addr, ETH_ALEN);
+	KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt);
+	KUNIT_EXPECT_EQ(test, 2, ha->refcount);
+
+	/* Second work run: already synced, nothing to do */
+	dev_addr_test_reset(netdev);
+	__hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync,
+			   dev_addr_test_unsync);
+	KUNIT_EXPECT_EQ(test, 0, datp->addr_synced);
+	KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced);
+	KUNIT_EXPECT_EQ(test, 1, netdev->uc.count);
+
+	rtnl_unlock();
+}
+
+/* Snapshot test: ADDR_A synced to hardware, then concurrently removed
+ * from the real list before reconcile runs. Reconcile re-inserts ADDR_A as
+ * a stale entry so the next work run unsyncs it from hardware.
+ */
+static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
+{
+	struct net_device *netdev = test->priv;
+	struct netdev_hw_addr_list snap, ref;
+	struct dev_addr_test_priv *datp;
+	struct netdev_hw_addr *ha;
+	u8 addr[ETH_ALEN];
+
+	datp = netdev_priv(netdev);
+
+	rtnl_lock();
+
+	memset(addr, ADDR_A, sizeof(addr));
+	KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr));
+
+	/* Snapshot: ADDR_A is new (sync_cnt=0, refcount=1) */
+	netif_addr_lock_bh(netdev);
+	__hw_addr_init(&snap);
+	__hw_addr_init(&ref);
+	KUNIT_EXPECT_EQ(test, 0,
+			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+	KUNIT_EXPECT_EQ(test, 0,
+			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+	netif_addr_unlock_bh(netdev);
+
+	/* Driver syncs ADDR_A to hardware */
+	dev_addr_test_reset(netdev);
+	__hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync,
+			   dev_addr_test_unsync);
+	KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced);
+	KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced);
+
+	/* Concurrent removal: user deletes ADDR_A while driver was working */
+	memset(addr, ADDR_A, sizeof(addr));
+	KUNIT_EXPECT_EQ(test, 0, dev_uc_del(netdev, addr));
+	KUNIT_EXPECT_EQ(test, 0, netdev->uc.count);
+
+	/* Reconcile: ADDR_A gone from real list but driver synced it,
+	 * so it gets re-inserted as stale (sync_cnt=1, refcount=1).
+	 */
+	netif_addr_lock_bh(netdev);
+	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+	netif_addr_unlock_bh(netdev);
+
+	KUNIT_EXPECT_EQ(test, 1, netdev->uc.count);
+	ha = list_first_entry(&netdev->uc.list, struct netdev_hw_addr, list);
+	KUNIT_EXPECT_MEMEQ(test, ha->addr, addr, ETH_ALEN);
+	KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt);
+	KUNIT_EXPECT_EQ(test, 1, ha->refcount);
+
+	/* Second work run: stale entry gets unsynced from HW and removed */
+	dev_addr_test_reset(netdev);
+	__hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync,
+			   dev_addr_test_unsync);
+	KUNIT_EXPECT_EQ(test, 0, datp->addr_synced);
+	KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_unsynced);
+	KUNIT_EXPECT_EQ(test, 0, netdev->uc.count);
+
+	rtnl_unlock();
+}
+
+/* Snapshot test: ADDR_A was stale (unsynced from hardware by driver),
+ * but concurrently re-added by the user. The re-add bumps refcount of
+ * the existing stale entry. Reconcile applies delta=-1, leaving ADDR_A
+ * as a fresh entry (sync_cnt=0, refcount=1) for the next work run.
+ */
+static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
+{
+	struct net_device *netdev = test->priv;
+	struct netdev_hw_addr_list snap, ref;
+	struct dev_addr_test_priv *datp;
+	struct netdev_hw_addr *ha;
+	u8 addr[ETH_ALEN];
+
+	datp = netdev_priv(netdev);
+
+	rtnl_lock();
+
+	memset(addr, ADDR_A, sizeof(addr));
+	KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr));
+
+	/* Sync ADDR_A to hardware: sync_cnt=1, refcount=2 */
+	dev_addr_test_reset(netdev);
+	__hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync,
+			   dev_addr_test_unsync);
+	KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced);
+	KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced);
+
+	/* User removes ADDR_A: refcount=1, sync_cnt=1 -> stale */
+	KUNIT_EXPECT_EQ(test, 0, dev_uc_del(netdev, addr));
+
+	/* Snapshot: ADDR_A is stale (sync_cnt=1, refcount=1) */
+	netif_addr_lock_bh(netdev);
+	__hw_addr_init(&snap);
+	__hw_addr_init(&ref);
+	KUNIT_EXPECT_EQ(test, 0,
+			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+	KUNIT_EXPECT_EQ(test, 0,
+			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+	netif_addr_unlock_bh(netdev);
+
+	/* Driver unsyncs stale ADDR_A from hardware */
+	dev_addr_test_reset(netdev);
+	__hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync,
+			   dev_addr_test_unsync);
+	KUNIT_EXPECT_EQ(test, 0, datp->addr_synced);
+	KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_unsynced);
+
+	/* Concurrent: user re-adds ADDR_A.  dev_uc_add finds the existing
+	 * stale entry and bumps refcount from 1 -> 2.  sync_cnt stays 1.
+	 */
+	KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr));
+	KUNIT_EXPECT_EQ(test, 1, netdev->uc.count);
+
+	/* Reconcile: ref sync_cnt=1 matches real sync_cnt=1, delta=-1
+	 * applied. Result: sync_cnt=0, refcount=1 (fresh).
+	 */
+	netif_addr_lock_bh(netdev);
+	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+	netif_addr_unlock_bh(netdev);
+
+	/* Entry survives as fresh: needs re-sync to HW */
+	KUNIT_EXPECT_EQ(test, 1, netdev->uc.count);
+	ha = list_first_entry(&netdev->uc.list, struct netdev_hw_addr, list);
+	KUNIT_EXPECT_MEMEQ(test, ha->addr, addr, ETH_ALEN);
+	KUNIT_EXPECT_EQ(test, 0, ha->sync_cnt);
+	KUNIT_EXPECT_EQ(test, 1, ha->refcount);
+
+	/* Second work run: fresh entry gets synced to HW */
+	dev_addr_test_reset(netdev);
+	__hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync,
+			   dev_addr_test_unsync);
+	KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced);
+	KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced);
+
+	rtnl_unlock();
+}
+
+/* Snapshot test: ADDR_A is new (synced by driver), and independent ADDR_B
+ * is concurrently removed from the real list. A's sync delta propagates
+ * normally; B's absence doesn't interfere.
+ */
+static void dev_addr_test_snapshot_add_and_remove(struct kunit *test)
+{
+	struct net_device *netdev = test->priv;
+	struct netdev_hw_addr_list snap, ref;
+	struct dev_addr_test_priv *datp;
+	struct netdev_hw_addr *ha;
+	u8 addr[ETH_ALEN];
+
+	datp = netdev_priv(netdev);
+
+	rtnl_lock();
+
+	/* Add ADDR_A and ADDR_B (will be synced then removed) */
+	memset(addr, ADDR_A, sizeof(addr));
+	KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr));
+	memset(addr, ADDR_B, sizeof(addr));
+	KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr));
+
+	/* Sync both to hardware: sync_cnt=1, refcount=2 */
+	__hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync,
+			   dev_addr_test_unsync);
+
+	/* Add ADDR_C (new, will be synced by snapshot) */
+	memset(addr, ADDR_C, sizeof(addr));
+	KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr));
+
+	/* Snapshot: A,B synced (sync_cnt=1,refcount=2); C new (0,1) */
+	netif_addr_lock_bh(netdev);
+	__hw_addr_init(&snap);
+	__hw_addr_init(&ref);
+	KUNIT_EXPECT_EQ(test, 0,
+			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+	KUNIT_EXPECT_EQ(test, 0,
+			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+	netif_addr_unlock_bh(netdev);
+
+	/* Driver syncs snapshot: ADDR_C is new -> synced; A,B already synced */
+	dev_addr_test_reset(netdev);
+	__hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync,
+			   dev_addr_test_unsync);
+	KUNIT_EXPECT_EQ(test, 1 << ADDR_C, datp->addr_synced);
+	KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced);
+
+	/* Concurrent: user removes addr B while driver was working */
+	memset(addr, ADDR_B, sizeof(addr));
+	KUNIT_EXPECT_EQ(test, 0, dev_uc_del(netdev, addr));
+
+	/* Reconcile: ADDR_C's delta=+1 applied to real list.
+	 * ADDR_B's delta=0 (unchanged in snapshot),
+	 * so nothing to apply to ADDR_B.
+	 */
+	netif_addr_lock_bh(netdev);
+	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+	netif_addr_unlock_bh(netdev);
+
+	/* ADDR_A: unchanged (sync_cnt=1, refcount=2)
+	 * ADDR_B: refcount went from 2->1 via dev_uc_del (still present, stale)
+	 * ADDR_C: sync propagated (sync_cnt=1, refcount=2)
+	 */
+	KUNIT_EXPECT_EQ(test, 3, netdev->uc.count);
+	netdev_hw_addr_list_for_each(ha, &netdev->uc) {
+		u8 id = ha->addr[0];
+
+		if (!memchr_inv(ha->addr, id, ETH_ALEN)) {
+			if (id == ADDR_A) {
+				KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt);
+				KUNIT_EXPECT_EQ(test, 2, ha->refcount);
+			} else if (id == ADDR_B) {
+				/* B: still present but now stale */
+				KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt);
+				KUNIT_EXPECT_EQ(test, 1, ha->refcount);
+			} else if (id == ADDR_C) {
+				KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt);
+				KUNIT_EXPECT_EQ(test, 2, ha->refcount);
+			}
+		}
+	}
+
+	/* Second work run: ADDR_B is stale, gets unsynced and removed */
+	dev_addr_test_reset(netdev);
+	__hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync,
+			   dev_addr_test_unsync);
+	KUNIT_EXPECT_EQ(test, 0, datp->addr_synced);
+	KUNIT_EXPECT_EQ(test, 1 << ADDR_B, datp->addr_unsynced);
+	KUNIT_EXPECT_EQ(test, 2, netdev->uc.count);
+
+	rtnl_unlock();
+}
+
+static void dev_addr_test_snapshot_benchmark(struct kunit *test)
+{
+	struct net_device *netdev = test->priv;
+	struct netdev_hw_addr_list snap;
+	u8 addr[ETH_ALEN];
+	s64 duration = 0;
+	ktime_t start;
+	int i, iter;
+
+	rtnl_lock();
+
+	for (i = 0; i < 1024; i++) {
+		memset(addr, 0, sizeof(addr));
+		addr[0] = (i >> 8) & 0xff;
+		addr[1] = i & 0xff;
+		KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr));
+	}
+
+	for (iter = 0; iter < 1000; iter++) {
+		netif_addr_lock_bh(netdev);
+		__hw_addr_init(&snap);
+
+		start = ktime_get();
+		KUNIT_EXPECT_EQ(test, 0,
+				__hw_addr_list_snapshot(&snap, &netdev->uc,
+							ETH_ALEN));
+		duration += ktime_to_ns(ktime_sub(ktime_get(), start));
+
+		netif_addr_unlock_bh(netdev);
+		__hw_addr_flush(&snap);
+	}
+
+	kunit_info(test,
+		   "1024 addrs x 1000 snapshots: %lld ns total, %lld ns/iter",
+		   duration, div_s64(duration, 1000));
+
+	rtnl_unlock();
+}
+
 static struct kunit_case dev_addr_test_cases[] = {
 	KUNIT_CASE(dev_addr_test_basic),
 	KUNIT_CASE(dev_addr_test_sync_one),
@@ -232,6 +585,11 @@ static struct kunit_case dev_addr_test_cases[] = {
 	KUNIT_CASE(dev_addr_test_del_main),
 	KUNIT_CASE(dev_addr_test_add_set),
 	KUNIT_CASE(dev_addr_test_add_excl),
+	KUNIT_CASE(dev_addr_test_snapshot_sync),
+	KUNIT_CASE(dev_addr_test_snapshot_remove_during_sync),
+	KUNIT_CASE(dev_addr_test_snapshot_readd_during_unsync),
+	KUNIT_CASE(dev_addr_test_snapshot_add_and_remove),
+	KUNIT_CASE_SLOW(dev_addr_test_snapshot_benchmark),
 	{}
 };
 
@@ -243,5 +601,6 @@ static struct kunit_suite dev_addr_test_suite = {
 };
 kunit_test_suite(dev_addr_test_suite);
 
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
 MODULE_DESCRIPTION("KUnit tests for struct netdev_hw_addr_list");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 3554b4345d855089ab7af5e3557f5dc3262d14c9 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf.kernel@gmail.com>
Date: Thu, 16 Apr 2026 11:56:59 -0700
Subject: net: introduce ndo_set_rx_mode_async and netdev_rx_mode_work

Add ndo_set_rx_mode_async callback that drivers can implement instead
of the legacy ndo_set_rx_mode. The legacy callback runs under the
netif_addr_lock spinlock with BHs disabled, preventing drivers from
sleeping. The async variant runs from a work queue with rtnl_lock and
netdev_lock_ops held, in fully sleepable context.

When __dev_set_rx_mode() sees ndo_set_rx_mode_async, it schedules
netdev_rx_mode_work instead of calling the driver inline. The work
function takes two snapshots of each address list (uc/mc) under
the addr_lock, then drops the lock and calls the driver with the
work copies. After the driver returns, it reconciles the snapshots
back to the real lists under the lock.

Add netif_rx_mode_sync() to opportunistically execute the pending
workqueue update inline, so that rx mode changes are committed
before returning to userspace:
  - dev_change_flags (SIOCSIFFLAGS / RTM_NEWLINK)
  - dev_set_promiscuity
  - dev_set_allmulti
  - dev_ifsioc SIOCADDMULTI / SIOCDELMULTI
  - do_setlink (RTM_SETLINK)

Note that some deep hierarchies still do skip the lower updates via:
  - dev_uc_sync
  - dev_mc_sync

If we do end up hitting user-visible issues, we can add more calls to
netif_rx_mode_sync in specific places. But hopefully we should not,
the actual user-visible lists are still synced, it's that just HW state
that might be lagging.

Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20260416185712.2155425-3-sdf@fomichev.me
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdevices.rst |   9 ++
 include/linux/netdevice.h               |  18 +++
 net/core/dev.c                          |  43 +------
 net/core/dev.h                          |   3 +
 net/core/dev_addr_lists.c               | 209 ++++++++++++++++++++++++++++++++
 net/core/dev_api.c                      |   3 +
 net/core/dev_ioctl.c                    |   6 +-
 net/core/rtnetlink.c                    |   1 +
 8 files changed, 249 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index 83e28b96884f..e89b12d4f3a7 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -289,6 +289,15 @@ ndo_tx_timeout:
 ndo_set_rx_mode:
 	Synchronization: netif_addr_lock spinlock.
 	Context: BHs disabled
+	Notes: Deprecated in favor of ndo_set_rx_mode_async which runs
+	in process context.
+
+ndo_set_rx_mode_async:
+	Synchronization: rtnl_lock() semaphore. In addition, netdev instance
+	lock if the driver implements queue management or shaper API.
+	Context: process (from a work queue)
+	Notes: Async version of ndo_set_rx_mode which runs in process
+	context. Receives snapshots of the unicast and multicast address lists.
 
 ndo_setup_tc:
 	``TC_SETUP_BLOCK`` and ``TC_SETUP_FT`` are running under NFT locks
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a84c55488b8c..6ed97f4c3bc6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1119,6 +1119,16 @@ struct netdev_net_notifier {
  *	This function is called device changes address list filtering.
  *	If driver handles unicast address filtering, it should set
  *	IFF_UNICAST_FLT in its priv_flags.
+ *	Cannot sleep, called with netif_addr_lock_bh held.
+ *	Deprecated in favor of ndo_set_rx_mode_async.
+ *
+ * void (*ndo_set_rx_mode_async)(struct net_device *dev,
+ *				 struct netdev_hw_addr_list *uc,
+ *				 struct netdev_hw_addr_list *mc);
+ *	Async version of ndo_set_rx_mode which runs in process context
+ *	with rtnl_lock and netdev_lock_ops(dev) held. The uc/mc parameters
+ *	are snapshots of the address lists - iterate with
+ *	netdev_hw_addr_list_for_each(ha, uc).
  *
  * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
  *	This function  is called when the Media Access Control address
@@ -1439,6 +1449,10 @@ struct net_device_ops {
 	void			(*ndo_change_rx_flags)(struct net_device *dev,
 						       int flags);
 	void			(*ndo_set_rx_mode)(struct net_device *dev);
+	void			(*ndo_set_rx_mode_async)(
+					struct net_device *dev,
+					struct netdev_hw_addr_list *uc,
+					struct netdev_hw_addr_list *mc);
 	int			(*ndo_set_mac_address)(struct net_device *dev,
 						       void *addr);
 	int			(*ndo_validate_addr)(struct net_device *dev);
@@ -1903,6 +1917,8 @@ enum netdev_reg_state {
  *				has been enabled due to the need to listen to
  *				additional unicast addresses in a device that
  *				does not implement ndo_set_rx_mode()
+ *	@rx_mode_node:		List entry for rx_mode work processing
+ *	@rx_mode_tracker:	Refcount tracker for rx_mode work
  *	@uc:			unicast mac addresses
  *	@mc:			multicast mac addresses
  *	@dev_addrs:		list of device hw addresses
@@ -2294,6 +2310,8 @@ struct net_device {
 	unsigned int		promiscuity;
 	unsigned int		allmulti;
 	bool			uc_promisc;
+	struct list_head	rx_mode_node;
+	netdevice_tracker	rx_mode_tracker;
 #ifdef CONFIG_LOCKDEP
 	unsigned char		nested_level;
 #endif
diff --git a/net/core/dev.c b/net/core/dev.c
index e59f6025067c..b37061238a25 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9593,7 +9593,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
 		ops->ndo_change_rx_flags(dev, flags);
 }
 
-static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
+int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
 {
 	unsigned int old_flags = dev->flags;
 	unsigned int promiscuity, flags;
@@ -9697,46 +9697,6 @@ int netif_set_allmulti(struct net_device *dev, int inc, bool notify)
 	return 0;
 }
 
-/*
- *	Upload unicast and multicast address lists to device and
- *	configure RX filtering. When the device doesn't support unicast
- *	filtering it is put in promiscuous mode while unicast addresses
- *	are present.
- */
-void __dev_set_rx_mode(struct net_device *dev)
-{
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	/* dev_open will call this function so the list will stay sane. */
-	if (!(dev->flags&IFF_UP))
-		return;
-
-	if (!netif_device_present(dev))
-		return;
-
-	if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
-		/* Unicast addresses changes may only happen under the rtnl,
-		 * therefore calling __dev_set_promiscuity here is safe.
-		 */
-		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
-			__dev_set_promiscuity(dev, 1, false);
-			dev->uc_promisc = true;
-		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
-			__dev_set_promiscuity(dev, -1, false);
-			dev->uc_promisc = false;
-		}
-	}
-
-	if (ops->ndo_set_rx_mode)
-		ops->ndo_set_rx_mode(dev);
-}
-
-void dev_set_rx_mode(struct net_device *dev)
-{
-	netif_addr_lock_bh(dev);
-	__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
-}
 
 /**
  * netif_get_flags() - get flags reported to userspace
@@ -12127,6 +12087,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 #endif
 
 	mutex_init(&dev->lock);
+	INIT_LIST_HEAD(&dev->rx_mode_node);
 
 	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
diff --git a/net/core/dev.h b/net/core/dev.h
index 585b6d7e88df..0cf24b8f5008 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -165,6 +165,9 @@ int netif_change_carrier(struct net_device *dev, bool new_carrier);
 int dev_change_carrier(struct net_device *dev, bool new_carrier);
 
 void __dev_set_rx_mode(struct net_device *dev);
+int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify);
+bool netif_rx_mode_clean(struct net_device *dev);
+void netif_rx_mode_sync(struct net_device *dev);
 
 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
 			unsigned int gchanges, u32 portid,
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index bb4851bc55ce..056bca6fce12 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -11,10 +11,18 @@
 #include <linux/rtnetlink.h>
 #include <linux/export.h>
 #include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
 #include <kunit/visibility.h>
 
 #include "dev.h"
 
+static void netdev_rx_mode_work(struct work_struct *work);
+
+static LIST_HEAD(rx_mode_list);
+static DEFINE_SPINLOCK(rx_mode_lock);
+static DECLARE_WORK(rx_mode_work, netdev_rx_mode_work);
+
 /*
  * General list handling functions
  */
@@ -1156,3 +1164,204 @@ void dev_mc_init(struct net_device *dev)
 	__hw_addr_init(&dev->mc);
 }
 EXPORT_SYMBOL(dev_mc_init);
+
+static int netif_addr_lists_snapshot(struct net_device *dev,
+				     struct netdev_hw_addr_list *uc_snap,
+				     struct netdev_hw_addr_list *mc_snap,
+				     struct netdev_hw_addr_list *uc_ref,
+				     struct netdev_hw_addr_list *mc_ref)
+{
+	int err;
+
+	err = __hw_addr_list_snapshot(uc_snap, &dev->uc, dev->addr_len);
+	if (!err)
+		err = __hw_addr_list_snapshot(uc_ref, &dev->uc, dev->addr_len);
+	if (!err)
+		err = __hw_addr_list_snapshot(mc_snap, &dev->mc,
+					      dev->addr_len);
+	if (!err)
+		err = __hw_addr_list_snapshot(mc_ref, &dev->mc, dev->addr_len);
+
+	if (err) {
+		__hw_addr_flush(uc_snap);
+		__hw_addr_flush(uc_ref);
+		__hw_addr_flush(mc_snap);
+	}
+
+	return err;
+}
+
+static void netif_addr_lists_reconcile(struct net_device *dev,
+				       struct netdev_hw_addr_list *uc_snap,
+				       struct netdev_hw_addr_list *mc_snap,
+				       struct netdev_hw_addr_list *uc_ref,
+				       struct netdev_hw_addr_list *mc_ref)
+{
+	__hw_addr_list_reconcile(&dev->uc, uc_snap, uc_ref, dev->addr_len);
+	__hw_addr_list_reconcile(&dev->mc, mc_snap, mc_ref, dev->addr_len);
+}
+
+static void netif_rx_mode_run(struct net_device *dev)
+{
+	struct netdev_hw_addr_list uc_snap, mc_snap, uc_ref, mc_ref;
+	const struct net_device_ops *ops = dev->netdev_ops;
+	int err;
+
+	might_sleep();
+	netdev_ops_assert_locked(dev);
+
+	__hw_addr_init(&uc_snap);
+	__hw_addr_init(&mc_snap);
+	__hw_addr_init(&uc_ref);
+	__hw_addr_init(&mc_ref);
+
+	if (!(dev->flags & IFF_UP) || !netif_device_present(dev))
+		return;
+
+	netif_addr_lock_bh(dev);
+	err = netif_addr_lists_snapshot(dev, &uc_snap, &mc_snap,
+					&uc_ref, &mc_ref);
+	if (err) {
+		netdev_WARN(dev, "failed to sync uc/mc addresses\n");
+		netif_addr_unlock_bh(dev);
+		return;
+	}
+	netif_addr_unlock_bh(dev);
+
+	ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap);
+
+	netif_addr_lock_bh(dev);
+	netif_addr_lists_reconcile(dev, &uc_snap, &mc_snap,
+				   &uc_ref, &mc_ref);
+	netif_addr_unlock_bh(dev);
+}
+
+static void netdev_rx_mode_work(struct work_struct *work)
+{
+	struct net_device *dev;
+
+	rtnl_lock();
+
+	while (true) {
+		spin_lock_bh(&rx_mode_lock);
+		if (list_empty(&rx_mode_list)) {
+			spin_unlock_bh(&rx_mode_lock);
+			break;
+		}
+		dev = list_first_entry(&rx_mode_list, struct net_device,
+				       rx_mode_node);
+		list_del_init(&dev->rx_mode_node);
+		/* We must free netdev tracker under
+		 * the spinlock protection.
+		 */
+		netdev_tracker_free(dev, &dev->rx_mode_tracker);
+		spin_unlock_bh(&rx_mode_lock);
+
+		netdev_lock_ops(dev);
+		netif_rx_mode_run(dev);
+		netdev_unlock_ops(dev);
+		/* Use __dev_put() because netdev_tracker_free() was already
+		 * called above. Must be after netdev_unlock_ops() to prevent
+		 * netdev_run_todo() from freeing the device while still in use.
+		 */
+		__dev_put(dev);
+	}
+
+	rtnl_unlock();
+}
+
+static void netif_rx_mode_queue(struct net_device *dev)
+{
+	spin_lock_bh(&rx_mode_lock);
+	if (list_empty(&dev->rx_mode_node)) {
+		list_add_tail(&dev->rx_mode_node, &rx_mode_list);
+		netdev_hold(dev, &dev->rx_mode_tracker, GFP_ATOMIC);
+	}
+	spin_unlock_bh(&rx_mode_lock);
+	schedule_work(&rx_mode_work);
+}
+
+/**
+ * __dev_set_rx_mode() - upload unicast and multicast address lists to device
+ * and configure RX filtering.
+ * @dev: device
+ *
+ * When the device doesn't support unicast filtering it is put in promiscuous
+ * mode while unicast addresses are present.
+ */
+void __dev_set_rx_mode(struct net_device *dev)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	/* dev_open will call this function so the list will stay sane. */
+	if (!(dev->flags & IFF_UP))
+		return;
+
+	if (!netif_device_present(dev))
+		return;
+
+	if (ops->ndo_set_rx_mode_async) {
+		netif_rx_mode_queue(dev);
+		return;
+	}
+
+	if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
+		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
+			__dev_set_promiscuity(dev, 1, false);
+			dev->uc_promisc = true;
+		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
+			__dev_set_promiscuity(dev, -1, false);
+			dev->uc_promisc = false;
+		}
+	}
+
+	if (ops->ndo_set_rx_mode)
+		ops->ndo_set_rx_mode(dev);
+}
+
+void dev_set_rx_mode(struct net_device *dev)
+{
+	netif_addr_lock_bh(dev);
+	__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+}
+
+bool netif_rx_mode_clean(struct net_device *dev)
+{
+	bool clean = false;
+
+	spin_lock_bh(&rx_mode_lock);
+	if (!list_empty(&dev->rx_mode_node)) {
+		list_del_init(&dev->rx_mode_node);
+		clean = true;
+		/* We must release netdev tracker under
+		 * the spinlock protection.
+		 */
+		netdev_tracker_free(dev, &dev->rx_mode_tracker);
+	}
+	spin_unlock_bh(&rx_mode_lock);
+
+	return clean;
+}
+
+/**
+ * netif_rx_mode_sync() - sync rx mode inline
+ * @dev: network device
+ *
+ * Drivers implementing ndo_set_rx_mode_async() have their rx mode callback
+ * executed from a workqueue. This allows the callback to sleep, but means
+ * the hardware update is deferred and may not be visible to userspace
+ * by the time the initiating syscall returns. netif_rx_mode_sync() steals
+ * workqueue update and executes it inline. This preserves the atomicity of
+ * operations to the userspace.
+ */
+void netif_rx_mode_sync(struct net_device *dev)
+{
+	if (netif_rx_mode_clean(dev)) {
+		netif_rx_mode_run(dev);
+		/* Use __dev_put() because netdev_tracker_free() was already
+		 * called inside netif_rx_mode_clean().
+		 */
+		__dev_put(dev);
+	}
+}
diff --git a/net/core/dev_api.c b/net/core/dev_api.c
index f28852078aa6..437947dd08ed 100644
--- a/net/core/dev_api.c
+++ b/net/core/dev_api.c
@@ -66,6 +66,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags,
 
 	netdev_lock_ops(dev);
 	ret = netif_change_flags(dev, flags, extack);
+	netif_rx_mode_sync(dev);
 	netdev_unlock_ops(dev);
 
 	return ret;
@@ -285,6 +286,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
 
 	netdev_lock_ops(dev);
 	ret = netif_set_promiscuity(dev, inc);
+	netif_rx_mode_sync(dev);
 	netdev_unlock_ops(dev);
 
 	return ret;
@@ -311,6 +313,7 @@ int dev_set_allmulti(struct net_device *dev, int inc)
 
 	netdev_lock_ops(dev);
 	ret = netif_set_allmulti(dev, inc, true);
+	netif_rx_mode_sync(dev);
 	netdev_unlock_ops(dev);
 
 	return ret;
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 7a8966544c9d..f3979b276090 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -586,24 +586,26 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
 		return err;
 
 	case SIOCADDMULTI:
-		if (!ops->ndo_set_rx_mode ||
+		if ((!ops->ndo_set_rx_mode && !ops->ndo_set_rx_mode_async) ||
 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
 			return -EINVAL;
 		if (!netif_device_present(dev))
 			return -ENODEV;
 		netdev_lock_ops(dev);
 		err = dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
+		netif_rx_mode_sync(dev);
 		netdev_unlock_ops(dev);
 		return err;
 
 	case SIOCDELMULTI:
-		if (!ops->ndo_set_rx_mode ||
+		if ((!ops->ndo_set_rx_mode && !ops->ndo_set_rx_mode_async) ||
 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
 			return -EINVAL;
 		if (!netif_device_present(dev))
 			return -ENODEV;
 		netdev_lock_ops(dev);
 		err = dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
+		netif_rx_mode_sync(dev);
 		netdev_unlock_ops(dev);
 		return err;
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 69daba3ddaf0..b613bb6e07df 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3431,6 +3431,7 @@ errout:
 					     dev->name);
 	}
 
+	netif_rx_mode_sync(dev);
 	netdev_unlock_ops(dev);
 
 	return err;
-- 
cgit v1.2.3


From a4c833278144917982510ca43a3438155756122a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf.kernel@gmail.com>
Date: Thu, 16 Apr 2026 11:57:00 -0700
Subject: net: cache snapshot entries for ndo_set_rx_mode_async

Add a per-device netdev_hw_addr_list cache (rx_mode_addr_cache) that
allows __hw_addr_list_snapshot() and __hw_addr_list_reconcile() to
reuse previously allocated entries instead of hitting GFP_ATOMIC on
every snapshot cycle.

snapshot pops entries from the cache when available, falling back to
__hw_addr_create(). reconcile splices both snapshot lists back into
the cache via __hw_addr_splice(). The cache is flushed in
free_netdev().

Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20260416185712.2155425-4-sdf@fomichev.me
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h      |  7 +++--
 net/core/dev.c                 |  3 ++
 net/core/dev_addr_lists.c      | 66 ++++++++++++++++++++++++++++++------------
 net/core/dev_addr_lists_test.c | 60 ++++++++++++++++++++++++++------------
 4 files changed, 97 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6ed97f4c3bc6..97b435da5771 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1919,6 +1919,7 @@ enum netdev_reg_state {
  *				does not implement ndo_set_rx_mode()
  *	@rx_mode_node:		List entry for rx_mode work processing
  *	@rx_mode_tracker:	Refcount tracker for rx_mode work
+ *	@rx_mode_addr_cache:	Recycled snapshot entries for rx_mode work
  *	@uc:			unicast mac addresses
  *	@mc:			multicast mac addresses
  *	@dev_addrs:		list of device hw addresses
@@ -2312,6 +2313,7 @@ struct net_device {
 	bool			uc_promisc;
 	struct list_head	rx_mode_node;
 	netdevice_tracker	rx_mode_tracker;
+	struct netdev_hw_addr_list	rx_mode_addr_cache;
 #ifdef CONFIG_LOCKDEP
 	unsigned char		nested_level;
 #endif
@@ -5025,10 +5027,11 @@ void __hw_addr_init(struct netdev_hw_addr_list *list);
 void __hw_addr_flush(struct netdev_hw_addr_list *list);
 int __hw_addr_list_snapshot(struct netdev_hw_addr_list *snap,
 			    const struct netdev_hw_addr_list *list,
-			    int addr_len);
+			    int addr_len, struct netdev_hw_addr_list *cache);
 void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list,
 			      struct netdev_hw_addr_list *work,
-			      struct netdev_hw_addr_list *ref, int addr_len);
+			      struct netdev_hw_addr_list *ref, int addr_len,
+			      struct netdev_hw_addr_list *cache);
 
 /* Functions used for device addresses handling */
 void dev_addr_mod(struct net_device *dev, unsigned int offset,
diff --git a/net/core/dev.c b/net/core/dev.c
index b37061238a25..8597ec56fd64 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -12088,6 +12088,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	mutex_init(&dev->lock);
 	INIT_LIST_HEAD(&dev->rx_mode_node);
+	__hw_addr_init(&dev->rx_mode_addr_cache);
 
 	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
@@ -12192,6 +12193,8 @@ void free_netdev(struct net_device *dev)
 
 	kfree(rcu_dereference_protected(dev->ingress_queue, 1));
 
+	__hw_addr_flush(&dev->rx_mode_addr_cache);
+
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 056bca6fce12..7bab2ed0f625 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -511,30 +511,50 @@ void __hw_addr_init(struct netdev_hw_addr_list *list)
 }
 EXPORT_SYMBOL(__hw_addr_init);
 
+static void __hw_addr_splice(struct netdev_hw_addr_list *dst,
+			     struct netdev_hw_addr_list *src)
+{
+	src->tree = RB_ROOT;
+	list_splice_init(&src->list, &dst->list);
+	dst->count += src->count;
+	src->count = 0;
+}
+
 /**
  *  __hw_addr_list_snapshot - create a snapshot copy of an address list
  *  @snap: destination snapshot list (needs to be __hw_addr_init-initialized)
  *  @list: source address list to snapshot
  *  @addr_len: length of addresses
+ *  @cache: entry cache to reuse entries from; falls back to GFP_ATOMIC
  *
- *  Creates a copy of @list with individually allocated entries suitable
- *  for use with __hw_addr_sync_dev() and other list manipulation helpers.
- *  Each entry is allocated with GFP_ATOMIC; must be called under a spinlock.
+ *  Creates a copy of @list reusing entries from @cache when available.
+ *  Must be called under a spinlock.
  *
  *  Return: 0 on success, -errno on failure.
  */
 int __hw_addr_list_snapshot(struct netdev_hw_addr_list *snap,
 			    const struct netdev_hw_addr_list *list,
-			    int addr_len)
+			    int addr_len, struct netdev_hw_addr_list *cache)
 {
 	struct netdev_hw_addr *ha, *entry;
 
 	list_for_each_entry(ha, &list->list, list) {
-		entry = __hw_addr_create(ha->addr, addr_len, ha->type,
-					 false, false);
-		if (!entry) {
-			__hw_addr_flush(snap);
-			return -ENOMEM;
+		if (cache->count) {
+			entry = list_first_entry(&cache->list,
+						 struct netdev_hw_addr, list);
+			list_del(&entry->list);
+			cache->count--;
+			memcpy(entry->addr, ha->addr, addr_len);
+			entry->type = ha->type;
+			entry->global_use = false;
+			entry->synced = 0;
+		} else {
+			entry = __hw_addr_create(ha->addr, addr_len, ha->type,
+						 false, false);
+			if (!entry) {
+				__hw_addr_flush(snap);
+				return -ENOMEM;
+			}
 		}
 		entry->sync_cnt = ha->sync_cnt;
 		entry->refcount = ha->refcount;
@@ -554,15 +574,17 @@ EXPORT_SYMBOL_IF_KUNIT(__hw_addr_list_snapshot);
  *  @work: the working snapshot (modified by driver via __hw_addr_sync_dev)
  *  @ref: the reference snapshot (untouched copy of original state)
  *  @addr_len: length of addresses
+ *  @cache: entry cache to return snapshot entries to for reuse
  *
  *  Walks the reference snapshot and compares each entry against the work
  *  snapshot to compute sync_cnt deltas. Applies those deltas to @real_list.
- *  Frees both snapshots when done.
+ *  Returns snapshot entries to @cache for reuse; frees both snapshots.
  *  Caller must hold netif_addr_lock_bh.
  */
 void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list,
 			      struct netdev_hw_addr_list *work,
-			      struct netdev_hw_addr_list *ref, int addr_len)
+			      struct netdev_hw_addr_list *ref, int addr_len,
+			      struct netdev_hw_addr_list *cache)
 {
 	struct netdev_hw_addr *ref_ha, *tmp, *work_ha, *real_ha;
 	int delta;
@@ -611,8 +633,8 @@ void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list,
 		}
 	}
 
-	__hw_addr_flush(work);
-	__hw_addr_flush(ref);
+	__hw_addr_splice(cache, work);
+	__hw_addr_splice(cache, ref);
 }
 EXPORT_SYMBOL_IF_KUNIT(__hw_addr_list_reconcile);
 
@@ -1173,14 +1195,18 @@ static int netif_addr_lists_snapshot(struct net_device *dev,
 {
 	int err;
 
-	err = __hw_addr_list_snapshot(uc_snap, &dev->uc, dev->addr_len);
+	err = __hw_addr_list_snapshot(uc_snap, &dev->uc, dev->addr_len,
+				      &dev->rx_mode_addr_cache);
 	if (!err)
-		err = __hw_addr_list_snapshot(uc_ref, &dev->uc, dev->addr_len);
+		err = __hw_addr_list_snapshot(uc_ref, &dev->uc, dev->addr_len,
+					      &dev->rx_mode_addr_cache);
 	if (!err)
 		err = __hw_addr_list_snapshot(mc_snap, &dev->mc,
-					      dev->addr_len);
+					      dev->addr_len,
+					      &dev->rx_mode_addr_cache);
 	if (!err)
-		err = __hw_addr_list_snapshot(mc_ref, &dev->mc, dev->addr_len);
+		err = __hw_addr_list_snapshot(mc_ref, &dev->mc, dev->addr_len,
+					      &dev->rx_mode_addr_cache);
 
 	if (err) {
 		__hw_addr_flush(uc_snap);
@@ -1197,8 +1223,10 @@ static void netif_addr_lists_reconcile(struct net_device *dev,
 				       struct netdev_hw_addr_list *uc_ref,
 				       struct netdev_hw_addr_list *mc_ref)
 {
-	__hw_addr_list_reconcile(&dev->uc, uc_snap, uc_ref, dev->addr_len);
-	__hw_addr_list_reconcile(&dev->mc, mc_snap, mc_ref, dev->addr_len);
+	__hw_addr_list_reconcile(&dev->uc, uc_snap, uc_ref, dev->addr_len,
+				 &dev->rx_mode_addr_cache);
+	__hw_addr_list_reconcile(&dev->mc, mc_snap, mc_ref, dev->addr_len,
+				 &dev->rx_mode_addr_cache);
 }
 
 static void netif_rx_mode_run(struct net_device *dev)
diff --git a/net/core/dev_addr_lists_test.c b/net/core/dev_addr_lists_test.c
index fba926d5ec0d..260e71a2399f 100644
--- a/net/core/dev_addr_lists_test.c
+++ b/net/core/dev_addr_lists_test.c
@@ -251,8 +251,8 @@ static void dev_addr_test_add_excl(struct kunit *test)
  */
 static void dev_addr_test_snapshot_sync(struct kunit *test)
 {
+	struct netdev_hw_addr_list snap, ref, cache;
 	struct net_device *netdev = test->priv;
-	struct netdev_hw_addr_list snap, ref;
 	struct dev_addr_test_priv *datp;
 	struct netdev_hw_addr *ha;
 	u8 addr[ETH_ALEN];
@@ -268,10 +268,13 @@ static void dev_addr_test_snapshot_sync(struct kunit *test)
 	netif_addr_lock_bh(netdev);
 	__hw_addr_init(&snap);
 	__hw_addr_init(&ref);
+	__hw_addr_init(&cache);
 	KUNIT_EXPECT_EQ(test, 0,
-			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN,
+						&cache));
 	KUNIT_EXPECT_EQ(test, 0,
-			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN,
+						&cache));
 	netif_addr_unlock_bh(netdev);
 
 	/* Driver syncs ADDR_A to hardware */
@@ -283,7 +286,8 @@ static void dev_addr_test_snapshot_sync(struct kunit *test)
 
 	/* Reconcile: delta=+1 applied to real entry */
 	netif_addr_lock_bh(netdev);
-	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN,
+				 &cache);
 	netif_addr_unlock_bh(netdev);
 
 	/* Real entry should now reflect the sync: sync_cnt=1, refcount=2 */
@@ -301,6 +305,7 @@ static void dev_addr_test_snapshot_sync(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced);
 	KUNIT_EXPECT_EQ(test, 1, netdev->uc.count);
 
+	__hw_addr_flush(&cache);
 	rtnl_unlock();
 }
 
@@ -310,8 +315,8 @@ static void dev_addr_test_snapshot_sync(struct kunit *test)
  */
 static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
 {
+	struct netdev_hw_addr_list snap, ref, cache;
 	struct net_device *netdev = test->priv;
-	struct netdev_hw_addr_list snap, ref;
 	struct dev_addr_test_priv *datp;
 	struct netdev_hw_addr *ha;
 	u8 addr[ETH_ALEN];
@@ -327,10 +332,13 @@ static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
 	netif_addr_lock_bh(netdev);
 	__hw_addr_init(&snap);
 	__hw_addr_init(&ref);
+	__hw_addr_init(&cache);
 	KUNIT_EXPECT_EQ(test, 0,
-			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN,
+						&cache));
 	KUNIT_EXPECT_EQ(test, 0,
-			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN,
+						&cache));
 	netif_addr_unlock_bh(netdev);
 
 	/* Driver syncs ADDR_A to hardware */
@@ -349,7 +357,8 @@ static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
 	 * so it gets re-inserted as stale (sync_cnt=1, refcount=1).
 	 */
 	netif_addr_lock_bh(netdev);
-	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN,
+				 &cache);
 	netif_addr_unlock_bh(netdev);
 
 	KUNIT_EXPECT_EQ(test, 1, netdev->uc.count);
@@ -366,6 +375,7 @@ static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_unsynced);
 	KUNIT_EXPECT_EQ(test, 0, netdev->uc.count);
 
+	__hw_addr_flush(&cache);
 	rtnl_unlock();
 }
 
@@ -376,8 +386,8 @@ static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
  */
 static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
 {
+	struct netdev_hw_addr_list snap, ref, cache;
 	struct net_device *netdev = test->priv;
-	struct netdev_hw_addr_list snap, ref;
 	struct dev_addr_test_priv *datp;
 	struct netdev_hw_addr *ha;
 	u8 addr[ETH_ALEN];
@@ -403,10 +413,13 @@ static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
 	netif_addr_lock_bh(netdev);
 	__hw_addr_init(&snap);
 	__hw_addr_init(&ref);
+	__hw_addr_init(&cache);
 	KUNIT_EXPECT_EQ(test, 0,
-			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN,
+						&cache));
 	KUNIT_EXPECT_EQ(test, 0,
-			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN,
+						&cache));
 	netif_addr_unlock_bh(netdev);
 
 	/* Driver unsyncs stale ADDR_A from hardware */
@@ -426,7 +439,8 @@ static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
 	 * applied. Result: sync_cnt=0, refcount=1 (fresh).
 	 */
 	netif_addr_lock_bh(netdev);
-	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN,
+				 &cache);
 	netif_addr_unlock_bh(netdev);
 
 	/* Entry survives as fresh: needs re-sync to HW */
@@ -443,6 +457,7 @@ static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced);
 	KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced);
 
+	__hw_addr_flush(&cache);
 	rtnl_unlock();
 }
 
@@ -452,8 +467,8 @@ static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
  */
 static void dev_addr_test_snapshot_add_and_remove(struct kunit *test)
 {
+	struct netdev_hw_addr_list snap, ref, cache;
 	struct net_device *netdev = test->priv;
-	struct netdev_hw_addr_list snap, ref;
 	struct dev_addr_test_priv *datp;
 	struct netdev_hw_addr *ha;
 	u8 addr[ETH_ALEN];
@@ -480,10 +495,13 @@ static void dev_addr_test_snapshot_add_and_remove(struct kunit *test)
 	netif_addr_lock_bh(netdev);
 	__hw_addr_init(&snap);
 	__hw_addr_init(&ref);
+	__hw_addr_init(&cache);
 	KUNIT_EXPECT_EQ(test, 0,
-			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+			__hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN,
+						&cache));
 	KUNIT_EXPECT_EQ(test, 0,
-			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+			__hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN,
+						&cache));
 	netif_addr_unlock_bh(netdev);
 
 	/* Driver syncs snapshot: ADDR_C is new -> synced; A,B already synced */
@@ -502,7 +520,8 @@ static void dev_addr_test_snapshot_add_and_remove(struct kunit *test)
 	 * so nothing to apply to ADDR_B.
 	 */
 	netif_addr_lock_bh(netdev);
-	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+	__hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN,
+				 &cache);
 	netif_addr_unlock_bh(netdev);
 
 	/* ADDR_A: unchanged (sync_cnt=1, refcount=2)
@@ -536,13 +555,14 @@ static void dev_addr_test_snapshot_add_and_remove(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 1 << ADDR_B, datp->addr_unsynced);
 	KUNIT_EXPECT_EQ(test, 2, netdev->uc.count);
 
+	__hw_addr_flush(&cache);
 	rtnl_unlock();
 }
 
 static void dev_addr_test_snapshot_benchmark(struct kunit *test)
 {
 	struct net_device *netdev = test->priv;
-	struct netdev_hw_addr_list snap;
+	struct netdev_hw_addr_list snap, cache;
 	u8 addr[ETH_ALEN];
 	s64 duration = 0;
 	ktime_t start;
@@ -557,6 +577,8 @@ static void dev_addr_test_snapshot_benchmark(struct kunit *test)
 		KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr));
 	}
 
+	__hw_addr_init(&cache);
+
 	for (iter = 0; iter < 1000; iter++) {
 		netif_addr_lock_bh(netdev);
 		__hw_addr_init(&snap);
@@ -564,13 +586,15 @@ static void dev_addr_test_snapshot_benchmark(struct kunit *test)
 		start = ktime_get();
 		KUNIT_EXPECT_EQ(test, 0,
 				__hw_addr_list_snapshot(&snap, &netdev->uc,
-							ETH_ALEN));
+							ETH_ALEN, &cache));
 		duration += ktime_to_ns(ktime_sub(ktime_get(), start));
 
 		netif_addr_unlock_bh(netdev);
 		__hw_addr_flush(&snap);
 	}
 
+	__hw_addr_flush(&cache);
+
 	kunit_info(test,
 		   "1024 addrs x 1000 snapshots: %lld ns total, %lld ns/iter",
 		   duration, div_s64(duration, 1000));
-- 
cgit v1.2.3


From 922f8c28811f266fe5fc52a6d2852871e40ce098 Mon Sep 17 00:00:00 2001
From: Dewei Meng <mengdewei@cqsoftware.com.cn>
Date: Tue, 21 Apr 2026 10:58:08 +0800
Subject: spi: Fix the error description in the `ptp_sts_word_post` comment

Based on the comment information, the description within the
`ptp_sts_word_post` section should be changed to "See @ptp_sts_word_pre".

Signed-off-by: Dewei Meng <mengdewei@cqsoftware.com.cn>
Link: https://patch.msgid.link/20260421025808.6572-1-mengdewei@cqsoftware.com.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7587b1c5d7ec..82682dd9961d 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1019,7 +1019,7 @@ struct spi_res {
  *	this value may have changed compared to what was requested, depending
  *	on the available snapshotting resolution (DMA transfer,
  *	@ptp_sts_supported is false, etc).
- * @ptp_sts_word_post: See @ptp_sts_word_post. The two can be equal (meaning
+ * @ptp_sts_word_post: See @ptp_sts_word_pre. The two can be equal (meaning
  *	that a single byte should be snapshotted).
  *	If the core takes care of the timestamp (if @ptp_sts_supported is false
  *	for this controller), it will set @ptp_sts_word_pre to 0, and
-- 
cgit v1.2.3


From 256e5254efff48d6de97e314dc17d55504c55164 Mon Sep 17 00:00:00 2001
From: Kexin Sun <kexinsun@smail.nju.edu.cn>
Date: Tue, 24 Mar 2026 11:23:44 +0800
Subject: kgdb: update outdated references to kgdb_wait()

The function kgdb_wait() was folded into the static function
kgdb_cpu_enter() by commit 62fae312197a ("kgdb: eliminate
kgdb_wait(), all cpus enter the same way").  Update the four stale
references accordingly:

 - include/linux/kgdb.h and arch/x86/kernel/kgdb.c: the
   kgdb_roundup_cpus() kdoc describes what other CPUs are rounded up
   to call.  Because kgdb_cpu_enter() is static, the correct public
   entry point is kgdb_handle_exception(); also fix a pre-existing
   grammar error ("get them be" -> "get them into") and reflow the
   text.

 - kernel/debug/debug_core.c: replace with the generic description
   "the debug trap handler", since the actual entry path is
   architecture-specific.

 - kernel/debug/gdbstub.c: kgdb_cpu_enter() is correct here (it
   describes internal state, not a call target); add the missing
   parentheses.

Suggested-by: Daniel Thompson <daniel@riscstar.com>
Assisted-by: unnamed:deepseek-v3.2 coccinelle
Signed-off-by: Kexin Sun <kexinsun@smail.nju.edu.cn>
---
 arch/x86/kernel/kgdb.c    | 9 +++++----
 include/linux/kgdb.h      | 7 ++++---
 kernel/debug/debug_core.c | 2 +-
 kernel/debug/gdbstub.c    | 2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8b1a9733d13e..96af1242454e 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -407,10 +407,11 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs)
  *	kgdb_roundup_cpus - Get other CPUs into a holding pattern
  *
  *	On SMP systems, we need to get the attention of the other CPUs
- *	and get them be in a known state.  This should do what is needed
- *	to get the other CPUs to call kgdb_wait(). Note that on some arches,
- *	the NMI approach is not used for rounding up all the CPUs. For example,
- *	in case of MIPS, smp_call_function() is used to roundup CPUs.
+ *	and get them into a known state.  This should do what is needed
+ *	to get the other CPUs to call kgdb_handle_exception().  Note that
+ *	on some arches, the NMI approach is not used for rounding up all
+ *	the CPUs.  For example, in case of MIPS, smp_call_function() is
+ *	used to roundup CPUs.
  *
  *	On non-SMP systems, this is not called.
  */
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 22b3f3839f30..6c46591a2eac 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -202,9 +202,10 @@ extern void kgdb_call_nmi_hook(void *ignored);
  *
  *	On SMP systems, we need to get the attention of the other CPUs
  *	and get them into a known state.  This should do what is needed
- *	to get the other CPUs to call kgdb_wait(). Note that on some arches,
- *	the NMI approach is not used for rounding up all the CPUs.  Normally
- *	those architectures can just not implement this and get the default.
+ *	to get the other CPUs to call kgdb_handle_exception().  Note that
+ *	on some arches, the NMI approach is not used for rounding up all
+ *	the CPUs.  Normally those architectures can just not implement
+ *	this and get the default.
  *
  *	On non-SMP systems, this is not called.
  */
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0b9495187fba..b276504c1c6b 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -704,7 +704,7 @@ return_normal:
 	if (ks->send_ready)
 		atomic_set(ks->send_ready, 1);
 
-	/* Signal the other CPUs to enter kgdb_wait() */
+	/* Signal the other CPUs to enter the debug trap handler */
 	else if ((!kgdb_single_step) && kgdb_do_roundup)
 		kgdb_roundup_cpus();
 #endif
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index f586afd76c80..e271a436d60e 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -517,7 +517,7 @@ static void gdb_get_regs_helper(struct kgdb_state *ks)
 	/*
 	 * All threads that don't have debuggerinfo should be
 	 * in schedule() sleeping, since all other CPUs
-	 * are in kgdb_wait, and thus have debuggerinfo.
+	 * are in kgdb_cpu_enter(), and thus have debuggerinfo.
 	 */
 	if (local_debuggerinfo) {
 		pt_regs_to_gdb_regs(gdb_regs, local_debuggerinfo);
-- 
cgit v1.2.3


From 6f1d4d2ecfcd1b577dc87350ea965fe81f272e83 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 22 Mar 2024 14:22:48 +0100
Subject: tpm: avoid -Wunused-but-set-variable

Outside of the EFI tpm code, the TPM_MEMREMAP()/TPM_MEMUNMAP functions are
defined as trivial macros, leading to the mapping_size variable ending
up unused:

In file included from drivers/char/tpm/tpm-sysfs.c:16:
In file included from drivers/char/tpm/tpm.h:28:
include/linux/tpm_eventlog.h:167:6: error: variable 'mapping_size' set but not used [-Werror,-Wunused-but-set-variable]
  167 |         int mapping_size;

Turn the stubs into inline functions to avoid this warning.

Cc: stable@vger.kernel.org # v5.3+
Fixes: c46f3405692d ("tpm: Reserve the TPM final events table")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Thorsten Blum <thorsten.blum@linux.dev>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 include/linux/tpm_eventlog.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 891368e82558..aff8ea2fa98e 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -131,11 +131,16 @@ struct tcg_algorithm_info {
 };
 
 #ifndef TPM_MEMREMAP
-#define TPM_MEMREMAP(start, size) NULL
+static inline void *TPM_MEMREMAP(unsigned long start, size_t size)
+{
+	return NULL;
+}
 #endif
 
 #ifndef TPM_MEMUNMAP
-#define TPM_MEMUNMAP(start, size) do{} while(0)
+static inline void TPM_MEMUNMAP(void *mapping, size_t size)
+{
+}
 #endif
 
 /**
-- 
cgit v1.2.3


From 5d3869a41f3608101c00ff9c9c7c2364c555fa65 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <okorniev@redhat.com>
Date: Mon, 13 Apr 2026 18:24:23 -0400
Subject: NFS: fix writeback in presence of errors

After running xfstest generic/751, in certain conditions, can have
a writeback IO stuck while experiencing one of the two patterns.

Pattern#1: writeback IO experiences ENOSPC on an offset smaller
than the filesize. Example,
write offset=0 len=4096 how=unstable OK
write offset=8192 len=4096 how=unstable OK
write offset=12288 len=4096 how=unstable ENOSPC
write offset=4096 len=4096 how=unstable ENOSPC
client sends a commit and receives a verifier which is different
from the last successful write. It marks pages dirty and writeback
retries. But it again send writes unstable and gets into the same
pattern, running into the ENOSPC error and sending a commit because
writes were sent at unstable.

Pattern#2: an unstable write followed by a short write and ENOSPC.
write offset=0 len=4096 how=unstable OK
write offset=4096 len=4096 how=unstable returns OK but count=100
write offset=4197 len=3996 how=stable returns ENOSPC
client send a commit and receives a verifier different from
the last unstable write. The same behaviour is retried in a loop.

Instead, this patch proposes to identify those conditions and mark
requests to be done synchronously instead. Previous solution tried
to mark it in the nfs_page, however that's not persistent thus
instead mark it in the nfs_open_context.

Furthermore, the same problem occurs during localio code path so
recognize that IO needs to be done sync in that case as well.

Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/localio.c       | 15 ++++++++++++++-
 fs/nfs/pagelist.c      |  3 +++
 fs/nfs/write.c         |  9 +++++++++
 include/linux/nfs_fs.h |  1 +
 4 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 4c7d16a99ed6..e55c5977fcc3 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -865,6 +865,8 @@ static void nfs_local_call_write(struct work_struct *work)
 	file_start_write(filp);
 	n_iters = atomic_read(&iocb->n_iters);
 	for (int i = 0; i < n_iters ; i++) {
+		size_t icount;
+
 		if (iocb->iter_is_dio_aligned[i]) {
 			iocb->kiocb.ki_flags |= IOCB_DIRECT;
 			/* Only use AIO completion if DIO-aligned segment is last */
@@ -881,8 +883,16 @@ static void nfs_local_call_write(struct work_struct *work)
 		if (status == -EIOCBQUEUED)
 			continue;
 		/* Break on completion, errors, or short writes */
+		icount = iov_iter_count(&iocb->iters[i]);
 		if (nfs_local_pgio_done(iocb, status) || status < 0 ||
-		    (size_t)status < iov_iter_count(&iocb->iters[i])) {
+		    (size_t)status < icount) {
+			if ((size_t)status < icount) {
+				struct nfs_lock_context *ctx =
+					iocb->hdr->req->wb_lock_context;
+
+				set_bit(NFS_CONTEXT_WRITE_SYNC,
+					&ctx->open_context->flags);
+			}
 			nfs_local_write_iocb_done(iocb);
 			break;
 		}
@@ -901,6 +911,9 @@ static void nfs_local_do_write(struct nfs_local_kiocb *iocb,
 		__func__, hdr->args.count, hdr->args.offset,
 		(hdr->args.stable == NFS_UNSTABLE) ?  "unstable" : "stable");
 
+	if (test_bit(NFS_CONTEXT_WRITE_SYNC,
+		     &hdr->req->wb_lock_context->open_context->flags))
+		hdr->args.stable = NFS_FILE_SYNC;
 	switch (hdr->args.stable) {
 	default:
 		break;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a9373de891c9..4a87b2fdb2e6 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -1186,6 +1186,9 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 
 	nfs_page_group_lock(req);
 
+	if (test_bit(NFS_CONTEXT_WRITE_SYNC,
+		     &req->wb_lock_context->open_context->flags))
+		desc->pg_ioflags |= FLUSH_STABLE;
 	subreq = req;
 	subreq_size = subreq->wb_bytes;
 	for(;;) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f1f62787dd74..f224b73fa30e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -927,9 +927,13 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 			goto remove_req;
 		}
 		if (nfs_write_need_commit(hdr)) {
+			struct nfs_open_context *ctx =
+				hdr->req->wb_lock_context->open_context;
+
 			/* Reset wb_nio, since the write was successful. */
 			req->wb_nio = 0;
 			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
+			clear_bit(NFS_CONTEXT_WRITE_SYNC, &ctx->flags);
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo,
 				hdr->ds_commit_idx);
 			goto next;
@@ -1553,7 +1557,10 @@ static void nfs_writeback_result(struct rpc_task *task,
 
 	if (resp->count < argp->count && !list_empty(&hdr->pages)) {
 		static unsigned long    complain;
+		struct nfs_open_context *ctx =
+			hdr->req->wb_lock_context->open_context;
 
+		set_bit(NFS_CONTEXT_WRITE_SYNC, &ctx->flags);
 		/* This a short write! */
 		nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
 
@@ -1837,6 +1844,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 		/* We have a mismatch. Write the page again */
 		dprintk(" mismatch\n");
 		nfs_mark_request_dirty(req);
+		set_bit(NFS_CONTEXT_WRITE_SYNC,
+			&req->wb_lock_context->open_context->flags);
 		atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
 	next:
 		nfs_unlock_and_release_request(req);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 8dd79a3f3d66..4623262da3c0 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -109,6 +109,7 @@ struct nfs_open_context {
 #define NFS_CONTEXT_BAD			(2)
 #define NFS_CONTEXT_UNLOCK	(3)
 #define NFS_CONTEXT_FILE_OPEN		(4)
+#define NFS_CONTEXT_WRITE_SYNC		(5)
 
 	struct nfs4_threshold	*mdsthreshold;
 	struct list_head list;
-- 
cgit v1.2.3


From a6e23843e949081b417b6078f02074074a190499 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 10 Apr 2026 17:49:07 +0200
Subject: spi: fix controller cleanup() documentation

The controller cleanup() callback is no longer called when releasing a
device, but rather when deregistering it (and on registration failures).

Fixes: c7299fea6769 ("spi: Fix spi device unregister flow")
Cc: Saravana Kannan <saravanak@kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://patch.msgid.link/20260410154907.129248-3-johan@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7587b1c5d7ec..bbb5b870baeb 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -701,7 +701,7 @@ struct spi_controller {
 	int			(*transfer)(struct spi_device *spi,
 						struct spi_message *mesg);
 
-	/* Called on release() to free memory provided by spi_controller */
+	/* Called on deregistration to free memory provided by spi_controller */
 	void			(*cleanup)(struct spi_device *spi);
 
 	/*
-- 
cgit v1.2.3


From fc69decc811b155a0ed8eef17ee940f28c4f6dbc Mon Sep 17 00:00:00 2001
From: Longxuan Yu <ylong030@ucr.edu>
Date: Mon, 20 Apr 2026 11:18:45 +0800
Subject: 8021q: use RCU for egress QoS mappings

The TX fast path and reporting paths walk egress QoS mappings without
RTNL. Convert the mapping lists to RCU-protected pointers, use RCU
reader annotations in readers, and defer freeing mapping nodes with an
embedded rcu_head.

This prepares the egress QoS mapping code for safe removal of mapping
nodes in a follow-up change while preserving the current behavior.

Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Longxuan Yu <ylong030@ucr.edu>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Link: https://patch.msgid.link/9136768189f8c6d3f824f476c62d2fa1111688e8.1776647968.git.yuantan098@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/if_vlan.h  | 25 ++++++++++++++++---------
 net/8021q/vlan_dev.c     | 31 ++++++++++++++++---------------
 net/8021q/vlan_netlink.c | 10 ++++++----
 net/8021q/vlanproc.c     | 12 ++++++++----
 4 files changed, 46 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index e6272f9c5e42..20cc16ea4e5a 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -147,11 +147,13 @@ extern __be16 vlan_dev_vlan_proto(const struct net_device *dev);
  *	@priority: skb priority
  *	@vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
  *	@next: pointer to next struct
+ *	@rcu: used for deferred freeing of mapping nodes
  */
 struct vlan_priority_tci_mapping {
 	u32					priority;
 	u16					vlan_qos;
-	struct vlan_priority_tci_mapping	*next;
+	struct vlan_priority_tci_mapping __rcu	*next;
+	struct rcu_head			rcu;
 };
 
 struct proc_dir_entry;
@@ -177,7 +179,7 @@ struct vlan_dev_priv {
 	unsigned int				nr_ingress_mappings;
 	u32					ingress_priority_map[8];
 	unsigned int				nr_egress_mappings;
-	struct vlan_priority_tci_mapping	*egress_priority_map[16];
+	struct vlan_priority_tci_mapping __rcu	*egress_priority_map[16];
 
 	__be16					vlan_proto;
 	u16					vlan_id;
@@ -209,19 +211,24 @@ static inline u16
 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
 {
 	struct vlan_priority_tci_mapping *mp;
+	u16 vlan_qos = 0;
 
-	smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
+	rcu_read_lock();
 
-	mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)];
+	mp = rcu_dereference(vlan_dev_priv(dev)->egress_priority_map[skprio & 0xF]);
 	while (mp) {
 		if (mp->priority == skprio) {
-			return mp->vlan_qos; /* This should already be shifted
-					      * to mask correctly with the
-					      * VLAN's TCI */
+			vlan_qos = READ_ONCE(mp->vlan_qos);
+			break;
 		}
-		mp = mp->next;
+		mp = rcu_dereference(mp->next);
 	}
-	return 0;
+	rcu_read_unlock();
+
+	/* This should already be shifted to mask correctly with
+	 * the VLAN's TCI.
+	 */
+	return vlan_qos;
 }
 
 extern bool vlan_do_receive(struct sk_buff **skb);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c40f7d5c4fca..a5340932b657 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -172,39 +172,34 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, u16 vlan_prio)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
-	struct vlan_priority_tci_mapping *mp = NULL;
+	struct vlan_priority_tci_mapping *mp;
 	struct vlan_priority_tci_mapping *np;
+	u32 bucket = skb_prio & 0xF;
 	u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
 
 	/* See if a priority mapping exists.. */
-	mp = vlan->egress_priority_map[skb_prio & 0xF];
+	mp = rtnl_dereference(vlan->egress_priority_map[bucket]);
 	while (mp) {
 		if (mp->priority == skb_prio) {
 			if (mp->vlan_qos && !vlan_qos)
 				vlan->nr_egress_mappings--;
 			else if (!mp->vlan_qos && vlan_qos)
 				vlan->nr_egress_mappings++;
-			mp->vlan_qos = vlan_qos;
+			WRITE_ONCE(mp->vlan_qos, vlan_qos);
 			return 0;
 		}
-		mp = mp->next;
+		mp = rtnl_dereference(mp->next);
 	}
 
 	/* Create a new mapping then. */
-	mp = vlan->egress_priority_map[skb_prio & 0xF];
 	np = kmalloc_obj(struct vlan_priority_tci_mapping);
 	if (!np)
 		return -ENOBUFS;
 
-	np->next = mp;
 	np->priority = skb_prio;
 	np->vlan_qos = vlan_qos;
-	/* Before inserting this element in hash table, make sure all its fields
-	 * are committed to memory.
-	 * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
-	 */
-	smp_wmb();
-	vlan->egress_priority_map[skb_prio & 0xF] = np;
+	RCU_INIT_POINTER(np->next, rtnl_dereference(vlan->egress_priority_map[bucket]));
+	rcu_assign_pointer(vlan->egress_priority_map[bucket], np);
 	if (vlan_qos)
 		vlan->nr_egress_mappings++;
 	return 0;
@@ -604,11 +599,17 @@ void vlan_dev_free_egress_priority(const struct net_device *dev)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
-		while ((pm = vlan->egress_priority_map[i]) != NULL) {
-			vlan->egress_priority_map[i] = pm->next;
-			kfree(pm);
+		pm = rtnl_dereference(vlan->egress_priority_map[i]);
+		RCU_INIT_POINTER(vlan->egress_priority_map[i], NULL);
+		while (pm) {
+			struct vlan_priority_tci_mapping *next;
+
+			next = rtnl_dereference(pm->next);
+			kfree_rcu(pm, rcu);
+			pm = next;
 		}
 	}
+	vlan->nr_egress_mappings = 0;
 }
 
 static void vlan_dev_uninit(struct net_device *dev)
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index a000b1ef0520..a5b16833e2ce 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -260,13 +260,15 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			goto nla_put_failure;
 
 		for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
-			for (pm = vlan->egress_priority_map[i]; pm;
-			     pm = pm->next) {
-				if (!pm->vlan_qos)
+			for (pm = rcu_dereference_rtnl(vlan->egress_priority_map[i]); pm;
+			     pm = rcu_dereference_rtnl(pm->next)) {
+				u16 vlan_qos = READ_ONCE(pm->vlan_qos);
+
+				if (!vlan_qos)
 					continue;
 
 				m.from = pm->priority;
-				m.to   = (pm->vlan_qos >> 13) & 0x7;
+				m.to   = (vlan_qos >> 13) & 0x7;
 				if (nla_put(skb, IFLA_VLAN_QOS_MAPPING,
 					    sizeof(m), &m))
 					goto nla_put_failure;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index fa67374bda49..0e424e0895b7 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -262,15 +262,19 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 		   vlan->ingress_priority_map[7]);
 
 	seq_printf(seq, " EGRESS priority mappings: ");
+	rcu_read_lock();
 	for (i = 0; i < 16; i++) {
-		const struct vlan_priority_tci_mapping *mp
-			= vlan->egress_priority_map[i];
+		const struct vlan_priority_tci_mapping *mp =
+			rcu_dereference(vlan->egress_priority_map[i]);
 		while (mp) {
+			u16 vlan_qos = READ_ONCE(mp->vlan_qos);
+
 			seq_printf(seq, "%u:%d ",
-				   mp->priority, ((mp->vlan_qos >> 13) & 0x7));
-			mp = mp->next;
+				   mp->priority, ((vlan_qos >> 13) & 0x7));
+			mp = rcu_dereference(mp->next);
 		}
 	}
+	rcu_read_unlock();
 	seq_puts(seq, "\n");
 
 	return 0;
-- 
cgit v1.2.3


From 6d5431555de032f5ad9e08a7fb372f37bf493903 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 16 Apr 2026 11:28:28 -0700
Subject: caif: remove CAIF NETWORK LAYER

Remove CAIF (Communication CPU to Application CPU Interface), the
ST-Ericsson modem protocol. The subsystem has been orphaned since 2013.
The last meaningful changes from the maintainers were in March 2013:
  a8c7687bf216 ("caif_virtio: Check that vringh_config is not null")
  b2273be8d2df ("caif_virtio: Use vringh_notify_enable correctly")
  0d2e1a2926b1 ("caif_virtio: Introduce caif over virtio")

Not-so-coincidentally, according to "the Internet" ST-Ericsson officially
shut down its modem joint venture in Aug 2013.

If anyone is using this code please yell!

In the 13 years since, the code has accumulated 200 non-merge commits,
of which 71 were cross-tree API changes, 21 carried Fixes: tags, and
the remaining ~110 were cleanups, doc conversions, treewide refactors,
and one partial removal (caif_hsi, ca75bcf0a83b).

We are still getting fixes to this code, in the last 10 days there were
3 reports on security@ about CAIF that I have been CCed on.

UAPI constants (AF_CAIF, ARPHRD_CAIF, N_CAIF, VIRTIO_ID_CAIF) and the
SELinux classmap entry are intentionally kept for ABI stability.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260416182829.1440262-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/caif/caif.rst             |  138 ---
 Documentation/networking/caif/index.rst            |   12 -
 Documentation/networking/caif/linux_caif.rst       |  195 ----
 Documentation/networking/index.rst                 |    1 -
 .../translations/zh_CN/networking/index.rst        |    1 -
 MAINTAINERS                                        |    9 -
 arch/arm/configs/u8500_defconfig                   |    1 -
 drivers/net/Kconfig                                |    2 -
 drivers/net/Makefile                               |    1 -
 drivers/net/caif/Kconfig                           |   33 -
 drivers/net/caif/Makefile                          |    8 -
 drivers/net/caif/caif_serial.c                     |  443 --------
 drivers/net/caif/caif_virtio.c                     |  791 --------------
 include/linux/virtio_caif.h                        |   24 -
 include/net/caif/caif_dev.h                        |  128 ---
 include/net/caif/caif_device.h                     |   55 -
 include/net/caif/caif_layer.h                      |  277 -----
 include/net/caif/cfcnfg.h                          |   90 --
 include/net/caif/cfctrl.h                          |  130 ---
 include/net/caif/cffrml.h                          |   21 -
 include/net/caif/cfmuxl.h                          |   20 -
 include/net/caif/cfpkt.h                           |  232 ----
 include/net/caif/cfserl.h                          |   13 -
 include/net/caif/cfsrvl.h                          |   61 --
 include/uapi/linux/caif/caif_socket.h              |  195 ----
 include/uapi/linux/caif/if_caif.h                  |   35 -
 net/Kconfig                                        |    1 -
 net/Makefile                                       |    1 -
 net/caif/Kconfig                                   |   54 -
 net/caif/Makefile                                  |   16 -
 net/caif/caif_dev.c                                |  586 ----------
 net/caif/caif_socket.c                             | 1114 --------------------
 net/caif/caif_usb.c                                |  216 ----
 net/caif/cfcnfg.c                                  |  612 -----------
 net/caif/cfctrl.c                                  |  631 -----------
 net/caif/cfdbgl.c                                  |   55 -
 net/caif/cfdgml.c                                  |  113 --
 net/caif/cffrml.c                                  |  204 ----
 net/caif/cfmuxl.c                                  |  267 -----
 net/caif/cfpkt_skbuff.c                            |  373 -------
 net/caif/cfrfml.c                                  |  299 ------
 net/caif/cfserl.c                                  |  192 ----
 net/caif/cfsrvl.c                                  |  224 ----
 net/caif/cfutill.c                                 |  104 --
 net/caif/cfveil.c                                  |  101 --
 net/caif/cfvidl.c                                  |   65 --
 net/caif/chnl_net.c                                |  531 ----------
 47 files changed, 8675 deletions(-)
 delete mode 100644 Documentation/networking/caif/caif.rst
 delete mode 100644 Documentation/networking/caif/index.rst
 delete mode 100644 Documentation/networking/caif/linux_caif.rst
 delete mode 100644 drivers/net/caif/Kconfig
 delete mode 100644 drivers/net/caif/Makefile
 delete mode 100644 drivers/net/caif/caif_serial.c
 delete mode 100644 drivers/net/caif/caif_virtio.c
 delete mode 100644 include/linux/virtio_caif.h
 delete mode 100644 include/net/caif/caif_dev.h
 delete mode 100644 include/net/caif/caif_device.h
 delete mode 100644 include/net/caif/caif_layer.h
 delete mode 100644 include/net/caif/cfcnfg.h
 delete mode 100644 include/net/caif/cfctrl.h
 delete mode 100644 include/net/caif/cffrml.h
 delete mode 100644 include/net/caif/cfmuxl.h
 delete mode 100644 include/net/caif/cfpkt.h
 delete mode 100644 include/net/caif/cfserl.h
 delete mode 100644 include/net/caif/cfsrvl.h
 delete mode 100644 include/uapi/linux/caif/caif_socket.h
 delete mode 100644 include/uapi/linux/caif/if_caif.h
 delete mode 100644 net/caif/Kconfig
 delete mode 100644 net/caif/Makefile
 delete mode 100644 net/caif/caif_dev.c
 delete mode 100644 net/caif/caif_socket.c
 delete mode 100644 net/caif/caif_usb.c
 delete mode 100644 net/caif/cfcnfg.c
 delete mode 100644 net/caif/cfctrl.c
 delete mode 100644 net/caif/cfdbgl.c
 delete mode 100644 net/caif/cfdgml.c
 delete mode 100644 net/caif/cffrml.c
 delete mode 100644 net/caif/cfmuxl.c
 delete mode 100644 net/caif/cfpkt_skbuff.c
 delete mode 100644 net/caif/cfrfml.c
 delete mode 100644 net/caif/cfserl.c
 delete mode 100644 net/caif/cfsrvl.c
 delete mode 100644 net/caif/cfutill.c
 delete mode 100644 net/caif/cfveil.c
 delete mode 100644 net/caif/cfvidl.c
 delete mode 100644 net/caif/chnl_net.c

(limited to 'include/linux')

diff --git a/Documentation/networking/caif/caif.rst b/Documentation/networking/caif/caif.rst
deleted file mode 100644
index d922d419c513..000000000000
--- a/Documentation/networking/caif/caif.rst
+++ /dev/null
@@ -1,138 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-.. include:: <isonum.txt>
-
-
-================
-Using Linux CAIF
-================
-
-
-:Copyright: |copy| ST-Ericsson AB 2010
-
-:Author: Sjur Brendeland/ sjur.brandeland@stericsson.com
-
-Start
-=====
-
-If you have compiled CAIF for modules do::
-
-    $modprobe crc_ccitt
-    $modprobe caif
-    $modprobe caif_socket
-    $modprobe chnl_net
-
-
-Preparing the setup with a STE modem
-====================================
-
-If you are working on integration of CAIF you should make sure
-that the kernel is built with module support.
-
-There are some things that need to be tweaked to get the host TTY correctly
-set up to talk to the modem.
-Since the CAIF stack is running in the kernel and we want to use the existing
-TTY, we are installing our physical serial driver as a line discipline above
-the TTY device.
-
-To achieve this we need to install the N_CAIF ldisc from user space.
-The benefit is that we can hook up to any TTY.
-
-The use of Start-of-frame-extension (STX) must also be set as
-module parameter "ser_use_stx".
-
-Normally Frame Checksum is always used on UART, but this is also provided as a
-module parameter "ser_use_fcs".
-
-::
-
-    $ modprobe caif_serial ser_ttyname=/dev/ttyS0 ser_use_stx=yes
-    $ ifconfig caif_ttyS0 up
-
-PLEASE NOTE:
-		There is a limitation in Android shell.
-		It only accepts one argument to insmod/modprobe!
-
-Trouble shooting
-================
-
-There are debugfs parameters provided for serial communication.
-/sys/kernel/debug/caif_serial/<tty-name>/
-
-* ser_state:   Prints the bit-mask status where
-
-  - 0x02 means SENDING, this is a transient state.
-  - 0x10 means FLOW_OFF_SENT, i.e. the previous frame has not been sent
-    and is blocking further send operation. Flow OFF has been propagated
-    to all CAIF Channels using this TTY.
-
-* tty_status: Prints the bit-mask tty status information
-
-  - 0x01 - tty->warned is on.
-  - 0x04 - tty->packed is on.
-  - 0x08 - tty->flow.tco_stopped is on.
-  - 0x10 - tty->hw_stopped is on.
-  - 0x20 - tty->flow.stopped is on.
-
-* last_tx_msg: Binary blob Prints the last transmitted frame.
-
-  This can be printed with::
-
-	$od --format=x1 /sys/kernel/debug/caif_serial/<tty>/last_rx_msg.
-
-  The first two tx messages sent look like this. Note: The initial
-  byte 02 is start of frame extension (STX) used for re-syncing
-  upon errors.
-
-  - Enumeration::
-
-        0000000  02 05 00 00 03 01 d2 02
-                 |  |     |  |  |  |
-                 STX(1)   |  |  |  |
-                    Length(2)|  |  |
-                          Control Channel(1)
-                             Command:Enumeration(1)
-                                Link-ID(1)
-                                    Checksum(2)
-
-  - Channel Setup::
-
-        0000000  02 07 00 00 00 21 a1 00 48 df
-                 |  |     |  |  |  |  |  |
-                 STX(1)   |  |  |  |  |  |
-                    Length(2)|  |  |  |  |
-                          Control Channel(1)
-                             Command:Channel Setup(1)
-                                Channel Type(1)
-                                    Priority and Link-ID(1)
-				      Endpoint(1)
-					  Checksum(2)
-
-* last_rx_msg: Prints the last transmitted frame.
-
-  The RX messages for LinkSetup look almost identical but they have the
-  bit 0x20 set in the command bit, and Channel Setup has added one byte
-  before Checksum containing Channel ID.
-
-  NOTE:
-	Several CAIF Messages might be concatenated. The maximum debug
-	buffer size is 128 bytes.
-
-Error Scenarios
-===============
-
-- last_tx_msg contains channel setup message and last_rx_msg is empty ->
-  The host seems to be able to send over the UART, at least the CAIF ldisc get
-  notified that sending is completed.
-
-- last_tx_msg contains enumeration message and last_rx_msg is empty ->
-  The host is not able to send the message from UART, the tty has not been
-  able to complete the transmit operation.
-
-- if /sys/kernel/debug/caif_serial/<tty>/tty_status is non-zero there
-  might be problems transmitting over UART.
-
-  E.g. host and modem wiring is not correct you will typically see
-  tty_status = 0x10 (hw_stopped) and ser_state = 0x10 (FLOW_OFF_SENT).
-
-  You will probably see the enumeration message in last_tx_message
-  and empty last_rx_message.
diff --git a/Documentation/networking/caif/index.rst b/Documentation/networking/caif/index.rst
deleted file mode 100644
index ec29b6f4bdb4..000000000000
--- a/Documentation/networking/caif/index.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-CAIF
-====
-
-Contents:
-
-.. toctree::
-   :maxdepth: 2
-
-   linux_caif
-   caif
diff --git a/Documentation/networking/caif/linux_caif.rst b/Documentation/networking/caif/linux_caif.rst
deleted file mode 100644
index a0480862ab8c..000000000000
--- a/Documentation/networking/caif/linux_caif.rst
+++ /dev/null
@@ -1,195 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-.. include:: <isonum.txt>
-
-==========
-Linux CAIF
-==========
-
-Copyright |copy| ST-Ericsson AB 2010
-
-:Author: Sjur Brendeland/ sjur.brandeland@stericsson.com
-:License terms: GNU General Public License (GPL) version 2
-
-
-Introduction
-============
-
-CAIF is a MUX protocol used by ST-Ericsson cellular modems for
-communication between Modem and host. The host processes can open virtual AT
-channels, initiate GPRS Data connections, Video channels and Utility Channels.
-The Utility Channels are general purpose pipes between modem and host.
-
-ST-Ericsson modems support a number of transports between modem
-and host. Currently, UART and Loopback are available for Linux.
-
-
-Architecture
-============
-
-The implementation of CAIF is divided into:
-
-* CAIF Socket Layer and GPRS IP Interface.
-* CAIF Core Protocol Implementation
-* CAIF Link Layer, implemented as NET devices.
-
-::
-
-  RTNL
-   !
-   !	      +------+	 +------+
-   !	     +------+!	+------+!
-   !	     !	IP  !!	!Socket!!
-   +-------> !interf!+	! API  !+	<- CAIF Client APIs
-   !	     +------+	+------!
-   !		!	    !
-   !		+-----------+
-   !		      !
-   !		   +------+		<- CAIF Core Protocol
-   !		   ! CAIF !
-   !		   ! Core !
-   !		   +------+
-   !	   +----------!---------+
-   !	   !	      !		!
-   !	+------+   +-----+   +------+
-   +--> ! HSI  !   ! TTY !   ! USB  !	<- Link Layer (Net Devices)
-	+------+   +-----+   +------+
-
-
-
-Implementation
-==============
-
-
-CAIF Core Protocol Layer
-------------------------
-
-CAIF Core layer implements the CAIF protocol as defined by ST-Ericsson.
-It implements the CAIF protocol stack in a layered approach, where
-each layer described in the specification is implemented as a separate layer.
-The architecture is inspired by the design patterns "Protocol Layer" and
-"Protocol Packet".
-
-CAIF structure
-^^^^^^^^^^^^^^
-
-The Core CAIF implementation contains:
-
-      -	Simple implementation of CAIF.
-      -	Layered architecture (a la Streams), each layer in the CAIF
-	specification is implemented in a separate c-file.
-      -	Clients must call configuration function to add PHY layer.
-      -	Clients must implement CAIF layer to consume/produce
-	CAIF payload with receive and transmit functions.
-      -	Clients must call configuration function to add and connect the
-	Client layer.
-      - When receiving / transmitting CAIF Packets (cfpkt), ownership is passed
-	to the called function (except for framing layers' receive function)
-
-Layered Architecture
-====================
-
-The CAIF protocol can be divided into two parts: Support functions and Protocol
-Implementation. The support functions include:
-
-      - CFPKT CAIF Packet. Implementation of CAIF Protocol Packet. The
-	CAIF Packet has functions for creating, destroying and adding content
-	and for adding/extracting header and trailers to protocol packets.
-
-The CAIF Protocol implementation contains:
-
-      - CFCNFG CAIF Configuration layer. Configures the CAIF Protocol
-	Stack and provides a Client interface for adding Link-Layer and
-	Driver interfaces on top of the CAIF Stack.
-
-      - CFCTRL CAIF Control layer. Encodes and Decodes control messages
-	such as enumeration and channel setup. Also matches request and
-	response messages.
-
-      - CFSERVL General CAIF Service Layer functionality; handles flow
-	control and remote shutdown requests.
-
-      - CFVEI CAIF VEI layer. Handles CAIF AT Channels on VEI (Virtual
-	External Interface). This layer encodes/decodes VEI frames.
-
-      - CFDGML CAIF Datagram layer. Handles CAIF Datagram layer (IP
-	traffic), encodes/decodes Datagram frames.
-
-      - CFMUX CAIF Mux layer. Handles multiplexing between multiple
-	physical bearers and multiple channels such as VEI, Datagram, etc.
-	The MUX keeps track of the existing CAIF Channels and
-	Physical Instances and selects the appropriate instance based
-	on Channel-Id and Physical-ID.
-
-      - CFFRML CAIF Framing layer. Handles Framing i.e. Frame length
-	and frame checksum.
-
-      - CFSERL CAIF Serial layer. Handles concatenation/split of frames
-	into CAIF Frames with correct length.
-
-::
-
-		    +---------+
-		    | Config  |
-		    | CFCNFG  |
-		    +---------+
-			 !
-    +---------+	    +---------+	    +---------+
-    |	AT    |	    | Control |	    | Datagram|
-    | CFVEIL  |	    | CFCTRL  |	    | CFDGML  |
-    +---------+	    +---------+	    +---------+
-	   \_____________!______________/
-			 !
-		    +---------+
-		    |	MUX   |
-		    |	      |
-		    +---------+
-		    _____!_____
-		   /	       \
-	    +---------+	    +---------+
-	    | CFFRML  |	    | CFFRML  |
-	    | Framing |	    | Framing |
-	    +---------+	    +---------+
-		 !		!
-	    +---------+	    +---------+
-	    |	      |	    | Serial  |
-	    |	      |	    | CFSERL  |
-	    +---------+	    +---------+
-
-
-In this layered approach the following "rules" apply.
-
-      - All layers embed the same structure "struct cflayer"
-      - A layer does not depend on any other layer's private data.
-      - Layers are stacked by setting the pointers::
-
-		  layer->up , layer->dn
-
-      -	In order to send data upwards, each layer should do::
-
-		 layer->up->receive(layer->up, packet);
-
-      - In order to send data downwards, each layer should do::
-
-		 layer->dn->transmit(layer->dn, packet);
-
-
-CAIF Socket and IP interface
-============================
-
-The IP interface and CAIF socket API are implemented on top of the
-CAIF Core protocol. The IP Interface and CAIF socket have an instance of
-'struct cflayer', just like the CAIF Core protocol stack.
-Net device and Socket implement the 'receive()' function defined by
-'struct cflayer', just like the rest of the CAIF stack. In this way, transmit and
-receive of packets is handled as by the rest of the layers: the 'dn->transmit()'
-function is called in order to transmit data.
-
-Configuration of Link Layer
----------------------------
-The Link Layer is implemented as Linux network devices (struct net_device).
-Payload handling and registration is done using standard Linux mechanisms.
-
-The CAIF Protocol relies on a loss-less link layer without implementing
-retransmission. This implies that packet drops must not happen.
-Therefore a flow-control mechanism is implemented where the physical
-interface can initiate flow stop for all CAIF Channels.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index c2406bd8ae0b..2e946924ad3f 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -17,7 +17,6 @@ Contents:
    diagnostic/index
    dsa/index
    devlink/index
-   caif/index
    ethtool-netlink
    ieee802154
    iso15765-2
diff --git a/Documentation/translations/zh_CN/networking/index.rst b/Documentation/translations/zh_CN/networking/index.rst
index c276c0993c51..333e9f6cafff 100644
--- a/Documentation/translations/zh_CN/networking/index.rst
+++ b/Documentation/translations/zh_CN/networking/index.rst
@@ -42,7 +42,6 @@ Todolist:
 *   diagnostic/index
 *   dsa/index
 *   devlink/index
-*   caif/index
 *   ethtool-netlink
 *   ieee802154
 *   iso15765-2
diff --git a/MAINTAINERS b/MAINTAINERS
index e7dc9e6fad2e..2b1b5e93c272 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5674,15 +5674,6 @@ T:	git git://linuxtv.org/media.git
 F:	Documentation/admin-guide/media/cafe_ccic*
 F:	drivers/media/platform/marvell/
 
-CAIF NETWORK LAYER
-L:	netdev@vger.kernel.org
-S:	Orphan
-F:	Documentation/networking/caif/
-F:	drivers/net/caif/
-F:	include/net/caif/
-F:	include/uapi/linux/caif/
-F:	net/caif/
-
 CAKE QDISC
 M:	Toke Høiland-Jørgensen <toke@toke.dk>
 L:	cake@lists.bufferbloat.net (moderated for non-subscribers)
diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig
index e88533b78327..de4af7c750ca 100644
--- a/arch/arm/configs/u8500_defconfig
+++ b/arch/arm/configs/u8500_defconfig
@@ -37,7 +37,6 @@ CONFIG_CFG80211=y
 CONFIG_CFG80211_DEBUGFS=y
 CONFIG_MAC80211=y
 CONFIG_MAC80211_LEDS=y
-CONFIG_CAIF=y
 CONFIG_NFC=m
 CONFIG_NFC_HCI=m
 CONFIG_NFC_SHDLC=y
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index edaab759dc50..8ec98f6dfef9 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -503,8 +503,6 @@ source "drivers/net/arcnet/Kconfig"
 
 source "drivers/atm/Kconfig"
 
-source "drivers/net/caif/Kconfig"
-
 source "drivers/net/dsa/Kconfig"
 
 source "drivers/net/ethernet/Kconfig"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 5b01215f6829..3b2d28127634 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -48,7 +48,6 @@ obj-$(CONFIG_MHI_NET) += mhi_net.o
 # Networking Drivers
 #
 obj-$(CONFIG_ARCNET) += arcnet/
-obj-$(CONFIG_CAIF) += caif/
 obj-$(CONFIG_CAN) += can/
 ifdef CONFIG_NET_DSA
 obj-y += dsa/
diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
deleted file mode 100644
index 709660cb38f8..000000000000
--- a/drivers/net/caif/Kconfig
+++ /dev/null
@@ -1,33 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# CAIF physical drivers
-#
-
-menuconfig CAIF_DRIVERS
-	bool "CAIF transport drivers"
-	depends on CAIF
-	help
-	  Enable this to see CAIF physical drivers.
-
-if CAIF_DRIVERS
-
-config CAIF_TTY
-	tristate "CAIF TTY transport driver"
-	depends on CAIF && TTY
-	default n
-	help
-	  The CAIF TTY transport driver is a Line Discipline (ldisc)
-	  identified as N_CAIF. When this ldisc is opened from user space
-	  it will redirect the TTY's traffic into the CAIF stack.
-
-config CAIF_VIRTIO
-	tristate "CAIF virtio transport driver"
-	depends on CAIF && HAS_DMA
-	select VHOST_RING
-	select VIRTIO
-	select GENERIC_ALLOCATOR
-	default n
-	help
-	  The CAIF driver for CAIF over Virtio.
-
-endif # CAIF_DRIVERS
diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile
deleted file mode 100644
index 97f664f8016c..000000000000
--- a/drivers/net/caif/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG
-
-# Serial interface
-obj-$(CONFIG_CAIF_TTY) += caif_serial.o
-
-# Virtio interface
-obj-$(CONFIG_CAIF_VIRTIO) += caif_virtio.o
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
deleted file mode 100644
index 1873d8287bb9..000000000000
--- a/drivers/net/caif/caif_serial.c
+++ /dev/null
@@ -1,443 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#include <linux/hardirq.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/rtnetlink.h>
-#include <linux/tty.h>
-#include <linux/file.h>
-#include <linux/if_arp.h>
-#include <net/caif/caif_device.h>
-#include <net/caif/cfcnfg.h>
-#include <linux/err.h>
-#include <linux/debugfs.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Sjur Brendeland");
-MODULE_DESCRIPTION("CAIF serial device TTY line discipline");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_LDISC(N_CAIF);
-
-#define SEND_QUEUE_LOW 10
-#define SEND_QUEUE_HIGH 100
-#define CAIF_SENDING	        1 /* Bit 1 = 0x02*/
-#define CAIF_FLOW_OFF_SENT	4 /* Bit 4 = 0x10 */
-#define MAX_WRITE_CHUNK	     4096
-#define ON 1
-#define OFF 0
-#define CAIF_MAX_MTU 4096
-
-static DEFINE_SPINLOCK(ser_lock);
-static LIST_HEAD(ser_list);
-static LIST_HEAD(ser_release_list);
-
-static bool ser_loop;
-module_param(ser_loop, bool, 0444);
-MODULE_PARM_DESC(ser_loop, "Run in simulated loopback mode.");
-
-static bool ser_use_stx = true;
-module_param(ser_use_stx, bool, 0444);
-MODULE_PARM_DESC(ser_use_stx, "STX enabled or not.");
-
-static bool ser_use_fcs = true;
-
-module_param(ser_use_fcs, bool, 0444);
-MODULE_PARM_DESC(ser_use_fcs, "FCS enabled or not.");
-
-static int ser_write_chunk = MAX_WRITE_CHUNK;
-module_param(ser_write_chunk, int, 0444);
-
-MODULE_PARM_DESC(ser_write_chunk, "Maximum size of data written to UART.");
-
-static struct dentry *debugfsdir;
-
-static int caif_net_open(struct net_device *dev);
-static int caif_net_close(struct net_device *dev);
-
-struct ser_device {
-	struct caif_dev_common common;
-	struct list_head node;
-	struct net_device *dev;
-	struct sk_buff_head head;
-	struct tty_struct *tty;
-	bool tx_started;
-	unsigned long state;
-#ifdef CONFIG_DEBUG_FS
-	struct dentry *debugfs_tty_dir;
-	struct debugfs_blob_wrapper tx_blob;
-	struct debugfs_blob_wrapper rx_blob;
-	u8 rx_data[128];
-	u8 tx_data[128];
-	u8 tty_status;
-
-#endif
-};
-
-static void caifdev_setup(struct net_device *dev);
-static void ldisc_tx_wakeup(struct tty_struct *tty);
-#ifdef CONFIG_DEBUG_FS
-static inline void update_tty_status(struct ser_device *ser)
-{
-	ser->tty_status =
-		ser->tty->flow.stopped << 5 |
-		ser->tty->flow.tco_stopped << 3 |
-		ser->tty->ctrl.packet << 2;
-}
-static inline void debugfs_init(struct ser_device *ser, struct tty_struct *tty)
-{
-	ser->debugfs_tty_dir = debugfs_create_dir(tty->name, debugfsdir);
-
-	debugfs_create_blob("last_tx_msg", 0400, ser->debugfs_tty_dir,
-			    &ser->tx_blob);
-
-	debugfs_create_blob("last_rx_msg", 0400, ser->debugfs_tty_dir,
-			    &ser->rx_blob);
-
-	debugfs_create_xul("ser_state", 0400, ser->debugfs_tty_dir,
-			   &ser->state);
-
-	debugfs_create_x8("tty_status", 0400, ser->debugfs_tty_dir,
-			  &ser->tty_status);
-
-	ser->tx_blob.data = ser->tx_data;
-	ser->tx_blob.size = 0;
-	ser->rx_blob.data = ser->rx_data;
-	ser->rx_blob.size = 0;
-}
-
-static inline void debugfs_deinit(struct ser_device *ser)
-{
-	debugfs_remove_recursive(ser->debugfs_tty_dir);
-}
-
-static inline void debugfs_rx(struct ser_device *ser, const u8 *data, int size)
-{
-	if (size > sizeof(ser->rx_data))
-		size = sizeof(ser->rx_data);
-	memcpy(ser->rx_data, data, size);
-	ser->rx_blob.data = ser->rx_data;
-	ser->rx_blob.size = size;
-}
-#else
-static inline void debugfs_init(struct ser_device *ser, struct tty_struct *tty)
-{
-}
-
-static inline void debugfs_deinit(struct ser_device *ser)
-{
-}
-
-static inline void update_tty_status(struct ser_device *ser)
-{
-}
-
-static inline void debugfs_rx(struct ser_device *ser, const u8 *data, int size)
-{
-}
-#endif
-
-static void ldisc_receive(struct tty_struct *tty, const u8 *data,
-			  const u8 *flags, size_t count)
-{
-	struct sk_buff *skb = NULL;
-	struct ser_device *ser;
-	int ret;
-
-	ser = tty->disc_data;
-
-	/*
-	 * NOTE: flags may contain information about break or overrun.
-	 * This is not yet handled.
-	 */
-
-
-	/*
-	 * Workaround for garbage at start of transmission,
-	 * only enable if STX handling is not enabled.
-	 */
-	if (!ser->common.use_stx && !ser->tx_started) {
-		dev_info(&ser->dev->dev,
-			"Bytes received before initial transmission -"
-			"bytes discarded.\n");
-		return;
-	}
-
-	BUG_ON(ser->dev == NULL);
-
-	/* Get a suitable caif packet and copy in data. */
-	skb = netdev_alloc_skb(ser->dev, count+1);
-	if (skb == NULL)
-		return;
-	skb_put_data(skb, data, count);
-
-	skb->protocol = htons(ETH_P_CAIF);
-	skb_reset_mac_header(skb);
-	debugfs_rx(ser, data, count);
-	/* Push received packet up the stack. */
-	ret = netif_rx(skb);
-	if (!ret) {
-		ser->dev->stats.rx_packets++;
-		ser->dev->stats.rx_bytes += count;
-	} else
-		++ser->dev->stats.rx_dropped;
-	update_tty_status(ser);
-}
-
-static int handle_tx(struct ser_device *ser)
-{
-	struct tty_struct *tty;
-	struct sk_buff *skb;
-	int tty_wr, len, room;
-
-	tty = ser->tty;
-	ser->tx_started = true;
-
-	/* Enter critical section */
-	if (test_and_set_bit(CAIF_SENDING, &ser->state))
-		return 0;
-
-	/* skb_peek is safe because handle_tx is called after skb_queue_tail */
-	while ((skb = skb_peek(&ser->head)) != NULL) {
-
-		/* Make sure you don't write too much */
-		len = skb->len;
-		room = tty_write_room(tty);
-		if (!room)
-			break;
-		if (room > ser_write_chunk)
-			room = ser_write_chunk;
-		if (len > room)
-			len = room;
-
-		/* Write to tty or loopback */
-		if (!ser_loop) {
-			tty_wr = tty->ops->write(tty, skb->data, len);
-			update_tty_status(ser);
-		} else {
-			tty_wr = len;
-			ldisc_receive(tty, skb->data, NULL, len);
-		}
-		ser->dev->stats.tx_packets++;
-		ser->dev->stats.tx_bytes += tty_wr;
-
-		/* Error on TTY ?! */
-		if (tty_wr < 0)
-			goto error;
-		/* Reduce buffer written, and discard if empty */
-		skb_pull(skb, tty_wr);
-		if (skb->len == 0) {
-			struct sk_buff *tmp = skb_dequeue(&ser->head);
-			WARN_ON(tmp != skb);
-			dev_consume_skb_any(skb);
-		}
-	}
-	/* Send flow off if queue is empty */
-	if (ser->head.qlen <= SEND_QUEUE_LOW &&
-		test_and_clear_bit(CAIF_FLOW_OFF_SENT, &ser->state) &&
-		ser->common.flowctrl != NULL)
-				ser->common.flowctrl(ser->dev, ON);
-	clear_bit(CAIF_SENDING, &ser->state);
-	return 0;
-error:
-	clear_bit(CAIF_SENDING, &ser->state);
-	return tty_wr;
-}
-
-static netdev_tx_t caif_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct ser_device *ser;
-
-	ser = netdev_priv(dev);
-
-	/* Send flow off once, on high water mark */
-	if (ser->head.qlen > SEND_QUEUE_HIGH &&
-		!test_and_set_bit(CAIF_FLOW_OFF_SENT, &ser->state) &&
-		ser->common.flowctrl != NULL)
-
-		ser->common.flowctrl(ser->dev, OFF);
-
-	skb_queue_tail(&ser->head, skb);
-	return handle_tx(ser);
-}
-
-
-static void ldisc_tx_wakeup(struct tty_struct *tty)
-{
-	struct ser_device *ser;
-
-	ser = tty->disc_data;
-	BUG_ON(ser == NULL);
-	WARN_ON(ser->tty != tty);
-	handle_tx(ser);
-}
-
-
-static void ser_release(struct work_struct *work)
-{
-	struct list_head list;
-	struct ser_device *ser, *tmp;
-	struct tty_struct *tty;
-
-	spin_lock(&ser_lock);
-	list_replace_init(&ser_release_list, &list);
-	spin_unlock(&ser_lock);
-
-	if (!list_empty(&list)) {
-		rtnl_lock();
-		list_for_each_entry_safe(ser, tmp, &list, node) {
-			tty = ser->tty;
-			dev_close(ser->dev);
-			unregister_netdevice(ser->dev);
-			debugfs_deinit(ser);
-			tty_kref_put(tty->link);
-			tty_kref_put(tty);
-		}
-		rtnl_unlock();
-	}
-}
-
-static DECLARE_WORK(ser_release_work, ser_release);
-
-static int ldisc_open(struct tty_struct *tty)
-{
-	struct ser_device *ser;
-	struct net_device *dev;
-	char name[64];
-	int result;
-
-	/* No write no play */
-	if (tty->ops->write == NULL)
-		return -EOPNOTSUPP;
-	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_TTY_CONFIG))
-		return -EPERM;
-
-	/* release devices to avoid name collision */
-	ser_release(NULL);
-
-	result = snprintf(name, sizeof(name), "cf%s", tty->name);
-	if (result >= IFNAMSIZ)
-		return -EINVAL;
-	dev = alloc_netdev(sizeof(*ser), name, NET_NAME_UNKNOWN,
-			   caifdev_setup);
-	if (!dev)
-		return -ENOMEM;
-
-	ser = netdev_priv(dev);
-	ser->tty = tty_kref_get(tty);
-	tty_kref_get(tty->link);
-	ser->dev = dev;
-	debugfs_init(ser, tty);
-	tty->receive_room = 4096;
-	tty->disc_data = ser;
-	set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
-	rtnl_lock();
-	result = register_netdevice(dev);
-	if (result) {
-		tty_kref_put(tty->link);
-		tty_kref_put(tty);
-		rtnl_unlock();
-		free_netdev(dev);
-		return -ENODEV;
-	}
-
-	spin_lock(&ser_lock);
-	list_add(&ser->node, &ser_list);
-	spin_unlock(&ser_lock);
-	rtnl_unlock();
-	netif_stop_queue(dev);
-	update_tty_status(ser);
-	return 0;
-}
-
-static void ldisc_close(struct tty_struct *tty)
-{
-	struct ser_device *ser = tty->disc_data;
-
-	spin_lock(&ser_lock);
-	list_move(&ser->node, &ser_release_list);
-	spin_unlock(&ser_lock);
-	schedule_work(&ser_release_work);
-}
-
-/* The line discipline structure. */
-static struct tty_ldisc_ops caif_ldisc = {
-	.owner =	THIS_MODULE,
-	.num =		N_CAIF,
-	.name =		"n_caif",
-	.open =		ldisc_open,
-	.close =	ldisc_close,
-	.receive_buf =	ldisc_receive,
-	.write_wakeup =	ldisc_tx_wakeup
-};
-
-static const struct net_device_ops netdev_ops = {
-	.ndo_open = caif_net_open,
-	.ndo_stop = caif_net_close,
-	.ndo_start_xmit = caif_xmit
-};
-
-static void caifdev_setup(struct net_device *dev)
-{
-	struct ser_device *serdev = netdev_priv(dev);
-
-	dev->features = 0;
-	dev->netdev_ops = &netdev_ops;
-	dev->type = ARPHRD_CAIF;
-	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
-	dev->mtu = CAIF_MAX_MTU;
-	dev->priv_flags |= IFF_NO_QUEUE;
-	dev->needs_free_netdev = true;
-	skb_queue_head_init(&serdev->head);
-	serdev->common.link_select = CAIF_LINK_LOW_LATENCY;
-	serdev->common.use_frag = true;
-	serdev->common.use_stx = ser_use_stx;
-	serdev->common.use_fcs = ser_use_fcs;
-	serdev->dev = dev;
-}
-
-
-static int caif_net_open(struct net_device *dev)
-{
-	netif_wake_queue(dev);
-	return 0;
-}
-
-static int caif_net_close(struct net_device *dev)
-{
-	netif_stop_queue(dev);
-	return 0;
-}
-
-static int __init caif_ser_init(void)
-{
-	int ret;
-
-	ret = tty_register_ldisc(&caif_ldisc);
-	if (ret < 0)
-		pr_err("cannot register CAIF ldisc=%d err=%d\n", N_CAIF, ret);
-
-	debugfsdir = debugfs_create_dir("caif_serial", NULL);
-	return ret;
-}
-
-static void __exit caif_ser_exit(void)
-{
-	spin_lock(&ser_lock);
-	list_splice(&ser_list, &ser_release_list);
-	spin_unlock(&ser_lock);
-	ser_release(NULL);
-	cancel_work_sync(&ser_release_work);
-	tty_unregister_ldisc(&caif_ldisc);
-	debugfs_remove_recursive(debugfsdir);
-}
-
-module_init(caif_ser_init);
-module_exit(caif_ser_exit);
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
deleted file mode 100644
index 8ac1a4b8e055..000000000000
--- a/drivers/net/caif/caif_virtio.c
+++ /dev/null
@@ -1,791 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2013
- * Authors: Vicram Arv
- *	    Dmitry Tarnyagin <dmitry.tarnyagin@lockless.no>
- *	    Sjur Brendeland
- */
-#include <linux/module.h>
-#include <linux/if_arp.h>
-#include <linux/virtio.h>
-#include <linux/vringh.h>
-#include <linux/debugfs.h>
-#include <linux/spinlock.h>
-#include <linux/genalloc.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
-#include <linux/rtnetlink.h>
-#include <linux/virtio_ids.h>
-#include <linux/virtio_caif.h>
-#include <linux/virtio_ring.h>
-#include <linux/dma-mapping.h>
-#include <net/caif/caif_dev.h>
-#include <linux/virtio_config.h>
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Vicram Arv");
-MODULE_AUTHOR("Sjur Brendeland");
-MODULE_DESCRIPTION("Virtio CAIF Driver");
-
-/* NAPI schedule quota */
-#define CFV_DEFAULT_QUOTA 32
-
-/* Defaults used if virtio config space is unavailable */
-#define CFV_DEF_MTU_SIZE 4096
-#define CFV_DEF_HEADROOM 32
-#define CFV_DEF_TAILROOM 32
-
-/* Required IP header alignment */
-#define IP_HDR_ALIGN 4
-
-/* struct cfv_napi_contxt - NAPI context info
- * @riov: IOV holding data read from the ring. Note that riov may
- *	  still hold data when cfv_rx_poll() returns.
- * @head: Last descriptor ID we received from vringh_getdesc_kern.
- *	  We use this to put descriptor back on the used ring. USHRT_MAX is
- *	  used to indicate invalid head-id.
- */
-struct cfv_napi_context {
-	struct vringh_kiov riov;
-	unsigned short head;
-};
-
-/* struct cfv_stats - statistics for debugfs
- * @rx_napi_complete:	Number of NAPI completions (RX)
- * @rx_napi_resched:	Number of calls where the full quota was used (RX)
- * @rx_nomem:		Number of SKB alloc failures (RX)
- * @rx_kicks:		Number of RX kicks
- * @tx_full_ring:	Number times TX ring was full
- * @tx_no_mem:		Number of times TX went out of memory
- * @tx_flow_on:		Number of flow on (TX)
- * @tx_kicks:		Number of TX kicks
- */
-struct cfv_stats {
-	u32 rx_napi_complete;
-	u32 rx_napi_resched;
-	u32 rx_nomem;
-	u32 rx_kicks;
-	u32 tx_full_ring;
-	u32 tx_no_mem;
-	u32 tx_flow_on;
-	u32 tx_kicks;
-};
-
-/* struct cfv_info - Caif Virtio control structure
- * @cfdev:	caif common header
- * @vdev:	Associated virtio device
- * @vr_rx:	rx/downlink host vring
- * @vq_tx:	tx/uplink virtqueue
- * @ndev:	CAIF link layer device
- * @watermark_tx: indicates number of free descriptors we need
- *		to reopen the tx-queues after overload.
- * @tx_lock:	protects vq_tx from concurrent use
- * @tx_release_tasklet: Tasklet for freeing consumed TX buffers
- * @napi:       Napi context used in cfv_rx_poll()
- * @ctx:        Context data used in cfv_rx_poll()
- * @tx_hr:	transmit headroom
- * @rx_hr:	receive headroom
- * @tx_tr:	transmit tail room
- * @rx_tr:	receive tail room
- * @mtu:	transmit max size
- * @mru:	receive max size
- * @allocsz:    size of dma memory reserved for TX buffers
- * @alloc_addr: virtual address to dma memory for TX buffers
- * @alloc_dma:  dma address to dma memory for TX buffers
- * @genpool:    Gen Pool used for allocating TX buffers
- * @reserved_mem: Pointer to memory reserve allocated from genpool
- * @reserved_size: Size of memory reserve allocated from genpool
- * @stats:       Statistics exposed in sysfs
- * @debugfs:    Debugfs dentry for statistic counters
- */
-struct cfv_info {
-	struct caif_dev_common cfdev;
-	struct virtio_device *vdev;
-	struct vringh *vr_rx;
-	struct virtqueue *vq_tx;
-	struct net_device *ndev;
-	unsigned int watermark_tx;
-	/* Protect access to vq_tx */
-	spinlock_t tx_lock;
-	struct tasklet_struct tx_release_tasklet;
-	struct napi_struct napi;
-	struct cfv_napi_context ctx;
-	u16 tx_hr;
-	u16 rx_hr;
-	u16 tx_tr;
-	u16 rx_tr;
-	u32 mtu;
-	u32 mru;
-	size_t allocsz;
-	void *alloc_addr;
-	dma_addr_t alloc_dma;
-	struct gen_pool *genpool;
-	unsigned long reserved_mem;
-	size_t reserved_size;
-	struct cfv_stats stats;
-	struct dentry *debugfs;
-};
-
-/* struct buf_info - maintains transmit buffer data handle
- * @size:	size of transmit buffer
- * @dma_handle: handle to allocated dma device memory area
- * @vaddr:	virtual address mapping to allocated memory area
- */
-struct buf_info {
-	size_t size;
-	u8 *vaddr;
-};
-
-/* Called from virtio device, in IRQ context */
-static void cfv_release_cb(struct virtqueue *vq_tx)
-{
-	struct cfv_info *cfv = vq_tx->vdev->priv;
-
-	++cfv->stats.tx_kicks;
-	tasklet_schedule(&cfv->tx_release_tasklet);
-}
-
-static void free_buf_info(struct cfv_info *cfv, struct buf_info *buf_info)
-{
-	if (!buf_info)
-		return;
-	gen_pool_free(cfv->genpool, (unsigned long) buf_info->vaddr,
-		      buf_info->size);
-	kfree(buf_info);
-}
-
-/* This is invoked whenever the remote processor completed processing
- * a TX msg we just sent, and the buffer is put back to the used ring.
- */
-static void cfv_release_used_buf(struct virtqueue *vq_tx)
-{
-	struct cfv_info *cfv = vq_tx->vdev->priv;
-	unsigned long flags;
-
-	BUG_ON(vq_tx != cfv->vq_tx);
-
-	for (;;) {
-		unsigned int len;
-		struct buf_info *buf_info;
-
-		/* Get used buffer from used ring to recycle used descriptors */
-		spin_lock_irqsave(&cfv->tx_lock, flags);
-		buf_info = virtqueue_get_buf(vq_tx, &len);
-		spin_unlock_irqrestore(&cfv->tx_lock, flags);
-
-		/* Stop looping if there are no more buffers to free */
-		if (!buf_info)
-			break;
-
-		free_buf_info(cfv, buf_info);
-
-		/* watermark_tx indicates if we previously stopped the tx
-		 * queues. If we have enough free stots in the virtio ring,
-		 * re-establish memory reserved and open up tx queues.
-		 */
-		if (cfv->vq_tx->num_free <= cfv->watermark_tx)
-			continue;
-
-		/* Re-establish memory reserve */
-		if (cfv->reserved_mem == 0 && cfv->genpool)
-			cfv->reserved_mem =
-				gen_pool_alloc(cfv->genpool,
-					       cfv->reserved_size);
-
-		/* Open up the tx queues */
-		if (cfv->reserved_mem) {
-			cfv->watermark_tx =
-				virtqueue_get_vring_size(cfv->vq_tx);
-			netif_tx_wake_all_queues(cfv->ndev);
-			/* Buffers are recycled in cfv_netdev_tx, so
-			 * disable notifications when queues are opened.
-			 */
-			virtqueue_disable_cb(cfv->vq_tx);
-			++cfv->stats.tx_flow_on;
-		} else {
-			/* if no memory reserve, wait for more free slots */
-			WARN_ON(cfv->watermark_tx >
-			       virtqueue_get_vring_size(cfv->vq_tx));
-			cfv->watermark_tx +=
-				virtqueue_get_vring_size(cfv->vq_tx) / 4;
-		}
-	}
-}
-
-/* Allocate a SKB and copy packet data to it */
-static struct sk_buff *cfv_alloc_and_copy_skb(int *err,
-					      struct cfv_info *cfv,
-					      u8 *frm, u32 frm_len)
-{
-	struct sk_buff *skb;
-	u32 cfpkt_len, pad_len;
-
-	*err = 0;
-	/* Verify that packet size with down-link header and mtu size */
-	if (frm_len > cfv->mru || frm_len <= cfv->rx_hr + cfv->rx_tr) {
-		netdev_err(cfv->ndev,
-			   "Invalid frmlen:%u  mtu:%u hr:%d tr:%d\n",
-			   frm_len, cfv->mru,  cfv->rx_hr,
-			   cfv->rx_tr);
-		*err = -EPROTO;
-		return NULL;
-	}
-
-	cfpkt_len = frm_len - (cfv->rx_hr + cfv->rx_tr);
-	pad_len = (unsigned long)(frm + cfv->rx_hr) & (IP_HDR_ALIGN - 1);
-
-	skb = netdev_alloc_skb(cfv->ndev, frm_len + pad_len);
-	if (!skb) {
-		*err = -ENOMEM;
-		return NULL;
-	}
-
-	skb_reserve(skb, cfv->rx_hr + pad_len);
-
-	skb_put_data(skb, frm + cfv->rx_hr, cfpkt_len);
-	return skb;
-}
-
-/* Get packets from the host vring */
-static int cfv_rx_poll(struct napi_struct *napi, int quota)
-{
-	struct cfv_info *cfv = container_of(napi, struct cfv_info, napi);
-	int rxcnt = 0;
-	int err = 0;
-	void *buf;
-	struct sk_buff *skb;
-	struct vringh_kiov *riov = &cfv->ctx.riov;
-	unsigned int skb_len;
-
-	do {
-		skb = NULL;
-
-		/* Put the previous iovec back on the used ring and
-		 * fetch a new iovec if we have processed all elements.
-		 */
-		if (riov->i == riov->used) {
-			if (cfv->ctx.head != USHRT_MAX) {
-				vringh_complete_kern(cfv->vr_rx,
-						     cfv->ctx.head,
-						     0);
-				cfv->ctx.head = USHRT_MAX;
-			}
-
-			err = vringh_getdesc_kern(
-				cfv->vr_rx,
-				riov,
-				NULL,
-				&cfv->ctx.head,
-				GFP_ATOMIC);
-
-			if (err <= 0)
-				goto exit;
-		}
-
-		buf = phys_to_virt((unsigned long) riov->iov[riov->i].iov_base);
-		/* TODO: Add check on valid buffer address */
-
-		skb = cfv_alloc_and_copy_skb(&err, cfv, buf,
-					     riov->iov[riov->i].iov_len);
-		if (unlikely(err))
-			goto exit;
-
-		/* Push received packet up the stack. */
-		skb_len = skb->len;
-		skb->protocol = htons(ETH_P_CAIF);
-		skb_reset_mac_header(skb);
-		skb->dev = cfv->ndev;
-		err = netif_receive_skb(skb);
-		if (unlikely(err)) {
-			++cfv->ndev->stats.rx_dropped;
-		} else {
-			++cfv->ndev->stats.rx_packets;
-			cfv->ndev->stats.rx_bytes += skb_len;
-		}
-
-		++riov->i;
-		++rxcnt;
-	} while (rxcnt < quota);
-
-	++cfv->stats.rx_napi_resched;
-	goto out;
-
-exit:
-	switch (err) {
-	case 0:
-		++cfv->stats.rx_napi_complete;
-
-		/* Really out of packets? (stolen from virtio_net)*/
-		napi_complete(napi);
-		if (unlikely(!vringh_notify_enable_kern(cfv->vr_rx)) &&
-		    napi_schedule_prep(napi)) {
-			vringh_notify_disable_kern(cfv->vr_rx);
-			__napi_schedule(napi);
-		}
-		break;
-
-	case -ENOMEM:
-		++cfv->stats.rx_nomem;
-		dev_kfree_skb(skb);
-		/* Stop NAPI poll on OOM, we hope to be polled later */
-		napi_complete(napi);
-		vringh_notify_enable_kern(cfv->vr_rx);
-		break;
-
-	default:
-		/* We're doomed, any modem fault is fatal */
-		netdev_warn(cfv->ndev, "Bad ring, disable device\n");
-		cfv->ndev->stats.rx_dropped = riov->used - riov->i;
-		napi_complete(napi);
-		vringh_notify_disable_kern(cfv->vr_rx);
-		netif_carrier_off(cfv->ndev);
-		break;
-	}
-out:
-	if (rxcnt && vringh_need_notify_kern(cfv->vr_rx) > 0)
-		vringh_notify(cfv->vr_rx);
-	return rxcnt;
-}
-
-static void cfv_recv(struct virtio_device *vdev, struct vringh *vr_rx)
-{
-	struct cfv_info *cfv = vdev->priv;
-
-	++cfv->stats.rx_kicks;
-	vringh_notify_disable_kern(cfv->vr_rx);
-	napi_schedule(&cfv->napi);
-}
-
-static void cfv_destroy_genpool(struct cfv_info *cfv)
-{
-	if (cfv->alloc_addr)
-		dma_free_coherent(cfv->vdev->dev.parent->parent,
-				  cfv->allocsz, cfv->alloc_addr,
-				  cfv->alloc_dma);
-
-	if (!cfv->genpool)
-		return;
-	gen_pool_free(cfv->genpool,  cfv->reserved_mem,
-		      cfv->reserved_size);
-	gen_pool_destroy(cfv->genpool);
-	cfv->genpool = NULL;
-}
-
-static int cfv_create_genpool(struct cfv_info *cfv)
-{
-	int err;
-
-	/* dma_alloc can only allocate whole pages, and we need a more
-	 * fine graned allocation so we use genpool. We ask for space needed
-	 * by IP and a full ring. If the dma allcoation fails we retry with a
-	 * smaller allocation size.
-	 */
-	err = -ENOMEM;
-	cfv->allocsz = (virtqueue_get_vring_size(cfv->vq_tx) *
-			(ETH_DATA_LEN + cfv->tx_hr + cfv->tx_tr) * 11)/10;
-	if (cfv->allocsz <= (num_possible_cpus() + 1) * cfv->ndev->mtu)
-		return -EINVAL;
-
-	for (;;) {
-		if (cfv->allocsz <= num_possible_cpus() * cfv->ndev->mtu) {
-			netdev_info(cfv->ndev, "Not enough device memory\n");
-			return -ENOMEM;
-		}
-
-		cfv->alloc_addr = dma_alloc_coherent(
-						cfv->vdev->dev.parent->parent,
-						cfv->allocsz, &cfv->alloc_dma,
-						GFP_ATOMIC);
-		if (cfv->alloc_addr)
-			break;
-
-		cfv->allocsz = (cfv->allocsz * 3) >> 2;
-	}
-
-	netdev_dbg(cfv->ndev, "Allocated %zd bytes from dma-memory\n",
-		   cfv->allocsz);
-
-	/* Allocate on 128 bytes boundaries (1 << 7)*/
-	cfv->genpool = gen_pool_create(7, -1);
-	if (!cfv->genpool)
-		goto err;
-
-	err = gen_pool_add_virt(cfv->genpool, (unsigned long)cfv->alloc_addr,
-				(phys_addr_t)virt_to_phys(cfv->alloc_addr),
-				cfv->allocsz, -1);
-	if (err)
-		goto err;
-
-	/* Reserve some memory for low memory situations. If we hit the roof
-	 * in the memory pool, we stop TX flow and release the reserve.
-	 */
-	cfv->reserved_size = num_possible_cpus() * cfv->ndev->mtu;
-	cfv->reserved_mem = gen_pool_alloc(cfv->genpool,
-					   cfv->reserved_size);
-	if (!cfv->reserved_mem) {
-		err = -ENOMEM;
-		goto err;
-	}
-
-	cfv->watermark_tx = virtqueue_get_vring_size(cfv->vq_tx);
-	return 0;
-err:
-	cfv_destroy_genpool(cfv);
-	return err;
-}
-
-/* Enable the CAIF interface and allocate the memory-pool */
-static int cfv_netdev_open(struct net_device *netdev)
-{
-	struct cfv_info *cfv = netdev_priv(netdev);
-
-	if (cfv_create_genpool(cfv))
-		return -ENOMEM;
-
-	netif_carrier_on(netdev);
-	napi_enable(&cfv->napi);
-
-	/* Schedule NAPI to read any pending packets */
-	napi_schedule(&cfv->napi);
-	return 0;
-}
-
-/* Disable the CAIF interface and free the memory-pool */
-static int cfv_netdev_close(struct net_device *netdev)
-{
-	struct cfv_info *cfv = netdev_priv(netdev);
-	unsigned long flags;
-	struct buf_info *buf_info;
-
-	/* Disable interrupts, queues and NAPI polling */
-	netif_carrier_off(netdev);
-	virtqueue_disable_cb(cfv->vq_tx);
-	vringh_notify_disable_kern(cfv->vr_rx);
-	napi_disable(&cfv->napi);
-
-	/* Release any TX buffers on both used and available rings */
-	cfv_release_used_buf(cfv->vq_tx);
-	spin_lock_irqsave(&cfv->tx_lock, flags);
-	while ((buf_info = virtqueue_detach_unused_buf(cfv->vq_tx)))
-		free_buf_info(cfv, buf_info);
-	spin_unlock_irqrestore(&cfv->tx_lock, flags);
-
-	/* Release all dma allocated memory and destroy the pool */
-	cfv_destroy_genpool(cfv);
-	return 0;
-}
-
-/* Allocate a buffer in dma-memory and copy skb to it */
-static struct buf_info *cfv_alloc_and_copy_to_shm(struct cfv_info *cfv,
-						       struct sk_buff *skb,
-						       struct scatterlist *sg)
-{
-	struct caif_payload_info *info = (void *)&skb->cb;
-	struct buf_info *buf_info = NULL;
-	u8 pad_len, hdr_ofs;
-
-	if (!cfv->genpool)
-		goto err;
-
-	if (unlikely(cfv->tx_hr + skb->len + cfv->tx_tr > cfv->mtu)) {
-		netdev_warn(cfv->ndev, "Invalid packet len (%d > %d)\n",
-			    cfv->tx_hr + skb->len + cfv->tx_tr, cfv->mtu);
-		goto err;
-	}
-
-	buf_info = kmalloc_obj(struct buf_info, GFP_ATOMIC);
-	if (unlikely(!buf_info))
-		goto err;
-
-	/* Make the IP header aligned in the buffer */
-	hdr_ofs = cfv->tx_hr + info->hdr_len;
-	pad_len = hdr_ofs & (IP_HDR_ALIGN - 1);
-	buf_info->size = cfv->tx_hr + skb->len + cfv->tx_tr + pad_len;
-
-	/* allocate dma memory buffer */
-	buf_info->vaddr = (void *)gen_pool_alloc(cfv->genpool, buf_info->size);
-	if (unlikely(!buf_info->vaddr))
-		goto err;
-
-	/* copy skbuf contents to send buffer */
-	skb_copy_bits(skb, 0, buf_info->vaddr + cfv->tx_hr + pad_len, skb->len);
-	sg_init_one(sg, buf_info->vaddr + pad_len,
-		    skb->len + cfv->tx_hr + cfv->rx_hr);
-
-	return buf_info;
-err:
-	kfree(buf_info);
-	return NULL;
-}
-
-/* Put the CAIF packet on the virtio ring and kick the receiver */
-static netdev_tx_t cfv_netdev_tx(struct sk_buff *skb, struct net_device *netdev)
-{
-	struct cfv_info *cfv = netdev_priv(netdev);
-	struct buf_info *buf_info;
-	struct scatterlist sg;
-	unsigned long flags;
-	bool flow_off = false;
-	int ret;
-
-	/* garbage collect released buffers */
-	cfv_release_used_buf(cfv->vq_tx);
-	spin_lock_irqsave(&cfv->tx_lock, flags);
-
-	/* Flow-off check takes into account number of cpus to make sure
-	 * virtqueue will not be overfilled in any possible smp conditions.
-	 *
-	 * Flow-on is triggered when sufficient buffers are freed
-	 */
-	if (unlikely(cfv->vq_tx->num_free <= num_present_cpus())) {
-		flow_off = true;
-		cfv->stats.tx_full_ring++;
-	}
-
-	/* If we run out of memory, we release the memory reserve and retry
-	 * allocation.
-	 */
-	buf_info = cfv_alloc_and_copy_to_shm(cfv, skb, &sg);
-	if (unlikely(!buf_info)) {
-		cfv->stats.tx_no_mem++;
-		flow_off = true;
-
-		if (cfv->reserved_mem && cfv->genpool) {
-			gen_pool_free(cfv->genpool,  cfv->reserved_mem,
-				      cfv->reserved_size);
-			cfv->reserved_mem = 0;
-			buf_info = cfv_alloc_and_copy_to_shm(cfv, skb, &sg);
-		}
-	}
-
-	if (unlikely(flow_off)) {
-		/* Turn flow on when a 1/4 of the descriptors are released */
-		cfv->watermark_tx = virtqueue_get_vring_size(cfv->vq_tx) / 4;
-		/* Enable notifications of recycled TX buffers */
-		virtqueue_enable_cb(cfv->vq_tx);
-		netif_tx_stop_all_queues(netdev);
-	}
-
-	if (unlikely(!buf_info)) {
-		/* If the memory reserve does it's job, this shouldn't happen */
-		netdev_warn(cfv->ndev, "Out of gen_pool memory\n");
-		goto err;
-	}
-
-	ret = virtqueue_add_outbuf(cfv->vq_tx, &sg, 1, buf_info, GFP_ATOMIC);
-	if (unlikely((ret < 0))) {
-		/* If flow control works, this shouldn't happen */
-		netdev_warn(cfv->ndev, "Failed adding buffer to TX vring:%d\n",
-			    ret);
-		goto err;
-	}
-
-	/* update netdev statistics */
-	cfv->ndev->stats.tx_packets++;
-	cfv->ndev->stats.tx_bytes += skb->len;
-	spin_unlock_irqrestore(&cfv->tx_lock, flags);
-
-	/* tell the remote processor it has a pending message to read */
-	virtqueue_kick(cfv->vq_tx);
-
-	dev_kfree_skb(skb);
-	return NETDEV_TX_OK;
-err:
-	spin_unlock_irqrestore(&cfv->tx_lock, flags);
-	cfv->ndev->stats.tx_dropped++;
-	free_buf_info(cfv, buf_info);
-	dev_kfree_skb(skb);
-	return NETDEV_TX_OK;
-}
-
-static void cfv_tx_release_tasklet(struct tasklet_struct *t)
-{
-	struct cfv_info *cfv = from_tasklet(cfv, t, tx_release_tasklet);
-	cfv_release_used_buf(cfv->vq_tx);
-}
-
-static const struct net_device_ops cfv_netdev_ops = {
-	.ndo_open = cfv_netdev_open,
-	.ndo_stop = cfv_netdev_close,
-	.ndo_start_xmit = cfv_netdev_tx,
-};
-
-static void cfv_netdev_setup(struct net_device *netdev)
-{
-	netdev->netdev_ops = &cfv_netdev_ops;
-	netdev->type = ARPHRD_CAIF;
-	netdev->tx_queue_len = 100;
-	netdev->flags = IFF_POINTOPOINT | IFF_NOARP;
-	netdev->mtu = CFV_DEF_MTU_SIZE;
-	netdev->needs_free_netdev = true;
-}
-
-/* Create debugfs counters for the device */
-static inline void debugfs_init(struct cfv_info *cfv)
-{
-	cfv->debugfs = debugfs_create_dir(netdev_name(cfv->ndev), NULL);
-
-	debugfs_create_u32("rx-napi-complete", 0400, cfv->debugfs,
-			   &cfv->stats.rx_napi_complete);
-	debugfs_create_u32("rx-napi-resched", 0400, cfv->debugfs,
-			   &cfv->stats.rx_napi_resched);
-	debugfs_create_u32("rx-nomem", 0400, cfv->debugfs,
-			   &cfv->stats.rx_nomem);
-	debugfs_create_u32("rx-kicks", 0400, cfv->debugfs,
-			   &cfv->stats.rx_kicks);
-	debugfs_create_u32("tx-full-ring", 0400, cfv->debugfs,
-			   &cfv->stats.tx_full_ring);
-	debugfs_create_u32("tx-no-mem", 0400, cfv->debugfs,
-			   &cfv->stats.tx_no_mem);
-	debugfs_create_u32("tx-kicks", 0400, cfv->debugfs,
-			   &cfv->stats.tx_kicks);
-	debugfs_create_u32("tx-flow-on", 0400, cfv->debugfs,
-			   &cfv->stats.tx_flow_on);
-}
-
-/* Setup CAIF for the a virtio device */
-static int cfv_probe(struct virtio_device *vdev)
-{
-	vrh_callback_t *vrh_cbs = cfv_recv;
-	const char *cfv_netdev_name = "cfvrt";
-	struct net_device *netdev;
-	struct cfv_info *cfv;
-	int err;
-
-	netdev = alloc_netdev(sizeof(struct cfv_info), cfv_netdev_name,
-			      NET_NAME_UNKNOWN, cfv_netdev_setup);
-	if (!netdev)
-		return -ENOMEM;
-
-	cfv = netdev_priv(netdev);
-	cfv->vdev = vdev;
-	cfv->ndev = netdev;
-
-	spin_lock_init(&cfv->tx_lock);
-
-	/* Get the RX virtio ring. This is a "host side vring". */
-	err = -ENODEV;
-	if (!vdev->vringh_config || !vdev->vringh_config->find_vrhs)
-		goto err;
-
-	err = vdev->vringh_config->find_vrhs(vdev, 1, &cfv->vr_rx, &vrh_cbs);
-	if (err)
-		goto err;
-
-	/* Get the TX virtio ring. This is a "guest side vring". */
-	cfv->vq_tx = virtio_find_single_vq(vdev, cfv_release_cb, "output");
-	if (IS_ERR(cfv->vq_tx)) {
-		err = PTR_ERR(cfv->vq_tx);
-		goto err;
-	}
-
-	/* Get the CAIF configuration from virtio config space, if available */
-	if (vdev->config->get) {
-		virtio_cread(vdev, struct virtio_caif_transf_config, headroom,
-			     &cfv->tx_hr);
-		virtio_cread(vdev, struct virtio_caif_transf_config, headroom,
-			     &cfv->rx_hr);
-		virtio_cread(vdev, struct virtio_caif_transf_config, tailroom,
-			     &cfv->tx_tr);
-		virtio_cread(vdev, struct virtio_caif_transf_config, tailroom,
-			     &cfv->rx_tr);
-		virtio_cread(vdev, struct virtio_caif_transf_config, mtu,
-			     &cfv->mtu);
-		virtio_cread(vdev, struct virtio_caif_transf_config, mtu,
-			     &cfv->mru);
-	} else {
-		cfv->tx_hr = CFV_DEF_HEADROOM;
-		cfv->rx_hr = CFV_DEF_HEADROOM;
-		cfv->tx_tr = CFV_DEF_TAILROOM;
-		cfv->rx_tr = CFV_DEF_TAILROOM;
-		cfv->mtu = CFV_DEF_MTU_SIZE;
-		cfv->mru = CFV_DEF_MTU_SIZE;
-	}
-
-	netdev->needed_headroom = cfv->tx_hr;
-	netdev->needed_tailroom = cfv->tx_tr;
-
-	/* Disable buffer release interrupts unless we have stopped TX queues */
-	virtqueue_disable_cb(cfv->vq_tx);
-
-	netdev->mtu = cfv->mtu - cfv->tx_tr;
-	vdev->priv = cfv;
-
-	/* Initialize NAPI poll context data */
-	vringh_kiov_init(&cfv->ctx.riov, NULL, 0);
-	cfv->ctx.head = USHRT_MAX;
-	netif_napi_add_weight(netdev, &cfv->napi, cfv_rx_poll,
-			      CFV_DEFAULT_QUOTA);
-
-	tasklet_setup(&cfv->tx_release_tasklet, cfv_tx_release_tasklet);
-
-	/* Carrier is off until netdevice is opened */
-	netif_carrier_off(netdev);
-
-	/* serialize netdev register + virtio_device_ready() with ndo_open() */
-	rtnl_lock();
-
-	/* register Netdev */
-	err = register_netdevice(netdev);
-	if (err) {
-		rtnl_unlock();
-		dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err);
-		goto err;
-	}
-
-	virtio_device_ready(vdev);
-
-	rtnl_unlock();
-
-	debugfs_init(cfv);
-
-	return 0;
-err:
-	netdev_warn(cfv->ndev, "CAIF Virtio probe failed:%d\n", err);
-
-	if (cfv->vr_rx)
-		vdev->vringh_config->del_vrhs(cfv->vdev);
-	if (cfv->vq_tx)
-		vdev->config->del_vqs(cfv->vdev);
-	free_netdev(netdev);
-	return err;
-}
-
-static void cfv_remove(struct virtio_device *vdev)
-{
-	struct cfv_info *cfv = vdev->priv;
-
-	rtnl_lock();
-	dev_close(cfv->ndev);
-	rtnl_unlock();
-
-	tasklet_kill(&cfv->tx_release_tasklet);
-	debugfs_remove_recursive(cfv->debugfs);
-
-	vringh_kiov_cleanup(&cfv->ctx.riov);
-	virtio_reset_device(vdev);
-	vdev->vringh_config->del_vrhs(cfv->vdev);
-	cfv->vr_rx = NULL;
-	vdev->config->del_vqs(cfv->vdev);
-	unregister_netdev(cfv->ndev);
-}
-
-static struct virtio_device_id id_table[] = {
-	{ VIRTIO_ID_CAIF, VIRTIO_DEV_ANY_ID },
-	{ 0 },
-};
-
-static unsigned int features[] = {
-};
-
-static struct virtio_driver caif_virtio_driver = {
-	.feature_table		= features,
-	.feature_table_size	= ARRAY_SIZE(features),
-	.driver.name		= KBUILD_MODNAME,
-	.id_table		= id_table,
-	.probe			= cfv_probe,
-	.remove			= cfv_remove,
-};
-
-module_virtio_driver(caif_virtio_driver);
-MODULE_DEVICE_TABLE(virtio, id_table);
diff --git a/include/linux/virtio_caif.h b/include/linux/virtio_caif.h
deleted file mode 100644
index ea722479510c..000000000000
--- a/include/linux/virtio_caif.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson AB 2012
- * Author: Sjur Brændeland <sjur.brandeland@stericsson.com>
- *
- * This header is BSD licensed so
- * anyone can use the definitions to implement compatible remote processors
- */
-
-#ifndef VIRTIO_CAIF_H
-#define VIRTIO_CAIF_H
-
-#include <linux/types.h>
-struct virtio_caif_transf_config {
-	__virtio16 headroom;
-	__virtio16 tailroom;
-	__virtio32 mtu;
-	u8 reserved[4];
-};
-
-struct virtio_caif_config {
-	struct virtio_caif_transf_config uplink, downlink;
-	u8 reserved[8];
-};
-#endif
diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h
deleted file mode 100644
index b655d8666f55..000000000000
--- a/include/net/caif/caif_dev.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#ifndef CAIF_DEV_H_
-#define CAIF_DEV_H_
-
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfcnfg.h>
-#include <net/caif/caif_device.h>
-#include <linux/caif/caif_socket.h>
-#include <linux/if.h>
-#include <linux/net.h>
-
-/**
- * struct caif_param - CAIF parameters.
- * @size:	Length of data
- * @data:	Binary Data Blob
- */
-struct caif_param {
-	u16  size;
-	u8   data[256];
-};
-
-/**
- * struct caif_connect_request - Request data for CAIF channel setup.
- * @protocol:		Type of CAIF protocol to use (at, datagram etc)
- * @sockaddr:		Socket address to connect.
- * @priority:		Priority of the connection.
- * @link_selector:	Link selector (high bandwidth or low latency)
- * @ifindex:		kernel index of the interface.
- * @param:		Connect Request parameters (CAIF_SO_REQ_PARAM).
- *
- * This struct is used when connecting a CAIF channel.
- * It contains all CAIF channel configuration options.
- */
-struct caif_connect_request {
-	enum caif_protocol_type protocol;
-	struct sockaddr_caif sockaddr;
-	enum caif_channel_priority priority;
-	enum caif_link_selector link_selector;
-	int ifindex;
-	struct caif_param param;
-};
-
-/**
- * caif_connect_client - Connect a client to CAIF Core Stack.
- * @config:		Channel setup parameters, specifying what address
- *			to connect on the Modem.
- * @client_layer:	User implementation of client layer. This layer
- *			MUST have receive and control callback functions
- *			implemented.
- * @ifindex:		Link layer interface index used for this connection.
- * @headroom:		Head room needed by CAIF protocol.
- * @tailroom:		Tail room needed by CAIF protocol.
- *
- * This function connects a CAIF channel. The Client must implement
- * the struct cflayer. This layer represents the Client layer and holds
- * receive functions and control callback functions. Control callback
- * function will receive information about connect/disconnect responses,
- * flow control etc (see enum caif_control).
- * E.g. CAIF Socket will call this function for each socket it connects
- * and have one client_layer instance for each socket.
- */
-int caif_connect_client(struct net *net,
-			struct caif_connect_request *conn_req,
-			struct cflayer *client_layer, int *ifindex,
-			int *headroom, int *tailroom);
-
-/**
- * caif_disconnect_client - Disconnects a client from the CAIF stack.
- *
- * @client_layer: Client layer to be disconnected.
- */
-int caif_disconnect_client(struct net *net, struct cflayer *client_layer);
-
-
-/**
- * caif_client_register_refcnt - register ref-count functions provided by client.
- *
- * @adapt_layer: Client layer using CAIF Stack.
- * @hold:	Function provided by client layer increasing ref-count
- * @put:	Function provided by client layer decreasing ref-count
- *
- * Client of the CAIF Stack must register functions for reference counting.
- * These functions are called by the CAIF Stack for every upstream packet,
- * and must therefore be implemented efficiently.
- *
- * Client should call caif_free_client when reference count degrease to zero.
- */
-
-void caif_client_register_refcnt(struct cflayer *adapt_layer,
-					void (*hold)(struct cflayer *lyr),
-					void (*put)(struct cflayer *lyr));
-/**
- * caif_free_client - Free memory used to manage the client in the CAIF Stack.
- *
- * @client_layer: Client layer to be removed.
- *
- * This function must be called from client layer in order to free memory.
- * Caller must guarantee that no packets are in flight upstream when calling
- * this function.
- */
-void caif_free_client(struct cflayer *adap_layer);
-
-/**
- * struct caif_enroll_dev - Enroll a net-device as a CAIF Link layer
- * @dev:		Network device to enroll.
- * @caifdev:		Configuration information from CAIF Link Layer
- * @link_support:	Link layer support layer
- * @head_room:		Head room needed by link support layer
- * @layer:		Lowest layer in CAIF stack
- * @rcv_fun:		Receive function for CAIF stack.
- *
- * This function enroll a CAIF link layer into CAIF Stack and
- * expects the interface to be able to handle CAIF payload.
- * The link_support layer is used to add any Link Layer specific
- * framing.
- */
-int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
-			struct cflayer *link_support, int head_room,
-			struct cflayer **layer, int (**rcv_func)(
-				struct sk_buff *, struct net_device *,
-				struct packet_type *, struct net_device *));
-
-#endif /* CAIF_DEV_H_ */
diff --git a/include/net/caif/caif_device.h b/include/net/caif/caif_device.h
deleted file mode 100644
index 91d1fd5b44a4..000000000000
--- a/include/net/caif/caif_device.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#ifndef CAIF_DEVICE_H_
-#define CAIF_DEVICE_H_
-#include <linux/kernel.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/caif/caif_socket.h>
-#include <net/caif/caif_device.h>
-
-/**
- * struct caif_dev_common - data shared between CAIF drivers and stack.
- * @flowctrl:		Flow Control callback function. This function is
- *                      supplied by CAIF Core Stack and is used by CAIF
- *                      Link Layer to send flow-stop to CAIF Core.
- *                      The flow information will be distributed to all
- *                      clients of CAIF.
- *
- * @link_select:	Profile of device, either high-bandwidth or
- *			low-latency. This member is set by CAIF Link
- *			Layer Device in	order to indicate if this device
- *			is a high bandwidth or low latency device.
- *
- * @use_frag:		CAIF Frames may be fragmented.
- *			Is set by CAIF Link Layer in order to indicate if the
- *			interface receives fragmented frames that must be
- *			assembled by CAIF Core Layer.
- *
- * @use_fcs:		Indicate if Frame CheckSum (fcs) is used.
- *			Is set if the physical interface is
- *			using Frame Checksum on the CAIF Frames.
- *
- * @use_stx:		Indicate STart of frame eXtension (stx) in use.
- *			Is set if the CAIF Link Layer expects
- *			CAIF Frames to start with the STX byte.
- *
- * This structure is shared between the CAIF drivers and the CAIF stack.
- * It is used by the device to register its behavior.
- * CAIF Core layer must set the member flowctrl in order to supply
- * CAIF Link Layer with the flow control function.
- *
- */
- struct caif_dev_common {
-	void (*flowctrl)(struct net_device *net, int on);
-	enum caif_link_selector link_select;
-	int use_frag;
-	int use_fcs;
-	int use_stx;
-};
-
-#endif	/* CAIF_DEVICE_H_ */
diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h
deleted file mode 100644
index 053e7c6a6a66..000000000000
--- a/include/net/caif/caif_layer.h
+++ /dev/null
@@ -1,277 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#ifndef CAIF_LAYER_H_
-#define CAIF_LAYER_H_
-
-#include <linux/list.h>
-
-struct cflayer;
-struct cfpkt;
-struct caif_payload_info;
-
-#define CAIF_LAYER_NAME_SZ 16
-
-/**
- * caif_assert() - Assert function for CAIF.
- * @assert: expression to evaluate.
- *
- * This function will print a error message and a do WARN_ON if the
- * assertion fails. Normally this will do a stack up at the current location.
- */
-#define caif_assert(assert)					\
-do {								\
-	if (!(assert)) {					\
-		pr_err("caif:Assert detected:'%s'\n", #assert); \
-		WARN_ON(!(assert));				\
-	}							\
-} while (0)
-
-/**
- * enum caif_ctrlcmd - CAIF Stack Control Signaling sent in layer.ctrlcmd().
- *
- * @CAIF_CTRLCMD_FLOW_OFF_IND:		Flow Control is OFF, transmit function
- *					should stop sending data
- *
- * @CAIF_CTRLCMD_FLOW_ON_IND:		Flow Control is ON, transmit function
- *					can start sending data
- *
- * @CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:	Remote end modem has decided to close
- *					down channel
- *
- * @CAIF_CTRLCMD_INIT_RSP:		Called initially when the layer below
- *					has finished initialization
- *
- * @CAIF_CTRLCMD_DEINIT_RSP:		Called when de-initialization is
- *					complete
- *
- * @CAIF_CTRLCMD_INIT_FAIL_RSP:		Called if initialization fails
- *
- * @_CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:	CAIF Link layer temporarily cannot
- *					send more packets.
- * @_CAIF_CTRLCMD_PHYIF_FLOW_ON_IND:	Called if CAIF Link layer is able
- *					to send packets again.
- * @_CAIF_CTRLCMD_PHYIF_DOWN_IND:	Called if CAIF Link layer is going
- *					down.
- *
- * These commands are sent upwards in the CAIF stack to the CAIF Client.
- * They are used for signaling originating from the modem or CAIF Link Layer.
- * These are either responses (*_RSP) or events (*_IND).
- */
-enum caif_ctrlcmd {
-	CAIF_CTRLCMD_FLOW_OFF_IND,
-	CAIF_CTRLCMD_FLOW_ON_IND,
-	CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
-	CAIF_CTRLCMD_INIT_RSP,
-	CAIF_CTRLCMD_DEINIT_RSP,
-	CAIF_CTRLCMD_INIT_FAIL_RSP,
-	_CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
-	_CAIF_CTRLCMD_PHYIF_FLOW_ON_IND,
-	_CAIF_CTRLCMD_PHYIF_DOWN_IND,
-};
-
-/**
- * enum caif_modemcmd -	 Modem Control Signaling, sent from CAIF Client
- *			 to the CAIF Link Layer or modem.
- *
- * @CAIF_MODEMCMD_FLOW_ON_REQ:		Flow Control is ON, transmit function
- *					can start sending data.
- *
- * @CAIF_MODEMCMD_FLOW_OFF_REQ:		Flow Control is OFF, transmit function
- *					should stop sending data.
- *
- * @_CAIF_MODEMCMD_PHYIF_USEFULL:	Notify physical layer that it is in use
- *
- * @_CAIF_MODEMCMD_PHYIF_USELESS:	Notify physical layer that it is
- *					no longer in use.
- *
- * These are requests sent 'downwards' in the stack.
- * Flow ON, OFF can be indicated to the modem.
- */
-enum caif_modemcmd {
-	CAIF_MODEMCMD_FLOW_ON_REQ = 0,
-	CAIF_MODEMCMD_FLOW_OFF_REQ = 1,
-	_CAIF_MODEMCMD_PHYIF_USEFULL = 3,
-	_CAIF_MODEMCMD_PHYIF_USELESS = 4
-};
-
-/**
- * enum caif_direction - CAIF Packet Direction.
- * Indicate if a packet is to be sent out or to be received in.
- * @CAIF_DIR_IN:		Incoming packet received.
- * @CAIF_DIR_OUT:		Outgoing packet to be transmitted.
- */
-enum caif_direction {
-	CAIF_DIR_IN = 0,
-	CAIF_DIR_OUT = 1
-};
-
-/**
- * struct cflayer - CAIF Stack layer.
- * Defines the framework for the CAIF Core Stack.
- * @up:		Pointer up to the layer above.
- * @dn:		Pointer down to the layer below.
- * @node:	List node used when layer participate in a list.
- * @receive:	Packet receive function.
- * @transmit:	Packet transmit function.
- * @ctrlcmd:	Used for control signalling upwards in the stack.
- * @modemcmd:	Used for control signaling downwards in the stack.
- * @id:		The identity of this layer
- * @name:	Name of the layer.
- *
- *  This structure defines the layered structure in CAIF.
- *
- *  It defines CAIF layering structure, used by all CAIF Layers and the
- *  layers interfacing CAIF.
- *
- *  In order to integrate with CAIF an adaptation layer on top of the CAIF stack
- *  and PHY layer below the CAIF stack
- *  must be implemented. These layer must follow the design principles below.
- *
- *  Principles for layering of protocol layers:
- *    - All layers must use this structure. If embedding it, then place this
- *	structure first in the layer specific structure.
- *
- *    - Each layer should not depend on any others layer's private data.
- *
- *    - In order to send data upwards do
- *	layer->up->receive(layer->up, packet);
- *
- *    - In order to send data downwards do
- *	layer->dn->transmit(layer->dn, info, packet);
- */
-struct cflayer {
-	struct cflayer *up;
-	struct cflayer *dn;
-	struct list_head node;
-
-	/*
-	 *  receive() - Receive Function (non-blocking).
-	 *  Contract: Each layer must implement a receive function passing the
-	 *  CAIF packets upwards in the stack.
-	 *	Packet handling rules:
-	 *	      - The CAIF packet (cfpkt) ownership is passed to the
-	 *		called receive function. This means that the
-	 *		packet cannot be accessed after passing it to the
-	 *		above layer using up->receive().
-	 *
-	 *	      - If parsing of the packet fails, the packet must be
-	 *		destroyed and negative error code returned
-	 *		from the function.
-	 *		EXCEPTION: If the framing layer (cffrml) returns
-	 *			-EILSEQ, the packet is not freed.
-	 *
-	 *	      - If parsing succeeds (and above layers return OK) then
-	 *		      the function must return a value >= 0.
-	 *
-	 *  Returns result < 0 indicates an error, 0 or positive value
-	 *	     indicates success.
-	 *
-	 *  @layr: Pointer to the current layer the receive function is
-	 *		implemented for (this pointer).
-	 *  @cfpkt: Pointer to CaifPacket to be handled.
-	 */
-	int (*receive)(struct cflayer *layr, struct cfpkt *cfpkt);
-
-	/*
-	 *  transmit() - Transmit Function (non-blocking).
-	 *  Contract: Each layer must implement a transmit function passing the
-	 *	CAIF packet downwards in the stack.
-	 *	Packet handling rules:
-	 *	      - The CAIF packet (cfpkt) ownership is passed to the
-	 *		transmit function. This means that the packet
-	 *		cannot be accessed after passing it to the below
-	 *		layer using dn->transmit().
-	 *
-	 *	      - Upon error the packet ownership is still passed on,
-	 *		so the packet shall be freed where error is detected.
-	 *		Callers of the transmit function shall not free packets,
-	 *		but errors shall be returned.
-	 *
-	 *	      - Return value less than zero means error, zero or
-	 *		greater than zero means OK.
-	 *
-	 *  Returns result < 0 indicates an error, 0 or positive value
-	 *		indicates success.
-	 *
-	 *  @layr:	Pointer to the current layer the receive function
-	 *		isimplemented for (this pointer).
-	 *  @cfpkt:	 Pointer to CaifPacket to be handled.
-	 */
-	int (*transmit) (struct cflayer *layr, struct cfpkt *cfpkt);
-
-	/*
-	 *  cttrlcmd() - Control Function upwards in CAIF Stack  (non-blocking).
-	 *  Used for signaling responses (CAIF_CTRLCMD_*_RSP)
-	 *  and asynchronous events from the modem  (CAIF_CTRLCMD_*_IND)
-	 *
-	 *  @layr:	Pointer to the current layer the receive function
-	 *		is implemented for (this pointer).
-	 *  @ctrl:	Control Command.
-	 */
-	void (*ctrlcmd) (struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			 int phyid);
-
-	/*
-	 *  modemctrl() - Control Function used for controlling the modem.
-	 *  Used to signal down-wards in the CAIF stack.
-	 *  Returns 0 on success, < 0 upon failure.
-	 *
-	 *  @layr:	Pointer to the current layer the receive function
-	 *		is implemented for (this pointer).
-	 *  @ctrl:  Control Command.
-	 */
-	int (*modemcmd) (struct cflayer *layr, enum caif_modemcmd ctrl);
-
-	unsigned int id;
-	char name[CAIF_LAYER_NAME_SZ];
-};
-
-/**
- * layer_set_up() - Set the up pointer for a specified layer.
- *  @layr: Layer where up pointer shall be set.
- *  @above: Layer above.
- */
-#define layer_set_up(layr, above) ((layr)->up = (struct cflayer *)(above))
-
-/**
- *  layer_set_dn() - Set the down pointer for a specified layer.
- *  @layr:  Layer where down pointer shall be set.
- *  @below: Layer below.
- */
-#define layer_set_dn(layr, below) ((layr)->dn = (struct cflayer *)(below))
-
-/**
- * struct dev_info - Physical Device info information about physical layer.
- * @dev:	Pointer to native physical device.
- * @id:		Physical ID of the physical connection used by the
- *		logical CAIF connection. Used by service layers to
- *		identify their physical id to Caif MUX (CFMUXL)so
- *		that the MUX can add the correct physical ID to the
- *		packet.
- */
-struct dev_info {
-	void *dev;
-	unsigned int id;
-};
-
-/**
- * struct caif_payload_info - Payload information embedded in packet (sk_buff).
- *
- * @dev_info:	Information about the receiving device.
- *
- * @hdr_len:	Header length, used to align pay load on 32bit boundary.
- *
- * @channel_id: Channel ID of the logical CAIF connection.
- *		Used by mux to insert channel id into the caif packet.
- */
-struct caif_payload_info {
-	struct dev_info *dev_info;
-	unsigned short hdr_len;
-	unsigned short channel_id;
-};
-
-#endif	/* CAIF_LAYER_H_ */
diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h
deleted file mode 100644
index 8819ff4db35a..000000000000
--- a/include/net/caif/cfcnfg.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#ifndef CFCNFG_H_
-#define CFCNFG_H_
-#include <linux/spinlock.h>
-#include <linux/netdevice.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfctrl.h>
-
-struct cfcnfg;
-
-/**
- * enum cfcnfg_phy_preference - Physical preference HW Abstraction
- *
- * @CFPHYPREF_UNSPECIFIED:	Default physical interface
- *
- * @CFPHYPREF_LOW_LAT:		Default physical interface for low-latency
- *				traffic
- * @CFPHYPREF_HIGH_BW:		Default physical interface for high-bandwidth
- *				traffic
- * @CFPHYPREF_LOOP:		TEST only Loopback interface simulating modem
- *				responses.
- *
- */
-enum cfcnfg_phy_preference {
-	CFPHYPREF_UNSPECIFIED,
-	CFPHYPREF_LOW_LAT,
-	CFPHYPREF_HIGH_BW,
-	CFPHYPREF_LOOP
-};
-
-/**
- * cfcnfg_create() - Get the CAIF configuration object given network.
- * @net:	Network for the CAIF configuration object.
- */
-struct cfcnfg *get_cfcnfg(struct net *net);
-
-/**
- * cfcnfg_create() - Create the CAIF configuration object.
- */
-struct cfcnfg *cfcnfg_create(void);
-
-/**
- * cfcnfg_remove() -  Remove the CFCNFG object
- * @cfg: config object
- */
-void cfcnfg_remove(struct cfcnfg *cfg);
-
-/**
- * cfcnfg_add_phy_layer() - Adds a physical layer to the CAIF stack.
- * @cnfg:	Pointer to a CAIF configuration object, created by
- *		cfcnfg_create().
- * @dev:	Pointer to link layer device
- * @phy_layer:	Specify the physical layer. The transmit function
- *		MUST be set in the structure.
- * @pref:	The phy (link layer) preference.
- * @link_support: Protocol implementation for link layer specific protocol.
- * @fcs:	Specify if checksum is used in CAIF Framing Layer.
- * @head_room:	Head space needed by link specific protocol.
- */
-int
-cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
-		     struct net_device *dev, struct cflayer *phy_layer,
-		     enum cfcnfg_phy_preference pref,
-		     struct cflayer *link_support,
-		     bool fcs, int head_room);
-
-/**
- * cfcnfg_del_phy_layer - Deletes an phy layer from the CAIF stack.
- *
- * @cnfg:	Pointer to a CAIF configuration object, created by
- *		cfcnfg_create().
- * @phy_layer:	Adaptation layer to be removed.
- */
-int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer);
-
-/**
- * cfcnfg_set_phy_state() - Set the state of the physical interface device.
- * @cnfg:	Configuration object
- * @phy_layer:	Physical Layer representation
- * @up:	State of device
- */
-int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer,
-				bool up);
-
-#endif				/* CFCNFG_H_ */
diff --git a/include/net/caif/cfctrl.h b/include/net/caif/cfctrl.h
deleted file mode 100644
index 86d17315c8a1..000000000000
--- a/include/net/caif/cfctrl.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#ifndef CFCTRL_H_
-#define CFCTRL_H_
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfsrvl.h>
-
-/* CAIF Control packet commands */
-enum cfctrl_cmd {
-	CFCTRL_CMD_LINK_SETUP = 0,
-	CFCTRL_CMD_LINK_DESTROY = 1,
-	CFCTRL_CMD_LINK_ERR = 2,
-	CFCTRL_CMD_ENUM = 3,
-	CFCTRL_CMD_SLEEP = 4,
-	CFCTRL_CMD_WAKE = 5,
-	CFCTRL_CMD_LINK_RECONF = 6,
-	CFCTRL_CMD_START_REASON = 7,
-	CFCTRL_CMD_RADIO_SET = 8,
-	CFCTRL_CMD_MODEM_SET = 9,
-	CFCTRL_CMD_MASK = 0xf
-};
-
-/* Channel types */
-enum cfctrl_srv {
-	CFCTRL_SRV_DECM = 0,
-	CFCTRL_SRV_VEI = 1,
-	CFCTRL_SRV_VIDEO = 2,
-	CFCTRL_SRV_DBG = 3,
-	CFCTRL_SRV_DATAGRAM = 4,
-	CFCTRL_SRV_RFM = 5,
-	CFCTRL_SRV_UTIL = 6,
-	CFCTRL_SRV_MASK = 0xf
-};
-
-#define CFCTRL_RSP_BIT 0x20
-#define CFCTRL_ERR_BIT 0x10
-
-struct cfctrl_rsp {
-	void (*linksetup_rsp)(struct cflayer *layer, u8 linkid,
-			      enum cfctrl_srv serv, u8 phyid,
-			      struct cflayer *adapt_layer);
-	void (*linkdestroy_rsp)(struct cflayer *layer, u8 linkid);
-	void (*linkerror_ind)(void);
-	void (*enum_rsp)(void);
-	void (*sleep_rsp)(void);
-	void (*wake_rsp)(void);
-	void (*restart_rsp)(void);
-	void (*radioset_rsp)(void);
-	void (*reject_rsp)(struct cflayer *layer, u8 linkid,
-				struct cflayer *client_layer);
-};
-
-/* Link Setup Parameters for CAIF-Links. */
-struct cfctrl_link_param {
-	enum cfctrl_srv linktype;/* (T3,T0) Type of Channel */
-	u8 priority;		  /* (P4,P0) Priority of the channel */
-	u8 phyid;		  /* (U2-U0) Physical interface to connect */
-	u8 endpoint;		  /* (E1,E0) Endpoint for data channels */
-	u8 chtype;		  /* (H1,H0) Channel-Type, applies to
-				   *            VEI, DEBUG */
-	union {
-		struct {
-			u8 connid;	/*  (D7,D0) Video LinkId */
-		} video;
-
-		struct {
-			u32 connid;	/* (N31,Ngit0) Connection ID used
-					 *  for Datagram */
-		} datagram;
-
-		struct {
-			u32 connid;	/* Connection ID used for RFM */
-			char volume[20];	/* Volume to mount for RFM */
-		} rfm;		/* Configuration for RFM */
-
-		struct {
-			u16 fifosize_kb;	/* Psock FIFO size in KB */
-			u16 fifosize_bufs;	/* Psock # signal buffers */
-			char name[16];	/* Name of the PSOCK service */
-			u8 params[255];	/* Link setup Parameters> */
-			u16 paramlen;	/* Length of Link Setup
-						 *   Parameters */
-		} utility;	/* Configuration for Utility Links (Psock) */
-	} u;
-};
-
-/* This structure is used internally in CFCTRL */
-struct cfctrl_request_info {
-	int sequence_no;
-	enum cfctrl_cmd cmd;
-	u8 channel_id;
-	struct cfctrl_link_param param;
-	struct cflayer *client_layer;
-	struct list_head list;
-};
-
-struct cfctrl {
-	struct cfsrvl serv;
-	struct cfctrl_rsp res;
-	atomic_t req_seq_no;
-	atomic_t rsp_seq_no;
-	struct list_head list;
-	/* Protects from simultaneous access to first_req list */
-	spinlock_t info_list_lock;
-#ifndef CAIF_NO_LOOP
-	u8 loop_linkid;
-	int loop_linkused[256];
-	/* Protects simultaneous access to loop_linkid and loop_linkused */
-	spinlock_t loop_linkid_lock;
-#endif
-
-};
-
-void cfctrl_enum_req(struct cflayer *cfctrl, u8 physlinkid);
-int cfctrl_linkup_request(struct cflayer *cfctrl,
-			   struct cfctrl_link_param *param,
-			   struct cflayer *user_layer);
-int  cfctrl_linkdown_req(struct cflayer *cfctrl, u8 linkid,
-			 struct cflayer *client);
-
-struct cflayer *cfctrl_create(void);
-struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer);
-int cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer);
-void cfctrl_remove(struct cflayer *layr);
-
-#endif				/* CFCTRL_H_ */
diff --git a/include/net/caif/cffrml.h b/include/net/caif/cffrml.h
deleted file mode 100644
index 1ab8a80ede4d..000000000000
--- a/include/net/caif/cffrml.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#ifndef CFFRML_H_
-#define CFFRML_H_
-#include <net/caif/caif_layer.h>
-#include <linux/netdevice.h>
-
-struct cffrml;
-struct cflayer *cffrml_create(u16 phyid, bool use_fcs);
-void cffrml_free(struct cflayer *layr);
-void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up);
-void cffrml_set_dnlayer(struct cflayer *this, struct cflayer *dn);
-void cffrml_put(struct cflayer *layr);
-void cffrml_hold(struct cflayer *layr);
-int cffrml_refcnt_read(struct cflayer *layr);
-
-#endif /* CFFRML_H_ */
diff --git a/include/net/caif/cfmuxl.h b/include/net/caif/cfmuxl.h
deleted file mode 100644
index 92ccb2648309..000000000000
--- a/include/net/caif/cfmuxl.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#ifndef CFMUXL_H_
-#define CFMUXL_H_
-#include <net/caif/caif_layer.h>
-
-struct cfsrvl;
-struct cffrml;
-
-struct cflayer *cfmuxl_create(void);
-int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid);
-struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid);
-int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *up, u8 phyid);
-struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 linkid);
-
-#endif				/* CFMUXL_H_ */
diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h
deleted file mode 100644
index acf664227d96..000000000000
--- a/include/net/caif/cfpkt.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#ifndef CFPKT_H_
-#define CFPKT_H_
-#include <net/caif/caif_layer.h>
-#include <linux/types.h>
-struct cfpkt;
-
-/* Create a CAIF packet.
- * len: Length of packet to be created
- * @return New packet.
- */
-struct cfpkt *cfpkt_create(u16 len);
-
-/*
- * Destroy a CAIF Packet.
- * pkt Packet to be destroyed.
- */
-void cfpkt_destroy(struct cfpkt *pkt);
-
-/*
- * Extract header from packet.
- *
- * pkt Packet to extract header data from.
- * data Pointer to copy the header data into.
- * len Length of head data to copy.
- * @return zero on success and error code upon failure
- */
-int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len);
-
-static inline u8 cfpkt_extr_head_u8(struct cfpkt *pkt)
-{
-	u8 tmp;
-
-	cfpkt_extr_head(pkt, &tmp, 1);
-
-	return tmp;
-}
-
-static inline u16 cfpkt_extr_head_u16(struct cfpkt *pkt)
-{
-	__le16 tmp;
-
-	cfpkt_extr_head(pkt, &tmp, 2);
-
-	return le16_to_cpu(tmp);
-}
-
-static inline u32 cfpkt_extr_head_u32(struct cfpkt *pkt)
-{
-	__le32 tmp;
-
-	cfpkt_extr_head(pkt, &tmp, 4);
-
-	return le32_to_cpu(tmp);
-}
-
-/*
- * Peek header from packet.
- * Reads data from packet without changing packet.
- *
- * pkt Packet to extract header data from.
- * data Pointer to copy the header data into.
- * len Length of head data to copy.
- * @return zero on success and error code upon failure
- */
-int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len);
-
-/*
- * Extract header from trailer (end of packet).
- *
- * pkt Packet to extract header data from.
- * data Pointer to copy the trailer data into.
- * len Length of header data to copy.
- * @return zero on success and error code upon failure
- */
-int cfpkt_extr_trail(struct cfpkt *pkt, void *data, u16 len);
-
-/*
- * Add header to packet.
- *
- *
- * pkt Packet to add header data to.
- * data Pointer to data to copy into the header.
- * len Length of header data to copy.
- * @return zero on success and error code upon failure
- */
-int cfpkt_add_head(struct cfpkt *pkt, const void *data, u16 len);
-
-/*
- * Add trailer to packet.
- *
- *
- * pkt Packet to add trailer data to.
- * data Pointer to data to copy into the trailer.
- * len Length of trailer data to copy.
- * @return zero on success and error code upon failure
- */
-int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len);
-
-/*
- * Pad trailer on packet.
- * Moves data pointer in packet, no content copied.
- *
- * pkt Packet in which to pad trailer.
- * len Length of padding to add.
- * @return zero on success and error code upon failure
- */
-int cfpkt_pad_trail(struct cfpkt *pkt, u16 len);
-
-/*
- * Add a single byte to packet body (tail).
- *
- * pkt Packet in which to add byte.
- * data Byte to add.
- * @return zero on success and error code upon failure
- */
-int cfpkt_addbdy(struct cfpkt *pkt, const u8 data);
-
-/*
- * Add a data to packet body (tail).
- *
- * pkt Packet in which to add data.
- * data Pointer to data to copy into the packet body.
- * len Length of data to add.
- * @return zero on success and error code upon failure
- */
-int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len);
-
-/*
- * Checks whether there are more data to process in packet.
- * pkt Packet to check.
- * @return true if more data are available in packet false otherwise
- */
-bool cfpkt_more(struct cfpkt *pkt);
-
-/*
- * Checks whether the packet is erroneous,
- * i.e. if it has been attempted to extract more data than available in packet
- * or writing more data than has been allocated in cfpkt_create().
- * pkt Packet to check.
- * @return true on error false otherwise
- */
-bool cfpkt_erroneous(struct cfpkt *pkt);
-
-/*
- * Get the packet length.
- * pkt Packet to get length from.
- * @return Number of bytes in packet.
- */
-u16 cfpkt_getlen(struct cfpkt *pkt);
-
-/*
- * Set the packet length, by adjusting the trailer pointer according to length.
- * pkt Packet to set length.
- * len Packet length.
- * @return Number of bytes in packet.
- */
-int cfpkt_setlen(struct cfpkt *pkt, u16 len);
-
-/*
- * cfpkt_append - Appends a packet's data to another packet.
- * dstpkt:    Packet to append data into, WILL BE FREED BY THIS FUNCTION
- * addpkt:    Packet to be appended and automatically released,
- *            WILL BE FREED BY THIS FUNCTION.
- * expectlen: Packet's expected total length. This should be considered
- *            as a hint.
- * NB: Input packets will be destroyed after appending and cannot be used
- * after calling this function.
- * @return    The new appended packet.
- */
-struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, struct cfpkt *addpkt,
-		      u16 expectlen);
-
-/*
- * cfpkt_split - Split a packet into two packets at the specified split point.
- * pkt: Packet to be split (will contain the first part of the data on exit)
- * pos: Position to split packet in two parts.
- * @return The new packet, containing the second part of the data.
- */
-struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos);
-
-/*
- * Iteration function, iterates the packet buffers from start to end.
- *
- * Checksum iteration function used to iterate buffers
- * (we may have packets consisting of a chain of buffers)
- * pkt:       Packet to calculate checksum for
- * iter_func: Function pointer to iteration function
- * chks:      Checksum calculated so far.
- * buf:       Pointer to the buffer to checksum
- * len:       Length of buf.
- * data:      Initial checksum value.
- * @return    Checksum of buffer.
- */
-
-int cfpkt_iterate(struct cfpkt *pkt,
-		u16 (*iter_func)(u16 chks, void *buf, u16 len),
-		u16 data);
-
-/* Map from a "native" packet (e.g. Linux Socket Buffer) to a CAIF packet.
- *  dir - Direction indicating whether this packet is to be sent or received.
- *  nativepkt  - The native packet to be transformed to a CAIF packet
- *  @return The mapped CAIF Packet CFPKT.
- */
-struct cfpkt *cfpkt_fromnative(enum caif_direction dir, void *nativepkt);
-
-/* Map from a CAIF packet to a "native" packet (e.g. Linux Socket Buffer).
- *  pkt  - The CAIF packet to be transformed into a "native" packet.
- *  @return The native packet transformed from a CAIF packet.
- */
-void *cfpkt_tonative(struct cfpkt *pkt);
-
-/*
- * Returns packet information for a packet.
- * pkt Packet to get info from;
- * @return Packet information
- */
-struct caif_payload_info *cfpkt_info(struct cfpkt *pkt);
-
-/** cfpkt_set_prio - set priority for a CAIF packet.
- *
- * @pkt: The CAIF packet to be adjusted.
- * @prio: one of TC_PRIO_ constants.
- */
-void cfpkt_set_prio(struct cfpkt *pkt, int prio);
-
-#endif				/* CFPKT_H_ */
diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h
deleted file mode 100644
index 67cce8757175..000000000000
--- a/include/net/caif/cfserl.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#ifndef CFSERL_H_
-#define CFSERL_H_
-#include <net/caif/caif_layer.h>
-
-struct cflayer *cfserl_create(int instance, bool use_stx);
-void cfserl_release(struct cflayer *layer);
-#endif
diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
deleted file mode 100644
index a000dc45f966..000000000000
--- a/include/net/caif/cfsrvl.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#ifndef CFSRVL_H_
-#define CFSRVL_H_
-#include <linux/list.h>
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/kref.h>
-#include <linux/rculist.h>
-
-struct cfsrvl {
-	struct cflayer layer;
-	bool open;
-	bool phy_flow_on;
-	bool modem_flow_on;
-	bool supports_flowctrl;
-	void (*release)(struct cflayer *layer);
-	struct dev_info dev_info;
-	void (*hold)(struct cflayer *lyr);
-	void (*put)(struct cflayer *lyr);
-	struct rcu_head rcu;
-};
-
-struct cflayer *cfvei_create(u8 linkid, struct dev_info *dev_info);
-struct cflayer *cfdgml_create(u8 linkid, struct dev_info *dev_info);
-struct cflayer *cfutill_create(u8 linkid, struct dev_info *dev_info);
-struct cflayer *cfvidl_create(u8 linkid, struct dev_info *dev_info);
-struct cflayer *cfrfml_create(u8 linkid, struct dev_info *dev_info,
-				int mtu_size);
-struct cflayer *cfdbgl_create(u8 linkid, struct dev_info *dev_info);
-
-bool cfsrvl_phyid_match(struct cflayer *layer, int phyid);
-
-void cfsrvl_init(struct cfsrvl *service,
-			u8 channel_id,
-			struct dev_info *dev_info,
-			bool supports_flowctrl);
-bool cfsrvl_ready(struct cfsrvl *service, int *err);
-
-static inline void cfsrvl_get(struct cflayer *layr)
-{
-	struct cfsrvl *s = container_of(layr, struct cfsrvl, layer);
-	if (layr == NULL || layr->up == NULL || s->hold == NULL)
-		return;
-
-	s->hold(layr->up);
-}
-
-static inline void cfsrvl_put(struct cflayer *layr)
-{
-	struct cfsrvl *s = container_of(layr, struct cfsrvl, layer);
-	if (layr == NULL || layr->up == NULL || s->hold == NULL)
-		return;
-
-	s->put(layr->up);
-}
-#endif				/* CFSRVL_H_ */
diff --git a/include/uapi/linux/caif/caif_socket.h b/include/uapi/linux/caif/caif_socket.h
deleted file mode 100644
index d9970bbaa156..000000000000
--- a/include/uapi/linux/caif/caif_socket.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/* linux/caif_socket.h
- * CAIF Definitions for CAIF socket and network layer
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	 Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
- */
-
-#ifndef _LINUX_CAIF_SOCKET_H
-#define _LINUX_CAIF_SOCKET_H
-
-#include <linux/types.h>
-#include <linux/socket.h>
-
-/**
- * enum caif_link_selector -    Physical Link Selection.
- * @CAIF_LINK_HIGH_BANDW:	Physical interface for high-bandwidth
- *				traffic.
- * @CAIF_LINK_LOW_LATENCY:	Physical interface for low-latency
- *				traffic.
- *
- * CAIF Link Layers can register their link properties.
- * This enum is used for choosing between CAIF Link Layers when
- * setting up CAIF Channels when multiple CAIF Link Layers exists.
- */
-enum caif_link_selector {
-	CAIF_LINK_HIGH_BANDW,
-	CAIF_LINK_LOW_LATENCY
-};
-
-/**
- * enum caif_channel_priority - CAIF channel priorities.
- *
- * @CAIF_PRIO_MIN:	Min priority for a channel.
- * @CAIF_PRIO_LOW:	Low-priority channel.
- * @CAIF_PRIO_NORMAL:	Normal/default priority level.
- * @CAIF_PRIO_HIGH:	High priority level
- * @CAIF_PRIO_MAX:	Max priority for channel
- *
- * Priority can be set on CAIF Channels in order to
- * prioritize between traffic on different CAIF Channels.
- * These priority levels are recommended, but the priority value
- * is not restricted to the values defined in this enum, any value
- * between CAIF_PRIO_MIN and CAIF_PRIO_MAX could be used.
- */
-enum caif_channel_priority {
-	CAIF_PRIO_MIN	 = 0x01,
-	CAIF_PRIO_LOW	 = 0x04,
-	CAIF_PRIO_NORMAL = 0x0f,
-	CAIF_PRIO_HIGH	 = 0x14,
-	CAIF_PRIO_MAX	 = 0x1F
-};
-
-/**
- * enum caif_protocol_type  -	CAIF Channel type.
- * @CAIFPROTO_AT:		Classic AT channel.
- * @CAIFPROTO_DATAGRAM:	Datagram channel.
- * @CAIFPROTO_DATAGRAM_LOOP:	Datagram loopback channel, used for testing.
- * @CAIFPROTO_UTIL:		Utility (Psock) channel.
- * @CAIFPROTO_RFM:		Remote File Manager
- * @CAIFPROTO_DEBUG:		Debug link
- *
- * This enum defines the CAIF Channel type to be used. This defines
- * the service to connect to on the modem.
- */
-enum caif_protocol_type {
-	CAIFPROTO_AT,
-	CAIFPROTO_DATAGRAM,
-	CAIFPROTO_DATAGRAM_LOOP,
-	CAIFPROTO_UTIL,
-	CAIFPROTO_RFM,
-	CAIFPROTO_DEBUG,
-	_CAIFPROTO_MAX
-};
-#define	CAIFPROTO_MAX _CAIFPROTO_MAX
-
-/**
- * enum caif_at_type - AT Service Endpoint
- * @CAIF_ATTYPE_PLAIN:	     Connects to a plain vanilla AT channel.
- */
-enum caif_at_type {
-	CAIF_ATTYPE_PLAIN = 2
-};
- /**
- * enum caif_debug_type - Content selection for debug connection
- * @CAIF_DEBUG_TRACE_INTERACTIVE: Connection will contain
- *				both trace and interactive debug.
- * @CAIF_DEBUG_TRACE:		Connection contains trace only.
- * @CAIF_DEBUG_INTERACTIVE:	Connection to interactive debug.
- */
-enum caif_debug_type {
-	CAIF_DEBUG_TRACE_INTERACTIVE = 0,
-	CAIF_DEBUG_TRACE,
-	CAIF_DEBUG_INTERACTIVE,
-};
-
-/**
- * enum caif_debug_service - Debug Service Endpoint
- * @CAIF_RADIO_DEBUG_SERVICE:	Debug service on the Radio sub-system
- * @CAIF_APP_DEBUG_SERVICE:	Debug for the applications sub-system
- */
-enum caif_debug_service {
-	CAIF_RADIO_DEBUG_SERVICE = 1,
-	CAIF_APP_DEBUG_SERVICE
-};
-
-/**
- * struct sockaddr_caif - the sockaddr structure for CAIF sockets.
- * @family:		     Address family number, must be AF_CAIF.
- * @u:			     Union of address data 'switched' by family.
- * :
- * @u.at:                    Applies when family = CAIFPROTO_AT.
- *
- * @u.at.type:               Type of AT link to set up (enum caif_at_type).
- *
- * @u.util:                  Applies when family = CAIFPROTO_UTIL
- *
- * @u.util.service:          Utility service name.
- *
- * @u.dgm:                   Applies when family = CAIFPROTO_DATAGRAM
- *
- * @u.dgm.connection_id:     Datagram connection id.
- *
- * @u.dgm.nsapi:             NSAPI of the PDP-Context.
- *
- * @u.rfm:                   Applies when family = CAIFPROTO_RFM
- *
- * @u.rfm.connection_id:     Connection ID for RFM.
- *
- * @u.rfm.volume:            Volume to mount.
- *
- * @u.dbg:		      Applies when family = CAIFPROTO_DEBUG.
- *
- * @u.dbg.type:			     Type of debug connection to set up
- *			      (caif_debug_type).
- *
- * @u.dbg.service:	      Service sub-system to connect (caif_debug_service
- * Description:
- * This structure holds the connect parameters used for setting up a
- * CAIF Channel. It defines the service to connect to on the modem.
- */
-struct sockaddr_caif {
-	__kernel_sa_family_t  family;
-	union {
-		struct {
-			__u8  type;		/* type: enum caif_at_type */
-		} at;				/* CAIFPROTO_AT */
-		struct {
-			char	  service[16];
-		} util;				/* CAIFPROTO_UTIL */
-		union {
-			__u32 connection_id;
-			__u8  nsapi;
-		} dgm;				/* CAIFPROTO_DATAGRAM(_LOOP)*/
-		struct {
-			__u32 connection_id;
-			char	  volume[16];
-		} rfm;				/* CAIFPROTO_RFM */
-		struct {
-			__u8  type;		/* type:enum caif_debug_type */
-			__u8  service;		/* service:caif_debug_service */
-		} dbg;				/* CAIFPROTO_DEBUG */
-	} u;
-};
-
-/**
- * enum caif_socket_opts - CAIF option values for getsockopt and setsockopt.
- *
- * @CAIFSO_LINK_SELECT:		Selector used if multiple CAIF Link layers are
- *				available. Either a high bandwidth
- *				link can be selected (CAIF_LINK_HIGH_BANDW) or
- *				a low latency link (CAIF_LINK_LOW_LATENCY).
- *                              This option is of type __u32.
- *				Alternatively SO_BINDTODEVICE can be used.
- *
- * @CAIFSO_REQ_PARAM:		Used to set the request parameters for a
- *				utility channel. (maximum 256 bytes). This
- *				option must be set before connecting.
- *
- * @CAIFSO_RSP_PARAM:		Gets the response parameters for a utility
- *				channel. (maximum 256 bytes). This option
- *				is valid after a successful connect.
- *
- *
- * This enum defines the CAIF Socket options to be used on a socket
- * of type PF_CAIF.
- *
- */
-enum caif_socket_opts {
-	CAIFSO_LINK_SELECT	= 127,
-	CAIFSO_REQ_PARAM	= 128,
-	CAIFSO_RSP_PARAM	= 129,
-};
-
-#endif /* _LINUX_CAIF_SOCKET_H */
diff --git a/include/uapi/linux/caif/if_caif.h b/include/uapi/linux/caif/if_caif.h
deleted file mode 100644
index 74bca19403fa..000000000000
--- a/include/uapi/linux/caif/if_caif.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
- */
-
-#ifndef IF_CAIF_H_
-#define IF_CAIF_H_
-#include <linux/sockios.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-
-/**
- * enum ifla_caif - CAIF NetlinkRT parameters.
- * @IFLA_CAIF_IPV4_CONNID:  Connection ID for IPv4 PDP Context.
- *			    The type of attribute is NLA_U32.
- * @IFLA_CAIF_IPV6_CONNID:  Connection ID for IPv6 PDP Context.
- *			    The type of attribute is NLA_U32.
- * @IFLA_CAIF_LOOPBACK:	    If different from zero, device is doing loopback
- *			    The type of attribute is NLA_U8.
- *
- * When using RT Netlink to create, destroy or configure a CAIF IP interface,
- * enum ifla_caif is used to specify the configuration attributes.
- */
-enum ifla_caif {
-	__IFLA_CAIF_UNSPEC,
-	IFLA_CAIF_IPV4_CONNID,
-	IFLA_CAIF_IPV6_CONNID,
-	IFLA_CAIF_LOOPBACK,
-	__IFLA_CAIF_MAX
-};
-#define	IFLA_CAIF_MAX (__IFLA_CAIF_MAX-1)
-
-#endif /*IF_CAIF_H_*/
diff --git a/net/Kconfig b/net/Kconfig
index 62266eaf0e95..5c588dbcbdbd 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -439,7 +439,6 @@ endif # WIRELESS
 
 source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
-source "net/caif/Kconfig"
 source "net/ceph/Kconfig"
 source "net/nfc/Kconfig"
 source "net/psample/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 90e3d72bf58b..98e182829eff 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -53,7 +53,6 @@ obj-$(CONFIG_IUCV)		+= iucv/
 obj-$(CONFIG_SMC)		+= smc/
 obj-$(CONFIG_RFKILL)		+= rfkill/
 obj-$(CONFIG_NET_9P)		+= 9p/
-obj-$(CONFIG_CAIF)		+= caif/
 obj-$(CONFIG_DCB)		+= dcb/
 obj-$(CONFIG_6LOWPAN)		+= 6lowpan/
 obj-$(CONFIG_IEEE802154)	+= ieee802154/
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
deleted file mode 100644
index 87205251cc25..000000000000
--- a/net/caif/Kconfig
+++ /dev/null
@@ -1,54 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# CAIF net configurations
-#
-
-menuconfig CAIF
-	tristate "CAIF support"
-	select CRC_CCITT
-	default n
-	help
-	The "Communication CPU to Application CPU Interface" (CAIF) is a packet
-	based connection-oriented MUX protocol developed by ST-Ericsson for use
-	with its modems. It is accessed from user space as sockets (PF_CAIF).
-
-	Say Y (or M) here if you build for a phone product (e.g. Android or
-	MeeGo) that uses CAIF as transport. If unsure say N.
-
-	If you select to build it as module then CAIF_NETDEV also needs to be
-	built as a module. You will also need to say Y (or M) to any CAIF
-	physical devices that your platform requires.
-
-	See Documentation/networking/caif for a further explanation on how to
-	use and configure CAIF.
-
-config  CAIF_DEBUG
-	bool "Enable Debug"
-	depends on CAIF
-	default n
-	help
-	Enable the inclusion of debug code in the CAIF stack.
-	Be aware that doing this will impact performance.
-	If unsure say N.
-
-config CAIF_NETDEV
-	tristate "CAIF GPRS Network device"
-	depends on CAIF
-	default CAIF
-	help
-	Say Y if you will be using a CAIF based GPRS network device.
-	This can be either built-in or a loadable module.
-	If you select to build it as a built-in then the main CAIF device must
-	also be a built-in.
-	If unsure say Y.
-
-config CAIF_USB
-	tristate "CAIF USB support"
-	depends on CAIF
-	default n
-	help
-	Say Y if you are using CAIF over USB CDC NCM.
-	This can be either built-in or a loadable module.
-	If you select to build it as a built-in then the main CAIF device must
-	also be a built-in.
-	If unsure say N.
diff --git a/net/caif/Makefile b/net/caif/Makefile
deleted file mode 100644
index 4f6c0517cdfb..000000000000
--- a/net/caif/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-ccflags-$(CONFIG_CAIF_DEBUG)     :=      -DDEBUG
-
-caif-y := caif_dev.o \
-	cfcnfg.o cfmuxl.o cfctrl.o  \
-	cffrml.o cfveil.o cfdbgl.o\
-	cfserl.o cfdgml.o  \
-	cfrfml.o cfvidl.o cfutill.o \
-	cfsrvl.o cfpkt_skbuff.o
-
-obj-$(CONFIG_CAIF) += caif.o
-obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
-obj-$(CONFIG_CAIF) += caif_socket.o
-obj-$(CONFIG_CAIF_USB) += caif_usb.o
-
-export-y := caif.o
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
deleted file mode 100644
index 922de3d611c0..000000000000
--- a/net/caif/caif_dev.c
+++ /dev/null
@@ -1,586 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * CAIF Interface registration.
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- *
- * Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont
- *  and Sakari Ailus <sakari.ailus@nokia.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/kernel.h>
-#include <linux/if_arp.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/mutex.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <net/netns/generic.h>
-#include <net/net_namespace.h>
-#include <net/pkt_sched.h>
-#include <net/caif/caif_device.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/caif_dev.h>
-#include <net/caif/cfpkt.h>
-#include <net/caif/cfcnfg.h>
-#include <net/caif/cfserl.h>
-
-MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol support");
-MODULE_LICENSE("GPL");
-
-/* Used for local tracking of the CAIF net devices */
-struct caif_device_entry {
-	struct cflayer layer;
-	struct list_head list;
-	struct net_device *netdev;
-	int __percpu *pcpu_refcnt;
-	spinlock_t flow_lock;
-	struct sk_buff *xoff_skb;
-	void (*xoff_skb_dtor)(struct sk_buff *skb);
-	bool xoff;
-};
-
-struct caif_device_entry_list {
-	struct list_head list;
-	/* Protects simulanous deletes in list */
-	struct mutex lock;
-};
-
-struct caif_net {
-	struct cfcnfg *cfg;
-	struct caif_device_entry_list caifdevs;
-};
-
-static unsigned int caif_net_id;
-static int q_high = 50; /* Percent */
-
-struct cfcnfg *get_cfcnfg(struct net *net)
-{
-	struct caif_net *caifn;
-	caifn = net_generic(net, caif_net_id);
-	return caifn->cfg;
-}
-EXPORT_SYMBOL(get_cfcnfg);
-
-static struct caif_device_entry_list *caif_device_list(struct net *net)
-{
-	struct caif_net *caifn;
-	caifn = net_generic(net, caif_net_id);
-	return &caifn->caifdevs;
-}
-
-static void caifd_put(struct caif_device_entry *e)
-{
-	this_cpu_dec(*e->pcpu_refcnt);
-}
-
-static void caifd_hold(struct caif_device_entry *e)
-{
-	this_cpu_inc(*e->pcpu_refcnt);
-}
-
-static int caifd_refcnt_read(struct caif_device_entry *e)
-{
-	int i, refcnt = 0;
-	for_each_possible_cpu(i)
-		refcnt += *per_cpu_ptr(e->pcpu_refcnt, i);
-	return refcnt;
-}
-
-/* Allocate new CAIF device. */
-static struct caif_device_entry *caif_device_alloc(struct net_device *dev)
-{
-	struct caif_device_entry *caifd;
-
-	caifd = kzalloc_obj(*caifd);
-	if (!caifd)
-		return NULL;
-	caifd->pcpu_refcnt = alloc_percpu(int);
-	if (!caifd->pcpu_refcnt) {
-		kfree(caifd);
-		return NULL;
-	}
-	caifd->netdev = dev;
-	dev_hold(dev);
-	return caifd;
-}
-
-static struct caif_device_entry *caif_get(struct net_device *dev)
-{
-	struct caif_device_entry_list *caifdevs =
-	    caif_device_list(dev_net(dev));
-	struct caif_device_entry *caifd;
-
-	list_for_each_entry_rcu(caifd, &caifdevs->list, list,
-				lockdep_rtnl_is_held()) {
-		if (caifd->netdev == dev)
-			return caifd;
-	}
-	return NULL;
-}
-
-static void caif_flow_cb(struct sk_buff *skb)
-{
-	struct caif_device_entry *caifd;
-	void (*dtor)(struct sk_buff *skb) = NULL;
-	bool send_xoff;
-
-	WARN_ON(skb->dev == NULL);
-
-	rcu_read_lock();
-	caifd = caif_get(skb->dev);
-
-	WARN_ON(caifd == NULL);
-	if (!caifd) {
-		rcu_read_unlock();
-		return;
-	}
-
-	caifd_hold(caifd);
-	rcu_read_unlock();
-
-	spin_lock_bh(&caifd->flow_lock);
-	send_xoff = caifd->xoff;
-	caifd->xoff = false;
-	dtor = caifd->xoff_skb_dtor;
-
-	if (WARN_ON(caifd->xoff_skb != skb))
-		skb = NULL;
-
-	caifd->xoff_skb = NULL;
-	caifd->xoff_skb_dtor = NULL;
-
-	spin_unlock_bh(&caifd->flow_lock);
-
-	if (dtor && skb)
-		dtor(skb);
-
-	if (send_xoff)
-		caifd->layer.up->
-			ctrlcmd(caifd->layer.up,
-				_CAIF_CTRLCMD_PHYIF_FLOW_ON_IND,
-				caifd->layer.id);
-	caifd_put(caifd);
-}
-
-static int transmit(struct cflayer *layer, struct cfpkt *pkt)
-{
-	int err, high = 0, qlen = 0;
-	struct caif_device_entry *caifd =
-	    container_of(layer, struct caif_device_entry, layer);
-	struct sk_buff *skb;
-	struct netdev_queue *txq;
-
-	rcu_read_lock_bh();
-
-	skb = cfpkt_tonative(pkt);
-	skb->dev = caifd->netdev;
-	skb_reset_network_header(skb);
-	skb->protocol = htons(ETH_P_CAIF);
-
-	/* Check if we need to handle xoff */
-	if (likely(caifd->netdev->priv_flags & IFF_NO_QUEUE))
-		goto noxoff;
-
-	if (unlikely(caifd->xoff))
-		goto noxoff;
-
-	if (likely(!netif_queue_stopped(caifd->netdev))) {
-		struct Qdisc *sch;
-
-		/* If we run with a TX queue, check if the queue is too long*/
-		txq = netdev_get_tx_queue(skb->dev, 0);
-		sch = rcu_dereference_bh(txq->qdisc);
-		if (likely(qdisc_is_empty(sch)))
-			goto noxoff;
-
-		/* can check for explicit qdisc len value only !NOLOCK,
-		 * always set flow off otherwise
-		 */
-		high = (caifd->netdev->tx_queue_len * q_high) / 100;
-		if (!(sch->flags & TCQ_F_NOLOCK) && likely(sch->q.qlen < high))
-			goto noxoff;
-	}
-
-	/* Hold lock while accessing xoff */
-	spin_lock_bh(&caifd->flow_lock);
-	if (caifd->xoff) {
-		spin_unlock_bh(&caifd->flow_lock);
-		goto noxoff;
-	}
-
-	/*
-	 * Handle flow off, we do this by temporary hi-jacking this
-	 * skb's destructor function, and replace it with our own
-	 * flow-on callback. The callback will set flow-on and call
-	 * the original destructor.
-	 */
-
-	pr_debug("queue has stopped(%d) or is full (%d > %d)\n",
-			netif_queue_stopped(caifd->netdev),
-			qlen, high);
-	caifd->xoff = true;
-	caifd->xoff_skb = skb;
-	caifd->xoff_skb_dtor = skb->destructor;
-	skb->destructor = caif_flow_cb;
-	spin_unlock_bh(&caifd->flow_lock);
-
-	caifd->layer.up->ctrlcmd(caifd->layer.up,
-					_CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
-					caifd->layer.id);
-noxoff:
-	rcu_read_unlock_bh();
-
-	err = dev_queue_xmit(skb);
-	if (err > 0)
-		err = -EIO;
-
-	return err;
-}
-
-/*
- * Stuff received packets into the CAIF stack.
- * On error, returns non-zero and releases the skb.
- */
-static int receive(struct sk_buff *skb, struct net_device *dev,
-		   struct packet_type *pkttype, struct net_device *orig_dev)
-{
-	struct cfpkt *pkt;
-	struct caif_device_entry *caifd;
-	int err;
-
-	pkt = cfpkt_fromnative(CAIF_DIR_IN, skb);
-
-	rcu_read_lock();
-	caifd = caif_get(dev);
-
-	if (!caifd || !caifd->layer.up || !caifd->layer.up->receive ||
-			!netif_oper_up(caifd->netdev)) {
-		rcu_read_unlock();
-		kfree_skb(skb);
-		return NET_RX_DROP;
-	}
-
-	/* Hold reference to netdevice while using CAIF stack */
-	caifd_hold(caifd);
-	rcu_read_unlock();
-
-	err = caifd->layer.up->receive(caifd->layer.up, pkt);
-
-	/* For -EILSEQ the packet is not freed so free it now */
-	if (err == -EILSEQ)
-		cfpkt_destroy(pkt);
-
-	/* Release reference to stack upwards */
-	caifd_put(caifd);
-
-	if (err != 0)
-		err = NET_RX_DROP;
-	return err;
-}
-
-static struct packet_type caif_packet_type __read_mostly = {
-	.type = cpu_to_be16(ETH_P_CAIF),
-	.func = receive,
-};
-
-static void dev_flowctrl(struct net_device *dev, int on)
-{
-	struct caif_device_entry *caifd;
-
-	rcu_read_lock();
-
-	caifd = caif_get(dev);
-	if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) {
-		rcu_read_unlock();
-		return;
-	}
-
-	caifd_hold(caifd);
-	rcu_read_unlock();
-
-	caifd->layer.up->ctrlcmd(caifd->layer.up,
-				 on ?
-				 _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND :
-				 _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
-				 caifd->layer.id);
-	caifd_put(caifd);
-}
-
-int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
-		     struct cflayer *link_support, int head_room,
-		     struct cflayer **layer,
-		     int (**rcv_func)(struct sk_buff *, struct net_device *,
-				      struct packet_type *,
-				      struct net_device *))
-{
-	struct caif_device_entry *caifd;
-	enum cfcnfg_phy_preference pref;
-	struct cfcnfg *cfg = get_cfcnfg(dev_net(dev));
-	struct caif_device_entry_list *caifdevs;
-	int res;
-
-	caifdevs = caif_device_list(dev_net(dev));
-	caifd = caif_device_alloc(dev);
-	if (!caifd)
-		return -ENOMEM;
-	*layer = &caifd->layer;
-	spin_lock_init(&caifd->flow_lock);
-
-	switch (caifdev->link_select) {
-	case CAIF_LINK_HIGH_BANDW:
-		pref = CFPHYPREF_HIGH_BW;
-		break;
-	case CAIF_LINK_LOW_LATENCY:
-		pref = CFPHYPREF_LOW_LAT;
-		break;
-	default:
-		pref = CFPHYPREF_HIGH_BW;
-		break;
-	}
-	mutex_lock(&caifdevs->lock);
-	list_add_rcu(&caifd->list, &caifdevs->list);
-
-	strscpy(caifd->layer.name, dev->name,
-		sizeof(caifd->layer.name));
-	caifd->layer.transmit = transmit;
-	res = cfcnfg_add_phy_layer(cfg,
-				dev,
-				&caifd->layer,
-				pref,
-				link_support,
-				caifdev->use_fcs,
-				head_room);
-	mutex_unlock(&caifdevs->lock);
-	if (rcv_func)
-		*rcv_func = receive;
-	return res;
-}
-EXPORT_SYMBOL(caif_enroll_dev);
-
-/* notify Caif of device events */
-static int caif_device_notify(struct notifier_block *me, unsigned long what,
-			      void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct caif_device_entry *caifd = NULL;
-	struct caif_dev_common *caifdev;
-	struct cfcnfg *cfg;
-	struct cflayer *layer, *link_support;
-	int head_room = 0;
-	struct caif_device_entry_list *caifdevs;
-	int res;
-
-	cfg = get_cfcnfg(dev_net(dev));
-	caifdevs = caif_device_list(dev_net(dev));
-
-	caifd = caif_get(dev);
-	if (caifd == NULL && dev->type != ARPHRD_CAIF)
-		return 0;
-
-	switch (what) {
-	case NETDEV_REGISTER:
-		if (caifd != NULL)
-			break;
-
-		caifdev = netdev_priv(dev);
-
-		link_support = NULL;
-		if (caifdev->use_frag) {
-			head_room = 1;
-			link_support = cfserl_create(dev->ifindex,
-							caifdev->use_stx);
-			if (!link_support) {
-				pr_warn("Out of memory\n");
-				break;
-			}
-		}
-		res = caif_enroll_dev(dev, caifdev, link_support, head_room,
-				&layer, NULL);
-		if (res)
-			cfserl_release(link_support);
-		caifdev->flowctrl = dev_flowctrl;
-		break;
-
-	case NETDEV_UP:
-		rcu_read_lock();
-
-		caifd = caif_get(dev);
-		if (caifd == NULL) {
-			rcu_read_unlock();
-			break;
-		}
-
-		caifd->xoff = false;
-		cfcnfg_set_phy_state(cfg, &caifd->layer, true);
-		rcu_read_unlock();
-
-		break;
-
-	case NETDEV_DOWN:
-		rcu_read_lock();
-
-		caifd = caif_get(dev);
-		if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) {
-			rcu_read_unlock();
-			return -EINVAL;
-		}
-
-		cfcnfg_set_phy_state(cfg, &caifd->layer, false);
-		caifd_hold(caifd);
-		rcu_read_unlock();
-
-		caifd->layer.up->ctrlcmd(caifd->layer.up,
-					 _CAIF_CTRLCMD_PHYIF_DOWN_IND,
-					 caifd->layer.id);
-
-		spin_lock_bh(&caifd->flow_lock);
-
-		/*
-		 * Replace our xoff-destructor with original destructor.
-		 * We trust that skb->destructor *always* is called before
-		 * the skb reference is invalid. The hijacked SKB destructor
-		 * takes the flow_lock so manipulating the skb->destructor here
-		 * should be safe.
-		*/
-		if (caifd->xoff_skb_dtor != NULL && caifd->xoff_skb != NULL)
-			caifd->xoff_skb->destructor = caifd->xoff_skb_dtor;
-
-		caifd->xoff = false;
-		caifd->xoff_skb_dtor = NULL;
-		caifd->xoff_skb = NULL;
-
-		spin_unlock_bh(&caifd->flow_lock);
-		caifd_put(caifd);
-		break;
-
-	case NETDEV_UNREGISTER:
-		mutex_lock(&caifdevs->lock);
-
-		caifd = caif_get(dev);
-		if (caifd == NULL) {
-			mutex_unlock(&caifdevs->lock);
-			break;
-		}
-		list_del_rcu(&caifd->list);
-
-		/*
-		 * NETDEV_UNREGISTER is called repeatedly until all reference
-		 * counts for the net-device are released. If references to
-		 * caifd is taken, simply ignore NETDEV_UNREGISTER and wait for
-		 * the next call to NETDEV_UNREGISTER.
-		 *
-		 * If any packets are in flight down the CAIF Stack,
-		 * cfcnfg_del_phy_layer will return nonzero.
-		 * If no packets are in flight, the CAIF Stack associated
-		 * with the net-device un-registering is freed.
-		 */
-
-		if (caifd_refcnt_read(caifd) != 0 ||
-			cfcnfg_del_phy_layer(cfg, &caifd->layer) != 0) {
-
-			pr_info("Wait for device inuse\n");
-			/* Enrole device if CAIF Stack is still in use */
-			list_add_rcu(&caifd->list, &caifdevs->list);
-			mutex_unlock(&caifdevs->lock);
-			break;
-		}
-
-		synchronize_rcu();
-		dev_put(caifd->netdev);
-		free_percpu(caifd->pcpu_refcnt);
-		kfree(caifd);
-
-		mutex_unlock(&caifdevs->lock);
-		break;
-	}
-	return 0;
-}
-
-static struct notifier_block caif_device_notifier = {
-	.notifier_call = caif_device_notify,
-	.priority = 0,
-};
-
-/* Per-namespace Caif devices handling */
-static int caif_init_net(struct net *net)
-{
-	struct caif_net *caifn = net_generic(net, caif_net_id);
-	INIT_LIST_HEAD(&caifn->caifdevs.list);
-	mutex_init(&caifn->caifdevs.lock);
-
-	caifn->cfg = cfcnfg_create();
-	if (!caifn->cfg)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static void caif_exit_net(struct net *net)
-{
-	struct caif_device_entry *caifd, *tmp;
-	struct caif_device_entry_list *caifdevs =
-	    caif_device_list(net);
-	struct cfcnfg *cfg =  get_cfcnfg(net);
-
-	rtnl_lock();
-	mutex_lock(&caifdevs->lock);
-
-	list_for_each_entry_safe(caifd, tmp, &caifdevs->list, list) {
-		int i = 0;
-		list_del_rcu(&caifd->list);
-		cfcnfg_set_phy_state(cfg, &caifd->layer, false);
-
-		while (i < 10 &&
-			(caifd_refcnt_read(caifd) != 0 ||
-			cfcnfg_del_phy_layer(cfg, &caifd->layer) != 0)) {
-
-			pr_info("Wait for device inuse\n");
-			msleep(250);
-			i++;
-		}
-		synchronize_rcu();
-		dev_put(caifd->netdev);
-		free_percpu(caifd->pcpu_refcnt);
-		kfree(caifd);
-	}
-	cfcnfg_remove(cfg);
-
-	mutex_unlock(&caifdevs->lock);
-	rtnl_unlock();
-}
-
-static struct pernet_operations caif_net_ops = {
-	.init = caif_init_net,
-	.exit = caif_exit_net,
-	.id   = &caif_net_id,
-	.size = sizeof(struct caif_net),
-};
-
-/* Initialize Caif devices list */
-static int __init caif_device_init(void)
-{
-	int result;
-
-	result = register_pernet_subsys(&caif_net_ops);
-
-	if (result)
-		return result;
-
-	register_netdevice_notifier(&caif_device_notifier);
-	dev_add_pack(&caif_packet_type);
-
-	return result;
-}
-
-static void __exit caif_device_exit(void)
-{
-	unregister_netdevice_notifier(&caif_device_notifier);
-	dev_remove_pack(&caif_packet_type);
-	unregister_pernet_subsys(&caif_net_ops);
-}
-
-module_init(caif_device_init);
-module_exit(caif_device_exit);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
deleted file mode 100644
index af218742af5a..000000000000
--- a/net/caif/caif_socket.c
+++ /dev/null
@@ -1,1114 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/filter.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/sched/signal.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/wait.h>
-#include <linux/poll.h>
-#include <linux/tcp.h>
-#include <linux/uaccess.h>
-#include <linux/debugfs.h>
-#include <linux/caif/caif_socket.h>
-#include <linux/pkt_sched.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/caif_dev.h>
-#include <net/caif/cfpkt.h>
-
-MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol socket support (AF_CAIF)");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(AF_CAIF);
-
-/*
- * CAIF state is re-using the TCP socket states.
- * caif_states stored in sk_state reflect the state as reported by
- * the CAIF stack, while sk_socket->state is the state of the socket.
- */
-enum caif_states {
-	CAIF_CONNECTED		= TCP_ESTABLISHED,
-	CAIF_CONNECTING	= TCP_SYN_SENT,
-	CAIF_DISCONNECTED	= TCP_CLOSE
-};
-
-#define TX_FLOW_ON_BIT	1
-#define RX_FLOW_ON_BIT	2
-
-struct caifsock {
-	struct sock sk; /* must be first member */
-	struct cflayer layer;
-	unsigned long flow_state;
-	struct caif_connect_request conn_req;
-	struct mutex readlock;
-	struct dentry *debugfs_socket_dir;
-	int headroom, tailroom, maxframe;
-};
-
-static int rx_flow_is_on(struct caifsock *cf_sk)
-{
-	return test_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
-}
-
-static int tx_flow_is_on(struct caifsock *cf_sk)
-{
-	return test_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
-}
-
-static void set_rx_flow_off(struct caifsock *cf_sk)
-{
-	clear_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
-}
-
-static void set_rx_flow_on(struct caifsock *cf_sk)
-{
-	set_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
-}
-
-static void set_tx_flow_off(struct caifsock *cf_sk)
-{
-	clear_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
-}
-
-static void set_tx_flow_on(struct caifsock *cf_sk)
-{
-	set_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
-}
-
-static void caif_read_lock(struct sock *sk)
-{
-	struct caifsock *cf_sk;
-	cf_sk = container_of(sk, struct caifsock, sk);
-	mutex_lock(&cf_sk->readlock);
-}
-
-static void caif_read_unlock(struct sock *sk)
-{
-	struct caifsock *cf_sk;
-	cf_sk = container_of(sk, struct caifsock, sk);
-	mutex_unlock(&cf_sk->readlock);
-}
-
-static int sk_rcvbuf_lowwater(struct caifsock *cf_sk)
-{
-	/* A quarter of full buffer is used a low water mark */
-	return cf_sk->sk.sk_rcvbuf / 4;
-}
-
-static void caif_flow_ctrl(struct sock *sk, int mode)
-{
-	struct caifsock *cf_sk;
-	cf_sk = container_of(sk, struct caifsock, sk);
-	if (cf_sk->layer.dn && cf_sk->layer.dn->modemcmd)
-		cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode);
-}
-
-/*
- * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are
- * not dropped, but CAIF is sending flow off instead.
- */
-static void caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
-{
-	int err;
-	unsigned long flags;
-	struct sk_buff_head *list = &sk->sk_receive_queue;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	bool queued = false;
-
-	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-		(unsigned int)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
-		net_dbg_ratelimited("sending flow OFF (queue len = %d %d)\n",
-				    atomic_read(&cf_sk->sk.sk_rmem_alloc),
-				    sk_rcvbuf_lowwater(cf_sk));
-		set_rx_flow_off(cf_sk);
-		caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
-	}
-
-	err = sk_filter(sk, skb);
-	if (err)
-		goto out;
-
-	if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) {
-		set_rx_flow_off(cf_sk);
-		net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n");
-		caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
-	}
-	skb->dev = NULL;
-	skb_set_owner_r(skb, sk);
-	spin_lock_irqsave(&list->lock, flags);
-	queued = !sock_flag(sk, SOCK_DEAD);
-	if (queued)
-		__skb_queue_tail(list, skb);
-	spin_unlock_irqrestore(&list->lock, flags);
-out:
-	if (queued)
-		sk->sk_data_ready(sk);
-	else
-		kfree_skb(skb);
-}
-
-/* Packet Receive Callback function called from CAIF Stack */
-static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt)
-{
-	struct caifsock *cf_sk;
-	struct sk_buff *skb;
-
-	cf_sk = container_of(layr, struct caifsock, layer);
-	skb = cfpkt_tonative(pkt);
-
-	if (unlikely(cf_sk->sk.sk_state != CAIF_CONNECTED)) {
-		kfree_skb(skb);
-		return 0;
-	}
-	caif_queue_rcv_skb(&cf_sk->sk, skb);
-	return 0;
-}
-
-static void cfsk_hold(struct cflayer *layr)
-{
-	struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
-	sock_hold(&cf_sk->sk);
-}
-
-static void cfsk_put(struct cflayer *layr)
-{
-	struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
-	sock_put(&cf_sk->sk);
-}
-
-/* Packet Control Callback function called from CAIF */
-static void caif_ctrl_cb(struct cflayer *layr,
-			 enum caif_ctrlcmd flow,
-			 int phyid)
-{
-	struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
-	switch (flow) {
-	case CAIF_CTRLCMD_FLOW_ON_IND:
-		/* OK from modem to start sending again */
-		set_tx_flow_on(cf_sk);
-		cf_sk->sk.sk_state_change(&cf_sk->sk);
-		break;
-
-	case CAIF_CTRLCMD_FLOW_OFF_IND:
-		/* Modem asks us to shut up */
-		set_tx_flow_off(cf_sk);
-		cf_sk->sk.sk_state_change(&cf_sk->sk);
-		break;
-
-	case CAIF_CTRLCMD_INIT_RSP:
-		/* We're now connected */
-		caif_client_register_refcnt(&cf_sk->layer,
-						cfsk_hold, cfsk_put);
-		cf_sk->sk.sk_state = CAIF_CONNECTED;
-		set_tx_flow_on(cf_sk);
-		cf_sk->sk.sk_shutdown = 0;
-		cf_sk->sk.sk_state_change(&cf_sk->sk);
-		break;
-
-	case CAIF_CTRLCMD_DEINIT_RSP:
-		/* We're now disconnected */
-		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
-		cf_sk->sk.sk_state_change(&cf_sk->sk);
-		break;
-
-	case CAIF_CTRLCMD_INIT_FAIL_RSP:
-		/* Connect request failed */
-		cf_sk->sk.sk_err = ECONNREFUSED;
-		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
-		cf_sk->sk.sk_shutdown = SHUTDOWN_MASK;
-		/*
-		 * Socket "standards" seems to require POLLOUT to
-		 * be set at connect failure.
-		 */
-		set_tx_flow_on(cf_sk);
-		cf_sk->sk.sk_state_change(&cf_sk->sk);
-		break;
-
-	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
-		/* Modem has closed this connection, or device is down. */
-		cf_sk->sk.sk_shutdown = SHUTDOWN_MASK;
-		cf_sk->sk.sk_err = ECONNRESET;
-		set_rx_flow_on(cf_sk);
-		sk_error_report(&cf_sk->sk);
-		break;
-
-	default:
-		pr_debug("Unexpected flow command %d\n", flow);
-	}
-}
-
-static void caif_check_flow_release(struct sock *sk)
-{
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-
-	if (rx_flow_is_on(cf_sk))
-		return;
-
-	if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) {
-			set_rx_flow_on(cf_sk);
-			caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ);
-	}
-}
-
-/*
- * Copied from unix_dgram_recvmsg, but removed credit checks,
- * changed locking, address handling and added MSG_TRUNC.
- */
-static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m,
-			       size_t len, int flags)
-
-{
-	struct sock *sk = sock->sk;
-	struct sk_buff *skb;
-	int ret;
-	int copylen;
-
-	ret = -EOPNOTSUPP;
-	if (flags & MSG_OOB)
-		goto read_error;
-
-	skb = skb_recv_datagram(sk, flags, &ret);
-	if (!skb)
-		goto read_error;
-	copylen = skb->len;
-	if (len < copylen) {
-		m->msg_flags |= MSG_TRUNC;
-		copylen = len;
-	}
-
-	ret = skb_copy_datagram_msg(skb, 0, m, copylen);
-	if (ret)
-		goto out_free;
-
-	ret = (flags & MSG_TRUNC) ? skb->len : copylen;
-out_free:
-	skb_free_datagram(sk, skb);
-	caif_check_flow_release(sk);
-	return ret;
-
-read_error:
-	return ret;
-}
-
-
-/* Copied from unix_stream_wait_data, identical except for lock call. */
-static long caif_stream_data_wait(struct sock *sk, long timeo)
-{
-	DEFINE_WAIT(wait);
-	lock_sock(sk);
-
-	for (;;) {
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
-		if (!skb_queue_empty(&sk->sk_receive_queue) ||
-			sk->sk_err ||
-			sk->sk_state != CAIF_CONNECTED ||
-			sock_flag(sk, SOCK_DEAD) ||
-			(sk->sk_shutdown & RCV_SHUTDOWN) ||
-			signal_pending(current) ||
-			!timeo)
-			break;
-
-		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-		release_sock(sk);
-		timeo = schedule_timeout(timeo);
-		lock_sock(sk);
-
-		if (sock_flag(sk, SOCK_DEAD))
-			break;
-
-		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	}
-
-	finish_wait(sk_sleep(sk), &wait);
-	release_sock(sk);
-	return timeo;
-}
-
-
-/*
- * Copied from unix_stream_recvmsg, but removed credit checks,
- * changed locking calls, changed address handling.
- */
-static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg,
-			       size_t size, int flags)
-{
-	struct sock *sk = sock->sk;
-	int copied = 0;
-	int target;
-	int err = 0;
-	long timeo;
-
-	err = -EOPNOTSUPP;
-	if (flags&MSG_OOB)
-		goto out;
-
-	/*
-	 * Lock the socket to prevent queue disordering
-	 * while sleeps in memcpy_tomsg
-	 */
-	err = -EAGAIN;
-	if (sk->sk_state == CAIF_CONNECTING)
-		goto out;
-
-	caif_read_lock(sk);
-	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
-	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
-
-	do {
-		int chunk;
-		struct sk_buff *skb;
-
-		lock_sock(sk);
-		if (sock_flag(sk, SOCK_DEAD)) {
-			err = -ECONNRESET;
-			goto unlock;
-		}
-		skb = skb_dequeue(&sk->sk_receive_queue);
-		caif_check_flow_release(sk);
-
-		if (skb == NULL) {
-			if (copied >= target)
-				goto unlock;
-			/*
-			 *	POSIX 1003.1g mandates this order.
-			 */
-			err = sock_error(sk);
-			if (err)
-				goto unlock;
-			err = -ECONNRESET;
-			if (sk->sk_shutdown & RCV_SHUTDOWN)
-				goto unlock;
-
-			err = -EPIPE;
-			if (sk->sk_state != CAIF_CONNECTED)
-				goto unlock;
-			if (sock_flag(sk, SOCK_DEAD))
-				goto unlock;
-
-			release_sock(sk);
-
-			err = -EAGAIN;
-			if (!timeo)
-				break;
-
-			caif_read_unlock(sk);
-
-			timeo = caif_stream_data_wait(sk, timeo);
-
-			if (signal_pending(current)) {
-				err = sock_intr_errno(timeo);
-				goto out;
-			}
-			caif_read_lock(sk);
-			continue;
-unlock:
-			release_sock(sk);
-			break;
-		}
-		release_sock(sk);
-		chunk = min_t(unsigned int, skb->len, size);
-		if (memcpy_to_msg(msg, skb->data, chunk)) {
-			skb_queue_head(&sk->sk_receive_queue, skb);
-			if (copied == 0)
-				copied = -EFAULT;
-			break;
-		}
-		copied += chunk;
-		size -= chunk;
-
-		/* Mark read part of skb as used */
-		if (!(flags & MSG_PEEK)) {
-			skb_pull(skb, chunk);
-
-			/* put the skb back if we didn't use it up. */
-			if (skb->len) {
-				skb_queue_head(&sk->sk_receive_queue, skb);
-				break;
-			}
-			kfree_skb(skb);
-
-		} else {
-			/*
-			 * It is questionable, see note in unix_dgram_recvmsg.
-			 */
-			/* put message back and return */
-			skb_queue_head(&sk->sk_receive_queue, skb);
-			break;
-		}
-	} while (size);
-	caif_read_unlock(sk);
-
-out:
-	return copied ? : err;
-}
-
-/*
- * Copied from sock.c:sock_wait_for_wmem, but change to wait for
- * CAIF flow-on and sock_writable.
- */
-static long caif_wait_for_flow_on(struct caifsock *cf_sk,
-				  int wait_writeable, long timeo, int *err)
-{
-	struct sock *sk = &cf_sk->sk;
-	DEFINE_WAIT(wait);
-	for (;;) {
-		*err = 0;
-		if (tx_flow_is_on(cf_sk) &&
-			(!wait_writeable || sock_writeable(&cf_sk->sk)))
-			break;
-		*err = -ETIMEDOUT;
-		if (!timeo)
-			break;
-		*err = -ERESTARTSYS;
-		if (signal_pending(current))
-			break;
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		*err = -ECONNRESET;
-		if (sk->sk_shutdown & SHUTDOWN_MASK)
-			break;
-		*err = -sk->sk_err;
-		if (sk->sk_err)
-			break;
-		*err = -EPIPE;
-		if (cf_sk->sk.sk_state != CAIF_CONNECTED)
-			break;
-		timeo = schedule_timeout(timeo);
-	}
-	finish_wait(sk_sleep(sk), &wait);
-	return timeo;
-}
-
-/*
- * Transmit a SKB. The device may temporarily request re-transmission
- * by returning EAGAIN.
- */
-static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
-			int noblock, long timeo)
-{
-	struct cfpkt *pkt;
-
-	pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb);
-	memset(skb->cb, 0, sizeof(struct caif_payload_info));
-	cfpkt_set_prio(pkt, cf_sk->sk.sk_priority);
-
-	if (cf_sk->layer.dn == NULL) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
-
-	return cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
-}
-
-/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */
-static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg,
-			       size_t len)
-{
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	int buffer_size;
-	int ret = 0;
-	struct sk_buff *skb = NULL;
-	int noblock;
-	long timeo;
-	caif_assert(cf_sk);
-	ret = sock_error(sk);
-	if (ret)
-		goto err;
-
-	ret = -EOPNOTSUPP;
-	if (msg->msg_flags&MSG_OOB)
-		goto err;
-
-	ret = -EOPNOTSUPP;
-	if (msg->msg_namelen)
-		goto err;
-
-	noblock = msg->msg_flags & MSG_DONTWAIT;
-
-	timeo = sock_sndtimeo(sk, noblock);
-	timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
-				1, timeo, &ret);
-
-	if (ret)
-		goto err;
-	ret = -EPIPE;
-	if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
-		sock_flag(sk, SOCK_DEAD) ||
-		(sk->sk_shutdown & RCV_SHUTDOWN))
-		goto err;
-
-	/* Error if trying to write more than maximum frame size. */
-	ret = -EMSGSIZE;
-	if (len > cf_sk->maxframe && cf_sk->sk.sk_protocol != CAIFPROTO_RFM)
-		goto err;
-
-	buffer_size = len + cf_sk->headroom + cf_sk->tailroom;
-
-	ret = -ENOMEM;
-	skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret);
-
-	if (!skb || skb_tailroom(skb) < buffer_size)
-		goto err;
-
-	skb_reserve(skb, cf_sk->headroom);
-
-	ret = memcpy_from_msg(skb_put(skb, len), msg, len);
-
-	if (ret)
-		goto err;
-	ret = transmit_skb(skb, cf_sk, noblock, timeo);
-	if (ret < 0)
-		/* skb is already freed */
-		return ret;
-
-	return len;
-err:
-	kfree_skb(skb);
-	return ret;
-}
-
-/*
- * Copied from unix_stream_sendmsg and adapted to CAIF:
- * Changed removed permission handling and added waiting for flow on
- * and other minor adaptations.
- */
-static int caif_stream_sendmsg(struct socket *sock, struct msghdr *msg,
-			       size_t len)
-{
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	int err, size;
-	struct sk_buff *skb;
-	int sent = 0;
-	long timeo;
-
-	err = -EOPNOTSUPP;
-	if (unlikely(msg->msg_flags&MSG_OOB))
-		goto out_err;
-
-	if (unlikely(msg->msg_namelen))
-		goto out_err;
-
-	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-	timeo = caif_wait_for_flow_on(cf_sk, 1, timeo, &err);
-
-	if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
-		goto pipe_err;
-
-	while (sent < len) {
-
-		size = len-sent;
-
-		if (size > cf_sk->maxframe)
-			size = cf_sk->maxframe;
-
-		/* If size is more than half of sndbuf, chop up message */
-		if (size > ((sk->sk_sndbuf >> 1) - 64))
-			size = (sk->sk_sndbuf >> 1) - 64;
-
-		if (size > SKB_MAX_ALLOC)
-			size = SKB_MAX_ALLOC;
-
-		skb = sock_alloc_send_skb(sk,
-					size + cf_sk->headroom +
-					cf_sk->tailroom,
-					msg->msg_flags&MSG_DONTWAIT,
-					&err);
-		if (skb == NULL)
-			goto out_err;
-
-		skb_reserve(skb, cf_sk->headroom);
-		/*
-		 *	If you pass two values to the sock_alloc_send_skb
-		 *	it tries to grab the large buffer with GFP_NOFS
-		 *	(which can fail easily), and if it fails grab the
-		 *	fallback size buffer which is under a page and will
-		 *	succeed. [Alan]
-		 */
-		size = min_t(int, size, skb_tailroom(skb));
-
-		err = memcpy_from_msg(skb_put(skb, size), msg, size);
-		if (err) {
-			kfree_skb(skb);
-			goto out_err;
-		}
-		err = transmit_skb(skb, cf_sk,
-				msg->msg_flags&MSG_DONTWAIT, timeo);
-		if (err < 0)
-			/* skb is already freed */
-			goto pipe_err;
-
-		sent += size;
-	}
-
-	return sent;
-
-pipe_err:
-	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
-		send_sig(SIGPIPE, current, 0);
-	err = -EPIPE;
-out_err:
-	return sent ? : err;
-}
-
-static int setsockopt(struct socket *sock, int lvl, int opt, sockptr_t ov,
-		unsigned int ol)
-{
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	int linksel;
-
-	if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
-		return -ENOPROTOOPT;
-
-	switch (opt) {
-	case CAIFSO_LINK_SELECT:
-		if (ol < sizeof(int))
-			return -EINVAL;
-		if (lvl != SOL_CAIF)
-			goto bad_sol;
-		if (copy_from_sockptr(&linksel, ov, sizeof(int)))
-			return -EINVAL;
-		lock_sock(&(cf_sk->sk));
-		cf_sk->conn_req.link_selector = linksel;
-		release_sock(&cf_sk->sk);
-		return 0;
-
-	case CAIFSO_REQ_PARAM:
-		if (lvl != SOL_CAIF)
-			goto bad_sol;
-		if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL)
-			return -ENOPROTOOPT;
-		lock_sock(&(cf_sk->sk));
-		if (ol > sizeof(cf_sk->conn_req.param.data) ||
-		    copy_from_sockptr(&cf_sk->conn_req.param.data, ov, ol)) {
-			release_sock(&cf_sk->sk);
-			return -EINVAL;
-		}
-		cf_sk->conn_req.param.size = ol;
-		release_sock(&cf_sk->sk);
-		return 0;
-
-	default:
-		return -ENOPROTOOPT;
-	}
-
-	return 0;
-bad_sol:
-	return -ENOPROTOOPT;
-
-}
-
-/*
- * caif_connect() - Connect a CAIF Socket
- * Copied and modified af_irda.c:irda_connect().
- *
- * Note : by consulting "errno", the user space caller may learn the cause
- * of the failure. Most of them are visible in the function, others may come
- * from subroutines called and are listed here :
- *  o -EAFNOSUPPORT: bad socket family or type.
- *  o -ESOCKTNOSUPPORT: bad socket type or protocol
- *  o -EINVAL: bad socket address, or CAIF link type
- *  o -ECONNREFUSED: remote end refused the connection.
- *  o -EINPROGRESS: connect request sent but timed out (or non-blocking)
- *  o -EISCONN: already connected.
- *  o -ETIMEDOUT: Connection timed out (send timeout)
- *  o -ENODEV: No link layer to send request
- *  o -ECONNRESET: Received Shutdown indication or lost link layer
- *  o -ENOMEM: Out of memory
- *
- *  State Strategy:
- *  o sk_state: holds the CAIF_* protocol state, it's updated by
- *	caif_ctrl_cb.
- *  o sock->state: holds the SS_* socket state and is updated by connect and
- *	disconnect.
- */
-static int caif_connect(struct socket *sock, struct sockaddr_unsized *uaddr,
-			int addr_len, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	long timeo;
-	int err;
-	int ifindex, headroom, tailroom;
-	unsigned int mtu;
-	struct net_device *dev;
-
-	lock_sock(sk);
-
-	err = -EINVAL;
-	if (addr_len < offsetofend(struct sockaddr, sa_family))
-		goto out;
-
-	err = -EAFNOSUPPORT;
-	if (uaddr->sa_family != AF_CAIF)
-		goto out;
-
-	switch (sock->state) {
-	case SS_UNCONNECTED:
-		/* Normal case, a fresh connect */
-		caif_assert(sk->sk_state == CAIF_DISCONNECTED);
-		break;
-	case SS_CONNECTING:
-		switch (sk->sk_state) {
-		case CAIF_CONNECTED:
-			sock->state = SS_CONNECTED;
-			err = -EISCONN;
-			goto out;
-		case CAIF_DISCONNECTED:
-			/* Reconnect allowed */
-			break;
-		case CAIF_CONNECTING:
-			err = -EALREADY;
-			if (flags & O_NONBLOCK)
-				goto out;
-			goto wait_connect;
-		}
-		break;
-	case SS_CONNECTED:
-		caif_assert(sk->sk_state == CAIF_CONNECTED ||
-				sk->sk_state == CAIF_DISCONNECTED);
-		if (sk->sk_shutdown & SHUTDOWN_MASK) {
-			/* Allow re-connect after SHUTDOWN_IND */
-			caif_disconnect_client(sock_net(sk), &cf_sk->layer);
-			caif_free_client(&cf_sk->layer);
-			break;
-		}
-		/* No reconnect on a seqpacket socket */
-		err = -EISCONN;
-		goto out;
-	case SS_DISCONNECTING:
-	case SS_FREE:
-		caif_assert(1); /*Should never happen */
-		break;
-	}
-	sk->sk_state = CAIF_DISCONNECTED;
-	sock->state = SS_UNCONNECTED;
-	sk_stream_kill_queues(&cf_sk->sk);
-
-	err = -EINVAL;
-	if (addr_len != sizeof(struct sockaddr_caif))
-		goto out;
-
-	memcpy(&cf_sk->conn_req.sockaddr, uaddr,
-		sizeof(struct sockaddr_caif));
-
-	/* Move to connecting socket, start sending Connect Requests */
-	sock->state = SS_CONNECTING;
-	sk->sk_state = CAIF_CONNECTING;
-
-	/* Check priority value comming from socket */
-	/* if priority value is out of range it will be ajusted */
-	if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX)
-		cf_sk->conn_req.priority = CAIF_PRIO_MAX;
-	else if (cf_sk->sk.sk_priority < CAIF_PRIO_MIN)
-		cf_sk->conn_req.priority = CAIF_PRIO_MIN;
-	else
-		cf_sk->conn_req.priority = cf_sk->sk.sk_priority;
-
-	/*ifindex = id of the interface.*/
-	cf_sk->conn_req.ifindex = cf_sk->sk.sk_bound_dev_if;
-
-	cf_sk->layer.receive = caif_sktrecv_cb;
-
-	err = caif_connect_client(sock_net(sk), &cf_sk->conn_req,
-				&cf_sk->layer, &ifindex, &headroom, &tailroom);
-
-	if (err < 0) {
-		cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
-		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
-		goto out;
-	}
-
-	err = -ENODEV;
-	rcu_read_lock();
-	dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
-	if (!dev) {
-		rcu_read_unlock();
-		goto out;
-	}
-	cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom);
-	mtu = dev->mtu;
-	rcu_read_unlock();
-
-	cf_sk->tailroom = tailroom;
-	cf_sk->maxframe = mtu - (headroom + tailroom);
-	if (cf_sk->maxframe < 1) {
-		pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
-		err = -ENODEV;
-		goto out;
-	}
-
-	err = -EINPROGRESS;
-wait_connect:
-
-	if (sk->sk_state != CAIF_CONNECTED && (flags & O_NONBLOCK))
-		goto out;
-
-	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
-
-	release_sock(sk);
-	err = -ERESTARTSYS;
-	timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
-			sk->sk_state != CAIF_CONNECTING,
-			timeo);
-	lock_sock(sk);
-	if (timeo < 0)
-		goto out; /* -ERESTARTSYS */
-
-	err = -ETIMEDOUT;
-	if (timeo == 0 && sk->sk_state != CAIF_CONNECTED)
-		goto out;
-	if (sk->sk_state != CAIF_CONNECTED) {
-		sock->state = SS_UNCONNECTED;
-		err = sock_error(sk);
-		if (!err)
-			err = -ECONNREFUSED;
-		goto out;
-	}
-	sock->state = SS_CONNECTED;
-	err = 0;
-out:
-	release_sock(sk);
-	return err;
-}
-
-/*
- * caif_release() - Disconnect a CAIF Socket
- * Copied and modified af_irda.c:irda_release().
- */
-static int caif_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-
-	if (!sk)
-		return 0;
-
-	set_tx_flow_off(cf_sk);
-
-	/*
-	 * Ensure that packets are not queued after this point in time.
-	 * caif_queue_rcv_skb checks SOCK_DEAD holding the queue lock,
-	 * this ensures no packets when sock is dead.
-	 */
-	spin_lock_bh(&sk->sk_receive_queue.lock);
-	sock_set_flag(sk, SOCK_DEAD);
-	spin_unlock_bh(&sk->sk_receive_queue.lock);
-	sock->sk = NULL;
-
-	WARN_ON(IS_ERR(cf_sk->debugfs_socket_dir));
-	debugfs_remove_recursive(cf_sk->debugfs_socket_dir);
-
-	lock_sock(&(cf_sk->sk));
-	sk->sk_state = CAIF_DISCONNECTED;
-	sk->sk_shutdown = SHUTDOWN_MASK;
-
-	caif_disconnect_client(sock_net(sk), &cf_sk->layer);
-	cf_sk->sk.sk_socket->state = SS_DISCONNECTING;
-	wake_up_interruptible_poll(sk_sleep(sk), EPOLLERR|EPOLLHUP);
-
-	sock_orphan(sk);
-	sk_stream_kill_queues(&cf_sk->sk);
-	release_sock(sk);
-	sock_put(sk);
-	return 0;
-}
-
-/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
-static __poll_t caif_poll(struct file *file,
-			      struct socket *sock, poll_table *wait)
-{
-	struct sock *sk = sock->sk;
-	__poll_t mask;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-
-	sock_poll_wait(file, sock, wait);
-	mask = 0;
-
-	/* exceptional events? */
-	if (sk->sk_err)
-		mask |= EPOLLERR;
-	if (sk->sk_shutdown == SHUTDOWN_MASK)
-		mask |= EPOLLHUP;
-	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= EPOLLRDHUP;
-
-	/* readable? */
-	if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
-		(sk->sk_shutdown & RCV_SHUTDOWN))
-		mask |= EPOLLIN | EPOLLRDNORM;
-
-	/*
-	 * we set writable also when the other side has shut down the
-	 * connection. This prevents stuck sockets.
-	 */
-	if (sock_writeable(sk) && tx_flow_is_on(cf_sk))
-		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
-
-	return mask;
-}
-
-static const struct proto_ops caif_seqpacket_ops = {
-	.family = PF_CAIF,
-	.owner = THIS_MODULE,
-	.release = caif_release,
-	.bind = sock_no_bind,
-	.connect = caif_connect,
-	.socketpair = sock_no_socketpair,
-	.accept = sock_no_accept,
-	.getname = sock_no_getname,
-	.poll = caif_poll,
-	.ioctl = sock_no_ioctl,
-	.listen = sock_no_listen,
-	.shutdown = sock_no_shutdown,
-	.setsockopt = setsockopt,
-	.sendmsg = caif_seqpkt_sendmsg,
-	.recvmsg = caif_seqpkt_recvmsg,
-	.mmap = sock_no_mmap,
-};
-
-static const struct proto_ops caif_stream_ops = {
-	.family = PF_CAIF,
-	.owner = THIS_MODULE,
-	.release = caif_release,
-	.bind = sock_no_bind,
-	.connect = caif_connect,
-	.socketpair = sock_no_socketpair,
-	.accept = sock_no_accept,
-	.getname = sock_no_getname,
-	.poll = caif_poll,
-	.ioctl = sock_no_ioctl,
-	.listen = sock_no_listen,
-	.shutdown = sock_no_shutdown,
-	.setsockopt = setsockopt,
-	.sendmsg = caif_stream_sendmsg,
-	.recvmsg = caif_stream_recvmsg,
-	.mmap = sock_no_mmap,
-};
-
-/* This function is called when a socket is finally destroyed. */
-static void caif_sock_destructor(struct sock *sk)
-{
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	caif_assert(!refcount_read(&sk->sk_wmem_alloc));
-	caif_assert(sk_unhashed(sk));
-	caif_assert(!sk->sk_socket);
-	if (!sock_flag(sk, SOCK_DEAD)) {
-		pr_debug("Attempt to release alive CAIF socket: %p\n", sk);
-		return;
-	}
-	sk_stream_kill_queues(&cf_sk->sk);
-	WARN_ON_ONCE(sk->sk_forward_alloc);
-	caif_free_client(&cf_sk->layer);
-}
-
-static int caif_create(struct net *net, struct socket *sock, int protocol,
-		       int kern)
-{
-	struct sock *sk = NULL;
-	struct caifsock *cf_sk = NULL;
-	static struct proto prot = {.name = "PF_CAIF",
-		.owner = THIS_MODULE,
-		.obj_size = sizeof(struct caifsock),
-		.useroffset = offsetof(struct caifsock, conn_req.param),
-		.usersize = sizeof_field(struct caifsock, conn_req.param)
-	};
-
-	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
-		return -EPERM;
-	/*
-	 * The sock->type specifies the socket type to use.
-	 * The CAIF socket is a packet stream in the sense
-	 * that it is packet based. CAIF trusts the reliability
-	 * of the link, no resending is implemented.
-	 */
-	if (sock->type == SOCK_SEQPACKET)
-		sock->ops = &caif_seqpacket_ops;
-	else if (sock->type == SOCK_STREAM)
-		sock->ops = &caif_stream_ops;
-	else
-		return -ESOCKTNOSUPPORT;
-
-	if (protocol < 0 || protocol >= CAIFPROTO_MAX)
-		return -EPROTONOSUPPORT;
-	/*
-	 * Set the socket state to unconnected.	 The socket state
-	 * is really not used at all in the net/core or socket.c but the
-	 * initialization makes sure that sock->state is not uninitialized.
-	 */
-	sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot, kern);
-	if (!sk)
-		return -ENOMEM;
-
-	cf_sk = container_of(sk, struct caifsock, sk);
-
-	/* Store the protocol */
-	sk->sk_protocol = (unsigned char) protocol;
-
-	/* Initialize default priority for well-known cases */
-	switch (protocol) {
-	case CAIFPROTO_AT:
-		sk->sk_priority = TC_PRIO_CONTROL;
-		break;
-	case CAIFPROTO_RFM:
-		sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
-		break;
-	default:
-		sk->sk_priority = TC_PRIO_BESTEFFORT;
-	}
-
-	/*
-	 * Lock in order to try to stop someone from opening the socket
-	 * too early.
-	 */
-	lock_sock(&(cf_sk->sk));
-
-	/* Initialize the nozero default sock structure data. */
-	sock_init_data(sock, sk);
-	sk->sk_destruct = caif_sock_destructor;
-
-	mutex_init(&cf_sk->readlock); /* single task reading lock */
-	cf_sk->layer.ctrlcmd = caif_ctrl_cb;
-	cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
-	cf_sk->sk.sk_state = CAIF_DISCONNECTED;
-
-	set_tx_flow_off(cf_sk);
-	set_rx_flow_on(cf_sk);
-
-	/* Set default options on configuration */
-	cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
-	cf_sk->conn_req.protocol = protocol;
-	release_sock(&cf_sk->sk);
-	return 0;
-}
-
-
-static const struct net_proto_family caif_family_ops = {
-	.family = PF_CAIF,
-	.create = caif_create,
-	.owner = THIS_MODULE,
-};
-
-static int __init caif_sktinit_module(void)
-{
-	return sock_register(&caif_family_ops);
-}
-
-static void __exit caif_sktexit_module(void)
-{
-	sock_unregister(PF_CAIF);
-}
-module_init(caif_sktinit_module);
-module_exit(caif_sktexit_module);
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
deleted file mode 100644
index 4d44960d4c2f..000000000000
--- a/net/caif/caif_usb.c
+++ /dev/null
@@ -1,216 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * CAIF USB handler
- * Copyright (C) ST-Ericsson AB 2011
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/slab.h>
-#include <linux/mii.h>
-#include <linux/usb.h>
-#include <linux/usb/usbnet.h>
-#include <linux/etherdevice.h>
-#include <net/netns/generic.h>
-#include <net/caif/caif_dev.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfpkt.h>
-#include <net/caif/cfcnfg.h>
-
-MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol USB support");
-MODULE_LICENSE("GPL");
-
-#define CFUSB_PAD_DESCR_SZ 1	/* Alignment descriptor length */
-#define CFUSB_ALIGNMENT 4	/* Number of bytes to align. */
-#define CFUSB_MAX_HEADLEN (CFUSB_PAD_DESCR_SZ + CFUSB_ALIGNMENT-1)
-#define STE_USB_VID 0x04cc	/* USB Product ID for ST-Ericsson */
-#define STE_USB_PID_CAIF 0x230f	/* Product id for CAIF Modems */
-
-struct cfusbl {
-	struct cflayer layer;
-	u8 tx_eth_hdr[ETH_HLEN];
-};
-
-static bool pack_added;
-
-static int cfusbl_receive(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u8 hpad;
-
-	/* Remove padding. */
-	cfpkt_extr_head(pkt, &hpad, 1);
-	cfpkt_extr_head(pkt, NULL, hpad);
-	return layr->up->receive(layr->up, pkt);
-}
-
-static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt)
-{
-	struct caif_payload_info *info;
-	u8 hpad;
-	u8 zeros[CFUSB_ALIGNMENT];
-	struct sk_buff *skb;
-	struct cfusbl *usbl = container_of(layr, struct cfusbl, layer);
-
-	skb = cfpkt_tonative(pkt);
-
-	skb_reset_network_header(skb);
-	skb->protocol = htons(ETH_P_IP);
-
-	info = cfpkt_info(pkt);
-	hpad = (info->hdr_len + CFUSB_PAD_DESCR_SZ) & (CFUSB_ALIGNMENT - 1);
-
-	if (skb_headroom(skb) < ETH_HLEN + CFUSB_PAD_DESCR_SZ + hpad) {
-		pr_warn("Headroom too small\n");
-		kfree_skb(skb);
-		return -EIO;
-	}
-	memset(zeros, 0, hpad);
-
-	cfpkt_add_head(pkt, zeros, hpad);
-	cfpkt_add_head(pkt, &hpad, 1);
-	cfpkt_add_head(pkt, usbl->tx_eth_hdr, sizeof(usbl->tx_eth_hdr));
-	return layr->dn->transmit(layr->dn, pkt);
-}
-
-static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			   int phyid)
-{
-	if (layr->up && layr->up->ctrlcmd)
-		layr->up->ctrlcmd(layr->up, ctrl, layr->id);
-}
-
-static struct cflayer *cfusbl_create(int phyid, const u8 ethaddr[ETH_ALEN],
-				      u8 braddr[ETH_ALEN])
-{
-	struct cfusbl *this = kmalloc_obj(struct cfusbl, GFP_ATOMIC);
-
-	if (!this)
-		return NULL;
-
-	caif_assert(offsetof(struct cfusbl, layer) == 0);
-
-	memset(&this->layer, 0, sizeof(this->layer));
-	this->layer.receive = cfusbl_receive;
-	this->layer.transmit = cfusbl_transmit;
-	this->layer.ctrlcmd = cfusbl_ctrlcmd;
-	snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "usb%d", phyid);
-	this->layer.id = phyid;
-
-	/*
-	 * Construct TX ethernet header:
-	 *	0-5	destination address
-	 *	5-11	source address
-	 *	12-13	protocol type
-	 */
-	ether_addr_copy(&this->tx_eth_hdr[ETH_ALEN], braddr);
-	ether_addr_copy(&this->tx_eth_hdr[ETH_ALEN], ethaddr);
-	this->tx_eth_hdr[12] = cpu_to_be16(ETH_P_802_EX1) & 0xff;
-	this->tx_eth_hdr[13] = (cpu_to_be16(ETH_P_802_EX1) >> 8) & 0xff;
-	pr_debug("caif ethernet TX-header dst:%pM src:%pM type:%02x%02x\n",
-			this->tx_eth_hdr, this->tx_eth_hdr + ETH_ALEN,
-			this->tx_eth_hdr[12], this->tx_eth_hdr[13]);
-
-	return (struct cflayer *) this;
-}
-
-static void cfusbl_release(struct cflayer *layer)
-{
-	kfree(layer);
-}
-
-static struct packet_type caif_usb_type __read_mostly = {
-	.type = cpu_to_be16(ETH_P_802_EX1),
-};
-
-static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
-				void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct caif_dev_common common;
-	struct cflayer *layer, *link_support;
-	struct usbnet *usbnet;
-	struct usb_device *usbdev;
-	int res;
-
-	if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED)
-		return 0;
-
-	/* Check whether we have a NCM device, and find its VID/PID. */
-	if (!(dev->dev.parent && dev->dev.parent->driver &&
-	      strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0))
-		return 0;
-
-	usbnet = netdev_priv(dev);
-	usbdev = usbnet->udev;
-
-	pr_debug("USB CDC NCM device VID:0x%4x PID:0x%4x\n",
-		le16_to_cpu(usbdev->descriptor.idVendor),
-		le16_to_cpu(usbdev->descriptor.idProduct));
-
-	/* Check for VID/PID that supports CAIF */
-	if (!(le16_to_cpu(usbdev->descriptor.idVendor) == STE_USB_VID &&
-		le16_to_cpu(usbdev->descriptor.idProduct) == STE_USB_PID_CAIF))
-		return 0;
-
-	if (what == NETDEV_UNREGISTER)
-		module_put(THIS_MODULE);
-
-	if (what != NETDEV_REGISTER)
-		return 0;
-
-	__module_get(THIS_MODULE);
-
-	memset(&common, 0, sizeof(common));
-	common.use_frag = false;
-	common.use_fcs = false;
-	common.use_stx = false;
-	common.link_select = CAIF_LINK_HIGH_BANDW;
-	common.flowctrl = NULL;
-
-	link_support = cfusbl_create(dev->ifindex, dev->dev_addr,
-					dev->broadcast);
-
-	if (!link_support)
-		return -ENOMEM;
-
-	if (dev->num_tx_queues > 1)
-		pr_warn("USB device uses more than one tx queue\n");
-
-	res = caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
-			&layer, &caif_usb_type.func);
-	if (res)
-		goto err;
-
-	if (!pack_added)
-		dev_add_pack(&caif_usb_type);
-	pack_added = true;
-
-	strscpy(layer->name, dev->name, sizeof(layer->name));
-
-	return 0;
-err:
-	cfusbl_release(link_support);
-	return res;
-}
-
-static struct notifier_block caif_device_notifier = {
-	.notifier_call = cfusbl_device_notify,
-	.priority = 0,
-};
-
-static int __init cfusbl_init(void)
-{
-	return register_netdevice_notifier(&caif_device_notifier);
-}
-
-static void __exit cfusbl_exit(void)
-{
-	unregister_netdevice_notifier(&caif_device_notifier);
-	dev_remove_pack(&caif_usb_type);
-}
-
-module_init(cfusbl_init);
-module_exit(cfusbl_exit);
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
deleted file mode 100644
index 8a80914783e8..000000000000
--- a/net/caif/cfcnfg.c
+++ /dev/null
@@ -1,612 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/kernel.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/netdevice.h>
-#include <linux/module.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfpkt.h>
-#include <net/caif/cfcnfg.h>
-#include <net/caif/cfctrl.h>
-#include <net/caif/cfmuxl.h>
-#include <net/caif/cffrml.h>
-#include <net/caif/cfserl.h>
-#include <net/caif/cfsrvl.h>
-#include <net/caif/caif_dev.h>
-
-#define container_obj(layr) container_of(layr, struct cfcnfg, layer)
-
-/* Information about CAIF physical interfaces held by Config Module in order
- * to manage physical interfaces
- */
-struct cfcnfg_phyinfo {
-	struct list_head node;
-	bool up;
-
-	/* Pointer to the layer below the MUX (framing layer) */
-	struct cflayer *frm_layer;
-	/* Pointer to the lowest actual physical layer */
-	struct cflayer *phy_layer;
-	/* Unique identifier of the physical interface */
-	unsigned int id;
-	/* Preference of the physical in interface */
-	enum cfcnfg_phy_preference pref;
-
-	/* Information about the physical device */
-	struct dev_info dev_info;
-
-	/* Interface index */
-	int ifindex;
-
-	/* Protocol head room added for CAIF link layer */
-	int head_room;
-
-	/* Use Start of frame checksum */
-	bool use_fcs;
-};
-
-struct cfcnfg {
-	struct cflayer layer;
-	struct cflayer *ctrl;
-	struct cflayer *mux;
-	struct list_head phys;
-	struct mutex lock;
-};
-
-static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id,
-			      enum cfctrl_srv serv, u8 phyid,
-			      struct cflayer *adapt_layer);
-static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id);
-static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
-			      struct cflayer *adapt_layer);
-static void cfctrl_resp_func(void);
-static void cfctrl_enum_resp(void);
-
-struct cfcnfg *cfcnfg_create(void)
-{
-	struct cfcnfg *this;
-	struct cfctrl_rsp *resp;
-
-	might_sleep();
-
-	/* Initiate this layer */
-	this = kzalloc_obj(struct cfcnfg, GFP_ATOMIC);
-	if (!this)
-		return NULL;
-	this->mux = cfmuxl_create();
-	if (!this->mux)
-		goto out_of_mem;
-	this->ctrl = cfctrl_create();
-	if (!this->ctrl)
-		goto out_of_mem;
-	/* Initiate response functions */
-	resp = cfctrl_get_respfuncs(this->ctrl);
-	resp->enum_rsp = cfctrl_enum_resp;
-	resp->linkerror_ind = cfctrl_resp_func;
-	resp->linkdestroy_rsp = cfcnfg_linkdestroy_rsp;
-	resp->sleep_rsp = cfctrl_resp_func;
-	resp->wake_rsp = cfctrl_resp_func;
-	resp->restart_rsp = cfctrl_resp_func;
-	resp->radioset_rsp = cfctrl_resp_func;
-	resp->linksetup_rsp = cfcnfg_linkup_rsp;
-	resp->reject_rsp = cfcnfg_reject_rsp;
-	INIT_LIST_HEAD(&this->phys);
-
-	cfmuxl_set_uplayer(this->mux, this->ctrl, 0);
-	layer_set_dn(this->ctrl, this->mux);
-	layer_set_up(this->ctrl, this);
-	mutex_init(&this->lock);
-
-	return this;
-out_of_mem:
-	synchronize_rcu();
-
-	kfree(this->mux);
-	kfree(this->ctrl);
-	kfree(this);
-	return NULL;
-}
-
-void cfcnfg_remove(struct cfcnfg *cfg)
-{
-	might_sleep();
-	if (cfg) {
-		synchronize_rcu();
-
-		kfree(cfg->mux);
-		cfctrl_remove(cfg->ctrl);
-		kfree(cfg);
-	}
-}
-
-static void cfctrl_resp_func(void)
-{
-}
-
-static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg,
-						     u8 phyid)
-{
-	struct cfcnfg_phyinfo *phy;
-
-	list_for_each_entry_rcu(phy, &cnfg->phys, node)
-		if (phy->id == phyid)
-			return phy;
-	return NULL;
-}
-
-static void cfctrl_enum_resp(void)
-{
-}
-
-static struct dev_info *cfcnfg_get_phyid(struct cfcnfg *cnfg,
-				  enum cfcnfg_phy_preference phy_pref)
-{
-	/* Try to match with specified preference */
-	struct cfcnfg_phyinfo *phy;
-
-	list_for_each_entry_rcu(phy, &cnfg->phys, node) {
-		if (phy->up && phy->pref == phy_pref &&
-				phy->frm_layer != NULL)
-
-			return &phy->dev_info;
-	}
-
-	/* Otherwise just return something */
-	list_for_each_entry_rcu(phy, &cnfg->phys, node)
-		if (phy->up)
-			return &phy->dev_info;
-
-	return NULL;
-}
-
-static int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi)
-{
-	struct cfcnfg_phyinfo *phy;
-
-	list_for_each_entry_rcu(phy, &cnfg->phys, node)
-		if (phy->ifindex == ifi && phy->up)
-			return phy->id;
-	return -ENODEV;
-}
-
-int caif_disconnect_client(struct net *net, struct cflayer *adap_layer)
-{
-	u8 channel_id;
-	struct cfcnfg *cfg = get_cfcnfg(net);
-
-	caif_assert(adap_layer != NULL);
-	cfctrl_cancel_req(cfg->ctrl, adap_layer);
-	channel_id = adap_layer->id;
-	if (channel_id != 0) {
-		struct cflayer *servl;
-		servl = cfmuxl_remove_uplayer(cfg->mux, channel_id);
-		cfctrl_linkdown_req(cfg->ctrl, channel_id, adap_layer);
-		if (servl != NULL)
-			layer_set_up(servl, NULL);
-	} else
-		pr_debug("nothing to disconnect\n");
-
-	/* Do RCU sync before initiating cleanup */
-	synchronize_rcu();
-	if (adap_layer->ctrlcmd != NULL)
-		adap_layer->ctrlcmd(adap_layer, CAIF_CTRLCMD_DEINIT_RSP, 0);
-	return 0;
-
-}
-EXPORT_SYMBOL(caif_disconnect_client);
-
-static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id)
-{
-}
-
-static const int protohead[CFCTRL_SRV_MASK] = {
-	[CFCTRL_SRV_VEI] = 4,
-	[CFCTRL_SRV_DATAGRAM] = 7,
-	[CFCTRL_SRV_UTIL] = 4,
-	[CFCTRL_SRV_RFM] = 3,
-	[CFCTRL_SRV_DBG] = 3,
-};
-
-
-static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
-					  struct caif_connect_request *s,
-					  struct cfctrl_link_param *l)
-{
-	struct dev_info *dev_info;
-	enum cfcnfg_phy_preference pref;
-	int res;
-
-	memset(l, 0, sizeof(*l));
-	/* In caif protocol low value is high priority */
-	l->priority = CAIF_PRIO_MAX - s->priority + 1;
-
-	if (s->ifindex != 0) {
-		res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex);
-		if (res < 0)
-			return res;
-		l->phyid = res;
-	} else {
-		switch (s->link_selector) {
-		case CAIF_LINK_HIGH_BANDW:
-			pref = CFPHYPREF_HIGH_BW;
-			break;
-		case CAIF_LINK_LOW_LATENCY:
-			pref = CFPHYPREF_LOW_LAT;
-			break;
-		default:
-			return -EINVAL;
-		}
-		dev_info = cfcnfg_get_phyid(cnfg, pref);
-		if (dev_info == NULL)
-			return -ENODEV;
-		l->phyid = dev_info->id;
-	}
-	switch (s->protocol) {
-	case CAIFPROTO_AT:
-		l->linktype = CFCTRL_SRV_VEI;
-		l->endpoint = (s->sockaddr.u.at.type >> 2) & 0x3;
-		l->chtype = s->sockaddr.u.at.type & 0x3;
-		break;
-	case CAIFPROTO_DATAGRAM:
-		l->linktype = CFCTRL_SRV_DATAGRAM;
-		l->chtype = 0x00;
-		l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
-		break;
-	case CAIFPROTO_DATAGRAM_LOOP:
-		l->linktype = CFCTRL_SRV_DATAGRAM;
-		l->chtype = 0x03;
-		l->endpoint = 0x00;
-		l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
-		break;
-	case CAIFPROTO_RFM:
-		l->linktype = CFCTRL_SRV_RFM;
-		l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
-		strscpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
-			sizeof(l->u.rfm.volume));
-		break;
-	case CAIFPROTO_UTIL:
-		l->linktype = CFCTRL_SRV_UTIL;
-		l->endpoint = 0x00;
-		l->chtype = 0x00;
-		strscpy(l->u.utility.name, s->sockaddr.u.util.service,
-			sizeof(l->u.utility.name));
-		caif_assert(sizeof(l->u.utility.name) > 10);
-		l->u.utility.paramlen = s->param.size;
-		if (l->u.utility.paramlen > sizeof(l->u.utility.params))
-			l->u.utility.paramlen = sizeof(l->u.utility.params);
-
-		memcpy(l->u.utility.params, s->param.data,
-		       l->u.utility.paramlen);
-
-		break;
-	case CAIFPROTO_DEBUG:
-		l->linktype = CFCTRL_SRV_DBG;
-		l->endpoint = s->sockaddr.u.dbg.service;
-		l->chtype = s->sockaddr.u.dbg.type;
-		break;
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-int caif_connect_client(struct net *net, struct caif_connect_request *conn_req,
-			struct cflayer *adap_layer, int *ifindex,
-			int *proto_head, int *proto_tail)
-{
-	struct cflayer *frml;
-	struct cfcnfg_phyinfo *phy;
-	int err;
-	struct cfctrl_link_param param;
-	struct cfcnfg *cfg = get_cfcnfg(net);
-
-	rcu_read_lock();
-	err = caif_connect_req_to_link_param(cfg, conn_req, &param);
-	if (err)
-		goto unlock;
-
-	phy = cfcnfg_get_phyinfo_rcu(cfg, param.phyid);
-	if (!phy) {
-		err = -ENODEV;
-		goto unlock;
-	}
-	err = -EINVAL;
-
-	if (adap_layer == NULL) {
-		pr_err("adap_layer is zero\n");
-		goto unlock;
-	}
-	if (adap_layer->receive == NULL) {
-		pr_err("adap_layer->receive is NULL\n");
-		goto unlock;
-	}
-	if (adap_layer->ctrlcmd == NULL) {
-		pr_err("adap_layer->ctrlcmd == NULL\n");
-		goto unlock;
-	}
-
-	err = -ENODEV;
-	frml = phy->frm_layer;
-	if (frml == NULL) {
-		pr_err("Specified PHY type does not exist!\n");
-		goto unlock;
-	}
-	caif_assert(param.phyid == phy->id);
-	caif_assert(phy->frm_layer->id ==
-		     param.phyid);
-	caif_assert(phy->phy_layer->id ==
-		     param.phyid);
-
-	*ifindex = phy->ifindex;
-	*proto_tail = 2;
-	*proto_head = protohead[param.linktype] + phy->head_room;
-
-	rcu_read_unlock();
-
-	/* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */
-	cfctrl_enum_req(cfg->ctrl, param.phyid);
-	return cfctrl_linkup_request(cfg->ctrl, &param, adap_layer);
-
-unlock:
-	rcu_read_unlock();
-	return err;
-}
-EXPORT_SYMBOL(caif_connect_client);
-
-static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
-			      struct cflayer *adapt_layer)
-{
-	if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL)
-		adapt_layer->ctrlcmd(adapt_layer,
-				     CAIF_CTRLCMD_INIT_FAIL_RSP, 0);
-}
-
-static void
-cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
-		  u8 phyid, struct cflayer *adapt_layer)
-{
-	struct cfcnfg *cnfg = container_obj(layer);
-	struct cflayer *servicel = NULL;
-	struct cfcnfg_phyinfo *phyinfo;
-	struct net_device *netdev;
-
-	if (channel_id == 0) {
-		pr_warn("received channel_id zero\n");
-		if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL)
-			adapt_layer->ctrlcmd(adapt_layer,
-						CAIF_CTRLCMD_INIT_FAIL_RSP, 0);
-		return;
-	}
-
-	rcu_read_lock();
-
-	if (adapt_layer == NULL) {
-		pr_debug("link setup response but no client exist, send linkdown back\n");
-		cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
-		goto unlock;
-	}
-
-	caif_assert(cnfg != NULL);
-	caif_assert(phyid != 0);
-
-	phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid);
-	if (phyinfo == NULL) {
-		pr_err("ERROR: Link Layer Device disappeared while connecting\n");
-		goto unlock;
-	}
-
-	caif_assert(phyinfo != NULL);
-	caif_assert(phyinfo->id == phyid);
-	caif_assert(phyinfo->phy_layer != NULL);
-	caif_assert(phyinfo->phy_layer->id == phyid);
-
-	adapt_layer->id = channel_id;
-
-	switch (serv) {
-	case CFCTRL_SRV_VEI:
-		servicel = cfvei_create(channel_id, &phyinfo->dev_info);
-		break;
-	case CFCTRL_SRV_DATAGRAM:
-		servicel = cfdgml_create(channel_id,
-					&phyinfo->dev_info);
-		break;
-	case CFCTRL_SRV_RFM:
-		netdev = phyinfo->dev_info.dev;
-		servicel = cfrfml_create(channel_id, &phyinfo->dev_info,
-						netdev->mtu);
-		break;
-	case CFCTRL_SRV_UTIL:
-		servicel = cfutill_create(channel_id, &phyinfo->dev_info);
-		break;
-	case CFCTRL_SRV_VIDEO:
-		servicel = cfvidl_create(channel_id, &phyinfo->dev_info);
-		break;
-	case CFCTRL_SRV_DBG:
-		servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
-		break;
-	default:
-		pr_err("Protocol error. Link setup response - unknown channel type\n");
-		goto unlock;
-	}
-	if (!servicel)
-		goto unlock;
-	layer_set_dn(servicel, cnfg->mux);
-	cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id);
-	layer_set_up(servicel, adapt_layer);
-	layer_set_dn(adapt_layer, servicel);
-
-	rcu_read_unlock();
-
-	servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0);
-	return;
-unlock:
-	rcu_read_unlock();
-}
-
-int
-cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
-		     struct net_device *dev, struct cflayer *phy_layer,
-		     enum cfcnfg_phy_preference pref,
-		     struct cflayer *link_support,
-		     bool fcs, int head_room)
-{
-	struct cflayer *frml;
-	struct cfcnfg_phyinfo *phyinfo = NULL;
-	int i, res = 0;
-	u8 phyid;
-
-	mutex_lock(&cnfg->lock);
-
-	/* CAIF protocol allow maximum 6 link-layers */
-	for (i = 0; i < 7; i++) {
-		phyid = (dev->ifindex + i) & 0x7;
-		if (phyid == 0)
-			continue;
-		if (cfcnfg_get_phyinfo_rcu(cnfg, phyid) == NULL)
-			goto got_phyid;
-	}
-	pr_warn("Too many CAIF Link Layers (max 6)\n");
-	res = -EEXIST;
-	goto out;
-
-got_phyid:
-	phyinfo = kzalloc_obj(struct cfcnfg_phyinfo, GFP_ATOMIC);
-	if (!phyinfo) {
-		res = -ENOMEM;
-		goto out;
-	}
-
-	phy_layer->id = phyid;
-	phyinfo->pref = pref;
-	phyinfo->id = phyid;
-	phyinfo->dev_info.id = phyid;
-	phyinfo->dev_info.dev = dev;
-	phyinfo->phy_layer = phy_layer;
-	phyinfo->ifindex = dev->ifindex;
-	phyinfo->head_room = head_room;
-	phyinfo->use_fcs = fcs;
-
-	frml = cffrml_create(phyid, fcs);
-
-	if (!frml) {
-		res = -ENOMEM;
-		goto out_err;
-	}
-	phyinfo->frm_layer = frml;
-	layer_set_up(frml, cnfg->mux);
-
-	if (link_support != NULL) {
-		link_support->id = phyid;
-		layer_set_dn(frml, link_support);
-		layer_set_up(link_support, frml);
-		layer_set_dn(link_support, phy_layer);
-		layer_set_up(phy_layer, link_support);
-	} else {
-		layer_set_dn(frml, phy_layer);
-		layer_set_up(phy_layer, frml);
-	}
-
-	list_add_rcu(&phyinfo->node, &cnfg->phys);
-out:
-	mutex_unlock(&cnfg->lock);
-	return res;
-
-out_err:
-	kfree(phyinfo);
-	mutex_unlock(&cnfg->lock);
-	return res;
-}
-EXPORT_SYMBOL(cfcnfg_add_phy_layer);
-
-int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer,
-			 bool up)
-{
-	struct cfcnfg_phyinfo *phyinfo;
-
-	rcu_read_lock();
-	phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phy_layer->id);
-	if (phyinfo == NULL) {
-		rcu_read_unlock();
-		return -ENODEV;
-	}
-
-	if (phyinfo->up == up) {
-		rcu_read_unlock();
-		return 0;
-	}
-	phyinfo->up = up;
-
-	if (up) {
-		cffrml_hold(phyinfo->frm_layer);
-		cfmuxl_set_dnlayer(cnfg->mux, phyinfo->frm_layer,
-					phy_layer->id);
-	} else {
-		cfmuxl_remove_dnlayer(cnfg->mux, phy_layer->id);
-		cffrml_put(phyinfo->frm_layer);
-	}
-
-	rcu_read_unlock();
-	return 0;
-}
-EXPORT_SYMBOL(cfcnfg_set_phy_state);
-
-int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer)
-{
-	struct cflayer *frml, *frml_dn;
-	u16 phyid;
-	struct cfcnfg_phyinfo *phyinfo;
-
-	might_sleep();
-
-	mutex_lock(&cnfg->lock);
-
-	phyid = phy_layer->id;
-	phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid);
-
-	if (phyinfo == NULL) {
-		mutex_unlock(&cnfg->lock);
-		return 0;
-	}
-	caif_assert(phyid == phyinfo->id);
-	caif_assert(phy_layer == phyinfo->phy_layer);
-	caif_assert(phy_layer->id == phyid);
-	caif_assert(phyinfo->frm_layer->id == phyid);
-
-	list_del_rcu(&phyinfo->node);
-	synchronize_rcu();
-
-	/* Fail if reference count is not zero */
-	if (cffrml_refcnt_read(phyinfo->frm_layer) != 0) {
-		pr_info("Wait for device inuse\n");
-		list_add_rcu(&phyinfo->node, &cnfg->phys);
-		mutex_unlock(&cnfg->lock);
-		return -EAGAIN;
-	}
-
-	frml = phyinfo->frm_layer;
-	frml_dn = frml->dn;
-	cffrml_set_uplayer(frml, NULL);
-	cffrml_set_dnlayer(frml, NULL);
-	if (phy_layer != frml_dn) {
-		layer_set_up(frml_dn, NULL);
-		layer_set_dn(frml_dn, NULL);
-	}
-	layer_set_up(phy_layer, NULL);
-
-	if (phyinfo->phy_layer != frml_dn)
-		kfree(frml_dn);
-
-	cffrml_free(frml);
-	kfree(phyinfo);
-	mutex_unlock(&cnfg->lock);
-
-	return 0;
-}
-EXPORT_SYMBOL(cfcnfg_del_phy_layer);
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
deleted file mode 100644
index c6cc2bfed65d..000000000000
--- a/net/caif/cfctrl.c
+++ /dev/null
@@ -1,631 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/stddef.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/pkt_sched.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfpkt.h>
-#include <net/caif/cfctrl.h>
-
-#define container_obj(layr) container_of(layr, struct cfctrl, serv.layer)
-#define UTILITY_NAME_LENGTH 16
-#define CFPKT_CTRL_PKT_LEN 20
-
-#ifdef CAIF_NO_LOOP
-static int handle_loop(struct cfctrl *ctrl,
-		       int cmd, struct cfpkt *pkt){
-	return -1;
-}
-#else
-static int handle_loop(struct cfctrl *ctrl,
-		       int cmd, struct cfpkt *pkt);
-#endif
-static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt);
-static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			   int phyid);
-
-
-struct cflayer *cfctrl_create(void)
-{
-	struct dev_info dev_info;
-	struct cfctrl *this =
-		kzalloc_obj(struct cfctrl, GFP_ATOMIC);
-	if (!this)
-		return NULL;
-	caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
-	memset(&dev_info, 0, sizeof(dev_info));
-	dev_info.id = 0xff;
-	cfsrvl_init(&this->serv, 0, &dev_info, false);
-	atomic_set(&this->req_seq_no, 1);
-	atomic_set(&this->rsp_seq_no, 1);
-	this->serv.layer.receive = cfctrl_recv;
-	sprintf(this->serv.layer.name, "ctrl");
-	this->serv.layer.ctrlcmd = cfctrl_ctrlcmd;
-#ifndef CAIF_NO_LOOP
-	spin_lock_init(&this->loop_linkid_lock);
-	this->loop_linkid = 1;
-#endif
-	spin_lock_init(&this->info_list_lock);
-	INIT_LIST_HEAD(&this->list);
-	return &this->serv.layer;
-}
-
-void cfctrl_remove(struct cflayer *layer)
-{
-	struct cfctrl_request_info *p, *tmp;
-	struct cfctrl *ctrl = container_obj(layer);
-
-	spin_lock_bh(&ctrl->info_list_lock);
-	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
-		list_del(&p->list);
-		kfree(p);
-	}
-	spin_unlock_bh(&ctrl->info_list_lock);
-	kfree(layer);
-}
-
-static bool param_eq(const struct cfctrl_link_param *p1,
-		     const struct cfctrl_link_param *p2)
-{
-	bool eq =
-	    p1->linktype == p2->linktype &&
-	    p1->priority == p2->priority &&
-	    p1->phyid == p2->phyid &&
-	    p1->endpoint == p2->endpoint && p1->chtype == p2->chtype;
-
-	if (!eq)
-		return false;
-
-	switch (p1->linktype) {
-	case CFCTRL_SRV_VEI:
-		return true;
-	case CFCTRL_SRV_DATAGRAM:
-		return p1->u.datagram.connid == p2->u.datagram.connid;
-	case CFCTRL_SRV_RFM:
-		return
-		    p1->u.rfm.connid == p2->u.rfm.connid &&
-		    strcmp(p1->u.rfm.volume, p2->u.rfm.volume) == 0;
-	case CFCTRL_SRV_UTIL:
-		return
-		    p1->u.utility.fifosize_kb == p2->u.utility.fifosize_kb
-		    && p1->u.utility.fifosize_bufs ==
-		    p2->u.utility.fifosize_bufs
-		    && strcmp(p1->u.utility.name, p2->u.utility.name) == 0
-		    && p1->u.utility.paramlen == p2->u.utility.paramlen
-		    && memcmp(p1->u.utility.params, p2->u.utility.params,
-			      p1->u.utility.paramlen) == 0;
-
-	case CFCTRL_SRV_VIDEO:
-		return p1->u.video.connid == p2->u.video.connid;
-	case CFCTRL_SRV_DBG:
-		return true;
-	case CFCTRL_SRV_DECM:
-		return false;
-	default:
-		return false;
-	}
-	return false;
-}
-
-static bool cfctrl_req_eq(const struct cfctrl_request_info *r1,
-			  const struct cfctrl_request_info *r2)
-{
-	if (r1->cmd != r2->cmd)
-		return false;
-	if (r1->cmd == CFCTRL_CMD_LINK_SETUP)
-		return param_eq(&r1->param, &r2->param);
-	else
-		return r1->channel_id == r2->channel_id;
-}
-
-/* Insert request at the end */
-static void cfctrl_insert_req(struct cfctrl *ctrl,
-			      struct cfctrl_request_info *req)
-{
-	spin_lock_bh(&ctrl->info_list_lock);
-	atomic_inc(&ctrl->req_seq_no);
-	req->sequence_no = atomic_read(&ctrl->req_seq_no);
-	list_add_tail(&req->list, &ctrl->list);
-	spin_unlock_bh(&ctrl->info_list_lock);
-}
-
-/* Compare and remove request */
-static struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
-						struct cfctrl_request_info *req)
-{
-	struct cfctrl_request_info *p, *tmp, *first;
-
-	first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list);
-
-	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
-		if (cfctrl_req_eq(req, p)) {
-			if (p != first)
-				pr_warn("Requests are not received in order\n");
-
-			atomic_set(&ctrl->rsp_seq_no,
-					 p->sequence_no);
-			list_del(&p->list);
-			goto out;
-		}
-	}
-	p = NULL;
-out:
-	return p;
-}
-
-struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer)
-{
-	struct cfctrl *this = container_obj(layer);
-	return &this->res;
-}
-
-static void init_info(struct caif_payload_info *info, struct cfctrl *cfctrl)
-{
-	info->hdr_len = 0;
-	info->channel_id = cfctrl->serv.layer.id;
-	info->dev_info = &cfctrl->serv.dev_info;
-}
-
-void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
-{
-	struct cfpkt *pkt;
-	struct cfctrl *cfctrl = container_obj(layer);
-	struct cflayer *dn = cfctrl->serv.layer.dn;
-
-	if (!dn) {
-		pr_debug("not able to send enum request\n");
-		return;
-	}
-	pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
-	if (!pkt)
-		return;
-	caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
-	init_info(cfpkt_info(pkt), cfctrl);
-	cfpkt_info(pkt)->dev_info->id = physlinkid;
-	cfctrl->serv.dev_info.id = physlinkid;
-	cfpkt_addbdy(pkt, CFCTRL_CMD_ENUM);
-	cfpkt_addbdy(pkt, physlinkid);
-	cfpkt_set_prio(pkt, TC_PRIO_CONTROL);
-	dn->transmit(dn, pkt);
-}
-
-int cfctrl_linkup_request(struct cflayer *layer,
-			  struct cfctrl_link_param *param,
-			  struct cflayer *user_layer)
-{
-	struct cfctrl *cfctrl = container_obj(layer);
-	struct cflayer *dn = cfctrl->serv.layer.dn;
-	char utility_name[UTILITY_NAME_LENGTH];
-	struct cfctrl_request_info *req;
-	struct cfpkt *pkt;
-	u32 tmp32;
-	u16 tmp16;
-	u8 tmp8;
-	int ret;
-
-	if (!dn) {
-		pr_debug("not able to send linkup request\n");
-		return -ENODEV;
-	}
-
-	if (cfctrl_cancel_req(layer, user_layer) > 0) {
-		/* Slight Paranoia, check if already connecting */
-		pr_err("Duplicate connect request for same client\n");
-		WARN_ON(1);
-		return -EALREADY;
-	}
-
-	pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
-	if (!pkt)
-		return -ENOMEM;
-	cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
-	cfpkt_addbdy(pkt, (param->chtype << 4) | param->linktype);
-	cfpkt_addbdy(pkt, (param->priority << 3) | param->phyid);
-	cfpkt_addbdy(pkt, param->endpoint & 0x03);
-
-	switch (param->linktype) {
-	case CFCTRL_SRV_VEI:
-		break;
-	case CFCTRL_SRV_VIDEO:
-		cfpkt_addbdy(pkt, (u8) param->u.video.connid);
-		break;
-	case CFCTRL_SRV_DBG:
-		break;
-	case CFCTRL_SRV_DATAGRAM:
-		tmp32 = cpu_to_le32(param->u.datagram.connid);
-		cfpkt_add_body(pkt, &tmp32, 4);
-		break;
-	case CFCTRL_SRV_RFM:
-		/* Construct a frame, convert DatagramConnectionID to network
-		 * format long and copy it out...
-		 */
-		tmp32 = cpu_to_le32(param->u.rfm.connid);
-		cfpkt_add_body(pkt, &tmp32, 4);
-		/* Add volume name, including zero termination... */
-		cfpkt_add_body(pkt, param->u.rfm.volume,
-			       strlen(param->u.rfm.volume) + 1);
-		break;
-	case CFCTRL_SRV_UTIL:
-		tmp16 = cpu_to_le16(param->u.utility.fifosize_kb);
-		cfpkt_add_body(pkt, &tmp16, 2);
-		tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
-		cfpkt_add_body(pkt, &tmp16, 2);
-		strscpy_pad(utility_name, param->u.utility.name);
-		cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
-		tmp8 = param->u.utility.paramlen;
-		cfpkt_add_body(pkt, &tmp8, 1);
-		cfpkt_add_body(pkt, param->u.utility.params,
-			       param->u.utility.paramlen);
-		break;
-	default:
-		pr_warn("Request setup of bad link type = %d\n",
-			param->linktype);
-		cfpkt_destroy(pkt);
-		return -EINVAL;
-	}
-	req = kzalloc_obj(*req);
-	if (!req) {
-		cfpkt_destroy(pkt);
-		return -ENOMEM;
-	}
-
-	req->client_layer = user_layer;
-	req->cmd = CFCTRL_CMD_LINK_SETUP;
-	req->param = *param;
-	cfctrl_insert_req(cfctrl, req);
-	init_info(cfpkt_info(pkt), cfctrl);
-	/*
-	 * NOTE:Always send linkup and linkdown request on the same
-	 *	device as the payload. Otherwise old queued up payload
-	 *	might arrive with the newly allocated channel ID.
-	 */
-	cfpkt_info(pkt)->dev_info->id = param->phyid;
-	cfpkt_set_prio(pkt, TC_PRIO_CONTROL);
-	ret =
-	    dn->transmit(dn, pkt);
-	if (ret < 0) {
-		int count;
-
-		count = cfctrl_cancel_req(&cfctrl->serv.layer,
-						user_layer);
-		if (count != 1) {
-			pr_err("Could not remove request (%d)", count);
-			return -ENODEV;
-		}
-	}
-	return 0;
-}
-
-int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
-			struct cflayer *client)
-{
-	int ret;
-	struct cfpkt *pkt;
-	struct cfctrl *cfctrl = container_obj(layer);
-	struct cflayer *dn = cfctrl->serv.layer.dn;
-
-	if (!dn) {
-		pr_debug("not able to send link-down request\n");
-		return -ENODEV;
-	}
-	pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
-	if (!pkt)
-		return -ENOMEM;
-	cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
-	cfpkt_addbdy(pkt, channelid);
-	init_info(cfpkt_info(pkt), cfctrl);
-	cfpkt_set_prio(pkt, TC_PRIO_CONTROL);
-	ret =
-	    dn->transmit(dn, pkt);
-#ifndef CAIF_NO_LOOP
-	cfctrl->loop_linkused[channelid] = 0;
-#endif
-	return ret;
-}
-
-int cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
-{
-	struct cfctrl_request_info *p, *tmp;
-	struct cfctrl *ctrl = container_obj(layr);
-	int found = 0;
-	spin_lock_bh(&ctrl->info_list_lock);
-
-	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
-		if (p->client_layer == adap_layer) {
-			list_del(&p->list);
-			kfree(p);
-			found++;
-		}
-	}
-
-	spin_unlock_bh(&ctrl->info_list_lock);
-	return found;
-}
-
-static int cfctrl_link_setup(struct cfctrl *cfctrl, struct cfpkt *pkt, u8 cmdrsp)
-{
-	u8 len;
-	u8 linkid = 0;
-	enum cfctrl_srv serv;
-	enum cfctrl_srv servtype;
-	u8 endpoint;
-	u8 physlinkid;
-	u8 prio;
-	u8 tmp;
-	u8 *cp;
-	int i;
-	struct cfctrl_link_param linkparam;
-	struct cfctrl_request_info rsp, *req;
-
-	memset(&linkparam, 0, sizeof(linkparam));
-
-	tmp = cfpkt_extr_head_u8(pkt);
-
-	serv = tmp & CFCTRL_SRV_MASK;
-	linkparam.linktype = serv;
-
-	servtype = tmp >> 4;
-	linkparam.chtype = servtype;
-
-	tmp = cfpkt_extr_head_u8(pkt);
-	physlinkid = tmp & 0x07;
-	prio = tmp >> 3;
-
-	linkparam.priority = prio;
-	linkparam.phyid = physlinkid;
-	endpoint = cfpkt_extr_head_u8(pkt);
-	linkparam.endpoint = endpoint & 0x03;
-
-	switch (serv) {
-	case CFCTRL_SRV_VEI:
-	case CFCTRL_SRV_DBG:
-		if (CFCTRL_ERR_BIT & cmdrsp)
-			break;
-		/* Link ID */
-		linkid = cfpkt_extr_head_u8(pkt);
-		break;
-	case CFCTRL_SRV_VIDEO:
-		tmp = cfpkt_extr_head_u8(pkt);
-		linkparam.u.video.connid = tmp;
-		if (CFCTRL_ERR_BIT & cmdrsp)
-			break;
-		/* Link ID */
-		linkid = cfpkt_extr_head_u8(pkt);
-		break;
-
-	case CFCTRL_SRV_DATAGRAM:
-		linkparam.u.datagram.connid = cfpkt_extr_head_u32(pkt);
-		if (CFCTRL_ERR_BIT & cmdrsp)
-			break;
-		/* Link ID */
-		linkid = cfpkt_extr_head_u8(pkt);
-		break;
-	case CFCTRL_SRV_RFM:
-		/* Construct a frame, convert
-		 * DatagramConnectionID
-		 * to network format long and copy it out...
-		 */
-		linkparam.u.rfm.connid = cfpkt_extr_head_u32(pkt);
-		cp = (u8 *) linkparam.u.rfm.volume;
-		for (tmp = cfpkt_extr_head_u8(pkt);
-		     cfpkt_more(pkt) && tmp != '\0';
-		     tmp = cfpkt_extr_head_u8(pkt))
-			*cp++ = tmp;
-		*cp = '\0';
-
-		if (CFCTRL_ERR_BIT & cmdrsp)
-			break;
-		/* Link ID */
-		linkid = cfpkt_extr_head_u8(pkt);
-
-		break;
-	case CFCTRL_SRV_UTIL:
-		/* Construct a frame, convert
-		 * DatagramConnectionID
-		 * to network format long and copy it out...
-		 */
-		/* Fifosize KB */
-		linkparam.u.utility.fifosize_kb = cfpkt_extr_head_u16(pkt);
-		/* Fifosize bufs */
-		linkparam.u.utility.fifosize_bufs = cfpkt_extr_head_u16(pkt);
-		/* name */
-		cp = (u8 *) linkparam.u.utility.name;
-		caif_assert(sizeof(linkparam.u.utility.name)
-			     >= UTILITY_NAME_LENGTH);
-		for (i = 0; i < UTILITY_NAME_LENGTH && cfpkt_more(pkt); i++) {
-			tmp = cfpkt_extr_head_u8(pkt);
-			*cp++ = tmp;
-		}
-		/* Length */
-		len = cfpkt_extr_head_u8(pkt);
-		linkparam.u.utility.paramlen = len;
-		/* Param Data */
-		cp = linkparam.u.utility.params;
-		while (cfpkt_more(pkt) && len--) {
-			tmp = cfpkt_extr_head_u8(pkt);
-			*cp++ = tmp;
-		}
-		if (CFCTRL_ERR_BIT & cmdrsp)
-			break;
-		/* Link ID */
-		linkid = cfpkt_extr_head_u8(pkt);
-		/* Length */
-		len = cfpkt_extr_head_u8(pkt);
-		/* Param Data */
-		cfpkt_extr_head(pkt, NULL, len);
-		break;
-	default:
-		pr_warn("Request setup, invalid type (%d)\n", serv);
-		return -1;
-	}
-
-	rsp.cmd = CFCTRL_CMD_LINK_SETUP;
-	rsp.param = linkparam;
-	spin_lock_bh(&cfctrl->info_list_lock);
-	req = cfctrl_remove_req(cfctrl, &rsp);
-
-	if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
-		cfpkt_erroneous(pkt)) {
-		pr_err("Invalid O/E bit or parse error "
-				"on CAIF control channel\n");
-		cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 0,
-				       req ? req->client_layer : NULL);
-	} else {
-		cfctrl->res.linksetup_rsp(cfctrl->serv.layer.up, linkid,
-					  serv, physlinkid,
-					  req ?  req->client_layer : NULL);
-	}
-
-	kfree(req);
-
-	spin_unlock_bh(&cfctrl->info_list_lock);
-
-	return 0;
-}
-
-static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
-{
-	u8 cmdrsp;
-	u8 cmd;
-	int ret = 0;
-	u8 linkid = 0;
-	struct cfctrl *cfctrl = container_obj(layer);
-
-	cmdrsp = cfpkt_extr_head_u8(pkt);
-	cmd = cmdrsp & CFCTRL_CMD_MASK;
-	if (cmd != CFCTRL_CMD_LINK_ERR
-	    && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)
-		&& CFCTRL_ERR_BIT != (CFCTRL_ERR_BIT & cmdrsp)) {
-		if (handle_loop(cfctrl, cmd, pkt) != 0)
-			cmdrsp |= CFCTRL_ERR_BIT;
-	}
-
-	switch (cmd) {
-	case CFCTRL_CMD_LINK_SETUP:
-		ret = cfctrl_link_setup(cfctrl, pkt, cmdrsp);
-		break;
-	case CFCTRL_CMD_LINK_DESTROY:
-		linkid = cfpkt_extr_head_u8(pkt);
-		cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
-		break;
-	case CFCTRL_CMD_LINK_ERR:
-		pr_err("Frame Error Indication received\n");
-		cfctrl->res.linkerror_ind();
-		break;
-	case CFCTRL_CMD_ENUM:
-		cfctrl->res.enum_rsp();
-		break;
-	case CFCTRL_CMD_SLEEP:
-		cfctrl->res.sleep_rsp();
-		break;
-	case CFCTRL_CMD_WAKE:
-		cfctrl->res.wake_rsp();
-		break;
-	case CFCTRL_CMD_LINK_RECONF:
-		cfctrl->res.restart_rsp();
-		break;
-	case CFCTRL_CMD_RADIO_SET:
-		cfctrl->res.radioset_rsp();
-		break;
-	default:
-		pr_err("Unrecognized Control Frame\n");
-		ret = -1;
-		goto error;
-	}
-error:
-	cfpkt_destroy(pkt);
-	return ret;
-}
-
-static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			   int phyid)
-{
-	struct cfctrl *this = container_obj(layr);
-	switch (ctrl) {
-	case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
-	case CAIF_CTRLCMD_FLOW_OFF_IND:
-		spin_lock_bh(&this->info_list_lock);
-		if (!list_empty(&this->list))
-			pr_debug("Received flow off in control layer\n");
-		spin_unlock_bh(&this->info_list_lock);
-		break;
-	case _CAIF_CTRLCMD_PHYIF_DOWN_IND: {
-		struct cfctrl_request_info *p, *tmp;
-
-		/* Find all connect request and report failure */
-		spin_lock_bh(&this->info_list_lock);
-		list_for_each_entry_safe(p, tmp, &this->list, list) {
-			if (p->param.phyid == phyid) {
-				list_del(&p->list);
-				p->client_layer->ctrlcmd(p->client_layer,
-						CAIF_CTRLCMD_INIT_FAIL_RSP,
-						phyid);
-				kfree(p);
-			}
-		}
-		spin_unlock_bh(&this->info_list_lock);
-		break;
-	}
-	default:
-		break;
-	}
-}
-
-#ifndef CAIF_NO_LOOP
-static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
-{
-	static int last_linkid;
-	static int dec;
-	u8 linkid, linktype, tmp;
-	switch (cmd) {
-	case CFCTRL_CMD_LINK_SETUP:
-		spin_lock_bh(&ctrl->loop_linkid_lock);
-		if (!dec) {
-			for (linkid = last_linkid + 1; linkid < 254; linkid++)
-				if (!ctrl->loop_linkused[linkid])
-					goto found;
-		}
-		dec = 1;
-		for (linkid = last_linkid - 1; linkid > 1; linkid--)
-			if (!ctrl->loop_linkused[linkid])
-				goto found;
-		spin_unlock_bh(&ctrl->loop_linkid_lock);
-		return -1;
-found:
-		if (linkid < 10)
-			dec = 0;
-
-		if (!ctrl->loop_linkused[linkid])
-			ctrl->loop_linkused[linkid] = 1;
-
-		last_linkid = linkid;
-
-		cfpkt_add_trail(pkt, &linkid, 1);
-		spin_unlock_bh(&ctrl->loop_linkid_lock);
-		cfpkt_peek_head(pkt, &linktype, 1);
-		if (linktype ==  CFCTRL_SRV_UTIL) {
-			tmp = 0x01;
-			cfpkt_add_trail(pkt, &tmp, 1);
-			cfpkt_add_trail(pkt, &tmp, 1);
-		}
-		break;
-
-	case CFCTRL_CMD_LINK_DESTROY:
-		spin_lock_bh(&ctrl->loop_linkid_lock);
-		cfpkt_peek_head(pkt, &linkid, 1);
-		ctrl->loop_linkused[linkid] = 0;
-		spin_unlock_bh(&ctrl->loop_linkid_lock);
-		break;
-	default:
-		break;
-	}
-	return 0;
-}
-#endif
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
deleted file mode 100644
index 57ad3f82e004..000000000000
--- a/net/caif/cfdbgl.c
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfsrvl.h>
-#include <net/caif/cfpkt.h>
-
-#define container_obj(layr) ((struct cfsrvl *) layr)
-
-static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
-static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
-
-struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
-{
-	struct cfsrvl *dbg = kzalloc_obj(struct cfsrvl, GFP_ATOMIC);
-	if (!dbg)
-		return NULL;
-	caif_assert(offsetof(struct cfsrvl, layer) == 0);
-	cfsrvl_init(dbg, channel_id, dev_info, false);
-	dbg->layer.receive = cfdbgl_receive;
-	dbg->layer.transmit = cfdbgl_transmit;
-	snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ, "dbg%d", channel_id);
-	return &dbg->layer;
-}
-
-static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
-{
-	return layr->up->receive(layr->up, pkt);
-}
-
-static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
-{
-	struct cfsrvl *service = container_obj(layr);
-	struct caif_payload_info *info;
-	int ret;
-
-	if (!cfsrvl_ready(service, &ret)) {
-		cfpkt_destroy(pkt);
-		return ret;
-	}
-
-	/* Add info for MUX-layer to route the packet out */
-	info = cfpkt_info(pkt);
-	info->channel_id = service->layer.id;
-	info->dev_info = &service->dev_info;
-
-	return layr->dn->transmit(layr->dn, pkt);
-}
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
deleted file mode 100644
index c451ddd155a7..000000000000
--- a/net/caif/cfdgml.c
+++ /dev/null
@@ -1,113 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/stddef.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfsrvl.h>
-#include <net/caif/cfpkt.h>
-
-
-#define container_obj(layr) ((struct cfsrvl *) layr)
-
-#define DGM_CMD_BIT  0x80
-#define DGM_FLOW_OFF 0x81
-#define DGM_FLOW_ON  0x80
-#define DGM_MTU 1500
-
-static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
-static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt);
-
-struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
-{
-	struct cfsrvl *dgm = kzalloc_obj(struct cfsrvl, GFP_ATOMIC);
-	if (!dgm)
-		return NULL;
-	caif_assert(offsetof(struct cfsrvl, layer) == 0);
-	cfsrvl_init(dgm, channel_id, dev_info, true);
-	dgm->layer.receive = cfdgml_receive;
-	dgm->layer.transmit = cfdgml_transmit;
-	snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ, "dgm%d", channel_id);
-	return &dgm->layer;
-}
-
-static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u8 cmd = -1;
-	u8 dgmhdr[3];
-	int ret;
-	caif_assert(layr->up != NULL);
-	caif_assert(layr->receive != NULL);
-	caif_assert(layr->ctrlcmd != NULL);
-
-	if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
-		pr_err("Packet is erroneous!\n");
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-
-	if ((cmd & DGM_CMD_BIT) == 0) {
-		if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) {
-			pr_err("Packet is erroneous!\n");
-			cfpkt_destroy(pkt);
-			return -EPROTO;
-		}
-		ret = layr->up->receive(layr->up, pkt);
-		return ret;
-	}
-
-	switch (cmd) {
-	case DGM_FLOW_OFF:	/* FLOW OFF */
-		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
-		cfpkt_destroy(pkt);
-		return 0;
-	case DGM_FLOW_ON:	/* FLOW ON */
-		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
-		cfpkt_destroy(pkt);
-		return 0;
-	default:
-		cfpkt_destroy(pkt);
-		pr_info("Unknown datagram control %d (0x%x)\n", cmd, cmd);
-		return -EPROTO;
-	}
-}
-
-static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u8 packet_type;
-	u32 zero = 0;
-	struct caif_payload_info *info;
-	struct cfsrvl *service = container_obj(layr);
-	int ret;
-
-	if (!cfsrvl_ready(service, &ret)) {
-		cfpkt_destroy(pkt);
-		return ret;
-	}
-
-	/* STE Modem cannot handle more than 1500 bytes datagrams */
-	if (cfpkt_getlen(pkt) > DGM_MTU) {
-		cfpkt_destroy(pkt);
-		return -EMSGSIZE;
-	}
-
-	cfpkt_add_head(pkt, &zero, 3);
-	packet_type = 0x08; /* B9 set - UNCLASSIFIED */
-	cfpkt_add_head(pkt, &packet_type, 1);
-
-	/* Add info for MUX-layer to route the packet out. */
-	info = cfpkt_info(pkt);
-	info->channel_id = service->layer.id;
-	/* To optimize alignment, we add up the size of CAIF header
-	 * before payload.
-	 */
-	info->hdr_len = 4;
-	info->dev_info = &service->dev_info;
-	return layr->dn->transmit(layr->dn, pkt);
-}
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
deleted file mode 100644
index 0f4979d89fcb..000000000000
--- a/net/caif/cffrml.c
+++ /dev/null
@@ -1,204 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * CAIF Framing Layer.
- *
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/stddef.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/crc-ccitt.h>
-#include <linux/netdevice.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfpkt.h>
-#include <net/caif/cffrml.h>
-
-#define container_obj(layr) container_of(layr, struct cffrml, layer)
-
-struct cffrml {
-	struct cflayer layer;
-	bool dofcs;		/* !< FCS active */
-	int __percpu		*pcpu_refcnt;
-};
-
-static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt);
-static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt);
-static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			   int phyid);
-
-static u32 cffrml_rcv_error;
-static u32 cffrml_rcv_checsum_error;
-struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
-{
-	struct cffrml *this = kzalloc_obj(struct cffrml, GFP_ATOMIC);
-	if (!this)
-		return NULL;
-	this->pcpu_refcnt = alloc_percpu(int);
-	if (this->pcpu_refcnt == NULL) {
-		kfree(this);
-		return NULL;
-	}
-
-	caif_assert(offsetof(struct cffrml, layer) == 0);
-
-	this->layer.receive = cffrml_receive;
-	this->layer.transmit = cffrml_transmit;
-	this->layer.ctrlcmd = cffrml_ctrlcmd;
-	snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "frm%d", phyid);
-	this->dofcs = use_fcs;
-	this->layer.id = phyid;
-	return (struct cflayer *) this;
-}
-
-void cffrml_free(struct cflayer *layer)
-{
-	struct cffrml *this = container_obj(layer);
-	free_percpu(this->pcpu_refcnt);
-	kfree(layer);
-}
-
-void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up)
-{
-	this->up = up;
-}
-
-void cffrml_set_dnlayer(struct cflayer *this, struct cflayer *dn)
-{
-	this->dn = dn;
-}
-
-static u16 cffrml_checksum(u16 chks, void *buf, u16 len)
-{
-	/* FIXME: FCS should be moved to glue in order to use OS-Specific
-	 * solutions
-	 */
-	return crc_ccitt(chks, buf, len);
-}
-
-static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u16 tmp;
-	u16 len;
-	u16 hdrchks;
-	int pktchks;
-	struct cffrml *this;
-	this = container_obj(layr);
-
-	cfpkt_extr_head(pkt, &tmp, 2);
-	len = le16_to_cpu(tmp);
-
-	/* Subtract for FCS on length if FCS is not used. */
-	if (!this->dofcs) {
-		if (len < 2) {
-			++cffrml_rcv_error;
-			pr_err("Invalid frame length (%d)\n", len);
-			cfpkt_destroy(pkt);
-			return -EPROTO;
-		}
-		len -= 2;
-	}
-
-	if (cfpkt_setlen(pkt, len) < 0) {
-		++cffrml_rcv_error;
-		pr_err("Framing length error (%d)\n", len);
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-	/*
-	 * Don't do extract if FCS is false, rather do setlen - then we don't
-	 * get a cache-miss.
-	 */
-	if (this->dofcs) {
-		cfpkt_extr_trail(pkt, &tmp, 2);
-		hdrchks = le16_to_cpu(tmp);
-		pktchks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
-		if (pktchks != hdrchks) {
-			cfpkt_add_trail(pkt, &tmp, 2);
-			++cffrml_rcv_error;
-			++cffrml_rcv_checsum_error;
-			pr_info("Frame checksum error (0x%x != 0x%x)\n",
-				hdrchks, pktchks);
-			return -EILSEQ;
-		}
-	}
-	if (cfpkt_erroneous(pkt)) {
-		++cffrml_rcv_error;
-		pr_err("Packet is erroneous!\n");
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-
-	if (layr->up == NULL) {
-		pr_err("Layr up is missing!\n");
-		cfpkt_destroy(pkt);
-		return -EINVAL;
-	}
-
-	return layr->up->receive(layr->up, pkt);
-}
-
-static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u16 chks;
-	u16 len;
-	__le16 data;
-
-	struct cffrml *this = container_obj(layr);
-	if (this->dofcs) {
-		chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
-		data = cpu_to_le16(chks);
-		cfpkt_add_trail(pkt, &data, 2);
-	} else {
-		cfpkt_pad_trail(pkt, 2);
-	}
-	len = cfpkt_getlen(pkt);
-	data = cpu_to_le16(len);
-	cfpkt_add_head(pkt, &data, 2);
-	cfpkt_info(pkt)->hdr_len += 2;
-	if (cfpkt_erroneous(pkt)) {
-		pr_err("Packet is erroneous!\n");
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-
-	if (layr->dn == NULL) {
-		cfpkt_destroy(pkt);
-		return -ENODEV;
-
-	}
-	return layr->dn->transmit(layr->dn, pkt);
-}
-
-static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			   int phyid)
-{
-	if (layr->up && layr->up->ctrlcmd)
-		layr->up->ctrlcmd(layr->up, ctrl, layr->id);
-}
-
-void cffrml_put(struct cflayer *layr)
-{
-	struct cffrml *this = container_obj(layr);
-	if (layr != NULL && this->pcpu_refcnt != NULL)
-		this_cpu_dec(*this->pcpu_refcnt);
-}
-
-void cffrml_hold(struct cflayer *layr)
-{
-	struct cffrml *this = container_obj(layr);
-	if (layr != NULL && this->pcpu_refcnt != NULL)
-		this_cpu_inc(*this->pcpu_refcnt);
-}
-
-int cffrml_refcnt_read(struct cflayer *layr)
-{
-	int i, refcnt = 0;
-	struct cffrml *this = container_obj(layr);
-	for_each_possible_cpu(i)
-		refcnt += *per_cpu_ptr(this->pcpu_refcnt, i);
-	return refcnt;
-}
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
deleted file mode 100644
index 77a1f31639b7..000000000000
--- a/net/caif/cfmuxl.c
+++ /dev/null
@@ -1,267 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/stddef.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/rculist.h>
-#include <net/caif/cfpkt.h>
-#include <net/caif/cfmuxl.h>
-#include <net/caif/cfsrvl.h>
-#include <net/caif/cffrml.h>
-
-#define container_obj(layr) container_of(layr, struct cfmuxl, layer)
-
-#define CAIF_CTRL_CHANNEL 0
-#define UP_CACHE_SIZE 8
-#define DN_CACHE_SIZE 8
-
-struct cfmuxl {
-	struct cflayer layer;
-	struct list_head srvl_list;
-	struct list_head frml_list;
-	struct cflayer *up_cache[UP_CACHE_SIZE];
-	struct cflayer *dn_cache[DN_CACHE_SIZE];
-	/*
-	 * Set when inserting or removing downwards layers.
-	 */
-	spinlock_t transmit_lock;
-
-	/*
-	 * Set when inserting or removing upwards layers.
-	 */
-	spinlock_t receive_lock;
-
-};
-
-static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt);
-static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt);
-static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			   int phyid);
-static struct cflayer *get_up(struct cfmuxl *muxl, u16 id);
-
-struct cflayer *cfmuxl_create(void)
-{
-	struct cfmuxl *this = kzalloc_obj(struct cfmuxl, GFP_ATOMIC);
-
-	if (!this)
-		return NULL;
-	this->layer.receive = cfmuxl_receive;
-	this->layer.transmit = cfmuxl_transmit;
-	this->layer.ctrlcmd = cfmuxl_ctrlcmd;
-	INIT_LIST_HEAD(&this->srvl_list);
-	INIT_LIST_HEAD(&this->frml_list);
-	spin_lock_init(&this->transmit_lock);
-	spin_lock_init(&this->receive_lock);
-	snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "mux");
-	return &this->layer;
-}
-
-int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *dn, u8 phyid)
-{
-	struct cfmuxl *muxl = (struct cfmuxl *) layr;
-
-	spin_lock_bh(&muxl->transmit_lock);
-	list_add_rcu(&dn->node, &muxl->frml_list);
-	spin_unlock_bh(&muxl->transmit_lock);
-	return 0;
-}
-
-static struct cflayer *get_from_id(struct list_head *list, u16 id)
-{
-	struct cflayer *lyr;
-	list_for_each_entry_rcu(lyr, list, node) {
-		if (lyr->id == id)
-			return lyr;
-	}
-
-	return NULL;
-}
-
-int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid)
-{
-	struct cfmuxl *muxl = container_obj(layr);
-	struct cflayer *old;
-
-	spin_lock_bh(&muxl->receive_lock);
-
-	/* Two entries with same id is wrong, so remove old layer from mux */
-	old = get_from_id(&muxl->srvl_list, linkid);
-	if (old != NULL)
-		list_del_rcu(&old->node);
-
-	list_add_rcu(&up->node, &muxl->srvl_list);
-	spin_unlock_bh(&muxl->receive_lock);
-
-	return 0;
-}
-
-struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid)
-{
-	struct cfmuxl *muxl = container_obj(layr);
-	struct cflayer *dn;
-	int idx = phyid % DN_CACHE_SIZE;
-
-	spin_lock_bh(&muxl->transmit_lock);
-	RCU_INIT_POINTER(muxl->dn_cache[idx], NULL);
-	dn = get_from_id(&muxl->frml_list, phyid);
-	if (dn == NULL)
-		goto out;
-
-	list_del_rcu(&dn->node);
-	caif_assert(dn != NULL);
-out:
-	spin_unlock_bh(&muxl->transmit_lock);
-	return dn;
-}
-
-static struct cflayer *get_up(struct cfmuxl *muxl, u16 id)
-{
-	struct cflayer *up;
-	int idx = id % UP_CACHE_SIZE;
-	up = rcu_dereference(muxl->up_cache[idx]);
-	if (up == NULL || up->id != id) {
-		spin_lock_bh(&muxl->receive_lock);
-		up = get_from_id(&muxl->srvl_list, id);
-		rcu_assign_pointer(muxl->up_cache[idx], up);
-		spin_unlock_bh(&muxl->receive_lock);
-	}
-	return up;
-}
-
-static struct cflayer *get_dn(struct cfmuxl *muxl, struct dev_info *dev_info)
-{
-	struct cflayer *dn;
-	int idx = dev_info->id % DN_CACHE_SIZE;
-	dn = rcu_dereference(muxl->dn_cache[idx]);
-	if (dn == NULL || dn->id != dev_info->id) {
-		spin_lock_bh(&muxl->transmit_lock);
-		dn = get_from_id(&muxl->frml_list, dev_info->id);
-		rcu_assign_pointer(muxl->dn_cache[idx], dn);
-		spin_unlock_bh(&muxl->transmit_lock);
-	}
-	return dn;
-}
-
-struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id)
-{
-	struct cflayer *up;
-	struct cfmuxl *muxl = container_obj(layr);
-	int idx = id % UP_CACHE_SIZE;
-
-	if (id == 0) {
-		pr_warn("Trying to remove control layer\n");
-		return NULL;
-	}
-
-	spin_lock_bh(&muxl->receive_lock);
-	up = get_from_id(&muxl->srvl_list, id);
-	if (up == NULL)
-		goto out;
-
-	RCU_INIT_POINTER(muxl->up_cache[idx], NULL);
-	list_del_rcu(&up->node);
-out:
-	spin_unlock_bh(&muxl->receive_lock);
-	return up;
-}
-
-static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
-{
-	int ret;
-	struct cfmuxl *muxl = container_obj(layr);
-	u8 id;
-	struct cflayer *up;
-	if (cfpkt_extr_head(pkt, &id, 1) < 0) {
-		pr_err("erroneous Caif Packet\n");
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-	rcu_read_lock();
-	up = get_up(muxl, id);
-
-	if (up == NULL) {
-		pr_debug("Received data on unknown link ID = %d (0x%x)"
-			" up == NULL", id, id);
-		cfpkt_destroy(pkt);
-		/*
-		 * Don't return ERROR, since modem misbehaves and sends out
-		 * flow on before linksetup response.
-		 */
-
-		rcu_read_unlock();
-		return /* CFGLU_EPROT; */ 0;
-	}
-
-	/* We can't hold rcu_lock during receive, so take a ref count instead */
-	cfsrvl_get(up);
-	rcu_read_unlock();
-
-	ret = up->receive(up, pkt);
-
-	cfsrvl_put(up);
-	return ret;
-}
-
-static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
-{
-	struct cfmuxl *muxl = container_obj(layr);
-	int err;
-	u8 linkid;
-	struct cflayer *dn;
-	struct caif_payload_info *info = cfpkt_info(pkt);
-	BUG_ON(!info);
-
-	rcu_read_lock();
-
-	dn = get_dn(muxl, info->dev_info);
-	if (dn == NULL) {
-		pr_debug("Send data on unknown phy ID = %d (0x%x)\n",
-			info->dev_info->id, info->dev_info->id);
-		rcu_read_unlock();
-		cfpkt_destroy(pkt);
-		return -ENOTCONN;
-	}
-
-	info->hdr_len += 1;
-	linkid = info->channel_id;
-	cfpkt_add_head(pkt, &linkid, 1);
-
-	/* We can't hold rcu_lock during receive, so take a ref count instead */
-	cffrml_hold(dn);
-
-	rcu_read_unlock();
-
-	err = dn->transmit(dn, pkt);
-
-	cffrml_put(dn);
-	return err;
-}
-
-static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			   int phyid)
-{
-	struct cfmuxl *muxl = container_obj(layr);
-	struct cflayer *layer;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(layer, &muxl->srvl_list, node) {
-
-		if (cfsrvl_phyid_match(layer, phyid) && layer->ctrlcmd) {
-
-			if ((ctrl == _CAIF_CTRLCMD_PHYIF_DOWN_IND ||
-				ctrl == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND) &&
-					layer->id != 0)
-				cfmuxl_remove_uplayer(layr, layer->id);
-
-			/* NOTE: ctrlcmd is not allowed to block */
-			layer->ctrlcmd(layer, ctrl, phyid);
-		}
-	}
-	rcu_read_unlock();
-}
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
deleted file mode 100644
index 96236d21b18e..000000000000
--- a/net/caif/cfpkt_skbuff.c
+++ /dev/null
@@ -1,373 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/string.h>
-#include <linux/skbuff.h>
-#include <linux/export.h>
-#include <net/caif/cfpkt.h>
-
-#define PKT_PREFIX  48
-#define PKT_POSTFIX 2
-#define PKT_LEN_WHEN_EXTENDING 128
-#define PKT_ERROR(pkt, errmsg)		   \
-do {					   \
-	cfpkt_priv(pkt)->erronous = true;  \
-	skb_reset_tail_pointer(&pkt->skb); \
-	pr_warn(errmsg);		   \
-} while (0)
-
-/*
- * net/caif/ is generic and does not
- * understand SKB, so we do this typecast
- */
-struct cfpkt {
-	struct sk_buff skb;
-};
-
-/* Private data inside SKB */
-struct cfpkt_priv_data {
-	struct dev_info dev_info;
-	bool erronous;
-};
-
-static inline struct cfpkt_priv_data *cfpkt_priv(struct cfpkt *pkt)
-{
-	return (struct cfpkt_priv_data *) pkt->skb.cb;
-}
-
-static inline bool is_erronous(struct cfpkt *pkt)
-{
-	return cfpkt_priv(pkt)->erronous;
-}
-
-static inline struct sk_buff *pkt_to_skb(struct cfpkt *pkt)
-{
-	return &pkt->skb;
-}
-
-static inline struct cfpkt *skb_to_pkt(struct sk_buff *skb)
-{
-	return (struct cfpkt *) skb;
-}
-
-struct cfpkt *cfpkt_fromnative(enum caif_direction dir, void *nativepkt)
-{
-	struct cfpkt *pkt = skb_to_pkt(nativepkt);
-	cfpkt_priv(pkt)->erronous = false;
-	return pkt;
-}
-EXPORT_SYMBOL(cfpkt_fromnative);
-
-void *cfpkt_tonative(struct cfpkt *pkt)
-{
-	return (void *) pkt;
-}
-EXPORT_SYMBOL(cfpkt_tonative);
-
-static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx)
-{
-	struct sk_buff *skb;
-
-	skb = alloc_skb(len + pfx, GFP_ATOMIC);
-	if (unlikely(skb == NULL))
-		return NULL;
-
-	skb_reserve(skb, pfx);
-	return skb_to_pkt(skb);
-}
-
-inline struct cfpkt *cfpkt_create(u16 len)
-{
-	return cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
-}
-
-void cfpkt_destroy(struct cfpkt *pkt)
-{
-	struct sk_buff *skb = pkt_to_skb(pkt);
-	kfree_skb(skb);
-}
-
-inline bool cfpkt_more(struct cfpkt *pkt)
-{
-	struct sk_buff *skb = pkt_to_skb(pkt);
-	return skb->len > 0;
-}
-
-int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len)
-{
-	struct sk_buff *skb = pkt_to_skb(pkt);
-	if (skb_headlen(skb) >= len) {
-		memcpy(data, skb->data, len);
-		return 0;
-	}
-	return !cfpkt_extr_head(pkt, data, len) &&
-	    !cfpkt_add_head(pkt, data, len);
-}
-
-int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
-{
-	struct sk_buff *skb = pkt_to_skb(pkt);
-	u8 *from;
-	if (unlikely(is_erronous(pkt)))
-		return -EPROTO;
-
-	if (unlikely(len > skb->len)) {
-		PKT_ERROR(pkt, "read beyond end of packet\n");
-		return -EPROTO;
-	}
-
-	if (unlikely(len > skb_headlen(skb))) {
-		if (unlikely(skb_linearize(skb) != 0)) {
-			PKT_ERROR(pkt, "linearize failed\n");
-			return -EPROTO;
-		}
-	}
-	from = skb_pull(skb, len);
-	from -= len;
-	if (data)
-		memcpy(data, from, len);
-	return 0;
-}
-EXPORT_SYMBOL(cfpkt_extr_head);
-
-int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
-{
-	struct sk_buff *skb = pkt_to_skb(pkt);
-	u8 *data = dta;
-	u8 *from;
-	if (unlikely(is_erronous(pkt)))
-		return -EPROTO;
-
-	if (unlikely(skb_linearize(skb) != 0)) {
-		PKT_ERROR(pkt, "linearize failed\n");
-		return -EPROTO;
-	}
-	if (unlikely(skb->data + len > skb_tail_pointer(skb))) {
-		PKT_ERROR(pkt, "read beyond end of packet\n");
-		return -EPROTO;
-	}
-	from = skb_tail_pointer(skb) - len;
-	skb_trim(skb, skb->len - len);
-	memcpy(data, from, len);
-	return 0;
-}
-
-int cfpkt_pad_trail(struct cfpkt *pkt, u16 len)
-{
-	return cfpkt_add_body(pkt, NULL, len);
-}
-
-int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
-{
-	struct sk_buff *skb = pkt_to_skb(pkt);
-	struct sk_buff *lastskb;
-	u8 *to;
-	u16 addlen = 0;
-
-
-	if (unlikely(is_erronous(pkt)))
-		return -EPROTO;
-
-	lastskb = skb;
-
-	/* Check whether we need to add space at the tail */
-	if (unlikely(skb_tailroom(skb) < len)) {
-		if (likely(len < PKT_LEN_WHEN_EXTENDING))
-			addlen = PKT_LEN_WHEN_EXTENDING;
-		else
-			addlen = len;
-	}
-
-	/* Check whether we need to change the SKB before writing to the tail */
-	if (unlikely((addlen > 0) || skb_cloned(skb) || skb_shared(skb))) {
-
-		/* Make sure data is writable */
-		if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) {
-			PKT_ERROR(pkt, "cow failed\n");
-			return -EPROTO;
-		}
-	}
-
-	/* All set to put the last SKB and optionally write data there. */
-	to = pskb_put(skb, lastskb, len);
-	if (likely(data))
-		memcpy(to, data, len);
-	return 0;
-}
-
-inline int cfpkt_addbdy(struct cfpkt *pkt, u8 data)
-{
-	return cfpkt_add_body(pkt, &data, 1);
-}
-
-int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
-{
-	struct sk_buff *skb = pkt_to_skb(pkt);
-	struct sk_buff *lastskb;
-	u8 *to;
-	const u8 *data = data2;
-	int ret;
-	if (unlikely(is_erronous(pkt)))
-		return -EPROTO;
-	if (unlikely(skb_headroom(skb) < len)) {
-		PKT_ERROR(pkt, "no headroom\n");
-		return -EPROTO;
-	}
-
-	/* Make sure data is writable */
-	ret = skb_cow_data(skb, 0, &lastskb);
-	if (unlikely(ret < 0)) {
-		PKT_ERROR(pkt, "cow failed\n");
-		return ret;
-	}
-
-	to = skb_push(skb, len);
-	memcpy(to, data, len);
-	return 0;
-}
-EXPORT_SYMBOL(cfpkt_add_head);
-
-inline int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len)
-{
-	return cfpkt_add_body(pkt, data, len);
-}
-
-inline u16 cfpkt_getlen(struct cfpkt *pkt)
-{
-	struct sk_buff *skb = pkt_to_skb(pkt);
-	return skb->len;
-}
-
-int cfpkt_iterate(struct cfpkt *pkt,
-		  u16 (*iter_func)(u16, void *, u16),
-		  u16 data)
-{
-	/*
-	 * Don't care about the performance hit of linearizing,
-	 * Checksum should not be used on high-speed interfaces anyway.
-	 */
-	if (unlikely(is_erronous(pkt)))
-		return -EPROTO;
-	if (unlikely(skb_linearize(&pkt->skb) != 0)) {
-		PKT_ERROR(pkt, "linearize failed\n");
-		return -EPROTO;
-	}
-	return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
-}
-
-int cfpkt_setlen(struct cfpkt *pkt, u16 len)
-{
-	struct sk_buff *skb = pkt_to_skb(pkt);
-
-
-	if (unlikely(is_erronous(pkt)))
-		return -EPROTO;
-
-	if (likely(len <= skb->len)) {
-		if (unlikely(skb->data_len))
-			___pskb_trim(skb, len);
-		else
-			skb_trim(skb, len);
-
-		return cfpkt_getlen(pkt);
-	}
-
-	/* Need to expand SKB */
-	if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len)))
-		PKT_ERROR(pkt, "skb_pad_trail failed\n");
-
-	return cfpkt_getlen(pkt);
-}
-
-struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
-			   struct cfpkt *addpkt,
-			   u16 expectlen)
-{
-	struct sk_buff *dst = pkt_to_skb(dstpkt);
-	struct sk_buff *add = pkt_to_skb(addpkt);
-	u16 addlen = skb_headlen(add);
-	u16 neededtailspace;
-	struct sk_buff *tmp;
-	u16 dstlen;
-	u16 createlen;
-	if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
-		return dstpkt;
-	}
-
-	neededtailspace = max(expectlen, addlen);
-
-	if (dst->tail + neededtailspace > dst->end) {
-		/* Create a dumplicate of 'dst' with more tail space */
-		struct cfpkt *tmppkt;
-		dstlen = skb_headlen(dst);
-		createlen = dstlen + neededtailspace;
-		tmppkt = cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX);
-		if (tmppkt == NULL)
-			return NULL;
-		tmp = pkt_to_skb(tmppkt);
-		skb_put_data(tmp, dst->data, dstlen);
-		cfpkt_destroy(dstpkt);
-		dst = tmp;
-	}
-	skb_put_data(dst, add->data, skb_headlen(add));
-	cfpkt_destroy(addpkt);
-	return skb_to_pkt(dst);
-}
-
-struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
-{
-	struct sk_buff *skb2;
-	struct sk_buff *skb = pkt_to_skb(pkt);
-	struct cfpkt *tmppkt;
-	u8 *split = skb->data + pos;
-	u16 len2nd = skb_tail_pointer(skb) - split;
-
-	if (unlikely(is_erronous(pkt)))
-		return NULL;
-
-	if (skb->data + pos > skb_tail_pointer(skb)) {
-		PKT_ERROR(pkt, "trying to split beyond end of packet\n");
-		return NULL;
-	}
-
-	/* Create a new packet for the second part of the data */
-	tmppkt = cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX,
-				  PKT_PREFIX);
-	if (tmppkt == NULL)
-		return NULL;
-	skb2 = pkt_to_skb(tmppkt);
-
-
-	if (skb2 == NULL)
-		return NULL;
-
-	skb_put_data(skb2, split, len2nd);
-
-	/* Reduce the length of the original packet */
-	skb_trim(skb, pos);
-
-	skb2->priority = skb->priority;
-	return skb_to_pkt(skb2);
-}
-
-bool cfpkt_erroneous(struct cfpkt *pkt)
-{
-	return cfpkt_priv(pkt)->erronous;
-}
-
-struct caif_payload_info *cfpkt_info(struct cfpkt *pkt)
-{
-	return (struct caif_payload_info *)&pkt_to_skb(pkt)->cb;
-}
-EXPORT_SYMBOL(cfpkt_info);
-
-void cfpkt_set_prio(struct cfpkt *pkt, int prio)
-{
-	pkt_to_skb(pkt)->priority = prio;
-}
-EXPORT_SYMBOL(cfpkt_set_prio);
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
deleted file mode 100644
index 93732ebbd1e2..000000000000
--- a/net/caif/cfrfml.c
+++ /dev/null
@@ -1,299 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/stddef.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/unaligned.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfsrvl.h>
-#include <net/caif/cfpkt.h>
-
-#define container_obj(layr) container_of(layr, struct cfrfml, serv.layer)
-#define RFM_SEGMENTATION_BIT 0x01
-#define RFM_HEAD_SIZE 7
-
-static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
-static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
-
-struct cfrfml {
-	struct cfsrvl serv;
-	struct cfpkt *incomplete_frm;
-	int fragment_size;
-	u8  seghead[6];
-	u16 pdu_size;
-	/* Protects serialized processing of packets */
-	spinlock_t sync;
-};
-
-static void cfrfml_release(struct cflayer *layer)
-{
-	struct cfsrvl *srvl = container_of(layer, struct cfsrvl, layer);
-	struct cfrfml *rfml = container_obj(&srvl->layer);
-
-	if (rfml->incomplete_frm)
-		cfpkt_destroy(rfml->incomplete_frm);
-
-	kfree(srvl);
-}
-
-struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
-			      int mtu_size)
-{
-	int tmp;
-	struct cfrfml *this = kzalloc_obj(struct cfrfml, GFP_ATOMIC);
-
-	if (!this)
-		return NULL;
-
-	cfsrvl_init(&this->serv, channel_id, dev_info, false);
-	this->serv.release = cfrfml_release;
-	this->serv.layer.receive = cfrfml_receive;
-	this->serv.layer.transmit = cfrfml_transmit;
-
-	/* Round down to closest multiple of 16 */
-	tmp = (mtu_size - RFM_HEAD_SIZE - 6) / 16;
-	tmp *= 16;
-
-	this->fragment_size = tmp;
-	spin_lock_init(&this->sync);
-	snprintf(this->serv.layer.name, CAIF_LAYER_NAME_SZ,
-		"rfm%d", channel_id);
-
-	return &this->serv.layer;
-}
-
-static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead,
-				struct cfpkt *pkt, int *err)
-{
-	struct cfpkt *tmppkt;
-	*err = -EPROTO;
-	/* n-th but not last segment */
-
-	if (cfpkt_extr_head(pkt, seghead, 6) < 0)
-		return NULL;
-
-	/* Verify correct header */
-	if (memcmp(seghead, rfml->seghead, 6) != 0)
-		return NULL;
-
-	tmppkt = cfpkt_append(rfml->incomplete_frm, pkt,
-			rfml->pdu_size + RFM_HEAD_SIZE);
-
-	/* If cfpkt_append failes input pkts are not freed */
-	*err = -ENOMEM;
-	if (tmppkt == NULL)
-		return NULL;
-
-	*err = 0;
-	return tmppkt;
-}
-
-static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u8 tmp;
-	bool segmented;
-	int err;
-	u8 seghead[6];
-	struct cfrfml *rfml;
-	struct cfpkt *tmppkt = NULL;
-
-	caif_assert(layr->up != NULL);
-	caif_assert(layr->receive != NULL);
-	rfml = container_obj(layr);
-	spin_lock(&rfml->sync);
-
-	err = -EPROTO;
-	if (cfpkt_extr_head(pkt, &tmp, 1) < 0)
-		goto out;
-	segmented = tmp & RFM_SEGMENTATION_BIT;
-
-	if (segmented) {
-		if (rfml->incomplete_frm == NULL) {
-			/* Initial Segment */
-			if (cfpkt_peek_head(pkt, rfml->seghead, 6) != 0)
-				goto out;
-
-			rfml->pdu_size = get_unaligned_le16(rfml->seghead+4);
-
-			if (cfpkt_erroneous(pkt))
-				goto out;
-			rfml->incomplete_frm = pkt;
-			pkt = NULL;
-		} else {
-
-			tmppkt = rfm_append(rfml, seghead, pkt, &err);
-			if (tmppkt == NULL)
-				goto out;
-
-			if (cfpkt_erroneous(tmppkt))
-				goto out;
-
-			rfml->incomplete_frm = tmppkt;
-
-
-			if (cfpkt_erroneous(tmppkt))
-				goto out;
-		}
-		err = 0;
-		goto out;
-	}
-
-	if (rfml->incomplete_frm) {
-
-		/* Last Segment */
-		tmppkt = rfm_append(rfml, seghead, pkt, &err);
-		if (tmppkt == NULL)
-			goto out;
-
-		if (cfpkt_erroneous(tmppkt))
-			goto out;
-
-		rfml->incomplete_frm = NULL;
-		pkt = tmppkt;
-		tmppkt = NULL;
-
-		/* Verify that length is correct */
-		err = -EPROTO;
-		if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
-			goto out;
-	}
-
-	err = rfml->serv.layer.up->receive(rfml->serv.layer.up, pkt);
-
-out:
-
-	if (err != 0) {
-		if (tmppkt)
-			cfpkt_destroy(tmppkt);
-		if (pkt)
-			cfpkt_destroy(pkt);
-		if (rfml->incomplete_frm)
-			cfpkt_destroy(rfml->incomplete_frm);
-		rfml->incomplete_frm = NULL;
-
-		pr_info("Connection error %d triggered on RFM link\n", err);
-
-		/* Trigger connection error upon failure.*/
-		layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
-					rfml->serv.dev_info.id);
-	}
-	spin_unlock(&rfml->sync);
-
-	if (unlikely(err == -EAGAIN))
-		/* It is not possible to recover after drop of a fragment */
-		err = -EIO;
-
-	return err;
-}
-
-
-static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
-{
-	caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size + RFM_HEAD_SIZE);
-
-	/* Add info for MUX-layer to route the packet out. */
-	cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
-
-	/*
-	 * To optimize alignment, we add up the size of CAIF header before
-	 * payload.
-	 */
-	cfpkt_info(pkt)->hdr_len = RFM_HEAD_SIZE;
-	cfpkt_info(pkt)->dev_info = &rfml->serv.dev_info;
-
-	return rfml->serv.layer.dn->transmit(rfml->serv.layer.dn, pkt);
-}
-
-static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
-{
-	int err;
-	u8 seg;
-	u8 head[6];
-	struct cfpkt *rearpkt = NULL;
-	struct cfpkt *frontpkt = pkt;
-	struct cfrfml *rfml = container_obj(layr);
-
-	caif_assert(layr->dn != NULL);
-	caif_assert(layr->dn->transmit != NULL);
-
-	if (!cfsrvl_ready(&rfml->serv, &err))
-		goto out;
-
-	err = -EPROTO;
-	if (cfpkt_getlen(pkt) <= RFM_HEAD_SIZE-1)
-		goto out;
-
-	err = 0;
-	if (cfpkt_getlen(pkt) > rfml->fragment_size + RFM_HEAD_SIZE)
-		err = cfpkt_peek_head(pkt, head, 6);
-
-	if (err != 0)
-		goto out;
-
-	while (cfpkt_getlen(frontpkt) > rfml->fragment_size + RFM_HEAD_SIZE) {
-
-		seg = 1;
-		err = -EPROTO;
-
-		if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
-			goto out;
-		/*
-		 * On OOM error cfpkt_split returns NULL.
-		 *
-		 * NOTE: Segmented pdu is not correctly aligned.
-		 * This has negative performance impact.
-		 */
-
-		rearpkt = cfpkt_split(frontpkt, rfml->fragment_size);
-		if (rearpkt == NULL)
-			goto out;
-
-		err = cfrfml_transmit_segment(rfml, frontpkt);
-
-		if (err != 0) {
-			frontpkt = NULL;
-			goto out;
-		}
-
-		frontpkt = rearpkt;
-		rearpkt = NULL;
-
-		err = -EPROTO;
-		if (cfpkt_add_head(frontpkt, head, 6) < 0)
-			goto out;
-
-	}
-
-	seg = 0;
-	err = -EPROTO;
-
-	if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
-		goto out;
-
-	err = cfrfml_transmit_segment(rfml, frontpkt);
-
-	frontpkt = NULL;
-out:
-
-	if (err != 0) {
-		pr_info("Connection error %d triggered on RFM link\n", err);
-		/* Trigger connection error upon failure.*/
-
-		layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
-					rfml->serv.dev_info.id);
-
-		if (rearpkt)
-			cfpkt_destroy(rearpkt);
-
-		if (frontpkt)
-			cfpkt_destroy(frontpkt);
-	}
-
-	return err;
-}
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
deleted file mode 100644
index faf78fb754e2..000000000000
--- a/net/caif/cfserl.c
+++ /dev/null
@@ -1,192 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/stddef.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfpkt.h>
-#include <net/caif/cfserl.h>
-
-#define container_obj(layr) ((struct cfserl *) layr)
-
-#define CFSERL_STX 0x02
-#define SERIAL_MINIUM_PACKET_SIZE 4
-#define SERIAL_MAX_FRAMESIZE 4096
-struct cfserl {
-	struct cflayer layer;
-	struct cfpkt *incomplete_frm;
-	/* Protects parallel processing of incoming packets */
-	spinlock_t sync;
-	bool usestx;
-};
-
-static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
-static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
-static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			   int phyid);
-
-void cfserl_release(struct cflayer *layer)
-{
-	kfree(layer);
-}
-
-struct cflayer *cfserl_create(int instance, bool use_stx)
-{
-	struct cfserl *this = kzalloc_obj(struct cfserl, GFP_ATOMIC);
-	if (!this)
-		return NULL;
-	caif_assert(offsetof(struct cfserl, layer) == 0);
-	this->layer.receive = cfserl_receive;
-	this->layer.transmit = cfserl_transmit;
-	this->layer.ctrlcmd = cfserl_ctrlcmd;
-	this->usestx = use_stx;
-	spin_lock_init(&this->sync);
-	snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "ser1");
-	return &this->layer;
-}
-
-static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
-{
-	struct cfserl *layr = container_obj(l);
-	u16 pkt_len;
-	struct cfpkt *pkt = NULL;
-	struct cfpkt *tail_pkt = NULL;
-	u8 tmp8;
-	u16 tmp;
-	u8 stx = CFSERL_STX;
-	int ret;
-	u16 expectlen = 0;
-
-	caif_assert(newpkt != NULL);
-	spin_lock(&layr->sync);
-
-	if (layr->incomplete_frm != NULL) {
-		layr->incomplete_frm =
-		    cfpkt_append(layr->incomplete_frm, newpkt, expectlen);
-		pkt = layr->incomplete_frm;
-		if (pkt == NULL) {
-			spin_unlock(&layr->sync);
-			return -ENOMEM;
-		}
-	} else {
-		pkt = newpkt;
-	}
-	layr->incomplete_frm = NULL;
-
-	do {
-		/* Search for STX at start of pkt if STX is used */
-		if (layr->usestx) {
-			cfpkt_extr_head(pkt, &tmp8, 1);
-			if (tmp8 != CFSERL_STX) {
-				while (cfpkt_more(pkt)
-				       && tmp8 != CFSERL_STX) {
-					cfpkt_extr_head(pkt, &tmp8, 1);
-				}
-				if (!cfpkt_more(pkt)) {
-					cfpkt_destroy(pkt);
-					layr->incomplete_frm = NULL;
-					spin_unlock(&layr->sync);
-					return -EPROTO;
-				}
-			}
-		}
-
-		pkt_len = cfpkt_getlen(pkt);
-
-		/*
-		 *  pkt_len is the accumulated length of the packet data
-		 *  we have received so far.
-		 *  Exit if frame doesn't hold length.
-		 */
-
-		if (pkt_len < 2) {
-			if (layr->usestx)
-				cfpkt_add_head(pkt, &stx, 1);
-			layr->incomplete_frm = pkt;
-			spin_unlock(&layr->sync);
-			return 0;
-		}
-
-		/*
-		 *  Find length of frame.
-		 *  expectlen is the length we need for a full frame.
-		 */
-		cfpkt_peek_head(pkt, &tmp, 2);
-		expectlen = le16_to_cpu(tmp) + 2;
-		/*
-		 * Frame error handling
-		 */
-		if (expectlen < SERIAL_MINIUM_PACKET_SIZE
-		    || expectlen > SERIAL_MAX_FRAMESIZE) {
-			if (!layr->usestx) {
-				if (pkt != NULL)
-					cfpkt_destroy(pkt);
-				layr->incomplete_frm = NULL;
-				spin_unlock(&layr->sync);
-				return -EPROTO;
-			}
-			continue;
-		}
-
-		if (pkt_len < expectlen) {
-			/* Too little received data */
-			if (layr->usestx)
-				cfpkt_add_head(pkt, &stx, 1);
-			layr->incomplete_frm = pkt;
-			spin_unlock(&layr->sync);
-			return 0;
-		}
-
-		/*
-		 * Enough data for at least one frame.
-		 * Split the frame, if too long
-		 */
-		if (pkt_len > expectlen)
-			tail_pkt = cfpkt_split(pkt, expectlen);
-		else
-			tail_pkt = NULL;
-
-		/* Send the first part of packet upwards.*/
-		spin_unlock(&layr->sync);
-		ret = layr->layer.up->receive(layr->layer.up, pkt);
-		spin_lock(&layr->sync);
-		if (ret == -EILSEQ) {
-			if (layr->usestx) {
-				if (tail_pkt != NULL)
-					pkt = cfpkt_append(pkt, tail_pkt, 0);
-				/* Start search for next STX if frame failed */
-				continue;
-			} else {
-				cfpkt_destroy(pkt);
-				pkt = NULL;
-			}
-		}
-
-		pkt = tail_pkt;
-
-	} while (pkt != NULL);
-
-	spin_unlock(&layr->sync);
-	return 0;
-}
-
-static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt)
-{
-	struct cfserl *layr = container_obj(layer);
-	u8 tmp8 = CFSERL_STX;
-	if (layr->usestx)
-		cfpkt_add_head(newpkt, &tmp8, 1);
-	return layer->dn->transmit(layer->dn, newpkt);
-}
-
-static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			   int phyid)
-{
-	layr->up->ctrlcmd(layr->up, ctrl, phyid);
-}
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
deleted file mode 100644
index d687fd0b4ed3..000000000000
--- a/net/caif/cfsrvl.c
+++ /dev/null
@@ -1,224 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/pkt_sched.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfsrvl.h>
-#include <net/caif/cfpkt.h>
-#include <net/caif/caif_dev.h>
-
-#define SRVL_CTRL_PKT_SIZE 1
-#define SRVL_FLOW_OFF 0x81
-#define SRVL_FLOW_ON  0x80
-#define SRVL_SET_PIN  0x82
-
-#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
-
-static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
-			    int phyid)
-{
-	struct cfsrvl *service = container_obj(layr);
-
-	if (layr->up == NULL || layr->up->ctrlcmd == NULL)
-		return;
-
-	switch (ctrl) {
-	case CAIF_CTRLCMD_INIT_RSP:
-		service->open = true;
-		layr->up->ctrlcmd(layr->up, ctrl, phyid);
-		break;
-	case CAIF_CTRLCMD_DEINIT_RSP:
-	case CAIF_CTRLCMD_INIT_FAIL_RSP:
-		service->open = false;
-		layr->up->ctrlcmd(layr->up, ctrl, phyid);
-		break;
-	case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
-		if (phyid != service->dev_info.id)
-			break;
-		if (service->modem_flow_on)
-			layr->up->ctrlcmd(layr->up,
-					  CAIF_CTRLCMD_FLOW_OFF_IND, phyid);
-		service->phy_flow_on = false;
-		break;
-	case _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND:
-		if (phyid != service->dev_info.id)
-			return;
-		if (service->modem_flow_on) {
-			layr->up->ctrlcmd(layr->up,
-					   CAIF_CTRLCMD_FLOW_ON_IND,
-					   phyid);
-		}
-		service->phy_flow_on = true;
-		break;
-	case CAIF_CTRLCMD_FLOW_OFF_IND:
-		if (service->phy_flow_on) {
-			layr->up->ctrlcmd(layr->up,
-					  CAIF_CTRLCMD_FLOW_OFF_IND, phyid);
-		}
-		service->modem_flow_on = false;
-		break;
-	case CAIF_CTRLCMD_FLOW_ON_IND:
-		if (service->phy_flow_on) {
-			layr->up->ctrlcmd(layr->up,
-					  CAIF_CTRLCMD_FLOW_ON_IND, phyid);
-		}
-		service->modem_flow_on = true;
-		break;
-	case _CAIF_CTRLCMD_PHYIF_DOWN_IND:
-		/* In case interface is down, let's fake a remove shutdown */
-		layr->up->ctrlcmd(layr->up,
-				CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, phyid);
-		break;
-	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
-		layr->up->ctrlcmd(layr->up, ctrl, phyid);
-		break;
-	default:
-		pr_warn("Unexpected ctrl in cfsrvl (%d)\n", ctrl);
-		/* We have both modem and phy flow on, send flow on */
-		layr->up->ctrlcmd(layr->up, ctrl, phyid);
-		service->phy_flow_on = true;
-		break;
-	}
-}
-
-static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
-{
-	struct cfsrvl *service = container_obj(layr);
-
-	caif_assert(layr != NULL);
-	caif_assert(layr->dn != NULL);
-	caif_assert(layr->dn->transmit != NULL);
-
-	if (!service->supports_flowctrl)
-		return 0;
-
-	switch (ctrl) {
-	case CAIF_MODEMCMD_FLOW_ON_REQ:
-		{
-			struct cfpkt *pkt;
-			struct caif_payload_info *info;
-			u8 flow_on = SRVL_FLOW_ON;
-			pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
-			if (!pkt)
-				return -ENOMEM;
-
-			if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
-				pr_err("Packet is erroneous!\n");
-				cfpkt_destroy(pkt);
-				return -EPROTO;
-			}
-			info = cfpkt_info(pkt);
-			info->channel_id = service->layer.id;
-			info->hdr_len = 1;
-			info->dev_info = &service->dev_info;
-			cfpkt_set_prio(pkt, TC_PRIO_CONTROL);
-			return layr->dn->transmit(layr->dn, pkt);
-		}
-	case CAIF_MODEMCMD_FLOW_OFF_REQ:
-		{
-			struct cfpkt *pkt;
-			struct caif_payload_info *info;
-			u8 flow_off = SRVL_FLOW_OFF;
-			pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
-			if (!pkt)
-				return -ENOMEM;
-
-			if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
-				pr_err("Packet is erroneous!\n");
-				cfpkt_destroy(pkt);
-				return -EPROTO;
-			}
-			info = cfpkt_info(pkt);
-			info->channel_id = service->layer.id;
-			info->hdr_len = 1;
-			info->dev_info = &service->dev_info;
-			cfpkt_set_prio(pkt, TC_PRIO_CONTROL);
-			return layr->dn->transmit(layr->dn, pkt);
-		}
-	default:
-	  break;
-	}
-	return -EINVAL;
-}
-
-static void cfsrvl_release(struct cflayer *layer)
-{
-	struct cfsrvl *service = container_of(layer, struct cfsrvl, layer);
-	kfree(service);
-}
-
-void cfsrvl_init(struct cfsrvl *service,
-		 u8 channel_id,
-		 struct dev_info *dev_info,
-		 bool supports_flowctrl)
-{
-	caif_assert(offsetof(struct cfsrvl, layer) == 0);
-	service->open = false;
-	service->modem_flow_on = true;
-	service->phy_flow_on = true;
-	service->layer.id = channel_id;
-	service->layer.ctrlcmd = cfservl_ctrlcmd;
-	service->layer.modemcmd = cfservl_modemcmd;
-	service->dev_info = *dev_info;
-	service->supports_flowctrl = supports_flowctrl;
-	service->release = cfsrvl_release;
-}
-
-bool cfsrvl_ready(struct cfsrvl *service, int *err)
-{
-	if (!service->open) {
-		*err = -ENOTCONN;
-		return false;
-	}
-	return true;
-}
-
-bool cfsrvl_phyid_match(struct cflayer *layer, int phyid)
-{
-	struct cfsrvl *servl = container_obj(layer);
-	return servl->dev_info.id == phyid;
-}
-
-void caif_free_client(struct cflayer *adap_layer)
-{
-	struct cflayer *serv_layer;
-	struct cfsrvl *servl;
-
-	if (!adap_layer)
-		return;
-
-	serv_layer = adap_layer->dn;
-	if (!serv_layer)
-		return;
-
-	layer_set_dn(adap_layer, NULL);
-	layer_set_up(serv_layer, NULL);
-
-	servl = container_obj(serv_layer);
-	servl->release(&servl->layer);
-}
-EXPORT_SYMBOL(caif_free_client);
-
-void caif_client_register_refcnt(struct cflayer *adapt_layer,
-				 void (*hold)(struct cflayer *lyr),
-				 void (*put)(struct cflayer *lyr))
-{
-	struct cfsrvl *service;
-
-	if (WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL))
-		return;
-	service = container_of(adapt_layer->dn, struct cfsrvl, layer);
-	service->hold = hold;
-	service->put = put;
-}
-EXPORT_SYMBOL(caif_client_register_refcnt);
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
deleted file mode 100644
index 5111090bb2c0..000000000000
--- a/net/caif/cfutill.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfsrvl.h>
-#include <net/caif/cfpkt.h>
-
-#define container_obj(layr) ((struct cfsrvl *) layr)
-#define UTIL_PAYLOAD  0x00
-#define UTIL_CMD_BIT  0x80
-#define UTIL_REMOTE_SHUTDOWN 0x82
-#define UTIL_FLOW_OFF 0x81
-#define UTIL_FLOW_ON  0x80
-
-static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
-static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
-
-struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
-{
-	struct cfsrvl *util = kzalloc_obj(struct cfsrvl, GFP_ATOMIC);
-	if (!util)
-		return NULL;
-	caif_assert(offsetof(struct cfsrvl, layer) == 0);
-	cfsrvl_init(util, channel_id, dev_info, true);
-	util->layer.receive = cfutill_receive;
-	util->layer.transmit = cfutill_transmit;
-	snprintf(util->layer.name, CAIF_LAYER_NAME_SZ, "util1");
-	return &util->layer;
-}
-
-static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u8 cmd = -1;
-	struct cfsrvl *service = container_obj(layr);
-	caif_assert(layr != NULL);
-	caif_assert(layr->up != NULL);
-	caif_assert(layr->up->receive != NULL);
-	caif_assert(layr->up->ctrlcmd != NULL);
-	if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
-		pr_err("Packet is erroneous!\n");
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-
-	switch (cmd) {
-	case UTIL_PAYLOAD:
-		return layr->up->receive(layr->up, pkt);
-	case UTIL_FLOW_OFF:
-		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
-		cfpkt_destroy(pkt);
-		return 0;
-	case UTIL_FLOW_ON:
-		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
-		cfpkt_destroy(pkt);
-		return 0;
-	case UTIL_REMOTE_SHUTDOWN:	/* Remote Shutdown Request */
-		pr_err("REMOTE SHUTDOWN REQUEST RECEIVED\n");
-		layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0);
-		service->open = false;
-		cfpkt_destroy(pkt);
-		return 0;
-	default:
-		cfpkt_destroy(pkt);
-		pr_warn("Unknown service control %d (0x%x)\n", cmd, cmd);
-		return -EPROTO;
-	}
-}
-
-static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u8 zero = 0;
-	struct caif_payload_info *info;
-	int ret;
-	struct cfsrvl *service = container_obj(layr);
-	caif_assert(layr != NULL);
-	caif_assert(layr->dn != NULL);
-	caif_assert(layr->dn->transmit != NULL);
-
-	if (!cfsrvl_ready(service, &ret)) {
-		cfpkt_destroy(pkt);
-		return ret;
-	}
-
-	cfpkt_add_head(pkt, &zero, 1);
-	/* Add info for MUX-layer to route the packet out. */
-	info = cfpkt_info(pkt);
-	info->channel_id = service->layer.id;
-	/*
-	 * To optimize alignment, we add up the size of CAIF header before
-	 * payload.
-	 */
-	info->hdr_len = 1;
-	info->dev_info = &service->dev_info;
-	return layr->dn->transmit(layr->dn, pkt);
-}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
deleted file mode 100644
index 53f844c49bbb..000000000000
--- a/net/caif/cfveil.c
+++ /dev/null
@@ -1,101 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfsrvl.h>
-#include <net/caif/cfpkt.h>
-
-#define VEI_PAYLOAD  0x00
-#define VEI_CMD_BIT  0x80
-#define VEI_FLOW_OFF 0x81
-#define VEI_FLOW_ON  0x80
-#define VEI_SET_PIN  0x82
-
-#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
-
-static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
-static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt);
-
-struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
-{
-	struct cfsrvl *vei = kzalloc_obj(struct cfsrvl, GFP_ATOMIC);
-	if (!vei)
-		return NULL;
-	caif_assert(offsetof(struct cfsrvl, layer) == 0);
-	cfsrvl_init(vei, channel_id, dev_info, true);
-	vei->layer.receive = cfvei_receive;
-	vei->layer.transmit = cfvei_transmit;
-	snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ, "vei%d", channel_id);
-	return &vei->layer;
-}
-
-static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u8 cmd;
-	int ret;
-	caif_assert(layr->up != NULL);
-	caif_assert(layr->receive != NULL);
-	caif_assert(layr->ctrlcmd != NULL);
-
-
-	if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
-		pr_err("Packet is erroneous!\n");
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-	switch (cmd) {
-	case VEI_PAYLOAD:
-		ret = layr->up->receive(layr->up, pkt);
-		return ret;
-	case VEI_FLOW_OFF:
-		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
-		cfpkt_destroy(pkt);
-		return 0;
-	case VEI_FLOW_ON:
-		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
-		cfpkt_destroy(pkt);
-		return 0;
-	case VEI_SET_PIN:	/* SET RS232 PIN */
-		cfpkt_destroy(pkt);
-		return 0;
-	default:		/* SET RS232 PIN */
-		pr_warn("Unknown VEI control packet %d (0x%x)!\n", cmd, cmd);
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-}
-
-static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u8 tmp = 0;
-	struct caif_payload_info *info;
-	int ret;
-	struct cfsrvl *service = container_obj(layr);
-	if (!cfsrvl_ready(service, &ret))
-		goto err;
-	caif_assert(layr->dn != NULL);
-	caif_assert(layr->dn->transmit != NULL);
-
-	if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
-		pr_err("Packet is erroneous!\n");
-		ret = -EPROTO;
-		goto err;
-	}
-
-	/* Add info-> for MUX-layer to route the packet out. */
-	info = cfpkt_info(pkt);
-	info->channel_id = service->layer.id;
-	info->hdr_len = 1;
-	info->dev_info = &service->dev_info;
-	return layr->dn->transmit(layr->dn, pkt);
-err:
-	cfpkt_destroy(pkt);
-	return ret;
-}
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
deleted file mode 100644
index 39e075b0a259..000000000000
--- a/net/caif/cfvidl.c
+++ /dev/null
@@ -1,65 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfsrvl.h>
-#include <net/caif/cfpkt.h>
-
-#define container_obj(layr) ((struct cfsrvl *) layr)
-
-static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt);
-static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt);
-
-struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
-{
-	struct cfsrvl *vid = kzalloc_obj(struct cfsrvl, GFP_ATOMIC);
-	if (!vid)
-		return NULL;
-	caif_assert(offsetof(struct cfsrvl, layer) == 0);
-
-	cfsrvl_init(vid, channel_id, dev_info, false);
-	vid->layer.receive = cfvidl_receive;
-	vid->layer.transmit = cfvidl_transmit;
-	snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ, "vid1");
-	return &vid->layer;
-}
-
-static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt)
-{
-	u32 videoheader;
-	if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) {
-		pr_err("Packet is erroneous!\n");
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-	return layr->up->receive(layr->up, pkt);
-}
-
-static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt)
-{
-	struct cfsrvl *service = container_obj(layr);
-	struct caif_payload_info *info;
-	u32 videoheader = 0;
-	int ret;
-
-	if (!cfsrvl_ready(service, &ret)) {
-		cfpkt_destroy(pkt);
-		return ret;
-	}
-
-	cfpkt_add_head(pkt, &videoheader, 4);
-	/* Add info for MUX-layer to route the packet out */
-	info = cfpkt_info(pkt);
-	info->channel_id = service->layer.id;
-	info->dev_info = &service->dev_info;
-	return layr->dn->transmit(layr->dn, pkt);
-}
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
deleted file mode 100644
index fa6a3c2634a8..000000000000
--- a/net/caif/chnl_net.c
+++ /dev/null
@@ -1,531 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Authors:	Sjur Brendeland
- *		Daniel Martensson
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
-
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/sched.h>
-#include <linux/sockios.h>
-#include <linux/caif/if_caif.h>
-#include <net/rtnetlink.h>
-#include <net/caif/caif_layer.h>
-#include <net/caif/cfpkt.h>
-#include <net/caif/caif_dev.h>
-
-/* GPRS PDP connection has MTU to 1500 */
-#define GPRS_PDP_MTU 1500
-/* 5 sec. connect timeout */
-#define CONNECT_TIMEOUT (5 * HZ)
-#define CAIF_NET_DEFAULT_QUEUE_LEN 500
-#define UNDEF_CONNID 0xffffffff
-
-/*This list is protected by the rtnl lock. */
-static LIST_HEAD(chnl_net_list);
-
-MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol GPRS network device");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_RTNL_LINK("caif");
-
-enum caif_states {
-	CAIF_CONNECTED		= 1,
-	CAIF_CONNECTING,
-	CAIF_DISCONNECTED,
-	CAIF_SHUTDOWN
-};
-
-struct chnl_net {
-	struct cflayer chnl;
-	struct caif_connect_request conn_req;
-	struct list_head list_field;
-	struct net_device *netdev;
-	wait_queue_head_t netmgmt_wq;
-	/* Flow status to remember and control the transmission. */
-	bool flowenabled;
-	enum caif_states state;
-};
-
-static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
-{
-	struct sk_buff *skb;
-	struct chnl_net *priv;
-	int pktlen;
-	const u8 *ip_version;
-	u8 buf;
-
-	priv = container_of(layr, struct chnl_net, chnl);
-
-	skb = (struct sk_buff *) cfpkt_tonative(pkt);
-
-	/* Get length of CAIF packet. */
-	pktlen = skb->len;
-
-	/* Pass some minimum information and
-	 * send the packet to the net stack.
-	 */
-	skb->dev = priv->netdev;
-
-	/* check the version of IP */
-	ip_version = skb_header_pointer(skb, 0, 1, &buf);
-	if (!ip_version) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
-
-	switch (*ip_version >> 4) {
-	case 4:
-		skb->protocol = htons(ETH_P_IP);
-		break;
-	case 6:
-		skb->protocol = htons(ETH_P_IPV6);
-		break;
-	default:
-		kfree_skb(skb);
-		priv->netdev->stats.rx_errors++;
-		return -EINVAL;
-	}
-
-	/* If we change the header in loop mode, the checksum is corrupted. */
-	if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	else
-		skb->ip_summed = CHECKSUM_NONE;
-
-	netif_rx(skb);
-
-	/* Update statistics. */
-	priv->netdev->stats.rx_packets++;
-	priv->netdev->stats.rx_bytes += pktlen;
-
-	return 0;
-}
-
-static int delete_device(struct chnl_net *dev)
-{
-	ASSERT_RTNL();
-	if (dev->netdev)
-		unregister_netdevice(dev->netdev);
-	return 0;
-}
-
-static void close_work(struct work_struct *work)
-{
-	struct chnl_net *dev = NULL;
-	struct list_head *list_node;
-	struct list_head *_tmp;
-
-	rtnl_lock();
-	list_for_each_safe(list_node, _tmp, &chnl_net_list) {
-		dev = list_entry(list_node, struct chnl_net, list_field);
-		if (dev->state == CAIF_SHUTDOWN)
-			dev_close(dev->netdev);
-	}
-	rtnl_unlock();
-}
-static DECLARE_WORK(close_worker, close_work);
-
-static void chnl_hold(struct cflayer *lyr)
-{
-	struct chnl_net *priv = container_of(lyr, struct chnl_net, chnl);
-	dev_hold(priv->netdev);
-}
-
-static void chnl_put(struct cflayer *lyr)
-{
-	struct chnl_net *priv = container_of(lyr, struct chnl_net, chnl);
-	dev_put(priv->netdev);
-}
-
-static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
-			     int phyid)
-{
-	struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
-	pr_debug("NET flowctrl func called flow: %s\n",
-		flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
-		flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
-		flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
-		flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" :
-		flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" :
-		flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ?
-		 "REMOTE_SHUTDOWN" : "UNKNOWN CTRL COMMAND");
-
-
-
-	switch (flow) {
-	case CAIF_CTRLCMD_FLOW_OFF_IND:
-		priv->flowenabled = false;
-		netif_stop_queue(priv->netdev);
-		break;
-	case CAIF_CTRLCMD_DEINIT_RSP:
-		priv->state = CAIF_DISCONNECTED;
-		break;
-	case CAIF_CTRLCMD_INIT_FAIL_RSP:
-		priv->state = CAIF_DISCONNECTED;
-		wake_up_interruptible(&priv->netmgmt_wq);
-		break;
-	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
-		priv->state = CAIF_SHUTDOWN;
-		netif_tx_disable(priv->netdev);
-		schedule_work(&close_worker);
-		break;
-	case CAIF_CTRLCMD_FLOW_ON_IND:
-		priv->flowenabled = true;
-		netif_wake_queue(priv->netdev);
-		break;
-	case CAIF_CTRLCMD_INIT_RSP:
-		caif_client_register_refcnt(&priv->chnl, chnl_hold, chnl_put);
-		priv->state = CAIF_CONNECTED;
-		priv->flowenabled = true;
-		netif_wake_queue(priv->netdev);
-		wake_up_interruptible(&priv->netmgmt_wq);
-		break;
-	default:
-		break;
-	}
-}
-
-static netdev_tx_t chnl_net_start_xmit(struct sk_buff *skb,
-				       struct net_device *dev)
-{
-	struct chnl_net *priv;
-	struct cfpkt *pkt = NULL;
-	int len;
-	int result = -1;
-	/* Get our private data. */
-	priv = netdev_priv(dev);
-
-	if (skb->len > priv->netdev->mtu) {
-		pr_warn("Size of skb exceeded MTU\n");
-		kfree_skb(skb);
-		dev->stats.tx_errors++;
-		return NETDEV_TX_OK;
-	}
-
-	if (!priv->flowenabled) {
-		pr_debug("dropping packets flow off\n");
-		kfree_skb(skb);
-		dev->stats.tx_dropped++;
-		return NETDEV_TX_OK;
-	}
-
-	if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
-		swap(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
-
-	/* Store original SKB length. */
-	len = skb->len;
-
-	pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb);
-
-	/* Send the packet down the stack. */
-	result = priv->chnl.dn->transmit(priv->chnl.dn, pkt);
-	if (result) {
-		dev->stats.tx_dropped++;
-		return NETDEV_TX_OK;
-	}
-
-	/* Update statistics. */
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += len;
-
-	return NETDEV_TX_OK;
-}
-
-static int chnl_net_open(struct net_device *dev)
-{
-	struct chnl_net *priv = NULL;
-	int result = -1;
-	int llifindex, headroom, tailroom, mtu;
-	struct net_device *lldev;
-	ASSERT_RTNL();
-	priv = netdev_priv(dev);
-	if (!priv) {
-		pr_debug("chnl_net_open: no priv\n");
-		return -ENODEV;
-	}
-
-	if (priv->state != CAIF_CONNECTING) {
-		priv->state = CAIF_CONNECTING;
-		result = caif_connect_client(dev_net(dev), &priv->conn_req,
-						&priv->chnl, &llifindex,
-						&headroom, &tailroom);
-		if (result != 0) {
-				pr_debug("err: "
-					 "Unable to register and open device,"
-					 " Err:%d\n",
-					 result);
-				goto error;
-		}
-
-		lldev = __dev_get_by_index(dev_net(dev), llifindex);
-
-		if (lldev == NULL) {
-			pr_debug("no interface?\n");
-			result = -ENODEV;
-			goto error;
-		}
-
-		dev->needed_tailroom = tailroom + lldev->needed_tailroom;
-		dev->hard_header_len = headroom + lldev->hard_header_len +
-			lldev->needed_tailroom;
-
-		/*
-		 * MTU, head-room etc is not know before we have a
-		 * CAIF link layer device available. MTU calculation may
-		 * override initial RTNL configuration.
-		 * MTU is minimum of current mtu, link layer mtu pluss
-		 * CAIF head and tail, and PDP GPRS contexts max MTU.
-		 */
-		mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom));
-		mtu = min_t(int, GPRS_PDP_MTU, mtu);
-		dev_set_mtu(dev, mtu);
-
-		if (mtu < 100) {
-			pr_warn("CAIF Interface MTU too small (%d)\n", mtu);
-			result = -ENODEV;
-			goto error;
-		}
-	}
-
-	rtnl_unlock();  /* Release RTNL lock during connect wait */
-
-	result = wait_event_interruptible_timeout(priv->netmgmt_wq,
-						priv->state != CAIF_CONNECTING,
-						CONNECT_TIMEOUT);
-
-	rtnl_lock();
-
-	if (result == -ERESTARTSYS) {
-		pr_debug("wait_event_interruptible woken by a signal\n");
-		result = -ERESTARTSYS;
-		goto error;
-	}
-
-	if (result == 0) {
-		pr_debug("connect timeout\n");
-		result = -ETIMEDOUT;
-		goto error;
-	}
-
-	if (priv->state != CAIF_CONNECTED) {
-		pr_debug("connect failed\n");
-		result = -ECONNREFUSED;
-		goto error;
-	}
-	pr_debug("CAIF Netdevice connected\n");
-	return 0;
-
-error:
-	caif_disconnect_client(dev_net(dev), &priv->chnl);
-	priv->state = CAIF_DISCONNECTED;
-	pr_debug("state disconnected\n");
-	return result;
-
-}
-
-static int chnl_net_stop(struct net_device *dev)
-{
-	struct chnl_net *priv;
-
-	ASSERT_RTNL();
-	priv = netdev_priv(dev);
-	priv->state = CAIF_DISCONNECTED;
-	caif_disconnect_client(dev_net(dev), &priv->chnl);
-	return 0;
-}
-
-static int chnl_net_init(struct net_device *dev)
-{
-	struct chnl_net *priv;
-	ASSERT_RTNL();
-	priv = netdev_priv(dev);
-	INIT_LIST_HEAD(&priv->list_field);
-	return 0;
-}
-
-static void chnl_net_uninit(struct net_device *dev)
-{
-	struct chnl_net *priv;
-	ASSERT_RTNL();
-	priv = netdev_priv(dev);
-	list_del_init(&priv->list_field);
-}
-
-static const struct net_device_ops netdev_ops = {
-	.ndo_open = chnl_net_open,
-	.ndo_stop = chnl_net_stop,
-	.ndo_init = chnl_net_init,
-	.ndo_uninit = chnl_net_uninit,
-	.ndo_start_xmit = chnl_net_start_xmit,
-};
-
-static void chnl_net_destructor(struct net_device *dev)
-{
-	struct chnl_net *priv = netdev_priv(dev);
-	caif_free_client(&priv->chnl);
-}
-
-static void ipcaif_net_setup(struct net_device *dev)
-{
-	struct chnl_net *priv;
-	dev->netdev_ops = &netdev_ops;
-	dev->needs_free_netdev = true;
-	dev->priv_destructor = chnl_net_destructor;
-	dev->flags |= IFF_NOARP;
-	dev->flags |= IFF_POINTOPOINT;
-	dev->mtu = GPRS_PDP_MTU;
-	dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN;
-
-	priv = netdev_priv(dev);
-	priv->chnl.receive = chnl_recv_cb;
-	priv->chnl.ctrlcmd = chnl_flowctrl_cb;
-	priv->netdev = dev;
-	priv->conn_req.protocol = CAIFPROTO_DATAGRAM;
-	priv->conn_req.link_selector = CAIF_LINK_HIGH_BANDW;
-	priv->conn_req.priority = CAIF_PRIO_LOW;
-	/* Insert illegal value */
-	priv->conn_req.sockaddr.u.dgm.connection_id = UNDEF_CONNID;
-	priv->flowenabled = false;
-
-	init_waitqueue_head(&priv->netmgmt_wq);
-}
-
-
-static int ipcaif_fill_info(struct sk_buff *skb, const struct net_device *dev)
-{
-	struct chnl_net *priv;
-	u8 loop;
-	priv = netdev_priv(dev);
-	if (nla_put_u32(skb, IFLA_CAIF_IPV4_CONNID,
-			priv->conn_req.sockaddr.u.dgm.connection_id) ||
-	    nla_put_u32(skb, IFLA_CAIF_IPV6_CONNID,
-			priv->conn_req.sockaddr.u.dgm.connection_id))
-		goto nla_put_failure;
-	loop = priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP;
-	if (nla_put_u8(skb, IFLA_CAIF_LOOPBACK, loop))
-		goto nla_put_failure;
-	return 0;
-nla_put_failure:
-	return -EMSGSIZE;
-
-}
-
-static void caif_netlink_parms(struct nlattr *data[],
-			       struct caif_connect_request *conn_req)
-{
-	if (!data) {
-		pr_warn("no params data found\n");
-		return;
-	}
-	if (data[IFLA_CAIF_IPV4_CONNID])
-		conn_req->sockaddr.u.dgm.connection_id =
-			nla_get_u32(data[IFLA_CAIF_IPV4_CONNID]);
-	if (data[IFLA_CAIF_IPV6_CONNID])
-		conn_req->sockaddr.u.dgm.connection_id =
-			nla_get_u32(data[IFLA_CAIF_IPV6_CONNID]);
-	if (data[IFLA_CAIF_LOOPBACK]) {
-		if (nla_get_u8(data[IFLA_CAIF_LOOPBACK]))
-			conn_req->protocol = CAIFPROTO_DATAGRAM_LOOP;
-		else
-			conn_req->protocol = CAIFPROTO_DATAGRAM;
-	}
-}
-
-static int ipcaif_newlink(struct net_device *dev,
-			  struct rtnl_newlink_params *params,
-			  struct netlink_ext_ack *extack)
-{
-	struct nlattr **data = params->data;
-	int ret;
-	struct chnl_net *caifdev;
-	ASSERT_RTNL();
-	caifdev = netdev_priv(dev);
-	caif_netlink_parms(data, &caifdev->conn_req);
-
-	ret = register_netdevice(dev);
-	if (ret)
-		pr_warn("device rtml registration failed\n");
-	else
-		list_add(&caifdev->list_field, &chnl_net_list);
-
-	/* Use ifindex as connection id, and use loopback channel default. */
-	if (caifdev->conn_req.sockaddr.u.dgm.connection_id == UNDEF_CONNID) {
-		caifdev->conn_req.sockaddr.u.dgm.connection_id = dev->ifindex;
-		caifdev->conn_req.protocol = CAIFPROTO_DATAGRAM_LOOP;
-	}
-	return ret;
-}
-
-static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[],
-			     struct nlattr *data[],
-			     struct netlink_ext_ack *extack)
-{
-	struct chnl_net *caifdev;
-	ASSERT_RTNL();
-	caifdev = netdev_priv(dev);
-	caif_netlink_parms(data, &caifdev->conn_req);
-	netdev_state_change(dev);
-	return 0;
-}
-
-static size_t ipcaif_get_size(const struct net_device *dev)
-{
-	return
-		/* IFLA_CAIF_IPV4_CONNID */
-		nla_total_size(4) +
-		/* IFLA_CAIF_IPV6_CONNID */
-		nla_total_size(4) +
-		/* IFLA_CAIF_LOOPBACK */
-		nla_total_size(2) +
-		0;
-}
-
-static const struct nla_policy ipcaif_policy[IFLA_CAIF_MAX + 1] = {
-	[IFLA_CAIF_IPV4_CONNID]	      = { .type = NLA_U32 },
-	[IFLA_CAIF_IPV6_CONNID]	      = { .type = NLA_U32 },
-	[IFLA_CAIF_LOOPBACK]	      = { .type = NLA_U8 }
-};
-
-
-static struct rtnl_link_ops ipcaif_link_ops __read_mostly = {
-	.kind		= "caif",
-	.priv_size	= sizeof(struct chnl_net),
-	.setup		= ipcaif_net_setup,
-	.maxtype	= IFLA_CAIF_MAX,
-	.policy		= ipcaif_policy,
-	.newlink	= ipcaif_newlink,
-	.changelink	= ipcaif_changelink,
-	.get_size	= ipcaif_get_size,
-	.fill_info	= ipcaif_fill_info,
-
-};
-
-static int __init chnl_init_module(void)
-{
-	return rtnl_link_register(&ipcaif_link_ops);
-}
-
-static void __exit chnl_exit_module(void)
-{
-	struct chnl_net *dev = NULL;
-	struct list_head *list_node;
-	struct list_head *_tmp;
-	rtnl_link_unregister(&ipcaif_link_ops);
-	rtnl_lock();
-	list_for_each_safe(list_node, _tmp, &chnl_net_list) {
-		dev = list_entry(list_node, struct chnl_net, list_field);
-		list_del_init(list_node);
-		delete_device(dev);
-	}
-	rtnl_unlock();
-}
-
-module_init(chnl_init_module);
-module_exit(chnl_exit_module);
-- 
cgit v1.2.3


From 4f10f1dfb235a28bd86cf0b00d86a59696ddbe5b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 20 Apr 2026 19:21:07 -0700
Subject: net: remove ISDN subsystem and Bluetooth CMTP

Remove the ISDN (mISDN, CAPI) subsystem and Bluetooth CMTP protocol
from the kernel tree.

ISDN is a pretty old technology and it's unclear whether anyone still
uses it. I went over the last few years of git history and all the
commits are either tree-wide conversions or syzbot/static analyzer
fixes.

When we discussed removal in the past IIRC there were some concerns
about ISDN still being used in parts of Germany. Unfortunately, the
code base is quite old, none of the current maintainers are familiar
with it and AI tools will have a field day finding bugs here.

Delete this code and preserve it in an out-of-tree repository
for any remaining users:
https://github.com/linux-netdev/mod-orphan

UAPI constants AF_ISDN/PF_ISDN and the SELinux isdn_socket class
are preserved for ABI stability, but the rest of uAPI is removed.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20260421022108.1299678-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 CREDITS                                     |    5 +
 Documentation/isdn/credits.rst              |   73 -
 Documentation/isdn/index.rst                |   14 -
 Documentation/isdn/interface_capi.rst       |  336 --
 Documentation/isdn/m_isdn.rst               |    9 -
 Documentation/subsystem-apis.rst            |    1 -
 MAINTAINERS                                 |   19 -
 drivers/Kconfig                             |    2 -
 drivers/Makefile                            |    1 -
 drivers/isdn/Kconfig                        |   27 -
 drivers/isdn/Makefile                       |    8 -
 drivers/isdn/capi/Kconfig                   |   32 -
 drivers/isdn/capi/Makefile                  |    6 -
 drivers/isdn/capi/capi.c                    | 1435 -------
 drivers/isdn/capi/capiutil.c                |  677 ----
 drivers/isdn/capi/kcapi.c                   |  933 -----
 drivers/isdn/capi/kcapi.h                   |  182 -
 drivers/isdn/capi/kcapi_proc.c              |  231 --
 drivers/isdn/hardware/Makefile              |    6 -
 drivers/isdn/hardware/mISDN/Kconfig         |   98 -
 drivers/isdn/hardware/mISDN/Makefile        |   19 -
 drivers/isdn/hardware/mISDN/avmfritz.c      | 1164 ------
 drivers/isdn/hardware/mISDN/hfc_multi.h     | 1236 ------
 drivers/isdn/hardware/mISDN/hfc_multi_8xx.h |  167 -
 drivers/isdn/hardware/mISDN/hfc_pci.h       |  214 --
 drivers/isdn/hardware/mISDN/hfcmulti.c      | 5540 ---------------------------
 drivers/isdn/hardware/mISDN/hfcpci.c        | 2360 ------------
 drivers/isdn/hardware/mISDN/hfcsusb.c       | 2157 -----------
 drivers/isdn/hardware/mISDN/hfcsusb.h       |  425 --
 drivers/isdn/hardware/mISDN/iohelper.h      |   96 -
 drivers/isdn/hardware/mISDN/ipac.h          |  393 --
 drivers/isdn/hardware/mISDN/isar.h          |  256 --
 drivers/isdn/hardware/mISDN/isdnhdlc.c      |  617 ---
 drivers/isdn/hardware/mISDN/isdnhdlc.h      |   69 -
 drivers/isdn/hardware/mISDN/mISDNinfineon.c | 1168 ------
 drivers/isdn/hardware/mISDN/mISDNipac.c     | 1636 --------
 drivers/isdn/hardware/mISDN/mISDNisar.c     | 1694 --------
 drivers/isdn/hardware/mISDN/netjet.c        | 1154 ------
 drivers/isdn/hardware/mISDN/netjet.h        |   44 -
 drivers/isdn/hardware/mISDN/speedfax.c      |  520 ---
 drivers/isdn/hardware/mISDN/w6692.c         | 1417 -------
 drivers/isdn/hardware/mISDN/w6692.h         |  177 -
 drivers/isdn/mISDN/Kconfig                  |   48 -
 drivers/isdn/mISDN/Makefile                 |   14 -
 drivers/isdn/mISDN/clock.c                  |  197 -
 drivers/isdn/mISDN/core.c                   |  400 --
 drivers/isdn/mISDN/core.h                   |   69 -
 drivers/isdn/mISDN/dsp.h                    |  277 --
 drivers/isdn/mISDN/dsp_audio.c              |  421 --
 drivers/isdn/mISDN/dsp_biquad.h             |   51 -
 drivers/isdn/mISDN/dsp_blowfish.c           |  667 ----
 drivers/isdn/mISDN/dsp_cmx.c                | 1949 ----------
 drivers/isdn/mISDN/dsp_core.c               | 1227 ------
 drivers/isdn/mISDN/dsp_dtmf.c               |  313 --
 drivers/isdn/mISDN/dsp_ecdis.h              |   96 -
 drivers/isdn/mISDN/dsp_hwec.c               |  122 -
 drivers/isdn/mISDN/dsp_hwec.h               |   10 -
 drivers/isdn/mISDN/dsp_pipeline.c           |  300 --
 drivers/isdn/mISDN/dsp_tones.c              |  550 ---
 drivers/isdn/mISDN/fsm.c                    |  176 -
 drivers/isdn/mISDN/fsm.h                    |   58 -
 drivers/isdn/mISDN/hwchannel.c              |  516 ---
 drivers/isdn/mISDN/l1oip.h                  |   92 -
 drivers/isdn/mISDN/l1oip_codec.c            |  358 --
 drivers/isdn/mISDN/l1oip_core.c             | 1505 --------
 drivers/isdn/mISDN/layer1.c                 |  415 --
 drivers/isdn/mISDN/layer1.h                 |   16 -
 drivers/isdn/mISDN/layer2.c                 | 2266 -----------
 drivers/isdn/mISDN/layer2.h                 |  131 -
 drivers/isdn/mISDN/socket.c                 |  825 ----
 drivers/isdn/mISDN/stack.c                  |  654 ----
 drivers/isdn/mISDN/tei.c                    | 1416 -------
 drivers/isdn/mISDN/timerdev.c               |  295 --
 include/linux/isdn/capilli.h                |   95 -
 include/linux/isdn/capiutil.h               |   60 -
 include/linux/kernelcapi.h                  |   45 -
 include/linux/mISDNdsp.h                    |   40 -
 include/linux/mISDNhw.h                     |  192 -
 include/linux/mISDNif.h                     |  603 ---
 include/uapi/linux/capi.h                   |  134 -
 include/uapi/linux/isdn/capicmd.h           |  117 -
 include/uapi/linux/kernelcapi.h             |   48 -
 net/bluetooth/Kconfig                       |    3 -
 net/bluetooth/Makefile                      |    1 -
 net/bluetooth/cmtp/Kconfig                  |   12 -
 net/bluetooth/cmtp/Makefile                 |    8 -
 net/bluetooth/cmtp/capi.c                   |  579 ---
 net/bluetooth/cmtp/cmtp.h                   |  129 -
 net/bluetooth/cmtp/core.c                   |  519 ---
 net/bluetooth/cmtp/sock.c                   |  271 --
 90 files changed, 5 insertions(+), 44903 deletions(-)
 delete mode 100644 Documentation/isdn/credits.rst
 delete mode 100644 Documentation/isdn/index.rst
 delete mode 100644 Documentation/isdn/interface_capi.rst
 delete mode 100644 Documentation/isdn/m_isdn.rst
 delete mode 100644 drivers/isdn/Kconfig
 delete mode 100644 drivers/isdn/Makefile
 delete mode 100644 drivers/isdn/capi/Kconfig
 delete mode 100644 drivers/isdn/capi/Makefile
 delete mode 100644 drivers/isdn/capi/capi.c
 delete mode 100644 drivers/isdn/capi/capiutil.c
 delete mode 100644 drivers/isdn/capi/kcapi.c
 delete mode 100644 drivers/isdn/capi/kcapi.h
 delete mode 100644 drivers/isdn/capi/kcapi_proc.c
 delete mode 100644 drivers/isdn/hardware/Makefile
 delete mode 100644 drivers/isdn/hardware/mISDN/Kconfig
 delete mode 100644 drivers/isdn/hardware/mISDN/Makefile
 delete mode 100644 drivers/isdn/hardware/mISDN/avmfritz.c
 delete mode 100644 drivers/isdn/hardware/mISDN/hfc_multi.h
 delete mode 100644 drivers/isdn/hardware/mISDN/hfc_multi_8xx.h
 delete mode 100644 drivers/isdn/hardware/mISDN/hfc_pci.h
 delete mode 100644 drivers/isdn/hardware/mISDN/hfcmulti.c
 delete mode 100644 drivers/isdn/hardware/mISDN/hfcpci.c
 delete mode 100644 drivers/isdn/hardware/mISDN/hfcsusb.c
 delete mode 100644 drivers/isdn/hardware/mISDN/hfcsusb.h
 delete mode 100644 drivers/isdn/hardware/mISDN/iohelper.h
 delete mode 100644 drivers/isdn/hardware/mISDN/ipac.h
 delete mode 100644 drivers/isdn/hardware/mISDN/isar.h
 delete mode 100644 drivers/isdn/hardware/mISDN/isdnhdlc.c
 delete mode 100644 drivers/isdn/hardware/mISDN/isdnhdlc.h
 delete mode 100644 drivers/isdn/hardware/mISDN/mISDNinfineon.c
 delete mode 100644 drivers/isdn/hardware/mISDN/mISDNipac.c
 delete mode 100644 drivers/isdn/hardware/mISDN/mISDNisar.c
 delete mode 100644 drivers/isdn/hardware/mISDN/netjet.c
 delete mode 100644 drivers/isdn/hardware/mISDN/netjet.h
 delete mode 100644 drivers/isdn/hardware/mISDN/speedfax.c
 delete mode 100644 drivers/isdn/hardware/mISDN/w6692.c
 delete mode 100644 drivers/isdn/hardware/mISDN/w6692.h
 delete mode 100644 drivers/isdn/mISDN/Kconfig
 delete mode 100644 drivers/isdn/mISDN/Makefile
 delete mode 100644 drivers/isdn/mISDN/clock.c
 delete mode 100644 drivers/isdn/mISDN/core.c
 delete mode 100644 drivers/isdn/mISDN/core.h
 delete mode 100644 drivers/isdn/mISDN/dsp.h
 delete mode 100644 drivers/isdn/mISDN/dsp_audio.c
 delete mode 100644 drivers/isdn/mISDN/dsp_biquad.h
 delete mode 100644 drivers/isdn/mISDN/dsp_blowfish.c
 delete mode 100644 drivers/isdn/mISDN/dsp_cmx.c
 delete mode 100644 drivers/isdn/mISDN/dsp_core.c
 delete mode 100644 drivers/isdn/mISDN/dsp_dtmf.c
 delete mode 100644 drivers/isdn/mISDN/dsp_ecdis.h
 delete mode 100644 drivers/isdn/mISDN/dsp_hwec.c
 delete mode 100644 drivers/isdn/mISDN/dsp_hwec.h
 delete mode 100644 drivers/isdn/mISDN/dsp_pipeline.c
 delete mode 100644 drivers/isdn/mISDN/dsp_tones.c
 delete mode 100644 drivers/isdn/mISDN/fsm.c
 delete mode 100644 drivers/isdn/mISDN/fsm.h
 delete mode 100644 drivers/isdn/mISDN/hwchannel.c
 delete mode 100644 drivers/isdn/mISDN/l1oip.h
 delete mode 100644 drivers/isdn/mISDN/l1oip_codec.c
 delete mode 100644 drivers/isdn/mISDN/l1oip_core.c
 delete mode 100644 drivers/isdn/mISDN/layer1.c
 delete mode 100644 drivers/isdn/mISDN/layer1.h
 delete mode 100644 drivers/isdn/mISDN/layer2.c
 delete mode 100644 drivers/isdn/mISDN/layer2.h
 delete mode 100644 drivers/isdn/mISDN/socket.c
 delete mode 100644 drivers/isdn/mISDN/stack.c
 delete mode 100644 drivers/isdn/mISDN/tei.c
 delete mode 100644 drivers/isdn/mISDN/timerdev.c
 delete mode 100644 include/linux/isdn/capilli.h
 delete mode 100644 include/linux/isdn/capiutil.h
 delete mode 100644 include/linux/kernelcapi.h
 delete mode 100644 include/linux/mISDNdsp.h
 delete mode 100644 include/linux/mISDNhw.h
 delete mode 100644 include/linux/mISDNif.h
 delete mode 100644 include/uapi/linux/capi.h
 delete mode 100644 include/uapi/linux/isdn/capicmd.h
 delete mode 100644 include/uapi/linux/kernelcapi.h
 delete mode 100644 net/bluetooth/cmtp/Kconfig
 delete mode 100644 net/bluetooth/cmtp/Makefile
 delete mode 100644 net/bluetooth/cmtp/capi.c
 delete mode 100644 net/bluetooth/cmtp/cmtp.h
 delete mode 100644 net/bluetooth/cmtp/core.c
 delete mode 100644 net/bluetooth/cmtp/sock.c

(limited to 'include/linux')

diff --git a/CREDITS b/CREDITS
index a03b00452a1e..eeeece8ed868 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3649,6 +3649,11 @@ S: Dag Hammerskjolds v. 3E
 S: S-226 64 LUND
 S: Sweden
 
+N: Tilman Schmidt
+E: tilman@imap.cc
+D: Siemens Gigaset ISDN driver author and maintainer
+D: ISDN CAPI subsystem contributions
+
 N: Henning P. Schmiedehausen
 E: hps@tanstaafl.de
 D: added PCI support to the serial driver
diff --git a/Documentation/isdn/credits.rst b/Documentation/isdn/credits.rst
deleted file mode 100644
index 319323f2091f..000000000000
--- a/Documentation/isdn/credits.rst
+++ /dev/null
@@ -1,73 +0,0 @@
-=======
-Credits
-=======
-
-
-I want to thank all who contributed to this project and especially to:
-(in alphabetical order)
-
-Thomas Bogendörfer (tsbogend@bigbug.franken.de)
-  Tester, lots of bugfixes and hints.
-
-Alan Cox (alan@lxorguk.ukuu.org.uk)
-  For help getting into standard-kernel.
-
-Henner Eisen (eis@baty.hanse.de)
-  For X.25 implementation.
-
-Volker Götz (volker@oops.franken.de)
-  For contribution of man-pages, the imontty-tool and a perfect
-  maintaining of the mailing-list at hub-wue.
-
-Matthias Hessler (hessler@isdn4linux.de)
-  For creating and maintaining the FAQ.
-
-Bernhard Hailer (Bernhard.Hailer@lrz.uni-muenchen.de)
-  For creating the FAQ, and the leafsite HOWTO.
-
-Michael 'Ghandi' Herold (michael@abadonna.franken.de)
-  For contribution of the vbox answering machine.
-
-Michael Hipp (Michael.Hipp@student.uni-tuebingen.de)
-  For his Sync-PPP-code.
-
-Karsten Keil (keil@isdn4linux.de)
-  For adding 1TR6-support to the Teles-driver.
-  For the HiSax-driver.
-
-Michael Knigge (knick@cove.han.de)
-  For contributing the imon-tool
-
-Andreas Kool (akool@Kool.f.EUnet.de)
-  For contribution of the isdnlog/isdnrep-tool
-
-Pedro Roque Marques (roque@di.fc.ul.pt)
-  For lot of new ideas and the pcbit driver.
-
-Eberhard Mönkeberg (emoenke@gwdg.de)
-  For testing and help to get into kernel.
-
-Thomas Neumann (tn@ruhr.de)
-  For help with Cisco-SLARP and keepalive
-
-Jan den Ouden (denouden@groovin.xs4all.nl)
-  For contribution of the original teles-driver
-
-Carsten Paeth (calle@calle.in-berlin.de)
-  For the AVM-B1-CAPI2.0 driver
-
-Thomas Pfeiffer (pfeiffer@pds.de)
-  For V.110, extended T.70 and Hylafax extensions in isdn_tty.c
-
-Max Riegel (riegel@max.franken.de)
-  For making the ICN hardware-documentation and test-equipment available.
-
-Armin Schindler (mac@melware.de)
-  For the eicon active card driver.
-
-Gerhard 'Fido' Schneider (fido@wuff.mayn.de)
-  For heavy-duty-beta-testing with his BBS ;)
-
-Thomas Uhl (uhl@think.de)
-  For distributing the cards.
-  For pushing me to work ;-)
diff --git a/Documentation/isdn/index.rst b/Documentation/isdn/index.rst
deleted file mode 100644
index d1125a16a746..000000000000
--- a/Documentation/isdn/index.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-====
-ISDN
-====
-
-.. toctree::
-   :maxdepth: 2
-
-   interface_capi
-
-   m_isdn
-
-   credits
diff --git a/Documentation/isdn/interface_capi.rst b/Documentation/isdn/interface_capi.rst
deleted file mode 100644
index 4d63b34b35cf..000000000000
--- a/Documentation/isdn/interface_capi.rst
+++ /dev/null
@@ -1,336 +0,0 @@
-=========================================
-Kernel CAPI Interface to Hardware Drivers
-=========================================
-
-1. Overview
-===========
-
-From the CAPI 2.0 specification:
-COMMON-ISDN-API (CAPI) is an application programming interface standard used
-to access ISDN equipment connected to basic rate interfaces (BRI) and primary
-rate interfaces (PRI).
-
-Kernel CAPI operates as a dispatching layer between CAPI applications and CAPI
-hardware drivers. Hardware drivers register ISDN devices (controllers, in CAPI
-lingo) with Kernel CAPI to indicate their readiness to provide their service
-to CAPI applications. CAPI applications also register with Kernel CAPI,
-requesting association with a CAPI device. Kernel CAPI then dispatches the
-application registration to an available device, forwarding it to the
-corresponding hardware driver. Kernel CAPI then forwards CAPI messages in both
-directions between the application and the hardware driver.
-
-Format and semantics of CAPI messages are specified in the CAPI 2.0 standard.
-This standard is freely available from https://www.capi.org.
-
-
-2. Driver and Device Registration
-=================================
-
-CAPI drivers must register each of the ISDN devices they control with Kernel
-CAPI by calling the Kernel CAPI function attach_capi_ctr() with a pointer to a
-struct capi_ctr before they can be used. This structure must be filled with
-the names of the driver and controller, and a number of callback function
-pointers which are subsequently used by Kernel CAPI for communicating with the
-driver. The registration can be revoked by calling the function
-detach_capi_ctr() with a pointer to the same struct capi_ctr.
-
-Before the device can be actually used, the driver must fill in the device
-information fields 'manu', 'version', 'profile' and 'serial' in the capi_ctr
-structure of the device, and signal its readiness by calling capi_ctr_ready().
-From then on, Kernel CAPI may call the registered callback functions for the
-device.
-
-If the device becomes unusable for any reason (shutdown, disconnect ...), the
-driver has to call capi_ctr_down(). This will prevent further calls to the
-callback functions by Kernel CAPI.
-
-
-3. Application Registration and Communication
-=============================================
-
-Kernel CAPI forwards registration requests from applications (calls to CAPI
-operation CAPI_REGISTER) to an appropriate hardware driver by calling its
-register_appl() callback function. A unique Application ID (ApplID, u16) is
-allocated by Kernel CAPI and passed to register_appl() along with the
-parameter structure provided by the application. This is analogous to the
-open() operation on regular files or character devices.
-
-After a successful return from register_appl(), CAPI messages from the
-application may be passed to the driver for the device via calls to the
-send_message() callback function. Conversely, the driver may call Kernel
-CAPI's capi_ctr_handle_message() function to pass a received CAPI message to
-Kernel CAPI for forwarding to an application, specifying its ApplID.
-
-Deregistration requests (CAPI operation CAPI_RELEASE) from applications are
-forwarded as calls to the release_appl() callback function, passing the same
-ApplID as with register_appl(). After return from release_appl(), no CAPI
-messages for that application may be passed to or from the device anymore.
-
-
-4. Data Structures
-==================
-
-4.1 struct capi_driver
-----------------------
-
-This structure describes a Kernel CAPI driver itself. It is used in the
-register_capi_driver() and unregister_capi_driver() functions, and contains
-the following non-private fields, all to be set by the driver before calling
-register_capi_driver():
-
-``char name[32]``
-	the name of the driver, as a zero-terminated ASCII string
-``char revision[32]``
-	the revision number of the driver, as a zero-terminated ASCII string
-
-4.2 struct capi_ctr
--------------------
-
-This structure describes an ISDN device (controller) handled by a Kernel CAPI
-driver. After registration via the attach_capi_ctr() function it is passed to
-all controller specific lower layer interface and callback functions to
-identify the controller to operate on.
-
-It contains the following non-private fields:
-
-to be set by the driver before calling attach_capi_ctr():
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-``struct module *owner``
-	pointer to the driver module owning the device
-
-``void *driverdata``
-	an opaque pointer to driver specific data, not touched by Kernel CAPI
-
-``char name[32]``
-	the name of the controller, as a zero-terminated ASCII string
-
-``char *driver_name``
-	the name of the driver, as a zero-terminated ASCII string
-
-``int (*load_firmware)(struct capi_ctr *ctrlr, capiloaddata *ldata)``
-	(optional) pointer to a callback function for sending firmware and
-	configuration data to the device
-
-	The function may return before the operation has completed.
-
-	Completion must be signalled by a call to capi_ctr_ready().
-
-	Return value: 0 on success, error code on error
-	Called in process context.
-
-``void (*reset_ctr)(struct capi_ctr *ctrlr)``
-	(optional) pointer to a callback function for stopping the device,
-	releasing all registered applications
-
-	The function may return before the operation has completed.
-
-	Completion must be signalled by a call to capi_ctr_down().
-
-	Called in process context.
-
-``void (*register_appl)(struct capi_ctr *ctrlr, u16 applid, capi_register_params *rparam)``
-	pointers to callback function for registration of
-	applications with the device
-
-	Calls to these functions are serialized by Kernel CAPI so that only
-	one call to any of them is active at any time.
-
-``void (*release_appl)(struct capi_ctr *ctrlr, u16 applid)``
-	pointers to callback functions deregistration of
-	applications with the device
-
-	Calls to these functions are serialized by Kernel CAPI so that only
-	one call to any of them is active at any time.
-
-``u16  (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb)``
-	pointer to a callback function for sending a CAPI message to the
-	device
-
-	Return value: CAPI error code
-
-	If the method returns 0 (CAPI_NOERROR) the driver has taken ownership
-	of the skb and the caller may no longer access it. If it returns a
-	non-zero (error) value then ownership of the skb returns to the caller
-	who may reuse or free it.
-
-	The return value should only be used to signal problems with respect
-	to accepting or queueing the message. Errors occurring during the
-	actual processing of the message should be signaled with an
-	appropriate reply message.
-
-	May be called in process or interrupt context.
-
-	Calls to this function are not serialized by Kernel CAPI, ie. it must
-	be prepared to be re-entered.
-
-``char *(*procinfo)(struct capi_ctr *ctrlr)``
-	pointer to a callback function returning the entry for the device in
-	the CAPI controller info table, /proc/capi/controller
-
-Note:
-  Callback functions except send_message() are never called in interrupt
-  context.
-
-to be filled in before calling capi_ctr_ready():
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-``u8 manu[CAPI_MANUFACTURER_LEN]``
-	value to return for CAPI_GET_MANUFACTURER
-
-``capi_version version``
-	value to return for CAPI_GET_VERSION
-
-``capi_profile profile``
-	value to return for CAPI_GET_PROFILE
-
-``u8 serial[CAPI_SERIAL_LEN]``
-	value to return for CAPI_GET_SERIAL
-
-
-4.3 SKBs
---------
-
-CAPI messages are passed between Kernel CAPI and the driver via send_message()
-and capi_ctr_handle_message(), stored in the data portion of a socket buffer
-(skb).  Each skb contains a single CAPI message coded according to the CAPI 2.0
-standard.
-
-For the data transfer messages, DATA_B3_REQ and DATA_B3_IND, the actual
-payload data immediately follows the CAPI message itself within the same skb.
-The Data and Data64 parameters are not used for processing. The Data64
-parameter may be omitted by setting the length field of the CAPI message to 22
-instead of 30.
-
-
-4.4 The _cmsg Structure
------------------------
-
-(declared in <linux/isdn/capiutil.h>)
-
-The _cmsg structure stores the contents of a CAPI 2.0 message in an easily
-accessible form. It contains members for all possible CAPI 2.0 parameters,
-including subparameters of the Additional Info and B Protocol structured
-parameters, with the following exceptions:
-
-* second Calling party number (CONNECT_IND)
-
-* Data64 (DATA_B3_REQ and DATA_B3_IND)
-
-* Sending complete (subparameter of Additional Info, CONNECT_REQ and INFO_REQ)
-
-* Global Configuration (subparameter of B Protocol, CONNECT_REQ, CONNECT_RESP
-  and SELECT_B_PROTOCOL_REQ)
-
-Only those parameters appearing in the message type currently being processed
-are actually used. Unused members should be set to zero.
-
-Members are named after the CAPI 2.0 standard names of the parameters they
-represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data
-types are:
-
-=========== =================================================================
-u8          for CAPI parameters of type 'byte'
-
-u16         for CAPI parameters of type 'word'
-
-u32         for CAPI parameters of type 'dword'
-
-_cstruct    for CAPI parameters of type 'struct'
-	    The member is a pointer to a buffer containing the parameter in
-	    CAPI encoding (length + content). It may also be NULL, which will
-	    be taken to represent an empty (zero length) parameter.
-	    Subparameters are stored in encoded form within the content part.
-
-_cmstruct   alternative representation for CAPI parameters of type 'struct'
-	    (used only for the 'Additional Info' and 'B Protocol' parameters)
-	    The representation is a single byte containing one of the values:
-	    CAPI_DEFAULT: The parameter is empty/absent.
-	    CAPI_COMPOSE: The parameter is present.
-	    Subparameter values are stored individually in the corresponding
-	    _cmsg structure members.
-=========== =================================================================
-
-
-5. Lower Layer Interface Functions
-==================================
-
-::
-
-  int attach_capi_ctr(struct capi_ctr *ctrlr)
-  int detach_capi_ctr(struct capi_ctr *ctrlr)
-
-register/unregister a device (controller) with Kernel CAPI
-
-::
-
-  void capi_ctr_ready(struct capi_ctr *ctrlr)
-  void capi_ctr_down(struct capi_ctr *ctrlr)
-
-signal controller ready/not ready
-
-::
-
-  void capi_ctr_handle_message(struct capi_ctr * ctrlr, u16 applid,
-			       struct sk_buff *skb)
-
-pass a received CAPI message to Kernel CAPI
-for forwarding to the specified application
-
-
-6. Helper Functions and Macros
-==============================
-
-Macros to extract/set element values from/in a CAPI message header
-(from <linux/isdn/capiutil.h>):
-
-======================  =============================   ====================
-Get Macro		Set Macro			Element (Type)
-======================  =============================   ====================
-CAPIMSG_LEN(m)		CAPIMSG_SETLEN(m, len)		Total Length (u16)
-CAPIMSG_APPID(m)	CAPIMSG_SETAPPID(m, applid)	ApplID (u16)
-CAPIMSG_COMMAND(m)	CAPIMSG_SETCOMMAND(m,cmd)	Command (u8)
-CAPIMSG_SUBCOMMAND(m)	CAPIMSG_SETSUBCOMMAND(m, cmd)	Subcommand (u8)
-CAPIMSG_CMD(m)		-				Command*256
-							+ Subcommand (u16)
-CAPIMSG_MSGID(m)	CAPIMSG_SETMSGID(m, msgid)	Message Number (u16)
-
-CAPIMSG_CONTROL(m)	CAPIMSG_SETCONTROL(m, contr)	Controller/PLCI/NCCI
-							(u32)
-CAPIMSG_DATALEN(m)	CAPIMSG_SETDATALEN(m, len)	Data Length (u16)
-======================  =============================   ====================
-
-
-Library functions for working with _cmsg structures
-(from <linux/isdn/capiutil.h>):
-
-``char *capi_cmd2str(u8 Command, u8 Subcommand)``
-	Returns the CAPI 2.0 message name corresponding to the given command
-	and subcommand values, as a static ASCII string. The return value may
-	be NULL if the command/subcommand is not one of those defined in the
-	CAPI 2.0 standard.
-
-
-7. Debugging
-============
-
-The module kernelcapi has a module parameter showcapimsgs controlling some
-debugging output produced by the module. It can only be set when the module is
-loaded, via a parameter "showcapimsgs=<n>" to the modprobe command, either on
-the command line or in the configuration file.
-
-If the lowest bit of showcapimsgs is set, kernelcapi logs controller and
-application up and down events.
-
-In addition, every registered CAPI controller has an associated traceflag
-parameter controlling how CAPI messages sent from and to the controller are
-logged. The traceflag parameter is initialized with the value of the
-showcapimsgs parameter when the controller is registered, but can later be
-changed via the MANUFACTURER_REQ command KCAPI_CMD_TRACE.
-
-If the value of traceflag is non-zero, CAPI messages are logged.
-DATA_B3 messages are only logged if the value of traceflag is > 2.
-
-If the lowest bit of traceflag is set, only the command/subcommand and message
-length are logged. Otherwise, kernelcapi logs a readable representation of
-the entire message.
diff --git a/Documentation/isdn/m_isdn.rst b/Documentation/isdn/m_isdn.rst
deleted file mode 100644
index 5847a164287e..000000000000
--- a/Documentation/isdn/m_isdn.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-============
-mISDN Driver
-============
-
-mISDN is a new modular ISDN driver, in the long term it should replace
-the old I4L driver architecture for passive ISDN cards.
-It was designed to allow a broad range of applications and interfaces
-but only have the basic function in kernel, the interface to the user
-space is based on sockets with a own address family AF_ISDN.
diff --git a/Documentation/subsystem-apis.rst b/Documentation/subsystem-apis.rst
index ff4fe8c936c8..b1ad48bb4001 100644
--- a/Documentation/subsystem-apis.rst
+++ b/Documentation/subsystem-apis.rst
@@ -46,7 +46,6 @@ Networking interfaces
    networking/index
    netlabel/index
    infiniband/index
-   isdn/index
    mhi/index
 
 Storage interfaces
diff --git a/MAINTAINERS b/MAINTAINERS
index 2b1b5e93c272..e4856d3427d9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13599,25 +13599,6 @@ S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master
 F:	drivers/infiniband/ulp/isert
 
-ISDN/CMTP OVER BLUETOOTH
-L:	netdev@vger.kernel.org
-S:	Orphan
-W:	http://www.isdn4linux.de
-F:	Documentation/isdn/
-F:	drivers/isdn/capi/
-F:	include/linux/isdn/
-F:	include/uapi/linux/isdn/
-F:	net/bluetooth/cmtp/
-
-ISDN/mISDN SUBSYSTEM
-L:	netdev@vger.kernel.org
-S:	Orphan
-W:	http://www.isdn4linux.de
-F:	drivers/isdn/Kconfig
-F:	drivers/isdn/Makefile
-F:	drivers/isdn/hardware/
-F:	drivers/isdn/mISDN/
-
 ISL28022 HARDWARE MONITORING DRIVER
 M:	Carsten Spieß <mail@carsten-spiess.de>
 L:	linux-hwmon@vger.kernel.org
diff --git a/drivers/Kconfig b/drivers/Kconfig
index c0f1fb893ec0..f2bed2ddeb66 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -61,8 +61,6 @@ source "drivers/macintosh/Kconfig"
 
 source "drivers/net/Kconfig"
 
-source "drivers/isdn/Kconfig"
-
 # input before char - char/joystick depends on it. As does USB.
 
 source "drivers/input/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 53fbd2e0acdd..0841ea851847 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -124,7 +124,6 @@ obj-$(CONFIG_WATCHDOG)		+= watchdog/
 obj-$(CONFIG_MD)		+= md/
 obj-$(CONFIG_BT)		+= bluetooth/
 obj-$(CONFIG_ACCESSIBILITY)	+= accessibility/
-obj-$(CONFIG_ISDN)		+= isdn/
 obj-$(CONFIG_EDAC)		+= edac/
 obj-$(CONFIG_EISA)		+= eisa/
 obj-$(CONFIG_PM_OPP)		+= opp/
diff --git a/drivers/isdn/Kconfig b/drivers/isdn/Kconfig
deleted file mode 100644
index 6fd1b3f84a29..000000000000
--- a/drivers/isdn/Kconfig
+++ /dev/null
@@ -1,27 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# ISDN device configuration
-#
-
-menuconfig ISDN
-	bool "ISDN support"
-	depends on NET && NETDEVICES
-	help
-	  ISDN ("Integrated Services Digital Network", called RNIS in France)
-	  is a fully digital telephone service that can be used for voice and
-	  data connections.  If your computer is equipped with an ISDN
-	  adapter you can use it to connect to your Internet service provider
-	  (with SLIP or PPP) faster than via a conventional telephone modem
-	  (though still much slower than with DSL) or to make and accept
-	  voice calls (eg. turning your PC into a software answering machine
-	  or PABX).
-
-	  Select this option if you want your kernel to support ISDN.
-
-if ISDN
-
-source "drivers/isdn/capi/Kconfig"
-
-source "drivers/isdn/mISDN/Kconfig"
-
-endif # ISDN
diff --git a/drivers/isdn/Makefile b/drivers/isdn/Makefile
deleted file mode 100644
index d14334f4007e..000000000000
--- a/drivers/isdn/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Makefile for the kernel ISDN subsystem and device drivers.
-
-# Object files in subdirectories
-
-obj-$(CONFIG_BT_CMTP)			+= capi/
-obj-$(CONFIG_MISDN)			+= mISDN/
-obj-$(CONFIG_ISDN)			+= hardware/
diff --git a/drivers/isdn/capi/Kconfig b/drivers/isdn/capi/Kconfig
deleted file mode 100644
index fdb43a632215..000000000000
--- a/drivers/isdn/capi/Kconfig
+++ /dev/null
@@ -1,32 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config ISDN_CAPI
-	def_bool ISDN && BT
-	help
-	  This provides CAPI (the Common ISDN Application Programming
-	  Interface) Version 2.0, a standard making it easy for programs to
-	  access ISDN hardware in a device independent way. (For details see
-	  <https://www.capi.org/>.)  CAPI supports making and accepting voice
-	  and data connections, controlling call options and protocols,
-	  as well as ISDN supplementary services like call forwarding or
-	  three-party conferences (if supported by the specific hardware
-	  driver).
-
-	  This subsystem requires a hardware specific driver.
-	  See CONFIG_BT_CMTP for the last remaining regular driver
-	  in the kernel that uses the CAPI subsystem.
-
-config CAPI_TRACE
-	def_bool BT_CMTP
-	help
-	  If you say Y here, the kernelcapi driver can make verbose traces
-	  of CAPI messages. This feature can be enabled/disabled via IOCTL for
-	  every controller (default disabled).
-
-config ISDN_CAPI_MIDDLEWARE
-	def_bool BT_CMTP && TTY
-	help
-	  This option will enhance the capabilities of the /dev/capi20
-	  interface.  It will provide a means of moving a data connection,
-	  established via the usual /dev/capi20 interface to a special tty
-	  device.  If you want to use pppd with pppdcapiplugin to dial up to
-	  your ISP, say Y here.
diff --git a/drivers/isdn/capi/Makefile b/drivers/isdn/capi/Makefile
deleted file mode 100644
index 4fd3a4d7133f..000000000000
--- a/drivers/isdn/capi/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Makefile for the CAPI subsystem used by BT_CMTP
-
-obj-$(CONFIG_BT_CMTP)			+= kernelcapi.o
-kernelcapi-y				:= kcapi.o capiutil.o capi.o
-kernelcapi-$(CONFIG_PROC_FS)		+= kcapi_proc.o
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
deleted file mode 100644
index aa28b1d32c4e..000000000000
--- a/drivers/isdn/capi/capi.c
+++ /dev/null
@@ -1,1435 +0,0 @@
-/* $Id: capi.c,v 1.1.2.7 2004/04/28 09:48:59 armin Exp $
- *
- * CAPI 2.0 Interface for Linux
- *
- * Copyright 1996 by Carsten Paeth <calle@calle.de>
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#include <linux/compiler.h>
-#include <linux/module.h>
-#include <linux/ethtool.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/major.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/fcntl.h>
-#include <linux/fs.h>
-#include <linux/signal.h>
-#include <linux/mutex.h>
-#include <linux/mm.h>
-#include <linux/timer.h>
-#include <linux/wait.h>
-#include <linux/tty.h>
-#include <linux/netdevice.h>
-#include <linux/ppp_defs.h>
-#include <linux/ppp-ioctl.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/poll.h>
-#include <linux/capi.h>
-#include <linux/kernelcapi.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/moduleparam.h>
-#include <linux/isdn/capiutil.h>
-#include <linux/isdn/capicmd.h>
-
-#include "kcapi.h"
-
-MODULE_DESCRIPTION("CAPI4Linux: kernel CAPI layer and /dev/capi20 interface");
-MODULE_AUTHOR("Carsten Paeth");
-MODULE_LICENSE("GPL");
-
-/* -------- driver information -------------------------------------- */
-
-static DEFINE_MUTEX(capi_mutex);
-static const struct class capi_class = {
-	.name = "capi",
-};
-static int capi_major = 68;		/* allocated */
-
-module_param_named(major, capi_major, uint, 0);
-
-#ifdef CONFIG_ISDN_CAPI_MIDDLEWARE
-#define CAPINC_NR_PORTS		32
-#define CAPINC_MAX_PORTS	256
-
-static int capi_ttyminors = CAPINC_NR_PORTS;
-
-module_param_named(ttyminors, capi_ttyminors, uint, 0);
-#endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */
-
-/* -------- defines ------------------------------------------------- */
-
-#define CAPINC_MAX_RECVQUEUE	10
-#define CAPINC_MAX_SENDQUEUE	10
-#define CAPI_MAX_BLKSIZE	2048
-
-/* -------- data structures ----------------------------------------- */
-
-struct capidev;
-struct capincci;
-struct capiminor;
-
-struct ackqueue_entry {
-	struct list_head	list;
-	u16			datahandle;
-};
-
-struct capiminor {
-	unsigned int      minor;
-
-	struct capi20_appl	*ap;
-	u32			ncci;
-	atomic_t		datahandle;
-	atomic_t		msgid;
-
-	struct tty_port port;
-	int                ttyinstop;
-	int                ttyoutstop;
-
-	struct sk_buff_head	inqueue;
-
-	struct sk_buff_head	outqueue;
-	int			outbytes;
-	struct sk_buff		*outskb;
-	spinlock_t		outlock;
-
-	/* transmit path */
-	struct list_head ackqueue;
-	int nack;
-	spinlock_t ackqlock;
-};
-
-struct capincci {
-	struct list_head list;
-	u32		 ncci;
-	struct capidev	*cdev;
-#ifdef CONFIG_ISDN_CAPI_MIDDLEWARE
-	struct capiminor *minorp;
-#endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */
-};
-
-struct capidev {
-	struct list_head list;
-	struct capi20_appl ap;
-	u16		errcode;
-	unsigned        userflags;
-
-	struct sk_buff_head recvqueue;
-	wait_queue_head_t recvwait;
-
-	struct list_head nccis;
-
-	struct mutex lock;
-};
-
-/* -------- global variables ---------------------------------------- */
-
-static DEFINE_MUTEX(capidev_list_lock);
-static LIST_HEAD(capidev_list);
-
-#ifdef CONFIG_ISDN_CAPI_MIDDLEWARE
-
-static DEFINE_SPINLOCK(capiminors_lock);
-static struct capiminor **capiminors;
-
-static struct tty_driver *capinc_tty_driver;
-
-/* -------- datahandles --------------------------------------------- */
-
-static int capiminor_add_ack(struct capiminor *mp, u16 datahandle)
-{
-	struct ackqueue_entry *n;
-
-	n = kmalloc_obj(*n, GFP_ATOMIC);
-	if (unlikely(!n)) {
-		printk(KERN_ERR "capi: alloc datahandle failed\n");
-		return -1;
-	}
-	n->datahandle = datahandle;
-	INIT_LIST_HEAD(&n->list);
-	spin_lock_bh(&mp->ackqlock);
-	list_add_tail(&n->list, &mp->ackqueue);
-	mp->nack++;
-	spin_unlock_bh(&mp->ackqlock);
-	return 0;
-}
-
-static int capiminor_del_ack(struct capiminor *mp, u16 datahandle)
-{
-	struct ackqueue_entry *p, *tmp;
-
-	spin_lock_bh(&mp->ackqlock);
-	list_for_each_entry_safe(p, tmp, &mp->ackqueue, list) {
-		if (p->datahandle == datahandle) {
-			list_del(&p->list);
-			mp->nack--;
-			spin_unlock_bh(&mp->ackqlock);
-			kfree(p);
-			return 0;
-		}
-	}
-	spin_unlock_bh(&mp->ackqlock);
-	return -1;
-}
-
-static void capiminor_del_all_ack(struct capiminor *mp)
-{
-	struct ackqueue_entry *p, *tmp;
-
-	list_for_each_entry_safe(p, tmp, &mp->ackqueue, list) {
-		list_del(&p->list);
-		kfree(p);
-		mp->nack--;
-	}
-}
-
-
-/* -------- struct capiminor ---------------------------------------- */
-
-static void capiminor_destroy(struct tty_port *port)
-{
-	struct capiminor *mp = container_of(port, struct capiminor, port);
-
-	kfree_skb(mp->outskb);
-	skb_queue_purge(&mp->inqueue);
-	skb_queue_purge(&mp->outqueue);
-	capiminor_del_all_ack(mp);
-	kfree(mp);
-}
-
-static const struct tty_port_operations capiminor_port_ops = {
-	.destruct = capiminor_destroy,
-};
-
-static struct capiminor *capiminor_alloc(struct capi20_appl *ap, u32 ncci)
-{
-	struct capiminor *mp;
-	struct device *dev;
-	unsigned int minor;
-
-	mp = kzalloc_obj(*mp);
-	if (!mp) {
-		printk(KERN_ERR "capi: can't alloc capiminor\n");
-		return NULL;
-	}
-
-	mp->ap = ap;
-	mp->ncci = ncci;
-	INIT_LIST_HEAD(&mp->ackqueue);
-	spin_lock_init(&mp->ackqlock);
-
-	skb_queue_head_init(&mp->inqueue);
-	skb_queue_head_init(&mp->outqueue);
-	spin_lock_init(&mp->outlock);
-
-	tty_port_init(&mp->port);
-	mp->port.ops = &capiminor_port_ops;
-
-	/* Allocate the least unused minor number. */
-	spin_lock(&capiminors_lock);
-	for (minor = 0; minor < capi_ttyminors; minor++)
-		if (!capiminors[minor]) {
-			capiminors[minor] = mp;
-			break;
-		}
-	spin_unlock(&capiminors_lock);
-
-	if (minor == capi_ttyminors) {
-		printk(KERN_NOTICE "capi: out of minors\n");
-		goto err_out1;
-	}
-
-	mp->minor = minor;
-
-	dev = tty_port_register_device(&mp->port, capinc_tty_driver, minor,
-			NULL);
-	if (IS_ERR(dev))
-		goto err_out2;
-
-	return mp;
-
-err_out2:
-	spin_lock(&capiminors_lock);
-	capiminors[minor] = NULL;
-	spin_unlock(&capiminors_lock);
-
-err_out1:
-	tty_port_put(&mp->port);
-	return NULL;
-}
-
-static struct capiminor *capiminor_get(unsigned int minor)
-{
-	struct capiminor *mp;
-
-	spin_lock(&capiminors_lock);
-	mp = capiminors[minor];
-	if (mp)
-		tty_port_get(&mp->port);
-	spin_unlock(&capiminors_lock);
-
-	return mp;
-}
-
-static inline void capiminor_put(struct capiminor *mp)
-{
-	tty_port_put(&mp->port);
-}
-
-static void capiminor_free(struct capiminor *mp)
-{
-	tty_unregister_device(capinc_tty_driver, mp->minor);
-
-	spin_lock(&capiminors_lock);
-	capiminors[mp->minor] = NULL;
-	spin_unlock(&capiminors_lock);
-
-	capiminor_put(mp);
-}
-
-/* -------- struct capincci ----------------------------------------- */
-
-static void capincci_alloc_minor(struct capidev *cdev, struct capincci *np)
-{
-	if (cdev->userflags & CAPIFLAG_HIGHJACKING)
-		np->minorp = capiminor_alloc(&cdev->ap, np->ncci);
-}
-
-static void capincci_free_minor(struct capincci *np)
-{
-	struct capiminor *mp = np->minorp;
-
-	if (mp) {
-		tty_port_tty_vhangup(&mp->port);
-		capiminor_free(mp);
-	}
-}
-
-static inline unsigned int capincci_minor_opencount(struct capincci *np)
-{
-	struct capiminor *mp = np->minorp;
-	unsigned int count = 0;
-	struct tty_struct *tty;
-
-	if (mp) {
-		tty = tty_port_tty_get(&mp->port);
-		if (tty) {
-			count = tty->count;
-			tty_kref_put(tty);
-		}
-	}
-	return count;
-}
-
-#else /* !CONFIG_ISDN_CAPI_MIDDLEWARE */
-
-static inline void
-capincci_alloc_minor(struct capidev *cdev, struct capincci *np) { }
-static inline void capincci_free_minor(struct capincci *np) { }
-
-#endif /* !CONFIG_ISDN_CAPI_MIDDLEWARE */
-
-static struct capincci *capincci_alloc(struct capidev *cdev, u32 ncci)
-{
-	struct capincci *np;
-
-	np = kzalloc_obj(*np);
-	if (!np)
-		return NULL;
-	np->ncci = ncci;
-	np->cdev = cdev;
-
-	capincci_alloc_minor(cdev, np);
-
-	list_add_tail(&np->list, &cdev->nccis);
-
-	return np;
-}
-
-static void capincci_free(struct capidev *cdev, u32 ncci)
-{
-	struct capincci *np, *tmp;
-
-	list_for_each_entry_safe(np, tmp, &cdev->nccis, list)
-		if (ncci == 0xffffffff || np->ncci == ncci) {
-			capincci_free_minor(np);
-			list_del(&np->list);
-			kfree(np);
-		}
-}
-
-#ifdef CONFIG_ISDN_CAPI_MIDDLEWARE
-static struct capincci *capincci_find(struct capidev *cdev, u32 ncci)
-{
-	struct capincci *np;
-
-	list_for_each_entry(np, &cdev->nccis, list)
-		if (np->ncci == ncci)
-			return np;
-	return NULL;
-}
-
-/* -------- handle data queue --------------------------------------- */
-
-static struct sk_buff *
-gen_data_b3_resp_for(struct capiminor *mp, struct sk_buff *skb)
-{
-	struct sk_buff *nskb;
-	nskb = alloc_skb(CAPI_DATA_B3_RESP_LEN, GFP_KERNEL);
-	if (nskb) {
-		u16 datahandle = CAPIMSG_U16(skb->data, CAPIMSG_BASELEN + 4 + 4 + 2);
-		unsigned char *s = skb_put(nskb, CAPI_DATA_B3_RESP_LEN);
-		capimsg_setu16(s, 0, CAPI_DATA_B3_RESP_LEN);
-		capimsg_setu16(s, 2, mp->ap->applid);
-		capimsg_setu8 (s, 4, CAPI_DATA_B3);
-		capimsg_setu8 (s, 5, CAPI_RESP);
-		capimsg_setu16(s, 6, atomic_inc_return(&mp->msgid));
-		capimsg_setu32(s, 8, mp->ncci);
-		capimsg_setu16(s, 12, datahandle);
-	}
-	return nskb;
-}
-
-static int handle_recv_skb(struct capiminor *mp, struct sk_buff *skb)
-{
-	unsigned int datalen = skb->len - CAPIMSG_LEN(skb->data);
-	struct tty_struct *tty;
-	struct sk_buff *nskb;
-	u16 errcode, datahandle;
-	struct tty_ldisc *ld;
-	int ret = -1;
-
-	tty = tty_port_tty_get(&mp->port);
-	if (!tty) {
-		pr_debug("capi: currently no receiver\n");
-		return -1;
-	}
-
-	ld = tty_ldisc_ref(tty);
-	if (!ld) {
-		/* fatal error, do not requeue */
-		ret = 0;
-		kfree_skb(skb);
-		goto deref_tty;
-	}
-
-	if (ld->ops->receive_buf == NULL) {
-		pr_debug("capi: ldisc has no receive_buf function\n");
-		/* fatal error, do not requeue */
-		goto free_skb;
-	}
-	if (mp->ttyinstop) {
-		pr_debug("capi: recv tty throttled\n");
-		goto deref_ldisc;
-	}
-
-	if (tty->receive_room < datalen) {
-		pr_debug("capi: no room in tty\n");
-		goto deref_ldisc;
-	}
-
-	nskb = gen_data_b3_resp_for(mp, skb);
-	if (!nskb) {
-		printk(KERN_ERR "capi: gen_data_b3_resp failed\n");
-		goto deref_ldisc;
-	}
-
-	datahandle = CAPIMSG_U16(skb->data, CAPIMSG_BASELEN + 4);
-
-	errcode = capi20_put_message(mp->ap, nskb);
-
-	if (errcode == CAPI_NOERROR) {
-		skb_pull(skb, CAPIMSG_LEN(skb->data));
-		pr_debug("capi: DATA_B3_RESP %u len=%d => ldisc\n",
-			 datahandle, skb->len);
-		ld->ops->receive_buf(tty, skb->data, NULL, skb->len);
-	} else {
-		printk(KERN_ERR "capi: send DATA_B3_RESP failed=%x\n",
-		       errcode);
-		kfree_skb(nskb);
-
-		if (errcode == CAPI_SENDQUEUEFULL)
-			goto deref_ldisc;
-	}
-
-free_skb:
-	ret = 0;
-	kfree_skb(skb);
-
-deref_ldisc:
-	tty_ldisc_deref(ld);
-
-deref_tty:
-	tty_kref_put(tty);
-	return ret;
-}
-
-static void handle_minor_recv(struct capiminor *mp)
-{
-	struct sk_buff *skb;
-
-	while ((skb = skb_dequeue(&mp->inqueue)) != NULL)
-		if (handle_recv_skb(mp, skb) < 0) {
-			skb_queue_head(&mp->inqueue, skb);
-			return;
-		}
-}
-
-static void handle_minor_send(struct capiminor *mp)
-{
-	struct tty_struct *tty;
-	struct sk_buff *skb;
-	u16 len;
-	u16 errcode;
-	u16 datahandle;
-
-	tty = tty_port_tty_get(&mp->port);
-	if (!tty)
-		return;
-
-	if (mp->ttyoutstop) {
-		pr_debug("capi: send: tty stopped\n");
-		tty_kref_put(tty);
-		return;
-	}
-
-	while (1) {
-		spin_lock_bh(&mp->outlock);
-		skb = __skb_dequeue(&mp->outqueue);
-		if (!skb) {
-			spin_unlock_bh(&mp->outlock);
-			break;
-		}
-		len = (u16)skb->len;
-		mp->outbytes -= len;
-		spin_unlock_bh(&mp->outlock);
-
-		datahandle = atomic_inc_return(&mp->datahandle);
-		skb_push(skb, CAPI_DATA_B3_REQ_LEN);
-		memset(skb->data, 0, CAPI_DATA_B3_REQ_LEN);
-		capimsg_setu16(skb->data, 0, CAPI_DATA_B3_REQ_LEN);
-		capimsg_setu16(skb->data, 2, mp->ap->applid);
-		capimsg_setu8 (skb->data, 4, CAPI_DATA_B3);
-		capimsg_setu8 (skb->data, 5, CAPI_REQ);
-		capimsg_setu16(skb->data, 6, atomic_inc_return(&mp->msgid));
-		capimsg_setu32(skb->data, 8, mp->ncci);	/* NCCI */
-		capimsg_setu32(skb->data, 12, (u32)(long)skb->data);/* Data32 */
-		capimsg_setu16(skb->data, 16, len);	/* Data length */
-		capimsg_setu16(skb->data, 18, datahandle);
-		capimsg_setu16(skb->data, 20, 0);	/* Flags */
-
-		if (capiminor_add_ack(mp, datahandle) < 0) {
-			skb_pull(skb, CAPI_DATA_B3_REQ_LEN);
-
-			spin_lock_bh(&mp->outlock);
-			__skb_queue_head(&mp->outqueue, skb);
-			mp->outbytes += len;
-			spin_unlock_bh(&mp->outlock);
-
-			break;
-		}
-		errcode = capi20_put_message(mp->ap, skb);
-		if (errcode == CAPI_NOERROR) {
-			pr_debug("capi: DATA_B3_REQ %u len=%u\n",
-				 datahandle, len);
-			continue;
-		}
-		capiminor_del_ack(mp, datahandle);
-
-		if (errcode == CAPI_SENDQUEUEFULL) {
-			skb_pull(skb, CAPI_DATA_B3_REQ_LEN);
-
-			spin_lock_bh(&mp->outlock);
-			__skb_queue_head(&mp->outqueue, skb);
-			mp->outbytes += len;
-			spin_unlock_bh(&mp->outlock);
-
-			break;
-		}
-
-		/* ups, drop packet */
-		printk(KERN_ERR "capi: put_message = %x\n", errcode);
-		kfree_skb(skb);
-	}
-	tty_kref_put(tty);
-}
-
-#endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */
-/* -------- function called by lower level -------------------------- */
-
-static void capi_recv_message(struct capi20_appl *ap, struct sk_buff *skb)
-{
-	struct capidev *cdev = ap->private;
-#ifdef CONFIG_ISDN_CAPI_MIDDLEWARE
-	struct capiminor *mp;
-	u16 datahandle;
-	struct capincci *np;
-#endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */
-
-	mutex_lock(&cdev->lock);
-
-	if (CAPIMSG_CMD(skb->data) == CAPI_CONNECT_B3_CONF) {
-		u16 info = CAPIMSG_U16(skb->data, 12); // Info field
-		if ((info & 0xff00) == 0)
-			capincci_alloc(cdev, CAPIMSG_NCCI(skb->data));
-	}
-	if (CAPIMSG_CMD(skb->data) == CAPI_CONNECT_B3_IND)
-		capincci_alloc(cdev, CAPIMSG_NCCI(skb->data));
-
-	if (CAPIMSG_COMMAND(skb->data) != CAPI_DATA_B3) {
-		skb_queue_tail(&cdev->recvqueue, skb);
-		wake_up_interruptible(&cdev->recvwait);
-		goto unlock_out;
-	}
-
-#ifndef CONFIG_ISDN_CAPI_MIDDLEWARE
-	skb_queue_tail(&cdev->recvqueue, skb);
-	wake_up_interruptible(&cdev->recvwait);
-
-#else /* CONFIG_ISDN_CAPI_MIDDLEWARE */
-
-	np = capincci_find(cdev, CAPIMSG_CONTROL(skb->data));
-	if (!np) {
-		printk(KERN_ERR "BUG: capi_signal: ncci not found\n");
-		skb_queue_tail(&cdev->recvqueue, skb);
-		wake_up_interruptible(&cdev->recvwait);
-		goto unlock_out;
-	}
-
-	mp = np->minorp;
-	if (!mp) {
-		skb_queue_tail(&cdev->recvqueue, skb);
-		wake_up_interruptible(&cdev->recvwait);
-		goto unlock_out;
-	}
-	if (CAPIMSG_SUBCOMMAND(skb->data) == CAPI_IND) {
-		datahandle = CAPIMSG_U16(skb->data, CAPIMSG_BASELEN + 4 + 4 + 2);
-		pr_debug("capi_signal: DATA_B3_IND %u len=%d\n",
-			 datahandle, skb->len-CAPIMSG_LEN(skb->data));
-		skb_queue_tail(&mp->inqueue, skb);
-
-		handle_minor_recv(mp);
-
-	} else if (CAPIMSG_SUBCOMMAND(skb->data) == CAPI_CONF) {
-
-		datahandle = CAPIMSG_U16(skb->data, CAPIMSG_BASELEN + 4);
-		pr_debug("capi_signal: DATA_B3_CONF %u 0x%x\n",
-			 datahandle,
-			 CAPIMSG_U16(skb->data, CAPIMSG_BASELEN + 4 + 2));
-		kfree_skb(skb);
-		capiminor_del_ack(mp, datahandle);
-		tty_port_tty_wakeup(&mp->port);
-		handle_minor_send(mp);
-
-	} else {
-		/* ups, let capi application handle it :-) */
-		skb_queue_tail(&cdev->recvqueue, skb);
-		wake_up_interruptible(&cdev->recvwait);
-	}
-#endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */
-
-unlock_out:
-	mutex_unlock(&cdev->lock);
-}
-
-/* -------- file_operations for capidev ----------------------------- */
-
-static ssize_t
-capi_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
-{
-	struct capidev *cdev = file->private_data;
-	struct sk_buff *skb;
-	size_t copied;
-	int err;
-
-	if (!cdev->ap.applid)
-		return -ENODEV;
-
-	skb = skb_dequeue(&cdev->recvqueue);
-	if (!skb) {
-		if (file->f_flags & O_NONBLOCK)
-			return -EAGAIN;
-		err = wait_event_interruptible(cdev->recvwait,
-					       (skb = skb_dequeue(&cdev->recvqueue)));
-		if (err)
-			return err;
-	}
-	if (skb->len > count) {
-		skb_queue_head(&cdev->recvqueue, skb);
-		return -EMSGSIZE;
-	}
-	if (copy_to_user(buf, skb->data, skb->len)) {
-		skb_queue_head(&cdev->recvqueue, skb);
-		return -EFAULT;
-	}
-	copied = skb->len;
-
-	kfree_skb(skb);
-
-	return copied;
-}
-
-static ssize_t
-capi_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
-{
-	struct capidev *cdev = file->private_data;
-	struct sk_buff *skb;
-	u16 mlen;
-
-	if (!cdev->ap.applid)
-		return -ENODEV;
-
-	if (count < CAPIMSG_BASELEN)
-		return -EINVAL;
-
-	skb = alloc_skb(count, GFP_USER);
-	if (!skb)
-		return -ENOMEM;
-
-	if (copy_from_user(skb_put(skb, count), buf, count)) {
-		kfree_skb(skb);
-		return -EFAULT;
-	}
-	mlen = CAPIMSG_LEN(skb->data);
-	if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) {
-		if (count < CAPI_DATA_B3_REQ_LEN ||
-		    (size_t)(mlen + CAPIMSG_DATALEN(skb->data)) != count) {
-			kfree_skb(skb);
-			return -EINVAL;
-		}
-	} else {
-		if (mlen != count) {
-			kfree_skb(skb);
-			return -EINVAL;
-		}
-	}
-	CAPIMSG_SETAPPID(skb->data, cdev->ap.applid);
-
-	if (CAPIMSG_CMD(skb->data) == CAPI_DISCONNECT_B3_RESP) {
-		if (count < CAPI_DISCONNECT_B3_RESP_LEN) {
-			kfree_skb(skb);
-			return -EINVAL;
-		}
-		mutex_lock(&cdev->lock);
-		capincci_free(cdev, CAPIMSG_NCCI(skb->data));
-		mutex_unlock(&cdev->lock);
-	}
-
-	cdev->errcode = capi20_put_message(&cdev->ap, skb);
-
-	if (cdev->errcode) {
-		kfree_skb(skb);
-		return -EIO;
-	}
-	return count;
-}
-
-static __poll_t
-capi_poll(struct file *file, poll_table *wait)
-{
-	struct capidev *cdev = file->private_data;
-	__poll_t mask = 0;
-
-	if (!cdev->ap.applid)
-		return EPOLLERR;
-
-	poll_wait(file, &(cdev->recvwait), wait);
-	mask = EPOLLOUT | EPOLLWRNORM;
-	if (!skb_queue_empty_lockless(&cdev->recvqueue))
-		mask |= EPOLLIN | EPOLLRDNORM;
-	return mask;
-}
-
-static int
-capi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	struct capidev *cdev = file->private_data;
-	capi_ioctl_struct data;
-	int retval = -EINVAL;
-	void __user *argp = (void __user *)arg;
-
-	switch (cmd) {
-	case CAPI_REGISTER:
-		mutex_lock(&cdev->lock);
-
-		if (cdev->ap.applid) {
-			retval = -EEXIST;
-			goto register_out;
-		}
-		if (copy_from_user(&cdev->ap.rparam, argp,
-				   sizeof(struct capi_register_params))) {
-			retval = -EFAULT;
-			goto register_out;
-		}
-		cdev->ap.private = cdev;
-		cdev->ap.recv_message = capi_recv_message;
-		cdev->errcode = capi20_register(&cdev->ap);
-		retval = (int)cdev->ap.applid;
-		if (cdev->errcode) {
-			cdev->ap.applid = 0;
-			retval = -EIO;
-		}
-
-register_out:
-		mutex_unlock(&cdev->lock);
-		return retval;
-
-	case CAPI_GET_VERSION:
-		if (copy_from_user(&data.contr, argp,
-				   sizeof(data.contr)))
-			return -EFAULT;
-		cdev->errcode = capi20_get_version(data.contr, &data.version);
-		if (cdev->errcode)
-			return -EIO;
-		if (copy_to_user(argp, &data.version,
-				 sizeof(data.version)))
-			return -EFAULT;
-		return 0;
-
-	case CAPI_GET_SERIAL:
-		if (copy_from_user(&data.contr, argp,
-				   sizeof(data.contr)))
-			return -EFAULT;
-		cdev->errcode = capi20_get_serial(data.contr, data.serial);
-		if (cdev->errcode)
-			return -EIO;
-		if (copy_to_user(argp, data.serial,
-				 sizeof(data.serial)))
-			return -EFAULT;
-		return 0;
-
-	case CAPI_GET_PROFILE:
-		if (copy_from_user(&data.contr, argp,
-				   sizeof(data.contr)))
-			return -EFAULT;
-
-		if (data.contr == 0) {
-			cdev->errcode = capi20_get_profile(data.contr, &data.profile);
-			if (cdev->errcode)
-				return -EIO;
-
-			retval = copy_to_user(argp,
-					      &data.profile.ncontroller,
-					      sizeof(data.profile.ncontroller));
-
-		} else {
-			cdev->errcode = capi20_get_profile(data.contr, &data.profile);
-			if (cdev->errcode)
-				return -EIO;
-
-			retval = copy_to_user(argp, &data.profile,
-					      sizeof(data.profile));
-		}
-		if (retval)
-			return -EFAULT;
-		return 0;
-
-	case CAPI_GET_MANUFACTURER:
-		if (copy_from_user(&data.contr, argp,
-				   sizeof(data.contr)))
-			return -EFAULT;
-		cdev->errcode = capi20_get_manufacturer(data.contr, data.manufacturer);
-		if (cdev->errcode)
-			return -EIO;
-
-		if (copy_to_user(argp, data.manufacturer,
-				 sizeof(data.manufacturer)))
-			return -EFAULT;
-
-		return 0;
-
-	case CAPI_GET_ERRCODE:
-		data.errcode = cdev->errcode;
-		cdev->errcode = CAPI_NOERROR;
-		if (arg) {
-			if (copy_to_user(argp, &data.errcode,
-					 sizeof(data.errcode)))
-				return -EFAULT;
-		}
-		return data.errcode;
-
-	case CAPI_INSTALLED:
-		if (capi20_isinstalled() == CAPI_NOERROR)
-			return 0;
-		return -ENXIO;
-
-	case CAPI_MANUFACTURER_CMD: {
-		struct capi_manufacturer_cmd mcmd;
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-		if (copy_from_user(&mcmd, argp, sizeof(mcmd)))
-			return -EFAULT;
-		return capi20_manufacturer(mcmd.cmd, mcmd.data);
-	}
-	case CAPI_SET_FLAGS:
-	case CAPI_CLR_FLAGS: {
-		unsigned userflags;
-
-		if (copy_from_user(&userflags, argp, sizeof(userflags)))
-			return -EFAULT;
-
-		mutex_lock(&cdev->lock);
-		if (cmd == CAPI_SET_FLAGS)
-			cdev->userflags |= userflags;
-		else
-			cdev->userflags &= ~userflags;
-		mutex_unlock(&cdev->lock);
-		return 0;
-	}
-	case CAPI_GET_FLAGS:
-		if (copy_to_user(argp, &cdev->userflags,
-				 sizeof(cdev->userflags)))
-			return -EFAULT;
-		return 0;
-
-#ifndef CONFIG_ISDN_CAPI_MIDDLEWARE
-	case CAPI_NCCI_OPENCOUNT:
-		return 0;
-
-#else /* CONFIG_ISDN_CAPI_MIDDLEWARE */
-	case CAPI_NCCI_OPENCOUNT: {
-		struct capincci *nccip;
-		unsigned ncci;
-		int count = 0;
-
-		if (copy_from_user(&ncci, argp, sizeof(ncci)))
-			return -EFAULT;
-
-		mutex_lock(&cdev->lock);
-		nccip = capincci_find(cdev, (u32)ncci);
-		if (nccip)
-			count = capincci_minor_opencount(nccip);
-		mutex_unlock(&cdev->lock);
-		return count;
-	}
-
-	case CAPI_NCCI_GETUNIT: {
-		struct capincci *nccip;
-		struct capiminor *mp;
-		unsigned ncci;
-		int unit = -ESRCH;
-
-		if (copy_from_user(&ncci, argp, sizeof(ncci)))
-			return -EFAULT;
-
-		mutex_lock(&cdev->lock);
-		nccip = capincci_find(cdev, (u32)ncci);
-		if (nccip) {
-			mp = nccip->minorp;
-			if (mp)
-				unit = mp->minor;
-		}
-		mutex_unlock(&cdev->lock);
-		return unit;
-	}
-#endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */
-
-	default:
-		return -EINVAL;
-	}
-}
-
-static long
-capi_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	int ret;
-
-	mutex_lock(&capi_mutex);
-	ret = capi_ioctl(file, cmd, arg);
-	mutex_unlock(&capi_mutex);
-
-	return ret;
-}
-
-#ifdef CONFIG_COMPAT
-static long
-capi_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	int ret;
-
-	if (cmd == CAPI_MANUFACTURER_CMD) {
-		struct {
-			compat_ulong_t cmd;
-			compat_uptr_t data;
-		} mcmd32;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-		if (copy_from_user(&mcmd32, compat_ptr(arg), sizeof(mcmd32)))
-			return -EFAULT;
-
-		mutex_lock(&capi_mutex);
-		ret = capi20_manufacturer(mcmd32.cmd, compat_ptr(mcmd32.data));
-		mutex_unlock(&capi_mutex);
-
-		return ret;
-	}
-
-	return capi_unlocked_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
-}
-#endif
-
-static int capi_open(struct inode *inode, struct file *file)
-{
-	struct capidev *cdev;
-
-	cdev = kzalloc_obj(*cdev);
-	if (!cdev)
-		return -ENOMEM;
-
-	mutex_init(&cdev->lock);
-	skb_queue_head_init(&cdev->recvqueue);
-	init_waitqueue_head(&cdev->recvwait);
-	INIT_LIST_HEAD(&cdev->nccis);
-	file->private_data = cdev;
-
-	mutex_lock(&capidev_list_lock);
-	list_add_tail(&cdev->list, &capidev_list);
-	mutex_unlock(&capidev_list_lock);
-
-	return stream_open(inode, file);
-}
-
-static int capi_release(struct inode *inode, struct file *file)
-{
-	struct capidev *cdev = file->private_data;
-
-	mutex_lock(&capidev_list_lock);
-	list_del(&cdev->list);
-	mutex_unlock(&capidev_list_lock);
-
-	if (cdev->ap.applid)
-		capi20_release(&cdev->ap);
-	skb_queue_purge(&cdev->recvqueue);
-	capincci_free(cdev, 0xffffffff);
-
-	kfree(cdev);
-	return 0;
-}
-
-static const struct file_operations capi_fops =
-{
-	.owner		= THIS_MODULE,
-	.read		= capi_read,
-	.write		= capi_write,
-	.poll		= capi_poll,
-	.unlocked_ioctl	= capi_unlocked_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= capi_compat_ioctl,
-#endif
-	.open		= capi_open,
-	.release	= capi_release,
-};
-
-#ifdef CONFIG_ISDN_CAPI_MIDDLEWARE
-/* -------- tty_operations for capincci ----------------------------- */
-
-static int
-capinc_tty_install(struct tty_driver *driver, struct tty_struct *tty)
-{
-	struct capiminor *mp = capiminor_get(tty->index);
-	int ret = tty_standard_install(driver, tty);
-
-	if (ret == 0)
-		tty->driver_data = mp;
-	else
-		capiminor_put(mp);
-	return ret;
-}
-
-static void capinc_tty_cleanup(struct tty_struct *tty)
-{
-	struct capiminor *mp = tty->driver_data;
-	tty->driver_data = NULL;
-	capiminor_put(mp);
-}
-
-static int capinc_tty_open(struct tty_struct *tty, struct file *filp)
-{
-	struct capiminor *mp = tty->driver_data;
-	int err;
-
-	err = tty_port_open(&mp->port, tty, filp);
-	if (err)
-		return err;
-
-	handle_minor_recv(mp);
-	return 0;
-}
-
-static void capinc_tty_close(struct tty_struct *tty, struct file *filp)
-{
-	struct capiminor *mp = tty->driver_data;
-
-	tty_port_close(&mp->port, tty, filp);
-}
-
-static ssize_t capinc_tty_write(struct tty_struct *tty, const u8 *buf,
-				size_t count)
-{
-	struct capiminor *mp = tty->driver_data;
-	struct sk_buff *skb;
-
-	pr_debug("capinc_tty_write(count=%zu)\n", count);
-
-	spin_lock_bh(&mp->outlock);
-	skb = mp->outskb;
-	if (skb) {
-		mp->outskb = NULL;
-		__skb_queue_tail(&mp->outqueue, skb);
-		mp->outbytes += skb->len;
-	}
-
-	skb = alloc_skb(CAPI_DATA_B3_REQ_LEN + count, GFP_ATOMIC);
-	if (!skb) {
-		printk(KERN_ERR "capinc_tty_write: alloc_skb failed\n");
-		spin_unlock_bh(&mp->outlock);
-		return -ENOMEM;
-	}
-
-	skb_reserve(skb, CAPI_DATA_B3_REQ_LEN);
-	skb_put_data(skb, buf, count);
-
-	__skb_queue_tail(&mp->outqueue, skb);
-	mp->outbytes += skb->len;
-	spin_unlock_bh(&mp->outlock);
-
-	handle_minor_send(mp);
-
-	return count;
-}
-
-static int capinc_tty_put_char(struct tty_struct *tty, u8 ch)
-{
-	struct capiminor *mp = tty->driver_data;
-	bool invoke_send = false;
-	struct sk_buff *skb;
-	int ret = 1;
-
-	pr_debug("capinc_put_char(%u)\n", ch);
-
-	spin_lock_bh(&mp->outlock);
-	skb = mp->outskb;
-	if (skb) {
-		if (skb_tailroom(skb) > 0) {
-			skb_put_u8(skb, ch);
-			goto unlock_out;
-		}
-		mp->outskb = NULL;
-		__skb_queue_tail(&mp->outqueue, skb);
-		mp->outbytes += skb->len;
-		invoke_send = true;
-	}
-
-	skb = alloc_skb(CAPI_DATA_B3_REQ_LEN + CAPI_MAX_BLKSIZE, GFP_ATOMIC);
-	if (skb) {
-		skb_reserve(skb, CAPI_DATA_B3_REQ_LEN);
-		skb_put_u8(skb, ch);
-		mp->outskb = skb;
-	} else {
-		printk(KERN_ERR "capinc_put_char: char %u lost\n", ch);
-		ret = 0;
-	}
-
-unlock_out:
-	spin_unlock_bh(&mp->outlock);
-
-	if (invoke_send)
-		handle_minor_send(mp);
-
-	return ret;
-}
-
-static void capinc_tty_flush_chars(struct tty_struct *tty)
-{
-	struct capiminor *mp = tty->driver_data;
-	struct sk_buff *skb;
-
-	spin_lock_bh(&mp->outlock);
-	skb = mp->outskb;
-	if (skb) {
-		mp->outskb = NULL;
-		__skb_queue_tail(&mp->outqueue, skb);
-		mp->outbytes += skb->len;
-		spin_unlock_bh(&mp->outlock);
-
-		handle_minor_send(mp);
-	} else
-		spin_unlock_bh(&mp->outlock);
-
-	handle_minor_recv(mp);
-}
-
-static unsigned int capinc_tty_write_room(struct tty_struct *tty)
-{
-	struct capiminor *mp = tty->driver_data;
-	unsigned int room;
-
-	room = CAPINC_MAX_SENDQUEUE-skb_queue_len(&mp->outqueue);
-	room *= CAPI_MAX_BLKSIZE;
-	pr_debug("capinc_tty_write_room = %u\n", room);
-	return room;
-}
-
-static unsigned int capinc_tty_chars_in_buffer(struct tty_struct *tty)
-{
-	struct capiminor *mp = tty->driver_data;
-
-	pr_debug("capinc_tty_chars_in_buffer = %d nack=%d sq=%d rq=%d\n",
-		 mp->outbytes, mp->nack,
-		 skb_queue_len(&mp->outqueue),
-		 skb_queue_len(&mp->inqueue));
-	return mp->outbytes;
-}
-
-static void capinc_tty_throttle(struct tty_struct *tty)
-{
-	struct capiminor *mp = tty->driver_data;
-	mp->ttyinstop = 1;
-}
-
-static void capinc_tty_unthrottle(struct tty_struct *tty)
-{
-	struct capiminor *mp = tty->driver_data;
-
-	mp->ttyinstop = 0;
-	handle_minor_recv(mp);
-}
-
-static void capinc_tty_stop(struct tty_struct *tty)
-{
-	struct capiminor *mp = tty->driver_data;
-
-	mp->ttyoutstop = 1;
-}
-
-static void capinc_tty_start(struct tty_struct *tty)
-{
-	struct capiminor *mp = tty->driver_data;
-
-	mp->ttyoutstop = 0;
-	handle_minor_send(mp);
-}
-
-static void capinc_tty_hangup(struct tty_struct *tty)
-{
-	struct capiminor *mp = tty->driver_data;
-
-	tty_port_hangup(&mp->port);
-}
-
-static void capinc_tty_send_xchar(struct tty_struct *tty, u8 ch)
-{
-	pr_debug("capinc_tty_send_xchar(%u)\n", ch);
-}
-
-static const struct tty_operations capinc_ops = {
-	.open = capinc_tty_open,
-	.close = capinc_tty_close,
-	.write = capinc_tty_write,
-	.put_char = capinc_tty_put_char,
-	.flush_chars = capinc_tty_flush_chars,
-	.write_room = capinc_tty_write_room,
-	.chars_in_buffer = capinc_tty_chars_in_buffer,
-	.throttle = capinc_tty_throttle,
-	.unthrottle = capinc_tty_unthrottle,
-	.stop = capinc_tty_stop,
-	.start = capinc_tty_start,
-	.hangup = capinc_tty_hangup,
-	.send_xchar = capinc_tty_send_xchar,
-	.install = capinc_tty_install,
-	.cleanup = capinc_tty_cleanup,
-};
-
-static int __init capinc_tty_init(void)
-{
-	struct tty_driver *drv;
-	int err;
-
-	if (capi_ttyminors > CAPINC_MAX_PORTS)
-		capi_ttyminors = CAPINC_MAX_PORTS;
-	if (capi_ttyminors <= 0)
-		capi_ttyminors = CAPINC_NR_PORTS;
-
-	capiminors = kzalloc_objs(struct capiminor *, capi_ttyminors);
-	if (!capiminors)
-		return -ENOMEM;
-
-	drv = tty_alloc_driver(capi_ttyminors, TTY_DRIVER_REAL_RAW |
-			TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_DYNAMIC_DEV);
-	if (IS_ERR(drv)) {
-		kfree(capiminors);
-		return PTR_ERR(drv);
-	}
-	drv->driver_name = "capi_nc";
-	drv->name = "capi!";
-	drv->major = 0;
-	drv->minor_start = 0;
-	drv->type = TTY_DRIVER_TYPE_SERIAL;
-	drv->subtype = SERIAL_TYPE_NORMAL;
-	drv->init_termios = tty_std_termios;
-	drv->init_termios.c_iflag = ICRNL;
-	drv->init_termios.c_oflag = OPOST | ONLCR;
-	drv->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
-	drv->init_termios.c_lflag = 0;
-	tty_set_operations(drv, &capinc_ops);
-
-	err = tty_register_driver(drv);
-	if (err) {
-		tty_driver_kref_put(drv);
-		kfree(capiminors);
-		printk(KERN_ERR "Couldn't register capi_nc driver\n");
-		return err;
-	}
-	capinc_tty_driver = drv;
-	return 0;
-}
-
-static void __exit capinc_tty_exit(void)
-{
-	tty_unregister_driver(capinc_tty_driver);
-	tty_driver_kref_put(capinc_tty_driver);
-	kfree(capiminors);
-}
-
-#else /* !CONFIG_ISDN_CAPI_MIDDLEWARE */
-
-static inline int capinc_tty_init(void)
-{
-	return 0;
-}
-
-static inline void capinc_tty_exit(void) { }
-
-#endif /* !CONFIG_ISDN_CAPI_MIDDLEWARE */
-
-/* -------- /proc functions ----------------------------------------- */
-
-/*
- * /proc/capi/capi20:
- *  minor applid nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt
- */
-static int __maybe_unused capi20_proc_show(struct seq_file *m, void *v)
-{
-	struct capidev *cdev;
-	struct list_head *l;
-
-	mutex_lock(&capidev_list_lock);
-	list_for_each(l, &capidev_list) {
-		cdev = list_entry(l, struct capidev, list);
-		seq_printf(m, "0 %d %lu %lu %lu %lu\n",
-			   cdev->ap.applid,
-			   cdev->ap.nrecvctlpkt,
-			   cdev->ap.nrecvdatapkt,
-			   cdev->ap.nsentctlpkt,
-			   cdev->ap.nsentdatapkt);
-	}
-	mutex_unlock(&capidev_list_lock);
-	return 0;
-}
-
-/*
- * /proc/capi/capi20ncci:
- *  applid ncci
- */
-static int __maybe_unused capi20ncci_proc_show(struct seq_file *m, void *v)
-{
-	struct capidev *cdev;
-	struct capincci *np;
-
-	mutex_lock(&capidev_list_lock);
-	list_for_each_entry(cdev, &capidev_list, list) {
-		mutex_lock(&cdev->lock);
-		list_for_each_entry(np, &cdev->nccis, list)
-			seq_printf(m, "%d 0x%x\n", cdev->ap.applid, np->ncci);
-		mutex_unlock(&cdev->lock);
-	}
-	mutex_unlock(&capidev_list_lock);
-	return 0;
-}
-
-static void __init proc_init(void)
-{
-	proc_create_single("capi/capi20", 0, NULL, capi20_proc_show);
-	proc_create_single("capi/capi20ncci", 0, NULL, capi20ncci_proc_show);
-}
-
-static void __exit proc_exit(void)
-{
-	remove_proc_entry("capi/capi20", NULL);
-	remove_proc_entry("capi/capi20ncci", NULL);
-}
-
-/* -------- init function and module interface ---------------------- */
-
-
-static int __init capi_init(void)
-{
-	const char *compileinfo;
-	int major_ret;
-	int ret;
-
-	ret = kcapi_init();
-	if (ret)
-		return ret;
-
-	major_ret = register_chrdev(capi_major, "capi20", &capi_fops);
-	if (major_ret < 0) {
-		printk(KERN_ERR "capi20: unable to get major %d\n", capi_major);
-		kcapi_exit();
-		return major_ret;
-	}
-
-	ret = class_register(&capi_class);
-	if (ret) {
-		unregister_chrdev(capi_major, "capi20");
-		kcapi_exit();
-		return ret;
-	}
-
-	device_create(&capi_class, NULL, MKDEV(capi_major, 0), NULL, "capi20");
-
-	if (capinc_tty_init() < 0) {
-		device_destroy(&capi_class, MKDEV(capi_major, 0));
-		class_unregister(&capi_class);
-		unregister_chrdev(capi_major, "capi20");
-		kcapi_exit();
-		return -ENOMEM;
-	}
-
-	proc_init();
-
-#ifdef CONFIG_ISDN_CAPI_MIDDLEWARE
-	compileinfo = " (middleware)";
-#else
-	compileinfo = " (no middleware)";
-#endif
-	printk(KERN_NOTICE "CAPI 2.0 started up with major %d%s\n",
-	       capi_major, compileinfo);
-
-	return 0;
-}
-
-static void __exit capi_exit(void)
-{
-	proc_exit();
-
-	device_destroy(&capi_class, MKDEV(capi_major, 0));
-	class_unregister(&capi_class);
-	unregister_chrdev(capi_major, "capi20");
-
-	capinc_tty_exit();
-
-	kcapi_exit();
-}
-
-module_init(capi_init);
-module_exit(capi_exit);
diff --git a/drivers/isdn/capi/capiutil.c b/drivers/isdn/capi/capiutil.c
deleted file mode 100644
index eec9b36343b7..000000000000
--- a/drivers/isdn/capi/capiutil.c
+++ /dev/null
@@ -1,677 +0,0 @@
-/* $Id: capiutil.c,v 1.13.6.4 2001/09/23 22:24:33 kai Exp $
- *
- * CAPI 2.0 convert capi message to capi message struct
- *
- * From CAPI 2.0 Development Kit AVM 1995 (msg.c)
- * Rewritten for Linux 1996 by Carsten Paeth <calle@calle.de>
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/isdn/capiutil.h>
-#include <linux/slab.h>
-
-#include "kcapi.h"
-
-/* from CAPI2.0 DDK AVM Berlin GmbH */
-
-typedef struct {
-	int typ;
-	size_t off;
-} _cdef;
-
-#define _CBYTE	       1
-#define _CWORD	       2
-#define _CDWORD        3
-#define _CSTRUCT       4
-#define _CMSTRUCT      5
-#define _CEND	       6
-
-static _cdef cdef[] =
-{
-	/*00 */
-	{_CEND},
-	/*01 */
-	{_CEND},
-	/*02 */
-	{_CEND},
-	/*03 */
-	{_CDWORD, offsetof(_cmsg, adr.adrController)},
-	/*04 */
-	{_CMSTRUCT, offsetof(_cmsg, AdditionalInfo)},
-	/*05 */
-	{_CSTRUCT, offsetof(_cmsg, B1configuration)},
-	/*06 */
-	{_CWORD, offsetof(_cmsg, B1protocol)},
-	/*07 */
-	{_CSTRUCT, offsetof(_cmsg, B2configuration)},
-	/*08 */
-	{_CWORD, offsetof(_cmsg, B2protocol)},
-	/*09 */
-	{_CSTRUCT, offsetof(_cmsg, B3configuration)},
-	/*0a */
-	{_CWORD, offsetof(_cmsg, B3protocol)},
-	/*0b */
-	{_CSTRUCT, offsetof(_cmsg, BC)},
-	/*0c */
-	{_CSTRUCT, offsetof(_cmsg, BChannelinformation)},
-	/*0d */
-	{_CMSTRUCT, offsetof(_cmsg, BProtocol)},
-	/*0e */
-	{_CSTRUCT, offsetof(_cmsg, CalledPartyNumber)},
-	/*0f */
-	{_CSTRUCT, offsetof(_cmsg, CalledPartySubaddress)},
-	/*10 */
-	{_CSTRUCT, offsetof(_cmsg, CallingPartyNumber)},
-	/*11 */
-	{_CSTRUCT, offsetof(_cmsg, CallingPartySubaddress)},
-	/*12 */
-	{_CDWORD, offsetof(_cmsg, CIPmask)},
-	/*13 */
-	{_CDWORD, offsetof(_cmsg, CIPmask2)},
-	/*14 */
-	{_CWORD, offsetof(_cmsg, CIPValue)},
-	/*15 */
-	{_CDWORD, offsetof(_cmsg, Class)},
-	/*16 */
-	{_CSTRUCT, offsetof(_cmsg, ConnectedNumber)},
-	/*17 */
-	{_CSTRUCT, offsetof(_cmsg, ConnectedSubaddress)},
-	/*18 */
-	{_CDWORD, offsetof(_cmsg, Data)},
-	/*19 */
-	{_CWORD, offsetof(_cmsg, DataHandle)},
-	/*1a */
-	{_CWORD, offsetof(_cmsg, DataLength)},
-	/*1b */
-	{_CSTRUCT, offsetof(_cmsg, FacilityConfirmationParameter)},
-	/*1c */
-	{_CSTRUCT, offsetof(_cmsg, Facilitydataarray)},
-	/*1d */
-	{_CSTRUCT, offsetof(_cmsg, FacilityIndicationParameter)},
-	/*1e */
-	{_CSTRUCT, offsetof(_cmsg, FacilityRequestParameter)},
-	/*1f */
-	{_CWORD, offsetof(_cmsg, FacilitySelector)},
-	/*20 */
-	{_CWORD, offsetof(_cmsg, Flags)},
-	/*21 */
-	{_CDWORD, offsetof(_cmsg, Function)},
-	/*22 */
-	{_CSTRUCT, offsetof(_cmsg, HLC)},
-	/*23 */
-	{_CWORD, offsetof(_cmsg, Info)},
-	/*24 */
-	{_CSTRUCT, offsetof(_cmsg, InfoElement)},
-	/*25 */
-	{_CDWORD, offsetof(_cmsg, InfoMask)},
-	/*26 */
-	{_CWORD, offsetof(_cmsg, InfoNumber)},
-	/*27 */
-	{_CSTRUCT, offsetof(_cmsg, Keypadfacility)},
-	/*28 */
-	{_CSTRUCT, offsetof(_cmsg, LLC)},
-	/*29 */
-	{_CSTRUCT, offsetof(_cmsg, ManuData)},
-	/*2a */
-	{_CDWORD, offsetof(_cmsg, ManuID)},
-	/*2b */
-	{_CSTRUCT, offsetof(_cmsg, NCPI)},
-	/*2c */
-	{_CWORD, offsetof(_cmsg, Reason)},
-	/*2d */
-	{_CWORD, offsetof(_cmsg, Reason_B3)},
-	/*2e */
-	{_CWORD, offsetof(_cmsg, Reject)},
-	/*2f */
-	{_CSTRUCT, offsetof(_cmsg, Useruserdata)}
-};
-
-static unsigned char *cpars[] =
-{
-	/* ALERT_REQ */ [0x01] = "\x03\x04\x0c\x27\x2f\x1c\x01\x01",
-	/* CONNECT_REQ */ [0x02] = "\x03\x14\x0e\x10\x0f\x11\x0d\x06\x08\x0a\x05\x07\x09\x01\x0b\x28\x22\x04\x0c\x27\x2f\x1c\x01\x01",
-	/* DISCONNECT_REQ */ [0x04] = "\x03\x04\x0c\x27\x2f\x1c\x01\x01",
-	/* LISTEN_REQ */ [0x05] = "\x03\x25\x12\x13\x10\x11\x01",
-	/* INFO_REQ */ [0x08] = "\x03\x0e\x04\x0c\x27\x2f\x1c\x01\x01",
-	/* FACILITY_REQ */ [0x09] = "\x03\x1f\x1e\x01",
-	/* SELECT_B_PROTOCOL_REQ */ [0x0a] = "\x03\x0d\x06\x08\x0a\x05\x07\x09\x01\x01",
-	/* CONNECT_B3_REQ */ [0x0b] = "\x03\x2b\x01",
-	/* DISCONNECT_B3_REQ */ [0x0d] = "\x03\x2b\x01",
-	/* DATA_B3_REQ */ [0x0f] = "\x03\x18\x1a\x19\x20\x01",
-	/* RESET_B3_REQ */ [0x10] = "\x03\x2b\x01",
-	/* ALERT_CONF */ [0x13] = "\x03\x23\x01",
-	/* CONNECT_CONF */ [0x14] = "\x03\x23\x01",
-	/* DISCONNECT_CONF */ [0x16] = "\x03\x23\x01",
-	/* LISTEN_CONF */ [0x17] = "\x03\x23\x01",
-	/* MANUFACTURER_REQ */ [0x18] = "\x03\x2a\x15\x21\x29\x01",
-	/* INFO_CONF */ [0x1a] = "\x03\x23\x01",
-	/* FACILITY_CONF */ [0x1b] = "\x03\x23\x1f\x1b\x01",
-	/* SELECT_B_PROTOCOL_CONF */ [0x1c] = "\x03\x23\x01",
-	/* CONNECT_B3_CONF */ [0x1d] = "\x03\x23\x01",
-	/* DISCONNECT_B3_CONF */ [0x1f] = "\x03\x23\x01",
-	/* DATA_B3_CONF */ [0x21] = "\x03\x19\x23\x01",
-	/* RESET_B3_CONF */ [0x22] = "\x03\x23\x01",
-	/* CONNECT_IND */ [0x26] = "\x03\x14\x0e\x10\x0f\x11\x0b\x28\x22\x04\x0c\x27\x2f\x1c\x01\x01",
-	/* CONNECT_ACTIVE_IND */ [0x27] = "\x03\x16\x17\x28\x01",
-	/* DISCONNECT_IND */ [0x28] = "\x03\x2c\x01",
-	/* MANUFACTURER_CONF */ [0x2a] = "\x03\x2a\x15\x21\x29\x01",
-	/* INFO_IND */ [0x2c] = "\x03\x26\x24\x01",
-	/* FACILITY_IND */ [0x2d] = "\x03\x1f\x1d\x01",
-	/* CONNECT_B3_IND */ [0x2f] = "\x03\x2b\x01",
-	/* CONNECT_B3_ACTIVE_IND */ [0x30] = "\x03\x2b\x01",
-	/* DISCONNECT_B3_IND */ [0x31] = "\x03\x2d\x2b\x01",
-	/* DATA_B3_IND */ [0x33] = "\x03\x18\x1a\x19\x20\x01",
-	/* RESET_B3_IND */ [0x34] = "\x03\x2b\x01",
-	/* CONNECT_B3_T90_ACTIVE_IND */ [0x35] = "\x03\x2b\x01",
-	/* CONNECT_RESP */ [0x38] = "\x03\x2e\x0d\x06\x08\x0a\x05\x07\x09\x01\x16\x17\x28\x04\x0c\x27\x2f\x1c\x01\x01",
-	/* CONNECT_ACTIVE_RESP */ [0x39] = "\x03\x01",
-	/* DISCONNECT_RESP */ [0x3a] = "\x03\x01",
-	/* MANUFACTURER_IND */ [0x3c] = "\x03\x2a\x15\x21\x29\x01",
-	/* INFO_RESP */ [0x3e] = "\x03\x01",
-	/* FACILITY_RESP */ [0x3f] = "\x03\x1f\x01",
-	/* CONNECT_B3_RESP */ [0x41] = "\x03\x2e\x2b\x01",
-	/* CONNECT_B3_ACTIVE_RESP */ [0x42] = "\x03\x01",
-	/* DISCONNECT_B3_RESP */ [0x43] = "\x03\x01",
-	/* DATA_B3_RESP */ [0x45] = "\x03\x19\x01",
-	/* RESET_B3_RESP */ [0x46] = "\x03\x01",
-	/* CONNECT_B3_T90_ACTIVE_RESP */ [0x47] = "\x03\x01",
-	/* MANUFACTURER_RESP */ [0x4e] = "\x03\x2a\x15\x21\x29\x01",
-};
-
-/*-------------------------------------------------------*/
-
-#define byteTLcpy(x, y)         *(u8 *)(x) = *(u8 *)(y);
-#define wordTLcpy(x, y)         *(u16 *)(x) = *(u16 *)(y);
-#define dwordTLcpy(x, y)        memcpy(x, y, 4);
-#define structTLcpy(x, y, l)    memcpy(x, y, l)
-#define structTLcpyovl(x, y, l) memmove(x, y, l)
-
-#define byteTRcpy(x, y)         *(u8 *)(y) = *(u8 *)(x);
-#define wordTRcpy(x, y)         *(u16 *)(y) = *(u16 *)(x);
-#define dwordTRcpy(x, y)        memcpy(y, x, 4);
-#define structTRcpy(x, y, l)    memcpy(y, x, l)
-#define structTRcpyovl(x, y, l) memmove(y, x, l)
-
-/*-------------------------------------------------------*/
-static unsigned command_2_index(u8 c, u8 sc)
-{
-	if (c & 0x80)
-		c = 0x9 + (c & 0x0f);
-	else if (c == 0x41)
-		c = 0x9 + 0x1;
-	if (c > 0x18)
-		c = 0x00;
-	return (sc & 3) * (0x9 + 0x9) + c;
-}
-
-/**
- * capi_cmd2par() - find parameter string for CAPI 2.0 command/subcommand
- * @cmd:	command number
- * @subcmd:	subcommand number
- *
- * Return value: static string, NULL if command/subcommand unknown
- */
-
-static unsigned char *capi_cmd2par(u8 cmd, u8 subcmd)
-{
-	return cpars[command_2_index(cmd, subcmd)];
-}
-
-/*-------------------------------------------------------*/
-#define TYP (cdef[cmsg->par[cmsg->p]].typ)
-#define OFF (((u8 *)cmsg) + cdef[cmsg->par[cmsg->p]].off)
-
-static void jumpcstruct(_cmsg *cmsg)
-{
-	unsigned layer;
-	for (cmsg->p++, layer = 1; layer;) {
-		/* $$$$$ assert (cmsg->p); */
-		cmsg->p++;
-		switch (TYP) {
-		case _CMSTRUCT:
-			layer++;
-			break;
-		case _CEND:
-			layer--;
-			break;
-		}
-	}
-}
-
-/*-------------------------------------------------------*/
-
-static char *mnames[] =
-{
-	[0x01] = "ALERT_REQ",
-	[0x02] = "CONNECT_REQ",
-	[0x04] = "DISCONNECT_REQ",
-	[0x05] = "LISTEN_REQ",
-	[0x08] = "INFO_REQ",
-	[0x09] = "FACILITY_REQ",
-	[0x0a] = "SELECT_B_PROTOCOL_REQ",
-	[0x0b] = "CONNECT_B3_REQ",
-	[0x0d] = "DISCONNECT_B3_REQ",
-	[0x0f] = "DATA_B3_REQ",
-	[0x10] = "RESET_B3_REQ",
-	[0x13] = "ALERT_CONF",
-	[0x14] = "CONNECT_CONF",
-	[0x16] = "DISCONNECT_CONF",
-	[0x17] = "LISTEN_CONF",
-	[0x18] = "MANUFACTURER_REQ",
-	[0x1a] = "INFO_CONF",
-	[0x1b] = "FACILITY_CONF",
-	[0x1c] = "SELECT_B_PROTOCOL_CONF",
-	[0x1d] = "CONNECT_B3_CONF",
-	[0x1f] = "DISCONNECT_B3_CONF",
-	[0x21] = "DATA_B3_CONF",
-	[0x22] = "RESET_B3_CONF",
-	[0x26] = "CONNECT_IND",
-	[0x27] = "CONNECT_ACTIVE_IND",
-	[0x28] = "DISCONNECT_IND",
-	[0x2a] = "MANUFACTURER_CONF",
-	[0x2c] = "INFO_IND",
-	[0x2d] = "FACILITY_IND",
-	[0x2f] = "CONNECT_B3_IND",
-	[0x30] = "CONNECT_B3_ACTIVE_IND",
-	[0x31] = "DISCONNECT_B3_IND",
-	[0x33] = "DATA_B3_IND",
-	[0x34] = "RESET_B3_IND",
-	[0x35] = "CONNECT_B3_T90_ACTIVE_IND",
-	[0x38] = "CONNECT_RESP",
-	[0x39] = "CONNECT_ACTIVE_RESP",
-	[0x3a] = "DISCONNECT_RESP",
-	[0x3c] = "MANUFACTURER_IND",
-	[0x3e] = "INFO_RESP",
-	[0x3f] = "FACILITY_RESP",
-	[0x41] = "CONNECT_B3_RESP",
-	[0x42] = "CONNECT_B3_ACTIVE_RESP",
-	[0x43] = "DISCONNECT_B3_RESP",
-	[0x45] = "DATA_B3_RESP",
-	[0x46] = "RESET_B3_RESP",
-	[0x47] = "CONNECT_B3_T90_ACTIVE_RESP",
-	[0x4e] = "MANUFACTURER_RESP"
-};
-
-/**
- * capi_cmd2str() - convert CAPI 2.0 command/subcommand number to name
- * @cmd:	command number
- * @subcmd:	subcommand number
- *
- * Return value: static string
- */
-
-char *capi_cmd2str(u8 cmd, u8 subcmd)
-{
-	char *result;
-
-	result = mnames[command_2_index(cmd, subcmd)];
-	if (result == NULL)
-		result = "INVALID_COMMAND";
-	return result;
-}
-
-
-/*-------------------------------------------------------*/
-
-#ifdef CONFIG_CAPI_TRACE
-
-/*-------------------------------------------------------*/
-
-static char *pnames[] =
-{
-	/*00 */ NULL,
-	/*01 */ NULL,
-	/*02 */ NULL,
-	/*03 */ "Controller/PLCI/NCCI",
-	/*04 */ "AdditionalInfo",
-	/*05 */ "B1configuration",
-	/*06 */ "B1protocol",
-	/*07 */ "B2configuration",
-	/*08 */ "B2protocol",
-	/*09 */ "B3configuration",
-	/*0a */ "B3protocol",
-	/*0b */ "BC",
-	/*0c */ "BChannelinformation",
-	/*0d */ "BProtocol",
-	/*0e */ "CalledPartyNumber",
-	/*0f */ "CalledPartySubaddress",
-	/*10 */ "CallingPartyNumber",
-	/*11 */ "CallingPartySubaddress",
-	/*12 */ "CIPmask",
-	/*13 */ "CIPmask2",
-	/*14 */ "CIPValue",
-	/*15 */ "Class",
-	/*16 */ "ConnectedNumber",
-	/*17 */ "ConnectedSubaddress",
-	/*18 */ "Data32",
-	/*19 */ "DataHandle",
-	/*1a */ "DataLength",
-	/*1b */ "FacilityConfirmationParameter",
-	/*1c */ "Facilitydataarray",
-	/*1d */ "FacilityIndicationParameter",
-	/*1e */ "FacilityRequestParameter",
-	/*1f */ "FacilitySelector",
-	/*20 */ "Flags",
-	/*21 */ "Function",
-	/*22 */ "HLC",
-	/*23 */ "Info",
-	/*24 */ "InfoElement",
-	/*25 */ "InfoMask",
-	/*26 */ "InfoNumber",
-	/*27 */ "Keypadfacility",
-	/*28 */ "LLC",
-	/*29 */ "ManuData",
-	/*2a */ "ManuID",
-	/*2b */ "NCPI",
-	/*2c */ "Reason",
-	/*2d */ "Reason_B3",
-	/*2e */ "Reject",
-	/*2f */ "Useruserdata"
-};
-
-#include <linux/stdarg.h>
-
-/*-------------------------------------------------------*/
-static _cdebbuf *bufprint(_cdebbuf *cdb, char *fmt, ...)
-{
-	va_list f;
-	size_t n, r;
-
-	if (!cdb)
-		return NULL;
-	va_start(f, fmt);
-	r = cdb->size - cdb->pos;
-	n = vsnprintf(cdb->p, r, fmt, f);
-	va_end(f);
-	if (n >= r) {
-		/* truncated, need bigger buffer */
-		size_t ns = 2 * cdb->size;
-		u_char *nb;
-
-		while ((ns - cdb->pos) <= n)
-			ns *= 2;
-		nb = kmalloc(ns, GFP_ATOMIC);
-		if (!nb) {
-			cdebbuf_free(cdb);
-			return NULL;
-		}
-		memcpy(nb, cdb->buf, cdb->pos);
-		kfree(cdb->buf);
-		nb[cdb->pos] = 0;
-		cdb->buf = nb;
-		cdb->p = cdb->buf + cdb->pos;
-		cdb->size = ns;
-		va_start(f, fmt);
-		r = cdb->size - cdb->pos;
-		n = vsnprintf(cdb->p, r, fmt, f);
-		va_end(f);
-	}
-	cdb->p += n;
-	cdb->pos += n;
-	return cdb;
-}
-
-static _cdebbuf *printstructlen(_cdebbuf *cdb, u8 *m, unsigned len)
-{
-	unsigned hex = 0;
-
-	if (!cdb)
-		return NULL;
-	for (; len; len--, m++)
-		if (isalnum(*m) || *m == ' ') {
-			if (hex)
-				cdb = bufprint(cdb, ">");
-			cdb = bufprint(cdb, "%c", *m);
-			hex = 0;
-		} else {
-			if (!hex)
-				cdb = bufprint(cdb, "<%02x", *m);
-			else
-				cdb = bufprint(cdb, " %02x", *m);
-			hex = 1;
-		}
-	if (hex)
-		cdb = bufprint(cdb, ">");
-	return cdb;
-}
-
-static _cdebbuf *printstruct(_cdebbuf *cdb, u8 *m)
-{
-	unsigned len;
-
-	if (m[0] != 0xff) {
-		len = m[0];
-		m += 1;
-	} else {
-		len = ((u16 *) (m + 1))[0];
-		m += 3;
-	}
-	cdb = printstructlen(cdb, m, len);
-	return cdb;
-}
-
-/*-------------------------------------------------------*/
-#define NAME (pnames[cmsg->par[cmsg->p]])
-
-static _cdebbuf *protocol_message_2_pars(_cdebbuf *cdb, _cmsg *cmsg, int level)
-{
-	if (!cmsg->par)
-		return NULL;	/* invalid command/subcommand */
-
-	for (; TYP != _CEND; cmsg->p++) {
-		int slen = 29 + 3 - level;
-		int i;
-
-		if (!cdb)
-			return NULL;
-		cdb = bufprint(cdb, "  ");
-		for (i = 0; i < level - 1; i++)
-			cdb = bufprint(cdb, " ");
-
-		switch (TYP) {
-		case _CBYTE:
-			cdb = bufprint(cdb, "%-*s = 0x%x\n", slen, NAME, *(u8 *) (cmsg->m + cmsg->l));
-			cmsg->l++;
-			break;
-		case _CWORD:
-			cdb = bufprint(cdb, "%-*s = 0x%x\n", slen, NAME, *(u16 *) (cmsg->m + cmsg->l));
-			cmsg->l += 2;
-			break;
-		case _CDWORD:
-			cdb = bufprint(cdb, "%-*s = 0x%lx\n", slen, NAME, *(u32 *) (cmsg->m + cmsg->l));
-			cmsg->l += 4;
-			break;
-		case _CSTRUCT:
-			cdb = bufprint(cdb, "%-*s = ", slen, NAME);
-			if (cmsg->m[cmsg->l] == '\0')
-				cdb = bufprint(cdb, "default");
-			else
-				cdb = printstruct(cdb, cmsg->m + cmsg->l);
-			cdb = bufprint(cdb, "\n");
-			if (cmsg->m[cmsg->l] != 0xff)
-				cmsg->l += 1 + cmsg->m[cmsg->l];
-			else
-				cmsg->l += 3 + *(u16 *) (cmsg->m + cmsg->l + 1);
-
-			break;
-
-		case _CMSTRUCT:
-/*----- Metastruktur 0 -----*/
-			if (cmsg->m[cmsg->l] == '\0') {
-				cdb = bufprint(cdb, "%-*s = default\n", slen, NAME);
-				cmsg->l++;
-				jumpcstruct(cmsg);
-			} else {
-				char *name = NAME;
-				unsigned _l = cmsg->l;
-				cdb = bufprint(cdb, "%-*s\n", slen, name);
-				cmsg->l = (cmsg->m + _l)[0] == 255 ? cmsg->l + 3 : cmsg->l + 1;
-				cmsg->p++;
-				cdb = protocol_message_2_pars(cdb, cmsg, level + 1);
-			}
-			break;
-		}
-	}
-	return cdb;
-}
-/*-------------------------------------------------------*/
-
-static _cdebbuf *g_debbuf;
-static u_long g_debbuf_lock;
-static _cmsg *g_cmsg;
-
-static _cdebbuf *cdebbuf_alloc(void)
-{
-	_cdebbuf *cdb;
-
-	if (likely(!test_and_set_bit(1, &g_debbuf_lock))) {
-		cdb = g_debbuf;
-		goto init;
-	} else
-		cdb = kmalloc_obj(_cdebbuf, GFP_ATOMIC);
-	if (!cdb)
-		return NULL;
-	cdb->buf = kmalloc(CDEBUG_SIZE, GFP_ATOMIC);
-	if (!cdb->buf) {
-		kfree(cdb);
-		return NULL;
-	}
-	cdb->size = CDEBUG_SIZE;
-init:
-	cdb->buf[0] = 0;
-	cdb->p = cdb->buf;
-	cdb->pos = 0;
-	return cdb;
-}
-
-/**
- * cdebbuf_free() - free CAPI debug buffer
- * @cdb:	buffer to free
- */
-
-void cdebbuf_free(_cdebbuf *cdb)
-{
-	if (likely(cdb == g_debbuf)) {
-		test_and_clear_bit(1, &g_debbuf_lock);
-		return;
-	}
-	if (likely(cdb))
-		kfree(cdb->buf);
-	kfree(cdb);
-}
-
-
-/**
- * capi_message2str() - format CAPI 2.0 message for printing
- * @msg:	CAPI 2.0 message
- *
- * Allocates a CAPI debug buffer and fills it with a printable representation
- * of the CAPI 2.0 message in @msg.
- * Return value: allocated debug buffer, NULL on error
- * The returned buffer should be freed by a call to cdebbuf_free() after use.
- */
-
-_cdebbuf *capi_message2str(u8 *msg)
-{
-	_cdebbuf *cdb;
-	_cmsg	*cmsg;
-
-	cdb = cdebbuf_alloc();
-	if (unlikely(!cdb))
-		return NULL;
-	if (likely(cdb == g_debbuf))
-		cmsg = g_cmsg;
-	else
-		cmsg = kmalloc_obj(_cmsg, GFP_ATOMIC);
-	if (unlikely(!cmsg)) {
-		cdebbuf_free(cdb);
-		return NULL;
-	}
-	cmsg->m = msg;
-	cmsg->l = 8;
-	cmsg->p = 0;
-	byteTRcpy(cmsg->m + 4, &cmsg->Command);
-	byteTRcpy(cmsg->m + 5, &cmsg->Subcommand);
-	cmsg->par = capi_cmd2par(cmsg->Command, cmsg->Subcommand);
-
-	cdb = bufprint(cdb, "%-26s ID=%03d #0x%04x LEN=%04d\n",
-		       capi_cmd2str(cmsg->Command, cmsg->Subcommand),
-		       ((unsigned short *) msg)[1],
-		       ((unsigned short *) msg)[3],
-		       ((unsigned short *) msg)[0]);
-
-	cdb = protocol_message_2_pars(cdb, cmsg, 1);
-	if (unlikely(cmsg != g_cmsg))
-		kfree(cmsg);
-	return cdb;
-}
-
-int __init cdebug_init(void)
-{
-	g_cmsg = kmalloc_obj(_cmsg);
-	if (!g_cmsg)
-		return -ENOMEM;
-	g_debbuf = kmalloc_obj(_cdebbuf);
-	if (!g_debbuf) {
-		kfree(g_cmsg);
-		return -ENOMEM;
-	}
-	g_debbuf->buf = kmalloc(CDEBUG_GSIZE, GFP_KERNEL);
-	if (!g_debbuf->buf) {
-		kfree(g_cmsg);
-		kfree(g_debbuf);
-		return -ENOMEM;
-	}
-	g_debbuf->size = CDEBUG_GSIZE;
-	g_debbuf->buf[0] = 0;
-	g_debbuf->p = g_debbuf->buf;
-	g_debbuf->pos = 0;
-	return 0;
-}
-
-void cdebug_exit(void)
-{
-	if (g_debbuf)
-		kfree(g_debbuf->buf);
-	kfree(g_debbuf);
-	kfree(g_cmsg);
-}
-
-#else /* !CONFIG_CAPI_TRACE */
-
-static _cdebbuf g_debbuf = {"CONFIG_CAPI_TRACE not enabled", NULL, 0, 0};
-
-_cdebbuf *capi_message2str(u8 *msg)
-{
-	return &g_debbuf;
-}
-
-_cdebbuf *capi_cmsg2str(_cmsg *cmsg)
-{
-	return &g_debbuf;
-}
-
-void cdebbuf_free(_cdebbuf *cdb)
-{
-}
-
-int __init cdebug_init(void)
-{
-	return 0;
-}
-
-void cdebug_exit(void)
-{
-}
-
-#endif
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
deleted file mode 100644
index f9fa17d68095..000000000000
--- a/drivers/isdn/capi/kcapi.c
+++ /dev/null
@@ -1,933 +0,0 @@
-/* $Id: kcapi.c,v 1.1.2.8 2004/03/26 19:57:20 armin Exp $
- *
- * Kernel CAPI 2.0 Module
- *
- * Copyright 1999 by Carsten Paeth <calle@calle.de>
- * Copyright 2002 by Kai Germaschewski <kai@germaschewski.name>
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#include "kcapi.h"
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/proc_fs.h>
-#include <linux/sched/signal.h>
-#include <linux/seq_file.h>
-#include <linux/skbuff.h>
-#include <linux/workqueue.h>
-#include <linux/capi.h>
-#include <linux/kernelcapi.h>
-#include <linux/init.h>
-#include <linux/moduleparam.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/isdn/capicmd.h>
-#include <linux/isdn/capiutil.h>
-#include <linux/mutex.h>
-#include <linux/rcupdate.h>
-
-static int showcapimsgs;
-static struct workqueue_struct *kcapi_wq;
-
-module_param(showcapimsgs, uint, 0);
-
-/* ------------------------------------------------------------- */
-
-struct capictr_event {
-	struct work_struct work;
-	unsigned int type;
-	u32 controller;
-};
-
-/* ------------------------------------------------------------- */
-
-static const struct capi_version driver_version = {2, 0, 1, 1 << 4};
-static char driver_serial[CAPI_SERIAL_LEN] = "0004711";
-static char capi_manufakturer[64] = "AVM Berlin";
-
-#define NCCI2CTRL(ncci)    (((ncci) >> 24) & 0x7f)
-
-struct capi_ctr *capi_controller[CAPI_MAXCONTR];
-DEFINE_MUTEX(capi_controller_lock);
-
-struct capi20_appl *capi_applications[CAPI_MAXAPPL];
-
-static int ncontrollers;
-
-/* -------- controller ref counting -------------------------------------- */
-
-static inline struct capi_ctr *
-capi_ctr_get(struct capi_ctr *ctr)
-{
-	if (!try_module_get(ctr->owner))
-		return NULL;
-	return ctr;
-}
-
-static inline void
-capi_ctr_put(struct capi_ctr *ctr)
-{
-	module_put(ctr->owner);
-}
-
-/* ------------------------------------------------------------- */
-
-static inline struct capi_ctr *get_capi_ctr_by_nr(u16 contr)
-{
-	if (contr < 1 || contr - 1 >= CAPI_MAXCONTR)
-		return NULL;
-
-	return capi_controller[contr - 1];
-}
-
-static inline struct capi20_appl *__get_capi_appl_by_nr(u16 applid)
-{
-	lockdep_assert_held(&capi_controller_lock);
-
-	if (applid < 1 || applid - 1 >= CAPI_MAXAPPL)
-		return NULL;
-
-	return capi_applications[applid - 1];
-}
-
-static inline struct capi20_appl *get_capi_appl_by_nr(u16 applid)
-{
-	if (applid < 1 || applid - 1 >= CAPI_MAXAPPL)
-		return NULL;
-
-	return rcu_dereference(capi_applications[applid - 1]);
-}
-
-/* -------- util functions ------------------------------------ */
-
-static inline int capi_cmd_valid(u8 cmd)
-{
-	switch (cmd) {
-	case CAPI_ALERT:
-	case CAPI_CONNECT:
-	case CAPI_CONNECT_ACTIVE:
-	case CAPI_CONNECT_B3_ACTIVE:
-	case CAPI_CONNECT_B3:
-	case CAPI_CONNECT_B3_T90_ACTIVE:
-	case CAPI_DATA_B3:
-	case CAPI_DISCONNECT_B3:
-	case CAPI_DISCONNECT:
-	case CAPI_FACILITY:
-	case CAPI_INFO:
-	case CAPI_LISTEN:
-	case CAPI_MANUFACTURER:
-	case CAPI_RESET_B3:
-	case CAPI_SELECT_B_PROTOCOL:
-		return 1;
-	}
-	return 0;
-}
-
-static inline int capi_subcmd_valid(u8 subcmd)
-{
-	switch (subcmd) {
-	case CAPI_REQ:
-	case CAPI_CONF:
-	case CAPI_IND:
-	case CAPI_RESP:
-		return 1;
-	}
-	return 0;
-}
-
-/* ------------------------------------------------------------ */
-
-static void
-register_appl(struct capi_ctr *ctr, u16 applid, capi_register_params *rparam)
-{
-	ctr = capi_ctr_get(ctr);
-
-	if (ctr)
-		ctr->register_appl(ctr, applid, rparam);
-	else
-		printk(KERN_WARNING "%s: cannot get controller resources\n",
-		       __func__);
-}
-
-
-static void release_appl(struct capi_ctr *ctr, u16 applid)
-{
-	DBG("applid %#x", applid);
-
-	ctr->release_appl(ctr, applid);
-	capi_ctr_put(ctr);
-}
-
-static void notify_up(u32 contr)
-{
-	struct capi20_appl *ap;
-	struct capi_ctr *ctr;
-	u16 applid;
-
-	mutex_lock(&capi_controller_lock);
-
-	if (showcapimsgs & 1)
-		printk(KERN_DEBUG "kcapi: notify up contr %d\n", contr);
-
-	ctr = get_capi_ctr_by_nr(contr);
-	if (ctr) {
-		if (ctr->state == CAPI_CTR_RUNNING)
-			goto unlock_out;
-
-		ctr->state = CAPI_CTR_RUNNING;
-
-		for (applid = 1; applid <= CAPI_MAXAPPL; applid++) {
-			ap = __get_capi_appl_by_nr(applid);
-			if (ap)
-				register_appl(ctr, applid, &ap->rparam);
-		}
-	} else
-		printk(KERN_WARNING "%s: invalid contr %d\n", __func__, contr);
-
-unlock_out:
-	mutex_unlock(&capi_controller_lock);
-}
-
-static void ctr_down(struct capi_ctr *ctr, int new_state)
-{
-	struct capi20_appl *ap;
-	u16 applid;
-
-	if (ctr->state == CAPI_CTR_DETECTED || ctr->state == CAPI_CTR_DETACHED)
-		return;
-
-	ctr->state = new_state;
-
-	memset(ctr->manu, 0, sizeof(ctr->manu));
-	memset(&ctr->version, 0, sizeof(ctr->version));
-	memset(&ctr->profile, 0, sizeof(ctr->profile));
-	memset(ctr->serial, 0, sizeof(ctr->serial));
-
-	for (applid = 1; applid <= CAPI_MAXAPPL; applid++) {
-		ap = __get_capi_appl_by_nr(applid);
-		if (ap)
-			capi_ctr_put(ctr);
-	}
-}
-
-static void notify_down(u32 contr)
-{
-	struct capi_ctr *ctr;
-
-	mutex_lock(&capi_controller_lock);
-
-	if (showcapimsgs & 1)
-		printk(KERN_DEBUG "kcapi: notify down contr %d\n", contr);
-
-	ctr = get_capi_ctr_by_nr(contr);
-	if (ctr)
-		ctr_down(ctr, CAPI_CTR_DETECTED);
-	else
-		printk(KERN_WARNING "%s: invalid contr %d\n", __func__, contr);
-
-	mutex_unlock(&capi_controller_lock);
-}
-
-static void do_notify_work(struct work_struct *work)
-{
-	struct capictr_event *event =
-		container_of(work, struct capictr_event, work);
-
-	switch (event->type) {
-	case CAPICTR_UP:
-		notify_up(event->controller);
-		break;
-	case CAPICTR_DOWN:
-		notify_down(event->controller);
-		break;
-	}
-
-	kfree(event);
-}
-
-static int notify_push(unsigned int event_type, u32 controller)
-{
-	struct capictr_event *event = kmalloc_obj(*event, GFP_ATOMIC);
-
-	if (!event)
-		return -ENOMEM;
-
-	INIT_WORK(&event->work, do_notify_work);
-	event->type = event_type;
-	event->controller = controller;
-
-	queue_work(kcapi_wq, &event->work);
-	return 0;
-}
-
-/* -------- Receiver ------------------------------------------ */
-
-static void recv_handler(struct work_struct *work)
-{
-	struct sk_buff *skb;
-	struct capi20_appl *ap =
-		container_of(work, struct capi20_appl, recv_work);
-
-	if ((!ap) || (ap->release_in_progress))
-		return;
-
-	mutex_lock(&ap->recv_mtx);
-	while ((skb = skb_dequeue(&ap->recv_queue))) {
-		if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_IND)
-			ap->nrecvdatapkt++;
-		else
-			ap->nrecvctlpkt++;
-
-		ap->recv_message(ap, skb);
-	}
-	mutex_unlock(&ap->recv_mtx);
-}
-
-/**
- * capi_ctr_handle_message() - handle incoming CAPI message
- * @ctr:	controller descriptor structure.
- * @appl:	application ID.
- * @skb:	message.
- *
- * Called by hardware driver to pass a CAPI message to the application.
- */
-
-void capi_ctr_handle_message(struct capi_ctr *ctr, u16 appl,
-			     struct sk_buff *skb)
-{
-	struct capi20_appl *ap;
-	int showctl = 0;
-	u8 cmd, subcmd;
-	_cdebbuf *cdb;
-
-	if (ctr->state != CAPI_CTR_RUNNING) {
-		cdb = capi_message2str(skb->data);
-		if (cdb) {
-			printk(KERN_INFO "kcapi: controller [%03d] not active, got: %s",
-			       ctr->cnr, cdb->buf);
-			cdebbuf_free(cdb);
-		} else
-			printk(KERN_INFO "kcapi: controller [%03d] not active, cannot trace\n",
-			       ctr->cnr);
-		goto error;
-	}
-
-	cmd = CAPIMSG_COMMAND(skb->data);
-	subcmd = CAPIMSG_SUBCOMMAND(skb->data);
-	if (cmd == CAPI_DATA_B3 && subcmd == CAPI_IND) {
-		ctr->nrecvdatapkt++;
-		if (ctr->traceflag > 2)
-			showctl |= 2;
-	} else {
-		ctr->nrecvctlpkt++;
-		if (ctr->traceflag)
-			showctl |= 2;
-	}
-	showctl |= (ctr->traceflag & 1);
-	if (showctl & 2) {
-		if (showctl & 1) {
-			printk(KERN_DEBUG "kcapi: got [%03d] id#%d %s len=%u\n",
-			       ctr->cnr, CAPIMSG_APPID(skb->data),
-			       capi_cmd2str(cmd, subcmd),
-			       CAPIMSG_LEN(skb->data));
-		} else {
-			cdb = capi_message2str(skb->data);
-			if (cdb) {
-				printk(KERN_DEBUG "kcapi: got [%03d] %s\n",
-				       ctr->cnr, cdb->buf);
-				cdebbuf_free(cdb);
-			} else
-				printk(KERN_DEBUG "kcapi: got [%03d] id#%d %s len=%u, cannot trace\n",
-				       ctr->cnr, CAPIMSG_APPID(skb->data),
-				       capi_cmd2str(cmd, subcmd),
-				       CAPIMSG_LEN(skb->data));
-		}
-
-	}
-
-	rcu_read_lock();
-	ap = get_capi_appl_by_nr(CAPIMSG_APPID(skb->data));
-	if (!ap) {
-		rcu_read_unlock();
-		cdb = capi_message2str(skb->data);
-		if (cdb) {
-			printk(KERN_ERR "kcapi: handle_message: applid %d state released (%s)\n",
-			       CAPIMSG_APPID(skb->data), cdb->buf);
-			cdebbuf_free(cdb);
-		} else
-			printk(KERN_ERR "kcapi: handle_message: applid %d state released (%s) cannot trace\n",
-			       CAPIMSG_APPID(skb->data),
-			       capi_cmd2str(cmd, subcmd));
-		goto error;
-	}
-	skb_queue_tail(&ap->recv_queue, skb);
-	queue_work(kcapi_wq, &ap->recv_work);
-	rcu_read_unlock();
-
-	return;
-
-error:
-	kfree_skb(skb);
-}
-
-EXPORT_SYMBOL(capi_ctr_handle_message);
-
-/**
- * capi_ctr_ready() - signal CAPI controller ready
- * @ctr:	controller descriptor structure.
- *
- * Called by hardware driver to signal that the controller is up and running.
- */
-
-void capi_ctr_ready(struct capi_ctr *ctr)
-{
-	printk(KERN_NOTICE "kcapi: controller [%03d] \"%s\" ready.\n",
-	       ctr->cnr, ctr->name);
-
-	notify_push(CAPICTR_UP, ctr->cnr);
-}
-
-EXPORT_SYMBOL(capi_ctr_ready);
-
-/**
- * capi_ctr_down() - signal CAPI controller not ready
- * @ctr:	controller descriptor structure.
- *
- * Called by hardware driver to signal that the controller is down and
- * unavailable for use.
- */
-
-void capi_ctr_down(struct capi_ctr *ctr)
-{
-	printk(KERN_NOTICE "kcapi: controller [%03d] down.\n", ctr->cnr);
-
-	notify_push(CAPICTR_DOWN, ctr->cnr);
-}
-
-EXPORT_SYMBOL(capi_ctr_down);
-
-/* ------------------------------------------------------------- */
-
-/**
- * attach_capi_ctr() - register CAPI controller
- * @ctr:	controller descriptor structure.
- *
- * Called by hardware driver to register a controller with the CAPI subsystem.
- * Return value: 0 on success, error code < 0 on error
- */
-
-int attach_capi_ctr(struct capi_ctr *ctr)
-{
-	int i;
-
-	mutex_lock(&capi_controller_lock);
-
-	for (i = 0; i < CAPI_MAXCONTR; i++) {
-		if (!capi_controller[i])
-			break;
-	}
-	if (i == CAPI_MAXCONTR) {
-		mutex_unlock(&capi_controller_lock);
-		printk(KERN_ERR "kcapi: out of controller slots\n");
-		return -EBUSY;
-	}
-	capi_controller[i] = ctr;
-
-	ctr->nrecvctlpkt = 0;
-	ctr->nrecvdatapkt = 0;
-	ctr->nsentctlpkt = 0;
-	ctr->nsentdatapkt = 0;
-	ctr->cnr = i + 1;
-	ctr->state = CAPI_CTR_DETECTED;
-	ctr->blocked = 0;
-	ctr->traceflag = showcapimsgs;
-
-	sprintf(ctr->procfn, "capi/controllers/%d", ctr->cnr);
-	ctr->procent = proc_create_single_data(ctr->procfn, 0, NULL,
-			ctr->proc_show, ctr);
-
-	ncontrollers++;
-
-	mutex_unlock(&capi_controller_lock);
-
-	printk(KERN_NOTICE "kcapi: controller [%03d]: %s attached\n",
-	       ctr->cnr, ctr->name);
-	return 0;
-}
-
-EXPORT_SYMBOL(attach_capi_ctr);
-
-/**
- * detach_capi_ctr() - unregister CAPI controller
- * @ctr:	controller descriptor structure.
- *
- * Called by hardware driver to remove the registration of a controller
- * with the CAPI subsystem.
- * Return value: 0 on success, error code < 0 on error
- */
-
-int detach_capi_ctr(struct capi_ctr *ctr)
-{
-	int err = 0;
-
-	mutex_lock(&capi_controller_lock);
-
-	ctr_down(ctr, CAPI_CTR_DETACHED);
-
-	if (ctr->cnr < 1 || ctr->cnr - 1 >= CAPI_MAXCONTR) {
-		err = -EINVAL;
-		goto unlock_out;
-	}
-
-	if (capi_controller[ctr->cnr - 1] != ctr) {
-		err = -EINVAL;
-		goto unlock_out;
-	}
-	capi_controller[ctr->cnr - 1] = NULL;
-	ncontrollers--;
-
-	if (ctr->procent)
-		remove_proc_entry(ctr->procfn, NULL);
-
-	printk(KERN_NOTICE "kcapi: controller [%03d]: %s unregistered\n",
-	       ctr->cnr, ctr->name);
-
-unlock_out:
-	mutex_unlock(&capi_controller_lock);
-
-	return err;
-}
-
-EXPORT_SYMBOL(detach_capi_ctr);
-
-/* ------------------------------------------------------------- */
-/* -------- CAPI2.0 Interface ---------------------------------- */
-/* ------------------------------------------------------------- */
-
-/**
- * capi20_isinstalled() - CAPI 2.0 operation CAPI_INSTALLED
- *
- * Return value: CAPI result code (CAPI_NOERROR if at least one ISDN controller
- *	is ready for use, CAPI_REGNOTINSTALLED otherwise)
- */
-
-u16 capi20_isinstalled(void)
-{
-	u16 ret = CAPI_REGNOTINSTALLED;
-	int i;
-
-	mutex_lock(&capi_controller_lock);
-
-	for (i = 0; i < CAPI_MAXCONTR; i++)
-		if (capi_controller[i] &&
-		    capi_controller[i]->state == CAPI_CTR_RUNNING) {
-			ret = CAPI_NOERROR;
-			break;
-		}
-
-	mutex_unlock(&capi_controller_lock);
-
-	return ret;
-}
-
-/**
- * capi20_register() - CAPI 2.0 operation CAPI_REGISTER
- * @ap:		CAPI application descriptor structure.
- *
- * Register an application's presence with CAPI.
- * A unique application ID is assigned and stored in @ap->applid.
- * After this function returns successfully, the message receive
- * callback function @ap->recv_message() may be called at any time
- * until capi20_release() has been called for the same @ap.
- * Return value: CAPI result code
- */
-
-u16 capi20_register(struct capi20_appl *ap)
-{
-	int i;
-	u16 applid;
-
-	DBG("");
-
-	if (ap->rparam.datablklen < 128)
-		return CAPI_LOGBLKSIZETOSMALL;
-
-	ap->nrecvctlpkt = 0;
-	ap->nrecvdatapkt = 0;
-	ap->nsentctlpkt = 0;
-	ap->nsentdatapkt = 0;
-	mutex_init(&ap->recv_mtx);
-	skb_queue_head_init(&ap->recv_queue);
-	INIT_WORK(&ap->recv_work, recv_handler);
-	ap->release_in_progress = 0;
-
-	mutex_lock(&capi_controller_lock);
-
-	for (applid = 1; applid <= CAPI_MAXAPPL; applid++) {
-		if (capi_applications[applid - 1] == NULL)
-			break;
-	}
-	if (applid > CAPI_MAXAPPL) {
-		mutex_unlock(&capi_controller_lock);
-		return CAPI_TOOMANYAPPLS;
-	}
-
-	ap->applid = applid;
-	capi_applications[applid - 1] = ap;
-
-	for (i = 0; i < CAPI_MAXCONTR; i++) {
-		if (!capi_controller[i] ||
-		    capi_controller[i]->state != CAPI_CTR_RUNNING)
-			continue;
-		register_appl(capi_controller[i], applid, &ap->rparam);
-	}
-
-	mutex_unlock(&capi_controller_lock);
-
-	if (showcapimsgs & 1) {
-		printk(KERN_DEBUG "kcapi: appl %d up\n", applid);
-	}
-
-	return CAPI_NOERROR;
-}
-
-/**
- * capi20_release() - CAPI 2.0 operation CAPI_RELEASE
- * @ap:		CAPI application descriptor structure.
- *
- * Terminate an application's registration with CAPI.
- * After this function returns successfully, the message receive
- * callback function @ap->recv_message() will no longer be called.
- * Return value: CAPI result code
- */
-
-u16 capi20_release(struct capi20_appl *ap)
-{
-	int i;
-
-	DBG("applid %#x", ap->applid);
-
-	mutex_lock(&capi_controller_lock);
-
-	ap->release_in_progress = 1;
-	capi_applications[ap->applid - 1] = NULL;
-
-	synchronize_rcu();
-
-	for (i = 0; i < CAPI_MAXCONTR; i++) {
-		if (!capi_controller[i] ||
-		    capi_controller[i]->state != CAPI_CTR_RUNNING)
-			continue;
-		release_appl(capi_controller[i], ap->applid);
-	}
-
-	mutex_unlock(&capi_controller_lock);
-
-	flush_workqueue(kcapi_wq);
-	skb_queue_purge(&ap->recv_queue);
-
-	if (showcapimsgs & 1) {
-		printk(KERN_DEBUG "kcapi: appl %d down\n", ap->applid);
-	}
-
-	return CAPI_NOERROR;
-}
-
-/**
- * capi20_put_message() - CAPI 2.0 operation CAPI_PUT_MESSAGE
- * @ap:		CAPI application descriptor structure.
- * @skb:	CAPI message.
- *
- * Transfer a single message to CAPI.
- * Return value: CAPI result code
- */
-
-u16 capi20_put_message(struct capi20_appl *ap, struct sk_buff *skb)
-{
-	struct capi_ctr *ctr;
-	int showctl = 0;
-	u8 cmd, subcmd;
-
-	DBG("applid %#x", ap->applid);
-
-	if (ncontrollers == 0)
-		return CAPI_REGNOTINSTALLED;
-	if ((ap->applid == 0) || ap->release_in_progress)
-		return CAPI_ILLAPPNR;
-	if (skb->len < 12
-	    || !capi_cmd_valid(CAPIMSG_COMMAND(skb->data))
-	    || !capi_subcmd_valid(CAPIMSG_SUBCOMMAND(skb->data)))
-		return CAPI_ILLCMDORSUBCMDORMSGTOSMALL;
-
-	/*
-	 * The controller reference is protected by the existence of the
-	 * application passed to us. We assume that the caller properly
-	 * synchronizes this service with capi20_release.
-	 */
-	ctr = get_capi_ctr_by_nr(CAPIMSG_CONTROLLER(skb->data));
-	if (!ctr || ctr->state != CAPI_CTR_RUNNING)
-		return CAPI_REGNOTINSTALLED;
-	if (ctr->blocked)
-		return CAPI_SENDQUEUEFULL;
-
-	cmd = CAPIMSG_COMMAND(skb->data);
-	subcmd = CAPIMSG_SUBCOMMAND(skb->data);
-
-	if (cmd == CAPI_DATA_B3 && subcmd == CAPI_REQ) {
-		ctr->nsentdatapkt++;
-		ap->nsentdatapkt++;
-		if (ctr->traceflag > 2)
-			showctl |= 2;
-	} else {
-		ctr->nsentctlpkt++;
-		ap->nsentctlpkt++;
-		if (ctr->traceflag)
-			showctl |= 2;
-	}
-	showctl |= (ctr->traceflag & 1);
-	if (showctl & 2) {
-		if (showctl & 1) {
-			printk(KERN_DEBUG "kcapi: put [%03d] id#%d %s len=%u\n",
-			       CAPIMSG_CONTROLLER(skb->data),
-			       CAPIMSG_APPID(skb->data),
-			       capi_cmd2str(cmd, subcmd),
-			       CAPIMSG_LEN(skb->data));
-		} else {
-			_cdebbuf *cdb = capi_message2str(skb->data);
-			if (cdb) {
-				printk(KERN_DEBUG "kcapi: put [%03d] %s\n",
-				       CAPIMSG_CONTROLLER(skb->data),
-				       cdb->buf);
-				cdebbuf_free(cdb);
-			} else
-				printk(KERN_DEBUG "kcapi: put [%03d] id#%d %s len=%u cannot trace\n",
-				       CAPIMSG_CONTROLLER(skb->data),
-				       CAPIMSG_APPID(skb->data),
-				       capi_cmd2str(cmd, subcmd),
-				       CAPIMSG_LEN(skb->data));
-		}
-	}
-	return ctr->send_message(ctr, skb);
-}
-
-/**
- * capi20_get_manufacturer() - CAPI 2.0 operation CAPI_GET_MANUFACTURER
- * @contr:	controller number.
- * @buf:	result buffer (64 bytes).
- *
- * Retrieve information about the manufacturer of the specified ISDN controller
- * or (for @contr == 0) the driver itself.
- * Return value: CAPI result code
- */
-
-u16 capi20_get_manufacturer(u32 contr, u8 buf[CAPI_MANUFACTURER_LEN])
-{
-	struct capi_ctr *ctr;
-	u16 ret;
-
-	if (contr == 0) {
-		strscpy_pad(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN);
-		return CAPI_NOERROR;
-	}
-
-	mutex_lock(&capi_controller_lock);
-
-	ctr = get_capi_ctr_by_nr(contr);
-	if (ctr && ctr->state == CAPI_CTR_RUNNING) {
-		strscpy_pad(buf, ctr->manu, CAPI_MANUFACTURER_LEN);
-		ret = CAPI_NOERROR;
-	} else
-		ret = CAPI_REGNOTINSTALLED;
-
-	mutex_unlock(&capi_controller_lock);
-	return ret;
-}
-
-/**
- * capi20_get_version() - CAPI 2.0 operation CAPI_GET_VERSION
- * @contr:	controller number.
- * @verp:	result structure.
- *
- * Retrieve version information for the specified ISDN controller
- * or (for @contr == 0) the driver itself.
- * Return value: CAPI result code
- */
-
-u16 capi20_get_version(u32 contr, struct capi_version *verp)
-{
-	struct capi_ctr *ctr;
-	u16 ret;
-
-	if (contr == 0) {
-		*verp = driver_version;
-		return CAPI_NOERROR;
-	}
-
-	mutex_lock(&capi_controller_lock);
-
-	ctr = get_capi_ctr_by_nr(contr);
-	if (ctr && ctr->state == CAPI_CTR_RUNNING) {
-		memcpy(verp, &ctr->version, sizeof(capi_version));
-		ret = CAPI_NOERROR;
-	} else
-		ret = CAPI_REGNOTINSTALLED;
-
-	mutex_unlock(&capi_controller_lock);
-	return ret;
-}
-
-/**
- * capi20_get_serial() - CAPI 2.0 operation CAPI_GET_SERIAL_NUMBER
- * @contr:	controller number.
- * @serial:	result buffer (8 bytes).
- *
- * Retrieve the serial number of the specified ISDN controller
- * or (for @contr == 0) the driver itself.
- * Return value: CAPI result code
- */
-
-u16 capi20_get_serial(u32 contr, u8 serial[CAPI_SERIAL_LEN])
-{
-	struct capi_ctr *ctr;
-	u16 ret;
-
-	if (contr == 0) {
-		strscpy(serial, driver_serial, CAPI_SERIAL_LEN);
-		return CAPI_NOERROR;
-	}
-
-	mutex_lock(&capi_controller_lock);
-
-	ctr = get_capi_ctr_by_nr(contr);
-	if (ctr && ctr->state == CAPI_CTR_RUNNING) {
-		strscpy(serial, ctr->serial, CAPI_SERIAL_LEN);
-		ret = CAPI_NOERROR;
-	} else
-		ret = CAPI_REGNOTINSTALLED;
-
-	mutex_unlock(&capi_controller_lock);
-	return ret;
-}
-
-/**
- * capi20_get_profile() - CAPI 2.0 operation CAPI_GET_PROFILE
- * @contr:	controller number.
- * @profp:	result structure.
- *
- * Retrieve capability information for the specified ISDN controller
- * or (for @contr == 0) the number of installed controllers.
- * Return value: CAPI result code
- */
-
-u16 capi20_get_profile(u32 contr, struct capi_profile *profp)
-{
-	struct capi_ctr *ctr;
-	u16 ret;
-
-	if (contr == 0) {
-		profp->ncontroller = ncontrollers;
-		return CAPI_NOERROR;
-	}
-
-	mutex_lock(&capi_controller_lock);
-
-	ctr = get_capi_ctr_by_nr(contr);
-	if (ctr && ctr->state == CAPI_CTR_RUNNING) {
-		memcpy(profp, &ctr->profile, sizeof(struct capi_profile));
-		ret = CAPI_NOERROR;
-	} else
-		ret = CAPI_REGNOTINSTALLED;
-
-	mutex_unlock(&capi_controller_lock);
-	return ret;
-}
-
-/**
- * capi20_manufacturer() - CAPI 2.0 operation CAPI_MANUFACTURER
- * @cmd:	command.
- * @data:	parameter.
- *
- * Perform manufacturer specific command.
- * Return value: CAPI result code
- */
-
-int capi20_manufacturer(unsigned long cmd, void __user *data)
-{
-	struct capi_ctr *ctr;
-	int retval;
-
-	switch (cmd) {
-	case KCAPI_CMD_TRACE:
-	{
-		kcapi_flagdef fdef;
-
-		if (copy_from_user(&fdef, data, sizeof(kcapi_flagdef)))
-			return -EFAULT;
-
-		mutex_lock(&capi_controller_lock);
-
-		ctr = get_capi_ctr_by_nr(fdef.contr);
-		if (ctr) {
-			ctr->traceflag = fdef.flag;
-			printk(KERN_INFO "kcapi: contr [%03d] set trace=%d\n",
-			       ctr->cnr, ctr->traceflag);
-			retval = 0;
-		} else
-			retval = -ESRCH;
-
-		mutex_unlock(&capi_controller_lock);
-
-		return retval;
-	}
-
-	default:
-		printk(KERN_ERR "kcapi: manufacturer command %lu unknown.\n",
-		       cmd);
-		break;
-
-	}
-	return -EINVAL;
-}
-
-/* ------------------------------------------------------------- */
-/* -------- Init & Cleanup ------------------------------------- */
-/* ------------------------------------------------------------- */
-
-/*
- * init / exit functions
- */
-
-int __init kcapi_init(void)
-{
-	int err;
-
-	kcapi_wq = alloc_workqueue("kcapi", WQ_PERCPU, 0);
-	if (!kcapi_wq)
-		return -ENOMEM;
-
-	err = cdebug_init();
-	if (err) {
-		destroy_workqueue(kcapi_wq);
-		return err;
-	}
-
-	if (IS_ENABLED(CONFIG_PROC_FS))
-		kcapi_proc_init();
-
-	return 0;
-}
-
-void kcapi_exit(void)
-{
-	if (IS_ENABLED(CONFIG_PROC_FS))
-		kcapi_proc_exit();
-
-	cdebug_exit();
-	destroy_workqueue(kcapi_wq);
-}
diff --git a/drivers/isdn/capi/kcapi.h b/drivers/isdn/capi/kcapi.h
deleted file mode 100644
index 479623e1db2a..000000000000
--- a/drivers/isdn/capi/kcapi.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Kernel CAPI 2.0 Module
- *
- * Copyright 1999 by Carsten Paeth <calle@calle.de>
- * Copyright 2002 by Kai Germaschewski <kai@germaschewski.name>
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/isdn/capilli.h>
-
-#ifdef KCAPI_DEBUG
-#define DBG(format, arg...) do {					\
-		printk(KERN_DEBUG "%s: " format "\n" , __func__ , ## arg); \
-	} while (0)
-#else
-#define DBG(format, arg...) /* */
-#endif
-
-enum {
-	CAPI_CTR_DETACHED = 0,
-	CAPI_CTR_DETECTED = 1,
-	CAPI_CTR_LOADING  = 2,
-	CAPI_CTR_RUNNING  = 3,
-};
-
-extern struct capi_ctr *capi_controller[CAPI_MAXCONTR];
-extern struct mutex capi_controller_lock;
-
-extern struct capi20_appl *capi_applications[CAPI_MAXAPPL];
-
-void kcapi_proc_init(void);
-void kcapi_proc_exit(void);
-
-struct capi20_appl {
-	u16 applid;
-	capi_register_params rparam;
-	void (*recv_message)(struct capi20_appl *ap, struct sk_buff *skb);
-	void *private;
-
-	/* internal to kernelcapi.o */
-	unsigned long nrecvctlpkt;
-	unsigned long nrecvdatapkt;
-	unsigned long nsentctlpkt;
-	unsigned long nsentdatapkt;
-	struct mutex recv_mtx;
-	struct sk_buff_head recv_queue;
-	struct work_struct recv_work;
-	int release_in_progress;
-};
-
-u16 capi20_isinstalled(void);
-u16 capi20_register(struct capi20_appl *ap);
-u16 capi20_release(struct capi20_appl *ap);
-u16 capi20_put_message(struct capi20_appl *ap, struct sk_buff *skb);
-u16 capi20_get_manufacturer(u32 contr, u8 buf[CAPI_MANUFACTURER_LEN]);
-u16 capi20_get_version(u32 contr, struct capi_version *verp);
-u16 capi20_get_serial(u32 contr, u8 serial[CAPI_SERIAL_LEN]);
-u16 capi20_get_profile(u32 contr, struct capi_profile *profp);
-int capi20_manufacturer(unsigned long cmd, void __user *data);
-
-#define CAPICTR_UP			0
-#define CAPICTR_DOWN			1
-
-int kcapi_init(void);
-void kcapi_exit(void);
-
-/*----- basic-type definitions -----*/
-
-typedef __u8 *_cstruct;
-
-typedef enum {
-	CAPI_COMPOSE,
-	CAPI_DEFAULT
-} _cmstruct;
-
-/*
-   The _cmsg structure contains all possible CAPI 2.0 parameter.
-   All parameters are stored here first. The function CAPI_CMSG_2_MESSAGE
-   assembles the parameter and builds CAPI2.0 conform messages.
-   CAPI_MESSAGE_2_CMSG disassembles CAPI 2.0 messages and stores the
-   parameter in the _cmsg structure
- */
-
-typedef struct {
-	/* Header */
-	__u16 ApplId;
-	__u8 Command;
-	__u8 Subcommand;
-	__u16 Messagenumber;
-
-	/* Parameter */
-	union {
-		__u32 adrController;
-		__u32 adrPLCI;
-		__u32 adrNCCI;
-	} adr;
-
-	_cmstruct AdditionalInfo;
-	_cstruct B1configuration;
-	__u16 B1protocol;
-	_cstruct B2configuration;
-	__u16 B2protocol;
-	_cstruct B3configuration;
-	__u16 B3protocol;
-	_cstruct BC;
-	_cstruct BChannelinformation;
-	_cmstruct BProtocol;
-	_cstruct CalledPartyNumber;
-	_cstruct CalledPartySubaddress;
-	_cstruct CallingPartyNumber;
-	_cstruct CallingPartySubaddress;
-	__u32 CIPmask;
-	__u32 CIPmask2;
-	__u16 CIPValue;
-	__u32 Class;
-	_cstruct ConnectedNumber;
-	_cstruct ConnectedSubaddress;
-	__u32 Data;
-	__u16 DataHandle;
-	__u16 DataLength;
-	_cstruct FacilityConfirmationParameter;
-	_cstruct Facilitydataarray;
-	_cstruct FacilityIndicationParameter;
-	_cstruct FacilityRequestParameter;
-	__u16 FacilitySelector;
-	__u16 Flags;
-	__u32 Function;
-	_cstruct HLC;
-	__u16 Info;
-	_cstruct InfoElement;
-	__u32 InfoMask;
-	__u16 InfoNumber;
-	_cstruct Keypadfacility;
-	_cstruct LLC;
-	_cstruct ManuData;
-	__u32 ManuID;
-	_cstruct NCPI;
-	__u16 Reason;
-	__u16 Reason_B3;
-	__u16 Reject;
-	_cstruct Useruserdata;
-
-	/* intern */
-	unsigned l, p;
-	unsigned char *par;
-	__u8 *m;
-
-	/* buffer to construct message */
-	__u8 buf[180];
-
-} _cmsg;
-
-/*-----------------------------------------------------------------------*/
-
-/*
- * Debugging / Tracing functions
- */
-
-char *capi_cmd2str(__u8 cmd, __u8 subcmd);
-
-typedef struct {
-	u_char	*buf;
-	u_char	*p;
-	size_t	size;
-	size_t	pos;
-} _cdebbuf;
-
-#define	CDEBUG_SIZE	1024
-#define	CDEBUG_GSIZE	4096
-
-void cdebbuf_free(_cdebbuf *cdb);
-int cdebug_init(void);
-void cdebug_exit(void);
-
-_cdebbuf *capi_message2str(__u8 *msg);
diff --git a/drivers/isdn/capi/kcapi_proc.c b/drivers/isdn/capi/kcapi_proc.c
deleted file mode 100644
index 77e951206809..000000000000
--- a/drivers/isdn/capi/kcapi_proc.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * Kernel CAPI 2.0 Module - /proc/capi handling
- *
- * Copyright 1999 by Carsten Paeth <calle@calle.de>
- * Copyright 2002 by Kai Germaschewski <kai@germaschewski.name>
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-
-#include "kcapi.h"
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/export.h>
-
-static char *state2str(unsigned short state)
-{
-	switch (state) {
-	case CAPI_CTR_DETECTED:	return "detected";
-	case CAPI_CTR_LOADING:	return "loading";
-	case CAPI_CTR_RUNNING:	return "running";
-	default:	        return "???";
-	}
-}
-
-// /proc/capi
-// ===========================================================================
-
-// /proc/capi/controller:
-//      cnr driver cardstate name driverinfo
-// /proc/capi/contrstats:
-//      cnr nrecvctlpkt nrecvdatapkt nsentctlpkt nsentdatapkt
-// ---------------------------------------------------------------------------
-
-static void *controller_start(struct seq_file *seq, loff_t *pos)
-	__acquires(capi_controller_lock)
-{
-	mutex_lock(&capi_controller_lock);
-
-	if (*pos < CAPI_MAXCONTR)
-		return &capi_controller[*pos];
-
-	return NULL;
-}
-
-static void *controller_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	if (*pos < CAPI_MAXCONTR)
-		return &capi_controller[*pos];
-
-	return NULL;
-}
-
-static void controller_stop(struct seq_file *seq, void *v)
-	__releases(capi_controller_lock)
-{
-	mutex_unlock(&capi_controller_lock);
-}
-
-static int controller_show(struct seq_file *seq, void *v)
-{
-	struct capi_ctr *ctr = *(struct capi_ctr **) v;
-
-	if (!ctr)
-		return 0;
-
-	seq_printf(seq, "%d %-10s %-8s %-16s %s\n",
-		   ctr->cnr, ctr->driver_name,
-		   state2str(ctr->state),
-		   ctr->name,
-		   ctr->procinfo ?  ctr->procinfo(ctr) : "");
-
-	return 0;
-}
-
-static int contrstats_show(struct seq_file *seq, void *v)
-{
-	struct capi_ctr *ctr = *(struct capi_ctr **) v;
-
-	if (!ctr)
-		return 0;
-
-	seq_printf(seq, "%d %lu %lu %lu %lu\n",
-		   ctr->cnr,
-		   ctr->nrecvctlpkt,
-		   ctr->nrecvdatapkt,
-		   ctr->nsentctlpkt,
-		   ctr->nsentdatapkt);
-
-	return 0;
-}
-
-static const struct seq_operations seq_controller_ops = {
-	.start	= controller_start,
-	.next	= controller_next,
-	.stop	= controller_stop,
-	.show	= controller_show,
-};
-
-static const struct seq_operations seq_contrstats_ops = {
-	.start	= controller_start,
-	.next	= controller_next,
-	.stop	= controller_stop,
-	.show	= contrstats_show,
-};
-
-// /proc/capi/applications:
-//      applid l3cnt dblkcnt dblklen #ncci recvqueuelen
-// /proc/capi/applstats:
-//      applid nrecvctlpkt nrecvdatapkt nsentctlpkt nsentdatapkt
-// ---------------------------------------------------------------------------
-
-static void *applications_start(struct seq_file *seq, loff_t *pos)
-	__acquires(capi_controller_lock)
-{
-	mutex_lock(&capi_controller_lock);
-
-	if (*pos < CAPI_MAXAPPL)
-		return &capi_applications[*pos];
-
-	return NULL;
-}
-
-static void *
-applications_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	if (*pos < CAPI_MAXAPPL)
-		return &capi_applications[*pos];
-
-	return NULL;
-}
-
-static void applications_stop(struct seq_file *seq, void *v)
-	__releases(capi_controller_lock)
-{
-	mutex_unlock(&capi_controller_lock);
-}
-
-static int
-applications_show(struct seq_file *seq, void *v)
-{
-	struct capi20_appl *ap = *(struct capi20_appl **) v;
-
-	if (!ap)
-		return 0;
-
-	seq_printf(seq, "%u %d %d %d\n",
-		   ap->applid,
-		   ap->rparam.level3cnt,
-		   ap->rparam.datablkcnt,
-		   ap->rparam.datablklen);
-
-	return 0;
-}
-
-static int
-applstats_show(struct seq_file *seq, void *v)
-{
-	struct capi20_appl *ap = *(struct capi20_appl **) v;
-
-	if (!ap)
-		return 0;
-
-	seq_printf(seq, "%u %lu %lu %lu %lu\n",
-		   ap->applid,
-		   ap->nrecvctlpkt,
-		   ap->nrecvdatapkt,
-		   ap->nsentctlpkt,
-		   ap->nsentdatapkt);
-
-	return 0;
-}
-
-static const struct seq_operations seq_applications_ops = {
-	.start	= applications_start,
-	.next	= applications_next,
-	.stop	= applications_stop,
-	.show	= applications_show,
-};
-
-static const struct seq_operations seq_applstats_ops = {
-	.start	= applications_start,
-	.next	= applications_next,
-	.stop	= applications_stop,
-	.show	= applstats_show,
-};
-
-// ---------------------------------------------------------------------------
-
-/* /proc/capi/drivers is always empty */
-static ssize_t empty_read(struct file *file, char __user *buf,
-			  size_t size, loff_t *off)
-{
-	return 0;
-}
-
-static const struct proc_ops empty_proc_ops = {
-	.proc_read	= empty_read,
-	.proc_lseek	= default_llseek,
-};
-
-// ---------------------------------------------------------------------------
-
-void __init
-kcapi_proc_init(void)
-{
-	proc_mkdir("capi",             NULL);
-	proc_mkdir("capi/controllers", NULL);
-	proc_create_seq("capi/controller",   0, NULL, &seq_controller_ops);
-	proc_create_seq("capi/contrstats",   0, NULL, &seq_contrstats_ops);
-	proc_create_seq("capi/applications", 0, NULL, &seq_applications_ops);
-	proc_create_seq("capi/applstats",    0, NULL, &seq_applstats_ops);
-	proc_create("capi/driver",           0, NULL, &empty_proc_ops);
-}
-
-void
-kcapi_proc_exit(void)
-{
-	remove_proc_entry("capi/driver",       NULL);
-	remove_proc_entry("capi/controller",   NULL);
-	remove_proc_entry("capi/contrstats",   NULL);
-	remove_proc_entry("capi/applications", NULL);
-	remove_proc_entry("capi/applstats",    NULL);
-	remove_proc_entry("capi/controllers",  NULL);
-	remove_proc_entry("capi",              NULL);
-}
diff --git a/drivers/isdn/hardware/Makefile b/drivers/isdn/hardware/Makefile
deleted file mode 100644
index 96f9eb2e46ba..000000000000
--- a/drivers/isdn/hardware/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-# Makefile for the CAPI hardware drivers
-
-# Object files in subdirectories
-
-obj-$(CONFIG_MISDN)		+= mISDN/
diff --git a/drivers/isdn/hardware/mISDN/Kconfig b/drivers/isdn/hardware/mISDN/Kconfig
deleted file mode 100644
index a35bff8a93f5..000000000000
--- a/drivers/isdn/hardware/mISDN/Kconfig
+++ /dev/null
@@ -1,98 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Hardware for mISDN
-#
-comment "mISDN hardware drivers"
-
-config MISDN_HFCPCI
-	tristate "Support for HFC PCI cards"
-	depends on MISDN
-	depends on PCI
-	help
-	  Enable support for cards with Cologne Chip AG's
-	  HFC PCI chip.
-
-config MISDN_HFCMULTI
-	tristate "Support for HFC multiport cards (HFC-4S/8S/E1)"
-	depends on (PCI || CPM1) && HAS_IOPORT
-	depends on MISDN
-	help
-	  Enable support for cards with Cologne Chip AG's HFC multiport
-	  chip. There are three types of chips that are quite similar,
-	  but the interface is different:
-	   * HFC-4S (4 S/T interfaces on one chip)
-	   * HFC-8S (8 S/T interfaces on one chip)
-	   * HFC-E1 (E1 interface for 2Mbit ISDN)
-
-config MISDN_HFCMULTI_8xx
-	bool "Support for XHFC embedded board in HFC multiport driver"
-	depends on MISDN
-	depends on MISDN_HFCMULTI
-	depends on CPM1
-	default CPM1
-	help
-	  Enable support for the XHFC embedded solution from Speech Design.
-
-config MISDN_HFCUSB
-	tristate "Support for HFC-S USB based TAs"
-	depends on USB
-	help
-	  Enable support for USB ISDN TAs with Cologne Chip AG's
-	  HFC-S USB ISDN Controller
-
-config MISDN_AVMFRITZ
-	tristate "Support for AVM FRITZ!CARD PCI"
-	depends on MISDN
-	depends on PCI && HAS_IOPORT
-	select MISDN_IPAC
-	help
-	  Enable support for AVMs FRITZ!CARD PCI cards
-
-config MISDN_SPEEDFAX
-	tristate "Support for Sedlbauer Speedfax+"
-	depends on MISDN
-	depends on PCI && HAS_IOPORT
-	select MISDN_IPAC
-	select MISDN_ISAR
-	help
-	  Enable support for Sedlbauer Speedfax+.
-
-config MISDN_INFINEON
-	tristate "Support for cards with Infineon chipset"
-	depends on MISDN
-	depends on PCI && HAS_IOPORT
-	select MISDN_IPAC
-	help
-	  Enable support for cards with ISAC + HSCX, IPAC or IPAC-SX
-	  chip from Infineon (former manufacturer Siemens).
-
-config MISDN_W6692
-	tristate "Support for cards with Winbond 6692"
-	depends on MISDN
-	depends on PCI && HAS_IOPORT
-	help
-	  Enable support for Winbond 6692 PCI chip based cards.
-
-config MISDN_NETJET
-	tristate "Support for NETJet cards"
-	depends on MISDN
-	depends on PCI && HAS_IOPORT
-	depends on TTY
-	select MISDN_IPAC
-	select MISDN_HDLC
-	help
-	  Enable support for Traverse Technologies NETJet PCI cards.
-
-config MISDN_HDLC
-	tristate
-	select CRC_CCITT
-	select BITREVERSE
-
-config MISDN_IPAC
-	tristate
-	depends on MISDN
-
-config MISDN_ISAR
-	tristate
-	depends on MISDN
-
diff --git a/drivers/isdn/hardware/mISDN/Makefile b/drivers/isdn/hardware/mISDN/Makefile
deleted file mode 100644
index 3f50f8c4753f..000000000000
--- a/drivers/isdn/hardware/mISDN/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the modular ISDN hardware drivers
-#
-#
-
-obj-$(CONFIG_MISDN_HFCPCI) += hfcpci.o
-obj-$(CONFIG_MISDN_HFCMULTI) += hfcmulti.o
-obj-$(CONFIG_MISDN_HFCUSB) += hfcsusb.o
-obj-$(CONFIG_MISDN_AVMFRITZ) += avmfritz.o
-obj-$(CONFIG_MISDN_SPEEDFAX) += speedfax.o
-obj-$(CONFIG_MISDN_INFINEON) += mISDNinfineon.o
-obj-$(CONFIG_MISDN_W6692) += w6692.o
-obj-$(CONFIG_MISDN_NETJET) += netjet.o
-# chip modules
-obj-$(CONFIG_MISDN_IPAC) += mISDNipac.o
-obj-$(CONFIG_MISDN_ISAR) += mISDNisar.o
-
-obj-$(CONFIG_MISDN_HDLC) += isdnhdlc.o
diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c
deleted file mode 100644
index 55e0b9efa194..000000000000
--- a/drivers/isdn/hardware/mISDN/avmfritz.c
+++ /dev/null
@@ -1,1164 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * avm_fritz.c    low level stuff for AVM FRITZ!CARD PCI ISDN cards
- *                Thanks to AVM, Berlin for informations
- *
- * Author       Karsten Keil <keil@isdn4linux.de>
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/mISDNhw.h>
-#include <linux/slab.h>
-#include <linux/unaligned.h>
-#include "ipac.h"
-
-
-#define AVMFRITZ_REV	"2.3"
-
-static int AVM_cnt;
-static int debug;
-
-enum {
-	AVM_FRITZ_PCI,
-	AVM_FRITZ_PCIV2,
-};
-
-#define HDLC_FIFO		0x0
-#define HDLC_STATUS		0x4
-#define CHIP_WINDOW		0x10
-
-#define CHIP_INDEX		0x4
-#define AVM_HDLC_1		0x00
-#define AVM_HDLC_2		0x01
-#define AVM_ISAC_FIFO		0x02
-#define AVM_ISAC_REG_LOW	0x04
-#define AVM_ISAC_REG_HIGH	0x06
-
-#define AVM_STATUS0_IRQ_ISAC	0x01
-#define AVM_STATUS0_IRQ_HDLC	0x02
-#define AVM_STATUS0_IRQ_TIMER	0x04
-#define AVM_STATUS0_IRQ_MASK	0x07
-
-#define AVM_STATUS0_RESET	0x01
-#define AVM_STATUS0_DIS_TIMER	0x02
-#define AVM_STATUS0_RES_TIMER	0x04
-#define AVM_STATUS0_ENA_IRQ	0x08
-#define AVM_STATUS0_TESTBIT	0x10
-
-#define AVM_STATUS1_INT_SEL	0x0f
-#define AVM_STATUS1_ENA_IOM	0x80
-
-#define HDLC_MODE_ITF_FLG	0x01
-#define HDLC_MODE_TRANS		0x02
-#define HDLC_MODE_CCR_7		0x04
-#define HDLC_MODE_CCR_16	0x08
-#define HDLC_FIFO_SIZE_128	0x20
-#define HDLC_MODE_TESTLOOP	0x80
-
-#define HDLC_INT_XPR		0x80
-#define HDLC_INT_XDU		0x40
-#define HDLC_INT_RPR		0x20
-#define HDLC_INT_MASK		0xE0
-
-#define HDLC_STAT_RME		0x01
-#define HDLC_STAT_RDO		0x10
-#define HDLC_STAT_CRCVFRRAB	0x0E
-#define HDLC_STAT_CRCVFR	0x06
-#define HDLC_STAT_RML_MASK_V1	0x3f00
-#define HDLC_STAT_RML_MASK_V2	0x7f00
-
-#define HDLC_CMD_XRS		0x80
-#define HDLC_CMD_XME		0x01
-#define HDLC_CMD_RRS		0x20
-#define HDLC_CMD_XML_MASK	0x3f00
-
-#define HDLC_FIFO_SIZE_V1	32
-#define HDLC_FIFO_SIZE_V2	128
-
-/* Fritz PCI v2.0 */
-
-#define AVM_HDLC_FIFO_1		0x10
-#define AVM_HDLC_FIFO_2		0x18
-
-#define AVM_HDLC_STATUS_1	0x14
-#define AVM_HDLC_STATUS_2	0x1c
-
-#define AVM_ISACX_INDEX		0x04
-#define AVM_ISACX_DATA		0x08
-
-/* data struct */
-#define LOG_SIZE		63
-
-struct hdlc_stat_reg {
-#ifdef __BIG_ENDIAN
-	u8 fill;
-	u8 mode;
-	u8 xml;
-	u8 cmd;
-#else
-	u8 cmd;
-	u8 xml;
-	u8 mode;
-	u8 fill;
-#endif
-} __attribute__((packed));
-
-struct hdlc_hw {
-	union {
-		u32 ctrl;
-		struct hdlc_stat_reg sr;
-	} ctrl;
-	u32 stat;
-};
-
-struct fritzcard {
-	struct list_head	list;
-	struct pci_dev		*pdev;
-	char			name[MISDN_MAX_IDLEN];
-	u8			type;
-	u8			ctrlreg;
-	u16			irq;
-	u32			irqcnt;
-	u32			addr;
-	spinlock_t		lock; /* hw lock */
-	struct isac_hw		isac;
-	struct hdlc_hw		hdlc[2];
-	struct bchannel		bch[2];
-	char			log[LOG_SIZE + 1];
-};
-
-static LIST_HEAD(Cards);
-static DEFINE_RWLOCK(card_lock); /* protect Cards */
-
-static void
-_set_debug(struct fritzcard *card)
-{
-	card->isac.dch.debug = debug;
-	card->bch[0].debug = debug;
-	card->bch[1].debug = debug;
-}
-
-static int
-set_debug(const char *val, const struct kernel_param *kp)
-{
-	int ret;
-	struct fritzcard *card;
-
-	ret = param_set_uint(val, kp);
-	if (!ret) {
-		read_lock(&card_lock);
-		list_for_each_entry(card, &Cards, list)
-			_set_debug(card);
-		read_unlock(&card_lock);
-	}
-	return ret;
-}
-
-MODULE_AUTHOR("Karsten Keil");
-MODULE_DESCRIPTION("mISDN driver for AVM FRITZ!CARD PCI ISDN cards");
-MODULE_LICENSE("GPL v2");
-MODULE_VERSION(AVMFRITZ_REV);
-module_param_call(debug, set_debug, param_get_uint, &debug, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug, "avmfritz debug mask");
-
-/* Interface functions */
-
-static u8
-ReadISAC_V1(void *p, u8 offset)
-{
-	struct fritzcard *fc = p;
-	u8 idx = (offset > 0x2f) ? AVM_ISAC_REG_HIGH : AVM_ISAC_REG_LOW;
-
-	outb(idx, fc->addr + CHIP_INDEX);
-	return inb(fc->addr + CHIP_WINDOW + (offset & 0xf));
-}
-
-static void
-WriteISAC_V1(void *p, u8 offset, u8 value)
-{
-	struct fritzcard *fc = p;
-	u8 idx = (offset > 0x2f) ? AVM_ISAC_REG_HIGH : AVM_ISAC_REG_LOW;
-
-	outb(idx, fc->addr + CHIP_INDEX);
-	outb(value, fc->addr + CHIP_WINDOW + (offset & 0xf));
-}
-
-static void
-ReadFiFoISAC_V1(void *p, u8 off, u8 *data, int size)
-{
-	struct fritzcard *fc = p;
-
-	outb(AVM_ISAC_FIFO, fc->addr + CHIP_INDEX);
-	insb(fc->addr + CHIP_WINDOW, data, size);
-}
-
-static void
-WriteFiFoISAC_V1(void *p, u8 off, u8 *data, int size)
-{
-	struct fritzcard *fc = p;
-
-	outb(AVM_ISAC_FIFO, fc->addr + CHIP_INDEX);
-	outsb(fc->addr + CHIP_WINDOW, data, size);
-}
-
-static u8
-ReadISAC_V2(void *p, u8 offset)
-{
-	struct fritzcard *fc = p;
-
-	outl(offset, fc->addr + AVM_ISACX_INDEX);
-	return 0xff & inl(fc->addr + AVM_ISACX_DATA);
-}
-
-static void
-WriteISAC_V2(void *p, u8 offset, u8 value)
-{
-	struct fritzcard *fc = p;
-
-	outl(offset, fc->addr + AVM_ISACX_INDEX);
-	outl(value, fc->addr + AVM_ISACX_DATA);
-}
-
-static void
-ReadFiFoISAC_V2(void *p, u8 off, u8 *data, int size)
-{
-	struct fritzcard *fc = p;
-	int i;
-
-	outl(off, fc->addr + AVM_ISACX_INDEX);
-	for (i = 0; i < size; i++)
-		data[i] = 0xff & inl(fc->addr + AVM_ISACX_DATA);
-}
-
-static void
-WriteFiFoISAC_V2(void *p, u8 off, u8 *data, int size)
-{
-	struct fritzcard *fc = p;
-	int i;
-
-	outl(off, fc->addr + AVM_ISACX_INDEX);
-	for (i = 0; i < size; i++)
-		outl(data[i], fc->addr + AVM_ISACX_DATA);
-}
-
-static struct bchannel *
-Sel_BCS(struct fritzcard *fc, u32 channel)
-{
-	if (test_bit(FLG_ACTIVE, &fc->bch[0].Flags) &&
-	    (fc->bch[0].nr & channel))
-		return &fc->bch[0];
-	else if (test_bit(FLG_ACTIVE, &fc->bch[1].Flags) &&
-		 (fc->bch[1].nr & channel))
-		return &fc->bch[1];
-	else
-		return NULL;
-}
-
-static inline void
-__write_ctrl_pci(struct fritzcard *fc, struct hdlc_hw *hdlc, u32 channel) {
-	u32 idx = channel == 2 ? AVM_HDLC_2 : AVM_HDLC_1;
-
-	outl(idx, fc->addr + CHIP_INDEX);
-	outl(hdlc->ctrl.ctrl, fc->addr + CHIP_WINDOW + HDLC_STATUS);
-}
-
-static inline void
-__write_ctrl_pciv2(struct fritzcard *fc, struct hdlc_hw *hdlc, u32 channel) {
-	outl(hdlc->ctrl.ctrl, fc->addr + (channel == 2 ? AVM_HDLC_STATUS_2 :
-					  AVM_HDLC_STATUS_1));
-}
-
-static void
-write_ctrl(struct bchannel *bch, int which) {
-	struct fritzcard *fc = bch->hw;
-	struct hdlc_hw *hdlc;
-
-	hdlc = &fc->hdlc[(bch->nr - 1) & 1];
-	pr_debug("%s: hdlc %c wr%x ctrl %x\n", fc->name, '@' + bch->nr,
-		 which, hdlc->ctrl.ctrl);
-	switch (fc->type) {
-	case AVM_FRITZ_PCIV2:
-		__write_ctrl_pciv2(fc, hdlc, bch->nr);
-		break;
-	case AVM_FRITZ_PCI:
-		__write_ctrl_pci(fc, hdlc, bch->nr);
-		break;
-	}
-}
-
-
-static inline u32
-__read_status_pci(u_long addr, u32 channel)
-{
-	outl(channel == 2 ? AVM_HDLC_2 : AVM_HDLC_1, addr + CHIP_INDEX);
-	return inl(addr + CHIP_WINDOW + HDLC_STATUS);
-}
-
-static inline u32
-__read_status_pciv2(u_long addr, u32 channel)
-{
-	return inl(addr + (channel == 2 ? AVM_HDLC_STATUS_2 :
-			   AVM_HDLC_STATUS_1));
-}
-
-
-static u32
-read_status(struct fritzcard *fc, u32 channel)
-{
-	switch (fc->type) {
-	case AVM_FRITZ_PCIV2:
-		return __read_status_pciv2(fc->addr, channel);
-	case AVM_FRITZ_PCI:
-		return __read_status_pci(fc->addr, channel);
-	}
-	/* dummy */
-	return 0;
-}
-
-static void
-enable_hwirq(struct fritzcard *fc)
-{
-	fc->ctrlreg |= AVM_STATUS0_ENA_IRQ;
-	outb(fc->ctrlreg, fc->addr + 2);
-}
-
-static void
-disable_hwirq(struct fritzcard *fc)
-{
-	fc->ctrlreg &= ~AVM_STATUS0_ENA_IRQ;
-	outb(fc->ctrlreg, fc->addr + 2);
-}
-
-static int
-modehdlc(struct bchannel *bch, int protocol)
-{
-	struct fritzcard *fc = bch->hw;
-	struct hdlc_hw *hdlc;
-	u8 mode;
-
-	hdlc = &fc->hdlc[(bch->nr - 1) & 1];
-	pr_debug("%s: hdlc %c protocol %x-->%x ch %d\n", fc->name,
-		 '@' + bch->nr, bch->state, protocol, bch->nr);
-	hdlc->ctrl.ctrl = 0;
-	mode = (fc->type == AVM_FRITZ_PCIV2) ? HDLC_FIFO_SIZE_128 : 0;
-
-	switch (protocol) {
-	case -1: /* used for init */
-		bch->state = -1;
-		fallthrough;
-	case ISDN_P_NONE:
-		if (bch->state == ISDN_P_NONE)
-			break;
-		hdlc->ctrl.sr.cmd  = HDLC_CMD_XRS | HDLC_CMD_RRS;
-		hdlc->ctrl.sr.mode = mode | HDLC_MODE_TRANS;
-		write_ctrl(bch, 5);
-		bch->state = ISDN_P_NONE;
-		test_and_clear_bit(FLG_HDLC, &bch->Flags);
-		test_and_clear_bit(FLG_TRANSPARENT, &bch->Flags);
-		break;
-	case ISDN_P_B_RAW:
-		bch->state = protocol;
-		hdlc->ctrl.sr.cmd  = HDLC_CMD_XRS | HDLC_CMD_RRS;
-		hdlc->ctrl.sr.mode = mode | HDLC_MODE_TRANS;
-		write_ctrl(bch, 5);
-		hdlc->ctrl.sr.cmd = HDLC_CMD_XRS;
-		write_ctrl(bch, 1);
-		hdlc->ctrl.sr.cmd = 0;
-		test_and_set_bit(FLG_TRANSPARENT, &bch->Flags);
-		break;
-	case ISDN_P_B_HDLC:
-		bch->state = protocol;
-		hdlc->ctrl.sr.cmd  = HDLC_CMD_XRS | HDLC_CMD_RRS;
-		hdlc->ctrl.sr.mode = mode | HDLC_MODE_ITF_FLG;
-		write_ctrl(bch, 5);
-		hdlc->ctrl.sr.cmd = HDLC_CMD_XRS;
-		write_ctrl(bch, 1);
-		hdlc->ctrl.sr.cmd = 0;
-		test_and_set_bit(FLG_HDLC, &bch->Flags);
-		break;
-	default:
-		pr_info("%s: protocol not known %x\n", fc->name, protocol);
-		return -ENOPROTOOPT;
-	}
-	return 0;
-}
-
-static void
-hdlc_empty_fifo(struct bchannel *bch, int count)
-{
-	u32 *ptr;
-	u8 *p;
-	u32  val, addr;
-	int cnt;
-	struct fritzcard *fc = bch->hw;
-
-	pr_debug("%s: %s %d\n", fc->name, __func__, count);
-	if (test_bit(FLG_RX_OFF, &bch->Flags)) {
-		p = NULL;
-		bch->dropcnt += count;
-	} else {
-		cnt = bchannel_get_rxbuf(bch, count);
-		if (cnt < 0) {
-			pr_warn("%s.B%d: No bufferspace for %d bytes\n",
-				fc->name, bch->nr, count);
-			return;
-		}
-		p = skb_put(bch->rx_skb, count);
-	}
-	ptr = (u32 *)p;
-	if (fc->type == AVM_FRITZ_PCIV2)
-		addr = fc->addr + (bch->nr == 2 ?
-				   AVM_HDLC_FIFO_2 : AVM_HDLC_FIFO_1);
-	else {
-		addr = fc->addr + CHIP_WINDOW;
-		outl(bch->nr == 2 ? AVM_HDLC_2 : AVM_HDLC_1, fc->addr);
-	}
-	cnt = 0;
-	while (cnt < count) {
-		val = le32_to_cpu(inl(addr));
-		if (p) {
-			put_unaligned(val, ptr);
-			ptr++;
-		}
-		cnt += 4;
-	}
-	if (p && (debug & DEBUG_HW_BFIFO)) {
-		snprintf(fc->log, LOG_SIZE, "B%1d-recv %s %d ",
-			 bch->nr, fc->name, count);
-		print_hex_dump_bytes(fc->log, DUMP_PREFIX_OFFSET, p, count);
-	}
-}
-
-static void
-hdlc_fill_fifo(struct bchannel *bch)
-{
-	struct fritzcard *fc = bch->hw;
-	struct hdlc_hw *hdlc;
-	int count, fs, cnt = 0, idx;
-	bool fillempty = false;
-	u8 *p;
-	u32 *ptr, val, addr;
-
-	idx = (bch->nr - 1) & 1;
-	hdlc = &fc->hdlc[idx];
-	fs = (fc->type == AVM_FRITZ_PCIV2) ?
-		HDLC_FIFO_SIZE_V2 : HDLC_FIFO_SIZE_V1;
-	if (!bch->tx_skb) {
-		if (!test_bit(FLG_TX_EMPTY, &bch->Flags))
-			return;
-		count = fs;
-		p = bch->fill;
-		fillempty = true;
-	} else {
-		count = bch->tx_skb->len - bch->tx_idx;
-		if (count <= 0)
-			return;
-		p = bch->tx_skb->data + bch->tx_idx;
-	}
-	hdlc->ctrl.sr.cmd &= ~HDLC_CMD_XME;
-	if (count > fs) {
-		count = fs;
-	} else {
-		if (test_bit(FLG_HDLC, &bch->Flags))
-			hdlc->ctrl.sr.cmd |= HDLC_CMD_XME;
-	}
-	ptr = (u32 *)p;
-	if (!fillempty) {
-		pr_debug("%s.B%d: %d/%d/%d", fc->name, bch->nr, count,
-			 bch->tx_idx, bch->tx_skb->len);
-		bch->tx_idx += count;
-	} else {
-		pr_debug("%s.B%d: fillempty %d\n", fc->name, bch->nr, count);
-	}
-	hdlc->ctrl.sr.xml = ((count == fs) ? 0 : count);
-	if (fc->type == AVM_FRITZ_PCIV2) {
-		__write_ctrl_pciv2(fc, hdlc, bch->nr);
-		addr = fc->addr + (bch->nr == 2 ?
-				   AVM_HDLC_FIFO_2 : AVM_HDLC_FIFO_1);
-	} else {
-		__write_ctrl_pci(fc, hdlc, bch->nr);
-		addr = fc->addr + CHIP_WINDOW;
-	}
-	if (fillempty) {
-		while (cnt < count) {
-			/* all bytes the same - no worry about endian */
-			outl(*ptr, addr);
-			cnt += 4;
-		}
-	} else {
-		while (cnt < count) {
-			val = get_unaligned(ptr);
-			outl(cpu_to_le32(val), addr);
-			ptr++;
-			cnt += 4;
-		}
-	}
-	if ((debug & DEBUG_HW_BFIFO) && !fillempty) {
-		snprintf(fc->log, LOG_SIZE, "B%1d-send %s %d ",
-			 bch->nr, fc->name, count);
-		print_hex_dump_bytes(fc->log, DUMP_PREFIX_OFFSET, p, count);
-	}
-}
-
-static void
-HDLC_irq_xpr(struct bchannel *bch)
-{
-	if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len) {
-		hdlc_fill_fifo(bch);
-	} else {
-		dev_kfree_skb(bch->tx_skb);
-		if (get_next_bframe(bch)) {
-			hdlc_fill_fifo(bch);
-			test_and_clear_bit(FLG_TX_EMPTY, &bch->Flags);
-		} else if (test_bit(FLG_TX_EMPTY, &bch->Flags)) {
-			hdlc_fill_fifo(bch);
-		}
-	}
-}
-
-static void
-HDLC_irq(struct bchannel *bch, u32 stat)
-{
-	struct fritzcard *fc = bch->hw;
-	int		len, fs;
-	u32		rmlMask;
-	struct hdlc_hw	*hdlc;
-
-	hdlc = &fc->hdlc[(bch->nr - 1) & 1];
-	pr_debug("%s: ch%d stat %#x\n", fc->name, bch->nr, stat);
-	if (fc->type == AVM_FRITZ_PCIV2) {
-		rmlMask = HDLC_STAT_RML_MASK_V2;
-		fs = HDLC_FIFO_SIZE_V2;
-	} else {
-		rmlMask = HDLC_STAT_RML_MASK_V1;
-		fs = HDLC_FIFO_SIZE_V1;
-	}
-	if (stat & HDLC_INT_RPR) {
-		if (stat & HDLC_STAT_RDO) {
-			pr_warn("%s: ch%d stat %x RDO\n",
-				fc->name, bch->nr, stat);
-			hdlc->ctrl.sr.xml = 0;
-			hdlc->ctrl.sr.cmd |= HDLC_CMD_RRS;
-			write_ctrl(bch, 1);
-			hdlc->ctrl.sr.cmd &= ~HDLC_CMD_RRS;
-			write_ctrl(bch, 1);
-			if (bch->rx_skb)
-				skb_trim(bch->rx_skb, 0);
-		} else {
-			len = (stat & rmlMask) >> 8;
-			if (!len)
-				len = fs;
-			hdlc_empty_fifo(bch, len);
-			if (!bch->rx_skb)
-				goto handle_tx;
-			if (test_bit(FLG_TRANSPARENT, &bch->Flags)) {
-				recv_Bchannel(bch, 0, false);
-			} else if (stat & HDLC_STAT_RME) {
-				if ((stat & HDLC_STAT_CRCVFRRAB) ==
-				    HDLC_STAT_CRCVFR) {
-					recv_Bchannel(bch, 0, false);
-				} else {
-					pr_warn("%s: got invalid frame\n",
-						fc->name);
-					skb_trim(bch->rx_skb, 0);
-				}
-			}
-		}
-	}
-handle_tx:
-	if (stat & HDLC_INT_XDU) {
-		/* Here we lost an TX interrupt, so
-		 * restart transmitting the whole frame on HDLC
-		 * in transparent mode we send the next data
-		 */
-		pr_warn("%s: ch%d stat %x XDU %s\n", fc->name, bch->nr,
-			stat, bch->tx_skb ? "tx_skb" : "no tx_skb");
-		if (bch->tx_skb && bch->tx_skb->len) {
-			if (!test_bit(FLG_TRANSPARENT, &bch->Flags))
-				bch->tx_idx = 0;
-		} else if (test_bit(FLG_FILLEMPTY, &bch->Flags)) {
-			test_and_set_bit(FLG_TX_EMPTY, &bch->Flags);
-		}
-		hdlc->ctrl.sr.xml = 0;
-		hdlc->ctrl.sr.cmd |= HDLC_CMD_XRS;
-		write_ctrl(bch, 1);
-		hdlc->ctrl.sr.cmd &= ~HDLC_CMD_XRS;
-		HDLC_irq_xpr(bch);
-		return;
-	} else if (stat & HDLC_INT_XPR)
-		HDLC_irq_xpr(bch);
-}
-
-static inline void
-HDLC_irq_main(struct fritzcard *fc)
-{
-	u32 stat;
-	struct bchannel *bch;
-
-	stat = read_status(fc, 1);
-	if (stat & HDLC_INT_MASK) {
-		bch = Sel_BCS(fc, 1);
-		if (bch)
-			HDLC_irq(bch, stat);
-		else
-			pr_debug("%s: spurious ch1 IRQ\n", fc->name);
-	}
-	stat = read_status(fc, 2);
-	if (stat & HDLC_INT_MASK) {
-		bch = Sel_BCS(fc, 2);
-		if (bch)
-			HDLC_irq(bch, stat);
-		else
-			pr_debug("%s: spurious ch2 IRQ\n", fc->name);
-	}
-}
-
-static irqreturn_t
-avm_fritz_interrupt(int intno, void *dev_id)
-{
-	struct fritzcard *fc = dev_id;
-	u8 val;
-	u8 sval;
-
-	spin_lock(&fc->lock);
-	sval = inb(fc->addr + 2);
-	pr_debug("%s: irq stat0 %x\n", fc->name, sval);
-	if ((sval & AVM_STATUS0_IRQ_MASK) == AVM_STATUS0_IRQ_MASK) {
-		/* shared  IRQ from other HW */
-		spin_unlock(&fc->lock);
-		return IRQ_NONE;
-	}
-	fc->irqcnt++;
-
-	if (!(sval & AVM_STATUS0_IRQ_ISAC)) {
-		val = ReadISAC_V1(fc, ISAC_ISTA);
-		mISDNisac_irq(&fc->isac, val);
-	}
-	if (!(sval & AVM_STATUS0_IRQ_HDLC))
-		HDLC_irq_main(fc);
-	spin_unlock(&fc->lock);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t
-avm_fritzv2_interrupt(int intno, void *dev_id)
-{
-	struct fritzcard *fc = dev_id;
-	u8 val;
-	u8 sval;
-
-	spin_lock(&fc->lock);
-	sval = inb(fc->addr + 2);
-	pr_debug("%s: irq stat0 %x\n", fc->name, sval);
-	if (!(sval & AVM_STATUS0_IRQ_MASK)) {
-		/* shared  IRQ from other HW */
-		spin_unlock(&fc->lock);
-		return IRQ_NONE;
-	}
-	fc->irqcnt++;
-
-	if (sval & AVM_STATUS0_IRQ_HDLC)
-		HDLC_irq_main(fc);
-	if (sval & AVM_STATUS0_IRQ_ISAC) {
-		val = ReadISAC_V2(fc, ISACX_ISTA);
-		mISDNisac_irq(&fc->isac, val);
-	}
-	if (sval & AVM_STATUS0_IRQ_TIMER) {
-		pr_debug("%s: timer irq\n", fc->name);
-		outb(fc->ctrlreg | AVM_STATUS0_RES_TIMER, fc->addr + 2);
-		udelay(1);
-		outb(fc->ctrlreg, fc->addr + 2);
-	}
-	spin_unlock(&fc->lock);
-	return IRQ_HANDLED;
-}
-
-static int
-avm_l2l1B(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct bchannel *bch = container_of(ch, struct bchannel, ch);
-	struct fritzcard *fc = bch->hw;
-	int ret = -EINVAL;
-	struct mISDNhead *hh = mISDN_HEAD_P(skb);
-	unsigned long flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		spin_lock_irqsave(&fc->lock, flags);
-		ret = bchannel_senddata(bch, skb);
-		if (ret > 0) { /* direct TX */
-			hdlc_fill_fifo(bch);
-			ret = 0;
-		}
-		spin_unlock_irqrestore(&fc->lock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		spin_lock_irqsave(&fc->lock, flags);
-		if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags))
-			ret = modehdlc(bch, ch->protocol);
-		else
-			ret = 0;
-		spin_unlock_irqrestore(&fc->lock, flags);
-		if (!ret)
-			_queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY, 0,
-				    NULL, GFP_KERNEL);
-		break;
-	case PH_DEACTIVATE_REQ:
-		spin_lock_irqsave(&fc->lock, flags);
-		mISDN_clear_bchannel(bch);
-		modehdlc(bch, ISDN_P_NONE);
-		spin_unlock_irqrestore(&fc->lock, flags);
-		_queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0,
-			    NULL, GFP_KERNEL);
-		ret = 0;
-		break;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static void
-inithdlc(struct fritzcard *fc)
-{
-	modehdlc(&fc->bch[0], -1);
-	modehdlc(&fc->bch[1], -1);
-}
-
-static void
-clear_pending_hdlc_ints(struct fritzcard *fc)
-{
-	u32 val;
-
-	val = read_status(fc, 1);
-	pr_debug("%s: HDLC 1 STA %x\n", fc->name, val);
-	val = read_status(fc, 2);
-	pr_debug("%s: HDLC 2 STA %x\n", fc->name, val);
-}
-
-static void
-reset_avm(struct fritzcard *fc)
-{
-	switch (fc->type) {
-	case AVM_FRITZ_PCI:
-		fc->ctrlreg = AVM_STATUS0_RESET | AVM_STATUS0_DIS_TIMER;
-		break;
-	case AVM_FRITZ_PCIV2:
-		fc->ctrlreg = AVM_STATUS0_RESET;
-		break;
-	}
-	if (debug & DEBUG_HW)
-		pr_notice("%s: reset\n", fc->name);
-	disable_hwirq(fc);
-	mdelay(5);
-	switch (fc->type) {
-	case AVM_FRITZ_PCI:
-		fc->ctrlreg = AVM_STATUS0_DIS_TIMER | AVM_STATUS0_RES_TIMER;
-		disable_hwirq(fc);
-		outb(AVM_STATUS1_ENA_IOM, fc->addr + 3);
-		break;
-	case AVM_FRITZ_PCIV2:
-		fc->ctrlreg = 0;
-		disable_hwirq(fc);
-		break;
-	}
-	mdelay(1);
-	if (debug & DEBUG_HW)
-		pr_notice("%s: S0/S1 %x/%x\n", fc->name,
-			  inb(fc->addr + 2), inb(fc->addr + 3));
-}
-
-static int
-init_card(struct fritzcard *fc)
-{
-	int		ret, cnt = 3;
-	u_long		flags;
-
-	reset_avm(fc); /* disable IRQ */
-	if (fc->type == AVM_FRITZ_PCIV2)
-		ret = request_irq(fc->irq, avm_fritzv2_interrupt,
-				  IRQF_SHARED, fc->name, fc);
-	else
-		ret = request_irq(fc->irq, avm_fritz_interrupt,
-				  IRQF_SHARED, fc->name, fc);
-	if (ret) {
-		pr_info("%s: couldn't get interrupt %d\n",
-			fc->name, fc->irq);
-		return ret;
-	}
-	while (cnt--) {
-		spin_lock_irqsave(&fc->lock, flags);
-		ret = fc->isac.init(&fc->isac);
-		if (ret) {
-			spin_unlock_irqrestore(&fc->lock, flags);
-			pr_info("%s: ISAC init failed with %d\n",
-				fc->name, ret);
-			break;
-		}
-		clear_pending_hdlc_ints(fc);
-		inithdlc(fc);
-		enable_hwirq(fc);
-		/* RESET Receiver and Transmitter */
-		if (fc->type == AVM_FRITZ_PCIV2) {
-			WriteISAC_V2(fc, ISACX_MASK, 0);
-			WriteISAC_V2(fc, ISACX_CMDRD, 0x41);
-		} else {
-			WriteISAC_V1(fc, ISAC_MASK, 0);
-			WriteISAC_V1(fc, ISAC_CMDR, 0x41);
-		}
-		spin_unlock_irqrestore(&fc->lock, flags);
-		/* Timeout 10ms */
-		msleep_interruptible(10);
-		if (debug & DEBUG_HW)
-			pr_notice("%s: IRQ %d count %d\n", fc->name,
-				  fc->irq, fc->irqcnt);
-		if (!fc->irqcnt) {
-			pr_info("%s: IRQ(%d) getting no IRQs during init %d\n",
-				fc->name, fc->irq, 3 - cnt);
-			reset_avm(fc);
-		} else
-			return 0;
-	}
-	free_irq(fc->irq, fc);
-	return -EIO;
-}
-
-static int
-channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq)
-{
-	return mISDN_ctrl_bchannel(bch, cq);
-}
-
-static int
-avm_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
-{
-	struct bchannel *bch = container_of(ch, struct bchannel, ch);
-	struct fritzcard *fc = bch->hw;
-	int ret = -EINVAL;
-	u_long flags;
-
-	pr_debug("%s: %s cmd:%x %p\n", fc->name, __func__, cmd, arg);
-	switch (cmd) {
-	case CLOSE_CHANNEL:
-		test_and_clear_bit(FLG_OPEN, &bch->Flags);
-		cancel_work_sync(&bch->workq);
-		spin_lock_irqsave(&fc->lock, flags);
-		mISDN_clear_bchannel(bch);
-		modehdlc(bch, ISDN_P_NONE);
-		spin_unlock_irqrestore(&fc->lock, flags);
-		ch->protocol = ISDN_P_NONE;
-		ch->peer = NULL;
-		module_put(THIS_MODULE);
-		ret = 0;
-		break;
-	case CONTROL_CHANNEL:
-		ret = channel_bctrl(bch, arg);
-		break;
-	default:
-		pr_info("%s: %s unknown prim(%x)\n", fc->name, __func__, cmd);
-	}
-	return ret;
-}
-
-static int
-channel_ctrl(struct fritzcard  *fc, struct mISDN_ctrl_req *cq)
-{
-	int	ret = 0;
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_LOOP | MISDN_CTRL_L1_TIMER3;
-		break;
-	case MISDN_CTRL_LOOP:
-		/* cq->channel: 0 disable, 1 B1 loop 2 B2 loop, 3 both */
-		if (cq->channel < 0 || cq->channel > 3) {
-			ret = -EINVAL;
-			break;
-		}
-		ret = fc->isac.ctrl(&fc->isac, HW_TESTLOOP, cq->channel);
-		break;
-	case MISDN_CTRL_L1_TIMER3:
-		ret = fc->isac.ctrl(&fc->isac, HW_TIMER3_VALUE, cq->p1);
-		break;
-	default:
-		pr_info("%s: %s unknown Op %x\n", fc->name, __func__, cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-open_bchannel(struct fritzcard *fc, struct channel_req *rq)
-{
-	struct bchannel		*bch;
-
-	if (rq->adr.channel == 0 || rq->adr.channel > 2)
-		return -EINVAL;
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	bch = &fc->bch[rq->adr.channel - 1];
-	if (test_and_set_bit(FLG_OPEN, &bch->Flags))
-		return -EBUSY; /* b-channel can be only open once */
-	bch->ch.protocol = rq->protocol;
-	rq->ch = &bch->ch;
-	return 0;
-}
-
-/*
- * device control function
- */
-static int
-avm_dctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct fritzcard	*fc = dch->hw;
-	struct channel_req	*rq;
-	int			err = 0;
-
-	pr_debug("%s: %s cmd:%x %p\n", fc->name, __func__, cmd, arg);
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		rq = arg;
-		if (rq->protocol == ISDN_P_TE_S0)
-			err = fc->isac.open(&fc->isac, rq);
-		else
-			err = open_bchannel(fc, rq);
-		if (err)
-			break;
-		if (!try_module_get(THIS_MODULE))
-			pr_info("%s: cannot get module\n", fc->name);
-		break;
-	case CLOSE_CHANNEL:
-		pr_debug("%s: dev(%d) close from %p\n", fc->name, dch->dev.id,
-			 __builtin_return_address(0));
-		module_put(THIS_MODULE);
-		break;
-	case CONTROL_CHANNEL:
-		err = channel_ctrl(fc, arg);
-		break;
-	default:
-		pr_debug("%s: %s unknown command %x\n",
-			 fc->name, __func__, cmd);
-		return -EINVAL;
-	}
-	return err;
-}
-
-static int
-setup_fritz(struct fritzcard *fc)
-{
-	u32 val, ver;
-
-	if (!request_region(fc->addr, 32, fc->name)) {
-		pr_info("%s: AVM config port %x-%x already in use\n",
-			fc->name, fc->addr, fc->addr + 31);
-		return -EIO;
-	}
-	switch (fc->type) {
-	case AVM_FRITZ_PCI:
-		val = inl(fc->addr);
-		outl(AVM_HDLC_1, fc->addr + CHIP_INDEX);
-		ver = inl(fc->addr + CHIP_WINDOW + HDLC_STATUS) >> 24;
-		if (debug & DEBUG_HW) {
-			pr_notice("%s: PCI stat %#x\n", fc->name, val);
-			pr_notice("%s: PCI Class %X Rev %d\n", fc->name,
-				  val & 0xff, (val >> 8) & 0xff);
-			pr_notice("%s: HDLC version %x\n", fc->name, ver & 0xf);
-		}
-		ASSIGN_FUNC(V1, ISAC, fc->isac);
-		fc->isac.type = IPAC_TYPE_ISAC;
-		break;
-	case AVM_FRITZ_PCIV2:
-		val = inl(fc->addr);
-		ver = inl(fc->addr + AVM_HDLC_STATUS_1) >> 24;
-		if (debug & DEBUG_HW) {
-			pr_notice("%s: PCI V2 stat %#x\n", fc->name, val);
-			pr_notice("%s: PCI V2 Class %X Rev %d\n", fc->name,
-				  val & 0xff, (val >> 8) & 0xff);
-			pr_notice("%s: HDLC version %x\n", fc->name, ver & 0xf);
-		}
-		ASSIGN_FUNC(V2, ISAC, fc->isac);
-		fc->isac.type = IPAC_TYPE_ISACX;
-		break;
-	default:
-		release_region(fc->addr, 32);
-		pr_info("%s: AVM unknown type %d\n", fc->name, fc->type);
-		return -ENODEV;
-	}
-	pr_notice("%s: %s config irq:%d base:0x%X\n", fc->name,
-		  (fc->type == AVM_FRITZ_PCI) ? "AVM Fritz!CARD PCI" :
-		  "AVM Fritz!CARD PCIv2", fc->irq, fc->addr);
-	return 0;
-}
-
-static void
-release_card(struct fritzcard *card)
-{
-	u_long flags;
-
-	disable_hwirq(card);
-	spin_lock_irqsave(&card->lock, flags);
-	modehdlc(&card->bch[0], ISDN_P_NONE);
-	modehdlc(&card->bch[1], ISDN_P_NONE);
-	spin_unlock_irqrestore(&card->lock, flags);
-	card->isac.release(&card->isac);
-	free_irq(card->irq, card);
-	mISDN_freebchannel(&card->bch[1]);
-	mISDN_freebchannel(&card->bch[0]);
-	mISDN_unregister_device(&card->isac.dch.dev);
-	release_region(card->addr, 32);
-	pci_disable_device(card->pdev);
-	pci_set_drvdata(card->pdev, NULL);
-	write_lock_irqsave(&card_lock, flags);
-	list_del(&card->list);
-	write_unlock_irqrestore(&card_lock, flags);
-	kfree(card);
-	AVM_cnt--;
-}
-
-static int
-setup_instance(struct fritzcard *card)
-{
-	int i, err;
-	unsigned short minsize;
-	u_long flags;
-
-	snprintf(card->name, MISDN_MAX_IDLEN - 1, "AVM.%d", AVM_cnt + 1);
-	write_lock_irqsave(&card_lock, flags);
-	list_add_tail(&card->list, &Cards);
-	write_unlock_irqrestore(&card_lock, flags);
-
-	_set_debug(card);
-	card->isac.name = card->name;
-	spin_lock_init(&card->lock);
-	card->isac.hwlock = &card->lock;
-	mISDNisac_init(&card->isac, card);
-
-	card->isac.dch.dev.Bprotocols = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK));
-	card->isac.dch.dev.D.ctrl = avm_dctrl;
-	for (i = 0; i < 2; i++) {
-		card->bch[i].nr = i + 1;
-		set_channelmap(i + 1, card->isac.dch.dev.channelmap);
-		if (AVM_FRITZ_PCIV2 == card->type)
-			minsize = HDLC_FIFO_SIZE_V2;
-		else
-			minsize = HDLC_FIFO_SIZE_V1;
-		mISDN_initbchannel(&card->bch[i], MAX_DATA_MEM, minsize);
-		card->bch[i].hw = card;
-		card->bch[i].ch.send = avm_l2l1B;
-		card->bch[i].ch.ctrl = avm_bctrl;
-		card->bch[i].ch.nr = i + 1;
-		list_add(&card->bch[i].ch.list, &card->isac.dch.dev.bchannels);
-	}
-	err = setup_fritz(card);
-	if (err)
-		goto error;
-	err = mISDN_register_device(&card->isac.dch.dev, &card->pdev->dev,
-				    card->name);
-	if (err)
-		goto error_reg;
-	err = init_card(card);
-	if (!err)  {
-		AVM_cnt++;
-		pr_notice("AVM %d cards installed DEBUG\n", AVM_cnt);
-		return 0;
-	}
-	mISDN_unregister_device(&card->isac.dch.dev);
-error_reg:
-	release_region(card->addr, 32);
-error:
-	card->isac.release(&card->isac);
-	mISDN_freebchannel(&card->bch[1]);
-	mISDN_freebchannel(&card->bch[0]);
-	write_lock_irqsave(&card_lock, flags);
-	list_del(&card->list);
-	write_unlock_irqrestore(&card_lock, flags);
-	kfree(card);
-	return err;
-}
-
-static int
-fritzpci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	int err = -ENOMEM;
-	struct fritzcard *card;
-
-	card = kzalloc_obj(struct fritzcard);
-	if (!card) {
-		pr_info("No kmem for fritzcard\n");
-		return err;
-	}
-	if (pdev->device == PCI_DEVICE_ID_AVM_A1_V2)
-		card->type = AVM_FRITZ_PCIV2;
-	else
-		card->type = AVM_FRITZ_PCI;
-	card->pdev = pdev;
-	err = pci_enable_device(pdev);
-	if (err) {
-		kfree(card);
-		return err;
-	}
-
-	pr_notice("mISDN: found adapter %s at %s\n",
-		  (char *) ent->driver_data, pci_name(pdev));
-
-	card->addr = pci_resource_start(pdev, 1);
-	card->irq = pdev->irq;
-	pci_set_drvdata(pdev, card);
-	err = setup_instance(card);
-	if (err)
-		pci_set_drvdata(pdev, NULL);
-	return err;
-}
-
-static void
-fritz_remove_pci(struct pci_dev *pdev)
-{
-	struct fritzcard *card = pci_get_drvdata(pdev);
-
-	if (card)
-		release_card(card);
-	else
-		if (debug)
-			pr_info("%s: drvdata already removed\n", __func__);
-}
-
-static const struct pci_device_id fcpci_ids[] = {
-	{ PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_A1, PCI_ANY_ID, PCI_ANY_ID,
-	  0, 0, (unsigned long) "Fritz!Card PCI"},
-	{ PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_A1_V2, PCI_ANY_ID, PCI_ANY_ID,
-	  0, 0, (unsigned long) "Fritz!Card PCI v2" },
-	{ }
-};
-MODULE_DEVICE_TABLE(pci, fcpci_ids);
-
-static struct pci_driver fcpci_driver = {
-	.name = "fcpci",
-	.probe = fritzpci_probe,
-	.remove = fritz_remove_pci,
-	.id_table = fcpci_ids,
-};
-
-static int __init AVM_init(void)
-{
-	int err;
-
-	pr_notice("AVM Fritz PCI driver Rev. %s\n", AVMFRITZ_REV);
-	err = pci_register_driver(&fcpci_driver);
-	return err;
-}
-
-static void __exit AVM_cleanup(void)
-{
-	pci_unregister_driver(&fcpci_driver);
-}
-
-module_init(AVM_init);
-module_exit(AVM_cleanup);
diff --git a/drivers/isdn/hardware/mISDN/hfc_multi.h b/drivers/isdn/hardware/mISDN/hfc_multi.h
deleted file mode 100644
index 5acf826d913c..000000000000
--- a/drivers/isdn/hardware/mISDN/hfc_multi.h
+++ /dev/null
@@ -1,1236 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * see notice in hfc_multi.c
- */
-
-#define DEBUG_HFCMULTI_FIFO	0x00010000
-#define	DEBUG_HFCMULTI_CRC	0x00020000
-#define	DEBUG_HFCMULTI_INIT	0x00040000
-#define	DEBUG_HFCMULTI_PLXSD	0x00080000
-#define	DEBUG_HFCMULTI_MODE	0x00100000
-#define	DEBUG_HFCMULTI_MSG	0x00200000
-#define	DEBUG_HFCMULTI_STATE	0x00400000
-#define	DEBUG_HFCMULTI_FILL	0x00800000
-#define	DEBUG_HFCMULTI_SYNC	0x01000000
-#define	DEBUG_HFCMULTI_DTMF	0x02000000
-#define	DEBUG_HFCMULTI_LOCK	0x80000000
-
-#define	PCI_ENA_REGIO	0x01
-#define	PCI_ENA_MEMIO	0x02
-
-#define XHFC_IRQ	4		/* SIU_IRQ2 */
-#define XHFC_MEMBASE	0xFE000000
-#define XHFC_MEMSIZE    0x00001000
-#define XHFC_OFFSET	0x00001000
-#define PA_XHFC_A0	0x0020		/* PA10 */
-#define PB_XHFC_IRQ1	0x00000100	/* PB23 */
-#define PB_XHFC_IRQ2	0x00000200	/* PB22 */
-#define PB_XHFC_IRQ3	0x00000400	/* PB21 */
-#define PB_XHFC_IRQ4	0x00000800	/* PB20 */
-
-/*
- * NOTE: some registers are assigned multiple times due to different modes
- *       also registers are assigned differen for HFC-4s/8s and HFC-E1
- */
-
-/*
-  #define MAX_FRAME_SIZE	2048
-*/
-
-struct hfc_chan {
-	struct dchannel	*dch;	/* link if channel is a D-channel */
-	struct bchannel	*bch;	/* link if channel is a B-channel */
-	int		port;	/* the interface port this */
-				/* channel is associated with */
-	int		nt_timer; /* -1 if off, 0 if elapsed, >0 if running */
-	int		los, ais, slip_tx, slip_rx, rdi; /* current alarms */
-	int		jitter;
-	u_long		cfg;	/* port configuration */
-	int		sync;	/* sync state (used by E1) */
-	u_int		protocol; /* current protocol */
-	int		slot_tx; /* current pcm slot */
-	int		bank_tx; /* current pcm bank */
-	int		slot_rx;
-	int		bank_rx;
-	int		conf;	/* conference setting of TX slot */
-	int		txpending;	/* if there is currently data in */
-					/* the FIFO 0=no, 1=yes, 2=splloop */
-	int		Zfill;	/* rx-fifo level on last hfcmulti_tx */
-	int		rx_off; /* set to turn fifo receive off */
-	int		coeff_count; /* curren coeff block */
-	s32		*coeff; /* memory pointer to 8 coeff blocks */
-};
-
-
-struct hfcm_hw {
-	u_char	r_ctrl;
-	u_char	r_irq_ctrl;
-	u_char	r_cirm;
-	u_char	r_ram_sz;
-	u_char	r_pcm_md0;
-	u_char	r_irqmsk_misc;
-	u_char	r_dtmf;
-	u_char	r_st_sync;
-	u_char	r_sci_msk;
-	u_char	r_tx0, r_tx1;
-	u_char	a_st_ctrl0[8];
-	u_char	r_bert_wd_md;
-	timer_t	timer;
-};
-
-
-/* for each stack these flags are used (cfg) */
-#define	HFC_CFG_NONCAP_TX	1 /* S/T TX interface has less capacity */
-#define	HFC_CFG_DIS_ECHANNEL	2 /* disable E-channel processing */
-#define	HFC_CFG_REG_ECHANNEL	3 /* register E-channel */
-#define	HFC_CFG_OPTICAL		4 /* the E1 interface is optical */
-#define	HFC_CFG_REPORT_LOS	5 /* the card should report loss of signal */
-#define	HFC_CFG_REPORT_AIS	6 /* the card should report alarm ind. sign. */
-#define	HFC_CFG_REPORT_SLIP	7 /* the card should report bit slips */
-#define	HFC_CFG_REPORT_RDI	8 /* the card should report remote alarm */
-#define	HFC_CFG_DTMF		9 /* enable DTMF-detection */
-#define	HFC_CFG_CRC4		10 /* disable CRC-4 Multiframe mode, */
-/* use double frame instead. */
-
-#define HFC_TYPE_E1		1 /* controller is HFC-E1 */
-#define HFC_TYPE_4S		4 /* controller is HFC-4S */
-#define HFC_TYPE_8S		8 /* controller is HFC-8S */
-#define HFC_TYPE_XHFC		5 /* controller is XHFC */
-
-#define	HFC_CHIP_EXRAM_128	0 /* external ram 128k */
-#define	HFC_CHIP_EXRAM_512	1 /* external ram 256k */
-#define	HFC_CHIP_REVISION0	2 /* old fifo handling */
-#define	HFC_CHIP_PCM_SLAVE	3 /* PCM is slave */
-#define	HFC_CHIP_PCM_MASTER	4 /* PCM is master */
-#define	HFC_CHIP_RX_SYNC	5 /* disable pll sync for pcm */
-#define	HFC_CHIP_DTMF		6 /* DTMF decoding is enabled */
-#define	HFC_CHIP_CONF		7 /* conference handling is enabled */
-#define	HFC_CHIP_ULAW		8 /* ULAW mode */
-#define	HFC_CHIP_CLOCK2		9 /* double clock mode */
-#define	HFC_CHIP_E1CLOCK_GET	10 /* always get clock from E1 interface */
-#define	HFC_CHIP_E1CLOCK_PUT	11 /* always put clock from E1 interface */
-#define	HFC_CHIP_WATCHDOG	12 /* whether we should send signals */
-/* to the watchdog */
-#define	HFC_CHIP_B410P		13 /* whether we have a b410p with echocan in */
-/* hw */
-#define	HFC_CHIP_PLXSD		14 /* whether we have a Speech-Design PLX */
-#define	HFC_CHIP_EMBSD          15 /* whether we have a SD Embedded board */
-
-#define HFC_IO_MODE_PCIMEM	0x00 /* normal memory mapped IO */
-#define HFC_IO_MODE_REGIO	0x01 /* PCI io access */
-#define HFC_IO_MODE_PLXSD	0x02 /* access HFC via PLX9030 */
-#define HFC_IO_MODE_EMBSD	0x03 /* direct access */
-
-/* table entry in the PCI devices list */
-struct hm_map {
-	char *vendor_name;
-	char *card_name;
-	int type;
-	int ports;
-	int clock2;
-	int leds;
-	int opticalsupport;
-	int dip_type;
-	int io_mode;
-	int irq;
-};
-
-struct hfc_multi {
-	struct list_head	list;
-	struct hm_map	*mtyp;
-	int		id;
-	int		pcm;	/* id of pcm bus */
-	int		ctype;	/* controller type */
-	int		ports;
-
-	u_int		irq;	/* irq used by card */
-	u_int		irqcnt;
-	struct pci_dev	*pci_dev;
-	int		io_mode; /* selects mode */
-#ifdef HFC_REGISTER_DEBUG
-	void		(*HFC_outb)(struct hfc_multi *hc, u_char reg,
-				    u_char val, const char *function, int line);
-	void		(*HFC_outb_nodebug)(struct hfc_multi *hc, u_char reg,
-					    u_char val, const char *function, int line);
-	u_char		(*HFC_inb)(struct hfc_multi *hc, u_char reg,
-				   const char *function, int line);
-	u_char		(*HFC_inb_nodebug)(struct hfc_multi *hc, u_char reg,
-					   const char *function, int line);
-	u_short		(*HFC_inw)(struct hfc_multi *hc, u_char reg,
-				   const char *function, int line);
-	u_short		(*HFC_inw_nodebug)(struct hfc_multi *hc, u_char reg,
-					   const char *function, int line);
-	void		(*HFC_wait)(struct hfc_multi *hc,
-				    const char *function, int line);
-	void		(*HFC_wait_nodebug)(struct hfc_multi *hc,
-					    const char *function, int line);
-#else
-	void		(*HFC_outb)(struct hfc_multi *hc, u_char reg,
-				    u_char val);
-	void		(*HFC_outb_nodebug)(struct hfc_multi *hc, u_char reg,
-					    u_char val);
-	u_char		(*HFC_inb)(struct hfc_multi *hc, u_char reg);
-	u_char		(*HFC_inb_nodebug)(struct hfc_multi *hc, u_char reg);
-	u_short		(*HFC_inw)(struct hfc_multi *hc, u_char reg);
-	u_short		(*HFC_inw_nodebug)(struct hfc_multi *hc, u_char reg);
-	void		(*HFC_wait)(struct hfc_multi *hc);
-	void		(*HFC_wait_nodebug)(struct hfc_multi *hc);
-#endif
-	void		(*read_fifo)(struct hfc_multi *hc, u_char *data,
-				     int len);
-	void		(*write_fifo)(struct hfc_multi *hc, u_char *data,
-				      int len);
-	u_long		pci_origmembase, plx_origmembase;
-	void __iomem	*pci_membase; /* PCI memory */
-	void __iomem	*plx_membase; /* PLX memory */
-	u_long		xhfc_origmembase;
-	u_char		*xhfc_membase;
-	u_long		*xhfc_memaddr, *xhfc_memdata;
-#ifdef CONFIG_MISDN_HFCMULTI_8xx
-	struct immap	*immap;
-#endif
-	u_long		pb_irqmsk;	/* Portbit mask to check the IRQ line */
-	u_long		pci_iobase; /* PCI IO */
-	struct hfcm_hw	hw;	/* remember data of write-only-registers */
-
-	u_long		chip;	/* chip configuration */
-	int		masterclk; /* port that provides master clock -1=off */
-	unsigned char	silence;/* silence byte */
-	unsigned char	silence_data[128];/* silence block */
-	int		dtmf;	/* flag that dtmf is currently in process */
-	int		Flen;	/* F-buffer size */
-	int		Zlen;	/* Z-buffer size (must be int for calculation)*/
-	int		max_trans; /* maximum transparent fifo fill */
-	int		Zmin;	/* Z-buffer offset */
-	int		DTMFbase; /* base address of DTMF coefficients */
-
-	u_int		slots;	/* number of PCM slots */
-	u_int		leds;	/* type of leds */
-	u_long		ledstate; /* save last state of leds */
-	int		opticalsupport; /* has the e1 board */
-					/* an optical Interface */
-
-	u_int		bmask[32]; /* bitmask of bchannels for port */
-	u_char		dnum[32]; /* array of used dchannel numbers for port */
-	u_char		created[32]; /* what port is created */
-	u_int		activity_tx; /* if there is data TX / RX */
-	u_int		activity_rx; /* bitmask according to port number */
-				     /* (will be cleared after */
-				     /* showing led-states) */
-	u_int		flash[8]; /* counter for flashing 8 leds on activity */
-
-	u_long		wdcount;	/* every 500 ms we need to */
-					/* send the watchdog a signal */
-	u_char		wdbyte; /* watchdog toggle byte */
-	int		e1_state; /* keep track of last state */
-	int		e1_getclock; /* if sync is retrieved from interface */
-	int		syncronized; /* keep track of existing sync interface */
-	int		e1_resync; /* resync jobs */
-
-	spinlock_t	lock;	/* the lock */
-
-	struct mISDNclock *iclock; /* isdn clock support */
-	int		iclock_on;
-
-	/*
-	 * the channel index is counted from 0, regardless where the channel
-	 * is located on the hfc-channel.
-	 * the bch->channel is equvalent to the hfc-channel
-	 */
-	struct hfc_chan	chan[32];
-	signed char	slot_owner[256]; /* owner channel of slot */
-};
-
-/* PLX GPIOs */
-#define	PLX_GPIO4_DIR_BIT	13
-#define	PLX_GPIO4_BIT		14
-#define	PLX_GPIO5_DIR_BIT	16
-#define	PLX_GPIO5_BIT		17
-#define	PLX_GPIO6_DIR_BIT	19
-#define	PLX_GPIO6_BIT		20
-#define	PLX_GPIO7_DIR_BIT	22
-#define	PLX_GPIO7_BIT		23
-#define PLX_GPIO8_DIR_BIT	25
-#define PLX_GPIO8_BIT		26
-
-#define	PLX_GPIO4		(1 << PLX_GPIO4_BIT)
-#define	PLX_GPIO5		(1 << PLX_GPIO5_BIT)
-#define	PLX_GPIO6		(1 << PLX_GPIO6_BIT)
-#define	PLX_GPIO7		(1 << PLX_GPIO7_BIT)
-#define PLX_GPIO8		(1 << PLX_GPIO8_BIT)
-
-#define	PLX_GPIO4_DIR		(1 << PLX_GPIO4_DIR_BIT)
-#define	PLX_GPIO5_DIR		(1 << PLX_GPIO5_DIR_BIT)
-#define	PLX_GPIO6_DIR		(1 << PLX_GPIO6_DIR_BIT)
-#define	PLX_GPIO7_DIR		(1 << PLX_GPIO7_DIR_BIT)
-#define PLX_GPIO8_DIR		(1 << PLX_GPIO8_DIR_BIT)
-
-#define	PLX_TERM_ON			PLX_GPIO7
-#define	PLX_SLAVE_EN_N		PLX_GPIO5
-#define	PLX_MASTER_EN		PLX_GPIO6
-#define	PLX_SYNC_O_EN		PLX_GPIO4
-#define PLX_DSP_RES_N		PLX_GPIO8
-/* GPIO4..8 Enable & Set to OUT, SLAVE_EN_N = 1 */
-#define PLX_GPIOC_INIT		(PLX_GPIO4_DIR | PLX_GPIO5_DIR | PLX_GPIO6_DIR \
-				 | PLX_GPIO7_DIR | PLX_GPIO8_DIR | PLX_SLAVE_EN_N)
-
-/* PLX Interrupt Control/STATUS */
-#define PLX_INTCSR_LINTI1_ENABLE 0x01
-#define PLX_INTCSR_LINTI1_STATUS 0x04
-#define PLX_INTCSR_LINTI2_ENABLE 0x08
-#define PLX_INTCSR_LINTI2_STATUS 0x20
-#define PLX_INTCSR_PCIINT_ENABLE 0x40
-
-/* PLX Registers */
-#define PLX_INTCSR 0x4c
-#define PLX_CNTRL  0x50
-#define PLX_GPIOC  0x54
-
-
-/*
- * REGISTER SETTING FOR HFC-4S/8S AND HFC-E1
- */
-
-/* write only registers */
-#define R_CIRM			0x00
-#define R_CTRL			0x01
-#define R_BRG_PCM_CFG		0x02
-#define R_RAM_ADDR0		0x08
-#define R_RAM_ADDR1		0x09
-#define R_RAM_ADDR2		0x0A
-#define R_FIRST_FIFO		0x0B
-#define R_RAM_SZ		0x0C
-#define R_FIFO_MD		0x0D
-#define R_INC_RES_FIFO		0x0E
-#define R_FSM_IDX		0x0F
-#define R_FIFO			0x0F
-#define R_SLOT			0x10
-#define R_IRQMSK_MISC		0x11
-#define R_SCI_MSK		0x12
-#define R_IRQ_CTRL		0x13
-#define R_PCM_MD0		0x14
-#define R_PCM_MD1		0x15
-#define R_PCM_MD2		0x15
-#define R_SH0H			0x15
-#define R_SH1H			0x15
-#define R_SH0L			0x15
-#define R_SH1L			0x15
-#define R_SL_SEL0		0x15
-#define R_SL_SEL1		0x15
-#define R_SL_SEL2		0x15
-#define R_SL_SEL3		0x15
-#define R_SL_SEL4		0x15
-#define R_SL_SEL5		0x15
-#define R_SL_SEL6		0x15
-#define R_SL_SEL7		0x15
-#define R_ST_SEL		0x16
-#define R_ST_SYNC		0x17
-#define R_CONF_EN		0x18
-#define R_TI_WD			0x1A
-#define R_BERT_WD_MD		0x1B
-#define R_DTMF			0x1C
-#define R_DTMF_N		0x1D
-#define R_E1_WR_STA		0x20
-#define R_E1_RD_STA		0x20
-#define R_LOS0			0x22
-#define R_LOS1			0x23
-#define R_RX0			0x24
-#define R_RX_FR0		0x25
-#define R_RX_FR1		0x26
-#define R_TX0			0x28
-#define R_TX1			0x29
-#define R_TX_FR0		0x2C
-
-#define R_TX_FR1		0x2D
-#define R_TX_FR2		0x2E
-#define R_JATT_ATT		0x2F /* undocumented */
-#define A_ST_RD_STATE		0x30
-#define A_ST_WR_STATE		0x30
-#define R_RX_OFF		0x30
-#define A_ST_CTRL0		0x31
-#define R_SYNC_OUT		0x31
-#define A_ST_CTRL1		0x32
-#define A_ST_CTRL2		0x33
-#define A_ST_SQ_WR		0x34
-#define R_TX_OFF		0x34
-#define R_SYNC_CTRL		0x35
-#define A_ST_CLK_DLY		0x37
-#define R_PWM0			0x38
-#define R_PWM1			0x39
-#define A_ST_B1_TX		0x3C
-#define A_ST_B2_TX		0x3D
-#define A_ST_D_TX		0x3E
-#define R_GPIO_OUT0		0x40
-#define R_GPIO_OUT1		0x41
-#define R_GPIO_EN0		0x42
-#define R_GPIO_EN1		0x43
-#define R_GPIO_SEL		0x44
-#define R_BRG_CTRL		0x45
-#define R_PWM_MD		0x46
-#define R_BRG_MD		0x47
-#define R_BRG_TIM0		0x48
-#define R_BRG_TIM1		0x49
-#define R_BRG_TIM2		0x4A
-#define R_BRG_TIM3		0x4B
-#define R_BRG_TIM_SEL01		0x4C
-#define R_BRG_TIM_SEL23		0x4D
-#define R_BRG_TIM_SEL45		0x4E
-#define R_BRG_TIM_SEL67		0x4F
-#define A_SL_CFG		0xD0
-#define A_CONF			0xD1
-#define A_CH_MSK		0xF4
-#define A_CON_HDLC		0xFA
-#define A_SUBCH_CFG		0xFB
-#define A_CHANNEL		0xFC
-#define A_FIFO_SEQ		0xFD
-#define A_IRQ_MSK		0xFF
-
-/* read only registers */
-#define A_Z12			0x04
-#define A_Z1L			0x04
-#define A_Z1			0x04
-#define A_Z1H			0x05
-#define A_Z2L			0x06
-#define A_Z2			0x06
-#define A_Z2H			0x07
-#define A_F1			0x0C
-#define A_F12			0x0C
-#define A_F2			0x0D
-#define R_IRQ_OVIEW		0x10
-#define R_IRQ_MISC		0x11
-#define R_IRQ_STATECH		0x12
-#define R_CONF_OFLOW		0x14
-#define R_RAM_USE		0x15
-#define R_CHIP_ID		0x16
-#define R_BERT_STA		0x17
-#define R_F0_CNTL		0x18
-#define R_F0_CNTH		0x19
-#define R_BERT_EC		0x1A
-#define R_BERT_ECL		0x1A
-#define R_BERT_ECH		0x1B
-#define R_STATUS		0x1C
-#define R_CHIP_RV		0x1F
-#define R_STATE			0x20
-#define R_SYNC_STA		0x24
-#define R_RX_SL0_0		0x25
-#define R_RX_SL0_1		0x26
-#define R_RX_SL0_2		0x27
-#define R_JATT_DIR		0x2b /* undocumented */
-#define R_SLIP			0x2c
-#define A_ST_RD_STA		0x30
-#define R_FAS_EC		0x30
-#define R_FAS_ECL		0x30
-#define R_FAS_ECH		0x31
-#define R_VIO_EC		0x32
-#define R_VIO_ECL		0x32
-#define R_VIO_ECH		0x33
-#define A_ST_SQ_RD		0x34
-#define R_CRC_EC		0x34
-#define R_CRC_ECL		0x34
-#define R_CRC_ECH		0x35
-#define R_E_EC			0x36
-#define R_E_ECL			0x36
-#define R_E_ECH			0x37
-#define R_SA6_SA13_EC		0x38
-#define R_SA6_SA13_ECL		0x38
-#define R_SA6_SA13_ECH		0x39
-#define R_SA6_SA23_EC		0x3A
-#define R_SA6_SA23_ECL		0x3A
-#define R_SA6_SA23_ECH		0x3B
-#define A_ST_B1_RX		0x3C
-#define A_ST_B2_RX		0x3D
-#define A_ST_D_RX		0x3E
-#define A_ST_E_RX		0x3F
-#define R_GPIO_IN0		0x40
-#define R_GPIO_IN1		0x41
-#define R_GPI_IN0		0x44
-#define R_GPI_IN1		0x45
-#define R_GPI_IN2		0x46
-#define R_GPI_IN3		0x47
-#define R_INT_DATA		0x88
-#define R_IRQ_FIFO_BL0		0xC8
-#define R_IRQ_FIFO_BL1		0xC9
-#define R_IRQ_FIFO_BL2		0xCA
-#define R_IRQ_FIFO_BL3		0xCB
-#define R_IRQ_FIFO_BL4		0xCC
-#define R_IRQ_FIFO_BL5		0xCD
-#define R_IRQ_FIFO_BL6		0xCE
-#define R_IRQ_FIFO_BL7		0xCF
-
-/* read and write registers */
-#define A_FIFO_DATA0		0x80
-#define A_FIFO_DATA1		0x80
-#define A_FIFO_DATA2		0x80
-#define A_FIFO_DATA0_NOINC	0x84
-#define A_FIFO_DATA1_NOINC	0x84
-#define A_FIFO_DATA2_NOINC	0x84
-#define R_RAM_DATA		0xC0
-
-
-/*
- * BIT SETTING FOR HFC-4S/8S AND HFC-E1
- */
-
-/* chapter 2: universal bus interface */
-/* R_CIRM */
-#define V_IRQ_SEL		0x01
-#define V_SRES			0x08
-#define V_HFCRES		0x10
-#define V_PCMRES		0x20
-#define V_STRES			0x40
-#define V_ETRES			0x40
-#define V_RLD_EPR		0x80
-/* R_CTRL */
-#define V_FIFO_LPRIO		0x02
-#define V_SLOW_RD		0x04
-#define V_EXT_RAM		0x08
-#define V_CLK_OFF		0x20
-#define V_ST_CLK		0x40
-/* R_RAM_ADDR0 */
-#define V_RAM_ADDR2		0x01
-#define V_ADDR_RES		0x40
-#define V_ADDR_INC		0x80
-/* R_RAM_SZ */
-#define V_RAM_SZ		0x01
-#define V_PWM0_16KHZ		0x10
-#define V_PWM1_16KHZ		0x20
-#define V_FZ_MD			0x80
-/* R_CHIP_ID */
-#define V_PNP_IRQ		0x01
-#define V_CHIP_ID		0x10
-
-/* chapter 3: data flow */
-/* R_FIRST_FIFO */
-#define V_FIRST_FIRO_DIR	0x01
-#define V_FIRST_FIFO_NUM	0x02
-/* R_FIFO_MD */
-#define V_FIFO_MD		0x01
-#define V_CSM_MD		0x04
-#define V_FSM_MD		0x08
-#define V_FIFO_SZ		0x10
-/* R_FIFO */
-#define V_FIFO_DIR		0x01
-#define V_FIFO_NUM		0x02
-#define V_REV			0x80
-/* R_SLOT */
-#define V_SL_DIR		0x01
-#define V_SL_NUM		0x02
-/* A_SL_CFG */
-#define V_CH_DIR		0x01
-#define V_CH_SEL		0x02
-#define V_ROUTING		0x40
-/* A_CON_HDLC */
-#define V_IFF			0x01
-#define V_HDLC_TRP		0x02
-#define V_TRP_IRQ		0x04
-#define V_DATA_FLOW		0x20
-/* A_SUBCH_CFG */
-#define V_BIT_CNT		0x01
-#define V_START_BIT		0x08
-#define V_LOOP_FIFO		0x40
-#define V_INV_DATA		0x80
-/* A_CHANNEL */
-#define V_CH_DIR0		0x01
-#define V_CH_NUM0		0x02
-/* A_FIFO_SEQ */
-#define V_NEXT_FIFO_DIR		0x01
-#define V_NEXT_FIFO_NUM		0x02
-#define V_SEQ_END		0x40
-
-/* chapter 4: FIFO handling and HDLC controller */
-/* R_INC_RES_FIFO */
-#define V_INC_F			0x01
-#define V_RES_F			0x02
-#define V_RES_LOST		0x04
-
-/* chapter 5: S/T interface */
-/* R_SCI_MSK */
-#define V_SCI_MSK_ST0		0x01
-#define V_SCI_MSK_ST1		0x02
-#define V_SCI_MSK_ST2		0x04
-#define V_SCI_MSK_ST3		0x08
-#define V_SCI_MSK_ST4		0x10
-#define V_SCI_MSK_ST5		0x20
-#define V_SCI_MSK_ST6		0x40
-#define V_SCI_MSK_ST7		0x80
-/* R_ST_SEL */
-#define V_ST_SEL		0x01
-#define V_MULT_ST		0x08
-/* R_ST_SYNC */
-#define V_SYNC_SEL		0x01
-#define V_AUTO_SYNC		0x08
-/* A_ST_WR_STA */
-#define V_ST_SET_STA		0x01
-#define V_ST_LD_STA		0x10
-#define V_ST_ACT		0x20
-#define V_SET_G2_G3		0x80
-/* A_ST_CTRL0 */
-#define V_B1_EN			0x01
-#define V_B2_EN			0x02
-#define V_ST_MD			0x04
-#define V_D_PRIO		0x08
-#define V_SQ_EN			0x10
-#define V_96KHZ			0x20
-#define V_TX_LI			0x40
-#define V_ST_STOP		0x80
-/* A_ST_CTRL1 */
-#define V_G2_G3_EN		0x01
-#define V_D_HI			0x04
-#define V_E_IGNO		0x08
-#define V_E_LO			0x10
-#define V_B12_SWAP		0x80
-/* A_ST_CTRL2 */
-#define V_B1_RX_EN		0x01
-#define V_B2_RX_EN		0x02
-#define V_ST_TRIS		0x40
-/* A_ST_CLK_DLY */
-#define V_ST_CK_DLY		0x01
-#define V_ST_SMPL		0x10
-/* A_ST_D_TX */
-#define V_ST_D_TX		0x40
-/* R_IRQ_STATECH */
-#define V_SCI_ST0		0x01
-#define V_SCI_ST1		0x02
-#define V_SCI_ST2		0x04
-#define V_SCI_ST3		0x08
-#define V_SCI_ST4		0x10
-#define V_SCI_ST5		0x20
-#define V_SCI_ST6		0x40
-#define V_SCI_ST7		0x80
-/* A_ST_RD_STA */
-#define V_ST_STA		0x01
-#define V_FR_SYNC_ST		0x10
-#define V_TI2_EXP		0x20
-#define V_INFO0			0x40
-#define V_G2_G3			0x80
-/* A_ST_SQ_RD */
-#define V_ST_SQ			0x01
-#define V_MF_RX_RDY		0x10
-#define V_MF_TX_RDY		0x80
-/* A_ST_D_RX */
-#define V_ST_D_RX		0x40
-/* A_ST_E_RX */
-#define V_ST_E_RX		0x40
-
-/* chapter 5: E1 interface */
-/* R_E1_WR_STA */
-/* R_E1_RD_STA */
-#define V_E1_SET_STA		0x01
-#define V_E1_LD_STA		0x10
-/* R_RX0 */
-#define V_RX_CODE		0x01
-#define V_RX_FBAUD		0x04
-#define V_RX_CMI		0x08
-#define V_RX_INV_CMI		0x10
-#define V_RX_INV_CLK		0x20
-#define V_RX_INV_DATA		0x40
-#define V_AIS_ITU		0x80
-/* R_RX_FR0 */
-#define V_NO_INSYNC		0x01
-#define V_AUTO_RESYNC		0x02
-#define V_AUTO_RECO		0x04
-#define V_SWORD_COND		0x08
-#define V_SYNC_LOSS		0x10
-#define V_XCRC_SYNC		0x20
-#define V_MF_RESYNC		0x40
-#define V_RESYNC		0x80
-/* R_RX_FR1 */
-#define V_RX_MF			0x01
-#define V_RX_MF_SYNC		0x02
-#define V_RX_SL0_RAM		0x04
-#define V_ERR_SIM		0x20
-#define V_RES_NMF		0x40
-/* R_TX0 */
-#define V_TX_CODE		0x01
-#define V_TX_FBAUD		0x04
-#define V_TX_CMI_CODE		0x08
-#define V_TX_INV_CMI_CODE	0x10
-#define V_TX_INV_CLK		0x20
-#define V_TX_INV_DATA		0x40
-#define V_OUT_EN		0x80
-/* R_TX1 */
-#define V_INV_CLK		0x01
-#define V_EXCHG_DATA_LI		0x02
-#define V_AIS_OUT		0x04
-#define V_ATX			0x20
-#define V_NTRI			0x40
-#define V_AUTO_ERR_RES		0x80
-/* R_TX_FR0 */
-#define V_TRP_FAS		0x01
-#define V_TRP_NFAS		0x02
-#define V_TRP_RAL		0x04
-#define V_TRP_SA		0x08
-/* R_TX_FR1 */
-#define V_TX_FAS		0x01
-#define V_TX_NFAS		0x02
-#define V_TX_RAL		0x04
-#define V_TX_SA			0x08
-/* R_TX_FR2 */
-#define V_TX_MF			0x01
-#define V_TRP_SL0		0x02
-#define V_TX_SL0_RAM		0x04
-#define V_TX_E			0x10
-#define V_NEG_E			0x20
-#define V_XS12_ON		0x40
-#define V_XS15_ON		0x80
-/* R_RX_OFF */
-#define V_RX_SZ			0x01
-#define V_RX_INIT		0x04
-/* R_SYNC_OUT */
-#define V_SYNC_E1_RX		0x01
-#define V_IPATS0		0x20
-#define V_IPATS1		0x40
-#define V_IPATS2		0x80
-/* R_TX_OFF */
-#define V_TX_SZ			0x01
-#define V_TX_INIT		0x04
-/* R_SYNC_CTRL */
-#define V_EXT_CLK_SYNC		0x01
-#define V_SYNC_OFFS		0x02
-#define V_PCM_SYNC		0x04
-#define V_NEG_CLK		0x08
-#define V_HCLK			0x10
-/*
-  #define V_JATT_AUTO_DEL		0x20
-  #define V_JATT_AUTO		0x40
-*/
-#define V_JATT_OFF		0x80
-/* R_STATE */
-#define V_E1_STA		0x01
-#define V_ALT_FR_RX		0x40
-#define V_ALT_FR_TX		0x80
-/* R_SYNC_STA */
-#define V_RX_STA		0x01
-#define V_FR_SYNC_E1		0x04
-#define V_SIG_LOS		0x08
-#define V_MFA_STA		0x10
-#define V_AIS			0x40
-#define V_NO_MF_SYNC		0x80
-/* R_RX_SL0_0 */
-#define V_SI_FAS		0x01
-#define V_SI_NFAS		0x02
-#define V_A			0x04
-#define V_CRC_OK		0x08
-#define V_TX_E1			0x10
-#define V_TX_E2			0x20
-#define V_RX_E1			0x40
-#define V_RX_E2			0x80
-/* R_SLIP */
-#define V_SLIP_RX		0x01
-#define V_FOSLIP_RX		0x08
-#define V_SLIP_TX		0x10
-#define V_FOSLIP_TX		0x80
-
-/* chapter 6: PCM interface */
-/* R_PCM_MD0 */
-#define V_PCM_MD		0x01
-#define V_C4_POL		0x02
-#define V_F0_NEG		0x04
-#define V_F0_LEN		0x08
-#define V_PCM_ADDR		0x10
-/* R_SL_SEL0 */
-#define V_SL_SEL0		0x01
-#define V_SH_SEL0		0x80
-/* R_SL_SEL1 */
-#define V_SL_SEL1		0x01
-#define V_SH_SEL1		0x80
-/* R_SL_SEL2 */
-#define V_SL_SEL2		0x01
-#define V_SH_SEL2		0x80
-/* R_SL_SEL3 */
-#define V_SL_SEL3		0x01
-#define V_SH_SEL3		0x80
-/* R_SL_SEL4 */
-#define V_SL_SEL4		0x01
-#define V_SH_SEL4		0x80
-/* R_SL_SEL5 */
-#define V_SL_SEL5		0x01
-#define V_SH_SEL5		0x80
-/* R_SL_SEL6 */
-#define V_SL_SEL6		0x01
-#define V_SH_SEL6		0x80
-/* R_SL_SEL7 */
-#define V_SL_SEL7		0x01
-#define V_SH_SEL7		0x80
-/* R_PCM_MD1 */
-#define V_ODEC_CON		0x01
-#define V_PLL_ADJ		0x04
-#define V_PCM_DR		0x10
-#define V_PCM_LOOP		0x40
-/* R_PCM_MD2 */
-#define V_SYNC_PLL		0x02
-#define V_SYNC_SRC		0x04
-#define V_SYNC_OUT		0x08
-#define V_ICR_FR_TIME		0x40
-#define V_EN_PLL		0x80
-
-/* chapter 7: pulse width modulation */
-/* R_PWM_MD */
-#define V_EXT_IRQ_EN		0x08
-#define V_PWM0_MD		0x10
-#define V_PWM1_MD		0x40
-
-/* chapter 8: multiparty audio conferences */
-/* R_CONF_EN */
-#define V_CONF_EN		0x01
-#define V_ULAW			0x80
-/* A_CONF */
-#define V_CONF_NUM		0x01
-#define V_NOISE_SUPPR		0x08
-#define V_ATT_LEV		0x20
-#define V_CONF_SL		0x80
-/* R_CONF_OFLOW */
-#define V_CONF_OFLOW0		0x01
-#define V_CONF_OFLOW1		0x02
-#define V_CONF_OFLOW2		0x04
-#define V_CONF_OFLOW3		0x08
-#define V_CONF_OFLOW4		0x10
-#define V_CONF_OFLOW5		0x20
-#define V_CONF_OFLOW6		0x40
-#define V_CONF_OFLOW7		0x80
-
-/* chapter 9: DTMF contoller */
-/* R_DTMF0 */
-#define V_DTMF_EN		0x01
-#define V_HARM_SEL		0x02
-#define V_DTMF_RX_CH		0x04
-#define V_DTMF_STOP		0x08
-#define V_CHBL_SEL		0x10
-#define V_RST_DTMF		0x40
-#define V_ULAW_SEL		0x80
-
-/* chapter 10: BERT */
-/* R_BERT_WD_MD */
-#define V_PAT_SEQ		0x01
-#define V_BERT_ERR		0x08
-#define V_AUTO_WD_RES		0x20
-#define V_WD_RES		0x80
-/* R_BERT_STA */
-#define V_BERT_SYNC_SRC		0x01
-#define V_BERT_SYNC		0x10
-#define V_BERT_INV_DATA		0x20
-
-/* chapter 11: auxiliary interface */
-/* R_BRG_PCM_CFG */
-#define V_BRG_EN		0x01
-#define V_BRG_MD		0x02
-#define V_PCM_CLK		0x20
-#define V_ADDR_WRDLY		0x40
-/* R_BRG_CTRL */
-#define V_BRG_CS		0x01
-#define V_BRG_ADDR		0x08
-#define V_BRG_CS_SRC		0x80
-/* R_BRG_MD */
-#define V_BRG_MD0		0x01
-#define V_BRG_MD1		0x02
-#define V_BRG_MD2		0x04
-#define V_BRG_MD3		0x08
-#define V_BRG_MD4		0x10
-#define V_BRG_MD5		0x20
-#define V_BRG_MD6		0x40
-#define V_BRG_MD7		0x80
-/* R_BRG_TIM0 */
-#define V_BRG_TIM0_IDLE		0x01
-#define V_BRG_TIM0_CLK		0x10
-/* R_BRG_TIM1 */
-#define V_BRG_TIM1_IDLE		0x01
-#define V_BRG_TIM1_CLK		0x10
-/* R_BRG_TIM2 */
-#define V_BRG_TIM2_IDLE		0x01
-#define V_BRG_TIM2_CLK		0x10
-/* R_BRG_TIM3 */
-#define V_BRG_TIM3_IDLE		0x01
-#define V_BRG_TIM3_CLK		0x10
-/* R_BRG_TIM_SEL01 */
-#define V_BRG_WR_SEL0		0x01
-#define V_BRG_RD_SEL0		0x04
-#define V_BRG_WR_SEL1		0x10
-#define V_BRG_RD_SEL1		0x40
-/* R_BRG_TIM_SEL23 */
-#define V_BRG_WR_SEL2		0x01
-#define V_BRG_RD_SEL2		0x04
-#define V_BRG_WR_SEL3		0x10
-#define V_BRG_RD_SEL3		0x40
-/* R_BRG_TIM_SEL45 */
-#define V_BRG_WR_SEL4		0x01
-#define V_BRG_RD_SEL4		0x04
-#define V_BRG_WR_SEL5		0x10
-#define V_BRG_RD_SEL5		0x40
-/* R_BRG_TIM_SEL67 */
-#define V_BRG_WR_SEL6		0x01
-#define V_BRG_RD_SEL6		0x04
-#define V_BRG_WR_SEL7		0x10
-#define V_BRG_RD_SEL7		0x40
-
-/* chapter 12: clock, reset, interrupt, timer and watchdog */
-/* R_IRQMSK_MISC */
-#define V_STA_IRQMSK		0x01
-#define V_TI_IRQMSK		0x02
-#define V_PROC_IRQMSK		0x04
-#define V_DTMF_IRQMSK		0x08
-#define V_IRQ1S_MSK		0x10
-#define V_SA6_IRQMSK		0x20
-#define V_RX_EOMF_MSK		0x40
-#define V_TX_EOMF_MSK		0x80
-/* R_IRQ_CTRL */
-#define V_FIFO_IRQ		0x01
-#define V_GLOB_IRQ_EN		0x08
-#define V_IRQ_POL		0x10
-/* R_TI_WD */
-#define V_EV_TS			0x01
-#define V_WD_TS			0x10
-/* A_IRQ_MSK */
-#define V_IRQ			0x01
-#define V_BERT_EN		0x02
-#define V_MIX_IRQ		0x04
-/* R_IRQ_OVIEW */
-#define V_IRQ_FIFO_BL0		0x01
-#define V_IRQ_FIFO_BL1		0x02
-#define V_IRQ_FIFO_BL2		0x04
-#define V_IRQ_FIFO_BL3		0x08
-#define V_IRQ_FIFO_BL4		0x10
-#define V_IRQ_FIFO_BL5		0x20
-#define V_IRQ_FIFO_BL6		0x40
-#define V_IRQ_FIFO_BL7		0x80
-/* R_IRQ_MISC */
-#define V_STA_IRQ		0x01
-#define V_TI_IRQ		0x02
-#define V_IRQ_PROC		0x04
-#define V_DTMF_IRQ		0x08
-#define V_IRQ1S			0x10
-#define V_SA6_IRQ		0x20
-#define V_RX_EOMF		0x40
-#define V_TX_EOMF		0x80
-/* R_STATUS */
-#define V_BUSY			0x01
-#define V_PROC			0x02
-#define V_DTMF_STA		0x04
-#define V_LOST_STA		0x08
-#define V_SYNC_IN		0x10
-#define V_EXT_IRQSTA		0x20
-#define V_MISC_IRQSTA		0x40
-#define V_FR_IRQSTA		0x80
-/* R_IRQ_FIFO_BL0 */
-#define V_IRQ_FIFO0_TX		0x01
-#define V_IRQ_FIFO0_RX		0x02
-#define V_IRQ_FIFO1_TX		0x04
-#define V_IRQ_FIFO1_RX		0x08
-#define V_IRQ_FIFO2_TX		0x10
-#define V_IRQ_FIFO2_RX		0x20
-#define V_IRQ_FIFO3_TX		0x40
-#define V_IRQ_FIFO3_RX		0x80
-/* R_IRQ_FIFO_BL1 */
-#define V_IRQ_FIFO4_TX		0x01
-#define V_IRQ_FIFO4_RX		0x02
-#define V_IRQ_FIFO5_TX		0x04
-#define V_IRQ_FIFO5_RX		0x08
-#define V_IRQ_FIFO6_TX		0x10
-#define V_IRQ_FIFO6_RX		0x20
-#define V_IRQ_FIFO7_TX		0x40
-#define V_IRQ_FIFO7_RX		0x80
-/* R_IRQ_FIFO_BL2 */
-#define V_IRQ_FIFO8_TX		0x01
-#define V_IRQ_FIFO8_RX		0x02
-#define V_IRQ_FIFO9_TX		0x04
-#define V_IRQ_FIFO9_RX		0x08
-#define V_IRQ_FIFO10_TX		0x10
-#define V_IRQ_FIFO10_RX		0x20
-#define V_IRQ_FIFO11_TX		0x40
-#define V_IRQ_FIFO11_RX		0x80
-/* R_IRQ_FIFO_BL3 */
-#define V_IRQ_FIFO12_TX		0x01
-#define V_IRQ_FIFO12_RX		0x02
-#define V_IRQ_FIFO13_TX		0x04
-#define V_IRQ_FIFO13_RX		0x08
-#define V_IRQ_FIFO14_TX		0x10
-#define V_IRQ_FIFO14_RX		0x20
-#define V_IRQ_FIFO15_TX		0x40
-#define V_IRQ_FIFO15_RX		0x80
-/* R_IRQ_FIFO_BL4 */
-#define V_IRQ_FIFO16_TX		0x01
-#define V_IRQ_FIFO16_RX		0x02
-#define V_IRQ_FIFO17_TX		0x04
-#define V_IRQ_FIFO17_RX		0x08
-#define V_IRQ_FIFO18_TX		0x10
-#define V_IRQ_FIFO18_RX		0x20
-#define V_IRQ_FIFO19_TX		0x40
-#define V_IRQ_FIFO19_RX		0x80
-/* R_IRQ_FIFO_BL5 */
-#define V_IRQ_FIFO20_TX		0x01
-#define V_IRQ_FIFO20_RX		0x02
-#define V_IRQ_FIFO21_TX		0x04
-#define V_IRQ_FIFO21_RX		0x08
-#define V_IRQ_FIFO22_TX		0x10
-#define V_IRQ_FIFO22_RX		0x20
-#define V_IRQ_FIFO23_TX		0x40
-#define V_IRQ_FIFO23_RX		0x80
-/* R_IRQ_FIFO_BL6 */
-#define V_IRQ_FIFO24_TX		0x01
-#define V_IRQ_FIFO24_RX		0x02
-#define V_IRQ_FIFO25_TX		0x04
-#define V_IRQ_FIFO25_RX		0x08
-#define V_IRQ_FIFO26_TX		0x10
-#define V_IRQ_FIFO26_RX		0x20
-#define V_IRQ_FIFO27_TX		0x40
-#define V_IRQ_FIFO27_RX		0x80
-/* R_IRQ_FIFO_BL7 */
-#define V_IRQ_FIFO28_TX		0x01
-#define V_IRQ_FIFO28_RX		0x02
-#define V_IRQ_FIFO29_TX		0x04
-#define V_IRQ_FIFO29_RX		0x08
-#define V_IRQ_FIFO30_TX		0x10
-#define V_IRQ_FIFO30_RX		0x20
-#define V_IRQ_FIFO31_TX		0x40
-#define V_IRQ_FIFO31_RX		0x80
-
-/* chapter 13: general purpose I/O pins (GPIO) and input pins (GPI) */
-/* R_GPIO_OUT0 */
-#define V_GPIO_OUT0		0x01
-#define V_GPIO_OUT1		0x02
-#define V_GPIO_OUT2		0x04
-#define V_GPIO_OUT3		0x08
-#define V_GPIO_OUT4		0x10
-#define V_GPIO_OUT5		0x20
-#define V_GPIO_OUT6		0x40
-#define V_GPIO_OUT7		0x80
-/* R_GPIO_OUT1 */
-#define V_GPIO_OUT8		0x01
-#define V_GPIO_OUT9		0x02
-#define V_GPIO_OUT10		0x04
-#define V_GPIO_OUT11		0x08
-#define V_GPIO_OUT12		0x10
-#define V_GPIO_OUT13		0x20
-#define V_GPIO_OUT14		0x40
-#define V_GPIO_OUT15		0x80
-/* R_GPIO_EN0 */
-#define V_GPIO_EN0		0x01
-#define V_GPIO_EN1		0x02
-#define V_GPIO_EN2		0x04
-#define V_GPIO_EN3		0x08
-#define V_GPIO_EN4		0x10
-#define V_GPIO_EN5		0x20
-#define V_GPIO_EN6		0x40
-#define V_GPIO_EN7		0x80
-/* R_GPIO_EN1 */
-#define V_GPIO_EN8		0x01
-#define V_GPIO_EN9		0x02
-#define V_GPIO_EN10		0x04
-#define V_GPIO_EN11		0x08
-#define V_GPIO_EN12		0x10
-#define V_GPIO_EN13		0x20
-#define V_GPIO_EN14		0x40
-#define V_GPIO_EN15		0x80
-/* R_GPIO_SEL */
-#define V_GPIO_SEL0		0x01
-#define V_GPIO_SEL1		0x02
-#define V_GPIO_SEL2		0x04
-#define V_GPIO_SEL3		0x08
-#define V_GPIO_SEL4		0x10
-#define V_GPIO_SEL5		0x20
-#define V_GPIO_SEL6		0x40
-#define V_GPIO_SEL7		0x80
-/* R_GPIO_IN0 */
-#define V_GPIO_IN0		0x01
-#define V_GPIO_IN1		0x02
-#define V_GPIO_IN2		0x04
-#define V_GPIO_IN3		0x08
-#define V_GPIO_IN4		0x10
-#define V_GPIO_IN5		0x20
-#define V_GPIO_IN6		0x40
-#define V_GPIO_IN7		0x80
-/* R_GPIO_IN1 */
-#define V_GPIO_IN8		0x01
-#define V_GPIO_IN9		0x02
-#define V_GPIO_IN10		0x04
-#define V_GPIO_IN11		0x08
-#define V_GPIO_IN12		0x10
-#define V_GPIO_IN13		0x20
-#define V_GPIO_IN14		0x40
-#define V_GPIO_IN15		0x80
-/* R_GPI_IN0 */
-#define V_GPI_IN0		0x01
-#define V_GPI_IN1		0x02
-#define V_GPI_IN2		0x04
-#define V_GPI_IN3		0x08
-#define V_GPI_IN4		0x10
-#define V_GPI_IN5		0x20
-#define V_GPI_IN6		0x40
-#define V_GPI_IN7		0x80
-/* R_GPI_IN1 */
-#define V_GPI_IN8		0x01
-#define V_GPI_IN9		0x02
-#define V_GPI_IN10		0x04
-#define V_GPI_IN11		0x08
-#define V_GPI_IN12		0x10
-#define V_GPI_IN13		0x20
-#define V_GPI_IN14		0x40
-#define V_GPI_IN15		0x80
-/* R_GPI_IN2 */
-#define V_GPI_IN16		0x01
-#define V_GPI_IN17		0x02
-#define V_GPI_IN18		0x04
-#define V_GPI_IN19		0x08
-#define V_GPI_IN20		0x10
-#define V_GPI_IN21		0x20
-#define V_GPI_IN22		0x40
-#define V_GPI_IN23		0x80
-/* R_GPI_IN3 */
-#define V_GPI_IN24		0x01
-#define V_GPI_IN25		0x02
-#define V_GPI_IN26		0x04
-#define V_GPI_IN27		0x08
-#define V_GPI_IN28		0x10
-#define V_GPI_IN29		0x20
-#define V_GPI_IN30		0x40
-#define V_GPI_IN31		0x80
-
-/* map of all registers, used for debugging */
-
-#ifdef HFC_REGISTER_DEBUG
-struct hfc_register_names {
-	char *name;
-	u_char reg;
-} hfc_register_names[] = {
-	/* write registers */
-	{"R_CIRM",		0x00},
-	{"R_CTRL",		0x01},
-	{"R_BRG_PCM_CFG ",	0x02},
-	{"R_RAM_ADDR0",		0x08},
-	{"R_RAM_ADDR1",		0x09},
-	{"R_RAM_ADDR2",		0x0A},
-	{"R_FIRST_FIFO",	0x0B},
-	{"R_RAM_SZ",		0x0C},
-	{"R_FIFO_MD",		0x0D},
-	{"R_INC_RES_FIFO",	0x0E},
-	{"R_FIFO / R_FSM_IDX",	0x0F},
-	{"R_SLOT",		0x10},
-	{"R_IRQMSK_MISC",	0x11},
-	{"R_SCI_MSK",		0x12},
-	{"R_IRQ_CTRL",		0x13},
-	{"R_PCM_MD0",		0x14},
-	{"R_0x15",		0x15},
-	{"R_ST_SEL",		0x16},
-	{"R_ST_SYNC",		0x17},
-	{"R_CONF_EN",		0x18},
-	{"R_TI_WD",		0x1A},
-	{"R_BERT_WD_MD",	0x1B},
-	{"R_DTMF",		0x1C},
-	{"R_DTMF_N",		0x1D},
-	{"R_E1_XX_STA",		0x20},
-	{"R_LOS0",		0x22},
-	{"R_LOS1",		0x23},
-	{"R_RX0",		0x24},
-	{"R_RX_FR0",		0x25},
-	{"R_RX_FR1",		0x26},
-	{"R_TX0",		0x28},
-	{"R_TX1",		0x29},
-	{"R_TX_FR0",		0x2C},
-	{"R_TX_FR1",		0x2D},
-	{"R_TX_FR2",		0x2E},
-	{"R_JATT_ATT",		0x2F},
-	{"A_ST_xx_STA/R_RX_OFF", 0x30},
-	{"A_ST_CTRL0/R_SYNC_OUT", 0x31},
-	{"A_ST_CTRL1",		0x32},
-	{"A_ST_CTRL2",		0x33},
-	{"A_ST_SQ_WR",		0x34},
-	{"R_TX_OFF",		0x34},
-	{"R_SYNC_CTRL",		0x35},
-	{"A_ST_CLK_DLY",	0x37},
-	{"R_PWM0",		0x38},
-	{"R_PWM1",		0x39},
-	{"A_ST_B1_TX",		0x3C},
-	{"A_ST_B2_TX",		0x3D},
-	{"A_ST_D_TX",		0x3E},
-	{"R_GPIO_OUT0",		0x40},
-	{"R_GPIO_OUT1",		0x41},
-	{"R_GPIO_EN0",		0x42},
-	{"R_GPIO_EN1",		0x43},
-	{"R_GPIO_SEL",		0x44},
-	{"R_BRG_CTRL",		0x45},
-	{"R_PWM_MD",		0x46},
-	{"R_BRG_MD",		0x47},
-	{"R_BRG_TIM0",		0x48},
-	{"R_BRG_TIM1",		0x49},
-	{"R_BRG_TIM2",		0x4A},
-	{"R_BRG_TIM3",		0x4B},
-	{"R_BRG_TIM_SEL01",	0x4C},
-	{"R_BRG_TIM_SEL23",	0x4D},
-	{"R_BRG_TIM_SEL45",	0x4E},
-	{"R_BRG_TIM_SEL67",	0x4F},
-	{"A_FIFO_DATA0-2",	0x80},
-	{"A_FIFO_DATA0-2_NOINC", 0x84},
-	{"R_RAM_DATA",		0xC0},
-	{"A_SL_CFG",		0xD0},
-	{"A_CONF",		0xD1},
-	{"A_CH_MSK",		0xF4},
-	{"A_CON_HDLC",		0xFA},
-	{"A_SUBCH_CFG",		0xFB},
-	{"A_CHANNEL",		0xFC},
-	{"A_FIFO_SEQ",		0xFD},
-	{"A_IRQ_MSK",		0xFF},
-	{NULL, 0},
-
-	/* read registers */
-	{"A_Z1",		0x04},
-	{"A_Z1H",		0x05},
-	{"A_Z2",		0x06},
-	{"A_Z2H",		0x07},
-	{"A_F1",		0x0C},
-	{"A_F2",		0x0D},
-	{"R_IRQ_OVIEW",		0x10},
-	{"R_IRQ_MISC",		0x11},
-	{"R_IRQ_STATECH",	0x12},
-	{"R_CONF_OFLOW",	0x14},
-	{"R_RAM_USE",		0x15},
-	{"R_CHIP_ID",		0x16},
-	{"R_BERT_STA",		0x17},
-	{"R_F0_CNTL",		0x18},
-	{"R_F0_CNTH",		0x19},
-	{"R_BERT_ECL",		0x1A},
-	{"R_BERT_ECH",		0x1B},
-	{"R_STATUS",		0x1C},
-	{"R_CHIP_RV",		0x1F},
-	{"R_STATE",		0x20},
-	{"R_SYNC_STA",		0x24},
-	{"R_RX_SL0_0",		0x25},
-	{"R_RX_SL0_1",		0x26},
-	{"R_RX_SL0_2",		0x27},
-	{"R_JATT_DIR",		0x2b},
-	{"R_SLIP",		0x2c},
-	{"A_ST_RD_STA",		0x30},
-	{"R_FAS_ECL",		0x30},
-	{"R_FAS_ECH",		0x31},
-	{"R_VIO_ECL",		0x32},
-	{"R_VIO_ECH",		0x33},
-	{"R_CRC_ECL / A_ST_SQ_RD", 0x34},
-	{"R_CRC_ECH",		0x35},
-	{"R_E_ECL",		0x36},
-	{"R_E_ECH",		0x37},
-	{"R_SA6_SA13_ECL",	0x38},
-	{"R_SA6_SA13_ECH",	0x39},
-	{"R_SA6_SA23_ECL",	0x3A},
-	{"R_SA6_SA23_ECH",	0x3B},
-	{"A_ST_B1_RX",		0x3C},
-	{"A_ST_B2_RX",		0x3D},
-	{"A_ST_D_RX",		0x3E},
-	{"A_ST_E_RX",		0x3F},
-	{"R_GPIO_IN0",		0x40},
-	{"R_GPIO_IN1",		0x41},
-	{"R_GPI_IN0",		0x44},
-	{"R_GPI_IN1",		0x45},
-	{"R_GPI_IN2",		0x46},
-	{"R_GPI_IN3",		0x47},
-	{"A_FIFO_DATA0-2",	0x80},
-	{"A_FIFO_DATA0-2_NOINC", 0x84},
-	{"R_INT_DATA",		0x88},
-	{"R_RAM_DATA",		0xC0},
-	{"R_IRQ_FIFO_BL0",	0xC8},
-	{"R_IRQ_FIFO_BL1",	0xC9},
-	{"R_IRQ_FIFO_BL2",	0xCA},
-	{"R_IRQ_FIFO_BL3",	0xCB},
-	{"R_IRQ_FIFO_BL4",	0xCC},
-	{"R_IRQ_FIFO_BL5",	0xCD},
-	{"R_IRQ_FIFO_BL6",	0xCE},
-	{"R_IRQ_FIFO_BL7",	0xCF},
-};
-#endif /* HFC_REGISTER_DEBUG */
diff --git a/drivers/isdn/hardware/mISDN/hfc_multi_8xx.h b/drivers/isdn/hardware/mISDN/hfc_multi_8xx.h
deleted file mode 100644
index 448ded8f9d24..000000000000
--- a/drivers/isdn/hardware/mISDN/hfc_multi_8xx.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * For License see notice in hfc_multi.c
- *
- * special IO and init functions for the embedded XHFC board
- * from Speech Design
- *
- */
-
-#include <asm/cpm1.h>
-
-/* Change this to the value used by your board */
-#ifndef IMAP_ADDR
-#define IMAP_ADDR	0xFFF00000
-#endif
-
-static void
-#ifdef HFC_REGISTER_DEBUG
-HFC_outb_embsd(struct hfc_multi *hc, u_char reg, u_char val,
-	       const char *function, int line)
-#else
-	HFC_outb_embsd(struct hfc_multi *hc, u_char reg, u_char val)
-#endif
-{
-	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
-	writeb(reg, hc->xhfc_memaddr);
-	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
-	writeb(val, hc->xhfc_memdata);
-}
-static u_char
-#ifdef HFC_REGISTER_DEBUG
-HFC_inb_embsd(struct hfc_multi *hc, u_char reg, const char *function, int line)
-#else
-	HFC_inb_embsd(struct hfc_multi *hc, u_char reg)
-#endif
-{
-	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
-	writeb(reg, hc->xhfc_memaddr);
-	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
-	return readb(hc->xhfc_memdata);
-}
-static u_short
-#ifdef HFC_REGISTER_DEBUG
-HFC_inw_embsd(struct hfc_multi *hc, u_char reg, const char *function, int line)
-#else
-	HFC_inw_embsd(struct hfc_multi *hc, u_char reg)
-#endif
-{
-	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
-	writeb(reg, hc->xhfc_memaddr);
-	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
-	return readb(hc->xhfc_memdata);
-}
-static void
-#ifdef HFC_REGISTER_DEBUG
-HFC_wait_embsd(struct hfc_multi *hc, const char *function, int line)
-#else
-	HFC_wait_embsd(struct hfc_multi *hc)
-#endif
-{
-	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
-	writeb(R_STATUS, hc->xhfc_memaddr);
-	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
-	while (readb(hc->xhfc_memdata) & V_BUSY)
-		cpu_relax();
-}
-
-/* write fifo data (EMBSD) */
-void
-write_fifo_embsd(struct hfc_multi *hc, u_char *data, int len)
-{
-	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
-	*hc->xhfc_memaddr = A_FIFO_DATA0;
-	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
-	while (len) {
-		*hc->xhfc_memdata = *data;
-		data++;
-		len--;
-	}
-}
-
-/* read fifo data (EMBSD) */
-void
-read_fifo_embsd(struct hfc_multi *hc, u_char *data, int len)
-{
-	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
-	*hc->xhfc_memaddr = A_FIFO_DATA0;
-	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
-	while (len) {
-		*data = (u_char)(*hc->xhfc_memdata);
-		data++;
-		len--;
-	}
-}
-
-static int
-setup_embedded(struct hfc_multi *hc, struct hm_map *m)
-{
-	printk(KERN_INFO
-	       "HFC-multi: card manufacturer: '%s' card name: '%s' clock: %s\n",
-	       m->vendor_name, m->card_name, m->clock2 ? "double" : "normal");
-
-	hc->pci_dev = NULL;
-	if (m->clock2)
-		test_and_set_bit(HFC_CHIP_CLOCK2, &hc->chip);
-
-	hc->leds = m->leds;
-	hc->ledstate = 0xAFFEAFFE;
-	hc->opticalsupport = m->opticalsupport;
-
-	hc->pci_iobase = 0;
-	hc->pci_membase = 0;
-	hc->xhfc_membase = NULL;
-	hc->xhfc_memaddr = NULL;
-	hc->xhfc_memdata = NULL;
-
-	/* set memory access methods */
-	if (m->io_mode) /* use mode from card config */
-		hc->io_mode = m->io_mode;
-	switch (hc->io_mode) {
-	case HFC_IO_MODE_EMBSD:
-		test_and_set_bit(HFC_CHIP_EMBSD, &hc->chip);
-		hc->slots = 128; /* required */
-		hc->HFC_outb = HFC_outb_embsd;
-		hc->HFC_inb = HFC_inb_embsd;
-		hc->HFC_inw = HFC_inw_embsd;
-		hc->HFC_wait = HFC_wait_embsd;
-		hc->read_fifo = read_fifo_embsd;
-		hc->write_fifo = write_fifo_embsd;
-		hc->xhfc_origmembase = XHFC_MEMBASE + XHFC_OFFSET * hc->id;
-		hc->xhfc_membase = (u_char *)ioremap(hc->xhfc_origmembase,
-						     XHFC_MEMSIZE);
-		if (!hc->xhfc_membase) {
-			printk(KERN_WARNING
-			       "HFC-multi: failed to remap xhfc address space. "
-			       "(internal error)\n");
-			return -EIO;
-		}
-		hc->xhfc_memaddr = (u_long *)(hc->xhfc_membase + 4);
-		hc->xhfc_memdata = (u_long *)(hc->xhfc_membase);
-		printk(KERN_INFO
-		       "HFC-multi: xhfc_membase:%#lx xhfc_origmembase:%#lx "
-		       "xhfc_memaddr:%#lx xhfc_memdata:%#lx\n",
-		       (u_long)hc->xhfc_membase, hc->xhfc_origmembase,
-		       (u_long)hc->xhfc_memaddr, (u_long)hc->xhfc_memdata);
-		break;
-	default:
-		printk(KERN_WARNING "HFC-multi: Invalid IO mode.\n");
-		return -EIO;
-	}
-
-	/* Prepare the MPC8XX PortA 10 as output (address/data selector) */
-	hc->immap = (struct immap *)(IMAP_ADDR);
-	hc->immap->im_ioport.iop_papar &= ~(PA_XHFC_A0);
-	hc->immap->im_ioport.iop_paodr &= ~(PA_XHFC_A0);
-	hc->immap->im_ioport.iop_padir |=   PA_XHFC_A0;
-
-	/* Prepare the MPC8xx PortB __X__ as input (ISDN__X__IRQ) */
-	hc->pb_irqmsk = (PB_XHFC_IRQ1 << hc->id);
-	hc->immap->im_cpm.cp_pbpar &= ~(hc->pb_irqmsk);
-	hc->immap->im_cpm.cp_pbodr &= ~(hc->pb_irqmsk);
-	hc->immap->im_cpm.cp_pbdir &= ~(hc->pb_irqmsk);
-
-	/* At this point the needed config is done */
-	/* fifos are still not enabled */
-	return 0;
-}
diff --git a/drivers/isdn/hardware/mISDN/hfc_pci.h b/drivers/isdn/hardware/mISDN/hfc_pci.h
deleted file mode 100644
index a0e4806c11fa..000000000000
--- a/drivers/isdn/hardware/mISDN/hfc_pci.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- *  specific defines for CCD's HFC 2BDS0 PCI chips
- *
- * Author     Werner Cornelius (werner@isdn4linux.de)
- *
- * Copyright 1999  by Werner Cornelius (werner@isdn4linux.de)
- */
-
-/*
- * thresholds for transparent B-channel mode
- * change mask and threshold simultaneously
- */
-#define HFCPCI_BTRANS_THRESHOLD 128
-#define HFCPCI_FILLEMPTY	64
-#define HFCPCI_BTRANS_THRESMASK 0x00
-
-/* defines for PCI config */
-#define PCI_ENA_MEMIO		0x02
-#define PCI_ENA_MASTER		0x04
-
-/* GCI/IOM bus monitor registers */
-#define HCFPCI_C_I		0x08
-#define HFCPCI_TRxR		0x0C
-#define HFCPCI_MON1_D		0x28
-#define HFCPCI_MON2_D		0x2C
-
-/* GCI/IOM bus timeslot registers */
-#define HFCPCI_B1_SSL		0x80
-#define HFCPCI_B2_SSL		0x84
-#define HFCPCI_AUX1_SSL		0x88
-#define HFCPCI_AUX2_SSL		0x8C
-#define HFCPCI_B1_RSL		0x90
-#define HFCPCI_B2_RSL		0x94
-#define HFCPCI_AUX1_RSL		0x98
-#define HFCPCI_AUX2_RSL		0x9C
-
-/* GCI/IOM bus data registers */
-#define HFCPCI_B1_D		0xA0
-#define HFCPCI_B2_D		0xA4
-#define HFCPCI_AUX1_D		0xA8
-#define HFCPCI_AUX2_D		0xAC
-
-/* GCI/IOM bus configuration registers */
-#define HFCPCI_MST_EMOD		0xB4
-#define HFCPCI_MST_MODE		0xB8
-#define HFCPCI_CONNECT		0xBC
-
-
-/* Interrupt and status registers */
-#define HFCPCI_FIFO_EN		0x44
-#define HFCPCI_TRM		0x48
-#define HFCPCI_B_MODE		0x4C
-#define HFCPCI_CHIP_ID		0x58
-#define HFCPCI_CIRM		0x60
-#define HFCPCI_CTMT		0x64
-#define HFCPCI_INT_M1		0x68
-#define HFCPCI_INT_M2		0x6C
-#define HFCPCI_INT_S1		0x78
-#define HFCPCI_INT_S2		0x7C
-#define HFCPCI_STATUS		0x70
-
-/* S/T section registers */
-#define HFCPCI_STATES		0xC0
-#define HFCPCI_SCTRL		0xC4
-#define HFCPCI_SCTRL_E		0xC8
-#define HFCPCI_SCTRL_R		0xCC
-#define HFCPCI_SQ		0xD0
-#define HFCPCI_CLKDEL		0xDC
-#define HFCPCI_B1_REC		0xF0
-#define HFCPCI_B1_SEND		0xF0
-#define HFCPCI_B2_REC		0xF4
-#define HFCPCI_B2_SEND		0xF4
-#define HFCPCI_D_REC		0xF8
-#define HFCPCI_D_SEND		0xF8
-#define HFCPCI_E_REC		0xFC
-
-
-/* bits in status register (READ) */
-#define HFCPCI_PCI_PROC		0x02
-#define HFCPCI_NBUSY		0x04
-#define HFCPCI_TIMER_ELAP	0x10
-#define HFCPCI_STATINT		0x20
-#define HFCPCI_FRAMEINT		0x40
-#define HFCPCI_ANYINT		0x80
-
-/* bits in CTMT (Write) */
-#define HFCPCI_CLTIMER		0x80
-#define HFCPCI_TIM3_125		0x04
-#define HFCPCI_TIM25		0x10
-#define HFCPCI_TIM50		0x14
-#define HFCPCI_TIM400		0x18
-#define HFCPCI_TIM800		0x1C
-#define HFCPCI_AUTO_TIMER	0x20
-#define HFCPCI_TRANSB2		0x02
-#define HFCPCI_TRANSB1		0x01
-
-/* bits in CIRM (Write) */
-#define HFCPCI_AUX_MSK		0x07
-#define HFCPCI_RESET		0x08
-#define HFCPCI_B1_REV		0x40
-#define HFCPCI_B2_REV		0x80
-
-/* bits in INT_M1 and INT_S1 */
-#define HFCPCI_INTS_B1TRANS	0x01
-#define HFCPCI_INTS_B2TRANS	0x02
-#define HFCPCI_INTS_DTRANS	0x04
-#define HFCPCI_INTS_B1REC	0x08
-#define HFCPCI_INTS_B2REC	0x10
-#define HFCPCI_INTS_DREC	0x20
-#define HFCPCI_INTS_L1STATE	0x40
-#define HFCPCI_INTS_TIMER	0x80
-
-/* bits in INT_M2 */
-#define HFCPCI_PROC_TRANS	0x01
-#define HFCPCI_GCI_I_CHG	0x02
-#define HFCPCI_GCI_MON_REC	0x04
-#define HFCPCI_IRQ_ENABLE	0x08
-#define HFCPCI_PMESEL		0x80
-
-/* bits in STATES */
-#define HFCPCI_STATE_MSK	0x0F
-#define HFCPCI_LOAD_STATE	0x10
-#define HFCPCI_ACTIVATE		0x20
-#define HFCPCI_DO_ACTION	0x40
-#define HFCPCI_NT_G2_G3		0x80
-
-/* bits in HFCD_MST_MODE */
-#define HFCPCI_MASTER		0x01
-#define HFCPCI_SLAVE		0x00
-#define HFCPCI_F0IO_POSITIV	0x02
-#define HFCPCI_F0_NEGATIV	0x04
-#define HFCPCI_F0_2C4		0x08
-/* remaining bits are for codecs control */
-
-/* bits in HFCD_SCTRL */
-#define SCTRL_B1_ENA		0x01
-#define SCTRL_B2_ENA		0x02
-#define SCTRL_MODE_TE		0x00
-#define SCTRL_MODE_NT		0x04
-#define SCTRL_LOW_PRIO		0x08
-#define SCTRL_SQ_ENA		0x10
-#define SCTRL_TEST		0x20
-#define SCTRL_NONE_CAP		0x40
-#define SCTRL_PWR_DOWN		0x80
-
-/* bits in SCTRL_E  */
-#define HFCPCI_AUTO_AWAKE	0x01
-#define HFCPCI_DBIT_1		0x04
-#define HFCPCI_IGNORE_COL	0x08
-#define HFCPCI_CHG_B1_B2	0x80
-
-/* bits in FIFO_EN register */
-#define HFCPCI_FIFOEN_B1	0x03
-#define HFCPCI_FIFOEN_B2	0x0C
-#define HFCPCI_FIFOEN_DTX	0x10
-#define HFCPCI_FIFOEN_B1TX	0x01
-#define HFCPCI_FIFOEN_B1RX	0x02
-#define HFCPCI_FIFOEN_B2TX	0x04
-#define HFCPCI_FIFOEN_B2RX	0x08
-
-
-/* definitions of fifo memory area */
-#define MAX_D_FRAMES 15
-#define MAX_B_FRAMES 31
-#define B_SUB_VAL    0x200
-#define B_FIFO_SIZE  (0x2000 - B_SUB_VAL)
-#define D_FIFO_SIZE  512
-#define D_FREG_MASK  0xF
-
-struct zt {
-	__le16 z1;  /* Z1 pointer 16 Bit */
-	__le16 z2;  /* Z2 pointer 16 Bit */
-};
-
-struct dfifo {
-	u_char data[D_FIFO_SIZE]; /* FIFO data space */
-	u_char fill1[0x20A0 - D_FIFO_SIZE]; /* reserved, do not use */
-	u_char f1, f2; /* f pointers */
-	u_char fill2[0x20C0 - 0x20A2]; /* reserved, do not use */
-	/* mask index with D_FREG_MASK for access */
-	struct zt za[MAX_D_FRAMES + 1];
-	u_char fill3[0x4000 - 0x2100]; /* align 16K */
-};
-
-struct bzfifo {
-	struct zt	za[MAX_B_FRAMES + 1]; /* only range 0x0..0x1F allowed */
-	u_char		f1, f2; /* f pointers */
-	u_char		fill[0x2100 - 0x2082]; /* alignment */
-};
-
-
-union fifo_area {
-	struct {
-		struct dfifo d_tx; /* D-send channel */
-		struct dfifo d_rx; /* D-receive channel */
-	} d_chan;
-	struct {
-		u_char		fill1[0x200];
-		u_char		txdat_b1[B_FIFO_SIZE];
-		struct bzfifo	txbz_b1;
-		struct bzfifo	txbz_b2;
-		u_char		txdat_b2[B_FIFO_SIZE];
-		u_char		fill2[D_FIFO_SIZE];
-		u_char		rxdat_b1[B_FIFO_SIZE];
-		struct bzfifo	rxbz_b1;
-		struct bzfifo	rxbz_b2;
-		u_char rxdat_b2[B_FIFO_SIZE];
-	} b_chans;
-	u_char fill[32768];
-};
-
-#define Write_hfc(a, b, c) (writeb(c, (a->hw.pci_io) + b))
-#define Read_hfc(a, b) (readb((a->hw.pci_io) + b))
diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
deleted file mode 100644
index b3d28976b33a..000000000000
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ /dev/null
@@ -1,5540 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * hfcmulti.c  low level driver for hfc-4s/hfc-8s/hfc-e1 based cards
- *
- * Author	Andreas Eversberg (jolly@eversberg.eu)
- * ported to mqueue mechanism:
- *		Peter Sprenger (sprengermoving-bytes.de)
- *
- * inspired by existing hfc-pci driver:
- * Copyright 1999  by Werner Cornelius (werner@isdn-development.de)
- * Copyright 2008  by Karsten Keil (kkeil@suse.de)
- * Copyright 2008  by Andreas Eversberg (jolly@eversberg.eu)
- *
- * Thanks to Cologne Chip AG for this great controller!
- */
-
-/*
- * module parameters:
- * type:
- *	By default (0), the card is automatically detected.
- *	Or use the following combinations:
- *	Bit 0-7   = 0x00001 = HFC-E1 (1 port)
- * or	Bit 0-7   = 0x00004 = HFC-4S (4 ports)
- * or	Bit 0-7   = 0x00008 = HFC-8S (8 ports)
- *	Bit 8     = 0x00100 = uLaw (instead of aLaw)
- *	Bit 9     = 0x00200 = Disable DTMF detect on all B-channels via hardware
- *	Bit 10    = spare
- *	Bit 11    = 0x00800 = Force PCM bus into slave mode. (otherwise auto)
- * or   Bit 12    = 0x01000 = Force PCM bus into master mode. (otherwise auto)
- *	Bit 13	  = spare
- *	Bit 14    = 0x04000 = Use external ram (128K)
- *	Bit 15    = 0x08000 = Use external ram (512K)
- *	Bit 16    = 0x10000 = Use 64 timeslots instead of 32
- * or	Bit 17    = 0x20000 = Use 128 timeslots instead of anything else
- *	Bit 18    = spare
- *	Bit 19    = 0x80000 = Send the Watchdog a Signal (Dual E1 with Watchdog)
- * (all other bits are reserved and shall be 0)
- *	example: 0x20204 one HFC-4S with dtmf detection and 128 timeslots on PCM
- *		 bus (PCM master)
- *
- * port: (optional or required for all ports on all installed cards)
- *	HFC-4S/HFC-8S only bits:
- *	Bit 0	  = 0x001 = Use master clock for this S/T interface
- *			    (only once per chip).
- *	Bit 1     = 0x002 = transmitter line setup (non capacitive mode)
- *			    Don't use this unless you know what you are doing!
- *	Bit 2     = 0x004 = Disable E-channel. (No E-channel processing)
- *	example: 0x0001,0x0000,0x0000,0x0000 one HFC-4S with master clock
- *		 received from port 1
- *
- *	HFC-E1 only bits:
- *	Bit 0     = 0x0001 = interface: 0=copper, 1=optical
- *	Bit 1     = 0x0002 = reserved (later for 32 B-channels transparent mode)
- *	Bit 2     = 0x0004 = Report LOS
- *	Bit 3     = 0x0008 = Report AIS
- *	Bit 4     = 0x0010 = Report SLIP
- *	Bit 5     = 0x0020 = Report RDI
- *	Bit 8     = 0x0100 = Turn off CRC-4 Multiframe Mode, use double frame
- *			     mode instead.
- *	Bit 9	  = 0x0200 = Force get clock from interface, even in NT mode.
- * or	Bit 10	  = 0x0400 = Force put clock to interface, even in TE mode.
- *	Bit 11    = 0x0800 = Use direct RX clock for PCM sync rather than PLL.
- *			     (E1 only)
- *	Bit 12-13 = 0xX000 = elastic jitter buffer (1-3), Set both bits to 0
- *			     for default.
- * (all other bits are reserved and shall be 0)
- *
- * debug:
- *	NOTE: only one debug value must be given for all cards
- *	enable debugging (see hfc_multi.h for debug options)
- *
- * poll:
- *	NOTE: only one poll value must be given for all cards
- *	Give the number of samples for each fifo process.
- *	By default 128 is used. Decrease to reduce delay, increase to
- *	reduce cpu load. If unsure, don't mess with it!
- *	Valid is 8, 16, 32, 64, 128, 256.
- *
- * pcm:
- *	NOTE: only one pcm value must be given for every card.
- *	The PCM bus id tells the mISDNdsp module about the connected PCM bus.
- *	By default (0), the PCM bus id is 100 for the card that is PCM master.
- *	If multiple cards are PCM master (because they are not interconnected),
- *	each card with PCM master will have increasing PCM id.
- *	All PCM buses with the same ID are expected to be connected and have
- *	common time slots slots.
- *	Only one chip of the PCM bus must be master, the others slave.
- *	-1 means no support of PCM bus not even.
- *	Omit this value, if all cards are interconnected or none is connected.
- *	If unsure, don't give this parameter.
- *
- * dmask and bmask:
- *	NOTE: One dmask value must be given for every HFC-E1 card.
- *	If omitted, the E1 card has D-channel on time slot 16, which is default.
- *	dmask is a 32 bit mask. The bit must be set for an alternate time slot.
- *	If multiple bits are set, multiple virtual card fragments are created.
- *	For each bit set, a bmask value must be given. Each bit on the bmask
- *	value stands for a B-channel. The bmask may not overlap with dmask or
- *	with other bmask values for that card.
- *	Example: dmask=0x00020002 bmask=0x0000fffc,0xfffc0000
- *		This will create one fragment with D-channel on slot 1 with
- *		B-channels on slots 2..15, and a second fragment with D-channel
- *		on slot 17 with B-channels on slot 18..31. Slot 16 is unused.
- *	If bit 0 is set (dmask=0x00000001) the D-channel is on slot 0 and will
- *	not function.
- *	Example: dmask=0x00000001 bmask=0xfffffffe
- *		This will create a port with all 31 usable timeslots as
- *		B-channels.
- *	If no bits are set on bmask, no B-channel is created for that fragment.
- *	Example: dmask=0xfffffffe bmask=0,0,0,0.... (31 0-values for bmask)
- *		This will create 31 ports with one D-channel only.
- *	If you don't know how to use it, you don't need it!
- *
- * iomode:
- *	NOTE: only one mode value must be given for every card.
- *	-> See hfc_multi.h for HFC_IO_MODE_* values
- *	By default, the IO mode is pci memory IO (MEMIO).
- *	Some cards require specific IO mode, so it cannot be changed.
- *	It may be useful to set IO mode to register io (REGIO) to solve
- *	PCI bridge problems.
- *	If unsure, don't give this parameter.
- *
- * clockdelay_nt:
- *	NOTE: only one clockdelay_nt value must be given once for all cards.
- *	Give the value of the clock control register (A_ST_CLK_DLY)
- *	of the S/T interfaces in NT mode.
- *	This register is needed for the TBR3 certification, so don't change it.
- *
- * clockdelay_te:
- *	NOTE: only one clockdelay_te value must be given once
- *	Give the value of the clock control register (A_ST_CLK_DLY)
- *	of the S/T interfaces in TE mode.
- *	This register is needed for the TBR3 certification, so don't change it.
- *
- * clock:
- *	NOTE: only one clock value must be given once
- *	Selects interface with clock source for mISDN and applications.
- *	Set to card number starting with 1. Set to -1 to disable.
- *	By default, the first card is used as clock source.
- *
- * hwid:
- *	NOTE: only one hwid value must be given once
- *	Enable special embedded devices with XHFC controllers.
- */
-
-/*
- * debug register access (never use this, it will flood your system log)
- * #define HFC_REGISTER_DEBUG
- */
-
-#define HFC_MULTI_VERSION	"2.03"
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/mISDNhw.h>
-#include <linux/mISDNdsp.h>
-
-/*
-  #define IRQCOUNT_DEBUG
-  #define IRQ_DEBUG
-*/
-
-#include "hfc_multi.h"
-#ifdef ECHOPREP
-#include "gaintab.h"
-#endif
-
-#define	MAX_CARDS	8
-#define	MAX_PORTS	(8 * MAX_CARDS)
-#define	MAX_FRAGS	(32 * MAX_CARDS)
-
-static LIST_HEAD(HFClist);
-static DEFINE_SPINLOCK(HFClock); /* global hfc list lock */
-
-static void ph_state_change(struct dchannel *);
-
-static struct hfc_multi *syncmaster;
-static int plxsd_master; /* if we have a master card (yet) */
-static DEFINE_SPINLOCK(plx_lock); /* may not acquire other lock inside */
-
-#define	TYP_E1		1
-#define	TYP_4S		4
-#define TYP_8S		8
-
-static int poll_timer = 6;	/* default = 128 samples = 16ms */
-/* number of POLL_TIMER interrupts for G2 timeout (ca 1s) */
-static int nt_t1_count[] = { 3840, 1920, 960, 480, 240, 120, 60, 30  };
-#define	CLKDEL_TE	0x0f	/* CLKDEL in TE mode */
-#define	CLKDEL_NT	0x6c	/* CLKDEL in NT mode
-				   (0x60 MUST be included!) */
-
-#define	DIP_4S	0x1		/* DIP Switches for Beronet 1S/2S/4S cards */
-#define	DIP_8S	0x2		/* DIP Switches for Beronet 8S+ cards */
-#define	DIP_E1	0x3		/* DIP Switches for Beronet E1 cards */
-
-/*
- * module stuff
- */
-
-static uint	type[MAX_CARDS];
-static int	pcm[MAX_CARDS];
-static uint	dmask[MAX_CARDS];
-static uint	bmask[MAX_FRAGS];
-static uint	iomode[MAX_CARDS];
-static uint	port[MAX_PORTS];
-static uint	debug;
-static uint	poll;
-static int	clock;
-static uint	timer;
-static uint	clockdelay_te = CLKDEL_TE;
-static uint	clockdelay_nt = CLKDEL_NT;
-#define HWID_NONE	0
-#define HWID_MINIP4	1
-#define HWID_MINIP8	2
-#define HWID_MINIP16	3
-static uint	hwid = HWID_NONE;
-
-static int	HFC_cnt, E1_cnt, bmask_cnt, Port_cnt, PCM_cnt = 99;
-
-MODULE_AUTHOR("Andreas Eversberg");
-MODULE_DESCRIPTION("mISDN driver for hfc-4s/hfc-8s/hfc-e1 based cards");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(HFC_MULTI_VERSION);
-module_param(debug, uint, S_IRUGO | S_IWUSR);
-module_param(poll, uint, S_IRUGO | S_IWUSR);
-module_param(clock, int, S_IRUGO | S_IWUSR);
-module_param(timer, uint, S_IRUGO | S_IWUSR);
-module_param(clockdelay_te, uint, S_IRUGO | S_IWUSR);
-module_param(clockdelay_nt, uint, S_IRUGO | S_IWUSR);
-module_param_array(type, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(pcm, int, NULL, S_IRUGO | S_IWUSR);
-module_param_array(dmask, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(bmask, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(iomode, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(port, uint, NULL, S_IRUGO | S_IWUSR);
-module_param(hwid, uint, S_IRUGO | S_IWUSR); /* The hardware ID */
-
-#ifdef HFC_REGISTER_DEBUG
-#define HFC_outb(hc, reg, val)					\
-	(hc->HFC_outb(hc, reg, val, __func__, __LINE__))
-#define HFC_outb_nodebug(hc, reg, val)					\
-	(hc->HFC_outb_nodebug(hc, reg, val, __func__, __LINE__))
-#define HFC_inb(hc, reg)				\
-	(hc->HFC_inb(hc, reg, __func__, __LINE__))
-#define HFC_inb_nodebug(hc, reg)				\
-	(hc->HFC_inb_nodebug(hc, reg, __func__, __LINE__))
-#define HFC_inw(hc, reg)				\
-	(hc->HFC_inw(hc, reg, __func__, __LINE__))
-#define HFC_inw_nodebug(hc, reg)				\
-	(hc->HFC_inw_nodebug(hc, reg, __func__, __LINE__))
-#define HFC_wait(hc)				\
-	(hc->HFC_wait(hc, __func__, __LINE__))
-#define HFC_wait_nodebug(hc)				\
-	(hc->HFC_wait_nodebug(hc, __func__, __LINE__))
-#else
-#define HFC_outb(hc, reg, val)		(hc->HFC_outb(hc, reg, val))
-#define HFC_outb_nodebug(hc, reg, val)	(hc->HFC_outb_nodebug(hc, reg, val))
-#define HFC_inb(hc, reg)		(hc->HFC_inb(hc, reg))
-#define HFC_inb_nodebug(hc, reg)	(hc->HFC_inb_nodebug(hc, reg))
-#define HFC_inw(hc, reg)		(hc->HFC_inw(hc, reg))
-#define HFC_inw_nodebug(hc, reg)	(hc->HFC_inw_nodebug(hc, reg))
-#define HFC_wait(hc)			(hc->HFC_wait(hc))
-#define HFC_wait_nodebug(hc)		(hc->HFC_wait_nodebug(hc))
-#endif
-
-#ifdef CONFIG_MISDN_HFCMULTI_8xx
-#include "hfc_multi_8xx.h"
-#endif
-
-/* HFC_IO_MODE_PCIMEM */
-static void
-#ifdef HFC_REGISTER_DEBUG
-HFC_outb_pcimem(struct hfc_multi *hc, u_char reg, u_char val,
-		const char *function, int line)
-#else
-	HFC_outb_pcimem(struct hfc_multi *hc, u_char reg, u_char val)
-#endif
-{
-	writeb(val, hc->pci_membase + reg);
-}
-static u_char
-#ifdef HFC_REGISTER_DEBUG
-HFC_inb_pcimem(struct hfc_multi *hc, u_char reg, const char *function, int line)
-#else
-	HFC_inb_pcimem(struct hfc_multi *hc, u_char reg)
-#endif
-{
-	return readb(hc->pci_membase + reg);
-}
-static u_short
-#ifdef HFC_REGISTER_DEBUG
-HFC_inw_pcimem(struct hfc_multi *hc, u_char reg, const char *function, int line)
-#else
-	HFC_inw_pcimem(struct hfc_multi *hc, u_char reg)
-#endif
-{
-	return readw(hc->pci_membase + reg);
-}
-static void
-#ifdef HFC_REGISTER_DEBUG
-HFC_wait_pcimem(struct hfc_multi *hc, const char *function, int line)
-#else
-	HFC_wait_pcimem(struct hfc_multi *hc)
-#endif
-{
-	while (readb(hc->pci_membase + R_STATUS) & V_BUSY)
-		cpu_relax();
-}
-
-/* HFC_IO_MODE_REGIO */
-static void
-#ifdef HFC_REGISTER_DEBUG
-HFC_outb_regio(struct hfc_multi *hc, u_char reg, u_char val,
-	       const char *function, int line)
-#else
-	HFC_outb_regio(struct hfc_multi *hc, u_char reg, u_char val)
-#endif
-{
-	outb(reg, hc->pci_iobase + 4);
-	outb(val, hc->pci_iobase);
-}
-static u_char
-#ifdef HFC_REGISTER_DEBUG
-HFC_inb_regio(struct hfc_multi *hc, u_char reg, const char *function, int line)
-#else
-	HFC_inb_regio(struct hfc_multi *hc, u_char reg)
-#endif
-{
-	outb(reg, hc->pci_iobase + 4);
-	return inb(hc->pci_iobase);
-}
-static u_short
-#ifdef HFC_REGISTER_DEBUG
-HFC_inw_regio(struct hfc_multi *hc, u_char reg, const char *function, int line)
-#else
-	HFC_inw_regio(struct hfc_multi *hc, u_char reg)
-#endif
-{
-	outb(reg, hc->pci_iobase + 4);
-	return inw(hc->pci_iobase);
-}
-static void
-#ifdef HFC_REGISTER_DEBUG
-HFC_wait_regio(struct hfc_multi *hc, const char *function, int line)
-#else
-	HFC_wait_regio(struct hfc_multi *hc)
-#endif
-{
-	outb(R_STATUS, hc->pci_iobase + 4);
-	while (inb(hc->pci_iobase) & V_BUSY)
-		cpu_relax();
-}
-
-#ifdef HFC_REGISTER_DEBUG
-static void
-HFC_outb_debug(struct hfc_multi *hc, u_char reg, u_char val,
-	       const char *function, int line)
-{
-	char regname[256] = "", bits[9] = "xxxxxxxx";
-	int i;
-
-	i = -1;
-	while (hfc_register_names[++i].name) {
-		if (hfc_register_names[i].reg == reg)
-			strcat(regname, hfc_register_names[i].name);
-	}
-	if (regname[0] == '\0')
-		strcpy(regname, "register");
-
-	bits[7] = '0' + (!!(val & 1));
-	bits[6] = '0' + (!!(val & 2));
-	bits[5] = '0' + (!!(val & 4));
-	bits[4] = '0' + (!!(val & 8));
-	bits[3] = '0' + (!!(val & 16));
-	bits[2] = '0' + (!!(val & 32));
-	bits[1] = '0' + (!!(val & 64));
-	bits[0] = '0' + (!!(val & 128));
-	printk(KERN_DEBUG
-	       "HFC_outb(chip %d, %02x=%s, 0x%02x=%s); in %s() line %d\n",
-	       hc->id, reg, regname, val, bits, function, line);
-	HFC_outb_nodebug(hc, reg, val);
-}
-static u_char
-HFC_inb_debug(struct hfc_multi *hc, u_char reg, const char *function, int line)
-{
-	char regname[256] = "", bits[9] = "xxxxxxxx";
-	u_char val = HFC_inb_nodebug(hc, reg);
-	int i;
-
-	i = 0;
-	while (hfc_register_names[i++].name)
-		;
-	while (hfc_register_names[++i].name) {
-		if (hfc_register_names[i].reg == reg)
-			strcat(regname, hfc_register_names[i].name);
-	}
-	if (regname[0] == '\0')
-		strcpy(regname, "register");
-
-	bits[7] = '0' + (!!(val & 1));
-	bits[6] = '0' + (!!(val & 2));
-	bits[5] = '0' + (!!(val & 4));
-	bits[4] = '0' + (!!(val & 8));
-	bits[3] = '0' + (!!(val & 16));
-	bits[2] = '0' + (!!(val & 32));
-	bits[1] = '0' + (!!(val & 64));
-	bits[0] = '0' + (!!(val & 128));
-	printk(KERN_DEBUG
-	       "HFC_inb(chip %d, %02x=%s) = 0x%02x=%s; in %s() line %d\n",
-	       hc->id, reg, regname, val, bits, function, line);
-	return val;
-}
-static u_short
-HFC_inw_debug(struct hfc_multi *hc, u_char reg, const char *function, int line)
-{
-	char regname[256] = "";
-	u_short val = HFC_inw_nodebug(hc, reg);
-	int i;
-
-	i = 0;
-	while (hfc_register_names[i++].name)
-		;
-	while (hfc_register_names[++i].name) {
-		if (hfc_register_names[i].reg == reg)
-			strcat(regname, hfc_register_names[i].name);
-	}
-	if (regname[0] == '\0')
-		strcpy(regname, "register");
-
-	printk(KERN_DEBUG
-	       "HFC_inw(chip %d, %02x=%s) = 0x%04x; in %s() line %d\n",
-	       hc->id, reg, regname, val, function, line);
-	return val;
-}
-static void
-HFC_wait_debug(struct hfc_multi *hc, const char *function, int line)
-{
-	printk(KERN_DEBUG "HFC_wait(chip %d); in %s() line %d\n",
-	       hc->id, function, line);
-	HFC_wait_nodebug(hc);
-}
-#endif
-
-/* write fifo data (REGIO) */
-static void
-write_fifo_regio(struct hfc_multi *hc, u_char *data, int len)
-{
-	outb(A_FIFO_DATA0, (hc->pci_iobase) + 4);
-	while (len >> 2) {
-		outl(cpu_to_le32(*(u32 *)data), hc->pci_iobase);
-		data += 4;
-		len -= 4;
-	}
-	while (len >> 1) {
-		outw(cpu_to_le16(*(u16 *)data), hc->pci_iobase);
-		data += 2;
-		len -= 2;
-	}
-	while (len) {
-		outb(*data, hc->pci_iobase);
-		data++;
-		len--;
-	}
-}
-/* write fifo data (PCIMEM) */
-static void
-write_fifo_pcimem(struct hfc_multi *hc, u_char *data, int len)
-{
-	while (len >> 2) {
-		writel(cpu_to_le32(*(u32 *)data),
-		       hc->pci_membase + A_FIFO_DATA0);
-		data += 4;
-		len -= 4;
-	}
-	while (len >> 1) {
-		writew(cpu_to_le16(*(u16 *)data),
-		       hc->pci_membase + A_FIFO_DATA0);
-		data += 2;
-		len -= 2;
-	}
-	while (len) {
-		writeb(*data, hc->pci_membase + A_FIFO_DATA0);
-		data++;
-		len--;
-	}
-}
-
-/* read fifo data (REGIO) */
-static void
-read_fifo_regio(struct hfc_multi *hc, u_char *data, int len)
-{
-	outb(A_FIFO_DATA0, (hc->pci_iobase) + 4);
-	while (len >> 2) {
-		*(u32 *)data = le32_to_cpu(inl(hc->pci_iobase));
-		data += 4;
-		len -= 4;
-	}
-	while (len >> 1) {
-		*(u16 *)data = le16_to_cpu(inw(hc->pci_iobase));
-		data += 2;
-		len -= 2;
-	}
-	while (len) {
-		*data = inb(hc->pci_iobase);
-		data++;
-		len--;
-	}
-}
-
-/* read fifo data (PCIMEM) */
-static void
-read_fifo_pcimem(struct hfc_multi *hc, u_char *data, int len)
-{
-	while (len >> 2) {
-		*(u32 *)data =
-			le32_to_cpu(readl(hc->pci_membase + A_FIFO_DATA0));
-		data += 4;
-		len -= 4;
-	}
-	while (len >> 1) {
-		*(u16 *)data =
-			le16_to_cpu(readw(hc->pci_membase + A_FIFO_DATA0));
-		data += 2;
-		len -= 2;
-	}
-	while (len) {
-		*data = readb(hc->pci_membase + A_FIFO_DATA0);
-		data++;
-		len--;
-	}
-}
-
-static void
-enable_hwirq(struct hfc_multi *hc)
-{
-	hc->hw.r_irq_ctrl |= V_GLOB_IRQ_EN;
-	HFC_outb(hc, R_IRQ_CTRL, hc->hw.r_irq_ctrl);
-}
-
-static void
-disable_hwirq(struct hfc_multi *hc)
-{
-	hc->hw.r_irq_ctrl &= ~((u_char)V_GLOB_IRQ_EN);
-	HFC_outb(hc, R_IRQ_CTRL, hc->hw.r_irq_ctrl);
-}
-
-#define	NUM_EC 2
-#define	MAX_TDM_CHAN 32
-
-
-static inline void
-enablepcibridge(struct hfc_multi *c)
-{
-	HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x3); /* was _io before */
-}
-
-static inline void
-disablepcibridge(struct hfc_multi *c)
-{
-	HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x2); /* was _io before */
-}
-
-static inline unsigned char
-readpcibridge(struct hfc_multi *hc, unsigned char address)
-{
-	unsigned short cipv;
-	unsigned char data;
-
-	if (!hc->pci_iobase)
-		return 0;
-
-	/* slow down a PCI read access by 1 PCI clock cycle */
-	HFC_outb(hc, R_CTRL, 0x4); /*was _io before*/
-
-	if (address == 0)
-		cipv = 0x4000;
-	else
-		cipv = 0x5800;
-
-	/* select local bridge port address by writing to CIP port */
-	/* data = HFC_inb(c, cipv); * was _io before */
-	outw(cipv, hc->pci_iobase + 4);
-	data = inb(hc->pci_iobase);
-
-	/* restore R_CTRL for normal PCI read cycle speed */
-	HFC_outb(hc, R_CTRL, 0x0); /* was _io before */
-
-	return data;
-}
-
-static inline void
-writepcibridge(struct hfc_multi *hc, unsigned char address, unsigned char data)
-{
-	unsigned short cipv;
-	unsigned int datav;
-
-	if (!hc->pci_iobase)
-		return;
-
-	if (address == 0)
-		cipv = 0x4000;
-	else
-		cipv = 0x5800;
-
-	/* select local bridge port address by writing to CIP port */
-	outw(cipv, hc->pci_iobase + 4);
-	/* define a 32 bit dword with 4 identical bytes for write sequence */
-	datav = data | ((__u32) data << 8) | ((__u32) data << 16) |
-		((__u32) data << 24);
-
-	/*
-	 * write this 32 bit dword to the bridge data port
-	 * this will initiate a write sequence of up to 4 writes to the same
-	 * address on the local bus interface the number of write accesses
-	 * is undefined but >=1 and depends on the next PCI transaction
-	 * during write sequence on the local bus
-	 */
-	outl(datav, hc->pci_iobase);
-}
-
-static inline void
-cpld_set_reg(struct hfc_multi *hc, unsigned char reg)
-{
-	/* Do data pin read low byte */
-	HFC_outb(hc, R_GPIO_OUT1, reg);
-}
-
-static inline void
-cpld_write_reg(struct hfc_multi *hc, unsigned char reg, unsigned char val)
-{
-	cpld_set_reg(hc, reg);
-
-	enablepcibridge(hc);
-	writepcibridge(hc, 1, val);
-	disablepcibridge(hc);
-
-	return;
-}
-
-static inline void
-vpm_write_address(struct hfc_multi *hc, unsigned short addr)
-{
-	cpld_write_reg(hc, 0, 0xff & addr);
-	cpld_write_reg(hc, 1, 0x01 & (addr >> 8));
-}
-
-static inline unsigned char
-vpm_in(struct hfc_multi *c, int which, unsigned short addr)
-{
-	unsigned char res;
-
-	vpm_write_address(c, addr);
-
-	if (!which)
-		cpld_set_reg(c, 2);
-	else
-		cpld_set_reg(c, 3);
-
-	enablepcibridge(c);
-	res = readpcibridge(c, 1);
-	disablepcibridge(c);
-
-	cpld_set_reg(c, 0);
-
-	return res;
-}
-
-static inline void
-vpm_out(struct hfc_multi *c, int which, unsigned short addr,
-	unsigned char data)
-{
-	vpm_write_address(c, addr);
-
-	enablepcibridge(c);
-
-	if (!which)
-		cpld_set_reg(c, 2);
-	else
-		cpld_set_reg(c, 3);
-
-	writepcibridge(c, 1, data);
-
-	cpld_set_reg(c, 0);
-
-	disablepcibridge(c);
-
-	{
-		unsigned char regin;
-		regin = vpm_in(c, which, addr);
-		if (regin != data)
-			printk(KERN_DEBUG "Wrote 0x%x to register 0x%x but got back "
-			       "0x%x\n", data, addr, regin);
-	}
-
-}
-
-
-static void
-vpm_init(struct hfc_multi *wc)
-{
-	unsigned char reg;
-	unsigned int mask;
-	unsigned int i, x, y;
-	unsigned int ver;
-
-	for (x = 0; x < NUM_EC; x++) {
-		/* Setup GPIO's */
-		if (!x) {
-			ver = vpm_in(wc, x, 0x1a0);
-			printk(KERN_DEBUG "VPM: Chip %d: ver %02x\n", x, ver);
-		}
-
-		for (y = 0; y < 4; y++) {
-			vpm_out(wc, x, 0x1a8 + y, 0x00); /* GPIO out */
-			vpm_out(wc, x, 0x1ac + y, 0x00); /* GPIO dir */
-			vpm_out(wc, x, 0x1b0 + y, 0x00); /* GPIO sel */
-		}
-
-		/* Setup TDM path - sets fsync and tdm_clk as inputs */
-		reg = vpm_in(wc, x, 0x1a3); /* misc_con */
-		vpm_out(wc, x, 0x1a3, reg & ~2);
-
-		/* Setup Echo length (256 taps) */
-		vpm_out(wc, x, 0x022, 1);
-		vpm_out(wc, x, 0x023, 0xff);
-
-		/* Setup timeslots */
-		vpm_out(wc, x, 0x02f, 0x00);
-		mask = 0x02020202 << (x * 4);
-
-		/* Setup the tdm channel masks for all chips */
-		for (i = 0; i < 4; i++)
-			vpm_out(wc, x, 0x33 - i, (mask >> (i << 3)) & 0xff);
-
-		/* Setup convergence rate */
-		printk(KERN_DEBUG "VPM: A-law mode\n");
-		reg = 0x00 | 0x10 | 0x01;
-		vpm_out(wc, x, 0x20, reg);
-		printk(KERN_DEBUG "VPM reg 0x20 is %x\n", reg);
-		/*vpm_out(wc, x, 0x20, (0x00 | 0x08 | 0x20 | 0x10)); */
-
-		vpm_out(wc, x, 0x24, 0x02);
-		reg = vpm_in(wc, x, 0x24);
-		printk(KERN_DEBUG "NLP Thresh is set to %d (0x%x)\n", reg, reg);
-
-		/* Initialize echo cans */
-		for (i = 0; i < MAX_TDM_CHAN; i++) {
-			if (mask & (0x00000001 << i))
-				vpm_out(wc, x, i, 0x00);
-		}
-
-		/*
-		 * ARM arch at least disallows a udelay of
-		 * more than 2ms... it gives a fake "__bad_udelay"
-		 * reference at link-time.
-		 * long delays in kernel code are pretty sucky anyway
-		 * for now work around it using 5 x 2ms instead of 1 x 10ms
-		 */
-
-		udelay(2000);
-		udelay(2000);
-		udelay(2000);
-		udelay(2000);
-		udelay(2000);
-
-		/* Put in bypass mode */
-		for (i = 0; i < MAX_TDM_CHAN; i++) {
-			if (mask & (0x00000001 << i))
-				vpm_out(wc, x, i, 0x01);
-		}
-
-		/* Enable bypass */
-		for (i = 0; i < MAX_TDM_CHAN; i++) {
-			if (mask & (0x00000001 << i))
-				vpm_out(wc, x, 0x78 + i, 0x01);
-		}
-
-	}
-}
-
-#ifdef UNUSED
-static void
-vpm_check(struct hfc_multi *hctmp)
-{
-	unsigned char gpi2;
-
-	gpi2 = HFC_inb(hctmp, R_GPI_IN2);
-
-	if ((gpi2 & 0x3) != 0x3)
-		printk(KERN_DEBUG "Got interrupt 0x%x from VPM!\n", gpi2);
-}
-#endif /* UNUSED */
-
-
-/*
- * Interface to enable/disable the HW Echocan
- *
- * these functions are called within a spin_lock_irqsave on
- * the channel instance lock, so we are not disturbed by irqs
- *
- * we can later easily change the interface to make  other
- * things configurable, for now we configure the taps
- *
- */
-
-static void
-vpm_echocan_on(struct hfc_multi *hc, int ch, int taps)
-{
-	unsigned int timeslot;
-	unsigned int unit;
-	struct bchannel *bch = hc->chan[ch].bch;
-#ifdef TXADJ
-	int txadj = -4;
-	struct sk_buff *skb;
-#endif
-	if (hc->chan[ch].protocol != ISDN_P_B_RAW)
-		return;
-
-	if (!bch)
-		return;
-
-#ifdef TXADJ
-	skb = _alloc_mISDN_skb(PH_CONTROL_IND, HFC_VOL_CHANGE_TX,
-			       sizeof(int), &txadj, GFP_ATOMIC);
-	if (skb)
-		recv_Bchannel_skb(bch, skb);
-#endif
-
-	timeslot = ((ch / 4) * 8) + ((ch % 4) * 4) + 1;
-	unit = ch % 4;
-
-	printk(KERN_NOTICE "vpm_echocan_on called taps [%d] on timeslot %d\n",
-	       taps, timeslot);
-
-	vpm_out(hc, unit, timeslot, 0x7e);
-}
-
-static void
-vpm_echocan_off(struct hfc_multi *hc, int ch)
-{
-	unsigned int timeslot;
-	unsigned int unit;
-	struct bchannel *bch = hc->chan[ch].bch;
-#ifdef TXADJ
-	int txadj = 0;
-	struct sk_buff *skb;
-#endif
-
-	if (hc->chan[ch].protocol != ISDN_P_B_RAW)
-		return;
-
-	if (!bch)
-		return;
-
-#ifdef TXADJ
-	skb = _alloc_mISDN_skb(PH_CONTROL_IND, HFC_VOL_CHANGE_TX,
-			       sizeof(int), &txadj, GFP_ATOMIC);
-	if (skb)
-		recv_Bchannel_skb(bch, skb);
-#endif
-
-	timeslot = ((ch / 4) * 8) + ((ch % 4) * 4) + 1;
-	unit = ch % 4;
-
-	printk(KERN_NOTICE "vpm_echocan_off called on timeslot %d\n",
-	       timeslot);
-	/* FILLME */
-	vpm_out(hc, unit, timeslot, 0x01);
-}
-
-
-/*
- * Speech Design resync feature
- * NOTE: This is called sometimes outside interrupt handler.
- * We must lock irqsave, so no other interrupt (other card) will occur!
- * Also multiple interrupts may nest, so must lock each access (lists, card)!
- */
-static inline void
-hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm)
-{
-	struct hfc_multi *hc, *next, *pcmmaster = NULL;
-	void __iomem *plx_acc_32;
-	u_int pv;
-	u_long flags;
-
-	spin_lock_irqsave(&HFClock, flags);
-	spin_lock(&plx_lock); /* must be locked inside other locks */
-
-	if (debug & DEBUG_HFCMULTI_PLXSD)
-		printk(KERN_DEBUG "%s: RESYNC(syncmaster=0x%p)\n",
-		       __func__, syncmaster);
-
-	/* select new master */
-	if (newmaster) {
-		if (debug & DEBUG_HFCMULTI_PLXSD)
-			printk(KERN_DEBUG "using provided controller\n");
-	} else {
-		list_for_each_entry_safe(hc, next, &HFClist, list) {
-			if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-				if (hc->syncronized) {
-					newmaster = hc;
-					break;
-				}
-			}
-		}
-	}
-
-	/* Disable sync of all cards */
-	list_for_each_entry_safe(hc, next, &HFClist, list) {
-		if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-			plx_acc_32 = hc->plx_membase + PLX_GPIOC;
-			pv = readl(plx_acc_32);
-			pv &= ~PLX_SYNC_O_EN;
-			writel(pv, plx_acc_32);
-			if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip)) {
-				pcmmaster = hc;
-				if (hc->ctype == HFC_TYPE_E1) {
-					if (debug & DEBUG_HFCMULTI_PLXSD)
-						printk(KERN_DEBUG
-						       "Schedule SYNC_I\n");
-					hc->e1_resync |= 1; /* get SYNC_I */
-				}
-			}
-		}
-	}
-
-	if (newmaster) {
-		hc = newmaster;
-		if (debug & DEBUG_HFCMULTI_PLXSD)
-			printk(KERN_DEBUG "id=%d (0x%p) = synchronized with "
-			       "interface.\n", hc->id, hc);
-		/* Enable new sync master */
-		plx_acc_32 = hc->plx_membase + PLX_GPIOC;
-		pv = readl(plx_acc_32);
-		pv |= PLX_SYNC_O_EN;
-		writel(pv, plx_acc_32);
-		/* switch to jatt PLL, if not disabled by RX_SYNC */
-		if (hc->ctype == HFC_TYPE_E1
-		    && !test_bit(HFC_CHIP_RX_SYNC, &hc->chip)) {
-			if (debug & DEBUG_HFCMULTI_PLXSD)
-				printk(KERN_DEBUG "Schedule jatt PLL\n");
-			hc->e1_resync |= 2; /* switch to jatt */
-		}
-	} else {
-		if (pcmmaster) {
-			hc = pcmmaster;
-			if (debug & DEBUG_HFCMULTI_PLXSD)
-				printk(KERN_DEBUG
-				       "id=%d (0x%p) = PCM master synchronized "
-				       "with QUARTZ\n", hc->id, hc);
-			if (hc->ctype == HFC_TYPE_E1) {
-				/* Use the crystal clock for the PCM
-				   master card */
-				if (debug & DEBUG_HFCMULTI_PLXSD)
-					printk(KERN_DEBUG
-					       "Schedule QUARTZ for HFC-E1\n");
-				hc->e1_resync |= 4; /* switch quartz */
-			} else {
-				if (debug & DEBUG_HFCMULTI_PLXSD)
-					printk(KERN_DEBUG
-					       "QUARTZ is automatically "
-					       "enabled by HFC-%dS\n", hc->ctype);
-			}
-			plx_acc_32 = hc->plx_membase + PLX_GPIOC;
-			pv = readl(plx_acc_32);
-			pv |= PLX_SYNC_O_EN;
-			writel(pv, plx_acc_32);
-		} else
-			if (!rm)
-				printk(KERN_ERR "%s no pcm master, this MUST "
-				       "not happen!\n", __func__);
-	}
-	syncmaster = newmaster;
-
-	spin_unlock(&plx_lock);
-	spin_unlock_irqrestore(&HFClock, flags);
-}
-
-/* This must be called AND hc must be locked irqsave!!! */
-static inline void
-plxsd_checksync(struct hfc_multi *hc, int rm)
-{
-	if (hc->syncronized) {
-		if (syncmaster == NULL) {
-			if (debug & DEBUG_HFCMULTI_PLXSD)
-				printk(KERN_DEBUG "%s: GOT sync on card %d"
-				       " (id=%d)\n", __func__, hc->id + 1,
-				       hc->id);
-			hfcmulti_resync(hc, hc, rm);
-		}
-	} else {
-		if (syncmaster == hc) {
-			if (debug & DEBUG_HFCMULTI_PLXSD)
-				printk(KERN_DEBUG "%s: LOST sync on card %d"
-				       " (id=%d)\n", __func__, hc->id + 1,
-				       hc->id);
-			hfcmulti_resync(hc, NULL, rm);
-		}
-	}
-}
-
-
-/*
- * free hardware resources used by driver
- */
-static void
-release_io_hfcmulti(struct hfc_multi *hc)
-{
-	void __iomem *plx_acc_32;
-	u_int	pv;
-	u_long	plx_flags;
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: entered\n", __func__);
-
-	/* soft reset also masks all interrupts */
-	hc->hw.r_cirm |= V_SRES;
-	HFC_outb(hc, R_CIRM, hc->hw.r_cirm);
-	udelay(1000);
-	hc->hw.r_cirm &= ~V_SRES;
-	HFC_outb(hc, R_CIRM, hc->hw.r_cirm);
-	udelay(1000); /* instead of 'wait' that may cause locking */
-
-	/* release Speech Design card, if PLX was initialized */
-	if (test_bit(HFC_CHIP_PLXSD, &hc->chip) && hc->plx_membase) {
-		if (debug & DEBUG_HFCMULTI_PLXSD)
-			printk(KERN_DEBUG "%s: release PLXSD card %d\n",
-			       __func__, hc->id + 1);
-		spin_lock_irqsave(&plx_lock, plx_flags);
-		plx_acc_32 = hc->plx_membase + PLX_GPIOC;
-		writel(PLX_GPIOC_INIT, plx_acc_32);
-		pv = readl(plx_acc_32);
-		/* Termination off */
-		pv &= ~PLX_TERM_ON;
-		/* Disconnect the PCM */
-		pv |= PLX_SLAVE_EN_N;
-		pv &= ~PLX_MASTER_EN;
-		pv &= ~PLX_SYNC_O_EN;
-		/* Put the DSP in Reset */
-		pv &= ~PLX_DSP_RES_N;
-		writel(pv, plx_acc_32);
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: PCM off: PLX_GPIO=%x\n",
-			       __func__, pv);
-		spin_unlock_irqrestore(&plx_lock, plx_flags);
-	}
-
-	/* disable memory mapped ports / io ports */
-	test_and_clear_bit(HFC_CHIP_PLXSD, &hc->chip); /* prevent resync */
-	if (hc->pci_dev)
-		pci_write_config_word(hc->pci_dev, PCI_COMMAND, 0);
-	if (hc->pci_membase)
-		iounmap(hc->pci_membase);
-	if (hc->plx_membase)
-		iounmap(hc->plx_membase);
-	if (hc->pci_iobase)
-		release_region(hc->pci_iobase, 8);
-	if (hc->xhfc_membase)
-		iounmap((void *)hc->xhfc_membase);
-
-	if (hc->pci_dev) {
-		pci_disable_device(hc->pci_dev);
-		pci_set_drvdata(hc->pci_dev, NULL);
-	}
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: done\n", __func__);
-}
-
-/*
- * function called to reset the HFC chip. A complete software reset of chip
- * and fifos is done. All configuration of the chip is done.
- */
-
-static int
-init_chip(struct hfc_multi *hc)
-{
-	u_long			flags, val, val2 = 0, rev;
-	int			i, err = 0;
-	u_char			r_conf_en, rval;
-	void __iomem		*plx_acc_32;
-	u_int			pv;
-	u_long			plx_flags, hfc_flags;
-	int			plx_count;
-	struct hfc_multi	*pos, *next, *plx_last_hc;
-
-	spin_lock_irqsave(&hc->lock, flags);
-	/* reset all registers */
-	memset(&hc->hw, 0, sizeof(struct hfcm_hw));
-
-	/* revision check */
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: entered\n", __func__);
-	val = HFC_inb(hc, R_CHIP_ID);
-	if ((val >> 4) != 0x8 && (val >> 4) != 0xc && (val >> 4) != 0xe &&
-	    (val >> 1) != 0x31) {
-		printk(KERN_INFO "HFC_multi: unknown CHIP_ID:%x\n", (u_int)val);
-		err = -EIO;
-		goto out;
-	}
-	rev = HFC_inb(hc, R_CHIP_RV);
-	printk(KERN_INFO
-	       "HFC_multi: detected HFC with chip ID=0x%lx revision=%ld%s\n",
-	       val, rev, (rev == 0 && (hc->ctype != HFC_TYPE_XHFC)) ?
-	       " (old FIFO handling)" : "");
-	if (hc->ctype != HFC_TYPE_XHFC && rev == 0) {
-		test_and_set_bit(HFC_CHIP_REVISION0, &hc->chip);
-		printk(KERN_WARNING
-		       "HFC_multi: NOTE: Your chip is revision 0, "
-		       "ask Cologne Chip for update. Newer chips "
-		       "have a better FIFO handling. Old chips "
-		       "still work but may have slightly lower "
-		       "HDLC transmit performance.\n");
-	}
-	if (rev > 1) {
-		printk(KERN_WARNING "HFC_multi: WARNING: This driver doesn't "
-		       "consider chip revision = %ld. The chip / "
-		       "bridge may not work.\n", rev);
-	}
-
-	/* set s-ram size */
-	hc->Flen = 0x10;
-	hc->Zmin = 0x80;
-	hc->Zlen = 384;
-	hc->DTMFbase = 0x1000;
-	if (test_bit(HFC_CHIP_EXRAM_128, &hc->chip)) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: changing to 128K external RAM\n",
-			       __func__);
-		hc->hw.r_ctrl |= V_EXT_RAM;
-		hc->hw.r_ram_sz = 1;
-		hc->Flen = 0x20;
-		hc->Zmin = 0xc0;
-		hc->Zlen = 1856;
-		hc->DTMFbase = 0x2000;
-	}
-	if (test_bit(HFC_CHIP_EXRAM_512, &hc->chip)) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: changing to 512K external RAM\n",
-			       __func__);
-		hc->hw.r_ctrl |= V_EXT_RAM;
-		hc->hw.r_ram_sz = 2;
-		hc->Flen = 0x20;
-		hc->Zmin = 0xc0;
-		hc->Zlen = 8000;
-		hc->DTMFbase = 0x2000;
-	}
-	if (hc->ctype == HFC_TYPE_XHFC) {
-		hc->Flen = 0x8;
-		hc->Zmin = 0x0;
-		hc->Zlen = 64;
-		hc->DTMFbase = 0x0;
-	}
-	hc->max_trans = poll << 1;
-	if (hc->max_trans > hc->Zlen)
-		hc->max_trans = hc->Zlen;
-
-	/* Speech Design PLX bridge */
-	if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-		if (debug & DEBUG_HFCMULTI_PLXSD)
-			printk(KERN_DEBUG "%s: initializing PLXSD card %d\n",
-			       __func__, hc->id + 1);
-		spin_lock_irqsave(&plx_lock, plx_flags);
-		plx_acc_32 = hc->plx_membase + PLX_GPIOC;
-		writel(PLX_GPIOC_INIT, plx_acc_32);
-		pv = readl(plx_acc_32);
-		/* The first and the last cards are terminating the PCM bus */
-		pv |= PLX_TERM_ON; /* hc is currently the last */
-		/* Disconnect the PCM */
-		pv |= PLX_SLAVE_EN_N;
-		pv &= ~PLX_MASTER_EN;
-		pv &= ~PLX_SYNC_O_EN;
-		/* Put the DSP in Reset */
-		pv &= ~PLX_DSP_RES_N;
-		writel(pv, plx_acc_32);
-		spin_unlock_irqrestore(&plx_lock, plx_flags);
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: slave/term: PLX_GPIO=%x\n",
-			       __func__, pv);
-		/*
-		 * If we are the 3rd PLXSD card or higher, we must turn
-		 * termination of last PLXSD card off.
-		 */
-		spin_lock_irqsave(&HFClock, hfc_flags);
-		plx_count = 0;
-		plx_last_hc = NULL;
-		list_for_each_entry_safe(pos, next, &HFClist, list) {
-			if (test_bit(HFC_CHIP_PLXSD, &pos->chip)) {
-				plx_count++;
-				if (pos != hc)
-					plx_last_hc = pos;
-			}
-		}
-		if (plx_count >= 3) {
-			if (debug & DEBUG_HFCMULTI_PLXSD)
-				printk(KERN_DEBUG "%s: card %d is between, so "
-				       "we disable termination\n",
-				       __func__, plx_last_hc->id + 1);
-			spin_lock_irqsave(&plx_lock, plx_flags);
-			plx_acc_32 = plx_last_hc->plx_membase + PLX_GPIOC;
-			pv = readl(plx_acc_32);
-			pv &= ~PLX_TERM_ON;
-			writel(pv, plx_acc_32);
-			spin_unlock_irqrestore(&plx_lock, plx_flags);
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG
-				       "%s: term off: PLX_GPIO=%x\n",
-				       __func__, pv);
-		}
-		spin_unlock_irqrestore(&HFClock, hfc_flags);
-		hc->hw.r_pcm_md0 = V_F0_LEN; /* shift clock for DSP */
-	}
-
-	if (test_bit(HFC_CHIP_EMBSD, &hc->chip))
-		hc->hw.r_pcm_md0 = V_F0_LEN; /* shift clock for DSP */
-
-	/* we only want the real Z2 read-pointer for revision > 0 */
-	if (!test_bit(HFC_CHIP_REVISION0, &hc->chip))
-		hc->hw.r_ram_sz |= V_FZ_MD;
-
-	/* select pcm mode */
-	if (test_bit(HFC_CHIP_PCM_SLAVE, &hc->chip)) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: setting PCM into slave mode\n",
-			       __func__);
-	} else
-		if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip) && !plxsd_master) {
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG "%s: setting PCM into master mode\n",
-				       __func__);
-			hc->hw.r_pcm_md0 |= V_PCM_MD;
-		} else {
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG "%s: performing PCM auto detect\n",
-				       __func__);
-		}
-
-	/* soft reset */
-	HFC_outb(hc, R_CTRL, hc->hw.r_ctrl);
-	if (hc->ctype == HFC_TYPE_XHFC)
-		HFC_outb(hc, 0x0C /* R_FIFO_THRES */,
-			 0x11 /* 16 Bytes TX/RX */);
-	else
-		HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz);
-	HFC_outb(hc, R_FIFO_MD, 0);
-	if (hc->ctype == HFC_TYPE_XHFC)
-		hc->hw.r_cirm = V_SRES | V_HFCRES | V_PCMRES | V_STRES;
-	else
-		hc->hw.r_cirm = V_SRES | V_HFCRES | V_PCMRES | V_STRES
-			| V_RLD_EPR;
-	HFC_outb(hc, R_CIRM, hc->hw.r_cirm);
-	udelay(100);
-	hc->hw.r_cirm = 0;
-	HFC_outb(hc, R_CIRM, hc->hw.r_cirm);
-	udelay(100);
-	if (hc->ctype != HFC_TYPE_XHFC)
-		HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz);
-
-	/* Speech Design PLX bridge pcm and sync mode */
-	if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-		spin_lock_irqsave(&plx_lock, plx_flags);
-		plx_acc_32 = hc->plx_membase + PLX_GPIOC;
-		pv = readl(plx_acc_32);
-		/* Connect PCM */
-		if (hc->hw.r_pcm_md0 & V_PCM_MD) {
-			pv |= PLX_MASTER_EN | PLX_SLAVE_EN_N;
-			pv |= PLX_SYNC_O_EN;
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG "%s: master: PLX_GPIO=%x\n",
-				       __func__, pv);
-		} else {
-			pv &= ~(PLX_MASTER_EN | PLX_SLAVE_EN_N);
-			pv &= ~PLX_SYNC_O_EN;
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG "%s: slave: PLX_GPIO=%x\n",
-				       __func__, pv);
-		}
-		writel(pv, plx_acc_32);
-		spin_unlock_irqrestore(&plx_lock, plx_flags);
-	}
-
-	/* PCM setup */
-	HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0x90);
-	if (hc->slots == 32)
-		HFC_outb(hc, R_PCM_MD1, 0x00);
-	if (hc->slots == 64)
-		HFC_outb(hc, R_PCM_MD1, 0x10);
-	if (hc->slots == 128)
-		HFC_outb(hc, R_PCM_MD1, 0x20);
-	HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0xa0);
-	if (test_bit(HFC_CHIP_PLXSD, &hc->chip))
-		HFC_outb(hc, R_PCM_MD2, V_SYNC_SRC); /* sync via SYNC_I / O */
-	else if (test_bit(HFC_CHIP_EMBSD, &hc->chip))
-		HFC_outb(hc, R_PCM_MD2, 0x10); /* V_C2O_EN */
-	else
-		HFC_outb(hc, R_PCM_MD2, 0x00); /* sync from interface */
-	HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0x00);
-	for (i = 0; i < 256; i++) {
-		HFC_outb_nodebug(hc, R_SLOT, i);
-		HFC_outb_nodebug(hc, A_SL_CFG, 0);
-		if (hc->ctype != HFC_TYPE_XHFC)
-			HFC_outb_nodebug(hc, A_CONF, 0);
-		hc->slot_owner[i] = -1;
-	}
-
-	/* set clock speed */
-	if (test_bit(HFC_CHIP_CLOCK2, &hc->chip)) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG
-			       "%s: setting double clock\n", __func__);
-		HFC_outb(hc, R_BRG_PCM_CFG, V_PCM_CLK);
-	}
-
-	if (test_bit(HFC_CHIP_EMBSD, &hc->chip))
-		HFC_outb(hc, 0x02 /* R_CLK_CFG */, 0x40 /* V_CLKO_OFF */);
-
-	/* B410P GPIO */
-	if (test_bit(HFC_CHIP_B410P, &hc->chip)) {
-		printk(KERN_NOTICE "Setting GPIOs\n");
-		HFC_outb(hc, R_GPIO_SEL, 0x30);
-		HFC_outb(hc, R_GPIO_EN1, 0x3);
-		udelay(1000);
-		printk(KERN_NOTICE "calling vpm_init\n");
-		vpm_init(hc);
-	}
-
-	/* check if R_F0_CNT counts (8 kHz frame count) */
-	val = HFC_inb(hc, R_F0_CNTL);
-	val += HFC_inb(hc, R_F0_CNTH) << 8;
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG
-		       "HFC_multi F0_CNT %ld after reset\n", val);
-	spin_unlock_irqrestore(&hc->lock, flags);
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	schedule_timeout((HZ / 100) ? : 1); /* Timeout minimum 10ms */
-	spin_lock_irqsave(&hc->lock, flags);
-	val2 = HFC_inb(hc, R_F0_CNTL);
-	val2 += HFC_inb(hc, R_F0_CNTH) << 8;
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG
-		       "HFC_multi F0_CNT %ld after 10 ms (1st try)\n",
-		       val2);
-	if (val2 >= val + 8) { /* 1 ms */
-		/* it counts, so we keep the pcm mode */
-		if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip))
-			printk(KERN_INFO "controller is PCM bus MASTER\n");
-		else
-			if (test_bit(HFC_CHIP_PCM_SLAVE, &hc->chip))
-				printk(KERN_INFO "controller is PCM bus SLAVE\n");
-			else {
-				test_and_set_bit(HFC_CHIP_PCM_SLAVE, &hc->chip);
-				printk(KERN_INFO "controller is PCM bus SLAVE "
-				       "(auto detected)\n");
-			}
-	} else {
-		/* does not count */
-		if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip)) {
-		controller_fail:
-			printk(KERN_ERR "HFC_multi ERROR, getting no 125us "
-			       "pulse. Seems that controller fails.\n");
-			err = -EIO;
-			goto out;
-		}
-		if (test_bit(HFC_CHIP_PCM_SLAVE, &hc->chip)) {
-			printk(KERN_INFO "controller is PCM bus SLAVE "
-			       "(ignoring missing PCM clock)\n");
-		} else {
-			/* only one pcm master */
-			if (test_bit(HFC_CHIP_PLXSD, &hc->chip)
-			    && plxsd_master) {
-				printk(KERN_ERR "HFC_multi ERROR, no clock "
-				       "on another Speech Design card found. "
-				       "Please be sure to connect PCM cable.\n");
-				err = -EIO;
-				goto out;
-			}
-			/* retry with master clock */
-			if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-				spin_lock_irqsave(&plx_lock, plx_flags);
-				plx_acc_32 = hc->plx_membase + PLX_GPIOC;
-				pv = readl(plx_acc_32);
-				pv |= PLX_MASTER_EN | PLX_SLAVE_EN_N;
-				pv |= PLX_SYNC_O_EN;
-				writel(pv, plx_acc_32);
-				spin_unlock_irqrestore(&plx_lock, plx_flags);
-				if (debug & DEBUG_HFCMULTI_INIT)
-					printk(KERN_DEBUG "%s: master: "
-					       "PLX_GPIO=%x\n", __func__, pv);
-			}
-			hc->hw.r_pcm_md0 |= V_PCM_MD;
-			HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0x00);
-			spin_unlock_irqrestore(&hc->lock, flags);
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout((HZ / 100) ?: 1); /* Timeout min. 10ms */
-			spin_lock_irqsave(&hc->lock, flags);
-			val2 = HFC_inb(hc, R_F0_CNTL);
-			val2 += HFC_inb(hc, R_F0_CNTH) << 8;
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG "HFC_multi F0_CNT %ld after "
-				       "10 ms (2nd try)\n", val2);
-			if (val2 >= val + 8) { /* 1 ms */
-				test_and_set_bit(HFC_CHIP_PCM_MASTER,
-						 &hc->chip);
-				printk(KERN_INFO "controller is PCM bus MASTER "
-				       "(auto detected)\n");
-			} else
-				goto controller_fail;
-		}
-	}
-
-	/* Release the DSP Reset */
-	if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-		if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip))
-			plxsd_master = 1;
-		spin_lock_irqsave(&plx_lock, plx_flags);
-		plx_acc_32 = hc->plx_membase + PLX_GPIOC;
-		pv = readl(plx_acc_32);
-		pv |=  PLX_DSP_RES_N;
-		writel(pv, plx_acc_32);
-		spin_unlock_irqrestore(&plx_lock, plx_flags);
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: reset off: PLX_GPIO=%x\n",
-			       __func__, pv);
-	}
-
-	/* pcm id */
-	if (hc->pcm)
-		printk(KERN_INFO "controller has given PCM BUS ID %d\n",
-		       hc->pcm);
-	else {
-		if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip)
-		    || test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-			PCM_cnt++; /* SD has proprietary bridging */
-		}
-		hc->pcm = PCM_cnt;
-		printk(KERN_INFO "controller has PCM BUS ID %d "
-		       "(auto selected)\n", hc->pcm);
-	}
-
-	/* set up timer */
-	HFC_outb(hc, R_TI_WD, poll_timer);
-	hc->hw.r_irqmsk_misc |= V_TI_IRQMSK;
-
-	/* set E1 state machine IRQ */
-	if (hc->ctype == HFC_TYPE_E1)
-		hc->hw.r_irqmsk_misc |= V_STA_IRQMSK;
-
-	/* set DTMF detection */
-	if (test_bit(HFC_CHIP_DTMF, &hc->chip)) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: enabling DTMF detection "
-			       "for all B-channel\n", __func__);
-		hc->hw.r_dtmf = V_DTMF_EN | V_DTMF_STOP;
-		if (test_bit(HFC_CHIP_ULAW, &hc->chip))
-			hc->hw.r_dtmf |= V_ULAW_SEL;
-		HFC_outb(hc, R_DTMF_N, 102 - 1);
-		hc->hw.r_irqmsk_misc |= V_DTMF_IRQMSK;
-	}
-
-	/* conference engine */
-	if (test_bit(HFC_CHIP_ULAW, &hc->chip))
-		r_conf_en = V_CONF_EN | V_ULAW;
-	else
-		r_conf_en = V_CONF_EN;
-	if (hc->ctype != HFC_TYPE_XHFC)
-		HFC_outb(hc, R_CONF_EN, r_conf_en);
-
-	/* setting leds */
-	switch (hc->leds) {
-	case 1: /* HFC-E1 OEM */
-		if (test_bit(HFC_CHIP_WATCHDOG, &hc->chip))
-			HFC_outb(hc, R_GPIO_SEL, 0x32);
-		else
-			HFC_outb(hc, R_GPIO_SEL, 0x30);
-
-		HFC_outb(hc, R_GPIO_EN1, 0x0f);
-		HFC_outb(hc, R_GPIO_OUT1, 0x00);
-
-		HFC_outb(hc, R_GPIO_EN0, V_GPIO_EN2 | V_GPIO_EN3);
-		break;
-
-	case 2: /* HFC-4S OEM */
-	case 3:
-		HFC_outb(hc, R_GPIO_SEL, 0xf0);
-		HFC_outb(hc, R_GPIO_EN1, 0xff);
-		HFC_outb(hc, R_GPIO_OUT1, 0x00);
-		break;
-	}
-
-	if (test_bit(HFC_CHIP_EMBSD, &hc->chip)) {
-		hc->hw.r_st_sync = 0x10; /* V_AUTO_SYNCI */
-		HFC_outb(hc, R_ST_SYNC, hc->hw.r_st_sync);
-	}
-
-	/* set master clock */
-	if (hc->masterclk >= 0) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: setting ST master clock "
-			       "to port %d (0..%d)\n",
-			       __func__, hc->masterclk, hc->ports - 1);
-		hc->hw.r_st_sync |= (hc->masterclk | V_AUTO_SYNC);
-		HFC_outb(hc, R_ST_SYNC, hc->hw.r_st_sync);
-	}
-
-
-
-	/* setting misc irq */
-	HFC_outb(hc, R_IRQMSK_MISC, hc->hw.r_irqmsk_misc);
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "r_irqmsk_misc.2: 0x%x\n",
-		       hc->hw.r_irqmsk_misc);
-
-	/* RAM access test */
-	HFC_outb(hc, R_RAM_ADDR0, 0);
-	HFC_outb(hc, R_RAM_ADDR1, 0);
-	HFC_outb(hc, R_RAM_ADDR2, 0);
-	for (i = 0; i < 256; i++) {
-		HFC_outb_nodebug(hc, R_RAM_ADDR0, i);
-		HFC_outb_nodebug(hc, R_RAM_DATA, ((i * 3) & 0xff));
-	}
-	for (i = 0; i < 256; i++) {
-		HFC_outb_nodebug(hc, R_RAM_ADDR0, i);
-		HFC_inb_nodebug(hc, R_RAM_DATA);
-		rval = HFC_inb_nodebug(hc, R_INT_DATA);
-		if (rval != ((i * 3) & 0xff)) {
-			printk(KERN_DEBUG
-			       "addr:%x val:%x should:%x\n", i, rval,
-			       (i * 3) & 0xff);
-			err++;
-		}
-	}
-	if (err) {
-		printk(KERN_DEBUG "aborting - %d RAM access errors\n", err);
-		err = -EIO;
-		goto out;
-	}
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: done\n", __func__);
-out:
-	spin_unlock_irqrestore(&hc->lock, flags);
-	return err;
-}
-
-
-/*
- * control the watchdog
- */
-static void
-hfcmulti_watchdog(struct hfc_multi *hc)
-{
-	hc->wdcount++;
-
-	if (hc->wdcount > 10) {
-		hc->wdcount = 0;
-		hc->wdbyte = hc->wdbyte == V_GPIO_OUT2 ?
-			V_GPIO_OUT3 : V_GPIO_OUT2;
-
-		/* printk("Sending Watchdog Kill %x\n",hc->wdbyte); */
-		HFC_outb(hc, R_GPIO_EN0, V_GPIO_EN2 | V_GPIO_EN3);
-		HFC_outb(hc, R_GPIO_OUT0, hc->wdbyte);
-	}
-}
-
-
-
-/*
- * output leds
- */
-static void
-hfcmulti_leds(struct hfc_multi *hc)
-{
-	unsigned long lled;
-	unsigned long leddw;
-	int i, state, active, leds;
-	struct dchannel *dch;
-	int led[4];
-
-	switch (hc->leds) {
-	case 1: /* HFC-E1 OEM */
-		/* 2 red steady:       LOS
-		 * 1 red steady:       L1 not active
-		 * 2 green steady:     L1 active
-		 * 1st green flashing: activity on TX
-		 * 2nd green flashing: activity on RX
-		 */
-		led[0] = 0;
-		led[1] = 0;
-		led[2] = 0;
-		led[3] = 0;
-		dch = hc->chan[hc->dnum[0]].dch;
-		if (dch) {
-			if (hc->chan[hc->dnum[0]].los)
-				led[1] = 1;
-			if (hc->e1_state != 1) {
-				led[0] = 1;
-				hc->flash[2] = 0;
-				hc->flash[3] = 0;
-			} else {
-				led[2] = 1;
-				led[3] = 1;
-				if (!hc->flash[2] && hc->activity_tx)
-					hc->flash[2] = poll;
-				if (!hc->flash[3] && hc->activity_rx)
-					hc->flash[3] = poll;
-				if (hc->flash[2] && hc->flash[2] < 1024)
-					led[2] = 0;
-				if (hc->flash[3] && hc->flash[3] < 1024)
-					led[3] = 0;
-				if (hc->flash[2] >= 2048)
-					hc->flash[2] = 0;
-				if (hc->flash[3] >= 2048)
-					hc->flash[3] = 0;
-				if (hc->flash[2])
-					hc->flash[2] += poll;
-				if (hc->flash[3])
-					hc->flash[3] += poll;
-			}
-		}
-		leds = (led[0] | (led[1]<<2) | (led[2]<<1) | (led[3]<<3))^0xF;
-		/* leds are inverted */
-		if (leds != (int)hc->ledstate) {
-			HFC_outb_nodebug(hc, R_GPIO_OUT1, leds);
-			hc->ledstate = leds;
-		}
-		break;
-
-	case 2: /* HFC-4S OEM */
-		/* red steady:     PH_DEACTIVATE
-		 * green steady:   PH_ACTIVATE
-		 * green flashing: activity on TX
-		 */
-		for (i = 0; i < 4; i++) {
-			state = 0;
-			active = -1;
-			dch = hc->chan[(i << 2) | 2].dch;
-			if (dch) {
-				state = dch->state;
-				if (dch->dev.D.protocol == ISDN_P_NT_S0)
-					active = 3;
-				else
-					active = 7;
-			}
-			if (state) {
-				if (state == active) {
-					led[i] = 1; /* led green */
-					hc->activity_tx |= hc->activity_rx;
-					if (!hc->flash[i] &&
-						(hc->activity_tx & (1 << i)))
-							hc->flash[i] = poll;
-					if (hc->flash[i] && hc->flash[i] < 1024)
-						led[i] = 0; /* led off */
-					if (hc->flash[i] >= 2048)
-						hc->flash[i] = 0;
-					if (hc->flash[i])
-						hc->flash[i] += poll;
-				} else {
-					led[i] = 2; /* led red */
-					hc->flash[i] = 0;
-				}
-			} else
-				led[i] = 0; /* led off */
-		}
-		if (test_bit(HFC_CHIP_B410P, &hc->chip)) {
-			leds = 0;
-			for (i = 0; i < 4; i++) {
-				if (led[i] == 1) {
-					/*green*/
-					leds |= (0x2 << (i * 2));
-				} else if (led[i] == 2) {
-					/*red*/
-					leds |= (0x1 << (i * 2));
-				}
-			}
-			if (leds != (int)hc->ledstate) {
-				vpm_out(hc, 0, 0x1a8 + 3, leds);
-				hc->ledstate = leds;
-			}
-		} else {
-			leds = ((led[3] > 0) << 0) | ((led[1] > 0) << 1) |
-				((led[0] > 0) << 2) | ((led[2] > 0) << 3) |
-				((led[3] & 1) << 4) | ((led[1] & 1) << 5) |
-				((led[0] & 1) << 6) | ((led[2] & 1) << 7);
-			if (leds != (int)hc->ledstate) {
-				HFC_outb_nodebug(hc, R_GPIO_EN1, leds & 0x0F);
-				HFC_outb_nodebug(hc, R_GPIO_OUT1, leds >> 4);
-				hc->ledstate = leds;
-			}
-		}
-		break;
-
-	case 3: /* HFC 1S/2S Beronet */
-		/* red steady:     PH_DEACTIVATE
-		 * green steady:   PH_ACTIVATE
-		 * green flashing: activity on TX
-		 */
-		for (i = 0; i < 2; i++) {
-			state = 0;
-			active = -1;
-			dch = hc->chan[(i << 2) | 2].dch;
-			if (dch) {
-				state = dch->state;
-				if (dch->dev.D.protocol == ISDN_P_NT_S0)
-					active = 3;
-				else
-					active = 7;
-			}
-			if (state) {
-				if (state == active) {
-					led[i] = 1; /* led green */
-					hc->activity_tx |= hc->activity_rx;
-					if (!hc->flash[i] &&
-						(hc->activity_tx & (1 << i)))
-							hc->flash[i] = poll;
-					if (hc->flash[i] < 1024)
-						led[i] = 0; /* led off */
-					if (hc->flash[i] >= 2048)
-						hc->flash[i] = 0;
-					if (hc->flash[i])
-						hc->flash[i] += poll;
-				} else {
-					led[i] = 2; /* led red */
-					hc->flash[i] = 0;
-				}
-			} else
-				led[i] = 0; /* led off */
-		}
-		leds = (led[0] > 0) | ((led[1] > 0) << 1) | ((led[0]&1) << 2)
-			| ((led[1]&1) << 3);
-		if (leds != (int)hc->ledstate) {
-			HFC_outb_nodebug(hc, R_GPIO_EN1,
-					 ((led[0] > 0) << 2) | ((led[1] > 0) << 3));
-			HFC_outb_nodebug(hc, R_GPIO_OUT1,
-					 ((led[0] & 1) << 2) | ((led[1] & 1) << 3));
-			hc->ledstate = leds;
-		}
-		break;
-	case 8: /* HFC 8S+ Beronet */
-		/* off:      PH_DEACTIVATE
-		 * steady:   PH_ACTIVATE
-		 * flashing: activity on TX
-		 */
-		lled = 0xff; /* leds off */
-		for (i = 0; i < 8; i++) {
-			state = 0;
-			active = -1;
-			dch = hc->chan[(i << 2) | 2].dch;
-			if (dch) {
-				state = dch->state;
-				if (dch->dev.D.protocol == ISDN_P_NT_S0)
-					active = 3;
-				else
-					active = 7;
-			}
-			if (state) {
-				if (state == active) {
-					lled &= ~(1 << i); /* led on */
-					hc->activity_tx |= hc->activity_rx;
-					if (!hc->flash[i] &&
-						(hc->activity_tx & (1 << i)))
-							hc->flash[i] = poll;
-					if (hc->flash[i] < 1024)
-						lled |= 1 << i; /* led off */
-					if (hc->flash[i] >= 2048)
-						hc->flash[i] = 0;
-					if (hc->flash[i])
-						hc->flash[i] += poll;
-				} else
-					hc->flash[i] = 0;
-			}
-		}
-		leddw = lled << 24 | lled << 16 | lled << 8 | lled;
-		if (leddw != hc->ledstate) {
-			/* HFC_outb(hc, R_BRG_PCM_CFG, 1);
-			   HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x3); */
-			/* was _io before */
-			HFC_outb_nodebug(hc, R_BRG_PCM_CFG, 1 | V_PCM_CLK);
-			outw(0x4000, hc->pci_iobase + 4);
-			outl(leddw, hc->pci_iobase);
-			HFC_outb_nodebug(hc, R_BRG_PCM_CFG, V_PCM_CLK);
-			hc->ledstate = leddw;
-		}
-		break;
-	}
-	hc->activity_tx = 0;
-	hc->activity_rx = 0;
-}
-/*
- * read dtmf coefficients
- */
-
-static void
-hfcmulti_dtmf(struct hfc_multi *hc)
-{
-	s32		*coeff;
-	u_int		mantissa;
-	int		co, ch;
-	struct bchannel	*bch = NULL;
-	u8		exponent;
-	int		dtmf = 0;
-	int		addr;
-	u16		w_float;
-	struct sk_buff	*skb;
-	struct mISDNhead *hh;
-
-	if (debug & DEBUG_HFCMULTI_DTMF)
-		printk(KERN_DEBUG "%s: dtmf detection irq\n", __func__);
-	for (ch = 0; ch <= 31; ch++) {
-		/* only process enabled B-channels */
-		bch = hc->chan[ch].bch;
-		if (!bch)
-			continue;
-		if (!hc->created[hc->chan[ch].port])
-			continue;
-		if (!test_bit(FLG_TRANSPARENT, &bch->Flags))
-			continue;
-		if (debug & DEBUG_HFCMULTI_DTMF)
-			printk(KERN_DEBUG "%s: dtmf channel %d:",
-			       __func__, ch);
-		coeff = &(hc->chan[ch].coeff[hc->chan[ch].coeff_count * 16]);
-		dtmf = 1;
-		for (co = 0; co < 8; co++) {
-			/* read W(n-1) coefficient */
-			addr = hc->DTMFbase + ((co << 7) | (ch << 2));
-			HFC_outb_nodebug(hc, R_RAM_ADDR0, addr);
-			HFC_outb_nodebug(hc, R_RAM_ADDR1, addr >> 8);
-			HFC_outb_nodebug(hc, R_RAM_ADDR2, (addr >> 16)
-					 | V_ADDR_INC);
-			w_float = HFC_inb_nodebug(hc, R_RAM_DATA);
-			w_float |= (HFC_inb_nodebug(hc, R_RAM_DATA) << 8);
-			if (debug & DEBUG_HFCMULTI_DTMF)
-				printk(" %04x", w_float);
-
-			/* decode float (see chip doc) */
-			mantissa = w_float & 0x0fff;
-			if (w_float & 0x8000)
-				mantissa |= 0xfffff000;
-			exponent = (w_float >> 12) & 0x7;
-			if (exponent) {
-				mantissa ^= 0x1000;
-				mantissa <<= (exponent - 1);
-			}
-
-			/* store coefficient */
-			coeff[co << 1] = mantissa;
-
-			/* read W(n) coefficient */
-			w_float = HFC_inb_nodebug(hc, R_RAM_DATA);
-			w_float |= (HFC_inb_nodebug(hc, R_RAM_DATA) << 8);
-			if (debug & DEBUG_HFCMULTI_DTMF)
-				printk(" %04x", w_float);
-
-			/* decode float (see chip doc) */
-			mantissa = w_float & 0x0fff;
-			if (w_float & 0x8000)
-				mantissa |= 0xfffff000;
-			exponent = (w_float >> 12) & 0x7;
-			if (exponent) {
-				mantissa ^= 0x1000;
-				mantissa <<= (exponent - 1);
-			}
-
-			/* store coefficient */
-			coeff[(co << 1) | 1] = mantissa;
-		}
-		if (debug & DEBUG_HFCMULTI_DTMF)
-			printk(" DTMF ready %08x %08x %08x %08x "
-			       "%08x %08x %08x %08x\n",
-			       coeff[0], coeff[1], coeff[2], coeff[3],
-			       coeff[4], coeff[5], coeff[6], coeff[7]);
-		hc->chan[ch].coeff_count++;
-		if (hc->chan[ch].coeff_count == 8) {
-			hc->chan[ch].coeff_count = 0;
-			skb = mI_alloc_skb(512, GFP_ATOMIC);
-			if (!skb) {
-				printk(KERN_DEBUG "%s: No memory for skb\n",
-				       __func__);
-				continue;
-			}
-			hh = mISDN_HEAD_P(skb);
-			hh->prim = PH_CONTROL_IND;
-			hh->id = DTMF_HFC_COEF;
-			skb_put_data(skb, hc->chan[ch].coeff, 512);
-			recv_Bchannel_skb(bch, skb);
-		}
-	}
-
-	/* restart DTMF processing */
-	hc->dtmf = dtmf;
-	if (dtmf)
-		HFC_outb_nodebug(hc, R_DTMF, hc->hw.r_dtmf | V_RST_DTMF);
-}
-
-
-/*
- * fill fifo as much as possible
- */
-
-static void
-hfcmulti_tx(struct hfc_multi *hc, int ch)
-{
-	int i, ii, temp, tmp_len, len = 0;
-	int Zspace, z1, z2; /* must be int for calculation */
-	int Fspace, f1, f2;
-	u_char *d;
-	int *txpending, slot_tx;
-	struct	bchannel *bch;
-	struct  dchannel *dch;
-	struct  sk_buff **sp = NULL;
-	int *idxp;
-
-	bch = hc->chan[ch].bch;
-	dch = hc->chan[ch].dch;
-	if ((!dch) && (!bch))
-		return;
-
-	txpending = &hc->chan[ch].txpending;
-	slot_tx = hc->chan[ch].slot_tx;
-	if (dch) {
-		if (!test_bit(FLG_ACTIVE, &dch->Flags))
-			return;
-		sp = &dch->tx_skb;
-		idxp = &dch->tx_idx;
-	} else {
-		if (!test_bit(FLG_ACTIVE, &bch->Flags))
-			return;
-		sp = &bch->tx_skb;
-		idxp = &bch->tx_idx;
-	}
-	if (*sp)
-		len = (*sp)->len;
-
-	if ((!len) && *txpending != 1)
-		return; /* no data */
-
-	if (test_bit(HFC_CHIP_B410P, &hc->chip) &&
-	    (hc->chan[ch].protocol == ISDN_P_B_RAW) &&
-	    (hc->chan[ch].slot_rx < 0) &&
-	    (hc->chan[ch].slot_tx < 0))
-		HFC_outb_nodebug(hc, R_FIFO, 0x20 | (ch << 1));
-	else
-		HFC_outb_nodebug(hc, R_FIFO, ch << 1);
-	HFC_wait_nodebug(hc);
-
-	if (*txpending == 2) {
-		/* reset fifo */
-		HFC_outb_nodebug(hc, R_INC_RES_FIFO, V_RES_F);
-		HFC_wait_nodebug(hc);
-		HFC_outb(hc, A_SUBCH_CFG, 0);
-		*txpending = 1;
-	}
-next_frame:
-	if (dch || test_bit(FLG_HDLC, &bch->Flags)) {
-		f1 = HFC_inb_nodebug(hc, A_F1);
-		f2 = HFC_inb_nodebug(hc, A_F2);
-		while (f2 != (temp = HFC_inb_nodebug(hc, A_F2))) {
-			if (debug & DEBUG_HFCMULTI_FIFO)
-				printk(KERN_DEBUG
-				       "%s(card %d): reread f2 because %d!=%d\n",
-				       __func__, hc->id + 1, temp, f2);
-			f2 = temp; /* repeat until F2 is equal */
-		}
-		Fspace = f2 - f1 - 1;
-		if (Fspace < 0)
-			Fspace += hc->Flen;
-		/*
-		 * Old FIFO handling doesn't give us the current Z2 read
-		 * pointer, so we cannot send the next frame before the fifo
-		 * is empty. It makes no difference except for a slightly
-		 * lower performance.
-		 */
-		if (test_bit(HFC_CHIP_REVISION0, &hc->chip)) {
-			if (f1 != f2)
-				Fspace = 0;
-			else
-				Fspace = 1;
-		}
-		/* one frame only for ST D-channels, to allow resending */
-		if (hc->ctype != HFC_TYPE_E1 && dch) {
-			if (f1 != f2)
-				Fspace = 0;
-		}
-		/* F-counter full condition */
-		if (Fspace == 0)
-			return;
-	}
-	z1 = HFC_inw_nodebug(hc, A_Z1) - hc->Zmin;
-	z2 = HFC_inw_nodebug(hc, A_Z2) - hc->Zmin;
-	while (z2 != (temp = (HFC_inw_nodebug(hc, A_Z2) - hc->Zmin))) {
-		if (debug & DEBUG_HFCMULTI_FIFO)
-			printk(KERN_DEBUG "%s(card %d): reread z2 because "
-			       "%d!=%d\n", __func__, hc->id + 1, temp, z2);
-		z2 = temp; /* repeat unti Z2 is equal */
-	}
-	hc->chan[ch].Zfill = z1 - z2;
-	if (hc->chan[ch].Zfill < 0)
-		hc->chan[ch].Zfill += hc->Zlen;
-	Zspace = z2 - z1;
-	if (Zspace <= 0)
-		Zspace += hc->Zlen;
-	Zspace -= 4; /* keep not too full, so pointers will not overrun */
-	/* fill transparent data only to maximum transparent load (minus 4) */
-	if (bch && test_bit(FLG_TRANSPARENT, &bch->Flags))
-		Zspace = Zspace - hc->Zlen + hc->max_trans;
-	if (Zspace <= 0) /* no space of 4 bytes */
-		return;
-
-	/* if no data */
-	if (!len) {
-		if (z1 == z2) { /* empty */
-			/* if done with FIFO audio data during PCM connection */
-			if (bch && (!test_bit(FLG_HDLC, &bch->Flags)) &&
-			    *txpending && slot_tx >= 0) {
-				if (debug & DEBUG_HFCMULTI_MODE)
-					printk(KERN_DEBUG
-					       "%s: reconnecting PCM due to no "
-					       "more FIFO data: channel %d "
-					       "slot_tx %d\n",
-					       __func__, ch, slot_tx);
-				/* connect slot */
-				if (hc->ctype == HFC_TYPE_XHFC)
-					HFC_outb(hc, A_CON_HDLC, 0xc0
-						 | 0x07 << 2 | V_HDLC_TRP | V_IFF);
-				/* Enable FIFO, no interrupt */
-				else
-					HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 |
-						 V_HDLC_TRP | V_IFF);
-				HFC_outb_nodebug(hc, R_FIFO, ch << 1 | 1);
-				HFC_wait_nodebug(hc);
-				if (hc->ctype == HFC_TYPE_XHFC)
-					HFC_outb(hc, A_CON_HDLC, 0xc0
-						 | 0x07 << 2 | V_HDLC_TRP | V_IFF);
-				/* Enable FIFO, no interrupt */
-				else
-					HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 |
-						 V_HDLC_TRP | V_IFF);
-				HFC_outb_nodebug(hc, R_FIFO, ch << 1);
-				HFC_wait_nodebug(hc);
-			}
-			*txpending = 0;
-		}
-		return; /* no data */
-	}
-
-	/* "fill fifo if empty" feature */
-	if (bch && test_bit(FLG_FILLEMPTY, &bch->Flags)
-	    && !test_bit(FLG_HDLC, &bch->Flags) && z2 == z1) {
-		if (debug & DEBUG_HFCMULTI_FILL)
-			printk(KERN_DEBUG "%s: buffer empty, so we have "
-			       "underrun\n", __func__);
-		/* fill buffer, to prevent future underrun */
-		hc->write_fifo(hc, hc->silence_data, poll >> 1);
-		Zspace -= (poll >> 1);
-	}
-
-	/* if audio data and connected slot */
-	if (bch && (!test_bit(FLG_HDLC, &bch->Flags)) && (!*txpending)
-	    && slot_tx >= 0) {
-		if (debug & DEBUG_HFCMULTI_MODE)
-			printk(KERN_DEBUG "%s: disconnecting PCM due to "
-			       "FIFO data: channel %d slot_tx %d\n",
-			       __func__, ch, slot_tx);
-		/* disconnect slot */
-		if (hc->ctype == HFC_TYPE_XHFC)
-			HFC_outb(hc, A_CON_HDLC, 0x80
-				 | 0x07 << 2 | V_HDLC_TRP | V_IFF);
-		/* Enable FIFO, no interrupt */
-		else
-			HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 |
-				 V_HDLC_TRP | V_IFF);
-		HFC_outb_nodebug(hc, R_FIFO, ch << 1 | 1);
-		HFC_wait_nodebug(hc);
-		if (hc->ctype == HFC_TYPE_XHFC)
-			HFC_outb(hc, A_CON_HDLC, 0x80
-				 | 0x07 << 2 | V_HDLC_TRP | V_IFF);
-		/* Enable FIFO, no interrupt */
-		else
-			HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 |
-				 V_HDLC_TRP | V_IFF);
-		HFC_outb_nodebug(hc, R_FIFO, ch << 1);
-		HFC_wait_nodebug(hc);
-	}
-	*txpending = 1;
-
-	/* show activity */
-	if (dch)
-		hc->activity_tx |= 1 << hc->chan[ch].port;
-
-	/* fill fifo to what we have left */
-	ii = len;
-	if (dch || test_bit(FLG_HDLC, &bch->Flags))
-		temp = 1;
-	else
-		temp = 0;
-	i = *idxp;
-	d = (*sp)->data + i;
-	if (ii - i > Zspace)
-		ii = Zspace + i;
-	if (debug & DEBUG_HFCMULTI_FIFO)
-		printk(KERN_DEBUG "%s(card %d): fifo(%d) has %d bytes space "
-		       "left (z1=%04x, z2=%04x) sending %d of %d bytes %s\n",
-		       __func__, hc->id + 1, ch, Zspace, z1, z2, ii-i, len-i,
-		       temp ? "HDLC" : "TRANS");
-
-	/* Have to prep the audio data */
-	hc->write_fifo(hc, d, ii - i);
-	hc->chan[ch].Zfill += ii - i;
-	*idxp = ii;
-
-	/* if not all data has been written */
-	if (ii != len) {
-		/* NOTE: fifo is started by the calling function */
-		return;
-	}
-
-	/* if all data has been written, terminate frame */
-	if (dch || test_bit(FLG_HDLC, &bch->Flags)) {
-		/* increment f-counter */
-		HFC_outb_nodebug(hc, R_INC_RES_FIFO, V_INC_F);
-		HFC_wait_nodebug(hc);
-	}
-
-	tmp_len = (*sp)->len;
-	dev_kfree_skb(*sp);
-	/* check for next frame */
-	if (bch && get_next_bframe(bch)) {
-		len = tmp_len;
-		goto next_frame;
-	}
-	if (dch && get_next_dframe(dch)) {
-		len = tmp_len;
-		goto next_frame;
-	}
-
-	/*
-	 * now we have no more data, so in case of transparent,
-	 * we set the last byte in fifo to 'silence' in case we will get
-	 * no more data at all. this prevents sending an undefined value.
-	 */
-	if (bch && test_bit(FLG_TRANSPARENT, &bch->Flags))
-		HFC_outb_nodebug(hc, A_FIFO_DATA0_NOINC, hc->silence);
-}
-
-
-/* NOTE: only called if E1 card is in active state */
-static void
-hfcmulti_rx(struct hfc_multi *hc, int ch)
-{
-	int temp;
-	int Zsize, z1, z2 = 0; /* = 0, to make GCC happy */
-	int f1 = 0, f2 = 0; /* = 0, to make GCC happy */
-	int again = 0;
-	struct	bchannel *bch;
-	struct  dchannel *dch = NULL;
-	struct sk_buff	*skb, **sp = NULL;
-	int	maxlen;
-
-	bch = hc->chan[ch].bch;
-	if (bch) {
-		if (!test_bit(FLG_ACTIVE, &bch->Flags))
-			return;
-	} else if (hc->chan[ch].dch) {
-		dch = hc->chan[ch].dch;
-		if (!test_bit(FLG_ACTIVE, &dch->Flags))
-			return;
-	} else {
-		return;
-	}
-next_frame:
-	/* on first AND before getting next valid frame, R_FIFO must be written
-	   to. */
-	if (test_bit(HFC_CHIP_B410P, &hc->chip) &&
-	    (hc->chan[ch].protocol == ISDN_P_B_RAW) &&
-	    (hc->chan[ch].slot_rx < 0) &&
-	    (hc->chan[ch].slot_tx < 0))
-		HFC_outb_nodebug(hc, R_FIFO, 0x20 | (ch << 1) | 1);
-	else
-		HFC_outb_nodebug(hc, R_FIFO, (ch << 1) | 1);
-	HFC_wait_nodebug(hc);
-
-	/* ignore if rx is off BUT change fifo (above) to start pending TX */
-	if (hc->chan[ch].rx_off) {
-		if (bch)
-			bch->dropcnt += poll; /* not exact but fair enough */
-		return;
-	}
-
-	if (dch || test_bit(FLG_HDLC, &bch->Flags)) {
-		f1 = HFC_inb_nodebug(hc, A_F1);
-		while (f1 != (temp = HFC_inb_nodebug(hc, A_F1))) {
-			if (debug & DEBUG_HFCMULTI_FIFO)
-				printk(KERN_DEBUG
-				       "%s(card %d): reread f1 because %d!=%d\n",
-				       __func__, hc->id + 1, temp, f1);
-			f1 = temp; /* repeat until F1 is equal */
-		}
-		f2 = HFC_inb_nodebug(hc, A_F2);
-	}
-	z1 = HFC_inw_nodebug(hc, A_Z1) - hc->Zmin;
-	while (z1 != (temp = (HFC_inw_nodebug(hc, A_Z1) - hc->Zmin))) {
-		if (debug & DEBUG_HFCMULTI_FIFO)
-			printk(KERN_DEBUG "%s(card %d): reread z2 because "
-			       "%d!=%d\n", __func__, hc->id + 1, temp, z2);
-		z1 = temp; /* repeat until Z1 is equal */
-	}
-	z2 = HFC_inw_nodebug(hc, A_Z2) - hc->Zmin;
-	Zsize = z1 - z2;
-	if ((dch || test_bit(FLG_HDLC, &bch->Flags)) && f1 != f2)
-		/* complete hdlc frame */
-		Zsize++;
-	if (Zsize < 0)
-		Zsize += hc->Zlen;
-	/* if buffer is empty */
-	if (Zsize <= 0)
-		return;
-
-	if (bch) {
-		maxlen = bchannel_get_rxbuf(bch, Zsize);
-		if (maxlen < 0) {
-			pr_warn("card%d.B%d: No bufferspace for %d bytes\n",
-				hc->id + 1, bch->nr, Zsize);
-			return;
-		}
-		sp = &bch->rx_skb;
-		maxlen = bch->maxlen;
-	} else { /* Dchannel */
-		sp = &dch->rx_skb;
-		maxlen = dch->maxlen + 3;
-		if (*sp == NULL) {
-			*sp = mI_alloc_skb(maxlen, GFP_ATOMIC);
-			if (*sp == NULL) {
-				pr_warn("card%d: No mem for dch rx_skb\n",
-					hc->id + 1);
-				return;
-			}
-		}
-	}
-	/* show activity */
-	if (dch)
-		hc->activity_rx |= 1 << hc->chan[ch].port;
-
-	/* empty fifo with what we have */
-	if (dch || test_bit(FLG_HDLC, &bch->Flags)) {
-		if (debug & DEBUG_HFCMULTI_FIFO)
-			printk(KERN_DEBUG "%s(card %d): fifo(%d) reading %d "
-			       "bytes (z1=%04x, z2=%04x) HDLC %s (f1=%d, f2=%d) "
-			       "got=%d (again %d)\n", __func__, hc->id + 1, ch,
-			       Zsize, z1, z2, (f1 == f2) ? "fragment" : "COMPLETE",
-			       f1, f2, Zsize + (*sp)->len, again);
-		/* HDLC */
-		if ((Zsize + (*sp)->len) > maxlen) {
-			if (debug & DEBUG_HFCMULTI_FIFO)
-				printk(KERN_DEBUG
-				       "%s(card %d): hdlc-frame too large.\n",
-				       __func__, hc->id + 1);
-			skb_trim(*sp, 0);
-			HFC_outb_nodebug(hc, R_INC_RES_FIFO, V_RES_F);
-			HFC_wait_nodebug(hc);
-			return;
-		}
-
-		hc->read_fifo(hc, skb_put(*sp, Zsize), Zsize);
-
-		if (f1 != f2) {
-			/* increment Z2,F2-counter */
-			HFC_outb_nodebug(hc, R_INC_RES_FIFO, V_INC_F);
-			HFC_wait_nodebug(hc);
-			/* check size */
-			if ((*sp)->len < 4) {
-				if (debug & DEBUG_HFCMULTI_FIFO)
-					printk(KERN_DEBUG
-					       "%s(card %d): Frame below minimum "
-					       "size\n", __func__, hc->id + 1);
-				skb_trim(*sp, 0);
-				goto next_frame;
-			}
-			/* there is at least one complete frame, check crc */
-			if ((*sp)->data[(*sp)->len - 1]) {
-				if (debug & DEBUG_HFCMULTI_CRC)
-					printk(KERN_DEBUG
-					       "%s: CRC-error\n", __func__);
-				skb_trim(*sp, 0);
-				goto next_frame;
-			}
-			skb_trim(*sp, (*sp)->len - 3);
-			if ((*sp)->len < MISDN_COPY_SIZE) {
-				skb = *sp;
-				*sp = mI_alloc_skb(skb->len, GFP_ATOMIC);
-				if (*sp) {
-					skb_put_data(*sp, skb->data, skb->len);
-					skb_trim(skb, 0);
-				} else {
-					printk(KERN_DEBUG "%s: No mem\n",
-					       __func__);
-					*sp = skb;
-					skb = NULL;
-				}
-			} else {
-				skb = NULL;
-			}
-			if (debug & DEBUG_HFCMULTI_FIFO) {
-				printk(KERN_DEBUG "%s(card %d):",
-				       __func__, hc->id + 1);
-				temp = 0;
-				while (temp < (*sp)->len)
-					printk(" %02x", (*sp)->data[temp++]);
-				printk("\n");
-			}
-			if (dch)
-				recv_Dchannel(dch);
-			else
-				recv_Bchannel(bch, MISDN_ID_ANY, false);
-			*sp = skb;
-			again++;
-			goto next_frame;
-		}
-		/* there is an incomplete frame */
-	} else {
-		/* transparent */
-		hc->read_fifo(hc, skb_put(*sp, Zsize), Zsize);
-		if (debug & DEBUG_HFCMULTI_FIFO)
-			printk(KERN_DEBUG
-			       "%s(card %d): fifo(%d) reading %d bytes "
-			       "(z1=%04x, z2=%04x) TRANS\n",
-			       __func__, hc->id + 1, ch, Zsize, z1, z2);
-		/* only bch is transparent */
-		recv_Bchannel(bch, hc->chan[ch].Zfill, false);
-	}
-}
-
-
-/*
- * Interrupt handler
- */
-static void
-signal_state_up(struct dchannel *dch, int info, char *msg)
-{
-	struct sk_buff	*skb;
-	int		id, data = info;
-
-	if (debug & DEBUG_HFCMULTI_STATE)
-		printk(KERN_DEBUG "%s: %s\n", __func__, msg);
-
-	id = TEI_SAPI | (GROUP_TEI << 8); /* manager address */
-
-	skb = _alloc_mISDN_skb(MPH_INFORMATION_IND, id, sizeof(data), &data,
-			       GFP_ATOMIC);
-	if (!skb)
-		return;
-	recv_Dchannel_skb(dch, skb);
-}
-
-static inline void
-handle_timer_irq(struct hfc_multi *hc)
-{
-	int		ch, temp;
-	struct dchannel	*dch;
-	u_long		flags;
-
-	/* process queued resync jobs */
-	if (hc->e1_resync) {
-		/* lock, so e1_resync gets not changed */
-		spin_lock_irqsave(&HFClock, flags);
-		if (hc->e1_resync & 1) {
-			if (debug & DEBUG_HFCMULTI_PLXSD)
-				printk(KERN_DEBUG "Enable SYNC_I\n");
-			HFC_outb(hc, R_SYNC_CTRL, V_EXT_CLK_SYNC);
-			/* disable JATT, if RX_SYNC is set */
-			if (test_bit(HFC_CHIP_RX_SYNC, &hc->chip))
-				HFC_outb(hc, R_SYNC_OUT, V_SYNC_E1_RX);
-		}
-		if (hc->e1_resync & 2) {
-			if (debug & DEBUG_HFCMULTI_PLXSD)
-				printk(KERN_DEBUG "Enable jatt PLL\n");
-			HFC_outb(hc, R_SYNC_CTRL, V_SYNC_OFFS);
-		}
-		if (hc->e1_resync & 4) {
-			if (debug & DEBUG_HFCMULTI_PLXSD)
-				printk(KERN_DEBUG
-				       "Enable QUARTZ for HFC-E1\n");
-			/* set jatt to quartz */
-			HFC_outb(hc, R_SYNC_CTRL, V_EXT_CLK_SYNC
-				 | V_JATT_OFF);
-			/* switch to JATT, in case it is not already */
-			HFC_outb(hc, R_SYNC_OUT, 0);
-		}
-		hc->e1_resync = 0;
-		spin_unlock_irqrestore(&HFClock, flags);
-	}
-
-	if (hc->ctype != HFC_TYPE_E1 || hc->e1_state == 1)
-		for (ch = 0; ch <= 31; ch++) {
-			if (hc->created[hc->chan[ch].port]) {
-				hfcmulti_tx(hc, ch);
-				/* fifo is started when switching to rx-fifo */
-				hfcmulti_rx(hc, ch);
-				if (hc->chan[ch].dch &&
-				    hc->chan[ch].nt_timer > -1) {
-					dch = hc->chan[ch].dch;
-					if (!(--hc->chan[ch].nt_timer)) {
-						schedule_event(dch,
-							       FLG_PHCHANGE);
-						if (debug &
-						    DEBUG_HFCMULTI_STATE)
-							printk(KERN_DEBUG
-							       "%s: nt_timer at "
-							       "state %x\n",
-							       __func__,
-							       dch->state);
-					}
-				}
-			}
-		}
-	if (hc->ctype == HFC_TYPE_E1 && hc->created[0]) {
-		dch = hc->chan[hc->dnum[0]].dch;
-		/* LOS */
-		temp = HFC_inb_nodebug(hc, R_SYNC_STA) & V_SIG_LOS;
-		hc->chan[hc->dnum[0]].los = temp;
-		if (test_bit(HFC_CFG_REPORT_LOS, &hc->chan[hc->dnum[0]].cfg)) {
-			if (!temp && hc->chan[hc->dnum[0]].los)
-				signal_state_up(dch, L1_SIGNAL_LOS_ON,
-						"LOS detected");
-			if (temp && !hc->chan[hc->dnum[0]].los)
-				signal_state_up(dch, L1_SIGNAL_LOS_OFF,
-						"LOS gone");
-		}
-		if (test_bit(HFC_CFG_REPORT_AIS, &hc->chan[hc->dnum[0]].cfg)) {
-			/* AIS */
-			temp = HFC_inb_nodebug(hc, R_SYNC_STA) & V_AIS;
-			if (!temp && hc->chan[hc->dnum[0]].ais)
-				signal_state_up(dch, L1_SIGNAL_AIS_ON,
-						"AIS detected");
-			if (temp && !hc->chan[hc->dnum[0]].ais)
-				signal_state_up(dch, L1_SIGNAL_AIS_OFF,
-						"AIS gone");
-			hc->chan[hc->dnum[0]].ais = temp;
-		}
-		if (test_bit(HFC_CFG_REPORT_SLIP, &hc->chan[hc->dnum[0]].cfg)) {
-			/* SLIP */
-			temp = HFC_inb_nodebug(hc, R_SLIP) & V_FOSLIP_RX;
-			if (!temp && hc->chan[hc->dnum[0]].slip_rx)
-				signal_state_up(dch, L1_SIGNAL_SLIP_RX,
-						" bit SLIP detected RX");
-			hc->chan[hc->dnum[0]].slip_rx = temp;
-			temp = HFC_inb_nodebug(hc, R_SLIP) & V_FOSLIP_TX;
-			if (!temp && hc->chan[hc->dnum[0]].slip_tx)
-				signal_state_up(dch, L1_SIGNAL_SLIP_TX,
-						" bit SLIP detected TX");
-			hc->chan[hc->dnum[0]].slip_tx = temp;
-		}
-		if (test_bit(HFC_CFG_REPORT_RDI, &hc->chan[hc->dnum[0]].cfg)) {
-			/* RDI */
-			temp = HFC_inb_nodebug(hc, R_RX_SL0_0) & V_A;
-			if (!temp && hc->chan[hc->dnum[0]].rdi)
-				signal_state_up(dch, L1_SIGNAL_RDI_ON,
-						"RDI detected");
-			if (temp && !hc->chan[hc->dnum[0]].rdi)
-				signal_state_up(dch, L1_SIGNAL_RDI_OFF,
-						"RDI gone");
-			hc->chan[hc->dnum[0]].rdi = temp;
-		}
-		temp = HFC_inb_nodebug(hc, R_JATT_DIR);
-		switch (hc->chan[hc->dnum[0]].sync) {
-		case 0:
-			if ((temp & 0x60) == 0x60) {
-				if (debug & DEBUG_HFCMULTI_SYNC)
-					printk(KERN_DEBUG
-					       "%s: (id=%d) E1 now "
-					       "in clock sync\n",
-					       __func__, hc->id);
-				HFC_outb(hc, R_RX_OFF,
-				    hc->chan[hc->dnum[0]].jitter | V_RX_INIT);
-				HFC_outb(hc, R_TX_OFF,
-				    hc->chan[hc->dnum[0]].jitter | V_RX_INIT);
-				hc->chan[hc->dnum[0]].sync = 1;
-				goto check_framesync;
-			}
-			break;
-		case 1:
-			if ((temp & 0x60) != 0x60) {
-				if (debug & DEBUG_HFCMULTI_SYNC)
-					printk(KERN_DEBUG
-					       "%s: (id=%d) E1 "
-					       "lost clock sync\n",
-					       __func__, hc->id);
-				hc->chan[hc->dnum[0]].sync = 0;
-				break;
-			}
-		check_framesync:
-			temp = HFC_inb_nodebug(hc, R_SYNC_STA);
-			if (temp == 0x27) {
-				if (debug & DEBUG_HFCMULTI_SYNC)
-					printk(KERN_DEBUG
-					       "%s: (id=%d) E1 "
-					       "now in frame sync\n",
-					       __func__, hc->id);
-				hc->chan[hc->dnum[0]].sync = 2;
-			}
-			break;
-		case 2:
-			if ((temp & 0x60) != 0x60) {
-				if (debug & DEBUG_HFCMULTI_SYNC)
-					printk(KERN_DEBUG
-					       "%s: (id=%d) E1 lost "
-					       "clock & frame sync\n",
-					       __func__, hc->id);
-				hc->chan[hc->dnum[0]].sync = 0;
-				break;
-			}
-			temp = HFC_inb_nodebug(hc, R_SYNC_STA);
-			if (temp != 0x27) {
-				if (debug & DEBUG_HFCMULTI_SYNC)
-					printk(KERN_DEBUG
-					       "%s: (id=%d) E1 "
-					       "lost frame sync\n",
-					       __func__, hc->id);
-				hc->chan[hc->dnum[0]].sync = 1;
-			}
-			break;
-		}
-	}
-
-	if (test_bit(HFC_CHIP_WATCHDOG, &hc->chip))
-		hfcmulti_watchdog(hc);
-
-	if (hc->leds)
-		hfcmulti_leds(hc);
-}
-
-static void
-ph_state_irq(struct hfc_multi *hc, u_char r_irq_statech)
-{
-	struct dchannel	*dch;
-	int		ch;
-	int		active;
-	u_char		st_status, temp;
-
-	/* state machine */
-	for (ch = 0; ch <= 31; ch++) {
-		if (hc->chan[ch].dch) {
-			dch = hc->chan[ch].dch;
-			if (r_irq_statech & 1) {
-				HFC_outb_nodebug(hc, R_ST_SEL,
-						 hc->chan[ch].port);
-				/* undocumented: delay after R_ST_SEL */
-				udelay(1);
-				/* undocumented: status changes during read */
-				st_status = HFC_inb_nodebug(hc, A_ST_RD_STATE);
-				while (st_status != (temp =
-						     HFC_inb_nodebug(hc, A_ST_RD_STATE))) {
-					if (debug & DEBUG_HFCMULTI_STATE)
-						printk(KERN_DEBUG "%s: reread "
-						       "STATE because %d!=%d\n",
-						       __func__, temp,
-						       st_status);
-					st_status = temp; /* repeat */
-				}
-
-				/* Speech Design TE-sync indication */
-				if (test_bit(HFC_CHIP_PLXSD, &hc->chip) &&
-				    dch->dev.D.protocol == ISDN_P_TE_S0) {
-					if (st_status & V_FR_SYNC_ST)
-						hc->syncronized |=
-							(1 << hc->chan[ch].port);
-					else
-						hc->syncronized &=
-							~(1 << hc->chan[ch].port);
-				}
-				dch->state = st_status & 0x0f;
-				if (dch->dev.D.protocol == ISDN_P_NT_S0)
-					active = 3;
-				else
-					active = 7;
-				if (dch->state == active) {
-					HFC_outb_nodebug(hc, R_FIFO,
-							 (ch << 1) | 1);
-					HFC_wait_nodebug(hc);
-					HFC_outb_nodebug(hc,
-							 R_INC_RES_FIFO, V_RES_F);
-					HFC_wait_nodebug(hc);
-					dch->tx_idx = 0;
-				}
-				schedule_event(dch, FLG_PHCHANGE);
-				if (debug & DEBUG_HFCMULTI_STATE)
-					printk(KERN_DEBUG
-					       "%s: S/T newstate %x port %d\n",
-					       __func__, dch->state,
-					       hc->chan[ch].port);
-			}
-			r_irq_statech >>= 1;
-		}
-	}
-	if (test_bit(HFC_CHIP_PLXSD, &hc->chip))
-		plxsd_checksync(hc, 0);
-}
-
-static void
-fifo_irq(struct hfc_multi *hc, int block)
-{
-	int	ch, j;
-	struct dchannel	*dch;
-	struct bchannel	*bch;
-	u_char r_irq_fifo_bl;
-
-	r_irq_fifo_bl = HFC_inb_nodebug(hc, R_IRQ_FIFO_BL0 + block);
-	j = 0;
-	while (j < 8) {
-		ch = (block << 2) + (j >> 1);
-		dch = hc->chan[ch].dch;
-		bch = hc->chan[ch].bch;
-		if (((!dch) && (!bch)) || (!hc->created[hc->chan[ch].port])) {
-			j += 2;
-			continue;
-		}
-		if (dch && (r_irq_fifo_bl & (1 << j)) &&
-		    test_bit(FLG_ACTIVE, &dch->Flags)) {
-			hfcmulti_tx(hc, ch);
-			/* start fifo */
-			HFC_outb_nodebug(hc, R_FIFO, 0);
-			HFC_wait_nodebug(hc);
-		}
-		if (bch && (r_irq_fifo_bl & (1 << j)) &&
-		    test_bit(FLG_ACTIVE, &bch->Flags)) {
-			hfcmulti_tx(hc, ch);
-			/* start fifo */
-			HFC_outb_nodebug(hc, R_FIFO, 0);
-			HFC_wait_nodebug(hc);
-		}
-		j++;
-		if (dch && (r_irq_fifo_bl & (1 << j)) &&
-		    test_bit(FLG_ACTIVE, &dch->Flags)) {
-			hfcmulti_rx(hc, ch);
-		}
-		if (bch && (r_irq_fifo_bl & (1 << j)) &&
-		    test_bit(FLG_ACTIVE, &bch->Flags)) {
-			hfcmulti_rx(hc, ch);
-		}
-		j++;
-	}
-}
-
-#ifdef IRQ_DEBUG
-int irqsem;
-#endif
-static irqreturn_t
-hfcmulti_interrupt(int intno, void *dev_id)
-{
-#ifdef IRQCOUNT_DEBUG
-	static int iq1 = 0, iq2 = 0, iq3 = 0, iq4 = 0,
-		iq5 = 0, iq6 = 0, iqcnt = 0;
-#endif
-	struct hfc_multi	*hc = dev_id;
-	struct dchannel		*dch;
-	u_char			r_irq_statech, status, r_irq_misc, r_irq_oview;
-	int			i;
-	void __iomem		*plx_acc;
-	u_short			wval;
-	u_char			e1_syncsta, temp, temp2;
-	u_long			flags;
-
-	if (!hc) {
-		printk(KERN_ERR "HFC-multi: Spurious interrupt!\n");
-		return IRQ_NONE;
-	}
-
-	spin_lock(&hc->lock);
-
-#ifdef IRQ_DEBUG
-	if (irqsem)
-		printk(KERN_ERR "irq for card %d during irq from "
-		       "card %d, this is no bug.\n", hc->id + 1, irqsem);
-	irqsem = hc->id + 1;
-#endif
-#ifdef CONFIG_MISDN_HFCMULTI_8xx
-	if (hc->immap->im_cpm.cp_pbdat & hc->pb_irqmsk)
-		goto irq_notforus;
-#endif
-	if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-		spin_lock_irqsave(&plx_lock, flags);
-		plx_acc = hc->plx_membase + PLX_INTCSR;
-		wval = readw(plx_acc);
-		spin_unlock_irqrestore(&plx_lock, flags);
-		if (!(wval & PLX_INTCSR_LINTI1_STATUS))
-			goto irq_notforus;
-	}
-
-	status = HFC_inb_nodebug(hc, R_STATUS);
-	r_irq_statech = HFC_inb_nodebug(hc, R_IRQ_STATECH);
-#ifdef IRQCOUNT_DEBUG
-	if (r_irq_statech)
-		iq1++;
-	if (status & V_DTMF_STA)
-		iq2++;
-	if (status & V_LOST_STA)
-		iq3++;
-	if (status & V_EXT_IRQSTA)
-		iq4++;
-	if (status & V_MISC_IRQSTA)
-		iq5++;
-	if (status & V_FR_IRQSTA)
-		iq6++;
-	if (iqcnt++ > 5000) {
-		printk(KERN_ERR "iq1:%x iq2:%x iq3:%x iq4:%x iq5:%x iq6:%x\n",
-		       iq1, iq2, iq3, iq4, iq5, iq6);
-		iqcnt = 0;
-	}
-#endif
-
-	if (!r_irq_statech &&
-	    !(status & (V_DTMF_STA | V_LOST_STA | V_EXT_IRQSTA |
-			V_MISC_IRQSTA | V_FR_IRQSTA))) {
-		/* irq is not for us */
-		goto irq_notforus;
-	}
-	hc->irqcnt++;
-	if (r_irq_statech) {
-		if (hc->ctype != HFC_TYPE_E1)
-			ph_state_irq(hc, r_irq_statech);
-	}
-	if (status & V_LOST_STA) {
-		/* LOST IRQ */
-		HFC_outb(hc, R_INC_RES_FIFO, V_RES_LOST); /* clear irq! */
-	}
-	if (status & V_MISC_IRQSTA) {
-		/* misc IRQ */
-		r_irq_misc = HFC_inb_nodebug(hc, R_IRQ_MISC);
-		r_irq_misc &= hc->hw.r_irqmsk_misc; /* ignore disabled irqs */
-		if (r_irq_misc & V_STA_IRQ) {
-			if (hc->ctype == HFC_TYPE_E1) {
-				/* state machine */
-				dch = hc->chan[hc->dnum[0]].dch;
-				e1_syncsta = HFC_inb_nodebug(hc, R_SYNC_STA);
-				if (test_bit(HFC_CHIP_PLXSD, &hc->chip)
-				    && hc->e1_getclock) {
-					if (e1_syncsta & V_FR_SYNC_E1)
-						hc->syncronized = 1;
-					else
-						hc->syncronized = 0;
-				}
-				/* undocumented: status changes during read */
-				temp = HFC_inb_nodebug(hc, R_E1_RD_STA);
-				while (temp != (temp2 =
-						      HFC_inb_nodebug(hc, R_E1_RD_STA))) {
-					if (debug & DEBUG_HFCMULTI_STATE)
-						printk(KERN_DEBUG "%s: reread "
-						       "STATE because %d!=%d\n",
-						    __func__, temp, temp2);
-					temp = temp2; /* repeat */
-				}
-				/* broadcast state change to all fragments */
-				if (debug & DEBUG_HFCMULTI_STATE)
-					printk(KERN_DEBUG
-					       "%s: E1 (id=%d) newstate %x\n",
-					    __func__, hc->id, temp & 0x7);
-				for (i = 0; i < hc->ports; i++) {
-					dch = hc->chan[hc->dnum[i]].dch;
-					dch->state = temp & 0x7;
-					schedule_event(dch, FLG_PHCHANGE);
-				}
-
-				if (test_bit(HFC_CHIP_PLXSD, &hc->chip))
-					plxsd_checksync(hc, 0);
-			}
-		}
-		if (r_irq_misc & V_TI_IRQ) {
-			if (hc->iclock_on)
-				mISDN_clock_update(hc->iclock, poll, NULL);
-			handle_timer_irq(hc);
-		}
-
-		if (r_irq_misc & V_DTMF_IRQ)
-			hfcmulti_dtmf(hc);
-
-		if (r_irq_misc & V_IRQ_PROC) {
-			static int irq_proc_cnt;
-			if (!irq_proc_cnt++)
-				printk(KERN_DEBUG "%s: got V_IRQ_PROC -"
-				       " this should not happen\n", __func__);
-		}
-
-	}
-	if (status & V_FR_IRQSTA) {
-		/* FIFO IRQ */
-		r_irq_oview = HFC_inb_nodebug(hc, R_IRQ_OVIEW);
-		for (i = 0; i < 8; i++) {
-			if (r_irq_oview & (1 << i))
-				fifo_irq(hc, i);
-		}
-	}
-
-#ifdef IRQ_DEBUG
-	irqsem = 0;
-#endif
-	spin_unlock(&hc->lock);
-	return IRQ_HANDLED;
-
-irq_notforus:
-#ifdef IRQ_DEBUG
-	irqsem = 0;
-#endif
-	spin_unlock(&hc->lock);
-	return IRQ_NONE;
-}
-
-
-/*
- * timer callback for D-chan busy resolution. Currently no function
- */
-
-static void
-hfcmulti_dbusy_timer(struct timer_list *t)
-{
-}
-
-
-/*
- * activate/deactivate hardware for selected channels and mode
- *
- * configure B-channel with the given protocol
- * ch eqals to the HFC-channel (0-31)
- * ch is the number of channel (0-4,4-7,8-11,12-15,16-19,20-23,24-27,28-31
- * for S/T, 1-31 for E1)
- * the hdlc interrupts will be set/unset
- */
-static int
-mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
-	      int bank_tx, int slot_rx, int bank_rx)
-{
-	int flow_tx = 0, flow_rx = 0, routing = 0;
-	int oslot_tx, oslot_rx;
-	int conf;
-
-	if (ch < 0 || ch > 31)
-		return -EINVAL;
-	oslot_tx = hc->chan[ch].slot_tx;
-	oslot_rx = hc->chan[ch].slot_rx;
-	conf = hc->chan[ch].conf;
-
-	if (debug & DEBUG_HFCMULTI_MODE)
-		printk(KERN_DEBUG
-		       "%s: card %d channel %d protocol %x slot old=%d new=%d "
-		       "bank new=%d (TX) slot old=%d new=%d bank new=%d (RX)\n",
-		       __func__, hc->id, ch, protocol, oslot_tx, slot_tx,
-		       bank_tx, oslot_rx, slot_rx, bank_rx);
-
-	if (oslot_tx >= 0 && slot_tx != oslot_tx) {
-		/* remove from slot */
-		if (debug & DEBUG_HFCMULTI_MODE)
-			printk(KERN_DEBUG "%s: remove from slot %d (TX)\n",
-			       __func__, oslot_tx);
-		if (hc->slot_owner[oslot_tx << 1] == ch) {
-			HFC_outb(hc, R_SLOT, oslot_tx << 1);
-			HFC_outb(hc, A_SL_CFG, 0);
-			if (hc->ctype != HFC_TYPE_XHFC)
-				HFC_outb(hc, A_CONF, 0);
-			hc->slot_owner[oslot_tx << 1] = -1;
-		} else {
-			if (debug & DEBUG_HFCMULTI_MODE)
-				printk(KERN_DEBUG
-				       "%s: we are not owner of this tx slot "
-				       "anymore, channel %d is.\n",
-				       __func__, hc->slot_owner[oslot_tx << 1]);
-		}
-	}
-
-	if (oslot_rx >= 0 && slot_rx != oslot_rx) {
-		/* remove from slot */
-		if (debug & DEBUG_HFCMULTI_MODE)
-			printk(KERN_DEBUG
-			       "%s: remove from slot %d (RX)\n",
-			       __func__, oslot_rx);
-		if (hc->slot_owner[(oslot_rx << 1) | 1] == ch) {
-			HFC_outb(hc, R_SLOT, (oslot_rx << 1) | V_SL_DIR);
-			HFC_outb(hc, A_SL_CFG, 0);
-			hc->slot_owner[(oslot_rx << 1) | 1] = -1;
-		} else {
-			if (debug & DEBUG_HFCMULTI_MODE)
-				printk(KERN_DEBUG
-				       "%s: we are not owner of this rx slot "
-				       "anymore, channel %d is.\n",
-				       __func__,
-				       hc->slot_owner[(oslot_rx << 1) | 1]);
-		}
-	}
-
-	if (slot_tx < 0) {
-		flow_tx = 0x80; /* FIFO->ST */
-		/* disable pcm slot */
-		hc->chan[ch].slot_tx = -1;
-		hc->chan[ch].bank_tx = 0;
-	} else {
-		/* set pcm slot */
-		if (hc->chan[ch].txpending)
-			flow_tx = 0x80; /* FIFO->ST */
-		else
-			flow_tx = 0xc0; /* PCM->ST */
-		/* put on slot */
-		routing = bank_tx ? 0xc0 : 0x80;
-		if (conf >= 0 || bank_tx > 1)
-			routing = 0x40; /* loop */
-		if (debug & DEBUG_HFCMULTI_MODE)
-			printk(KERN_DEBUG "%s: put channel %d to slot %d bank"
-			       " %d flow %02x routing %02x conf %d (TX)\n",
-			       __func__, ch, slot_tx, bank_tx,
-			       flow_tx, routing, conf);
-		HFC_outb(hc, R_SLOT, slot_tx << 1);
-		HFC_outb(hc, A_SL_CFG, (ch << 1) | routing);
-		if (hc->ctype != HFC_TYPE_XHFC)
-			HFC_outb(hc, A_CONF,
-				 (conf < 0) ? 0 : (conf | V_CONF_SL));
-		hc->slot_owner[slot_tx << 1] = ch;
-		hc->chan[ch].slot_tx = slot_tx;
-		hc->chan[ch].bank_tx = bank_tx;
-	}
-	if (slot_rx < 0) {
-		/* disable pcm slot */
-		flow_rx = 0x80; /* ST->FIFO */
-		hc->chan[ch].slot_rx = -1;
-		hc->chan[ch].bank_rx = 0;
-	} else {
-		/* set pcm slot */
-		if (hc->chan[ch].txpending)
-			flow_rx = 0x80; /* ST->FIFO */
-		else
-			flow_rx = 0xc0; /* ST->(FIFO,PCM) */
-		/* put on slot */
-		routing = bank_rx ? 0x80 : 0xc0; /* reversed */
-		if (conf >= 0 || bank_rx > 1)
-			routing = 0x40; /* loop */
-		if (debug & DEBUG_HFCMULTI_MODE)
-			printk(KERN_DEBUG "%s: put channel %d to slot %d bank"
-			       " %d flow %02x routing %02x conf %d (RX)\n",
-			       __func__, ch, slot_rx, bank_rx,
-			       flow_rx, routing, conf);
-		HFC_outb(hc, R_SLOT, (slot_rx << 1) | V_SL_DIR);
-		HFC_outb(hc, A_SL_CFG, (ch << 1) | V_CH_DIR | routing);
-		hc->slot_owner[(slot_rx << 1) | 1] = ch;
-		hc->chan[ch].slot_rx = slot_rx;
-		hc->chan[ch].bank_rx = bank_rx;
-	}
-
-	switch (protocol) {
-	case (ISDN_P_NONE):
-		/* disable TX fifo */
-		HFC_outb(hc, R_FIFO, ch << 1);
-		HFC_wait(hc);
-		HFC_outb(hc, A_CON_HDLC, flow_tx | 0x00 | V_IFF);
-		HFC_outb(hc, A_SUBCH_CFG, 0);
-		HFC_outb(hc, A_IRQ_MSK, 0);
-		HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
-		HFC_wait(hc);
-		/* disable RX fifo */
-		HFC_outb(hc, R_FIFO, (ch << 1) | 1);
-		HFC_wait(hc);
-		HFC_outb(hc, A_CON_HDLC, flow_rx | 0x00);
-		HFC_outb(hc, A_SUBCH_CFG, 0);
-		HFC_outb(hc, A_IRQ_MSK, 0);
-		HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
-		HFC_wait(hc);
-		if (hc->chan[ch].bch && hc->ctype != HFC_TYPE_E1) {
-			hc->hw.a_st_ctrl0[hc->chan[ch].port] &=
-				((ch & 0x3) == 0) ? ~V_B1_EN : ~V_B2_EN;
-			HFC_outb(hc, R_ST_SEL, hc->chan[ch].port);
-			/* undocumented: delay after R_ST_SEL */
-			udelay(1);
-			HFC_outb(hc, A_ST_CTRL0,
-				 hc->hw.a_st_ctrl0[hc->chan[ch].port]);
-		}
-		if (hc->chan[ch].bch) {
-			test_and_clear_bit(FLG_HDLC, &hc->chan[ch].bch->Flags);
-			test_and_clear_bit(FLG_TRANSPARENT,
-					   &hc->chan[ch].bch->Flags);
-		}
-		break;
-	case (ISDN_P_B_RAW): /* B-channel */
-
-		if (test_bit(HFC_CHIP_B410P, &hc->chip) &&
-		    (hc->chan[ch].slot_rx < 0) &&
-		    (hc->chan[ch].slot_tx < 0)) {
-
-			printk(KERN_DEBUG
-			       "Setting B-channel %d to echo cancelable "
-			       "state on PCM slot %d\n", ch,
-			       ((ch / 4) * 8) + ((ch % 4) * 4) + 1);
-			printk(KERN_DEBUG
-			       "Enabling pass through for channel\n");
-			vpm_out(hc, ch, ((ch / 4) * 8) +
-				((ch % 4) * 4) + 1, 0x01);
-			/* rx path */
-			/* S/T -> PCM */
-			HFC_outb(hc, R_FIFO, (ch << 1));
-			HFC_wait(hc);
-			HFC_outb(hc, A_CON_HDLC, 0xc0 | V_HDLC_TRP | V_IFF);
-			HFC_outb(hc, R_SLOT, (((ch / 4) * 8) +
-					      ((ch % 4) * 4) + 1) << 1);
-			HFC_outb(hc, A_SL_CFG, 0x80 | (ch << 1));
-
-			/* PCM -> FIFO */
-			HFC_outb(hc, R_FIFO, 0x20 | (ch << 1) | 1);
-			HFC_wait(hc);
-			HFC_outb(hc, A_CON_HDLC, 0x20 | V_HDLC_TRP | V_IFF);
-			HFC_outb(hc, A_SUBCH_CFG, 0);
-			HFC_outb(hc, A_IRQ_MSK, 0);
-			if (hc->chan[ch].protocol != protocol) {
-				HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
-				HFC_wait(hc);
-			}
-			HFC_outb(hc, R_SLOT, ((((ch / 4) * 8) +
-					       ((ch % 4) * 4) + 1) << 1) | 1);
-			HFC_outb(hc, A_SL_CFG, 0x80 | 0x20 | (ch << 1) | 1);
-
-			/* tx path */
-			/* PCM -> S/T */
-			HFC_outb(hc, R_FIFO, (ch << 1) | 1);
-			HFC_wait(hc);
-			HFC_outb(hc, A_CON_HDLC, 0xc0 | V_HDLC_TRP | V_IFF);
-			HFC_outb(hc, R_SLOT, ((((ch / 4) * 8) +
-					       ((ch % 4) * 4)) << 1) | 1);
-			HFC_outb(hc, A_SL_CFG, 0x80 | 0x40 | (ch << 1) | 1);
-
-			/* FIFO -> PCM */
-			HFC_outb(hc, R_FIFO, 0x20 | (ch << 1));
-			HFC_wait(hc);
-			HFC_outb(hc, A_CON_HDLC, 0x20 | V_HDLC_TRP | V_IFF);
-			HFC_outb(hc, A_SUBCH_CFG, 0);
-			HFC_outb(hc, A_IRQ_MSK, 0);
-			if (hc->chan[ch].protocol != protocol) {
-				HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
-				HFC_wait(hc);
-			}
-			/* tx silence */
-			HFC_outb_nodebug(hc, A_FIFO_DATA0_NOINC, hc->silence);
-			HFC_outb(hc, R_SLOT, (((ch / 4) * 8) +
-					      ((ch % 4) * 4)) << 1);
-			HFC_outb(hc, A_SL_CFG, 0x80 | 0x20 | (ch << 1));
-		} else {
-			/* enable TX fifo */
-			HFC_outb(hc, R_FIFO, ch << 1);
-			HFC_wait(hc);
-			if (hc->ctype == HFC_TYPE_XHFC)
-				HFC_outb(hc, A_CON_HDLC, flow_tx | 0x07 << 2 |
-					 V_HDLC_TRP | V_IFF);
-			/* Enable FIFO, no interrupt */
-			else
-				HFC_outb(hc, A_CON_HDLC, flow_tx | 0x00 |
-					 V_HDLC_TRP | V_IFF);
-			HFC_outb(hc, A_SUBCH_CFG, 0);
-			HFC_outb(hc, A_IRQ_MSK, 0);
-			if (hc->chan[ch].protocol != protocol) {
-				HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
-				HFC_wait(hc);
-			}
-			/* tx silence */
-			HFC_outb_nodebug(hc, A_FIFO_DATA0_NOINC, hc->silence);
-			/* enable RX fifo */
-			HFC_outb(hc, R_FIFO, (ch << 1) | 1);
-			HFC_wait(hc);
-			if (hc->ctype == HFC_TYPE_XHFC)
-				HFC_outb(hc, A_CON_HDLC, flow_rx | 0x07 << 2 |
-					 V_HDLC_TRP);
-			/* Enable FIFO, no interrupt*/
-			else
-				HFC_outb(hc, A_CON_HDLC, flow_rx | 0x00 |
-					 V_HDLC_TRP);
-			HFC_outb(hc, A_SUBCH_CFG, 0);
-			HFC_outb(hc, A_IRQ_MSK, 0);
-			if (hc->chan[ch].protocol != protocol) {
-				HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
-				HFC_wait(hc);
-			}
-		}
-		if (hc->ctype != HFC_TYPE_E1) {
-			hc->hw.a_st_ctrl0[hc->chan[ch].port] |=
-				((ch & 0x3) == 0) ? V_B1_EN : V_B2_EN;
-			HFC_outb(hc, R_ST_SEL, hc->chan[ch].port);
-			/* undocumented: delay after R_ST_SEL */
-			udelay(1);
-			HFC_outb(hc, A_ST_CTRL0,
-				 hc->hw.a_st_ctrl0[hc->chan[ch].port]);
-		}
-		if (hc->chan[ch].bch)
-			test_and_set_bit(FLG_TRANSPARENT,
-					 &hc->chan[ch].bch->Flags);
-		break;
-	case (ISDN_P_B_HDLC): /* B-channel */
-	case (ISDN_P_TE_S0): /* D-channel */
-	case (ISDN_P_NT_S0):
-	case (ISDN_P_TE_E1):
-	case (ISDN_P_NT_E1):
-		/* enable TX fifo */
-		HFC_outb(hc, R_FIFO, ch << 1);
-		HFC_wait(hc);
-		if (hc->ctype == HFC_TYPE_E1 || hc->chan[ch].bch) {
-			/* E1 or B-channel */
-			HFC_outb(hc, A_CON_HDLC, flow_tx | 0x04);
-			HFC_outb(hc, A_SUBCH_CFG, 0);
-		} else {
-			/* D-Channel without HDLC fill flags */
-			HFC_outb(hc, A_CON_HDLC, flow_tx | 0x04 | V_IFF);
-			HFC_outb(hc, A_SUBCH_CFG, 2);
-		}
-		HFC_outb(hc, A_IRQ_MSK, V_IRQ);
-		HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
-		HFC_wait(hc);
-		/* enable RX fifo */
-		HFC_outb(hc, R_FIFO, (ch << 1) | 1);
-		HFC_wait(hc);
-		HFC_outb(hc, A_CON_HDLC, flow_rx | 0x04);
-		if (hc->ctype == HFC_TYPE_E1 || hc->chan[ch].bch)
-			HFC_outb(hc, A_SUBCH_CFG, 0); /* full 8 bits */
-		else
-			HFC_outb(hc, A_SUBCH_CFG, 2); /* 2 bits dchannel */
-		HFC_outb(hc, A_IRQ_MSK, V_IRQ);
-		HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
-		HFC_wait(hc);
-		if (hc->chan[ch].bch) {
-			test_and_set_bit(FLG_HDLC, &hc->chan[ch].bch->Flags);
-			if (hc->ctype != HFC_TYPE_E1) {
-				hc->hw.a_st_ctrl0[hc->chan[ch].port] |=
-					((ch & 0x3) == 0) ? V_B1_EN : V_B2_EN;
-				HFC_outb(hc, R_ST_SEL, hc->chan[ch].port);
-				/* undocumented: delay after R_ST_SEL */
-				udelay(1);
-				HFC_outb(hc, A_ST_CTRL0,
-					 hc->hw.a_st_ctrl0[hc->chan[ch].port]);
-			}
-		}
-		break;
-	default:
-		printk(KERN_DEBUG "%s: protocol not known %x\n",
-		       __func__, protocol);
-		hc->chan[ch].protocol = ISDN_P_NONE;
-		return -ENOPROTOOPT;
-	}
-	hc->chan[ch].protocol = protocol;
-	return 0;
-}
-
-
-/*
- * connect/disconnect PCM
- */
-
-static void
-hfcmulti_pcm(struct hfc_multi *hc, int ch, int slot_tx, int bank_tx,
-	     int slot_rx, int bank_rx)
-{
-	if (slot_tx < 0 || slot_rx < 0 || bank_tx < 0 || bank_rx < 0) {
-		/* disable PCM */
-		mode_hfcmulti(hc, ch, hc->chan[ch].protocol, -1, 0, -1, 0);
-		return;
-	}
-
-	/* enable pcm */
-	mode_hfcmulti(hc, ch, hc->chan[ch].protocol, slot_tx, bank_tx,
-		      slot_rx, bank_rx);
-}
-
-/*
- * set/disable conference
- */
-
-static void
-hfcmulti_conf(struct hfc_multi *hc, int ch, int num)
-{
-	if (num >= 0 && num <= 7)
-		hc->chan[ch].conf = num;
-	else
-		hc->chan[ch].conf = -1;
-	mode_hfcmulti(hc, ch, hc->chan[ch].protocol, hc->chan[ch].slot_tx,
-		      hc->chan[ch].bank_tx, hc->chan[ch].slot_rx,
-		      hc->chan[ch].bank_rx);
-}
-
-
-/*
- * set/disable sample loop
- */
-
-/* NOTE: this function is experimental and therefore disabled */
-
-/*
- * Layer 1 callback function
- */
-static int
-hfcm_l1callback(struct dchannel *dch, u_int cmd)
-{
-	struct hfc_multi	*hc = dch->hw;
-	struct sk_buff_head	free_queue;
-	u_long	flags;
-
-	switch (cmd) {
-	case INFO3_P8:
-	case INFO3_P10:
-		break;
-	case HW_RESET_REQ:
-		/* start activation */
-		spin_lock_irqsave(&hc->lock, flags);
-		if (hc->ctype == HFC_TYPE_E1) {
-			if (debug & DEBUG_HFCMULTI_MSG)
-				printk(KERN_DEBUG
-				       "%s: HW_RESET_REQ no BRI\n",
-				       __func__);
-		} else {
-			HFC_outb(hc, R_ST_SEL, hc->chan[dch->slot].port);
-			/* undocumented: delay after R_ST_SEL */
-			udelay(1);
-			HFC_outb(hc, A_ST_WR_STATE, V_ST_LD_STA | 3); /* F3 */
-			udelay(6); /* wait at least 5,21us */
-			HFC_outb(hc, A_ST_WR_STATE, 3);
-			HFC_outb(hc, A_ST_WR_STATE, 3 | (V_ST_ACT * 3));
-			/* activate */
-		}
-		spin_unlock_irqrestore(&hc->lock, flags);
-		l1_event(dch->l1, HW_POWERUP_IND);
-		break;
-	case HW_DEACT_REQ:
-		__skb_queue_head_init(&free_queue);
-		/* start deactivation */
-		spin_lock_irqsave(&hc->lock, flags);
-		if (hc->ctype == HFC_TYPE_E1) {
-			if (debug & DEBUG_HFCMULTI_MSG)
-				printk(KERN_DEBUG
-				       "%s: HW_DEACT_REQ no BRI\n",
-				       __func__);
-		} else {
-			HFC_outb(hc, R_ST_SEL, hc->chan[dch->slot].port);
-			/* undocumented: delay after R_ST_SEL */
-			udelay(1);
-			HFC_outb(hc, A_ST_WR_STATE, V_ST_ACT * 2);
-			/* deactivate */
-			if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-				hc->syncronized &=
-					~(1 << hc->chan[dch->slot].port);
-				plxsd_checksync(hc, 0);
-			}
-		}
-		skb_queue_splice_init(&dch->squeue, &free_queue);
-		if (dch->tx_skb) {
-			__skb_queue_tail(&free_queue, dch->tx_skb);
-			dch->tx_skb = NULL;
-		}
-		dch->tx_idx = 0;
-		if (dch->rx_skb) {
-			__skb_queue_tail(&free_queue, dch->rx_skb);
-			dch->rx_skb = NULL;
-		}
-		test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
-		if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags))
-			timer_delete(&dch->timer);
-		spin_unlock_irqrestore(&hc->lock, flags);
-		__skb_queue_purge(&free_queue);
-		break;
-	case HW_POWERUP_REQ:
-		spin_lock_irqsave(&hc->lock, flags);
-		if (hc->ctype == HFC_TYPE_E1) {
-			if (debug & DEBUG_HFCMULTI_MSG)
-				printk(KERN_DEBUG
-				       "%s: HW_POWERUP_REQ no BRI\n",
-				       __func__);
-		} else {
-			HFC_outb(hc, R_ST_SEL, hc->chan[dch->slot].port);
-			/* undocumented: delay after R_ST_SEL */
-			udelay(1);
-			HFC_outb(hc, A_ST_WR_STATE, 3 | 0x10); /* activate */
-			udelay(6); /* wait at least 5,21us */
-			HFC_outb(hc, A_ST_WR_STATE, 3); /* activate */
-		}
-		spin_unlock_irqrestore(&hc->lock, flags);
-		break;
-	case PH_ACTIVATE_IND:
-		test_and_set_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL,
-			    GFP_ATOMIC);
-		break;
-	case PH_DEACTIVATE_IND:
-		test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL,
-			    GFP_ATOMIC);
-		break;
-	default:
-		if (dch->debug & DEBUG_HW)
-			printk(KERN_DEBUG "%s: unknown command %x\n",
-			       __func__, cmd);
-		return -1;
-	}
-	return 0;
-}
-
-/*
- * Layer2 -> Layer 1 Transfer
- */
-
-static int
-handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct hfc_multi	*hc = dch->hw;
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	int			ret = -EINVAL;
-	unsigned int		id;
-	u_long			flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		if (skb->len < 1)
-			break;
-		spin_lock_irqsave(&hc->lock, flags);
-		ret = dchannel_senddata(dch, skb);
-		if (ret > 0) { /* direct TX */
-			id = hh->id; /* skb can be freed */
-			hfcmulti_tx(hc, dch->slot);
-			ret = 0;
-			/* start fifo */
-			HFC_outb(hc, R_FIFO, 0);
-			HFC_wait(hc);
-			spin_unlock_irqrestore(&hc->lock, flags);
-			queue_ch_frame(ch, PH_DATA_CNF, id, NULL);
-		} else
-			spin_unlock_irqrestore(&hc->lock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		if (dch->dev.D.protocol != ISDN_P_TE_S0) {
-			spin_lock_irqsave(&hc->lock, flags);
-			ret = 0;
-			if (debug & DEBUG_HFCMULTI_MSG)
-				printk(KERN_DEBUG
-				       "%s: PH_ACTIVATE port %d (0..%d)\n",
-				       __func__, hc->chan[dch->slot].port,
-				       hc->ports - 1);
-			/* start activation */
-			if (hc->ctype == HFC_TYPE_E1) {
-				ph_state_change(dch);
-				if (debug & DEBUG_HFCMULTI_STATE)
-					printk(KERN_DEBUG
-					       "%s: E1 report state %x \n",
-					       __func__, dch->state);
-			} else {
-				HFC_outb(hc, R_ST_SEL,
-					 hc->chan[dch->slot].port);
-				/* undocumented: delay after R_ST_SEL */
-				udelay(1);
-				HFC_outb(hc, A_ST_WR_STATE, V_ST_LD_STA | 1);
-				/* G1 */
-				udelay(6); /* wait at least 5,21us */
-				HFC_outb(hc, A_ST_WR_STATE, 1);
-				HFC_outb(hc, A_ST_WR_STATE, 1 |
-					 (V_ST_ACT * 3)); /* activate */
-				dch->state = 1;
-			}
-			spin_unlock_irqrestore(&hc->lock, flags);
-		} else
-			ret = l1_event(dch->l1, hh->prim);
-		break;
-	case PH_DEACTIVATE_REQ:
-		test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
-		if (dch->dev.D.protocol != ISDN_P_TE_S0) {
-			struct sk_buff_head free_queue;
-
-			__skb_queue_head_init(&free_queue);
-			spin_lock_irqsave(&hc->lock, flags);
-			if (debug & DEBUG_HFCMULTI_MSG)
-				printk(KERN_DEBUG
-				       "%s: PH_DEACTIVATE port %d (0..%d)\n",
-				       __func__, hc->chan[dch->slot].port,
-				       hc->ports - 1);
-			/* start deactivation */
-			if (hc->ctype == HFC_TYPE_E1) {
-				if (debug & DEBUG_HFCMULTI_MSG)
-					printk(KERN_DEBUG
-					       "%s: PH_DEACTIVATE no BRI\n",
-					       __func__);
-			} else {
-				HFC_outb(hc, R_ST_SEL,
-					 hc->chan[dch->slot].port);
-				/* undocumented: delay after R_ST_SEL */
-				udelay(1);
-				HFC_outb(hc, A_ST_WR_STATE, V_ST_ACT * 2);
-				/* deactivate */
-				dch->state = 1;
-			}
-			skb_queue_splice_init(&dch->squeue, &free_queue);
-			if (dch->tx_skb) {
-				__skb_queue_tail(&free_queue, dch->tx_skb);
-				dch->tx_skb = NULL;
-			}
-			dch->tx_idx = 0;
-			if (dch->rx_skb) {
-				__skb_queue_tail(&free_queue, dch->rx_skb);
-				dch->rx_skb = NULL;
-			}
-			test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
-			if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags))
-				timer_delete(&dch->timer);
-#ifdef FIXME
-			if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags))
-				dchannel_sched_event(&hc->dch, D_CLEARBUSY);
-#endif
-			ret = 0;
-			spin_unlock_irqrestore(&hc->lock, flags);
-			__skb_queue_purge(&free_queue);
-		} else
-			ret = l1_event(dch->l1, hh->prim);
-		break;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static void
-deactivate_bchannel(struct bchannel *bch)
-{
-	struct hfc_multi	*hc = bch->hw;
-	u_long			flags;
-
-	spin_lock_irqsave(&hc->lock, flags);
-	mISDN_clear_bchannel(bch);
-	hc->chan[bch->slot].coeff_count = 0;
-	hc->chan[bch->slot].rx_off = 0;
-	hc->chan[bch->slot].conf = -1;
-	mode_hfcmulti(hc, bch->slot, ISDN_P_NONE, -1, 0, -1, 0);
-	spin_unlock_irqrestore(&hc->lock, flags);
-}
-
-static int
-handle_bmsg(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct bchannel		*bch = container_of(ch, struct bchannel, ch);
-	struct hfc_multi	*hc = bch->hw;
-	int			ret = -EINVAL;
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	unsigned long		flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		if (!skb->len)
-			break;
-		spin_lock_irqsave(&hc->lock, flags);
-		ret = bchannel_senddata(bch, skb);
-		if (ret > 0) { /* direct TX */
-			hfcmulti_tx(hc, bch->slot);
-			ret = 0;
-			/* start fifo */
-			HFC_outb_nodebug(hc, R_FIFO, 0);
-			HFC_wait_nodebug(hc);
-		}
-		spin_unlock_irqrestore(&hc->lock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG "%s: PH_ACTIVATE ch %d (0..32)\n",
-			       __func__, bch->slot);
-		spin_lock_irqsave(&hc->lock, flags);
-		/* activate B-channel if not already activated */
-		if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags)) {
-			hc->chan[bch->slot].txpending = 0;
-			ret = mode_hfcmulti(hc, bch->slot,
-					    ch->protocol,
-					    hc->chan[bch->slot].slot_tx,
-					    hc->chan[bch->slot].bank_tx,
-					    hc->chan[bch->slot].slot_rx,
-					    hc->chan[bch->slot].bank_rx);
-			if (!ret) {
-				if (ch->protocol == ISDN_P_B_RAW && !hc->dtmf
-				    && test_bit(HFC_CHIP_DTMF, &hc->chip)) {
-					/* start decoder */
-					hc->dtmf = 1;
-					if (debug & DEBUG_HFCMULTI_DTMF)
-						printk(KERN_DEBUG
-						       "%s: start dtmf decoder\n",
-						       __func__);
-					HFC_outb(hc, R_DTMF, hc->hw.r_dtmf |
-						 V_RST_DTMF);
-				}
-			}
-		} else
-			ret = 0;
-		spin_unlock_irqrestore(&hc->lock, flags);
-		if (!ret)
-			_queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY, 0, NULL,
-				    GFP_KERNEL);
-		break;
-	case PH_CONTROL_REQ:
-		spin_lock_irqsave(&hc->lock, flags);
-		switch (hh->id) {
-		case HFC_SPL_LOOP_ON: /* set sample loop */
-			if (debug & DEBUG_HFCMULTI_MSG)
-				printk(KERN_DEBUG
-				       "%s: HFC_SPL_LOOP_ON (len = %d)\n",
-				       __func__, skb->len);
-			ret = 0;
-			break;
-		case HFC_SPL_LOOP_OFF: /* set silence */
-			if (debug & DEBUG_HFCMULTI_MSG)
-				printk(KERN_DEBUG "%s: HFC_SPL_LOOP_OFF\n",
-				       __func__);
-			ret = 0;
-			break;
-		default:
-			printk(KERN_ERR
-			       "%s: unknown PH_CONTROL_REQ info %x\n",
-			       __func__, hh->id);
-			ret = -EINVAL;
-		}
-		spin_unlock_irqrestore(&hc->lock, flags);
-		break;
-	case PH_DEACTIVATE_REQ:
-		deactivate_bchannel(bch); /* locked there */
-		_queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0, NULL,
-			    GFP_KERNEL);
-		ret = 0;
-		break;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-/*
- * bchannel control function
- */
-static int
-channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq)
-{
-	int			ret = 0;
-	struct dsp_features	*features =
-		(struct dsp_features *)(*((u_long *)&cq->p1));
-	struct hfc_multi	*hc = bch->hw;
-	int			slot_tx;
-	int			bank_tx;
-	int			slot_rx;
-	int			bank_rx;
-	int			num;
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		ret = mISDN_ctrl_bchannel(bch, cq);
-		cq->op |= MISDN_CTRL_HFC_OP | MISDN_CTRL_HW_FEATURES_OP;
-		break;
-	case MISDN_CTRL_RX_OFF: /* turn off / on rx stream */
-		ret = mISDN_ctrl_bchannel(bch, cq);
-		hc->chan[bch->slot].rx_off = !!cq->p1;
-		if (!hc->chan[bch->slot].rx_off) {
-			/* reset fifo on rx on */
-			HFC_outb_nodebug(hc, R_FIFO, (bch->slot << 1) | 1);
-			HFC_wait_nodebug(hc);
-			HFC_outb_nodebug(hc, R_INC_RES_FIFO, V_RES_F);
-			HFC_wait_nodebug(hc);
-		}
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG "%s: RX_OFF request (nr=%d off=%d)\n",
-			       __func__, bch->nr, hc->chan[bch->slot].rx_off);
-		break;
-	case MISDN_CTRL_FILL_EMPTY:
-		ret = mISDN_ctrl_bchannel(bch, cq);
-		hc->silence = bch->fill[0];
-		memset(hc->silence_data, hc->silence, sizeof(hc->silence_data));
-		break;
-	case MISDN_CTRL_HW_FEATURES: /* fill features structure */
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG "%s: HW_FEATURE request\n",
-			       __func__);
-		/* create confirm */
-		features->hfc_id = hc->id;
-		if (test_bit(HFC_CHIP_DTMF, &hc->chip))
-			features->hfc_dtmf = 1;
-		if (test_bit(HFC_CHIP_CONF, &hc->chip))
-			features->hfc_conf = 1;
-		features->hfc_loops = 0;
-		if (test_bit(HFC_CHIP_B410P, &hc->chip)) {
-			features->hfc_echocanhw = 1;
-		} else {
-			features->pcm_id = hc->pcm;
-			features->pcm_slots = hc->slots;
-			features->pcm_banks = 2;
-		}
-		break;
-	case MISDN_CTRL_HFC_PCM_CONN: /* connect to pcm timeslot (0..N) */
-		slot_tx = cq->p1 & 0xff;
-		bank_tx = cq->p1 >> 8;
-		slot_rx = cq->p2 & 0xff;
-		bank_rx = cq->p2 >> 8;
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG
-			       "%s: HFC_PCM_CONN slot %d bank %d (TX) "
-			       "slot %d bank %d (RX)\n",
-			       __func__, slot_tx, bank_tx,
-			       slot_rx, bank_rx);
-		if (slot_tx < hc->slots && bank_tx <= 2 &&
-		    slot_rx < hc->slots && bank_rx <= 2)
-			hfcmulti_pcm(hc, bch->slot,
-				     slot_tx, bank_tx, slot_rx, bank_rx);
-		else {
-			printk(KERN_WARNING
-			       "%s: HFC_PCM_CONN slot %d bank %d (TX) "
-			       "slot %d bank %d (RX) out of range\n",
-			       __func__, slot_tx, bank_tx,
-			       slot_rx, bank_rx);
-			ret = -EINVAL;
-		}
-		break;
-	case MISDN_CTRL_HFC_PCM_DISC: /* release interface from pcm timeslot */
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG "%s: HFC_PCM_DISC\n",
-			       __func__);
-		hfcmulti_pcm(hc, bch->slot, -1, 0, -1, 0);
-		break;
-	case MISDN_CTRL_HFC_CONF_JOIN: /* join conference (0..7) */
-		num = cq->p1 & 0xff;
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG "%s: HFC_CONF_JOIN conf %d\n",
-			       __func__, num);
-		if (num <= 7)
-			hfcmulti_conf(hc, bch->slot, num);
-		else {
-			printk(KERN_WARNING
-			       "%s: HW_CONF_JOIN conf %d out of range\n",
-			       __func__, num);
-			ret = -EINVAL;
-		}
-		break;
-	case MISDN_CTRL_HFC_CONF_SPLIT: /* split conference */
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG "%s: HFC_CONF_SPLIT\n", __func__);
-		hfcmulti_conf(hc, bch->slot, -1);
-		break;
-	case MISDN_CTRL_HFC_ECHOCAN_ON:
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG "%s: HFC_ECHOCAN_ON\n", __func__);
-		if (test_bit(HFC_CHIP_B410P, &hc->chip))
-			vpm_echocan_on(hc, bch->slot, cq->p1);
-		else
-			ret = -EINVAL;
-		break;
-
-	case MISDN_CTRL_HFC_ECHOCAN_OFF:
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG "%s: HFC_ECHOCAN_OFF\n",
-			       __func__);
-		if (test_bit(HFC_CHIP_B410P, &hc->chip))
-			vpm_echocan_off(hc, bch->slot);
-		else
-			ret = -EINVAL;
-		break;
-	default:
-		ret = mISDN_ctrl_bchannel(bch, cq);
-		break;
-	}
-	return ret;
-}
-
-static int
-hfcm_bctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct bchannel		*bch = container_of(ch, struct bchannel, ch);
-	struct hfc_multi	*hc = bch->hw;
-	int			err = -EINVAL;
-	u_long	flags;
-
-	if (bch->debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: cmd:%x %p\n",
-		       __func__, cmd, arg);
-	switch (cmd) {
-	case CLOSE_CHANNEL:
-		test_and_clear_bit(FLG_OPEN, &bch->Flags);
-		deactivate_bchannel(bch); /* locked there */
-		ch->protocol = ISDN_P_NONE;
-		ch->peer = NULL;
-		module_put(THIS_MODULE);
-		err = 0;
-		break;
-	case CONTROL_CHANNEL:
-		spin_lock_irqsave(&hc->lock, flags);
-		err = channel_bctrl(bch, arg);
-		spin_unlock_irqrestore(&hc->lock, flags);
-		break;
-	default:
-		printk(KERN_WARNING "%s: unknown prim(%x)\n",
-		       __func__, cmd);
-	}
-	return err;
-}
-
-/*
- * handle D-channel events
- *
- * handle state change event
- */
-static void
-ph_state_change(struct dchannel *dch)
-{
-	struct hfc_multi *hc;
-	int ch, i;
-
-	if (!dch) {
-		printk(KERN_WARNING "%s: ERROR given dch is NULL\n", __func__);
-		return;
-	}
-	hc = dch->hw;
-	ch = dch->slot;
-
-	if (hc->ctype == HFC_TYPE_E1) {
-		if (dch->dev.D.protocol == ISDN_P_TE_E1) {
-			if (debug & DEBUG_HFCMULTI_STATE)
-				printk(KERN_DEBUG
-				       "%s: E1 TE (id=%d) newstate %x\n",
-				       __func__, hc->id, dch->state);
-		} else {
-			if (debug & DEBUG_HFCMULTI_STATE)
-				printk(KERN_DEBUG
-				       "%s: E1 NT (id=%d) newstate %x\n",
-				       __func__, hc->id, dch->state);
-		}
-		switch (dch->state) {
-		case (1):
-			if (hc->e1_state != 1) {
-				for (i = 1; i <= 31; i++) {
-					/* reset fifos on e1 activation */
-					HFC_outb_nodebug(hc, R_FIFO,
-							 (i << 1) | 1);
-					HFC_wait_nodebug(hc);
-					HFC_outb_nodebug(hc, R_INC_RES_FIFO,
-							 V_RES_F);
-					HFC_wait_nodebug(hc);
-				}
-			}
-			test_and_set_bit(FLG_ACTIVE, &dch->Flags);
-			_queue_data(&dch->dev.D, PH_ACTIVATE_IND,
-				    MISDN_ID_ANY, 0, NULL, GFP_ATOMIC);
-			break;
-
-		default:
-			if (hc->e1_state != 1)
-				return;
-			test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-			_queue_data(&dch->dev.D, PH_DEACTIVATE_IND,
-				    MISDN_ID_ANY, 0, NULL, GFP_ATOMIC);
-		}
-		hc->e1_state = dch->state;
-	} else {
-		if (dch->dev.D.protocol == ISDN_P_TE_S0) {
-			if (debug & DEBUG_HFCMULTI_STATE)
-				printk(KERN_DEBUG
-				       "%s: S/T TE newstate %x\n",
-				       __func__, dch->state);
-			switch (dch->state) {
-			case (0):
-				l1_event(dch->l1, HW_RESET_IND);
-				break;
-			case (3):
-				l1_event(dch->l1, HW_DEACT_IND);
-				break;
-			case (5):
-			case (8):
-				l1_event(dch->l1, ANYSIGNAL);
-				break;
-			case (6):
-				l1_event(dch->l1, INFO2);
-				break;
-			case (7):
-				l1_event(dch->l1, INFO4_P8);
-				break;
-			}
-		} else {
-			if (debug & DEBUG_HFCMULTI_STATE)
-				printk(KERN_DEBUG "%s: S/T NT newstate %x\n",
-				       __func__, dch->state);
-			switch (dch->state) {
-			case (2):
-				if (hc->chan[ch].nt_timer == 0) {
-					hc->chan[ch].nt_timer = -1;
-					HFC_outb(hc, R_ST_SEL,
-						 hc->chan[ch].port);
-					/* undocumented: delay after R_ST_SEL */
-					udelay(1);
-					HFC_outb(hc, A_ST_WR_STATE, 4 |
-						 V_ST_LD_STA); /* G4 */
-					udelay(6); /* wait at least 5,21us */
-					HFC_outb(hc, A_ST_WR_STATE, 4);
-					dch->state = 4;
-				} else {
-					/* one extra count for the next event */
-					hc->chan[ch].nt_timer =
-						nt_t1_count[poll_timer] + 1;
-					HFC_outb(hc, R_ST_SEL,
-						 hc->chan[ch].port);
-					/* undocumented: delay after R_ST_SEL */
-					udelay(1);
-					/* allow G2 -> G3 transition */
-					HFC_outb(hc, A_ST_WR_STATE, 2 |
-						 V_SET_G2_G3);
-				}
-				break;
-			case (1):
-				hc->chan[ch].nt_timer = -1;
-				test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-				_queue_data(&dch->dev.D, PH_DEACTIVATE_IND,
-					    MISDN_ID_ANY, 0, NULL, GFP_ATOMIC);
-				break;
-			case (4):
-				hc->chan[ch].nt_timer = -1;
-				break;
-			case (3):
-				hc->chan[ch].nt_timer = -1;
-				test_and_set_bit(FLG_ACTIVE, &dch->Flags);
-				_queue_data(&dch->dev.D, PH_ACTIVATE_IND,
-					    MISDN_ID_ANY, 0, NULL, GFP_ATOMIC);
-				break;
-			}
-		}
-	}
-}
-
-/*
- * called for card mode init message
- */
-
-static void
-hfcmulti_initmode(struct dchannel *dch)
-{
-	struct hfc_multi *hc = dch->hw;
-	u_char		a_st_wr_state, r_e1_wr_sta;
-	int		i, pt;
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: entered\n", __func__);
-
-	i = dch->slot;
-	pt = hc->chan[i].port;
-	if (hc->ctype == HFC_TYPE_E1) {
-		/* E1 */
-		hc->chan[hc->dnum[pt]].slot_tx = -1;
-		hc->chan[hc->dnum[pt]].slot_rx = -1;
-		hc->chan[hc->dnum[pt]].conf = -1;
-		if (hc->dnum[pt]) {
-			mode_hfcmulti(hc, dch->slot, dch->dev.D.protocol,
-				      -1, 0, -1, 0);
-			timer_setup(&dch->timer, hfcmulti_dbusy_timer, 0);
-		}
-		for (i = 1; i <= 31; i++) {
-			if (!((1 << i) & hc->bmask[pt])) /* skip unused chan */
-				continue;
-			hc->chan[i].slot_tx = -1;
-			hc->chan[i].slot_rx = -1;
-			hc->chan[i].conf = -1;
-			mode_hfcmulti(hc, i, ISDN_P_NONE, -1, 0, -1, 0);
-		}
-	}
-	if (hc->ctype == HFC_TYPE_E1 && pt == 0) {
-		/* E1, port 0 */
-		dch = hc->chan[hc->dnum[0]].dch;
-		if (test_bit(HFC_CFG_REPORT_LOS, &hc->chan[hc->dnum[0]].cfg)) {
-			HFC_outb(hc, R_LOS0, 255); /* 2 ms */
-			HFC_outb(hc, R_LOS1, 255); /* 512 ms */
-		}
-		if (test_bit(HFC_CFG_OPTICAL, &hc->chan[hc->dnum[0]].cfg)) {
-			HFC_outb(hc, R_RX0, 0);
-			hc->hw.r_tx0 = 0 | V_OUT_EN;
-		} else {
-			HFC_outb(hc, R_RX0, 1);
-			hc->hw.r_tx0 = 1 | V_OUT_EN;
-		}
-		hc->hw.r_tx1 = V_ATX | V_NTRI;
-		HFC_outb(hc, R_TX0, hc->hw.r_tx0);
-		HFC_outb(hc, R_TX1, hc->hw.r_tx1);
-		HFC_outb(hc, R_TX_FR0, 0x00);
-		HFC_outb(hc, R_TX_FR1, 0xf8);
-
-		if (test_bit(HFC_CFG_CRC4, &hc->chan[hc->dnum[0]].cfg))
-			HFC_outb(hc, R_TX_FR2, V_TX_MF | V_TX_E | V_NEG_E);
-
-		HFC_outb(hc, R_RX_FR0, V_AUTO_RESYNC | V_AUTO_RECO | 0);
-
-		if (test_bit(HFC_CFG_CRC4, &hc->chan[hc->dnum[0]].cfg))
-			HFC_outb(hc, R_RX_FR1, V_RX_MF | V_RX_MF_SYNC);
-
-		if (dch->dev.D.protocol == ISDN_P_NT_E1) {
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG "%s: E1 port is NT-mode\n",
-				       __func__);
-			r_e1_wr_sta = 0; /* G0 */
-			hc->e1_getclock = 0;
-		} else {
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG "%s: E1 port is TE-mode\n",
-				       __func__);
-			r_e1_wr_sta = 0; /* F0 */
-			hc->e1_getclock = 1;
-		}
-		if (test_bit(HFC_CHIP_RX_SYNC, &hc->chip))
-			HFC_outb(hc, R_SYNC_OUT, V_SYNC_E1_RX);
-		else
-			HFC_outb(hc, R_SYNC_OUT, 0);
-		if (test_bit(HFC_CHIP_E1CLOCK_GET, &hc->chip))
-			hc->e1_getclock = 1;
-		if (test_bit(HFC_CHIP_E1CLOCK_PUT, &hc->chip))
-			hc->e1_getclock = 0;
-		if (test_bit(HFC_CHIP_PCM_SLAVE, &hc->chip)) {
-			/* SLAVE (clock master) */
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG
-				       "%s: E1 port is clock master "
-				       "(clock from PCM)\n", __func__);
-			HFC_outb(hc, R_SYNC_CTRL, V_EXT_CLK_SYNC | V_PCM_SYNC);
-		} else {
-			if (hc->e1_getclock) {
-				/* MASTER (clock slave) */
-				if (debug & DEBUG_HFCMULTI_INIT)
-					printk(KERN_DEBUG
-					       "%s: E1 port is clock slave "
-					       "(clock to PCM)\n", __func__);
-				HFC_outb(hc, R_SYNC_CTRL, V_SYNC_OFFS);
-			} else {
-				/* MASTER (clock master) */
-				if (debug & DEBUG_HFCMULTI_INIT)
-					printk(KERN_DEBUG "%s: E1 port is "
-					       "clock master "
-					       "(clock from QUARTZ)\n",
-					       __func__);
-				HFC_outb(hc, R_SYNC_CTRL, V_EXT_CLK_SYNC |
-					 V_PCM_SYNC | V_JATT_OFF);
-				HFC_outb(hc, R_SYNC_OUT, 0);
-			}
-		}
-		HFC_outb(hc, R_JATT_ATT, 0x9c); /* undoc register */
-		HFC_outb(hc, R_PWM_MD, V_PWM0_MD);
-		HFC_outb(hc, R_PWM0, 0x50);
-		HFC_outb(hc, R_PWM1, 0xff);
-		/* state machine setup */
-		HFC_outb(hc, R_E1_WR_STA, r_e1_wr_sta | V_E1_LD_STA);
-		udelay(6); /* wait at least 5,21us */
-		HFC_outb(hc, R_E1_WR_STA, r_e1_wr_sta);
-		if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-			hc->syncronized = 0;
-			plxsd_checksync(hc, 0);
-		}
-	}
-	if (hc->ctype != HFC_TYPE_E1) {
-		/* ST */
-		hc->chan[i].slot_tx = -1;
-		hc->chan[i].slot_rx = -1;
-		hc->chan[i].conf = -1;
-		mode_hfcmulti(hc, i, dch->dev.D.protocol, -1, 0, -1, 0);
-		timer_setup(&dch->timer, hfcmulti_dbusy_timer, 0);
-		hc->chan[i - 2].slot_tx = -1;
-		hc->chan[i - 2].slot_rx = -1;
-		hc->chan[i - 2].conf = -1;
-		mode_hfcmulti(hc, i - 2, ISDN_P_NONE, -1, 0, -1, 0);
-		hc->chan[i - 1].slot_tx = -1;
-		hc->chan[i - 1].slot_rx = -1;
-		hc->chan[i - 1].conf = -1;
-		mode_hfcmulti(hc, i - 1, ISDN_P_NONE, -1, 0, -1, 0);
-		/* select interface */
-		HFC_outb(hc, R_ST_SEL, pt);
-		/* undocumented: delay after R_ST_SEL */
-		udelay(1);
-		if (dch->dev.D.protocol == ISDN_P_NT_S0) {
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG
-				       "%s: ST port %d is NT-mode\n",
-				       __func__, pt);
-			/* clock delay */
-			HFC_outb(hc, A_ST_CLK_DLY, clockdelay_nt);
-			a_st_wr_state = 1; /* G1 */
-			hc->hw.a_st_ctrl0[pt] = V_ST_MD;
-		} else {
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG
-				       "%s: ST port %d is TE-mode\n",
-				       __func__, pt);
-			/* clock delay */
-			HFC_outb(hc, A_ST_CLK_DLY, clockdelay_te);
-			a_st_wr_state = 2; /* F2 */
-			hc->hw.a_st_ctrl0[pt] = 0;
-		}
-		if (!test_bit(HFC_CFG_NONCAP_TX, &hc->chan[i].cfg))
-			hc->hw.a_st_ctrl0[pt] |= V_TX_LI;
-		if (hc->ctype == HFC_TYPE_XHFC) {
-			hc->hw.a_st_ctrl0[pt] |= 0x40 /* V_ST_PU_CTRL */;
-			HFC_outb(hc, 0x35 /* A_ST_CTRL3 */,
-				 0x7c << 1 /* V_ST_PULSE */);
-		}
-		/* line setup */
-		HFC_outb(hc, A_ST_CTRL0,  hc->hw.a_st_ctrl0[pt]);
-		/* disable E-channel */
-		if ((dch->dev.D.protocol == ISDN_P_NT_S0) ||
-		    test_bit(HFC_CFG_DIS_ECHANNEL, &hc->chan[i].cfg))
-			HFC_outb(hc, A_ST_CTRL1, V_E_IGNO);
-		else
-			HFC_outb(hc, A_ST_CTRL1, 0);
-		/* enable B-channel receive */
-		HFC_outb(hc, A_ST_CTRL2,  V_B1_RX_EN | V_B2_RX_EN);
-		/* state machine setup */
-		HFC_outb(hc, A_ST_WR_STATE, a_st_wr_state | V_ST_LD_STA);
-		udelay(6); /* wait at least 5,21us */
-		HFC_outb(hc, A_ST_WR_STATE, a_st_wr_state);
-		hc->hw.r_sci_msk |= 1 << pt;
-		/* state machine interrupts */
-		HFC_outb(hc, R_SCI_MSK, hc->hw.r_sci_msk);
-		/* unset sync on port */
-		if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-			hc->syncronized &=
-				~(1 << hc->chan[dch->slot].port);
-			plxsd_checksync(hc, 0);
-		}
-	}
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk("%s: done\n", __func__);
-}
-
-
-static int
-open_dchannel(struct hfc_multi *hc, struct dchannel *dch,
-	      struct channel_req *rq)
-{
-	int	err = 0;
-	u_long	flags;
-
-	if (debug & DEBUG_HW_OPEN)
-		printk(KERN_DEBUG "%s: dev(%d) open from %p\n", __func__,
-		       dch->dev.id, __builtin_return_address(0));
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	if ((dch->dev.D.protocol != ISDN_P_NONE) &&
-	    (dch->dev.D.protocol != rq->protocol)) {
-		if (debug & DEBUG_HFCMULTI_MODE)
-			printk(KERN_DEBUG "%s: change protocol %x to %x\n",
-			       __func__, dch->dev.D.protocol, rq->protocol);
-	}
-	if ((dch->dev.D.protocol == ISDN_P_TE_S0) &&
-	    (rq->protocol != ISDN_P_TE_S0))
-		l1_event(dch->l1, CLOSE_CHANNEL);
-	if (dch->dev.D.protocol != rq->protocol) {
-		if (rq->protocol == ISDN_P_TE_S0) {
-			err = create_l1(dch, hfcm_l1callback);
-			if (err)
-				return err;
-		}
-		dch->dev.D.protocol = rq->protocol;
-		spin_lock_irqsave(&hc->lock, flags);
-		hfcmulti_initmode(dch);
-		spin_unlock_irqrestore(&hc->lock, flags);
-	}
-	if (test_bit(FLG_ACTIVE, &dch->Flags))
-		_queue_data(&dch->dev.D, PH_ACTIVATE_IND, MISDN_ID_ANY,
-			    0, NULL, GFP_KERNEL);
-	rq->ch = &dch->dev.D;
-	if (!try_module_get(THIS_MODULE))
-		printk(KERN_WARNING "%s:cannot get module\n", __func__);
-	return 0;
-}
-
-static int
-open_bchannel(struct hfc_multi *hc, struct dchannel *dch,
-	      struct channel_req *rq)
-{
-	struct bchannel	*bch;
-	int		ch;
-
-	if (!test_channelmap(rq->adr.channel, dch->dev.channelmap))
-		return -EINVAL;
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	if (hc->ctype == HFC_TYPE_E1)
-		ch = rq->adr.channel;
-	else
-		ch = (rq->adr.channel - 1) + (dch->slot - 2);
-	bch = hc->chan[ch].bch;
-	if (!bch) {
-		printk(KERN_ERR "%s:internal error ch %d has no bch\n",
-		       __func__, ch);
-		return -EINVAL;
-	}
-	if (test_and_set_bit(FLG_OPEN, &bch->Flags))
-		return -EBUSY; /* b-channel can be only open once */
-	bch->ch.protocol = rq->protocol;
-	hc->chan[ch].rx_off = 0;
-	rq->ch = &bch->ch;
-	if (!try_module_get(THIS_MODULE))
-		printk(KERN_WARNING "%s:cannot get module\n", __func__);
-	return 0;
-}
-
-/*
- * device control function
- */
-static int
-channel_dctrl(struct dchannel *dch, struct mISDN_ctrl_req *cq)
-{
-	struct hfc_multi	*hc = dch->hw;
-	int	ret = 0;
-	int	wd_mode, wd_cnt;
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_HFC_OP | MISDN_CTRL_L1_TIMER3;
-		break;
-	case MISDN_CTRL_HFC_WD_INIT: /* init the watchdog */
-		wd_cnt = cq->p1 & 0xf;
-		wd_mode = !!(cq->p1 >> 4);
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG "%s: MISDN_CTRL_HFC_WD_INIT mode %s"
-			       ", counter 0x%x\n", __func__,
-			       wd_mode ? "AUTO" : "MANUAL", wd_cnt);
-		/* set the watchdog timer */
-		HFC_outb(hc, R_TI_WD, poll_timer | (wd_cnt << 4));
-		hc->hw.r_bert_wd_md = (wd_mode ? V_AUTO_WD_RES : 0);
-		if (hc->ctype == HFC_TYPE_XHFC)
-			hc->hw.r_bert_wd_md |= 0x40 /* V_WD_EN */;
-		/* init the watchdog register and reset the counter */
-		HFC_outb(hc, R_BERT_WD_MD, hc->hw.r_bert_wd_md | V_WD_RES);
-		if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-			/* enable the watchdog output for Speech-Design */
-			HFC_outb(hc, R_GPIO_SEL,  V_GPIO_SEL7);
-			HFC_outb(hc, R_GPIO_EN1,  V_GPIO_EN15);
-			HFC_outb(hc, R_GPIO_OUT1, 0);
-			HFC_outb(hc, R_GPIO_OUT1, V_GPIO_OUT15);
-		}
-		break;
-	case MISDN_CTRL_HFC_WD_RESET: /* reset the watchdog counter */
-		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG "%s: MISDN_CTRL_HFC_WD_RESET\n",
-			       __func__);
-		HFC_outb(hc, R_BERT_WD_MD, hc->hw.r_bert_wd_md | V_WD_RES);
-		break;
-	case MISDN_CTRL_L1_TIMER3:
-		ret = l1_event(dch->l1, HW_TIMER3_VALUE | (cq->p1 & 0xff));
-		break;
-	default:
-		printk(KERN_WARNING "%s: unknown Op %x\n",
-		       __func__, cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-hfcm_dctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct hfc_multi	*hc = dch->hw;
-	struct channel_req	*rq;
-	int			err = 0;
-	u_long			flags;
-
-	if (dch->debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: cmd:%x %p\n",
-		       __func__, cmd, arg);
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		rq = arg;
-		switch (rq->protocol) {
-		case ISDN_P_TE_S0:
-		case ISDN_P_NT_S0:
-			if (hc->ctype == HFC_TYPE_E1) {
-				err = -EINVAL;
-				break;
-			}
-			err = open_dchannel(hc, dch, rq); /* locked there */
-			break;
-		case ISDN_P_TE_E1:
-		case ISDN_P_NT_E1:
-			if (hc->ctype != HFC_TYPE_E1) {
-				err = -EINVAL;
-				break;
-			}
-			err = open_dchannel(hc, dch, rq); /* locked there */
-			break;
-		default:
-			spin_lock_irqsave(&hc->lock, flags);
-			err = open_bchannel(hc, dch, rq);
-			spin_unlock_irqrestore(&hc->lock, flags);
-		}
-		break;
-	case CLOSE_CHANNEL:
-		if (debug & DEBUG_HW_OPEN)
-			printk(KERN_DEBUG "%s: dev(%d) close from %p\n",
-			       __func__, dch->dev.id,
-			       __builtin_return_address(0));
-		module_put(THIS_MODULE);
-		break;
-	case CONTROL_CHANNEL:
-		spin_lock_irqsave(&hc->lock, flags);
-		err = channel_dctrl(dch, arg);
-		spin_unlock_irqrestore(&hc->lock, flags);
-		break;
-	default:
-		if (dch->debug & DEBUG_HW)
-			printk(KERN_DEBUG "%s: unknown command %x\n",
-			       __func__, cmd);
-		err = -EINVAL;
-	}
-	return err;
-}
-
-static int
-clockctl(void *priv, int enable)
-{
-	struct hfc_multi *hc = priv;
-
-	hc->iclock_on = enable;
-	return 0;
-}
-
-/*
- * initialize the card
- */
-
-/*
- * start timer irq, wait some time and check if we have interrupts.
- * if not, reset chip and try again.
- */
-static int
-init_card(struct hfc_multi *hc)
-{
-	int	err = -EIO;
-	u_long	flags;
-	void	__iomem *plx_acc;
-	u_long	plx_flags;
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: entered\n", __func__);
-
-	spin_lock_irqsave(&hc->lock, flags);
-	/* set interrupts but leave global interrupt disabled */
-	hc->hw.r_irq_ctrl = V_FIFO_IRQ;
-	disable_hwirq(hc);
-	spin_unlock_irqrestore(&hc->lock, flags);
-
-	if (request_irq(hc->irq, hfcmulti_interrupt, IRQF_SHARED,
-			"HFC-multi", hc)) {
-		printk(KERN_WARNING "mISDN: Could not get interrupt %d.\n",
-		       hc->irq);
-		hc->irq = 0;
-		return -EIO;
-	}
-
-	if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-		spin_lock_irqsave(&plx_lock, plx_flags);
-		plx_acc = hc->plx_membase + PLX_INTCSR;
-		writew((PLX_INTCSR_PCIINT_ENABLE | PLX_INTCSR_LINTI1_ENABLE),
-		       plx_acc); /* enable PCI & LINT1 irq */
-		spin_unlock_irqrestore(&plx_lock, plx_flags);
-	}
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: IRQ %d count %d\n",
-		       __func__, hc->irq, hc->irqcnt);
-	err = init_chip(hc);
-	if (err)
-		goto error;
-	/*
-	 * Finally enable IRQ output
-	 * this is only allowed, if an IRQ routine is already
-	 * established for this HFC, so don't do that earlier
-	 */
-	spin_lock_irqsave(&hc->lock, flags);
-	enable_hwirq(hc);
-	spin_unlock_irqrestore(&hc->lock, flags);
-	/* printk(KERN_DEBUG "no master irq set!!!\n"); */
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	schedule_timeout((100 * HZ) / 1000); /* Timeout 100ms */
-	/* turn IRQ off until chip is completely initialized */
-	spin_lock_irqsave(&hc->lock, flags);
-	disable_hwirq(hc);
-	spin_unlock_irqrestore(&hc->lock, flags);
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: IRQ %d count %d\n",
-		       __func__, hc->irq, hc->irqcnt);
-	if (hc->irqcnt) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: done\n", __func__);
-
-		return 0;
-	}
-	if (test_bit(HFC_CHIP_PCM_SLAVE, &hc->chip)) {
-		printk(KERN_INFO "ignoring missing interrupts\n");
-		return 0;
-	}
-
-	printk(KERN_ERR "HFC PCI: IRQ(%d) getting no interrupts during init.\n",
-	       hc->irq);
-
-	err = -EIO;
-
-error:
-	if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-		spin_lock_irqsave(&plx_lock, plx_flags);
-		plx_acc = hc->plx_membase + PLX_INTCSR;
-		writew(0x00, plx_acc); /*disable IRQs*/
-		spin_unlock_irqrestore(&plx_lock, plx_flags);
-	}
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: free irq %d\n", __func__, hc->irq);
-	if (hc->irq) {
-		free_irq(hc->irq, hc);
-		hc->irq = 0;
-	}
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: done (err=%d)\n", __func__, err);
-	return err;
-}
-
-/*
- * find pci device and set it up
- */
-
-static int
-setup_pci(struct hfc_multi *hc, struct pci_dev *pdev,
-	  const struct pci_device_id *ent)
-{
-	struct hm_map	*m = (struct hm_map *)ent->driver_data;
-
-	printk(KERN_INFO
-	       "HFC-multi: card manufacturer: '%s' card name: '%s' clock: %s\n",
-	       m->vendor_name, m->card_name, m->clock2 ? "double" : "normal");
-
-	hc->pci_dev = pdev;
-	if (m->clock2)
-		test_and_set_bit(HFC_CHIP_CLOCK2, &hc->chip);
-
-	if (ent->vendor == PCI_VENDOR_ID_DIGIUM &&
-	    ent->device == PCI_DEVICE_ID_DIGIUM_HFC4S) {
-		test_and_set_bit(HFC_CHIP_B410P, &hc->chip);
-		test_and_set_bit(HFC_CHIP_PCM_MASTER, &hc->chip);
-		test_and_clear_bit(HFC_CHIP_PCM_SLAVE, &hc->chip);
-		hc->slots = 32;
-	}
-
-	if (hc->pci_dev->irq <= 0) {
-		printk(KERN_WARNING "HFC-multi: No IRQ for PCI card found.\n");
-		return -EIO;
-	}
-	if (pci_enable_device(hc->pci_dev)) {
-		printk(KERN_WARNING "HFC-multi: Error enabling PCI card.\n");
-		return -EIO;
-	}
-	hc->leds = m->leds;
-	hc->ledstate = 0xAFFEAFFE;
-	hc->opticalsupport = m->opticalsupport;
-
-	hc->pci_iobase = 0;
-	hc->pci_membase = NULL;
-	hc->plx_membase = NULL;
-
-	/* set memory access methods */
-	if (m->io_mode) /* use mode from card config */
-		hc->io_mode = m->io_mode;
-	switch (hc->io_mode) {
-	case HFC_IO_MODE_PLXSD:
-		test_and_set_bit(HFC_CHIP_PLXSD, &hc->chip);
-		hc->slots = 128; /* required */
-		hc->HFC_outb = HFC_outb_pcimem;
-		hc->HFC_inb = HFC_inb_pcimem;
-		hc->HFC_inw = HFC_inw_pcimem;
-		hc->HFC_wait = HFC_wait_pcimem;
-		hc->read_fifo = read_fifo_pcimem;
-		hc->write_fifo = write_fifo_pcimem;
-		hc->plx_origmembase =  hc->pci_dev->resource[0].start;
-		/* MEMBASE 1 is PLX PCI Bridge */
-
-		if (!hc->plx_origmembase) {
-			printk(KERN_WARNING
-			       "HFC-multi: No IO-Memory for PCI PLX bridge found\n");
-			pci_disable_device(hc->pci_dev);
-			return -EIO;
-		}
-
-		hc->plx_membase = ioremap(hc->plx_origmembase, 0x80);
-		if (!hc->plx_membase) {
-			printk(KERN_WARNING
-			       "HFC-multi: failed to remap plx address space. "
-			       "(internal error)\n");
-			pci_disable_device(hc->pci_dev);
-			return -EIO;
-		}
-		printk(KERN_INFO
-		       "HFC-multi: plx_membase:%#lx plx_origmembase:%#lx\n",
-		       (u_long)hc->plx_membase, hc->plx_origmembase);
-
-		hc->pci_origmembase =  hc->pci_dev->resource[2].start;
-		/* MEMBASE 1 is PLX PCI Bridge */
-		if (!hc->pci_origmembase) {
-			printk(KERN_WARNING
-			       "HFC-multi: No IO-Memory for PCI card found\n");
-			pci_disable_device(hc->pci_dev);
-			return -EIO;
-		}
-
-		hc->pci_membase = ioremap(hc->pci_origmembase, 0x400);
-		if (!hc->pci_membase) {
-			printk(KERN_WARNING "HFC-multi: failed to remap io "
-			       "address space. (internal error)\n");
-			pci_disable_device(hc->pci_dev);
-			return -EIO;
-		}
-
-		printk(KERN_INFO
-		       "card %d: defined at MEMBASE %#lx (%#lx) IRQ %d HZ %d "
-		       "leds-type %d\n",
-		       hc->id, (u_long)hc->pci_membase, hc->pci_origmembase,
-		       hc->pci_dev->irq, HZ, hc->leds);
-		pci_write_config_word(hc->pci_dev, PCI_COMMAND, PCI_ENA_MEMIO);
-		break;
-	case HFC_IO_MODE_PCIMEM:
-		hc->HFC_outb = HFC_outb_pcimem;
-		hc->HFC_inb = HFC_inb_pcimem;
-		hc->HFC_inw = HFC_inw_pcimem;
-		hc->HFC_wait = HFC_wait_pcimem;
-		hc->read_fifo = read_fifo_pcimem;
-		hc->write_fifo = write_fifo_pcimem;
-		hc->pci_origmembase = hc->pci_dev->resource[1].start;
-		if (!hc->pci_origmembase) {
-			printk(KERN_WARNING
-			       "HFC-multi: No IO-Memory for PCI card found\n");
-			pci_disable_device(hc->pci_dev);
-			return -EIO;
-		}
-
-		hc->pci_membase = ioremap(hc->pci_origmembase, 256);
-		if (!hc->pci_membase) {
-			printk(KERN_WARNING
-			       "HFC-multi: failed to remap io address space. "
-			       "(internal error)\n");
-			pci_disable_device(hc->pci_dev);
-			return -EIO;
-		}
-		printk(KERN_INFO "card %d: defined at MEMBASE %#lx (%#lx) IRQ "
-		       "%d HZ %d leds-type %d\n", hc->id, (u_long)hc->pci_membase,
-		       hc->pci_origmembase, hc->pci_dev->irq, HZ, hc->leds);
-		pci_write_config_word(hc->pci_dev, PCI_COMMAND, PCI_ENA_MEMIO);
-		break;
-	case HFC_IO_MODE_REGIO:
-		hc->HFC_outb = HFC_outb_regio;
-		hc->HFC_inb = HFC_inb_regio;
-		hc->HFC_inw = HFC_inw_regio;
-		hc->HFC_wait = HFC_wait_regio;
-		hc->read_fifo = read_fifo_regio;
-		hc->write_fifo = write_fifo_regio;
-		hc->pci_iobase = (u_int) hc->pci_dev->resource[0].start;
-		if (!hc->pci_iobase) {
-			printk(KERN_WARNING
-			       "HFC-multi: No IO for PCI card found\n");
-			pci_disable_device(hc->pci_dev);
-			return -EIO;
-		}
-
-		if (!request_region(hc->pci_iobase, 8, "hfcmulti")) {
-			printk(KERN_WARNING "HFC-multi: failed to request "
-			       "address space at 0x%08lx (internal error)\n",
-			       hc->pci_iobase);
-			pci_disable_device(hc->pci_dev);
-			return -EIO;
-		}
-
-		printk(KERN_INFO
-		       "%s %s: defined at IOBASE %#x IRQ %d HZ %d leds-type %d\n",
-		       m->vendor_name, m->card_name, (u_int) hc->pci_iobase,
-		       hc->pci_dev->irq, HZ, hc->leds);
-		pci_write_config_word(hc->pci_dev, PCI_COMMAND, PCI_ENA_REGIO);
-		break;
-	default:
-		printk(KERN_WARNING "HFC-multi: Invalid IO mode.\n");
-		pci_disable_device(hc->pci_dev);
-		return -EIO;
-	}
-
-	pci_set_drvdata(hc->pci_dev, hc);
-
-	/* At this point the needed PCI config is done */
-	/* fifos are still not enabled */
-	return 0;
-}
-
-
-/*
- * remove port
- */
-
-static void
-release_port(struct hfc_multi *hc, struct dchannel *dch)
-{
-	int	pt, ci, i = 0;
-	u_long	flags;
-	struct bchannel *pb;
-
-	ci = dch->slot;
-	pt = hc->chan[ci].port;
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: entered for port %d\n",
-		       __func__, pt + 1);
-
-	if (pt >= hc->ports) {
-		printk(KERN_WARNING "%s: ERROR port out of range (%d).\n",
-		       __func__, pt + 1);
-		return;
-	}
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: releasing port=%d\n",
-		       __func__, pt + 1);
-
-	if (dch->dev.D.protocol == ISDN_P_TE_S0)
-		l1_event(dch->l1, CLOSE_CHANNEL);
-
-	hc->chan[ci].dch = NULL;
-
-	if (hc->created[pt]) {
-		hc->created[pt] = 0;
-		mISDN_unregister_device(&dch->dev);
-	}
-
-	spin_lock_irqsave(&hc->lock, flags);
-
-	if (dch->timer.function) {
-		timer_delete(&dch->timer);
-		dch->timer.function = NULL;
-	}
-
-	if (hc->ctype == HFC_TYPE_E1) { /* E1 */
-		/* remove sync */
-		if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-			hc->syncronized = 0;
-			plxsd_checksync(hc, 1);
-		}
-		/* free channels */
-		for (i = 0; i <= 31; i++) {
-			if (!((1 << i) & hc->bmask[pt])) /* skip unused chan */
-				continue;
-			if (hc->chan[i].bch) {
-				if (debug & DEBUG_HFCMULTI_INIT)
-					printk(KERN_DEBUG
-					       "%s: free port %d channel %d\n",
-					       __func__, hc->chan[i].port + 1, i);
-				pb = hc->chan[i].bch;
-				hc->chan[i].bch = NULL;
-				spin_unlock_irqrestore(&hc->lock, flags);
-				mISDN_freebchannel(pb);
-				kfree(pb);
-				kfree(hc->chan[i].coeff);
-				spin_lock_irqsave(&hc->lock, flags);
-			}
-		}
-	} else {
-		/* remove sync */
-		if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
-			hc->syncronized &=
-				~(1 << hc->chan[ci].port);
-			plxsd_checksync(hc, 1);
-		}
-		/* free channels */
-		if (hc->chan[ci - 2].bch) {
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG
-				       "%s: free port %d channel %d\n",
-				       __func__, hc->chan[ci - 2].port + 1,
-				       ci - 2);
-			pb = hc->chan[ci - 2].bch;
-			hc->chan[ci - 2].bch = NULL;
-			spin_unlock_irqrestore(&hc->lock, flags);
-			mISDN_freebchannel(pb);
-			kfree(pb);
-			kfree(hc->chan[ci - 2].coeff);
-			spin_lock_irqsave(&hc->lock, flags);
-		}
-		if (hc->chan[ci - 1].bch) {
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG
-				       "%s: free port %d channel %d\n",
-				       __func__, hc->chan[ci - 1].port + 1,
-				       ci - 1);
-			pb = hc->chan[ci - 1].bch;
-			hc->chan[ci - 1].bch = NULL;
-			spin_unlock_irqrestore(&hc->lock, flags);
-			mISDN_freebchannel(pb);
-			kfree(pb);
-			kfree(hc->chan[ci - 1].coeff);
-			spin_lock_irqsave(&hc->lock, flags);
-		}
-	}
-
-	spin_unlock_irqrestore(&hc->lock, flags);
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: free port %d channel D(%d)\n", __func__,
-			pt+1, ci);
-	mISDN_freedchannel(dch);
-	kfree(dch);
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: done!\n", __func__);
-}
-
-static void
-release_card(struct hfc_multi *hc)
-{
-	u_long	flags;
-	int	ch;
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: release card (%d) entered\n",
-		       __func__, hc->id);
-
-	/* unregister clock source */
-	if (hc->iclock)
-		mISDN_unregister_clock(hc->iclock);
-
-	/* disable and free irq */
-	spin_lock_irqsave(&hc->lock, flags);
-	disable_hwirq(hc);
-	spin_unlock_irqrestore(&hc->lock, flags);
-	udelay(1000);
-	if (hc->irq) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: free irq %d (hc=%p)\n",
-			    __func__, hc->irq, hc);
-		free_irq(hc->irq, hc);
-		hc->irq = 0;
-
-	}
-
-	/* disable D-channels & B-channels */
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: disable all channels (d and b)\n",
-		       __func__);
-	for (ch = 0; ch <= 31; ch++) {
-		if (hc->chan[ch].dch)
-			release_port(hc, hc->chan[ch].dch);
-	}
-
-	/* dimm leds */
-	if (hc->leds)
-		hfcmulti_leds(hc);
-
-	/* release hardware */
-	release_io_hfcmulti(hc);
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: remove instance from list\n",
-		       __func__);
-	list_del(&hc->list);
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: delete instance\n", __func__);
-	if (hc == syncmaster)
-		syncmaster = NULL;
-	kfree(hc);
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: card successfully removed\n",
-		       __func__);
-}
-
-static void
-init_e1_port_hw(struct hfc_multi *hc, struct hm_map *m)
-{
-	/* set optical line type */
-	if (port[Port_cnt] & 0x001) {
-		if (!m->opticalsupport)  {
-			printk(KERN_INFO
-			       "This board has no optical "
-			       "support\n");
-		} else {
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG
-				       "%s: PORT set optical "
-				       "interface: card(%d) "
-				       "port(%d)\n",
-				       __func__,
-				       HFC_cnt + 1, 1);
-			test_and_set_bit(HFC_CFG_OPTICAL,
-			    &hc->chan[hc->dnum[0]].cfg);
-		}
-	}
-	/* set LOS report */
-	if (port[Port_cnt] & 0x004) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: PORT set "
-			       "LOS report: card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, 1);
-		test_and_set_bit(HFC_CFG_REPORT_LOS,
-		    &hc->chan[hc->dnum[0]].cfg);
-	}
-	/* set AIS report */
-	if (port[Port_cnt] & 0x008) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: PORT set "
-			       "AIS report: card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, 1);
-		test_and_set_bit(HFC_CFG_REPORT_AIS,
-		    &hc->chan[hc->dnum[0]].cfg);
-	}
-	/* set SLIP report */
-	if (port[Port_cnt] & 0x010) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG
-			       "%s: PORT set SLIP report: "
-			       "card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, 1);
-		test_and_set_bit(HFC_CFG_REPORT_SLIP,
-		    &hc->chan[hc->dnum[0]].cfg);
-	}
-	/* set RDI report */
-	if (port[Port_cnt] & 0x020) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG
-			       "%s: PORT set RDI report: "
-			       "card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, 1);
-		test_and_set_bit(HFC_CFG_REPORT_RDI,
-		    &hc->chan[hc->dnum[0]].cfg);
-	}
-	/* set CRC-4 Mode */
-	if (!(port[Port_cnt] & 0x100)) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: PORT turn on CRC4 report:"
-			       " card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, 1);
-		test_and_set_bit(HFC_CFG_CRC4,
-		    &hc->chan[hc->dnum[0]].cfg);
-	} else {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: PORT turn off CRC4"
-			       " report: card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, 1);
-	}
-	/* set forced clock */
-	if (port[Port_cnt] & 0x0200) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: PORT force getting clock from "
-			       "E1: card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, 1);
-		test_and_set_bit(HFC_CHIP_E1CLOCK_GET, &hc->chip);
-	} else
-		if (port[Port_cnt] & 0x0400) {
-			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_DEBUG "%s: PORT force putting clock to "
-				       "E1: card(%d) port(%d)\n",
-				       __func__, HFC_cnt + 1, 1);
-			test_and_set_bit(HFC_CHIP_E1CLOCK_PUT, &hc->chip);
-		}
-	/* set JATT PLL */
-	if (port[Port_cnt] & 0x0800) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG "%s: PORT disable JATT PLL on "
-			       "E1: card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, 1);
-		test_and_set_bit(HFC_CHIP_RX_SYNC, &hc->chip);
-	}
-	/* set elastic jitter buffer */
-	if (port[Port_cnt] & 0x3000) {
-		hc->chan[hc->dnum[0]].jitter = (port[Port_cnt]>>12) & 0x3;
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG
-			       "%s: PORT set elastic "
-			       "buffer to %d: card(%d) port(%d)\n",
-			    __func__, hc->chan[hc->dnum[0]].jitter,
-			       HFC_cnt + 1, 1);
-	} else
-		hc->chan[hc->dnum[0]].jitter = 2; /* default */
-}
-
-static int
-init_e1_port(struct hfc_multi *hc, struct hm_map *m, int pt)
-{
-	struct dchannel	*dch;
-	struct bchannel	*bch;
-	int		ch, ret = 0;
-	char		name[MISDN_MAX_IDLEN];
-	int		bcount = 0;
-
-	dch = kzalloc_obj(struct dchannel);
-	if (!dch)
-		return -ENOMEM;
-	dch->debug = debug;
-	mISDN_initdchannel(dch, MAX_DFRAME_LEN_L1, ph_state_change);
-	dch->hw = hc;
-	dch->dev.Dprotocols = (1 << ISDN_P_TE_E1) | (1 << ISDN_P_NT_E1);
-	dch->dev.Bprotocols = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) |
-	    (1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK));
-	dch->dev.D.send = handle_dmsg;
-	dch->dev.D.ctrl = hfcm_dctrl;
-	dch->slot = hc->dnum[pt];
-	hc->chan[hc->dnum[pt]].dch = dch;
-	hc->chan[hc->dnum[pt]].port = pt;
-	hc->chan[hc->dnum[pt]].nt_timer = -1;
-	for (ch = 1; ch <= 31; ch++) {
-		if (!((1 << ch) & hc->bmask[pt])) /* skip unused channel */
-			continue;
-		bch = kzalloc_obj(struct bchannel);
-		if (!bch) {
-			printk(KERN_ERR "%s: no memory for bchannel\n",
-			    __func__);
-			ret = -ENOMEM;
-			goto free_chan;
-		}
-		hc->chan[ch].coeff = kzalloc(512, GFP_KERNEL);
-		if (!hc->chan[ch].coeff) {
-			printk(KERN_ERR "%s: no memory for coeffs\n",
-			    __func__);
-			ret = -ENOMEM;
-			kfree(bch);
-			goto free_chan;
-		}
-		bch->nr = ch;
-		bch->slot = ch;
-		bch->debug = debug;
-		mISDN_initbchannel(bch, MAX_DATA_MEM, poll >> 1);
-		bch->hw = hc;
-		bch->ch.send = handle_bmsg;
-		bch->ch.ctrl = hfcm_bctrl;
-		bch->ch.nr = ch;
-		list_add(&bch->ch.list, &dch->dev.bchannels);
-		hc->chan[ch].bch = bch;
-		hc->chan[ch].port = pt;
-		set_channelmap(bch->nr, dch->dev.channelmap);
-		bcount++;
-	}
-	dch->dev.nrbchan = bcount;
-	if (pt == 0)
-		init_e1_port_hw(hc, m);
-	if (hc->ports > 1)
-		snprintf(name, MISDN_MAX_IDLEN - 1, "hfc-e1.%d-%d",
-				HFC_cnt + 1, pt+1);
-	else
-		snprintf(name, MISDN_MAX_IDLEN - 1, "hfc-e1.%d", HFC_cnt + 1);
-	ret = mISDN_register_device(&dch->dev, &hc->pci_dev->dev, name);
-	if (ret)
-		goto free_chan;
-	hc->created[pt] = 1;
-	return ret;
-free_chan:
-	release_port(hc, dch);
-	return ret;
-}
-
-static int
-init_multi_port(struct hfc_multi *hc, int pt)
-{
-	struct dchannel	*dch;
-	struct bchannel	*bch;
-	int		ch, i, ret = 0;
-	char		name[MISDN_MAX_IDLEN];
-
-	dch = kzalloc_obj(struct dchannel);
-	if (!dch)
-		return -ENOMEM;
-	dch->debug = debug;
-	mISDN_initdchannel(dch, MAX_DFRAME_LEN_L1, ph_state_change);
-	dch->hw = hc;
-	dch->dev.Dprotocols = (1 << ISDN_P_TE_S0) | (1 << ISDN_P_NT_S0);
-	dch->dev.Bprotocols = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK));
-	dch->dev.D.send = handle_dmsg;
-	dch->dev.D.ctrl = hfcm_dctrl;
-	dch->dev.nrbchan = 2;
-	i = pt << 2;
-	dch->slot = i + 2;
-	hc->chan[i + 2].dch = dch;
-	hc->chan[i + 2].port = pt;
-	hc->chan[i + 2].nt_timer = -1;
-	for (ch = 0; ch < dch->dev.nrbchan; ch++) {
-		bch = kzalloc_obj(struct bchannel);
-		if (!bch) {
-			printk(KERN_ERR "%s: no memory for bchannel\n",
-			       __func__);
-			ret = -ENOMEM;
-			goto free_chan;
-		}
-		hc->chan[i + ch].coeff = kzalloc(512, GFP_KERNEL);
-		if (!hc->chan[i + ch].coeff) {
-			printk(KERN_ERR "%s: no memory for coeffs\n",
-			       __func__);
-			ret = -ENOMEM;
-			kfree(bch);
-			goto free_chan;
-		}
-		bch->nr = ch + 1;
-		bch->slot = i + ch;
-		bch->debug = debug;
-		mISDN_initbchannel(bch, MAX_DATA_MEM, poll >> 1);
-		bch->hw = hc;
-		bch->ch.send = handle_bmsg;
-		bch->ch.ctrl = hfcm_bctrl;
-		bch->ch.nr = ch + 1;
-		list_add(&bch->ch.list, &dch->dev.bchannels);
-		hc->chan[i + ch].bch = bch;
-		hc->chan[i + ch].port = pt;
-		set_channelmap(bch->nr, dch->dev.channelmap);
-	}
-	/* set master clock */
-	if (port[Port_cnt] & 0x001) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG
-			       "%s: PROTOCOL set master clock: "
-			       "card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, pt + 1);
-		if (dch->dev.D.protocol != ISDN_P_TE_S0) {
-			printk(KERN_ERR "Error: Master clock "
-			       "for port(%d) of card(%d) is only"
-			       " possible with TE-mode\n",
-			       pt + 1, HFC_cnt + 1);
-			ret = -EINVAL;
-			goto free_chan;
-		}
-		if (hc->masterclk >= 0) {
-			printk(KERN_ERR "Error: Master clock "
-			       "for port(%d) of card(%d) already "
-			       "defined for port(%d)\n",
-			       pt + 1, HFC_cnt + 1, hc->masterclk + 1);
-			ret = -EINVAL;
-			goto free_chan;
-		}
-		hc->masterclk = pt;
-	}
-	/* set transmitter line to non capacitive */
-	if (port[Port_cnt] & 0x002) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG
-			       "%s: PROTOCOL set non capacitive "
-			       "transmitter: card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, pt + 1);
-		test_and_set_bit(HFC_CFG_NONCAP_TX,
-				 &hc->chan[i + 2].cfg);
-	}
-	/* disable E-channel */
-	if (port[Port_cnt] & 0x004) {
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG
-			       "%s: PROTOCOL disable E-channel: "
-			       "card(%d) port(%d)\n",
-			       __func__, HFC_cnt + 1, pt + 1);
-		test_and_set_bit(HFC_CFG_DIS_ECHANNEL,
-				 &hc->chan[i + 2].cfg);
-	}
-	if (hc->ctype == HFC_TYPE_XHFC) {
-		snprintf(name, MISDN_MAX_IDLEN - 1, "xhfc.%d-%d",
-			 HFC_cnt + 1, pt + 1);
-		ret = mISDN_register_device(&dch->dev, NULL, name);
-	} else {
-		snprintf(name, MISDN_MAX_IDLEN - 1, "hfc-%ds.%d-%d",
-			 hc->ctype, HFC_cnt + 1, pt + 1);
-		ret = mISDN_register_device(&dch->dev, &hc->pci_dev->dev, name);
-	}
-	if (ret)
-		goto free_chan;
-	hc->created[pt] = 1;
-	return ret;
-free_chan:
-	release_port(hc, dch);
-	return ret;
-}
-
-static int
-hfcmulti_init(struct hm_map *m, struct pci_dev *pdev,
-	      const struct pci_device_id *ent)
-{
-	int		ret_err = 0;
-	int		pt;
-	struct hfc_multi	*hc;
-	u_long		flags;
-	u_char		dips = 0, pmj = 0; /* dip settings, port mode Jumpers */
-	int		i, ch;
-	u_int		maskcheck;
-
-	if (HFC_cnt >= MAX_CARDS) {
-		printk(KERN_ERR "too many cards (max=%d).\n",
-		       MAX_CARDS);
-		return -EINVAL;
-	}
-	if ((type[HFC_cnt] & 0xff) && (type[HFC_cnt] & 0xff) != m->type) {
-		printk(KERN_WARNING "HFC-MULTI: Card '%s:%s' type %d found but "
-		       "type[%d] %d was supplied as module parameter\n",
-		       m->vendor_name, m->card_name, m->type, HFC_cnt,
-		       type[HFC_cnt] & 0xff);
-		printk(KERN_WARNING "HFC-MULTI: Load module without parameters "
-		       "first, to see cards and their types.");
-		return -EINVAL;
-	}
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: Registering %s:%s chip type %d (0x%x)\n",
-		       __func__, m->vendor_name, m->card_name, m->type,
-		       type[HFC_cnt]);
-
-	/* allocate card+fifo structure */
-	hc = kzalloc_obj(struct hfc_multi);
-	if (!hc) {
-		printk(KERN_ERR "No kmem for HFC-Multi card\n");
-		return -ENOMEM;
-	}
-	spin_lock_init(&hc->lock);
-	hc->mtyp = m;
-	hc->ctype =  m->type;
-	hc->ports = m->ports;
-	hc->id = HFC_cnt;
-	hc->pcm = pcm[HFC_cnt];
-	hc->io_mode = iomode[HFC_cnt];
-	if (hc->ctype == HFC_TYPE_E1 && dmask[E1_cnt]) {
-		/* fragment card */
-		pt = 0;
-		maskcheck = 0;
-		for (ch = 0; ch <= 31; ch++) {
-			if (!((1 << ch) & dmask[E1_cnt]))
-				continue;
-			hc->dnum[pt] = ch;
-			hc->bmask[pt] = bmask[bmask_cnt++];
-			if ((maskcheck & hc->bmask[pt])
-			 || (dmask[E1_cnt] & hc->bmask[pt])) {
-				printk(KERN_INFO
-				       "HFC-E1 #%d has overlapping B-channels on fragment #%d\n",
-				       E1_cnt + 1, pt);
-				kfree(hc);
-				return -EINVAL;
-			}
-			maskcheck |= hc->bmask[pt];
-			printk(KERN_INFO
-			       "HFC-E1 #%d uses D-channel on slot %d and a B-channel map of 0x%08x\n",
-				E1_cnt + 1, ch, hc->bmask[pt]);
-			pt++;
-		}
-		hc->ports = pt;
-	}
-	if (hc->ctype == HFC_TYPE_E1 && !dmask[E1_cnt]) {
-		/* default card layout */
-		hc->dnum[0] = 16;
-		hc->bmask[0] = 0xfffefffe;
-		hc->ports = 1;
-	}
-
-	/* set chip specific features */
-	hc->masterclk = -1;
-	if (type[HFC_cnt] & 0x100) {
-		test_and_set_bit(HFC_CHIP_ULAW, &hc->chip);
-		hc->silence = 0xff; /* ulaw silence */
-	} else
-		hc->silence = 0x2a; /* alaw silence */
-	if ((poll >> 1) > sizeof(hc->silence_data)) {
-		printk(KERN_ERR "HFCMULTI error: silence_data too small, "
-		       "please fix\n");
-		kfree(hc);
-		return -EINVAL;
-	}
-	for (i = 0; i < (poll >> 1); i++)
-		hc->silence_data[i] = hc->silence;
-
-	if (hc->ctype != HFC_TYPE_XHFC) {
-		if (!(type[HFC_cnt] & 0x200))
-			test_and_set_bit(HFC_CHIP_DTMF, &hc->chip);
-		test_and_set_bit(HFC_CHIP_CONF, &hc->chip);
-	}
-
-	if (type[HFC_cnt] & 0x800)
-		test_and_set_bit(HFC_CHIP_PCM_SLAVE, &hc->chip);
-	if (type[HFC_cnt] & 0x1000) {
-		test_and_set_bit(HFC_CHIP_PCM_MASTER, &hc->chip);
-		test_and_clear_bit(HFC_CHIP_PCM_SLAVE, &hc->chip);
-	}
-	if (type[HFC_cnt] & 0x4000)
-		test_and_set_bit(HFC_CHIP_EXRAM_128, &hc->chip);
-	if (type[HFC_cnt] & 0x8000)
-		test_and_set_bit(HFC_CHIP_EXRAM_512, &hc->chip);
-	hc->slots = 32;
-	if (type[HFC_cnt] & 0x10000)
-		hc->slots = 64;
-	if (type[HFC_cnt] & 0x20000)
-		hc->slots = 128;
-	if (type[HFC_cnt] & 0x80000) {
-		test_and_set_bit(HFC_CHIP_WATCHDOG, &hc->chip);
-		hc->wdcount = 0;
-		hc->wdbyte = V_GPIO_OUT2;
-		printk(KERN_NOTICE "Watchdog enabled\n");
-	}
-
-	if (pdev && ent)
-		/* setup pci, hc->slots may change due to PLXSD */
-		ret_err = setup_pci(hc, pdev, ent);
-	else
-#ifdef CONFIG_MISDN_HFCMULTI_8xx
-		ret_err = setup_embedded(hc, m);
-#else
-	{
-		printk(KERN_WARNING "Embedded IO Mode not selected\n");
-		ret_err = -EIO;
-	}
-#endif
-	if (ret_err) {
-		if (hc == syncmaster)
-			syncmaster = NULL;
-		kfree(hc);
-		return ret_err;
-	}
-
-	hc->HFC_outb_nodebug = hc->HFC_outb;
-	hc->HFC_inb_nodebug = hc->HFC_inb;
-	hc->HFC_inw_nodebug = hc->HFC_inw;
-	hc->HFC_wait_nodebug = hc->HFC_wait;
-#ifdef HFC_REGISTER_DEBUG
-	hc->HFC_outb = HFC_outb_debug;
-	hc->HFC_inb = HFC_inb_debug;
-	hc->HFC_inw = HFC_inw_debug;
-	hc->HFC_wait = HFC_wait_debug;
-#endif
-	/* create channels */
-	for (pt = 0; pt < hc->ports; pt++) {
-		if (Port_cnt >= MAX_PORTS) {
-			printk(KERN_ERR "too many ports (max=%d).\n",
-			       MAX_PORTS);
-			ret_err = -EINVAL;
-			goto free_card;
-		}
-		if (hc->ctype == HFC_TYPE_E1)
-			ret_err = init_e1_port(hc, m, pt);
-		else
-			ret_err = init_multi_port(hc, pt);
-		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_DEBUG
-			    "%s: Registering D-channel, card(%d) port(%d) "
-			       "result %d\n",
-			    __func__, HFC_cnt + 1, pt + 1, ret_err);
-
-		if (ret_err) {
-			while (pt) { /* release already registered ports */
-				pt--;
-				if (hc->ctype == HFC_TYPE_E1)
-					release_port(hc,
-						hc->chan[hc->dnum[pt]].dch);
-				else
-					release_port(hc,
-						hc->chan[(pt << 2) + 2].dch);
-			}
-			goto free_card;
-		}
-		if (hc->ctype != HFC_TYPE_E1)
-			Port_cnt++; /* for each S0 port */
-	}
-	if (hc->ctype == HFC_TYPE_E1) {
-		Port_cnt++; /* for each E1 port */
-		E1_cnt++;
-	}
-
-	/* disp switches */
-	switch (m->dip_type) {
-	case DIP_4S:
-		/*
-		 * Get DIP setting for beroNet 1S/2S/4S cards
-		 * DIP Setting: (collect GPIO 13/14/15 (R_GPIO_IN1) +
-		 * GPI 19/23 (R_GPI_IN2))
-		 */
-		dips = ((~HFC_inb(hc, R_GPIO_IN1) & 0xE0) >> 5) |
-			((~HFC_inb(hc, R_GPI_IN2) & 0x80) >> 3) |
-			(~HFC_inb(hc, R_GPI_IN2) & 0x08);
-
-		/* Port mode (TE/NT) jumpers */
-		pmj = ((HFC_inb(hc, R_GPI_IN3) >> 4)  & 0xf);
-
-		if (test_bit(HFC_CHIP_B410P, &hc->chip))
-			pmj = ~pmj & 0xf;
-
-		printk(KERN_INFO "%s: %s DIPs(0x%x) jumpers(0x%x)\n",
-		       m->vendor_name, m->card_name, dips, pmj);
-		break;
-	case DIP_8S:
-		/*
-		 * Get DIP Setting for beroNet 8S0+ cards
-		 * Enable PCI auxbridge function
-		 */
-		HFC_outb(hc, R_BRG_PCM_CFG, 1 | V_PCM_CLK);
-		/* prepare access to auxport */
-		outw(0x4000, hc->pci_iobase + 4);
-		/*
-		 * some dummy reads are required to
-		 * read valid DIP switch data
-		 */
-		dips = inb(hc->pci_iobase);
-		dips = inb(hc->pci_iobase);
-		dips = inb(hc->pci_iobase);
-		dips = ~inb(hc->pci_iobase) & 0x3F;
-		outw(0x0, hc->pci_iobase + 4);
-		/* disable PCI auxbridge function */
-		HFC_outb(hc, R_BRG_PCM_CFG, V_PCM_CLK);
-		printk(KERN_INFO "%s: %s DIPs(0x%x)\n",
-		       m->vendor_name, m->card_name, dips);
-		break;
-	case DIP_E1:
-		/*
-		 * get DIP Setting for beroNet E1 cards
-		 * DIP Setting: collect GPI 4/5/6/7 (R_GPI_IN0)
-		 */
-		dips = (~HFC_inb(hc, R_GPI_IN0) & 0xF0) >> 4;
-		printk(KERN_INFO "%s: %s DIPs(0x%x)\n",
-		       m->vendor_name, m->card_name, dips);
-		break;
-	}
-
-	/* add to list */
-	spin_lock_irqsave(&HFClock, flags);
-	list_add_tail(&hc->list, &HFClist);
-	spin_unlock_irqrestore(&HFClock, flags);
-
-	/* use as clock source */
-	if (clock == HFC_cnt + 1)
-		hc->iclock = mISDN_register_clock("HFCMulti", 0, clockctl, hc);
-
-	/* initialize hardware */
-	hc->irq = (m->irq) ? : hc->pci_dev->irq;
-	ret_err = init_card(hc);
-	if (ret_err) {
-		printk(KERN_ERR "init card returns %d\n", ret_err);
-		release_card(hc);
-		return ret_err;
-	}
-
-	/* start IRQ and return */
-	spin_lock_irqsave(&hc->lock, flags);
-	enable_hwirq(hc);
-	spin_unlock_irqrestore(&hc->lock, flags);
-	return 0;
-
-free_card:
-	release_io_hfcmulti(hc);
-	if (hc == syncmaster)
-		syncmaster = NULL;
-	kfree(hc);
-	return ret_err;
-}
-
-static void hfc_remove_pci(struct pci_dev *pdev)
-{
-	struct hfc_multi	*card = pci_get_drvdata(pdev);
-	u_long			flags;
-
-	if (debug)
-		printk(KERN_INFO "removing hfc_multi card vendor:%x "
-		       "device:%x subvendor:%x subdevice:%x\n",
-		       pdev->vendor, pdev->device,
-		       pdev->subsystem_vendor, pdev->subsystem_device);
-
-	if (card) {
-		spin_lock_irqsave(&HFClock, flags);
-		release_card(card);
-		spin_unlock_irqrestore(&HFClock, flags);
-	}  else {
-		if (debug)
-			printk(KERN_DEBUG "%s: drvdata already removed\n",
-			       __func__);
-	}
-}
-
-#define	VENDOR_CCD	"Cologne Chip AG"
-#define	VENDOR_BN	"beroNet GmbH"
-#define	VENDOR_DIG	"Digium Inc."
-#define VENDOR_JH	"Junghanns.NET GmbH"
-#define VENDOR_PRIM	"PrimuX"
-
-static const struct hm_map hfcm_map[] = {
-	/*0*/	{VENDOR_BN, "HFC-1S Card (mini PCI)", 4, 1, 1, 3, 0, DIP_4S, 0, 0},
-	/*1*/	{VENDOR_BN, "HFC-2S Card", 4, 2, 1, 3, 0, DIP_4S, 0, 0},
-	/*2*/	{VENDOR_BN, "HFC-2S Card (mini PCI)", 4, 2, 1, 3, 0, DIP_4S, 0, 0},
-	/*3*/	{VENDOR_BN, "HFC-4S Card", 4, 4, 1, 2, 0, DIP_4S, 0, 0},
-	/*4*/	{VENDOR_BN, "HFC-4S Card (mini PCI)", 4, 4, 1, 2, 0, 0, 0, 0},
-	/*5*/	{VENDOR_CCD, "HFC-4S Eval (old)", 4, 4, 0, 0, 0, 0, 0, 0},
-	/*6*/	{VENDOR_CCD, "HFC-4S IOB4ST", 4, 4, 1, 2, 0, DIP_4S, 0, 0},
-	/*7*/	{VENDOR_CCD, "HFC-4S", 4, 4, 1, 2, 0, 0, 0, 0},
-	/*8*/	{VENDOR_DIG, "HFC-4S Card", 4, 4, 0, 2, 0, 0, HFC_IO_MODE_REGIO, 0},
-	/*9*/	{VENDOR_CCD, "HFC-4S Swyx 4xS0 SX2 QuadBri", 4, 4, 1, 2, 0, 0, 0, 0},
-	/*10*/	{VENDOR_JH, "HFC-4S (junghanns 2.0)", 4, 4, 1, 2, 0, 0, 0, 0},
-	/*11*/	{VENDOR_PRIM, "HFC-2S Primux Card", 4, 2, 0, 0, 0, 0, 0, 0},
-
-	/*12*/	{VENDOR_BN, "HFC-8S Card", 8, 8, 1, 0, 0, 0, 0, 0},
-	/*13*/	{VENDOR_BN, "HFC-8S Card (+)", 8, 8, 1, 8, 0, DIP_8S,
-		 HFC_IO_MODE_REGIO, 0},
-	/*14*/	{VENDOR_CCD, "HFC-8S Eval (old)", 8, 8, 0, 0, 0, 0, 0, 0},
-	/*15*/	{VENDOR_CCD, "HFC-8S IOB4ST Recording", 8, 8, 1, 0, 0, 0, 0, 0},
-
-	/*16*/	{VENDOR_CCD, "HFC-8S IOB8ST", 8, 8, 1, 0, 0, 0, 0, 0},
-	/*17*/	{VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0, 0},
-	/*18*/	{VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0, 0},
-
-	/*19*/	{VENDOR_BN, "HFC-E1 Card", 1, 1, 0, 1, 0, DIP_E1, 0, 0},
-	/*20*/	{VENDOR_BN, "HFC-E1 Card (mini PCI)", 1, 1, 0, 1, 0, 0, 0, 0},
-	/*21*/	{VENDOR_BN, "HFC-E1+ Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0, 0},
-	/*22*/	{VENDOR_BN, "HFC-E1 Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0, 0},
-
-	/*23*/	{VENDOR_CCD, "HFC-E1 Eval (old)", 1, 1, 0, 0, 0, 0, 0, 0},
-	/*24*/	{VENDOR_CCD, "HFC-E1 IOB1E1", 1, 1, 0, 1, 0, 0, 0, 0},
-	/*25*/	{VENDOR_CCD, "HFC-E1", 1, 1, 0, 1, 0, 0, 0, 0},
-
-	/*26*/	{VENDOR_CCD, "HFC-4S Speech Design", 4, 4, 0, 0, 0, 0,
-		 HFC_IO_MODE_PLXSD, 0},
-	/*27*/	{VENDOR_CCD, "HFC-E1 Speech Design", 1, 1, 0, 0, 0, 0,
-		 HFC_IO_MODE_PLXSD, 0},
-	/*28*/	{VENDOR_CCD, "HFC-4S OpenVox", 4, 4, 1, 0, 0, 0, 0, 0},
-	/*29*/	{VENDOR_CCD, "HFC-2S OpenVox", 4, 2, 1, 0, 0, 0, 0, 0},
-	/*30*/	{VENDOR_CCD, "HFC-8S OpenVox", 8, 8, 1, 0, 0, 0, 0, 0},
-	/*31*/	{VENDOR_CCD, "XHFC-4S Speech Design", 5, 4, 0, 0, 0, 0,
-		 HFC_IO_MODE_EMBSD, XHFC_IRQ},
-	/*32*/	{VENDOR_JH, "HFC-8S (junghanns)", 8, 8, 1, 0, 0, 0, 0, 0},
-	/*33*/	{VENDOR_BN, "HFC-2S Beronet Card PCIe", 4, 2, 1, 3, 0, DIP_4S, 0, 0},
-	/*34*/	{VENDOR_BN, "HFC-4S Beronet Card PCIe", 4, 4, 1, 2, 0, DIP_4S, 0, 0},
-};
-
-#undef H
-#define H(x)	((unsigned long)&hfcm_map[x])
-static const struct pci_device_id hfmultipci_ids[] = {
-
-	/* Cards with HFC-4S Chip */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BN1SM, 0, 0, H(0)}, /* BN1S mini PCI */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BN2S, 0, 0, H(1)}, /* BN2S */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BN2SM, 0, 0, H(2)}, /* BN2S mini PCI */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BN4S, 0, 0, H(3)}, /* BN4S */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BN4SM, 0, 0, H(4)}, /* BN4S mini PCI */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_DEVICE_ID_CCD_HFC4S, 0, 0, H(5)}, /* Old Eval */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_IOB4ST, 0, 0, H(6)}, /* IOB4ST */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_HFC4S, 0, 0, H(7)}, /* 4S */
-	{ PCI_VENDOR_ID_DIGIUM, PCI_DEVICE_ID_DIGIUM_HFC4S,
-	  PCI_VENDOR_ID_DIGIUM, PCI_DEVICE_ID_DIGIUM_HFC4S, 0, 0, H(8)},
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_SWYX4S, 0, 0, H(9)}, /* 4S Swyx */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_JH4S20, 0, 0, H(10)},
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_PMX2S, 0, 0, H(11)}, /* Primux */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_OV4S, 0, 0, H(28)}, /* OpenVox 4 */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_OV2S, 0, 0, H(29)}, /* OpenVox 2 */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  0xb761, 0, 0, H(33)}, /* BN2S PCIe */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-	  0xb762, 0, 0, H(34)}, /* BN4S PCIe */
-
-	/* Cards with HFC-8S Chip */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BN8S, 0, 0, H(12)}, /* BN8S */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BN8SP, 0, 0, H(13)}, /* BN8S+ */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
-	  PCI_DEVICE_ID_CCD_HFC8S, 0, 0, H(14)}, /* old Eval */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_IOB8STR, 0, 0, H(15)}, /* IOB8ST Recording */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_IOB8ST, 0, 0, H(16)}, /* IOB8ST  */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_IOB8ST_1, 0, 0, H(17)}, /* IOB8ST  */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_HFC8S, 0, 0, H(18)}, /* 8S */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_OV8S, 0, 0, H(30)}, /* OpenVox 8 */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_JH8S, 0, 0, H(32)}, /* Junganns 8S  */
-
-
-	/* Cards with HFC-E1 Chip */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BNE1, 0, 0, H(19)}, /* BNE1 */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BNE1M, 0, 0, H(20)}, /* BNE1 mini PCI */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BNE1DP, 0, 0, H(21)}, /* BNE1 + (Dual) */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_BNE1D, 0, 0, H(22)}, /* BNE1 (Dual) */
-
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD,
-	  PCI_DEVICE_ID_CCD_HFCE1, 0, 0, H(23)}, /* Old Eval */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_IOB1E1, 0, 0, H(24)}, /* IOB1E1 */
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_HFCE1, 0, 0, H(25)}, /* E1 */
-
-	{ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_SPD4S, 0, 0, H(26)}, /* PLX PCI Bridge */
-	{ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_SPDE1, 0, 0, H(27)}, /* PLX PCI Bridge */
-
-	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD,
-	  PCI_SUBDEVICE_ID_CCD_JHSE1, 0, 0, H(25)}, /* Junghanns E1 */
-
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_HFC4S), 0 },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_HFC8S), 0 },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_HFCE1), 0 },
-	{0, }
-};
-#undef H
-
-MODULE_DEVICE_TABLE(pci, hfmultipci_ids);
-
-static int
-hfcmulti_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	struct hm_map	*m = (struct hm_map *)ent->driver_data;
-	int		ret;
-
-	if (m == NULL && ent->vendor == PCI_VENDOR_ID_CCD && (
-		    ent->device == PCI_DEVICE_ID_CCD_HFC4S ||
-		    ent->device == PCI_DEVICE_ID_CCD_HFC8S ||
-		    ent->device == PCI_DEVICE_ID_CCD_HFCE1)) {
-		printk(KERN_ERR
-		       "Unknown HFC multiport controller (vendor:%04x device:%04x "
-		       "subvendor:%04x subdevice:%04x)\n", pdev->vendor,
-		       pdev->device, pdev->subsystem_vendor,
-		       pdev->subsystem_device);
-		printk(KERN_ERR
-		       "Please contact the driver maintainer for support.\n");
-		return -ENODEV;
-	}
-	ret = hfcmulti_init(m, pdev, ent);
-	if (ret)
-		return ret;
-	HFC_cnt++;
-	printk(KERN_INFO "%d devices registered\n", HFC_cnt);
-	return 0;
-}
-
-static struct pci_driver hfcmultipci_driver = {
-	.name		= "hfc_multi",
-	.probe		= hfcmulti_probe,
-	.remove		= hfc_remove_pci,
-	.id_table	= hfmultipci_ids,
-};
-
-static void __exit
-HFCmulti_cleanup(void)
-{
-	struct hfc_multi *card, *next;
-
-	/* get rid of all devices of this driver */
-	list_for_each_entry_safe(card, next, &HFClist, list)
-		release_card(card);
-	pci_unregister_driver(&hfcmultipci_driver);
-}
-
-static int __init
-HFCmulti_init(void)
-{
-	int err;
-	int i, xhfc = 0;
-	struct hm_map m;
-
-	printk(KERN_INFO "mISDN: HFC-multi driver %s\n", HFC_MULTI_VERSION);
-
-#ifdef IRQ_DEBUG
-	printk(KERN_DEBUG "%s: IRQ_DEBUG IS ENABLED!\n", __func__);
-#endif
-
-	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_DEBUG "%s: init entered\n", __func__);
-
-	switch (poll) {
-	case 0:
-		poll_timer = 6;
-		poll = 128;
-		break;
-	case 8:
-		poll_timer = 2;
-		break;
-	case 16:
-		poll_timer = 3;
-		break;
-	case 32:
-		poll_timer = 4;
-		break;
-	case 64:
-		poll_timer = 5;
-		break;
-	case 128:
-		poll_timer = 6;
-		break;
-	case 256:
-		poll_timer = 7;
-		break;
-	default:
-		printk(KERN_ERR
-		       "%s: Wrong poll value (%d).\n", __func__, poll);
-		err = -EINVAL;
-		return err;
-
-	}
-
-	if (!clock)
-		clock = 1;
-
-	/* Register the embedded devices.
-	 * This should be done before the PCI cards registration */
-	switch (hwid) {
-	case HWID_MINIP4:
-		xhfc = 1;
-		m = hfcm_map[31];
-		break;
-	case HWID_MINIP8:
-		xhfc = 2;
-		m = hfcm_map[31];
-		break;
-	case HWID_MINIP16:
-		xhfc = 4;
-		m = hfcm_map[31];
-		break;
-	default:
-		xhfc = 0;
-	}
-
-	for (i = 0; i < xhfc; ++i) {
-		err = hfcmulti_init(&m, NULL, NULL);
-		if (err) {
-			printk(KERN_ERR "error registering embedded driver: "
-			       "%x\n", err);
-			return err;
-		}
-		HFC_cnt++;
-		printk(KERN_INFO "%d devices registered\n", HFC_cnt);
-	}
-
-	/* Register the PCI cards */
-	err = pci_register_driver(&hfcmultipci_driver);
-	if (err < 0) {
-		printk(KERN_ERR "error registering pci driver: %x\n", err);
-		return err;
-	}
-
-	return 0;
-}
-
-
-module_init(HFCmulti_init);
-module_exit(HFCmulti_cleanup);
diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
deleted file mode 100644
index 554a1c640321..000000000000
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ /dev/null
@@ -1,2360 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * hfcpci.c     low level driver for CCD's hfc-pci based cards
- *
- * Author     Werner Cornelius (werner@isdn4linux.de)
- *            based on existing driver for CCD hfc ISA cards
- *            type approval valid for HFC-S PCI A based card
- *
- * Copyright 1999  by Werner Cornelius (werner@isdn-development.de)
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- *
- * Module options:
- *
- * debug:
- *	NOTE: only one poll value must be given for all cards
- *	See hfc_pci.h for debug flags.
- *
- * poll:
- *	NOTE: only one poll value must be given for all cards
- *	Give the number of samples for each fifo process.
- *	By default 128 is used. Decrease to reduce delay, increase to
- *	reduce cpu load. If unsure, don't mess with it!
- *	A value of 128 will use controller's interrupt. Other values will
- *	use kernel timer, because the controller will not allow lower values
- *	than 128.
- *	Also note that the value depends on the kernel timer frequency.
- *	If kernel uses a frequency of 1000 Hz, steps of 8 samples are possible.
- *	If the kernel uses 100 Hz, steps of 80 samples are possible.
- *	If the kernel uses 300 Hz, steps of about 26 samples are possible.
- */
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/mISDNhw.h>
-#include <linux/slab.h>
-
-#include "hfc_pci.h"
-
-static void hfcpci_softirq(struct timer_list *unused);
-static const char *hfcpci_revision = "2.0";
-
-static int HFC_cnt;
-static uint debug;
-static uint poll, tics;
-static DEFINE_TIMER(hfc_tl, hfcpci_softirq);
-static unsigned long hfc_jiffies;
-
-MODULE_AUTHOR("Karsten Keil");
-MODULE_DESCRIPTION("mISDN driver for CCD's hfc-pci based cards");
-MODULE_LICENSE("GPL");
-module_param(debug, uint, S_IRUGO | S_IWUSR);
-module_param(poll, uint, S_IRUGO | S_IWUSR);
-
-enum {
-	HFC_CCD_2BD0,
-	HFC_CCD_B000,
-	HFC_CCD_B006,
-	HFC_CCD_B007,
-	HFC_CCD_B008,
-	HFC_CCD_B009,
-	HFC_CCD_B00A,
-	HFC_CCD_B00B,
-	HFC_CCD_B00C,
-	HFC_CCD_B100,
-	HFC_CCD_B700,
-	HFC_CCD_B701,
-	HFC_ASUS_0675,
-	HFC_BERKOM_A1T,
-	HFC_BERKOM_TCONCEPT,
-	HFC_ANIGMA_MC145575,
-	HFC_ZOLTRIX_2BD0,
-	HFC_DIGI_DF_M_IOM2_E,
-	HFC_DIGI_DF_M_E,
-	HFC_DIGI_DF_M_IOM2_A,
-	HFC_DIGI_DF_M_A,
-	HFC_ABOCOM_2BD1,
-	HFC_SITECOM_DC105V2,
-};
-
-struct hfcPCI_hw {
-	unsigned char		cirm;
-	unsigned char		ctmt;
-	unsigned char		clkdel;
-	unsigned char		states;
-	unsigned char		conn;
-	unsigned char		mst_m;
-	unsigned char		int_m1;
-	unsigned char		int_m2;
-	unsigned char		sctrl;
-	unsigned char		sctrl_r;
-	unsigned char		sctrl_e;
-	unsigned char		trm;
-	unsigned char		fifo_en;
-	unsigned char		bswapped;
-	unsigned char		protocol;
-	int			nt_timer;
-	unsigned char __iomem	*pci_io; /* start of PCI IO memory */
-	dma_addr_t		dmahandle;
-	void			*fifos; /* FIFO memory */
-	int			last_bfifo_cnt[2];
-	/* marker saving last b-fifo frame count */
-	struct timer_list	timer;
-};
-
-#define	HFC_CFG_MASTER		1
-#define HFC_CFG_SLAVE		2
-#define	HFC_CFG_PCM		3
-#define HFC_CFG_2HFC		4
-#define HFC_CFG_SLAVEHFC	5
-#define HFC_CFG_NEG_F0		6
-#define HFC_CFG_SW_DD_DU	7
-
-#define FLG_HFC_TIMER_T1	16
-#define FLG_HFC_TIMER_T3	17
-
-#define NT_T1_COUNT	1120	/* number of 3.125ms interrupts (3.5s) */
-#define NT_T3_COUNT	31	/* number of 3.125ms interrupts (97 ms) */
-#define CLKDEL_TE	0x0e	/* CLKDEL in TE mode */
-#define CLKDEL_NT	0x6c	/* CLKDEL in NT mode */
-
-
-struct hfc_pci {
-	u_char			subtype;
-	u_char			chanlimit;
-	u_char			initdone;
-	u_long			cfg;
-	u_int			irq;
-	u_int			irqcnt;
-	struct pci_dev		*pdev;
-	struct hfcPCI_hw	hw;
-	spinlock_t		lock;	/* card lock */
-	struct dchannel		dch;
-	struct bchannel		bch[2];
-};
-
-/* Interface functions */
-static void
-enable_hwirq(struct hfc_pci *hc)
-{
-	hc->hw.int_m2 |= HFCPCI_IRQ_ENABLE;
-	Write_hfc(hc, HFCPCI_INT_M2, hc->hw.int_m2);
-}
-
-static void
-disable_hwirq(struct hfc_pci *hc)
-{
-	hc->hw.int_m2 &= ~((u_char)HFCPCI_IRQ_ENABLE);
-	Write_hfc(hc, HFCPCI_INT_M2, hc->hw.int_m2);
-}
-
-/*
- * free hardware resources used by driver
- */
-static void
-release_io_hfcpci(struct hfc_pci *hc)
-{
-	/* disable memory mapped ports + busmaster */
-	pci_write_config_word(hc->pdev, PCI_COMMAND, 0);
-	timer_delete(&hc->hw.timer);
-	dma_free_coherent(&hc->pdev->dev, 0x8000, hc->hw.fifos,
-			  hc->hw.dmahandle);
-	iounmap(hc->hw.pci_io);
-}
-
-/*
- * set mode (NT or TE)
- */
-static void
-hfcpci_setmode(struct hfc_pci *hc)
-{
-	if (hc->hw.protocol == ISDN_P_NT_S0) {
-		hc->hw.clkdel = CLKDEL_NT;	/* ST-Bit delay for NT-Mode */
-		hc->hw.sctrl |= SCTRL_MODE_NT;	/* NT-MODE */
-		hc->hw.states = 1;		/* G1 */
-	} else {
-		hc->hw.clkdel = CLKDEL_TE;	/* ST-Bit delay for TE-Mode */
-		hc->hw.sctrl &= ~SCTRL_MODE_NT;	/* TE-MODE */
-		hc->hw.states = 2;		/* F2 */
-	}
-	Write_hfc(hc, HFCPCI_CLKDEL, hc->hw.clkdel);
-	Write_hfc(hc, HFCPCI_STATES, HFCPCI_LOAD_STATE | hc->hw.states);
-	udelay(10);
-	Write_hfc(hc, HFCPCI_STATES, hc->hw.states | 0x40); /* Deactivate */
-	Write_hfc(hc, HFCPCI_SCTRL, hc->hw.sctrl);
-}
-
-/*
- * function called to reset the HFC PCI chip. A complete software reset of chip
- * and fifos is done.
- */
-static void
-reset_hfcpci(struct hfc_pci *hc)
-{
-	u_char	val;
-	int	cnt = 0;
-
-	printk(KERN_DEBUG "reset_hfcpci: entered\n");
-	val = Read_hfc(hc, HFCPCI_CHIP_ID);
-	printk(KERN_INFO "HFC_PCI: resetting HFC ChipId(%x)\n", val);
-	/* enable memory mapped ports, disable busmaster */
-	pci_write_config_word(hc->pdev, PCI_COMMAND, PCI_ENA_MEMIO);
-	disable_hwirq(hc);
-	/* enable memory ports + busmaster */
-	pci_write_config_word(hc->pdev, PCI_COMMAND,
-			      PCI_ENA_MEMIO + PCI_ENA_MASTER);
-	val = Read_hfc(hc, HFCPCI_STATUS);
-	printk(KERN_DEBUG "HFC-PCI status(%x) before reset\n", val);
-	hc->hw.cirm = HFCPCI_RESET;	/* Reset On */
-	Write_hfc(hc, HFCPCI_CIRM, hc->hw.cirm);
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	mdelay(10);			/* Timeout 10ms */
-	hc->hw.cirm = 0;		/* Reset Off */
-	Write_hfc(hc, HFCPCI_CIRM, hc->hw.cirm);
-	val = Read_hfc(hc, HFCPCI_STATUS);
-	printk(KERN_DEBUG "HFC-PCI status(%x) after reset\n", val);
-	while (cnt < 50000) { /* max 50000 us */
-		udelay(5);
-		cnt += 5;
-		val = Read_hfc(hc, HFCPCI_STATUS);
-		if (!(val & 2))
-			break;
-	}
-	printk(KERN_DEBUG "HFC-PCI status(%x) after %dus\n", val, cnt);
-
-	hc->hw.fifo_en = 0x30;	/* only D fifos enabled */
-
-	hc->hw.bswapped = 0;	/* no exchange */
-	hc->hw.ctmt = HFCPCI_TIM3_125 | HFCPCI_AUTO_TIMER;
-	hc->hw.trm = HFCPCI_BTRANS_THRESMASK; /* no echo connect , threshold */
-	hc->hw.sctrl = 0x40;	/* set tx_lo mode, error in datasheet ! */
-	hc->hw.sctrl_r = 0;
-	hc->hw.sctrl_e = HFCPCI_AUTO_AWAKE;	/* S/T Auto awake */
-	hc->hw.mst_m = 0;
-	if (test_bit(HFC_CFG_MASTER, &hc->cfg))
-		hc->hw.mst_m |= HFCPCI_MASTER;	/* HFC Master Mode */
-	if (test_bit(HFC_CFG_NEG_F0, &hc->cfg))
-		hc->hw.mst_m |= HFCPCI_F0_NEGATIV;
-	Write_hfc(hc, HFCPCI_FIFO_EN, hc->hw.fifo_en);
-	Write_hfc(hc, HFCPCI_TRM, hc->hw.trm);
-	Write_hfc(hc, HFCPCI_SCTRL_E, hc->hw.sctrl_e);
-	Write_hfc(hc, HFCPCI_CTMT, hc->hw.ctmt);
-
-	hc->hw.int_m1 = HFCPCI_INTS_DTRANS | HFCPCI_INTS_DREC |
-		HFCPCI_INTS_L1STATE | HFCPCI_INTS_TIMER;
-	Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-
-	/* Clear already pending ints */
-	val = Read_hfc(hc, HFCPCI_INT_S1);
-
-	/* set NT/TE mode */
-	hfcpci_setmode(hc);
-
-	Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
-	Write_hfc(hc, HFCPCI_SCTRL_R, hc->hw.sctrl_r);
-
-	/*
-	 * Init GCI/IOM2 in master mode
-	 * Slots 0 and 1 are set for B-chan 1 and 2
-	 * D- and monitor/CI channel are not enabled
-	 * STIO1 is used as output for data, B1+B2 from ST->IOM+HFC
-	 * STIO2 is used as data input, B1+B2 from IOM->ST
-	 * ST B-channel send disabled -> continuous 1s
-	 * The IOM slots are always enabled
-	 */
-	if (test_bit(HFC_CFG_PCM, &hc->cfg)) {
-		/* set data flow directions: connect B1,B2: HFC to/from PCM */
-		hc->hw.conn = 0x09;
-	} else {
-		hc->hw.conn = 0x36;	/* set data flow directions */
-		if (test_bit(HFC_CFG_SW_DD_DU, &hc->cfg)) {
-			Write_hfc(hc, HFCPCI_B1_SSL, 0xC0);
-			Write_hfc(hc, HFCPCI_B2_SSL, 0xC1);
-			Write_hfc(hc, HFCPCI_B1_RSL, 0xC0);
-			Write_hfc(hc, HFCPCI_B2_RSL, 0xC1);
-		} else {
-			Write_hfc(hc, HFCPCI_B1_SSL, 0x80);
-			Write_hfc(hc, HFCPCI_B2_SSL, 0x81);
-			Write_hfc(hc, HFCPCI_B1_RSL, 0x80);
-			Write_hfc(hc, HFCPCI_B2_RSL, 0x81);
-		}
-	}
-	Write_hfc(hc, HFCPCI_CONNECT, hc->hw.conn);
-	val = Read_hfc(hc, HFCPCI_INT_S2);
-}
-
-/*
- * Timer function called when kernel timer expires
- */
-static void
-hfcpci_Timer(struct timer_list *t)
-{
-	struct hfc_pci *hc = timer_container_of(hc, t, hw.timer);
-	hc->hw.timer.expires = jiffies + 75;
-	/* WD RESET */
-/*
- *	WriteReg(hc, HFCD_DATA, HFCD_CTMT, hc->hw.ctmt | 0x80);
- *	add_timer(&hc->hw.timer);
- */
-}
-
-
-/*
- * select a b-channel entry matching and active
- */
-static struct bchannel *
-Sel_BCS(struct hfc_pci *hc, int channel)
-{
-	if (test_bit(FLG_ACTIVE, &hc->bch[0].Flags) &&
-	    (hc->bch[0].nr & channel))
-		return &hc->bch[0];
-	else if (test_bit(FLG_ACTIVE, &hc->bch[1].Flags) &&
-		 (hc->bch[1].nr & channel))
-		return &hc->bch[1];
-	else
-		return NULL;
-}
-
-/*
- * clear the desired B-channel rx fifo
- */
-static void
-hfcpci_clear_fifo_rx(struct hfc_pci *hc, int fifo)
-{
-	u_char		fifo_state;
-	struct bzfifo	*bzr;
-
-	if (fifo) {
-		bzr = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b2;
-		fifo_state = hc->hw.fifo_en & HFCPCI_FIFOEN_B2RX;
-	} else {
-		bzr = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b1;
-		fifo_state = hc->hw.fifo_en & HFCPCI_FIFOEN_B1RX;
-	}
-	if (fifo_state)
-		hc->hw.fifo_en ^= fifo_state;
-	Write_hfc(hc, HFCPCI_FIFO_EN, hc->hw.fifo_en);
-	hc->hw.last_bfifo_cnt[fifo] = 0;
-	bzr->f1 = MAX_B_FRAMES;
-	bzr->f2 = bzr->f1;	/* init F pointers to remain constant */
-	bzr->za[MAX_B_FRAMES].z1 = cpu_to_le16(B_FIFO_SIZE + B_SUB_VAL - 1);
-	bzr->za[MAX_B_FRAMES].z2 = cpu_to_le16(
-		le16_to_cpu(bzr->za[MAX_B_FRAMES].z1));
-	if (fifo_state)
-		hc->hw.fifo_en |= fifo_state;
-	Write_hfc(hc, HFCPCI_FIFO_EN, hc->hw.fifo_en);
-}
-
-/*
- * clear the desired B-channel tx fifo
- */
-static void hfcpci_clear_fifo_tx(struct hfc_pci *hc, int fifo)
-{
-	u_char		fifo_state;
-	struct bzfifo	*bzt;
-
-	if (fifo) {
-		bzt = &((union fifo_area *)(hc->hw.fifos))->b_chans.txbz_b2;
-		fifo_state = hc->hw.fifo_en & HFCPCI_FIFOEN_B2TX;
-	} else {
-		bzt = &((union fifo_area *)(hc->hw.fifos))->b_chans.txbz_b1;
-		fifo_state = hc->hw.fifo_en & HFCPCI_FIFOEN_B1TX;
-	}
-	if (fifo_state)
-		hc->hw.fifo_en ^= fifo_state;
-	Write_hfc(hc, HFCPCI_FIFO_EN, hc->hw.fifo_en);
-	if (hc->bch[fifo].debug & DEBUG_HW_BCHANNEL)
-		printk(KERN_DEBUG "hfcpci_clear_fifo_tx%d f1(%x) f2(%x) "
-		       "z1(%x) z2(%x) state(%x)\n",
-		       fifo, bzt->f1, bzt->f2,
-		       le16_to_cpu(bzt->za[MAX_B_FRAMES].z1),
-		       le16_to_cpu(bzt->za[MAX_B_FRAMES].z2),
-		       fifo_state);
-	bzt->f2 = MAX_B_FRAMES;
-	bzt->f1 = bzt->f2;	/* init F pointers to remain constant */
-	bzt->za[MAX_B_FRAMES].z1 = cpu_to_le16(B_FIFO_SIZE + B_SUB_VAL - 1);
-	bzt->za[MAX_B_FRAMES].z2 = cpu_to_le16(B_FIFO_SIZE + B_SUB_VAL - 2);
-	if (fifo_state)
-		hc->hw.fifo_en |= fifo_state;
-	Write_hfc(hc, HFCPCI_FIFO_EN, hc->hw.fifo_en);
-	if (hc->bch[fifo].debug & DEBUG_HW_BCHANNEL)
-		printk(KERN_DEBUG
-		       "hfcpci_clear_fifo_tx%d f1(%x) f2(%x) z1(%x) z2(%x)\n",
-		       fifo, bzt->f1, bzt->f2,
-		       le16_to_cpu(bzt->za[MAX_B_FRAMES].z1),
-		       le16_to_cpu(bzt->za[MAX_B_FRAMES].z2));
-}
-
-/*
- * read a complete B-frame out of the buffer
- */
-static void
-hfcpci_empty_bfifo(struct bchannel *bch, struct bzfifo *bz,
-		   u_char *bdata, int count)
-{
-	u_char		*ptr, *ptr1, new_f2;
-	int		maxlen, new_z2;
-	struct zt	*zp;
-
-	if ((bch->debug & DEBUG_HW_BCHANNEL) && !(bch->debug & DEBUG_HW_BFIFO))
-		printk(KERN_DEBUG "hfcpci_empty_fifo\n");
-	zp = &bz->za[bz->f2];	/* point to Z-Regs */
-	new_z2 = le16_to_cpu(zp->z2) + count;	/* new position in fifo */
-	if (new_z2 >= (B_FIFO_SIZE + B_SUB_VAL))
-		new_z2 -= B_FIFO_SIZE;	/* buffer wrap */
-	new_f2 = (bz->f2 + 1) & MAX_B_FRAMES;
-	if ((count > MAX_DATA_SIZE + 3) || (count < 4) ||
-	    (*(bdata + (le16_to_cpu(zp->z1) - B_SUB_VAL)))) {
-		if (bch->debug & DEBUG_HW)
-			printk(KERN_DEBUG "hfcpci_empty_fifo: incoming packet "
-			       "invalid length %d or crc\n", count);
-#ifdef ERROR_STATISTIC
-		bch->err_inv++;
-#endif
-		bz->za[new_f2].z2 = cpu_to_le16(new_z2);
-		bz->f2 = new_f2;	/* next buffer */
-	} else {
-		bch->rx_skb = mI_alloc_skb(count - 3, GFP_ATOMIC);
-		if (!bch->rx_skb) {
-			printk(KERN_WARNING "HFCPCI: receive out of memory\n");
-			return;
-		}
-		count -= 3;
-		ptr = skb_put(bch->rx_skb, count);
-
-		if (le16_to_cpu(zp->z2) + count <= B_FIFO_SIZE + B_SUB_VAL)
-			maxlen = count;		/* complete transfer */
-		else
-			maxlen = B_FIFO_SIZE + B_SUB_VAL -
-				le16_to_cpu(zp->z2);	/* maximum */
-
-		ptr1 = bdata + (le16_to_cpu(zp->z2) - B_SUB_VAL);
-		/* start of data */
-		memcpy(ptr, ptr1, maxlen);	/* copy data */
-		count -= maxlen;
-
-		if (count) {	/* rest remaining */
-			ptr += maxlen;
-			ptr1 = bdata;	/* start of buffer */
-			memcpy(ptr, ptr1, count);	/* rest */
-		}
-		bz->za[new_f2].z2 = cpu_to_le16(new_z2);
-		bz->f2 = new_f2;	/* next buffer */
-		recv_Bchannel(bch, MISDN_ID_ANY, false);
-	}
-}
-
-/*
- * D-channel receive procedure
- */
-static int
-receive_dmsg(struct hfc_pci *hc)
-{
-	struct dchannel	*dch = &hc->dch;
-	int		maxlen;
-	int		rcnt, total;
-	int		count = 5;
-	u_char		*ptr, *ptr1;
-	struct dfifo	*df;
-	struct zt	*zp;
-
-	df = &((union fifo_area *)(hc->hw.fifos))->d_chan.d_rx;
-	while (((df->f1 & D_FREG_MASK) != (df->f2 & D_FREG_MASK)) && count--) {
-		zp = &df->za[df->f2 & D_FREG_MASK];
-		rcnt = le16_to_cpu(zp->z1) - le16_to_cpu(zp->z2);
-		if (rcnt < 0)
-			rcnt += D_FIFO_SIZE;
-		rcnt++;
-		if (dch->debug & DEBUG_HW_DCHANNEL)
-			printk(KERN_DEBUG
-			       "hfcpci recd f1(%d) f2(%d) z1(%x) z2(%x) cnt(%d)\n",
-			       df->f1, df->f2,
-			       le16_to_cpu(zp->z1),
-			       le16_to_cpu(zp->z2),
-			       rcnt);
-
-		if ((rcnt > MAX_DFRAME_LEN + 3) || (rcnt < 4) ||
-		    (df->data[le16_to_cpu(zp->z1)])) {
-			if (dch->debug & DEBUG_HW)
-				printk(KERN_DEBUG
-				       "empty_fifo hfcpci packet inv. len "
-				       "%d or crc %d\n",
-				       rcnt,
-				       df->data[le16_to_cpu(zp->z1)]);
-#ifdef ERROR_STATISTIC
-			cs->err_rx++;
-#endif
-			df->f2 = ((df->f2 + 1) & MAX_D_FRAMES) |
-				(MAX_D_FRAMES + 1);	/* next buffer */
-			df->za[df->f2 & D_FREG_MASK].z2 =
-				cpu_to_le16((le16_to_cpu(zp->z2) + rcnt) &
-					    (D_FIFO_SIZE - 1));
-		} else {
-			dch->rx_skb = mI_alloc_skb(rcnt - 3, GFP_ATOMIC);
-			if (!dch->rx_skb) {
-				printk(KERN_WARNING
-				       "HFC-PCI: D receive out of memory\n");
-				break;
-			}
-			total = rcnt;
-			rcnt -= 3;
-			ptr = skb_put(dch->rx_skb, rcnt);
-
-			if (le16_to_cpu(zp->z2) + rcnt <= D_FIFO_SIZE)
-				maxlen = rcnt;	/* complete transfer */
-			else
-				maxlen = D_FIFO_SIZE - le16_to_cpu(zp->z2);
-			/* maximum */
-
-			ptr1 = df->data + le16_to_cpu(zp->z2);
-			/* start of data */
-			memcpy(ptr, ptr1, maxlen);	/* copy data */
-			rcnt -= maxlen;
-
-			if (rcnt) {	/* rest remaining */
-				ptr += maxlen;
-				ptr1 = df->data;	/* start of buffer */
-				memcpy(ptr, ptr1, rcnt);	/* rest */
-			}
-			df->f2 = ((df->f2 + 1) & MAX_D_FRAMES) |
-				(MAX_D_FRAMES + 1);	/* next buffer */
-			df->za[df->f2 & D_FREG_MASK].z2 = cpu_to_le16((
-									      le16_to_cpu(zp->z2) + total) & (D_FIFO_SIZE - 1));
-			recv_Dchannel(dch);
-		}
-	}
-	return 1;
-}
-
-/*
- * check for transparent receive data and read max one 'poll' size if avail
- */
-static void
-hfcpci_empty_fifo_trans(struct bchannel *bch, struct bzfifo *rxbz,
-			struct bzfifo *txbz, u_char *bdata)
-{
-	__le16	*z1r, *z2r, *z1t, *z2t;
-	int	new_z2, fcnt_rx, fcnt_tx, maxlen;
-	u_char	*ptr, *ptr1;
-
-	z1r = &rxbz->za[MAX_B_FRAMES].z1;	/* pointer to z reg */
-	z2r = z1r + 1;
-	z1t = &txbz->za[MAX_B_FRAMES].z1;
-	z2t = z1t + 1;
-
-	fcnt_rx = le16_to_cpu(*z1r) - le16_to_cpu(*z2r);
-	if (!fcnt_rx)
-		return;	/* no data avail */
-
-	if (fcnt_rx <= 0)
-		fcnt_rx += B_FIFO_SIZE;	/* bytes actually buffered */
-	new_z2 = le16_to_cpu(*z2r) + fcnt_rx;	/* new position in fifo */
-	if (new_z2 >= (B_FIFO_SIZE + B_SUB_VAL))
-		new_z2 -= B_FIFO_SIZE;	/* buffer wrap */
-
-	fcnt_tx = le16_to_cpu(*z2t) - le16_to_cpu(*z1t);
-	if (fcnt_tx <= 0)
-		fcnt_tx += B_FIFO_SIZE;
-	/* fcnt_tx contains available bytes in tx-fifo */
-	fcnt_tx = B_FIFO_SIZE - fcnt_tx;
-	/* remaining bytes to send (bytes in tx-fifo) */
-
-	if (test_bit(FLG_RX_OFF, &bch->Flags)) {
-		bch->dropcnt += fcnt_rx;
-		*z2r = cpu_to_le16(new_z2);
-		return;
-	}
-	maxlen = bchannel_get_rxbuf(bch, fcnt_rx);
-	if (maxlen < 0) {
-		pr_warn("B%d: No bufferspace for %d bytes\n", bch->nr, fcnt_rx);
-	} else {
-		ptr = skb_put(bch->rx_skb, fcnt_rx);
-		if (le16_to_cpu(*z2r) + fcnt_rx <= B_FIFO_SIZE + B_SUB_VAL)
-			maxlen = fcnt_rx;	/* complete transfer */
-		else
-			maxlen = B_FIFO_SIZE + B_SUB_VAL - le16_to_cpu(*z2r);
-		/* maximum */
-
-		ptr1 = bdata + (le16_to_cpu(*z2r) - B_SUB_VAL);
-		/* start of data */
-		memcpy(ptr, ptr1, maxlen);	/* copy data */
-		fcnt_rx -= maxlen;
-
-		if (fcnt_rx) {	/* rest remaining */
-			ptr += maxlen;
-			ptr1 = bdata;	/* start of buffer */
-			memcpy(ptr, ptr1, fcnt_rx);	/* rest */
-		}
-		recv_Bchannel(bch, fcnt_tx, false); /* bch, id, !force */
-	}
-	*z2r = cpu_to_le16(new_z2);		/* new position */
-}
-
-/*
- * B-channel main receive routine
- */
-static void
-main_rec_hfcpci(struct bchannel *bch)
-{
-	struct hfc_pci	*hc = bch->hw;
-	int		rcnt, real_fifo;
-	int		receive = 0, count = 5;
-	struct bzfifo	*txbz, *rxbz;
-	u_char		*bdata;
-	struct zt	*zp;
-
-	if ((bch->nr & 2) && (!hc->hw.bswapped)) {
-		rxbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b2;
-		txbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.txbz_b2;
-		bdata = ((union fifo_area *)(hc->hw.fifos))->b_chans.rxdat_b2;
-		real_fifo = 1;
-	} else {
-		rxbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b1;
-		txbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.txbz_b1;
-		bdata = ((union fifo_area *)(hc->hw.fifos))->b_chans.rxdat_b1;
-		real_fifo = 0;
-	}
-Begin:
-	count--;
-	if (rxbz->f1 != rxbz->f2) {
-		if (bch->debug & DEBUG_HW_BCHANNEL)
-			printk(KERN_DEBUG "hfcpci rec ch(%x) f1(%d) f2(%d)\n",
-			       bch->nr, rxbz->f1, rxbz->f2);
-		zp = &rxbz->za[rxbz->f2];
-
-		rcnt = le16_to_cpu(zp->z1) - le16_to_cpu(zp->z2);
-		if (rcnt < 0)
-			rcnt += B_FIFO_SIZE;
-		rcnt++;
-		if (bch->debug & DEBUG_HW_BCHANNEL)
-			printk(KERN_DEBUG
-			       "hfcpci rec ch(%x) z1(%x) z2(%x) cnt(%d)\n",
-			       bch->nr, le16_to_cpu(zp->z1),
-			       le16_to_cpu(zp->z2), rcnt);
-		hfcpci_empty_bfifo(bch, rxbz, bdata, rcnt);
-		rcnt = rxbz->f1 - rxbz->f2;
-		if (rcnt < 0)
-			rcnt += MAX_B_FRAMES + 1;
-		if (hc->hw.last_bfifo_cnt[real_fifo] > rcnt + 1) {
-			rcnt = 0;
-			hfcpci_clear_fifo_rx(hc, real_fifo);
-		}
-		hc->hw.last_bfifo_cnt[real_fifo] = rcnt;
-		if (rcnt > 1)
-			receive = 1;
-		else
-			receive = 0;
-	} else if (test_bit(FLG_TRANSPARENT, &bch->Flags)) {
-		hfcpci_empty_fifo_trans(bch, rxbz, txbz, bdata);
-		return;
-	} else
-		receive = 0;
-	if (count && receive)
-		goto Begin;
-
-}
-
-/*
- * D-channel send routine
- */
-static void
-hfcpci_fill_dfifo(struct hfc_pci *hc)
-{
-	struct dchannel	*dch = &hc->dch;
-	int		fcnt;
-	int		count, new_z1, maxlen;
-	struct dfifo	*df;
-	u_char		*src, *dst, new_f1;
-
-	if ((dch->debug & DEBUG_HW_DCHANNEL) && !(dch->debug & DEBUG_HW_DFIFO))
-		printk(KERN_DEBUG "%s\n", __func__);
-
-	if (!dch->tx_skb)
-		return;
-	count = dch->tx_skb->len - dch->tx_idx;
-	if (count <= 0)
-		return;
-	df = &((union fifo_area *) (hc->hw.fifos))->d_chan.d_tx;
-
-	if (dch->debug & DEBUG_HW_DFIFO)
-		printk(KERN_DEBUG "%s:f1(%d) f2(%d) z1(f1)(%x)\n", __func__,
-		       df->f1, df->f2,
-		       le16_to_cpu(df->za[df->f1 & D_FREG_MASK].z1));
-	fcnt = df->f1 - df->f2;	/* frame count actually buffered */
-	if (fcnt < 0)
-		fcnt += (MAX_D_FRAMES + 1);	/* if wrap around */
-	if (fcnt > (MAX_D_FRAMES - 1)) {
-		if (dch->debug & DEBUG_HW_DCHANNEL)
-			printk(KERN_DEBUG
-			       "hfcpci_fill_Dfifo more as 14 frames\n");
-#ifdef ERROR_STATISTIC
-		cs->err_tx++;
-#endif
-		return;
-	}
-	/* now determine free bytes in FIFO buffer */
-	maxlen = le16_to_cpu(df->za[df->f2 & D_FREG_MASK].z2) -
-		le16_to_cpu(df->za[df->f1 & D_FREG_MASK].z1) - 1;
-	if (maxlen <= 0)
-		maxlen += D_FIFO_SIZE;	/* count now contains available bytes */
-
-	if (dch->debug & DEBUG_HW_DCHANNEL)
-		printk(KERN_DEBUG "hfcpci_fill_Dfifo count(%d/%d)\n",
-		       count, maxlen);
-	if (count > maxlen) {
-		if (dch->debug & DEBUG_HW_DCHANNEL)
-			printk(KERN_DEBUG "hfcpci_fill_Dfifo no fifo mem\n");
-		return;
-	}
-	new_z1 = (le16_to_cpu(df->za[df->f1 & D_FREG_MASK].z1) + count) &
-		(D_FIFO_SIZE - 1);
-	new_f1 = ((df->f1 + 1) & D_FREG_MASK) | (D_FREG_MASK + 1);
-	src = dch->tx_skb->data + dch->tx_idx;	/* source pointer */
-	dst = df->data + le16_to_cpu(df->za[df->f1 & D_FREG_MASK].z1);
-	maxlen = D_FIFO_SIZE - le16_to_cpu(df->za[df->f1 & D_FREG_MASK].z1);
-	/* end fifo */
-	if (maxlen > count)
-		maxlen = count;	/* limit size */
-	memcpy(dst, src, maxlen);	/* first copy */
-
-	count -= maxlen;	/* remaining bytes */
-	if (count) {
-		dst = df->data;	/* start of buffer */
-		src += maxlen;	/* new position */
-		memcpy(dst, src, count);
-	}
-	df->za[new_f1 & D_FREG_MASK].z1 = cpu_to_le16(new_z1);
-	/* for next buffer */
-	df->za[df->f1 & D_FREG_MASK].z1 = cpu_to_le16(new_z1);
-	/* new pos actual buffer */
-	df->f1 = new_f1;	/* next frame */
-	dch->tx_idx = dch->tx_skb->len;
-}
-
-/*
- * B-channel send routine
- */
-static void
-hfcpci_fill_fifo(struct bchannel *bch)
-{
-	struct hfc_pci	*hc = bch->hw;
-	int		maxlen, fcnt;
-	int		count, new_z1;
-	struct bzfifo	*bz;
-	u_char		*bdata;
-	u_char		new_f1, *src, *dst;
-	__le16 *z1t, *z2t;
-
-	if ((bch->debug & DEBUG_HW_BCHANNEL) && !(bch->debug & DEBUG_HW_BFIFO))
-		printk(KERN_DEBUG "%s\n", __func__);
-	if ((!bch->tx_skb) || bch->tx_skb->len == 0) {
-		if (!test_bit(FLG_FILLEMPTY, &bch->Flags) &&
-		    !test_bit(FLG_TRANSPARENT, &bch->Flags))
-			return;
-		count = HFCPCI_FILLEMPTY;
-	} else {
-		count = bch->tx_skb->len - bch->tx_idx;
-	}
-	if ((bch->nr & 2) && (!hc->hw.bswapped)) {
-		bz = &((union fifo_area *)(hc->hw.fifos))->b_chans.txbz_b2;
-		bdata = ((union fifo_area *)(hc->hw.fifos))->b_chans.txdat_b2;
-	} else {
-		bz = &((union fifo_area *)(hc->hw.fifos))->b_chans.txbz_b1;
-		bdata = ((union fifo_area *)(hc->hw.fifos))->b_chans.txdat_b1;
-	}
-
-	if (test_bit(FLG_TRANSPARENT, &bch->Flags)) {
-		z1t = &bz->za[MAX_B_FRAMES].z1;
-		z2t = z1t + 1;
-		if (bch->debug & DEBUG_HW_BCHANNEL)
-			printk(KERN_DEBUG "hfcpci_fill_fifo_trans ch(%x) "
-			       "cnt(%d) z1(%x) z2(%x)\n", bch->nr, count,
-			       le16_to_cpu(*z1t), le16_to_cpu(*z2t));
-		fcnt = le16_to_cpu(*z2t) - le16_to_cpu(*z1t);
-		if (fcnt <= 0)
-			fcnt += B_FIFO_SIZE;
-		if (test_bit(FLG_FILLEMPTY, &bch->Flags)) {
-			/* fcnt contains available bytes in fifo */
-			if (count > fcnt)
-				count = fcnt;
-			new_z1 = le16_to_cpu(*z1t) + count;
-			/* new buffer Position */
-			if (new_z1 >= (B_FIFO_SIZE + B_SUB_VAL))
-				new_z1 -= B_FIFO_SIZE;	/* buffer wrap */
-			dst = bdata + (le16_to_cpu(*z1t) - B_SUB_VAL);
-			maxlen = (B_FIFO_SIZE + B_SUB_VAL) - le16_to_cpu(*z1t);
-			/* end of fifo */
-			if (bch->debug & DEBUG_HW_BFIFO)
-				printk(KERN_DEBUG "hfcpci_FFt fillempty "
-				       "fcnt(%d) maxl(%d) nz1(%x) dst(%p)\n",
-				       fcnt, maxlen, new_z1, dst);
-			if (maxlen > count)
-				maxlen = count;		/* limit size */
-			memset(dst, bch->fill[0], maxlen); /* first copy */
-			count -= maxlen;		/* remaining bytes */
-			if (count) {
-				dst = bdata;		/* start of buffer */
-				memset(dst, bch->fill[0], count);
-			}
-			*z1t = cpu_to_le16(new_z1);	/* now send data */
-			return;
-		}
-		/* fcnt contains available bytes in fifo */
-		fcnt = B_FIFO_SIZE - fcnt;
-		/* remaining bytes to send (bytes in fifo) */
-
-	next_t_frame:
-		count = bch->tx_skb->len - bch->tx_idx;
-		/* maximum fill shall be poll*2 */
-		if (count > (poll << 1) - fcnt)
-			count = (poll << 1) - fcnt;
-		if (count <= 0)
-			return;
-		/* data is suitable for fifo */
-		new_z1 = le16_to_cpu(*z1t) + count;
-		/* new buffer Position */
-		if (new_z1 >= (B_FIFO_SIZE + B_SUB_VAL))
-			new_z1 -= B_FIFO_SIZE;	/* buffer wrap */
-		src = bch->tx_skb->data + bch->tx_idx;
-		/* source pointer */
-		dst = bdata + (le16_to_cpu(*z1t) - B_SUB_VAL);
-		maxlen = (B_FIFO_SIZE + B_SUB_VAL) - le16_to_cpu(*z1t);
-		/* end of fifo */
-		if (bch->debug & DEBUG_HW_BFIFO)
-			printk(KERN_DEBUG "hfcpci_FFt fcnt(%d) "
-			       "maxl(%d) nz1(%x) dst(%p)\n",
-			       fcnt, maxlen, new_z1, dst);
-		fcnt += count;
-		bch->tx_idx += count;
-		if (maxlen > count)
-			maxlen = count;		/* limit size */
-		memcpy(dst, src, maxlen);	/* first copy */
-		count -= maxlen;	/* remaining bytes */
-		if (count) {
-			dst = bdata;	/* start of buffer */
-			src += maxlen;	/* new position */
-			memcpy(dst, src, count);
-		}
-		*z1t = cpu_to_le16(new_z1);	/* now send data */
-		if (bch->tx_idx < bch->tx_skb->len)
-			return;
-		dev_kfree_skb_any(bch->tx_skb);
-		if (get_next_bframe(bch))
-			goto next_t_frame;
-		return;
-	}
-	if (bch->debug & DEBUG_HW_BCHANNEL)
-		printk(KERN_DEBUG
-		       "%s: ch(%x) f1(%d) f2(%d) z1(f1)(%x)\n",
-		       __func__, bch->nr, bz->f1, bz->f2,
-		       bz->za[bz->f1].z1);
-	fcnt = bz->f1 - bz->f2;	/* frame count actually buffered */
-	if (fcnt < 0)
-		fcnt += (MAX_B_FRAMES + 1);	/* if wrap around */
-	if (fcnt > (MAX_B_FRAMES - 1)) {
-		if (bch->debug & DEBUG_HW_BCHANNEL)
-			printk(KERN_DEBUG
-			       "hfcpci_fill_Bfifo more as 14 frames\n");
-		return;
-	}
-	/* now determine free bytes in FIFO buffer */
-	maxlen = le16_to_cpu(bz->za[bz->f2].z2) -
-		le16_to_cpu(bz->za[bz->f1].z1) - 1;
-	if (maxlen <= 0)
-		maxlen += B_FIFO_SIZE;	/* count now contains available bytes */
-
-	if (bch->debug & DEBUG_HW_BCHANNEL)
-		printk(KERN_DEBUG "hfcpci_fill_fifo ch(%x) count(%d/%d)\n",
-		       bch->nr, count, maxlen);
-
-	if (maxlen < count) {
-		if (bch->debug & DEBUG_HW_BCHANNEL)
-			printk(KERN_DEBUG "hfcpci_fill_fifo no fifo mem\n");
-		return;
-	}
-	new_z1 = le16_to_cpu(bz->za[bz->f1].z1) + count;
-	/* new buffer Position */
-	if (new_z1 >= (B_FIFO_SIZE + B_SUB_VAL))
-		new_z1 -= B_FIFO_SIZE;	/* buffer wrap */
-
-	new_f1 = ((bz->f1 + 1) & MAX_B_FRAMES);
-	src = bch->tx_skb->data + bch->tx_idx;	/* source pointer */
-	dst = bdata + (le16_to_cpu(bz->za[bz->f1].z1) - B_SUB_VAL);
-	maxlen = (B_FIFO_SIZE + B_SUB_VAL) - le16_to_cpu(bz->za[bz->f1].z1);
-	/* end fifo */
-	if (maxlen > count)
-		maxlen = count;	/* limit size */
-	memcpy(dst, src, maxlen);	/* first copy */
-
-	count -= maxlen;	/* remaining bytes */
-	if (count) {
-		dst = bdata;	/* start of buffer */
-		src += maxlen;	/* new position */
-		memcpy(dst, src, count);
-	}
-	bz->za[new_f1].z1 = cpu_to_le16(new_z1);	/* for next buffer */
-	bz->f1 = new_f1;	/* next frame */
-	dev_kfree_skb_any(bch->tx_skb);
-	get_next_bframe(bch);
-}
-
-
-
-/*
- * handle L1 state changes TE
- */
-
-static void
-ph_state_te(struct dchannel *dch)
-{
-	if (dch->debug)
-		printk(KERN_DEBUG "%s: TE newstate %x\n",
-		       __func__, dch->state);
-	switch (dch->state) {
-	case 0:
-		l1_event(dch->l1, HW_RESET_IND);
-		break;
-	case 3:
-		l1_event(dch->l1, HW_DEACT_IND);
-		break;
-	case 5:
-	case 8:
-		l1_event(dch->l1, ANYSIGNAL);
-		break;
-	case 6:
-		l1_event(dch->l1, INFO2);
-		break;
-	case 7:
-		l1_event(dch->l1, INFO4_P8);
-		break;
-	}
-}
-
-/*
- * handle L1 state changes NT
- */
-
-static void
-handle_nt_timer3(struct dchannel *dch) {
-	struct hfc_pci	*hc = dch->hw;
-
-	test_and_clear_bit(FLG_HFC_TIMER_T3, &dch->Flags);
-	hc->hw.int_m1 &= ~HFCPCI_INTS_TIMER;
-	Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-	hc->hw.nt_timer = 0;
-	test_and_set_bit(FLG_ACTIVE, &dch->Flags);
-	if (test_bit(HFC_CFG_MASTER, &hc->cfg))
-		hc->hw.mst_m |= HFCPCI_MASTER;
-	Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
-	_queue_data(&dch->dev.D, PH_ACTIVATE_IND,
-		    MISDN_ID_ANY, 0, NULL, GFP_ATOMIC);
-}
-
-static void
-ph_state_nt(struct dchannel *dch)
-{
-	struct hfc_pci	*hc = dch->hw;
-
-	if (dch->debug)
-		printk(KERN_DEBUG "%s: NT newstate %x\n",
-		       __func__, dch->state);
-	switch (dch->state) {
-	case 2:
-		if (hc->hw.nt_timer < 0) {
-			hc->hw.nt_timer = 0;
-			test_and_clear_bit(FLG_HFC_TIMER_T3, &dch->Flags);
-			test_and_clear_bit(FLG_HFC_TIMER_T1, &dch->Flags);
-			hc->hw.int_m1 &= ~HFCPCI_INTS_TIMER;
-			Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-			/* Clear already pending ints */
-			(void) Read_hfc(hc, HFCPCI_INT_S1);
-			Write_hfc(hc, HFCPCI_STATES, 4 | HFCPCI_LOAD_STATE);
-			udelay(10);
-			Write_hfc(hc, HFCPCI_STATES, 4);
-			dch->state = 4;
-		} else if (hc->hw.nt_timer == 0) {
-			hc->hw.int_m1 |= HFCPCI_INTS_TIMER;
-			Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-			hc->hw.nt_timer = NT_T1_COUNT;
-			hc->hw.ctmt &= ~HFCPCI_AUTO_TIMER;
-			hc->hw.ctmt |= HFCPCI_TIM3_125;
-			Write_hfc(hc, HFCPCI_CTMT, hc->hw.ctmt |
-				  HFCPCI_CLTIMER);
-			test_and_clear_bit(FLG_HFC_TIMER_T3, &dch->Flags);
-			test_and_set_bit(FLG_HFC_TIMER_T1, &dch->Flags);
-			/* allow G2 -> G3 transition */
-			Write_hfc(hc, HFCPCI_STATES, 2 | HFCPCI_NT_G2_G3);
-		} else {
-			Write_hfc(hc, HFCPCI_STATES, 2 | HFCPCI_NT_G2_G3);
-		}
-		break;
-	case 1:
-		hc->hw.nt_timer = 0;
-		test_and_clear_bit(FLG_HFC_TIMER_T3, &dch->Flags);
-		test_and_clear_bit(FLG_HFC_TIMER_T1, &dch->Flags);
-		hc->hw.int_m1 &= ~HFCPCI_INTS_TIMER;
-		Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-		test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-		hc->hw.mst_m &= ~HFCPCI_MASTER;
-		Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
-		test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
-		_queue_data(&dch->dev.D, PH_DEACTIVATE_IND,
-			    MISDN_ID_ANY, 0, NULL, GFP_ATOMIC);
-		break;
-	case 4:
-		hc->hw.nt_timer = 0;
-		test_and_clear_bit(FLG_HFC_TIMER_T3, &dch->Flags);
-		test_and_clear_bit(FLG_HFC_TIMER_T1, &dch->Flags);
-		hc->hw.int_m1 &= ~HFCPCI_INTS_TIMER;
-		Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-		break;
-	case 3:
-		if (!test_and_set_bit(FLG_HFC_TIMER_T3, &dch->Flags)) {
-			if (!test_and_clear_bit(FLG_L2_ACTIVATED,
-						&dch->Flags)) {
-				handle_nt_timer3(dch);
-				break;
-			}
-			test_and_clear_bit(FLG_HFC_TIMER_T1, &dch->Flags);
-			hc->hw.int_m1 |= HFCPCI_INTS_TIMER;
-			Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-			hc->hw.nt_timer = NT_T3_COUNT;
-			hc->hw.ctmt &= ~HFCPCI_AUTO_TIMER;
-			hc->hw.ctmt |= HFCPCI_TIM3_125;
-			Write_hfc(hc, HFCPCI_CTMT, hc->hw.ctmt |
-				  HFCPCI_CLTIMER);
-		}
-		break;
-	}
-}
-
-static void
-ph_state(struct dchannel *dch)
-{
-	struct hfc_pci	*hc = dch->hw;
-
-	if (hc->hw.protocol == ISDN_P_NT_S0) {
-		if (test_bit(FLG_HFC_TIMER_T3, &dch->Flags) &&
-		    hc->hw.nt_timer < 0)
-			handle_nt_timer3(dch);
-		else
-			ph_state_nt(dch);
-	} else
-		ph_state_te(dch);
-}
-
-/*
- * Layer 1 callback function
- */
-static int
-hfc_l1callback(struct dchannel *dch, u_int cmd)
-{
-	struct hfc_pci		*hc = dch->hw;
-
-	switch (cmd) {
-	case INFO3_P8:
-	case INFO3_P10:
-		if (test_bit(HFC_CFG_MASTER, &hc->cfg))
-			hc->hw.mst_m |= HFCPCI_MASTER;
-		Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
-		break;
-	case HW_RESET_REQ:
-		Write_hfc(hc, HFCPCI_STATES, HFCPCI_LOAD_STATE | 3);
-		/* HFC ST 3 */
-		udelay(6);
-		Write_hfc(hc, HFCPCI_STATES, 3);	/* HFC ST 2 */
-		if (test_bit(HFC_CFG_MASTER, &hc->cfg))
-			hc->hw.mst_m |= HFCPCI_MASTER;
-		Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
-		Write_hfc(hc, HFCPCI_STATES, HFCPCI_ACTIVATE |
-			  HFCPCI_DO_ACTION);
-		l1_event(dch->l1, HW_POWERUP_IND);
-		break;
-	case HW_DEACT_REQ:
-		hc->hw.mst_m &= ~HFCPCI_MASTER;
-		Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
-		skb_queue_purge(&dch->squeue);
-		if (dch->tx_skb) {
-			dev_kfree_skb(dch->tx_skb);
-			dch->tx_skb = NULL;
-		}
-		dch->tx_idx = 0;
-		if (dch->rx_skb) {
-			dev_kfree_skb(dch->rx_skb);
-			dch->rx_skb = NULL;
-		}
-		test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
-		if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags))
-			timer_delete(&dch->timer);
-		break;
-	case HW_POWERUP_REQ:
-		Write_hfc(hc, HFCPCI_STATES, HFCPCI_DO_ACTION);
-		break;
-	case PH_ACTIVATE_IND:
-		test_and_set_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL,
-			    GFP_ATOMIC);
-		break;
-	case PH_DEACTIVATE_IND:
-		test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL,
-			    GFP_ATOMIC);
-		break;
-	default:
-		if (dch->debug & DEBUG_HW)
-			printk(KERN_DEBUG "%s: unknown command %x\n",
-			       __func__, cmd);
-		return -1;
-	}
-	return 0;
-}
-
-/*
- * Interrupt handler
- */
-static inline void
-tx_birq(struct bchannel *bch)
-{
-	if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len)
-		hfcpci_fill_fifo(bch);
-	else {
-		dev_kfree_skb_any(bch->tx_skb);
-		if (get_next_bframe(bch))
-			hfcpci_fill_fifo(bch);
-	}
-}
-
-static inline void
-tx_dirq(struct dchannel *dch)
-{
-	if (dch->tx_skb && dch->tx_idx < dch->tx_skb->len)
-		hfcpci_fill_dfifo(dch->hw);
-	else {
-		dev_kfree_skb(dch->tx_skb);
-		if (get_next_dframe(dch))
-			hfcpci_fill_dfifo(dch->hw);
-	}
-}
-
-static irqreturn_t
-hfcpci_int(int intno, void *dev_id)
-{
-	struct hfc_pci	*hc = dev_id;
-	u_char		exval;
-	struct bchannel	*bch;
-	u_char		val, stat;
-
-	spin_lock(&hc->lock);
-	if (!(hc->hw.int_m2 & 0x08)) {
-		spin_unlock(&hc->lock);
-		return IRQ_NONE; /* not initialised */
-	}
-	stat = Read_hfc(hc, HFCPCI_STATUS);
-	if (HFCPCI_ANYINT & stat) {
-		val = Read_hfc(hc, HFCPCI_INT_S1);
-		if (hc->dch.debug & DEBUG_HW_DCHANNEL)
-			printk(KERN_DEBUG
-			       "HFC-PCI: stat(%02x) s1(%02x)\n", stat, val);
-	} else {
-		/* shared */
-		spin_unlock(&hc->lock);
-		return IRQ_NONE;
-	}
-	hc->irqcnt++;
-
-	if (hc->dch.debug & DEBUG_HW_DCHANNEL)
-		printk(KERN_DEBUG "HFC-PCI irq %x\n", val);
-	val &= hc->hw.int_m1;
-	if (val & 0x40) {	/* state machine irq */
-		exval = Read_hfc(hc, HFCPCI_STATES) & 0xf;
-		if (hc->dch.debug & DEBUG_HW_DCHANNEL)
-			printk(KERN_DEBUG "ph_state chg %d->%d\n",
-			       hc->dch.state, exval);
-		hc->dch.state = exval;
-		schedule_event(&hc->dch, FLG_PHCHANGE);
-		val &= ~0x40;
-	}
-	if (val & 0x80) {	/* timer irq */
-		if (hc->hw.protocol == ISDN_P_NT_S0) {
-			if ((--hc->hw.nt_timer) < 0)
-				schedule_event(&hc->dch, FLG_PHCHANGE);
-		}
-		val &= ~0x80;
-		Write_hfc(hc, HFCPCI_CTMT, hc->hw.ctmt | HFCPCI_CLTIMER);
-	}
-	if (val & 0x08) {	/* B1 rx */
-		bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1);
-		if (bch)
-			main_rec_hfcpci(bch);
-		else if (hc->dch.debug)
-			printk(KERN_DEBUG "hfcpci spurious 0x08 IRQ\n");
-	}
-	if (val & 0x10) {	/* B2 rx */
-		bch = Sel_BCS(hc, 2);
-		if (bch)
-			main_rec_hfcpci(bch);
-		else if (hc->dch.debug)
-			printk(KERN_DEBUG "hfcpci spurious 0x10 IRQ\n");
-	}
-	if (val & 0x01) {	/* B1 tx */
-		bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1);
-		if (bch)
-			tx_birq(bch);
-		else if (hc->dch.debug)
-			printk(KERN_DEBUG "hfcpci spurious 0x01 IRQ\n");
-	}
-	if (val & 0x02) {	/* B2 tx */
-		bch = Sel_BCS(hc, 2);
-		if (bch)
-			tx_birq(bch);
-		else if (hc->dch.debug)
-			printk(KERN_DEBUG "hfcpci spurious 0x02 IRQ\n");
-	}
-	if (val & 0x20)		/* D rx */
-		receive_dmsg(hc);
-	if (val & 0x04) {	/* D tx */
-		if (test_and_clear_bit(FLG_BUSY_TIMER, &hc->dch.Flags))
-			timer_delete(&hc->dch.timer);
-		tx_dirq(&hc->dch);
-	}
-	spin_unlock(&hc->lock);
-	return IRQ_HANDLED;
-}
-
-/*
- * timer callback for D-chan busy resolution. Currently no function
- */
-static void
-hfcpci_dbusy_timer(struct timer_list *t)
-{
-}
-
-/*
- * activate/deactivate hardware for selected channels and mode
- */
-static int
-mode_hfcpci(struct bchannel *bch, int bc, int protocol)
-{
-	struct hfc_pci	*hc = bch->hw;
-	int		fifo2;
-	u_char		rx_slot = 0, tx_slot = 0, pcm_mode;
-
-	if (bch->debug & DEBUG_HW_BCHANNEL)
-		printk(KERN_DEBUG
-		       "HFCPCI bchannel protocol %x-->%x ch %x-->%x\n",
-		       bch->state, protocol, bch->nr, bc);
-
-	fifo2 = bc;
-	pcm_mode = (bc >> 24) & 0xff;
-	if (pcm_mode) { /* PCM SLOT USE */
-		if (!test_bit(HFC_CFG_PCM, &hc->cfg))
-			printk(KERN_WARNING
-			       "%s: pcm channel id without HFC_CFG_PCM\n",
-			       __func__);
-		rx_slot = (bc >> 8) & 0xff;
-		tx_slot = (bc >> 16) & 0xff;
-		bc = bc & 0xff;
-	} else if (test_bit(HFC_CFG_PCM, &hc->cfg) && (protocol > ISDN_P_NONE))
-		printk(KERN_WARNING "%s: no pcm channel id but HFC_CFG_PCM\n",
-		       __func__);
-	if (hc->chanlimit > 1) {
-		hc->hw.bswapped = 0;	/* B1 and B2 normal mode */
-		hc->hw.sctrl_e &= ~0x80;
-	} else {
-		if (bc & 2) {
-			if (protocol != ISDN_P_NONE) {
-				hc->hw.bswapped = 1; /* B1 and B2 exchanged */
-				hc->hw.sctrl_e |= 0x80;
-			} else {
-				hc->hw.bswapped = 0; /* B1 and B2 normal mode */
-				hc->hw.sctrl_e &= ~0x80;
-			}
-			fifo2 = 1;
-		} else {
-			hc->hw.bswapped = 0;	/* B1 and B2 normal mode */
-			hc->hw.sctrl_e &= ~0x80;
-		}
-	}
-	switch (protocol) {
-	case (-1): /* used for init */
-		bch->state = -1;
-		bch->nr = bc;
-		fallthrough;
-	case (ISDN_P_NONE):
-		if (bch->state == ISDN_P_NONE)
-			return 0;
-		if (bc & 2) {
-			hc->hw.sctrl &= ~SCTRL_B2_ENA;
-			hc->hw.sctrl_r &= ~SCTRL_B2_ENA;
-		} else {
-			hc->hw.sctrl &= ~SCTRL_B1_ENA;
-			hc->hw.sctrl_r &= ~SCTRL_B1_ENA;
-		}
-		if (fifo2 & 2) {
-			hc->hw.fifo_en &= ~HFCPCI_FIFOEN_B2;
-			hc->hw.int_m1 &= ~(HFCPCI_INTS_B2TRANS |
-					   HFCPCI_INTS_B2REC);
-		} else {
-			hc->hw.fifo_en &= ~HFCPCI_FIFOEN_B1;
-			hc->hw.int_m1 &= ~(HFCPCI_INTS_B1TRANS |
-					   HFCPCI_INTS_B1REC);
-		}
-#ifdef REVERSE_BITORDER
-		if (bch->nr & 2)
-			hc->hw.cirm &= 0x7f;
-		else
-			hc->hw.cirm &= 0xbf;
-#endif
-		bch->state = ISDN_P_NONE;
-		bch->nr = bc;
-		test_and_clear_bit(FLG_HDLC, &bch->Flags);
-		test_and_clear_bit(FLG_TRANSPARENT, &bch->Flags);
-		break;
-	case (ISDN_P_B_RAW):
-		bch->state = protocol;
-		bch->nr = bc;
-		hfcpci_clear_fifo_rx(hc, (fifo2 & 2) ? 1 : 0);
-		hfcpci_clear_fifo_tx(hc, (fifo2 & 2) ? 1 : 0);
-		if (bc & 2) {
-			hc->hw.sctrl |= SCTRL_B2_ENA;
-			hc->hw.sctrl_r |= SCTRL_B2_ENA;
-#ifdef REVERSE_BITORDER
-			hc->hw.cirm |= 0x80;
-#endif
-		} else {
-			hc->hw.sctrl |= SCTRL_B1_ENA;
-			hc->hw.sctrl_r |= SCTRL_B1_ENA;
-#ifdef REVERSE_BITORDER
-			hc->hw.cirm |= 0x40;
-#endif
-		}
-		if (fifo2 & 2) {
-			hc->hw.fifo_en |= HFCPCI_FIFOEN_B2;
-			if (!tics)
-				hc->hw.int_m1 |= (HFCPCI_INTS_B2TRANS |
-						  HFCPCI_INTS_B2REC);
-			hc->hw.ctmt |= 2;
-			hc->hw.conn &= ~0x18;
-		} else {
-			hc->hw.fifo_en |= HFCPCI_FIFOEN_B1;
-			if (!tics)
-				hc->hw.int_m1 |= (HFCPCI_INTS_B1TRANS |
-						  HFCPCI_INTS_B1REC);
-			hc->hw.ctmt |= 1;
-			hc->hw.conn &= ~0x03;
-		}
-		test_and_set_bit(FLG_TRANSPARENT, &bch->Flags);
-		break;
-	case (ISDN_P_B_HDLC):
-		bch->state = protocol;
-		bch->nr = bc;
-		hfcpci_clear_fifo_rx(hc, (fifo2 & 2) ? 1 : 0);
-		hfcpci_clear_fifo_tx(hc, (fifo2 & 2) ? 1 : 0);
-		if (bc & 2) {
-			hc->hw.sctrl |= SCTRL_B2_ENA;
-			hc->hw.sctrl_r |= SCTRL_B2_ENA;
-		} else {
-			hc->hw.sctrl |= SCTRL_B1_ENA;
-			hc->hw.sctrl_r |= SCTRL_B1_ENA;
-		}
-		if (fifo2 & 2) {
-			hc->hw.last_bfifo_cnt[1] = 0;
-			hc->hw.fifo_en |= HFCPCI_FIFOEN_B2;
-			hc->hw.int_m1 |= (HFCPCI_INTS_B2TRANS |
-					  HFCPCI_INTS_B2REC);
-			hc->hw.ctmt &= ~2;
-			hc->hw.conn &= ~0x18;
-		} else {
-			hc->hw.last_bfifo_cnt[0] = 0;
-			hc->hw.fifo_en |= HFCPCI_FIFOEN_B1;
-			hc->hw.int_m1 |= (HFCPCI_INTS_B1TRANS |
-					  HFCPCI_INTS_B1REC);
-			hc->hw.ctmt &= ~1;
-			hc->hw.conn &= ~0x03;
-		}
-		test_and_set_bit(FLG_HDLC, &bch->Flags);
-		break;
-	default:
-		printk(KERN_DEBUG "prot not known %x\n", protocol);
-		return -ENOPROTOOPT;
-	}
-	if (test_bit(HFC_CFG_PCM, &hc->cfg)) {
-		if ((protocol == ISDN_P_NONE) ||
-		    (protocol == -1)) {	/* init case */
-			rx_slot = 0;
-			tx_slot = 0;
-		} else {
-			if (test_bit(HFC_CFG_SW_DD_DU, &hc->cfg)) {
-				rx_slot |= 0xC0;
-				tx_slot |= 0xC0;
-			} else {
-				rx_slot |= 0x80;
-				tx_slot |= 0x80;
-			}
-		}
-		if (bc & 2) {
-			hc->hw.conn &= 0xc7;
-			hc->hw.conn |= 0x08;
-			printk(KERN_DEBUG "%s: Write_hfc: B2_SSL 0x%x\n",
-			       __func__, tx_slot);
-			printk(KERN_DEBUG "%s: Write_hfc: B2_RSL 0x%x\n",
-			       __func__, rx_slot);
-			Write_hfc(hc, HFCPCI_B2_SSL, tx_slot);
-			Write_hfc(hc, HFCPCI_B2_RSL, rx_slot);
-		} else {
-			hc->hw.conn &= 0xf8;
-			hc->hw.conn |= 0x01;
-			printk(KERN_DEBUG "%s: Write_hfc: B1_SSL 0x%x\n",
-			       __func__, tx_slot);
-			printk(KERN_DEBUG "%s: Write_hfc: B1_RSL 0x%x\n",
-			       __func__, rx_slot);
-			Write_hfc(hc, HFCPCI_B1_SSL, tx_slot);
-			Write_hfc(hc, HFCPCI_B1_RSL, rx_slot);
-		}
-	}
-	Write_hfc(hc, HFCPCI_SCTRL_E, hc->hw.sctrl_e);
-	Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-	Write_hfc(hc, HFCPCI_FIFO_EN, hc->hw.fifo_en);
-	Write_hfc(hc, HFCPCI_SCTRL, hc->hw.sctrl);
-	Write_hfc(hc, HFCPCI_SCTRL_R, hc->hw.sctrl_r);
-	Write_hfc(hc, HFCPCI_CTMT, hc->hw.ctmt);
-	Write_hfc(hc, HFCPCI_CONNECT, hc->hw.conn);
-#ifdef REVERSE_BITORDER
-	Write_hfc(hc, HFCPCI_CIRM, hc->hw.cirm);
-#endif
-	return 0;
-}
-
-static int
-set_hfcpci_rxtest(struct bchannel *bch, int protocol, int chan)
-{
-	struct hfc_pci	*hc = bch->hw;
-
-	if (bch->debug & DEBUG_HW_BCHANNEL)
-		printk(KERN_DEBUG
-		       "HFCPCI bchannel test rx protocol %x-->%x ch %x-->%x\n",
-		       bch->state, protocol, bch->nr, chan);
-	if (bch->nr != chan) {
-		printk(KERN_DEBUG
-		       "HFCPCI rxtest wrong channel parameter %x/%x\n",
-		       bch->nr, chan);
-		return -EINVAL;
-	}
-	switch (protocol) {
-	case (ISDN_P_B_RAW):
-		bch->state = protocol;
-		hfcpci_clear_fifo_rx(hc, (chan & 2) ? 1 : 0);
-		if (chan & 2) {
-			hc->hw.sctrl_r |= SCTRL_B2_ENA;
-			hc->hw.fifo_en |= HFCPCI_FIFOEN_B2RX;
-			if (!tics)
-				hc->hw.int_m1 |= HFCPCI_INTS_B2REC;
-			hc->hw.ctmt |= 2;
-			hc->hw.conn &= ~0x18;
-#ifdef REVERSE_BITORDER
-			hc->hw.cirm |= 0x80;
-#endif
-		} else {
-			hc->hw.sctrl_r |= SCTRL_B1_ENA;
-			hc->hw.fifo_en |= HFCPCI_FIFOEN_B1RX;
-			if (!tics)
-				hc->hw.int_m1 |= HFCPCI_INTS_B1REC;
-			hc->hw.ctmt |= 1;
-			hc->hw.conn &= ~0x03;
-#ifdef REVERSE_BITORDER
-			hc->hw.cirm |= 0x40;
-#endif
-		}
-		break;
-	case (ISDN_P_B_HDLC):
-		bch->state = protocol;
-		hfcpci_clear_fifo_rx(hc, (chan & 2) ? 1 : 0);
-		if (chan & 2) {
-			hc->hw.sctrl_r |= SCTRL_B2_ENA;
-			hc->hw.last_bfifo_cnt[1] = 0;
-			hc->hw.fifo_en |= HFCPCI_FIFOEN_B2RX;
-			hc->hw.int_m1 |= HFCPCI_INTS_B2REC;
-			hc->hw.ctmt &= ~2;
-			hc->hw.conn &= ~0x18;
-		} else {
-			hc->hw.sctrl_r |= SCTRL_B1_ENA;
-			hc->hw.last_bfifo_cnt[0] = 0;
-			hc->hw.fifo_en |= HFCPCI_FIFOEN_B1RX;
-			hc->hw.int_m1 |= HFCPCI_INTS_B1REC;
-			hc->hw.ctmt &= ~1;
-			hc->hw.conn &= ~0x03;
-		}
-		break;
-	default:
-		printk(KERN_DEBUG "prot not known %x\n", protocol);
-		return -ENOPROTOOPT;
-	}
-	Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-	Write_hfc(hc, HFCPCI_FIFO_EN, hc->hw.fifo_en);
-	Write_hfc(hc, HFCPCI_SCTRL_R, hc->hw.sctrl_r);
-	Write_hfc(hc, HFCPCI_CTMT, hc->hw.ctmt);
-	Write_hfc(hc, HFCPCI_CONNECT, hc->hw.conn);
-#ifdef REVERSE_BITORDER
-	Write_hfc(hc, HFCPCI_CIRM, hc->hw.cirm);
-#endif
-	return 0;
-}
-
-static void
-deactivate_bchannel(struct bchannel *bch)
-{
-	struct hfc_pci	*hc = bch->hw;
-	u_long		flags;
-
-	spin_lock_irqsave(&hc->lock, flags);
-	mISDN_clear_bchannel(bch);
-	mode_hfcpci(bch, bch->nr, ISDN_P_NONE);
-	spin_unlock_irqrestore(&hc->lock, flags);
-}
-
-/*
- * Layer 1 B-channel hardware access
- */
-static int
-channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq)
-{
-	return mISDN_ctrl_bchannel(bch, cq);
-}
-static int
-hfc_bctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct bchannel	*bch = container_of(ch, struct bchannel, ch);
-	struct hfc_pci	*hc = bch->hw;
-	int		ret = -EINVAL;
-	u_long		flags;
-
-	if (bch->debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: cmd:%x %p\n", __func__, cmd, arg);
-	switch (cmd) {
-	case HW_TESTRX_RAW:
-		spin_lock_irqsave(&hc->lock, flags);
-		ret = set_hfcpci_rxtest(bch, ISDN_P_B_RAW, (int)(long)arg);
-		spin_unlock_irqrestore(&hc->lock, flags);
-		break;
-	case HW_TESTRX_HDLC:
-		spin_lock_irqsave(&hc->lock, flags);
-		ret = set_hfcpci_rxtest(bch, ISDN_P_B_HDLC, (int)(long)arg);
-		spin_unlock_irqrestore(&hc->lock, flags);
-		break;
-	case HW_TESTRX_OFF:
-		spin_lock_irqsave(&hc->lock, flags);
-		mode_hfcpci(bch, bch->nr, ISDN_P_NONE);
-		spin_unlock_irqrestore(&hc->lock, flags);
-		ret = 0;
-		break;
-	case CLOSE_CHANNEL:
-		test_and_clear_bit(FLG_OPEN, &bch->Flags);
-		deactivate_bchannel(bch);
-		ch->protocol = ISDN_P_NONE;
-		ch->peer = NULL;
-		module_put(THIS_MODULE);
-		ret = 0;
-		break;
-	case CONTROL_CHANNEL:
-		ret = channel_bctrl(bch, arg);
-		break;
-	default:
-		printk(KERN_WARNING "%s: unknown prim(%x)\n",
-		       __func__, cmd);
-	}
-	return ret;
-}
-
-/*
- * Layer2 -> Layer 1 Dchannel data
- */
-static int
-hfcpci_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct hfc_pci		*hc = dch->hw;
-	int			ret = -EINVAL;
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	unsigned int		id;
-	u_long			flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		spin_lock_irqsave(&hc->lock, flags);
-		ret = dchannel_senddata(dch, skb);
-		if (ret > 0) { /* direct TX */
-			id = hh->id; /* skb can be freed */
-			hfcpci_fill_dfifo(dch->hw);
-			ret = 0;
-			spin_unlock_irqrestore(&hc->lock, flags);
-			queue_ch_frame(ch, PH_DATA_CNF, id, NULL);
-		} else
-			spin_unlock_irqrestore(&hc->lock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		spin_lock_irqsave(&hc->lock, flags);
-		if (hc->hw.protocol == ISDN_P_NT_S0) {
-			ret = 0;
-			if (test_bit(HFC_CFG_MASTER, &hc->cfg))
-				hc->hw.mst_m |= HFCPCI_MASTER;
-			Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
-			if (test_bit(FLG_ACTIVE, &dch->Flags)) {
-				spin_unlock_irqrestore(&hc->lock, flags);
-				_queue_data(&dch->dev.D, PH_ACTIVATE_IND,
-					    MISDN_ID_ANY, 0, NULL, GFP_ATOMIC);
-				break;
-			}
-			test_and_set_bit(FLG_L2_ACTIVATED, &dch->Flags);
-			Write_hfc(hc, HFCPCI_STATES, HFCPCI_ACTIVATE |
-				  HFCPCI_DO_ACTION | 1);
-		} else
-			ret = l1_event(dch->l1, hh->prim);
-		spin_unlock_irqrestore(&hc->lock, flags);
-		break;
-	case PH_DEACTIVATE_REQ:
-		test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
-		spin_lock_irqsave(&hc->lock, flags);
-		if (hc->hw.protocol == ISDN_P_NT_S0) {
-			struct sk_buff_head free_queue;
-
-			__skb_queue_head_init(&free_queue);
-			/* prepare deactivation */
-			Write_hfc(hc, HFCPCI_STATES, 0x40);
-			skb_queue_splice_init(&dch->squeue, &free_queue);
-			if (dch->tx_skb) {
-				__skb_queue_tail(&free_queue, dch->tx_skb);
-				dch->tx_skb = NULL;
-			}
-			dch->tx_idx = 0;
-			if (dch->rx_skb) {
-				__skb_queue_tail(&free_queue, dch->rx_skb);
-				dch->rx_skb = NULL;
-			}
-			test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
-			if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags))
-				timer_delete(&dch->timer);
-#ifdef FIXME
-			if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags))
-				dchannel_sched_event(&hc->dch, D_CLEARBUSY);
-#endif
-			hc->hw.mst_m &= ~HFCPCI_MASTER;
-			Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
-			ret = 0;
-			spin_unlock_irqrestore(&hc->lock, flags);
-			__skb_queue_purge(&free_queue);
-		} else {
-			ret = l1_event(dch->l1, hh->prim);
-			spin_unlock_irqrestore(&hc->lock, flags);
-		}
-		break;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-/*
- * Layer2 -> Layer 1 Bchannel data
- */
-static int
-hfcpci_l2l1B(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct bchannel		*bch = container_of(ch, struct bchannel, ch);
-	struct hfc_pci		*hc = bch->hw;
-	int			ret = -EINVAL;
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	unsigned long		flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		spin_lock_irqsave(&hc->lock, flags);
-		ret = bchannel_senddata(bch, skb);
-		if (ret > 0) { /* direct TX */
-			hfcpci_fill_fifo(bch);
-			ret = 0;
-		}
-		spin_unlock_irqrestore(&hc->lock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		spin_lock_irqsave(&hc->lock, flags);
-		if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags))
-			ret = mode_hfcpci(bch, bch->nr, ch->protocol);
-		else
-			ret = 0;
-		spin_unlock_irqrestore(&hc->lock, flags);
-		if (!ret)
-			_queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY, 0,
-				    NULL, GFP_KERNEL);
-		break;
-	case PH_DEACTIVATE_REQ:
-		deactivate_bchannel(bch);
-		_queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0,
-			    NULL, GFP_KERNEL);
-		ret = 0;
-		break;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-/*
- * called for card init message
- */
-
-static void
-inithfcpci(struct hfc_pci *hc)
-{
-	printk(KERN_DEBUG "inithfcpci: entered\n");
-	timer_setup(&hc->dch.timer, hfcpci_dbusy_timer, 0);
-	hc->chanlimit = 2;
-	mode_hfcpci(&hc->bch[0], 1, -1);
-	mode_hfcpci(&hc->bch[1], 2, -1);
-}
-
-
-static int
-init_card(struct hfc_pci *hc)
-{
-	int	cnt = 3;
-	u_long	flags;
-
-	printk(KERN_DEBUG "init_card: entered\n");
-
-
-	spin_lock_irqsave(&hc->lock, flags);
-	disable_hwirq(hc);
-	spin_unlock_irqrestore(&hc->lock, flags);
-	if (request_irq(hc->irq, hfcpci_int, IRQF_SHARED, "HFC PCI", hc)) {
-		printk(KERN_WARNING
-		       "mISDN: couldn't get interrupt %d\n", hc->irq);
-		return -EIO;
-	}
-	spin_lock_irqsave(&hc->lock, flags);
-	reset_hfcpci(hc);
-	while (cnt) {
-		inithfcpci(hc);
-		/*
-		 * Finally enable IRQ output
-		 * this is only allowed, if an IRQ routine is already
-		 * established for this HFC, so don't do that earlier
-		 */
-		enable_hwirq(hc);
-		spin_unlock_irqrestore(&hc->lock, flags);
-		/* Timeout 80ms */
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout((80 * HZ) / 1000);
-		printk(KERN_INFO "HFC PCI: IRQ %d count %d\n",
-		       hc->irq, hc->irqcnt);
-		/* now switch timer interrupt off */
-		spin_lock_irqsave(&hc->lock, flags);
-		hc->hw.int_m1 &= ~HFCPCI_INTS_TIMER;
-		Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-		/* reinit mode reg */
-		Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
-		if (!hc->irqcnt) {
-			printk(KERN_WARNING
-			       "HFC PCI: IRQ(%d) getting no interrupts "
-			       "during init %d\n", hc->irq, 4 - cnt);
-			if (cnt == 1)
-				break;
-			else {
-				reset_hfcpci(hc);
-				cnt--;
-			}
-		} else {
-			spin_unlock_irqrestore(&hc->lock, flags);
-			hc->initdone = 1;
-			return 0;
-		}
-	}
-	disable_hwirq(hc);
-	spin_unlock_irqrestore(&hc->lock, flags);
-	free_irq(hc->irq, hc);
-	return -EIO;
-}
-
-static int
-channel_ctrl(struct hfc_pci *hc, struct mISDN_ctrl_req *cq)
-{
-	int	ret = 0;
-	u_char	slot;
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_LOOP | MISDN_CTRL_CONNECT |
-			 MISDN_CTRL_DISCONNECT | MISDN_CTRL_L1_TIMER3;
-		break;
-	case MISDN_CTRL_LOOP:
-		/* channel 0 disabled loop */
-		if (cq->channel < 0 || cq->channel > 2) {
-			ret = -EINVAL;
-			break;
-		}
-		if (cq->channel & 1) {
-			if (test_bit(HFC_CFG_SW_DD_DU, &hc->cfg))
-				slot = 0xC0;
-			else
-				slot = 0x80;
-			printk(KERN_DEBUG "%s: Write_hfc: B1_SSL/RSL 0x%x\n",
-			       __func__, slot);
-			Write_hfc(hc, HFCPCI_B1_SSL, slot);
-			Write_hfc(hc, HFCPCI_B1_RSL, slot);
-			hc->hw.conn = (hc->hw.conn & ~7) | 6;
-			Write_hfc(hc, HFCPCI_CONNECT, hc->hw.conn);
-		}
-		if (cq->channel & 2) {
-			if (test_bit(HFC_CFG_SW_DD_DU, &hc->cfg))
-				slot = 0xC1;
-			else
-				slot = 0x81;
-			printk(KERN_DEBUG "%s: Write_hfc: B2_SSL/RSL 0x%x\n",
-			       __func__, slot);
-			Write_hfc(hc, HFCPCI_B2_SSL, slot);
-			Write_hfc(hc, HFCPCI_B2_RSL, slot);
-			hc->hw.conn = (hc->hw.conn & ~0x38) | 0x30;
-			Write_hfc(hc, HFCPCI_CONNECT, hc->hw.conn);
-		}
-		if (cq->channel & 3)
-			hc->hw.trm |= 0x80;	/* enable IOM-loop */
-		else {
-			hc->hw.conn = (hc->hw.conn & ~0x3f) | 0x09;
-			Write_hfc(hc, HFCPCI_CONNECT, hc->hw.conn);
-			hc->hw.trm &= 0x7f;	/* disable IOM-loop */
-		}
-		Write_hfc(hc, HFCPCI_TRM, hc->hw.trm);
-		break;
-	case MISDN_CTRL_CONNECT:
-		if (cq->channel == cq->p1) {
-			ret = -EINVAL;
-			break;
-		}
-		if (cq->channel < 1 || cq->channel > 2 ||
-		    cq->p1 < 1 || cq->p1 > 2) {
-			ret = -EINVAL;
-			break;
-		}
-		if (test_bit(HFC_CFG_SW_DD_DU, &hc->cfg))
-			slot = 0xC0;
-		else
-			slot = 0x80;
-		printk(KERN_DEBUG "%s: Write_hfc: B1_SSL/RSL 0x%x\n",
-		       __func__, slot);
-		Write_hfc(hc, HFCPCI_B1_SSL, slot);
-		Write_hfc(hc, HFCPCI_B2_RSL, slot);
-		if (test_bit(HFC_CFG_SW_DD_DU, &hc->cfg))
-			slot = 0xC1;
-		else
-			slot = 0x81;
-		printk(KERN_DEBUG "%s: Write_hfc: B2_SSL/RSL 0x%x\n",
-		       __func__, slot);
-		Write_hfc(hc, HFCPCI_B2_SSL, slot);
-		Write_hfc(hc, HFCPCI_B1_RSL, slot);
-		hc->hw.conn = (hc->hw.conn & ~0x3f) | 0x36;
-		Write_hfc(hc, HFCPCI_CONNECT, hc->hw.conn);
-		hc->hw.trm |= 0x80;
-		Write_hfc(hc, HFCPCI_TRM, hc->hw.trm);
-		break;
-	case MISDN_CTRL_DISCONNECT:
-		hc->hw.conn = (hc->hw.conn & ~0x3f) | 0x09;
-		Write_hfc(hc, HFCPCI_CONNECT, hc->hw.conn);
-		hc->hw.trm &= 0x7f;	/* disable IOM-loop */
-		break;
-	case MISDN_CTRL_L1_TIMER3:
-		ret = l1_event(hc->dch.l1, HW_TIMER3_VALUE | (cq->p1 & 0xff));
-		break;
-	default:
-		printk(KERN_WARNING "%s: unknown Op %x\n",
-		       __func__, cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-open_dchannel(struct hfc_pci *hc, struct mISDNchannel *ch,
-	      struct channel_req *rq)
-{
-	int err = 0;
-
-	if (debug & DEBUG_HW_OPEN)
-		printk(KERN_DEBUG "%s: dev(%d) open from %p\n", __func__,
-		       hc->dch.dev.id, __builtin_return_address(0));
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	if (rq->adr.channel == 1) {
-		/* TODO: E-Channel */
-		return -EINVAL;
-	}
-	if (!hc->initdone) {
-		if (rq->protocol == ISDN_P_TE_S0) {
-			err = create_l1(&hc->dch, hfc_l1callback);
-			if (err)
-				return err;
-		}
-		hc->hw.protocol = rq->protocol;
-		ch->protocol = rq->protocol;
-		err = init_card(hc);
-		if (err)
-			return err;
-	} else {
-		if (rq->protocol != ch->protocol) {
-			if (hc->hw.protocol == ISDN_P_TE_S0)
-				l1_event(hc->dch.l1, CLOSE_CHANNEL);
-			if (rq->protocol == ISDN_P_TE_S0) {
-				err = create_l1(&hc->dch, hfc_l1callback);
-				if (err)
-					return err;
-			}
-			hc->hw.protocol = rq->protocol;
-			ch->protocol = rq->protocol;
-			hfcpci_setmode(hc);
-		}
-	}
-
-	if (((ch->protocol == ISDN_P_NT_S0) && (hc->dch.state == 3)) ||
-	    ((ch->protocol == ISDN_P_TE_S0) && (hc->dch.state == 7))) {
-		_queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY,
-			    0, NULL, GFP_KERNEL);
-	}
-	rq->ch = ch;
-	if (!try_module_get(THIS_MODULE))
-		printk(KERN_WARNING "%s:cannot get module\n", __func__);
-	return 0;
-}
-
-static int
-open_bchannel(struct hfc_pci *hc, struct channel_req *rq)
-{
-	struct bchannel		*bch;
-
-	if (rq->adr.channel == 0 || rq->adr.channel > 2)
-		return -EINVAL;
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	bch = &hc->bch[rq->adr.channel - 1];
-	if (test_and_set_bit(FLG_OPEN, &bch->Flags))
-		return -EBUSY; /* b-channel can be only open once */
-	bch->ch.protocol = rq->protocol;
-	rq->ch = &bch->ch; /* TODO: E-channel */
-	if (!try_module_get(THIS_MODULE))
-		printk(KERN_WARNING "%s:cannot get module\n", __func__);
-	return 0;
-}
-
-/*
- * device control function
- */
-static int
-hfc_dctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct hfc_pci		*hc = dch->hw;
-	struct channel_req	*rq;
-	int			err = 0;
-
-	if (dch->debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: cmd:%x %p\n",
-		       __func__, cmd, arg);
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		rq = arg;
-		if ((rq->protocol == ISDN_P_TE_S0) ||
-		    (rq->protocol == ISDN_P_NT_S0))
-			err = open_dchannel(hc, ch, rq);
-		else
-			err = open_bchannel(hc, rq);
-		break;
-	case CLOSE_CHANNEL:
-		if (debug & DEBUG_HW_OPEN)
-			printk(KERN_DEBUG "%s: dev(%d) close from %p\n",
-			       __func__, hc->dch.dev.id,
-			       __builtin_return_address(0));
-		module_put(THIS_MODULE);
-		break;
-	case CONTROL_CHANNEL:
-		err = channel_ctrl(hc, arg);
-		break;
-	default:
-		if (dch->debug & DEBUG_HW)
-			printk(KERN_DEBUG "%s: unknown command %x\n",
-			       __func__, cmd);
-		return -EINVAL;
-	}
-	return err;
-}
-
-static int
-setup_hw(struct hfc_pci *hc)
-{
-	void	*buffer;
-
-	printk(KERN_INFO "mISDN: HFC-PCI driver %s\n", hfcpci_revision);
-	hc->hw.cirm = 0;
-	hc->dch.state = 0;
-	pci_set_master(hc->pdev);
-	if (!hc->irq) {
-		printk(KERN_WARNING "HFC-PCI: No IRQ for PCI card found\n");
-		return -EINVAL;
-	}
-	hc->hw.pci_io =
-		(char __iomem *)(unsigned long)hc->pdev->resource[1].start;
-
-	if (!hc->hw.pci_io) {
-		printk(KERN_WARNING "HFC-PCI: No IO-Mem for PCI card found\n");
-		return -ENOMEM;
-	}
-	/* Allocate memory for FIFOS */
-	/* the memory needs to be on a 32k boundary within the first 4G */
-	if (dma_set_mask(&hc->pdev->dev, 0xFFFF8000)) {
-		printk(KERN_WARNING
-		       "HFC-PCI: No usable DMA configuration!\n");
-		return -EIO;
-	}
-	buffer = dma_alloc_coherent(&hc->pdev->dev, 0x8000, &hc->hw.dmahandle,
-				    GFP_KERNEL);
-	/* We silently assume the address is okay if nonzero */
-	if (!buffer) {
-		printk(KERN_WARNING
-		       "HFC-PCI: Error allocating memory for FIFO!\n");
-		return -ENOMEM;
-	}
-	hc->hw.fifos = buffer;
-	pci_write_config_dword(hc->pdev, 0x80, hc->hw.dmahandle);
-	hc->hw.pci_io = ioremap((ulong) hc->hw.pci_io, 256);
-	if (unlikely(!hc->hw.pci_io)) {
-		printk(KERN_WARNING
-		       "HFC-PCI: Error in ioremap for PCI!\n");
-		dma_free_coherent(&hc->pdev->dev, 0x8000, hc->hw.fifos,
-				  hc->hw.dmahandle);
-		return -ENOMEM;
-	}
-
-	printk(KERN_INFO
-	       "HFC-PCI: defined at mem %#lx fifo %p(%pad) IRQ %d HZ %d\n",
-	       (u_long) hc->hw.pci_io, hc->hw.fifos,
-	       &hc->hw.dmahandle, hc->irq, HZ);
-
-	/* enable memory mapped ports, disable busmaster */
-	pci_write_config_word(hc->pdev, PCI_COMMAND, PCI_ENA_MEMIO);
-	hc->hw.int_m2 = 0;
-	disable_hwirq(hc);
-	hc->hw.int_m1 = 0;
-	Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
-	/* At this point the needed PCI config is done */
-	/* fifos are still not enabled */
-	timer_setup(&hc->hw.timer, hfcpci_Timer, 0);
-	/* default PCM master */
-	test_and_set_bit(HFC_CFG_MASTER, &hc->cfg);
-	return 0;
-}
-
-static void
-release_card(struct hfc_pci *hc) {
-	u_long	flags;
-
-	spin_lock_irqsave(&hc->lock, flags);
-	hc->hw.int_m2 = 0; /* interrupt output off ! */
-	disable_hwirq(hc);
-	mode_hfcpci(&hc->bch[0], 1, ISDN_P_NONE);
-	mode_hfcpci(&hc->bch[1], 2, ISDN_P_NONE);
-	if (hc->dch.timer.function != NULL) {
-		timer_delete(&hc->dch.timer);
-		hc->dch.timer.function = NULL;
-	}
-	spin_unlock_irqrestore(&hc->lock, flags);
-	if (hc->hw.protocol == ISDN_P_TE_S0)
-		l1_event(hc->dch.l1, CLOSE_CHANNEL);
-	if (hc->initdone)
-		free_irq(hc->irq, hc);
-	release_io_hfcpci(hc); /* must release after free_irq! */
-	mISDN_unregister_device(&hc->dch.dev);
-	mISDN_freebchannel(&hc->bch[1]);
-	mISDN_freebchannel(&hc->bch[0]);
-	mISDN_freedchannel(&hc->dch);
-	pci_set_drvdata(hc->pdev, NULL);
-	kfree(hc);
-}
-
-static int
-setup_card(struct hfc_pci *card)
-{
-	int		err = -EINVAL;
-	u_int		i;
-	char		name[MISDN_MAX_IDLEN];
-
-	card->dch.debug = debug;
-	spin_lock_init(&card->lock);
-	mISDN_initdchannel(&card->dch, MAX_DFRAME_LEN_L1, ph_state);
-	card->dch.hw = card;
-	card->dch.dev.Dprotocols = (1 << ISDN_P_TE_S0) | (1 << ISDN_P_NT_S0);
-	card->dch.dev.Bprotocols = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK));
-	card->dch.dev.D.send = hfcpci_l2l1D;
-	card->dch.dev.D.ctrl = hfc_dctrl;
-	card->dch.dev.nrbchan = 2;
-	for (i = 0; i < 2; i++) {
-		card->bch[i].nr = i + 1;
-		set_channelmap(i + 1, card->dch.dev.channelmap);
-		card->bch[i].debug = debug;
-		mISDN_initbchannel(&card->bch[i], MAX_DATA_MEM, poll >> 1);
-		card->bch[i].hw = card;
-		card->bch[i].ch.send = hfcpci_l2l1B;
-		card->bch[i].ch.ctrl = hfc_bctrl;
-		card->bch[i].ch.nr = i + 1;
-		list_add(&card->bch[i].ch.list, &card->dch.dev.bchannels);
-	}
-	err = setup_hw(card);
-	if (err)
-		goto error;
-	snprintf(name, MISDN_MAX_IDLEN - 1, "hfc-pci.%d", HFC_cnt + 1);
-	err = mISDN_register_device(&card->dch.dev, &card->pdev->dev, name);
-	if (err)
-		goto error;
-	HFC_cnt++;
-	printk(KERN_INFO "HFC %d cards installed\n", HFC_cnt);
-	return 0;
-error:
-	mISDN_freebchannel(&card->bch[1]);
-	mISDN_freebchannel(&card->bch[0]);
-	mISDN_freedchannel(&card->dch);
-	kfree(card);
-	return err;
-}
-
-/* private data in the PCI devices list */
-struct _hfc_map {
-	u_int	subtype;
-	u_int	flag;
-	char	*name;
-};
-
-static const struct _hfc_map hfc_map[] =
-{
-	{HFC_CCD_2BD0, 0, "CCD/Billion/Asuscom 2BD0"},
-	{HFC_CCD_B000, 0, "Billion B000"},
-	{HFC_CCD_B006, 0, "Billion B006"},
-	{HFC_CCD_B007, 0, "Billion B007"},
-	{HFC_CCD_B008, 0, "Billion B008"},
-	{HFC_CCD_B009, 0, "Billion B009"},
-	{HFC_CCD_B00A, 0, "Billion B00A"},
-	{HFC_CCD_B00B, 0, "Billion B00B"},
-	{HFC_CCD_B00C, 0, "Billion B00C"},
-	{HFC_CCD_B100, 0, "Seyeon B100"},
-	{HFC_CCD_B700, 0, "Primux II S0 B700"},
-	{HFC_CCD_B701, 0, "Primux II S0 NT B701"},
-	{HFC_ABOCOM_2BD1, 0, "Abocom/Magitek 2BD1"},
-	{HFC_ASUS_0675, 0, "Asuscom/Askey 675"},
-	{HFC_BERKOM_TCONCEPT, 0, "German telekom T-Concept"},
-	{HFC_BERKOM_A1T, 0, "German telekom A1T"},
-	{HFC_ANIGMA_MC145575, 0, "Motorola MC145575"},
-	{HFC_ZOLTRIX_2BD0, 0, "Zoltrix 2BD0"},
-	{HFC_DIGI_DF_M_IOM2_E, 0,
-	 "Digi International DataFire Micro V IOM2 (Europe)"},
-	{HFC_DIGI_DF_M_E, 0,
-	 "Digi International DataFire Micro V (Europe)"},
-	{HFC_DIGI_DF_M_IOM2_A, 0,
-	 "Digi International DataFire Micro V IOM2 (North America)"},
-	{HFC_DIGI_DF_M_A, 0,
-	 "Digi International DataFire Micro V (North America)"},
-	{HFC_SITECOM_DC105V2, 0, "Sitecom Connectivity DC-105 ISDN TA"},
-	{},
-};
-
-static const struct pci_device_id hfc_ids[] =
-{
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_2BD0),
-	  (unsigned long) &hfc_map[0] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B000),
-	  (unsigned long) &hfc_map[1] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B006),
-	  (unsigned long) &hfc_map[2] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B007),
-	  (unsigned long) &hfc_map[3] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B008),
-	  (unsigned long) &hfc_map[4] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B009),
-	  (unsigned long) &hfc_map[5] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B00A),
-	  (unsigned long) &hfc_map[6] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B00B),
-	  (unsigned long) &hfc_map[7] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B00C),
-	  (unsigned long) &hfc_map[8] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B100),
-	  (unsigned long) &hfc_map[9] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B700),
-	  (unsigned long) &hfc_map[10] },
-	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_B701),
-	  (unsigned long) &hfc_map[11] },
-	{ PCI_VDEVICE(ABOCOM, PCI_DEVICE_ID_ABOCOM_2BD1),
-	  (unsigned long) &hfc_map[12] },
-	{ PCI_VDEVICE(ASUSTEK, PCI_DEVICE_ID_ASUSTEK_0675),
-	  (unsigned long) &hfc_map[13] },
-	{ PCI_VDEVICE(BERKOM, PCI_DEVICE_ID_BERKOM_T_CONCEPT),
-	  (unsigned long) &hfc_map[14] },
-	{ PCI_VDEVICE(BERKOM, PCI_DEVICE_ID_BERKOM_A1T),
-	  (unsigned long) &hfc_map[15] },
-	{ PCI_VDEVICE(ANIGMA, PCI_DEVICE_ID_ANIGMA_MC145575),
-	  (unsigned long) &hfc_map[16] },
-	{ PCI_VDEVICE(ZOLTRIX, PCI_DEVICE_ID_ZOLTRIX_2BD0),
-	  (unsigned long) &hfc_map[17] },
-	{ PCI_VDEVICE(DIGI, PCI_DEVICE_ID_DIGI_DF_M_IOM2_E),
-	  (unsigned long) &hfc_map[18] },
-	{ PCI_VDEVICE(DIGI, PCI_DEVICE_ID_DIGI_DF_M_E),
-	  (unsigned long) &hfc_map[19] },
-	{ PCI_VDEVICE(DIGI, PCI_DEVICE_ID_DIGI_DF_M_IOM2_A),
-	  (unsigned long) &hfc_map[20] },
-	{ PCI_VDEVICE(DIGI, PCI_DEVICE_ID_DIGI_DF_M_A),
-	  (unsigned long) &hfc_map[21] },
-	{ PCI_VDEVICE(SITECOM, PCI_DEVICE_ID_SITECOM_DC105V2),
-	  (unsigned long) &hfc_map[22] },
-	{},
-};
-
-static int
-hfc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	int		err = -ENOMEM;
-	struct hfc_pci	*card;
-	struct _hfc_map	*m = (struct _hfc_map *)ent->driver_data;
-
-	card = kzalloc_obj(struct hfc_pci);
-	if (!card) {
-		printk(KERN_ERR "No kmem for HFC card\n");
-		return err;
-	}
-	card->pdev = pdev;
-	card->subtype = m->subtype;
-	err = pci_enable_device(pdev);
-	if (err) {
-		kfree(card);
-		return err;
-	}
-
-	printk(KERN_INFO "mISDN_hfcpci: found adapter %s at %s\n",
-	       m->name, pci_name(pdev));
-
-	card->irq = pdev->irq;
-	pci_set_drvdata(pdev, card);
-	err = setup_card(card);
-	if (err)
-		pci_set_drvdata(pdev, NULL);
-	return err;
-}
-
-static void
-hfc_remove_pci(struct pci_dev *pdev)
-{
-	struct hfc_pci	*card = pci_get_drvdata(pdev);
-
-	if (card)
-		release_card(card);
-	else
-		if (debug)
-			printk(KERN_DEBUG "%s: drvdata already removed\n",
-			       __func__);
-}
-
-
-static struct pci_driver hfc_driver = {
-	.name = "hfcpci",
-	.probe = hfc_probe,
-	.remove = hfc_remove_pci,
-	.id_table = hfc_ids,
-};
-
-static int
-_hfcpci_softirq(struct device *dev, void *unused)
-{
-	struct hfc_pci  *hc = dev_get_drvdata(dev);
-	struct bchannel *bch;
-	if (hc == NULL)
-		return 0;
-
-	if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) {
-		spin_lock_irq(&hc->lock);
-		bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1);
-		if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */
-			main_rec_hfcpci(bch);
-			tx_birq(bch);
-		}
-		bch = Sel_BCS(hc, hc->hw.bswapped ? 1 : 2);
-		if (bch && bch->state == ISDN_P_B_RAW) { /* B2 rx&tx */
-			main_rec_hfcpci(bch);
-			tx_birq(bch);
-		}
-		spin_unlock_irq(&hc->lock);
-	}
-	return 0;
-}
-
-static void
-hfcpci_softirq(struct timer_list *unused)
-{
-	WARN_ON_ONCE(driver_for_each_device(&hfc_driver.driver, NULL, NULL,
-				      _hfcpci_softirq) != 0);
-
-	/* if next event would be in the past ... */
-	if ((s32)(hfc_jiffies + tics - jiffies) <= 0)
-		hfc_jiffies = jiffies + 1;
-	else
-		hfc_jiffies += tics;
-	mod_timer(&hfc_tl, hfc_jiffies);
-}
-
-static int __init
-HFC_init(void)
-{
-	int		err;
-
-	if (!poll)
-		poll = HFCPCI_BTRANS_THRESHOLD;
-
-	if (poll != HFCPCI_BTRANS_THRESHOLD) {
-		tics = (poll * HZ) / 8000;
-		if (tics < 1)
-			tics = 1;
-		poll = (tics * 8000) / HZ;
-		if (poll > 256 || poll < 8) {
-			printk(KERN_ERR "%s: Wrong poll value %d not in range "
-			       "of 8..256.\n", __func__, poll);
-			err = -EINVAL;
-			return err;
-		}
-	}
-	if (poll != HFCPCI_BTRANS_THRESHOLD) {
-		printk(KERN_INFO "%s: Using alternative poll value of %d\n",
-		       __func__, poll);
-		hfc_jiffies = jiffies + tics;
-		mod_timer(&hfc_tl, hfc_jiffies);
-	} else
-		tics = 0; /* indicate the use of controller's timer */
-
-	err = pci_register_driver(&hfc_driver);
-	if (err) {
-		if (timer_pending(&hfc_tl))
-			timer_delete(&hfc_tl);
-	}
-
-	return err;
-}
-
-static void __exit
-HFC_cleanup(void)
-{
-	timer_delete_sync(&hfc_tl);
-
-	pci_unregister_driver(&hfc_driver);
-}
-
-module_init(HFC_init);
-module_exit(HFC_cleanup);
-
-MODULE_DEVICE_TABLE(pci, hfc_ids);
diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
deleted file mode 100644
index 227babe83879..000000000000
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ /dev/null
@@ -1,2157 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* hfcsusb.c
- * mISDN driver for Colognechip HFC-S USB chip
- *
- * Copyright 2001 by Peter Sprenger (sprenger@moving-bytes.de)
- * Copyright 2008 by Martin Bachem (info@bachem-it.com)
- *
- * module params
- *   debug=<n>, default=0, with n=0xHHHHGGGG
- *      H - l1 driver flags described in hfcsusb.h
- *      G - common mISDN debug flags described at mISDNhw.h
- *
- *   poll=<n>, default 128
- *     n : burst size of PH_DATA_IND at transparent rx data
- *
- * Revision: 0.3.3 (socket), 2008-11-05
- */
-
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/usb.h>
-#include <linux/mISDNhw.h>
-#include <linux/slab.h>
-#include "hfcsusb.h"
-
-static unsigned int debug;
-static int poll = DEFAULT_TRANSP_BURST_SZ;
-
-static LIST_HEAD(HFClist);
-static DEFINE_RWLOCK(HFClock);
-
-
-MODULE_AUTHOR("Martin Bachem");
-MODULE_DESCRIPTION("mISDN driver for Colognechip HFC-S USB chip");
-MODULE_LICENSE("GPL");
-module_param(debug, uint, S_IRUGO | S_IWUSR);
-module_param(poll, int, 0);
-
-static int hfcsusb_cnt;
-
-/* some function prototypes */
-static void hfcsusb_ph_command(struct hfcsusb *hw, u_char command);
-static void release_hw(struct hfcsusb *hw);
-static void reset_hfcsusb(struct hfcsusb *hw);
-static void setPortMode(struct hfcsusb *hw);
-static void hfcsusb_start_endpoint(struct hfcsusb *hw, int channel);
-static void hfcsusb_stop_endpoint(struct hfcsusb *hw, int channel);
-static int  hfcsusb_setup_bch(struct bchannel *bch, int protocol);
-static void deactivate_bchannel(struct bchannel *bch);
-static int  hfcsusb_ph_info(struct hfcsusb *hw);
-
-/* start next background transfer for control channel */
-static void
-ctrl_start_transfer(struct hfcsusb *hw)
-{
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s\n", hw->name, __func__);
-
-	if (hw->ctrl_cnt) {
-		hw->ctrl_urb->pipe = hw->ctrl_out_pipe;
-		hw->ctrl_urb->setup_packet = (u_char *)&hw->ctrl_write;
-		hw->ctrl_urb->transfer_buffer = NULL;
-		hw->ctrl_urb->transfer_buffer_length = 0;
-		hw->ctrl_write.wIndex =
-			cpu_to_le16(hw->ctrl_buff[hw->ctrl_out_idx].hfcs_reg);
-		hw->ctrl_write.wValue =
-			cpu_to_le16(hw->ctrl_buff[hw->ctrl_out_idx].reg_val);
-
-		usb_submit_urb(hw->ctrl_urb, GFP_ATOMIC);
-	}
-}
-
-/*
- * queue a control transfer request to write HFC-S USB
- * chip register using CTRL resuest queue
- */
-static int write_reg(struct hfcsusb *hw, __u8 reg, __u8 val)
-{
-	struct ctrl_buf *buf;
-
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s reg(0x%02x) val(0x%02x)\n",
-		       hw->name, __func__, reg, val);
-
-	spin_lock(&hw->ctrl_lock);
-	if (hw->ctrl_cnt >= HFC_CTRL_BUFSIZE) {
-		spin_unlock(&hw->ctrl_lock);
-		return 1;
-	}
-	buf = &hw->ctrl_buff[hw->ctrl_in_idx];
-	buf->hfcs_reg = reg;
-	buf->reg_val = val;
-	if (++hw->ctrl_in_idx >= HFC_CTRL_BUFSIZE)
-		hw->ctrl_in_idx = 0;
-	if (++hw->ctrl_cnt == 1)
-		ctrl_start_transfer(hw);
-	spin_unlock(&hw->ctrl_lock);
-
-	return 0;
-}
-
-/* control completion routine handling background control cmds */
-static void
-ctrl_complete(struct urb *urb)
-{
-	struct hfcsusb *hw = (struct hfcsusb *) urb->context;
-
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s\n", hw->name, __func__);
-
-	urb->dev = hw->dev;
-	if (hw->ctrl_cnt) {
-		hw->ctrl_cnt--;	/* decrement actual count */
-		if (++hw->ctrl_out_idx >= HFC_CTRL_BUFSIZE)
-			hw->ctrl_out_idx = 0;	/* pointer wrap */
-
-		ctrl_start_transfer(hw); /* start next transfer */
-	}
-}
-
-/* handle LED bits   */
-static void
-set_led_bit(struct hfcsusb *hw, signed short led_bits, int set_on)
-{
-	if (set_on) {
-		if (led_bits < 0)
-			hw->led_state &= ~abs(led_bits);
-		else
-			hw->led_state |= led_bits;
-	} else {
-		if (led_bits < 0)
-			hw->led_state |= abs(led_bits);
-		else
-			hw->led_state &= ~led_bits;
-	}
-}
-
-/* handle LED requests  */
-static void
-handle_led(struct hfcsusb *hw, int event)
-{
-	struct hfcsusb_vdata *driver_info = (struct hfcsusb_vdata *)
-		hfcsusb_idtab[hw->vend_idx].driver_info;
-	__u8 tmpled;
-
-	if (driver_info->led_scheme == LED_OFF)
-		return;
-	tmpled = hw->led_state;
-
-	switch (event) {
-	case LED_POWER_ON:
-		set_led_bit(hw, driver_info->led_bits[0], 1);
-		set_led_bit(hw, driver_info->led_bits[1], 0);
-		set_led_bit(hw, driver_info->led_bits[2], 0);
-		set_led_bit(hw, driver_info->led_bits[3], 0);
-		break;
-	case LED_POWER_OFF:
-		set_led_bit(hw, driver_info->led_bits[0], 0);
-		set_led_bit(hw, driver_info->led_bits[1], 0);
-		set_led_bit(hw, driver_info->led_bits[2], 0);
-		set_led_bit(hw, driver_info->led_bits[3], 0);
-		break;
-	case LED_S0_ON:
-		set_led_bit(hw, driver_info->led_bits[1], 1);
-		break;
-	case LED_S0_OFF:
-		set_led_bit(hw, driver_info->led_bits[1], 0);
-		break;
-	case LED_B1_ON:
-		set_led_bit(hw, driver_info->led_bits[2], 1);
-		break;
-	case LED_B1_OFF:
-		set_led_bit(hw, driver_info->led_bits[2], 0);
-		break;
-	case LED_B2_ON:
-		set_led_bit(hw, driver_info->led_bits[3], 1);
-		break;
-	case LED_B2_OFF:
-		set_led_bit(hw, driver_info->led_bits[3], 0);
-		break;
-	}
-
-	if (hw->led_state != tmpled) {
-		if (debug & DBG_HFC_CALL_TRACE)
-			printk(KERN_DEBUG "%s: %s reg(0x%02x) val(x%02x)\n",
-			       hw->name, __func__,
-			       HFCUSB_P_DATA, hw->led_state);
-
-		write_reg(hw, HFCUSB_P_DATA, hw->led_state);
-	}
-}
-
-/*
- * Layer2 -> Layer 1 Bchannel data
- */
-static int
-hfcusb_l2l1B(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct bchannel		*bch = container_of(ch, struct bchannel, ch);
-	struct hfcsusb		*hw = bch->hw;
-	int			ret = -EINVAL;
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	u_long			flags;
-
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s\n", hw->name, __func__);
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		spin_lock_irqsave(&hw->lock, flags);
-		ret = bchannel_senddata(bch, skb);
-		spin_unlock_irqrestore(&hw->lock, flags);
-		if (debug & DBG_HFC_CALL_TRACE)
-			printk(KERN_DEBUG "%s: %s PH_DATA_REQ ret(%i)\n",
-			       hw->name, __func__, ret);
-		if (ret > 0)
-			ret = 0;
-		return ret;
-	case PH_ACTIVATE_REQ:
-		if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags)) {
-			hfcsusb_start_endpoint(hw, bch->nr - 1);
-			ret = hfcsusb_setup_bch(bch, ch->protocol);
-		} else
-			ret = 0;
-		if (!ret)
-			_queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY,
-				    0, NULL, GFP_KERNEL);
-		break;
-	case PH_DEACTIVATE_REQ:
-		deactivate_bchannel(bch);
-		_queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY,
-			    0, NULL, GFP_KERNEL);
-		ret = 0;
-		break;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-/*
- * send full D/B channel status information
- * as MPH_INFORMATION_IND
- */
-static int
-hfcsusb_ph_info(struct hfcsusb *hw)
-{
-	struct ph_info *phi;
-	struct dchannel *dch = &hw->dch;
-	int i;
-
-	phi = kzalloc_flex(*phi, bch, dch->dev.nrbchan, GFP_ATOMIC);
-	if (!phi)
-		return -ENOMEM;
-
-	phi->dch.ch.protocol = hw->protocol;
-	phi->dch.ch.Flags = dch->Flags;
-	phi->dch.state = dch->state;
-	phi->dch.num_bch = dch->dev.nrbchan;
-	for (i = 0; i < dch->dev.nrbchan; i++) {
-		phi->bch[i].protocol = hw->bch[i].ch.protocol;
-		phi->bch[i].Flags = hw->bch[i].Flags;
-	}
-	_queue_data(&dch->dev.D, MPH_INFORMATION_IND, MISDN_ID_ANY,
-		    struct_size(phi, bch, dch->dev.nrbchan), phi, GFP_ATOMIC);
-	kfree(phi);
-
-	return 0;
-}
-
-/*
- * Layer2 -> Layer 1 Dchannel data
- */
-static int
-hfcusb_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	struct hfcsusb		*hw = dch->hw;
-	int			ret = -EINVAL;
-	u_long			flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		if (debug & DBG_HFC_CALL_TRACE)
-			printk(KERN_DEBUG "%s: %s: PH_DATA_REQ\n",
-			       hw->name, __func__);
-
-		spin_lock_irqsave(&hw->lock, flags);
-		ret = dchannel_senddata(dch, skb);
-		spin_unlock_irqrestore(&hw->lock, flags);
-		if (ret > 0) {
-			ret = 0;
-			queue_ch_frame(ch, PH_DATA_CNF, hh->id, NULL);
-		}
-		break;
-
-	case PH_ACTIVATE_REQ:
-		if (debug & DBG_HFC_CALL_TRACE)
-			printk(KERN_DEBUG "%s: %s: PH_ACTIVATE_REQ %s\n",
-			       hw->name, __func__,
-			       (hw->protocol == ISDN_P_NT_S0) ? "NT" : "TE");
-
-		if (hw->protocol == ISDN_P_NT_S0) {
-			ret = 0;
-			if (test_bit(FLG_ACTIVE, &dch->Flags)) {
-				_queue_data(&dch->dev.D,
-					    PH_ACTIVATE_IND, MISDN_ID_ANY, 0,
-					    NULL, GFP_ATOMIC);
-			} else {
-				hfcsusb_ph_command(hw,
-						   HFC_L1_ACTIVATE_NT);
-				test_and_set_bit(FLG_L2_ACTIVATED,
-						 &dch->Flags);
-			}
-		} else {
-			hfcsusb_ph_command(hw, HFC_L1_ACTIVATE_TE);
-			ret = l1_event(dch->l1, hh->prim);
-		}
-		break;
-
-	case PH_DEACTIVATE_REQ:
-		if (debug & DBG_HFC_CALL_TRACE)
-			printk(KERN_DEBUG "%s: %s: PH_DEACTIVATE_REQ\n",
-			       hw->name, __func__);
-		test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
-
-		if (hw->protocol == ISDN_P_NT_S0) {
-			struct sk_buff_head free_queue;
-
-			__skb_queue_head_init(&free_queue);
-			hfcsusb_ph_command(hw, HFC_L1_DEACTIVATE_NT);
-			spin_lock_irqsave(&hw->lock, flags);
-			skb_queue_splice_init(&dch->squeue, &free_queue);
-			if (dch->tx_skb) {
-				__skb_queue_tail(&free_queue, dch->tx_skb);
-				dch->tx_skb = NULL;
-			}
-			dch->tx_idx = 0;
-			if (dch->rx_skb) {
-				__skb_queue_tail(&free_queue, dch->rx_skb);
-				dch->rx_skb = NULL;
-			}
-			test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
-			spin_unlock_irqrestore(&hw->lock, flags);
-			__skb_queue_purge(&free_queue);
-#ifdef FIXME
-			if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags))
-				dchannel_sched_event(&hc->dch, D_CLEARBUSY);
-#endif
-			ret = 0;
-		} else
-			ret = l1_event(dch->l1, hh->prim);
-		break;
-	case MPH_INFORMATION_REQ:
-		ret = hfcsusb_ph_info(hw);
-		break;
-	}
-
-	return ret;
-}
-
-/*
- * Layer 1 callback function
- */
-static int
-hfc_l1callback(struct dchannel *dch, u_int cmd)
-{
-	struct hfcsusb *hw = dch->hw;
-
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s cmd 0x%x\n",
-		       hw->name, __func__, cmd);
-
-	switch (cmd) {
-	case INFO3_P8:
-	case INFO3_P10:
-	case HW_RESET_REQ:
-	case HW_POWERUP_REQ:
-		break;
-
-	case HW_DEACT_REQ:
-		skb_queue_purge(&dch->squeue);
-		if (dch->tx_skb) {
-			dev_kfree_skb(dch->tx_skb);
-			dch->tx_skb = NULL;
-		}
-		dch->tx_idx = 0;
-		if (dch->rx_skb) {
-			dev_kfree_skb(dch->rx_skb);
-			dch->rx_skb = NULL;
-		}
-		test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
-		break;
-	case PH_ACTIVATE_IND:
-		test_and_set_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL,
-			    GFP_ATOMIC);
-		break;
-	case PH_DEACTIVATE_IND:
-		test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL,
-			    GFP_ATOMIC);
-		break;
-	default:
-		if (dch->debug & DEBUG_HW)
-			printk(KERN_DEBUG "%s: %s: unknown cmd %x\n",
-			       hw->name, __func__, cmd);
-		return -1;
-	}
-	return hfcsusb_ph_info(hw);
-}
-
-static int
-open_dchannel(struct hfcsusb *hw, struct mISDNchannel *ch,
-	      struct channel_req *rq)
-{
-	int err = 0;
-
-	if (debug & DEBUG_HW_OPEN)
-		printk(KERN_DEBUG "%s: %s: dev(%d) open addr(%i) from %p\n",
-		       hw->name, __func__, hw->dch.dev.id, rq->adr.channel,
-		       __builtin_return_address(0));
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-
-	test_and_clear_bit(FLG_ACTIVE, &hw->dch.Flags);
-	test_and_clear_bit(FLG_ACTIVE, &hw->ech.Flags);
-	hfcsusb_start_endpoint(hw, HFC_CHAN_D);
-
-	/* E-Channel logging */
-	if (rq->adr.channel == 1) {
-		if (hw->fifos[HFCUSB_PCM_RX].pipe) {
-			hfcsusb_start_endpoint(hw, HFC_CHAN_E);
-			set_bit(FLG_ACTIVE, &hw->ech.Flags);
-			_queue_data(&hw->ech.dev.D, PH_ACTIVATE_IND,
-				    MISDN_ID_ANY, 0, NULL, GFP_ATOMIC);
-		} else
-			return -EINVAL;
-	}
-
-	if (!hw->initdone) {
-		hw->protocol = rq->protocol;
-		if (rq->protocol == ISDN_P_TE_S0) {
-			err = create_l1(&hw->dch, hfc_l1callback);
-			if (err)
-				return err;
-		}
-		setPortMode(hw);
-		ch->protocol = rq->protocol;
-		hw->initdone = 1;
-	} else {
-		if (rq->protocol != ch->protocol)
-			return -EPROTONOSUPPORT;
-	}
-
-	if (((ch->protocol == ISDN_P_NT_S0) && (hw->dch.state == 3)) ||
-	    ((ch->protocol == ISDN_P_TE_S0) && (hw->dch.state == 7)))
-		_queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY,
-			    0, NULL, GFP_KERNEL);
-	rq->ch = ch;
-	if (!try_module_get(THIS_MODULE))
-		printk(KERN_WARNING "%s: %s: cannot get module\n",
-		       hw->name, __func__);
-	return 0;
-}
-
-static int
-open_bchannel(struct hfcsusb *hw, struct channel_req *rq)
-{
-	struct bchannel		*bch;
-
-	if (rq->adr.channel == 0 || rq->adr.channel > 2)
-		return -EINVAL;
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s B%i\n",
-		       hw->name, __func__, rq->adr.channel);
-
-	bch = &hw->bch[rq->adr.channel - 1];
-	if (test_and_set_bit(FLG_OPEN, &bch->Flags))
-		return -EBUSY; /* b-channel can be only open once */
-	bch->ch.protocol = rq->protocol;
-	rq->ch = &bch->ch;
-
-	if (!try_module_get(THIS_MODULE))
-		printk(KERN_WARNING "%s: %s:cannot get module\n",
-		       hw->name, __func__);
-	return 0;
-}
-
-static int
-channel_ctrl(struct hfcsusb *hw, struct mISDN_ctrl_req *cq)
-{
-	int ret = 0;
-
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s op(0x%x) channel(0x%x)\n",
-		       hw->name, __func__, (cq->op), (cq->channel));
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_LOOP | MISDN_CTRL_CONNECT |
-			MISDN_CTRL_DISCONNECT;
-		break;
-	default:
-		printk(KERN_WARNING "%s: %s: unknown Op %x\n",
-		       hw->name, __func__, cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-/*
- * device control function
- */
-static int
-hfc_dctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct hfcsusb		*hw = dch->hw;
-	struct channel_req	*rq;
-	int			err = 0;
-
-	if (dch->debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: %s: cmd:%x %p\n",
-		       hw->name, __func__, cmd, arg);
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		rq = arg;
-		if ((rq->protocol == ISDN_P_TE_S0) ||
-		    (rq->protocol == ISDN_P_NT_S0))
-			err = open_dchannel(hw, ch, rq);
-		else
-			err = open_bchannel(hw, rq);
-		if (!err)
-			hw->open++;
-		break;
-	case CLOSE_CHANNEL:
-		hw->open--;
-		if (debug & DEBUG_HW_OPEN)
-			printk(KERN_DEBUG
-			       "%s: %s: dev(%d) close from %p (open %d)\n",
-			       hw->name, __func__, hw->dch.dev.id,
-			       __builtin_return_address(0), hw->open);
-		if (!hw->open) {
-			hfcsusb_stop_endpoint(hw, HFC_CHAN_D);
-			if (hw->fifos[HFCUSB_PCM_RX].pipe)
-				hfcsusb_stop_endpoint(hw, HFC_CHAN_E);
-			handle_led(hw, LED_POWER_ON);
-		}
-		module_put(THIS_MODULE);
-		break;
-	case CONTROL_CHANNEL:
-		err = channel_ctrl(hw, arg);
-		break;
-	default:
-		if (dch->debug & DEBUG_HW)
-			printk(KERN_DEBUG "%s: %s: unknown command %x\n",
-			       hw->name, __func__, cmd);
-		return -EINVAL;
-	}
-	return err;
-}
-
-/*
- * S0 TE state change event handler
- */
-static void
-ph_state_te(struct dchannel *dch)
-{
-	struct hfcsusb *hw = dch->hw;
-
-	if (debug & DEBUG_HW) {
-		if (dch->state <= HFC_MAX_TE_LAYER1_STATE)
-			printk(KERN_DEBUG "%s: %s: %s\n", hw->name, __func__,
-			       HFC_TE_LAYER1_STATES[dch->state]);
-		else
-			printk(KERN_DEBUG "%s: %s: TE F%d\n",
-			       hw->name, __func__, dch->state);
-	}
-
-	switch (dch->state) {
-	case 0:
-		l1_event(dch->l1, HW_RESET_IND);
-		break;
-	case 3:
-		l1_event(dch->l1, HW_DEACT_IND);
-		break;
-	case 5:
-	case 8:
-		l1_event(dch->l1, ANYSIGNAL);
-		break;
-	case 6:
-		l1_event(dch->l1, INFO2);
-		break;
-	case 7:
-		l1_event(dch->l1, INFO4_P8);
-		break;
-	}
-	if (dch->state == 7)
-		handle_led(hw, LED_S0_ON);
-	else
-		handle_led(hw, LED_S0_OFF);
-}
-
-/*
- * S0 NT state change event handler
- */
-static void
-ph_state_nt(struct dchannel *dch)
-{
-	struct hfcsusb *hw = dch->hw;
-
-	if (debug & DEBUG_HW) {
-		if (dch->state <= HFC_MAX_NT_LAYER1_STATE)
-			printk(KERN_DEBUG "%s: %s: %s\n",
-			       hw->name, __func__,
-			       HFC_NT_LAYER1_STATES[dch->state]);
-
-		else
-			printk(KERN_INFO DRIVER_NAME "%s: %s: NT G%d\n",
-			       hw->name, __func__, dch->state);
-	}
-
-	switch (dch->state) {
-	case (1):
-		test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-		test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
-		hw->nt_timer = 0;
-		hw->timers &= ~NT_ACTIVATION_TIMER;
-		handle_led(hw, LED_S0_OFF);
-		break;
-
-	case (2):
-		if (hw->nt_timer < 0) {
-			hw->nt_timer = 0;
-			hw->timers &= ~NT_ACTIVATION_TIMER;
-			hfcsusb_ph_command(dch->hw, HFC_L1_DEACTIVATE_NT);
-		} else {
-			hw->timers |= NT_ACTIVATION_TIMER;
-			hw->nt_timer = NT_T1_COUNT;
-			/* allow G2 -> G3 transition */
-			write_reg(hw, HFCUSB_STATES, 2 | HFCUSB_NT_G2_G3);
-		}
-		break;
-	case (3):
-		hw->nt_timer = 0;
-		hw->timers &= ~NT_ACTIVATION_TIMER;
-		test_and_set_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, PH_ACTIVATE_IND,
-			    MISDN_ID_ANY, 0, NULL, GFP_ATOMIC);
-		handle_led(hw, LED_S0_ON);
-		break;
-	case (4):
-		hw->nt_timer = 0;
-		hw->timers &= ~NT_ACTIVATION_TIMER;
-		break;
-	default:
-		break;
-	}
-	hfcsusb_ph_info(hw);
-}
-
-static void
-ph_state(struct dchannel *dch)
-{
-	struct hfcsusb *hw = dch->hw;
-
-	if (hw->protocol == ISDN_P_NT_S0)
-		ph_state_nt(dch);
-	else if (hw->protocol == ISDN_P_TE_S0)
-		ph_state_te(dch);
-}
-
-/*
- * disable/enable BChannel for desired protocol
- */
-static int
-hfcsusb_setup_bch(struct bchannel *bch, int protocol)
-{
-	struct hfcsusb *hw = bch->hw;
-	__u8 conhdlc, sctrl, sctrl_r;
-
-	if (debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: %s: protocol %x-->%x B%d\n",
-		       hw->name, __func__, bch->state, protocol,
-		       bch->nr);
-
-	/* setup val for CON_HDLC */
-	conhdlc = 0;
-	if (protocol > ISDN_P_NONE)
-		conhdlc = 8;	/* enable FIFO */
-
-	switch (protocol) {
-	case (-1):	/* used for init */
-		bch->state = -1;
-		fallthrough;
-	case (ISDN_P_NONE):
-		if (bch->state == ISDN_P_NONE)
-			return 0; /* already in idle state */
-		bch->state = ISDN_P_NONE;
-		clear_bit(FLG_HDLC, &bch->Flags);
-		clear_bit(FLG_TRANSPARENT, &bch->Flags);
-		break;
-	case (ISDN_P_B_RAW):
-		conhdlc |= 2;
-		bch->state = protocol;
-		set_bit(FLG_TRANSPARENT, &bch->Flags);
-		break;
-	case (ISDN_P_B_HDLC):
-		bch->state = protocol;
-		set_bit(FLG_HDLC, &bch->Flags);
-		break;
-	default:
-		if (debug & DEBUG_HW)
-			printk(KERN_DEBUG "%s: %s: prot not known %x\n",
-			       hw->name, __func__, protocol);
-		return -ENOPROTOOPT;
-	}
-
-	if (protocol >= ISDN_P_NONE) {
-		write_reg(hw, HFCUSB_FIFO, (bch->nr == 1) ? 0 : 2);
-		write_reg(hw, HFCUSB_CON_HDLC, conhdlc);
-		write_reg(hw, HFCUSB_INC_RES_F, 2);
-		write_reg(hw, HFCUSB_FIFO, (bch->nr == 1) ? 1 : 3);
-		write_reg(hw, HFCUSB_CON_HDLC, conhdlc);
-		write_reg(hw, HFCUSB_INC_RES_F, 2);
-
-		sctrl = 0x40 + ((hw->protocol == ISDN_P_TE_S0) ? 0x00 : 0x04);
-		sctrl_r = 0x0;
-		if (test_bit(FLG_ACTIVE, &hw->bch[0].Flags)) {
-			sctrl |= 1;
-			sctrl_r |= 1;
-		}
-		if (test_bit(FLG_ACTIVE, &hw->bch[1].Flags)) {
-			sctrl |= 2;
-			sctrl_r |= 2;
-		}
-		write_reg(hw, HFCUSB_SCTRL, sctrl);
-		write_reg(hw, HFCUSB_SCTRL_R, sctrl_r);
-
-		if (protocol > ISDN_P_NONE)
-			handle_led(hw, (bch->nr == 1) ? LED_B1_ON : LED_B2_ON);
-		else
-			handle_led(hw, (bch->nr == 1) ? LED_B1_OFF :
-				   LED_B2_OFF);
-	}
-	return hfcsusb_ph_info(hw);
-}
-
-static void
-hfcsusb_ph_command(struct hfcsusb *hw, u_char command)
-{
-	if (debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: %s: %x\n",
-		       hw->name, __func__, command);
-
-	switch (command) {
-	case HFC_L1_ACTIVATE_TE:
-		/* force sending sending INFO1 */
-		write_reg(hw, HFCUSB_STATES, 0x14);
-		/* start l1 activation */
-		write_reg(hw, HFCUSB_STATES, 0x04);
-		break;
-
-	case HFC_L1_FORCE_DEACTIVATE_TE:
-		write_reg(hw, HFCUSB_STATES, 0x10);
-		write_reg(hw, HFCUSB_STATES, 0x03);
-		break;
-
-	case HFC_L1_ACTIVATE_NT:
-		if (hw->dch.state == 3)
-			_queue_data(&hw->dch.dev.D, PH_ACTIVATE_IND,
-				    MISDN_ID_ANY, 0, NULL, GFP_ATOMIC);
-		else
-			write_reg(hw, HFCUSB_STATES, HFCUSB_ACTIVATE |
-				  HFCUSB_DO_ACTION | HFCUSB_NT_G2_G3);
-		break;
-
-	case HFC_L1_DEACTIVATE_NT:
-		write_reg(hw, HFCUSB_STATES,
-			  HFCUSB_DO_ACTION);
-		break;
-	}
-}
-
-/*
- * Layer 1 B-channel hardware access
- */
-static int
-channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq)
-{
-	return mISDN_ctrl_bchannel(bch, cq);
-}
-
-/* collect data from incoming interrupt or isochron USB data */
-static void
-hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
-		 int finish)
-{
-	struct hfcsusb	*hw = fifo->hw;
-	struct sk_buff	*rx_skb = NULL;
-	int		maxlen = 0;
-	int		fifon = fifo->fifonum;
-	int		i;
-	int		hdlc = 0;
-	unsigned long	flags;
-
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s: fifo(%i) len(%i) "
-		       "dch(%p) bch(%p) ech(%p)\n",
-		       hw->name, __func__, fifon, len,
-		       fifo->dch, fifo->bch, fifo->ech);
-
-	if (!len)
-		return;
-
-	if ((!!fifo->dch + !!fifo->bch + !!fifo->ech) != 1) {
-		printk(KERN_DEBUG "%s: %s: undefined channel\n",
-		       hw->name, __func__);
-		return;
-	}
-
-	spin_lock_irqsave(&hw->lock, flags);
-	if (fifo->dch) {
-		rx_skb = fifo->dch->rx_skb;
-		maxlen = fifo->dch->maxlen;
-		hdlc = 1;
-	}
-	if (fifo->bch) {
-		if (test_bit(FLG_RX_OFF, &fifo->bch->Flags)) {
-			fifo->bch->dropcnt += len;
-			spin_unlock_irqrestore(&hw->lock, flags);
-			return;
-		}
-		maxlen = bchannel_get_rxbuf(fifo->bch, len);
-		rx_skb = fifo->bch->rx_skb;
-		if (maxlen < 0) {
-			if (rx_skb)
-				skb_trim(rx_skb, 0);
-			pr_warn("%s.B%d: No bufferspace for %d bytes\n",
-				hw->name, fifo->bch->nr, len);
-			spin_unlock_irqrestore(&hw->lock, flags);
-			return;
-		}
-		maxlen = fifo->bch->maxlen;
-		hdlc = test_bit(FLG_HDLC, &fifo->bch->Flags);
-	}
-	if (fifo->ech) {
-		rx_skb = fifo->ech->rx_skb;
-		maxlen = fifo->ech->maxlen;
-		hdlc = 1;
-	}
-
-	if (fifo->dch || fifo->ech) {
-		if (!rx_skb) {
-			rx_skb = mI_alloc_skb(maxlen, GFP_ATOMIC);
-			if (rx_skb) {
-				if (fifo->dch)
-					fifo->dch->rx_skb = rx_skb;
-				if (fifo->ech)
-					fifo->ech->rx_skb = rx_skb;
-				skb_trim(rx_skb, 0);
-			} else {
-				printk(KERN_DEBUG "%s: %s: No mem for rx_skb\n",
-				       hw->name, __func__);
-				spin_unlock_irqrestore(&hw->lock, flags);
-				return;
-			}
-		}
-		/* D/E-Channel SKB range check */
-		if ((rx_skb->len + len) >= MAX_DFRAME_LEN_L1) {
-			printk(KERN_DEBUG "%s: %s: sbk mem exceeded "
-			       "for fifo(%d) HFCUSB_D_RX\n",
-			       hw->name, __func__, fifon);
-			skb_trim(rx_skb, 0);
-			spin_unlock_irqrestore(&hw->lock, flags);
-			return;
-		}
-	}
-
-	skb_put_data(rx_skb, data, len);
-
-	if (hdlc) {
-		/* we have a complete hdlc packet */
-		if (finish) {
-			if ((rx_skb->len > 3) &&
-			    (!(rx_skb->data[rx_skb->len - 1]))) {
-				if (debug & DBG_HFC_FIFO_VERBOSE) {
-					printk(KERN_DEBUG "%s: %s: fifon(%i)"
-					       " new RX len(%i): ",
-					       hw->name, __func__, fifon,
-					       rx_skb->len);
-					i = 0;
-					while (i < rx_skb->len)
-						printk("%02x ",
-						       rx_skb->data[i++]);
-					printk("\n");
-				}
-
-				/* remove CRC & status */
-				skb_trim(rx_skb, rx_skb->len - 3);
-
-				if (fifo->dch)
-					recv_Dchannel(fifo->dch);
-				if (fifo->bch)
-					recv_Bchannel(fifo->bch, MISDN_ID_ANY,
-						      0);
-				if (fifo->ech)
-					recv_Echannel(fifo->ech,
-						      &hw->dch);
-			} else {
-				if (debug & DBG_HFC_FIFO_VERBOSE) {
-					printk(KERN_DEBUG
-					       "%s: CRC or minlen ERROR fifon(%i) "
-					       "RX len(%i): ",
-					       hw->name, fifon, rx_skb->len);
-					i = 0;
-					while (i < rx_skb->len)
-						printk("%02x ",
-						       rx_skb->data[i++]);
-					printk("\n");
-				}
-				skb_trim(rx_skb, 0);
-			}
-		}
-	} else {
-		/* deliver transparent data to layer2 */
-		recv_Bchannel(fifo->bch, MISDN_ID_ANY, false);
-	}
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void
-fill_isoc_urb(struct urb *urb, struct usb_device *dev, unsigned int pipe,
-	      void *buf, int num_packets, int packet_size, int interval,
-	      usb_complete_t complete, void *context)
-{
-	int k;
-
-	usb_fill_bulk_urb(urb, dev, pipe, buf, packet_size * num_packets,
-			  complete, context);
-
-	urb->number_of_packets = num_packets;
-	urb->transfer_flags = URB_ISO_ASAP;
-	urb->actual_length = 0;
-	urb->interval = interval;
-
-	for (k = 0; k < num_packets; k++) {
-		urb->iso_frame_desc[k].offset = packet_size * k;
-		urb->iso_frame_desc[k].length = packet_size;
-		urb->iso_frame_desc[k].actual_length = 0;
-	}
-}
-
-/* receive completion routine for all ISO tx fifos   */
-static void
-rx_iso_complete(struct urb *urb)
-{
-	struct iso_urb *context_iso_urb = (struct iso_urb *) urb->context;
-	struct usb_fifo *fifo = context_iso_urb->owner_fifo;
-	struct hfcsusb *hw = fifo->hw;
-	int k, len, errcode, offset, num_isoc_packets, fifon, maxlen,
-		status, iso_status, i;
-	__u8 *buf;
-	static __u8 eof[8];
-	__u8 s0_state;
-	unsigned long flags;
-
-	fifon = fifo->fifonum;
-	status = urb->status;
-
-	spin_lock_irqsave(&hw->lock, flags);
-	if (fifo->stop_gracefull) {
-		fifo->stop_gracefull = 0;
-		fifo->active = 0;
-		spin_unlock_irqrestore(&hw->lock, flags);
-		return;
-	}
-	spin_unlock_irqrestore(&hw->lock, flags);
-
-	/*
-	 * ISO transfer only partially completed,
-	 * look at individual frame status for details
-	 */
-	if (status == -EXDEV) {
-		if (debug & DEBUG_HW)
-			printk(KERN_DEBUG "%s: %s: with -EXDEV "
-			       "urb->status %d, fifonum %d\n",
-			       hw->name, __func__,  status, fifon);
-
-		/* clear status, so go on with ISO transfers */
-		status = 0;
-	}
-
-	s0_state = 0;
-	if (fifo->active && !status) {
-		num_isoc_packets = iso_packets[fifon];
-		maxlen = fifo->usb_packet_maxlen;
-
-		for (k = 0; k < num_isoc_packets; ++k) {
-			len = urb->iso_frame_desc[k].actual_length;
-			offset = urb->iso_frame_desc[k].offset;
-			buf = context_iso_urb->buffer + offset;
-			iso_status = urb->iso_frame_desc[k].status;
-
-			if (iso_status && (debug & DBG_HFC_FIFO_VERBOSE)) {
-				printk(KERN_DEBUG "%s: %s: "
-				       "ISO packet %i, status: %i\n",
-				       hw->name, __func__, k, iso_status);
-			}
-
-			/* USB data log for every D ISO in */
-			if ((fifon == HFCUSB_D_RX) &&
-			    (debug & DBG_HFC_USB_VERBOSE)) {
-				printk(KERN_DEBUG
-				       "%s: %s: %d (%d/%d) len(%d) ",
-				       hw->name, __func__, urb->start_frame,
-				       k, num_isoc_packets - 1,
-				       len);
-				for (i = 0; i < len; i++)
-					printk("%x ", buf[i]);
-				printk("\n");
-			}
-
-			if (!iso_status) {
-				if (fifo->last_urblen != maxlen) {
-					/*
-					 * save fifo fill-level threshold bits
-					 * to use them later in TX ISO URB
-					 * completions
-					 */
-					hw->threshold_mask = buf[1];
-
-					if (fifon == HFCUSB_D_RX)
-						s0_state = (buf[0] >> 4);
-
-					eof[fifon] = buf[0] & 1;
-					if (len > 2)
-						hfcsusb_rx_frame(fifo, buf + 2,
-								 len - 2, (len < maxlen)
-								 ? eof[fifon] : 0);
-				} else
-					hfcsusb_rx_frame(fifo, buf, len,
-							 (len < maxlen) ?
-							 eof[fifon] : 0);
-				fifo->last_urblen = len;
-			}
-		}
-
-		/* signal S0 layer1 state change */
-		if ((s0_state) && (hw->initdone) &&
-		    (s0_state != hw->dch.state)) {
-			hw->dch.state = s0_state;
-			schedule_event(&hw->dch, FLG_PHCHANGE);
-		}
-
-		fill_isoc_urb(urb, fifo->hw->dev, fifo->pipe,
-			      context_iso_urb->buffer, num_isoc_packets,
-			      fifo->usb_packet_maxlen, fifo->intervall,
-			      (usb_complete_t)rx_iso_complete, urb->context);
-		errcode = usb_submit_urb(urb, GFP_ATOMIC);
-		if (errcode < 0) {
-			if (debug & DEBUG_HW)
-				printk(KERN_DEBUG "%s: %s: error submitting "
-				       "ISO URB: %d\n",
-				       hw->name, __func__, errcode);
-		}
-	} else {
-		if (status && (debug & DBG_HFC_URB_INFO))
-			printk(KERN_DEBUG "%s: %s: rx_iso_complete : "
-			       "urb->status %d, fifonum %d\n",
-			       hw->name, __func__, status, fifon);
-	}
-}
-
-/* receive completion routine for all interrupt rx fifos */
-static void
-rx_int_complete(struct urb *urb)
-{
-	int len, status, i;
-	__u8 *buf, maxlen, fifon;
-	struct usb_fifo *fifo = (struct usb_fifo *) urb->context;
-	struct hfcsusb *hw = fifo->hw;
-	static __u8 eof[8];
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-	if (fifo->stop_gracefull) {
-		fifo->stop_gracefull = 0;
-		fifo->active = 0;
-		spin_unlock_irqrestore(&hw->lock, flags);
-		return;
-	}
-	spin_unlock_irqrestore(&hw->lock, flags);
-
-	fifon = fifo->fifonum;
-	if ((!fifo->active) || (urb->status)) {
-		if (debug & DBG_HFC_URB_ERROR)
-			printk(KERN_DEBUG
-			       "%s: %s: RX-Fifo %i is going down (%i)\n",
-			       hw->name, __func__, fifon, urb->status);
-
-		fifo->urb->interval = 0; /* cancel automatic rescheduling */
-		return;
-	}
-	len = urb->actual_length;
-	buf = fifo->buffer;
-	maxlen = fifo->usb_packet_maxlen;
-
-	/* USB data log for every D INT in */
-	if ((fifon == HFCUSB_D_RX) && (debug & DBG_HFC_USB_VERBOSE)) {
-		printk(KERN_DEBUG "%s: %s: D RX INT len(%d) ",
-		       hw->name, __func__, len);
-		for (i = 0; i < len; i++)
-			printk("%02x ", buf[i]);
-		printk("\n");
-	}
-
-	if (fifo->last_urblen != fifo->usb_packet_maxlen) {
-		/* the threshold mask is in the 2nd status byte */
-		hw->threshold_mask = buf[1];
-
-		/* signal S0 layer1 state change */
-		if (hw->initdone && ((buf[0] >> 4) != hw->dch.state)) {
-			hw->dch.state = (buf[0] >> 4);
-			schedule_event(&hw->dch, FLG_PHCHANGE);
-		}
-
-		eof[fifon] = buf[0] & 1;
-		/* if we have more than the 2 status bytes -> collect data */
-		if (len > 2)
-			hfcsusb_rx_frame(fifo, buf + 2,
-					 urb->actual_length - 2,
-					 (len < maxlen) ? eof[fifon] : 0);
-	} else {
-		hfcsusb_rx_frame(fifo, buf, urb->actual_length,
-				 (len < maxlen) ? eof[fifon] : 0);
-	}
-	fifo->last_urblen = urb->actual_length;
-
-	status = usb_submit_urb(urb, GFP_ATOMIC);
-	if (status) {
-		if (debug & DEBUG_HW)
-			printk(KERN_DEBUG "%s: %s: error resubmitting USB\n",
-			       hw->name, __func__);
-	}
-}
-
-/* transmit completion routine for all ISO tx fifos */
-static void
-tx_iso_complete(struct urb *urb)
-{
-	struct iso_urb *context_iso_urb = (struct iso_urb *) urb->context;
-	struct usb_fifo *fifo = context_iso_urb->owner_fifo;
-	struct hfcsusb *hw = fifo->hw;
-	struct sk_buff *tx_skb;
-	int k, tx_offset, num_isoc_packets, sink, remain, current_len,
-		errcode, hdlc, i;
-	int *tx_idx;
-	int frame_complete, fifon, status, fillempty = 0;
-	__u8 threshbit, *p;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-	if (fifo->stop_gracefull) {
-		fifo->stop_gracefull = 0;
-		fifo->active = 0;
-		spin_unlock_irqrestore(&hw->lock, flags);
-		return;
-	}
-
-	if (fifo->dch) {
-		tx_skb = fifo->dch->tx_skb;
-		tx_idx = &fifo->dch->tx_idx;
-		hdlc = 1;
-	} else if (fifo->bch) {
-		tx_skb = fifo->bch->tx_skb;
-		tx_idx = &fifo->bch->tx_idx;
-		hdlc = test_bit(FLG_HDLC, &fifo->bch->Flags);
-		if (!tx_skb && !hdlc &&
-		    test_bit(FLG_FILLEMPTY, &fifo->bch->Flags))
-			fillempty = 1;
-	} else {
-		printk(KERN_DEBUG "%s: %s: neither BCH nor DCH\n",
-		       hw->name, __func__);
-		spin_unlock_irqrestore(&hw->lock, flags);
-		return;
-	}
-
-	fifon = fifo->fifonum;
-	status = urb->status;
-
-	tx_offset = 0;
-
-	/*
-	 * ISO transfer only partially completed,
-	 * look at individual frame status for details
-	 */
-	if (status == -EXDEV) {
-		if (debug & DBG_HFC_URB_ERROR)
-			printk(KERN_DEBUG "%s: %s: "
-			       "-EXDEV (%i) fifon (%d)\n",
-			       hw->name, __func__, status, fifon);
-
-		/* clear status, so go on with ISO transfers */
-		status = 0;
-	}
-
-	if (fifo->active && !status) {
-		/* is FifoFull-threshold set for our channel? */
-		threshbit = (hw->threshold_mask & (1 << fifon));
-		num_isoc_packets = iso_packets[fifon];
-
-		/* predict dataflow to avoid fifo overflow */
-		if (fifon >= HFCUSB_D_TX)
-			sink = (threshbit) ? SINK_DMIN : SINK_DMAX;
-		else
-			sink = (threshbit) ? SINK_MIN : SINK_MAX;
-		fill_isoc_urb(urb, fifo->hw->dev, fifo->pipe,
-			      context_iso_urb->buffer, num_isoc_packets,
-			      fifo->usb_packet_maxlen, fifo->intervall,
-			      (usb_complete_t)tx_iso_complete, urb->context);
-		memset(context_iso_urb->buffer, 0,
-		       sizeof(context_iso_urb->buffer));
-		frame_complete = 0;
-
-		for (k = 0; k < num_isoc_packets; ++k) {
-			/* analyze tx success of previous ISO packets */
-			if (debug & DBG_HFC_URB_ERROR) {
-				errcode = urb->iso_frame_desc[k].status;
-				if (errcode) {
-					printk(KERN_DEBUG "%s: %s: "
-					       "ISO packet %i, status: %i\n",
-					       hw->name, __func__, k, errcode);
-				}
-			}
-
-			/* Generate next ISO Packets */
-			if (tx_skb)
-				remain = tx_skb->len - *tx_idx;
-			else if (fillempty)
-				remain = 15; /* > not complete */
-			else
-				remain = 0;
-
-			if (remain > 0) {
-				fifo->bit_line -= sink;
-				current_len = (0 - fifo->bit_line) / 8;
-				if (current_len > 14)
-					current_len = 14;
-				if (current_len < 0)
-					current_len = 0;
-				if (remain < current_len)
-					current_len = remain;
-
-				/* how much bit do we put on the line? */
-				fifo->bit_line += current_len * 8;
-
-				context_iso_urb->buffer[tx_offset] = 0;
-				if (current_len == remain) {
-					if (hdlc) {
-						/* signal frame completion */
-						context_iso_urb->
-							buffer[tx_offset] = 1;
-						/* add 2 byte flags and 16bit
-						 * CRC at end of ISDN frame */
-						fifo->bit_line += 32;
-					}
-					frame_complete = 1;
-				}
-
-				/* copy tx data to iso-urb buffer */
-				p = context_iso_urb->buffer + tx_offset + 1;
-				if (fillempty) {
-					memset(p, fifo->bch->fill[0],
-					       current_len);
-				} else {
-					memcpy(p, (tx_skb->data + *tx_idx),
-					       current_len);
-					*tx_idx += current_len;
-				}
-				urb->iso_frame_desc[k].offset = tx_offset;
-				urb->iso_frame_desc[k].length = current_len + 1;
-
-				/* USB data log for every D ISO out */
-				if ((fifon == HFCUSB_D_RX) && !fillempty &&
-				    (debug & DBG_HFC_USB_VERBOSE)) {
-					printk(KERN_DEBUG
-					       "%s: %s (%d/%d) offs(%d) len(%d) ",
-					       hw->name, __func__,
-					       k, num_isoc_packets - 1,
-					       urb->iso_frame_desc[k].offset,
-					       urb->iso_frame_desc[k].length);
-
-					for (i = urb->iso_frame_desc[k].offset;
-					     i < (urb->iso_frame_desc[k].offset
-						  + urb->iso_frame_desc[k].length);
-					     i++)
-						printk("%x ",
-						       context_iso_urb->buffer[i]);
-
-					printk(" skb->len(%i) tx-idx(%d)\n",
-					       tx_skb->len, *tx_idx);
-				}
-
-				tx_offset += (current_len + 1);
-			} else {
-				urb->iso_frame_desc[k].offset = tx_offset++;
-				urb->iso_frame_desc[k].length = 1;
-				/* we lower data margin every msec */
-				fifo->bit_line -= sink;
-				if (fifo->bit_line < BITLINE_INF)
-					fifo->bit_line = BITLINE_INF;
-			}
-
-			if (frame_complete) {
-				frame_complete = 0;
-
-				if (debug & DBG_HFC_FIFO_VERBOSE) {
-					printk(KERN_DEBUG  "%s: %s: "
-					       "fifon(%i) new TX len(%i): ",
-					       hw->name, __func__,
-					       fifon, tx_skb->len);
-					i = 0;
-					while (i < tx_skb->len)
-						printk("%02x ",
-						       tx_skb->data[i++]);
-					printk("\n");
-				}
-
-				dev_consume_skb_irq(tx_skb);
-				tx_skb = NULL;
-				if (fifo->dch && get_next_dframe(fifo->dch))
-					tx_skb = fifo->dch->tx_skb;
-				else if (fifo->bch &&
-					 get_next_bframe(fifo->bch))
-					tx_skb = fifo->bch->tx_skb;
-			}
-		}
-		errcode = usb_submit_urb(urb, GFP_ATOMIC);
-		if (errcode < 0) {
-			if (debug & DEBUG_HW)
-				printk(KERN_DEBUG
-				       "%s: %s: error submitting ISO URB: %d \n",
-				       hw->name, __func__, errcode);
-		}
-
-		/*
-		 * abuse DChannel tx iso completion to trigger NT mode state
-		 * changes tx_iso_complete is assumed to be called every
-		 * fifo->intervall (ms)
-		 */
-		if ((fifon == HFCUSB_D_TX) && (hw->protocol == ISDN_P_NT_S0)
-		    && (hw->timers & NT_ACTIVATION_TIMER)) {
-			if ((--hw->nt_timer) < 0)
-				schedule_event(&hw->dch, FLG_PHCHANGE);
-		}
-
-	} else {
-		if (status && (debug & DBG_HFC_URB_ERROR))
-			printk(KERN_DEBUG  "%s: %s: urb->status %s (%i)"
-			       "fifonum=%d\n",
-			       hw->name, __func__,
-			       symbolic(urb_errlist, status), status, fifon);
-	}
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-/*
- * allocs urbs and start isoc transfer with two pending urbs to avoid
- * gaps in the transfer chain
- */
-static int
-start_isoc_chain(struct usb_fifo *fifo, int num_packets_per_urb,
-		 usb_complete_t complete, int packet_size)
-{
-	struct hfcsusb *hw = fifo->hw;
-	int i, k, errcode;
-
-	if (debug)
-		printk(KERN_DEBUG "%s: %s: fifo %i\n",
-		       hw->name, __func__, fifo->fifonum);
-
-	/* allocate Memory for Iso out Urbs */
-	for (i = 0; i < 2; i++) {
-		if (!(fifo->iso[i].urb)) {
-			fifo->iso[i].urb =
-				usb_alloc_urb(num_packets_per_urb, GFP_KERNEL);
-			if (!(fifo->iso[i].urb)) {
-				printk(KERN_DEBUG
-				       "%s: %s: alloc urb for fifo %i failed",
-				       hw->name, __func__, fifo->fifonum);
-				continue;
-			}
-			fifo->iso[i].owner_fifo = (struct usb_fifo *) fifo;
-			fifo->iso[i].indx = i;
-
-			/* Init the first iso */
-			if (ISO_BUFFER_SIZE >=
-			    (fifo->usb_packet_maxlen *
-			     num_packets_per_urb)) {
-				fill_isoc_urb(fifo->iso[i].urb,
-					      fifo->hw->dev, fifo->pipe,
-					      fifo->iso[i].buffer,
-					      num_packets_per_urb,
-					      fifo->usb_packet_maxlen,
-					      fifo->intervall, complete,
-					      &fifo->iso[i]);
-				memset(fifo->iso[i].buffer, 0,
-				       sizeof(fifo->iso[i].buffer));
-
-				for (k = 0; k < num_packets_per_urb; k++) {
-					fifo->iso[i].urb->
-						iso_frame_desc[k].offset =
-						k * packet_size;
-					fifo->iso[i].urb->
-						iso_frame_desc[k].length =
-						packet_size;
-				}
-			} else {
-				printk(KERN_DEBUG
-				       "%s: %s: ISO Buffer size to small!\n",
-				       hw->name, __func__);
-			}
-		}
-		fifo->bit_line = BITLINE_INF;
-
-		errcode = usb_submit_urb(fifo->iso[i].urb, GFP_KERNEL);
-		fifo->active = (errcode >= 0) ? 1 : 0;
-		fifo->stop_gracefull = 0;
-		if (errcode < 0) {
-			printk(KERN_DEBUG "%s: %s: %s URB nr:%d\n",
-			       hw->name, __func__,
-			       symbolic(urb_errlist, errcode), i);
-		}
-	}
-	return fifo->active;
-}
-
-static void
-stop_iso_gracefull(struct usb_fifo *fifo)
-{
-	struct hfcsusb *hw = fifo->hw;
-	int i, timeout;
-	u_long flags;
-
-	for (i = 0; i < 2; i++) {
-		spin_lock_irqsave(&hw->lock, flags);
-		if (debug)
-			printk(KERN_DEBUG "%s: %s for fifo %i.%i\n",
-			       hw->name, __func__, fifo->fifonum, i);
-		fifo->stop_gracefull = 1;
-		spin_unlock_irqrestore(&hw->lock, flags);
-	}
-
-	for (i = 0; i < 2; i++) {
-		timeout = 3;
-		while (fifo->stop_gracefull && timeout--)
-			schedule_timeout_interruptible((HZ / 1000) * 16);
-		if (debug && fifo->stop_gracefull)
-			printk(KERN_DEBUG "%s: ERROR %s for fifo %i.%i\n",
-			       hw->name, __func__, fifo->fifonum, i);
-	}
-}
-
-static void
-stop_int_gracefull(struct usb_fifo *fifo)
-{
-	struct hfcsusb *hw = fifo->hw;
-	int timeout;
-	u_long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-	if (debug)
-		printk(KERN_DEBUG "%s: %s for fifo %i\n",
-		       hw->name, __func__, fifo->fifonum);
-	fifo->stop_gracefull = 1;
-	spin_unlock_irqrestore(&hw->lock, flags);
-
-	timeout = 3;
-	while (fifo->stop_gracefull && timeout--)
-		schedule_timeout_interruptible((HZ / 1000) * 3);
-	if (debug && fifo->stop_gracefull)
-		printk(KERN_DEBUG "%s: ERROR %s for fifo %i\n",
-		       hw->name, __func__, fifo->fifonum);
-}
-
-/* start the interrupt transfer for the given fifo */
-static void
-start_int_fifo(struct usb_fifo *fifo)
-{
-	struct hfcsusb *hw = fifo->hw;
-	int errcode;
-
-	if (debug)
-		printk(KERN_DEBUG "%s: %s: INT IN fifo:%d\n",
-		       hw->name, __func__, fifo->fifonum);
-
-	if (!fifo->urb) {
-		fifo->urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!fifo->urb)
-			return;
-	}
-	usb_fill_int_urb(fifo->urb, fifo->hw->dev, fifo->pipe,
-			 fifo->buffer, fifo->usb_packet_maxlen,
-			 (usb_complete_t)rx_int_complete, fifo, fifo->intervall);
-	fifo->active = 1;
-	fifo->stop_gracefull = 0;
-	errcode = usb_submit_urb(fifo->urb, GFP_KERNEL);
-	if (errcode) {
-		printk(KERN_DEBUG "%s: %s: submit URB: status:%i\n",
-		       hw->name, __func__, errcode);
-		fifo->active = 0;
-	}
-}
-
-static void
-setPortMode(struct hfcsusb *hw)
-{
-	if (debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: %s %s\n", hw->name, __func__,
-		       (hw->protocol == ISDN_P_TE_S0) ? "TE" : "NT");
-
-	if (hw->protocol == ISDN_P_TE_S0) {
-		write_reg(hw, HFCUSB_SCTRL, 0x40);
-		write_reg(hw, HFCUSB_SCTRL_E, 0x00);
-		write_reg(hw, HFCUSB_CLKDEL, CLKDEL_TE);
-		write_reg(hw, HFCUSB_STATES, 3 | 0x10);
-		write_reg(hw, HFCUSB_STATES, 3);
-	} else {
-		write_reg(hw, HFCUSB_SCTRL, 0x44);
-		write_reg(hw, HFCUSB_SCTRL_E, 0x09);
-		write_reg(hw, HFCUSB_CLKDEL, CLKDEL_NT);
-		write_reg(hw, HFCUSB_STATES, 1 | 0x10);
-		write_reg(hw, HFCUSB_STATES, 1);
-	}
-}
-
-static void
-reset_hfcsusb(struct hfcsusb *hw)
-{
-	struct usb_fifo *fifo;
-	int i;
-
-	if (debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: %s\n", hw->name, __func__);
-
-	/* do Chip reset */
-	write_reg(hw, HFCUSB_CIRM, 8);
-
-	/* aux = output, reset off */
-	write_reg(hw, HFCUSB_CIRM, 0x10);
-
-	/* set USB_SIZE to match the wMaxPacketSize for INT or BULK transfers */
-	write_reg(hw, HFCUSB_USB_SIZE, (hw->packet_size / 8) |
-		  ((hw->packet_size / 8) << 4));
-
-	/* set USB_SIZE_I to match the wMaxPacketSize for ISO transfers */
-	write_reg(hw, HFCUSB_USB_SIZE_I, hw->iso_packet_size);
-
-	/* enable PCM/GCI master mode */
-	write_reg(hw, HFCUSB_MST_MODE1, 0);	/* set default values */
-	write_reg(hw, HFCUSB_MST_MODE0, 1);	/* enable master mode */
-
-	/* init the fifos */
-	write_reg(hw, HFCUSB_F_THRES,
-		  (HFCUSB_TX_THRESHOLD / 8) | ((HFCUSB_RX_THRESHOLD / 8) << 4));
-
-	fifo = hw->fifos;
-	for (i = 0; i < HFCUSB_NUM_FIFOS; i++) {
-		write_reg(hw, HFCUSB_FIFO, i);	/* select the desired fifo */
-		fifo[i].max_size =
-			(i <= HFCUSB_B2_RX) ? MAX_BCH_SIZE : MAX_DFRAME_LEN;
-		fifo[i].last_urblen = 0;
-
-		/* set 2 bit for D- & E-channel */
-		write_reg(hw, HFCUSB_HDLC_PAR, ((i <= HFCUSB_B2_RX) ? 0 : 2));
-
-		/* enable all fifos */
-		if (i == HFCUSB_D_TX)
-			write_reg(hw, HFCUSB_CON_HDLC,
-				  (hw->protocol == ISDN_P_NT_S0) ? 0x08 : 0x09);
-		else
-			write_reg(hw, HFCUSB_CON_HDLC, 0x08);
-		write_reg(hw, HFCUSB_INC_RES_F, 2); /* reset the fifo */
-	}
-
-	write_reg(hw, HFCUSB_SCTRL_R, 0); /* disable both B receivers */
-	handle_led(hw, LED_POWER_ON);
-}
-
-/* start USB data pipes dependand on device's endpoint configuration */
-static void
-hfcsusb_start_endpoint(struct hfcsusb *hw, int channel)
-{
-	/* quick check if endpoint already running */
-	if ((channel == HFC_CHAN_D) && (hw->fifos[HFCUSB_D_RX].active))
-		return;
-	if ((channel == HFC_CHAN_B1) && (hw->fifos[HFCUSB_B1_RX].active))
-		return;
-	if ((channel == HFC_CHAN_B2) && (hw->fifos[HFCUSB_B2_RX].active))
-		return;
-	if ((channel == HFC_CHAN_E) && (hw->fifos[HFCUSB_PCM_RX].active))
-		return;
-
-	/* start rx endpoints using USB INT IN method */
-	if (hw->cfg_used == CNF_3INT3ISO || hw->cfg_used == CNF_4INT3ISO)
-		start_int_fifo(hw->fifos + channel * 2 + 1);
-
-	/* start rx endpoints using USB ISO IN method */
-	if (hw->cfg_used == CNF_3ISO3ISO || hw->cfg_used == CNF_4ISO3ISO) {
-		switch (channel) {
-		case HFC_CHAN_D:
-			start_isoc_chain(hw->fifos + HFCUSB_D_RX,
-					 ISOC_PACKETS_D,
-					 (usb_complete_t)rx_iso_complete,
-					 16);
-			break;
-		case HFC_CHAN_E:
-			start_isoc_chain(hw->fifos + HFCUSB_PCM_RX,
-					 ISOC_PACKETS_D,
-					 (usb_complete_t)rx_iso_complete,
-					 16);
-			break;
-		case HFC_CHAN_B1:
-			start_isoc_chain(hw->fifos + HFCUSB_B1_RX,
-					 ISOC_PACKETS_B,
-					 (usb_complete_t)rx_iso_complete,
-					 16);
-			break;
-		case HFC_CHAN_B2:
-			start_isoc_chain(hw->fifos + HFCUSB_B2_RX,
-					 ISOC_PACKETS_B,
-					 (usb_complete_t)rx_iso_complete,
-					 16);
-			break;
-		}
-	}
-
-	/* start tx endpoints using USB ISO OUT method */
-	switch (channel) {
-	case HFC_CHAN_D:
-		start_isoc_chain(hw->fifos + HFCUSB_D_TX,
-				 ISOC_PACKETS_B,
-				 (usb_complete_t)tx_iso_complete, 1);
-		break;
-	case HFC_CHAN_B1:
-		start_isoc_chain(hw->fifos + HFCUSB_B1_TX,
-				 ISOC_PACKETS_D,
-				 (usb_complete_t)tx_iso_complete, 1);
-		break;
-	case HFC_CHAN_B2:
-		start_isoc_chain(hw->fifos + HFCUSB_B2_TX,
-				 ISOC_PACKETS_B,
-				 (usb_complete_t)tx_iso_complete, 1);
-		break;
-	}
-}
-
-/* stop USB data pipes dependand on device's endpoint configuration */
-static void
-hfcsusb_stop_endpoint(struct hfcsusb *hw, int channel)
-{
-	/* quick check if endpoint currently running */
-	if ((channel == HFC_CHAN_D) && (!hw->fifos[HFCUSB_D_RX].active))
-		return;
-	if ((channel == HFC_CHAN_B1) && (!hw->fifos[HFCUSB_B1_RX].active))
-		return;
-	if ((channel == HFC_CHAN_B2) && (!hw->fifos[HFCUSB_B2_RX].active))
-		return;
-	if ((channel == HFC_CHAN_E) && (!hw->fifos[HFCUSB_PCM_RX].active))
-		return;
-
-	/* rx endpoints using USB INT IN method */
-	if (hw->cfg_used == CNF_3INT3ISO || hw->cfg_used == CNF_4INT3ISO)
-		stop_int_gracefull(hw->fifos + channel * 2 + 1);
-
-	/* rx endpoints using USB ISO IN method */
-	if (hw->cfg_used == CNF_3ISO3ISO || hw->cfg_used == CNF_4ISO3ISO)
-		stop_iso_gracefull(hw->fifos + channel * 2 + 1);
-
-	/* tx endpoints using USB ISO OUT method */
-	if (channel != HFC_CHAN_E)
-		stop_iso_gracefull(hw->fifos + channel * 2);
-}
-
-
-/* Hardware Initialization */
-static int
-setup_hfcsusb(struct hfcsusb *hw)
-{
-	void *dmabuf = kmalloc_obj(u_char);
-	u_char b;
-	int ret;
-
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s\n", hw->name, __func__);
-
-	if (!dmabuf)
-		return -ENOMEM;
-
-	ret = read_reg_atomic(hw, HFCUSB_CHIP_ID, dmabuf);
-
-	memcpy(&b, dmabuf, sizeof(u_char));
-	kfree(dmabuf);
-
-	/* check the chip id */
-	if (ret != 1) {
-		printk(KERN_DEBUG "%s: %s: cannot read chip id\n",
-		       hw->name, __func__);
-		return 1;
-	}
-	if (b != HFCUSB_CHIPID) {
-		printk(KERN_DEBUG "%s: %s: Invalid chip id 0x%02x\n",
-		       hw->name, __func__, b);
-		return 1;
-	}
-
-	/* first set the needed config, interface and alternate */
-	(void) usb_set_interface(hw->dev, hw->if_used, hw->alt_used);
-
-	hw->led_state = 0;
-
-	/* init the background machinery for control requests */
-	hw->ctrl_read.bRequestType = 0xc0;
-	hw->ctrl_read.bRequest = 1;
-	hw->ctrl_read.wLength = cpu_to_le16(1);
-	hw->ctrl_write.bRequestType = 0x40;
-	hw->ctrl_write.bRequest = 0;
-	hw->ctrl_write.wLength = 0;
-	usb_fill_control_urb(hw->ctrl_urb, hw->dev, hw->ctrl_out_pipe,
-			     (u_char *)&hw->ctrl_write, NULL, 0,
-			     (usb_complete_t)ctrl_complete, hw);
-
-	reset_hfcsusb(hw);
-	return 0;
-}
-
-static void
-release_hw(struct hfcsusb *hw)
-{
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s\n", hw->name, __func__);
-
-	/*
-	 * stop all endpoints gracefully
-	 * TODO: mISDN_core should generate CLOSE_CHANNEL
-	 *       signals after calling mISDN_unregister_device()
-	 */
-	hfcsusb_stop_endpoint(hw, HFC_CHAN_D);
-	hfcsusb_stop_endpoint(hw, HFC_CHAN_B1);
-	hfcsusb_stop_endpoint(hw, HFC_CHAN_B2);
-	if (hw->fifos[HFCUSB_PCM_RX].pipe)
-		hfcsusb_stop_endpoint(hw, HFC_CHAN_E);
-	if (hw->protocol == ISDN_P_TE_S0)
-		l1_event(hw->dch.l1, CLOSE_CHANNEL);
-
-	mISDN_unregister_device(&hw->dch.dev);
-	mISDN_freebchannel(&hw->bch[1]);
-	mISDN_freebchannel(&hw->bch[0]);
-	mISDN_freedchannel(&hw->dch);
-
-	if (hw->ctrl_urb) {
-		usb_kill_urb(hw->ctrl_urb);
-		usb_free_urb(hw->ctrl_urb);
-		hw->ctrl_urb = NULL;
-	}
-
-	if (hw->intf)
-		usb_set_intfdata(hw->intf, NULL);
-	list_del(&hw->list);
-	kfree(hw);
-	hw = NULL;
-}
-
-static void
-deactivate_bchannel(struct bchannel *bch)
-{
-	struct hfcsusb *hw = bch->hw;
-	u_long flags;
-
-	if (bch->debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: %s: bch->nr(%i)\n",
-		       hw->name, __func__, bch->nr);
-
-	spin_lock_irqsave(&hw->lock, flags);
-	mISDN_clear_bchannel(bch);
-	spin_unlock_irqrestore(&hw->lock, flags);
-	hfcsusb_setup_bch(bch, ISDN_P_NONE);
-	hfcsusb_stop_endpoint(hw, bch->nr - 1);
-}
-
-/*
- * Layer 1 B-channel hardware access
- */
-static int
-hfc_bctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct bchannel	*bch = container_of(ch, struct bchannel, ch);
-	int		ret = -EINVAL;
-
-	if (bch->debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: cmd:%x %p\n", __func__, cmd, arg);
-
-	switch (cmd) {
-	case HW_TESTRX_RAW:
-	case HW_TESTRX_HDLC:
-	case HW_TESTRX_OFF:
-		ret = -EINVAL;
-		break;
-
-	case CLOSE_CHANNEL:
-		test_and_clear_bit(FLG_OPEN, &bch->Flags);
-		deactivate_bchannel(bch);
-		ch->protocol = ISDN_P_NONE;
-		ch->peer = NULL;
-		module_put(THIS_MODULE);
-		ret = 0;
-		break;
-	case CONTROL_CHANNEL:
-		ret = channel_bctrl(bch, arg);
-		break;
-	default:
-		printk(KERN_WARNING "%s: unknown prim(%x)\n",
-		       __func__, cmd);
-	}
-	return ret;
-}
-
-static int
-setup_instance(struct hfcsusb *hw, struct device *parent)
-{
-	u_long	flags;
-	int	err, i;
-
-	if (debug & DBG_HFC_CALL_TRACE)
-		printk(KERN_DEBUG "%s: %s\n", hw->name, __func__);
-
-	spin_lock_init(&hw->ctrl_lock);
-	spin_lock_init(&hw->lock);
-
-	mISDN_initdchannel(&hw->dch, MAX_DFRAME_LEN_L1, ph_state);
-	hw->dch.debug = debug & 0xFFFF;
-	hw->dch.hw = hw;
-	hw->dch.dev.Dprotocols = (1 << ISDN_P_TE_S0) | (1 << ISDN_P_NT_S0);
-	hw->dch.dev.D.send = hfcusb_l2l1D;
-	hw->dch.dev.D.ctrl = hfc_dctrl;
-
-	/* enable E-Channel logging */
-	if (hw->fifos[HFCUSB_PCM_RX].pipe)
-		mISDN_initdchannel(&hw->ech, MAX_DFRAME_LEN_L1, NULL);
-
-	hw->dch.dev.Bprotocols = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK));
-	hw->dch.dev.nrbchan = 2;
-	for (i = 0; i < 2; i++) {
-		hw->bch[i].nr = i + 1;
-		set_channelmap(i + 1, hw->dch.dev.channelmap);
-		hw->bch[i].debug = debug;
-		mISDN_initbchannel(&hw->bch[i], MAX_DATA_MEM, poll >> 1);
-		hw->bch[i].hw = hw;
-		hw->bch[i].ch.send = hfcusb_l2l1B;
-		hw->bch[i].ch.ctrl = hfc_bctrl;
-		hw->bch[i].ch.nr = i + 1;
-		list_add(&hw->bch[i].ch.list, &hw->dch.dev.bchannels);
-	}
-
-	hw->fifos[HFCUSB_B1_TX].bch = &hw->bch[0];
-	hw->fifos[HFCUSB_B1_RX].bch = &hw->bch[0];
-	hw->fifos[HFCUSB_B2_TX].bch = &hw->bch[1];
-	hw->fifos[HFCUSB_B2_RX].bch = &hw->bch[1];
-	hw->fifos[HFCUSB_D_TX].dch = &hw->dch;
-	hw->fifos[HFCUSB_D_RX].dch = &hw->dch;
-	hw->fifos[HFCUSB_PCM_RX].ech = &hw->ech;
-	hw->fifos[HFCUSB_PCM_TX].ech = &hw->ech;
-
-	err = setup_hfcsusb(hw);
-	if (err)
-		goto out;
-
-	snprintf(hw->name, MISDN_MAX_IDLEN - 1, "%s.%d", DRIVER_NAME,
-		 hfcsusb_cnt + 1);
-	printk(KERN_INFO "%s: registered as '%s'\n",
-	       DRIVER_NAME, hw->name);
-
-	err = mISDN_register_device(&hw->dch.dev, parent, hw->name);
-	if (err)
-		goto out;
-
-	hfcsusb_cnt++;
-	write_lock_irqsave(&HFClock, flags);
-	list_add_tail(&hw->list, &HFClist);
-	write_unlock_irqrestore(&HFClock, flags);
-	return 0;
-
-out:
-	mISDN_freebchannel(&hw->bch[1]);
-	mISDN_freebchannel(&hw->bch[0]);
-	mISDN_freedchannel(&hw->dch);
-	return err;
-}
-
-static int
-hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id)
-{
-	int err;
-	struct hfcsusb			*hw;
-	struct usb_device		*dev = interface_to_usbdev(intf);
-	struct usb_host_interface	*iface = intf->cur_altsetting;
-	struct usb_host_interface	*iface_used = NULL;
-	struct usb_host_endpoint	*ep;
-	struct hfcsusb_vdata		*driver_info;
-	int ifnum = iface->desc.bInterfaceNumber, i, idx, alt_idx,
-		probe_alt_setting, vend_idx, cfg_used, *vcf, attr, cfg_found,
-		ep_addr, cmptbl[16], small_match, iso_packet_size, packet_size,
-		alt_used = 0;
-
-	vend_idx = 0xffff;
-	for (i = 0; hfcsusb_idtab[i].idVendor; i++) {
-		if ((le16_to_cpu(dev->descriptor.idVendor)
-		     == hfcsusb_idtab[i].idVendor) &&
-		    (le16_to_cpu(dev->descriptor.idProduct)
-		     == hfcsusb_idtab[i].idProduct)) {
-			vend_idx = i;
-			continue;
-		}
-	}
-
-	printk(KERN_DEBUG
-	       "%s: interface(%d) actalt(%d) minor(%d) vend_idx(%d)\n",
-	       __func__, ifnum, iface->desc.bAlternateSetting,
-	       intf->minor, vend_idx);
-
-	if (vend_idx == 0xffff) {
-		printk(KERN_WARNING
-		       "%s: no valid vendor found in USB descriptor\n",
-		       __func__);
-		return -EIO;
-	}
-	/* if vendor and product ID is OK, start probing alternate settings */
-	alt_idx = 0;
-	small_match = -1;
-
-	/* default settings */
-	iso_packet_size = 16;
-	packet_size = 64;
-
-	while (alt_idx < intf->num_altsetting) {
-		iface = intf->altsetting + alt_idx;
-		probe_alt_setting = iface->desc.bAlternateSetting;
-		cfg_used = 0;
-
-		while (validconf[cfg_used][0]) {
-			cfg_found = 1;
-			vcf = validconf[cfg_used];
-			ep = iface->endpoint;
-			memcpy(cmptbl, vcf, 16 * sizeof(int));
-
-			/* check for all endpoints in this alternate setting */
-			for (i = 0; i < iface->desc.bNumEndpoints; i++) {
-				ep_addr = ep->desc.bEndpointAddress;
-
-				/* get endpoint base */
-				idx = ((ep_addr & 0x7f) - 1) * 2;
-				if (idx > 15)
-					return -EIO;
-
-				if (ep_addr & 0x80)
-					idx++;
-				attr = ep->desc.bmAttributes;
-
-				if (cmptbl[idx] != EP_NOP) {
-					if (cmptbl[idx] == EP_NUL)
-						cfg_found = 0;
-					if (attr == USB_ENDPOINT_XFER_INT
-					    && cmptbl[idx] == EP_INT)
-						cmptbl[idx] = EP_NUL;
-					if (attr == USB_ENDPOINT_XFER_BULK
-					    && cmptbl[idx] == EP_BLK)
-						cmptbl[idx] = EP_NUL;
-					if (attr == USB_ENDPOINT_XFER_ISOC
-					    && cmptbl[idx] == EP_ISO)
-						cmptbl[idx] = EP_NUL;
-
-					if (attr == USB_ENDPOINT_XFER_INT &&
-					    ep->desc.bInterval < vcf[17]) {
-						cfg_found = 0;
-					}
-				}
-				ep++;
-			}
-
-			for (i = 0; i < 16; i++)
-				if (cmptbl[i] != EP_NOP && cmptbl[i] != EP_NUL)
-					cfg_found = 0;
-
-			if (cfg_found) {
-				if (small_match < cfg_used) {
-					small_match = cfg_used;
-					alt_used = probe_alt_setting;
-					iface_used = iface;
-				}
-			}
-			cfg_used++;
-		}
-		alt_idx++;
-	}	/* (alt_idx < intf->num_altsetting) */
-
-	/* not found a valid USB Ta Endpoint config */
-	if (small_match == -1)
-		return -EIO;
-
-	iface = iface_used;
-	hw = kzalloc_obj(struct hfcsusb);
-	if (!hw)
-		return -ENOMEM;	/* got no mem */
-	snprintf(hw->name, MISDN_MAX_IDLEN - 1, "%s", DRIVER_NAME);
-
-	ep = iface->endpoint;
-	vcf = validconf[small_match];
-
-	for (i = 0; i < iface->desc.bNumEndpoints; i++) {
-		struct usb_fifo *f;
-
-		ep_addr = ep->desc.bEndpointAddress;
-		/* get endpoint base */
-		idx = ((ep_addr & 0x7f) - 1) * 2;
-		if (ep_addr & 0x80)
-			idx++;
-		f = &hw->fifos[idx & 7];
-
-		/* init Endpoints */
-		if (vcf[idx] == EP_NOP || vcf[idx] == EP_NUL) {
-			ep++;
-			continue;
-		}
-		switch (ep->desc.bmAttributes) {
-		case USB_ENDPOINT_XFER_INT:
-			f->pipe = usb_rcvintpipe(dev,
-						 ep->desc.bEndpointAddress);
-			f->usb_transfer_mode = USB_INT;
-			packet_size = le16_to_cpu(ep->desc.wMaxPacketSize);
-			break;
-		case USB_ENDPOINT_XFER_BULK:
-			if (ep_addr & 0x80)
-				f->pipe = usb_rcvbulkpipe(dev,
-							  ep->desc.bEndpointAddress);
-			else
-				f->pipe = usb_sndbulkpipe(dev,
-							  ep->desc.bEndpointAddress);
-			f->usb_transfer_mode = USB_BULK;
-			packet_size = le16_to_cpu(ep->desc.wMaxPacketSize);
-			break;
-		case USB_ENDPOINT_XFER_ISOC:
-			if (ep_addr & 0x80)
-				f->pipe = usb_rcvisocpipe(dev,
-							  ep->desc.bEndpointAddress);
-			else
-				f->pipe = usb_sndisocpipe(dev,
-							  ep->desc.bEndpointAddress);
-			f->usb_transfer_mode = USB_ISOC;
-			iso_packet_size = le16_to_cpu(ep->desc.wMaxPacketSize);
-			break;
-		default:
-			f->pipe = 0;
-		}
-
-		if (f->pipe) {
-			f->fifonum = idx & 7;
-			f->hw = hw;
-			f->usb_packet_maxlen =
-				le16_to_cpu(ep->desc.wMaxPacketSize);
-			f->intervall = ep->desc.bInterval;
-		}
-		ep++;
-	}
-	hw->dev = dev; /* save device */
-	hw->if_used = ifnum; /* save used interface */
-	hw->alt_used = alt_used; /* and alternate config */
-	hw->ctrl_paksize = dev->descriptor.bMaxPacketSize0; /* control size */
-	hw->cfg_used = vcf[16];	/* store used config */
-	hw->vend_idx = vend_idx; /* store found vendor */
-	hw->packet_size = packet_size;
-	hw->iso_packet_size = iso_packet_size;
-
-	/* create the control pipes needed for register access */
-	hw->ctrl_in_pipe = usb_rcvctrlpipe(hw->dev, 0);
-	hw->ctrl_out_pipe = usb_sndctrlpipe(hw->dev, 0);
-
-	driver_info = (struct hfcsusb_vdata *)
-		      hfcsusb_idtab[vend_idx].driver_info;
-
-	hw->ctrl_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!hw->ctrl_urb) {
-		pr_warn("%s: No memory for control urb\n",
-			driver_info->vend_name);
-		err = -ENOMEM;
-		goto err_free_hw;
-	}
-
-	pr_info("%s: %s: detected \"%s\" (%s, if=%d alt=%d)\n",
-		hw->name, __func__, driver_info->vend_name,
-		conf_str[small_match], ifnum, alt_used);
-
-	if (setup_instance(hw, dev->dev.parent)) {
-		err = -EIO;
-		goto err_free_urb;
-	}
-
-	hw->intf = intf;
-	usb_set_intfdata(hw->intf, hw);
-	return 0;
-
-err_free_urb:
-	usb_free_urb(hw->ctrl_urb);
-err_free_hw:
-	kfree(hw);
-	return err;
-}
-
-/* function called when an active device is removed */
-static void
-hfcsusb_disconnect(struct usb_interface *intf)
-{
-	struct hfcsusb *hw = usb_get_intfdata(intf);
-	struct hfcsusb *next;
-	int cnt = 0;
-
-	printk(KERN_INFO "%s: device disconnected\n", hw->name);
-
-	handle_led(hw, LED_POWER_OFF);
-	release_hw(hw);
-
-	list_for_each_entry_safe(hw, next, &HFClist, list)
-		cnt++;
-	if (!cnt)
-		hfcsusb_cnt = 0;
-
-	usb_set_intfdata(intf, NULL);
-}
-
-static struct usb_driver hfcsusb_drv = {
-	.name = DRIVER_NAME,
-	.id_table = hfcsusb_idtab,
-	.probe = hfcsusb_probe,
-	.disconnect = hfcsusb_disconnect,
-	.disable_hub_initiated_lpm = 1,
-};
-
-module_usb_driver(hfcsusb_drv);
diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.h b/drivers/isdn/hardware/mISDN/hfcsusb.h
deleted file mode 100644
index 7e2bc5068019..000000000000
--- a/drivers/isdn/hardware/mISDN/hfcsusb.h
+++ /dev/null
@@ -1,425 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * hfcsusb.h, HFC-S USB mISDN driver
- */
-
-#ifndef __HFCSUSB_H__
-#define __HFCSUSB_H__
-
-
-#define DRIVER_NAME "HFC-S_USB"
-
-#define DBG_HFC_CALL_TRACE	0x00010000
-#define DBG_HFC_FIFO_VERBOSE	0x00020000
-#define DBG_HFC_USB_VERBOSE	0x00100000
-#define DBG_HFC_URB_INFO	0x00200000
-#define DBG_HFC_URB_ERROR	0x00400000
-
-#define DEFAULT_TRANSP_BURST_SZ 128
-
-#define HFC_CTRL_TIMEOUT	20	/* 5ms timeout writing/reading regs */
-#define CLKDEL_TE		0x0f	/* CLKDEL in TE mode */
-#define CLKDEL_NT		0x6c	/* CLKDEL in NT mode */
-
-/* hfcsusb Layer1 commands */
-#define HFC_L1_ACTIVATE_TE		1
-#define HFC_L1_ACTIVATE_NT		2
-#define HFC_L1_DEACTIVATE_NT		3
-#define HFC_L1_FORCE_DEACTIVATE_TE	4
-
-/* cmd FLAGS in HFCUSB_STATES register */
-#define HFCUSB_LOAD_STATE	0x10
-#define HFCUSB_ACTIVATE		0x20
-#define HFCUSB_DO_ACTION	0x40
-#define HFCUSB_NT_G2_G3		0x80
-
-/* timers */
-#define NT_ACTIVATION_TIMER	0x01	/* enables NT mode activation Timer */
-#define NT_T1_COUNT		10
-
-#define MAX_BCH_SIZE		2048	/* allowed B-channel packet size */
-
-#define HFCUSB_RX_THRESHOLD	64	/* threshold for fifo report bit rx */
-#define HFCUSB_TX_THRESHOLD	96	/* threshold for fifo report bit tx */
-
-#define HFCUSB_CHIP_ID		0x16	/* Chip ID register index */
-#define HFCUSB_CIRM		0x00	/* cirm register index */
-#define HFCUSB_USB_SIZE		0x07	/* int length register */
-#define HFCUSB_USB_SIZE_I	0x06	/* iso length register */
-#define HFCUSB_F_CROSS		0x0b	/* bit order register */
-#define HFCUSB_CLKDEL		0x37	/* bit delay register */
-#define HFCUSB_CON_HDLC		0xfa	/* channel connect register */
-#define HFCUSB_HDLC_PAR		0xfb
-#define HFCUSB_SCTRL		0x31	/* S-bus control register (tx) */
-#define HFCUSB_SCTRL_E		0x32	/* same for E and special funcs */
-#define HFCUSB_SCTRL_R		0x33	/* S-bus control register (rx) */
-#define HFCUSB_F_THRES		0x0c	/* threshold register */
-#define HFCUSB_FIFO		0x0f	/* fifo select register */
-#define HFCUSB_F_USAGE		0x1a	/* fifo usage register */
-#define HFCUSB_MST_MODE0	0x14
-#define HFCUSB_MST_MODE1	0x15
-#define HFCUSB_P_DATA		0x1f
-#define HFCUSB_INC_RES_F	0x0e
-#define HFCUSB_B1_SSL		0x20
-#define HFCUSB_B2_SSL		0x21
-#define HFCUSB_B1_RSL		0x24
-#define HFCUSB_B2_RSL		0x25
-#define HFCUSB_STATES		0x30
-
-
-#define HFCUSB_CHIPID		0x40	/* ID value of HFC-S USB */
-
-/* fifo registers */
-#define HFCUSB_NUM_FIFOS	8	/* maximum number of fifos */
-#define HFCUSB_B1_TX		0	/* index for B1 transmit bulk/int */
-#define HFCUSB_B1_RX		1	/* index for B1 receive bulk/int */
-#define HFCUSB_B2_TX		2
-#define HFCUSB_B2_RX		3
-#define HFCUSB_D_TX		4
-#define HFCUSB_D_RX		5
-#define HFCUSB_PCM_TX		6
-#define HFCUSB_PCM_RX		7
-
-
-#define USB_INT		0
-#define USB_BULK	1
-#define USB_ISOC	2
-
-#define ISOC_PACKETS_D	8
-#define ISOC_PACKETS_B	8
-#define ISO_BUFFER_SIZE	128
-
-/* defines how much ISO packets are handled in one URB */
-static int iso_packets[8] =
-{ ISOC_PACKETS_B, ISOC_PACKETS_B, ISOC_PACKETS_B, ISOC_PACKETS_B,
-  ISOC_PACKETS_D, ISOC_PACKETS_D, ISOC_PACKETS_D, ISOC_PACKETS_D
-};
-
-
-/* Fifo flow Control for TX ISO */
-#define SINK_MAX	68
-#define SINK_MIN	48
-#define SINK_DMIN	12
-#define SINK_DMAX	18
-#define BITLINE_INF	(-96 * 8)
-
-/* HFC-S USB register access by Control-URSs */
-#define write_reg_atomic(a, b, c)					\
-	usb_control_msg((a)->dev, (a)->ctrl_out_pipe, 0, 0x40, (c), (b), \
-			0, 0, HFC_CTRL_TIMEOUT)
-#define read_reg_atomic(a, b, c)					\
-	usb_control_msg((a)->dev, (a)->ctrl_in_pipe, 1, 0xC0, 0, (b), (c), \
-			1, HFC_CTRL_TIMEOUT)
-#define HFC_CTRL_BUFSIZE 64
-
-struct ctrl_buf {
-	__u8 hfcs_reg;		/* register number */
-	__u8 reg_val;		/* value to be written (or read) */
-};
-
-/*
- * URB error codes
- * Used to represent a list of values and their respective symbolic names
- */
-struct hfcusb_symbolic_list {
-	const int num;
-	const char *name;
-};
-
-static struct hfcusb_symbolic_list urb_errlist[] = {
-	{-ENOMEM, "No memory for allocation of internal structures"},
-	{-ENOSPC, "The host controller's bandwidth is already consumed"},
-	{-ENOENT, "URB was canceled by unlink_urb"},
-	{-EXDEV, "ISO transfer only partially completed"},
-	{-EAGAIN, "Too match scheduled for the future"},
-	{-ENXIO, "URB already queued"},
-	{-EFBIG, "Too much ISO frames requested"},
-	{-ENOSR, "Buffer error (overrun)"},
-	{-EPIPE, "Specified endpoint is stalled (device not responding)"},
-	{-EOVERFLOW, "Babble (bad cable?)"},
-	{-EPROTO, "Bit-stuff error (bad cable?)"},
-	{-EILSEQ, "CRC/Timeout"},
-	{-ETIMEDOUT, "NAK (device does not respond)"},
-	{-ESHUTDOWN, "Device unplugged"},
-	{-1, NULL}
-};
-
-static inline const char *
-symbolic(struct hfcusb_symbolic_list list[], const int num)
-{
-	int i;
-	for (i = 0; list[i].name != NULL; i++)
-		if (list[i].num == num)
-			return list[i].name;
-	return "<unknown USB Error>";
-}
-
-/* USB descriptor need to contain one of the following EndPoint combination: */
-#define CNF_4INT3ISO	1	/* 4 INT IN, 3 ISO OUT */
-#define CNF_3INT3ISO	2	/* 3 INT IN, 3 ISO OUT */
-#define CNF_4ISO3ISO	3	/* 4 ISO IN, 3 ISO OUT */
-#define CNF_3ISO3ISO	4	/* 3 ISO IN, 3 ISO OUT */
-
-#define EP_NUL 1	/* Endpoint at this position not allowed */
-#define EP_NOP 2	/* all type of endpoints allowed at this position */
-#define EP_ISO 3	/* Isochron endpoint mandatory at this position */
-#define EP_BLK 4	/* Bulk endpoint mandatory at this position */
-#define EP_INT 5	/* Interrupt endpoint mandatory at this position */
-
-#define HFC_CHAN_B1	0
-#define HFC_CHAN_B2	1
-#define HFC_CHAN_D	2
-#define HFC_CHAN_E	3
-
-
-/*
- * List of all supported endpoint configuration sets, used to find the
- * best matching endpoint configuration within a device's USB descriptor.
- * We need at least 3 RX endpoints, and 3 TX endpoints, either
- * INT-in and ISO-out, or ISO-in and ISO-out)
- * with 4 RX endpoints even E-Channel logging is possible
- */
-static int
-validconf[][19] = {
-	/* INT in, ISO out config */
-	{EP_NUL, EP_INT, EP_NUL, EP_INT, EP_NUL, EP_INT, EP_NOP, EP_INT,
-	 EP_ISO, EP_NUL, EP_ISO, EP_NUL, EP_ISO, EP_NUL, EP_NUL, EP_NUL,
-	 CNF_4INT3ISO, 2, 1},
-	{EP_NUL, EP_INT, EP_NUL, EP_INT, EP_NUL, EP_INT, EP_NUL, EP_NUL,
-	 EP_ISO, EP_NUL, EP_ISO, EP_NUL, EP_ISO, EP_NUL, EP_NUL, EP_NUL,
-	 CNF_3INT3ISO, 2, 0},
-	/* ISO in, ISO out config */
-	{EP_NOP, EP_NOP, EP_NOP, EP_NOP, EP_NOP, EP_NOP, EP_NOP, EP_NOP,
-	 EP_ISO, EP_ISO, EP_ISO, EP_ISO, EP_ISO, EP_ISO, EP_NOP, EP_ISO,
-	 CNF_4ISO3ISO, 2, 1},
-	{EP_NUL, EP_NUL, EP_NUL, EP_NUL, EP_NUL, EP_NUL, EP_NUL, EP_NUL,
-	 EP_ISO, EP_ISO, EP_ISO, EP_ISO, EP_ISO, EP_ISO, EP_NUL, EP_NUL,
-	 CNF_3ISO3ISO, 2, 0},
-	{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} /* EOL element */
-};
-
-/* string description of chosen config */
-static char *conf_str[] = {
-	"4 Interrupt IN + 3 Isochron OUT",
-	"3 Interrupt IN + 3 Isochron OUT",
-	"4 Isochron IN + 3 Isochron OUT",
-	"3 Isochron IN + 3 Isochron OUT"
-};
-
-
-#define LED_OFF		0	/* no LED support */
-#define LED_SCHEME1	1	/* LED standard scheme */
-#define LED_SCHEME2	2	/* not used yet... */
-
-#define LED_POWER_ON	1
-#define LED_POWER_OFF	2
-#define LED_S0_ON	3
-#define LED_S0_OFF	4
-#define LED_B1_ON	5
-#define LED_B1_OFF	6
-#define LED_B1_DATA	7
-#define LED_B2_ON	8
-#define LED_B2_OFF	9
-#define LED_B2_DATA	10
-
-#define LED_NORMAL	0	/* LEDs are normal */
-#define LED_INVERTED	1	/* LEDs are inverted */
-
-/* time in ms to perform a Flashing LED when B-Channel has traffic */
-#define LED_TIME      250
-
-
-
-struct hfcsusb;
-struct usb_fifo;
-
-/* structure defining input+output fifos (interrupt/bulk mode) */
-struct iso_urb {
-	struct urb *urb;
-	__u8 buffer[ISO_BUFFER_SIZE];	/* buffer rx/tx USB URB data */
-	struct usb_fifo *owner_fifo;	/* pointer to owner fifo */
-	__u8 indx; /* Fifos's ISO double buffer 0 or 1 ? */
-#ifdef ISO_FRAME_START_DEBUG
-	int start_frames[ISO_FRAME_START_RING_COUNT];
-	__u8 iso_frm_strt_pos; /* index in start_frame[] */
-#endif
-};
-
-struct usb_fifo {
-	int fifonum;		/* fifo index attached to this structure */
-	int active;		/* fifo is currently active */
-	struct hfcsusb *hw;	/* pointer to main structure */
-	int pipe;		/* address of endpoint */
-	__u8 usb_packet_maxlen;	/* maximum length for usb transfer */
-	unsigned int max_size;	/* maximum size of receive/send packet */
-	__u8 intervall;		/* interrupt interval */
-	struct urb *urb;	/* transfer structure for usb routines */
-	__u8 buffer[128];	/* buffer USB INT OUT URB data */
-	int bit_line;		/* how much bits are in the fifo? */
-
-	__u8 usb_transfer_mode; /* switched between ISO and INT */
-	struct iso_urb	iso[2]; /* two urbs to have one always
-				   one pending */
-
-	struct dchannel *dch;	/* link to hfcsusb_t->dch */
-	struct bchannel *bch;	/* link to hfcsusb_t->bch */
-	struct dchannel *ech;	/* link to hfcsusb_t->ech, TODO: E-CHANNEL */
-	int last_urblen;	/* remember length of last packet */
-	__u8 stop_gracefull;	/* stops URB retransmission */
-};
-
-struct hfcsusb {
-	struct list_head	list;
-	struct dchannel		dch;
-	struct bchannel		bch[2];
-	struct dchannel		ech; /* TODO : wait for struct echannel ;) */
-
-	struct usb_device	*dev;		/* our device */
-	struct usb_interface	*intf;		/* used interface */
-	int			if_used;	/* used interface number */
-	int			alt_used;	/* used alternate config */
-	int			cfg_used;	/* configuration index used */
-	int			vend_idx;	/* index in hfcsusb_idtab */
-	int			packet_size;
-	int			iso_packet_size;
-	struct usb_fifo		fifos[HFCUSB_NUM_FIFOS];
-
-	/* control pipe background handling */
-	struct ctrl_buf		ctrl_buff[HFC_CTRL_BUFSIZE];
-	int			ctrl_in_idx, ctrl_out_idx, ctrl_cnt;
-	struct urb		*ctrl_urb;
-	struct usb_ctrlrequest	ctrl_write;
-	struct usb_ctrlrequest	ctrl_read;
-	int			ctrl_paksize;
-	int			ctrl_in_pipe, ctrl_out_pipe;
-	spinlock_t		ctrl_lock; /* lock for ctrl */
-	spinlock_t              lock;
-
-	__u8			threshold_mask;
-	__u8			led_state;
-
-	__u8			protocol;
-	int			nt_timer;
-	int			open;
-	__u8			timers;
-	__u8			initdone;
-	char			name[MISDN_MAX_IDLEN];
-};
-
-/* private vendor specific data */
-struct hfcsusb_vdata {
-	__u8		led_scheme;  /* led display scheme */
-	signed short	led_bits[8]; /* array of 8 possible LED bitmask */
-	char		*vend_name;  /* device name */
-};
-
-
-#define HFC_MAX_TE_LAYER1_STATE 8
-#define HFC_MAX_NT_LAYER1_STATE 4
-
-static const char *HFC_TE_LAYER1_STATES[HFC_MAX_TE_LAYER1_STATE + 1] = {
-	"TE F0 - Reset",
-	"TE F1 - Reset",
-	"TE F2 - Sensing",
-	"TE F3 - Deactivated",
-	"TE F4 - Awaiting signal",
-	"TE F5 - Identifying input",
-	"TE F6 - Synchronized",
-	"TE F7 - Activated",
-	"TE F8 - Lost framing",
-};
-
-static const char *HFC_NT_LAYER1_STATES[HFC_MAX_NT_LAYER1_STATE + 1] = {
-	"NT G0 - Reset",
-	"NT G1 - Deactive",
-	"NT G2 - Pending activation",
-	"NT G3 - Active",
-	"NT G4 - Pending deactivation",
-};
-
-/* supported devices */
-static const struct usb_device_id hfcsusb_idtab[] = {
-	{
-		USB_DEVICE(0x0959, 0x2bd0),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_OFF, {4, 0, 2, 1},
-					"ISDN USB TA (Cologne Chip HFC-S USB based)"}),
-	},
-	{
-		USB_DEVICE(0x0675, 0x1688),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {1, 2, 0, 0},
-					"DrayTek miniVigor 128 USB ISDN TA"}),
-	},
-	{
-		USB_DEVICE(0x07b0, 0x0007),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {0x80, -64, -32, -16},
-					"Billion tiny USB ISDN TA 128"}),
-	},
-	{
-		USB_DEVICE(0x0742, 0x2008),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {4, 0, 2, 1},
-					"Stollmann USB TA"}),
-	},
-	{
-		USB_DEVICE(0x0742, 0x2009),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {4, 0, 2, 1},
-					"Aceex USB ISDN TA"}),
-	},
-	{
-		USB_DEVICE(0x0742, 0x200A),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {4, 0, 2, 1},
-					"OEM USB ISDN TA"}),
-	},
-	{
-		USB_DEVICE(0x08e3, 0x0301),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {2, 0, 1, 4},
-					"Olitec USB RNIS"}),
-	},
-	{
-		USB_DEVICE(0x07fa, 0x0846),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {0x80, -64, -32, -16},
-					"Bewan Modem RNIS USB"}),
-	},
-	{
-		USB_DEVICE(0x07fa, 0x0847),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {0x80, -64, -32, -16},
-					"Djinn Numeris USB"}),
-	},
-	{
-		USB_DEVICE(0x07b0, 0x0006),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {0x80, -64, -32, -16},
-					"Twister ISDN TA"}),
-	},
-	{
-		USB_DEVICE(0x071d, 0x1005),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {0x02, 0, 0x01, 0x04},
-					"Eicon DIVA USB 4.0"}),
-	},
-	{
-		USB_DEVICE(0x0586, 0x0102),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {0x88, -64, -32, -16},
-					"ZyXEL OMNI.NET USB II"}),
-	},
-	{
-		USB_DEVICE(0x1ae7, 0x0525),
-		.driver_info = (unsigned long) &((struct hfcsusb_vdata)
-			{LED_SCHEME1, {0x88, -64, -32, -16},
-					"X-Tensions USB ISDN TA XC-525"}),
-	},
-	{ }
-};
-
-MODULE_DEVICE_TABLE(usb, hfcsusb_idtab);
-
-#endif	/* __HFCSUSB_H__ */
diff --git a/drivers/isdn/hardware/mISDN/iohelper.h b/drivers/isdn/hardware/mISDN/iohelper.h
deleted file mode 100644
index c81f7aba4b57..000000000000
--- a/drivers/isdn/hardware/mISDN/iohelper.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * iohelper.h
- *		helper for define functions to access ISDN hardware
- *              supported are memory mapped IO
- *		indirect port IO (one port for address, one for data)
- *
- * Author       Karsten Keil <keil@isdn4linux.de>
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-#ifndef _IOHELPER_H
-#define _IOHELPER_H
-
-typedef u8 (read_reg_func)(void *hwp, u8 offset);
-typedef void (write_reg_func)(void *hwp, u8 offset, u8 value);
-typedef void (fifo_func)(void *hwp, u8 offset, u8 *datap, int size);
-
-struct _ioport {
-	u32 port;
-	u32 ale;
-};
-
-#define IOFUNC_IO(name, hws, ap)					\
-	static u8 Read##name##_IO(void *p, u8 off) {			\
-		struct hws *hw = p;					\
-		return inb(hw->ap.port + off);				\
-	}								\
-	static void Write##name##_IO(void *p, u8 off, u8 val) {		\
-		struct hws *hw = p;					\
-		outb(val, hw->ap.port + off);				\
-	}								\
-	static void ReadFiFo##name##_IO(void *p, u8 off, u8 *dp, int size) { \
-		struct hws *hw = p;					\
-		insb(hw->ap.port + off, dp, size);			\
-	}								\
-	static void WriteFiFo##name##_IO(void *p, u8 off, u8 *dp, int size) { \
-		struct hws *hw = p;					\
-		outsb(hw->ap.port + off, dp, size);			\
-	}
-
-#define IOFUNC_IND(name, hws, ap)					\
-	static u8 Read##name##_IND(void *p, u8 off) {			\
-		struct hws *hw = p;					\
-		outb(off, hw->ap.ale);					\
-		return inb(hw->ap.port);				\
-	}								\
-	static void Write##name##_IND(void *p, u8 off, u8 val) {	\
-		struct hws *hw = p;					\
-		outb(off, hw->ap.ale);					\
-		outb(val, hw->ap.port);					\
-	}								\
-	static void ReadFiFo##name##_IND(void *p, u8 off, u8 *dp, int size) { \
-		struct hws *hw = p;					\
-		outb(off, hw->ap.ale);					\
-		insb(hw->ap.port, dp, size);				\
-	}								\
-	static void WriteFiFo##name##_IND(void *p, u8 off, u8 *dp, int size) { \
-		struct hws *hw = p;					\
-		outb(off, hw->ap.ale);					\
-		outsb(hw->ap.port, dp, size);				\
-	}
-
-#define IOFUNC_MEMIO(name, hws, typ, adr)				\
-	static u8 Read##name##_MIO(void *p, u8 off) {			\
-		struct hws *hw = p;					\
-		return readb(((typ *)hw->adr) + off);			\
-	}								\
-	static void Write##name##_MIO(void *p, u8 off, u8 val) {	\
-		struct hws *hw = p;					\
-		writeb(val, ((typ *)hw->adr) + off);			\
-	}								\
-	static void ReadFiFo##name##_MIO(void *p, u8 off, u8 *dp, int size) { \
-		struct hws *hw = p;					\
-		while (size--)						\
-			*dp++ = readb(((typ *)hw->adr) + off);		\
-	}								\
-	static void WriteFiFo##name##_MIO(void *p, u8 off, u8 *dp, int size) { \
-		struct hws *hw = p;					\
-		while (size--)						\
-			writeb(*dp++, ((typ *)hw->adr) + off);		\
-	}
-
-#define ASSIGN_FUNC(typ, name, dest)	do {			\
-		dest.read_reg = &Read##name##_##typ;		\
-		dest.write_reg = &Write##name##_##typ;		\
-		dest.read_fifo = &ReadFiFo##name##_##typ;	\
-		dest.write_fifo = &WriteFiFo##name##_##typ;	\
-	} while (0)
-#define ASSIGN_FUNC_IPAC(typ, target)	do {		\
-		ASSIGN_FUNC(typ, ISAC, target.isac);	\
-		ASSIGN_FUNC(typ, IPAC, target);		\
-	} while (0)
-
-#endif
diff --git a/drivers/isdn/hardware/mISDN/ipac.h b/drivers/isdn/hardware/mISDN/ipac.h
deleted file mode 100644
index 2f0c4978a7a5..000000000000
--- a/drivers/isdn/hardware/mISDN/ipac.h
+++ /dev/null
@@ -1,393 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *
- * ipac.h	Defines for the Infineon (former Siemens) ISDN
- *		chip series
- *
- * Author       Karsten Keil <keil@isdn4linux.de>
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-#include "iohelper.h"
-
-struct isac_hw {
-	struct dchannel		dch;
-	u32			type;
-	u32			off;		/* offset to isac regs */
-	char			*name;
-	spinlock_t		*hwlock;	/* lock HW access */
-	read_reg_func		*read_reg;
-	write_reg_func		*write_reg;
-	fifo_func		*read_fifo;
-	fifo_func		*write_fifo;
-	int			(*monitor)(void *, u32, u8 *, int);
-	void			(*release)(struct isac_hw *);
-	int			(*init)(struct isac_hw *);
-	int			(*ctrl)(struct isac_hw *, u32, u_long);
-	int			(*open)(struct isac_hw *, struct channel_req *);
-	u8			*mon_tx;
-	u8			*mon_rx;
-	int			mon_txp;
-	int			mon_txc;
-	int			mon_rxp;
-	struct arcofi_msg	*arcofi_list;
-	struct timer_list	arcofitimer;
-	wait_queue_head_t	arcofi_wait;
-	u8			arcofi_bc;
-	u8			arcofi_state;
-	u8			mocr;
-	u8			adf2;
-	u8			state;
-};
-
-struct ipac_hw;
-
-struct hscx_hw {
-	struct bchannel		bch;
-	struct ipac_hw		*ip;
-	u8			fifo_size;
-	u8			off;	/* offset to ICA or ICB */
-	u8			slot;
-	char			log[64];
-};
-
-struct ipac_hw {
-	struct isac_hw		isac;
-	struct hscx_hw		hscx[2];
-	char			*name;
-	void			*hw;
-	spinlock_t		*hwlock;	/* lock HW access */
-	struct module		*owner;
-	u32			type;
-	read_reg_func		*read_reg;
-	write_reg_func		*write_reg;
-	fifo_func		*read_fifo;
-	fifo_func		*write_fifo;
-	void			(*release)(struct ipac_hw *);
-	int			(*init)(struct ipac_hw *);
-	int			(*ctrl)(struct ipac_hw *, u32, u_long);
-	u8			conf;
-};
-
-#define IPAC_TYPE_ISAC		0x0010
-#define IPAC_TYPE_IPAC		0x0020
-#define IPAC_TYPE_ISACX		0x0040
-#define IPAC_TYPE_IPACX		0x0080
-#define IPAC_TYPE_HSCX		0x0100
-
-#define ISAC_USE_ARCOFI		0x1000
-
-/* Monitor functions */
-#define MONITOR_RX_0		0x1000
-#define MONITOR_RX_1		0x1001
-#define MONITOR_TX_0		0x2000
-#define MONITOR_TX_1		0x2001
-
-/* All registers original Siemens Spec  */
-/* IPAC/ISAC registers */
-#define ISAC_ISTA		0x20
-#define ISAC_MASK		0x20
-#define ISAC_CMDR		0x21
-#define ISAC_STAR		0x21
-#define ISAC_MODE		0x22
-#define ISAC_TIMR		0x23
-#define ISAC_EXIR		0x24
-#define ISAC_RBCL		0x25
-#define ISAC_RSTA		0x27
-#define ISAC_RBCH		0x2A
-#define ISAC_SPCR		0x30
-#define ISAC_CIR0		0x31
-#define ISAC_CIX0		0x31
-#define ISAC_MOR0		0x32
-#define ISAC_MOX0		0x32
-#define ISAC_CIR1		0x33
-#define ISAC_CIX1		0x33
-#define ISAC_MOR1		0x34
-#define ISAC_MOX1		0x34
-#define ISAC_STCR		0x37
-#define ISAC_ADF1		0x38
-#define ISAC_ADF2		0x39
-#define ISAC_MOCR		0x3a
-#define ISAC_MOSR		0x3a
-#define ISAC_SQRR		0x3b
-#define ISAC_SQXR		0x3b
-
-#define ISAC_RBCH_XAC		0x80
-
-#define IPAC_D_TIN2		0x01
-
-/* IPAC/HSCX */
-#define IPAC_ISTAB		0x20	/* RD	*/
-#define IPAC_MASKB		0x20	/* WR	*/
-#define IPAC_STARB		0x21	/* RD	*/
-#define IPAC_CMDRB		0x21	/* WR	*/
-#define IPAC_MODEB		0x22	/* R/W	*/
-#define IPAC_EXIRB		0x24	/* RD	*/
-#define IPAC_RBCLB		0x25	/* RD	*/
-#define IPAC_RAH1		0x26	/* WR	*/
-#define IPAC_RAH2		0x27	/* WR	*/
-#define IPAC_RSTAB		0x27	/* RD	*/
-#define IPAC_RAL1		0x28	/* R/W	*/
-#define IPAC_RAL2		0x29	/* WR	*/
-#define IPAC_RHCRB		0x29	/* RD	*/
-#define IPAC_XBCL		0x2A	/* WR	*/
-#define IPAC_CCR2		0x2C	/* R/W	*/
-#define IPAC_RBCHB		0x2D	/* RD	*/
-#define IPAC_XBCH		0x2D	/* WR	*/
-#define HSCX_VSTR		0x2E	/* RD	*/
-#define IPAC_RLCR		0x2E	/* WR	*/
-#define IPAC_CCR1		0x2F	/* R/W	*/
-#define IPAC_TSAX		0x30	/* WR	*/
-#define IPAC_TSAR		0x31	/* WR	*/
-#define IPAC_XCCR		0x32	/* WR	*/
-#define IPAC_RCCR		0x33	/* WR	*/
-
-/* IPAC_ISTAB/IPAC_MASKB bits */
-#define IPAC_B_XPR		0x10
-#define IPAC_B_RPF		0x40
-#define IPAC_B_RME		0x80
-#define IPAC_B_ON		0x2F
-
-/* IPAC_EXIRB bits */
-#define IPAC_B_RFS		0x04
-#define IPAC_B_RFO		0x10
-#define IPAC_B_XDU		0x40
-#define IPAC_B_XMR		0x80
-
-/* IPAC special registers */
-#define IPAC_CONF		0xC0	/* R/W	*/
-#define IPAC_ISTA		0xC1	/* RD	*/
-#define IPAC_MASK		0xC1	/* WR	*/
-#define IPAC_ID			0xC2	/* RD	*/
-#define IPAC_ACFG		0xC3	/* R/W	*/
-#define IPAC_AOE		0xC4	/* R/W	*/
-#define IPAC_ARX		0xC5	/* RD	*/
-#define IPAC_ATX		0xC5	/* WR	*/
-#define IPAC_PITA1		0xC6	/* R/W	*/
-#define IPAC_PITA2		0xC7	/* R/W	*/
-#define IPAC_POTA1		0xC8	/* R/W	*/
-#define IPAC_POTA2		0xC9	/* R/W	*/
-#define IPAC_PCFG		0xCA	/* R/W	*/
-#define IPAC_SCFG		0xCB	/* R/W	*/
-#define IPAC_TIMR2		0xCC	/* R/W	*/
-
-/* IPAC_ISTA/_MASK bits */
-#define IPAC__EXB		0x01
-#define IPAC__ICB		0x02
-#define IPAC__EXA		0x04
-#define IPAC__ICA		0x08
-#define IPAC__EXD		0x10
-#define IPAC__ICD		0x20
-#define IPAC__INT0		0x40
-#define IPAC__INT1		0x80
-#define IPAC__ON		0xC0
-
-/* HSCX ISTA/MASK bits */
-#define HSCX__EXB		0x01
-#define HSCX__EXA		0x02
-#define HSCX__ICA		0x04
-
-/* ISAC/ISACX/IPAC/IPACX L1 commands */
-#define ISAC_CMD_TIM		0x0
-#define ISAC_CMD_RS		0x1
-#define ISAC_CMD_SCZ		0x4
-#define ISAC_CMD_SSZ		0x2
-#define ISAC_CMD_AR8		0x8
-#define ISAC_CMD_AR10		0x9
-#define ISAC_CMD_ARL		0xA
-#define ISAC_CMD_DUI		0xF
-
-/* ISAC/ISACX/IPAC/IPACX L1 indications */
-#define ISAC_IND_DR		0x0
-#define ISAC_IND_RS		0x1
-#define ISAC_IND_SD		0x2
-#define ISAC_IND_DIS		0x3
-#define ISAC_IND_RSY		0x4
-#define ISAC_IND_DR6		0x5
-#define ISAC_IND_EI		0x6
-#define ISAC_IND_PU		0x7
-#define ISAC_IND_ARD		0x8
-#define ISAC_IND_TI		0xA
-#define ISAC_IND_ATI		0xB
-#define ISAC_IND_AI8		0xC
-#define ISAC_IND_AI10		0xD
-#define ISAC_IND_DID		0xF
-
-/* the new ISACX / IPACX */
-/* D-channel registers   */
-#define ISACX_RFIFOD		0x00	/* RD	*/
-#define ISACX_XFIFOD		0x00	/* WR	*/
-#define ISACX_ISTAD		0x20	/* RD	*/
-#define ISACX_MASKD		0x20	/* WR	*/
-#define ISACX_STARD		0x21	/* RD	*/
-#define ISACX_CMDRD		0x21	/* WR	*/
-#define ISACX_MODED		0x22	/* R/W	*/
-#define ISACX_EXMD1		0x23	/* R/W	*/
-#define ISACX_TIMR1		0x24	/* R/W	*/
-#define ISACX_SAP1		0x25	/* WR	*/
-#define ISACX_SAP2		0x26	/* WR	*/
-#define ISACX_RBCLD		0x26	/* RD	*/
-#define ISACX_RBCHD		0x27	/* RD	*/
-#define ISACX_TEI1		0x27	/* WR	*/
-#define ISACX_TEI2		0x28	/* WR	*/
-#define ISACX_RSTAD		0x28	/* RD	*/
-#define ISACX_TMD		0x29	/* R/W	*/
-#define ISACX_CIR0		0x2E	/* RD	*/
-#define ISACX_CIX0		0x2E	/* WR	*/
-#define ISACX_CIR1		0x2F	/* RD	*/
-#define ISACX_CIX1		0x2F	/* WR	*/
-
-/* Transceiver registers  */
-#define ISACX_TR_CONF0		0x30	/* R/W	*/
-#define ISACX_TR_CONF1		0x31	/* R/W	*/
-#define ISACX_TR_CONF2		0x32	/* R/W	*/
-#define ISACX_TR_STA		0x33	/* RD	*/
-#define ISACX_TR_CMD		0x34	/* R/W	*/
-#define ISACX_SQRR1		0x35	/* RD	*/
-#define ISACX_SQXR1		0x35	/* WR	*/
-#define ISACX_SQRR2		0x36	/* RD	*/
-#define ISACX_SQXR2		0x36	/* WR	*/
-#define ISACX_SQRR3		0x37	/* RD	*/
-#define ISACX_SQXR3		0x37	/* WR	*/
-#define ISACX_ISTATR		0x38	/* RD	*/
-#define ISACX_MASKTR		0x39	/* R/W	*/
-#define ISACX_TR_MODE		0x3A	/* R/W	*/
-#define ISACX_ACFG1		0x3C	/* R/W	*/
-#define ISACX_ACFG2		0x3D	/* R/W	*/
-#define ISACX_AOE		0x3E	/* R/W	*/
-#define ISACX_ARX		0x3F	/* RD	*/
-#define ISACX_ATX		0x3F	/* WR	*/
-
-/* IOM: Timeslot, DPS, CDA  */
-#define ISACX_CDA10		0x40	/* R/W	*/
-#define ISACX_CDA11		0x41	/* R/W	*/
-#define ISACX_CDA20		0x42	/* R/W	*/
-#define ISACX_CDA21		0x43	/* R/W	*/
-#define ISACX_CDA_TSDP10	0x44	/* R/W	*/
-#define ISACX_CDA_TSDP11	0x45	/* R/W	*/
-#define ISACX_CDA_TSDP20	0x46	/* R/W	*/
-#define ISACX_CDA_TSDP21	0x47	/* R/W	*/
-#define ISACX_BCHA_TSDP_BC1	0x48	/* R/W	*/
-#define ISACX_BCHA_TSDP_BC2	0x49	/* R/W	*/
-#define ISACX_BCHB_TSDP_BC1	0x4A	/* R/W	*/
-#define ISACX_BCHB_TSDP_BC2	0x4B	/* R/W	*/
-#define ISACX_TR_TSDP_BC1	0x4C	/* R/W	*/
-#define ISACX_TR_TSDP_BC2	0x4D	/* R/W	*/
-#define ISACX_CDA1_CR		0x4E	/* R/W	*/
-#define ISACX_CDA2_CR		0x4F	/* R/W	*/
-
-/* IOM: Contol, Sync transfer, Monitor    */
-#define ISACX_TR_CR		0x50	/* R/W	*/
-#define ISACX_TRC_CR		0x50	/* R/W	*/
-#define ISACX_BCHA_CR		0x51	/* R/W	*/
-#define ISACX_BCHB_CR		0x52	/* R/W	*/
-#define ISACX_DCI_CR		0x53	/* R/W	*/
-#define ISACX_DCIC_CR		0x53	/* R/W	*/
-#define ISACX_MON_CR		0x54	/* R/W	*/
-#define ISACX_SDS1_CR		0x55	/* R/W	*/
-#define ISACX_SDS2_CR		0x56	/* R/W	*/
-#define ISACX_IOM_CR		0x57	/* R/W	*/
-#define ISACX_STI		0x58	/* RD	*/
-#define ISACX_ASTI		0x58	/* WR	*/
-#define ISACX_MSTI		0x59	/* R/W	*/
-#define ISACX_SDS_CONF		0x5A	/* R/W	*/
-#define ISACX_MCDA		0x5B	/* RD	*/
-#define ISACX_MOR		0x5C	/* RD	*/
-#define ISACX_MOX		0x5C	/* WR	*/
-#define ISACX_MOSR		0x5D	/* RD	*/
-#define ISACX_MOCR		0x5E	/* R/W	*/
-#define ISACX_MSTA		0x5F	/* RD	*/
-#define ISACX_MCONF		0x5F	/* WR	*/
-
-/* Interrupt and general registers */
-#define ISACX_ISTA		0x60	/* RD	*/
-#define ISACX_MASK		0x60	/* WR	*/
-#define ISACX_AUXI		0x61	/* RD	*/
-#define ISACX_AUXM		0x61	/* WR	*/
-#define ISACX_MODE1		0x62	/* R/W	*/
-#define ISACX_MODE2		0x63	/* R/W	*/
-#define ISACX_ID		0x64	/* RD	*/
-#define ISACX_SRES		0x64	/* WR	*/
-#define ISACX_TIMR2		0x65	/* R/W	*/
-
-/* Register Bits */
-/* ISACX/IPACX _ISTAD (R) and _MASKD (W) */
-#define ISACX_D_XDU		0x04
-#define ISACX_D_XMR		0x08
-#define ISACX_D_XPR		0x10
-#define ISACX_D_RFO		0x20
-#define ISACX_D_RPF		0x40
-#define ISACX_D_RME		0x80
-
-/* ISACX/IPACX _ISTA (R) and _MASK (W) */
-#define ISACX__ICD		0x01
-#define ISACX__MOS		0x02
-#define ISACX__TRAN		0x04
-#define ISACX__AUX		0x08
-#define ISACX__CIC		0x10
-#define ISACX__ST		0x20
-#define IPACX__ON		0x2C
-#define IPACX__ICB		0x40
-#define IPACX__ICA		0x80
-
-/* ISACX/IPACX _CMDRD (W) */
-#define ISACX_CMDRD_XRES	0x01
-#define ISACX_CMDRD_XME		0x02
-#define ISACX_CMDRD_XTF		0x08
-#define ISACX_CMDRD_STI		0x10
-#define ISACX_CMDRD_RRES	0x40
-#define ISACX_CMDRD_RMC		0x80
-
-/* ISACX/IPACX _RSTAD (R) */
-#define ISACX_RSTAD_TA		0x01
-#define ISACX_RSTAD_CR		0x02
-#define ISACX_RSTAD_SA0		0x04
-#define ISACX_RSTAD_SA1		0x08
-#define ISACX_RSTAD_RAB		0x10
-#define ISACX_RSTAD_CRC		0x20
-#define ISACX_RSTAD_RDO		0x40
-#define ISACX_RSTAD_VFR		0x80
-
-/* ISACX/IPACX _CIR0 (R) */
-#define ISACX_CIR0_BAS		0x01
-#define ISACX_CIR0_SG		0x08
-#define ISACX_CIR0_CIC1		0x08
-#define ISACX_CIR0_CIC0		0x08
-
-/* B-channel registers */
-#define IPACX_OFF_ICA		0x70
-#define IPACX_OFF_ICB		0x80
-
-/* ICA: IPACX_OFF_ICA + Reg ICB: IPACX_OFF_ICB + Reg */
-
-#define IPACX_ISTAB		0x00    /* RD	*/
-#define IPACX_MASKB		0x00	/* WR	*/
-#define IPACX_STARB		0x01	/* RD	*/
-#define IPACX_CMDRB		0x01	/* WR	*/
-#define IPACX_MODEB		0x02	/* R/W	*/
-#define IPACX_EXMB		0x03	/* R/W	*/
-#define IPACX_RAH1		0x05	/* WR	*/
-#define IPACX_RAH2		0x06	/* WR	*/
-#define IPACX_RBCLB		0x06	/* RD	*/
-#define IPACX_RBCHB		0x07	/* RD	*/
-#define IPACX_RAL1		0x07	/* WR	*/
-#define IPACX_RAL2		0x08	/* WR	*/
-#define IPACX_RSTAB		0x08	/* RD	*/
-#define IPACX_TMB		0x09	/* R/W	*/
-#define IPACX_RFIFOB		0x0A	/* RD	*/
-#define IPACX_XFIFOB		0x0A	/* WR	*/
-
-/* IPACX_ISTAB / IPACX_MASKB bits */
-#define IPACX_B_XDU		0x04
-#define IPACX_B_XPR		0x10
-#define IPACX_B_RFO		0x20
-#define IPACX_B_RPF		0x40
-#define IPACX_B_RME		0x80
-
-#define IPACX_B_ON		0x0B
-
-extern int mISDNisac_init(struct isac_hw *, void *);
-extern irqreturn_t mISDNisac_irq(struct isac_hw *, u8);
-extern u32 mISDNipac_init(struct ipac_hw *, void *);
-extern irqreturn_t mISDNipac_irq(struct ipac_hw *, int);
diff --git a/drivers/isdn/hardware/mISDN/isar.h b/drivers/isdn/hardware/mISDN/isar.h
deleted file mode 100644
index 36a9fa564b17..000000000000
--- a/drivers/isdn/hardware/mISDN/isar.h
+++ /dev/null
@@ -1,256 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *
- * isar.h   ISAR (Siemens PSB 7110) specific defines
- *
- * Author Karsten Keil (keil@isdn4linux.de)
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-#include "iohelper.h"
-
-struct isar_hw;
-
-struct isar_ch {
-	struct bchannel		bch;
-	struct isar_hw		*is;
-	struct timer_list	ftimer;
-	u8			nr;
-	u8			dpath;
-	u8			mml;
-	u8			state;
-	u8			cmd;
-	u8			mod;
-	u8			newcmd;
-	u8			newmod;
-	u8			try_mod;
-	u8			conmsg[16];
-};
-
-struct isar_hw {
-	struct	isar_ch	ch[2];
-	void		*hw;
-	spinlock_t	*hwlock;	/* lock HW access */
-	char		*name;
-	struct module	*owner;
-	read_reg_func	*read_reg;
-	write_reg_func	*write_reg;
-	fifo_func	*read_fifo;
-	fifo_func	*write_fifo;
-	int		(*ctrl)(void *, u32, u_long);
-	void		(*release)(struct isar_hw *);
-	int		(*init)(struct isar_hw *);
-	int		(*open)(struct isar_hw *, struct channel_req *);
-	int		(*firmware)(struct isar_hw *, const u8 *, int);
-	unsigned long	Flags;
-	int		version;
-	u8		bstat;
-	u8		iis;
-	u8		cmsb;
-	u8		clsb;
-	u8		buf[256];
-	u8		log[256];
-};
-
-#define ISAR_IRQMSK	0x04
-#define ISAR_IRQSTA	0x04
-#define ISAR_IRQBIT	0x75
-#define ISAR_CTRL_H	0x61
-#define ISAR_CTRL_L	0x60
-#define ISAR_IIS	0x58
-#define ISAR_IIA	0x58
-#define ISAR_HIS	0x50
-#define ISAR_HIA	0x50
-#define ISAR_MBOX	0x4c
-#define ISAR_WADR	0x4a
-#define ISAR_RADR	0x48
-
-#define ISAR_HIS_VNR		0x14
-#define ISAR_HIS_DKEY		0x02
-#define ISAR_HIS_FIRM		0x1e
-#define ISAR_HIS_STDSP		0x08
-#define ISAR_HIS_DIAG		0x05
-#define ISAR_HIS_P0CFG		0x3c
-#define ISAR_HIS_P12CFG		0x24
-#define ISAR_HIS_SARTCFG	0x25
-#define ISAR_HIS_PUMPCFG	0x26
-#define ISAR_HIS_PUMPCTRL	0x2a
-#define ISAR_HIS_IOM2CFG	0x27
-#define ISAR_HIS_IOM2REQ	0x07
-#define ISAR_HIS_IOM2CTRL	0x2b
-#define ISAR_HIS_BSTREQ		0x0c
-#define ISAR_HIS_PSTREQ		0x0e
-#define ISAR_HIS_SDATA		0x20
-#define ISAR_HIS_DPS1		0x40
-#define ISAR_HIS_DPS2		0x80
-#define SET_DPS(x)		((x << 6) & 0xc0)
-
-#define ISAR_IIS_MSCMSD		0x3f
-#define ISAR_IIS_VNR		0x15
-#define ISAR_IIS_DKEY		0x03
-#define ISAR_IIS_FIRM		0x1f
-#define ISAR_IIS_STDSP		0x09
-#define ISAR_IIS_DIAG		0x25
-#define ISAR_IIS_GSTEV		0x00
-#define ISAR_IIS_BSTEV		0x28
-#define ISAR_IIS_BSTRSP		0x2c
-#define ISAR_IIS_PSTRSP		0x2e
-#define ISAR_IIS_PSTEV		0x2a
-#define ISAR_IIS_IOM2RSP	0x27
-#define ISAR_IIS_RDATA		0x20
-#define ISAR_IIS_INVMSG		0x3f
-
-#define ISAR_CTRL_SWVER	0x10
-#define ISAR_CTRL_STST	0x40
-
-#define ISAR_MSG_HWVER	0x20
-
-#define ISAR_DP1_USE	1
-#define ISAR_DP2_USE	2
-#define ISAR_RATE_REQ	3
-
-#define PMOD_DISABLE	0
-#define PMOD_FAX	1
-#define PMOD_DATAMODEM	2
-#define PMOD_HALFDUPLEX	3
-#define PMOD_V110	4
-#define PMOD_DTMF	5
-#define PMOD_DTMF_TRANS	6
-#define PMOD_BYPASS	7
-
-#define PCTRL_ORIG	0x80
-#define PV32P2_V23R	0x40
-#define PV32P2_V22A	0x20
-#define PV32P2_V22B	0x10
-#define PV32P2_V22C	0x08
-#define PV32P2_V21	0x02
-#define PV32P2_BEL	0x01
-
-/* LSB MSB in ISAR doc wrong !!! Arghhh */
-#define PV32P3_AMOD	0x80
-#define PV32P3_V32B	0x02
-#define PV32P3_V23B	0x01
-#define PV32P4_48	0x11
-#define PV32P5_48	0x05
-#define PV32P4_UT48	0x11
-#define PV32P5_UT48	0x0d
-#define PV32P4_96	0x11
-#define PV32P5_96	0x03
-#define PV32P4_UT96	0x11
-#define PV32P5_UT96	0x0f
-#define PV32P4_B96	0x91
-#define PV32P5_B96	0x0b
-#define PV32P4_UTB96	0xd1
-#define PV32P5_UTB96	0x0f
-#define PV32P4_120	0xb1
-#define PV32P5_120	0x09
-#define PV32P4_UT120	0xf1
-#define PV32P5_UT120	0x0f
-#define PV32P4_144	0x99
-#define PV32P5_144	0x09
-#define PV32P4_UT144	0xf9
-#define PV32P5_UT144	0x0f
-#define PV32P6_CTN	0x01
-#define PV32P6_ATN	0x02
-
-#define PFAXP2_CTN	0x01
-#define PFAXP2_ATN	0x04
-
-#define PSEV_10MS_TIMER	0x02
-#define PSEV_CON_ON	0x18
-#define PSEV_CON_OFF	0x19
-#define PSEV_V24_OFF	0x20
-#define PSEV_CTS_ON	0x21
-#define PSEV_CTS_OFF	0x22
-#define PSEV_DCD_ON	0x23
-#define PSEV_DCD_OFF	0x24
-#define PSEV_DSR_ON	0x25
-#define PSEV_DSR_OFF	0x26
-#define PSEV_REM_RET	0xcc
-#define PSEV_REM_REN	0xcd
-#define PSEV_GSTN_CLR	0xd4
-
-#define PSEV_RSP_READY	0xbc
-#define PSEV_LINE_TX_H	0xb3
-#define PSEV_LINE_TX_B	0xb2
-#define PSEV_LINE_RX_H	0xb1
-#define PSEV_LINE_RX_B	0xb0
-#define PSEV_RSP_CONN	0xb5
-#define PSEV_RSP_DISC	0xb7
-#define PSEV_RSP_FCERR	0xb9
-#define PSEV_RSP_SILDET	0xbe
-#define PSEV_RSP_SILOFF	0xab
-#define PSEV_FLAGS_DET	0xba
-
-#define PCTRL_CMD_TDTMF	0x5a
-
-#define PCTRL_CMD_FTH	0xa7
-#define PCTRL_CMD_FRH	0xa5
-#define PCTRL_CMD_FTM	0xa8
-#define PCTRL_CMD_FRM	0xa6
-#define PCTRL_CMD_SILON	0xac
-#define PCTRL_CMD_CONT	0xa2
-#define PCTRL_CMD_ESC	0xa4
-#define PCTRL_CMD_SILOFF 0xab
-#define PCTRL_CMD_HALT	0xa9
-
-#define PCTRL_LOC_RET	0xcf
-#define PCTRL_LOC_REN	0xce
-
-#define SMODE_DISABLE	0
-#define SMODE_V14	2
-#define SMODE_HDLC	3
-#define SMODE_BINARY	4
-#define SMODE_FSK_V14	5
-
-#define SCTRL_HDMC_BOTH	0x00
-#define SCTRL_HDMC_DTX	0x80
-#define SCTRL_HDMC_DRX	0x40
-#define S_P1_OVSP	0x40
-#define S_P1_SNP	0x20
-#define S_P1_EOP	0x10
-#define S_P1_EDP	0x08
-#define S_P1_NSB	0x04
-#define S_P1_CHS_8	0x03
-#define S_P1_CHS_7	0x02
-#define S_P1_CHS_6	0x01
-#define S_P1_CHS_5	0x00
-
-#define S_P2_BFT_DEF	0x10
-
-#define IOM_CTRL_ENA	0x80
-#define IOM_CTRL_NOPCM	0x00
-#define IOM_CTRL_ALAW	0x02
-#define IOM_CTRL_ULAW	0x04
-#define IOM_CTRL_RCV	0x01
-
-#define IOM_P1_TXD	0x10
-
-#define HDLC_FED	0x40
-#define HDLC_FSD	0x20
-#define HDLC_FST	0x20
-#define HDLC_ERROR	0x1c
-#define HDLC_ERR_FAD	0x10
-#define HDLC_ERR_RER	0x08
-#define HDLC_ERR_CER	0x04
-#define SART_NMD	0x01
-
-#define BSTAT_RDM0	0x1
-#define BSTAT_RDM1	0x2
-#define BSTAT_RDM2	0x4
-#define BSTAT_RDM3	0x8
-#define BSTEV_TBO	0x1f
-#define BSTEV_RBO	0x2f
-
-/* FAX State Machine */
-#define STFAX_NULL	0
-#define STFAX_READY	1
-#define STFAX_LINE	2
-#define STFAX_CONT	3
-#define STFAX_ACTIV	4
-#define STFAX_ESCAPE	5
-#define STFAX_SILDET	6
-
-extern u32 mISDNisar_init(struct isar_hw *, void *);
-extern void mISDNisar_irq(struct isar_hw *);
diff --git a/drivers/isdn/hardware/mISDN/isdnhdlc.c b/drivers/isdn/hardware/mISDN/isdnhdlc.c
deleted file mode 100644
index 985367e6711d..000000000000
--- a/drivers/isdn/hardware/mISDN/isdnhdlc.c
+++ /dev/null
@@ -1,617 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * isdnhdlc.c  --  General purpose ISDN HDLC decoder.
- *
- * Copyright (C)
- *	2009	Karsten Keil		<keil@b1-systems.de>
- *	2002	Wolfgang Mües		<wolfgang@iksw-muees.de>
- *	2001	Frode Isaksen		<fisaksen@bewan.com>
- *      2001	Kai Germaschewski	<kai.germaschewski@gmx.de>
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/crc-ccitt.h>
-#include <linux/bitrev.h>
-#include "isdnhdlc.h"
-
-/*-------------------------------------------------------------------*/
-
-MODULE_AUTHOR("Wolfgang Mües <wolfgang@iksw-muees.de>, "
-	      "Frode Isaksen <fisaksen@bewan.com>, "
-	      "Kai Germaschewski <kai.germaschewski@gmx.de>");
-MODULE_DESCRIPTION("General purpose ISDN HDLC decoder");
-MODULE_LICENSE("GPL");
-
-/*-------------------------------------------------------------------*/
-
-enum {
-	HDLC_FAST_IDLE, HDLC_GET_FLAG_B0, HDLC_GETFLAG_B1A6, HDLC_GETFLAG_B7,
-	HDLC_GET_DATA, HDLC_FAST_FLAG
-};
-
-enum {
-	HDLC_SEND_DATA, HDLC_SEND_CRC1, HDLC_SEND_FAST_FLAG,
-	HDLC_SEND_FIRST_FLAG, HDLC_SEND_CRC2, HDLC_SEND_CLOSING_FLAG,
-	HDLC_SEND_IDLE1, HDLC_SEND_FAST_IDLE, HDLC_SENDFLAG_B0,
-	HDLC_SENDFLAG_B1A6, HDLC_SENDFLAG_B7, STOPPED, HDLC_SENDFLAG_ONE
-};
-
-void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features)
-{
-	memset(hdlc, 0, sizeof(struct isdnhdlc_vars));
-	hdlc->state = HDLC_GET_DATA;
-	if (features & HDLC_56KBIT)
-		hdlc->do_adapt56 = 1;
-	if (features & HDLC_BITREVERSE)
-		hdlc->do_bitreverse = 1;
-}
-EXPORT_SYMBOL(isdnhdlc_out_init);
-
-void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features)
-{
-	memset(hdlc, 0, sizeof(struct isdnhdlc_vars));
-	if (features & HDLC_DCHANNEL) {
-		hdlc->dchannel = 1;
-		hdlc->state = HDLC_SEND_FIRST_FLAG;
-	} else {
-		hdlc->dchannel = 0;
-		hdlc->state = HDLC_SEND_FAST_FLAG;
-		hdlc->ffvalue = 0x7e;
-	}
-	hdlc->cbin = 0x7e;
-	if (features & HDLC_56KBIT) {
-		hdlc->do_adapt56 = 1;
-		hdlc->state = HDLC_SENDFLAG_B0;
-	} else
-		hdlc->data_bits = 8;
-	if (features & HDLC_BITREVERSE)
-		hdlc->do_bitreverse = 1;
-}
-EXPORT_SYMBOL(isdnhdlc_rcv_init);
-
-static int
-check_frame(struct isdnhdlc_vars *hdlc)
-{
-	int status;
-
-	if (hdlc->dstpos < 2)	/* too small - framing error */
-		status = -HDLC_FRAMING_ERROR;
-	else if (hdlc->crc != 0xf0b8)	/* crc error */
-		status = -HDLC_CRC_ERROR;
-	else {
-		/* remove CRC */
-		hdlc->dstpos -= 2;
-		/* good frame */
-		status = hdlc->dstpos;
-	}
-	return status;
-}
-
-/*
-  isdnhdlc_decode - decodes HDLC frames from a transparent bit stream.
-
-  The source buffer is scanned for valid HDLC frames looking for
-  flags (01111110) to indicate the start of a frame. If the start of
-  the frame is found, the bit stuffing is removed (0 after 5 1's).
-  When a new flag is found, the complete frame has been received
-  and the CRC is checked.
-  If a valid frame is found, the function returns the frame length
-  excluding the CRC with the bit HDLC_END_OF_FRAME set.
-  If the beginning of a valid frame is found, the function returns
-  the length.
-  If a framing error is found (too many 1s and not a flag) the function
-  returns the length with the bit HDLC_FRAMING_ERROR set.
-  If a CRC error is found the function returns the length with the
-  bit HDLC_CRC_ERROR set.
-  If the frame length exceeds the destination buffer size, the function
-  returns the length with the bit HDLC_LENGTH_ERROR set.
-
-  src - source buffer
-  slen - source buffer length
-  count - number of bytes removed (decoded) from the source buffer
-  dst _ destination buffer
-  dsize - destination buffer size
-  returns - number of decoded bytes in the destination buffer and status
-  flag.
-*/
-int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen,
-		    int *count, u8 *dst, int dsize)
-{
-	int status = 0;
-
-	static const unsigned char fast_flag[] = {
-		0x00, 0x00, 0x00, 0x20, 0x30, 0x38, 0x3c, 0x3e, 0x3f
-	};
-
-	static const unsigned char fast_flag_value[] = {
-		0x00, 0x7e, 0xfc, 0xf9, 0xf3, 0xe7, 0xcf, 0x9f, 0x3f
-	};
-
-	static const unsigned char fast_abort[] = {
-		0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff
-	};
-
-#define handle_fast_flag(h)						\
-	do {								\
-		if (h->cbin == fast_flag[h->bit_shift]) {		\
-			h->ffvalue = fast_flag_value[h->bit_shift];	\
-			h->state = HDLC_FAST_FLAG;			\
-			h->ffbit_shift = h->bit_shift;			\
-			h->bit_shift = 1;				\
-		} else {						\
-			h->state = HDLC_GET_DATA;			\
-			h->data_received = 0;				\
-		}							\
-	} while (0)
-
-#define handle_abort(h)						\
-	do {							\
-		h->shift_reg = fast_abort[h->ffbit_shift - 1];	\
-		h->hdlc_bits1 = h->ffbit_shift - 2;		\
-		if (h->hdlc_bits1 < 0)				\
-			h->hdlc_bits1 = 0;			\
-		h->data_bits = h->ffbit_shift - 1;		\
-		h->state = HDLC_GET_DATA;			\
-		h->data_received = 0;				\
-	} while (0)
-
-	*count = slen;
-
-	while (slen > 0) {
-		if (hdlc->bit_shift == 0) {
-			/* the code is for bitreverse streams */
-			if (hdlc->do_bitreverse == 0)
-				hdlc->cbin = bitrev8(*src++);
-			else
-				hdlc->cbin = *src++;
-			slen--;
-			hdlc->bit_shift = 8;
-			if (hdlc->do_adapt56)
-				hdlc->bit_shift--;
-		}
-
-		switch (hdlc->state) {
-		case STOPPED:
-			return 0;
-		case HDLC_FAST_IDLE:
-			if (hdlc->cbin == 0xff) {
-				hdlc->bit_shift = 0;
-				break;
-			}
-			hdlc->state = HDLC_GET_FLAG_B0;
-			hdlc->hdlc_bits1 = 0;
-			hdlc->bit_shift = 8;
-			break;
-		case HDLC_GET_FLAG_B0:
-			if (!(hdlc->cbin & 0x80)) {
-				hdlc->state = HDLC_GETFLAG_B1A6;
-				hdlc->hdlc_bits1 = 0;
-			} else {
-				if ((!hdlc->do_adapt56) &&
-				    (++hdlc->hdlc_bits1 >= 8) &&
-				    (hdlc->bit_shift == 1))
-					hdlc->state = HDLC_FAST_IDLE;
-			}
-			hdlc->cbin <<= 1;
-			hdlc->bit_shift--;
-			break;
-		case HDLC_GETFLAG_B1A6:
-			if (hdlc->cbin & 0x80) {
-				hdlc->hdlc_bits1++;
-				if (hdlc->hdlc_bits1 == 6)
-					hdlc->state = HDLC_GETFLAG_B7;
-			} else
-				hdlc->hdlc_bits1 = 0;
-			hdlc->cbin <<= 1;
-			hdlc->bit_shift--;
-			break;
-		case HDLC_GETFLAG_B7:
-			if (hdlc->cbin & 0x80) {
-				hdlc->state = HDLC_GET_FLAG_B0;
-			} else {
-				hdlc->state = HDLC_GET_DATA;
-				hdlc->crc = 0xffff;
-				hdlc->shift_reg = 0;
-				hdlc->hdlc_bits1 = 0;
-				hdlc->data_bits = 0;
-				hdlc->data_received = 0;
-			}
-			hdlc->cbin <<= 1;
-			hdlc->bit_shift--;
-			break;
-		case HDLC_GET_DATA:
-			if (hdlc->cbin & 0x80) {
-				hdlc->hdlc_bits1++;
-				switch (hdlc->hdlc_bits1) {
-				case 6:
-					break;
-				case 7:
-					if (hdlc->data_received)
-						/* bad frame */
-						status = -HDLC_FRAMING_ERROR;
-					if (!hdlc->do_adapt56) {
-						if (hdlc->cbin == fast_abort
-						    [hdlc->bit_shift + 1]) {
-							hdlc->state =
-								HDLC_FAST_IDLE;
-							hdlc->bit_shift = 1;
-							break;
-						}
-					} else
-						hdlc->state = HDLC_GET_FLAG_B0;
-					break;
-				default:
-					hdlc->shift_reg >>= 1;
-					hdlc->shift_reg |= 0x80;
-					hdlc->data_bits++;
-					break;
-				}
-			} else {
-				switch (hdlc->hdlc_bits1) {
-				case 5:
-					break;
-				case 6:
-					if (hdlc->data_received)
-						status = check_frame(hdlc);
-					hdlc->crc = 0xffff;
-					hdlc->shift_reg = 0;
-					hdlc->data_bits = 0;
-					if (!hdlc->do_adapt56)
-						handle_fast_flag(hdlc);
-					else {
-						hdlc->state = HDLC_GET_DATA;
-						hdlc->data_received = 0;
-					}
-					break;
-				default:
-					hdlc->shift_reg >>= 1;
-					hdlc->data_bits++;
-					break;
-				}
-				hdlc->hdlc_bits1 = 0;
-			}
-			if (status) {
-				hdlc->dstpos = 0;
-				*count -= slen;
-				hdlc->cbin <<= 1;
-				hdlc->bit_shift--;
-				return status;
-			}
-			if (hdlc->data_bits == 8) {
-				hdlc->data_bits = 0;
-				hdlc->data_received = 1;
-				hdlc->crc = crc_ccitt_byte(hdlc->crc,
-							   hdlc->shift_reg);
-
-				/* good byte received */
-				if (hdlc->dstpos < dsize)
-					dst[hdlc->dstpos++] = hdlc->shift_reg;
-				else {
-					/* frame too long */
-					status = -HDLC_LENGTH_ERROR;
-					hdlc->dstpos = 0;
-				}
-			}
-			hdlc->cbin <<= 1;
-			hdlc->bit_shift--;
-			break;
-		case HDLC_FAST_FLAG:
-			if (hdlc->cbin == hdlc->ffvalue) {
-				hdlc->bit_shift = 0;
-				break;
-			} else {
-				if (hdlc->cbin == 0xff) {
-					hdlc->state = HDLC_FAST_IDLE;
-					hdlc->bit_shift = 0;
-				} else if (hdlc->ffbit_shift == 8) {
-					hdlc->state = HDLC_GETFLAG_B7;
-					break;
-				} else
-					handle_abort(hdlc);
-			}
-			break;
-		default:
-			break;
-		}
-	}
-	*count -= slen;
-	return 0;
-}
-EXPORT_SYMBOL(isdnhdlc_decode);
-/*
-  isdnhdlc_encode - encodes HDLC frames to a transparent bit stream.
-
-  The bit stream starts with a beginning flag (01111110). After
-  that each byte is added to the bit stream with bit stuffing added
-  (0 after 5 1's).
-  When the last byte has been removed from the source buffer, the
-  CRC (2 bytes is added) and the frame terminates with the ending flag.
-  For the dchannel, the idle character (all 1's) is also added at the end.
-  If this function is called with empty source buffer (slen=0), flags or
-  idle character will be generated.
-
-  src - source buffer
-  slen - source buffer length
-  count - number of bytes removed (encoded) from source buffer
-  dst _ destination buffer
-  dsize - destination buffer size
-  returns - number of encoded bytes in the destination buffer
-*/
-int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen,
-		    int *count, u8 *dst, int dsize)
-{
-	static const unsigned char xfast_flag_value[] = {
-		0x7e, 0x3f, 0x9f, 0xcf, 0xe7, 0xf3, 0xf9, 0xfc, 0x7e
-	};
-
-	int len = 0;
-
-	*count = slen;
-
-	/* special handling for one byte frames */
-	if ((slen == 1) && (hdlc->state == HDLC_SEND_FAST_FLAG))
-		hdlc->state = HDLC_SENDFLAG_ONE;
-	while (dsize > 0) {
-		if (hdlc->bit_shift == 0) {
-			if (slen && !hdlc->do_closing) {
-				hdlc->shift_reg = *src++;
-				slen--;
-				if (slen == 0)
-					/* closing sequence, CRC + flag(s) */
-					hdlc->do_closing = 1;
-				hdlc->bit_shift = 8;
-			} else {
-				if (hdlc->state == HDLC_SEND_DATA) {
-					if (hdlc->data_received) {
-						hdlc->state = HDLC_SEND_CRC1;
-						hdlc->crc ^= 0xffff;
-						hdlc->bit_shift = 8;
-						hdlc->shift_reg =
-							hdlc->crc & 0xff;
-					} else if (!hdlc->do_adapt56)
-						hdlc->state =
-							HDLC_SEND_FAST_FLAG;
-					else
-						hdlc->state =
-							HDLC_SENDFLAG_B0;
-				}
-
-			}
-		}
-
-		switch (hdlc->state) {
-		case STOPPED:
-			while (dsize--)
-				*dst++ = 0xff;
-			return dsize;
-		case HDLC_SEND_FAST_FLAG:
-			hdlc->do_closing = 0;
-			if (slen == 0) {
-				/* the code is for bitreverse streams */
-				if (hdlc->do_bitreverse == 0)
-					*dst++ = bitrev8(hdlc->ffvalue);
-				else
-					*dst++ = hdlc->ffvalue;
-				len++;
-				dsize--;
-				break;
-			}
-			fallthrough;
-		case HDLC_SENDFLAG_ONE:
-			if (hdlc->bit_shift == 8) {
-				hdlc->cbin = hdlc->ffvalue >>
-					(8 - hdlc->data_bits);
-				hdlc->state = HDLC_SEND_DATA;
-				hdlc->crc = 0xffff;
-				hdlc->hdlc_bits1 = 0;
-				hdlc->data_received = 1;
-			}
-			break;
-		case HDLC_SENDFLAG_B0:
-			hdlc->do_closing = 0;
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			hdlc->hdlc_bits1 = 0;
-			hdlc->state = HDLC_SENDFLAG_B1A6;
-			break;
-		case HDLC_SENDFLAG_B1A6:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			hdlc->cbin++;
-			if (++hdlc->hdlc_bits1 == 6)
-				hdlc->state = HDLC_SENDFLAG_B7;
-			break;
-		case HDLC_SENDFLAG_B7:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (slen == 0) {
-				hdlc->state = HDLC_SENDFLAG_B0;
-				break;
-			}
-			if (hdlc->bit_shift == 8) {
-				hdlc->state = HDLC_SEND_DATA;
-				hdlc->crc = 0xffff;
-				hdlc->hdlc_bits1 = 0;
-				hdlc->data_received = 1;
-			}
-			break;
-		case HDLC_SEND_FIRST_FLAG:
-			hdlc->data_received = 1;
-			if (hdlc->data_bits == 8) {
-				hdlc->state = HDLC_SEND_DATA;
-				hdlc->crc = 0xffff;
-				hdlc->hdlc_bits1 = 0;
-				break;
-			}
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (hdlc->shift_reg & 0x01)
-				hdlc->cbin++;
-			hdlc->shift_reg >>= 1;
-			hdlc->bit_shift--;
-			if (hdlc->bit_shift == 0) {
-				hdlc->state = HDLC_SEND_DATA;
-				hdlc->crc = 0xffff;
-				hdlc->hdlc_bits1 = 0;
-			}
-			break;
-		case HDLC_SEND_DATA:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (hdlc->hdlc_bits1 == 5) {
-				hdlc->hdlc_bits1 = 0;
-				break;
-			}
-			if (hdlc->bit_shift == 8)
-				hdlc->crc = crc_ccitt_byte(hdlc->crc,
-							   hdlc->shift_reg);
-			if (hdlc->shift_reg & 0x01) {
-				hdlc->hdlc_bits1++;
-				hdlc->cbin++;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			} else {
-				hdlc->hdlc_bits1 = 0;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			}
-			break;
-		case HDLC_SEND_CRC1:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (hdlc->hdlc_bits1 == 5) {
-				hdlc->hdlc_bits1 = 0;
-				break;
-			}
-			if (hdlc->shift_reg & 0x01) {
-				hdlc->hdlc_bits1++;
-				hdlc->cbin++;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			} else {
-				hdlc->hdlc_bits1 = 0;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			}
-			if (hdlc->bit_shift == 0) {
-				hdlc->shift_reg = (hdlc->crc >> 8);
-				hdlc->state = HDLC_SEND_CRC2;
-				hdlc->bit_shift = 8;
-			}
-			break;
-		case HDLC_SEND_CRC2:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (hdlc->hdlc_bits1 == 5) {
-				hdlc->hdlc_bits1 = 0;
-				break;
-			}
-			if (hdlc->shift_reg & 0x01) {
-				hdlc->hdlc_bits1++;
-				hdlc->cbin++;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			} else {
-				hdlc->hdlc_bits1 = 0;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			}
-			if (hdlc->bit_shift == 0) {
-				hdlc->shift_reg = 0x7e;
-				hdlc->state = HDLC_SEND_CLOSING_FLAG;
-				hdlc->bit_shift = 8;
-			}
-			break;
-		case HDLC_SEND_CLOSING_FLAG:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (hdlc->hdlc_bits1 == 5) {
-				hdlc->hdlc_bits1 = 0;
-				break;
-			}
-			if (hdlc->shift_reg & 0x01)
-				hdlc->cbin++;
-			hdlc->shift_reg >>= 1;
-			hdlc->bit_shift--;
-			if (hdlc->bit_shift == 0) {
-				hdlc->ffvalue =
-					xfast_flag_value[hdlc->data_bits];
-				if (hdlc->dchannel) {
-					hdlc->ffvalue = 0x7e;
-					hdlc->state = HDLC_SEND_IDLE1;
-					hdlc->bit_shift = 8-hdlc->data_bits;
-					if (hdlc->bit_shift == 0)
-						hdlc->state =
-							HDLC_SEND_FAST_IDLE;
-				} else {
-					if (!hdlc->do_adapt56) {
-						hdlc->state =
-							HDLC_SEND_FAST_FLAG;
-						hdlc->data_received = 0;
-					} else {
-						hdlc->state = HDLC_SENDFLAG_B0;
-						hdlc->data_received = 0;
-					}
-					/* Finished this frame, send flags */
-					if (dsize > 1)
-						dsize = 1;
-				}
-			}
-			break;
-		case HDLC_SEND_IDLE1:
-			hdlc->do_closing = 0;
-			hdlc->cbin <<= 1;
-			hdlc->cbin++;
-			hdlc->data_bits++;
-			hdlc->bit_shift--;
-			if (hdlc->bit_shift == 0) {
-				hdlc->state = HDLC_SEND_FAST_IDLE;
-				hdlc->bit_shift = 0;
-			}
-			break;
-		case HDLC_SEND_FAST_IDLE:
-			hdlc->do_closing = 0;
-			hdlc->cbin = 0xff;
-			hdlc->data_bits = 8;
-			if (hdlc->bit_shift == 8) {
-				hdlc->cbin = 0x7e;
-				hdlc->state = HDLC_SEND_FIRST_FLAG;
-			} else {
-				/* the code is for bitreverse streams */
-				if (hdlc->do_bitreverse == 0)
-					*dst++ = bitrev8(hdlc->cbin);
-				else
-					*dst++ = hdlc->cbin;
-				hdlc->bit_shift = 0;
-				hdlc->data_bits = 0;
-				len++;
-				dsize = 0;
-			}
-			break;
-		default:
-			break;
-		}
-		if (hdlc->do_adapt56) {
-			if (hdlc->data_bits == 7) {
-				hdlc->cbin <<= 1;
-				hdlc->cbin++;
-				hdlc->data_bits++;
-			}
-		}
-		if (hdlc->data_bits == 8) {
-			/* the code is for bitreverse streams */
-			if (hdlc->do_bitreverse == 0)
-				*dst++ = bitrev8(hdlc->cbin);
-			else
-				*dst++ = hdlc->cbin;
-			hdlc->data_bits = 0;
-			len++;
-			dsize--;
-		}
-	}
-	*count -= slen;
-
-	return len;
-}
-EXPORT_SYMBOL(isdnhdlc_encode);
diff --git a/drivers/isdn/hardware/mISDN/isdnhdlc.h b/drivers/isdn/hardware/mISDN/isdnhdlc.h
deleted file mode 100644
index fe2c1279c139..000000000000
--- a/drivers/isdn/hardware/mISDN/isdnhdlc.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * hdlc.h  --  General purpose ISDN HDLC decoder.
- *
- * Implementation of a HDLC decoder/encoder in software.
- * Necessary because some ISDN devices don't have HDLC
- * controllers.
- *
- * Copyright (C)
- *	2009	Karsten Keil		<keil@b1-systems.de>
- *	2002	Wolfgang Mües		<wolfgang@iksw-muees.de>
- *	2001	Frode Isaksen		<fisaksen@bewan.com>
- *	2001	Kai Germaschewski	<kai.germaschewski@gmx.de>
- */
-
-#ifndef __ISDNHDLC_H__
-#define __ISDNHDLC_H__
-
-struct isdnhdlc_vars {
-	int bit_shift;
-	int hdlc_bits1;
-	int data_bits;
-	int ffbit_shift;	/* encoding only */
-	int state;
-	int dstpos;
-
-	u16 crc;
-
-	u8 cbin;
-	u8 shift_reg;
-	u8 ffvalue;
-
-	/* set if transferring data */
-	u32 data_received:1;
-	/* set if D channel (send idle instead of flags) */
-	u32 dchannel:1;
-	/* set if 56K adaptation */
-	u32 do_adapt56:1;
-	/* set if in closing phase (need to send CRC + flag) */
-	u32 do_closing:1;
-	/* set if data is bitreverse */
-	u32 do_bitreverse:1;
-};
-
-/* Feature Flags */
-#define HDLC_56KBIT	0x01
-#define HDLC_DCHANNEL	0x02
-#define HDLC_BITREVERSE	0x04
-
-/*
-  The return value from isdnhdlc_decode is
-  the frame length, 0 if no complete frame was decoded,
-  or a negative error number
-*/
-#define HDLC_FRAMING_ERROR     1
-#define HDLC_CRC_ERROR         2
-#define HDLC_LENGTH_ERROR      3
-
-extern void	isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features);
-
-extern int	isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src,
-			int slen, int *count, u8 *dst, int dsize);
-
-extern void	isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features);
-
-extern int	isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src,
-			u16 slen, int *count, u8 *dst, int dsize);
-
-#endif /* __ISDNHDLC_H__ */
diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
deleted file mode 100644
index aaa639ad5526..000000000000
--- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c
+++ /dev/null
@@ -1,1168 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mISDNinfineon.c
- *		Support for cards based on following Infineon ISDN chipsets
- *		- ISAC + HSCX
- *		- IPAC and IPAC-X
- *		- ISAC-SX + HSCX
- *
- * Supported cards:
- *		- Dialogic Diva 2.0
- *		- Dialogic Diva 2.0U
- *		- Dialogic Diva 2.01
- *		- Dialogic Diva 2.02
- *		- Sedlbauer Speedwin
- *		- HST Saphir3
- *		- Develo (former ELSA) Microlink PCI (Quickstep 1000)
- *		- Develo (former ELSA) Quickstep 3000
- *		- Berkom Scitel BRIX Quadro
- *		- Dr.Neuhaus (Sagem) Niccy
- *
- * Author       Karsten Keil <keil@isdn4linux.de>
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/mISDNhw.h>
-#include <linux/slab.h>
-#include "ipac.h"
-
-#define INFINEON_REV	"1.0"
-
-static int inf_cnt;
-static u32 debug;
-static u32 irqloops = 4;
-
-enum inf_types {
-	INF_NONE,
-	INF_DIVA20,
-	INF_DIVA20U,
-	INF_DIVA201,
-	INF_DIVA202,
-	INF_SPEEDWIN,
-	INF_SAPHIR3,
-	INF_QS1000,
-	INF_QS3000,
-	INF_NICCY,
-	INF_SCT_1,
-	INF_SCT_2,
-	INF_SCT_3,
-	INF_SCT_4,
-	INF_GAZEL_R685,
-	INF_GAZEL_R753
-};
-
-enum addr_mode {
-	AM_NONE = 0,
-	AM_IO,
-	AM_MEMIO,
-	AM_IND_IO,
-};
-
-struct inf_cinfo {
-	enum inf_types	typ;
-	const char	*full;
-	const char	*name;
-	enum addr_mode	cfg_mode;
-	enum addr_mode	addr_mode;
-	u8		cfg_bar;
-	u8		addr_bar;
-	void		*irqfunc;
-};
-
-struct _ioaddr {
-	enum addr_mode	mode;
-	union {
-		void __iomem	*p;
-		struct _ioport	io;
-	} a;
-};
-
-struct _iohandle {
-	enum addr_mode	mode;
-	resource_size_t	size;
-	resource_size_t	start;
-	void __iomem	*p;
-};
-
-struct inf_hw {
-	struct list_head	list;
-	struct pci_dev		*pdev;
-	const struct inf_cinfo	*ci;
-	char			name[MISDN_MAX_IDLEN];
-	u32			irq;
-	u32			irqcnt;
-	struct _iohandle	cfg;
-	struct _iohandle	addr;
-	struct _ioaddr		isac;
-	struct _ioaddr		hscx;
-	spinlock_t		lock;	/* HW access lock */
-	struct ipac_hw		ipac;
-	struct inf_hw		*sc[3];	/* slave cards */
-};
-
-
-#define PCI_SUBVENDOR_HST_SAPHIR3       0x52
-#define PCI_SUBVENDOR_SEDLBAUER_PCI     0x53
-#define PCI_SUB_ID_SEDLBAUER            0x01
-
-static struct pci_device_id infineon_ids[] = {
-	{ PCI_VDEVICE(EICON, PCI_DEVICE_ID_EICON_DIVA20), INF_DIVA20 },
-	{ PCI_VDEVICE(EICON, PCI_DEVICE_ID_EICON_DIVA20_U), INF_DIVA20U },
-	{ PCI_VDEVICE(EICON, PCI_DEVICE_ID_EICON_DIVA201), INF_DIVA201 },
-	{ PCI_VDEVICE(EICON, PCI_DEVICE_ID_EICON_DIVA202), INF_DIVA202 },
-	{ PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_100,
-	  PCI_SUBVENDOR_SEDLBAUER_PCI, PCI_SUB_ID_SEDLBAUER, 0, 0,
-	  INF_SPEEDWIN },
-	{ PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_100,
-	  PCI_SUBVENDOR_HST_SAPHIR3, PCI_SUB_ID_SEDLBAUER, 0, 0, INF_SAPHIR3 },
-	{ PCI_VDEVICE(ELSA, PCI_DEVICE_ID_ELSA_MICROLINK), INF_QS1000 },
-	{ PCI_VDEVICE(ELSA, PCI_DEVICE_ID_ELSA_QS3000), INF_QS3000 },
-	{ PCI_VDEVICE(SATSAGEM, PCI_DEVICE_ID_SATSAGEM_NICCY), INF_NICCY },
-	{ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050,
-	  PCI_VENDOR_ID_BERKOM, PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO, 0, 0,
-	  INF_SCT_1 },
-	{ PCI_VDEVICE(PLX, PCI_DEVICE_ID_PLX_R685), INF_GAZEL_R685 },
-	{ PCI_VDEVICE(PLX, PCI_DEVICE_ID_PLX_R753), INF_GAZEL_R753 },
-	{ PCI_VDEVICE(PLX, PCI_DEVICE_ID_PLX_DJINN_ITOO), INF_GAZEL_R753 },
-	{ PCI_VDEVICE(PLX, PCI_DEVICE_ID_PLX_OLITEC), INF_GAZEL_R753 },
-	{ }
-};
-MODULE_DEVICE_TABLE(pci, infineon_ids);
-
-/* PCI interface specific defines */
-/* Diva 2.0/2.0U */
-#define DIVA_HSCX_PORT		0x00
-#define DIVA_HSCX_ALE		0x04
-#define DIVA_ISAC_PORT		0x08
-#define DIVA_ISAC_ALE		0x0C
-#define DIVA_PCI_CTRL           0x10
-
-/* DIVA_PCI_CTRL bits */
-#define DIVA_IRQ_BIT		0x01
-#define DIVA_RESET_BIT		0x08
-#define DIVA_EEPROM_CLK		0x40
-#define DIVA_LED_A		0x10
-#define DIVA_LED_B		0x20
-#define DIVA_IRQ_CLR		0x80
-
-/* Diva 2.01/2.02 */
-/* Siemens PITA */
-#define PITA_ICR_REG		0x00
-#define PITA_INT0_STATUS	0x02
-
-#define PITA_MISC_REG		0x1c
-#define PITA_PARA_SOFTRESET	0x01000000
-#define PITA_SER_SOFTRESET	0x02000000
-#define PITA_PARA_MPX_MODE	0x04000000
-#define PITA_INT0_ENABLE	0x00020000
-
-/* TIGER 100 Registers */
-#define TIGER_RESET_ADDR	0x00
-#define TIGER_EXTERN_RESET	0x01
-#define TIGER_AUX_CTRL		0x02
-#define TIGER_AUX_DATA		0x03
-#define TIGER_AUX_IRQMASK	0x05
-#define TIGER_AUX_STATUS	0x07
-
-/* Tiger AUX BITs */
-#define TIGER_IOMASK		0xdd	/* 1 and 5 are inputs */
-#define TIGER_IRQ_BIT		0x02
-
-#define TIGER_IPAC_ALE		0xC0
-#define TIGER_IPAC_PORT		0xC8
-
-/* ELSA (now Develo) PCI cards */
-#define ELSA_IRQ_ADDR		0x4c
-#define ELSA_IRQ_MASK		0x04
-#define QS1000_IRQ_OFF		0x01
-#define QS3000_IRQ_OFF		0x03
-#define QS1000_IRQ_ON		0x41
-#define QS3000_IRQ_ON		0x43
-
-/* Dr Neuhaus/Sagem Niccy */
-#define NICCY_ISAC_PORT		0x00
-#define NICCY_HSCX_PORT		0x01
-#define NICCY_ISAC_ALE		0x02
-#define NICCY_HSCX_ALE		0x03
-
-#define NICCY_IRQ_CTRL_REG	0x38
-#define NICCY_IRQ_ENABLE	0x001f00
-#define NICCY_IRQ_DISABLE	0xff0000
-#define NICCY_IRQ_BIT		0x800000
-
-
-/* Scitel PLX */
-#define SCT_PLX_IRQ_ADDR	0x4c
-#define SCT_PLX_RESET_ADDR	0x50
-#define SCT_PLX_IRQ_ENABLE	0x41
-#define SCT_PLX_RESET_BIT	0x04
-
-/* Gazel */
-#define	GAZEL_IPAC_DATA_PORT	0x04
-/* Gazel PLX */
-#define GAZEL_CNTRL		0x50
-#define GAZEL_RESET		0x04
-#define GAZEL_RESET_9050	0x40000000
-#define GAZEL_INCSR		0x4C
-#define GAZEL_ISAC_EN		0x08
-#define GAZEL_INT_ISAC		0x20
-#define GAZEL_HSCX_EN		0x01
-#define GAZEL_INT_HSCX		0x04
-#define GAZEL_PCI_EN		0x40
-#define GAZEL_IPAC_EN		0x03
-
-
-static LIST_HEAD(Cards);
-static DEFINE_RWLOCK(card_lock); /* protect Cards */
-
-static void
-_set_debug(struct inf_hw *card)
-{
-	card->ipac.isac.dch.debug = debug;
-	card->ipac.hscx[0].bch.debug = debug;
-	card->ipac.hscx[1].bch.debug = debug;
-}
-
-static int
-set_debug(const char *val, const struct kernel_param *kp)
-{
-	int ret;
-	struct inf_hw *card;
-
-	ret = param_set_uint(val, kp);
-	if (!ret) {
-		read_lock(&card_lock);
-		list_for_each_entry(card, &Cards, list)
-			_set_debug(card);
-		read_unlock(&card_lock);
-	}
-	return ret;
-}
-
-MODULE_AUTHOR("Karsten Keil");
-MODULE_DESCRIPTION("mISDN driver for cards based on Infineon ISDN chipsets");
-MODULE_LICENSE("GPL v2");
-MODULE_VERSION(INFINEON_REV);
-module_param_call(debug, set_debug, param_get_uint, &debug, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug, "infineon debug mask");
-module_param(irqloops, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(irqloops, "infineon maximal irqloops (default 4)");
-
-/* Interface functions */
-
-IOFUNC_IO(ISAC, inf_hw, isac.a.io)
-IOFUNC_IO(IPAC, inf_hw, hscx.a.io)
-IOFUNC_IND(ISAC, inf_hw, isac.a.io)
-IOFUNC_IND(IPAC, inf_hw, hscx.a.io)
-IOFUNC_MEMIO(ISAC, inf_hw, u32, isac.a.p)
-IOFUNC_MEMIO(IPAC, inf_hw, u32, hscx.a.p)
-
-static irqreturn_t
-diva_irq(int intno, void *dev_id)
-{
-	struct inf_hw *hw = dev_id;
-	u8 val;
-
-	spin_lock(&hw->lock);
-	val = inb((u32)hw->cfg.start + DIVA_PCI_CTRL);
-	if (!(val & DIVA_IRQ_BIT)) { /* for us or shared ? */
-		spin_unlock(&hw->lock);
-		return IRQ_NONE; /* shared */
-	}
-	hw->irqcnt++;
-	mISDNipac_irq(&hw->ipac, irqloops);
-	spin_unlock(&hw->lock);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t
-diva20x_irq(int intno, void *dev_id)
-{
-	struct inf_hw *hw = dev_id;
-	u8 val;
-
-	spin_lock(&hw->lock);
-	val = readb(hw->cfg.p);
-	if (!(val & PITA_INT0_STATUS)) { /* for us or shared ? */
-		spin_unlock(&hw->lock);
-		return IRQ_NONE; /* shared */
-	}
-	hw->irqcnt++;
-	mISDNipac_irq(&hw->ipac, irqloops);
-	writeb(PITA_INT0_STATUS, hw->cfg.p); /* ACK PITA INT0 */
-	spin_unlock(&hw->lock);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t
-tiger_irq(int intno, void *dev_id)
-{
-	struct inf_hw *hw = dev_id;
-	u8 val;
-
-	spin_lock(&hw->lock);
-	val = inb((u32)hw->cfg.start + TIGER_AUX_STATUS);
-	if (val & TIGER_IRQ_BIT) { /* for us or shared ? */
-		spin_unlock(&hw->lock);
-		return IRQ_NONE; /* shared */
-	}
-	hw->irqcnt++;
-	mISDNipac_irq(&hw->ipac, irqloops);
-	spin_unlock(&hw->lock);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t
-elsa_irq(int intno, void *dev_id)
-{
-	struct inf_hw *hw = dev_id;
-	u8 val;
-
-	spin_lock(&hw->lock);
-	val = inb((u32)hw->cfg.start + ELSA_IRQ_ADDR);
-	if (!(val & ELSA_IRQ_MASK)) {
-		spin_unlock(&hw->lock);
-		return IRQ_NONE; /* shared */
-	}
-	hw->irqcnt++;
-	mISDNipac_irq(&hw->ipac, irqloops);
-	spin_unlock(&hw->lock);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t
-niccy_irq(int intno, void *dev_id)
-{
-	struct inf_hw *hw = dev_id;
-	u32 val;
-
-	spin_lock(&hw->lock);
-	val = inl((u32)hw->cfg.start + NICCY_IRQ_CTRL_REG);
-	if (!(val & NICCY_IRQ_BIT)) { /* for us or shared ? */
-		spin_unlock(&hw->lock);
-		return IRQ_NONE; /* shared */
-	}
-	outl(val, (u32)hw->cfg.start + NICCY_IRQ_CTRL_REG);
-	hw->irqcnt++;
-	mISDNipac_irq(&hw->ipac, irqloops);
-	spin_unlock(&hw->lock);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t
-gazel_irq(int intno, void *dev_id)
-{
-	struct inf_hw *hw = dev_id;
-	irqreturn_t ret;
-
-	spin_lock(&hw->lock);
-	ret = mISDNipac_irq(&hw->ipac, irqloops);
-	spin_unlock(&hw->lock);
-	return ret;
-}
-
-static irqreturn_t
-ipac_irq(int intno, void *dev_id)
-{
-	struct inf_hw *hw = dev_id;
-	u8 val;
-
-	spin_lock(&hw->lock);
-	val = hw->ipac.read_reg(hw, IPAC_ISTA);
-	if (!(val & 0x3f)) {
-		spin_unlock(&hw->lock);
-		return IRQ_NONE; /* shared */
-	}
-	hw->irqcnt++;
-	mISDNipac_irq(&hw->ipac, irqloops);
-	spin_unlock(&hw->lock);
-	return IRQ_HANDLED;
-}
-
-static void
-enable_hwirq(struct inf_hw *hw)
-{
-	u16 w;
-	u32 val;
-
-	switch (hw->ci->typ) {
-	case INF_DIVA201:
-	case INF_DIVA202:
-		writel(PITA_INT0_ENABLE, hw->cfg.p);
-		break;
-	case INF_SPEEDWIN:
-	case INF_SAPHIR3:
-		outb(TIGER_IRQ_BIT, (u32)hw->cfg.start + TIGER_AUX_IRQMASK);
-		break;
-	case INF_QS1000:
-		outb(QS1000_IRQ_ON, (u32)hw->cfg.start + ELSA_IRQ_ADDR);
-		break;
-	case INF_QS3000:
-		outb(QS3000_IRQ_ON, (u32)hw->cfg.start + ELSA_IRQ_ADDR);
-		break;
-	case INF_NICCY:
-		val = inl((u32)hw->cfg.start + NICCY_IRQ_CTRL_REG);
-		val |= NICCY_IRQ_ENABLE;
-		outl(val, (u32)hw->cfg.start + NICCY_IRQ_CTRL_REG);
-		break;
-	case INF_SCT_1:
-		w = inw((u32)hw->cfg.start + SCT_PLX_IRQ_ADDR);
-		w |= SCT_PLX_IRQ_ENABLE;
-		outw(w, (u32)hw->cfg.start + SCT_PLX_IRQ_ADDR);
-		break;
-	case INF_GAZEL_R685:
-		outb(GAZEL_ISAC_EN + GAZEL_HSCX_EN + GAZEL_PCI_EN,
-		     (u32)hw->cfg.start + GAZEL_INCSR);
-		break;
-	case INF_GAZEL_R753:
-		outb(GAZEL_IPAC_EN + GAZEL_PCI_EN,
-		     (u32)hw->cfg.start + GAZEL_INCSR);
-		break;
-	default:
-		break;
-	}
-}
-
-static void
-disable_hwirq(struct inf_hw *hw)
-{
-	u16 w;
-	u32 val;
-
-	switch (hw->ci->typ) {
-	case INF_DIVA201:
-	case INF_DIVA202:
-		writel(0, hw->cfg.p);
-		break;
-	case INF_SPEEDWIN:
-	case INF_SAPHIR3:
-		outb(0, (u32)hw->cfg.start + TIGER_AUX_IRQMASK);
-		break;
-	case INF_QS1000:
-		outb(QS1000_IRQ_OFF, (u32)hw->cfg.start + ELSA_IRQ_ADDR);
-		break;
-	case INF_QS3000:
-		outb(QS3000_IRQ_OFF, (u32)hw->cfg.start + ELSA_IRQ_ADDR);
-		break;
-	case INF_NICCY:
-		val = inl((u32)hw->cfg.start + NICCY_IRQ_CTRL_REG);
-		val &= NICCY_IRQ_DISABLE;
-		outl(val, (u32)hw->cfg.start + NICCY_IRQ_CTRL_REG);
-		break;
-	case INF_SCT_1:
-		w = inw((u32)hw->cfg.start + SCT_PLX_IRQ_ADDR);
-		w &= (~SCT_PLX_IRQ_ENABLE);
-		outw(w, (u32)hw->cfg.start + SCT_PLX_IRQ_ADDR);
-		break;
-	case INF_GAZEL_R685:
-	case INF_GAZEL_R753:
-		outb(0, (u32)hw->cfg.start + GAZEL_INCSR);
-		break;
-	default:
-		break;
-	}
-}
-
-static void
-ipac_chip_reset(struct inf_hw *hw)
-{
-	hw->ipac.write_reg(hw, IPAC_POTA2, 0x20);
-	mdelay(5);
-	hw->ipac.write_reg(hw, IPAC_POTA2, 0x00);
-	mdelay(5);
-	hw->ipac.write_reg(hw, IPAC_CONF, hw->ipac.conf);
-	hw->ipac.write_reg(hw, IPAC_MASK, 0xc0);
-}
-
-static void
-reset_inf(struct inf_hw *hw)
-{
-	u16 w;
-	u32 val;
-
-	if (debug & DEBUG_HW)
-		pr_notice("%s: resetting card\n", hw->name);
-	switch (hw->ci->typ) {
-	case INF_DIVA20:
-	case INF_DIVA20U:
-		outb(0, (u32)hw->cfg.start + DIVA_PCI_CTRL);
-		mdelay(10);
-		outb(DIVA_RESET_BIT, (u32)hw->cfg.start + DIVA_PCI_CTRL);
-		mdelay(10);
-		/* Workaround PCI9060 */
-		outb(9, (u32)hw->cfg.start + 0x69);
-		outb(DIVA_RESET_BIT | DIVA_LED_A,
-		     (u32)hw->cfg.start + DIVA_PCI_CTRL);
-		break;
-	case INF_DIVA201:
-		writel(PITA_PARA_SOFTRESET | PITA_PARA_MPX_MODE,
-		       hw->cfg.p + PITA_MISC_REG);
-		mdelay(1);
-		writel(PITA_PARA_MPX_MODE, hw->cfg.p + PITA_MISC_REG);
-		mdelay(10);
-		break;
-	case INF_DIVA202:
-		writel(PITA_PARA_SOFTRESET | PITA_PARA_MPX_MODE,
-		       hw->cfg.p + PITA_MISC_REG);
-		mdelay(1);
-		writel(PITA_PARA_MPX_MODE | PITA_SER_SOFTRESET,
-		       hw->cfg.p + PITA_MISC_REG);
-		mdelay(10);
-		break;
-	case INF_SPEEDWIN:
-	case INF_SAPHIR3:
-		ipac_chip_reset(hw);
-		hw->ipac.write_reg(hw, IPAC_ACFG, 0xff);
-		hw->ipac.write_reg(hw, IPAC_AOE, 0x00);
-		hw->ipac.write_reg(hw, IPAC_PCFG, 0x12);
-		break;
-	case INF_QS1000:
-	case INF_QS3000:
-		ipac_chip_reset(hw);
-		hw->ipac.write_reg(hw, IPAC_ACFG, 0x00);
-		hw->ipac.write_reg(hw, IPAC_AOE, 0x3c);
-		hw->ipac.write_reg(hw, IPAC_ATX, 0xff);
-		break;
-	case INF_NICCY:
-		break;
-	case INF_SCT_1:
-		w = inw((u32)hw->cfg.start + SCT_PLX_RESET_ADDR);
-		w &= (~SCT_PLX_RESET_BIT);
-		outw(w, (u32)hw->cfg.start + SCT_PLX_RESET_ADDR);
-		mdelay(10);
-		w = inw((u32)hw->cfg.start + SCT_PLX_RESET_ADDR);
-		w |= SCT_PLX_RESET_BIT;
-		outw(w, (u32)hw->cfg.start + SCT_PLX_RESET_ADDR);
-		mdelay(10);
-		break;
-	case INF_GAZEL_R685:
-		val = inl((u32)hw->cfg.start + GAZEL_CNTRL);
-		val |= (GAZEL_RESET_9050 + GAZEL_RESET);
-		outl(val, (u32)hw->cfg.start + GAZEL_CNTRL);
-		val &= ~(GAZEL_RESET_9050 + GAZEL_RESET);
-		mdelay(4);
-		outl(val, (u32)hw->cfg.start + GAZEL_CNTRL);
-		mdelay(10);
-		hw->ipac.isac.adf2 = 0x87;
-		hw->ipac.hscx[0].slot = 0x1f;
-		hw->ipac.hscx[1].slot = 0x23;
-		break;
-	case INF_GAZEL_R753:
-		val = inl((u32)hw->cfg.start + GAZEL_CNTRL);
-		val |= (GAZEL_RESET_9050 + GAZEL_RESET);
-		outl(val, (u32)hw->cfg.start + GAZEL_CNTRL);
-		val &= ~(GAZEL_RESET_9050 + GAZEL_RESET);
-		mdelay(4);
-		outl(val, (u32)hw->cfg.start + GAZEL_CNTRL);
-		mdelay(10);
-		ipac_chip_reset(hw);
-		hw->ipac.write_reg(hw, IPAC_ACFG, 0xff);
-		hw->ipac.write_reg(hw, IPAC_AOE, 0x00);
-		hw->ipac.conf = 0x01; /* IOM off */
-		break;
-	default:
-		return;
-	}
-	enable_hwirq(hw);
-}
-
-static int
-inf_ctrl(struct inf_hw *hw, u32 cmd, u_long arg)
-{
-	int ret = 0;
-
-	switch (cmd) {
-	case HW_RESET_REQ:
-		reset_inf(hw);
-		break;
-	default:
-		pr_info("%s: %s unknown command %x %lx\n",
-			hw->name, __func__, cmd, arg);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-init_irq(struct inf_hw *hw)
-{
-	int	ret, cnt = 3;
-	u_long	flags;
-
-	if (!hw->ci->irqfunc)
-		return -EINVAL;
-	ret = request_irq(hw->irq, hw->ci->irqfunc, IRQF_SHARED, hw->name, hw);
-	if (ret) {
-		pr_info("%s: couldn't get interrupt %d\n", hw->name, hw->irq);
-		return ret;
-	}
-	while (cnt--) {
-		spin_lock_irqsave(&hw->lock, flags);
-		reset_inf(hw);
-		ret = hw->ipac.init(&hw->ipac);
-		if (ret) {
-			spin_unlock_irqrestore(&hw->lock, flags);
-			pr_info("%s: ISAC init failed with %d\n",
-				hw->name, ret);
-			break;
-		}
-		spin_unlock_irqrestore(&hw->lock, flags);
-		msleep_interruptible(10);
-		if (debug & DEBUG_HW)
-			pr_notice("%s: IRQ %d count %d\n", hw->name,
-				  hw->irq, hw->irqcnt);
-		if (!hw->irqcnt) {
-			pr_info("%s: IRQ(%d) got no requests during init %d\n",
-				hw->name, hw->irq, 3 - cnt);
-		} else
-			return 0;
-	}
-	free_irq(hw->irq, hw);
-	return -EIO;
-}
-
-static void
-release_io(struct inf_hw *hw)
-{
-	if (hw->cfg.mode) {
-		if (hw->cfg.mode == AM_MEMIO) {
-			release_mem_region(hw->cfg.start, hw->cfg.size);
-			if (hw->cfg.p)
-				iounmap(hw->cfg.p);
-		} else
-			release_region(hw->cfg.start, hw->cfg.size);
-		hw->cfg.mode = AM_NONE;
-	}
-	if (hw->addr.mode) {
-		if (hw->addr.mode == AM_MEMIO) {
-			release_mem_region(hw->addr.start, hw->addr.size);
-			if (hw->addr.p)
-				iounmap(hw->addr.p);
-		} else
-			release_region(hw->addr.start, hw->addr.size);
-		hw->addr.mode = AM_NONE;
-	}
-}
-
-static int
-setup_io(struct inf_hw *hw)
-{
-	int err = 0;
-
-	if (hw->ci->cfg_mode) {
-		hw->cfg.start = pci_resource_start(hw->pdev, hw->ci->cfg_bar);
-		hw->cfg.size = pci_resource_len(hw->pdev, hw->ci->cfg_bar);
-		if (hw->ci->cfg_mode == AM_MEMIO) {
-			if (!request_mem_region(hw->cfg.start, hw->cfg.size,
-						hw->name))
-				err = -EBUSY;
-		} else {
-			if (!request_region(hw->cfg.start, hw->cfg.size,
-					    hw->name))
-				err = -EBUSY;
-		}
-		if (err) {
-			pr_info("mISDN: %s config port %lx (%lu bytes)"
-				"already in use\n", hw->name,
-				(ulong)hw->cfg.start, (ulong)hw->cfg.size);
-			return err;
-		}
-		hw->cfg.mode = hw->ci->cfg_mode;
-		if (hw->ci->cfg_mode == AM_MEMIO) {
-			hw->cfg.p = ioremap(hw->cfg.start, hw->cfg.size);
-			if (!hw->cfg.p)
-				return -ENOMEM;
-		}
-		if (debug & DEBUG_HW)
-			pr_notice("%s: IO cfg %lx (%lu bytes) mode%d\n",
-				  hw->name, (ulong)hw->cfg.start,
-				  (ulong)hw->cfg.size, hw->ci->cfg_mode);
-
-	}
-	if (hw->ci->addr_mode) {
-		hw->addr.start = pci_resource_start(hw->pdev, hw->ci->addr_bar);
-		hw->addr.size = pci_resource_len(hw->pdev, hw->ci->addr_bar);
-		if (hw->ci->addr_mode == AM_MEMIO) {
-			if (!request_mem_region(hw->addr.start, hw->addr.size,
-						hw->name))
-				err = -EBUSY;
-		} else {
-			if (!request_region(hw->addr.start, hw->addr.size,
-					    hw->name))
-				err = -EBUSY;
-		}
-		if (err) {
-			pr_info("mISDN: %s address port %lx (%lu bytes)"
-				"already in use\n", hw->name,
-				(ulong)hw->addr.start, (ulong)hw->addr.size);
-			return err;
-		}
-		hw->addr.mode = hw->ci->addr_mode;
-		if (hw->ci->addr_mode == AM_MEMIO) {
-			hw->addr.p = ioremap(hw->addr.start, hw->addr.size);
-			if (!hw->addr.p)
-				return -ENOMEM;
-		}
-		if (debug & DEBUG_HW)
-			pr_notice("%s: IO addr %lx (%lu bytes) mode%d\n",
-				  hw->name, (ulong)hw->addr.start,
-				  (ulong)hw->addr.size, hw->ci->addr_mode);
-
-	}
-
-	switch (hw->ci->typ) {
-	case INF_DIVA20:
-	case INF_DIVA20U:
-		hw->ipac.type = IPAC_TYPE_ISAC | IPAC_TYPE_HSCX;
-		hw->isac.mode = hw->cfg.mode;
-		hw->isac.a.io.ale = (u32)hw->cfg.start + DIVA_ISAC_ALE;
-		hw->isac.a.io.port = (u32)hw->cfg.start + DIVA_ISAC_PORT;
-		hw->hscx.mode = hw->cfg.mode;
-		hw->hscx.a.io.ale = (u32)hw->cfg.start + DIVA_HSCX_ALE;
-		hw->hscx.a.io.port = (u32)hw->cfg.start + DIVA_HSCX_PORT;
-		break;
-	case INF_DIVA201:
-		hw->ipac.type = IPAC_TYPE_IPAC;
-		hw->ipac.isac.off = 0x80;
-		hw->isac.mode = hw->addr.mode;
-		hw->isac.a.p = hw->addr.p;
-		hw->hscx.mode = hw->addr.mode;
-		hw->hscx.a.p = hw->addr.p;
-		break;
-	case INF_DIVA202:
-		hw->ipac.type = IPAC_TYPE_IPACX;
-		hw->isac.mode = hw->addr.mode;
-		hw->isac.a.p = hw->addr.p;
-		hw->hscx.mode = hw->addr.mode;
-		hw->hscx.a.p = hw->addr.p;
-		break;
-	case INF_SPEEDWIN:
-	case INF_SAPHIR3:
-		hw->ipac.type = IPAC_TYPE_IPAC;
-		hw->ipac.isac.off = 0x80;
-		hw->isac.mode = hw->cfg.mode;
-		hw->isac.a.io.ale = (u32)hw->cfg.start + TIGER_IPAC_ALE;
-		hw->isac.a.io.port = (u32)hw->cfg.start + TIGER_IPAC_PORT;
-		hw->hscx.mode = hw->cfg.mode;
-		hw->hscx.a.io.ale = (u32)hw->cfg.start + TIGER_IPAC_ALE;
-		hw->hscx.a.io.port = (u32)hw->cfg.start + TIGER_IPAC_PORT;
-		outb(0xff, (ulong)hw->cfg.start);
-		mdelay(1);
-		outb(0x00, (ulong)hw->cfg.start);
-		mdelay(1);
-		outb(TIGER_IOMASK, (ulong)hw->cfg.start + TIGER_AUX_CTRL);
-		break;
-	case INF_QS1000:
-	case INF_QS3000:
-		hw->ipac.type = IPAC_TYPE_IPAC;
-		hw->ipac.isac.off = 0x80;
-		hw->isac.a.io.ale = (u32)hw->addr.start;
-		hw->isac.a.io.port = (u32)hw->addr.start + 1;
-		hw->isac.mode = hw->addr.mode;
-		hw->hscx.a.io.ale = (u32)hw->addr.start;
-		hw->hscx.a.io.port = (u32)hw->addr.start + 1;
-		hw->hscx.mode = hw->addr.mode;
-		break;
-	case INF_NICCY:
-		hw->ipac.type = IPAC_TYPE_ISAC | IPAC_TYPE_HSCX;
-		hw->isac.mode = hw->addr.mode;
-		hw->isac.a.io.ale = (u32)hw->addr.start + NICCY_ISAC_ALE;
-		hw->isac.a.io.port = (u32)hw->addr.start + NICCY_ISAC_PORT;
-		hw->hscx.mode = hw->addr.mode;
-		hw->hscx.a.io.ale = (u32)hw->addr.start + NICCY_HSCX_ALE;
-		hw->hscx.a.io.port = (u32)hw->addr.start + NICCY_HSCX_PORT;
-		break;
-	case INF_SCT_1:
-		hw->ipac.type = IPAC_TYPE_IPAC;
-		hw->ipac.isac.off = 0x80;
-		hw->isac.a.io.ale = (u32)hw->addr.start;
-		hw->isac.a.io.port = hw->isac.a.io.ale + 4;
-		hw->isac.mode = hw->addr.mode;
-		hw->hscx.a.io.ale = hw->isac.a.io.ale;
-		hw->hscx.a.io.port = hw->isac.a.io.port;
-		hw->hscx.mode = hw->addr.mode;
-		break;
-	case INF_SCT_2:
-		hw->ipac.type = IPAC_TYPE_IPAC;
-		hw->ipac.isac.off = 0x80;
-		hw->isac.a.io.ale = (u32)hw->addr.start + 0x08;
-		hw->isac.a.io.port = hw->isac.a.io.ale + 4;
-		hw->isac.mode = hw->addr.mode;
-		hw->hscx.a.io.ale = hw->isac.a.io.ale;
-		hw->hscx.a.io.port = hw->isac.a.io.port;
-		hw->hscx.mode = hw->addr.mode;
-		break;
-	case INF_SCT_3:
-		hw->ipac.type = IPAC_TYPE_IPAC;
-		hw->ipac.isac.off = 0x80;
-		hw->isac.a.io.ale = (u32)hw->addr.start + 0x10;
-		hw->isac.a.io.port = hw->isac.a.io.ale + 4;
-		hw->isac.mode = hw->addr.mode;
-		hw->hscx.a.io.ale = hw->isac.a.io.ale;
-		hw->hscx.a.io.port = hw->isac.a.io.port;
-		hw->hscx.mode = hw->addr.mode;
-		break;
-	case INF_SCT_4:
-		hw->ipac.type = IPAC_TYPE_IPAC;
-		hw->ipac.isac.off = 0x80;
-		hw->isac.a.io.ale = (u32)hw->addr.start + 0x20;
-		hw->isac.a.io.port = hw->isac.a.io.ale + 4;
-		hw->isac.mode = hw->addr.mode;
-		hw->hscx.a.io.ale = hw->isac.a.io.ale;
-		hw->hscx.a.io.port = hw->isac.a.io.port;
-		hw->hscx.mode = hw->addr.mode;
-		break;
-	case INF_GAZEL_R685:
-		hw->ipac.type = IPAC_TYPE_ISAC | IPAC_TYPE_HSCX;
-		hw->ipac.isac.off = 0x80;
-		hw->isac.mode = hw->addr.mode;
-		hw->isac.a.io.port = (u32)hw->addr.start;
-		hw->hscx.mode = hw->addr.mode;
-		hw->hscx.a.io.port = hw->isac.a.io.port;
-		break;
-	case INF_GAZEL_R753:
-		hw->ipac.type = IPAC_TYPE_IPAC;
-		hw->ipac.isac.off = 0x80;
-		hw->isac.mode = hw->addr.mode;
-		hw->isac.a.io.ale = (u32)hw->addr.start;
-		hw->isac.a.io.port = (u32)hw->addr.start + GAZEL_IPAC_DATA_PORT;
-		hw->hscx.mode = hw->addr.mode;
-		hw->hscx.a.io.ale = hw->isac.a.io.ale;
-		hw->hscx.a.io.port = hw->isac.a.io.port;
-		break;
-	default:
-		return -EINVAL;
-	}
-	switch (hw->isac.mode) {
-	case AM_MEMIO:
-		ASSIGN_FUNC_IPAC(MIO, hw->ipac);
-		break;
-	case AM_IND_IO:
-		ASSIGN_FUNC_IPAC(IND, hw->ipac);
-		break;
-	case AM_IO:
-		ASSIGN_FUNC_IPAC(IO, hw->ipac);
-		break;
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void
-release_card(struct inf_hw *card) {
-	ulong	flags;
-	int	i;
-
-	spin_lock_irqsave(&card->lock, flags);
-	disable_hwirq(card);
-	spin_unlock_irqrestore(&card->lock, flags);
-	card->ipac.isac.release(&card->ipac.isac);
-	free_irq(card->irq, card);
-	mISDN_unregister_device(&card->ipac.isac.dch.dev);
-	release_io(card);
-	write_lock_irqsave(&card_lock, flags);
-	list_del(&card->list);
-	write_unlock_irqrestore(&card_lock, flags);
-	switch (card->ci->typ) {
-	case INF_SCT_2:
-	case INF_SCT_3:
-	case INF_SCT_4:
-		break;
-	case INF_SCT_1:
-		for (i = 0; i < 3; i++) {
-			if (card->sc[i])
-				release_card(card->sc[i]);
-			card->sc[i] = NULL;
-		}
-		fallthrough;
-	default:
-		pci_disable_device(card->pdev);
-		pci_set_drvdata(card->pdev, NULL);
-		break;
-	}
-	kfree(card);
-	inf_cnt--;
-}
-
-static int
-setup_instance(struct inf_hw *card)
-{
-	int err;
-	ulong flags;
-
-	snprintf(card->name, MISDN_MAX_IDLEN - 1, "%s.%d", card->ci->name,
-		 inf_cnt + 1);
-	write_lock_irqsave(&card_lock, flags);
-	list_add_tail(&card->list, &Cards);
-	write_unlock_irqrestore(&card_lock, flags);
-
-	_set_debug(card);
-	card->ipac.isac.name = card->name;
-	card->ipac.name = card->name;
-	card->ipac.owner = THIS_MODULE;
-	spin_lock_init(&card->lock);
-	card->ipac.isac.hwlock = &card->lock;
-	card->ipac.hwlock = &card->lock;
-	card->ipac.ctrl = (void *)&inf_ctrl;
-
-	err = setup_io(card);
-	if (err)
-		goto error_setup;
-
-	card->ipac.isac.dch.dev.Bprotocols =
-		mISDNipac_init(&card->ipac, card);
-
-	if (card->ipac.isac.dch.dev.Bprotocols == 0)
-		goto error_setup;
-
-	err = mISDN_register_device(&card->ipac.isac.dch.dev,
-				    &card->pdev->dev, card->name);
-	if (err)
-		goto error;
-
-	err = init_irq(card);
-	if (!err)  {
-		inf_cnt++;
-		pr_notice("Infineon %d cards installed\n", inf_cnt);
-		return 0;
-	}
-	mISDN_unregister_device(&card->ipac.isac.dch.dev);
-error:
-	card->ipac.release(&card->ipac);
-error_setup:
-	release_io(card);
-	write_lock_irqsave(&card_lock, flags);
-	list_del(&card->list);
-	write_unlock_irqrestore(&card_lock, flags);
-	return err;
-}
-
-static const struct inf_cinfo inf_card_info[] = {
-	{
-		INF_DIVA20,
-		"Dialogic Diva 2.0",
-		"diva20",
-		AM_IND_IO, AM_NONE, 2, 0,
-		&diva_irq
-	},
-	{
-		INF_DIVA20U,
-		"Dialogic Diva 2.0U",
-		"diva20U",
-		AM_IND_IO, AM_NONE, 2, 0,
-		&diva_irq
-	},
-	{
-		INF_DIVA201,
-		"Dialogic Diva 2.01",
-		"diva201",
-		AM_MEMIO, AM_MEMIO, 0, 1,
-		&diva20x_irq
-	},
-	{
-		INF_DIVA202,
-		"Dialogic Diva 2.02",
-		"diva202",
-		AM_MEMIO, AM_MEMIO, 0, 1,
-		&diva20x_irq
-	},
-	{
-		INF_SPEEDWIN,
-		"Sedlbauer SpeedWin PCI",
-		"speedwin",
-		AM_IND_IO, AM_NONE, 0, 0,
-		&tiger_irq
-	},
-	{
-		INF_SAPHIR3,
-		"HST Saphir 3",
-		"saphir",
-		AM_IND_IO, AM_NONE, 0, 0,
-		&tiger_irq
-	},
-	{
-		INF_QS1000,
-		"Develo Microlink PCI",
-		"qs1000",
-		AM_IO, AM_IND_IO, 1, 3,
-		&elsa_irq
-	},
-	{
-		INF_QS3000,
-		"Develo QuickStep 3000",
-		"qs3000",
-		AM_IO, AM_IND_IO, 1, 3,
-		&elsa_irq
-	},
-	{
-		INF_NICCY,
-		"Sagem NICCY",
-		"niccy",
-		AM_IO, AM_IND_IO, 0, 1,
-		&niccy_irq
-	},
-	{
-		INF_SCT_1,
-		"SciTel Quadro",
-		"p1_scitel",
-		AM_IO, AM_IND_IO, 1, 5,
-		&ipac_irq
-	},
-	{
-		INF_SCT_2,
-		"SciTel Quadro",
-		"p2_scitel",
-		AM_NONE, AM_IND_IO, 0, 4,
-		&ipac_irq
-	},
-	{
-		INF_SCT_3,
-		"SciTel Quadro",
-		"p3_scitel",
-		AM_NONE, AM_IND_IO, 0, 3,
-		&ipac_irq
-	},
-	{
-		INF_SCT_4,
-		"SciTel Quadro",
-		"p4_scitel",
-		AM_NONE, AM_IND_IO, 0, 2,
-		&ipac_irq
-	},
-	{
-		INF_GAZEL_R685,
-		"Gazel R685",
-		"gazel685",
-		AM_IO, AM_IO, 1, 2,
-		&gazel_irq
-	},
-	{
-		INF_GAZEL_R753,
-		"Gazel R753",
-		"gazel753",
-		AM_IO, AM_IND_IO, 1, 2,
-		&ipac_irq
-	},
-	{
-		INF_NONE,
-	}
-};
-
-static const struct inf_cinfo *
-get_card_info(enum inf_types typ)
-{
-	const struct inf_cinfo *ci = inf_card_info;
-
-	while (ci->typ != INF_NONE) {
-		if (ci->typ == typ)
-			return ci;
-		ci++;
-	}
-	return NULL;
-}
-
-static int
-inf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	int err = -ENOMEM;
-	struct inf_hw *card;
-
-	card = kzalloc_obj(struct inf_hw);
-	if (!card) {
-		pr_info("No memory for Infineon ISDN card\n");
-		return err;
-	}
-	card->pdev = pdev;
-	err = pci_enable_device(pdev);
-	if (err) {
-		kfree(card);
-		return err;
-	}
-	card->ci = get_card_info(ent->driver_data);
-	if (!card->ci) {
-		pr_info("mISDN: do not have information about adapter at %s\n",
-			pci_name(pdev));
-		kfree(card);
-		pci_disable_device(pdev);
-		return -EINVAL;
-	} else
-		pr_notice("mISDN: found adapter %s at %s\n",
-			  card->ci->full, pci_name(pdev));
-
-	card->irq = pdev->irq;
-	pci_set_drvdata(pdev, card);
-	err = setup_instance(card);
-	if (err) {
-		pci_disable_device(pdev);
-		kfree(card);
-		pci_set_drvdata(pdev, NULL);
-	} else if (ent->driver_data == INF_SCT_1) {
-		int i;
-		struct inf_hw *sc;
-
-		for (i = 1; i < 4; i++) {
-			sc = kzalloc_obj(struct inf_hw);
-			if (!sc) {
-				release_card(card);
-				pci_disable_device(pdev);
-				return -ENOMEM;
-			}
-			sc->irq = card->irq;
-			sc->pdev = card->pdev;
-			sc->ci = card->ci + i;
-			err = setup_instance(sc);
-			if (err) {
-				pci_disable_device(pdev);
-				kfree(sc);
-				release_card(card);
-				break;
-			} else
-				card->sc[i - 1] = sc;
-		}
-	}
-	return err;
-}
-
-static void
-inf_remove(struct pci_dev *pdev)
-{
-	struct inf_hw	*card = pci_get_drvdata(pdev);
-
-	if (card)
-		release_card(card);
-	else
-		pr_debug("%s: drvdata already removed\n", __func__);
-}
-
-static struct pci_driver infineon_driver = {
-	.name = "ISDN Infineon pci",
-	.probe = inf_probe,
-	.remove = inf_remove,
-	.id_table = infineon_ids,
-};
-
-static int __init
-infineon_init(void)
-{
-	int err;
-
-	pr_notice("Infineon ISDN Driver Rev. %s\n", INFINEON_REV);
-	err = pci_register_driver(&infineon_driver);
-	return err;
-}
-
-static void __exit
-infineon_cleanup(void)
-{
-	pci_unregister_driver(&infineon_driver);
-}
-
-module_init(infineon_init);
-module_exit(infineon_cleanup);
diff --git a/drivers/isdn/hardware/mISDN/mISDNipac.c b/drivers/isdn/hardware/mISDN/mISDNipac.c
deleted file mode 100644
index a34ea6058960..000000000000
--- a/drivers/isdn/hardware/mISDN/mISDNipac.c
+++ /dev/null
@@ -1,1636 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * isac.c   ISAC specific routines
- *
- * Author       Karsten Keil <keil@isdn4linux.de>
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-#include <linux/irqreturn.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/mISDNhw.h>
-#include "ipac.h"
-
-
-#define DBUSY_TIMER_VALUE	80
-#define ARCOFI_USE		1
-
-#define ISAC_REV		"2.0"
-
-MODULE_AUTHOR("Karsten Keil");
-MODULE_VERSION(ISAC_REV);
-MODULE_DESCRIPTION("mISDN driver for ISAC specific functions");
-MODULE_LICENSE("GPL v2");
-
-#define ReadISAC(is, o)		(is->read_reg(is->dch.hw, o + is->off))
-#define	WriteISAC(is, o, v)	(is->write_reg(is->dch.hw, o + is->off, v))
-#define ReadHSCX(h, o)		(h->ip->read_reg(h->ip->hw, h->off + o))
-#define WriteHSCX(h, o, v)	(h->ip->write_reg(h->ip->hw, h->off + o, v))
-#define ReadIPAC(ip, o)		(ip->read_reg(ip->hw, o))
-#define WriteIPAC(ip, o, v)	(ip->write_reg(ip->hw, o, v))
-
-static inline void
-ph_command(struct isac_hw *isac, u8 command)
-{
-	pr_debug("%s: ph_command %x\n", isac->name, command);
-	if (isac->type & IPAC_TYPE_ISACX)
-		WriteISAC(isac, ISACX_CIX0, (command << 4) | 0xE);
-	else
-		WriteISAC(isac, ISAC_CIX0, (command << 2) | 3);
-}
-
-static void
-isac_ph_state_change(struct isac_hw *isac)
-{
-	switch (isac->state) {
-	case (ISAC_IND_RS):
-	case (ISAC_IND_EI):
-		ph_command(isac, ISAC_CMD_DUI);
-	}
-	schedule_event(&isac->dch, FLG_PHCHANGE);
-}
-
-static void
-isac_ph_state_bh(struct dchannel *dch)
-{
-	struct isac_hw *isac = container_of(dch, struct isac_hw, dch);
-
-	switch (isac->state) {
-	case ISAC_IND_RS:
-	case ISAC_IND_EI:
-		dch->state = 0;
-		l1_event(dch->l1, HW_RESET_IND);
-		break;
-	case ISAC_IND_DID:
-		dch->state = 3;
-		l1_event(dch->l1, HW_DEACT_CNF);
-		break;
-	case ISAC_IND_DR:
-	case ISAC_IND_DR6:
-		dch->state = 3;
-		l1_event(dch->l1, HW_DEACT_IND);
-		break;
-	case ISAC_IND_PU:
-		dch->state = 4;
-		l1_event(dch->l1, HW_POWERUP_IND);
-		break;
-	case ISAC_IND_RSY:
-		if (dch->state <= 5) {
-			dch->state = 5;
-			l1_event(dch->l1, ANYSIGNAL);
-		} else {
-			dch->state = 8;
-			l1_event(dch->l1, LOSTFRAMING);
-		}
-		break;
-	case ISAC_IND_ARD:
-		dch->state = 6;
-		l1_event(dch->l1, INFO2);
-		break;
-	case ISAC_IND_AI8:
-		dch->state = 7;
-		l1_event(dch->l1, INFO4_P8);
-		break;
-	case ISAC_IND_AI10:
-		dch->state = 7;
-		l1_event(dch->l1, INFO4_P10);
-		break;
-	}
-	pr_debug("%s: TE newstate %x\n", isac->name, dch->state);
-}
-
-static void
-isac_empty_fifo(struct isac_hw *isac, int count)
-{
-	u8 *ptr;
-
-	pr_debug("%s: %s  %d\n", isac->name, __func__, count);
-
-	if (!isac->dch.rx_skb) {
-		isac->dch.rx_skb = mI_alloc_skb(isac->dch.maxlen, GFP_ATOMIC);
-		if (!isac->dch.rx_skb) {
-			pr_info("%s: D receive out of memory\n", isac->name);
-			WriteISAC(isac, ISAC_CMDR, 0x80);
-			return;
-		}
-	}
-	if ((isac->dch.rx_skb->len + count) >= isac->dch.maxlen) {
-		pr_debug("%s: %s overrun %d\n", isac->name, __func__,
-			 isac->dch.rx_skb->len + count);
-		WriteISAC(isac, ISAC_CMDR, 0x80);
-		return;
-	}
-	ptr = skb_put(isac->dch.rx_skb, count);
-	isac->read_fifo(isac->dch.hw, isac->off, ptr, count);
-	WriteISAC(isac, ISAC_CMDR, 0x80);
-	if (isac->dch.debug & DEBUG_HW_DFIFO) {
-		char	pfx[MISDN_MAX_IDLEN + 16];
-
-		snprintf(pfx, MISDN_MAX_IDLEN + 15, "D-recv %s %d ",
-			 isac->name, count);
-		print_hex_dump_bytes(pfx, DUMP_PREFIX_OFFSET, ptr, count);
-	}
-}
-
-static void
-isac_fill_fifo(struct isac_hw *isac)
-{
-	int count, more;
-	u8 *ptr;
-
-	if (!isac->dch.tx_skb)
-		return;
-	count = isac->dch.tx_skb->len - isac->dch.tx_idx;
-	if (count <= 0)
-		return;
-
-	more = 0;
-	if (count > 32) {
-		more = !0;
-		count = 32;
-	}
-	pr_debug("%s: %s  %d\n", isac->name, __func__, count);
-	ptr = isac->dch.tx_skb->data + isac->dch.tx_idx;
-	isac->dch.tx_idx += count;
-	isac->write_fifo(isac->dch.hw, isac->off, ptr, count);
-	WriteISAC(isac, ISAC_CMDR, more ? 0x8 : 0xa);
-	if (test_and_set_bit(FLG_BUSY_TIMER, &isac->dch.Flags)) {
-		pr_debug("%s: %s dbusytimer running\n", isac->name, __func__);
-		timer_delete(&isac->dch.timer);
-	}
-	isac->dch.timer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ)/1000);
-	add_timer(&isac->dch.timer);
-	if (isac->dch.debug & DEBUG_HW_DFIFO) {
-		char	pfx[MISDN_MAX_IDLEN + 16];
-
-		snprintf(pfx, MISDN_MAX_IDLEN + 15, "D-send %s %d ",
-			 isac->name, count);
-		print_hex_dump_bytes(pfx, DUMP_PREFIX_OFFSET, ptr, count);
-	}
-}
-
-static void
-isac_rme_irq(struct isac_hw *isac)
-{
-	u8 val, count;
-
-	val = ReadISAC(isac, ISAC_RSTA);
-	if ((val & 0x70) != 0x20) {
-		if (val & 0x40) {
-			pr_debug("%s: ISAC RDO\n", isac->name);
-#ifdef ERROR_STATISTIC
-			isac->dch.err_rx++;
-#endif
-		}
-		if (!(val & 0x20)) {
-			pr_debug("%s: ISAC CRC error\n", isac->name);
-#ifdef ERROR_STATISTIC
-			isac->dch.err_crc++;
-#endif
-		}
-		WriteISAC(isac, ISAC_CMDR, 0x80);
-		dev_kfree_skb(isac->dch.rx_skb);
-		isac->dch.rx_skb = NULL;
-	} else {
-		count = ReadISAC(isac, ISAC_RBCL) & 0x1f;
-		if (count == 0)
-			count = 32;
-		isac_empty_fifo(isac, count);
-		recv_Dchannel(&isac->dch);
-	}
-}
-
-static void
-isac_xpr_irq(struct isac_hw *isac)
-{
-	if (test_and_clear_bit(FLG_BUSY_TIMER, &isac->dch.Flags))
-		timer_delete(&isac->dch.timer);
-	if (isac->dch.tx_skb && isac->dch.tx_idx < isac->dch.tx_skb->len) {
-		isac_fill_fifo(isac);
-	} else {
-		dev_kfree_skb(isac->dch.tx_skb);
-		if (get_next_dframe(&isac->dch))
-			isac_fill_fifo(isac);
-	}
-}
-
-static void
-isac_retransmit(struct isac_hw *isac)
-{
-	if (test_and_clear_bit(FLG_BUSY_TIMER, &isac->dch.Flags))
-		timer_delete(&isac->dch.timer);
-	if (test_bit(FLG_TX_BUSY, &isac->dch.Flags)) {
-		/* Restart frame */
-		isac->dch.tx_idx = 0;
-		isac_fill_fifo(isac);
-	} else if (isac->dch.tx_skb) { /* should not happen */
-		pr_info("%s: tx_skb exist but not busy\n", isac->name);
-		test_and_set_bit(FLG_TX_BUSY, &isac->dch.Flags);
-		isac->dch.tx_idx = 0;
-		isac_fill_fifo(isac);
-	} else {
-		pr_info("%s: ISAC XDU no TX_BUSY\n", isac->name);
-		if (get_next_dframe(&isac->dch))
-			isac_fill_fifo(isac);
-	}
-}
-
-static void
-isac_mos_irq(struct isac_hw *isac)
-{
-	u8 val;
-	int ret;
-
-	val = ReadISAC(isac, ISAC_MOSR);
-	pr_debug("%s: ISAC MOSR %02x\n", isac->name, val);
-#if ARCOFI_USE
-	if (val & 0x08) {
-		if (!isac->mon_rx) {
-			isac->mon_rx = kmalloc(MAX_MON_FRAME, GFP_ATOMIC);
-			if (!isac->mon_rx) {
-				pr_info("%s: ISAC MON RX out of memory!\n",
-					isac->name);
-				isac->mocr &= 0xf0;
-				isac->mocr |= 0x0a;
-				WriteISAC(isac, ISAC_MOCR, isac->mocr);
-				goto afterMONR0;
-			} else
-				isac->mon_rxp = 0;
-		}
-		if (isac->mon_rxp >= MAX_MON_FRAME) {
-			isac->mocr &= 0xf0;
-			isac->mocr |= 0x0a;
-			WriteISAC(isac, ISAC_MOCR, isac->mocr);
-			isac->mon_rxp = 0;
-			pr_debug("%s: ISAC MON RX overflow!\n", isac->name);
-			goto afterMONR0;
-		}
-		isac->mon_rx[isac->mon_rxp++] = ReadISAC(isac, ISAC_MOR0);
-		pr_debug("%s: ISAC MOR0 %02x\n", isac->name,
-			 isac->mon_rx[isac->mon_rxp - 1]);
-		if (isac->mon_rxp == 1) {
-			isac->mocr |= 0x04;
-			WriteISAC(isac, ISAC_MOCR, isac->mocr);
-		}
-	}
-afterMONR0:
-	if (val & 0x80) {
-		if (!isac->mon_rx) {
-			isac->mon_rx = kmalloc(MAX_MON_FRAME, GFP_ATOMIC);
-			if (!isac->mon_rx) {
-				pr_info("%s: ISAC MON RX out of memory!\n",
-					isac->name);
-				isac->mocr &= 0x0f;
-				isac->mocr |= 0xa0;
-				WriteISAC(isac, ISAC_MOCR, isac->mocr);
-				goto afterMONR1;
-			} else
-				isac->mon_rxp = 0;
-		}
-		if (isac->mon_rxp >= MAX_MON_FRAME) {
-			isac->mocr &= 0x0f;
-			isac->mocr |= 0xa0;
-			WriteISAC(isac, ISAC_MOCR, isac->mocr);
-			isac->mon_rxp = 0;
-			pr_debug("%s: ISAC MON RX overflow!\n", isac->name);
-			goto afterMONR1;
-		}
-		isac->mon_rx[isac->mon_rxp++] = ReadISAC(isac, ISAC_MOR1);
-		pr_debug("%s: ISAC MOR1 %02x\n", isac->name,
-			 isac->mon_rx[isac->mon_rxp - 1]);
-		isac->mocr |= 0x40;
-		WriteISAC(isac, ISAC_MOCR, isac->mocr);
-	}
-afterMONR1:
-	if (val & 0x04) {
-		isac->mocr &= 0xf0;
-		WriteISAC(isac, ISAC_MOCR, isac->mocr);
-		isac->mocr |= 0x0a;
-		WriteISAC(isac, ISAC_MOCR, isac->mocr);
-		if (isac->monitor) {
-			ret = isac->monitor(isac->dch.hw, MONITOR_RX_0,
-					    isac->mon_rx, isac->mon_rxp);
-			if (ret)
-				kfree(isac->mon_rx);
-		} else {
-			pr_info("%s: MONITOR 0 received %d but no user\n",
-				isac->name, isac->mon_rxp);
-			kfree(isac->mon_rx);
-		}
-		isac->mon_rx = NULL;
-		isac->mon_rxp = 0;
-	}
-	if (val & 0x40) {
-		isac->mocr &= 0x0f;
-		WriteISAC(isac, ISAC_MOCR, isac->mocr);
-		isac->mocr |= 0xa0;
-		WriteISAC(isac, ISAC_MOCR, isac->mocr);
-		if (isac->monitor) {
-			ret = isac->monitor(isac->dch.hw, MONITOR_RX_1,
-					    isac->mon_rx, isac->mon_rxp);
-			if (ret)
-				kfree(isac->mon_rx);
-		} else {
-			pr_info("%s: MONITOR 1 received %d but no user\n",
-				isac->name, isac->mon_rxp);
-			kfree(isac->mon_rx);
-		}
-		isac->mon_rx = NULL;
-		isac->mon_rxp = 0;
-	}
-	if (val & 0x02) {
-		if ((!isac->mon_tx) || (isac->mon_txc &&
-					(isac->mon_txp >= isac->mon_txc) && !(val & 0x08))) {
-			isac->mocr &= 0xf0;
-			WriteISAC(isac, ISAC_MOCR, isac->mocr);
-			isac->mocr |= 0x0a;
-			WriteISAC(isac, ISAC_MOCR, isac->mocr);
-			if (isac->mon_txc && (isac->mon_txp >= isac->mon_txc)) {
-				if (isac->monitor)
-					isac->monitor(isac->dch.hw,
-						      MONITOR_TX_0, NULL, 0);
-			}
-			kfree(isac->mon_tx);
-			isac->mon_tx = NULL;
-			isac->mon_txc = 0;
-			isac->mon_txp = 0;
-			goto AfterMOX0;
-		}
-		if (isac->mon_txc && (isac->mon_txp >= isac->mon_txc)) {
-			if (isac->monitor)
-				isac->monitor(isac->dch.hw,
-					      MONITOR_TX_0, NULL, 0);
-			kfree(isac->mon_tx);
-			isac->mon_tx = NULL;
-			isac->mon_txc = 0;
-			isac->mon_txp = 0;
-			goto AfterMOX0;
-		}
-		WriteISAC(isac, ISAC_MOX0, isac->mon_tx[isac->mon_txp++]);
-		pr_debug("%s: ISAC %02x -> MOX0\n", isac->name,
-			 isac->mon_tx[isac->mon_txp - 1]);
-	}
-AfterMOX0:
-	if (val & 0x20) {
-		if ((!isac->mon_tx) || (isac->mon_txc &&
-					(isac->mon_txp >= isac->mon_txc) && !(val & 0x80))) {
-			isac->mocr &= 0x0f;
-			WriteISAC(isac, ISAC_MOCR, isac->mocr);
-			isac->mocr |= 0xa0;
-			WriteISAC(isac, ISAC_MOCR, isac->mocr);
-			if (isac->mon_txc && (isac->mon_txp >= isac->mon_txc)) {
-				if (isac->monitor)
-					isac->monitor(isac->dch.hw,
-						      MONITOR_TX_1, NULL, 0);
-			}
-			kfree(isac->mon_tx);
-			isac->mon_tx = NULL;
-			isac->mon_txc = 0;
-			isac->mon_txp = 0;
-			goto AfterMOX1;
-		}
-		if (isac->mon_txc && (isac->mon_txp >= isac->mon_txc)) {
-			if (isac->monitor)
-				isac->monitor(isac->dch.hw,
-					      MONITOR_TX_1, NULL, 0);
-			kfree(isac->mon_tx);
-			isac->mon_tx = NULL;
-			isac->mon_txc = 0;
-			isac->mon_txp = 0;
-			goto AfterMOX1;
-		}
-		WriteISAC(isac, ISAC_MOX1, isac->mon_tx[isac->mon_txp++]);
-		pr_debug("%s: ISAC %02x -> MOX1\n", isac->name,
-			 isac->mon_tx[isac->mon_txp - 1]);
-	}
-AfterMOX1:
-	val = 0; /* dummy to avoid warning */
-#endif
-}
-
-static void
-isac_cisq_irq(struct isac_hw *isac) {
-	u8 val;
-
-	val = ReadISAC(isac, ISAC_CIR0);
-	pr_debug("%s: ISAC CIR0 %02X\n", isac->name, val);
-	if (val & 2) {
-		pr_debug("%s: ph_state change %x->%x\n", isac->name,
-			 isac->state, (val >> 2) & 0xf);
-		isac->state = (val >> 2) & 0xf;
-		isac_ph_state_change(isac);
-	}
-	if (val & 1) {
-		val = ReadISAC(isac, ISAC_CIR1);
-		pr_debug("%s: ISAC CIR1 %02X\n", isac->name, val);
-	}
-}
-
-static void
-isacsx_cic_irq(struct isac_hw *isac)
-{
-	u8 val;
-
-	val = ReadISAC(isac, ISACX_CIR0);
-	pr_debug("%s: ISACX CIR0 %02X\n", isac->name, val);
-	if (val & ISACX_CIR0_CIC0) {
-		pr_debug("%s: ph_state change %x->%x\n", isac->name,
-			 isac->state, val >> 4);
-		isac->state = val >> 4;
-		isac_ph_state_change(isac);
-	}
-}
-
-static void
-isacsx_rme_irq(struct isac_hw *isac)
-{
-	int count;
-	u8 val;
-
-	val = ReadISAC(isac, ISACX_RSTAD);
-	if ((val & (ISACX_RSTAD_VFR |
-		    ISACX_RSTAD_RDO |
-		    ISACX_RSTAD_CRC |
-		    ISACX_RSTAD_RAB))
-	    != (ISACX_RSTAD_VFR | ISACX_RSTAD_CRC)) {
-		pr_debug("%s: RSTAD %#x, dropped\n", isac->name, val);
-#ifdef ERROR_STATISTIC
-		if (val & ISACX_RSTAD_CRC)
-			isac->dch.err_rx++;
-		else
-			isac->dch.err_crc++;
-#endif
-		WriteISAC(isac, ISACX_CMDRD, ISACX_CMDRD_RMC);
-		dev_kfree_skb(isac->dch.rx_skb);
-		isac->dch.rx_skb = NULL;
-	} else {
-		count = ReadISAC(isac, ISACX_RBCLD) & 0x1f;
-		if (count == 0)
-			count = 32;
-		isac_empty_fifo(isac, count);
-		if (isac->dch.rx_skb) {
-			skb_trim(isac->dch.rx_skb, isac->dch.rx_skb->len - 1);
-			pr_debug("%s: dchannel received %d\n", isac->name,
-				 isac->dch.rx_skb->len);
-			recv_Dchannel(&isac->dch);
-		}
-	}
-}
-
-irqreturn_t
-mISDNisac_irq(struct isac_hw *isac, u8 val)
-{
-	if (unlikely(!val))
-		return IRQ_NONE;
-	pr_debug("%s: ISAC interrupt %02x\n", isac->name, val);
-	if (isac->type & IPAC_TYPE_ISACX) {
-		if (val & ISACX__CIC)
-			isacsx_cic_irq(isac);
-		if (val & ISACX__ICD) {
-			val = ReadISAC(isac, ISACX_ISTAD);
-			pr_debug("%s: ISTAD %02x\n", isac->name, val);
-			if (val & ISACX_D_XDU) {
-				pr_debug("%s: ISAC XDU\n", isac->name);
-#ifdef ERROR_STATISTIC
-				isac->dch.err_tx++;
-#endif
-				isac_retransmit(isac);
-			}
-			if (val & ISACX_D_XMR) {
-				pr_debug("%s: ISAC XMR\n", isac->name);
-#ifdef ERROR_STATISTIC
-				isac->dch.err_tx++;
-#endif
-				isac_retransmit(isac);
-			}
-			if (val & ISACX_D_XPR)
-				isac_xpr_irq(isac);
-			if (val & ISACX_D_RFO) {
-				pr_debug("%s: ISAC RFO\n", isac->name);
-				WriteISAC(isac, ISACX_CMDRD, ISACX_CMDRD_RMC);
-			}
-			if (val & ISACX_D_RME)
-				isacsx_rme_irq(isac);
-			if (val & ISACX_D_RPF)
-				isac_empty_fifo(isac, 0x20);
-		}
-	} else {
-		if (val & 0x80)	/* RME */
-			isac_rme_irq(isac);
-		if (val & 0x40)	/* RPF */
-			isac_empty_fifo(isac, 32);
-		if (val & 0x10)	/* XPR */
-			isac_xpr_irq(isac);
-		if (val & 0x04)	/* CISQ */
-			isac_cisq_irq(isac);
-		if (val & 0x20)	/* RSC - never */
-			pr_debug("%s: ISAC RSC interrupt\n", isac->name);
-		if (val & 0x02)	/* SIN - never */
-			pr_debug("%s: ISAC SIN interrupt\n", isac->name);
-		if (val & 0x01) {	/* EXI */
-			val = ReadISAC(isac, ISAC_EXIR);
-			pr_debug("%s: ISAC EXIR %02x\n", isac->name, val);
-			if (val & 0x80)	/* XMR */
-				pr_debug("%s: ISAC XMR\n", isac->name);
-			if (val & 0x40) { /* XDU */
-				pr_debug("%s: ISAC XDU\n", isac->name);
-#ifdef ERROR_STATISTIC
-				isac->dch.err_tx++;
-#endif
-				isac_retransmit(isac);
-			}
-			if (val & 0x04)	/* MOS */
-				isac_mos_irq(isac);
-		}
-	}
-	return IRQ_HANDLED;
-}
-EXPORT_SYMBOL(mISDNisac_irq);
-
-static int
-isac_l1hw(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct isac_hw		*isac = container_of(dch, struct isac_hw, dch);
-	int			ret = -EINVAL;
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	u32			id;
-	u_long			flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		spin_lock_irqsave(isac->hwlock, flags);
-		ret = dchannel_senddata(dch, skb);
-		if (ret > 0) { /* direct TX */
-			id = hh->id; /* skb can be freed */
-			isac_fill_fifo(isac);
-			ret = 0;
-			spin_unlock_irqrestore(isac->hwlock, flags);
-			queue_ch_frame(ch, PH_DATA_CNF, id, NULL);
-		} else
-			spin_unlock_irqrestore(isac->hwlock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		ret = l1_event(dch->l1, hh->prim);
-		break;
-	case PH_DEACTIVATE_REQ:
-		test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
-		ret = l1_event(dch->l1, hh->prim);
-		break;
-	}
-
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static int
-isac_ctrl(struct isac_hw *isac, u32 cmd, unsigned long para)
-{
-	u8 tl = 0;
-	unsigned long flags;
-	int ret = 0;
-
-	switch (cmd) {
-	case HW_TESTLOOP:
-		spin_lock_irqsave(isac->hwlock, flags);
-		if (!(isac->type & IPAC_TYPE_ISACX)) {
-			/* TODO: implement for IPAC_TYPE_ISACX */
-			if (para & 1) /* B1 */
-				tl |= 0x0c;
-			else if (para & 2) /* B2 */
-				tl |= 0x3;
-			/* we only support IOM2 mode */
-			WriteISAC(isac, ISAC_SPCR, tl);
-			if (tl)
-				WriteISAC(isac, ISAC_ADF1, 0x8);
-			else
-				WriteISAC(isac, ISAC_ADF1, 0x0);
-		}
-		spin_unlock_irqrestore(isac->hwlock, flags);
-		break;
-	case HW_TIMER3_VALUE:
-		ret = l1_event(isac->dch.l1, HW_TIMER3_VALUE | (para & 0xff));
-		break;
-	default:
-		pr_debug("%s: %s unknown command %x %lx\n", isac->name,
-			 __func__, cmd, para);
-		ret = -1;
-	}
-	return ret;
-}
-
-static int
-isac_l1cmd(struct dchannel *dch, u32 cmd)
-{
-	struct isac_hw *isac = container_of(dch, struct isac_hw, dch);
-	u_long flags;
-
-	pr_debug("%s: cmd(%x) state(%02x)\n", isac->name, cmd, isac->state);
-	switch (cmd) {
-	case INFO3_P8:
-		spin_lock_irqsave(isac->hwlock, flags);
-		ph_command(isac, ISAC_CMD_AR8);
-		spin_unlock_irqrestore(isac->hwlock, flags);
-		break;
-	case INFO3_P10:
-		spin_lock_irqsave(isac->hwlock, flags);
-		ph_command(isac, ISAC_CMD_AR10);
-		spin_unlock_irqrestore(isac->hwlock, flags);
-		break;
-	case HW_RESET_REQ:
-		spin_lock_irqsave(isac->hwlock, flags);
-		if ((isac->state == ISAC_IND_EI) ||
-		    (isac->state == ISAC_IND_DR) ||
-		    (isac->state == ISAC_IND_DR6) ||
-		    (isac->state == ISAC_IND_RS))
-			ph_command(isac, ISAC_CMD_TIM);
-		else
-			ph_command(isac, ISAC_CMD_RS);
-		spin_unlock_irqrestore(isac->hwlock, flags);
-		break;
-	case HW_DEACT_REQ:
-		skb_queue_purge(&dch->squeue);
-		if (dch->tx_skb) {
-			dev_kfree_skb(dch->tx_skb);
-			dch->tx_skb = NULL;
-		}
-		dch->tx_idx = 0;
-		if (dch->rx_skb) {
-			dev_kfree_skb(dch->rx_skb);
-			dch->rx_skb = NULL;
-		}
-		test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
-		if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags))
-			timer_delete(&dch->timer);
-		break;
-	case HW_POWERUP_REQ:
-		spin_lock_irqsave(isac->hwlock, flags);
-		ph_command(isac, ISAC_CMD_TIM);
-		spin_unlock_irqrestore(isac->hwlock, flags);
-		break;
-	case PH_ACTIVATE_IND:
-		test_and_set_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL,
-			    GFP_ATOMIC);
-		break;
-	case PH_DEACTIVATE_IND:
-		test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL,
-			    GFP_ATOMIC);
-		break;
-	default:
-		pr_debug("%s: %s unknown command %x\n", isac->name,
-			 __func__, cmd);
-		return -1;
-	}
-	return 0;
-}
-
-static void
-isac_release(struct isac_hw *isac)
-{
-	if (isac->type & IPAC_TYPE_ISACX)
-		WriteISAC(isac, ISACX_MASK, 0xff);
-	else if (isac->type != 0)
-		WriteISAC(isac, ISAC_MASK, 0xff);
-	if (isac->dch.timer.function != NULL) {
-		timer_delete(&isac->dch.timer);
-		isac->dch.timer.function = NULL;
-	}
-	kfree(isac->mon_rx);
-	isac->mon_rx = NULL;
-	kfree(isac->mon_tx);
-	isac->mon_tx = NULL;
-	if (isac->dch.l1)
-		l1_event(isac->dch.l1, CLOSE_CHANNEL);
-	mISDN_freedchannel(&isac->dch);
-}
-
-static void
-dbusy_timer_handler(struct timer_list *t)
-{
-	struct isac_hw *isac = timer_container_of(isac, t, dch.timer);
-	int rbch, star;
-	u_long flags;
-
-	if (test_bit(FLG_BUSY_TIMER, &isac->dch.Flags)) {
-		spin_lock_irqsave(isac->hwlock, flags);
-		rbch = ReadISAC(isac, ISAC_RBCH);
-		star = ReadISAC(isac, ISAC_STAR);
-		pr_debug("%s: D-Channel Busy RBCH %02x STAR %02x\n",
-			 isac->name, rbch, star);
-		if (rbch & ISAC_RBCH_XAC) /* D-Channel Busy */
-			test_and_set_bit(FLG_L1_BUSY, &isac->dch.Flags);
-		else {
-			/* discard frame; reset transceiver */
-			test_and_clear_bit(FLG_BUSY_TIMER, &isac->dch.Flags);
-			if (isac->dch.tx_idx)
-				isac->dch.tx_idx = 0;
-			else
-				pr_info("%s: ISAC D-Channel Busy no tx_idx\n",
-					isac->name);
-			/* Transmitter reset */
-			WriteISAC(isac, ISAC_CMDR, 0x01);
-		}
-		spin_unlock_irqrestore(isac->hwlock, flags);
-	}
-}
-
-static int
-open_dchannel_caller(struct isac_hw *isac, struct channel_req *rq, void *caller)
-{
-	pr_debug("%s: %s dev(%d) open from %p\n", isac->name, __func__,
-		 isac->dch.dev.id, caller);
-	if (rq->protocol != ISDN_P_TE_S0)
-		return -EINVAL;
-	if (rq->adr.channel == 1)
-		/* E-Channel not supported */
-		return -EINVAL;
-	rq->ch = &isac->dch.dev.D;
-	rq->ch->protocol = rq->protocol;
-	if (isac->dch.state == 7)
-		_queue_data(rq->ch, PH_ACTIVATE_IND, MISDN_ID_ANY,
-			    0, NULL, GFP_KERNEL);
-	return 0;
-}
-
-static int
-open_dchannel(struct isac_hw *isac, struct channel_req *rq)
-{
-	return open_dchannel_caller(isac, rq, __builtin_return_address(0));
-}
-
-static const char *ISACVer[] =
-{"2086/2186 V1.1", "2085 B1", "2085 B2",
- "2085 V2.3"};
-
-static int
-isac_init(struct isac_hw *isac)
-{
-	u8 val;
-	int err = 0;
-
-	if (!isac->dch.l1) {
-		err = create_l1(&isac->dch, isac_l1cmd);
-		if (err)
-			return err;
-	}
-	isac->mon_tx = NULL;
-	isac->mon_rx = NULL;
-	timer_setup(&isac->dch.timer, dbusy_timer_handler, 0);
-	isac->mocr = 0xaa;
-	if (isac->type & IPAC_TYPE_ISACX) {
-		/* Disable all IRQ */
-		WriteISAC(isac, ISACX_MASK, 0xff);
-		val = ReadISAC(isac, ISACX_STARD);
-		pr_debug("%s: ISACX STARD %x\n", isac->name, val);
-		val = ReadISAC(isac, ISACX_ISTAD);
-		pr_debug("%s: ISACX ISTAD %x\n", isac->name, val);
-		val = ReadISAC(isac, ISACX_ISTA);
-		pr_debug("%s: ISACX ISTA %x\n", isac->name, val);
-		/* clear LDD */
-		WriteISAC(isac, ISACX_TR_CONF0, 0x00);
-		/* enable transmitter */
-		WriteISAC(isac, ISACX_TR_CONF2, 0x00);
-		/* transparent mode 0, RAC, stop/go */
-		WriteISAC(isac, ISACX_MODED, 0xc9);
-		/* all HDLC IRQ unmasked */
-		val = ReadISAC(isac, ISACX_ID);
-		if (isac->dch.debug & DEBUG_HW)
-			pr_notice("%s: ISACX Design ID %x\n",
-				  isac->name, val & 0x3f);
-		val = ReadISAC(isac, ISACX_CIR0);
-		pr_debug("%s: ISACX CIR0 %02X\n", isac->name, val);
-		isac->state = val >> 4;
-		isac_ph_state_change(isac);
-		ph_command(isac, ISAC_CMD_RS);
-		WriteISAC(isac, ISACX_MASK, IPACX__ON);
-		WriteISAC(isac, ISACX_MASKD, 0x00);
-	} else { /* old isac */
-		WriteISAC(isac, ISAC_MASK, 0xff);
-		val = ReadISAC(isac, ISAC_STAR);
-		pr_debug("%s: ISAC STAR %x\n", isac->name, val);
-		val = ReadISAC(isac, ISAC_MODE);
-		pr_debug("%s: ISAC MODE %x\n", isac->name, val);
-		val = ReadISAC(isac, ISAC_ADF2);
-		pr_debug("%s: ISAC ADF2 %x\n", isac->name, val);
-		val = ReadISAC(isac, ISAC_ISTA);
-		pr_debug("%s: ISAC ISTA %x\n", isac->name, val);
-		if (val & 0x01) {
-			val = ReadISAC(isac, ISAC_EXIR);
-			pr_debug("%s: ISAC EXIR %x\n", isac->name, val);
-		}
-		val = ReadISAC(isac, ISAC_RBCH);
-		if (isac->dch.debug & DEBUG_HW)
-			pr_notice("%s: ISAC version (%x): %s\n", isac->name,
-				  val, ISACVer[(val >> 5) & 3]);
-		isac->type |= ((val >> 5) & 3);
-		if (!isac->adf2)
-			isac->adf2 = 0x80;
-		if (!(isac->adf2 & 0x80)) { /* only IOM 2 Mode */
-			pr_info("%s: only support IOM2 mode but adf2=%02x\n",
-				isac->name, isac->adf2);
-			isac_release(isac);
-			return -EINVAL;
-		}
-		WriteISAC(isac, ISAC_ADF2, isac->adf2);
-		WriteISAC(isac, ISAC_SQXR, 0x2f);
-		WriteISAC(isac, ISAC_SPCR, 0x00);
-		WriteISAC(isac, ISAC_STCR, 0x70);
-		WriteISAC(isac, ISAC_MODE, 0xc9);
-		WriteISAC(isac, ISAC_TIMR, 0x00);
-		WriteISAC(isac, ISAC_ADF1, 0x00);
-		val = ReadISAC(isac, ISAC_CIR0);
-		pr_debug("%s: ISAC CIR0 %x\n", isac->name, val);
-		isac->state = (val >> 2) & 0xf;
-		isac_ph_state_change(isac);
-		ph_command(isac, ISAC_CMD_RS);
-		WriteISAC(isac, ISAC_MASK, 0);
-	}
-	return err;
-}
-
-int
-mISDNisac_init(struct isac_hw *isac, void *hw)
-{
-	mISDN_initdchannel(&isac->dch, MAX_DFRAME_LEN_L1, isac_ph_state_bh);
-	isac->dch.hw = hw;
-	isac->dch.dev.D.send = isac_l1hw;
-	isac->init = isac_init;
-	isac->release = isac_release;
-	isac->ctrl = isac_ctrl;
-	isac->open = open_dchannel;
-	isac->dch.dev.Dprotocols = (1 << ISDN_P_TE_S0);
-	isac->dch.dev.nrbchan = 2;
-	return 0;
-}
-EXPORT_SYMBOL(mISDNisac_init);
-
-static void
-waitforCEC(struct hscx_hw *hx)
-{
-	u8 starb, to = 50;
-
-	while (to) {
-		starb = ReadHSCX(hx, IPAC_STARB);
-		if (!(starb & 0x04))
-			break;
-		udelay(1);
-		to--;
-	}
-	if (to < 50)
-		pr_debug("%s: B%1d CEC %d us\n", hx->ip->name, hx->bch.nr,
-			 50 - to);
-	if (!to)
-		pr_info("%s: B%1d CEC timeout\n", hx->ip->name, hx->bch.nr);
-}
-
-
-static void
-waitforXFW(struct hscx_hw *hx)
-{
-	u8 starb, to = 50;
-
-	while (to) {
-		starb = ReadHSCX(hx, IPAC_STARB);
-		if ((starb & 0x44) == 0x40)
-			break;
-		udelay(1);
-		to--;
-	}
-	if (to < 50)
-		pr_debug("%s: B%1d XFW %d us\n", hx->ip->name, hx->bch.nr,
-			 50 - to);
-	if (!to)
-		pr_info("%s: B%1d XFW timeout\n", hx->ip->name, hx->bch.nr);
-}
-
-static void
-hscx_cmdr(struct hscx_hw *hx, u8 cmd)
-{
-	if (hx->ip->type & IPAC_TYPE_IPACX)
-		WriteHSCX(hx, IPACX_CMDRB, cmd);
-	else {
-		waitforCEC(hx);
-		WriteHSCX(hx, IPAC_CMDRB, cmd);
-	}
-}
-
-static void
-hscx_empty_fifo(struct hscx_hw *hscx, u8 count)
-{
-	u8 *p;
-	int maxlen;
-
-	pr_debug("%s: B%1d %d\n", hscx->ip->name, hscx->bch.nr, count);
-	if (test_bit(FLG_RX_OFF, &hscx->bch.Flags)) {
-		hscx->bch.dropcnt += count;
-		hscx_cmdr(hscx, 0x80); /* RMC */
-		return;
-	}
-	maxlen = bchannel_get_rxbuf(&hscx->bch, count);
-	if (maxlen < 0) {
-		hscx_cmdr(hscx, 0x80); /* RMC */
-		if (hscx->bch.rx_skb)
-			skb_trim(hscx->bch.rx_skb, 0);
-		pr_warn("%s.B%d: No bufferspace for %d bytes\n",
-			hscx->ip->name, hscx->bch.nr, count);
-		return;
-	}
-	p = skb_put(hscx->bch.rx_skb, count);
-
-	if (hscx->ip->type & IPAC_TYPE_IPACX)
-		hscx->ip->read_fifo(hscx->ip->hw,
-				    hscx->off + IPACX_RFIFOB, p, count);
-	else
-		hscx->ip->read_fifo(hscx->ip->hw,
-				    hscx->off, p, count);
-
-	hscx_cmdr(hscx, 0x80); /* RMC */
-
-	if (hscx->bch.debug & DEBUG_HW_BFIFO) {
-		snprintf(hscx->log, 64, "B%1d-recv %s %d ",
-			 hscx->bch.nr, hscx->ip->name, count);
-		print_hex_dump_bytes(hscx->log, DUMP_PREFIX_OFFSET, p, count);
-	}
-}
-
-static void
-hscx_fill_fifo(struct hscx_hw *hscx)
-{
-	int count, more;
-	u8 *p;
-
-	if (!hscx->bch.tx_skb) {
-		if (!test_bit(FLG_TX_EMPTY, &hscx->bch.Flags))
-			return;
-		count = hscx->fifo_size;
-		more = 1;
-		p = hscx->log;
-		memset(p, hscx->bch.fill[0], count);
-	} else {
-		count = hscx->bch.tx_skb->len - hscx->bch.tx_idx;
-		if (count <= 0)
-			return;
-		p = hscx->bch.tx_skb->data + hscx->bch.tx_idx;
-
-		more = test_bit(FLG_TRANSPARENT, &hscx->bch.Flags) ? 1 : 0;
-		if (count > hscx->fifo_size) {
-			count = hscx->fifo_size;
-			more = 1;
-		}
-		pr_debug("%s: B%1d %d/%d/%d\n", hscx->ip->name, hscx->bch.nr,
-			 count, hscx->bch.tx_idx, hscx->bch.tx_skb->len);
-		hscx->bch.tx_idx += count;
-	}
-	if (hscx->ip->type & IPAC_TYPE_IPACX)
-		hscx->ip->write_fifo(hscx->ip->hw,
-				     hscx->off + IPACX_XFIFOB, p, count);
-	else {
-		waitforXFW(hscx);
-		hscx->ip->write_fifo(hscx->ip->hw,
-				     hscx->off, p, count);
-	}
-	hscx_cmdr(hscx, more ? 0x08 : 0x0a);
-
-	if (hscx->bch.tx_skb && (hscx->bch.debug & DEBUG_HW_BFIFO)) {
-		snprintf(hscx->log, 64, "B%1d-send %s %d ",
-			 hscx->bch.nr, hscx->ip->name, count);
-		print_hex_dump_bytes(hscx->log, DUMP_PREFIX_OFFSET, p, count);
-	}
-}
-
-static void
-hscx_xpr(struct hscx_hw *hx)
-{
-	if (hx->bch.tx_skb && hx->bch.tx_idx < hx->bch.tx_skb->len) {
-		hscx_fill_fifo(hx);
-	} else {
-		dev_kfree_skb(hx->bch.tx_skb);
-		if (get_next_bframe(&hx->bch)) {
-			hscx_fill_fifo(hx);
-			test_and_clear_bit(FLG_TX_EMPTY, &hx->bch.Flags);
-		} else if (test_bit(FLG_TX_EMPTY, &hx->bch.Flags)) {
-			hscx_fill_fifo(hx);
-		}
-	}
-}
-
-static void
-ipac_rme(struct hscx_hw *hx)
-{
-	int count;
-	u8 rstab;
-
-	if (hx->ip->type & IPAC_TYPE_IPACX)
-		rstab = ReadHSCX(hx, IPACX_RSTAB);
-	else
-		rstab = ReadHSCX(hx, IPAC_RSTAB);
-	pr_debug("%s: B%1d RSTAB %02x\n", hx->ip->name, hx->bch.nr, rstab);
-	if ((rstab & 0xf0) != 0xa0) {
-		/* !(VFR && !RDO && CRC && !RAB) */
-		if (!(rstab & 0x80)) {
-			if (hx->bch.debug & DEBUG_HW_BCHANNEL)
-				pr_notice("%s: B%1d invalid frame\n",
-					  hx->ip->name, hx->bch.nr);
-		}
-		if (rstab & 0x40) {
-			if (hx->bch.debug & DEBUG_HW_BCHANNEL)
-				pr_notice("%s: B%1d RDO proto=%x\n",
-					  hx->ip->name, hx->bch.nr,
-					  hx->bch.state);
-		}
-		if (!(rstab & 0x20)) {
-			if (hx->bch.debug & DEBUG_HW_BCHANNEL)
-				pr_notice("%s: B%1d CRC error\n",
-					  hx->ip->name, hx->bch.nr);
-		}
-		hscx_cmdr(hx, 0x80); /* Do RMC */
-		return;
-	}
-	if (hx->ip->type & IPAC_TYPE_IPACX)
-		count = ReadHSCX(hx, IPACX_RBCLB);
-	else
-		count = ReadHSCX(hx, IPAC_RBCLB);
-	count &= (hx->fifo_size - 1);
-	if (count == 0)
-		count = hx->fifo_size;
-	hscx_empty_fifo(hx, count);
-	if (!hx->bch.rx_skb)
-		return;
-	if (hx->bch.rx_skb->len < 2) {
-		pr_debug("%s: B%1d frame too short %d\n",
-			 hx->ip->name, hx->bch.nr, hx->bch.rx_skb->len);
-		skb_trim(hx->bch.rx_skb, 0);
-	} else {
-		skb_trim(hx->bch.rx_skb, hx->bch.rx_skb->len - 1);
-		recv_Bchannel(&hx->bch, 0, false);
-	}
-}
-
-static void
-ipac_irq(struct hscx_hw *hx, u8 ista)
-{
-	u8 istab, m, exirb = 0;
-
-	if (hx->ip->type & IPAC_TYPE_IPACX)
-		istab = ReadHSCX(hx, IPACX_ISTAB);
-	else if (hx->ip->type & IPAC_TYPE_IPAC) {
-		istab = ReadHSCX(hx, IPAC_ISTAB);
-		m = (hx->bch.nr & 1) ? IPAC__EXA : IPAC__EXB;
-		if (m & ista) {
-			exirb = ReadHSCX(hx, IPAC_EXIRB);
-			pr_debug("%s: B%1d EXIRB %02x\n", hx->ip->name,
-				 hx->bch.nr, exirb);
-		}
-	} else if (hx->bch.nr & 2) { /* HSCX B */
-		if (ista & (HSCX__EXA | HSCX__ICA))
-			ipac_irq(&hx->ip->hscx[0], ista);
-		if (ista & HSCX__EXB) {
-			exirb = ReadHSCX(hx, IPAC_EXIRB);
-			pr_debug("%s: B%1d EXIRB %02x\n", hx->ip->name,
-				 hx->bch.nr, exirb);
-		}
-		istab = ista & 0xF8;
-	} else { /* HSCX A */
-		istab = ReadHSCX(hx, IPAC_ISTAB);
-		if (ista & HSCX__EXA) {
-			exirb = ReadHSCX(hx, IPAC_EXIRB);
-			pr_debug("%s: B%1d EXIRB %02x\n", hx->ip->name,
-				 hx->bch.nr, exirb);
-		}
-		istab = istab & 0xF8;
-	}
-	if (exirb & IPAC_B_XDU)
-		istab |= IPACX_B_XDU;
-	if (exirb & IPAC_B_RFO)
-		istab |= IPACX_B_RFO;
-	pr_debug("%s: B%1d ISTAB %02x\n", hx->ip->name, hx->bch.nr, istab);
-
-	if (!test_bit(FLG_ACTIVE, &hx->bch.Flags))
-		return;
-
-	if (istab & IPACX_B_RME)
-		ipac_rme(hx);
-
-	if (istab & IPACX_B_RPF) {
-		hscx_empty_fifo(hx, hx->fifo_size);
-		if (test_bit(FLG_TRANSPARENT, &hx->bch.Flags))
-			recv_Bchannel(&hx->bch, 0, false);
-	}
-
-	if (istab & IPACX_B_RFO) {
-		pr_debug("%s: B%1d RFO error\n", hx->ip->name, hx->bch.nr);
-		hscx_cmdr(hx, 0x40);	/* RRES */
-	}
-
-	if (istab & IPACX_B_XPR)
-		hscx_xpr(hx);
-
-	if (istab & IPACX_B_XDU) {
-		if (test_bit(FLG_TRANSPARENT, &hx->bch.Flags)) {
-			if (test_bit(FLG_FILLEMPTY, &hx->bch.Flags))
-				test_and_set_bit(FLG_TX_EMPTY, &hx->bch.Flags);
-			hscx_xpr(hx);
-			return;
-		}
-		pr_debug("%s: B%1d XDU error at len %d\n", hx->ip->name,
-			 hx->bch.nr, hx->bch.tx_idx);
-		hx->bch.tx_idx = 0;
-		hscx_cmdr(hx, 0x01);	/* XRES */
-	}
-}
-
-irqreturn_t
-mISDNipac_irq(struct ipac_hw *ipac, int maxloop)
-{
-	int cnt = maxloop + 1;
-	u8 ista, istad;
-	struct isac_hw  *isac = &ipac->isac;
-
-	if (ipac->type & IPAC_TYPE_IPACX) {
-		ista = ReadIPAC(ipac, ISACX_ISTA);
-		while (ista && --cnt) {
-			pr_debug("%s: ISTA %02x\n", ipac->name, ista);
-			if (ista & IPACX__ICA)
-				ipac_irq(&ipac->hscx[0], ista);
-			if (ista & IPACX__ICB)
-				ipac_irq(&ipac->hscx[1], ista);
-			if (ista & (ISACX__ICD | ISACX__CIC))
-				mISDNisac_irq(&ipac->isac, ista);
-			ista = ReadIPAC(ipac, ISACX_ISTA);
-		}
-	} else if (ipac->type & IPAC_TYPE_IPAC) {
-		ista = ReadIPAC(ipac, IPAC_ISTA);
-		while (ista && --cnt) {
-			pr_debug("%s: ISTA %02x\n", ipac->name, ista);
-			if (ista & (IPAC__ICD | IPAC__EXD)) {
-				istad = ReadISAC(isac, ISAC_ISTA);
-				pr_debug("%s: ISTAD %02x\n", ipac->name, istad);
-				if (istad & IPAC_D_TIN2)
-					pr_debug("%s TIN2 irq\n", ipac->name);
-				if (ista & IPAC__EXD)
-					istad |= 1; /* ISAC EXI */
-				mISDNisac_irq(isac, istad);
-			}
-			if (ista & (IPAC__ICA | IPAC__EXA))
-				ipac_irq(&ipac->hscx[0], ista);
-			if (ista & (IPAC__ICB | IPAC__EXB))
-				ipac_irq(&ipac->hscx[1], ista);
-			ista = ReadIPAC(ipac, IPAC_ISTA);
-		}
-	} else if (ipac->type & IPAC_TYPE_HSCX) {
-		while (--cnt) {
-			ista = ReadIPAC(ipac, IPAC_ISTAB + ipac->hscx[1].off);
-			pr_debug("%s: B2 ISTA %02x\n", ipac->name, ista);
-			if (ista)
-				ipac_irq(&ipac->hscx[1], ista);
-			istad = ReadISAC(isac, ISAC_ISTA);
-			pr_debug("%s: ISTAD %02x\n", ipac->name, istad);
-			if (istad)
-				mISDNisac_irq(isac, istad);
-			if (0 == (ista | istad))
-				break;
-		}
-	}
-	if (cnt > maxloop) /* only for ISAC/HSCX without PCI IRQ test */
-		return IRQ_NONE;
-	if (cnt < maxloop)
-		pr_debug("%s: %d irqloops cpu%d\n", ipac->name,
-			 maxloop - cnt, smp_processor_id());
-	if (maxloop && !cnt)
-		pr_notice("%s: %d IRQ LOOP cpu%d\n", ipac->name,
-			  maxloop, smp_processor_id());
-	return IRQ_HANDLED;
-}
-EXPORT_SYMBOL(mISDNipac_irq);
-
-static int
-hscx_mode(struct hscx_hw *hscx, u32 bprotocol)
-{
-	pr_debug("%s: HSCX %c protocol %x-->%x ch %d\n", hscx->ip->name,
-		 '@' + hscx->bch.nr, hscx->bch.state, bprotocol, hscx->bch.nr);
-	if (hscx->ip->type & IPAC_TYPE_IPACX) {
-		if (hscx->bch.nr & 1) { /* B1 and ICA */
-			WriteIPAC(hscx->ip, ISACX_BCHA_TSDP_BC1, 0x80);
-			WriteIPAC(hscx->ip, ISACX_BCHA_CR, 0x88);
-		} else { /* B2 and ICB */
-			WriteIPAC(hscx->ip, ISACX_BCHB_TSDP_BC1, 0x81);
-			WriteIPAC(hscx->ip, ISACX_BCHB_CR, 0x88);
-		}
-		switch (bprotocol) {
-		case ISDN_P_NONE: /* init */
-			WriteHSCX(hscx, IPACX_MODEB, 0xC0);	/* rec off */
-			WriteHSCX(hscx, IPACX_EXMB,  0x30);	/* std adj. */
-			WriteHSCX(hscx, IPACX_MASKB, 0xFF);	/* ints off */
-			hscx_cmdr(hscx, 0x41);
-			test_and_clear_bit(FLG_HDLC, &hscx->bch.Flags);
-			test_and_clear_bit(FLG_TRANSPARENT, &hscx->bch.Flags);
-			break;
-		case ISDN_P_B_RAW:
-			WriteHSCX(hscx, IPACX_MODEB, 0x88);	/* ex trans */
-			WriteHSCX(hscx, IPACX_EXMB,  0x00);	/* trans */
-			hscx_cmdr(hscx, 0x41);
-			WriteHSCX(hscx, IPACX_MASKB, IPACX_B_ON);
-			test_and_set_bit(FLG_TRANSPARENT, &hscx->bch.Flags);
-			break;
-		case ISDN_P_B_HDLC:
-			WriteHSCX(hscx, IPACX_MODEB, 0xC0);	/* trans */
-			WriteHSCX(hscx, IPACX_EXMB,  0x00);	/* hdlc,crc */
-			hscx_cmdr(hscx, 0x41);
-			WriteHSCX(hscx, IPACX_MASKB, IPACX_B_ON);
-			test_and_set_bit(FLG_HDLC, &hscx->bch.Flags);
-			break;
-		default:
-			pr_info("%s: protocol not known %x\n", hscx->ip->name,
-				bprotocol);
-			return -ENOPROTOOPT;
-		}
-	} else if (hscx->ip->type & IPAC_TYPE_IPAC) { /* IPAC */
-		WriteHSCX(hscx, IPAC_CCR1, 0x82);
-		WriteHSCX(hscx, IPAC_CCR2, 0x30);
-		WriteHSCX(hscx, IPAC_XCCR, 0x07);
-		WriteHSCX(hscx, IPAC_RCCR, 0x07);
-		WriteHSCX(hscx, IPAC_TSAX, hscx->slot);
-		WriteHSCX(hscx, IPAC_TSAR, hscx->slot);
-		switch (bprotocol) {
-		case ISDN_P_NONE:
-			WriteHSCX(hscx, IPAC_TSAX, 0x1F);
-			WriteHSCX(hscx, IPAC_TSAR, 0x1F);
-			WriteHSCX(hscx, IPAC_MODEB, 0x84);
-			WriteHSCX(hscx, IPAC_CCR1, 0x82);
-			WriteHSCX(hscx, IPAC_MASKB, 0xFF);	/* ints off */
-			test_and_clear_bit(FLG_HDLC, &hscx->bch.Flags);
-			test_and_clear_bit(FLG_TRANSPARENT, &hscx->bch.Flags);
-			break;
-		case ISDN_P_B_RAW:
-			WriteHSCX(hscx, IPAC_MODEB, 0xe4);	/* ex trans */
-			WriteHSCX(hscx, IPAC_CCR1, 0x82);
-			hscx_cmdr(hscx, 0x41);
-			WriteHSCX(hscx, IPAC_MASKB, 0);
-			test_and_set_bit(FLG_TRANSPARENT, &hscx->bch.Flags);
-			break;
-		case ISDN_P_B_HDLC:
-			WriteHSCX(hscx, IPAC_MODEB, 0x8c);
-			WriteHSCX(hscx, IPAC_CCR1, 0x8a);
-			hscx_cmdr(hscx, 0x41);
-			WriteHSCX(hscx, IPAC_MASKB, 0);
-			test_and_set_bit(FLG_HDLC, &hscx->bch.Flags);
-			break;
-		default:
-			pr_info("%s: protocol not known %x\n", hscx->ip->name,
-				bprotocol);
-			return -ENOPROTOOPT;
-		}
-	} else if (hscx->ip->type & IPAC_TYPE_HSCX) { /* HSCX */
-		WriteHSCX(hscx, IPAC_CCR1, 0x85);
-		WriteHSCX(hscx, IPAC_CCR2, 0x30);
-		WriteHSCX(hscx, IPAC_XCCR, 0x07);
-		WriteHSCX(hscx, IPAC_RCCR, 0x07);
-		WriteHSCX(hscx, IPAC_TSAX, hscx->slot);
-		WriteHSCX(hscx, IPAC_TSAR, hscx->slot);
-		switch (bprotocol) {
-		case ISDN_P_NONE:
-			WriteHSCX(hscx, IPAC_TSAX, 0x1F);
-			WriteHSCX(hscx, IPAC_TSAR, 0x1F);
-			WriteHSCX(hscx, IPAC_MODEB, 0x84);
-			WriteHSCX(hscx, IPAC_CCR1, 0x85);
-			WriteHSCX(hscx, IPAC_MASKB, 0xFF);	/* ints off */
-			test_and_clear_bit(FLG_HDLC, &hscx->bch.Flags);
-			test_and_clear_bit(FLG_TRANSPARENT, &hscx->bch.Flags);
-			break;
-		case ISDN_P_B_RAW:
-			WriteHSCX(hscx, IPAC_MODEB, 0xe4);	/* ex trans */
-			WriteHSCX(hscx, IPAC_CCR1, 0x85);
-			hscx_cmdr(hscx, 0x41);
-			WriteHSCX(hscx, IPAC_MASKB, 0);
-			test_and_set_bit(FLG_TRANSPARENT, &hscx->bch.Flags);
-			break;
-		case ISDN_P_B_HDLC:
-			WriteHSCX(hscx, IPAC_MODEB, 0x8c);
-			WriteHSCX(hscx, IPAC_CCR1, 0x8d);
-			hscx_cmdr(hscx, 0x41);
-			WriteHSCX(hscx, IPAC_MASKB, 0);
-			test_and_set_bit(FLG_HDLC, &hscx->bch.Flags);
-			break;
-		default:
-			pr_info("%s: protocol not known %x\n", hscx->ip->name,
-				bprotocol);
-			return -ENOPROTOOPT;
-		}
-	} else
-		return -EINVAL;
-	hscx->bch.state = bprotocol;
-	return 0;
-}
-
-static int
-hscx_l2l1(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct bchannel *bch = container_of(ch, struct bchannel, ch);
-	struct hscx_hw	*hx = container_of(bch, struct hscx_hw, bch);
-	int ret = -EINVAL;
-	struct mISDNhead *hh = mISDN_HEAD_P(skb);
-	unsigned long flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		spin_lock_irqsave(hx->ip->hwlock, flags);
-		ret = bchannel_senddata(bch, skb);
-		if (ret > 0) { /* direct TX */
-			ret = 0;
-			hscx_fill_fifo(hx);
-		}
-		spin_unlock_irqrestore(hx->ip->hwlock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		spin_lock_irqsave(hx->ip->hwlock, flags);
-		if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags))
-			ret = hscx_mode(hx, ch->protocol);
-		else
-			ret = 0;
-		spin_unlock_irqrestore(hx->ip->hwlock, flags);
-		if (!ret)
-			_queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY, 0,
-				    NULL, GFP_KERNEL);
-		break;
-	case PH_DEACTIVATE_REQ:
-		spin_lock_irqsave(hx->ip->hwlock, flags);
-		mISDN_clear_bchannel(bch);
-		hscx_mode(hx, ISDN_P_NONE);
-		spin_unlock_irqrestore(hx->ip->hwlock, flags);
-		_queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0,
-			    NULL, GFP_KERNEL);
-		ret = 0;
-		break;
-	default:
-		pr_info("%s: %s unknown prim(%x,%x)\n",
-			hx->ip->name, __func__, hh->prim, hh->id);
-		ret = -EINVAL;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static int
-channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq)
-{
-	return mISDN_ctrl_bchannel(bch, cq);
-}
-
-static int
-hscx_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
-{
-	struct bchannel *bch = container_of(ch, struct bchannel, ch);
-	struct hscx_hw	*hx = container_of(bch, struct hscx_hw, bch);
-	int ret = -EINVAL;
-	u_long flags;
-
-	pr_debug("%s: %s cmd:%x %p\n", hx->ip->name, __func__, cmd, arg);
-	switch (cmd) {
-	case CLOSE_CHANNEL:
-		test_and_clear_bit(FLG_OPEN, &bch->Flags);
-		cancel_work_sync(&bch->workq);
-		spin_lock_irqsave(hx->ip->hwlock, flags);
-		mISDN_clear_bchannel(bch);
-		hscx_mode(hx, ISDN_P_NONE);
-		spin_unlock_irqrestore(hx->ip->hwlock, flags);
-		ch->protocol = ISDN_P_NONE;
-		ch->peer = NULL;
-		module_put(hx->ip->owner);
-		ret = 0;
-		break;
-	case CONTROL_CHANNEL:
-		ret = channel_bctrl(bch, arg);
-		break;
-	default:
-		pr_info("%s: %s unknown prim(%x)\n",
-			hx->ip->name, __func__, cmd);
-	}
-	return ret;
-}
-
-static void
-free_ipac(struct ipac_hw *ipac)
-{
-	isac_release(&ipac->isac);
-}
-
-static const char *HSCXVer[] =
-{"A1", "?1", "A2", "?3", "A3", "V2.1", "?6", "?7",
- "?8", "?9", "?10", "?11", "?12", "?13", "?14", "???"};
-
-
-
-static void
-hscx_init(struct hscx_hw *hx)
-{
-	u8 val;
-
-	WriteHSCX(hx, IPAC_RAH2, 0xFF);
-	WriteHSCX(hx, IPAC_XBCH, 0x00);
-	WriteHSCX(hx, IPAC_RLCR, 0x00);
-
-	if (hx->ip->type & IPAC_TYPE_HSCX) {
-		WriteHSCX(hx, IPAC_CCR1, 0x85);
-		val = ReadHSCX(hx, HSCX_VSTR);
-		pr_debug("%s: HSCX VSTR %02x\n", hx->ip->name, val);
-		if (hx->bch.debug & DEBUG_HW)
-			pr_notice("%s: HSCX version %s\n", hx->ip->name,
-				  HSCXVer[val & 0x0f]);
-	} else
-		WriteHSCX(hx, IPAC_CCR1, 0x82);
-	WriteHSCX(hx, IPAC_CCR2, 0x30);
-	WriteHSCX(hx, IPAC_XCCR, 0x07);
-	WriteHSCX(hx, IPAC_RCCR, 0x07);
-}
-
-static int
-ipac_init(struct ipac_hw *ipac)
-{
-	u8 val;
-
-	if (ipac->type & IPAC_TYPE_HSCX) {
-		hscx_init(&ipac->hscx[0]);
-		hscx_init(&ipac->hscx[1]);
-		val = ReadIPAC(ipac, IPAC_ID);
-	} else if (ipac->type & IPAC_TYPE_IPAC) {
-		hscx_init(&ipac->hscx[0]);
-		hscx_init(&ipac->hscx[1]);
-		WriteIPAC(ipac, IPAC_MASK, IPAC__ON);
-		val = ReadIPAC(ipac, IPAC_CONF);
-		/* conf is default 0, but can be overwritten by card setup */
-		pr_debug("%s: IPAC CONF %02x/%02x\n", ipac->name,
-			 val, ipac->conf);
-		WriteIPAC(ipac, IPAC_CONF, ipac->conf);
-		val = ReadIPAC(ipac, IPAC_ID);
-		if (ipac->hscx[0].bch.debug & DEBUG_HW)
-			pr_notice("%s: IPAC Design ID %02x\n", ipac->name, val);
-	}
-	/* nothing special for IPACX to do here */
-	return isac_init(&ipac->isac);
-}
-
-static int
-open_bchannel(struct ipac_hw *ipac, struct channel_req *rq)
-{
-	struct bchannel		*bch;
-
-	if (rq->adr.channel == 0 || rq->adr.channel > 2)
-		return -EINVAL;
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	bch = &ipac->hscx[rq->adr.channel - 1].bch;
-	if (test_and_set_bit(FLG_OPEN, &bch->Flags))
-		return -EBUSY; /* b-channel can be only open once */
-	test_and_clear_bit(FLG_FILLEMPTY, &bch->Flags);
-	bch->ch.protocol = rq->protocol;
-	rq->ch = &bch->ch;
-	return 0;
-}
-
-static int
-channel_ctrl(struct ipac_hw *ipac, struct mISDN_ctrl_req *cq)
-{
-	int	ret = 0;
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_LOOP | MISDN_CTRL_L1_TIMER3;
-		break;
-	case MISDN_CTRL_LOOP:
-		/* cq->channel: 0 disable, 1 B1 loop 2 B2 loop, 3 both */
-		if (cq->channel < 0 || cq->channel > 3) {
-			ret = -EINVAL;
-			break;
-		}
-		ret = ipac->ctrl(ipac, HW_TESTLOOP, cq->channel);
-		break;
-	case MISDN_CTRL_L1_TIMER3:
-		ret = ipac->isac.ctrl(&ipac->isac, HW_TIMER3_VALUE, cq->p1);
-		break;
-	default:
-		pr_info("%s: unknown CTRL OP %x\n", ipac->name, cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-ipac_dctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
-{
-	struct mISDNdevice *dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel *dch = container_of(dev, struct dchannel, dev);
-	struct isac_hw *isac = container_of(dch, struct isac_hw, dch);
-	struct ipac_hw *ipac = container_of(isac, struct ipac_hw, isac);
-	struct channel_req *rq;
-	int err = 0;
-
-	pr_debug("%s: DCTRL: %x %p\n", ipac->name, cmd, arg);
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		rq = arg;
-		if (rq->protocol == ISDN_P_TE_S0)
-			err = open_dchannel_caller(isac, rq, __builtin_return_address(0));
-		else
-			err = open_bchannel(ipac, rq);
-		if (err)
-			break;
-		if (!try_module_get(ipac->owner))
-			pr_info("%s: cannot get module\n", ipac->name);
-		break;
-	case CLOSE_CHANNEL:
-		pr_debug("%s: dev(%d) close from %p\n", ipac->name,
-			 dch->dev.id, __builtin_return_address(0));
-		module_put(ipac->owner);
-		break;
-	case CONTROL_CHANNEL:
-		err = channel_ctrl(ipac, arg);
-		break;
-	default:
-		pr_debug("%s: unknown DCTRL command %x\n", ipac->name, cmd);
-		return -EINVAL;
-	}
-	return err;
-}
-
-u32
-mISDNipac_init(struct ipac_hw *ipac, void *hw)
-{
-	u32 ret;
-	u8 i;
-
-	ipac->hw = hw;
-	if (ipac->isac.dch.debug & DEBUG_HW)
-		pr_notice("%s: ipac type %x\n", ipac->name, ipac->type);
-	if (ipac->type & IPAC_TYPE_HSCX) {
-		ipac->isac.type = IPAC_TYPE_ISAC;
-		ipac->hscx[0].off = 0;
-		ipac->hscx[1].off = 0x40;
-		ipac->hscx[0].fifo_size = 32;
-		ipac->hscx[1].fifo_size = 32;
-	} else if (ipac->type & IPAC_TYPE_IPAC) {
-		ipac->isac.type = IPAC_TYPE_IPAC | IPAC_TYPE_ISAC;
-		ipac->hscx[0].off = 0;
-		ipac->hscx[1].off = 0x40;
-		ipac->hscx[0].fifo_size = 64;
-		ipac->hscx[1].fifo_size = 64;
-	} else if (ipac->type & IPAC_TYPE_IPACX) {
-		ipac->isac.type = IPAC_TYPE_IPACX | IPAC_TYPE_ISACX;
-		ipac->hscx[0].off = IPACX_OFF_ICA;
-		ipac->hscx[1].off = IPACX_OFF_ICB;
-		ipac->hscx[0].fifo_size = 64;
-		ipac->hscx[1].fifo_size = 64;
-	} else
-		return 0;
-
-	mISDNisac_init(&ipac->isac, hw);
-
-	ipac->isac.dch.dev.D.ctrl = ipac_dctrl;
-
-	for (i = 0; i < 2; i++) {
-		ipac->hscx[i].bch.nr = i + 1;
-		set_channelmap(i + 1, ipac->isac.dch.dev.channelmap);
-		list_add(&ipac->hscx[i].bch.ch.list,
-			 &ipac->isac.dch.dev.bchannels);
-		mISDN_initbchannel(&ipac->hscx[i].bch, MAX_DATA_MEM,
-				   ipac->hscx[i].fifo_size);
-		ipac->hscx[i].bch.ch.nr = i + 1;
-		ipac->hscx[i].bch.ch.send = &hscx_l2l1;
-		ipac->hscx[i].bch.ch.ctrl = hscx_bctrl;
-		ipac->hscx[i].bch.hw = hw;
-		ipac->hscx[i].ip = ipac;
-		/* default values for IOM time slots
-		 * can be overwritten by card */
-		ipac->hscx[i].slot = (i == 0) ? 0x2f : 0x03;
-	}
-
-	ipac->init = ipac_init;
-	ipac->release = free_ipac;
-
-	ret =	(1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK));
-	return ret;
-}
-EXPORT_SYMBOL(mISDNipac_init);
-
-static int __init
-isac_mod_init(void)
-{
-	pr_notice("mISDNipac module version %s\n", ISAC_REV);
-	return 0;
-}
-
-static void __exit
-isac_mod_cleanup(void)
-{
-	pr_notice("mISDNipac module unloaded\n");
-}
-module_init(isac_mod_init);
-module_exit(isac_mod_cleanup);
diff --git a/drivers/isdn/hardware/mISDN/mISDNisar.c b/drivers/isdn/hardware/mISDN/mISDNisar.c
deleted file mode 100644
index dace91ba412b..000000000000
--- a/drivers/isdn/hardware/mISDN/mISDNisar.c
+++ /dev/null
@@ -1,1694 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mISDNisar.c   ISAR (Siemens PSB 7110) specific functions
- *
- * Author Karsten Keil (keil@isdn4linux.de)
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-/* define this to enable static debug messages, if you kernel supports
- * dynamic debugging, you should use debugfs for this
- */
-/* #define DEBUG */
-
-#include <linux/gfp.h>
-#include <linux/delay.h>
-#include <linux/vmalloc.h>
-#include <linux/mISDNhw.h>
-#include <linux/module.h>
-#include "isar.h"
-
-#define ISAR_REV	"2.1"
-
-MODULE_AUTHOR("Karsten Keil");
-MODULE_DESCRIPTION("mISDN driver for ISAR (Siemens PSB 7110) specific functions");
-MODULE_LICENSE("GPL v2");
-MODULE_VERSION(ISAR_REV);
-
-#define DEBUG_HW_FIRMWARE_FIFO	0x10000
-
-static const u8 faxmodulation[] = {3, 24, 48, 72, 73, 74, 96, 97, 98, 121,
-				   122, 145, 146};
-#define FAXMODCNT 13
-
-static void isar_setup(struct isar_hw *);
-
-static inline int
-waitforHIA(struct isar_hw *isar, int timeout)
-{
-	int t = timeout;
-	u8 val = isar->read_reg(isar->hw, ISAR_HIA);
-
-	while ((val & 1) && t) {
-		udelay(1);
-		t--;
-		val = isar->read_reg(isar->hw, ISAR_HIA);
-	}
-	pr_debug("%s: HIA after %dus\n", isar->name, timeout - t);
-	return timeout;
-}
-
-/*
- * send msg to ISAR mailbox
- * if msg is NULL use isar->buf
- */
-static int
-send_mbox(struct isar_hw *isar, u8 his, u8 creg, u8 len, u8 *msg)
-{
-	if (!waitforHIA(isar, 1000))
-		return 0;
-	pr_debug("send_mbox(%02x,%02x,%d)\n", his, creg, len);
-	isar->write_reg(isar->hw, ISAR_CTRL_H, creg);
-	isar->write_reg(isar->hw, ISAR_CTRL_L, len);
-	isar->write_reg(isar->hw, ISAR_WADR, 0);
-	if (!msg)
-		msg = isar->buf;
-	if (msg && len) {
-		isar->write_fifo(isar->hw, ISAR_MBOX, msg, len);
-		if (isar->ch[0].bch.debug & DEBUG_HW_BFIFO) {
-			int l = 0;
-
-			while (l < (int)len) {
-				hex_dump_to_buffer(msg + l, len - l, 32, 1,
-						   isar->log, 256, 1);
-				pr_debug("%s: %s %02x: %s\n", isar->name,
-					 __func__, l, isar->log);
-				l += 32;
-			}
-		}
-	}
-	isar->write_reg(isar->hw, ISAR_HIS, his);
-	waitforHIA(isar, 1000);
-	return 1;
-}
-
-/*
- * receive message from ISAR mailbox
- * if msg is NULL use isar->buf
- */
-static void
-rcv_mbox(struct isar_hw *isar, u8 *msg)
-{
-	if (!msg)
-		msg = isar->buf;
-	isar->write_reg(isar->hw, ISAR_RADR, 0);
-	if (msg && isar->clsb) {
-		isar->read_fifo(isar->hw, ISAR_MBOX, msg, isar->clsb);
-		if (isar->ch[0].bch.debug & DEBUG_HW_BFIFO) {
-			int l = 0;
-
-			while (l < (int)isar->clsb) {
-				hex_dump_to_buffer(msg + l, isar->clsb - l, 32,
-						   1, isar->log, 256, 1);
-				pr_debug("%s: %s %02x: %s\n", isar->name,
-					 __func__, l, isar->log);
-				l += 32;
-			}
-		}
-	}
-	isar->write_reg(isar->hw, ISAR_IIA, 0);
-}
-
-static inline void
-get_irq_infos(struct isar_hw *isar)
-{
-	isar->iis = isar->read_reg(isar->hw, ISAR_IIS);
-	isar->cmsb = isar->read_reg(isar->hw, ISAR_CTRL_H);
-	isar->clsb = isar->read_reg(isar->hw, ISAR_CTRL_L);
-	pr_debug("%s: rcv_mbox(%02x,%02x,%d)\n", isar->name,
-		 isar->iis, isar->cmsb, isar->clsb);
-}
-
-/*
- * poll answer message from ISAR mailbox
- * should be used only with ISAR IRQs disabled before DSP was started
- *
- */
-static int
-poll_mbox(struct isar_hw *isar, int maxdelay)
-{
-	int t = maxdelay;
-	u8 irq;
-
-	irq = isar->read_reg(isar->hw, ISAR_IRQBIT);
-	while (t && !(irq & ISAR_IRQSTA)) {
-		udelay(1);
-		t--;
-	}
-	if (t)	{
-		get_irq_infos(isar);
-		rcv_mbox(isar, NULL);
-	}
-	pr_debug("%s: pulled %d bytes after %d us\n",
-		 isar->name, isar->clsb, maxdelay - t);
-	return t;
-}
-
-static int
-ISARVersion(struct isar_hw *isar)
-{
-	int ver;
-
-	/* disable ISAR IRQ */
-	isar->write_reg(isar->hw, ISAR_IRQBIT, 0);
-	isar->buf[0] = ISAR_MSG_HWVER;
-	isar->buf[1] = 0;
-	isar->buf[2] = 1;
-	if (!send_mbox(isar, ISAR_HIS_VNR, 0, 3, NULL))
-		return -1;
-	if (!poll_mbox(isar, 1000))
-		return -2;
-	if (isar->iis == ISAR_IIS_VNR) {
-		if (isar->clsb == 1) {
-			ver = isar->buf[0] & 0xf;
-			return ver;
-		}
-		return -3;
-	}
-	return -4;
-}
-
-static int
-load_firmware(struct isar_hw *isar, const u8 *buf, int size)
-{
-	u32	saved_debug = isar->ch[0].bch.debug;
-	int	ret, cnt;
-	u8	nom, noc;
-	u16	left, val, *sp = (u16 *)buf;
-	u8	*mp;
-	u_long	flags;
-
-	struct {
-		u16 sadr;
-		u16 len;
-		u16 d_key;
-	} blk_head;
-
-	if (1 != isar->version) {
-		pr_err("%s: ISAR wrong version %d firmware download aborted\n",
-		       isar->name, isar->version);
-		return -EINVAL;
-	}
-	if (!(saved_debug & DEBUG_HW_FIRMWARE_FIFO))
-		isar->ch[0].bch.debug &= ~DEBUG_HW_BFIFO;
-	pr_debug("%s: load firmware %d words (%d bytes)\n",
-		 isar->name, size / 2, size);
-	cnt = 0;
-	size /= 2;
-	/* disable ISAR IRQ */
-	spin_lock_irqsave(isar->hwlock, flags);
-	isar->write_reg(isar->hw, ISAR_IRQBIT, 0);
-	spin_unlock_irqrestore(isar->hwlock, flags);
-	while (cnt < size) {
-		blk_head.sadr = le16_to_cpu(*sp++);
-		blk_head.len = le16_to_cpu(*sp++);
-		blk_head.d_key = le16_to_cpu(*sp++);
-		cnt += 3;
-		pr_debug("ISAR firmware block (%#x,%d,%#x)\n",
-			 blk_head.sadr, blk_head.len, blk_head.d_key & 0xff);
-		left = blk_head.len;
-		if (cnt + left > size) {
-			pr_info("%s: firmware error have %d need %d words\n",
-				isar->name, size, cnt + left);
-			ret = -EINVAL;
-			goto reterrflg;
-		}
-		spin_lock_irqsave(isar->hwlock, flags);
-		if (!send_mbox(isar, ISAR_HIS_DKEY, blk_head.d_key & 0xff,
-			       0, NULL)) {
-			pr_info("ISAR send_mbox dkey failed\n");
-			ret = -ETIME;
-			goto reterror;
-		}
-		if (!poll_mbox(isar, 1000)) {
-			pr_warn("ISAR poll_mbox dkey failed\n");
-			ret = -ETIME;
-			goto reterror;
-		}
-		spin_unlock_irqrestore(isar->hwlock, flags);
-		if ((isar->iis != ISAR_IIS_DKEY) || isar->cmsb || isar->clsb) {
-			pr_info("ISAR wrong dkey response (%x,%x,%x)\n",
-				isar->iis, isar->cmsb, isar->clsb);
-			ret = 1;
-			goto reterrflg;
-		}
-		while (left > 0) {
-			if (left > 126)
-				noc = 126;
-			else
-				noc = left;
-			nom = (2 * noc) + 3;
-			mp  = isar->buf;
-			/* the ISAR is big endian */
-			*mp++ = blk_head.sadr >> 8;
-			*mp++ = blk_head.sadr & 0xFF;
-			left -= noc;
-			cnt += noc;
-			*mp++ = noc;
-			pr_debug("%s: load %3d words at %04x\n", isar->name,
-				 noc, blk_head.sadr);
-			blk_head.sadr += noc;
-			while (noc) {
-				val = le16_to_cpu(*sp++);
-				*mp++ = val >> 8;
-				*mp++ = val & 0xFF;
-				noc--;
-			}
-			spin_lock_irqsave(isar->hwlock, flags);
-			if (!send_mbox(isar, ISAR_HIS_FIRM, 0, nom, NULL)) {
-				pr_info("ISAR send_mbox prog failed\n");
-				ret = -ETIME;
-				goto reterror;
-			}
-			if (!poll_mbox(isar, 1000)) {
-				pr_info("ISAR poll_mbox prog failed\n");
-				ret = -ETIME;
-				goto reterror;
-			}
-			spin_unlock_irqrestore(isar->hwlock, flags);
-			if ((isar->iis != ISAR_IIS_FIRM) ||
-			    isar->cmsb || isar->clsb) {
-				pr_info("ISAR wrong prog response (%x,%x,%x)\n",
-					isar->iis, isar->cmsb, isar->clsb);
-				ret = -EIO;
-				goto reterrflg;
-			}
-		}
-		pr_debug("%s: ISAR firmware block %d words loaded\n",
-			 isar->name, blk_head.len);
-	}
-	isar->ch[0].bch.debug = saved_debug;
-	/* 10ms delay */
-	cnt = 10;
-	while (cnt--)
-		mdelay(1);
-	isar->buf[0] = 0xff;
-	isar->buf[1] = 0xfe;
-	isar->bstat = 0;
-	spin_lock_irqsave(isar->hwlock, flags);
-	if (!send_mbox(isar, ISAR_HIS_STDSP, 0, 2, NULL)) {
-		pr_info("ISAR send_mbox start dsp failed\n");
-		ret = -ETIME;
-		goto reterror;
-	}
-	if (!poll_mbox(isar, 1000)) {
-		pr_info("ISAR poll_mbox start dsp failed\n");
-		ret = -ETIME;
-		goto reterror;
-	}
-	if ((isar->iis != ISAR_IIS_STDSP) || isar->cmsb || isar->clsb) {
-		pr_info("ISAR wrong start dsp response (%x,%x,%x)\n",
-			isar->iis, isar->cmsb, isar->clsb);
-		ret = -EIO;
-		goto reterror;
-	} else
-		pr_debug("%s: ISAR start dsp success\n", isar->name);
-
-	/* NORMAL mode entered */
-	/* Enable IRQs of ISAR */
-	isar->write_reg(isar->hw, ISAR_IRQBIT, ISAR_IRQSTA);
-	spin_unlock_irqrestore(isar->hwlock, flags);
-	cnt = 1000; /* max 1s */
-	while ((!isar->bstat) && cnt) {
-		mdelay(1);
-		cnt--;
-	}
-	if (!cnt) {
-		pr_info("ISAR no general status event received\n");
-		ret = -ETIME;
-		goto reterrflg;
-	} else
-		pr_debug("%s: ISAR general status event %x\n",
-			 isar->name, isar->bstat);
-	/* 10ms delay */
-	cnt = 10;
-	while (cnt--)
-		mdelay(1);
-	isar->iis = 0;
-	spin_lock_irqsave(isar->hwlock, flags);
-	if (!send_mbox(isar, ISAR_HIS_DIAG, ISAR_CTRL_STST, 0, NULL)) {
-		pr_info("ISAR send_mbox self tst failed\n");
-		ret = -ETIME;
-		goto reterror;
-	}
-	spin_unlock_irqrestore(isar->hwlock, flags);
-	cnt = 10000; /* max 100 ms */
-	while ((isar->iis != ISAR_IIS_DIAG) && cnt) {
-		udelay(10);
-		cnt--;
-	}
-	mdelay(1);
-	if (!cnt) {
-		pr_info("ISAR no self tst response\n");
-		ret = -ETIME;
-		goto reterrflg;
-	}
-	if ((isar->cmsb == ISAR_CTRL_STST) && (isar->clsb == 1)
-	    && (isar->buf[0] == 0))
-		pr_debug("%s: ISAR selftest OK\n", isar->name);
-	else {
-		pr_info("ISAR selftest not OK %x/%x/%x\n",
-			isar->cmsb, isar->clsb, isar->buf[0]);
-		ret = -EIO;
-		goto reterrflg;
-	}
-	spin_lock_irqsave(isar->hwlock, flags);
-	isar->iis = 0;
-	if (!send_mbox(isar, ISAR_HIS_DIAG, ISAR_CTRL_SWVER, 0, NULL)) {
-		pr_info("ISAR RQST SVN failed\n");
-		ret = -ETIME;
-		goto reterror;
-	}
-	spin_unlock_irqrestore(isar->hwlock, flags);
-	cnt = 30000; /* max 300 ms */
-	while ((isar->iis != ISAR_IIS_DIAG) && cnt) {
-		udelay(10);
-		cnt--;
-	}
-	mdelay(1);
-	if (!cnt) {
-		pr_info("ISAR no SVN response\n");
-		ret = -ETIME;
-		goto reterrflg;
-	} else {
-		if ((isar->cmsb == ISAR_CTRL_SWVER) && (isar->clsb == 1)) {
-			pr_notice("%s: ISAR software version %#x\n",
-				  isar->name, isar->buf[0]);
-		} else {
-			pr_info("%s: ISAR wrong swver response (%x,%x)"
-				" cnt(%d)\n", isar->name, isar->cmsb,
-				isar->clsb, cnt);
-			ret = -EIO;
-			goto reterrflg;
-		}
-	}
-	spin_lock_irqsave(isar->hwlock, flags);
-	isar_setup(isar);
-	spin_unlock_irqrestore(isar->hwlock, flags);
-	ret = 0;
-reterrflg:
-	spin_lock_irqsave(isar->hwlock, flags);
-reterror:
-	isar->ch[0].bch.debug = saved_debug;
-	if (ret)
-		/* disable ISAR IRQ */
-		isar->write_reg(isar->hw, ISAR_IRQBIT, 0);
-	spin_unlock_irqrestore(isar->hwlock, flags);
-	return ret;
-}
-
-static inline void
-deliver_status(struct isar_ch *ch, int status)
-{
-	pr_debug("%s: HL->LL FAXIND %x\n", ch->is->name, status);
-	_queue_data(&ch->bch.ch, PH_CONTROL_IND, status, 0, NULL, GFP_ATOMIC);
-}
-
-static inline void
-isar_rcv_frame(struct isar_ch *ch)
-{
-	u8	*ptr;
-	int	maxlen;
-
-	if (!ch->is->clsb) {
-		pr_debug("%s; ISAR zero len frame\n", ch->is->name);
-		ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-		return;
-	}
-	if (test_bit(FLG_RX_OFF, &ch->bch.Flags)) {
-		ch->bch.dropcnt += ch->is->clsb;
-		ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-		return;
-	}
-	switch (ch->bch.state) {
-	case ISDN_P_NONE:
-		pr_debug("%s: ISAR protocol 0 spurious IIS_RDATA %x/%x/%x\n",
-			 ch->is->name, ch->is->iis, ch->is->cmsb, ch->is->clsb);
-		ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-		break;
-	case ISDN_P_B_RAW:
-	case ISDN_P_B_L2DTMF:
-	case ISDN_P_B_MODEM_ASYNC:
-		maxlen = bchannel_get_rxbuf(&ch->bch, ch->is->clsb);
-		if (maxlen < 0) {
-			pr_warn("%s.B%d: No bufferspace for %d bytes\n",
-				ch->is->name, ch->bch.nr, ch->is->clsb);
-			ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-			break;
-		}
-		rcv_mbox(ch->is, skb_put(ch->bch.rx_skb, ch->is->clsb));
-		recv_Bchannel(&ch->bch, 0, false);
-		break;
-	case ISDN_P_B_HDLC:
-		maxlen = bchannel_get_rxbuf(&ch->bch, ch->is->clsb);
-		if (maxlen < 0) {
-			pr_warn("%s.B%d: No bufferspace for %d bytes\n",
-				ch->is->name, ch->bch.nr, ch->is->clsb);
-			ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-			break;
-		}
-		if (ch->is->cmsb & HDLC_ERROR) {
-			pr_debug("%s: ISAR frame error %x len %d\n",
-				 ch->is->name, ch->is->cmsb, ch->is->clsb);
-#ifdef ERROR_STATISTIC
-			if (ch->is->cmsb & HDLC_ERR_RER)
-				ch->bch.err_inv++;
-			if (ch->is->cmsb & HDLC_ERR_CER)
-				ch->bch.err_crc++;
-#endif
-			skb_trim(ch->bch.rx_skb, 0);
-			ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-			break;
-		}
-		if (ch->is->cmsb & HDLC_FSD)
-			skb_trim(ch->bch.rx_skb, 0);
-		ptr = skb_put(ch->bch.rx_skb, ch->is->clsb);
-		rcv_mbox(ch->is, ptr);
-		if (ch->is->cmsb & HDLC_FED) {
-			if (ch->bch.rx_skb->len < 3) { /* last 2 are the FCS */
-				pr_debug("%s: ISAR frame too short %d\n",
-					 ch->is->name, ch->bch.rx_skb->len);
-				skb_trim(ch->bch.rx_skb, 0);
-				break;
-			}
-			skb_trim(ch->bch.rx_skb, ch->bch.rx_skb->len - 2);
-			recv_Bchannel(&ch->bch, 0, false);
-		}
-		break;
-	case ISDN_P_B_T30_FAX:
-		if (ch->state != STFAX_ACTIV) {
-			pr_debug("%s: isar_rcv_frame: not ACTIV\n",
-				 ch->is->name);
-			ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-			if (ch->bch.rx_skb)
-				skb_trim(ch->bch.rx_skb, 0);
-			break;
-		}
-		if (!ch->bch.rx_skb) {
-			ch->bch.rx_skb = mI_alloc_skb(ch->bch.maxlen,
-						      GFP_ATOMIC);
-			if (unlikely(!ch->bch.rx_skb)) {
-				pr_info("%s: B receive out of memory\n",
-					__func__);
-				ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-				break;
-			}
-		}
-		if (ch->cmd == PCTRL_CMD_FRM) {
-			rcv_mbox(ch->is, skb_put(ch->bch.rx_skb, ch->is->clsb));
-			pr_debug("%s: isar_rcv_frame: %d\n",
-				 ch->is->name, ch->bch.rx_skb->len);
-			if (ch->is->cmsb & SART_NMD) { /* ABORT */
-				pr_debug("%s: isar_rcv_frame: no more data\n",
-					 ch->is->name);
-				ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-				send_mbox(ch->is, SET_DPS(ch->dpath) |
-					  ISAR_HIS_PUMPCTRL, PCTRL_CMD_ESC,
-					  0, NULL);
-				ch->state = STFAX_ESCAPE;
-				/* set_skb_flag(skb, DF_NOMOREDATA); */
-			}
-			recv_Bchannel(&ch->bch, 0, false);
-			if (ch->is->cmsb & SART_NMD)
-				deliver_status(ch, HW_MOD_NOCARR);
-			break;
-		}
-		if (ch->cmd != PCTRL_CMD_FRH) {
-			pr_debug("%s: isar_rcv_frame: unknown fax mode %x\n",
-				 ch->is->name, ch->cmd);
-			ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-			if (ch->bch.rx_skb)
-				skb_trim(ch->bch.rx_skb, 0);
-			break;
-		}
-		/* PCTRL_CMD_FRH */
-		if ((ch->bch.rx_skb->len + ch->is->clsb) >
-		    (ch->bch.maxlen + 2)) {
-			pr_info("%s: %s incoming packet too large\n",
-				ch->is->name, __func__);
-			ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-			skb_trim(ch->bch.rx_skb, 0);
-			break;
-		}  else if (ch->is->cmsb & HDLC_ERROR) {
-			pr_info("%s: ISAR frame error %x len %d\n",
-				ch->is->name, ch->is->cmsb, ch->is->clsb);
-			skb_trim(ch->bch.rx_skb, 0);
-			ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-			break;
-		}
-		if (ch->is->cmsb & HDLC_FSD)
-			skb_trim(ch->bch.rx_skb, 0);
-		ptr = skb_put(ch->bch.rx_skb, ch->is->clsb);
-		rcv_mbox(ch->is, ptr);
-		if (ch->is->cmsb & HDLC_FED) {
-			if (ch->bch.rx_skb->len < 3) { /* last 2 are the FCS */
-				pr_info("%s: ISAR frame too short %d\n",
-					ch->is->name, ch->bch.rx_skb->len);
-				skb_trim(ch->bch.rx_skb, 0);
-				break;
-			}
-			skb_trim(ch->bch.rx_skb, ch->bch.rx_skb->len - 2);
-			recv_Bchannel(&ch->bch, 0, false);
-		}
-		if (ch->is->cmsb & SART_NMD) { /* ABORT */
-			pr_debug("%s: isar_rcv_frame: no more data\n",
-				 ch->is->name);
-			ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-			if (ch->bch.rx_skb)
-				skb_trim(ch->bch.rx_skb, 0);
-			send_mbox(ch->is, SET_DPS(ch->dpath) |
-				  ISAR_HIS_PUMPCTRL, PCTRL_CMD_ESC, 0, NULL);
-			ch->state = STFAX_ESCAPE;
-			deliver_status(ch, HW_MOD_NOCARR);
-		}
-		break;
-	default:
-		pr_info("isar_rcv_frame protocol (%x)error\n", ch->bch.state);
-		ch->is->write_reg(ch->is->hw, ISAR_IIA, 0);
-		break;
-	}
-}
-
-static void
-isar_fill_fifo(struct isar_ch *ch)
-{
-	int count;
-	u8 msb;
-	u8 *ptr;
-
-	pr_debug("%s: ch%d  tx_skb %d tx_idx %d\n", ch->is->name, ch->bch.nr,
-		 ch->bch.tx_skb ? ch->bch.tx_skb->len : -1, ch->bch.tx_idx);
-	if (!(ch->is->bstat &
-	      (ch->dpath == 1 ? BSTAT_RDM1 : BSTAT_RDM2)))
-		return;
-	if (!ch->bch.tx_skb) {
-		if (!test_bit(FLG_TX_EMPTY, &ch->bch.Flags) ||
-		    (ch->bch.state != ISDN_P_B_RAW))
-			return;
-		count = ch->mml;
-		/* use the card buffer */
-		memset(ch->is->buf, ch->bch.fill[0], count);
-		send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA,
-			  0, count, ch->is->buf);
-		return;
-	}
-	count = ch->bch.tx_skb->len - ch->bch.tx_idx;
-	if (count <= 0)
-		return;
-	if (count > ch->mml) {
-		msb = 0;
-		count = ch->mml;
-	} else {
-		msb = HDLC_FED;
-	}
-	ptr = ch->bch.tx_skb->data + ch->bch.tx_idx;
-	if (!ch->bch.tx_idx) {
-		pr_debug("%s: frame start\n", ch->is->name);
-		if ((ch->bch.state == ISDN_P_B_T30_FAX) &&
-		    (ch->cmd == PCTRL_CMD_FTH)) {
-			if (count > 1) {
-				if ((ptr[0] == 0xff) && (ptr[1] == 0x13)) {
-					/* last frame */
-					test_and_set_bit(FLG_LASTDATA,
-							 &ch->bch.Flags);
-					pr_debug("%s: set LASTDATA\n",
-						 ch->is->name);
-					if (msb == HDLC_FED)
-						test_and_set_bit(FLG_DLEETX,
-								 &ch->bch.Flags);
-				}
-			}
-		}
-		msb |= HDLC_FST;
-	}
-	ch->bch.tx_idx += count;
-	switch (ch->bch.state) {
-	case ISDN_P_NONE:
-		pr_info("%s: wrong protocol 0\n", __func__);
-		break;
-	case ISDN_P_B_RAW:
-	case ISDN_P_B_L2DTMF:
-	case ISDN_P_B_MODEM_ASYNC:
-		send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA,
-			  0, count, ptr);
-		break;
-	case ISDN_P_B_HDLC:
-		send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA,
-			  msb, count, ptr);
-		break;
-	case ISDN_P_B_T30_FAX:
-		if (ch->state != STFAX_ACTIV)
-			pr_debug("%s: not ACTIV\n", ch->is->name);
-		else if (ch->cmd == PCTRL_CMD_FTH)
-			send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA,
-				  msb, count, ptr);
-		else if (ch->cmd == PCTRL_CMD_FTM)
-			send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA,
-				  0, count, ptr);
-		else
-			pr_debug("%s: not FTH/FTM\n", ch->is->name);
-		break;
-	default:
-		pr_info("%s: protocol(%x) error\n",
-			__func__, ch->bch.state);
-		break;
-	}
-}
-
-static inline struct isar_ch *
-sel_bch_isar(struct isar_hw *isar, u8 dpath)
-{
-	struct isar_ch	*base = &isar->ch[0];
-
-	if ((!dpath) || (dpath > 2))
-		return NULL;
-	if (base->dpath == dpath)
-		return base;
-	base++;
-	if (base->dpath == dpath)
-		return base;
-	return NULL;
-}
-
-static void
-send_next(struct isar_ch *ch)
-{
-	pr_debug("%s: %s ch%d tx_skb %d tx_idx %d\n", ch->is->name, __func__,
-		 ch->bch.nr, ch->bch.tx_skb ? ch->bch.tx_skb->len : -1,
-		 ch->bch.tx_idx);
-	if (ch->bch.state == ISDN_P_B_T30_FAX) {
-		if (ch->cmd == PCTRL_CMD_FTH) {
-			if (test_bit(FLG_LASTDATA, &ch->bch.Flags)) {
-				pr_debug("set NMD_DATA\n");
-				test_and_set_bit(FLG_NMD_DATA, &ch->bch.Flags);
-			}
-		} else if (ch->cmd == PCTRL_CMD_FTM) {
-			if (test_bit(FLG_DLEETX, &ch->bch.Flags)) {
-				test_and_set_bit(FLG_LASTDATA, &ch->bch.Flags);
-				test_and_set_bit(FLG_NMD_DATA, &ch->bch.Flags);
-			}
-		}
-	}
-	dev_kfree_skb(ch->bch.tx_skb);
-	if (get_next_bframe(&ch->bch)) {
-		isar_fill_fifo(ch);
-		test_and_clear_bit(FLG_TX_EMPTY, &ch->bch.Flags);
-	} else if (test_bit(FLG_TX_EMPTY, &ch->bch.Flags)) {
-		isar_fill_fifo(ch);
-	} else {
-		if (test_and_clear_bit(FLG_DLEETX, &ch->bch.Flags)) {
-			if (test_and_clear_bit(FLG_LASTDATA,
-					       &ch->bch.Flags)) {
-				if (test_and_clear_bit(FLG_NMD_DATA,
-						       &ch->bch.Flags)) {
-					u8 zd = 0;
-					send_mbox(ch->is, SET_DPS(ch->dpath) |
-						  ISAR_HIS_SDATA, 0x01, 1, &zd);
-				}
-				test_and_set_bit(FLG_LL_OK, &ch->bch.Flags);
-			} else {
-				deliver_status(ch, HW_MOD_CONNECT);
-			}
-		} else if (test_bit(FLG_FILLEMPTY, &ch->bch.Flags)) {
-			test_and_set_bit(FLG_TX_EMPTY, &ch->bch.Flags);
-		}
-	}
-}
-
-static void
-check_send(struct isar_hw *isar, u8 rdm)
-{
-	struct isar_ch	*ch;
-
-	pr_debug("%s: rdm %x\n", isar->name, rdm);
-	if (rdm & BSTAT_RDM1) {
-		ch = sel_bch_isar(isar, 1);
-		if (ch && test_bit(FLG_ACTIVE, &ch->bch.Flags)) {
-			if (ch->bch.tx_skb && (ch->bch.tx_skb->len >
-					       ch->bch.tx_idx))
-				isar_fill_fifo(ch);
-			else
-				send_next(ch);
-		}
-	}
-	if (rdm & BSTAT_RDM2) {
-		ch = sel_bch_isar(isar, 2);
-		if (ch && test_bit(FLG_ACTIVE, &ch->bch.Flags)) {
-			if (ch->bch.tx_skb && (ch->bch.tx_skb->len >
-					       ch->bch.tx_idx))
-				isar_fill_fifo(ch);
-			else
-				send_next(ch);
-		}
-	}
-}
-
-static const char *dmril[] = {"NO SPEED", "1200/75", "NODEF2", "75/1200", "NODEF4",
-		       "300", "600", "1200", "2400", "4800", "7200",
-		       "9600nt", "9600t", "12000", "14400", "WRONG"};
-static const char *dmrim[] = {"NO MOD", "NO DEF", "V32/V32b", "V22", "V21",
-		       "Bell103", "V23", "Bell202", "V17", "V29", "V27ter"};
-
-static void
-isar_pump_status_rsp(struct isar_ch *ch) {
-	u8 ril = ch->is->buf[0];
-	u8 rim;
-
-	if (!test_and_clear_bit(ISAR_RATE_REQ, &ch->is->Flags))
-		return;
-	if (ril > 14) {
-		pr_info("%s: wrong pstrsp ril=%d\n", ch->is->name, ril);
-		ril = 15;
-	}
-	switch (ch->is->buf[1]) {
-	case 0:
-		rim = 0;
-		break;
-	case 0x20:
-		rim = 2;
-		break;
-	case 0x40:
-		rim = 3;
-		break;
-	case 0x41:
-		rim = 4;
-		break;
-	case 0x51:
-		rim = 5;
-		break;
-	case 0x61:
-		rim = 6;
-		break;
-	case 0x71:
-		rim = 7;
-		break;
-	case 0x82:
-		rim = 8;
-		break;
-	case 0x92:
-		rim = 9;
-		break;
-	case 0xa2:
-		rim = 10;
-		break;
-	default:
-		rim = 1;
-		break;
-	}
-	sprintf(ch->conmsg, "%s %s", dmril[ril], dmrim[rim]);
-	pr_debug("%s: pump strsp %s\n", ch->is->name, ch->conmsg);
-}
-
-static void
-isar_pump_statev_modem(struct isar_ch *ch, u8 devt) {
-	u8 dps = SET_DPS(ch->dpath);
-
-	switch (devt) {
-	case PSEV_10MS_TIMER:
-		pr_debug("%s: pump stev TIMER\n", ch->is->name);
-		break;
-	case PSEV_CON_ON:
-		pr_debug("%s: pump stev CONNECT\n", ch->is->name);
-		deliver_status(ch, HW_MOD_CONNECT);
-		break;
-	case PSEV_CON_OFF:
-		pr_debug("%s: pump stev NO CONNECT\n", ch->is->name);
-		send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL);
-		deliver_status(ch, HW_MOD_NOCARR);
-		break;
-	case PSEV_V24_OFF:
-		pr_debug("%s: pump stev V24 OFF\n", ch->is->name);
-		break;
-	case PSEV_CTS_ON:
-		pr_debug("%s: pump stev CTS ON\n", ch->is->name);
-		break;
-	case PSEV_CTS_OFF:
-		pr_debug("%s pump stev CTS OFF\n", ch->is->name);
-		break;
-	case PSEV_DCD_ON:
-		pr_debug("%s: pump stev CARRIER ON\n", ch->is->name);
-		test_and_set_bit(ISAR_RATE_REQ, &ch->is->Flags);
-		send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL);
-		break;
-	case PSEV_DCD_OFF:
-		pr_debug("%s: pump stev CARRIER OFF\n", ch->is->name);
-		break;
-	case PSEV_DSR_ON:
-		pr_debug("%s: pump stev DSR ON\n", ch->is->name);
-		break;
-	case PSEV_DSR_OFF:
-		pr_debug("%s: pump stev DSR_OFF\n", ch->is->name);
-		break;
-	case PSEV_REM_RET:
-		pr_debug("%s: pump stev REMOTE RETRAIN\n", ch->is->name);
-		break;
-	case PSEV_REM_REN:
-		pr_debug("%s: pump stev REMOTE RENEGOTIATE\n", ch->is->name);
-		break;
-	case PSEV_GSTN_CLR:
-		pr_debug("%s: pump stev GSTN CLEAR\n", ch->is->name);
-		break;
-	default:
-		pr_info("u%s: unknown pump stev %x\n", ch->is->name, devt);
-		break;
-	}
-}
-
-static void
-isar_pump_statev_fax(struct isar_ch *ch, u8 devt) {
-	u8 dps = SET_DPS(ch->dpath);
-	u8 p1;
-
-	switch (devt) {
-	case PSEV_10MS_TIMER:
-		pr_debug("%s: pump stev TIMER\n", ch->is->name);
-		break;
-	case PSEV_RSP_READY:
-		pr_debug("%s: pump stev RSP_READY\n", ch->is->name);
-		ch->state = STFAX_READY;
-		deliver_status(ch, HW_MOD_READY);
-#ifdef AUTOCON
-		if (test_bit(BC_FLG_ORIG, &ch->bch.Flags))
-			isar_pump_cmd(bch, HW_MOD_FRH, 3);
-		else
-			isar_pump_cmd(bch, HW_MOD_FTH, 3);
-#endif
-		break;
-	case PSEV_LINE_TX_H:
-		if (ch->state == STFAX_LINE) {
-			pr_debug("%s: pump stev LINE_TX_H\n", ch->is->name);
-			ch->state = STFAX_CONT;
-			send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL,
-				  PCTRL_CMD_CONT, 0, NULL);
-		} else {
-			pr_debug("%s: pump stev LINE_TX_H wrong st %x\n",
-				 ch->is->name, ch->state);
-		}
-		break;
-	case PSEV_LINE_RX_H:
-		if (ch->state == STFAX_LINE) {
-			pr_debug("%s: pump stev LINE_RX_H\n", ch->is->name);
-			ch->state = STFAX_CONT;
-			send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL,
-				  PCTRL_CMD_CONT, 0, NULL);
-		} else {
-			pr_debug("%s: pump stev LINE_RX_H wrong st %x\n",
-				 ch->is->name, ch->state);
-		}
-		break;
-	case PSEV_LINE_TX_B:
-		if (ch->state == STFAX_LINE) {
-			pr_debug("%s: pump stev LINE_TX_B\n", ch->is->name);
-			ch->state = STFAX_CONT;
-			send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL,
-				  PCTRL_CMD_CONT, 0, NULL);
-		} else {
-			pr_debug("%s: pump stev LINE_TX_B wrong st %x\n",
-				 ch->is->name, ch->state);
-		}
-		break;
-	case PSEV_LINE_RX_B:
-		if (ch->state == STFAX_LINE) {
-			pr_debug("%s: pump stev LINE_RX_B\n", ch->is->name);
-			ch->state = STFAX_CONT;
-			send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL,
-				  PCTRL_CMD_CONT, 0, NULL);
-		} else {
-			pr_debug("%s: pump stev LINE_RX_B wrong st %x\n",
-				 ch->is->name, ch->state);
-		}
-		break;
-	case PSEV_RSP_CONN:
-		if (ch->state == STFAX_CONT) {
-			pr_debug("%s: pump stev RSP_CONN\n", ch->is->name);
-			ch->state = STFAX_ACTIV;
-			test_and_set_bit(ISAR_RATE_REQ, &ch->is->Flags);
-			send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL);
-			if (ch->cmd == PCTRL_CMD_FTH) {
-				int delay = (ch->mod == 3) ? 1000 : 200;
-				/* 1s (200 ms) Flags before data */
-				if (test_and_set_bit(FLG_FTI_RUN,
-						     &ch->bch.Flags))
-					timer_delete(&ch->ftimer);
-				ch->ftimer.expires =
-					jiffies + ((delay * HZ) / 1000);
-				test_and_set_bit(FLG_LL_CONN,
-						 &ch->bch.Flags);
-				add_timer(&ch->ftimer);
-			} else {
-				deliver_status(ch, HW_MOD_CONNECT);
-			}
-		} else {
-			pr_debug("%s: pump stev RSP_CONN wrong st %x\n",
-				 ch->is->name, ch->state);
-		}
-		break;
-	case PSEV_FLAGS_DET:
-		pr_debug("%s: pump stev FLAGS_DET\n", ch->is->name);
-		break;
-	case PSEV_RSP_DISC:
-		pr_debug("%s: pump stev RSP_DISC state(%d)\n",
-			 ch->is->name, ch->state);
-		if (ch->state == STFAX_ESCAPE) {
-			p1 = 5;
-			switch (ch->newcmd) {
-			case 0:
-				ch->state = STFAX_READY;
-				break;
-			case PCTRL_CMD_FTM:
-				p1 = 2;
-				fallthrough;
-			case PCTRL_CMD_FTH:
-				send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL,
-					  PCTRL_CMD_SILON, 1, &p1);
-				ch->state = STFAX_SILDET;
-				break;
-			case PCTRL_CMD_FRH:
-			case PCTRL_CMD_FRM:
-				ch->mod = ch->newmod;
-				p1 = ch->newmod;
-				ch->newmod = 0;
-				ch->cmd = ch->newcmd;
-				ch->newcmd = 0;
-				send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL,
-					  ch->cmd, 1, &p1);
-				ch->state = STFAX_LINE;
-				ch->try_mod = 3;
-				break;
-			default:
-				pr_debug("%s: RSP_DISC unknown newcmd %x\n",
-					 ch->is->name, ch->newcmd);
-				break;
-			}
-		} else if (ch->state == STFAX_ACTIV) {
-			if (test_and_clear_bit(FLG_LL_OK, &ch->bch.Flags))
-				deliver_status(ch, HW_MOD_OK);
-			else if (ch->cmd == PCTRL_CMD_FRM)
-				deliver_status(ch, HW_MOD_NOCARR);
-			else
-				deliver_status(ch, HW_MOD_FCERROR);
-			ch->state = STFAX_READY;
-		} else if (ch->state != STFAX_SILDET) {
-			/* ignore in STFAX_SILDET */
-			ch->state = STFAX_READY;
-			deliver_status(ch, HW_MOD_FCERROR);
-		}
-		break;
-	case PSEV_RSP_SILDET:
-		pr_debug("%s: pump stev RSP_SILDET\n", ch->is->name);
-		if (ch->state == STFAX_SILDET) {
-			ch->mod = ch->newmod;
-			p1 = ch->newmod;
-			ch->newmod = 0;
-			ch->cmd = ch->newcmd;
-			ch->newcmd = 0;
-			send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL,
-				  ch->cmd, 1, &p1);
-			ch->state = STFAX_LINE;
-			ch->try_mod = 3;
-		}
-		break;
-	case PSEV_RSP_SILOFF:
-		pr_debug("%s: pump stev RSP_SILOFF\n", ch->is->name);
-		break;
-	case PSEV_RSP_FCERR:
-		if (ch->state == STFAX_LINE) {
-			pr_debug("%s: pump stev RSP_FCERR try %d\n",
-				 ch->is->name, ch->try_mod);
-			if (ch->try_mod--) {
-				send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL,
-					  ch->cmd, 1, &ch->mod);
-				break;
-			}
-		}
-		pr_debug("%s: pump stev RSP_FCERR\n", ch->is->name);
-		ch->state = STFAX_ESCAPE;
-		send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, PCTRL_CMD_ESC,
-			  0, NULL);
-		deliver_status(ch, HW_MOD_FCERROR);
-		break;
-	default:
-		break;
-	}
-}
-
-void
-mISDNisar_irq(struct isar_hw *isar)
-{
-	struct isar_ch *ch;
-
-	get_irq_infos(isar);
-	switch (isar->iis & ISAR_IIS_MSCMSD) {
-	case ISAR_IIS_RDATA:
-		ch = sel_bch_isar(isar, isar->iis >> 6);
-		if (ch)
-			isar_rcv_frame(ch);
-		else {
-			pr_debug("%s: ISAR spurious IIS_RDATA %x/%x/%x\n",
-				 isar->name, isar->iis, isar->cmsb,
-				 isar->clsb);
-			isar->write_reg(isar->hw, ISAR_IIA, 0);
-		}
-		break;
-	case ISAR_IIS_GSTEV:
-		isar->write_reg(isar->hw, ISAR_IIA, 0);
-		isar->bstat |= isar->cmsb;
-		check_send(isar, isar->cmsb);
-		break;
-	case ISAR_IIS_BSTEV:
-#ifdef ERROR_STATISTIC
-		ch = sel_bch_isar(isar, isar->iis >> 6);
-		if (ch) {
-			if (isar->cmsb == BSTEV_TBO)
-				ch->bch.err_tx++;
-			if (isar->cmsb == BSTEV_RBO)
-				ch->bch.err_rdo++;
-		}
-#endif
-		pr_debug("%s: Buffer STEV dpath%d msb(%x)\n",
-			 isar->name, isar->iis >> 6, isar->cmsb);
-		isar->write_reg(isar->hw, ISAR_IIA, 0);
-		break;
-	case ISAR_IIS_PSTEV:
-		ch = sel_bch_isar(isar, isar->iis >> 6);
-		if (ch) {
-			rcv_mbox(isar, NULL);
-			if (ch->bch.state == ISDN_P_B_MODEM_ASYNC)
-				isar_pump_statev_modem(ch, isar->cmsb);
-			else if (ch->bch.state == ISDN_P_B_T30_FAX)
-				isar_pump_statev_fax(ch, isar->cmsb);
-			else if (ch->bch.state == ISDN_P_B_RAW) {
-				int	tt;
-				tt = isar->cmsb | 0x30;
-				if (tt == 0x3e)
-					tt = '*';
-				else if (tt == 0x3f)
-					tt = '#';
-				else if (tt > '9')
-					tt += 7;
-				tt |= DTMF_TONE_VAL;
-				_queue_data(&ch->bch.ch, PH_CONTROL_IND,
-					    MISDN_ID_ANY, sizeof(tt), &tt,
-					    GFP_ATOMIC);
-			} else
-				pr_debug("%s: ISAR IIS_PSTEV pm %d sta %x\n",
-					 isar->name, ch->bch.state,
-					 isar->cmsb);
-		} else {
-			pr_debug("%s: ISAR spurious IIS_PSTEV %x/%x/%x\n",
-				 isar->name, isar->iis, isar->cmsb,
-				 isar->clsb);
-			isar->write_reg(isar->hw, ISAR_IIA, 0);
-		}
-		break;
-	case ISAR_IIS_PSTRSP:
-		ch = sel_bch_isar(isar, isar->iis >> 6);
-		if (ch) {
-			rcv_mbox(isar, NULL);
-			isar_pump_status_rsp(ch);
-		} else {
-			pr_debug("%s: ISAR spurious IIS_PSTRSP %x/%x/%x\n",
-				 isar->name, isar->iis, isar->cmsb,
-				 isar->clsb);
-			isar->write_reg(isar->hw, ISAR_IIA, 0);
-		}
-		break;
-	case ISAR_IIS_DIAG:
-	case ISAR_IIS_BSTRSP:
-	case ISAR_IIS_IOM2RSP:
-		rcv_mbox(isar, NULL);
-		break;
-	case ISAR_IIS_INVMSG:
-		rcv_mbox(isar, NULL);
-		pr_debug("%s: invalid msg his:%x\n", isar->name, isar->cmsb);
-		break;
-	default:
-		rcv_mbox(isar, NULL);
-		pr_debug("%s: unhandled msg iis(%x) ctrl(%x/%x)\n",
-			 isar->name, isar->iis, isar->cmsb, isar->clsb);
-		break;
-	}
-}
-EXPORT_SYMBOL(mISDNisar_irq);
-
-static void
-ftimer_handler(struct timer_list *t)
-{
-	struct isar_ch *ch = timer_container_of(ch, t, ftimer);
-
-	pr_debug("%s: ftimer flags %lx\n", ch->is->name, ch->bch.Flags);
-	test_and_clear_bit(FLG_FTI_RUN, &ch->bch.Flags);
-	if (test_and_clear_bit(FLG_LL_CONN, &ch->bch.Flags))
-		deliver_status(ch, HW_MOD_CONNECT);
-}
-
-static void
-setup_pump(struct isar_ch *ch) {
-	u8 dps = SET_DPS(ch->dpath);
-	u8 ctrl, param[6];
-
-	switch (ch->bch.state) {
-	case ISDN_P_NONE:
-	case ISDN_P_B_RAW:
-	case ISDN_P_B_HDLC:
-		send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, PMOD_BYPASS, 0, NULL);
-		break;
-	case ISDN_P_B_L2DTMF:
-		if (test_bit(FLG_DTMFSEND, &ch->bch.Flags)) {
-			param[0] = 5; /* TOA 5 db */
-			send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG,
-				  PMOD_DTMF_TRANS, 1, param);
-		} else {
-			param[0] = 40; /* REL -46 dbm */
-			send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG,
-				  PMOD_DTMF, 1, param);
-		}
-		fallthrough;
-	case ISDN_P_B_MODEM_ASYNC:
-		ctrl = PMOD_DATAMODEM;
-		if (test_bit(FLG_ORIGIN, &ch->bch.Flags)) {
-			ctrl |= PCTRL_ORIG;
-			param[5] = PV32P6_CTN;
-		} else {
-			param[5] = PV32P6_ATN;
-		}
-		param[0] = 6; /* 6 db */
-		param[1] = PV32P2_V23R | PV32P2_V22A | PV32P2_V22B |
-			PV32P2_V22C | PV32P2_V21 | PV32P2_BEL;
-		param[2] = PV32P3_AMOD | PV32P3_V32B | PV32P3_V23B;
-		param[3] = PV32P4_UT144;
-		param[4] = PV32P5_UT144;
-		send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, ctrl, 6, param);
-		break;
-	case ISDN_P_B_T30_FAX:
-		ctrl = PMOD_FAX;
-		if (test_bit(FLG_ORIGIN, &ch->bch.Flags)) {
-			ctrl |= PCTRL_ORIG;
-			param[1] = PFAXP2_CTN;
-		} else {
-			param[1] = PFAXP2_ATN;
-		}
-		param[0] = 6; /* 6 db */
-		send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, ctrl, 2, param);
-		ch->state = STFAX_NULL;
-		ch->newcmd = 0;
-		ch->newmod = 0;
-		test_and_set_bit(FLG_FTI_RUN, &ch->bch.Flags);
-		break;
-	}
-	udelay(1000);
-	send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL);
-	udelay(1000);
-}
-
-static void
-setup_sart(struct isar_ch *ch) {
-	u8 dps = SET_DPS(ch->dpath);
-	u8 ctrl, param[2] = {0, 0};
-
-	switch (ch->bch.state) {
-	case ISDN_P_NONE:
-		send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, SMODE_DISABLE,
-			  0, NULL);
-		break;
-	case ISDN_P_B_RAW:
-	case ISDN_P_B_L2DTMF:
-		send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, SMODE_BINARY,
-			  2, param);
-		break;
-	case ISDN_P_B_HDLC:
-	case ISDN_P_B_T30_FAX:
-		send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, SMODE_HDLC,
-			  1, param);
-		break;
-	case ISDN_P_B_MODEM_ASYNC:
-		ctrl = SMODE_V14 | SCTRL_HDMC_BOTH;
-		param[0] = S_P1_CHS_8;
-		param[1] = S_P2_BFT_DEF;
-		send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, ctrl, 2, param);
-		break;
-	}
-	udelay(1000);
-	send_mbox(ch->is, dps | ISAR_HIS_BSTREQ, 0, 0, NULL);
-	udelay(1000);
-}
-
-static void
-setup_iom2(struct isar_ch *ch) {
-	u8 dps = SET_DPS(ch->dpath);
-	u8 cmsb = IOM_CTRL_ENA, msg[5] = {IOM_P1_TXD, 0, 0, 0, 0};
-
-	if (ch->bch.nr == 2) {
-		msg[1] = 1;
-		msg[3] = 1;
-	}
-	switch (ch->bch.state) {
-	case ISDN_P_NONE:
-		cmsb = 0;
-		/* dummy slot */
-		msg[1] = ch->dpath + 2;
-		msg[3] = ch->dpath + 2;
-		break;
-	case ISDN_P_B_RAW:
-	case ISDN_P_B_HDLC:
-		break;
-	case ISDN_P_B_MODEM_ASYNC:
-	case ISDN_P_B_T30_FAX:
-		cmsb |= IOM_CTRL_RCV;
-		fallthrough;
-	case ISDN_P_B_L2DTMF:
-		if (test_bit(FLG_DTMFSEND, &ch->bch.Flags))
-			cmsb |= IOM_CTRL_RCV;
-		cmsb |= IOM_CTRL_ALAW;
-		break;
-	}
-	send_mbox(ch->is, dps | ISAR_HIS_IOM2CFG, cmsb, 5, msg);
-	udelay(1000);
-	send_mbox(ch->is, dps | ISAR_HIS_IOM2REQ, 0, 0, NULL);
-	udelay(1000);
-}
-
-static int
-modeisar(struct isar_ch *ch, u32 bprotocol)
-{
-	/* Here we are selecting the best datapath for requested protocol */
-	if (ch->bch.state == ISDN_P_NONE) { /* New Setup */
-		switch (bprotocol) {
-		case ISDN_P_NONE: /* init */
-			if (!ch->dpath)
-				/* no init for dpath 0 */
-				return 0;
-			test_and_clear_bit(FLG_HDLC, &ch->bch.Flags);
-			test_and_clear_bit(FLG_TRANSPARENT, &ch->bch.Flags);
-			break;
-		case ISDN_P_B_RAW:
-		case ISDN_P_B_HDLC:
-			/* best is datapath 2 */
-			if (!test_and_set_bit(ISAR_DP2_USE, &ch->is->Flags))
-				ch->dpath = 2;
-			else if (!test_and_set_bit(ISAR_DP1_USE,
-						   &ch->is->Flags))
-				ch->dpath = 1;
-			else {
-				pr_info("modeisar both paths in use\n");
-				return -EBUSY;
-			}
-			if (bprotocol == ISDN_P_B_HDLC)
-				test_and_set_bit(FLG_HDLC, &ch->bch.Flags);
-			else
-				test_and_set_bit(FLG_TRANSPARENT,
-						 &ch->bch.Flags);
-			break;
-		case ISDN_P_B_MODEM_ASYNC:
-		case ISDN_P_B_T30_FAX:
-		case ISDN_P_B_L2DTMF:
-			/* only datapath 1 */
-			if (!test_and_set_bit(ISAR_DP1_USE, &ch->is->Flags))
-				ch->dpath = 1;
-			else {
-				pr_info("%s: ISAR modeisar analog functions"
-					"only with DP1\n", ch->is->name);
-				return -EBUSY;
-			}
-			break;
-		default:
-			pr_info("%s: protocol not known %x\n", ch->is->name,
-				bprotocol);
-			return -ENOPROTOOPT;
-		}
-	}
-	pr_debug("%s: ISAR ch%d dp%d protocol %x->%x\n", ch->is->name,
-		 ch->bch.nr, ch->dpath, ch->bch.state, bprotocol);
-	ch->bch.state = bprotocol;
-	setup_pump(ch);
-	setup_iom2(ch);
-	setup_sart(ch);
-	if (ch->bch.state == ISDN_P_NONE) {
-		/* Clear resources */
-		if (ch->dpath == 1)
-			test_and_clear_bit(ISAR_DP1_USE, &ch->is->Flags);
-		else if (ch->dpath == 2)
-			test_and_clear_bit(ISAR_DP2_USE, &ch->is->Flags);
-		ch->dpath = 0;
-		ch->is->ctrl(ch->is->hw, HW_DEACT_IND, ch->bch.nr);
-	} else
-		ch->is->ctrl(ch->is->hw, HW_ACTIVATE_IND, ch->bch.nr);
-	return 0;
-}
-
-static void
-isar_pump_cmd(struct isar_ch *ch, u32 cmd, u8 para)
-{
-	u8 dps = SET_DPS(ch->dpath);
-	u8 ctrl = 0, nom = 0, p1 = 0;
-
-	pr_debug("%s: isar_pump_cmd %x/%x state(%x)\n",
-		 ch->is->name, cmd, para, ch->bch.state);
-	switch (cmd) {
-	case HW_MOD_FTM:
-		if (ch->state == STFAX_READY) {
-			p1 = para;
-			ctrl = PCTRL_CMD_FTM;
-			nom = 1;
-			ch->state = STFAX_LINE;
-			ch->cmd = ctrl;
-			ch->mod = para;
-			ch->newmod = 0;
-			ch->newcmd = 0;
-			ch->try_mod = 3;
-		} else if ((ch->state == STFAX_ACTIV) &&
-			   (ch->cmd == PCTRL_CMD_FTM) && (ch->mod == para))
-			deliver_status(ch, HW_MOD_CONNECT);
-		else {
-			ch->newmod = para;
-			ch->newcmd = PCTRL_CMD_FTM;
-			nom = 0;
-			ctrl = PCTRL_CMD_ESC;
-			ch->state = STFAX_ESCAPE;
-		}
-		break;
-	case HW_MOD_FTH:
-		if (ch->state == STFAX_READY) {
-			p1 = para;
-			ctrl = PCTRL_CMD_FTH;
-			nom = 1;
-			ch->state = STFAX_LINE;
-			ch->cmd = ctrl;
-			ch->mod = para;
-			ch->newmod = 0;
-			ch->newcmd = 0;
-			ch->try_mod = 3;
-		} else if ((ch->state == STFAX_ACTIV) &&
-			   (ch->cmd == PCTRL_CMD_FTH) && (ch->mod == para))
-			deliver_status(ch, HW_MOD_CONNECT);
-		else {
-			ch->newmod = para;
-			ch->newcmd = PCTRL_CMD_FTH;
-			nom = 0;
-			ctrl = PCTRL_CMD_ESC;
-			ch->state = STFAX_ESCAPE;
-		}
-		break;
-	case HW_MOD_FRM:
-		if (ch->state == STFAX_READY) {
-			p1 = para;
-			ctrl = PCTRL_CMD_FRM;
-			nom = 1;
-			ch->state = STFAX_LINE;
-			ch->cmd = ctrl;
-			ch->mod = para;
-			ch->newmod = 0;
-			ch->newcmd = 0;
-			ch->try_mod = 3;
-		} else if ((ch->state == STFAX_ACTIV) &&
-			   (ch->cmd == PCTRL_CMD_FRM) && (ch->mod == para))
-			deliver_status(ch, HW_MOD_CONNECT);
-		else {
-			ch->newmod = para;
-			ch->newcmd = PCTRL_CMD_FRM;
-			nom = 0;
-			ctrl = PCTRL_CMD_ESC;
-			ch->state = STFAX_ESCAPE;
-		}
-		break;
-	case HW_MOD_FRH:
-		if (ch->state == STFAX_READY) {
-			p1 = para;
-			ctrl = PCTRL_CMD_FRH;
-			nom = 1;
-			ch->state = STFAX_LINE;
-			ch->cmd = ctrl;
-			ch->mod = para;
-			ch->newmod = 0;
-			ch->newcmd = 0;
-			ch->try_mod = 3;
-		} else if ((ch->state == STFAX_ACTIV) &&
-			   (ch->cmd == PCTRL_CMD_FRH) && (ch->mod == para))
-			deliver_status(ch, HW_MOD_CONNECT);
-		else {
-			ch->newmod = para;
-			ch->newcmd = PCTRL_CMD_FRH;
-			nom = 0;
-			ctrl = PCTRL_CMD_ESC;
-			ch->state = STFAX_ESCAPE;
-		}
-		break;
-	case PCTRL_CMD_TDTMF:
-		p1 = para;
-		nom = 1;
-		ctrl = PCTRL_CMD_TDTMF;
-		break;
-	}
-	if (ctrl)
-		send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, ctrl, nom, &p1);
-}
-
-static void
-isar_setup(struct isar_hw *isar)
-{
-	u8 msg;
-	int i;
-
-	/* Dpath 1, 2 */
-	msg = 61;
-	for (i = 0; i < 2; i++) {
-		/* Buffer Config */
-		send_mbox(isar, (i ? ISAR_HIS_DPS2 : ISAR_HIS_DPS1) |
-			  ISAR_HIS_P12CFG, 4, 1, &msg);
-		isar->ch[i].mml = msg;
-		isar->ch[i].bch.state = 0;
-		isar->ch[i].dpath = i + 1;
-		modeisar(&isar->ch[i], ISDN_P_NONE);
-	}
-}
-
-static int
-isar_l2l1(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct bchannel *bch = container_of(ch, struct bchannel, ch);
-	struct isar_ch *ich = container_of(bch, struct isar_ch, bch);
-	int ret = -EINVAL;
-	struct mISDNhead *hh = mISDN_HEAD_P(skb);
-	u32 id, *val;
-	u_long flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		spin_lock_irqsave(ich->is->hwlock, flags);
-		ret = bchannel_senddata(bch, skb);
-		if (ret > 0) { /* direct TX */
-			ret = 0;
-			isar_fill_fifo(ich);
-		}
-		spin_unlock_irqrestore(ich->is->hwlock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		spin_lock_irqsave(ich->is->hwlock, flags);
-		if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags))
-			ret = modeisar(ich, ch->protocol);
-		else
-			ret = 0;
-		spin_unlock_irqrestore(ich->is->hwlock, flags);
-		if (!ret)
-			_queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY, 0,
-				    NULL, GFP_KERNEL);
-		break;
-	case PH_DEACTIVATE_REQ:
-		spin_lock_irqsave(ich->is->hwlock, flags);
-		mISDN_clear_bchannel(bch);
-		modeisar(ich, ISDN_P_NONE);
-		spin_unlock_irqrestore(ich->is->hwlock, flags);
-		_queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0,
-			    NULL, GFP_KERNEL);
-		ret = 0;
-		break;
-	case PH_CONTROL_REQ:
-		val = (u32 *)skb->data;
-		pr_debug("%s: PH_CONTROL | REQUEST %x/%x\n", ich->is->name,
-			 hh->id, *val);
-		if ((hh->id == 0) && ((*val & ~DTMF_TONE_MASK) ==
-				      DTMF_TONE_VAL)) {
-			if (bch->state == ISDN_P_B_L2DTMF) {
-				char tt = *val & DTMF_TONE_MASK;
-
-				if (tt == '*')
-					tt = 0x1e;
-				else if (tt == '#')
-					tt = 0x1f;
-				else if (tt > '9')
-					tt -= 7;
-				tt &= 0x1f;
-				spin_lock_irqsave(ich->is->hwlock, flags);
-				isar_pump_cmd(ich, PCTRL_CMD_TDTMF, tt);
-				spin_unlock_irqrestore(ich->is->hwlock, flags);
-			} else {
-				pr_info("%s: DTMF send wrong protocol %x\n",
-					__func__, bch->state);
-				return -EINVAL;
-			}
-		} else if ((hh->id == HW_MOD_FRM) || (hh->id == HW_MOD_FRH) ||
-			   (hh->id == HW_MOD_FTM) || (hh->id == HW_MOD_FTH)) {
-			for (id = 0; id < FAXMODCNT; id++)
-				if (faxmodulation[id] == *val)
-					break;
-			if ((FAXMODCNT > id) &&
-			    test_bit(FLG_INITIALIZED, &bch->Flags)) {
-				pr_debug("%s: isar: new mod\n", ich->is->name);
-				isar_pump_cmd(ich, hh->id, *val);
-				ret = 0;
-			} else {
-				pr_info("%s: wrong modulation\n",
-					ich->is->name);
-				ret = -EINVAL;
-			}
-		} else if (hh->id == HW_MOD_LASTDATA)
-			test_and_set_bit(FLG_DLEETX, &bch->Flags);
-		else {
-			pr_info("%s: unknown PH_CONTROL_REQ %x\n",
-				ich->is->name, hh->id);
-			ret = -EINVAL;
-		}
-		fallthrough;
-	default:
-		pr_info("%s: %s unknown prim(%x,%x)\n",
-			ich->is->name, __func__, hh->prim, hh->id);
-		ret = -EINVAL;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static int
-channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq)
-{
-	return mISDN_ctrl_bchannel(bch, cq);
-}
-
-static int
-isar_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
-{
-	struct bchannel *bch = container_of(ch, struct bchannel, ch);
-	struct isar_ch *ich = container_of(bch, struct isar_ch, bch);
-	int ret = -EINVAL;
-	u_long flags;
-
-	pr_debug("%s: %s cmd:%x %p\n", ich->is->name, __func__, cmd, arg);
-	switch (cmd) {
-	case CLOSE_CHANNEL:
-		test_and_clear_bit(FLG_OPEN, &bch->Flags);
-		cancel_work_sync(&bch->workq);
-		spin_lock_irqsave(ich->is->hwlock, flags);
-		mISDN_clear_bchannel(bch);
-		modeisar(ich, ISDN_P_NONE);
-		spin_unlock_irqrestore(ich->is->hwlock, flags);
-		ch->protocol = ISDN_P_NONE;
-		ch->peer = NULL;
-		module_put(ich->is->owner);
-		ret = 0;
-		break;
-	case CONTROL_CHANNEL:
-		ret = channel_bctrl(bch, arg);
-		break;
-	default:
-		pr_info("%s: %s unknown prim(%x)\n",
-			ich->is->name, __func__, cmd);
-	}
-	return ret;
-}
-
-static void
-free_isar(struct isar_hw *isar)
-{
-	modeisar(&isar->ch[0], ISDN_P_NONE);
-	modeisar(&isar->ch[1], ISDN_P_NONE);
-	timer_delete(&isar->ch[0].ftimer);
-	timer_delete(&isar->ch[1].ftimer);
-	test_and_clear_bit(FLG_INITIALIZED, &isar->ch[0].bch.Flags);
-	test_and_clear_bit(FLG_INITIALIZED, &isar->ch[1].bch.Flags);
-}
-
-static int
-init_isar(struct isar_hw *isar)
-{
-	int	cnt = 3;
-
-	while (cnt--) {
-		isar->version = ISARVersion(isar);
-		if (isar->ch[0].bch.debug & DEBUG_HW)
-			pr_notice("%s: Testing version %d (%d time)\n",
-				  isar->name, isar->version, 3 - cnt);
-		if (isar->version == 1)
-			break;
-		isar->ctrl(isar->hw, HW_RESET_REQ, 0);
-	}
-	if (isar->version != 1)
-		return -EINVAL;
-	timer_setup(&isar->ch[0].ftimer, ftimer_handler, 0);
-	test_and_set_bit(FLG_INITIALIZED, &isar->ch[0].bch.Flags);
-	timer_setup(&isar->ch[1].ftimer, ftimer_handler, 0);
-	test_and_set_bit(FLG_INITIALIZED, &isar->ch[1].bch.Flags);
-	return 0;
-}
-
-static int
-isar_open(struct isar_hw *isar, struct channel_req *rq)
-{
-	struct bchannel		*bch;
-
-	if (rq->adr.channel == 0 || rq->adr.channel > 2)
-		return -EINVAL;
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	bch = &isar->ch[rq->adr.channel - 1].bch;
-	if (test_and_set_bit(FLG_OPEN, &bch->Flags))
-		return -EBUSY; /* b-channel can be only open once */
-	bch->ch.protocol = rq->protocol;
-	rq->ch = &bch->ch;
-	return 0;
-}
-
-u32
-mISDNisar_init(struct isar_hw *isar, void *hw)
-{
-	u32 ret, i;
-
-	isar->hw = hw;
-	for (i = 0; i < 2; i++) {
-		isar->ch[i].bch.nr = i + 1;
-		mISDN_initbchannel(&isar->ch[i].bch, MAX_DATA_MEM, 32);
-		isar->ch[i].bch.ch.nr = i + 1;
-		isar->ch[i].bch.ch.send = &isar_l2l1;
-		isar->ch[i].bch.ch.ctrl = isar_bctrl;
-		isar->ch[i].bch.hw = hw;
-		isar->ch[i].is = isar;
-	}
-
-	isar->init = &init_isar;
-	isar->release = &free_isar;
-	isar->firmware = &load_firmware;
-	isar->open = &isar_open;
-
-	ret =	(1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_L2DTMF & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_MODEM_ASYNC & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_T30_FAX & ISDN_P_B_MASK));
-
-	return ret;
-}
-EXPORT_SYMBOL(mISDNisar_init);
-
-static int __init isar_mod_init(void)
-{
-	pr_notice("mISDN: ISAR driver Rev. %s\n", ISAR_REV);
-	return 0;
-}
-
-static void __exit isar_mod_cleanup(void)
-{
-	pr_notice("mISDN: ISAR module unloaded\n");
-}
-module_init(isar_mod_init);
-module_exit(isar_mod_cleanup);
diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c
deleted file mode 100644
index 8d740d8eacec..000000000000
--- a/drivers/isdn/hardware/mISDN/netjet.c
+++ /dev/null
@@ -1,1154 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * NETJet mISDN driver
- *
- * Author       Karsten Keil <keil@isdn4linux.de>
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/mISDNhw.h>
-#include <linux/slab.h>
-#include "ipac.h"
-#include "iohelper.h"
-#include "netjet.h"
-#include "isdnhdlc.h"
-
-#define NETJET_REV	"2.0"
-
-enum nj_types {
-	NETJET_S_TJ300,
-	NETJET_S_TJ320,
-	ENTERNOW__TJ320,
-};
-
-struct tiger_dma {
-	size_t		size;
-	u32		*start;
-	int		idx;
-	u32		dmastart;
-	u32		dmairq;
-	u32		dmaend;
-	u32		dmacur;
-};
-
-struct tiger_hw;
-
-struct tiger_ch {
-	struct bchannel		bch;
-	struct tiger_hw		*nj;
-	int			idx;
-	int			free;
-	int			lastrx;
-	u16			rxstate;
-	u16			txstate;
-	struct isdnhdlc_vars	hsend;
-	struct isdnhdlc_vars	hrecv;
-	u8			*hsbuf;
-	u8			*hrbuf;
-};
-
-#define TX_INIT		0x0001
-#define TX_IDLE		0x0002
-#define TX_RUN		0x0004
-#define TX_UNDERRUN	0x0100
-#define RX_OVERRUN	0x0100
-
-#define LOG_SIZE	64
-
-struct tiger_hw {
-	struct list_head	list;
-	struct pci_dev		*pdev;
-	char			name[MISDN_MAX_IDLEN];
-	enum nj_types		typ;
-	int			irq;
-	u32			irqcnt;
-	u32			base;
-	size_t			base_s;
-	dma_addr_t		dma;
-	void			*dma_p;
-	spinlock_t		lock;	/* lock HW */
-	struct isac_hw		isac;
-	struct tiger_dma	send;
-	struct tiger_dma	recv;
-	struct tiger_ch		bc[2];
-	u8			ctrlreg;
-	u8			dmactrl;
-	u8			auxd;
-	u8			last_is0;
-	u8			irqmask0;
-	char			log[LOG_SIZE];
-};
-
-static LIST_HEAD(Cards);
-static DEFINE_RWLOCK(card_lock); /* protect Cards */
-static u32 debug;
-static int nj_cnt;
-
-static void
-_set_debug(struct tiger_hw *card)
-{
-	card->isac.dch.debug = debug;
-	card->bc[0].bch.debug = debug;
-	card->bc[1].bch.debug = debug;
-}
-
-static int
-set_debug(const char *val, const struct kernel_param *kp)
-{
-	int ret;
-	struct tiger_hw *card;
-
-	ret = param_set_uint(val, kp);
-	if (!ret) {
-		read_lock(&card_lock);
-		list_for_each_entry(card, &Cards, list)
-			_set_debug(card);
-		read_unlock(&card_lock);
-	}
-	return ret;
-}
-
-MODULE_AUTHOR("Karsten Keil");
-MODULE_DESCRIPTION("mISDN driver for NETJet cards");
-MODULE_LICENSE("GPL v2");
-MODULE_VERSION(NETJET_REV);
-module_param_call(debug, set_debug, param_get_uint, &debug, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug, "Netjet debug mask");
-
-static void
-nj_disable_hwirq(struct tiger_hw *card)
-{
-	outb(0, card->base + NJ_IRQMASK0);
-	outb(0, card->base + NJ_IRQMASK1);
-}
-
-
-static u8
-ReadISAC_nj(void *p, u8 offset)
-{
-	struct tiger_hw *card = p;
-	u8 ret;
-
-	card->auxd &= 0xfc;
-	card->auxd |= (offset >> 4) & 3;
-	outb(card->auxd, card->base + NJ_AUXDATA);
-	ret = inb(card->base + NJ_ISAC_OFF + ((offset & 0x0f) << 2));
-	return ret;
-}
-
-static void
-WriteISAC_nj(void *p, u8 offset, u8 value)
-{
-	struct tiger_hw *card = p;
-
-	card->auxd &= 0xfc;
-	card->auxd |= (offset >> 4) & 3;
-	outb(card->auxd, card->base + NJ_AUXDATA);
-	outb(value, card->base + NJ_ISAC_OFF + ((offset & 0x0f) << 2));
-}
-
-static void
-ReadFiFoISAC_nj(void *p, u8 offset, u8 *data, int size)
-{
-	struct tiger_hw *card = p;
-
-	card->auxd &= 0xfc;
-	outb(card->auxd, card->base + NJ_AUXDATA);
-	insb(card->base + NJ_ISAC_OFF, data, size);
-}
-
-static void
-WriteFiFoISAC_nj(void *p, u8 offset, u8 *data, int size)
-{
-	struct tiger_hw *card = p;
-
-	card->auxd &= 0xfc;
-	outb(card->auxd, card->base + NJ_AUXDATA);
-	outsb(card->base + NJ_ISAC_OFF, data, size);
-}
-
-static void
-fill_mem(struct tiger_ch *bc, u32 idx, u32 cnt, u32 fill)
-{
-	struct tiger_hw *card = bc->bch.hw;
-	u32 mask = 0xff, val;
-
-	pr_debug("%s: B%1d fill %02x len %d idx %d/%d\n", card->name,
-		 bc->bch.nr, fill, cnt, idx, card->send.idx);
-	if (bc->bch.nr & 2) {
-		fill  <<= 8;
-		mask <<= 8;
-	}
-	mask ^= 0xffffffff;
-	while (cnt--) {
-		val = card->send.start[idx];
-		val &= mask;
-		val |= fill;
-		card->send.start[idx++] = val;
-		if (idx >= card->send.size)
-			idx = 0;
-	}
-}
-
-static int
-mode_tiger(struct tiger_ch *bc, u32 protocol)
-{
-	struct tiger_hw *card = bc->bch.hw;
-
-	pr_debug("%s: B%1d protocol %x-->%x\n", card->name,
-		 bc->bch.nr, bc->bch.state, protocol);
-	switch (protocol) {
-	case ISDN_P_NONE:
-		if (bc->bch.state == ISDN_P_NONE)
-			break;
-		fill_mem(bc, 0, card->send.size, 0xff);
-		bc->bch.state = protocol;
-		/* only stop dma and interrupts if both channels NULL */
-		if ((card->bc[0].bch.state == ISDN_P_NONE) &&
-		    (card->bc[1].bch.state == ISDN_P_NONE)) {
-			card->dmactrl = 0;
-			outb(card->dmactrl, card->base + NJ_DMACTRL);
-			outb(0, card->base + NJ_IRQMASK0);
-		}
-		test_and_clear_bit(FLG_HDLC, &bc->bch.Flags);
-		test_and_clear_bit(FLG_TRANSPARENT, &bc->bch.Flags);
-		bc->txstate = 0;
-		bc->rxstate = 0;
-		bc->lastrx = -1;
-		break;
-	case ISDN_P_B_RAW:
-		test_and_set_bit(FLG_TRANSPARENT, &bc->bch.Flags);
-		bc->bch.state = protocol;
-		bc->idx = 0;
-		bc->free = card->send.size / 2;
-		bc->rxstate = 0;
-		bc->txstate = TX_INIT | TX_IDLE;
-		bc->lastrx = -1;
-		if (!card->dmactrl) {
-			card->dmactrl = 1;
-			outb(card->dmactrl, card->base + NJ_DMACTRL);
-			outb(0x0f, card->base + NJ_IRQMASK0);
-		}
-		break;
-	case ISDN_P_B_HDLC:
-		test_and_set_bit(FLG_HDLC, &bc->bch.Flags);
-		bc->bch.state = protocol;
-		bc->idx = 0;
-		bc->free = card->send.size / 2;
-		bc->rxstate = 0;
-		bc->txstate = TX_INIT | TX_IDLE;
-		isdnhdlc_rcv_init(&bc->hrecv, 0);
-		isdnhdlc_out_init(&bc->hsend, 0);
-		bc->lastrx = -1;
-		if (!card->dmactrl) {
-			card->dmactrl = 1;
-			outb(card->dmactrl, card->base + NJ_DMACTRL);
-			outb(0x0f, card->base + NJ_IRQMASK0);
-		}
-		break;
-	default:
-		pr_info("%s: %s protocol %x not handled\n", card->name,
-			__func__, protocol);
-		return -ENOPROTOOPT;
-	}
-	card->send.dmacur = inl(card->base + NJ_DMA_READ_ADR);
-	card->recv.dmacur = inl(card->base + NJ_DMA_WRITE_ADR);
-	card->send.idx = (card->send.dmacur - card->send.dmastart) >> 2;
-	card->recv.idx = (card->recv.dmacur - card->recv.dmastart) >> 2;
-	pr_debug("%s: %s ctrl %x irq  %02x/%02x idx %d/%d\n",
-		 card->name, __func__,
-		 inb(card->base + NJ_DMACTRL),
-		 inb(card->base + NJ_IRQMASK0),
-		 inb(card->base + NJ_IRQSTAT0),
-		 card->send.idx,
-		 card->recv.idx);
-	return 0;
-}
-
-static void
-nj_reset(struct tiger_hw *card)
-{
-	outb(0xff, card->base + NJ_CTRL); /* Reset On */
-	mdelay(1);
-
-	/* now edge triggered for TJ320 GE 13/07/00 */
-	/* see comment in IRQ function */
-	if (card->typ == NETJET_S_TJ320) /* TJ320 */
-		card->ctrlreg = 0x40;  /* Reset Off and status read clear */
-	else
-		card->ctrlreg = 0x00;  /* Reset Off and status read clear */
-	outb(card->ctrlreg, card->base + NJ_CTRL);
-	mdelay(10);
-
-	/* configure AUX pins (all output except ISAC IRQ pin) */
-	card->auxd = 0;
-	card->dmactrl = 0;
-	outb(~NJ_ISACIRQ, card->base + NJ_AUXCTRL);
-	outb(NJ_ISACIRQ,  card->base + NJ_IRQMASK1);
-	outb(card->auxd, card->base + NJ_AUXDATA);
-}
-
-static int
-inittiger(struct tiger_hw *card)
-{
-	int i;
-
-	card->dma_p = dma_alloc_coherent(&card->pdev->dev, NJ_DMA_SIZE,
-					 &card->dma, GFP_ATOMIC);
-	if (!card->dma_p) {
-		pr_info("%s: No DMA memory\n", card->name);
-		return -ENOMEM;
-	}
-	if ((u64)card->dma > 0xffffffff) {
-		pr_info("%s: DMA outside 32 bit\n", card->name);
-		return -ENOMEM;
-	}
-	for (i = 0; i < 2; i++) {
-		card->bc[i].hsbuf = kmalloc(NJ_DMA_TXSIZE, GFP_ATOMIC);
-		if (!card->bc[i].hsbuf) {
-			pr_info("%s: no B%d send buffer\n", card->name, i + 1);
-			return -ENOMEM;
-		}
-		card->bc[i].hrbuf = kmalloc(NJ_DMA_RXSIZE, GFP_ATOMIC);
-		if (!card->bc[i].hrbuf) {
-			pr_info("%s: no B%d recv buffer\n", card->name, i + 1);
-			return -ENOMEM;
-		}
-	}
-	memset(card->dma_p, 0xff, NJ_DMA_SIZE);
-
-	card->send.start = card->dma_p;
-	card->send.dmastart = (u32)card->dma;
-	card->send.dmaend = card->send.dmastart +
-		(4 * (NJ_DMA_TXSIZE - 1));
-	card->send.dmairq = card->send.dmastart +
-		(4 * ((NJ_DMA_TXSIZE / 2) - 1));
-	card->send.size = NJ_DMA_TXSIZE;
-
-	if (debug & DEBUG_HW)
-		pr_notice("%s: send buffer phy %#x - %#x - %#x  virt %p"
-			  " size %zu u32\n", card->name,
-			  card->send.dmastart, card->send.dmairq,
-			  card->send.dmaend, card->send.start, card->send.size);
-
-	outl(card->send.dmastart, card->base + NJ_DMA_READ_START);
-	outl(card->send.dmairq, card->base + NJ_DMA_READ_IRQ);
-	outl(card->send.dmaend, card->base + NJ_DMA_READ_END);
-
-	card->recv.start = card->dma_p + (NJ_DMA_SIZE / 2);
-	card->recv.dmastart = (u32)card->dma  + (NJ_DMA_SIZE / 2);
-	card->recv.dmaend = card->recv.dmastart +
-		(4 * (NJ_DMA_RXSIZE - 1));
-	card->recv.dmairq = card->recv.dmastart +
-		(4 * ((NJ_DMA_RXSIZE / 2) - 1));
-	card->recv.size = NJ_DMA_RXSIZE;
-
-	if (debug & DEBUG_HW)
-		pr_notice("%s: recv buffer phy %#x - %#x - %#x  virt %p"
-			  " size %zu u32\n", card->name,
-			  card->recv.dmastart, card->recv.dmairq,
-			  card->recv.dmaend, card->recv.start, card->recv.size);
-
-	outl(card->recv.dmastart, card->base + NJ_DMA_WRITE_START);
-	outl(card->recv.dmairq, card->base + NJ_DMA_WRITE_IRQ);
-	outl(card->recv.dmaend, card->base + NJ_DMA_WRITE_END);
-	return 0;
-}
-
-static void
-read_dma(struct tiger_ch *bc, u32 idx, int cnt)
-{
-	struct tiger_hw *card = bc->bch.hw;
-	int i, stat;
-	u32 val;
-	u8 *p, *pn;
-
-	if (bc->lastrx == idx) {
-		bc->rxstate |= RX_OVERRUN;
-		pr_info("%s: B%1d overrun at idx %d\n", card->name,
-			bc->bch.nr, idx);
-	}
-	bc->lastrx = idx;
-	if (test_bit(FLG_RX_OFF, &bc->bch.Flags)) {
-		bc->bch.dropcnt += cnt;
-		return;
-	}
-	stat = bchannel_get_rxbuf(&bc->bch, cnt);
-	/* only transparent use the count here, HDLC overun is detected later */
-	if (stat == -ENOMEM) {
-		pr_warn("%s.B%d: No memory for %d bytes\n",
-			card->name, bc->bch.nr, cnt);
-		return;
-	}
-	if (test_bit(FLG_TRANSPARENT, &bc->bch.Flags))
-		p = skb_put(bc->bch.rx_skb, cnt);
-	else
-		p = bc->hrbuf;
-
-	for (i = 0; i < cnt; i++) {
-		val = card->recv.start[idx++];
-		if (bc->bch.nr & 2)
-			val >>= 8;
-		if (idx >= card->recv.size)
-			idx = 0;
-		p[i] = val & 0xff;
-	}
-
-	if (test_bit(FLG_TRANSPARENT, &bc->bch.Flags)) {
-		recv_Bchannel(&bc->bch, 0, false);
-		return;
-	}
-
-	pn = bc->hrbuf;
-	while (cnt > 0) {
-		stat = isdnhdlc_decode(&bc->hrecv, pn, cnt, &i,
-				       bc->bch.rx_skb->data, bc->bch.maxlen);
-		if (stat > 0) { /* valid frame received */
-			p = skb_put(bc->bch.rx_skb, stat);
-			if (debug & DEBUG_HW_BFIFO) {
-				snprintf(card->log, LOG_SIZE,
-					 "B%1d-recv %s %d ", bc->bch.nr,
-					 card->name, stat);
-				print_hex_dump_bytes(card->log,
-						     DUMP_PREFIX_OFFSET, p,
-						     stat);
-			}
-			recv_Bchannel(&bc->bch, 0, false);
-			stat = bchannel_get_rxbuf(&bc->bch, bc->bch.maxlen);
-			if (stat < 0) {
-				pr_warn("%s.B%d: No memory for %d bytes\n",
-					card->name, bc->bch.nr, cnt);
-				return;
-			}
-		} else if (stat == -HDLC_CRC_ERROR) {
-			pr_info("%s: B%1d receive frame CRC error\n",
-				card->name, bc->bch.nr);
-		} else if (stat == -HDLC_FRAMING_ERROR) {
-			pr_info("%s: B%1d receive framing error\n",
-				card->name, bc->bch.nr);
-		} else if (stat == -HDLC_LENGTH_ERROR) {
-			pr_info("%s: B%1d receive frame too long (> %d)\n",
-				card->name, bc->bch.nr, bc->bch.maxlen);
-		}
-		pn += i;
-		cnt -= i;
-	}
-}
-
-static void
-recv_tiger(struct tiger_hw *card, u8 irq_stat)
-{
-	u32 idx;
-	int cnt = card->recv.size / 2;
-
-	/* Note receive is via the WRITE DMA channel */
-	card->last_is0 &= ~NJ_IRQM0_WR_MASK;
-	card->last_is0 |= (irq_stat & NJ_IRQM0_WR_MASK);
-
-	if (irq_stat & NJ_IRQM0_WR_END)
-		idx = cnt - 1;
-	else
-		idx = card->recv.size - 1;
-
-	if (test_bit(FLG_ACTIVE, &card->bc[0].bch.Flags))
-		read_dma(&card->bc[0], idx, cnt);
-	if (test_bit(FLG_ACTIVE, &card->bc[1].bch.Flags))
-		read_dma(&card->bc[1], idx, cnt);
-}
-
-/* sync with current DMA address at start or after exception */
-static void
-resync(struct tiger_ch *bc, struct tiger_hw *card)
-{
-	card->send.dmacur = inl(card->base | NJ_DMA_READ_ADR);
-	card->send.idx = (card->send.dmacur - card->send.dmastart) >> 2;
-	if (bc->free > card->send.size / 2)
-		bc->free = card->send.size / 2;
-	/* currently we simple sync to the next complete free area
-	 * this hast the advantage that we have always maximum time to
-	 * handle TX irq
-	 */
-	if (card->send.idx < ((card->send.size / 2) - 1))
-		bc->idx = (card->recv.size / 2) - 1;
-	else
-		bc->idx = card->recv.size - 1;
-	bc->txstate = TX_RUN;
-	pr_debug("%s: %s B%1d free %d idx %d/%d\n", card->name,
-		 __func__, bc->bch.nr, bc->free, bc->idx, card->send.idx);
-}
-
-static int bc_next_frame(struct tiger_ch *);
-
-static void
-fill_hdlc_flag(struct tiger_ch *bc)
-{
-	struct tiger_hw *card = bc->bch.hw;
-	int count, i;
-	u32 m, v;
-	u8  *p;
-
-	if (bc->free == 0)
-		return;
-	pr_debug("%s: %s B%1d %d state %x idx %d/%d\n", card->name,
-		 __func__, bc->bch.nr, bc->free, bc->txstate,
-		 bc->idx, card->send.idx);
-	if (bc->txstate & (TX_IDLE | TX_INIT | TX_UNDERRUN))
-		resync(bc, card);
-	count = isdnhdlc_encode(&bc->hsend, NULL, 0, &i,
-				bc->hsbuf, bc->free);
-	pr_debug("%s: B%1d hdlc encoded %d flags\n", card->name,
-		 bc->bch.nr, count);
-	bc->free -= count;
-	p = bc->hsbuf;
-	m = (bc->bch.nr & 1) ? 0xffffff00 : 0xffff00ff;
-	for (i = 0; i < count; i++) {
-		if (bc->idx >= card->send.size)
-			bc->idx = 0;
-		v = card->send.start[bc->idx];
-		v &= m;
-		v |= (bc->bch.nr & 1) ? (u32)(p[i]) : ((u32)(p[i])) << 8;
-		card->send.start[bc->idx++] = v;
-	}
-	if (debug & DEBUG_HW_BFIFO) {
-		snprintf(card->log, LOG_SIZE, "B%1d-send %s %d ",
-			 bc->bch.nr, card->name, count);
-		print_hex_dump_bytes(card->log, DUMP_PREFIX_OFFSET, p, count);
-	}
-}
-
-static void
-fill_dma(struct tiger_ch *bc)
-{
-	struct tiger_hw *card = bc->bch.hw;
-	int count, i, fillempty = 0;
-	u32 m, v, n = 0;
-	u8  *p;
-
-	if (bc->free == 0)
-		return;
-	if (!bc->bch.tx_skb) {
-		if (!test_bit(FLG_TX_EMPTY, &bc->bch.Flags))
-			return;
-		fillempty = 1;
-		count = card->send.size >> 1;
-		p = bc->bch.fill;
-	} else {
-		count = bc->bch.tx_skb->len - bc->bch.tx_idx;
-		if (count <= 0)
-			return;
-		pr_debug("%s: %s B%1d %d/%d/%d/%d state %x idx %d/%d\n",
-			 card->name, __func__, bc->bch.nr, count, bc->free,
-			 bc->bch.tx_idx, bc->bch.tx_skb->len, bc->txstate,
-			 bc->idx, card->send.idx);
-		p = bc->bch.tx_skb->data + bc->bch.tx_idx;
-	}
-	if (bc->txstate & (TX_IDLE | TX_INIT | TX_UNDERRUN))
-		resync(bc, card);
-	if (test_bit(FLG_HDLC, &bc->bch.Flags) && !fillempty) {
-		count = isdnhdlc_encode(&bc->hsend, p, count, &i,
-					bc->hsbuf, bc->free);
-		pr_debug("%s: B%1d hdlc encoded %d in %d\n", card->name,
-			 bc->bch.nr, i, count);
-		bc->bch.tx_idx += i;
-		bc->free -= count;
-		p = bc->hsbuf;
-	} else {
-		if (count > bc->free)
-			count = bc->free;
-		if (!fillempty)
-			bc->bch.tx_idx += count;
-		bc->free -= count;
-	}
-	m = (bc->bch.nr & 1) ? 0xffffff00 : 0xffff00ff;
-	if (fillempty) {
-		n = p[0];
-		if (!(bc->bch.nr & 1))
-			n <<= 8;
-		for (i = 0; i < count; i++) {
-			if (bc->idx >= card->send.size)
-				bc->idx = 0;
-			v = card->send.start[bc->idx];
-			v &= m;
-			v |= n;
-			card->send.start[bc->idx++] = v;
-		}
-	} else {
-		for (i = 0; i < count; i++) {
-			if (bc->idx >= card->send.size)
-				bc->idx = 0;
-			v = card->send.start[bc->idx];
-			v &= m;
-			n = p[i];
-			v |= (bc->bch.nr & 1) ? n : n << 8;
-			card->send.start[bc->idx++] = v;
-		}
-	}
-	if (debug & DEBUG_HW_BFIFO) {
-		snprintf(card->log, LOG_SIZE, "B%1d-send %s %d ",
-			 bc->bch.nr, card->name, count);
-		print_hex_dump_bytes(card->log, DUMP_PREFIX_OFFSET, p, count);
-	}
-	if (bc->free)
-		bc_next_frame(bc);
-}
-
-
-static int
-bc_next_frame(struct tiger_ch *bc)
-{
-	int ret = 1;
-
-	if (bc->bch.tx_skb && bc->bch.tx_idx < bc->bch.tx_skb->len) {
-		fill_dma(bc);
-	} else {
-		dev_kfree_skb(bc->bch.tx_skb);
-		if (get_next_bframe(&bc->bch)) {
-			fill_dma(bc);
-			test_and_clear_bit(FLG_TX_EMPTY, &bc->bch.Flags);
-		} else if (test_bit(FLG_TX_EMPTY, &bc->bch.Flags)) {
-			fill_dma(bc);
-		} else if (test_bit(FLG_FILLEMPTY, &bc->bch.Flags)) {
-			test_and_set_bit(FLG_TX_EMPTY, &bc->bch.Flags);
-			ret = 0;
-		} else {
-			ret = 0;
-		}
-	}
-	return ret;
-}
-
-static void
-send_tiger_bc(struct tiger_hw *card, struct tiger_ch *bc)
-{
-	int ret;
-
-	bc->free += card->send.size / 2;
-	if (bc->free >= card->send.size) {
-		if (!(bc->txstate & (TX_UNDERRUN | TX_INIT))) {
-			pr_info("%s: B%1d TX underrun state %x\n", card->name,
-				bc->bch.nr, bc->txstate);
-			bc->txstate |= TX_UNDERRUN;
-		}
-		bc->free = card->send.size;
-	}
-	ret = bc_next_frame(bc);
-	if (!ret) {
-		if (test_bit(FLG_HDLC, &bc->bch.Flags)) {
-			fill_hdlc_flag(bc);
-			return;
-		}
-		pr_debug("%s: B%1d TX no data free %d idx %d/%d\n", card->name,
-			 bc->bch.nr, bc->free, bc->idx, card->send.idx);
-		if (!(bc->txstate & (TX_IDLE | TX_INIT))) {
-			fill_mem(bc, bc->idx, bc->free, 0xff);
-			if (bc->free == card->send.size)
-				bc->txstate |= TX_IDLE;
-		}
-	}
-}
-
-static void
-send_tiger(struct tiger_hw *card, u8 irq_stat)
-{
-	int i;
-
-	/* Note send is via the READ DMA channel */
-	if ((irq_stat & card->last_is0) & NJ_IRQM0_RD_MASK) {
-		pr_info("%s: tiger warn write double dma %x/%x\n",
-			card->name, irq_stat, card->last_is0);
-		return;
-	} else {
-		card->last_is0 &= ~NJ_IRQM0_RD_MASK;
-		card->last_is0 |= (irq_stat & NJ_IRQM0_RD_MASK);
-	}
-	for (i = 0; i < 2; i++) {
-		if (test_bit(FLG_ACTIVE, &card->bc[i].bch.Flags))
-			send_tiger_bc(card, &card->bc[i]);
-	}
-}
-
-static irqreturn_t
-nj_irq(int intno, void *dev_id)
-{
-	struct tiger_hw *card = dev_id;
-	u8 val, s1val, s0val;
-
-	spin_lock(&card->lock);
-	s0val = inb(card->base | NJ_IRQSTAT0);
-	s1val = inb(card->base | NJ_IRQSTAT1);
-	if ((s1val & NJ_ISACIRQ) && (s0val == 0)) {
-		/* shared IRQ */
-		spin_unlock(&card->lock);
-		return IRQ_NONE;
-	}
-	pr_debug("%s: IRQSTAT0 %02x IRQSTAT1 %02x\n", card->name, s0val, s1val);
-	card->irqcnt++;
-	if (!(s1val & NJ_ISACIRQ)) {
-		val = ReadISAC_nj(card, ISAC_ISTA);
-		if (val)
-			mISDNisac_irq(&card->isac, val);
-	}
-
-	if (s0val)
-		/* write to clear */
-		outb(s0val, card->base | NJ_IRQSTAT0);
-	else
-		goto end;
-	s1val = s0val;
-	/* set bits in sval to indicate which page is free */
-	card->recv.dmacur = inl(card->base | NJ_DMA_WRITE_ADR);
-	card->recv.idx = (card->recv.dmacur - card->recv.dmastart) >> 2;
-	if (card->recv.dmacur < card->recv.dmairq)
-		s0val = 0x08;	/* the 2nd write area is free */
-	else
-		s0val = 0x04;	/* the 1st write area is free */
-
-	card->send.dmacur = inl(card->base | NJ_DMA_READ_ADR);
-	card->send.idx = (card->send.dmacur - card->send.dmastart) >> 2;
-	if (card->send.dmacur < card->send.dmairq)
-		s0val |= 0x02;	/* the 2nd read area is free */
-	else
-		s0val |= 0x01;	/* the 1st read area is free */
-
-	pr_debug("%s: DMA Status %02x/%02x/%02x %d/%d\n", card->name,
-		 s1val, s0val, card->last_is0,
-		 card->recv.idx, card->send.idx);
-	/* test if we have a DMA interrupt */
-	if (s0val != card->last_is0) {
-		if ((s0val & NJ_IRQM0_RD_MASK) !=
-		    (card->last_is0 & NJ_IRQM0_RD_MASK))
-			/* got a write dma int */
-			send_tiger(card, s0val);
-		if ((s0val & NJ_IRQM0_WR_MASK) !=
-		    (card->last_is0 & NJ_IRQM0_WR_MASK))
-			/* got a read dma int */
-			recv_tiger(card, s0val);
-	}
-end:
-	spin_unlock(&card->lock);
-	return IRQ_HANDLED;
-}
-
-static int
-nj_l2l1B(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	int ret = -EINVAL;
-	struct bchannel *bch = container_of(ch, struct bchannel, ch);
-	struct tiger_ch *bc = container_of(bch, struct tiger_ch, bch);
-	struct tiger_hw *card = bch->hw;
-	struct mISDNhead *hh = mISDN_HEAD_P(skb);
-	unsigned long flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		spin_lock_irqsave(&card->lock, flags);
-		ret = bchannel_senddata(bch, skb);
-		if (ret > 0) { /* direct TX */
-			fill_dma(bc);
-			ret = 0;
-		}
-		spin_unlock_irqrestore(&card->lock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		spin_lock_irqsave(&card->lock, flags);
-		if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags))
-			ret = mode_tiger(bc, ch->protocol);
-		else
-			ret = 0;
-		spin_unlock_irqrestore(&card->lock, flags);
-		if (!ret)
-			_queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY, 0,
-				    NULL, GFP_KERNEL);
-		break;
-	case PH_DEACTIVATE_REQ:
-		spin_lock_irqsave(&card->lock, flags);
-		mISDN_clear_bchannel(bch);
-		mode_tiger(bc, ISDN_P_NONE);
-		spin_unlock_irqrestore(&card->lock, flags);
-		_queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0,
-			    NULL, GFP_KERNEL);
-		ret = 0;
-		break;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static int
-channel_bctrl(struct tiger_ch *bc, struct mISDN_ctrl_req *cq)
-{
-	return mISDN_ctrl_bchannel(&bc->bch, cq);
-}
-
-static int
-nj_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
-{
-	struct bchannel *bch = container_of(ch, struct bchannel, ch);
-	struct tiger_ch *bc = container_of(bch, struct tiger_ch, bch);
-	struct tiger_hw *card  = bch->hw;
-	int ret = -EINVAL;
-	u_long flags;
-
-	pr_debug("%s: %s cmd:%x %p\n", card->name, __func__, cmd, arg);
-	switch (cmd) {
-	case CLOSE_CHANNEL:
-		test_and_clear_bit(FLG_OPEN, &bch->Flags);
-		cancel_work_sync(&bch->workq);
-		spin_lock_irqsave(&card->lock, flags);
-		mISDN_clear_bchannel(bch);
-		mode_tiger(bc, ISDN_P_NONE);
-		spin_unlock_irqrestore(&card->lock, flags);
-		ch->protocol = ISDN_P_NONE;
-		ch->peer = NULL;
-		module_put(THIS_MODULE);
-		ret = 0;
-		break;
-	case CONTROL_CHANNEL:
-		ret = channel_bctrl(bc, arg);
-		break;
-	default:
-		pr_info("%s: %s unknown prim(%x)\n", card->name, __func__, cmd);
-	}
-	return ret;
-}
-
-static int
-channel_ctrl(struct tiger_hw *card, struct mISDN_ctrl_req *cq)
-{
-	int	ret = 0;
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_LOOP | MISDN_CTRL_L1_TIMER3;
-		break;
-	case MISDN_CTRL_LOOP:
-		/* cq->channel: 0 disable, 1 B1 loop 2 B2 loop, 3 both */
-		if (cq->channel < 0 || cq->channel > 3) {
-			ret = -EINVAL;
-			break;
-		}
-		ret = card->isac.ctrl(&card->isac, HW_TESTLOOP, cq->channel);
-		break;
-	case MISDN_CTRL_L1_TIMER3:
-		ret = card->isac.ctrl(&card->isac, HW_TIMER3_VALUE, cq->p1);
-		break;
-	default:
-		pr_info("%s: %s unknown Op %x\n", card->name, __func__, cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-open_bchannel(struct tiger_hw *card, struct channel_req *rq)
-{
-	struct bchannel *bch;
-
-	if (rq->adr.channel == 0 || rq->adr.channel > 2)
-		return -EINVAL;
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	bch = &card->bc[rq->adr.channel - 1].bch;
-	if (test_and_set_bit(FLG_OPEN, &bch->Flags))
-		return -EBUSY; /* b-channel can be only open once */
-	test_and_clear_bit(FLG_FILLEMPTY, &bch->Flags);
-	bch->ch.protocol = rq->protocol;
-	rq->ch = &bch->ch;
-	return 0;
-}
-
-/*
- * device control function
- */
-static int
-nj_dctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct tiger_hw	*card = dch->hw;
-	struct channel_req	*rq;
-	int			err = 0;
-
-	pr_debug("%s: %s cmd:%x %p\n", card->name, __func__, cmd, arg);
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		rq = arg;
-		if (rq->protocol == ISDN_P_TE_S0)
-			err = card->isac.open(&card->isac, rq);
-		else
-			err = open_bchannel(card, rq);
-		if (err)
-			break;
-		if (!try_module_get(THIS_MODULE))
-			pr_info("%s: cannot get module\n", card->name);
-		break;
-	case CLOSE_CHANNEL:
-		pr_debug("%s: dev(%d) close from %p\n", card->name, dch->dev.id,
-			 __builtin_return_address(0));
-		module_put(THIS_MODULE);
-		break;
-	case CONTROL_CHANNEL:
-		err = channel_ctrl(card, arg);
-		break;
-	default:
-		pr_debug("%s: %s unknown command %x\n",
-			 card->name, __func__, cmd);
-		return -EINVAL;
-	}
-	return err;
-}
-
-static int
-nj_init_card(struct tiger_hw *card)
-{
-	u_long flags;
-	int ret;
-
-	spin_lock_irqsave(&card->lock, flags);
-	nj_disable_hwirq(card);
-	spin_unlock_irqrestore(&card->lock, flags);
-
-	card->irq = card->pdev->irq;
-	if (request_irq(card->irq, nj_irq, IRQF_SHARED, card->name, card)) {
-		pr_info("%s: couldn't get interrupt %d\n",
-			card->name, card->irq);
-		card->irq = -1;
-		return -EIO;
-	}
-
-	spin_lock_irqsave(&card->lock, flags);
-	nj_reset(card);
-	ret = card->isac.init(&card->isac);
-	if (ret)
-		goto error;
-	ret = inittiger(card);
-	if (ret)
-		goto error;
-	mode_tiger(&card->bc[0], ISDN_P_NONE);
-	mode_tiger(&card->bc[1], ISDN_P_NONE);
-error:
-	spin_unlock_irqrestore(&card->lock, flags);
-	return ret;
-}
-
-
-static void
-nj_release(struct tiger_hw *card)
-{
-	u_long flags;
-	int i;
-
-	if (card->base_s) {
-		spin_lock_irqsave(&card->lock, flags);
-		nj_disable_hwirq(card);
-		mode_tiger(&card->bc[0], ISDN_P_NONE);
-		mode_tiger(&card->bc[1], ISDN_P_NONE);
-		spin_unlock_irqrestore(&card->lock, flags);
-		card->isac.release(&card->isac);
-		release_region(card->base, card->base_s);
-		card->base_s = 0;
-	}
-	if (card->irq > 0)
-		free_irq(card->irq, card);
-	if (device_is_registered(&card->isac.dch.dev.dev))
-		mISDN_unregister_device(&card->isac.dch.dev);
-
-	for (i = 0; i < 2; i++) {
-		mISDN_freebchannel(&card->bc[i].bch);
-		kfree(card->bc[i].hsbuf);
-		kfree(card->bc[i].hrbuf);
-	}
-	if (card->dma_p)
-		dma_free_coherent(&card->pdev->dev, NJ_DMA_SIZE, card->dma_p,
-				  card->dma);
-	write_lock_irqsave(&card_lock, flags);
-	list_del(&card->list);
-	write_unlock_irqrestore(&card_lock, flags);
-	pci_disable_device(card->pdev);
-	pci_set_drvdata(card->pdev, NULL);
-	kfree(card);
-}
-
-
-static int
-nj_setup(struct tiger_hw *card)
-{
-	card->base = pci_resource_start(card->pdev, 0);
-	card->base_s = pci_resource_len(card->pdev, 0);
-	if (!request_region(card->base, card->base_s, card->name)) {
-		pr_info("%s: NETjet config port %#x-%#x already in use\n",
-			card->name, card->base,
-			(u32)(card->base + card->base_s - 1));
-		card->base_s = 0;
-		return -EIO;
-	}
-	ASSIGN_FUNC(nj, ISAC, card->isac);
-	return 0;
-}
-
-
-static int
-setup_instance(struct tiger_hw *card)
-{
-	int i, err;
-	u_long flags;
-
-	snprintf(card->name, MISDN_MAX_IDLEN - 1, "netjet.%d", nj_cnt + 1);
-	write_lock_irqsave(&card_lock, flags);
-	list_add_tail(&card->list, &Cards);
-	write_unlock_irqrestore(&card_lock, flags);
-
-	_set_debug(card);
-	card->isac.name = card->name;
-	spin_lock_init(&card->lock);
-	card->isac.hwlock = &card->lock;
-	mISDNisac_init(&card->isac, card);
-
-	card->isac.dch.dev.Bprotocols = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK));
-	card->isac.dch.dev.D.ctrl = nj_dctrl;
-	for (i = 0; i < 2; i++) {
-		card->bc[i].bch.nr = i + 1;
-		set_channelmap(i + 1, card->isac.dch.dev.channelmap);
-		mISDN_initbchannel(&card->bc[i].bch, MAX_DATA_MEM,
-				   NJ_DMA_RXSIZE >> 1);
-		card->bc[i].bch.hw = card;
-		card->bc[i].bch.ch.send = nj_l2l1B;
-		card->bc[i].bch.ch.ctrl = nj_bctrl;
-		card->bc[i].bch.ch.nr = i + 1;
-		list_add(&card->bc[i].bch.ch.list,
-			 &card->isac.dch.dev.bchannels);
-		card->bc[i].bch.hw = card;
-	}
-	err = nj_setup(card);
-	if (err)
-		goto error;
-	err = mISDN_register_device(&card->isac.dch.dev, &card->pdev->dev,
-				    card->name);
-	if (err)
-		goto error;
-	err = nj_init_card(card);
-	if (!err)  {
-		nj_cnt++;
-		pr_notice("Netjet %d cards installed\n", nj_cnt);
-		return 0;
-	}
-error:
-	nj_release(card);
-	return err;
-}
-
-static int
-nj_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	int err = -ENOMEM;
-	int cfg;
-	struct tiger_hw *card;
-
-	if (pdev->subsystem_vendor == 0x8086 &&
-	    pdev->subsystem_device == 0x0003) {
-		pr_notice("Netjet: Digium X100P/X101P not handled\n");
-		return -ENODEV;
-	}
-
-	if (pdev->subsystem_vendor == 0x55 &&
-	    pdev->subsystem_device == 0x02) {
-		pr_notice("Netjet: Enter!Now not handled yet\n");
-		return -ENODEV;
-	}
-
-	if (pdev->subsystem_vendor == 0xb100 &&
-	    pdev->subsystem_device == 0x0003) {
-		pr_notice("Netjet: Digium TDM400P not handled yet\n");
-		return -ENODEV;
-	}
-
-	card = kzalloc_obj(struct tiger_hw);
-	if (!card) {
-		pr_info("No kmem for Netjet\n");
-		return err;
-	}
-
-	card->pdev = pdev;
-
-	err = pci_enable_device(pdev);
-	if (err) {
-		kfree(card);
-		return err;
-	}
-
-	printk(KERN_INFO "nj_probe(mISDN): found adapter at %s\n",
-	       pci_name(pdev));
-
-	pci_set_master(pdev);
-
-	/* the TJ300 and TJ320 must be detected, the IRQ handling is different
-	 * unfortunately the chips use the same device ID, but the TJ320 has
-	 * the bit20 in status PCI cfg register set
-	 */
-	pci_read_config_dword(pdev, 0x04, &cfg);
-	if (cfg & 0x00100000)
-		card->typ = NETJET_S_TJ320;
-	else
-		card->typ = NETJET_S_TJ300;
-
-	card->base = pci_resource_start(pdev, 0);
-	pci_set_drvdata(pdev, card);
-	err = setup_instance(card);
-	if (err)
-		pci_set_drvdata(pdev, NULL);
-
-	return err;
-}
-
-
-static void nj_remove(struct pci_dev *pdev)
-{
-	struct tiger_hw *card = pci_get_drvdata(pdev);
-
-	if (card)
-		nj_release(card);
-	else
-		pr_info("%s drvdata already removed\n", __func__);
-}
-
-/* We cannot select cards with PCI_SUB... IDs, since here are cards with
- * SUB IDs set to PCI_ANY_ID, so we need to match all and reject
- * known other cards which not work with this driver - see probe function */
-static const struct pci_device_id nj_pci_ids[] = {
-	{ PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_300,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{ }
-};
-MODULE_DEVICE_TABLE(pci, nj_pci_ids);
-
-static struct pci_driver nj_driver = {
-	.name = "netjet",
-	.probe = nj_probe,
-	.remove = nj_remove,
-	.id_table = nj_pci_ids,
-};
-
-static int __init nj_init(void)
-{
-	int err;
-
-	pr_notice("Netjet PCI driver Rev. %s\n", NETJET_REV);
-	err = pci_register_driver(&nj_driver);
-	return err;
-}
-
-static void __exit nj_cleanup(void)
-{
-	pci_unregister_driver(&nj_driver);
-}
-
-module_init(nj_init);
-module_exit(nj_cleanup);
diff --git a/drivers/isdn/hardware/mISDN/netjet.h b/drivers/isdn/hardware/mISDN/netjet.h
deleted file mode 100644
index b23ad9f6d4d0..000000000000
--- a/drivers/isdn/hardware/mISDN/netjet.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * NETjet common header file
- *
- * Author	Karsten Keil
- *              based on work of Matt Henderson and Daniel Potts,
- *              Traverse Technologies P/L www.traverse.com.au
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-#define NJ_CTRL			0x00
-#define NJ_DMACTRL		0x01
-#define NJ_AUXCTRL		0x02
-#define NJ_AUXDATA		0x03
-#define NJ_IRQMASK0		0x04
-#define NJ_IRQMASK1		0x05
-#define NJ_IRQSTAT0		0x06
-#define NJ_IRQSTAT1		0x07
-#define NJ_DMA_READ_START	0x08
-#define NJ_DMA_READ_IRQ		0x0c
-#define NJ_DMA_READ_END		0x10
-#define NJ_DMA_READ_ADR		0x14
-#define NJ_DMA_WRITE_START	0x18
-#define NJ_DMA_WRITE_IRQ	0x1c
-#define NJ_DMA_WRITE_END	0x20
-#define NJ_DMA_WRITE_ADR	0x24
-#define NJ_PULSE_CNT		0x28
-
-#define NJ_ISAC_OFF		0xc0
-#define NJ_ISACIRQ		0x10
-
-#define NJ_IRQM0_RD_MASK	0x03
-#define NJ_IRQM0_RD_IRQ		0x01
-#define NJ_IRQM0_RD_END		0x02
-#define NJ_IRQM0_WR_MASK	0x0c
-#define NJ_IRQM0_WR_IRQ		0x04
-#define NJ_IRQM0_WR_END		0x08
-
-/* one page here is no need to be smaller */
-#define NJ_DMA_SIZE		4096
-/* 2 * 64 byte is a compromise between IRQ count and latency */
-#define NJ_DMA_RXSIZE		128  /* 2 * 64 */
-#define NJ_DMA_TXSIZE		128  /* 2 * 64 */
diff --git a/drivers/isdn/hardware/mISDN/speedfax.c b/drivers/isdn/hardware/mISDN/speedfax.c
deleted file mode 100644
index ab24c3c460e6..000000000000
--- a/drivers/isdn/hardware/mISDN/speedfax.c
+++ /dev/null
@@ -1,520 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * speedfax.c	low level stuff for Sedlbauer Speedfax+ cards
- *		based on the ISAR DSP
- *		Thanks to Sedlbauer AG for informations and HW
- *
- * Author       Karsten Keil <keil@isdn4linux.de>
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/mISDNhw.h>
-#include <linux/firmware.h>
-#include "ipac.h"
-#include "isar.h"
-
-#define SPEEDFAX_REV	"2.0"
-
-#define PCI_SUBVENDOR_SPEEDFAX_PYRAMID	0x51
-#define PCI_SUBVENDOR_SPEEDFAX_PCI	0x54
-#define PCI_SUB_ID_SEDLBAUER		0x01
-
-#define SFAX_PCI_ADDR		0xc8
-#define SFAX_PCI_ISAC		0xd0
-#define SFAX_PCI_ISAR		0xe0
-
-/* TIGER 100 Registers */
-
-#define TIGER_RESET_ADDR	0x00
-#define TIGER_EXTERN_RESET_ON	0x01
-#define TIGER_EXTERN_RESET_OFF	0x00
-#define TIGER_AUX_CTRL		0x02
-#define TIGER_AUX_DATA		0x03
-#define TIGER_AUX_IRQMASK	0x05
-#define TIGER_AUX_STATUS	0x07
-
-/* Tiger AUX BITs */
-#define SFAX_AUX_IOMASK		0xdd	/* 1 and 5 are inputs */
-#define SFAX_ISAR_RESET_BIT_OFF 0x00
-#define SFAX_ISAR_RESET_BIT_ON	0x01
-#define SFAX_TIGER_IRQ_BIT	0x02
-#define SFAX_LED1_BIT		0x08
-#define SFAX_LED2_BIT		0x10
-
-#define SFAX_PCI_RESET_ON	(SFAX_ISAR_RESET_BIT_ON)
-#define SFAX_PCI_RESET_OFF	(SFAX_LED1_BIT | SFAX_LED2_BIT)
-
-static int sfax_cnt;
-static u32 debug;
-static u32 irqloops = 4;
-
-struct sfax_hw {
-	struct list_head	list;
-	struct pci_dev		*pdev;
-	char			name[MISDN_MAX_IDLEN];
-	u32			irq;
-	u32			irqcnt;
-	u32			cfg;
-	struct _ioport		p_isac;
-	struct _ioport		p_isar;
-	u8			aux_data;
-	spinlock_t		lock;	/* HW access lock */
-	struct isac_hw		isac;
-	struct isar_hw		isar;
-};
-
-static LIST_HEAD(Cards);
-static DEFINE_RWLOCK(card_lock); /* protect Cards */
-
-static void
-_set_debug(struct sfax_hw *card)
-{
-	card->isac.dch.debug = debug;
-	card->isar.ch[0].bch.debug = debug;
-	card->isar.ch[1].bch.debug = debug;
-}
-
-static int
-set_debug(const char *val, const struct kernel_param *kp)
-{
-	int ret;
-	struct sfax_hw *card;
-
-	ret = param_set_uint(val, kp);
-	if (!ret) {
-		read_lock(&card_lock);
-		list_for_each_entry(card, &Cards, list)
-			_set_debug(card);
-		read_unlock(&card_lock);
-	}
-	return ret;
-}
-
-MODULE_AUTHOR("Karsten Keil");
-MODULE_DESCRIPTION("mISDN driver for Sedlbauer Speedfax+ cards");
-MODULE_LICENSE("GPL v2");
-MODULE_VERSION(SPEEDFAX_REV);
-MODULE_FIRMWARE("isdn/ISAR.BIN");
-module_param_call(debug, set_debug, param_get_uint, &debug, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug, "Speedfax debug mask");
-module_param(irqloops, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(irqloops, "Speedfax maximal irqloops (default 4)");
-
-IOFUNC_IND(ISAC, sfax_hw, p_isac)
-IOFUNC_IND(ISAR, sfax_hw, p_isar)
-
-static irqreturn_t
-speedfax_irq(int intno, void *dev_id)
-{
-	struct sfax_hw	*sf = dev_id;
-	u8 val;
-	int cnt = irqloops;
-
-	spin_lock(&sf->lock);
-	val = inb(sf->cfg + TIGER_AUX_STATUS);
-	if (val & SFAX_TIGER_IRQ_BIT) { /* for us or shared ? */
-		spin_unlock(&sf->lock);
-		return IRQ_NONE; /* shared */
-	}
-	sf->irqcnt++;
-	val = ReadISAR_IND(sf, ISAR_IRQBIT);
-Start_ISAR:
-	if (val & ISAR_IRQSTA)
-		mISDNisar_irq(&sf->isar);
-	val = ReadISAC_IND(sf, ISAC_ISTA);
-	if (val)
-		mISDNisac_irq(&sf->isac, val);
-	val = ReadISAR_IND(sf, ISAR_IRQBIT);
-	if ((val & ISAR_IRQSTA) && cnt--)
-		goto Start_ISAR;
-	if (cnt < irqloops)
-		pr_debug("%s: %d irqloops cpu%d\n", sf->name,
-			 irqloops - cnt, smp_processor_id());
-	if (irqloops && !cnt)
-		pr_notice("%s: %d IRQ LOOP cpu%d\n", sf->name,
-			  irqloops, smp_processor_id());
-	spin_unlock(&sf->lock);
-	return IRQ_HANDLED;
-}
-
-static void
-enable_hwirq(struct sfax_hw *sf)
-{
-	WriteISAC_IND(sf, ISAC_MASK, 0);
-	WriteISAR_IND(sf, ISAR_IRQBIT, ISAR_IRQMSK);
-	outb(SFAX_TIGER_IRQ_BIT, sf->cfg + TIGER_AUX_IRQMASK);
-}
-
-static void
-disable_hwirq(struct sfax_hw *sf)
-{
-	WriteISAC_IND(sf, ISAC_MASK, 0xFF);
-	WriteISAR_IND(sf, ISAR_IRQBIT, 0);
-	outb(0, sf->cfg + TIGER_AUX_IRQMASK);
-}
-
-static void
-reset_speedfax(struct sfax_hw *sf)
-{
-
-	pr_debug("%s: resetting card\n", sf->name);
-	outb(TIGER_EXTERN_RESET_ON, sf->cfg + TIGER_RESET_ADDR);
-	outb(SFAX_PCI_RESET_ON, sf->cfg + TIGER_AUX_DATA);
-	mdelay(1);
-	outb(TIGER_EXTERN_RESET_OFF, sf->cfg + TIGER_RESET_ADDR);
-	sf->aux_data = SFAX_PCI_RESET_OFF;
-	outb(sf->aux_data, sf->cfg + TIGER_AUX_DATA);
-	mdelay(1);
-}
-
-static int
-sfax_ctrl(struct sfax_hw  *sf, u32 cmd, u_long arg)
-{
-	int ret = 0;
-
-	switch (cmd) {
-	case HW_RESET_REQ:
-		reset_speedfax(sf);
-		break;
-	case HW_ACTIVATE_IND:
-		if (arg & 1)
-			sf->aux_data &= ~SFAX_LED1_BIT;
-		if (arg & 2)
-			sf->aux_data &= ~SFAX_LED2_BIT;
-		outb(sf->aux_data, sf->cfg + TIGER_AUX_DATA);
-		break;
-	case HW_DEACT_IND:
-		if (arg & 1)
-			sf->aux_data |= SFAX_LED1_BIT;
-		if (arg & 2)
-			sf->aux_data |= SFAX_LED2_BIT;
-		outb(sf->aux_data, sf->cfg + TIGER_AUX_DATA);
-		break;
-	default:
-		pr_info("%s: %s unknown command %x %lx\n",
-			sf->name, __func__, cmd, arg);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-channel_ctrl(struct sfax_hw  *sf, struct mISDN_ctrl_req *cq)
-{
-	int	ret = 0;
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_LOOP | MISDN_CTRL_L1_TIMER3;
-		break;
-	case MISDN_CTRL_LOOP:
-		/* cq->channel: 0 disable, 1 B1 loop 2 B2 loop, 3 both */
-		if (cq->channel < 0 || cq->channel > 3) {
-			ret = -EINVAL;
-			break;
-		}
-		ret = sf->isac.ctrl(&sf->isac, HW_TESTLOOP, cq->channel);
-		break;
-	case MISDN_CTRL_L1_TIMER3:
-		ret = sf->isac.ctrl(&sf->isac, HW_TIMER3_VALUE, cq->p1);
-		break;
-	default:
-		pr_info("%s: unknown Op %x\n", sf->name, cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-sfax_dctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct sfax_hw		*sf = dch->hw;
-	struct channel_req	*rq;
-	int			err = 0;
-
-	pr_debug("%s: cmd:%x %p\n", sf->name, cmd, arg);
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		rq = arg;
-		if (rq->protocol == ISDN_P_TE_S0)
-			err = sf->isac.open(&sf->isac, rq);
-		else
-			err = sf->isar.open(&sf->isar, rq);
-		if (err)
-			break;
-		if (!try_module_get(THIS_MODULE))
-			pr_info("%s: cannot get module\n", sf->name);
-		break;
-	case CLOSE_CHANNEL:
-		pr_debug("%s: dev(%d) close from %p\n", sf->name,
-			 dch->dev.id, __builtin_return_address(0));
-		module_put(THIS_MODULE);
-		break;
-	case CONTROL_CHANNEL:
-		err = channel_ctrl(sf, arg);
-		break;
-	default:
-		pr_debug("%s: unknown command %x\n", sf->name, cmd);
-		return -EINVAL;
-	}
-	return err;
-}
-
-static int
-init_card(struct sfax_hw *sf)
-{
-	int	ret, cnt = 3;
-	u_long	flags;
-
-	ret = request_irq(sf->irq, speedfax_irq, IRQF_SHARED, sf->name, sf);
-	if (ret) {
-		pr_info("%s: couldn't get interrupt %d\n", sf->name, sf->irq);
-		return ret;
-	}
-	while (cnt--) {
-		spin_lock_irqsave(&sf->lock, flags);
-		ret = sf->isac.init(&sf->isac);
-		if (ret) {
-			spin_unlock_irqrestore(&sf->lock, flags);
-			pr_info("%s: ISAC init failed with %d\n",
-				sf->name, ret);
-			break;
-		}
-		enable_hwirq(sf);
-		/* RESET Receiver and Transmitter */
-		WriteISAC_IND(sf, ISAC_CMDR, 0x41);
-		spin_unlock_irqrestore(&sf->lock, flags);
-		msleep_interruptible(10);
-		if (debug & DEBUG_HW)
-			pr_notice("%s: IRQ %d count %d\n", sf->name,
-				  sf->irq, sf->irqcnt);
-		if (!sf->irqcnt) {
-			pr_info("%s: IRQ(%d) got no requests during init %d\n",
-				sf->name, sf->irq, 3 - cnt);
-		} else
-			return 0;
-	}
-	free_irq(sf->irq, sf);
-	return -EIO;
-}
-
-
-static int
-setup_speedfax(struct sfax_hw *sf)
-{
-	u_long flags;
-
-	if (!request_region(sf->cfg, 256, sf->name)) {
-		pr_info("mISDN: %s config port %x-%x already in use\n",
-			sf->name, sf->cfg, sf->cfg + 255);
-		return -EIO;
-	}
-	outb(0xff, sf->cfg);
-	outb(0, sf->cfg);
-	outb(0xdd, sf->cfg + TIGER_AUX_CTRL);
-	outb(0, sf->cfg + TIGER_AUX_IRQMASK);
-
-	sf->isac.type = IPAC_TYPE_ISAC;
-	sf->p_isac.ale = sf->cfg + SFAX_PCI_ADDR;
-	sf->p_isac.port = sf->cfg + SFAX_PCI_ISAC;
-	sf->p_isar.ale = sf->cfg + SFAX_PCI_ADDR;
-	sf->p_isar.port = sf->cfg + SFAX_PCI_ISAR;
-	ASSIGN_FUNC(IND, ISAC, sf->isac);
-	ASSIGN_FUNC(IND, ISAR, sf->isar);
-	spin_lock_irqsave(&sf->lock, flags);
-	reset_speedfax(sf);
-	disable_hwirq(sf);
-	spin_unlock_irqrestore(&sf->lock, flags);
-	return 0;
-}
-
-static void
-release_card(struct sfax_hw *card) {
-	u_long	flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	disable_hwirq(card);
-	spin_unlock_irqrestore(&card->lock, flags);
-	card->isac.release(&card->isac);
-	free_irq(card->irq, card);
-	card->isar.release(&card->isar);
-	mISDN_unregister_device(&card->isac.dch.dev);
-	release_region(card->cfg, 256);
-	pci_disable_device(card->pdev);
-	pci_set_drvdata(card->pdev, NULL);
-	write_lock_irqsave(&card_lock, flags);
-	list_del(&card->list);
-	write_unlock_irqrestore(&card_lock, flags);
-	kfree(card);
-	sfax_cnt--;
-}
-
-static int
-setup_instance(struct sfax_hw *card)
-{
-	const struct firmware *firmware;
-	int i, err;
-	u_long flags;
-
-	snprintf(card->name, MISDN_MAX_IDLEN - 1, "Speedfax.%d", sfax_cnt + 1);
-	write_lock_irqsave(&card_lock, flags);
-	list_add_tail(&card->list, &Cards);
-	write_unlock_irqrestore(&card_lock, flags);
-	_set_debug(card);
-	spin_lock_init(&card->lock);
-	card->isac.hwlock = &card->lock;
-	card->isar.hwlock = &card->lock;
-	card->isar.ctrl = (void *)&sfax_ctrl;
-	card->isac.name = card->name;
-	card->isar.name = card->name;
-	card->isar.owner = THIS_MODULE;
-
-	err = request_firmware(&firmware, "isdn/ISAR.BIN", &card->pdev->dev);
-	if (err < 0) {
-		pr_info("%s: firmware request failed %d\n",
-			card->name, err);
-		goto error_fw;
-	}
-	if (debug & DEBUG_HW)
-		pr_notice("%s: got firmware %zu bytes\n",
-			  card->name, firmware->size);
-
-	mISDNisac_init(&card->isac, card);
-
-	card->isac.dch.dev.D.ctrl = sfax_dctrl;
-	card->isac.dch.dev.Bprotocols =
-		mISDNisar_init(&card->isar, card);
-	for (i = 0; i < 2; i++) {
-		set_channelmap(i + 1, card->isac.dch.dev.channelmap);
-		list_add(&card->isar.ch[i].bch.ch.list,
-			 &card->isac.dch.dev.bchannels);
-	}
-
-	err = setup_speedfax(card);
-	if (err)
-		goto error_setup;
-	err = card->isar.init(&card->isar);
-	if (err)
-		goto error;
-	err = mISDN_register_device(&card->isac.dch.dev,
-				    &card->pdev->dev, card->name);
-	if (err)
-		goto error;
-	err = init_card(card);
-	if (err)
-		goto error_init;
-	err = card->isar.firmware(&card->isar, firmware->data, firmware->size);
-	if (!err)  {
-		release_firmware(firmware);
-		sfax_cnt++;
-		pr_notice("SpeedFax %d cards installed\n", sfax_cnt);
-		return 0;
-	}
-	disable_hwirq(card);
-	free_irq(card->irq, card);
-error_init:
-	mISDN_unregister_device(&card->isac.dch.dev);
-error:
-	release_region(card->cfg, 256);
-error_setup:
-	card->isac.release(&card->isac);
-	card->isar.release(&card->isar);
-	release_firmware(firmware);
-error_fw:
-	pci_disable_device(card->pdev);
-	write_lock_irqsave(&card_lock, flags);
-	list_del(&card->list);
-	write_unlock_irqrestore(&card_lock, flags);
-	kfree(card);
-	return err;
-}
-
-static int
-sfaxpci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	int err = -ENOMEM;
-	struct sfax_hw *card = kzalloc_obj(struct sfax_hw);
-
-	if (!card) {
-		pr_info("No memory for Speedfax+ PCI\n");
-		return err;
-	}
-	card->pdev = pdev;
-	err = pci_enable_device(pdev);
-	if (err) {
-		kfree(card);
-		return err;
-	}
-
-	pr_notice("mISDN: Speedfax found adapter %s at %s\n",
-		  (char *)ent->driver_data, pci_name(pdev));
-
-	card->cfg = pci_resource_start(pdev, 0);
-	card->irq = pdev->irq;
-	pci_set_drvdata(pdev, card);
-	err = setup_instance(card);
-	if (err)
-		pci_set_drvdata(pdev, NULL);
-	return err;
-}
-
-static void
-sfax_remove_pci(struct pci_dev *pdev)
-{
-	struct sfax_hw	*card = pci_get_drvdata(pdev);
-
-	if (card)
-		release_card(card);
-	else
-		pr_debug("%s: drvdata already removed\n", __func__);
-}
-
-static struct pci_device_id sfaxpci_ids[] = {
-	{ PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_100,
-	  PCI_SUBVENDOR_SPEEDFAX_PYRAMID, PCI_SUB_ID_SEDLBAUER,
-	  0, 0, (unsigned long) "Pyramid Speedfax + PCI"
-	},
-	{ PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_100,
-	  PCI_SUBVENDOR_SPEEDFAX_PCI, PCI_SUB_ID_SEDLBAUER,
-	  0, 0, (unsigned long) "Sedlbauer Speedfax + PCI"
-	},
-	{ }
-};
-MODULE_DEVICE_TABLE(pci, sfaxpci_ids);
-
-static struct pci_driver sfaxpci_driver = {
-	.name = "speedfax+ pci",
-	.probe = sfaxpci_probe,
-	.remove = sfax_remove_pci,
-	.id_table = sfaxpci_ids,
-};
-
-static int __init
-Speedfax_init(void)
-{
-	int err;
-
-	pr_notice("Sedlbauer Speedfax+ Driver Rev. %s\n",
-		  SPEEDFAX_REV);
-	err = pci_register_driver(&sfaxpci_driver);
-	return err;
-}
-
-static void __exit
-Speedfax_cleanup(void)
-{
-	pci_unregister_driver(&sfaxpci_driver);
-}
-
-module_init(Speedfax_init);
-module_exit(Speedfax_cleanup);
diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c
deleted file mode 100644
index a341470c042f..000000000000
--- a/drivers/isdn/hardware/mISDN/w6692.c
+++ /dev/null
@@ -1,1417 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * w6692.c     mISDN driver for Winbond w6692 based cards
- *
- * Author      Karsten Keil <kkeil@suse.de>
- *             based on the w6692 I4L driver from Petr Novak <petr.novak@i.cz>
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/mISDNhw.h>
-#include <linux/slab.h>
-#include "w6692.h"
-
-#define W6692_REV	"2.0"
-
-#define DBUSY_TIMER_VALUE	80
-
-enum {
-	W6692_ASUS,
-	W6692_WINBOND,
-	W6692_USR
-};
-
-/* private data in the PCI devices list */
-struct w6692map {
-	u_int	subtype;
-	char	*name;
-};
-
-static const struct w6692map  w6692_map[] =
-{
-	{W6692_ASUS, "Dynalink/AsusCom IS64PH"},
-	{W6692_WINBOND, "Winbond W6692"},
-	{W6692_USR, "USR W6692"}
-};
-
-#define PCI_DEVICE_ID_USR_6692	0x3409
-
-struct w6692_ch {
-	struct bchannel		bch;
-	u32			addr;
-	struct timer_list	timer;
-	u8			b_mode;
-};
-
-struct w6692_hw {
-	struct list_head	list;
-	struct pci_dev		*pdev;
-	char			name[MISDN_MAX_IDLEN];
-	u32			irq;
-	u32			irqcnt;
-	u32			addr;
-	u32			fmask;	/* feature mask - bit set per card nr */
-	int			subtype;
-	spinlock_t		lock;	/* hw lock */
-	u8			imask;
-	u8			pctl;
-	u8			xaddr;
-	u8			xdata;
-	u8			state;
-	struct w6692_ch		bc[2];
-	struct dchannel		dch;
-	char			log[64];
-};
-
-static LIST_HEAD(Cards);
-static DEFINE_RWLOCK(card_lock); /* protect Cards */
-
-static int w6692_cnt;
-static int debug;
-static u32 led;
-static u32 pots;
-
-static void
-_set_debug(struct w6692_hw *card)
-{
-	card->dch.debug = debug;
-	card->bc[0].bch.debug = debug;
-	card->bc[1].bch.debug = debug;
-}
-
-static int
-set_debug(const char *val, const struct kernel_param *kp)
-{
-	int ret;
-	struct w6692_hw *card;
-
-	ret = param_set_uint(val, kp);
-	if (!ret) {
-		read_lock(&card_lock);
-		list_for_each_entry(card, &Cards, list)
-			_set_debug(card);
-		read_unlock(&card_lock);
-	}
-	return ret;
-}
-
-MODULE_AUTHOR("Karsten Keil");
-MODULE_DESCRIPTION("mISDN driver for Winbond w6692 based cards");
-MODULE_LICENSE("GPL v2");
-MODULE_VERSION(W6692_REV);
-module_param_call(debug, set_debug, param_get_uint, &debug, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug, "W6692 debug mask");
-module_param(led, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(led, "W6692 LED support bitmask (one bit per card)");
-module_param(pots, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(pots, "W6692 POTS support bitmask (one bit per card)");
-
-static inline u8
-ReadW6692(struct w6692_hw *card, u8 offset)
-{
-	return inb(card->addr + offset);
-}
-
-static inline void
-WriteW6692(struct w6692_hw *card, u8 offset, u8 value)
-{
-	outb(value, card->addr + offset);
-}
-
-static inline u8
-ReadW6692B(struct w6692_ch *bc, u8 offset)
-{
-	return inb(bc->addr + offset);
-}
-
-static inline void
-WriteW6692B(struct w6692_ch *bc, u8 offset, u8 value)
-{
-	outb(value, bc->addr + offset);
-}
-
-static void
-enable_hwirq(struct w6692_hw *card)
-{
-	WriteW6692(card, W_IMASK, card->imask);
-}
-
-static void
-disable_hwirq(struct w6692_hw *card)
-{
-	WriteW6692(card, W_IMASK, 0xff);
-}
-
-static const char *W6692Ver[] = {"V00", "V01", "V10", "V11"};
-
-static void
-W6692Version(struct w6692_hw *card)
-{
-	int val;
-
-	val = ReadW6692(card, W_D_RBCH);
-	pr_notice("%s: Winbond W6692 version: %s\n", card->name,
-		  W6692Ver[(val >> 6) & 3]);
-}
-
-static void
-w6692_led_handler(struct w6692_hw *card, int on)
-{
-	if ((!(card->fmask & led)) || card->subtype == W6692_USR)
-		return;
-	if (on) {
-		card->xdata &= 0xfb;	/*  LED ON */
-		WriteW6692(card, W_XDATA, card->xdata);
-	} else {
-		card->xdata |= 0x04;	/*  LED OFF */
-		WriteW6692(card, W_XDATA, card->xdata);
-	}
-}
-
-static void
-ph_command(struct w6692_hw *card, u8 cmd)
-{
-	pr_debug("%s: ph_command %x\n", card->name, cmd);
-	WriteW6692(card, W_CIX, cmd);
-}
-
-static void
-W6692_new_ph(struct w6692_hw *card)
-{
-	if (card->state == W_L1CMD_RST)
-		ph_command(card, W_L1CMD_DRC);
-	schedule_event(&card->dch, FLG_PHCHANGE);
-}
-
-static void
-W6692_ph_bh(struct dchannel *dch)
-{
-	struct w6692_hw *card = dch->hw;
-
-	switch (card->state) {
-	case W_L1CMD_RST:
-		dch->state = 0;
-		l1_event(dch->l1, HW_RESET_IND);
-		break;
-	case W_L1IND_CD:
-		dch->state = 3;
-		l1_event(dch->l1, HW_DEACT_CNF);
-		break;
-	case W_L1IND_DRD:
-		dch->state = 3;
-		l1_event(dch->l1, HW_DEACT_IND);
-		break;
-	case W_L1IND_CE:
-		dch->state = 4;
-		l1_event(dch->l1, HW_POWERUP_IND);
-		break;
-	case W_L1IND_LD:
-		if (dch->state <= 5) {
-			dch->state = 5;
-			l1_event(dch->l1, ANYSIGNAL);
-		} else {
-			dch->state = 8;
-			l1_event(dch->l1, LOSTFRAMING);
-		}
-		break;
-	case W_L1IND_ARD:
-		dch->state = 6;
-		l1_event(dch->l1, INFO2);
-		break;
-	case W_L1IND_AI8:
-		dch->state = 7;
-		l1_event(dch->l1, INFO4_P8);
-		break;
-	case W_L1IND_AI10:
-		dch->state = 7;
-		l1_event(dch->l1, INFO4_P10);
-		break;
-	default:
-		pr_debug("%s: TE unknown state %02x dch state %02x\n",
-			 card->name, card->state, dch->state);
-		break;
-	}
-	pr_debug("%s: TE newstate %02x\n", card->name, dch->state);
-}
-
-static void
-W6692_empty_Dfifo(struct w6692_hw *card, int count)
-{
-	struct dchannel *dch = &card->dch;
-	u8 *ptr;
-
-	pr_debug("%s: empty_Dfifo %d\n", card->name, count);
-	if (!dch->rx_skb) {
-		dch->rx_skb = mI_alloc_skb(card->dch.maxlen, GFP_ATOMIC);
-		if (!dch->rx_skb) {
-			pr_info("%s: D receive out of memory\n", card->name);
-			WriteW6692(card, W_D_CMDR, W_D_CMDR_RACK);
-			return;
-		}
-	}
-	if ((dch->rx_skb->len + count) >= dch->maxlen) {
-		pr_debug("%s: empty_Dfifo overrun %d\n", card->name,
-			 dch->rx_skb->len + count);
-		WriteW6692(card, W_D_CMDR, W_D_CMDR_RACK);
-		return;
-	}
-	ptr = skb_put(dch->rx_skb, count);
-	insb(card->addr + W_D_RFIFO, ptr, count);
-	WriteW6692(card, W_D_CMDR, W_D_CMDR_RACK);
-	if (debug & DEBUG_HW_DFIFO) {
-		snprintf(card->log, 63, "D-recv %s %d ",
-			 card->name, count);
-		print_hex_dump_bytes(card->log, DUMP_PREFIX_OFFSET, ptr, count);
-	}
-}
-
-static void
-W6692_fill_Dfifo(struct w6692_hw *card)
-{
-	struct dchannel *dch = &card->dch;
-	int count;
-	u8 *ptr;
-	u8 cmd = W_D_CMDR_XMS;
-
-	pr_debug("%s: fill_Dfifo\n", card->name);
-	if (!dch->tx_skb)
-		return;
-	count = dch->tx_skb->len - dch->tx_idx;
-	if (count <= 0)
-		return;
-	if (count > W_D_FIFO_THRESH)
-		count = W_D_FIFO_THRESH;
-	else
-		cmd |= W_D_CMDR_XME;
-	ptr = dch->tx_skb->data + dch->tx_idx;
-	dch->tx_idx += count;
-	outsb(card->addr + W_D_XFIFO, ptr, count);
-	WriteW6692(card, W_D_CMDR, cmd);
-	if (test_and_set_bit(FLG_BUSY_TIMER, &dch->Flags)) {
-		pr_debug("%s: fill_Dfifo dbusytimer running\n", card->name);
-		timer_delete(&dch->timer);
-	}
-	dch->timer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ) / 1000);
-	add_timer(&dch->timer);
-	if (debug & DEBUG_HW_DFIFO) {
-		snprintf(card->log, 63, "D-send %s %d ",
-			 card->name, count);
-		print_hex_dump_bytes(card->log, DUMP_PREFIX_OFFSET, ptr, count);
-	}
-}
-
-static void
-d_retransmit(struct w6692_hw *card)
-{
-	struct dchannel *dch = &card->dch;
-
-	if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags))
-		timer_delete(&dch->timer);
-#ifdef FIXME
-	if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags))
-		dchannel_sched_event(dch, D_CLEARBUSY);
-#endif
-	if (test_bit(FLG_TX_BUSY, &dch->Flags)) {
-		/* Restart frame */
-		dch->tx_idx = 0;
-		W6692_fill_Dfifo(card);
-	} else if (dch->tx_skb) { /* should not happen */
-		pr_info("%s: %s without TX_BUSY\n", card->name, __func__);
-		test_and_set_bit(FLG_TX_BUSY, &dch->Flags);
-		dch->tx_idx = 0;
-		W6692_fill_Dfifo(card);
-	} else {
-		pr_info("%s: XDU no TX_BUSY\n", card->name);
-		if (get_next_dframe(dch))
-			W6692_fill_Dfifo(card);
-	}
-}
-
-static void
-handle_rxD(struct w6692_hw *card) {
-	u8	stat;
-	int	count;
-
-	stat = ReadW6692(card, W_D_RSTA);
-	if (stat & (W_D_RSTA_RDOV | W_D_RSTA_CRCE | W_D_RSTA_RMB)) {
-		if (stat & W_D_RSTA_RDOV) {
-			pr_debug("%s: D-channel RDOV\n", card->name);
-#ifdef ERROR_STATISTIC
-			card->dch.err_rx++;
-#endif
-		}
-		if (stat & W_D_RSTA_CRCE) {
-			pr_debug("%s: D-channel CRC error\n", card->name);
-#ifdef ERROR_STATISTIC
-			card->dch.err_crc++;
-#endif
-		}
-		if (stat & W_D_RSTA_RMB) {
-			pr_debug("%s: D-channel ABORT\n", card->name);
-#ifdef ERROR_STATISTIC
-			card->dch.err_rx++;
-#endif
-		}
-		dev_kfree_skb(card->dch.rx_skb);
-		card->dch.rx_skb = NULL;
-		WriteW6692(card, W_D_CMDR, W_D_CMDR_RACK | W_D_CMDR_RRST);
-	} else {
-		count = ReadW6692(card, W_D_RBCL) & (W_D_FIFO_THRESH - 1);
-		if (count == 0)
-			count = W_D_FIFO_THRESH;
-		W6692_empty_Dfifo(card, count);
-		recv_Dchannel(&card->dch);
-	}
-}
-
-static void
-handle_txD(struct w6692_hw *card) {
-	if (test_and_clear_bit(FLG_BUSY_TIMER, &card->dch.Flags))
-		timer_delete(&card->dch.timer);
-	if (card->dch.tx_skb && card->dch.tx_idx < card->dch.tx_skb->len) {
-		W6692_fill_Dfifo(card);
-	} else {
-		dev_kfree_skb(card->dch.tx_skb);
-		if (get_next_dframe(&card->dch))
-			W6692_fill_Dfifo(card);
-	}
-}
-
-static void
-handle_statusD(struct w6692_hw *card)
-{
-	struct dchannel *dch = &card->dch;
-	u8 exval, v1, cir;
-
-	exval = ReadW6692(card, W_D_EXIR);
-
-	pr_debug("%s: D_EXIR %02x\n", card->name, exval);
-	if (exval & (W_D_EXI_XDUN | W_D_EXI_XCOL)) {
-		/* Transmit underrun/collision */
-		pr_debug("%s: D-channel underrun/collision\n", card->name);
-#ifdef ERROR_STATISTIC
-		dch->err_tx++;
-#endif
-		d_retransmit(card);
-	}
-	if (exval & W_D_EXI_RDOV) {	/* RDOV */
-		pr_debug("%s: D-channel RDOV\n", card->name);
-		WriteW6692(card, W_D_CMDR, W_D_CMDR_RRST);
-	}
-	if (exval & W_D_EXI_TIN2)	/* TIN2 - never */
-		pr_debug("%s: spurious TIN2 interrupt\n", card->name);
-	if (exval & W_D_EXI_MOC) {	/* MOC - not supported */
-		v1 = ReadW6692(card, W_MOSR);
-		pr_debug("%s: spurious MOC interrupt MOSR %02x\n",
-			 card->name, v1);
-	}
-	if (exval & W_D_EXI_ISC) {	/* ISC - Level1 change */
-		cir = ReadW6692(card, W_CIR);
-		pr_debug("%s: ISC CIR %02X\n", card->name, cir);
-		if (cir & W_CIR_ICC) {
-			v1 = cir & W_CIR_COD_MASK;
-			pr_debug("%s: ph_state_change %x -> %x\n", card->name,
-				 dch->state, v1);
-			card->state = v1;
-			if (card->fmask & led) {
-				switch (v1) {
-				case W_L1IND_AI8:
-				case W_L1IND_AI10:
-					w6692_led_handler(card, 1);
-					break;
-				default:
-					w6692_led_handler(card, 0);
-					break;
-				}
-			}
-			W6692_new_ph(card);
-		}
-		if (cir & W_CIR_SCC) {
-			v1 = ReadW6692(card, W_SQR);
-			pr_debug("%s: SCC SQR %02X\n", card->name, v1);
-		}
-	}
-	if (exval & W_D_EXI_WEXP)
-		pr_debug("%s: spurious WEXP interrupt!\n", card->name);
-	if (exval & W_D_EXI_TEXP)
-		pr_debug("%s: spurious TEXP interrupt!\n", card->name);
-}
-
-static void
-W6692_empty_Bfifo(struct w6692_ch *wch, int count)
-{
-	struct w6692_hw *card = wch->bch.hw;
-	u8 *ptr;
-	int maxlen;
-
-	pr_debug("%s: empty_Bfifo %d\n", card->name, count);
-	if (unlikely(wch->bch.state == ISDN_P_NONE)) {
-		pr_debug("%s: empty_Bfifo ISDN_P_NONE\n", card->name);
-		WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RACK | W_B_CMDR_RACT);
-		if (wch->bch.rx_skb)
-			skb_trim(wch->bch.rx_skb, 0);
-		return;
-	}
-	if (test_bit(FLG_RX_OFF, &wch->bch.Flags)) {
-		wch->bch.dropcnt += count;
-		WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RACK | W_B_CMDR_RACT);
-		return;
-	}
-	maxlen = bchannel_get_rxbuf(&wch->bch, count);
-	if (maxlen < 0) {
-		WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RACK | W_B_CMDR_RACT);
-		if (wch->bch.rx_skb)
-			skb_trim(wch->bch.rx_skb, 0);
-		pr_warn("%s.B%d: No bufferspace for %d bytes\n",
-			card->name, wch->bch.nr, count);
-		return;
-	}
-	ptr = skb_put(wch->bch.rx_skb, count);
-	insb(wch->addr + W_B_RFIFO, ptr, count);
-	WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RACK | W_B_CMDR_RACT);
-	if (debug & DEBUG_HW_DFIFO) {
-		snprintf(card->log, 63, "B%1d-recv %s %d ",
-			 wch->bch.nr, card->name, count);
-		print_hex_dump_bytes(card->log, DUMP_PREFIX_OFFSET, ptr, count);
-	}
-}
-
-static void
-W6692_fill_Bfifo(struct w6692_ch *wch)
-{
-	struct w6692_hw *card = wch->bch.hw;
-	int count, fillempty = 0;
-	u8 *ptr, cmd = W_B_CMDR_RACT | W_B_CMDR_XMS;
-
-	pr_debug("%s: fill Bfifo\n", card->name);
-	if (!wch->bch.tx_skb) {
-		if (!test_bit(FLG_TX_EMPTY, &wch->bch.Flags))
-			return;
-		ptr = wch->bch.fill;
-		count = W_B_FIFO_THRESH;
-		fillempty = 1;
-	} else {
-		count = wch->bch.tx_skb->len - wch->bch.tx_idx;
-		if (count <= 0)
-			return;
-		ptr = wch->bch.tx_skb->data + wch->bch.tx_idx;
-	}
-	if (count > W_B_FIFO_THRESH)
-		count = W_B_FIFO_THRESH;
-	else if (test_bit(FLG_HDLC, &wch->bch.Flags))
-		cmd |= W_B_CMDR_XME;
-
-	pr_debug("%s: fill Bfifo%d/%d\n", card->name,
-		 count, wch->bch.tx_idx);
-	wch->bch.tx_idx += count;
-	if (fillempty) {
-		while (count > 0) {
-			outsb(wch->addr + W_B_XFIFO, ptr, MISDN_BCH_FILL_SIZE);
-			count -= MISDN_BCH_FILL_SIZE;
-		}
-	} else {
-		outsb(wch->addr + W_B_XFIFO, ptr, count);
-	}
-	WriteW6692B(wch, W_B_CMDR, cmd);
-	if ((debug & DEBUG_HW_BFIFO) && !fillempty) {
-		snprintf(card->log, 63, "B%1d-send %s %d ",
-			 wch->bch.nr, card->name, count);
-		print_hex_dump_bytes(card->log, DUMP_PREFIX_OFFSET, ptr, count);
-	}
-}
-
-#if 0
-static int
-setvolume(struct w6692_ch *wch, int mic, struct sk_buff *skb)
-{
-	struct w6692_hw *card = wch->bch.hw;
-	u16 *vol = (u16 *)skb->data;
-	u8 val;
-
-	if ((!(card->fmask & pots)) ||
-	    !test_bit(FLG_TRANSPARENT, &wch->bch.Flags))
-		return -ENODEV;
-	if (skb->len < 2)
-		return -EINVAL;
-	if (*vol > 7)
-		return -EINVAL;
-	val = *vol & 7;
-	val = 7 - val;
-	if (mic) {
-		val <<= 3;
-		card->xaddr &= 0xc7;
-	} else {
-		card->xaddr &= 0xf8;
-	}
-	card->xaddr |= val;
-	WriteW6692(card, W_XADDR, card->xaddr);
-	return 0;
-}
-
-static int
-enable_pots(struct w6692_ch *wch)
-{
-	struct w6692_hw *card = wch->bch.hw;
-
-	if ((!(card->fmask & pots)) ||
-	    !test_bit(FLG_TRANSPARENT, &wch->bch.Flags))
-		return -ENODEV;
-	wch->b_mode |= W_B_MODE_EPCM | W_B_MODE_BSW0;
-	WriteW6692B(wch, W_B_MODE, wch->b_mode);
-	WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RRST | W_B_CMDR_XRST);
-	card->pctl |= ((wch->bch.nr & 2) ? W_PCTL_PCX : 0);
-	WriteW6692(card, W_PCTL, card->pctl);
-	return 0;
-}
-#endif
-
-static int
-disable_pots(struct w6692_ch *wch)
-{
-	struct w6692_hw *card = wch->bch.hw;
-
-	if (!(card->fmask & pots))
-		return -ENODEV;
-	wch->b_mode &= ~(W_B_MODE_EPCM | W_B_MODE_BSW0);
-	WriteW6692B(wch, W_B_MODE, wch->b_mode);
-	WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RRST | W_B_CMDR_RACT |
-		    W_B_CMDR_XRST);
-	return 0;
-}
-
-static int
-w6692_mode(struct w6692_ch *wch, u32 pr)
-{
-	struct w6692_hw	*card;
-
-	card = wch->bch.hw;
-	pr_debug("%s: B%d protocol %x-->%x\n", card->name,
-		 wch->bch.nr, wch->bch.state, pr);
-	switch (pr) {
-	case ISDN_P_NONE:
-		if ((card->fmask & pots) && (wch->b_mode & W_B_MODE_EPCM))
-			disable_pots(wch);
-		wch->b_mode = 0;
-		mISDN_clear_bchannel(&wch->bch);
-		WriteW6692B(wch, W_B_MODE, wch->b_mode);
-		WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RRST | W_B_CMDR_XRST);
-		test_and_clear_bit(FLG_HDLC, &wch->bch.Flags);
-		test_and_clear_bit(FLG_TRANSPARENT, &wch->bch.Flags);
-		break;
-	case ISDN_P_B_RAW:
-		wch->b_mode = W_B_MODE_MMS;
-		WriteW6692B(wch, W_B_MODE, wch->b_mode);
-		WriteW6692B(wch, W_B_EXIM, 0);
-		WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RRST | W_B_CMDR_RACT |
-			    W_B_CMDR_XRST);
-		test_and_set_bit(FLG_TRANSPARENT, &wch->bch.Flags);
-		break;
-	case ISDN_P_B_HDLC:
-		wch->b_mode = W_B_MODE_ITF;
-		WriteW6692B(wch, W_B_MODE, wch->b_mode);
-		WriteW6692B(wch, W_B_ADM1, 0xff);
-		WriteW6692B(wch, W_B_ADM2, 0xff);
-		WriteW6692B(wch, W_B_EXIM, 0);
-		WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RRST | W_B_CMDR_RACT |
-			    W_B_CMDR_XRST);
-		test_and_set_bit(FLG_HDLC, &wch->bch.Flags);
-		break;
-	default:
-		pr_info("%s: protocol %x not known\n", card->name, pr);
-		return -ENOPROTOOPT;
-	}
-	wch->bch.state = pr;
-	return 0;
-}
-
-static void
-send_next(struct w6692_ch *wch)
-{
-	if (wch->bch.tx_skb && wch->bch.tx_idx < wch->bch.tx_skb->len) {
-		W6692_fill_Bfifo(wch);
-	} else {
-		dev_kfree_skb(wch->bch.tx_skb);
-		if (get_next_bframe(&wch->bch)) {
-			W6692_fill_Bfifo(wch);
-			test_and_clear_bit(FLG_TX_EMPTY, &wch->bch.Flags);
-		} else if (test_bit(FLG_TX_EMPTY, &wch->bch.Flags)) {
-			W6692_fill_Bfifo(wch);
-		}
-	}
-}
-
-static void
-W6692B_interrupt(struct w6692_hw *card, int ch)
-{
-	struct w6692_ch	*wch = &card->bc[ch];
-	int		count;
-	u8		stat, star = 0;
-
-	stat = ReadW6692B(wch, W_B_EXIR);
-	pr_debug("%s: B%d EXIR %02x\n", card->name, wch->bch.nr, stat);
-	if (stat & W_B_EXI_RME) {
-		star = ReadW6692B(wch, W_B_STAR);
-		if (star & (W_B_STAR_RDOV | W_B_STAR_CRCE | W_B_STAR_RMB)) {
-			if ((star & W_B_STAR_RDOV) &&
-			    test_bit(FLG_ACTIVE, &wch->bch.Flags)) {
-				pr_debug("%s: B%d RDOV proto=%x\n", card->name,
-					 wch->bch.nr, wch->bch.state);
-#ifdef ERROR_STATISTIC
-				wch->bch.err_rdo++;
-#endif
-			}
-			if (test_bit(FLG_HDLC, &wch->bch.Flags)) {
-				if (star & W_B_STAR_CRCE) {
-					pr_debug("%s: B%d CRC error\n",
-						 card->name, wch->bch.nr);
-#ifdef ERROR_STATISTIC
-					wch->bch.err_crc++;
-#endif
-				}
-				if (star & W_B_STAR_RMB) {
-					pr_debug("%s: B%d message abort\n",
-						 card->name, wch->bch.nr);
-#ifdef ERROR_STATISTIC
-					wch->bch.err_inv++;
-#endif
-				}
-			}
-			WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RACK |
-				    W_B_CMDR_RRST | W_B_CMDR_RACT);
-			if (wch->bch.rx_skb)
-				skb_trim(wch->bch.rx_skb, 0);
-		} else {
-			count = ReadW6692B(wch, W_B_RBCL) &
-				(W_B_FIFO_THRESH - 1);
-			if (count == 0)
-				count = W_B_FIFO_THRESH;
-			W6692_empty_Bfifo(wch, count);
-			recv_Bchannel(&wch->bch, 0, false);
-		}
-	}
-	if (stat & W_B_EXI_RMR) {
-		if (!(stat & W_B_EXI_RME))
-			star = ReadW6692B(wch, W_B_STAR);
-		if (star & W_B_STAR_RDOV) {
-			pr_debug("%s: B%d RDOV proto=%x\n", card->name,
-				 wch->bch.nr, wch->bch.state);
-#ifdef ERROR_STATISTIC
-			wch->bch.err_rdo++;
-#endif
-			WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RACK |
-				    W_B_CMDR_RRST | W_B_CMDR_RACT);
-		} else {
-			W6692_empty_Bfifo(wch, W_B_FIFO_THRESH);
-			if (test_bit(FLG_TRANSPARENT, &wch->bch.Flags))
-				recv_Bchannel(&wch->bch, 0, false);
-		}
-	}
-	if (stat & W_B_EXI_RDOV) {
-		/* only if it is not handled yet */
-		if (!(star & W_B_STAR_RDOV)) {
-			pr_debug("%s: B%d RDOV IRQ proto=%x\n", card->name,
-				 wch->bch.nr, wch->bch.state);
-#ifdef ERROR_STATISTIC
-			wch->bch.err_rdo++;
-#endif
-			WriteW6692B(wch, W_B_CMDR, W_B_CMDR_RACK |
-				    W_B_CMDR_RRST | W_B_CMDR_RACT);
-		}
-	}
-	if (stat & W_B_EXI_XFR) {
-		if (!(stat & (W_B_EXI_RME | W_B_EXI_RMR))) {
-			star = ReadW6692B(wch, W_B_STAR);
-			pr_debug("%s: B%d star %02x\n", card->name,
-				 wch->bch.nr, star);
-		}
-		if (star & W_B_STAR_XDOW) {
-			pr_warn("%s: B%d XDOW proto=%x\n", card->name,
-				wch->bch.nr, wch->bch.state);
-#ifdef ERROR_STATISTIC
-			wch->bch.err_xdu++;
-#endif
-			WriteW6692B(wch, W_B_CMDR, W_B_CMDR_XRST |
-				    W_B_CMDR_RACT);
-			/* resend */
-			if (wch->bch.tx_skb) {
-				if (!test_bit(FLG_TRANSPARENT, &wch->bch.Flags))
-					wch->bch.tx_idx = 0;
-			}
-		}
-		send_next(wch);
-		if (star & W_B_STAR_XDOW)
-			return; /* handle XDOW only once */
-	}
-	if (stat & W_B_EXI_XDUN) {
-		pr_warn("%s: B%d XDUN proto=%x\n", card->name,
-			wch->bch.nr, wch->bch.state);
-#ifdef ERROR_STATISTIC
-		wch->bch.err_xdu++;
-#endif
-		/* resend - no XRST needed */
-		if (wch->bch.tx_skb) {
-			if (!test_bit(FLG_TRANSPARENT, &wch->bch.Flags))
-				wch->bch.tx_idx = 0;
-		} else if (test_bit(FLG_FILLEMPTY, &wch->bch.Flags)) {
-			test_and_set_bit(FLG_TX_EMPTY, &wch->bch.Flags);
-		}
-		send_next(wch);
-	}
-}
-
-static irqreturn_t
-w6692_irq(int intno, void *dev_id)
-{
-	struct w6692_hw	*card = dev_id;
-	u8		ista;
-
-	spin_lock(&card->lock);
-	ista = ReadW6692(card, W_ISTA);
-	if ((ista | card->imask) == card->imask) {
-		/* possible a shared  IRQ reqest */
-		spin_unlock(&card->lock);
-		return IRQ_NONE;
-	}
-	card->irqcnt++;
-	pr_debug("%s: ista %02x\n", card->name, ista);
-	ista &= ~card->imask;
-	if (ista & W_INT_B1_EXI)
-		W6692B_interrupt(card, 0);
-	if (ista & W_INT_B2_EXI)
-		W6692B_interrupt(card, 1);
-	if (ista & W_INT_D_RME)
-		handle_rxD(card);
-	if (ista & W_INT_D_RMR)
-		W6692_empty_Dfifo(card, W_D_FIFO_THRESH);
-	if (ista & W_INT_D_XFR)
-		handle_txD(card);
-	if (ista & W_INT_D_EXI)
-		handle_statusD(card);
-	if (ista & (W_INT_XINT0 | W_INT_XINT1)) /* XINT0/1 - never */
-		pr_debug("%s: W6692 spurious XINT!\n", card->name);
-/* End IRQ Handler */
-	spin_unlock(&card->lock);
-	return IRQ_HANDLED;
-}
-
-static void
-dbusy_timer_handler(struct timer_list *t)
-{
-	struct dchannel *dch = timer_container_of(dch, t, timer);
-	struct w6692_hw	*card = dch->hw;
-	int		rbch, star;
-	u_long		flags;
-
-	if (test_bit(FLG_BUSY_TIMER, &dch->Flags)) {
-		spin_lock_irqsave(&card->lock, flags);
-		rbch = ReadW6692(card, W_D_RBCH);
-		star = ReadW6692(card, W_D_STAR);
-		pr_debug("%s: D-Channel Busy RBCH %02x STAR %02x\n",
-			 card->name, rbch, star);
-		if (star & W_D_STAR_XBZ)	/* D-Channel Busy */
-			test_and_set_bit(FLG_L1_BUSY, &dch->Flags);
-		else {
-			/* discard frame; reset transceiver */
-			test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags);
-			if (dch->tx_idx)
-				dch->tx_idx = 0;
-			else
-				pr_info("%s: W6692 D-Channel Busy no tx_idx\n",
-					card->name);
-			/* Transmitter reset */
-			WriteW6692(card, W_D_CMDR, W_D_CMDR_XRST);
-		}
-		spin_unlock_irqrestore(&card->lock, flags);
-	}
-}
-
-static void initW6692(struct w6692_hw *card)
-{
-	u8	val;
-
-	timer_setup(&card->dch.timer, dbusy_timer_handler, 0);
-	w6692_mode(&card->bc[0], ISDN_P_NONE);
-	w6692_mode(&card->bc[1], ISDN_P_NONE);
-	WriteW6692(card, W_D_CTL, 0x00);
-	disable_hwirq(card);
-	WriteW6692(card, W_D_SAM, 0xff);
-	WriteW6692(card, W_D_TAM, 0xff);
-	WriteW6692(card, W_D_MODE, W_D_MODE_RACT);
-	card->state = W_L1CMD_RST;
-	ph_command(card, W_L1CMD_RST);
-	ph_command(card, W_L1CMD_ECK);
-	/* enable all IRQ but extern */
-	card->imask = 0x18;
-	WriteW6692(card, W_D_EXIM, 0x00);
-	WriteW6692B(&card->bc[0], W_B_EXIM, 0);
-	WriteW6692B(&card->bc[1], W_B_EXIM, 0);
-	/* Reset D-chan receiver and transmitter */
-	WriteW6692(card, W_D_CMDR, W_D_CMDR_RRST | W_D_CMDR_XRST);
-	/* Reset B-chan receiver and transmitter */
-	WriteW6692B(&card->bc[0], W_B_CMDR, W_B_CMDR_RRST | W_B_CMDR_XRST);
-	WriteW6692B(&card->bc[1], W_B_CMDR, W_B_CMDR_RRST | W_B_CMDR_XRST);
-	/* enable peripheral */
-	if (card->subtype == W6692_USR) {
-		/* seems that USR implemented some power control features
-		 * Pin 79 is connected to the oscilator circuit so we
-		 * have to handle it here
-		 */
-		card->pctl = 0x80;
-		card->xdata = 0;
-		WriteW6692(card, W_PCTL, card->pctl);
-		WriteW6692(card, W_XDATA, card->xdata);
-	} else {
-		card->pctl = W_PCTL_OE5 | W_PCTL_OE4 | W_PCTL_OE2 |
-			W_PCTL_OE1 | W_PCTL_OE0;
-		card->xaddr = 0x00;/* all sw off */
-		if (card->fmask & pots)
-			card->xdata |= 0x06;	/*  POWER UP/ LED OFF / ALAW */
-		if (card->fmask & led)
-			card->xdata |= 0x04;	/* LED OFF */
-		if ((card->fmask & pots) || (card->fmask & led)) {
-			WriteW6692(card, W_PCTL, card->pctl);
-			WriteW6692(card, W_XADDR, card->xaddr);
-			WriteW6692(card, W_XDATA, card->xdata);
-			val = ReadW6692(card, W_XADDR);
-			if (debug & DEBUG_HW)
-				pr_notice("%s: W_XADDR=%02x\n",
-					  card->name, val);
-		}
-	}
-}
-
-static void
-reset_w6692(struct w6692_hw *card)
-{
-	WriteW6692(card, W_D_CTL, W_D_CTL_SRST);
-	mdelay(10);
-	WriteW6692(card, W_D_CTL, 0);
-}
-
-static int
-init_card(struct w6692_hw *card)
-{
-	int	cnt = 3;
-	u_long	flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	disable_hwirq(card);
-	spin_unlock_irqrestore(&card->lock, flags);
-	if (request_irq(card->irq, w6692_irq, IRQF_SHARED, card->name, card)) {
-		pr_info("%s: couldn't get interrupt %d\n", card->name,
-			card->irq);
-		return -EIO;
-	}
-	while (cnt--) {
-		spin_lock_irqsave(&card->lock, flags);
-		initW6692(card);
-		enable_hwirq(card);
-		spin_unlock_irqrestore(&card->lock, flags);
-		/* Timeout 10ms */
-		msleep_interruptible(10);
-		if (debug & DEBUG_HW)
-			pr_notice("%s: IRQ %d count %d\n", card->name,
-				  card->irq, card->irqcnt);
-		if (!card->irqcnt) {
-			pr_info("%s: IRQ(%d) getting no IRQs during init %d\n",
-				card->name, card->irq, 3 - cnt);
-			reset_w6692(card);
-		} else
-			return 0;
-	}
-	free_irq(card->irq, card);
-	return -EIO;
-}
-
-static int
-w6692_l2l1B(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct bchannel *bch = container_of(ch, struct bchannel, ch);
-	struct w6692_ch	*bc = container_of(bch, struct w6692_ch, bch);
-	struct w6692_hw *card = bch->hw;
-	int ret = -EINVAL;
-	struct mISDNhead *hh = mISDN_HEAD_P(skb);
-	unsigned long flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		spin_lock_irqsave(&card->lock, flags);
-		ret = bchannel_senddata(bch, skb);
-		if (ret > 0) { /* direct TX */
-			ret = 0;
-			W6692_fill_Bfifo(bc);
-		}
-		spin_unlock_irqrestore(&card->lock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		spin_lock_irqsave(&card->lock, flags);
-		if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags))
-			ret = w6692_mode(bc, ch->protocol);
-		else
-			ret = 0;
-		spin_unlock_irqrestore(&card->lock, flags);
-		if (!ret)
-			_queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY, 0,
-				    NULL, GFP_KERNEL);
-		break;
-	case PH_DEACTIVATE_REQ:
-		spin_lock_irqsave(&card->lock, flags);
-		mISDN_clear_bchannel(bch);
-		w6692_mode(bc, ISDN_P_NONE);
-		spin_unlock_irqrestore(&card->lock, flags);
-		_queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0,
-			    NULL, GFP_KERNEL);
-		ret = 0;
-		break;
-	default:
-		pr_info("%s: %s unknown prim(%x,%x)\n",
-			card->name, __func__, hh->prim, hh->id);
-		ret = -EINVAL;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static int
-channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq)
-{
-	return mISDN_ctrl_bchannel(bch, cq);
-}
-
-static int
-open_bchannel(struct w6692_hw *card, struct channel_req *rq)
-{
-	struct bchannel *bch;
-
-	if (rq->adr.channel == 0 || rq->adr.channel > 2)
-		return -EINVAL;
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	bch = &card->bc[rq->adr.channel - 1].bch;
-	if (test_and_set_bit(FLG_OPEN, &bch->Flags))
-		return -EBUSY; /* b-channel can be only open once */
-	bch->ch.protocol = rq->protocol;
-	rq->ch = &bch->ch;
-	return 0;
-}
-
-static int
-channel_ctrl(struct w6692_hw *card, struct mISDN_ctrl_req *cq)
-{
-	int	ret = 0;
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_L1_TIMER3;
-		break;
-	case MISDN_CTRL_L1_TIMER3:
-		ret = l1_event(card->dch.l1, HW_TIMER3_VALUE | (cq->p1 & 0xff));
-		break;
-	default:
-		pr_info("%s: unknown CTRL OP %x\n", card->name, cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-w6692_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
-{
-	struct bchannel *bch = container_of(ch, struct bchannel, ch);
-	struct w6692_ch *bc = container_of(bch, struct w6692_ch, bch);
-	struct w6692_hw *card = bch->hw;
-	int ret = -EINVAL;
-	u_long flags;
-
-	pr_debug("%s: %s cmd:%x %p\n", card->name, __func__, cmd, arg);
-	switch (cmd) {
-	case CLOSE_CHANNEL:
-		test_and_clear_bit(FLG_OPEN, &bch->Flags);
-		cancel_work_sync(&bch->workq);
-		spin_lock_irqsave(&card->lock, flags);
-		mISDN_clear_bchannel(bch);
-		w6692_mode(bc, ISDN_P_NONE);
-		spin_unlock_irqrestore(&card->lock, flags);
-		ch->protocol = ISDN_P_NONE;
-		ch->peer = NULL;
-		module_put(THIS_MODULE);
-		ret = 0;
-		break;
-	case CONTROL_CHANNEL:
-		ret = channel_bctrl(bch, arg);
-		break;
-	default:
-		pr_info("%s: %s unknown prim(%x)\n",
-			card->name, __func__, cmd);
-	}
-	return ret;
-}
-
-static int
-w6692_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct w6692_hw		*card = container_of(dch, struct w6692_hw, dch);
-	int			ret = -EINVAL;
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	u32			id;
-	u_long			flags;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		spin_lock_irqsave(&card->lock, flags);
-		ret = dchannel_senddata(dch, skb);
-		if (ret > 0) { /* direct TX */
-			id = hh->id; /* skb can be freed */
-			W6692_fill_Dfifo(card);
-			ret = 0;
-			spin_unlock_irqrestore(&card->lock, flags);
-			queue_ch_frame(ch, PH_DATA_CNF, id, NULL);
-		} else
-			spin_unlock_irqrestore(&card->lock, flags);
-		return ret;
-	case PH_ACTIVATE_REQ:
-		ret = l1_event(dch->l1, hh->prim);
-		break;
-	case PH_DEACTIVATE_REQ:
-		test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
-		ret = l1_event(dch->l1, hh->prim);
-		break;
-	}
-
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static int
-w6692_l1callback(struct dchannel *dch, u32 cmd)
-{
-	struct w6692_hw *card = container_of(dch, struct w6692_hw, dch);
-	u_long flags;
-
-	pr_debug("%s: cmd(%x) state(%02x)\n", card->name, cmd, card->state);
-	switch (cmd) {
-	case INFO3_P8:
-		spin_lock_irqsave(&card->lock, flags);
-		ph_command(card, W_L1CMD_AR8);
-		spin_unlock_irqrestore(&card->lock, flags);
-		break;
-	case INFO3_P10:
-		spin_lock_irqsave(&card->lock, flags);
-		ph_command(card, W_L1CMD_AR10);
-		spin_unlock_irqrestore(&card->lock, flags);
-		break;
-	case HW_RESET_REQ:
-		spin_lock_irqsave(&card->lock, flags);
-		if (card->state != W_L1IND_DRD)
-			ph_command(card, W_L1CMD_RST);
-		ph_command(card, W_L1CMD_ECK);
-		spin_unlock_irqrestore(&card->lock, flags);
-		break;
-	case HW_DEACT_REQ:
-		skb_queue_purge(&dch->squeue);
-		if (dch->tx_skb) {
-			dev_kfree_skb(dch->tx_skb);
-			dch->tx_skb = NULL;
-		}
-		dch->tx_idx = 0;
-		if (dch->rx_skb) {
-			dev_kfree_skb(dch->rx_skb);
-			dch->rx_skb = NULL;
-		}
-		test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
-		if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags))
-			timer_delete(&dch->timer);
-		break;
-	case HW_POWERUP_REQ:
-		spin_lock_irqsave(&card->lock, flags);
-		ph_command(card, W_L1CMD_ECK);
-		spin_unlock_irqrestore(&card->lock, flags);
-		break;
-	case PH_ACTIVATE_IND:
-		test_and_set_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL,
-			    GFP_ATOMIC);
-		break;
-	case PH_DEACTIVATE_IND:
-		test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL,
-			    GFP_ATOMIC);
-		break;
-	default:
-		pr_debug("%s: %s unknown command %x\n", card->name,
-			 __func__, cmd);
-		return -1;
-	}
-	return 0;
-}
-
-static int
-open_dchannel(struct w6692_hw *card, struct channel_req *rq, void *caller)
-{
-	pr_debug("%s: %s dev(%d) open from %p\n", card->name, __func__,
-		 card->dch.dev.id, caller);
-	if (rq->protocol != ISDN_P_TE_S0)
-		return -EINVAL;
-	if (rq->adr.channel == 1)
-		/* E-Channel not supported */
-		return -EINVAL;
-	rq->ch = &card->dch.dev.D;
-	rq->ch->protocol = rq->protocol;
-	if (card->dch.state == 7)
-		_queue_data(rq->ch, PH_ACTIVATE_IND, MISDN_ID_ANY,
-			    0, NULL, GFP_KERNEL);
-	return 0;
-}
-
-static int
-w6692_dctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
-{
-	struct mISDNdevice *dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel *dch = container_of(dev, struct dchannel, dev);
-	struct w6692_hw *card = container_of(dch, struct w6692_hw, dch);
-	struct channel_req *rq;
-	int err = 0;
-
-	pr_debug("%s: DCTRL: %x %p\n", card->name, cmd, arg);
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		rq = arg;
-		if (rq->protocol == ISDN_P_TE_S0)
-			err = open_dchannel(card, rq, __builtin_return_address(0));
-		else
-			err = open_bchannel(card, rq);
-		if (err)
-			break;
-		if (!try_module_get(THIS_MODULE))
-			pr_info("%s: cannot get module\n", card->name);
-		break;
-	case CLOSE_CHANNEL:
-		pr_debug("%s: dev(%d) close from %p\n", card->name,
-			 dch->dev.id, __builtin_return_address(0));
-		module_put(THIS_MODULE);
-		break;
-	case CONTROL_CHANNEL:
-		err = channel_ctrl(card, arg);
-		break;
-	default:
-		pr_debug("%s: unknown DCTRL command %x\n", card->name, cmd);
-		return -EINVAL;
-	}
-	return err;
-}
-
-static int
-setup_w6692(struct w6692_hw *card)
-{
-	u32	val;
-
-	if (!request_region(card->addr, 256, card->name)) {
-		pr_info("%s: config port %x-%x already in use\n", card->name,
-			card->addr, card->addr + 255);
-		return -EIO;
-	}
-	W6692Version(card);
-	card->bc[0].addr = card->addr;
-	card->bc[1].addr = card->addr + 0x40;
-	val = ReadW6692(card, W_ISTA);
-	if (debug & DEBUG_HW)
-		pr_notice("%s ISTA=%02x\n", card->name, val);
-	val = ReadW6692(card, W_IMASK);
-	if (debug & DEBUG_HW)
-		pr_notice("%s IMASK=%02x\n", card->name, val);
-	val = ReadW6692(card, W_D_EXIR);
-	if (debug & DEBUG_HW)
-		pr_notice("%s D_EXIR=%02x\n", card->name, val);
-	val = ReadW6692(card, W_D_EXIM);
-	if (debug & DEBUG_HW)
-		pr_notice("%s D_EXIM=%02x\n", card->name, val);
-	val = ReadW6692(card, W_D_RSTA);
-	if (debug & DEBUG_HW)
-		pr_notice("%s D_RSTA=%02x\n", card->name, val);
-	return 0;
-}
-
-static void
-release_card(struct w6692_hw *card)
-{
-	u_long	flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	disable_hwirq(card);
-	w6692_mode(&card->bc[0], ISDN_P_NONE);
-	w6692_mode(&card->bc[1], ISDN_P_NONE);
-	if ((card->fmask & led) || card->subtype == W6692_USR) {
-		card->xdata |= 0x04;	/*  LED OFF */
-		WriteW6692(card, W_XDATA, card->xdata);
-	}
-	spin_unlock_irqrestore(&card->lock, flags);
-	free_irq(card->irq, card);
-	l1_event(card->dch.l1, CLOSE_CHANNEL);
-	mISDN_unregister_device(&card->dch.dev);
-	release_region(card->addr, 256);
-	mISDN_freebchannel(&card->bc[1].bch);
-	mISDN_freebchannel(&card->bc[0].bch);
-	mISDN_freedchannel(&card->dch);
-	write_lock_irqsave(&card_lock, flags);
-	list_del(&card->list);
-	write_unlock_irqrestore(&card_lock, flags);
-	pci_disable_device(card->pdev);
-	pci_set_drvdata(card->pdev, NULL);
-	kfree(card);
-}
-
-static int
-setup_instance(struct w6692_hw *card)
-{
-	int		i, err;
-	u_long		flags;
-
-	snprintf(card->name, MISDN_MAX_IDLEN - 1, "w6692.%d", w6692_cnt + 1);
-	write_lock_irqsave(&card_lock, flags);
-	list_add_tail(&card->list, &Cards);
-	write_unlock_irqrestore(&card_lock, flags);
-	card->fmask = (1 << w6692_cnt);
-	_set_debug(card);
-	spin_lock_init(&card->lock);
-	mISDN_initdchannel(&card->dch, MAX_DFRAME_LEN_L1, W6692_ph_bh);
-	card->dch.dev.Dprotocols = (1 << ISDN_P_TE_S0);
-	card->dch.dev.D.send = w6692_l2l1D;
-	card->dch.dev.D.ctrl = w6692_dctrl;
-	card->dch.dev.Bprotocols = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK));
-	card->dch.hw = card;
-	card->dch.dev.nrbchan = 2;
-	for (i = 0; i < 2; i++) {
-		mISDN_initbchannel(&card->bc[i].bch, MAX_DATA_MEM,
-				   W_B_FIFO_THRESH);
-		card->bc[i].bch.hw = card;
-		card->bc[i].bch.nr = i + 1;
-		card->bc[i].bch.ch.nr = i + 1;
-		card->bc[i].bch.ch.send = w6692_l2l1B;
-		card->bc[i].bch.ch.ctrl = w6692_bctrl;
-		set_channelmap(i + 1, card->dch.dev.channelmap);
-		list_add(&card->bc[i].bch.ch.list, &card->dch.dev.bchannels);
-	}
-	err = setup_w6692(card);
-	if (err)
-		goto error_setup;
-	err = mISDN_register_device(&card->dch.dev, &card->pdev->dev,
-				    card->name);
-	if (err)
-		goto error_reg;
-	err = init_card(card);
-	if (err)
-		goto error_init;
-	err = create_l1(&card->dch, w6692_l1callback);
-	if (!err) {
-		w6692_cnt++;
-		pr_notice("W6692 %d cards installed\n", w6692_cnt);
-		return 0;
-	}
-
-	free_irq(card->irq, card);
-error_init:
-	mISDN_unregister_device(&card->dch.dev);
-error_reg:
-	release_region(card->addr, 256);
-error_setup:
-	mISDN_freebchannel(&card->bc[1].bch);
-	mISDN_freebchannel(&card->bc[0].bch);
-	mISDN_freedchannel(&card->dch);
-	write_lock_irqsave(&card_lock, flags);
-	list_del(&card->list);
-	write_unlock_irqrestore(&card_lock, flags);
-	kfree(card);
-	return err;
-}
-
-static int
-w6692_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	int		err = -ENOMEM;
-	struct w6692_hw	*card;
-	struct w6692map	*m = (struct w6692map *)ent->driver_data;
-
-	card = kzalloc_obj(struct w6692_hw);
-	if (!card) {
-		pr_info("No kmem for w6692 card\n");
-		return err;
-	}
-	card->pdev = pdev;
-	card->subtype = m->subtype;
-	err = pci_enable_device(pdev);
-	if (err) {
-		kfree(card);
-		return err;
-	}
-
-	printk(KERN_INFO "mISDN_w6692: found adapter %s at %s\n",
-	       m->name, pci_name(pdev));
-
-	card->addr = pci_resource_start(pdev, 1);
-	card->irq = pdev->irq;
-	pci_set_drvdata(pdev, card);
-	err = setup_instance(card);
-	if (err)
-		pci_set_drvdata(pdev, NULL);
-	return err;
-}
-
-static void
-w6692_remove_pci(struct pci_dev *pdev)
-{
-	struct w6692_hw	*card = pci_get_drvdata(pdev);
-
-	if (card)
-		release_card(card);
-	else
-		if (debug)
-			pr_notice("%s: drvdata already removed\n", __func__);
-}
-
-static const struct pci_device_id w6692_ids[] = {
-	{ PCI_VENDOR_ID_DYNALINK, PCI_DEVICE_ID_DYNALINK_IS64PH,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, (ulong)&w6692_map[0]},
-	{ PCI_VENDOR_ID_WINBOND2, PCI_DEVICE_ID_WINBOND2_6692,
-	  PCI_VENDOR_ID_USR, PCI_DEVICE_ID_USR_6692, 0, 0,
-	  (ulong)&w6692_map[2]},
-	{ PCI_VENDOR_ID_WINBOND2, PCI_DEVICE_ID_WINBOND2_6692,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, (ulong)&w6692_map[1]},
-	{ }
-};
-MODULE_DEVICE_TABLE(pci, w6692_ids);
-
-static struct pci_driver w6692_driver = {
-	.name =  "w6692",
-	.probe = w6692_probe,
-	.remove = w6692_remove_pci,
-	.id_table = w6692_ids,
-};
-
-static int __init w6692_init(void)
-{
-	int err;
-
-	pr_notice("Winbond W6692 PCI driver Rev. %s\n", W6692_REV);
-
-	err = pci_register_driver(&w6692_driver);
-	return err;
-}
-
-static void __exit w6692_cleanup(void)
-{
-	pci_unregister_driver(&w6692_driver);
-}
-
-module_init(w6692_init);
-module_exit(w6692_cleanup);
diff --git a/drivers/isdn/hardware/mISDN/w6692.h b/drivers/isdn/hardware/mISDN/w6692.h
deleted file mode 100644
index 45e1dc5d6c2d..000000000000
--- a/drivers/isdn/hardware/mISDN/w6692.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Winbond W6692 specific defines
- *
- * Author       Karsten Keil <keil@isdn4linux.de>
- *		based on the w6692 I4L driver from Petr Novak <petr.novak@i.cz>
- *
- * Copyright 2009  by Karsten Keil <keil@isdn4linux.de>
- */
-
-/* Specifications of W6692 registers */
-
-#define W_D_RFIFO	0x00	/* R */
-#define W_D_XFIFO	0x04	/* W */
-#define W_D_CMDR	0x08	/* W */
-#define W_D_MODE	0x0c	/* R/W */
-#define W_D_TIMR	0x10	/* R/W */
-#define W_ISTA		0x14	/* R_clr */
-#define W_IMASK		0x18	/* R/W */
-#define W_D_EXIR	0x1c	/* R_clr */
-#define W_D_EXIM	0x20	/* R/W */
-#define W_D_STAR	0x24	/* R */
-#define W_D_RSTA	0x28	/* R */
-#define W_D_SAM		0x2c	/* R/W */
-#define W_D_SAP1	0x30	/* R/W */
-#define W_D_SAP2	0x34	/* R/W */
-#define W_D_TAM		0x38	/* R/W */
-#define W_D_TEI1	0x3c	/* R/W */
-#define W_D_TEI2	0x40	/* R/W */
-#define W_D_RBCH	0x44	/* R */
-#define W_D_RBCL	0x48	/* R */
-#define W_TIMR2		0x4c	/* W */
-#define W_L1_RC		0x50	/* R/W */
-#define W_D_CTL		0x54	/* R/W */
-#define W_CIR		0x58	/* R */
-#define W_CIX		0x5c	/* W */
-#define W_SQR		0x60	/* R */
-#define W_SQX		0x64	/* W */
-#define W_PCTL		0x68	/* R/W */
-#define W_MOR		0x6c	/* R */
-#define W_MOX		0x70	/* R/W */
-#define W_MOSR		0x74	/* R_clr */
-#define W_MOCR		0x78	/* R/W */
-#define W_GCR		0x7c	/* R/W */
-
-#define	W_B_RFIFO	0x80	/* R */
-#define	W_B_XFIFO	0x84	/* W */
-#define	W_B_CMDR	0x88	/* W */
-#define	W_B_MODE	0x8c	/* R/W */
-#define	W_B_EXIR	0x90	/* R_clr */
-#define	W_B_EXIM	0x94	/* R/W */
-#define	W_B_STAR	0x98	/* R */
-#define	W_B_ADM1	0x9c	/* R/W */
-#define	W_B_ADM2	0xa0	/* R/W */
-#define	W_B_ADR1	0xa4	/* R/W */
-#define	W_B_ADR2	0xa8	/* R/W */
-#define	W_B_RBCL	0xac	/* R */
-#define	W_B_RBCH	0xb0	/* R */
-
-#define W_XADDR		0xf4	/* R/W */
-#define W_XDATA		0xf8	/* R/W */
-#define W_EPCTL		0xfc	/* W */
-
-/* W6692 register bits */
-
-#define	W_D_CMDR_XRST	0x01
-#define	W_D_CMDR_XME	0x02
-#define	W_D_CMDR_XMS	0x08
-#define	W_D_CMDR_STT	0x10
-#define	W_D_CMDR_RRST	0x40
-#define	W_D_CMDR_RACK	0x80
-
-#define	W_D_MODE_RLP	0x01
-#define	W_D_MODE_DLP	0x02
-#define	W_D_MODE_MFD	0x04
-#define	W_D_MODE_TEE	0x08
-#define	W_D_MODE_TMS	0x10
-#define	W_D_MODE_RACT	0x40
-#define	W_D_MODE_MMS	0x80
-
-#define W_INT_B2_EXI	0x01
-#define W_INT_B1_EXI	0x02
-#define W_INT_D_EXI	0x04
-#define W_INT_XINT0	0x08
-#define W_INT_XINT1	0x10
-#define W_INT_D_XFR	0x20
-#define W_INT_D_RME	0x40
-#define W_INT_D_RMR	0x80
-
-#define W_D_EXI_WEXP	0x01
-#define W_D_EXI_TEXP	0x02
-#define W_D_EXI_ISC	0x04
-#define W_D_EXI_MOC	0x08
-#define W_D_EXI_TIN2	0x10
-#define W_D_EXI_XCOL	0x20
-#define W_D_EXI_XDUN	0x40
-#define W_D_EXI_RDOV	0x80
-
-#define	W_D_STAR_DRDY	0x10
-#define	W_D_STAR_XBZ	0x20
-#define	W_D_STAR_XDOW	0x80
-
-#define W_D_RSTA_RMB	0x10
-#define W_D_RSTA_CRCE	0x20
-#define W_D_RSTA_RDOV	0x40
-
-#define W_D_CTL_SRST	0x20
-
-#define W_CIR_SCC	0x80
-#define W_CIR_ICC	0x40
-#define W_CIR_COD_MASK	0x0f
-
-#define W_PCTL_PCX	0x01
-#define W_PCTL_XMODE	0x02
-#define W_PCTL_OE0	0x04
-#define W_PCTL_OE1	0x08
-#define W_PCTL_OE2	0x10
-#define W_PCTL_OE3	0x20
-#define W_PCTL_OE4	0x40
-#define W_PCTL_OE5	0x80
-
-#define	W_B_CMDR_XRST	0x01
-#define	W_B_CMDR_XME	0x02
-#define	W_B_CMDR_XMS	0x04
-#define	W_B_CMDR_RACT	0x20
-#define	W_B_CMDR_RRST	0x40
-#define	W_B_CMDR_RACK	0x80
-
-#define	W_B_MODE_FTS0	0x01
-#define	W_B_MODE_FTS1	0x02
-#define	W_B_MODE_SW56	0x04
-#define	W_B_MODE_BSW0	0x08
-#define	W_B_MODE_BSW1	0x10
-#define	W_B_MODE_EPCM	0x20
-#define	W_B_MODE_ITF	0x40
-#define	W_B_MODE_MMS	0x80
-
-#define	W_B_EXI_XDUN	0x01
-#define	W_B_EXI_XFR	0x02
-#define	W_B_EXI_RDOV	0x10
-#define	W_B_EXI_RME	0x20
-#define	W_B_EXI_RMR	0x40
-
-#define	W_B_STAR_XBZ	0x01
-#define	W_B_STAR_XDOW	0x04
-#define	W_B_STAR_RMB	0x10
-#define	W_B_STAR_CRCE	0x20
-#define	W_B_STAR_RDOV	0x40
-
-#define	W_B_RBCH_LOV	0x20
-
-/* W6692 Layer1 commands */
-
-#define	W_L1CMD_ECK	0x00
-#define W_L1CMD_RST	0x01
-#define W_L1CMD_SCP	0x04
-#define W_L1CMD_SSP	0x02
-#define W_L1CMD_AR8	0x08
-#define W_L1CMD_AR10	0x09
-#define W_L1CMD_EAL	0x0a
-#define W_L1CMD_DRC	0x0f
-
-/* W6692 Layer1 indications */
-
-#define W_L1IND_CE	0x07
-#define W_L1IND_DRD	0x00
-#define W_L1IND_LD	0x04
-#define W_L1IND_ARD	0x08
-#define W_L1IND_TI	0x0a
-#define W_L1IND_ATI	0x0b
-#define W_L1IND_AI8	0x0c
-#define W_L1IND_AI10	0x0d
-#define W_L1IND_CD	0x0f
-
-/* FIFO thresholds */
-#define W_D_FIFO_THRESH	64
-#define W_B_FIFO_THRESH	64
diff --git a/drivers/isdn/mISDN/Kconfig b/drivers/isdn/mISDN/Kconfig
deleted file mode 100644
index c9a53c222472..000000000000
--- a/drivers/isdn/mISDN/Kconfig
+++ /dev/null
@@ -1,48 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# modularer ISDN driver
-#
-
-menuconfig MISDN
-	tristate "Modular ISDN driver"
-	help
-	  Enable support for the modular ISDN driver.
-
-if MISDN != n
-
-config MISDN_DSP
-	tristate "Digital Audio Processing of transparent data"
-	depends on MISDN
-	select BITREVERSE
-	help
-	  Enable support for digital audio processing capability.
-
-	  This module may be used for special applications that require
-	  cross connecting of bchannels, conferencing, dtmf decoding,
-	  echo cancellation, tone generation, and Blowfish encryption and
-	  decryption. It may use hardware features if available.
-
-	  E.g. it is required for PBX4Linux. Go to http://isdn.eversberg.eu
-	  and get more information about this module and its usage.
-
-	  If unsure, say 'N'.
-
-config MISDN_L1OIP
-	tristate "ISDN over IP tunnel"
-	depends on MISDN
-	help
-	  Enable support for ISDN over IP tunnel.
-
-	  It features:
-	    - dynamic IP exchange, if one or both peers have dynamic IPs
-	    - BRI (S0) and PRI (S2M) interface
-	    - layer 1 control via network keepalive frames
-	    - direct tunneling of physical interface via IP
-
-	  NOTE: This protocol is called 'Layer 1 over IP' and is not
-	  compatible with ISDNoIP (Agfeo) or TDMoIP. Protocol description is
-	  provided in the source code.
-
-source "drivers/isdn/hardware/mISDN/Kconfig"
-
-endif #MISDN
diff --git a/drivers/isdn/mISDN/Makefile b/drivers/isdn/mISDN/Makefile
deleted file mode 100644
index f3b4b7fa85f8..000000000000
--- a/drivers/isdn/mISDN/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the modular ISDN driver
-#
-
-obj-$(CONFIG_MISDN) += mISDN_core.o
-obj-$(CONFIG_MISDN_DSP) += mISDN_dsp.o
-obj-$(CONFIG_MISDN_L1OIP) += l1oip.o
-
-# multi objects
-
-mISDN_core-objs := core.o fsm.o socket.o clock.o hwchannel.o stack.o layer1.o layer2.o tei.o timerdev.o
-mISDN_dsp-objs := dsp_core.o dsp_cmx.o dsp_tones.o dsp_dtmf.o dsp_audio.o dsp_blowfish.o dsp_pipeline.o dsp_hwec.o
-l1oip-objs := l1oip_core.o l1oip_codec.o
diff --git a/drivers/isdn/mISDN/clock.c b/drivers/isdn/mISDN/clock.c
deleted file mode 100644
index 2668be9de20a..000000000000
--- a/drivers/isdn/mISDN/clock.c
+++ /dev/null
@@ -1,197 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2008  by Andreas Eversberg <andreas@eversberg.eu>
- *
- * Quick API description:
- *
- * A clock source registers using mISDN_register_clock:
- *	name = text string to name clock source
- *	priority = value to priorize clock sources (0 = default)
- *	ctl = callback function to enable/disable clock source
- *	priv = private pointer of clock source
- *	return = pointer to clock source structure;
- *
- * Note: Callback 'ctl' can be called before mISDN_register_clock returns!
- *       Also it can be called during mISDN_unregister_clock.
- *
- * A clock source calls mISDN_clock_update with given samples elapsed, if
- * enabled. If function call is delayed, tv must be set with the timestamp
- * of the actual event.
- *
- * A clock source unregisters using mISDN_unregister_clock.
- *
- * To get current clock, call mISDN_clock_get. The signed short value
- * counts the number of samples since. Time since last clock event is added.
- */
-
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/spinlock.h>
-#include <linux/ktime.h>
-#include <linux/mISDNif.h>
-#include <linux/export.h>
-#include "core.h"
-
-static u_int *debug;
-static LIST_HEAD(iclock_list);
-static DEFINE_RWLOCK(iclock_lock);
-static u16 iclock_count;		/* counter of last clock */
-static ktime_t iclock_timestamp;	/* time stamp of last clock */
-static int iclock_timestamp_valid;	/* already received one timestamp */
-static struct mISDNclock *iclock_current;
-
-void
-mISDN_init_clock(u_int *dp)
-{
-	debug = dp;
-	iclock_timestamp = ktime_get();
-}
-
-static void
-select_iclock(void)
-{
-	struct mISDNclock *iclock, *bestclock = NULL, *lastclock = NULL;
-	int pri = -128;
-
-	list_for_each_entry(iclock, &iclock_list, list) {
-		if (iclock->pri > pri) {
-			pri = iclock->pri;
-			bestclock = iclock;
-		}
-		if (iclock_current == iclock)
-			lastclock = iclock;
-	}
-	if (lastclock && bestclock != lastclock) {
-		/* last used clock source still exists but changes, disable */
-		if (*debug & DEBUG_CLOCK)
-			printk(KERN_DEBUG "Old clock source '%s' disable.\n",
-			       lastclock->name);
-		lastclock->ctl(lastclock->priv, 0);
-	}
-	if (bestclock && bestclock != iclock_current) {
-		/* new clock source selected, enable */
-		if (*debug & DEBUG_CLOCK)
-			printk(KERN_DEBUG "New clock source '%s' enable.\n",
-			       bestclock->name);
-		bestclock->ctl(bestclock->priv, 1);
-	}
-	if (bestclock != iclock_current) {
-		/* no clock received yet */
-		iclock_timestamp_valid = 0;
-	}
-	iclock_current = bestclock;
-}
-
-struct mISDNclock
-*mISDN_register_clock(char *name, int pri, clockctl_func_t *ctl, void *priv)
-{
-	u_long			flags;
-	struct mISDNclock	*iclock;
-
-	if (*debug & (DEBUG_CORE | DEBUG_CLOCK))
-		printk(KERN_DEBUG "%s: %s %d\n", __func__, name, pri);
-	iclock = kzalloc_obj(struct mISDNclock, GFP_ATOMIC);
-	if (!iclock) {
-		printk(KERN_ERR "%s: No memory for clock entry.\n", __func__);
-		return NULL;
-	}
-	strscpy(iclock->name, name, sizeof(iclock->name));
-	iclock->pri = pri;
-	iclock->priv = priv;
-	iclock->ctl = ctl;
-	write_lock_irqsave(&iclock_lock, flags);
-	list_add_tail(&iclock->list, &iclock_list);
-	select_iclock();
-	write_unlock_irqrestore(&iclock_lock, flags);
-	return iclock;
-}
-EXPORT_SYMBOL(mISDN_register_clock);
-
-void
-mISDN_unregister_clock(struct mISDNclock *iclock)
-{
-	u_long	flags;
-
-	if (*debug & (DEBUG_CORE | DEBUG_CLOCK))
-		printk(KERN_DEBUG "%s: %s %d\n", __func__, iclock->name,
-		       iclock->pri);
-	write_lock_irqsave(&iclock_lock, flags);
-	if (iclock_current == iclock) {
-		if (*debug & DEBUG_CLOCK)
-			printk(KERN_DEBUG
-			       "Current clock source '%s' unregisters.\n",
-			       iclock->name);
-		iclock->ctl(iclock->priv, 0);
-	}
-	list_del(&iclock->list);
-	select_iclock();
-	write_unlock_irqrestore(&iclock_lock, flags);
-}
-EXPORT_SYMBOL(mISDN_unregister_clock);
-
-void
-mISDN_clock_update(struct mISDNclock *iclock, int samples, ktime_t *timestamp)
-{
-	u_long		flags;
-	ktime_t		timestamp_now;
-	u16		delta;
-
-	write_lock_irqsave(&iclock_lock, flags);
-	if (iclock_current != iclock) {
-		printk(KERN_ERR "%s: '%s' sends us clock updates, but we do "
-		       "listen to '%s'. This is a bug!\n", __func__,
-		       iclock->name,
-		       iclock_current ? iclock_current->name : "nothing");
-		iclock->ctl(iclock->priv, 0);
-		write_unlock_irqrestore(&iclock_lock, flags);
-		return;
-	}
-	if (iclock_timestamp_valid) {
-		/* increment sample counter by given samples */
-		iclock_count += samples;
-		if (timestamp) { /* timestamp must be set, if function call is delayed */
-			iclock_timestamp = *timestamp;
-		} else	{
-			iclock_timestamp = ktime_get();
-		}
-	} else {
-		/* calc elapsed time by system clock */
-		if (timestamp) { /* timestamp must be set, if function call is delayed */
-			timestamp_now = *timestamp;
-		} else {
-			timestamp_now = ktime_get();
-		}
-		delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp),
-				(NSEC_PER_SEC / 8000));
-		/* add elapsed time to counter and set new timestamp */
-		iclock_count += delta;
-		iclock_timestamp = timestamp_now;
-		iclock_timestamp_valid = 1;
-		if (*debug & DEBUG_CLOCK)
-			printk("Received first clock from source '%s'.\n",
-			       iclock_current ? iclock_current->name : "nothing");
-	}
-	write_unlock_irqrestore(&iclock_lock, flags);
-}
-EXPORT_SYMBOL(mISDN_clock_update);
-
-unsigned short
-mISDN_clock_get(void)
-{
-	u_long		flags;
-	ktime_t		timestamp_now;
-	u16		delta;
-	u16		count;
-
-	read_lock_irqsave(&iclock_lock, flags);
-	/* calc elapsed time by system clock */
-	timestamp_now = ktime_get();
-	delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp),
-			(NSEC_PER_SEC / 8000));
-	/* add elapsed time to counter */
-	count =	iclock_count + delta;
-	read_unlock_irqrestore(&iclock_lock, flags);
-	return count;
-}
-EXPORT_SYMBOL(mISDN_clock_get);
diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c
deleted file mode 100644
index 8ec2d4d4f135..000000000000
--- a/drivers/isdn/mISDN/core.c
+++ /dev/null
@@ -1,400 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/mISDNif.h>
-#include "core.h"
-
-static u_int debug;
-
-MODULE_AUTHOR("Karsten Keil");
-MODULE_DESCRIPTION("Modular ISDN core driver");
-MODULE_LICENSE("GPL");
-module_param(debug, uint, S_IRUGO | S_IWUSR);
-
-static u64		device_ids;
-#define MAX_DEVICE_ID	63
-
-static LIST_HEAD(Bprotocols);
-static DEFINE_RWLOCK(bp_lock);
-
-static void mISDN_dev_release(struct device *dev)
-{
-	/* nothing to do: the device is part of its parent's data structure */
-}
-
-static ssize_t id_show(struct device *dev,
-		       struct device_attribute *attr, char *buf)
-{
-	struct mISDNdevice *mdev = dev_to_mISDN(dev);
-
-	if (!mdev)
-		return -ENODEV;
-	return sprintf(buf, "%d\n", mdev->id);
-}
-static DEVICE_ATTR_RO(id);
-
-static ssize_t nrbchan_show(struct device *dev,
-			    struct device_attribute *attr, char *buf)
-{
-	struct mISDNdevice *mdev = dev_to_mISDN(dev);
-
-	if (!mdev)
-		return -ENODEV;
-	return sprintf(buf, "%d\n", mdev->nrbchan);
-}
-static DEVICE_ATTR_RO(nrbchan);
-
-static ssize_t d_protocols_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct mISDNdevice *mdev = dev_to_mISDN(dev);
-
-	if (!mdev)
-		return -ENODEV;
-	return sprintf(buf, "%d\n", mdev->Dprotocols);
-}
-static DEVICE_ATTR_RO(d_protocols);
-
-static ssize_t b_protocols_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct mISDNdevice *mdev = dev_to_mISDN(dev);
-
-	if (!mdev)
-		return -ENODEV;
-	return sprintf(buf, "%d\n", mdev->Bprotocols | get_all_Bprotocols());
-}
-static DEVICE_ATTR_RO(b_protocols);
-
-static ssize_t protocol_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
-{
-	struct mISDNdevice *mdev = dev_to_mISDN(dev);
-
-	if (!mdev)
-		return -ENODEV;
-	return sprintf(buf, "%d\n", mdev->D.protocol);
-}
-static DEVICE_ATTR_RO(protocol);
-
-static ssize_t name_show(struct device *dev,
-			 struct device_attribute *attr, char *buf)
-{
-	strcpy(buf, dev_name(dev));
-	return strlen(buf);
-}
-static DEVICE_ATTR_RO(name);
-
-#if 0 /* hangs */
-static ssize_t name_set(struct device *dev, struct device_attribute *attr,
-			const char *buf, size_t count)
-{
-	int err = 0;
-	char *out = kmalloc(count + 1, GFP_KERNEL);
-
-	if (!out)
-		return -ENOMEM;
-
-	memcpy(out, buf, count);
-	if (count && out[count - 1] == '\n')
-		out[--count] = 0;
-	if (count)
-		err = device_rename(dev, out);
-	kfree(out);
-
-	return (err < 0) ? err : count;
-}
-static DEVICE_ATTR_RW(name);
-#endif
-
-static ssize_t channelmap_show(struct device *dev,
-			       struct device_attribute *attr, char *buf)
-{
-	struct mISDNdevice *mdev = dev_to_mISDN(dev);
-	char *bp = buf;
-	int i;
-
-	for (i = 0; i <= mdev->nrbchan; i++)
-		*bp++ = test_channelmap(i, mdev->channelmap) ? '1' : '0';
-
-	return bp - buf;
-}
-static DEVICE_ATTR_RO(channelmap);
-
-static struct attribute *mISDN_attrs[] = {
-	&dev_attr_id.attr,
-	&dev_attr_d_protocols.attr,
-	&dev_attr_b_protocols.attr,
-	&dev_attr_protocol.attr,
-	&dev_attr_channelmap.attr,
-	&dev_attr_nrbchan.attr,
-	&dev_attr_name.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(mISDN);
-
-static int mISDN_uevent(const struct device *dev, struct kobj_uevent_env *env)
-{
-	const struct mISDNdevice *mdev = dev_to_mISDN(dev);
-
-	if (!mdev)
-		return 0;
-
-	if (add_uevent_var(env, "nchans=%d", mdev->nrbchan))
-		return -ENOMEM;
-
-	return 0;
-}
-
-static struct class mISDN_class = {
-	.name = "mISDN",
-	.dev_uevent = mISDN_uevent,
-	.dev_groups = mISDN_groups,
-	.dev_release = mISDN_dev_release,
-};
-
-static int
-_get_mdevice(struct device *dev, const void *id)
-{
-	struct mISDNdevice *mdev = dev_to_mISDN(dev);
-
-	if (!mdev)
-		return 0;
-	if (mdev->id != *(const u_int *)id)
-		return 0;
-	return 1;
-}
-
-struct mISDNdevice
-*get_mdevice(u_int id)
-{
-	return dev_to_mISDN(class_find_device(&mISDN_class, NULL, &id,
-					      _get_mdevice));
-}
-
-static int
-_get_mdevice_count(struct device *dev, void *cnt)
-{
-	*(int *)cnt += 1;
-	return 0;
-}
-
-int
-get_mdevice_count(void)
-{
-	int cnt = 0;
-
-	class_for_each_device(&mISDN_class, NULL, &cnt, _get_mdevice_count);
-	return cnt;
-}
-
-static int
-get_free_devid(void)
-{
-	u_int	i;
-
-	for (i = 0; i <= MAX_DEVICE_ID; i++)
-		if (!test_and_set_bit(i, (u_long *)&device_ids))
-			break;
-	if (i > MAX_DEVICE_ID)
-		return -EBUSY;
-	return i;
-}
-
-int
-mISDN_register_device(struct mISDNdevice *dev,
-		      struct device *parent, char *name)
-{
-	int	err;
-
-	err = get_free_devid();
-	if (err < 0)
-		return err;
-	dev->id = err;
-
-	device_initialize(&dev->dev);
-	if (name && name[0])
-		dev_set_name(&dev->dev, "%s", name);
-	else
-		dev_set_name(&dev->dev, "mISDN%d", dev->id);
-	if (debug & DEBUG_CORE)
-		printk(KERN_DEBUG "mISDN_register %s %d\n",
-		       dev_name(&dev->dev), dev->id);
-	dev->dev.class = &mISDN_class;
-
-	err = create_stack(dev);
-	if (err)
-		goto error1;
-
-	dev->dev.platform_data = dev;
-	dev->dev.parent = parent;
-	dev_set_drvdata(&dev->dev, dev);
-
-	err = device_add(&dev->dev);
-	if (err)
-		goto error3;
-	return 0;
-
-error3:
-	delete_stack(dev);
-error1:
-	put_device(&dev->dev);
-	return err;
-
-}
-EXPORT_SYMBOL(mISDN_register_device);
-
-void
-mISDN_unregister_device(struct mISDNdevice *dev) {
-	if (debug & DEBUG_CORE)
-		printk(KERN_DEBUG "mISDN_unregister %s %d\n",
-		       dev_name(&dev->dev), dev->id);
-	/* sysfs_remove_link(&dev->dev.kobj, "device"); */
-	device_del(&dev->dev);
-	dev_set_drvdata(&dev->dev, NULL);
-
-	test_and_clear_bit(dev->id, (u_long *)&device_ids);
-	delete_stack(dev);
-	put_device(&dev->dev);
-}
-EXPORT_SYMBOL(mISDN_unregister_device);
-
-u_int
-get_all_Bprotocols(void)
-{
-	struct Bprotocol	*bp;
-	u_int	m = 0;
-
-	read_lock(&bp_lock);
-	list_for_each_entry(bp, &Bprotocols, list)
-		m |= bp->Bprotocols;
-	read_unlock(&bp_lock);
-	return m;
-}
-
-struct Bprotocol *
-get_Bprotocol4mask(u_int m)
-{
-	struct Bprotocol	*bp;
-
-	read_lock(&bp_lock);
-	list_for_each_entry(bp, &Bprotocols, list)
-		if (bp->Bprotocols & m) {
-			read_unlock(&bp_lock);
-			return bp;
-		}
-	read_unlock(&bp_lock);
-	return NULL;
-}
-
-int
-mISDN_register_Bprotocol(struct Bprotocol *bp)
-{
-	u_long			flags;
-	struct Bprotocol	*old;
-
-	if (debug & DEBUG_CORE)
-		printk(KERN_DEBUG "%s: %s/%x\n", __func__,
-		       bp->name, bp->Bprotocols);
-	old = get_Bprotocol4mask(bp->Bprotocols);
-	if (old) {
-		printk(KERN_WARNING
-		       "register duplicate protocol old %s/%x new %s/%x\n",
-		       old->name, old->Bprotocols, bp->name, bp->Bprotocols);
-		return -EBUSY;
-	}
-	write_lock_irqsave(&bp_lock, flags);
-	list_add_tail(&bp->list, &Bprotocols);
-	write_unlock_irqrestore(&bp_lock, flags);
-	return 0;
-}
-EXPORT_SYMBOL(mISDN_register_Bprotocol);
-
-void
-mISDN_unregister_Bprotocol(struct Bprotocol *bp)
-{
-	u_long	flags;
-
-	if (debug & DEBUG_CORE)
-		printk(KERN_DEBUG "%s: %s/%x\n", __func__, bp->name,
-		       bp->Bprotocols);
-	write_lock_irqsave(&bp_lock, flags);
-	list_del(&bp->list);
-	write_unlock_irqrestore(&bp_lock, flags);
-}
-EXPORT_SYMBOL(mISDN_unregister_Bprotocol);
-
-static const char *msg_no_channel = "<no channel>";
-static const char *msg_no_stack = "<no stack>";
-static const char *msg_no_stackdev = "<no stack device>";
-
-const char *mISDNDevName4ch(struct mISDNchannel *ch)
-{
-	if (!ch)
-		return msg_no_channel;
-	if (!ch->st)
-		return msg_no_stack;
-	if (!ch->st->dev)
-		return msg_no_stackdev;
-	return dev_name(&ch->st->dev->dev);
-};
-EXPORT_SYMBOL(mISDNDevName4ch);
-
-static int
-mISDNInit(void)
-{
-	int	err;
-
-	printk(KERN_INFO "Modular ISDN core version %d.%d.%d\n",
-	       MISDN_MAJOR_VERSION, MISDN_MINOR_VERSION, MISDN_RELEASE);
-	mISDN_init_clock(&debug);
-	mISDN_initstack(&debug);
-	err = class_register(&mISDN_class);
-	if (err)
-		goto error1;
-	err = mISDN_inittimer(&debug);
-	if (err)
-		goto error2;
-	err = Isdnl1_Init(&debug);
-	if (err)
-		goto error3;
-	err = Isdnl2_Init(&debug);
-	if (err)
-		goto error4;
-	err = misdn_sock_init(&debug);
-	if (err)
-		goto error5;
-	return 0;
-
-error5:
-	Isdnl2_cleanup();
-error4:
-	Isdnl1_cleanup();
-error3:
-	mISDN_timer_cleanup();
-error2:
-	class_unregister(&mISDN_class);
-error1:
-	return err;
-}
-
-static void mISDN_cleanup(void)
-{
-	misdn_sock_cleanup();
-	Isdnl2_cleanup();
-	Isdnl1_cleanup();
-	mISDN_timer_cleanup();
-	class_unregister(&mISDN_class);
-
-	printk(KERN_DEBUG "mISDNcore unloaded\n");
-}
-
-module_init(mISDNInit);
-module_exit(mISDN_cleanup);
diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h
deleted file mode 100644
index 5617c06de8e4..000000000000
--- a/drivers/isdn/mISDN/core.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#ifndef mISDN_CORE_H
-#define mISDN_CORE_H
-
-extern struct mISDNdevice	*get_mdevice(u_int);
-extern int			get_mdevice_count(void);
-
-/* stack status flag */
-#define mISDN_STACK_ACTION_MASK		0x0000ffff
-#define mISDN_STACK_COMMAND_MASK	0x000f0000
-#define mISDN_STACK_STATUS_MASK		0xfff00000
-/* action bits 0-15 */
-#define mISDN_STACK_WORK	0
-#define mISDN_STACK_SETUP	1
-#define mISDN_STACK_CLEARING	2
-#define mISDN_STACK_RESTART	3
-#define mISDN_STACK_WAKEUP	4
-#define mISDN_STACK_ABORT	15
-/* command bits 16-19 */
-#define mISDN_STACK_STOPPED	16
-#define mISDN_STACK_INIT	17
-#define mISDN_STACK_THREADSTART	18
-/* status bits 20-31 */
-#define mISDN_STACK_BCHANNEL	20
-#define mISDN_STACK_ACTIVE      29
-#define mISDN_STACK_RUNNING     30
-#define mISDN_STACK_KILLED      31
-
-
-/* manager options */
-#define MGR_OPT_USER		24
-#define MGR_OPT_NETWORK		25
-
-extern int	connect_Bstack(struct mISDNdevice *, struct mISDNchannel *,
-			       u_int, struct sockaddr_mISDN *);
-extern int	connect_layer1(struct mISDNdevice *, struct mISDNchannel *,
-			       u_int, struct sockaddr_mISDN *);
-extern int	create_l2entity(struct mISDNdevice *, struct mISDNchannel *,
-				u_int, struct sockaddr_mISDN *);
-
-extern int	create_stack(struct mISDNdevice *);
-extern int	create_teimanager(struct mISDNdevice *);
-extern void	delete_teimanager(struct mISDNchannel *);
-extern void	delete_channel(struct mISDNchannel *);
-extern void	delete_stack(struct mISDNdevice *);
-extern void	mISDN_initstack(u_int *);
-extern int      misdn_sock_init(u_int *);
-extern void     misdn_sock_cleanup(void);
-extern void	add_layer2(struct mISDNchannel *, struct mISDNstack *);
-extern void	__add_layer2(struct mISDNchannel *, struct mISDNstack *);
-
-extern u_int		get_all_Bprotocols(void);
-struct Bprotocol	*get_Bprotocol4mask(u_int);
-
-extern int	mISDN_inittimer(u_int *);
-extern void	mISDN_timer_cleanup(void);
-
-extern int	Isdnl1_Init(u_int *);
-extern void	Isdnl1_cleanup(void);
-extern int	Isdnl2_Init(u_int *);
-extern void	Isdnl2_cleanup(void);
-
-extern void	mISDN_init_clock(u_int *);
-
-#endif
diff --git a/drivers/isdn/mISDN/dsp.h b/drivers/isdn/mISDN/dsp.h
deleted file mode 100644
index baf31258f5c9..000000000000
--- a/drivers/isdn/mISDN/dsp.h
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Audio support data for ISDN4Linux.
- *
- * Copyright 2002/2003 by Andreas Eversberg (jolly@eversberg.eu)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#define DEBUG_DSP_CTRL		0x0001
-#define DEBUG_DSP_CORE		0x0002
-#define DEBUG_DSP_DTMF		0x0004
-#define DEBUG_DSP_CMX		0x0010
-#define DEBUG_DSP_TONE		0x0020
-#define DEBUG_DSP_BLOWFISH	0x0040
-#define DEBUG_DSP_DELAY		0x0100
-#define DEBUG_DSP_CLOCK		0x0200
-#define DEBUG_DSP_DTMFCOEFF	0x8000 /* heavy output */
-
-/* options may be:
- *
- * bit 0 = use ulaw instead of alaw
- * bit 1 = enable hfc hardware acceleration for all channels
- *
- */
-#define DSP_OPT_ULAW		(1 << 0)
-#define DSP_OPT_NOHARDWARE	(1 << 1)
-
-#include <linux/timer.h>
-#include <linux/workqueue.h>
-
-#include "dsp_ecdis.h"
-
-extern int dsp_options;
-extern int dsp_debug;
-extern int dsp_poll;
-extern int dsp_tics;
-extern spinlock_t dsp_lock;
-extern struct work_struct dsp_workq;
-extern u32 dsp_poll_diff; /* calculated fix-comma corrected poll value */
-
-/***************
- * audio stuff *
- ***************/
-
-extern s32 dsp_audio_alaw_to_s32[256];
-extern s32 dsp_audio_ulaw_to_s32[256];
-extern s32 *dsp_audio_law_to_s32;
-extern u8 dsp_audio_s16_to_law[65536];
-extern u8 dsp_audio_alaw_to_ulaw[256];
-extern u8 dsp_audio_mix_law[65536];
-extern u8 dsp_audio_seven2law[128];
-extern u8 dsp_audio_law2seven[256];
-extern void dsp_audio_generate_law_tables(void);
-extern void dsp_audio_generate_s2law_table(void);
-extern void dsp_audio_generate_seven(void);
-extern void dsp_audio_generate_mix_table(void);
-extern void dsp_audio_generate_ulaw_samples(void);
-extern void dsp_audio_generate_volume_changes(void);
-extern u8 dsp_silence;
-
-
-/*************
- * cmx stuff *
- *************/
-
-#define MAX_POLL	256	/* maximum number of send-chunks */
-
-#define CMX_BUFF_SIZE	0x8000	/* must be 2**n (0x1000 about 1/2 second) */
-#define CMX_BUFF_HALF	0x4000	/* CMX_BUFF_SIZE / 2 */
-#define CMX_BUFF_MASK	0x7fff	/* CMX_BUFF_SIZE - 1 */
-
-/* how many seconds will we check the lowest delay until the jitter buffer
-   is reduced by that delay */
-#define MAX_SECONDS_JITTER_CHECK 5
-
-extern struct timer_list dsp_spl_tl;
-
-/* the datatype need to match jiffies datatype */
-extern unsigned long dsp_spl_jiffies;
-
-/* the structure of conferences:
- *
- * each conference has a unique number, given by user space.
- * the conferences are linked in a chain.
- * each conference has members linked in a chain.
- * each dsplayer points to a member, each member points to a dsplayer.
- */
-
-/* all members within a conference (this is linked 1:1 with the dsp) */
-struct dsp;
-struct dsp_conf_member {
-	struct list_head	list;
-	struct dsp		*dsp;
-};
-
-/* the list of all conferences */
-struct dsp_conf {
-	struct list_head	list;
-	u32			id;
-	/* all cmx stacks with the same ID are
-	   connected */
-	struct list_head	mlist;
-	int			software; /* conf is processed by software */
-	int			hardware; /* conf is processed by hardware */
-	/* note: if both unset, has only one member */
-};
-
-
-/**************
- * DTMF stuff *
- **************/
-
-#define DSP_DTMF_NPOINTS 102
-
-#define ECHOCAN_BUFF_SIZE 0x400 /* must be 2**n */
-#define ECHOCAN_BUFF_MASK 0x3ff /* -1 */
-
-struct dsp_dtmf {
-	int		enable; /* dtmf is enabled */
-	int		treshold; /* above this is dtmf (square of) */
-	int		software; /* dtmf uses software decoding */
-	int		hardware; /* dtmf uses hardware decoding */
-	int		size; /* number of bytes in buffer */
-	signed short	buffer[DSP_DTMF_NPOINTS];
-	/* buffers one full dtmf frame */
-	u8		lastwhat, lastdigit;
-	int		count;
-	u8		digits[16]; /* dtmf result */
-};
-
-
-/******************
- * pipeline stuff *
- ******************/
-struct dsp_pipeline {
-	rwlock_t  lock;
-	struct list_head list;
-	int inuse;
-};
-
-/***************
- * tones stuff *
- ***************/
-
-struct dsp_tone {
-	int		software; /* tones are generated by software */
-	int		hardware; /* tones are generated by hardware */
-	int		tone;
-	void		*pattern;
-	int		count;
-	int		index;
-	struct timer_list tl;
-};
-
-/***************
- * echo stuff *
- ***************/
-
-struct dsp_echo {
-	int		software; /* echo is generated by software */
-	int		hardware; /* echo is generated by hardware */
-};
-
-/*****************
- * general stuff *
- *****************/
-
-struct dsp {
-	struct list_head list;
-	struct mISDNchannel	ch;
-	struct mISDNchannel	*up;
-	unsigned char	name[64];
-	int		b_active;
-	struct dsp_echo	echo;
-	int		rx_disabled; /* what the user wants */
-	int		rx_is_off; /* what the card is */
-	int		tx_mix;
-	struct dsp_tone	tone;
-	struct dsp_dtmf	dtmf;
-	int		tx_volume, rx_volume;
-
-	/* queue for sending frames */
-	struct work_struct	workq;
-	struct sk_buff_head	sendq;
-	int		hdlc;	/* if mode is hdlc */
-	int		data_pending;	/* currently an unconfirmed frame */
-
-	/* conference stuff */
-	u32		conf_id;
-	struct dsp_conf	*conf;
-	struct dsp_conf_member
-	*member;
-
-	/* buffer stuff */
-	int		rx_W; /* current write pos for data without timestamp */
-	int		rx_R; /* current read pos for transmit clock */
-	int		rx_init; /* if set, pointers will be adjusted first */
-	int		tx_W; /* current write pos for transmit data */
-	int		tx_R; /* current read pos for transmit clock */
-	int		rx_delay[MAX_SECONDS_JITTER_CHECK];
-	int		tx_delay[MAX_SECONDS_JITTER_CHECK];
-	u8		tx_buff[CMX_BUFF_SIZE];
-	u8		rx_buff[CMX_BUFF_SIZE];
-	int		last_tx; /* if set, we transmitted last poll interval */
-	int		cmx_delay; /* initial delay of buffers,
-				      or 0 for dynamic jitter buffer */
-	int		tx_dejitter; /* if set, dejitter tx buffer */
-	int		tx_data; /* enables tx-data of CMX to upper layer */
-
-	/* hardware stuff */
-	struct dsp_features features;
-	int		features_rx_off; /* set if rx_off is featured */
-	int		features_fill_empty; /* set if fill_empty is featured */
-	int		pcm_slot_rx; /* current PCM slot (or -1) */
-	int		pcm_bank_rx;
-	int		pcm_slot_tx;
-	int		pcm_bank_tx;
-	int		hfc_conf; /* unique id of current conference (or -1) */
-
-	/* encryption stuff */
-	int		bf_enable;
-	u32		bf_p[18];
-	u32		bf_s[1024];
-	int		bf_crypt_pos;
-	u8		bf_data_in[9];
-	u8		bf_crypt_out[9];
-	int		bf_decrypt_in_pos;
-	int		bf_decrypt_out_pos;
-	u8		bf_crypt_inring[16];
-	u8		bf_data_out[9];
-	int		bf_sync;
-
-	struct dsp_pipeline
-	pipeline;
-};
-
-/* functions */
-
-extern void dsp_change_volume(struct sk_buff *skb, int volume);
-
-extern struct list_head dsp_ilist;
-extern struct list_head conf_ilist;
-extern void dsp_cmx_debug(struct dsp *dsp);
-extern void dsp_cmx_hardware(struct dsp_conf *conf, struct dsp *dsp);
-extern int dsp_cmx_conf(struct dsp *dsp, u32 conf_id);
-extern void dsp_cmx_receive(struct dsp *dsp, struct sk_buff *skb);
-extern void dsp_cmx_hdlc(struct dsp *dsp, struct sk_buff *skb);
-extern void dsp_cmx_send(struct timer_list *arg);
-extern void dsp_cmx_transmit(struct dsp *dsp, struct sk_buff *skb);
-extern int dsp_cmx_del_conf_member(struct dsp *dsp);
-extern int dsp_cmx_del_conf(struct dsp_conf *conf);
-
-extern void dsp_dtmf_goertzel_init(struct dsp *dsp);
-extern void dsp_dtmf_hardware(struct dsp *dsp);
-extern u8 *dsp_dtmf_goertzel_decode(struct dsp *dsp, u8 *data, int len,
-				    int fmt);
-
-extern int dsp_tone(struct dsp *dsp, int tone);
-extern void dsp_tone_copy(struct dsp *dsp, u8 *data, int len);
-extern void dsp_tone_timeout(struct timer_list *t);
-
-extern void dsp_bf_encrypt(struct dsp *dsp, u8 *data, int len);
-extern void dsp_bf_decrypt(struct dsp *dsp, u8 *data, int len);
-extern int dsp_bf_init(struct dsp *dsp, const u8 *key, unsigned int keylen);
-extern void dsp_bf_cleanup(struct dsp *dsp);
-
-extern int  dsp_pipeline_module_init(void);
-extern void dsp_pipeline_module_exit(void);
-extern int  dsp_pipeline_init(struct dsp_pipeline *pipeline);
-extern void dsp_pipeline_destroy(struct dsp_pipeline *pipeline);
-extern int  dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg);
-extern void dsp_pipeline_process_tx(struct dsp_pipeline *pipeline, u8 *data,
-				    int len);
-extern void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data,
-				    int len, unsigned int txlen);
diff --git a/drivers/isdn/mISDN/dsp_audio.c b/drivers/isdn/mISDN/dsp_audio.c
deleted file mode 100644
index bbef98e7a16e..000000000000
--- a/drivers/isdn/mISDN/dsp_audio.c
+++ /dev/null
@@ -1,421 +0,0 @@
-/*
- * Audio support data for mISDN_dsp.
- *
- * Copyright 2002/2003 by Andreas Eversberg (jolly@eversberg.eu)
- * Rewritten by Peter
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#include <linux/delay.h>
-#include <linux/mISDNif.h>
-#include <linux/mISDNdsp.h>
-#include <linux/export.h>
-#include <linux/bitrev.h>
-#include "core.h"
-#include "dsp.h"
-
-/* ulaw[unsigned char] -> signed 16-bit */
-s32 dsp_audio_ulaw_to_s32[256];
-/* alaw[unsigned char] -> signed 16-bit */
-s32 dsp_audio_alaw_to_s32[256];
-
-s32 *dsp_audio_law_to_s32;
-EXPORT_SYMBOL(dsp_audio_law_to_s32);
-
-/* signed 16-bit -> law */
-u8 dsp_audio_s16_to_law[65536];
-EXPORT_SYMBOL(dsp_audio_s16_to_law);
-
-/* alaw -> ulaw */
-u8 dsp_audio_alaw_to_ulaw[256];
-/* ulaw -> alaw */
-static u8 dsp_audio_ulaw_to_alaw[256];
-u8 dsp_silence;
-
-
-/*****************************************************
- * generate table for conversion of s16 to alaw/ulaw *
- *****************************************************/
-
-#define AMI_MASK 0x55
-
-static inline unsigned char linear2alaw(short int linear)
-{
-	int mask;
-	int seg;
-	int pcm_val;
-	static int seg_end[8] = {
-		0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF
-	};
-
-	pcm_val = linear;
-	if (pcm_val >= 0) {
-		/* Sign (7th) bit = 1 */
-		mask = AMI_MASK | 0x80;
-	} else {
-		/* Sign bit = 0 */
-		mask = AMI_MASK;
-		pcm_val = -pcm_val;
-	}
-
-	/* Convert the scaled magnitude to segment number. */
-	for (seg = 0; seg < 8; seg++) {
-		if (pcm_val <= seg_end[seg])
-			break;
-	}
-	/* Combine the sign, segment, and quantization bits. */
-	return  ((seg << 4) |
-		 ((pcm_val >> ((seg)  ?  (seg + 3)  :  4)) & 0x0F)) ^ mask;
-}
-
-
-static inline short int alaw2linear(unsigned char alaw)
-{
-	int i;
-	int seg;
-
-	alaw ^= AMI_MASK;
-	i = ((alaw & 0x0F) << 4) + 8 /* rounding error */;
-	seg = (((int) alaw & 0x70) >> 4);
-	if (seg)
-		i = (i + 0x100) << (seg - 1);
-	return (short int) ((alaw & 0x80)  ?  i  :  -i);
-}
-
-static inline short int ulaw2linear(unsigned char ulaw)
-{
-	short mu, e, f, y;
-	static short etab[] = {0, 132, 396, 924, 1980, 4092, 8316, 16764};
-
-	mu = 255 - ulaw;
-	e = (mu & 0x70) / 16;
-	f = mu & 0x0f;
-	y = f * (1 << (e + 3));
-	y += etab[e];
-	if (mu & 0x80)
-		y = -y;
-	return y;
-}
-
-#define BIAS 0x84   /*!< define the add-in bias for 16 bit samples */
-
-static unsigned char linear2ulaw(short sample)
-{
-	static int exp_lut[256] = {
-		0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
-		4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-		5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-		5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-		6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-		6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-		6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-		6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
-	int sign, exponent, mantissa;
-	unsigned char ulawbyte;
-
-	/* Get the sample into sign-magnitude. */
-	sign = (sample >> 8) & 0x80;	  /* set aside the sign */
-	if (sign != 0)
-		sample = -sample;	      /* get magnitude */
-
-	/* Convert from 16 bit linear to ulaw. */
-	sample = sample + BIAS;
-	exponent = exp_lut[(sample >> 7) & 0xFF];
-	mantissa = (sample >> (exponent + 3)) & 0x0F;
-	ulawbyte = ~(sign | (exponent << 4) | mantissa);
-
-	return ulawbyte;
-}
-
-void dsp_audio_generate_law_tables(void)
-{
-	int i;
-	for (i = 0; i < 256; i++)
-		dsp_audio_alaw_to_s32[i] = alaw2linear(bitrev8((u8)i));
-
-	for (i = 0; i < 256; i++)
-		dsp_audio_ulaw_to_s32[i] = ulaw2linear(bitrev8((u8)i));
-
-	for (i = 0; i < 256; i++) {
-		dsp_audio_alaw_to_ulaw[i] =
-			linear2ulaw(dsp_audio_alaw_to_s32[i]);
-		dsp_audio_ulaw_to_alaw[i] =
-			linear2alaw(dsp_audio_ulaw_to_s32[i]);
-	}
-}
-
-void
-dsp_audio_generate_s2law_table(void)
-{
-	int i;
-
-	if (dsp_options & DSP_OPT_ULAW) {
-		/* generating ulaw-table */
-		for (i = -32768; i < 32768; i++) {
-			dsp_audio_s16_to_law[i & 0xffff] =
-				bitrev8(linear2ulaw(i));
-		}
-	} else {
-		/* generating alaw-table */
-		for (i = -32768; i < 32768; i++) {
-			dsp_audio_s16_to_law[i & 0xffff] =
-				bitrev8(linear2alaw(i));
-		}
-	}
-}
-
-
-/*
- * the seven bit sample is the number of every second alaw-sample ordered by
- * aplitude. 0x00 is negative, 0x7f is positive amplitude.
- */
-u8 dsp_audio_seven2law[128];
-u8 dsp_audio_law2seven[256];
-
-/********************************************************************
- * generate table for conversion law from/to 7-bit alaw-like sample *
- ********************************************************************/
-
-void
-dsp_audio_generate_seven(void)
-{
-	int i, j, k;
-	u8 spl;
-	u8 sorted_alaw[256];
-
-	/* generate alaw table, sorted by the linear value */
-	for (i = 0; i < 256; i++) {
-		j = 0;
-		for (k = 0; k < 256; k++) {
-			if (dsp_audio_alaw_to_s32[k]
-			    < dsp_audio_alaw_to_s32[i])
-				j++;
-		}
-		sorted_alaw[j] = i;
-	}
-
-	/* generate tabels */
-	for (i = 0; i < 256; i++) {
-		/* spl is the source: the law-sample (converted to alaw) */
-		spl = i;
-		if (dsp_options & DSP_OPT_ULAW)
-			spl = dsp_audio_ulaw_to_alaw[i];
-		/* find the 7-bit-sample */
-		for (j = 0; j < 256; j++) {
-			if (sorted_alaw[j] == spl)
-				break;
-		}
-		/* write 7-bit audio value */
-		dsp_audio_law2seven[i] = j >> 1;
-	}
-	for (i = 0; i < 128; i++) {
-		spl = sorted_alaw[i << 1];
-		if (dsp_options & DSP_OPT_ULAW)
-			spl = dsp_audio_alaw_to_ulaw[spl];
-		dsp_audio_seven2law[i] = spl;
-	}
-}
-
-
-/* mix 2*law -> law */
-u8 dsp_audio_mix_law[65536];
-
-/******************************************************
- * generate mix table to mix two law samples into one *
- ******************************************************/
-
-void
-dsp_audio_generate_mix_table(void)
-{
-	int i, j;
-	s32 sample;
-
-	i = 0;
-	while (i < 256) {
-		j = 0;
-		while (j < 256) {
-			sample = dsp_audio_law_to_s32[i];
-			sample += dsp_audio_law_to_s32[j];
-			if (sample > 32767)
-				sample = 32767;
-			if (sample < -32768)
-				sample = -32768;
-			dsp_audio_mix_law[(i << 8) | j] =
-				dsp_audio_s16_to_law[sample & 0xffff];
-			j++;
-		}
-		i++;
-	}
-}
-
-
-/*************************************
- * generate different volume changes *
- *************************************/
-
-static u8 dsp_audio_reduce8[256];
-static u8 dsp_audio_reduce7[256];
-static u8 dsp_audio_reduce6[256];
-static u8 dsp_audio_reduce5[256];
-static u8 dsp_audio_reduce4[256];
-static u8 dsp_audio_reduce3[256];
-static u8 dsp_audio_reduce2[256];
-static u8 dsp_audio_reduce1[256];
-static u8 dsp_audio_increase1[256];
-static u8 dsp_audio_increase2[256];
-static u8 dsp_audio_increase3[256];
-static u8 dsp_audio_increase4[256];
-static u8 dsp_audio_increase5[256];
-static u8 dsp_audio_increase6[256];
-static u8 dsp_audio_increase7[256];
-static u8 dsp_audio_increase8[256];
-
-static u8 *dsp_audio_volume_change[16] = {
-	dsp_audio_reduce8,
-	dsp_audio_reduce7,
-	dsp_audio_reduce6,
-	dsp_audio_reduce5,
-	dsp_audio_reduce4,
-	dsp_audio_reduce3,
-	dsp_audio_reduce2,
-	dsp_audio_reduce1,
-	dsp_audio_increase1,
-	dsp_audio_increase2,
-	dsp_audio_increase3,
-	dsp_audio_increase4,
-	dsp_audio_increase5,
-	dsp_audio_increase6,
-	dsp_audio_increase7,
-	dsp_audio_increase8,
-};
-
-void
-dsp_audio_generate_volume_changes(void)
-{
-	register s32 sample;
-	int i;
-	int num[]   = { 110, 125, 150, 175, 200, 300, 400, 500 };
-	int denum[] = { 100, 100, 100, 100, 100, 100, 100, 100 };
-
-	i = 0;
-	while (i < 256) {
-		dsp_audio_reduce8[i] = dsp_audio_s16_to_law[
-			(dsp_audio_law_to_s32[i] * denum[7] / num[7]) & 0xffff];
-		dsp_audio_reduce7[i] = dsp_audio_s16_to_law[
-			(dsp_audio_law_to_s32[i] * denum[6] / num[6]) & 0xffff];
-		dsp_audio_reduce6[i] = dsp_audio_s16_to_law[
-			(dsp_audio_law_to_s32[i] * denum[5] / num[5]) & 0xffff];
-		dsp_audio_reduce5[i] = dsp_audio_s16_to_law[
-			(dsp_audio_law_to_s32[i] * denum[4] / num[4]) & 0xffff];
-		dsp_audio_reduce4[i] = dsp_audio_s16_to_law[
-			(dsp_audio_law_to_s32[i] * denum[3] / num[3]) & 0xffff];
-		dsp_audio_reduce3[i] = dsp_audio_s16_to_law[
-			(dsp_audio_law_to_s32[i] * denum[2] / num[2]) & 0xffff];
-		dsp_audio_reduce2[i] = dsp_audio_s16_to_law[
-			(dsp_audio_law_to_s32[i] * denum[1] / num[1]) & 0xffff];
-		dsp_audio_reduce1[i] = dsp_audio_s16_to_law[
-			(dsp_audio_law_to_s32[i] * denum[0] / num[0]) & 0xffff];
-		sample = dsp_audio_law_to_s32[i] * num[0] / denum[0];
-		if (sample < -32768)
-			sample = -32768;
-		else if (sample > 32767)
-			sample = 32767;
-		dsp_audio_increase1[i] = dsp_audio_s16_to_law[sample & 0xffff];
-		sample = dsp_audio_law_to_s32[i] * num[1] / denum[1];
-		if (sample < -32768)
-			sample = -32768;
-		else if (sample > 32767)
-			sample = 32767;
-		dsp_audio_increase2[i] = dsp_audio_s16_to_law[sample & 0xffff];
-		sample = dsp_audio_law_to_s32[i] * num[2] / denum[2];
-		if (sample < -32768)
-			sample = -32768;
-		else if (sample > 32767)
-			sample = 32767;
-		dsp_audio_increase3[i] = dsp_audio_s16_to_law[sample & 0xffff];
-		sample = dsp_audio_law_to_s32[i] * num[3] / denum[3];
-		if (sample < -32768)
-			sample = -32768;
-		else if (sample > 32767)
-			sample = 32767;
-		dsp_audio_increase4[i] = dsp_audio_s16_to_law[sample & 0xffff];
-		sample = dsp_audio_law_to_s32[i] * num[4] / denum[4];
-		if (sample < -32768)
-			sample = -32768;
-		else if (sample > 32767)
-			sample = 32767;
-		dsp_audio_increase5[i] = dsp_audio_s16_to_law[sample & 0xffff];
-		sample = dsp_audio_law_to_s32[i] * num[5] / denum[5];
-		if (sample < -32768)
-			sample = -32768;
-		else if (sample > 32767)
-			sample = 32767;
-		dsp_audio_increase6[i] = dsp_audio_s16_to_law[sample & 0xffff];
-		sample = dsp_audio_law_to_s32[i] * num[6] / denum[6];
-		if (sample < -32768)
-			sample = -32768;
-		else if (sample > 32767)
-			sample = 32767;
-		dsp_audio_increase7[i] = dsp_audio_s16_to_law[sample & 0xffff];
-		sample = dsp_audio_law_to_s32[i] * num[7] / denum[7];
-		if (sample < -32768)
-			sample = -32768;
-		else if (sample > 32767)
-			sample = 32767;
-		dsp_audio_increase8[i] = dsp_audio_s16_to_law[sample & 0xffff];
-
-		i++;
-	}
-}
-
-
-/**************************************
- * change the volume of the given skb *
- **************************************/
-
-/* this is a helper function for changing volume of skb. the range may be
- * -8 to 8, which is a shift to the power of 2. 0 == no volume, 3 == volume*8
- */
-void
-dsp_change_volume(struct sk_buff *skb, int volume)
-{
-	u8 *volume_change;
-	int i, ii;
-	u8 *p;
-	int shift;
-
-	if (volume == 0)
-		return;
-
-	/* get correct conversion table */
-	if (volume < 0) {
-		shift = volume + 8;
-		if (shift < 0)
-			shift = 0;
-	} else {
-		shift = volume + 7;
-		if (shift > 15)
-			shift = 15;
-	}
-	volume_change = dsp_audio_volume_change[shift];
-	i = 0;
-	ii = skb->len;
-	p = skb->data;
-	/* change volume */
-	while (i < ii) {
-		*p = volume_change[*p];
-		p++;
-		i++;
-	}
-}
diff --git a/drivers/isdn/mISDN/dsp_biquad.h b/drivers/isdn/mISDN/dsp_biquad.h
deleted file mode 100644
index f40d52a4c4ee..000000000000
--- a/drivers/isdn/mISDN/dsp_biquad.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * SpanDSP - a series of DSP components for telephony
- *
- * biquad.h - General telephony bi-quad section routines (currently this just
- *            handles canonic/type 2 form)
- *
- * Written by Steve Underwood <steveu@coppice.org>
- *
- * Copyright (C) 2001 Steve Underwood
- *
- * All rights reserved.
- */
-
-struct biquad2_state {
-	int32_t gain;
-	int32_t a1;
-	int32_t a2;
-	int32_t b1;
-	int32_t b2;
-
-	int32_t z1;
-	int32_t z2;
-};
-
-static inline void biquad2_init(struct biquad2_state *bq,
-				int32_t gain, int32_t a1, int32_t a2, int32_t b1, int32_t b2)
-{
-	bq->gain = gain;
-	bq->a1 = a1;
-	bq->a2 = a2;
-	bq->b1 = b1;
-	bq->b2 = b2;
-
-	bq->z1 = 0;
-	bq->z2 = 0;
-}
-
-static inline int16_t biquad2(struct biquad2_state *bq, int16_t sample)
-{
-	int32_t y;
-	int32_t z0;
-
-	z0 = sample * bq->gain + bq->z1 * bq->a1 + bq->z2 * bq->a2;
-	y = z0 + bq->z1 * bq->b1 + bq->z2 * bq->b2;
-
-	bq->z2 = bq->z1;
-	bq->z1 = z0 >> 15;
-	y >>= 15;
-	return  y;
-}
diff --git a/drivers/isdn/mISDN/dsp_blowfish.c b/drivers/isdn/mISDN/dsp_blowfish.c
deleted file mode 100644
index 0e77c282c862..000000000000
--- a/drivers/isdn/mISDN/dsp_blowfish.c
+++ /dev/null
@@ -1,667 +0,0 @@
-/*
- * Blowfish encryption/decryption for mISDN_dsp.
- *
- * Copyright Andreas Eversberg (jolly@eversberg.eu)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#include <linux/mISDNif.h>
-#include <linux/mISDNdsp.h>
-#include "core.h"
-#include "dsp.h"
-
-/*
- * how to encode a sample stream to 64-bit blocks that will be encryped
- *
- * first of all, data is collected until a block of 9 samples are received.
- * of course, a packet may have much more than 9 sample, but is may have
- * not excacly the multiple of 9 samples. if there is a rest, the next
- * received data will complete the block.
- *
- * the block is then converted to 9 uLAW samples without the least sigificant
- * bit. the result is a 7-bit encoded sample.
- *
- * the samples will be reoganised to form 8 bytes of data:
- * (5(6) means: encoded sample no. 5, bit 6)
- *
- * 0(6) 0(5) 0(4) 0(3) 0(2) 0(1) 0(0) 1(6)
- * 1(5) 1(4) 1(3) 1(2) 1(1) 1(0) 2(6) 2(5)
- * 2(4) 2(3) 2(2) 2(1) 2(0) 3(6) 3(5) 3(4)
- * 3(3) 3(2) 3(1) 3(0) 4(6) 4(5) 4(4) 4(3)
- * 4(2) 4(1) 4(0) 5(6) 5(5) 5(4) 5(3) 5(2)
- * 5(1) 5(0) 6(6) 6(5) 6(4) 6(3) 6(2) 6(1)
- * 6(0) 7(6) 7(5) 7(4) 7(3) 7(2) 7(1) 7(0)
- * 8(6) 8(5) 8(4) 8(3) 8(2) 8(1) 8(0)
- *
- * the missing bit 0 of the last byte is filled with some
- * random noise, to fill all 8 bytes.
- *
- * the 8 bytes will be encrypted using blowfish.
- *
- * the result will be converted into 9 bytes. the bit 7 is used for
- * checksumme (CS) for sync (0, 1) and for the last bit:
- * (5(6) means: crypted byte 5, bit 6)
- *
- * 1    0(7) 0(6) 0(5) 0(4) 0(3) 0(2) 0(1)
- * 0    0(0) 1(7) 1(6) 1(5) 1(4) 1(3) 1(2)
- * 0    1(1) 1(0) 2(7) 2(6) 2(5) 2(4) 2(3)
- * 0    2(2) 2(1) 2(0) 3(7) 3(6) 3(5) 3(4)
- * 0    3(3) 3(2) 3(1) 3(0) 4(7) 4(6) 4(5)
- * CS   4(4) 4(3) 4(2) 4(1) 4(0) 5(7) 5(6)
- * CS   5(5) 5(4) 5(3) 5(2) 5(1) 5(0) 6(7)
- * CS   6(6) 6(5) 6(4) 6(3) 6(2) 6(1) 6(0)
- * 7(0) 7(6) 7(5) 7(4) 7(3) 7(2) 7(1) 7(0)
- *
- * the checksum is used to detect transmission errors and frame drops.
- *
- * synchronisation of received block is done by shifting the upper bit of each
- * byte (bit 7) to a shift register. if the rigister has the first five bits
- * (10000), this is used to find the sync. only if sync has been found, the
- * current block of 9 received bytes are decrypted. before that the check
- * sum is calculated. if it is incorrect the block is dropped.
- * this will avoid loud noise due to corrupt encrypted data.
- *
- * if the last block is corrupt, the current decoded block is repeated
- * until a valid block has been received.
- */
-
-/*
- *  some blowfish parts are taken from the
- * crypto-api for faster implementation
- */
-
-static const u32 bf_pbox[16 + 2] = {
-	0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
-	0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
-	0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
-	0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
-	0x9216d5d9, 0x8979fb1b,
-};
-
-static const u32 bf_sbox[256 * 4] = {
-	0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
-	0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
-	0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
-	0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
-	0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
-	0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
-	0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
-	0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
-	0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
-	0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
-	0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
-	0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
-	0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
-	0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
-	0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
-	0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
-	0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
-	0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
-	0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
-	0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
-	0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
-	0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
-	0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
-	0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
-	0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
-	0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
-	0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
-	0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
-	0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
-	0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
-	0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
-	0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
-	0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
-	0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
-	0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
-	0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
-	0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
-	0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
-	0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
-	0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
-	0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
-	0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
-	0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
-	0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
-	0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
-	0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
-	0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
-	0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
-	0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
-	0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
-	0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
-	0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
-	0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
-	0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
-	0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
-	0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
-	0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
-	0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
-	0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
-	0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
-	0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
-	0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
-	0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
-	0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a,
-	0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
-	0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
-	0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
-	0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
-	0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
-	0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
-	0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
-	0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
-	0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
-	0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
-	0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
-	0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
-	0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
-	0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
-	0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
-	0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
-	0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
-	0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
-	0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
-	0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
-	0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
-	0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
-	0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
-	0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
-	0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
-	0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
-	0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
-	0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
-	0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
-	0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
-	0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
-	0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
-	0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
-	0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
-	0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
-	0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
-	0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
-	0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
-	0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
-	0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
-	0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
-	0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
-	0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
-	0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
-	0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
-	0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
-	0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
-	0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
-	0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
-	0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
-	0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
-	0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
-	0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
-	0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
-	0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
-	0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
-	0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
-	0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
-	0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
-	0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
-	0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
-	0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
-	0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
-	0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7,
-	0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
-	0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
-	0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
-	0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
-	0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
-	0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
-	0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
-	0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
-	0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
-	0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
-	0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
-	0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
-	0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
-	0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
-	0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
-	0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
-	0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
-	0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
-	0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
-	0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
-	0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
-	0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
-	0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
-	0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
-	0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
-	0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
-	0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
-	0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
-	0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
-	0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
-	0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
-	0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
-	0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
-	0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
-	0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
-	0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
-	0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
-	0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
-	0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
-	0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
-	0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
-	0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
-	0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
-	0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
-	0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
-	0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
-	0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
-	0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
-	0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
-	0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
-	0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
-	0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
-	0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
-	0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
-	0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
-	0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
-	0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
-	0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
-	0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
-	0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
-	0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
-	0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
-	0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
-	0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0,
-	0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
-	0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
-	0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
-	0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
-	0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
-	0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
-	0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
-	0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
-	0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
-	0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
-	0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
-	0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
-	0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
-	0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
-	0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
-	0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
-	0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
-	0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
-	0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
-	0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
-	0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
-	0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
-	0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
-	0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
-	0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
-	0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
-	0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
-	0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
-	0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
-	0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
-	0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
-	0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
-	0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
-	0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
-	0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
-	0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
-	0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
-	0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
-	0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
-	0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
-	0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
-	0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
-	0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
-	0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
-	0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
-	0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
-	0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
-	0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
-	0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
-	0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
-	0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
-	0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
-	0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
-	0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
-	0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
-	0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
-	0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
-	0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
-	0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
-	0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
-	0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
-	0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
-	0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
-	0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6,
-};
-
-/*
- * Round loop unrolling macros, S is a pointer to a S-Box array
- * organized in 4 unsigned longs at a row.
- */
-#define GET32_3(x) (((x) & 0xff))
-#define GET32_2(x) (((x) >> (8)) & (0xff))
-#define GET32_1(x) (((x) >> (16)) & (0xff))
-#define GET32_0(x) (((x) >> (24)) & (0xff))
-
-#define bf_F(x) (((S[GET32_0(x)] + S[256 + GET32_1(x)]) ^	\
-		  S[512 + GET32_2(x)]) + S[768 + GET32_3(x)])
-
-#define EROUND(a, b, n)  do { b ^= P[n]; a ^= bf_F(b); } while (0)
-#define DROUND(a, b, n)  do { a ^= bf_F(b); b ^= P[n]; } while (0)
-
-
-/*
- * encrypt isdn data frame
- * every block with 9 samples is encrypted
- */
-void
-dsp_bf_encrypt(struct dsp *dsp, u8 *data, int len)
-{
-	int i = 0, j = dsp->bf_crypt_pos;
-	u8 *bf_data_in = dsp->bf_data_in;
-	u8 *bf_crypt_out = dsp->bf_crypt_out;
-	u32 *P = dsp->bf_p;
-	u32 *S = dsp->bf_s;
-	u32 yl, yr;
-	u32 cs;
-	u8 nibble;
-
-	while (i < len) {
-		/* collect a block of 9 samples */
-		if (j < 9) {
-			bf_data_in[j] = *data;
-			*data++ = bf_crypt_out[j++];
-			i++;
-			continue;
-		}
-		j = 0;
-		/* transcode 9 samples xlaw to 8 bytes */
-		yl = dsp_audio_law2seven[bf_data_in[0]];
-		yl = (yl << 7) | dsp_audio_law2seven[bf_data_in[1]];
-		yl = (yl << 7) | dsp_audio_law2seven[bf_data_in[2]];
-		yl = (yl << 7) | dsp_audio_law2seven[bf_data_in[3]];
-		nibble = dsp_audio_law2seven[bf_data_in[4]];
-		yr = nibble;
-		yl = (yl << 4) | (nibble >> 3);
-		yr = (yr << 7) | dsp_audio_law2seven[bf_data_in[5]];
-		yr = (yr << 7) | dsp_audio_law2seven[bf_data_in[6]];
-		yr = (yr << 7) | dsp_audio_law2seven[bf_data_in[7]];
-		yr = (yr << 7) | dsp_audio_law2seven[bf_data_in[8]];
-		yr = (yr << 1) | (bf_data_in[0] & 1);
-
-		/* fill unused bit with random noise of audio input */
-		/* encrypt */
-
-		EROUND(yr, yl, 0);
-		EROUND(yl, yr, 1);
-		EROUND(yr, yl, 2);
-		EROUND(yl, yr, 3);
-		EROUND(yr, yl, 4);
-		EROUND(yl, yr, 5);
-		EROUND(yr, yl, 6);
-		EROUND(yl, yr, 7);
-		EROUND(yr, yl, 8);
-		EROUND(yl, yr, 9);
-		EROUND(yr, yl, 10);
-		EROUND(yl, yr, 11);
-		EROUND(yr, yl, 12);
-		EROUND(yl, yr, 13);
-		EROUND(yr, yl, 14);
-		EROUND(yl, yr, 15);
-		yl ^= P[16];
-		yr ^= P[17];
-
-		/* calculate 3-bit checksumme */
-		cs = yl ^ (yl >> 3) ^ (yl >> 6) ^ (yl >> 9) ^ (yl >> 12) ^ (yl >> 15)
-			^ (yl >> 18) ^ (yl >> 21) ^ (yl >> 24) ^ (yl >> 27) ^ (yl >> 30)
-			^ (yr << 2) ^ (yr >> 1) ^ (yr >> 4) ^ (yr >> 7) ^ (yr >> 10)
-			^ (yr >> 13) ^ (yr >> 16) ^ (yr >> 19) ^ (yr >> 22) ^ (yr >> 25)
-			^ (yr >> 28) ^ (yr >> 31);
-
-		/*
-		 * transcode 8 crypted bytes to 9 data bytes with sync
-		 * and checksum information
-		 */
-		bf_crypt_out[0] = (yl >> 25) | 0x80;
-		bf_crypt_out[1] = (yl >> 18) & 0x7f;
-		bf_crypt_out[2] = (yl >> 11) & 0x7f;
-		bf_crypt_out[3] = (yl >> 4) & 0x7f;
-		bf_crypt_out[4] = ((yl << 3) & 0x78) | ((yr >> 29) & 0x07);
-		bf_crypt_out[5] = ((yr >> 22) & 0x7f) | ((cs << 5) & 0x80);
-		bf_crypt_out[6] = ((yr >> 15) & 0x7f) | ((cs << 6) & 0x80);
-		bf_crypt_out[7] = ((yr >> 8) & 0x7f) | (cs << 7);
-		bf_crypt_out[8] = yr;
-	}
-
-	/* write current count */
-	dsp->bf_crypt_pos = j;
-
-}
-
-
-/*
- * decrypt isdn data frame
- * every block with 9 bytes is decrypted
- */
-void
-dsp_bf_decrypt(struct dsp *dsp, u8 *data, int len)
-{
-	int i = 0;
-	u8 j = dsp->bf_decrypt_in_pos;
-	u8 k = dsp->bf_decrypt_out_pos;
-	u8 *bf_crypt_inring = dsp->bf_crypt_inring;
-	u8 *bf_data_out = dsp->bf_data_out;
-	u16 sync = dsp->bf_sync;
-	u32 *P = dsp->bf_p;
-	u32 *S = dsp->bf_s;
-	u32 yl, yr;
-	u8 nibble;
-	u8 cs, cs0, cs1, cs2;
-
-	while (i < len) {
-		/*
-		 * shift upper bit and rotate data to buffer ring
-		 * send current decrypted data
-		 */
-		sync = (sync << 1) | ((*data) >> 7);
-		bf_crypt_inring[j++ & 15] = *data;
-		*data++ = bf_data_out[k++];
-		i++;
-		if (k == 9)
-			k = 0; /* repeat if no sync has been found */
-		/* check if not in sync */
-		if ((sync & 0x1f0) != 0x100)
-			continue;
-		j -= 9;
-		/* transcode receive data to 64 bit block of encrypted data */
-		yl = bf_crypt_inring[j++ & 15];
-		yl = (yl << 7) | bf_crypt_inring[j++ & 15]; /* bit7 = 0 */
-		yl = (yl << 7) | bf_crypt_inring[j++ & 15]; /* bit7 = 0 */
-		yl = (yl << 7) | bf_crypt_inring[j++ & 15]; /* bit7 = 0 */
-		nibble = bf_crypt_inring[j++ & 15]; /* bit7 = 0 */
-		yr = nibble;
-		yl = (yl << 4) | (nibble >> 3);
-		cs2 = bf_crypt_inring[j++ & 15];
-		yr = (yr << 7) | (cs2 & 0x7f);
-		cs1 = bf_crypt_inring[j++ & 15];
-		yr = (yr << 7) | (cs1 & 0x7f);
-		cs0 = bf_crypt_inring[j++ & 15];
-		yr = (yr << 7) | (cs0 & 0x7f);
-		yr = (yr << 8) | bf_crypt_inring[j++ & 15];
-
-		/* calculate 3-bit checksumme */
-		cs = yl ^ (yl >> 3) ^ (yl >> 6) ^ (yl >> 9) ^ (yl >> 12) ^ (yl >> 15)
-			^ (yl >> 18) ^ (yl >> 21) ^ (yl >> 24) ^ (yl >> 27) ^ (yl >> 30)
-			^ (yr << 2) ^ (yr >> 1) ^ (yr >> 4) ^ (yr >> 7) ^ (yr >> 10)
-			^ (yr >> 13) ^ (yr >> 16) ^ (yr >> 19) ^ (yr >> 22) ^ (yr >> 25)
-			^ (yr >> 28) ^ (yr >> 31);
-
-		/* check if frame is valid */
-		if ((cs & 0x7) != (((cs2 >> 5) & 4) | ((cs1 >> 6) & 2) | (cs0 >> 7))) {
-			if (dsp_debug & DEBUG_DSP_BLOWFISH)
-				printk(KERN_DEBUG
-				       "DSP BLOWFISH: received corrupt frame, "
-				       "checksumme is not correct\n");
-			continue;
-		}
-
-		/* decrypt */
-		yr ^= P[17];
-		yl ^= P[16];
-		DROUND(yl, yr, 15);
-		DROUND(yr, yl, 14);
-		DROUND(yl, yr, 13);
-		DROUND(yr, yl, 12);
-		DROUND(yl, yr, 11);
-		DROUND(yr, yl, 10);
-		DROUND(yl, yr, 9);
-		DROUND(yr, yl, 8);
-		DROUND(yl, yr, 7);
-		DROUND(yr, yl, 6);
-		DROUND(yl, yr, 5);
-		DROUND(yr, yl, 4);
-		DROUND(yl, yr, 3);
-		DROUND(yr, yl, 2);
-		DROUND(yl, yr, 1);
-		DROUND(yr, yl, 0);
-
-		/* transcode 8 crypted bytes to 9 sample bytes */
-		bf_data_out[0] = dsp_audio_seven2law[(yl >> 25) & 0x7f];
-		bf_data_out[1] = dsp_audio_seven2law[(yl >> 18) & 0x7f];
-		bf_data_out[2] = dsp_audio_seven2law[(yl >> 11) & 0x7f];
-		bf_data_out[3] = dsp_audio_seven2law[(yl >> 4) & 0x7f];
-		bf_data_out[4] = dsp_audio_seven2law[((yl << 3) & 0x78) |
-						     ((yr >> 29) & 0x07)];
-
-		bf_data_out[5] = dsp_audio_seven2law[(yr >> 22) & 0x7f];
-		bf_data_out[6] = dsp_audio_seven2law[(yr >> 15) & 0x7f];
-		bf_data_out[7] = dsp_audio_seven2law[(yr >> 8) & 0x7f];
-		bf_data_out[8] = dsp_audio_seven2law[(yr >> 1) & 0x7f];
-		k = 0; /* start with new decoded frame */
-	}
-
-	/* write current count and sync */
-	dsp->bf_decrypt_in_pos = j;
-	dsp->bf_decrypt_out_pos = k;
-	dsp->bf_sync = sync;
-}
-
-
-/* used to encrypt S and P boxes */
-static inline void
-encrypt_block(const u32 *P, const u32 *S, u32 *dst, u32 *src)
-{
-	u32 yl = src[0];
-	u32 yr = src[1];
-
-	EROUND(yr, yl, 0);
-	EROUND(yl, yr, 1);
-	EROUND(yr, yl, 2);
-	EROUND(yl, yr, 3);
-	EROUND(yr, yl, 4);
-	EROUND(yl, yr, 5);
-	EROUND(yr, yl, 6);
-	EROUND(yl, yr, 7);
-	EROUND(yr, yl, 8);
-	EROUND(yl, yr, 9);
-	EROUND(yr, yl, 10);
-	EROUND(yl, yr, 11);
-	EROUND(yr, yl, 12);
-	EROUND(yl, yr, 13);
-	EROUND(yr, yl, 14);
-	EROUND(yl, yr, 15);
-
-	yl ^= P[16];
-	yr ^= P[17];
-
-	dst[0] = yr;
-	dst[1] = yl;
-}
-
-/*
- * initialize the dsp for encryption and decryption using the same key
- * Calculates the blowfish S and P boxes for encryption and decryption.
- * The margin of keylen must be 4-56 bytes.
- * returns 0 if ok.
- */
-int
-dsp_bf_init(struct dsp *dsp, const u8 *key, uint keylen)
-{
-	short i, j, count;
-	u32 data[2], temp;
-	u32 *P = (u32 *)dsp->bf_p;
-	u32 *S = (u32 *)dsp->bf_s;
-
-	if (keylen < 4 || keylen > 56)
-		return 1;
-
-	/* Set dsp states */
-	i = 0;
-	while (i < 9) {
-		dsp->bf_crypt_out[i] = 0xff;
-		dsp->bf_data_out[i] = dsp_silence;
-		i++;
-	}
-	dsp->bf_crypt_pos = 0;
-	dsp->bf_decrypt_in_pos = 0;
-	dsp->bf_decrypt_out_pos = 0;
-	dsp->bf_sync = 0x1ff;
-	dsp->bf_enable = 1;
-
-	/* Copy the initialization s-boxes */
-	for (i = 0, count = 0; i < 256; i++)
-		for (j = 0; j < 4; j++, count++)
-			S[count] = bf_sbox[count];
-
-	/* Set the p-boxes */
-	for (i = 0; i < 16 + 2; i++)
-		P[i] = bf_pbox[i];
-
-	/* Actual subkey generation */
-	for (j = 0, i = 0; i < 16 + 2; i++) {
-		temp = (((u32)key[j] << 24) |
-			((u32)key[(j + 1) % keylen] << 16) |
-			((u32)key[(j + 2) % keylen] << 8) |
-			((u32)key[(j + 3) % keylen]));
-
-		P[i] = P[i] ^ temp;
-		j = (j + 4) % keylen;
-	}
-
-	data[0] = 0x00000000;
-	data[1] = 0x00000000;
-
-	for (i = 0; i < 16 + 2; i += 2) {
-		encrypt_block(P, S, data, data);
-
-		P[i] = data[0];
-		P[i + 1] = data[1];
-	}
-
-	for (i = 0; i < 4; i++) {
-		for (j = 0, count = i * 256; j < 256; j += 2, count += 2) {
-			encrypt_block(P, S, data, data);
-
-			S[count] = data[0];
-			S[count + 1] = data[1];
-		}
-	}
-
-	return 0;
-}
-
-
-/*
- * turn encryption off
- */
-void
-dsp_bf_cleanup(struct dsp *dsp)
-{
-	dsp->bf_enable = 0;
-}
diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c
deleted file mode 100644
index d5eb2349c414..000000000000
--- a/drivers/isdn/mISDN/dsp_cmx.c
+++ /dev/null
@@ -1,1949 +0,0 @@
-/*
- * Audio crossconnecting/conferrencing (hardware level).
- *
- * Copyright 2002 by Andreas Eversberg (jolly@eversberg.eu)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-/*
- * The process of adding and removing parties to/from a conference:
- *
- * There is a chain of struct dsp_conf which has one or more members in a chain
- * of struct dsp_conf_member.
- *
- * After a party is added, the conference is checked for hardware capability.
- * Also if a party is removed, the conference is checked again.
- *
- * There are 3 different solutions: -1 = software, 0 = hardware-crossconnect
- * 1-n = hardware-conference. The n will give the conference number.
- *
- * Depending on the change after removal or insertion of a party, hardware
- * commands are given.
- *
- * The current solution is stored within the struct dsp_conf entry.
- */
-
-/*
- * HOW THE CMX WORKS:
- *
- * There are 3 types of interaction: One member is alone, in this case only
- * data flow from upper to lower layer is done.
- * Two members will also exchange their data so they are crossconnected.
- * Three or more members will be added in a conference and will hear each
- * other but will not receive their own speech (echo) if not enabled.
- *
- * Features of CMX are:
- *  - Crossconnecting or even conference, if more than two members are together.
- *  - Force mixing of transmit data with other crossconnect/conference members.
- *  - Echo generation to benchmark the delay of audio processing.
- *  - Use hardware to minimize cpu load, disable FIFO load and minimize delay.
- *  - Dejittering and clock generation.
- *
- * There are 2 buffers:
- *
- *
- * RX-Buffer
- *                 R             W
- *                 |             |
- * ----------------+-------------+-------------------
- *
- * The rx-buffer is a ring buffer used to store the received data for each
- * individual member. This is only the case if data needs to be dejittered
- * or in case of a conference where different clocks require reclocking.
- * The transmit-clock (R) will read the buffer.
- * If the clock overruns the write-pointer, we will have a buffer underrun.
- * If the write pointer always has a certain distance from the transmit-
- * clock, we will have a delay. The delay will dynamically be increased and
- * reduced.
- *
- *
- * TX-Buffer
- *                  R        W
- *                  |        |
- * -----------------+--------+-----------------------
- *
- * The tx-buffer is a ring buffer to queue the transmit data from user space
- * until it will be mixed or sent. There are two pointers, R and W. If the write
- * pointer W would reach or overrun R, the buffer would overrun. In this case
- * (some) data is dropped so that it will not overrun.
- * Additionally a dynamic dejittering can be enabled. this allows data from
- * user space that have jitter and different clock source.
- *
- *
- * Clock:
- *
- * A Clock is not required, if the data source has exactly one clock. In this
- * case the data source is forwarded to the destination.
- *
- * A Clock is required, because the data source
- *  - has multiple clocks.
- *  - has no usable clock due to jitter or packet loss (VoIP).
- * In this case the system's clock is used. The clock resolution depends on
- * the jiffy resolution.
- *
- * If a member joins a conference:
- *
- * - If a member joins, its rx_buff is set to silence and change read pointer
- *   to transmit clock.
- *
- * The procedure of received data from card is explained in cmx_receive.
- * The procedure of received data from user space is explained in cmx_transmit.
- * The procedure of transmit data to card is cmx_send.
- *
- *
- * Interaction with other features:
- *
- * DTMF:
- * DTMF decoding is done before the data is crossconnected.
- *
- * Volume change:
- * Changing rx-volume is done before the data is crossconnected. The tx-volume
- * must be changed whenever data is transmitted to the card by the cmx.
- *
- * Tones:
- * If a tone is enabled, it will be processed whenever data is transmitted to
- * the card. It will replace the tx-data from the user space.
- * If tones are generated by hardware, this conference member is removed for
- * this time.
- *
- * Disable rx-data:
- * If cmx is realized in hardware, rx data will be disabled if requested by
- * the upper layer. If dtmf decoding is done by software and enabled, rx data
- * will not be disabled but blocked to the upper layer.
- *
- * HFC conference engine:
- * If it is possible to realize all features using hardware, hardware will be
- * used if not forbidden by control command. Disabling rx-data provides
- * absolutely traffic free audio processing. (except for the quick 1-frame
- * upload of a tone loop, only once for a new tone)
- *
- */
-
-/* delay.h is required for hw_lock.h */
-
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/mISDNif.h>
-#include <linux/mISDNdsp.h>
-#include "core.h"
-#include "dsp.h"
-/*
- * debugging of multi party conference,
- * by using conference even with two members
- */
-
-/* #define CMX_CONF_DEBUG */
-
-/*#define CMX_DEBUG * massive read/write pointer output */
-/*#define CMX_DELAY_DEBUG * gives rx-buffer delay overview */
-/*#define CMX_TX_DEBUG * massive read/write on tx-buffer with content */
-
-/*
- * debug cmx memory structure
- */
-void
-dsp_cmx_debug(struct dsp *dsp)
-{
-	struct dsp_conf	*conf;
-	struct dsp_conf_member	*member;
-	struct dsp		*odsp;
-
-	printk(KERN_DEBUG "-----Current DSP\n");
-	list_for_each_entry(odsp, &dsp_ilist, list) {
-		printk(KERN_DEBUG "* %s hardecho=%d softecho=%d txmix=%d",
-		       odsp->name, odsp->echo.hardware, odsp->echo.software,
-		       odsp->tx_mix);
-		if (odsp->conf)
-			printk(" (Conf %d)", odsp->conf->id);
-		if (dsp == odsp)
-			printk(" *this*");
-		printk("\n");
-	}
-	printk(KERN_DEBUG "-----Current Conf:\n");
-	list_for_each_entry(conf, &conf_ilist, list) {
-		printk(KERN_DEBUG "* Conf %d (%p)\n", conf->id, conf);
-		list_for_each_entry(member, &conf->mlist, list) {
-			printk(KERN_DEBUG
-			       "  - member = %s (slot_tx %d, bank_tx %d, "
-			       "slot_rx %d, bank_rx %d hfc_conf %d "
-			       "tx_data %d rx_is_off %d)%s\n",
-			       member->dsp->name, member->dsp->pcm_slot_tx,
-			       member->dsp->pcm_bank_tx, member->dsp->pcm_slot_rx,
-			       member->dsp->pcm_bank_rx, member->dsp->hfc_conf,
-			       member->dsp->tx_data, member->dsp->rx_is_off,
-			       (member->dsp == dsp) ? " *this*" : "");
-		}
-	}
-	printk(KERN_DEBUG "-----end\n");
-}
-
-/*
- * search conference
- */
-static struct dsp_conf *
-dsp_cmx_search_conf(u32 id)
-{
-	struct dsp_conf *conf;
-
-	if (!id) {
-		printk(KERN_WARNING "%s: conference ID is 0.\n", __func__);
-		return NULL;
-	}
-
-	/* search conference */
-	list_for_each_entry(conf, &conf_ilist, list)
-		if (conf->id == id)
-			return conf;
-
-	return NULL;
-}
-
-
-/*
- * add member to conference
- */
-static int
-dsp_cmx_add_conf_member(struct dsp *dsp, struct dsp_conf *conf)
-{
-	struct dsp_conf_member *member;
-
-	if (!conf || !dsp) {
-		printk(KERN_WARNING "%s: conf or dsp is 0.\n", __func__);
-		return -EINVAL;
-	}
-	if (dsp->member) {
-		printk(KERN_WARNING "%s: dsp is already member in a conf.\n",
-		       __func__);
-		return -EINVAL;
-	}
-
-	if (dsp->conf) {
-		printk(KERN_WARNING "%s: dsp is already in a conf.\n",
-		       __func__);
-		return -EINVAL;
-	}
-
-	member = kzalloc_obj(struct dsp_conf_member, GFP_ATOMIC);
-	if (!member) {
-		printk(KERN_ERR "kzalloc struct dsp_conf_member failed\n");
-		return -ENOMEM;
-	}
-	member->dsp = dsp;
-	/* clear rx buffer */
-	memset(dsp->rx_buff, dsp_silence, sizeof(dsp->rx_buff));
-	dsp->rx_init = 1; /* rx_W and rx_R will be adjusted on first frame */
-	dsp->rx_W = 0;
-	dsp->rx_R = 0;
-
-	list_add_tail(&member->list, &conf->mlist);
-
-	dsp->conf = conf;
-	dsp->member = member;
-
-	return 0;
-}
-
-
-/*
- * del member from conference
- */
-int
-dsp_cmx_del_conf_member(struct dsp *dsp)
-{
-	struct dsp_conf_member *member;
-
-	if (!dsp) {
-		printk(KERN_WARNING "%s: dsp is 0.\n",
-		       __func__);
-		return -EINVAL;
-	}
-
-	if (!dsp->conf) {
-		printk(KERN_WARNING "%s: dsp is not in a conf.\n",
-		       __func__);
-		return -EINVAL;
-	}
-
-	if (list_empty(&dsp->conf->mlist)) {
-		printk(KERN_WARNING "%s: dsp has linked an empty conf.\n",
-		       __func__);
-		return -EINVAL;
-	}
-
-	/* find us in conf */
-	list_for_each_entry(member, &dsp->conf->mlist, list) {
-		if (member->dsp == dsp) {
-			list_del(&member->list);
-			dsp->conf = NULL;
-			dsp->member = NULL;
-			kfree(member);
-			return 0;
-		}
-	}
-	printk(KERN_WARNING
-	       "%s: dsp is not present in its own conf_member list.\n",
-	       __func__);
-
-	return -EINVAL;
-}
-
-
-/*
- * new conference
- */
-static struct dsp_conf
-*dsp_cmx_new_conf(u32 id)
-{
-	struct dsp_conf *conf;
-
-	if (!id) {
-		printk(KERN_WARNING "%s: id is 0.\n",
-		       __func__);
-		return NULL;
-	}
-
-	conf = kzalloc_obj(struct dsp_conf, GFP_ATOMIC);
-	if (!conf) {
-		printk(KERN_ERR "kzalloc struct dsp_conf failed\n");
-		return NULL;
-	}
-	INIT_LIST_HEAD(&conf->mlist);
-	conf->id = id;
-
-	list_add_tail(&conf->list, &conf_ilist);
-
-	return conf;
-}
-
-
-/*
- * del conference
- */
-int
-dsp_cmx_del_conf(struct dsp_conf *conf)
-{
-	if (!conf) {
-		printk(KERN_WARNING "%s: conf is null.\n",
-		       __func__);
-		return -EINVAL;
-	}
-
-	if (!list_empty(&conf->mlist)) {
-		printk(KERN_WARNING "%s: conf not empty.\n",
-		       __func__);
-		return -EINVAL;
-	}
-	list_del(&conf->list);
-	kfree(conf);
-
-	return 0;
-}
-
-
-/*
- * send HW message to hfc card
- */
-static void
-dsp_cmx_hw_message(struct dsp *dsp, u32 message, u32 param1, u32 param2,
-		   u32 param3, u32 param4)
-{
-	struct mISDN_ctrl_req cq;
-
-	memset(&cq, 0, sizeof(cq));
-	cq.op = message;
-	cq.p1 = param1 | (param2 << 8);
-	cq.p2 = param3 | (param4 << 8);
-	if (dsp->ch.peer)
-		dsp->ch.peer->ctrl(dsp->ch.peer, CONTROL_CHANNEL, &cq);
-}
-
-
-/*
- * do hardware update and set the software/hardware flag
- *
- * either a conference or a dsp instance can be given
- * if only dsp instance is given, the instance is not associated with a conf
- * and therefore removed. if a conference is given, the dsp is expected to
- * be member of that conference.
- */
-void
-dsp_cmx_hardware(struct dsp_conf *conf, struct dsp *dsp)
-{
-	struct dsp_conf_member	*member, *nextm;
-	struct dsp		*finddsp;
-	int		memb = 0, i, ii, i1, i2;
-	int		freeunits[8];
-	u_char		freeslots[256];
-	int		same_hfc = -1, same_pcm = -1, current_conf = -1,
-		all_conf = 1, tx_data = 0;
-
-	/* dsp gets updated (no conf) */
-	if (!conf) {
-		if (!dsp)
-			return;
-		if (dsp_debug & DEBUG_DSP_CMX)
-			printk(KERN_DEBUG "%s checking dsp %s\n",
-			       __func__, dsp->name);
-	one_member:
-		/* remove HFC conference if enabled */
-		if (dsp->hfc_conf >= 0) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s removing %s from HFC conf %d "
-				       "because dsp is split\n", __func__,
-				       dsp->name, dsp->hfc_conf);
-			dsp_cmx_hw_message(dsp, MISDN_CTRL_HFC_CONF_SPLIT,
-					   0, 0, 0, 0);
-			dsp->hfc_conf = -1;
-		}
-		/* process hw echo */
-		if (dsp->features.pcm_banks < 1)
-			return;
-		if (!dsp->echo.software && !dsp->echo.hardware) {
-			/* NO ECHO: remove PCM slot if assigned */
-			if (dsp->pcm_slot_tx >= 0 || dsp->pcm_slot_rx >= 0) {
-				if (dsp_debug & DEBUG_DSP_CMX)
-					printk(KERN_DEBUG "%s removing %s from"
-					       " PCM slot %d (TX) %d (RX) because"
-					       " dsp is split (no echo)\n",
-					       __func__, dsp->name,
-					       dsp->pcm_slot_tx, dsp->pcm_slot_rx);
-				dsp_cmx_hw_message(dsp, MISDN_CTRL_HFC_PCM_DISC,
-						   0, 0, 0, 0);
-				dsp->pcm_slot_tx = -1;
-				dsp->pcm_bank_tx = -1;
-				dsp->pcm_slot_rx = -1;
-				dsp->pcm_bank_rx = -1;
-			}
-			return;
-		}
-		/* echo is enabled, find out if we use soft or hardware */
-		dsp->echo.software = dsp->tx_data;
-		dsp->echo.hardware = 0;
-		/* ECHO: already echo */
-		if (dsp->pcm_slot_tx >= 0 && dsp->pcm_slot_rx < 0 &&
-		    dsp->pcm_bank_tx == 2 && dsp->pcm_bank_rx == 2) {
-			dsp->echo.hardware = 1;
-			return;
-		}
-		/* ECHO: if slot already assigned */
-		if (dsp->pcm_slot_tx >= 0) {
-			dsp->pcm_slot_rx = dsp->pcm_slot_tx;
-			dsp->pcm_bank_tx = 2; /* 2 means loop */
-			dsp->pcm_bank_rx = 2;
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s refresh %s for echo using slot %d\n",
-				       __func__, dsp->name,
-				       dsp->pcm_slot_tx);
-			dsp_cmx_hw_message(dsp, MISDN_CTRL_HFC_PCM_CONN,
-					   dsp->pcm_slot_tx, 2, dsp->pcm_slot_rx, 2);
-			dsp->echo.hardware = 1;
-			return;
-		}
-		/* ECHO: find slot */
-		dsp->pcm_slot_tx = -1;
-		dsp->pcm_slot_rx = -1;
-		memset(freeslots, 1, sizeof(freeslots));
-		list_for_each_entry(finddsp, &dsp_ilist, list) {
-			if (finddsp->features.pcm_id == dsp->features.pcm_id) {
-				if (finddsp->pcm_slot_rx >= 0 &&
-				    finddsp->pcm_slot_rx < sizeof(freeslots))
-					freeslots[finddsp->pcm_slot_rx] = 0;
-				if (finddsp->pcm_slot_tx >= 0 &&
-				    finddsp->pcm_slot_tx < sizeof(freeslots))
-					freeslots[finddsp->pcm_slot_tx] = 0;
-			}
-		}
-		i = 0;
-		ii = dsp->features.pcm_slots;
-		while (i < ii) {
-			if (freeslots[i])
-				break;
-			i++;
-		}
-		if (i == ii) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s no slot available for echo\n",
-				       __func__);
-			/* no more slots available */
-			dsp->echo.software = 1;
-			return;
-		}
-		/* assign free slot */
-		dsp->pcm_slot_tx = i;
-		dsp->pcm_slot_rx = i;
-		dsp->pcm_bank_tx = 2; /* loop */
-		dsp->pcm_bank_rx = 2;
-		if (dsp_debug & DEBUG_DSP_CMX)
-			printk(KERN_DEBUG
-			       "%s assign echo for %s using slot %d\n",
-			       __func__, dsp->name, dsp->pcm_slot_tx);
-		dsp_cmx_hw_message(dsp, MISDN_CTRL_HFC_PCM_CONN,
-				   dsp->pcm_slot_tx, 2, dsp->pcm_slot_rx, 2);
-		dsp->echo.hardware = 1;
-		return;
-	}
-
-	/* conf gets updated (all members) */
-	if (dsp_debug & DEBUG_DSP_CMX)
-		printk(KERN_DEBUG "%s checking conference %d\n",
-		       __func__, conf->id);
-
-	if (list_empty(&conf->mlist)) {
-		printk(KERN_ERR "%s: conference without members\n",
-		       __func__);
-		return;
-	}
-	member = list_entry(conf->mlist.next, struct dsp_conf_member, list);
-	same_hfc = member->dsp->features.hfc_id;
-	same_pcm = member->dsp->features.pcm_id;
-	/* check all members in our conference */
-	list_for_each_entry(member, &conf->mlist, list) {
-		/* check if member uses mixing */
-		if (member->dsp->tx_mix) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s dsp %s cannot form a conf, because "
-				       "tx_mix is turned on\n", __func__,
-				       member->dsp->name);
-		conf_software:
-			list_for_each_entry(member, &conf->mlist, list) {
-				dsp = member->dsp;
-				/* remove HFC conference if enabled */
-				if (dsp->hfc_conf >= 0) {
-					if (dsp_debug & DEBUG_DSP_CMX)
-						printk(KERN_DEBUG
-						       "%s removing %s from HFC "
-						       "conf %d because not "
-						       "possible with hardware\n",
-						       __func__,
-						       dsp->name,
-						       dsp->hfc_conf);
-					dsp_cmx_hw_message(dsp,
-							   MISDN_CTRL_HFC_CONF_SPLIT,
-							   0, 0, 0, 0);
-					dsp->hfc_conf = -1;
-				}
-				/* remove PCM slot if assigned */
-				if (dsp->pcm_slot_tx >= 0 ||
-				    dsp->pcm_slot_rx >= 0) {
-					if (dsp_debug & DEBUG_DSP_CMX)
-						printk(KERN_DEBUG "%s removing "
-						       "%s from PCM slot %d (TX)"
-						       " slot %d (RX) because not"
-						       " possible with hardware\n",
-						       __func__,
-						       dsp->name,
-						       dsp->pcm_slot_tx,
-						       dsp->pcm_slot_rx);
-					dsp_cmx_hw_message(dsp,
-							   MISDN_CTRL_HFC_PCM_DISC,
-							   0, 0, 0, 0);
-					dsp->pcm_slot_tx = -1;
-					dsp->pcm_bank_tx = -1;
-					dsp->pcm_slot_rx = -1;
-					dsp->pcm_bank_rx = -1;
-				}
-			}
-			conf->hardware = 0;
-			conf->software = 1;
-			return;
-		}
-		/* check if member has echo turned on */
-		if (member->dsp->echo.hardware || member->dsp->echo.software) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s dsp %s cannot form a conf, because "
-				       "echo is turned on\n", __func__,
-				       member->dsp->name);
-			goto conf_software;
-		}
-		/* check if member has tx_mix turned on */
-		if (member->dsp->tx_mix) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s dsp %s cannot form a conf, because "
-				       "tx_mix is turned on\n",
-				       __func__, member->dsp->name);
-			goto conf_software;
-		}
-		/* check if member changes volume at an not suppoted level */
-		if (member->dsp->tx_volume) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s dsp %s cannot form a conf, because "
-				       "tx_volume is changed\n",
-				       __func__, member->dsp->name);
-			goto conf_software;
-		}
-		if (member->dsp->rx_volume) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s dsp %s cannot form a conf, because "
-				       "rx_volume is changed\n",
-				       __func__, member->dsp->name);
-			goto conf_software;
-		}
-		/* check if tx-data turned on */
-		if (member->dsp->tx_data) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s dsp %s tx_data is turned on\n",
-				       __func__, member->dsp->name);
-			tx_data = 1;
-		}
-		/* check if pipeline exists */
-		if (member->dsp->pipeline.inuse) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s dsp %s cannot form a conf, because "
-				       "pipeline exists\n", __func__,
-				       member->dsp->name);
-			goto conf_software;
-		}
-		/* check if encryption is enabled */
-		if (member->dsp->bf_enable) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG "%s dsp %s cannot form a "
-				       "conf, because encryption is enabled\n",
-				       __func__, member->dsp->name);
-			goto conf_software;
-		}
-		/* check if member is on a card with PCM support */
-		if (member->dsp->features.pcm_id < 0) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s dsp %s cannot form a conf, because "
-				       "dsp has no PCM bus\n",
-				       __func__, member->dsp->name);
-			goto conf_software;
-		}
-		/* check if relations are on the same PCM bus */
-		if (member->dsp->features.pcm_id != same_pcm) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s dsp %s cannot form a conf, because "
-				       "dsp is on a different PCM bus than the "
-				       "first dsp\n",
-				       __func__, member->dsp->name);
-			goto conf_software;
-		}
-		/* determine if members are on the same hfc chip */
-		if (same_hfc != member->dsp->features.hfc_id)
-			same_hfc = -1;
-		/* if there are members already in a conference */
-		if (current_conf < 0 && member->dsp->hfc_conf >= 0)
-			current_conf = member->dsp->hfc_conf;
-		/* if any member is not in a conference */
-		if (member->dsp->hfc_conf < 0)
-			all_conf = 0;
-
-		memb++;
-	}
-
-	/* if no member, this is an error */
-	if (memb < 1)
-		return;
-
-	/* one member */
-	if (memb == 1) {
-		if (dsp_debug & DEBUG_DSP_CMX)
-			printk(KERN_DEBUG
-			       "%s conf %d cannot form a HW conference, "
-			       "because dsp is alone\n", __func__, conf->id);
-		conf->hardware = 0;
-		conf->software = 0;
-		member = list_entry(conf->mlist.next, struct dsp_conf_member,
-				    list);
-		dsp = member->dsp;
-		goto one_member;
-	}
-
-	/*
-	 * ok, now we are sure that all members are on the same pcm.
-	 * now we will see if we have only two members, so we can do
-	 * crossconnections, which don't have any limitations.
-	 */
-
-	/* if we have only two members */
-	if (memb == 2) {
-		member = list_entry(conf->mlist.next, struct dsp_conf_member,
-				    list);
-		nextm = list_entry(member->list.next, struct dsp_conf_member,
-				   list);
-		/* remove HFC conference if enabled */
-		if (member->dsp->hfc_conf >= 0) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s removing %s from HFC conf %d because "
-				       "two parties require only a PCM slot\n",
-				       __func__, member->dsp->name,
-				       member->dsp->hfc_conf);
-			dsp_cmx_hw_message(member->dsp,
-					   MISDN_CTRL_HFC_CONF_SPLIT, 0, 0, 0, 0);
-			member->dsp->hfc_conf = -1;
-		}
-		if (nextm->dsp->hfc_conf >= 0) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s removing %s from HFC conf %d because "
-				       "two parties require only a PCM slot\n",
-				       __func__, nextm->dsp->name,
-				       nextm->dsp->hfc_conf);
-			dsp_cmx_hw_message(nextm->dsp,
-					   MISDN_CTRL_HFC_CONF_SPLIT, 0, 0, 0, 0);
-			nextm->dsp->hfc_conf = -1;
-		}
-		/* if members have two banks (and not on the same chip) */
-		if (member->dsp->features.pcm_banks > 1 &&
-		    nextm->dsp->features.pcm_banks > 1 &&
-		    member->dsp->features.hfc_id !=
-		    nextm->dsp->features.hfc_id) {
-			/* if both members have same slots with crossed banks */
-			if (member->dsp->pcm_slot_tx >= 0 &&
-			    member->dsp->pcm_slot_rx >= 0 &&
-			    nextm->dsp->pcm_slot_tx >= 0 &&
-			    nextm->dsp->pcm_slot_rx >= 0 &&
-			    nextm->dsp->pcm_slot_tx ==
-			    member->dsp->pcm_slot_rx &&
-			    nextm->dsp->pcm_slot_rx ==
-			    member->dsp->pcm_slot_tx &&
-			    nextm->dsp->pcm_slot_tx ==
-			    member->dsp->pcm_slot_tx &&
-			    member->dsp->pcm_bank_tx !=
-			    member->dsp->pcm_bank_rx &&
-			    nextm->dsp->pcm_bank_tx !=
-			    nextm->dsp->pcm_bank_rx) {
-				/* all members have same slot */
-				if (dsp_debug & DEBUG_DSP_CMX)
-					printk(KERN_DEBUG
-					       "%s dsp %s & %s stay joined on "
-					       "PCM slot %d bank %d (TX) bank %d "
-					       "(RX) (on different chips)\n",
-					       __func__,
-					       member->dsp->name,
-					       nextm->dsp->name,
-					       member->dsp->pcm_slot_tx,
-					       member->dsp->pcm_bank_tx,
-					       member->dsp->pcm_bank_rx);
-				conf->hardware = 1;
-				conf->software = tx_data;
-				return;
-			}
-			/* find a new slot */
-			memset(freeslots, 1, sizeof(freeslots));
-			list_for_each_entry(dsp, &dsp_ilist, list) {
-				if (dsp != member->dsp &&
-				    dsp != nextm->dsp &&
-				    member->dsp->features.pcm_id ==
-				    dsp->features.pcm_id) {
-					if (dsp->pcm_slot_rx >= 0 &&
-					    dsp->pcm_slot_rx <
-					    sizeof(freeslots))
-						freeslots[dsp->pcm_slot_rx] = 0;
-					if (dsp->pcm_slot_tx >= 0 &&
-					    dsp->pcm_slot_tx <
-					    sizeof(freeslots))
-						freeslots[dsp->pcm_slot_tx] = 0;
-				}
-			}
-			i = 0;
-			ii = member->dsp->features.pcm_slots;
-			while (i < ii) {
-				if (freeslots[i])
-					break;
-				i++;
-			}
-			if (i == ii) {
-				if (dsp_debug & DEBUG_DSP_CMX)
-					printk(KERN_DEBUG
-					       "%s no slot available for "
-					       "%s & %s\n", __func__,
-					       member->dsp->name,
-					       nextm->dsp->name);
-				/* no more slots available */
-				goto conf_software;
-			}
-			/* assign free slot */
-			member->dsp->pcm_slot_tx = i;
-			member->dsp->pcm_slot_rx = i;
-			nextm->dsp->pcm_slot_tx = i;
-			nextm->dsp->pcm_slot_rx = i;
-			member->dsp->pcm_bank_rx = 0;
-			member->dsp->pcm_bank_tx = 1;
-			nextm->dsp->pcm_bank_rx = 1;
-			nextm->dsp->pcm_bank_tx = 0;
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s adding %s & %s to new PCM slot %d "
-				       "(TX and RX on different chips) because "
-				       "both members have not same slots\n",
-				       __func__,
-				       member->dsp->name,
-				       nextm->dsp->name,
-				       member->dsp->pcm_slot_tx);
-			dsp_cmx_hw_message(member->dsp, MISDN_CTRL_HFC_PCM_CONN,
-					   member->dsp->pcm_slot_tx, member->dsp->pcm_bank_tx,
-					   member->dsp->pcm_slot_rx, member->dsp->pcm_bank_rx);
-			dsp_cmx_hw_message(nextm->dsp, MISDN_CTRL_HFC_PCM_CONN,
-					   nextm->dsp->pcm_slot_tx, nextm->dsp->pcm_bank_tx,
-					   nextm->dsp->pcm_slot_rx, nextm->dsp->pcm_bank_rx);
-			conf->hardware = 1;
-			conf->software = tx_data;
-			return;
-			/* if members have one bank (or on the same chip) */
-		} else {
-			/* if both members have different crossed slots */
-			if (member->dsp->pcm_slot_tx >= 0 &&
-			    member->dsp->pcm_slot_rx >= 0 &&
-			    nextm->dsp->pcm_slot_tx >= 0 &&
-			    nextm->dsp->pcm_slot_rx >= 0 &&
-			    nextm->dsp->pcm_slot_tx ==
-			    member->dsp->pcm_slot_rx &&
-			    nextm->dsp->pcm_slot_rx ==
-			    member->dsp->pcm_slot_tx &&
-			    member->dsp->pcm_slot_tx !=
-			    member->dsp->pcm_slot_rx &&
-			    member->dsp->pcm_bank_tx == 0 &&
-			    member->dsp->pcm_bank_rx == 0 &&
-			    nextm->dsp->pcm_bank_tx == 0 &&
-			    nextm->dsp->pcm_bank_rx == 0) {
-				/* all members have same slot */
-				if (dsp_debug & DEBUG_DSP_CMX)
-					printk(KERN_DEBUG
-					       "%s dsp %s & %s stay joined on PCM "
-					       "slot %d (TX) %d (RX) on same chip "
-					       "or one bank PCM)\n", __func__,
-					       member->dsp->name,
-					       nextm->dsp->name,
-					       member->dsp->pcm_slot_tx,
-					       member->dsp->pcm_slot_rx);
-				conf->hardware = 1;
-				conf->software = tx_data;
-				return;
-			}
-			/* find two new slot */
-			memset(freeslots, 1, sizeof(freeslots));
-			list_for_each_entry(dsp, &dsp_ilist, list) {
-				if (dsp != member->dsp &&
-				    dsp != nextm->dsp &&
-				    member->dsp->features.pcm_id ==
-				    dsp->features.pcm_id) {
-					if (dsp->pcm_slot_rx >= 0 &&
-					    dsp->pcm_slot_rx <
-					    sizeof(freeslots))
-						freeslots[dsp->pcm_slot_rx] = 0;
-					if (dsp->pcm_slot_tx >= 0 &&
-					    dsp->pcm_slot_tx <
-					    sizeof(freeslots))
-						freeslots[dsp->pcm_slot_tx] = 0;
-				}
-			}
-			i1 = 0;
-			ii = member->dsp->features.pcm_slots;
-			while (i1 < ii) {
-				if (freeslots[i1])
-					break;
-				i1++;
-			}
-			if (i1 == ii) {
-				if (dsp_debug & DEBUG_DSP_CMX)
-					printk(KERN_DEBUG
-					       "%s no slot available "
-					       "for %s & %s\n", __func__,
-					       member->dsp->name,
-					       nextm->dsp->name);
-				/* no more slots available */
-				goto conf_software;
-			}
-			i2 = i1 + 1;
-			while (i2 < ii) {
-				if (freeslots[i2])
-					break;
-				i2++;
-			}
-			if (i2 == ii) {
-				if (dsp_debug & DEBUG_DSP_CMX)
-					printk(KERN_DEBUG
-					       "%s no slot available "
-					       "for %s & %s\n",
-					       __func__,
-					       member->dsp->name,
-					       nextm->dsp->name);
-				/* no more slots available */
-				goto conf_software;
-			}
-			/* assign free slots */
-			member->dsp->pcm_slot_tx = i1;
-			member->dsp->pcm_slot_rx = i2;
-			nextm->dsp->pcm_slot_tx = i2;
-			nextm->dsp->pcm_slot_rx = i1;
-			member->dsp->pcm_bank_rx = 0;
-			member->dsp->pcm_bank_tx = 0;
-			nextm->dsp->pcm_bank_rx = 0;
-			nextm->dsp->pcm_bank_tx = 0;
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s adding %s & %s to new PCM slot %d "
-				       "(TX) %d (RX) on same chip or one bank "
-				       "PCM, because both members have not "
-				       "crossed slots\n", __func__,
-				       member->dsp->name,
-				       nextm->dsp->name,
-				       member->dsp->pcm_slot_tx,
-				       member->dsp->pcm_slot_rx);
-			dsp_cmx_hw_message(member->dsp, MISDN_CTRL_HFC_PCM_CONN,
-					   member->dsp->pcm_slot_tx, member->dsp->pcm_bank_tx,
-					   member->dsp->pcm_slot_rx, member->dsp->pcm_bank_rx);
-			dsp_cmx_hw_message(nextm->dsp, MISDN_CTRL_HFC_PCM_CONN,
-					   nextm->dsp->pcm_slot_tx, nextm->dsp->pcm_bank_tx,
-					   nextm->dsp->pcm_slot_rx, nextm->dsp->pcm_bank_rx);
-			conf->hardware = 1;
-			conf->software = tx_data;
-			return;
-		}
-	}
-
-	/*
-	 * if we have more than two, we may check if we have a conference
-	 * unit available on the chip. also all members must be on the same
-	 */
-
-	/* if not the same HFC chip */
-	if (same_hfc < 0) {
-		if (dsp_debug & DEBUG_DSP_CMX)
-			printk(KERN_DEBUG
-			       "%s conference %d cannot be formed, because "
-			       "members are on different chips or not "
-			       "on HFC chip\n",
-			       __func__, conf->id);
-		goto conf_software;
-	}
-
-	/* for more than two members.. */
-
-	/* if all members already have the same conference */
-	if (all_conf) {
-		conf->hardware = 1;
-		conf->software = tx_data;
-		return;
-	}
-
-	/*
-	 * if there is an existing conference, but not all members have joined
-	 */
-	if (current_conf >= 0) {
-	join_members:
-		list_for_each_entry(member, &conf->mlist, list) {
-			/* if no conference engine on our chip, change to
-			 * software */
-			if (!member->dsp->features.hfc_conf)
-				goto conf_software;
-			/* in case of hdlc, change to software */
-			if (member->dsp->hdlc)
-				goto conf_software;
-			/* join to current conference */
-			if (member->dsp->hfc_conf == current_conf)
-				continue;
-			/* get a free timeslot first */
-			memset(freeslots, 1, sizeof(freeslots));
-			list_for_each_entry(dsp, &dsp_ilist, list) {
-				/*
-				 * not checking current member, because
-				 * slot will be overwritten.
-				 */
-				if (
-					dsp != member->dsp &&
-					/* dsp must be on the same PCM */
-					member->dsp->features.pcm_id ==
-					dsp->features.pcm_id) {
-					/* dsp must be on a slot */
-					if (dsp->pcm_slot_tx >= 0 &&
-					    dsp->pcm_slot_tx <
-					    sizeof(freeslots))
-						freeslots[dsp->pcm_slot_tx] = 0;
-					if (dsp->pcm_slot_rx >= 0 &&
-					    dsp->pcm_slot_rx <
-					    sizeof(freeslots))
-						freeslots[dsp->pcm_slot_rx] = 0;
-				}
-			}
-			i = 0;
-			ii = member->dsp->features.pcm_slots;
-			while (i < ii) {
-				if (freeslots[i])
-					break;
-				i++;
-			}
-			if (i == ii) {
-				/* no more slots available */
-				if (dsp_debug & DEBUG_DSP_CMX)
-					printk(KERN_DEBUG
-					       "%s conference %d cannot be formed,"
-					       " because no slot free\n",
-					       __func__, conf->id);
-				goto conf_software;
-			}
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "%s changing dsp %s to HW conference "
-				       "%d slot %d\n", __func__,
-				       member->dsp->name, current_conf, i);
-			/* assign free slot & set PCM & join conf */
-			member->dsp->pcm_slot_tx = i;
-			member->dsp->pcm_slot_rx = i;
-			member->dsp->pcm_bank_tx = 2; /* loop */
-			member->dsp->pcm_bank_rx = 2;
-			member->dsp->hfc_conf = current_conf;
-			dsp_cmx_hw_message(member->dsp, MISDN_CTRL_HFC_PCM_CONN,
-					   i, 2, i, 2);
-			dsp_cmx_hw_message(member->dsp,
-					   MISDN_CTRL_HFC_CONF_JOIN, current_conf, 0, 0, 0);
-		}
-		conf->hardware = 1;
-		conf->software = tx_data;
-		return;
-	}
-
-	/*
-	 * no member is in a conference yet, so we find a free one
-	 */
-	memset(freeunits, 1, sizeof(freeunits));
-	list_for_each_entry(dsp, &dsp_ilist, list) {
-		/* dsp must be on the same chip */
-		if (dsp->features.hfc_id == same_hfc &&
-		    /* dsp must have joined a HW conference */
-		    dsp->hfc_conf >= 0 &&
-		    /* slot must be within range */
-		    dsp->hfc_conf < 8)
-			freeunits[dsp->hfc_conf] = 0;
-	}
-	i = 0;
-	ii = 8;
-	while (i < ii) {
-		if (freeunits[i])
-			break;
-		i++;
-	}
-	if (i == ii) {
-		/* no more conferences available */
-		if (dsp_debug & DEBUG_DSP_CMX)
-			printk(KERN_DEBUG
-			       "%s conference %d cannot be formed, because "
-			       "no conference number free\n",
-			       __func__, conf->id);
-		goto conf_software;
-	}
-	/* join all members */
-	current_conf = i;
-	goto join_members;
-}
-
-
-/*
- * conf_id != 0: join or change conference
- * conf_id == 0: split from conference if not already
- */
-int
-dsp_cmx_conf(struct dsp *dsp, u32 conf_id)
-{
-	int err;
-	struct dsp_conf *conf;
-	struct dsp_conf_member	*member;
-
-	/* if conference doesn't change */
-	if (dsp->conf_id == conf_id)
-		return 0;
-
-	/* first remove us from current conf */
-	if (dsp->conf_id) {
-		if (dsp_debug & DEBUG_DSP_CMX)
-			printk(KERN_DEBUG "removing us from conference %d\n",
-			       dsp->conf->id);
-		/* remove us from conf */
-		conf = dsp->conf;
-		err = dsp_cmx_del_conf_member(dsp);
-		if (err)
-			return err;
-		dsp->conf_id = 0;
-
-		/* update hardware */
-		dsp_cmx_hardware(NULL, dsp);
-
-		/* conf now empty? */
-		if (list_empty(&conf->mlist)) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "conference is empty, so we remove it.\n");
-			err = dsp_cmx_del_conf(conf);
-			if (err)
-				return err;
-		} else {
-			/* update members left on conf */
-			dsp_cmx_hardware(conf, NULL);
-		}
-	}
-
-	/* if split */
-	if (!conf_id)
-		return 0;
-
-	/* now add us to conf */
-	if (dsp_debug & DEBUG_DSP_CMX)
-		printk(KERN_DEBUG "searching conference %d\n",
-		       conf_id);
-	conf = dsp_cmx_search_conf(conf_id);
-	if (!conf) {
-		if (dsp_debug & DEBUG_DSP_CMX)
-			printk(KERN_DEBUG
-			       "conference doesn't exist yet, creating.\n");
-		/* the conference doesn't exist, so we create */
-		conf = dsp_cmx_new_conf(conf_id);
-		if (!conf)
-			return -EINVAL;
-	} else if (!list_empty(&conf->mlist)) {
-		member = list_entry(conf->mlist.next, struct dsp_conf_member,
-				    list);
-		if (dsp->hdlc && !member->dsp->hdlc) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "cannot join transparent conference.\n");
-			return -EINVAL;
-		}
-		if (!dsp->hdlc && member->dsp->hdlc) {
-			if (dsp_debug & DEBUG_DSP_CMX)
-				printk(KERN_DEBUG
-				       "cannot join hdlc conference.\n");
-			return -EINVAL;
-		}
-	}
-	/* add conference member */
-	err = dsp_cmx_add_conf_member(dsp, conf);
-	if (err)
-		return err;
-	dsp->conf_id = conf_id;
-
-	/* if we are alone, we do nothing! */
-	if (list_empty(&conf->mlist)) {
-		if (dsp_debug & DEBUG_DSP_CMX)
-			printk(KERN_DEBUG
-			       "we are alone in this conference, so exit.\n");
-		/* update hardware */
-		dsp_cmx_hardware(NULL, dsp);
-		return 0;
-	}
-
-	/* update members on conf */
-	dsp_cmx_hardware(conf, NULL);
-
-	return 0;
-}
-
-#ifdef CMX_DELAY_DEBUG
-int delaycount;
-static void
-showdelay(struct dsp *dsp, int samples, int delay)
-{
-	char bar[] = "--------------------------------------------------|";
-	int sdelay;
-
-	delaycount += samples;
-	if (delaycount < 8000)
-		return;
-	delaycount = 0;
-
-	sdelay = delay * 50 / (dsp_poll << 2);
-
-	printk(KERN_DEBUG "DELAY (%s) %3d >%s\n", dsp->name, delay,
-	       sdelay > 50 ? "..." : bar + 50 - sdelay);
-}
-#endif
-
-/*
- * audio data is received from card
- */
-void
-dsp_cmx_receive(struct dsp *dsp, struct sk_buff *skb)
-{
-	u8 *d, *p;
-	int len = skb->len;
-	struct mISDNhead *hh = mISDN_HEAD_P(skb);
-	int w, i, ii;
-
-	/* check if we have sompen */
-	if (len < 1)
-		return;
-
-	/* half of the buffer should be larger than maximum packet size */
-	if (len >= CMX_BUFF_HALF) {
-		printk(KERN_ERR
-		       "%s line %d: packet from card is too large (%d bytes). "
-		       "please make card send smaller packets OR increase "
-		       "CMX_BUFF_SIZE\n", __FILE__, __LINE__, len);
-		return;
-	}
-
-	/*
-	 * initialize pointers if not already -
-	 * also add delay if requested by PH_SIGNAL
-	 */
-	if (dsp->rx_init) {
-		dsp->rx_init = 0;
-		if (dsp->features.unordered) {
-			dsp->rx_R = (hh->id & CMX_BUFF_MASK);
-			if (dsp->cmx_delay)
-				dsp->rx_W = (dsp->rx_R + dsp->cmx_delay)
-					& CMX_BUFF_MASK;
-			else
-				dsp->rx_W = (dsp->rx_R + (dsp_poll >> 1))
-					& CMX_BUFF_MASK;
-		} else {
-			dsp->rx_R = 0;
-			if (dsp->cmx_delay)
-				dsp->rx_W = dsp->cmx_delay;
-			else
-				dsp->rx_W = dsp_poll >> 1;
-		}
-	}
-	/* if frame contains time code, write directly */
-	if (dsp->features.unordered) {
-		dsp->rx_W = (hh->id & CMX_BUFF_MASK);
-		/* printk(KERN_DEBUG "%s %08x\n", dsp->name, hh->id); */
-	}
-	/*
-	 * if we underrun (or maybe overrun),
-	 * we set our new read pointer, and write silence to buffer
-	 */
-	if (((dsp->rx_W-dsp->rx_R) & CMX_BUFF_MASK) >= CMX_BUFF_HALF) {
-		if (dsp_debug & DEBUG_DSP_CLOCK)
-			printk(KERN_DEBUG
-			       "cmx_receive(dsp=%lx): UNDERRUN (or overrun the "
-			       "maximum delay), adjusting read pointer! "
-			       "(inst %s)\n", (u_long)dsp, dsp->name);
-		/* flush rx buffer and set delay to dsp_poll / 2 */
-		if (dsp->features.unordered) {
-			dsp->rx_R = (hh->id & CMX_BUFF_MASK);
-			if (dsp->cmx_delay)
-				dsp->rx_W = (dsp->rx_R + dsp->cmx_delay)
-					& CMX_BUFF_MASK;
-			else
-				dsp->rx_W = (dsp->rx_R + (dsp_poll >> 1))
-					& CMX_BUFF_MASK;
-		} else {
-			dsp->rx_R = 0;
-			if (dsp->cmx_delay)
-				dsp->rx_W = dsp->cmx_delay;
-			else
-				dsp->rx_W = dsp_poll >> 1;
-		}
-		memset(dsp->rx_buff, dsp_silence, sizeof(dsp->rx_buff));
-	}
-	/* if we have reached double delay, jump back to middle */
-	if (dsp->cmx_delay)
-		if (((dsp->rx_W - dsp->rx_R) & CMX_BUFF_MASK) >=
-		    (dsp->cmx_delay << 1)) {
-			if (dsp_debug & DEBUG_DSP_CLOCK)
-				printk(KERN_DEBUG
-				       "cmx_receive(dsp=%lx): OVERRUN (because "
-				       "twice the delay is reached), adjusting "
-				       "read pointer! (inst %s)\n",
-				       (u_long)dsp, dsp->name);
-			/* flush buffer */
-			if (dsp->features.unordered) {
-				dsp->rx_R = (hh->id & CMX_BUFF_MASK);
-				dsp->rx_W = (dsp->rx_R + dsp->cmx_delay)
-					& CMX_BUFF_MASK;
-			} else {
-				dsp->rx_R = 0;
-				dsp->rx_W = dsp->cmx_delay;
-			}
-			memset(dsp->rx_buff, dsp_silence, sizeof(dsp->rx_buff));
-		}
-
-	/* show where to write */
-#ifdef CMX_DEBUG
-	printk(KERN_DEBUG
-	       "cmx_receive(dsp=%lx): rx_R(dsp)=%05x rx_W(dsp)=%05x len=%d %s\n",
-	       (u_long)dsp, dsp->rx_R, dsp->rx_W, len, dsp->name);
-#endif
-
-	/* write data into rx_buffer */
-	p = skb->data;
-	d = dsp->rx_buff;
-	w = dsp->rx_W;
-	i = 0;
-	ii = len;
-	while (i < ii) {
-		d[w++ & CMX_BUFF_MASK] = *p++;
-		i++;
-	}
-
-	/* increase write-pointer */
-	dsp->rx_W = ((dsp->rx_W + len) & CMX_BUFF_MASK);
-#ifdef CMX_DELAY_DEBUG
-	showdelay(dsp, len, (dsp->rx_W-dsp->rx_R) & CMX_BUFF_MASK);
-#endif
-}
-
-
-/*
- * send (mixed) audio data to card and control jitter
- */
-static void
-dsp_cmx_send_member(struct dsp *dsp, int len, s32 *c, int members)
-{
-	struct dsp_conf *conf = dsp->conf;
-	struct dsp *member, *other;
-	register s32 sample;
-	u8 *d, *p, *q, *o_q;
-	struct sk_buff *nskb, *txskb;
-	int r, rr, t, tt, o_r, o_rr;
-	int preload = 0;
-	struct mISDNhead *hh, *thh;
-	int tx_data_only = 0;
-
-	/* don't process if: */
-	if (!dsp->b_active) { /* if not active */
-		dsp->last_tx = 0;
-		return;
-	}
-	if (((dsp->conf && dsp->conf->hardware) || /* hardware conf */
-	     dsp->echo.hardware) && /* OR hardware echo */
-	    dsp->tx_R == dsp->tx_W && /* AND no tx-data */
-	    !(dsp->tone.tone && dsp->tone.software)) { /* AND not soft tones */
-		if (!dsp->tx_data) { /* no tx_data for user space required */
-			dsp->last_tx = 0;
-			return;
-		}
-		if (dsp->conf && dsp->conf->software && dsp->conf->hardware)
-			tx_data_only = 1;
-		if (dsp->echo.software && dsp->echo.hardware)
-			tx_data_only = 1;
-	}
-
-#ifdef CMX_DEBUG
-	printk(KERN_DEBUG
-	       "SEND members=%d dsp=%s, conf=%p, rx_R=%05x rx_W=%05x\n",
-	       members, dsp->name, conf, dsp->rx_R, dsp->rx_W);
-#endif
-
-	/* preload if we have delay set */
-	if (dsp->cmx_delay && !dsp->last_tx) {
-		preload = len;
-		if (preload < 128)
-			preload = 128;
-	}
-
-	/* PREPARE RESULT */
-	nskb = mI_alloc_skb(len + preload, GFP_ATOMIC);
-	if (!nskb) {
-		printk(KERN_ERR
-		       "FATAL ERROR in mISDN_dsp.o: cannot alloc %d bytes\n",
-		       len + preload);
-		return;
-	}
-	hh = mISDN_HEAD_P(nskb);
-	hh->prim = PH_DATA_REQ;
-	hh->id = 0;
-	dsp->last_tx = 1;
-
-	/* set pointers, indexes and stuff */
-	member = dsp;
-	p = dsp->tx_buff; /* transmit data */
-	q = dsp->rx_buff; /* received data */
-	d = skb_put(nskb, preload + len); /* result */
-	t = dsp->tx_R; /* tx-pointers */
-	tt = dsp->tx_W;
-	r = dsp->rx_R; /* rx-pointers */
-	rr = (r + len) & CMX_BUFF_MASK;
-
-	/* preload with silence, if required */
-	if (preload) {
-		memset(d, dsp_silence, preload);
-		d += preload;
-	}
-
-	/* PROCESS TONES/TX-DATA ONLY */
-	if (dsp->tone.tone && dsp->tone.software) {
-		/* -> copy tone */
-		dsp_tone_copy(dsp, d, len);
-		dsp->tx_R = 0; /* clear tx buffer */
-		dsp->tx_W = 0;
-		goto send_packet;
-	}
-	/* if we have tx-data but do not use mixing */
-	if (!dsp->tx_mix && t != tt) {
-		/* -> send tx-data and continue when not enough */
-#ifdef CMX_TX_DEBUG
-		sprintf(debugbuf, "TX sending (%04x-%04x)%p: ", t, tt, p);
-#endif
-		while (r != rr && t != tt) {
-#ifdef CMX_TX_DEBUG
-			if (strlen(debugbuf) < 48)
-				sprintf(debugbuf + strlen(debugbuf), " %02x",
-					p[t]);
-#endif
-			*d++ = p[t]; /* write tx_buff */
-			t = (t + 1) & CMX_BUFF_MASK;
-			r = (r + 1) & CMX_BUFF_MASK;
-		}
-		if (r == rr) {
-			dsp->tx_R = t;
-#ifdef CMX_TX_DEBUG
-			printk(KERN_DEBUG "%s\n", debugbuf);
-#endif
-			goto send_packet;
-		}
-	}
-#ifdef CMX_TX_DEBUG
-	printk(KERN_DEBUG "%s\n", debugbuf);
-#endif
-
-	/* PROCESS DATA (one member / no conf) */
-	if (!conf || members <= 1) {
-		/* -> if echo is NOT enabled */
-		if (!dsp->echo.software) {
-			/* -> send tx-data if available or use 0-volume */
-			while (r != rr && t != tt) {
-				*d++ = p[t]; /* write tx_buff */
-				t = (t + 1) & CMX_BUFF_MASK;
-				r = (r + 1) & CMX_BUFF_MASK;
-			}
-			if (r != rr) {
-				if (dsp_debug & DEBUG_DSP_CLOCK)
-					printk(KERN_DEBUG "%s: RX empty\n",
-					       __func__);
-				memset(d, dsp_silence, (rr - r) & CMX_BUFF_MASK);
-			}
-			/* -> if echo is enabled */
-		} else {
-			/*
-			 * -> mix tx-data with echo if available,
-			 * or use echo only
-			 */
-			while (r != rr && t != tt) {
-				*d++ = dsp_audio_mix_law[(p[t] << 8) | q[r]];
-				t = (t + 1) & CMX_BUFF_MASK;
-				r = (r + 1) & CMX_BUFF_MASK;
-			}
-			while (r != rr) {
-				*d++ = q[r]; /* echo */
-				r = (r + 1) & CMX_BUFF_MASK;
-			}
-		}
-		dsp->tx_R = t;
-		goto send_packet;
-	}
-	/* PROCESS DATA (two members) */
-#ifdef CMX_CONF_DEBUG
-	if (0) {
-#else
-	if (members == 2) {
-#endif
-		/* "other" becomes other party */
-		other = (list_entry(conf->mlist.next,
-				    struct dsp_conf_member, list))->dsp;
-		if (other == member)
-			other = (list_entry(conf->mlist.prev,
-				    struct dsp_conf_member, list))->dsp;
-		o_q = other->rx_buff; /* received data */
-		o_rr = (other->rx_R + len) & CMX_BUFF_MASK;
-		/* end of rx-pointer */
-		o_r = (o_rr - rr + r) & CMX_BUFF_MASK;
-		/* start rx-pointer at current read position*/
-		/* -> if echo is NOT enabled */
-		if (!dsp->echo.software) {
-			/*
-			 * -> copy other member's rx-data,
-			 * if tx-data is available, mix
-			 */
-			while (o_r != o_rr && t != tt) {
-				*d++ = dsp_audio_mix_law[(p[t] << 8) | o_q[o_r]];
-				t = (t + 1) & CMX_BUFF_MASK;
-				o_r = (o_r + 1) & CMX_BUFF_MASK;
-			}
-			while (o_r != o_rr) {
-				*d++ = o_q[o_r];
-				o_r = (o_r + 1) & CMX_BUFF_MASK;
-			}
-			/* -> if echo is enabled */
-		} else {
-			/*
-			 * -> mix other member's rx-data with echo,
-			 * if tx-data is available, mix
-			 */
-			while (r != rr && t != tt) {
-				sample = dsp_audio_law_to_s32[p[t]] +
-					dsp_audio_law_to_s32[q[r]] +
-					dsp_audio_law_to_s32[o_q[o_r]];
-				if (sample < -32768)
-					sample = -32768;
-				else if (sample > 32767)
-					sample = 32767;
-				*d++ = dsp_audio_s16_to_law[sample & 0xffff];
-				/* tx-data + rx_data + echo */
-				t = (t + 1) & CMX_BUFF_MASK;
-				r = (r + 1) & CMX_BUFF_MASK;
-				o_r = (o_r + 1) & CMX_BUFF_MASK;
-			}
-			while (r != rr) {
-				*d++ = dsp_audio_mix_law[(q[r] << 8) | o_q[o_r]];
-				r = (r + 1) & CMX_BUFF_MASK;
-				o_r = (o_r + 1) & CMX_BUFF_MASK;
-			}
-		}
-		dsp->tx_R = t;
-		goto send_packet;
-	}
-	/* PROCESS DATA (three or more members) */
-	/* -> if echo is NOT enabled */
-	if (!dsp->echo.software) {
-		/*
-		 * -> subtract rx-data from conf-data,
-		 * if tx-data is available, mix
-		 */
-		while (r != rr && t != tt) {
-			sample = dsp_audio_law_to_s32[p[t]] + *c++ -
-				dsp_audio_law_to_s32[q[r]];
-			if (sample < -32768)
-				sample = -32768;
-			else if (sample > 32767)
-				sample = 32767;
-			*d++ = dsp_audio_s16_to_law[sample & 0xffff];
-			/* conf-rx+tx */
-			r = (r + 1) & CMX_BUFF_MASK;
-			t = (t + 1) & CMX_BUFF_MASK;
-		}
-		while (r != rr) {
-			sample = *c++ - dsp_audio_law_to_s32[q[r]];
-			if (sample < -32768)
-				sample = -32768;
-			else if (sample > 32767)
-				sample = 32767;
-			*d++ = dsp_audio_s16_to_law[sample & 0xffff];
-			/* conf-rx */
-			r = (r + 1) & CMX_BUFF_MASK;
-		}
-		/* -> if echo is enabled */
-	} else {
-		/*
-		 * -> encode conf-data, if tx-data
-		 * is available, mix
-		 */
-		while (r != rr && t != tt) {
-			sample = dsp_audio_law_to_s32[p[t]] + *c++;
-			if (sample < -32768)
-				sample = -32768;
-			else if (sample > 32767)
-				sample = 32767;
-			*d++ = dsp_audio_s16_to_law[sample & 0xffff];
-			/* conf(echo)+tx */
-			t = (t + 1) & CMX_BUFF_MASK;
-			r = (r + 1) & CMX_BUFF_MASK;
-		}
-		while (r != rr) {
-			sample = *c++;
-			if (sample < -32768)
-				sample = -32768;
-			else if (sample > 32767)
-				sample = 32767;
-			*d++ = dsp_audio_s16_to_law[sample & 0xffff];
-			/* conf(echo) */
-			r = (r + 1) & CMX_BUFF_MASK;
-		}
-	}
-	dsp->tx_R = t;
-	goto send_packet;
-
-send_packet:
-	/*
-	 * send tx-data if enabled - don't filter,
-	 * because we want what we send, not what we filtered
-	 */
-	if (dsp->tx_data) {
-		if (tx_data_only) {
-			hh->prim = DL_DATA_REQ;
-			hh->id = 0;
-			/* queue and trigger */
-			skb_queue_tail(&dsp->sendq, nskb);
-			schedule_work(&dsp->workq);
-			/* exit because only tx_data is used */
-			return;
-		} else {
-			txskb = mI_alloc_skb(len, GFP_ATOMIC);
-			if (!txskb) {
-				printk(KERN_ERR
-				       "FATAL ERROR in mISDN_dsp.o: "
-				       "cannot alloc %d bytes\n", len);
-			} else {
-				thh = mISDN_HEAD_P(txskb);
-				thh->prim = DL_DATA_REQ;
-				thh->id = 0;
-				skb_put_data(txskb, nskb->data + preload, len);
-				/* queue (trigger later) */
-				skb_queue_tail(&dsp->sendq, txskb);
-			}
-		}
-	}
-
-	/* send data only to card, if we don't just calculated tx_data */
-	/* adjust volume */
-	if (dsp->tx_volume)
-		dsp_change_volume(nskb, dsp->tx_volume);
-	/* pipeline */
-	if (dsp->pipeline.inuse)
-		dsp_pipeline_process_tx(&dsp->pipeline, nskb->data,
-					nskb->len);
-	/* crypt */
-	if (dsp->bf_enable)
-		dsp_bf_encrypt(dsp, nskb->data, nskb->len);
-	/* queue and trigger */
-	skb_queue_tail(&dsp->sendq, nskb);
-	schedule_work(&dsp->workq);
-}
-
-static u32	jittercount; /* counter for jitter check */
-struct timer_list dsp_spl_tl;
-unsigned long	dsp_spl_jiffies; /* calculate the next time to fire */
-static u16	dsp_count; /* last sample count */
-static int	dsp_count_valid; /* if we have last sample count */
-
-void
-dsp_cmx_send(struct timer_list *arg)
-{
-	struct dsp_conf *conf;
-	struct dsp_conf_member *member;
-	struct dsp *dsp;
-	int mustmix, members;
-	static s32 mixbuffer[MAX_POLL + 100];
-	s32 *c;
-	u8 *p, *q;
-	int r, rr;
-	int jittercheck = 0, delay, i;
-	u_long flags;
-	u16 length, count;
-
-	/* lock */
-	spin_lock_irqsave(&dsp_lock, flags);
-
-	if (!dsp_count_valid) {
-		dsp_count = mISDN_clock_get();
-		length = dsp_poll;
-		dsp_count_valid = 1;
-	} else {
-		count = mISDN_clock_get();
-		length = count - dsp_count;
-		dsp_count = count;
-	}
-	if (length > MAX_POLL + 100)
-		length = MAX_POLL + 100;
-	/* printk(KERN_DEBUG "len=%d dsp_count=0x%x\n", length, dsp_count); */
-
-	/*
-	 * check if jitter needs to be checked (this is every second)
-	 */
-	jittercount += length;
-	if (jittercount >= 8000) {
-		jittercount -= 8000;
-		jittercheck = 1;
-	}
-
-	/* loop all members that do not require conference mixing */
-	list_for_each_entry(dsp, &dsp_ilist, list) {
-		if (dsp->hdlc)
-			continue;
-		conf = dsp->conf;
-		mustmix = 0;
-		members = 0;
-		if (conf) {
-			members = list_count_nodes(&conf->mlist);
-#ifdef CMX_CONF_DEBUG
-			if (conf->software && members > 1)
-#else
-			if (conf->software && members > 2)
-#endif
-				mustmix = 1;
-		}
-
-		/* transmission required */
-		if (!mustmix) {
-			dsp_cmx_send_member(dsp, length, mixbuffer, members);
-
-			/*
-			 * unused mixbuffer is given to prevent a
-			 * potential null-pointer-bug
-			 */
-		}
-	}
-
-	/* loop all members that require conference mixing */
-	list_for_each_entry(conf, &conf_ilist, list) {
-		/* count members and check hardware */
-		members = list_count_nodes(&conf->mlist);
-#ifdef CMX_CONF_DEBUG
-		if (conf->software && members > 1) {
-#else
-		if (conf->software && members > 2) {
-#endif
-			/* check for hdlc conf */
-			member = list_entry(conf->mlist.next,
-					    struct dsp_conf_member, list);
-			if (member->dsp->hdlc)
-				continue;
-			/* mix all data */
-			memset(mixbuffer, 0, length * sizeof(s32));
-			list_for_each_entry(member, &conf->mlist, list) {
-				dsp = member->dsp;
-				/* get range of data to mix */
-				c = mixbuffer;
-				q = dsp->rx_buff;
-				r = dsp->rx_R;
-				rr = (r + length) & CMX_BUFF_MASK;
-				/* add member's data */
-				while (r != rr) {
-					*c++ += dsp_audio_law_to_s32[q[r]];
-					r = (r + 1) & CMX_BUFF_MASK;
-				}
-			}
-
-			/* process each member */
-			list_for_each_entry(member, &conf->mlist, list) {
-				/* transmission */
-				dsp_cmx_send_member(member->dsp, length,
-						    mixbuffer, members);
-			}
-		}
-	}
-
-	/* delete rx-data, increment buffers, change pointers */
-	list_for_each_entry(dsp, &dsp_ilist, list) {
-		if (dsp->hdlc)
-			continue;
-		p = dsp->rx_buff;
-		q = dsp->tx_buff;
-		r = dsp->rx_R;
-		/* move receive pointer when receiving */
-		if (!dsp->rx_is_off) {
-			rr = (r + length) & CMX_BUFF_MASK;
-			/* delete rx-data */
-			while (r != rr) {
-				p[r] = dsp_silence;
-				r = (r + 1) & CMX_BUFF_MASK;
-			}
-			/* increment rx-buffer pointer */
-			dsp->rx_R = r; /* write incremented read pointer */
-		}
-
-		/* check current rx_delay */
-		delay = (dsp->rx_W-dsp->rx_R) & CMX_BUFF_MASK;
-		if (delay >= CMX_BUFF_HALF)
-			delay = 0; /* will be the delay before next write */
-		/* check for lower delay */
-		if (delay < dsp->rx_delay[0])
-			dsp->rx_delay[0] = delay;
-		/* check current tx_delay */
-		delay = (dsp->tx_W-dsp->tx_R) & CMX_BUFF_MASK;
-		if (delay >= CMX_BUFF_HALF)
-			delay = 0; /* will be the delay before next write */
-		/* check for lower delay */
-		if (delay < dsp->tx_delay[0])
-			dsp->tx_delay[0] = delay;
-		if (jittercheck) {
-			/* find the lowest of all rx_delays */
-			delay = dsp->rx_delay[0];
-			i = 1;
-			while (i < MAX_SECONDS_JITTER_CHECK) {
-				if (delay > dsp->rx_delay[i])
-					delay = dsp->rx_delay[i];
-				i++;
-			}
-			/*
-			 * remove rx_delay only if we have delay AND we
-			 * have not preset cmx_delay AND
-			 * the delay is greater dsp_poll
-			 */
-			if (delay > dsp_poll && !dsp->cmx_delay) {
-				if (dsp_debug & DEBUG_DSP_CLOCK)
-					printk(KERN_DEBUG
-					       "%s lowest rx_delay of %d bytes for"
-					       " dsp %s are now removed.\n",
-					       __func__, delay,
-					       dsp->name);
-				r = dsp->rx_R;
-				rr = (r + delay - (dsp_poll >> 1))
-					& CMX_BUFF_MASK;
-				/* delete rx-data */
-				while (r != rr) {
-					p[r] = dsp_silence;
-					r = (r + 1) & CMX_BUFF_MASK;
-				}
-				/* increment rx-buffer pointer */
-				dsp->rx_R = r;
-				/* write incremented read pointer */
-			}
-			/* find the lowest of all tx_delays */
-			delay = dsp->tx_delay[0];
-			i = 1;
-			while (i < MAX_SECONDS_JITTER_CHECK) {
-				if (delay > dsp->tx_delay[i])
-					delay = dsp->tx_delay[i];
-				i++;
-			}
-			/*
-			 * remove delay only if we have delay AND we
-			 * have enabled tx_dejitter
-			 */
-			if (delay > dsp_poll && dsp->tx_dejitter) {
-				if (dsp_debug & DEBUG_DSP_CLOCK)
-					printk(KERN_DEBUG
-					       "%s lowest tx_delay of %d bytes for"
-					       " dsp %s are now removed.\n",
-					       __func__, delay,
-					       dsp->name);
-				r = dsp->tx_R;
-				rr = (r + delay - (dsp_poll >> 1))
-					& CMX_BUFF_MASK;
-				/* delete tx-data */
-				while (r != rr) {
-					q[r] = dsp_silence;
-					r = (r + 1) & CMX_BUFF_MASK;
-				}
-				/* increment rx-buffer pointer */
-				dsp->tx_R = r;
-				/* write incremented read pointer */
-			}
-			/* scroll up delays */
-			i = MAX_SECONDS_JITTER_CHECK - 1;
-			while (i) {
-				dsp->rx_delay[i] = dsp->rx_delay[i - 1];
-				dsp->tx_delay[i] = dsp->tx_delay[i - 1];
-				i--;
-			}
-			dsp->tx_delay[0] = CMX_BUFF_HALF; /* (infinite) delay */
-			dsp->rx_delay[0] = CMX_BUFF_HALF; /* (infinite) delay */
-		}
-	}
-
-	/* if next event would be in the past ... */
-	if ((s32)(dsp_spl_jiffies + dsp_tics-jiffies) <= 0)
-		dsp_spl_jiffies = jiffies + 1;
-	else
-		dsp_spl_jiffies += dsp_tics;
-
-	dsp_spl_tl.expires = dsp_spl_jiffies;
-	add_timer(&dsp_spl_tl);
-
-	/* unlock */
-	spin_unlock_irqrestore(&dsp_lock, flags);
-}
-
-/*
- * audio data is transmitted from upper layer to the dsp
- */
-void
-dsp_cmx_transmit(struct dsp *dsp, struct sk_buff *skb)
-{
-	u_int w, ww;
-	u8 *d, *p;
-	int space; /* todo: , l = skb->len; */
-#ifdef CMX_TX_DEBUG
-	char debugbuf[256] = "";
-#endif
-
-	/* check if there is enough space, and then copy */
-	w = dsp->tx_W;
-	ww = dsp->tx_R;
-	p = dsp->tx_buff;
-	d = skb->data;
-	space = (ww - w - 1) & CMX_BUFF_MASK;
-	/* write-pointer should not overrun nor reach read pointer */
-	if (space < skb->len) {
-		/* write to the space we have left */
-		ww = (ww - 1) & CMX_BUFF_MASK; /* end one byte prior tx_R */
-		if (dsp_debug & DEBUG_DSP_CLOCK)
-			printk(KERN_DEBUG "%s: TX overflow space=%d skb->len="
-			       "%d, w=0x%04x, ww=0x%04x\n", __func__, space,
-			       skb->len, w, ww);
-	} else
-		/* write until all byte are copied */
-		ww = (w + skb->len) & CMX_BUFF_MASK;
-	dsp->tx_W = ww;
-		/* show current buffer */
-#ifdef CMX_DEBUG
-	printk(KERN_DEBUG
-	       "cmx_transmit(dsp=%lx) %d bytes to 0x%x-0x%x. %s\n",
-	       (u_long)dsp, (ww - w) & CMX_BUFF_MASK, w, ww, dsp->name);
-#endif
-
-	/* copy transmit data to tx-buffer */
-#ifdef CMX_TX_DEBUG
-	sprintf(debugbuf, "TX getting (%04x-%04x)%p: ", w, ww, p);
-#endif
-	while (w != ww) {
-#ifdef CMX_TX_DEBUG
-		if (strlen(debugbuf) < 48)
-			sprintf(debugbuf + strlen(debugbuf), " %02x", *d);
-#endif
-		p[w] = *d++;
-		w = (w + 1) & CMX_BUFF_MASK;
-	}
-#ifdef CMX_TX_DEBUG
-	printk(KERN_DEBUG "%s\n", debugbuf);
-#endif
-
-}
-
-/*
- * hdlc data is received from card and sent to all members.
- */
-void
-dsp_cmx_hdlc(struct dsp *dsp, struct sk_buff *skb)
-{
-	struct sk_buff *nskb = NULL;
-	struct dsp_conf_member *member;
-	struct mISDNhead *hh;
-
-	/* not if not active */
-	if (!dsp->b_active)
-		return;
-
-	/* check if we have sompen */
-	if (skb->len < 1)
-		return;
-
-	/* no conf */
-	if (!dsp->conf) {
-		/* in case of software echo */
-		if (dsp->echo.software) {
-			nskb = skb_clone(skb, GFP_ATOMIC);
-			if (nskb) {
-				hh = mISDN_HEAD_P(nskb);
-				hh->prim = PH_DATA_REQ;
-				hh->id = 0;
-				skb_queue_tail(&dsp->sendq, nskb);
-				schedule_work(&dsp->workq);
-			}
-		}
-		return;
-	}
-	/* in case of hardware conference */
-	if (dsp->conf->hardware)
-		return;
-	list_for_each_entry(member, &dsp->conf->mlist, list) {
-		if (dsp->echo.software || member->dsp != dsp) {
-			nskb = skb_clone(skb, GFP_ATOMIC);
-			if (nskb) {
-				hh = mISDN_HEAD_P(nskb);
-				hh->prim = PH_DATA_REQ;
-				hh->id = 0;
-				skb_queue_tail(&member->dsp->sendq, nskb);
-				schedule_work(&member->dsp->workq);
-			}
-		}
-	}
-}
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
deleted file mode 100644
index d0aa415a6b09..000000000000
--- a/drivers/isdn/mISDN/dsp_core.c
+++ /dev/null
@@ -1,1227 +0,0 @@
-/*
- * Author       Andreas Eversberg (jolly@eversberg.eu)
- * Based on source code structure by
- *		Karsten Keil (keil@isdn4linux.de)
- *
- *		This file is (c) under GNU PUBLIC LICENSE
- *
- * Thanks to    Karsten Keil (great drivers)
- *              Cologne Chip (great chips)
- *
- * This module does:
- *		Real-time tone generation
- *		DTMF detection
- *		Real-time cross-connection and conferrence
- *		Compensate jitter due to system load and hardware fault.
- *		All features are done in kernel space and will be realized
- *		using hardware, if available and supported by chip set.
- *		Blowfish encryption/decryption
- */
-
-/* STRUCTURE:
- *
- * The dsp module provides layer 2 for b-channels (64kbit). It provides
- * transparent audio forwarding with special digital signal processing:
- *
- * - (1) generation of tones
- * - (2) detection of dtmf tones
- * - (3) crossconnecting and conferences (clocking)
- * - (4) echo generation for delay test
- * - (5) volume control
- * - (6) disable receive data
- * - (7) pipeline
- * - (8) encryption/decryption
- *
- * Look:
- *             TX            RX
- *         ------upper layer------
- *             |             ^
- *             |             |(6)
- *             v             |
- *       +-----+-------------+-----+
- *       |(3)(4)                   |
- *       |           CMX           |
- *       |                         |
- *       |           +-------------+
- *       |           |       ^
- *       |           |       |
- *       |+---------+|  +----+----+
- *       ||(1)      ||  |(2)      |
- *       ||         ||  |         |
- *       ||  Tones  ||  |  DTMF   |
- *       ||         ||  |         |
- *       ||         ||  |         |
- *       |+----+----+|  +----+----+
- *       +-----+-----+       ^
- *             |             |
- *             v             |
- *        +----+----+   +----+----+
- *        |(5)      |   |(5)      |
- *        |         |   |         |
- *        |TX Volume|   |RX Volume|
- *        |         |   |         |
- *        |         |   |         |
- *        +----+----+   +----+----+
- *             |             ^
- *             |             |
- *             v             |
- *        +----+-------------+----+
- *        |(7)                    |
- *        |                       |
- *        |  Pipeline Processing  |
- *        |                       |
- *        |                       |
- *        +----+-------------+----+
- *             |             ^
- *             |             |
- *             v             |
- *        +----+----+   +----+----+
- *        |(8)      |   |(8)      |
- *        |         |   |         |
- *        | Encrypt |   | Decrypt |
- *        |         |   |         |
- *        |         |   |         |
- *        +----+----+   +----+----+
- *             |             ^
- *             |             |
- *             v             |
- *         ------card  layer------
- *             TX            RX
- *
- * Above you can see the logical data flow. If software is used to do the
- * process, it is actually the real data flow. If hardware is used, data
- * may not flow, but hardware commands to the card, to provide the data flow
- * as shown.
- *
- * NOTE: The channel must be activated in order to make dsp work, even if
- * no data flow to the upper layer is intended. Activation can be done
- * after and before controlling the setting using PH_CONTROL requests.
- *
- * DTMF: Will be detected by hardware if possible. It is done before CMX
- * processing.
- *
- * Tones: Will be generated via software if endless looped audio fifos are
- * not supported by hardware. Tones will override all data from CMX.
- * It is not required to join a conference to use tones at any time.
- *
- * CMX: Is transparent when not used. When it is used, it will do
- * crossconnections and conferences via software if not possible through
- * hardware. If hardware capability is available, hardware is used.
- *
- * Echo: Is generated by CMX and is used to check performance of hard and
- * software CMX.
- *
- * The CMX has special functions for conferences with one, two and more
- * members. It will allow different types of data flow. Receive and transmit
- * data to/form upper layer may be switched on/off individually without losing
- * features of CMX, Tones and DTMF.
- *
- * Echo Cancellation: Sometimes we like to cancel echo from the interface.
- * Note that a VoIP call may not have echo caused by the IP phone. The echo
- * is generated by the telephone line connected to it. Because the delay
- * is high, it becomes an echo. RESULT: Echo Cachelation is required if
- * both echo AND delay is applied to an interface.
- * Remember that software CMX always generates a more or less delay.
- *
- * If all used features can be realized in hardware, and if transmit and/or
- * receive data ist disabled, the card may not send/receive any data at all.
- * Not receiving is useful if only announcements are played. Not sending is
- * useful if an answering machine records audio. Not sending and receiving is
- * useful during most states of the call. If supported by hardware, tones
- * will be played without cpu load. Small PBXs and NT-Mode applications will
- * not need expensive hardware when processing calls.
- *
- *
- * LOCKING:
- *
- * When data is received from upper or lower layer (card), the complete dsp
- * module is locked by a global lock.  This lock MUST lock irq, because it
- * must lock timer events by DSP poll timer.
- * When data is ready to be transmitted down, the data is queued and sent
- * outside lock and timer event.
- * PH_CONTROL must not change any settings, join or split conference members
- * during process of data.
- *
- * HDLC:
- *
- * It works quite the same as transparent, except that HDLC data is forwarded
- * to all other conference members if no hardware bridging is possible.
- * Send data will be writte to sendq. Sendq will be sent if confirm is received.
- * Conference cannot join, if one member is not hdlc.
- *
- */
-
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/mISDNif.h>
-#include <linux/mISDNdsp.h>
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include "core.h"
-#include "dsp.h"
-
-static const char *mISDN_dsp_revision = "2.0";
-
-static int debug;
-static int options;
-static int poll;
-static int dtmfthreshold = 100;
-
-MODULE_AUTHOR("Andreas Eversberg");
-module_param(debug, uint, S_IRUGO | S_IWUSR);
-module_param(options, uint, S_IRUGO | S_IWUSR);
-module_param(poll, uint, S_IRUGO | S_IWUSR);
-module_param(dtmfthreshold, uint, S_IRUGO | S_IWUSR);
-MODULE_DESCRIPTION("mISDN driver for Digital Audio Processing of transparent data");
-MODULE_LICENSE("GPL");
-
-/*int spinnest = 0;*/
-
-DEFINE_SPINLOCK(dsp_lock); /* global dsp lock */
-LIST_HEAD(dsp_ilist);
-LIST_HEAD(conf_ilist);
-int dsp_debug;
-int dsp_options;
-int dsp_poll, dsp_tics;
-
-/* check if rx may be turned off or must be turned on */
-static void
-dsp_rx_off_member(struct dsp *dsp)
-{
-	struct mISDN_ctrl_req	cq;
-	int rx_off = 1;
-
-	memset(&cq, 0, sizeof(cq));
-
-	if (!dsp->features_rx_off)
-		return;
-
-	/* not disabled */
-	if (!dsp->rx_disabled)
-		rx_off = 0;
-	/* software dtmf */
-	else if (dsp->dtmf.software)
-		rx_off = 0;
-	/* echo in software */
-	else if (dsp->echo.software)
-		rx_off = 0;
-	/* bridge in software */
-	else if (dsp->conf && dsp->conf->software)
-		rx_off = 0;
-	/* data is not required by user space and not required
-	 * for echo dtmf detection, soft-echo, soft-bridging */
-
-	if (rx_off == dsp->rx_is_off)
-		return;
-
-	if (!dsp->ch.peer) {
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: no peer, no rx_off\n",
-			       __func__);
-		return;
-	}
-	cq.op = MISDN_CTRL_RX_OFF;
-	cq.p1 = rx_off;
-	if (dsp->ch.peer->ctrl(dsp->ch.peer, CONTROL_CHANNEL, &cq)) {
-		printk(KERN_DEBUG "%s: 2nd CONTROL_CHANNEL failed\n",
-		       __func__);
-		return;
-	}
-	dsp->rx_is_off = rx_off;
-	if (dsp_debug & DEBUG_DSP_CORE)
-		printk(KERN_DEBUG "%s: %s set rx_off = %d\n",
-		       __func__, dsp->name, rx_off);
-}
-static void
-dsp_rx_off(struct dsp *dsp)
-{
-	struct dsp_conf_member	*member;
-
-	if (dsp_options & DSP_OPT_NOHARDWARE)
-		return;
-
-	/* no conf */
-	if (!dsp->conf) {
-		dsp_rx_off_member(dsp);
-		return;
-	}
-	/* check all members in conf */
-	list_for_each_entry(member, &dsp->conf->mlist, list) {
-		dsp_rx_off_member(member->dsp);
-	}
-}
-
-/* enable "fill empty" feature */
-static void
-dsp_fill_empty(struct dsp *dsp)
-{
-	struct mISDN_ctrl_req	cq;
-
-	memset(&cq, 0, sizeof(cq));
-
-	if (!dsp->ch.peer) {
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: no peer, no fill_empty\n",
-			       __func__);
-		return;
-	}
-	cq.op = MISDN_CTRL_FILL_EMPTY;
-	cq.p1 = 1;
-	cq.p2 = dsp_silence;
-	if (dsp->ch.peer->ctrl(dsp->ch.peer, CONTROL_CHANNEL, &cq)) {
-		printk(KERN_DEBUG "%s: CONTROL_CHANNEL failed\n",
-		       __func__);
-		return;
-	}
-	if (dsp_debug & DEBUG_DSP_CORE)
-		printk(KERN_DEBUG "%s: %s set fill_empty = 1\n",
-		       __func__, dsp->name);
-}
-
-static int
-dsp_control_req(struct dsp *dsp, struct mISDNhead *hh, struct sk_buff *skb)
-{
-	struct sk_buff	*nskb;
-	int ret = 0;
-	int cont;
-	u8 *data;
-	int len;
-
-	if (skb->len < sizeof(int)) {
-		printk(KERN_ERR "%s: PH_CONTROL message too short\n", __func__);
-		return -EINVAL;
-	}
-	cont = *((int *)skb->data);
-	len = skb->len - sizeof(int);
-	data = skb->data + sizeof(int);
-
-	switch (cont) {
-	case DTMF_TONE_START: /* turn on DTMF */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: start dtmf\n", __func__);
-		if (len == sizeof(int)) {
-			if (dsp_debug & DEBUG_DSP_CORE)
-				printk(KERN_NOTICE "changing DTMF Threshold "
-				       "to %d\n", *((int *)data));
-			dsp->dtmf.treshold = (*(int *)data) * 10000;
-		}
-		dsp->dtmf.enable = 1;
-		/* init goertzel */
-		dsp_dtmf_goertzel_init(dsp);
-
-		/* check dtmf hardware */
-		dsp_dtmf_hardware(dsp);
-		dsp_rx_off(dsp);
-		break;
-	case DTMF_TONE_STOP: /* turn off DTMF */
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: stop dtmf\n", __func__);
-		dsp->dtmf.enable = 0;
-		dsp->dtmf.hardware = 0;
-		dsp->dtmf.software = 0;
-		break;
-	case DSP_CONF_JOIN: /* join / update conference */
-		if (len < sizeof(int)) {
-			ret = -EINVAL;
-			break;
-		}
-		if (*((u32 *)data) == 0)
-			goto conf_split;
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: join conference %d\n",
-			       __func__, *((u32 *)data));
-		ret = dsp_cmx_conf(dsp, *((u32 *)data));
-		/* dsp_cmx_hardware will also be called here */
-		dsp_rx_off(dsp);
-		if (dsp_debug & DEBUG_DSP_CMX)
-			dsp_cmx_debug(dsp);
-		break;
-	case DSP_CONF_SPLIT: /* remove from conference */
-	conf_split:
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: release conference\n", __func__);
-		ret = dsp_cmx_conf(dsp, 0);
-		/* dsp_cmx_hardware will also be called here */
-		if (dsp_debug & DEBUG_DSP_CMX)
-			dsp_cmx_debug(dsp);
-		dsp_rx_off(dsp);
-		break;
-	case DSP_TONE_PATT_ON: /* play tone */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (len < sizeof(int)) {
-			ret = -EINVAL;
-			break;
-		}
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: turn tone 0x%x on\n",
-			       __func__, *((int *)skb->data));
-		ret = dsp_tone(dsp, *((int *)data));
-		if (!ret) {
-			dsp_cmx_hardware(dsp->conf, dsp);
-			dsp_rx_off(dsp);
-		}
-		if (!dsp->tone.tone)
-			goto tone_off;
-		break;
-	case DSP_TONE_PATT_OFF: /* stop tone */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: turn tone off\n", __func__);
-		dsp_tone(dsp, 0);
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_rx_off(dsp);
-		/* reset tx buffers (user space data) */
-	tone_off:
-		dsp->rx_W = 0;
-		dsp->rx_R = 0;
-		break;
-	case DSP_VOL_CHANGE_TX: /* change volume */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (len < sizeof(int)) {
-			ret = -EINVAL;
-			break;
-		}
-		dsp->tx_volume = *((int *)data);
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: change tx vol to %d\n",
-			       __func__, dsp->tx_volume);
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_dtmf_hardware(dsp);
-		dsp_rx_off(dsp);
-		break;
-	case DSP_VOL_CHANGE_RX: /* change volume */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (len < sizeof(int)) {
-			ret = -EINVAL;
-			break;
-		}
-		dsp->rx_volume = *((int *)data);
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: change rx vol to %d\n",
-			       __func__, dsp->tx_volume);
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_dtmf_hardware(dsp);
-		dsp_rx_off(dsp);
-		break;
-	case DSP_ECHO_ON: /* enable echo */
-		dsp->echo.software = 1; /* soft echo */
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: enable cmx-echo\n", __func__);
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_rx_off(dsp);
-		if (dsp_debug & DEBUG_DSP_CMX)
-			dsp_cmx_debug(dsp);
-		break;
-	case DSP_ECHO_OFF: /* disable echo */
-		dsp->echo.software = 0;
-		dsp->echo.hardware = 0;
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: disable cmx-echo\n", __func__);
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_rx_off(dsp);
-		if (dsp_debug & DEBUG_DSP_CMX)
-			dsp_cmx_debug(dsp);
-		break;
-	case DSP_RECEIVE_ON: /* enable receive to user space */
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: enable receive to user "
-			       "space\n", __func__);
-		dsp->rx_disabled = 0;
-		dsp_rx_off(dsp);
-		break;
-	case DSP_RECEIVE_OFF: /* disable receive to user space */
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: disable receive to "
-			       "user space\n", __func__);
-		dsp->rx_disabled = 1;
-		dsp_rx_off(dsp);
-		break;
-	case DSP_MIX_ON: /* enable mixing of tx data */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: enable mixing of "
-			       "tx-data with conf members\n", __func__);
-		dsp->tx_mix = 1;
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_rx_off(dsp);
-		if (dsp_debug & DEBUG_DSP_CMX)
-			dsp_cmx_debug(dsp);
-		break;
-	case DSP_MIX_OFF: /* disable mixing of tx data */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: disable mixing of "
-			       "tx-data with conf members\n", __func__);
-		dsp->tx_mix = 0;
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_rx_off(dsp);
-		if (dsp_debug & DEBUG_DSP_CMX)
-			dsp_cmx_debug(dsp);
-		break;
-	case DSP_TXDATA_ON: /* enable txdata */
-		dsp->tx_data = 1;
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: enable tx-data\n", __func__);
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_rx_off(dsp);
-		if (dsp_debug & DEBUG_DSP_CMX)
-			dsp_cmx_debug(dsp);
-		break;
-	case DSP_TXDATA_OFF: /* disable txdata */
-		dsp->tx_data = 0;
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: disable tx-data\n", __func__);
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_rx_off(dsp);
-		if (dsp_debug & DEBUG_DSP_CMX)
-			dsp_cmx_debug(dsp);
-		break;
-	case DSP_DELAY: /* use delay algorithm instead of dynamic
-			   jitter algorithm */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (len < sizeof(int)) {
-			ret = -EINVAL;
-			break;
-		}
-		dsp->cmx_delay = (*((int *)data)) << 3;
-		/* milliseconds to samples */
-		if (dsp->cmx_delay >= (CMX_BUFF_HALF >> 1))
-			/* clip to half of maximum usable buffer
-			   (half of half buffer) */
-			dsp->cmx_delay = (CMX_BUFF_HALF >> 1) - 1;
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: use delay algorithm to "
-			       "compensate jitter (%d samples)\n",
-			       __func__, dsp->cmx_delay);
-		break;
-	case DSP_JITTER: /* use dynamic jitter algorithm instead of
-			    delay algorithm */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		dsp->cmx_delay = 0;
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: use jitter algorithm to "
-			       "compensate jitter\n", __func__);
-		break;
-	case DSP_TX_DEJITTER: /* use dynamic jitter algorithm for tx-buffer */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		dsp->tx_dejitter = 1;
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: use dejitter on TX "
-			       "buffer\n", __func__);
-		break;
-	case DSP_TX_DEJ_OFF: /* use tx-buffer without dejittering*/
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		dsp->tx_dejitter = 0;
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: use TX buffer without "
-			       "dejittering\n", __func__);
-		break;
-	case DSP_PIPELINE_CFG:
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (len > 0 && ((char *)data)[len - 1]) {
-			printk(KERN_DEBUG "%s: pipeline config string "
-			       "is not NULL terminated!\n", __func__);
-			ret = -EINVAL;
-		} else {
-			dsp->pipeline.inuse = 1;
-			dsp_cmx_hardware(dsp->conf, dsp);
-			ret = dsp_pipeline_build(&dsp->pipeline,
-						 len > 0 ? data : NULL);
-			dsp_cmx_hardware(dsp->conf, dsp);
-			dsp_rx_off(dsp);
-		}
-		break;
-	case DSP_BF_ENABLE_KEY: /* turn blowfish on */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (len < 4 || len > 56) {
-			ret = -EINVAL;
-			break;
-		}
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: turn blowfish on (key "
-			       "not shown)\n", __func__);
-		ret = dsp_bf_init(dsp, (u8 *)data, len);
-		/* set new cont */
-		if (!ret)
-			cont = DSP_BF_ACCEPT;
-		else
-			cont = DSP_BF_REJECT;
-		/* send indication if it worked to set it */
-		nskb = _alloc_mISDN_skb(PH_CONTROL_IND, MISDN_ID_ANY,
-					sizeof(int), &cont, GFP_ATOMIC);
-		if (nskb) {
-			if (dsp->up) {
-				if (dsp->up->send(dsp->up, nskb))
-					dev_kfree_skb(nskb);
-			} else
-				dev_kfree_skb(nskb);
-		}
-		if (!ret) {
-			dsp_cmx_hardware(dsp->conf, dsp);
-			dsp_dtmf_hardware(dsp);
-			dsp_rx_off(dsp);
-		}
-		break;
-	case DSP_BF_DISABLE: /* turn blowfish off */
-		if (dsp->hdlc) {
-			ret = -EINVAL;
-			break;
-		}
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: turn blowfish off\n", __func__);
-		dsp_bf_cleanup(dsp);
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_dtmf_hardware(dsp);
-		dsp_rx_off(dsp);
-		break;
-	default:
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: ctrl req %x unhandled\n",
-			       __func__, cont);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
-static void
-get_features(struct mISDNchannel *ch)
-{
-	struct dsp		*dsp = container_of(ch, struct dsp, ch);
-	struct mISDN_ctrl_req	cq;
-
-	if (!ch->peer) {
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: no peer, no features\n",
-			       __func__);
-		return;
-	}
-	memset(&cq, 0, sizeof(cq));
-	cq.op = MISDN_CTRL_GETOP;
-	if (ch->peer->ctrl(ch->peer, CONTROL_CHANNEL, &cq) < 0) {
-		printk(KERN_DEBUG "%s: CONTROL_CHANNEL failed\n",
-		       __func__);
-		return;
-	}
-	if (cq.op & MISDN_CTRL_RX_OFF)
-		dsp->features_rx_off = 1;
-	if (cq.op & MISDN_CTRL_FILL_EMPTY)
-		dsp->features_fill_empty = 1;
-	if (dsp_options & DSP_OPT_NOHARDWARE)
-		return;
-	if ((cq.op & MISDN_CTRL_HW_FEATURES_OP)) {
-		cq.op = MISDN_CTRL_HW_FEATURES;
-		*((u_long *)&cq.p1) = (u_long)&dsp->features;
-		if (ch->peer->ctrl(ch->peer, CONTROL_CHANNEL, &cq)) {
-			printk(KERN_DEBUG "%s: 2nd CONTROL_CHANNEL failed\n",
-			       __func__);
-		}
-	} else
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: features not supported for %s\n",
-			       __func__, dsp->name);
-}
-
-static int
-dsp_function(struct mISDNchannel *ch,  struct sk_buff *skb)
-{
-	struct dsp		*dsp = container_of(ch, struct dsp, ch);
-	struct mISDNhead	*hh;
-	int			ret = 0;
-	u8			*digits = NULL;
-	u_long			flags;
-
-	hh = mISDN_HEAD_P(skb);
-	switch (hh->prim) {
-		/* FROM DOWN */
-	case (PH_DATA_CNF):
-		dsp->data_pending = 0;
-		/* trigger next hdlc frame, if any */
-		if (dsp->hdlc) {
-			spin_lock_irqsave(&dsp_lock, flags);
-			if (dsp->b_active)
-				schedule_work(&dsp->workq);
-			spin_unlock_irqrestore(&dsp_lock, flags);
-		}
-		break;
-	case (PH_DATA_IND):
-	case (DL_DATA_IND):
-		if (skb->len < 1) {
-			ret = -EINVAL;
-			break;
-		}
-		if (dsp->rx_is_off) {
-			if (dsp_debug & DEBUG_DSP_CORE)
-				printk(KERN_DEBUG "%s: rx-data during rx_off"
-				       " for %s\n",
-				       __func__, dsp->name);
-		}
-		if (dsp->hdlc) {
-			/* hdlc */
-			spin_lock_irqsave(&dsp_lock, flags);
-			dsp_cmx_hdlc(dsp, skb);
-			spin_unlock_irqrestore(&dsp_lock, flags);
-			if (dsp->rx_disabled) {
-				/* if receive is not allowed */
-				break;
-			}
-			hh->prim = DL_DATA_IND;
-			if (dsp->up)
-				return dsp->up->send(dsp->up, skb);
-			break;
-		}
-
-		spin_lock_irqsave(&dsp_lock, flags);
-
-		/* decrypt if enabled */
-		if (dsp->bf_enable)
-			dsp_bf_decrypt(dsp, skb->data, skb->len);
-		/* pipeline */
-		if (dsp->pipeline.inuse)
-			dsp_pipeline_process_rx(&dsp->pipeline, skb->data,
-						skb->len, hh->id);
-		/* change volume if requested */
-		if (dsp->rx_volume)
-			dsp_change_volume(skb, dsp->rx_volume);
-		/* check if dtmf soft decoding is turned on */
-		if (dsp->dtmf.software) {
-			digits = dsp_dtmf_goertzel_decode(dsp, skb->data,
-							  skb->len, (dsp_options & DSP_OPT_ULAW) ? 1 : 0);
-		}
-		/* we need to process receive data if software */
-		if (dsp->conf && dsp->conf->software) {
-			/* process data from card at cmx */
-			dsp_cmx_receive(dsp, skb);
-		}
-
-		spin_unlock_irqrestore(&dsp_lock, flags);
-
-		/* send dtmf result, if any */
-		if (digits) {
-			while (*digits) {
-				int k;
-				struct sk_buff *nskb;
-				if (dsp_debug & DEBUG_DSP_DTMF)
-					printk(KERN_DEBUG "%s: digit"
-					       "(%c) to layer %s\n",
-					       __func__, *digits, dsp->name);
-				k = *digits | DTMF_TONE_VAL;
-				nskb = _alloc_mISDN_skb(PH_CONTROL_IND,
-							MISDN_ID_ANY, sizeof(int), &k,
-							GFP_ATOMIC);
-				if (nskb) {
-					if (dsp->up) {
-						if (dsp->up->send(
-							    dsp->up, nskb))
-							dev_kfree_skb(nskb);
-					} else
-						dev_kfree_skb(nskb);
-				}
-				digits++;
-			}
-		}
-		if (dsp->rx_disabled) {
-			/* if receive is not allowed */
-			break;
-		}
-		hh->prim = DL_DATA_IND;
-		if (dsp->up)
-			return dsp->up->send(dsp->up, skb);
-		break;
-	case (PH_CONTROL_IND):
-		if (dsp_debug & DEBUG_DSP_DTMFCOEFF)
-			printk(KERN_DEBUG "%s: PH_CONTROL INDICATION "
-			       "received: %x (len %d) %s\n", __func__,
-			       hh->id, skb->len, dsp->name);
-		switch (hh->id) {
-		case (DTMF_HFC_COEF): /* getting coefficients */
-			if (!dsp->dtmf.hardware) {
-				if (dsp_debug & DEBUG_DSP_DTMFCOEFF)
-					printk(KERN_DEBUG "%s: ignoring DTMF "
-					       "coefficients from HFC\n",
-					       __func__);
-				break;
-			}
-			digits = dsp_dtmf_goertzel_decode(dsp, skb->data,
-							  skb->len, 2);
-			while (*digits) {
-				int k;
-				struct sk_buff *nskb;
-				if (dsp_debug & DEBUG_DSP_DTMF)
-					printk(KERN_DEBUG "%s: digit"
-					       "(%c) to layer %s\n",
-					       __func__, *digits, dsp->name);
-				k = *digits | DTMF_TONE_VAL;
-				nskb = _alloc_mISDN_skb(PH_CONTROL_IND,
-							MISDN_ID_ANY, sizeof(int), &k,
-							GFP_ATOMIC);
-				if (nskb) {
-					if (dsp->up) {
-						if (dsp->up->send(
-							    dsp->up, nskb))
-							dev_kfree_skb(nskb);
-					} else
-						dev_kfree_skb(nskb);
-				}
-				digits++;
-			}
-			break;
-		case (HFC_VOL_CHANGE_TX): /* change volume */
-			if (skb->len != sizeof(int)) {
-				ret = -EINVAL;
-				break;
-			}
-			spin_lock_irqsave(&dsp_lock, flags);
-			dsp->tx_volume = *((int *)skb->data);
-			if (dsp_debug & DEBUG_DSP_CORE)
-				printk(KERN_DEBUG "%s: change tx volume to "
-				       "%d\n", __func__, dsp->tx_volume);
-			dsp_cmx_hardware(dsp->conf, dsp);
-			dsp_dtmf_hardware(dsp);
-			dsp_rx_off(dsp);
-			spin_unlock_irqrestore(&dsp_lock, flags);
-			break;
-		default:
-			if (dsp_debug & DEBUG_DSP_CORE)
-				printk(KERN_DEBUG "%s: ctrl ind %x unhandled "
-				       "%s\n", __func__, hh->id, dsp->name);
-			ret = -EINVAL;
-		}
-		break;
-	case (PH_ACTIVATE_IND):
-	case (PH_ACTIVATE_CNF):
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: b_channel is now active %s\n",
-			       __func__, dsp->name);
-		/* bchannel now active */
-		spin_lock_irqsave(&dsp_lock, flags);
-		dsp->b_active = 1;
-		dsp->data_pending = 0;
-		dsp->rx_init = 1;
-		/* rx_W and rx_R will be adjusted on first frame */
-		dsp->rx_W = 0;
-		dsp->rx_R = 0;
-		memset(dsp->rx_buff, 0, sizeof(dsp->rx_buff));
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_dtmf_hardware(dsp);
-		dsp_rx_off(dsp);
-		spin_unlock_irqrestore(&dsp_lock, flags);
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: done with activation, sending "
-			       "confirm to user space. %s\n", __func__,
-			       dsp->name);
-		/* send activation to upper layer */
-		hh->prim = DL_ESTABLISH_CNF;
-		if (dsp->up)
-			return dsp->up->send(dsp->up, skb);
-		break;
-	case (PH_DEACTIVATE_IND):
-	case (PH_DEACTIVATE_CNF):
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: b_channel is now inactive %s\n",
-			       __func__, dsp->name);
-		/* bchannel now inactive */
-		spin_lock_irqsave(&dsp_lock, flags);
-		dsp->b_active = 0;
-		dsp->data_pending = 0;
-		dsp_cmx_hardware(dsp->conf, dsp);
-		dsp_rx_off(dsp);
-		spin_unlock_irqrestore(&dsp_lock, flags);
-		hh->prim = DL_RELEASE_CNF;
-		if (dsp->up)
-			return dsp->up->send(dsp->up, skb);
-		break;
-		/* FROM UP */
-	case (DL_DATA_REQ):
-	case (PH_DATA_REQ):
-		if (skb->len < 1) {
-			ret = -EINVAL;
-			break;
-		}
-		if (dsp->hdlc) {
-			/* hdlc */
-			if (!dsp->b_active) {
-				ret = -EIO;
-				break;
-			}
-			hh->prim = PH_DATA_REQ;
-			spin_lock_irqsave(&dsp_lock, flags);
-			skb_queue_tail(&dsp->sendq, skb);
-			schedule_work(&dsp->workq);
-			spin_unlock_irqrestore(&dsp_lock, flags);
-			return 0;
-		}
-		/* send data to tx-buffer (if no tone is played) */
-		if (!dsp->tone.tone) {
-			spin_lock_irqsave(&dsp_lock, flags);
-			dsp_cmx_transmit(dsp, skb);
-			spin_unlock_irqrestore(&dsp_lock, flags);
-		}
-		break;
-	case (PH_CONTROL_REQ):
-		spin_lock_irqsave(&dsp_lock, flags);
-		ret = dsp_control_req(dsp, hh, skb);
-		spin_unlock_irqrestore(&dsp_lock, flags);
-		break;
-	case (DL_ESTABLISH_REQ):
-	case (PH_ACTIVATE_REQ):
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: activating b_channel %s\n",
-			       __func__, dsp->name);
-		if (dsp->dtmf.hardware || dsp->dtmf.software)
-			dsp_dtmf_goertzel_init(dsp);
-		get_features(ch);
-		/* enable fill_empty feature */
-		if (dsp->features_fill_empty)
-			dsp_fill_empty(dsp);
-		/* send ph_activate */
-		hh->prim = PH_ACTIVATE_REQ;
-		if (ch->peer)
-			return ch->recv(ch->peer, skb);
-		break;
-	case (DL_RELEASE_REQ):
-	case (PH_DEACTIVATE_REQ):
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: releasing b_channel %s\n",
-			       __func__, dsp->name);
-		spin_lock_irqsave(&dsp_lock, flags);
-		dsp->tone.tone = 0;
-		dsp->tone.hardware = 0;
-		dsp->tone.software = 0;
-		if (timer_pending(&dsp->tone.tl))
-			timer_delete(&dsp->tone.tl);
-		if (dsp->conf)
-			dsp_cmx_conf(dsp, 0); /* dsp_cmx_hardware will also be
-						 called here */
-		skb_queue_purge(&dsp->sendq);
-		spin_unlock_irqrestore(&dsp_lock, flags);
-		hh->prim = PH_DEACTIVATE_REQ;
-		if (ch->peer)
-			return ch->recv(ch->peer, skb);
-		break;
-	default:
-		if (dsp_debug & DEBUG_DSP_CORE)
-			printk(KERN_DEBUG "%s: msg %x unhandled %s\n",
-			       __func__, hh->prim, dsp->name);
-		ret = -EINVAL;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static int
-dsp_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct dsp		*dsp = container_of(ch, struct dsp, ch);
-	u_long		flags;
-
-	if (debug & DEBUG_DSP_CTRL)
-		printk(KERN_DEBUG "%s:(%x)\n", __func__, cmd);
-
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		break;
-	case CLOSE_CHANNEL:
-		if (dsp->ch.peer)
-			dsp->ch.peer->ctrl(dsp->ch.peer, CLOSE_CHANNEL, NULL);
-
-		/* wait until workqueue has finished,
-		 * must lock here, or we may hit send-process currently
-		 * queueing. */
-		spin_lock_irqsave(&dsp_lock, flags);
-		dsp->b_active = 0;
-		spin_unlock_irqrestore(&dsp_lock, flags);
-		/* MUST not be locked, because it waits until queue is done. */
-		cancel_work_sync(&dsp->workq);
-		spin_lock_irqsave(&dsp_lock, flags);
-		if (timer_pending(&dsp->tone.tl))
-			timer_delete(&dsp->tone.tl);
-		skb_queue_purge(&dsp->sendq);
-		if (dsp_debug & DEBUG_DSP_CTRL)
-			printk(KERN_DEBUG "%s: releasing member %s\n",
-			       __func__, dsp->name);
-		dsp->b_active = 0;
-		dsp_cmx_conf(dsp, 0); /* dsp_cmx_hardware will also be called
-					 here */
-		dsp_pipeline_destroy(&dsp->pipeline);
-
-		if (dsp_debug & DEBUG_DSP_CTRL)
-			printk(KERN_DEBUG "%s: remove & destroy object %s\n",
-			       __func__, dsp->name);
-		list_del(&dsp->list);
-		spin_unlock_irqrestore(&dsp_lock, flags);
-
-		if (dsp_debug & DEBUG_DSP_CTRL)
-			printk(KERN_DEBUG "%s: dsp instance released\n",
-			       __func__);
-		vfree(dsp);
-		module_put(THIS_MODULE);
-		break;
-	}
-	return 0;
-}
-
-static void
-dsp_send_bh(struct work_struct *work)
-{
-	struct dsp *dsp = container_of(work, struct dsp, workq);
-	struct sk_buff *skb;
-	struct mISDNhead	*hh;
-
-	if (dsp->hdlc && dsp->data_pending)
-		return; /* wait until data has been acknowledged */
-
-	/* send queued data */
-	while ((skb = skb_dequeue(&dsp->sendq))) {
-		/* in locked date, we must have still data in queue */
-		if (dsp->data_pending) {
-			if (dsp_debug & DEBUG_DSP_CORE)
-				printk(KERN_DEBUG "%s: fifo full %s, this is "
-				       "no bug!\n", __func__, dsp->name);
-			/* flush transparent data, if not acked */
-			dev_kfree_skb(skb);
-			continue;
-		}
-		hh = mISDN_HEAD_P(skb);
-		if (hh->prim == DL_DATA_REQ) {
-			/* send packet up */
-			if (dsp->up) {
-				if (dsp->up->send(dsp->up, skb))
-					dev_kfree_skb(skb);
-			} else
-				dev_kfree_skb(skb);
-		} else {
-			/* send packet down */
-			if (dsp->ch.peer) {
-				dsp->data_pending = 1;
-				if (dsp->ch.recv(dsp->ch.peer, skb)) {
-					dev_kfree_skb(skb);
-					dsp->data_pending = 0;
-				}
-			} else
-				dev_kfree_skb(skb);
-		}
-	}
-}
-
-static int
-dspcreate(struct channel_req *crq)
-{
-	struct dsp		*ndsp;
-	u_long		flags;
-
-	if (crq->protocol != ISDN_P_B_L2DSP
-	    && crq->protocol != ISDN_P_B_L2DSPHDLC)
-		return -EPROTONOSUPPORT;
-	ndsp = vzalloc(sizeof(struct dsp));
-	if (!ndsp) {
-		printk(KERN_ERR "%s: vmalloc struct dsp failed\n", __func__);
-		return -ENOMEM;
-	}
-	if (dsp_debug & DEBUG_DSP_CTRL)
-		printk(KERN_DEBUG "%s: creating new dsp instance\n", __func__);
-
-	/* default enabled */
-	INIT_WORK(&ndsp->workq, (void *)dsp_send_bh);
-	skb_queue_head_init(&ndsp->sendq);
-	ndsp->ch.send = dsp_function;
-	ndsp->ch.ctrl = dsp_ctrl;
-	ndsp->up = crq->ch;
-	crq->ch = &ndsp->ch;
-	if (crq->protocol == ISDN_P_B_L2DSP) {
-		crq->protocol = ISDN_P_B_RAW;
-		ndsp->hdlc = 0;
-	} else {
-		crq->protocol = ISDN_P_B_HDLC;
-		ndsp->hdlc = 1;
-	}
-	if (!try_module_get(THIS_MODULE))
-		printk(KERN_WARNING "%s:cannot get module\n",
-		       __func__);
-
-	sprintf(ndsp->name, "DSP_C%x(0x%p)",
-		ndsp->up->st->dev->id + 1, ndsp);
-	/* set frame size to start */
-	ndsp->features.hfc_id = -1; /* current PCM id */
-	ndsp->features.pcm_id = -1; /* current PCM id */
-	ndsp->pcm_slot_rx = -1; /* current CPM slot */
-	ndsp->pcm_slot_tx = -1;
-	ndsp->pcm_bank_rx = -1;
-	ndsp->pcm_bank_tx = -1;
-	ndsp->hfc_conf = -1; /* current conference number */
-	/* set tone timer */
-	timer_setup(&ndsp->tone.tl, dsp_tone_timeout, 0);
-
-	if (dtmfthreshold < 20 || dtmfthreshold > 500)
-		dtmfthreshold = 200;
-	ndsp->dtmf.treshold = dtmfthreshold * 10000;
-
-	/* init pipeline append to list */
-	spin_lock_irqsave(&dsp_lock, flags);
-	dsp_pipeline_init(&ndsp->pipeline);
-	list_add_tail(&ndsp->list, &dsp_ilist);
-	spin_unlock_irqrestore(&dsp_lock, flags);
-
-	return 0;
-}
-
-
-static struct Bprotocol DSP = {
-	.Bprotocols = (1 << (ISDN_P_B_L2DSP & ISDN_P_B_MASK))
-	| (1 << (ISDN_P_B_L2DSPHDLC & ISDN_P_B_MASK)),
-	.name = "dsp",
-	.create = dspcreate
-};
-
-static int __init dsp_init(void)
-{
-	int err;
-	int tics;
-
-	printk(KERN_INFO "DSP module %s\n", mISDN_dsp_revision);
-
-	dsp_options = options;
-	dsp_debug = debug;
-
-	/* set packet size */
-	dsp_poll = poll;
-	if (dsp_poll) {
-		if (dsp_poll > MAX_POLL) {
-			printk(KERN_ERR "%s: Wrong poll value (%d), use %d "
-			       "maximum.\n", __func__, poll, MAX_POLL);
-			err = -EINVAL;
-			return err;
-		}
-		if (dsp_poll < 8) {
-			printk(KERN_ERR "%s: Wrong poll value (%d), use 8 "
-			       "minimum.\n", __func__, dsp_poll);
-			err = -EINVAL;
-			return err;
-		}
-		dsp_tics = poll * HZ / 8000;
-		if (dsp_tics * 8000 != poll * HZ) {
-			printk(KERN_INFO "mISDN_dsp: Cannot clock every %d "
-			       "samples (0,125 ms). It is not a multiple of "
-			       "%d HZ.\n", poll, HZ);
-			err = -EINVAL;
-			return err;
-		}
-	} else {
-		poll = 8;
-		while (poll <= MAX_POLL) {
-			tics = (poll * HZ) / 8000;
-			if (tics * 8000 == poll * HZ) {
-				dsp_tics = tics;
-				dsp_poll = poll;
-				if (poll >= 64)
-					break;
-			}
-			poll++;
-		}
-	}
-	if (dsp_poll == 0) {
-		printk(KERN_INFO "mISDN_dsp: There is no multiple of kernel "
-		       "clock that equals exactly the duration of 8-256 "
-		       "samples. (Choose kernel clock speed like 100, 250, "
-		       "300, 1000)\n");
-		err = -EINVAL;
-		return err;
-	}
-	printk(KERN_INFO "mISDN_dsp: DSP clocks every %d samples. This equals "
-	       "%d jiffies.\n", dsp_poll, dsp_tics);
-
-	/* init conversion tables */
-	dsp_audio_generate_law_tables();
-	dsp_silence = (dsp_options & DSP_OPT_ULAW) ? 0xff : 0x2a;
-	dsp_audio_law_to_s32 = (dsp_options & DSP_OPT_ULAW) ?
-		dsp_audio_ulaw_to_s32 : dsp_audio_alaw_to_s32;
-	dsp_audio_generate_s2law_table();
-	dsp_audio_generate_seven();
-	dsp_audio_generate_mix_table();
-	if (dsp_options & DSP_OPT_ULAW)
-		dsp_audio_generate_ulaw_samples();
-	dsp_audio_generate_volume_changes();
-
-	err = dsp_pipeline_module_init();
-	if (err) {
-		printk(KERN_ERR "mISDN_dsp: Can't initialize pipeline, "
-		       "error(%d)\n", err);
-		return err;
-	}
-
-	err = mISDN_register_Bprotocol(&DSP);
-	if (err) {
-		printk(KERN_ERR "Can't register %s error(%d)\n", DSP.name, err);
-		return err;
-	}
-
-	/* set sample timer */
-	timer_setup(&dsp_spl_tl, dsp_cmx_send, 0);
-	dsp_spl_tl.expires = jiffies + dsp_tics;
-	dsp_spl_jiffies = dsp_spl_tl.expires;
-	add_timer(&dsp_spl_tl);
-
-	return 0;
-}
-
-
-static void __exit dsp_cleanup(void)
-{
-	mISDN_unregister_Bprotocol(&DSP);
-
-	timer_delete_sync(&dsp_spl_tl);
-
-	if (!list_empty(&dsp_ilist)) {
-		printk(KERN_ERR "mISDN_dsp: Audio DSP object inst list not "
-		       "empty.\n");
-	}
-	if (!list_empty(&conf_ilist)) {
-		printk(KERN_ERR "mISDN_dsp: Conference list not empty. Not "
-		       "all memory freed.\n");
-	}
-
-	dsp_pipeline_module_exit();
-}
-
-module_init(dsp_init);
-module_exit(dsp_cleanup);
diff --git a/drivers/isdn/mISDN/dsp_dtmf.c b/drivers/isdn/mISDN/dsp_dtmf.c
deleted file mode 100644
index 642f30be5ce2..000000000000
--- a/drivers/isdn/mISDN/dsp_dtmf.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * DTMF decoder.
- *
- * Copyright            by Andreas Eversberg (jolly@eversberg.eu)
- *			based on different decoders such as ISDN4Linux
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#include <linux/mISDNif.h>
-#include <linux/mISDNdsp.h>
-#include "core.h"
-#include "dsp.h"
-
-#define NCOEFF            8     /* number of frequencies to be analyzed */
-
-/* For DTMF recognition:
- * 2 * cos(2 * PI * k / N) precalculated for all k
- */
-static u64 cos2pik[NCOEFF] =
-{
-	/* k << 15 (source: hfc-4s/8s documentation (www.colognechip.de)) */
-	55960, 53912, 51402, 48438, 38146, 32650, 26170, 18630
-};
-
-/* digit matrix */
-static char dtmf_matrix[4][4] =
-{
-	{'1', '2', '3', 'A'},
-	{'4', '5', '6', 'B'},
-	{'7', '8', '9', 'C'},
-	{'*', '0', '#', 'D'}
-};
-
-/* dtmf detection using goertzel algorithm
- * init function
- */
-void dsp_dtmf_goertzel_init(struct dsp *dsp)
-{
-	dsp->dtmf.size = 0;
-	dsp->dtmf.lastwhat = '\0';
-	dsp->dtmf.lastdigit = '\0';
-	dsp->dtmf.count = 0;
-}
-
-/* check for hardware or software features
- */
-void dsp_dtmf_hardware(struct dsp *dsp)
-{
-	int hardware = 1;
-
-	if (!dsp->dtmf.enable)
-		return;
-
-	if (!dsp->features.hfc_dtmf)
-		hardware = 0;
-
-	/* check for volume change */
-	if (dsp->tx_volume) {
-		if (dsp_debug & DEBUG_DSP_DTMF)
-			printk(KERN_DEBUG "%s dsp %s cannot do hardware DTMF, "
-			       "because tx_volume is changed\n",
-			       __func__, dsp->name);
-		hardware = 0;
-	}
-	if (dsp->rx_volume) {
-		if (dsp_debug & DEBUG_DSP_DTMF)
-			printk(KERN_DEBUG "%s dsp %s cannot do hardware DTMF, "
-			       "because rx_volume is changed\n",
-			       __func__, dsp->name);
-		hardware = 0;
-	}
-	/* check if encryption is enabled */
-	if (dsp->bf_enable) {
-		if (dsp_debug & DEBUG_DSP_DTMF)
-			printk(KERN_DEBUG "%s dsp %s cannot do hardware DTMF, "
-			       "because encryption is enabled\n",
-			       __func__, dsp->name);
-		hardware = 0;
-	}
-	/* check if pipeline exists */
-	if (dsp->pipeline.inuse) {
-		if (dsp_debug & DEBUG_DSP_DTMF)
-			printk(KERN_DEBUG "%s dsp %s cannot do hardware DTMF, "
-			       "because pipeline exists.\n",
-			       __func__, dsp->name);
-		hardware = 0;
-	}
-
-	dsp->dtmf.hardware = hardware;
-	dsp->dtmf.software = !hardware;
-}
-
-
-/*************************************************************
- * calculate the coefficients of the given sample and decode *
- *************************************************************/
-
-/* the given sample is decoded. if the sample is not long enough for a
- * complete frame, the decoding is finished and continued with the next
- * call of this function.
- *
- * the algorithm is very good for detection with a minimum of errors. i
- * tested it allot. it even works with very short tones (40ms). the only
- * disadvantage is, that it doesn't work good with different volumes of both
- * tones. this will happen, if accoustically coupled dialers are used.
- * it sometimes detects tones during speech, which is normal for decoders.
- * use sequences to given commands during calls.
- *
- * dtmf - points to a structure of the current dtmf state
- * spl and len - the sample
- * fmt - 0 = alaw, 1 = ulaw, 2 = coefficients from HFC DTMF hw-decoder
- */
-
-u8
-*dsp_dtmf_goertzel_decode(struct dsp *dsp, u8 *data, int len, int fmt)
-{
-	u8 what;
-	int size;
-	signed short *buf;
-	s32 sk, sk1, sk2;
-	int k, n, i;
-	s32 *hfccoeff;
-	s32 result[NCOEFF], tresh, treshl;
-	int lowgroup, highgroup;
-	s64 cos2pik_;
-
-	dsp->dtmf.digits[0] = '\0';
-
-	/* Note: The function will loop until the buffer has not enough samples
-	 * left to decode a full frame.
-	 */
-again:
-	/* convert samples */
-	size = dsp->dtmf.size;
-	buf = dsp->dtmf.buffer;
-	switch (fmt) {
-	case 0: /* alaw */
-	case 1: /* ulaw */
-		while (size < DSP_DTMF_NPOINTS && len) {
-			buf[size++] = dsp_audio_law_to_s32[*data++];
-			len--;
-		}
-		break;
-
-	case 2: /* HFC coefficients */
-	default:
-		if (len < 64) {
-			if (len > 0)
-				printk(KERN_ERR "%s: coefficients have invalid "
-				       "size. (is=%d < must=%d)\n",
-				       __func__, len, 64);
-			return dsp->dtmf.digits;
-		}
-		hfccoeff = (s32 *)data;
-		for (k = 0; k < NCOEFF; k++) {
-			sk2 = (*hfccoeff++) >> 4;
-			sk = (*hfccoeff++) >> 4;
-			if (sk > 32767 || sk < -32767 || sk2 > 32767
-			    || sk2 < -32767)
-				printk(KERN_WARNING
-				       "DTMF-Detection overflow\n");
-			/* compute |X(k)|**2 */
-			result[k] =
-				(sk * sk) -
-				(((cos2pik[k] * sk) >> 15) * sk2) +
-				(sk2 * sk2);
-		}
-		data += 64;
-		len -= 64;
-		goto coefficients;
-		break;
-	}
-	dsp->dtmf.size = size;
-
-	if (size < DSP_DTMF_NPOINTS)
-		return dsp->dtmf.digits;
-
-	dsp->dtmf.size = 0;
-
-	/* now we have a full buffer of signed long samples - we do goertzel */
-	for (k = 0; k < NCOEFF; k++) {
-		sk = 0;
-		sk1 = 0;
-		sk2 = 0;
-		buf = dsp->dtmf.buffer;
-		cos2pik_ = cos2pik[k];
-		for (n = 0; n < DSP_DTMF_NPOINTS; n++) {
-			sk = ((cos2pik_ * sk1) >> 15) - sk2 + (*buf++);
-			sk2 = sk1;
-			sk1 = sk;
-		}
-		sk >>= 8;
-		sk2 >>= 8;
-		if (sk > 32767 || sk < -32767 || sk2 > 32767 || sk2 < -32767)
-			printk(KERN_WARNING "DTMF-Detection overflow\n");
-		/* compute |X(k)|**2 */
-		result[k] =
-			(sk * sk) -
-			(((cos2pik[k] * sk) >> 15) * sk2) +
-			(sk2 * sk2);
-	}
-
-	/* our (squared) coefficients have been calculated, we need to process
-	 * them.
-	 */
-coefficients:
-	tresh = 0;
-	for (i = 0; i < NCOEFF; i++) {
-		if (result[i] < 0)
-			result[i] = 0;
-		if (result[i] > dsp->dtmf.treshold) {
-			if (result[i] > tresh)
-				tresh = result[i];
-		}
-	}
-
-	if (tresh == 0) {
-		what = 0;
-		goto storedigit;
-	}
-
-	if (dsp_debug & DEBUG_DSP_DTMFCOEFF) {
-		s32 tresh_100 = tresh/100;
-
-		if (tresh_100 == 0) {
-			tresh_100 = 1;
-			printk(KERN_DEBUG
-				"tresh(%d) too small set tresh/100 to 1\n",
-				tresh);
-		}
-		printk(KERN_DEBUG "a %3d %3d %3d %3d %3d %3d %3d %3d"
-		       " tr:%3d r %3d %3d %3d %3d %3d %3d %3d %3d\n",
-		       result[0] / 10000, result[1] / 10000, result[2] / 10000,
-		       result[3] / 10000, result[4] / 10000, result[5] / 10000,
-		       result[6] / 10000, result[7] / 10000, tresh / 10000,
-		       result[0] / (tresh_100), result[1] / (tresh_100),
-		       result[2] / (tresh_100), result[3] / (tresh_100),
-		       result[4] / (tresh_100), result[5] / (tresh_100),
-		       result[6] / (tresh_100), result[7] / (tresh_100));
-	}
-
-	/* calc digit (lowgroup/highgroup) */
-	lowgroup = -1;
-	highgroup = -1;
-	treshl = tresh >> 3;  /* tones which are not on, must be below 9 dB */
-	tresh = tresh >> 2;  /* touchtones must match within 6 dB */
-	for (i = 0; i < NCOEFF; i++) {
-		if (result[i] < treshl)
-			continue;  /* ignore */
-		if (result[i] < tresh) {
-			lowgroup = -1;
-			highgroup = -1;
-			break;  /* noise in between */
-		}
-		/* good level found. This is allowed only one time per group */
-		if (i < NCOEFF / 2) {
-			/* lowgroup */
-			if (lowgroup >= 0) {
-				/* Bad. Another tone found. */
-				lowgroup = -1;
-				break;
-			} else
-				lowgroup = i;
-		} else {
-			/* higroup */
-			if (highgroup >= 0) {
-				/* Bad. Another tone found. */
-				highgroup = -1;
-				break;
-			} else
-				highgroup = i - (NCOEFF / 2);
-		}
-	}
-
-	/* get digit or null */
-	what = 0;
-	if (lowgroup >= 0 && highgroup >= 0)
-		what = dtmf_matrix[lowgroup][highgroup];
-
-storedigit:
-	if (what && (dsp_debug & DEBUG_DSP_DTMF))
-		printk(KERN_DEBUG "DTMF what: %c\n", what);
-
-	if (dsp->dtmf.lastwhat != what)
-		dsp->dtmf.count = 0;
-
-	/* the tone (or no tone) must remain 3 times without change */
-	if (dsp->dtmf.count == 2) {
-		if (dsp->dtmf.lastdigit != what) {
-			dsp->dtmf.lastdigit = what;
-			if (what) {
-				if (dsp_debug & DEBUG_DSP_DTMF)
-					printk(KERN_DEBUG "DTMF digit: %c\n",
-					       what);
-				if ((strlen(dsp->dtmf.digits) + 1)
-				    < sizeof(dsp->dtmf.digits)) {
-					dsp->dtmf.digits[strlen(
-							dsp->dtmf.digits) + 1] = '\0';
-					dsp->dtmf.digits[strlen(
-							dsp->dtmf.digits)] = what;
-				}
-			}
-		}
-	} else
-		dsp->dtmf.count++;
-
-	dsp->dtmf.lastwhat = what;
-
-	goto again;
-}
diff --git a/drivers/isdn/mISDN/dsp_ecdis.h b/drivers/isdn/mISDN/dsp_ecdis.h
deleted file mode 100644
index 4bcdf321875d..000000000000
--- a/drivers/isdn/mISDN/dsp_ecdis.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * SpanDSP - a series of DSP components for telephony
- *
- * ec_disable_detector.h - A detector which should eventually meet the
- *                         G.164/G.165 requirements for detecting the
- *                         2100Hz echo cancellor disable tone.
- *
- * Written by Steve Underwood <steveu@coppice.org>
- *
- * Copyright (C) 2001 Steve Underwood
- *
- * All rights reserved.
- */
-
-#include "dsp_biquad.h"
-
-struct ec_disable_detector_state {
-	struct biquad2_state notch;
-	int notch_level;
-	int channel_level;
-	int tone_present;
-	int tone_cycle_duration;
-	int good_cycles;
-	int hit;
-};
-
-
-#define FALSE 0
-#define TRUE (!FALSE)
-
-static inline void
-echo_can_disable_detector_init(struct ec_disable_detector_state *det)
-{
-	/* Elliptic notch */
-	/* This is actually centred at 2095Hz, but gets the balance we want, due
-	   to the asymmetric walls of the notch */
-	biquad2_init(&det->notch,
-		     (int32_t)(-0.7600000 * 32768.0),
-		     (int32_t)(-0.1183852 * 32768.0),
-		     (int32_t)(-0.5104039 * 32768.0),
-		     (int32_t)(0.1567596 * 32768.0),
-		     (int32_t)(1.0000000 * 32768.0));
-
-	det->channel_level = 0;
-	det->notch_level = 0;
-	det->tone_present = FALSE;
-	det->tone_cycle_duration = 0;
-	det->good_cycles = 0;
-	det->hit = 0;
-}
-/*- End of function --------------------------------------------------------*/
-
-static inline int
-echo_can_disable_detector_update(struct ec_disable_detector_state *det,
-				 int16_t amp)
-{
-	int16_t notched;
-
-	notched = biquad2(&det->notch, amp);
-	/* Estimate the overall energy in the channel, and the energy in
-	   the notch (i.e. overall channel energy - tone energy => noise).
-	   Use abs instead of multiply for speed (is it really faster?).
-	   Damp the overall energy a little more for a stable result.
-	   Damp the notch energy a little less, so we don't damp out the
-	   blip every time the phase reverses */
-	det->channel_level += ((abs(amp) - det->channel_level) >> 5);
-	det->notch_level += ((abs(notched) - det->notch_level) >> 4);
-	if (det->channel_level > 280) {
-		/* There is adequate energy in the channel.
-		   Is it mostly at 2100Hz? */
-		if (det->notch_level * 6 < det->channel_level) {
-			/* The notch says yes, so we have the tone. */
-			if (!det->tone_present) {
-				/* Do we get a kick every 450+-25ms? */
-				if (det->tone_cycle_duration >= 425 * 8
-				    && det->tone_cycle_duration <= 475 * 8) {
-					det->good_cycles++;
-					if (det->good_cycles > 2)
-						det->hit = TRUE;
-				}
-				det->tone_cycle_duration = 0;
-			}
-			det->tone_present = TRUE;
-		} else
-			det->tone_present = FALSE;
-		det->tone_cycle_duration++;
-	} else {
-		det->tone_present = FALSE;
-		det->tone_cycle_duration = 0;
-		det->good_cycles = 0;
-	}
-	return det->hit;
-}
-/*- End of function --------------------------------------------------------*/
-/*- End of file ------------------------------------------------------------*/
diff --git a/drivers/isdn/mISDN/dsp_hwec.c b/drivers/isdn/mISDN/dsp_hwec.c
deleted file mode 100644
index 0cd216e28f00..000000000000
--- a/drivers/isdn/mISDN/dsp_hwec.c
+++ /dev/null
@@ -1,122 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * dsp_hwec.c:
- * builtin mISDN dsp pipeline element for enabling the hw echocanceller
- *
- * Copyright (C) 2007, Nadi Sarrar
- *
- * Nadi Sarrar <nadi@beronet.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/mISDNdsp.h>
-#include <linux/mISDNif.h>
-#include "core.h"
-#include "dsp.h"
-#include "dsp_hwec.h"
-
-static struct mISDN_dsp_element_arg args[] = {
-	{ "deftaps", "128", "Set the number of taps of cancellation." },
-};
-
-static struct mISDN_dsp_element dsp_hwec_p = {
-	.name = "hwec",
-	.new = NULL,
-	.free = NULL,
-	.process_tx = NULL,
-	.process_rx = NULL,
-	.num_args = ARRAY_SIZE(args),
-	.args = args,
-};
-struct mISDN_dsp_element *dsp_hwec = &dsp_hwec_p;
-
-void dsp_hwec_enable(struct dsp *dsp, const char *arg)
-{
-	int deftaps = 128,
-		len;
-	struct mISDN_ctrl_req	cq;
-
-	if (!dsp) {
-		printk(KERN_ERR "%s: failed to enable hwec: dsp is NULL\n",
-		       __func__);
-		return;
-	}
-
-	if (!arg)
-		goto _do;
-
-	len = strlen(arg);
-	if (!len)
-		goto _do;
-
-	{
-		char *dup, *next, *tok, *name, *val;
-		int tmp;
-
-		dup = next = kstrdup(arg, GFP_ATOMIC);
-		if (!dup)
-			return;
-
-		while ((tok = strsep(&next, ","))) {
-			if (!strlen(tok))
-				continue;
-			name = strsep(&tok, "=");
-			val = tok;
-
-			if (!val)
-				continue;
-
-			if (!strcmp(name, "deftaps")) {
-				if (sscanf(val, "%d", &tmp) == 1)
-					deftaps = tmp;
-			}
-		}
-
-		kfree(dup);
-	}
-
-_do:
-	printk(KERN_DEBUG "%s: enabling hwec with deftaps=%d\n",
-	       __func__, deftaps);
-	memset(&cq, 0, sizeof(cq));
-	cq.op = MISDN_CTRL_HFC_ECHOCAN_ON;
-	cq.p1 = deftaps;
-	if (!dsp->ch.peer->ctrl(&dsp->ch, CONTROL_CHANNEL, &cq)) {
-		printk(KERN_DEBUG "%s: CONTROL_CHANNEL failed\n",
-		       __func__);
-		return;
-	}
-}
-
-void dsp_hwec_disable(struct dsp *dsp)
-{
-	struct mISDN_ctrl_req	cq;
-
-	if (!dsp) {
-		printk(KERN_ERR "%s: failed to disable hwec: dsp is NULL\n",
-		       __func__);
-		return;
-	}
-
-	printk(KERN_DEBUG "%s: disabling hwec\n", __func__);
-	memset(&cq, 0, sizeof(cq));
-	cq.op = MISDN_CTRL_HFC_ECHOCAN_OFF;
-	if (!dsp->ch.peer->ctrl(&dsp->ch, CONTROL_CHANNEL, &cq)) {
-		printk(KERN_DEBUG "%s: CONTROL_CHANNEL failed\n",
-		       __func__);
-		return;
-	}
-}
-
-int dsp_hwec_init(void)
-{
-	mISDN_dsp_element_register(dsp_hwec);
-
-	return 0;
-}
-
-void dsp_hwec_exit(void)
-{
-	mISDN_dsp_element_unregister(dsp_hwec);
-}
diff --git a/drivers/isdn/mISDN/dsp_hwec.h b/drivers/isdn/mISDN/dsp_hwec.h
deleted file mode 100644
index c9cb0ea249da..000000000000
--- a/drivers/isdn/mISDN/dsp_hwec.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * dsp_hwec.h
- */
-
-extern struct mISDN_dsp_element *dsp_hwec;
-extern void dsp_hwec_enable(struct dsp *dsp, const char *arg);
-extern void dsp_hwec_disable(struct dsp *dsp);
-extern int  dsp_hwec_init(void);
-extern void dsp_hwec_exit(void);
diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c
deleted file mode 100644
index 55693dc7206b..000000000000
--- a/drivers/isdn/mISDN/dsp_pipeline.c
+++ /dev/null
@@ -1,300 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * dsp_pipeline.c: pipelined audio processing
- *
- * Copyright (C) 2007, Nadi Sarrar
- *
- * Nadi Sarrar <nadi@beronet.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/string.h>
-#include <linux/mISDNif.h>
-#include <linux/mISDNdsp.h>
-#include <linux/export.h>
-#include "dsp.h"
-#include "dsp_hwec.h"
-
-struct dsp_pipeline_entry {
-	struct mISDN_dsp_element *elem;
-	void                *p;
-	struct list_head     list;
-};
-struct dsp_element_entry {
-	struct mISDN_dsp_element *elem;
-	struct device	     dev;
-	struct list_head     list;
-};
-
-static LIST_HEAD(dsp_elements);
-
-/* sysfs */
-static const struct class elements_class = {
-	.name = "dsp_pipeline",
-};
-
-static ssize_t
-attr_show_args(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct mISDN_dsp_element *elem = dev_get_drvdata(dev);
-	int i;
-	char *p = buf;
-
-	*buf = 0;
-	for (i = 0; i < elem->num_args; i++)
-		p += sprintf(p, "Name:        %s\n%s%s%sDescription: %s\n\n",
-			     elem->args[i].name,
-			     elem->args[i].def ? "Default:     " : "",
-			     elem->args[i].def ? elem->args[i].def : "",
-			     elem->args[i].def ? "\n" : "",
-			     elem->args[i].desc);
-
-	return p - buf;
-}
-
-static struct device_attribute element_attributes[] = {
-	__ATTR(args, 0444, attr_show_args, NULL),
-};
-
-static void
-mISDN_dsp_dev_release(struct device *dev)
-{
-	struct dsp_element_entry *entry =
-		container_of(dev, struct dsp_element_entry, dev);
-	list_del(&entry->list);
-	kfree(entry);
-}
-
-int mISDN_dsp_element_register(struct mISDN_dsp_element *elem)
-{
-	struct dsp_element_entry *entry;
-	int ret, i;
-
-	if (!elem)
-		return -EINVAL;
-
-	entry = kzalloc_obj(struct dsp_element_entry, GFP_ATOMIC);
-	if (!entry)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&entry->list);
-	entry->elem = elem;
-
-	entry->dev.class = &elements_class;
-	entry->dev.release = mISDN_dsp_dev_release;
-	dev_set_drvdata(&entry->dev, elem);
-	dev_set_name(&entry->dev, "%s", elem->name);
-	ret = device_register(&entry->dev);
-	if (ret) {
-		printk(KERN_ERR "%s: failed to register %s\n",
-		       __func__, elem->name);
-		goto err1;
-	}
-	list_add_tail(&entry->list, &dsp_elements);
-
-	for (i = 0; i < ARRAY_SIZE(element_attributes); ++i) {
-		ret = device_create_file(&entry->dev,
-					 &element_attributes[i]);
-		if (ret) {
-			printk(KERN_ERR "%s: failed to create device file\n",
-			       __func__);
-			goto err2;
-		}
-	}
-
-	return 0;
-
-err2:
-	device_unregister(&entry->dev);
-	return ret;
-err1:
-	put_device(&entry->dev);
-	return ret;
-}
-EXPORT_SYMBOL(mISDN_dsp_element_register);
-
-void mISDN_dsp_element_unregister(struct mISDN_dsp_element *elem)
-{
-	struct dsp_element_entry *entry, *n;
-
-	if (!elem)
-		return;
-
-	list_for_each_entry_safe(entry, n, &dsp_elements, list)
-		if (entry->elem == elem) {
-			device_unregister(&entry->dev);
-			return;
-		}
-	printk(KERN_ERR "%s: element %s not in list.\n", __func__, elem->name);
-}
-EXPORT_SYMBOL(mISDN_dsp_element_unregister);
-
-int dsp_pipeline_module_init(void)
-{
-	int err;
-
-	err = class_register(&elements_class);
-	if (err)
-		return err;
-
-	dsp_hwec_init();
-
-	return 0;
-}
-
-void dsp_pipeline_module_exit(void)
-{
-	struct dsp_element_entry *entry, *n;
-
-	dsp_hwec_exit();
-
-	class_unregister(&elements_class);
-
-	list_for_each_entry_safe(entry, n, &dsp_elements, list) {
-		list_del(&entry->list);
-		printk(KERN_WARNING "%s: element was still registered: %s\n",
-		       __func__, entry->elem->name);
-		kfree(entry);
-	}
-}
-
-int dsp_pipeline_init(struct dsp_pipeline *pipeline)
-{
-	if (!pipeline)
-		return -EINVAL;
-
-	INIT_LIST_HEAD(&pipeline->list);
-
-	return 0;
-}
-
-static inline void _dsp_pipeline_destroy(struct dsp_pipeline *pipeline)
-{
-	struct dsp_pipeline_entry *entry, *n;
-
-	list_for_each_entry_safe(entry, n, &pipeline->list, list) {
-		list_del(&entry->list);
-		if (entry->elem == dsp_hwec)
-			dsp_hwec_disable(container_of(pipeline, struct dsp,
-						      pipeline));
-		else
-			entry->elem->free(entry->p);
-		kfree(entry);
-	}
-}
-
-void dsp_pipeline_destroy(struct dsp_pipeline *pipeline)
-{
-
-	if (!pipeline)
-		return;
-
-	_dsp_pipeline_destroy(pipeline);
-}
-
-int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
-{
-	int found = 0;
-	char *dup, *next, *tok, *name, *args;
-	struct dsp_element_entry *entry, *n;
-	struct dsp_pipeline_entry *pipeline_entry;
-	struct mISDN_dsp_element *elem;
-
-	if (!pipeline)
-		return -EINVAL;
-
-	if (!list_empty(&pipeline->list))
-		_dsp_pipeline_destroy(pipeline);
-
-	dup = next = kstrdup(cfg, GFP_ATOMIC);
-	if (!dup)
-		return 0;
-	while ((tok = strsep(&next, "|"))) {
-		if (!strlen(tok))
-			continue;
-		name = strsep(&tok, "(");
-		args = strsep(&tok, ")");
-		if (args && !*args)
-			args = NULL;
-
-		list_for_each_entry_safe(entry, n, &dsp_elements, list)
-			if (!strcmp(entry->elem->name, name)) {
-				elem = entry->elem;
-
-				pipeline_entry = kmalloc_obj(struct dsp_pipeline_entry,
-							     GFP_ATOMIC);
-				if (!pipeline_entry) {
-					printk(KERN_ERR "%s: failed to add "
-					       "entry to pipeline: %s (out of "
-					       "memory)\n", __func__, elem->name);
-					goto _out;
-				}
-				pipeline_entry->elem = elem;
-
-				if (elem == dsp_hwec) {
-					/* This is a hack to make the hwec
-					   available as a pipeline module */
-					dsp_hwec_enable(container_of(pipeline,
-								     struct dsp, pipeline), args);
-					list_add_tail(&pipeline_entry->list,
-						      &pipeline->list);
-				} else {
-					pipeline_entry->p = elem->new(args);
-					if (pipeline_entry->p) {
-						list_add_tail(&pipeline_entry->
-							      list, &pipeline->list);
-					} else {
-						printk(KERN_ERR "%s: failed "
-						       "to add entry to pipeline: "
-						       "%s (new() returned NULL)\n",
-						       __func__, elem->name);
-						kfree(pipeline_entry);
-					}
-				}
-				found = 1;
-				break;
-			}
-
-		if (found)
-			found = 0;
-		else
-			printk(KERN_ERR "%s: element not found, skipping: "
-			       "%s\n", __func__, name);
-	}
-
-_out:
-	if (!list_empty(&pipeline->list))
-		pipeline->inuse = 1;
-	else
-		pipeline->inuse = 0;
-
-	kfree(dup);
-	return 0;
-}
-
-void dsp_pipeline_process_tx(struct dsp_pipeline *pipeline, u8 *data, int len)
-{
-	struct dsp_pipeline_entry *entry;
-
-	if (!pipeline)
-		return;
-
-	list_for_each_entry(entry, &pipeline->list, list)
-		if (entry->elem->process_tx)
-			entry->elem->process_tx(entry->p, data, len);
-}
-
-void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data, int len,
-			     unsigned int txlen)
-{
-	struct dsp_pipeline_entry *entry;
-
-	if (!pipeline)
-		return;
-
-	list_for_each_entry_reverse(entry, &pipeline->list, list)
-		if (entry->elem->process_rx)
-			entry->elem->process_rx(entry->p, data, len, txlen);
-}
diff --git a/drivers/isdn/mISDN/dsp_tones.c b/drivers/isdn/mISDN/dsp_tones.c
deleted file mode 100644
index fa7813ae8d97..000000000000
--- a/drivers/isdn/mISDN/dsp_tones.c
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * Audio support data for ISDN4Linux.
- *
- * Copyright Andreas Eversberg (jolly@eversberg.eu)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#include <linux/gfp.h>
-#include <linux/mISDNif.h>
-#include <linux/mISDNdsp.h>
-#include "core.h"
-#include "dsp.h"
-
-
-#define DATA_S sample_silence
-#define SIZE_S (&sizeof_silence)
-#define DATA_GA sample_german_all
-#define SIZE_GA (&sizeof_german_all)
-#define DATA_GO sample_german_old
-#define SIZE_GO (&sizeof_german_old)
-#define DATA_DT sample_american_dialtone
-#define SIZE_DT (&sizeof_american_dialtone)
-#define DATA_RI sample_american_ringing
-#define SIZE_RI (&sizeof_american_ringing)
-#define DATA_BU sample_american_busy
-#define SIZE_BU (&sizeof_american_busy)
-#define DATA_S1 sample_special1
-#define SIZE_S1 (&sizeof_special1)
-#define DATA_S2 sample_special2
-#define SIZE_S2 (&sizeof_special2)
-#define DATA_S3 sample_special3
-#define SIZE_S3 (&sizeof_special3)
-
-/***************/
-/* tones loops */
-/***************/
-
-/* all tones are alaw encoded */
-/* the last sample+1 is in phase with the first sample. the error is low */
-
-static u8 sample_german_all[] = {
-	0x80, 0xab, 0x81, 0x6d, 0xfd, 0xdd, 0x5d, 0x9d,
-	0x4d, 0xd1, 0x89, 0x88, 0xd0, 0x4c, 0x9c, 0x5c,
-	0xdc, 0xfc, 0x6c,
-	0x80, 0xab, 0x81, 0x6d, 0xfd, 0xdd, 0x5d, 0x9d,
-	0x4d, 0xd1, 0x89, 0x88, 0xd0, 0x4c, 0x9c, 0x5c,
-	0xdc, 0xfc, 0x6c,
-	0x80, 0xab, 0x81, 0x6d, 0xfd, 0xdd, 0x5d, 0x9d,
-	0x4d, 0xd1, 0x89, 0x88, 0xd0, 0x4c, 0x9c, 0x5c,
-	0xdc, 0xfc, 0x6c,
-	0x80, 0xab, 0x81, 0x6d, 0xfd, 0xdd, 0x5d, 0x9d,
-	0x4d, 0xd1, 0x89, 0x88, 0xd0, 0x4c, 0x9c, 0x5c,
-	0xdc, 0xfc, 0x6c,
-};
-static u32 sizeof_german_all = sizeof(sample_german_all);
-
-static u8 sample_german_old[] = {
-	0xec, 0x68, 0xe1, 0x6d, 0x6d, 0x91, 0x51, 0xed,
-	0x6d, 0x01, 0x1e, 0x10, 0x0c, 0x90, 0x60, 0x70,
-	0x8c,
-	0xec, 0x68, 0xe1, 0x6d, 0x6d, 0x91, 0x51, 0xed,
-	0x6d, 0x01, 0x1e, 0x10, 0x0c, 0x90, 0x60, 0x70,
-	0x8c,
-	0xec, 0x68, 0xe1, 0x6d, 0x6d, 0x91, 0x51, 0xed,
-	0x6d, 0x01, 0x1e, 0x10, 0x0c, 0x90, 0x60, 0x70,
-	0x8c,
-	0xec, 0x68, 0xe1, 0x6d, 0x6d, 0x91, 0x51, 0xed,
-	0x6d, 0x01, 0x1e, 0x10, 0x0c, 0x90, 0x60, 0x70,
-	0x8c,
-};
-static u32 sizeof_german_old = sizeof(sample_german_old);
-
-static u8 sample_american_dialtone[] = {
-	0x2a, 0x18, 0x90, 0x6c, 0x4c, 0xbc, 0x4c, 0x6c,
-	0x10, 0x58, 0x32, 0xb9, 0x31, 0x2d, 0x8d, 0x0d,
-	0x8d, 0x2d, 0x31, 0x99, 0x0f, 0x28, 0x60, 0xf0,
-	0xd0, 0x50, 0xd0, 0x30, 0x60, 0x08, 0x8e, 0x67,
-	0x09, 0x19, 0x21, 0xe1, 0xd9, 0xb9, 0x29, 0x67,
-	0x83, 0x02, 0xce, 0xbe, 0xee, 0x1a, 0x1b, 0xef,
-	0xbf, 0xcf, 0x03, 0x82, 0x66, 0x28, 0xb8, 0xd8,
-	0xe0, 0x20, 0x18, 0x08, 0x66, 0x8f, 0x09, 0x61,
-	0x31, 0xd1, 0x51, 0xd1, 0xf1, 0x61, 0x29, 0x0e,
-	0x98, 0x30, 0x2c, 0x8c, 0x0c, 0x8c, 0x2c, 0x30,
-	0xb8, 0x33, 0x59, 0x11, 0x6d, 0x4d, 0xbd, 0x4d,
-	0x6d, 0x91, 0x19,
-};
-static u32 sizeof_american_dialtone = sizeof(sample_american_dialtone);
-
-static u8 sample_american_ringing[] = {
-	0x2a, 0xe0, 0xac, 0x0c, 0xbc, 0x4c, 0x8c, 0x90,
-	0x48, 0xc7, 0xc1, 0xed, 0xcd, 0x4d, 0xcd, 0xed,
-	0xc1, 0xb7, 0x08, 0x30, 0xec, 0xcc, 0xcc, 0x8c,
-	0x10, 0x58, 0x1a, 0x99, 0x71, 0xed, 0x8d, 0x8d,
-	0x2d, 0x41, 0x89, 0x9e, 0x20, 0x70, 0x2c, 0xec,
-	0x2c, 0x70, 0x20, 0x86, 0x77, 0xe1, 0x31, 0x11,
-	0xd1, 0xf1, 0x81, 0x09, 0xa3, 0x56, 0x58, 0x00,
-	0x40, 0xc0, 0x60, 0x38, 0x46, 0x43, 0x57, 0x39,
-	0xd9, 0x59, 0x99, 0xc9, 0x77, 0x2f, 0x2e, 0xc6,
-	0xd6, 0x28, 0xd6, 0x36, 0x26, 0x2e, 0x8a, 0xa3,
-	0x43, 0x63, 0x4b, 0x4a, 0x62, 0x42, 0xa2, 0x8b,
-	0x2f, 0x27, 0x37, 0xd7, 0x29, 0xd7, 0xc7, 0x2f,
-	0x2e, 0x76, 0xc8, 0x98, 0x58, 0xd8, 0x38, 0x56,
-	0x42, 0x47, 0x39, 0x61, 0xc1, 0x41, 0x01, 0x59,
-	0x57, 0xa2, 0x08, 0x80, 0xf0, 0xd0, 0x10, 0x30,
-	0xe0, 0x76, 0x87, 0x21, 0x71, 0x2d, 0xed, 0x2d,
-	0x71, 0x21, 0x9f, 0x88, 0x40, 0x2c, 0x8c, 0x8c,
-	0xec, 0x70, 0x98, 0x1b, 0x59, 0x11, 0x8d, 0xcd,
-	0xcd, 0xed, 0x31, 0x09, 0xb6, 0xc0, 0xec, 0xcc,
-	0x4c, 0xcc, 0xec, 0xc0, 0xc6, 0x49, 0x91, 0x8d,
-	0x4d, 0xbd, 0x0d, 0xad, 0xe1,
-};
-static u32 sizeof_american_ringing = sizeof(sample_american_ringing);
-
-static u8 sample_american_busy[] = {
-	0x2a, 0x00, 0x6c, 0x4c, 0x4c, 0x6c, 0xb0, 0x66,
-	0x99, 0x11, 0x6d, 0x8d, 0x2d, 0x41, 0xd7, 0x96,
-	0x60, 0xf0, 0x70, 0x40, 0x58, 0xf6, 0x53, 0x57,
-	0x09, 0x89, 0xd7, 0x5f, 0xe3, 0x2a, 0xe3, 0x5f,
-	0xd7, 0x89, 0x09, 0x57, 0x53, 0xf6, 0x58, 0x40,
-	0x70, 0xf0, 0x60, 0x96, 0xd7, 0x41, 0x2d, 0x8d,
-	0x6d, 0x11, 0x99, 0x66, 0xb0, 0x6c, 0x4c, 0x4c,
-	0x6c, 0x00, 0x2a, 0x01, 0x6d, 0x4d, 0x4d, 0x6d,
-	0xb1, 0x67, 0x98, 0x10, 0x6c, 0x8c, 0x2c, 0x40,
-	0xd6, 0x97, 0x61, 0xf1, 0x71, 0x41, 0x59, 0xf7,
-	0x52, 0x56, 0x08, 0x88, 0xd6, 0x5e, 0xe2, 0x2a,
-	0xe2, 0x5e, 0xd6, 0x88, 0x08, 0x56, 0x52, 0xf7,
-	0x59, 0x41, 0x71, 0xf1, 0x61, 0x97, 0xd6, 0x40,
-	0x2c, 0x8c, 0x6c, 0x10, 0x98, 0x67, 0xb1, 0x6d,
-	0x4d, 0x4d, 0x6d, 0x01,
-};
-static u32 sizeof_american_busy = sizeof(sample_american_busy);
-
-static u8 sample_special1[] = {
-	0x2a, 0x2c, 0xbc, 0x6c, 0xd6, 0x71, 0xbd, 0x0d,
-	0xd9, 0x80, 0xcc, 0x4c, 0x40, 0x39, 0x0d, 0xbd,
-	0x11, 0x86, 0xec, 0xbc, 0xec, 0x0e, 0x51, 0xbd,
-	0x8d, 0x89, 0x30, 0x4c, 0xcc, 0xe0, 0xe1, 0xcd,
-	0x4d, 0x31, 0x88, 0x8c, 0xbc, 0x50, 0x0f, 0xed,
-	0xbd, 0xed, 0x87, 0x10, 0xbc, 0x0c, 0x38, 0x41,
-	0x4d, 0xcd, 0x81, 0xd8, 0x0c, 0xbc, 0x70, 0xd7,
-	0x6d, 0xbd, 0x2d,
-};
-static u32 sizeof_special1 = sizeof(sample_special1);
-
-static u8 sample_special2[] = {
-	0x2a, 0xcc, 0x8c, 0xd7, 0x4d, 0x2d, 0x18, 0xbc,
-	0x10, 0xc1, 0xbd, 0xc1, 0x10, 0xbc, 0x18, 0x2d,
-	0x4d, 0xd7, 0x8c, 0xcc, 0x2a, 0xcd, 0x8d, 0xd6,
-	0x4c, 0x2c, 0x19, 0xbd, 0x11, 0xc0, 0xbc, 0xc0,
-	0x11, 0xbd, 0x19, 0x2c, 0x4c, 0xd6, 0x8d, 0xcd,
-	0x2a, 0xcc, 0x8c, 0xd7, 0x4d, 0x2d, 0x18, 0xbc,
-	0x10, 0xc1, 0xbd, 0xc1, 0x10, 0xbc, 0x18, 0x2d,
-	0x4d, 0xd7, 0x8c, 0xcc, 0x2a, 0xcd, 0x8d, 0xd6,
-	0x4c, 0x2c, 0x19, 0xbd, 0x11, 0xc0, 0xbc, 0xc0,
-	0x11, 0xbd, 0x19, 0x2c, 0x4c, 0xd6, 0x8d, 0xcd,
-};
-static u32 sizeof_special2 = sizeof(sample_special2);
-
-static u8 sample_special3[] = {
-	0x2a, 0xbc, 0x18, 0xcd, 0x11, 0x2c, 0x8c, 0xc1,
-	0x4d, 0xd6, 0xbc, 0xd6, 0x4d, 0xc1, 0x8c, 0x2c,
-	0x11, 0xcd, 0x18, 0xbc, 0x2a, 0xbd, 0x19, 0xcc,
-	0x10, 0x2d, 0x8d, 0xc0, 0x4c, 0xd7, 0xbd, 0xd7,
-	0x4c, 0xc0, 0x8d, 0x2d, 0x10, 0xcc, 0x19, 0xbd,
-	0x2a, 0xbc, 0x18, 0xcd, 0x11, 0x2c, 0x8c, 0xc1,
-	0x4d, 0xd6, 0xbc, 0xd6, 0x4d, 0xc1, 0x8c, 0x2c,
-	0x11, 0xcd, 0x18, 0xbc, 0x2a, 0xbd, 0x19, 0xcc,
-	0x10, 0x2d, 0x8d, 0xc0, 0x4c, 0xd7, 0xbd, 0xd7,
-	0x4c, 0xc0, 0x8d, 0x2d, 0x10, 0xcc, 0x19, 0xbd,
-};
-static u32 sizeof_special3 = sizeof(sample_special3);
-
-static u8 sample_silence[] = {
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-	0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a,
-};
-static u32 sizeof_silence = sizeof(sample_silence);
-
-struct tones_samples {
-	u32 *len;
-	u8 *data;
-};
-static struct
-tones_samples samples[] = {
-	{&sizeof_german_all, sample_german_all},
-	{&sizeof_german_old, sample_german_old},
-	{&sizeof_american_dialtone, sample_american_dialtone},
-	{&sizeof_american_ringing, sample_american_ringing},
-	{&sizeof_american_busy, sample_american_busy},
-	{&sizeof_special1, sample_special1},
-	{&sizeof_special2, sample_special2},
-	{&sizeof_special3, sample_special3},
-	{NULL, NULL},
-};
-
-/***********************************
- * generate ulaw from alaw samples *
- ***********************************/
-
-void
-dsp_audio_generate_ulaw_samples(void)
-{
-	int i, j;
-
-	i = 0;
-	while (samples[i].len) {
-		j = 0;
-		while (j < (*samples[i].len)) {
-			samples[i].data[j] =
-				dsp_audio_alaw_to_ulaw[samples[i].data[j]];
-			j++;
-		}
-		i++;
-	}
-}
-
-
-/****************************
- * tone sequence definition *
- ****************************/
-
-static struct pattern {
-	int tone;
-	u8 *data[10];
-	u32 *siz[10];
-	u32 seq[10];
-} pattern[] = {
-	{TONE_GERMAN_DIALTONE,
-	 {DATA_GA, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GA, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {1900, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_OLDDIALTONE,
-	 {DATA_GO, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GO, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {1998, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_AMERICAN_DIALTONE,
-	 {DATA_DT, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_DT, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {8000, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_DIALPBX,
-	 {DATA_GA, DATA_S, DATA_GA, DATA_S, DATA_GA, DATA_S, NULL, NULL, NULL,
-	  NULL},
-	 {SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, NULL, NULL, NULL,
-	  NULL},
-	 {2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_OLDDIALPBX,
-	 {DATA_GO, DATA_S, DATA_GO, DATA_S, DATA_GO, DATA_S, NULL, NULL, NULL,
-	  NULL},
-	 {SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, NULL, NULL, NULL,
-	  NULL},
-	 {2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} },
-
-	{TONE_AMERICAN_DIALPBX,
-	 {DATA_DT, DATA_S, DATA_DT, DATA_S, DATA_DT, DATA_S, NULL, NULL, NULL,
-	  NULL},
-	 {SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, NULL, NULL, NULL,
-	  NULL},
-	 {2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_RINGING,
-	 {DATA_GA, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GA, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {8000, 32000, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_OLDRINGING,
-	 {DATA_GO, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GO, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {8000, 40000, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_AMERICAN_RINGING,
-	 {DATA_RI, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_RI, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {8000, 32000, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_RINGPBX,
-	 {DATA_GA, DATA_S, DATA_GA, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {4000, 4000, 4000, 28000, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_OLDRINGPBX,
-	 {DATA_GO, DATA_S, DATA_GO, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {4000, 4000, 4000, 28000, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_AMERICAN_RINGPBX,
-	 {DATA_RI, DATA_S, DATA_RI, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_RI, SIZE_S, SIZE_RI, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {4000, 4000, 4000, 28000, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_BUSY,
-	 {DATA_GA, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GA, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {4000, 4000, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_OLDBUSY,
-	 {DATA_GO, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GO, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {1000, 5000, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_AMERICAN_BUSY,
-	 {DATA_BU, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_BU, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {4000, 4000, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_HANGUP,
-	 {DATA_GA, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GA, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {4000, 4000, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_OLDHANGUP,
-	 {DATA_GO, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GO, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {1000, 5000, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_AMERICAN_HANGUP,
-	 {DATA_DT, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_DT, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {8000, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_SPECIAL_INFO,
-	 {DATA_S1, DATA_S2, DATA_S3, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_S1, SIZE_S2, SIZE_S3, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {2666, 2666, 2666, 8002, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_GASSENBESETZT,
-	 {DATA_GA, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GA, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {2000, 2000, 0, 0, 0, 0, 0, 0, 0, 0} },
-
-	{TONE_GERMAN_AUFSCHALTTON,
-	 {DATA_GO, DATA_S, DATA_GO, DATA_S, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {1000, 5000, 1000, 17000, 0, 0, 0, 0, 0, 0} },
-
-	{0,
-	 {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
-	 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
-};
-
-/******************
- * copy tone data *
- ******************/
-
-/* an sk_buff is generated from the number of samples needed.
- * the count will be changed and may begin from 0 each pattern period.
- * the clue is to precalculate the pointers and legths to use only one
- * memcpy per function call, or two memcpy if the tone sequence changes.
- *
- * pattern - the type of the pattern
- * count - the sample from the beginning of the pattern (phase)
- * len - the number of bytes
- *
- * return - the sk_buff with the sample
- *
- * if tones has finished (e.g. knocking tone), dsp->tones is turned off
- */
-void dsp_tone_copy(struct dsp *dsp, u8 *data, int len)
-{
-	int index, count, start, num;
-	struct pattern *pat;
-	struct dsp_tone *tone = &dsp->tone;
-
-	/* if we have no tone, we copy silence */
-	if (!tone->tone) {
-		memset(data, dsp_silence, len);
-		return;
-	}
-
-	/* process pattern */
-	pat = (struct pattern *)tone->pattern;
-	/* points to the current pattern */
-	index = tone->index; /* gives current sequence index */
-	count = tone->count; /* gives current sample */
-
-	/* copy sample */
-	while (len) {
-		/* find sample to start with */
-		while (42) {
-			/* wrap around */
-			if (!pat->seq[index]) {
-				count = 0;
-				index = 0;
-			}
-			/* check if we are currently playing this tone */
-			if (count < pat->seq[index])
-				break;
-			if (dsp_debug & DEBUG_DSP_TONE)
-				printk(KERN_DEBUG "%s: reaching next sequence "
-				       "(index=%d)\n", __func__, index);
-			count -= pat->seq[index];
-			index++;
-		}
-		/* calculate start and number of samples */
-		start = count % (*(pat->siz[index]));
-		num = len;
-		if (num + count > pat->seq[index])
-			num = pat->seq[index] - count;
-		if (num + start > (*(pat->siz[index])))
-			num = (*(pat->siz[index])) - start;
-		/* copy memory */
-		memcpy(data, pat->data[index] + start, num);
-		/* reduce length */
-		data += num;
-		count += num;
-		len -= num;
-	}
-	tone->index = index;
-	tone->count = count;
-
-	/* return sk_buff */
-	return;
-}
-
-
-/*******************************
- * send HW message to hfc card *
- *******************************/
-
-static void
-dsp_tone_hw_message(struct dsp *dsp, u8 *sample, int len)
-{
-	struct sk_buff *nskb;
-
-	/* unlocking is not required, because we don't expect a response */
-	nskb = _alloc_mISDN_skb(PH_CONTROL_REQ,
-				(len) ? HFC_SPL_LOOP_ON : HFC_SPL_LOOP_OFF, len, sample,
-				GFP_ATOMIC);
-	if (nskb) {
-		if (dsp->ch.peer) {
-			if (dsp->ch.recv(dsp->ch.peer, nskb))
-				dev_kfree_skb(nskb);
-		} else
-			dev_kfree_skb(nskb);
-	}
-}
-
-
-/*****************
- * timer expires *
- *****************/
-void
-dsp_tone_timeout(struct timer_list *t)
-{
-	struct dsp *dsp = timer_container_of(dsp, t, tone.tl);
-	struct dsp_tone *tone = &dsp->tone;
-	struct pattern *pat = (struct pattern *)tone->pattern;
-	int index = tone->index;
-
-	if (!tone->tone)
-		return;
-
-	index++;
-	if (!pat->seq[index])
-		index = 0;
-	tone->index = index;
-
-	/* set next tone */
-	if (pat->data[index] == DATA_S)
-		dsp_tone_hw_message(dsp, NULL, 0);
-	else
-		dsp_tone_hw_message(dsp, pat->data[index], *(pat->siz[index]));
-	/* set timer */
-	tone->tl.expires = jiffies + (pat->seq[index] * HZ) / 8000;
-	add_timer(&tone->tl);
-}
-
-
-/********************
- * set/release tone *
- ********************/
-
-/*
- * tones are relaized by streaming or by special loop commands if supported
- * by hardware. when hardware is used, the patterns will be controlled by
- * timers.
- */
-int
-dsp_tone(struct dsp *dsp, int tone)
-{
-	struct pattern *pat;
-	int i;
-	struct dsp_tone *tonet = &dsp->tone;
-
-	tonet->software = 0;
-	tonet->hardware = 0;
-
-	/* we turn off the tone */
-	if (!tone) {
-		if (dsp->features.hfc_loops && timer_pending(&tonet->tl))
-			timer_delete(&tonet->tl);
-		if (dsp->features.hfc_loops)
-			dsp_tone_hw_message(dsp, NULL, 0);
-		tonet->tone = 0;
-		return 0;
-	}
-
-	pat = NULL;
-	i = 0;
-	while (pattern[i].tone) {
-		if (pattern[i].tone == tone) {
-			pat = &pattern[i];
-			break;
-		}
-		i++;
-	}
-	if (!pat) {
-		printk(KERN_WARNING "dsp: given tone 0x%x is invalid\n", tone);
-		return -EINVAL;
-	}
-	if (dsp_debug & DEBUG_DSP_TONE)
-		printk(KERN_DEBUG "%s: now starting tone %d (index=%d)\n",
-		       __func__, tone, 0);
-	tonet->tone = tone;
-	tonet->pattern = pat;
-	tonet->index = 0;
-	tonet->count = 0;
-
-	if (dsp->features.hfc_loops) {
-		tonet->hardware = 1;
-		/* set first tone */
-		dsp_tone_hw_message(dsp, pat->data[0], *(pat->siz[0]));
-		/* set timer */
-		if (timer_pending(&tonet->tl))
-			timer_delete(&tonet->tl);
-		tonet->tl.expires = jiffies + (pat->seq[0] * HZ) / 8000;
-		add_timer(&tonet->tl);
-	} else {
-		tonet->software = 1;
-	}
-
-	return 0;
-}
diff --git a/drivers/isdn/mISDN/fsm.c b/drivers/isdn/mISDN/fsm.c
deleted file mode 100644
index 825b686496d2..000000000000
--- a/drivers/isdn/mISDN/fsm.c
+++ /dev/null
@@ -1,176 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * finite state machine implementation
- *
- * Author       Karsten Keil <kkeil@novell.com>
- *
- * Thanks to    Jan den Ouden
- *              Fritz Elfert
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include "fsm.h"
-
-#define FSM_TIMER_DEBUG 0
-
-int
-mISDN_FsmNew(struct Fsm *fsm,
-	     struct FsmNode *fnlist, int fncount)
-{
-	int i;
-
-	fsm->jumpmatrix =
-		kzalloc(array3_size(sizeof(FSMFNPTR), fsm->state_count,
-				    fsm->event_count),
-			GFP_KERNEL);
-	if (fsm->jumpmatrix == NULL)
-		return -ENOMEM;
-
-	for (i = 0; i < fncount; i++)
-		if ((fnlist[i].state >= fsm->state_count) ||
-		    (fnlist[i].event >= fsm->event_count)) {
-			printk(KERN_ERR
-			       "mISDN_FsmNew Error: %d st(%ld/%ld) ev(%ld/%ld)\n",
-			       i, (long)fnlist[i].state, (long)fsm->state_count,
-			       (long)fnlist[i].event, (long)fsm->event_count);
-		} else
-			fsm->jumpmatrix[fsm->state_count * fnlist[i].event +
-					fnlist[i].state] = (FSMFNPTR) fnlist[i].routine;
-	return 0;
-}
-EXPORT_SYMBOL(mISDN_FsmNew);
-
-void
-mISDN_FsmFree(struct Fsm *fsm)
-{
-	kfree((void *) fsm->jumpmatrix);
-}
-EXPORT_SYMBOL(mISDN_FsmFree);
-
-int
-mISDN_FsmEvent(struct FsmInst *fi, int event, void *arg)
-{
-	FSMFNPTR r;
-
-	if ((fi->state >= fi->fsm->state_count) ||
-	    (event >= fi->fsm->event_count)) {
-		printk(KERN_ERR
-		       "mISDN_FsmEvent Error st(%ld/%ld) ev(%d/%ld)\n",
-		       (long)fi->state, (long)fi->fsm->state_count, event,
-		       (long)fi->fsm->event_count);
-		return 1;
-	}
-	r = fi->fsm->jumpmatrix[fi->fsm->state_count * event + fi->state];
-	if (r) {
-		if (fi->debug)
-			fi->printdebug(fi, "State %s Event %s",
-				       fi->fsm->strState[fi->state],
-				       fi->fsm->strEvent[event]);
-		r(fi, event, arg);
-		return 0;
-	} else {
-		if (fi->debug)
-			fi->printdebug(fi, "State %s Event %s no action",
-				       fi->fsm->strState[fi->state],
-				       fi->fsm->strEvent[event]);
-		return 1;
-	}
-}
-EXPORT_SYMBOL(mISDN_FsmEvent);
-
-void
-mISDN_FsmChangeState(struct FsmInst *fi, int newstate)
-{
-	fi->state = newstate;
-	if (fi->debug)
-		fi->printdebug(fi, "ChangeState %s",
-			       fi->fsm->strState[newstate]);
-}
-EXPORT_SYMBOL(mISDN_FsmChangeState);
-
-static void
-FsmExpireTimer(struct timer_list *t)
-{
-	struct FsmTimer *ft = timer_container_of(ft, t, tl);
-#if FSM_TIMER_DEBUG
-	if (ft->fi->debug)
-		ft->fi->printdebug(ft->fi, "FsmExpireTimer %lx", (long) ft);
-#endif
-	mISDN_FsmEvent(ft->fi, ft->event, ft->arg);
-}
-
-void
-mISDN_FsmInitTimer(struct FsmInst *fi, struct FsmTimer *ft)
-{
-	ft->fi = fi;
-#if FSM_TIMER_DEBUG
-	if (ft->fi->debug)
-		ft->fi->printdebug(ft->fi, "mISDN_FsmInitTimer %lx", (long) ft);
-#endif
-	timer_setup(&ft->tl, FsmExpireTimer, 0);
-}
-EXPORT_SYMBOL(mISDN_FsmInitTimer);
-
-void
-mISDN_FsmDelTimer(struct FsmTimer *ft, int where)
-{
-#if FSM_TIMER_DEBUG
-	if (ft->fi->debug)
-		ft->fi->printdebug(ft->fi, "mISDN_FsmDelTimer %lx %d",
-				   (long) ft, where);
-#endif
-	timer_delete(&ft->tl);
-}
-EXPORT_SYMBOL(mISDN_FsmDelTimer);
-
-int
-mISDN_FsmAddTimer(struct FsmTimer *ft,
-		  int millisec, int event, void *arg, int where)
-{
-
-#if FSM_TIMER_DEBUG
-	if (ft->fi->debug)
-		ft->fi->printdebug(ft->fi, "mISDN_FsmAddTimer %lx %d %d",
-				   (long) ft, millisec, where);
-#endif
-
-	if (timer_pending(&ft->tl)) {
-		if (ft->fi->debug) {
-			printk(KERN_WARNING
-			       "mISDN_FsmAddTimer: timer already active!\n");
-			ft->fi->printdebug(ft->fi,
-					   "mISDN_FsmAddTimer already active!");
-		}
-		return -1;
-	}
-	ft->event = event;
-	ft->arg = arg;
-	ft->tl.expires = jiffies + (millisec * HZ) / 1000;
-	add_timer(&ft->tl);
-	return 0;
-}
-EXPORT_SYMBOL(mISDN_FsmAddTimer);
-
-void
-mISDN_FsmRestartTimer(struct FsmTimer *ft,
-		      int millisec, int event, void *arg, int where)
-{
-
-#if FSM_TIMER_DEBUG
-	if (ft->fi->debug)
-		ft->fi->printdebug(ft->fi, "mISDN_FsmRestartTimer %lx %d %d",
-				   (long) ft, millisec, where);
-#endif
-
-	if (timer_pending(&ft->tl))
-		timer_delete(&ft->tl);
-	ft->event = event;
-	ft->arg = arg;
-	ft->tl.expires = jiffies + (millisec * HZ) / 1000;
-	add_timer(&ft->tl);
-}
-EXPORT_SYMBOL(mISDN_FsmRestartTimer);
diff --git a/drivers/isdn/mISDN/fsm.h b/drivers/isdn/mISDN/fsm.h
deleted file mode 100644
index 211554b997f8..000000000000
--- a/drivers/isdn/mISDN/fsm.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *
- * Author       Karsten Keil <kkeil@novell.com>
- *
- * Thanks to    Jan den Ouden
- *              Fritz Elfert
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#ifndef _MISDN_FSM_H
-#define _MISDN_FSM_H
-
-#include <linux/timer.h>
-
-/* Statemachine */
-
-struct FsmInst;
-
-typedef void (*FSMFNPTR)(struct FsmInst *, int, void *);
-
-struct Fsm {
-	FSMFNPTR *jumpmatrix;
-	int state_count, event_count;
-	char **strEvent, **strState;
-};
-
-struct FsmInst {
-	struct Fsm *fsm;
-	int state;
-	int debug;
-	void *userdata;
-	int userint;
-	void (*printdebug) (struct FsmInst *, char *, ...);
-};
-
-struct FsmNode {
-	int state, event;
-	void (*routine) (struct FsmInst *, int, void *);
-};
-
-struct FsmTimer {
-	struct FsmInst *fi;
-	struct timer_list tl;
-	int event;
-	void *arg;
-};
-
-extern int mISDN_FsmNew(struct Fsm *, struct FsmNode *, int);
-extern void mISDN_FsmFree(struct Fsm *);
-extern int mISDN_FsmEvent(struct FsmInst *, int , void *);
-extern void mISDN_FsmChangeState(struct FsmInst *, int);
-extern void mISDN_FsmInitTimer(struct FsmInst *, struct FsmTimer *);
-extern int mISDN_FsmAddTimer(struct FsmTimer *, int, int, void *, int);
-extern void mISDN_FsmRestartTimer(struct FsmTimer *, int, int, void *, int);
-extern void mISDN_FsmDelTimer(struct FsmTimer *, int);
-
-#endif
diff --git a/drivers/isdn/mISDN/hwchannel.c b/drivers/isdn/mISDN/hwchannel.c
deleted file mode 100644
index 8c93af06ed02..000000000000
--- a/drivers/isdn/mISDN/hwchannel.c
+++ /dev/null
@@ -1,516 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Author	Karsten Keil <kkeil@novell.com>
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#include <linux/gfp.h>
-#include <linux/module.h>
-#include <linux/mISDNhw.h>
-
-static void
-dchannel_bh(struct work_struct *ws)
-{
-	struct dchannel	*dch  = container_of(ws, struct dchannel, workq);
-	struct sk_buff	*skb;
-	int		err;
-
-	if (test_and_clear_bit(FLG_RECVQUEUE, &dch->Flags)) {
-		while ((skb = skb_dequeue(&dch->rqueue))) {
-			if (likely(dch->dev.D.peer)) {
-				err = dch->dev.D.recv(dch->dev.D.peer, skb);
-				if (err)
-					dev_kfree_skb(skb);
-			} else
-				dev_kfree_skb(skb);
-		}
-	}
-	if (test_and_clear_bit(FLG_PHCHANGE, &dch->Flags)) {
-		if (dch->phfunc)
-			dch->phfunc(dch);
-	}
-}
-
-static void
-bchannel_bh(struct work_struct *ws)
-{
-	struct bchannel	*bch  = container_of(ws, struct bchannel, workq);
-	struct sk_buff	*skb;
-	int		err;
-
-	if (test_and_clear_bit(FLG_RECVQUEUE, &bch->Flags)) {
-		while ((skb = skb_dequeue(&bch->rqueue))) {
-			bch->rcount--;
-			if (likely(bch->ch.peer)) {
-				err = bch->ch.recv(bch->ch.peer, skb);
-				if (err)
-					dev_kfree_skb(skb);
-			} else
-				dev_kfree_skb(skb);
-		}
-	}
-}
-
-int
-mISDN_initdchannel(struct dchannel *ch, int maxlen, void *phf)
-{
-	test_and_set_bit(FLG_HDLC, &ch->Flags);
-	ch->maxlen = maxlen;
-	ch->hw = NULL;
-	ch->rx_skb = NULL;
-	ch->tx_skb = NULL;
-	ch->tx_idx = 0;
-	ch->phfunc = phf;
-	skb_queue_head_init(&ch->squeue);
-	skb_queue_head_init(&ch->rqueue);
-	INIT_LIST_HEAD(&ch->dev.bchannels);
-	INIT_WORK(&ch->workq, dchannel_bh);
-	return 0;
-}
-EXPORT_SYMBOL(mISDN_initdchannel);
-
-int
-mISDN_initbchannel(struct bchannel *ch, unsigned short maxlen,
-		   unsigned short minlen)
-{
-	ch->Flags = 0;
-	ch->minlen = minlen;
-	ch->next_minlen = minlen;
-	ch->init_minlen = minlen;
-	ch->maxlen = maxlen;
-	ch->next_maxlen = maxlen;
-	ch->init_maxlen = maxlen;
-	ch->hw = NULL;
-	ch->rx_skb = NULL;
-	ch->tx_skb = NULL;
-	ch->tx_idx = 0;
-	skb_queue_head_init(&ch->rqueue);
-	ch->rcount = 0;
-	ch->next_skb = NULL;
-	INIT_WORK(&ch->workq, bchannel_bh);
-	return 0;
-}
-EXPORT_SYMBOL(mISDN_initbchannel);
-
-int
-mISDN_freedchannel(struct dchannel *ch)
-{
-	if (ch->tx_skb) {
-		dev_kfree_skb(ch->tx_skb);
-		ch->tx_skb = NULL;
-	}
-	if (ch->rx_skb) {
-		dev_kfree_skb(ch->rx_skb);
-		ch->rx_skb = NULL;
-	}
-	skb_queue_purge(&ch->squeue);
-	skb_queue_purge(&ch->rqueue);
-	flush_work(&ch->workq);
-	return 0;
-}
-EXPORT_SYMBOL(mISDN_freedchannel);
-
-void
-mISDN_clear_bchannel(struct bchannel *ch)
-{
-	if (ch->tx_skb) {
-		dev_kfree_skb(ch->tx_skb);
-		ch->tx_skb = NULL;
-	}
-	ch->tx_idx = 0;
-	if (ch->rx_skb) {
-		dev_kfree_skb(ch->rx_skb);
-		ch->rx_skb = NULL;
-	}
-	if (ch->next_skb) {
-		dev_kfree_skb(ch->next_skb);
-		ch->next_skb = NULL;
-	}
-	test_and_clear_bit(FLG_TX_BUSY, &ch->Flags);
-	test_and_clear_bit(FLG_TX_NEXT, &ch->Flags);
-	test_and_clear_bit(FLG_ACTIVE, &ch->Flags);
-	test_and_clear_bit(FLG_FILLEMPTY, &ch->Flags);
-	test_and_clear_bit(FLG_TX_EMPTY, &ch->Flags);
-	test_and_clear_bit(FLG_RX_OFF, &ch->Flags);
-	ch->dropcnt = 0;
-	ch->minlen = ch->init_minlen;
-	ch->next_minlen = ch->init_minlen;
-	ch->maxlen = ch->init_maxlen;
-	ch->next_maxlen = ch->init_maxlen;
-	skb_queue_purge(&ch->rqueue);
-	ch->rcount = 0;
-}
-EXPORT_SYMBOL(mISDN_clear_bchannel);
-
-void
-mISDN_freebchannel(struct bchannel *ch)
-{
-	cancel_work_sync(&ch->workq);
-	mISDN_clear_bchannel(ch);
-}
-EXPORT_SYMBOL(mISDN_freebchannel);
-
-int
-mISDN_ctrl_bchannel(struct bchannel *bch, struct mISDN_ctrl_req *cq)
-{
-	int ret = 0;
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_RX_BUFFER | MISDN_CTRL_FILL_EMPTY |
-			 MISDN_CTRL_RX_OFF;
-		break;
-	case MISDN_CTRL_FILL_EMPTY:
-		if (cq->p1) {
-			memset(bch->fill, cq->p2 & 0xff, MISDN_BCH_FILL_SIZE);
-			test_and_set_bit(FLG_FILLEMPTY, &bch->Flags);
-		} else {
-			test_and_clear_bit(FLG_FILLEMPTY, &bch->Flags);
-		}
-		break;
-	case MISDN_CTRL_RX_OFF:
-		/* read back dropped byte count */
-		cq->p2 = bch->dropcnt;
-		if (cq->p1)
-			test_and_set_bit(FLG_RX_OFF, &bch->Flags);
-		else
-			test_and_clear_bit(FLG_RX_OFF, &bch->Flags);
-		bch->dropcnt = 0;
-		break;
-	case MISDN_CTRL_RX_BUFFER:
-		if (cq->p2 > MISDN_CTRL_RX_SIZE_IGNORE)
-			bch->next_maxlen = cq->p2;
-		if (cq->p1 > MISDN_CTRL_RX_SIZE_IGNORE)
-			bch->next_minlen = cq->p1;
-		/* we return the old values */
-		cq->p1 = bch->minlen;
-		cq->p2 = bch->maxlen;
-		break;
-	default:
-		pr_info("mISDN unhandled control %x operation\n", cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-EXPORT_SYMBOL(mISDN_ctrl_bchannel);
-
-static inline u_int
-get_sapi_tei(u_char *p)
-{
-	u_int	sapi, tei;
-
-	sapi = *p >> 2;
-	tei = p[1] >> 1;
-	return sapi | (tei << 8);
-}
-
-void
-recv_Dchannel(struct dchannel *dch)
-{
-	struct mISDNhead *hh;
-
-	if (dch->rx_skb->len < 2) { /* at least 2 for sapi / tei */
-		dev_kfree_skb(dch->rx_skb);
-		dch->rx_skb = NULL;
-		return;
-	}
-	hh = mISDN_HEAD_P(dch->rx_skb);
-	hh->prim = PH_DATA_IND;
-	hh->id = get_sapi_tei(dch->rx_skb->data);
-	skb_queue_tail(&dch->rqueue, dch->rx_skb);
-	dch->rx_skb = NULL;
-	schedule_event(dch, FLG_RECVQUEUE);
-}
-EXPORT_SYMBOL(recv_Dchannel);
-
-void
-recv_Echannel(struct dchannel *ech, struct dchannel *dch)
-{
-	struct mISDNhead *hh;
-
-	if (ech->rx_skb->len < 2) { /* at least 2 for sapi / tei */
-		dev_kfree_skb(ech->rx_skb);
-		ech->rx_skb = NULL;
-		return;
-	}
-	hh = mISDN_HEAD_P(ech->rx_skb);
-	hh->prim = PH_DATA_E_IND;
-	hh->id = get_sapi_tei(ech->rx_skb->data);
-	skb_queue_tail(&dch->rqueue, ech->rx_skb);
-	ech->rx_skb = NULL;
-	schedule_event(dch, FLG_RECVQUEUE);
-}
-EXPORT_SYMBOL(recv_Echannel);
-
-void
-recv_Bchannel(struct bchannel *bch, unsigned int id, bool force)
-{
-	struct mISDNhead *hh;
-
-	/* if allocation did fail upper functions still may call us */
-	if (unlikely(!bch->rx_skb))
-		return;
-	if (unlikely(!bch->rx_skb->len)) {
-		/* we have no data to send - this may happen after recovery
-		 * from overflow or too small allocation.
-		 * We need to free the buffer here */
-		dev_kfree_skb(bch->rx_skb);
-		bch->rx_skb = NULL;
-	} else {
-		if (test_bit(FLG_TRANSPARENT, &bch->Flags) &&
-		    (bch->rx_skb->len < bch->minlen) && !force)
-				return;
-		hh = mISDN_HEAD_P(bch->rx_skb);
-		hh->prim = PH_DATA_IND;
-		hh->id = id;
-		if (bch->rcount >= 64) {
-			printk(KERN_WARNING
-			       "B%d receive queue overflow - flushing!\n",
-			       bch->nr);
-			skb_queue_purge(&bch->rqueue);
-		}
-		bch->rcount++;
-		skb_queue_tail(&bch->rqueue, bch->rx_skb);
-		bch->rx_skb = NULL;
-		schedule_event(bch, FLG_RECVQUEUE);
-	}
-}
-EXPORT_SYMBOL(recv_Bchannel);
-
-void
-recv_Dchannel_skb(struct dchannel *dch, struct sk_buff *skb)
-{
-	skb_queue_tail(&dch->rqueue, skb);
-	schedule_event(dch, FLG_RECVQUEUE);
-}
-EXPORT_SYMBOL(recv_Dchannel_skb);
-
-void
-recv_Bchannel_skb(struct bchannel *bch, struct sk_buff *skb)
-{
-	if (bch->rcount >= 64) {
-		printk(KERN_WARNING "B-channel %p receive queue overflow, "
-		       "flushing!\n", bch);
-		skb_queue_purge(&bch->rqueue);
-		bch->rcount = 0;
-	}
-	bch->rcount++;
-	skb_queue_tail(&bch->rqueue, skb);
-	schedule_event(bch, FLG_RECVQUEUE);
-}
-EXPORT_SYMBOL(recv_Bchannel_skb);
-
-static void
-confirm_Dsend(struct dchannel *dch)
-{
-	struct sk_buff	*skb;
-
-	skb = _alloc_mISDN_skb(PH_DATA_CNF, mISDN_HEAD_ID(dch->tx_skb),
-			       0, NULL, GFP_ATOMIC);
-	if (!skb) {
-		printk(KERN_ERR "%s: no skb id %x\n", __func__,
-		       mISDN_HEAD_ID(dch->tx_skb));
-		return;
-	}
-	skb_queue_tail(&dch->rqueue, skb);
-	schedule_event(dch, FLG_RECVQUEUE);
-}
-
-int
-get_next_dframe(struct dchannel *dch)
-{
-	dch->tx_idx = 0;
-	dch->tx_skb = skb_dequeue(&dch->squeue);
-	if (dch->tx_skb) {
-		confirm_Dsend(dch);
-		return 1;
-	}
-	dch->tx_skb = NULL;
-	test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
-	return 0;
-}
-EXPORT_SYMBOL(get_next_dframe);
-
-static void
-confirm_Bsend(struct bchannel *bch)
-{
-	struct sk_buff	*skb;
-
-	if (bch->rcount >= 64) {
-		printk(KERN_WARNING "B-channel %p receive queue overflow, "
-		       "flushing!\n", bch);
-		skb_queue_purge(&bch->rqueue);
-		bch->rcount = 0;
-	}
-	skb = _alloc_mISDN_skb(PH_DATA_CNF, mISDN_HEAD_ID(bch->tx_skb),
-			       0, NULL, GFP_ATOMIC);
-	if (!skb) {
-		printk(KERN_ERR "%s: no skb id %x\n", __func__,
-		       mISDN_HEAD_ID(bch->tx_skb));
-		return;
-	}
-	bch->rcount++;
-	skb_queue_tail(&bch->rqueue, skb);
-	schedule_event(bch, FLG_RECVQUEUE);
-}
-
-int
-get_next_bframe(struct bchannel *bch)
-{
-	bch->tx_idx = 0;
-	if (test_bit(FLG_TX_NEXT, &bch->Flags)) {
-		bch->tx_skb = bch->next_skb;
-		if (bch->tx_skb) {
-			bch->next_skb = NULL;
-			test_and_clear_bit(FLG_TX_NEXT, &bch->Flags);
-			/* confirm imediately to allow next data */
-			confirm_Bsend(bch);
-			return 1;
-		} else {
-			test_and_clear_bit(FLG_TX_NEXT, &bch->Flags);
-			printk(KERN_WARNING "B TX_NEXT without skb\n");
-		}
-	}
-	bch->tx_skb = NULL;
-	test_and_clear_bit(FLG_TX_BUSY, &bch->Flags);
-	return 0;
-}
-EXPORT_SYMBOL(get_next_bframe);
-
-void
-queue_ch_frame(struct mISDNchannel *ch, u_int pr, int id, struct sk_buff *skb)
-{
-	struct mISDNhead *hh;
-
-	if (!skb) {
-		_queue_data(ch, pr, id, 0, NULL, GFP_ATOMIC);
-	} else {
-		if (ch->peer) {
-			hh = mISDN_HEAD_P(skb);
-			hh->prim = pr;
-			hh->id = id;
-			if (!ch->recv(ch->peer, skb))
-				return;
-		}
-		dev_kfree_skb(skb);
-	}
-}
-EXPORT_SYMBOL(queue_ch_frame);
-
-int
-dchannel_senddata(struct dchannel *ch, struct sk_buff *skb)
-{
-	/* check oversize */
-	if (skb->len <= 0) {
-		printk(KERN_WARNING "%s: skb too small\n", __func__);
-		return -EINVAL;
-	}
-	if (skb->len > ch->maxlen) {
-		printk(KERN_WARNING "%s: skb too large(%d/%d)\n",
-		       __func__, skb->len, ch->maxlen);
-		return -EINVAL;
-	}
-	/* HW lock must be obtained */
-	if (test_and_set_bit(FLG_TX_BUSY, &ch->Flags)) {
-		skb_queue_tail(&ch->squeue, skb);
-		return 0;
-	} else {
-		/* write to fifo */
-		ch->tx_skb = skb;
-		ch->tx_idx = 0;
-		return 1;
-	}
-}
-EXPORT_SYMBOL(dchannel_senddata);
-
-int
-bchannel_senddata(struct bchannel *ch, struct sk_buff *skb)
-{
-
-	/* check oversize */
-	if (skb->len <= 0) {
-		printk(KERN_WARNING "%s: skb too small\n", __func__);
-		return -EINVAL;
-	}
-	if (skb->len > ch->maxlen) {
-		printk(KERN_WARNING "%s: skb too large(%d/%d)\n",
-		       __func__, skb->len, ch->maxlen);
-		return -EINVAL;
-	}
-	/* HW lock must be obtained */
-	/* check for pending next_skb */
-	if (ch->next_skb) {
-		printk(KERN_WARNING
-		       "%s: next_skb exist ERROR (skb->len=%d next_skb->len=%d)\n",
-		       __func__, skb->len, ch->next_skb->len);
-		return -EBUSY;
-	}
-	if (test_and_set_bit(FLG_TX_BUSY, &ch->Flags)) {
-		test_and_set_bit(FLG_TX_NEXT, &ch->Flags);
-		ch->next_skb = skb;
-		return 0;
-	} else {
-		/* write to fifo */
-		ch->tx_skb = skb;
-		ch->tx_idx = 0;
-		confirm_Bsend(ch);
-		return 1;
-	}
-}
-EXPORT_SYMBOL(bchannel_senddata);
-
-/* The function allocates a new receive skb on demand with a size for the
- * requirements of the current protocol. It returns the tailroom of the
- * receive skb or an error.
- */
-int
-bchannel_get_rxbuf(struct bchannel *bch, int reqlen)
-{
-	int len;
-
-	if (bch->rx_skb) {
-		len = skb_tailroom(bch->rx_skb);
-		if (len < reqlen) {
-			pr_warn("B%d no space for %d (only %d) bytes\n",
-				bch->nr, reqlen, len);
-			if (test_bit(FLG_TRANSPARENT, &bch->Flags)) {
-				/* send what we have now and try a new buffer */
-				recv_Bchannel(bch, 0, true);
-			} else {
-				/* on HDLC we have to drop too big frames */
-				return -EMSGSIZE;
-			}
-		} else {
-			return len;
-		}
-	}
-	/* update current min/max length first */
-	if (unlikely(bch->maxlen != bch->next_maxlen))
-		bch->maxlen = bch->next_maxlen;
-	if (unlikely(bch->minlen != bch->next_minlen))
-		bch->minlen = bch->next_minlen;
-	if (unlikely(reqlen > bch->maxlen))
-		return -EMSGSIZE;
-	if (test_bit(FLG_TRANSPARENT, &bch->Flags)) {
-		if (reqlen >= bch->minlen) {
-			len = reqlen;
-		} else {
-			len = 2 * bch->minlen;
-			if (len > bch->maxlen)
-				len = bch->maxlen;
-		}
-	} else {
-		/* with HDLC we do not know the length yet */
-		len = bch->maxlen;
-	}
-	bch->rx_skb = mI_alloc_skb(len, GFP_ATOMIC);
-	if (!bch->rx_skb) {
-		pr_warn("B%d receive no memory for %d bytes\n", bch->nr, len);
-		len = -ENOMEM;
-	}
-	return len;
-}
-EXPORT_SYMBOL(bchannel_get_rxbuf);
diff --git a/drivers/isdn/mISDN/l1oip.h b/drivers/isdn/mISDN/l1oip.h
deleted file mode 100644
index 48133d022812..000000000000
--- a/drivers/isdn/mISDN/l1oip.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * see notice in l1oip.c
- */
-
-/* debugging */
-#define DEBUG_L1OIP_INIT	0x00010000
-#define DEBUG_L1OIP_SOCKET	0x00020000
-#define DEBUG_L1OIP_MGR		0x00040000
-#define DEBUG_L1OIP_MSG		0x00080000
-
-/* enable to disorder received bchannels by sequence 2143658798... */
-/*
-  #define REORDER_DEBUG
-*/
-
-/* frames */
-#define L1OIP_MAX_LEN		2048		/* max packet size form l2 */
-#define L1OIP_MAX_PERFRAME	1400		/* max data size in one frame */
-
-
-/* timers */
-#define L1OIP_KEEPALIVE		15
-#define L1OIP_TIMEOUT		65
-
-
-/* socket */
-#define L1OIP_DEFAULTPORT	931
-
-
-/* channel structure */
-struct l1oip_chan {
-	struct dchannel		*dch;
-	struct bchannel		*bch;
-	u32			tx_counter;	/* counts xmit bytes/packets */
-	u32			rx_counter;	/* counts recv bytes/packets */
-	u32			codecstate;	/* used by codec to save data */
-#ifdef REORDER_DEBUG
-	int			disorder_flag;
-	struct sk_buff		*disorder_skb;
-	u32			disorder_cnt;
-#endif
-};
-
-
-/* card structure */
-struct l1oip {
-	struct list_head        list;
-
-	/* card */
-	int			registered;	/* if registered with mISDN */
-	char			name[MISDN_MAX_IDLEN];
-	int			idx;		/* card index */
-	int			pri;		/* 1=pri, 0=bri */
-	int			d_idx;		/* current dchannel number */
-	int			b_num;		/* number of bchannels */
-	u32			id;		/* id of connection */
-	int			ondemand;	/* if transmis. is on demand */
-	int			bundle;		/* bundle channels in one frm */
-	int			codec;		/* codec to use for transmis. */
-	int			limit;		/* limit number of bchannels */
-	bool			shutdown;	/* if card is released */
-
-	/* timer */
-	struct timer_list	keep_tl;
-	struct timer_list	timeout_tl;
-	int			timeout_on;
-	struct work_struct	workq;
-
-	/* socket */
-	struct socket		*socket;	/* if set, socket is created */
-	struct completion	socket_complete;/* completion of sock thread */
-	struct task_struct	*socket_thread;
-	spinlock_t		socket_lock;	/* access sock outside thread */
-	u32			remoteip;	/* if all set, ip is assigned */
-	u16			localport;	/* must always be set */
-	u16			remoteport;	/* must always be set */
-	struct sockaddr_in	sin_local;	/* local socket name */
-	struct sockaddr_in	sin_remote;	/* remote socket name */
-	struct msghdr		sendmsg;	/* ip message to send */
-	struct kvec		sendiov;	/* iov for message */
-
-	/* frame */
-	struct l1oip_chan	chan[128];	/* channel instances */
-};
-
-extern int l1oip_law_to_4bit(u8 *data, int len, u8 *result, u32 *state);
-extern int l1oip_4bit_to_law(u8 *data, int len, u8 *result);
-extern int l1oip_alaw_to_ulaw(u8 *data, int len, u8 *result);
-extern int l1oip_ulaw_to_alaw(u8 *data, int len, u8 *result);
-extern void l1oip_4bit_free(void);
-extern int l1oip_4bit_alloc(int ulaw);
diff --git a/drivers/isdn/mISDN/l1oip_codec.c b/drivers/isdn/mISDN/l1oip_codec.c
deleted file mode 100644
index 1059234fbc67..000000000000
--- a/drivers/isdn/mISDN/l1oip_codec.c
+++ /dev/null
@@ -1,358 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
-
- * l1oip_codec.c  generic codec using lookup table
- *  -> conversion from a-Law to u-Law
- *  -> conversion from u-Law to a-Law
- *  -> compression by reducing the number of sample resolution to 4
- *
- * NOTE: It is not compatible with any standard codec like ADPCM.
- *
- * Author	Andreas Eversberg (jolly@eversberg.eu)
- *
-
- */
-
-/*
-
-  How the codec works:
-  --------------------
-
-  The volume is increased to increase the dynamic range of the audio signal.
-  Each sample is converted to a-LAW with only 16 steps of level resolution.
-  A pair of two samples are stored in one byte.
-
-  The first byte is stored in the upper bits, the second byte is stored in the
-  lower bits.
-
-  To speed up compression and decompression, two lookup tables are formed:
-
-  - 16 bits index for two samples (law encoded) with 8 bit compressed result.
-  - 8 bits index for one compressed data with 16 bits decompressed result.
-
-  NOTE: The bytes are handled as they are law-encoded.
-
-*/
-
-#include <linux/vmalloc.h>
-#include <linux/mISDNif.h>
-#include <linux/in.h>
-#include "core.h"
-#include "l1oip.h"
-
-/* definitions of codec. don't use calculations, code may run slower. */
-
-static u8 *table_com;
-static u16 *table_dec;
-
-
-/* alaw -> ulaw */
-static u8 alaw_to_ulaw[256] =
-{
-	0xab, 0x2b, 0xe3, 0x63, 0x8b, 0x0b, 0xc9, 0x49,
-	0xba, 0x3a, 0xf6, 0x76, 0x9b, 0x1b, 0xd7, 0x57,
-	0xa3, 0x23, 0xdd, 0x5d, 0x83, 0x03, 0xc1, 0x41,
-	0xb2, 0x32, 0xeb, 0x6b, 0x93, 0x13, 0xcf, 0x4f,
-	0xaf, 0x2f, 0xe7, 0x67, 0x8f, 0x0f, 0xcd, 0x4d,
-	0xbe, 0x3e, 0xfe, 0x7e, 0x9f, 0x1f, 0xdb, 0x5b,
-	0xa7, 0x27, 0xdf, 0x5f, 0x87, 0x07, 0xc5, 0x45,
-	0xb6, 0x36, 0xef, 0x6f, 0x97, 0x17, 0xd3, 0x53,
-	0xa9, 0x29, 0xe1, 0x61, 0x89, 0x09, 0xc7, 0x47,
-	0xb8, 0x38, 0xf2, 0x72, 0x99, 0x19, 0xd5, 0x55,
-	0xa1, 0x21, 0xdc, 0x5c, 0x81, 0x01, 0xbf, 0x3f,
-	0xb0, 0x30, 0xe9, 0x69, 0x91, 0x11, 0xce, 0x4e,
-	0xad, 0x2d, 0xe5, 0x65, 0x8d, 0x0d, 0xcb, 0x4b,
-	0xbc, 0x3c, 0xfa, 0x7a, 0x9d, 0x1d, 0xd9, 0x59,
-	0xa5, 0x25, 0xde, 0x5e, 0x85, 0x05, 0xc3, 0x43,
-	0xb4, 0x34, 0xed, 0x6d, 0x95, 0x15, 0xd1, 0x51,
-	0xac, 0x2c, 0xe4, 0x64, 0x8c, 0x0c, 0xca, 0x4a,
-	0xbb, 0x3b, 0xf8, 0x78, 0x9c, 0x1c, 0xd8, 0x58,
-	0xa4, 0x24, 0xde, 0x5e, 0x84, 0x04, 0xc2, 0x42,
-	0xb3, 0x33, 0xec, 0x6c, 0x94, 0x14, 0xd0, 0x50,
-	0xb0, 0x30, 0xe8, 0x68, 0x90, 0x10, 0xce, 0x4e,
-	0xbf, 0x3f, 0xfe, 0x7e, 0xa0, 0x20, 0xdc, 0x5c,
-	0xa8, 0x28, 0xe0, 0x60, 0x88, 0x08, 0xc6, 0x46,
-	0xb7, 0x37, 0xf0, 0x70, 0x98, 0x18, 0xd4, 0x54,
-	0xaa, 0x2a, 0xe2, 0x62, 0x8a, 0x0a, 0xc8, 0x48,
-	0xb9, 0x39, 0xf4, 0x74, 0x9a, 0x1a, 0xd6, 0x56,
-	0xa2, 0x22, 0xdd, 0x5d, 0x82, 0x02, 0xc0, 0x40,
-	0xb1, 0x31, 0xea, 0x6a, 0x92, 0x12, 0xcf, 0x4f,
-	0xae, 0x2e, 0xe6, 0x66, 0x8e, 0x0e, 0xcc, 0x4c,
-	0xbd, 0x3d, 0xfc, 0x7c, 0x9e, 0x1e, 0xda, 0x5a,
-	0xa6, 0x26, 0xdf, 0x5f, 0x86, 0x06, 0xc4, 0x44,
-	0xb5, 0x35, 0xee, 0x6e, 0x96, 0x16, 0xd2, 0x52
-};
-
-/* ulaw -> alaw */
-static u8 ulaw_to_alaw[256] =
-{
-	0xab, 0x55, 0xd5, 0x15, 0x95, 0x75, 0xf5, 0x35,
-	0xb5, 0x45, 0xc5, 0x05, 0x85, 0x65, 0xe5, 0x25,
-	0xa5, 0x5d, 0xdd, 0x1d, 0x9d, 0x7d, 0xfd, 0x3d,
-	0xbd, 0x4d, 0xcd, 0x0d, 0x8d, 0x6d, 0xed, 0x2d,
-	0xad, 0x51, 0xd1, 0x11, 0x91, 0x71, 0xf1, 0x31,
-	0xb1, 0x41, 0xc1, 0x01, 0x81, 0x61, 0xe1, 0x21,
-	0x59, 0xd9, 0x19, 0x99, 0x79, 0xf9, 0x39, 0xb9,
-	0x49, 0xc9, 0x09, 0x89, 0x69, 0xe9, 0x29, 0xa9,
-	0xd7, 0x17, 0x97, 0x77, 0xf7, 0x37, 0xb7, 0x47,
-	0xc7, 0x07, 0x87, 0x67, 0xe7, 0x27, 0xa7, 0xdf,
-	0x9f, 0x7f, 0xff, 0x3f, 0xbf, 0x4f, 0xcf, 0x0f,
-	0x8f, 0x6f, 0xef, 0x2f, 0x53, 0x13, 0x73, 0x33,
-	0xb3, 0x43, 0xc3, 0x03, 0x83, 0x63, 0xe3, 0x23,
-	0xa3, 0x5b, 0xdb, 0x1b, 0x9b, 0x7b, 0xfb, 0x3b,
-	0xbb, 0xbb, 0x4b, 0x4b, 0xcb, 0xcb, 0x0b, 0x0b,
-	0x8b, 0x8b, 0x6b, 0x6b, 0xeb, 0xeb, 0x2b, 0x2b,
-	0xab, 0x54, 0xd4, 0x14, 0x94, 0x74, 0xf4, 0x34,
-	0xb4, 0x44, 0xc4, 0x04, 0x84, 0x64, 0xe4, 0x24,
-	0xa4, 0x5c, 0xdc, 0x1c, 0x9c, 0x7c, 0xfc, 0x3c,
-	0xbc, 0x4c, 0xcc, 0x0c, 0x8c, 0x6c, 0xec, 0x2c,
-	0xac, 0x50, 0xd0, 0x10, 0x90, 0x70, 0xf0, 0x30,
-	0xb0, 0x40, 0xc0, 0x00, 0x80, 0x60, 0xe0, 0x20,
-	0x58, 0xd8, 0x18, 0x98, 0x78, 0xf8, 0x38, 0xb8,
-	0x48, 0xc8, 0x08, 0x88, 0x68, 0xe8, 0x28, 0xa8,
-	0xd6, 0x16, 0x96, 0x76, 0xf6, 0x36, 0xb6, 0x46,
-	0xc6, 0x06, 0x86, 0x66, 0xe6, 0x26, 0xa6, 0xde,
-	0x9e, 0x7e, 0xfe, 0x3e, 0xbe, 0x4e, 0xce, 0x0e,
-	0x8e, 0x6e, 0xee, 0x2e, 0x52, 0x12, 0x72, 0x32,
-	0xb2, 0x42, 0xc2, 0x02, 0x82, 0x62, 0xe2, 0x22,
-	0xa2, 0x5a, 0xda, 0x1a, 0x9a, 0x7a, 0xfa, 0x3a,
-	0xba, 0xba, 0x4a, 0x4a, 0xca, 0xca, 0x0a, 0x0a,
-	0x8a, 0x8a, 0x6a, 0x6a, 0xea, 0xea, 0x2a, 0x2a
-};
-
-/* alaw -> 4bit compression */
-static u8 alaw_to_4bit[256] = {
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x08, 0x07, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x09, 0x06, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x08, 0x07, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x09, 0x06, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x08, 0x07, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0d, 0x02,
-	0x0e, 0x02, 0x09, 0x06, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x08, 0x07, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x09, 0x06, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x08, 0x07, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x09, 0x06, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x02, 0x09, 0x06, 0x0f, 0x00, 0x0b, 0x04,
-	0x0d, 0x02, 0x08, 0x07, 0x0f, 0x01, 0x0a, 0x05,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x09, 0x07, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x08, 0x07, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x09, 0x06, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x08, 0x07, 0x0f, 0x00, 0x0b, 0x04,
-	0x0e, 0x01, 0x0a, 0x05, 0x0f, 0x00, 0x0c, 0x03,
-	0x0d, 0x02, 0x09, 0x06, 0x0f, 0x00, 0x0b, 0x04,
-};
-
-/* 4bit -> alaw decompression */
-static u8 _4bit_to_alaw[16] = {
-	0x5d, 0x51, 0xd9, 0xd7, 0x5f, 0x53, 0xa3, 0x4b,
-	0x2a, 0x3a, 0x22, 0x2e, 0x26, 0x56, 0x20, 0x2c,
-};
-
-/* ulaw -> 4bit compression */
-static u8 ulaw_to_4bit[256] = {
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
-	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04,
-	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
-	0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
-	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
-	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
-	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
-	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x08,
-	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-	0x0f, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e,
-	0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e,
-	0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
-	0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
-	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
-	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0b, 0x0b,
-	0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
-	0x0b, 0x0b, 0x0b, 0x0b, 0x0a, 0x0a, 0x0a, 0x0a,
-	0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
-	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
-	0x09, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-};
-
-/* 4bit -> ulaw decompression */
-static u8 _4bit_to_ulaw[16] = {
-	0x11, 0x21, 0x31, 0x40, 0x4e, 0x5c, 0x68, 0x71,
-	0xfe, 0xef, 0xe7, 0xdb, 0xcd, 0xbf, 0xaf, 0x9f,
-};
-
-
-/*
- * Compresses data to the result buffer
- * The result size must be at least half of the input buffer.
- * The number of samples also must be even!
- */
-int
-l1oip_law_to_4bit(u8 *data, int len, u8 *result, u32 *state)
-{
-	int ii, i = 0, o = 0;
-
-	if (!len)
-		return 0;
-
-	/* send saved byte and first input byte */
-	if (*state) {
-		*result++ = table_com[(((*state) << 8) & 0xff00) | (*data++)];
-		len--;
-		o++;
-	}
-
-	ii = len >> 1;
-
-	while (i < ii) {
-		*result++ = table_com[(data[0]<<8) | (data[1])];
-		data += 2;
-		i++;
-		o++;
-	}
-
-	/* if len has an odd number, we save byte for next call */
-	if (len & 1)
-		*state = 0x100 + *data;
-	else
-		*state = 0;
-
-	return o;
-}
-
-/* Decompress data to the result buffer
- * The result size must be the number of sample in packet. (2 * input data)
- * The number of samples in the result are even!
- */
-int
-l1oip_4bit_to_law(u8 *data, int len, u8 *result)
-{
-	int i = 0;
-	u16 r;
-
-	while (i < len) {
-		r = table_dec[*data++];
-		*result++ = r >> 8;
-		*result++ = r;
-		i++;
-	}
-
-	return len << 1;
-}
-
-
-/*
- * law conversion
- */
-int
-l1oip_alaw_to_ulaw(u8 *data, int len, u8 *result)
-{
-	int i = 0;
-
-	while (i < len) {
-		*result++ = alaw_to_ulaw[*data++];
-		i++;
-	}
-
-	return len;
-}
-
-int
-l1oip_ulaw_to_alaw(u8 *data, int len, u8 *result)
-{
-	int i = 0;
-
-	while (i < len) {
-		*result++ = ulaw_to_alaw[*data++];
-		i++;
-	}
-
-	return len;
-}
-
-
-/*
- * generate/free compression and decompression table
- */
-void
-l1oip_4bit_free(void)
-{
-	vfree(table_dec);
-	vfree(table_com);
-	table_com = NULL;
-	table_dec = NULL;
-}
-
-int
-l1oip_4bit_alloc(int ulaw)
-{
-	int i1, i2, c, sample;
-
-	/* in case, it is called again */
-	if (table_dec)
-		return 0;
-
-	/* alloc conversion tables */
-	table_com = vzalloc(65536);
-	table_dec = vzalloc(512);
-	if (!table_com || !table_dec) {
-		l1oip_4bit_free();
-		return -ENOMEM;
-	}
-	/* generate compression table */
-	i1 = 0;
-	while (i1 < 256) {
-		if (ulaw)
-			c = ulaw_to_4bit[i1];
-		else
-			c = alaw_to_4bit[i1];
-		i2 = 0;
-		while (i2 < 256) {
-			table_com[(i1 << 8) | i2] |= (c << 4);
-			table_com[(i2 << 8) | i1] |= c;
-			i2++;
-		}
-		i1++;
-	}
-
-	/* generate decompression table */
-	i1 = 0;
-	while (i1 < 16) {
-		if (ulaw)
-			sample = _4bit_to_ulaw[i1];
-		else
-			sample = _4bit_to_alaw[i1];
-		i2 = 0;
-		while (i2 < 16) {
-			table_dec[(i1 << 4) | i2] |= (sample << 8);
-			table_dec[(i2 << 4) | i1] |= sample;
-			i2++;
-		}
-		i1++;
-	}
-
-	return 0;
-}
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
deleted file mode 100644
index 6866a0d6b382..000000000000
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ /dev/null
@@ -1,1505 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
-
- * l1oip.c  low level driver for tunneling layer 1 over IP
- *
- * NOTE: It is not compatible with TDMoIP nor "ISDN over IP".
- *
- * Author	Andreas Eversberg (jolly@eversberg.eu)
- */
-
-/* module parameters:
- * type:
- Value 1	= BRI
- Value 2	= PRI
- Value 3 = BRI (multi channel frame, not supported yet)
- Value 4 = PRI (multi channel frame, not supported yet)
- A multi channel frame reduces overhead to a single frame for all
- b-channels, but increases delay.
- (NOTE: Multi channel frames are not implemented yet.)
-
- * codec:
- Value 0 = transparent (default)
- Value 1 = transfer ALAW
- Value 2 = transfer ULAW
- Value 3 = transfer generic 4 bit compression.
-
- * ulaw:
- 0 = we use a-Law (default)
- 1 = we use u-Law
-
- * limit:
- limitation of B-channels to control bandwidth (1...126)
- BRI: 1 or 2
- PRI: 1-30, 31-126 (126, because dchannel ist not counted here)
- Also limited ressources are used for stack, resulting in less channels.
- It is possible to have more channels than 30 in PRI mode, this must
- be supported by the application.
-
- * ip:
- byte representation of remote ip address (127.0.0.1 -> 127,0,0,1)
- If not given or four 0, no remote address is set.
- For multiple interfaces, concat ip addresses. (127,0,0,1,127,0,0,1)
-
- * port:
- port number (local interface)
- If not given or 0, port 931 is used for fist instance, 932 for next...
- For multiple interfaces, different ports must be given.
-
- * remoteport:
- port number (remote interface)
- If not given or 0, remote port equals local port
- For multiple interfaces on equal sites, different ports must be given.
-
- * ondemand:
- 0 = fixed (always transmit packets, even when remote side timed out)
- 1 = on demand (only transmit packets, when remote side is detected)
- the default is 0
- NOTE: ID must also be set for on demand.
-
- * id:
- optional value to identify frames. This value must be equal on both
- peers and should be random. If omitted or 0, no ID is transmitted.
-
- * debug:
- NOTE: only one debug value must be given for all cards
- enable debugging (see l1oip.h for debug options)
-
-
- Special mISDN controls:
-
- op = MISDN_CTRL_SETPEER*
- p1 = bytes 0-3 : remote IP address in network order (left element first)
- p2 = bytes 1-2 : remote port in network order (high byte first)
- optional:
- p2 = bytes 3-4 : local port in network order (high byte first)
-
- op = MISDN_CTRL_UNSETPEER*
-
- * Use l1oipctrl for comfortable setting or removing ip address.
- (Layer 1 Over IP CTRL)
-
-
- L1oIP-Protocol
- --------------
-
- Frame Header:
-
- 7 6 5 4 3 2 1 0
- +---------------+
- |Ver|T|I|Coding |
- +---------------+
- |  ID byte 3 *  |
- +---------------+
- |  ID byte 2 *  |
- +---------------+
- |  ID byte 1 *  |
- +---------------+
- |  ID byte 0 *  |
- +---------------+
- |M|   Channel   |
- +---------------+
- |    Length *   |
- +---------------+
- | Time Base MSB |
- +---------------+
- | Time Base LSB |
- +---------------+
- | Data....	|
-
- ...
-
- |               |
- +---------------+
- |M|   Channel   |
- +---------------+
- |    Length *   |
- +---------------+
- | Time Base MSB |
- +---------------+
- | Time Base LSB |
- +---------------+
- | Data....	|
-
- ...
-
-
- * Only included in some cases.
-
- - Ver = Version
- If version is missmatch, the frame must be ignored.
-
- - T = Type of interface
- Must be 0 for S0 or 1 for E1.
-
- - I = Id present
- If bit is set, four ID bytes are included in frame.
-
- - ID = Connection ID
- Additional ID to prevent Denial of Service attacs. Also it prevents hijacking
- connections with dynamic IP. The ID should be random and must not be 0.
-
- - Coding = Type of codec
- Must be 0 for no transcoding. Also for D-channel and other HDLC frames.
- 1 and 2 are reserved for explicitly use of a-LAW or u-LAW codec.
- 3 is used for generic table compressor.
-
- - M = More channels to come. If this flag is 1, the following byte contains
- the length of the channel data. After the data block, the next channel will
- be defined. The flag for the last channel block (or if only one channel is
- transmitted), must be 0 and no length is given.
-
- - Channel = Channel number
- 0 reserved
- 1-3 channel data for S0 (3 is D-channel)
- 1-31 channel data for E1 (16 is D-channel)
- 32-127 channel data for extended E1 (16 is D-channel)
-
- - The length is used if the M-flag is 1. It is used to find the next channel
- inside frame.
- NOTE: A value of 0 equals 256 bytes of data.
- -> For larger data blocks, a single frame must be used.
- -> For larger streams, a single frame or multiple blocks with same channel ID
- must be used.
-
- - Time Base = Timestamp of first sample in frame
- The "Time Base" is used to rearange packets and to detect packet loss.
- The 16 bits are sent in network order (MSB first) and count 1/8000 th of a
- second. This causes a wrap around each 8,192 seconds. There is no requirement
- for the initial "Time Base", but 0 should be used for the first packet.
- In case of HDLC data, this timestamp counts the packet or byte number.
-
-
- Two Timers:
-
- After initialisation, a timer of 15 seconds is started. Whenever a packet is
- transmitted, the timer is reset to 15 seconds again. If the timer expires, an
- empty packet is transmitted. This keep the connection alive.
-
- When a valid packet is received, a timer 65 seconds is started. The interface
- become ACTIVE. If the timer expires, the interface becomes INACTIVE.
-
-
- Dynamic IP handling:
-
- To allow dynamic IP, the ID must be non 0. In this case, any packet with the
- correct port number and ID will be accepted. If the remote side changes its IP
- the new IP is used for all transmitted packets until it changes again.
-
-
- On Demand:
-
- If the ondemand parameter is given, the remote IP is set to 0 on timeout.
- This will stop keepalive traffic to remote. If the remote is online again,
- traffic will continue to the remote address. This is useful for road warriors.
- This feature only works with ID set, otherwhise it is highly unsecure.
-
-
- Socket and Thread
- -----------------
-
- The complete socket opening and closing is done by a thread.
- When the thread opened a socket, the hc->socket descriptor is set. Whenever a
- packet shall be sent to the socket, the hc->socket must be checked whether not
- NULL. To prevent change in socket descriptor, the hc->socket_lock must be used.
- To change the socket, a recall of l1oip_socket_open() will safely kill the
- socket process and create a new one.
-
-*/
-
-#define L1OIP_VERSION	0	/* 0...3 */
-
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/mISDNif.h>
-#include <linux/mISDNhw.h>
-#include <linux/mISDNdsp.h>
-#include <linux/init.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/workqueue.h>
-#include <linux/kthread.h>
-#include <linux/slab.h>
-#include <linux/sched/signal.h>
-
-#include <net/sock.h>
-#include "core.h"
-#include "l1oip.h"
-
-static const char *l1oip_revision = "2.00";
-
-static int l1oip_cnt;
-static DEFINE_SPINLOCK(l1oip_lock);
-static LIST_HEAD(l1oip_ilist);
-
-#define MAX_CARDS	16
-static u_int type[MAX_CARDS];
-static u_int codec[MAX_CARDS];
-static u_int ip[MAX_CARDS * 4];
-static u_int port[MAX_CARDS];
-static u_int remoteport[MAX_CARDS];
-static u_int ondemand[MAX_CARDS];
-static u_int limit[MAX_CARDS];
-static u_int id[MAX_CARDS];
-static int debug;
-static int ulaw;
-
-MODULE_AUTHOR("Andreas Eversberg");
-MODULE_DESCRIPTION("mISDN driver for tunneling layer 1 over IP");
-MODULE_LICENSE("GPL");
-module_param_array(type, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(codec, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(ip, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(port, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(remoteport, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(ondemand, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(limit, uint, NULL, S_IRUGO | S_IWUSR);
-module_param_array(id, uint, NULL, S_IRUGO | S_IWUSR);
-module_param(ulaw, uint, S_IRUGO | S_IWUSR);
-module_param(debug, uint, S_IRUGO | S_IWUSR);
-
-/*
- * send a frame via socket, if open and restart timer
- */
-static int
-l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask,
-		  u16 timebase, u8 *buf, int len)
-{
-	u8 *p;
-	u8 frame[MAX_DFRAME_LEN_L1 + 32];
-	struct socket *socket = NULL;
-
-	if (debug & DEBUG_L1OIP_MSG)
-		printk(KERN_DEBUG "%s: sending data to socket (len = %d)\n",
-		       __func__, len);
-
-	p = frame;
-
-	/* restart timer */
-	if (time_before(hc->keep_tl.expires, jiffies + 5 * HZ) && !hc->shutdown)
-		mod_timer(&hc->keep_tl, jiffies + L1OIP_KEEPALIVE * HZ);
-	else
-		hc->keep_tl.expires = jiffies + L1OIP_KEEPALIVE * HZ;
-
-	if (debug & DEBUG_L1OIP_MSG)
-		printk(KERN_DEBUG "%s: resetting timer\n", __func__);
-
-	/* drop if we have no remote ip or port */
-	if (!hc->sin_remote.sin_addr.s_addr || !hc->sin_remote.sin_port) {
-		if (debug & DEBUG_L1OIP_MSG)
-			printk(KERN_DEBUG "%s: dropping frame, because remote "
-			       "IP is not set.\n", __func__);
-		return len;
-	}
-
-	/* assemble frame */
-	*p++ = (L1OIP_VERSION << 6) /* version and coding */
-		| (hc->pri ? 0x20 : 0x00) /* type */
-		| (hc->id ? 0x10 : 0x00) /* id */
-		| localcodec;
-	if (hc->id) {
-		*p++ = hc->id >> 24; /* id */
-		*p++ = hc->id >> 16;
-		*p++ = hc->id >> 8;
-		*p++ = hc->id;
-	}
-	*p++ =  0x00 + channel; /* m-flag, channel */
-	*p++ = timebase >> 8; /* time base */
-	*p++ = timebase;
-
-	if (buf && len) { /* add data to frame */
-		if (localcodec == 1 && ulaw)
-			l1oip_ulaw_to_alaw(buf, len, p);
-		else if (localcodec == 2 && !ulaw)
-			l1oip_alaw_to_ulaw(buf, len, p);
-		else if (localcodec == 3)
-			len = l1oip_law_to_4bit(buf, len, p,
-						&hc->chan[channel].codecstate);
-		else
-			memcpy(p, buf, len);
-	}
-	len += p - frame;
-
-	/* check for socket in safe condition */
-	spin_lock(&hc->socket_lock);
-	if (!hc->socket) {
-		spin_unlock(&hc->socket_lock);
-		return 0;
-	}
-	/* seize socket */
-	socket = hc->socket;
-	hc->socket = NULL;
-	spin_unlock(&hc->socket_lock);
-	/* send packet */
-	if (debug & DEBUG_L1OIP_MSG)
-		printk(KERN_DEBUG "%s: sending packet to socket (len "
-		       "= %d)\n", __func__, len);
-	hc->sendiov.iov_base = frame;
-	hc->sendiov.iov_len  = len;
-	len = kernel_sendmsg(socket, &hc->sendmsg, &hc->sendiov, 1, len);
-	/* give socket back */
-	hc->socket = socket; /* no locking required */
-
-	return len;
-}
-
-
-/*
- * receive channel data from socket
- */
-static void
-l1oip_socket_recv(struct l1oip *hc, u8 remotecodec, u8 channel, u16 timebase,
-		  u8 *buf, int len)
-{
-	struct sk_buff *nskb;
-	struct bchannel *bch;
-	struct dchannel *dch;
-	u8 *p;
-	u32 rx_counter;
-
-	if (len == 0) {
-		if (debug & DEBUG_L1OIP_MSG)
-			printk(KERN_DEBUG "%s: received empty keepalive data, "
-			       "ignoring\n", __func__);
-		return;
-	}
-
-	if (debug & DEBUG_L1OIP_MSG)
-		printk(KERN_DEBUG "%s: received data, sending to mISDN (%d)\n",
-		       __func__, len);
-
-	if (channel < 1 || channel > 127) {
-		printk(KERN_WARNING "%s: packet error - channel %d out of "
-		       "range\n", __func__, channel);
-		return;
-	}
-	dch = hc->chan[channel].dch;
-	bch = hc->chan[channel].bch;
-	if (!dch && !bch) {
-		printk(KERN_WARNING "%s: packet error - channel %d not in "
-		       "stack\n", __func__, channel);
-		return;
-	}
-
-	/* prepare message */
-	nskb = mI_alloc_skb((remotecodec == 3) ? (len << 1) : len, GFP_ATOMIC);
-	if (!nskb) {
-		printk(KERN_ERR "%s: No mem for skb.\n", __func__);
-		return;
-	}
-	p = skb_put(nskb, (remotecodec == 3) ? (len << 1) : len);
-
-	if (remotecodec == 1 && ulaw)
-		l1oip_alaw_to_ulaw(buf, len, p);
-	else if (remotecodec == 2 && !ulaw)
-		l1oip_ulaw_to_alaw(buf, len, p);
-	else if (remotecodec == 3)
-		len = l1oip_4bit_to_law(buf, len, p);
-	else
-		memcpy(p, buf, len);
-
-	/* send message up */
-	if (dch && len >= 2) {
-		dch->rx_skb = nskb;
-		recv_Dchannel(dch);
-	}
-	if (bch) {
-		/* expand 16 bit sequence number to 32 bit sequence number */
-		rx_counter = hc->chan[channel].rx_counter;
-		if (((s16)(timebase - rx_counter)) >= 0) {
-			/* time has changed forward */
-			if (timebase >= (rx_counter & 0xffff))
-				rx_counter =
-					(rx_counter & 0xffff0000) | timebase;
-			else
-				rx_counter = ((rx_counter & 0xffff0000) + 0x10000)
-					| timebase;
-		} else {
-			/* time has changed backwards */
-			if (timebase < (rx_counter & 0xffff))
-				rx_counter =
-					(rx_counter & 0xffff0000) | timebase;
-			else
-				rx_counter = ((rx_counter & 0xffff0000) - 0x10000)
-					| timebase;
-		}
-		hc->chan[channel].rx_counter = rx_counter;
-
-#ifdef REORDER_DEBUG
-		if (hc->chan[channel].disorder_flag) {
-			swap(hc->chan[channel].disorder_skb, nskb);
-			swap(hc->chan[channel].disorder_cnt, rx_counter);
-		}
-		hc->chan[channel].disorder_flag ^= 1;
-		if (nskb)
-#endif
-			queue_ch_frame(&bch->ch, PH_DATA_IND, rx_counter, nskb);
-	}
-}
-
-
-/*
- * parse frame and extract channel data
- */
-static void
-l1oip_socket_parse(struct l1oip *hc, struct sockaddr_in *sin, u8 *buf, int len)
-{
-	u32			packet_id;
-	u8			channel;
-	u8			remotecodec;
-	u16			timebase;
-	int			m, mlen;
-	int			len_start = len; /* initial frame length */
-	struct dchannel		*dch = hc->chan[hc->d_idx].dch;
-
-	if (debug & DEBUG_L1OIP_MSG)
-		printk(KERN_DEBUG "%s: received frame, parsing... (%d)\n",
-		       __func__, len);
-
-	/* check length */
-	if (len < 1 + 1 + 2) {
-		printk(KERN_WARNING "%s: packet error - length %d below "
-		       "4 bytes\n", __func__, len);
-		return;
-	}
-
-	/* check version */
-	if (((*buf) >> 6) != L1OIP_VERSION) {
-		printk(KERN_WARNING "%s: packet error - unknown version %d\n",
-		       __func__, buf[0]>>6);
-		return;
-	}
-
-	/* check type */
-	if (((*buf) & 0x20) && !hc->pri) {
-		printk(KERN_WARNING "%s: packet error - received E1 packet "
-		       "on S0 interface\n", __func__);
-		return;
-	}
-	if (!((*buf) & 0x20) && hc->pri) {
-		printk(KERN_WARNING "%s: packet error - received S0 packet "
-		       "on E1 interface\n", __func__);
-		return;
-	}
-
-	/* get id flag */
-	packet_id = (*buf >> 4) & 1;
-
-	/* check coding */
-	remotecodec = (*buf) & 0x0f;
-	if (remotecodec > 3) {
-		printk(KERN_WARNING "%s: packet error - remotecodec %d "
-		       "unsupported\n", __func__, remotecodec);
-		return;
-	}
-	buf++;
-	len--;
-
-	/* check packet_id */
-	if (packet_id) {
-		if (!hc->id) {
-			printk(KERN_WARNING "%s: packet error - packet has id "
-			       "0x%x, but we have not\n", __func__, packet_id);
-			return;
-		}
-		if (len < 4) {
-			printk(KERN_WARNING "%s: packet error - packet too "
-			       "short for ID value\n", __func__);
-			return;
-		}
-		packet_id = (*buf++) << 24;
-		packet_id += (*buf++) << 16;
-		packet_id += (*buf++) << 8;
-		packet_id += (*buf++);
-		len -= 4;
-
-		if (packet_id != hc->id) {
-			printk(KERN_WARNING "%s: packet error - ID mismatch, "
-			       "got 0x%x, we 0x%x\n",
-			       __func__, packet_id, hc->id);
-			return;
-		}
-	} else {
-		if (hc->id) {
-			printk(KERN_WARNING "%s: packet error - packet has no "
-			       "ID, but we have\n", __func__);
-			return;
-		}
-	}
-
-multiframe:
-	if (len < 1) {
-		printk(KERN_WARNING "%s: packet error - packet too short, "
-		       "channel expected at position %d.\n",
-		       __func__, len-len_start + 1);
-		return;
-	}
-
-	/* get channel and multiframe flag */
-	channel = *buf & 0x7f;
-	m = *buf >> 7;
-	buf++;
-	len--;
-
-	/* check length on multiframe */
-	if (m) {
-		if (len < 1) {
-			printk(KERN_WARNING "%s: packet error - packet too "
-			       "short, length expected at position %d.\n",
-			       __func__, len_start - len - 1);
-			return;
-		}
-
-		mlen = *buf++;
-		len--;
-		if (mlen == 0)
-			mlen = 256;
-		if (len < mlen + 3) {
-			printk(KERN_WARNING "%s: packet error - length %d at "
-			       "position %d exceeds total length %d.\n",
-			       __func__, mlen, len_start-len - 1, len_start);
-			return;
-		}
-		if (len == mlen + 3) {
-			printk(KERN_WARNING "%s: packet error - length %d at "
-			       "position %d will not allow additional "
-			       "packet.\n",
-			       __func__, mlen, len_start-len + 1);
-			return;
-		}
-	} else
-		mlen = len - 2; /* single frame, subtract timebase */
-
-	if (len < 2) {
-		printk(KERN_WARNING "%s: packet error - packet too short, time "
-		       "base expected at position %d.\n",
-		       __func__, len-len_start + 1);
-		return;
-	}
-
-	/* get time base */
-	timebase = (*buf++) << 8;
-	timebase |= (*buf++);
-	len -= 2;
-
-	/* if inactive, we send up a PH_ACTIVATE and activate */
-	if (!test_bit(FLG_ACTIVE, &dch->Flags)) {
-		if (debug & (DEBUG_L1OIP_MSG | DEBUG_L1OIP_SOCKET))
-			printk(KERN_DEBUG "%s: interface become active due to "
-			       "received packet\n", __func__);
-		test_and_set_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, PH_ACTIVATE_IND, MISDN_ID_ANY, 0,
-			    NULL, GFP_ATOMIC);
-	}
-
-	/* distribute packet */
-	l1oip_socket_recv(hc, remotecodec, channel, timebase, buf, mlen);
-	buf += mlen;
-	len -= mlen;
-
-	/* multiframe */
-	if (m)
-		goto multiframe;
-
-	/* restart timer */
-	if ((time_before(hc->timeout_tl.expires, jiffies + 5 * HZ) ||
-	     !hc->timeout_on) &&
-	    !hc->shutdown) {
-		hc->timeout_on = 1;
-		mod_timer(&hc->timeout_tl, jiffies + L1OIP_TIMEOUT * HZ);
-	} else /* only adjust timer */
-		hc->timeout_tl.expires = jiffies + L1OIP_TIMEOUT * HZ;
-
-	/* if ip or source port changes */
-	if ((hc->sin_remote.sin_addr.s_addr != sin->sin_addr.s_addr)
-	    || (hc->sin_remote.sin_port != sin->sin_port)) {
-		if (debug & DEBUG_L1OIP_SOCKET)
-			printk(KERN_DEBUG "%s: remote address changes from "
-			       "0x%08x to 0x%08x (port %d to %d)\n", __func__,
-			       ntohl(hc->sin_remote.sin_addr.s_addr),
-			       ntohl(sin->sin_addr.s_addr),
-			       ntohs(hc->sin_remote.sin_port),
-			       ntohs(sin->sin_port));
-		hc->sin_remote.sin_addr.s_addr = sin->sin_addr.s_addr;
-		hc->sin_remote.sin_port = sin->sin_port;
-	}
-}
-
-
-/*
- * socket stuff
- */
-static int
-l1oip_socket_thread(void *data)
-{
-	struct l1oip *hc = (struct l1oip *)data;
-	int ret = 0;
-	struct sockaddr_in sin_rx;
-	struct kvec iov;
-	struct msghdr msg = {.msg_name = &sin_rx,
-			     .msg_namelen = sizeof(sin_rx)};
-	unsigned char *recvbuf;
-	size_t recvbuf_size = 1500;
-	int recvlen;
-	struct socket *socket = NULL;
-	DECLARE_COMPLETION_ONSTACK(wait);
-
-	/* allocate buffer memory */
-	recvbuf = kmalloc(recvbuf_size, GFP_KERNEL);
-	if (!recvbuf) {
-		printk(KERN_ERR "%s: Failed to alloc recvbuf.\n", __func__);
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	iov.iov_base = recvbuf;
-	iov.iov_len = recvbuf_size;
-
-	/* make daemon */
-	allow_signal(SIGTERM);
-
-	/* create socket */
-	if (sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &socket)) {
-		printk(KERN_ERR "%s: Failed to create socket.\n", __func__);
-		ret = -EIO;
-		goto fail;
-	}
-
-	/* set incoming address */
-	hc->sin_local.sin_family = AF_INET;
-	hc->sin_local.sin_addr.s_addr = INADDR_ANY;
-	hc->sin_local.sin_port = htons((unsigned short)hc->localport);
-
-	/* set outgoing address */
-	hc->sin_remote.sin_family = AF_INET;
-	hc->sin_remote.sin_addr.s_addr = htonl(hc->remoteip);
-	hc->sin_remote.sin_port = htons((unsigned short)hc->remoteport);
-
-	/* bind to incoming port */
-	if (socket->ops->bind(socket, (struct sockaddr_unsized *)&hc->sin_local,
-			      sizeof(hc->sin_local))) {
-		printk(KERN_ERR "%s: Failed to bind socket to port %d.\n",
-		       __func__, hc->localport);
-		ret = -EINVAL;
-		goto fail;
-	}
-
-	/* check sk */
-	if (socket->sk == NULL) {
-		printk(KERN_ERR "%s: socket->sk == NULL\n", __func__);
-		ret = -EIO;
-		goto fail;
-	}
-
-	/* build send message */
-	hc->sendmsg.msg_name = &hc->sin_remote;
-	hc->sendmsg.msg_namelen = sizeof(hc->sin_remote);
-	hc->sendmsg.msg_control = NULL;
-	hc->sendmsg.msg_controllen = 0;
-
-	/* give away socket */
-	spin_lock(&hc->socket_lock);
-	hc->socket = socket;
-	spin_unlock(&hc->socket_lock);
-
-	/* read loop */
-	if (debug & DEBUG_L1OIP_SOCKET)
-		printk(KERN_DEBUG "%s: socket created and open\n",
-		       __func__);
-	while (!signal_pending(current)) {
-		iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, recvbuf_size);
-		recvlen = sock_recvmsg(socket, &msg, 0);
-		if (recvlen > 0) {
-			l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen);
-		} else {
-			if (debug & DEBUG_L1OIP_SOCKET)
-				printk(KERN_WARNING
-				       "%s: broken pipe on socket\n", __func__);
-		}
-	}
-
-	/* get socket back, check first if in use, maybe by send function */
-	spin_lock(&hc->socket_lock);
-	/* if hc->socket is NULL, it is in use until it is given back */
-	while (!hc->socket) {
-		spin_unlock(&hc->socket_lock);
-		schedule_timeout(HZ / 10);
-		spin_lock(&hc->socket_lock);
-	}
-	hc->socket = NULL;
-	spin_unlock(&hc->socket_lock);
-
-	if (debug & DEBUG_L1OIP_SOCKET)
-		printk(KERN_DEBUG "%s: socket thread terminating\n",
-		       __func__);
-
-fail:
-	/* free recvbuf */
-	kfree(recvbuf);
-
-	/* close socket */
-	if (socket)
-		sock_release(socket);
-
-	/* if we got killed, signal completion */
-	complete(&hc->socket_complete);
-	hc->socket_thread = NULL; /* show termination of thread */
-
-	if (debug & DEBUG_L1OIP_SOCKET)
-		printk(KERN_DEBUG "%s: socket thread terminated\n",
-		       __func__);
-	return ret;
-}
-
-static void
-l1oip_socket_close(struct l1oip *hc)
-{
-	struct dchannel *dch = hc->chan[hc->d_idx].dch;
-
-	/* kill thread */
-	if (hc->socket_thread) {
-		if (debug & DEBUG_L1OIP_SOCKET)
-			printk(KERN_DEBUG "%s: socket thread exists, "
-			       "killing...\n", __func__);
-		send_sig(SIGTERM, hc->socket_thread, 0);
-		wait_for_completion(&hc->socket_complete);
-	}
-
-	/* if active, we send up a PH_DEACTIVATE and deactivate */
-	if (test_bit(FLG_ACTIVE, &dch->Flags)) {
-		if (debug & (DEBUG_L1OIP_MSG | DEBUG_L1OIP_SOCKET))
-			printk(KERN_DEBUG "%s: interface become deactivated "
-			       "due to timeout\n", __func__);
-		test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0,
-			    NULL, GFP_ATOMIC);
-	}
-}
-
-static int
-l1oip_socket_open(struct l1oip *hc)
-{
-	/* in case of reopen, we need to close first */
-	l1oip_socket_close(hc);
-
-	init_completion(&hc->socket_complete);
-
-	/* create receive process */
-	hc->socket_thread = kthread_run(l1oip_socket_thread, hc, "l1oip_%s",
-					hc->name);
-	if (IS_ERR(hc->socket_thread)) {
-		int err = PTR_ERR(hc->socket_thread);
-		printk(KERN_ERR "%s: Failed (%d) to create socket process.\n",
-		       __func__, err);
-		hc->socket_thread = NULL;
-		sock_release(hc->socket);
-		return err;
-	}
-	if (debug & DEBUG_L1OIP_SOCKET)
-		printk(KERN_DEBUG "%s: socket thread created\n", __func__);
-
-	return 0;
-}
-
-
-static void
-l1oip_send_bh(struct work_struct *work)
-{
-	struct l1oip *hc = container_of(work, struct l1oip, workq);
-
-	if (debug & (DEBUG_L1OIP_MSG | DEBUG_L1OIP_SOCKET))
-		printk(KERN_DEBUG "%s: keepalive timer expired, sending empty "
-		       "frame on dchannel\n", __func__);
-
-	/* send an empty l1oip frame at D-channel */
-	l1oip_socket_send(hc, 0, hc->d_idx, 0, 0, NULL, 0);
-}
-
-
-/*
- * timer stuff
- */
-static void
-l1oip_keepalive(struct timer_list *t)
-{
-	struct l1oip *hc = timer_container_of(hc, t, keep_tl);
-
-	schedule_work(&hc->workq);
-}
-
-static void
-l1oip_timeout(struct timer_list *t)
-{
-	struct l1oip			*hc = timer_container_of(hc, t,
-								     timeout_tl);
-	struct dchannel		*dch = hc->chan[hc->d_idx].dch;
-
-	if (debug & DEBUG_L1OIP_MSG)
-		printk(KERN_DEBUG "%s: timeout timer expired, turn layer one "
-		       "down.\n", __func__);
-
-	hc->timeout_on = 0; /* state that timer must be initialized next time */
-
-	/* if timeout, we send up a PH_DEACTIVATE and deactivate */
-	if (test_bit(FLG_ACTIVE, &dch->Flags)) {
-		if (debug & (DEBUG_L1OIP_MSG | DEBUG_L1OIP_SOCKET))
-			printk(KERN_DEBUG "%s: interface become deactivated "
-			       "due to timeout\n", __func__);
-		test_and_clear_bit(FLG_ACTIVE, &dch->Flags);
-		_queue_data(&dch->dev.D, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0,
-			    NULL, GFP_ATOMIC);
-	}
-
-	/* if we have ondemand set, we remove ip address */
-	if (hc->ondemand) {
-		if (debug & DEBUG_L1OIP_MSG)
-			printk(KERN_DEBUG "%s: on demand causes ip address to "
-			       "be removed\n", __func__);
-		hc->sin_remote.sin_addr.s_addr = 0;
-	}
-}
-
-
-/*
- * message handling
- */
-static int
-handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct l1oip			*hc = dch->hw;
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	int			ret = -EINVAL;
-	int			l, ll;
-	unsigned char		*p;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		if (skb->len < 1) {
-			printk(KERN_WARNING "%s: skb too small\n",
-			       __func__);
-			break;
-		}
-		if (skb->len > MAX_DFRAME_LEN_L1 || skb->len > L1OIP_MAX_LEN) {
-			printk(KERN_WARNING "%s: skb too large\n",
-			       __func__);
-			break;
-		}
-		/* send frame */
-		p = skb->data;
-		l = skb->len;
-		while (l) {
-			/*
-			 * This is technically bounded by L1OIP_MAX_PERFRAME but
-			 * MAX_DFRAME_LEN_L1 < L1OIP_MAX_PERFRAME
-			 */
-			ll = (l < MAX_DFRAME_LEN_L1) ? l : MAX_DFRAME_LEN_L1;
-			l1oip_socket_send(hc, 0, dch->slot, 0,
-					  hc->chan[dch->slot].tx_counter++, p, ll);
-			p += ll;
-			l -= ll;
-		}
-		skb_trim(skb, 0);
-		queue_ch_frame(ch, PH_DATA_CNF, hh->id, skb);
-		return 0;
-	case PH_ACTIVATE_REQ:
-		if (debug & (DEBUG_L1OIP_MSG | DEBUG_L1OIP_SOCKET))
-			printk(KERN_DEBUG "%s: PH_ACTIVATE channel %d (1..%d)\n"
-			       , __func__, dch->slot, hc->b_num + 1);
-		skb_trim(skb, 0);
-		if (test_bit(FLG_ACTIVE, &dch->Flags))
-			queue_ch_frame(ch, PH_ACTIVATE_IND, hh->id, skb);
-		else
-			queue_ch_frame(ch, PH_DEACTIVATE_IND, hh->id, skb);
-		return 0;
-	case PH_DEACTIVATE_REQ:
-		if (debug & (DEBUG_L1OIP_MSG | DEBUG_L1OIP_SOCKET))
-			printk(KERN_DEBUG "%s: PH_DEACTIVATE channel %d "
-			       "(1..%d)\n", __func__, dch->slot,
-			       hc->b_num + 1);
-		skb_trim(skb, 0);
-		if (test_bit(FLG_ACTIVE, &dch->Flags))
-			queue_ch_frame(ch, PH_ACTIVATE_IND, hh->id, skb);
-		else
-			queue_ch_frame(ch, PH_DEACTIVATE_IND, hh->id, skb);
-		return 0;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static int
-channel_dctrl(struct dchannel *dch, struct mISDN_ctrl_req *cq)
-{
-	int	ret = 0;
-	struct l1oip	*hc = dch->hw;
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_SETPEER | MISDN_CTRL_UNSETPEER
-			| MISDN_CTRL_GETPEER;
-		break;
-	case MISDN_CTRL_SETPEER:
-		hc->remoteip = (u32)cq->p1;
-		hc->remoteport = cq->p2 & 0xffff;
-		hc->localport = cq->p2 >> 16;
-		if (!hc->remoteport)
-			hc->remoteport = hc->localport;
-		if (debug & DEBUG_L1OIP_SOCKET)
-			printk(KERN_DEBUG "%s: got new ip address from user "
-			       "space.\n", __func__);
-		l1oip_socket_open(hc);
-		break;
-	case MISDN_CTRL_UNSETPEER:
-		if (debug & DEBUG_L1OIP_SOCKET)
-			printk(KERN_DEBUG "%s: removing ip address.\n",
-			       __func__);
-		hc->remoteip = 0;
-		l1oip_socket_open(hc);
-		break;
-	case MISDN_CTRL_GETPEER:
-		if (debug & DEBUG_L1OIP_SOCKET)
-			printk(KERN_DEBUG "%s: getting ip address.\n",
-			       __func__);
-		cq->p1 = hc->remoteip;
-		cq->p2 = hc->remoteport | (hc->localport << 16);
-		break;
-	default:
-		printk(KERN_WARNING "%s: unknown Op %x\n",
-		       __func__, cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-open_dchannel(struct l1oip *hc, struct dchannel *dch, struct channel_req *rq)
-{
-	if (debug & DEBUG_HW_OPEN)
-		printk(KERN_DEBUG "%s: dev(%d) open from %p\n", __func__,
-		       dch->dev.id, __builtin_return_address(0));
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	if ((dch->dev.D.protocol != ISDN_P_NONE) &&
-	    (dch->dev.D.protocol != rq->protocol)) {
-		if (debug & DEBUG_HW_OPEN)
-			printk(KERN_WARNING "%s: change protocol %x to %x\n",
-			       __func__, dch->dev.D.protocol, rq->protocol);
-	}
-	if (dch->dev.D.protocol != rq->protocol)
-		dch->dev.D.protocol = rq->protocol;
-
-	if (test_bit(FLG_ACTIVE, &dch->Flags)) {
-		_queue_data(&dch->dev.D, PH_ACTIVATE_IND, MISDN_ID_ANY,
-			    0, NULL, GFP_KERNEL);
-	}
-	rq->ch = &dch->dev.D;
-	if (!try_module_get(THIS_MODULE))
-		printk(KERN_WARNING "%s:cannot get module\n", __func__);
-	return 0;
-}
-
-static int
-open_bchannel(struct l1oip *hc, struct dchannel *dch, struct channel_req *rq)
-{
-	struct bchannel	*bch;
-	int		ch;
-
-	if (!test_channelmap(rq->adr.channel, dch->dev.channelmap))
-		return -EINVAL;
-	if (rq->protocol == ISDN_P_NONE)
-		return -EINVAL;
-	ch = rq->adr.channel; /* BRI: 1=B1 2=B2  PRI: 1..15,17.. */
-	bch = hc->chan[ch].bch;
-	if (!bch) {
-		printk(KERN_ERR "%s:internal error ch %d has no bch\n",
-		       __func__, ch);
-		return -EINVAL;
-	}
-	if (test_and_set_bit(FLG_OPEN, &bch->Flags))
-		return -EBUSY; /* b-channel can be only open once */
-	bch->ch.protocol = rq->protocol;
-	rq->ch = &bch->ch;
-	if (!try_module_get(THIS_MODULE))
-		printk(KERN_WARNING "%s:cannot get module\n", __func__);
-	return 0;
-}
-
-static int
-l1oip_dctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct mISDNdevice	*dev = container_of(ch, struct mISDNdevice, D);
-	struct dchannel		*dch = container_of(dev, struct dchannel, dev);
-	struct l1oip			*hc = dch->hw;
-	struct channel_req	*rq;
-	int			err = 0;
-
-	if (dch->debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: cmd:%x %p\n",
-		       __func__, cmd, arg);
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		rq = arg;
-		switch (rq->protocol) {
-		case ISDN_P_TE_S0:
-		case ISDN_P_NT_S0:
-			if (hc->pri) {
-				err = -EINVAL;
-				break;
-			}
-			err = open_dchannel(hc, dch, rq);
-			break;
-		case ISDN_P_TE_E1:
-		case ISDN_P_NT_E1:
-			if (!hc->pri) {
-				err = -EINVAL;
-				break;
-			}
-			err = open_dchannel(hc, dch, rq);
-			break;
-		default:
-			err = open_bchannel(hc, dch, rq);
-		}
-		break;
-	case CLOSE_CHANNEL:
-		if (debug & DEBUG_HW_OPEN)
-			printk(KERN_DEBUG "%s: dev(%d) close from %p\n",
-			       __func__, dch->dev.id,
-			       __builtin_return_address(0));
-		module_put(THIS_MODULE);
-		break;
-	case CONTROL_CHANNEL:
-		err = channel_dctrl(dch, arg);
-		break;
-	default:
-		if (dch->debug & DEBUG_HW)
-			printk(KERN_DEBUG "%s: unknown command %x\n",
-			       __func__, cmd);
-		err = -EINVAL;
-	}
-	return err;
-}
-
-static int
-handle_bmsg(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct bchannel		*bch = container_of(ch, struct bchannel, ch);
-	struct l1oip			*hc = bch->hw;
-	int			ret = -EINVAL;
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	int			l, ll;
-	unsigned char		*p;
-
-	switch (hh->prim) {
-	case PH_DATA_REQ:
-		if (skb->len <= 0) {
-			printk(KERN_WARNING "%s: skb too small\n",
-			       __func__);
-			break;
-		}
-		if (skb->len > MAX_DFRAME_LEN_L1 || skb->len > L1OIP_MAX_LEN) {
-			printk(KERN_WARNING "%s: skb too large\n",
-			       __func__);
-			break;
-		}
-		/* check for AIS / ulaw-silence */
-		l = skb->len;
-		if (!memchr_inv(skb->data, 0xff, l)) {
-			if (debug & DEBUG_L1OIP_MSG)
-				printk(KERN_DEBUG "%s: got AIS, not sending, "
-				       "but counting\n", __func__);
-			hc->chan[bch->slot].tx_counter += l;
-			skb_trim(skb, 0);
-			queue_ch_frame(ch, PH_DATA_CNF, hh->id, skb);
-			return 0;
-		}
-		/* check for silence */
-		l = skb->len;
-		if (!memchr_inv(skb->data, 0x2a, l)) {
-			if (debug & DEBUG_L1OIP_MSG)
-				printk(KERN_DEBUG "%s: got silence, not sending"
-				       ", but counting\n", __func__);
-			hc->chan[bch->slot].tx_counter += l;
-			skb_trim(skb, 0);
-			queue_ch_frame(ch, PH_DATA_CNF, hh->id, skb);
-			return 0;
-		}
-
-		/* send frame */
-		p = skb->data;
-		l = skb->len;
-		while (l) {
-			/*
-			 * This is technically bounded by L1OIP_MAX_PERFRAME but
-			 * MAX_DFRAME_LEN_L1 < L1OIP_MAX_PERFRAME
-			 */
-			ll = (l < MAX_DFRAME_LEN_L1) ? l : MAX_DFRAME_LEN_L1;
-			l1oip_socket_send(hc, hc->codec, bch->slot, 0,
-					  hc->chan[bch->slot].tx_counter, p, ll);
-			hc->chan[bch->slot].tx_counter += ll;
-			p += ll;
-			l -= ll;
-		}
-		skb_trim(skb, 0);
-		queue_ch_frame(ch, PH_DATA_CNF, hh->id, skb);
-		return 0;
-	case PH_ACTIVATE_REQ:
-		if (debug & (DEBUG_L1OIP_MSG | DEBUG_L1OIP_SOCKET))
-			printk(KERN_DEBUG "%s: PH_ACTIVATE channel %d (1..%d)\n"
-			       , __func__, bch->slot, hc->b_num + 1);
-		hc->chan[bch->slot].codecstate = 0;
-		test_and_set_bit(FLG_ACTIVE, &bch->Flags);
-		skb_trim(skb, 0);
-		queue_ch_frame(ch, PH_ACTIVATE_IND, hh->id, skb);
-		return 0;
-	case PH_DEACTIVATE_REQ:
-		if (debug & (DEBUG_L1OIP_MSG | DEBUG_L1OIP_SOCKET))
-			printk(KERN_DEBUG "%s: PH_DEACTIVATE channel %d "
-			       "(1..%d)\n", __func__, bch->slot,
-			       hc->b_num + 1);
-		test_and_clear_bit(FLG_ACTIVE, &bch->Flags);
-		skb_trim(skb, 0);
-		queue_ch_frame(ch, PH_DEACTIVATE_IND, hh->id, skb);
-		return 0;
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static int
-channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq)
-{
-	int			ret = 0;
-	struct dsp_features	*features =
-		(struct dsp_features *)(*((u_long *)&cq->p1));
-
-	switch (cq->op) {
-	case MISDN_CTRL_GETOP:
-		cq->op = MISDN_CTRL_HW_FEATURES_OP;
-		break;
-	case MISDN_CTRL_HW_FEATURES: /* fill features structure */
-		if (debug & DEBUG_L1OIP_MSG)
-			printk(KERN_DEBUG "%s: HW_FEATURE request\n",
-			       __func__);
-		/* create confirm */
-		features->unclocked = 1;
-		features->unordered = 1;
-		break;
-	default:
-		printk(KERN_WARNING "%s: unknown Op %x\n",
-		       __func__, cq->op);
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static int
-l1oip_bctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct bchannel	*bch = container_of(ch, struct bchannel, ch);
-	int		err = -EINVAL;
-
-	if (bch->debug & DEBUG_HW)
-		printk(KERN_DEBUG "%s: cmd:%x %p\n",
-		       __func__, cmd, arg);
-	switch (cmd) {
-	case CLOSE_CHANNEL:
-		test_and_clear_bit(FLG_OPEN, &bch->Flags);
-		test_and_clear_bit(FLG_ACTIVE, &bch->Flags);
-		ch->protocol = ISDN_P_NONE;
-		ch->peer = NULL;
-		module_put(THIS_MODULE);
-		err = 0;
-		break;
-	case CONTROL_CHANNEL:
-		err = channel_bctrl(bch, arg);
-		break;
-	default:
-		printk(KERN_WARNING "%s: unknown prim(%x)\n",
-		       __func__, cmd);
-	}
-	return err;
-}
-
-
-/*
- * cleanup module and stack
- */
-static void
-release_card(struct l1oip *hc)
-{
-	int	ch;
-
-	hc->shutdown = true;
-
-	timer_shutdown_sync(&hc->keep_tl);
-	timer_shutdown_sync(&hc->timeout_tl);
-
-	cancel_work_sync(&hc->workq);
-
-	if (hc->socket_thread)
-		l1oip_socket_close(hc);
-
-	if (hc->registered && hc->chan[hc->d_idx].dch)
-		mISDN_unregister_device(&hc->chan[hc->d_idx].dch->dev);
-	for (ch = 0; ch < 128; ch++) {
-		if (hc->chan[ch].dch) {
-			mISDN_freedchannel(hc->chan[ch].dch);
-			kfree(hc->chan[ch].dch);
-		}
-		if (hc->chan[ch].bch) {
-			mISDN_freebchannel(hc->chan[ch].bch);
-			kfree(hc->chan[ch].bch);
-#ifdef REORDER_DEBUG
-			dev_kfree_skb(hc->chan[ch].disorder_skb);
-#endif
-		}
-	}
-
-	spin_lock(&l1oip_lock);
-	list_del(&hc->list);
-	spin_unlock(&l1oip_lock);
-
-	kfree(hc);
-}
-
-static void
-l1oip_cleanup(void)
-{
-	struct l1oip *hc, *next;
-
-	list_for_each_entry_safe(hc, next, &l1oip_ilist, list)
-		release_card(hc);
-
-	l1oip_4bit_free();
-}
-
-
-/*
- * module and stack init
- */
-static int
-init_card(struct l1oip *hc, int pri, int bundle)
-{
-	struct dchannel	*dch;
-	struct bchannel	*bch;
-	int		ret;
-	int		i, ch;
-
-	spin_lock_init(&hc->socket_lock);
-	hc->idx = l1oip_cnt;
-	hc->pri = pri;
-	hc->d_idx = pri ? 16 : 3;
-	hc->b_num = pri ? 30 : 2;
-	hc->bundle = bundle;
-	if (hc->pri)
-		sprintf(hc->name, "l1oip-e1.%d", l1oip_cnt + 1);
-	else
-		sprintf(hc->name, "l1oip-s0.%d", l1oip_cnt + 1);
-
-	switch (codec[l1oip_cnt]) {
-	case 0: /* as is */
-	case 1: /* alaw */
-	case 2: /* ulaw */
-	case 3: /* 4bit */
-		break;
-	default:
-		printk(KERN_ERR "Codec(%d) not supported.\n",
-		       codec[l1oip_cnt]);
-		return -EINVAL;
-	}
-	hc->codec = codec[l1oip_cnt];
-	if (debug & DEBUG_L1OIP_INIT)
-		printk(KERN_DEBUG "%s: using codec %d\n",
-		       __func__, hc->codec);
-
-	if (id[l1oip_cnt] == 0) {
-		printk(KERN_WARNING "Warning: No 'id' value given or "
-		       "0, this is highly unsecure. Please use 32 "
-		       "bit random number 0x...\n");
-	}
-	hc->id = id[l1oip_cnt];
-	if (debug & DEBUG_L1OIP_INIT)
-		printk(KERN_DEBUG "%s: using id 0x%x\n", __func__, hc->id);
-
-	hc->ondemand = ondemand[l1oip_cnt];
-	if (hc->ondemand && !hc->id) {
-		printk(KERN_ERR "%s: ondemand option only allowed in "
-		       "conjunction with non 0 ID\n", __func__);
-		return -EINVAL;
-	}
-
-	if (limit[l1oip_cnt])
-		hc->b_num = limit[l1oip_cnt];
-	if (!pri && hc->b_num > 2) {
-		printk(KERN_ERR "Maximum limit for BRI interface is 2 "
-		       "channels.\n");
-		return -EINVAL;
-	}
-	if (pri && hc->b_num > 126) {
-		printk(KERN_ERR "Maximum limit for PRI interface is 126 "
-		       "channels.\n");
-		return -EINVAL;
-	}
-	if (pri && hc->b_num > 30) {
-		printk(KERN_WARNING "Maximum limit for BRI interface is 30 "
-		       "channels.\n");
-		printk(KERN_WARNING "Your selection of %d channels must be "
-		       "supported by application.\n", hc->limit);
-	}
-
-	hc->remoteip = ip[l1oip_cnt << 2] << 24
-		| ip[(l1oip_cnt << 2) + 1] << 16
-		| ip[(l1oip_cnt << 2) + 2] << 8
-		| ip[(l1oip_cnt << 2) + 3];
-	hc->localport = port[l1oip_cnt]?:(L1OIP_DEFAULTPORT + l1oip_cnt);
-	if (remoteport[l1oip_cnt])
-		hc->remoteport = remoteport[l1oip_cnt];
-	else
-		hc->remoteport = hc->localport;
-	if (debug & DEBUG_L1OIP_INIT)
-		printk(KERN_DEBUG "%s: using local port %d remote ip "
-		       "%d.%d.%d.%d port %d ondemand %d\n", __func__,
-		       hc->localport, hc->remoteip >> 24,
-		       (hc->remoteip >> 16) & 0xff,
-		       (hc->remoteip >> 8) & 0xff, hc->remoteip & 0xff,
-		       hc->remoteport, hc->ondemand);
-
-	dch = kzalloc_obj(struct dchannel);
-	if (!dch)
-		return -ENOMEM;
-	dch->debug = debug;
-	mISDN_initdchannel(dch, MAX_DFRAME_LEN_L1, NULL);
-	dch->hw = hc;
-	if (pri)
-		dch->dev.Dprotocols = (1 << ISDN_P_TE_E1) | (1 << ISDN_P_NT_E1);
-	else
-		dch->dev.Dprotocols = (1 << ISDN_P_TE_S0) | (1 << ISDN_P_NT_S0);
-	dch->dev.Bprotocols = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) |
-		(1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK));
-	dch->dev.D.send = handle_dmsg;
-	dch->dev.D.ctrl = l1oip_dctrl;
-	dch->dev.nrbchan = hc->b_num;
-	dch->slot = hc->d_idx;
-	hc->chan[hc->d_idx].dch = dch;
-	i = 1;
-	for (ch = 0; ch < dch->dev.nrbchan; ch++) {
-		if (ch == 15)
-			i++;
-		bch = kzalloc_obj(struct bchannel);
-		if (!bch) {
-			printk(KERN_ERR "%s: no memory for bchannel\n",
-			       __func__);
-			return -ENOMEM;
-		}
-		bch->nr = i + ch;
-		bch->slot = i + ch;
-		bch->debug = debug;
-		mISDN_initbchannel(bch, MAX_DATA_MEM, 0);
-		bch->hw = hc;
-		bch->ch.send = handle_bmsg;
-		bch->ch.ctrl = l1oip_bctrl;
-		bch->ch.nr = i + ch;
-		list_add(&bch->ch.list, &dch->dev.bchannels);
-		hc->chan[i + ch].bch = bch;
-		set_channelmap(bch->nr, dch->dev.channelmap);
-	}
-	/* TODO: create a parent device for this driver */
-	ret = mISDN_register_device(&dch->dev, NULL, hc->name);
-	if (ret)
-		return ret;
-	hc->registered = 1;
-
-	if (debug & DEBUG_L1OIP_INIT)
-		printk(KERN_DEBUG "%s: Setting up network card(%d)\n",
-		       __func__, l1oip_cnt + 1);
-	ret = l1oip_socket_open(hc);
-	if (ret)
-		return ret;
-
-	timer_setup(&hc->keep_tl, l1oip_keepalive, 0);
-	hc->keep_tl.expires = jiffies + 2 * HZ; /* two seconds first time */
-	add_timer(&hc->keep_tl);
-
-	timer_setup(&hc->timeout_tl, l1oip_timeout, 0);
-	hc->timeout_on = 0; /* state that we have timer off */
-
-	return 0;
-}
-
-static int __init
-l1oip_init(void)
-{
-	int		pri, bundle;
-	struct l1oip		*hc;
-	int		ret;
-
-	printk(KERN_INFO "mISDN: Layer-1-over-IP driver Rev. %s\n",
-	       l1oip_revision);
-
-	if (l1oip_4bit_alloc(ulaw))
-		return -ENOMEM;
-
-	l1oip_cnt = 0;
-	while (l1oip_cnt < MAX_CARDS && type[l1oip_cnt]) {
-		switch (type[l1oip_cnt] & 0xff) {
-		case 1:
-			pri = 0;
-			bundle = 0;
-			break;
-		case 2:
-			pri = 1;
-			bundle = 0;
-			break;
-		case 3:
-			pri = 0;
-			bundle = 1;
-			break;
-		case 4:
-			pri = 1;
-			bundle = 1;
-			break;
-		default:
-			printk(KERN_ERR "Card type(%d) not supported.\n",
-			       type[l1oip_cnt] & 0xff);
-			l1oip_cleanup();
-			return -EINVAL;
-		}
-
-		if (debug & DEBUG_L1OIP_INIT)
-			printk(KERN_DEBUG "%s: interface %d is %s with %s.\n",
-			       __func__, l1oip_cnt, pri ? "PRI" : "BRI",
-			       bundle ? "bundled IP packet for all B-channels" :
-			       "separate IP packets for every B-channel");
-
-		hc = kzalloc_obj(struct l1oip, GFP_ATOMIC);
-		if (!hc) {
-			printk(KERN_ERR "No kmem for L1-over-IP driver.\n");
-			l1oip_cleanup();
-			return -ENOMEM;
-		}
-		INIT_WORK(&hc->workq, (void *)l1oip_send_bh);
-
-		spin_lock(&l1oip_lock);
-		list_add_tail(&hc->list, &l1oip_ilist);
-		spin_unlock(&l1oip_lock);
-
-		ret = init_card(hc, pri, bundle);
-		if (ret) {
-			l1oip_cleanup();
-			return ret;
-		}
-
-		l1oip_cnt++;
-	}
-	printk(KERN_INFO "%d virtual devices registered\n", l1oip_cnt);
-	return 0;
-}
-
-module_init(l1oip_init);
-module_exit(l1oip_cleanup);
diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c
deleted file mode 100644
index 3fbc170acf9a..000000000000
--- a/drivers/isdn/mISDN/layer1.c
+++ /dev/null
@@ -1,415 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Author	Karsten Keil <kkeil@novell.com>
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/mISDNhw.h>
-#include "core.h"
-#include "layer1.h"
-#include "fsm.h"
-
-static u_int *debug;
-
-struct layer1 {
-	u_long Flags;
-	struct FsmInst l1m;
-	struct FsmTimer timer3;
-	struct FsmTimer timerX;
-	int delay;
-	int t3_value;
-	struct dchannel *dch;
-	dchannel_l1callback *dcb;
-};
-
-#define TIMER3_DEFAULT_VALUE	7000
-
-static
-struct Fsm l1fsm_s = {NULL, 0, 0, NULL, NULL};
-
-enum {
-	ST_L1_F2,
-	ST_L1_F3,
-	ST_L1_F4,
-	ST_L1_F5,
-	ST_L1_F6,
-	ST_L1_F7,
-	ST_L1_F8,
-};
-
-#define L1S_STATE_COUNT (ST_L1_F8 + 1)
-
-static char *strL1SState[] =
-{
-	"ST_L1_F2",
-	"ST_L1_F3",
-	"ST_L1_F4",
-	"ST_L1_F5",
-	"ST_L1_F6",
-	"ST_L1_F7",
-	"ST_L1_F8",
-};
-
-enum {
-	EV_PH_ACTIVATE,
-	EV_PH_DEACTIVATE,
-	EV_RESET_IND,
-	EV_DEACT_CNF,
-	EV_DEACT_IND,
-	EV_POWER_UP,
-	EV_ANYSIG_IND,
-	EV_INFO2_IND,
-	EV_INFO4_IND,
-	EV_TIMER_DEACT,
-	EV_TIMER_ACT,
-	EV_TIMER3,
-};
-
-#define L1_EVENT_COUNT (EV_TIMER3 + 1)
-
-static char *strL1Event[] =
-{
-	"EV_PH_ACTIVATE",
-	"EV_PH_DEACTIVATE",
-	"EV_RESET_IND",
-	"EV_DEACT_CNF",
-	"EV_DEACT_IND",
-	"EV_POWER_UP",
-	"EV_ANYSIG_IND",
-	"EV_INFO2_IND",
-	"EV_INFO4_IND",
-	"EV_TIMER_DEACT",
-	"EV_TIMER_ACT",
-	"EV_TIMER3",
-};
-
-static void
-l1m_debug(struct FsmInst *fi, char *fmt, ...)
-{
-	struct layer1 *l1 = fi->userdata;
-	struct va_format vaf;
-	va_list va;
-
-	va_start(va, fmt);
-
-	vaf.fmt = fmt;
-	vaf.va = &va;
-
-	printk(KERN_DEBUG "%s: %pV\n", dev_name(&l1->dch->dev.dev), &vaf);
-
-	va_end(va);
-}
-
-static void
-l1_reset(struct FsmInst *fi, int event, void *arg)
-{
-	mISDN_FsmChangeState(fi, ST_L1_F3);
-}
-
-static void
-l1_deact_cnf(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer1 *l1 = fi->userdata;
-
-	mISDN_FsmChangeState(fi, ST_L1_F3);
-	if (test_bit(FLG_L1_ACTIVATING, &l1->Flags))
-		l1->dcb(l1->dch, HW_POWERUP_REQ);
-}
-
-static void
-l1_deact_req_s(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer1 *l1 = fi->userdata;
-
-	mISDN_FsmChangeState(fi, ST_L1_F3);
-	mISDN_FsmRestartTimer(&l1->timerX, 550, EV_TIMER_DEACT, NULL, 2);
-	test_and_set_bit(FLG_L1_DEACTTIMER, &l1->Flags);
-}
-
-static void
-l1_power_up_s(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer1 *l1 = fi->userdata;
-
-	if (test_bit(FLG_L1_ACTIVATING, &l1->Flags)) {
-		mISDN_FsmChangeState(fi, ST_L1_F4);
-		l1->dcb(l1->dch, INFO3_P8);
-	} else
-		mISDN_FsmChangeState(fi, ST_L1_F3);
-}
-
-static void
-l1_go_F5(struct FsmInst *fi, int event, void *arg)
-{
-	mISDN_FsmChangeState(fi, ST_L1_F5);
-}
-
-static void
-l1_go_F8(struct FsmInst *fi, int event, void *arg)
-{
-	mISDN_FsmChangeState(fi, ST_L1_F8);
-}
-
-static void
-l1_info2_ind(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer1 *l1 = fi->userdata;
-
-	mISDN_FsmChangeState(fi, ST_L1_F6);
-	l1->dcb(l1->dch, INFO3_P8);
-}
-
-static void
-l1_info4_ind(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer1 *l1 = fi->userdata;
-
-	mISDN_FsmChangeState(fi, ST_L1_F7);
-	l1->dcb(l1->dch, INFO3_P8);
-	if (test_and_clear_bit(FLG_L1_DEACTTIMER, &l1->Flags))
-		mISDN_FsmDelTimer(&l1->timerX, 4);
-	if (!test_bit(FLG_L1_ACTIVATED, &l1->Flags)) {
-		if (test_and_clear_bit(FLG_L1_T3RUN, &l1->Flags))
-			mISDN_FsmDelTimer(&l1->timer3, 3);
-		mISDN_FsmRestartTimer(&l1->timerX, 110, EV_TIMER_ACT, NULL, 2);
-		test_and_set_bit(FLG_L1_ACTTIMER, &l1->Flags);
-	}
-}
-
-static void
-l1_timer3(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer1 *l1 = fi->userdata;
-
-	test_and_clear_bit(FLG_L1_T3RUN, &l1->Flags);
-	if (test_and_clear_bit(FLG_L1_ACTIVATING, &l1->Flags)) {
-		if (test_and_clear_bit(FLG_L1_DBLOCKED, &l1->Flags))
-			l1->dcb(l1->dch, HW_D_NOBLOCKED);
-		l1->dcb(l1->dch, PH_DEACTIVATE_IND);
-	}
-	if (l1->l1m.state != ST_L1_F6) {
-		mISDN_FsmChangeState(fi, ST_L1_F3);
-		/* do not force anything here, we need send INFO 0 */
-	}
-}
-
-static void
-l1_timer_act(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer1 *l1 = fi->userdata;
-
-	test_and_clear_bit(FLG_L1_ACTTIMER, &l1->Flags);
-	test_and_set_bit(FLG_L1_ACTIVATED, &l1->Flags);
-	l1->dcb(l1->dch, PH_ACTIVATE_IND);
-}
-
-static void
-l1_timer_deact(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer1 *l1 = fi->userdata;
-
-	test_and_clear_bit(FLG_L1_DEACTTIMER, &l1->Flags);
-	test_and_clear_bit(FLG_L1_ACTIVATED, &l1->Flags);
-	if (test_and_clear_bit(FLG_L1_DBLOCKED, &l1->Flags))
-		l1->dcb(l1->dch, HW_D_NOBLOCKED);
-	l1->dcb(l1->dch, PH_DEACTIVATE_IND);
-	l1->dcb(l1->dch, HW_DEACT_REQ);
-}
-
-static void
-l1_activate_s(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer1 *l1 = fi->userdata;
-
-	mISDN_FsmRestartTimer(&l1->timer3, l1->t3_value, EV_TIMER3, NULL, 2);
-	test_and_set_bit(FLG_L1_T3RUN, &l1->Flags);
-	/* Tell HW to send INFO 1 */
-	l1->dcb(l1->dch, HW_RESET_REQ);
-}
-
-static void
-l1_activate_no(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer1 *l1 = fi->userdata;
-
-	if ((!test_bit(FLG_L1_DEACTTIMER, &l1->Flags)) &&
-	    (!test_bit(FLG_L1_T3RUN, &l1->Flags))) {
-		test_and_clear_bit(FLG_L1_ACTIVATING, &l1->Flags);
-		if (test_and_clear_bit(FLG_L1_DBLOCKED, &l1->Flags))
-			l1->dcb(l1->dch, HW_D_NOBLOCKED);
-		l1->dcb(l1->dch, PH_DEACTIVATE_IND);
-	}
-}
-
-static struct FsmNode L1SFnList[] =
-{
-	{ST_L1_F3, EV_PH_ACTIVATE, l1_activate_s},
-	{ST_L1_F6, EV_PH_ACTIVATE, l1_activate_no},
-	{ST_L1_F8, EV_PH_ACTIVATE, l1_activate_no},
-	{ST_L1_F3, EV_RESET_IND, l1_reset},
-	{ST_L1_F4, EV_RESET_IND, l1_reset},
-	{ST_L1_F5, EV_RESET_IND, l1_reset},
-	{ST_L1_F6, EV_RESET_IND, l1_reset},
-	{ST_L1_F7, EV_RESET_IND, l1_reset},
-	{ST_L1_F8, EV_RESET_IND, l1_reset},
-	{ST_L1_F3, EV_DEACT_CNF, l1_deact_cnf},
-	{ST_L1_F4, EV_DEACT_CNF, l1_deact_cnf},
-	{ST_L1_F5, EV_DEACT_CNF, l1_deact_cnf},
-	{ST_L1_F6, EV_DEACT_CNF, l1_deact_cnf},
-	{ST_L1_F7, EV_DEACT_CNF, l1_deact_cnf},
-	{ST_L1_F8, EV_DEACT_CNF, l1_deact_cnf},
-	{ST_L1_F6, EV_DEACT_IND, l1_deact_req_s},
-	{ST_L1_F7, EV_DEACT_IND, l1_deact_req_s},
-	{ST_L1_F8, EV_DEACT_IND, l1_deact_req_s},
-	{ST_L1_F3, EV_POWER_UP,  l1_power_up_s},
-	{ST_L1_F4, EV_ANYSIG_IND, l1_go_F5},
-	{ST_L1_F6, EV_ANYSIG_IND, l1_go_F8},
-	{ST_L1_F7, EV_ANYSIG_IND, l1_go_F8},
-	{ST_L1_F3, EV_INFO2_IND, l1_info2_ind},
-	{ST_L1_F4, EV_INFO2_IND, l1_info2_ind},
-	{ST_L1_F5, EV_INFO2_IND, l1_info2_ind},
-	{ST_L1_F7, EV_INFO2_IND, l1_info2_ind},
-	{ST_L1_F8, EV_INFO2_IND, l1_info2_ind},
-	{ST_L1_F3, EV_INFO4_IND, l1_info4_ind},
-	{ST_L1_F4, EV_INFO4_IND, l1_info4_ind},
-	{ST_L1_F5, EV_INFO4_IND, l1_info4_ind},
-	{ST_L1_F6, EV_INFO4_IND, l1_info4_ind},
-	{ST_L1_F8, EV_INFO4_IND, l1_info4_ind},
-	{ST_L1_F3, EV_TIMER3, l1_timer3},
-	{ST_L1_F4, EV_TIMER3, l1_timer3},
-	{ST_L1_F5, EV_TIMER3, l1_timer3},
-	{ST_L1_F6, EV_TIMER3, l1_timer3},
-	{ST_L1_F8, EV_TIMER3, l1_timer3},
-	{ST_L1_F7, EV_TIMER_ACT, l1_timer_act},
-	{ST_L1_F3, EV_TIMER_DEACT, l1_timer_deact},
-	{ST_L1_F4, EV_TIMER_DEACT, l1_timer_deact},
-	{ST_L1_F5, EV_TIMER_DEACT, l1_timer_deact},
-	{ST_L1_F6, EV_TIMER_DEACT, l1_timer_deact},
-	{ST_L1_F7, EV_TIMER_DEACT, l1_timer_deact},
-	{ST_L1_F8, EV_TIMER_DEACT, l1_timer_deact},
-};
-
-static void
-release_l1(struct layer1 *l1) {
-	mISDN_FsmDelTimer(&l1->timerX, 0);
-	mISDN_FsmDelTimer(&l1->timer3, 0);
-	if (l1->dch)
-		l1->dch->l1 = NULL;
-	module_put(THIS_MODULE);
-	kfree(l1);
-}
-
-int
-l1_event(struct layer1 *l1, u_int event)
-{
-	int		err = 0;
-
-	if (!l1)
-		return -EINVAL;
-	switch (event) {
-	case HW_RESET_IND:
-		mISDN_FsmEvent(&l1->l1m, EV_RESET_IND, NULL);
-		break;
-	case HW_DEACT_IND:
-		mISDN_FsmEvent(&l1->l1m, EV_DEACT_IND, NULL);
-		break;
-	case HW_POWERUP_IND:
-		mISDN_FsmEvent(&l1->l1m, EV_POWER_UP, NULL);
-		break;
-	case HW_DEACT_CNF:
-		mISDN_FsmEvent(&l1->l1m, EV_DEACT_CNF, NULL);
-		break;
-	case ANYSIGNAL:
-		mISDN_FsmEvent(&l1->l1m, EV_ANYSIG_IND, NULL);
-		break;
-	case LOSTFRAMING:
-		mISDN_FsmEvent(&l1->l1m, EV_ANYSIG_IND, NULL);
-		break;
-	case INFO2:
-		mISDN_FsmEvent(&l1->l1m, EV_INFO2_IND, NULL);
-		break;
-	case INFO4_P8:
-		mISDN_FsmEvent(&l1->l1m, EV_INFO4_IND, NULL);
-		break;
-	case INFO4_P10:
-		mISDN_FsmEvent(&l1->l1m, EV_INFO4_IND, NULL);
-		break;
-	case PH_ACTIVATE_REQ:
-		if (test_bit(FLG_L1_ACTIVATED, &l1->Flags))
-			l1->dcb(l1->dch, PH_ACTIVATE_IND);
-		else {
-			test_and_set_bit(FLG_L1_ACTIVATING, &l1->Flags);
-			mISDN_FsmEvent(&l1->l1m, EV_PH_ACTIVATE, NULL);
-		}
-		break;
-	case CLOSE_CHANNEL:
-		release_l1(l1);
-		break;
-	default:
-		if ((event & ~HW_TIMER3_VMASK) == HW_TIMER3_VALUE) {
-			int val = event & HW_TIMER3_VMASK;
-
-			if (val < 5)
-				val = 5;
-			if (val > 30)
-				val = 30;
-			l1->t3_value = val;
-			break;
-		}
-		if (*debug & DEBUG_L1)
-			printk(KERN_DEBUG "%s %x unhandled\n",
-			       __func__, event);
-		err = -EINVAL;
-	}
-	return err;
-}
-EXPORT_SYMBOL(l1_event);
-
-int
-create_l1(struct dchannel *dch, dchannel_l1callback *dcb) {
-	struct layer1	*nl1;
-
-	nl1 = kzalloc_obj(struct layer1, GFP_ATOMIC);
-	if (!nl1) {
-		printk(KERN_ERR "kmalloc struct layer1 failed\n");
-		return -ENOMEM;
-	}
-	nl1->l1m.fsm = &l1fsm_s;
-	nl1->l1m.state = ST_L1_F3;
-	nl1->Flags = 0;
-	nl1->t3_value = TIMER3_DEFAULT_VALUE;
-	nl1->l1m.debug = *debug & DEBUG_L1_FSM;
-	nl1->l1m.userdata = nl1;
-	nl1->l1m.userint = 0;
-	nl1->l1m.printdebug = l1m_debug;
-	nl1->dch = dch;
-	nl1->dcb = dcb;
-	mISDN_FsmInitTimer(&nl1->l1m, &nl1->timer3);
-	mISDN_FsmInitTimer(&nl1->l1m, &nl1->timerX);
-	__module_get(THIS_MODULE);
-	dch->l1 = nl1;
-	return 0;
-}
-EXPORT_SYMBOL(create_l1);
-
-int
-Isdnl1_Init(u_int *deb)
-{
-	debug = deb;
-	l1fsm_s.state_count = L1S_STATE_COUNT;
-	l1fsm_s.event_count = L1_EVENT_COUNT;
-	l1fsm_s.strEvent = strL1Event;
-	l1fsm_s.strState = strL1SState;
-	return mISDN_FsmNew(&l1fsm_s, L1SFnList, ARRAY_SIZE(L1SFnList));
-}
-
-void
-Isdnl1_cleanup(void)
-{
-	mISDN_FsmFree(&l1fsm_s);
-}
diff --git a/drivers/isdn/mISDN/layer1.h b/drivers/isdn/mISDN/layer1.h
deleted file mode 100644
index f03e86450daf..000000000000
--- a/drivers/isdn/mISDN/layer1.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *
- * Layer 1 defines
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#define FLG_L1_ACTIVATING	1
-#define FLG_L1_ACTIVATED	2
-#define FLG_L1_DEACTTIMER	3
-#define FLG_L1_ACTTIMER		4
-#define FLG_L1_T3RUN		5
-#define FLG_L1_PULL_REQ		6
-#define FLG_L1_UINT		7
-#define FLG_L1_DBLOCKED		8
diff --git a/drivers/isdn/mISDN/layer2.c b/drivers/isdn/mISDN/layer2.c
deleted file mode 100644
index b75869c9f78f..000000000000
--- a/drivers/isdn/mISDN/layer2.c
+++ /dev/null
@@ -1,2266 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Author	Karsten Keil <kkeil@novell.com>
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#include <linux/mISDNif.h>
-#include <linux/slab.h>
-#include "core.h"
-#include "fsm.h"
-#include "layer2.h"
-
-static u_int *debug;
-
-static
-struct Fsm l2fsm = {NULL, 0, 0, NULL, NULL};
-
-static char *strL2State[] =
-{
-	"ST_L2_1",
-	"ST_L2_2",
-	"ST_L2_3",
-	"ST_L2_4",
-	"ST_L2_5",
-	"ST_L2_6",
-	"ST_L2_7",
-	"ST_L2_8",
-};
-
-enum {
-	EV_L2_UI,
-	EV_L2_SABME,
-	EV_L2_DISC,
-	EV_L2_DM,
-	EV_L2_UA,
-	EV_L2_FRMR,
-	EV_L2_SUPER,
-	EV_L2_I,
-	EV_L2_DL_DATA,
-	EV_L2_ACK_PULL,
-	EV_L2_DL_UNITDATA,
-	EV_L2_DL_ESTABLISH_REQ,
-	EV_L2_DL_RELEASE_REQ,
-	EV_L2_MDL_ASSIGN,
-	EV_L2_MDL_REMOVE,
-	EV_L2_MDL_ERROR,
-	EV_L1_DEACTIVATE,
-	EV_L2_T200,
-	EV_L2_T203,
-	EV_L2_T200I,
-	EV_L2_T203I,
-	EV_L2_SET_OWN_BUSY,
-	EV_L2_CLEAR_OWN_BUSY,
-	EV_L2_FRAME_ERROR,
-};
-
-#define L2_EVENT_COUNT (EV_L2_FRAME_ERROR + 1)
-
-static char *strL2Event[] =
-{
-	"EV_L2_UI",
-	"EV_L2_SABME",
-	"EV_L2_DISC",
-	"EV_L2_DM",
-	"EV_L2_UA",
-	"EV_L2_FRMR",
-	"EV_L2_SUPER",
-	"EV_L2_I",
-	"EV_L2_DL_DATA",
-	"EV_L2_ACK_PULL",
-	"EV_L2_DL_UNITDATA",
-	"EV_L2_DL_ESTABLISH_REQ",
-	"EV_L2_DL_RELEASE_REQ",
-	"EV_L2_MDL_ASSIGN",
-	"EV_L2_MDL_REMOVE",
-	"EV_L2_MDL_ERROR",
-	"EV_L1_DEACTIVATE",
-	"EV_L2_T200",
-	"EV_L2_T203",
-	"EV_L2_T200I",
-	"EV_L2_T203I",
-	"EV_L2_SET_OWN_BUSY",
-	"EV_L2_CLEAR_OWN_BUSY",
-	"EV_L2_FRAME_ERROR",
-};
-
-static void
-l2m_debug(struct FsmInst *fi, char *fmt, ...)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct va_format vaf;
-	va_list va;
-
-	if (!(*debug & DEBUG_L2_FSM))
-		return;
-
-	va_start(va, fmt);
-
-	vaf.fmt = fmt;
-	vaf.va = &va;
-
-	printk(KERN_DEBUG "%s l2 (sapi %d tei %d): %pV\n",
-	       mISDNDevName4ch(&l2->ch), l2->sapi, l2->tei, &vaf);
-
-	va_end(va);
-}
-
-inline u_int
-l2headersize(struct layer2 *l2, int ui)
-{
-	return ((test_bit(FLG_MOD128, &l2->flag) && (!ui)) ? 2 : 1) +
-		(test_bit(FLG_LAPD, &l2->flag) ? 2 : 1);
-}
-
-inline u_int
-l2addrsize(struct layer2 *l2)
-{
-	return test_bit(FLG_LAPD, &l2->flag) ? 2 : 1;
-}
-
-static u_int
-l2_newid(struct layer2 *l2)
-{
-	u_int	id;
-
-	id = l2->next_id++;
-	if (id == 0x7fff)
-		l2->next_id = 1;
-	id <<= 16;
-	id |= l2->tei << 8;
-	id |= l2->sapi;
-	return id;
-}
-
-static void
-l2up(struct layer2 *l2, u_int prim, struct sk_buff *skb)
-{
-	int	err;
-
-	if (!l2->up)
-		return;
-	mISDN_HEAD_PRIM(skb) = prim;
-	mISDN_HEAD_ID(skb) = (l2->ch.nr << 16) | l2->ch.addr;
-	err = l2->up->send(l2->up, skb);
-	if (err) {
-		printk(KERN_WARNING "%s: dev %s err=%d\n", __func__,
-		       mISDNDevName4ch(&l2->ch), err);
-		dev_kfree_skb(skb);
-	}
-}
-
-static void
-l2up_create(struct layer2 *l2, u_int prim, int len, void *arg)
-{
-	struct sk_buff	*skb;
-	struct mISDNhead *hh;
-	int		err;
-
-	if (!l2->up)
-		return;
-	skb = mI_alloc_skb(len, GFP_ATOMIC);
-	if (!skb)
-		return;
-	hh = mISDN_HEAD_P(skb);
-	hh->prim = prim;
-	hh->id = (l2->ch.nr << 16) | l2->ch.addr;
-	if (len)
-		skb_put_data(skb, arg, len);
-	err = l2->up->send(l2->up, skb);
-	if (err) {
-		printk(KERN_WARNING "%s: dev %s err=%d\n", __func__,
-		       mISDNDevName4ch(&l2->ch), err);
-		dev_kfree_skb(skb);
-	}
-}
-
-static int
-l2down_skb(struct layer2 *l2, struct sk_buff *skb) {
-	int ret;
-
-	ret = l2->ch.recv(l2->ch.peer, skb);
-	if (ret && (*debug & DEBUG_L2_RECV))
-		printk(KERN_DEBUG "l2down_skb: dev %s ret(%d)\n",
-		       mISDNDevName4ch(&l2->ch), ret);
-	return ret;
-}
-
-static int
-l2down_raw(struct layer2 *l2, struct sk_buff *skb)
-{
-	struct mISDNhead *hh = mISDN_HEAD_P(skb);
-
-	if (hh->prim == PH_DATA_REQ) {
-		if (test_and_set_bit(FLG_L1_NOTREADY, &l2->flag)) {
-			skb_queue_tail(&l2->down_queue, skb);
-			return 0;
-		}
-		l2->down_id = mISDN_HEAD_ID(skb);
-	}
-	return l2down_skb(l2, skb);
-}
-
-static int
-l2down(struct layer2 *l2, u_int prim, u_int id, struct sk_buff *skb)
-{
-	struct mISDNhead *hh = mISDN_HEAD_P(skb);
-
-	hh->prim = prim;
-	hh->id = id;
-	return l2down_raw(l2, skb);
-}
-
-static int
-l2down_create(struct layer2 *l2, u_int prim, u_int id, int len, void *arg)
-{
-	struct sk_buff	*skb;
-	int		err;
-	struct mISDNhead *hh;
-
-	skb = mI_alloc_skb(len, GFP_ATOMIC);
-	if (!skb)
-		return -ENOMEM;
-	hh = mISDN_HEAD_P(skb);
-	hh->prim = prim;
-	hh->id = id;
-	if (len)
-		skb_put_data(skb, arg, len);
-	err = l2down_raw(l2, skb);
-	if (err)
-		dev_kfree_skb(skb);
-	return err;
-}
-
-static int
-ph_data_confirm(struct layer2 *l2, struct mISDNhead *hh, struct sk_buff *skb) {
-	struct sk_buff *nskb = skb;
-	int ret = -EAGAIN;
-
-	if (test_bit(FLG_L1_NOTREADY, &l2->flag)) {
-		if (hh->id == l2->down_id) {
-			nskb = skb_dequeue(&l2->down_queue);
-			if (nskb) {
-				l2->down_id = mISDN_HEAD_ID(nskb);
-				if (l2down_skb(l2, nskb)) {
-					dev_kfree_skb(nskb);
-					l2->down_id = MISDN_ID_NONE;
-				}
-			} else
-				l2->down_id = MISDN_ID_NONE;
-			if (ret) {
-				dev_kfree_skb(skb);
-				ret = 0;
-			}
-			if (l2->down_id == MISDN_ID_NONE) {
-				test_and_clear_bit(FLG_L1_NOTREADY, &l2->flag);
-				mISDN_FsmEvent(&l2->l2m, EV_L2_ACK_PULL, NULL);
-			}
-		}
-	}
-	if (!test_and_set_bit(FLG_L1_NOTREADY, &l2->flag)) {
-		nskb = skb_dequeue(&l2->down_queue);
-		if (nskb) {
-			l2->down_id = mISDN_HEAD_ID(nskb);
-			if (l2down_skb(l2, nskb)) {
-				dev_kfree_skb(nskb);
-				l2->down_id = MISDN_ID_NONE;
-				test_and_clear_bit(FLG_L1_NOTREADY, &l2->flag);
-			}
-		} else
-			test_and_clear_bit(FLG_L1_NOTREADY, &l2->flag);
-	}
-	return ret;
-}
-
-static void
-l2_timeout(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb;
-	struct mISDNhead *hh;
-
-	skb = mI_alloc_skb(0, GFP_ATOMIC);
-	if (!skb) {
-		printk(KERN_WARNING "%s: L2(%d,%d) nr:%x timer %s no skb\n",
-		       mISDNDevName4ch(&l2->ch), l2->sapi, l2->tei,
-		       l2->ch.nr, event == EV_L2_T200 ? "T200" : "T203");
-		return;
-	}
-	hh = mISDN_HEAD_P(skb);
-	hh->prim = event == EV_L2_T200 ? DL_TIMER200_IND : DL_TIMER203_IND;
-	hh->id = l2->ch.nr;
-	if (*debug & DEBUG_TIMER)
-		printk(KERN_DEBUG "%s: L2(%d,%d) nr:%x timer %s expired\n",
-		       mISDNDevName4ch(&l2->ch), l2->sapi, l2->tei,
-		       l2->ch.nr, event == EV_L2_T200 ? "T200" : "T203");
-	if (l2->ch.st)
-		l2->ch.st->own.recv(&l2->ch.st->own, skb);
-}
-
-static int
-l2mgr(struct layer2 *l2, u_int prim, void *arg) {
-	long c = (long)arg;
-
-	printk(KERN_WARNING "l2mgr: dev %s addr:%x prim %x %c\n",
-	       mISDNDevName4ch(&l2->ch), l2->id, prim, (char)c);
-	if (test_bit(FLG_LAPD, &l2->flag) &&
-	    !test_bit(FLG_FIXED_TEI, &l2->flag)) {
-		switch (c) {
-		case 'C':
-		case 'D':
-		case 'G':
-		case 'H':
-			l2_tei(l2, prim, (u_long)arg);
-			break;
-		}
-	}
-	return 0;
-}
-
-static void
-set_peer_busy(struct layer2 *l2) {
-	test_and_set_bit(FLG_PEER_BUSY, &l2->flag);
-	if (skb_queue_len(&l2->i_queue) || skb_queue_len(&l2->ui_queue))
-		test_and_set_bit(FLG_L2BLOCK, &l2->flag);
-}
-
-static void
-clear_peer_busy(struct layer2 *l2) {
-	if (test_and_clear_bit(FLG_PEER_BUSY, &l2->flag))
-		test_and_clear_bit(FLG_L2BLOCK, &l2->flag);
-}
-
-static void
-InitWin(struct layer2 *l2)
-{
-	int i;
-
-	for (i = 0; i < MAX_WINDOW; i++)
-		l2->windowar[i] = NULL;
-}
-
-static int
-freewin(struct layer2 *l2)
-{
-	int i, cnt = 0;
-
-	for (i = 0; i < MAX_WINDOW; i++) {
-		if (l2->windowar[i]) {
-			cnt++;
-			dev_kfree_skb(l2->windowar[i]);
-			l2->windowar[i] = NULL;
-		}
-	}
-	return cnt;
-}
-
-static void
-ReleaseWin(struct layer2 *l2)
-{
-	int cnt = freewin(l2);
-
-	if (cnt)
-		printk(KERN_WARNING
-		       "isdnl2 freed %d skbuffs in release\n", cnt);
-}
-
-inline unsigned int
-cansend(struct layer2 *l2)
-{
-	unsigned int p1;
-
-	if (test_bit(FLG_MOD128, &l2->flag))
-		p1 = (l2->vs - l2->va) % 128;
-	else
-		p1 = (l2->vs - l2->va) % 8;
-	return (p1 < l2->window) && !test_bit(FLG_PEER_BUSY, &l2->flag);
-}
-
-inline void
-clear_exception(struct layer2 *l2)
-{
-	test_and_clear_bit(FLG_ACK_PEND, &l2->flag);
-	test_and_clear_bit(FLG_REJEXC, &l2->flag);
-	test_and_clear_bit(FLG_OWN_BUSY, &l2->flag);
-	clear_peer_busy(l2);
-}
-
-static int
-sethdraddr(struct layer2 *l2, u_char *header, int rsp)
-{
-	u_char *ptr = header;
-	int crbit = rsp;
-
-	if (test_bit(FLG_LAPD, &l2->flag)) {
-		if (test_bit(FLG_LAPD_NET, &l2->flag))
-			crbit = !crbit;
-		*ptr++ = (l2->sapi << 2) | (crbit ? 2 : 0);
-		*ptr++ = (l2->tei << 1) | 1;
-		return 2;
-	} else {
-		if (test_bit(FLG_ORIG, &l2->flag))
-			crbit = !crbit;
-		if (crbit)
-			*ptr++ = l2->addr.B;
-		else
-			*ptr++ = l2->addr.A;
-		return 1;
-	}
-}
-
-static inline void
-enqueue_super(struct layer2 *l2, struct sk_buff *skb)
-{
-	if (l2down(l2, PH_DATA_REQ, l2_newid(l2), skb))
-		dev_kfree_skb(skb);
-}
-
-static inline void
-enqueue_ui(struct layer2 *l2, struct sk_buff *skb)
-{
-	if (l2->tm)
-		l2_tei(l2, MDL_STATUS_UI_IND, 0);
-	if (l2down(l2, PH_DATA_REQ, l2_newid(l2), skb))
-		dev_kfree_skb(skb);
-}
-
-inline int
-IsUI(u_char *data)
-{
-	return (data[0] & 0xef) == UI;
-}
-
-inline int
-IsUA(u_char *data)
-{
-	return (data[0] & 0xef) == UA;
-}
-
-inline int
-IsDM(u_char *data)
-{
-	return (data[0] & 0xef) == DM;
-}
-
-inline int
-IsDISC(u_char *data)
-{
-	return (data[0] & 0xef) == DISC;
-}
-
-inline int
-IsRR(u_char *data, struct layer2 *l2)
-{
-	if (test_bit(FLG_MOD128, &l2->flag))
-		return data[0] == RR;
-	else
-		return (data[0] & 0xf) == 1;
-}
-
-inline int
-IsSFrame(u_char *data, struct layer2 *l2)
-{
-	register u_char d = *data;
-
-	if (!test_bit(FLG_MOD128, &l2->flag))
-		d &= 0xf;
-	return ((d & 0xf3) == 1) && ((d & 0x0c) != 0x0c);
-}
-
-inline int
-IsSABME(u_char *data, struct layer2 *l2)
-{
-	u_char d = data[0] & ~0x10;
-
-	return test_bit(FLG_MOD128, &l2->flag) ? d == SABME : d == SABM;
-}
-
-inline int
-IsREJ(u_char *data, struct layer2 *l2)
-{
-	return test_bit(FLG_MOD128, &l2->flag) ?
-		data[0] == REJ : (data[0] & 0xf) == REJ;
-}
-
-inline int
-IsFRMR(u_char *data)
-{
-	return (data[0] & 0xef) == FRMR;
-}
-
-inline int
-IsRNR(u_char *data, struct layer2 *l2)
-{
-	return test_bit(FLG_MOD128, &l2->flag) ?
-		data[0] == RNR : (data[0] & 0xf) == RNR;
-}
-
-static int
-iframe_error(struct layer2 *l2, struct sk_buff *skb)
-{
-	u_int	i;
-	int	rsp = *skb->data & 0x2;
-
-	i = l2addrsize(l2) + (test_bit(FLG_MOD128, &l2->flag) ? 2 : 1);
-	if (test_bit(FLG_ORIG, &l2->flag))
-		rsp = !rsp;
-	if (rsp)
-		return 'L';
-	if (skb->len < i)
-		return 'N';
-	if ((skb->len - i) > l2->maxlen)
-		return 'O';
-	return 0;
-}
-
-static int
-super_error(struct layer2 *l2, struct sk_buff *skb)
-{
-	if (skb->len != l2addrsize(l2) +
-	    (test_bit(FLG_MOD128, &l2->flag) ? 2 : 1))
-		return 'N';
-	return 0;
-}
-
-static int
-unnum_error(struct layer2 *l2, struct sk_buff *skb, int wantrsp)
-{
-	int rsp = (*skb->data & 0x2) >> 1;
-	if (test_bit(FLG_ORIG, &l2->flag))
-		rsp = !rsp;
-	if (rsp != wantrsp)
-		return 'L';
-	if (skb->len != l2addrsize(l2) + 1)
-		return 'N';
-	return 0;
-}
-
-static int
-UI_error(struct layer2 *l2, struct sk_buff *skb)
-{
-	int rsp = *skb->data & 0x2;
-	if (test_bit(FLG_ORIG, &l2->flag))
-		rsp = !rsp;
-	if (rsp)
-		return 'L';
-	if (skb->len > l2->maxlen + l2addrsize(l2) + 1)
-		return 'O';
-	return 0;
-}
-
-static int
-FRMR_error(struct layer2 *l2, struct sk_buff *skb)
-{
-	u_int	headers = l2addrsize(l2) + 1;
-	u_char	*datap = skb->data + headers;
-	int	rsp = *skb->data & 0x2;
-
-	if (test_bit(FLG_ORIG, &l2->flag))
-		rsp = !rsp;
-	if (!rsp)
-		return 'L';
-	if (test_bit(FLG_MOD128, &l2->flag)) {
-		if (skb->len < headers + 5)
-			return 'N';
-		else if (*debug & DEBUG_L2)
-			l2m_debug(&l2->l2m,
-				  "FRMR information %2x %2x %2x %2x %2x",
-				  datap[0], datap[1], datap[2], datap[3], datap[4]);
-	} else {
-		if (skb->len < headers + 3)
-			return 'N';
-		else if (*debug & DEBUG_L2)
-			l2m_debug(&l2->l2m,
-				  "FRMR information %2x %2x %2x",
-				  datap[0], datap[1], datap[2]);
-	}
-	return 0;
-}
-
-static unsigned int
-legalnr(struct layer2 *l2, unsigned int nr)
-{
-	if (test_bit(FLG_MOD128, &l2->flag))
-		return ((nr - l2->va) % 128) <= ((l2->vs - l2->va) % 128);
-	else
-		return ((nr - l2->va) % 8) <= ((l2->vs - l2->va) % 8);
-}
-
-static void
-setva(struct layer2 *l2, unsigned int nr)
-{
-	struct sk_buff	*skb;
-
-	while (l2->va != nr) {
-		l2->va++;
-		if (test_bit(FLG_MOD128, &l2->flag))
-			l2->va %= 128;
-		else
-			l2->va %= 8;
-		if (l2->windowar[l2->sow]) {
-			skb_trim(l2->windowar[l2->sow], 0);
-			skb_queue_tail(&l2->tmp_queue, l2->windowar[l2->sow]);
-			l2->windowar[l2->sow] = NULL;
-		}
-		l2->sow = (l2->sow + 1) % l2->window;
-	}
-	skb = skb_dequeue(&l2->tmp_queue);
-	while (skb) {
-		dev_kfree_skb(skb);
-		skb = skb_dequeue(&l2->tmp_queue);
-	}
-}
-
-static void
-send_uframe(struct layer2 *l2, struct sk_buff *skb, u_char cmd, u_char cr)
-{
-	u_char tmp[MAX_L2HEADER_LEN];
-	int i;
-
-	i = sethdraddr(l2, tmp, cr);
-	tmp[i++] = cmd;
-	if (skb)
-		skb_trim(skb, 0);
-	else {
-		skb = mI_alloc_skb(i, GFP_ATOMIC);
-		if (!skb) {
-			printk(KERN_WARNING "%s: can't alloc skbuff in %s\n",
-			       mISDNDevName4ch(&l2->ch), __func__);
-			return;
-		}
-	}
-	skb_put_data(skb, tmp, i);
-	enqueue_super(l2, skb);
-}
-
-
-inline u_char
-get_PollFlag(struct layer2 *l2, struct sk_buff *skb)
-{
-	return skb->data[l2addrsize(l2)] & 0x10;
-}
-
-inline u_char
-get_PollFlagFree(struct layer2 *l2, struct sk_buff *skb)
-{
-	u_char PF;
-
-	PF = get_PollFlag(l2, skb);
-	dev_kfree_skb(skb);
-	return PF;
-}
-
-inline void
-start_t200(struct layer2 *l2, int i)
-{
-	mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, i);
-	test_and_set_bit(FLG_T200_RUN, &l2->flag);
-}
-
-inline void
-restart_t200(struct layer2 *l2, int i)
-{
-	mISDN_FsmRestartTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, i);
-	test_and_set_bit(FLG_T200_RUN, &l2->flag);
-}
-
-inline void
-stop_t200(struct layer2 *l2, int i)
-{
-	if (test_and_clear_bit(FLG_T200_RUN, &l2->flag))
-		mISDN_FsmDelTimer(&l2->t200, i);
-}
-
-inline void
-st5_dl_release_l2l3(struct layer2 *l2)
-{
-	int pr;
-
-	if (test_and_clear_bit(FLG_PEND_REL, &l2->flag))
-		pr = DL_RELEASE_CNF;
-	else
-		pr = DL_RELEASE_IND;
-	l2up_create(l2, pr, 0, NULL);
-}
-
-inline void
-lapb_dl_release_l2l3(struct layer2 *l2, int f)
-{
-	if (test_bit(FLG_LAPB, &l2->flag))
-		l2down_create(l2, PH_DEACTIVATE_REQ, l2_newid(l2), 0, NULL);
-	l2up_create(l2, f, 0, NULL);
-}
-
-static void
-establishlink(struct FsmInst *fi)
-{
-	struct layer2 *l2 = fi->userdata;
-	u_char cmd;
-
-	clear_exception(l2);
-	l2->rc = 0;
-	cmd = (test_bit(FLG_MOD128, &l2->flag) ? SABME : SABM) | 0x10;
-	send_uframe(l2, NULL, cmd, CMD);
-	mISDN_FsmDelTimer(&l2->t203, 1);
-	restart_t200(l2, 1);
-	test_and_clear_bit(FLG_PEND_REL, &l2->flag);
-	freewin(l2);
-	mISDN_FsmChangeState(fi, ST_L2_5);
-}
-
-static void
-l2_mdl_error_ua(struct FsmInst *fi, int event, void *arg)
-{
-	struct sk_buff *skb = arg;
-	struct layer2 *l2 = fi->userdata;
-
-	if (get_PollFlagFree(l2, skb))
-		l2mgr(l2, MDL_ERROR_IND, (void *) 'C');
-	else
-		l2mgr(l2, MDL_ERROR_IND, (void *) 'D');
-
-}
-
-static void
-l2_mdl_error_dm(struct FsmInst *fi, int event, void *arg)
-{
-	struct sk_buff *skb = arg;
-	struct layer2 *l2 = fi->userdata;
-
-	if (get_PollFlagFree(l2, skb))
-		l2mgr(l2, MDL_ERROR_IND, (void *) 'B');
-	else {
-		l2mgr(l2, MDL_ERROR_IND, (void *) 'E');
-		establishlink(fi);
-		test_and_clear_bit(FLG_L3_INIT, &l2->flag);
-	}
-}
-
-static void
-l2_st8_mdl_error_dm(struct FsmInst *fi, int event, void *arg)
-{
-	struct sk_buff *skb = arg;
-	struct layer2 *l2 = fi->userdata;
-
-	if (get_PollFlagFree(l2, skb))
-		l2mgr(l2, MDL_ERROR_IND, (void *) 'B');
-	else
-		l2mgr(l2, MDL_ERROR_IND, (void *) 'E');
-	establishlink(fi);
-	test_and_clear_bit(FLG_L3_INIT, &l2->flag);
-}
-
-static void
-l2_go_st3(struct FsmInst *fi, int event, void *arg)
-{
-	dev_kfree_skb((struct sk_buff *)arg);
-	mISDN_FsmChangeState(fi, ST_L2_3);
-}
-
-static void
-l2_mdl_assign(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2	*l2 = fi->userdata;
-
-	mISDN_FsmChangeState(fi, ST_L2_3);
-	dev_kfree_skb((struct sk_buff *)arg);
-	l2_tei(l2, MDL_ASSIGN_IND, 0);
-}
-
-static void
-l2_queue_ui_assign(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_queue_tail(&l2->ui_queue, skb);
-	mISDN_FsmChangeState(fi, ST_L2_2);
-	l2_tei(l2, MDL_ASSIGN_IND, 0);
-}
-
-static void
-l2_queue_ui(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_queue_tail(&l2->ui_queue, skb);
-}
-
-static void
-tx_ui(struct layer2 *l2)
-{
-	struct sk_buff *skb;
-	u_char header[MAX_L2HEADER_LEN];
-	int i;
-
-	i = sethdraddr(l2, header, CMD);
-	if (test_bit(FLG_LAPD_NET, &l2->flag))
-		header[1] = 0xff; /* tei 127 */
-	header[i++] = UI;
-	while ((skb = skb_dequeue(&l2->ui_queue))) {
-		memcpy(skb_push(skb, i), header, i);
-		enqueue_ui(l2, skb);
-	}
-}
-
-static void
-l2_send_ui(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_queue_tail(&l2->ui_queue, skb);
-	tx_ui(l2);
-}
-
-static void
-l2_got_ui(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_pull(skb, l2headersize(l2, 1));
-/*
- *		in states 1-3 for broadcast
- */
-
-	if (l2->tm)
-		l2_tei(l2, MDL_STATUS_UI_IND, 0);
-	l2up(l2, DL_UNITDATA_IND, skb);
-}
-
-static void
-l2_establish(struct FsmInst *fi, int event, void *arg)
-{
-	struct sk_buff *skb = arg;
-	struct layer2 *l2 = fi->userdata;
-
-	establishlink(fi);
-	test_and_set_bit(FLG_L3_INIT, &l2->flag);
-	dev_kfree_skb(skb);
-}
-
-static void
-l2_discard_i_setl3(struct FsmInst *fi, int event, void *arg)
-{
-	struct sk_buff *skb = arg;
-	struct layer2 *l2 = fi->userdata;
-
-	skb_queue_purge(&l2->i_queue);
-	test_and_set_bit(FLG_L3_INIT, &l2->flag);
-	test_and_clear_bit(FLG_PEND_REL, &l2->flag);
-	dev_kfree_skb(skb);
-}
-
-static void
-l2_l3_reestablish(struct FsmInst *fi, int event, void *arg)
-{
-	struct sk_buff *skb = arg;
-	struct layer2 *l2 = fi->userdata;
-
-	skb_queue_purge(&l2->i_queue);
-	establishlink(fi);
-	test_and_set_bit(FLG_L3_INIT, &l2->flag);
-	dev_kfree_skb(skb);
-}
-
-static void
-l2_release(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_trim(skb, 0);
-	l2up(l2, DL_RELEASE_CNF, skb);
-}
-
-static void
-l2_pend_rel(struct FsmInst *fi, int event, void *arg)
-{
-	struct sk_buff *skb = arg;
-	struct layer2 *l2 = fi->userdata;
-
-	test_and_set_bit(FLG_PEND_REL, &l2->flag);
-	dev_kfree_skb(skb);
-}
-
-static void
-l2_disconnect(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_queue_purge(&l2->i_queue);
-	freewin(l2);
-	mISDN_FsmChangeState(fi, ST_L2_6);
-	l2->rc = 0;
-	send_uframe(l2, NULL, DISC | 0x10, CMD);
-	mISDN_FsmDelTimer(&l2->t203, 1);
-	restart_t200(l2, 2);
-	dev_kfree_skb(skb);
-}
-
-static void
-l2_start_multi(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2	*l2 = fi->userdata;
-	struct sk_buff	*skb = arg;
-
-	l2->vs = 0;
-	l2->va = 0;
-	l2->vr = 0;
-	l2->sow = 0;
-	clear_exception(l2);
-	send_uframe(l2, NULL, UA | get_PollFlag(l2, skb), RSP);
-	mISDN_FsmChangeState(fi, ST_L2_7);
-	mISDN_FsmAddTimer(&l2->t203, l2->T203, EV_L2_T203, NULL, 3);
-	skb_trim(skb, 0);
-	l2up(l2, DL_ESTABLISH_IND, skb);
-	if (l2->tm)
-		l2_tei(l2, MDL_STATUS_UP_IND, 0);
-}
-
-static void
-l2_send_UA(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	send_uframe(l2, skb, UA | get_PollFlag(l2, skb), RSP);
-}
-
-static void
-l2_send_DM(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	send_uframe(l2, skb, DM | get_PollFlag(l2, skb), RSP);
-}
-
-static void
-l2_restart_multi(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2	*l2 = fi->userdata;
-	struct sk_buff	*skb = arg;
-	int		est = 0;
-
-	send_uframe(l2, skb, UA | get_PollFlag(l2, skb), RSP);
-
-	l2mgr(l2, MDL_ERROR_IND, (void *) 'F');
-
-	if (l2->vs != l2->va) {
-		skb_queue_purge(&l2->i_queue);
-		est = 1;
-	}
-
-	clear_exception(l2);
-	l2->vs = 0;
-	l2->va = 0;
-	l2->vr = 0;
-	l2->sow = 0;
-	mISDN_FsmChangeState(fi, ST_L2_7);
-	stop_t200(l2, 3);
-	mISDN_FsmRestartTimer(&l2->t203, l2->T203, EV_L2_T203, NULL, 3);
-
-	if (est)
-		l2up_create(l2, DL_ESTABLISH_IND, 0, NULL);
-/*		mISDN_queue_data(&l2->inst, l2->inst.id | MSG_BROADCAST,
- *		    MGR_SHORTSTATUS | INDICATION, SSTATUS_L2_ESTABLISHED,
- *		    0, NULL, 0);
- */
-	if (skb_queue_len(&l2->i_queue) && cansend(l2))
-		mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL);
-}
-
-static void
-l2_stop_multi(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2	*l2 = fi->userdata;
-	struct sk_buff	*skb = arg;
-
-	mISDN_FsmChangeState(fi, ST_L2_4);
-	mISDN_FsmDelTimer(&l2->t203, 3);
-	stop_t200(l2, 4);
-
-	send_uframe(l2, skb, UA | get_PollFlag(l2, skb), RSP);
-	skb_queue_purge(&l2->i_queue);
-	freewin(l2);
-	lapb_dl_release_l2l3(l2, DL_RELEASE_IND);
-	if (l2->tm)
-		l2_tei(l2, MDL_STATUS_DOWN_IND, 0);
-}
-
-static void
-l2_connected(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2	*l2 = fi->userdata;
-	struct sk_buff	*skb = arg;
-	int pr = -1;
-
-	if (!get_PollFlag(l2, skb)) {
-		l2_mdl_error_ua(fi, event, arg);
-		return;
-	}
-	dev_kfree_skb(skb);
-	if (test_and_clear_bit(FLG_PEND_REL, &l2->flag))
-		l2_disconnect(fi, event, NULL);
-	if (test_and_clear_bit(FLG_L3_INIT, &l2->flag)) {
-		pr = DL_ESTABLISH_CNF;
-	} else if (l2->vs != l2->va) {
-		skb_queue_purge(&l2->i_queue);
-		pr = DL_ESTABLISH_IND;
-	}
-	stop_t200(l2, 5);
-	l2->vr = 0;
-	l2->vs = 0;
-	l2->va = 0;
-	l2->sow = 0;
-	mISDN_FsmChangeState(fi, ST_L2_7);
-	mISDN_FsmAddTimer(&l2->t203, l2->T203, EV_L2_T203, NULL, 4);
-	if (pr != -1)
-		l2up_create(l2, pr, 0, NULL);
-
-	if (skb_queue_len(&l2->i_queue) && cansend(l2))
-		mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL);
-
-	if (l2->tm)
-		l2_tei(l2, MDL_STATUS_UP_IND, 0);
-}
-
-static void
-l2_released(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	if (!get_PollFlag(l2, skb)) {
-		l2_mdl_error_ua(fi, event, arg);
-		return;
-	}
-	dev_kfree_skb(skb);
-	stop_t200(l2, 6);
-	lapb_dl_release_l2l3(l2, DL_RELEASE_CNF);
-	mISDN_FsmChangeState(fi, ST_L2_4);
-	if (l2->tm)
-		l2_tei(l2, MDL_STATUS_DOWN_IND, 0);
-}
-
-static void
-l2_reestablish(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	if (!get_PollFlagFree(l2, skb)) {
-		establishlink(fi);
-		test_and_set_bit(FLG_L3_INIT, &l2->flag);
-	}
-}
-
-static void
-l2_st5_dm_release(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	if (get_PollFlagFree(l2, skb)) {
-		stop_t200(l2, 7);
-		if (!test_bit(FLG_L3_INIT, &l2->flag))
-			skb_queue_purge(&l2->i_queue);
-		if (test_bit(FLG_LAPB, &l2->flag))
-			l2down_create(l2, PH_DEACTIVATE_REQ,
-				      l2_newid(l2), 0, NULL);
-		st5_dl_release_l2l3(l2);
-		mISDN_FsmChangeState(fi, ST_L2_4);
-		if (l2->tm)
-			l2_tei(l2, MDL_STATUS_DOWN_IND, 0);
-	}
-}
-
-static void
-l2_st6_dm_release(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	if (get_PollFlagFree(l2, skb)) {
-		stop_t200(l2, 8);
-		lapb_dl_release_l2l3(l2, DL_RELEASE_CNF);
-		mISDN_FsmChangeState(fi, ST_L2_4);
-		if (l2->tm)
-			l2_tei(l2, MDL_STATUS_DOWN_IND, 0);
-	}
-}
-
-static void
-enquiry_cr(struct layer2 *l2, u_char typ, u_char cr, u_char pf)
-{
-	struct sk_buff *skb;
-	u_char tmp[MAX_L2HEADER_LEN];
-	int i;
-
-	i = sethdraddr(l2, tmp, cr);
-	if (test_bit(FLG_MOD128, &l2->flag)) {
-		tmp[i++] = typ;
-		tmp[i++] = (l2->vr << 1) | (pf ? 1 : 0);
-	} else
-		tmp[i++] = (l2->vr << 5) | typ | (pf ? 0x10 : 0);
-	skb = mI_alloc_skb(i, GFP_ATOMIC);
-	if (!skb) {
-		printk(KERN_WARNING "%s: isdnl2 can't alloc sbbuff in %s\n",
-		       mISDNDevName4ch(&l2->ch), __func__);
-		return;
-	}
-	skb_put_data(skb, tmp, i);
-	enqueue_super(l2, skb);
-}
-
-inline void
-enquiry_response(struct layer2 *l2)
-{
-	if (test_bit(FLG_OWN_BUSY, &l2->flag))
-		enquiry_cr(l2, RNR, RSP, 1);
-	else
-		enquiry_cr(l2, RR, RSP, 1);
-	test_and_clear_bit(FLG_ACK_PEND, &l2->flag);
-}
-
-inline void
-transmit_enquiry(struct layer2 *l2)
-{
-	if (test_bit(FLG_OWN_BUSY, &l2->flag))
-		enquiry_cr(l2, RNR, CMD, 1);
-	else
-		enquiry_cr(l2, RR, CMD, 1);
-	test_and_clear_bit(FLG_ACK_PEND, &l2->flag);
-	start_t200(l2, 9);
-}
-
-
-static void
-nrerrorrecovery(struct FsmInst *fi)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	l2mgr(l2, MDL_ERROR_IND, (void *) 'J');
-	establishlink(fi);
-	test_and_clear_bit(FLG_L3_INIT, &l2->flag);
-}
-
-static void
-invoke_retransmission(struct layer2 *l2, unsigned int nr)
-{
-	u_int	p1;
-
-	if (l2->vs != nr) {
-		while (l2->vs != nr) {
-			(l2->vs)--;
-			if (test_bit(FLG_MOD128, &l2->flag)) {
-				l2->vs %= 128;
-				p1 = (l2->vs - l2->va) % 128;
-			} else {
-				l2->vs %= 8;
-				p1 = (l2->vs - l2->va) % 8;
-			}
-			p1 = (p1 + l2->sow) % l2->window;
-			if (l2->windowar[p1])
-				skb_queue_head(&l2->i_queue, l2->windowar[p1]);
-			else
-				printk(KERN_WARNING
-				       "%s: windowar[%d] is NULL\n",
-				       mISDNDevName4ch(&l2->ch), p1);
-			l2->windowar[p1] = NULL;
-		}
-		mISDN_FsmEvent(&l2->l2m, EV_L2_ACK_PULL, NULL);
-	}
-}
-
-static void
-l2_st7_got_super(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-	int PollFlag, rsp, typ = RR;
-	unsigned int nr;
-
-	rsp = *skb->data & 0x2;
-	if (test_bit(FLG_ORIG, &l2->flag))
-		rsp = !rsp;
-
-	skb_pull(skb, l2addrsize(l2));
-	if (IsRNR(skb->data, l2)) {
-		set_peer_busy(l2);
-		typ = RNR;
-	} else
-		clear_peer_busy(l2);
-	if (IsREJ(skb->data, l2))
-		typ = REJ;
-
-	if (test_bit(FLG_MOD128, &l2->flag)) {
-		PollFlag = (skb->data[1] & 0x1) == 0x1;
-		nr = skb->data[1] >> 1;
-	} else {
-		PollFlag = (skb->data[0] & 0x10);
-		nr = (skb->data[0] >> 5) & 0x7;
-	}
-	dev_kfree_skb(skb);
-
-	if (PollFlag) {
-		if (rsp)
-			l2mgr(l2, MDL_ERROR_IND, (void *) 'A');
-		else
-			enquiry_response(l2);
-	}
-	if (legalnr(l2, nr)) {
-		if (typ == REJ) {
-			setva(l2, nr);
-			invoke_retransmission(l2, nr);
-			stop_t200(l2, 10);
-			if (mISDN_FsmAddTimer(&l2->t203, l2->T203,
-					      EV_L2_T203, NULL, 6))
-				l2m_debug(&l2->l2m, "Restart T203 ST7 REJ");
-		} else if ((nr == l2->vs) && (typ == RR)) {
-			setva(l2, nr);
-			stop_t200(l2, 11);
-			mISDN_FsmRestartTimer(&l2->t203, l2->T203,
-					      EV_L2_T203, NULL, 7);
-		} else if ((l2->va != nr) || (typ == RNR)) {
-			setva(l2, nr);
-			if (typ != RR)
-				mISDN_FsmDelTimer(&l2->t203, 9);
-			restart_t200(l2, 12);
-		}
-		if (skb_queue_len(&l2->i_queue) && (typ == RR))
-			mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL);
-	} else
-		nrerrorrecovery(fi);
-}
-
-static void
-l2_feed_i_if_reest(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	if (!test_bit(FLG_L3_INIT, &l2->flag))
-		skb_queue_tail(&l2->i_queue, skb);
-	else
-		dev_kfree_skb(skb);
-}
-
-static void
-l2_feed_i_pull(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_queue_tail(&l2->i_queue, skb);
-	mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL);
-}
-
-static void
-l2_feed_iqueue(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_queue_tail(&l2->i_queue, skb);
-}
-
-static void
-l2_got_iframe(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2	*l2 = fi->userdata;
-	struct sk_buff	*skb = arg;
-	int		PollFlag, i;
-	u_int		ns, nr;
-
-	i = l2addrsize(l2);
-	if (test_bit(FLG_MOD128, &l2->flag)) {
-		PollFlag = ((skb->data[i + 1] & 0x1) == 0x1);
-		ns = skb->data[i] >> 1;
-		nr = (skb->data[i + 1] >> 1) & 0x7f;
-	} else {
-		PollFlag = (skb->data[i] & 0x10);
-		ns = (skb->data[i] >> 1) & 0x7;
-		nr = (skb->data[i] >> 5) & 0x7;
-	}
-	if (test_bit(FLG_OWN_BUSY, &l2->flag)) {
-		dev_kfree_skb(skb);
-		if (PollFlag)
-			enquiry_response(l2);
-	} else {
-		if (l2->vr == ns) {
-			l2->vr++;
-			if (test_bit(FLG_MOD128, &l2->flag))
-				l2->vr %= 128;
-			else
-				l2->vr %= 8;
-			test_and_clear_bit(FLG_REJEXC, &l2->flag);
-			if (PollFlag)
-				enquiry_response(l2);
-			else
-				test_and_set_bit(FLG_ACK_PEND, &l2->flag);
-			skb_pull(skb, l2headersize(l2, 0));
-			l2up(l2, DL_DATA_IND, skb);
-		} else {
-			/* n(s)!=v(r) */
-			dev_kfree_skb(skb);
-			if (test_and_set_bit(FLG_REJEXC, &l2->flag)) {
-				if (PollFlag)
-					enquiry_response(l2);
-			} else {
-				enquiry_cr(l2, REJ, RSP, PollFlag);
-				test_and_clear_bit(FLG_ACK_PEND, &l2->flag);
-			}
-		}
-	}
-	if (legalnr(l2, nr)) {
-		if (!test_bit(FLG_PEER_BUSY, &l2->flag) &&
-		    (fi->state == ST_L2_7)) {
-			if (nr == l2->vs) {
-				stop_t200(l2, 13);
-				mISDN_FsmRestartTimer(&l2->t203, l2->T203,
-						      EV_L2_T203, NULL, 7);
-			} else if (nr != l2->va)
-				restart_t200(l2, 14);
-		}
-		setva(l2, nr);
-	} else {
-		nrerrorrecovery(fi);
-		return;
-	}
-	if (skb_queue_len(&l2->i_queue) && (fi->state == ST_L2_7))
-		mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL);
-	if (test_and_clear_bit(FLG_ACK_PEND, &l2->flag))
-		enquiry_cr(l2, RR, RSP, 0);
-}
-
-static void
-l2_got_tei(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2	*l2 = fi->userdata;
-	u_int		info;
-
-	l2->tei = (signed char)(long)arg;
-	set_channel_address(&l2->ch, l2->sapi, l2->tei);
-	info = DL_INFO_L2_CONNECT;
-	l2up_create(l2, DL_INFORMATION_IND, sizeof(info), &info);
-	if (fi->state == ST_L2_3) {
-		establishlink(fi);
-		test_and_set_bit(FLG_L3_INIT, &l2->flag);
-	} else
-		mISDN_FsmChangeState(fi, ST_L2_4);
-	if (skb_queue_len(&l2->ui_queue))
-		tx_ui(l2);
-}
-
-static void
-l2_st5_tout_200(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	if (test_bit(FLG_LAPD, &l2->flag) &&
-	    test_bit(FLG_DCHAN_BUSY, &l2->flag)) {
-		mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 9);
-	} else if (l2->rc == l2->N200) {
-		mISDN_FsmChangeState(fi, ST_L2_4);
-		test_and_clear_bit(FLG_T200_RUN, &l2->flag);
-		skb_queue_purge(&l2->i_queue);
-		l2mgr(l2, MDL_ERROR_IND, (void *) 'G');
-		if (test_bit(FLG_LAPB, &l2->flag))
-			l2down_create(l2, PH_DEACTIVATE_REQ,
-				      l2_newid(l2), 0, NULL);
-		st5_dl_release_l2l3(l2);
-		if (l2->tm)
-			l2_tei(l2, MDL_STATUS_DOWN_IND, 0);
-	} else {
-		l2->rc++;
-		mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 9);
-		send_uframe(l2, NULL, (test_bit(FLG_MOD128, &l2->flag) ?
-				       SABME : SABM) | 0x10, CMD);
-	}
-}
-
-static void
-l2_st6_tout_200(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	if (test_bit(FLG_LAPD, &l2->flag) &&
-	    test_bit(FLG_DCHAN_BUSY, &l2->flag)) {
-		mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 9);
-	} else if (l2->rc == l2->N200) {
-		mISDN_FsmChangeState(fi, ST_L2_4);
-		test_and_clear_bit(FLG_T200_RUN, &l2->flag);
-		l2mgr(l2, MDL_ERROR_IND, (void *) 'H');
-		lapb_dl_release_l2l3(l2, DL_RELEASE_CNF);
-		if (l2->tm)
-			l2_tei(l2, MDL_STATUS_DOWN_IND, 0);
-	} else {
-		l2->rc++;
-		mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200,
-				  NULL, 9);
-		send_uframe(l2, NULL, DISC | 0x10, CMD);
-	}
-}
-
-static void
-l2_st7_tout_200(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	if (test_bit(FLG_LAPD, &l2->flag) &&
-	    test_bit(FLG_DCHAN_BUSY, &l2->flag)) {
-		mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 9);
-		return;
-	}
-	test_and_clear_bit(FLG_T200_RUN, &l2->flag);
-	l2->rc = 0;
-	mISDN_FsmChangeState(fi, ST_L2_8);
-	transmit_enquiry(l2);
-	l2->rc++;
-}
-
-static void
-l2_st8_tout_200(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	if (test_bit(FLG_LAPD, &l2->flag) &&
-	    test_bit(FLG_DCHAN_BUSY, &l2->flag)) {
-		mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 9);
-		return;
-	}
-	test_and_clear_bit(FLG_T200_RUN, &l2->flag);
-	if (l2->rc == l2->N200) {
-		l2mgr(l2, MDL_ERROR_IND, (void *) 'I');
-		establishlink(fi);
-		test_and_clear_bit(FLG_L3_INIT, &l2->flag);
-	} else {
-		transmit_enquiry(l2);
-		l2->rc++;
-	}
-}
-
-static void
-l2_st7_tout_203(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	if (test_bit(FLG_LAPD, &l2->flag) &&
-	    test_bit(FLG_DCHAN_BUSY, &l2->flag)) {
-		mISDN_FsmAddTimer(&l2->t203, l2->T203, EV_L2_T203, NULL, 9);
-		return;
-	}
-	mISDN_FsmChangeState(fi, ST_L2_8);
-	transmit_enquiry(l2);
-	l2->rc = 0;
-}
-
-static void
-l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2	*l2 = fi->userdata;
-	struct sk_buff	*skb, *nskb;
-	u_char		header[MAX_L2HEADER_LEN];
-	u_int		i, p1;
-
-	if (!cansend(l2))
-		return;
-
-	skb = skb_dequeue(&l2->i_queue);
-	if (!skb)
-		return;
-	i = sethdraddr(l2, header, CMD);
-	if (test_bit(FLG_MOD128, &l2->flag)) {
-		header[i++] = l2->vs << 1;
-		header[i++] = l2->vr << 1;
-	} else
-		header[i++] = (l2->vr << 5) | (l2->vs << 1);
-	nskb = skb_realloc_headroom(skb, i);
-	if (!nskb) {
-		printk(KERN_WARNING "%s: no headroom(%d) copy for IFrame\n",
-		       mISDNDevName4ch(&l2->ch), i);
-		skb_queue_head(&l2->i_queue, skb);
-		return;
-	}
-	if (test_bit(FLG_MOD128, &l2->flag)) {
-		p1 = (l2->vs - l2->va) % 128;
-		l2->vs = (l2->vs + 1) % 128;
-	} else {
-		p1 = (l2->vs - l2->va) % 8;
-		l2->vs = (l2->vs + 1) % 8;
-	}
-	p1 = (p1 + l2->sow) % l2->window;
-	if (l2->windowar[p1]) {
-		printk(KERN_WARNING "%s: l2 try overwrite ack queue entry %d\n",
-		       mISDNDevName4ch(&l2->ch), p1);
-		dev_kfree_skb(l2->windowar[p1]);
-	}
-	l2->windowar[p1] = skb;
-	memcpy(skb_push(nskb, i), header, i);
-	l2down(l2, PH_DATA_REQ, l2_newid(l2), nskb);
-	test_and_clear_bit(FLG_ACK_PEND, &l2->flag);
-	if (!test_and_set_bit(FLG_T200_RUN, &l2->flag)) {
-		mISDN_FsmDelTimer(&l2->t203, 13);
-		mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 11);
-	}
-}
-
-static void
-l2_st8_got_super(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-	int PollFlag, rsp, rnr = 0;
-	unsigned int nr;
-
-	rsp = *skb->data & 0x2;
-	if (test_bit(FLG_ORIG, &l2->flag))
-		rsp = !rsp;
-
-	skb_pull(skb, l2addrsize(l2));
-
-	if (IsRNR(skb->data, l2)) {
-		set_peer_busy(l2);
-		rnr = 1;
-	} else
-		clear_peer_busy(l2);
-
-	if (test_bit(FLG_MOD128, &l2->flag)) {
-		PollFlag = (skb->data[1] & 0x1) == 0x1;
-		nr = skb->data[1] >> 1;
-	} else {
-		PollFlag = (skb->data[0] & 0x10);
-		nr = (skb->data[0] >> 5) & 0x7;
-	}
-	dev_kfree_skb(skb);
-	if (rsp && PollFlag) {
-		if (legalnr(l2, nr)) {
-			if (rnr) {
-				restart_t200(l2, 15);
-			} else {
-				stop_t200(l2, 16);
-				mISDN_FsmAddTimer(&l2->t203, l2->T203,
-						  EV_L2_T203, NULL, 5);
-				setva(l2, nr);
-			}
-			invoke_retransmission(l2, nr);
-			mISDN_FsmChangeState(fi, ST_L2_7);
-			if (skb_queue_len(&l2->i_queue) && cansend(l2))
-				mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL);
-		} else
-			nrerrorrecovery(fi);
-	} else {
-		if (!rsp && PollFlag)
-			enquiry_response(l2);
-		if (legalnr(l2, nr))
-			setva(l2, nr);
-		else
-			nrerrorrecovery(fi);
-	}
-}
-
-static void
-l2_got_FRMR(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_pull(skb, l2addrsize(l2) + 1);
-
-	if (!(skb->data[0] & 1) || ((skb->data[0] & 3) == 1) || /* I or S */
-	    (IsUA(skb->data) && (fi->state == ST_L2_7))) {
-		l2mgr(l2, MDL_ERROR_IND, (void *) 'K');
-		establishlink(fi);
-		test_and_clear_bit(FLG_L3_INIT, &l2->flag);
-	}
-	dev_kfree_skb(skb);
-}
-
-static void
-l2_st24_tei_remove(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	skb_queue_purge(&l2->ui_queue);
-	l2->tei = GROUP_TEI;
-	mISDN_FsmChangeState(fi, ST_L2_1);
-}
-
-static void
-l2_st3_tei_remove(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	skb_queue_purge(&l2->ui_queue);
-	l2->tei = GROUP_TEI;
-	l2up_create(l2, DL_RELEASE_IND, 0, NULL);
-	mISDN_FsmChangeState(fi, ST_L2_1);
-}
-
-static void
-l2_st5_tei_remove(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	skb_queue_purge(&l2->i_queue);
-	skb_queue_purge(&l2->ui_queue);
-	freewin(l2);
-	l2->tei = GROUP_TEI;
-	stop_t200(l2, 17);
-	st5_dl_release_l2l3(l2);
-	mISDN_FsmChangeState(fi, ST_L2_1);
-}
-
-static void
-l2_st6_tei_remove(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	skb_queue_purge(&l2->ui_queue);
-	l2->tei = GROUP_TEI;
-	stop_t200(l2, 18);
-	l2up_create(l2, DL_RELEASE_IND, 0, NULL);
-	mISDN_FsmChangeState(fi, ST_L2_1);
-}
-
-static void
-l2_tei_remove(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	skb_queue_purge(&l2->i_queue);
-	skb_queue_purge(&l2->ui_queue);
-	freewin(l2);
-	l2->tei = GROUP_TEI;
-	stop_t200(l2, 17);
-	mISDN_FsmDelTimer(&l2->t203, 19);
-	l2up_create(l2, DL_RELEASE_IND, 0, NULL);
-/*	mISDN_queue_data(&l2->inst, l2->inst.id | MSG_BROADCAST,
- *		MGR_SHORTSTATUS_IND, SSTATUS_L2_RELEASED,
- *		0, NULL, 0);
- */
-	mISDN_FsmChangeState(fi, ST_L2_1);
-}
-
-static void
-l2_st14_persistent_da(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_queue_purge(&l2->i_queue);
-	skb_queue_purge(&l2->ui_queue);
-	if (test_and_clear_bit(FLG_ESTAB_PEND, &l2->flag))
-		l2up(l2, DL_RELEASE_IND, skb);
-	else
-		dev_kfree_skb(skb);
-}
-
-static void
-l2_st5_persistent_da(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_queue_purge(&l2->i_queue);
-	skb_queue_purge(&l2->ui_queue);
-	freewin(l2);
-	stop_t200(l2, 19);
-	st5_dl_release_l2l3(l2);
-	mISDN_FsmChangeState(fi, ST_L2_4);
-	if (l2->tm)
-		l2_tei(l2, MDL_STATUS_DOWN_IND, 0);
-	dev_kfree_skb(skb);
-}
-
-static void
-l2_st6_persistent_da(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_queue_purge(&l2->ui_queue);
-	stop_t200(l2, 20);
-	l2up(l2, DL_RELEASE_CNF, skb);
-	mISDN_FsmChangeState(fi, ST_L2_4);
-	if (l2->tm)
-		l2_tei(l2, MDL_STATUS_DOWN_IND, 0);
-}
-
-static void
-l2_persistent_da(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	skb_queue_purge(&l2->i_queue);
-	skb_queue_purge(&l2->ui_queue);
-	freewin(l2);
-	stop_t200(l2, 19);
-	mISDN_FsmDelTimer(&l2->t203, 19);
-	l2up(l2, DL_RELEASE_IND, skb);
-	mISDN_FsmChangeState(fi, ST_L2_4);
-	if (l2->tm)
-		l2_tei(l2, MDL_STATUS_DOWN_IND, 0);
-}
-
-static void
-l2_set_own_busy(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	if (!test_and_set_bit(FLG_OWN_BUSY, &l2->flag)) {
-		enquiry_cr(l2, RNR, RSP, 0);
-		test_and_clear_bit(FLG_ACK_PEND, &l2->flag);
-	}
-	dev_kfree_skb(skb);
-}
-
-static void
-l2_clear_own_busy(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-	struct sk_buff *skb = arg;
-
-	if (!test_and_clear_bit(FLG_OWN_BUSY, &l2->flag)) {
-		enquiry_cr(l2, RR, RSP, 0);
-		test_and_clear_bit(FLG_ACK_PEND, &l2->flag);
-	}
-	dev_kfree_skb(skb);
-}
-
-static void
-l2_frame_error(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	l2mgr(l2, MDL_ERROR_IND, arg);
-}
-
-static void
-l2_frame_error_reest(struct FsmInst *fi, int event, void *arg)
-{
-	struct layer2 *l2 = fi->userdata;
-
-	l2mgr(l2, MDL_ERROR_IND, arg);
-	establishlink(fi);
-	test_and_clear_bit(FLG_L3_INIT, &l2->flag);
-}
-
-static struct FsmNode L2FnList[] =
-{
-	{ST_L2_1, EV_L2_DL_ESTABLISH_REQ, l2_mdl_assign},
-	{ST_L2_2, EV_L2_DL_ESTABLISH_REQ, l2_go_st3},
-	{ST_L2_4, EV_L2_DL_ESTABLISH_REQ, l2_establish},
-	{ST_L2_5, EV_L2_DL_ESTABLISH_REQ, l2_discard_i_setl3},
-	{ST_L2_7, EV_L2_DL_ESTABLISH_REQ, l2_l3_reestablish},
-	{ST_L2_8, EV_L2_DL_ESTABLISH_REQ, l2_l3_reestablish},
-	{ST_L2_4, EV_L2_DL_RELEASE_REQ, l2_release},
-	{ST_L2_5, EV_L2_DL_RELEASE_REQ, l2_pend_rel},
-	{ST_L2_7, EV_L2_DL_RELEASE_REQ, l2_disconnect},
-	{ST_L2_8, EV_L2_DL_RELEASE_REQ, l2_disconnect},
-	{ST_L2_5, EV_L2_DL_DATA, l2_feed_i_if_reest},
-	{ST_L2_7, EV_L2_DL_DATA, l2_feed_i_pull},
-	{ST_L2_8, EV_L2_DL_DATA, l2_feed_iqueue},
-	{ST_L2_1, EV_L2_DL_UNITDATA, l2_queue_ui_assign},
-	{ST_L2_2, EV_L2_DL_UNITDATA, l2_queue_ui},
-	{ST_L2_3, EV_L2_DL_UNITDATA, l2_queue_ui},
-	{ST_L2_4, EV_L2_DL_UNITDATA, l2_send_ui},
-	{ST_L2_5, EV_L2_DL_UNITDATA, l2_send_ui},
-	{ST_L2_6, EV_L2_DL_UNITDATA, l2_send_ui},
-	{ST_L2_7, EV_L2_DL_UNITDATA, l2_send_ui},
-	{ST_L2_8, EV_L2_DL_UNITDATA, l2_send_ui},
-	{ST_L2_1, EV_L2_MDL_ASSIGN, l2_got_tei},
-	{ST_L2_2, EV_L2_MDL_ASSIGN, l2_got_tei},
-	{ST_L2_3, EV_L2_MDL_ASSIGN, l2_got_tei},
-	{ST_L2_2, EV_L2_MDL_ERROR, l2_st24_tei_remove},
-	{ST_L2_3, EV_L2_MDL_ERROR, l2_st3_tei_remove},
-	{ST_L2_4, EV_L2_MDL_REMOVE, l2_st24_tei_remove},
-	{ST_L2_5, EV_L2_MDL_REMOVE, l2_st5_tei_remove},
-	{ST_L2_6, EV_L2_MDL_REMOVE, l2_st6_tei_remove},
-	{ST_L2_7, EV_L2_MDL_REMOVE, l2_tei_remove},
-	{ST_L2_8, EV_L2_MDL_REMOVE, l2_tei_remove},
-	{ST_L2_4, EV_L2_SABME, l2_start_multi},
-	{ST_L2_5, EV_L2_SABME, l2_send_UA},
-	{ST_L2_6, EV_L2_SABME, l2_send_DM},
-	{ST_L2_7, EV_L2_SABME, l2_restart_multi},
-	{ST_L2_8, EV_L2_SABME, l2_restart_multi},
-	{ST_L2_4, EV_L2_DISC, l2_send_DM},
-	{ST_L2_5, EV_L2_DISC, l2_send_DM},
-	{ST_L2_6, EV_L2_DISC, l2_send_UA},
-	{ST_L2_7, EV_L2_DISC, l2_stop_multi},
-	{ST_L2_8, EV_L2_DISC, l2_stop_multi},
-	{ST_L2_4, EV_L2_UA, l2_mdl_error_ua},
-	{ST_L2_5, EV_L2_UA, l2_connected},
-	{ST_L2_6, EV_L2_UA, l2_released},
-	{ST_L2_7, EV_L2_UA, l2_mdl_error_ua},
-	{ST_L2_8, EV_L2_UA, l2_mdl_error_ua},
-	{ST_L2_4, EV_L2_DM, l2_reestablish},
-	{ST_L2_5, EV_L2_DM, l2_st5_dm_release},
-	{ST_L2_6, EV_L2_DM, l2_st6_dm_release},
-	{ST_L2_7, EV_L2_DM, l2_mdl_error_dm},
-	{ST_L2_8, EV_L2_DM, l2_st8_mdl_error_dm},
-	{ST_L2_1, EV_L2_UI, l2_got_ui},
-	{ST_L2_2, EV_L2_UI, l2_got_ui},
-	{ST_L2_3, EV_L2_UI, l2_got_ui},
-	{ST_L2_4, EV_L2_UI, l2_got_ui},
-	{ST_L2_5, EV_L2_UI, l2_got_ui},
-	{ST_L2_6, EV_L2_UI, l2_got_ui},
-	{ST_L2_7, EV_L2_UI, l2_got_ui},
-	{ST_L2_8, EV_L2_UI, l2_got_ui},
-	{ST_L2_7, EV_L2_FRMR, l2_got_FRMR},
-	{ST_L2_8, EV_L2_FRMR, l2_got_FRMR},
-	{ST_L2_7, EV_L2_SUPER, l2_st7_got_super},
-	{ST_L2_8, EV_L2_SUPER, l2_st8_got_super},
-	{ST_L2_7, EV_L2_I, l2_got_iframe},
-	{ST_L2_8, EV_L2_I, l2_got_iframe},
-	{ST_L2_5, EV_L2_T200, l2_timeout},
-	{ST_L2_6, EV_L2_T200, l2_timeout},
-	{ST_L2_7, EV_L2_T200, l2_timeout},
-	{ST_L2_8, EV_L2_T200, l2_timeout},
-	{ST_L2_7, EV_L2_T203, l2_timeout},
-	{ST_L2_5, EV_L2_T200I, l2_st5_tout_200},
-	{ST_L2_6, EV_L2_T200I, l2_st6_tout_200},
-	{ST_L2_7, EV_L2_T200I, l2_st7_tout_200},
-	{ST_L2_8, EV_L2_T200I, l2_st8_tout_200},
-	{ST_L2_7, EV_L2_T203I, l2_st7_tout_203},
-	{ST_L2_7, EV_L2_ACK_PULL, l2_pull_iqueue},
-	{ST_L2_7, EV_L2_SET_OWN_BUSY, l2_set_own_busy},
-	{ST_L2_8, EV_L2_SET_OWN_BUSY, l2_set_own_busy},
-	{ST_L2_7, EV_L2_CLEAR_OWN_BUSY, l2_clear_own_busy},
-	{ST_L2_8, EV_L2_CLEAR_OWN_BUSY, l2_clear_own_busy},
-	{ST_L2_4, EV_L2_FRAME_ERROR, l2_frame_error},
-	{ST_L2_5, EV_L2_FRAME_ERROR, l2_frame_error},
-	{ST_L2_6, EV_L2_FRAME_ERROR, l2_frame_error},
-	{ST_L2_7, EV_L2_FRAME_ERROR, l2_frame_error_reest},
-	{ST_L2_8, EV_L2_FRAME_ERROR, l2_frame_error_reest},
-	{ST_L2_1, EV_L1_DEACTIVATE, l2_st14_persistent_da},
-	{ST_L2_2, EV_L1_DEACTIVATE, l2_st24_tei_remove},
-	{ST_L2_3, EV_L1_DEACTIVATE, l2_st3_tei_remove},
-	{ST_L2_4, EV_L1_DEACTIVATE, l2_st14_persistent_da},
-	{ST_L2_5, EV_L1_DEACTIVATE, l2_st5_persistent_da},
-	{ST_L2_6, EV_L1_DEACTIVATE, l2_st6_persistent_da},
-	{ST_L2_7, EV_L1_DEACTIVATE, l2_persistent_da},
-	{ST_L2_8, EV_L1_DEACTIVATE, l2_persistent_da},
-};
-
-static int
-ph_data_indication(struct layer2 *l2, struct mISDNhead *hh, struct sk_buff *skb)
-{
-	u_char	*datap = skb->data;
-	int	ret = -EINVAL;
-	int	psapi, ptei;
-	u_int	l;
-	int	c = 0;
-
-	l = l2addrsize(l2);
-	if (skb->len <= l) {
-		mISDN_FsmEvent(&l2->l2m, EV_L2_FRAME_ERROR, (void *) 'N');
-		return ret;
-	}
-	if (test_bit(FLG_LAPD, &l2->flag)) { /* Maybe not needed */
-		psapi = *datap++;
-		ptei = *datap++;
-		if ((psapi & 1) || !(ptei & 1)) {
-			printk(KERN_WARNING
-			       "%s l2 D-channel frame wrong EA0/EA1\n",
-			       mISDNDevName4ch(&l2->ch));
-			return ret;
-		}
-		psapi >>= 2;
-		ptei >>= 1;
-		if (psapi != l2->sapi) {
-			/* not our business */
-			if (*debug & DEBUG_L2)
-				printk(KERN_DEBUG "%s: sapi %d/%d mismatch\n",
-				       mISDNDevName4ch(&l2->ch), psapi,
-				       l2->sapi);
-			dev_kfree_skb(skb);
-			return 0;
-		}
-		if ((ptei != l2->tei) && (ptei != GROUP_TEI)) {
-			/* not our business */
-			if (*debug & DEBUG_L2)
-				printk(KERN_DEBUG "%s: tei %d/%d mismatch\n",
-				       mISDNDevName4ch(&l2->ch), ptei, l2->tei);
-			dev_kfree_skb(skb);
-			return 0;
-		}
-	} else
-		datap += l;
-	if (!(*datap & 1)) {	/* I-Frame */
-		c = iframe_error(l2, skb);
-		if (!c)
-			ret = mISDN_FsmEvent(&l2->l2m, EV_L2_I, skb);
-	} else if (IsSFrame(datap, l2)) {	/* S-Frame */
-		c = super_error(l2, skb);
-		if (!c)
-			ret = mISDN_FsmEvent(&l2->l2m, EV_L2_SUPER, skb);
-	} else if (IsUI(datap)) {
-		c = UI_error(l2, skb);
-		if (!c)
-			ret = mISDN_FsmEvent(&l2->l2m, EV_L2_UI, skb);
-	} else if (IsSABME(datap, l2)) {
-		c = unnum_error(l2, skb, CMD);
-		if (!c)
-			ret = mISDN_FsmEvent(&l2->l2m, EV_L2_SABME, skb);
-	} else if (IsUA(datap)) {
-		c = unnum_error(l2, skb, RSP);
-		if (!c)
-			ret = mISDN_FsmEvent(&l2->l2m, EV_L2_UA, skb);
-	} else if (IsDISC(datap)) {
-		c = unnum_error(l2, skb, CMD);
-		if (!c)
-			ret = mISDN_FsmEvent(&l2->l2m, EV_L2_DISC, skb);
-	} else if (IsDM(datap)) {
-		c = unnum_error(l2, skb, RSP);
-		if (!c)
-			ret = mISDN_FsmEvent(&l2->l2m, EV_L2_DM, skb);
-	} else if (IsFRMR(datap)) {
-		c = FRMR_error(l2, skb);
-		if (!c)
-			ret = mISDN_FsmEvent(&l2->l2m, EV_L2_FRMR, skb);
-	} else
-		c = 'L';
-	if (c) {
-		printk(KERN_WARNING "%s:l2 D-channel frame error %c\n",
-		       mISDNDevName4ch(&l2->ch), c);
-		mISDN_FsmEvent(&l2->l2m, EV_L2_FRAME_ERROR, (void *)(long)c);
-	}
-	return ret;
-}
-
-static int
-l2_send(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct layer2		*l2 = container_of(ch, struct layer2, ch);
-	struct mISDNhead	*hh =  mISDN_HEAD_P(skb);
-	int			ret = -EINVAL;
-
-	if (*debug & DEBUG_L2_RECV)
-		printk(KERN_DEBUG "%s: %s prim(%x) id(%x) sapi(%d) tei(%d)\n",
-		       __func__, mISDNDevName4ch(&l2->ch), hh->prim, hh->id,
-		       l2->sapi, l2->tei);
-	if (hh->prim == DL_INTERN_MSG) {
-		struct mISDNhead *chh = hh + 1; /* saved copy */
-
-		*hh = *chh;
-		if (*debug & DEBUG_L2_RECV)
-			printk(KERN_DEBUG "%s: prim(%x) id(%x) internal msg\n",
-				mISDNDevName4ch(&l2->ch), hh->prim, hh->id);
-	}
-	switch (hh->prim) {
-	case PH_DATA_IND:
-		ret = ph_data_indication(l2, hh, skb);
-		break;
-	case PH_DATA_CNF:
-		ret = ph_data_confirm(l2, hh, skb);
-		break;
-	case PH_ACTIVATE_IND:
-		test_and_set_bit(FLG_L1_ACTIV, &l2->flag);
-		l2up_create(l2, MPH_ACTIVATE_IND, 0, NULL);
-		if (test_and_clear_bit(FLG_ESTAB_PEND, &l2->flag))
-			ret = mISDN_FsmEvent(&l2->l2m,
-					     EV_L2_DL_ESTABLISH_REQ, skb);
-		break;
-	case PH_DEACTIVATE_IND:
-		test_and_clear_bit(FLG_L1_ACTIV, &l2->flag);
-		l2up_create(l2, MPH_DEACTIVATE_IND, 0, NULL);
-		ret = mISDN_FsmEvent(&l2->l2m, EV_L1_DEACTIVATE, skb);
-		break;
-	case MPH_INFORMATION_IND:
-		if (!l2->up)
-			break;
-		ret = l2->up->send(l2->up, skb);
-		break;
-	case DL_DATA_REQ:
-		ret = mISDN_FsmEvent(&l2->l2m, EV_L2_DL_DATA, skb);
-		break;
-	case DL_UNITDATA_REQ:
-		ret = mISDN_FsmEvent(&l2->l2m, EV_L2_DL_UNITDATA, skb);
-		break;
-	case DL_ESTABLISH_REQ:
-		if (test_bit(FLG_LAPB, &l2->flag))
-			test_and_set_bit(FLG_ORIG, &l2->flag);
-		if (test_bit(FLG_L1_ACTIV, &l2->flag)) {
-			if (test_bit(FLG_LAPD, &l2->flag) ||
-			    test_bit(FLG_ORIG, &l2->flag))
-				ret = mISDN_FsmEvent(&l2->l2m,
-						     EV_L2_DL_ESTABLISH_REQ, skb);
-		} else {
-			if (test_bit(FLG_LAPD, &l2->flag) ||
-			    test_bit(FLG_ORIG, &l2->flag)) {
-				test_and_set_bit(FLG_ESTAB_PEND,
-						 &l2->flag);
-			}
-			ret = l2down(l2, PH_ACTIVATE_REQ, l2_newid(l2),
-				     skb);
-		}
-		break;
-	case DL_RELEASE_REQ:
-		if (test_bit(FLG_LAPB, &l2->flag))
-			l2down_create(l2, PH_DEACTIVATE_REQ,
-				      l2_newid(l2), 0, NULL);
-		ret = mISDN_FsmEvent(&l2->l2m, EV_L2_DL_RELEASE_REQ,
-				     skb);
-		break;
-	case DL_TIMER200_IND:
-		mISDN_FsmEvent(&l2->l2m, EV_L2_T200I, NULL);
-		break;
-	case DL_TIMER203_IND:
-		mISDN_FsmEvent(&l2->l2m, EV_L2_T203I, NULL);
-		break;
-	default:
-		if (*debug & DEBUG_L2)
-			l2m_debug(&l2->l2m, "l2 unknown pr %04x",
-				  hh->prim);
-	}
-	if (ret) {
-		dev_kfree_skb(skb);
-		ret = 0;
-	}
-	return ret;
-}
-
-int
-tei_l2(struct layer2 *l2, u_int cmd, u_long arg)
-{
-	int		ret = -EINVAL;
-
-	if (*debug & DEBUG_L2_TEI)
-		printk(KERN_DEBUG "%s: cmd(%x) in %s\n",
-		       mISDNDevName4ch(&l2->ch), cmd, __func__);
-	switch (cmd) {
-	case (MDL_ASSIGN_REQ):
-		ret = mISDN_FsmEvent(&l2->l2m, EV_L2_MDL_ASSIGN, (void *)arg);
-		break;
-	case (MDL_REMOVE_REQ):
-		ret = mISDN_FsmEvent(&l2->l2m, EV_L2_MDL_REMOVE, NULL);
-		break;
-	case (MDL_ERROR_IND):
-		ret = mISDN_FsmEvent(&l2->l2m, EV_L2_MDL_ERROR, NULL);
-		break;
-	case (MDL_ERROR_RSP):
-		/* ETS 300-125 5.3.2.1 Test: TC13010 */
-		printk(KERN_NOTICE "%s: MDL_ERROR|REQ (tei_l2)\n",
-		       mISDNDevName4ch(&l2->ch));
-		ret = mISDN_FsmEvent(&l2->l2m, EV_L2_MDL_ERROR, NULL);
-		break;
-	}
-	return ret;
-}
-
-static void
-release_l2(struct layer2 *l2)
-{
-	mISDN_FsmDelTimer(&l2->t200, 21);
-	mISDN_FsmDelTimer(&l2->t203, 16);
-	skb_queue_purge(&l2->i_queue);
-	skb_queue_purge(&l2->ui_queue);
-	skb_queue_purge(&l2->down_queue);
-	ReleaseWin(l2);
-	if (test_bit(FLG_LAPD, &l2->flag)) {
-		TEIrelease(l2);
-		if (l2->ch.st)
-			l2->ch.st->dev->D.ctrl(&l2->ch.st->dev->D,
-					       CLOSE_CHANNEL, NULL);
-	}
-	kfree(l2);
-}
-
-static int
-l2_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct layer2		*l2 = container_of(ch, struct layer2, ch);
-	u_int			info;
-
-	if (*debug & DEBUG_L2_CTRL)
-		printk(KERN_DEBUG "%s: %s cmd(%x)\n",
-		       mISDNDevName4ch(ch), __func__, cmd);
-
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		if (test_bit(FLG_LAPD, &l2->flag)) {
-			set_channel_address(&l2->ch, l2->sapi, l2->tei);
-			info = DL_INFO_L2_CONNECT;
-			l2up_create(l2, DL_INFORMATION_IND,
-				    sizeof(info), &info);
-		}
-		break;
-	case CLOSE_CHANNEL:
-		if (l2->ch.peer)
-			l2->ch.peer->ctrl(l2->ch.peer, CLOSE_CHANNEL, NULL);
-		release_l2(l2);
-		break;
-	}
-	return 0;
-}
-
-struct layer2 *
-create_l2(struct mISDNchannel *ch, u_int protocol, u_long options, int tei,
-	  int sapi)
-{
-	struct layer2		*l2;
-	struct channel_req	rq;
-
-	l2 = kzalloc_obj(struct layer2);
-	if (!l2) {
-		printk(KERN_ERR "kzalloc layer2 failed\n");
-		return NULL;
-	}
-	l2->next_id = 1;
-	l2->down_id = MISDN_ID_NONE;
-	l2->up = ch;
-	l2->ch.st = ch->st;
-	l2->ch.send = l2_send;
-	l2->ch.ctrl = l2_ctrl;
-	switch (protocol) {
-	case ISDN_P_LAPD_NT:
-		test_and_set_bit(FLG_LAPD, &l2->flag);
-		test_and_set_bit(FLG_LAPD_NET, &l2->flag);
-		test_and_set_bit(FLG_MOD128, &l2->flag);
-		l2->sapi = sapi;
-		l2->maxlen = MAX_DFRAME_LEN;
-		if (test_bit(OPTION_L2_PMX, &options))
-			l2->window = 7;
-		else
-			l2->window = 1;
-		if (test_bit(OPTION_L2_PTP, &options))
-			test_and_set_bit(FLG_PTP, &l2->flag);
-		if (test_bit(OPTION_L2_FIXEDTEI, &options))
-			test_and_set_bit(FLG_FIXED_TEI, &l2->flag);
-		l2->tei = tei;
-		l2->T200 = 1000;
-		l2->N200 = 3;
-		l2->T203 = 10000;
-		if (test_bit(OPTION_L2_PMX, &options))
-			rq.protocol = ISDN_P_NT_E1;
-		else
-			rq.protocol = ISDN_P_NT_S0;
-		rq.adr.channel = 0;
-		l2->ch.st->dev->D.ctrl(&l2->ch.st->dev->D, OPEN_CHANNEL, &rq);
-		break;
-	case ISDN_P_LAPD_TE:
-		test_and_set_bit(FLG_LAPD, &l2->flag);
-		test_and_set_bit(FLG_MOD128, &l2->flag);
-		test_and_set_bit(FLG_ORIG, &l2->flag);
-		l2->sapi = sapi;
-		l2->maxlen = MAX_DFRAME_LEN;
-		if (test_bit(OPTION_L2_PMX, &options))
-			l2->window = 7;
-		else
-			l2->window = 1;
-		if (test_bit(OPTION_L2_PTP, &options))
-			test_and_set_bit(FLG_PTP, &l2->flag);
-		if (test_bit(OPTION_L2_FIXEDTEI, &options))
-			test_and_set_bit(FLG_FIXED_TEI, &l2->flag);
-		l2->tei = tei;
-		l2->T200 = 1000;
-		l2->N200 = 3;
-		l2->T203 = 10000;
-		if (test_bit(OPTION_L2_PMX, &options))
-			rq.protocol = ISDN_P_TE_E1;
-		else
-			rq.protocol = ISDN_P_TE_S0;
-		rq.adr.channel = 0;
-		l2->ch.st->dev->D.ctrl(&l2->ch.st->dev->D, OPEN_CHANNEL, &rq);
-		break;
-	case ISDN_P_B_X75SLP:
-		test_and_set_bit(FLG_LAPB, &l2->flag);
-		l2->window = 7;
-		l2->maxlen = MAX_DATA_SIZE;
-		l2->T200 = 1000;
-		l2->N200 = 4;
-		l2->T203 = 5000;
-		l2->addr.A = 3;
-		l2->addr.B = 1;
-		break;
-	default:
-		printk(KERN_ERR "layer2 create failed prt %x\n",
-		       protocol);
-		kfree(l2);
-		return NULL;
-	}
-	skb_queue_head_init(&l2->i_queue);
-	skb_queue_head_init(&l2->ui_queue);
-	skb_queue_head_init(&l2->down_queue);
-	skb_queue_head_init(&l2->tmp_queue);
-	InitWin(l2);
-	l2->l2m.fsm = &l2fsm;
-	if (test_bit(FLG_LAPB, &l2->flag) ||
-	    test_bit(FLG_FIXED_TEI, &l2->flag) ||
-	    test_bit(FLG_LAPD_NET, &l2->flag))
-		l2->l2m.state = ST_L2_4;
-	else
-		l2->l2m.state = ST_L2_1;
-	l2->l2m.debug = *debug;
-	l2->l2m.userdata = l2;
-	l2->l2m.userint = 0;
-	l2->l2m.printdebug = l2m_debug;
-
-	mISDN_FsmInitTimer(&l2->l2m, &l2->t200);
-	mISDN_FsmInitTimer(&l2->l2m, &l2->t203);
-	return l2;
-}
-
-static int
-x75create(struct channel_req *crq)
-{
-	struct layer2	*l2;
-
-	if (crq->protocol != ISDN_P_B_X75SLP)
-		return -EPROTONOSUPPORT;
-	l2 = create_l2(crq->ch, crq->protocol, 0, 0, 0);
-	if (!l2)
-		return -ENOMEM;
-	crq->ch = &l2->ch;
-	crq->protocol = ISDN_P_B_HDLC;
-	return 0;
-}
-
-static struct Bprotocol X75SLP = {
-	.Bprotocols = (1 << (ISDN_P_B_X75SLP & ISDN_P_B_MASK)),
-	.name = "X75SLP",
-	.create = x75create
-};
-
-int
-Isdnl2_Init(u_int *deb)
-{
-	int res;
-	debug = deb;
-	mISDN_register_Bprotocol(&X75SLP);
-	l2fsm.state_count = L2_STATE_COUNT;
-	l2fsm.event_count = L2_EVENT_COUNT;
-	l2fsm.strEvent = strL2Event;
-	l2fsm.strState = strL2State;
-	res = mISDN_FsmNew(&l2fsm, L2FnList, ARRAY_SIZE(L2FnList));
-	if (res)
-		goto error;
-	res = TEIInit(deb);
-	if (res)
-		goto error_fsm;
-	return 0;
-
-error_fsm:
-	mISDN_FsmFree(&l2fsm);
-error:
-	mISDN_unregister_Bprotocol(&X75SLP);
-	return res;
-}
-
-void
-Isdnl2_cleanup(void)
-{
-	mISDN_unregister_Bprotocol(&X75SLP);
-	TEIFree();
-	mISDN_FsmFree(&l2fsm);
-}
diff --git a/drivers/isdn/mISDN/layer2.h b/drivers/isdn/mISDN/layer2.h
deleted file mode 100644
index c466fd94aa02..000000000000
--- a/drivers/isdn/mISDN/layer2.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Layer 2 defines
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#include <linux/mISDNif.h>
-#include <linux/skbuff.h>
-#include "fsm.h"
-
-#define MAX_WINDOW	8
-
-struct manager {
-	struct mISDNchannel	ch;
-	struct mISDNchannel	bcast;
-	u_long			options;
-	struct list_head	layer2;
-	rwlock_t		lock;
-	struct FsmInst		deact;
-	struct FsmTimer		datimer;
-	struct sk_buff_head	sendq;
-	struct mISDNchannel	*up;
-	u_int			nextid;
-	u_int			lastid;
-};
-
-struct teimgr {
-	int			ri;
-	int			rcnt;
-	struct FsmInst		tei_m;
-	struct FsmTimer		timer;
-	int			tval, nval;
-	struct layer2		*l2;
-	struct manager		*mgr;
-};
-
-struct laddr {
-	u_char	A;
-	u_char	B;
-};
-
-struct layer2 {
-	struct list_head	list;
-	struct mISDNchannel	ch;
-	u_long			flag;
-	int			id;
-	struct mISDNchannel	*up;
-	signed char		sapi;
-	signed char		tei;
-	struct laddr		addr;
-	u_int			maxlen;
-	struct teimgr		*tm;
-	u_int			vs, va, vr;
-	int			rc;
-	u_int			window;
-	u_int			sow;
-	struct FsmInst		l2m;
-	struct FsmTimer		t200, t203;
-	int			T200, N200, T203;
-	u_int			next_id;
-	u_int			down_id;
-	struct sk_buff		*windowar[MAX_WINDOW];
-	struct sk_buff_head	i_queue;
-	struct sk_buff_head	ui_queue;
-	struct sk_buff_head	down_queue;
-	struct sk_buff_head	tmp_queue;
-};
-
-enum {
-	ST_L2_1,
-	ST_L2_2,
-	ST_L2_3,
-	ST_L2_4,
-	ST_L2_5,
-	ST_L2_6,
-	ST_L2_7,
-	ST_L2_8,
-};
-
-#define L2_STATE_COUNT (ST_L2_8 + 1)
-
-extern struct layer2	*create_l2(struct mISDNchannel *, u_int,
-				   u_long, int, int);
-extern int		tei_l2(struct layer2 *, u_int, u_long arg);
-
-
-/* from tei.c */
-extern int		l2_tei(struct layer2 *, u_int, u_long arg);
-extern void		TEIrelease(struct layer2 *);
-extern int		TEIInit(u_int *);
-extern void		TEIFree(void);
-
-#define MAX_L2HEADER_LEN 4
-
-#define RR	0x01
-#define RNR	0x05
-#define REJ	0x09
-#define SABME	0x6f
-#define SABM	0x2f
-#define DM	0x0f
-#define UI	0x03
-#define DISC	0x43
-#define UA	0x63
-#define FRMR	0x87
-#define XID	0xaf
-
-#define CMD	0
-#define RSP	1
-
-#define LC_FLUSH_WAIT 1
-
-#define FLG_LAPB	0
-#define FLG_LAPD	1
-#define FLG_ORIG	2
-#define FLG_MOD128	3
-#define FLG_PEND_REL	4
-#define FLG_L3_INIT	5
-#define FLG_T200_RUN	6
-#define FLG_ACK_PEND	7
-#define FLG_REJEXC	8
-#define FLG_OWN_BUSY	9
-#define FLG_PEER_BUSY	10
-#define FLG_DCHAN_BUSY	11
-#define FLG_L1_ACTIV	12
-#define FLG_ESTAB_PEND	13
-#define FLG_PTP		14
-#define FLG_FIXED_TEI	15
-#define FLG_L2BLOCK	16
-#define FLG_L1_NOTREADY	17
-#define FLG_LAPD_NET	18
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
deleted file mode 100644
index 77b900db1cac..000000000000
--- a/drivers/isdn/mISDN/socket.c
+++ /dev/null
@@ -1,825 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Author	Karsten Keil <kkeil@novell.com>
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#include <linux/mISDNif.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include "core.h"
-
-static u_int	*debug;
-
-static struct proto mISDN_proto = {
-	.name		= "misdn",
-	.owner		= THIS_MODULE,
-	.obj_size	= sizeof(struct mISDN_sock)
-};
-
-#define _pms(sk)	((struct mISDN_sock *)sk)
-
-static struct mISDN_sock_list	data_sockets = {
-	.lock = __RW_LOCK_UNLOCKED(data_sockets.lock)
-};
-
-static struct mISDN_sock_list	base_sockets = {
-	.lock = __RW_LOCK_UNLOCKED(base_sockets.lock)
-};
-
-#define L2_HEADER_LEN	4
-
-static inline struct sk_buff *
-_l2_alloc_skb(unsigned int len, gfp_t gfp_mask)
-{
-	struct sk_buff  *skb;
-
-	skb = alloc_skb(len + L2_HEADER_LEN, gfp_mask);
-	if (likely(skb))
-		skb_reserve(skb, L2_HEADER_LEN);
-	return skb;
-}
-
-static void
-mISDN_sock_link(struct mISDN_sock_list *l, struct sock *sk)
-{
-	write_lock_bh(&l->lock);
-	sk_add_node(sk, &l->head);
-	write_unlock_bh(&l->lock);
-}
-
-static void mISDN_sock_unlink(struct mISDN_sock_list *l, struct sock *sk)
-{
-	write_lock_bh(&l->lock);
-	sk_del_node_init(sk);
-	write_unlock_bh(&l->lock);
-}
-
-static int
-mISDN_send(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct mISDN_sock *msk;
-	int	err;
-
-	msk = container_of(ch, struct mISDN_sock, ch);
-	if (*debug & DEBUG_SOCKET)
-		printk(KERN_DEBUG "%s len %d %p\n", __func__, skb->len, skb);
-	if (msk->sk.sk_state == MISDN_CLOSED)
-		return -EUNATCH;
-	__net_timestamp(skb);
-	err = sock_queue_rcv_skb(&msk->sk, skb);
-	if (err)
-		printk(KERN_WARNING "%s: error %d\n", __func__, err);
-	return err;
-}
-
-static int
-mISDN_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct mISDN_sock *msk;
-
-	msk = container_of(ch, struct mISDN_sock, ch);
-	if (*debug & DEBUG_SOCKET)
-		printk(KERN_DEBUG "%s(%p, %x, %p)\n", __func__, ch, cmd, arg);
-	switch (cmd) {
-	case CLOSE_CHANNEL:
-		msk->sk.sk_state = MISDN_CLOSED;
-		break;
-	}
-	return 0;
-}
-
-static inline void
-mISDN_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
-{
-	struct __kernel_old_timeval	tv;
-
-	if (_pms(sk)->cmask & MISDN_TIME_STAMP) {
-		skb_get_timestamp(skb, &tv);
-		put_cmsg(msg, SOL_MISDN, MISDN_TIME_STAMP, sizeof(tv), &tv);
-	}
-}
-
-static int
-mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
-		   int flags)
-{
-	struct sk_buff		*skb;
-	struct sock		*sk = sock->sk;
-
-	int		copied, err;
-
-	if (*debug & DEBUG_SOCKET)
-		printk(KERN_DEBUG "%s: len %d, flags %x ch.nr %d, proto %x\n",
-		       __func__, (int)len, flags, _pms(sk)->ch.nr,
-		       sk->sk_protocol);
-	if (flags & (MSG_OOB))
-		return -EOPNOTSUPP;
-
-	if (sk->sk_state == MISDN_CLOSED)
-		return 0;
-
-	skb = skb_recv_datagram(sk, flags, &err);
-	if (!skb)
-		return err;
-
-	if (msg->msg_name) {
-		DECLARE_SOCKADDR(struct sockaddr_mISDN *, maddr, msg->msg_name);
-
-		maddr->family = AF_ISDN;
-		maddr->dev = _pms(sk)->dev->id;
-		if ((sk->sk_protocol == ISDN_P_LAPD_TE) ||
-		    (sk->sk_protocol == ISDN_P_LAPD_NT)) {
-			maddr->channel = (mISDN_HEAD_ID(skb) >> 16) & 0xff;
-			maddr->tei =  (mISDN_HEAD_ID(skb) >> 8) & 0xff;
-			maddr->sapi = mISDN_HEAD_ID(skb) & 0xff;
-		} else {
-			maddr->channel = _pms(sk)->ch.nr;
-			maddr->sapi = _pms(sk)->ch.addr & 0xFF;
-			maddr->tei =  (_pms(sk)->ch.addr >> 8) & 0xFF;
-		}
-		msg->msg_namelen = sizeof(*maddr);
-	}
-
-	copied = skb->len + MISDN_HEADER_LEN;
-	if (len < copied) {
-		if (flags & MSG_PEEK)
-			refcount_dec(&skb->users);
-		else
-			skb_queue_head(&sk->sk_receive_queue, skb);
-		return -ENOSPC;
-	}
-	memcpy(skb_push(skb, MISDN_HEADER_LEN), mISDN_HEAD_P(skb),
-	       MISDN_HEADER_LEN);
-
-	err = skb_copy_datagram_msg(skb, 0, msg, copied);
-
-	mISDN_sock_cmsg(sk, msg, skb);
-
-	skb_free_datagram(sk, skb);
-
-	return err ? : copied;
-}
-
-static int
-mISDN_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
-{
-	struct sock		*sk = sock->sk;
-	struct sk_buff		*skb;
-	int			err = -ENOMEM;
-
-	if (*debug & DEBUG_SOCKET)
-		printk(KERN_DEBUG "%s: len %d flags %x ch %d proto %x\n",
-		       __func__, (int)len, msg->msg_flags, _pms(sk)->ch.nr,
-		       sk->sk_protocol);
-
-	if (msg->msg_flags & MSG_OOB)
-		return -EOPNOTSUPP;
-
-	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL | MSG_ERRQUEUE))
-		return -EINVAL;
-
-	if (len < MISDN_HEADER_LEN)
-		return -EINVAL;
-
-	if (sk->sk_state != MISDN_BOUND)
-		return -EBADFD;
-
-	lock_sock(sk);
-
-	skb = _l2_alloc_skb(len, GFP_KERNEL);
-	if (!skb)
-		goto done;
-
-	if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
-		err = -EFAULT;
-		goto done;
-	}
-
-	memcpy(mISDN_HEAD_P(skb), skb->data, MISDN_HEADER_LEN);
-	skb_pull(skb, MISDN_HEADER_LEN);
-
-	if (msg->msg_namelen >= sizeof(struct sockaddr_mISDN)) {
-		/* if we have a address, we use it */
-		DECLARE_SOCKADDR(struct sockaddr_mISDN *, maddr, msg->msg_name);
-		mISDN_HEAD_ID(skb) = maddr->channel;
-	} else { /* use default for L2 messages */
-		if ((sk->sk_protocol == ISDN_P_LAPD_TE) ||
-		    (sk->sk_protocol == ISDN_P_LAPD_NT))
-			mISDN_HEAD_ID(skb) = _pms(sk)->ch.nr;
-	}
-
-	if (*debug & DEBUG_SOCKET)
-		printk(KERN_DEBUG "%s: ID:%x\n",
-		       __func__, mISDN_HEAD_ID(skb));
-
-	err = -ENODEV;
-	if (!_pms(sk)->ch.peer)
-		goto done;
-	err = _pms(sk)->ch.recv(_pms(sk)->ch.peer, skb);
-	if (err)
-		goto done;
-	else {
-		skb = NULL;
-		err = len;
-	}
-
-done:
-	kfree_skb(skb);
-	release_sock(sk);
-	return err;
-}
-
-static int
-data_sock_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-
-	if (*debug & DEBUG_SOCKET)
-		printk(KERN_DEBUG "%s(%p) sk=%p\n", __func__, sock, sk);
-	if (!sk)
-		return 0;
-	switch (sk->sk_protocol) {
-	case ISDN_P_TE_S0:
-	case ISDN_P_NT_S0:
-	case ISDN_P_TE_E1:
-	case ISDN_P_NT_E1:
-		if (sk->sk_state == MISDN_BOUND)
-			delete_channel(&_pms(sk)->ch);
-		else
-			mISDN_sock_unlink(&data_sockets, sk);
-		break;
-	case ISDN_P_LAPD_TE:
-	case ISDN_P_LAPD_NT:
-	case ISDN_P_B_RAW:
-	case ISDN_P_B_HDLC:
-	case ISDN_P_B_X75SLP:
-	case ISDN_P_B_L2DTMF:
-	case ISDN_P_B_L2DSP:
-	case ISDN_P_B_L2DSPHDLC:
-		delete_channel(&_pms(sk)->ch);
-		mISDN_sock_unlink(&data_sockets, sk);
-		break;
-	}
-
-	lock_sock(sk);
-
-	sock_orphan(sk);
-	skb_queue_purge(&sk->sk_receive_queue);
-
-	release_sock(sk);
-	sock_put(sk);
-
-	return 0;
-}
-
-static int
-data_sock_ioctl_bound(struct sock *sk, unsigned int cmd, void __user *p)
-{
-	struct mISDN_ctrl_req	cq;
-	int			err = -EINVAL, val[2];
-	struct mISDNchannel	*bchan, *next;
-
-	lock_sock(sk);
-	if (!_pms(sk)->dev) {
-		err = -ENODEV;
-		goto done;
-	}
-	switch (cmd) {
-	case IMCTRLREQ:
-		if (copy_from_user(&cq, p, sizeof(cq))) {
-			err = -EFAULT;
-			break;
-		}
-		if ((sk->sk_protocol & ~ISDN_P_B_MASK) == ISDN_P_B_START) {
-			list_for_each_entry_safe(bchan, next,
-						 &_pms(sk)->dev->bchannels, list) {
-				if (bchan->nr == cq.channel) {
-					err = bchan->ctrl(bchan,
-							  CONTROL_CHANNEL, &cq);
-					break;
-				}
-			}
-		} else
-			err = _pms(sk)->dev->D.ctrl(&_pms(sk)->dev->D,
-						    CONTROL_CHANNEL, &cq);
-		if (err)
-			break;
-		if (copy_to_user(p, &cq, sizeof(cq)))
-			err = -EFAULT;
-		break;
-	case IMCLEAR_L2:
-		if (sk->sk_protocol != ISDN_P_LAPD_NT) {
-			err = -EINVAL;
-			break;
-		}
-		val[0] = cmd;
-		if (get_user(val[1], (int __user *)p)) {
-			err = -EFAULT;
-			break;
-		}
-		err = _pms(sk)->dev->teimgr->ctrl(_pms(sk)->dev->teimgr,
-						  CONTROL_CHANNEL, val);
-		break;
-	case IMHOLD_L1:
-		if (sk->sk_protocol != ISDN_P_LAPD_NT
-		    && sk->sk_protocol != ISDN_P_LAPD_TE) {
-			err = -EINVAL;
-			break;
-		}
-		val[0] = cmd;
-		if (get_user(val[1], (int __user *)p)) {
-			err = -EFAULT;
-			break;
-		}
-		err = _pms(sk)->dev->teimgr->ctrl(_pms(sk)->dev->teimgr,
-						  CONTROL_CHANNEL, val);
-		break;
-	default:
-		err = -EINVAL;
-		break;
-	}
-done:
-	release_sock(sk);
-	return err;
-}
-
-static int
-data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	int			err = 0, id;
-	struct sock		*sk = sock->sk;
-	struct mISDNdevice	*dev;
-	struct mISDNversion	ver;
-
-	switch (cmd) {
-	case IMGETVERSION:
-		ver.major = MISDN_MAJOR_VERSION;
-		ver.minor = MISDN_MINOR_VERSION;
-		ver.release = MISDN_RELEASE;
-		if (copy_to_user((void __user *)arg, &ver, sizeof(ver)))
-			err = -EFAULT;
-		break;
-	case IMGETCOUNT:
-		id = get_mdevice_count();
-		if (put_user(id, (int __user *)arg))
-			err = -EFAULT;
-		break;
-	case IMGETDEVINFO:
-		if (get_user(id, (int __user *)arg)) {
-			err = -EFAULT;
-			break;
-		}
-		dev = get_mdevice(id);
-		if (dev) {
-			struct mISDN_devinfo di;
-
-			memset(&di, 0, sizeof(di));
-			di.id = dev->id;
-			di.Dprotocols = dev->Dprotocols;
-			di.Bprotocols = dev->Bprotocols | get_all_Bprotocols();
-			di.protocol = dev->D.protocol;
-			memcpy(di.channelmap, dev->channelmap,
-			       sizeof(di.channelmap));
-			di.nrbchan = dev->nrbchan;
-			strscpy(di.name, dev_name(&dev->dev), sizeof(di.name));
-			if (copy_to_user((void __user *)arg, &di, sizeof(di)))
-				err = -EFAULT;
-		} else
-			err = -ENODEV;
-		break;
-	default:
-		if (sk->sk_state == MISDN_BOUND)
-			err = data_sock_ioctl_bound(sk, cmd,
-						    (void __user *)arg);
-		else
-			err = -ENOTCONN;
-	}
-	return err;
-}
-
-static int data_sock_setsockopt(struct socket *sock, int level, int optname,
-				sockptr_t optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-	int err = 0, opt = 0;
-
-	if (*debug & DEBUG_SOCKET)
-		printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock,
-		       level, optname, optlen);
-
-	lock_sock(sk);
-
-	switch (optname) {
-	case MISDN_TIME_STAMP:
-		err = copy_safe_from_sockptr(&opt, sizeof(opt),
-					     optval, optlen);
-		if (err)
-			break;
-
-		if (opt)
-			_pms(sk)->cmask |= MISDN_TIME_STAMP;
-		else
-			_pms(sk)->cmask &= ~MISDN_TIME_STAMP;
-		break;
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-	release_sock(sk);
-	return err;
-}
-
-static int data_sock_getsockopt(struct socket *sock, int level, int optname,
-				char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-	int len, opt;
-
-	if (get_user(len, optlen))
-		return -EFAULT;
-
-	if (len != sizeof(char))
-		return -EINVAL;
-
-	switch (optname) {
-	case MISDN_TIME_STAMP:
-		if (_pms(sk)->cmask & MISDN_TIME_STAMP)
-			opt = 1;
-		else
-			opt = 0;
-
-		if (put_user(opt, optval))
-			return -EFAULT;
-		break;
-	default:
-		return -ENOPROTOOPT;
-	}
-
-	return 0;
-}
-
-static int
-data_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len)
-{
-	struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr;
-	struct sock *sk = sock->sk;
-	struct sock *csk;
-	int err = 0;
-
-	if (*debug & DEBUG_SOCKET)
-		printk(KERN_DEBUG "%s(%p) sk=%p\n", __func__, sock, sk);
-	if (addr_len != sizeof(struct sockaddr_mISDN))
-		return -EINVAL;
-	if (!maddr || maddr->family != AF_ISDN)
-		return -EINVAL;
-
-	lock_sock(sk);
-
-	if (_pms(sk)->dev) {
-		err = -EALREADY;
-		goto done;
-	}
-	_pms(sk)->dev = get_mdevice(maddr->dev);
-	if (!_pms(sk)->dev) {
-		err = -ENODEV;
-		goto done;
-	}
-
-	if (sk->sk_protocol < ISDN_P_B_START) {
-		read_lock_bh(&data_sockets.lock);
-		sk_for_each(csk, &data_sockets.head) {
-			if (sk == csk)
-				continue;
-			if (_pms(csk)->dev != _pms(sk)->dev)
-				continue;
-			if (csk->sk_protocol >= ISDN_P_B_START)
-				continue;
-			if (IS_ISDN_P_TE(csk->sk_protocol)
-			    == IS_ISDN_P_TE(sk->sk_protocol))
-				continue;
-			read_unlock_bh(&data_sockets.lock);
-			err = -EBUSY;
-			goto done;
-		}
-		read_unlock_bh(&data_sockets.lock);
-	}
-
-	_pms(sk)->ch.send = mISDN_send;
-	_pms(sk)->ch.ctrl = mISDN_ctrl;
-
-	switch (sk->sk_protocol) {
-	case ISDN_P_TE_S0:
-	case ISDN_P_NT_S0:
-	case ISDN_P_TE_E1:
-	case ISDN_P_NT_E1:
-		mISDN_sock_unlink(&data_sockets, sk);
-		err = connect_layer1(_pms(sk)->dev, &_pms(sk)->ch,
-				     sk->sk_protocol, maddr);
-		if (err)
-			mISDN_sock_link(&data_sockets, sk);
-		break;
-	case ISDN_P_LAPD_TE:
-	case ISDN_P_LAPD_NT:
-		err = create_l2entity(_pms(sk)->dev, &_pms(sk)->ch,
-				      sk->sk_protocol, maddr);
-		break;
-	case ISDN_P_B_RAW:
-	case ISDN_P_B_HDLC:
-	case ISDN_P_B_X75SLP:
-	case ISDN_P_B_L2DTMF:
-	case ISDN_P_B_L2DSP:
-	case ISDN_P_B_L2DSPHDLC:
-		err = connect_Bstack(_pms(sk)->dev, &_pms(sk)->ch,
-				     sk->sk_protocol, maddr);
-		break;
-	default:
-		err = -EPROTONOSUPPORT;
-	}
-	if (err)
-		goto done;
-	sk->sk_state = MISDN_BOUND;
-	_pms(sk)->ch.protocol = sk->sk_protocol;
-
-done:
-	release_sock(sk);
-	return err;
-}
-
-static int
-data_sock_getname(struct socket *sock, struct sockaddr *addr,
-		  int peer)
-{
-	struct sockaddr_mISDN	*maddr = (struct sockaddr_mISDN *) addr;
-	struct sock		*sk = sock->sk;
-
-	if (!_pms(sk)->dev)
-		return -EBADFD;
-
-	lock_sock(sk);
-
-	maddr->family = AF_ISDN;
-	maddr->dev = _pms(sk)->dev->id;
-	maddr->channel = _pms(sk)->ch.nr;
-	maddr->sapi = _pms(sk)->ch.addr & 0xff;
-	maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xff;
-	release_sock(sk);
-	return sizeof(*maddr);
-}
-
-static const struct proto_ops data_sock_ops = {
-	.family		= PF_ISDN,
-	.owner		= THIS_MODULE,
-	.release	= data_sock_release,
-	.ioctl		= data_sock_ioctl,
-	.bind		= data_sock_bind,
-	.getname	= data_sock_getname,
-	.sendmsg	= mISDN_sock_sendmsg,
-	.recvmsg	= mISDN_sock_recvmsg,
-	.poll		= datagram_poll,
-	.listen		= sock_no_listen,
-	.shutdown	= sock_no_shutdown,
-	.setsockopt	= data_sock_setsockopt,
-	.getsockopt	= data_sock_getsockopt,
-	.connect	= sock_no_connect,
-	.socketpair	= sock_no_socketpair,
-	.accept		= sock_no_accept,
-	.mmap		= sock_no_mmap
-};
-
-static int
-data_sock_create(struct net *net, struct socket *sock, int protocol, int kern)
-{
-	struct sock *sk;
-
-	if (sock->type != SOCK_DGRAM)
-		return -ESOCKTNOSUPPORT;
-
-	sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern);
-	if (!sk)
-		return -ENOMEM;
-
-	sock_init_data(sock, sk);
-
-	sock->ops = &data_sock_ops;
-	sock->state = SS_UNCONNECTED;
-	sock_reset_flag(sk, SOCK_ZAPPED);
-
-	sk->sk_protocol = protocol;
-	sk->sk_state    = MISDN_OPEN;
-	mISDN_sock_link(&data_sockets, sk);
-
-	return 0;
-}
-
-static int
-base_sock_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-
-	printk(KERN_DEBUG "%s(%p) sk=%p\n", __func__, sock, sk);
-	if (!sk)
-		return 0;
-
-	mISDN_sock_unlink(&base_sockets, sk);
-	sock_orphan(sk);
-	sock_put(sk);
-
-	return 0;
-}
-
-static int
-base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	int			err = 0, id;
-	struct mISDNdevice	*dev;
-	struct mISDNversion	ver;
-
-	switch (cmd) {
-	case IMGETVERSION:
-		ver.major = MISDN_MAJOR_VERSION;
-		ver.minor = MISDN_MINOR_VERSION;
-		ver.release = MISDN_RELEASE;
-		if (copy_to_user((void __user *)arg, &ver, sizeof(ver)))
-			err = -EFAULT;
-		break;
-	case IMGETCOUNT:
-		id = get_mdevice_count();
-		if (put_user(id, (int __user *)arg))
-			err = -EFAULT;
-		break;
-	case IMGETDEVINFO:
-		if (get_user(id, (int __user *)arg)) {
-			err = -EFAULT;
-			break;
-		}
-		dev = get_mdevice(id);
-		if (dev) {
-			struct mISDN_devinfo di;
-
-			memset(&di, 0, sizeof(di));
-			di.id = dev->id;
-			di.Dprotocols = dev->Dprotocols;
-			di.Bprotocols = dev->Bprotocols | get_all_Bprotocols();
-			di.protocol = dev->D.protocol;
-			memcpy(di.channelmap, dev->channelmap,
-			       sizeof(di.channelmap));
-			di.nrbchan = dev->nrbchan;
-			strscpy(di.name, dev_name(&dev->dev), sizeof(di.name));
-			if (copy_to_user((void __user *)arg, &di, sizeof(di)))
-				err = -EFAULT;
-		} else
-			err = -ENODEV;
-		break;
-	case IMSETDEVNAME:
-	{
-		struct mISDN_devrename dn;
-		if (copy_from_user(&dn, (void __user *)arg,
-				   sizeof(dn))) {
-			err = -EFAULT;
-			break;
-		}
-		dn.name[sizeof(dn.name) - 1] = '\0';
-		dev = get_mdevice(dn.id);
-		if (dev)
-			err = device_rename(&dev->dev, dn.name);
-		else
-			err = -ENODEV;
-	}
-	break;
-	default:
-		err = -EINVAL;
-	}
-	return err;
-}
-
-static int
-base_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len)
-{
-	struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr;
-	struct sock *sk = sock->sk;
-	int err = 0;
-
-	if (addr_len < sizeof(struct sockaddr_mISDN))
-		return -EINVAL;
-
-	if (!maddr || maddr->family != AF_ISDN)
-		return -EINVAL;
-
-	lock_sock(sk);
-
-	if (_pms(sk)->dev) {
-		err = -EALREADY;
-		goto done;
-	}
-
-	_pms(sk)->dev = get_mdevice(maddr->dev);
-	if (!_pms(sk)->dev) {
-		err = -ENODEV;
-		goto done;
-	}
-	sk->sk_state = MISDN_BOUND;
-
-done:
-	release_sock(sk);
-	return err;
-}
-
-static const struct proto_ops base_sock_ops = {
-	.family		= PF_ISDN,
-	.owner		= THIS_MODULE,
-	.release	= base_sock_release,
-	.ioctl		= base_sock_ioctl,
-	.bind		= base_sock_bind,
-	.getname	= sock_no_getname,
-	.sendmsg	= sock_no_sendmsg,
-	.recvmsg	= sock_no_recvmsg,
-	.listen		= sock_no_listen,
-	.shutdown	= sock_no_shutdown,
-	.connect	= sock_no_connect,
-	.socketpair	= sock_no_socketpair,
-	.accept		= sock_no_accept,
-	.mmap		= sock_no_mmap
-};
-
-
-static int
-base_sock_create(struct net *net, struct socket *sock, int protocol, int kern)
-{
-	struct sock *sk;
-
-	if (sock->type != SOCK_RAW)
-		return -ESOCKTNOSUPPORT;
-	if (!capable(CAP_NET_RAW))
-		return -EPERM;
-
-	sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern);
-	if (!sk)
-		return -ENOMEM;
-
-	sock_init_data(sock, sk);
-	sock->ops = &base_sock_ops;
-	sock->state = SS_UNCONNECTED;
-	sock_reset_flag(sk, SOCK_ZAPPED);
-	sk->sk_protocol = protocol;
-	sk->sk_state    = MISDN_OPEN;
-	mISDN_sock_link(&base_sockets, sk);
-
-	return 0;
-}
-
-static int
-mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern)
-{
-	int err = -EPROTONOSUPPORT;
-
-	switch (proto) {
-	case ISDN_P_BASE:
-		err = base_sock_create(net, sock, proto, kern);
-		break;
-	case ISDN_P_TE_S0:
-	case ISDN_P_NT_S0:
-	case ISDN_P_TE_E1:
-	case ISDN_P_NT_E1:
-	case ISDN_P_LAPD_TE:
-	case ISDN_P_LAPD_NT:
-	case ISDN_P_B_RAW:
-	case ISDN_P_B_HDLC:
-	case ISDN_P_B_X75SLP:
-	case ISDN_P_B_L2DTMF:
-	case ISDN_P_B_L2DSP:
-	case ISDN_P_B_L2DSPHDLC:
-		err = data_sock_create(net, sock, proto, kern);
-		break;
-	default:
-		return err;
-	}
-
-	return err;
-}
-
-static const struct net_proto_family mISDN_sock_family_ops = {
-	.owner  = THIS_MODULE,
-	.family = PF_ISDN,
-	.create = mISDN_sock_create,
-};
-
-int
-misdn_sock_init(u_int *deb)
-{
-	int err;
-
-	debug = deb;
-	err = sock_register(&mISDN_sock_family_ops);
-	if (err)
-		printk(KERN_ERR "%s: error(%d)\n", __func__, err);
-	return err;
-}
-
-void
-misdn_sock_cleanup(void)
-{
-	sock_unregister(PF_ISDN);
-}
diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
deleted file mode 100644
index 4e96684af0aa..000000000000
--- a/drivers/isdn/mISDN/stack.c
+++ /dev/null
@@ -1,654 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Author	Karsten Keil <kkeil@novell.com>
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#include <linux/slab.h>
-#include <linux/mISDNif.h>
-#include <linux/kthread.h>
-#include <linux/sched.h>
-#include <linux/sched/cputime.h>
-#include <linux/signal.h>
-
-#include "core.h"
-
-static u_int	*debug;
-
-static inline void
-_queue_message(struct mISDNstack *st, struct sk_buff *skb)
-{
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-
-	if (*debug & DEBUG_QUEUE_FUNC)
-		printk(KERN_DEBUG "%s prim(%x) id(%x) %p\n",
-		       __func__, hh->prim, hh->id, skb);
-	skb_queue_tail(&st->msgq, skb);
-	if (likely(!test_bit(mISDN_STACK_STOPPED, &st->status))) {
-		test_and_set_bit(mISDN_STACK_WORK, &st->status);
-		wake_up_interruptible(&st->workq);
-	}
-}
-
-static int
-mISDN_queue_message(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	_queue_message(ch->st, skb);
-	return 0;
-}
-
-static struct mISDNchannel *
-get_channel4id(struct mISDNstack *st, u_int id)
-{
-	struct mISDNchannel	*ch;
-
-	mutex_lock(&st->lmutex);
-	list_for_each_entry(ch, &st->layer2, list) {
-		if (id == ch->nr)
-			goto unlock;
-	}
-	ch = NULL;
-unlock:
-	mutex_unlock(&st->lmutex);
-	return ch;
-}
-
-static void
-send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb)
-{
-	struct sock		*sk;
-	struct sk_buff		*cskb = NULL;
-
-	read_lock(&sl->lock);
-	sk_for_each(sk, &sl->head) {
-		if (sk->sk_state != MISDN_BOUND)
-			continue;
-		if (!cskb)
-			cskb = skb_copy(skb, GFP_ATOMIC);
-		if (!cskb) {
-			printk(KERN_WARNING "%s no skb\n", __func__);
-			break;
-		}
-		if (!sock_queue_rcv_skb(sk, cskb))
-			cskb = NULL;
-	}
-	read_unlock(&sl->lock);
-	dev_kfree_skb(cskb);
-}
-
-static void
-send_layer2(struct mISDNstack *st, struct sk_buff *skb)
-{
-	struct sk_buff		*cskb;
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	struct mISDNchannel	*ch;
-	int			ret;
-
-	if (!st)
-		return;
-	mutex_lock(&st->lmutex);
-	if ((hh->id & MISDN_ID_ADDR_MASK) == MISDN_ID_ANY) { /* L2 for all */
-		list_for_each_entry(ch, &st->layer2, list) {
-			if (list_is_last(&ch->list, &st->layer2)) {
-				cskb = skb;
-				skb = NULL;
-			} else {
-				cskb = skb_copy(skb, GFP_KERNEL);
-			}
-			if (cskb) {
-				ret = ch->send(ch, cskb);
-				if (ret) {
-					if (*debug & DEBUG_SEND_ERR)
-						printk(KERN_DEBUG
-						       "%s ch%d prim(%x) addr(%x)"
-						       " err %d\n",
-						       __func__, ch->nr,
-						       hh->prim, ch->addr, ret);
-					dev_kfree_skb(cskb);
-				}
-			} else {
-				printk(KERN_WARNING "%s ch%d addr %x no mem\n",
-				       __func__, ch->nr, ch->addr);
-				goto out;
-			}
-		}
-	} else {
-		list_for_each_entry(ch, &st->layer2, list) {
-			if ((hh->id & MISDN_ID_ADDR_MASK) == ch->addr) {
-				ret = ch->send(ch, skb);
-				if (!ret)
-					skb = NULL;
-				goto out;
-			}
-		}
-		ret = st->dev->teimgr->ctrl(st->dev->teimgr, CHECK_DATA, skb);
-		if (!ret)
-			skb = NULL;
-		else if (*debug & DEBUG_SEND_ERR)
-			printk(KERN_DEBUG
-			       "%s mgr prim(%x) err %d\n",
-			       __func__, hh->prim, ret);
-	}
-out:
-	mutex_unlock(&st->lmutex);
-	dev_kfree_skb(skb);
-}
-
-static inline int
-send_msg_to_layer(struct mISDNstack *st, struct sk_buff *skb)
-{
-	struct mISDNhead	*hh = mISDN_HEAD_P(skb);
-	struct mISDNchannel	*ch;
-	int	lm;
-
-	lm = hh->prim & MISDN_LAYERMASK;
-	if (*debug & DEBUG_QUEUE_FUNC)
-		printk(KERN_DEBUG "%s prim(%x) id(%x) %p\n",
-		       __func__, hh->prim, hh->id, skb);
-	if (lm == 0x1) {
-		if (!hlist_empty(&st->l1sock.head)) {
-			__net_timestamp(skb);
-			send_socklist(&st->l1sock, skb);
-		}
-		return st->layer1->send(st->layer1, skb);
-	} else if (lm == 0x2) {
-		if (!hlist_empty(&st->l1sock.head))
-			send_socklist(&st->l1sock, skb);
-		send_layer2(st, skb);
-		return 0;
-	} else if (lm == 0x4) {
-		ch = get_channel4id(st, hh->id);
-		if (ch)
-			return ch->send(ch, skb);
-		else
-			printk(KERN_WARNING
-			       "%s: dev(%s) prim(%x) id(%x) no channel\n",
-			       __func__, dev_name(&st->dev->dev), hh->prim,
-			       hh->id);
-	} else if (lm == 0x8) {
-		WARN_ON(lm == 0x8);
-		ch = get_channel4id(st, hh->id);
-		if (ch)
-			return ch->send(ch, skb);
-		else
-			printk(KERN_WARNING
-			       "%s: dev(%s) prim(%x) id(%x) no channel\n",
-			       __func__, dev_name(&st->dev->dev), hh->prim,
-			       hh->id);
-	} else {
-		/* broadcast not handled yet */
-		printk(KERN_WARNING "%s: dev(%s) prim %x not delivered\n",
-		       __func__, dev_name(&st->dev->dev), hh->prim);
-	}
-	return -ESRCH;
-}
-
-static void
-do_clear_stack(struct mISDNstack *st)
-{
-}
-
-static int
-mISDNStackd(void *data)
-{
-	struct mISDNstack *st = data;
-#ifdef MISDN_MSG_STATS
-	u64 utime, stime;
-#endif
-	int err = 0;
-
-	sigfillset(&current->blocked);
-	if (*debug & DEBUG_MSG_THREAD)
-		printk(KERN_DEBUG "mISDNStackd %s started\n",
-		       dev_name(&st->dev->dev));
-
-	if (st->notify != NULL) {
-		complete(st->notify);
-		st->notify = NULL;
-	}
-
-	for (;;) {
-		struct sk_buff	*skb;
-
-		if (unlikely(test_bit(mISDN_STACK_STOPPED, &st->status))) {
-			test_and_clear_bit(mISDN_STACK_WORK, &st->status);
-			test_and_clear_bit(mISDN_STACK_RUNNING, &st->status);
-		} else
-			test_and_set_bit(mISDN_STACK_RUNNING, &st->status);
-		while (test_bit(mISDN_STACK_WORK, &st->status)) {
-			skb = skb_dequeue(&st->msgq);
-			if (!skb) {
-				test_and_clear_bit(mISDN_STACK_WORK,
-						   &st->status);
-				/* test if a race happens */
-				skb = skb_dequeue(&st->msgq);
-				if (!skb)
-					continue;
-				test_and_set_bit(mISDN_STACK_WORK,
-						 &st->status);
-			}
-#ifdef MISDN_MSG_STATS
-			st->msg_cnt++;
-#endif
-			err = send_msg_to_layer(st, skb);
-			if (unlikely(err)) {
-				if (*debug & DEBUG_SEND_ERR)
-					printk(KERN_DEBUG
-					       "%s: %s prim(%x) id(%x) "
-					       "send call(%d)\n",
-					       __func__, dev_name(&st->dev->dev),
-					       mISDN_HEAD_PRIM(skb),
-					       mISDN_HEAD_ID(skb), err);
-				dev_kfree_skb(skb);
-				continue;
-			}
-			if (unlikely(test_bit(mISDN_STACK_STOPPED,
-					      &st->status))) {
-				test_and_clear_bit(mISDN_STACK_WORK,
-						   &st->status);
-				test_and_clear_bit(mISDN_STACK_RUNNING,
-						   &st->status);
-				break;
-			}
-		}
-		if (test_bit(mISDN_STACK_CLEARING, &st->status)) {
-			test_and_set_bit(mISDN_STACK_STOPPED, &st->status);
-			test_and_clear_bit(mISDN_STACK_RUNNING, &st->status);
-			do_clear_stack(st);
-			test_and_clear_bit(mISDN_STACK_CLEARING, &st->status);
-			test_and_set_bit(mISDN_STACK_RESTART, &st->status);
-		}
-		if (test_and_clear_bit(mISDN_STACK_RESTART, &st->status)) {
-			test_and_clear_bit(mISDN_STACK_STOPPED, &st->status);
-			test_and_set_bit(mISDN_STACK_RUNNING, &st->status);
-			if (!skb_queue_empty(&st->msgq))
-				test_and_set_bit(mISDN_STACK_WORK,
-						 &st->status);
-		}
-		if (test_bit(mISDN_STACK_ABORT, &st->status))
-			break;
-		if (st->notify != NULL) {
-			complete(st->notify);
-			st->notify = NULL;
-		}
-#ifdef MISDN_MSG_STATS
-		st->sleep_cnt++;
-#endif
-		test_and_clear_bit(mISDN_STACK_ACTIVE, &st->status);
-		wait_event_interruptible(st->workq, (st->status &
-						     mISDN_STACK_ACTION_MASK));
-		if (*debug & DEBUG_MSG_THREAD)
-			printk(KERN_DEBUG "%s: %s wake status %08lx\n",
-			       __func__, dev_name(&st->dev->dev), st->status);
-		test_and_set_bit(mISDN_STACK_ACTIVE, &st->status);
-
-		test_and_clear_bit(mISDN_STACK_WAKEUP, &st->status);
-
-		if (test_bit(mISDN_STACK_STOPPED, &st->status)) {
-			test_and_clear_bit(mISDN_STACK_RUNNING, &st->status);
-#ifdef MISDN_MSG_STATS
-			st->stopped_cnt++;
-#endif
-		}
-	}
-#ifdef MISDN_MSG_STATS
-	printk(KERN_DEBUG "mISDNStackd daemon for %s proceed %d "
-	       "msg %d sleep %d stopped\n",
-	       dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt,
-	       st->stopped_cnt);
-	task_cputime(st->thread, &utime, &stime);
-	printk(KERN_DEBUG
-	       "mISDNStackd daemon for %s utime(%llu) stime(%llu)\n",
-	       dev_name(&st->dev->dev), utime, stime);
-	printk(KERN_DEBUG
-	       "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n",
-	       dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw);
-	printk(KERN_DEBUG "mISDNStackd daemon for %s killed now\n",
-	       dev_name(&st->dev->dev));
-#endif
-	test_and_set_bit(mISDN_STACK_KILLED, &st->status);
-	test_and_clear_bit(mISDN_STACK_RUNNING, &st->status);
-	test_and_clear_bit(mISDN_STACK_ACTIVE, &st->status);
-	test_and_clear_bit(mISDN_STACK_ABORT, &st->status);
-	skb_queue_purge(&st->msgq);
-	st->thread = NULL;
-	if (st->notify != NULL) {
-		complete(st->notify);
-		st->notify = NULL;
-	}
-	return 0;
-}
-
-static int
-l1_receive(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	if (!ch->st)
-		return -ENODEV;
-	__net_timestamp(skb);
-	_queue_message(ch->st, skb);
-	return 0;
-}
-
-void
-set_channel_address(struct mISDNchannel *ch, u_int sapi, u_int tei)
-{
-	ch->addr = sapi | (tei << 8);
-}
-
-void
-__add_layer2(struct mISDNchannel *ch, struct mISDNstack *st)
-{
-	list_add_tail(&ch->list, &st->layer2);
-}
-
-void
-add_layer2(struct mISDNchannel *ch, struct mISDNstack *st)
-{
-	mutex_lock(&st->lmutex);
-	__add_layer2(ch, st);
-	mutex_unlock(&st->lmutex);
-}
-
-static int
-st_own_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	if (!ch->st || !ch->st->layer1)
-		return -EINVAL;
-	return ch->st->layer1->ctrl(ch->st->layer1, cmd, arg);
-}
-
-int
-create_stack(struct mISDNdevice *dev)
-{
-	struct mISDNstack	*newst;
-	int			err;
-	DECLARE_COMPLETION_ONSTACK(done);
-
-	newst = kzalloc_obj(struct mISDNstack);
-	if (!newst) {
-		printk(KERN_ERR "kmalloc mISDN_stack failed\n");
-		return -ENOMEM;
-	}
-	newst->dev = dev;
-	INIT_LIST_HEAD(&newst->layer2);
-	INIT_HLIST_HEAD(&newst->l1sock.head);
-	rwlock_init(&newst->l1sock.lock);
-	init_waitqueue_head(&newst->workq);
-	skb_queue_head_init(&newst->msgq);
-	mutex_init(&newst->lmutex);
-	dev->D.st = newst;
-	err = create_teimanager(dev);
-	if (err) {
-		printk(KERN_ERR "kmalloc teimanager failed\n");
-		kfree(newst);
-		return err;
-	}
-	dev->teimgr->peer = &newst->own;
-	dev->teimgr->recv = mISDN_queue_message;
-	dev->teimgr->st = newst;
-	newst->layer1 = &dev->D;
-	dev->D.recv = l1_receive;
-	dev->D.peer = &newst->own;
-	newst->own.st = newst;
-	newst->own.ctrl = st_own_ctrl;
-	newst->own.send = mISDN_queue_message;
-	newst->own.recv = mISDN_queue_message;
-	if (*debug & DEBUG_CORE_FUNC)
-		printk(KERN_DEBUG "%s: st(%s)\n", __func__,
-		       dev_name(&newst->dev->dev));
-	newst->notify = &done;
-	newst->thread = kthread_run(mISDNStackd, (void *)newst, "mISDN_%s",
-				    dev_name(&newst->dev->dev));
-	if (IS_ERR(newst->thread)) {
-		err = PTR_ERR(newst->thread);
-		printk(KERN_ERR
-		       "mISDN:cannot create kernel thread for %s (%d)\n",
-		       dev_name(&newst->dev->dev), err);
-		delete_teimanager(dev->teimgr);
-		kfree(newst);
-	} else
-		wait_for_completion(&done);
-	return err;
-}
-
-int
-connect_layer1(struct mISDNdevice *dev, struct mISDNchannel *ch,
-	       u_int protocol, struct sockaddr_mISDN *adr)
-{
-	struct mISDN_sock	*msk = container_of(ch, struct mISDN_sock, ch);
-	struct channel_req	rq;
-	int			err;
-
-
-	if (*debug &  DEBUG_CORE_FUNC)
-		printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n",
-		       __func__, dev_name(&dev->dev), protocol, adr->dev,
-		       adr->channel, adr->sapi, adr->tei);
-	switch (protocol) {
-	case ISDN_P_NT_S0:
-	case ISDN_P_NT_E1:
-	case ISDN_P_TE_S0:
-	case ISDN_P_TE_E1:
-		ch->recv = mISDN_queue_message;
-		ch->peer = &dev->D.st->own;
-		ch->st = dev->D.st;
-		rq.protocol = protocol;
-		rq.adr.channel = adr->channel;
-		err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq);
-		printk(KERN_DEBUG "%s: ret %d (dev %d)\n", __func__, err,
-		       dev->id);
-		if (err)
-			return err;
-		write_lock_bh(&dev->D.st->l1sock.lock);
-		sk_add_node(&msk->sk, &dev->D.st->l1sock.head);
-		write_unlock_bh(&dev->D.st->l1sock.lock);
-		break;
-	default:
-		return -ENOPROTOOPT;
-	}
-	return 0;
-}
-
-int
-connect_Bstack(struct mISDNdevice *dev, struct mISDNchannel *ch,
-	       u_int protocol, struct sockaddr_mISDN *adr)
-{
-	struct channel_req	rq, rq2;
-	int			pmask, err;
-	struct Bprotocol	*bp;
-
-	if (*debug &  DEBUG_CORE_FUNC)
-		printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n",
-		       __func__, dev_name(&dev->dev), protocol,
-		       adr->dev, adr->channel, adr->sapi,
-		       adr->tei);
-	ch->st = dev->D.st;
-	pmask = 1 << (protocol & ISDN_P_B_MASK);
-	if (pmask & dev->Bprotocols) {
-		rq.protocol = protocol;
-		rq.adr = *adr;
-		err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq);
-		if (err)
-			return err;
-		ch->recv = rq.ch->send;
-		ch->peer = rq.ch;
-		rq.ch->recv = ch->send;
-		rq.ch->peer = ch;
-		rq.ch->st = dev->D.st;
-	} else {
-		bp = get_Bprotocol4mask(pmask);
-		if (!bp)
-			return -ENOPROTOOPT;
-		rq2.protocol = protocol;
-		rq2.adr = *adr;
-		rq2.ch = ch;
-		err = bp->create(&rq2);
-		if (err)
-			return err;
-		ch->recv = rq2.ch->send;
-		ch->peer = rq2.ch;
-		rq2.ch->st = dev->D.st;
-		rq.protocol = rq2.protocol;
-		rq.adr = *adr;
-		err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq);
-		if (err) {
-			rq2.ch->ctrl(rq2.ch, CLOSE_CHANNEL, NULL);
-			return err;
-		}
-		rq2.ch->recv = rq.ch->send;
-		rq2.ch->peer = rq.ch;
-		rq.ch->recv = rq2.ch->send;
-		rq.ch->peer = rq2.ch;
-		rq.ch->st = dev->D.st;
-	}
-	ch->protocol = protocol;
-	ch->nr = rq.ch->nr;
-	return 0;
-}
-
-int
-create_l2entity(struct mISDNdevice *dev, struct mISDNchannel *ch,
-		u_int protocol, struct sockaddr_mISDN *adr)
-{
-	struct channel_req	rq;
-	int			err;
-
-	if (*debug &  DEBUG_CORE_FUNC)
-		printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n",
-		       __func__, dev_name(&dev->dev), protocol,
-		       adr->dev, adr->channel, adr->sapi,
-		       adr->tei);
-	rq.protocol = ISDN_P_TE_S0;
-	if (dev->Dprotocols & (1 << ISDN_P_TE_E1))
-		rq.protocol = ISDN_P_TE_E1;
-	switch (protocol) {
-	case ISDN_P_LAPD_NT:
-		rq.protocol = ISDN_P_NT_S0;
-		if (dev->Dprotocols & (1 << ISDN_P_NT_E1))
-			rq.protocol = ISDN_P_NT_E1;
-		fallthrough;
-	case ISDN_P_LAPD_TE:
-		ch->recv = mISDN_queue_message;
-		ch->peer = &dev->D.st->own;
-		ch->st = dev->D.st;
-		rq.adr.channel = 0;
-		err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq);
-		printk(KERN_DEBUG "%s: ret 1 %d\n", __func__, err);
-		if (err)
-			break;
-		rq.protocol = protocol;
-		rq.adr = *adr;
-		rq.ch = ch;
-		err = dev->teimgr->ctrl(dev->teimgr, OPEN_CHANNEL, &rq);
-		printk(KERN_DEBUG "%s: ret 2 %d\n", __func__, err);
-		if (!err) {
-			if ((protocol == ISDN_P_LAPD_NT) && !rq.ch)
-				break;
-			add_layer2(rq.ch, dev->D.st);
-			rq.ch->recv = mISDN_queue_message;
-			rq.ch->peer = &dev->D.st->own;
-			rq.ch->ctrl(rq.ch, OPEN_CHANNEL, NULL); /* can't fail */
-		}
-		break;
-	default:
-		err = -EPROTONOSUPPORT;
-	}
-	return err;
-}
-
-void
-delete_channel(struct mISDNchannel *ch)
-{
-	struct mISDN_sock	*msk = container_of(ch, struct mISDN_sock, ch);
-	struct mISDNchannel	*pch;
-
-	if (!ch->st) {
-		printk(KERN_WARNING "%s: no stack\n", __func__);
-		return;
-	}
-	if (*debug & DEBUG_CORE_FUNC)
-		printk(KERN_DEBUG "%s: st(%s) protocol(%x)\n", __func__,
-		       dev_name(&ch->st->dev->dev), ch->protocol);
-	if (ch->protocol >= ISDN_P_B_START) {
-		if (ch->peer) {
-			ch->peer->ctrl(ch->peer, CLOSE_CHANNEL, NULL);
-			ch->peer = NULL;
-		}
-		return;
-	}
-	switch (ch->protocol) {
-	case ISDN_P_NT_S0:
-	case ISDN_P_TE_S0:
-	case ISDN_P_NT_E1:
-	case ISDN_P_TE_E1:
-		write_lock_bh(&ch->st->l1sock.lock);
-		sk_del_node_init(&msk->sk);
-		write_unlock_bh(&ch->st->l1sock.lock);
-		ch->st->dev->D.ctrl(&ch->st->dev->D, CLOSE_CHANNEL, NULL);
-		break;
-	case ISDN_P_LAPD_TE:
-		pch = get_channel4id(ch->st, ch->nr);
-		if (pch) {
-			mutex_lock(&ch->st->lmutex);
-			list_del(&pch->list);
-			mutex_unlock(&ch->st->lmutex);
-			pch->ctrl(pch, CLOSE_CHANNEL, NULL);
-			pch = ch->st->dev->teimgr;
-			pch->ctrl(pch, CLOSE_CHANNEL, NULL);
-		} else
-			printk(KERN_WARNING "%s: no l2 channel\n",
-			       __func__);
-		break;
-	case ISDN_P_LAPD_NT:
-		pch = ch->st->dev->teimgr;
-		if (pch) {
-			pch->ctrl(pch, CLOSE_CHANNEL, NULL);
-		} else
-			printk(KERN_WARNING "%s: no l2 channel\n",
-			       __func__);
-		break;
-	default:
-		break;
-	}
-	return;
-}
-
-void
-delete_stack(struct mISDNdevice *dev)
-{
-	struct mISDNstack	*st = dev->D.st;
-	DECLARE_COMPLETION_ONSTACK(done);
-
-	if (*debug & DEBUG_CORE_FUNC)
-		printk(KERN_DEBUG "%s: st(%s)\n", __func__,
-		       dev_name(&st->dev->dev));
-	if (dev->teimgr)
-		delete_teimanager(dev->teimgr);
-	if (st->thread) {
-		if (st->notify) {
-			printk(KERN_WARNING "%s: notifier in use\n",
-			       __func__);
-			complete(st->notify);
-		}
-		st->notify = &done;
-		test_and_set_bit(mISDN_STACK_ABORT, &st->status);
-		test_and_set_bit(mISDN_STACK_WAKEUP, &st->status);
-		wake_up_interruptible(&st->workq);
-		wait_for_completion(&done);
-	}
-	if (!list_empty(&st->layer2))
-		printk(KERN_WARNING "%s: layer2 list not empty\n",
-		       __func__);
-	if (!hlist_empty(&st->l1sock.head))
-		printk(KERN_WARNING "%s: layer1 list not empty\n",
-		       __func__);
-	kfree(st);
-}
-
-void
-mISDN_initstack(u_int *dp)
-{
-	debug = dp;
-}
diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c
deleted file mode 100644
index 2bad3083be90..000000000000
--- a/drivers/isdn/mISDN/tei.c
+++ /dev/null
@@ -1,1416 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Author	Karsten Keil <kkeil@novell.com>
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-#include "layer2.h"
-#include <linux/random.h>
-#include <linux/slab.h>
-#include "core.h"
-
-#define ID_REQUEST	1
-#define ID_ASSIGNED	2
-#define ID_DENIED	3
-#define ID_CHK_REQ	4
-#define ID_CHK_RES	5
-#define ID_REMOVE	6
-#define ID_VERIFY	7
-
-#define TEI_ENTITY_ID	0xf
-
-#define MGR_PH_ACTIVE	16
-#define MGR_PH_NOTREADY	17
-
-#define DATIMER_VAL	10000
-
-static	u_int	*debug;
-
-static struct Fsm deactfsm = {NULL, 0, 0, NULL, NULL};
-static struct Fsm teifsmu = {NULL, 0, 0, NULL, NULL};
-static struct Fsm teifsmn = {NULL, 0, 0, NULL, NULL};
-
-enum {
-	ST_L1_DEACT,
-	ST_L1_DEACT_PENDING,
-	ST_L1_ACTIV,
-};
-#define DEACT_STATE_COUNT (ST_L1_ACTIV + 1)
-
-static char *strDeactState[] =
-{
-	"ST_L1_DEACT",
-	"ST_L1_DEACT_PENDING",
-	"ST_L1_ACTIV",
-};
-
-enum {
-	EV_ACTIVATE,
-	EV_ACTIVATE_IND,
-	EV_DEACTIVATE,
-	EV_DEACTIVATE_IND,
-	EV_UI,
-	EV_DATIMER,
-};
-
-#define DEACT_EVENT_COUNT (EV_DATIMER + 1)
-
-static char *strDeactEvent[] =
-{
-	"EV_ACTIVATE",
-	"EV_ACTIVATE_IND",
-	"EV_DEACTIVATE",
-	"EV_DEACTIVATE_IND",
-	"EV_UI",
-	"EV_DATIMER",
-};
-
-static void
-da_debug(struct FsmInst *fi, char *fmt, ...)
-{
-	struct manager	*mgr = fi->userdata;
-	struct va_format vaf;
-	va_list va;
-
-	if (!(*debug & DEBUG_L2_TEIFSM))
-		return;
-
-	va_start(va, fmt);
-
-	vaf.fmt = fmt;
-	vaf.va = &va;
-
-	printk(KERN_DEBUG "mgr(%d): %pV\n", mgr->ch.st->dev->id, &vaf);
-
-	va_end(va);
-}
-
-static void
-da_activate(struct FsmInst *fi, int event, void *arg)
-{
-	struct manager	*mgr = fi->userdata;
-
-	if (fi->state == ST_L1_DEACT_PENDING)
-		mISDN_FsmDelTimer(&mgr->datimer, 1);
-	mISDN_FsmChangeState(fi, ST_L1_ACTIV);
-}
-
-static void
-da_deactivate_ind(struct FsmInst *fi, int event, void *arg)
-{
-	mISDN_FsmChangeState(fi, ST_L1_DEACT);
-}
-
-static void
-da_deactivate(struct FsmInst *fi, int event, void *arg)
-{
-	struct manager	*mgr = fi->userdata;
-	struct layer2	*l2;
-	u_long		flags;
-
-	read_lock_irqsave(&mgr->lock, flags);
-	list_for_each_entry(l2, &mgr->layer2, list) {
-		if (l2->l2m.state > ST_L2_4) {
-			/* have still activ TEI */
-			read_unlock_irqrestore(&mgr->lock, flags);
-			return;
-		}
-	}
-	read_unlock_irqrestore(&mgr->lock, flags);
-	/* All TEI are inactiv */
-	if (!test_bit(OPTION_L1_HOLD, &mgr->options)) {
-		mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER,
-				  NULL, 1);
-		mISDN_FsmChangeState(fi, ST_L1_DEACT_PENDING);
-	}
-}
-
-static void
-da_ui(struct FsmInst *fi, int event, void *arg)
-{
-	struct manager	*mgr = fi->userdata;
-
-	/* restart da timer */
-	if (!test_bit(OPTION_L1_HOLD, &mgr->options)) {
-		mISDN_FsmDelTimer(&mgr->datimer, 2);
-		mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER,
-				  NULL, 2);
-	}
-}
-
-static void
-da_timer(struct FsmInst *fi, int event, void *arg)
-{
-	struct manager	*mgr = fi->userdata;
-	struct layer2	*l2;
-	u_long		flags;
-
-	/* check again */
-	read_lock_irqsave(&mgr->lock, flags);
-	list_for_each_entry(l2, &mgr->layer2, list) {
-		if (l2->l2m.state > ST_L2_4) {
-			/* have still activ TEI */
-			read_unlock_irqrestore(&mgr->lock, flags);
-			mISDN_FsmChangeState(fi, ST_L1_ACTIV);
-			return;
-		}
-	}
-	read_unlock_irqrestore(&mgr->lock, flags);
-	/* All TEI are inactiv */
-	mISDN_FsmChangeState(fi, ST_L1_DEACT);
-	_queue_data(&mgr->ch, PH_DEACTIVATE_REQ, MISDN_ID_ANY, 0, NULL,
-		    GFP_ATOMIC);
-}
-
-static struct FsmNode DeactFnList[] =
-{
-	{ST_L1_DEACT, EV_ACTIVATE_IND, da_activate},
-	{ST_L1_ACTIV, EV_DEACTIVATE_IND, da_deactivate_ind},
-	{ST_L1_ACTIV, EV_DEACTIVATE, da_deactivate},
-	{ST_L1_DEACT_PENDING, EV_ACTIVATE, da_activate},
-	{ST_L1_DEACT_PENDING, EV_UI, da_ui},
-	{ST_L1_DEACT_PENDING, EV_DATIMER, da_timer},
-};
-
-enum {
-	ST_TEI_NOP,
-	ST_TEI_IDREQ,
-	ST_TEI_IDVERIFY,
-};
-
-#define TEI_STATE_COUNT (ST_TEI_IDVERIFY + 1)
-
-static char *strTeiState[] =
-{
-	"ST_TEI_NOP",
-	"ST_TEI_IDREQ",
-	"ST_TEI_IDVERIFY",
-};
-
-enum {
-	EV_IDREQ,
-	EV_ASSIGN,
-	EV_ASSIGN_REQ,
-	EV_DENIED,
-	EV_CHKREQ,
-	EV_CHKRESP,
-	EV_REMOVE,
-	EV_VERIFY,
-	EV_TIMER,
-};
-
-#define TEI_EVENT_COUNT (EV_TIMER + 1)
-
-static char *strTeiEvent[] =
-{
-	"EV_IDREQ",
-	"EV_ASSIGN",
-	"EV_ASSIGN_REQ",
-	"EV_DENIED",
-	"EV_CHKREQ",
-	"EV_CHKRESP",
-	"EV_REMOVE",
-	"EV_VERIFY",
-	"EV_TIMER",
-};
-
-static void
-tei_debug(struct FsmInst *fi, char *fmt, ...)
-{
-	struct teimgr	*tm = fi->userdata;
-	struct va_format vaf;
-	va_list va;
-
-	if (!(*debug & DEBUG_L2_TEIFSM))
-		return;
-
-	va_start(va, fmt);
-
-	vaf.fmt = fmt;
-	vaf.va = &va;
-
-	printk(KERN_DEBUG "sapi(%d) tei(%d): %pV\n",
-	       tm->l2->sapi, tm->l2->tei, &vaf);
-
-	va_end(va);
-}
-
-
-
-static int
-get_free_id(struct manager *mgr)
-{
-	DECLARE_BITMAP(ids, 64) = { [0 ... BITS_TO_LONGS(64) - 1] = 0 };
-	int		i;
-	struct layer2	*l2;
-
-	list_for_each_entry(l2, &mgr->layer2, list) {
-		if (l2->ch.nr > 63) {
-			printk(KERN_WARNING
-			       "%s: more as 63 layer2 for one device\n",
-			       __func__);
-			return -EBUSY;
-		}
-		__set_bit(l2->ch.nr, ids);
-	}
-	i = find_next_zero_bit(ids, 64, 1);
-	if (i < 64)
-		return i;
-	printk(KERN_WARNING "%s: more as 63 layer2 for one device\n",
-	       __func__);
-	return -EBUSY;
-}
-
-static int
-get_free_tei(struct manager *mgr)
-{
-	DECLARE_BITMAP(ids, 64) = { [0 ... BITS_TO_LONGS(64) - 1] = 0 };
-	int		i;
-	struct layer2	*l2;
-
-	list_for_each_entry(l2, &mgr->layer2, list) {
-		if (l2->ch.nr == 0)
-			continue;
-		if ((l2->ch.addr & 0xff) != 0)
-			continue;
-		i = l2->ch.addr >> 8;
-		if (i < 64)
-			continue;
-		i -= 64;
-
-		__set_bit(i, ids);
-	}
-	i = find_first_zero_bit(ids, 64);
-	if (i < 64)
-		return i + 64;
-	printk(KERN_WARNING "%s: more as 63 dynamic tei for one device\n",
-	       __func__);
-	return -1;
-}
-
-static void
-teiup_create(struct manager *mgr, u_int prim, int len, void *arg)
-{
-	struct sk_buff	*skb;
-	struct mISDNhead *hh;
-	int		err;
-
-	skb = mI_alloc_skb(len, GFP_ATOMIC);
-	if (!skb)
-		return;
-	hh = mISDN_HEAD_P(skb);
-	hh->prim = prim;
-	hh->id = (mgr->ch.nr << 16) | mgr->ch.addr;
-	if (len)
-		skb_put_data(skb, arg, len);
-	err = mgr->up->send(mgr->up, skb);
-	if (err) {
-		printk(KERN_WARNING "%s: err=%d\n", __func__, err);
-		dev_kfree_skb(skb);
-	}
-}
-
-static u_int
-new_id(struct manager *mgr)
-{
-	u_int	id;
-
-	id = mgr->nextid++;
-	if (id == 0x7fff)
-		mgr->nextid = 1;
-	id <<= 16;
-	id |= GROUP_TEI << 8;
-	id |= TEI_SAPI;
-	return id;
-}
-
-static void
-do_send(struct manager *mgr)
-{
-	if (!test_bit(MGR_PH_ACTIVE, &mgr->options))
-		return;
-
-	if (!test_and_set_bit(MGR_PH_NOTREADY, &mgr->options)) {
-		struct sk_buff	*skb = skb_dequeue(&mgr->sendq);
-
-		if (!skb) {
-			test_and_clear_bit(MGR_PH_NOTREADY, &mgr->options);
-			return;
-		}
-		mgr->lastid = mISDN_HEAD_ID(skb);
-		mISDN_FsmEvent(&mgr->deact, EV_UI, NULL);
-		if (mgr->ch.recv(mgr->ch.peer, skb)) {
-			dev_kfree_skb(skb);
-			test_and_clear_bit(MGR_PH_NOTREADY, &mgr->options);
-			mgr->lastid = MISDN_ID_NONE;
-		}
-	}
-}
-
-static void
-do_ack(struct manager *mgr, u_int id)
-{
-	if (test_bit(MGR_PH_NOTREADY, &mgr->options)) {
-		if (id == mgr->lastid) {
-			if (test_bit(MGR_PH_ACTIVE, &mgr->options)) {
-				struct sk_buff	*skb;
-
-				skb = skb_dequeue(&mgr->sendq);
-				if (skb) {
-					mgr->lastid = mISDN_HEAD_ID(skb);
-					if (!mgr->ch.recv(mgr->ch.peer, skb))
-						return;
-					dev_kfree_skb(skb);
-				}
-			}
-			mgr->lastid = MISDN_ID_NONE;
-			test_and_clear_bit(MGR_PH_NOTREADY, &mgr->options);
-		}
-	}
-}
-
-static void
-mgr_send_down(struct manager *mgr, struct sk_buff *skb)
-{
-	skb_queue_tail(&mgr->sendq, skb);
-	if (!test_bit(MGR_PH_ACTIVE, &mgr->options)) {
-		_queue_data(&mgr->ch, PH_ACTIVATE_REQ, MISDN_ID_ANY, 0,
-			    NULL, GFP_KERNEL);
-	} else {
-		do_send(mgr);
-	}
-}
-
-static int
-dl_unit_data(struct manager *mgr, struct sk_buff *skb)
-{
-	if (!test_bit(MGR_OPT_NETWORK, &mgr->options)) /* only net send UI */
-		return -EINVAL;
-	if (!test_bit(MGR_PH_ACTIVE, &mgr->options))
-		_queue_data(&mgr->ch, PH_ACTIVATE_REQ, MISDN_ID_ANY, 0,
-			    NULL, GFP_KERNEL);
-	skb_push(skb, 3);
-	skb->data[0] = 0x02; /* SAPI 0 C/R = 1 */
-	skb->data[1] = 0xff; /* TEI 127 */
-	skb->data[2] = UI;   /* UI frame */
-	mISDN_HEAD_PRIM(skb) = PH_DATA_REQ;
-	mISDN_HEAD_ID(skb) = new_id(mgr);
-	skb_queue_tail(&mgr->sendq, skb);
-	do_send(mgr);
-	return 0;
-}
-
-static unsigned int
-random_ri(void)
-{
-	u16 x;
-
-	get_random_bytes(&x, sizeof(x));
-	return x;
-}
-
-static struct layer2 *
-findtei(struct manager *mgr, int tei)
-{
-	struct layer2	*l2;
-	u_long		flags;
-
-	read_lock_irqsave(&mgr->lock, flags);
-	list_for_each_entry(l2, &mgr->layer2, list) {
-		if ((l2->sapi == 0) && (l2->tei > 0) &&
-		    (l2->tei != GROUP_TEI) && (l2->tei == tei))
-			goto done;
-	}
-	l2 = NULL;
-done:
-	read_unlock_irqrestore(&mgr->lock, flags);
-	return l2;
-}
-
-static void
-put_tei_msg(struct manager *mgr, u_char m_id, unsigned int ri, int tei)
-{
-	struct sk_buff *skb;
-	u_char bp[8];
-
-	bp[0] = (TEI_SAPI << 2);
-	if (test_bit(MGR_OPT_NETWORK, &mgr->options))
-		bp[0] |= 2; /* CR:=1 for net command */
-	bp[1] = (GROUP_TEI << 1) | 0x1;
-	bp[2] = UI;
-	bp[3] = TEI_ENTITY_ID;
-	bp[4] = ri >> 8;
-	bp[5] = ri & 0xff;
-	bp[6] = m_id;
-	bp[7] = ((tei << 1) & 0xff) | 1;
-	skb = _alloc_mISDN_skb(PH_DATA_REQ, new_id(mgr), 8, bp, GFP_ATOMIC);
-	if (!skb) {
-		printk(KERN_WARNING "%s: no skb for tei msg\n", __func__);
-		return;
-	}
-	mgr_send_down(mgr, skb);
-}
-
-static void
-tei_id_request(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-
-	if (tm->l2->tei != GROUP_TEI) {
-		tm->tei_m.printdebug(&tm->tei_m,
-				     "assign request for already assigned tei %d",
-				     tm->l2->tei);
-		return;
-	}
-	tm->ri = random_ri();
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(&tm->tei_m,
-				     "assign request ri %d", tm->ri);
-	put_tei_msg(tm->mgr, ID_REQUEST, tm->ri, GROUP_TEI);
-	mISDN_FsmChangeState(fi, ST_TEI_IDREQ);
-	mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 1);
-	tm->nval = 3;
-}
-
-static void
-tei_id_assign(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr	*tm = fi->userdata;
-	struct layer2	*l2;
-	u_char *dp = arg;
-	int ri, tei;
-
-	ri = ((unsigned int) *dp++ << 8);
-	ri += *dp++;
-	dp++;
-	tei = *dp >> 1;
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(fi, "identity assign ri %d tei %d",
-				     ri, tei);
-	l2 = findtei(tm->mgr, tei);
-	if (l2) {	/* same tei is in use */
-		if (ri != l2->tm->ri) {
-			tm->tei_m.printdebug(fi,
-					     "possible duplicate assignment tei %d", tei);
-			tei_l2(l2, MDL_ERROR_RSP, 0);
-		}
-	} else if (ri == tm->ri) {
-		mISDN_FsmDelTimer(&tm->timer, 1);
-		mISDN_FsmChangeState(fi, ST_TEI_NOP);
-		tei_l2(tm->l2, MDL_ASSIGN_REQ, tei);
-	}
-}
-
-static void
-tei_id_test_dup(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr	*tm = fi->userdata;
-	struct layer2	*l2;
-	u_char *dp = arg;
-	int tei, ri;
-
-	ri = ((unsigned int) *dp++ << 8);
-	ri += *dp++;
-	dp++;
-	tei = *dp >> 1;
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(fi, "foreign identity assign ri %d tei %d",
-				     ri, tei);
-	l2 = findtei(tm->mgr, tei);
-	if (l2) {	/* same tei is in use */
-		if (ri != l2->tm->ri) {	/* and it wasn't our request */
-			tm->tei_m.printdebug(fi,
-					     "possible duplicate assignment tei %d", tei);
-			mISDN_FsmEvent(&l2->tm->tei_m, EV_VERIFY, NULL);
-		}
-	}
-}
-
-static void
-tei_id_denied(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-	u_char *dp = arg;
-	int ri, tei;
-
-	ri = ((unsigned int) *dp++ << 8);
-	ri += *dp++;
-	dp++;
-	tei = *dp >> 1;
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(fi, "identity denied ri %d tei %d",
-				     ri, tei);
-}
-
-static void
-tei_id_chk_req(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-	u_char *dp = arg;
-	int tei;
-
-	tei = *(dp + 3) >> 1;
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(fi, "identity check req tei %d", tei);
-	if ((tm->l2->tei != GROUP_TEI) && ((tei == GROUP_TEI) ||
-					   (tei == tm->l2->tei))) {
-		mISDN_FsmDelTimer(&tm->timer, 4);
-		mISDN_FsmChangeState(&tm->tei_m, ST_TEI_NOP);
-		put_tei_msg(tm->mgr, ID_CHK_RES, random_ri(), tm->l2->tei);
-	}
-}
-
-static void
-tei_id_remove(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-	u_char *dp = arg;
-	int tei;
-
-	tei = *(dp + 3) >> 1;
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(fi, "identity remove tei %d", tei);
-	if ((tm->l2->tei != GROUP_TEI) &&
-	    ((tei == GROUP_TEI) || (tei == tm->l2->tei))) {
-		mISDN_FsmDelTimer(&tm->timer, 5);
-		mISDN_FsmChangeState(&tm->tei_m, ST_TEI_NOP);
-		tei_l2(tm->l2, MDL_REMOVE_REQ, 0);
-	}
-}
-
-static void
-tei_id_verify(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(fi, "id verify request for tei %d",
-				     tm->l2->tei);
-	put_tei_msg(tm->mgr, ID_VERIFY, 0, tm->l2->tei);
-	mISDN_FsmChangeState(&tm->tei_m, ST_TEI_IDVERIFY);
-	mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 2);
-	tm->nval = 2;
-}
-
-static void
-tei_id_req_tout(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-
-	if (--tm->nval) {
-		tm->ri = random_ri();
-		if (*debug & DEBUG_L2_TEI)
-			tm->tei_m.printdebug(fi, "assign req(%d) ri %d",
-					     4 - tm->nval, tm->ri);
-		put_tei_msg(tm->mgr, ID_REQUEST, tm->ri, GROUP_TEI);
-		mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 3);
-	} else {
-		tm->tei_m.printdebug(fi, "assign req failed");
-		tei_l2(tm->l2, MDL_ERROR_RSP, 0);
-		mISDN_FsmChangeState(fi, ST_TEI_NOP);
-	}
-}
-
-static void
-tei_id_ver_tout(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-
-	if (--tm->nval) {
-		if (*debug & DEBUG_L2_TEI)
-			tm->tei_m.printdebug(fi,
-					     "id verify req(%d) for tei %d",
-					     3 - tm->nval, tm->l2->tei);
-		put_tei_msg(tm->mgr, ID_VERIFY, 0, tm->l2->tei);
-		mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 4);
-	} else {
-		tm->tei_m.printdebug(fi, "verify req for tei %d failed",
-				     tm->l2->tei);
-		tei_l2(tm->l2, MDL_REMOVE_REQ, 0);
-		mISDN_FsmChangeState(fi, ST_TEI_NOP);
-	}
-}
-
-static struct FsmNode TeiFnListUser[] =
-{
-	{ST_TEI_NOP, EV_IDREQ, tei_id_request},
-	{ST_TEI_NOP, EV_ASSIGN, tei_id_test_dup},
-	{ST_TEI_NOP, EV_VERIFY, tei_id_verify},
-	{ST_TEI_NOP, EV_REMOVE, tei_id_remove},
-	{ST_TEI_NOP, EV_CHKREQ, tei_id_chk_req},
-	{ST_TEI_IDREQ, EV_TIMER, tei_id_req_tout},
-	{ST_TEI_IDREQ, EV_ASSIGN, tei_id_assign},
-	{ST_TEI_IDREQ, EV_DENIED, tei_id_denied},
-	{ST_TEI_IDVERIFY, EV_TIMER, tei_id_ver_tout},
-	{ST_TEI_IDVERIFY, EV_REMOVE, tei_id_remove},
-	{ST_TEI_IDVERIFY, EV_CHKREQ, tei_id_chk_req},
-};
-
-static void
-tei_l2remove(struct layer2 *l2)
-{
-	put_tei_msg(l2->tm->mgr, ID_REMOVE, 0, l2->tei);
-	tei_l2(l2, MDL_REMOVE_REQ, 0);
-	list_del(&l2->ch.list);
-	l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL);
-}
-
-static void
-tei_assign_req(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-	u_char *dp = arg;
-
-	if (tm->l2->tei == GROUP_TEI) {
-		tm->tei_m.printdebug(&tm->tei_m,
-				     "net tei assign request without tei");
-		return;
-	}
-	tm->ri = ((unsigned int) *dp++ << 8);
-	tm->ri += *dp++;
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(&tm->tei_m,
-				     "net assign request ri %d teim %d", tm->ri, *dp);
-	put_tei_msg(tm->mgr, ID_ASSIGNED, tm->ri, tm->l2->tei);
-	mISDN_FsmChangeState(fi, ST_TEI_NOP);
-}
-
-static void
-tei_id_chk_req_net(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr	*tm = fi->userdata;
-
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(fi, "id check request for tei %d",
-				     tm->l2->tei);
-	tm->rcnt = 0;
-	put_tei_msg(tm->mgr, ID_CHK_REQ, 0, tm->l2->tei);
-	mISDN_FsmChangeState(&tm->tei_m, ST_TEI_IDVERIFY);
-	mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 2);
-	tm->nval = 2;
-}
-
-static void
-tei_id_chk_resp(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-	u_char *dp = arg;
-	int tei;
-
-	tei = dp[3] >> 1;
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(fi, "identity check resp tei %d", tei);
-	if (tei == tm->l2->tei)
-		tm->rcnt++;
-}
-
-static void
-tei_id_verify_net(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-	u_char *dp = arg;
-	int tei;
-
-	tei = dp[3] >> 1;
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(fi, "identity verify req tei %d/%d",
-				     tei, tm->l2->tei);
-	if (tei == tm->l2->tei)
-		tei_id_chk_req_net(fi, event, arg);
-}
-
-static void
-tei_id_ver_tout_net(struct FsmInst *fi, int event, void *arg)
-{
-	struct teimgr *tm = fi->userdata;
-
-	if (tm->rcnt == 1) {
-		if (*debug & DEBUG_L2_TEI)
-			tm->tei_m.printdebug(fi,
-					     "check req for tei %d successful\n", tm->l2->tei);
-		mISDN_FsmChangeState(fi, ST_TEI_NOP);
-	} else if (tm->rcnt > 1) {
-		/* duplicate assignment; remove */
-		tei_l2remove(tm->l2);
-	} else if (--tm->nval) {
-		if (*debug & DEBUG_L2_TEI)
-			tm->tei_m.printdebug(fi,
-					     "id check req(%d) for tei %d",
-					     3 - tm->nval, tm->l2->tei);
-		put_tei_msg(tm->mgr, ID_CHK_REQ, 0, tm->l2->tei);
-		mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 4);
-	} else {
-		tm->tei_m.printdebug(fi, "check req for tei %d failed",
-				     tm->l2->tei);
-		mISDN_FsmChangeState(fi, ST_TEI_NOP);
-		tei_l2remove(tm->l2);
-	}
-}
-
-static struct FsmNode TeiFnListNet[] =
-{
-	{ST_TEI_NOP, EV_ASSIGN_REQ, tei_assign_req},
-	{ST_TEI_NOP, EV_VERIFY, tei_id_verify_net},
-	{ST_TEI_NOP, EV_CHKREQ, tei_id_chk_req_net},
-	{ST_TEI_IDVERIFY, EV_TIMER, tei_id_ver_tout_net},
-	{ST_TEI_IDVERIFY, EV_CHKRESP, tei_id_chk_resp},
-};
-
-static void
-tei_ph_data_ind(struct teimgr *tm, u_int mt, u_char *dp, int len)
-{
-	if (test_bit(FLG_FIXED_TEI, &tm->l2->flag))
-		return;
-	if (*debug & DEBUG_L2_TEI)
-		tm->tei_m.printdebug(&tm->tei_m, "tei handler mt %x", mt);
-	if (mt == ID_ASSIGNED)
-		mISDN_FsmEvent(&tm->tei_m, EV_ASSIGN, dp);
-	else if (mt == ID_DENIED)
-		mISDN_FsmEvent(&tm->tei_m, EV_DENIED, dp);
-	else if (mt == ID_CHK_REQ)
-		mISDN_FsmEvent(&tm->tei_m, EV_CHKREQ, dp);
-	else if (mt == ID_REMOVE)
-		mISDN_FsmEvent(&tm->tei_m, EV_REMOVE, dp);
-	else if (mt == ID_VERIFY)
-		mISDN_FsmEvent(&tm->tei_m, EV_VERIFY, dp);
-	else if (mt == ID_CHK_RES)
-		mISDN_FsmEvent(&tm->tei_m, EV_CHKRESP, dp);
-}
-
-static struct layer2 *
-create_new_tei(struct manager *mgr, int tei, int sapi)
-{
-	unsigned long		opt = 0;
-	unsigned long		flags;
-	int			id;
-	struct layer2		*l2;
-	struct channel_req	rq;
-
-	if (!mgr->up)
-		return NULL;
-	if ((tei >= 0) && (tei < 64))
-		test_and_set_bit(OPTION_L2_FIXEDTEI, &opt);
-	if (mgr->ch.st->dev->Dprotocols & ((1 << ISDN_P_TE_E1) |
-	    (1 << ISDN_P_NT_E1))) {
-		test_and_set_bit(OPTION_L2_PMX, &opt);
-		rq.protocol = ISDN_P_NT_E1;
-	} else {
-		rq.protocol = ISDN_P_NT_S0;
-	}
-	l2 = create_l2(mgr->up, ISDN_P_LAPD_NT, opt, tei, sapi);
-	if (!l2) {
-		printk(KERN_WARNING "%s:no memory for layer2\n", __func__);
-		return NULL;
-	}
-	l2->tm = kzalloc_obj(struct teimgr);
-	if (!l2->tm) {
-		kfree(l2);
-		printk(KERN_WARNING "%s:no memory for teimgr\n", __func__);
-		return NULL;
-	}
-	l2->tm->mgr = mgr;
-	l2->tm->l2 = l2;
-	l2->tm->tei_m.debug = *debug & DEBUG_L2_TEIFSM;
-	l2->tm->tei_m.userdata = l2->tm;
-	l2->tm->tei_m.printdebug = tei_debug;
-	l2->tm->tei_m.fsm = &teifsmn;
-	l2->tm->tei_m.state = ST_TEI_NOP;
-	l2->tm->tval = 2000; /* T202  2 sec */
-	mISDN_FsmInitTimer(&l2->tm->tei_m, &l2->tm->timer);
-	write_lock_irqsave(&mgr->lock, flags);
-	id = get_free_id(mgr);
-	list_add_tail(&l2->list, &mgr->layer2);
-	write_unlock_irqrestore(&mgr->lock, flags);
-	if (id < 0) {
-		l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL);
-		printk(KERN_WARNING "%s:no free id\n", __func__);
-		return NULL;
-	} else {
-		l2->ch.nr = id;
-		__add_layer2(&l2->ch, mgr->ch.st);
-		l2->ch.recv = mgr->ch.recv;
-		l2->ch.peer = mgr->ch.peer;
-		l2->ch.ctrl(&l2->ch, OPEN_CHANNEL, NULL);
-		/* We need open here L1 for the manager as well (refcounting) */
-		rq.adr.dev = mgr->ch.st->dev->id;
-		id = mgr->ch.st->own.ctrl(&mgr->ch.st->own, OPEN_CHANNEL, &rq);
-		if (id < 0) {
-			printk(KERN_WARNING "%s: cannot open L1\n", __func__);
-			l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL);
-			l2 = NULL;
-		}
-	}
-	return l2;
-}
-
-static void
-new_tei_req(struct manager *mgr, u_char *dp)
-{
-	int		tei, ri;
-	struct layer2	*l2;
-
-	ri = dp[0] << 8;
-	ri += dp[1];
-	if (!mgr->up)
-		goto denied;
-	if (!(dp[3] & 1)) /* Extension bit != 1 */
-		goto denied;
-	if (dp[3] != 0xff)
-		tei = dp[3] >> 1; /* 3GPP TS 08.56 6.1.11.2 */
-	else
-		tei = get_free_tei(mgr);
-	if (tei < 0) {
-		printk(KERN_WARNING "%s:No free tei\n", __func__);
-		goto denied;
-	}
-	l2 = create_new_tei(mgr, tei, CTRL_SAPI);
-	if (!l2)
-		goto denied;
-	else
-		mISDN_FsmEvent(&l2->tm->tei_m, EV_ASSIGN_REQ, dp);
-	return;
-denied:
-	put_tei_msg(mgr, ID_DENIED, ri, GROUP_TEI);
-}
-
-static int
-ph_data_ind(struct manager *mgr, struct sk_buff *skb)
-{
-	int		ret = -EINVAL;
-	struct layer2	*l2, *nl2;
-	u_char		mt;
-
-	if (skb->len < 8) {
-		if (*debug  & DEBUG_L2_TEI)
-			printk(KERN_DEBUG "%s: short mgr frame %d/8\n",
-			       __func__, skb->len);
-		goto done;
-	}
-
-	if ((skb->data[0] >> 2) != TEI_SAPI) /* not for us */
-		goto done;
-	if (skb->data[0] & 1) /* EA0 formal error */
-		goto done;
-	if (!(skb->data[1] & 1)) /* EA1 formal error */
-		goto done;
-	if ((skb->data[1] >> 1) != GROUP_TEI) /* not for us */
-		goto done;
-	if ((skb->data[2] & 0xef) != UI) /* not UI */
-		goto done;
-	if (skb->data[3] != TEI_ENTITY_ID) /* not tei entity */
-		goto done;
-	mt = skb->data[6];
-	switch (mt) {
-	case ID_REQUEST:
-	case ID_CHK_RES:
-	case ID_VERIFY:
-		if (!test_bit(MGR_OPT_NETWORK, &mgr->options))
-			goto done;
-		break;
-	case ID_ASSIGNED:
-	case ID_DENIED:
-	case ID_CHK_REQ:
-	case ID_REMOVE:
-		if (test_bit(MGR_OPT_NETWORK, &mgr->options))
-			goto done;
-		break;
-	default:
-		goto done;
-	}
-	ret = 0;
-	if (mt == ID_REQUEST) {
-		new_tei_req(mgr, &skb->data[4]);
-		goto done;
-	}
-	list_for_each_entry_safe(l2, nl2, &mgr->layer2, list) {
-		tei_ph_data_ind(l2->tm, mt, &skb->data[4], skb->len - 4);
-	}
-done:
-	return ret;
-}
-
-int
-l2_tei(struct layer2 *l2, u_int cmd, u_long arg)
-{
-	struct teimgr	*tm = l2->tm;
-
-	if (test_bit(FLG_FIXED_TEI, &l2->flag))
-		return 0;
-	if (*debug & DEBUG_L2_TEI)
-		printk(KERN_DEBUG "%s: cmd(%x)\n", __func__, cmd);
-	switch (cmd) {
-	case MDL_ASSIGN_IND:
-		mISDN_FsmEvent(&tm->tei_m, EV_IDREQ, NULL);
-		break;
-	case MDL_ERROR_IND:
-		if (test_bit(MGR_OPT_NETWORK, &tm->mgr->options))
-			mISDN_FsmEvent(&tm->tei_m, EV_CHKREQ, &l2->tei);
-		if (test_bit(MGR_OPT_USER, &tm->mgr->options))
-			mISDN_FsmEvent(&tm->tei_m, EV_VERIFY, NULL);
-		break;
-	case MDL_STATUS_UP_IND:
-		if (test_bit(MGR_OPT_NETWORK, &tm->mgr->options))
-			mISDN_FsmEvent(&tm->mgr->deact, EV_ACTIVATE, NULL);
-		break;
-	case MDL_STATUS_DOWN_IND:
-		if (test_bit(MGR_OPT_NETWORK, &tm->mgr->options))
-			mISDN_FsmEvent(&tm->mgr->deact, EV_DEACTIVATE, NULL);
-		break;
-	case MDL_STATUS_UI_IND:
-		if (test_bit(MGR_OPT_NETWORK, &tm->mgr->options))
-			mISDN_FsmEvent(&tm->mgr->deact, EV_UI, NULL);
-		break;
-	}
-	return 0;
-}
-
-void
-TEIrelease(struct layer2 *l2)
-{
-	struct teimgr	*tm = l2->tm;
-	u_long		flags;
-
-	mISDN_FsmDelTimer(&tm->timer, 1);
-	write_lock_irqsave(&tm->mgr->lock, flags);
-	list_del(&l2->list);
-	write_unlock_irqrestore(&tm->mgr->lock, flags);
-	l2->tm = NULL;
-	kfree(tm);
-}
-
-static int
-create_teimgr(struct manager *mgr, struct channel_req *crq)
-{
-	struct layer2		*l2;
-	unsigned long		opt = 0;
-	unsigned long		flags;
-	int			id;
-	struct channel_req	l1rq;
-
-	if (*debug & DEBUG_L2_TEI)
-		printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n",
-		       __func__, dev_name(&mgr->ch.st->dev->dev),
-		       crq->protocol, crq->adr.dev, crq->adr.channel,
-		       crq->adr.sapi, crq->adr.tei);
-	if (crq->adr.tei > GROUP_TEI)
-		return -EINVAL;
-	if (crq->adr.tei < 64)
-		test_and_set_bit(OPTION_L2_FIXEDTEI, &opt);
-	if (crq->adr.tei == 0)
-		test_and_set_bit(OPTION_L2_PTP, &opt);
-	if (test_bit(MGR_OPT_NETWORK, &mgr->options)) {
-		if (crq->protocol == ISDN_P_LAPD_TE)
-			return -EPROTONOSUPPORT;
-		if ((crq->adr.tei != 0) && (crq->adr.tei != 127))
-			return -EINVAL;
-		if (mgr->up) {
-			printk(KERN_WARNING
-			       "%s: only one network manager is allowed\n",
-			       __func__);
-			return -EBUSY;
-		}
-	} else if (test_bit(MGR_OPT_USER, &mgr->options)) {
-		if (crq->protocol == ISDN_P_LAPD_NT)
-			return -EPROTONOSUPPORT;
-		if ((crq->adr.tei >= 64) && (crq->adr.tei < GROUP_TEI))
-			return -EINVAL; /* dyn tei */
-	} else {
-		if (crq->protocol == ISDN_P_LAPD_NT)
-			test_and_set_bit(MGR_OPT_NETWORK, &mgr->options);
-		if (crq->protocol == ISDN_P_LAPD_TE)
-			test_and_set_bit(MGR_OPT_USER, &mgr->options);
-	}
-	l1rq.adr = crq->adr;
-	if (mgr->ch.st->dev->Dprotocols
-	    & ((1 << ISDN_P_TE_E1) | (1 << ISDN_P_NT_E1)))
-		test_and_set_bit(OPTION_L2_PMX, &opt);
-	if ((crq->protocol == ISDN_P_LAPD_NT) && (crq->adr.tei == 127)) {
-		mgr->up = crq->ch;
-		id = DL_INFO_L2_CONNECT;
-		teiup_create(mgr, DL_INFORMATION_IND, sizeof(id), &id);
-		if (test_bit(MGR_PH_ACTIVE, &mgr->options))
-			teiup_create(mgr, PH_ACTIVATE_IND, 0, NULL);
-		crq->ch = NULL;
-		if (!list_empty(&mgr->layer2)) {
-			read_lock_irqsave(&mgr->lock, flags);
-			list_for_each_entry(l2, &mgr->layer2, list) {
-				l2->up = mgr->up;
-				l2->ch.ctrl(&l2->ch, OPEN_CHANNEL, NULL);
-			}
-			read_unlock_irqrestore(&mgr->lock, flags);
-		}
-		return 0;
-	}
-	l2 = create_l2(crq->ch, crq->protocol, opt,
-		       crq->adr.tei, crq->adr.sapi);
-	if (!l2)
-		return -ENOMEM;
-	l2->tm = kzalloc_obj(struct teimgr);
-	if (!l2->tm) {
-		kfree(l2);
-		printk(KERN_ERR "kmalloc teimgr failed\n");
-		return -ENOMEM;
-	}
-	l2->tm->mgr = mgr;
-	l2->tm->l2 = l2;
-	l2->tm->tei_m.debug = *debug & DEBUG_L2_TEIFSM;
-	l2->tm->tei_m.userdata = l2->tm;
-	l2->tm->tei_m.printdebug = tei_debug;
-	if (crq->protocol == ISDN_P_LAPD_TE) {
-		l2->tm->tei_m.fsm = &teifsmu;
-		l2->tm->tei_m.state = ST_TEI_NOP;
-		l2->tm->tval = 1000; /* T201  1 sec */
-		if (test_bit(OPTION_L2_PMX, &opt))
-			l1rq.protocol = ISDN_P_TE_E1;
-		else
-			l1rq.protocol = ISDN_P_TE_S0;
-	} else {
-		l2->tm->tei_m.fsm = &teifsmn;
-		l2->tm->tei_m.state = ST_TEI_NOP;
-		l2->tm->tval = 2000; /* T202  2 sec */
-		if (test_bit(OPTION_L2_PMX, &opt))
-			l1rq.protocol = ISDN_P_NT_E1;
-		else
-			l1rq.protocol = ISDN_P_NT_S0;
-	}
-	mISDN_FsmInitTimer(&l2->tm->tei_m, &l2->tm->timer);
-	write_lock_irqsave(&mgr->lock, flags);
-	id = get_free_id(mgr);
-	list_add_tail(&l2->list, &mgr->layer2);
-	write_unlock_irqrestore(&mgr->lock, flags);
-	if (id >= 0) {
-		l2->ch.nr = id;
-		l2->up->nr = id;
-		crq->ch = &l2->ch;
-		/* We need open here L1 for the manager as well (refcounting) */
-		id = mgr->ch.st->own.ctrl(&mgr->ch.st->own, OPEN_CHANNEL,
-					  &l1rq);
-	}
-	if (id < 0)
-		l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL);
-	return id;
-}
-
-static int
-mgr_send(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct manager	*mgr;
-	struct mISDNhead	*hh =  mISDN_HEAD_P(skb);
-	int			ret = -EINVAL;
-
-	mgr = container_of(ch, struct manager, ch);
-	if (*debug & DEBUG_L2_RECV)
-		printk(KERN_DEBUG "%s: prim(%x) id(%x)\n",
-		       __func__, hh->prim, hh->id);
-	switch (hh->prim) {
-	case PH_DATA_IND:
-		mISDN_FsmEvent(&mgr->deact, EV_UI, NULL);
-		ret = ph_data_ind(mgr, skb);
-		break;
-	case PH_DATA_CNF:
-		do_ack(mgr, hh->id);
-		ret = 0;
-		break;
-	case PH_ACTIVATE_IND:
-		test_and_set_bit(MGR_PH_ACTIVE, &mgr->options);
-		if (mgr->up)
-			teiup_create(mgr, PH_ACTIVATE_IND, 0, NULL);
-		mISDN_FsmEvent(&mgr->deact, EV_ACTIVATE_IND, NULL);
-		do_send(mgr);
-		ret = 0;
-		break;
-	case PH_DEACTIVATE_IND:
-		test_and_clear_bit(MGR_PH_ACTIVE, &mgr->options);
-		if (mgr->up)
-			teiup_create(mgr, PH_DEACTIVATE_IND, 0, NULL);
-		mISDN_FsmEvent(&mgr->deact, EV_DEACTIVATE_IND, NULL);
-		ret = 0;
-		break;
-	case DL_UNITDATA_REQ:
-		return dl_unit_data(mgr, skb);
-	}
-	if (!ret)
-		dev_kfree_skb(skb);
-	return ret;
-}
-
-static int
-free_teimanager(struct manager *mgr)
-{
-	struct layer2	*l2, *nl2;
-
-	test_and_clear_bit(OPTION_L1_HOLD, &mgr->options);
-	if (test_bit(MGR_OPT_NETWORK, &mgr->options)) {
-		/* not locked lock is taken in release tei */
-		mgr->up = NULL;
-		if (test_bit(OPTION_L2_CLEANUP, &mgr->options)) {
-			list_for_each_entry_safe(l2, nl2, &mgr->layer2, list) {
-				put_tei_msg(mgr, ID_REMOVE, 0, l2->tei);
-				mutex_lock(&mgr->ch.st->lmutex);
-				list_del(&l2->ch.list);
-				mutex_unlock(&mgr->ch.st->lmutex);
-				l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL);
-			}
-			test_and_clear_bit(MGR_OPT_NETWORK, &mgr->options);
-		} else {
-			list_for_each_entry_safe(l2, nl2, &mgr->layer2, list) {
-				l2->up = NULL;
-			}
-		}
-	}
-	if (test_bit(MGR_OPT_USER, &mgr->options)) {
-		if (list_empty(&mgr->layer2))
-			test_and_clear_bit(MGR_OPT_USER, &mgr->options);
-	}
-	mgr->ch.st->dev->D.ctrl(&mgr->ch.st->dev->D, CLOSE_CHANNEL, NULL);
-	return 0;
-}
-
-static int
-ctrl_teimanager(struct manager *mgr, void *arg)
-{
-	/* currently we only have one option */
-	unsigned int *val = (unsigned int *)arg;
-
-	switch (val[0]) {
-	case IMCLEAR_L2:
-		if (val[1])
-			test_and_set_bit(OPTION_L2_CLEANUP, &mgr->options);
-		else
-			test_and_clear_bit(OPTION_L2_CLEANUP, &mgr->options);
-		break;
-	case IMHOLD_L1:
-		if (val[1])
-			test_and_set_bit(OPTION_L1_HOLD, &mgr->options);
-		else
-			test_and_clear_bit(OPTION_L1_HOLD, &mgr->options);
-		break;
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-/* This function does create a L2 for fixed TEI in NT Mode */
-static int
-check_data(struct manager *mgr, struct sk_buff *skb)
-{
-	struct mISDNhead	*hh =  mISDN_HEAD_P(skb);
-	int			ret, tei, sapi;
-	struct layer2		*l2;
-
-	if (*debug & DEBUG_L2_CTRL)
-		printk(KERN_DEBUG "%s: prim(%x) id(%x)\n",
-		       __func__, hh->prim, hh->id);
-	if (test_bit(MGR_OPT_USER, &mgr->options))
-		return -ENOTCONN;
-	if (hh->prim != PH_DATA_IND)
-		return -ENOTCONN;
-	if (skb->len != 3)
-		return -ENOTCONN;
-	if (skb->data[0] & 3) /* EA0 and CR must be  0 */
-		return -EINVAL;
-	sapi = skb->data[0] >> 2;
-	if (!(skb->data[1] & 1)) /* invalid EA1 */
-		return -EINVAL;
-	tei = skb->data[1] >> 1;
-	if (tei > 63) /* not a fixed tei */
-		return -ENOTCONN;
-	if ((skb->data[2] & ~0x10) != SABME)
-		return -ENOTCONN;
-	/* We got a SABME for a fixed TEI */
-	if (*debug & DEBUG_L2_CTRL)
-		printk(KERN_DEBUG "%s: SABME sapi(%d) tei(%d)\n",
-		       __func__, sapi, tei);
-	l2 = create_new_tei(mgr, tei, sapi);
-	if (!l2) {
-		if (*debug & DEBUG_L2_CTRL)
-			printk(KERN_DEBUG "%s: failed to create new tei\n",
-			       __func__);
-		return -ENOMEM;
-	}
-	ret = l2->ch.send(&l2->ch, skb);
-	return ret;
-}
-
-void
-delete_teimanager(struct mISDNchannel *ch)
-{
-	struct manager	*mgr;
-	struct layer2	*l2, *nl2;
-
-	mgr = container_of(ch, struct manager, ch);
-	/* not locked lock is taken in release tei */
-	list_for_each_entry_safe(l2, nl2, &mgr->layer2, list) {
-		mutex_lock(&mgr->ch.st->lmutex);
-		list_del(&l2->ch.list);
-		mutex_unlock(&mgr->ch.st->lmutex);
-		l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL);
-	}
-	list_del(&mgr->ch.list);
-	list_del(&mgr->bcast.list);
-	skb_queue_purge(&mgr->sendq);
-	kfree(mgr);
-}
-
-static int
-mgr_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-	struct manager	*mgr;
-	int		ret = -EINVAL;
-
-	mgr = container_of(ch, struct manager, ch);
-	if (*debug & DEBUG_L2_CTRL)
-		printk(KERN_DEBUG "%s(%x, %p)\n", __func__, cmd, arg);
-	switch (cmd) {
-	case OPEN_CHANNEL:
-		ret = create_teimgr(mgr, arg);
-		break;
-	case CLOSE_CHANNEL:
-		ret = free_teimanager(mgr);
-		break;
-	case CONTROL_CHANNEL:
-		ret = ctrl_teimanager(mgr, arg);
-		break;
-	case CHECK_DATA:
-		ret = check_data(mgr, arg);
-		break;
-	}
-	return ret;
-}
-
-static int
-mgr_bcast(struct mISDNchannel *ch, struct sk_buff *skb)
-{
-	struct manager		*mgr = container_of(ch, struct manager, bcast);
-	struct mISDNhead	*hhc, *hh = mISDN_HEAD_P(skb);
-	struct sk_buff		*cskb = NULL;
-	struct layer2		*l2;
-	u_long			flags;
-	int			ret;
-
-	read_lock_irqsave(&mgr->lock, flags);
-	list_for_each_entry(l2, &mgr->layer2, list) {
-		if ((hh->id & MISDN_ID_SAPI_MASK) ==
-		    (l2->ch.addr & MISDN_ID_SAPI_MASK)) {
-			if (list_is_last(&l2->list, &mgr->layer2)) {
-				cskb = skb;
-				skb = NULL;
-			} else {
-				if (!cskb)
-					cskb = skb_copy(skb, GFP_ATOMIC);
-			}
-			if (cskb) {
-				hhc = mISDN_HEAD_P(cskb);
-				/* save original header behind normal header */
-				hhc++;
-				*hhc = *hh;
-				hhc--;
-				hhc->prim = DL_INTERN_MSG;
-				hhc->id = l2->ch.nr;
-				ret = ch->st->own.recv(&ch->st->own, cskb);
-				if (ret) {
-					if (*debug & DEBUG_SEND_ERR)
-						printk(KERN_DEBUG
-						       "%s ch%d prim(%x) addr(%x)"
-						       " err %d\n",
-						       __func__, l2->ch.nr,
-						       hh->prim, l2->ch.addr, ret);
-				} else
-					cskb = NULL;
-			} else {
-				printk(KERN_WARNING "%s ch%d addr %x no mem\n",
-				       __func__, ch->nr, ch->addr);
-				goto out;
-			}
-		}
-	}
-out:
-	read_unlock_irqrestore(&mgr->lock, flags);
-	dev_kfree_skb(cskb);
-	dev_kfree_skb(skb);
-	return 0;
-}
-
-static int
-mgr_bcast_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
-{
-
-	return -EINVAL;
-}
-
-int
-create_teimanager(struct mISDNdevice *dev)
-{
-	struct manager *mgr;
-
-	mgr = kzalloc_obj(struct manager);
-	if (!mgr)
-		return -ENOMEM;
-	INIT_LIST_HEAD(&mgr->layer2);
-	rwlock_init(&mgr->lock);
-	skb_queue_head_init(&mgr->sendq);
-	mgr->nextid = 1;
-	mgr->lastid = MISDN_ID_NONE;
-	mgr->ch.send = mgr_send;
-	mgr->ch.ctrl = mgr_ctrl;
-	mgr->ch.st = dev->D.st;
-	set_channel_address(&mgr->ch, TEI_SAPI, GROUP_TEI);
-	add_layer2(&mgr->ch, dev->D.st);
-	mgr->bcast.send = mgr_bcast;
-	mgr->bcast.ctrl = mgr_bcast_ctrl;
-	mgr->bcast.st = dev->D.st;
-	set_channel_address(&mgr->bcast, 0, GROUP_TEI);
-	add_layer2(&mgr->bcast, dev->D.st);
-	mgr->deact.debug = *debug & DEBUG_MANAGER;
-	mgr->deact.userdata = mgr;
-	mgr->deact.printdebug = da_debug;
-	mgr->deact.fsm = &deactfsm;
-	mgr->deact.state = ST_L1_DEACT;
-	mISDN_FsmInitTimer(&mgr->deact, &mgr->datimer);
-	dev->teimgr = &mgr->ch;
-	return 0;
-}
-
-int TEIInit(u_int *deb)
-{
-	int res;
-	debug = deb;
-	teifsmu.state_count = TEI_STATE_COUNT;
-	teifsmu.event_count = TEI_EVENT_COUNT;
-	teifsmu.strEvent = strTeiEvent;
-	teifsmu.strState = strTeiState;
-	res = mISDN_FsmNew(&teifsmu, TeiFnListUser, ARRAY_SIZE(TeiFnListUser));
-	if (res)
-		goto error;
-	teifsmn.state_count = TEI_STATE_COUNT;
-	teifsmn.event_count = TEI_EVENT_COUNT;
-	teifsmn.strEvent = strTeiEvent;
-	teifsmn.strState = strTeiState;
-	res = mISDN_FsmNew(&teifsmn, TeiFnListNet, ARRAY_SIZE(TeiFnListNet));
-	if (res)
-		goto error_smn;
-	deactfsm.state_count =  DEACT_STATE_COUNT;
-	deactfsm.event_count = DEACT_EVENT_COUNT;
-	deactfsm.strEvent = strDeactEvent;
-	deactfsm.strState = strDeactState;
-	res = mISDN_FsmNew(&deactfsm, DeactFnList, ARRAY_SIZE(DeactFnList));
-	if (res)
-		goto error_deact;
-	return 0;
-
-error_deact:
-	mISDN_FsmFree(&teifsmn);
-error_smn:
-	mISDN_FsmFree(&teifsmu);
-error:
-	return res;
-}
-
-void TEIFree(void)
-{
-	mISDN_FsmFree(&teifsmu);
-	mISDN_FsmFree(&teifsmn);
-	mISDN_FsmFree(&deactfsm);
-}
diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c
deleted file mode 100644
index a18845755633..000000000000
--- a/drivers/isdn/mISDN/timerdev.c
+++ /dev/null
@@ -1,295 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * general timer device for using in ISDN stacks
- *
- * Author	Karsten Keil <kkeil@novell.com>
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#include <linux/poll.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/timer.h>
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/mISDNif.h>
-#include <linux/mutex.h>
-#include <linux/sched/signal.h>
-
-#include "core.h"
-
-static DEFINE_MUTEX(mISDN_mutex);
-static u_int	*debug;
-
-
-struct mISDNtimerdev {
-	int			next_id;
-	struct list_head	pending;
-	struct list_head	expired;
-	wait_queue_head_t	wait;
-	u_int			work;
-	spinlock_t		lock; /* protect lists */
-};
-
-struct mISDNtimer {
-	struct list_head	list;
-	struct  mISDNtimerdev	*dev;
-	struct timer_list	tl;
-	int			id;
-};
-
-static int
-mISDN_open(struct inode *ino, struct file *filep)
-{
-	struct mISDNtimerdev	*dev;
-
-	if (*debug & DEBUG_TIMER)
-		printk(KERN_DEBUG "%s(%p,%p)\n", __func__, ino, filep);
-	dev = kmalloc_obj(struct mISDNtimerdev);
-	if (!dev)
-		return -ENOMEM;
-	dev->next_id = 1;
-	INIT_LIST_HEAD(&dev->pending);
-	INIT_LIST_HEAD(&dev->expired);
-	spin_lock_init(&dev->lock);
-	dev->work = 0;
-	init_waitqueue_head(&dev->wait);
-	filep->private_data = dev;
-	return nonseekable_open(ino, filep);
-}
-
-static int
-mISDN_close(struct inode *ino, struct file *filep)
-{
-	struct mISDNtimerdev	*dev = filep->private_data;
-	struct list_head	*list = &dev->pending;
-	struct mISDNtimer	*timer, *next;
-
-	if (*debug & DEBUG_TIMER)
-		printk(KERN_DEBUG "%s(%p,%p)\n", __func__, ino, filep);
-
-	spin_lock_irq(&dev->lock);
-	while (!list_empty(list)) {
-		timer = list_first_entry(list, struct mISDNtimer, list);
-		spin_unlock_irq(&dev->lock);
-		timer_shutdown_sync(&timer->tl);
-		spin_lock_irq(&dev->lock);
-		/* it might have been moved to ->expired */
-		list_del(&timer->list);
-		kfree(timer);
-	}
-	spin_unlock_irq(&dev->lock);
-
-	list_for_each_entry_safe(timer, next, &dev->expired, list) {
-		kfree(timer);
-	}
-	kfree(dev);
-	return 0;
-}
-
-static ssize_t
-mISDN_read(struct file *filep, char __user *buf, size_t count, loff_t *off)
-{
-	struct mISDNtimerdev	*dev = filep->private_data;
-	struct list_head *list = &dev->expired;
-	struct mISDNtimer	*timer;
-	int	ret = 0;
-
-	if (*debug & DEBUG_TIMER)
-		printk(KERN_DEBUG "%s(%p, %p, %d, %p)\n", __func__,
-		       filep, buf, (int)count, off);
-
-	if (count < sizeof(int))
-		return -ENOSPC;
-
-	spin_lock_irq(&dev->lock);
-	while (list_empty(list) && (dev->work == 0)) {
-		spin_unlock_irq(&dev->lock);
-		if (filep->f_flags & O_NONBLOCK)
-			return -EAGAIN;
-		wait_event_interruptible(dev->wait, (READ_ONCE(dev->work) ||
-						     !list_empty(list)));
-		if (signal_pending(current))
-			return -ERESTARTSYS;
-		spin_lock_irq(&dev->lock);
-	}
-	if (dev->work)
-		WRITE_ONCE(dev->work, 0);
-	if (!list_empty(list)) {
-		timer = list_first_entry(list, struct mISDNtimer, list);
-		list_del(&timer->list);
-		spin_unlock_irq(&dev->lock);
-		if (put_user(timer->id, (int __user *)buf))
-			ret = -EFAULT;
-		else
-			ret = sizeof(int);
-		kfree(timer);
-	} else {
-		spin_unlock_irq(&dev->lock);
-	}
-	return ret;
-}
-
-static __poll_t
-mISDN_poll(struct file *filep, poll_table *wait)
-{
-	struct mISDNtimerdev	*dev = filep->private_data;
-	__poll_t		mask = EPOLLERR;
-
-	if (*debug & DEBUG_TIMER)
-		printk(KERN_DEBUG "%s(%p, %p)\n", __func__, filep, wait);
-	if (dev) {
-		u32 work;
-
-		poll_wait(filep, &dev->wait, wait);
-		mask = 0;
-		work = READ_ONCE(dev->work);
-		if (work || !list_empty(&dev->expired))
-			mask |= (EPOLLIN | EPOLLRDNORM);
-		if (*debug & DEBUG_TIMER)
-			printk(KERN_DEBUG "%s work(%d) empty(%d)\n", __func__,
-			       work, list_empty(&dev->expired));
-	}
-	return mask;
-}
-
-static void
-dev_expire_timer(struct timer_list *t)
-{
-	struct mISDNtimer *timer = timer_container_of(timer, t, tl);
-	u_long			flags;
-
-	spin_lock_irqsave(&timer->dev->lock, flags);
-	if (timer->id >= 0)
-		list_move_tail(&timer->list, &timer->dev->expired);
-	wake_up_interruptible(&timer->dev->wait);
-	spin_unlock_irqrestore(&timer->dev->lock, flags);
-}
-
-static int
-misdn_add_timer(struct mISDNtimerdev *dev, int timeout)
-{
-	int			id;
-	struct mISDNtimer	*timer;
-
-	if (!timeout) {
-		WRITE_ONCE(dev->work, 1);
-		wake_up_interruptible(&dev->wait);
-		id = 0;
-	} else {
-		timer = kzalloc_obj(struct mISDNtimer);
-		if (!timer)
-			return -ENOMEM;
-		timer->dev = dev;
-		timer_setup(&timer->tl, dev_expire_timer, 0);
-		spin_lock_irq(&dev->lock);
-		id = timer->id = dev->next_id++;
-		if (dev->next_id < 0)
-			dev->next_id = 1;
-		list_add_tail(&timer->list, &dev->pending);
-		timer->tl.expires = jiffies + ((HZ * (u_long)timeout) / 1000);
-		add_timer(&timer->tl);
-		spin_unlock_irq(&dev->lock);
-	}
-	return id;
-}
-
-static int
-misdn_del_timer(struct mISDNtimerdev *dev, int id)
-{
-	struct mISDNtimer	*timer;
-
-	spin_lock_irq(&dev->lock);
-	list_for_each_entry(timer, &dev->pending, list) {
-		if (timer->id == id) {
-			list_del_init(&timer->list);
-			timer->id = -1;
-			spin_unlock_irq(&dev->lock);
-			timer_shutdown_sync(&timer->tl);
-			kfree(timer);
-			return id;
-		}
-	}
-	spin_unlock_irq(&dev->lock);
-	return 0;
-}
-
-static long
-mISDN_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
-{
-	struct mISDNtimerdev	*dev = filep->private_data;
-	int			id, tout, ret = 0;
-
-
-	if (*debug & DEBUG_TIMER)
-		printk(KERN_DEBUG "%s(%p, %x, %lx)\n", __func__,
-		       filep, cmd, arg);
-	mutex_lock(&mISDN_mutex);
-	switch (cmd) {
-	case IMADDTIMER:
-		if (get_user(tout, (int __user *)arg)) {
-			ret = -EFAULT;
-			break;
-		}
-		id = misdn_add_timer(dev, tout);
-		if (*debug & DEBUG_TIMER)
-			printk(KERN_DEBUG "%s add %d id %d\n", __func__,
-			       tout, id);
-		if (id < 0) {
-			ret = id;
-			break;
-		}
-		if (put_user(id, (int __user *)arg))
-			ret = -EFAULT;
-		break;
-	case IMDELTIMER:
-		if (get_user(id, (int __user *)arg)) {
-			ret = -EFAULT;
-			break;
-		}
-		if (*debug & DEBUG_TIMER)
-			printk(KERN_DEBUG "%s del id %d\n", __func__, id);
-		id = misdn_del_timer(dev, id);
-		if (put_user(id, (int __user *)arg))
-			ret = -EFAULT;
-		break;
-	default:
-		ret = -EINVAL;
-	}
-	mutex_unlock(&mISDN_mutex);
-	return ret;
-}
-
-static const struct file_operations mISDN_fops = {
-	.owner		= THIS_MODULE,
-	.read		= mISDN_read,
-	.poll		= mISDN_poll,
-	.unlocked_ioctl	= mISDN_ioctl,
-	.open		= mISDN_open,
-	.release	= mISDN_close,
-};
-
-static struct miscdevice mISDNtimer = {
-	.minor	= MISC_DYNAMIC_MINOR,
-	.name	= "mISDNtimer",
-	.fops	= &mISDN_fops,
-};
-
-int
-mISDN_inittimer(u_int *deb)
-{
-	int	err;
-
-	debug = deb;
-	err = misc_register(&mISDNtimer);
-	if (err)
-		printk(KERN_WARNING "mISDN: Could not register timer device\n");
-	return err;
-}
-
-void mISDN_timer_cleanup(void)
-{
-	misc_deregister(&mISDNtimer);
-}
diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h
deleted file mode 100644
index 12be09b6883b..000000000000
--- a/include/linux/isdn/capilli.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/* $Id: capilli.h,v 1.1.2.2 2004/01/16 21:09:27 keil Exp $
- * 
- * Kernel CAPI 2.0 Driver Interface for Linux
- * 
- * Copyright 1999 by Carsten Paeth <calle@calle.de>
- * 
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#ifndef __CAPILLI_H__
-#define __CAPILLI_H__
-
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/capi.h>
-#include <linux/kernelcapi.h>
-
-typedef struct capiloaddatapart {
-	int user;		/* data in userspace ? */
-	int len;
-	unsigned char *data;
-} capiloaddatapart;
-
-typedef struct capiloaddata {
-	capiloaddatapart firmware;
-	capiloaddatapart configuration;
-} capiloaddata;
-
-typedef struct capicardparams {
-	unsigned int port;
-	unsigned irq;
-	int cardtype;
-	int cardnr;
-	unsigned int membase;
-} capicardparams;
-
-struct capi_ctr {
-	/* filled in before calling attach_capi_ctr */
-	struct module *owner;
-	void *driverdata;			/* driver specific */
-	char name[32];				/* name of controller */
-	char *driver_name;			/* name of driver */
-	int (*load_firmware)(struct capi_ctr *, capiloaddata *);
-	void (*reset_ctr)(struct capi_ctr *);
-	void (*register_appl)(struct capi_ctr *, u16 appl,
-			      capi_register_params *);
-	void (*release_appl)(struct capi_ctr *, u16 appl);
-	u16  (*send_message)(struct capi_ctr *, struct sk_buff *skb);
-	
-	char *(*procinfo)(struct capi_ctr *);
-	int (*proc_show)(struct seq_file *, void *);
-
-	/* filled in before calling ready callback */
-	u8 manu[CAPI_MANUFACTURER_LEN];		/* CAPI_GET_MANUFACTURER */
-	capi_version version;			/* CAPI_GET_VERSION */
-	capi_profile profile;			/* CAPI_GET_PROFILE */
-	u8 serial[CAPI_SERIAL_LEN];		/* CAPI_GET_SERIAL */
-
-	/* management information for kcapi */
-
-	unsigned long nrecvctlpkt;
-	unsigned long nrecvdatapkt;
-	unsigned long nsentctlpkt;
-	unsigned long nsentdatapkt;
-
-	int cnr;				/* controller number */
-	unsigned short state;			/* controller state */
-	int blocked;				/* output blocked */
-	int traceflag;				/* capi trace */
-
-	struct proc_dir_entry *procent;
-        char procfn[128];
-};
-
-int attach_capi_ctr(struct capi_ctr *);
-int detach_capi_ctr(struct capi_ctr *);
-
-void capi_ctr_ready(struct capi_ctr * card);
-void capi_ctr_down(struct capi_ctr * card);
-void capi_ctr_handle_message(struct capi_ctr * card, u16 appl, struct sk_buff *skb);
-
-// ---------------------------------------------------------------------------
-// needed for AVM capi drivers
-
-struct capi_driver {
-	char name[32];				/* driver name */
-	char revision[32];
-
-	/* management information for kcapi */
-	struct list_head list; 
-};
-
-#endif				/* __CAPILLI_H__ */
diff --git a/include/linux/isdn/capiutil.h b/include/linux/isdn/capiutil.h
deleted file mode 100644
index 953fd500dff7..000000000000
--- a/include/linux/isdn/capiutil.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* $Id: capiutil.h,v 1.5.6.2 2001/09/23 22:24:33 kai Exp $
- *
- * CAPI 2.0 defines & types
- *
- * From CAPI 2.0 Development Kit AVM 1995 (msg.c)
- * Rewritten for Linux 1996 by Carsten Paeth <calle@calle.de>
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#ifndef __CAPIUTIL_H__
-#define __CAPIUTIL_H__
-
-#include <asm/types.h>
-
-#define CAPIMSG_BASELEN		8
-#define CAPIMSG_U8(m, off)	(m[off])
-#define CAPIMSG_U16(m, off)	(m[off]|(m[(off)+1]<<8))
-#define CAPIMSG_U32(m, off)	(m[off]|(m[(off)+1]<<8)|(m[(off)+2]<<16)|(m[(off)+3]<<24))
-#define	CAPIMSG_LEN(m)		CAPIMSG_U16(m,0)
-#define	CAPIMSG_APPID(m)	CAPIMSG_U16(m,2)
-#define	CAPIMSG_COMMAND(m)	CAPIMSG_U8(m,4)
-#define	CAPIMSG_SUBCOMMAND(m)	CAPIMSG_U8(m,5)
-#define CAPIMSG_CMD(m)		(((m[4])<<8)|(m[5]))
-#define	CAPIMSG_MSGID(m)	CAPIMSG_U16(m,6)
-#define CAPIMSG_CONTROLLER(m)	(m[8] & 0x7f)
-#define CAPIMSG_CONTROL(m)	CAPIMSG_U32(m, 8)
-#define CAPIMSG_NCCI(m)		CAPIMSG_CONTROL(m)
-#define CAPIMSG_DATALEN(m)	CAPIMSG_U16(m,16) /* DATA_B3_REQ */
-
-static inline void capimsg_setu8(void *m, int off, __u8 val)
-{
-	((__u8 *)m)[off] = val;
-}
-
-static inline void capimsg_setu16(void *m, int off, __u16 val)
-{
-	((__u8 *)m)[off] = val & 0xff;
-	((__u8 *)m)[off+1] = (val >> 8) & 0xff;
-}
-
-static inline void capimsg_setu32(void *m, int off, __u32 val)
-{
-	((__u8 *)m)[off] = val & 0xff;
-	((__u8 *)m)[off+1] = (val >> 8) & 0xff;
-	((__u8 *)m)[off+2] = (val >> 16) & 0xff;
-	((__u8 *)m)[off+3] = (val >> 24) & 0xff;
-}
-
-#define	CAPIMSG_SETLEN(m, len)		capimsg_setu16(m, 0, len)
-#define	CAPIMSG_SETAPPID(m, applid)	capimsg_setu16(m, 2, applid)
-#define	CAPIMSG_SETCOMMAND(m,cmd)	capimsg_setu8(m, 4, cmd)
-#define	CAPIMSG_SETSUBCOMMAND(m, cmd)	capimsg_setu8(m, 5, cmd)
-#define	CAPIMSG_SETMSGID(m, msgid)	capimsg_setu16(m, 6, msgid)
-#define	CAPIMSG_SETCONTROL(m, contr)	capimsg_setu32(m, 8, contr)
-#define	CAPIMSG_SETDATALEN(m, len)	capimsg_setu16(m, 16, len)
-
-#endif				/* __CAPIUTIL_H__ */
diff --git a/include/linux/kernelcapi.h b/include/linux/kernelcapi.h
deleted file mode 100644
index 94ba42bf9da1..000000000000
--- a/include/linux/kernelcapi.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * $Id: kernelcapi.h,v 1.8.6.2 2001/02/07 11:31:31 kai Exp $
- * 
- * Kernel CAPI 2.0 Interface for Linux
- * 
- * (c) Copyright 1997 by Carsten Paeth (calle@calle.in-berlin.de)
- * 
- */
-#ifndef __KERNELCAPI_H__
-#define __KERNELCAPI_H__
-
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/workqueue.h>
-#include <linux/notifier.h>
-#include <uapi/linux/kernelcapi.h>
-
-#define CAPI_NOERROR                      0x0000
-
-#define CAPI_TOOMANYAPPLS		  0x1001
-#define CAPI_LOGBLKSIZETOSMALL	          0x1002
-#define CAPI_BUFFEXECEEDS64K 	          0x1003
-#define CAPI_MSGBUFSIZETOOSMALL	          0x1004
-#define CAPI_ANZLOGCONNNOTSUPPORTED	  0x1005
-#define CAPI_REGRESERVED		  0x1006
-#define CAPI_REGBUSY 		          0x1007
-#define CAPI_REGOSRESOURCEERR	          0x1008
-#define CAPI_REGNOTINSTALLED 	          0x1009
-#define CAPI_REGCTRLERNOTSUPPORTEXTEQUIP  0x100a
-#define CAPI_REGCTRLERONLYSUPPORTEXTEQUIP 0x100b
-
-#define CAPI_ILLAPPNR		          0x1101
-#define CAPI_ILLCMDORSUBCMDORMSGTOSMALL   0x1102
-#define CAPI_SENDQUEUEFULL		  0x1103
-#define CAPI_RECEIVEQUEUEEMPTY	          0x1104
-#define CAPI_RECEIVEOVERFLOW 	          0x1105
-#define CAPI_UNKNOWNNOTPAR		  0x1106
-#define CAPI_MSGBUSY 		          0x1107
-#define CAPI_MSGOSRESOURCEERR	          0x1108
-#define CAPI_MSGNOTINSTALLED 	          0x1109
-#define CAPI_MSGCTRLERNOTSUPPORTEXTEQUIP  0x110a
-#define CAPI_MSGCTRLERONLYSUPPORTEXTEQUIP 0x110b
-
-#endif				/* __KERNELCAPI_H__ */
diff --git a/include/linux/mISDNdsp.h b/include/linux/mISDNdsp.h
deleted file mode 100644
index 00758f45fddc..000000000000
--- a/include/linux/mISDNdsp.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __mISDNdsp_H__
-#define __mISDNdsp_H__
-
-struct mISDN_dsp_element_arg {
-	char	*name;
-	char	*def;
-	char	*desc;
-};
-
-struct mISDN_dsp_element {
-	char	*name;
-	void	*(*new)(const char *arg);
-	void	(*free)(void *p);
-	void	(*process_tx)(void *p, unsigned char *data, int len);
-	void	(*process_rx)(void *p, unsigned char *data, int len,
-			unsigned int txlen);
-	int	num_args;
-	struct mISDN_dsp_element_arg
-		*args;
-};
-
-extern int  mISDN_dsp_element_register(struct mISDN_dsp_element *elem);
-extern void mISDN_dsp_element_unregister(struct mISDN_dsp_element *elem);
-
-struct dsp_features {
-	int	hfc_id; /* unique id to identify the chip (or -1) */
-	int	hfc_dtmf; /* set if HFCmulti card supports dtmf */
-	int	hfc_conf; /* set if HFCmulti card supports conferences */
-	int	hfc_loops; /* set if card supports tone loops */
-	int	hfc_echocanhw; /* set if card supports echocancelation*/
-	int	pcm_id; /* unique id to identify the pcm bus (or -1) */
-	int	pcm_slots; /* number of slots on the pcm bus */
-	int	pcm_banks; /* number of IO banks of pcm bus */
-	int	unclocked; /* data is not clocked (has jitter/loss) */
-	int	unordered; /* data is unordered (packets have index) */
-};
-
-#endif
-
diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h
deleted file mode 100644
index ef4f8eb02eac..000000000000
--- a/include/linux/mISDNhw.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *
- * Author	Karsten Keil <kkeil@novell.com>
- *
- *   Basic declarations for the mISDN HW channels
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- */
-
-#ifndef MISDNHW_H
-#define MISDNHW_H
-#include <linux/mISDNif.h>
-#include <linux/timer.h>
-
-/*
- * HW DEBUG 0xHHHHGGGG
- * H - hardware driver specific bits
- * G - for all drivers
- */
-
-#define DEBUG_HW		0x00000001
-#define DEBUG_HW_OPEN		0x00000002
-#define DEBUG_HW_DCHANNEL	0x00000100
-#define DEBUG_HW_DFIFO		0x00000200
-#define DEBUG_HW_BCHANNEL	0x00001000
-#define DEBUG_HW_BFIFO		0x00002000
-
-#define MAX_DFRAME_LEN_L1	300
-#define MAX_MON_FRAME		32
-#define MAX_LOG_SPACE		2048
-#define MISDN_COPY_SIZE		32
-
-/* channel->Flags bit field */
-#define FLG_TX_BUSY		0	/* tx_buf in use */
-#define FLG_TX_NEXT		1	/* next_skb in use */
-#define FLG_L1_BUSY		2	/* L1 is permanent busy */
-#define FLG_L2_ACTIVATED	3	/* activated from L2 */
-#define FLG_OPEN		5	/* channel is in use */
-#define FLG_ACTIVE		6	/* channel is activated */
-#define FLG_BUSY_TIMER		7
-/* channel type */
-#define FLG_DCHANNEL		8	/* channel is D-channel */
-#define FLG_BCHANNEL		9	/* channel is B-channel */
-#define FLG_ECHANNEL		10	/* channel is E-channel */
-#define FLG_TRANSPARENT		12	/* channel use transparent data */
-#define FLG_HDLC		13	/* channel use hdlc data */
-#define FLG_L2DATA		14	/* channel use L2 DATA primitivs */
-#define FLG_ORIGIN		15	/* channel is on origin site */
-/* channel specific stuff */
-#define FLG_FILLEMPTY		16	/* fill fifo on first frame (empty) */
-/* arcofi specific */
-#define FLG_ARCOFI_TIMER	17
-#define FLG_ARCOFI_ERROR	18
-/* isar specific */
-#define FLG_INITIALIZED		17
-#define FLG_DLEETX		18
-#define FLG_LASTDLE		19
-#define FLG_FIRST		20
-#define FLG_LASTDATA		21
-#define FLG_NMD_DATA		22
-#define FLG_FTI_RUN		23
-#define FLG_LL_OK		24
-#define FLG_LL_CONN		25
-#define FLG_DTMFSEND		26
-#define FLG_TX_EMPTY		27
-/* stop sending received data upstream */
-#define FLG_RX_OFF		28
-/* workq events */
-#define FLG_RECVQUEUE		30
-#define	FLG_PHCHANGE		31
-
-#define schedule_event(s, ev)	do { \
-					test_and_set_bit(ev, &((s)->Flags)); \
-					schedule_work(&((s)->workq)); \
-				} while (0)
-
-struct dchannel {
-	struct mISDNdevice	dev;
-	u_long			Flags;
-	struct work_struct	workq;
-	void			(*phfunc) (struct dchannel *);
-	u_int			state;
-	void			*l1;
-	void			*hw;
-	int			slot;	/* multiport card channel slot */
-	struct timer_list	timer;
-	/* receive data */
-	struct sk_buff		*rx_skb;
-	int			maxlen;
-	/* send data */
-	struct sk_buff_head	squeue;
-	struct sk_buff_head	rqueue;
-	struct sk_buff		*tx_skb;
-	int			tx_idx;
-	int			debug;
-	/* statistics */
-	int			err_crc;
-	int			err_tx;
-	int			err_rx;
-};
-
-typedef int	(dchannel_l1callback)(struct dchannel *, u_int);
-extern int	create_l1(struct dchannel *, dchannel_l1callback *);
-
-/* private L1 commands */
-#define INFO0		0x8002
-#define INFO1		0x8102
-#define INFO2		0x8202
-#define INFO3_P8	0x8302
-#define INFO3_P10	0x8402
-#define INFO4_P8	0x8502
-#define INFO4_P10	0x8602
-#define LOSTFRAMING	0x8702
-#define ANYSIGNAL	0x8802
-#define HW_POWERDOWN	0x8902
-#define HW_RESET_REQ	0x8a02
-#define HW_POWERUP_REQ	0x8b02
-#define HW_DEACT_REQ	0x8c02
-#define HW_ACTIVATE_REQ	0x8e02
-#define HW_D_NOBLOCKED  0x8f02
-#define HW_RESET_IND	0x9002
-#define HW_POWERUP_IND	0x9102
-#define HW_DEACT_IND	0x9202
-#define HW_ACTIVATE_IND	0x9302
-#define HW_DEACT_CNF	0x9402
-#define HW_TESTLOOP	0x9502
-#define HW_TESTRX_RAW	0x9602
-#define HW_TESTRX_HDLC	0x9702
-#define HW_TESTRX_OFF	0x9802
-#define HW_TIMER3_IND	0x9902
-#define HW_TIMER3_VALUE	0x9a00
-#define HW_TIMER3_VMASK	0x00FF
-
-struct layer1;
-extern int	l1_event(struct layer1 *, u_int);
-
-#define MISDN_BCH_FILL_SIZE	4
-
-struct bchannel {
-	struct mISDNchannel	ch;
-	int			nr;
-	u_long			Flags;
-	struct work_struct	workq;
-	u_int			state;
-	void			*hw;
-	int			slot;	/* multiport card channel slot */
-	struct timer_list	timer;
-	/* receive data */
-	u8			fill[MISDN_BCH_FILL_SIZE];
-	struct sk_buff		*rx_skb;
-	unsigned short		maxlen;
-	unsigned short		init_maxlen; /* initial value */
-	unsigned short		next_maxlen; /* pending value */
-	unsigned short		minlen; /* for transparent data */
-	unsigned short		init_minlen; /* initial value */
-	unsigned short		next_minlen; /* pending value */
-	/* send data */
-	struct sk_buff		*next_skb;
-	struct sk_buff		*tx_skb;
-	struct sk_buff_head	rqueue;
-	int			rcount;
-	int			tx_idx;
-	int			debug;
-	/* statistics */
-	int			err_crc;
-	int			err_tx;
-	int			err_rx;
-	int			dropcnt;
-};
-
-extern int	mISDN_initdchannel(struct dchannel *, int, void *);
-extern int	mISDN_initbchannel(struct bchannel *, unsigned short,
-				   unsigned short);
-extern int	mISDN_freedchannel(struct dchannel *);
-extern void	mISDN_clear_bchannel(struct bchannel *);
-extern void	mISDN_freebchannel(struct bchannel *);
-extern int	mISDN_ctrl_bchannel(struct bchannel *, struct mISDN_ctrl_req *);
-extern void	queue_ch_frame(struct mISDNchannel *, u_int,
-			int, struct sk_buff *);
-extern int	dchannel_senddata(struct dchannel *, struct sk_buff *);
-extern int	bchannel_senddata(struct bchannel *, struct sk_buff *);
-extern int      bchannel_get_rxbuf(struct bchannel *, int);
-extern void	recv_Dchannel(struct dchannel *);
-extern void	recv_Echannel(struct dchannel *, struct dchannel *);
-extern void	recv_Bchannel(struct bchannel *, unsigned int, bool);
-extern void	recv_Dchannel_skb(struct dchannel *, struct sk_buff *);
-extern void	recv_Bchannel_skb(struct bchannel *, struct sk_buff *);
-extern int	get_next_bframe(struct bchannel *);
-extern int	get_next_dframe(struct dchannel *);
-
-#endif
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
deleted file mode 100644
index 7aab4a769736..000000000000
--- a/include/linux/mISDNif.h
+++ /dev/null
@@ -1,603 +0,0 @@
-/*
- *
- * Author	Karsten Keil <kkeil@novell.com>
- *
- * Copyright 2008  by Karsten Keil <kkeil@novell.com>
- *
- * This code is free software; you can redistribute it and/or modify
- * it under the terms of the GNU LESSER GENERAL PUBLIC LICENSE
- * version 2.1 as published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU LESSER GENERAL PUBLIC LICENSE for more details.
- *
- */
-
-#ifndef mISDNIF_H
-#define mISDNIF_H
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/socket.h>
-
-/*
- * ABI Version 32 bit
- *
- * <8 bit> Major version
- *		- changed if any interface become backwards incompatible
- *
- * <8 bit> Minor version
- *              - changed if any interface is extended but backwards compatible
- *
- * <16 bit> Release number
- *              - should be incremented on every checkin
- */
-#define	MISDN_MAJOR_VERSION	1
-#define	MISDN_MINOR_VERSION	1
-#define MISDN_RELEASE		29
-
-/* primitives for information exchange
- * generell format
- * <16  bit  0 >
- * <8  bit command>
- *    BIT 8 = 1 LAYER private
- *    BIT 7 = 1 answer
- *    BIT 6 = 1 DATA
- * <8  bit target layer mask>
- *
- * Layer = 00 is reserved for general commands
-   Layer = 01  L2 -> HW
-   Layer = 02  HW -> L2
-   Layer = 04  L3 -> L2
-   Layer = 08  L2 -> L3
- * Layer = FF is reserved for broadcast commands
- */
-
-#define MISDN_CMDMASK		0xff00
-#define MISDN_LAYERMASK		0x00ff
-
-/* generell commands */
-#define OPEN_CHANNEL		0x0100
-#define CLOSE_CHANNEL		0x0200
-#define CONTROL_CHANNEL		0x0300
-#define CHECK_DATA		0x0400
-
-/* layer 2 -> layer 1 */
-#define PH_ACTIVATE_REQ		0x0101
-#define PH_DEACTIVATE_REQ	0x0201
-#define PH_DATA_REQ		0x2001
-#define MPH_ACTIVATE_REQ	0x0501
-#define MPH_DEACTIVATE_REQ	0x0601
-#define MPH_INFORMATION_REQ	0x0701
-#define PH_CONTROL_REQ		0x0801
-
-/* layer 1 -> layer 2 */
-#define PH_ACTIVATE_IND		0x0102
-#define PH_ACTIVATE_CNF		0x4102
-#define PH_DEACTIVATE_IND	0x0202
-#define PH_DEACTIVATE_CNF	0x4202
-#define PH_DATA_IND		0x2002
-#define PH_DATA_E_IND		0x3002
-#define MPH_ACTIVATE_IND	0x0502
-#define MPH_DEACTIVATE_IND	0x0602
-#define MPH_INFORMATION_IND	0x0702
-#define PH_DATA_CNF		0x6002
-#define PH_CONTROL_IND		0x0802
-#define PH_CONTROL_CNF		0x4802
-
-/* layer 3 -> layer 2 */
-#define DL_ESTABLISH_REQ	0x1004
-#define DL_RELEASE_REQ		0x1104
-#define DL_DATA_REQ		0x3004
-#define DL_UNITDATA_REQ		0x3104
-#define DL_INFORMATION_REQ	0x0004
-
-/* layer 2 -> layer 3 */
-#define DL_ESTABLISH_IND	0x1008
-#define DL_ESTABLISH_CNF	0x5008
-#define DL_RELEASE_IND		0x1108
-#define DL_RELEASE_CNF		0x5108
-#define DL_DATA_IND		0x3008
-#define DL_UNITDATA_IND		0x3108
-#define DL_INFORMATION_IND	0x0008
-
-/* intern layer 2 management */
-#define MDL_ASSIGN_REQ		0x1804
-#define MDL_ASSIGN_IND		0x1904
-#define MDL_REMOVE_REQ		0x1A04
-#define MDL_REMOVE_IND		0x1B04
-#define MDL_STATUS_UP_IND	0x1C04
-#define MDL_STATUS_DOWN_IND	0x1D04
-#define MDL_STATUS_UI_IND	0x1E04
-#define MDL_ERROR_IND		0x1F04
-#define MDL_ERROR_RSP		0x5F04
-
-/* intern layer 2 */
-#define DL_TIMER200_IND		0x7004
-#define DL_TIMER203_IND		0x7304
-#define DL_INTERN_MSG		0x7804
-
-/* DL_INFORMATION_IND types */
-#define DL_INFO_L2_CONNECT	0x0001
-#define DL_INFO_L2_REMOVED	0x0002
-
-/* PH_CONTROL types */
-/* TOUCH TONE IS 0x20XX  XX "0"..."9", "A","B","C","D","*","#" */
-#define DTMF_TONE_VAL		0x2000
-#define DTMF_TONE_MASK		0x007F
-#define DTMF_TONE_START		0x2100
-#define DTMF_TONE_STOP		0x2200
-#define DTMF_HFC_COEF		0x4000
-#define DSP_CONF_JOIN		0x2403
-#define DSP_CONF_SPLIT		0x2404
-#define DSP_RECEIVE_OFF		0x2405
-#define DSP_RECEIVE_ON		0x2406
-#define DSP_ECHO_ON		0x2407
-#define DSP_ECHO_OFF		0x2408
-#define DSP_MIX_ON		0x2409
-#define DSP_MIX_OFF		0x240a
-#define DSP_DELAY		0x240b
-#define DSP_JITTER		0x240c
-#define DSP_TXDATA_ON		0x240d
-#define DSP_TXDATA_OFF		0x240e
-#define DSP_TX_DEJITTER		0x240f
-#define DSP_TX_DEJ_OFF		0x2410
-#define DSP_TONE_PATT_ON	0x2411
-#define DSP_TONE_PATT_OFF	0x2412
-#define DSP_VOL_CHANGE_TX	0x2413
-#define DSP_VOL_CHANGE_RX	0x2414
-#define DSP_BF_ENABLE_KEY	0x2415
-#define DSP_BF_DISABLE		0x2416
-#define DSP_BF_ACCEPT		0x2416
-#define DSP_BF_REJECT		0x2417
-#define DSP_PIPELINE_CFG	0x2418
-#define HFC_VOL_CHANGE_TX	0x2601
-#define HFC_VOL_CHANGE_RX	0x2602
-#define HFC_SPL_LOOP_ON		0x2603
-#define HFC_SPL_LOOP_OFF	0x2604
-/* for T30 FAX and analog modem */
-#define HW_MOD_FRM		0x4000
-#define HW_MOD_FRH		0x4001
-#define HW_MOD_FTM		0x4002
-#define HW_MOD_FTH		0x4003
-#define HW_MOD_FTS		0x4004
-#define HW_MOD_CONNECT		0x4010
-#define HW_MOD_OK		0x4011
-#define HW_MOD_NOCARR		0x4012
-#define HW_MOD_FCERROR		0x4013
-#define HW_MOD_READY		0x4014
-#define HW_MOD_LASTDATA		0x4015
-
-/* DSP_TONE_PATT_ON parameter */
-#define TONE_OFF			0x0000
-#define TONE_GERMAN_DIALTONE		0x0001
-#define TONE_GERMAN_OLDDIALTONE		0x0002
-#define TONE_AMERICAN_DIALTONE		0x0003
-#define TONE_GERMAN_DIALPBX		0x0004
-#define TONE_GERMAN_OLDDIALPBX		0x0005
-#define TONE_AMERICAN_DIALPBX		0x0006
-#define TONE_GERMAN_RINGING		0x0007
-#define TONE_GERMAN_OLDRINGING		0x0008
-#define TONE_AMERICAN_RINGPBX		0x000b
-#define TONE_GERMAN_RINGPBX		0x000c
-#define TONE_GERMAN_OLDRINGPBX		0x000d
-#define TONE_AMERICAN_RINGING		0x000e
-#define TONE_GERMAN_BUSY		0x000f
-#define TONE_GERMAN_OLDBUSY		0x0010
-#define TONE_AMERICAN_BUSY		0x0011
-#define TONE_GERMAN_HANGUP		0x0012
-#define TONE_GERMAN_OLDHANGUP		0x0013
-#define TONE_AMERICAN_HANGUP		0x0014
-#define TONE_SPECIAL_INFO		0x0015
-#define TONE_GERMAN_GASSENBESETZT	0x0016
-#define TONE_GERMAN_AUFSCHALTTON	0x0016
-
-/* MPH_INFORMATION_IND */
-#define L1_SIGNAL_LOS_OFF	0x0010
-#define L1_SIGNAL_LOS_ON	0x0011
-#define L1_SIGNAL_AIS_OFF	0x0012
-#define L1_SIGNAL_AIS_ON	0x0013
-#define L1_SIGNAL_RDI_OFF	0x0014
-#define L1_SIGNAL_RDI_ON	0x0015
-#define L1_SIGNAL_SLIP_RX	0x0020
-#define L1_SIGNAL_SLIP_TX	0x0021
-
-/*
- * protocol ids
- * D channel 1-31
- * B channel 33 - 63
- */
-
-#define ISDN_P_NONE		0
-#define ISDN_P_BASE		0
-#define ISDN_P_TE_S0		0x01
-#define ISDN_P_NT_S0  		0x02
-#define ISDN_P_TE_E1		0x03
-#define ISDN_P_NT_E1  		0x04
-#define ISDN_P_TE_UP0		0x05
-#define ISDN_P_NT_UP0		0x06
-
-#define IS_ISDN_P_TE(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_TE_E1) || \
-				(p == ISDN_P_TE_UP0) || (p == ISDN_P_LAPD_TE))
-#define IS_ISDN_P_NT(p) ((p == ISDN_P_NT_S0) || (p == ISDN_P_NT_E1) || \
-				(p == ISDN_P_NT_UP0) || (p == ISDN_P_LAPD_NT))
-#define IS_ISDN_P_S0(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_NT_S0))
-#define IS_ISDN_P_E1(p) ((p == ISDN_P_TE_E1) || (p == ISDN_P_NT_E1))
-#define IS_ISDN_P_UP0(p) ((p == ISDN_P_TE_UP0) || (p == ISDN_P_NT_UP0))
-
-
-#define ISDN_P_LAPD_TE		0x10
-#define	ISDN_P_LAPD_NT		0x11
-
-#define ISDN_P_B_MASK		0x1f
-#define ISDN_P_B_START		0x20
-
-#define ISDN_P_B_RAW		0x21
-#define ISDN_P_B_HDLC		0x22
-#define ISDN_P_B_X75SLP		0x23
-#define ISDN_P_B_L2DTMF		0x24
-#define ISDN_P_B_L2DSP		0x25
-#define ISDN_P_B_L2DSPHDLC	0x26
-#define ISDN_P_B_T30_FAX	0x27
-#define ISDN_P_B_MODEM_ASYNC	0x28
-
-#define OPTION_L2_PMX		1
-#define OPTION_L2_PTP		2
-#define OPTION_L2_FIXEDTEI	3
-#define OPTION_L2_CLEANUP	4
-#define OPTION_L1_HOLD		5
-
-/* should be in sync with linux/kobject.h:KOBJ_NAME_LEN */
-#define MISDN_MAX_IDLEN		20
-
-struct mISDNhead {
-	unsigned int	prim;
-	unsigned int	id;
-}  __packed;
-
-#define MISDN_HEADER_LEN	sizeof(struct mISDNhead)
-#define MAX_DATA_SIZE		2048
-#define MAX_DATA_MEM		(MAX_DATA_SIZE + MISDN_HEADER_LEN)
-#define MAX_DFRAME_LEN		260
-
-#define MISDN_ID_ADDR_MASK	0xFFFF
-#define MISDN_ID_TEI_MASK	0xFF00
-#define MISDN_ID_SAPI_MASK	0x00FF
-#define MISDN_ID_TEI_ANY	0x7F00
-
-#define MISDN_ID_ANY		0xFFFF
-#define MISDN_ID_NONE		0xFFFE
-
-#define GROUP_TEI		127
-#define TEI_SAPI		63
-#define CTRL_SAPI		0
-
-#define MISDN_MAX_CHANNEL	127
-#define MISDN_CHMAP_SIZE	((MISDN_MAX_CHANNEL + 1) >> 3)
-
-#define SOL_MISDN	0
-
-struct sockaddr_mISDN {
-	sa_family_t    family;
-	unsigned char	dev;
-	unsigned char	channel;
-	unsigned char	sapi;
-	unsigned char	tei;
-};
-
-struct mISDNversion {
-	unsigned char	major;
-	unsigned char	minor;
-	unsigned short	release;
-};
-
-struct mISDN_devinfo {
-	u_int			id;
-	u_int			Dprotocols;
-	u_int			Bprotocols;
-	u_int			protocol;
-	u_char			channelmap[MISDN_CHMAP_SIZE];
-	u_int			nrbchan;
-	char			name[MISDN_MAX_IDLEN];
-};
-
-struct mISDN_devrename {
-	u_int			id;
-	char			name[MISDN_MAX_IDLEN]; /* new name */
-};
-
-/* MPH_INFORMATION_REQ payload */
-struct ph_info_ch {
-	__u32 protocol;
-	__u64 Flags;
-};
-
-struct ph_info_dch {
-	struct ph_info_ch ch;
-	__u16 state;
-	__u16 num_bch;
-};
-
-struct ph_info {
-	struct ph_info_dch dch;
-	struct ph_info_ch  bch[];
-};
-
-/* timer device ioctl */
-#define IMADDTIMER	_IOR('I', 64, int)
-#define IMDELTIMER	_IOR('I', 65, int)
-
-/* socket ioctls */
-#define	IMGETVERSION	_IOR('I', 66, int)
-#define	IMGETCOUNT	_IOR('I', 67, int)
-#define IMGETDEVINFO	_IOR('I', 68, int)
-#define IMCTRLREQ	_IOR('I', 69, int)
-#define IMCLEAR_L2	_IOR('I', 70, int)
-#define IMSETDEVNAME	_IOR('I', 71, struct mISDN_devrename)
-#define IMHOLD_L1	_IOR('I', 72, int)
-
-static inline int
-test_channelmap(u_int nr, u_char *map)
-{
-	if (nr <= MISDN_MAX_CHANNEL)
-		return map[nr >> 3] & (1 << (nr & 7));
-	else
-		return 0;
-}
-
-static inline void
-set_channelmap(u_int nr, u_char *map)
-{
-	map[nr >> 3] |= (1 << (nr & 7));
-}
-
-static inline void
-clear_channelmap(u_int nr, u_char *map)
-{
-	map[nr >> 3] &= ~(1 << (nr & 7));
-}
-
-/* CONTROL_CHANNEL parameters */
-#define MISDN_CTRL_GETOP		0x0000
-#define MISDN_CTRL_LOOP			0x0001
-#define MISDN_CTRL_CONNECT		0x0002
-#define MISDN_CTRL_DISCONNECT		0x0004
-#define MISDN_CTRL_RX_BUFFER		0x0008
-#define MISDN_CTRL_PCMCONNECT		0x0010
-#define MISDN_CTRL_PCMDISCONNECT	0x0020
-#define MISDN_CTRL_SETPEER		0x0040
-#define MISDN_CTRL_UNSETPEER		0x0080
-#define MISDN_CTRL_RX_OFF		0x0100
-#define MISDN_CTRL_FILL_EMPTY		0x0200
-#define MISDN_CTRL_GETPEER		0x0400
-#define MISDN_CTRL_L1_TIMER3		0x0800
-#define MISDN_CTRL_HW_FEATURES_OP	0x2000
-#define MISDN_CTRL_HW_FEATURES		0x2001
-#define MISDN_CTRL_HFC_OP		0x4000
-#define MISDN_CTRL_HFC_PCM_CONN		0x4001
-#define MISDN_CTRL_HFC_PCM_DISC		0x4002
-#define MISDN_CTRL_HFC_CONF_JOIN	0x4003
-#define MISDN_CTRL_HFC_CONF_SPLIT	0x4004
-#define MISDN_CTRL_HFC_RECEIVE_OFF	0x4005
-#define MISDN_CTRL_HFC_RECEIVE_ON	0x4006
-#define MISDN_CTRL_HFC_ECHOCAN_ON 	0x4007
-#define MISDN_CTRL_HFC_ECHOCAN_OFF 	0x4008
-#define MISDN_CTRL_HFC_WD_INIT		0x4009
-#define MISDN_CTRL_HFC_WD_RESET		0x400A
-
-/* special RX buffer value for MISDN_CTRL_RX_BUFFER request.p1 is the minimum
- * buffer size request.p2 the maximum. Using  MISDN_CTRL_RX_SIZE_IGNORE will
- * not change the value, but still read back the actual stetting.
- */
-#define MISDN_CTRL_RX_SIZE_IGNORE	-1
-
-/* socket options */
-#define MISDN_TIME_STAMP		0x0001
-
-struct mISDN_ctrl_req {
-	int		op;
-	int		channel;
-	int		p1;
-	int		p2;
-};
-
-/* muxer options */
-#define MISDN_OPT_ALL		1
-#define MISDN_OPT_TEIMGR	2
-
-#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/net.h>
-#include <net/sock.h>
-#include <linux/completion.h>
-
-#define DEBUG_CORE		0x000000ff
-#define DEBUG_CORE_FUNC		0x00000002
-#define DEBUG_SOCKET		0x00000004
-#define DEBUG_MANAGER		0x00000008
-#define DEBUG_SEND_ERR		0x00000010
-#define DEBUG_MSG_THREAD	0x00000020
-#define DEBUG_QUEUE_FUNC	0x00000040
-#define DEBUG_L1		0x0000ff00
-#define DEBUG_L1_FSM		0x00000200
-#define DEBUG_L2		0x00ff0000
-#define DEBUG_L2_FSM		0x00020000
-#define DEBUG_L2_CTRL		0x00040000
-#define DEBUG_L2_RECV		0x00080000
-#define DEBUG_L2_TEI		0x00100000
-#define DEBUG_L2_TEIFSM		0x00200000
-#define DEBUG_TIMER		0x01000000
-#define DEBUG_CLOCK		0x02000000
-
-#define mISDN_HEAD_P(s)		((struct mISDNhead *)&s->cb[0])
-#define mISDN_HEAD_PRIM(s)	(((struct mISDNhead *)&s->cb[0])->prim)
-#define mISDN_HEAD_ID(s)	(((struct mISDNhead *)&s->cb[0])->id)
-
-/* socket states */
-#define MISDN_OPEN	1
-#define MISDN_BOUND	2
-#define MISDN_CLOSED	3
-
-struct mISDNchannel;
-struct mISDNdevice;
-struct mISDNstack;
-struct mISDNclock;
-
-struct channel_req {
-	u_int			protocol;
-	struct sockaddr_mISDN	adr;
-	struct mISDNchannel	*ch;
-};
-
-typedef	int	(ctrl_func_t)(struct mISDNchannel *, u_int, void *);
-typedef	int	(send_func_t)(struct mISDNchannel *, struct sk_buff *);
-typedef int	(create_func_t)(struct channel_req *);
-
-struct Bprotocol {
-	struct list_head	list;
-	char			*name;
-	u_int			Bprotocols;
-	create_func_t		*create;
-};
-
-struct mISDNchannel {
-	struct list_head	list;
-	u_int			protocol;
-	u_int			nr;
-	u_long			opt;
-	u_int			addr;
-	struct mISDNstack	*st;
-	struct mISDNchannel	*peer;
-	send_func_t		*send;
-	send_func_t		*recv;
-	ctrl_func_t		*ctrl;
-};
-
-struct mISDN_sock_list {
-	struct hlist_head	head;
-	rwlock_t		lock;
-};
-
-struct mISDN_sock {
-	struct sock		sk;
-	struct mISDNchannel	ch;
-	u_int			cmask;
-	struct mISDNdevice	*dev;
-};
-
-
-
-struct mISDNdevice {
-	struct mISDNchannel	D;
-	u_int			id;
-	u_int			Dprotocols;
-	u_int			Bprotocols;
-	u_int			nrbchan;
-	u_char			channelmap[MISDN_CHMAP_SIZE];
-	struct list_head	bchannels;
-	struct mISDNchannel	*teimgr;
-	struct device		dev;
-};
-
-struct mISDNstack {
-	u_long			status;
-	struct mISDNdevice	*dev;
-	struct task_struct	*thread;
-	struct completion	*notify;
-	wait_queue_head_t	workq;
-	struct sk_buff_head	msgq;
-	struct list_head	layer2;
-	struct mISDNchannel	*layer1;
-	struct mISDNchannel	own;
-	struct mutex		lmutex; /* protect lists */
-	struct mISDN_sock_list	l1sock;
-#ifdef MISDN_MSG_STATS
-	u_int			msg_cnt;
-	u_int			sleep_cnt;
-	u_int			stopped_cnt;
-#endif
-};
-
-typedef	int	(clockctl_func_t)(void *, int);
-
-struct	mISDNclock {
-	struct list_head	list;
-	char			name[64];
-	int			pri;
-	clockctl_func_t		*ctl;
-	void			*priv;
-};
-
-/* global alloc/queue functions */
-
-static inline struct sk_buff *
-mI_alloc_skb(unsigned int len, gfp_t gfp_mask)
-{
-	struct sk_buff	*skb;
-
-	skb = alloc_skb(len + MISDN_HEADER_LEN, gfp_mask);
-	if (likely(skb))
-		skb_reserve(skb, MISDN_HEADER_LEN);
-	return skb;
-}
-
-static inline struct sk_buff *
-_alloc_mISDN_skb(u_int prim, u_int id, u_int len, void *dp, gfp_t gfp_mask)
-{
-	struct sk_buff	*skb = mI_alloc_skb(len, gfp_mask);
-	struct mISDNhead *hh;
-
-	if (!skb)
-		return NULL;
-	if (len)
-		skb_put_data(skb, dp, len);
-	hh = mISDN_HEAD_P(skb);
-	hh->prim = prim;
-	hh->id = id;
-	return skb;
-}
-
-static inline void
-_queue_data(struct mISDNchannel *ch, u_int prim,
-    u_int id, u_int len, void *dp, gfp_t gfp_mask)
-{
-	struct sk_buff		*skb;
-
-	if (!ch->peer)
-		return;
-	skb = _alloc_mISDN_skb(prim, id, len, dp, gfp_mask);
-	if (!skb)
-		return;
-	if (ch->recv(ch->peer, skb))
-		dev_kfree_skb(skb);
-}
-
-/* global register/unregister functions */
-
-extern int	mISDN_register_device(struct mISDNdevice *,
-					struct device *parent, char *name);
-extern void	mISDN_unregister_device(struct mISDNdevice *);
-extern int	mISDN_register_Bprotocol(struct Bprotocol *);
-extern void	mISDN_unregister_Bprotocol(struct Bprotocol *);
-extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *,
-						void *);
-extern void	mISDN_unregister_clock(struct mISDNclock *);
-
-static inline struct mISDNdevice *dev_to_mISDN(const struct device *dev)
-{
-	if (dev)
-		return dev_get_drvdata(dev);
-	else
-		return NULL;
-}
-
-extern void	set_channel_address(struct mISDNchannel *, u_int, u_int);
-extern void	mISDN_clock_update(struct mISDNclock *, int, ktime_t *);
-extern unsigned short mISDN_clock_get(void);
-extern const char *mISDNDevName4ch(struct mISDNchannel *);
-
-#endif /* __KERNEL__ */
-#endif /* mISDNIF_H */
diff --git a/include/uapi/linux/capi.h b/include/uapi/linux/capi.h
deleted file mode 100644
index 31f946f8a88d..000000000000
--- a/include/uapi/linux/capi.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/* $Id: capi.h,v 1.4.6.1 2001/09/23 22:25:05 kai Exp $
- * 
- * CAPI 2.0 Interface for Linux
- * 
- * Copyright 1997 by Carsten Paeth (calle@calle.in-berlin.de)
- * 
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#ifndef __LINUX_CAPI_H__
-#define __LINUX_CAPI_H__
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#ifndef __KERNEL__
-#include <linux/kernelcapi.h>
-#endif
-
-/*
- * CAPI_REGISTER
- */
-
-typedef struct capi_register_params {	/* CAPI_REGISTER */
-	__u32 level3cnt;	/* No. of simulatneous user data connections */
-	__u32 datablkcnt;	/* No. of buffered data messages */
-	__u32 datablklen;	/* Size of buffered data messages */
-} capi_register_params;
-
-#define	CAPI_REGISTER	_IOW('C',0x01,struct capi_register_params)
-
-/*
- * CAPI_GET_MANUFACTURER
- */
-
-#define CAPI_MANUFACTURER_LEN		64
-
-#define	CAPI_GET_MANUFACTURER	_IOWR('C',0x06,int)	/* broken: wanted size 64 (CAPI_MANUFACTURER_LEN) */
-
-/*
- * CAPI_GET_VERSION
- */
-
-typedef struct capi_version {
-	__u32 majorversion;
-	__u32 minorversion;
-	__u32 majormanuversion;
-	__u32 minormanuversion;
-} capi_version;
-
-#define CAPI_GET_VERSION	_IOWR('C',0x07,struct capi_version)
-
-/*
- * CAPI_GET_SERIAL
- */
-
-#define CAPI_SERIAL_LEN		8
-#define CAPI_GET_SERIAL		_IOWR('C',0x08,int)	/* broken: wanted size 8 (CAPI_SERIAL_LEN) */
-
-/*
- * CAPI_GET_PROFILE
- */
-
-typedef struct capi_profile {
-	__u16 ncontroller;	/* number of installed controller */
-	__u16 nbchannel;	/* number of B-Channels */
-	__u32 goptions;		/* global options */
-	__u32 support1;		/* B1 protocols support */
-	__u32 support2;		/* B2 protocols support */
-	__u32 support3;		/* B3 protocols support */
-	__u32 reserved[6];	/* reserved */
-	__u32 manu[5];		/* manufacturer specific information */
-} capi_profile;
-
-#define CAPI_GET_PROFILE	_IOWR('C',0x09,struct capi_profile)
-
-typedef struct capi_manufacturer_cmd {
-	unsigned long cmd;
-	void __user *data;
-} capi_manufacturer_cmd;
-
-/*
- * CAPI_MANUFACTURER_CMD
- */
-
-#define CAPI_MANUFACTURER_CMD	_IOWR('C',0x20, struct capi_manufacturer_cmd)
-
-/*
- * CAPI_GET_ERRCODE
- * capi errcode is set, * if read, write, or ioctl returns EIO,
- * ioctl returns errcode directly, and in arg, if != 0
- */
-
-#define CAPI_GET_ERRCODE	_IOR('C',0x21, __u16)
-
-/*
- * CAPI_INSTALLED
- */
-#define CAPI_INSTALLED		_IOR('C',0x22, __u16)
-
-
-/*
- * member contr is input for
- * CAPI_GET_MANUFACTURER, CAPI_GET_VERSION, CAPI_GET_SERIAL
- * and CAPI_GET_PROFILE
- */
-typedef union capi_ioctl_struct {
-	__u32 contr;
-	capi_register_params rparams;
-	__u8 manufacturer[CAPI_MANUFACTURER_LEN];
-	capi_version version;
-	__u8 serial[CAPI_SERIAL_LEN];
-	capi_profile profile;
-	capi_manufacturer_cmd cmd;
-	__u16 errcode;
-} capi_ioctl_struct;
-
-/*
- * Middleware extension
- */
-
-#define CAPIFLAG_HIGHJACKING	0x0001
-
-#define CAPI_GET_FLAGS		_IOR('C',0x23, unsigned)
-#define CAPI_SET_FLAGS		_IOR('C',0x24, unsigned)
-#define CAPI_CLR_FLAGS		_IOR('C',0x25, unsigned)
-
-#define CAPI_NCCI_OPENCOUNT	_IOR('C',0x26, unsigned)
-
-#define CAPI_NCCI_GETUNIT	_IOR('C',0x27, unsigned)
-
-#endif				/* __LINUX_CAPI_H__ */
diff --git a/include/uapi/linux/isdn/capicmd.h b/include/uapi/linux/isdn/capicmd.h
deleted file mode 100644
index 5ec88e7548a9..000000000000
--- a/include/uapi/linux/isdn/capicmd.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/* $Id: capicmd.h,v 1.2.6.2 2001/09/23 22:24:33 kai Exp $
- * 
- * CAPI 2.0 Interface for Linux
- * 
- * Copyright 1997 by Carsten Paeth <calle@calle.de>
- * 
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- */
-
-#ifndef __CAPICMD_H__
-#define __CAPICMD_H__
-
-#define CAPI_MSG_BASELEN		8
-#define CAPI_DATA_B3_REQ_LEN		(CAPI_MSG_BASELEN+4+4+2+2+2)
-#define CAPI_DATA_B3_RESP_LEN		(CAPI_MSG_BASELEN+4+2)
-#define CAPI_DISCONNECT_B3_RESP_LEN	(CAPI_MSG_BASELEN+4)
-
-/*----- CAPI commands -----*/
-#define CAPI_ALERT		    0x01
-#define CAPI_CONNECT		    0x02
-#define CAPI_CONNECT_ACTIVE	    0x03
-#define CAPI_CONNECT_B3_ACTIVE	    0x83
-#define CAPI_CONNECT_B3 	    0x82
-#define CAPI_CONNECT_B3_T90_ACTIVE  0x88
-#define CAPI_DATA_B3		    0x86
-#define CAPI_DISCONNECT_B3	    0x84
-#define CAPI_DISCONNECT 	    0x04
-#define CAPI_FACILITY		    0x80
-#define CAPI_INFO		    0x08
-#define CAPI_LISTEN		    0x05
-#define CAPI_MANUFACTURER	    0xff
-#define CAPI_RESET_B3		    0x87
-#define CAPI_SELECT_B_PROTOCOL	    0x41
-
-/*----- CAPI subcommands -----*/
-
-#define CAPI_REQ    0x80
-#define CAPI_CONF   0x81
-#define CAPI_IND    0x82
-#define CAPI_RESP   0x83
-
-/*----- CAPI combined commands -----*/
-
-#define CAPICMD(cmd,subcmd)	(((cmd)<<8)|(subcmd))
-
-#define CAPI_DISCONNECT_REQ		CAPICMD(CAPI_DISCONNECT,CAPI_REQ)
-#define CAPI_DISCONNECT_CONF		CAPICMD(CAPI_DISCONNECT,CAPI_CONF)
-#define CAPI_DISCONNECT_IND		CAPICMD(CAPI_DISCONNECT,CAPI_IND)
-#define CAPI_DISCONNECT_RESP		CAPICMD(CAPI_DISCONNECT,CAPI_RESP)
-
-#define CAPI_ALERT_REQ			CAPICMD(CAPI_ALERT,CAPI_REQ)
-#define CAPI_ALERT_CONF			CAPICMD(CAPI_ALERT,CAPI_CONF)
-
-#define CAPI_CONNECT_REQ		CAPICMD(CAPI_CONNECT,CAPI_REQ)
-#define CAPI_CONNECT_CONF		CAPICMD(CAPI_CONNECT,CAPI_CONF)
-#define CAPI_CONNECT_IND		CAPICMD(CAPI_CONNECT,CAPI_IND)
-#define CAPI_CONNECT_RESP		CAPICMD(CAPI_CONNECT,CAPI_RESP)
-
-#define CAPI_CONNECT_ACTIVE_REQ		CAPICMD(CAPI_CONNECT_ACTIVE,CAPI_REQ)
-#define CAPI_CONNECT_ACTIVE_CONF	CAPICMD(CAPI_CONNECT_ACTIVE,CAPI_CONF)
-#define CAPI_CONNECT_ACTIVE_IND		CAPICMD(CAPI_CONNECT_ACTIVE,CAPI_IND)
-#define CAPI_CONNECT_ACTIVE_RESP	CAPICMD(CAPI_CONNECT_ACTIVE,CAPI_RESP)
-
-#define CAPI_SELECT_B_PROTOCOL_REQ	CAPICMD(CAPI_SELECT_B_PROTOCOL,CAPI_REQ)
-#define CAPI_SELECT_B_PROTOCOL_CONF	CAPICMD(CAPI_SELECT_B_PROTOCOL,CAPI_CONF)
-
-#define CAPI_CONNECT_B3_ACTIVE_REQ	CAPICMD(CAPI_CONNECT_B3_ACTIVE,CAPI_REQ)
-#define CAPI_CONNECT_B3_ACTIVE_CONF	CAPICMD(CAPI_CONNECT_B3_ACTIVE,CAPI_CONF)
-#define CAPI_CONNECT_B3_ACTIVE_IND	CAPICMD(CAPI_CONNECT_B3_ACTIVE,CAPI_IND)
-#define CAPI_CONNECT_B3_ACTIVE_RESP	CAPICMD(CAPI_CONNECT_B3_ACTIVE,CAPI_RESP)
-
-#define CAPI_CONNECT_B3_REQ		CAPICMD(CAPI_CONNECT_B3,CAPI_REQ)
-#define CAPI_CONNECT_B3_CONF		CAPICMD(CAPI_CONNECT_B3,CAPI_CONF)
-#define CAPI_CONNECT_B3_IND		CAPICMD(CAPI_CONNECT_B3,CAPI_IND)
-#define CAPI_CONNECT_B3_RESP		CAPICMD(CAPI_CONNECT_B3,CAPI_RESP)
-
-
-#define CAPI_CONNECT_B3_T90_ACTIVE_IND	CAPICMD(CAPI_CONNECT_B3_T90_ACTIVE,CAPI_IND)
-#define CAPI_CONNECT_B3_T90_ACTIVE_RESP	CAPICMD(CAPI_CONNECT_B3_T90_ACTIVE,CAPI_RESP)
-
-#define CAPI_DATA_B3_REQ		CAPICMD(CAPI_DATA_B3,CAPI_REQ)
-#define CAPI_DATA_B3_CONF		CAPICMD(CAPI_DATA_B3,CAPI_CONF)
-#define CAPI_DATA_B3_IND		CAPICMD(CAPI_DATA_B3,CAPI_IND)
-#define CAPI_DATA_B3_RESP		CAPICMD(CAPI_DATA_B3,CAPI_RESP)
-
-#define CAPI_DISCONNECT_B3_REQ		CAPICMD(CAPI_DISCONNECT_B3,CAPI_REQ)
-#define CAPI_DISCONNECT_B3_CONF		CAPICMD(CAPI_DISCONNECT_B3,CAPI_CONF)
-#define CAPI_DISCONNECT_B3_IND		CAPICMD(CAPI_DISCONNECT_B3,CAPI_IND)
-#define CAPI_DISCONNECT_B3_RESP		CAPICMD(CAPI_DISCONNECT_B3,CAPI_RESP)
-
-#define CAPI_RESET_B3_REQ		CAPICMD(CAPI_RESET_B3,CAPI_REQ)
-#define CAPI_RESET_B3_CONF		CAPICMD(CAPI_RESET_B3,CAPI_CONF)
-#define CAPI_RESET_B3_IND		CAPICMD(CAPI_RESET_B3,CAPI_IND)
-#define CAPI_RESET_B3_RESP		CAPICMD(CAPI_RESET_B3,CAPI_RESP)
-
-#define CAPI_LISTEN_REQ			CAPICMD(CAPI_LISTEN,CAPI_REQ)
-#define CAPI_LISTEN_CONF		CAPICMD(CAPI_LISTEN,CAPI_CONF)
-
-#define CAPI_MANUFACTURER_REQ		CAPICMD(CAPI_MANUFACTURER,CAPI_REQ)
-#define CAPI_MANUFACTURER_CONF		CAPICMD(CAPI_MANUFACTURER,CAPI_CONF)
-#define CAPI_MANUFACTURER_IND		CAPICMD(CAPI_MANUFACTURER,CAPI_IND)
-#define CAPI_MANUFACTURER_RESP		CAPICMD(CAPI_MANUFACTURER,CAPI_RESP)
-
-#define CAPI_FACILITY_REQ		CAPICMD(CAPI_FACILITY,CAPI_REQ)
-#define CAPI_FACILITY_CONF		CAPICMD(CAPI_FACILITY,CAPI_CONF)
-#define CAPI_FACILITY_IND		CAPICMD(CAPI_FACILITY,CAPI_IND)
-#define CAPI_FACILITY_RESP		CAPICMD(CAPI_FACILITY,CAPI_RESP)
-
-#define CAPI_INFO_REQ			CAPICMD(CAPI_INFO,CAPI_REQ)
-#define CAPI_INFO_CONF			CAPICMD(CAPI_INFO,CAPI_CONF)
-#define CAPI_INFO_IND			CAPICMD(CAPI_INFO,CAPI_IND)
-#define CAPI_INFO_RESP			CAPICMD(CAPI_INFO,CAPI_RESP)
-
-#endif				/* __CAPICMD_H__ */
diff --git a/include/uapi/linux/kernelcapi.h b/include/uapi/linux/kernelcapi.h
deleted file mode 100644
index 325a856e0e20..000000000000
--- a/include/uapi/linux/kernelcapi.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * $Id: kernelcapi.h,v 1.8.6.2 2001/02/07 11:31:31 kai Exp $
- * 
- * Kernel CAPI 2.0 Interface for Linux
- * 
- * (c) Copyright 1997 by Carsten Paeth (calle@calle.in-berlin.de)
- * 
- */
-
-#ifndef _UAPI__KERNELCAPI_H__
-#define _UAPI__KERNELCAPI_H__
-
-#define CAPI_MAXAPPL	240	/* maximum number of applications  */
-#define CAPI_MAXCONTR	32	/* maximum number of controller    */
-#define CAPI_MAXDATAWINDOW	8
-
-
-typedef struct kcapi_flagdef {
-	int contr;
-	int flag;
-} kcapi_flagdef;
-
-typedef struct kcapi_carddef {
-	char		driver[32];
-	unsigned int	port;
-	unsigned	irq;
-	unsigned int	membase;
-	int		cardnr;
-} kcapi_carddef;
-
-/* new ioctls >= 10 */
-#define KCAPI_CMD_TRACE		10
-#define KCAPI_CMD_ADDCARD	11	/* OBSOLETE */
-
-/* 
- * flag > 2 => trace also data
- * flag & 1 => show trace
- */
-#define KCAPI_TRACE_OFF			0
-#define KCAPI_TRACE_SHORT_NO_DATA	1
-#define KCAPI_TRACE_FULL_NO_DATA	2
-#define KCAPI_TRACE_SHORT		3
-#define KCAPI_TRACE_FULL		4
-
-
-
-#endif /* _UAPI__KERNELCAPI_H__ */
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 6b2b65a66700..ee6457d1a5ee 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -33,7 +33,6 @@ menuconfig BT
 	     HCI Device drivers (Interface to the hardware)
 	     RFCOMM Module (RFCOMM Protocol)  
 	     BNEP Module (Bluetooth Network Encapsulation Protocol)
-	     CMTP Module (CAPI Message Transport Protocol)
 	     HIDP Module (Human Interface Device Protocol)
 
 	  Say Y here to compile Bluetooth support into the kernel or say M to
@@ -58,8 +57,6 @@ source "net/bluetooth/rfcomm/Kconfig"
 
 source "net/bluetooth/bnep/Kconfig"
 
-source "net/bluetooth/cmtp/Kconfig"
-
 source "net/bluetooth/hidp/Kconfig"
 
 config BT_LE
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index a7eede7616d8..41049b280887 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -6,7 +6,6 @@
 obj-$(CONFIG_BT)	+= bluetooth.o
 obj-$(CONFIG_BT_RFCOMM)	+= rfcomm/
 obj-$(CONFIG_BT_BNEP)	+= bnep/
-obj-$(CONFIG_BT_CMTP)	+= cmtp/
 obj-$(CONFIG_BT_HIDP)	+= hidp/
 obj-$(CONFIG_BT_6LOWPAN) += bluetooth_6lowpan.o
 
diff --git a/net/bluetooth/cmtp/Kconfig b/net/bluetooth/cmtp/Kconfig
deleted file mode 100644
index 34e923466236..000000000000
--- a/net/bluetooth/cmtp/Kconfig
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config BT_CMTP
-	tristate "CMTP protocol support (DEPRECATED)"
-	depends on BT_BREDR && ISDN_CAPI && DEPRECATED
-	help
-	  CMTP (CAPI Message Transport Protocol) is a transport layer
-	  for CAPI messages.  CMTP is required for the Bluetooth Common
-	  ISDN Access Profile.
-
-	  Say Y here to compile CMTP support into the kernel or say M to
-	  compile it as module (cmtp).
-
diff --git a/net/bluetooth/cmtp/Makefile b/net/bluetooth/cmtp/Makefile
deleted file mode 100644
index b2262ca97499..000000000000
--- a/net/bluetooth/cmtp/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the Linux Bluetooth CMTP layer
-#
-
-obj-$(CONFIG_BT_CMTP) += cmtp.o
-
-cmtp-objs := core.o sock.o capi.o
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
deleted file mode 100644
index b95413bffa16..000000000000
--- a/net/bluetooth/cmtp/capi.c
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
-   CMTP implementation for Linux Bluetooth stack (BlueZ).
-   Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org>
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 as
-   published by the Free Software Foundation;
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
-   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
-   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
-   SOFTWARE IS DISCLAIMED.
-*/
-
-#include <linux/export.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/sched/signal.h>
-#include <linux/slab.h>
-#include <linux/poll.h>
-#include <linux/fcntl.h>
-#include <linux/skbuff.h>
-#include <linux/socket.h>
-#include <linux/ioctl.h>
-#include <linux/file.h>
-#include <linux/wait.h>
-#include <linux/kthread.h>
-#include <net/sock.h>
-
-#include <linux/isdn/capilli.h>
-#include <linux/isdn/capicmd.h>
-#include <linux/isdn/capiutil.h>
-
-#include "cmtp.h"
-
-#define CAPI_INTEROPERABILITY		0x20
-
-#define CAPI_INTEROPERABILITY_REQ	CAPICMD(CAPI_INTEROPERABILITY, CAPI_REQ)
-#define CAPI_INTEROPERABILITY_CONF	CAPICMD(CAPI_INTEROPERABILITY, CAPI_CONF)
-#define CAPI_INTEROPERABILITY_IND	CAPICMD(CAPI_INTEROPERABILITY, CAPI_IND)
-#define CAPI_INTEROPERABILITY_RESP	CAPICMD(CAPI_INTEROPERABILITY, CAPI_RESP)
-
-#define CAPI_INTEROPERABILITY_REQ_LEN	(CAPI_MSG_BASELEN + 2)
-#define CAPI_INTEROPERABILITY_CONF_LEN	(CAPI_MSG_BASELEN + 4)
-#define CAPI_INTEROPERABILITY_IND_LEN	(CAPI_MSG_BASELEN + 2)
-#define CAPI_INTEROPERABILITY_RESP_LEN	(CAPI_MSG_BASELEN + 2)
-
-#define CAPI_FUNCTION_REGISTER		0
-#define CAPI_FUNCTION_RELEASE		1
-#define CAPI_FUNCTION_GET_PROFILE	2
-#define CAPI_FUNCTION_GET_MANUFACTURER	3
-#define CAPI_FUNCTION_GET_VERSION	4
-#define CAPI_FUNCTION_GET_SERIAL_NUMBER	5
-#define CAPI_FUNCTION_MANUFACTURER	6
-#define CAPI_FUNCTION_LOOPBACK		7
-
-
-#define CMTP_MSGNUM	1
-#define CMTP_APPLID	2
-#define CMTP_MAPPING	3
-
-static struct cmtp_application *cmtp_application_add(struct cmtp_session *session, __u16 appl)
-{
-	struct cmtp_application *app = kzalloc_obj(*app);
-
-	BT_DBG("session %p application %p appl %u", session, app, appl);
-
-	if (!app)
-		return NULL;
-
-	app->state = BT_OPEN;
-	app->appl = appl;
-
-	list_add_tail(&app->list, &session->applications);
-
-	return app;
-}
-
-static void cmtp_application_del(struct cmtp_session *session, struct cmtp_application *app)
-{
-	BT_DBG("session %p application %p", session, app);
-
-	if (app) {
-		list_del(&app->list);
-		kfree(app);
-	}
-}
-
-static struct cmtp_application *cmtp_application_get(struct cmtp_session *session, int pattern, __u16 value)
-{
-	struct cmtp_application *app;
-
-	list_for_each_entry(app, &session->applications, list) {
-		switch (pattern) {
-		case CMTP_MSGNUM:
-			if (app->msgnum == value)
-				return app;
-			break;
-		case CMTP_APPLID:
-			if (app->appl == value)
-				return app;
-			break;
-		case CMTP_MAPPING:
-			if (app->mapping == value)
-				return app;
-			break;
-		}
-	}
-
-	return NULL;
-}
-
-static int cmtp_msgnum_get(struct cmtp_session *session)
-{
-	session->msgnum++;
-
-	if ((session->msgnum & 0xff) > 200)
-		session->msgnum = CMTP_INITIAL_MSGNUM + 1;
-
-	return session->msgnum;
-}
-
-static void cmtp_send_capimsg(struct cmtp_session *session, struct sk_buff *skb)
-{
-	struct cmtp_scb *scb = (void *) skb->cb;
-
-	BT_DBG("session %p skb %p len %u", session, skb, skb->len);
-
-	scb->id = -1;
-	scb->data = (CAPIMSG_COMMAND(skb->data) == CAPI_DATA_B3);
-
-	skb_queue_tail(&session->transmit, skb);
-
-	wake_up_interruptible(sk_sleep(session->sock->sk));
-}
-
-static void cmtp_send_interopmsg(struct cmtp_session *session,
-					__u8 subcmd, __u16 appl, __u16 msgnum,
-					__u16 function, unsigned char *buf, int len)
-{
-	struct sk_buff *skb;
-	unsigned char *s;
-
-	BT_DBG("session %p subcmd 0x%02x appl %u msgnum %u", session, subcmd, appl, msgnum);
-
-	skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC);
-	if (!skb) {
-		BT_ERR("Can't allocate memory for interoperability packet");
-		return;
-	}
-
-	s = skb_put(skb, CAPI_MSG_BASELEN + 6 + len);
-
-	capimsg_setu16(s, 0, CAPI_MSG_BASELEN + 6 + len);
-	capimsg_setu16(s, 2, appl);
-	capimsg_setu8 (s, 4, CAPI_INTEROPERABILITY);
-	capimsg_setu8 (s, 5, subcmd);
-	capimsg_setu16(s, 6, msgnum);
-
-	/* Interoperability selector (Bluetooth Device Management) */
-	capimsg_setu16(s, 8, 0x0001);
-
-	capimsg_setu8 (s, 10, 3 + len);
-	capimsg_setu16(s, 11, function);
-	capimsg_setu8 (s, 13, len);
-
-	if (len > 0)
-		memcpy(s + 14, buf, len);
-
-	cmtp_send_capimsg(session, skb);
-}
-
-static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *skb)
-{
-	struct capi_ctr *ctrl = &session->ctrl;
-	struct cmtp_application *application;
-	__u16 appl, msgnum, func, info;
-	__u32 controller;
-
-	BT_DBG("session %p skb %p len %u", session, skb, skb->len);
-
-	switch (CAPIMSG_SUBCOMMAND(skb->data)) {
-	case CAPI_CONF:
-		if (skb->len < CAPI_MSG_BASELEN + 10)
-			break;
-
-		func = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 5);
-		info = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 8);
-
-		switch (func) {
-		case CAPI_FUNCTION_REGISTER:
-			msgnum = CAPIMSG_MSGID(skb->data);
-
-			application = cmtp_application_get(session, CMTP_MSGNUM, msgnum);
-			if (application) {
-				application->state = BT_CONNECTED;
-				application->msgnum = 0;
-				application->mapping = CAPIMSG_APPID(skb->data);
-				wake_up_interruptible(&session->wait);
-			}
-
-			break;
-
-		case CAPI_FUNCTION_RELEASE:
-			appl = CAPIMSG_APPID(skb->data);
-
-			application = cmtp_application_get(session, CMTP_MAPPING, appl);
-			if (application) {
-				application->state = BT_CLOSED;
-				application->msgnum = 0;
-				wake_up_interruptible(&session->wait);
-			}
-
-			break;
-
-		case CAPI_FUNCTION_GET_PROFILE:
-			if (skb->len < CAPI_MSG_BASELEN + 11 + sizeof(capi_profile))
-				break;
-
-			controller = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 11);
-			msgnum = CAPIMSG_MSGID(skb->data);
-
-			if (!info && (msgnum == CMTP_INITIAL_MSGNUM)) {
-				session->ncontroller = controller;
-				wake_up_interruptible(&session->wait);
-				break;
-			}
-
-			if (!info && ctrl) {
-				memcpy(&ctrl->profile,
-					skb->data + CAPI_MSG_BASELEN + 11,
-					sizeof(capi_profile));
-				session->state = BT_CONNECTED;
-				capi_ctr_ready(ctrl);
-			}
-
-			break;
-
-		case CAPI_FUNCTION_GET_MANUFACTURER:
-			if (!info && ctrl && skb->len > CAPI_MSG_BASELEN + 14)
-				strscpy_pad(ctrl->manu,
-					    skb->data + CAPI_MSG_BASELEN + 15,
-					    skb->data[CAPI_MSG_BASELEN + 14]);
-			break;
-
-		case CAPI_FUNCTION_GET_VERSION:
-			if (skb->len < CAPI_MSG_BASELEN + 32)
-				break;
-
-			if (!info && ctrl) {
-				ctrl->version.majorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 16);
-				ctrl->version.minorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 20);
-				ctrl->version.majormanuversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 24);
-				ctrl->version.minormanuversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 28);
-			}
-
-			break;
-
-		case CAPI_FUNCTION_GET_SERIAL_NUMBER:
-			if (!info && ctrl && skb->len > CAPI_MSG_BASELEN + 16)
-				strscpy_pad(ctrl->serial,
-					    skb->data + CAPI_MSG_BASELEN + 17,
-					    skb->data[CAPI_MSG_BASELEN + 16]);
-			break;
-		}
-
-		break;
-
-	case CAPI_IND:
-		if (skb->len < CAPI_MSG_BASELEN + 6)
-			break;
-
-		func = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 3);
-
-		if (func == CAPI_FUNCTION_LOOPBACK) {
-			int len = min_t(uint, skb->len - CAPI_MSG_BASELEN - 6,
-						skb->data[CAPI_MSG_BASELEN + 5]);
-			appl = CAPIMSG_APPID(skb->data);
-			msgnum = CAPIMSG_MSGID(skb->data);
-			cmtp_send_interopmsg(session, CAPI_RESP, appl, msgnum, func,
-						skb->data + CAPI_MSG_BASELEN + 6, len);
-		}
-
-		break;
-	}
-
-	kfree_skb(skb);
-}
-
-void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb)
-{
-	struct capi_ctr *ctrl = &session->ctrl;
-	struct cmtp_application *application;
-	__u16 appl;
-	__u32 contr;
-
-	BT_DBG("session %p skb %p len %u", session, skb, skb->len);
-
-	if (skb->len < CAPI_MSG_BASELEN)
-		return;
-
-	if (CAPIMSG_COMMAND(skb->data) == CAPI_INTEROPERABILITY) {
-		cmtp_recv_interopmsg(session, skb);
-		return;
-	}
-
-	if (session->flags & BIT(CMTP_LOOPBACK)) {
-		kfree_skb(skb);
-		return;
-	}
-
-	appl = CAPIMSG_APPID(skb->data);
-	contr = CAPIMSG_CONTROL(skb->data);
-
-	application = cmtp_application_get(session, CMTP_MAPPING, appl);
-	if (application) {
-		appl = application->appl;
-		CAPIMSG_SETAPPID(skb->data, appl);
-	} else {
-		BT_ERR("Can't find application with id %u", appl);
-		kfree_skb(skb);
-		return;
-	}
-
-	if ((contr & 0x7f) == 0x01) {
-		contr = (contr & 0xffffff80) | session->num;
-		CAPIMSG_SETCONTROL(skb->data, contr);
-	}
-
-	capi_ctr_handle_message(ctrl, appl, skb);
-}
-
-static int cmtp_load_firmware(struct capi_ctr *ctrl, capiloaddata *data)
-{
-	BT_DBG("ctrl %p data %p", ctrl, data);
-
-	return 0;
-}
-
-static void cmtp_reset_ctr(struct capi_ctr *ctrl)
-{
-	struct cmtp_session *session = ctrl->driverdata;
-
-	BT_DBG("ctrl %p", ctrl);
-
-	capi_ctr_down(ctrl);
-
-	atomic_inc(&session->terminate);
-	wake_up_process(session->task);
-}
-
-static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_params *rp)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	struct cmtp_session *session = ctrl->driverdata;
-	struct cmtp_application *application;
-	unsigned long timeo = CMTP_INTEROP_TIMEOUT;
-	unsigned char buf[8];
-	int err = 0, nconn, want = rp->level3cnt;
-
-	BT_DBG("ctrl %p appl %u level3cnt %u datablkcnt %u datablklen %u",
-	       ctrl, appl, rp->level3cnt, rp->datablkcnt, rp->datablklen);
-
-	application = cmtp_application_add(session, appl);
-	if (!application) {
-		BT_ERR("Can't allocate memory for new application");
-		return;
-	}
-
-	if (want < 0)
-		nconn = ctrl->profile.nbchannel * -want;
-	else
-		nconn = want;
-
-	if (nconn == 0)
-		nconn = ctrl->profile.nbchannel;
-
-	capimsg_setu16(buf, 0, nconn);
-	capimsg_setu16(buf, 2, rp->datablkcnt);
-	capimsg_setu16(buf, 4, rp->datablklen);
-
-	application->state = BT_CONFIG;
-	application->msgnum = cmtp_msgnum_get(session);
-
-	cmtp_send_interopmsg(session, CAPI_REQ, 0x0000, application->msgnum,
-				CAPI_FUNCTION_REGISTER, buf, 6);
-
-	add_wait_queue(&session->wait, &wait);
-	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (!timeo) {
-			err = -EAGAIN;
-			break;
-		}
-
-		if (application->state == BT_CLOSED) {
-			err = -application->err;
-			break;
-		}
-
-		if (application->state == BT_CONNECTED)
-			break;
-
-		if (signal_pending(current)) {
-			err = -EINTR;
-			break;
-		}
-
-		timeo = schedule_timeout(timeo);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&session->wait, &wait);
-
-	if (err) {
-		cmtp_application_del(session, application);
-		return;
-	}
-}
-
-static void cmtp_release_appl(struct capi_ctr *ctrl, __u16 appl)
-{
-	struct cmtp_session *session = ctrl->driverdata;
-	struct cmtp_application *application;
-
-	BT_DBG("ctrl %p appl %u", ctrl, appl);
-
-	application = cmtp_application_get(session, CMTP_APPLID, appl);
-	if (!application) {
-		BT_ERR("Can't find application");
-		return;
-	}
-
-	application->msgnum = cmtp_msgnum_get(session);
-
-	cmtp_send_interopmsg(session, CAPI_REQ, application->mapping, application->msgnum,
-				CAPI_FUNCTION_RELEASE, NULL, 0);
-
-	wait_event_interruptible_timeout(session->wait,
-			(application->state == BT_CLOSED), CMTP_INTEROP_TIMEOUT);
-
-	cmtp_application_del(session, application);
-}
-
-static u16 cmtp_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
-{
-	struct cmtp_session *session = ctrl->driverdata;
-	struct cmtp_application *application;
-	__u16 appl;
-	__u32 contr;
-
-	BT_DBG("ctrl %p skb %p", ctrl, skb);
-
-	appl = CAPIMSG_APPID(skb->data);
-	contr = CAPIMSG_CONTROL(skb->data);
-
-	application = cmtp_application_get(session, CMTP_APPLID, appl);
-	if ((!application) || (application->state != BT_CONNECTED)) {
-		BT_ERR("Can't find application with id %u", appl);
-		return CAPI_ILLAPPNR;
-	}
-
-	CAPIMSG_SETAPPID(skb->data, application->mapping);
-
-	if ((contr & 0x7f) == session->num) {
-		contr = (contr & 0xffffff80) | 0x01;
-		CAPIMSG_SETCONTROL(skb->data, contr);
-	}
-
-	cmtp_send_capimsg(session, skb);
-
-	return CAPI_NOERROR;
-}
-
-static char *cmtp_procinfo(struct capi_ctr *ctrl)
-{
-	return "CAPI Message Transport Protocol";
-}
-
-static int cmtp_proc_show(struct seq_file *m, void *v)
-{
-	struct capi_ctr *ctrl = m->private;
-	struct cmtp_session *session = ctrl->driverdata;
-	struct cmtp_application *app;
-
-	seq_printf(m, "%s\n\n", cmtp_procinfo(ctrl));
-	seq_printf(m, "addr %s\n", session->name);
-	seq_printf(m, "ctrl %d\n", session->num);
-
-	list_for_each_entry(app, &session->applications, list) {
-		seq_printf(m, "appl %u -> %u\n", app->appl, app->mapping);
-	}
-
-	return 0;
-}
-
-int cmtp_attach_device(struct cmtp_session *session)
-{
-	unsigned char buf[4];
-	long ret;
-
-	BT_DBG("session %p", session);
-
-	capimsg_setu32(buf, 0, 0);
-
-	cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, CMTP_INITIAL_MSGNUM,
-				CAPI_FUNCTION_GET_PROFILE, buf, 4);
-
-	ret = wait_event_interruptible_timeout(session->wait,
-			session->ncontroller, CMTP_INTEROP_TIMEOUT);
-
-	BT_INFO("Found %d CAPI controller(s) on device %s", session->ncontroller, session->name);
-
-	if (!ret)
-		return -ETIMEDOUT;
-
-	if (!session->ncontroller)
-		return -ENODEV;
-
-	if (session->ncontroller > 1)
-		BT_INFO("Setting up only CAPI controller 1");
-
-	session->ctrl.owner      = THIS_MODULE;
-	session->ctrl.driverdata = session;
-	strcpy(session->ctrl.name, session->name);
-
-	session->ctrl.driver_name   = "cmtp";
-	session->ctrl.load_firmware = cmtp_load_firmware;
-	session->ctrl.reset_ctr     = cmtp_reset_ctr;
-	session->ctrl.register_appl = cmtp_register_appl;
-	session->ctrl.release_appl  = cmtp_release_appl;
-	session->ctrl.send_message  = cmtp_send_message;
-
-	session->ctrl.procinfo      = cmtp_procinfo;
-	session->ctrl.proc_show     = cmtp_proc_show;
-
-	if (attach_capi_ctr(&session->ctrl) < 0) {
-		BT_ERR("Can't attach new controller");
-		return -EBUSY;
-	}
-
-	session->num = session->ctrl.cnr;
-
-	BT_DBG("session %p num %d", session, session->num);
-
-	capimsg_setu32(buf, 0, 1);
-
-	cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session),
-				CAPI_FUNCTION_GET_MANUFACTURER, buf, 4);
-
-	cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session),
-				CAPI_FUNCTION_GET_VERSION, buf, 4);
-
-	cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session),
-				CAPI_FUNCTION_GET_SERIAL_NUMBER, buf, 4);
-
-	cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session),
-				CAPI_FUNCTION_GET_PROFILE, buf, 4);
-
-	return 0;
-}
-
-void cmtp_detach_device(struct cmtp_session *session)
-{
-	BT_DBG("session %p", session);
-
-	detach_capi_ctr(&session->ctrl);
-}
diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h
deleted file mode 100644
index f6b9dc4e408f..000000000000
--- a/net/bluetooth/cmtp/cmtp.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
-   CMTP implementation for Linux Bluetooth stack (BlueZ).
-   Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org>
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 as
-   published by the Free Software Foundation;
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
-   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
-   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
-   SOFTWARE IS DISCLAIMED.
-*/
-
-#ifndef __CMTP_H
-#define __CMTP_H
-
-#include <linux/types.h>
-#include <net/bluetooth/bluetooth.h>
-
-#define BTNAMSIZ 21
-
-/* CMTP ioctl defines */
-#define CMTPCONNADD	_IOW('C', 200, int)
-#define CMTPCONNDEL	_IOW('C', 201, int)
-#define CMTPGETCONNLIST	_IOR('C', 210, int)
-#define CMTPGETCONNINFO	_IOR('C', 211, int)
-
-#define CMTP_LOOPBACK	0
-
-struct cmtp_connadd_req {
-	int   sock;	/* Connected socket */
-	__u32 flags;
-};
-
-struct cmtp_conndel_req {
-	bdaddr_t bdaddr;
-	__u32    flags;
-};
-
-struct cmtp_conninfo {
-	bdaddr_t bdaddr;
-	__u32    flags;
-	__u16    state;
-	int      num;
-};
-
-struct cmtp_connlist_req {
-	__u32  cnum;
-	struct cmtp_conninfo __user *ci;
-};
-
-int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock);
-int cmtp_del_connection(struct cmtp_conndel_req *req);
-int cmtp_get_connlist(struct cmtp_connlist_req *req);
-int cmtp_get_conninfo(struct cmtp_conninfo *ci);
-
-/* CMTP session defines */
-#define CMTP_INTEROP_TIMEOUT	(HZ * 5)
-#define CMTP_INITIAL_MSGNUM	0xff00
-
-struct cmtp_session {
-	struct list_head list;
-
-	struct socket *sock;
-
-	bdaddr_t bdaddr;
-
-	unsigned long state;
-	unsigned long flags;
-
-	uint mtu;
-
-	char name[BTNAMSIZ];
-
-	atomic_t terminate;
-	struct task_struct *task;
-
-	wait_queue_head_t wait;
-
-	int ncontroller;
-	int num;
-	struct capi_ctr ctrl;
-
-	struct list_head applications;
-
-	unsigned long blockids;
-	int msgnum;
-
-	struct sk_buff_head transmit;
-
-	struct sk_buff *reassembly[16];
-};
-
-struct cmtp_application {
-	struct list_head list;
-
-	unsigned long state;
-	int err;
-
-	__u16 appl;
-	__u16 mapping;
-
-	__u16 msgnum;
-};
-
-struct cmtp_scb {
-	int id;
-	int data;
-};
-
-int  cmtp_attach_device(struct cmtp_session *session);
-void cmtp_detach_device(struct cmtp_session *session);
-
-void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb);
-
-/* CMTP init defines */
-int cmtp_init_sockets(void);
-void cmtp_cleanup_sockets(void);
-
-#endif /* __CMTP_H */
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
deleted file mode 100644
index 261aeeda3236..000000000000
--- a/net/bluetooth/cmtp/core.c
+++ /dev/null
@@ -1,519 +0,0 @@
-/*
-   CMTP implementation for Linux Bluetooth stack (BlueZ).
-   Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org>
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 as
-   published by the Free Software Foundation;
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
-   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
-   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
-   SOFTWARE IS DISCLAIMED.
-*/
-
-#include <linux/module.h>
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/poll.h>
-#include <linux/fcntl.h>
-#include <linux/freezer.h>
-#include <linux/skbuff.h>
-#include <linux/socket.h>
-#include <linux/ioctl.h>
-#include <linux/file.h>
-#include <linux/init.h>
-#include <linux/kthread.h>
-#include <net/sock.h>
-
-#include <linux/isdn/capilli.h>
-
-#include <net/bluetooth/bluetooth.h>
-#include <net/bluetooth/l2cap.h>
-
-#include "cmtp.h"
-
-#define VERSION "1.0"
-
-static DECLARE_RWSEM(cmtp_session_sem);
-static LIST_HEAD(cmtp_session_list);
-
-static struct cmtp_session *__cmtp_get_session(bdaddr_t *bdaddr)
-{
-	struct cmtp_session *session;
-
-	BT_DBG("");
-
-	list_for_each_entry(session, &cmtp_session_list, list)
-		if (!bacmp(bdaddr, &session->bdaddr))
-			return session;
-
-	return NULL;
-}
-
-static void __cmtp_link_session(struct cmtp_session *session)
-{
-	list_add(&session->list, &cmtp_session_list);
-}
-
-static void __cmtp_unlink_session(struct cmtp_session *session)
-{
-	list_del(&session->list);
-}
-
-static void __cmtp_copy_session(struct cmtp_session *session, struct cmtp_conninfo *ci)
-{
-	u32 valid_flags = BIT(CMTP_LOOPBACK);
-	memset(ci, 0, sizeof(*ci));
-	bacpy(&ci->bdaddr, &session->bdaddr);
-
-	ci->flags = session->flags & valid_flags;
-	ci->state = session->state;
-
-	ci->num = session->num;
-}
-
-
-static inline int cmtp_alloc_block_id(struct cmtp_session *session)
-{
-	int i, id = -1;
-
-	for (i = 0; i < 16; i++)
-		if (!test_and_set_bit(i, &session->blockids)) {
-			id = i;
-			break;
-		}
-
-	return id;
-}
-
-static inline void cmtp_free_block_id(struct cmtp_session *session, int id)
-{
-	clear_bit(id, &session->blockids);
-}
-
-static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const unsigned char *buf, int count)
-{
-	struct sk_buff *skb = session->reassembly[id], *nskb;
-	int size;
-
-	BT_DBG("session %p buf %p count %d", session, buf, count);
-
-	size = (skb) ? skb->len + count : count;
-
-	nskb = alloc_skb(size, GFP_ATOMIC);
-	if (!nskb) {
-		BT_ERR("Can't allocate memory for CAPI message");
-		return;
-	}
-
-	if (skb && (skb->len > 0))
-		skb_copy_from_linear_data(skb, skb_put(nskb, skb->len), skb->len);
-
-	skb_put_data(nskb, buf, count);
-
-	session->reassembly[id] = nskb;
-
-	kfree_skb(skb);
-}
-
-static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff *skb)
-{
-	__u8 hdr, hdrlen, id;
-	__u16 len;
-
-	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
-
-	while (skb->len > 0) {
-		hdr = skb->data[0];
-
-		switch (hdr & 0xc0) {
-		case 0x40:
-			hdrlen = 2;
-			len = skb->data[1];
-			break;
-		case 0x80:
-			hdrlen = 3;
-			len = skb->data[1] | (skb->data[2] << 8);
-			break;
-		default:
-			hdrlen = 1;
-			len = 0;
-			break;
-		}
-
-		id = (hdr & 0x3c) >> 2;
-
-		BT_DBG("hdr 0x%02x hdrlen %d len %d id %d", hdr, hdrlen, len, id);
-
-		if (hdrlen + len > skb->len) {
-			BT_ERR("Wrong size or header information in CMTP frame");
-			break;
-		}
-
-		if (len == 0) {
-			skb_pull(skb, hdrlen);
-			continue;
-		}
-
-		switch (hdr & 0x03) {
-		case 0x00:
-			cmtp_add_msgpart(session, id, skb->data + hdrlen, len);
-			cmtp_recv_capimsg(session, session->reassembly[id]);
-			session->reassembly[id] = NULL;
-			break;
-		case 0x01:
-			cmtp_add_msgpart(session, id, skb->data + hdrlen, len);
-			break;
-		default:
-			kfree_skb(session->reassembly[id]);
-			session->reassembly[id] = NULL;
-			break;
-		}
-
-		skb_pull(skb, hdrlen + len);
-	}
-
-	kfree_skb(skb);
-	return 0;
-}
-
-static int cmtp_send_frame(struct cmtp_session *session, unsigned char *data, int len)
-{
-	struct socket *sock = session->sock;
-	struct kvec iv = { data, len };
-	struct msghdr msg;
-
-	BT_DBG("session %p data %p len %d", session, data, len);
-
-	if (!len)
-		return 0;
-
-	memset(&msg, 0, sizeof(msg));
-
-	return kernel_sendmsg(sock, &msg, &iv, 1, len);
-}
-
-static void cmtp_process_transmit(struct cmtp_session *session)
-{
-	struct sk_buff *skb, *nskb;
-	unsigned char *hdr;
-	unsigned int size, tail;
-
-	BT_DBG("session %p", session);
-
-	nskb = alloc_skb(session->mtu, GFP_ATOMIC);
-	if (!nskb) {
-		BT_ERR("Can't allocate memory for new frame");
-		return;
-	}
-
-	while ((skb = skb_dequeue(&session->transmit))) {
-		struct cmtp_scb *scb = (void *) skb->cb;
-
-		tail = session->mtu - nskb->len;
-		if (tail < 5) {
-			cmtp_send_frame(session, nskb->data, nskb->len);
-			skb_trim(nskb, 0);
-			tail = session->mtu;
-		}
-
-		size = min_t(uint, ((tail < 258) ? (tail - 2) : (tail - 3)), skb->len);
-
-		if (scb->id < 0) {
-			scb->id = cmtp_alloc_block_id(session);
-			if (scb->id < 0) {
-				skb_queue_head(&session->transmit, skb);
-				break;
-			}
-		}
-
-		if (size < 256) {
-			hdr = skb_put(nskb, 2);
-			hdr[0] = 0x40
-				| ((scb->id << 2) & 0x3c)
-				| ((skb->len == size) ? 0x00 : 0x01);
-			hdr[1] = size;
-		} else {
-			hdr = skb_put(nskb, 3);
-			hdr[0] = 0x80
-				| ((scb->id << 2) & 0x3c)
-				| ((skb->len == size) ? 0x00 : 0x01);
-			hdr[1] = size & 0xff;
-			hdr[2] = size >> 8;
-		}
-
-		skb_copy_from_linear_data(skb, skb_put(nskb, size), size);
-		skb_pull(skb, size);
-
-		if (skb->len > 0) {
-			skb_queue_head(&session->transmit, skb);
-		} else {
-			cmtp_free_block_id(session, scb->id);
-			if (scb->data) {
-				cmtp_send_frame(session, nskb->data, nskb->len);
-				skb_trim(nskb, 0);
-			}
-			kfree_skb(skb);
-		}
-	}
-
-	cmtp_send_frame(session, nskb->data, nskb->len);
-
-	kfree_skb(nskb);
-}
-
-static int cmtp_session(void *arg)
-{
-	struct cmtp_session *session = arg;
-	struct sock *sk = session->sock->sk;
-	struct sk_buff *skb;
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
-	BT_DBG("session %p", session);
-
-	set_user_nice(current, -15);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	while (1) {
-		if (atomic_read(&session->terminate))
-			break;
-		if (sk->sk_state != BT_CONNECTED)
-			break;
-
-		while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
-			skb_orphan(skb);
-			if (!skb_linearize(skb))
-				cmtp_recv_frame(session, skb);
-			else
-				kfree_skb(skb);
-		}
-
-		cmtp_process_transmit(session);
-
-		/*
-		 * wait_woken() performs the necessary memory barriers
-		 * for us; see the header comment for this primitive.
-		 */
-		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	down_write(&cmtp_session_sem);
-
-	if (!(session->flags & BIT(CMTP_LOOPBACK)))
-		cmtp_detach_device(session);
-
-	fput(session->sock->file);
-
-	__cmtp_unlink_session(session);
-
-	up_write(&cmtp_session_sem);
-
-	kfree(session);
-	module_put_and_kthread_exit(0);
-	return 0;
-}
-
-int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
-{
-	u32 valid_flags = BIT(CMTP_LOOPBACK);
-	struct cmtp_session *session, *s;
-	int i, err;
-
-	BT_DBG("");
-
-	if (!l2cap_is_socket(sock))
-		return -EBADFD;
-
-	if (req->flags & ~valid_flags)
-		return -EINVAL;
-
-	session = kzalloc_obj(struct cmtp_session);
-	if (!session)
-		return -ENOMEM;
-
-	down_write(&cmtp_session_sem);
-
-	s = __cmtp_get_session(&l2cap_pi(sock->sk)->chan->dst);
-	if (s && s->state == BT_CONNECTED) {
-		err = -EEXIST;
-		goto failed;
-	}
-
-	bacpy(&session->bdaddr, &l2cap_pi(sock->sk)->chan->dst);
-
-	session->mtu = min_t(uint, l2cap_pi(sock->sk)->chan->omtu,
-					l2cap_pi(sock->sk)->chan->imtu);
-
-	BT_DBG("mtu %d", session->mtu);
-
-	sprintf(session->name, "%pMR", &session->bdaddr);
-
-	session->sock  = sock;
-	session->state = BT_CONFIG;
-
-	init_waitqueue_head(&session->wait);
-
-	session->msgnum = CMTP_INITIAL_MSGNUM;
-
-	INIT_LIST_HEAD(&session->applications);
-
-	skb_queue_head_init(&session->transmit);
-
-	for (i = 0; i < 16; i++)
-		session->reassembly[i] = NULL;
-
-	session->flags = req->flags;
-
-	__cmtp_link_session(session);
-
-	__module_get(THIS_MODULE);
-	session->task = kthread_run(cmtp_session, session, "kcmtpd_ctr_%d",
-								session->num);
-	if (IS_ERR(session->task)) {
-		module_put(THIS_MODULE);
-		err = PTR_ERR(session->task);
-		goto unlink;
-	}
-
-	if (!(session->flags & BIT(CMTP_LOOPBACK))) {
-		err = cmtp_attach_device(session);
-		if (err < 0) {
-			/* Caller will call fput in case of failure, and so
-			 * will cmtp_session kthread.
-			 */
-			get_file(session->sock->file);
-
-			atomic_inc(&session->terminate);
-			wake_up_interruptible(sk_sleep(session->sock->sk));
-			up_write(&cmtp_session_sem);
-			return err;
-		}
-	}
-
-	up_write(&cmtp_session_sem);
-	return 0;
-
-unlink:
-	__cmtp_unlink_session(session);
-
-failed:
-	up_write(&cmtp_session_sem);
-	kfree(session);
-	return err;
-}
-
-int cmtp_del_connection(struct cmtp_conndel_req *req)
-{
-	u32 valid_flags = 0;
-	struct cmtp_session *session;
-	int err = 0;
-
-	BT_DBG("");
-
-	if (req->flags & ~valid_flags)
-		return -EINVAL;
-
-	down_read(&cmtp_session_sem);
-
-	session = __cmtp_get_session(&req->bdaddr);
-	if (session) {
-		/* Flush the transmit queue */
-		skb_queue_purge(&session->transmit);
-
-		/* Stop session thread */
-		atomic_inc(&session->terminate);
-
-		/*
-		 * See the comment preceding the call to wait_woken()
-		 * in cmtp_session().
-		 */
-		wake_up_interruptible(sk_sleep(session->sock->sk));
-	} else
-		err = -ENOENT;
-
-	up_read(&cmtp_session_sem);
-	return err;
-}
-
-int cmtp_get_connlist(struct cmtp_connlist_req *req)
-{
-	struct cmtp_session *session;
-	int err = 0, n = 0;
-
-	BT_DBG("");
-
-	down_read(&cmtp_session_sem);
-
-	list_for_each_entry(session, &cmtp_session_list, list) {
-		struct cmtp_conninfo ci;
-
-		__cmtp_copy_session(session, &ci);
-
-		if (copy_to_user(req->ci, &ci, sizeof(ci))) {
-			err = -EFAULT;
-			break;
-		}
-
-		if (++n >= req->cnum)
-			break;
-
-		req->ci++;
-	}
-	req->cnum = n;
-
-	up_read(&cmtp_session_sem);
-	return err;
-}
-
-int cmtp_get_conninfo(struct cmtp_conninfo *ci)
-{
-	struct cmtp_session *session;
-	int err = 0;
-
-	down_read(&cmtp_session_sem);
-
-	session = __cmtp_get_session(&ci->bdaddr);
-	if (session)
-		__cmtp_copy_session(session, ci);
-	else
-		err = -ENOENT;
-
-	up_read(&cmtp_session_sem);
-	return err;
-}
-
-
-static int __init cmtp_init(void)
-{
-	BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION);
-
-	return cmtp_init_sockets();
-}
-
-static void __exit cmtp_exit(void)
-{
-	cmtp_cleanup_sockets();
-}
-
-module_init(cmtp_init);
-module_exit(cmtp_exit);
-
-MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
-MODULE_DESCRIPTION("Bluetooth CMTP ver " VERSION);
-MODULE_VERSION(VERSION);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("bt-proto-5");
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
deleted file mode 100644
index 96d49d9fae96..000000000000
--- a/net/bluetooth/cmtp/sock.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
-   CMTP implementation for Linux Bluetooth stack (BlueZ).
-   Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org>
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 as
-   published by the Free Software Foundation;
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
-   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
-   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
-   SOFTWARE IS DISCLAIMED.
-*/
-
-#include <linux/export.h>
-
-#include <linux/types.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/poll.h>
-#include <linux/fcntl.h>
-#include <linux/skbuff.h>
-#include <linux/socket.h>
-#include <linux/ioctl.h>
-#include <linux/file.h>
-#include <linux/compat.h>
-#include <linux/gfp.h>
-#include <linux/uaccess.h>
-#include <net/sock.h>
-
-#include <linux/isdn/capilli.h>
-
-
-#include "cmtp.h"
-
-static struct bt_sock_list cmtp_sk_list = {
-	.lock = __RW_LOCK_UNLOCKED(cmtp_sk_list.lock)
-};
-
-static int cmtp_sock_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-
-	BT_DBG("sock %p sk %p", sock, sk);
-
-	if (!sk)
-		return 0;
-
-	bt_sock_unlink(&cmtp_sk_list, sk);
-
-	sock_orphan(sk);
-	sock_put(sk);
-
-	return 0;
-}
-
-static int do_cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp)
-{
-	struct cmtp_connadd_req ca;
-	struct cmtp_conndel_req cd;
-	struct cmtp_connlist_req cl;
-	struct cmtp_conninfo ci;
-	struct socket *nsock;
-	int err;
-
-	BT_DBG("cmd %x arg %p", cmd, argp);
-
-	switch (cmd) {
-	case CMTPCONNADD:
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&ca, argp, sizeof(ca)))
-			return -EFAULT;
-
-		nsock = sockfd_lookup(ca.sock, &err);
-		if (!nsock)
-			return err;
-
-		if (nsock->sk->sk_state != BT_CONNECTED) {
-			sockfd_put(nsock);
-			return -EBADFD;
-		}
-
-		err = cmtp_add_connection(&ca, nsock);
-		if (!err) {
-			if (copy_to_user(argp, &ca, sizeof(ca)))
-				err = -EFAULT;
-		} else
-			sockfd_put(nsock);
-
-		return err;
-
-	case CMTPCONNDEL:
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&cd, argp, sizeof(cd)))
-			return -EFAULT;
-
-		return cmtp_del_connection(&cd);
-
-	case CMTPGETCONNLIST:
-		if (copy_from_user(&cl, argp, sizeof(cl)))
-			return -EFAULT;
-
-		if (cl.cnum <= 0)
-			return -EINVAL;
-
-		err = cmtp_get_connlist(&cl);
-		if (!err && copy_to_user(argp, &cl, sizeof(cl)))
-			return -EFAULT;
-
-		return err;
-
-	case CMTPGETCONNINFO:
-		if (copy_from_user(&ci, argp, sizeof(ci)))
-			return -EFAULT;
-
-		err = cmtp_get_conninfo(&ci);
-		if (!err && copy_to_user(argp, &ci, sizeof(ci)))
-			return -EFAULT;
-
-		return err;
-	}
-
-	return -EINVAL;
-}
-
-static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	return do_cmtp_sock_ioctl(sock, cmd, (void __user *)arg);
-}
-
-#ifdef CONFIG_COMPAT
-static int cmtp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	void __user *argp = compat_ptr(arg);
-	if (cmd == CMTPGETCONNLIST) {
-		struct cmtp_connlist_req cl;
-		u32 __user *p = argp;
-		u32 uci;
-		int err;
-
-		if (get_user(cl.cnum, p) || get_user(uci, p + 1))
-			return -EFAULT;
-
-		cl.ci = compat_ptr(uci);
-
-		if (cl.cnum <= 0)
-			return -EINVAL;
-
-		err = cmtp_get_connlist(&cl);
-
-		if (!err && put_user(cl.cnum, p))
-			err = -EFAULT;
-
-		return err;
-	}
-
-	return do_cmtp_sock_ioctl(sock, cmd, argp);
-}
-#endif
-
-static const struct proto_ops cmtp_sock_ops = {
-	.family		= PF_BLUETOOTH,
-	.owner		= THIS_MODULE,
-	.release	= cmtp_sock_release,
-	.ioctl		= cmtp_sock_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= cmtp_sock_compat_ioctl,
-#endif
-	.bind		= sock_no_bind,
-	.getname	= sock_no_getname,
-	.sendmsg	= sock_no_sendmsg,
-	.recvmsg	= sock_no_recvmsg,
-	.listen		= sock_no_listen,
-	.shutdown	= sock_no_shutdown,
-	.connect	= sock_no_connect,
-	.socketpair	= sock_no_socketpair,
-	.accept		= sock_no_accept,
-	.mmap		= sock_no_mmap
-};
-
-static struct proto cmtp_proto = {
-	.name		= "CMTP",
-	.owner		= THIS_MODULE,
-	.obj_size	= sizeof(struct bt_sock)
-};
-
-static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol,
-			    int kern)
-{
-	struct sock *sk;
-
-	BT_DBG("sock %p", sock);
-
-	if (sock->type != SOCK_RAW)
-		return -ESOCKTNOSUPPORT;
-
-	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, kern);
-	if (!sk)
-		return -ENOMEM;
-
-	sock_init_data(sock, sk);
-
-	sock->ops = &cmtp_sock_ops;
-
-	sock->state = SS_UNCONNECTED;
-
-	sock_reset_flag(sk, SOCK_ZAPPED);
-
-	sk->sk_protocol = protocol;
-	sk->sk_state    = BT_OPEN;
-
-	bt_sock_link(&cmtp_sk_list, sk);
-
-	return 0;
-}
-
-static const struct net_proto_family cmtp_sock_family_ops = {
-	.family	= PF_BLUETOOTH,
-	.owner	= THIS_MODULE,
-	.create	= cmtp_sock_create
-};
-
-int cmtp_init_sockets(void)
-{
-	int err;
-
-	err = proto_register(&cmtp_proto, 0);
-	if (err < 0)
-		return err;
-
-	err = bt_sock_register(BTPROTO_CMTP, &cmtp_sock_family_ops);
-	if (err < 0) {
-		BT_ERR("Can't register CMTP socket");
-		goto error;
-	}
-
-	err = bt_procfs_init(&init_net, "cmtp", &cmtp_sk_list, NULL);
-	if (err < 0) {
-		BT_ERR("Failed to create CMTP proc file");
-		bt_sock_unregister(BTPROTO_HIDP);
-		goto error;
-	}
-
-	BT_INFO("CMTP socket layer initialized");
-
-	return 0;
-
-error:
-	proto_unregister(&cmtp_proto);
-	return err;
-}
-
-void cmtp_cleanup_sockets(void)
-{
-	bt_procfs_cleanup(&init_net, "cmtp");
-	bt_sock_unregister(BTPROTO_CMTP);
-	proto_unregister(&cmtp_proto);
-}
-- 
cgit v1.2.3


From dd8d4bc28ad7252610d8e79c1313a2d1e3499a51 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 20 Apr 2026 19:18:23 -0700
Subject: net: remove ax25 and amateur radio (hamradio) subsystem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the amateur radio (AX.25, NET/ROM, ROSE) protocol implementation
and all associated hamradio device drivers from the kernel tree.
This set of protocols has long been a huge bug/syzbot magnet,
and since nobody stepped up to help us deal with the influx
of the AI-generated bug reports we need to move it out of tree
to protect our sanity.

The code is moved to an out-of-tree repo:
https://github.com/linux-netdev/mod-orphan
if it's cleaned up and reworked there we can accept it back.

Minimal stub headers are kept for include/net/ax25.h (AX25_P_IP,
AX25_ADDR_LEN, ax25_address) and include/net/rose.h (ROSE_ADDR_LEN)
so that the conditional integration code in arp.c and tun.c continues
to compile and work when the out-of-tree modules are loaded.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Carlos Bilbao <carlos.bilbao@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
Link: https://patch.msgid.link/20260421021824.1293976-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/.renames.txt                         |    2 -
 Documentation/admin-guide/kernel-parameters.txt    |   18 -
 Documentation/networking/6pack.rst                 |  191 --
 Documentation/networking/ax25.rst                  |   17 -
 .../networking/device_drivers/hamradio/baycom.rst  |  174 --
 .../networking/device_drivers/hamradio/index.rst   |   12 -
 .../device_drivers/hamradio/z8530drv.rst           |  686 ------
 Documentation/networking/device_drivers/index.rst  |    1 -
 Documentation/networking/index.rst                 |    2 -
 Documentation/staging/magic-number.rst             |    3 -
 .../translations/it_IT/staging/magic-number.rst    |    3 -
 .../translations/sp_SP/process/magic-number.rst    |    3 -
 .../translations/zh_CN/process/magic-number.rst    |    3 -
 .../translations/zh_TW/process/magic-number.rst    |    3 -
 MAINTAINERS                                        |   73 -
 arch/mips/configs/bcm47xx_defconfig                |    1 -
 arch/mips/configs/bigsur_defconfig                 |   10 -
 arch/mips/configs/gpr_defconfig                    |   11 -
 arch/mips/configs/mtx1_defconfig                   |   11 -
 arch/mips/configs/rb532_defconfig                  |    1 -
 arch/mips/configs/rm200_defconfig                  |    7 -
 arch/mips/configs/rt305x_defconfig                 |    1 -
 arch/mips/configs/xway_defconfig                   |    1 -
 drivers/net/Makefile                               |    1 -
 drivers/net/hamradio/6pack.c                       |  912 --------
 drivers/net/hamradio/Kconfig                       |  162 --
 drivers/net/hamradio/Makefile                      |   22 -
 drivers/net/hamradio/baycom_epp.c                  | 1316 ------------
 drivers/net/hamradio/baycom_par.c                  |  598 ------
 drivers/net/hamradio/baycom_ser_fdx.c              |  678 ------
 drivers/net/hamradio/baycom_ser_hdx.c              |  727 -------
 drivers/net/hamradio/bpqether.c                    |  593 ------
 drivers/net/hamradio/hdlcdrv.c                     |  747 -------
 drivers/net/hamradio/mkiss.c                       |  980 ---------
 drivers/net/hamradio/scc.c                         | 2179 --------------------
 drivers/net/hamradio/yam.c                         | 1191 -----------
 drivers/net/hamradio/z8530.h                       |  246 ---
 include/linux/hdlcdrv.h                            |  276 ---
 include/linux/netdevice.h                          |    5 +-
 include/linux/scc.h                                |   86 -
 include/linux/yam.h                                |   67 -
 include/net/ax25.h                                 |  476 +----
 include/net/netrom.h                               |  273 ---
 include/net/rose.h                                 |  263 +--
 include/uapi/linux/baycom.h                        |   40 -
 include/uapi/linux/hdlcdrv.h                       |  111 -
 include/uapi/linux/netrom.h                        |   37 -
 include/uapi/linux/rose.h                          |   91 -
 include/uapi/linux/scc.h                           |  174 --
 net/Kconfig                                        |    1 -
 net/Makefile                                       |    3 -
 net/ax25/Kconfig                                   |  108 -
 net/ax25/Makefile                                  |   12 -
 net/ax25/af_ax25.c                                 | 2089 -------------------
 net/ax25/ax25_addr.c                               |  303 ---
 net/ax25/ax25_dev.c                                |  200 --
 net/ax25/ax25_ds_in.c                              |  298 ---
 net/ax25/ax25_ds_subr.c                            |  204 --
 net/ax25/ax25_ds_timer.c                           |  235 ---
 net/ax25/ax25_iface.c                              |  214 --
 net/ax25/ax25_in.c                                 |  455 ----
 net/ax25/ax25_ip.c                                 |  247 ---
 net/ax25/ax25_out.c                                |  398 ----
 net/ax25/ax25_route.c                              |  416 ----
 net/ax25/ax25_std_in.c                             |  443 ----
 net/ax25/ax25_std_subr.c                           |   83 -
 net/ax25/ax25_std_timer.c                          |  175 --
 net/ax25/ax25_subr.c                               |  296 ---
 net/ax25/ax25_timer.c                              |  224 --
 net/ax25/ax25_uid.c                                |  204 --
 net/ax25/sysctl_net_ax25.c                         |  181 --
 net/ipv4/arp.c                                     |    1 -
 net/netrom/Makefile                                |   10 -
 net/netrom/af_netrom.c                             | 1536 --------------
 net/netrom/nr_dev.c                                |  178 --
 net/netrom/nr_in.c                                 |  301 ---
 net/netrom/nr_loopback.c                           |   73 -
 net/netrom/nr_out.c                                |  272 ---
 net/netrom/nr_route.c                              |  989 ---------
 net/netrom/nr_subr.c                               |  280 ---
 net/netrom/nr_timer.c                              |  249 ---
 net/netrom/sysctl_net_netrom.c                     |  156 --
 net/rose/Makefile                                  |   10 -
 net/rose/af_rose.c                                 | 1687 ---------------
 net/rose/rose_dev.c                                |  141 --
 net/rose/rose_in.c                                 |  301 ---
 net/rose/rose_link.c                               |  289 ---
 net/rose/rose_loopback.c                           |  133 --
 net/rose/rose_out.c                                |  122 --
 net/rose/rose_route.c                              | 1333 ------------
 net/rose/rose_subr.c                               |  556 -----
 net/rose/rose_timer.c                              |  227 --
 net/rose/sysctl_net_rose.c                         |  125 --
 93 files changed, 6 insertions(+), 29237 deletions(-)
 delete mode 100644 Documentation/networking/6pack.rst
 delete mode 100644 Documentation/networking/ax25.rst
 delete mode 100644 Documentation/networking/device_drivers/hamradio/baycom.rst
 delete mode 100644 Documentation/networking/device_drivers/hamradio/index.rst
 delete mode 100644 Documentation/networking/device_drivers/hamradio/z8530drv.rst
 delete mode 100644 drivers/net/hamradio/6pack.c
 delete mode 100644 drivers/net/hamradio/Kconfig
 delete mode 100644 drivers/net/hamradio/Makefile
 delete mode 100644 drivers/net/hamradio/baycom_epp.c
 delete mode 100644 drivers/net/hamradio/baycom_par.c
 delete mode 100644 drivers/net/hamradio/baycom_ser_fdx.c
 delete mode 100644 drivers/net/hamradio/baycom_ser_hdx.c
 delete mode 100644 drivers/net/hamradio/bpqether.c
 delete mode 100644 drivers/net/hamradio/hdlcdrv.c
 delete mode 100644 drivers/net/hamradio/mkiss.c
 delete mode 100644 drivers/net/hamradio/scc.c
 delete mode 100644 drivers/net/hamradio/yam.c
 delete mode 100644 drivers/net/hamradio/z8530.h
 delete mode 100644 include/linux/hdlcdrv.h
 delete mode 100644 include/linux/scc.h
 delete mode 100644 include/linux/yam.h
 delete mode 100644 include/net/netrom.h
 delete mode 100644 include/uapi/linux/baycom.h
 delete mode 100644 include/uapi/linux/hdlcdrv.h
 delete mode 100644 include/uapi/linux/netrom.h
 delete mode 100644 include/uapi/linux/rose.h
 delete mode 100644 include/uapi/linux/scc.h
 delete mode 100644 net/ax25/Kconfig
 delete mode 100644 net/ax25/Makefile
 delete mode 100644 net/ax25/af_ax25.c
 delete mode 100644 net/ax25/ax25_addr.c
 delete mode 100644 net/ax25/ax25_dev.c
 delete mode 100644 net/ax25/ax25_ds_in.c
 delete mode 100644 net/ax25/ax25_ds_subr.c
 delete mode 100644 net/ax25/ax25_ds_timer.c
 delete mode 100644 net/ax25/ax25_iface.c
 delete mode 100644 net/ax25/ax25_in.c
 delete mode 100644 net/ax25/ax25_ip.c
 delete mode 100644 net/ax25/ax25_out.c
 delete mode 100644 net/ax25/ax25_route.c
 delete mode 100644 net/ax25/ax25_std_in.c
 delete mode 100644 net/ax25/ax25_std_subr.c
 delete mode 100644 net/ax25/ax25_std_timer.c
 delete mode 100644 net/ax25/ax25_subr.c
 delete mode 100644 net/ax25/ax25_timer.c
 delete mode 100644 net/ax25/ax25_uid.c
 delete mode 100644 net/ax25/sysctl_net_ax25.c
 delete mode 100644 net/netrom/Makefile
 delete mode 100644 net/netrom/af_netrom.c
 delete mode 100644 net/netrom/nr_dev.c
 delete mode 100644 net/netrom/nr_in.c
 delete mode 100644 net/netrom/nr_loopback.c
 delete mode 100644 net/netrom/nr_out.c
 delete mode 100644 net/netrom/nr_route.c
 delete mode 100644 net/netrom/nr_subr.c
 delete mode 100644 net/netrom/nr_timer.c
 delete mode 100644 net/netrom/sysctl_net_netrom.c
 delete mode 100644 net/rose/Makefile
 delete mode 100644 net/rose/af_rose.c
 delete mode 100644 net/rose/rose_dev.c
 delete mode 100644 net/rose/rose_in.c
 delete mode 100644 net/rose/rose_link.c
 delete mode 100644 net/rose/rose_loopback.c
 delete mode 100644 net/rose/rose_out.c
 delete mode 100644 net/rose/rose_route.c
 delete mode 100644 net/rose/rose_subr.c
 delete mode 100644 net/rose/rose_timer.c
 delete mode 100644 net/rose/sysctl_net_rose.c

(limited to 'include/linux')

diff --git a/Documentation/.renames.txt b/Documentation/.renames.txt
index a37d68471d50..e5f2f7447914 100644
--- a/Documentation/.renames.txt
+++ b/Documentation/.renames.txt
@@ -783,7 +783,6 @@ namespaces/compatibility-list admin-guide/namespaces/compatibility-list
 namespaces/index admin-guide/namespaces/index
 namespaces/resource-control admin-guide/namespaces/resource-control
 networking/altera_tse networking/device_drivers/ethernet/altera/altera_tse
-networking/baycom networking/device_drivers/hamradio/baycom
 networking/bpf_flow_dissector bpf/prog_flow_dissector
 networking/cxacru networking/device_drivers/atm/cxacru
 networking/defza networking/device_drivers/fddi/defza
@@ -848,7 +847,6 @@ networking/ixgbe networking/device_drivers/ethernet/intel/ixgbe
 networking/ixgbevf networking/device_drivers/ethernet/intel/ixgbevf
 networking/netdev-FAQ process/maintainer-netdev
 networking/skfp networking/device_drivers/fddi/skfp
-networking/z8530drv networking/device_drivers/hamradio/z8530drv
 nfc/index driver-api/nfc/index
 nfc/nfc-hci driver-api/nfc/nfc-hci
 nfc/nfc-pn544 driver-api/nfc/nfc-pn544
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f2ce1f4975c1..09354ff7cff2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6,7 +6,6 @@
 	APPARMOR AppArmor support is enabled.
 	ARM	ARM architecture is enabled.
 	ARM64	ARM64 architecture is enabled.
-	AX25	Appropriate AX.25 support is enabled.
 	CLK	Common clock infrastructure is enabled.
 	CMA	Contiguous Memory Area support is enabled.
 	DRM	Direct Rendering Management support is enabled.
@@ -633,23 +632,6 @@ Kernel parameters
 			1 - Enable the BAU.
 			unset - Disable the BAU.
 
-	baycom_epp=	[HW,AX25]
-			Format: <io>,<mode>
-
-	baycom_par=	[HW,AX25] BayCom Parallel Port AX.25 Modem
-			Format: <io>,<mode>
-			See header of drivers/net/hamradio/baycom_par.c.
-
-	baycom_ser_fdx=	[HW,AX25]
-			BayCom Serial Port AX.25 Modem (Full Duplex Mode)
-			Format: <io>,<irq>,<mode>[,<baud>]
-			See header of drivers/net/hamradio/baycom_ser_fdx.c.
-
-	baycom_ser_hdx=	[HW,AX25]
-			BayCom Serial Port AX.25 Modem (Half Duplex Mode)
-			Format: <io>,<irq>,<mode>
-			See header of drivers/net/hamradio/baycom_ser_hdx.c.
-
 	bdev_allow_write_mounted=
 			Format: <bool>
 			Control the ability to open a mounted block device
diff --git a/Documentation/networking/6pack.rst b/Documentation/networking/6pack.rst
deleted file mode 100644
index 66d5fd4fc821..000000000000
--- a/Documentation/networking/6pack.rst
+++ /dev/null
@@ -1,191 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-==============
-6pack Protocol
-==============
-
-This is the 6pack-mini-HOWTO, written by
-
-Andreas Könsgen DG3KQ
-
-:Internet: ajk@comnets.uni-bremen.de
-:AMPR-net: dg3kq@db0pra.ampr.org
-:AX.25:    dg3kq@db0ach.#nrw.deu.eu
-
-Last update: April 7, 1998
-
-1. What is 6pack, and what are the advantages to KISS?
-======================================================
-
-6pack is a transmission protocol for data exchange between the PC and
-the TNC over a serial line. It can be used as an alternative to KISS.
-
-6pack has two major advantages:
-
-- The PC is given full control over the radio
-  channel. Special control data is exchanged between the PC and the TNC so
-  that the PC knows at any time if the TNC is receiving data, if a TNC
-  buffer underrun or overrun has occurred, if the PTT is
-  set and so on. This control data is processed at a higher priority than
-  normal data, so a data stream can be interrupted at any time to issue an
-  important event. This helps to improve the channel access and timing
-  algorithms as everything is computed in the PC. It would even be possible
-  to experiment with something completely different from the known CSMA and
-  DAMA channel access methods.
-  This kind of real-time control is especially important to supply several
-  TNCs that are connected between each other and the PC by a daisy chain
-  (however, this feature is not supported yet by the Linux 6pack driver).
-
-- Each packet transferred over the serial line is supplied with a checksum,
-  so it is easy to detect errors due to problems on the serial line.
-  Received packets that are corrupt are not passed on to the AX.25 layer.
-  Damaged packets that the TNC has received from the PC are not transmitted.
-
-More details about 6pack are described in the file 6pack.ps that is located
-in the doc directory of the AX.25 utilities package.
-
-2. Who has developed the 6pack protocol?
-========================================
-
-The 6pack protocol has been developed by Ekki Plicht DF4OR, Henning Rech
-DF9IC and Gunter Jost DK7WJ. A driver for 6pack, written by Gunter Jost and
-Matthias Welwarsky DG2FEF, comes along with the PC version of FlexNet.
-They have also written a firmware for TNCs to perform the 6pack
-protocol (see section 4 below).
-
-3. Where can I get the latest version of 6pack for LinuX?
-=========================================================
-
-At the moment, the 6pack stuff can obtained via anonymous ftp from
-db0bm.automation.fh-aachen.de. In the directory /incoming/dg3kq,
-there is a file named 6pack.tgz.
-
-4. Preparing the TNC for 6pack operation
-========================================
-
-To be able to use 6pack, a special firmware for the TNC is needed. The EPROM
-of a newly bought TNC does not contain 6pack, so you will have to
-program an EPROM yourself. The image file for 6pack EPROMs should be
-available on any packet radio box where PC/FlexNet can be found. The name of
-the file is 6pack.bin. This file is copyrighted and maintained by the FlexNet
-team. It can be used under the terms of the license that comes along
-with PC/FlexNet. Please do not ask me about the internals of this file as I
-don't know anything about it. I used a textual description of the 6pack
-protocol to program the Linux driver.
-
-TNCs contain a 64kByte EPROM, the lower half of which is used for
-the firmware/KISS. The upper half is either empty or is sometimes
-programmed with software called TAPR. In the latter case, the TNC
-is supplied with a DIP switch so you can easily change between the
-two systems. When programming a new EPROM, one of the systems is replaced
-by 6pack. It is useful to replace TAPR, as this software is rarely used
-nowadays. If your TNC is not equipped with the switch mentioned above, you
-can build in one yourself that switches over the highest address pin
-of the EPROM between HIGH and LOW level. After having inserted the new EPROM
-and switched to 6pack, apply power to the TNC for a first test. The connect
-and the status LED are lit for about a second if the firmware initialises
-the TNC correctly.
-
-5. Building and installing the 6pack driver
-===========================================
-
-The driver has been tested with kernel version 2.1.90. Use with older
-kernels may lead to a compilation error because the interface to a kernel
-function has been changed in the 2.1.8x kernels.
-
-How to turn on 6pack support:
------------------------------
-
-- In the linux kernel configuration program, select the code maturity level
-  options menu and turn on the prompting for development drivers.
-
-- Select the amateur radio support menu and turn on the serial port 6pack
-  driver.
-
-- Compile and install the kernel and the modules.
-
-To use the driver, the kissattach program delivered with the AX.25 utilities
-has to be modified.
-
-- Do a cd to the directory that holds the kissattach sources. Edit the
-  kissattach.c file. At the top, insert the following lines::
-
-    #ifndef N_6PACK
-    #define N_6PACK (N_AX25+1)
-    #endif
-
-  Then find the line:
-
-    int disc = N_AX25;
-
-  and replace N_AX25 by N_6PACK.
-
-- Recompile kissattach. Rename it to spattach to avoid confusions.
-
-Installing the driver:
-----------------------
-
-- Do an insmod 6pack. Look at your /var/log/messages file to check if the
-  module has printed its initialization message.
-
-- Do a spattach as you would launch kissattach when starting a KISS port.
-  Check if the kernel prints the message '6pack: TNC found'.
-
-- From here, everything should work as if you were setting up a KISS port.
-  The only difference is that the network device that represents
-  the 6pack port is called sp instead of sl or ax. So, sp0 would be the
-  first 6pack port.
-
-Although the driver has been tested on various platforms, I still declare it
-ALPHA. BE CAREFUL! Sync your disks before insmoding the 6pack module
-and spattaching. Watch out if your computer behaves strangely. Read section
-6 of this file about known problems.
-
-Note that the connect and status LEDs of the TNC are controlled in a
-different way than they are when the TNC is used with PC/FlexNet. When using
-FlexNet, the connect LED is on if there is a connection; the status LED is
-on if there is data in the buffer of the PC's AX.25 engine that has to be
-transmitted. Under Linux, the 6pack layer is beyond the AX.25 layer,
-so the 6pack driver doesn't know anything about connects or data that
-has not yet been transmitted. Therefore the LEDs are controlled
-as they are in KISS mode: The connect LED is turned on if data is transferred
-from the PC to the TNC over the serial line, the status LED if data is
-sent to the PC.
-
-6. Known problems
-=================
-
-When testing the driver with 2.0.3x kernels and
-operating with data rates on the radio channel of 9600 Baud or higher,
-the driver may, on certain systems, sometimes print the message '6pack:
-bad checksum', which is due to data loss if the other station sends two
-or more subsequent packets. I have been told that this is due to a problem
-with the serial driver of 2.0.3x kernels. I don't know yet if the problem
-still exists with 2.1.x kernels, as I have heard that the serial driver
-code has been changed with 2.1.x.
-
-When shutting down the sp interface with ifconfig, the kernel crashes if
-there is still an AX.25 connection left over which an IP connection was
-running, even if that IP connection is already closed. The problem does not
-occur when there is a bare AX.25 connection still running. I don't know if
-this is a problem of the 6pack driver or something else in the kernel.
-
-The driver has been tested as a module, not yet as a kernel-builtin driver.
-
-The 6pack protocol supports daisy-chaining of TNCs in a token ring, which is
-connected to one serial port of the PC. This feature is not implemented
-and at least at the moment I won't be able to do it because I do not have
-the opportunity to build a TNC daisy-chain and test it.
-
-Some of the comments in the source code are inaccurate. They are left from
-the SLIP/KISS driver, from which the 6pack driver has been derived.
-I haven't modified or removed them yet -- sorry! The code itself needs
-some cleaning and optimizing. This will be done in a later release.
-
-If you encounter a bug or if you have a question or suggestion concerning the
-driver, feel free to mail me, using the addresses given at the beginning of
-this file.
-
-Have fun!
-
-Andreas
diff --git a/Documentation/networking/ax25.rst b/Documentation/networking/ax25.rst
deleted file mode 100644
index 89c79dd6c6f9..000000000000
--- a/Documentation/networking/ax25.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-=====
-AX.25
-=====
-
-To use the amateur radio protocols within Linux you will need to get a
-suitable copy of the AX.25 Utilities. More detailed information about
-AX.25, NET/ROM and ROSE, associated programs and utilities can be
-found on https://linux-ax25.in-berlin.de.
-
-There is a mailing list for discussing Linux amateur radio matters
-called linux-hams@vger.kernel.org. To subscribe to it, send a message to
-linux-hams+subscribe@vger.kernel.org or use the web interface at
-https://vger.kernel.org. The subject and body of the message are
-ignored.  You don't need to be subscribed to post but of course that
-means you might miss an answer.
diff --git a/Documentation/networking/device_drivers/hamradio/baycom.rst b/Documentation/networking/device_drivers/hamradio/baycom.rst
deleted file mode 100644
index fe2d010f0e86..000000000000
--- a/Documentation/networking/device_drivers/hamradio/baycom.rst
+++ /dev/null
@@ -1,174 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-===============================
-Linux Drivers for Baycom Modems
-===============================
-
-Thomas M. Sailer, HB9JNX/AE4WA, <sailer@ife.ee.ethz.ch>
-
-The drivers for the baycom modems have been split into
-separate drivers as they did not share any code, and the driver
-and device names have changed.
-
-This document describes the Linux Kernel Drivers for simple Baycom style
-amateur radio modems.
-
-The following drivers are available:
-====================================
-
-baycom_ser_fdx:
-  This driver supports the SER12 modems either full or half duplex.
-  Its baud rate may be changed via the ``baud`` module parameter,
-  therefore it supports just about every bit bang modem on a
-  serial port. Its devices are called bcsf0 through bcsf3.
-  This is the recommended driver for SER12 type modems,
-  however if you have a broken UART clone that does not have working
-  delta status bits, you may try baycom_ser_hdx.
-
-baycom_ser_hdx:
-  This is an alternative driver for SER12 type modems.
-  It only supports half duplex, and only 1200 baud. Its devices
-  are called bcsh0 through bcsh3. Use this driver only if baycom_ser_fdx
-  does not work with your UART.
-
-baycom_par:
-  This driver supports the par96 and picpar modems.
-  Its devices are called bcp0 through bcp3.
-
-baycom_epp:
-  This driver supports the EPP modem.
-  Its devices are called bce0 through bce3.
-  This driver is work-in-progress.
-
-The following modems are supported:
-
-======= ========================================================================
-ser12   This is a very simple 1200 baud AFSK modem. The modem consists only
-	of a modulator/demodulator chip, usually a TI TCM3105. The computer
-	is responsible for regenerating the receiver bit clock, as well as
-	for handling the HDLC protocol. The modem connects to a serial port,
-	hence the name. Since the serial port is not used as an async serial
-	port, the kernel driver for serial ports cannot be used, and this
-	driver only supports standard serial hardware (8250, 16450, 16550)
-
-par96   This is a modem for 9600 baud FSK compatible to the G3RUH standard.
-	The modem does all the filtering and regenerates the receiver clock.
-	Data is transferred from and to the PC via a shift register.
-	The shift register is filled with 16 bits and an interrupt is signalled.
-	The PC then empties the shift register in a burst. This modem connects
-	to the parallel port, hence the name. The modem leaves the
-	implementation of the HDLC protocol and the scrambler polynomial to
-	the PC.
-
-picpar  This is a redesign of the par96 modem by Henning Rech, DF9IC. The modem
-	is protocol compatible to par96, but uses only three low power ICs
-	and can therefore be fed from the parallel port and does not require
-	an additional power supply. Furthermore, it incorporates a carrier
-	detect circuitry.
-
-EPP     This is a high-speed modem adaptor that connects to an enhanced parallel
-	port.
-
-	Its target audience is users working over a high speed hub (76.8kbit/s).
-
-eppfpga This is a redesign of the EPP adaptor.
-======= ========================================================================
-
-All of the above modems only support half duplex communications. However,
-the driver supports the KISS (see below) fullduplex command. It then simply
-starts to send as soon as there's a packet to transmit and does not care
-about DCD, i.e. it starts to send even if there's someone else on the channel.
-This command is required by some implementations of the DAMA channel
-access protocol.
-
-
-The Interface of the drivers
-============================
-
-Unlike previous drivers, these drivers are no longer character devices,
-but they are now true kernel network interfaces. Installation is therefore
-simple. Once installed, four interfaces named bc{sf,sh,p,e}[0-3] are available.
-sethdlc from the ax25 utilities may be used to set driver states etc.
-Users of userland AX.25 stacks may use the net2kiss utility (also available
-in the ax25 utilities package) to convert packets of a network interface
-to a KISS stream on a pseudo tty. There's also a patch available from
-me for WAMPES which allows attaching a kernel network interface directly.
-
-
-Configuring the driver
-======================
-
-Every time a driver is inserted into the kernel, it has to know which
-modems it should access at which ports. This can be done with the setbaycom
-utility. If you are only using one modem, you can also configure the
-driver from the insmod command line (or by means of an option line in
-``/etc/modprobe.d/*.conf``).
-
-Examples::
-
-  modprobe baycom_ser_fdx mode="ser12*" iobase=0x3f8 irq=4
-  sethdlc -i bcsf0 -p mode "ser12*" io 0x3f8 irq 4
-
-Both lines configure the first port to drive a ser12 modem at the first
-serial port (COM1 under DOS). The * in the mode parameter instructs the driver
-to use the software DCD algorithm (see below)::
-
-  insmod baycom_par mode="picpar" iobase=0x378
-  sethdlc -i bcp0 -p mode "picpar" io 0x378
-
-Both lines configure the first port to drive a picpar modem at the
-first parallel port (LPT1 under DOS). (Note: picpar implies
-hardware DCD, par96 implies software DCD).
-
-The channel access parameters can be set with sethdlc -a or kissparms.
-Note that both utilities interpret the values slightly differently.
-
-
-Hardware DCD versus Software DCD
-================================
-
-To avoid collisions on the air, the driver must know when the channel is
-busy. This is the task of the DCD circuitry/software. The driver may either
-utilise a software DCD algorithm (options=1) or use a DCD signal from
-the hardware (options=0).
-
-======= =================================================================
-ser12   if software DCD is utilised, the radio's squelch should always be
-	open. It is highly recommended to use the software DCD algorithm,
-	as it is much faster than most hardware squelch circuitry. The
-	disadvantage is a slightly higher load on the system.
-
-par96   the software DCD algorithm for this type of modem is rather poor.
-	The modem simply does not provide enough information to implement
-	a reasonable DCD algorithm in software. Therefore, if your radio
-	feeds the DCD input of the PAR96 modem, the use of the hardware
-	DCD circuitry is recommended.
-
-picpar  the picpar modem features a builtin DCD hardware, which is highly
-	recommended.
-======= =================================================================
-
-
-
-Compatibility with the rest of the Linux kernel
-===============================================
-
-The serial driver and the baycom serial drivers compete
-for the same hardware resources. Of course only one driver can access a given
-interface at a time. The serial driver grabs all interfaces it can find at
-startup time. Therefore the baycom drivers subsequently won't be able to
-access a serial port. You might therefore find it necessary to release
-a port owned by the serial driver with 'setserial /dev/ttyS# uart none', where
-# is the number of the interface. The baycom drivers do not reserve any
-ports at startup, unless one is specified on the 'insmod' command line. Another
-method to solve the problem is to compile all drivers as modules and
-leave it to kmod to load the correct driver depending on the application.
-
-The parallel port drivers (baycom_par, baycom_epp) now use the parport subsystem
-to arbitrate the ports between different client drivers.
-
-vy 73s de
-
-Tom Sailer, sailer@ife.ee.ethz.ch
-
-hb9jnx @ hb9w.ampr.org
diff --git a/Documentation/networking/device_drivers/hamradio/index.rst b/Documentation/networking/device_drivers/hamradio/index.rst
deleted file mode 100644
index 6af481c5b020..000000000000
--- a/Documentation/networking/device_drivers/hamradio/index.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-
-Amateur Radio Device Drivers
-============================
-
-Contents:
-
-.. toctree::
-   :maxdepth: 2
-
-   baycom
-   z8530drv
diff --git a/Documentation/networking/device_drivers/hamradio/z8530drv.rst b/Documentation/networking/device_drivers/hamradio/z8530drv.rst
deleted file mode 100644
index d2942760f167..000000000000
--- a/Documentation/networking/device_drivers/hamradio/z8530drv.rst
+++ /dev/null
@@ -1,686 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-.. include:: <isonum.txt>
-
-=========================================================
-SCC.C - Linux driver for Z8530 based HDLC cards for AX.25
-=========================================================
-
-
-This is a subset of the documentation. To use this driver you MUST have the
-full package from:
-
-Internet:
-
-    1. ftp://ftp.ccac.rwth-aachen.de/pub/jr/z8530drv-utils_3.0-3.tar.gz
-
-    2. ftp://ftp.pspt.fi/pub/ham/linux/ax25/z8530drv-utils_3.0-3.tar.gz
-
-Please note that the information in this document may be hopelessly outdated.
-A new version of the documentation, along with links to other important
-Linux Kernel AX.25 documentation and programs, is available on
-http://yaina.de/jreuter
-
-Copyright |copy| 1993,2000 by Joerg Reuter DL1BKE <jreuter@yaina.de>
-
-portions Copyright |copy| 1993 Guido ten Dolle PE1NNZ
-
-for the complete copyright notice see >> Copying.Z8530DRV <<
-
-1. Initialization of the driver
-===============================
-
-To use the driver, 3 steps must be performed:
-
-     1. if compiled as module: loading the module
-     2. Setup of hardware, MODEM and KISS parameters with sccinit
-     3. Attach each channel to the Linux kernel AX.25 with "ifconfig"
-
-Unlike the versions below 2.4 this driver is a real network device
-driver. If you want to run xNOS instead of our fine kernel AX.25
-use a 2.x version (available from above sites) or read the
-AX.25-HOWTO on how to emulate a KISS TNC on network device drivers.
-
-
-1.1 Loading the module
-======================
-
-(If you're going to compile the driver as a part of the kernel image,
- skip this chapter and continue with 1.2)
-
-Before you can use a module, you'll have to load it with::
-
-	insmod scc.o
-
-please read 'man insmod' that comes with module-init-tools.
-
-You should include the insmod in one of the /etc/rc.d/rc.* files,
-and don't forget to insert a call of sccinit after that. It
-will read your /etc/z8530drv.conf.
-
-1.2. /etc/z8530drv.conf
-=======================
-
-To setup all parameters you must run /sbin/sccinit from one
-of your rc.*-files. This has to be done BEFORE you can
-"ifconfig" an interface. Sccinit reads the file /etc/z8530drv.conf
-and sets the hardware, MODEM and KISS parameters. A sample file is
-delivered with this package. Change it to your needs.
-
-The file itself consists of two main sections.
-
-1.2.1 configuration of hardware parameters
-==========================================
-
-The hardware setup section defines the following parameters for each
-Z8530::
-
-    chip    1
-    data_a  0x300                   # data port A
-    ctrl_a  0x304                   # control port A
-    data_b  0x301                   # data port B
-    ctrl_b  0x305                   # control port B
-    irq     5                       # IRQ No. 5
-    pclock  4915200                 # clock
-    board   BAYCOM                  # hardware type
-    escc    no                      # enhanced SCC chip? (8580/85180/85280)
-    vector  0                       # latch for interrupt vector
-    special no                      # address of special function register
-    option  0                       # option to set via sfr
-
-
-chip
-	- this is just a delimiter to make sccinit a bit simpler to
-	  program. A parameter has no effect.
-
-data_a
-	- the address of the data port A of this Z8530 (needed)
-ctrl_a
-	- the address of the control port A (needed)
-data_b
-	- the address of the data port B (needed)
-ctrl_b
-	- the address of the control port B (needed)
-
-irq
-	- the used IRQ for this chip. Different chips can use different
-	  IRQs or the same. If they share an interrupt, it needs to be
-	  specified within one chip-definition only.
-
-pclock  - the clock at the PCLK pin of the Z8530 (option, 4915200 is
-	  default), measured in Hertz
-
-board
-	- the "type" of the board:
-
-	   =======================  ========
-	   SCC type                 value
-	   =======================  ========
-	   PA0HZP SCC card          PA0HZP
-	   EAGLE card               EAGLE
-	   PC100 card               PC100
-	   PRIMUS-PC (DG9BL) card   PRIMUS
-	   BayCom (U)SCC card       BAYCOM
-	   =======================  ========
-
-escc
-	- if you want support for ESCC chips (8580, 85180, 85280), set
-	  this to "yes" (option, defaults to "no")
-
-vector
-	- address of the vector latch (aka "intack port") for PA0HZP
-	  cards. There can be only one vector latch for all chips!
-	  (option, defaults to 0)
-
-special
-	- address of the special function register on several cards.
-	  (option, defaults to 0)
-
-option  - The value you write into that register (option, default is 0)
-
-You can specify up to four chips (8 channels). If this is not enough,
-just change::
-
-	#define MAXSCC 4
-
-to a higher value.
-
-Example for the BAYCOM USCC:
-----------------------------
-
-::
-
-	chip    1
-	data_a  0x300                   # data port A
-	ctrl_a  0x304                   # control port A
-	data_b  0x301                   # data port B
-	ctrl_b  0x305                   # control port B
-	irq     5                       # IRQ No. 5 (#)
-	board   BAYCOM                  # hardware type (*)
-	#
-	# SCC chip 2
-	#
-	chip    2
-	data_a  0x302
-	ctrl_a  0x306
-	data_b  0x303
-	ctrl_b  0x307
-	board   BAYCOM
-
-An example for a PA0HZP card:
------------------------------
-
-::
-
-	chip 1
-	data_a 0x153
-	data_b 0x151
-	ctrl_a 0x152
-	ctrl_b 0x150
-	irq 9
-	pclock 4915200
-	board PA0HZP
-	vector 0x168
-	escc no
-	#
-	#
-	#
-	chip 2
-	data_a 0x157
-	data_b 0x155
-	ctrl_a 0x156
-	ctrl_b 0x154
-	irq 9
-	pclock 4915200
-	board PA0HZP
-	vector 0x168
-	escc no
-
-A DRSI would should probably work with this:
---------------------------------------------
-(actually: two DRSI cards...)
-
-::
-
-	chip 1
-	data_a 0x303
-	data_b 0x301
-	ctrl_a 0x302
-	ctrl_b 0x300
-	irq 7
-	pclock 4915200
-	board DRSI
-	escc no
-	#
-	#
-	#
-	chip 2
-	data_a 0x313
-	data_b 0x311
-	ctrl_a 0x312
-	ctrl_b 0x310
-	irq 7
-	pclock 4915200
-	board DRSI
-	escc no
-
-Note that you cannot use the on-board baudrate generator off DRSI
-cards. Use "mode dpll" for clock source (see below).
-
-This is based on information provided by Mike Bilow (and verified
-by Paul Helay)
-
-The utility "gencfg"
---------------------
-
-If you only know the parameters for the PE1CHL driver for DOS,
-run gencfg. It will generate the correct port addresses (I hope).
-Its parameters are exactly the same as the ones you use with
-the "attach scc" command in net, except that the string "init" must
-not appear. Example::
-
-	gencfg 2 0x150 4 2 0 1 0x168 9 4915200
-
-will print a skeleton z8530drv.conf for the OptoSCC to stdout.
-
-::
-
-	gencfg 2 0x300 2 4 5 -4 0 7 4915200 0x10
-
-does the same for the BAYCOM USCC card. In my opinion it is much easier
-to edit scc_config.h...
-
-
-1.2.2 channel configuration
-===========================
-
-The channel definition is divided into three sub sections for each
-channel:
-
-An example for scc0::
-
-	# DEVICE
-
-	device scc0	# the device for the following params
-
-	# MODEM / BUFFERS
-
-	speed 1200		# the default baudrate
-	clock dpll		# clock source:
-				# 	dpll     = normal half duplex operation
-				# 	external = MODEM provides own Rx/Tx clock
-				#	divider  = use full duplex divider if
-				#		   installed (1)
-	mode nrzi		# HDLC encoding mode
-				#	nrzi = 1k2 MODEM, G3RUH 9k6 MODEM
-				#	nrz  = DF9IC 9k6 MODEM
-				#
-	bufsize	384		# size of buffers. Note that this must include
-				# the AX.25 header, not only the data field!
-				# (optional, defaults to 384)
-
-	# KISS (Layer 1)
-
-	txdelay 36              # (see chapter 1.4)
-	persist 64
-	slot    8
-	tail    8
-	fulldup 0
-	wait    12
-	min     3
-	maxkey  7
-	idle    3
-	maxdef  120
-	group   0
-	txoff   off
-	softdcd on
-	slip    off
-
-The order WITHIN these sections is unimportant. The order OF these
-sections IS important. The MODEM parameters are set with the first
-recognized KISS parameter...
-
-Please note that you can initialize the board only once after boot
-(or insmod). You can change all parameters but "mode" and "clock"
-later with the Sccparam program or through KISS. Just to avoid
-security holes...
-
-(1) this divider is usually mounted on the SCC-PBC (PA0HZP) or not
-    present at all (BayCom). It feeds back the output of the DPLL
-    (digital pll) as transmit clock. Using this mode without a divider
-    installed will normally result in keying the transceiver until
-    maxkey expires --- of course without sending anything (useful).
-
-2. Attachment of a channel by your AX.25 software
-=================================================
-
-2.1 Kernel AX.25
-================
-
-To set up an AX.25 device you can simply type::
-
-	ifconfig scc0 44.128.1.1 hw ax25 dl0tha-7
-
-This will create a network interface with the IP number 44.128.20.107
-and the callsign "dl0tha". If you do not have any IP number (yet) you
-can use any of the 44.128.0.0 network. Note that you do not need
-axattach. The purpose of axattach (like slattach) is to create a KISS
-network device linked to a TTY. Please read the documentation of the
-ax25-utils and the AX.25-HOWTO to learn how to set the parameters of
-the kernel AX.25.
-
-2.2 NOS, NET and TFKISS
-=======================
-
-Since the TTY driver (aka KISS TNC emulation) is gone you need
-to emulate the old behaviour. The cost of using these programs is
-that you probably need to compile the kernel AX.25, regardless of whether
-you actually use it or not. First setup your /etc/ax25/axports,
-for example::
-
-	9k6	dl0tha-9  9600  255 4 9600 baud port (scc3)
-	axlink	dl0tha-15 38400 255 4 Link to NOS
-
-Now "ifconfig" the scc device::
-
-	ifconfig scc3 44.128.1.1 hw ax25 dl0tha-9
-
-You can now axattach a pseudo-TTY::
-
-	axattach /dev/ptys0 axlink
-
-and start your NOS and attach /dev/ptys0 there. The problem is that
-NOS is reachable only via digipeating through the kernel AX.25
-(disastrous on a DAMA controlled channel). To solve this problem,
-configure "rxecho" to echo the incoming frames from "9k6" to "axlink"
-and outgoing frames from "axlink" to "9k6" and start::
-
-	rxecho
-
-Or simply use "kissbridge" coming with z8530drv-utils::
-
-	ifconfig scc3 hw ax25 dl0tha-9
-	kissbridge scc3 /dev/ptys0
-
-
-3. Adjustment and Display of parameters
-=======================================
-
-3.1 Displaying SCC Parameters:
-==============================
-
-Once a SCC channel has been attached, the parameter settings and
-some statistic information can be shown using the param program::
-
-	dl1bke-u:~$ sccstat scc0
-
-	Parameters:
-
-	speed       : 1200 baud
-	txdelay     : 36
-	persist     : 255
-	slottime    : 0
-	txtail      : 8
-	fulldup     : 1
-	waittime    : 12
-	mintime     : 3 sec
-	maxkeyup    : 7 sec
-	idletime    : 3 sec
-	maxdefer    : 120 sec
-	group       : 0x00
-	txoff       : off
-	softdcd     : on
-	SLIP        : off
-
-	Status:
-
-	HDLC                  Z8530           Interrupts         Buffers
-	-----------------------------------------------------------------------
-	Sent       :     273  RxOver :     0  RxInts :   125074  Size    :  384
-	Received   :    1095  TxUnder:     0  TxInts :     4684  NoSpace :    0
-	RxErrors   :    1591                  ExInts :    11776
-	TxErrors   :       0                  SpInts :     1503
-	Tx State   :    idle
-
-
-The status info shown is:
-
-==============	==============================================================
-Sent		number of frames transmitted
-Received	number of frames received
-RxErrors	number of receive errors (CRC, ABORT)
-TxErrors	number of discarded Tx frames (due to various reasons)
-Tx State	status of the Tx interrupt handler: idle/busy/active/tail (2)
-RxOver		number of receiver overruns
-TxUnder		number of transmitter underruns
-RxInts		number of receiver interrupts
-TxInts		number of transmitter interrupts
-EpInts		number of receiver special condition interrupts
-SpInts		number of external/status interrupts
-Size		maximum size of an AX.25 frame (*with* AX.25 headers!)
-NoSpace		number of times a buffer could not get allocated
-==============	==============================================================
-
-An overrun is abnormal. If lots of these occur, the product of
-baudrate and number of interfaces is too high for the processing
-power of your computer. NoSpace errors are unlikely to be caused by the
-driver or the kernel AX.25.
-
-
-3.2 Setting Parameters
-======================
-
-
-The setting of parameters of the emulated KISS TNC is done in the
-same way in the SCC driver. You can change parameters by using
-the kissparms program from the ax25-utils package or use the program
-"sccparam"::
-
-     sccparam <device> <paramname> <decimal-|hexadecimal value>
-
-You can change the following parameters:
-
-===========   =====
-param	      value
-===========   =====
-speed         1200
-txdelay       36
-persist       255
-slottime      0
-txtail        8
-fulldup       1
-waittime      12
-mintime       3
-maxkeyup      7
-idletime      3
-maxdefer      120
-group         0x00
-txoff         off
-softdcd       on
-SLIP          off
-===========   =====
-
-
-The parameters have the following meaning:
-
-speed:
-     The baudrate on this channel in bits/sec
-
-     Example: sccparam /dev/scc3 speed 9600
-
-txdelay:
-     The delay (in units of 10 ms) after keying of the
-     transmitter, until the first byte is sent. This is usually
-     called "TXDELAY" in a TNC.  When 0 is specified, the driver
-     will just wait until the CTS signal is asserted. This
-     assumes the presence of a timer or other circuitry in the
-     MODEM and/or transmitter, that asserts CTS when the
-     transmitter is ready for data.
-     A normal value of this parameter is 30-36.
-
-     Example: sccparam /dev/scc0 txd 20
-
-persist:
-     This is the probability that the transmitter will be keyed
-     when the channel is found to be free.  It is a value from 0
-     to 255, and the probability is (value+1)/256.  The value
-     should be somewhere near 50-60, and should be lowered when
-     the channel is used more heavily.
-
-     Example: sccparam /dev/scc2 persist 20
-
-slottime:
-     This is the time between samples of the channel. It is
-     expressed in units of 10 ms.  About 200-300 ms (value 20-30)
-     seems to be a good value.
-
-     Example: sccparam /dev/scc0 slot 20
-
-tail:
-     The time the transmitter will remain keyed after the last
-     byte of a packet has been transferred to the SCC. This is
-     necessary because the CRC and a flag still have to leave the
-     SCC before the transmitter is keyed down. The value depends
-     on the baudrate selected.  A few character times should be
-     sufficient, e.g. 40ms at 1200 baud. (value 4)
-     The value of this parameter is in 10 ms units.
-
-     Example: sccparam /dev/scc2 4
-
-full:
-     The full-duplex mode switch. This can be one of the following
-     values:
-
-     0:   The interface will operate in CSMA mode (the normal
-	  half-duplex packet radio operation)
-     1:   Fullduplex mode, i.e. the transmitter will be keyed at
-	  any time, without checking the received carrier.  It
-	  will be unkeyed when there are no packets to be sent.
-     2:   Like 1, but the transmitter will remain keyed, also
-	  when there are no packets to be sent.  Flags will be
-	  sent in that case, until a timeout (parameter 10)
-	  occurs.
-
-     Example: sccparam /dev/scc0 fulldup off
-
-wait:
-     The initial waittime before any transmit attempt, after the
-     frame has been queue for transmit.  This is the length of
-     the first slot in CSMA mode.  In full duplex modes it is
-     set to 0 for maximum performance.
-     The value of this parameter is in 10 ms units.
-
-     Example: sccparam /dev/scc1 wait 4
-
-maxkey:
-     The maximal time the transmitter will be keyed to send
-     packets, in seconds.  This can be useful on busy CSMA
-     channels, to avoid "getting a bad reputation" when you are
-     generating a lot of traffic.  After the specified time has
-     elapsed, no new frame will be started. Instead, the trans-
-     mitter will be switched off for a specified time (parameter
-     min), and then the selected algorithm for keyup will be
-     started again.
-     The value 0 as well as "off" will disable this feature,
-     and allow infinite transmission time.
-
-     Example: sccparam /dev/scc0 maxk 20
-
-min:
-     This is the time the transmitter will be switched off when
-     the maximum transmission time is exceeded.
-
-     Example: sccparam /dev/scc3 min 10
-
-idle:
-     This parameter specifies the maximum idle time in full duplex
-     2 mode, in seconds.  When no frames have been sent for this
-     time, the transmitter will be keyed down.  A value of 0 is
-     has same result as the fullduplex mode 1. This parameter
-     can be disabled.
-
-     Example: sccparam /dev/scc2 idle off	# transmit forever
-
-maxdefer
-     This is the maximum time (in seconds) to wait for a free channel
-     to send. When this timer expires the transmitter will be keyed
-     IMMEDIATELY. If you love to get trouble with other users you
-     should set this to a very low value ;-)
-
-     Example: sccparam /dev/scc0 maxdefer 240	# 2 minutes
-
-
-txoff:
-     When this parameter has the value 0, the transmission of packets
-     is enable. Otherwise it is disabled.
-
-     Example: sccparam /dev/scc2 txoff on
-
-group:
-     It is possible to build special radio equipment to use more than
-     one frequency on the same band, e.g. using several receivers and
-     only one transmitter that can be switched between frequencies.
-     Also, you can connect several radios that are active on the same
-     band.  In these cases, it is not possible, or not a good idea, to
-     transmit on more than one frequency.  The SCC driver provides a
-     method to lock transmitters on different interfaces, using the
-     "param <interface> group <x>" command.  This will only work when
-     you are using CSMA mode (parameter full = 0).
-
-     The number <x> must be 0 if you want no group restrictions, and
-     can be computed as follows to create restricted groups:
-     <x> is the sum of some OCTAL numbers:
-
-
-     ===  =======================================================
-     200  This transmitter will only be keyed when all other
-	  transmitters in the group are off.
-     100  This transmitter will only be keyed when the carrier
-	  detect of all other interfaces in the group is off.
-     0xx  A byte that can be used to define different groups.
-	  Interfaces are in the same group, when the logical AND
-	  between their xx values is nonzero.
-     ===  =======================================================
-
-     Examples:
-
-     When 2 interfaces use group 201, their transmitters will never be
-     keyed at the same time.
-
-     When 2 interfaces use group 101, the transmitters will only key
-     when both channels are clear at the same time.  When group 301,
-     the transmitters will not be keyed at the same time.
-
-     Don't forget to convert the octal numbers into decimal before
-     you set the parameter.
-
-     Example: (to be written)
-
-softdcd:
-     use a software dcd instead of the real one... Useful for a very
-     slow squelch.
-
-     Example: sccparam /dev/scc0 soft on
-
-
-4. Problems
-===========
-
-If you have tx-problems with your BayCom USCC card please check
-the manufacturer of the 8530. SGS chips have a slightly
-different timing. Try Zilog...  A solution is to write to register 8
-instead to the data port, but this won't work with the ESCC chips.
-*SIGH!*
-
-A very common problem is that the PTT locks until the maxkeyup timer
-expires, although interrupts and clock source are correct. In most
-cases compiling the driver with CONFIG_SCC_DELAY (set with
-make config) solves the problems. For more hints read the (pseudo) FAQ
-and the documentation coming with z8530drv-utils.
-
-I got reports that the driver has problems on some 386-based systems.
-(i.e. Amstrad) Those systems have a bogus AT bus timing which will
-lead to delayed answers on interrupts. You can recognize these
-problems by looking at the output of Sccstat for the suspected
-port. If it shows under- and overruns you own such a system.
-
-Delayed processing of received data: This depends on
-
-- the kernel version
-
-- kernel profiling compiled or not
-
-- a high interrupt load
-
-- a high load of the machine --- running X, Xmorph, XV and Povray,
-  while compiling the kernel... hmm ... even with 32 MB RAM ...  ;-)
-  Or running a named for the whole .ampr.org domain on an 8 MB
-  box...
-
-- using information from rxecho or kissbridge.
-
-Kernel panics: please read /linux/README and find out if it
-really occurred within the scc driver.
-
-If you cannot solve a problem, send me
-
-- a description of the problem,
-- information on your hardware (computer system, scc board, modem)
-- your kernel version
-- the output of cat /proc/net/z8530
-
-4. Thor RLC100
-==============
-
-Mysteriously this board seems not to work with the driver. Anyone
-got it up-and-running?
-
-
-Many thanks to Linus Torvalds and Alan Cox for including the driver
-in the Linux standard distribution and their support.
-
-::
-
-	Joerg Reuter	ampr-net: dl1bke@db0pra.ampr.org
-			AX-25   : DL1BKE @ DB0ABH.#BAY.DEU.EU
-			Internet: jreuter@yaina.de
-			WWW     : http://yaina.de/jreuter
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 1df51c9f7827..1f54f01d24be 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -13,6 +13,5 @@ Contents:
    cellular/index
    ethernet/index
    fddi/index
-   hamradio/index
    wifi/index
    wwan/index
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 2e946924ad3f..44a422ad3b05 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -40,11 +40,9 @@ Contents:
    tls-handshake
    nfc
    6lowpan
-   6pack
    arcnet-hardware
    arcnet
    atm
-   ax25
    bonding
    cdc_mbim
    dctcp
diff --git a/Documentation/staging/magic-number.rst b/Documentation/staging/magic-number.rst
index 79afddf0e692..670d3189a976 100644
--- a/Documentation/staging/magic-number.rst
+++ b/Documentation/staging/magic-number.rst
@@ -72,11 +72,8 @@ PG_MAGIC              'P'              pg_{read,write}_hdr      ``include/uapi/l
 APM_BIOS_MAGIC        0x4101           apm_user                 ``arch/x86/kernel/apm_32.c``
 FASYNC_MAGIC          0x4601           fasync_struct            ``include/linux/fs.h``
 SLIP_MAGIC            0x5302           slip                     ``drivers/net/slip/slip.h``
-BAYCOM_MAGIC          19730510         baycom_state             ``drivers/net/hamradio/baycom_epp.c``
-HDLCDRV_MAGIC         0x5ac6e778       hdlcdrv_state            ``include/linux/hdlcdrv.h``
 KV_MAGIC              0x5f4b565f       kernel_vars_s            ``arch/mips/include/asm/sn/klkernvars.h``
 CODA_MAGIC            0xC0DAC0DA       coda_file_info           ``fs/coda/coda_fs_i.h``
-YAM_MAGIC             0xF10A7654       yam_port                 ``drivers/net/hamradio/yam.c``
 CCB_MAGIC             0xf2691ad2       ccb                      ``drivers/scsi/ncr53c8xx.c``
 QUEUE_MAGIC_FREE      0xf7e1c9a3       queue_entry              ``drivers/scsi/arm/queue.c``
 QUEUE_MAGIC_USED      0xf7e1cc33       queue_entry              ``drivers/scsi/arm/queue.c``
diff --git a/Documentation/translations/it_IT/staging/magic-number.rst b/Documentation/translations/it_IT/staging/magic-number.rst
index cd8f23571835..43dd6398300b 100644
--- a/Documentation/translations/it_IT/staging/magic-number.rst
+++ b/Documentation/translations/it_IT/staging/magic-number.rst
@@ -78,11 +78,8 @@ PG_MAGIC              'P'              pg_{read,write}_hdr      ``include/linux/
 APM_BIOS_MAGIC        0x4101           apm_user                 ``arch/x86/kernel/apm_32.c``
 FASYNC_MAGIC          0x4601           fasync_struct            ``include/linux/fs.h``
 SLIP_MAGIC            0x5302           slip                     ``drivers/net/slip.h``
-BAYCOM_MAGIC          0x19730510       baycom_state             ``drivers/net/baycom_epp.c``
-HDLCDRV_MAGIC         0x5ac6e778       hdlcdrv_state            ``include/linux/hdlcdrv.h``
 KV_MAGIC              0x5f4b565f       kernel_vars_s            ``arch/mips/include/asm/sn/klkernvars.h``
 CODA_MAGIC            0xC0DAC0DA       coda_file_info           ``fs/coda/coda_fs_i.h``
-YAM_MAGIC             0xF10A7654       yam_port                 ``drivers/net/hamradio/yam.c``
 CCB_MAGIC             0xf2691ad2       ccb                      ``drivers/scsi/ncr53c8xx.c``
 QUEUE_MAGIC_FREE      0xf7e1c9a3       queue_entry              ``drivers/scsi/arm/queue.c``
 QUEUE_MAGIC_USED      0xf7e1cc33       queue_entry              ``drivers/scsi/arm/queue.c``
diff --git a/Documentation/translations/sp_SP/process/magic-number.rst b/Documentation/translations/sp_SP/process/magic-number.rst
index beb4b4c1de11..f5b4c3f2849f 100644
--- a/Documentation/translations/sp_SP/process/magic-number.rst
+++ b/Documentation/translations/sp_SP/process/magic-number.rst
@@ -77,11 +77,8 @@ PG_MAGIC              'P'              pg_{read,write}_hdr      ``include/linux/
 APM_BIOS_MAGIC        0x4101           apm_user                 ``arch/x86/kernel/apm_32.c``
 FASYNC_MAGIC          0x4601           fasync_struct            ``include/linux/fs.h``
 SLIP_MAGIC            0x5302           slip                     ``drivers/net/slip.h``
-BAYCOM_MAGIC          0x19730510       baycom_state             ``drivers/net/baycom_epp.c``
-HDLCDRV_MAGIC         0x5ac6e778       hdlcdrv_state            ``include/linux/hdlcdrv.h``
 KV_MAGIC              0x5f4b565f       kernel_vars_s            ``arch/mips/include/asm/sn/klkernvars.h``
 CODA_MAGIC            0xC0DAC0DA       coda_file_info           ``fs/coda/coda_fs_i.h``
-YAM_MAGIC             0xF10A7654       yam_port                 ``drivers/net/hamradio/yam.c``
 CCB_MAGIC             0xf2691ad2       ccb                      ``drivers/scsi/ncr53c8xx.c``
 QUEUE_MAGIC_FREE      0xf7e1c9a3       queue_entry              ``drivers/scsi/arm/queue.c``
 QUEUE_MAGIC_USED      0xf7e1cc33       queue_entry              ``drivers/scsi/arm/queue.c``
diff --git a/Documentation/translations/zh_CN/process/magic-number.rst b/Documentation/translations/zh_CN/process/magic-number.rst
index 4ebc84cc0c54..05ee75cf4346 100644
--- a/Documentation/translations/zh_CN/process/magic-number.rst
+++ b/Documentation/translations/zh_CN/process/magic-number.rst
@@ -70,11 +70,8 @@ PG_MAGIC              'P'              pg_{read,write}_hdr      ``include/linux/
 APM_BIOS_MAGIC        0x4101           apm_user                 ``arch/x86/kernel/apm_32.c``
 FASYNC_MAGIC          0x4601           fasync_struct            ``include/linux/fs.h``
 SLIP_MAGIC            0x5302           slip                     ``drivers/net/slip.h``
-BAYCOM_MAGIC          0x19730510       baycom_state             ``drivers/net/baycom_epp.c``
-HDLCDRV_MAGIC         0x5ac6e778       hdlcdrv_state            ``include/linux/hdlcdrv.h``
 KV_MAGIC              0x5f4b565f       kernel_vars_s            ``arch/mips/include/asm/sn/klkernvars.h``
 CODA_MAGIC            0xC0DAC0DA       coda_file_info           ``fs/coda/coda_fs_i.h``
-YAM_MAGIC             0xF10A7654       yam_port                 ``drivers/net/hamradio/yam.c``
 CCB_MAGIC             0xf2691ad2       ccb                      ``drivers/scsi/ncr53c8xx.c``
 QUEUE_MAGIC_FREE      0xf7e1c9a3       queue_entry              ``drivers/scsi/arm/queue.c``
 QUEUE_MAGIC_USED      0xf7e1cc33       queue_entry              ``drivers/scsi/arm/queue.c``
diff --git a/Documentation/translations/zh_TW/process/magic-number.rst b/Documentation/translations/zh_TW/process/magic-number.rst
index 5582df6d7ca7..bc7eb025dd1e 100644
--- a/Documentation/translations/zh_TW/process/magic-number.rst
+++ b/Documentation/translations/zh_TW/process/magic-number.rst
@@ -64,11 +64,8 @@ PG_MAGIC              'P'              pg_{read,write}_hdr      ``include/linux/
 APM_BIOS_MAGIC        0x4101           apm_user                 ``arch/x86/kernel/apm_32.c``
 FASYNC_MAGIC          0x4601           fasync_struct            ``include/linux/fs.h``
 SLIP_MAGIC            0x5302           slip                     ``drivers/net/slip.h``
-BAYCOM_MAGIC          0x19730510       baycom_state             ``drivers/net/baycom_epp.c``
-HDLCDRV_MAGIC         0x5ac6e778       hdlcdrv_state            ``include/linux/hdlcdrv.h``
 KV_MAGIC              0x5f4b565f       kernel_vars_s            ``arch/mips/include/asm/sn/klkernvars.h``
 CODA_MAGIC            0xC0DAC0DA       coda_file_info           ``fs/coda/coda_fs_i.h``
-YAM_MAGIC             0xF10A7654       yam_port                 ``drivers/net/hamradio/yam.c``
 CCB_MAGIC             0xf2691ad2       ccb                      ``drivers/scsi/ncr53c8xx.c``
 QUEUE_MAGIC_FREE      0xf7e1c9a3       queue_entry              ``drivers/scsi/arm/queue.c``
 QUEUE_MAGIC_USED      0xf7e1cc33       queue_entry              ``drivers/scsi/arm/queue.c``
diff --git a/MAINTAINERS b/MAINTAINERS
index e4856d3427d9..867ca44422d8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -102,12 +102,6 @@ F:	Documentation/networking/6lowpan.rst
 F:	include/net/6lowpan.h
 F:	net/6lowpan/
 
-6PACK NETWORK DRIVER FOR AX.25
-M:	Andreas Koensgen <ajk@comnets.uni-bremen.de>
-L:	linux-hams@vger.kernel.org
-S:	Maintained
-F:	drivers/net/hamradio/6pack.c
-
 802.11 (including CFG80211/NL80211)
 M:	Johannes Berg <johannes@sipsolutions.net>
 L:	linux-wireless@vger.kernel.org
@@ -4280,14 +4274,6 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/leds/backlight/awinic,aw99706.yaml
 F:	drivers/video/backlight/aw99706.c
 
-AX.25 NETWORK LAYER
-L:	linux-hams@vger.kernel.org
-S:	Orphan
-W:	https://linux-ax25.in-berlin.de
-F:	include/net/ax25.h
-F:	include/uapi/linux/ax25.h
-F:	net/ax25/
-
 AXENTIA ARM DEVICES
 M:	Peter Rosin <peda@axentia.se>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -4429,13 +4415,6 @@ F:	include/uapi/linux/batadv_packet.h
 F:	include/uapi/linux/batman_adv.h
 F:	net/batman-adv/
 
-BAYCOM/HDLCDRV DRIVERS FOR AX.25
-M:	Thomas Sailer <t.sailer@alumni.ethz.ch>
-L:	linux-hams@vger.kernel.org
-S:	Maintained
-W:	http://www.baycom.org/~tom/ham/ham.html
-F:	drivers/net/hamradio/baycom*
-
 BCACHE (BLOCK LAYER CACHE)
 M:	Coly Li <colyli@fnnas.com>
 M:	Kent Overstreet <kent.overstreet@linux.dev>
@@ -7019,20 +6998,6 @@ S:	Maintained
 F:	drivers/rtc/rtc-ds1685.c
 F:	include/linux/rtc/ds1685.h
 
-DAMA SLAVE for AX.25
-M:	Joerg Reuter <jreuter@yaina.de>
-L:	linux-hams@vger.kernel.org
-S:	Maintained
-W:	http://yaina.de/jreuter/
-W:	http://www.qsl.net/dl1bke/
-F:	net/ax25/af_ax25.c
-F:	net/ax25/ax25_dev.c
-F:	net/ax25/ax25_ds_*
-F:	net/ax25/ax25_in.c
-F:	net/ax25/ax25_out.c
-F:	net/ax25/ax25_timer.c
-F:	net/ax25/sysctl_net_ax25.c
-
 DASHARO ACPI PLATFORM DRIVER
 M:	Michał Kopeć <michal.kopec@3mdeb.com>
 S:	Maintained
@@ -11443,11 +11408,6 @@ T:	git https://github.com/Rust-for-Linux/linux.git timekeeping-next
 F:	rust/kernel/time.rs
 F:	rust/kernel/time/
 
-HIGH-SPEED SCC DRIVER FOR AX.25
-L:	linux-hams@vger.kernel.org
-S:	Orphan
-F:	drivers/net/hamradio/scc.c
-
 HIGHPOINT ROCKETRAID 3xxx RAID DRIVER
 M:	HighPoint Linux Team <linux@highpoint-tech.com>
 S:	Supported
@@ -18271,14 +18231,6 @@ F:	net/bridge/br_netfilter*.c
 F:	net/netfilter/
 F:	tools/testing/selftests/net/netfilter/
 
-NETROM NETWORK LAYER
-L:	linux-hams@vger.kernel.org
-S:	Orphan
-W:	https://linux-ax25.in-berlin.de
-F:	include/net/netrom.h
-F:	include/uapi/linux/netrom.h
-F:	net/netrom/
-
 NETRONIX EMBEDDED CONTROLLER
 M:	Jonathan Neuschäfer <j.neuschaefer@gmx.net>
 S:	Maintained
@@ -23071,14 +23023,6 @@ F:	include/linux/mfd/rohm-bd96802.h
 F:	include/linux/mfd/rohm-generic.h
 F:	include/linux/mfd/rohm-shared.h
 
-ROSE NETWORK LAYER
-L:	linux-hams@vger.kernel.org
-S:	Orphan
-W:	https://linux-ax25.in-berlin.de
-F:	include/net/rose.h
-F:	include/uapi/linux/rose.h
-F:	net/rose/
-
 ROTATION DRIVER FOR ALLWINNER A83T
 M:	Jernej Skrabec <jernej.skrabec@gmail.com>
 L:	linux-media@vger.kernel.org
@@ -29104,13 +29048,6 @@ F:	lib/decompress_unxz.c
 F:	lib/xz/
 F:	scripts/xz_wrap.sh
 
-YAM DRIVER FOR AX.25
-M:	Jean-Paul Roubelat <jpr@f6fbb.org>
-L:	linux-hams@vger.kernel.org
-S:	Maintained
-F:	drivers/net/hamradio/yam*
-F:	include/linux/yam.h
-
 YAMA SECURITY MODULE
 M:	Kees Cook <kees@kernel.org>
 S:	Supported
@@ -29132,16 +29069,6 @@ S:	Maintained
 F:	Documentation/input/devices/yealink.rst
 F:	drivers/input/misc/yealink.*
 
-Z8530 DRIVER FOR AX.25
-M:	Joerg Reuter <jreuter@yaina.de>
-L:	linux-hams@vger.kernel.org
-S:	Maintained
-W:	http://yaina.de/jreuter/
-W:	http://www.qsl.net/dl1bke/
-F:	Documentation/networking/device_drivers/hamradio/z8530drv.rst
-F:	drivers/net/hamradio/*scc.c
-F:	drivers/net/hamradio/z8530.h
-
 ZD1211RW WIRELESS DRIVER
 L:	linux-wireless@vger.kernel.org
 S:	Orphan
diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig
index d10b3d4adbd1..acbab8dae53f 100644
--- a/arch/mips/configs/bcm47xx_defconfig
+++ b/arch/mips/configs/bcm47xx_defconfig
@@ -28,7 +28,6 @@ CONFIG_NETFILTER=y
 CONFIG_VLAN_8021Q=y
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_FQ_CODEL=y
-CONFIG_HAMRADIO=y
 CONFIG_CFG80211=y
 CONFIG_MAC80211=y
 CONFIG_MTD=y
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index 3b64e151e187..aa63ada62e28 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -84,16 +84,6 @@ CONFIG_IP_VS_FTP=m
 CONFIG_BRIDGE=m
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
-CONFIG_HAMRADIO=y
-CONFIG_AX25=m
-CONFIG_NETROM=m
-CONFIG_ROSE=m
-CONFIG_MKISS=m
-CONFIG_6PACK=m
-CONFIG_BPQETHER=m
-CONFIG_BAYCOM_SER_FDX=m
-CONFIG_BAYCOM_SER_HDX=m
-CONFIG_YAM=m
 CONFIG_FW_LOADER=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_NBD=m
diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig
index fdd28a89e336..261730af75c7 100644
--- a/arch/mips/configs/gpr_defconfig
+++ b/arch/mips/configs/gpr_defconfig
@@ -130,17 +130,6 @@ CONFIG_NET_EMATCH_TEXT=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=y
 CONFIG_NET_PKTGEN=m
-CONFIG_HAMRADIO=y
-CONFIG_AX25=m
-# CONFIG_AX25_DAMA_SLAVE is not set
-CONFIG_NETROM=m
-CONFIG_ROSE=m
-CONFIG_MKISS=m
-CONFIG_6PACK=m
-CONFIG_BPQETHER=m
-CONFIG_BAYCOM_SER_FDX=m
-CONFIG_BAYCOM_SER_HDX=m
-CONFIG_YAM=m
 CONFIG_CFG80211=y
 CONFIG_MAC80211=y
 CONFIG_MTD=y
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 72568f8ae653..315650c6fe0b 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -176,17 +176,6 @@ CONFIG_NET_EMATCH_TEXT=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=y
 CONFIG_NET_PKTGEN=m
-CONFIG_HAMRADIO=y
-CONFIG_AX25=m
-# CONFIG_AX25_DAMA_SLAVE is not set
-CONFIG_NETROM=m
-CONFIG_ROSE=m
-CONFIG_MKISS=m
-CONFIG_6PACK=m
-CONFIG_BPQETHER=m
-CONFIG_BAYCOM_SER_FDX=m
-CONFIG_BAYCOM_SER_HDX=m
-CONFIG_YAM=m
 CONFIG_BT=m
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig
index 30d18b084cda..a88322fe3935 100644
--- a/arch/mips/configs/rb532_defconfig
+++ b/arch/mips/configs/rb532_defconfig
@@ -95,7 +95,6 @@ CONFIG_NET_ACT_GACT=m
 CONFIG_GACT_PROB=y
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_NET_ACT_PEDIT=m
-CONFIG_HAMRADIO=y
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_BLOCK2MTD=y
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index b1e67ff0c4f0..ad9fbd0cbb38 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -147,13 +147,6 @@ CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_U32=m
 CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
-CONFIG_HAMRADIO=y
-CONFIG_AX25=m
-CONFIG_NETROM=m
-CONFIG_ROSE=m
-CONFIG_MKISS=m
-CONFIG_6PACK=m
-CONFIG_BPQETHER=m
 CONFIG_CONNECTOR=m
 CONFIG_PARPORT=m
 CONFIG_PARPORT_PC=m
diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig
index 8f9701efef19..c920976bedd0 100644
--- a/arch/mips/configs/rt305x_defconfig
+++ b/arch/mips/configs/rt305x_defconfig
@@ -64,7 +64,6 @@ CONFIG_BRIDGE=y
 # CONFIG_BRIDGE_IGMP_SNOOPING is not set
 CONFIG_VLAN_8021Q=y
 CONFIG_NET_SCHED=y
-CONFIG_HAMRADIO=y
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig
index aae8497b6872..f1c53bbb72e9 100644
--- a/arch/mips/configs/xway_defconfig
+++ b/arch/mips/configs/xway_defconfig
@@ -66,7 +66,6 @@ CONFIG_BRIDGE=y
 # CONFIG_BRIDGE_IGMP_SNOOPING is not set
 CONFIG_VLAN_8021Q=y
 CONFIG_NET_SCHED=y
-CONFIG_HAMRADIO=y
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 3b2d28127634..b87a741fc952 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -54,7 +54,6 @@ obj-y += dsa/
 endif
 obj-$(CONFIG_ETHERNET) += ethernet/
 obj-$(CONFIG_FDDI) += fddi/
-obj-$(CONFIG_HAMRADIO) += hamradio/
 obj-$(CONFIG_QCOM_IPA) += ipa/
 obj-$(CONFIG_PLIP) += plip/
 obj-$(CONFIG_PPP) += ppp/
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
deleted file mode 100644
index c8b2dc5c1bec..000000000000
--- a/drivers/net/hamradio/6pack.c
+++ /dev/null
@@ -1,912 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * 6pack.c	This module implements the 6pack protocol for kernel-based
- *		devices like TTY. It interfaces between a raw TTY and the
- *		kernel's AX.25 protocol layers.
- *
- * Authors:	Andreas Könsgen <ajk@comnets.uni-bremen.de>
- *              Ralf Baechle DL5RB <ralf@linux-mips.org>
- *
- * Quite a lot of stuff "stolen" by Joerg Reuter from slip.c, written by
- *
- *		Laurence Culhane, <loz@holmes.demon.co.uk>
- *		Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org>
- */
-
-#include <linux/module.h>
-#include <linux/uaccess.h>
-#include <linux/bitops.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/in.h>
-#include <linux/tty.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/spinlock.h>
-#include <linux/if_arp.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/semaphore.h>
-#include <linux/refcount.h>
-
-/* sixpack priority commands */
-#define SIXP_SEOF		0x40	/* start and end of a 6pack frame */
-#define SIXP_TX_URUN		0x48	/* transmit overrun */
-#define SIXP_RX_ORUN		0x50	/* receive overrun */
-#define SIXP_RX_BUF_OVL		0x58	/* receive buffer overflow */
-
-#define SIXP_CHKSUM		0xFF	/* valid checksum of a 6pack frame */
-
-/* masks to get certain bits out of the status bytes sent by the TNC */
-
-#define SIXP_CMD_MASK		0xC0
-#define SIXP_CHN_MASK		0x07
-#define SIXP_PRIO_CMD_MASK	0x80
-#define SIXP_STD_CMD_MASK	0x40
-#define SIXP_PRIO_DATA_MASK	0x38
-#define SIXP_TX_MASK		0x20
-#define SIXP_RX_MASK		0x10
-#define SIXP_RX_DCD_MASK	0x18
-#define SIXP_LEDS_ON		0x78
-#define SIXP_LEDS_OFF		0x60
-#define SIXP_CON		0x08
-#define SIXP_STA		0x10
-
-#define SIXP_FOUND_TNC		0xe9
-#define SIXP_CON_ON		0x68
-#define SIXP_DCD_MASK		0x08
-#define SIXP_DAMA_OFF		0
-
-/* default level 2 parameters */
-#define SIXP_TXDELAY			25	/* 250 ms */
-#define SIXP_PERSIST			50	/* in 256ths */
-#define SIXP_SLOTTIME			10	/* 100 ms */
-#define SIXP_INIT_RESYNC_TIMEOUT	(3*HZ/2) /* in 1 s */
-#define SIXP_RESYNC_TIMEOUT		5*HZ	/* in 1 s */
-
-/* 6pack configuration. */
-#define SIXP_NRUNIT			31      /* MAX number of 6pack channels */
-#define SIXP_MTU			256	/* Default MTU */
-
-enum sixpack_flags {
-	SIXPF_ERROR,	/* Parity, etc. error	*/
-};
-
-struct sixpack {
-	/* Various fields. */
-	struct tty_struct	*tty;		/* ptr to TTY structure	*/
-	struct net_device	*dev;		/* easy for intr handling  */
-
-	/* These are pointers to the malloc()ed frame buffers. */
-	int			rcount;         /* received chars counter  */
-	unsigned char		*xbuff;		/* transmitter buffer	*/
-	unsigned char		*xhead;         /* next byte to XMIT */
-	int			xleft;          /* bytes left in XMIT queue  */
-
-	u8			raw_buf[4];
-	u8			cooked_buf[400];
-
-	unsigned int		rx_count;
-	unsigned int		rx_count_cooked;
-	spinlock_t		rxlock;
-
-	unsigned long		flags;		/* Flag values/ mode etc */
-	unsigned char		mode;		/* 6pack mode */
-
-	/* 6pack stuff */
-	unsigned char		tx_delay;
-	unsigned char		persistence;
-	unsigned char		slottime;
-	unsigned char		duplex;
-	unsigned char		led_state;
-	u8			status;
-	u8			status1;
-	unsigned char		status2;
-	unsigned char		tx_enable;
-	unsigned char		tnc_state;
-
-	struct timer_list	tx_t;
-	struct timer_list	resync_t;
-	spinlock_t		lock;
-};
-
-#define AX25_6PACK_HEADER_LEN 0
-
-static void sixpack_decode(struct sixpack *, const u8 *, size_t);
-static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char);
-
-/*
- * Perform the persistence/slottime algorithm for CSMA access. If the
- * persistence check was successful, write the data to the serial driver.
- * Note that in case of DAMA operation, the data is not sent here.
- */
-
-static void sp_xmit_on_air(struct timer_list *t)
-{
-	struct sixpack *sp = timer_container_of(sp, t, tx_t);
-	int actual, when = sp->slottime;
-	static unsigned char random;
-
-	random = random * 17 + 41;
-
-	if (((sp->status1 & SIXP_DCD_MASK) == 0) && (random < sp->persistence)) {
-		sp->led_state = 0x70;
-		sp->tty->ops->write(sp->tty, &sp->led_state, 1);
-		sp->tx_enable = 1;
-		actual = sp->tty->ops->write(sp->tty, sp->xbuff, sp->status2);
-		sp->xleft -= actual;
-		sp->xhead += actual;
-		sp->led_state = 0x60;
-		sp->tty->ops->write(sp->tty, &sp->led_state, 1);
-		sp->status2 = 0;
-	} else
-		mod_timer(&sp->tx_t, jiffies + ((when + 1) * HZ) / 100);
-}
-
-/* ----> 6pack timer interrupt handler and friends. <---- */
-
-/* Encapsulate one AX.25 frame and stuff into a TTY queue. */
-static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len)
-{
-	unsigned char *msg, *p = icp;
-	int actual, count;
-
-	if (len > AX25_MTU + 73) {
-		msg = "oversized transmit packet!";
-		goto out_drop;
-	}
-
-	if (p[0] > 5) {
-		msg = "invalid KISS command";
-		goto out_drop;
-	}
-
-	if ((p[0] != 0) && (len > 2)) {
-		msg = "KISS control packet too long";
-		goto out_drop;
-	}
-
-	if ((p[0] == 0) && (len < 15)) {
-		msg = "bad AX.25 packet to transmit";
-		goto out_drop;
-	}
-
-	count = encode_sixpack(p, sp->xbuff, len, sp->tx_delay);
-	set_bit(TTY_DO_WRITE_WAKEUP, &sp->tty->flags);
-
-	switch (p[0]) {
-	case 1:	sp->tx_delay = p[1];
-		return;
-	case 2:	sp->persistence = p[1];
-		return;
-	case 3:	sp->slottime = p[1];
-		return;
-	case 4:	/* ignored */
-		return;
-	case 5:	sp->duplex = p[1];
-		return;
-	}
-
-	if (p[0] != 0)
-		return;
-
-	/*
-	 * In case of fullduplex or DAMA operation, we don't take care about the
-	 * state of the DCD or of any timers, as the determination of the
-	 * correct time to send is the job of the AX.25 layer. We send
-	 * immediately after data has arrived.
-	 */
-	if (sp->duplex == 1) {
-		sp->led_state = 0x70;
-		sp->tty->ops->write(sp->tty, &sp->led_state, 1);
-		sp->tx_enable = 1;
-		actual = sp->tty->ops->write(sp->tty, sp->xbuff, count);
-		sp->xleft = count - actual;
-		sp->xhead = sp->xbuff + actual;
-		sp->led_state = 0x60;
-		sp->tty->ops->write(sp->tty, &sp->led_state, 1);
-	} else {
-		sp->xleft = count;
-		sp->xhead = sp->xbuff;
-		sp->status2 = count;
-		sp_xmit_on_air(&sp->tx_t);
-	}
-
-	return;
-
-out_drop:
-	sp->dev->stats.tx_dropped++;
-	netif_start_queue(sp->dev);
-	if (net_ratelimit())
-		printk(KERN_DEBUG "%s: %s - dropped.\n", sp->dev->name, msg);
-}
-
-/* Encapsulate an IP datagram and kick it into a TTY queue. */
-
-static netdev_tx_t sp_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct sixpack *sp = netdev_priv(dev);
-
-	if (skb->protocol == htons(ETH_P_IP))
-		return ax25_ip_xmit(skb);
-
-	spin_lock_bh(&sp->lock);
-	/* We were not busy, so we are now... :-) */
-	netif_stop_queue(dev);
-	dev->stats.tx_bytes += skb->len;
-	sp_encaps(sp, skb->data, skb->len);
-	spin_unlock_bh(&sp->lock);
-
-	dev_kfree_skb(skb);
-
-	return NETDEV_TX_OK;
-}
-
-static int sp_open_dev(struct net_device *dev)
-{
-	struct sixpack *sp = netdev_priv(dev);
-
-	if (sp->tty == NULL)
-		return -ENODEV;
-	return 0;
-}
-
-/* Close the low-level part of the 6pack channel. */
-static int sp_close(struct net_device *dev)
-{
-	struct sixpack *sp = netdev_priv(dev);
-
-	spin_lock_bh(&sp->lock);
-	if (sp->tty) {
-		/* TTY discipline is running. */
-		clear_bit(TTY_DO_WRITE_WAKEUP, &sp->tty->flags);
-	}
-	netif_stop_queue(dev);
-	spin_unlock_bh(&sp->lock);
-
-	return 0;
-}
-
-static int sp_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct sockaddr_ax25 *sa = addr;
-
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
-	__dev_addr_set(dev, &sa->sax25_call, AX25_ADDR_LEN);
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
-
-	return 0;
-}
-
-static const struct net_device_ops sp_netdev_ops = {
-	.ndo_open		= sp_open_dev,
-	.ndo_stop		= sp_close,
-	.ndo_start_xmit		= sp_xmit,
-	.ndo_set_mac_address    = sp_set_mac_address,
-};
-
-static void sp_setup(struct net_device *dev)
-{
-	/* Finish setting up the DEVICE info. */
-	dev->netdev_ops		= &sp_netdev_ops;
-	dev->mtu		= SIXP_MTU;
-	dev->hard_header_len	= AX25_MAX_HEADER_LEN;
-	dev->header_ops 	= &ax25_header_ops;
-
-	dev->addr_len		= AX25_ADDR_LEN;
-	dev->type		= ARPHRD_AX25;
-	dev->tx_queue_len	= 10;
-
-	/* Only activated in AX.25 mode */
-	memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
-	dev_addr_set(dev, (u8 *)&ax25_defaddr);
-
-	dev->flags		= 0;
-}
-
-/* Send one completely decapsulated IP datagram to the IP layer. */
-
-/*
- * This is the routine that sends the received data to the kernel AX.25.
- * 'cmd' is the KISS command. For AX.25 data, it is zero.
- */
-
-static void sp_bump(struct sixpack *sp, char cmd)
-{
-	struct sk_buff *skb;
-	int count;
-	u8 *ptr;
-
-	count = sp->rcount + 1;
-
-	sp->dev->stats.rx_bytes += count;
-
-	if ((skb = dev_alloc_skb(count + 1)) == NULL)
-		goto out_mem;
-
-	ptr = skb_put(skb, count + 1);
-	*ptr++ = cmd;	/* KISS command */
-
-	memcpy(ptr, sp->cooked_buf + 1, count);
-	skb->protocol = ax25_type_trans(skb, sp->dev);
-	netif_rx(skb);
-	sp->dev->stats.rx_packets++;
-
-	return;
-
-out_mem:
-	sp->dev->stats.rx_dropped++;
-}
-
-
-/* ----------------------------------------------------------------------- */
-
-/*
- * Called by the TTY driver when there's room for more data.  If we have
- * more packets to send, we send them here.
- */
-static void sixpack_write_wakeup(struct tty_struct *tty)
-{
-	struct sixpack *sp = tty->disc_data;
-	int actual;
-
-	if (!sp)
-		return;
-	if (sp->xleft <= 0)  {
-		/* Now serial buffer is almost free & we can start
-		 * transmission of another packet */
-		sp->dev->stats.tx_packets++;
-		clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
-		sp->tx_enable = 0;
-		netif_wake_queue(sp->dev);
-		return;
-	}
-
-	if (sp->tx_enable) {
-		actual = tty->ops->write(tty, sp->xhead, sp->xleft);
-		sp->xleft -= actual;
-		sp->xhead += actual;
-	}
-}
-
-/* ----------------------------------------------------------------------- */
-
-/*
- * Handle the 'receiver data ready' interrupt.
- * This function is called by the tty module in the kernel when
- * a block of 6pack data has been received, which can now be decapsulated
- * and sent on to some IP layer for further processing.
- */
-static void sixpack_receive_buf(struct tty_struct *tty, const u8 *cp,
-				const u8 *fp, size_t count)
-{
-	struct sixpack *sp;
-
-	if (!count)
-		return;
-
-	sp = tty->disc_data;
-	if (!sp)
-		return;
-
-	/* Read the characters out of the buffer */
-	while (count--) {
-		if (fp && *fp++) {
-			if (!test_and_set_bit(SIXPF_ERROR, &sp->flags))
-				sp->dev->stats.rx_errors++;
-			cp++;
-			continue;
-		}
-		sixpack_decode(sp, cp, 1);
-		cp++;
-	}
-
-	tty_unthrottle(tty);
-}
-
-/*
- * Try to resync the TNC. Called by the resync timer defined in
- * decode_prio_command
- */
-
-#define TNC_UNINITIALIZED	0
-#define TNC_UNSYNC_STARTUP	1
-#define TNC_UNSYNCED		2
-#define TNC_IN_SYNC		3
-
-static void __tnc_set_sync_state(struct sixpack *sp, int new_tnc_state)
-{
-	char *msg;
-
-	switch (new_tnc_state) {
-	default:			/* gcc oh piece-o-crap ... */
-	case TNC_UNSYNC_STARTUP:
-		msg = "Synchronizing with TNC";
-		break;
-	case TNC_UNSYNCED:
-		msg = "Lost synchronization with TNC\n";
-		break;
-	case TNC_IN_SYNC:
-		msg = "Found TNC";
-		break;
-	}
-
-	sp->tnc_state = new_tnc_state;
-	printk(KERN_INFO "%s: %s\n", sp->dev->name, msg);
-}
-
-static inline void tnc_set_sync_state(struct sixpack *sp, int new_tnc_state)
-{
-	int old_tnc_state = sp->tnc_state;
-
-	if (old_tnc_state != new_tnc_state)
-		__tnc_set_sync_state(sp, new_tnc_state);
-}
-
-static void resync_tnc(struct timer_list *t)
-{
-	struct sixpack *sp = timer_container_of(sp, t, resync_t);
-	static char resync_cmd = 0xe8;
-
-	/* clear any data that might have been received */
-
-	sp->rx_count = 0;
-	sp->rx_count_cooked = 0;
-
-	/* reset state machine */
-
-	sp->status = 1;
-	sp->status1 = 1;
-	sp->status2 = 0;
-
-	/* resync the TNC */
-
-	sp->led_state = 0x60;
-	sp->tty->ops->write(sp->tty, &sp->led_state, 1);
-	sp->tty->ops->write(sp->tty, &resync_cmd, 1);
-
-
-	/* Start resync timer again -- the TNC might be still absent */
-	mod_timer(&sp->resync_t, jiffies + SIXP_RESYNC_TIMEOUT);
-}
-
-static inline int tnc_init(struct sixpack *sp)
-{
-	unsigned char inbyte = 0xe8;
-
-	tnc_set_sync_state(sp, TNC_UNSYNC_STARTUP);
-
-	sp->tty->ops->write(sp->tty, &inbyte, 1);
-
-	mod_timer(&sp->resync_t, jiffies + SIXP_RESYNC_TIMEOUT);
-
-	return 0;
-}
-
-/*
- * Open the high-level part of the 6pack channel.
- * This function is called by the TTY module when the
- * 6pack line discipline is called for.  Because we are
- * sure the tty line exists, we only have to link it to
- * a free 6pcack channel...
- */
-static int sixpack_open(struct tty_struct *tty)
-{
-	char *xbuff = NULL;
-	struct net_device *dev;
-	struct sixpack *sp;
-	unsigned long len;
-	int err = 0;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-	if (tty->ops->write == NULL)
-		return -EOPNOTSUPP;
-
-	dev = alloc_netdev(sizeof(struct sixpack), "sp%d", NET_NAME_UNKNOWN,
-			   sp_setup);
-	if (!dev) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	sp = netdev_priv(dev);
-	sp->dev = dev;
-
-	spin_lock_init(&sp->lock);
-	spin_lock_init(&sp->rxlock);
-
-	/* !!! length of the buffers. MTU is IP MTU, not PACLEN!  */
-
-	len = dev->mtu * 2;
-
-	xbuff = kmalloc(len + 4, GFP_KERNEL);
-	if (xbuff == NULL) {
-		err = -ENOBUFS;
-		goto out_free;
-	}
-
-	spin_lock_bh(&sp->lock);
-
-	sp->tty = tty;
-
-	sp->xbuff	= xbuff;
-
-	sp->rcount	= 0;
-	sp->rx_count	= 0;
-	sp->rx_count_cooked = 0;
-	sp->xleft	= 0;
-
-	sp->flags	= 0;		/* Clear ESCAPE & ERROR flags */
-
-	sp->duplex	= 0;
-	sp->tx_delay    = SIXP_TXDELAY;
-	sp->persistence = SIXP_PERSIST;
-	sp->slottime    = SIXP_SLOTTIME;
-	sp->led_state   = 0x60;
-	sp->status      = 1;
-	sp->status1     = 1;
-	sp->status2     = 0;
-	sp->tx_enable   = 0;
-
-	netif_start_queue(dev);
-
-	timer_setup(&sp->tx_t, sp_xmit_on_air, 0);
-
-	timer_setup(&sp->resync_t, resync_tnc, 0);
-
-	spin_unlock_bh(&sp->lock);
-
-	/* Done.  We have linked the TTY line to a channel. */
-	tty->disc_data = sp;
-	tty->receive_room = 65536;
-
-	/* Now we're ready to register. */
-	err = register_netdev(dev);
-	if (err)
-		goto out_free;
-
-	tnc_init(sp);
-
-	return 0;
-
-out_free:
-	kfree(xbuff);
-
-	free_netdev(dev);
-
-out:
-	return err;
-}
-
-
-/*
- * Close down a 6pack channel.
- * This means flushing out any pending queues, and then restoring the
- * TTY line discipline to what it was before it got hooked to 6pack
- * (which usually is TTY again).
- */
-static void sixpack_close(struct tty_struct *tty)
-{
-	struct sixpack *sp;
-
-	sp = tty->disc_data;
-	if (!sp)
-		return;
-
-	tty->disc_data = NULL;
-
-	/* We must stop the queue to avoid potentially scribbling
-	 * on the free buffers. The sp->dead completion is not sufficient
-	 * to protect us from sp->xbuff access.
-	 */
-	netif_stop_queue(sp->dev);
-
-	unregister_netdev(sp->dev);
-
-	timer_delete_sync(&sp->tx_t);
-	timer_delete_sync(&sp->resync_t);
-
-	/* Free all 6pack frame buffers after unreg. */
-	kfree(sp->xbuff);
-
-	free_netdev(sp->dev);
-}
-
-/* Perform I/O control on an active 6pack channel. */
-static int sixpack_ioctl(struct tty_struct *tty, unsigned int cmd,
-		unsigned long arg)
-{
-	struct sixpack *sp = tty->disc_data;
-	struct net_device *dev;
-	unsigned int tmp, err;
-
-	if (!sp)
-		return -ENXIO;
-	dev = sp->dev;
-
-	switch(cmd) {
-	case SIOCGIFNAME:
-		err = copy_to_user((void __user *) arg, dev->name,
-		                   strlen(dev->name) + 1) ? -EFAULT : 0;
-		break;
-
-	case SIOCGIFENCAP:
-		err = put_user(0, (int __user *) arg);
-		break;
-
-	case SIOCSIFENCAP:
-		if (get_user(tmp, (int __user *) arg)) {
-			err = -EFAULT;
-			break;
-		}
-
-		sp->mode = tmp;
-		dev->addr_len        = AX25_ADDR_LEN;
-		dev->hard_header_len = AX25_KISS_HEADER_LEN +
-		                       AX25_MAX_HEADER_LEN + 3;
-		dev->type            = ARPHRD_AX25;
-
-		err = 0;
-		break;
-
-	case SIOCSIFHWADDR: {
-			char addr[AX25_ADDR_LEN];
-
-			if (copy_from_user(&addr,
-					   (void __user *)arg, AX25_ADDR_LEN)) {
-				err = -EFAULT;
-				break;
-			}
-
-			netif_tx_lock_bh(dev);
-			__dev_addr_set(dev, &addr, AX25_ADDR_LEN);
-			netif_tx_unlock_bh(dev);
-			err = 0;
-			break;
-		}
-	default:
-		err = tty_mode_ioctl(tty, cmd, arg);
-	}
-
-	return err;
-}
-
-static struct tty_ldisc_ops sp_ldisc = {
-	.owner		= THIS_MODULE,
-	.num		= N_6PACK,
-	.name		= "6pack",
-	.open		= sixpack_open,
-	.close		= sixpack_close,
-	.ioctl		= sixpack_ioctl,
-	.receive_buf	= sixpack_receive_buf,
-	.write_wakeup	= sixpack_write_wakeup,
-};
-
-/* Initialize 6pack control device -- register 6pack line discipline */
-
-static int __init sixpack_init_driver(void)
-{
-	int status;
-
-	/* Register the provided line protocol discipline */
-	status = tty_register_ldisc(&sp_ldisc);
-	if (status)
-		pr_err("6pack: can't register line discipline (err = %d)\n", status);
-
-	return status;
-}
-
-static void __exit sixpack_exit_driver(void)
-{
-	tty_unregister_ldisc(&sp_ldisc);
-}
-
-/* encode an AX.25 packet into 6pack */
-
-static int encode_sixpack(unsigned char *tx_buf, unsigned char *tx_buf_raw,
-	int length, unsigned char tx_delay)
-{
-	int count = 0;
-	unsigned char checksum = 0, buf[400];
-	int raw_count = 0;
-
-	tx_buf_raw[raw_count++] = SIXP_PRIO_CMD_MASK | SIXP_TX_MASK;
-	tx_buf_raw[raw_count++] = SIXP_SEOF;
-
-	buf[0] = tx_delay;
-	for (count = 1; count < length; count++)
-		buf[count] = tx_buf[count];
-
-	for (count = 0; count < length; count++)
-		checksum += buf[count];
-	buf[length] = (unsigned char) 0xff - checksum;
-
-	for (count = 0; count <= length; count++) {
-		if ((count % 3) == 0) {
-			tx_buf_raw[raw_count++] = (buf[count] & 0x3f);
-			tx_buf_raw[raw_count] = ((buf[count] >> 2) & 0x30);
-		} else if ((count % 3) == 1) {
-			tx_buf_raw[raw_count++] |= (buf[count] & 0x0f);
-			tx_buf_raw[raw_count] =	((buf[count] >> 2) & 0x3c);
-		} else {
-			tx_buf_raw[raw_count++] |= (buf[count] & 0x03);
-			tx_buf_raw[raw_count++] = (buf[count] >> 2);
-		}
-	}
-	if ((length % 3) != 2)
-		raw_count++;
-	tx_buf_raw[raw_count++] = SIXP_SEOF;
-	return raw_count;
-}
-
-/* decode 4 sixpack-encoded bytes into 3 data bytes */
-
-static void decode_data(struct sixpack *sp, u8 inbyte)
-{
-	u8 *buf;
-
-	if (sp->rx_count != 3) {
-		sp->raw_buf[sp->rx_count++] = inbyte;
-
-		return;
-	}
-
-	if (sp->rx_count_cooked + 2 >= sizeof(sp->cooked_buf)) {
-		pr_err("6pack: cooked buffer overrun, data loss\n");
-		sp->rx_count = 0;
-		return;
-	}
-
-	buf = sp->raw_buf;
-	sp->cooked_buf[sp->rx_count_cooked++] =
-		buf[0] | ((buf[1] << 2) & 0xc0);
-	sp->cooked_buf[sp->rx_count_cooked++] =
-		(buf[1] & 0x0f) | ((buf[2] << 2) & 0xf0);
-	sp->cooked_buf[sp->rx_count_cooked++] =
-		(buf[2] & 0x03) | (inbyte << 2);
-	sp->rx_count = 0;
-}
-
-/* identify and execute a 6pack priority command byte */
-
-static void decode_prio_command(struct sixpack *sp, u8 cmd)
-{
-	ssize_t actual;
-
-	if ((cmd & SIXP_PRIO_DATA_MASK) != 0) {     /* idle ? */
-
-	/* RX and DCD flags can only be set in the same prio command,
-	   if the DCD flag has been set without the RX flag in the previous
-	   prio command. If DCD has not been set before, something in the
-	   transmission has gone wrong. In this case, RX and DCD are
-	   cleared in order to prevent the decode_data routine from
-	   reading further data that might be corrupt. */
-
-		if (((sp->status & SIXP_DCD_MASK) == 0) &&
-			((cmd & SIXP_RX_DCD_MASK) == SIXP_RX_DCD_MASK)) {
-				if (sp->status != 1)
-					printk(KERN_DEBUG "6pack: protocol violation\n");
-				else
-					sp->status = 0;
-				cmd &= ~SIXP_RX_DCD_MASK;
-		}
-		sp->status = cmd & SIXP_PRIO_DATA_MASK;
-	} else { /* output watchdog char if idle */
-		if ((sp->status2 != 0) && (sp->duplex == 1)) {
-			sp->led_state = 0x70;
-			sp->tty->ops->write(sp->tty, &sp->led_state, 1);
-			sp->tx_enable = 1;
-			actual = sp->tty->ops->write(sp->tty, sp->xbuff, sp->status2);
-			sp->xleft -= actual;
-			sp->xhead += actual;
-			sp->led_state = 0x60;
-			sp->status2 = 0;
-
-		}
-	}
-
-	/* needed to trigger the TNC watchdog */
-	sp->tty->ops->write(sp->tty, &sp->led_state, 1);
-
-        /* if the state byte has been received, the TNC is present,
-           so the resync timer can be reset. */
-
-	if (sp->tnc_state == TNC_IN_SYNC)
-		mod_timer(&sp->resync_t, jiffies + SIXP_INIT_RESYNC_TIMEOUT);
-
-	sp->status1 = cmd & SIXP_PRIO_DATA_MASK;
-}
-
-/* identify and execute a standard 6pack command byte */
-
-static void decode_std_command(struct sixpack *sp, u8 cmd)
-{
-	u8 checksum = 0, rest = 0;
-	short i;
-
-	switch (cmd & SIXP_CMD_MASK) {     /* normal command */
-	case SIXP_SEOF:
-		if ((sp->rx_count == 0) && (sp->rx_count_cooked == 0)) {
-			if ((sp->status & SIXP_RX_DCD_MASK) ==
-				SIXP_RX_DCD_MASK) {
-				sp->led_state = 0x68;
-				sp->tty->ops->write(sp->tty, &sp->led_state, 1);
-			}
-		} else {
-			sp->led_state = 0x60;
-			/* fill trailing bytes with zeroes */
-			sp->tty->ops->write(sp->tty, &sp->led_state, 1);
-			spin_lock_bh(&sp->rxlock);
-			rest = sp->rx_count;
-			if (rest != 0)
-				 for (i = rest; i <= 3; i++)
-					decode_data(sp, 0);
-			if (rest == 2)
-				sp->rx_count_cooked -= 2;
-			else if (rest == 3)
-				sp->rx_count_cooked -= 1;
-			for (i = 0; i < sp->rx_count_cooked; i++)
-				checksum += sp->cooked_buf[i];
-			if (checksum != SIXP_CHKSUM) {
-				printk(KERN_DEBUG "6pack: bad checksum %2.2x\n", checksum);
-			} else {
-				sp->rcount = sp->rx_count_cooked-2;
-				sp_bump(sp, 0);
-			}
-			sp->rx_count_cooked = 0;
-			spin_unlock_bh(&sp->rxlock);
-		}
-		break;
-	case SIXP_TX_URUN: printk(KERN_DEBUG "6pack: TX underrun\n");
-		break;
-	case SIXP_RX_ORUN: printk(KERN_DEBUG "6pack: RX overrun\n");
-		break;
-	case SIXP_RX_BUF_OVL:
-		printk(KERN_DEBUG "6pack: RX buffer overflow\n");
-	}
-}
-
-/* decode a 6pack packet */
-
-static void
-sixpack_decode(struct sixpack *sp, const u8 *pre_rbuff, size_t count)
-{
-	size_t count1;
-	u8 inbyte;
-
-	for (count1 = 0; count1 < count; count1++) {
-		inbyte = pre_rbuff[count1];
-		if (inbyte == SIXP_FOUND_TNC) {
-			tnc_set_sync_state(sp, TNC_IN_SYNC);
-			timer_delete(&sp->resync_t);
-		}
-		if ((inbyte & SIXP_PRIO_CMD_MASK) != 0)
-			decode_prio_command(sp, inbyte);
-		else if ((inbyte & SIXP_STD_CMD_MASK) != 0)
-			decode_std_command(sp, inbyte);
-		else if ((sp->status & SIXP_RX_DCD_MASK) == SIXP_RX_DCD_MASK) {
-			spin_lock_bh(&sp->rxlock);
-			decode_data(sp, inbyte);
-			spin_unlock_bh(&sp->rxlock);
-		}
-	}
-}
-
-MODULE_AUTHOR("Ralf Baechle DO1GRB <ralf@linux-mips.org>");
-MODULE_DESCRIPTION("6pack driver for AX.25");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_LDISC(N_6PACK);
-
-module_init(sixpack_init_driver);
-module_exit(sixpack_exit_driver);
diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig
deleted file mode 100644
index 36a9aade9f33..000000000000
--- a/drivers/net/hamradio/Kconfig
+++ /dev/null
@@ -1,162 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config MKISS
-	tristate "Serial port KISS driver"
-	depends on AX25 && TTY
-	select CRC16
-	help
-	  KISS is a protocol used for the exchange of data between a computer
-	  and a Terminal Node Controller (a small embedded system commonly
-	  used for networking over AX.25 amateur radio connections; it
-	  connects the computer's serial port with the radio's microphone
-	  input and speaker output).
-
-	  Although KISS is less advanced than the 6pack protocol, it has
-	  the advantage that it is already supported by most modern TNCs
-	  without the need for a firmware upgrade.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called mkiss.
-
-config 6PACK
-	tristate "Serial port 6PACK driver"
-	depends on AX25 && TTY
-	help
-	  6pack is a transmission protocol for the data exchange between your
-	  PC and your TNC (the Terminal Node Controller acts as a kind of
-	  modem connecting your computer's serial port to your radio's
-	  microphone input and speaker output). This protocol can be used as
-	  an alternative to KISS for networking over AX.25 amateur radio
-	  connections, but it has some extended functionality.
-
-	  Note that this driver is still experimental and might cause
-	  problems. For details about the features and the usage of the
-	  driver, read <file:Documentation/networking/6pack.rst>.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called 6pack.
-
-config BPQETHER
-	tristate "BPQ Ethernet driver"
-	depends on AX25
-	help
-	  AX.25 is the protocol used for computer communication over amateur
-	  radio. If you say Y here, you will be able to send and receive AX.25
-	  traffic over Ethernet (also called "BPQ AX.25"), which could be
-	  useful if some other computer on your local network has a direct
-	  amateur radio connection.
-
-config SCC
-	tristate "Z8530 SCC driver"
-	depends on ISA && AX25
-	help
-	  These cards are used to connect your Linux box to an amateur radio
-	  in order to communicate with other computers. If you want to use
-	  this, read
-	  <file:Documentation/networking/device_drivers/hamradio/z8530drv.rst>
-	  and the AX25-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>. Also make sure to say Y
-	  to "Amateur Radio AX.25 Level 2" support.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called scc.
-
-config SCC_DELAY
-	bool "additional delay for PA0HZP OptoSCC compatible boards"
-	depends on SCC
-	help
-	  Say Y here if you experience problems with the SCC driver not
-	  working properly; please read
-	  <file:Documentation/networking/device_drivers/hamradio/z8530drv.rst>
-	  for details.
-
-	  If unsure, say N.
-
-config SCC_TRXECHO
-	bool "support for TRX that feedback the tx signal to rx"
-	depends on SCC
-	help
-	  Some transmitters feed the transmitted signal back to the receive
-	  line.  Say Y here to foil this by explicitly disabling the receiver
-	  during data transmission.
-
-	  If in doubt, say Y.
-
-config BAYCOM_SER_FDX
-	tristate "BAYCOM ser12 fullduplex driver for AX.25"
-	depends on AX25 && HAS_IOPORT
-	select CRC_CCITT
-	help
-	  This is one of two drivers for Baycom style simple amateur radio
-	  modems that connect to a serial interface. The driver supports the
-	  ser12 design in full-duplex mode. In addition, it allows the
-	  baudrate to be set between 300 and 4800 baud (however not all modems
-	  support all baudrates). This is the preferred driver. The next
-	  driver, "BAYCOM ser12 half-duplex driver for AX.25" is the old
-	  driver and still provided in case this driver does not work with
-	  your serial interface chip. To configure the driver, use the sethdlc
-	  utility available in the standard ax25 utilities package.
-	  For more information on the modems, see
-	  <file:Documentation/networking/device_drivers/hamradio/baycom.rst>.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called baycom_ser_fdx.  This is recommended.
-
-config BAYCOM_SER_HDX
-	tristate "BAYCOM ser12 halfduplex driver for AX.25"
-	depends on AX25 && HAS_IOPORT
-	select CRC_CCITT
-	help
-	  This is one of two drivers for Baycom style simple amateur radio
-	  modems that connect to a serial interface. The driver supports the
-	  ser12 design in half-duplex mode. This is the old driver.  It is
-	  still provided in case your serial interface chip does not work with
-	  the full-duplex driver. This driver is deprecated.  To configure
-	  the driver, use the sethdlc utility available in the standard ax25
-	  utilities package. For more information on the modems, see
-	  <file:Documentation/networking/device_drivers/hamradio/baycom.rst>.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called baycom_ser_hdx.  This is recommended.
-
-config BAYCOM_PAR
-	tristate "BAYCOM picpar and par96 driver for AX.25"
-	depends on PARPORT && AX25
-	select CRC_CCITT
-	help
-	  This is a driver for Baycom style simple amateur radio modems that
-	  connect to a parallel interface. The driver supports the picpar and
-	  par96 designs. To configure the driver, use the sethdlc utility
-	  available in the standard ax25 utilities package.
-	  For more information on the modems, see
-	  <file:Documentation/networking/device_drivers/hamradio/baycom.rst>.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called baycom_par.  This is recommended.
-
-config BAYCOM_EPP
-	tristate "BAYCOM epp driver for AX.25"
-	depends on PARPORT && AX25 && !64BIT
-	select CRC_CCITT
-	help
-	  This is a driver for Baycom style simple amateur radio modems that
-	  connect to a parallel interface. The driver supports the EPP
-	  designs. To configure the driver, use the sethdlc utility available
-	  in the standard ax25 utilities package.
-	  For more information on the modems, see
-	  <file:Documentation/networking/device_drivers/hamradio/baycom.rst>.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called baycom_epp.  This is recommended.
-
-config YAM
-	tristate "YAM driver for AX.25"
-	depends on AX25 && HAS_IOPORT
-	help
-	  The YAM is a modem for packet radio which connects to the serial
-	  port and includes some of the functions of a Terminal Node
-	  Controller. If you have one of those, say Y here.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called yam.
-
- 
diff --git a/drivers/net/hamradio/Makefile b/drivers/net/hamradio/Makefile
deleted file mode 100644
index 25fc400369ba..000000000000
--- a/drivers/net/hamradio/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the Linux AX.25 and HFMODEM device drivers.
-#
-#
-# 19971130 	Moved the amateur radio related network drivers from 
-#		drivers/net/ to drivers/hamradio for easier maintenance.
-#               Joerg Reuter DL1BKE <jreuter@yaina.de>
-#
-# 20000806	Rewritten to use lists instead of if-statements.
-#		Christoph Hellwig <hch@infradead.org>
-#
-
-obj-$(CONFIG_SCC)		+= scc.o
-obj-$(CONFIG_MKISS)		+= mkiss.o
-obj-$(CONFIG_6PACK)		+= 6pack.o
-obj-$(CONFIG_YAM)		+= yam.o
-obj-$(CONFIG_BPQETHER)		+= bpqether.o
-obj-$(CONFIG_BAYCOM_SER_FDX)	+= baycom_ser_fdx.o	hdlcdrv.o
-obj-$(CONFIG_BAYCOM_SER_HDX)	+= baycom_ser_hdx.o	hdlcdrv.o
-obj-$(CONFIG_BAYCOM_PAR)	+= baycom_par.o		hdlcdrv.o
-obj-$(CONFIG_BAYCOM_EPP)	+= baycom_epp.o		hdlcdrv.o
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
deleted file mode 100644
index 5fda7a0fcce0..000000000000
--- a/drivers/net/hamradio/baycom_epp.c
+++ /dev/null
@@ -1,1316 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*****************************************************************************/
-
-/*
- *	baycom_epp.c  -- baycom epp radio modem driver.
- *
- *	Copyright (C) 1998-2000
- *          Thomas Sailer (sailer@ife.ee.ethz.ch)
- *
- *  Please note that the GPL allows you to use the driver, NOT the radio.
- *  In order to use the radio, you need a license from the communications
- *  authority of your country.
- *
- *  History:
- *   0.1  xx.xx.1998  Initial version by Matthias Welwarsky (dg2fef)
- *   0.2  21.04.1998  Massive rework by Thomas Sailer
- *                    Integrated FPGA EPP modem configuration routines
- *   0.3  11.05.1998  Took FPGA config out and moved it into a separate program
- *   0.4  26.07.1999  Adapted to new lowlevel parport driver interface
- *   0.5  03.08.1999  adapt to Linus' new __setup/__initcall
- *                    removed some pre-2.2 kernel compatibility cruft
- *   0.6  10.08.1999  Check if parport can do SPP and is safe to access during interrupt contexts
- *   0.7  12.02.2000  adapted to softnet driver interface
- */
-
-/*****************************************************************************/
-
-#include <linux/crc-ccitt.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/workqueue.h>
-#include <linux/fs.h>
-#include <linux/parport.h>
-#include <linux/if_arp.h>
-#include <linux/hdlcdrv.h>
-#include <linux/baycom.h>
-#include <linux/jiffies.h>
-#include <linux/random.h>
-#include <net/ax25.h> 
-#include <linux/uaccess.h>
-
-/* --------------------------------------------------------------------- */
-
-#define BAYCOM_DEBUG
-#define BAYCOM_MAGIC 19730510
-
-/* --------------------------------------------------------------------- */
-
-static const char paranoia_str[] = KERN_ERR 
-	"baycom_epp: bad magic number for hdlcdrv_state struct in routine %s\n";
-
-static const char bc_drvname[] = "baycom_epp";
-static const char bc_drvinfo[] = KERN_INFO "baycom_epp: (C) 1998-2000 Thomas Sailer, HB9JNX/AE4WA\n"
-"baycom_epp: version 0.7\n";
-
-/* --------------------------------------------------------------------- */
-
-#define NR_PORTS 4
-
-static struct net_device *baycom_device[NR_PORTS];
-
-/* --------------------------------------------------------------------- */
-
-/* EPP status register */
-#define EPP_DCDBIT      0x80
-#define EPP_PTTBIT      0x08
-#define EPP_NREF        0x01
-#define EPP_NRAEF       0x02
-#define EPP_NRHF        0x04
-#define EPP_NTHF        0x20
-#define EPP_NTAEF       0x10
-#define EPP_NTEF        EPP_PTTBIT
-
-/* EPP control register */
-#define EPP_TX_FIFO_ENABLE 0x10
-#define EPP_RX_FIFO_ENABLE 0x08
-#define EPP_MODEM_ENABLE   0x20
-#define EPP_LEDS           0xC0
-#define EPP_IRQ_ENABLE     0x10
-
-/* LPT registers */
-#define LPTREG_ECONTROL       0x402
-#define LPTREG_CONFIGB        0x401
-#define LPTREG_CONFIGA        0x400
-#define LPTREG_EPPDATA        0x004
-#define LPTREG_EPPADDR        0x003
-#define LPTREG_CONTROL        0x002
-#define LPTREG_STATUS         0x001
-#define LPTREG_DATA           0x000
-
-/* LPT control register */
-#define LPTCTRL_PROGRAM       0x04   /* 0 to reprogram */
-#define LPTCTRL_WRITE         0x01
-#define LPTCTRL_ADDRSTB       0x08
-#define LPTCTRL_DATASTB       0x02
-#define LPTCTRL_INTEN         0x10
-
-/* LPT status register */
-#define LPTSTAT_SHIFT_NINTR   6
-#define LPTSTAT_WAIT          0x80
-#define LPTSTAT_NINTR         (1<<LPTSTAT_SHIFT_NINTR)
-#define LPTSTAT_PE            0x20
-#define LPTSTAT_DONE          0x10
-#define LPTSTAT_NERROR        0x08
-#define LPTSTAT_EPPTIMEOUT    0x01
-
-/* LPT data register */
-#define LPTDATA_SHIFT_TDI     0
-#define LPTDATA_SHIFT_TMS     2
-#define LPTDATA_TDI           (1<<LPTDATA_SHIFT_TDI)
-#define LPTDATA_TCK           0x02
-#define LPTDATA_TMS           (1<<LPTDATA_SHIFT_TMS)
-#define LPTDATA_INITBIAS      0x80
-
-
-/* EPP modem config/status bits */
-#define EPP_DCDBIT            0x80
-#define EPP_PTTBIT            0x08
-#define EPP_RXEBIT            0x01
-#define EPP_RXAEBIT           0x02
-#define EPP_RXHFULL           0x04
-
-#define EPP_NTHF              0x20
-#define EPP_NTAEF             0x10
-#define EPP_NTEF              EPP_PTTBIT
-
-#define EPP_TX_FIFO_ENABLE    0x10
-#define EPP_RX_FIFO_ENABLE    0x08
-#define EPP_MODEM_ENABLE      0x20
-#define EPP_LEDS              0xC0
-#define EPP_IRQ_ENABLE        0x10
-
-/* Xilinx 4k JTAG instructions */
-#define XC4K_IRLENGTH   3
-#define XC4K_EXTEST     0
-#define XC4K_PRELOAD    1
-#define XC4K_CONFIGURE  5
-#define XC4K_BYPASS     7
-
-#define EPP_CONVENTIONAL  0
-#define EPP_FPGA          1
-#define EPP_FPGAEXTSTATUS 2
-
-#define TXBUFFER_SIZE     ((HDLCDRV_MAXFLEN*6/5)+8)
-
-/* ---------------------------------------------------------------------- */
-/*
- * Information that need to be kept for each board.
- */
-
-struct baycom_state {
-	int magic;
-
-        struct pardevice *pdev;
-	struct net_device *dev;
-	unsigned int work_running;
-	struct delayed_work run_work;
-	unsigned int modem;
-	unsigned int bitrate;
-	unsigned char stat;
-
-	struct {
-		unsigned int intclk;
-		unsigned int fclk;
-		unsigned int bps;
-		unsigned int extmodem;
-		unsigned int loopback;
-	} cfg;
-
-        struct hdlcdrv_channel_params ch_params;
-
-        struct {
-		unsigned int bitbuf, bitstream, numbits, state;
-		unsigned char *bufptr;
-		int bufcnt;
-		unsigned char buf[TXBUFFER_SIZE];
-        } hdlcrx;
-
-        struct {
-		int calibrate;
-                int slotcnt;
-		int flags;
-		enum { tx_idle = 0, tx_keyup, tx_data, tx_tail } state;
-		unsigned char *bufptr;
-		int bufcnt;
-		unsigned char buf[TXBUFFER_SIZE];
-        } hdlctx;
-
-	unsigned int ptt_keyed;
-	struct sk_buff *skb;  /* next transmit packet  */
-
-#ifdef BAYCOM_DEBUG
-	struct debug_vals {
-		unsigned long last_jiffies;
-		unsigned cur_intcnt;
-		unsigned last_intcnt;
-		int cur_pllcorr;
-		int last_pllcorr;
-		unsigned int mod_cycles;
-		unsigned int demod_cycles;
-	} debug_vals;
-#endif /* BAYCOM_DEBUG */
-};
-
-/* --------------------------------------------------------------------- */
-
-#define KISS_VERBOSE
-
-/* --------------------------------------------------------------------- */
-
-#define PARAM_TXDELAY   1
-#define PARAM_PERSIST   2
-#define PARAM_SLOTTIME  3
-#define PARAM_TXTAIL    4
-#define PARAM_FULLDUP   5
-#define PARAM_HARDWARE  6
-#define PARAM_RETURN    255
-
-/* --------------------------------------------------------------------- */
-/*
- * the CRC routines are stolen from WAMPES
- * by Dieter Deyke
- */
-
-
-/*---------------------------------------------------------------------------*/
-
-#if 0
-static inline void append_crc_ccitt(unsigned char *buffer, int len)
-{
-	unsigned int crc = 0xffff;
-
-	for (;len>0;len--)
-		crc = (crc >> 8) ^ crc_ccitt_table[(crc ^ *buffer++) & 0xff];
-	crc ^= 0xffff;
-	*buffer++ = crc;
-	*buffer++ = crc >> 8;
-}
-#endif
-
-/*---------------------------------------------------------------------------*/
-
-static inline int check_crc_ccitt(const unsigned char *buf, int cnt)
-{
-	return (crc_ccitt(0xffff, buf, cnt) & 0xffff) == 0xf0b8;
-}
-
-/*---------------------------------------------------------------------------*/
-
-static inline int calc_crc_ccitt(const unsigned char *buf, int cnt)
-{
-	return (crc_ccitt(0xffff, buf, cnt) ^ 0xffff) & 0xffff;
-}
-
-/* ---------------------------------------------------------------------- */
-
-#define tenms_to_flags(bc,tenms) ((tenms * bc->bitrate) / 800)
-
-/* --------------------------------------------------------------------- */
-
-static inline void baycom_int_freq(struct baycom_state *bc)
-{
-#ifdef BAYCOM_DEBUG
-	unsigned long cur_jiffies = jiffies;
-	/*
-	 * measure the interrupt frequency
-	 */
-	bc->debug_vals.cur_intcnt++;
-	if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) {
-		bc->debug_vals.last_jiffies = cur_jiffies;
-		bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt;
-		bc->debug_vals.cur_intcnt = 0;
-		bc->debug_vals.last_pllcorr = bc->debug_vals.cur_pllcorr;
-		bc->debug_vals.cur_pllcorr = 0;
-	}
-#endif /* BAYCOM_DEBUG */
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- *    eppconfig_path should be setable  via /proc/sys.
- */
-
-static char const eppconfig_path[] = "/usr/sbin/eppfpga";
-
-static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/usr/bin:/bin", NULL };
-
-/* eppconfig: called during ifconfig up to configure the modem */
-static int eppconfig(struct baycom_state *bc)
-{
-	char modearg[256];
-	char portarg[16];
-        char *argv[] = {
-		(char *)eppconfig_path,
-		"-s",
-		"-p", portarg,
-		"-m", modearg,
-		NULL };
-
-	/* set up arguments */
-	sprintf(modearg, "%sclk,%smodem,fclk=%d,bps=%d,divider=%d%s,extstat",
-		bc->cfg.intclk ? "int" : "ext",
-		bc->cfg.extmodem ? "ext" : "int", bc->cfg.fclk, bc->cfg.bps,
-		(bc->cfg.fclk + 8 * bc->cfg.bps) / (16 * bc->cfg.bps),
-		bc->cfg.loopback ? ",loopback" : "");
-	sprintf(portarg, "%ld", bc->pdev->port->base);
-	printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg);
-
-	return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static inline void do_kiss_params(struct baycom_state *bc,
-				  unsigned char *data, unsigned long len)
-{
-
-#ifdef KISS_VERBOSE
-#define PKP(a,b) printk(KERN_INFO "baycomm_epp: channel params: " a "\n", b)
-#else /* KISS_VERBOSE */	      
-#define PKP(a,b) 
-#endif /* KISS_VERBOSE */	      
-
-	if (len < 2)
-		return;
-	switch(data[0]) {
-	case PARAM_TXDELAY:
-		bc->ch_params.tx_delay = data[1];
-		PKP("TX delay = %ums", 10 * bc->ch_params.tx_delay);
-		break;
-	case PARAM_PERSIST:   
-		bc->ch_params.ppersist = data[1];
-		PKP("p persistence = %u", bc->ch_params.ppersist);
-		break;
-	case PARAM_SLOTTIME:  
-		bc->ch_params.slottime = data[1];
-		PKP("slot time = %ums", bc->ch_params.slottime);
-		break;
-	case PARAM_TXTAIL:    
-		bc->ch_params.tx_tail = data[1];
-		PKP("TX tail = %ums", bc->ch_params.tx_tail);
-		break;
-	case PARAM_FULLDUP:   
-		bc->ch_params.fulldup = !!data[1];
-		PKP("%s duplex", bc->ch_params.fulldup ? "full" : "half");
-		break;
-	default:
-		break;
-	}
-#undef PKP
-}
-
-/* --------------------------------------------------------------------- */
-
-static void encode_hdlc(struct baycom_state *bc)
-{
-	struct sk_buff *skb;
-	unsigned char *wp, *bp;
-	int pkt_len;
-        unsigned bitstream, notbitstream, bitbuf, numbit, crc;
-	unsigned char crcarr[2];
-	int j;
-	
-	if (bc->hdlctx.bufcnt > 0)
-		return;
-	skb = bc->skb;
-	if (!skb)
-		return;
-	bc->skb = NULL;
-	pkt_len = skb->len-1; /* strip KISS byte */
-	wp = bc->hdlctx.buf;
-	bp = skb->data+1;
-	crc = calc_crc_ccitt(bp, pkt_len);
-	crcarr[0] = crc;
-	crcarr[1] = crc >> 8;
-	*wp++ = 0x7e;
-	bitstream = bitbuf = numbit = 0;
-	while (pkt_len > -2) {
-		bitstream >>= 8;
-		bitstream |= ((unsigned int)*bp) << 8;
-		bitbuf |= ((unsigned int)*bp) << numbit;
-		notbitstream = ~bitstream;
-		bp++;
-		pkt_len--;
-		if (!pkt_len)
-			bp = crcarr;
-		for (j = 0; j < 8; j++)
-			if (unlikely(!(notbitstream & (0x1f0 << j)))) {
-				bitstream &= ~(0x100 << j);
-				bitbuf = (bitbuf & (((2 << j) << numbit) - 1)) |
-					((bitbuf & ~(((2 << j) << numbit) - 1)) << 1);
-				numbit++;
-				notbitstream = ~bitstream;
-			}
-		numbit += 8;
-		while (numbit >= 8) {
-			*wp++ = bitbuf;
-			bitbuf >>= 8;
-			numbit -= 8;
-		}
-	}
-	bitbuf |= 0x7e7e << numbit;
-	numbit += 16;
-	while (numbit >= 8) {
-		*wp++ = bitbuf;
-		bitbuf >>= 8;
-		numbit -= 8;
-	}
-	bc->hdlctx.bufptr = bc->hdlctx.buf;
-	bc->hdlctx.bufcnt = wp - bc->hdlctx.buf;
-	dev_kfree_skb(skb);
-	bc->dev->stats.tx_packets++;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int transmit(struct baycom_state *bc, int cnt, unsigned char stat)
-{
-	struct parport *pp = bc->pdev->port;
-	unsigned char tmp[128];
-	int i, j;
-
-	if (bc->hdlctx.state == tx_tail && !(stat & EPP_PTTBIT))
-		bc->hdlctx.state = tx_idle;
-	if (bc->hdlctx.state == tx_idle && bc->hdlctx.calibrate <= 0) {
-		if (bc->hdlctx.bufcnt <= 0)
-			encode_hdlc(bc);
-		if (bc->hdlctx.bufcnt <= 0)
-			return 0;
-		if (!bc->ch_params.fulldup) {
-			if (!(stat & EPP_DCDBIT)) {
-				bc->hdlctx.slotcnt = bc->ch_params.slottime;
-				return 0;
-			}
-			if ((--bc->hdlctx.slotcnt) > 0)
-				return 0;
-			bc->hdlctx.slotcnt = bc->ch_params.slottime;
-			if (get_random_u8() > bc->ch_params.ppersist)
-				return 0;
-		}
-	}
-	if (bc->hdlctx.state == tx_idle && bc->hdlctx.bufcnt > 0) {
-		bc->hdlctx.state = tx_keyup;
-		bc->hdlctx.flags = tenms_to_flags(bc, bc->ch_params.tx_delay);
-		bc->ptt_keyed++;
-	}
-	while (cnt > 0) {
-		switch (bc->hdlctx.state) {
-		case tx_keyup:
-			i = min_t(int, cnt, bc->hdlctx.flags);
-			cnt -= i;
-			bc->hdlctx.flags -= i;
-			if (bc->hdlctx.flags <= 0)
-				bc->hdlctx.state = tx_data;
-			memset(tmp, 0x7e, sizeof(tmp));
-			while (i > 0) {
-				j = (i > sizeof(tmp)) ? sizeof(tmp) : i;
-				if (j != pp->ops->epp_write_data(pp, tmp, j, 0))
-					return -1;
-				i -= j;
-			}
-			break;
-
-		case tx_data:
-			if (bc->hdlctx.bufcnt <= 0) {
-				encode_hdlc(bc);
-				if (bc->hdlctx.bufcnt <= 0) {
-					bc->hdlctx.state = tx_tail;
-					bc->hdlctx.flags = tenms_to_flags(bc, bc->ch_params.tx_tail);
-					break;
-				}
-			}
-			i = min_t(int, cnt, bc->hdlctx.bufcnt);
-			bc->hdlctx.bufcnt -= i;
-			cnt -= i;
-			if (i != pp->ops->epp_write_data(pp, bc->hdlctx.bufptr, i, 0))
-					return -1;
-			bc->hdlctx.bufptr += i;
-			break;
-			
-		case tx_tail:
-			encode_hdlc(bc);
-			if (bc->hdlctx.bufcnt > 0) {
-				bc->hdlctx.state = tx_data;
-				break;
-			}
-			i = min_t(int, cnt, bc->hdlctx.flags);
-			if (i) {
-				cnt -= i;
-				bc->hdlctx.flags -= i;
-				memset(tmp, 0x7e, sizeof(tmp));
-				while (i > 0) {
-					j = (i > sizeof(tmp)) ? sizeof(tmp) : i;
-					if (j != pp->ops->epp_write_data(pp, tmp, j, 0))
-						return -1;
-					i -= j;
-				}
-				break;
-			}
-			fallthrough;
-
-		default:
-			if (bc->hdlctx.calibrate <= 0)
-				return 0;
-			i = min_t(int, cnt, bc->hdlctx.calibrate);
-			cnt -= i;
-			bc->hdlctx.calibrate -= i;
-			memset(tmp, 0, sizeof(tmp));
-			while (i > 0) {
-				j = (i > sizeof(tmp)) ? sizeof(tmp) : i;
-				if (j != pp->ops->epp_write_data(pp, tmp, j, 0))
-					return -1;
-				i -= j;
-			}
-			break;
-		}
-	}
-	return 0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static void do_rxpacket(struct net_device *dev)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-	struct sk_buff *skb;
-	unsigned char *cp;
-	unsigned pktlen;
-
-	if (bc->hdlcrx.bufcnt < 4) 
-		return;
-	if (!check_crc_ccitt(bc->hdlcrx.buf, bc->hdlcrx.bufcnt)) 
-		return;
-	pktlen = bc->hdlcrx.bufcnt-2+1; /* KISS kludge */
-	if (!(skb = dev_alloc_skb(pktlen))) {
-		printk("%s: memory squeeze, dropping packet\n", dev->name);
-		dev->stats.rx_dropped++;
-		return;
-	}
-	cp = skb_put(skb, pktlen);
-	*cp++ = 0; /* KISS kludge */
-	memcpy(cp, bc->hdlcrx.buf, pktlen - 1);
-	skb->protocol = ax25_type_trans(skb, dev);
-	netif_rx(skb);
-	dev->stats.rx_packets++;
-}
-
-static int receive(struct net_device *dev, int cnt)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-	struct parport *pp = bc->pdev->port;
-        unsigned int bitbuf, notbitstream, bitstream, numbits, state;
-	unsigned char tmp[128];
-        unsigned char *cp;
-	int cnt2, ret = 0;
-	int j;
-        
-        numbits = bc->hdlcrx.numbits;
-	state = bc->hdlcrx.state;
-	bitstream = bc->hdlcrx.bitstream;
-	bitbuf = bc->hdlcrx.bitbuf;
-	while (cnt > 0) {
-		cnt2 = (cnt > sizeof(tmp)) ? sizeof(tmp) : cnt;
-		cnt -= cnt2;
-		if (cnt2 != pp->ops->epp_read_data(pp, tmp, cnt2, 0)) {
-			ret = -1;
-			break;
-		}
-		cp = tmp;
-		for (; cnt2 > 0; cnt2--, cp++) {
-			bitstream >>= 8;
-			bitstream |= (*cp) << 8;
-			bitbuf >>= 8;
-			bitbuf |= (*cp) << 8;
-			numbits += 8;
-			notbitstream = ~bitstream;
-			for (j = 0; j < 8; j++) {
-
-				/* flag or abort */
-			        if (unlikely(!(notbitstream & (0x0fc << j)))) {
-
-					/* abort received */
-					if (!(notbitstream & (0x1fc << j)))
-						state = 0;
-
-					/* flag received */
-					else if ((bitstream & (0x1fe << j)) == (0x0fc << j)) {
-						if (state)
-							do_rxpacket(dev);
-						bc->hdlcrx.bufcnt = 0;
-						bc->hdlcrx.bufptr = bc->hdlcrx.buf;
-						state = 1;
-						numbits = 7-j;
-					}
-				}
-
-				/* stuffed bit */
-				else if (unlikely((bitstream & (0x1f8 << j)) == (0xf8 << j))) {
-					numbits--;
-					bitbuf = (bitbuf & ((~0xff) << j)) | ((bitbuf & ~((~0xff) << j)) << 1);
-					}
-				}
-			while (state && numbits >= 8) {
-				if (bc->hdlcrx.bufcnt >= TXBUFFER_SIZE) {
-					state = 0;
-				} else {
-					*(bc->hdlcrx.bufptr)++ = bitbuf >> (16-numbits);
-					bc->hdlcrx.bufcnt++;
-					numbits -= 8;
-				}
-			}
-		}
-	}
-        bc->hdlcrx.numbits = numbits;
-	bc->hdlcrx.state = state;
-	bc->hdlcrx.bitstream = bitstream;
-	bc->hdlcrx.bitbuf = bitbuf;
-	return ret;
-}
-
-/* --------------------------------------------------------------------- */
-
-#define GETTICK(x)						\
-({								\
-	x = (unsigned int)get_cycles();				\
-})
-
-static void epp_bh(struct work_struct *work)
-{
-	struct net_device *dev;
-	struct baycom_state *bc;
-	struct parport *pp;
-	unsigned char stat;
-	unsigned char tmp[2];
-	unsigned int time1 = 0, time2 = 0, time3 = 0;
-	int cnt, cnt2;
-
-	bc = container_of(work, struct baycom_state, run_work.work);
-	dev = bc->dev;
-	if (!bc->work_running)
-		return;
-	baycom_int_freq(bc);
-	pp = bc->pdev->port;
-	/* update status */
-	if (pp->ops->epp_read_addr(pp, &stat, 1, 0) != 1)
-		goto epptimeout;
-	bc->stat = stat;
-	bc->debug_vals.last_pllcorr = stat;
-	GETTICK(time1);
-	if (bc->modem == EPP_FPGAEXTSTATUS) {
-		/* get input count */
-		tmp[0] = EPP_TX_FIFO_ENABLE|EPP_RX_FIFO_ENABLE|EPP_MODEM_ENABLE|1;
-		if (pp->ops->epp_write_addr(pp, tmp, 1, 0) != 1)
-			goto epptimeout;
-		if (pp->ops->epp_read_addr(pp, tmp, 2, 0) != 2)
-			goto epptimeout;
-		cnt = tmp[0] | (tmp[1] << 8);
-		cnt &= 0x7fff;
-		/* get output count */
-		tmp[0] = EPP_TX_FIFO_ENABLE|EPP_RX_FIFO_ENABLE|EPP_MODEM_ENABLE|2;
-		if (pp->ops->epp_write_addr(pp, tmp, 1, 0) != 1)
-			goto epptimeout;
-		if (pp->ops->epp_read_addr(pp, tmp, 2, 0) != 2)
-			goto epptimeout;
-		cnt2 = tmp[0] | (tmp[1] << 8);
-		cnt2 = 16384 - (cnt2 & 0x7fff);
-		/* return to normal */
-		tmp[0] = EPP_TX_FIFO_ENABLE|EPP_RX_FIFO_ENABLE|EPP_MODEM_ENABLE;
-		if (pp->ops->epp_write_addr(pp, tmp, 1, 0) != 1)
-			goto epptimeout;
-		if (transmit(bc, cnt2, stat))
-			goto epptimeout;
-		GETTICK(time2);
-		if (receive(dev, cnt))
-			goto epptimeout;
-		if (pp->ops->epp_read_addr(pp, &stat, 1, 0) != 1)
-			goto epptimeout;
-		bc->stat = stat;
-	} else {
-		/* try to tx */
-		switch (stat & (EPP_NTAEF|EPP_NTHF)) {
-		case EPP_NTHF:
-			cnt = 2048 - 256;
-			break;
-		
-		case EPP_NTAEF:
-			cnt = 2048 - 1793;
-			break;
-		
-		case 0:
-			cnt = 0;
-			break;
-		
-		default:
-			cnt = 2048 - 1025;
-			break;
-		}
-		if (transmit(bc, cnt, stat))
-			goto epptimeout;
-		GETTICK(time2);
-		/* do receiver */
-		while ((stat & (EPP_NRAEF|EPP_NRHF)) != EPP_NRHF) {
-			switch (stat & (EPP_NRAEF|EPP_NRHF)) {
-			case EPP_NRAEF:
-				cnt = 1025;
-				break;
-
-			case 0:
-				cnt = 1793;
-				break;
-
-			default:
-				cnt = 256;
-				break;
-			}
-			if (receive(dev, cnt))
-				goto epptimeout;
-			if (pp->ops->epp_read_addr(pp, &stat, 1, 0) != 1)
-				goto epptimeout;
-		}
-		cnt = 0;
-		if (bc->bitrate < 50000)
-			cnt = 256;
-		else if (bc->bitrate < 100000)
-			cnt = 128;
-		while (cnt > 0 && stat & EPP_NREF) {
-			if (receive(dev, 1))
-				goto epptimeout;
-			cnt--;
-			if (pp->ops->epp_read_addr(pp, &stat, 1, 0) != 1)
-				goto epptimeout;
-		}
-	}
-	GETTICK(time3);
-#ifdef BAYCOM_DEBUG
-	bc->debug_vals.mod_cycles = time2 - time1;
-	bc->debug_vals.demod_cycles = time3 - time2;
-#endif /* BAYCOM_DEBUG */
-	schedule_delayed_work(&bc->run_work, 1);
-	if (!bc->skb)
-		netif_wake_queue(dev);
-	return;
- epptimeout:
-	printk(KERN_ERR "%s: EPP timeout!\n", bc_drvname);
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- * ===================== network driver interface =========================
- */
-
-static netdev_tx_t baycom_send_packet(struct sk_buff *skb, struct net_device *dev)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-
-	if (skb->protocol == htons(ETH_P_IP))
-		return ax25_ip_xmit(skb);
-
-	if (skb->data[0] != 0) {
-		do_kiss_params(bc, skb->data, skb->len);
-		dev_kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-	if (bc->skb) {
-		dev_kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-	/* strip KISS byte */
-	if (skb->len >= HDLCDRV_MAXFLEN+1 || skb->len < 3) {
-		dev_kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-	netif_stop_queue(dev);
-	bc->skb = skb;
-	return NETDEV_TX_OK;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct sockaddr *sa = (struct sockaddr *)addr;
-
-	/* addr is an AX.25 shifted ASCII mac address */
-	dev_addr_set(dev, sa->sa_data);
-	return 0;                                         
-}
-
-/* --------------------------------------------------------------------- */
-
-static void epp_wakeup(void *handle)
-{
-        struct net_device *dev = (struct net_device *)handle;
-        struct baycom_state *bc = netdev_priv(dev);
-
-        printk(KERN_DEBUG "baycom_epp: %s: why am I being woken up?\n", dev->name);
-        if (!parport_claim(bc->pdev))
-                printk(KERN_DEBUG "baycom_epp: %s: I'm broken.\n", dev->name);
-}
-
-/* --------------------------------------------------------------------- */
-
-/*
- * Open/initialize the board. This is called (in the current kernel)
- * sometime after booting when the 'ifconfig' program is run.
- *
- * This routine should set everything up anew at each open, even
- * registers that "should" only need to be set once at boot, so that
- * there is non-reboot way to recover if something goes wrong.
- */
-
-static int epp_open(struct net_device *dev)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-        struct parport *pp = parport_find_base(dev->base_addr);
-	unsigned int i, j;
-	unsigned char tmp[128];
-	unsigned char stat;
-	unsigned long tstart;
-	struct pardev_cb par_cb;
-	
-        if (!pp) {
-                printk(KERN_ERR "%s: parport at 0x%lx unknown\n", bc_drvname, dev->base_addr);
-                return -ENXIO;
-        }
-#if 0
-        if (pp->irq < 0) {
-                printk(KERN_ERR "%s: parport at 0x%lx has no irq\n", bc_drvname, pp->base);
-		parport_put_port(pp);
-                return -ENXIO;
-        }
-#endif
-	if ((~pp->modes) & (PARPORT_MODE_TRISTATE | PARPORT_MODE_PCSPP | PARPORT_MODE_SAFEININT)) {
-                printk(KERN_ERR "%s: parport at 0x%lx cannot be used\n",
-		       bc_drvname, pp->base);
-		parport_put_port(pp);
-                return -EIO;
-	}
-	memset(&bc->modem, 0, sizeof(bc->modem));
-	memset(&par_cb, 0, sizeof(par_cb));
-	par_cb.wakeup = epp_wakeup;
-	par_cb.private = (void *)dev;
-	par_cb.flags = PARPORT_DEV_EXCL;
-	for (i = 0; i < NR_PORTS; i++)
-		if (baycom_device[i] == dev)
-			break;
-
-	if (i == NR_PORTS) {
-		pr_err("%s: no device found\n", bc_drvname);
-		parport_put_port(pp);
-		return -ENODEV;
-	}
-
-	bc->pdev = parport_register_dev_model(pp, dev->name, &par_cb, i);
-	parport_put_port(pp);
-        if (!bc->pdev) {
-                printk(KERN_ERR "%s: cannot register parport at 0x%lx\n", bc_drvname, pp->base);
-                return -ENXIO;
-        }
-        if (parport_claim(bc->pdev)) {
-                printk(KERN_ERR "%s: parport at 0x%lx busy\n", bc_drvname, pp->base);
-                parport_unregister_device(bc->pdev);
-                return -EBUSY;
-        }
-        dev->irq = /*pp->irq*/ 0;
-	INIT_DELAYED_WORK(&bc->run_work, epp_bh);
-	bc->work_running = 1;
-	bc->modem = EPP_CONVENTIONAL;
-	if (eppconfig(bc))
-		printk(KERN_INFO "%s: no FPGA detected, assuming conventional EPP modem\n", bc_drvname);
-	else
-		bc->modem = /*EPP_FPGA*/ EPP_FPGAEXTSTATUS;
-	parport_write_control(pp, LPTCTRL_PROGRAM); /* prepare EPP mode; we aren't using interrupts */
-	/* reset the modem */
-	tmp[0] = 0;
-	tmp[1] = EPP_TX_FIFO_ENABLE|EPP_RX_FIFO_ENABLE|EPP_MODEM_ENABLE;
-	if (pp->ops->epp_write_addr(pp, tmp, 2, 0) != 2)
-		goto epptimeout;
-	/* autoprobe baud rate */
-	tstart = jiffies;
-	i = 0;
-	while (time_before(jiffies, tstart + HZ/3)) {
-		if (pp->ops->epp_read_addr(pp, &stat, 1, 0) != 1)
-			goto epptimeout;
-		if ((stat & (EPP_NRAEF|EPP_NRHF)) == EPP_NRHF) {
-			schedule();
-			continue;
-		}
-		if (pp->ops->epp_read_data(pp, tmp, 128, 0) != 128)
-			goto epptimeout;
-		if (pp->ops->epp_read_data(pp, tmp, 128, 0) != 128)
-			goto epptimeout;
-		i += 256;
-	}
-	for (j = 0; j < 256; j++) {
-		if (pp->ops->epp_read_addr(pp, &stat, 1, 0) != 1)
-			goto epptimeout;
-		if (!(stat & EPP_NREF))
-			break;
-		if (pp->ops->epp_read_data(pp, tmp, 1, 0) != 1)
-			goto epptimeout;
-		i++;
-	}
-	tstart = jiffies - tstart;
-	bc->bitrate = i * (8 * HZ) / tstart;
-	j = 1;
-	i = bc->bitrate >> 3;
-	while (j < 7 && i > 150) {
-		j++;
-		i >>= 1;
-	}
-	printk(KERN_INFO "%s: autoprobed bitrate: %d  int divider: %d  int rate: %d\n", 
-	       bc_drvname, bc->bitrate, j, bc->bitrate >> (j+2));
-	tmp[0] = EPP_TX_FIFO_ENABLE|EPP_RX_FIFO_ENABLE|EPP_MODEM_ENABLE/*|j*/;
-	if (pp->ops->epp_write_addr(pp, tmp, 1, 0) != 1)
-		goto epptimeout;
-	/*
-	 * initialise hdlc variables
-	 */
-	bc->hdlcrx.state = 0;
-	bc->hdlcrx.numbits = 0;
-	bc->hdlctx.state = tx_idle;
-	bc->hdlctx.bufcnt = 0;
-	bc->hdlctx.slotcnt = bc->ch_params.slottime;
-	bc->hdlctx.calibrate = 0;
-	/* start the bottom half stuff */
-	schedule_delayed_work(&bc->run_work, 1);
-	netif_start_queue(dev);
-	return 0;
-
- epptimeout:
-	printk(KERN_ERR "%s: epp timeout during bitrate probe\n", bc_drvname);
-	parport_write_control(pp, 0); /* reset the adapter */
-        parport_release(bc->pdev);
-        parport_unregister_device(bc->pdev);
-	return -EIO;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int epp_close(struct net_device *dev)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-	struct parport *pp = bc->pdev->port;
-	unsigned char tmp[1];
-
-	bc->work_running = 0;
-	cancel_delayed_work_sync(&bc->run_work);
-	bc->stat = EPP_DCDBIT;
-	tmp[0] = 0;
-	pp->ops->epp_write_addr(pp, tmp, 1, 0);
-	parport_write_control(pp, 0); /* reset the adapter */
-        parport_release(bc->pdev);
-        parport_unregister_device(bc->pdev);
-	dev_kfree_skb(bc->skb);
-	bc->skb = NULL;
-	printk(KERN_INFO "%s: close epp at iobase 0x%lx irq %u\n",
-	       bc_drvname, dev->base_addr, dev->irq);
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_setmode(struct baycom_state *bc, const char *modestr)
-{
-	const char *cp;
-
-	if (strstr(modestr,"intclk"))
-		bc->cfg.intclk = 1;
-	if (strstr(modestr,"extclk"))
-		bc->cfg.intclk = 0;
-	if (strstr(modestr,"intmodem"))
-		bc->cfg.extmodem = 0;
-	if (strstr(modestr,"extmodem"))
-		bc->cfg.extmodem = 1;
-	if (strstr(modestr,"loopback"))
-		bc->cfg.loopback = 1;
-	if (strstr(modestr, "noloopback"))
-		bc->cfg.loopback = 0;
-	if ((cp = strstr(modestr,"fclk="))) {
-		bc->cfg.fclk = simple_strtoul(cp+5, NULL, 0);
-		if (bc->cfg.fclk < 1000000)
-			bc->cfg.fclk = 1000000;
-		if (bc->cfg.fclk > 25000000)
-			bc->cfg.fclk = 25000000;
-	}
-	if ((cp = strstr(modestr,"bps="))) {
-		bc->cfg.bps = simple_strtoul(cp+4, NULL, 0);
-		if (bc->cfg.bps < 1000)
-			bc->cfg.bps = 1000;
-		if (bc->cfg.bps > 1500000)
-			bc->cfg.bps = 1500000;
-	}
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
-				 void __user *data, int cmd)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-	struct hdlcdrv_ioctl hi;
-
-	if (cmd != SIOCDEVPRIVATE)
-		return -ENOIOCTLCMD;
-
-	if (copy_from_user(&hi, data, sizeof(hi)))
-		return -EFAULT;
-	switch (hi.cmd) {
-	default:
-		return -ENOIOCTLCMD;
-
-	case HDLCDRVCTL_GETCHANNELPAR:
-		hi.data.cp.tx_delay = bc->ch_params.tx_delay;
-		hi.data.cp.tx_tail = bc->ch_params.tx_tail;
-		hi.data.cp.slottime = bc->ch_params.slottime;
-		hi.data.cp.ppersist = bc->ch_params.ppersist;
-		hi.data.cp.fulldup = bc->ch_params.fulldup;
-		break;
-
-	case HDLCDRVCTL_SETCHANNELPAR:
-		if (!capable(CAP_NET_ADMIN))
-			return -EACCES;
-		bc->ch_params.tx_delay = hi.data.cp.tx_delay;
-		bc->ch_params.tx_tail = hi.data.cp.tx_tail;
-		bc->ch_params.slottime = hi.data.cp.slottime;
-		bc->ch_params.ppersist = hi.data.cp.ppersist;
-		bc->ch_params.fulldup = hi.data.cp.fulldup;
-		bc->hdlctx.slotcnt = 1;
-		return 0;
-		
-	case HDLCDRVCTL_GETMODEMPAR:
-		hi.data.mp.iobase = dev->base_addr;
-		hi.data.mp.irq = dev->irq;
-		hi.data.mp.dma = dev->dma;
-		hi.data.mp.dma2 = 0;
-		hi.data.mp.seriobase = 0;
-		hi.data.mp.pariobase = 0;
-		hi.data.mp.midiiobase = 0;
-		break;
-
-	case HDLCDRVCTL_SETMODEMPAR:
-		if ((!capable(CAP_SYS_RAWIO)) || netif_running(dev))
-			return -EACCES;
-		dev->base_addr = hi.data.mp.iobase;
-		dev->irq = /*hi.data.mp.irq*/0;
-		dev->dma = /*hi.data.mp.dma*/0;
-		return 0;	
-		
-	case HDLCDRVCTL_GETSTAT:
-		hi.data.cs.ptt = !!(bc->stat & EPP_PTTBIT);
-		hi.data.cs.dcd = !(bc->stat & EPP_DCDBIT);
-		hi.data.cs.ptt_keyed = bc->ptt_keyed;
-		hi.data.cs.tx_packets = dev->stats.tx_packets;
-		hi.data.cs.tx_errors = dev->stats.tx_errors;
-		hi.data.cs.rx_packets = dev->stats.rx_packets;
-		hi.data.cs.rx_errors = dev->stats.rx_errors;
-		break;		
-
-	case HDLCDRVCTL_OLDGETSTAT:
-		hi.data.ocs.ptt = !!(bc->stat & EPP_PTTBIT);
-		hi.data.ocs.dcd = !(bc->stat & EPP_DCDBIT);
-		hi.data.ocs.ptt_keyed = bc->ptt_keyed;
-		break;		
-
-	case HDLCDRVCTL_CALIBRATE:
-		if (!capable(CAP_SYS_RAWIO))
-			return -EACCES;
-		bc->hdlctx.calibrate = hi.data.calibrate * bc->bitrate / 8;
-		return 0;
-
-	case HDLCDRVCTL_DRIVERNAME:
-		strscpy_pad(hi.data.drivername, "baycom_epp");
-		break;
-		
-	case HDLCDRVCTL_GETMODE:
-		sprintf(hi.data.modename, "%sclk,%smodem,fclk=%d,bps=%d%s", 
-			bc->cfg.intclk ? "int" : "ext",
-			bc->cfg.extmodem ? "ext" : "int", bc->cfg.fclk, bc->cfg.bps,
-			bc->cfg.loopback ? ",loopback" : "");
-		break;
-
-	case HDLCDRVCTL_SETMODE:
-		if (!capable(CAP_NET_ADMIN) || netif_running(dev))
-			return -EACCES;
-		hi.data.modename[sizeof(hi.data.modename)-1] = '\0';
-		return baycom_setmode(bc, hi.data.modename);
-
-	case HDLCDRVCTL_MODELIST:
-		strscpy_pad(hi.data.modename, "intclk,extclk,intmodem,extmodem,divider=x");
-		break;
-
-	case HDLCDRVCTL_MODEMPARMASK:
-		return HDLCDRV_PARMASK_IOBASE;
-
-	}
-	if (copy_to_user(data, &hi, sizeof(hi)))
-		return -EFAULT;
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static const struct net_device_ops baycom_netdev_ops = {
-	.ndo_open	     = epp_open,
-	.ndo_stop	     = epp_close,
-	.ndo_siocdevprivate  = baycom_siocdevprivate,
-	.ndo_start_xmit      = baycom_send_packet,
-	.ndo_set_mac_address = baycom_set_mac_address,
-};
-
-/*
- * Check for a network adaptor of this type, and return '0' if one exists.
- * If dev->base_addr == 0, probe all likely locations.
- * If dev->base_addr == 1, always return failure.
- * If dev->base_addr == 2, allocate space for the device and return success
- * (detachable devices only).
- */
-static void baycom_probe(struct net_device *dev)
-{
-	const struct hdlcdrv_channel_params dflt_ch_params = { 
-		20, 2, 10, 40, 0 
-	};
-	struct baycom_state *bc;
-
-	/*
-	 * not a real probe! only initialize data structures
-	 */
-	bc = netdev_priv(dev);
-	/*
-	 * initialize the baycom_state struct
-	 */
-	bc->ch_params = dflt_ch_params;
-	bc->ptt_keyed = 0;
-
-	/*
-	 * initialize the device struct
-	 */
-
-	/* Fill in the fields of the device structure */
-	bc->skb = NULL;
-	
-	dev->netdev_ops = &baycom_netdev_ops;
-	dev->header_ops = &ax25_header_ops;
-	
-	dev->type = ARPHRD_AX25;           /* AF_AX25 device */
-	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
-	dev->mtu = AX25_DEF_PACLEN;        /* eth_mtu is the default */
-	dev->addr_len = AX25_ADDR_LEN;     /* sizeof an ax.25 address */
-	memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
-	dev_addr_set(dev, (u8 *)&null_ax25_address);
-	dev->tx_queue_len = 16;
-
-	/* New style flags */
-	dev->flags = 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-/*
- * command line settable parameters
- */
-static char *mode[NR_PORTS] = { "", };
-static int iobase[NR_PORTS] = { 0x378, };
-
-module_param_array(mode, charp, NULL, 0);
-MODULE_PARM_DESC(mode, "baycom operating mode");
-module_param_hw_array(iobase, int, ioport, NULL, 0);
-MODULE_PARM_DESC(iobase, "baycom io base address");
-
-MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
-MODULE_DESCRIPTION("Baycom epp amateur radio modem driver");
-MODULE_LICENSE("GPL");
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_epp_par_probe(struct pardevice *par_dev)
-{
-	struct device_driver *drv = par_dev->dev.driver;
-	int len = strlen(drv->name);
-
-	if (strncmp(par_dev->name, drv->name, len))
-		return -ENODEV;
-
-	return 0;
-}
-
-static struct parport_driver baycom_epp_par_driver = {
-	.name = "bce",
-	.probe = baycom_epp_par_probe,
-};
-
-static void __init baycom_epp_dev_setup(struct net_device *dev)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-
-	/*
-	 * initialize part of the baycom_state struct
-	 */
-	bc->dev = dev;
-	bc->magic = BAYCOM_MAGIC;
-	bc->cfg.fclk = 19666600;
-	bc->cfg.bps = 9600;
-	/*
-	 * initialize part of the device struct
-	 */
-	baycom_probe(dev);
-}
-
-static int __init init_baycomepp(void)
-{
-	int i, found = 0, ret;
-	char set_hw = 1;
-
-	printk(bc_drvinfo);
-
-	ret = parport_register_driver(&baycom_epp_par_driver);
-	if (ret)
-		return ret;
-
-	/*
-	 * register net devices
-	 */
-	for (i = 0; i < NR_PORTS; i++) {
-		struct net_device *dev;
-		
-		dev = alloc_netdev(sizeof(struct baycom_state), "bce%d",
-				   NET_NAME_UNKNOWN, baycom_epp_dev_setup);
-
-		if (!dev) {
-			printk(KERN_WARNING "bce%d : out of memory\n", i);
-			return found ? 0 : -ENOMEM;
-		}
-			
-		sprintf(dev->name, "bce%d", i);
-		dev->base_addr = iobase[i];
-
-		if (!mode[i])
-			set_hw = 0;
-		if (!set_hw)
-			iobase[i] = 0;
-
-		if (register_netdev(dev)) {
-			printk(KERN_WARNING "%s: cannot register net device %s\n", bc_drvname, dev->name);
-			free_netdev(dev);
-			break;
-		}
-		if (set_hw && baycom_setmode(netdev_priv(dev), mode[i]))
-			set_hw = 0;
-		baycom_device[i] = dev;
-		found++;
-	}
-
-	if (found == 0) {
-		parport_unregister_driver(&baycom_epp_par_driver);
-		return -ENXIO;
-	}
-
-	return 0;
-}
-
-static void __exit cleanup_baycomepp(void)
-{
-	int i;
-
-	for(i = 0; i < NR_PORTS; i++) {
-		struct net_device *dev = baycom_device[i];
-
-		if (dev) {
-			struct baycom_state *bc = netdev_priv(dev);
-			if (bc->magic == BAYCOM_MAGIC) {
-				unregister_netdev(dev);
-				free_netdev(dev);
-			} else
-				printk(paranoia_str, "cleanup_module");
-		}
-	}
-	parport_unregister_driver(&baycom_epp_par_driver);
-}
-
-module_init(init_baycomepp);
-module_exit(cleanup_baycomepp);
-
-/* --------------------------------------------------------------------- */
-
-#ifndef MODULE
-
-/*
- * format: baycom_epp=io,mode
- * mode: fpga config options
- */
-
-static int __init baycom_epp_setup(char *str)
-{
-        static unsigned __initdata nr_dev = 0;
-	int ints[2];
-
-        if (nr_dev >= NR_PORTS)
-                return 0;
-	str = get_options(str, 2, ints);
-	if (ints[0] < 1)
-		return 0;
-	mode[nr_dev] = str;
-	iobase[nr_dev] = ints[1];
-	nr_dev++;
-	return 1;
-}
-
-__setup("baycom_epp=", baycom_epp_setup);
-
-#endif /* MODULE */
-/* --------------------------------------------------------------------- */
diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c
deleted file mode 100644
index f03797103c6a..000000000000
--- a/drivers/net/hamradio/baycom_par.c
+++ /dev/null
@@ -1,598 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*****************************************************************************/
-
-/*
- *	baycom_par.c  -- baycom par96 and picpar radio modem driver.
- *
- *	Copyright (C) 1996-2000  Thomas Sailer (sailer@ife.ee.ethz.ch)
- *
- *  Please note that the GPL allows you to use the driver, NOT the radio.
- *  In order to use the radio, you need a license from the communications
- *  authority of your country.
- *
- *  Supported modems
- *
- *  par96:  This is a modem for 9600 baud FSK compatible to the G3RUH standard.
- *          The modem does all the filtering and regenerates the receiver clock.
- *          Data is transferred from and to the PC via a shift register.
- *          The shift register is filled with 16 bits and an interrupt is
- *          signalled. The PC then empties the shift register in a burst. This
- *          modem connects to the parallel port, hence the name. The modem
- *          leaves the implementation of the HDLC protocol and the scrambler
- *          polynomial to the PC. This modem is no longer available (at least
- *          from Baycom) and has been replaced by the PICPAR modem (see below).
- *          You may however still build one from the schematics published in
- *          cq-DL :-).
- *
- *  picpar: This is a redesign of the par96 modem by Henning Rech, DF9IC. The
- *          modem is protocol compatible to par96, but uses only three low
- *          power ICs and can therefore be fed from the parallel port and
- *          does not require an additional power supply. It features
- *          built in DCD circuitry. The driver should therefore be configured
- *          for hardware DCD.
- *
- *  Command line options (insmod command line)
- *
- *  mode     driver mode string. Valid choices are par96 and picpar.
- *  iobase   base address of the port; common values are 0x378, 0x278, 0x3bc
- *
- *  History:
- *   0.1  26.06.1996  Adapted from baycom.c and made network driver interface
- *        18.10.1996  Changed to new user space access routines (copy_{to,from}_user)
- *   0.3  26.04.1997  init code/data tagged
- *   0.4  08.07.1997  alternative ser12 decoding algorithm (uses delta CTS ints)
- *   0.5  11.11.1997  split into separate files for ser12/par96
- *   0.6  03.08.1999  adapt to Linus' new __setup/__initcall
- *                    removed some pre-2.2 kernel compatibility cruft
- *   0.7  10.08.1999  Check if parport can do SPP and is safe to access during interrupt contexts
- *   0.8  12.02.2000  adapted to softnet driver interface
- *                    removed direct parport access, uses parport driver methods
- *   0.9  03.07.2000  fix interface name handling
- */
-
-/*****************************************************************************/
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/fcntl.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/in.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/hdlcdrv.h>
-#include <linux/baycom.h>
-#include <linux/parport.h>
-#include <linux/bitops.h>
-#include <linux/jiffies.h>
-
-#include <linux/uaccess.h>
-
-/* --------------------------------------------------------------------- */
-
-#define BAYCOM_DEBUG
-
-/*
- * modem options; bit mask
- */
-#define BAYCOM_OPTIONS_SOFTDCD  1
-
-/* --------------------------------------------------------------------- */
-
-static const char bc_drvname[] = "baycom_par";
-static const char bc_drvinfo[] = KERN_INFO "baycom_par: (C) 1996-2000 Thomas Sailer, HB9JNX/AE4WA\n"
-"baycom_par: version 0.9\n";
-
-/* --------------------------------------------------------------------- */
-
-#define NR_PORTS 4
-
-static struct net_device *baycom_device[NR_PORTS];
-
-/* --------------------------------------------------------------------- */
-
-#define PAR96_BURSTBITS 16
-#define PAR96_BURST     4
-#define PAR96_PTT       2
-#define PAR96_TXBIT     1
-#define PAR96_ACK       0x40
-#define PAR96_RXBIT     0x20
-#define PAR96_DCD       0x10
-#define PAR97_POWER     0xf8
-
-/* ---------------------------------------------------------------------- */
-/*
- * Information that need to be kept for each board.
- */
-
-struct baycom_state {
-	struct hdlcdrv_state hdrv;
-
-	struct pardevice *pdev;
-	unsigned int options;
-
-	struct modem_state {
-		short arb_divider;
-		unsigned char flags;
-		unsigned int shreg;
-		struct modem_state_par96 {
-			int dcd_count;
-			unsigned int dcd_shreg;
-			unsigned long descram;
-			unsigned long scram;
-		} par96;
-	} modem;
-
-#ifdef BAYCOM_DEBUG
-	struct debug_vals {
-		unsigned long last_jiffies;
-		unsigned cur_intcnt;
-		unsigned last_intcnt;
-		int cur_pllcorr;
-		int last_pllcorr;
-	} debug_vals;
-#endif /* BAYCOM_DEBUG */
-};
-
-/* --------------------------------------------------------------------- */
-
-static inline void baycom_int_freq(struct baycom_state *bc)
-{
-#ifdef BAYCOM_DEBUG
-	unsigned long cur_jiffies = jiffies;
-	/*
-	 * measure the interrupt frequency
-	 */
-	bc->debug_vals.cur_intcnt++;
-	if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) {
-		bc->debug_vals.last_jiffies = cur_jiffies;
-		bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt;
-		bc->debug_vals.cur_intcnt = 0;
-		bc->debug_vals.last_pllcorr = bc->debug_vals.cur_pllcorr;
-		bc->debug_vals.cur_pllcorr = 0;
-	}
-#endif /* BAYCOM_DEBUG */
-}
-
-/* --------------------------------------------------------------------- */
-/*
- * ===================== PAR96 specific routines =========================
- */
-
-#define PAR96_DESCRAM_TAP1 0x20000
-#define PAR96_DESCRAM_TAP2 0x01000
-#define PAR96_DESCRAM_TAP3 0x00001
-
-#define PAR96_DESCRAM_TAPSH1 17
-#define PAR96_DESCRAM_TAPSH2 12
-#define PAR96_DESCRAM_TAPSH3 0
-
-#define PAR96_SCRAM_TAP1 0x20000 /* X^17 */
-#define PAR96_SCRAM_TAPN 0x00021 /* X^0+X^5 */
-
-/* --------------------------------------------------------------------- */
-
-static inline void par96_tx(struct net_device *dev, struct baycom_state *bc)
-{
-	int i;
-	unsigned int data = hdlcdrv_getbits(&bc->hdrv);
-	struct parport *pp = bc->pdev->port;
-
-	for(i = 0; i < PAR96_BURSTBITS; i++, data >>= 1) {
-		unsigned char val = PAR97_POWER;
-		bc->modem.par96.scram = ((bc->modem.par96.scram << 1) |
-					 (bc->modem.par96.scram & 1));
-		if (!(data & 1))
-			bc->modem.par96.scram ^= 1;
-		if (bc->modem.par96.scram & (PAR96_SCRAM_TAP1 << 1))
-			bc->modem.par96.scram ^=
-				(PAR96_SCRAM_TAPN << 1);
-		if (bc->modem.par96.scram & (PAR96_SCRAM_TAP1 << 2))
-			val |= PAR96_TXBIT;
-		pp->ops->write_data(pp, val);
-		pp->ops->write_data(pp, val | PAR96_BURST);
-	}
-}
-
-/* --------------------------------------------------------------------- */
-
-static inline void par96_rx(struct net_device *dev, struct baycom_state *bc)
-{
-	int i;
-	unsigned int data, mask, mask2, descx;
-	struct parport *pp = bc->pdev->port;
-
-	/*
-	 * do receiver; differential decode and descramble on the fly
-	 */
-	for(data = i = 0; i < PAR96_BURSTBITS; i++) {
-		bc->modem.par96.descram = (bc->modem.par96.descram << 1);
-		if (pp->ops->read_status(pp) & PAR96_RXBIT)
-			bc->modem.par96.descram |= 1;
-		descx = bc->modem.par96.descram ^
-			(bc->modem.par96.descram >> 1);
-		/* now the diff decoded data is inverted in descram */
-		pp->ops->write_data(pp, PAR97_POWER | PAR96_PTT);
-		descx ^= ((descx >> PAR96_DESCRAM_TAPSH1) ^
-			  (descx >> PAR96_DESCRAM_TAPSH2));
-		data >>= 1;
-		if (!(descx & 1))
-			data |= 0x8000;
-		pp->ops->write_data(pp, PAR97_POWER | PAR96_PTT | PAR96_BURST);
-	}
-	hdlcdrv_putbits(&bc->hdrv, data);
-	/*
-	 * do DCD algorithm
-	 */
-	if (bc->options & BAYCOM_OPTIONS_SOFTDCD) {
-		bc->modem.par96.dcd_shreg = (bc->modem.par96.dcd_shreg >> 16)
-			| (data << 16);
-		/* search for flags and set the dcd counter appropriately */
-		for(mask = 0x1fe00, mask2 = 0xfc00, i = 0;
-		    i < PAR96_BURSTBITS; i++, mask <<= 1, mask2 <<= 1)
-			if ((bc->modem.par96.dcd_shreg & mask) == mask2)
-				bc->modem.par96.dcd_count = HDLCDRV_MAXFLEN+4;
-		/* check for abort/noise sequences */
-		for(mask = 0x1fe00, mask2 = 0x1fe00, i = 0;
-		    i < PAR96_BURSTBITS; i++, mask <<= 1, mask2 <<= 1)
-			if (((bc->modem.par96.dcd_shreg & mask) == mask2) &&
-			    (bc->modem.par96.dcd_count >= 0))
-				bc->modem.par96.dcd_count -= HDLCDRV_MAXFLEN-10;
-		/* decrement and set the dcd variable */
-		if (bc->modem.par96.dcd_count >= 0)
-			bc->modem.par96.dcd_count -= 2;
-		hdlcdrv_setdcd(&bc->hdrv, bc->modem.par96.dcd_count > 0);
-	} else {
-		hdlcdrv_setdcd(&bc->hdrv, !!(pp->ops->read_status(pp) & PAR96_DCD));
-	}
-}
-
-/* --------------------------------------------------------------------- */
-
-static void par96_interrupt(void *dev_id)
-{
-	struct net_device *dev = dev_id;
-	struct baycom_state *bc = netdev_priv(dev);
-
-	baycom_int_freq(bc);
-	/*
-	 * check if transmitter active
-	 */
-	if (hdlcdrv_ptt(&bc->hdrv))
-		par96_tx(dev, bc);
-	else {
-		par96_rx(dev, bc);
-		if (--bc->modem.arb_divider <= 0) {
-			bc->modem.arb_divider = 6;
-			local_irq_enable();
-			hdlcdrv_arbitrate(dev, &bc->hdrv);
-		}
-	}
-	local_irq_enable();
-	hdlcdrv_transmitter(dev, &bc->hdrv);
-	hdlcdrv_receiver(dev, &bc->hdrv);
-        local_irq_disable();
-}
-
-/* --------------------------------------------------------------------- */
-
-static void par96_wakeup(void *handle)
-{
-        struct net_device *dev = (struct net_device *)handle;
-	struct baycom_state *bc = netdev_priv(dev);
-
-	printk(KERN_DEBUG "baycom_par: %s: why am I being woken up?\n", dev->name);
-	if (!parport_claim(bc->pdev))
-		printk(KERN_DEBUG "baycom_par: %s: I'm broken.\n", dev->name);
-}
-
-/* --------------------------------------------------------------------- */
-
-static int par96_open(struct net_device *dev)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-	struct pardev_cb par_cb;
-	struct parport *pp;
-	int i;
-
-	if (!dev || !bc)
-		return -ENXIO;
-	pp = parport_find_base(dev->base_addr);
-	if (!pp) {
-		printk(KERN_ERR "baycom_par: parport at 0x%lx unknown\n", dev->base_addr);
-		return -ENXIO;
-	}
-	if (pp->irq < 0) {
-		printk(KERN_ERR "baycom_par: parport at 0x%lx has no irq\n", pp->base);
-		parport_put_port(pp);
-		return -ENXIO;
-	}
-	if ((~pp->modes) & (PARPORT_MODE_PCSPP | PARPORT_MODE_SAFEININT)) {
-		printk(KERN_ERR "baycom_par: parport at 0x%lx cannot be used\n", pp->base);
-		parport_put_port(pp);
-		return -ENXIO;
-	}
-	memset(&bc->modem, 0, sizeof(bc->modem));
-	bc->hdrv.par.bitrate = 9600;
-	memset(&par_cb, 0, sizeof(par_cb));
-	par_cb.wakeup = par96_wakeup;
-	par_cb.irq_func = par96_interrupt;
-	par_cb.private = (void *)dev;
-	par_cb.flags = PARPORT_DEV_EXCL;
-	for (i = 0; i < NR_PORTS; i++)
-		if (baycom_device[i] == dev)
-			break;
-
-	if (i == NR_PORTS) {
-		pr_err("%s: no device found\n", bc_drvname);
-		parport_put_port(pp);
-		return -ENODEV;
-	}
-	bc->pdev = parport_register_dev_model(pp, dev->name, &par_cb, i);
-	parport_put_port(pp);
-	if (!bc->pdev) {
-		printk(KERN_ERR "baycom_par: cannot register parport at 0x%lx\n", dev->base_addr);
-		return -ENXIO;
-	}
-	if (parport_claim(bc->pdev)) {
-		printk(KERN_ERR "baycom_par: parport at 0x%lx busy\n", pp->base);
-		parport_unregister_device(bc->pdev);
-		return -EBUSY;
-	}
-	pp = bc->pdev->port;
-	dev->irq = pp->irq;
-	pp->ops->data_forward(pp);
-        bc->hdrv.par.bitrate = 9600;
-	pp->ops->write_data(pp, PAR96_PTT | PAR97_POWER); /* switch off PTT */
-	pp->ops->enable_irq(pp);
-	printk(KERN_INFO "%s: par96 at iobase 0x%lx irq %u options 0x%x\n",
-	       bc_drvname, dev->base_addr, dev->irq, bc->options);
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int par96_close(struct net_device *dev)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-	struct parport *pp;
-
-	if (!dev || !bc)
-		return -EINVAL;
-	pp = bc->pdev->port;
-	/* disable interrupt */
-	pp->ops->disable_irq(pp);
-	/* switch off PTT */
-	pp->ops->write_data(pp, PAR96_PTT | PAR97_POWER);
-	parport_release(bc->pdev);
-	parport_unregister_device(bc->pdev);
-	printk(KERN_INFO "%s: close par96 at iobase 0x%lx irq %u\n",
-	       bc_drvname, dev->base_addr, dev->irq);
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-/*
- * ===================== hdlcdrv driver interface =========================
- */
-
-static int baycom_ioctl(struct net_device *dev, void __user *data,
-			struct hdlcdrv_ioctl *hi, int cmd);
-
-/* --------------------------------------------------------------------- */
-
-static const struct hdlcdrv_ops par96_ops = {
-	.drvname = bc_drvname,
-	.drvinfo = bc_drvinfo,
-	.open    = par96_open,
-	.close   = par96_close,
-	.ioctl   = baycom_ioctl
-};
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_setmode(struct baycom_state *bc, const char *modestr)
-{
-	if (!strncmp(modestr, "picpar", 6))
-		bc->options = 0;
-	else if (!strncmp(modestr, "par96", 5))
-		bc->options = BAYCOM_OPTIONS_SOFTDCD;
-	else
-		bc->options = !!strchr(modestr, '*');
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_ioctl(struct net_device *dev, void __user *data,
-			struct hdlcdrv_ioctl *hi, int cmd)
-{
-	struct baycom_state *bc;
-	struct baycom_ioctl bi;
-
-	if (!dev)
-		return -EINVAL;
-
-	bc = netdev_priv(dev);
-	BUG_ON(bc->hdrv.magic != HDLCDRV_MAGIC);
-
-	if (cmd != SIOCDEVPRIVATE)
-		return -ENOIOCTLCMD;
-	switch (hi->cmd) {
-	default:
-		break;
-
-	case HDLCDRVCTL_GETMODE:
-		strscpy(hi->data.modename, bc->options ? "par96" : "picpar");
-		if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
-			return -EFAULT;
-		return 0;
-
-	case HDLCDRVCTL_SETMODE:
-		if (netif_running(dev) || !capable(CAP_NET_ADMIN))
-			return -EACCES;
-		hi->data.modename[sizeof(hi->data.modename)-1] = '\0';
-		return baycom_setmode(bc, hi->data.modename);
-
-	case HDLCDRVCTL_MODELIST:
-		strscpy(hi->data.modename, "par96,picpar");
-		if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
-			return -EFAULT;
-		return 0;
-
-	case HDLCDRVCTL_MODEMPARMASK:
-		return HDLCDRV_PARMASK_IOBASE;
-
-	}
-
-	if (copy_from_user(&bi, data, sizeof(bi)))
-		return -EFAULT;
-	switch (bi.cmd) {
-	default:
-		return -ENOIOCTLCMD;
-
-#ifdef BAYCOM_DEBUG
-	case BAYCOMCTL_GETDEBUG:
-		bi.data.dbg.debug1 = bc->hdrv.ptt_keyed;
-		bi.data.dbg.debug2 = bc->debug_vals.last_intcnt;
-		bi.data.dbg.debug3 = bc->debug_vals.last_pllcorr;
-		break;
-#endif /* BAYCOM_DEBUG */
-
-	}
-	if (copy_to_user(data, &bi, sizeof(bi)))
-		return -EFAULT;
-	return 0;
-
-}
-
-/* --------------------------------------------------------------------- */
-
-/*
- * command line settable parameters
- */
-static char *mode[NR_PORTS] = { "picpar", };
-static int iobase[NR_PORTS] = { 0x378, };
-
-module_param_array(mode, charp, NULL, 0);
-MODULE_PARM_DESC(mode, "baycom operating mode; eg. par96 or picpar");
-module_param_hw_array(iobase, int, ioport, NULL, 0);
-MODULE_PARM_DESC(iobase, "baycom io base address");
-
-MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
-MODULE_DESCRIPTION("Baycom par96 and picpar amateur radio modem driver");
-MODULE_LICENSE("GPL");
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_par_probe(struct pardevice *par_dev)
-{
-	struct device_driver *drv = par_dev->dev.driver;
-	int len = strlen(drv->name);
-
-	if (strncmp(par_dev->name, drv->name, len))
-		return -ENODEV;
-
-	return 0;
-}
-
-static struct parport_driver baycom_par_driver = {
-	.name = "bcp",
-	.probe = baycom_par_probe,
-};
-
-static int __init init_baycompar(void)
-{
-	int i, found = 0, ret;
-	char set_hw = 1;
-
-	printk(bc_drvinfo);
-
-	ret = parport_register_driver(&baycom_par_driver);
-	if (ret)
-		return ret;
-
-	/*
-	 * register net devices
-	 */
-	for (i = 0; i < NR_PORTS; i++) {
-		struct net_device *dev;
-		struct baycom_state *bc;
-		char ifname[IFNAMSIZ];
-
-		sprintf(ifname, "bcp%d", i);
-
-		if (!mode[i])
-			set_hw = 0;
-		if (!set_hw)
-			iobase[i] = 0;
-
-		dev = hdlcdrv_register(&par96_ops,
-				       sizeof(struct baycom_state),
-				       ifname, iobase[i], 0, 0);
-		if (IS_ERR(dev)) 
-			break;
-
-		bc = netdev_priv(dev);
-		if (set_hw && baycom_setmode(bc, mode[i]))
-			set_hw = 0;
-		found++;
-		baycom_device[i] = dev;
-	}
-
-	if (!found) {
-		parport_unregister_driver(&baycom_par_driver);
-		return -ENXIO;
-	}
-	return 0;
-}
-
-static void __exit cleanup_baycompar(void)
-{
-	int i;
-
-	for(i = 0; i < NR_PORTS; i++) {
-		struct net_device *dev = baycom_device[i];
-
-		if (dev)
-			hdlcdrv_unregister(dev);
-	}
-	parport_unregister_driver(&baycom_par_driver);
-}
-
-module_init(init_baycompar);
-module_exit(cleanup_baycompar);
-
-/* --------------------------------------------------------------------- */
-
-#ifndef MODULE
-
-/*
- * format: baycom_par=io,mode
- * mode: par96,picpar
- */
-
-static int __init baycom_par_setup(char *str)
-{
-        static unsigned nr_dev;
-	int ints[2];
-
-        if (nr_dev >= NR_PORTS)
-                return 0;
-        str = get_options(str, 2, ints);
-        if (ints[0] < 1)
-                return 0;
-        mode[nr_dev] = str;
-        iobase[nr_dev] = ints[1];
-	nr_dev++;
-	return 1;
-}
-
-__setup("baycom_par=", baycom_par_setup);
-
-#endif /* MODULE */
-/* --------------------------------------------------------------------- */
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
deleted file mode 100644
index ee5bd3c12040..000000000000
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ /dev/null
@@ -1,678 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*****************************************************************************/
-
-/*
- *	baycom_ser_fdx.c  -- baycom ser12 fullduplex radio modem driver.
- *
- *	Copyright (C) 1996-2000  Thomas Sailer (sailer@ife.ee.ethz.ch)
- *
- *  Please note that the GPL allows you to use the driver, NOT the radio.
- *  In order to use the radio, you need a license from the communications
- *  authority of your country.
- *
- *  Supported modems
- *
- *  ser12:  This is a very simple 1200 baud AFSK modem. The modem consists only
- *          of a modulator/demodulator chip, usually a TI TCM3105. The computer
- *          is responsible for regenerating the receiver bit clock, as well as
- *          for handling the HDLC protocol. The modem connects to a serial port,
- *          hence the name. Since the serial port is not used as an async serial
- *          port, the kernel driver for serial ports cannot be used, and this
- *          driver only supports standard serial hardware (8250, 16450, 16550A)
- *
- *          This modem usually draws its supply current out of the otherwise unused
- *          TXD pin of the serial port. Thus a contiguous stream of 0x00-bytes
- *          is transmitted to achieve a positive supply voltage.
- *
- *  hsk:    This is a 4800 baud FSK modem, designed for TNC use. It works fine
- *          in 'baycom-mode' :-)  In contrast to the TCM3105 modem, power is
- *          externally supplied. So there's no need to provide the 0x00-byte-stream
- *          when receiving or idle, which drastically reduces interrupt load.
- *
- *  Command line options (insmod command line)
- *
- *  mode     ser#    hardware DCD
- *           ser#*   software DCD
- *           ser#+   hardware DCD, inverted signal at DCD pin
- *           '#' denotes the baud rate / 100, eg. ser12* is '1200 baud, soft DCD'
- *  iobase   base address of the port; common values are 0x3f8, 0x2f8, 0x3e8, 0x2e8
- *  baud     baud rate (between 300 and 4800)
- *  irq      interrupt line of the port; common values are 4,3
- *
- *  History:
- *   0.1  26.06.1996  Adapted from baycom.c and made network driver interface
- *        18.10.1996  Changed to new user space access routines (copy_{to,from}_user)
- *   0.3  26.04.1997  init code/data tagged
- *   0.4  08.07.1997  alternative ser12 decoding algorithm (uses delta CTS ints)
- *   0.5  11.11.1997  ser12/par96 split into separate files
- *   0.6  24.01.1998  Thorsten Kranzkowski, dl8bcu and Thomas Sailer:
- *                    reduced interrupt load in transmit case
- *                    reworked receiver
- *   0.7  03.08.1999  adapt to Linus' new __setup/__initcall
- *   0.8  10.08.1999  use module_init/module_exit
- *   0.9  12.02.2000  adapted to softnet driver interface
- *   0.10 03.07.2000  fix interface name handling
- */
-
-/*****************************************************************************/
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/hdlcdrv.h>
-#include <linux/baycom.h>
-#include <linux/jiffies.h>
-#include <linux/time64.h>
-
-#include <linux/uaccess.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-
-/* --------------------------------------------------------------------- */
-
-#define BAYCOM_DEBUG
-
-/* --------------------------------------------------------------------- */
-
-static const char bc_drvname[] = "baycom_ser_fdx";
-static const char bc_drvinfo[] = KERN_INFO "baycom_ser_fdx: (C) 1996-2000 Thomas Sailer, HB9JNX/AE4WA\n"
-"baycom_ser_fdx: version 0.10\n";
-
-/* --------------------------------------------------------------------- */
-
-#define NR_PORTS 4
-
-static struct net_device *baycom_device[NR_PORTS];
-
-/* --------------------------------------------------------------------- */
-
-#define RBR(iobase) (iobase+0)
-#define THR(iobase) (iobase+0)
-#define IER(iobase) (iobase+1)
-#define IIR(iobase) (iobase+2)
-#define FCR(iobase) (iobase+2)
-#define LCR(iobase) (iobase+3)
-#define MCR(iobase) (iobase+4)
-#define LSR(iobase) (iobase+5)
-#define MSR(iobase) (iobase+6)
-#define SCR(iobase) (iobase+7)
-#define DLL(iobase) (iobase+0)
-#define DLM(iobase) (iobase+1)
-
-#define SER12_EXTENT 8
-
-/* ---------------------------------------------------------------------- */
-/*
- * Information that need to be kept for each board.
- */
-
-struct baycom_state {
-	struct hdlcdrv_state hdrv;
-
-	unsigned int baud, baud_us, baud_arbdiv, baud_uartdiv, baud_dcdtimeout;
-	int opt_dcd;
-
-	struct modem_state {
-		unsigned char flags;
-		unsigned char ptt;
-		unsigned int shreg;
-		struct modem_state_ser12 {
-			unsigned char tx_bit;
-			unsigned char last_rxbit;
-			int dcd_sum0, dcd_sum1, dcd_sum2;
-			int dcd_time;
-			unsigned int pll_time;
-			unsigned int txshreg;
-		} ser12;
-	} modem;
-
-#ifdef BAYCOM_DEBUG
-	struct debug_vals {
-		unsigned long last_jiffies;
-		unsigned cur_intcnt;
-		unsigned last_intcnt;
-		int cur_pllcorr;
-		int last_pllcorr;
-	} debug_vals;
-#endif /* BAYCOM_DEBUG */
-};
-
-/* --------------------------------------------------------------------- */
-
-static inline void baycom_int_freq(struct baycom_state *bc)
-{
-#ifdef BAYCOM_DEBUG
-	unsigned long cur_jiffies = jiffies;
-	/*
-	 * measure the interrupt frequency
-	 */
-	bc->debug_vals.cur_intcnt++;
-	if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) {
-		bc->debug_vals.last_jiffies = cur_jiffies;
-		bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt;
-		bc->debug_vals.cur_intcnt = 0;
-		bc->debug_vals.last_pllcorr = bc->debug_vals.cur_pllcorr;
-		bc->debug_vals.cur_pllcorr = 0;
-	}
-#endif /* BAYCOM_DEBUG */
-}
-
-/* --------------------------------------------------------------------- */
-/*
- * ===================== SER12 specific routines =========================
- */
-
-/* --------------------------------------------------------------------- */
-
-static inline void ser12_set_divisor(struct net_device *dev,
-                                     unsigned int divisor)
-{
-        outb(0x81, LCR(dev->base_addr));        /* DLAB = 1 */
-        outb(divisor, DLL(dev->base_addr));
-        outb(divisor >> 8, DLM(dev->base_addr));
-        outb(0x01, LCR(dev->base_addr));        /* word length = 6 */
-        /*
-         * make sure the next interrupt is generated;
-         * 0 must be used to power the modem; the modem draws its
-         * power from the TxD line
-         */
-        outb(0x00, THR(dev->base_addr));
-        /*
-         * it is important not to set the divider while transmitting;
-         * this reportedly makes some UARTs generating interrupts
-         * in the hundredthousands per second region
-         * Reported by: Ignacio.Arenaza@studi.epfl.ch (Ignacio Arenaza Nuno)
-         */
-}
-
-static __inline__ void ser12_rx(struct net_device *dev, struct baycom_state *bc, struct timespec64 *ts, unsigned char curs)
-{
-	int timediff;
-	int bdus8 = bc->baud_us >> 3;
-	int bdus4 = bc->baud_us >> 2;
-	int bdus2 = bc->baud_us >> 1;
-
-	timediff = 1000000 + ts->tv_nsec / NSEC_PER_USEC -
-					bc->modem.ser12.pll_time;
-	while (timediff >= 500000)
-		timediff -= 1000000;
-	while (timediff >= bdus2) {
-		timediff -= bc->baud_us;
-		bc->modem.ser12.pll_time += bc->baud_us;
-		bc->modem.ser12.dcd_time--;
-		/* first check if there is room to add a bit */
-		if (bc->modem.shreg & 1) {
-			hdlcdrv_putbits(&bc->hdrv, (bc->modem.shreg >> 1) ^ 0xffff);
-			bc->modem.shreg = 0x10000;
-		}
-		/* add a one bit */
-		bc->modem.shreg >>= 1;
-	}
-	if (bc->modem.ser12.dcd_time <= 0) {
-		if (!bc->opt_dcd)
-			hdlcdrv_setdcd(&bc->hdrv, (bc->modem.ser12.dcd_sum0 + 
-						   bc->modem.ser12.dcd_sum1 + 
-						   bc->modem.ser12.dcd_sum2) < 0);
-		bc->modem.ser12.dcd_sum2 = bc->modem.ser12.dcd_sum1;
-		bc->modem.ser12.dcd_sum1 = bc->modem.ser12.dcd_sum0;
-		bc->modem.ser12.dcd_sum0 = 2; /* slight bias */
-		bc->modem.ser12.dcd_time += 120;
-	}
-	if (bc->modem.ser12.last_rxbit != curs) {
-		bc->modem.ser12.last_rxbit = curs;
-		bc->modem.shreg |= 0x10000;
-		/* adjust the PLL */
-		if (timediff > 0)
-			bc->modem.ser12.pll_time += bdus8;
-		else
-			bc->modem.ser12.pll_time += 1000000 - bdus8;
-		/* update DCD */
-		if (abs(timediff) > bdus4)
-			bc->modem.ser12.dcd_sum0 += 4;
-		else
-			bc->modem.ser12.dcd_sum0--;
-#ifdef BAYCOM_DEBUG
-		bc->debug_vals.cur_pllcorr = timediff;
-#endif /* BAYCOM_DEBUG */
-	}
-	while (bc->modem.ser12.pll_time >= 1000000)
-		bc->modem.ser12.pll_time -= 1000000;
-}
-
-/* --------------------------------------------------------------------- */
-
-static irqreturn_t ser12_interrupt(int irq, void *dev_id)
-{
-	struct net_device *dev = (struct net_device *)dev_id;
-	struct baycom_state *bc = netdev_priv(dev);
-	struct timespec64 ts;
-	unsigned char iir, msr;
-	unsigned int txcount = 0;
-
-	if (!bc || bc->hdrv.magic != HDLCDRV_MAGIC)
-		return IRQ_NONE;
-	/* fast way out for shared irq */
-	if ((iir = inb(IIR(dev->base_addr))) & 1) 	
-		return IRQ_NONE;
-	/* get current time */
-	ktime_get_ts64(&ts);
-	msr = inb(MSR(dev->base_addr));
-	/* delta DCD */
-	if ((msr & 8) && bc->opt_dcd)
-		hdlcdrv_setdcd(&bc->hdrv, !((msr ^ bc->opt_dcd) & 0x80));
-	do {
-		switch (iir & 6) {
-		case 6:
-			inb(LSR(dev->base_addr));
-			break;
-			
-		case 4:
-			inb(RBR(dev->base_addr));
-			break;
-			
-		case 2:
-			/*
-			 * make sure the next interrupt is generated;
-			 * 0 must be used to power the modem; the modem draws its
-			 * power from the TxD line
-			 */
-			outb(0x00, THR(dev->base_addr));
-			baycom_int_freq(bc);
-			txcount++;
-			/*
-			 * first output the last bit (!) then call HDLC transmitter,
-			 * since this may take quite long
-			 */
-			if (bc->modem.ptt)
-				outb(0x0e | (!!bc->modem.ser12.tx_bit), MCR(dev->base_addr));
-			else
-				outb(0x0d, MCR(dev->base_addr));       /* transmitter off */
-			break;
-			
-		default:
-			msr = inb(MSR(dev->base_addr));
-			/* delta DCD */
-			if ((msr & 8) && bc->opt_dcd) 
-				hdlcdrv_setdcd(&bc->hdrv, !((msr ^ bc->opt_dcd) & 0x80));
-			break;
-		}
-		iir = inb(IIR(dev->base_addr));
-	} while (!(iir & 1));
-	ser12_rx(dev, bc, &ts, msr & 0x10); /* CTS */
-	if (bc->modem.ptt && txcount) {
-		if (bc->modem.ser12.txshreg <= 1) {
-			bc->modem.ser12.txshreg = 0x10000 | hdlcdrv_getbits(&bc->hdrv);
-			if (!hdlcdrv_ptt(&bc->hdrv)) {
-				ser12_set_divisor(dev, 115200/100/8);
-				bc->modem.ptt = 0;
-				goto end_transmit;
-			}
-		}
-		bc->modem.ser12.tx_bit = !(bc->modem.ser12.tx_bit ^ (bc->modem.ser12.txshreg & 1));
-		bc->modem.ser12.txshreg >>= 1;
-	}
- end_transmit:
-	local_irq_enable();
-	if (!bc->modem.ptt && txcount) {
-		hdlcdrv_arbitrate(dev, &bc->hdrv);
-		if (hdlcdrv_ptt(&bc->hdrv)) {
-			ser12_set_divisor(dev, bc->baud_uartdiv);
-			bc->modem.ser12.txshreg = 1;
-			bc->modem.ptt = 1;
-		}
-	}
-	hdlcdrv_transmitter(dev, &bc->hdrv);
-	hdlcdrv_receiver(dev, &bc->hdrv);
-	local_irq_disable();
-	return IRQ_HANDLED;
-}
-
-/* --------------------------------------------------------------------- */
-
-enum uart { c_uart_unknown, c_uart_8250,
-	    c_uart_16450, c_uart_16550, c_uart_16550A};
-static const char *uart_str[] = { 
-	"unknown", "8250", "16450", "16550", "16550A" 
-};
-
-static enum uart ser12_check_uart(unsigned int iobase)
-{
-	unsigned char b1,b2,b3;
-	enum uart u;
-	enum uart uart_tab[] =
-		{ c_uart_16450, c_uart_unknown, c_uart_16550, c_uart_16550A };
-
-	b1 = inb(MCR(iobase));
-	outb(b1 | 0x10, MCR(iobase));	/* loopback mode */
-	b2 = inb(MSR(iobase));
-	outb(0x1a, MCR(iobase));
-	b3 = inb(MSR(iobase)) & 0xf0;
-	outb(b1, MCR(iobase));			/* restore old values */
-	outb(b2, MSR(iobase));
-	if (b3 != 0x90)
-		return c_uart_unknown;
-	inb(RBR(iobase));
-	inb(RBR(iobase));
-	outb(0x01, FCR(iobase));		/* enable FIFOs */
-	u = uart_tab[(inb(IIR(iobase)) >> 6) & 3];
-	if (u == c_uart_16450) {
-		outb(0x5a, SCR(iobase));
-		b1 = inb(SCR(iobase));
-		outb(0xa5, SCR(iobase));
-		b2 = inb(SCR(iobase));
-		if ((b1 != 0x5a) || (b2 != 0xa5))
-			u = c_uart_8250;
-	}
-	return u;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int ser12_open(struct net_device *dev)
-{
-	const unsigned int nr_irqs = irq_get_nr_irqs();
-	struct baycom_state *bc = netdev_priv(dev);
-	enum uart u;
-
-	if (!dev || !bc)
-		return -ENXIO;
-	if (!dev->base_addr || dev->base_addr > 0xffff-SER12_EXTENT ||
-	    dev->irq < 2 || dev->irq > nr_irqs) {
-		printk(KERN_INFO "baycom_ser_fdx: invalid portnumber (max %u) "
-				"or irq (2 <= irq <= %d)\n",
-				0xffff-SER12_EXTENT, nr_irqs);
-		return -ENXIO;
-	}
-	if (bc->baud < 300 || bc->baud > 4800) {
-		printk(KERN_INFO "baycom_ser_fdx: invalid baudrate "
-				"(300...4800)\n");
-		return -EINVAL;
-	}
-	if (!request_region(dev->base_addr, SER12_EXTENT, "baycom_ser_fdx")) {
-		printk(KERN_WARNING "BAYCOM_SER_FSX: I/O port 0x%04lx busy\n",
-		       dev->base_addr);
-		return -EACCES;
-	}
-	memset(&bc->modem, 0, sizeof(bc->modem));
-	bc->hdrv.par.bitrate = bc->baud;
-	bc->baud_us = 1000000/bc->baud;
-	bc->baud_uartdiv = (115200/8)/bc->baud;
-	if ((u = ser12_check_uart(dev->base_addr)) == c_uart_unknown){
-		release_region(dev->base_addr, SER12_EXTENT);
-		return -EIO;
-	}
-	outb(0, FCR(dev->base_addr));  /* disable FIFOs */
-	outb(0x0d, MCR(dev->base_addr));
-	outb(0, IER(dev->base_addr));
-	if (request_irq(dev->irq, ser12_interrupt, IRQF_SHARED,
-			"baycom_ser_fdx", dev)) {
-		release_region(dev->base_addr, SER12_EXTENT);
-		return -EBUSY;
-	}
-	/*
-	 * set the SIO to 6 Bits/character; during receive,
-	 * the baud rate is set to produce 100 ints/sec
-	 * to feed the channel arbitration process,
-	 * during transmit to baud ints/sec to run
-	 * the transmitter
-	 */
-	ser12_set_divisor(dev, 115200/100/8);
-	/*
-	 * enable transmitter empty interrupt and modem status interrupt
-	 */
-	outb(0x0a, IER(dev->base_addr));
-	/*
-	 * make sure the next interrupt is generated;
-	 * 0 must be used to power the modem; the modem draws its
-	 * power from the TxD line
-	 */
-	outb(0x00, THR(dev->base_addr));
-	hdlcdrv_setdcd(&bc->hdrv, 0);
-	printk(KERN_INFO "%s: ser_fdx at iobase 0x%lx irq %u baud %u uart %s\n",
-	       bc_drvname, dev->base_addr, dev->irq, bc->baud, uart_str[u]);
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int ser12_close(struct net_device *dev)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-
-	if (!dev || !bc)
-		return -EINVAL;
-	/*
-	 * disable interrupts
-	 */
-	outb(0, IER(dev->base_addr));
-	outb(1, MCR(dev->base_addr));
-	free_irq(dev->irq, dev);
-	release_region(dev->base_addr, SER12_EXTENT);
-	printk(KERN_INFO "%s: close ser_fdx at iobase 0x%lx irq %u\n",
-	       bc_drvname, dev->base_addr, dev->irq);
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-/*
- * ===================== hdlcdrv driver interface =========================
- */
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_ioctl(struct net_device *dev, void __user *data,
-			struct hdlcdrv_ioctl *hi, int cmd);
-
-/* --------------------------------------------------------------------- */
-
-static const struct hdlcdrv_ops ser12_ops = {
-	.drvname = bc_drvname,
-	.drvinfo = bc_drvinfo,
-	.open    = ser12_open,
-	.close   = ser12_close,
-	.ioctl   = baycom_ioctl,
-};
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_setmode(struct baycom_state *bc, const char *modestr)
-{
-	unsigned int baud;
-
-	if (!strncmp(modestr, "ser", 3)) {
-		baud = simple_strtoul(modestr+3, NULL, 10);
-		if (baud >= 3 && baud <= 48)
-			bc->baud = baud*100;
-	}
-	if (strchr(modestr, '*'))
-		bc->opt_dcd = 0;
-	else if (strchr(modestr, '+'))
-		bc->opt_dcd = -1;
-	else
-		bc->opt_dcd = 1;
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_ioctl(struct net_device *dev, void __user *data,
-			struct hdlcdrv_ioctl *hi, int cmd)
-{
-	struct baycom_state *bc;
-	struct baycom_ioctl bi;
-
-	if (!dev)
-		return -EINVAL;
-
-	bc = netdev_priv(dev);
-	BUG_ON(bc->hdrv.magic != HDLCDRV_MAGIC);
-
-	if (cmd != SIOCDEVPRIVATE)
-		return -ENOIOCTLCMD;
-	switch (hi->cmd) {
-	default:
-		break;
-
-	case HDLCDRVCTL_GETMODE:
-		sprintf(hi->data.modename, "ser%u", bc->baud / 100);
-		if (bc->opt_dcd <= 0)
-			strcat(hi->data.modename, (!bc->opt_dcd) ? "*" : "+");
-		if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
-			return -EFAULT;
-		return 0;
-
-	case HDLCDRVCTL_SETMODE:
-		if (netif_running(dev) || !capable(CAP_NET_ADMIN))
-			return -EACCES;
-		hi->data.modename[sizeof(hi->data.modename)-1] = '\0';
-		return baycom_setmode(bc, hi->data.modename);
-
-	case HDLCDRVCTL_MODELIST:
-		strscpy(hi->data.modename, "ser12,ser3,ser24");
-		if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
-			return -EFAULT;
-		return 0;
-
-	case HDLCDRVCTL_MODEMPARMASK:
-		return HDLCDRV_PARMASK_IOBASE | HDLCDRV_PARMASK_IRQ;
-
-	}
-
-	if (copy_from_user(&bi, data, sizeof(bi)))
-		return -EFAULT;
-	switch (bi.cmd) {
-	default:
-		return -ENOIOCTLCMD;
-
-#ifdef BAYCOM_DEBUG
-	case BAYCOMCTL_GETDEBUG:
-		bi.data.dbg.debug1 = bc->hdrv.ptt_keyed;
-		bi.data.dbg.debug2 = bc->debug_vals.last_intcnt;
-		bi.data.dbg.debug3 = bc->debug_vals.last_pllcorr;
-		break;
-#endif /* BAYCOM_DEBUG */
-
-	}
-	if (copy_to_user(data, &bi, sizeof(bi)))
-		return -EFAULT;
-	return 0;
-
-}
-
-/* --------------------------------------------------------------------- */
-
-/*
- * command line settable parameters
- */
-static char *mode[NR_PORTS] = { "ser12*", };
-static int iobase[NR_PORTS] = { 0x3f8, };
-static int irq[NR_PORTS] = { 4, };
-static int baud[NR_PORTS] = { [0 ... NR_PORTS-1] = 1200 };
-
-module_param_array(mode, charp, NULL, 0);
-MODULE_PARM_DESC(mode, "baycom operating mode; * for software DCD");
-module_param_hw_array(iobase, int, ioport, NULL, 0);
-MODULE_PARM_DESC(iobase, "baycom io base address");
-module_param_hw_array(irq, int, irq, NULL, 0);
-MODULE_PARM_DESC(irq, "baycom irq number");
-module_param_array(baud, int, NULL, 0);
-MODULE_PARM_DESC(baud, "baycom baud rate (300 to 4800)");
-
-MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
-MODULE_DESCRIPTION("Baycom ser12 full duplex amateur radio modem driver");
-MODULE_LICENSE("GPL");
-
-/* --------------------------------------------------------------------- */
-
-static int __init init_baycomserfdx(void)
-{
-	int i, found = 0;
-	char set_hw = 1;
-
-	printk(bc_drvinfo);
-	/*
-	 * register net devices
-	 */
-	for (i = 0; i < NR_PORTS; i++) {
-		struct net_device *dev;
-		struct baycom_state *bc;
-		char ifname[IFNAMSIZ];
-
-		sprintf(ifname, "bcsf%d", i);
-
-		if (!mode[i])
-			set_hw = 0;
-		if (!set_hw)
-			iobase[i] = irq[i] = 0;
-
-		dev = hdlcdrv_register(&ser12_ops, 
-				       sizeof(struct baycom_state),
-				       ifname, iobase[i], irq[i], 0);
-		if (IS_ERR(dev)) 
-			break;
-
-		bc = netdev_priv(dev);
-		if (set_hw && baycom_setmode(bc, mode[i]))
-			set_hw = 0;
-		bc->baud = baud[i];
-		found++;
-		baycom_device[i] = dev;
-	}
-
-	if (!found)
-		return -ENXIO;
-	return 0;
-}
-
-static void __exit cleanup_baycomserfdx(void)
-{
-	int i;
-
-	for(i = 0; i < NR_PORTS; i++) {
-		struct net_device *dev = baycom_device[i];
-		if (dev) 
-			hdlcdrv_unregister(dev);
-	}
-}
-
-module_init(init_baycomserfdx);
-module_exit(cleanup_baycomserfdx);
-
-/* --------------------------------------------------------------------- */
-
-#ifndef MODULE
-
-/*
- * format: baycom_ser_fdx=io,irq,mode
- * mode: ser#    hardware DCD
- *       ser#*   software DCD
- *       ser#+   hardware DCD, inverted signal at DCD pin
- * '#' denotes the baud rate / 100, eg. ser12* is '1200 baud, soft DCD'
- */
-
-static int __init baycom_ser_fdx_setup(char *str)
-{
-        static unsigned nr_dev;
-        int ints[4];
-
-        if (nr_dev >= NR_PORTS)
-                return 0;
-        str = get_options(str, 4, ints);
-        if (ints[0] < 2)
-                return 0;
-        mode[nr_dev] = str;
-        iobase[nr_dev] = ints[1];
-        irq[nr_dev] = ints[2];
-	if (ints[0] >= 3)
-		baud[nr_dev] = ints[3];
-	nr_dev++;
-	return 1;
-}
-
-__setup("baycom_ser_fdx=", baycom_ser_fdx_setup);
-
-#endif /* MODULE */
-/* --------------------------------------------------------------------- */
diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c
deleted file mode 100644
index 05bdad214799..000000000000
--- a/drivers/net/hamradio/baycom_ser_hdx.c
+++ /dev/null
@@ -1,727 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*****************************************************************************/
-
-/*
- *	baycom_ser_hdx.c  -- baycom ser12 halfduplex radio modem driver.
- *
- *	Copyright (C) 1996-2000  Thomas Sailer (sailer@ife.ee.ethz.ch)
- *
- *  Please note that the GPL allows you to use the driver, NOT the radio.
- *  In order to use the radio, you need a license from the communications
- *  authority of your country.
- *
- *  Supported modems
- *
- *  ser12:  This is a very simple 1200 baud AFSK modem. The modem consists only
- *          of a modulator/demodulator chip, usually a TI TCM3105. The computer
- *          is responsible for regenerating the receiver bit clock, as well as
- *          for handling the HDLC protocol. The modem connects to a serial port,
- *          hence the name. Since the serial port is not used as an async serial
- *          port, the kernel driver for serial ports cannot be used, and this
- *          driver only supports standard serial hardware (8250, 16450, 16550A)
- *
- *  Command line options (insmod command line)
- *
- *  mode     ser12    hardware DCD
- *           ser12*   software DCD
- *           ser12@   hardware/software DCD, i.e. no explicit DCD signal but hardware
- *                    mutes audio input to the modem
- *           ser12+   hardware DCD, inverted signal at DCD pin
- *  iobase   base address of the port; common values are 0x3f8, 0x2f8, 0x3e8, 0x2e8
- *  irq      interrupt line of the port; common values are 4,3
- *
- *  History:
- *   0.1  26.06.1996  Adapted from baycom.c and made network driver interface
- *        18.10.1996  Changed to new user space access routines (copy_{to,from}_user)
- *   0.3  26.04.1997  init code/data tagged
- *   0.4  08.07.1997  alternative ser12 decoding algorithm (uses delta CTS ints)
- *   0.5  11.11.1997  ser12/par96 split into separate files
- *   0.6  14.04.1998  cleanups
- *   0.7  03.08.1999  adapt to Linus' new __setup/__initcall
- *   0.8  10.08.1999  use module_init/module_exit
- *   0.9  12.02.2000  adapted to softnet driver interface
- *   0.10 03.07.2000  fix interface name handling
- */
-
-/*****************************************************************************/
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/uaccess.h>
-#include <asm/io.h>
-#include <linux/hdlcdrv.h>
-#include <linux/baycom.h>
-#include <linux/jiffies.h>
-
-/* --------------------------------------------------------------------- */
-
-#define BAYCOM_DEBUG
-
-/* --------------------------------------------------------------------- */
-
-static const char bc_drvname[] = "baycom_ser_hdx";
-static const char bc_drvinfo[] = KERN_INFO "baycom_ser_hdx: (C) 1996-2000 Thomas Sailer, HB9JNX/AE4WA\n"
-"baycom_ser_hdx: version 0.10\n";
-
-/* --------------------------------------------------------------------- */
-
-#define NR_PORTS 4
-
-static struct net_device *baycom_device[NR_PORTS];
-
-/* --------------------------------------------------------------------- */
-
-#define RBR(iobase) (iobase+0)
-#define THR(iobase) (iobase+0)
-#define IER(iobase) (iobase+1)
-#define IIR(iobase) (iobase+2)
-#define FCR(iobase) (iobase+2)
-#define LCR(iobase) (iobase+3)
-#define MCR(iobase) (iobase+4)
-#define LSR(iobase) (iobase+5)
-#define MSR(iobase) (iobase+6)
-#define SCR(iobase) (iobase+7)
-#define DLL(iobase) (iobase+0)
-#define DLM(iobase) (iobase+1)
-
-#define SER12_EXTENT 8
-
-/* ---------------------------------------------------------------------- */
-/*
- * Information that need to be kept for each board.
- */
-
-struct baycom_state {
-	struct hdlcdrv_state hdrv;
-
-	int opt_dcd;
-
-	struct modem_state {
-		short arb_divider;
-		unsigned char flags;
-		unsigned int shreg;
-		struct modem_state_ser12 {
-			unsigned char tx_bit;
-			int dcd_sum0, dcd_sum1, dcd_sum2;
-			unsigned char last_sample;
-			unsigned char last_rxbit;
-			unsigned int dcd_shreg;
-			unsigned int dcd_time;
-			unsigned int bit_pll;
-			unsigned char interm_sample;
-		} ser12;
-	} modem;
-
-#ifdef BAYCOM_DEBUG
-	struct debug_vals {
-		unsigned long last_jiffies;
-		unsigned cur_intcnt;
-		unsigned last_intcnt;
-		int cur_pllcorr;
-		int last_pllcorr;
-	} debug_vals;
-#endif /* BAYCOM_DEBUG */
-};
-
-/* --------------------------------------------------------------------- */
-
-static inline void baycom_int_freq(struct baycom_state *bc)
-{
-#ifdef BAYCOM_DEBUG
-	unsigned long cur_jiffies = jiffies;
-	/*
-	 * measure the interrupt frequency
-	 */
-	bc->debug_vals.cur_intcnt++;
-	if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) {
-		bc->debug_vals.last_jiffies = cur_jiffies;
-		bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt;
-		bc->debug_vals.cur_intcnt = 0;
-		bc->debug_vals.last_pllcorr = bc->debug_vals.cur_pllcorr;
-		bc->debug_vals.cur_pllcorr = 0;
-	}
-#endif /* BAYCOM_DEBUG */
-}
-
-/* --------------------------------------------------------------------- */
-/*
- * ===================== SER12 specific routines =========================
- */
-
-static inline void ser12_set_divisor(struct net_device *dev,
-				     unsigned char divisor)
-{
-	outb(0x81, LCR(dev->base_addr));	/* DLAB = 1 */
-	outb(divisor, DLL(dev->base_addr));
-	outb(0, DLM(dev->base_addr));
-	outb(0x01, LCR(dev->base_addr));	/* word length = 6 */
-	/*
-	 * make sure the next interrupt is generated;
-	 * 0 must be used to power the modem; the modem draws its
-	 * power from the TxD line
-	 */
-	outb(0x00, THR(dev->base_addr));
-	/*
-	 * it is important not to set the divider while transmitting;
-	 * this reportedly makes some UARTs generating interrupts
-	 * in the hundredthousands per second region
-	 * Reported by: Ignacio.Arenaza@studi.epfl.ch (Ignacio Arenaza Nuno)
-	 */
-}
-
-/* --------------------------------------------------------------------- */
-
-/*
- * must call the TX arbitrator every 10ms
- */
-#define SER12_ARB_DIVIDER(bc)  (bc->opt_dcd ? 24 : 36)
-			       
-#define SER12_DCD_INTERVAL(bc) (bc->opt_dcd ? 12 : 240)
-
-static inline void ser12_tx(struct net_device *dev, struct baycom_state *bc)
-{
-	/* one interrupt per channel bit */
-	ser12_set_divisor(dev, 12);
-	/*
-	 * first output the last bit (!) then call HDLC transmitter,
-	 * since this may take quite long
-	 */
-	outb(0x0e | (!!bc->modem.ser12.tx_bit), MCR(dev->base_addr));
-	if (bc->modem.shreg <= 1)
-		bc->modem.shreg = 0x10000 | hdlcdrv_getbits(&bc->hdrv);
-	bc->modem.ser12.tx_bit = !(bc->modem.ser12.tx_bit ^
-				   (bc->modem.shreg & 1));
-	bc->modem.shreg >>= 1;
-}
-
-/* --------------------------------------------------------------------- */
-
-static inline void ser12_rx(struct net_device *dev, struct baycom_state *bc)
-{
-	unsigned char cur_s;
-	/*
-	 * do demodulator
-	 */
-	cur_s = inb(MSR(dev->base_addr)) & 0x10;	/* the CTS line */
-	hdlcdrv_channelbit(&bc->hdrv, cur_s);
-	bc->modem.ser12.dcd_shreg = (bc->modem.ser12.dcd_shreg << 1) |
-		(cur_s != bc->modem.ser12.last_sample);
-	bc->modem.ser12.last_sample = cur_s;
-	if(bc->modem.ser12.dcd_shreg & 1) {
-		if (!bc->opt_dcd) {
-			unsigned int dcdspos, dcdsneg;
-
-			dcdspos = dcdsneg = 0;
-			dcdspos += ((bc->modem.ser12.dcd_shreg >> 1) & 1);
-			if (!(bc->modem.ser12.dcd_shreg & 0x7ffffffe))
-				dcdspos += 2;
-			dcdsneg += ((bc->modem.ser12.dcd_shreg >> 2) & 1);
-			dcdsneg += ((bc->modem.ser12.dcd_shreg >> 3) & 1);
-			dcdsneg += ((bc->modem.ser12.dcd_shreg >> 4) & 1);
-
-			bc->modem.ser12.dcd_sum0 += 16*dcdspos - dcdsneg;
-		} else
-			bc->modem.ser12.dcd_sum0--;
-	}
-	if(!bc->modem.ser12.dcd_time) {
-		hdlcdrv_setdcd(&bc->hdrv, (bc->modem.ser12.dcd_sum0 +
-					   bc->modem.ser12.dcd_sum1 +
-					   bc->modem.ser12.dcd_sum2) < 0);
-		bc->modem.ser12.dcd_sum2 = bc->modem.ser12.dcd_sum1;
-		bc->modem.ser12.dcd_sum1 = bc->modem.ser12.dcd_sum0;
-		/* offset to ensure DCD off on silent input */
-		bc->modem.ser12.dcd_sum0 = 2;
-		bc->modem.ser12.dcd_time = SER12_DCD_INTERVAL(bc);
-	}
-	bc->modem.ser12.dcd_time--;
-	if (!bc->opt_dcd) {
-		/*
-		 * PLL code for the improved software DCD algorithm
-		 */
-		if (bc->modem.ser12.interm_sample) {
-			/*
-			 * intermediate sample; set timing correction to normal
-			 */
-			ser12_set_divisor(dev, 4);
-		} else {
-			/*
-			 * do PLL correction and call HDLC receiver
-			 */
-			switch (bc->modem.ser12.dcd_shreg & 7) {
-			case 1: /* transition too late */
-				ser12_set_divisor(dev, 5);
-#ifdef BAYCOM_DEBUG
-				bc->debug_vals.cur_pllcorr++;
-#endif /* BAYCOM_DEBUG */
-				break;
-			case 4:	/* transition too early */
-				ser12_set_divisor(dev, 3);
-#ifdef BAYCOM_DEBUG
-				bc->debug_vals.cur_pllcorr--;
-#endif /* BAYCOM_DEBUG */
-				break;
-			default:
-				ser12_set_divisor(dev, 4);
-				break;
-			}
-			bc->modem.shreg >>= 1;
-			if (bc->modem.ser12.last_sample ==
-			    bc->modem.ser12.last_rxbit)
-				bc->modem.shreg |= 0x10000;
-			bc->modem.ser12.last_rxbit =
-				bc->modem.ser12.last_sample;
-		}
-		if (++bc->modem.ser12.interm_sample >= 3)
-			bc->modem.ser12.interm_sample = 0;
-		/*
-		 * DCD stuff
-		 */
-		if (bc->modem.ser12.dcd_shreg & 1) {
-			unsigned int dcdspos, dcdsneg;
-
-			dcdspos = dcdsneg = 0;
-			dcdspos += ((bc->modem.ser12.dcd_shreg >> 1) & 1);
-			dcdspos += (!(bc->modem.ser12.dcd_shreg & 0x7ffffffe))
-				<< 1;
-			dcdsneg += ((bc->modem.ser12.dcd_shreg >> 2) & 1);
-			dcdsneg += ((bc->modem.ser12.dcd_shreg >> 3) & 1);
-			dcdsneg += ((bc->modem.ser12.dcd_shreg >> 4) & 1);
-
-			bc->modem.ser12.dcd_sum0 += 16*dcdspos - dcdsneg;
-		}
-	} else {
-		/*
-		 * PLL algorithm for the hardware squelch DCD algorithm
-		 */
-		if (bc->modem.ser12.interm_sample) {
-			/*
-			 * intermediate sample; set timing correction to normal
-			 */
-			ser12_set_divisor(dev, 6);
-		} else {
-			/*
-			 * do PLL correction and call HDLC receiver
-			 */
-			switch (bc->modem.ser12.dcd_shreg & 3) {
-			case 1: /* transition too late */
-				ser12_set_divisor(dev, 7);
-#ifdef BAYCOM_DEBUG
-				bc->debug_vals.cur_pllcorr++;
-#endif /* BAYCOM_DEBUG */
-				break;
-			case 2:	/* transition too early */
-				ser12_set_divisor(dev, 5);
-#ifdef BAYCOM_DEBUG
-				bc->debug_vals.cur_pllcorr--;
-#endif /* BAYCOM_DEBUG */
-				break;
-			default:
-				ser12_set_divisor(dev, 6);
-				break;
-			}
-			bc->modem.shreg >>= 1;
-			if (bc->modem.ser12.last_sample ==
-			    bc->modem.ser12.last_rxbit)
-				bc->modem.shreg |= 0x10000;
-			bc->modem.ser12.last_rxbit =
-				bc->modem.ser12.last_sample;
-		}
-		bc->modem.ser12.interm_sample = !bc->modem.ser12.interm_sample;
-		/*
-		 * DCD stuff
-		 */
-		bc->modem.ser12.dcd_sum0 -= (bc->modem.ser12.dcd_shreg & 1);
-	}
-	outb(0x0d, MCR(dev->base_addr));		/* transmitter off */
-	if (bc->modem.shreg & 1) {
-		hdlcdrv_putbits(&bc->hdrv, bc->modem.shreg >> 1);
-		bc->modem.shreg = 0x10000;
-	}
-	if(!bc->modem.ser12.dcd_time) {
-		if (bc->opt_dcd & 1) 
-			hdlcdrv_setdcd(&bc->hdrv, !((inb(MSR(dev->base_addr)) ^ bc->opt_dcd) & 0x80));
-		else
-			hdlcdrv_setdcd(&bc->hdrv, (bc->modem.ser12.dcd_sum0 +
-						   bc->modem.ser12.dcd_sum1 +
-						   bc->modem.ser12.dcd_sum2) < 0);
-		bc->modem.ser12.dcd_sum2 = bc->modem.ser12.dcd_sum1;
-		bc->modem.ser12.dcd_sum1 = bc->modem.ser12.dcd_sum0;
-		/* offset to ensure DCD off on silent input */
-		bc->modem.ser12.dcd_sum0 = 2;
-		bc->modem.ser12.dcd_time = SER12_DCD_INTERVAL(bc);
-	}
-	bc->modem.ser12.dcd_time--;
-}
-
-/* --------------------------------------------------------------------- */
-
-static irqreturn_t ser12_interrupt(int irq, void *dev_id)
-{
-	struct net_device *dev = (struct net_device *)dev_id;
-	struct baycom_state *bc = netdev_priv(dev);
-	unsigned char iir;
-
-	if (!dev || !bc || bc->hdrv.magic != HDLCDRV_MAGIC)
-		return IRQ_NONE;
-	/* fast way out */
-	if ((iir = inb(IIR(dev->base_addr))) & 1)
-		return IRQ_NONE;
-	baycom_int_freq(bc);
-	do {
-		switch (iir & 6) {
-		case 6:
-			inb(LSR(dev->base_addr));
-			break;
-			
-		case 4:
-			inb(RBR(dev->base_addr));
-			break;
-			
-		case 2:
-			/*
-			 * check if transmitter active
-			 */
-			if (hdlcdrv_ptt(&bc->hdrv))
-				ser12_tx(dev, bc);
-			else {
-				ser12_rx(dev, bc);
-				bc->modem.arb_divider--;
-			}
-			outb(0x00, THR(dev->base_addr));
-			break;
-			
-		default:
-			inb(MSR(dev->base_addr));
-			break;
-		}
-		iir = inb(IIR(dev->base_addr));
-	} while (!(iir & 1));
-	if (bc->modem.arb_divider <= 0) {
-		bc->modem.arb_divider = SER12_ARB_DIVIDER(bc);
-		local_irq_enable();
-		hdlcdrv_arbitrate(dev, &bc->hdrv);
-	}
-	local_irq_enable();
-	hdlcdrv_transmitter(dev, &bc->hdrv);
-	hdlcdrv_receiver(dev, &bc->hdrv);
-	local_irq_disable();
-	return IRQ_HANDLED;
-}
-
-/* --------------------------------------------------------------------- */
-
-enum uart { c_uart_unknown, c_uart_8250,
-	    c_uart_16450, c_uart_16550, c_uart_16550A};
-static const char *uart_str[] = { 
-	"unknown", "8250", "16450", "16550", "16550A" 
-};
-
-static enum uart ser12_check_uart(unsigned int iobase)
-{
-	unsigned char b1,b2,b3;
-	enum uart u;
-	enum uart uart_tab[] =
-		{ c_uart_16450, c_uart_unknown, c_uart_16550, c_uart_16550A };
-
-	b1 = inb(MCR(iobase));
-	outb(b1 | 0x10, MCR(iobase));	/* loopback mode */
-	b2 = inb(MSR(iobase));
-	outb(0x1a, MCR(iobase));
-	b3 = inb(MSR(iobase)) & 0xf0;
-	outb(b1, MCR(iobase));			/* restore old values */
-	outb(b2, MSR(iobase));
-	if (b3 != 0x90)
-		return c_uart_unknown;
-	inb(RBR(iobase));
-	inb(RBR(iobase));
-	outb(0x01, FCR(iobase));		/* enable FIFOs */
-	u = uart_tab[(inb(IIR(iobase)) >> 6) & 3];
-	if (u == c_uart_16450) {
-		outb(0x5a, SCR(iobase));
-		b1 = inb(SCR(iobase));
-		outb(0xa5, SCR(iobase));
-		b2 = inb(SCR(iobase));
-		if ((b1 != 0x5a) || (b2 != 0xa5))
-			u = c_uart_8250;
-	}
-	return u;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int ser12_open(struct net_device *dev)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-	enum uart u;
-
-	if (!dev || !bc)
-		return -ENXIO;
-	if (!dev->base_addr || dev->base_addr > 0x1000-SER12_EXTENT ||
-	    dev->irq < 2 || dev->irq > 15)
-		return -ENXIO;
-	if (!request_region(dev->base_addr, SER12_EXTENT, "baycom_ser12"))
-		return -EACCES;
-	memset(&bc->modem, 0, sizeof(bc->modem));
-	bc->hdrv.par.bitrate = 1200;
-	if ((u = ser12_check_uart(dev->base_addr)) == c_uart_unknown) {
-		release_region(dev->base_addr, SER12_EXTENT);       
-		return -EIO;
-	}
-	outb(0, FCR(dev->base_addr));  /* disable FIFOs */
-	outb(0x0d, MCR(dev->base_addr));
-	outb(0, IER(dev->base_addr));
-	if (request_irq(dev->irq, ser12_interrupt, IRQF_SHARED,
-			"baycom_ser12", dev)) {
-		release_region(dev->base_addr, SER12_EXTENT);       
-		return -EBUSY;
-	}
-	/*
-	 * enable transmitter empty interrupt
-	 */
-	outb(2, IER(dev->base_addr));
-	/*
-	 * set the SIO to 6 Bits/character and 19200 or 28800 baud, so that
-	 * we get exactly (hopefully) 2 or 3 interrupts per radio symbol,
-	 * depending on the usage of the software DCD routine
-	 */
-	ser12_set_divisor(dev, bc->opt_dcd ? 6 : 4);
-	printk(KERN_INFO "%s: ser12 at iobase 0x%lx irq %u uart %s\n", 
-	       bc_drvname, dev->base_addr, dev->irq, uart_str[u]);
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int ser12_close(struct net_device *dev)
-{
-	struct baycom_state *bc = netdev_priv(dev);
-
-	if (!dev || !bc)
-		return -EINVAL;
-	/*
-	 * disable interrupts
-	 */
-	outb(0, IER(dev->base_addr));
-	outb(1, MCR(dev->base_addr));
-	free_irq(dev->irq, dev);
-	release_region(dev->base_addr, SER12_EXTENT);
-	printk(KERN_INFO "%s: close ser12 at iobase 0x%lx irq %u\n",
-	       bc_drvname, dev->base_addr, dev->irq);
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-/*
- * ===================== hdlcdrv driver interface =========================
- */
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_ioctl(struct net_device *dev, void __user *data,
-			struct hdlcdrv_ioctl *hi, int cmd);
-
-/* --------------------------------------------------------------------- */
-
-static const struct hdlcdrv_ops ser12_ops = {
-	.drvname = bc_drvname,
-	.drvinfo = bc_drvinfo,
-	.open    = ser12_open,
-	.close   = ser12_close,
-	.ioctl   = baycom_ioctl,
-};
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_setmode(struct baycom_state *bc, const char *modestr)
-{
-	if (strchr(modestr, '*'))
-		bc->opt_dcd = 0;
-	else if (strchr(modestr, '+'))
-		bc->opt_dcd = -1;
-	else if (strchr(modestr, '@'))
-		bc->opt_dcd = -2;
-	else
-		bc->opt_dcd = 1;
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int baycom_ioctl(struct net_device *dev, void __user *data,
-			struct hdlcdrv_ioctl *hi, int cmd)
-{
-	struct baycom_state *bc;
-	struct baycom_ioctl bi;
-
-	if (!dev)
-		return -EINVAL;
-
-	bc = netdev_priv(dev);
-	BUG_ON(bc->hdrv.magic != HDLCDRV_MAGIC);
-
-	if (cmd != SIOCDEVPRIVATE)
-		return -ENOIOCTLCMD;
-	switch (hi->cmd) {
-	default:
-		break;
-
-	case HDLCDRVCTL_GETMODE:
-		strscpy(hi->data.modename, "ser12");
-		if (bc->opt_dcd <= 0)
-			strcat(hi->data.modename, (!bc->opt_dcd) ? "*" : (bc->opt_dcd == -2) ? "@" : "+");
-		if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
-			return -EFAULT;
-		return 0;
-
-	case HDLCDRVCTL_SETMODE:
-		if (netif_running(dev) || !capable(CAP_NET_ADMIN))
-			return -EACCES;
-		hi->data.modename[sizeof(hi->data.modename)-1] = '\0';
-		return baycom_setmode(bc, hi->data.modename);
-
-	case HDLCDRVCTL_MODELIST:
-		strscpy(hi->data.modename, "ser12");
-		if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
-			return -EFAULT;
-		return 0;
-
-	case HDLCDRVCTL_MODEMPARMASK:
-		return HDLCDRV_PARMASK_IOBASE | HDLCDRV_PARMASK_IRQ;
-
-	}
-
-	if (copy_from_user(&bi, data, sizeof(bi)))
-		return -EFAULT;
-	switch (bi.cmd) {
-	default:
-		return -ENOIOCTLCMD;
-
-#ifdef BAYCOM_DEBUG
-	case BAYCOMCTL_GETDEBUG:
-		bi.data.dbg.debug1 = bc->hdrv.ptt_keyed;
-		bi.data.dbg.debug2 = bc->debug_vals.last_intcnt;
-		bi.data.dbg.debug3 = bc->debug_vals.last_pllcorr;
-		break;
-#endif /* BAYCOM_DEBUG */
-
-	}
-	if (copy_to_user(data, &bi, sizeof(bi)))
-		return -EFAULT;
-	return 0;
-
-}
-
-/* --------------------------------------------------------------------- */
-
-/*
- * command line settable parameters
- */
-static char *mode[NR_PORTS] = { "ser12*", };
-static int iobase[NR_PORTS] = { 0x3f8, };
-static int irq[NR_PORTS] = { 4, };
-
-module_param_array(mode, charp, NULL, 0);
-MODULE_PARM_DESC(mode, "baycom operating mode; * for software DCD");
-module_param_hw_array(iobase, int, ioport, NULL, 0);
-MODULE_PARM_DESC(iobase, "baycom io base address");
-module_param_hw_array(irq, int, irq, NULL, 0);
-MODULE_PARM_DESC(irq, "baycom irq number");
-
-MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
-MODULE_DESCRIPTION("Baycom ser12 half duplex amateur radio modem driver");
-MODULE_LICENSE("GPL");
-
-/* --------------------------------------------------------------------- */
-
-static int __init init_baycomserhdx(void)
-{
-	int i, found = 0;
-	char set_hw = 1;
-
-	printk(bc_drvinfo);
-	/*
-	 * register net devices
-	 */
-	for (i = 0; i < NR_PORTS; i++) {
-		struct net_device *dev;
-		struct baycom_state *bc;
-		char ifname[IFNAMSIZ];
-
-		sprintf(ifname, "bcsh%d", i);
-
-		if (!mode[i])
-			set_hw = 0;
-		if (!set_hw)
-			iobase[i] = irq[i] = 0;
-
-		dev = hdlcdrv_register(&ser12_ops, 
-				       sizeof(struct baycom_state),
-				       ifname, iobase[i], irq[i], 0);
-		if (IS_ERR(dev)) 
-			break;
-
-		bc = netdev_priv(dev);
-		if (set_hw && baycom_setmode(bc, mode[i]))
-			set_hw = 0;
-		found++;
-		baycom_device[i] = dev;
-	}
-
-	if (!found)
-		return -ENXIO;
-	return 0;
-}
-
-static void __exit cleanup_baycomserhdx(void)
-{
-	int i;
-
-	for(i = 0; i < NR_PORTS; i++) {
-		struct net_device *dev = baycom_device[i];
-
-		if (dev)
-			hdlcdrv_unregister(dev);
-	}
-}
-
-module_init(init_baycomserhdx);
-module_exit(cleanup_baycomserhdx);
-
-/* --------------------------------------------------------------------- */
-
-#ifndef MODULE
-
-/*
- * format: baycom_ser_hdx=io,irq,mode
- * mode: ser12    hardware DCD
- *       ser12*   software DCD
- *       ser12@   hardware/software DCD, i.e. no explicit DCD signal but hardware
- *                mutes audio input to the modem
- *       ser12+   hardware DCD, inverted signal at DCD pin
- */
-
-static int __init baycom_ser_hdx_setup(char *str)
-{
-        static unsigned nr_dev;
-	int ints[3];
-
-        if (nr_dev >= NR_PORTS)
-                return 0;
-	str = get_options(str, 3, ints);
-	if (ints[0] < 2)
-		return 0;
-	mode[nr_dev] = str;
-	iobase[nr_dev] = ints[1];
-	irq[nr_dev] = ints[2];
-	nr_dev++;
-	return 1;
-}
-
-__setup("baycom_ser_hdx=", baycom_ser_hdx_setup);
-
-#endif /* MODULE */
-/* --------------------------------------------------------------------- */
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
deleted file mode 100644
index 214fd1f819a1..000000000000
--- a/drivers/net/hamradio/bpqether.c
+++ /dev/null
@@ -1,593 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *	G8BPQ compatible "AX.25 via ethernet" driver release 004
- *
- *	This code REQUIRES 2.0.0 or higher/ NET3.029
- *
- *	This is a "pseudo" network driver to allow AX.25 over Ethernet
- *	using G8BPQ encapsulation. It has been extracted from the protocol
- *	implementation because
- *
- *		- things got unreadable within the protocol stack
- *		- to cure the protocol stack from "feature-ism"
- *		- a protocol implementation shouldn't need to know on
- *		  which hardware it is running
- *		- user-level programs like the AX.25 utilities shouldn't
- *		  need to know about the hardware.
- *		- IP over ethernet encapsulated AX.25 was impossible
- *		- rxecho.c did not work
- *		- to have room for extensions
- *		- it just deserves to "live" as an own driver
- *
- *	This driver can use any ethernet destination address, and can be
- *	limited to accept frames from one dedicated ethernet card only.
- *
- *	Note that the driver sets up the BPQ devices automagically on
- *	startup or (if started before the "insmod" of an ethernet device)
- *	on "ifconfig up". It hopefully will remove the BPQ on "rmmod"ing
- *	the ethernet device (in fact: as soon as another ethernet or bpq
- *	device gets "ifconfig"ured).
- *
- *	I have heard that several people are thinking of experiments
- *	with highspeed packet radio using existing ethernet cards.
- *	Well, this driver is prepared for this purpose, just add
- *	your tx key control and a txdelay / tailtime algorithm,
- *	probably some buffering, and /voila/...
- *
- *	History
- *	BPQ   001	Joerg(DL1BKE)		Extracted BPQ code from AX.25
- *						protocol stack and added my own
- *						yet existing patches
- *	BPQ   002	Joerg(DL1BKE)		Scan network device list on
- *						startup.
- *	BPQ   003	Joerg(DL1BKE)		Ethernet destination address
- *						and accepted source address
- *						can be configured by an ioctl()
- *						call.
- *						Fixed to match Linux networking
- *						changes - 2.1.15.
- *	BPQ   004	Joerg(DL1BKE)		Fixed to not lock up on ifconfig.
- */
-
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/stat.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rtnetlink.h>
-
-#include <net/ip.h>
-#include <net/arp.h>
-#include <net/netdev_lock.h>
-#include <net/net_namespace.h>
-
-#include <linux/bpqether.h>
-
-static const char banner[] __initconst = KERN_INFO \
-	"AX.25: bpqether driver version 004\n";
-
-static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
-static int bpq_device_event(struct notifier_block *, unsigned long, void *);
-
-static struct packet_type bpq_packet_type __read_mostly = {
-	.type	= cpu_to_be16(ETH_P_BPQ),
-	.func	= bpq_rcv,
-};
-
-static struct notifier_block bpq_dev_notifier = {
-	.notifier_call = bpq_device_event,
-};
-
-
-struct bpqdev {
-	struct list_head bpq_list;	/* list of bpq devices chain */
-	struct net_device *ethdev;	/* link to ethernet device */
-	struct net_device *axdev;	/* bpq device (bpq#) */
-	char   dest_addr[6];		/* ether destination address */
-	char   acpt_addr[6];		/* accept ether frames from this address only */
-};
-
-static LIST_HEAD(bpq_devices);
-
-/* ------------------------------------------------------------------------ */
-
-
-/*
- *	Get the ethernet device for a BPQ device
- */
-static inline struct net_device *bpq_get_ether_dev(struct net_device *dev)
-{
-	struct bpqdev *bpq = netdev_priv(dev);
-
-	return bpq ? bpq->ethdev : NULL;
-}
-
-/*
- *	Get the BPQ device for the ethernet device
- */
-static inline struct net_device *bpq_get_ax25_dev(struct net_device *dev)
-{
-	struct bpqdev *bpq;
-
-	list_for_each_entry_rcu(bpq, &bpq_devices, bpq_list,
-				lockdep_rtnl_is_held()) {
-		if (bpq->ethdev == dev)
-			return bpq->axdev;
-	}
-	return NULL;
-}
-
-static inline int dev_is_ethdev(struct net_device *dev)
-{
-	return dev->type == ARPHRD_ETHER && !netdev_need_ops_lock(dev);
-}
-
-/* ------------------------------------------------------------------------ */
-
-
-/*
- *	Receive an AX.25 frame via an ethernet interface.
- */
-static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev)
-{
-	int len;
-	char * ptr;
-	struct ethhdr *eth;
-	struct bpqdev *bpq;
-
-	if (!net_eq(dev_net(dev), &init_net))
-		goto drop;
-
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
-		return NET_RX_DROP;
-
-	if (!pskb_may_pull(skb, sizeof(struct ethhdr)))
-		goto drop;
-
-	rcu_read_lock();
-	dev = bpq_get_ax25_dev(dev);
-
-	if (dev == NULL || !netif_running(dev)) 
-		goto drop_unlock;
-
-	/*
-	 * if we want to accept frames from just one ethernet device
-	 * we check the source address of the sender.
-	 */
-
-	bpq = netdev_priv(dev);
-
-	eth = eth_hdr(skb);
-
-	if (!(bpq->acpt_addr[0] & 0x01) &&
-	    !ether_addr_equal(eth->h_source, bpq->acpt_addr))
-		goto drop_unlock;
-
-	if (skb_cow(skb, sizeof(struct ethhdr)))
-		goto drop_unlock;
-
-	len = skb->data[0] + skb->data[1] * 256 - 5;
-
-	if (len < 0 || len > skb->len - 2)
-		goto drop_unlock;
-
-	skb_pull(skb, 2);	/* Remove the length bytes */
-	skb_trim(skb, len);	/* Set the length of the data */
-
-	dev->stats.rx_packets++;
-	dev->stats.rx_bytes += len;
-
-	ptr = skb_push(skb, 1);
-	*ptr = 0;
-
-	skb->protocol = ax25_type_trans(skb, dev);
-	netif_rx(skb);
-unlock:
-
-	rcu_read_unlock();
-
-	return 0;
-drop_unlock:
-	kfree_skb(skb);
-	goto unlock;
-
-drop:
-	kfree_skb(skb);
-	return 0;
-}
-
-/*
- * 	Send an AX.25 frame via an ethernet interface
- */
-static netdev_tx_t bpq_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	unsigned char *ptr;
-	struct bpqdev *bpq;
-	struct net_device *orig_dev;
-	int size;
-
-	if (skb->protocol == htons(ETH_P_IP))
-		return ax25_ip_xmit(skb);
-
-	/*
-	 * Just to be *really* sure not to send anything if the interface
-	 * is down, the ethernet device may have gone.
-	 */
-	if (!netif_running(dev)) {
-		kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-
-	skb_pull(skb, 1);			/* Drop KISS byte */
-	size = skb->len;
-
-	/*
-	 * We're about to mess with the skb which may still shared with the
-	 * generic networking code so unshare and ensure it's got enough
-	 * space for the BPQ headers.
-	 */
-	if (skb_cow(skb, AX25_BPQ_HEADER_LEN)) {
-		if (net_ratelimit())
-			pr_err("bpqether: out of memory\n");
-		kfree_skb(skb);
-
-		return NETDEV_TX_OK;
-	}
-
-	ptr = skb_push(skb, 2);			/* Make space for length */
-
-	*ptr++ = (size + 5) % 256;
-	*ptr++ = (size + 5) / 256;
-
-	bpq = netdev_priv(dev);
-
-	orig_dev = dev;
-	if ((dev = bpq_get_ether_dev(dev)) == NULL) {
-		orig_dev->stats.tx_dropped++;
-		kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-
-	skb->protocol = ax25_type_trans(skb, dev);
-	skb_reset_network_header(skb);
-	dev_hard_header(skb, dev, ETH_P_BPQ, bpq->dest_addr, NULL, 0);
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes+=skb->len;
-  
-	dev_queue_xmit(skb);
-	netif_wake_queue(dev);
-	return NETDEV_TX_OK;
-}
-
-/*
- *	Set AX.25 callsign
- */
-static int bpq_set_mac_address(struct net_device *dev, void *addr)
-{
-    struct sockaddr *sa = (struct sockaddr *)addr;
-
-    dev_addr_set(dev, sa->sa_data);
-
-    return 0;
-}
-
-/*	Ioctl commands
- *
- *		SIOCSBPQETHOPT		reserved for enhancements
- *		SIOCSBPQETHADDR		set the destination and accepted
- *					source ethernet address (broadcast
- *					or multicast: accept all)
- */
-static int bpq_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
-			      void __user *data, int cmd)
-{
-	struct bpq_ethaddr __user *ethaddr = data;
-	struct bpqdev *bpq = netdev_priv(dev);
-	struct bpq_req req;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	switch (cmd) {
-		case SIOCSBPQETHOPT:
-			if (copy_from_user(&req, data, sizeof(struct bpq_req)))
-				return -EFAULT;
-			switch (req.cmd) {
-				case SIOCGBPQETHPARAM:
-				case SIOCSBPQETHPARAM:
-				default:
-					return -EINVAL;
-			}
-
-			break;
-
-		case SIOCSBPQETHADDR:
-			if (copy_from_user(bpq->dest_addr, ethaddr->destination, ETH_ALEN))
-				return -EFAULT;
-			if (copy_from_user(bpq->acpt_addr, ethaddr->accept, ETH_ALEN))
-				return -EFAULT;
-			break;
-
-		default:
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-/*
- * open/close a device
- */
-static int bpq_open(struct net_device *dev)
-{
-	netif_start_queue(dev);
-	return 0;
-}
-
-static int bpq_close(struct net_device *dev)
-{
-	netif_stop_queue(dev);
-	return 0;
-}
-
-
-/* ------------------------------------------------------------------------ */
-
-#ifdef CONFIG_PROC_FS
-/*
- *	Proc filesystem
- */
-static void *bpq_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
-{
-	int i = 1;
-	struct bpqdev *bpqdev;
-
-	rcu_read_lock();
-
-	if (*pos == 0)
-		return SEQ_START_TOKEN;
-	
-	list_for_each_entry_rcu(bpqdev, &bpq_devices, bpq_list) {
-		if (i == *pos)
-			return bpqdev;
-	}
-	return NULL;
-}
-
-static void *bpq_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct list_head *p;
-	struct bpqdev *bpqdev = v;
-
-	++*pos;
-
-	if (v == SEQ_START_TOKEN)
-		p = rcu_dereference(list_next_rcu(&bpq_devices));
-	else
-		p = rcu_dereference(list_next_rcu(&bpqdev->bpq_list));
-
-	return (p == &bpq_devices) ? NULL 
-		: list_entry(p, struct bpqdev, bpq_list);
-}
-
-static void bpq_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-
-static int bpq_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, 
-			 "dev   ether      destination        accept from\n");
-	else {
-		const struct bpqdev *bpqdev = v;
-
-		seq_printf(seq, "%-5s %-10s %pM  ",
-			bpqdev->axdev->name, bpqdev->ethdev->name,
-			bpqdev->dest_addr);
-
-		if (is_multicast_ether_addr(bpqdev->acpt_addr))
-			seq_printf(seq, "*\n");
-		else
-			seq_printf(seq, "%pM\n", bpqdev->acpt_addr);
-
-	}
-	return 0;
-}
-
-static const struct seq_operations bpq_seqops = {
-	.start = bpq_seq_start,
-	.next = bpq_seq_next,
-	.stop = bpq_seq_stop,
-	.show = bpq_seq_show,
-};
-#endif
-/* ------------------------------------------------------------------------ */
-
-static const struct net_device_ops bpq_netdev_ops = {
-	.ndo_open	     = bpq_open,
-	.ndo_stop	     = bpq_close,
-	.ndo_start_xmit	     = bpq_xmit,
-	.ndo_set_mac_address = bpq_set_mac_address,
-	.ndo_siocdevprivate  = bpq_siocdevprivate,
-};
-
-static void bpq_setup(struct net_device *dev)
-{
-	netdev_lockdep_set_classes(dev);
-
-	dev->netdev_ops	     = &bpq_netdev_ops;
-	dev->needs_free_netdev = true;
-
-	dev->flags      = 0;
-	dev->lltx = true;	/* Allow recursion */
-
-#if IS_ENABLED(CONFIG_AX25)
-	dev->header_ops      = &ax25_header_ops;
-#endif
-
-	dev->type            = ARPHRD_AX25;
-	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
-	dev->mtu             = AX25_DEF_PACLEN;
-	dev->addr_len        = AX25_ADDR_LEN;
-
-	memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
-	dev_addr_set(dev, (u8 *)&ax25_defaddr);
-}
-
-/*
- *	Setup a new device.
- */
-static int bpq_new_device(struct net_device *edev)
-{
-	int err;
-	struct net_device *ndev;
-	struct bpqdev *bpq;
-
-	ndev = alloc_netdev(sizeof(struct bpqdev), "bpq%d", NET_NAME_UNKNOWN,
-			    bpq_setup);
-	if (!ndev)
-		return -ENOMEM;
-
-		
-	bpq = netdev_priv(ndev);
-	dev_hold(edev);
-	bpq->ethdev = edev;
-	bpq->axdev = ndev;
-
-	eth_broadcast_addr(bpq->dest_addr);
-	eth_broadcast_addr(bpq->acpt_addr);
-
-	err = register_netdevice(ndev);
-	if (err)
-		goto error;
-
-	/* List protected by RTNL */
-	list_add_rcu(&bpq->bpq_list, &bpq_devices);
-	return 0;
-
- error:
-	dev_put(edev);
-	free_netdev(ndev);
-	return err;
-	
-}
-
-static void bpq_free_device(struct net_device *ndev)
-{
-	struct bpqdev *bpq = netdev_priv(ndev);
-
-	dev_put(bpq->ethdev);
-	list_del_rcu(&bpq->bpq_list);
-
-	unregister_netdevice(ndev);
-}
-
-/*
- *	Handle device status changes.
- */
-static int bpq_device_event(struct notifier_block *this,
-			    unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
-
-	if (!dev_is_ethdev(dev) && !bpq_get_ax25_dev(dev))
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case NETDEV_UP:		/* new ethernet device -> new BPQ interface */
-		if (bpq_get_ax25_dev(dev) == NULL)
-			bpq_new_device(dev);
-		break;
-
-	case NETDEV_DOWN:	/* ethernet device closed -> close BPQ interface */
-		if ((dev = bpq_get_ax25_dev(dev)) != NULL)
-			dev_close(dev);
-		break;
-
-	case NETDEV_UNREGISTER:	/* ethernet device removed -> free BPQ interface */
-		if ((dev = bpq_get_ax25_dev(dev)) != NULL)
-			bpq_free_device(dev);
-		break;
-	default:
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-
-/* ------------------------------------------------------------------------ */
-
-/*
- * Initialize driver. To be called from af_ax25 if not compiled as a
- * module
- */
-static int __init bpq_init_driver(void)
-{
-#ifdef CONFIG_PROC_FS
-	if (!proc_create_seq("bpqether", 0444, init_net.proc_net, &bpq_seqops)) {
-		printk(KERN_ERR
-			"bpq: cannot create /proc/net/bpqether entry.\n");
-		return -ENOENT;
-	}
-#endif  /* CONFIG_PROC_FS */
-
-	dev_add_pack(&bpq_packet_type);
-
-	register_netdevice_notifier(&bpq_dev_notifier);
-
-	printk(banner);
-
-	return 0;
-}
-
-static void __exit bpq_cleanup_driver(void)
-{
-	struct bpqdev *bpq;
-
-	dev_remove_pack(&bpq_packet_type);
-
-	unregister_netdevice_notifier(&bpq_dev_notifier);
-
-	remove_proc_entry("bpqether", init_net.proc_net);
-
-	rtnl_lock();
-	while (!list_empty(&bpq_devices)) {
-		bpq = list_entry(bpq_devices.next, struct bpqdev, bpq_list);
-		bpq_free_device(bpq->axdev);
-	}
-	rtnl_unlock();
-}
-
-MODULE_AUTHOR("Joerg Reuter DL1BKE <jreuter@yaina.de>");
-MODULE_DESCRIPTION("Transmit and receive AX.25 packets over Ethernet");
-MODULE_LICENSE("GPL");
-module_init(bpq_init_driver);
-module_exit(bpq_cleanup_driver);
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
deleted file mode 100644
index 3b88e465d08f..000000000000
--- a/drivers/net/hamradio/hdlcdrv.c
+++ /dev/null
@@ -1,747 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*****************************************************************************/
-
-/*
- *	hdlcdrv.c  -- HDLC packet radio network driver.
- *
- *	Copyright (C) 1996-2000  Thomas Sailer (sailer@ife.ee.ethz.ch)
- *
- *  Please note that the GPL allows you to use the driver, NOT the radio.
- *  In order to use the radio, you need a license from the communications
- *  authority of your country.
- *
- *  The driver was derived from Donald Beckers skeleton.c
- *	Written 1993-94 by Donald Becker.
- *
- *  History:
- *   0.1  21.09.1996  Started
- *        18.10.1996  Changed to new user space access routines 
- *                    (copy_{to,from}_user)
- *   0.2  21.11.1996  various small changes
- *   0.3  03.03.1997  fixed (hopefully) IP not working with ax.25 as a module
- *   0.4  16.04.1997  init code/data tagged
- *   0.5  30.07.1997  made HDLC buffers bigger (solves a problem with the
- *                    soundmodem driver)
- *   0.6  05.04.1998  add spinlocks
- *   0.7  03.08.1999  removed some old compatibility cruft
- *   0.8  12.02.2000  adapted to softnet driver interface
- */
-
-/*****************************************************************************/
-
-#include <linux/capability.h>
-#include <linux/compat.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/if.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <linux/hdlcdrv.h>
-#include <linux/random.h>
-#include <net/ax25.h> 
-#include <linux/uaccess.h>
-
-#include <linux/crc-ccitt.h>
-
-/* --------------------------------------------------------------------- */
-
-#define KISS_VERBOSE
-
-/* --------------------------------------------------------------------- */
-
-#define PARAM_TXDELAY   1
-#define PARAM_PERSIST   2
-#define PARAM_SLOTTIME  3
-#define PARAM_TXTAIL    4
-#define PARAM_FULLDUP   5
-#define PARAM_HARDWARE  6
-#define PARAM_RETURN    255
-
-/* --------------------------------------------------------------------- */
-/*
- * the CRC routines are stolen from WAMPES
- * by Dieter Deyke
- */
-
-
-/*---------------------------------------------------------------------------*/
-
-static inline void append_crc_ccitt(unsigned char *buffer, int len)
-{
-	unsigned int crc = crc_ccitt(0xffff, buffer, len) ^ 0xffff;
-	buffer += len;
-	*buffer++ = crc;
-	*buffer++ = crc >> 8;
-}
-
-/*---------------------------------------------------------------------------*/
-
-static inline int check_crc_ccitt(const unsigned char *buf, int cnt)
-{
-	return (crc_ccitt(0xffff, buf, cnt) & 0xffff) == 0xf0b8;
-}
-
-/*---------------------------------------------------------------------------*/
-
-#if 0
-static int calc_crc_ccitt(const unsigned char *buf, int cnt)
-{
-	unsigned int crc = 0xffff;
-
-	for (; cnt > 0; cnt--)
-		crc = (crc >> 8) ^ crc_ccitt_table[(crc ^ *buf++) & 0xff];
-	crc ^= 0xffff;
-	return crc & 0xffff;
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#define tenms_to_2flags(s,tenms) ((tenms * s->par.bitrate) / 100 / 16)
-
-/* ---------------------------------------------------------------------- */
-/*
- * The HDLC routines
- */
-
-static int hdlc_rx_add_bytes(struct hdlcdrv_state *s, unsigned int bits, 
-			     int num)
-{
-	int added = 0;
-	
-	while (s->hdlcrx.rx_state && num >= 8) {
-		if (s->hdlcrx.len >= sizeof(s->hdlcrx.buffer)) {
-			s->hdlcrx.rx_state = 0;
-			return 0;
-		}
-		*s->hdlcrx.bp++ = bits >> (32-num);
-		s->hdlcrx.len++;
-		num -= 8;
-		added += 8;
-	}
-	return added;
-}
-
-static void hdlc_rx_flag(struct net_device *dev, struct hdlcdrv_state *s)
-{
-	struct sk_buff *skb;
-	int pkt_len;
-	unsigned char *cp;
-
-	if (s->hdlcrx.len < 4) 
-		return;
-	if (!check_crc_ccitt(s->hdlcrx.buffer, s->hdlcrx.len)) 
-		return;
-	pkt_len = s->hdlcrx.len - 2 + 1; /* KISS kludge */
-	if (!(skb = dev_alloc_skb(pkt_len))) {
-		printk("%s: memory squeeze, dropping packet\n", dev->name);
-		dev->stats.rx_dropped++;
-		return;
-	}
-	cp = skb_put(skb, pkt_len);
-	*cp++ = 0; /* KISS kludge */
-	memcpy(cp, s->hdlcrx.buffer, pkt_len - 1);
-	skb->protocol = ax25_type_trans(skb, dev);
-	netif_rx(skb);
-	dev->stats.rx_packets++;
-}
-
-void hdlcdrv_receiver(struct net_device *dev, struct hdlcdrv_state *s)
-{
-	int i;
-	unsigned int mask1, mask2, mask3, mask4, mask5, mask6, word;
-	
-	if (!s || s->magic != HDLCDRV_MAGIC) 
-		return;
-	if (test_and_set_bit(0, &s->hdlcrx.in_hdlc_rx))
-		return;
-
-	while (!hdlcdrv_hbuf_empty(&s->hdlcrx.hbuf)) {
-		word = hdlcdrv_hbuf_get(&s->hdlcrx.hbuf);	
-
-#ifdef HDLCDRV_DEBUG
-		hdlcdrv_add_bitbuffer_word(&s->bitbuf_hdlc, word);
-#endif /* HDLCDRV_DEBUG */
-	       	s->hdlcrx.bitstream >>= 16;
-		s->hdlcrx.bitstream |= word << 16;
-		s->hdlcrx.bitbuf >>= 16;
-		s->hdlcrx.bitbuf |= word << 16;
-		s->hdlcrx.numbits += 16;
-		for(i = 15, mask1 = 0x1fc00, mask2 = 0x1fe00, mask3 = 0x0fc00,
-		    mask4 = 0x1f800, mask5 = 0xf800, mask6 = 0xffff; 
-		    i >= 0; 
-		    i--, mask1 <<= 1, mask2 <<= 1, mask3 <<= 1, mask4 <<= 1, 
-		    mask5 <<= 1, mask6 = (mask6 << 1) | 1) {
-			if ((s->hdlcrx.bitstream & mask1) == mask1)
-				s->hdlcrx.rx_state = 0; /* abort received */
-			else if ((s->hdlcrx.bitstream & mask2) == mask3) {
-				/* flag received */
-				if (s->hdlcrx.rx_state) {
-					hdlc_rx_add_bytes(s, s->hdlcrx.bitbuf 
-							  << (8+i),
-							  s->hdlcrx.numbits
-							  -8-i);
-					hdlc_rx_flag(dev, s);
-				}
-				s->hdlcrx.len = 0;
-				s->hdlcrx.bp = s->hdlcrx.buffer;
-				s->hdlcrx.rx_state = 1;
-				s->hdlcrx.numbits = i;
-			} else if ((s->hdlcrx.bitstream & mask4) == mask5) {
-				/* stuffed bit */
-				s->hdlcrx.numbits--;
-				s->hdlcrx.bitbuf = (s->hdlcrx.bitbuf & (~mask6)) |
-					((s->hdlcrx.bitbuf & mask6) << 1);
-			}
-		}
-		s->hdlcrx.numbits -= hdlc_rx_add_bytes(s, s->hdlcrx.bitbuf,
-						       s->hdlcrx.numbits);
-	}
-	clear_bit(0, &s->hdlcrx.in_hdlc_rx);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static inline void do_kiss_params(struct hdlcdrv_state *s,
-				  unsigned char *data, unsigned long len)
-{
-
-#ifdef KISS_VERBOSE
-#define PKP(a,b) printk(KERN_INFO "hdlcdrv.c: channel params: " a "\n", b)
-#else /* KISS_VERBOSE */	      
-#define PKP(a,b) 
-#endif /* KISS_VERBOSE */	      
-
-	if (len < 2)
-		return;
-	switch(data[0]) {
-	case PARAM_TXDELAY:
-		s->ch_params.tx_delay = data[1];
-		PKP("TX delay = %ums", 10 * s->ch_params.tx_delay);
-		break;
-	case PARAM_PERSIST:   
-		s->ch_params.ppersist = data[1];
-		PKP("p persistence = %u", s->ch_params.ppersist);
-		break;
-	case PARAM_SLOTTIME:  
-		s->ch_params.slottime = data[1];
-		PKP("slot time = %ums", s->ch_params.slottime);
-		break;
-	case PARAM_TXTAIL:    
-		s->ch_params.tx_tail = data[1];
-		PKP("TX tail = %ums", s->ch_params.tx_tail);
-		break;
-	case PARAM_FULLDUP:   
-		s->ch_params.fulldup = !!data[1];
-		PKP("%s duplex", s->ch_params.fulldup ? "full" : "half");
-		break;
-	default:
-		break;
-	}
-#undef PKP
-}
-
-/* ---------------------------------------------------------------------- */
-
-void hdlcdrv_transmitter(struct net_device *dev, struct hdlcdrv_state *s)
-{
-	unsigned int mask1, mask2, mask3;
-	int i;
-	struct sk_buff *skb;
-	int pkt_len;
-
-	if (!s || s->magic != HDLCDRV_MAGIC) 
-		return;
-	if (test_and_set_bit(0, &s->hdlctx.in_hdlc_tx))
-		return;
-	for (;;) {
-		if (s->hdlctx.numbits >= 16) {
-			if (hdlcdrv_hbuf_full(&s->hdlctx.hbuf)) {
-				clear_bit(0, &s->hdlctx.in_hdlc_tx);
-				return;
-			}
-			hdlcdrv_hbuf_put(&s->hdlctx.hbuf, s->hdlctx.bitbuf);
-			s->hdlctx.bitbuf >>= 16;
-			s->hdlctx.numbits -= 16;
-		}
-		switch (s->hdlctx.tx_state) {
-		default:
-			clear_bit(0, &s->hdlctx.in_hdlc_tx);
-			return;
-		case 0:
-		case 1:
-			if (s->hdlctx.numflags) {
-				s->hdlctx.numflags--;
-				s->hdlctx.bitbuf |= 
-					0x7e7e << s->hdlctx.numbits;
-				s->hdlctx.numbits += 16;
-				break;
-			}
-			if (s->hdlctx.tx_state == 1) {
-				clear_bit(0, &s->hdlctx.in_hdlc_tx);
-				return;
-			}
-			if (!(skb = s->skb)) {
-				int flgs = tenms_to_2flags(s, s->ch_params.tx_tail);
-				if (flgs < 2)
-					flgs = 2;
-				s->hdlctx.tx_state = 1;
-				s->hdlctx.numflags = flgs;
-				break;
-			}
-			s->skb = NULL;
-			netif_wake_queue(dev);
-			pkt_len = skb->len-1; /* strip KISS byte */
-			if (pkt_len >= HDLCDRV_MAXFLEN || pkt_len < 2) {
-				s->hdlctx.tx_state = 0;
-				s->hdlctx.numflags = 1;
-				dev_kfree_skb_irq(skb);
-				break;
-			}
-			skb_copy_from_linear_data_offset(skb, 1,
-							 s->hdlctx.buffer,
-							 pkt_len);
-			dev_kfree_skb_irq(skb);
-			s->hdlctx.bp = s->hdlctx.buffer;
-			append_crc_ccitt(s->hdlctx.buffer, pkt_len);
-			s->hdlctx.len = pkt_len+2; /* the appended CRC */
-			s->hdlctx.tx_state = 2;
-			s->hdlctx.bitstream = 0;
-			dev->stats.tx_packets++;
-			break;
-		case 2:
-			if (!s->hdlctx.len) {
-				s->hdlctx.tx_state = 0;
-				s->hdlctx.numflags = 1;
-				break;
-			}
-			s->hdlctx.len--;
-			s->hdlctx.bitbuf |= *s->hdlctx.bp <<
-				s->hdlctx.numbits;
-			s->hdlctx.bitstream >>= 8;
-			s->hdlctx.bitstream |= (*s->hdlctx.bp++) << 16;
-			mask1 = 0x1f000;
-			mask2 = 0x10000;
-			mask3 = 0xffffffff >> (31-s->hdlctx.numbits);
-			s->hdlctx.numbits += 8;
-			for(i = 0; i < 8; i++, mask1 <<= 1, mask2 <<= 1, 
-			    mask3 = (mask3 << 1) | 1) {
-				if ((s->hdlctx.bitstream & mask1) != mask1) 
-					continue;
-				s->hdlctx.bitstream &= ~mask2;
-				s->hdlctx.bitbuf = 
-					(s->hdlctx.bitbuf & mask3) |
-						((s->hdlctx.bitbuf & 
-						 (~mask3)) << 1);
-				s->hdlctx.numbits++;
-				mask3 = (mask3 << 1) | 1;
-			}
-			break;
-		}
-	}
-}
-
-/* ---------------------------------------------------------------------- */
-
-static void start_tx(struct net_device *dev, struct hdlcdrv_state *s)
-{
-	s->hdlctx.tx_state = 0;
-	s->hdlctx.numflags = tenms_to_2flags(s, s->ch_params.tx_delay);
-	s->hdlctx.bitbuf = s->hdlctx.bitstream = s->hdlctx.numbits = 0;
-	hdlcdrv_transmitter(dev, s);
-	s->hdlctx.ptt = 1;
-	s->ptt_keyed++;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void hdlcdrv_arbitrate(struct net_device *dev, struct hdlcdrv_state *s)
-{
-	if (!s || s->magic != HDLCDRV_MAGIC || s->hdlctx.ptt || !s->skb) 
-		return;
-	if (s->ch_params.fulldup) {
-		start_tx(dev, s);
-		return;
-	}
-	if (s->hdlcrx.dcd) {
-		s->hdlctx.slotcnt = s->ch_params.slottime;
-		return;
-	}
-	if ((--s->hdlctx.slotcnt) > 0)
-		return;
-	s->hdlctx.slotcnt = s->ch_params.slottime;
-	if (get_random_u8() > s->ch_params.ppersist)
-		return;
-	start_tx(dev, s);
-}
-
-/* --------------------------------------------------------------------- */
-/*
- * ===================== network driver interface =========================
- */
-
-static netdev_tx_t hdlcdrv_send_packet(struct sk_buff *skb,
-				       struct net_device *dev)
-{
-	struct hdlcdrv_state *sm = netdev_priv(dev);
-
-	if (skb->protocol == htons(ETH_P_IP))
-		return ax25_ip_xmit(skb);
-
-	if (skb->data[0] != 0) {
-		do_kiss_params(sm, skb->data, skb->len);
-		dev_kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-	if (sm->skb) {
-		dev_kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-	netif_stop_queue(dev);
-	sm->skb = skb;
-	return NETDEV_TX_OK;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int hdlcdrv_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct sockaddr *sa = (struct sockaddr *)addr;
-
-	/* addr is an AX.25 shifted ASCII mac address */
-	dev_addr_set(dev, sa->sa_data);
-	return 0;                                         
-}
-
-/* --------------------------------------------------------------------- */
-/*
- * Open/initialize the board. This is called (in the current kernel)
- * sometime after booting when the 'ifconfig' program is run.
- *
- * This routine should set everything up anew at each open, even
- * registers that "should" only need to be set once at boot, so that
- * there is non-reboot way to recover if something goes wrong.
- */
-
-static int hdlcdrv_open(struct net_device *dev)
-{
-	struct hdlcdrv_state *s = netdev_priv(dev);
-	int i;
-
-	if (!s->ops || !s->ops->open)
-		return -ENODEV;
-
-	/*
-	 * initialise some variables
-	 */
-	s->opened = 1;
-	s->hdlcrx.hbuf.rd = s->hdlcrx.hbuf.wr = 0;
-	s->hdlcrx.in_hdlc_rx = 0;
-	s->hdlcrx.rx_state = 0;
-	
-	s->hdlctx.hbuf.rd = s->hdlctx.hbuf.wr = 0;
-	s->hdlctx.in_hdlc_tx = 0;
-	s->hdlctx.tx_state = 1;
-	s->hdlctx.numflags = 0;
-	s->hdlctx.bitstream = s->hdlctx.bitbuf = s->hdlctx.numbits = 0;
-	s->hdlctx.ptt = 0;
-	s->hdlctx.slotcnt = s->ch_params.slottime;
-	s->hdlctx.calibrate = 0;
-
-	i = s->ops->open(dev);
-	if (i)
-		return i;
-	netif_start_queue(dev);
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-/* 
- * The inverse routine to hdlcdrv_open(). 
- */
-
-static int hdlcdrv_close(struct net_device *dev)
-{
-	struct hdlcdrv_state *s = netdev_priv(dev);
-	int i = 0;
-
-	netif_stop_queue(dev);
-
-	if (s->ops && s->ops->close)
-		i = s->ops->close(dev);
-	dev_kfree_skb(s->skb);
-	s->skb = NULL;
-	s->opened = 0;
-	return i;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int hdlcdrv_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
-				  void __user *data, int cmd)
-{
-	struct hdlcdrv_state *s = netdev_priv(dev);
-	struct hdlcdrv_ioctl bi;
-
-	if (cmd != SIOCDEVPRIVATE)
-		return -ENOIOCTLCMD;
-
-	if (in_compat_syscall()) /* to be implemented */
-		return -ENOIOCTLCMD;
-
-	if (copy_from_user(&bi, data, sizeof(bi)))
-		return -EFAULT;
-
-	switch (bi.cmd) {
-	default:
-		if (s->ops && s->ops->ioctl)
-			return s->ops->ioctl(dev, data, &bi, cmd);
-		return -ENOIOCTLCMD;
-
-	case HDLCDRVCTL_GETCHANNELPAR:
-		bi.data.cp.tx_delay = s->ch_params.tx_delay;
-		bi.data.cp.tx_tail = s->ch_params.tx_tail;
-		bi.data.cp.slottime = s->ch_params.slottime;
-		bi.data.cp.ppersist = s->ch_params.ppersist;
-		bi.data.cp.fulldup = s->ch_params.fulldup;
-		break;
-
-	case HDLCDRVCTL_SETCHANNELPAR:
-		if (!capable(CAP_NET_ADMIN))
-			return -EACCES;
-		s->ch_params.tx_delay = bi.data.cp.tx_delay;
-		s->ch_params.tx_tail = bi.data.cp.tx_tail;
-		s->ch_params.slottime = bi.data.cp.slottime;
-		s->ch_params.ppersist = bi.data.cp.ppersist;
-		s->ch_params.fulldup = bi.data.cp.fulldup;
-		s->hdlctx.slotcnt = 1;
-		return 0;
-		
-	case HDLCDRVCTL_GETMODEMPAR:
-		bi.data.mp.iobase = dev->base_addr;
-		bi.data.mp.irq = dev->irq;
-		bi.data.mp.dma = dev->dma;
-		bi.data.mp.dma2 = s->ptt_out.dma2;
-		bi.data.mp.seriobase = s->ptt_out.seriobase;
-		bi.data.mp.pariobase = s->ptt_out.pariobase;
-		bi.data.mp.midiiobase = s->ptt_out.midiiobase;
-		break;
-
-	case HDLCDRVCTL_SETMODEMPAR:
-		if ((!capable(CAP_SYS_RAWIO)) || netif_running(dev))
-			return -EACCES;
-		dev->base_addr = bi.data.mp.iobase;
-		dev->irq = bi.data.mp.irq;
-		dev->dma = bi.data.mp.dma;
-		s->ptt_out.dma2 = bi.data.mp.dma2;
-		s->ptt_out.seriobase = bi.data.mp.seriobase;
-		s->ptt_out.pariobase = bi.data.mp.pariobase;
-		s->ptt_out.midiiobase = bi.data.mp.midiiobase;
-		return 0;	
-	
-	case HDLCDRVCTL_GETSTAT:
-		bi.data.cs.ptt = hdlcdrv_ptt(s);
-		bi.data.cs.dcd = s->hdlcrx.dcd;
-		bi.data.cs.ptt_keyed = s->ptt_keyed;
-		bi.data.cs.tx_packets = dev->stats.tx_packets;
-		bi.data.cs.tx_errors = dev->stats.tx_errors;
-		bi.data.cs.rx_packets = dev->stats.rx_packets;
-		bi.data.cs.rx_errors = dev->stats.rx_errors;
-		break;		
-
-	case HDLCDRVCTL_OLDGETSTAT:
-		bi.data.ocs.ptt = hdlcdrv_ptt(s);
-		bi.data.ocs.dcd = s->hdlcrx.dcd;
-		bi.data.ocs.ptt_keyed = s->ptt_keyed;
-		break;		
-
-	case HDLCDRVCTL_CALIBRATE:
-		if(!capable(CAP_SYS_RAWIO))
-			return -EPERM;
-		if (s->par.bitrate <= 0)
-			return -EINVAL;
-		if (bi.data.calibrate > INT_MAX / s->par.bitrate)
-			return -EINVAL;
-		s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16;
-		return 0;
-
-	case HDLCDRVCTL_GETSAMPLES:
-#ifndef HDLCDRV_DEBUG
-		return -EPERM;
-#else /* HDLCDRV_DEBUG */
-		if (s->bitbuf_channel.rd == s->bitbuf_channel.wr) 
-			return -EAGAIN;
-		bi.data.bits = 
-			s->bitbuf_channel.buffer[s->bitbuf_channel.rd];
-		s->bitbuf_channel.rd = (s->bitbuf_channel.rd+1) %
-			sizeof(s->bitbuf_channel.buffer);
-		break;
-#endif /* HDLCDRV_DEBUG */
-				
-	case HDLCDRVCTL_GETBITS:
-#ifndef HDLCDRV_DEBUG
-		return -EPERM;
-#else /* HDLCDRV_DEBUG */
-		if (s->bitbuf_hdlc.rd == s->bitbuf_hdlc.wr) 
-			return -EAGAIN;
-		bi.data.bits = 
-			s->bitbuf_hdlc.buffer[s->bitbuf_hdlc.rd];
-		s->bitbuf_hdlc.rd = (s->bitbuf_hdlc.rd+1) %
-			sizeof(s->bitbuf_hdlc.buffer);
-		break;		
-#endif /* HDLCDRV_DEBUG */
-
-	case HDLCDRVCTL_DRIVERNAME:
-		if (s->ops && s->ops->drvname) {
-			strscpy(bi.data.drivername, s->ops->drvname,
-				sizeof(bi.data.drivername));
-			break;
-		}
-		bi.data.drivername[0] = '\0';
-		break;
-		
-	}
-	if (copy_to_user(data, &bi, sizeof(bi)))
-		return -EFAULT;
-	return 0;
-
-}
-
-/* --------------------------------------------------------------------- */
-
-static const struct net_device_ops hdlcdrv_netdev = {
-	.ndo_open	= hdlcdrv_open,
-	.ndo_stop	= hdlcdrv_close,
-	.ndo_start_xmit = hdlcdrv_send_packet,
-	.ndo_siocdevprivate  = hdlcdrv_siocdevprivate,
-	.ndo_set_mac_address = hdlcdrv_set_mac_address,
-};
-
-/*
- * Initialize fields in hdlcdrv
- */
-static void hdlcdrv_setup(struct net_device *dev)
-{
-	static const struct hdlcdrv_channel_params dflt_ch_params = { 
-		20, 2, 10, 40, 0 
-	};
-	struct hdlcdrv_state *s = netdev_priv(dev);
-
-	/*
-	 * initialize the hdlcdrv_state struct
-	 */
-	s->ch_params = dflt_ch_params;
-	s->ptt_keyed = 0;
-
-	spin_lock_init(&s->hdlcrx.hbuf.lock);
-	s->hdlcrx.hbuf.rd = s->hdlcrx.hbuf.wr = 0;
-	s->hdlcrx.in_hdlc_rx = 0;
-	s->hdlcrx.rx_state = 0;
-	
-	spin_lock_init(&s->hdlctx.hbuf.lock);
-	s->hdlctx.hbuf.rd = s->hdlctx.hbuf.wr = 0;
-	s->hdlctx.in_hdlc_tx = 0;
-	s->hdlctx.tx_state = 1;
-	s->hdlctx.numflags = 0;
-	s->hdlctx.bitstream = s->hdlctx.bitbuf = s->hdlctx.numbits = 0;
-	s->hdlctx.ptt = 0;
-	s->hdlctx.slotcnt = s->ch_params.slottime;
-	s->hdlctx.calibrate = 0;
-
-#ifdef HDLCDRV_DEBUG
-	s->bitbuf_channel.rd = s->bitbuf_channel.wr = 0;
-	s->bitbuf_channel.shreg = 0x80;
-
-	s->bitbuf_hdlc.rd = s->bitbuf_hdlc.wr = 0;
-	s->bitbuf_hdlc.shreg = 0x80;
-#endif /* HDLCDRV_DEBUG */
-
-
-	/* Fill in the fields of the device structure */
-
-	s->skb = NULL;
-	
-	dev->netdev_ops = &hdlcdrv_netdev;
-	dev->header_ops = &ax25_header_ops;
-	
-	dev->type = ARPHRD_AX25;           /* AF_AX25 device */
-	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
-	dev->mtu = AX25_DEF_PACLEN;        /* eth_mtu is the default */
-	dev->addr_len = AX25_ADDR_LEN;     /* sizeof an ax.25 address */
-	memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
-	dev_addr_set(dev, (u8 *)&ax25_defaddr);
-	dev->tx_queue_len = 16;
-}
-
-/* --------------------------------------------------------------------- */
-struct net_device *hdlcdrv_register(const struct hdlcdrv_ops *ops,
-				    unsigned int privsize, const char *ifname,
-				    unsigned int baseaddr, unsigned int irq, 
-				    unsigned int dma) 
-{
-	struct net_device *dev;
-	struct hdlcdrv_state *s;
-	int err;
-
-	if (privsize < sizeof(struct hdlcdrv_state))
-		privsize = sizeof(struct hdlcdrv_state);
-
-	dev = alloc_netdev(privsize, ifname, NET_NAME_UNKNOWN, hdlcdrv_setup);
-	if (!dev)
-		return ERR_PTR(-ENOMEM);
-
-	/*
-	 * initialize part of the hdlcdrv_state struct
-	 */
-	s = netdev_priv(dev);
-	s->magic = HDLCDRV_MAGIC;
-	s->ops = ops;
-	dev->base_addr = baseaddr;
-	dev->irq = irq;
-	dev->dma = dma;
-
-	err = register_netdev(dev);
-	if (err < 0) {
-		printk(KERN_WARNING "hdlcdrv: cannot register net "
-		       "device %s\n", dev->name);
-		free_netdev(dev);
-		dev = ERR_PTR(err);
-	}
-	return dev;
-}
-
-/* --------------------------------------------------------------------- */
-
-void hdlcdrv_unregister(struct net_device *dev) 
-{
-	struct hdlcdrv_state *s = netdev_priv(dev);
-
-	BUG_ON(s->magic != HDLCDRV_MAGIC);
-
-	if (s->opened && s->ops->close)
-		s->ops->close(dev);
-	unregister_netdev(dev);
-	
-	free_netdev(dev);
-}
-
-/* --------------------------------------------------------------------- */
-
-EXPORT_SYMBOL(hdlcdrv_receiver);
-EXPORT_SYMBOL(hdlcdrv_transmitter);
-EXPORT_SYMBOL(hdlcdrv_arbitrate);
-EXPORT_SYMBOL(hdlcdrv_register);
-EXPORT_SYMBOL(hdlcdrv_unregister);
-
-/* --------------------------------------------------------------------- */
-
-MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
-MODULE_DESCRIPTION("Packet Radio network interface HDLC encoder/decoder");
-MODULE_LICENSE("GPL");
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
deleted file mode 100644
index 5f38a002bd9e..000000000000
--- a/drivers/net/hamradio/mkiss.c
+++ /dev/null
@@ -1,980 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Copyright (C) Hans Alblas PE1AYX <hans@esrac.ele.tue.nl>
- * Copyright (C) 2004, 05 Ralf Baechle DL5RB <ralf@linux-mips.org>
- * Copyright (C) 2004, 05 Thomas Osterried DL9SAU <thomas@x-berg.in-berlin.de>
- */
-#include <linux/module.h>
-#include <linux/bitops.h>
-#include <linux/uaccess.h>
-#include <linux/crc16.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/slab.h>
-#include <linux/tty.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/major.h>
-#include <linux/init.h>
-#include <linux/rtnetlink.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/if_arp.h>
-#include <linux/jiffies.h>
-#include <linux/refcount.h>
-
-#include <net/ax25.h>
-
-#define AX_MTU		236
-
-/* some arch define END as assembly function ending, just undef it */
-#undef	END
-/* SLIP/KISS protocol characters. */
-#define END             0300		/* indicates end of frame	*/
-#define ESC             0333		/* indicates byte stuffing	*/
-#define ESC_END         0334		/* ESC ESC_END means END 'data'	*/
-#define ESC_ESC         0335		/* ESC ESC_ESC means ESC 'data'	*/
-
-struct mkiss {
-	struct tty_struct	*tty;	/* ptr to TTY structure		*/
-	struct net_device	*dev;	/* easy for intr handling	*/
-
-	/* These are pointers to the malloc()ed frame buffers. */
-	spinlock_t		buflock;/* lock for rbuf and xbuf */
-	unsigned char		*rbuff;	/* receiver buffer		*/
-	int			rcount;	/* received chars counter       */
-	unsigned char		*xbuff;	/* transmitter buffer		*/
-	unsigned char		*xhead;	/* pointer to next byte to XMIT */
-	int			xleft;	/* bytes left in XMIT queue     */
-
-	/* Detailed SLIP statistics. */
-	int		mtu;		/* Our mtu (to spot changes!)   */
-	int		buffsize;	/* Max buffers sizes            */
-
-	unsigned long	flags;		/* Flag values/ mode etc	*/
-					/* long req'd: used by set_bit --RR */
-#define AXF_INUSE	0		/* Channel in use               */
-#define AXF_ESCAPE	1               /* ESC received                 */
-#define AXF_ERROR	2               /* Parity, etc. error           */
-#define AXF_KEEPTEST	3		/* Keepalive test flag		*/
-#define AXF_OUTWAIT	4		/* is outpacket was flag	*/
-
-	int		mode;
-        int		crcmode;	/* MW: for FlexNet, SMACK etc.  */
-	int		crcauto;	/* CRC auto mode */
-
-#define CRC_MODE_NONE		0
-#define CRC_MODE_FLEX		1
-#define CRC_MODE_SMACK		2
-#define CRC_MODE_FLEX_TEST	3
-#define CRC_MODE_SMACK_TEST	4
-
-	refcount_t		refcnt;
-	struct completion	dead;
-};
-
-/*---------------------------------------------------------------------------*/
-
-static const unsigned short crc_flex_table[] = {
-	0x0f87, 0x1e0e, 0x2c95, 0x3d1c, 0x49a3, 0x582a, 0x6ab1, 0x7b38,
-	0x83cf, 0x9246, 0xa0dd, 0xb154, 0xc5eb, 0xd462, 0xe6f9, 0xf770,
-	0x1f06, 0x0e8f, 0x3c14, 0x2d9d, 0x5922, 0x48ab, 0x7a30, 0x6bb9,
-	0x934e, 0x82c7, 0xb05c, 0xa1d5, 0xd56a, 0xc4e3, 0xf678, 0xe7f1,
-	0x2e85, 0x3f0c, 0x0d97, 0x1c1e, 0x68a1, 0x7928, 0x4bb3, 0x5a3a,
-	0xa2cd, 0xb344, 0x81df, 0x9056, 0xe4e9, 0xf560, 0xc7fb, 0xd672,
-	0x3e04, 0x2f8d, 0x1d16, 0x0c9f, 0x7820, 0x69a9, 0x5b32, 0x4abb,
-	0xb24c, 0xa3c5, 0x915e, 0x80d7, 0xf468, 0xe5e1, 0xd77a, 0xc6f3,
-	0x4d83, 0x5c0a, 0x6e91, 0x7f18, 0x0ba7, 0x1a2e, 0x28b5, 0x393c,
-	0xc1cb, 0xd042, 0xe2d9, 0xf350, 0x87ef, 0x9666, 0xa4fd, 0xb574,
-	0x5d02, 0x4c8b, 0x7e10, 0x6f99, 0x1b26, 0x0aaf, 0x3834, 0x29bd,
-	0xd14a, 0xc0c3, 0xf258, 0xe3d1, 0x976e, 0x86e7, 0xb47c, 0xa5f5,
-	0x6c81, 0x7d08, 0x4f93, 0x5e1a, 0x2aa5, 0x3b2c, 0x09b7, 0x183e,
-	0xe0c9, 0xf140, 0xc3db, 0xd252, 0xa6ed, 0xb764, 0x85ff, 0x9476,
-	0x7c00, 0x6d89, 0x5f12, 0x4e9b, 0x3a24, 0x2bad, 0x1936, 0x08bf,
-	0xf048, 0xe1c1, 0xd35a, 0xc2d3, 0xb66c, 0xa7e5, 0x957e, 0x84f7,
-	0x8b8f, 0x9a06, 0xa89d, 0xb914, 0xcdab, 0xdc22, 0xeeb9, 0xff30,
-	0x07c7, 0x164e, 0x24d5, 0x355c, 0x41e3, 0x506a, 0x62f1, 0x7378,
-	0x9b0e, 0x8a87, 0xb81c, 0xa995, 0xdd2a, 0xcca3, 0xfe38, 0xefb1,
-	0x1746, 0x06cf, 0x3454, 0x25dd, 0x5162, 0x40eb, 0x7270, 0x63f9,
-	0xaa8d, 0xbb04, 0x899f, 0x9816, 0xeca9, 0xfd20, 0xcfbb, 0xde32,
-	0x26c5, 0x374c, 0x05d7, 0x145e, 0x60e1, 0x7168, 0x43f3, 0x527a,
-	0xba0c, 0xab85, 0x991e, 0x8897, 0xfc28, 0xeda1, 0xdf3a, 0xceb3,
-	0x3644, 0x27cd, 0x1556, 0x04df, 0x7060, 0x61e9, 0x5372, 0x42fb,
-	0xc98b, 0xd802, 0xea99, 0xfb10, 0x8faf, 0x9e26, 0xacbd, 0xbd34,
-	0x45c3, 0x544a, 0x66d1, 0x7758, 0x03e7, 0x126e, 0x20f5, 0x317c,
-	0xd90a, 0xc883, 0xfa18, 0xeb91, 0x9f2e, 0x8ea7, 0xbc3c, 0xadb5,
-	0x5542, 0x44cb, 0x7650, 0x67d9, 0x1366, 0x02ef, 0x3074, 0x21fd,
-	0xe889, 0xf900, 0xcb9b, 0xda12, 0xaead, 0xbf24, 0x8dbf, 0x9c36,
-	0x64c1, 0x7548, 0x47d3, 0x565a, 0x22e5, 0x336c, 0x01f7, 0x107e,
-	0xf808, 0xe981, 0xdb1a, 0xca93, 0xbe2c, 0xafa5, 0x9d3e, 0x8cb7,
-	0x7440, 0x65c9, 0x5752, 0x46db, 0x3264, 0x23ed, 0x1176, 0x00ff
-};
-
-static unsigned short calc_crc_flex(unsigned char *cp, int size)
-{
-	unsigned short crc = 0xffff;
-
-	while (size--)
-		crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff];
-
-	return crc;
-}
-
-static int check_crc_flex(unsigned char *cp, int size)
-{
-	unsigned short crc = 0xffff;
-
-	if (size < 3)
-		return -1;
-
-	while (size--)
-		crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff];
-
-	if ((crc & 0xffff) != 0x7070)
-		return -1;
-
-	return 0;
-}
-
-static int check_crc_16(unsigned char *cp, int size)
-{
-	unsigned short crc = 0x0000;
-
-	if (size < 3)
-		return -1;
-
-	crc = crc16(0, cp, size);
-
-	if (crc != 0x0000)
-		return -1;
-
-	return 0;
-}
-
-/*
- * Standard encapsulation
- */
-
-static int kiss_esc(unsigned char *s, unsigned char *d, int len)
-{
-	unsigned char *ptr = d;
-	unsigned char c;
-
-	/*
-	 * Send an initial END character to flush out any data that may have
-	 * accumulated in the receiver due to line noise.
-	 */
-
-	*ptr++ = END;
-
-	while (len-- > 0) {
-		switch (c = *s++) {
-		case END:
-			*ptr++ = ESC;
-			*ptr++ = ESC_END;
-			break;
-		case ESC:
-			*ptr++ = ESC;
-			*ptr++ = ESC_ESC;
-			break;
-		default:
-			*ptr++ = c;
-			break;
-		}
-	}
-
-	*ptr++ = END;
-
-	return ptr - d;
-}
-
-/*
- * MW:
- * OK its ugly, but tell me a better solution without copying the
- * packet to a temporary buffer :-)
- */
-static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc,
-	int len)
-{
-	unsigned char *ptr = d;
-	unsigned char c=0;
-
-	*ptr++ = END;
-	while (len > 0) {
-		if (len > 2)
-			c = *s++;
-		else if (len > 1)
-			c = crc >> 8;
-		else
-			c = crc & 0xff;
-
-		len--;
-
-		switch (c) {
-		case END:
-			*ptr++ = ESC;
-			*ptr++ = ESC_END;
-			break;
-		case ESC:
-			*ptr++ = ESC;
-			*ptr++ = ESC_ESC;
-			break;
-		default:
-			*ptr++ = c;
-			break;
-		}
-	}
-	*ptr++ = END;
-
-	return ptr - d;
-}
-
-/* Send one completely decapsulated AX.25 packet to the AX.25 layer. */
-static void ax_bump(struct mkiss *ax)
-{
-	struct sk_buff *skb;
-	int count;
-
-	spin_lock_bh(&ax->buflock);
-	if (ax->rbuff[0] > 0x0f) {
-		if (ax->rbuff[0] & 0x80) {
-			if (check_crc_16(ax->rbuff, ax->rcount) < 0) {
-				ax->dev->stats.rx_errors++;
-				spin_unlock_bh(&ax->buflock);
-
-				return;
-			}
-			if (ax->crcmode != CRC_MODE_SMACK && ax->crcauto) {
-				printk(KERN_INFO
-				       "mkiss: %s: Switching to crc-smack\n",
-				       ax->dev->name);
-				ax->crcmode = CRC_MODE_SMACK;
-			}
-			ax->rcount -= 2;
-			*ax->rbuff &= ~0x80;
-		} else if (ax->rbuff[0] & 0x20)  {
-			if (check_crc_flex(ax->rbuff, ax->rcount) < 0) {
-				ax->dev->stats.rx_errors++;
-				spin_unlock_bh(&ax->buflock);
-				return;
-			}
-			if (ax->crcmode != CRC_MODE_FLEX && ax->crcauto) {
-				printk(KERN_INFO
-				       "mkiss: %s: Switching to crc-flexnet\n",
-				       ax->dev->name);
-				ax->crcmode = CRC_MODE_FLEX;
-			}
-			ax->rcount -= 2;
-
-			/*
-			 * dl9sau bugfix: the trailling two bytes flexnet crc
-			 * will not be passed to the kernel. thus we have to
-			 * correct the kissparm signature, because it indicates
-			 * a crc but there's none
-			 */
-			*ax->rbuff &= ~0x20;
-		}
-	}
-
-	count = ax->rcount;
-
-	if ((skb = dev_alloc_skb(count)) == NULL) {
-		printk(KERN_ERR "mkiss: %s: memory squeeze, dropping packet.\n",
-		       ax->dev->name);
-		ax->dev->stats.rx_dropped++;
-		spin_unlock_bh(&ax->buflock);
-		return;
-	}
-
-	skb_put_data(skb, ax->rbuff, count);
-	skb->protocol = ax25_type_trans(skb, ax->dev);
-	netif_rx(skb);
-	ax->dev->stats.rx_packets++;
-	ax->dev->stats.rx_bytes += count;
-	spin_unlock_bh(&ax->buflock);
-}
-
-static void kiss_unesc(struct mkiss *ax, unsigned char s)
-{
-	switch (s) {
-	case END:
-		/* drop keeptest bit = VSV */
-		if (test_bit(AXF_KEEPTEST, &ax->flags))
-			clear_bit(AXF_KEEPTEST, &ax->flags);
-
-		if (!test_and_clear_bit(AXF_ERROR, &ax->flags) && (ax->rcount > 2))
-			ax_bump(ax);
-
-		clear_bit(AXF_ESCAPE, &ax->flags);
-		ax->rcount = 0;
-		return;
-
-	case ESC:
-		set_bit(AXF_ESCAPE, &ax->flags);
-		return;
-	case ESC_ESC:
-		if (test_and_clear_bit(AXF_ESCAPE, &ax->flags))
-			s = ESC;
-		break;
-	case ESC_END:
-		if (test_and_clear_bit(AXF_ESCAPE, &ax->flags))
-			s = END;
-		break;
-	}
-
-	spin_lock_bh(&ax->buflock);
-	if (!test_bit(AXF_ERROR, &ax->flags)) {
-		if (ax->rcount < ax->buffsize) {
-			ax->rbuff[ax->rcount++] = s;
-			spin_unlock_bh(&ax->buflock);
-			return;
-		}
-
-		ax->dev->stats.rx_over_errors++;
-		set_bit(AXF_ERROR, &ax->flags);
-	}
-	spin_unlock_bh(&ax->buflock);
-}
-
-static int ax_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct sockaddr_ax25 *sa = addr;
-
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
-	__dev_addr_set(dev, &sa->sax25_call, AX25_ADDR_LEN);
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
-
-	return 0;
-}
-
-/*---------------------------------------------------------------------------*/
-
-static void ax_changedmtu(struct mkiss *ax)
-{
-	struct net_device *dev = ax->dev;
-	unsigned char *xbuff, *rbuff, *oxbuff, *orbuff;
-	int len;
-
-	len = dev->mtu * 2;
-
-	/*
-	 * allow for arrival of larger UDP packets, even if we say not to
-	 * also fixes a bug in which SunOS sends 512-byte packets even with
-	 * an MSS of 128
-	 */
-	if (len < 576 * 2)
-		len = 576 * 2;
-
-	xbuff = kmalloc(len + 4, GFP_ATOMIC);
-	rbuff = kmalloc(len + 4, GFP_ATOMIC);
-
-	if (xbuff == NULL || rbuff == NULL)  {
-		printk(KERN_ERR "mkiss: %s: unable to grow ax25 buffers, "
-		       "MTU change cancelled.\n",
-		       ax->dev->name);
-		dev->mtu = ax->mtu;
-		kfree(xbuff);
-		kfree(rbuff);
-		return;
-	}
-
-	spin_lock_bh(&ax->buflock);
-
-	oxbuff    = ax->xbuff;
-	ax->xbuff = xbuff;
-	orbuff    = ax->rbuff;
-	ax->rbuff = rbuff;
-
-	if (ax->xleft) {
-		if (ax->xleft <= len) {
-			memcpy(ax->xbuff, ax->xhead, ax->xleft);
-		} else  {
-			ax->xleft = 0;
-			dev->stats.tx_dropped++;
-		}
-	}
-
-	ax->xhead = ax->xbuff;
-
-	if (ax->rcount) {
-		if (ax->rcount <= len) {
-			memcpy(ax->rbuff, orbuff, ax->rcount);
-		} else  {
-			ax->rcount = 0;
-			dev->stats.rx_over_errors++;
-			set_bit(AXF_ERROR, &ax->flags);
-		}
-	}
-
-	ax->mtu      = dev->mtu + 73;
-	ax->buffsize = len;
-
-	spin_unlock_bh(&ax->buflock);
-
-	kfree(oxbuff);
-	kfree(orbuff);
-}
-
-/* Encapsulate one AX.25 packet and stuff into a TTY queue. */
-static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
-{
-	struct mkiss *ax = netdev_priv(dev);
-	unsigned char *p;
-	int actual, count;
-
-	if (ax->mtu != ax->dev->mtu + 73)	/* Someone has been ifconfigging */
-		ax_changedmtu(ax);
-
-	if (len > ax->mtu) {		/* Sigh, shouldn't occur BUT ... */
-		printk(KERN_ERR "mkiss: %s: truncating oversized transmit packet!\n", ax->dev->name);
-		dev->stats.tx_dropped++;
-		netif_start_queue(dev);
-		return;
-	}
-
-	p = icp;
-
-	spin_lock_bh(&ax->buflock);
-	if ((*p & 0x0f) != 0) {
-		/* Configuration Command (kissparms(1).
-		 * Protocol spec says: never append CRC.
-		 * This fixes a very old bug in the linux
-		 * kiss driver. -- dl9sau */
-		switch (*p & 0xff) {
-		case 0x85:
-			/* command from userspace especially for us,
-			 * not for delivery to the tnc */
-			if (len > 1) {
-				int cmd = (p[1] & 0xff);
-				switch(cmd) {
-				case 3:
-				  ax->crcmode = CRC_MODE_SMACK;
-				  break;
-				case 2:
-				  ax->crcmode = CRC_MODE_FLEX;
-				  break;
-				case 1:
-				  ax->crcmode = CRC_MODE_NONE;
-				  break;
-				case 0:
-				default:
-				  ax->crcmode = CRC_MODE_SMACK_TEST;
-				  cmd = 0;
-				}
-				ax->crcauto = (cmd ? 0 : 1);
-				printk(KERN_INFO "mkiss: %s: crc mode set to %d\n",
-				       ax->dev->name, cmd);
-			}
-			spin_unlock_bh(&ax->buflock);
-			netif_start_queue(dev);
-
-			return;
-		default:
-			count = kiss_esc(p, ax->xbuff, len);
-		}
-	} else {
-		unsigned short crc;
-		switch (ax->crcmode) {
-		case CRC_MODE_SMACK_TEST:
-			ax->crcmode  = CRC_MODE_FLEX_TEST;
-			printk(KERN_INFO "mkiss: %s: Trying crc-smack\n", ax->dev->name);
-			fallthrough;
-		case CRC_MODE_SMACK:
-			*p |= 0x80;
-			crc = swab16(crc16(0, p, len));
-			count = kiss_esc_crc(p, ax->xbuff, crc, len+2);
-			break;
-		case CRC_MODE_FLEX_TEST:
-			ax->crcmode = CRC_MODE_NONE;
-			printk(KERN_INFO "mkiss: %s: Trying crc-flexnet\n", ax->dev->name);
-			fallthrough;
-		case CRC_MODE_FLEX:
-			*p |= 0x20;
-			crc = calc_crc_flex(p, len);
-			count = kiss_esc_crc(p, ax->xbuff, crc, len+2);
-			break;
-
-		default:
-			count = kiss_esc(p, ax->xbuff, len);
-		}
-	}
-	spin_unlock_bh(&ax->buflock);
-
-	set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags);
-	actual = ax->tty->ops->write(ax->tty, ax->xbuff, count);
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += actual;
-
-	netif_trans_update(ax->dev);
-	ax->xleft = count - actual;
-	ax->xhead = ax->xbuff + actual;
-}
-
-/* Encapsulate an AX.25 packet and kick it into a TTY queue. */
-static netdev_tx_t ax_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct mkiss *ax = netdev_priv(dev);
-
-	if (skb->protocol == htons(ETH_P_IP))
-		return ax25_ip_xmit(skb);
-
-	if (!netif_running(dev))  {
-		printk(KERN_ERR "mkiss: %s: xmit call when iface is down\n", dev->name);
-		return NETDEV_TX_BUSY;
-	}
-
-	if (netif_queue_stopped(dev)) {
-		/*
-		 * May be we must check transmitter timeout here ?
-		 *      14 Oct 1994 Dmitry Gorodchanin.
-		 */
-		if (time_before(jiffies, dev_trans_start(dev) + 20 * HZ)) {
-			/* 20 sec timeout not reached */
-			return NETDEV_TX_BUSY;
-		}
-
-		printk(KERN_ERR "mkiss: %s: transmit timed out, %s?\n", dev->name,
-		       (tty_chars_in_buffer(ax->tty) || ax->xleft) ?
-		       "bad line quality" : "driver error");
-
-		ax->xleft = 0;
-		clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags);
-		netif_start_queue(dev);
-	}
-
-	/* We were not busy, so we are now... :-) */
-	netif_stop_queue(dev);
-	ax_encaps(dev, skb->data, skb->len);
-	kfree_skb(skb);
-
-	return NETDEV_TX_OK;
-}
-
-static int ax_open_dev(struct net_device *dev)
-{
-	struct mkiss *ax = netdev_priv(dev);
-
-	if (ax->tty == NULL)
-		return -ENODEV;
-
-	return 0;
-}
-
-/* Open the low-level part of the AX25 channel. Easy! */
-static int ax_open(struct net_device *dev)
-{
-	struct mkiss *ax = netdev_priv(dev);
-	unsigned long len;
-
-	if (ax->tty == NULL)
-		return -ENODEV;
-
-	/*
-	 * Allocate the frame buffers:
-	 *
-	 * rbuff	Receive buffer.
-	 * xbuff	Transmit buffer.
-	 */
-	len = dev->mtu * 2;
-
-	/*
-	 * allow for arrival of larger UDP packets, even if we say not to
-	 * also fixes a bug in which SunOS sends 512-byte packets even with
-	 * an MSS of 128
-	 */
-	if (len < 576 * 2)
-		len = 576 * 2;
-
-	if ((ax->rbuff = kmalloc(len + 4, GFP_KERNEL)) == NULL)
-		goto norbuff;
-
-	if ((ax->xbuff = kmalloc(len + 4, GFP_KERNEL)) == NULL)
-		goto noxbuff;
-
-	ax->mtu	     = dev->mtu + 73;
-	ax->buffsize = len;
-	ax->rcount   = 0;
-	ax->xleft    = 0;
-
-	ax->flags   &= (1 << AXF_INUSE);      /* Clear ESCAPE & ERROR flags */
-
-	spin_lock_init(&ax->buflock);
-
-	return 0;
-
-noxbuff:
-	kfree(ax->rbuff);
-
-norbuff:
-	return -ENOMEM;
-}
-
-
-/* Close the low-level part of the AX25 channel. Easy! */
-static int ax_close(struct net_device *dev)
-{
-	struct mkiss *ax = netdev_priv(dev);
-
-	if (ax->tty)
-		clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags);
-
-	netif_stop_queue(dev);
-
-	return 0;
-}
-
-static const struct net_device_ops ax_netdev_ops = {
-	.ndo_open            = ax_open_dev,
-	.ndo_stop            = ax_close,
-	.ndo_start_xmit	     = ax_xmit,
-	.ndo_set_mac_address = ax_set_mac_address,
-};
-
-static void ax_setup(struct net_device *dev)
-{
-	/* Finish setting up the DEVICE info. */
-	dev->mtu             = AX_MTU;
-	dev->hard_header_len = AX25_MAX_HEADER_LEN;
-	dev->addr_len        = AX25_ADDR_LEN;
-	dev->type            = ARPHRD_AX25;
-	dev->tx_queue_len    = 10;
-	dev->header_ops      = &ax25_header_ops;
-	dev->netdev_ops	     = &ax_netdev_ops;
-
-
-	memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
-	dev_addr_set(dev, (u8 *)&ax25_defaddr);
-
-	dev->flags      = IFF_BROADCAST | IFF_MULTICAST;
-}
-
-/*
- * We have a potential race on dereferencing tty->disc_data, because the tty
- * layer provides no locking at all - thus one cpu could be running
- * sixpack_receive_buf while another calls sixpack_close, which zeroes
- * tty->disc_data and frees the memory that sixpack_receive_buf is using.  The
- * best way to fix this is to use a rwlock in the tty struct, but for now we
- * use a single global rwlock for all ttys in ppp line discipline.
- */
-static DEFINE_RWLOCK(disc_data_lock);
-
-static struct mkiss *mkiss_get(struct tty_struct *tty)
-{
-	struct mkiss *ax;
-
-	read_lock(&disc_data_lock);
-	ax = tty->disc_data;
-	if (ax)
-		refcount_inc(&ax->refcnt);
-	read_unlock(&disc_data_lock);
-
-	return ax;
-}
-
-static void mkiss_put(struct mkiss *ax)
-{
-	if (refcount_dec_and_test(&ax->refcnt))
-		complete(&ax->dead);
-}
-
-static int crc_force = 0;	/* Can be overridden with insmod */
-
-static int mkiss_open(struct tty_struct *tty)
-{
-	struct net_device *dev;
-	struct mkiss *ax;
-	int err;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-	if (tty->ops->write == NULL)
-		return -EOPNOTSUPP;
-
-	dev = alloc_netdev(sizeof(struct mkiss), "ax%d", NET_NAME_UNKNOWN,
-			   ax_setup);
-	if (!dev) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	ax = netdev_priv(dev);
-	ax->dev = dev;
-
-	spin_lock_init(&ax->buflock);
-	refcount_set(&ax->refcnt, 1);
-	init_completion(&ax->dead);
-
-	ax->tty = tty;
-	tty->disc_data = ax;
-	tty->receive_room = 65535;
-
-	tty_driver_flush_buffer(tty);
-
-	/* Restore default settings */
-	dev->type = ARPHRD_AX25;
-
-	/* Perform the low-level AX25 initialization. */
-	err = ax_open(ax->dev);
-	if (err)
-		goto out_free_netdev;
-
-	err = register_netdev(dev);
-	if (err)
-		goto out_free_buffers;
-
-	/* after register_netdev() - because else printk smashes the kernel */
-	switch (crc_force) {
-	case 3:
-		ax->crcmode  = CRC_MODE_SMACK;
-		printk(KERN_INFO "mkiss: %s: crc mode smack forced.\n",
-		       ax->dev->name);
-		break;
-	case 2:
-		ax->crcmode  = CRC_MODE_FLEX;
-		printk(KERN_INFO "mkiss: %s: crc mode flexnet forced.\n",
-		       ax->dev->name);
-		break;
-	case 1:
-		ax->crcmode  = CRC_MODE_NONE;
-		printk(KERN_INFO "mkiss: %s: crc mode disabled.\n",
-		       ax->dev->name);
-		break;
-	case 0:
-	default:
-		crc_force = 0;
-		printk(KERN_INFO "mkiss: %s: crc mode is auto.\n",
-		       ax->dev->name);
-		ax->crcmode  = CRC_MODE_SMACK_TEST;
-	}
-	ax->crcauto = (crc_force ? 0 : 1);
-
-	netif_start_queue(dev);
-
-	/* Done.  We have linked the TTY line to a channel. */
-	return 0;
-
-out_free_buffers:
-	kfree(ax->rbuff);
-	kfree(ax->xbuff);
-
-out_free_netdev:
-	free_netdev(dev);
-
-out:
-	return err;
-}
-
-static void mkiss_close(struct tty_struct *tty)
-{
-	struct mkiss *ax;
-
-	write_lock_irq(&disc_data_lock);
-	ax = tty->disc_data;
-	tty->disc_data = NULL;
-	write_unlock_irq(&disc_data_lock);
-
-	if (!ax)
-		return;
-
-	/*
-	 * We have now ensured that nobody can start using ap from now on, but
-	 * we have to wait for all existing users to finish.
-	 */
-	if (!refcount_dec_and_test(&ax->refcnt))
-		wait_for_completion(&ax->dead);
-	/*
-	 * Halt the transmit queue so that a new transmit cannot scribble
-	 * on our buffers
-	 */
-	netif_stop_queue(ax->dev);
-
-	unregister_netdev(ax->dev);
-
-	/* Free all AX25 frame buffers after unreg. */
-	kfree(ax->rbuff);
-	kfree(ax->xbuff);
-
-	ax->tty = NULL;
-
-	free_netdev(ax->dev);
-}
-
-/* Perform I/O control on an active ax25 channel. */
-static int mkiss_ioctl(struct tty_struct *tty, unsigned int cmd,
-		unsigned long arg)
-{
-	struct mkiss *ax = mkiss_get(tty);
-	struct net_device *dev;
-	unsigned int tmp, err;
-
-	/* First make sure we're connected. */
-	if (ax == NULL)
-		return -ENXIO;
-	dev = ax->dev;
-
-	switch (cmd) {
-	case SIOCGIFNAME:
-		err = copy_to_user((void __user *) arg, ax->dev->name,
-		                   strlen(ax->dev->name) + 1) ? -EFAULT : 0;
-		break;
-
-	case SIOCGIFENCAP:
-		err = put_user(4, (int __user *) arg);
-		break;
-
-	case SIOCSIFENCAP:
-		if (get_user(tmp, (int __user *) arg)) {
-			err = -EFAULT;
-			break;
-		}
-
-		ax->mode = tmp;
-		dev->addr_len        = AX25_ADDR_LEN;
-		dev->hard_header_len = AX25_KISS_HEADER_LEN +
-		                       AX25_MAX_HEADER_LEN + 3;
-		dev->type            = ARPHRD_AX25;
-
-		err = 0;
-		break;
-
-	case SIOCSIFHWADDR: {
-		char addr[AX25_ADDR_LEN];
-
-		if (copy_from_user(&addr,
-		                   (void __user *) arg, AX25_ADDR_LEN)) {
-			err = -EFAULT;
-			break;
-		}
-
-		netif_tx_lock_bh(dev);
-		__dev_addr_set(dev, addr, AX25_ADDR_LEN);
-		netif_tx_unlock_bh(dev);
-
-		err = 0;
-		break;
-	}
-	default:
-		err = -ENOIOCTLCMD;
-	}
-
-	mkiss_put(ax);
-
-	return err;
-}
-
-/*
- * Handle the 'receiver data ready' interrupt.
- * This function is called by the 'tty_io' module in the kernel when
- * a block of data has been received, which can now be decapsulated
- * and sent on to the AX.25 layer for further processing.
- */
-static void mkiss_receive_buf(struct tty_struct *tty, const u8 *cp,
-			      const u8 *fp, size_t count)
-{
-	struct mkiss *ax = mkiss_get(tty);
-
-	if (!ax)
-		return;
-
-	/*
-	 * Argh! mtu change time! - costs us the packet part received
-	 * at the change
-	 */
-	if (ax->mtu != ax->dev->mtu + 73)
-		ax_changedmtu(ax);
-
-	/* Read the characters out of the buffer */
-	while (count--) {
-		if (fp != NULL && *fp++) {
-			if (!test_and_set_bit(AXF_ERROR, &ax->flags))
-				ax->dev->stats.rx_errors++;
-			cp++;
-			continue;
-		}
-
-		kiss_unesc(ax, *cp++);
-	}
-
-	mkiss_put(ax);
-	tty_unthrottle(tty);
-}
-
-/*
- * Called by the driver when there's room for more data.  If we have
- * more packets to send, we send them here.
- */
-static void mkiss_write_wakeup(struct tty_struct *tty)
-{
-	struct mkiss *ax = mkiss_get(tty);
-	int actual;
-
-	if (!ax)
-		return;
-
-	if (ax->xleft <= 0)  {
-		/* Now serial buffer is almost free & we can start
-		 * transmission of another packet
-		 */
-		clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
-
-		netif_wake_queue(ax->dev);
-		goto out;
-	}
-
-	actual = tty->ops->write(tty, ax->xhead, ax->xleft);
-	ax->xleft -= actual;
-	ax->xhead += actual;
-
-out:
-	mkiss_put(ax);
-}
-
-static struct tty_ldisc_ops ax_ldisc = {
-	.owner		= THIS_MODULE,
-	.num		= N_AX25,
-	.name		= "mkiss",
-	.open		= mkiss_open,
-	.close		= mkiss_close,
-	.ioctl		= mkiss_ioctl,
-	.receive_buf	= mkiss_receive_buf,
-	.write_wakeup	= mkiss_write_wakeup
-};
-
-static const char banner[] __initconst = KERN_INFO \
-	"mkiss: AX.25 Multikiss, Hans Albas PE1AYX\n";
-static const char msg_regfail[] __initconst = KERN_ERR \
-	"mkiss: can't register line discipline (err = %d)\n";
-
-static int __init mkiss_init_driver(void)
-{
-	int status;
-
-	printk(banner);
-
-	status = tty_register_ldisc(&ax_ldisc);
-	if (status != 0)
-		printk(msg_regfail, status);
-
-	return status;
-}
-
-static void __exit mkiss_exit_driver(void)
-{
-	tty_unregister_ldisc(&ax_ldisc);
-}
-
-MODULE_AUTHOR("Ralf Baechle DL5RB <ralf@linux-mips.org>");
-MODULE_DESCRIPTION("KISS driver for AX.25 over TTYs");
-module_param(crc_force, int, 0);
-MODULE_PARM_DESC(crc_force, "crc [0 = auto | 1 = none | 2 = flexnet | 3 = smack]");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_LDISC(N_AX25);
-
-module_init(mkiss_init_driver);
-module_exit(mkiss_exit_driver);
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
deleted file mode 100644
index 8569db4a7140..000000000000
--- a/drivers/net/hamradio/scc.c
+++ /dev/null
@@ -1,2179 +0,0 @@
-#define RCS_ID "$Id: scc.c,v 1.75 1998/11/04 15:15:01 jreuter Exp jreuter $"
-
-#define VERSION "3.0"
-
-/*
- * Please use z8530drv-utils-3.0 with this version.
- *            ------------------
- *
- * You can find a subset of the documentation in 
- * Documentation/networking/device_drivers/hamradio/z8530drv.rst.
- */
-
-/*
-   ********************************************************************
-   *   SCC.C - Linux driver for Z8530 based HDLC cards for AX.25      *
-   ********************************************************************
-
-
-   ********************************************************************
-
-	Copyright (c) 1993, 2000 Joerg Reuter DL1BKE
-
-	portions (c) 1993 Guido ten Dolle PE1NNZ
-
-   ********************************************************************
-   
-   The driver and the programs in the archive are UNDER CONSTRUCTION.
-   The code is likely to fail, and so your kernel could --- even 
-   a whole network. 
-
-   This driver is intended for Amateur Radio use. If you are running it
-   for commercial purposes, please drop me a note. I am nosy...
-
-   ...BUT:
- 
-   ! You  m u s t  recognize the appropriate legislations of your country !
-   ! before you connect a radio to the SCC board and start to transmit or !
-   ! receive. The GPL allows you to use the  d r i v e r,  NOT the RADIO! !
-
-   For non-Amateur-Radio use please note that you might need a special
-   allowance/licence from the designer of the SCC Board and/or the
-   MODEM. 
-
-   This program is free software; you can redistribute it and/or modify 
-   it under the terms of the (modified) GNU General Public License 
-   delivered with the Linux kernel source.
-   
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should find a copy of the GNU General Public License in 
-   /usr/src/linux/COPYING; 
-   
-   ******************************************************************** 
-
-		
-   Incomplete history of z8530drv:
-   -------------------------------
-
-   1994-09-13	started to write the driver, rescued most of my own
-		code (and Hans Alblas' memory buffer pool concept) from 
-		an earlier project "sccdrv" which was initiated by 
-		Guido ten Dolle. Not much of the old driver survived, 
-		though. The first version I put my hands on was sccdrv1.3
-		from August 1993. The memory buffer pool concept
-		appeared in an unauthorized sccdrv version (1.5) from
-		August 1994.
-
-   1995-01-31	changed copyright notice to GPL without limitations.
-   
-     .
-     .	<SNIP>
-     .
-   		  
-   1996-10-05	New semester, new driver... 
-
-   		  * KISS TNC emulator removed (TTY driver)
-   		  * Source moved to drivers/net/
-   		  * Includes Z8530 defines from drivers/net/z8530.h
-   		  * Uses sk_buffer memory management
-   		  * Reduced overhead of /proc/net/z8530drv output
-   		  * Streamlined quite a lot things
-   		  * Invents brand new bugs... ;-)
-
-   		  The move to version number 3.0 reflects theses changes.
-   		  You can use 'kissbridge' if you need a KISS TNC emulator.
-
-   1996-12-13	Fixed for Linux networking changes. (G4KLX)
-   1997-01-08	Fixed the remaining problems.
-   1997-04-02	Hopefully fixed the problems with the new *_timer()
-   		routines, added calibration code.
-   1997-10-12	Made SCC_DELAY a CONFIG option, added CONFIG_SCC_TRXECHO
-   1998-01-29	Small fix to avoid lock-up on initialization
-   1998-09-29	Fixed the "grouping" bugs, tx_inhibit works again,
-   		using dev->tx_queue_len now instead of MAXQUEUE now.
-   1998-10-21	Postponed the spinlock changes, would need a lot of
-   		testing I currently don't have the time to. Softdcd doesn't
-   		work.
-   1998-11-04	Softdcd does not work correctly in DPLL mode, in fact it 
-   		never did. The DPLL locks on noise, the SYNC unit sees
-   		flags that aren't... Restarting the DPLL does not help
-   		either, it resynchronizes too slow and the first received
-   		frame gets lost.
-   2000-02-13	Fixed for new network driver interface changes, still
-   		does TX timeouts itself since it uses its own queue
-   		scheme.
-
-   Thanks to all who contributed to this driver with ideas and bug
-   reports!
-   
-   NB -- if you find errors, change something, please let me know
-      	 first before you distribute it... And please don't touch
-   	 the version number. Just replace my callsign in
-   	 "v3.0.dl1bke" with your own. Just to avoid confusion...
-
-   If you want to add your modification to the linux distribution
-   please (!) contact me first.
-   
-   New versions of the driver will be announced on the linux-hams
-   mailing list on vger.kernel.org. To subscribe send an e-mail
-   to majordomo@vger.kernel.org with the following line in
-   the body of the mail:
-   
-	   subscribe linux-hams
-	   
-   The content of the "Subject" field will be ignored.
-
-   vy 73,
-   Joerg Reuter	ampr-net: dl1bke@db0pra.ampr.org
-		AX-25   : DL1BKE @ DB0ABH.#BAY.DEU.EU
-		Internet: jreuter@yaina.de
-		www     : http://yaina.de/jreuter
-*/
-
-/* ----------------------------------------------------------------------- */
-
-#undef  SCC_LDELAY		/* slow it even a bit more down */
-#undef  SCC_DONT_CHECK		/* don't look if the SCCs you specified are available */
-
-#define SCC_MAXCHIPS	4       /* number of max. supported chips */
-#define SCC_BUFSIZE	384     /* must not exceed 4096 */
-#undef	SCC_DEBUG
-
-#define SCC_DEFAULT_CLOCK	4915200 
-				/* default pclock if nothing is specified */
-
-/* ----------------------------------------------------------------------- */
-
-#include <linux/compat.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/timer.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/in.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/delay.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/rtnetlink.h>
-#include <linux/if_ether.h>
-#include <linux/if_arp.h>
-#include <linux/socket.h>
-#include <linux/init.h>
-#include <linux/scc.h>
-#include <linux/ctype.h>
-#include <linux/kernel.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/bitops.h>
-
-#include <net/net_namespace.h>
-#include <net/ax25.h>
-
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <linux/uaccess.h>
-
-#include "z8530.h"
-
-static const char banner[] __initconst = KERN_INFO \
-	"AX.25: Z8530 SCC driver version "VERSION".dl1bke\n";
-
-static void t_dwait(struct timer_list *t);
-static void t_txdelay(struct timer_list *t);
-static void t_tail(struct timer_list *t);
-static void t_busy(struct timer_list *);
-static void t_maxkeyup(struct timer_list *);
-static void t_idle(struct timer_list *t);
-static void scc_tx_done(struct scc_channel *);
-static void scc_start_tx_timer(struct scc_channel *,
-			       void (*)(struct timer_list *), unsigned long);
-static void scc_start_maxkeyup(struct scc_channel *);
-static void scc_start_defer(struct scc_channel *);
-
-static void z8530_init(void);
-
-static void init_channel(struct scc_channel *scc);
-static void scc_key_trx (struct scc_channel *scc, char tx);
-static void scc_init_timer(struct scc_channel *scc);
-
-static int scc_net_alloc(const char *name, struct scc_channel *scc);
-static void scc_net_setup(struct net_device *dev);
-static int scc_net_open(struct net_device *dev);
-static int scc_net_close(struct net_device *dev);
-static void scc_net_rx(struct scc_channel *scc, struct sk_buff *skb);
-static netdev_tx_t scc_net_tx(struct sk_buff *skb,
-			      struct net_device *dev);
-static int scc_net_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
-				  void __user *data, int cmd);
-static int scc_net_set_mac_address(struct net_device *dev, void *addr);
-static struct net_device_stats * scc_net_get_stats(struct net_device *dev);
-
-static unsigned char SCC_DriverName[] = "scc";
-
-static struct irqflags { unsigned char used : 1; } Ivec[NR_IRQS];
-	
-static struct scc_channel SCC_Info[2 * SCC_MAXCHIPS];	/* information per channel */
-
-static struct scc_ctrl {
-	io_port chan_A;
-	io_port chan_B;
-	int irq;
-} SCC_ctrl[SCC_MAXCHIPS+1];
-
-static unsigned char Driver_Initialized;
-static int Nchips;
-static io_port Vector_Latch;
-
-
-/* ******************************************************************** */
-/* *			Port Access Functions			      * */
-/* ******************************************************************** */
-
-/* These provide interrupt save 2-step access to the Z8530 registers */
-
-static DEFINE_SPINLOCK(iolock);	/* Guards paired accesses */
-
-static inline unsigned char InReg(io_port port, unsigned char reg)
-{
-	unsigned long flags;
-	unsigned char r;
-
-	spin_lock_irqsave(&iolock, flags);	
-#ifdef SCC_LDELAY
-	Outb(port, reg);
-	udelay(SCC_LDELAY);
-	r=Inb(port);
-	udelay(SCC_LDELAY);
-#else
-	Outb(port, reg);
-	r=Inb(port);
-#endif
-	spin_unlock_irqrestore(&iolock, flags);
-	return r;
-}
-
-static inline void OutReg(io_port port, unsigned char reg, unsigned char val)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&iolock, flags);
-#ifdef SCC_LDELAY
-	Outb(port, reg); udelay(SCC_LDELAY);
-	Outb(port, val); udelay(SCC_LDELAY);
-#else
-	Outb(port, reg);
-	Outb(port, val);
-#endif
-	spin_unlock_irqrestore(&iolock, flags);
-}
-
-static inline void wr(struct scc_channel *scc, unsigned char reg,
-	unsigned char val)
-{
-	OutReg(scc->ctrl, reg, (scc->wreg[reg] = val));
-}
-
-static inline void or(struct scc_channel *scc, unsigned char reg, unsigned char val)
-{
-	OutReg(scc->ctrl, reg, (scc->wreg[reg] |= val));
-}
-
-static inline void cl(struct scc_channel *scc, unsigned char reg, unsigned char val)
-{
-	OutReg(scc->ctrl, reg, (scc->wreg[reg] &= ~val));
-}
-
-/* ******************************************************************** */
-/* *			Some useful macros			      * */
-/* ******************************************************************** */
-
-static inline void scc_discard_buffers(struct scc_channel *scc)
-{
-	unsigned long flags;
-	
-	spin_lock_irqsave(&scc->lock, flags);	
-	if (scc->tx_buff != NULL)
-	{
-		dev_kfree_skb_irq(scc->tx_buff);
-		scc->tx_buff = NULL;
-	}
-	
-	while (!skb_queue_empty(&scc->tx_queue))
-		dev_kfree_skb_irq(skb_dequeue(&scc->tx_queue));
-
-	spin_unlock_irqrestore(&scc->lock, flags);
-}
-
-
-
-/* ******************************************************************** */
-/* *			Interrupt Service Routines		      * */
-/* ******************************************************************** */
-
-
-/* ----> subroutines for the interrupt handlers <---- */
-
-static inline void scc_notify(struct scc_channel *scc, int event)
-{
-	struct sk_buff *skb;
-	char *bp;
-	
-        if (scc->kiss.fulldup != KISS_DUPLEX_OPTIMA)
-		return;
-
-	skb = dev_alloc_skb(2);
-	if (skb != NULL)
-	{
-		bp = skb_put(skb, 2);
-		*bp++ = PARAM_HWEVENT;
-		*bp++ = event;
-		scc_net_rx(scc, skb);
-	} else
-		scc->stat.nospace++;
-}
-
-static inline void flush_rx_FIFO(struct scc_channel *scc)
-{
-	int k;
-	
-	for (k=0; k<3; k++)
-		Inb(scc->data);
-		
-	if(scc->rx_buff != NULL)		/* did we receive something? */
-	{
-		scc->stat.rxerrs++;  /* then count it as an error */
-		dev_kfree_skb_irq(scc->rx_buff);
-		scc->rx_buff = NULL;
-	}
-}
-
-static void start_hunt(struct scc_channel *scc)
-{
-	if ((scc->modem.clocksrc != CLK_EXTERNAL))
-		OutReg(scc->ctrl,R14,SEARCH|scc->wreg[R14]); /* DPLL: enter search mode */
-	or(scc,R3,ENT_HM|RxENABLE);  /* enable the receiver, hunt mode */
-}
-
-/* ----> four different interrupt handlers for Tx, Rx, changing of	*/
-/*       DCD/CTS and Rx/Tx errors					*/
-
-/* Transmitter interrupt handler */
-static inline void scc_txint(struct scc_channel *scc)
-{
-	struct sk_buff *skb;
-
-	scc->stat.txints++;
-	skb = scc->tx_buff;
-	
-	/* send first octet */
-	
-	if (skb == NULL)
-	{
-		skb = skb_dequeue(&scc->tx_queue);
-		scc->tx_buff = skb;
-		netif_wake_queue(scc->dev);
-
-		if (skb == NULL)
-		{
-			scc_tx_done(scc);
-			Outb(scc->ctrl, RES_Tx_P);
-			return;
-		}
-		
-		if (skb->len == 0)		/* Paranoia... */
-		{
-			dev_kfree_skb_irq(skb);
-			scc->tx_buff = NULL;
-			scc_tx_done(scc);
-			Outb(scc->ctrl, RES_Tx_P);
-			return;
-		}
-
-		scc->stat.tx_state = TXS_ACTIVE;
-
-		OutReg(scc->ctrl, R0, RES_Tx_CRC);
-						/* reset CRC generator */
-		or(scc,R10,ABUNDER);		/* re-install underrun protection */
-		Outb(scc->data,*skb->data);	/* send byte */
-		skb_pull(skb, 1);
-
-		if (!scc->enhanced)		/* reset EOM latch */
-			Outb(scc->ctrl,RES_EOM_L);
-		return;
-	}
-	
-	/* End Of Frame... */
-	
-	if (skb->len == 0)
-	{
-		Outb(scc->ctrl, RES_Tx_P);	/* reset pending int */
-		cl(scc, R10, ABUNDER);		/* send CRC */
-		dev_kfree_skb_irq(skb);
-		scc->tx_buff = NULL;
-		scc->stat.tx_state = TXS_NEWFRAME; /* next frame... */
-		return;
-	} 
-	
-	/* send octet */
-	
-	Outb(scc->data,*skb->data);		
-	skb_pull(skb, 1);
-}
-
-
-/* External/Status interrupt handler */
-static inline void scc_exint(struct scc_channel *scc)
-{
-	unsigned char status,changes,chg_and_stat;
-
-	scc->stat.exints++;
-
-	status = InReg(scc->ctrl,R0);
-	changes = status ^ scc->status;
-	chg_and_stat = changes & status;
-	
-	/* ABORT: generated whenever DCD drops while receiving */
-
-	if (chg_and_stat & BRK_ABRT)		/* Received an ABORT */
-		flush_rx_FIFO(scc);
-
-	/* HUNT: software DCD; on = waiting for SYNC, off = receiving frame */
-
-	if ((changes & SYNC_HUNT) && scc->kiss.softdcd)
-	{
-		if (status & SYNC_HUNT)
-		{
-			scc->dcd = 0;
-			flush_rx_FIFO(scc);
-			if ((scc->modem.clocksrc != CLK_EXTERNAL))
-				OutReg(scc->ctrl,R14,SEARCH|scc->wreg[R14]); /* DPLL: enter search mode */
-		} else {
-			scc->dcd = 1;
-		}
-
-		scc_notify(scc, scc->dcd? HWEV_DCD_OFF:HWEV_DCD_ON);
-	}
-
-	/* DCD: on = start to receive packet, off = ABORT condition */
-	/* (a successfully received packet generates a special condition int) */
-	
-	if((changes & DCD) && !scc->kiss.softdcd) /* DCD input changed state */
-	{
-		if(status & DCD)                /* DCD is now ON */
-		{
-			start_hunt(scc);
-			scc->dcd = 1;
-		} else {                        /* DCD is now OFF */
-			cl(scc,R3,ENT_HM|RxENABLE); /* disable the receiver */
-			flush_rx_FIFO(scc);
-			scc->dcd = 0;
-		}
-		
-		scc_notify(scc, scc->dcd? HWEV_DCD_ON:HWEV_DCD_OFF);
-	}
-
-#ifdef notdef
-	/* CTS: use external TxDelay (what's that good for?!)
-	 * Anyway: If we _could_ use it (BayCom USCC uses CTS for
-	 * own purposes) we _should_ use the "autoenable" feature
-	 * of the Z8530 and not this interrupt...
-	 */
-	 
-	if (chg_and_stat & CTS)			/* CTS is now ON */
-	{
-		if (scc->kiss.txdelay == 0)	/* zero TXDELAY = wait for CTS */
-			scc_start_tx_timer(scc, t_txdelay, 0);
-	}
-#endif
-	
-	if (scc->stat.tx_state == TXS_ACTIVE && (status & TxEOM))
-	{
-		scc->stat.tx_under++;	  /* oops, an underrun! count 'em */
-		Outb(scc->ctrl, RES_EXT_INT);	/* reset ext/status interrupts */
-
-		if (scc->tx_buff != NULL)
-		{
-			dev_kfree_skb_irq(scc->tx_buff);
-			scc->tx_buff = NULL;
-		}
-		
-		or(scc,R10,ABUNDER);
-		scc_start_tx_timer(scc, t_txdelay, 0);	/* restart transmission */
-	}
-		
-	scc->status = status;
-	Outb(scc->ctrl,RES_EXT_INT);
-}
-
-
-/* Receiver interrupt handler */
-static inline void scc_rxint(struct scc_channel *scc)
-{
-	struct sk_buff *skb;
-
-	scc->stat.rxints++;
-
-	if((scc->wreg[5] & RTS) && scc->kiss.fulldup == KISS_DUPLEX_HALF)
-	{
-		Inb(scc->data);		/* discard char */
-		or(scc,R3,ENT_HM);	/* enter hunt mode for next flag */
-		return;
-	}
-
-	skb = scc->rx_buff;
-	
-	if (skb == NULL)
-	{
-		skb = dev_alloc_skb(scc->stat.bufsize);
-		if (skb == NULL)
-		{
-			scc->dev_stat.rx_dropped++;
-			scc->stat.nospace++;
-			Inb(scc->data);
-			or(scc, R3, ENT_HM);
-			return;
-		}
-		
-		scc->rx_buff = skb;
-		skb_put_u8(skb, 0);	/* KISS data */
-	}
-	
-	if (skb->len >= scc->stat.bufsize)
-	{
-#ifdef notdef
-		printk(KERN_DEBUG "z8530drv: oops, scc_rxint() received huge frame...\n");
-#endif
-		dev_kfree_skb_irq(skb);
-		scc->rx_buff = NULL;
-		Inb(scc->data);
-		or(scc, R3, ENT_HM);
-		return;
-	}
-
-	skb_put_u8(skb, Inb(scc->data));
-}
-
-
-/* Receive Special Condition interrupt handler */
-static inline void scc_spint(struct scc_channel *scc)
-{
-	unsigned char status;
-	struct sk_buff *skb;
-
-	scc->stat.spints++;
-
-	status = InReg(scc->ctrl,R1);		/* read receiver status */
-	
-	Inb(scc->data);				/* throw away Rx byte */
-	skb = scc->rx_buff;
-
-	if(status & Rx_OVR)			/* receiver overrun */
-	{
-		scc->stat.rx_over++;             /* count them */
-		or(scc,R3,ENT_HM);               /* enter hunt mode for next flag */
-		
-		if (skb != NULL) 
-			dev_kfree_skb_irq(skb);
-		scc->rx_buff = skb = NULL;
-	}
-
-	if(status & END_FR && skb != NULL)	/* end of frame */
-	{
-		/* CRC okay, frame ends on 8 bit boundary and received something ? */
-		
-		if (!(status & CRC_ERR) && (status & 0xe) == RES8 && skb->len > 0)
-		{
-			/* ignore last received byte (first of the CRC bytes) */
-			skb_trim(skb, skb->len-1);
-			scc_net_rx(scc, skb);
-			scc->rx_buff = NULL;
-			scc->stat.rxframes++;
-		} else {				/* a bad frame */
-			dev_kfree_skb_irq(skb);
-			scc->rx_buff = NULL;
-			scc->stat.rxerrs++;
-		}
-	} 
-
-	Outb(scc->ctrl,ERR_RES);
-}
-
-
-/* ----> interrupt service routine for the Z8530 <---- */
-
-static void scc_isr_dispatch(struct scc_channel *scc, int vector)
-{
-	spin_lock(&scc->lock);
-	switch (vector & VECTOR_MASK)
-	{
-		case TXINT: scc_txint(scc); break;
-		case EXINT: scc_exint(scc); break;
-		case RXINT: scc_rxint(scc); break;
-		case SPINT: scc_spint(scc); break;
-	}
-	spin_unlock(&scc->lock);
-}
-
-/* If the card has a latch for the interrupt vector (like the PA0HZP card)
-   use it to get the number of the chip that generated the int.
-   If not: poll all defined chips.
- */
-
-#define SCC_IRQTIMEOUT 30000
-
-static irqreturn_t scc_isr(int irq, void *dev_id)
-{
-	int chip_irq = (long) dev_id;
-	unsigned char vector;	
-	struct scc_channel *scc;
-	struct scc_ctrl *ctrl;
-	int k;
-	
-	if (Vector_Latch)
-	{
-	    	for(k=0; k < SCC_IRQTIMEOUT; k++)
-    		{
-			Outb(Vector_Latch, 0);      /* Generate INTACK */
-        
-			/* Read the vector */
-			if((vector=Inb(Vector_Latch)) >= 16 * Nchips) break; 
-			if (vector & 0x01) break;
-        	 
-		        scc=&SCC_Info[vector >> 3 ^ 0x01];
-			if (!scc->dev) break;
-
-			scc_isr_dispatch(scc, vector);
-
-			OutReg(scc->ctrl,R0,RES_H_IUS);              /* Reset Highest IUS */
-		}  
-
-		if (k == SCC_IRQTIMEOUT)
-			printk(KERN_WARNING "z8530drv: endless loop in scc_isr()?\n");
-
-		return IRQ_HANDLED;
-	}
-
-	/* Find the SCC generating the interrupt by polling all attached SCCs
-	 * reading RR3A (the interrupt pending register)
-	 */
-
-	ctrl = SCC_ctrl;
-	while (ctrl->chan_A)
-	{
-		if (ctrl->irq != chip_irq)
-		{
-			ctrl++;
-			continue;
-		}
-
-		scc = NULL;
-		for (k = 0; InReg(ctrl->chan_A,R3) && k < SCC_IRQTIMEOUT; k++)
-		{
-			vector=InReg(ctrl->chan_B,R2);	/* Read the vector */
-			if (vector & 0x01) break; 
-
-			scc = &SCC_Info[vector >> 3 ^ 0x01];
-		        if (!scc->dev) break;
-
-			scc_isr_dispatch(scc, vector);
-		}
-
-		if (k == SCC_IRQTIMEOUT)
-		{
-			printk(KERN_WARNING "z8530drv: endless loop in scc_isr()?!\n");
-			break;
-		}
-
-		/* This looks weird and it is. At least the BayCom USCC doesn't
-		 * use the Interrupt Daisy Chain, thus we'll have to start
-		 * all over again to be sure not to miss an interrupt from 
-		 * (any of) the other chip(s)...
-		 * Honestly, the situation *is* braindamaged...
-		 */
-
-		if (scc != NULL)
-		{
-			OutReg(scc->ctrl,R0,RES_H_IUS);
-			ctrl = SCC_ctrl; 
-		} else
-			ctrl++;
-	}
-	return IRQ_HANDLED;
-}
-
-
-
-/* ******************************************************************** */
-/* *			Init Channel					*/
-/* ******************************************************************** */
-
-
-/* ----> set SCC channel speed <---- */
-
-static inline void set_brg(struct scc_channel *scc, unsigned int tc)
-{
-	cl(scc,R14,BRENABL);		/* disable baudrate generator */
-	wr(scc,R12,tc & 255);		/* brg rate LOW */
-	wr(scc,R13,tc >> 8);   		/* brg rate HIGH */
-	or(scc,R14,BRENABL);		/* enable baudrate generator */
-}
-
-static inline void set_speed(struct scc_channel *scc)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&scc->lock, flags);
-
-	if (scc->modem.speed > 0)	/* paranoia... */
-		set_brg(scc, (unsigned) (scc->clock / (scc->modem.speed * 64)) - 2);
-		
-	spin_unlock_irqrestore(&scc->lock, flags);
-}
-
-
-/* ----> initialize a SCC channel <---- */
-
-static inline void init_brg(struct scc_channel *scc)
-{
-	wr(scc, R14, BRSRC);				/* BRG source = PCLK */
-	OutReg(scc->ctrl, R14, SSBR|scc->wreg[R14]);	/* DPLL source = BRG */
-	OutReg(scc->ctrl, R14, SNRZI|scc->wreg[R14]);	/* DPLL NRZI mode */
-}
-
-/*
- * Initialization according to the Z8530 manual (SGS-Thomson's version):
- *
- * 1. Modes and constants
- *
- * WR9	11000000	chip reset
- * WR4	XXXXXXXX	Tx/Rx control, async or sync mode
- * WR1	0XX00X00	select W/REQ (optional)
- * WR2	XXXXXXXX	program interrupt vector
- * WR3	XXXXXXX0	select Rx control
- * WR5	XXXX0XXX	select Tx control
- * WR6	XXXXXXXX	sync character
- * WR7	XXXXXXXX	sync character
- * WR9	000X0XXX	select interrupt control
- * WR10	XXXXXXXX	miscellaneous control (optional)
- * WR11	XXXXXXXX	clock control
- * WR12	XXXXXXXX	time constant lower byte (optional)
- * WR13	XXXXXXXX	time constant upper byte (optional)
- * WR14	XXXXXXX0	miscellaneous control
- * WR14	XXXSSSSS	commands (optional)
- *
- * 2. Enables
- *
- * WR14	000SSSS1	baud rate enable
- * WR3	SSSSSSS1	Rx enable
- * WR5	SSSS1SSS	Tx enable
- * WR0	10000000	reset Tx CRG (optional)
- * WR1	XSS00S00	DMA enable (optional)
- *
- * 3. Interrupt status
- *
- * WR15	XXXXXXXX	enable external/status
- * WR0	00010000	reset external status
- * WR0	00010000	reset external status twice
- * WR1	SSSXXSXX	enable Rx, Tx and Ext/status
- * WR9	000SXSSS	enable master interrupt enable
- *
- * 1 = set to one, 0 = reset to zero
- * X = user defined, S = same as previous init
- *
- *
- * Note that the implementation differs in some points from above scheme.
- *
- */
- 
-static void init_channel(struct scc_channel *scc)
-{
-	timer_delete(&scc->tx_t);
-	timer_delete(&scc->tx_wdog);
-
-	disable_irq(scc->irq);
-
-	wr(scc,R4,X1CLK|SDLC);		/* *1 clock, SDLC mode */
-	wr(scc,R1,0);			/* no W/REQ operation */
-	wr(scc,R3,Rx8|RxCRC_ENAB);	/* RX 8 bits/char, CRC, disabled */	
-	wr(scc,R5,Tx8|DTR|TxCRC_ENAB);	/* TX 8 bits/char, disabled, DTR */
-	wr(scc,R6,0);			/* SDLC address zero (not used) */
-	wr(scc,R7,FLAG);		/* SDLC flag value */
-	wr(scc,R9,VIS);			/* vector includes status */
-	wr(scc,R10,(scc->modem.nrz? NRZ : NRZI)|CRCPS|ABUNDER); /* abort on underrun, preset CRC generator, NRZ(I) */
-	wr(scc,R14, 0);
-
-
-/* set clock sources:
-
-   CLK_DPLL: normal halfduplex operation
-   
-		RxClk: use DPLL
-		TxClk: use DPLL
-		TRxC mode DPLL output
-		
-   CLK_EXTERNAL: external clocking (G3RUH or DF9IC modem)
-   
-  	        BayCom: 		others:
-  	        
-  	        TxClk = pin RTxC	TxClk = pin TRxC
-  	        RxClk = pin TRxC 	RxClk = pin RTxC
-  	     
-
-   CLK_DIVIDER:
-   		RxClk = use DPLL
-   		TxClk = pin RTxC
-   		
-   		BayCom:			others:
-   		pin TRxC = DPLL		pin TRxC = BRG
-   		(RxClk * 1)		(RxClk * 32)
-*/  
-
-   		
-	switch(scc->modem.clocksrc)
-	{
-		case CLK_DPLL:
-			wr(scc, R11, RCDPLL|TCDPLL|TRxCOI|TRxCDP);
-			init_brg(scc);
-			break;
-
-		case CLK_DIVIDER:
-			wr(scc, R11, ((scc->brand & BAYCOM)? TRxCDP : TRxCBR) | RCDPLL|TCRTxCP|TRxCOI);
-			init_brg(scc);
-			break;
-
-		case CLK_EXTERNAL:
-			wr(scc, R11, (scc->brand & BAYCOM)? RCTRxCP|TCRTxCP : RCRTxCP|TCTRxCP);
-			OutReg(scc->ctrl, R14, DISDPLL);
-			break;
-
-	}
-	
-	set_speed(scc);			/* set baudrate */
-	
-	if(scc->enhanced)
-	{
-		or(scc,R15,SHDLCE|FIFOE);	/* enable FIFO, SDLC/HDLC Enhancements (From now R7 is R7') */
-		wr(scc,R7,AUTOEOM);
-	}
-
-	if(scc->kiss.softdcd || (InReg(scc->ctrl,R0) & DCD))
-						/* DCD is now ON */
-	{
-		start_hunt(scc);
-	}
-	
-	/* enable ABORT, DCD & SYNC/HUNT interrupts */
-
-	wr(scc,R15, BRKIE|TxUIE|(scc->kiss.softdcd? SYNCIE:DCDIE));
-
-	Outb(scc->ctrl,RES_EXT_INT);	/* reset ext/status interrupts */
-	Outb(scc->ctrl,RES_EXT_INT);	/* must be done twice */
-
-	or(scc,R1,INT_ALL_Rx|TxINT_ENAB|EXT_INT_ENAB); /* enable interrupts */
-	
-	scc->status = InReg(scc->ctrl,R0);	/* read initial status */
-	
-	or(scc,R9,MIE);			/* master interrupt enable */
-	
-	scc_init_timer(scc);
-			
-	enable_irq(scc->irq);
-}
-
-
-
-
-/* ******************************************************************** */
-/* *			SCC timer functions			      * */
-/* ******************************************************************** */
-
-
-/* ----> scc_key_trx sets the time constant for the baudrate 
-         generator and keys the transmitter		     <---- */
-
-static void scc_key_trx(struct scc_channel *scc, char tx)
-{
-	unsigned int time_const;
-		
-	if (scc->brand & PRIMUS)
-		Outb(scc->ctrl + 4, scc->option | (tx? 0x80 : 0));
-
-	if (scc->modem.speed < 300) 
-		scc->modem.speed = 1200;
-
-	time_const = (unsigned) (scc->clock / (scc->modem.speed * (tx? 2:64))) - 2;
-
-	disable_irq(scc->irq);
-
-	if (tx)
-	{
-		or(scc, R1, TxINT_ENAB);	/* t_maxkeyup may have reset these */
-		or(scc, R15, TxUIE);
-	}
-
-	if (scc->modem.clocksrc == CLK_DPLL)
-	{				/* force simplex operation */
-		if (tx)
-		{
-#ifdef CONFIG_SCC_TRXECHO
-			cl(scc, R3, RxENABLE|ENT_HM);	/* switch off receiver */
-			cl(scc, R15, DCDIE|SYNCIE);	/* No DCD changes, please */
-#endif
-			set_brg(scc, time_const);	/* reprogram baudrate generator */
-
-			/* DPLL -> Rx clk, BRG -> Tx CLK, TRxC mode output, TRxC = BRG */
-			wr(scc, R11, RCDPLL|TCBR|TRxCOI|TRxCBR);
-			
-			/* By popular demand: tx_inhibit */
-			if (scc->kiss.tx_inhibit)
-			{
-				or(scc,R5, TxENAB);
-				scc->wreg[R5] |= RTS;
-			} else {
-				or(scc,R5,RTS|TxENAB);	/* set the RTS line and enable TX */
-			}
-		} else {
-			cl(scc,R5,RTS|TxENAB);
-			
-			set_brg(scc, time_const);	/* reprogram baudrate generator */
-			
-			/* DPLL -> Rx clk, DPLL -> Tx CLK, TRxC mode output, TRxC = DPLL */
-			wr(scc, R11, RCDPLL|TCDPLL|TRxCOI|TRxCDP);
-
-#ifndef CONFIG_SCC_TRXECHO
-			if (scc->kiss.softdcd)
-#endif
-			{
-				or(scc,R15, scc->kiss.softdcd? SYNCIE:DCDIE);
-				start_hunt(scc);
-			}
-		}
-	} else {
-		if (tx)
-		{
-#ifdef CONFIG_SCC_TRXECHO
-			if (scc->kiss.fulldup == KISS_DUPLEX_HALF)
-			{
-				cl(scc, R3, RxENABLE);
-				cl(scc, R15, DCDIE|SYNCIE);
-			}
-#endif
-				
-			if (scc->kiss.tx_inhibit)
-			{
-				or(scc,R5, TxENAB);
-				scc->wreg[R5] |= RTS;
-			} else {	
-				or(scc,R5,RTS|TxENAB);	/* enable tx */
-			}
-		} else {
-			cl(scc,R5,RTS|TxENAB);		/* disable tx */
-
-			if ((scc->kiss.fulldup == KISS_DUPLEX_HALF) &&
-#ifndef CONFIG_SCC_TRXECHO
-			    scc->kiss.softdcd)
-#else
-			    1)
-#endif
-			{
-				or(scc, R15, scc->kiss.softdcd? SYNCIE:DCDIE);
-				start_hunt(scc);
-			}
-		}
-	}
-
-	enable_irq(scc->irq);
-}
-
-
-/* ----> SCC timer interrupt handler and friends. <---- */
-
-static void __scc_start_tx_timer(struct scc_channel *scc,
-				 void (*handler)(struct timer_list *t),
-				 unsigned long when)
-{
-	timer_delete(&scc->tx_t);
-
-	if (when == 0)
-	{
-		handler(&scc->tx_t);
-	} else 
-	if (when != TIMER_OFF)
-	{
-		scc->tx_t.function = handler;
-		scc->tx_t.expires = jiffies + (when*HZ)/100;
-		add_timer(&scc->tx_t);
-	}
-}
-
-static void scc_start_tx_timer(struct scc_channel *scc,
-			       void (*handler)(struct timer_list *t),
-			       unsigned long when)
-{
-	unsigned long flags;
-	
-	spin_lock_irqsave(&scc->lock, flags);
-	__scc_start_tx_timer(scc, handler, when);
-	spin_unlock_irqrestore(&scc->lock, flags);
-}
-
-static void scc_start_defer(struct scc_channel *scc)
-{
-	unsigned long flags;
-	
-	spin_lock_irqsave(&scc->lock, flags);
-	timer_delete(&scc->tx_wdog);
-	
-	if (scc->kiss.maxdefer != 0 && scc->kiss.maxdefer != TIMER_OFF)
-	{
-		scc->tx_wdog.function = t_busy;
-		scc->tx_wdog.expires = jiffies + HZ*scc->kiss.maxdefer;
-		add_timer(&scc->tx_wdog);
-	}
-	spin_unlock_irqrestore(&scc->lock, flags);
-}
-
-static void scc_start_maxkeyup(struct scc_channel *scc)
-{
-	unsigned long flags;
-	
-	spin_lock_irqsave(&scc->lock, flags);
-	timer_delete(&scc->tx_wdog);
-	
-	if (scc->kiss.maxkeyup != 0 && scc->kiss.maxkeyup != TIMER_OFF)
-	{
-		scc->tx_wdog.function = t_maxkeyup;
-		scc->tx_wdog.expires = jiffies + HZ*scc->kiss.maxkeyup;
-		add_timer(&scc->tx_wdog);
-	}
-	spin_unlock_irqrestore(&scc->lock, flags);
-}
-
-/* 
- * This is called from scc_txint() when there are no more frames to send.
- * Not exactly a timer function, but it is a close friend of the family...
- */
-
-static void scc_tx_done(struct scc_channel *scc)
-{
-	/* 
-	 * trx remains keyed in fulldup mode 2 until t_idle expires.
-	 */
-				 
-	switch (scc->kiss.fulldup)
-	{
-		case KISS_DUPLEX_LINK:
-			scc->stat.tx_state = TXS_IDLE2;
-			if (scc->kiss.idletime != TIMER_OFF)
-				scc_start_tx_timer(scc, t_idle,
-						   scc->kiss.idletime*100);
-			break;
-		case KISS_DUPLEX_OPTIMA:
-			scc_notify(scc, HWEV_ALL_SENT);
-			break;
-		default:
-			scc->stat.tx_state = TXS_BUSY;
-			scc_start_tx_timer(scc, t_tail, scc->kiss.tailtime);
-	}
-
-	netif_wake_queue(scc->dev);
-}
-
-
-static unsigned char Rand = 17;
-
-static inline int is_grouped(struct scc_channel *scc)
-{
-	int k;
-	struct scc_channel *scc2;
-	unsigned char grp1, grp2;
-
-	grp1 = scc->kiss.group;
-	
-	for (k = 0; k < (Nchips * 2); k++)
-	{
-		scc2 = &SCC_Info[k];
-		grp2 = scc2->kiss.group;
-		
-		if (scc2 == scc || !(scc2->dev && grp2))
-			continue;
-		
-		if ((grp1 & 0x3f) == (grp2 & 0x3f))
-		{
-			if ( (grp1 & TXGROUP) && (scc2->wreg[R5] & RTS) )
-				return 1;
-			
-			if ( (grp1 & RXGROUP) && scc2->dcd )
-				return 1;
-		}
-	}
-	return 0;
-}
-
-/* DWAIT and SLOTTIME expired
- *
- * fulldup == 0:  DCD is active or Rand > P-persistence: start t_busy timer
- *                else key trx and start txdelay
- * fulldup == 1:  key trx and start txdelay
- * fulldup == 2:  mintime expired, reset status or key trx and start txdelay
- */
-
-static void t_dwait(struct timer_list *t)
-{
-	struct scc_channel *scc = timer_container_of(scc, t, tx_t);
-	
-	if (scc->stat.tx_state == TXS_WAIT)	/* maxkeyup or idle timeout */
-	{
-		if (skb_queue_empty(&scc->tx_queue)) {	/* nothing to send */
-			scc->stat.tx_state = TXS_IDLE;
-			netif_wake_queue(scc->dev);	/* t_maxkeyup locked it. */
-			return;
-		}
-
-		scc->stat.tx_state = TXS_BUSY;
-	}
-
-	if (scc->kiss.fulldup == KISS_DUPLEX_HALF)
-	{
-		Rand = Rand * 17 + 31;
-		
-		if (scc->dcd || (scc->kiss.persist) < Rand || (scc->kiss.group && is_grouped(scc)) )
-		{
-			scc_start_defer(scc);
-			scc_start_tx_timer(scc, t_dwait, scc->kiss.slottime);
-			return ;
-		}
-	}
-
-	if ( !(scc->wreg[R5] & RTS) )
-	{
-		scc_key_trx(scc, TX_ON);
-		scc_start_tx_timer(scc, t_txdelay, scc->kiss.txdelay);
-	} else {
-		scc_start_tx_timer(scc, t_txdelay, 0);
-	}
-}
-
-
-/* TXDELAY expired
- *
- * kick transmission by a fake scc_txint(scc), start 'maxkeyup' watchdog.
- */
-
-static void t_txdelay(struct timer_list *t)
-{
-	struct scc_channel *scc = timer_container_of(scc, t, tx_t);
-
-	scc_start_maxkeyup(scc);
-
-	if (scc->tx_buff == NULL)
-	{
-		disable_irq(scc->irq);
-		scc_txint(scc);	
-		enable_irq(scc->irq);
-	}
-}
-	
-
-/* TAILTIME expired
- *
- * switch off transmitter. If we were stopped by Maxkeyup restart
- * transmission after 'mintime' seconds
- */
-
-static void t_tail(struct timer_list *t)
-{
-	struct scc_channel *scc = timer_container_of(scc, t, tx_t);
-	unsigned long flags;
-	
-	spin_lock_irqsave(&scc->lock, flags); 
-	timer_delete(&scc->tx_wdog);
-	scc_key_trx(scc, TX_OFF);
-	spin_unlock_irqrestore(&scc->lock, flags);
-
-	if (scc->stat.tx_state == TXS_TIMEOUT)		/* we had a timeout? */
-	{
-		scc->stat.tx_state = TXS_WAIT;
-		scc_start_tx_timer(scc, t_dwait, scc->kiss.mintime*100);
-		return;
-	}
-
-	scc->stat.tx_state = TXS_IDLE;
-	netif_wake_queue(scc->dev);
-}
-
-
-/* BUSY timeout
- *
- * throw away send buffers if DCD remains active too long.
- */
-
-static void t_busy(struct timer_list *t)
-{
-	struct scc_channel *scc = timer_container_of(scc, t, tx_wdog);
-
-	timer_delete(&scc->tx_t);
-	netif_stop_queue(scc->dev);	/* don't pile on the wabbit! */
-
-	scc_discard_buffers(scc);
-	scc->stat.txerrs++;
-	scc->stat.tx_state = TXS_IDLE;
-
-	netif_wake_queue(scc->dev);	
-}
-
-/* MAXKEYUP timeout
- *
- * this is our watchdog.
- */
-
-static void t_maxkeyup(struct timer_list *t)
-{
-	struct scc_channel *scc = timer_container_of(scc, t, tx_wdog);
-	unsigned long flags;
-
-	spin_lock_irqsave(&scc->lock, flags);
-	/* 
-	 * let things settle down before we start to
-	 * accept new data.
-	 */
-
-	netif_stop_queue(scc->dev);
-	scc_discard_buffers(scc);
-
-	timer_delete(&scc->tx_t);
-
-	cl(scc, R1, TxINT_ENAB);	/* force an ABORT, but don't */
-	cl(scc, R15, TxUIE);		/* count it. */
-	OutReg(scc->ctrl, R0, RES_Tx_P);
-
-	spin_unlock_irqrestore(&scc->lock, flags);
-
-	scc->stat.txerrs++;
-	scc->stat.tx_state = TXS_TIMEOUT;
-	scc_start_tx_timer(scc, t_tail, scc->kiss.tailtime);
-}
-
-/* IDLE timeout
- *
- * in fulldup mode 2 it keys down the transmitter after 'idle' seconds
- * of inactivity. We will not restart transmission before 'mintime'
- * expires.
- */
-
-static void t_idle(struct timer_list *t)
-{
-	struct scc_channel *scc = timer_container_of(scc, t, tx_t);
-	
-	timer_delete(&scc->tx_wdog);
-
-	scc_key_trx(scc, TX_OFF);
-	if(scc->kiss.mintime)
-		scc_start_tx_timer(scc, t_dwait, scc->kiss.mintime*100);
-	scc->stat.tx_state = TXS_WAIT;
-}
-
-static void scc_init_timer(struct scc_channel *scc)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&scc->lock, flags);	
-	scc->stat.tx_state = TXS_IDLE;
-	spin_unlock_irqrestore(&scc->lock, flags);
-}
-
-
-/* ******************************************************************** */
-/* *			Set/get L1 parameters			      * */
-/* ******************************************************************** */
-
-
-/*
- * this will set the "hardware" parameters through KISS commands or ioctl()
- */
-
-#define CAST(x) (unsigned long)(x)
-
-static unsigned int scc_set_param(struct scc_channel *scc, unsigned int cmd, unsigned int arg)
-{
-	switch (cmd)
-	{
-		case PARAM_TXDELAY:	scc->kiss.txdelay=arg;		break;
-		case PARAM_PERSIST:	scc->kiss.persist=arg;		break;
-		case PARAM_SLOTTIME:	scc->kiss.slottime=arg;		break;
-		case PARAM_TXTAIL:	scc->kiss.tailtime=arg;		break;
-		case PARAM_FULLDUP:	scc->kiss.fulldup=arg;		break;
-		case PARAM_DTR:		break; /* does someone need this? */
-		case PARAM_GROUP:	scc->kiss.group=arg;		break;
-		case PARAM_IDLE:	scc->kiss.idletime=arg;		break;
-		case PARAM_MIN:		scc->kiss.mintime=arg;		break;
-		case PARAM_MAXKEY:	scc->kiss.maxkeyup=arg;		break;
-		case PARAM_WAIT:	scc->kiss.waittime=arg;		break;
-		case PARAM_MAXDEFER:	scc->kiss.maxdefer=arg;		break;
-		case PARAM_TX:		scc->kiss.tx_inhibit=arg;	break;
-
-		case PARAM_SOFTDCD:	
-			scc->kiss.softdcd=arg;
-			if (arg)
-			{
-				or(scc, R15, SYNCIE);
-				cl(scc, R15, DCDIE);
-				start_hunt(scc);
-			} else {
-				or(scc, R15, DCDIE);
-				cl(scc, R15, SYNCIE);
-			}
-			break;
-				
-		case PARAM_SPEED:
-			if (arg < 256)
-				scc->modem.speed=arg*100;
-			else
-				scc->modem.speed=arg;
-
-			if (scc->stat.tx_state == 0)	/* only switch baudrate on rx... ;-) */
-				set_speed(scc);
-			break;
-			
-		case PARAM_RTS:	
-			if ( !(scc->wreg[R5] & RTS) )
-			{
-				if (arg != TX_OFF) {
-					scc_key_trx(scc, TX_ON);
-					scc_start_tx_timer(scc, t_txdelay, scc->kiss.txdelay);
-				}
-			} else {
-				if (arg == TX_OFF)
-				{
-					scc->stat.tx_state = TXS_BUSY;
-					scc_start_tx_timer(scc, t_tail, scc->kiss.tailtime);
-				}
-			}
-			break;
-			
-		case PARAM_HWEVENT:
-			scc_notify(scc, scc->dcd? HWEV_DCD_ON:HWEV_DCD_OFF);
-			break;
-
-		default:		return -EINVAL;
-	}
-	
-	return 0;
-}
-
-
- 
-static unsigned long scc_get_param(struct scc_channel *scc, unsigned int cmd)
-{
-	switch (cmd)
-	{
-		case PARAM_TXDELAY:	return CAST(scc->kiss.txdelay);
-		case PARAM_PERSIST:	return CAST(scc->kiss.persist);
-		case PARAM_SLOTTIME:	return CAST(scc->kiss.slottime);
-		case PARAM_TXTAIL:	return CAST(scc->kiss.tailtime);
-		case PARAM_FULLDUP:	return CAST(scc->kiss.fulldup);
-		case PARAM_SOFTDCD:	return CAST(scc->kiss.softdcd);
-		case PARAM_DTR:		return CAST((scc->wreg[R5] & DTR)? 1:0);
-		case PARAM_RTS:		return CAST((scc->wreg[R5] & RTS)? 1:0);
-		case PARAM_SPEED:	return CAST(scc->modem.speed);
-		case PARAM_GROUP:	return CAST(scc->kiss.group);
-		case PARAM_IDLE:	return CAST(scc->kiss.idletime);
-		case PARAM_MIN:		return CAST(scc->kiss.mintime);
-		case PARAM_MAXKEY:	return CAST(scc->kiss.maxkeyup);
-		case PARAM_WAIT:	return CAST(scc->kiss.waittime);
-		case PARAM_MAXDEFER:	return CAST(scc->kiss.maxdefer);
-		case PARAM_TX:		return CAST(scc->kiss.tx_inhibit);
-		default:		return NO_SUCH_PARAM;
-	}
-
-}
-
-#undef CAST
-
-/* ******************************************************************* */
-/* *			Send calibration pattern		     * */
-/* ******************************************************************* */
-
-static void scc_stop_calibrate(struct timer_list *t)
-{
-	struct scc_channel *scc = timer_container_of(scc, t, tx_wdog);
-	unsigned long flags;
-	
-	spin_lock_irqsave(&scc->lock, flags);
-	timer_delete(&scc->tx_wdog);
-	scc_key_trx(scc, TX_OFF);
-	wr(scc, R6, 0);
-	wr(scc, R7, FLAG);
-	Outb(scc->ctrl,RES_EXT_INT);	/* reset ext/status interrupts */
-	Outb(scc->ctrl,RES_EXT_INT);
-
-	netif_wake_queue(scc->dev);
-	spin_unlock_irqrestore(&scc->lock, flags);
-}
-
-
-static void
-scc_start_calibrate(struct scc_channel *scc, int duration, unsigned char pattern)
-{
-	unsigned long flags;
-	
-	spin_lock_irqsave(&scc->lock, flags);
-	netif_stop_queue(scc->dev);
-	scc_discard_buffers(scc);
-
-	timer_delete(&scc->tx_wdog);
-
-	scc->tx_wdog.function = scc_stop_calibrate;
-	scc->tx_wdog.expires = jiffies + HZ*duration;
-	add_timer(&scc->tx_wdog);
-
-	/* This doesn't seem to work. Why not? */	
-	wr(scc, R6, 0);
-	wr(scc, R7, pattern);
-
-	/* 
-	 * Don't know if this works. 
-	 * Damn, where is my Z8530 programming manual...? 
-	 */
-
-	Outb(scc->ctrl,RES_EXT_INT);	/* reset ext/status interrupts */
-	Outb(scc->ctrl,RES_EXT_INT);
-
-	scc_key_trx(scc, TX_ON);
-	spin_unlock_irqrestore(&scc->lock, flags);
-}
-
-/* ******************************************************************* */
-/* *		Init channel structures, special HW, etc...	     * */
-/* ******************************************************************* */
-
-/*
- * Reset the Z8530s and setup special hardware
- */
-
-static void z8530_init(void)
-{
-	const unsigned int nr_irqs = irq_get_nr_irqs();
-	struct scc_channel *scc;
-	int chip, k;
-	unsigned long flags;
-	char *flag;
-
-
-	printk(KERN_INFO "Init Z8530 driver: %u channels, IRQ", Nchips*2);
-	
-	flag=" ";
-	for (k = 0; k < nr_irqs; k++)
-		if (Ivec[k].used) 
-		{
-			printk("%s%d", flag, k);
-			flag=",";
-		}
-	printk("\n");
-
-
-	/* reset and pre-init all chips in the system */
-	for (chip = 0; chip < Nchips; chip++)
-	{
-		scc=&SCC_Info[2*chip];
-		if (!scc->ctrl) continue;
-
-		/* Special SCC cards */
-
-		if(scc->brand & EAGLE)			/* this is an EAGLE card */
-			Outb(scc->special,0x08);	/* enable interrupt on the board */
-			
-		if(scc->brand & (PC100 | PRIMUS))	/* this is a PC100/PRIMUS card */
-			Outb(scc->special,scc->option);	/* set the MODEM mode (0x22) */
-
-			
-		/* Reset and pre-init Z8530 */
-
-		spin_lock_irqsave(&scc->lock, flags);
-				
-		Outb(scc->ctrl, 0);
-		OutReg(scc->ctrl,R9,FHWRES);		/* force hardware reset */
-		udelay(100);				/* give it 'a bit' more time than required */
-		wr(scc, R2, chip*16);			/* interrupt vector */
-		wr(scc, R9, VIS);			/* vector includes status */
-		spin_unlock_irqrestore(&scc->lock, flags);		
-        }
-
- 
-	Driver_Initialized = 1;
-}
-
-/*
- * Allocate device structure, err, instance, and register driver
- */
-
-static int scc_net_alloc(const char *name, struct scc_channel *scc)
-{
-	int err;
-	struct net_device *dev;
-
-	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, scc_net_setup);
-	if (!dev) 
-		return -ENOMEM;
-
-	dev->ml_priv = scc;
-	scc->dev = dev;
-	spin_lock_init(&scc->lock);
-	timer_setup(&scc->tx_t, NULL, 0);
-	timer_setup(&scc->tx_wdog, NULL, 0);
-
-	err = register_netdevice(dev);
-	if (err) {
-		printk(KERN_ERR "%s: can't register network device (%d)\n", 
-		       name, err);
-		free_netdev(dev);
-		scc->dev = NULL;
-		return err;
-	}
-
-	return 0;
-}
-
-
-
-/* ******************************************************************** */
-/* *			    Network driver methods		      * */
-/* ******************************************************************** */
-
-static const struct net_device_ops scc_netdev_ops = {
-	.ndo_open            = scc_net_open,
-	.ndo_stop	     = scc_net_close,
-	.ndo_start_xmit	     = scc_net_tx,
-	.ndo_set_mac_address = scc_net_set_mac_address,
-	.ndo_get_stats       = scc_net_get_stats,
-	.ndo_siocdevprivate  = scc_net_siocdevprivate,
-};
-
-/* ----> Initialize device <----- */
-
-static void scc_net_setup(struct net_device *dev)
-{
-	dev->tx_queue_len    = 16;	/* should be enough... */
-
-	dev->netdev_ops	     = &scc_netdev_ops;
-	dev->header_ops      = &ax25_header_ops;
-
-	dev->flags      = 0;
-
-	dev->type = ARPHRD_AX25;
-	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
-	dev->mtu = AX25_DEF_PACLEN;
-	dev->addr_len = AX25_ADDR_LEN;
-
-	memcpy(dev->broadcast, &ax25_bcast,  AX25_ADDR_LEN);
-	dev_addr_set(dev, (u8 *)&ax25_defaddr);
-}
-
-/* ----> open network device <---- */
-
-static int scc_net_open(struct net_device *dev)
-{
-	struct scc_channel *scc = (struct scc_channel *) dev->ml_priv;
-
-	if (!scc->init)
-		return -EINVAL;
-
-	scc->tx_buff = NULL;
-	skb_queue_head_init(&scc->tx_queue);
- 
-	init_channel(scc);
-
-	netif_start_queue(dev);
-	return 0;
-}
-
-/* ----> close network device <---- */
-
-static int scc_net_close(struct net_device *dev)
-{
-	struct scc_channel *scc = (struct scc_channel *) dev->ml_priv;
-	unsigned long flags;
-
-	netif_stop_queue(dev);
-
-	spin_lock_irqsave(&scc->lock, flags);	
-	Outb(scc->ctrl,0);		/* Make sure pointer is written */
-	wr(scc,R1,0);			/* disable interrupts */
-	wr(scc,R3,0);
-	spin_unlock_irqrestore(&scc->lock, flags);
-
-	timer_delete_sync(&scc->tx_t);
-	timer_delete_sync(&scc->tx_wdog);
-	
-	scc_discard_buffers(scc);
-
-	return 0;
-}
-
-/* ----> receive frame, called from scc_rxint() <---- */
-
-static void scc_net_rx(struct scc_channel *scc, struct sk_buff *skb)
-{
-	if (skb->len == 0) {
-		dev_kfree_skb_irq(skb);
-		return;
-	}
-		
-	scc->dev_stat.rx_packets++;
-	scc->dev_stat.rx_bytes += skb->len;
-
-	skb->protocol = ax25_type_trans(skb, scc->dev);
-	
-	netif_rx(skb);
-}
-
-/* ----> transmit frame <---- */
-
-static netdev_tx_t scc_net_tx(struct sk_buff *skb, struct net_device *dev)
-{
-	struct scc_channel *scc = (struct scc_channel *) dev->ml_priv;
-	unsigned long flags;
-	char kisscmd;
-
-	if (skb->protocol == htons(ETH_P_IP))
-		return ax25_ip_xmit(skb);
-
-	if (skb->len > scc->stat.bufsize || skb->len < 2) {
-		scc->dev_stat.tx_dropped++;	/* bogus frame */
-		dev_kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-	
-	scc->dev_stat.tx_packets++;
-	scc->dev_stat.tx_bytes += skb->len;
-	scc->stat.txframes++;
-	
-	kisscmd = *skb->data & 0x1f;
-	skb_pull(skb, 1);
-
-	if (kisscmd) {
-		scc_set_param(scc, kisscmd, *skb->data);
-		dev_kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-
-	spin_lock_irqsave(&scc->lock, flags);
-		
-	if (skb_queue_len(&scc->tx_queue) > scc->dev->tx_queue_len) {
-		struct sk_buff *skb_del;
-		skb_del = skb_dequeue(&scc->tx_queue);
-		dev_kfree_skb_irq(skb_del);
-	}
-	skb_queue_tail(&scc->tx_queue, skb);
-	netif_trans_update(dev);
-	
-
-	/*
-	 * Start transmission if the trx state is idle or
-	 * t_idle hasn't expired yet. Use dwait/persistence/slottime
-	 * algorithm for normal halfduplex operation.
-	 */
-
-	if(scc->stat.tx_state == TXS_IDLE || scc->stat.tx_state == TXS_IDLE2) {
-		scc->stat.tx_state = TXS_BUSY;
-		if (scc->kiss.fulldup == KISS_DUPLEX_HALF)
-			__scc_start_tx_timer(scc, t_dwait, scc->kiss.waittime);
-		else
-			__scc_start_tx_timer(scc, t_dwait, 0);
-	}
-	spin_unlock_irqrestore(&scc->lock, flags);
-	return NETDEV_TX_OK;
-}
-
-/* ----> ioctl functions <---- */
-
-/*
- * SIOCSCCCFG		- configure driver	arg: (struct scc_hw_config *) arg
- * SIOCSCCINI		- initialize driver	arg: ---
- * SIOCSCCCHANINI	- initialize channel	arg: (struct scc_modem *) arg
- * SIOCSCCSMEM		- set memory		arg: (struct scc_mem_config *) arg
- * SIOCSCCGKISS		- get level 1 parameter	arg: (struct scc_kiss_cmd *) arg
- * SIOCSCCSKISS		- set level 1 parameter arg: (struct scc_kiss_cmd *) arg
- * SIOCSCCGSTAT		- get driver status	arg: (struct scc_stat *) arg
- * SIOCSCCCAL		- send calib. pattern	arg: (struct scc_calibrate *) arg
- */
-
-static int scc_net_siocdevprivate(struct net_device *dev,
-				  struct ifreq *ifr, void __user *arg, int cmd)
-{
-	struct scc_kiss_cmd kiss_cmd;
-	struct scc_mem_config memcfg;
-	struct scc_hw_config hwcfg;
-	struct scc_calibrate cal;
-	struct scc_channel *scc = (struct scc_channel *) dev->ml_priv;
-	int chan;
-	unsigned char device_name[IFNAMSIZ];
-	
-	if (!Driver_Initialized)
-	{
-		if (cmd == SIOCSCCCFG)
-		{
-			int found = 1;
-
-			if (!capable(CAP_SYS_RAWIO)) return -EPERM;
-			if (in_compat_syscall())
-				return -EOPNOTSUPP;
-
-			if (!arg) return -EFAULT;
-
-			if (Nchips >= SCC_MAXCHIPS) 
-				return -EINVAL;
-
-			if (copy_from_user(&hwcfg, arg, sizeof(hwcfg)))
-				return -EFAULT;
-
-			if (hwcfg.irq == 2) hwcfg.irq = 9;
-
-			if (hwcfg.irq < 0 || hwcfg.irq >= irq_get_nr_irqs())
-				return -EINVAL;
-				
-			if (!Ivec[hwcfg.irq].used && hwcfg.irq)
-			{
-				if (request_irq(hwcfg.irq, scc_isr,
-						0, "AX.25 SCC",
-						(void *)(long) hwcfg.irq))
-					printk(KERN_WARNING "z8530drv: warning, cannot get IRQ %d\n", hwcfg.irq);
-				else
-					Ivec[hwcfg.irq].used = 1;
-			}
-
-			if (hwcfg.vector_latch && !Vector_Latch) {
-				if (!request_region(hwcfg.vector_latch, 1, "scc vector latch"))
-					printk(KERN_WARNING "z8530drv: warning, cannot reserve vector latch port 0x%lx\n, disabled.", hwcfg.vector_latch);
-				else
-					Vector_Latch = hwcfg.vector_latch;
-			}
-
-			if (hwcfg.clock == 0)
-				hwcfg.clock = SCC_DEFAULT_CLOCK;
-
-#ifndef SCC_DONT_CHECK
-
-			if(request_region(hwcfg.ctrl_a, 1, "scc-probe"))
-			{
-				disable_irq(hwcfg.irq);
-				Outb(hwcfg.ctrl_a, 0);
-				OutReg(hwcfg.ctrl_a, R9, FHWRES);
-				udelay(100);
-				OutReg(hwcfg.ctrl_a,R13,0x55);		/* is this chip really there? */
-				udelay(5);
-
-				if (InReg(hwcfg.ctrl_a,R13) != 0x55)
-					found = 0;
-				enable_irq(hwcfg.irq);
-				release_region(hwcfg.ctrl_a, 1);
-			}
-			else
-				found = 0;
-#endif
-
-			if (found)
-			{
-				SCC_Info[2*Nchips  ].ctrl = hwcfg.ctrl_a;
-				SCC_Info[2*Nchips  ].data = hwcfg.data_a;
-				SCC_Info[2*Nchips  ].irq  = hwcfg.irq;
-				SCC_Info[2*Nchips+1].ctrl = hwcfg.ctrl_b;
-				SCC_Info[2*Nchips+1].data = hwcfg.data_b;
-				SCC_Info[2*Nchips+1].irq  = hwcfg.irq;
-			
-				SCC_ctrl[Nchips].chan_A = hwcfg.ctrl_a;
-				SCC_ctrl[Nchips].chan_B = hwcfg.ctrl_b;
-				SCC_ctrl[Nchips].irq    = hwcfg.irq;
-			}
-
-
-			for (chan = 0; chan < 2; chan++)
-			{
-				sprintf(device_name, "%s%i", SCC_DriverName, 2*Nchips+chan);
-
-				SCC_Info[2*Nchips+chan].special = hwcfg.special;
-				SCC_Info[2*Nchips+chan].clock = hwcfg.clock;
-				SCC_Info[2*Nchips+chan].brand = hwcfg.brand;
-				SCC_Info[2*Nchips+chan].option = hwcfg.option;
-				SCC_Info[2*Nchips+chan].enhanced = hwcfg.escc;
-
-#ifdef SCC_DONT_CHECK
-				printk(KERN_INFO "%s: data port = 0x%3.3x  control port = 0x%3.3x\n",
-					device_name, 
-					SCC_Info[2*Nchips+chan].data, 
-					SCC_Info[2*Nchips+chan].ctrl);
-
-#else
-				printk(KERN_INFO "%s: data port = 0x%3.3lx  control port = 0x%3.3lx -- %s\n",
-					device_name,
-					chan? hwcfg.data_b : hwcfg.data_a, 
-					chan? hwcfg.ctrl_b : hwcfg.ctrl_a,
-					found? "found" : "missing");
-#endif
-
-				if (found)
-				{
-					request_region(SCC_Info[2*Nchips+chan].ctrl, 1, "scc ctrl");
-					request_region(SCC_Info[2*Nchips+chan].data, 1, "scc data");
-					if (Nchips+chan != 0 &&
-					    scc_net_alloc(device_name, 
-							  &SCC_Info[2*Nchips+chan]))
-					    return -EINVAL;
-				}
-			}
-			
-			if (found) Nchips++;
-			
-			return 0;
-		}
-		
-		if (cmd == SIOCSCCINI)
-		{
-			if (!capable(CAP_SYS_RAWIO))
-				return -EPERM;
-				
-			if (Nchips == 0)
-				return -EINVAL;
-
-			z8530_init();
-			return 0;
-		}
-		
-		return -EINVAL;	/* confuse the user */
-	}
-	
-	if (!scc->init)
-	{
-		if (cmd == SIOCSCCCHANINI)
-		{
-			if (!capable(CAP_NET_ADMIN)) return -EPERM;
-			if (!arg) return -EINVAL;
-			
-			scc->stat.bufsize   = SCC_BUFSIZE;
-
-			if (copy_from_user(&scc->modem, arg, sizeof(struct scc_modem)))
-				return -EINVAL;
-			
-			/* default KISS Params */
-		
-			if (scc->modem.speed < 4800)
-			{
-				scc->kiss.txdelay = 36;		/* 360 ms */
-				scc->kiss.persist = 42;		/* 25% persistence */			/* was 25 */
-				scc->kiss.slottime = 16;	/* 160 ms */
-				scc->kiss.tailtime = 4;		/* minimal reasonable value */
-				scc->kiss.fulldup = 0;		/* CSMA */
-				scc->kiss.waittime = 50;	/* 500 ms */
-				scc->kiss.maxkeyup = 10;	/* 10 s */
-				scc->kiss.mintime = 3;		/* 3 s */
-				scc->kiss.idletime = 30;	/* 30 s */
-				scc->kiss.maxdefer = 120;	/* 2 min */
-				scc->kiss.softdcd = 0;		/* hardware dcd */
-			} else {
-				scc->kiss.txdelay = 10;		/* 100 ms */
-				scc->kiss.persist = 64;		/* 25% persistence */			/* was 25 */
-				scc->kiss.slottime = 8;		/* 160 ms */
-				scc->kiss.tailtime = 1;		/* minimal reasonable value */
-				scc->kiss.fulldup = 0;		/* CSMA */
-				scc->kiss.waittime = 50;	/* 500 ms */
-				scc->kiss.maxkeyup = 7;		/* 7 s */
-				scc->kiss.mintime = 3;		/* 3 s */
-				scc->kiss.idletime = 30;	/* 30 s */
-				scc->kiss.maxdefer = 120;	/* 2 min */
-				scc->kiss.softdcd = 0;		/* hardware dcd */
-			}
-			
-			scc->tx_buff = NULL;
-			skb_queue_head_init(&scc->tx_queue);
-			scc->init = 1;
-			
-			return 0;
-		}
-		
-		return -EINVAL;
-	}
-	
-	switch(cmd)
-	{
-		case SIOCSCCRESERVED:
-			return -ENOIOCTLCMD;
-
-		case SIOCSCCSMEM:
-			if (!capable(CAP_SYS_RAWIO)) return -EPERM;
-			if (!arg || copy_from_user(&memcfg, arg, sizeof(memcfg)))
-				return -EINVAL;
-			if (memcfg.bufsize < 16)
-				return -EINVAL;
-			scc->stat.bufsize   = memcfg.bufsize;
-			return 0;
-		
-		case SIOCSCCGSTAT:
-			if (!arg || copy_to_user(arg, &scc->stat, sizeof(scc->stat)))
-				return -EINVAL;
-			return 0;
-		
-		case SIOCSCCGKISS:
-			if (!arg || copy_from_user(&kiss_cmd, arg, sizeof(kiss_cmd)))
-				return -EINVAL;
-			kiss_cmd.param = scc_get_param(scc, kiss_cmd.command);
-			if (copy_to_user(arg, &kiss_cmd, sizeof(kiss_cmd)))
-				return -EINVAL;
-			return 0;
-		
-		case SIOCSCCSKISS:
-			if (!capable(CAP_NET_ADMIN)) return -EPERM;
-			if (!arg || copy_from_user(&kiss_cmd, arg, sizeof(kiss_cmd)))
-				return -EINVAL;
-			return scc_set_param(scc, kiss_cmd.command, kiss_cmd.param);
-		
-		case SIOCSCCCAL:
-			if (!capable(CAP_SYS_RAWIO)) return -EPERM;
-			if (!arg || copy_from_user(&cal, arg, sizeof(cal)) || cal.time == 0)
-				return -EINVAL;
-
-			scc_start_calibrate(scc, cal.time, cal.pattern);
-			return 0;
-
-		default:
-			return -ENOIOCTLCMD;
-		
-	}
-	
-	return -EINVAL;
-}
-
-/* ----> set interface callsign <---- */
-
-static int scc_net_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct sockaddr *sa = (struct sockaddr *) addr;
-	dev_addr_set(dev, sa->sa_data);
-	return 0;
-}
-
-/* ----> get statistics <---- */
-
-static struct net_device_stats *scc_net_get_stats(struct net_device *dev)
-{
-	struct scc_channel *scc = (struct scc_channel *) dev->ml_priv;
-	
-	scc->dev_stat.rx_errors = scc->stat.rxerrs + scc->stat.rx_over;
-	scc->dev_stat.tx_errors = scc->stat.txerrs + scc->stat.tx_under;
-	scc->dev_stat.rx_fifo_errors = scc->stat.rx_over;
-	scc->dev_stat.tx_fifo_errors = scc->stat.tx_under;
-
-	return &scc->dev_stat;
-}
-
-/* ******************************************************************** */
-/* *		dump statistics to /proc/net/z8530drv		      * */
-/* ******************************************************************** */
-
-#ifdef CONFIG_PROC_FS
-
-static inline struct scc_channel *scc_net_seq_idx(loff_t pos)
-{
-	int k;
-
-	for (k = 0; k < Nchips*2; ++k) {
-		if (!SCC_Info[k].init) 
-			continue;
-		if (pos-- == 0)
-			return &SCC_Info[k];
-	}
-	return NULL;
-}
-
-static void *scc_net_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	return *pos ? scc_net_seq_idx(*pos - 1) : SEQ_START_TOKEN;
-	
-}
-
-static void *scc_net_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	unsigned k;
-	struct scc_channel *scc = v;
-	++*pos;
-	
-	for (k = (v == SEQ_START_TOKEN) ? 0 : (scc - SCC_Info)+1;
-	     k < Nchips*2; ++k) {
-		if (SCC_Info[k].init) 
-			return &SCC_Info[k];
-	}
-	return NULL;
-}
-
-static void scc_net_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int scc_net_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(seq, "z8530drv-"VERSION"\n");
-	} else if (!Driver_Initialized) {
-		seq_puts(seq, "not initialized\n");
-	} else if (!Nchips) {
-		seq_puts(seq, "chips missing\n");
-	} else {
-		const struct scc_channel *scc = v;
-		const struct scc_stat *stat = &scc->stat;
-		const struct scc_kiss *kiss = &scc->kiss;
-
-
-		/* dev	data ctrl irq clock brand enh vector special option 
-		 *	baud nrz clocksrc softdcd bufsize
-		 *	rxints txints exints spints
-		 *	rcvd rxerrs over / xmit txerrs under / nospace bufsize
-		 *	txd pers slot tail ful wait min maxk idl defr txof grp
-		 *	W ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
-		 *	R ## ## XX ## ## ## ## ## XX ## ## ## ## ## ## ##
-		 */
-
-		seq_printf(seq, "%s\t%3.3lx %3.3lx %d %lu %2.2x %d %3.3lx %3.3lx %d\n",
-				scc->dev->name,
-				scc->data, scc->ctrl, scc->irq, scc->clock, scc->brand,
-				scc->enhanced, Vector_Latch, scc->special,
-				scc->option);
-		seq_printf(seq, "\t%lu %d %d %d %d\n",
-				scc->modem.speed, scc->modem.nrz,
-				scc->modem.clocksrc, kiss->softdcd,
-				stat->bufsize);
-		seq_printf(seq, "\t%lu %lu %lu %lu\n",
-				stat->rxints, stat->txints, stat->exints, stat->spints);
-		seq_printf(seq, "\t%lu %lu %d / %lu %lu %d / %d %d\n",
-				stat->rxframes, stat->rxerrs, stat->rx_over,
-				stat->txframes, stat->txerrs, stat->tx_under,
-				stat->nospace,  stat->tx_state);
-
-#define K(x) kiss->x
-		seq_printf(seq, "\t%d %d %d %d %d %d %d %d %d %d %d %d\n",
-				K(txdelay), K(persist), K(slottime), K(tailtime),
-				K(fulldup), K(waittime), K(mintime), K(maxkeyup),
-				K(idletime), K(maxdefer), K(tx_inhibit), K(group));
-#undef K
-#ifdef SCC_DEBUG
-		{
-			int reg;
-
-		seq_printf(seq, "\tW ");
-			for (reg = 0; reg < 16; reg++)
-				seq_printf(seq, "%2.2x ", scc->wreg[reg]);
-			seq_printf(seq, "\n");
-			
-		seq_printf(seq, "\tR %2.2x %2.2x XX ", InReg(scc->ctrl,R0), InReg(scc->ctrl,R1));
-			for (reg = 3; reg < 8; reg++)
-				seq_printf(seq, "%2.2x ", InReg(scc->ctrl, reg));
-			seq_printf(seq, "XX ");
-			for (reg = 9; reg < 16; reg++)
-				seq_printf(seq, "%2.2x ", InReg(scc->ctrl, reg));
-			seq_printf(seq, "\n");
-		}
-#endif
-		seq_putc(seq, '\n');
-	}
-
-        return 0;
-}
-
-static const struct seq_operations scc_net_seq_ops = {
-	.start  = scc_net_seq_start,
-	.next   = scc_net_seq_next,
-	.stop   = scc_net_seq_stop,
-	.show   = scc_net_seq_show,
-};
-#endif /* CONFIG_PROC_FS */
-
- 
-/* ******************************************************************** */
-/* * 			Init SCC driver 			      * */
-/* ******************************************************************** */
-
-static int __init scc_init_driver (void)
-{
-	char devname[IFNAMSIZ];
-	
-	printk(banner);
-	
-	sprintf(devname,"%s0", SCC_DriverName);
-	
-	rtnl_lock();
-	if (scc_net_alloc(devname, SCC_Info)) {
-		rtnl_unlock();
-		printk(KERN_ERR "z8530drv: cannot initialize module\n");
-		return -EIO;
-	}
-	rtnl_unlock();
-
-	proc_create_seq("z8530drv", 0, init_net.proc_net, &scc_net_seq_ops);
-
-	return 0;
-}
-
-static void __exit scc_cleanup_driver(void)
-{
-	const unsigned int nr_irqs = irq_get_nr_irqs();
-	io_port ctrl;
-	int k;
-	struct scc_channel *scc;
-	struct net_device *dev;
-	
-	if (Nchips == 0 && (dev = SCC_Info[0].dev)) 
-	{
-		unregister_netdev(dev);
-		free_netdev(dev);
-	}
-
-	/* Guard against chip prattle */
-	local_irq_disable();
-	
-	for (k = 0; k < Nchips; k++)
-		if ( (ctrl = SCC_ctrl[k].chan_A) )
-		{
-			Outb(ctrl, 0);
-			OutReg(ctrl,R9,FHWRES);	/* force hardware reset */
-			udelay(50);
-		}
-		
-	/* To unload the port must be closed so no real IRQ pending */
-	for (k = 0; k < nr_irqs ; k++)
-		if (Ivec[k].used) free_irq(k, NULL);
-		
-	local_irq_enable();
-		
-	/* Now clean up */
-	for (k = 0; k < Nchips*2; k++)
-	{
-		scc = &SCC_Info[k];
-		if (scc->ctrl)
-		{
-			release_region(scc->ctrl, 1);
-			release_region(scc->data, 1);
-		}
-		if (scc->dev)
-		{
-			unregister_netdev(scc->dev);
-			free_netdev(scc->dev);
-		}
-	}
-	
-		
-	if (Vector_Latch)
-		release_region(Vector_Latch, 1);
-
-	remove_proc_entry("z8530drv", init_net.proc_net);
-}
-
-MODULE_AUTHOR("Joerg Reuter <jreuter@yaina.de>");
-MODULE_DESCRIPTION("AX.25 Device Driver for Z8530 based HDLC cards");
-MODULE_LICENSE("GPL");
-module_init(scc_init_driver);
-module_exit(scc_cleanup_driver);
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
deleted file mode 100644
index 4106f0301ab4..000000000000
--- a/drivers/net/hamradio/yam.c
+++ /dev/null
@@ -1,1191 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*****************************************************************************/
-
-/*
- *    yam.c  -- YAM radio modem driver.
- *
- *      Copyright (C) 1998 Frederic Rible F1OAT (frible@teaser.fr)
- *      Adapted from baycom.c driver written by Thomas Sailer (sailer@ife.ee.ethz.ch)
- *
- *  Please note that the GPL allows you to use the driver, NOT the radio.
- *  In order to use the radio, you need a license from the communications
- *  authority of your country.
- *
- *  History:
- *   0.0 F1OAT 06.06.98  Begin of work with baycom.c source code V 0.3
- *   0.1 F1OAT 07.06.98  Add timer polling routine for channel arbitration
- *   0.2 F6FBB 08.06.98  Added delay after FPGA programming
- *   0.3 F6FBB 29.07.98  Delayed PTT implementation for dupmode=2
- *   0.4 F6FBB 30.07.98  Added TxTail, Slottime and Persistence
- *   0.5 F6FBB 01.08.98  Shared IRQs, /proc/net and network statistics
- *   0.6 F6FBB 25.08.98  Added 1200Bds format
- *   0.7 F6FBB 12.09.98  Added to the kernel configuration
- *   0.8 F6FBB 14.10.98  Fixed slottime/persistence timing bug
- *       OK1ZIA 2.09.01  Fixed "kfree_skb on hard IRQ" 
- *                       using dev_kfree_skb_any(). (important in 2.4 kernel)
- */
-
-/*****************************************************************************/
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/if.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/bitops.h>
-#include <linux/random.h>
-#include <asm/io.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/firmware.h>
-#include <linux/platform_device.h>
-
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <net/ax25.h>
-
-#include <linux/kernel.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <net/net_namespace.h>
-
-#include <linux/uaccess.h>
-#include <linux/init.h>
-
-#include <linux/yam.h>
-
-/* --------------------------------------------------------------------- */
-
-static const char yam_drvname[] = "yam";
-static const char yam_drvinfo[] __initconst = KERN_INFO \
-	"YAM driver version 0.8 by F1OAT/F6FBB\n";
-
-/* --------------------------------------------------------------------- */
-
-#define FIRMWARE_9600	"yam/9600.bin"
-#define FIRMWARE_1200	"yam/1200.bin"
-
-#define YAM_9600	1
-#define YAM_1200	2
-
-#define NR_PORTS	4
-#define YAM_MAGIC	0xF10A7654
-
-/* Transmitter states */
-
-#define TX_OFF		0
-#define TX_HEAD		1
-#define TX_DATA		2
-#define TX_CRC1		3
-#define TX_CRC2		4
-#define TX_TAIL		5
-
-#define YAM_MAX_FRAME	1024
-
-#define DEFAULT_BITRATE	9600			/* bps */
-#define DEFAULT_HOLDD	10			/* sec */
-#define DEFAULT_TXD	300			/* ms */
-#define DEFAULT_TXTAIL	10			/* ms */
-#define DEFAULT_SLOT	100			/* ms */
-#define DEFAULT_PERS	64			/* 0->255 */
-
-struct yam_port {
-	int magic;
-	int bitrate;
-	int baudrate;
-	int iobase;
-	int irq;
-	int dupmode;
-
-	struct net_device *dev;
-
-	int nb_rxint;
-	int nb_mdint;
-
-	/* Parameters section */
-
-	int txd;				/* tx delay */
-	int holdd;				/* duplex ptt delay */
-	int txtail;				/* txtail delay */
-	int slot;				/* slottime */
-	int pers;				/* persistence */
-
-	/* Tx section */
-
-	int tx_state;
-	int tx_count;
-	int slotcnt;
-	unsigned char tx_buf[YAM_MAX_FRAME];
-	int tx_len;
-	int tx_crcl, tx_crch;
-	struct sk_buff_head send_queue;		/* Packets awaiting transmission */
-
-	/* Rx section */
-
-	int dcd;
-	unsigned char rx_buf[YAM_MAX_FRAME];
-	int rx_len;
-	int rx_crcl, rx_crch;
-};
-
-struct yam_mcs {
-	unsigned char bits[YAM_FPGA_SIZE];
-	int bitrate;
-	struct yam_mcs *next;
-};
-
-static struct net_device *yam_devs[NR_PORTS];
-
-static struct yam_mcs *yam_data;
-
-static DEFINE_TIMER(yam_timer, NULL);
-
-/* --------------------------------------------------------------------- */
-
-#define RBR(iobase)	(iobase+0)
-#define THR(iobase)	(iobase+0)
-#define IER(iobase)	(iobase+1)
-#define IIR(iobase)	(iobase+2)
-#define FCR(iobase)	(iobase+2)
-#define LCR(iobase)	(iobase+3)
-#define MCR(iobase)	(iobase+4)
-#define LSR(iobase)	(iobase+5)
-#define MSR(iobase)	(iobase+6)
-#define SCR(iobase)	(iobase+7)
-#define DLL(iobase)	(iobase+0)
-#define DLM(iobase)	(iobase+1)
-
-#define YAM_EXTENT	8
-
-/* Interrupt Identification Register Bit Masks */
-#define IIR_NOPEND	1
-#define IIR_MSR		0
-#define IIR_TX		2
-#define IIR_RX		4
-#define IIR_LSR		6
-#define IIR_TIMEOUT	12			/* Fifo mode only */
-
-#define IIR_MASK	0x0F
-
-/* Interrupt Enable Register Bit Masks */
-#define IER_RX		1			/* enable rx interrupt */
-#define IER_TX		2			/* enable tx interrupt */
-#define IER_LSR		4			/* enable line status interrupts */
-#define IER_MSR		8			/* enable modem status interrupts */
-
-/* Modem Control Register Bit Masks */
-#define MCR_DTR		0x01			/* DTR output */
-#define MCR_RTS		0x02			/* RTS output */
-#define MCR_OUT1	0x04			/* OUT1 output (not accessible in RS232) */
-#define MCR_OUT2	0x08			/* Master Interrupt enable (must be set on PCs) */
-#define MCR_LOOP	0x10			/* Loopback enable */
-
-/* Modem Status Register Bit Masks */
-#define MSR_DCTS	0x01			/* Delta CTS input */
-#define MSR_DDSR	0x02			/* Delta DSR */
-#define MSR_DRIN	0x04			/* Delta RI */
-#define MSR_DDCD	0x08			/* Delta DCD */
-#define MSR_CTS		0x10			/* CTS input */
-#define MSR_DSR		0x20			/* DSR input */
-#define MSR_RING	0x40			/* RI  input */
-#define MSR_DCD		0x80			/* DCD input */
-
-/* line status register bit mask */
-#define LSR_RXC		0x01
-#define LSR_OE		0x02
-#define LSR_PE		0x04
-#define LSR_FE		0x08
-#define LSR_BREAK	0x10
-#define LSR_THRE	0x20
-#define LSR_TSRE	0x40
-
-/* Line Control Register Bit Masks */
-#define LCR_DLAB	0x80
-#define LCR_BREAK	0x40
-#define LCR_PZERO	0x28
-#define LCR_PEVEN	0x18
-#define LCR_PODD	0x08
-#define LCR_STOP1	0x00
-#define LCR_STOP2	0x04
-#define LCR_BIT5	0x00
-#define LCR_BIT6	0x02
-#define LCR_BIT7	0x01
-#define LCR_BIT8	0x03
-
-/* YAM Modem <-> UART Port mapping */
-
-#define TX_RDY		MSR_DCTS		/* transmitter ready to send */
-#define RX_DCD		MSR_DCD			/* carrier detect */
-#define RX_FLAG		MSR_RING		/* hdlc flag received */
-#define FPGA_DONE	MSR_DSR			/* FPGA is configured */
-#define PTT_ON		(MCR_RTS|MCR_OUT2)	/* activate PTT */
-#define PTT_OFF		(MCR_DTR|MCR_OUT2)	/* release PTT */
-
-#define ENABLE_RXINT	IER_RX			/* enable uart rx interrupt during rx */
-#define ENABLE_TXINT	IER_MSR			/* enable uart ms interrupt during tx */
-#define ENABLE_RTXINT	(IER_RX|IER_MSR)	/* full duplex operations */
-
-
-/*************************************************************************
-* CRC Tables
-************************************************************************/
-
-static const unsigned char chktabl[256] =
-{0x00, 0x89, 0x12, 0x9b, 0x24, 0xad, 0x36, 0xbf, 0x48, 0xc1, 0x5a, 0xd3, 0x6c, 0xe5, 0x7e,
- 0xf7, 0x81, 0x08, 0x93, 0x1a, 0xa5, 0x2c, 0xb7, 0x3e, 0xc9, 0x40, 0xdb, 0x52, 0xed, 0x64,
- 0xff, 0x76, 0x02, 0x8b, 0x10, 0x99, 0x26, 0xaf, 0x34, 0xbd, 0x4a, 0xc3, 0x58, 0xd1, 0x6e,
- 0xe7, 0x7c, 0xf5, 0x83, 0x0a, 0x91, 0x18, 0xa7, 0x2e, 0xb5, 0x3c, 0xcb, 0x42, 0xd9, 0x50,
- 0xef, 0x66, 0xfd, 0x74, 0x04, 0x8d, 0x16, 0x9f, 0x20, 0xa9, 0x32, 0xbb, 0x4c, 0xc5, 0x5e,
- 0xd7, 0x68, 0xe1, 0x7a, 0xf3, 0x85, 0x0c, 0x97, 0x1e, 0xa1, 0x28, 0xb3, 0x3a, 0xcd, 0x44,
- 0xdf, 0x56, 0xe9, 0x60, 0xfb, 0x72, 0x06, 0x8f, 0x14, 0x9d, 0x22, 0xab, 0x30, 0xb9, 0x4e,
- 0xc7, 0x5c, 0xd5, 0x6a, 0xe3, 0x78, 0xf1, 0x87, 0x0e, 0x95, 0x1c, 0xa3, 0x2a, 0xb1, 0x38,
- 0xcf, 0x46, 0xdd, 0x54, 0xeb, 0x62, 0xf9, 0x70, 0x08, 0x81, 0x1a, 0x93, 0x2c, 0xa5, 0x3e,
- 0xb7, 0x40, 0xc9, 0x52, 0xdb, 0x64, 0xed, 0x76, 0xff, 0x89, 0x00, 0x9b, 0x12, 0xad, 0x24,
- 0xbf, 0x36, 0xc1, 0x48, 0xd3, 0x5a, 0xe5, 0x6c, 0xf7, 0x7e, 0x0a, 0x83, 0x18, 0x91, 0x2e,
- 0xa7, 0x3c, 0xb5, 0x42, 0xcb, 0x50, 0xd9, 0x66, 0xef, 0x74, 0xfd, 0x8b, 0x02, 0x99, 0x10,
- 0xaf, 0x26, 0xbd, 0x34, 0xc3, 0x4a, 0xd1, 0x58, 0xe7, 0x6e, 0xf5, 0x7c, 0x0c, 0x85, 0x1e,
- 0x97, 0x28, 0xa1, 0x3a, 0xb3, 0x44, 0xcd, 0x56, 0xdf, 0x60, 0xe9, 0x72, 0xfb, 0x8d, 0x04,
- 0x9f, 0x16, 0xa9, 0x20, 0xbb, 0x32, 0xc5, 0x4c, 0xd7, 0x5e, 0xe1, 0x68, 0xf3, 0x7a, 0x0e,
- 0x87, 0x1c, 0x95, 0x2a, 0xa3, 0x38, 0xb1, 0x46, 0xcf, 0x54, 0xdd, 0x62, 0xeb, 0x70, 0xf9,
- 0x8f, 0x06, 0x9d, 0x14, 0xab, 0x22, 0xb9, 0x30, 0xc7, 0x4e, 0xd5, 0x5c, 0xe3, 0x6a, 0xf1,
- 0x78};
-static const unsigned char chktabh[256] =
-{0x00, 0x11, 0x23, 0x32, 0x46, 0x57, 0x65, 0x74, 0x8c, 0x9d, 0xaf, 0xbe, 0xca, 0xdb, 0xe9,
- 0xf8, 0x10, 0x01, 0x33, 0x22, 0x56, 0x47, 0x75, 0x64, 0x9c, 0x8d, 0xbf, 0xae, 0xda, 0xcb,
- 0xf9, 0xe8, 0x21, 0x30, 0x02, 0x13, 0x67, 0x76, 0x44, 0x55, 0xad, 0xbc, 0x8e, 0x9f, 0xeb,
- 0xfa, 0xc8, 0xd9, 0x31, 0x20, 0x12, 0x03, 0x77, 0x66, 0x54, 0x45, 0xbd, 0xac, 0x9e, 0x8f,
- 0xfb, 0xea, 0xd8, 0xc9, 0x42, 0x53, 0x61, 0x70, 0x04, 0x15, 0x27, 0x36, 0xce, 0xdf, 0xed,
- 0xfc, 0x88, 0x99, 0xab, 0xba, 0x52, 0x43, 0x71, 0x60, 0x14, 0x05, 0x37, 0x26, 0xde, 0xcf,
- 0xfd, 0xec, 0x98, 0x89, 0xbb, 0xaa, 0x63, 0x72, 0x40, 0x51, 0x25, 0x34, 0x06, 0x17, 0xef,
- 0xfe, 0xcc, 0xdd, 0xa9, 0xb8, 0x8a, 0x9b, 0x73, 0x62, 0x50, 0x41, 0x35, 0x24, 0x16, 0x07,
- 0xff, 0xee, 0xdc, 0xcd, 0xb9, 0xa8, 0x9a, 0x8b, 0x84, 0x95, 0xa7, 0xb6, 0xc2, 0xd3, 0xe1,
- 0xf0, 0x08, 0x19, 0x2b, 0x3a, 0x4e, 0x5f, 0x6d, 0x7c, 0x94, 0x85, 0xb7, 0xa6, 0xd2, 0xc3,
- 0xf1, 0xe0, 0x18, 0x09, 0x3b, 0x2a, 0x5e, 0x4f, 0x7d, 0x6c, 0xa5, 0xb4, 0x86, 0x97, 0xe3,
- 0xf2, 0xc0, 0xd1, 0x29, 0x38, 0x0a, 0x1b, 0x6f, 0x7e, 0x4c, 0x5d, 0xb5, 0xa4, 0x96, 0x87,
- 0xf3, 0xe2, 0xd0, 0xc1, 0x39, 0x28, 0x1a, 0x0b, 0x7f, 0x6e, 0x5c, 0x4d, 0xc6, 0xd7, 0xe5,
- 0xf4, 0x80, 0x91, 0xa3, 0xb2, 0x4a, 0x5b, 0x69, 0x78, 0x0c, 0x1d, 0x2f, 0x3e, 0xd6, 0xc7,
- 0xf5, 0xe4, 0x90, 0x81, 0xb3, 0xa2, 0x5a, 0x4b, 0x79, 0x68, 0x1c, 0x0d, 0x3f, 0x2e, 0xe7,
- 0xf6, 0xc4, 0xd5, 0xa1, 0xb0, 0x82, 0x93, 0x6b, 0x7a, 0x48, 0x59, 0x2d, 0x3c, 0x0e, 0x1f,
- 0xf7, 0xe6, 0xd4, 0xc5, 0xb1, 0xa0, 0x92, 0x83, 0x7b, 0x6a, 0x58, 0x49, 0x3d, 0x2c, 0x1e,
- 0x0f};
-
-/*************************************************************************
-* FPGA functions
-************************************************************************/
-
-static void delay(int ms)
-{
-	unsigned long timeout = jiffies + ((ms * HZ) / 1000);
-	while (time_before(jiffies, timeout))
-		cpu_relax();
-}
-
-/*
- * reset FPGA
- */
-
-static void fpga_reset(int iobase)
-{
-	outb(0, IER(iobase));
-	outb(LCR_DLAB | LCR_BIT5, LCR(iobase));
-	outb(1, DLL(iobase));
-	outb(0, DLM(iobase));
-
-	outb(LCR_BIT5, LCR(iobase));
-	inb(LSR(iobase));
-	inb(MSR(iobase));
-	/* turn off FPGA supply voltage */
-	outb(MCR_OUT1 | MCR_OUT2, MCR(iobase));
-	delay(100);
-	/* turn on FPGA supply voltage again */
-	outb(MCR_DTR | MCR_RTS | MCR_OUT1 | MCR_OUT2, MCR(iobase));
-	delay(100);
-}
-
-/*
- * send one byte to FPGA
- */
-
-static int fpga_write(int iobase, unsigned char wrd)
-{
-	unsigned char bit;
-	int k;
-	unsigned long timeout = jiffies + HZ / 10;
-
-	for (k = 0; k < 8; k++) {
-		bit = (wrd & 0x80) ? (MCR_RTS | MCR_DTR) : MCR_DTR;
-		outb(bit | MCR_OUT1 | MCR_OUT2, MCR(iobase));
-		wrd <<= 1;
-		outb(0xfc, THR(iobase));
-		while ((inb(LSR(iobase)) & LSR_TSRE) == 0)
-			if (time_after(jiffies, timeout))
-				return -1;
-	}
-
-	return 0;
-}
-
-/*
- * predef should be 0 for loading user defined mcs
- * predef should be YAM_1200 for loading predef 1200 mcs
- * predef should be YAM_9600 for loading predef 9600 mcs
- */
-static unsigned char *add_mcs(unsigned char *bits, int bitrate,
-			      unsigned int predef)
-{
-	const char *fw_name[2] = {FIRMWARE_9600, FIRMWARE_1200};
-	const struct firmware *fw;
-	struct platform_device *pdev;
-	struct yam_mcs *p;
-	int err;
-
-	switch (predef) {
-	case 0:
-		fw = NULL;
-		break;
-	case YAM_1200:
-	case YAM_9600:
-		predef--;
-		pdev = platform_device_register_simple("yam", 0, NULL, 0);
-		if (IS_ERR(pdev)) {
-			printk(KERN_ERR "yam: Failed to register firmware\n");
-			return NULL;
-		}
-		err = request_firmware(&fw, fw_name[predef], &pdev->dev);
-		platform_device_unregister(pdev);
-		if (err) {
-			printk(KERN_ERR "Failed to load firmware \"%s\"\n",
-			       fw_name[predef]);
-			return NULL;
-		}
-		if (fw->size != YAM_FPGA_SIZE) {
-			printk(KERN_ERR "Bogus length %zu in firmware \"%s\"\n",
-			       fw->size, fw_name[predef]);
-			release_firmware(fw);
-			return NULL;
-		}
-		bits = (unsigned char *)fw->data;
-		break;
-	default:
-		printk(KERN_ERR "yam: Invalid predef number %u\n", predef);
-		return NULL;
-	}
-
-	/* If it already exists, replace the bit data */
-	p = yam_data;
-	while (p) {
-		if (p->bitrate == bitrate) {
-			memcpy(p->bits, bits, YAM_FPGA_SIZE);
-			goto out;
-		}
-		p = p->next;
-	}
-
-	/* Allocate a new mcs */
-	if ((p = kmalloc_obj(struct yam_mcs)) == NULL) {
-		release_firmware(fw);
-		return NULL;
-	}
-	memcpy(p->bits, bits, YAM_FPGA_SIZE);
-	p->bitrate = bitrate;
-	p->next = yam_data;
-	yam_data = p;
- out:
-	release_firmware(fw);
-	return p->bits;
-}
-
-static unsigned char *get_mcs(int bitrate)
-{
-	struct yam_mcs *p;
-
-	p = yam_data;
-	while (p) {
-		if (p->bitrate == bitrate)
-			return p->bits;
-		p = p->next;
-	}
-
-	/* Load predefined mcs data */
-	switch (bitrate) {
-	case 1200:
-		/* setting predef as YAM_1200 for loading predef 1200 mcs */
-		return add_mcs(NULL, bitrate, YAM_1200);
-	default:
-		/* setting predef as YAM_9600 for loading predef 9600 mcs */
-		return add_mcs(NULL, bitrate, YAM_9600);
-	}
-}
-
-/*
- * download bitstream to FPGA
- * data is contained in bits[] array in yam1200.h resp. yam9600.h
- */
-
-static int fpga_download(int iobase, int bitrate)
-{
-	int i, rc;
-	unsigned char *pbits;
-
-	pbits = get_mcs(bitrate);
-	if (pbits == NULL)
-		return -1;
-
-	fpga_reset(iobase);
-	for (i = 0; i < YAM_FPGA_SIZE; i++) {
-		if (fpga_write(iobase, pbits[i])) {
-			printk(KERN_ERR "yam: error in write cycle\n");
-			return -1;			/* write... */
-		}
-	}
-
-	fpga_write(iobase, 0xFF);
-	rc = inb(MSR(iobase));		/* check DONE signal */
-
-	/* Needed for some hardwares */
-	delay(50);
-
-	return (rc & MSR_DSR) ? 0 : -1;
-}
-
-
-/************************************************************************
-* Serial port init 
-************************************************************************/
-
-static void yam_set_uart(struct net_device *dev)
-{
-	struct yam_port *yp = netdev_priv(dev);
-	int divisor = 115200 / yp->baudrate;
-
-	outb(0, IER(dev->base_addr));
-	outb(LCR_DLAB | LCR_BIT8, LCR(dev->base_addr));
-	outb(divisor, DLL(dev->base_addr));
-	outb(0, DLM(dev->base_addr));
-	outb(LCR_BIT8, LCR(dev->base_addr));
-	outb(PTT_OFF, MCR(dev->base_addr));
-	outb(0x00, FCR(dev->base_addr));
-
-	/* Flush pending irq */
-
-	inb(RBR(dev->base_addr));
-	inb(MSR(dev->base_addr));
-
-	/* Enable rx irq */
-
-	outb(ENABLE_RTXINT, IER(dev->base_addr));
-}
-
-
-/* --------------------------------------------------------------------- */
-
-enum uart {
-	c_uart_unknown, c_uart_8250,
-	c_uart_16450, c_uart_16550, c_uart_16550A
-};
-
-static const char *uart_str[] =
-{"unknown", "8250", "16450", "16550", "16550A"};
-
-static enum uart yam_check_uart(unsigned int iobase)
-{
-	unsigned char b1, b2, b3;
-	enum uart u;
-	enum uart uart_tab[] =
-	{c_uart_16450, c_uart_unknown, c_uart_16550, c_uart_16550A};
-
-	b1 = inb(MCR(iobase));
-	outb(b1 | 0x10, MCR(iobase));	/* loopback mode */
-	b2 = inb(MSR(iobase));
-	outb(0x1a, MCR(iobase));
-	b3 = inb(MSR(iobase)) & 0xf0;
-	outb(b1, MCR(iobase));		/* restore old values */
-	outb(b2, MSR(iobase));
-	if (b3 != 0x90)
-		return c_uart_unknown;
-	inb(RBR(iobase));
-	inb(RBR(iobase));
-	outb(0x01, FCR(iobase));	/* enable FIFOs */
-	u = uart_tab[(inb(IIR(iobase)) >> 6) & 3];
-	if (u == c_uart_16450) {
-		outb(0x5a, SCR(iobase));
-		b1 = inb(SCR(iobase));
-		outb(0xa5, SCR(iobase));
-		b2 = inb(SCR(iobase));
-		if ((b1 != 0x5a) || (b2 != 0xa5))
-			u = c_uart_8250;
-	}
-	return u;
-}
-
-/******************************************************************************
-* Rx Section
-******************************************************************************/
-static inline void yam_rx_flag(struct net_device *dev, struct yam_port *yp)
-{
-	if (yp->dcd && yp->rx_len >= 3 && yp->rx_len < YAM_MAX_FRAME) {
-		int pkt_len = yp->rx_len - 2 + 1;	/* -CRC + kiss */
-		struct sk_buff *skb;
-
-		if ((yp->rx_crch & yp->rx_crcl) != 0xFF) {
-			/* Bad crc */
-		} else {
-			if (!(skb = dev_alloc_skb(pkt_len))) {
-				printk(KERN_WARNING "%s: memory squeeze, dropping packet\n", dev->name);
-				++dev->stats.rx_dropped;
-			} else {
-				unsigned char *cp;
-				cp = skb_put(skb, pkt_len);
-				*cp++ = 0;		/* KISS kludge */
-				memcpy(cp, yp->rx_buf, pkt_len - 1);
-				skb->protocol = ax25_type_trans(skb, dev);
-				netif_rx(skb);
-				++dev->stats.rx_packets;
-			}
-		}
-	}
-	yp->rx_len = 0;
-	yp->rx_crcl = 0x21;
-	yp->rx_crch = 0xf3;
-}
-
-static inline void yam_rx_byte(struct net_device *dev, struct yam_port *yp, unsigned char rxb)
-{
-	if (yp->rx_len < YAM_MAX_FRAME) {
-		unsigned char c = yp->rx_crcl;
-		yp->rx_crcl = (chktabl[c] ^ yp->rx_crch);
-		yp->rx_crch = (chktabh[c] ^ rxb);
-		yp->rx_buf[yp->rx_len++] = rxb;
-	}
-}
-
-/********************************************************************************
-* TX Section
-********************************************************************************/
-
-static void ptt_on(struct net_device *dev)
-{
-	outb(PTT_ON, MCR(dev->base_addr));
-}
-
-static void ptt_off(struct net_device *dev)
-{
-	outb(PTT_OFF, MCR(dev->base_addr));
-}
-
-static netdev_tx_t yam_send_packet(struct sk_buff *skb,
-					 struct net_device *dev)
-{
-	struct yam_port *yp = netdev_priv(dev);
-
-	if (skb->protocol == htons(ETH_P_IP))
-		return ax25_ip_xmit(skb);
-
-	skb_queue_tail(&yp->send_queue, skb);
-	netif_trans_update(dev);
-	return NETDEV_TX_OK;
-}
-
-static void yam_start_tx(struct net_device *dev, struct yam_port *yp)
-{
-	if ((yp->tx_state == TX_TAIL) || (yp->txd == 0))
-		yp->tx_count = 1;
-	else
-		yp->tx_count = (yp->bitrate * yp->txd) / 8000;
-	yp->tx_state = TX_HEAD;
-	ptt_on(dev);
-}
-
-static void yam_arbitrate(struct net_device *dev)
-{
-	struct yam_port *yp = netdev_priv(dev);
-
-	if (yp->magic != YAM_MAGIC || yp->tx_state != TX_OFF ||
-	    skb_queue_empty(&yp->send_queue))
-		return;
-	/* tx_state is TX_OFF and there is data to send */
-
-	if (yp->dupmode) {
-		/* Full duplex mode, don't wait */
-		yam_start_tx(dev, yp);
-		return;
-	}
-	if (yp->dcd) {
-		/* DCD on, wait slotime ... */
-		yp->slotcnt = yp->slot / 10;
-		return;
-	}
-	/* Is slottime passed ? */
-	if ((--yp->slotcnt) > 0)
-		return;
-
-	yp->slotcnt = yp->slot / 10;
-
-	/* is random > persist ? */
-	if (get_random_u8() > yp->pers)
-		return;
-
-	yam_start_tx(dev, yp);
-}
-
-static void yam_dotimer(struct timer_list *unused)
-{
-	int i;
-
-	for (i = 0; i < NR_PORTS; i++) {
-		struct net_device *dev = yam_devs[i];
-		if (dev && netif_running(dev))
-			yam_arbitrate(dev);
-	}
-	yam_timer.expires = jiffies + HZ / 100;
-	add_timer(&yam_timer);
-}
-
-static void yam_tx_byte(struct net_device *dev, struct yam_port *yp)
-{
-	struct sk_buff *skb;
-	unsigned char b, temp;
-
-	switch (yp->tx_state) {
-	case TX_OFF:
-		break;
-	case TX_HEAD:
-		if (--yp->tx_count <= 0) {
-			if (!(skb = skb_dequeue(&yp->send_queue))) {
-				ptt_off(dev);
-				yp->tx_state = TX_OFF;
-				break;
-			}
-			yp->tx_state = TX_DATA;
-			if (skb->data[0] != 0) {
-/*                              do_kiss_params(s, skb->data, skb->len); */
-				dev_kfree_skb_any(skb);
-				break;
-			}
-			yp->tx_len = skb->len - 1;	/* strip KISS byte */
-			if (yp->tx_len >= YAM_MAX_FRAME || yp->tx_len < 2) {
-				dev_kfree_skb_any(skb);
-				break;
-			}
-			skb_copy_from_linear_data_offset(skb, 1,
-							 yp->tx_buf,
-							 yp->tx_len);
-			dev_kfree_skb_any(skb);
-			yp->tx_count = 0;
-			yp->tx_crcl = 0x21;
-			yp->tx_crch = 0xf3;
-			yp->tx_state = TX_DATA;
-		}
-		break;
-	case TX_DATA:
-		b = yp->tx_buf[yp->tx_count++];
-		outb(b, THR(dev->base_addr));
-		temp = yp->tx_crcl;
-		yp->tx_crcl = chktabl[temp] ^ yp->tx_crch;
-		yp->tx_crch = chktabh[temp] ^ b;
-		if (yp->tx_count >= yp->tx_len) {
-			yp->tx_state = TX_CRC1;
-		}
-		break;
-	case TX_CRC1:
-		yp->tx_crch = chktabl[yp->tx_crcl] ^ yp->tx_crch;
-		yp->tx_crcl = chktabh[yp->tx_crcl] ^ chktabl[yp->tx_crch] ^ 0xff;
-		outb(yp->tx_crcl, THR(dev->base_addr));
-		yp->tx_state = TX_CRC2;
-		break;
-	case TX_CRC2:
-		outb(chktabh[yp->tx_crch] ^ 0xFF, THR(dev->base_addr));
-		if (skb_queue_empty(&yp->send_queue)) {
-			yp->tx_count = (yp->bitrate * yp->txtail) / 8000;
-			if (yp->dupmode == 2)
-				yp->tx_count += (yp->bitrate * yp->holdd) / 8;
-			if (yp->tx_count == 0)
-				yp->tx_count = 1;
-			yp->tx_state = TX_TAIL;
-		} else {
-			yp->tx_count = 1;
-			yp->tx_state = TX_HEAD;
-		}
-		++dev->stats.tx_packets;
-		break;
-	case TX_TAIL:
-		if (--yp->tx_count <= 0) {
-			yp->tx_state = TX_OFF;
-			ptt_off(dev);
-		}
-		break;
-	}
-}
-
-/***********************************************************************************
-* ISR routine
-************************************************************************************/
-
-static irqreturn_t yam_interrupt(int irq, void *dev_id)
-{
-	struct net_device *dev;
-	struct yam_port *yp;
-	unsigned char iir;
-	int counter = 100;
-	int i;
-	int handled = 0;
-
-	for (i = 0; i < NR_PORTS; i++) {
-		dev = yam_devs[i];
-		yp = netdev_priv(dev);
-
-		if (!netif_running(dev))
-			continue;
-
-		while ((iir = IIR_MASK & inb(IIR(dev->base_addr))) != IIR_NOPEND) {
-			unsigned char msr = inb(MSR(dev->base_addr));
-			unsigned char lsr = inb(LSR(dev->base_addr));
-			unsigned char rxb;
-
-			handled = 1;
-
-			if (lsr & LSR_OE)
-				++dev->stats.rx_fifo_errors;
-
-			yp->dcd = (msr & RX_DCD) ? 1 : 0;
-
-			if (--counter <= 0) {
-				printk(KERN_ERR "%s: too many irq iir=%d\n",
-						dev->name, iir);
-				goto out;
-			}
-			if (msr & TX_RDY) {
-				++yp->nb_mdint;
-				yam_tx_byte(dev, yp);
-			}
-			if (lsr & LSR_RXC) {
-				++yp->nb_rxint;
-				rxb = inb(RBR(dev->base_addr));
-				if (msr & RX_FLAG)
-					yam_rx_flag(dev, yp);
-				else
-					yam_rx_byte(dev, yp, rxb);
-			}
-		}
-	}
-out:
-	return IRQ_RETVAL(handled);
-}
-
-#ifdef CONFIG_PROC_FS
-
-static void *yam_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	return (*pos < NR_PORTS) ? yam_devs[*pos] : NULL;
-}
-
-static void *yam_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	return (*pos < NR_PORTS) ? yam_devs[*pos] : NULL;
-}
-
-static void yam_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int yam_seq_show(struct seq_file *seq, void *v)
-{
-	struct net_device *dev = v;
-	const struct yam_port *yp = netdev_priv(dev);
-
-	seq_printf(seq, "Device %s\n", dev->name);
-	seq_printf(seq, "  Up       %d\n", netif_running(dev));
-	seq_printf(seq, "  Speed    %u\n", yp->bitrate);
-	seq_printf(seq, "  IoBase   0x%x\n", yp->iobase);
-	seq_printf(seq, "  BaudRate %u\n", yp->baudrate);
-	seq_printf(seq, "  IRQ      %u\n", yp->irq);
-	seq_printf(seq, "  TxState  %u\n", yp->tx_state);
-	seq_printf(seq, "  Duplex   %u\n", yp->dupmode);
-	seq_printf(seq, "  HoldDly  %u\n", yp->holdd);
-	seq_printf(seq, "  TxDelay  %u\n", yp->txd);
-	seq_printf(seq, "  TxTail   %u\n", yp->txtail);
-	seq_printf(seq, "  SlotTime %u\n", yp->slot);
-	seq_printf(seq, "  Persist  %u\n", yp->pers);
-	seq_printf(seq, "  TxFrames %lu\n", dev->stats.tx_packets);
-	seq_printf(seq, "  RxFrames %lu\n", dev->stats.rx_packets);
-	seq_printf(seq, "  TxInt    %u\n", yp->nb_mdint);
-	seq_printf(seq, "  RxInt    %u\n", yp->nb_rxint);
-	seq_printf(seq, "  RxOver   %lu\n", dev->stats.rx_fifo_errors);
-	seq_printf(seq, "\n");
-	return 0;
-}
-
-static const struct seq_operations yam_seqops = {
-	.start = yam_seq_start,
-	.next = yam_seq_next,
-	.stop = yam_seq_stop,
-	.show = yam_seq_show,
-};
-#endif
-
-
-/* --------------------------------------------------------------------- */
-
-static int yam_open(struct net_device *dev)
-{
-	struct yam_port *yp = netdev_priv(dev);
-	enum uart u;
-	int i;
-	int ret=0;
-
-	printk(KERN_INFO "Trying %s at iobase 0x%lx irq %u\n", dev->name, dev->base_addr, dev->irq);
-
-	if (!yp->bitrate)
-		return -ENXIO;
-	if (!dev->base_addr || dev->base_addr > 0x1000 - YAM_EXTENT ||
-		dev->irq < 2 || dev->irq > 15) {
-		return -ENXIO;
-	}
-	if (!request_region(dev->base_addr, YAM_EXTENT, dev->name))
-	{
-		printk(KERN_ERR "%s: cannot 0x%lx busy\n", dev->name, dev->base_addr);
-		return -EACCES;
-	}
-	if ((u = yam_check_uart(dev->base_addr)) == c_uart_unknown) {
-		printk(KERN_ERR "%s: cannot find uart type\n", dev->name);
-		ret = -EIO;
-		goto out_release_base;
-	}
-	if (fpga_download(dev->base_addr, yp->bitrate)) {
-		printk(KERN_ERR "%s: cannot init FPGA\n", dev->name);
-		ret = -EIO;
-		goto out_release_base;
-	}
-	outb(0, IER(dev->base_addr));
-	if (request_irq(dev->irq, yam_interrupt, IRQF_SHARED, dev->name, dev)) {
-		printk(KERN_ERR "%s: irq %d busy\n", dev->name, dev->irq);
-		ret = -EBUSY;
-		goto out_release_base;
-	}
-
-	yam_set_uart(dev);
-
-	netif_start_queue(dev);
-	
-	yp->slotcnt = yp->slot / 10;
-
-	/* Reset overruns for all ports - FPGA programming makes overruns */
-	for (i = 0; i < NR_PORTS; i++) {
-		struct net_device *yam_dev = yam_devs[i];
-
-		inb(LSR(yam_dev->base_addr));
-		yam_dev->stats.rx_fifo_errors = 0;
-	}
-
-	printk(KERN_INFO "%s at iobase 0x%lx irq %u uart %s\n", dev->name, dev->base_addr, dev->irq,
-		   uart_str[u]);
-	return 0;
-
-out_release_base:
-	release_region(dev->base_addr, YAM_EXTENT);
-	return ret;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int yam_close(struct net_device *dev)
-{
-	struct sk_buff *skb;
-	struct yam_port *yp = netdev_priv(dev);
-
-	if (!dev)
-		return -EINVAL;
-
-	/*
-	 * disable interrupts
-	 */
-	outb(0, IER(dev->base_addr));
-	outb(1, MCR(dev->base_addr));
-	/* Remove IRQ handler if last */
-	free_irq(dev->irq,dev);
-	release_region(dev->base_addr, YAM_EXTENT);
-	netif_stop_queue(dev);
-	while ((skb = skb_dequeue(&yp->send_queue)))
-		dev_kfree_skb(skb);
-
-	printk(KERN_INFO "%s: close yam at iobase 0x%lx irq %u\n",
-		   yam_drvname, dev->base_addr, dev->irq);
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int yam_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
-{
-	struct yam_port *yp = netdev_priv(dev);
-	struct yamdrv_ioctl_cfg yi;
-	struct yamdrv_ioctl_mcs *ym;
-	int ioctl_cmd;
-
-	if (copy_from_user(&ioctl_cmd, data, sizeof(int)))
-		return -EFAULT;
-
-	if (yp->magic != YAM_MAGIC)
-		return -EINVAL;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (cmd != SIOCDEVPRIVATE)
-		return -EINVAL;
-
-	switch (ioctl_cmd) {
-
-	case SIOCYAMRESERVED:
-		return -EINVAL;			/* unused */
-
-	case SIOCYAMSMCS:
-		if (netif_running(dev))
-			return -EINVAL;		/* Cannot change this parameter when up */
-		ym = memdup_user(data, sizeof(struct yamdrv_ioctl_mcs));
-		if (IS_ERR(ym))
-			return PTR_ERR(ym);
-		if (ym->cmd != SIOCYAMSMCS || ym->bitrate > YAM_MAXBITRATE) {
-			kfree(ym);
-			return -EINVAL;
-		}
-		/* setting predef as 0 for loading userdefined mcs data */
-		add_mcs(ym->bits, ym->bitrate, 0);
-		kfree(ym);
-		break;
-
-	case SIOCYAMSCFG:
-		if (!capable(CAP_SYS_RAWIO))
-			return -EPERM;
-		if (copy_from_user(&yi, data, sizeof(struct yamdrv_ioctl_cfg)))
-			return -EFAULT;
-
-		if (yi.cmd != SIOCYAMSCFG)
-			return -EINVAL;
-		if ((yi.cfg.mask & YAM_IOBASE) && netif_running(dev))
-			return -EINVAL;		/* Cannot change this parameter when up */
-		if ((yi.cfg.mask & YAM_IRQ) && netif_running(dev))
-			return -EINVAL;		/* Cannot change this parameter when up */
-		if ((yi.cfg.mask & YAM_BITRATE) && netif_running(dev))
-			return -EINVAL;		/* Cannot change this parameter when up */
-		if ((yi.cfg.mask & YAM_BAUDRATE) && netif_running(dev))
-			return -EINVAL;		/* Cannot change this parameter when up */
-
-		if (yi.cfg.mask & YAM_IOBASE) {
-			yp->iobase = yi.cfg.iobase;
-			dev->base_addr = yi.cfg.iobase;
-		}
-		if (yi.cfg.mask & YAM_IRQ) {
-			if (yi.cfg.irq > 15)
-				return -EINVAL;
-			yp->irq = yi.cfg.irq;
-			dev->irq = yi.cfg.irq;
-		}
-		if (yi.cfg.mask & YAM_BITRATE) {
-			if (yi.cfg.bitrate > YAM_MAXBITRATE)
-				return -EINVAL;
-			yp->bitrate = yi.cfg.bitrate;
-		}
-		if (yi.cfg.mask & YAM_BAUDRATE) {
-			if (yi.cfg.baudrate > YAM_MAXBAUDRATE)
-				return -EINVAL;
-			yp->baudrate = yi.cfg.baudrate;
-		}
-		if (yi.cfg.mask & YAM_MODE) {
-			if (yi.cfg.mode > YAM_MAXMODE)
-				return -EINVAL;
-			yp->dupmode = yi.cfg.mode;
-		}
-		if (yi.cfg.mask & YAM_HOLDDLY) {
-			if (yi.cfg.holddly > YAM_MAXHOLDDLY)
-				return -EINVAL;
-			yp->holdd = yi.cfg.holddly;
-		}
-		if (yi.cfg.mask & YAM_TXDELAY) {
-			if (yi.cfg.txdelay > YAM_MAXTXDELAY)
-				return -EINVAL;
-			yp->txd = yi.cfg.txdelay;
-		}
-		if (yi.cfg.mask & YAM_TXTAIL) {
-			if (yi.cfg.txtail > YAM_MAXTXTAIL)
-				return -EINVAL;
-			yp->txtail = yi.cfg.txtail;
-		}
-		if (yi.cfg.mask & YAM_PERSIST) {
-			if (yi.cfg.persist > YAM_MAXPERSIST)
-				return -EINVAL;
-			yp->pers = yi.cfg.persist;
-		}
-		if (yi.cfg.mask & YAM_SLOTTIME) {
-			if (yi.cfg.slottime > YAM_MAXSLOTTIME)
-				return -EINVAL;
-			yp->slot = yi.cfg.slottime;
-			yp->slotcnt = yp->slot / 10;
-		}
-		break;
-
-	case SIOCYAMGCFG:
-		memset(&yi, 0, sizeof(yi));
-		yi.cfg.mask = 0xffffffff;
-		yi.cfg.iobase = yp->iobase;
-		yi.cfg.irq = yp->irq;
-		yi.cfg.bitrate = yp->bitrate;
-		yi.cfg.baudrate = yp->baudrate;
-		yi.cfg.mode = yp->dupmode;
-		yi.cfg.txdelay = yp->txd;
-		yi.cfg.holddly = yp->holdd;
-		yi.cfg.txtail = yp->txtail;
-		yi.cfg.persist = yp->pers;
-		yi.cfg.slottime = yp->slot;
-		if (copy_to_user(data, &yi, sizeof(struct yamdrv_ioctl_cfg)))
-			return -EFAULT;
-		break;
-
-	default:
-		return -EINVAL;
-
-	}
-
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static int yam_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct sockaddr *sa = (struct sockaddr *) addr;
-
-	/* addr is an AX.25 shifted ASCII mac address */
-	dev_addr_set(dev, sa->sa_data);
-	return 0;
-}
-
-/* --------------------------------------------------------------------- */
-
-static const struct net_device_ops yam_netdev_ops = {
-	.ndo_open	     = yam_open,
-	.ndo_stop	     = yam_close,
-	.ndo_start_xmit      = yam_send_packet,
-	.ndo_siocdevprivate  = yam_siocdevprivate,
-	.ndo_set_mac_address = yam_set_mac_address,
-};
-
-static void yam_setup(struct net_device *dev)
-{
-	struct yam_port *yp = netdev_priv(dev);
-
-	yp->magic = YAM_MAGIC;
-	yp->bitrate = DEFAULT_BITRATE;
-	yp->baudrate = DEFAULT_BITRATE * 2;
-	yp->iobase = 0;
-	yp->irq = 0;
-	yp->dupmode = 0;
-	yp->holdd = DEFAULT_HOLDD;
-	yp->txd = DEFAULT_TXD;
-	yp->txtail = DEFAULT_TXTAIL;
-	yp->slot = DEFAULT_SLOT;
-	yp->pers = DEFAULT_PERS;
-	yp->dev = dev;
-
-	dev->base_addr = yp->iobase;
-	dev->irq = yp->irq;
-
-	skb_queue_head_init(&yp->send_queue);
-
-	dev->netdev_ops = &yam_netdev_ops;
-	dev->header_ops = &ax25_header_ops;
-
-	dev->type = ARPHRD_AX25;
-	dev->hard_header_len = AX25_MAX_HEADER_LEN;
-	dev->mtu = AX25_MTU;
-	dev->addr_len = AX25_ADDR_LEN;
-	memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
-	dev_addr_set(dev, (u8 *)&ax25_defaddr);
-}
-
-static int __init yam_init_driver(void)
-{
-	struct net_device *dev;
-	int i, err;
-	char name[IFNAMSIZ];
-
-	printk(yam_drvinfo);
-
-	for (i = 0; i < NR_PORTS; i++) {
-		sprintf(name, "yam%d", i);
-		
-		dev = alloc_netdev(sizeof(struct yam_port), name,
-				   NET_NAME_UNKNOWN, yam_setup);
-		if (!dev) {
-			pr_err("yam: cannot allocate net device\n");
-			err = -ENOMEM;
-			goto error;
-		}
-		
-		err = register_netdev(dev);
-		if (err) {
-			printk(KERN_WARNING "yam: cannot register net device %s\n", dev->name);
-			free_netdev(dev);
-			goto error;
-		}
-		yam_devs[i] = dev;
-
-	}
-
-	timer_setup(&yam_timer, yam_dotimer, 0);
-	yam_timer.expires = jiffies + HZ / 100;
-	add_timer(&yam_timer);
-
-	proc_create_seq("yam", 0444, init_net.proc_net, &yam_seqops);
-	return 0;
- error:
-	while (--i >= 0) {
-		unregister_netdev(yam_devs[i]);
-		free_netdev(yam_devs[i]);
-	}
-	return err;
-}
-
-/* --------------------------------------------------------------------- */
-
-static void __exit yam_cleanup_driver(void)
-{
-	struct yam_mcs *p;
-	int i;
-
-	timer_delete_sync(&yam_timer);
-	for (i = 0; i < NR_PORTS; i++) {
-		struct net_device *dev = yam_devs[i];
-		if (dev) {
-			unregister_netdev(dev);
-			free_netdev(dev);
-		}
-	}
-
-	while (yam_data) {
-		p = yam_data;
-		yam_data = yam_data->next;
-		kfree(p);
-	}
-
-	remove_proc_entry("yam", init_net.proc_net);
-}
-
-/* --------------------------------------------------------------------- */
-
-MODULE_AUTHOR("Frederic Rible F1OAT frible@teaser.fr");
-MODULE_DESCRIPTION("Yam amateur radio modem driver");
-MODULE_LICENSE("GPL");
-MODULE_FIRMWARE(FIRMWARE_1200);
-MODULE_FIRMWARE(FIRMWARE_9600);
-
-module_init(yam_init_driver);
-module_exit(yam_cleanup_driver);
-
-/* --------------------------------------------------------------------- */
-
diff --git a/drivers/net/hamradio/z8530.h b/drivers/net/hamradio/z8530.h
deleted file mode 100644
index 1655901d713b..000000000000
--- a/drivers/net/hamradio/z8530.h
+++ /dev/null
@@ -1,246 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/* 8530 Serial Communications Controller Register definitions */
-#define	FLAG	0x7e
-
-/* Write Register 0 */
-#define	R0	0		/* Register selects */
-#define	R1	1
-#define	R2	2
-#define	R3	3
-#define	R4	4
-#define	R5	5
-#define	R6	6
-#define	R7	7
-#define	R8	8
-#define	R9	9
-#define	R10	10
-#define	R11	11
-#define	R12	12
-#define	R13	13
-#define	R14	14
-#define	R15	15
-
-#define	NULLCODE	0	/* Null Code */
-#define	POINT_HIGH	0x8	/* Select upper half of registers */
-#define	RES_EXT_INT	0x10	/* Reset Ext. Status Interrupts */
-#define	SEND_ABORT	0x18	/* HDLC Abort */
-#define	RES_RxINT_FC	0x20	/* Reset RxINT on First Character */
-#define	RES_Tx_P	0x28	/* Reset TxINT Pending */
-#define	ERR_RES		0x30	/* Error Reset */
-#define	RES_H_IUS	0x38	/* Reset highest IUS */
-
-#define	RES_Rx_CRC	0x40	/* Reset Rx CRC Checker */
-#define	RES_Tx_CRC	0x80	/* Reset Tx CRC Checker */
-#define	RES_EOM_L	0xC0	/* Reset EOM latch */
-
-/* Write Register 1 */
-
-#define	EXT_INT_ENAB	0x1	/* Ext Int Enable */
-#define	TxINT_ENAB	0x2	/* Tx Int Enable */
-#define	PAR_SPEC	0x4	/* Parity is special condition */
-
-#define	RxINT_DISAB	0	/* Rx Int Disable */
-#define	RxINT_FCERR	0x8	/* Rx Int on First Character Only or Error */
-#define	INT_ALL_Rx	0x10	/* Int on all Rx Characters or error */
-#define	INT_ERR_Rx	0x18	/* Int on error only */
-
-#define	WT_RDY_RT	0x20	/* Wait/Ready on R/T */
-#define	WT_FN_RDYFN	0x40	/* Wait/FN/Ready FN */
-#define	WT_RDY_ENAB	0x80	/* Wait/Ready Enable */
-
-/* Write Register #2 (Interrupt Vector) */
-
-/* Write Register 3 */
-
-#define	RxENABLE	0x1	/* Rx Enable */
-#define	SYNC_L_INH	0x2	/* Sync Character Load Inhibit */
-#define	ADD_SM		0x4	/* Address Search Mode (SDLC) */
-#define	RxCRC_ENAB	0x8	/* Rx CRC Enable */
-#define	ENT_HM		0x10	/* Enter Hunt Mode */
-#define	AUTO_ENAB	0x20	/* Auto Enables */
-#define	Rx5		0x0	/* Rx 5 Bits/Character */
-#define	Rx7		0x40	/* Rx 7 Bits/Character */
-#define	Rx6		0x80	/* Rx 6 Bits/Character */
-#define	Rx8		0xc0	/* Rx 8 Bits/Character */
-
-/* Write Register 4 */
-
-#define	PAR_ENA		0x1	/* Parity Enable */
-#define	PAR_EVEN	0x2	/* Parity Even/Odd* */
-
-#define	SYNC_ENAB	0	/* Sync Modes Enable */
-#define	SB1		0x4	/* 1 stop bit/char */
-#define	SB15		0x8	/* 1.5 stop bits/char */
-#define	SB2		0xc	/* 2 stop bits/char */
-
-#define	MONSYNC		0	/* 8 Bit Sync character */
-#define	BISYNC		0x10	/* 16 bit sync character */
-#define	SDLC		0x20	/* SDLC Mode (01111110 Sync Flag) */
-#define	EXTSYNC		0x30	/* External Sync Mode */
-
-#define	X1CLK		0x0	/* x1 clock mode */
-#define	X16CLK		0x40	/* x16 clock mode */
-#define	X32CLK		0x80	/* x32 clock mode */
-#define	X64CLK		0xC0	/* x64 clock mode */
-
-/* Write Register 5 */
-
-#define	TxCRC_ENAB	0x1	/* Tx CRC Enable */
-#define	RTS		0x2	/* RTS */
-#define	SDLC_CRC	0x4	/* SDLC/CRC-16 */
-#define	TxENAB		0x8	/* Tx Enable */
-#define	SND_BRK		0x10	/* Send Break */
-#define	Tx5		0x0	/* Tx 5 bits (or less)/character */
-#define	Tx7		0x20	/* Tx 7 bits/character */
-#define	Tx6		0x40	/* Tx 6 bits/character */
-#define	Tx8		0x60	/* Tx 8 bits/character */
-#define	DTR		0x80	/* DTR */
-
-/* Write Register 6 (Sync bits 0-7/SDLC Address Field) */
-
-/* Write Register 7 (Sync bits 8-15/SDLC 01111110) */
-
-/* Write Register 8 (transmit buffer) */
-
-/* Write Register 9 (Master interrupt control) */
-#define	VIS	1	/* Vector Includes Status */
-#define	NV	2	/* No Vector */
-#define	DLC	4	/* Disable Lower Chain */
-#define	MIE	8	/* Master Interrupt Enable */
-#define	STATHI	0x10	/* Status high */
-#define	NORESET	0	/* No reset on write to R9 */
-#define	CHRB	0x40	/* Reset channel B */
-#define	CHRA	0x80	/* Reset channel A */
-#define	FHWRES	0xc0	/* Force hardware reset */
-
-/* Write Register 10 (misc control bits) */
-#define	BIT6	1	/* 6 bit/8bit sync */
-#define	LOOPMODE 2	/* SDLC Loop mode */
-#define	ABUNDER	4	/* Abort/flag on SDLC xmit underrun */
-#define	MARKIDLE 8	/* Mark/flag on idle */
-#define	GAOP	0x10	/* Go active on poll */
-#define	NRZ	0	/* NRZ mode */
-#define	NRZI	0x20	/* NRZI mode */
-#define	FM1	0x40	/* FM1 (transition = 1) */
-#define	FM0	0x60	/* FM0 (transition = 0) */
-#define	CRCPS	0x80	/* CRC Preset I/O */
-
-/* Write Register 11 (Clock Mode control) */
-#define	TRxCXT	0	/* TRxC = Xtal output */
-#define	TRxCTC	1	/* TRxC = Transmit clock */
-#define	TRxCBR	2	/* TRxC = BR Generator Output */
-#define	TRxCDP	3	/* TRxC = DPLL output */
-#define	TRxCOI	4	/* TRxC O/I */
-#define	TCRTxCP	0	/* Transmit clock = RTxC pin */
-#define	TCTRxCP	8	/* Transmit clock = TRxC pin */
-#define	TCBR	0x10	/* Transmit clock = BR Generator output */
-#define	TCDPLL	0x18	/* Transmit clock = DPLL output */
-#define	RCRTxCP	0	/* Receive clock = RTxC pin */
-#define	RCTRxCP	0x20	/* Receive clock = TRxC pin */
-#define	RCBR	0x40	/* Receive clock = BR Generator output */
-#define	RCDPLL	0x60	/* Receive clock = DPLL output */
-#define	RTxCX	0x80	/* RTxC Xtal/No Xtal */
-
-/* Write Register 12 (lower byte of baud rate generator time constant) */
-
-/* Write Register 13 (upper byte of baud rate generator time constant) */
-
-/* Write Register 14 (Misc control bits) */
-#define	BRENABL	1	/* Baud rate generator enable */
-#define	BRSRC	2	/* Baud rate generator source */
-#define	DTRREQ	4	/* DTR/Request function */
-#define	AUTOECHO 8	/* Auto Echo */
-#define	LOOPBAK	0x10	/* Local loopback */
-#define	SEARCH	0x20	/* Enter search mode */
-#define	RMC	0x40	/* Reset missing clock */
-#define	DISDPLL	0x60	/* Disable DPLL */
-#define	SSBR	0x80	/* Set DPLL source = BR generator */
-#define	SSRTxC	0xa0	/* Set DPLL source = RTxC */
-#define	SFMM	0xc0	/* Set FM mode */
-#define	SNRZI	0xe0	/* Set NRZI mode */
-
-/* Write Register 15 (external/status interrupt control) */
-#define	ZCIE	2	/* Zero count IE */
-#define	DCDIE	8	/* DCD IE */
-#define	SYNCIE	0x10	/* Sync/hunt IE */
-#define	CTSIE	0x20	/* CTS IE */
-#define	TxUIE	0x40	/* Tx Underrun/EOM IE */
-#define	BRKIE	0x80	/* Break/Abort IE */
-
-
-/* Read Register 0 */
-#define	Rx_CH_AV	0x1	/* Rx Character Available */
-#define	ZCOUNT		0x2	/* Zero count */
-#define	Tx_BUF_EMP	0x4	/* Tx Buffer empty */
-#define	DCD		0x8	/* DCD */
-#define	SYNC_HUNT	0x10	/* Sync/hunt */
-#define	CTS		0x20	/* CTS */
-#define	TxEOM		0x40	/* Tx underrun */
-#define	BRK_ABRT	0x80	/* Break/Abort */
-
-/* Read Register 1 */
-#define	ALL_SNT		0x1	/* All sent */
-/* Residue Data for 8 Rx bits/char programmed */
-#define	RES3		0x8	/* 0/3 */
-#define	RES4		0x4	/* 0/4 */
-#define	RES5		0xc	/* 0/5 */
-#define	RES6		0x2	/* 0/6 */
-#define	RES7		0xa	/* 0/7 */
-#define	RES8		0x6	/* 0/8 */
-#define	RES18		0xe	/* 1/8 */
-#define	RES28		0x0	/* 2/8 */
-/* Special Rx Condition Interrupts */
-#define	PAR_ERR		0x10	/* Parity error */
-#define	Rx_OVR		0x20	/* Rx Overrun Error */
-#define	CRC_ERR		0x40	/* CRC/Framing Error */
-#define	END_FR		0x80	/* End of Frame (SDLC) */
-
-/* Read Register 2 (channel b only) - Interrupt vector */
-
-/* Read Register 3 (interrupt pending register) ch a only */
-#define	CHBEXT	0x1		/* Channel B Ext/Stat IP */
-#define	CHBTxIP	0x2		/* Channel B Tx IP */
-#define	CHBRxIP	0x4		/* Channel B Rx IP */
-#define	CHAEXT	0x8		/* Channel A Ext/Stat IP */
-#define	CHATxIP	0x10		/* Channel A Tx IP */
-#define	CHARxIP	0x20		/* Channel A Rx IP */
-
-/* Read Register 8 (receive data register) */
-
-/* Read Register 10  (misc status bits) */
-#define	ONLOOP	2		/* On loop */
-#define	LOOPSEND 0x10		/* Loop sending */
-#define	CLK2MIS	0x40		/* Two clocks missing */
-#define	CLK1MIS	0x80		/* One clock missing */
-
-/* Read Register 12 (lower byte of baud rate generator constant) */
-
-/* Read Register 13 (upper byte of baud rate generator constant) */
-
-/* Read Register 15 (value of WR 15) */
-
-/* Z85C30/Z85230 Enhanced SCC register definitions */
-
-/* Write Register 7' (SDLC/HDLC Programmable Enhancements) */
-#define AUTOTXF	0x01		/* Auto Tx Flag */
-#define AUTOEOM 0x02		/* Auto EOM Latch Reset */
-#define AUTORTS	0x04		/* Auto RTS */
-#define TXDNRZI 0x08		/* TxD Pulled High in SDLC NRZI mode */
-#define RXFIFOH 0x08		/* Z85230: Int on RX FIFO half full */
-#define FASTDTR 0x10		/* Fast DTR/REQ Mode */
-#define CRCCBCR	0x20		/* CRC Check Bytes Completely Received */
-#define TXFIFOE 0x20		/* Z85230: Int on TX FIFO completely empty */
-#define EXTRDEN	0x40		/* Extended Read Enabled */
-
-/* Write Register 15 (external/status interrupt control) */
-#define SHDLCE	1		/* SDLC/HDLC Enhancements Enable */
-#define FIFOE	4		/* FIFO Enable */
-
-/* Read Register 6 (frame status FIFO) */
-#define BCLSB	0xff		/* LSB of 14 bits count */
-
-/* Read Register 7 (frame status FIFO) */
-#define BCMSB	0x3f		/* MSB of 14 bits count */
-#define FDA	0x40		/* FIFO Data Available Status */
-#define FOS	0x80		/* FIFO Overflow Status */
diff --git a/include/linux/hdlcdrv.h b/include/linux/hdlcdrv.h
deleted file mode 100644
index 5d70c3f98f5b..000000000000
--- a/include/linux/hdlcdrv.h
+++ /dev/null
@@ -1,276 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * hdlcdrv.h  -- HDLC packet radio network driver.
- * The Linux soundcard driver for 1200 baud and 9600 baud packet radio
- * (C) 1996-1998 by Thomas Sailer, HB9JNX/AE4WA
- */
-#ifndef _HDLCDRV_H
-#define _HDLCDRV_H
-
-
-#include <linux/netdevice.h>
-#include <linux/if.h>
-#include <linux/spinlock.h>
-#include <uapi/linux/hdlcdrv.h>
-
-#define HDLCDRV_MAGIC      0x5ac6e778
-#define HDLCDRV_HDLCBUFFER  32 /* should be a power of 2 for speed reasons */
-#define HDLCDRV_BITBUFFER  256 /* should be a power of 2 for speed reasons */
-#undef HDLCDRV_LOOPBACK  /* define for HDLC debugging purposes */
-#define HDLCDRV_DEBUG
-
-/* maximum packet length, excluding CRC */
-#define HDLCDRV_MAXFLEN             400	
-
-
-struct hdlcdrv_hdlcbuffer {
-	spinlock_t lock;
-	unsigned rd, wr;
-	unsigned short buf[HDLCDRV_HDLCBUFFER];
-};
-
-#ifdef HDLCDRV_DEBUG
-struct hdlcdrv_bitbuffer {
-	unsigned int rd;
-	unsigned int wr;
-	unsigned int shreg;
-	unsigned char buffer[HDLCDRV_BITBUFFER];
-};
-
-static inline void hdlcdrv_add_bitbuffer(struct hdlcdrv_bitbuffer *buf, 
-					 unsigned int bit)
-{
-	unsigned char new;
-
-	new = buf->shreg & 1;
-	buf->shreg >>= 1;
-	buf->shreg |= (!!bit) << 7;
-	if (new) {
-		buf->buffer[buf->wr] = buf->shreg;
-		buf->wr = (buf->wr+1) % sizeof(buf->buffer);
-		buf->shreg = 0x80;
-	}
-}
-
-static inline void hdlcdrv_add_bitbuffer_word(struct hdlcdrv_bitbuffer *buf, 
-					      unsigned int bits)
-{
-	buf->buffer[buf->wr] = bits & 0xff;
-	buf->wr = (buf->wr+1) % sizeof(buf->buffer);
-	buf->buffer[buf->wr] = (bits >> 8) & 0xff;
-	buf->wr = (buf->wr+1) % sizeof(buf->buffer);
-
-}
-#endif /* HDLCDRV_DEBUG */
-
-/* -------------------------------------------------------------------- */
-/*
- * Information that need to be kept for each driver. 
- */
-
-struct hdlcdrv_ops {
-	/*
-	 * first some informations needed by the hdlcdrv routines
-	 */
-	const char *drvname;
-	const char *drvinfo;
-	/*
-	 * the routines called by the hdlcdrv routines
-	 */
-	int (*open)(struct net_device *);
-	int (*close)(struct net_device *);
-	int (*ioctl)(struct net_device *, void __user *,
-		     struct hdlcdrv_ioctl *, int);
-};
-
-struct hdlcdrv_state {
-	int magic;
-	int opened;
-
-	const struct hdlcdrv_ops *ops;
-
-	struct {
-		int bitrate;
-	} par;
-
-	struct hdlcdrv_pttoutput {
-		int dma2;
-		int seriobase;
-		int pariobase;
-		int midiiobase;
-		unsigned int flags;
-	} ptt_out;
-
-	struct hdlcdrv_channel_params ch_params;
-
-	struct hdlcdrv_hdlcrx {
-		struct hdlcdrv_hdlcbuffer hbuf;
-		unsigned long in_hdlc_rx;
-		/* 0 = sync hunt, != 0 receiving */
-		int rx_state;	
-		unsigned int bitstream;
-		unsigned int bitbuf;
-		int numbits;
-		unsigned char dcd;
-		
-		int len;
-		unsigned char *bp;
-		unsigned char buffer[HDLCDRV_MAXFLEN+2];
-	} hdlcrx;
-
-	struct hdlcdrv_hdlctx {
-		struct hdlcdrv_hdlcbuffer hbuf;
-		unsigned long in_hdlc_tx;
-		/*
-		 * 0 = send flags
-		 * 1 = send txtail (flags)
-		 * 2 = send packet
-		 */
-		int tx_state;	
-		int numflags;
-		unsigned int bitstream;
-		unsigned char ptt;
-		int calibrate;
-		int slotcnt;
-
-		unsigned int bitbuf;
-		int numbits;
-		
-		int len;
-		unsigned char *bp;
-		unsigned char buffer[HDLCDRV_MAXFLEN+2];
-	} hdlctx;
-
-#ifdef HDLCDRV_DEBUG
-	struct hdlcdrv_bitbuffer bitbuf_channel;
-	struct hdlcdrv_bitbuffer bitbuf_hdlc;
-#endif /* HDLCDRV_DEBUG */
-
-	int ptt_keyed;
-
-	/* queued skb for transmission */
-	struct sk_buff *skb;
-};
-
-
-/* -------------------------------------------------------------------- */
-
-static inline int hdlcdrv_hbuf_full(struct hdlcdrv_hdlcbuffer *hb) 
-{
-	unsigned long flags;
-	int ret;
-	
-	spin_lock_irqsave(&hb->lock, flags);
-	ret = !((HDLCDRV_HDLCBUFFER - 1 + hb->rd - hb->wr) % HDLCDRV_HDLCBUFFER);
-	spin_unlock_irqrestore(&hb->lock, flags);
-	return ret;
-}
-
-/* -------------------------------------------------------------------- */
-
-static inline int hdlcdrv_hbuf_empty(struct hdlcdrv_hdlcbuffer *hb)
-{
-	unsigned long flags;
-	int ret;
-	
-	spin_lock_irqsave(&hb->lock, flags);
-	ret = (hb->rd == hb->wr);
-	spin_unlock_irqrestore(&hb->lock, flags);
-	return ret;
-}
-
-/* -------------------------------------------------------------------- */
-
-static inline unsigned short hdlcdrv_hbuf_get(struct hdlcdrv_hdlcbuffer *hb)
-{
-	unsigned long flags;
-	unsigned short val;
-	unsigned newr;
-
-	spin_lock_irqsave(&hb->lock, flags);
-	if (hb->rd == hb->wr)
-		val = 0;
-	else {
-		newr = (hb->rd+1) % HDLCDRV_HDLCBUFFER;
-		val = hb->buf[hb->rd];
-		hb->rd = newr;
-	}
-	spin_unlock_irqrestore(&hb->lock, flags);
-	return val;
-}
-
-/* -------------------------------------------------------------------- */
-
-static inline void hdlcdrv_hbuf_put(struct hdlcdrv_hdlcbuffer *hb, 
-				    unsigned short val)
-{
-	unsigned newp;
-	unsigned long flags;
-	
-	spin_lock_irqsave(&hb->lock, flags);
-	newp = (hb->wr+1) % HDLCDRV_HDLCBUFFER;
-	if (newp != hb->rd) { 
-		hb->buf[hb->wr] = val & 0xffff;
-		hb->wr = newp;
-	}
-	spin_unlock_irqrestore(&hb->lock, flags);
-}
-
-/* -------------------------------------------------------------------- */
-
-static inline void hdlcdrv_putbits(struct hdlcdrv_state *s, unsigned int bits)
-{
-	hdlcdrv_hbuf_put(&s->hdlcrx.hbuf, bits);
-}
-
-static inline unsigned int hdlcdrv_getbits(struct hdlcdrv_state *s)
-{
-	unsigned int ret;
-
-	if (hdlcdrv_hbuf_empty(&s->hdlctx.hbuf)) {
-		if (s->hdlctx.calibrate > 0)
-			s->hdlctx.calibrate--;
-		else
-			s->hdlctx.ptt = 0;
-		ret = 0;
-	} else 
-		ret = hdlcdrv_hbuf_get(&s->hdlctx.hbuf);
-#ifdef HDLCDRV_LOOPBACK
-	hdlcdrv_hbuf_put(&s->hdlcrx.hbuf, ret);
-#endif /* HDLCDRV_LOOPBACK */
-	return ret;
-}
-
-static inline void hdlcdrv_channelbit(struct hdlcdrv_state *s, unsigned int bit)
-{
-#ifdef HDLCDRV_DEBUG
-	hdlcdrv_add_bitbuffer(&s->bitbuf_channel, bit);
-#endif /* HDLCDRV_DEBUG */
-}
-
-static inline void hdlcdrv_setdcd(struct hdlcdrv_state *s, int dcd)
-{
-	s->hdlcrx.dcd = !!dcd;
-}
-
-static inline int hdlcdrv_ptt(struct hdlcdrv_state *s)
-{
-	return s->hdlctx.ptt || (s->hdlctx.calibrate > 0);
-}
-
-/* -------------------------------------------------------------------- */
-
-void hdlcdrv_receiver(struct net_device *, struct hdlcdrv_state *);
-void hdlcdrv_transmitter(struct net_device *, struct hdlcdrv_state *);
-void hdlcdrv_arbitrate(struct net_device *, struct hdlcdrv_state *);
-struct net_device *hdlcdrv_register(const struct hdlcdrv_ops *ops,
-				    unsigned int privsize, const char *ifname,
-				    unsigned int baseaddr, unsigned int irq, 
-				    unsigned int dma);
-void hdlcdrv_unregister(struct net_device *dev);
-
-/* -------------------------------------------------------------------- */
-
-
-
-#endif /* _HDLCDRV_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7969fcdd5ac4..e9e2ec8d4c19 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -162,7 +162,7 @@ static inline bool dev_xmit_complete(int rc)
 
 #if defined(CONFIG_HYPERV_NET)
 # define LL_MAX_HEADER 128
-#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25)
+#elif defined(CONFIG_WLAN)
 # if defined(CONFIG_MAC80211_MESH)
 #  define LL_MAX_HEADER 128
 # else
@@ -2316,9 +2316,6 @@ struct net_device {
 #if IS_ENABLED(CONFIG_ATALK)
 	void 			*atalk_ptr;
 #endif
-#if IS_ENABLED(CONFIG_AX25)
-	struct ax25_dev	__rcu	*ax25_ptr;
-#endif
 #if IS_ENABLED(CONFIG_CFG80211)
 	struct wireless_dev	*ieee80211_ptr;
 #endif
diff --git a/include/linux/scc.h b/include/linux/scc.h
deleted file mode 100644
index 745eabd17c10..000000000000
--- a/include/linux/scc.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* $Id: scc.h,v 1.29 1997/04/02 14:56:45 jreuter Exp jreuter $ */
-#ifndef	_SCC_H
-#define	_SCC_H
-
-#include <uapi/linux/scc.h>
-
-
-enum {TX_OFF, TX_ON};	/* command for scc_key_trx() */
-
-/* Vector masks in RR2B */
-
-#define VECTOR_MASK	0x06
-#define TXINT		0x00
-#define EXINT		0x02
-#define RXINT		0x04
-#define SPINT		0x06
-
-#ifdef CONFIG_SCC_DELAY
-#define Inb(port)	inb_p(port)
-#define Outb(port, val)	outb_p(val, port)
-#else
-#define Inb(port)	inb(port)
-#define Outb(port, val)	outb(val, port)
-#endif
-
-/* SCC channel control structure for KISS */
-
-struct scc_kiss {
-	unsigned char txdelay;		/* Transmit Delay 10 ms/cnt */
-	unsigned char persist;		/* Persistence (0-255) as a % */
-	unsigned char slottime;		/* Delay to wait on persistence hit */
-	unsigned char tailtime;		/* Delay after last byte written */
-	unsigned char fulldup;		/* Full Duplex mode 0=CSMA 1=DUP 2=ALWAYS KEYED */
-	unsigned char waittime;		/* Waittime before any transmit attempt */
-	unsigned int  maxkeyup;		/* Maximum time to transmit (seconds) */
-	unsigned int  mintime;		/* Minimal offtime after MAXKEYUP timeout (seconds) */
-	unsigned int  idletime;		/* Maximum idle time in ALWAYS KEYED mode (seconds) */
-	unsigned int  maxdefer;		/* Timer for CSMA channel busy limit */
-	unsigned char tx_inhibit;	/* Transmit is not allowed when set */	
-	unsigned char group;		/* Group ID for AX.25 TX interlocking */
-	unsigned char mode;		/* 'normal' or 'hwctrl' mode (unused) */
-	unsigned char softdcd;		/* Use DPLL instead of DCD pin for carrier detect */
-};
-
-
-/* SCC channel structure */
-
-struct scc_channel {
-	int init;			/* channel exists? */
-
-	struct net_device *dev;		/* link to device control structure */
-	struct net_device_stats dev_stat;/* device statistics */
-
-	char brand;			/* manufacturer of the board */
-	long clock;			/* used clock */
-
-	io_port ctrl;			/* I/O address of CONTROL register */
-	io_port	data;			/* I/O address of DATA register */
-	io_port special;		/* I/O address of special function port */
-	int irq;			/* Number of Interrupt */
-
-	char option;
-	char enhanced;			/* Enhanced SCC support */
-
-	unsigned char wreg[16]; 	/* Copy of last written value in WRx */
-	unsigned char status;		/* Copy of R0 at last external interrupt */
-	unsigned char dcd;		/* DCD status */
-
-        struct scc_kiss kiss;		/* control structure for KISS params */
-        struct scc_stat stat;		/* statistical information */
-        struct scc_modem modem; 	/* modem information */
-
-        struct sk_buff_head tx_queue;	/* next tx buffer */
-        struct sk_buff *rx_buff;	/* pointer to frame currently received */
-        struct sk_buff *tx_buff;	/* pointer to frame currently transmitted */
-
-	/* Timer */
-	struct timer_list tx_t;		/* tx timer for this channel */
-	struct timer_list tx_wdog;	/* tx watchdogs */
-	
-	/* Channel lock */
-	spinlock_t	lock;		/* Channel guard lock */
-};
-
-#endif /* defined(_SCC_H) */
diff --git a/include/linux/yam.h b/include/linux/yam.h
deleted file mode 100644
index a29b04fa1e66..000000000000
--- a/include/linux/yam.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*****************************************************************************/
-
-/*
- *	yam.h  -- YAM radio modem driver.
- *
- *	Copyright (C) 1998 Frederic Rible F1OAT (frible@teaser.fr)
- *	Adapted from baycom.c driver written by Thomas Sailer (sailer@ife.ee.ethz.ch)
- *
- *  Please note that the GPL allows you to use the driver, NOT the radio.
- *  In order to use the radio, you need a license from the communications
- *  authority of your country.
- */
-
-/*****************************************************************************/
-
-#define SIOCYAMRESERVED	(0)
-#define SIOCYAMSCFG 	(1)	/* Set configuration */
-#define SIOCYAMGCFG 	(2)	/* Get configuration */
-#define SIOCYAMSMCS 	(3)	/* Set mcs data */
-
-#define YAM_IOBASE   (1 << 0)
-#define YAM_IRQ      (1 << 1)
-#define YAM_BITRATE  (1 << 2) /* Bit rate of radio port ->57600 */
-#define YAM_MODE     (1 << 3) /* 0=simplex 1=duplex 2=duplex+tempo */
-#define YAM_HOLDDLY  (1 << 4) /* duplex tempo (sec) */
-#define YAM_TXDELAY  (1 << 5) /* Tx Delay (ms) */
-#define YAM_TXTAIL   (1 << 6) /* Tx Tail  (ms) */
-#define YAM_PERSIST  (1 << 7) /* Persist  (ms) */
-#define YAM_SLOTTIME (1 << 8) /* Slottime (ms) */
-#define YAM_BAUDRATE (1 << 9) /* Baud rate of rs232 port ->115200 */
-
-#define YAM_MAXBITRATE  57600
-#define YAM_MAXBAUDRATE 115200
-#define YAM_MAXMODE     2
-#define YAM_MAXHOLDDLY  99
-#define YAM_MAXTXDELAY  999
-#define YAM_MAXTXTAIL   999
-#define YAM_MAXPERSIST  255
-#define YAM_MAXSLOTTIME 999
-
-#define YAM_FPGA_SIZE	5302
-
-struct yamcfg {
-	unsigned int mask;		/* Mask of commands */
-	unsigned int iobase;	/* IO Base of COM port */
-	unsigned int irq;		/* IRQ of COM port */
-	unsigned int bitrate;	/* Bit rate of radio port */
-	unsigned int baudrate;	/* Baud rate of the RS232 port */
-	unsigned int txdelay;	/* TxDelay */
-	unsigned int txtail;	/* TxTail */
-	unsigned int persist;	/* Persistence */
-	unsigned int slottime;	/* Slottime */
-	unsigned int mode;		/* mode 0 (simp), 1(Dupl), 2(Dupl+delay) */
-	unsigned int holddly;	/* PTT delay in FullDuplex 2 mode */
-};
-
-struct yamdrv_ioctl_cfg {
-	int cmd;
-	struct yamcfg cfg;
-};
-
-struct yamdrv_ioctl_mcs {
-	int cmd;
-	unsigned int bitrate;
-	unsigned char bits[YAM_FPGA_SIZE];
-};
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 9fc6a6657266..6b2f518facdb 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -1,480 +1,10 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- *	Declarations of AX.25 type objects.
- *
- *	Alan Cox (GW4PTS) 	10/11/93
- */
 #ifndef _AX25_H
-#define _AX25_H 
+#define _AX25_H
 
 #include <linux/ax25.h>
-#include <linux/spinlock.h>
-#include <linux/timer.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/refcount.h>
-#include <net/neighbour.h>
-#include <net/sock.h>
-#include <linux/seq_file.h>
 
-#define	AX25_T1CLAMPLO  		1
-#define	AX25_T1CLAMPHI 			(30 * HZ)
-
-#define	AX25_BPQ_HEADER_LEN		16
-#define	AX25_KISS_HEADER_LEN		1
-
-#define	AX25_HEADER_LEN			17
-#define	AX25_ADDR_LEN			7
-#define	AX25_DIGI_HEADER_LEN		(AX25_MAX_DIGIS * AX25_ADDR_LEN)
-#define	AX25_MAX_HEADER_LEN		(AX25_HEADER_LEN + AX25_DIGI_HEADER_LEN)
-
-/* AX.25 Protocol IDs */
-#define AX25_P_ROSE			0x01
-#define AX25_P_VJCOMP			0x06	/* Compressed TCP/IP packet   */
-						/* Van Jacobsen (RFC 1144)    */
-#define AX25_P_VJUNCOMP			0x07	/* Uncompressed TCP/IP packet */
-						/* Van Jacobsen (RFC 1144)    */
-#define	AX25_P_SEGMENT			0x08	/* Segmentation fragment      */
-#define AX25_P_TEXNET			0xc3	/* TEXTNET datagram protocol  */
-#define AX25_P_LQ			0xc4	/* Link Quality Protocol      */
-#define AX25_P_ATALK			0xca	/* Appletalk                  */
-#define AX25_P_ATALK_ARP		0xcb	/* Appletalk ARP              */
-#define AX25_P_IP			0xcc	/* ARPA Internet Protocol     */
-#define AX25_P_ARP			0xcd	/* ARPA Address Resolution    */
-#define AX25_P_FLEXNET			0xce	/* FlexNet                    */
-#define AX25_P_NETROM 			0xcf	/* NET/ROM                    */
-#define AX25_P_TEXT 			0xF0	/* No layer 3 protocol impl.  */
-
-/* AX.25 Segment control values */
-#define	AX25_SEG_REM			0x7F
-#define	AX25_SEG_FIRST			0x80
-
-#define AX25_CBIT			0x80	/* Command/Response bit */
-#define AX25_EBIT			0x01	/* HDLC Address Extension bit */
-#define AX25_HBIT			0x80	/* Has been repeated bit */
-
-#define AX25_SSSID_SPARE		0x60	/* Unused bits in SSID for standard AX.25 */
-#define AX25_ESSID_SPARE		0x20	/* Unused bits in SSID for extended AX.25 */
-#define AX25_DAMA_FLAG			0x20	/* Well, it is *NOT* unused! (dl1bke 951121 */
-
-#define	AX25_COND_ACK_PENDING		0x01
-#define	AX25_COND_REJECT		0x02
-#define	AX25_COND_PEER_RX_BUSY		0x04
-#define	AX25_COND_OWN_RX_BUSY		0x08
-#define	AX25_COND_DAMA_MODE		0x10
-
-#ifndef _LINUX_NETDEVICE_H
-#include <linux/netdevice.h>
-#endif
-
-/* Upper sub-layer (LAPB) definitions */
-
-/* Control field templates */
-#define	AX25_I			0x00	/* Information frames */
-#define	AX25_S			0x01	/* Supervisory frames */
-#define	AX25_RR			0x01	/* Receiver ready */
-#define	AX25_RNR		0x05	/* Receiver not ready */
-#define	AX25_REJ		0x09	/* Reject */
-#define	AX25_U			0x03	/* Unnumbered frames */
-#define	AX25_SABM		0x2f	/* Set Asynchronous Balanced Mode */
-#define	AX25_SABME		0x6f	/* Set Asynchronous Balanced Mode Extended */
-#define	AX25_DISC		0x43	/* Disconnect */
-#define	AX25_DM			0x0f	/* Disconnected mode */
-#define	AX25_UA			0x63	/* Unnumbered acknowledge */
-#define	AX25_FRMR		0x87	/* Frame reject */
-#define	AX25_UI			0x03	/* Unnumbered information */
-#define	AX25_XID		0xaf	/* Exchange information */
-#define	AX25_TEST		0xe3	/* Test */
-
-#define	AX25_PF			0x10	/* Poll/final bit for standard AX.25 */
-#define	AX25_EPF		0x01	/* Poll/final bit for extended AX.25 */
-
-#define AX25_ILLEGAL		0x100	/* Impossible to be a real frame type */
-
-#define	AX25_POLLOFF		0
-#define	AX25_POLLON		1
-
-/* AX25 L2 C-bit */
-#define AX25_COMMAND		1
-#define AX25_RESPONSE		2
-
-/* Define Link State constants. */
-
-enum { 
-	AX25_STATE_0,			/* Listening */
-	AX25_STATE_1,			/* SABM sent */
-	AX25_STATE_2,			/* DISC sent */
-	AX25_STATE_3,			/* Established */
-	AX25_STATE_4			/* Recovery */
-};
-
-#define AX25_MODULUS 		8	/*  Standard AX.25 modulus */
-#define	AX25_EMODULUS		128	/*  Extended AX.25 modulus */
-
-enum {
-	AX25_PROTO_STD_SIMPLEX,
-	AX25_PROTO_STD_DUPLEX,
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	AX25_PROTO_DAMA_SLAVE,
-#endif
-	__AX25_PROTO_MAX,
-	AX25_PROTO_MAX = __AX25_PROTO_MAX -1
-};
-
-enum {
-	AX25_VALUES_IPDEFMODE,	/* 0=DG 1=VC */
-	AX25_VALUES_AXDEFMODE,	/* 0=Normal 1=Extended Seq Nos */
-	AX25_VALUES_BACKOFF,	/* 0=None 1=Linear 2=Exponential */
-	AX25_VALUES_CONMODE,	/* Allow connected modes - 0=No 1=no "PID text" 2=all PIDs */
-	AX25_VALUES_WINDOW,	/* Default window size for standard AX.25 */
-	AX25_VALUES_EWINDOW,	/* Default window size for extended AX.25 */
-	AX25_VALUES_T1,		/* Default T1 timeout value */
-	AX25_VALUES_T2,		/* Default T2 timeout value */
-	AX25_VALUES_T3,		/* Default T3 timeout value */
-	AX25_VALUES_IDLE,	/* Connected mode idle timer */
-	AX25_VALUES_N2,		/* Default N2 value */
-	AX25_VALUES_PACLEN,	/* AX.25 MTU */
-	AX25_VALUES_PROTOCOL,	/* Std AX.25, DAMA Slave */
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	AX25_VALUES_DS_TIMEOUT,	/* DAMA Slave timeout */
-#endif
-	AX25_MAX_VALUES		/* THIS MUST REMAIN THE LAST ENTRY OF THIS LIST */
-};
-
-#define	AX25_DEF_IPDEFMODE	0			/* Datagram */
-#define	AX25_DEF_AXDEFMODE	0			/* Normal */
-#define	AX25_DEF_BACKOFF	1			/* Linear backoff */
-#define	AX25_DEF_CONMODE	2			/* Connected mode allowed */
-#define	AX25_DEF_WINDOW		2			/* Window=2 */
-#define	AX25_DEF_EWINDOW	32			/* Module-128 Window=32 */
-#define	AX25_DEF_T1		10000			/* T1=10s */
-#define	AX25_DEF_T2		3000			/* T2=3s  */
-#define	AX25_DEF_T3		300000			/* T3=300s */
-#define	AX25_DEF_N2		10			/* N2=10 */
-#define AX25_DEF_IDLE		0			/* Idle=None */
-#define AX25_DEF_PACLEN		256			/* Paclen=256 */
-#define	AX25_DEF_PROTOCOL	AX25_PROTO_STD_SIMPLEX	/* Standard AX.25 */
-#define AX25_DEF_DS_TIMEOUT	180000			/* DAMA timeout 3 minutes */
-
-typedef struct ax25_uid_assoc {
-	struct hlist_node	uid_node;
-	refcount_t		refcount;
-	kuid_t			uid;
-	ax25_address		call;
-} ax25_uid_assoc;
-
-#define ax25_uid_for_each(__ax25, list) \
-	hlist_for_each_entry(__ax25, list, uid_node)
-
-#define ax25_uid_hold(ax25) \
-	refcount_inc(&((ax25)->refcount))
-
-static inline void ax25_uid_put(ax25_uid_assoc *assoc)
-{
-	if (refcount_dec_and_test(&assoc->refcount)) {
-		kfree(assoc);
-	}
-}
-
-typedef struct {
-	ax25_address		calls[AX25_MAX_DIGIS];
-	unsigned char		repeated[AX25_MAX_DIGIS];
-	unsigned char		ndigi;
-	signed char		lastrepeat;
-} ax25_digi;
-
-typedef struct ax25_route {
-	struct ax25_route	*next;
-	ax25_address		callsign;
-	struct net_device	*dev;
-	ax25_digi		*digipeat;
-	char			ip_mode;
-} ax25_route;
-
-void __ax25_put_route(ax25_route *ax25_rt);
-
-extern rwlock_t ax25_route_lock;
-
-static inline void ax25_route_lock_use(void)
-{
-	read_lock(&ax25_route_lock);
-}
-
-static inline void ax25_route_lock_unuse(void)
-{
-	read_unlock(&ax25_route_lock);
-}
-
-typedef struct {
-	char			slave;			/* slave_mode?   */
-	struct timer_list	slave_timer;		/* timeout timer */
-	unsigned short		slave_timeout;		/* when? */
-} ax25_dama_info;
-
-typedef struct ax25_dev {
-	struct list_head	list;
-
-	struct net_device	*dev;
-	netdevice_tracker	dev_tracker;
-
-	struct net_device	*forward;
-	struct ctl_table_header *sysheader;
-	int			values[AX25_MAX_VALUES];
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	ax25_dama_info		dama;
-#endif
-	refcount_t		refcount;
-	bool device_up;
-	struct rcu_head		rcu;
-} ax25_dev;
-
-typedef struct ax25_cb {
-	struct hlist_node	ax25_node;
-	ax25_address		source_addr, dest_addr;
-	ax25_digi		*digipeat;
-	ax25_dev		*ax25_dev;
-	netdevice_tracker	dev_tracker;
-	unsigned char		iamdigi;
-	unsigned char		state, modulus, pidincl;
-	unsigned short		vs, vr, va;
-	unsigned char		condition, backoff;
-	unsigned char		n2, n2count;
-	struct timer_list	t1timer, t2timer, t3timer, idletimer;
-	unsigned long		t1, t2, t3, idle, rtt;
-	unsigned short		paclen, fragno, fraglen;
-	struct sk_buff_head	write_queue;
-	struct sk_buff_head	reseq_queue;
-	struct sk_buff_head	ack_queue;
-	struct sk_buff_head	frag_queue;
-	unsigned char		window;
-	struct timer_list	timer, dtimer;
-	struct sock		*sk;		/* Backlink to socket */
-	refcount_t		refcount;
-} ax25_cb;
-
-struct ax25_sock {
-	struct sock		sk;
-	struct ax25_cb		*cb;
-};
-
-#define ax25_sk(ptr) container_of_const(ptr, struct ax25_sock, sk)
-
-static inline struct ax25_cb *sk_to_ax25(const struct sock *sk)
-{
-	return ax25_sk(sk)->cb;
-}
-
-#define ax25_for_each(__ax25, list) \
-	hlist_for_each_entry(__ax25, list, ax25_node)
-
-#define ax25_cb_hold(__ax25) \
-	refcount_inc(&((__ax25)->refcount))
-
-static __inline__ void ax25_cb_put(ax25_cb *ax25)
-{
-	if (refcount_dec_and_test(&ax25->refcount)) {
-		kfree(ax25->digipeat);
-		kfree(ax25);
-	}
-}
-
-static inline void ax25_dev_hold(ax25_dev *ax25_dev)
-{
-	refcount_inc(&ax25_dev->refcount);
-}
-
-static inline void ax25_dev_put(ax25_dev *ax25_dev)
-{
-	if (refcount_dec_and_test(&ax25_dev->refcount))
-		kfree_rcu(ax25_dev, rcu);
-}
-static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev)
-{
-	skb->dev      = dev;
-	skb_reset_mac_header(skb);
-	skb->pkt_type = PACKET_HOST;
-	return htons(ETH_P_AX25);
-}
-
-/* af_ax25.c */
-extern struct hlist_head ax25_list;
-extern spinlock_t ax25_list_lock;
-void ax25_cb_add(ax25_cb *);
-struct sock *ax25_find_listener(ax25_address *, int, struct net_device *, int);
-struct sock *ax25_get_socket(ax25_address *, ax25_address *, int);
-ax25_cb *ax25_find_cb(const ax25_address *, ax25_address *, ax25_digi *,
-		      struct net_device *);
-void ax25_send_to_raw(ax25_address *, struct sk_buff *, int);
-void ax25_destroy_socket(ax25_cb *);
-ax25_cb * __must_check ax25_create_cb(void);
-void ax25_fillin_cb(ax25_cb *, ax25_dev *);
-struct sock *ax25_make_new(struct sock *, struct ax25_dev *);
-
-/* ax25_addr.c */
-extern const ax25_address ax25_bcast;
-extern const ax25_address ax25_defaddr;
-extern const ax25_address null_ax25_address;
-char *ax2asc(char *buf, const ax25_address *);
-void asc2ax(ax25_address *addr, const char *callsign);
-int ax25cmp(const ax25_address *, const ax25_address *);
-int ax25digicmp(const ax25_digi *, const ax25_digi *);
-const unsigned char *ax25_addr_parse(const unsigned char *, int,
-	ax25_address *, ax25_address *, ax25_digi *, int *, int *);
-int ax25_addr_build(unsigned char *, const ax25_address *,
-		    const ax25_address *, const ax25_digi *, int, int);
-int ax25_addr_size(const ax25_digi *);
-void ax25_digi_invert(const ax25_digi *, ax25_digi *);
-
-/* ax25_dev.c */
-extern spinlock_t ax25_dev_lock;
-
-#if IS_ENABLED(CONFIG_AX25)
-static inline ax25_dev *ax25_dev_ax25dev(const struct net_device *dev)
-{
-	return rcu_dereference_rtnl(dev->ax25_ptr);
-}
-#endif
-
-ax25_dev *ax25_addr_ax25dev(ax25_address *);
-void ax25_dev_device_up(struct net_device *);
-void ax25_dev_device_down(struct net_device *);
-int ax25_fwd_ioctl(unsigned int, struct ax25_fwd_struct *);
-struct net_device *ax25_fwd_dev(struct net_device *);
-void ax25_dev_free(void);
-
-/* ax25_ds_in.c */
-int ax25_ds_frame_in(ax25_cb *, struct sk_buff *, int);
-
-/* ax25_ds_subr.c */
-void ax25_ds_nr_error_recovery(ax25_cb *);
-void ax25_ds_enquiry_response(ax25_cb *);
-void ax25_ds_establish_data_link(ax25_cb *);
-void ax25_dev_dama_off(ax25_dev *);
-void ax25_dama_on(ax25_cb *);
-void ax25_dama_off(ax25_cb *);
-
-/* ax25_ds_timer.c */
-void ax25_ds_setup_timer(ax25_dev *);
-void ax25_ds_set_timer(ax25_dev *);
-void ax25_ds_del_timer(ax25_dev *);
-void ax25_ds_timer(ax25_cb *);
-void ax25_ds_t1_timeout(ax25_cb *);
-void ax25_ds_heartbeat_expiry(ax25_cb *);
-void ax25_ds_t3timer_expiry(ax25_cb *);
-void ax25_ds_idletimer_expiry(ax25_cb *);
-
-/* ax25_iface.c */
-
-struct ax25_protocol {
-	struct ax25_protocol *next;
-	unsigned int pid;
-	int (*func)(struct sk_buff *, ax25_cb *);
-};
-
-void ax25_register_pid(struct ax25_protocol *ap);
-void ax25_protocol_release(unsigned int);
-
-struct ax25_linkfail {
-	struct hlist_node lf_node;
-	void (*func)(ax25_cb *, int);
-};
-
-void ax25_linkfail_register(struct ax25_linkfail *lf);
-void ax25_linkfail_release(struct ax25_linkfail *lf);
-int __must_check ax25_listen_register(const ax25_address *,
-				      struct net_device *);
-void ax25_listen_release(const ax25_address *, struct net_device *);
-int(*ax25_protocol_function(unsigned int))(struct sk_buff *, ax25_cb *);
-int ax25_listen_mine(const ax25_address *, struct net_device *);
-void ax25_link_failed(ax25_cb *, int);
-int ax25_protocol_is_registered(unsigned int);
-
-/* ax25_in.c */
-int ax25_rx_iframe(ax25_cb *, struct sk_buff *);
-int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *,
-		  struct net_device *);
-
-/* ax25_ip.c */
-netdev_tx_t ax25_ip_xmit(struct sk_buff *skb);
-extern const struct header_ops ax25_header_ops;
-
-/* ax25_out.c */
-ax25_cb *ax25_send_frame(struct sk_buff *, int, const ax25_address *,
-			 ax25_address *, ax25_digi *, struct net_device *);
-void ax25_output(ax25_cb *, int, struct sk_buff *);
-void ax25_kick(ax25_cb *);
-void ax25_transmit_buffer(ax25_cb *, struct sk_buff *, int);
-void ax25_queue_xmit(struct sk_buff *skb, struct net_device *dev);
-int ax25_check_iframes_acked(ax25_cb *, unsigned short);
-
-/* ax25_route.c */
-void ax25_rt_device_down(struct net_device *);
-int ax25_rt_ioctl(unsigned int, void __user *);
-extern const struct seq_operations ax25_rt_seqops;
-ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev);
-struct sk_buff *ax25_rt_build_path(struct sk_buff *, ax25_address *,
-				   ax25_address *, ax25_digi *);
-void ax25_rt_free(void);
-
-/* ax25_std_in.c */
-int ax25_std_frame_in(ax25_cb *, struct sk_buff *, int);
-
-/* ax25_std_subr.c */
-void ax25_std_nr_error_recovery(ax25_cb *);
-void ax25_std_establish_data_link(ax25_cb *);
-void ax25_std_transmit_enquiry(ax25_cb *);
-void ax25_std_enquiry_response(ax25_cb *);
-void ax25_std_timeout_response(ax25_cb *);
-
-/* ax25_std_timer.c */
-void ax25_std_heartbeat_expiry(ax25_cb *);
-void ax25_std_t1timer_expiry(ax25_cb *);
-void ax25_std_t2timer_expiry(ax25_cb *);
-void ax25_std_t3timer_expiry(ax25_cb *);
-void ax25_std_idletimer_expiry(ax25_cb *);
-
-/* ax25_subr.c */
-void ax25_clear_queues(ax25_cb *);
-void ax25_frames_acked(ax25_cb *, unsigned short);
-void ax25_requeue_frames(ax25_cb *);
-int ax25_validate_nr(ax25_cb *, unsigned short);
-int ax25_decode(ax25_cb *, struct sk_buff *, int *, int *, int *);
-void ax25_send_control(ax25_cb *, int, int, int);
-void ax25_return_dm(struct net_device *, ax25_address *, ax25_address *,
-		    ax25_digi *);
-void ax25_calculate_t1(ax25_cb *);
-void ax25_calculate_rtt(ax25_cb *);
-void ax25_disconnect(ax25_cb *, int);
-
-/* ax25_timer.c */
-void ax25_setup_timers(ax25_cb *);
-void ax25_start_heartbeat(ax25_cb *);
-void ax25_start_t1timer(ax25_cb *);
-void ax25_start_t2timer(ax25_cb *);
-void ax25_start_t3timer(ax25_cb *);
-void ax25_start_idletimer(ax25_cb *);
-void ax25_stop_heartbeat(ax25_cb *);
-void ax25_stop_t1timer(ax25_cb *);
-void ax25_stop_t2timer(ax25_cb *);
-void ax25_stop_t3timer(ax25_cb *);
-void ax25_stop_idletimer(ax25_cb *);
-int ax25_t1timer_running(ax25_cb *);
-unsigned long ax25_display_timer(struct timer_list *);
-
-/* ax25_uid.c */
-extern int  ax25_uid_policy;
-ax25_uid_assoc *ax25_findbyuid(kuid_t);
-int __must_check ax25_uid_ioctl(int, struct sockaddr_ax25 *);
-extern const struct seq_operations ax25_uid_seqops;
-void ax25_uid_free(void);
-
-/* sysctl_net_ax25.c */
-#ifdef CONFIG_SYSCTL
-int ax25_register_dev_sysctl(ax25_dev *ax25_dev);
-void ax25_unregister_dev_sysctl(ax25_dev *ax25_dev);
-#else
-static inline int ax25_register_dev_sysctl(ax25_dev *ax25_dev) { return 0; }
-static inline void ax25_unregister_dev_sysctl(ax25_dev *ax25_dev) {}
-#endif /* CONFIG_SYSCTL */
+#define	AX25_ADDR_LEN	7
+#define	AX25_P_IP	0xCC
 
 #endif
diff --git a/include/net/netrom.h b/include/net/netrom.h
deleted file mode 100644
index f0565a5987d1..000000000000
--- a/include/net/netrom.h
+++ /dev/null
@@ -1,273 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *	Declarations of NET/ROM type objects.
- *
- *	Jonathan Naylor G4KLX	9/4/95
- */
-
-#ifndef _NETROM_H
-#define _NETROM_H 
-
-#include <linux/netrom.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <linux/refcount.h>
-#include <linux/seq_file.h>
-#include <net/ax25.h>
-
-#define	NR_NETWORK_LEN			15
-#define	NR_TRANSPORT_LEN		5
-
-#define	NR_PROTO_IP			0x0C
-
-#define	NR_PROTOEXT			0x00
-#define	NR_CONNREQ			0x01
-#define	NR_CONNACK			0x02
-#define	NR_DISCREQ			0x03
-#define	NR_DISCACK			0x04
-#define	NR_INFO				0x05
-#define	NR_INFOACK			0x06
-#define	NR_RESET			0x07
-
-#define	NR_CHOKE_FLAG			0x80
-#define	NR_NAK_FLAG			0x40
-#define	NR_MORE_FLAG			0x20
-
-/* Define Link State constants. */
-enum {
-	NR_STATE_0,
-	NR_STATE_1,
-	NR_STATE_2,
-	NR_STATE_3
-};
-
-#define	NR_COND_ACK_PENDING		0x01
-#define	NR_COND_REJECT			0x02
-#define	NR_COND_PEER_RX_BUSY		0x04
-#define	NR_COND_OWN_RX_BUSY		0x08
-
-#define NR_DEFAULT_T1			120000		/* Outstanding frames - 120 seconds */
-#define NR_DEFAULT_T2			5000		/* Response delay     - 5 seconds */
-#define NR_DEFAULT_N2			3		/* Number of Retries - 3 */
-#define	NR_DEFAULT_T4			180000		/* Busy Delay - 180 seconds */
-#define	NR_DEFAULT_IDLE			0		/* No Activity Timeout - none */
-#define	NR_DEFAULT_WINDOW		4		/* Default Window Size - 4 */
-#define	NR_DEFAULT_OBS			6		/* Default Obsolescence Count - 6 */
-#define	NR_DEFAULT_QUAL			10		/* Default Neighbour Quality - 10 */
-#define	NR_DEFAULT_TTL			16		/* Default Time To Live - 16 */
-#define	NR_DEFAULT_ROUTING		1		/* Is routing enabled ? */
-#define	NR_DEFAULT_FAILS		2		/* Link fails until route fails */
-#define	NR_DEFAULT_RESET		0		/* Sent / accept reset cmds? */
-
-#define NR_MODULUS 			256
-#define NR_MAX_WINDOW_SIZE		127			/* Maximum Window Allowable - 127 */
-#define	NR_MAX_PACKET_SIZE		236			/* Maximum Packet Length - 236 */
-
-struct nr_sock {
-	struct sock		sock;
-	ax25_address		user_addr, source_addr, dest_addr;
-	struct net_device		*device;
-	unsigned char		my_index,   my_id;
-	unsigned char		your_index, your_id;
-	unsigned char		state, condition, bpqext, window;
-	unsigned short		vs, vr, va, vl;
-	unsigned char		n2, n2count;
-	unsigned long		t1, t2, t4, idle;
-	unsigned short		fraglen;
-	struct timer_list	t1timer;
-	struct timer_list	t2timer;
-	struct timer_list	t4timer;
-	struct timer_list	idletimer;
-	struct sk_buff_head	ack_queue;
-	struct sk_buff_head	reseq_queue;
-	struct sk_buff_head	frag_queue;
-};
-
-#define nr_sk(sk) ((struct nr_sock *)(sk))
-
-struct nr_neigh {
-	struct hlist_node	neigh_node;
-	ax25_address		callsign;
-	ax25_digi		*digipeat;
-	ax25_cb			*ax25;
-	struct net_device	*dev;
-	unsigned char		quality;
-	unsigned char		locked;
-	unsigned short		count;
-	unsigned int		number;
-	unsigned char		failed;
-	refcount_t		refcount;
-};
-
-struct nr_route {
-	unsigned char   quality;
-	unsigned char   obs_count;
-	struct nr_neigh *neighbour;
-};
-
-struct nr_node {
-	struct hlist_node	node_node;
-	ax25_address		callsign;
-	char			mnemonic[7];
-	unsigned char		which;
-	unsigned char		count;
-	struct nr_route		routes[3];
-	refcount_t		refcount;
-	spinlock_t		node_lock;
-};
-
-/*********************************************************************
- *	nr_node & nr_neigh lists, refcounting and locking
- *********************************************************************/
-
-#define nr_node_hold(__nr_node) \
-	refcount_inc(&((__nr_node)->refcount))
-
-static __inline__ void nr_node_put(struct nr_node *nr_node)
-{
-	if (refcount_dec_and_test(&nr_node->refcount)) {
-		kfree(nr_node);
-	}
-}
-
-#define nr_neigh_hold(__nr_neigh) \
-	refcount_inc(&((__nr_neigh)->refcount))
-
-static __inline__ void nr_neigh_put(struct nr_neigh *nr_neigh)
-{
-	if (refcount_dec_and_test(&nr_neigh->refcount)) {
-		if (nr_neigh->ax25)
-			ax25_cb_put(nr_neigh->ax25);
-		kfree(nr_neigh->digipeat);
-		kfree(nr_neigh);
-	}
-}
-
-/* nr_node_lock and nr_node_unlock also hold/put the node's refcounter.
- */
-static __inline__ void nr_node_lock(struct nr_node *nr_node)
-{
-	nr_node_hold(nr_node);
-	spin_lock_bh(&nr_node->node_lock);
-}
-
-static __inline__ void nr_node_unlock(struct nr_node *nr_node)
-{
-	spin_unlock_bh(&nr_node->node_lock);
-	nr_node_put(nr_node);
-}
-
-#define nr_neigh_for_each(__nr_neigh, list) \
-	hlist_for_each_entry(__nr_neigh, list, neigh_node)
-
-#define nr_neigh_for_each_safe(__nr_neigh, node2, list) \
-	hlist_for_each_entry_safe(__nr_neigh, node2, list, neigh_node)
-
-#define nr_node_for_each(__nr_node, list) \
-	hlist_for_each_entry(__nr_node, list, node_node)
-
-#define nr_node_for_each_safe(__nr_node, node2, list) \
-	hlist_for_each_entry_safe(__nr_node, node2, list, node_node)
-
-
-/*********************************************************************/
-
-/* af_netrom.c */
-extern int  sysctl_netrom_default_path_quality;
-extern int  sysctl_netrom_obsolescence_count_initialiser;
-extern int  sysctl_netrom_network_ttl_initialiser;
-extern int  sysctl_netrom_transport_timeout;
-extern int  sysctl_netrom_transport_maximum_tries;
-extern int  sysctl_netrom_transport_acknowledge_delay;
-extern int  sysctl_netrom_transport_busy_delay;
-extern int  sysctl_netrom_transport_requested_window_size;
-extern int  sysctl_netrom_transport_no_activity_timeout;
-extern int  sysctl_netrom_routing_control;
-extern int  sysctl_netrom_link_fails_count;
-extern int  sysctl_netrom_reset_circuit;
-
-int nr_rx_frame(struct sk_buff *, struct net_device *);
-void nr_destroy_socket(struct sock *);
-
-/* nr_dev.c */
-int nr_rx_ip(struct sk_buff *, struct net_device *);
-void nr_setup(struct net_device *);
-
-/* nr_in.c */
-int nr_process_rx_frame(struct sock *, struct sk_buff *);
-
-/* nr_loopback.c */
-void nr_loopback_init(void);
-void nr_loopback_clear(void);
-int nr_loopback_queue(struct sk_buff *);
-
-/* nr_out.c */
-void nr_output(struct sock *, struct sk_buff *);
-void nr_send_nak_frame(struct sock *);
-void nr_kick(struct sock *);
-void nr_transmit_buffer(struct sock *, struct sk_buff *);
-void nr_establish_data_link(struct sock *);
-void nr_enquiry_response(struct sock *);
-void nr_check_iframes_acked(struct sock *, unsigned short);
-
-/* nr_route.c */
-void nr_rt_device_down(struct net_device *);
-struct net_device *nr_dev_first(void);
-struct net_device *nr_dev_get(ax25_address *);
-int nr_rt_ioctl(unsigned int, void __user *);
-void nr_link_failed(ax25_cb *, int);
-int nr_route_frame(struct sk_buff *, ax25_cb *);
-extern const struct seq_operations nr_node_seqops;
-extern const struct seq_operations nr_neigh_seqops;
-void nr_rt_free(void);
-
-/* nr_subr.c */
-void nr_clear_queues(struct sock *);
-void nr_frames_acked(struct sock *, unsigned short);
-void nr_requeue_frames(struct sock *);
-int nr_validate_nr(struct sock *, unsigned short);
-int nr_in_rx_window(struct sock *, unsigned short);
-void nr_write_internal(struct sock *, int);
-
-void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags);
-
-/*
- * This routine is called when a Connect Acknowledge with the Choke Flag
- * set is needed to refuse a connection.
- */
-#define nr_transmit_refusal(skb, mine)					\
-do {									\
-	__nr_transmit_reply((skb), (mine), NR_CONNACK | NR_CHOKE_FLAG);	\
-} while (0)
-
-/*
- * This routine is called when we don't have a circuit matching an incoming
- * NET/ROM packet.  This is an G8PZT Xrouter extension.
- */
-#define nr_transmit_reset(skb, mine)					\
-do {									\
-	__nr_transmit_reply((skb), (mine), NR_RESET);			\
-} while (0)
-
-void nr_disconnect(struct sock *, int);
-
-/* nr_timer.c */
-void nr_init_timers(struct sock *sk);
-void nr_start_heartbeat(struct sock *);
-void nr_start_t1timer(struct sock *);
-void nr_start_t2timer(struct sock *);
-void nr_start_t4timer(struct sock *);
-void nr_start_idletimer(struct sock *);
-void nr_stop_heartbeat(struct sock *);
-void nr_stop_t1timer(struct sock *);
-void nr_stop_t2timer(struct sock *);
-void nr_stop_t4timer(struct sock *);
-void nr_stop_idletimer(struct sock *);
-int nr_t1timer_running(struct sock *);
-
-/* sysctl_net_netrom.c */
-int nr_register_sysctl(void);
-void nr_unregister_sysctl(void);
-
-#endif
diff --git a/include/net/rose.h b/include/net/rose.h
index 2b5491bbf39a..41bfcb224f0b 100644
--- a/include/net/rose.h
+++ b/include/net/rose.h
@@ -1,266 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- *	Declarations of Rose type objects.
- *
- *	Jonathan Naylor G4KLX	25/8/96
- */
-
 #ifndef _ROSE_H
-#define _ROSE_H 
-
-#include <linux/refcount.h>
-#include <linux/rose.h>
-#include <net/ax25.h>
-#include <net/sock.h>
-
-#define	ROSE_ADDR_LEN			5
-
-#define	ROSE_MIN_LEN			3
-
-#define	ROSE_CALL_REQ_ADDR_LEN_OFF	3
-#define	ROSE_CALL_REQ_ADDR_LEN_VAL	0xAA	/* each address is 10 digits */
-#define	ROSE_CALL_REQ_DEST_ADDR_OFF	4
-#define	ROSE_CALL_REQ_SRC_ADDR_OFF	9
-#define	ROSE_CALL_REQ_FACILITIES_OFF	14
-
-#define	ROSE_GFI			0x10
-#define	ROSE_Q_BIT			0x80
-#define	ROSE_D_BIT			0x40
-#define	ROSE_M_BIT			0x10
-
-#define	ROSE_CALL_REQUEST		0x0B
-#define	ROSE_CALL_ACCEPTED		0x0F
-#define	ROSE_CLEAR_REQUEST		0x13
-#define	ROSE_CLEAR_CONFIRMATION		0x17
-#define	ROSE_DATA			0x00
-#define	ROSE_INTERRUPT			0x23
-#define	ROSE_INTERRUPT_CONFIRMATION	0x27
-#define	ROSE_RR				0x01
-#define	ROSE_RNR			0x05
-#define	ROSE_REJ			0x09
-#define	ROSE_RESET_REQUEST		0x1B
-#define	ROSE_RESET_CONFIRMATION		0x1F
-#define	ROSE_REGISTRATION_REQUEST	0xF3
-#define	ROSE_REGISTRATION_CONFIRMATION	0xF7
-#define	ROSE_RESTART_REQUEST		0xFB
-#define	ROSE_RESTART_CONFIRMATION	0xFF
-#define	ROSE_DIAGNOSTIC			0xF1
-#define	ROSE_ILLEGAL			0xFD
-
-/* Define Link State constants. */
-
-enum {
-	ROSE_STATE_0,			/* Ready */
-	ROSE_STATE_1,			/* Awaiting Call Accepted */
-	ROSE_STATE_2,			/* Awaiting Clear Confirmation */
-	ROSE_STATE_3,			/* Data Transfer */
-	ROSE_STATE_4,			/* Awaiting Reset Confirmation */
-	ROSE_STATE_5			/* Deferred Call Acceptance */
-};
-
-#define ROSE_DEFAULT_T0			180000		/* Default T10 T20 value */
-#define ROSE_DEFAULT_T1			200000		/* Default T11 T21 value */
-#define ROSE_DEFAULT_T2			180000		/* Default T12 T22 value */
-#define	ROSE_DEFAULT_T3			180000		/* Default T13 T23 value */
-#define	ROSE_DEFAULT_HB			5000		/* Default Holdback value */
-#define	ROSE_DEFAULT_IDLE		0		/* No Activity Timeout - none */
-#define	ROSE_DEFAULT_ROUTING		1		/* Default routing flag */
-#define	ROSE_DEFAULT_FAIL_TIMEOUT	120000		/* Time until link considered usable */
-#define	ROSE_DEFAULT_MAXVC		50		/* Maximum number of VCs per neighbour */
-#define	ROSE_DEFAULT_WINDOW_SIZE	7		/* Default window size */
-
-#define ROSE_MODULUS 			8
-#define	ROSE_MAX_PACKET_SIZE		251		/* Maximum packet size */
-
-#define	ROSE_COND_ACK_PENDING		0x01
-#define	ROSE_COND_PEER_RX_BUSY		0x02
-#define	ROSE_COND_OWN_RX_BUSY		0x04
-
-#define	FAC_NATIONAL			0x00
-#define	FAC_CCITT			0x0F
-
-#define	FAC_NATIONAL_RAND		0x7F
-#define	FAC_NATIONAL_FLAGS		0x3F
-#define	FAC_NATIONAL_DEST_DIGI		0xE9
-#define	FAC_NATIONAL_SRC_DIGI		0xEB
-#define	FAC_NATIONAL_FAIL_CALL		0xED
-#define	FAC_NATIONAL_FAIL_ADD		0xEE
-#define	FAC_NATIONAL_DIGIS			0xEF
-
-#define	FAC_CCITT_DEST_NSAP		0xC9
-#define	FAC_CCITT_SRC_NSAP		0xCB
-
-struct rose_neigh {
-	struct rose_neigh	*next;
-	ax25_address		callsign;
-	ax25_digi		*digipeat;
-	ax25_cb			*ax25;
-	struct net_device		*dev;
-	unsigned short		count;
-	refcount_t		use;
-	unsigned int		number;
-	char			restarted;
-	char			dce_mode;
-	char			loopback;
-	struct sk_buff_head	queue;
-	struct timer_list	t0timer;
-	struct timer_list	ftimer;
-};
-
-struct rose_node {
-	struct rose_node	*next;
-	rose_address		address;
-	unsigned short		mask;
-	unsigned char		count;
-	char			loopback;
-	struct rose_neigh	*neighbour[3];
-};
-
-struct rose_route {
-	struct rose_route	*next;
-	unsigned int		lci1, lci2;
-	rose_address		src_addr, dest_addr;
-	ax25_address		src_call, dest_call;
-	struct rose_neigh 	*neigh1, *neigh2;
-	unsigned int		rand;
-};
-
-struct rose_sock {
-	struct sock		sock;
-	rose_address		source_addr,   dest_addr;
-	ax25_address		source_call,   dest_call;
-	unsigned char		source_ndigis, dest_ndigis;
-	ax25_address		source_digis[ROSE_MAX_DIGIS];
-	ax25_address		dest_digis[ROSE_MAX_DIGIS];
-	struct rose_neigh	*neighbour;
-	struct net_device	*device;
-	netdevice_tracker	dev_tracker;
-	unsigned int		lci, rand;
-	unsigned char		state, condition, qbitincl, defer;
-	unsigned char		cause, diagnostic;
-	unsigned short		vs, vr, va, vl;
-	unsigned long		t1, t2, t3, hb, idle;
-#ifdef M_BIT
-	unsigned short		fraglen;
-	struct sk_buff_head	frag_queue;
-#endif
-	struct sk_buff_head	ack_queue;
-	struct rose_facilities_struct facilities;
-	struct timer_list	timer;
-	struct timer_list	idletimer;
-};
-
-#define rose_sk(sk) ((struct rose_sock *)(sk))
-
-static inline void rose_neigh_hold(struct rose_neigh *rose_neigh)
-{
-	refcount_inc(&rose_neigh->use);
-}
-
-static inline void rose_neigh_put(struct rose_neigh *rose_neigh)
-{
-	if (refcount_dec_and_test(&rose_neigh->use)) {
-		if (rose_neigh->ax25)
-			ax25_cb_put(rose_neigh->ax25);
-		kfree(rose_neigh->digipeat);
-		kfree(rose_neigh);
-	}
-}
-
-/* af_rose.c */
-extern ax25_address rose_callsign;
-extern int  sysctl_rose_restart_request_timeout;
-extern int  sysctl_rose_call_request_timeout;
-extern int  sysctl_rose_reset_request_timeout;
-extern int  sysctl_rose_clear_request_timeout;
-extern int  sysctl_rose_no_activity_timeout;
-extern int  sysctl_rose_ack_hold_back_timeout;
-extern int  sysctl_rose_routing_control;
-extern int  sysctl_rose_link_fail_timeout;
-extern int  sysctl_rose_maximum_vcs;
-extern int  sysctl_rose_window_size;
-
-int rosecmp(const rose_address *, const rose_address *);
-int rosecmpm(const rose_address *, const rose_address *, unsigned short);
-char *rose2asc(char *buf, const rose_address *);
-struct sock *rose_find_socket(unsigned int, struct rose_neigh *);
-void rose_kill_by_neigh(struct rose_neigh *);
-unsigned int rose_new_lci(struct rose_neigh *);
-int rose_rx_call_request(struct sk_buff *, struct net_device *,
-			 struct rose_neigh *, unsigned int);
-void rose_destroy_socket(struct sock *);
-
-/* rose_dev.c */
-void rose_setup(struct net_device *);
-
-/* rose_in.c */
-int rose_process_rx_frame(struct sock *, struct sk_buff *);
-
-/* rose_link.c */
-void rose_start_ftimer(struct rose_neigh *);
-void rose_stop_ftimer(struct rose_neigh *);
-void rose_stop_t0timer(struct rose_neigh *);
-int rose_ftimer_running(struct rose_neigh *);
-void rose_link_rx_restart(struct sk_buff *, struct rose_neigh *,
-			  unsigned short);
-void rose_transmit_clear_request(struct rose_neigh *, unsigned int,
-				 unsigned char, unsigned char);
-void rose_transmit_link(struct sk_buff *, struct rose_neigh *);
-
-/* rose_loopback.c */
-void rose_loopback_init(void);
-void rose_loopback_clear(void);
-int rose_loopback_queue(struct sk_buff *, struct rose_neigh *);
-
-/* rose_out.c */
-void rose_kick(struct sock *);
-void rose_enquiry_response(struct sock *);
-
-/* rose_route.c */
-extern struct rose_neigh *rose_loopback_neigh;
-extern const struct seq_operations rose_neigh_seqops;
-extern const struct seq_operations rose_node_seqops;
-extern struct seq_operations rose_route_seqops;
-
-void rose_add_loopback_neigh(void);
-int __must_check rose_add_loopback_node(const rose_address *);
-void rose_del_loopback_node(const rose_address *);
-void rose_rt_device_down(struct net_device *);
-void rose_link_device_down(struct net_device *);
-struct net_device *rose_dev_first(void);
-struct net_device *rose_dev_get(rose_address *);
-struct rose_route *rose_route_free_lci(unsigned int, struct rose_neigh *);
-struct rose_neigh *rose_get_neigh(rose_address *, unsigned char *,
-				  unsigned char *, int);
-int rose_rt_ioctl(unsigned int, void __user *);
-void rose_link_failed(ax25_cb *, int);
-int rose_route_frame(struct sk_buff *, ax25_cb *);
-void rose_rt_free(void);
-
-/* rose_subr.c */
-void rose_clear_queues(struct sock *);
-void rose_frames_acked(struct sock *, unsigned short);
-void rose_requeue_frames(struct sock *);
-int rose_validate_nr(struct sock *, unsigned short);
-void rose_write_internal(struct sock *, int);
-int rose_decode(struct sk_buff *, int *, int *, int *, int *, int *);
-int rose_parse_facilities(unsigned char *, unsigned int,
-			  struct rose_facilities_struct *);
-void rose_disconnect(struct sock *, int, int, int);
-
-/* rose_timer.c */
-void rose_start_heartbeat(struct sock *);
-void rose_start_t1timer(struct sock *);
-void rose_start_t2timer(struct sock *);
-void rose_start_t3timer(struct sock *);
-void rose_start_hbtimer(struct sock *);
-void rose_start_idletimer(struct sock *);
-void rose_stop_heartbeat(struct sock *);
-void rose_stop_timer(struct sock *);
-void rose_stop_idletimer(struct sock *);
+#define _ROSE_H
 
-/* sysctl_net_rose.c */
-void rose_register_sysctl(void);
-void rose_unregister_sysctl(void);
+#define	ROSE_ADDR_LEN	5
 
 #endif
diff --git a/include/uapi/linux/baycom.h b/include/uapi/linux/baycom.h
deleted file mode 100644
index 478cb565ae52..000000000000
--- a/include/uapi/linux/baycom.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * The Linux BAYCOM driver for the Baycom serial 1200 baud modem
- * and the parallel 9600 baud modem
- * (C) 1997-1998 by Thomas Sailer, HB9JNX/AE4WA
- */
-
-#ifndef _BAYCOM_H
-#define _BAYCOM_H
-
-/* -------------------------------------------------------------------- */
-/*
- * structs for the IOCTL commands
- */
-
-struct baycom_debug_data {
-	unsigned long debug1;
-	unsigned long debug2;
-	long debug3;
-};
-
-struct baycom_ioctl {
-	int cmd;
-	union {
-		struct baycom_debug_data dbg;
-	} data;
-};
-
-/* -------------------------------------------------------------------- */
-
-/*
- * ioctl values change for baycom
- */
-#define BAYCOMCTL_GETDEBUG       0x92
-
-/* -------------------------------------------------------------------- */
-
-#endif /* _BAYCOM_H */
-
-/* --------------------------------------------------------------------- */
diff --git a/include/uapi/linux/hdlcdrv.h b/include/uapi/linux/hdlcdrv.h
deleted file mode 100644
index 9fe9499403a6..000000000000
--- a/include/uapi/linux/hdlcdrv.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * hdlcdrv.h  -- HDLC packet radio network driver.
- * The Linux soundcard driver for 1200 baud and 9600 baud packet radio
- * (C) 1996-1998 by Thomas Sailer, HB9JNX/AE4WA
- */
-
-#ifndef _UAPI_HDLCDRV_H
-#define _UAPI_HDLCDRV_H
-
-/* -------------------------------------------------------------------- */
-/*
- * structs for the IOCTL commands
- */
-
-struct hdlcdrv_params {
-	int iobase;
-	int irq;
-	int dma;
-	int dma2;
-	int seriobase;
-	int pariobase;
-	int midiiobase;
-};	
-
-struct hdlcdrv_channel_params {
-	int tx_delay;  /* the transmitter keyup delay in 10ms units */
-	int tx_tail;   /* the transmitter keyoff delay in 10ms units */
-	int slottime;  /* the slottime in 10ms; usually 10 = 100ms */
-	int ppersist;  /* the p-persistence 0..255 */
-	int fulldup;   /* some driver do not support full duplex, setting */
-	               /* this just makes them send even if DCD is on */
-};	
-
-struct hdlcdrv_old_channel_state {
-  	int ptt;
-  	int dcd;
-  	int ptt_keyed;
-};
-
-struct hdlcdrv_channel_state {
- 	int ptt;
- 	int dcd;
- 	int ptt_keyed;
- 	unsigned long tx_packets;
- 	unsigned long tx_errors;
- 	unsigned long rx_packets;
- 	unsigned long rx_errors;
-};
-
-struct hdlcdrv_ioctl {
-	int cmd;
-	union {
-		struct hdlcdrv_params mp;
-		struct hdlcdrv_channel_params cp;
-		struct hdlcdrv_channel_state cs;
-		struct hdlcdrv_old_channel_state ocs;
-		unsigned int calibrate;
-		unsigned char bits;
-		char modename[128];
-		char drivername[32];
-	} data;
-};
-
-/* -------------------------------------------------------------------- */
-
-/*
- * ioctl values
- */
-#define HDLCDRVCTL_GETMODEMPAR       0
-#define HDLCDRVCTL_SETMODEMPAR       1
-#define HDLCDRVCTL_MODEMPARMASK      2  /* not handled by hdlcdrv */
-#define HDLCDRVCTL_GETCHANNELPAR    10
-#define HDLCDRVCTL_SETCHANNELPAR    11
-#define HDLCDRVCTL_OLDGETSTAT       20
-#define HDLCDRVCTL_CALIBRATE        21
-#define HDLCDRVCTL_GETSTAT          22
-
-/*
- * these are mainly for debugging purposes
- */
-#define HDLCDRVCTL_GETSAMPLES       30
-#define HDLCDRVCTL_GETBITS          31
-
-/*
- * not handled by hdlcdrv, but by its depending drivers
- */
-#define HDLCDRVCTL_GETMODE          40
-#define HDLCDRVCTL_SETMODE          41
-#define HDLCDRVCTL_MODELIST         42
-#define HDLCDRVCTL_DRIVERNAME       43
-
-/*
- * mask of needed modem parameters, returned by HDLCDRVCTL_MODEMPARMASK
- */
-#define HDLCDRV_PARMASK_IOBASE      (1<<0)
-#define HDLCDRV_PARMASK_IRQ         (1<<1)
-#define HDLCDRV_PARMASK_DMA         (1<<2)
-#define HDLCDRV_PARMASK_DMA2        (1<<3)
-#define HDLCDRV_PARMASK_SERIOBASE   (1<<4)
-#define HDLCDRV_PARMASK_PARIOBASE   (1<<5)
-#define HDLCDRV_PARMASK_MIDIIOBASE  (1<<6)
-
-/* -------------------------------------------------------------------- */
-
-
-/* -------------------------------------------------------------------- */
-
-#endif /* _UAPI_HDLCDRV_H */
-
-/* -------------------------------------------------------------------- */
diff --git a/include/uapi/linux/netrom.h b/include/uapi/linux/netrom.h
deleted file mode 100644
index 7498ea3c3940..000000000000
--- a/include/uapi/linux/netrom.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * These are the public elements of the Linux kernel NET/ROM implementation.
- * For kernel AX.25 see the file ax25.h. This file requires ax25.h for the
- * definition of the ax25_address structure.
- */
-
-#ifndef	NETROM_KERNEL_H
-#define	NETROM_KERNEL_H
-
-#include <linux/ax25.h>
-
-#define NETROM_MTU	236
-
-#define NETROM_T1	1
-#define NETROM_T2	2
-#define NETROM_N2	3
-#define	NETROM_T4	6
-#define	NETROM_IDLE	7
-
-#define	SIOCNRDECOBS		(SIOCPROTOPRIVATE+2)
-
-struct nr_route_struct {
-#define	NETROM_NEIGH	0
-#define	NETROM_NODE	1
-	int		type;
-	ax25_address	callsign;
-	char		device[16];
-	unsigned int	quality;
-	char		mnemonic[7];
-	ax25_address	neighbour;
-	unsigned int	obs_count;
-	unsigned int	ndigis;
-	ax25_address	digipeaters[AX25_MAX_DIGIS];
-};
-
-#endif
diff --git a/include/uapi/linux/rose.h b/include/uapi/linux/rose.h
deleted file mode 100644
index 19aa4693c8fc..000000000000
--- a/include/uapi/linux/rose.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * These are the public elements of the Linux kernel Rose implementation.
- * For kernel AX.25 see the file ax25.h. This file requires ax25.h for the
- * definition of the ax25_address structure.
- */
-
-#ifndef	ROSE_KERNEL_H
-#define	ROSE_KERNEL_H
-
-#include <linux/socket.h>
-#include <linux/ax25.h>
-
-#define ROSE_MTU	251
-
-#define ROSE_MAX_DIGIS 6
-
-#define	ROSE_DEFER	1
-#define ROSE_T1		2
-#define	ROSE_T2		3
-#define	ROSE_T3		4
-#define	ROSE_IDLE	5
-#define	ROSE_QBITINCL	6
-#define	ROSE_HOLDBACK	7
-
-#define	SIOCRSGCAUSE		(SIOCPROTOPRIVATE+0)
-#define	SIOCRSSCAUSE		(SIOCPROTOPRIVATE+1)
-#define	SIOCRSL2CALL		(SIOCPROTOPRIVATE+2)
-#define	SIOCRSSL2CALL		(SIOCPROTOPRIVATE+2)
-#define	SIOCRSACCEPT		(SIOCPROTOPRIVATE+3)
-#define	SIOCRSCLRRT		(SIOCPROTOPRIVATE+4)
-#define	SIOCRSGL2CALL		(SIOCPROTOPRIVATE+5)
-#define	SIOCRSGFACILITIES	(SIOCPROTOPRIVATE+6)
-
-#define	ROSE_DTE_ORIGINATED	0x00
-#define	ROSE_NUMBER_BUSY	0x01
-#define	ROSE_INVALID_FACILITY	0x03
-#define	ROSE_NETWORK_CONGESTION	0x05
-#define	ROSE_OUT_OF_ORDER	0x09
-#define	ROSE_ACCESS_BARRED	0x0B
-#define	ROSE_NOT_OBTAINABLE	0x0D
-#define	ROSE_REMOTE_PROCEDURE	0x11
-#define	ROSE_LOCAL_PROCEDURE	0x13
-#define	ROSE_SHIP_ABSENT	0x39
-
-typedef struct {
-	char		rose_addr[5];
-} rose_address;
-
-struct sockaddr_rose {
-	__kernel_sa_family_t srose_family;
-	rose_address	srose_addr;
-	ax25_address	srose_call;
-	int		srose_ndigis;
-	ax25_address	srose_digi;
-};
-
-struct full_sockaddr_rose {
-	__kernel_sa_family_t srose_family;
-	rose_address	srose_addr;
-	ax25_address	srose_call;
-	unsigned int	srose_ndigis;
-	ax25_address	srose_digis[ROSE_MAX_DIGIS];
-};
-
-struct rose_route_struct {
-	rose_address	address;
-	unsigned short	mask;
-	ax25_address	neighbour;
-	char		device[16];
-	unsigned char	ndigis;
-	ax25_address	digipeaters[AX25_MAX_DIGIS];
-};
-
-struct rose_cause_struct {
-	unsigned char	cause;
-	unsigned char	diagnostic;
-};
-
-struct rose_facilities_struct {
-	rose_address	source_addr,   dest_addr;
-	ax25_address	source_call,   dest_call;
-	unsigned char	source_ndigis, dest_ndigis;
-	ax25_address	source_digis[ROSE_MAX_DIGIS];
-	ax25_address	dest_digis[ROSE_MAX_DIGIS];
-	unsigned int	rand;
-	rose_address	fail_addr;
-	ax25_address	fail_call;
-};
-
-#endif
diff --git a/include/uapi/linux/scc.h b/include/uapi/linux/scc.h
deleted file mode 100644
index 947edb17ce9d..000000000000
--- a/include/uapi/linux/scc.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/* $Id: scc.h,v 1.29 1997/04/02 14:56:45 jreuter Exp jreuter $ */
-
-#ifndef _UAPI_SCC_H
-#define _UAPI_SCC_H
-
-#include <linux/sockios.h>
-
-/* selection of hardware types */
-
-#define PA0HZP		0x00	/* hardware type for PA0HZP SCC card and compatible */
-#define EAGLE		0x01    /* hardware type for EAGLE card */
-#define PC100		0x02	/* hardware type for PC100 card */
-#define PRIMUS		0x04	/* hardware type for PRIMUS-PC (DG9BL) card */
-#define DRSI		0x08	/* hardware type for DRSI PC*Packet card */
-#define BAYCOM		0x10	/* hardware type for BayCom (U)SCC */
-
-/* DEV ioctl() commands */
-
-enum SCC_ioctl_cmds {
-	SIOCSCCRESERVED = SIOCDEVPRIVATE,
-	SIOCSCCCFG,
-	SIOCSCCINI,
-	SIOCSCCCHANINI,
-	SIOCSCCSMEM,
-	SIOCSCCGKISS,
-	SIOCSCCSKISS,
-	SIOCSCCGSTAT,
-	SIOCSCCCAL
-};
-
-/* Device parameter control (from WAMPES) */
-
-enum L1_params {
-	PARAM_DATA,
-	PARAM_TXDELAY,
-	PARAM_PERSIST,
-	PARAM_SLOTTIME,
-	PARAM_TXTAIL,
-	PARAM_FULLDUP,
-	PARAM_SOFTDCD,		/* was: PARAM_HW */
-	PARAM_MUTE,		/* ??? */
-	PARAM_DTR,
-	PARAM_RTS,
-	PARAM_SPEED,
-	PARAM_ENDDELAY,		/* ??? */
-	PARAM_GROUP,
-	PARAM_IDLE,
-	PARAM_MIN,
-	PARAM_MAXKEY,
-	PARAM_WAIT,
-	PARAM_MAXDEFER,
-	PARAM_TX,
-	PARAM_HWEVENT = 31,
-	PARAM_RETURN = 255	/* reset kiss mode */
-};
-
-/* fulldup parameter */
-
-enum FULLDUP_modes {
-	KISS_DUPLEX_HALF,	/* normal CSMA operation */
-	KISS_DUPLEX_FULL,	/* fullduplex, key down trx after transmission */
-	KISS_DUPLEX_LINK,	/* fullduplex, key down trx after 'idletime' sec */
-	KISS_DUPLEX_OPTIMA	/* fullduplex, let the protocol layer control the hw */
-};
-
-/* misc. parameters */
-
-#define TIMER_OFF	65535U	/* to switch off timers */
-#define NO_SUCH_PARAM	65534U	/* param not implemented */
-
-/* HWEVENT parameter */
-
-enum HWEVENT_opts {
-	HWEV_DCD_ON,
-	HWEV_DCD_OFF,
-	HWEV_ALL_SENT
-};
-
-/* channel grouping */
-
-#define RXGROUP		0100	/* if set, only tx when all channels clear */
-#define TXGROUP		0200	/* if set, don't transmit simultaneously */
-
-/* Tx/Rx clock sources */
-
-enum CLOCK_sources {
-	CLK_DPLL,	/* normal halfduplex operation */
-	CLK_EXTERNAL,	/* external clocking (G3RUH/DF9IC modems) */
-	CLK_DIVIDER,	/* Rx = DPLL, Tx = divider (fullduplex with */
-			/* modems without clock regeneration */
-	CLK_BRG		/* experimental fullduplex mode with DPLL/BRG for */
-			/* MODEMs without clock recovery */
-};
-
-/* Tx state */
-
-enum TX_state {
-	TXS_IDLE,	/* Transmitter off, no data pending */
-	TXS_BUSY,	/* waiting for permission to send / tailtime */
-	TXS_ACTIVE,	/* Transmitter on, sending data */
-	TXS_NEWFRAME,	/* reset CRC and send (next) frame */
-	TXS_IDLE2,	/* Transmitter on, no data pending */
-	TXS_WAIT,	/* Waiting for Mintime to expire */
-	TXS_TIMEOUT	/* We had a transmission timeout */
-};
-
-typedef unsigned long io_port;	/* type definition for an 'io port address' */
-
-/* SCC statistical information */
-
-struct scc_stat {
-        long rxints;            /* Receiver interrupts */
-        long txints;            /* Transmitter interrupts */
-        long exints;            /* External/status interrupts */
-        long spints;            /* Special receiver interrupts */
-
-        long txframes;          /* Packets sent */
-        long rxframes;          /* Number of Frames Actually Received */
-        long rxerrs;            /* CRC Errors */
-        long txerrs;		/* KISS errors */
-        
-	unsigned int nospace;	/* "Out of buffers" */
-	unsigned int rx_over;	/* Receiver Overruns */
-	unsigned int tx_under;	/* Transmitter Underruns */
-
-	unsigned int tx_state;	/* Transmitter state */
-	int tx_queued;		/* tx frames enqueued */
-
-	unsigned int maxqueue;	/* allocated tx_buffers */
-	unsigned int bufsize;	/* used buffersize */
-};
-
-struct scc_modem {
-	long speed;		/* Line speed, bps */
-	char clocksrc;		/* 0 = DPLL, 1 = external, 2 = divider */
-	char nrz;		/* NRZ instead of NRZI */	
-};
-
-struct scc_kiss_cmd {
-	int  	 command;	/* one of the KISS-Commands defined above */
-	unsigned param;		/* KISS-Param */
-};
-
-struct scc_hw_config {
-	io_port data_a;		/* data port channel A */
-	io_port ctrl_a;		/* control port channel A */
-	io_port data_b;		/* data port channel B */
-	io_port ctrl_b;		/* control port channel B */
-	io_port vector_latch;	/* INTACK-Latch (#) */
-	io_port	special;	/* special function port */
-
-	int	irq;		/* irq */
-	long	clock;		/* clock */
-	char	option;		/* command for function port */
-
-	char brand;		/* hardware type */
-	char escc;		/* use ext. features of a 8580/85180/85280 */
-};
-
-/* (#) only one INTACK latch allowed. */
-
-
-struct scc_mem_config {
-	unsigned int dummy;
-	unsigned int bufsize;
-};
-
-struct scc_calibrate {
-	unsigned int time;
-	unsigned char pattern;
-};
-
-#endif /* _UAPI_SCC_H */
diff --git a/net/Kconfig b/net/Kconfig
index 5c588dbcbdbd..bdea8aef7983 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -414,7 +414,6 @@ endmenu # Network testing
 
 endmenu # Networking options
 
-source "net/ax25/Kconfig"
 source "net/can/Kconfig"
 source "net/bluetooth/Kconfig"
 source "net/rxrpc/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 98e182829eff..d2175fce0406 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -28,9 +28,6 @@ obj-y				+= dsa/
 obj-$(CONFIG_ATALK)		+= appletalk/
 obj-$(CONFIG_X25)		+= x25/
 obj-$(CONFIG_LAPB)		+= lapb/
-obj-$(CONFIG_NETROM)		+= netrom/
-obj-$(CONFIG_ROSE)		+= rose/
-obj-$(CONFIG_AX25)		+= ax25/
 obj-$(CONFIG_CAN)		+= can/
 obj-$(CONFIG_BT)		+= bluetooth/
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
deleted file mode 100644
index 310169ce1488..000000000000
--- a/net/ax25/Kconfig
+++ /dev/null
@@ -1,108 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Amateur Radio protocols and AX.25 device configuration
-#
-
-menuconfig HAMRADIO
-	depends on NET
-	bool "Amateur Radio support"
-	help
-	  If you want to connect your Linux box to an amateur radio, answer Y
-	  here. You want to read <https://www.tapr.org/>
-	  and more specifically about AX.25 on Linux
-	  <https://linux-ax25.in-berlin.de>.
-
-	  Note that the answer to this question won't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about amateur radio.
-
-comment "Packet Radio protocols"
-	depends on HAMRADIO
-
-config AX25
-	tristate "Amateur Radio AX.25 Level 2 protocol"
-	depends on HAMRADIO
-	help
-	  This is the protocol used for computer communication over amateur
-	  radio. It is either used by itself for point-to-point links, or to
-	  carry other protocols such as tcp/ip. To use it, you need a device
-	  that connects your Linux box to your amateur radio. You can either
-	  use a low speed TNC (a Terminal Node Controller acts as a kind of
-	  modem connecting your computer's serial port to your radio's
-	  microphone input and speaker output) supporting the KISS protocol or
-	  one of the various SCC cards that are supported by the generic Z8530
-	  or the DMA SCC driver. Another option are the Baycom modem serial
-	  and parallel port hacks or the sound card modem (supported by their
-	  own drivers). If you say Y here, you also have to say Y to one of
-	  those drivers.
-
-	  Information about where to get supporting software for Linux amateur
-	  radio as well as information about how to configure an AX.25 port is
-	  contained in the AX25-HOWTO, available from
-	  <https://www.tldp.org/docs.html#howto>. You might also want to
-	  check out the file <file:Documentation/networking/ax25.rst> in the
-	  kernel source. More information about digital amateur radio in
-	  general is on the WWW at
-	  <https://www.tapr.org/>.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ax25.
-
-config AX25_DAMA_SLAVE
-	bool "AX.25 DAMA Slave support"
-	default y
-	depends on AX25
-	help
-	  DAMA is a mechanism to prevent collisions when doing AX.25
-	  networking. A DAMA server (called "master") accepts incoming traffic
-	  from clients (called "slaves") and redistributes it to other slaves.
-	  If you say Y here, your Linux box will act as a DAMA slave; this is
-	  transparent in that you don't have to do any special DAMA
-	  configuration. Linux cannot yet act as a DAMA server.  This option
-	  only compiles DAMA slave support into the kernel.  It still needs to
-	  be enabled at runtime.  For more about DAMA see
-	  <https://linux-ax25.in-berlin.de>.  If unsure, say Y.
-
-config NETROM
-	tristate "Amateur Radio NET/ROM protocol"
-	depends on AX25
-	help
-	  NET/ROM is a network layer protocol on top of AX.25 useful for
-	  routing.
-
-	  A comprehensive listing of all the software for Linux amateur radio
-	  users as well as information about how to configure an AX.25 port is
-	  contained in the Linux Ham Wiki, available from
-	  <https://linux-ax25.in-berlin.de>. You also might want to check out
-	  the file <file:Documentation/networking/ax25.rst>. More information
-	  about digital amateur radio in general is on the WWW at
-	  <https://www.tapr.org/>.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called netrom.
-
-config ROSE
-	tristate "Amateur Radio X.25 PLP (Rose)"
-	depends on AX25
-	help
-	  The Packet Layer Protocol (PLP) is a way to route packets over X.25
-	  connections in general and amateur radio AX.25 connections in
-	  particular, essentially an alternative to NET/ROM.
-
-	  A comprehensive listing of all the software for Linux amateur radio
-	  users as well as information about how to configure an AX.25 port is
-	  contained in the Linux Ham Wiki, available from
-	  <https://linux-ax25.in-berlin.de>.  You also might want to check out
-	  the file <file:Documentation/networking/ax25.rst>. More information
-	  about digital amateur radio in general is on the WWW at
-	  <https://www.tapr.org/>.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called rose.
-
-menu "AX.25 network device drivers"
-	depends on HAMRADIO && AX25
-
-source "drivers/net/hamradio/Kconfig"
-
-endmenu
diff --git a/net/ax25/Makefile b/net/ax25/Makefile
deleted file mode 100644
index 2e53affc8568..000000000000
--- a/net/ax25/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the Linux AX.25 layer.
-#
-
-obj-$(CONFIG_AX25) += ax25.o
-
-ax25-y	 := ax25_addr.o ax25_dev.o ax25_iface.o ax25_in.o ax25_ip.o ax25_out.o \
-	    ax25_route.o ax25_std_in.o ax25_std_subr.o ax25_std_timer.o \
-	    ax25_subr.o ax25_timer.o ax25_uid.o af_ax25.o
-ax25-$(CONFIG_AX25_DAMA_SLAVE) += ax25_ds_in.o ax25_ds_subr.o ax25_ds_timer.o
-ax25-$(CONFIG_SYSCTL) += sysctl_net_ax25.o
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
deleted file mode 100644
index 9d236e64f5f5..000000000000
--- a/net/ax25/af_ax25.c
+++ /dev/null
@@ -1,2089 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Darryl Miles G7LED (dlm@g7led.demon.co.uk)
- * Copyright (C) Steven Whitehouse GW7RRM (stevew@acm.org)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
- * Copyright (C) Hans Alblas PE1AYX (hans@esrac.ele.tue.nl)
- * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
- */
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/sched/signal.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/sysctl.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <net/net_namespace.h>
-#include <net/tcp_states.h>
-#include <net/ip.h>
-#include <net/arp.h>
-
-
-
-HLIST_HEAD(ax25_list);
-DEFINE_SPINLOCK(ax25_list_lock);
-
-static const struct proto_ops ax25_proto_ops;
-
-static void ax25_free_sock(struct sock *sk)
-{
-	ax25_cb_put(sk_to_ax25(sk));
-}
-
-/*
- *	Socket removal during an interrupt is now safe.
- */
-static void ax25_cb_del(ax25_cb *ax25)
-{
-	spin_lock_bh(&ax25_list_lock);
-	if (!hlist_unhashed(&ax25->ax25_node)) {
-		hlist_del_init(&ax25->ax25_node);
-		ax25_cb_put(ax25);
-	}
-	spin_unlock_bh(&ax25_list_lock);
-}
-
-/*
- *	Kill all bound sockets on a dropped device.
- */
-static void ax25_kill_by_device(struct net_device *dev)
-{
-	ax25_dev *ax25_dev;
-	ax25_cb *s;
-	struct sock *sk;
-
-	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
-		return;
-	ax25_dev->device_up = false;
-
-	spin_lock_bh(&ax25_list_lock);
-again:
-	ax25_for_each(s, &ax25_list) {
-		if (s->ax25_dev == ax25_dev) {
-			sk = s->sk;
-			if (!sk) {
-				spin_unlock_bh(&ax25_list_lock);
-				ax25_disconnect(s, ENETUNREACH);
-				s->ax25_dev = NULL;
-				ax25_cb_del(s);
-				spin_lock_bh(&ax25_list_lock);
-				goto again;
-			}
-			sock_hold(sk);
-			spin_unlock_bh(&ax25_list_lock);
-			lock_sock(sk);
-			ax25_disconnect(s, ENETUNREACH);
-			s->ax25_dev = NULL;
-			if (sk->sk_socket) {
-				netdev_put(ax25_dev->dev,
-					   &s->dev_tracker);
-				ax25_dev_put(ax25_dev);
-			}
-			ax25_cb_del(s);
-			release_sock(sk);
-			spin_lock_bh(&ax25_list_lock);
-			sock_put(sk);
-			/* The entry could have been deleted from the
-			 * list meanwhile and thus the next pointer is
-			 * no longer valid.  Play it safe and restart
-			 * the scan.  Forward progress is ensured
-			 * because we set s->ax25_dev to NULL and we
-			 * are never passed a NULL 'dev' argument.
-			 */
-			goto again;
-		}
-	}
-	spin_unlock_bh(&ax25_list_lock);
-}
-
-/*
- *	Handle device status changes.
- */
-static int ax25_device_event(struct notifier_block *this, unsigned long event,
-			     void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
-
-	/* Reject non AX.25 devices */
-	if (dev->type != ARPHRD_AX25)
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case NETDEV_UP:
-		ax25_dev_device_up(dev);
-		break;
-	case NETDEV_DOWN:
-		ax25_kill_by_device(dev);
-		ax25_rt_device_down(dev);
-		ax25_dev_device_down(dev);
-		break;
-	default:
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-/*
- *	Add a socket to the bound sockets list.
- */
-void ax25_cb_add(ax25_cb *ax25)
-{
-	spin_lock_bh(&ax25_list_lock);
-	ax25_cb_hold(ax25);
-	hlist_add_head(&ax25->ax25_node, &ax25_list);
-	spin_unlock_bh(&ax25_list_lock);
-}
-
-/*
- *	Find a socket that wants to accept the SABM we have just
- *	received.
- */
-struct sock *ax25_find_listener(ax25_address *addr, int digi,
-	struct net_device *dev, int type)
-{
-	ax25_cb *s;
-
-	spin_lock(&ax25_list_lock);
-	ax25_for_each(s, &ax25_list) {
-		if ((s->iamdigi && !digi) || (!s->iamdigi && digi))
-			continue;
-		if (s->sk && !ax25cmp(&s->source_addr, addr) &&
-		    s->sk->sk_type == type && s->sk->sk_state == TCP_LISTEN) {
-			/* If device is null we match any device */
-			if (s->ax25_dev == NULL || s->ax25_dev->dev == dev) {
-				sock_hold(s->sk);
-				spin_unlock(&ax25_list_lock);
-				return s->sk;
-			}
-		}
-	}
-	spin_unlock(&ax25_list_lock);
-
-	return NULL;
-}
-
-/*
- *	Find an AX.25 socket given both ends.
- */
-struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr,
-	int type)
-{
-	struct sock *sk = NULL;
-	ax25_cb *s;
-
-	spin_lock(&ax25_list_lock);
-	ax25_for_each(s, &ax25_list) {
-		if (s->sk && !ax25cmp(&s->source_addr, my_addr) &&
-		    !ax25cmp(&s->dest_addr, dest_addr) &&
-		    s->sk->sk_type == type) {
-			sk = s->sk;
-			sock_hold(sk);
-			break;
-		}
-	}
-
-	spin_unlock(&ax25_list_lock);
-
-	return sk;
-}
-
-/*
- *	Find an AX.25 control block given both ends. It will only pick up
- *	floating AX.25 control blocks or non Raw socket bound control blocks.
- */
-ax25_cb *ax25_find_cb(const ax25_address *src_addr, ax25_address *dest_addr,
-	ax25_digi *digi, struct net_device *dev)
-{
-	ax25_cb *s;
-
-	spin_lock_bh(&ax25_list_lock);
-	ax25_for_each(s, &ax25_list) {
-		if (s->sk && s->sk->sk_type != SOCK_SEQPACKET)
-			continue;
-		if (s->ax25_dev == NULL)
-			continue;
-		if (ax25cmp(&s->source_addr, src_addr) == 0 && ax25cmp(&s->dest_addr, dest_addr) == 0 && s->ax25_dev->dev == dev) {
-			if (digi != NULL && digi->ndigi != 0) {
-				if (s->digipeat == NULL)
-					continue;
-				if (ax25digicmp(s->digipeat, digi) != 0)
-					continue;
-			} else {
-				if (s->digipeat != NULL && s->digipeat->ndigi != 0)
-					continue;
-			}
-			ax25_cb_hold(s);
-			spin_unlock_bh(&ax25_list_lock);
-
-			return s;
-		}
-	}
-	spin_unlock_bh(&ax25_list_lock);
-
-	return NULL;
-}
-
-EXPORT_SYMBOL(ax25_find_cb);
-
-void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto)
-{
-	ax25_cb *s;
-	struct sk_buff *copy;
-
-	spin_lock(&ax25_list_lock);
-	ax25_for_each(s, &ax25_list) {
-		if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 &&
-		    s->sk->sk_type == SOCK_RAW &&
-		    s->sk->sk_protocol == proto &&
-		    s->ax25_dev->dev == skb->dev &&
-		    atomic_read(&s->sk->sk_rmem_alloc) <= s->sk->sk_rcvbuf) {
-			if ((copy = skb_clone(skb, GFP_ATOMIC)) == NULL)
-				continue;
-			if (sock_queue_rcv_skb(s->sk, copy) != 0)
-				kfree_skb(copy);
-		}
-	}
-	spin_unlock(&ax25_list_lock);
-}
-
-/*
- *	Deferred destroy.
- */
-void ax25_destroy_socket(ax25_cb *);
-
-/*
- *	Handler for deferred kills.
- */
-static void ax25_destroy_timer(struct timer_list *t)
-{
-	ax25_cb *ax25 = timer_container_of(ax25, t, dtimer);
-	struct sock *sk;
-
-	sk=ax25->sk;
-
-	bh_lock_sock(sk);
-	sock_hold(sk);
-	ax25_destroy_socket(ax25);
-	bh_unlock_sock(sk);
-	sock_put(sk);
-}
-
-/*
- *	This is called from user mode and the timers. Thus it protects itself
- *	against interrupt users but doesn't worry about being called during
- *	work. Once it is removed from the queue no interrupt or bottom half
- *	will touch it and we are (fairly 8-) ) safe.
- */
-void ax25_destroy_socket(ax25_cb *ax25)
-{
-	struct sk_buff *skb;
-
-	ax25_cb_del(ax25);
-
-	ax25_stop_heartbeat(ax25);
-	ax25_stop_t1timer(ax25);
-	ax25_stop_t2timer(ax25);
-	ax25_stop_t3timer(ax25);
-	ax25_stop_idletimer(ax25);
-
-	ax25_clear_queues(ax25);	/* Flush the queues */
-
-	if (ax25->sk != NULL) {
-		while ((skb = skb_dequeue(&ax25->sk->sk_receive_queue)) != NULL) {
-			if (skb->sk != ax25->sk) {
-				/* A pending connection */
-				ax25_cb *sax25 = sk_to_ax25(skb->sk);
-
-				/* Queue the unaccepted socket for death */
-				sock_orphan(skb->sk);
-
-				/* 9A4GL: hack to release unaccepted sockets */
-				skb->sk->sk_state = TCP_LISTEN;
-
-				ax25_start_heartbeat(sax25);
-				sax25->state = AX25_STATE_0;
-			}
-
-			kfree_skb(skb);
-		}
-		skb_queue_purge(&ax25->sk->sk_write_queue);
-	}
-
-	if (ax25->sk != NULL) {
-		if (sk_has_allocations(ax25->sk)) {
-			/* Defer: outstanding buffers */
-			timer_setup(&ax25->dtimer, ax25_destroy_timer, 0);
-			ax25->dtimer.expires  = jiffies + 2 * HZ;
-			add_timer(&ax25->dtimer);
-		} else {
-			struct sock *sk=ax25->sk;
-			ax25->sk=NULL;
-			sock_put(sk);
-		}
-	} else {
-		ax25_cb_put(ax25);
-	}
-}
-
-/*
- * dl1bke 960311: set parameters for existing AX.25 connections,
- *		  includes a KILL command to abort any connection.
- *		  VERY useful for debugging ;-)
- */
-static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
-{
-	struct ax25_ctl_struct ax25_ctl;
-	ax25_digi digi;
-	ax25_dev *ax25_dev;
-	ax25_cb *ax25;
-	unsigned int k;
-	int ret = 0;
-
-	if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl)))
-		return -EFAULT;
-
-	if (ax25_ctl.digi_count > AX25_MAX_DIGIS)
-		return -EINVAL;
-
-	if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL)
-		return -EINVAL;
-
-	ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr);
-	if (!ax25_dev)
-		return -ENODEV;
-
-	digi.ndigi = ax25_ctl.digi_count;
-	for (k = 0; k < digi.ndigi; k++)
-		digi.calls[k] = ax25_ctl.digi_addr[k];
-
-	ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev);
-	if (!ax25) {
-		ax25_dev_put(ax25_dev);
-		return -ENOTCONN;
-	}
-
-	switch (ax25_ctl.cmd) {
-	case AX25_KILL:
-		ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-#ifdef CONFIG_AX25_DAMA_SLAVE
-		if (ax25_dev->dama.slave && ax25->ax25_dev->values[AX25_VALUES_PROTOCOL] == AX25_PROTO_DAMA_SLAVE)
-			ax25_dama_off(ax25);
-#endif
-		ax25_disconnect(ax25, ENETRESET);
-		break;
-
-	case AX25_WINDOW:
-		if (ax25->modulus == AX25_MODULUS) {
-			if (ax25_ctl.arg < 1 || ax25_ctl.arg > 7)
-				goto einval_put;
-		} else {
-			if (ax25_ctl.arg < 1 || ax25_ctl.arg > 63)
-				goto einval_put;
-		}
-		ax25->window = ax25_ctl.arg;
-		break;
-
-	case AX25_T1:
-		if (ax25_ctl.arg < 1 || ax25_ctl.arg > ULONG_MAX / HZ)
-			goto einval_put;
-		ax25->rtt = (ax25_ctl.arg * HZ) / 2;
-		ax25->t1  = ax25_ctl.arg * HZ;
-		break;
-
-	case AX25_T2:
-		if (ax25_ctl.arg < 1 || ax25_ctl.arg > ULONG_MAX / HZ)
-			goto einval_put;
-		ax25->t2 = ax25_ctl.arg * HZ;
-		break;
-
-	case AX25_N2:
-		if (ax25_ctl.arg < 1 || ax25_ctl.arg > 31)
-			goto einval_put;
-		ax25->n2count = 0;
-		ax25->n2 = ax25_ctl.arg;
-		break;
-
-	case AX25_T3:
-		if (ax25_ctl.arg > ULONG_MAX / HZ)
-			goto einval_put;
-		ax25->t3 = ax25_ctl.arg * HZ;
-		break;
-
-	case AX25_IDLE:
-		if (ax25_ctl.arg > ULONG_MAX / (60 * HZ))
-			goto einval_put;
-
-		ax25->idle = ax25_ctl.arg * 60 * HZ;
-		break;
-
-	case AX25_PACLEN:
-		if (ax25_ctl.arg < 16 || ax25_ctl.arg > 65535)
-			goto einval_put;
-		ax25->paclen = ax25_ctl.arg;
-		break;
-
-	default:
-		goto einval_put;
-	  }
-
-out_put:
-	ax25_dev_put(ax25_dev);
-	ax25_cb_put(ax25);
-	return ret;
-
-einval_put:
-	ret = -EINVAL;
-	goto out_put;
-}
-
-static void ax25_fillin_cb_from_dev(ax25_cb *ax25, const ax25_dev *ax25_dev)
-{
-	ax25->rtt     = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]) / 2;
-	ax25->t1      = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]);
-	ax25->t2      = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T2]);
-	ax25->t3      = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T3]);
-	ax25->n2      = ax25_dev->values[AX25_VALUES_N2];
-	ax25->paclen  = ax25_dev->values[AX25_VALUES_PACLEN];
-	ax25->idle    = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_IDLE]);
-	ax25->backoff = ax25_dev->values[AX25_VALUES_BACKOFF];
-
-	if (ax25_dev->values[AX25_VALUES_AXDEFMODE]) {
-		ax25->modulus = AX25_EMODULUS;
-		ax25->window  = ax25_dev->values[AX25_VALUES_EWINDOW];
-	} else {
-		ax25->modulus = AX25_MODULUS;
-		ax25->window  = ax25_dev->values[AX25_VALUES_WINDOW];
-	}
-}
-
-/*
- *	Fill in a created AX.25 created control block with the default
- *	values for a particular device.
- */
-void ax25_fillin_cb(ax25_cb *ax25, ax25_dev *ax25_dev)
-{
-	ax25->ax25_dev = ax25_dev;
-
-	if (ax25->ax25_dev != NULL) {
-		ax25_fillin_cb_from_dev(ax25, ax25_dev);
-		return;
-	}
-
-	/*
-	 * No device, use kernel / AX.25 spec default values
-	 */
-	ax25->rtt     = msecs_to_jiffies(AX25_DEF_T1) / 2;
-	ax25->t1      = msecs_to_jiffies(AX25_DEF_T1);
-	ax25->t2      = msecs_to_jiffies(AX25_DEF_T2);
-	ax25->t3      = msecs_to_jiffies(AX25_DEF_T3);
-	ax25->n2      = AX25_DEF_N2;
-	ax25->paclen  = AX25_DEF_PACLEN;
-	ax25->idle    = msecs_to_jiffies(AX25_DEF_IDLE);
-	ax25->backoff = AX25_DEF_BACKOFF;
-
-	if (AX25_DEF_AXDEFMODE) {
-		ax25->modulus = AX25_EMODULUS;
-		ax25->window  = AX25_DEF_EWINDOW;
-	} else {
-		ax25->modulus = AX25_MODULUS;
-		ax25->window  = AX25_DEF_WINDOW;
-	}
-}
-
-/*
- * Create an empty AX.25 control block.
- */
-ax25_cb *ax25_create_cb(void)
-{
-	ax25_cb *ax25;
-
-	if ((ax25 = kzalloc_obj(*ax25, GFP_ATOMIC)) == NULL)
-		return NULL;
-
-	refcount_set(&ax25->refcount, 1);
-
-	skb_queue_head_init(&ax25->write_queue);
-	skb_queue_head_init(&ax25->frag_queue);
-	skb_queue_head_init(&ax25->ack_queue);
-	skb_queue_head_init(&ax25->reseq_queue);
-
-	ax25_setup_timers(ax25);
-
-	ax25_fillin_cb(ax25, NULL);
-
-	ax25->state = AX25_STATE_0;
-
-	return ax25;
-}
-
-/*
- *	Handling for system calls applied via the various interfaces to an
- *	AX25 socket object
- */
-
-static int ax25_setsockopt(struct socket *sock, int level, int optname,
-		sockptr_t optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-	ax25_cb *ax25;
-	struct net_device *dev;
-	char devname[IFNAMSIZ];
-	unsigned int opt;
-	int res = 0;
-
-	if (level != SOL_AX25)
-		return -ENOPROTOOPT;
-
-	if (optlen < sizeof(unsigned int))
-		return -EINVAL;
-
-	if (copy_from_sockptr(&opt, optval, sizeof(unsigned int)))
-		return -EFAULT;
-
-	lock_sock(sk);
-	ax25 = sk_to_ax25(sk);
-
-	switch (optname) {
-	case AX25_WINDOW:
-		if (ax25->modulus == AX25_MODULUS) {
-			if (opt < 1 || opt > 7) {
-				res = -EINVAL;
-				break;
-			}
-		} else {
-			if (opt < 1 || opt > 63) {
-				res = -EINVAL;
-				break;
-			}
-		}
-		ax25->window = opt;
-		break;
-
-	case AX25_T1:
-		if (opt < 1 || opt > UINT_MAX / HZ) {
-			res = -EINVAL;
-			break;
-		}
-		ax25->rtt = (opt * HZ) >> 1;
-		ax25->t1  = opt * HZ;
-		break;
-
-	case AX25_T2:
-		if (opt < 1 || opt > UINT_MAX / HZ) {
-			res = -EINVAL;
-			break;
-		}
-		ax25->t2 = opt * HZ;
-		break;
-
-	case AX25_N2:
-		if (opt < 1 || opt > 31) {
-			res = -EINVAL;
-			break;
-		}
-		ax25->n2 = opt;
-		break;
-
-	case AX25_T3:
-		if (opt < 1 || opt > UINT_MAX / HZ) {
-			res = -EINVAL;
-			break;
-		}
-		ax25->t3 = opt * HZ;
-		break;
-
-	case AX25_IDLE:
-		if (opt > UINT_MAX / (60 * HZ)) {
-			res = -EINVAL;
-			break;
-		}
-		ax25->idle = opt * 60 * HZ;
-		break;
-
-	case AX25_BACKOFF:
-		if (opt > 2) {
-			res = -EINVAL;
-			break;
-		}
-		ax25->backoff = opt;
-		break;
-
-	case AX25_EXTSEQ:
-		ax25->modulus = opt ? AX25_EMODULUS : AX25_MODULUS;
-		break;
-
-	case AX25_PIDINCL:
-		ax25->pidincl = opt ? 1 : 0;
-		break;
-
-	case AX25_IAMDIGI:
-		ax25->iamdigi = opt ? 1 : 0;
-		break;
-
-	case AX25_PACLEN:
-		if (opt < 16 || opt > 65535) {
-			res = -EINVAL;
-			break;
-		}
-		ax25->paclen = opt;
-		break;
-
-	case SO_BINDTODEVICE:
-		if (optlen > IFNAMSIZ - 1)
-			optlen = IFNAMSIZ - 1;
-
-		memset(devname, 0, sizeof(devname));
-
-		if (copy_from_sockptr(devname, optval, optlen)) {
-			res = -EFAULT;
-			break;
-		}
-
-		if (sk->sk_type == SOCK_SEQPACKET &&
-		   (sock->state != SS_UNCONNECTED ||
-		    sk->sk_state == TCP_LISTEN)) {
-			res = -EADDRNOTAVAIL;
-			break;
-		}
-
-		rcu_read_lock();
-		dev = dev_get_by_name_rcu(&init_net, devname);
-		if (!dev) {
-			rcu_read_unlock();
-			res = -ENODEV;
-			break;
-		}
-
-		if (ax25->ax25_dev) {
-			if (dev == ax25->ax25_dev->dev) {
-				rcu_read_unlock();
-				break;
-			}
-			netdev_put(ax25->ax25_dev->dev, &ax25->dev_tracker);
-			ax25_dev_put(ax25->ax25_dev);
-		}
-
-		ax25->ax25_dev = ax25_dev_ax25dev(dev);
-		if (!ax25->ax25_dev) {
-			rcu_read_unlock();
-			res = -ENODEV;
-			break;
-		}
-		ax25_fillin_cb(ax25, ax25->ax25_dev);
-		netdev_hold(dev, &ax25->dev_tracker, GFP_ATOMIC);
-		ax25_dev_hold(ax25->ax25_dev);
-		rcu_read_unlock();
-		break;
-
-	default:
-		res = -ENOPROTOOPT;
-	}
-	release_sock(sk);
-
-	return res;
-}
-
-static int ax25_getsockopt(struct socket *sock, int level, int optname,
-	char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-	ax25_cb *ax25;
-	struct ax25_dev *ax25_dev;
-	char devname[IFNAMSIZ];
-	void *valptr;
-	int val = 0;
-	int maxlen, length;
-
-	if (level != SOL_AX25)
-		return -ENOPROTOOPT;
-
-	if (get_user(maxlen, optlen))
-		return -EFAULT;
-
-	if (maxlen < 1)
-		return -EFAULT;
-
-	valptr = &val;
-	length = min_t(unsigned int, maxlen, sizeof(int));
-
-	lock_sock(sk);
-	ax25 = sk_to_ax25(sk);
-
-	switch (optname) {
-	case AX25_WINDOW:
-		val = ax25->window;
-		break;
-
-	case AX25_T1:
-		val = ax25->t1 / HZ;
-		break;
-
-	case AX25_T2:
-		val = ax25->t2 / HZ;
-		break;
-
-	case AX25_N2:
-		val = ax25->n2;
-		break;
-
-	case AX25_T3:
-		val = ax25->t3 / HZ;
-		break;
-
-	case AX25_IDLE:
-		val = ax25->idle / (60 * HZ);
-		break;
-
-	case AX25_BACKOFF:
-		val = ax25->backoff;
-		break;
-
-	case AX25_EXTSEQ:
-		val = (ax25->modulus == AX25_EMODULUS);
-		break;
-
-	case AX25_PIDINCL:
-		val = ax25->pidincl;
-		break;
-
-	case AX25_IAMDIGI:
-		val = ax25->iamdigi;
-		break;
-
-	case AX25_PACLEN:
-		val = ax25->paclen;
-		break;
-
-	case SO_BINDTODEVICE:
-		ax25_dev = ax25->ax25_dev;
-
-		if (ax25_dev != NULL && ax25_dev->dev != NULL) {
-			strscpy(devname, ax25_dev->dev->name, sizeof(devname));
-			length = strlen(devname) + 1;
-		} else {
-			*devname = '\0';
-			length = 1;
-		}
-
-		valptr = devname;
-		break;
-
-	default:
-		release_sock(sk);
-		return -ENOPROTOOPT;
-	}
-	release_sock(sk);
-
-	if (put_user(length, optlen))
-		return -EFAULT;
-
-	return copy_to_user(optval, valptr, length) ? -EFAULT : 0;
-}
-
-static int ax25_listen(struct socket *sock, int backlog)
-{
-	struct sock *sk = sock->sk;
-	int res = 0;
-
-	lock_sock(sk);
-	if (sk->sk_type == SOCK_SEQPACKET && sk->sk_state != TCP_LISTEN) {
-		sk->sk_max_ack_backlog = backlog;
-		sk->sk_state           = TCP_LISTEN;
-		goto out;
-	}
-	res = -EOPNOTSUPP;
-
-out:
-	release_sock(sk);
-
-	return res;
-}
-
-/*
- * XXX: when creating ax25_sock we should update the .obj_size setting
- * below.
- */
-static struct proto ax25_proto = {
-	.name	  = "AX25",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct ax25_sock),
-};
-
-static int ax25_create(struct net *net, struct socket *sock, int protocol,
-		       int kern)
-{
-	struct sock *sk;
-	ax25_cb *ax25;
-
-	if (protocol < 0 || protocol > U8_MAX)
-		return -EINVAL;
-
-	if (!net_eq(net, &init_net))
-		return -EAFNOSUPPORT;
-
-	switch (sock->type) {
-	case SOCK_DGRAM:
-		if (protocol == 0 || protocol == PF_AX25)
-			protocol = AX25_P_TEXT;
-		break;
-
-	case SOCK_SEQPACKET:
-		switch (protocol) {
-		case 0:
-		case PF_AX25:	/* For CLX */
-			protocol = AX25_P_TEXT;
-			break;
-		case AX25_P_SEGMENT:
-#ifdef CONFIG_INET
-		case AX25_P_ARP:
-		case AX25_P_IP:
-#endif
-#ifdef CONFIG_NETROM
-		case AX25_P_NETROM:
-#endif
-#ifdef CONFIG_ROSE
-		case AX25_P_ROSE:
-#endif
-			return -ESOCKTNOSUPPORT;
-#ifdef CONFIG_NETROM_MODULE
-		case AX25_P_NETROM:
-			if (ax25_protocol_is_registered(AX25_P_NETROM))
-				return -ESOCKTNOSUPPORT;
-			break;
-#endif
-#ifdef CONFIG_ROSE_MODULE
-		case AX25_P_ROSE:
-			if (ax25_protocol_is_registered(AX25_P_ROSE))
-				return -ESOCKTNOSUPPORT;
-			break;
-#endif
-		default:
-			break;
-		}
-		break;
-
-	case SOCK_RAW:
-		if (!capable(CAP_NET_RAW))
-			return -EPERM;
-		break;
-	default:
-		return -ESOCKTNOSUPPORT;
-	}
-
-	sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, kern);
-	if (sk == NULL)
-		return -ENOMEM;
-
-	ax25 = ax25_sk(sk)->cb = ax25_create_cb();
-	if (!ax25) {
-		sk_free(sk);
-		return -ENOMEM;
-	}
-
-	sock_init_data(sock, sk);
-
-	sk->sk_destruct = ax25_free_sock;
-	sock->ops    = &ax25_proto_ops;
-	sk->sk_protocol = protocol;
-
-	ax25->sk    = sk;
-
-	return 0;
-}
-
-struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
-{
-	struct sock *sk;
-	ax25_cb *ax25, *oax25;
-
-	sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot, 0);
-	if (sk == NULL)
-		return NULL;
-
-	if ((ax25 = ax25_create_cb()) == NULL) {
-		sk_free(sk);
-		return NULL;
-	}
-
-	switch (osk->sk_type) {
-	case SOCK_DGRAM:
-		break;
-	case SOCK_SEQPACKET:
-		break;
-	default:
-		sk_free(sk);
-		ax25_cb_put(ax25);
-		return NULL;
-	}
-
-	sock_init_data(NULL, sk);
-
-	sk->sk_type     = osk->sk_type;
-	sk->sk_priority = READ_ONCE(osk->sk_priority);
-	sk->sk_protocol = osk->sk_protocol;
-	sk->sk_rcvbuf   = osk->sk_rcvbuf;
-	sk->sk_sndbuf   = osk->sk_sndbuf;
-	sk->sk_state    = TCP_ESTABLISHED;
-	sock_copy_flags(sk, osk);
-
-	oax25 = sk_to_ax25(osk);
-
-	ax25->modulus = oax25->modulus;
-	ax25->backoff = oax25->backoff;
-	ax25->pidincl = oax25->pidincl;
-	ax25->iamdigi = oax25->iamdigi;
-	ax25->rtt     = oax25->rtt;
-	ax25->t1      = oax25->t1;
-	ax25->t2      = oax25->t2;
-	ax25->t3      = oax25->t3;
-	ax25->n2      = oax25->n2;
-	ax25->idle    = oax25->idle;
-	ax25->paclen  = oax25->paclen;
-	ax25->window  = oax25->window;
-
-	ax25->ax25_dev    = ax25_dev;
-	ax25->source_addr = oax25->source_addr;
-
-	if (oax25->digipeat != NULL) {
-		ax25->digipeat = kmemdup(oax25->digipeat, sizeof(ax25_digi),
-					 GFP_ATOMIC);
-		if (ax25->digipeat == NULL) {
-			sk_free(sk);
-			ax25_cb_put(ax25);
-			return NULL;
-		}
-	}
-
-	ax25_sk(sk)->cb = ax25;
-	sk->sk_destruct = ax25_free_sock;
-	ax25->sk    = sk;
-
-	return sk;
-}
-
-static int ax25_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-	ax25_cb *ax25;
-	ax25_dev *ax25_dev;
-
-	if (sk == NULL)
-		return 0;
-
-	sock_hold(sk);
-	lock_sock(sk);
-	sock_orphan(sk);
-	ax25 = sk_to_ax25(sk);
-	ax25_dev = ax25->ax25_dev;
-
-	if (sk->sk_type == SOCK_SEQPACKET) {
-		switch (ax25->state) {
-		case AX25_STATE_0:
-			if (!sock_flag(ax25->sk, SOCK_DEAD)) {
-				release_sock(sk);
-				ax25_disconnect(ax25, 0);
-				lock_sock(sk);
-			}
-			ax25_destroy_socket(ax25);
-			break;
-
-		case AX25_STATE_1:
-		case AX25_STATE_2:
-			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-			release_sock(sk);
-			ax25_disconnect(ax25, 0);
-			lock_sock(sk);
-			if (!sock_flag(ax25->sk, SOCK_DESTROY))
-				ax25_destroy_socket(ax25);
-			break;
-
-		case AX25_STATE_3:
-		case AX25_STATE_4:
-			ax25_clear_queues(ax25);
-			ax25->n2count = 0;
-
-			switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
-			case AX25_PROTO_STD_SIMPLEX:
-			case AX25_PROTO_STD_DUPLEX:
-				ax25_send_control(ax25,
-						  AX25_DISC,
-						  AX25_POLLON,
-						  AX25_COMMAND);
-				ax25_stop_t2timer(ax25);
-				ax25_stop_t3timer(ax25);
-				ax25_stop_idletimer(ax25);
-				break;
-#ifdef CONFIG_AX25_DAMA_SLAVE
-			case AX25_PROTO_DAMA_SLAVE:
-				ax25_stop_t3timer(ax25);
-				ax25_stop_idletimer(ax25);
-				break;
-#endif
-			}
-			ax25_calculate_t1(ax25);
-			ax25_start_t1timer(ax25);
-			ax25->state = AX25_STATE_2;
-			sk->sk_state                = TCP_CLOSE;
-			sk->sk_shutdown            |= SEND_SHUTDOWN;
-			sk->sk_state_change(sk);
-			sock_set_flag(sk, SOCK_DESTROY);
-			break;
-
-		default:
-			break;
-		}
-	} else {
-		sk->sk_state     = TCP_CLOSE;
-		sk->sk_shutdown |= SEND_SHUTDOWN;
-		sk->sk_state_change(sk);
-		ax25_destroy_socket(ax25);
-	}
-	if (ax25_dev) {
-		if (!ax25_dev->device_up) {
-			timer_delete_sync(&ax25->timer);
-			timer_delete_sync(&ax25->t1timer);
-			timer_delete_sync(&ax25->t2timer);
-			timer_delete_sync(&ax25->t3timer);
-			timer_delete_sync(&ax25->idletimer);
-		}
-		netdev_put(ax25_dev->dev, &ax25->dev_tracker);
-		ax25_dev_put(ax25_dev);
-	}
-
-	sock->sk   = NULL;
-	release_sock(sk);
-	sock_put(sk);
-
-	return 0;
-}
-
-/*
- *	We support a funny extension here so you can (as root) give any callsign
- *	digipeated via a local address as source. This hack is obsolete now
- *	that we've implemented support for SO_BINDTODEVICE. It is however small
- *	and trivially backward compatible.
- */
-static int ax25_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len)
-{
-	struct sock *sk = sock->sk;
-	struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr;
-	ax25_dev *ax25_dev = NULL;
-	ax25_uid_assoc *user;
-	ax25_address call;
-	ax25_cb *ax25;
-	int err = 0;
-
-	if (addr_len != sizeof(struct sockaddr_ax25) &&
-	    addr_len != sizeof(struct full_sockaddr_ax25))
-		/* support for old structure may go away some time
-		 * ax25_bind(): uses old (6 digipeater) socket structure.
-		 */
-		if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) ||
-		    (addr_len > sizeof(struct full_sockaddr_ax25)))
-			return -EINVAL;
-
-	if (addr->fsa_ax25.sax25_family != AF_AX25)
-		return -EINVAL;
-
-	user = ax25_findbyuid(current_euid());
-	if (user) {
-		call = user->call;
-		ax25_uid_put(user);
-	} else {
-		if (ax25_uid_policy && !capable(CAP_NET_ADMIN))
-			return -EACCES;
-
-		call = addr->fsa_ax25.sax25_call;
-	}
-
-	lock_sock(sk);
-
-	ax25 = sk_to_ax25(sk);
-	if (!sock_flag(sk, SOCK_ZAPPED)) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	ax25->source_addr = call;
-
-	/*
-	 * User already set interface with SO_BINDTODEVICE
-	 */
-	if (ax25->ax25_dev != NULL)
-		goto done;
-
-	if (addr_len > sizeof(struct sockaddr_ax25) && addr->fsa_ax25.sax25_ndigis == 1) {
-		if (ax25cmp(&addr->fsa_digipeater[0], &null_ax25_address) != 0 &&
-		    (ax25_dev = ax25_addr_ax25dev(&addr->fsa_digipeater[0])) == NULL) {
-			err = -EADDRNOTAVAIL;
-			goto out;
-		}
-	} else {
-		if ((ax25_dev = ax25_addr_ax25dev(&addr->fsa_ax25.sax25_call)) == NULL) {
-			err = -EADDRNOTAVAIL;
-			goto out;
-		}
-	}
-
-	if (ax25_dev) {
-		ax25_fillin_cb(ax25, ax25_dev);
-		netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC);
-	}
-
-done:
-	ax25_cb_add(ax25);
-	sock_reset_flag(sk, SOCK_ZAPPED);
-
-out:
-	release_sock(sk);
-
-	return err;
-}
-
-/*
- *	FIXME: nonblock behaviour looks like it may have a bug.
- */
-static int __must_check ax25_connect(struct socket *sock,
-	struct sockaddr_unsized *uaddr, int addr_len, int flags)
-{
-	struct sock *sk = sock->sk;
-	ax25_cb *ax25 = sk_to_ax25(sk), *ax25t;
-	struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
-	ax25_digi *digi = NULL;
-	int ct = 0, err = 0;
-
-	/*
-	 * some sanity checks. code further down depends on this
-	 */
-
-	if (addr_len == sizeof(struct sockaddr_ax25))
-		/* support for this will go away in early 2.5.x
-		 * ax25_connect(): uses obsolete socket structure
-		 */
-		;
-	else if (addr_len != sizeof(struct full_sockaddr_ax25))
-		/* support for old structure may go away some time
-		 * ax25_connect(): uses old (6 digipeater) socket structure.
-		 */
-		if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) ||
-		    (addr_len > sizeof(struct full_sockaddr_ax25)))
-			return -EINVAL;
-
-
-	if (fsa->fsa_ax25.sax25_family != AF_AX25)
-		return -EINVAL;
-
-	lock_sock(sk);
-
-	/* deal with restarts */
-	if (sock->state == SS_CONNECTING) {
-		switch (sk->sk_state) {
-		case TCP_SYN_SENT: /* still trying */
-			err = -EINPROGRESS;
-			goto out_release;
-
-		case TCP_ESTABLISHED: /* connection established */
-			sock->state = SS_CONNECTED;
-			goto out_release;
-
-		case TCP_CLOSE: /* connection refused */
-			sock->state = SS_UNCONNECTED;
-			err = -ECONNREFUSED;
-			goto out_release;
-		}
-	}
-
-	if (sk->sk_state == TCP_ESTABLISHED && sk->sk_type == SOCK_SEQPACKET) {
-		err = -EISCONN;	/* No reconnect on a seqpacket socket */
-		goto out_release;
-	}
-
-	sk->sk_state   = TCP_CLOSE;
-	sock->state = SS_UNCONNECTED;
-
-	kfree(ax25->digipeat);
-	ax25->digipeat = NULL;
-
-	/*
-	 *	Handle digi-peaters to be used.
-	 */
-	if (addr_len > sizeof(struct sockaddr_ax25) &&
-	    fsa->fsa_ax25.sax25_ndigis != 0) {
-		/* Valid number of digipeaters ? */
-		if (fsa->fsa_ax25.sax25_ndigis < 1 ||
-		    fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS ||
-		    addr_len < sizeof(struct sockaddr_ax25) +
-		    sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) {
-			err = -EINVAL;
-			goto out_release;
-		}
-
-		if ((digi = kmalloc_obj(ax25_digi)) == NULL) {
-			err = -ENOBUFS;
-			goto out_release;
-		}
-
-		digi->ndigi      = fsa->fsa_ax25.sax25_ndigis;
-		digi->lastrepeat = -1;
-
-		while (ct < fsa->fsa_ax25.sax25_ndigis) {
-			if ((fsa->fsa_digipeater[ct].ax25_call[6] &
-			     AX25_HBIT) && ax25->iamdigi) {
-				digi->repeated[ct] = 1;
-				digi->lastrepeat   = ct;
-			} else {
-				digi->repeated[ct] = 0;
-			}
-			digi->calls[ct] = fsa->fsa_digipeater[ct];
-			ct++;
-		}
-	}
-
-	/* Must bind first - autobinding does not work. */
-	if (sock_flag(sk, SOCK_ZAPPED)) {
-		kfree(digi);
-		err = -EINVAL;
-		goto out_release;
-	}
-
-	/* Check to see if the device has been filled in, error if it hasn't. */
-	if (ax25->ax25_dev == NULL) {
-		kfree(digi);
-		err = -EHOSTUNREACH;
-		goto out_release;
-	}
-
-	if (sk->sk_type == SOCK_SEQPACKET &&
-	    (ax25t=ax25_find_cb(&ax25->source_addr, &fsa->fsa_ax25.sax25_call, digi,
-			 ax25->ax25_dev->dev))) {
-		kfree(digi);
-		err = -EADDRINUSE;		/* Already such a connection */
-		ax25_cb_put(ax25t);
-		goto out_release;
-	}
-
-	ax25->dest_addr = fsa->fsa_ax25.sax25_call;
-	ax25->digipeat  = digi;
-
-	/* First the easy one */
-	if (sk->sk_type != SOCK_SEQPACKET) {
-		sock->state = SS_CONNECTED;
-		sk->sk_state   = TCP_ESTABLISHED;
-		goto out_release;
-	}
-
-	/* Move to connecting socket, ax.25 lapb WAIT_UA.. */
-	sock->state        = SS_CONNECTING;
-	sk->sk_state          = TCP_SYN_SENT;
-
-	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
-	case AX25_PROTO_STD_SIMPLEX:
-	case AX25_PROTO_STD_DUPLEX:
-		ax25_std_establish_data_link(ax25);
-		break;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	case AX25_PROTO_DAMA_SLAVE:
-		ax25->modulus = AX25_MODULUS;
-		ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
-		if (ax25->ax25_dev->dama.slave)
-			ax25_ds_establish_data_link(ax25);
-		else
-			ax25_std_establish_data_link(ax25);
-		break;
-#endif
-	}
-
-	ax25->state = AX25_STATE_1;
-
-	ax25_start_heartbeat(ax25);
-
-	/* Now the loop */
-	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
-		err = -EINPROGRESS;
-		goto out_release;
-	}
-
-	if (sk->sk_state == TCP_SYN_SENT) {
-		DEFINE_WAIT(wait);
-
-		for (;;) {
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
-			if (sk->sk_state != TCP_SYN_SENT)
-				break;
-			if (!signal_pending(current)) {
-				release_sock(sk);
-				schedule();
-				lock_sock(sk);
-				continue;
-			}
-			err = -ERESTARTSYS;
-			break;
-		}
-		finish_wait(sk_sleep(sk), &wait);
-
-		if (err)
-			goto out_release;
-	}
-
-	if (sk->sk_state != TCP_ESTABLISHED) {
-		/* Not in ABM, not in WAIT_UA -> failed */
-		sock->state = SS_UNCONNECTED;
-		err = sock_error(sk);	/* Always set at this point */
-		goto out_release;
-	}
-
-	sock->state = SS_CONNECTED;
-
-	err = 0;
-out_release:
-	release_sock(sk);
-
-	return err;
-}
-
-static int ax25_accept(struct socket *sock, struct socket *newsock,
-		       struct proto_accept_arg *arg)
-{
-	struct sk_buff *skb;
-	struct sock *newsk;
-	ax25_dev *ax25_dev;
-	DEFINE_WAIT(wait);
-	struct sock *sk;
-	ax25_cb *ax25;
-	int err = 0;
-
-	if (sock->state != SS_UNCONNECTED)
-		return -EINVAL;
-
-	if ((sk = sock->sk) == NULL)
-		return -EINVAL;
-
-	lock_sock(sk);
-	if (sk->sk_type != SOCK_SEQPACKET) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (sk->sk_state != TCP_LISTEN) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	/*
-	 *	The read queue this time is holding sockets ready to use
-	 *	hooked into the SABM we saved
-	 */
-	for (;;) {
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		skb = skb_dequeue(&sk->sk_receive_queue);
-		if (skb)
-			break;
-
-		if (arg->flags & O_NONBLOCK) {
-			err = -EWOULDBLOCK;
-			break;
-		}
-		if (!signal_pending(current)) {
-			release_sock(sk);
-			schedule();
-			lock_sock(sk);
-			continue;
-		}
-		err = -ERESTARTSYS;
-		break;
-	}
-	finish_wait(sk_sleep(sk), &wait);
-
-	if (err)
-		goto out;
-
-	newsk		 = skb->sk;
-	sock_graft(newsk, newsock);
-
-	/* Now attach up the new socket */
-	kfree_skb(skb);
-	sk_acceptq_removed(sk);
-	newsock->state = SS_CONNECTED;
-	ax25 = sk_to_ax25(newsk);
-	ax25_dev = ax25->ax25_dev;
-	netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC);
-	ax25_dev_hold(ax25_dev);
-
-out:
-	release_sock(sk);
-
-	return err;
-}
-
-static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
-	int peer)
-{
-	struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
-	struct sock *sk = sock->sk;
-	unsigned char ndigi, i;
-	ax25_cb *ax25;
-	int err = 0;
-
-	memset(fsa, 0, sizeof(*fsa));
-	lock_sock(sk);
-	ax25 = sk_to_ax25(sk);
-
-	if (peer != 0) {
-		if (sk->sk_state != TCP_ESTABLISHED) {
-			err = -ENOTCONN;
-			goto out;
-		}
-
-		fsa->fsa_ax25.sax25_family = AF_AX25;
-		fsa->fsa_ax25.sax25_call   = ax25->dest_addr;
-
-		if (ax25->digipeat != NULL) {
-			ndigi = ax25->digipeat->ndigi;
-			fsa->fsa_ax25.sax25_ndigis = ndigi;
-			for (i = 0; i < ndigi; i++)
-				fsa->fsa_digipeater[i] =
-						ax25->digipeat->calls[i];
-		}
-	} else {
-		fsa->fsa_ax25.sax25_family = AF_AX25;
-		fsa->fsa_ax25.sax25_call   = ax25->source_addr;
-		fsa->fsa_ax25.sax25_ndigis = 1;
-		if (ax25->ax25_dev != NULL) {
-			memcpy(&fsa->fsa_digipeater[0],
-			       ax25->ax25_dev->dev->dev_addr, AX25_ADDR_LEN);
-		} else {
-			fsa->fsa_digipeater[0] = null_ax25_address;
-		}
-	}
-	err = sizeof (struct full_sockaddr_ax25);
-
-out:
-	release_sock(sk);
-
-	return err;
-}
-
-static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
-{
-	DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name);
-	struct sock *sk = sock->sk;
-	struct sockaddr_ax25 sax;
-	struct sk_buff *skb;
-	ax25_digi dtmp, *dp;
-	ax25_cb *ax25;
-	size_t size;
-	int lv, err, addr_len = msg->msg_namelen;
-
-	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT))
-		return -EINVAL;
-
-	lock_sock(sk);
-	ax25 = sk_to_ax25(sk);
-
-	if (sock_flag(sk, SOCK_ZAPPED)) {
-		err = -EADDRNOTAVAIL;
-		goto out;
-	}
-
-	if (sk->sk_shutdown & SEND_SHUTDOWN) {
-		send_sig(SIGPIPE, current, 0);
-		err = -EPIPE;
-		goto out;
-	}
-
-	if (ax25->ax25_dev == NULL) {
-		err = -ENETUNREACH;
-		goto out;
-	}
-
-	if (len > ax25->ax25_dev->dev->mtu) {
-		err = -EMSGSIZE;
-		goto out;
-	}
-
-	if (usax != NULL) {
-		if (usax->sax25_family != AF_AX25) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		if (addr_len == sizeof(struct sockaddr_ax25))
-			/* ax25_sendmsg(): uses obsolete socket structure */
-			;
-		else if (addr_len != sizeof(struct full_sockaddr_ax25))
-			/* support for old structure may go away some time
-			 * ax25_sendmsg(): uses old (6 digipeater)
-			 * socket structure.
-			 */
-			if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) ||
-			    (addr_len > sizeof(struct full_sockaddr_ax25))) {
-				err = -EINVAL;
-				goto out;
-			}
-
-
-		if (addr_len > sizeof(struct sockaddr_ax25) && usax->sax25_ndigis != 0) {
-			int ct           = 0;
-			struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax;
-
-			/* Valid number of digipeaters ? */
-			if (usax->sax25_ndigis < 1 ||
-			    usax->sax25_ndigis > AX25_MAX_DIGIS ||
-			    addr_len < sizeof(struct sockaddr_ax25) +
-			    sizeof(ax25_address) * usax->sax25_ndigis) {
-				err = -EINVAL;
-				goto out;
-			}
-
-			dtmp.ndigi      = usax->sax25_ndigis;
-
-			while (ct < usax->sax25_ndigis) {
-				dtmp.repeated[ct] = 0;
-				dtmp.calls[ct]    = fsa->fsa_digipeater[ct];
-				ct++;
-			}
-
-			dtmp.lastrepeat = 0;
-		}
-
-		sax = *usax;
-		if (sk->sk_type == SOCK_SEQPACKET &&
-		    ax25cmp(&ax25->dest_addr, &sax.sax25_call)) {
-			err = -EISCONN;
-			goto out;
-		}
-		if (usax->sax25_ndigis == 0)
-			dp = NULL;
-		else
-			dp = &dtmp;
-	} else {
-		/*
-		 *	FIXME: 1003.1g - if the socket is like this because
-		 *	it has become closed (not started closed) and is VC
-		 *	we ought to SIGPIPE, EPIPE
-		 */
-		if (sk->sk_state != TCP_ESTABLISHED) {
-			err = -ENOTCONN;
-			goto out;
-		}
-		sax.sax25_family = AF_AX25;
-		sax.sax25_call   = ax25->dest_addr;
-		dp = ax25->digipeat;
-	}
-
-	/* Build a packet */
-	/* Assume the worst case */
-	size = len + ax25->ax25_dev->dev->hard_header_len;
-
-	skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT, &err);
-	if (skb == NULL)
-		goto out;
-
-	skb_reserve(skb, size - len);
-
-	/* User data follows immediately after the AX.25 data */
-	if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
-		err = -EFAULT;
-		kfree_skb(skb);
-		goto out;
-	}
-
-	skb_reset_network_header(skb);
-
-	/* Add the PID if one is not supplied by the user in the skb */
-	if (!ax25->pidincl)
-		*(u8 *)skb_push(skb, 1) = sk->sk_protocol;
-
-	if (sk->sk_type == SOCK_SEQPACKET) {
-		/* Connected mode sockets go via the LAPB machine */
-		if (sk->sk_state != TCP_ESTABLISHED) {
-			kfree_skb(skb);
-			err = -ENOTCONN;
-			goto out;
-		}
-
-		/* Shove it onto the queue and kick */
-		ax25_output(ax25, ax25->paclen, skb);
-
-		err = len;
-		goto out;
-	}
-
-	skb_push(skb, 1 + ax25_addr_size(dp));
-
-	/* Building AX.25 Header */
-
-	/* Build an AX.25 header */
-	lv = ax25_addr_build(skb->data, &ax25->source_addr, &sax.sax25_call,
-			     dp, AX25_COMMAND, AX25_MODULUS);
-
-	skb_set_transport_header(skb, lv);
-
-	*skb_transport_header(skb) = AX25_UI;
-
-	/* Datagram frames go straight out of the door as UI */
-	ax25_queue_xmit(skb, ax25->ax25_dev->dev);
-
-	err = len;
-
-out:
-	release_sock(sk);
-
-	return err;
-}
-
-static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
-			int flags)
-{
-	struct sock *sk = sock->sk;
-	struct sk_buff *skb, *last;
-	struct sk_buff_head *sk_queue;
-	int copied;
-	int err = 0;
-	int off = 0;
-	long timeo;
-
-	lock_sock(sk);
-	/*
-	 * 	This works for seqpacket too. The receiver has ordered the
-	 *	queue for us! We do one quick check first though
-	 */
-	if (sk->sk_type == SOCK_SEQPACKET && sk->sk_state != TCP_ESTABLISHED) {
-		err =  -ENOTCONN;
-		goto out;
-	}
-
-	/*  We need support for non-blocking reads. */
-	sk_queue = &sk->sk_receive_queue;
-	skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
-	/* If no packet is available, release_sock(sk) and try again. */
-	if (!skb) {
-		if (err != -EAGAIN)
-			goto out;
-		release_sock(sk);
-		timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-		while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
-							     &timeo, last)) {
-			skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
-						      &err, &last);
-			if (skb)
-				break;
-
-			if (err != -EAGAIN)
-				goto done;
-		}
-		if (!skb)
-			goto done;
-		lock_sock(sk);
-	}
-
-	if (!sk_to_ax25(sk)->pidincl)
-		skb_pull(skb, 1);		/* Remove PID */
-
-	skb_reset_transport_header(skb);
-	copied = skb->len;
-
-	if (copied > size) {
-		copied = size;
-		msg->msg_flags |= MSG_TRUNC;
-	}
-
-	skb_copy_datagram_msg(skb, 0, msg, copied);
-
-	if (msg->msg_name) {
-		ax25_digi digi;
-		ax25_address src;
-		const unsigned char *mac = skb_mac_header(skb);
-		DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name);
-
-		memset(sax, 0, sizeof(struct full_sockaddr_ax25));
-		ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
-				&digi, NULL, NULL);
-		sax->sax25_family = AF_AX25;
-		/* We set this correctly, even though we may not let the
-		   application know the digi calls further down (because it
-		   did NOT ask to know them).  This could get political... **/
-		sax->sax25_ndigis = digi.ndigi;
-		sax->sax25_call   = src;
-
-		if (sax->sax25_ndigis != 0) {
-			int ct;
-			struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)sax;
-
-			for (ct = 0; ct < digi.ndigi; ct++)
-				fsa->fsa_digipeater[ct] = digi.calls[ct];
-		}
-		msg->msg_namelen = sizeof(struct full_sockaddr_ax25);
-	}
-
-	skb_free_datagram(sk, skb);
-	err = copied;
-
-out:
-	release_sock(sk);
-
-done:
-	return err;
-}
-
-static int ax25_shutdown(struct socket *sk, int how)
-{
-	/* FIXME - generate DM and RNR states */
-	return -EOPNOTSUPP;
-}
-
-static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	struct sock *sk = sock->sk;
-	void __user *argp = (void __user *)arg;
-	int res = 0;
-
-	lock_sock(sk);
-	switch (cmd) {
-	case TIOCOUTQ: {
-		long amount;
-
-		amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
-		if (amount < 0)
-			amount = 0;
-		res = put_user(amount, (int __user *)argp);
-		break;
-	}
-
-	case TIOCINQ: {
-		struct sk_buff *skb;
-		long amount = 0L;
-		/* These two are safe on a single CPU system as only user tasks fiddle here */
-		if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
-			amount = skb->len;
-		res = put_user(amount, (int __user *) argp);
-		break;
-	}
-
-	case SIOCAX25ADDUID:	/* Add a uid to the uid/call map table */
-	case SIOCAX25DELUID:	/* Delete a uid from the uid/call map table */
-	case SIOCAX25GETUID: {
-		struct sockaddr_ax25 sax25;
-		if (copy_from_user(&sax25, argp, sizeof(sax25))) {
-			res = -EFAULT;
-			break;
-		}
-		res = ax25_uid_ioctl(cmd, &sax25);
-		break;
-	}
-
-	case SIOCAX25NOUID: {	/* Set the default policy (default/bar) */
-		long amount;
-		if (!capable(CAP_NET_ADMIN)) {
-			res = -EPERM;
-			break;
-		}
-		if (get_user(amount, (long __user *)argp)) {
-			res = -EFAULT;
-			break;
-		}
-		if (amount < 0 || amount > AX25_NOUID_BLOCK) {
-			res = -EINVAL;
-			break;
-		}
-		ax25_uid_policy = amount;
-		res = 0;
-		break;
-	}
-
-	case SIOCADDRT:
-	case SIOCDELRT:
-	case SIOCAX25OPTRT:
-		if (!capable(CAP_NET_ADMIN)) {
-			res = -EPERM;
-			break;
-		}
-		res = ax25_rt_ioctl(cmd, argp);
-		break;
-
-	case SIOCAX25CTLCON:
-		if (!capable(CAP_NET_ADMIN)) {
-			res = -EPERM;
-			break;
-		}
-		res = ax25_ctl_ioctl(cmd, argp);
-		break;
-
-	case SIOCAX25GETINFO:
-	case SIOCAX25GETINFOOLD: {
-		ax25_cb *ax25 = sk_to_ax25(sk);
-		struct ax25_info_struct ax25_info;
-
-		ax25_info.t1        = ax25->t1   / HZ;
-		ax25_info.t2        = ax25->t2   / HZ;
-		ax25_info.t3        = ax25->t3   / HZ;
-		ax25_info.idle      = ax25->idle / (60 * HZ);
-		ax25_info.n2        = ax25->n2;
-		ax25_info.t1timer   = ax25_display_timer(&ax25->t1timer)   / HZ;
-		ax25_info.t2timer   = ax25_display_timer(&ax25->t2timer)   / HZ;
-		ax25_info.t3timer   = ax25_display_timer(&ax25->t3timer)   / HZ;
-		ax25_info.idletimer = ax25_display_timer(&ax25->idletimer) / (60 * HZ);
-		ax25_info.n2count   = ax25->n2count;
-		ax25_info.state     = ax25->state;
-		ax25_info.rcv_q     = sk_rmem_alloc_get(sk);
-		ax25_info.snd_q     = sk_wmem_alloc_get(sk);
-		ax25_info.vs        = ax25->vs;
-		ax25_info.vr        = ax25->vr;
-		ax25_info.va        = ax25->va;
-		ax25_info.vs_max    = ax25->vs; /* reserved */
-		ax25_info.paclen    = ax25->paclen;
-		ax25_info.window    = ax25->window;
-
-		/* old structure? */
-		if (cmd == SIOCAX25GETINFOOLD) {
-			static int warned = 0;
-			if (!warned) {
-				printk(KERN_INFO "%s uses old SIOCAX25GETINFO\n",
-					current->comm);
-				warned=1;
-			}
-
-			if (copy_to_user(argp, &ax25_info, sizeof(struct ax25_info_struct_deprecated))) {
-				res = -EFAULT;
-				break;
-			}
-		} else {
-			if (copy_to_user(argp, &ax25_info, sizeof(struct ax25_info_struct))) {
-				res = -EINVAL;
-				break;
-			}
-		}
-		res = 0;
-		break;
-	}
-
-	case SIOCAX25ADDFWD:
-	case SIOCAX25DELFWD: {
-		struct ax25_fwd_struct ax25_fwd;
-		if (!capable(CAP_NET_ADMIN)) {
-			res = -EPERM;
-			break;
-		}
-		if (copy_from_user(&ax25_fwd, argp, sizeof(ax25_fwd))) {
-			res = -EFAULT;
-			break;
-		}
-		res = ax25_fwd_ioctl(cmd, &ax25_fwd);
-		break;
-	}
-
-	case SIOCGIFADDR:
-	case SIOCSIFADDR:
-	case SIOCGIFDSTADDR:
-	case SIOCSIFDSTADDR:
-	case SIOCGIFBRDADDR:
-	case SIOCSIFBRDADDR:
-	case SIOCGIFNETMASK:
-	case SIOCSIFNETMASK:
-	case SIOCGIFMETRIC:
-	case SIOCSIFMETRIC:
-		res = -EINVAL;
-		break;
-
-	default:
-		res = -ENOIOCTLCMD;
-		break;
-	}
-	release_sock(sk);
-
-	return res;
-}
-
-#ifdef CONFIG_PROC_FS
-
-static void *ax25_info_start(struct seq_file *seq, loff_t *pos)
-	__acquires(ax25_list_lock)
-{
-	spin_lock_bh(&ax25_list_lock);
-	return seq_hlist_start(&ax25_list, *pos);
-}
-
-static void *ax25_info_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	return seq_hlist_next(v, &ax25_list, pos);
-}
-
-static void ax25_info_stop(struct seq_file *seq, void *v)
-	__releases(ax25_list_lock)
-{
-	spin_unlock_bh(&ax25_list_lock);
-}
-
-static int ax25_info_show(struct seq_file *seq, void *v)
-{
-	ax25_cb *ax25 = hlist_entry(v, struct ax25_cb, ax25_node);
-	char buf[11];
-	int k;
-
-
-	/*
-	 * New format:
-	 * magic dev src_addr dest_addr,digi1,digi2,.. st vs vr va t1 t1 t2 t2 t3 t3 idle idle n2 n2 rtt window paclen Snd-Q Rcv-Q inode
-	 */
-
-	seq_printf(seq, "%p %s %s%s ",
-		   ax25,
-		   ax25->ax25_dev == NULL? "???" : ax25->ax25_dev->dev->name,
-		   ax2asc(buf, &ax25->source_addr),
-		   ax25->iamdigi? "*":"");
-	seq_printf(seq, "%s", ax2asc(buf, &ax25->dest_addr));
-
-	for (k=0; (ax25->digipeat != NULL) && (k < ax25->digipeat->ndigi); k++) {
-		seq_printf(seq, ",%s%s",
-			   ax2asc(buf, &ax25->digipeat->calls[k]),
-			   ax25->digipeat->repeated[k]? "*":"");
-	}
-
-	seq_printf(seq, " %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %d %d",
-		   ax25->state,
-		   ax25->vs, ax25->vr, ax25->va,
-		   ax25_display_timer(&ax25->t1timer) / HZ, ax25->t1 / HZ,
-		   ax25_display_timer(&ax25->t2timer) / HZ, ax25->t2 / HZ,
-		   ax25_display_timer(&ax25->t3timer) / HZ, ax25->t3 / HZ,
-		   ax25_display_timer(&ax25->idletimer) / (60 * HZ),
-		   ax25->idle / (60 * HZ),
-		   ax25->n2count, ax25->n2,
-		   ax25->rtt / HZ,
-		   ax25->window,
-		   ax25->paclen);
-
-	if (ax25->sk != NULL) {
-		seq_printf(seq, " %d %d %llu\n",
-			   sk_wmem_alloc_get(ax25->sk),
-			   sk_rmem_alloc_get(ax25->sk),
-			   sock_i_ino(ax25->sk));
-	} else {
-		seq_puts(seq, " * * *\n");
-	}
-	return 0;
-}
-
-static const struct seq_operations ax25_info_seqops = {
-	.start = ax25_info_start,
-	.next = ax25_info_next,
-	.stop = ax25_info_stop,
-	.show = ax25_info_show,
-};
-#endif
-
-static const struct net_proto_family ax25_family_ops = {
-	.family =	PF_AX25,
-	.create =	ax25_create,
-	.owner	=	THIS_MODULE,
-};
-
-static const struct proto_ops ax25_proto_ops = {
-	.family		= PF_AX25,
-	.owner		= THIS_MODULE,
-	.release	= ax25_release,
-	.bind		= ax25_bind,
-	.connect	= ax25_connect,
-	.socketpair	= sock_no_socketpair,
-	.accept		= ax25_accept,
-	.getname	= ax25_getname,
-	.poll		= datagram_poll,
-	.ioctl		= ax25_ioctl,
-	.gettstamp	= sock_gettstamp,
-	.listen		= ax25_listen,
-	.shutdown	= ax25_shutdown,
-	.setsockopt	= ax25_setsockopt,
-	.getsockopt	= ax25_getsockopt,
-	.sendmsg	= ax25_sendmsg,
-	.recvmsg	= ax25_recvmsg,
-	.mmap		= sock_no_mmap,
-};
-
-/*
- *	Called by socket.c on kernel start up
- */
-static struct packet_type ax25_packet_type __read_mostly = {
-	.type	=	cpu_to_be16(ETH_P_AX25),
-	.func	=	ax25_kiss_rcv,
-};
-
-static struct notifier_block ax25_dev_notifier = {
-	.notifier_call = ax25_device_event,
-};
-
-static int __init ax25_init(void)
-{
-	int rc = proto_register(&ax25_proto, 0);
-
-	if (rc != 0)
-		goto out;
-
-	sock_register(&ax25_family_ops);
-	dev_add_pack(&ax25_packet_type);
-	register_netdevice_notifier(&ax25_dev_notifier);
-
-	proc_create_seq("ax25_route", 0444, init_net.proc_net, &ax25_rt_seqops);
-	proc_create_seq("ax25", 0444, init_net.proc_net, &ax25_info_seqops);
-	proc_create_seq("ax25_calls", 0444, init_net.proc_net,
-			&ax25_uid_seqops);
-out:
-	return rc;
-}
-module_init(ax25_init);
-
-
-MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>");
-MODULE_DESCRIPTION("The amateur radio AX.25 link layer protocol");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_AX25);
-
-static void __exit ax25_exit(void)
-{
-	remove_proc_entry("ax25_route", init_net.proc_net);
-	remove_proc_entry("ax25", init_net.proc_net);
-	remove_proc_entry("ax25_calls", init_net.proc_net);
-
-	unregister_netdevice_notifier(&ax25_dev_notifier);
-
-	dev_remove_pack(&ax25_packet_type);
-
-	sock_unregister(PF_AX25);
-	proto_unregister(&ax25_proto);
-
-	ax25_rt_free();
-	ax25_uid_free();
-	ax25_dev_free();
-}
-module_exit(ax25_exit);
diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c
deleted file mode 100644
index f68865a4d0ab..000000000000
--- a/net/ax25/ax25_addr.c
+++ /dev/null
@@ -1,303 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-/*
- * The default broadcast address of an interface is QST-0; the default address
- * is LINUX-1.  The null address is defined as a callsign of all spaces with
- * an SSID of zero.
- */
-
-const ax25_address ax25_bcast =
-	{{'Q' << 1, 'S' << 1, 'T' << 1, ' ' << 1, ' ' << 1, ' ' << 1, 0 << 1}};
-const ax25_address ax25_defaddr =
-	{{'L' << 1, 'I' << 1, 'N' << 1, 'U' << 1, 'X' << 1, ' ' << 1, 1 << 1}};
-const ax25_address null_ax25_address =
-	{{' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, 0 << 1}};
-
-EXPORT_SYMBOL_GPL(ax25_bcast);
-EXPORT_SYMBOL_GPL(ax25_defaddr);
-EXPORT_SYMBOL(null_ax25_address);
-
-/*
- *	ax25 -> ascii conversion
- */
-char *ax2asc(char *buf, const ax25_address *a)
-{
-	char c, *s;
-	int n;
-
-	for (n = 0, s = buf; n < 6; n++) {
-		c = (a->ax25_call[n] >> 1) & 0x7F;
-
-		if (c != ' ') *s++ = c;
-	}
-
-	*s++ = '-';
-
-	if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) {
-		*s++ = '1';
-		n -= 10;
-	}
-
-	*s++ = n + '0';
-	*s++ = '\0';
-
-	if (*buf == '\0' || *buf == '-')
-	   return "*";
-
-	return buf;
-
-}
-
-EXPORT_SYMBOL(ax2asc);
-
-/*
- *	ascii -> ax25 conversion
- */
-void asc2ax(ax25_address *addr, const char *callsign)
-{
-	const char *s;
-	int n;
-
-	for (s = callsign, n = 0; n < 6; n++) {
-		if (*s != '\0' && *s != '-')
-			addr->ax25_call[n] = *s++;
-		else
-			addr->ax25_call[n] = ' ';
-		addr->ax25_call[n] <<= 1;
-		addr->ax25_call[n] &= 0xFE;
-	}
-
-	if (*s++ == '\0') {
-		addr->ax25_call[6] = 0x00;
-		return;
-	}
-
-	addr->ax25_call[6] = *s++ - '0';
-
-	if (*s != '\0') {
-		addr->ax25_call[6] *= 10;
-		addr->ax25_call[6] += *s++ - '0';
-	}
-
-	addr->ax25_call[6] <<= 1;
-	addr->ax25_call[6] &= 0x1E;
-}
-
-EXPORT_SYMBOL(asc2ax);
-
-/*
- *	Compare two ax.25 addresses
- */
-int ax25cmp(const ax25_address *a, const ax25_address *b)
-{
-	int ct = 0;
-
-	while (ct < 6) {
-		if ((a->ax25_call[ct] & 0xFE) != (b->ax25_call[ct] & 0xFE))	/* Clean off repeater bits */
-			return 1;
-		ct++;
-	}
-
-	if ((a->ax25_call[ct] & 0x1E) == (b->ax25_call[ct] & 0x1E))	/* SSID without control bit */
-		return 0;
-
-	return 2;			/* Partial match */
-}
-
-EXPORT_SYMBOL(ax25cmp);
-
-/*
- *	Compare two AX.25 digipeater paths.
- */
-int ax25digicmp(const ax25_digi *digi1, const ax25_digi *digi2)
-{
-	int i;
-
-	if (digi1->ndigi != digi2->ndigi)
-		return 1;
-
-	if (digi1->lastrepeat != digi2->lastrepeat)
-		return 1;
-
-	for (i = 0; i < digi1->ndigi; i++)
-		if (ax25cmp(&digi1->calls[i], &digi2->calls[i]) != 0)
-			return 1;
-
-	return 0;
-}
-
-/*
- *	Given an AX.25 address pull of to, from, digi list, command/response and the start of data
- *
- */
-const unsigned char *ax25_addr_parse(const unsigned char *buf, int len,
-	ax25_address *src, ax25_address *dest, ax25_digi *digi, int *flags,
-	int *dama)
-{
-	int d = 0;
-
-	if (len < 14) return NULL;
-
-	if (flags != NULL) {
-		*flags = 0;
-
-		if (buf[6] & AX25_CBIT)
-			*flags = AX25_COMMAND;
-		if (buf[13] & AX25_CBIT)
-			*flags = AX25_RESPONSE;
-	}
-
-	if (dama != NULL)
-		*dama = ~buf[13] & AX25_DAMA_FLAG;
-
-	/* Copy to, from */
-	if (dest != NULL)
-		memcpy(dest, buf + 0, AX25_ADDR_LEN);
-	if (src != NULL)
-		memcpy(src,  buf + 7, AX25_ADDR_LEN);
-
-	buf += 2 * AX25_ADDR_LEN;
-	len -= 2 * AX25_ADDR_LEN;
-
-	digi->lastrepeat = -1;
-	digi->ndigi      = 0;
-
-	while (!(buf[-1] & AX25_EBIT)) {
-		if (d >= AX25_MAX_DIGIS)
-			return NULL;
-		if (len < AX25_ADDR_LEN)
-			return NULL;
-
-		memcpy(&digi->calls[d], buf, AX25_ADDR_LEN);
-		digi->ndigi = d + 1;
-
-		if (buf[6] & AX25_HBIT) {
-			digi->repeated[d] = 1;
-			digi->lastrepeat  = d;
-		} else {
-			digi->repeated[d] = 0;
-		}
-
-		buf += AX25_ADDR_LEN;
-		len -= AX25_ADDR_LEN;
-		d++;
-	}
-
-	return buf;
-}
-
-/*
- *	Assemble an AX.25 header from the bits
- */
-int ax25_addr_build(unsigned char *buf, const ax25_address *src,
-	const ax25_address *dest, const ax25_digi *d, int flag, int modulus)
-{
-	int len = 0;
-	int ct  = 0;
-
-	memcpy(buf, dest, AX25_ADDR_LEN);
-	buf[6] &= ~(AX25_EBIT | AX25_CBIT);
-	buf[6] |= AX25_SSSID_SPARE;
-
-	if (flag == AX25_COMMAND) buf[6] |= AX25_CBIT;
-
-	buf += AX25_ADDR_LEN;
-	len += AX25_ADDR_LEN;
-
-	memcpy(buf, src, AX25_ADDR_LEN);
-	buf[6] &= ~(AX25_EBIT | AX25_CBIT);
-	buf[6] &= ~AX25_SSSID_SPARE;
-
-	if (modulus == AX25_MODULUS)
-		buf[6] |= AX25_SSSID_SPARE;
-	else
-		buf[6] |= AX25_ESSID_SPARE;
-
-	if (flag == AX25_RESPONSE) buf[6] |= AX25_CBIT;
-
-	/*
-	 *	Fast path the normal digiless path
-	 */
-	if (d == NULL || d->ndigi == 0) {
-		buf[6] |= AX25_EBIT;
-		return 2 * AX25_ADDR_LEN;
-	}
-
-	buf += AX25_ADDR_LEN;
-	len += AX25_ADDR_LEN;
-
-	while (ct < d->ndigi) {
-		memcpy(buf, &d->calls[ct], AX25_ADDR_LEN);
-
-		if (d->repeated[ct])
-			buf[6] |= AX25_HBIT;
-		else
-			buf[6] &= ~AX25_HBIT;
-
-		buf[6] &= ~AX25_EBIT;
-		buf[6] |= AX25_SSSID_SPARE;
-
-		buf += AX25_ADDR_LEN;
-		len += AX25_ADDR_LEN;
-		ct++;
-	}
-
-	buf[-1] |= AX25_EBIT;
-
-	return len;
-}
-
-int ax25_addr_size(const ax25_digi *dp)
-{
-	if (dp == NULL)
-		return 2 * AX25_ADDR_LEN;
-
-	return AX25_ADDR_LEN * (2 + dp->ndigi);
-}
-
-/*
- *	Reverse Digipeat List. May not pass both parameters as same struct
- */
-void ax25_digi_invert(const ax25_digi *in, ax25_digi *out)
-{
-	int ct;
-
-	out->ndigi      = in->ndigi;
-	out->lastrepeat = in->ndigi - in->lastrepeat - 2;
-
-	/* Invert the digipeaters */
-	for (ct = 0; ct < in->ndigi; ct++) {
-		out->calls[ct] = in->calls[in->ndigi - ct - 1];
-
-		if (ct <= out->lastrepeat) {
-			out->calls[ct].ax25_call[6] |= AX25_HBIT;
-			out->repeated[ct]            = 1;
-		} else {
-			out->calls[ct].ax25_call[6] &= ~AX25_HBIT;
-			out->repeated[ct]            = 0;
-		}
-	}
-}
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
deleted file mode 100644
index 3c0544fc4ad5..000000000000
--- a/net/ax25/ax25_dev.c
+++ /dev/null
@@ -1,200 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/spinlock.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-
-static LIST_HEAD(ax25_dev_list);
-DEFINE_SPINLOCK(ax25_dev_lock);
-
-ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
-{
-	ax25_dev *ax25_dev, *res = NULL;
-
-	spin_lock_bh(&ax25_dev_lock);
-	list_for_each_entry(ax25_dev, &ax25_dev_list, list)
-		if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) {
-			res = ax25_dev;
-			ax25_dev_hold(ax25_dev);
-			break;
-		}
-	spin_unlock_bh(&ax25_dev_lock);
-
-	return res;
-}
-
-/*
- *	This is called when an interface is brought up. These are
- *	reasonable defaults.
- */
-void ax25_dev_device_up(struct net_device *dev)
-{
-	ax25_dev *ax25_dev;
-
-	ax25_dev = kzalloc_obj(*ax25_dev);
-	if (!ax25_dev) {
-		printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n");
-		return;
-	}
-
-	refcount_set(&ax25_dev->refcount, 1);
-	ax25_dev->dev     = dev;
-	netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL);
-	ax25_dev->forward = NULL;
-	ax25_dev->device_up = true;
-
-	ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE;
-	ax25_dev->values[AX25_VALUES_AXDEFMODE] = AX25_DEF_AXDEFMODE;
-	ax25_dev->values[AX25_VALUES_BACKOFF]   = AX25_DEF_BACKOFF;
-	ax25_dev->values[AX25_VALUES_CONMODE]   = AX25_DEF_CONMODE;
-	ax25_dev->values[AX25_VALUES_WINDOW]    = AX25_DEF_WINDOW;
-	ax25_dev->values[AX25_VALUES_EWINDOW]   = AX25_DEF_EWINDOW;
-	ax25_dev->values[AX25_VALUES_T1]        = AX25_DEF_T1;
-	ax25_dev->values[AX25_VALUES_T2]        = AX25_DEF_T2;
-	ax25_dev->values[AX25_VALUES_T3]        = AX25_DEF_T3;
-	ax25_dev->values[AX25_VALUES_IDLE]	= AX25_DEF_IDLE;
-	ax25_dev->values[AX25_VALUES_N2]        = AX25_DEF_N2;
-	ax25_dev->values[AX25_VALUES_PACLEN]	= AX25_DEF_PACLEN;
-	ax25_dev->values[AX25_VALUES_PROTOCOL]  = AX25_DEF_PROTOCOL;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT;
-
-	ax25_ds_setup_timer(ax25_dev);
-#endif
-
-	spin_lock_bh(&ax25_dev_lock);
-	list_add(&ax25_dev->list, &ax25_dev_list);
-	rcu_assign_pointer(dev->ax25_ptr, ax25_dev);
-	spin_unlock_bh(&ax25_dev_lock);
-
-	ax25_register_dev_sysctl(ax25_dev);
-}
-
-void ax25_dev_device_down(struct net_device *dev)
-{
-	ax25_dev *s, *ax25_dev;
-
-	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
-		return;
-
-	ax25_unregister_dev_sysctl(ax25_dev);
-
-	spin_lock_bh(&ax25_dev_lock);
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	timer_shutdown_sync(&ax25_dev->dama.slave_timer);
-#endif
-
-	/*
-	 *	Remove any packet forwarding that points to this device.
-	 */
-	list_for_each_entry(s, &ax25_dev_list, list)
-		if (s->forward == dev)
-			s->forward = NULL;
-
-	list_for_each_entry(s, &ax25_dev_list, list) {
-		if (s == ax25_dev) {
-			list_del(&s->list);
-			break;
-		}
-	}
-
-	RCU_INIT_POINTER(dev->ax25_ptr, NULL);
-	spin_unlock_bh(&ax25_dev_lock);
-	netdev_put(dev, &ax25_dev->dev_tracker);
-	ax25_dev_put(ax25_dev);
-}
-
-int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
-{
-	ax25_dev *ax25_dev, *fwd_dev;
-
-	if ((ax25_dev = ax25_addr_ax25dev(&fwd->port_from)) == NULL)
-		return -EINVAL;
-
-	switch (cmd) {
-	case SIOCAX25ADDFWD:
-		fwd_dev = ax25_addr_ax25dev(&fwd->port_to);
-		if (!fwd_dev) {
-			ax25_dev_put(ax25_dev);
-			return -EINVAL;
-		}
-		if (ax25_dev->forward) {
-			ax25_dev_put(fwd_dev);
-			ax25_dev_put(ax25_dev);
-			return -EINVAL;
-		}
-		ax25_dev->forward = fwd_dev->dev;
-		ax25_dev_put(fwd_dev);
-		ax25_dev_put(ax25_dev);
-		break;
-
-	case SIOCAX25DELFWD:
-		if (!ax25_dev->forward) {
-			ax25_dev_put(ax25_dev);
-			return -EINVAL;
-		}
-		ax25_dev->forward = NULL;
-		ax25_dev_put(ax25_dev);
-		break;
-
-	default:
-		ax25_dev_put(ax25_dev);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-struct net_device *ax25_fwd_dev(struct net_device *dev)
-{
-	ax25_dev *ax25_dev;
-
-	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
-		return dev;
-
-	if (ax25_dev->forward == NULL)
-		return dev;
-
-	return ax25_dev->forward;
-}
-
-/*
- *	Free all memory associated with device structures.
- */
-void __exit ax25_dev_free(void)
-{
-	ax25_dev *s, *n;
-
-	spin_lock_bh(&ax25_dev_lock);
-	list_for_each_entry_safe(s, n, &ax25_dev_list, list) {
-		netdev_put(s->dev, &s->dev_tracker);
-		list_del(&s->list);
-		ax25_dev_put(s);
-	}
-	spin_unlock_bh(&ax25_dev_lock);
-}
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c
deleted file mode 100644
index c62f8fb06189..000000000000
--- a/net/ax25/ax25_ds_in.c
+++ /dev/null
@@ -1,298 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-/*
- *	State machine for state 1, Awaiting Connection State.
- *	The handling of the timer(s) is in file ax25_ds_timer.c.
- *	Handling of state 0 and connection release is in ax25.c.
- */
-static int ax25_ds_state1_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type)
-{
-	switch (frametype) {
-	case AX25_SABM:
-		ax25->modulus = AX25_MODULUS;
-		ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		break;
-
-	case AX25_SABME:
-		ax25->modulus = AX25_EMODULUS;
-		ax25->window  =  ax25->ax25_dev->values[AX25_VALUES_EWINDOW];
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		break;
-
-	case AX25_DISC:
-		ax25_send_control(ax25, AX25_DM, pf, AX25_RESPONSE);
-		break;
-
-	case AX25_UA:
-		ax25_calculate_rtt(ax25);
-		ax25_stop_t1timer(ax25);
-		ax25_start_t3timer(ax25);
-		ax25_start_idletimer(ax25);
-		ax25->vs      = 0;
-		ax25->va      = 0;
-		ax25->vr      = 0;
-		ax25->state   = AX25_STATE_3;
-		ax25->n2count = 0;
-		if (ax25->sk != NULL) {
-			bh_lock_sock(ax25->sk);
-			ax25->sk->sk_state = TCP_ESTABLISHED;
-			/*
-			 * For WAIT_SABM connections we will produce an accept
-			 * ready socket here
-			 */
-			if (!sock_flag(ax25->sk, SOCK_DEAD))
-				ax25->sk->sk_state_change(ax25->sk);
-			bh_unlock_sock(ax25->sk);
-		}
-		ax25_dama_on(ax25);
-
-		/* according to DK4EG's spec we are required to
-		 * send a RR RESPONSE FINAL NR=0.
-		 */
-
-		ax25_std_enquiry_response(ax25);
-		break;
-
-	case AX25_DM:
-		if (pf)
-			ax25_disconnect(ax25, ECONNREFUSED);
-		break;
-
-	default:
-		if (pf)
-			ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND);
-		break;
-	}
-
-	return 0;
-}
-
-/*
- *	State machine for state 2, Awaiting Release State.
- *	The handling of the timer(s) is in file ax25_ds_timer.c
- *	Handling of state 0 and connection release is in ax25.c.
- */
-static int ax25_ds_state2_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type)
-{
-	switch (frametype) {
-	case AX25_SABM:
-	case AX25_SABME:
-		ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-		ax25_dama_off(ax25);
-		break;
-
-	case AX25_DISC:
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		ax25_dama_off(ax25);
-		ax25_disconnect(ax25, 0);
-		break;
-
-	case AX25_DM:
-	case AX25_UA:
-		if (pf) {
-			ax25_dama_off(ax25);
-			ax25_disconnect(ax25, 0);
-		}
-		break;
-
-	case AX25_I:
-	case AX25_REJ:
-	case AX25_RNR:
-	case AX25_RR:
-		if (pf) {
-			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-			ax25_dama_off(ax25);
-		}
-		break;
-
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-/*
- *	State machine for state 3, Connected State.
- *	The handling of the timer(s) is in file ax25_timer.c
- *	Handling of state 0 and connection release is in ax25.c.
- */
-static int ax25_ds_state3_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int ns, int nr, int pf, int type)
-{
-	int queued = 0;
-
-	switch (frametype) {
-	case AX25_SABM:
-	case AX25_SABME:
-		if (frametype == AX25_SABM) {
-			ax25->modulus   = AX25_MODULUS;
-			ax25->window    = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
-		} else {
-			ax25->modulus   = AX25_EMODULUS;
-			ax25->window    = ax25->ax25_dev->values[AX25_VALUES_EWINDOW];
-		}
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		ax25_stop_t1timer(ax25);
-		ax25_start_t3timer(ax25);
-		ax25_start_idletimer(ax25);
-		ax25->condition = 0x00;
-		ax25->vs        = 0;
-		ax25->va        = 0;
-		ax25->vr        = 0;
-		ax25_requeue_frames(ax25);
-		ax25_dama_on(ax25);
-		break;
-
-	case AX25_DISC:
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		ax25_dama_off(ax25);
-		ax25_disconnect(ax25, 0);
-		break;
-
-	case AX25_DM:
-		ax25_dama_off(ax25);
-		ax25_disconnect(ax25, ECONNRESET);
-		break;
-
-	case AX25_RR:
-	case AX25_RNR:
-		if (frametype == AX25_RR)
-			ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
-		else
-			ax25->condition |= AX25_COND_PEER_RX_BUSY;
-
-		if (ax25_validate_nr(ax25, nr)) {
-			if (ax25_check_iframes_acked(ax25, nr))
-				ax25->n2count=0;
-			if (type == AX25_COMMAND && pf)
-				ax25_ds_enquiry_response(ax25);
-		} else {
-			ax25_ds_nr_error_recovery(ax25);
-			ax25->state = AX25_STATE_1;
-		}
-		break;
-
-	case AX25_REJ:
-		ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
-
-		if (ax25_validate_nr(ax25, nr)) {
-			if (ax25->va != nr)
-				ax25->n2count=0;
-
-			ax25_frames_acked(ax25, nr);
-			ax25_calculate_rtt(ax25);
-			ax25_stop_t1timer(ax25);
-			ax25_start_t3timer(ax25);
-			ax25_requeue_frames(ax25);
-
-			if (type == AX25_COMMAND && pf)
-				ax25_ds_enquiry_response(ax25);
-		} else {
-			ax25_ds_nr_error_recovery(ax25);
-			ax25->state = AX25_STATE_1;
-		}
-		break;
-
-	case AX25_I:
-		if (!ax25_validate_nr(ax25, nr)) {
-			ax25_ds_nr_error_recovery(ax25);
-			ax25->state = AX25_STATE_1;
-			break;
-		}
-		if (ax25->condition & AX25_COND_PEER_RX_BUSY) {
-			ax25_frames_acked(ax25, nr);
-			ax25->n2count = 0;
-		} else {
-			if (ax25_check_iframes_acked(ax25, nr))
-				ax25->n2count = 0;
-		}
-		if (ax25->condition & AX25_COND_OWN_RX_BUSY) {
-			if (pf) ax25_ds_enquiry_response(ax25);
-			break;
-		}
-		if (ns == ax25->vr) {
-			ax25->vr = (ax25->vr + 1) % ax25->modulus;
-			queued = ax25_rx_iframe(ax25, skb);
-			if (ax25->condition & AX25_COND_OWN_RX_BUSY)
-				ax25->vr = ns;	/* ax25->vr - 1 */
-			ax25->condition &= ~AX25_COND_REJECT;
-			if (pf) {
-				ax25_ds_enquiry_response(ax25);
-			} else {
-				if (!(ax25->condition & AX25_COND_ACK_PENDING)) {
-					ax25->condition |= AX25_COND_ACK_PENDING;
-					ax25_start_t2timer(ax25);
-				}
-			}
-		} else {
-			if (ax25->condition & AX25_COND_REJECT) {
-				if (pf) ax25_ds_enquiry_response(ax25);
-			} else {
-				ax25->condition |= AX25_COND_REJECT;
-				ax25_ds_enquiry_response(ax25);
-				ax25->condition &= ~AX25_COND_ACK_PENDING;
-			}
-		}
-		break;
-
-	case AX25_FRMR:
-	case AX25_ILLEGAL:
-		ax25_ds_establish_data_link(ax25);
-		ax25->state = AX25_STATE_1;
-		break;
-
-	default:
-		break;
-	}
-
-	return queued;
-}
-
-/*
- *	Higher level upcall for a LAPB frame
- */
-int ax25_ds_frame_in(ax25_cb *ax25, struct sk_buff *skb, int type)
-{
-	int queued = 0, frametype, ns, nr, pf;
-
-	frametype = ax25_decode(ax25, skb, &ns, &nr, &pf);
-
-	switch (ax25->state) {
-	case AX25_STATE_1:
-		queued = ax25_ds_state1_machine(ax25, skb, frametype, pf, type);
-		break;
-	case AX25_STATE_2:
-		queued = ax25_ds_state2_machine(ax25, skb, frametype, pf, type);
-		break;
-	case AX25_STATE_3:
-		queued = ax25_ds_state3_machine(ax25, skb, frametype, ns, nr, pf, type);
-		break;
-	}
-
-	return queued;
-}
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
deleted file mode 100644
index f00e27df3c76..000000000000
--- a/net/ax25/ax25_ds_subr.c
+++ /dev/null
@@ -1,204 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/spinlock.h>
-#include <linux/net.h>
-#include <linux/gfp.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-void ax25_ds_nr_error_recovery(ax25_cb *ax25)
-{
-	ax25_ds_establish_data_link(ax25);
-}
-
-/*
- *	dl1bke 960114: transmit I frames on DAMA poll
- */
-void ax25_ds_enquiry_response(ax25_cb *ax25)
-{
-	ax25_cb *ax25o;
-
-	/* Please note that neither DK4EG's nor DG2FEF's
-	 * DAMA spec mention the following behaviour as seen
-	 * with TheFirmware:
-	 *
-	 * 	DB0ACH->DL1BKE <RR C P R0> [DAMA]
-	 *	DL1BKE->DB0ACH <I NR=0 NS=0>
-	 *	DL1BKE-7->DB0PRA-6 DB0ACH <I C S3 R5>
-	 *	DL1BKE->DB0ACH <RR R F R0>
-	 *
-	 * The Flexnet DAMA Master implementation apparently
-	 * insists on the "proper" AX.25 behaviour:
-	 *
-	 * 	DB0ACH->DL1BKE <RR C P R0> [DAMA]
-	 *	DL1BKE->DB0ACH <RR R F R0>
-	 *	DL1BKE->DB0ACH <I NR=0 NS=0>
-	 *	DL1BKE-7->DB0PRA-6 DB0ACH <I C S3 R5>
-	 *
-	 * Flexnet refuses to send us *any* I frame if we send
-	 * a REJ in case AX25_COND_REJECT is set. It is superfluous in
-	 * this mode anyway (a RR or RNR invokes the retransmission).
-	 * Is this a Flexnet bug?
-	 */
-
-	ax25_std_enquiry_response(ax25);
-
-	if (!(ax25->condition & AX25_COND_PEER_RX_BUSY)) {
-		ax25_requeue_frames(ax25);
-		ax25_kick(ax25);
-	}
-
-	if (ax25->state == AX25_STATE_1 || ax25->state == AX25_STATE_2 || skb_peek(&ax25->ack_queue) != NULL)
-		ax25_ds_t1_timeout(ax25);
-	else
-		ax25->n2count = 0;
-
-	ax25_start_t3timer(ax25);
-	ax25_ds_set_timer(ax25->ax25_dev);
-
-	spin_lock(&ax25_list_lock);
-	ax25_for_each(ax25o, &ax25_list) {
-		if (ax25o == ax25)
-			continue;
-
-		if (ax25o->ax25_dev != ax25->ax25_dev)
-			continue;
-
-		if (ax25o->state == AX25_STATE_1 || ax25o->state == AX25_STATE_2) {
-			ax25_ds_t1_timeout(ax25o);
-			continue;
-		}
-
-		if (!(ax25o->condition & AX25_COND_PEER_RX_BUSY) && ax25o->state == AX25_STATE_3) {
-			ax25_requeue_frames(ax25o);
-			ax25_kick(ax25o);
-		}
-
-		if (ax25o->state == AX25_STATE_1 || ax25o->state == AX25_STATE_2 || skb_peek(&ax25o->ack_queue) != NULL)
-			ax25_ds_t1_timeout(ax25o);
-
-		/* do not start T3 for listening sockets (tnx DD8NE) */
-
-		if (ax25o->state != AX25_STATE_0)
-			ax25_start_t3timer(ax25o);
-	}
-	spin_unlock(&ax25_list_lock);
-}
-
-void ax25_ds_establish_data_link(ax25_cb *ax25)
-{
-	ax25->condition &= AX25_COND_DAMA_MODE;
-	ax25->n2count    = 0;
-	ax25_calculate_t1(ax25);
-	ax25_start_t1timer(ax25);
-	ax25_stop_t2timer(ax25);
-	ax25_start_t3timer(ax25);
-}
-
-/*
- *	:::FIXME:::
- *	This is a kludge. Not all drivers recognize kiss commands.
- *	We need a driver level  request to switch duplex mode, that does
- *	either SCC changing, PI config or KISS as required. Currently
- *	this request isn't reliable.
- */
-static void ax25_kiss_cmd(ax25_dev *ax25_dev, unsigned char cmd, unsigned char param)
-{
-	struct sk_buff *skb;
-	unsigned char *p;
-
-	if (ax25_dev->dev == NULL)
-		return;
-
-	if ((skb = alloc_skb(2, GFP_ATOMIC)) == NULL)
-		return;
-
-	skb_reset_network_header(skb);
-	p = skb_put(skb, 2);
-
-	*p++ = cmd;
-	*p++ = param;
-
-	skb->protocol = ax25_type_trans(skb, ax25_dev->dev);
-
-	dev_queue_xmit(skb);
-}
-
-/*
- *	A nasty problem arises if we count the number of DAMA connections
- *	wrong, especially when connections on the device already existed
- *	and our network node (or the sysop) decides to turn on DAMA Master
- *	mode. We thus flag the 'real' slave connections with
- *	ax25->dama_slave=1 and look on every disconnect if still slave
- *	connections exist.
- */
-static int ax25_check_dama_slave(ax25_dev *ax25_dev)
-{
-	ax25_cb *ax25;
-	int res = 0;
-
-	spin_lock(&ax25_list_lock);
-	ax25_for_each(ax25, &ax25_list)
-		if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) {
-			res = 1;
-			break;
-		}
-	spin_unlock(&ax25_list_lock);
-
-	return res;
-}
-
-static void ax25_dev_dama_on(ax25_dev *ax25_dev)
-{
-	if (ax25_dev == NULL)
-		return;
-
-	if (ax25_dev->dama.slave == 0)
-		ax25_kiss_cmd(ax25_dev, 5, 1);
-
-	ax25_dev->dama.slave = 1;
-	ax25_ds_set_timer(ax25_dev);
-}
-
-void ax25_dev_dama_off(ax25_dev *ax25_dev)
-{
-	if (ax25_dev == NULL)
-		return;
-
-	if (ax25_dev->dama.slave && !ax25_check_dama_slave(ax25_dev)) {
-		ax25_kiss_cmd(ax25_dev, 5, 0);
-		ax25_dev->dama.slave = 0;
-		ax25_ds_del_timer(ax25_dev);
-	}
-}
-
-void ax25_dama_on(ax25_cb *ax25)
-{
-	ax25_dev_dama_on(ax25->ax25_dev);
-	ax25->condition |= AX25_COND_DAMA_MODE;
-}
-
-void ax25_dama_off(ax25_cb *ax25)
-{
-	ax25->condition &= ~AX25_COND_DAMA_MODE;
-	ax25_dev_dama_off(ax25->ax25_dev);
-}
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
deleted file mode 100644
index 0c9e7775aa54..000000000000
--- a/net/ax25/ax25_ds_timer.c
+++ /dev/null
@@ -1,235 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/spinlock.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/tcp_states.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-static void ax25_ds_timeout(struct timer_list *);
-
-/*
- *	Add DAMA slave timeout timer to timer list.
- *	Unlike the connection based timers the timeout function gets
- *	triggered every second. Please note that NET_AX25_DAMA_SLAVE_TIMEOUT
- *	(aka /proc/sys/net/ax25/{dev}/dama_slave_timeout) is still in
- *	1/10th of a second.
- */
-
-void ax25_ds_setup_timer(ax25_dev *ax25_dev)
-{
-	timer_setup(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 0);
-}
-
-void ax25_ds_del_timer(ax25_dev *ax25_dev)
-{
-	if (ax25_dev)
-		timer_delete(&ax25_dev->dama.slave_timer);
-}
-
-void ax25_ds_set_timer(ax25_dev *ax25_dev)
-{
-	if (ax25_dev == NULL)		/* paranoia */
-		return;
-
-	ax25_dev->dama.slave_timeout =
-		msecs_to_jiffies(ax25_dev->values[AX25_VALUES_DS_TIMEOUT]) / 10;
-	mod_timer(&ax25_dev->dama.slave_timer, jiffies + HZ);
-}
-
-/*
- *	DAMA Slave Timeout
- *	Silently discard all (slave) connections in case our master forgot us...
- */
-
-static void ax25_ds_timeout(struct timer_list *t)
-{
-	ax25_dev *ax25_dev = timer_container_of(ax25_dev, t, dama.slave_timer);
-	ax25_cb *ax25;
-
-	if (ax25_dev == NULL || !ax25_dev->dama.slave)
-		return;			/* Yikes! */
-
-	if (!ax25_dev->dama.slave_timeout || --ax25_dev->dama.slave_timeout) {
-		ax25_ds_set_timer(ax25_dev);
-		return;
-	}
-
-	spin_lock(&ax25_list_lock);
-	ax25_for_each(ax25, &ax25_list) {
-		if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE))
-			continue;
-
-		ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-		ax25_disconnect(ax25, ETIMEDOUT);
-	}
-	spin_unlock(&ax25_list_lock);
-
-	ax25_dev_dama_off(ax25_dev);
-}
-
-void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
-{
-	struct sock *sk=ax25->sk;
-
-	if (sk)
-		bh_lock_sock(sk);
-
-	switch (ax25->state) {
-
-	case AX25_STATE_0:
-	case AX25_STATE_2:
-		/* Magic here: If we listen() and a new link dies before it
-		   is accepted() it isn't 'dead' so doesn't get removed. */
-		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
-		    (sk->sk_state == TCP_LISTEN &&
-		     sock_flag(sk, SOCK_DEAD))) {
-			if (sk) {
-				sock_hold(sk);
-				ax25_destroy_socket(ax25);
-				bh_unlock_sock(sk);
-				/* Ungrab socket and destroy it */
-				sock_put(sk);
-			} else
-				ax25_destroy_socket(ax25);
-			return;
-		}
-		break;
-
-	case AX25_STATE_3:
-		/*
-		 * Check the state of the receive buffer.
-		 */
-		if (sk != NULL) {
-			if (atomic_read(&sk->sk_rmem_alloc) <
-			    (sk->sk_rcvbuf >> 1) &&
-			    (ax25->condition & AX25_COND_OWN_RX_BUSY)) {
-				ax25->condition &= ~AX25_COND_OWN_RX_BUSY;
-				ax25->condition &= ~AX25_COND_ACK_PENDING;
-				break;
-			}
-		}
-		break;
-	}
-
-	if (sk)
-		bh_unlock_sock(sk);
-
-	ax25_start_heartbeat(ax25);
-}
-
-/* dl1bke 960114: T3 works much like the IDLE timeout, but
- *                gets reloaded with every frame for this
- *		  connection.
- */
-void ax25_ds_t3timer_expiry(ax25_cb *ax25)
-{
-	ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-	ax25_dama_off(ax25);
-	ax25_disconnect(ax25, ETIMEDOUT);
-}
-
-/* dl1bke 960228: close the connection when IDLE expires.
- *		  unlike T3 this timer gets reloaded only on
- *		  I frames.
- */
-void ax25_ds_idletimer_expiry(ax25_cb *ax25)
-{
-	ax25_clear_queues(ax25);
-
-	ax25->n2count = 0;
-	ax25->state = AX25_STATE_2;
-
-	ax25_calculate_t1(ax25);
-	ax25_start_t1timer(ax25);
-	ax25_stop_t3timer(ax25);
-
-	if (ax25->sk != NULL) {
-		bh_lock_sock(ax25->sk);
-		ax25->sk->sk_state     = TCP_CLOSE;
-		ax25->sk->sk_err       = 0;
-		ax25->sk->sk_shutdown |= SEND_SHUTDOWN;
-		if (!sock_flag(ax25->sk, SOCK_DEAD)) {
-			ax25->sk->sk_state_change(ax25->sk);
-			sock_set_flag(ax25->sk, SOCK_DEAD);
-		}
-		bh_unlock_sock(ax25->sk);
-	}
-}
-
-/* dl1bke 960114: The DAMA protocol requires to send data and SABM/DISC
- *                within the poll of any connected channel. Remember
- *                that we are not allowed to send anything unless we
- *                get polled by the Master.
- *
- *                Thus we'll have to do parts of our T1 handling in
- *                ax25_enquiry_response().
- */
-void ax25_ds_t1_timeout(ax25_cb *ax25)
-{
-	switch (ax25->state) {
-	case AX25_STATE_1:
-		if (ax25->n2count == ax25->n2) {
-			if (ax25->modulus == AX25_MODULUS) {
-				ax25_disconnect(ax25, ETIMEDOUT);
-				return;
-			} else {
-				ax25->modulus = AX25_MODULUS;
-				ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
-				ax25->n2count = 0;
-				ax25_send_control(ax25, AX25_SABM, AX25_POLLOFF, AX25_COMMAND);
-			}
-		} else {
-			ax25->n2count++;
-			if (ax25->modulus == AX25_MODULUS)
-				ax25_send_control(ax25, AX25_SABM, AX25_POLLOFF, AX25_COMMAND);
-			else
-				ax25_send_control(ax25, AX25_SABME, AX25_POLLOFF, AX25_COMMAND);
-		}
-		break;
-
-	case AX25_STATE_2:
-		if (ax25->n2count == ax25->n2) {
-			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-			if (!sock_flag(ax25->sk, SOCK_DESTROY))
-				ax25_disconnect(ax25, ETIMEDOUT);
-			return;
-		} else {
-			ax25->n2count++;
-		}
-		break;
-
-	case AX25_STATE_3:
-		if (ax25->n2count == ax25->n2) {
-			ax25_send_control(ax25, AX25_DM, AX25_POLLON, AX25_RESPONSE);
-			ax25_disconnect(ax25, ETIMEDOUT);
-			return;
-		} else {
-			ax25->n2count++;
-		}
-		break;
-	}
-
-	ax25_calculate_t1(ax25);
-	ax25_start_t1timer(ax25);
-}
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
deleted file mode 100644
index 3ad454416a5c..000000000000
--- a/net/ax25/ax25_iface.c
+++ /dev/null
@@ -1,214 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-static struct ax25_protocol *protocol_list;
-static DEFINE_RWLOCK(protocol_list_lock);
-
-static HLIST_HEAD(ax25_linkfail_list);
-static DEFINE_SPINLOCK(linkfail_lock);
-
-static struct listen_struct {
-	struct listen_struct *next;
-	ax25_address  callsign;
-	struct net_device *dev;
-} *listen_list = NULL;
-static DEFINE_SPINLOCK(listen_lock);
-
-/*
- * Do not register the internal protocols AX25_P_TEXT, AX25_P_SEGMENT,
- * AX25_P_IP or AX25_P_ARP ...
- */
-void ax25_register_pid(struct ax25_protocol *ap)
-{
-	write_lock_bh(&protocol_list_lock);
-	ap->next = protocol_list;
-	protocol_list = ap;
-	write_unlock_bh(&protocol_list_lock);
-}
-
-EXPORT_SYMBOL_GPL(ax25_register_pid);
-
-void ax25_protocol_release(unsigned int pid)
-{
-	struct ax25_protocol *protocol;
-
-	write_lock_bh(&protocol_list_lock);
-	protocol = protocol_list;
-	if (protocol == NULL)
-		goto out;
-
-	if (protocol->pid == pid) {
-		protocol_list = protocol->next;
-		goto out;
-	}
-
-	while (protocol != NULL && protocol->next != NULL) {
-		if (protocol->next->pid == pid) {
-			protocol->next = protocol->next->next;
-			goto out;
-		}
-
-		protocol = protocol->next;
-	}
-out:
-	write_unlock_bh(&protocol_list_lock);
-}
-
-EXPORT_SYMBOL(ax25_protocol_release);
-
-void ax25_linkfail_register(struct ax25_linkfail *lf)
-{
-	spin_lock_bh(&linkfail_lock);
-	hlist_add_head(&lf->lf_node, &ax25_linkfail_list);
-	spin_unlock_bh(&linkfail_lock);
-}
-
-EXPORT_SYMBOL(ax25_linkfail_register);
-
-void ax25_linkfail_release(struct ax25_linkfail *lf)
-{
-	spin_lock_bh(&linkfail_lock);
-	hlist_del_init(&lf->lf_node);
-	spin_unlock_bh(&linkfail_lock);
-}
-
-EXPORT_SYMBOL(ax25_linkfail_release);
-
-int ax25_listen_register(const ax25_address *callsign, struct net_device *dev)
-{
-	struct listen_struct *listen;
-
-	if (ax25_listen_mine(callsign, dev))
-		return 0;
-
-	if ((listen = kmalloc_obj(*listen, GFP_ATOMIC)) == NULL)
-		return -ENOMEM;
-
-	listen->callsign = *callsign;
-	listen->dev      = dev;
-
-	spin_lock_bh(&listen_lock);
-	listen->next = listen_list;
-	listen_list  = listen;
-	spin_unlock_bh(&listen_lock);
-
-	return 0;
-}
-
-EXPORT_SYMBOL(ax25_listen_register);
-
-void ax25_listen_release(const ax25_address *callsign, struct net_device *dev)
-{
-	struct listen_struct *s, *listen;
-
-	spin_lock_bh(&listen_lock);
-	listen = listen_list;
-	if (listen == NULL) {
-		spin_unlock_bh(&listen_lock);
-		return;
-	}
-
-	if (ax25cmp(&listen->callsign, callsign) == 0 && listen->dev == dev) {
-		listen_list = listen->next;
-		spin_unlock_bh(&listen_lock);
-		kfree(listen);
-		return;
-	}
-
-	while (listen != NULL && listen->next != NULL) {
-		if (ax25cmp(&listen->next->callsign, callsign) == 0 && listen->next->dev == dev) {
-			s = listen->next;
-			listen->next = listen->next->next;
-			spin_unlock_bh(&listen_lock);
-			kfree(s);
-			return;
-		}
-
-		listen = listen->next;
-	}
-	spin_unlock_bh(&listen_lock);
-}
-
-EXPORT_SYMBOL(ax25_listen_release);
-
-int (*ax25_protocol_function(unsigned int pid))(struct sk_buff *, ax25_cb *)
-{
-	int (*res)(struct sk_buff *, ax25_cb *) = NULL;
-	struct ax25_protocol *protocol;
-
-	read_lock(&protocol_list_lock);
-	for (protocol = protocol_list; protocol != NULL; protocol = protocol->next)
-		if (protocol->pid == pid) {
-			res = protocol->func;
-			break;
-		}
-	read_unlock(&protocol_list_lock);
-
-	return res;
-}
-
-int ax25_listen_mine(const ax25_address *callsign, struct net_device *dev)
-{
-	struct listen_struct *listen;
-
-	spin_lock_bh(&listen_lock);
-	for (listen = listen_list; listen != NULL; listen = listen->next)
-		if (ax25cmp(&listen->callsign, callsign) == 0 &&
-		    (listen->dev == dev || listen->dev == NULL)) {
-			spin_unlock_bh(&listen_lock);
-			return 1;
-	}
-	spin_unlock_bh(&listen_lock);
-
-	return 0;
-}
-
-void ax25_link_failed(ax25_cb *ax25, int reason)
-{
-	struct ax25_linkfail *lf;
-
-	spin_lock_bh(&linkfail_lock);
-	hlist_for_each_entry(lf, &ax25_linkfail_list, lf_node)
-		lf->func(ax25, reason);
-	spin_unlock_bh(&linkfail_lock);
-}
-
-int ax25_protocol_is_registered(unsigned int pid)
-{
-	struct ax25_protocol *protocol;
-	int res = 0;
-
-	read_lock_bh(&protocol_list_lock);
-	for (protocol = protocol_list; protocol != NULL; protocol = protocol->next)
-		if (protocol->pid == pid) {
-			res = 1;
-			break;
-		}
-	read_unlock_bh(&protocol_list_lock);
-
-	return res;
-}
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
deleted file mode 100644
index d75b3e9ed93d..000000000000
--- a/net/ax25/ax25_in.c
+++ /dev/null
@@ -1,455 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-/*
- *	Given a fragment, queue it on the fragment queue and if the fragment
- *	is complete, send it back to ax25_rx_iframe.
- */
-static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
-{
-	struct sk_buff *skbn, *skbo;
-
-	if (ax25->fragno != 0) {
-		if (!(*skb->data & AX25_SEG_FIRST)) {
-			if ((ax25->fragno - 1) == (*skb->data & AX25_SEG_REM)) {
-				/* Enqueue fragment */
-				ax25->fragno = *skb->data & AX25_SEG_REM;
-				skb_pull(skb, 1);	/* skip fragno */
-				ax25->fraglen += skb->len;
-				skb_queue_tail(&ax25->frag_queue, skb);
-
-				/* Last fragment received ? */
-				if (ax25->fragno == 0) {
-					skbn = alloc_skb(AX25_MAX_HEADER_LEN +
-							 ax25->fraglen,
-							 GFP_ATOMIC);
-					if (!skbn) {
-						skb_queue_purge(&ax25->frag_queue);
-						return 1;
-					}
-
-					skb_reserve(skbn, AX25_MAX_HEADER_LEN);
-
-					skbn->dev   = ax25->ax25_dev->dev;
-					skb_reset_network_header(skbn);
-					skb_reset_transport_header(skbn);
-
-					/* Copy data from the fragments */
-					while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) {
-						skb_copy_from_linear_data(skbo,
-							  skb_put(skbn, skbo->len),
-									  skbo->len);
-						kfree_skb(skbo);
-					}
-
-					ax25->fraglen = 0;
-
-					if (ax25_rx_iframe(ax25, skbn) == 0)
-						kfree_skb(skbn);
-				}
-
-				return 1;
-			}
-		}
-	} else {
-		/* First fragment received */
-		if (*skb->data & AX25_SEG_FIRST) {
-			skb_queue_purge(&ax25->frag_queue);
-			ax25->fragno = *skb->data & AX25_SEG_REM;
-			skb_pull(skb, 1);		/* skip fragno */
-			ax25->fraglen = skb->len;
-			skb_queue_tail(&ax25->frag_queue, skb);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-/*
- *	This is where all valid I frames are sent to, to be dispatched to
- *	whichever protocol requires them.
- */
-int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
-{
-	int (*func)(struct sk_buff *, ax25_cb *);
-	unsigned char pid;
-	int queued = 0;
-
-	if (skb == NULL) return 0;
-
-	ax25_start_idletimer(ax25);
-
-	pid = *skb->data;
-
-	if (pid == AX25_P_IP) {
-		/* working around a TCP bug to keep additional listeners
-		 * happy. TCP re-uses the buffer and destroys the original
-		 * content.
-		 */
-		struct sk_buff *skbn = skb_copy(skb, GFP_ATOMIC);
-		if (skbn != NULL) {
-			kfree_skb(skb);
-			skb = skbn;
-		}
-
-		skb_pull(skb, 1);	/* Remove PID */
-		skb->mac_header = skb->network_header;
-		skb_reset_network_header(skb);
-		skb->dev      = ax25->ax25_dev->dev;
-		skb->pkt_type = PACKET_HOST;
-		skb->protocol = htons(ETH_P_IP);
-		netif_rx(skb);
-		return 1;
-	}
-	if (pid == AX25_P_SEGMENT) {
-		skb_pull(skb, 1);	/* Remove PID */
-		return ax25_rx_fragment(ax25, skb);
-	}
-
-	if ((func = ax25_protocol_function(pid)) != NULL) {
-		skb_pull(skb, 1);	/* Remove PID */
-		return (*func)(skb, ax25);
-	}
-
-	if (ax25->sk != NULL && ax25->ax25_dev->values[AX25_VALUES_CONMODE] == 2) {
-		if ((!ax25->pidincl && ax25->sk->sk_protocol == pid) ||
-		    ax25->pidincl) {
-			if (sock_queue_rcv_skb(ax25->sk, skb) == 0)
-				queued = 1;
-			else
-				ax25->condition |= AX25_COND_OWN_RX_BUSY;
-		}
-	}
-
-	return queued;
-}
-
-/*
- *	Higher level upcall for a LAPB frame
- */
-static int ax25_process_rx_frame(ax25_cb *ax25, struct sk_buff *skb, int type, int dama)
-{
-	int queued = 0;
-
-	if (ax25->state == AX25_STATE_0)
-		return 0;
-
-	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
-	case AX25_PROTO_STD_SIMPLEX:
-	case AX25_PROTO_STD_DUPLEX:
-		queued = ax25_std_frame_in(ax25, skb, type);
-		break;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	case AX25_PROTO_DAMA_SLAVE:
-		if (dama || ax25->ax25_dev->dama.slave)
-			queued = ax25_ds_frame_in(ax25, skb, type);
-		else
-			queued = ax25_std_frame_in(ax25, skb, type);
-		break;
-#endif
-	}
-
-	return queued;
-}
-
-static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
-		    const ax25_address *dev_addr, struct packet_type *ptype)
-{
-	ax25_address src, dest, *next_digi = NULL;
-	int type = 0, mine = 0, dama;
-	struct sock *make, *sk;
-	ax25_digi dp, reverse_dp;
-	ax25_cb *ax25;
-	ax25_dev *ax25_dev;
-
-	/*
-	 *	Process the AX.25/LAPB frame.
-	 */
-
-	skb_reset_transport_header(skb);
-
-	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
-		goto free;
-
-	/*
-	 *	Parse the address header.
-	 */
-
-	if (ax25_addr_parse(skb->data, skb->len, &src, &dest, &dp, &type, &dama) == NULL)
-		goto free;
-
-	/*
-	 *	Ours perhaps ?
-	 */
-	if (dp.lastrepeat + 1 < dp.ndigi)		/* Not yet digipeated completely */
-		next_digi = &dp.calls[dp.lastrepeat + 1];
-
-	/*
-	 *	Pull of the AX.25 headers leaving the CTRL/PID bytes
-	 */
-	skb_pull(skb, ax25_addr_size(&dp));
-
-	/* For our port addresses ? */
-	if (ax25cmp(&dest, dev_addr) == 0 && dp.lastrepeat + 1 == dp.ndigi)
-		mine = 1;
-
-	/* Also match on any registered callsign from L3/4 */
-	if (!mine && ax25_listen_mine(&dest, dev) && dp.lastrepeat + 1 == dp.ndigi)
-		mine = 1;
-
-	/* UI frame - bypass LAPB processing */
-	if ((*skb->data & ~0x10) == AX25_UI && dp.lastrepeat + 1 == dp.ndigi) {
-		skb_set_transport_header(skb, 2); /* skip control and pid */
-
-		ax25_send_to_raw(&dest, skb, skb->data[1]);
-
-		if (!mine && ax25cmp(&dest, (ax25_address *)dev->broadcast) != 0)
-			goto free;
-
-		/* Now we are pointing at the pid byte */
-		switch (skb->data[1]) {
-		case AX25_P_IP:
-			skb_pull(skb,2);		/* drop PID/CTRL */
-			skb_reset_transport_header(skb);
-			skb_reset_network_header(skb);
-			skb->dev      = dev;
-			skb->pkt_type = PACKET_HOST;
-			skb->protocol = htons(ETH_P_IP);
-			netif_rx(skb);
-			break;
-
-		case AX25_P_ARP:
-			skb_pull(skb,2);
-			skb_reset_transport_header(skb);
-			skb_reset_network_header(skb);
-			skb->dev      = dev;
-			skb->pkt_type = PACKET_HOST;
-			skb->protocol = htons(ETH_P_ARP);
-			netif_rx(skb);
-			break;
-		case AX25_P_TEXT:
-			/* Now find a suitable dgram socket */
-			sk = ax25_get_socket(&dest, &src, SOCK_DGRAM);
-			if (sk != NULL) {
-				bh_lock_sock(sk);
-				if (atomic_read(&sk->sk_rmem_alloc) >=
-				    sk->sk_rcvbuf) {
-					kfree_skb(skb);
-				} else {
-					/*
-					 *	Remove the control and PID.
-					 */
-					skb_pull(skb, 2);
-					if (sock_queue_rcv_skb(sk, skb) != 0)
-						kfree_skb(skb);
-				}
-				bh_unlock_sock(sk);
-				sock_put(sk);
-			} else {
-				kfree_skb(skb);
-			}
-			break;
-
-		default:
-			kfree_skb(skb);	/* Will scan SOCK_AX25 RAW sockets */
-			break;
-		}
-
-		return 0;
-	}
-
-	/*
-	 *	Is connected mode supported on this device ?
-	 *	If not, should we DM the incoming frame (except DMs) or
-	 *	silently ignore them. For now we stay quiet.
-	 */
-	if (ax25_dev->values[AX25_VALUES_CONMODE] == 0)
-		goto free;
-
-	/* LAPB */
-
-	/* AX.25 state 1-4 */
-
-	ax25_digi_invert(&dp, &reverse_dp);
-
-	if ((ax25 = ax25_find_cb(&dest, &src, &reverse_dp, dev)) != NULL) {
-		/*
-		 *	Process the frame. If it is queued up internally it
-		 *	returns one otherwise we free it immediately. This
-		 *	routine itself wakes the user context layers so we do
-		 *	no further work
-		 */
-		if (ax25_process_rx_frame(ax25, skb, type, dama) == 0)
-			kfree_skb(skb);
-
-		ax25_cb_put(ax25);
-		return 0;
-	}
-
-	/* AX.25 state 0 (disconnected) */
-
-	/* a) received not a SABM(E) */
-
-	if ((*skb->data & ~AX25_PF) != AX25_SABM &&
-	    (*skb->data & ~AX25_PF) != AX25_SABME) {
-		/*
-		 *	Never reply to a DM. Also ignore any connects for
-		 *	addresses that are not our interfaces and not a socket.
-		 */
-		if ((*skb->data & ~AX25_PF) != AX25_DM && mine)
-			ax25_return_dm(dev, &src, &dest, &dp);
-
-		goto free;
-	}
-
-	/* b) received SABM(E) */
-
-	if (dp.lastrepeat + 1 == dp.ndigi)
-		sk = ax25_find_listener(&dest, 0, dev, SOCK_SEQPACKET);
-	else
-		sk = ax25_find_listener(next_digi, 1, dev, SOCK_SEQPACKET);
-
-	if (sk != NULL) {
-		bh_lock_sock(sk);
-		if (sk_acceptq_is_full(sk) ||
-		    (make = ax25_make_new(sk, ax25_dev)) == NULL) {
-			if (mine)
-				ax25_return_dm(dev, &src, &dest, &dp);
-			kfree_skb(skb);
-			bh_unlock_sock(sk);
-			sock_put(sk);
-
-			return 0;
-		}
-
-		ax25 = sk_to_ax25(make);
-		skb_set_owner_r(skb, make);
-		skb_queue_head(&sk->sk_receive_queue, skb);
-
-		make->sk_state = TCP_ESTABLISHED;
-
-		sk_acceptq_added(sk);
-		bh_unlock_sock(sk);
-	} else {
-		if (!mine)
-			goto free;
-
-		if ((ax25 = ax25_create_cb()) == NULL) {
-			ax25_return_dm(dev, &src, &dest, &dp);
-			goto free;
-		}
-
-		ax25_fillin_cb(ax25, ax25_dev);
-	}
-
-	ax25->source_addr = dest;
-	ax25->dest_addr   = src;
-
-	/*
-	 *	Sort out any digipeated paths.
-	 */
-	if (dp.ndigi && !ax25->digipeat &&
-	    (ax25->digipeat = kmalloc_obj(ax25_digi, GFP_ATOMIC)) == NULL) {
-		kfree_skb(skb);
-		ax25_destroy_socket(ax25);
-		if (sk)
-			sock_put(sk);
-		return 0;
-	}
-
-	if (dp.ndigi == 0) {
-		kfree(ax25->digipeat);
-		ax25->digipeat = NULL;
-	} else {
-		/* Reverse the source SABM's path */
-		memcpy(ax25->digipeat, &reverse_dp, sizeof(ax25_digi));
-	}
-
-	if ((*skb->data & ~AX25_PF) == AX25_SABME) {
-		ax25->modulus = AX25_EMODULUS;
-		ax25->window  = ax25_dev->values[AX25_VALUES_EWINDOW];
-	} else {
-		ax25->modulus = AX25_MODULUS;
-		ax25->window  = ax25_dev->values[AX25_VALUES_WINDOW];
-	}
-
-	ax25_send_control(ax25, AX25_UA, AX25_POLLON, AX25_RESPONSE);
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	if (dama && ax25->ax25_dev->values[AX25_VALUES_PROTOCOL] == AX25_PROTO_DAMA_SLAVE)
-		ax25_dama_on(ax25);
-#endif
-
-	ax25->state = AX25_STATE_3;
-
-	ax25_cb_add(ax25);
-
-	ax25_start_heartbeat(ax25);
-	ax25_start_t3timer(ax25);
-	ax25_start_idletimer(ax25);
-
-	if (sk) {
-		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_data_ready(sk);
-		sock_put(sk);
-	} else {
-free:
-		kfree_skb(skb);
-	}
-	return 0;
-}
-
-/*
- *	Receive an AX.25 frame via a SLIP interface.
- */
-int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev,
-		  struct packet_type *ptype, struct net_device *orig_dev)
-{
-	skb = skb_share_check(skb, GFP_ATOMIC);
-	if (!skb)
-		return NET_RX_DROP;
-
-	skb_orphan(skb);
-
-	if (!net_eq(dev_net(dev), &init_net)) {
-		kfree_skb(skb);
-		return 0;
-	}
-
-	if ((*skb->data & 0x0F) != 0) {
-		kfree_skb(skb);	/* Not a KISS data frame */
-		return 0;
-	}
-
-	skb_pull(skb, AX25_KISS_HEADER_LEN);	/* Remove the KISS byte */
-
-	return ax25_rcv(skb, dev, (const ax25_address *)dev->dev_addr, ptype);
-}
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
deleted file mode 100644
index 215d4ccf12b9..000000000000
--- a/net/ax25/ax25_ip.c
+++ /dev/null
@@ -1,247 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/sysctl.h>
-#include <net/ip.h>
-#include <net/arp.h>
-
-/*
- *	IP over AX.25 encapsulation.
- */
-
-/*
- *	Shove an AX.25 UI header on an IP packet and handle ARP
- */
-
-#ifdef CONFIG_INET
-
-static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
-			    unsigned short type, const void *daddr,
-			    const void *saddr, unsigned int len)
-{
-	unsigned char *buff;
-
-	/* they sometimes come back to us... */
-	if (type == ETH_P_AX25)
-		return 0;
-
-	/* header is an AX.25 UI frame from us to them */
-	buff = skb_push(skb, AX25_HEADER_LEN);
-	*buff++ = 0x00;	/* KISS DATA */
-
-	if (daddr != NULL)
-		memcpy(buff, daddr, dev->addr_len);	/* Address specified */
-
-	buff[6] &= ~AX25_CBIT;
-	buff[6] &= ~AX25_EBIT;
-	buff[6] |= AX25_SSSID_SPARE;
-	buff    += AX25_ADDR_LEN;
-
-	if (saddr != NULL)
-		memcpy(buff, saddr, dev->addr_len);
-	else
-		memcpy(buff, dev->dev_addr, dev->addr_len);
-
-	buff[6] &= ~AX25_CBIT;
-	buff[6] |= AX25_EBIT;
-	buff[6] |= AX25_SSSID_SPARE;
-	buff    += AX25_ADDR_LEN;
-
-	*buff++  = AX25_UI;	/* UI */
-
-	/* Append a suitable AX.25 PID */
-	switch (type) {
-	case ETH_P_IP:
-		*buff++ = AX25_P_IP;
-		break;
-	case ETH_P_ARP:
-		*buff++ = AX25_P_ARP;
-		break;
-	default:
-		printk(KERN_ERR "AX.25: ax25_hard_header - wrong protocol type 0x%2.2x\n", type);
-		*buff++ = 0;
-		break;
-	}
-
-	if (daddr != NULL)
-		return AX25_HEADER_LEN;
-
-	return -AX25_HEADER_LEN;	/* Unfinished header */
-}
-
-netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
-{
-	struct sk_buff *ourskb;
-	unsigned char *bp  = skb->data;
-	ax25_route *route;
-	struct net_device *dev = NULL;
-	ax25_address *src, *dst;
-	ax25_digi *digipeat = NULL;
-	ax25_dev *ax25_dev;
-	ax25_cb *ax25;
-	char ip_mode = ' ';
-
-	dst = (ax25_address *)(bp + 1);
-	src = (ax25_address *)(bp + 8);
-
-	ax25_route_lock_use();
-	route = ax25_get_route(dst, NULL);
-	if (route) {
-		digipeat = route->digipeat;
-		dev = route->dev;
-		ip_mode = route->ip_mode;
-	}
-
-	if (dev == NULL)
-		dev = skb->dev;
-
-	rcu_read_lock();
-	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
-		kfree_skb(skb);
-		goto put;
-	}
-
-	if (bp[16] == AX25_P_IP) {
-		if (ip_mode == 'V' || (ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) {
-			/*
-			 *	We copy the buffer and release the original thereby
-			 *	keeping it straight
-			 *
-			 *	Note: we report 1 back so the caller will
-			 *	not feed the frame direct to the physical device
-			 *	We don't want that to happen. (It won't be upset
-			 *	as we have pulled the frame from the queue by
-			 *	freeing it).
-			 *
-			 *	NB: TCP modifies buffers that are still
-			 *	on a device queue, thus we use skb_copy()
-			 *      instead of using skb_clone() unless this
-			 *	gets fixed.
-			 */
-
-			ax25_address src_c;
-			ax25_address dst_c;
-
-			if ((ourskb = skb_copy(skb, GFP_ATOMIC)) == NULL) {
-				kfree_skb(skb);
-				goto put;
-			}
-
-			if (skb->sk != NULL)
-				skb_set_owner_w(ourskb, skb->sk);
-
-			kfree_skb(skb);
-			/* dl9sau: bugfix
-			 * after kfree_skb(), dst and src which were pointer
-			 * to bp which is part of skb->data would not be valid
-			 * anymore hope that after skb_pull(ourskb, ..) our
-			 * dsc_c and src_c will not become invalid
-			 */
-			bp  = ourskb->data;
-			dst_c = *(ax25_address *)(bp + 1);
-			src_c = *(ax25_address *)(bp + 8);
-
-			skb_pull(ourskb, AX25_HEADER_LEN - 1);	/* Keep PID */
-			skb_reset_network_header(ourskb);
-
-			ax25=ax25_send_frame(
-			    ourskb,
-			    ax25_dev->values[AX25_VALUES_PACLEN],
-			    &src_c,
-			    &dst_c, digipeat, dev);
-			if (ax25) {
-				ax25_cb_put(ax25);
-			}
-			goto put;
-		}
-	}
-
-	bp[7]  &= ~AX25_CBIT;
-	bp[7]  &= ~AX25_EBIT;
-	bp[7]  |= AX25_SSSID_SPARE;
-
-	bp[14] &= ~AX25_CBIT;
-	bp[14] |= AX25_EBIT;
-	bp[14] |= AX25_SSSID_SPARE;
-
-	skb_pull(skb, AX25_KISS_HEADER_LEN);
-
-	if (digipeat != NULL) {
-		if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL)
-			goto put;
-
-		skb = ourskb;
-	}
-
-	ax25_queue_xmit(skb, dev);
-
-put:
-	rcu_read_unlock();
-	ax25_route_lock_unuse();
-	return NETDEV_TX_OK;
-}
-
-#else	/* INET */
-
-static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
-			    unsigned short type, const void *daddr,
-			    const void *saddr, unsigned int len)
-{
-	return -AX25_HEADER_LEN;
-}
-
-netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
-{
-	kfree_skb(skb);
-	return NETDEV_TX_OK;
-}
-#endif
-
-static bool ax25_validate_header(const char *header, unsigned int len)
-{
-	ax25_digi digi;
-
-	if (!len)
-		return false;
-
-	if (header[0])
-		return true;
-
-	return ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL,
-			       NULL);
-}
-
-const struct header_ops ax25_header_ops = {
-	.create = ax25_hard_header,
-	.validate = ax25_validate_header,
-};
-
-EXPORT_SYMBOL(ax25_header_ops);
-EXPORT_SYMBOL(ax25_ip_xmit);
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
deleted file mode 100644
index 8bca2ace98e5..000000000000
--- a/net/ax25/ax25_out.c
+++ /dev/null
@@ -1,398 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/spinlock.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-static DEFINE_SPINLOCK(ax25_frag_lock);
-
-ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev)
-{
-	ax25_dev *ax25_dev;
-	ax25_cb *ax25;
-
-	/*
-	 * Take the default packet length for the device if zero is
-	 * specified.
-	 */
-	if (paclen == 0) {
-		rcu_read_lock();
-		ax25_dev = ax25_dev_ax25dev(dev);
-		if (!ax25_dev) {
-			rcu_read_unlock();
-			return NULL;
-		}
-		paclen = ax25_dev->values[AX25_VALUES_PACLEN];
-		rcu_read_unlock();
-	}
-
-	/*
-	 * Look for an existing connection.
-	 */
-	if ((ax25 = ax25_find_cb(src, dest, digi, dev)) != NULL) {
-		ax25_output(ax25, paclen, skb);
-		return ax25;		/* It already existed */
-	}
-
-	rcu_read_lock();
-	ax25_dev = ax25_dev_ax25dev(dev);
-	if (!ax25_dev) {
-		rcu_read_unlock();
-		return NULL;
-	}
-
-	if ((ax25 = ax25_create_cb()) == NULL) {
-		rcu_read_unlock();
-		return NULL;
-	}
-	ax25_fillin_cb(ax25, ax25_dev);
-	rcu_read_unlock();
-
-	ax25->source_addr = *src;
-	ax25->dest_addr   = *dest;
-
-	if (digi != NULL) {
-		ax25->digipeat = kmemdup(digi, sizeof(*digi), GFP_ATOMIC);
-		if (ax25->digipeat == NULL) {
-			ax25_cb_put(ax25);
-			return NULL;
-		}
-	}
-
-	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
-	case AX25_PROTO_STD_SIMPLEX:
-	case AX25_PROTO_STD_DUPLEX:
-		ax25_std_establish_data_link(ax25);
-		break;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	case AX25_PROTO_DAMA_SLAVE:
-		if (ax25_dev->dama.slave)
-			ax25_ds_establish_data_link(ax25);
-		else
-			ax25_std_establish_data_link(ax25);
-		break;
-#endif
-	}
-
-	/*
-	 * There is one ref for the state machine; a caller needs
-	 * one more to put it back, just like with the existing one.
-	 */
-	ax25_cb_hold(ax25);
-
-	ax25_cb_add(ax25);
-
-	ax25->state = AX25_STATE_1;
-
-	ax25_start_heartbeat(ax25);
-
-	ax25_output(ax25, paclen, skb);
-
-	return ax25;			/* We had to create it */
-}
-
-EXPORT_SYMBOL(ax25_send_frame);
-
-/*
- *	All outgoing AX.25 I frames pass via this routine. Therefore this is
- *	where the fragmentation of frames takes place. If fragment is set to
- *	zero then we are not allowed to do fragmentation, even if the frame
- *	is too large.
- */
-void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
-{
-	struct sk_buff *skbn;
-	unsigned char *p;
-	int frontlen, len, fragno, ka9qfrag, first = 1;
-
-	if (paclen < 16) {
-		WARN_ON_ONCE(1);
-		kfree_skb(skb);
-		return;
-	}
-
-	if ((skb->len - 1) > paclen) {
-		if (*skb->data == AX25_P_TEXT) {
-			skb_pull(skb, 1); /* skip PID */
-			ka9qfrag = 0;
-		} else {
-			paclen -= 2;	/* Allow for fragment control info */
-			ka9qfrag = 1;
-		}
-
-		fragno = skb->len / paclen;
-		if (skb->len % paclen == 0) fragno--;
-
-		frontlen = skb_headroom(skb);	/* Address space + CTRL */
-
-		while (skb->len > 0) {
-			spin_lock_bh(&ax25_frag_lock);
-			if ((skbn = alloc_skb(paclen + 2 + frontlen, GFP_ATOMIC)) == NULL) {
-				spin_unlock_bh(&ax25_frag_lock);
-				printk(KERN_CRIT "AX.25: ax25_output - out of memory\n");
-				return;
-			}
-
-			if (skb->sk != NULL)
-				skb_set_owner_w(skbn, skb->sk);
-
-			spin_unlock_bh(&ax25_frag_lock);
-
-			len = (paclen > skb->len) ? skb->len : paclen;
-
-			if (ka9qfrag == 1) {
-				skb_reserve(skbn, frontlen + 2);
-				skb_set_network_header(skbn,
-						      skb_network_offset(skb));
-				skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
-				p = skb_push(skbn, 2);
-
-				*p++ = AX25_P_SEGMENT;
-
-				*p = fragno--;
-				if (first) {
-					*p |= AX25_SEG_FIRST;
-					first = 0;
-				}
-			} else {
-				skb_reserve(skbn, frontlen + 1);
-				skb_set_network_header(skbn,
-						      skb_network_offset(skb));
-				skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
-				p = skb_push(skbn, 1);
-				*p = AX25_P_TEXT;
-			}
-
-			skb_pull(skb, len);
-			skb_queue_tail(&ax25->write_queue, skbn); /* Throw it on the queue */
-		}
-
-		kfree_skb(skb);
-	} else {
-		skb_queue_tail(&ax25->write_queue, skb);	  /* Throw it on the queue */
-	}
-
-	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
-	case AX25_PROTO_STD_SIMPLEX:
-	case AX25_PROTO_STD_DUPLEX:
-		ax25_kick(ax25);
-		break;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	/*
-	 * A DAMA slave is _required_ to work as normal AX.25L2V2
-	 * if no DAMA master is available.
-	 */
-	case AX25_PROTO_DAMA_SLAVE:
-		if (!ax25->ax25_dev->dama.slave) ax25_kick(ax25);
-		break;
-#endif
-	}
-}
-
-/*
- *  This procedure is passed a buffer descriptor for an iframe. It builds
- *  the rest of the control part of the frame and then writes it out.
- */
-static void ax25_send_iframe(ax25_cb *ax25, struct sk_buff *skb, int poll_bit)
-{
-	unsigned char *frame;
-
-	if (skb == NULL)
-		return;
-
-	skb_reset_network_header(skb);
-
-	if (ax25->modulus == AX25_MODULUS) {
-		frame = skb_push(skb, 1);
-
-		*frame = AX25_I;
-		*frame |= (poll_bit) ? AX25_PF : 0;
-		*frame |= (ax25->vr << 5);
-		*frame |= (ax25->vs << 1);
-	} else {
-		frame = skb_push(skb, 2);
-
-		frame[0] = AX25_I;
-		frame[0] |= (ax25->vs << 1);
-		frame[1] = (poll_bit) ? AX25_EPF : 0;
-		frame[1] |= (ax25->vr << 1);
-	}
-
-	ax25_start_idletimer(ax25);
-
-	ax25_transmit_buffer(ax25, skb, AX25_COMMAND);
-}
-
-void ax25_kick(ax25_cb *ax25)
-{
-	struct sk_buff *skb, *skbn;
-	int last = 1;
-	unsigned short start, end, next;
-
-	if (ax25->state != AX25_STATE_3 && ax25->state != AX25_STATE_4)
-		return;
-
-	if (ax25->condition & AX25_COND_PEER_RX_BUSY)
-		return;
-
-	if (skb_peek(&ax25->write_queue) == NULL)
-		return;
-
-	start = (skb_peek(&ax25->ack_queue) == NULL) ? ax25->va : ax25->vs;
-	end   = (ax25->va + ax25->window) % ax25->modulus;
-
-	if (start == end)
-		return;
-
-	/*
-	 * Transmit data until either we're out of data to send or
-	 * the window is full. Send a poll on the final I frame if
-	 * the window is filled.
-	 */
-
-	/*
-	 * Dequeue the frame and copy it.
-	 * Check for race with ax25_clear_queues().
-	 */
-	skb  = skb_dequeue(&ax25->write_queue);
-	if (!skb)
-		return;
-
-	ax25->vs = start;
-
-	do {
-		if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
-			skb_queue_head(&ax25->write_queue, skb);
-			break;
-		}
-
-		if (skb->sk != NULL)
-			skb_set_owner_w(skbn, skb->sk);
-
-		next = (ax25->vs + 1) % ax25->modulus;
-		last = (next == end);
-
-		/*
-		 * Transmit the frame copy.
-		 * bke 960114: do not set the Poll bit on the last frame
-		 * in DAMA mode.
-		 */
-		switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
-		case AX25_PROTO_STD_SIMPLEX:
-		case AX25_PROTO_STD_DUPLEX:
-			ax25_send_iframe(ax25, skbn, (last) ? AX25_POLLON : AX25_POLLOFF);
-			break;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-		case AX25_PROTO_DAMA_SLAVE:
-			ax25_send_iframe(ax25, skbn, AX25_POLLOFF);
-			break;
-#endif
-		}
-
-		ax25->vs = next;
-
-		/*
-		 * Requeue the original data frame.
-		 */
-		skb_queue_tail(&ax25->ack_queue, skb);
-
-	} while (!last && (skb = skb_dequeue(&ax25->write_queue)) != NULL);
-
-	ax25->condition &= ~AX25_COND_ACK_PENDING;
-
-	if (!ax25_t1timer_running(ax25)) {
-		ax25_stop_t3timer(ax25);
-		ax25_calculate_t1(ax25);
-		ax25_start_t1timer(ax25);
-	}
-}
-
-void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
-{
-	unsigned char *ptr;
-	int headroom;
-
-	if (ax25->ax25_dev == NULL) {
-		ax25_disconnect(ax25, ENETUNREACH);
-		return;
-	}
-
-	headroom = ax25_addr_size(ax25->digipeat);
-
-	if (unlikely(skb_headroom(skb) < headroom)) {
-		skb = skb_expand_head(skb, headroom);
-		if (!skb) {
-			printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
-			return;
-		}
-	}
-
-	ptr = skb_push(skb, headroom);
-
-	ax25_addr_build(ptr, &ax25->source_addr, &ax25->dest_addr, ax25->digipeat, type, ax25->modulus);
-
-	ax25_queue_xmit(skb, ax25->ax25_dev->dev);
-}
-
-/*
- *	A small shim to dev_queue_xmit to add the KISS control byte, and do
- *	any packet forwarding in operation.
- */
-void ax25_queue_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	unsigned char *ptr;
-
-	rcu_read_lock();
-	skb->protocol = ax25_type_trans(skb, ax25_fwd_dev(dev));
-	rcu_read_unlock();
-
-	ptr  = skb_push(skb, 1);
-	*ptr = 0x00;			/* KISS */
-
-	dev_queue_xmit(skb);
-}
-
-int ax25_check_iframes_acked(ax25_cb *ax25, unsigned short nr)
-{
-	if (ax25->vs == nr) {
-		ax25_frames_acked(ax25, nr);
-		ax25_calculate_rtt(ax25);
-		ax25_stop_t1timer(ax25);
-		ax25_start_t3timer(ax25);
-		return 1;
-	} else {
-		if (ax25->va != nr) {
-			ax25_frames_acked(ax25, nr);
-			ax25_calculate_t1(ax25);
-			ax25_start_t1timer(ax25);
-			return 1;
-		}
-	}
-	return 0;
-}
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
deleted file mode 100644
index 1d5c59ccf142..000000000000
--- a/net/ax25/ax25_route.c
+++ /dev/null
@@ -1,416 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Steven Whitehouse GW7RRM (stevew@acm.org)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
- * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
- */
-
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/timer.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/seq_file.h>
-#include <linux/export.h>
-
-static ax25_route *ax25_route_list;
-DEFINE_RWLOCK(ax25_route_lock);
-
-void ax25_rt_device_down(struct net_device *dev)
-{
-	ax25_route *s, *t, *ax25_rt;
-
-	write_lock_bh(&ax25_route_lock);
-	ax25_rt = ax25_route_list;
-	while (ax25_rt != NULL) {
-		s       = ax25_rt;
-		ax25_rt = ax25_rt->next;
-
-		if (s->dev == dev) {
-			if (ax25_route_list == s) {
-				ax25_route_list = s->next;
-				kfree(s->digipeat);
-				kfree(s);
-			} else {
-				for (t = ax25_route_list; t != NULL; t = t->next) {
-					if (t->next == s) {
-						t->next = s->next;
-						kfree(s->digipeat);
-						kfree(s);
-						break;
-					}
-				}
-			}
-		}
-	}
-	write_unlock_bh(&ax25_route_lock);
-}
-
-static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
-{
-	ax25_route *ax25_rt;
-	ax25_dev *ax25_dev;
-	int i;
-
-	if (route->digi_count > AX25_MAX_DIGIS)
-		return -EINVAL;
-
-	ax25_dev = ax25_addr_ax25dev(&route->port_addr);
-	if (!ax25_dev)
-		return -EINVAL;
-
-	write_lock_bh(&ax25_route_lock);
-
-	ax25_rt = ax25_route_list;
-	while (ax25_rt != NULL) {
-		if (ax25cmp(&ax25_rt->callsign, &route->dest_addr) == 0 &&
-			    ax25_rt->dev == ax25_dev->dev) {
-			kfree(ax25_rt->digipeat);
-			ax25_rt->digipeat = NULL;
-			if (route->digi_count != 0) {
-				if ((ax25_rt->digipeat = kmalloc_obj(ax25_digi, GFP_ATOMIC)) == NULL) {
-					write_unlock_bh(&ax25_route_lock);
-					ax25_dev_put(ax25_dev);
-					return -ENOMEM;
-				}
-				ax25_rt->digipeat->lastrepeat = -1;
-				ax25_rt->digipeat->ndigi      = route->digi_count;
-				for (i = 0; i < route->digi_count; i++) {
-					ax25_rt->digipeat->repeated[i] = 0;
-					ax25_rt->digipeat->calls[i]    = route->digi_addr[i];
-				}
-			}
-			write_unlock_bh(&ax25_route_lock);
-			ax25_dev_put(ax25_dev);
-			return 0;
-		}
-		ax25_rt = ax25_rt->next;
-	}
-
-	if ((ax25_rt = kmalloc_obj(ax25_route, GFP_ATOMIC)) == NULL) {
-		write_unlock_bh(&ax25_route_lock);
-		ax25_dev_put(ax25_dev);
-		return -ENOMEM;
-	}
-
-	ax25_rt->callsign     = route->dest_addr;
-	ax25_rt->dev          = ax25_dev->dev;
-	ax25_rt->digipeat     = NULL;
-	ax25_rt->ip_mode      = ' ';
-	if (route->digi_count != 0) {
-		if ((ax25_rt->digipeat = kmalloc_obj(ax25_digi, GFP_ATOMIC)) == NULL) {
-			write_unlock_bh(&ax25_route_lock);
-			kfree(ax25_rt);
-			ax25_dev_put(ax25_dev);
-			return -ENOMEM;
-		}
-		ax25_rt->digipeat->lastrepeat = -1;
-		ax25_rt->digipeat->ndigi      = route->digi_count;
-		for (i = 0; i < route->digi_count; i++) {
-			ax25_rt->digipeat->repeated[i] = 0;
-			ax25_rt->digipeat->calls[i]    = route->digi_addr[i];
-		}
-	}
-	ax25_rt->next   = ax25_route_list;
-	ax25_route_list = ax25_rt;
-	write_unlock_bh(&ax25_route_lock);
-	ax25_dev_put(ax25_dev);
-
-	return 0;
-}
-
-void __ax25_put_route(ax25_route *ax25_rt)
-{
-	kfree(ax25_rt->digipeat);
-	kfree(ax25_rt);
-}
-
-static int ax25_rt_del(struct ax25_routes_struct *route)
-{
-	ax25_route *s, *t, *ax25_rt;
-	ax25_dev *ax25_dev;
-
-	if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
-		return -EINVAL;
-
-	write_lock_bh(&ax25_route_lock);
-
-	ax25_rt = ax25_route_list;
-	while (ax25_rt != NULL) {
-		s       = ax25_rt;
-		ax25_rt = ax25_rt->next;
-		if (s->dev == ax25_dev->dev &&
-		    ax25cmp(&route->dest_addr, &s->callsign) == 0) {
-			if (ax25_route_list == s) {
-				ax25_route_list = s->next;
-				__ax25_put_route(s);
-			} else {
-				for (t = ax25_route_list; t != NULL; t = t->next) {
-					if (t->next == s) {
-						t->next = s->next;
-						__ax25_put_route(s);
-						break;
-					}
-				}
-			}
-		}
-	}
-	write_unlock_bh(&ax25_route_lock);
-	ax25_dev_put(ax25_dev);
-
-	return 0;
-}
-
-static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option)
-{
-	ax25_route *ax25_rt;
-	ax25_dev *ax25_dev;
-	int err = 0;
-
-	if ((ax25_dev = ax25_addr_ax25dev(&rt_option->port_addr)) == NULL)
-		return -EINVAL;
-
-	write_lock_bh(&ax25_route_lock);
-
-	ax25_rt = ax25_route_list;
-	while (ax25_rt != NULL) {
-		if (ax25_rt->dev == ax25_dev->dev &&
-		    ax25cmp(&rt_option->dest_addr, &ax25_rt->callsign) == 0) {
-			switch (rt_option->cmd) {
-			case AX25_SET_RT_IPMODE:
-				switch (rt_option->arg) {
-				case ' ':
-				case 'D':
-				case 'V':
-					ax25_rt->ip_mode = rt_option->arg;
-					break;
-				default:
-					err = -EINVAL;
-					goto out;
-				}
-				break;
-			default:
-				err = -EINVAL;
-				goto out;
-			}
-		}
-		ax25_rt = ax25_rt->next;
-	}
-
-out:
-	write_unlock_bh(&ax25_route_lock);
-	ax25_dev_put(ax25_dev);
-	return err;
-}
-
-int ax25_rt_ioctl(unsigned int cmd, void __user *arg)
-{
-	struct ax25_route_opt_struct rt_option;
-	struct ax25_routes_struct route;
-
-	switch (cmd) {
-	case SIOCADDRT:
-		if (copy_from_user(&route, arg, sizeof(route)))
-			return -EFAULT;
-		return ax25_rt_add(&route);
-
-	case SIOCDELRT:
-		if (copy_from_user(&route, arg, sizeof(route)))
-			return -EFAULT;
-		return ax25_rt_del(&route);
-
-	case SIOCAX25OPTRT:
-		if (copy_from_user(&rt_option, arg, sizeof(rt_option)))
-			return -EFAULT;
-		return ax25_rt_opt(&rt_option);
-
-	default:
-		return -EINVAL;
-	}
-}
-
-#ifdef CONFIG_PROC_FS
-
-static void *ax25_rt_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(ax25_route_lock)
-{
-	struct ax25_route *ax25_rt;
-	int i = 1;
-
-	read_lock(&ax25_route_lock);
-	if (*pos == 0)
-		return SEQ_START_TOKEN;
-
-	for (ax25_rt = ax25_route_list; ax25_rt != NULL; ax25_rt = ax25_rt->next) {
-		if (i == *pos)
-			return ax25_rt;
-		++i;
-	}
-
-	return NULL;
-}
-
-static void *ax25_rt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	return (v == SEQ_START_TOKEN) ? ax25_route_list :
-		((struct ax25_route *) v)->next;
-}
-
-static void ax25_rt_seq_stop(struct seq_file *seq, void *v)
-	__releases(ax25_route_lock)
-{
-	read_unlock(&ax25_route_lock);
-}
-
-static int ax25_rt_seq_show(struct seq_file *seq, void *v)
-{
-	char buf[11];
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "callsign  dev  mode digipeaters\n");
-	else {
-		struct ax25_route *ax25_rt = v;
-		const char *callsign;
-		int i;
-
-		if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0)
-			callsign = "default";
-		else
-			callsign = ax2asc(buf, &ax25_rt->callsign);
-
-		seq_printf(seq, "%-9s %-4s",
-			callsign,
-			ax25_rt->dev ? ax25_rt->dev->name : "???");
-
-		switch (ax25_rt->ip_mode) {
-		case 'V':
-			seq_puts(seq, "   vc");
-			break;
-		case 'D':
-			seq_puts(seq, "   dg");
-			break;
-		default:
-			seq_puts(seq, "    *");
-			break;
-		}
-
-		if (ax25_rt->digipeat != NULL)
-			for (i = 0; i < ax25_rt->digipeat->ndigi; i++)
-				seq_printf(seq, " %s",
-				     ax2asc(buf, &ax25_rt->digipeat->calls[i]));
-
-		seq_puts(seq, "\n");
-	}
-	return 0;
-}
-
-const struct seq_operations ax25_rt_seqops = {
-	.start = ax25_rt_seq_start,
-	.next = ax25_rt_seq_next,
-	.stop = ax25_rt_seq_stop,
-	.show = ax25_rt_seq_show,
-};
-#endif
-
-/*
- *	Find AX.25 route
- *
- *	Only routes with a reference count of zero can be destroyed.
- *	Must be called with ax25_route_lock read locked.
- */
-ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
-{
-	ax25_route *ax25_spe_rt = NULL;
-	ax25_route *ax25_def_rt = NULL;
-	ax25_route *ax25_rt;
-
-	/*
-	 *	Bind to the physical interface we heard them on, or the default
-	 *	route if none is found;
-	 */
-	for (ax25_rt = ax25_route_list; ax25_rt != NULL; ax25_rt = ax25_rt->next) {
-		if (dev == NULL) {
-			if (ax25cmp(&ax25_rt->callsign, addr) == 0 && ax25_rt->dev != NULL)
-				ax25_spe_rt = ax25_rt;
-			if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0 && ax25_rt->dev != NULL)
-				ax25_def_rt = ax25_rt;
-		} else {
-			if (ax25cmp(&ax25_rt->callsign, addr) == 0 && ax25_rt->dev == dev)
-				ax25_spe_rt = ax25_rt;
-			if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0 && ax25_rt->dev == dev)
-				ax25_def_rt = ax25_rt;
-		}
-	}
-
-	ax25_rt = ax25_def_rt;
-	if (ax25_spe_rt != NULL)
-		ax25_rt = ax25_spe_rt;
-
-	return ax25_rt;
-}
-
-
-struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
-	ax25_address *dest, ax25_digi *digi)
-{
-	unsigned char *bp;
-	int len;
-
-	len = digi->ndigi * AX25_ADDR_LEN;
-
-	if (unlikely(skb_headroom(skb) < len)) {
-		skb = skb_expand_head(skb, len);
-		if (!skb) {
-			printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n");
-			return NULL;
-		}
-	}
-
-	bp = skb_push(skb, len);
-
-	ax25_addr_build(bp, src, dest, digi, AX25_COMMAND, AX25_MODULUS);
-
-	return skb;
-}
-
-/*
- *	Free all memory associated with routing structures.
- */
-void __exit ax25_rt_free(void)
-{
-	ax25_route *s, *ax25_rt = ax25_route_list;
-
-	write_lock_bh(&ax25_route_lock);
-	while (ax25_rt != NULL) {
-		s       = ax25_rt;
-		ax25_rt = ax25_rt->next;
-
-		kfree(s->digipeat);
-		kfree(s);
-	}
-	write_unlock_bh(&ax25_route_lock);
-}
diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c
deleted file mode 100644
index ba176196ae06..000000000000
--- a/net/ax25/ax25_std_in.c
+++ /dev/null
@@ -1,443 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
- *
- * Most of this code is based on the SDL diagrams published in the 7th ARRL
- * Computer Networking Conference papers. The diagrams have mistakes in them,
- * but are mostly correct. Before you modify the code could you read the SDL
- * diagrams as the code is not obvious and probably very easy to break.
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-/*
- *	State machine for state 1, Awaiting Connection State.
- *	The handling of the timer(s) is in file ax25_std_timer.c.
- *	Handling of state 0 and connection release is in ax25.c.
- */
-static int ax25_std_state1_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type)
-{
-	switch (frametype) {
-	case AX25_SABM:
-		ax25->modulus = AX25_MODULUS;
-		ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		break;
-
-	case AX25_SABME:
-		ax25->modulus = AX25_EMODULUS;
-		ax25->window  = ax25->ax25_dev->values[AX25_VALUES_EWINDOW];
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		break;
-
-	case AX25_DISC:
-		ax25_send_control(ax25, AX25_DM, pf, AX25_RESPONSE);
-		break;
-
-	case AX25_UA:
-		if (pf) {
-			ax25_calculate_rtt(ax25);
-			ax25_stop_t1timer(ax25);
-			ax25_start_t3timer(ax25);
-			ax25_start_idletimer(ax25);
-			ax25->vs      = 0;
-			ax25->va      = 0;
-			ax25->vr      = 0;
-			ax25->state   = AX25_STATE_3;
-			ax25->n2count = 0;
-			if (ax25->sk != NULL) {
-				bh_lock_sock(ax25->sk);
-				ax25->sk->sk_state = TCP_ESTABLISHED;
-				/* For WAIT_SABM connections we will produce an accept ready socket here */
-				if (!sock_flag(ax25->sk, SOCK_DEAD))
-					ax25->sk->sk_state_change(ax25->sk);
-				bh_unlock_sock(ax25->sk);
-			}
-		}
-		break;
-
-	case AX25_DM:
-		if (pf) {
-			if (ax25->modulus == AX25_MODULUS) {
-				ax25_disconnect(ax25, ECONNREFUSED);
-			} else {
-				ax25->modulus = AX25_MODULUS;
-				ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
-			}
-		}
-		break;
-
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-/*
- *	State machine for state 2, Awaiting Release State.
- *	The handling of the timer(s) is in file ax25_std_timer.c
- *	Handling of state 0 and connection release is in ax25.c.
- */
-static int ax25_std_state2_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type)
-{
-	switch (frametype) {
-	case AX25_SABM:
-	case AX25_SABME:
-		ax25_send_control(ax25, AX25_DM, pf, AX25_RESPONSE);
-		break;
-
-	case AX25_DISC:
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		ax25_disconnect(ax25, 0);
-		break;
-
-	case AX25_DM:
-	case AX25_UA:
-		if (pf)
-			ax25_disconnect(ax25, 0);
-		break;
-
-	case AX25_I:
-	case AX25_REJ:
-	case AX25_RNR:
-	case AX25_RR:
-		if (pf) ax25_send_control(ax25, AX25_DM, AX25_POLLON, AX25_RESPONSE);
-		break;
-
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-/*
- *	State machine for state 3, Connected State.
- *	The handling of the timer(s) is in file ax25_std_timer.c
- *	Handling of state 0 and connection release is in ax25.c.
- */
-static int ax25_std_state3_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int ns, int nr, int pf, int type)
-{
-	int queued = 0;
-
-	switch (frametype) {
-	case AX25_SABM:
-	case AX25_SABME:
-		if (frametype == AX25_SABM) {
-			ax25->modulus = AX25_MODULUS;
-			ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
-		} else {
-			ax25->modulus = AX25_EMODULUS;
-			ax25->window  = ax25->ax25_dev->values[AX25_VALUES_EWINDOW];
-		}
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		ax25_stop_t1timer(ax25);
-		ax25_stop_t2timer(ax25);
-		ax25_start_t3timer(ax25);
-		ax25_start_idletimer(ax25);
-		ax25->condition = 0x00;
-		ax25->vs        = 0;
-		ax25->va        = 0;
-		ax25->vr        = 0;
-		ax25_requeue_frames(ax25);
-		break;
-
-	case AX25_DISC:
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		ax25_disconnect(ax25, 0);
-		break;
-
-	case AX25_DM:
-		ax25_disconnect(ax25, ECONNRESET);
-		break;
-
-	case AX25_RR:
-	case AX25_RNR:
-		if (frametype == AX25_RR)
-			ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
-		else
-			ax25->condition |= AX25_COND_PEER_RX_BUSY;
-		if (type == AX25_COMMAND && pf)
-			ax25_std_enquiry_response(ax25);
-		if (ax25_validate_nr(ax25, nr)) {
-			ax25_check_iframes_acked(ax25, nr);
-		} else {
-			ax25_std_nr_error_recovery(ax25);
-			ax25->state = AX25_STATE_1;
-		}
-		break;
-
-	case AX25_REJ:
-		ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
-		if (type == AX25_COMMAND && pf)
-			ax25_std_enquiry_response(ax25);
-		if (ax25_validate_nr(ax25, nr)) {
-			ax25_frames_acked(ax25, nr);
-			ax25_calculate_rtt(ax25);
-			ax25_stop_t1timer(ax25);
-			ax25_start_t3timer(ax25);
-			ax25_requeue_frames(ax25);
-		} else {
-			ax25_std_nr_error_recovery(ax25);
-			ax25->state = AX25_STATE_1;
-		}
-		break;
-
-	case AX25_I:
-		if (!ax25_validate_nr(ax25, nr)) {
-			ax25_std_nr_error_recovery(ax25);
-			ax25->state = AX25_STATE_1;
-			break;
-		}
-		if (ax25->condition & AX25_COND_PEER_RX_BUSY) {
-			ax25_frames_acked(ax25, nr);
-		} else {
-			ax25_check_iframes_acked(ax25, nr);
-		}
-		if (ax25->condition & AX25_COND_OWN_RX_BUSY) {
-			if (pf) ax25_std_enquiry_response(ax25);
-			break;
-		}
-		if (ns == ax25->vr) {
-			ax25->vr = (ax25->vr + 1) % ax25->modulus;
-			queued = ax25_rx_iframe(ax25, skb);
-			if (ax25->condition & AX25_COND_OWN_RX_BUSY)
-				ax25->vr = ns;	/* ax25->vr - 1 */
-			ax25->condition &= ~AX25_COND_REJECT;
-			if (pf) {
-				ax25_std_enquiry_response(ax25);
-			} else {
-				if (!(ax25->condition & AX25_COND_ACK_PENDING)) {
-					ax25->condition |= AX25_COND_ACK_PENDING;
-					ax25_start_t2timer(ax25);
-				}
-			}
-		} else {
-			if (ax25->condition & AX25_COND_REJECT) {
-				if (pf) ax25_std_enquiry_response(ax25);
-			} else {
-				ax25->condition |= AX25_COND_REJECT;
-				ax25_send_control(ax25, AX25_REJ, pf, AX25_RESPONSE);
-				ax25->condition &= ~AX25_COND_ACK_PENDING;
-			}
-		}
-		break;
-
-	case AX25_FRMR:
-	case AX25_ILLEGAL:
-		ax25_std_establish_data_link(ax25);
-		ax25->state = AX25_STATE_1;
-		break;
-
-	default:
-		break;
-	}
-
-	return queued;
-}
-
-/*
- *	State machine for state 4, Timer Recovery State.
- *	The handling of the timer(s) is in file ax25_std_timer.c
- *	Handling of state 0 and connection release is in ax25.c.
- */
-static int ax25_std_state4_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int ns, int nr, int pf, int type)
-{
-	int queued = 0;
-
-	switch (frametype) {
-	case AX25_SABM:
-	case AX25_SABME:
-		if (frametype == AX25_SABM) {
-			ax25->modulus = AX25_MODULUS;
-			ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
-		} else {
-			ax25->modulus = AX25_EMODULUS;
-			ax25->window  = ax25->ax25_dev->values[AX25_VALUES_EWINDOW];
-		}
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		ax25_stop_t1timer(ax25);
-		ax25_stop_t2timer(ax25);
-		ax25_start_t3timer(ax25);
-		ax25_start_idletimer(ax25);
-		ax25->condition = 0x00;
-		ax25->vs        = 0;
-		ax25->va        = 0;
-		ax25->vr        = 0;
-		ax25->state     = AX25_STATE_3;
-		ax25->n2count   = 0;
-		ax25_requeue_frames(ax25);
-		break;
-
-	case AX25_DISC:
-		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
-		ax25_disconnect(ax25, 0);
-		break;
-
-	case AX25_DM:
-		ax25_disconnect(ax25, ECONNRESET);
-		break;
-
-	case AX25_RR:
-	case AX25_RNR:
-		if (frametype == AX25_RR)
-			ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
-		else
-			ax25->condition |= AX25_COND_PEER_RX_BUSY;
-		if (type == AX25_RESPONSE && pf) {
-			ax25_stop_t1timer(ax25);
-			ax25->n2count = 0;
-			if (ax25_validate_nr(ax25, nr)) {
-				ax25_frames_acked(ax25, nr);
-				if (ax25->vs == ax25->va) {
-					ax25_start_t3timer(ax25);
-					ax25->state   = AX25_STATE_3;
-				} else {
-					ax25_requeue_frames(ax25);
-				}
-			} else {
-				ax25_std_nr_error_recovery(ax25);
-				ax25->state = AX25_STATE_1;
-			}
-			break;
-		}
-		if (type == AX25_COMMAND && pf)
-			ax25_std_enquiry_response(ax25);
-		if (ax25_validate_nr(ax25, nr)) {
-			ax25_frames_acked(ax25, nr);
-		} else {
-			ax25_std_nr_error_recovery(ax25);
-			ax25->state = AX25_STATE_1;
-		}
-		break;
-
-	case AX25_REJ:
-		ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
-		if (pf && type == AX25_RESPONSE) {
-			ax25_stop_t1timer(ax25);
-			ax25->n2count = 0;
-			if (ax25_validate_nr(ax25, nr)) {
-				ax25_frames_acked(ax25, nr);
-				if (ax25->vs == ax25->va) {
-					ax25_start_t3timer(ax25);
-					ax25->state   = AX25_STATE_3;
-				} else {
-					ax25_requeue_frames(ax25);
-				}
-			} else {
-				ax25_std_nr_error_recovery(ax25);
-				ax25->state = AX25_STATE_1;
-			}
-			break;
-		}
-		if (type == AX25_COMMAND && pf)
-			ax25_std_enquiry_response(ax25);
-		if (ax25_validate_nr(ax25, nr)) {
-			ax25_frames_acked(ax25, nr);
-			ax25_requeue_frames(ax25);
-		} else {
-			ax25_std_nr_error_recovery(ax25);
-			ax25->state = AX25_STATE_1;
-		}
-		break;
-
-	case AX25_I:
-		if (!ax25_validate_nr(ax25, nr)) {
-			ax25_std_nr_error_recovery(ax25);
-			ax25->state = AX25_STATE_1;
-			break;
-		}
-		ax25_frames_acked(ax25, nr);
-		if (ax25->condition & AX25_COND_OWN_RX_BUSY) {
-			if (pf)
-				ax25_std_enquiry_response(ax25);
-			break;
-		}
-		if (ns == ax25->vr) {
-			ax25->vr = (ax25->vr + 1) % ax25->modulus;
-			queued = ax25_rx_iframe(ax25, skb);
-			if (ax25->condition & AX25_COND_OWN_RX_BUSY)
-				ax25->vr = ns;	/* ax25->vr - 1 */
-			ax25->condition &= ~AX25_COND_REJECT;
-			if (pf) {
-				ax25_std_enquiry_response(ax25);
-			} else {
-				if (!(ax25->condition & AX25_COND_ACK_PENDING)) {
-					ax25->condition |= AX25_COND_ACK_PENDING;
-					ax25_start_t2timer(ax25);
-				}
-			}
-		} else {
-			if (ax25->condition & AX25_COND_REJECT) {
-				if (pf) ax25_std_enquiry_response(ax25);
-			} else {
-				ax25->condition |= AX25_COND_REJECT;
-				ax25_send_control(ax25, AX25_REJ, pf, AX25_RESPONSE);
-				ax25->condition &= ~AX25_COND_ACK_PENDING;
-			}
-		}
-		break;
-
-	case AX25_FRMR:
-	case AX25_ILLEGAL:
-		ax25_std_establish_data_link(ax25);
-		ax25->state = AX25_STATE_1;
-		break;
-
-	default:
-		break;
-	}
-
-	return queued;
-}
-
-/*
- *	Higher level upcall for a LAPB frame
- */
-int ax25_std_frame_in(ax25_cb *ax25, struct sk_buff *skb, int type)
-{
-	int queued = 0, frametype, ns, nr, pf;
-
-	frametype = ax25_decode(ax25, skb, &ns, &nr, &pf);
-
-	switch (ax25->state) {
-	case AX25_STATE_1:
-		queued = ax25_std_state1_machine(ax25, skb, frametype, pf, type);
-		break;
-	case AX25_STATE_2:
-		queued = ax25_std_state2_machine(ax25, skb, frametype, pf, type);
-		break;
-	case AX25_STATE_3:
-		queued = ax25_std_state3_machine(ax25, skb, frametype, ns, nr, pf, type);
-		break;
-	case AX25_STATE_4:
-		queued = ax25_std_state4_machine(ax25, skb, frametype, ns, nr, pf, type);
-		break;
-	}
-
-	ax25_kick(ax25);
-
-	return queued;
-}
diff --git a/net/ax25/ax25_std_subr.c b/net/ax25/ax25_std_subr.c
deleted file mode 100644
index 4c36f1342558..000000000000
--- a/net/ax25/ax25_std_subr.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-/*
- * The following routines are taken from page 170 of the 7th ARRL Computer
- * Networking Conference paper, as is the whole state machine.
- */
-
-void ax25_std_nr_error_recovery(ax25_cb *ax25)
-{
-	ax25_std_establish_data_link(ax25);
-}
-
-void ax25_std_establish_data_link(ax25_cb *ax25)
-{
-	ax25->condition = 0x00;
-	ax25->n2count   = 0;
-
-	if (ax25->modulus == AX25_MODULUS)
-		ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND);
-	else
-		ax25_send_control(ax25, AX25_SABME, AX25_POLLON, AX25_COMMAND);
-
-	ax25_calculate_t1(ax25);
-	ax25_stop_idletimer(ax25);
-	ax25_stop_t3timer(ax25);
-	ax25_stop_t2timer(ax25);
-	ax25_start_t1timer(ax25);
-}
-
-void ax25_std_transmit_enquiry(ax25_cb *ax25)
-{
-	if (ax25->condition & AX25_COND_OWN_RX_BUSY)
-		ax25_send_control(ax25, AX25_RNR, AX25_POLLON, AX25_COMMAND);
-	else
-		ax25_send_control(ax25, AX25_RR, AX25_POLLON, AX25_COMMAND);
-
-	ax25->condition &= ~AX25_COND_ACK_PENDING;
-
-	ax25_calculate_t1(ax25);
-	ax25_start_t1timer(ax25);
-}
-
-void ax25_std_enquiry_response(ax25_cb *ax25)
-{
-	if (ax25->condition & AX25_COND_OWN_RX_BUSY)
-		ax25_send_control(ax25, AX25_RNR, AX25_POLLON, AX25_RESPONSE);
-	else
-		ax25_send_control(ax25, AX25_RR, AX25_POLLON, AX25_RESPONSE);
-
-	ax25->condition &= ~AX25_COND_ACK_PENDING;
-}
-
-void ax25_std_timeout_response(ax25_cb *ax25)
-{
-	if (ax25->condition & AX25_COND_OWN_RX_BUSY)
-		ax25_send_control(ax25, AX25_RNR, AX25_POLLOFF, AX25_RESPONSE);
-	else
-		ax25_send_control(ax25, AX25_RR, AX25_POLLOFF, AX25_RESPONSE);
-
-	ax25->condition &= ~AX25_COND_ACK_PENDING;
-}
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
deleted file mode 100644
index b17da41210cb..000000000000
--- a/net/ax25/ax25_std_timer.c
+++ /dev/null
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-void ax25_std_heartbeat_expiry(ax25_cb *ax25)
-{
-	struct sock *sk = ax25->sk;
-
-	if (sk)
-		bh_lock_sock(sk);
-
-	switch (ax25->state) {
-	case AX25_STATE_0:
-	case AX25_STATE_2:
-		/* Magic here: If we listen() and a new link dies before it
-		   is accepted() it isn't 'dead' so doesn't get removed. */
-		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
-		    (sk->sk_state == TCP_LISTEN &&
-		     sock_flag(sk, SOCK_DEAD))) {
-			if (sk) {
-				sock_hold(sk);
-				ax25_destroy_socket(ax25);
-				bh_unlock_sock(sk);
-				/* Ungrab socket and destroy it */
-				sock_put(sk);
-			} else
-				ax25_destroy_socket(ax25);
-			return;
-		}
-		break;
-
-	case AX25_STATE_3:
-	case AX25_STATE_4:
-		/*
-		 * Check the state of the receive buffer.
-		 */
-		if (sk != NULL) {
-			if (atomic_read(&sk->sk_rmem_alloc) <
-			    (sk->sk_rcvbuf >> 1) &&
-			    (ax25->condition & AX25_COND_OWN_RX_BUSY)) {
-				ax25->condition &= ~AX25_COND_OWN_RX_BUSY;
-				ax25->condition &= ~AX25_COND_ACK_PENDING;
-				ax25_send_control(ax25, AX25_RR, AX25_POLLOFF, AX25_RESPONSE);
-				break;
-			}
-		}
-	}
-
-	if (sk)
-		bh_unlock_sock(sk);
-
-	ax25_start_heartbeat(ax25);
-}
-
-void ax25_std_t2timer_expiry(ax25_cb *ax25)
-{
-	if (ax25->condition & AX25_COND_ACK_PENDING) {
-		ax25->condition &= ~AX25_COND_ACK_PENDING;
-		ax25_std_timeout_response(ax25);
-	}
-}
-
-void ax25_std_t3timer_expiry(ax25_cb *ax25)
-{
-	ax25->n2count = 0;
-	ax25_std_transmit_enquiry(ax25);
-	ax25->state   = AX25_STATE_4;
-}
-
-void ax25_std_idletimer_expiry(ax25_cb *ax25)
-{
-	ax25_clear_queues(ax25);
-
-	ax25->n2count = 0;
-	ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-	ax25->state   = AX25_STATE_2;
-
-	ax25_calculate_t1(ax25);
-	ax25_start_t1timer(ax25);
-	ax25_stop_t2timer(ax25);
-	ax25_stop_t3timer(ax25);
-
-	if (ax25->sk != NULL) {
-		bh_lock_sock(ax25->sk);
-		ax25->sk->sk_state     = TCP_CLOSE;
-		ax25->sk->sk_err       = 0;
-		ax25->sk->sk_shutdown |= SEND_SHUTDOWN;
-		if (!sock_flag(ax25->sk, SOCK_DEAD)) {
-			ax25->sk->sk_state_change(ax25->sk);
-			sock_set_flag(ax25->sk, SOCK_DEAD);
-		}
-		bh_unlock_sock(ax25->sk);
-	}
-}
-
-void ax25_std_t1timer_expiry(ax25_cb *ax25)
-{
-	switch (ax25->state) {
-	case AX25_STATE_1:
-		if (ax25->n2count == ax25->n2) {
-			if (ax25->modulus == AX25_MODULUS) {
-				ax25_disconnect(ax25, ETIMEDOUT);
-				return;
-			} else {
-				ax25->modulus = AX25_MODULUS;
-				ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
-				ax25->n2count = 0;
-				ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND);
-			}
-		} else {
-			ax25->n2count++;
-			if (ax25->modulus == AX25_MODULUS)
-				ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND);
-			else
-				ax25_send_control(ax25, AX25_SABME, AX25_POLLON, AX25_COMMAND);
-		}
-		break;
-
-	case AX25_STATE_2:
-		if (ax25->n2count == ax25->n2) {
-			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-			if (!sock_flag(ax25->sk, SOCK_DESTROY))
-				ax25_disconnect(ax25, ETIMEDOUT);
-			return;
-		} else {
-			ax25->n2count++;
-			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-		}
-		break;
-
-	case AX25_STATE_3:
-		ax25->n2count = 1;
-		ax25_std_transmit_enquiry(ax25);
-		ax25->state   = AX25_STATE_4;
-		break;
-
-	case AX25_STATE_4:
-		if (ax25->n2count == ax25->n2) {
-			ax25_send_control(ax25, AX25_DM, AX25_POLLON, AX25_RESPONSE);
-			ax25_disconnect(ax25, ETIMEDOUT);
-			return;
-		} else {
-			ax25->n2count++;
-			ax25_std_transmit_enquiry(ax25);
-		}
-		break;
-	}
-
-	ax25_calculate_t1(ax25);
-	ax25_start_t1timer(ax25);
-}
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
deleted file mode 100644
index bff4b203a893..000000000000
--- a/net/ax25/ax25_subr.c
+++ /dev/null
@@ -1,296 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-/*
- *	This routine purges all the queues of frames.
- */
-void ax25_clear_queues(ax25_cb *ax25)
-{
-	skb_queue_purge(&ax25->write_queue);
-	skb_queue_purge(&ax25->ack_queue);
-	skb_queue_purge(&ax25->reseq_queue);
-	skb_queue_purge(&ax25->frag_queue);
-}
-
-/*
- * This routine purges the input queue of those frames that have been
- * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the
- * SDL diagram.
- */
-void ax25_frames_acked(ax25_cb *ax25, unsigned short nr)
-{
-	struct sk_buff *skb;
-
-	/*
-	 * Remove all the ack-ed frames from the ack queue.
-	 */
-	if (ax25->va != nr) {
-		while (skb_peek(&ax25->ack_queue) != NULL && ax25->va != nr) {
-			skb = skb_dequeue(&ax25->ack_queue);
-			kfree_skb(skb);
-			ax25->va = (ax25->va + 1) % ax25->modulus;
-		}
-	}
-}
-
-void ax25_requeue_frames(ax25_cb *ax25)
-{
-	struct sk_buff *skb;
-
-	/*
-	 * Requeue all the un-ack-ed frames on the output queue to be picked
-	 * up by ax25_kick called from the timer. This arrangement handles the
-	 * possibility of an empty output queue.
-	 */
-	while ((skb = skb_dequeue_tail(&ax25->ack_queue)) != NULL)
-		skb_queue_head(&ax25->write_queue, skb);
-}
-
-/*
- *	Validate that the value of nr is between va and vs. Return true or
- *	false for testing.
- */
-int ax25_validate_nr(ax25_cb *ax25, unsigned short nr)
-{
-	unsigned short vc = ax25->va;
-
-	while (vc != ax25->vs) {
-		if (nr == vc) return 1;
-		vc = (vc + 1) % ax25->modulus;
-	}
-
-	if (nr == ax25->vs) return 1;
-
-	return 0;
-}
-
-/*
- *	This routine is the centralised routine for parsing the control
- *	information for the different frame formats.
- */
-int ax25_decode(ax25_cb *ax25, struct sk_buff *skb, int *ns, int *nr, int *pf)
-{
-	unsigned char *frame;
-	int frametype = AX25_ILLEGAL;
-
-	frame = skb->data;
-	*ns = *nr = *pf = 0;
-
-	if (ax25->modulus == AX25_MODULUS) {
-		if ((frame[0] & AX25_S) == 0) {
-			frametype = AX25_I;			/* I frame - carries NR/NS/PF */
-			*ns = (frame[0] >> 1) & 0x07;
-			*nr = (frame[0] >> 5) & 0x07;
-			*pf = frame[0] & AX25_PF;
-		} else if ((frame[0] & AX25_U) == 1) { 	/* S frame - take out PF/NR */
-			frametype = frame[0] & 0x0F;
-			*nr = (frame[0] >> 5) & 0x07;
-			*pf = frame[0] & AX25_PF;
-		} else if ((frame[0] & AX25_U) == 3) { 	/* U frame - take out PF */
-			frametype = frame[0] & ~AX25_PF;
-			*pf = frame[0] & AX25_PF;
-		}
-		skb_pull(skb, 1);
-	} else {
-		if ((frame[0] & AX25_S) == 0) {
-			frametype = AX25_I;			/* I frame - carries NR/NS/PF */
-			*ns = (frame[0] >> 1) & 0x7F;
-			*nr = (frame[1] >> 1) & 0x7F;
-			*pf = frame[1] & AX25_EPF;
-			skb_pull(skb, 2);
-		} else if ((frame[0] & AX25_U) == 1) { 	/* S frame - take out PF/NR */
-			frametype = frame[0] & 0x0F;
-			*nr = (frame[1] >> 1) & 0x7F;
-			*pf = frame[1] & AX25_EPF;
-			skb_pull(skb, 2);
-		} else if ((frame[0] & AX25_U) == 3) { 	/* U frame - take out PF */
-			frametype = frame[0] & ~AX25_PF;
-			*pf = frame[0] & AX25_PF;
-			skb_pull(skb, 1);
-		}
-	}
-
-	return frametype;
-}
-
-/*
- *	This routine is called when the HDLC layer internally  generates a
- *	command or  response  for  the remote machine ( eg. RR, UA etc. ).
- *	Only supervisory or unnumbered frames are processed.
- */
-void ax25_send_control(ax25_cb *ax25, int frametype, int poll_bit, int type)
-{
-	struct sk_buff *skb;
-	unsigned char  *dptr;
-
-	if ((skb = alloc_skb(ax25->ax25_dev->dev->hard_header_len + 2, GFP_ATOMIC)) == NULL)
-		return;
-
-	skb_reserve(skb, ax25->ax25_dev->dev->hard_header_len);
-
-	skb_reset_network_header(skb);
-
-	/* Assume a response - address structure for DTE */
-	if (ax25->modulus == AX25_MODULUS) {
-		dptr = skb_put(skb, 1);
-		*dptr = frametype;
-		*dptr |= (poll_bit) ? AX25_PF : 0;
-		if ((frametype & AX25_U) == AX25_S)		/* S frames carry NR */
-			*dptr |= (ax25->vr << 5);
-	} else {
-		if ((frametype & AX25_U) == AX25_U) {
-			dptr = skb_put(skb, 1);
-			*dptr = frametype;
-			*dptr |= (poll_bit) ? AX25_PF : 0;
-		} else {
-			dptr = skb_put(skb, 2);
-			dptr[0] = frametype;
-			dptr[1] = (ax25->vr << 1);
-			dptr[1] |= (poll_bit) ? AX25_EPF : 0;
-		}
-	}
-
-	ax25_transmit_buffer(ax25, skb, type);
-}
-
-/*
- *	Send a 'DM' to an unknown connection attempt, or an invalid caller.
- *
- *	Note: src here is the sender, thus it's the target of the DM
- */
-void ax25_return_dm(struct net_device *dev, ax25_address *src, ax25_address *dest, ax25_digi *digi)
-{
-	struct sk_buff *skb;
-	char *dptr;
-	ax25_digi retdigi;
-
-	if (dev == NULL)
-		return;
-
-	if ((skb = alloc_skb(dev->hard_header_len + 1, GFP_ATOMIC)) == NULL)
-		return;	/* Next SABM will get DM'd */
-
-	skb_reserve(skb, dev->hard_header_len);
-	skb_reset_network_header(skb);
-
-	ax25_digi_invert(digi, &retdigi);
-
-	dptr = skb_put(skb, 1);
-
-	*dptr = AX25_DM | AX25_PF;
-
-	/*
-	 *	Do the address ourselves
-	 */
-	dptr  = skb_push(skb, ax25_addr_size(digi));
-	dptr += ax25_addr_build(dptr, dest, src, &retdigi, AX25_RESPONSE, AX25_MODULUS);
-
-	ax25_queue_xmit(skb, dev);
-}
-
-/*
- *	Exponential backoff for AX.25
- */
-void ax25_calculate_t1(ax25_cb *ax25)
-{
-	int n, t = 2;
-
-	switch (ax25->backoff) {
-	case 0:
-		break;
-
-	case 1:
-		t += 2 * ax25->n2count;
-		break;
-
-	case 2:
-		for (n = 0; n < ax25->n2count; n++)
-			t *= 2;
-		if (t > 8) t = 8;
-		break;
-	}
-
-	ax25->t1 = t * ax25->rtt;
-}
-
-/*
- *	Calculate the Round Trip Time
- */
-void ax25_calculate_rtt(ax25_cb *ax25)
-{
-	if (ax25->backoff == 0)
-		return;
-
-	if (ax25_t1timer_running(ax25) && ax25->n2count == 0)
-		ax25->rtt = (9 * ax25->rtt + ax25->t1 - ax25_display_timer(&ax25->t1timer)) / 10;
-
-	if (ax25->rtt < AX25_T1CLAMPLO)
-		ax25->rtt = AX25_T1CLAMPLO;
-
-	if (ax25->rtt > AX25_T1CLAMPHI)
-		ax25->rtt = AX25_T1CLAMPHI;
-}
-
-void ax25_disconnect(ax25_cb *ax25, int reason)
-{
-	ax25_clear_queues(ax25);
-
-	if (reason == ENETUNREACH) {
-		timer_delete_sync(&ax25->timer);
-		timer_delete_sync(&ax25->t1timer);
-		timer_delete_sync(&ax25->t2timer);
-		timer_delete_sync(&ax25->t3timer);
-		timer_delete_sync(&ax25->idletimer);
-	} else {
-		if (ax25->sk && !sock_flag(ax25->sk, SOCK_DESTROY))
-			ax25_stop_heartbeat(ax25);
-		ax25_stop_t1timer(ax25);
-		ax25_stop_t2timer(ax25);
-		ax25_stop_t3timer(ax25);
-		ax25_stop_idletimer(ax25);
-	}
-
-	ax25->state = AX25_STATE_0;
-
-	ax25_link_failed(ax25, reason);
-
-	if (ax25->sk != NULL) {
-		local_bh_disable();
-		bh_lock_sock(ax25->sk);
-		ax25->sk->sk_state     = TCP_CLOSE;
-		ax25->sk->sk_err       = reason;
-		ax25->sk->sk_shutdown |= SEND_SHUTDOWN;
-		if (!sock_flag(ax25->sk, SOCK_DEAD)) {
-			ax25->sk->sk_state_change(ax25->sk);
-			sock_set_flag(ax25->sk, SOCK_DEAD);
-		}
-		bh_unlock_sock(ax25->sk);
-		local_bh_enable();
-	}
-}
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
deleted file mode 100644
index a69bfbc8b679..000000000000
--- a/net/ax25/ax25_timer.c
+++ /dev/null
@@ -1,224 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi)
- * Copyright (C) Darryl Miles G7LED (dlm@g7led.demon.co.uk)
- * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
- * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
- * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/jiffies.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-
-static void ax25_heartbeat_expiry(struct timer_list *);
-static void ax25_t1timer_expiry(struct timer_list *);
-static void ax25_t2timer_expiry(struct timer_list *);
-static void ax25_t3timer_expiry(struct timer_list *);
-static void ax25_idletimer_expiry(struct timer_list *);
-
-void ax25_setup_timers(ax25_cb *ax25)
-{
-	timer_setup(&ax25->timer, ax25_heartbeat_expiry, 0);
-	timer_setup(&ax25->t1timer, ax25_t1timer_expiry, 0);
-	timer_setup(&ax25->t2timer, ax25_t2timer_expiry, 0);
-	timer_setup(&ax25->t3timer, ax25_t3timer_expiry, 0);
-	timer_setup(&ax25->idletimer, ax25_idletimer_expiry, 0);
-}
-
-void ax25_start_heartbeat(ax25_cb *ax25)
-{
-	mod_timer(&ax25->timer, jiffies + 5 * HZ);
-}
-
-void ax25_start_t1timer(ax25_cb *ax25)
-{
-	mod_timer(&ax25->t1timer, jiffies + ax25->t1);
-}
-
-void ax25_start_t2timer(ax25_cb *ax25)
-{
-	mod_timer(&ax25->t2timer, jiffies + ax25->t2);
-}
-
-void ax25_start_t3timer(ax25_cb *ax25)
-{
-	if (ax25->t3 > 0)
-		mod_timer(&ax25->t3timer, jiffies + ax25->t3);
-	else
-		timer_delete(&ax25->t3timer);
-}
-
-void ax25_start_idletimer(ax25_cb *ax25)
-{
-	if (ax25->idle > 0)
-		mod_timer(&ax25->idletimer, jiffies + ax25->idle);
-	else
-		timer_delete(&ax25->idletimer);
-}
-
-void ax25_stop_heartbeat(ax25_cb *ax25)
-{
-	timer_delete(&ax25->timer);
-}
-
-void ax25_stop_t1timer(ax25_cb *ax25)
-{
-	timer_delete(&ax25->t1timer);
-}
-
-void ax25_stop_t2timer(ax25_cb *ax25)
-{
-	timer_delete(&ax25->t2timer);
-}
-
-void ax25_stop_t3timer(ax25_cb *ax25)
-{
-	timer_delete(&ax25->t3timer);
-}
-
-void ax25_stop_idletimer(ax25_cb *ax25)
-{
-	timer_delete(&ax25->idletimer);
-}
-
-int ax25_t1timer_running(ax25_cb *ax25)
-{
-	return timer_pending(&ax25->t1timer);
-}
-
-unsigned long ax25_display_timer(struct timer_list *timer)
-{
-	long delta = timer->expires - jiffies;
-
-	if (!timer_pending(timer))
-		return 0;
-
-	return max(0L, delta);
-}
-
-EXPORT_SYMBOL(ax25_display_timer);
-
-static void ax25_heartbeat_expiry(struct timer_list *t)
-{
-	int proto = AX25_PROTO_STD_SIMPLEX;
-	ax25_cb *ax25 = timer_container_of(ax25, t, timer);
-
-	if (ax25->ax25_dev)
-		proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL];
-
-	switch (proto) {
-	case AX25_PROTO_STD_SIMPLEX:
-	case AX25_PROTO_STD_DUPLEX:
-		ax25_std_heartbeat_expiry(ax25);
-		break;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	case AX25_PROTO_DAMA_SLAVE:
-		if (ax25->ax25_dev->dama.slave)
-			ax25_ds_heartbeat_expiry(ax25);
-		else
-			ax25_std_heartbeat_expiry(ax25);
-		break;
-#endif
-	}
-}
-
-static void ax25_t1timer_expiry(struct timer_list *t)
-{
-	ax25_cb *ax25 = timer_container_of(ax25, t, t1timer);
-
-	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
-	case AX25_PROTO_STD_SIMPLEX:
-	case AX25_PROTO_STD_DUPLEX:
-		ax25_std_t1timer_expiry(ax25);
-		break;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	case AX25_PROTO_DAMA_SLAVE:
-		if (!ax25->ax25_dev->dama.slave)
-			ax25_std_t1timer_expiry(ax25);
-		break;
-#endif
-	}
-}
-
-static void ax25_t2timer_expiry(struct timer_list *t)
-{
-	ax25_cb *ax25 = timer_container_of(ax25, t, t2timer);
-
-	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
-	case AX25_PROTO_STD_SIMPLEX:
-	case AX25_PROTO_STD_DUPLEX:
-		ax25_std_t2timer_expiry(ax25);
-		break;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	case AX25_PROTO_DAMA_SLAVE:
-		if (!ax25->ax25_dev->dama.slave)
-			ax25_std_t2timer_expiry(ax25);
-		break;
-#endif
-	}
-}
-
-static void ax25_t3timer_expiry(struct timer_list *t)
-{
-	ax25_cb *ax25 = timer_container_of(ax25, t, t3timer);
-
-	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
-	case AX25_PROTO_STD_SIMPLEX:
-	case AX25_PROTO_STD_DUPLEX:
-		ax25_std_t3timer_expiry(ax25);
-		break;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	case AX25_PROTO_DAMA_SLAVE:
-		if (ax25->ax25_dev->dama.slave)
-			ax25_ds_t3timer_expiry(ax25);
-		else
-			ax25_std_t3timer_expiry(ax25);
-		break;
-#endif
-	}
-}
-
-static void ax25_idletimer_expiry(struct timer_list *t)
-{
-	ax25_cb *ax25 = timer_container_of(ax25, t, idletimer);
-
-	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
-	case AX25_PROTO_STD_SIMPLEX:
-	case AX25_PROTO_STD_DUPLEX:
-		ax25_std_idletimer_expiry(ax25);
-		break;
-
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	case AX25_PROTO_DAMA_SLAVE:
-		if (ax25->ax25_dev->dama.slave)
-			ax25_ds_idletimer_expiry(ax25);
-		else
-			ax25_std_idletimer_expiry(ax25);
-		break;
-#endif
-	}
-}
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
deleted file mode 100644
index 159ce74273f0..000000000000
--- a/net/ax25/ax25_uid.c
+++ /dev/null
@@ -1,204 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/list.h>
-#include <linux/notifier.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/stat.h>
-#include <linux/sysctl.h>
-#include <linux/export.h>
-#include <net/ip.h>
-#include <net/arp.h>
-
-/*
- *	Callsign/UID mapper. This is in kernel space for security on multi-amateur machines.
- */
-
-static HLIST_HEAD(ax25_uid_list);
-static DEFINE_RWLOCK(ax25_uid_lock);
-
-int ax25_uid_policy;
-
-EXPORT_SYMBOL(ax25_uid_policy);
-
-ax25_uid_assoc *ax25_findbyuid(kuid_t uid)
-{
-	ax25_uid_assoc *ax25_uid, *res = NULL;
-
-	read_lock(&ax25_uid_lock);
-	ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
-		if (uid_eq(ax25_uid->uid, uid)) {
-			ax25_uid_hold(ax25_uid);
-			res = ax25_uid;
-			break;
-		}
-	}
-	read_unlock(&ax25_uid_lock);
-
-	return res;
-}
-
-EXPORT_SYMBOL(ax25_findbyuid);
-
-int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
-{
-	ax25_uid_assoc *ax25_uid;
-	ax25_uid_assoc *user;
-	unsigned long res;
-
-	switch (cmd) {
-	case SIOCAX25GETUID:
-		res = -ENOENT;
-		read_lock(&ax25_uid_lock);
-		ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
-			if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
-				res = from_kuid_munged(current_user_ns(), ax25_uid->uid);
-				break;
-			}
-		}
-		read_unlock(&ax25_uid_lock);
-
-		return res;
-
-	case SIOCAX25ADDUID:
-	{
-		kuid_t sax25_kuid;
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		sax25_kuid = make_kuid(current_user_ns(), sax->sax25_uid);
-		if (!uid_valid(sax25_kuid))
-			return -EINVAL;
-		user = ax25_findbyuid(sax25_kuid);
-		if (user) {
-			ax25_uid_put(user);
-			return -EEXIST;
-		}
-		if (sax->sax25_uid == 0)
-			return -EINVAL;
-		if ((ax25_uid = kmalloc_obj(*ax25_uid)) == NULL)
-			return -ENOMEM;
-
-		refcount_set(&ax25_uid->refcount, 1);
-		ax25_uid->uid  = sax25_kuid;
-		ax25_uid->call = sax->sax25_call;
-
-		write_lock(&ax25_uid_lock);
-		hlist_add_head(&ax25_uid->uid_node, &ax25_uid_list);
-		write_unlock(&ax25_uid_lock);
-
-		return 0;
-	}
-	case SIOCAX25DELUID:
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-
-		ax25_uid = NULL;
-		write_lock(&ax25_uid_lock);
-		ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
-			if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0)
-				break;
-		}
-		if (ax25_uid == NULL) {
-			write_unlock(&ax25_uid_lock);
-			return -ENOENT;
-		}
-		hlist_del_init(&ax25_uid->uid_node);
-		ax25_uid_put(ax25_uid);
-		write_unlock(&ax25_uid_lock);
-
-		return 0;
-
-	default:
-		return -EINVAL;
-	}
-
-	return -EINVAL;	/*NOTREACHED */
-}
-
-#ifdef CONFIG_PROC_FS
-
-static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(ax25_uid_lock)
-{
-	read_lock(&ax25_uid_lock);
-	return seq_hlist_start_head(&ax25_uid_list, *pos);
-}
-
-static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	return seq_hlist_next(v, &ax25_uid_list, pos);
-}
-
-static void ax25_uid_seq_stop(struct seq_file *seq, void *v)
-	__releases(ax25_uid_lock)
-{
-	read_unlock(&ax25_uid_lock);
-}
-
-static int ax25_uid_seq_show(struct seq_file *seq, void *v)
-{
-	char buf[11];
-
-	if (v == SEQ_START_TOKEN)
-		seq_printf(seq, "Policy: %d\n", ax25_uid_policy);
-	else {
-		struct ax25_uid_assoc *pt;
-
-		pt = hlist_entry(v, struct ax25_uid_assoc, uid_node);
-		seq_printf(seq, "%6d %s\n",
-			from_kuid_munged(seq_user_ns(seq), pt->uid),
-			ax2asc(buf, &pt->call));
-	}
-	return 0;
-}
-
-const struct seq_operations ax25_uid_seqops = {
-	.start = ax25_uid_seq_start,
-	.next = ax25_uid_seq_next,
-	.stop = ax25_uid_seq_stop,
-	.show = ax25_uid_seq_show,
-};
-#endif
-
-/*
- *	Free all memory associated with UID/Callsign structures.
- */
-void __exit ax25_uid_free(void)
-{
-	ax25_uid_assoc *ax25_uid;
-
-	write_lock(&ax25_uid_lock);
-again:
-	ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
-		hlist_del_init(&ax25_uid->uid_node);
-		ax25_uid_put(ax25_uid);
-		goto again;
-	}
-	write_unlock(&ax25_uid_lock);
-}
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
deleted file mode 100644
index 68753aa30334..000000000000
--- a/net/ax25/sysctl_net_ax25.c
+++ /dev/null
@@ -1,181 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com)
- */
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/sysctl.h>
-#include <linux/spinlock.h>
-#include <net/ax25.h>
-
-static int min_ipdefmode[1],    	max_ipdefmode[] = {1};
-static int min_axdefmode[1],            max_axdefmode[] = {1};
-static int min_backoff[1],		max_backoff[] = {2};
-static int min_conmode[1],		max_conmode[] = {2};
-static int min_window[] = {1},		max_window[] = {7};
-static int min_ewindow[] = {1},		max_ewindow[] = {63};
-static int min_t1[] = {1},		max_t1[] = {30000};
-static int min_t2[] = {1},		max_t2[] = {20000};
-static int min_t3[1],			max_t3[] = {3600000};
-static int min_idle[1],			max_idle[] = {65535000};
-static int min_n2[] = {1},		max_n2[] = {31};
-static int min_paclen[] = {1},		max_paclen[] = {512};
-static int min_proto[1],		max_proto[] = { AX25_PROTO_MAX };
-#ifdef CONFIG_AX25_DAMA_SLAVE
-static int min_ds_timeout[1],		max_ds_timeout[] = {65535000};
-#endif
-
-static const struct ctl_table ax25_param_table[] = {
-	{
-		.procname	= "ip_default_mode",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_ipdefmode,
-		.extra2		= &max_ipdefmode
-	},
-	{
-		.procname	= "ax25_default_mode",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_axdefmode,
-		.extra2		= &max_axdefmode
-	},
-	{
-		.procname	= "backoff_type",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_backoff,
-		.extra2		= &max_backoff
-	},
-	{
-		.procname	= "connect_mode",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_conmode,
-		.extra2		= &max_conmode
-	},
-	{
-		.procname	= "standard_window_size",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_window,
-		.extra2		= &max_window
-	},
-	{
-		.procname	= "extended_window_size",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_ewindow,
-		.extra2		= &max_ewindow
-	},
-	{
-		.procname	= "t1_timeout",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_t1,
-		.extra2		= &max_t1
-	},
-	{
-		.procname	= "t2_timeout",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_t2,
-		.extra2		= &max_t2
-	},
-	{
-		.procname	= "t3_timeout",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_t3,
-		.extra2		= &max_t3
-	},
-	{
-		.procname	= "idle_timeout",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_idle,
-		.extra2		= &max_idle
-	},
-	{
-		.procname	= "maximum_retry_count",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_n2,
-		.extra2		= &max_n2
-	},
-	{
-		.procname	= "maximum_packet_length",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_paclen,
-		.extra2		= &max_paclen
-	},
-	{
-		.procname	= "protocol",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_proto,
-		.extra2		= &max_proto
-	},
-#ifdef CONFIG_AX25_DAMA_SLAVE
-	{
-		.procname	= "dama_slave_timeout",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_ds_timeout,
-		.extra2		= &max_ds_timeout
-	},
-#endif
-};
-
-int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
-{
-	char path[sizeof("net/ax25/") + IFNAMSIZ];
-	int k;
-	struct ctl_table *table;
-
-	table = kmemdup(ax25_param_table, sizeof(ax25_param_table), GFP_KERNEL);
-	if (!table)
-		return -ENOMEM;
-
-	BUILD_BUG_ON(ARRAY_SIZE(ax25_param_table) != AX25_MAX_VALUES);
-	for (k = 0; k < AX25_MAX_VALUES; k++)
-		table[k].data = &ax25_dev->values[k];
-
-	snprintf(path, sizeof(path), "net/ax25/%s", ax25_dev->dev->name);
-	ax25_dev->sysheader = register_net_sysctl_sz(&init_net, path, table,
-						     ARRAY_SIZE(ax25_param_table));
-	if (!ax25_dev->sysheader) {
-		kfree(table);
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void ax25_unregister_dev_sysctl(ax25_dev *ax25_dev)
-{
-	struct ctl_table_header *header = ax25_dev->sysheader;
-	const struct ctl_table *table;
-
-	if (header) {
-		ax25_dev->sysheader = NULL;
-		table = header->ctl_table_arg;
-		unregister_net_sysctl_table(header);
-		kfree(table);
-	}
-}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 51d70180e1cc..d409f606aec0 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -109,7 +109,6 @@
 #include <net/sock.h>
 #include <net/arp.h>
 #include <net/ax25.h>
-#include <net/netrom.h>
 #include <net/dst_metadata.h>
 #include <net/ip_tunnels.h>
 
diff --git a/net/netrom/Makefile b/net/netrom/Makefile
deleted file mode 100644
index 603e36c9af2e..000000000000
--- a/net/netrom/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the Linux NET/ROM layer.
-#
-
-obj-$(CONFIG_NETROM) += netrom.o
-
-netrom-y		:= af_netrom.o nr_dev.o nr_in.o nr_loopback.o \
-			   nr_out.o nr_route.o nr_subr.o nr_timer.o
-netrom-$(CONFIG_SYSCTL)	+= sysctl_net_netrom.o
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
deleted file mode 100644
index 5fc54836dfa8..000000000000
--- a/net/netrom/af_netrom.c
+++ /dev/null
@@ -1,1536 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk)
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/slab.h>
-#include <linux/kernel.h>
-#include <linux/sched/signal.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/stat.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-#include <net/netrom.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <net/ip.h>
-#include <net/tcp_states.h>
-#include <net/arp.h>
-#include <linux/init.h>
-
-static int nr_ndevs = 4;
-
-int sysctl_netrom_default_path_quality            = NR_DEFAULT_QUAL;
-int sysctl_netrom_obsolescence_count_initialiser  = NR_DEFAULT_OBS;
-int sysctl_netrom_network_ttl_initialiser         = NR_DEFAULT_TTL;
-int sysctl_netrom_transport_timeout               = NR_DEFAULT_T1;
-int sysctl_netrom_transport_maximum_tries         = NR_DEFAULT_N2;
-int sysctl_netrom_transport_acknowledge_delay     = NR_DEFAULT_T2;
-int sysctl_netrom_transport_busy_delay            = NR_DEFAULT_T4;
-int sysctl_netrom_transport_requested_window_size = NR_DEFAULT_WINDOW;
-int sysctl_netrom_transport_no_activity_timeout   = NR_DEFAULT_IDLE;
-int sysctl_netrom_routing_control                 = NR_DEFAULT_ROUTING;
-int sysctl_netrom_link_fails_count                = NR_DEFAULT_FAILS;
-int sysctl_netrom_reset_circuit                   = NR_DEFAULT_RESET;
-
-static unsigned short circuit = 0x101;
-
-static HLIST_HEAD(nr_list);
-static DEFINE_SPINLOCK(nr_list_lock);
-
-static const struct proto_ops nr_proto_ops;
-
-/*
- * NETROM network devices are virtual network devices encapsulating NETROM
- * frames into AX.25 which will be sent through an AX.25 device, so form a
- * special "super class" of normal net devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key nr_netdev_xmit_lock_key;
-static struct lock_class_key nr_netdev_addr_lock_key;
-
-static void nr_set_lockdep_one(struct net_device *dev,
-			       struct netdev_queue *txq,
-			       void *_unused)
-{
-	lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key);
-}
-
-static void nr_set_lockdep_key(struct net_device *dev)
-{
-	lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key);
-	netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL);
-}
-
-/*
- *	Socket removal during an interrupt is now safe.
- */
-static void nr_remove_socket(struct sock *sk)
-{
-	spin_lock_bh(&nr_list_lock);
-	sk_del_node_init(sk);
-	spin_unlock_bh(&nr_list_lock);
-}
-
-/*
- *	Kill all bound sockets on a dropped device.
- */
-static void nr_kill_by_device(struct net_device *dev)
-{
-	struct sock *s;
-
-	spin_lock_bh(&nr_list_lock);
-	sk_for_each(s, &nr_list)
-		if (nr_sk(s)->device == dev)
-			nr_disconnect(s, ENETUNREACH);
-	spin_unlock_bh(&nr_list_lock);
-}
-
-/*
- *	Handle device status changes.
- */
-static int nr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
-
-	if (event != NETDEV_DOWN)
-		return NOTIFY_DONE;
-
-	nr_kill_by_device(dev);
-	nr_rt_device_down(dev);
-
-	return NOTIFY_DONE;
-}
-
-/*
- *	Add a socket to the bound sockets list.
- */
-static void nr_insert_socket(struct sock *sk)
-{
-	spin_lock_bh(&nr_list_lock);
-	sk_add_node(sk, &nr_list);
-	spin_unlock_bh(&nr_list_lock);
-}
-
-/*
- *	Find a socket that wants to accept the Connect Request we just
- *	received.
- */
-static struct sock *nr_find_listener(ax25_address *addr)
-{
-	struct sock *s;
-
-	spin_lock_bh(&nr_list_lock);
-	sk_for_each(s, &nr_list)
-		if (!ax25cmp(&nr_sk(s)->source_addr, addr) &&
-		    s->sk_state == TCP_LISTEN) {
-			sock_hold(s);
-			goto found;
-		}
-	s = NULL;
-found:
-	spin_unlock_bh(&nr_list_lock);
-	return s;
-}
-
-/*
- *	Find a connected NET/ROM socket given my circuit IDs.
- */
-static struct sock *nr_find_socket(unsigned char index, unsigned char id)
-{
-	struct sock *s;
-
-	spin_lock_bh(&nr_list_lock);
-	sk_for_each(s, &nr_list) {
-		struct nr_sock *nr = nr_sk(s);
-
-		if (nr->my_index == index && nr->my_id == id) {
-			sock_hold(s);
-			goto found;
-		}
-	}
-	s = NULL;
-found:
-	spin_unlock_bh(&nr_list_lock);
-	return s;
-}
-
-/*
- *	Find a connected NET/ROM socket given their circuit IDs.
- */
-static struct sock *nr_find_peer(unsigned char index, unsigned char id,
-	ax25_address *dest)
-{
-	struct sock *s;
-
-	spin_lock_bh(&nr_list_lock);
-	sk_for_each(s, &nr_list) {
-		struct nr_sock *nr = nr_sk(s);
-
-		if (nr->your_index == index && nr->your_id == id &&
-		    !ax25cmp(&nr->dest_addr, dest)) {
-			sock_hold(s);
-			goto found;
-		}
-	}
-	s = NULL;
-found:
-	spin_unlock_bh(&nr_list_lock);
-	return s;
-}
-
-/*
- *	Find next free circuit ID.
- */
-static unsigned short nr_find_next_circuit(void)
-{
-	unsigned short id = circuit;
-	unsigned char i, j;
-	struct sock *sk;
-
-	for (;;) {
-		i = id / 256;
-		j = id % 256;
-
-		if (i != 0 && j != 0) {
-			if ((sk=nr_find_socket(i, j)) == NULL)
-				break;
-			sock_put(sk);
-		}
-
-		id++;
-	}
-
-	return id;
-}
-
-/*
- *	Deferred destroy.
- */
-void nr_destroy_socket(struct sock *);
-
-/*
- *	Handler for deferred kills.
- */
-static void nr_destroy_timer(struct timer_list *t)
-{
-	struct sock *sk = timer_container_of(sk, t, sk_timer);
-	bh_lock_sock(sk);
-	sock_hold(sk);
-	nr_destroy_socket(sk);
-	bh_unlock_sock(sk);
-	sock_put(sk);
-}
-
-/*
- *	This is called from user mode and the timers. Thus it protects itself
- *	against interrupt users but doesn't worry about being called during
- *	work. Once it is removed from the queue no interrupt or bottom half
- *	will touch it and we are (fairly 8-) ) safe.
- */
-void nr_destroy_socket(struct sock *sk)
-{
-	struct sk_buff *skb;
-
-	nr_remove_socket(sk);
-
-	nr_stop_heartbeat(sk);
-	nr_stop_t1timer(sk);
-	nr_stop_t2timer(sk);
-	nr_stop_t4timer(sk);
-	nr_stop_idletimer(sk);
-
-	nr_clear_queues(sk);		/* Flush the queues */
-
-	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-		if (skb->sk != sk) { /* A pending connection */
-			/* Queue the unaccepted socket for death */
-			sock_set_flag(skb->sk, SOCK_DEAD);
-			nr_start_heartbeat(skb->sk);
-			nr_sk(skb->sk)->state = NR_STATE_0;
-		}
-
-		kfree_skb(skb);
-	}
-
-	if (sk_has_allocations(sk)) {
-		/* Defer: outstanding buffers */
-		sk->sk_timer.function = nr_destroy_timer;
-		sk->sk_timer.expires  = jiffies + 2 * HZ;
-		add_timer(&sk->sk_timer);
-	} else
-		sock_put(sk);
-}
-
-/*
- *	Handling for system calls applied via the various interfaces to a
- *	NET/ROM socket object.
- */
-
-static int nr_setsockopt(struct socket *sock, int level, int optname,
-		sockptr_t optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-	struct nr_sock *nr = nr_sk(sk);
-	unsigned int opt;
-
-	if (level != SOL_NETROM)
-		return -ENOPROTOOPT;
-
-	if (optlen < sizeof(unsigned int))
-		return -EINVAL;
-
-	if (copy_from_sockptr(&opt, optval, sizeof(opt)))
-		return -EFAULT;
-
-	switch (optname) {
-	case NETROM_T1:
-		if (opt < 1 || opt > UINT_MAX / HZ)
-			return -EINVAL;
-		nr->t1 = opt * HZ;
-		return 0;
-
-	case NETROM_T2:
-		if (opt < 1 || opt > UINT_MAX / HZ)
-			return -EINVAL;
-		nr->t2 = opt * HZ;
-		return 0;
-
-	case NETROM_N2:
-		if (opt < 1 || opt > 31)
-			return -EINVAL;
-		nr->n2 = opt;
-		return 0;
-
-	case NETROM_T4:
-		if (opt < 1 || opt > UINT_MAX / HZ)
-			return -EINVAL;
-		nr->t4 = opt * HZ;
-		return 0;
-
-	case NETROM_IDLE:
-		if (opt > UINT_MAX / (60 * HZ))
-			return -EINVAL;
-		nr->idle = opt * 60 * HZ;
-		return 0;
-
-	default:
-		return -ENOPROTOOPT;
-	}
-}
-
-static int nr_getsockopt(struct socket *sock, int level, int optname,
-	char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-	struct nr_sock *nr = nr_sk(sk);
-	int val = 0;
-	int len;
-
-	if (level != SOL_NETROM)
-		return -ENOPROTOOPT;
-
-	if (get_user(len, optlen))
-		return -EFAULT;
-
-	if (len < 0)
-		return -EINVAL;
-
-	switch (optname) {
-	case NETROM_T1:
-		val = nr->t1 / HZ;
-		break;
-
-	case NETROM_T2:
-		val = nr->t2 / HZ;
-		break;
-
-	case NETROM_N2:
-		val = nr->n2;
-		break;
-
-	case NETROM_T4:
-		val = nr->t4 / HZ;
-		break;
-
-	case NETROM_IDLE:
-		val = nr->idle / (60 * HZ);
-		break;
-
-	default:
-		return -ENOPROTOOPT;
-	}
-
-	len = min_t(unsigned int, len, sizeof(int));
-
-	if (put_user(len, optlen))
-		return -EFAULT;
-
-	return copy_to_user(optval, &val, len) ? -EFAULT : 0;
-}
-
-static int nr_listen(struct socket *sock, int backlog)
-{
-	struct sock *sk = sock->sk;
-
-	lock_sock(sk);
-	if (sock->state != SS_UNCONNECTED) {
-		release_sock(sk);
-		return -EINVAL;
-	}
-
-	if (sk->sk_state != TCP_LISTEN) {
-		memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN);
-		sk->sk_max_ack_backlog = backlog;
-		sk->sk_state           = TCP_LISTEN;
-		release_sock(sk);
-		return 0;
-	}
-	release_sock(sk);
-
-	return -EOPNOTSUPP;
-}
-
-static struct proto nr_proto = {
-	.name	  = "NETROM",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct nr_sock),
-};
-
-static int nr_create(struct net *net, struct socket *sock, int protocol,
-		     int kern)
-{
-	struct sock *sk;
-	struct nr_sock *nr;
-
-	if (!net_eq(net, &init_net))
-		return -EAFNOSUPPORT;
-
-	if (sock->type != SOCK_SEQPACKET || protocol != 0)
-		return -ESOCKTNOSUPPORT;
-
-	sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, kern);
-	if (sk  == NULL)
-		return -ENOMEM;
-
-	nr = nr_sk(sk);
-
-	sock_init_data(sock, sk);
-
-	sock->ops    = &nr_proto_ops;
-	sk->sk_protocol = protocol;
-
-	skb_queue_head_init(&nr->ack_queue);
-	skb_queue_head_init(&nr->reseq_queue);
-	skb_queue_head_init(&nr->frag_queue);
-
-	nr_init_timers(sk);
-
-	nr->t1     =
-		msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout));
-	nr->t2     =
-		msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay));
-	nr->n2     =
-		msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries));
-	nr->t4     =
-		msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay));
-	nr->idle   =
-		msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout));
-	nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size);
-
-	nr->bpqext = 1;
-	nr->state  = NR_STATE_0;
-
-	return 0;
-}
-
-static struct sock *nr_make_new(struct sock *osk)
-{
-	struct sock *sk;
-	struct nr_sock *nr, *onr;
-
-	if (osk->sk_type != SOCK_SEQPACKET)
-		return NULL;
-
-	sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot, 0);
-	if (sk == NULL)
-		return NULL;
-
-	nr = nr_sk(sk);
-
-	sock_init_data(NULL, sk);
-
-	sk->sk_type     = osk->sk_type;
-	sk->sk_priority = READ_ONCE(osk->sk_priority);
-	sk->sk_protocol = osk->sk_protocol;
-	sk->sk_rcvbuf   = osk->sk_rcvbuf;
-	sk->sk_sndbuf   = osk->sk_sndbuf;
-	sk->sk_state    = TCP_ESTABLISHED;
-	sock_copy_flags(sk, osk);
-
-	skb_queue_head_init(&nr->ack_queue);
-	skb_queue_head_init(&nr->reseq_queue);
-	skb_queue_head_init(&nr->frag_queue);
-
-	nr_init_timers(sk);
-
-	onr = nr_sk(osk);
-
-	nr->t1      = onr->t1;
-	nr->t2      = onr->t2;
-	nr->n2      = onr->n2;
-	nr->t4      = onr->t4;
-	nr->idle    = onr->idle;
-	nr->window  = onr->window;
-
-	nr->device  = onr->device;
-	nr->bpqext  = onr->bpqext;
-
-	return sk;
-}
-
-static int nr_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-	struct nr_sock *nr;
-
-	if (sk == NULL) return 0;
-
-	sock_hold(sk);
-	sock_orphan(sk);
-	lock_sock(sk);
-	nr = nr_sk(sk);
-
-	switch (nr->state) {
-	case NR_STATE_0:
-	case NR_STATE_1:
-	case NR_STATE_2:
-		nr_disconnect(sk, 0);
-		nr_destroy_socket(sk);
-		break;
-
-	case NR_STATE_3:
-		nr_clear_queues(sk);
-		nr->n2count = 0;
-		nr_write_internal(sk, NR_DISCREQ);
-		nr_start_t1timer(sk);
-		nr_stop_t2timer(sk);
-		nr_stop_t4timer(sk);
-		nr_stop_idletimer(sk);
-		nr->state    = NR_STATE_2;
-		sk->sk_state    = TCP_CLOSE;
-		sk->sk_shutdown |= SEND_SHUTDOWN;
-		sk->sk_state_change(sk);
-		sock_set_flag(sk, SOCK_DESTROY);
-		break;
-
-	default:
-		break;
-	}
-
-	sock->sk   = NULL;
-	release_sock(sk);
-	sock_put(sk);
-
-	return 0;
-}
-
-static int nr_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len)
-{
-	struct sock *sk = sock->sk;
-	struct nr_sock *nr = nr_sk(sk);
-	struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr;
-	struct net_device *dev;
-	ax25_uid_assoc *user;
-	ax25_address *source;
-
-	lock_sock(sk);
-	if (!sock_flag(sk, SOCK_ZAPPED)) {
-		release_sock(sk);
-		return -EINVAL;
-	}
-	if (addr_len < sizeof(struct sockaddr_ax25) || addr_len > sizeof(struct full_sockaddr_ax25)) {
-		release_sock(sk);
-		return -EINVAL;
-	}
-	if (addr_len < (addr->fsa_ax25.sax25_ndigis * sizeof(ax25_address) + sizeof(struct sockaddr_ax25))) {
-		release_sock(sk);
-		return -EINVAL;
-	}
-	if (addr->fsa_ax25.sax25_family != AF_NETROM) {
-		release_sock(sk);
-		return -EINVAL;
-	}
-	if ((dev = nr_dev_get(&addr->fsa_ax25.sax25_call)) == NULL) {
-		release_sock(sk);
-		return -EADDRNOTAVAIL;
-	}
-
-	/*
-	 * Only the super user can set an arbitrary user callsign.
-	 */
-	if (addr->fsa_ax25.sax25_ndigis == 1) {
-		if (!capable(CAP_NET_BIND_SERVICE)) {
-			dev_put(dev);
-			release_sock(sk);
-			return -EPERM;
-		}
-		nr->user_addr   = addr->fsa_digipeater[0];
-		nr->source_addr = addr->fsa_ax25.sax25_call;
-	} else {
-		source = &addr->fsa_ax25.sax25_call;
-
-		user = ax25_findbyuid(current_euid());
-		if (user) {
-			nr->user_addr   = user->call;
-			ax25_uid_put(user);
-		} else {
-			if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
-				release_sock(sk);
-				dev_put(dev);
-				return -EPERM;
-			}
-			nr->user_addr   = *source;
-		}
-
-		nr->source_addr = *source;
-	}
-
-	nr->device = dev;
-	nr_insert_socket(sk);
-
-	sock_reset_flag(sk, SOCK_ZAPPED);
-	dev_put(dev);
-	release_sock(sk);
-
-	return 0;
-}
-
-static int nr_connect(struct socket *sock, struct sockaddr_unsized *uaddr,
-		      int addr_len, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct nr_sock *nr = nr_sk(sk);
-	struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr;
-	const ax25_address *source = NULL;
-	ax25_uid_assoc *user;
-	struct net_device *dev;
-	int err = 0;
-
-	lock_sock(sk);
-	if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
-		sock->state = SS_CONNECTED;
-		goto out_release;	/* Connect completed during a ERESTARTSYS event */
-	}
-
-	if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
-		sock->state = SS_UNCONNECTED;
-		err = -ECONNREFUSED;
-		goto out_release;
-	}
-
-	if (sk->sk_state == TCP_ESTABLISHED) {
-		err = -EISCONN;	/* No reconnect on a seqpacket socket */
-		goto out_release;
-	}
-
-	if (sock->state == SS_CONNECTING) {
-		err = -EALREADY;
-		goto out_release;
-	}
-
-	sk->sk_state   = TCP_CLOSE;
-	sock->state = SS_UNCONNECTED;
-
-	if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) {
-		err = -EINVAL;
-		goto out_release;
-	}
-	if (addr->sax25_family != AF_NETROM) {
-		err = -EINVAL;
-		goto out_release;
-	}
-	if (sock_flag(sk, SOCK_ZAPPED)) {	/* Must bind first - autobinding in this may or may not work */
-		sock_reset_flag(sk, SOCK_ZAPPED);
-
-		if ((dev = nr_dev_first()) == NULL) {
-			err = -ENETUNREACH;
-			goto out_release;
-		}
-		source = (const ax25_address *)dev->dev_addr;
-
-		user = ax25_findbyuid(current_euid());
-		if (user) {
-			nr->user_addr   = user->call;
-			ax25_uid_put(user);
-		} else {
-			if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
-				dev_put(dev);
-				err = -EPERM;
-				goto out_release;
-			}
-			nr->user_addr   = *source;
-		}
-
-		nr->source_addr = *source;
-		nr->device      = dev;
-
-		dev_put(dev);
-		nr_insert_socket(sk);		/* Finish the bind */
-	}
-
-	nr->dest_addr = addr->sax25_call;
-
-	release_sock(sk);
-	circuit = nr_find_next_circuit();
-	lock_sock(sk);
-
-	nr->my_index = circuit / 256;
-	nr->my_id    = circuit % 256;
-
-	circuit++;
-
-	/* Move to connecting socket, start sending Connect Requests */
-	sock->state  = SS_CONNECTING;
-	sk->sk_state = TCP_SYN_SENT;
-
-	nr_establish_data_link(sk);
-
-	nr->state = NR_STATE_1;
-
-	nr_start_heartbeat(sk);
-
-	/* Now the loop */
-	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
-		err = -EINPROGRESS;
-		goto out_release;
-	}
-
-	/*
-	 * A Connect Ack with Choke or timeout or failed routing will go to
-	 * closed.
-	 */
-	if (sk->sk_state == TCP_SYN_SENT) {
-		DEFINE_WAIT(wait);
-
-		for (;;) {
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
-			if (sk->sk_state != TCP_SYN_SENT)
-				break;
-			if (!signal_pending(current)) {
-				release_sock(sk);
-				schedule();
-				lock_sock(sk);
-				continue;
-			}
-			err = -ERESTARTSYS;
-			break;
-		}
-		finish_wait(sk_sleep(sk), &wait);
-		if (err)
-			goto out_release;
-	}
-
-	if (sk->sk_state != TCP_ESTABLISHED) {
-		sock->state = SS_UNCONNECTED;
-		err = sock_error(sk);	/* Always set at this point */
-		goto out_release;
-	}
-
-	sock->state = SS_CONNECTED;
-
-out_release:
-	release_sock(sk);
-
-	return err;
-}
-
-static int nr_accept(struct socket *sock, struct socket *newsock,
-		     struct proto_accept_arg *arg)
-{
-	struct sk_buff *skb;
-	struct sock *newsk;
-	DEFINE_WAIT(wait);
-	struct sock *sk;
-	int err = 0;
-
-	if ((sk = sock->sk) == NULL)
-		return -EINVAL;
-
-	lock_sock(sk);
-	if (sk->sk_type != SOCK_SEQPACKET) {
-		err = -EOPNOTSUPP;
-		goto out_release;
-	}
-
-	if (sk->sk_state != TCP_LISTEN) {
-		err = -EINVAL;
-		goto out_release;
-	}
-
-	/*
-	 *	The write queue this time is holding sockets ready to use
-	 *	hooked into the SABM we saved
-	 */
-	for (;;) {
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		skb = skb_dequeue(&sk->sk_receive_queue);
-		if (skb)
-			break;
-
-		if (arg->flags & O_NONBLOCK) {
-			err = -EWOULDBLOCK;
-			break;
-		}
-		if (!signal_pending(current)) {
-			release_sock(sk);
-			schedule();
-			lock_sock(sk);
-			continue;
-		}
-		err = -ERESTARTSYS;
-		break;
-	}
-	finish_wait(sk_sleep(sk), &wait);
-	if (err)
-		goto out_release;
-
-	newsk = skb->sk;
-	sock_graft(newsk, newsock);
-
-	/* Now attach up the new socket */
-	kfree_skb(skb);
-	sk_acceptq_removed(sk);
-
-out_release:
-	release_sock(sk);
-
-	return err;
-}
-
-static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
-	int peer)
-{
-	struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr;
-	struct sock *sk = sock->sk;
-	struct nr_sock *nr = nr_sk(sk);
-	int uaddr_len;
-
-	memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25));
-
-	lock_sock(sk);
-	if (peer != 0) {
-		if (sk->sk_state != TCP_ESTABLISHED) {
-			release_sock(sk);
-			return -ENOTCONN;
-		}
-		sax->fsa_ax25.sax25_family = AF_NETROM;
-		sax->fsa_ax25.sax25_ndigis = 1;
-		sax->fsa_ax25.sax25_call   = nr->user_addr;
-		memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater));
-		sax->fsa_digipeater[0]     = nr->dest_addr;
-		uaddr_len = sizeof(struct full_sockaddr_ax25);
-	} else {
-		sax->fsa_ax25.sax25_family = AF_NETROM;
-		sax->fsa_ax25.sax25_ndigis = 0;
-		sax->fsa_ax25.sax25_call   = nr->source_addr;
-		uaddr_len = sizeof(struct sockaddr_ax25);
-	}
-	release_sock(sk);
-
-	return uaddr_len;
-}
-
-int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
-{
-	struct sock *sk;
-	struct sock *make;
-	struct nr_sock *nr_make;
-	ax25_address *src, *dest, *user;
-	unsigned short circuit_index, circuit_id;
-	unsigned short peer_circuit_index, peer_circuit_id;
-	unsigned short frametype, flags, window, timeout;
-	int ret;
-
-	skb_orphan(skb);
-
-	/*
-	 *	skb->data points to the netrom frame start
-	 */
-
-	src  = (ax25_address *)(skb->data + 0);
-	dest = (ax25_address *)(skb->data + 7);
-
-	circuit_index      = skb->data[15];
-	circuit_id         = skb->data[16];
-	peer_circuit_index = skb->data[17];
-	peer_circuit_id    = skb->data[18];
-	frametype          = skb->data[19] & 0x0F;
-	flags              = skb->data[19] & 0xF0;
-
-	/*
-	 * Check for an incoming IP over NET/ROM frame.
-	 */
-	if (frametype == NR_PROTOEXT &&
-	    circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) {
-		skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
-		skb_reset_transport_header(skb);
-
-		return nr_rx_ip(skb, dev);
-	}
-
-	/*
-	 * Find an existing socket connection, based on circuit ID, if it's
-	 * a Connect Request base it on their circuit ID.
-	 *
-	 * Circuit ID 0/0 is not valid but it could still be a "reset" for a
-	 * circuit that no longer exists at the other end ...
-	 */
-
-	sk = NULL;
-
-	if (circuit_index == 0 && circuit_id == 0) {
-		if (frametype == NR_CONNACK && flags == NR_CHOKE_FLAG)
-			sk = nr_find_peer(peer_circuit_index, peer_circuit_id, src);
-	} else {
-		if (frametype == NR_CONNREQ)
-			sk = nr_find_peer(circuit_index, circuit_id, src);
-		else
-			sk = nr_find_socket(circuit_index, circuit_id);
-	}
-
-	if (sk != NULL) {
-		bh_lock_sock(sk);
-		skb_reset_transport_header(skb);
-
-		if (frametype == NR_CONNACK && skb->len == 22)
-			nr_sk(sk)->bpqext = 1;
-		else
-			nr_sk(sk)->bpqext = 0;
-
-		ret = nr_process_rx_frame(sk, skb);
-		bh_unlock_sock(sk);
-		sock_put(sk);
-		return ret;
-	}
-
-	/*
-	 * Now it should be a CONNREQ.
-	 */
-	if (frametype != NR_CONNREQ) {
-		/*
-		 * Here it would be nice to be able to send a reset but
-		 * NET/ROM doesn't have one.  We've tried to extend the protocol
-		 * by sending NR_CONNACK | NR_CHOKE_FLAGS replies but that
-		 * apparently kills BPQ boxes... :-(
-		 * So now we try to follow the established behaviour of
-		 * G8PZT's Xrouter which is sending packets with command type 7
-		 * as an extension of the protocol.
-		 */
-		if (READ_ONCE(sysctl_netrom_reset_circuit) &&
-		    (frametype != NR_RESET || flags != 0))
-			nr_transmit_reset(skb, 1);
-
-		return 0;
-	}
-
-	sk = nr_find_listener(dest);
-
-	user = (ax25_address *)(skb->data + 21);
-
-	if (sk == NULL || sk_acceptq_is_full(sk) ||
-	    (make = nr_make_new(sk)) == NULL) {
-		nr_transmit_refusal(skb, 0);
-		if (sk)
-			sock_put(sk);
-		return 0;
-	}
-
-	bh_lock_sock(sk);
-
-	window = skb->data[20];
-
-	sock_hold(make);
-	skb->sk             = make;
-	skb->destructor     = sock_efree;
-	make->sk_state	    = TCP_ESTABLISHED;
-
-	/* Fill in his circuit details */
-	nr_make = nr_sk(make);
-	nr_make->source_addr = *dest;
-	nr_make->dest_addr   = *src;
-	nr_make->user_addr   = *user;
-
-	nr_make->your_index  = circuit_index;
-	nr_make->your_id     = circuit_id;
-
-	bh_unlock_sock(sk);
-	circuit = nr_find_next_circuit();
-	bh_lock_sock(sk);
-
-	nr_make->my_index    = circuit / 256;
-	nr_make->my_id       = circuit % 256;
-
-	circuit++;
-
-	/* Window negotiation */
-	if (window < nr_make->window)
-		nr_make->window = window;
-
-	/* L4 timeout negotiation */
-	if (skb->len == 37) {
-		timeout = skb->data[36] * 256 + skb->data[35];
-		if (timeout * HZ < nr_make->t1)
-			nr_make->t1 = timeout * HZ;
-		nr_make->bpqext = 1;
-	} else {
-		nr_make->bpqext = 0;
-	}
-
-	nr_write_internal(make, NR_CONNACK);
-
-	nr_make->condition = 0x00;
-	nr_make->vs        = 0;
-	nr_make->va        = 0;
-	nr_make->vr        = 0;
-	nr_make->vl        = 0;
-	nr_make->state     = NR_STATE_3;
-	sk_acceptq_added(sk);
-	skb_queue_head(&sk->sk_receive_queue, skb);
-
-	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk);
-
-	bh_unlock_sock(sk);
-	sock_put(sk);
-
-	nr_insert_socket(make);
-
-	nr_start_heartbeat(make);
-	nr_start_idletimer(make);
-
-	return 1;
-}
-
-static int nr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
-{
-	struct sock *sk = sock->sk;
-	struct nr_sock *nr = nr_sk(sk);
-	DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name);
-	int err;
-	struct sockaddr_ax25 sax;
-	struct sk_buff *skb;
-	unsigned char *asmptr;
-	int size;
-
-	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT))
-		return -EINVAL;
-
-	lock_sock(sk);
-	if (sock_flag(sk, SOCK_ZAPPED)) {
-		err = -EADDRNOTAVAIL;
-		goto out;
-	}
-
-	if (sk->sk_shutdown & SEND_SHUTDOWN) {
-		send_sig(SIGPIPE, current, 0);
-		err = -EPIPE;
-		goto out;
-	}
-
-	if (nr->device == NULL) {
-		err = -ENETUNREACH;
-		goto out;
-	}
-
-	if (usax) {
-		if (msg->msg_namelen < sizeof(sax)) {
-			err = -EINVAL;
-			goto out;
-		}
-		sax = *usax;
-		if (ax25cmp(&nr->dest_addr, &sax.sax25_call) != 0) {
-			err = -EISCONN;
-			goto out;
-		}
-		if (sax.sax25_family != AF_NETROM) {
-			err = -EINVAL;
-			goto out;
-		}
-	} else {
-		if (sk->sk_state != TCP_ESTABLISHED) {
-			err = -ENOTCONN;
-			goto out;
-		}
-		sax.sax25_family = AF_NETROM;
-		sax.sax25_call   = nr->dest_addr;
-	}
-
-	/* Build a packet - the conventional user limit is 236 bytes. We can
-	   do ludicrously large NetROM frames but must not overflow */
-	if (len > 65536) {
-		err = -EMSGSIZE;
-		goto out;
-	}
-
-	size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN;
-
-	if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL)
-		goto out;
-
-	skb_reserve(skb, size - len);
-	skb_reset_transport_header(skb);
-
-	/*
-	 *	Push down the NET/ROM header
-	 */
-
-	asmptr = skb_push(skb, NR_TRANSPORT_LEN);
-
-	/* Build a NET/ROM Transport header */
-
-	*asmptr++ = nr->your_index;
-	*asmptr++ = nr->your_id;
-	*asmptr++ = 0;		/* To be filled in later */
-	*asmptr++ = 0;		/*      Ditto            */
-	*asmptr++ = NR_INFO;
-
-	/*
-	 *	Put the data on the end
-	 */
-	skb_put(skb, len);
-
-	/* User data follows immediately after the NET/ROM transport header */
-	if (memcpy_from_msg(skb_transport_header(skb), msg, len)) {
-		kfree_skb(skb);
-		err = -EFAULT;
-		goto out;
-	}
-
-	if (sk->sk_state != TCP_ESTABLISHED) {
-		kfree_skb(skb);
-		err = -ENOTCONN;
-		goto out;
-	}
-
-	nr_output(sk, skb);	/* Shove it onto the queue */
-
-	err = len;
-out:
-	release_sock(sk);
-	return err;
-}
-
-static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
-		      int flags)
-{
-	struct sock *sk = sock->sk;
-	DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name);
-	size_t copied;
-	struct sk_buff *skb;
-	int er;
-
-	/*
-	 * This works for seqpacket too. The receiver has ordered the queue for
-	 * us! We do one quick check first though
-	 */
-
-	lock_sock(sk);
-	if (sk->sk_state != TCP_ESTABLISHED) {
-		release_sock(sk);
-		return -ENOTCONN;
-	}
-
-	/* Now we can treat all alike */
-	skb = skb_recv_datagram(sk, flags, &er);
-	if (!skb) {
-		release_sock(sk);
-		return er;
-	}
-
-	skb_reset_transport_header(skb);
-	copied     = skb->len;
-
-	if (copied > size) {
-		copied = size;
-		msg->msg_flags |= MSG_TRUNC;
-	}
-
-	er = skb_copy_datagram_msg(skb, 0, msg, copied);
-	if (er < 0) {
-		skb_free_datagram(sk, skb);
-		release_sock(sk);
-		return er;
-	}
-
-	if (sax != NULL) {
-		memset(sax, 0, sizeof(*sax));
-		sax->sax25_family = AF_NETROM;
-		skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call,
-			      AX25_ADDR_LEN);
-		msg->msg_namelen = sizeof(*sax);
-	}
-
-	skb_free_datagram(sk, skb);
-
-	release_sock(sk);
-	return copied;
-}
-
-
-static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	struct sock *sk = sock->sk;
-	void __user *argp = (void __user *)arg;
-
-	switch (cmd) {
-	case TIOCOUTQ: {
-		long amount;
-
-		lock_sock(sk);
-		amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
-		if (amount < 0)
-			amount = 0;
-		release_sock(sk);
-		return put_user(amount, (int __user *)argp);
-	}
-
-	case TIOCINQ: {
-		struct sk_buff *skb;
-		long amount = 0L;
-
-		lock_sock(sk);
-		/* These two are safe on a single CPU system as only user tasks fiddle here */
-		if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
-			amount = skb->len;
-		release_sock(sk);
-		return put_user(amount, (int __user *)argp);
-	}
-
-	case SIOCGIFADDR:
-	case SIOCSIFADDR:
-	case SIOCGIFDSTADDR:
-	case SIOCSIFDSTADDR:
-	case SIOCGIFBRDADDR:
-	case SIOCSIFBRDADDR:
-	case SIOCGIFNETMASK:
-	case SIOCSIFNETMASK:
-	case SIOCGIFMETRIC:
-	case SIOCSIFMETRIC:
-		return -EINVAL;
-
-	case SIOCADDRT:
-	case SIOCDELRT:
-	case SIOCNRDECOBS:
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		return nr_rt_ioctl(cmd, argp);
-
-	default:
-		return -ENOIOCTLCMD;
-	}
-
-	return 0;
-}
-
-#ifdef CONFIG_PROC_FS
-
-static void *nr_info_start(struct seq_file *seq, loff_t *pos)
-	__acquires(&nr_list_lock)
-{
-	spin_lock_bh(&nr_list_lock);
-	return seq_hlist_start_head(&nr_list, *pos);
-}
-
-static void *nr_info_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	return seq_hlist_next(v, &nr_list, pos);
-}
-
-static void nr_info_stop(struct seq_file *seq, void *v)
-	__releases(&nr_list_lock)
-{
-	spin_unlock_bh(&nr_list_lock);
-}
-
-static int nr_info_show(struct seq_file *seq, void *v)
-{
-	struct sock *s = sk_entry(v);
-	struct net_device *dev;
-	struct nr_sock *nr;
-	const char *devname;
-	char buf[11];
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq,
-"user_addr dest_node src_node  dev    my  your  st  vs  vr  va    t1     t2     t4      idle   n2  wnd Snd-Q Rcv-Q inode\n");
-
-	else {
-
-		bh_lock_sock(s);
-		nr = nr_sk(s);
-
-		if ((dev = nr->device) == NULL)
-			devname = "???";
-		else
-			devname = dev->name;
-
-		seq_printf(seq, "%-9s ", ax2asc(buf, &nr->user_addr));
-		seq_printf(seq, "%-9s ", ax2asc(buf, &nr->dest_addr));
-		seq_printf(seq,
-"%-9s %-3s  %02X/%02X %02X/%02X %2d %3d %3d %3d %3lu/%03lu %2lu/%02lu %3lu/%03lu %3lu/%03lu %2d/%02d %3d %5d %5d %llu\n",
-			ax2asc(buf, &nr->source_addr),
-			devname,
-			nr->my_index,
-			nr->my_id,
-			nr->your_index,
-			nr->your_id,
-			nr->state,
-			nr->vs,
-			nr->vr,
-			nr->va,
-			ax25_display_timer(&nr->t1timer) / HZ,
-			nr->t1 / HZ,
-			ax25_display_timer(&nr->t2timer) / HZ,
-			nr->t2 / HZ,
-			ax25_display_timer(&nr->t4timer) / HZ,
-			nr->t4 / HZ,
-			ax25_display_timer(&nr->idletimer) / (60 * HZ),
-			nr->idle / (60 * HZ),
-			nr->n2count,
-			nr->n2,
-			nr->window,
-			sk_wmem_alloc_get(s),
-			sk_rmem_alloc_get(s),
-			s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : (u64)0);
-
-		bh_unlock_sock(s);
-	}
-	return 0;
-}
-
-static const struct seq_operations nr_info_seqops = {
-	.start = nr_info_start,
-	.next = nr_info_next,
-	.stop = nr_info_stop,
-	.show = nr_info_show,
-};
-#endif	/* CONFIG_PROC_FS */
-
-static const struct net_proto_family nr_family_ops = {
-	.family		=	PF_NETROM,
-	.create		=	nr_create,
-	.owner		=	THIS_MODULE,
-};
-
-static const struct proto_ops nr_proto_ops = {
-	.family		=	PF_NETROM,
-	.owner		=	THIS_MODULE,
-	.release	=	nr_release,
-	.bind		=	nr_bind,
-	.connect	=	nr_connect,
-	.socketpair	=	sock_no_socketpair,
-	.accept		=	nr_accept,
-	.getname	=	nr_getname,
-	.poll		=	datagram_poll,
-	.ioctl		=	nr_ioctl,
-	.gettstamp	=	sock_gettstamp,
-	.listen		=	nr_listen,
-	.shutdown	=	sock_no_shutdown,
-	.setsockopt	=	nr_setsockopt,
-	.getsockopt	=	nr_getsockopt,
-	.sendmsg	=	nr_sendmsg,
-	.recvmsg	=	nr_recvmsg,
-	.mmap		=	sock_no_mmap,
-};
-
-static struct notifier_block nr_dev_notifier = {
-	.notifier_call	=	nr_device_event,
-};
-
-static struct net_device **dev_nr;
-
-static struct ax25_protocol nr_pid = {
-	.pid	= AX25_P_NETROM,
-	.func	= nr_route_frame
-};
-
-static struct ax25_linkfail nr_linkfail_notifier = {
-	.func	= nr_link_failed,
-};
-
-static int __init nr_proto_init(void)
-{
-	int i;
-	int rc = proto_register(&nr_proto, 0);
-
-	if (rc)
-		return rc;
-
-	if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) {
-		pr_err("NET/ROM: %s - nr_ndevs parameter too large\n",
-		       __func__);
-		rc = -EINVAL;
-		goto unregister_proto;
-	}
-
-	dev_nr = kzalloc_objs(struct net_device *, nr_ndevs);
-	if (!dev_nr) {
-		pr_err("NET/ROM: %s - unable to allocate device array\n",
-		       __func__);
-		rc = -ENOMEM;
-		goto unregister_proto;
-	}
-
-	for (i = 0; i < nr_ndevs; i++) {
-		char name[IFNAMSIZ];
-		struct net_device *dev;
-
-		sprintf(name, "nr%d", i);
-		dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup);
-		if (!dev) {
-			rc = -ENOMEM;
-			goto fail;
-		}
-
-		dev->base_addr = i;
-		rc = register_netdev(dev);
-		if (rc) {
-			free_netdev(dev);
-			goto fail;
-		}
-		nr_set_lockdep_key(dev);
-		dev_nr[i] = dev;
-	}
-
-	rc = sock_register(&nr_family_ops);
-	if (rc)
-		goto fail;
-
-	rc = register_netdevice_notifier(&nr_dev_notifier);
-	if (rc)
-		goto out_sock;
-
-	ax25_register_pid(&nr_pid);
-	ax25_linkfail_register(&nr_linkfail_notifier);
-
-#ifdef CONFIG_SYSCTL
-	rc = nr_register_sysctl();
-	if (rc)
-		goto out_sysctl;
-#endif
-
-	nr_loopback_init();
-
-	rc = -ENOMEM;
-	if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops))
-		goto proc_remove1;
-	if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net,
-			     &nr_neigh_seqops))
-		goto proc_remove2;
-	if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net,
-			     &nr_node_seqops))
-		goto proc_remove3;
-
-	return 0;
-
-proc_remove3:
-	remove_proc_entry("nr_neigh", init_net.proc_net);
-proc_remove2:
-	remove_proc_entry("nr", init_net.proc_net);
-proc_remove1:
-
-	nr_loopback_clear();
-	nr_rt_free();
-
-#ifdef CONFIG_SYSCTL
-	nr_unregister_sysctl();
-out_sysctl:
-#endif
-	ax25_linkfail_release(&nr_linkfail_notifier);
-	ax25_protocol_release(AX25_P_NETROM);
-	unregister_netdevice_notifier(&nr_dev_notifier);
-out_sock:
-	sock_unregister(PF_NETROM);
-fail:
-	while (--i >= 0) {
-		unregister_netdev(dev_nr[i]);
-		free_netdev(dev_nr[i]);
-	}
-	kfree(dev_nr);
-unregister_proto:
-	proto_unregister(&nr_proto);
-	return rc;
-}
-
-module_init(nr_proto_init);
-
-module_param(nr_ndevs, int, 0);
-MODULE_PARM_DESC(nr_ndevs, "number of NET/ROM devices");
-
-MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>");
-MODULE_DESCRIPTION("The amateur radio NET/ROM network and transport layer protocol");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_NETROM);
-
-static void __exit nr_exit(void)
-{
-	int i;
-
-	remove_proc_entry("nr", init_net.proc_net);
-	remove_proc_entry("nr_neigh", init_net.proc_net);
-	remove_proc_entry("nr_nodes", init_net.proc_net);
-	nr_loopback_clear();
-
-	nr_rt_free();
-
-#ifdef CONFIG_SYSCTL
-	nr_unregister_sysctl();
-#endif
-
-	ax25_linkfail_release(&nr_linkfail_notifier);
-	ax25_protocol_release(AX25_P_NETROM);
-
-	unregister_netdevice_notifier(&nr_dev_notifier);
-
-	sock_unregister(PF_NETROM);
-
-	for (i = 0; i < nr_ndevs; i++) {
-		struct net_device *dev = dev_nr[i];
-		if (dev) {
-			unregister_netdev(dev);
-			free_netdev(dev);
-		}
-	}
-
-	kfree(dev_nr);
-	proto_unregister(&nr_proto);
-}
-module_exit(nr_exit);
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
deleted file mode 100644
index 2c34389c3ce6..000000000000
--- a/net/netrom/nr_dev.c
+++ /dev/null
@@ -1,178 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/sysctl.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>	/* For the statistics structure. */
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-
-#include <asm/io.h>
-
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-
-#include <net/ip.h>
-#include <net/arp.h>
-
-#include <net/ax25.h>
-#include <net/netrom.h>
-
-/*
- *	Only allow IP over NET/ROM frames through if the netrom device is up.
- */
-
-int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
-{
-	struct net_device_stats *stats = &dev->stats;
-
-	if (!netif_running(dev)) {
-		stats->rx_dropped++;
-		return 0;
-	}
-
-	stats->rx_packets++;
-	stats->rx_bytes += skb->len;
-
-	skb->protocol = htons(ETH_P_IP);
-
-	/* Spoof incoming device */
-	skb->dev      = dev;
-	skb->mac_header = skb->network_header;
-	skb_reset_network_header(skb);
-	skb->pkt_type = PACKET_HOST;
-
-	netif_rx(skb);
-
-	return 1;
-}
-
-static int nr_header(struct sk_buff *skb, struct net_device *dev,
-		     unsigned short type,
-		     const void *daddr, const void *saddr, unsigned int len)
-{
-	unsigned char *buff = skb_push(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
-
-	memcpy(buff, (saddr != NULL) ? saddr : dev->dev_addr, dev->addr_len);
-	buff[6] &= ~AX25_CBIT;
-	buff[6] &= ~AX25_EBIT;
-	buff[6] |= AX25_SSSID_SPARE;
-	buff    += AX25_ADDR_LEN;
-
-	if (daddr != NULL)
-		memcpy(buff, daddr, dev->addr_len);
-	buff[6] &= ~AX25_CBIT;
-	buff[6] |= AX25_EBIT;
-	buff[6] |= AX25_SSSID_SPARE;
-	buff    += AX25_ADDR_LEN;
-
-	*buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
-
-	*buff++ = NR_PROTO_IP;
-	*buff++ = NR_PROTO_IP;
-	*buff++ = 0;
-	*buff++ = 0;
-	*buff++ = NR_PROTOEXT;
-
-	if (daddr != NULL)
-		return 37;
-
-	return -37;
-}
-
-static int __must_check nr_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct sockaddr *sa = addr;
-	int err;
-
-	if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len))
-		return 0;
-
-	if (dev->flags & IFF_UP) {
-		err = ax25_listen_register((ax25_address *)sa->sa_data, NULL);
-		if (err)
-			return err;
-
-		ax25_listen_release((const ax25_address *)dev->dev_addr, NULL);
-	}
-
-	dev_addr_set(dev, sa->sa_data);
-
-	return 0;
-}
-
-static int nr_open(struct net_device *dev)
-{
-	int err;
-
-	err = ax25_listen_register((const ax25_address *)dev->dev_addr, NULL);
-	if (err)
-		return err;
-
-	netif_start_queue(dev);
-
-	return 0;
-}
-
-static int nr_close(struct net_device *dev)
-{
-	ax25_listen_release((const ax25_address *)dev->dev_addr, NULL);
-	netif_stop_queue(dev);
-	return 0;
-}
-
-static netdev_tx_t nr_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct net_device_stats *stats = &dev->stats;
-	unsigned int len = skb->len;
-
-	if (!nr_route_frame(skb, NULL)) {
-		kfree_skb(skb);
-		stats->tx_errors++;
-		return NETDEV_TX_OK;
-	}
-
-	stats->tx_packets++;
-	stats->tx_bytes += len;
-
-	return NETDEV_TX_OK;
-}
-
-static const struct header_ops nr_header_ops = {
-	.create	= nr_header,
-};
-
-static const struct net_device_ops nr_netdev_ops = {
-	.ndo_open		= nr_open,
-	.ndo_stop		= nr_close,
-	.ndo_start_xmit		= nr_xmit,
-	.ndo_set_mac_address    = nr_set_mac_address,
-};
-
-void nr_setup(struct net_device *dev)
-{
-	dev->mtu		= NR_MAX_PACKET_SIZE;
-	dev->netdev_ops		= &nr_netdev_ops;
-	dev->header_ops		= &nr_header_ops;
-	dev->hard_header_len	= NR_NETWORK_LEN + NR_TRANSPORT_LEN;
-	dev->addr_len		= AX25_ADDR_LEN;
-	dev->type		= ARPHRD_NETROM;
-
-	/* New-style flags. */
-	dev->flags		= IFF_NOARP;
-}
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
deleted file mode 100644
index 97944db6b5ac..000000000000
--- a/net/netrom/nr_in.c
+++ /dev/null
@@ -1,301 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <net/netrom.h>
-
-static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
-{
-	struct sk_buff *skbo, *skbn = skb;
-	struct nr_sock *nr = nr_sk(sk);
-
-	skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
-
-	nr_start_idletimer(sk);
-
-	if (more) {
-		nr->fraglen += skb->len;
-		skb_queue_tail(&nr->frag_queue, skb);
-		return 0;
-	}
-
-	if (!more && nr->fraglen > 0) {	/* End of fragment */
-		nr->fraglen += skb->len;
-		skb_queue_tail(&nr->frag_queue, skb);
-
-		if ((skbn = alloc_skb(nr->fraglen, GFP_ATOMIC)) == NULL)
-			return 1;
-
-		skb_reset_transport_header(skbn);
-
-		while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) {
-			skb_copy_from_linear_data(skbo,
-						  skb_put(skbn, skbo->len),
-						  skbo->len);
-			kfree_skb(skbo);
-		}
-
-		nr->fraglen = 0;
-	}
-
-	return sock_queue_rcv_skb(sk, skbn);
-}
-
-/*
- * State machine for state 1, Awaiting Connection State.
- * The handling of the timer(s) is in file nr_timer.c.
- * Handling of state 0 and connection release is in netrom.c.
- */
-static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
-	int frametype)
-{
-	switch (frametype) {
-	case NR_CONNACK: {
-		struct nr_sock *nr = nr_sk(sk);
-
-		nr_stop_t1timer(sk);
-		nr_start_idletimer(sk);
-		nr->your_index = skb->data[17];
-		nr->your_id    = skb->data[18];
-		nr->vs	       = 0;
-		nr->va	       = 0;
-		nr->vr	       = 0;
-		nr->vl	       = 0;
-		nr->state      = NR_STATE_3;
-		nr->n2count    = 0;
-		nr->window     = skb->data[20];
-		sk->sk_state   = TCP_ESTABLISHED;
-		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_state_change(sk);
-		break;
-	}
-
-	case NR_CONNACK | NR_CHOKE_FLAG:
-		nr_disconnect(sk, ECONNREFUSED);
-		break;
-
-	case NR_RESET:
-		if (READ_ONCE(sysctl_netrom_reset_circuit))
-			nr_disconnect(sk, ECONNRESET);
-		break;
-
-	default:
-		break;
-	}
-	return 0;
-}
-
-/*
- * State machine for state 2, Awaiting Release State.
- * The handling of the timer(s) is in file nr_timer.c
- * Handling of state 0 and connection release is in netrom.c.
- */
-static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
-	int frametype)
-{
-	switch (frametype) {
-	case NR_CONNACK | NR_CHOKE_FLAG:
-		nr_disconnect(sk, ECONNRESET);
-		break;
-
-	case NR_DISCREQ:
-		nr_write_internal(sk, NR_DISCACK);
-		fallthrough;
-	case NR_DISCACK:
-		nr_disconnect(sk, 0);
-		break;
-
-	case NR_RESET:
-		if (READ_ONCE(sysctl_netrom_reset_circuit))
-			nr_disconnect(sk, ECONNRESET);
-		break;
-
-	default:
-		break;
-	}
-	return 0;
-}
-
-/*
- * State machine for state 3, Connected State.
- * The handling of the timer(s) is in file nr_timer.c
- * Handling of state 0 and connection release is in netrom.c.
- */
-static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype)
-{
-	struct nr_sock *nrom = nr_sk(sk);
-	struct sk_buff_head temp_queue;
-	struct sk_buff *skbn;
-	unsigned short save_vr;
-	unsigned short nr, ns;
-	int queued = 0;
-
-	nr = skb->data[18];
-
-	switch (frametype) {
-	case NR_CONNREQ:
-		nr_write_internal(sk, NR_CONNACK);
-		break;
-
-	case NR_DISCREQ:
-		nr_write_internal(sk, NR_DISCACK);
-		nr_disconnect(sk, 0);
-		break;
-
-	case NR_CONNACK | NR_CHOKE_FLAG:
-	case NR_DISCACK:
-		nr_disconnect(sk, ECONNRESET);
-		break;
-
-	case NR_INFOACK:
-	case NR_INFOACK | NR_CHOKE_FLAG:
-	case NR_INFOACK | NR_NAK_FLAG:
-	case NR_INFOACK | NR_NAK_FLAG | NR_CHOKE_FLAG:
-		if (frametype & NR_CHOKE_FLAG) {
-			nrom->condition |= NR_COND_PEER_RX_BUSY;
-			nr_start_t4timer(sk);
-		} else {
-			nrom->condition &= ~NR_COND_PEER_RX_BUSY;
-			nr_stop_t4timer(sk);
-		}
-		if (!nr_validate_nr(sk, nr)) {
-			break;
-		}
-		if (frametype & NR_NAK_FLAG) {
-			nr_frames_acked(sk, nr);
-			nr_send_nak_frame(sk);
-		} else {
-			if (nrom->condition & NR_COND_PEER_RX_BUSY) {
-				nr_frames_acked(sk, nr);
-			} else {
-				nr_check_iframes_acked(sk, nr);
-			}
-		}
-		break;
-
-	case NR_INFO:
-	case NR_INFO | NR_NAK_FLAG:
-	case NR_INFO | NR_CHOKE_FLAG:
-	case NR_INFO | NR_MORE_FLAG:
-	case NR_INFO | NR_NAK_FLAG | NR_CHOKE_FLAG:
-	case NR_INFO | NR_CHOKE_FLAG | NR_MORE_FLAG:
-	case NR_INFO | NR_NAK_FLAG | NR_MORE_FLAG:
-	case NR_INFO | NR_NAK_FLAG | NR_CHOKE_FLAG | NR_MORE_FLAG:
-		if (frametype & NR_CHOKE_FLAG) {
-			nrom->condition |= NR_COND_PEER_RX_BUSY;
-			nr_start_t4timer(sk);
-		} else {
-			nrom->condition &= ~NR_COND_PEER_RX_BUSY;
-			nr_stop_t4timer(sk);
-		}
-		if (nr_validate_nr(sk, nr)) {
-			if (frametype & NR_NAK_FLAG) {
-				nr_frames_acked(sk, nr);
-				nr_send_nak_frame(sk);
-			} else {
-				if (nrom->condition & NR_COND_PEER_RX_BUSY) {
-					nr_frames_acked(sk, nr);
-				} else {
-					nr_check_iframes_acked(sk, nr);
-				}
-			}
-		}
-		queued = 1;
-		skb_queue_head(&nrom->reseq_queue, skb);
-		if (nrom->condition & NR_COND_OWN_RX_BUSY)
-			break;
-		skb_queue_head_init(&temp_queue);
-		do {
-			save_vr = nrom->vr;
-			while ((skbn = skb_dequeue(&nrom->reseq_queue)) != NULL) {
-				ns = skbn->data[17];
-				if (ns == nrom->vr) {
-					if (nr_queue_rx_frame(sk, skbn, frametype & NR_MORE_FLAG) == 0) {
-						nrom->vr = (nrom->vr + 1) % NR_MODULUS;
-					} else {
-						nrom->condition |= NR_COND_OWN_RX_BUSY;
-						skb_queue_tail(&temp_queue, skbn);
-					}
-				} else if (nr_in_rx_window(sk, ns)) {
-					skb_queue_tail(&temp_queue, skbn);
-				} else {
-					kfree_skb(skbn);
-				}
-			}
-			while ((skbn = skb_dequeue(&temp_queue)) != NULL) {
-				skb_queue_tail(&nrom->reseq_queue, skbn);
-			}
-		} while (save_vr != nrom->vr);
-		/*
-		 * Window is full, ack it immediately.
-		 */
-		if (((nrom->vl + nrom->window) % NR_MODULUS) == nrom->vr) {
-			nr_enquiry_response(sk);
-		} else {
-			if (!(nrom->condition & NR_COND_ACK_PENDING)) {
-				nrom->condition |= NR_COND_ACK_PENDING;
-				nr_start_t2timer(sk);
-			}
-		}
-		break;
-
-	case NR_RESET:
-		if (READ_ONCE(sysctl_netrom_reset_circuit))
-			nr_disconnect(sk, ECONNRESET);
-		break;
-
-	default:
-		break;
-	}
-	return queued;
-}
-
-/* Higher level upcall for a LAPB frame - called with sk locked */
-int nr_process_rx_frame(struct sock *sk, struct sk_buff *skb)
-{
-	struct nr_sock *nr = nr_sk(sk);
-	int queued = 0, frametype;
-
-	if (nr->state == NR_STATE_0)
-		return 0;
-
-	frametype = skb->data[19];
-
-	switch (nr->state) {
-	case NR_STATE_1:
-		queued = nr_state1_machine(sk, skb, frametype);
-		break;
-	case NR_STATE_2:
-		queued = nr_state2_machine(sk, skb, frametype);
-		break;
-	case NR_STATE_3:
-		queued = nr_state3_machine(sk, skb, frametype);
-		break;
-	}
-
-	nr_kick(sk);
-
-	return queued;
-}
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
deleted file mode 100644
index 7a9d765b30c0..000000000000
--- a/net/netrom/nr_loopback.c
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright Tomi Manninen OH2BNS (oh2bns@sral.fi)
- */
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-#include <linux/timer.h>
-#include <net/ax25.h>
-#include <linux/skbuff.h>
-#include <net/netrom.h>
-#include <linux/init.h>
-
-static void nr_loopback_timer(struct timer_list *);
-
-static struct sk_buff_head loopback_queue;
-static DEFINE_TIMER(loopback_timer, nr_loopback_timer);
-
-void __init nr_loopback_init(void)
-{
-	skb_queue_head_init(&loopback_queue);
-}
-
-static inline int nr_loopback_running(void)
-{
-	return timer_pending(&loopback_timer);
-}
-
-int nr_loopback_queue(struct sk_buff *skb)
-{
-	struct sk_buff *skbn;
-
-	if ((skbn = alloc_skb(skb->len, GFP_ATOMIC)) != NULL) {
-		skb_copy_from_linear_data(skb, skb_put(skbn, skb->len), skb->len);
-		skb_reset_transport_header(skbn);
-
-		skb_queue_tail(&loopback_queue, skbn);
-
-		if (!nr_loopback_running())
-			mod_timer(&loopback_timer, jiffies + 10);
-	}
-
-	kfree_skb(skb);
-	return 1;
-}
-
-static void nr_loopback_timer(struct timer_list *unused)
-{
-	struct sk_buff *skb;
-	ax25_address *nr_dest;
-	struct net_device *dev;
-
-	if ((skb = skb_dequeue(&loopback_queue)) != NULL) {
-		nr_dest = (ax25_address *)(skb->data + 7);
-
-		dev = nr_dev_get(nr_dest);
-
-		if (dev == NULL || nr_rx_frame(skb, dev) == 0)
-			kfree_skb(skb);
-
-		dev_put(dev);
-
-		if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running())
-			mod_timer(&loopback_timer, jiffies + 10);
-	}
-}
-
-void nr_loopback_clear(void)
-{
-	timer_delete_sync(&loopback_timer);
-	skb_queue_purge(&loopback_queue);
-}
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
deleted file mode 100644
index 2b3cbceb0b52..000000000000
--- a/net/netrom/nr_out.c
+++ /dev/null
@@ -1,272 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <net/netrom.h>
-
-/*
- *	This is where all NET/ROM frames pass, except for IP-over-NET/ROM which
- *	cannot be fragmented in this manner.
- */
-void nr_output(struct sock *sk, struct sk_buff *skb)
-{
-	struct sk_buff *skbn;
-	unsigned char transport[NR_TRANSPORT_LEN];
-	int err, frontlen, len;
-
-	if (skb->len - NR_TRANSPORT_LEN > NR_MAX_PACKET_SIZE) {
-		/* Save a copy of the Transport Header */
-		skb_copy_from_linear_data(skb, transport, NR_TRANSPORT_LEN);
-		skb_pull(skb, NR_TRANSPORT_LEN);
-
-		frontlen = skb_headroom(skb);
-
-		while (skb->len > 0) {
-			if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL) {
-				kfree_skb(skb);
-				return;
-			}
-
-			skb_reserve(skbn, frontlen);
-
-			len = (NR_MAX_PACKET_SIZE > skb->len) ? skb->len : NR_MAX_PACKET_SIZE;
-
-			/* Copy the user data */
-			skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
-			skb_pull(skb, len);
-
-			/* Duplicate the Transport Header */
-			skb_push(skbn, NR_TRANSPORT_LEN);
-			skb_copy_to_linear_data(skbn, transport,
-						NR_TRANSPORT_LEN);
-			if (skb->len > 0)
-				skbn->data[4] |= NR_MORE_FLAG;
-
-			skb_queue_tail(&sk->sk_write_queue, skbn); /* Throw it on the queue */
-		}
-
-		kfree_skb(skb);
-	} else {
-		skb_queue_tail(&sk->sk_write_queue, skb);		/* Throw it on the queue */
-	}
-
-	nr_kick(sk);
-}
-
-/*
- *	This procedure is passed a buffer descriptor for an iframe. It builds
- *	the rest of the control part of the frame and then writes it out.
- */
-static void nr_send_iframe(struct sock *sk, struct sk_buff *skb)
-{
-	struct nr_sock *nr = nr_sk(sk);
-
-	if (skb == NULL)
-		return;
-
-	skb->data[2] = nr->vs;
-	skb->data[3] = nr->vr;
-
-	if (nr->condition & NR_COND_OWN_RX_BUSY)
-		skb->data[4] |= NR_CHOKE_FLAG;
-
-	nr_start_idletimer(sk);
-
-	nr_transmit_buffer(sk, skb);
-}
-
-void nr_send_nak_frame(struct sock *sk)
-{
-	struct sk_buff *skb, *skbn;
-	struct nr_sock *nr = nr_sk(sk);
-
-	if ((skb = skb_peek(&nr->ack_queue)) == NULL)
-		return;
-
-	if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL)
-		return;
-
-	skbn->data[2] = nr->va;
-	skbn->data[3] = nr->vr;
-
-	if (nr->condition & NR_COND_OWN_RX_BUSY)
-		skbn->data[4] |= NR_CHOKE_FLAG;
-
-	nr_transmit_buffer(sk, skbn);
-
-	nr->condition &= ~NR_COND_ACK_PENDING;
-	nr->vl         = nr->vr;
-
-	nr_stop_t1timer(sk);
-}
-
-void nr_kick(struct sock *sk)
-{
-	struct nr_sock *nr = nr_sk(sk);
-	struct sk_buff *skb, *skbn;
-	unsigned short start, end;
-
-	if (nr->state != NR_STATE_3)
-		return;
-
-	if (nr->condition & NR_COND_PEER_RX_BUSY)
-		return;
-
-	if (!skb_peek(&sk->sk_write_queue))
-		return;
-
-	start = (skb_peek(&nr->ack_queue) == NULL) ? nr->va : nr->vs;
-	end   = (nr->va + nr->window) % NR_MODULUS;
-
-	if (start == end)
-		return;
-
-	nr->vs = start;
-
-	/*
-	 * Transmit data until either we're out of data to send or
-	 * the window is full.
-	 */
-
-	/*
-	 * Dequeue the frame and copy it.
-	 */
-	skb = skb_dequeue(&sk->sk_write_queue);
-
-	do {
-		if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
-			skb_queue_head(&sk->sk_write_queue, skb);
-			break;
-		}
-
-		skb_set_owner_w(skbn, sk);
-
-		/*
-		 * Transmit the frame copy.
-		 */
-		nr_send_iframe(sk, skbn);
-
-		nr->vs = (nr->vs + 1) % NR_MODULUS;
-
-		/*
-		 * Requeue the original data frame.
-		 */
-		skb_queue_tail(&nr->ack_queue, skb);
-
-	} while (nr->vs != end &&
-		 (skb = skb_dequeue(&sk->sk_write_queue)) != NULL);
-
-	nr->vl         = nr->vr;
-	nr->condition &= ~NR_COND_ACK_PENDING;
-
-	if (!nr_t1timer_running(sk))
-		nr_start_t1timer(sk);
-}
-
-void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb)
-{
-	struct nr_sock *nr = nr_sk(sk);
-	unsigned char *dptr;
-
-	/*
-	 *	Add the protocol byte and network header.
-	 */
-	dptr = skb_push(skb, NR_NETWORK_LEN);
-
-	memcpy(dptr, &nr->source_addr, AX25_ADDR_LEN);
-	dptr[6] &= ~AX25_CBIT;
-	dptr[6] &= ~AX25_EBIT;
-	dptr[6] |= AX25_SSSID_SPARE;
-	dptr += AX25_ADDR_LEN;
-
-	memcpy(dptr, &nr->dest_addr, AX25_ADDR_LEN);
-	dptr[6] &= ~AX25_CBIT;
-	dptr[6] |= AX25_EBIT;
-	dptr[6] |= AX25_SSSID_SPARE;
-	dptr += AX25_ADDR_LEN;
-
-	*dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
-
-	if (!nr_route_frame(skb, NULL)) {
-		kfree_skb(skb);
-		nr_disconnect(sk, ENETUNREACH);
-	}
-}
-
-/*
- * The following routines are taken from page 170 of the 7th ARRL Computer
- * Networking Conference paper, as is the whole state machine.
- */
-
-void nr_establish_data_link(struct sock *sk)
-{
-	struct nr_sock *nr = nr_sk(sk);
-
-	nr->condition = 0x00;
-	nr->n2count   = 0;
-
-	nr_write_internal(sk, NR_CONNREQ);
-
-	nr_stop_t2timer(sk);
-	nr_stop_t4timer(sk);
-	nr_stop_idletimer(sk);
-	nr_start_t1timer(sk);
-}
-
-/*
- * Never send a NAK when we are CHOKEd.
- */
-void nr_enquiry_response(struct sock *sk)
-{
-	struct nr_sock *nr = nr_sk(sk);
-	int frametype = NR_INFOACK;
-
-	if (nr->condition & NR_COND_OWN_RX_BUSY) {
-		frametype |= NR_CHOKE_FLAG;
-	} else {
-		if (skb_peek(&nr->reseq_queue) != NULL)
-			frametype |= NR_NAK_FLAG;
-	}
-
-	nr_write_internal(sk, frametype);
-
-	nr->vl         = nr->vr;
-	nr->condition &= ~NR_COND_ACK_PENDING;
-}
-
-void nr_check_iframes_acked(struct sock *sk, unsigned short nr)
-{
-	struct nr_sock *nrom = nr_sk(sk);
-
-	if (nrom->vs == nr) {
-		nr_frames_acked(sk, nr);
-		nr_stop_t1timer(sk);
-		nrom->n2count = 0;
-	} else {
-		if (nrom->va != nr) {
-			nr_frames_acked(sk, nr);
-			nr_start_t1timer(sk);
-		}
-	}
-}
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
deleted file mode 100644
index 9cc29ae85b06..000000000000
--- a/net/netrom/nr_route.c
+++ /dev/null
@@ -1,989 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright Tomi Manninen OH2BNS (oh2bns@sral.fi)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <net/arp.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <net/netrom.h>
-#include <linux/seq_file.h>
-#include <linux/export.h>
-
-static unsigned int nr_neigh_no = 1;
-
-static HLIST_HEAD(nr_node_list);
-static DEFINE_SPINLOCK(nr_node_list_lock);
-static HLIST_HEAD(nr_neigh_list);
-static DEFINE_SPINLOCK(nr_neigh_list_lock);
-
-static struct nr_node *nr_node_get(ax25_address *callsign)
-{
-	struct nr_node *found = NULL;
-	struct nr_node *nr_node;
-
-	spin_lock_bh(&nr_node_list_lock);
-	nr_node_for_each(nr_node, &nr_node_list)
-		if (ax25cmp(callsign, &nr_node->callsign) == 0) {
-			nr_node_hold(nr_node);
-			found = nr_node;
-			break;
-		}
-	spin_unlock_bh(&nr_node_list_lock);
-	return found;
-}
-
-static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign,
-					 struct net_device *dev)
-{
-	struct nr_neigh *found = NULL;
-	struct nr_neigh *nr_neigh;
-
-	spin_lock_bh(&nr_neigh_list_lock);
-	nr_neigh_for_each(nr_neigh, &nr_neigh_list)
-		if (ax25cmp(callsign, &nr_neigh->callsign) == 0 &&
-		    nr_neigh->dev == dev) {
-			nr_neigh_hold(nr_neigh);
-			found = nr_neigh;
-			break;
-		}
-	spin_unlock_bh(&nr_neigh_list_lock);
-	return found;
-}
-
-static void nr_remove_neigh(struct nr_neigh *);
-
-/*      re-sort the routes in quality order.    */
-static void re_sort_routes(struct nr_node *nr_node, int x, int y)
-{
-	if (nr_node->routes[y].quality > nr_node->routes[x].quality) {
-		if (nr_node->which == x)
-			nr_node->which = y;
-		else if (nr_node->which == y)
-			nr_node->which = x;
-
-		swap(nr_node->routes[x], nr_node->routes[y]);
-	}
-}
-
-/*
- *	Add a new route to a node, and in the process add the node and the
- *	neighbour if it is new.
- */
-static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
-	ax25_address *ax25, ax25_digi *ax25_digi, struct net_device *dev,
-	int quality, int obs_count)
-{
-	struct nr_node  *nr_node;
-	struct nr_neigh *nr_neigh;
-	int i, found;
-	struct net_device *odev;
-
-	if ((odev=nr_dev_get(nr)) != NULL) {	/* Can't add routes to ourself */
-		dev_put(odev);
-		return -EINVAL;
-	}
-
-	nr_node = nr_node_get(nr);
-
-	nr_neigh = nr_neigh_get_dev(ax25, dev);
-
-	/*
-	 * The L2 link to a neighbour has failed in the past
-	 * and now a frame comes from this neighbour. We assume
-	 * it was a temporary trouble with the link and reset the
-	 * routes now (and not wait for a node broadcast).
-	 */
-	if (nr_neigh != NULL && nr_neigh->failed != 0 && quality == 0) {
-		struct nr_node *nr_nodet;
-
-		spin_lock_bh(&nr_node_list_lock);
-		nr_node_for_each(nr_nodet, &nr_node_list) {
-			nr_node_lock(nr_nodet);
-			for (i = 0; i < nr_nodet->count; i++)
-				if (nr_nodet->routes[i].neighbour == nr_neigh)
-					if (i < nr_nodet->which)
-						nr_nodet->which = i;
-			nr_node_unlock(nr_nodet);
-		}
-		spin_unlock_bh(&nr_node_list_lock);
-	}
-
-	if (nr_neigh != NULL)
-		nr_neigh->failed = 0;
-
-	if (quality == 0 && nr_neigh != NULL && nr_node != NULL) {
-		nr_neigh_put(nr_neigh);
-		nr_node_put(nr_node);
-		return 0;
-	}
-
-	if (nr_neigh == NULL) {
-		if ((nr_neigh = kmalloc(sizeof(*nr_neigh), GFP_ATOMIC)) == NULL) {
-			if (nr_node)
-				nr_node_put(nr_node);
-			return -ENOMEM;
-		}
-
-		nr_neigh->callsign = *ax25;
-		nr_neigh->digipeat = NULL;
-		nr_neigh->ax25     = NULL;
-		nr_neigh->dev      = dev;
-		nr_neigh->quality  = READ_ONCE(sysctl_netrom_default_path_quality);
-		nr_neigh->locked   = 0;
-		nr_neigh->count    = 0;
-		nr_neigh->number   = nr_neigh_no++;
-		nr_neigh->failed   = 0;
-		refcount_set(&nr_neigh->refcount, 1);
-
-		if (ax25_digi != NULL && ax25_digi->ndigi > 0) {
-			nr_neigh->digipeat = kmemdup(ax25_digi,
-						     sizeof(*ax25_digi),
-						     GFP_KERNEL);
-			if (nr_neigh->digipeat == NULL) {
-				kfree(nr_neigh);
-				if (nr_node)
-					nr_node_put(nr_node);
-				return -ENOMEM;
-			}
-		}
-
-		spin_lock_bh(&nr_neigh_list_lock);
-		hlist_add_head(&nr_neigh->neigh_node, &nr_neigh_list);
-		nr_neigh_hold(nr_neigh);
-		spin_unlock_bh(&nr_neigh_list_lock);
-	}
-
-	if (quality != 0 && ax25cmp(nr, ax25) == 0 && !nr_neigh->locked)
-		nr_neigh->quality = quality;
-
-	if (nr_node == NULL) {
-		if ((nr_node = kmalloc(sizeof(*nr_node), GFP_ATOMIC)) == NULL) {
-			if (nr_neigh)
-				nr_neigh_put(nr_neigh);
-			return -ENOMEM;
-		}
-
-		nr_node->callsign = *nr;
-		strscpy(nr_node->mnemonic, mnemonic);
-
-		nr_node->which = 0;
-		nr_node->count = 1;
-		refcount_set(&nr_node->refcount, 1);
-		spin_lock_init(&nr_node->node_lock);
-
-		nr_node->routes[0].quality   = quality;
-		nr_node->routes[0].obs_count = obs_count;
-		nr_node->routes[0].neighbour = nr_neigh;
-
-		nr_neigh_hold(nr_neigh);
-		nr_neigh->count++;
-
-		spin_lock_bh(&nr_node_list_lock);
-		hlist_add_head(&nr_node->node_node, &nr_node_list);
-		/* refcount initialized at 1 */
-		spin_unlock_bh(&nr_node_list_lock);
-
-		nr_neigh_put(nr_neigh);
-		return 0;
-	}
-	nr_node_lock(nr_node);
-
-	if (quality != 0)
-		strscpy(nr_node->mnemonic, mnemonic);
-
-	for (found = 0, i = 0; i < nr_node->count; i++) {
-		if (nr_node->routes[i].neighbour == nr_neigh) {
-			nr_node->routes[i].quality   = quality;
-			nr_node->routes[i].obs_count = obs_count;
-			found = 1;
-			break;
-		}
-	}
-
-	if (!found) {
-		/* We have space at the bottom, slot it in */
-		if (nr_node->count < 3) {
-			nr_node->routes[2] = nr_node->routes[1];
-			nr_node->routes[1] = nr_node->routes[0];
-
-			nr_node->routes[0].quality   = quality;
-			nr_node->routes[0].obs_count = obs_count;
-			nr_node->routes[0].neighbour = nr_neigh;
-
-			nr_node->which++;
-			nr_node->count++;
-			nr_neigh_hold(nr_neigh);
-			nr_neigh->count++;
-		} else {
-			/* It must be better than the worst */
-			if (quality > nr_node->routes[2].quality) {
-				nr_node->routes[2].neighbour->count--;
-				nr_neigh_put(nr_node->routes[2].neighbour);
-
-				if (nr_node->routes[2].neighbour->count == 0 && !nr_node->routes[2].neighbour->locked)
-					nr_remove_neigh(nr_node->routes[2].neighbour);
-
-				nr_node->routes[2].quality   = quality;
-				nr_node->routes[2].obs_count = obs_count;
-				nr_node->routes[2].neighbour = nr_neigh;
-
-				nr_neigh_hold(nr_neigh);
-				nr_neigh->count++;
-			}
-		}
-	}
-
-	/* Now re-sort the routes in quality order */
-	switch (nr_node->count) {
-	case 3:
-		re_sort_routes(nr_node, 0, 1);
-		re_sort_routes(nr_node, 1, 2);
-		fallthrough;
-	case 2:
-		re_sort_routes(nr_node, 0, 1);
-		break;
-	case 1:
-		break;
-	}
-
-	for (i = 0; i < nr_node->count; i++) {
-		if (nr_node->routes[i].neighbour == nr_neigh) {
-			if (i < nr_node->which)
-				nr_node->which = i;
-			break;
-		}
-	}
-
-	nr_neigh_put(nr_neigh);
-	nr_node_unlock(nr_node);
-	nr_node_put(nr_node);
-	return 0;
-}
-
-static void nr_remove_node_locked(struct nr_node *nr_node)
-{
-	lockdep_assert_held(&nr_node_list_lock);
-
-	hlist_del_init(&nr_node->node_node);
-	nr_node_put(nr_node);
-}
-
-static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh)
-{
-	hlist_del_init(&nr_neigh->neigh_node);
-	nr_neigh_put(nr_neigh);
-}
-
-#define nr_remove_neigh_locked(__neigh) \
-	__nr_remove_neigh(__neigh)
-
-static void nr_remove_neigh(struct nr_neigh *nr_neigh)
-{
-	spin_lock_bh(&nr_neigh_list_lock);
-	__nr_remove_neigh(nr_neigh);
-	spin_unlock_bh(&nr_neigh_list_lock);
-}
-
-/*
- *	"Delete" a node. Strictly speaking remove a route to a node. The node
- *	is only deleted if no routes are left to it.
- */
-static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct net_device *dev)
-{
-	struct nr_node  *nr_node;
-	struct nr_neigh *nr_neigh;
-	int i;
-
-	nr_node = nr_node_get(callsign);
-
-	if (nr_node == NULL)
-		return -EINVAL;
-
-	nr_neigh = nr_neigh_get_dev(neighbour, dev);
-
-	if (nr_neigh == NULL) {
-		nr_node_put(nr_node);
-		return -EINVAL;
-	}
-
-	spin_lock_bh(&nr_node_list_lock);
-	nr_node_lock(nr_node);
-	for (i = 0; i < nr_node->count; i++) {
-		if (nr_node->routes[i].neighbour == nr_neigh) {
-			nr_neigh->count--;
-			nr_neigh_put(nr_neigh);
-
-			if (nr_neigh->count == 0 && !nr_neigh->locked)
-				nr_remove_neigh(nr_neigh);
-			nr_neigh_put(nr_neigh);
-
-			nr_node->count--;
-
-			if (nr_node->count == 0) {
-				nr_remove_node_locked(nr_node);
-			} else {
-				switch (i) {
-				case 0:
-					nr_node->routes[0] = nr_node->routes[1];
-					fallthrough;
-				case 1:
-					nr_node->routes[1] = nr_node->routes[2];
-					fallthrough;
-				case 2:
-					break;
-				}
-				nr_node_put(nr_node);
-			}
-			nr_node_unlock(nr_node);
-			spin_unlock_bh(&nr_node_list_lock);
-
-			return 0;
-		}
-	}
-	nr_neigh_put(nr_neigh);
-	nr_node_unlock(nr_node);
-	spin_unlock_bh(&nr_node_list_lock);
-	nr_node_put(nr_node);
-
-	return -EINVAL;
-}
-
-/*
- *	Lock a neighbour with a quality.
- */
-static int __must_check nr_add_neigh(ax25_address *callsign,
-	ax25_digi *ax25_digi, struct net_device *dev, unsigned int quality)
-{
-	struct nr_neigh *nr_neigh;
-
-	nr_neigh = nr_neigh_get_dev(callsign, dev);
-	if (nr_neigh) {
-		nr_neigh->quality = quality;
-		nr_neigh->locked  = 1;
-		nr_neigh_put(nr_neigh);
-		return 0;
-	}
-
-	if ((nr_neigh = kmalloc(sizeof(*nr_neigh), GFP_ATOMIC)) == NULL)
-		return -ENOMEM;
-
-	nr_neigh->callsign = *callsign;
-	nr_neigh->digipeat = NULL;
-	nr_neigh->ax25     = NULL;
-	nr_neigh->dev      = dev;
-	nr_neigh->quality  = quality;
-	nr_neigh->locked   = 1;
-	nr_neigh->count    = 0;
-	nr_neigh->number   = nr_neigh_no++;
-	nr_neigh->failed   = 0;
-	refcount_set(&nr_neigh->refcount, 1);
-
-	if (ax25_digi != NULL && ax25_digi->ndigi > 0) {
-		nr_neigh->digipeat = kmemdup(ax25_digi, sizeof(*ax25_digi),
-					     GFP_KERNEL);
-		if (nr_neigh->digipeat == NULL) {
-			kfree(nr_neigh);
-			return -ENOMEM;
-		}
-	}
-
-	spin_lock_bh(&nr_neigh_list_lock);
-	hlist_add_head(&nr_neigh->neigh_node, &nr_neigh_list);
-	/* refcount is initialized at 1 */
-	spin_unlock_bh(&nr_neigh_list_lock);
-
-	return 0;
-}
-
-/*
- *	"Delete" a neighbour. The neighbour is only removed if the number
- *	of nodes that may use it is zero.
- */
-static int nr_del_neigh(ax25_address *callsign, struct net_device *dev, unsigned int quality)
-{
-	struct nr_neigh *nr_neigh;
-
-	nr_neigh = nr_neigh_get_dev(callsign, dev);
-
-	if (nr_neigh == NULL) return -EINVAL;
-
-	nr_neigh->quality = quality;
-	nr_neigh->locked  = 0;
-
-	if (nr_neigh->count == 0)
-		nr_remove_neigh(nr_neigh);
-	nr_neigh_put(nr_neigh);
-
-	return 0;
-}
-
-/*
- *	Decrement the obsolescence count by one. If a route is reduced to a
- *	count of zero, remove it. Also remove any unlocked neighbours with
- *	zero nodes routing via it.
- */
-static int nr_dec_obs(void)
-{
-	struct nr_neigh *nr_neigh;
-	struct nr_node  *s;
-	struct hlist_node *nodet;
-	int i;
-
-	spin_lock_bh(&nr_node_list_lock);
-	nr_node_for_each_safe(s, nodet, &nr_node_list) {
-		nr_node_lock(s);
-		for (i = 0; i < s->count; i++) {
-			switch (s->routes[i].obs_count) {
-			case 0:		/* A locked entry */
-				break;
-
-			case 1:		/* From 1 -> 0 */
-				nr_neigh = s->routes[i].neighbour;
-
-				nr_neigh->count--;
-				nr_neigh_put(nr_neigh);
-
-				if (nr_neigh->count == 0 && !nr_neigh->locked)
-					nr_remove_neigh(nr_neigh);
-
-				s->count--;
-
-				switch (i) {
-				case 0:
-					s->routes[0] = s->routes[1];
-					fallthrough;
-				case 1:
-					s->routes[1] = s->routes[2];
-					break;
-				case 2:
-					break;
-				}
-				break;
-
-			default:
-				s->routes[i].obs_count--;
-				break;
-
-			}
-		}
-
-		if (s->count <= 0)
-			nr_remove_node_locked(s);
-		nr_node_unlock(s);
-	}
-	spin_unlock_bh(&nr_node_list_lock);
-
-	return 0;
-}
-
-/*
- *	A device has been removed. Remove its routes and neighbours.
- */
-void nr_rt_device_down(struct net_device *dev)
-{
-	struct nr_neigh *s;
-	struct hlist_node *nodet, *node2t;
-	struct nr_node  *t;
-	int i;
-
-	spin_lock_bh(&nr_neigh_list_lock);
-	nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) {
-		if (s->dev == dev) {
-			spin_lock_bh(&nr_node_list_lock);
-			nr_node_for_each_safe(t, node2t, &nr_node_list) {
-				nr_node_lock(t);
-				for (i = 0; i < t->count; i++) {
-					if (t->routes[i].neighbour == s) {
-						t->count--;
-
-						switch (i) {
-						case 0:
-							t->routes[0] = t->routes[1];
-							fallthrough;
-						case 1:
-							t->routes[1] = t->routes[2];
-							break;
-						case 2:
-							break;
-						}
-					}
-				}
-
-				if (t->count <= 0)
-					nr_remove_node_locked(t);
-				nr_node_unlock(t);
-			}
-			spin_unlock_bh(&nr_node_list_lock);
-
-			nr_remove_neigh_locked(s);
-		}
-	}
-	spin_unlock_bh(&nr_neigh_list_lock);
-}
-
-/*
- *	Check that the device given is a valid AX.25 interface that is "up".
- *	Or a valid ethernet interface with an AX.25 callsign binding.
- */
-static struct net_device *nr_ax25_dev_get(char *devname)
-{
-	struct net_device *dev;
-
-	if ((dev = dev_get_by_name(&init_net, devname)) == NULL)
-		return NULL;
-
-	if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25)
-		return dev;
-
-	dev_put(dev);
-	return NULL;
-}
-
-/*
- *	Find the first active NET/ROM device, usually "nr0".
- */
-struct net_device *nr_dev_first(void)
-{
-	struct net_device *dev, *first = NULL;
-
-	rcu_read_lock();
-	for_each_netdev_rcu(&init_net, dev) {
-		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM)
-			if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
-				first = dev;
-	}
-	dev_hold(first);
-	rcu_read_unlock();
-
-	return first;
-}
-
-/*
- *	Find the NET/ROM device for the given callsign.
- */
-struct net_device *nr_dev_get(ax25_address *addr)
-{
-	struct net_device *dev;
-
-	rcu_read_lock();
-	for_each_netdev_rcu(&init_net, dev) {
-		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM &&
-		    ax25cmp(addr, (const ax25_address *)dev->dev_addr) == 0) {
-			dev_hold(dev);
-			goto out;
-		}
-	}
-	dev = NULL;
-out:
-	rcu_read_unlock();
-	return dev;
-}
-
-static ax25_digi *nr_call_to_digi(ax25_digi *digi, int ndigis,
-	ax25_address *digipeaters)
-{
-	int i;
-
-	if (ndigis == 0)
-		return NULL;
-
-	for (i = 0; i < ndigis; i++) {
-		digi->calls[i]    = digipeaters[i];
-		digi->repeated[i] = 0;
-	}
-
-	digi->ndigi      = ndigis;
-	digi->lastrepeat = -1;
-
-	return digi;
-}
-
-/*
- *	Handle the ioctls that control the routing functions.
- */
-int nr_rt_ioctl(unsigned int cmd, void __user *arg)
-{
-	struct nr_route_struct nr_route;
-	struct net_device *dev;
-	ax25_digi digi;
-	int ret;
-
-	switch (cmd) {
-	case SIOCADDRT:
-		if (copy_from_user(&nr_route, arg, sizeof(struct nr_route_struct)))
-			return -EFAULT;
-		if (nr_route.ndigis > AX25_MAX_DIGIS)
-			return -EINVAL;
-		if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL)
-			return -EINVAL;
-		switch (nr_route.type) {
-		case NETROM_NODE:
-			if (strnlen(nr_route.mnemonic, 7) == 7) {
-				ret = -EINVAL;
-				break;
-			}
-
-			ret = nr_add_node(&nr_route.callsign,
-				nr_route.mnemonic,
-				&nr_route.neighbour,
-				nr_call_to_digi(&digi, nr_route.ndigis,
-						nr_route.digipeaters),
-				dev, nr_route.quality,
-				nr_route.obs_count);
-			break;
-		case NETROM_NEIGH:
-			ret = nr_add_neigh(&nr_route.callsign,
-				nr_call_to_digi(&digi, nr_route.ndigis,
-						nr_route.digipeaters),
-				dev, nr_route.quality);
-			break;
-		default:
-			ret = -EINVAL;
-		}
-		dev_put(dev);
-		return ret;
-
-	case SIOCDELRT:
-		if (copy_from_user(&nr_route, arg, sizeof(struct nr_route_struct)))
-			return -EFAULT;
-		if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL)
-			return -EINVAL;
-		switch (nr_route.type) {
-		case NETROM_NODE:
-			ret = nr_del_node(&nr_route.callsign,
-				&nr_route.neighbour, dev);
-			break;
-		case NETROM_NEIGH:
-			ret = nr_del_neigh(&nr_route.callsign,
-				dev, nr_route.quality);
-			break;
-		default:
-			ret = -EINVAL;
-		}
-		dev_put(dev);
-		return ret;
-
-	case SIOCNRDECOBS:
-		return nr_dec_obs();
-
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/*
- * 	A level 2 link has timed out, therefore it appears to be a poor link,
- *	then don't use that neighbour until it is reset.
- */
-void nr_link_failed(ax25_cb *ax25, int reason)
-{
-	struct nr_neigh *s, *nr_neigh = NULL;
-	struct nr_node  *nr_node = NULL;
-
-	spin_lock_bh(&nr_neigh_list_lock);
-	nr_neigh_for_each(s, &nr_neigh_list) {
-		if (s->ax25 == ax25) {
-			nr_neigh_hold(s);
-			nr_neigh = s;
-			break;
-		}
-	}
-	spin_unlock_bh(&nr_neigh_list_lock);
-
-	if (nr_neigh == NULL)
-		return;
-
-	nr_neigh->ax25 = NULL;
-	ax25_cb_put(ax25);
-
-	if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) {
-		nr_neigh_put(nr_neigh);
-		return;
-	}
-	spin_lock_bh(&nr_node_list_lock);
-	nr_node_for_each(nr_node, &nr_node_list) {
-		nr_node_lock(nr_node);
-		if (nr_node->which < nr_node->count &&
-		    nr_node->routes[nr_node->which].neighbour == nr_neigh)
-			nr_node->which++;
-		nr_node_unlock(nr_node);
-	}
-	spin_unlock_bh(&nr_node_list_lock);
-	nr_neigh_put(nr_neigh);
-}
-
-/*
- *	Route a frame to an appropriate AX.25 connection. A NULL ax25_cb
- *	indicates an internally generated frame.
- */
-int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
-{
-	ax25_address *nr_src, *nr_dest;
-	struct nr_neigh *nr_neigh;
-	struct nr_node  *nr_node;
-	struct net_device *dev;
-	unsigned char *dptr;
-	ax25_cb *ax25s;
-	int ret;
-	struct sk_buff *nskb, *oskb;
-
-	/*
-	 * Reject malformed packets early. Check that it contains at least 2
-	 * addresses and 1 byte more for Time-To-Live
-	 */
-	if (skb->len < 2 * sizeof(ax25_address) + 1)
-		return 0;
-
-	nr_src  = (ax25_address *)(skb->data + 0);
-	nr_dest = (ax25_address *)(skb->data + 7);
-
-	if (ax25 != NULL) {
-		ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat,
-				  ax25->ax25_dev->dev, 0,
-				  READ_ONCE(sysctl_netrom_obsolescence_count_initialiser));
-		if (ret)
-			return ret;
-	}
-
-	if ((dev = nr_dev_get(nr_dest)) != NULL) {	/* Its for me */
-		if (ax25 == NULL)			/* Its from me */
-			ret = nr_loopback_queue(skb);
-		else
-			ret = nr_rx_frame(skb, dev);
-		dev_put(dev);
-		return ret;
-	}
-
-	if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL)
-		return 0;
-
-	/* Its Time-To-Live has expired */
-	if (skb->data[14] == 1) {
-		return 0;
-	}
-
-	nr_node = nr_node_get(nr_dest);
-	if (nr_node == NULL)
-		return 0;
-	nr_node_lock(nr_node);
-
-	if (nr_node->which >= nr_node->count) {
-		nr_node_unlock(nr_node);
-		nr_node_put(nr_node);
-		return 0;
-	}
-
-	nr_neigh = nr_node->routes[nr_node->which].neighbour;
-
-	if ((dev = nr_dev_first()) == NULL) {
-		nr_node_unlock(nr_node);
-		nr_node_put(nr_node);
-		return 0;
-	}
-
-	/* We are going to change the netrom headers so we should get our
-	   own skb, we also did not know until now how much header space
-	   we had to reserve... - RXQ */
-	nskb = skb_copy_expand(skb, dev->hard_header_len, 0, GFP_ATOMIC);
-
-	if (!nskb) {
-		nr_node_unlock(nr_node);
-		nr_node_put(nr_node);
-		dev_put(dev);
-		return 0;
-	}
-	oskb = skb;
-	skb = nskb;
-	skb->data[14]--;
-
-	dptr  = skb_push(skb, 1);
-	*dptr = AX25_P_NETROM;
-
-	ax25s = nr_neigh->ax25;
-	nr_neigh->ax25 = ax25_send_frame(skb, 256,
-					 (const ax25_address *)dev->dev_addr,
-					 &nr_neigh->callsign,
-					 nr_neigh->digipeat, nr_neigh->dev);
-	if (ax25s)
-		ax25_cb_put(ax25s);
-
-	dev_put(dev);
-	ret = (nr_neigh->ax25 != NULL);
-	nr_node_unlock(nr_node);
-	nr_node_put(nr_node);
-
-	if (ret)
-		kfree_skb(oskb);
-
-	return ret;
-}
-
-#ifdef CONFIG_PROC_FS
-
-static void *nr_node_start(struct seq_file *seq, loff_t *pos)
-	__acquires(&nr_node_list_lock)
-{
-	spin_lock_bh(&nr_node_list_lock);
-	return seq_hlist_start_head(&nr_node_list, *pos);
-}
-
-static void *nr_node_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	return seq_hlist_next(v, &nr_node_list, pos);
-}
-
-static void nr_node_stop(struct seq_file *seq, void *v)
-	__releases(&nr_node_list_lock)
-{
-	spin_unlock_bh(&nr_node_list_lock);
-}
-
-static int nr_node_show(struct seq_file *seq, void *v)
-{
-	char buf[11];
-	int i;
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq,
-			 "callsign  mnemonic w n qual obs neigh qual obs neigh qual obs neigh\n");
-	else {
-		struct nr_node *nr_node = hlist_entry(v, struct nr_node,
-						      node_node);
-
-		nr_node_lock(nr_node);
-		seq_printf(seq, "%-9s %-7s  %d %d",
-			ax2asc(buf, &nr_node->callsign),
-			(nr_node->mnemonic[0] == '\0') ? "*" : nr_node->mnemonic,
-			nr_node->which + 1,
-			nr_node->count);
-
-		for (i = 0; i < nr_node->count; i++) {
-			seq_printf(seq, "  %3d   %d %05d",
-				nr_node->routes[i].quality,
-				nr_node->routes[i].obs_count,
-				nr_node->routes[i].neighbour->number);
-		}
-		nr_node_unlock(nr_node);
-
-		seq_puts(seq, "\n");
-	}
-	return 0;
-}
-
-const struct seq_operations nr_node_seqops = {
-	.start = nr_node_start,
-	.next = nr_node_next,
-	.stop = nr_node_stop,
-	.show = nr_node_show,
-};
-
-static void *nr_neigh_start(struct seq_file *seq, loff_t *pos)
-	__acquires(&nr_neigh_list_lock)
-{
-	spin_lock_bh(&nr_neigh_list_lock);
-	return seq_hlist_start_head(&nr_neigh_list, *pos);
-}
-
-static void *nr_neigh_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	return seq_hlist_next(v, &nr_neigh_list, pos);
-}
-
-static void nr_neigh_stop(struct seq_file *seq, void *v)
-	__releases(&nr_neigh_list_lock)
-{
-	spin_unlock_bh(&nr_neigh_list_lock);
-}
-
-static int nr_neigh_show(struct seq_file *seq, void *v)
-{
-	char buf[11];
-	int i;
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "addr  callsign  dev  qual lock count failed digipeaters\n");
-	else {
-		struct nr_neigh *nr_neigh;
-
-		nr_neigh = hlist_entry(v, struct nr_neigh, neigh_node);
-		seq_printf(seq, "%05d %-9s %-4s  %3d    %d   %3d    %3d",
-			nr_neigh->number,
-			ax2asc(buf, &nr_neigh->callsign),
-			nr_neigh->dev ? nr_neigh->dev->name : "???",
-			nr_neigh->quality,
-			nr_neigh->locked,
-			nr_neigh->count,
-			nr_neigh->failed);
-
-		if (nr_neigh->digipeat != NULL) {
-			for (i = 0; i < nr_neigh->digipeat->ndigi; i++)
-				seq_printf(seq, " %s",
-					   ax2asc(buf, &nr_neigh->digipeat->calls[i]));
-		}
-
-		seq_puts(seq, "\n");
-	}
-	return 0;
-}
-
-const struct seq_operations nr_neigh_seqops = {
-	.start = nr_neigh_start,
-	.next = nr_neigh_next,
-	.stop = nr_neigh_stop,
-	.show = nr_neigh_show,
-};
-#endif
-
-/*
- *	Free all memory associated with the nodes and routes lists.
- */
-void nr_rt_free(void)
-{
-	struct nr_neigh *s = NULL;
-	struct nr_node  *t = NULL;
-	struct hlist_node *nodet;
-
-	spin_lock_bh(&nr_neigh_list_lock);
-	spin_lock_bh(&nr_node_list_lock);
-	nr_node_for_each_safe(t, nodet, &nr_node_list) {
-		nr_node_lock(t);
-		nr_remove_node_locked(t);
-		nr_node_unlock(t);
-	}
-	nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) {
-		while(s->count) {
-			s->count--;
-			nr_neigh_put(s);
-		}
-		nr_remove_neigh_locked(s);
-	}
-	spin_unlock_bh(&nr_node_list_lock);
-	spin_unlock_bh(&nr_neigh_list_lock);
-}
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
deleted file mode 100644
index c3bbd5880850..000000000000
--- a/net/netrom/nr_subr.c
+++ /dev/null
@@ -1,280 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <net/netrom.h>
-
-/*
- *	This routine purges all of the queues of frames.
- */
-void nr_clear_queues(struct sock *sk)
-{
-	struct nr_sock *nr = nr_sk(sk);
-
-	skb_queue_purge(&sk->sk_write_queue);
-	skb_queue_purge(&nr->ack_queue);
-	skb_queue_purge(&nr->reseq_queue);
-	skb_queue_purge(&nr->frag_queue);
-}
-
-/*
- * This routine purges the input queue of those frames that have been
- * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the
- * SDL diagram.
- */
-void nr_frames_acked(struct sock *sk, unsigned short nr)
-{
-	struct nr_sock *nrom = nr_sk(sk);
-	struct sk_buff *skb;
-
-	/*
-	 * Remove all the ack-ed frames from the ack queue.
-	 */
-	if (nrom->va != nr) {
-		while (skb_peek(&nrom->ack_queue) != NULL && nrom->va != nr) {
-			skb = skb_dequeue(&nrom->ack_queue);
-			kfree_skb(skb);
-			nrom->va = (nrom->va + 1) % NR_MODULUS;
-		}
-	}
-}
-
-/*
- * Requeue all the un-ack-ed frames on the output queue to be picked
- * up by nr_kick called from the timer. This arrangement handles the
- * possibility of an empty output queue.
- */
-void nr_requeue_frames(struct sock *sk)
-{
-	struct sk_buff *skb, *skb_prev = NULL;
-
-	while ((skb = skb_dequeue(&nr_sk(sk)->ack_queue)) != NULL) {
-		if (skb_prev == NULL)
-			skb_queue_head(&sk->sk_write_queue, skb);
-		else
-			skb_append(skb_prev, skb, &sk->sk_write_queue);
-		skb_prev = skb;
-	}
-}
-
-/*
- *	Validate that the value of nr is between va and vs. Return true or
- *	false for testing.
- */
-int nr_validate_nr(struct sock *sk, unsigned short nr)
-{
-	struct nr_sock *nrom = nr_sk(sk);
-	unsigned short vc = nrom->va;
-
-	while (vc != nrom->vs) {
-		if (nr == vc) return 1;
-		vc = (vc + 1) % NR_MODULUS;
-	}
-
-	return nr == nrom->vs;
-}
-
-/*
- *	Check that ns is within the receive window.
- */
-int nr_in_rx_window(struct sock *sk, unsigned short ns)
-{
-	struct nr_sock *nr = nr_sk(sk);
-	unsigned short vc = nr->vr;
-	unsigned short vt = (nr->vl + nr->window) % NR_MODULUS;
-
-	while (vc != vt) {
-		if (ns == vc) return 1;
-		vc = (vc + 1) % NR_MODULUS;
-	}
-
-	return 0;
-}
-
-/*
- *  This routine is called when the HDLC layer internally generates a
- *  control frame.
- */
-void nr_write_internal(struct sock *sk, int frametype)
-{
-	struct nr_sock *nr = nr_sk(sk);
-	struct sk_buff *skb;
-	unsigned char  *dptr;
-	int len, timeout;
-
-	len = NR_TRANSPORT_LEN;
-
-	switch (frametype & 0x0F) {
-	case NR_CONNREQ:
-		len += 17;
-		break;
-	case NR_CONNACK:
-		len += (nr->bpqext) ? 2 : 1;
-		break;
-	case NR_DISCREQ:
-	case NR_DISCACK:
-	case NR_INFOACK:
-		break;
-	default:
-		printk(KERN_ERR "NET/ROM: nr_write_internal - invalid frame type %d\n", frametype);
-		return;
-	}
-
-	skb = alloc_skb(NR_NETWORK_LEN + len, GFP_ATOMIC);
-	if (!skb)
-		return;
-
-	/*
-	 *	Space for AX.25 and NET/ROM network header
-	 */
-	skb_reserve(skb, NR_NETWORK_LEN);
-
-	dptr = skb_put(skb, len);
-
-	switch (frametype & 0x0F) {
-	case NR_CONNREQ:
-		timeout  = nr->t1 / HZ;
-		*dptr++  = nr->my_index;
-		*dptr++  = nr->my_id;
-		*dptr++  = 0;
-		*dptr++  = 0;
-		*dptr++  = frametype;
-		*dptr++  = nr->window;
-		memcpy(dptr, &nr->user_addr, AX25_ADDR_LEN);
-		dptr[6] &= ~AX25_CBIT;
-		dptr[6] &= ~AX25_EBIT;
-		dptr[6] |= AX25_SSSID_SPARE;
-		dptr    += AX25_ADDR_LEN;
-		memcpy(dptr, &nr->source_addr, AX25_ADDR_LEN);
-		dptr[6] &= ~AX25_CBIT;
-		dptr[6] &= ~AX25_EBIT;
-		dptr[6] |= AX25_SSSID_SPARE;
-		dptr    += AX25_ADDR_LEN;
-		*dptr++  = timeout % 256;
-		*dptr++  = timeout / 256;
-		break;
-
-	case NR_CONNACK:
-		*dptr++ = nr->your_index;
-		*dptr++ = nr->your_id;
-		*dptr++ = nr->my_index;
-		*dptr++ = nr->my_id;
-		*dptr++ = frametype;
-		*dptr++ = nr->window;
-		if (nr->bpqext)
-			*dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
-		break;
-
-	case NR_DISCREQ:
-	case NR_DISCACK:
-		*dptr++ = nr->your_index;
-		*dptr++ = nr->your_id;
-		*dptr++ = 0;
-		*dptr++ = 0;
-		*dptr++ = frametype;
-		break;
-
-	case NR_INFOACK:
-		*dptr++ = nr->your_index;
-		*dptr++ = nr->your_id;
-		*dptr++ = 0;
-		*dptr++ = nr->vr;
-		*dptr++ = frametype;
-		break;
-	}
-
-	nr_transmit_buffer(sk, skb);
-}
-
-/*
- * This routine is called to send an error reply.
- */
-void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
-{
-	struct sk_buff *skbn;
-	unsigned char *dptr;
-	int len;
-
-	len = NR_NETWORK_LEN + NR_TRANSPORT_LEN + 1;
-
-	if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL)
-		return;
-
-	skb_reserve(skbn, 0);
-
-	dptr = skb_put(skbn, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
-
-	skb_copy_from_linear_data_offset(skb, 7, dptr, AX25_ADDR_LEN);
-	dptr[6] &= ~AX25_CBIT;
-	dptr[6] &= ~AX25_EBIT;
-	dptr[6] |= AX25_SSSID_SPARE;
-	dptr += AX25_ADDR_LEN;
-
-	skb_copy_from_linear_data(skb, dptr, AX25_ADDR_LEN);
-	dptr[6] &= ~AX25_CBIT;
-	dptr[6] |= AX25_EBIT;
-	dptr[6] |= AX25_SSSID_SPARE;
-	dptr += AX25_ADDR_LEN;
-
-	*dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
-
-	if (mine) {
-		*dptr++ = 0;
-		*dptr++ = 0;
-		*dptr++ = skb->data[15];
-		*dptr++ = skb->data[16];
-	} else {
-		*dptr++ = skb->data[15];
-		*dptr++ = skb->data[16];
-		*dptr++ = 0;
-		*dptr++ = 0;
-	}
-
-	*dptr++ = cmdflags;
-	*dptr++ = 0;
-
-	if (!nr_route_frame(skbn, NULL))
-		kfree_skb(skbn);
-}
-
-void nr_disconnect(struct sock *sk, int reason)
-{
-	nr_stop_t1timer(sk);
-	nr_stop_t2timer(sk);
-	nr_stop_t4timer(sk);
-	nr_stop_idletimer(sk);
-
-	nr_clear_queues(sk);
-
-	nr_sk(sk)->state = NR_STATE_0;
-
-	sk->sk_state     = TCP_CLOSE;
-	sk->sk_err       = reason;
-	sk->sk_shutdown |= SEND_SHUTDOWN;
-
-	if (!sock_flag(sk, SOCK_DEAD)) {
-		sk->sk_state_change(sk);
-		sock_set_flag(sk, SOCK_DEAD);
-	}
-}
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
deleted file mode 100644
index b3a62b1f3a09..000000000000
--- a/net/netrom/nr_timer.c
+++ /dev/null
@@ -1,249 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <net/netrom.h>
-
-static void nr_heartbeat_expiry(struct timer_list *);
-static void nr_t1timer_expiry(struct timer_list *);
-static void nr_t2timer_expiry(struct timer_list *);
-static void nr_t4timer_expiry(struct timer_list *);
-static void nr_idletimer_expiry(struct timer_list *);
-
-void nr_init_timers(struct sock *sk)
-{
-	struct nr_sock *nr = nr_sk(sk);
-
-	timer_setup(&nr->t1timer, nr_t1timer_expiry, 0);
-	timer_setup(&nr->t2timer, nr_t2timer_expiry, 0);
-	timer_setup(&nr->t4timer, nr_t4timer_expiry, 0);
-	timer_setup(&nr->idletimer, nr_idletimer_expiry, 0);
-
-	/* initialized by sock_init_data */
-	sk->sk_timer.function = nr_heartbeat_expiry;
-}
-
-void nr_start_t1timer(struct sock *sk)
-{
-	struct nr_sock *nr = nr_sk(sk);
-
-	sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1);
-}
-
-void nr_start_t2timer(struct sock *sk)
-{
-	struct nr_sock *nr = nr_sk(sk);
-
-	sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2);
-}
-
-void nr_start_t4timer(struct sock *sk)
-{
-	struct nr_sock *nr = nr_sk(sk);
-
-	sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4);
-}
-
-void nr_start_idletimer(struct sock *sk)
-{
-	struct nr_sock *nr = nr_sk(sk);
-
-	if (nr->idle > 0)
-		sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle);
-}
-
-void nr_start_heartbeat(struct sock *sk)
-{
-	sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ);
-}
-
-void nr_stop_t1timer(struct sock *sk)
-{
-	sk_stop_timer(sk, &nr_sk(sk)->t1timer);
-}
-
-void nr_stop_t2timer(struct sock *sk)
-{
-	sk_stop_timer(sk, &nr_sk(sk)->t2timer);
-}
-
-void nr_stop_t4timer(struct sock *sk)
-{
-	sk_stop_timer(sk, &nr_sk(sk)->t4timer);
-}
-
-void nr_stop_idletimer(struct sock *sk)
-{
-	sk_stop_timer(sk, &nr_sk(sk)->idletimer);
-}
-
-void nr_stop_heartbeat(struct sock *sk)
-{
-	sk_stop_timer(sk, &sk->sk_timer);
-}
-
-int nr_t1timer_running(struct sock *sk)
-{
-	return timer_pending(&nr_sk(sk)->t1timer);
-}
-
-static void nr_heartbeat_expiry(struct timer_list *t)
-{
-	struct sock *sk = timer_container_of(sk, t, sk_timer);
-	struct nr_sock *nr = nr_sk(sk);
-
-	bh_lock_sock(sk);
-	switch (nr->state) {
-	case NR_STATE_0:
-		/* Magic here: If we listen() and a new link dies before it
-		   is accepted() it isn't 'dead' so doesn't get removed. */
-		if (sock_flag(sk, SOCK_DESTROY) ||
-		    (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
-			if (sk->sk_state == TCP_LISTEN)
-				sock_hold(sk);
-			bh_unlock_sock(sk);
-			nr_destroy_socket(sk);
-			goto out;
-		}
-		break;
-
-	case NR_STATE_3:
-		/*
-		 * Check for the state of the receive buffer.
-		 */
-		if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) &&
-		    (nr->condition & NR_COND_OWN_RX_BUSY)) {
-			nr->condition &= ~NR_COND_OWN_RX_BUSY;
-			nr->condition &= ~NR_COND_ACK_PENDING;
-			nr->vl         = nr->vr;
-			nr_write_internal(sk, NR_INFOACK);
-			break;
-		}
-		break;
-	}
-
-	nr_start_heartbeat(sk);
-	bh_unlock_sock(sk);
-out:
-	sock_put(sk);
-}
-
-static void nr_t2timer_expiry(struct timer_list *t)
-{
-	struct nr_sock *nr = timer_container_of(nr, t, t2timer);
-	struct sock *sk = &nr->sock;
-
-	bh_lock_sock(sk);
-	if (nr->condition & NR_COND_ACK_PENDING) {
-		nr->condition &= ~NR_COND_ACK_PENDING;
-		nr_enquiry_response(sk);
-	}
-	bh_unlock_sock(sk);
-	sock_put(sk);
-}
-
-static void nr_t4timer_expiry(struct timer_list *t)
-{
-	struct nr_sock *nr = timer_container_of(nr, t, t4timer);
-	struct sock *sk = &nr->sock;
-
-	bh_lock_sock(sk);
-	nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
-	bh_unlock_sock(sk);
-	sock_put(sk);
-}
-
-static void nr_idletimer_expiry(struct timer_list *t)
-{
-	struct nr_sock *nr = timer_container_of(nr, t, idletimer);
-	struct sock *sk = &nr->sock;
-
-	bh_lock_sock(sk);
-
-	nr_clear_queues(sk);
-
-	nr->n2count = 0;
-	nr_write_internal(sk, NR_DISCREQ);
-	nr->state = NR_STATE_2;
-
-	nr_start_t1timer(sk);
-	nr_stop_t2timer(sk);
-	nr_stop_t4timer(sk);
-
-	sk->sk_state     = TCP_CLOSE;
-	sk->sk_err       = 0;
-	sk->sk_shutdown |= SEND_SHUTDOWN;
-
-	if (!sock_flag(sk, SOCK_DEAD)) {
-		sk->sk_state_change(sk);
-		sock_set_flag(sk, SOCK_DEAD);
-	}
-	bh_unlock_sock(sk);
-	sock_put(sk);
-}
-
-static void nr_t1timer_expiry(struct timer_list *t)
-{
-	struct nr_sock *nr = timer_container_of(nr, t, t1timer);
-	struct sock *sk = &nr->sock;
-
-	bh_lock_sock(sk);
-	switch (nr->state) {
-	case NR_STATE_1:
-		if (nr->n2count == nr->n2) {
-			nr_disconnect(sk, ETIMEDOUT);
-			goto out;
-		} else {
-			nr->n2count++;
-			nr_write_internal(sk, NR_CONNREQ);
-		}
-		break;
-
-	case NR_STATE_2:
-		if (nr->n2count == nr->n2) {
-			nr_disconnect(sk, ETIMEDOUT);
-			goto out;
-		} else {
-			nr->n2count++;
-			nr_write_internal(sk, NR_DISCREQ);
-		}
-		break;
-
-	case NR_STATE_3:
-		if (nr->n2count == nr->n2) {
-			nr_disconnect(sk, ETIMEDOUT);
-			goto out;
-		} else {
-			nr->n2count++;
-			nr_requeue_frames(sk);
-		}
-		break;
-	}
-
-	nr_start_t1timer(sk);
-out:
-	bh_unlock_sock(sk);
-	sock_put(sk);
-}
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
deleted file mode 100644
index 7dc0fa628f2e..000000000000
--- a/net/netrom/sysctl_net_netrom.c
+++ /dev/null
@@ -1,156 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com)
- */
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include <linux/init.h>
-#include <net/ax25.h>
-#include <net/netrom.h>
-
-/*
- *	Values taken from NET/ROM documentation.
- */
-static int min_quality[] = {0}, max_quality[] = {255};
-static int min_obs[]     = {0}, max_obs[]     = {255};
-static int min_ttl[]     = {0}, max_ttl[]     = {255};
-static int min_t1[]      = {5 * HZ};
-static int max_t1[]      = {600 * HZ};
-static int min_n2[]      = {2}, max_n2[]      = {127};
-static int min_t2[]      = {1 * HZ};
-static int max_t2[]      = {60 * HZ};
-static int min_t4[]      = {1 * HZ};
-static int max_t4[]      = {1000 * HZ};
-static int min_window[]  = {1}, max_window[]  = {127};
-static int min_idle[]    = {0 * HZ};
-static int max_idle[]    = {65535 * HZ};
-static int min_route[]   = {0}, max_route[]   = {1};
-static int min_fails[]   = {1}, max_fails[]   = {10};
-static int min_reset[]   = {0}, max_reset[]   = {1};
-
-static struct ctl_table_header *nr_table_header;
-
-static struct ctl_table nr_table[] = {
-	{
-		.procname	= "default_path_quality",
-		.data		= &sysctl_netrom_default_path_quality,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_quality,
-		.extra2		= &max_quality
-	},
-	{
-		.procname	= "obsolescence_count_initialiser",
-		.data		= &sysctl_netrom_obsolescence_count_initialiser,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_obs,
-		.extra2		= &max_obs
-	},
-	{
-		.procname	= "network_ttl_initialiser",
-		.data		= &sysctl_netrom_network_ttl_initialiser,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_ttl,
-		.extra2		= &max_ttl
-	},
-	{
-		.procname	= "transport_timeout",
-		.data		= &sysctl_netrom_transport_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_t1,
-		.extra2		= &max_t1
-	},
-	{
-		.procname	= "transport_maximum_tries",
-		.data		= &sysctl_netrom_transport_maximum_tries,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_n2,
-		.extra2		= &max_n2
-	},
-	{
-		.procname	= "transport_acknowledge_delay",
-		.data		= &sysctl_netrom_transport_acknowledge_delay,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_t2,
-		.extra2		= &max_t2
-	},
-	{
-		.procname	= "transport_busy_delay",
-		.data		= &sysctl_netrom_transport_busy_delay,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_t4,
-		.extra2		= &max_t4
-	},
-	{
-		.procname	= "transport_requested_window_size",
-		.data		= &sysctl_netrom_transport_requested_window_size,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_window,
-		.extra2		= &max_window
-	},
-	{
-		.procname	= "transport_no_activity_timeout",
-		.data		= &sysctl_netrom_transport_no_activity_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_idle,
-		.extra2		= &max_idle
-	},
-	{
-		.procname	= "routing_control",
-		.data		= &sysctl_netrom_routing_control,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_route,
-		.extra2		= &max_route
-	},
-	{
-		.procname	= "link_fails_count",
-		.data		= &sysctl_netrom_link_fails_count,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_fails,
-		.extra2		= &max_fails
-	},
-	{
-		.procname	= "reset",
-		.data		= &sysctl_netrom_reset_circuit,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_reset,
-		.extra2		= &max_reset
-	},
-};
-
-int __init nr_register_sysctl(void)
-{
-	nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table);
-	if (!nr_table_header)
-		return -ENOMEM;
-	return 0;
-}
-
-void nr_unregister_sysctl(void)
-{
-	unregister_net_sysctl_table(nr_table_header);
-}
diff --git a/net/rose/Makefile b/net/rose/Makefile
deleted file mode 100644
index 3e6638f5ba57..000000000000
--- a/net/rose/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the Linux Rose (X.25 PLP) layer.
-#
-
-obj-$(CONFIG_ROSE) += rose.o
-
-rose-y	  := af_rose.o rose_dev.o rose_in.o rose_link.o rose_loopback.o \
-	     rose_out.o rose_route.o rose_subr.o rose_timer.o
-rose-$(CONFIG_SYSCTL) += sysctl_net_rose.o
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
deleted file mode 100644
index d5032840ee48..000000000000
--- a/net/rose/af_rose.c
+++ /dev/null
@@ -1,1687 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
- * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net)
- * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi)
- */
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/slab.h>
-#include <linux/kernel.h>
-#include <linux/sched/signal.h>
-#include <linux/spinlock.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/stat.h>
-#include <net/net_namespace.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/termios.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-#include <net/rose.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <net/tcp_states.h>
-#include <net/ip.h>
-#include <net/arp.h>
-
-static int rose_ndevs = 10;
-
-int sysctl_rose_restart_request_timeout = ROSE_DEFAULT_T0;
-int sysctl_rose_call_request_timeout    = ROSE_DEFAULT_T1;
-int sysctl_rose_reset_request_timeout   = ROSE_DEFAULT_T2;
-int sysctl_rose_clear_request_timeout   = ROSE_DEFAULT_T3;
-int sysctl_rose_no_activity_timeout     = ROSE_DEFAULT_IDLE;
-int sysctl_rose_ack_hold_back_timeout   = ROSE_DEFAULT_HB;
-int sysctl_rose_routing_control         = ROSE_DEFAULT_ROUTING;
-int sysctl_rose_link_fail_timeout       = ROSE_DEFAULT_FAIL_TIMEOUT;
-int sysctl_rose_maximum_vcs             = ROSE_DEFAULT_MAXVC;
-int sysctl_rose_window_size             = ROSE_DEFAULT_WINDOW_SIZE;
-
-static HLIST_HEAD(rose_list);
-static DEFINE_SPINLOCK(rose_list_lock);
-
-static const struct proto_ops rose_proto_ops;
-
-ax25_address rose_callsign;
-
-/*
- * ROSE network devices are virtual network devices encapsulating ROSE
- * frames into AX.25 which will be sent through an AX.25 device, so form a
- * special "super class" of normal net devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key rose_netdev_xmit_lock_key;
-static struct lock_class_key rose_netdev_addr_lock_key;
-
-static void rose_set_lockdep_one(struct net_device *dev,
-				 struct netdev_queue *txq,
-				 void *_unused)
-{
-	lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key);
-}
-
-static void rose_set_lockdep_key(struct net_device *dev)
-{
-	lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key);
-	netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL);
-}
-
-/*
- *	Convert a ROSE address into text.
- */
-char *rose2asc(char *buf, const rose_address *addr)
-{
-	if (addr->rose_addr[0] == 0x00 && addr->rose_addr[1] == 0x00 &&
-	    addr->rose_addr[2] == 0x00 && addr->rose_addr[3] == 0x00 &&
-	    addr->rose_addr[4] == 0x00) {
-		strcpy(buf, "*");
-	} else {
-		sprintf(buf, "%02X%02X%02X%02X%02X", addr->rose_addr[0] & 0xFF,
-						addr->rose_addr[1] & 0xFF,
-						addr->rose_addr[2] & 0xFF,
-						addr->rose_addr[3] & 0xFF,
-						addr->rose_addr[4] & 0xFF);
-	}
-
-	return buf;
-}
-
-/*
- *	Compare two ROSE addresses, 0 == equal.
- */
-int rosecmp(const rose_address *addr1, const rose_address *addr2)
-{
-	int i;
-
-	for (i = 0; i < 5; i++)
-		if (addr1->rose_addr[i] != addr2->rose_addr[i])
-			return 1;
-
-	return 0;
-}
-
-/*
- *	Compare two ROSE addresses for only mask digits, 0 == equal.
- */
-int rosecmpm(const rose_address *addr1, const rose_address *addr2,
-	     unsigned short mask)
-{
-	unsigned int i, j;
-
-	if (mask > 10)
-		return 1;
-
-	for (i = 0; i < mask; i++) {
-		j = i / 2;
-
-		if ((i % 2) != 0) {
-			if ((addr1->rose_addr[j] & 0x0F) != (addr2->rose_addr[j] & 0x0F))
-				return 1;
-		} else {
-			if ((addr1->rose_addr[j] & 0xF0) != (addr2->rose_addr[j] & 0xF0))
-				return 1;
-		}
-	}
-
-	return 0;
-}
-
-/*
- *	Socket removal during an interrupt is now safe.
- */
-static void rose_remove_socket(struct sock *sk)
-{
-	spin_lock_bh(&rose_list_lock);
-	sk_del_node_init(sk);
-	spin_unlock_bh(&rose_list_lock);
-}
-
-/*
- *	Kill all bound sockets on a broken link layer connection to a
- *	particular neighbour.
- */
-void rose_kill_by_neigh(struct rose_neigh *neigh)
-{
-	struct sock *s;
-
-	spin_lock_bh(&rose_list_lock);
-	sk_for_each(s, &rose_list) {
-		struct rose_sock *rose = rose_sk(s);
-
-		if (rose->neighbour == neigh) {
-			rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
-			rose_neigh_put(rose->neighbour);
-			rose->neighbour = NULL;
-		}
-	}
-	spin_unlock_bh(&rose_list_lock);
-}
-
-/*
- *	Kill all bound sockets on a dropped device.
- */
-static void rose_kill_by_device(struct net_device *dev)
-{
-	struct sock *sk, *array[16];
-	struct rose_sock *rose;
-	bool rescan;
-	int i, cnt;
-
-start:
-	rescan = false;
-	cnt = 0;
-	spin_lock_bh(&rose_list_lock);
-	sk_for_each(sk, &rose_list) {
-		rose = rose_sk(sk);
-		if (rose->device == dev) {
-			if (cnt == ARRAY_SIZE(array)) {
-				rescan = true;
-				break;
-			}
-			sock_hold(sk);
-			array[cnt++] = sk;
-		}
-	}
-	spin_unlock_bh(&rose_list_lock);
-
-	for (i = 0; i < cnt; i++) {
-		sk = array[i];
-		rose = rose_sk(sk);
-		lock_sock(sk);
-		spin_lock_bh(&rose_list_lock);
-		if (rose->device == dev) {
-			rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
-			if (rose->neighbour)
-				rose_neigh_put(rose->neighbour);
-			netdev_put(rose->device, &rose->dev_tracker);
-			rose->device = NULL;
-		}
-		spin_unlock_bh(&rose_list_lock);
-		release_sock(sk);
-		sock_put(sk);
-		cond_resched();
-	}
-	if (rescan)
-		goto start;
-}
-
-/*
- *	Handle device status changes.
- */
-static int rose_device_event(struct notifier_block *this,
-			     unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
-
-	if (event != NETDEV_DOWN)
-		return NOTIFY_DONE;
-
-	switch (dev->type) {
-	case ARPHRD_ROSE:
-		rose_kill_by_device(dev);
-		break;
-	case ARPHRD_AX25:
-		rose_link_device_down(dev);
-		rose_rt_device_down(dev);
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-/*
- *	Add a socket to the bound sockets list.
- */
-static void rose_insert_socket(struct sock *sk)
-{
-
-	spin_lock_bh(&rose_list_lock);
-	sk_add_node(sk, &rose_list);
-	spin_unlock_bh(&rose_list_lock);
-}
-
-/*
- *	Find a socket that wants to accept the Call Request we just
- *	received.
- */
-static struct sock *rose_find_listener(rose_address *addr, ax25_address *call)
-{
-	struct sock *s;
-
-	spin_lock_bh(&rose_list_lock);
-	sk_for_each(s, &rose_list) {
-		struct rose_sock *rose = rose_sk(s);
-
-		if (!rosecmp(&rose->source_addr, addr) &&
-		    !ax25cmp(&rose->source_call, call) &&
-		    !rose->source_ndigis && s->sk_state == TCP_LISTEN)
-			goto found;
-	}
-
-	sk_for_each(s, &rose_list) {
-		struct rose_sock *rose = rose_sk(s);
-
-		if (!rosecmp(&rose->source_addr, addr) &&
-		    !ax25cmp(&rose->source_call, &null_ax25_address) &&
-		    s->sk_state == TCP_LISTEN)
-			goto found;
-	}
-	s = NULL;
-found:
-	spin_unlock_bh(&rose_list_lock);
-	return s;
-}
-
-/*
- *	Find a connected ROSE socket given my LCI and device.
- */
-struct sock *rose_find_socket(unsigned int lci, struct rose_neigh *neigh)
-{
-	struct sock *s;
-
-	spin_lock_bh(&rose_list_lock);
-	sk_for_each(s, &rose_list) {
-		struct rose_sock *rose = rose_sk(s);
-
-		if (rose->lci == lci && rose->neighbour == neigh)
-			goto found;
-	}
-	s = NULL;
-found:
-	spin_unlock_bh(&rose_list_lock);
-	return s;
-}
-
-/*
- *	Find a unique LCI for a given device.
- */
-unsigned int rose_new_lci(struct rose_neigh *neigh)
-{
-	int lci;
-
-	if (neigh->dce_mode) {
-		for (lci = 1; lci <= sysctl_rose_maximum_vcs; lci++)
-			if (rose_find_socket(lci, neigh) == NULL && rose_route_free_lci(lci, neigh) == NULL)
-				return lci;
-	} else {
-		for (lci = sysctl_rose_maximum_vcs; lci > 0; lci--)
-			if (rose_find_socket(lci, neigh) == NULL && rose_route_free_lci(lci, neigh) == NULL)
-				return lci;
-	}
-
-	return 0;
-}
-
-/*
- *	Deferred destroy.
- */
-void rose_destroy_socket(struct sock *);
-
-/*
- *	Handler for deferred kills.
- */
-static void rose_destroy_timer(struct timer_list *t)
-{
-	struct sock *sk = timer_container_of(sk, t, sk_timer);
-
-	rose_destroy_socket(sk);
-}
-
-/*
- *	This is called from user mode and the timers. Thus it protects itself
- *	against interrupt users but doesn't worry about being called during
- *	work.  Once it is removed from the queue no interrupt or bottom half
- *	will touch it and we are (fairly 8-) ) safe.
- */
-void rose_destroy_socket(struct sock *sk)
-{
-	struct sk_buff *skb;
-
-	rose_remove_socket(sk);
-	rose_stop_heartbeat(sk);
-	rose_stop_idletimer(sk);
-	rose_stop_timer(sk);
-
-	rose_clear_queues(sk);		/* Flush the queues */
-
-	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-		if (skb->sk != sk) {	/* A pending connection */
-			/* Queue the unaccepted socket for death */
-			sock_set_flag(skb->sk, SOCK_DEAD);
-			rose_start_heartbeat(skb->sk);
-			rose_sk(skb->sk)->state = ROSE_STATE_0;
-		}
-
-		kfree_skb(skb);
-	}
-
-	if (sk_has_allocations(sk)) {
-		/* Defer: outstanding buffers */
-		timer_setup(&sk->sk_timer, rose_destroy_timer, 0);
-		sk->sk_timer.expires  = jiffies + 10 * HZ;
-		add_timer(&sk->sk_timer);
-	} else
-		sock_put(sk);
-}
-
-/*
- *	Handling for system calls applied via the various interfaces to a
- *	ROSE socket object.
- */
-
-static int rose_setsockopt(struct socket *sock, int level, int optname,
-		sockptr_t optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-	struct rose_sock *rose = rose_sk(sk);
-	unsigned int opt;
-
-	if (level != SOL_ROSE)
-		return -ENOPROTOOPT;
-
-	if (optlen < sizeof(unsigned int))
-		return -EINVAL;
-
-	if (copy_from_sockptr(&opt, optval, sizeof(unsigned int)))
-		return -EFAULT;
-
-	switch (optname) {
-	case ROSE_DEFER:
-		rose->defer = opt ? 1 : 0;
-		return 0;
-
-	case ROSE_T1:
-		if (opt < 1 || opt > UINT_MAX / HZ)
-			return -EINVAL;
-		rose->t1 = opt * HZ;
-		return 0;
-
-	case ROSE_T2:
-		if (opt < 1 || opt > UINT_MAX / HZ)
-			return -EINVAL;
-		rose->t2 = opt * HZ;
-		return 0;
-
-	case ROSE_T3:
-		if (opt < 1 || opt > UINT_MAX / HZ)
-			return -EINVAL;
-		rose->t3 = opt * HZ;
-		return 0;
-
-	case ROSE_HOLDBACK:
-		if (opt < 1 || opt > UINT_MAX / HZ)
-			return -EINVAL;
-		rose->hb = opt * HZ;
-		return 0;
-
-	case ROSE_IDLE:
-		if (opt > UINT_MAX / (60 * HZ))
-			return -EINVAL;
-		rose->idle = opt * 60 * HZ;
-		return 0;
-
-	case ROSE_QBITINCL:
-		rose->qbitincl = opt ? 1 : 0;
-		return 0;
-
-	default:
-		return -ENOPROTOOPT;
-	}
-}
-
-static int rose_getsockopt(struct socket *sock, int level, int optname,
-	char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-	struct rose_sock *rose = rose_sk(sk);
-	int val = 0;
-	int len;
-
-	if (level != SOL_ROSE)
-		return -ENOPROTOOPT;
-
-	if (get_user(len, optlen))
-		return -EFAULT;
-
-	if (len < 0)
-		return -EINVAL;
-
-	switch (optname) {
-	case ROSE_DEFER:
-		val = rose->defer;
-		break;
-
-	case ROSE_T1:
-		val = rose->t1 / HZ;
-		break;
-
-	case ROSE_T2:
-		val = rose->t2 / HZ;
-		break;
-
-	case ROSE_T3:
-		val = rose->t3 / HZ;
-		break;
-
-	case ROSE_HOLDBACK:
-		val = rose->hb / HZ;
-		break;
-
-	case ROSE_IDLE:
-		val = rose->idle / (60 * HZ);
-		break;
-
-	case ROSE_QBITINCL:
-		val = rose->qbitincl;
-		break;
-
-	default:
-		return -ENOPROTOOPT;
-	}
-
-	len = min_t(unsigned int, len, sizeof(int));
-
-	if (put_user(len, optlen))
-		return -EFAULT;
-
-	return copy_to_user(optval, &val, len) ? -EFAULT : 0;
-}
-
-static int rose_listen(struct socket *sock, int backlog)
-{
-	struct sock *sk = sock->sk;
-
-	lock_sock(sk);
-	if (sock->state != SS_UNCONNECTED) {
-		release_sock(sk);
-		return -EINVAL;
-	}
-
-	if (sk->sk_state != TCP_LISTEN) {
-		struct rose_sock *rose = rose_sk(sk);
-
-		rose->dest_ndigis = 0;
-		memset(&rose->dest_addr, 0, ROSE_ADDR_LEN);
-		memset(&rose->dest_call, 0, AX25_ADDR_LEN);
-		memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS);
-		sk->sk_max_ack_backlog = backlog;
-		sk->sk_state           = TCP_LISTEN;
-		release_sock(sk);
-		return 0;
-	}
-	release_sock(sk);
-
-	return -EOPNOTSUPP;
-}
-
-static struct proto rose_proto = {
-	.name	  = "ROSE",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct rose_sock),
-};
-
-static int rose_create(struct net *net, struct socket *sock, int protocol,
-		       int kern)
-{
-	struct sock *sk;
-	struct rose_sock *rose;
-
-	if (!net_eq(net, &init_net))
-		return -EAFNOSUPPORT;
-
-	if (sock->type != SOCK_SEQPACKET || protocol != 0)
-		return -ESOCKTNOSUPPORT;
-
-	sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, kern);
-	if (sk == NULL)
-		return -ENOMEM;
-
-	rose = rose_sk(sk);
-
-	sock_init_data(sock, sk);
-
-	skb_queue_head_init(&rose->ack_queue);
-#ifdef M_BIT
-	skb_queue_head_init(&rose->frag_queue);
-	rose->fraglen    = 0;
-#endif
-
-	sock->ops    = &rose_proto_ops;
-	sk->sk_protocol = protocol;
-
-	timer_setup(&rose->timer, NULL, 0);
-	timer_setup(&rose->idletimer, NULL, 0);
-
-	rose->t1   = msecs_to_jiffies(sysctl_rose_call_request_timeout);
-	rose->t2   = msecs_to_jiffies(sysctl_rose_reset_request_timeout);
-	rose->t3   = msecs_to_jiffies(sysctl_rose_clear_request_timeout);
-	rose->hb   = msecs_to_jiffies(sysctl_rose_ack_hold_back_timeout);
-	rose->idle = msecs_to_jiffies(sysctl_rose_no_activity_timeout);
-
-	rose->state = ROSE_STATE_0;
-
-	return 0;
-}
-
-static struct sock *rose_make_new(struct sock *osk)
-{
-	struct sock *sk;
-	struct rose_sock *rose, *orose;
-
-	if (osk->sk_type != SOCK_SEQPACKET)
-		return NULL;
-
-	sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto, 0);
-	if (sk == NULL)
-		return NULL;
-
-	rose = rose_sk(sk);
-
-	sock_init_data(NULL, sk);
-
-	skb_queue_head_init(&rose->ack_queue);
-#ifdef M_BIT
-	skb_queue_head_init(&rose->frag_queue);
-	rose->fraglen  = 0;
-#endif
-
-	sk->sk_type     = osk->sk_type;
-	sk->sk_priority = READ_ONCE(osk->sk_priority);
-	sk->sk_protocol = osk->sk_protocol;
-	sk->sk_rcvbuf   = osk->sk_rcvbuf;
-	sk->sk_sndbuf   = osk->sk_sndbuf;
-	sk->sk_state    = TCP_ESTABLISHED;
-	sock_copy_flags(sk, osk);
-
-	timer_setup(&rose->timer, NULL, 0);
-	timer_setup(&rose->idletimer, NULL, 0);
-
-	orose		= rose_sk(osk);
-	rose->t1	= orose->t1;
-	rose->t2	= orose->t2;
-	rose->t3	= orose->t3;
-	rose->hb	= orose->hb;
-	rose->idle	= orose->idle;
-	rose->defer	= orose->defer;
-	rose->device	= orose->device;
-	if (rose->device)
-		netdev_hold(rose->device, &rose->dev_tracker, GFP_ATOMIC);
-	rose->qbitincl	= orose->qbitincl;
-
-	return sk;
-}
-
-static int rose_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-	struct rose_sock *rose;
-
-	if (sk == NULL) return 0;
-
-	sock_hold(sk);
-	sock_orphan(sk);
-	lock_sock(sk);
-	rose = rose_sk(sk);
-
-	switch (rose->state) {
-	case ROSE_STATE_0:
-		release_sock(sk);
-		rose_disconnect(sk, 0, -1, -1);
-		lock_sock(sk);
-		rose_destroy_socket(sk);
-		break;
-
-	case ROSE_STATE_2:
-		rose_neigh_put(rose->neighbour);
-		release_sock(sk);
-		rose_disconnect(sk, 0, -1, -1);
-		lock_sock(sk);
-		rose_destroy_socket(sk);
-		break;
-
-	case ROSE_STATE_1:
-	case ROSE_STATE_3:
-	case ROSE_STATE_4:
-	case ROSE_STATE_5:
-		rose_clear_queues(sk);
-		rose_stop_idletimer(sk);
-		rose_write_internal(sk, ROSE_CLEAR_REQUEST);
-		rose_start_t3timer(sk);
-		rose->state  = ROSE_STATE_2;
-		sk->sk_state    = TCP_CLOSE;
-		sk->sk_shutdown |= SEND_SHUTDOWN;
-		sk->sk_state_change(sk);
-		sock_set_flag(sk, SOCK_DEAD);
-		sock_set_flag(sk, SOCK_DESTROY);
-		break;
-
-	default:
-		break;
-	}
-
-	spin_lock_bh(&rose_list_lock);
-	netdev_put(rose->device, &rose->dev_tracker);
-	rose->device = NULL;
-	spin_unlock_bh(&rose_list_lock);
-	sock->sk = NULL;
-	release_sock(sk);
-	sock_put(sk);
-
-	return 0;
-}
-
-static int rose_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len)
-{
-	struct sock *sk = sock->sk;
-	struct rose_sock *rose = rose_sk(sk);
-	struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
-	struct net_device *dev;
-	ax25_address *source;
-	ax25_uid_assoc *user;
-	int err = -EINVAL;
-	int n;
-
-	if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose))
-		return -EINVAL;
-
-	if (addr->srose_family != AF_ROSE)
-		return -EINVAL;
-
-	if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1)
-		return -EINVAL;
-
-	if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS)
-		return -EINVAL;
-
-	lock_sock(sk);
-
-	if (!sock_flag(sk, SOCK_ZAPPED))
-		goto out_release;
-
-	err = -EADDRNOTAVAIL;
-	dev = rose_dev_get(&addr->srose_addr);
-	if (!dev)
-		goto out_release;
-
-	source = &addr->srose_call;
-
-	user = ax25_findbyuid(current_euid());
-	if (user) {
-		rose->source_call = user->call;
-		ax25_uid_put(user);
-	} else {
-		if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
-			dev_put(dev);
-			err = -EACCES;
-			goto out_release;
-		}
-		rose->source_call   = *source;
-	}
-
-	rose->source_addr   = addr->srose_addr;
-	rose->device        = dev;
-	netdev_tracker_alloc(rose->device, &rose->dev_tracker, GFP_KERNEL);
-	rose->source_ndigis = addr->srose_ndigis;
-
-	if (addr_len == sizeof(struct full_sockaddr_rose)) {
-		struct full_sockaddr_rose *full_addr = (struct full_sockaddr_rose *)uaddr;
-		for (n = 0 ; n < addr->srose_ndigis ; n++)
-			rose->source_digis[n] = full_addr->srose_digis[n];
-	} else {
-		if (rose->source_ndigis == 1) {
-			rose->source_digis[0] = addr->srose_digi;
-		}
-	}
-
-	rose_insert_socket(sk);
-
-	sock_reset_flag(sk, SOCK_ZAPPED);
-	err = 0;
-out_release:
-	release_sock(sk);
-	return err;
-}
-
-static int rose_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len,
-			int flags)
-{
-	struct sock *sk = sock->sk;
-	struct rose_sock *rose = rose_sk(sk);
-	struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
-	unsigned char cause, diagnostic;
-	ax25_uid_assoc *user;
-	int n, err = 0;
-
-	if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose))
-		return -EINVAL;
-
-	if (addr->srose_family != AF_ROSE)
-		return -EINVAL;
-
-	if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1)
-		return -EINVAL;
-
-	if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS)
-		return -EINVAL;
-
-	/* Source + Destination digis should not exceed ROSE_MAX_DIGIS */
-	if ((rose->source_ndigis + addr->srose_ndigis) > ROSE_MAX_DIGIS)
-		return -EINVAL;
-
-	lock_sock(sk);
-
-	if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
-		/* Connect completed during a ERESTARTSYS event */
-		sock->state = SS_CONNECTED;
-		goto out_release;
-	}
-
-	if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
-		sock->state = SS_UNCONNECTED;
-		err = -ECONNREFUSED;
-		goto out_release;
-	}
-
-	if (sk->sk_state == TCP_ESTABLISHED) {
-		/* No reconnect on a seqpacket socket */
-		err = -EISCONN;
-		goto out_release;
-	}
-
-	if (sk->sk_state == TCP_SYN_SENT) {
-		err = -EALREADY;
-		goto out_release;
-	}
-
-	sk->sk_state   = TCP_CLOSE;
-	sock->state = SS_UNCONNECTED;
-
-	rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause,
-					 &diagnostic, 0);
-	if (!rose->neighbour) {
-		err = -ENETUNREACH;
-		goto out_release;
-	}
-
-	rose->lci = rose_new_lci(rose->neighbour);
-	if (!rose->lci) {
-		err = -ENETUNREACH;
-		rose_neigh_put(rose->neighbour);
-		goto out_release;
-	}
-
-	if (sock_flag(sk, SOCK_ZAPPED)) {	/* Must bind first - autobinding in this may or may not work */
-		struct net_device *dev;
-
-		sock_reset_flag(sk, SOCK_ZAPPED);
-
-		dev = rose_dev_first();
-		if (!dev) {
-			err = -ENETUNREACH;
-			rose_neigh_put(rose->neighbour);
-			goto out_release;
-		}
-
-		user = ax25_findbyuid(current_euid());
-		if (!user) {
-			err = -EINVAL;
-			rose_neigh_put(rose->neighbour);
-			dev_put(dev);
-			goto out_release;
-		}
-
-		memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN);
-		rose->source_call = user->call;
-		rose->device      = dev;
-		netdev_tracker_alloc(rose->device, &rose->dev_tracker,
-				     GFP_KERNEL);
-		ax25_uid_put(user);
-
-		rose_insert_socket(sk);		/* Finish the bind */
-	}
-	rose->dest_addr   = addr->srose_addr;
-	rose->dest_call   = addr->srose_call;
-	rose->rand        = ((long)rose & 0xFFFF) + rose->lci;
-	rose->dest_ndigis = addr->srose_ndigis;
-
-	if (addr_len == sizeof(struct full_sockaddr_rose)) {
-		struct full_sockaddr_rose *full_addr = (struct full_sockaddr_rose *)uaddr;
-		for (n = 0 ; n < addr->srose_ndigis ; n++)
-			rose->dest_digis[n] = full_addr->srose_digis[n];
-	} else {
-		if (rose->dest_ndigis == 1) {
-			rose->dest_digis[0] = addr->srose_digi;
-		}
-	}
-
-	/* Move to connecting socket, start sending Connect Requests */
-	sock->state   = SS_CONNECTING;
-	sk->sk_state     = TCP_SYN_SENT;
-
-	rose->state = ROSE_STATE_1;
-
-	rose_write_internal(sk, ROSE_CALL_REQUEST);
-	rose_start_heartbeat(sk);
-	rose_start_t1timer(sk);
-
-	/* Now the loop */
-	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
-		err = -EINPROGRESS;
-		goto out_release;
-	}
-
-	/*
-	 * A Connect Ack with Choke or timeout or failed routing will go to
-	 * closed.
-	 */
-	if (sk->sk_state == TCP_SYN_SENT) {
-		DEFINE_WAIT(wait);
-
-		for (;;) {
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
-			if (sk->sk_state != TCP_SYN_SENT)
-				break;
-			if (!signal_pending(current)) {
-				release_sock(sk);
-				schedule();
-				lock_sock(sk);
-				continue;
-			}
-			err = -ERESTARTSYS;
-			break;
-		}
-		finish_wait(sk_sleep(sk), &wait);
-
-		if (err)
-			goto out_release;
-	}
-
-	if (sk->sk_state != TCP_ESTABLISHED) {
-		sock->state = SS_UNCONNECTED;
-		err = sock_error(sk);	/* Always set at this point */
-		goto out_release;
-	}
-
-	sock->state = SS_CONNECTED;
-
-out_release:
-	release_sock(sk);
-
-	return err;
-}
-
-static int rose_accept(struct socket *sock, struct socket *newsock,
-		       struct proto_accept_arg *arg)
-{
-	struct sk_buff *skb;
-	struct sock *newsk;
-	DEFINE_WAIT(wait);
-	struct sock *sk;
-	int err = 0;
-
-	if ((sk = sock->sk) == NULL)
-		return -EINVAL;
-
-	lock_sock(sk);
-	if (sk->sk_type != SOCK_SEQPACKET) {
-		err = -EOPNOTSUPP;
-		goto out_release;
-	}
-
-	if (sk->sk_state != TCP_LISTEN) {
-		err = -EINVAL;
-		goto out_release;
-	}
-
-	/*
-	 *	The write queue this time is holding sockets ready to use
-	 *	hooked into the SABM we saved
-	 */
-	for (;;) {
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
-		skb = skb_dequeue(&sk->sk_receive_queue);
-		if (skb)
-			break;
-
-		if (arg->flags & O_NONBLOCK) {
-			err = -EWOULDBLOCK;
-			break;
-		}
-		if (!signal_pending(current)) {
-			release_sock(sk);
-			schedule();
-			lock_sock(sk);
-			continue;
-		}
-		err = -ERESTARTSYS;
-		break;
-	}
-	finish_wait(sk_sleep(sk), &wait);
-	if (err)
-		goto out_release;
-
-	newsk = skb->sk;
-	sock_graft(newsk, newsock);
-
-	/* Now attach up the new socket */
-	skb->sk = NULL;
-	kfree_skb(skb);
-	sk_acceptq_removed(sk);
-
-out_release:
-	release_sock(sk);
-
-	return err;
-}
-
-static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
-	int peer)
-{
-	struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr;
-	struct sock *sk = sock->sk;
-	struct rose_sock *rose = rose_sk(sk);
-	int n;
-
-	memset(srose, 0, sizeof(*srose));
-	if (peer != 0) {
-		if (sk->sk_state != TCP_ESTABLISHED)
-			return -ENOTCONN;
-		srose->srose_family = AF_ROSE;
-		srose->srose_addr   = rose->dest_addr;
-		srose->srose_call   = rose->dest_call;
-		srose->srose_ndigis = rose->dest_ndigis;
-		for (n = 0; n < rose->dest_ndigis; n++)
-			srose->srose_digis[n] = rose->dest_digis[n];
-	} else {
-		srose->srose_family = AF_ROSE;
-		srose->srose_addr   = rose->source_addr;
-		srose->srose_call   = rose->source_call;
-		srose->srose_ndigis = rose->source_ndigis;
-		for (n = 0; n < rose->source_ndigis; n++)
-			srose->srose_digis[n] = rose->source_digis[n];
-	}
-
-	return sizeof(struct full_sockaddr_rose);
-}
-
-int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci)
-{
-	struct sock *sk;
-	struct sock *make;
-	struct rose_sock *make_rose;
-	struct rose_facilities_struct facilities;
-	int n;
-
-	skb->sk = NULL;		/* Initially we don't know who it's for */
-
-	/*
-	 *	skb->data points to the rose frame start
-	 */
-	memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
-
-	if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
-				   skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
-				   &facilities)) {
-		rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76);
-		return 0;
-	}
-
-	sk = rose_find_listener(&facilities.source_addr, &facilities.source_call);
-
-	/*
-	 * We can't accept the Call Request.
-	 */
-	if (sk == NULL || sk_acceptq_is_full(sk) ||
-	    (make = rose_make_new(sk)) == NULL) {
-		rose_transmit_clear_request(neigh, lci, ROSE_NETWORK_CONGESTION, 120);
-		return 0;
-	}
-
-	skb->sk     = make;
-	make->sk_state = TCP_ESTABLISHED;
-	make_rose = rose_sk(make);
-
-	make_rose->lci           = lci;
-	make_rose->dest_addr     = facilities.dest_addr;
-	make_rose->dest_call     = facilities.dest_call;
-	make_rose->dest_ndigis   = facilities.dest_ndigis;
-	for (n = 0 ; n < facilities.dest_ndigis ; n++)
-		make_rose->dest_digis[n] = facilities.dest_digis[n];
-	make_rose->source_addr   = facilities.source_addr;
-	make_rose->source_call   = facilities.source_call;
-	make_rose->source_ndigis = facilities.source_ndigis;
-	for (n = 0 ; n < facilities.source_ndigis ; n++)
-		make_rose->source_digis[n] = facilities.source_digis[n];
-	make_rose->neighbour     = neigh;
-	make_rose->device        = dev;
-	/* Caller got a reference for us. */
-	netdev_tracker_alloc(make_rose->device, &make_rose->dev_tracker,
-			     GFP_ATOMIC);
-	make_rose->facilities    = facilities;
-
-	rose_neigh_hold(make_rose->neighbour);
-
-	if (rose_sk(sk)->defer) {
-		make_rose->state = ROSE_STATE_5;
-	} else {
-		rose_write_internal(make, ROSE_CALL_ACCEPTED);
-		make_rose->state = ROSE_STATE_3;
-		rose_start_idletimer(make);
-	}
-
-	make_rose->condition = 0x00;
-	make_rose->vs        = 0;
-	make_rose->va        = 0;
-	make_rose->vr        = 0;
-	make_rose->vl        = 0;
-	sk_acceptq_added(sk);
-
-	rose_insert_socket(make);
-
-	skb_queue_head(&sk->sk_receive_queue, skb);
-
-	rose_start_heartbeat(make);
-
-	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk);
-
-	return 1;
-}
-
-static int rose_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
-{
-	struct sock *sk = sock->sk;
-	struct rose_sock *rose = rose_sk(sk);
-	DECLARE_SOCKADDR(struct sockaddr_rose *, usrose, msg->msg_name);
-	int err;
-	struct full_sockaddr_rose srose;
-	struct sk_buff *skb;
-	unsigned char *asmptr;
-	int n, size, qbit = 0;
-
-	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT))
-		return -EINVAL;
-
-	if (sock_flag(sk, SOCK_ZAPPED))
-		return -EADDRNOTAVAIL;
-
-	if (sk->sk_shutdown & SEND_SHUTDOWN) {
-		send_sig(SIGPIPE, current, 0);
-		return -EPIPE;
-	}
-
-	if (rose->neighbour == NULL || rose->device == NULL)
-		return -ENETUNREACH;
-
-	if (usrose != NULL) {
-		if (msg->msg_namelen != sizeof(struct sockaddr_rose) && msg->msg_namelen != sizeof(struct full_sockaddr_rose))
-			return -EINVAL;
-		memset(&srose, 0, sizeof(struct full_sockaddr_rose));
-		memcpy(&srose, usrose, msg->msg_namelen);
-		if (rosecmp(&rose->dest_addr, &srose.srose_addr) != 0 ||
-		    ax25cmp(&rose->dest_call, &srose.srose_call) != 0)
-			return -EISCONN;
-		if (srose.srose_ndigis != rose->dest_ndigis)
-			return -EISCONN;
-		if (srose.srose_ndigis == rose->dest_ndigis) {
-			for (n = 0 ; n < srose.srose_ndigis ; n++)
-				if (ax25cmp(&rose->dest_digis[n],
-					    &srose.srose_digis[n]))
-					return -EISCONN;
-		}
-		if (srose.srose_family != AF_ROSE)
-			return -EINVAL;
-	} else {
-		if (sk->sk_state != TCP_ESTABLISHED)
-			return -ENOTCONN;
-
-		srose.srose_family = AF_ROSE;
-		srose.srose_addr   = rose->dest_addr;
-		srose.srose_call   = rose->dest_call;
-		srose.srose_ndigis = rose->dest_ndigis;
-		for (n = 0 ; n < rose->dest_ndigis ; n++)
-			srose.srose_digis[n] = rose->dest_digis[n];
-	}
-
-	/* Build a packet */
-	/* Sanity check the packet size */
-	if (len > 65535)
-		return -EMSGSIZE;
-
-	size = len + AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN;
-
-	if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL)
-		return err;
-
-	skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN);
-
-	/*
-	 *	Put the data on the end
-	 */
-
-	skb_reset_transport_header(skb);
-	skb_put(skb, len);
-
-	err = memcpy_from_msg(skb_transport_header(skb), msg, len);
-	if (err) {
-		kfree_skb(skb);
-		return err;
-	}
-
-	/*
-	 *	If the Q BIT Include socket option is in force, the first
-	 *	byte of the user data is the logical value of the Q Bit.
-	 */
-	if (rose->qbitincl) {
-		qbit = skb->data[0];
-		skb_pull(skb, 1);
-	}
-
-	/*
-	 *	Push down the ROSE header
-	 */
-	asmptr = skb_push(skb, ROSE_MIN_LEN);
-
-	/* Build a ROSE Network header */
-	asmptr[0] = ((rose->lci >> 8) & 0x0F) | ROSE_GFI;
-	asmptr[1] = (rose->lci >> 0) & 0xFF;
-	asmptr[2] = ROSE_DATA;
-
-	if (qbit)
-		asmptr[0] |= ROSE_Q_BIT;
-
-	if (sk->sk_state != TCP_ESTABLISHED) {
-		kfree_skb(skb);
-		return -ENOTCONN;
-	}
-
-#ifdef M_BIT
-#define ROSE_PACLEN (256-ROSE_MIN_LEN)
-	if (skb->len - ROSE_MIN_LEN > ROSE_PACLEN) {
-		unsigned char header[ROSE_MIN_LEN];
-		struct sk_buff *skbn;
-		int frontlen;
-		int lg;
-
-		/* Save a copy of the Header */
-		skb_copy_from_linear_data(skb, header, ROSE_MIN_LEN);
-		skb_pull(skb, ROSE_MIN_LEN);
-
-		frontlen = skb_headroom(skb);
-
-		while (skb->len > 0) {
-			if ((skbn = sock_alloc_send_skb(sk, frontlen + ROSE_PACLEN, 0, &err)) == NULL) {
-				kfree_skb(skb);
-				return err;
-			}
-
-			skbn->sk   = sk;
-			skbn->free = 1;
-			skbn->arp  = 1;
-
-			skb_reserve(skbn, frontlen);
-
-			lg = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN;
-
-			/* Copy the user data */
-			skb_copy_from_linear_data(skb, skb_put(skbn, lg), lg);
-			skb_pull(skb, lg);
-
-			/* Duplicate the Header */
-			skb_push(skbn, ROSE_MIN_LEN);
-			skb_copy_to_linear_data(skbn, header, ROSE_MIN_LEN);
-
-			if (skb->len > 0)
-				skbn->data[2] |= M_BIT;
-
-			skb_queue_tail(&sk->sk_write_queue, skbn); /* Throw it on the queue */
-		}
-
-		skb->free = 1;
-		kfree_skb(skb);
-	} else {
-		skb_queue_tail(&sk->sk_write_queue, skb);		/* Throw it on the queue */
-	}
-#else
-	skb_queue_tail(&sk->sk_write_queue, skb);	/* Shove it onto the queue */
-#endif
-
-	rose_kick(sk);
-
-	return len;
-}
-
-
-static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
-			int flags)
-{
-	struct sock *sk = sock->sk;
-	struct rose_sock *rose = rose_sk(sk);
-	size_t copied;
-	unsigned char *asmptr;
-	struct sk_buff *skb;
-	int n, er, qbit;
-
-	/*
-	 * This works for seqpacket too. The receiver has ordered the queue for
-	 * us! We do one quick check first though
-	 */
-	if (sk->sk_state != TCP_ESTABLISHED)
-		return -ENOTCONN;
-
-	/* Now we can treat all alike */
-	skb = skb_recv_datagram(sk, flags, &er);
-	if (!skb)
-		return er;
-
-	qbit = (skb->data[0] & ROSE_Q_BIT) == ROSE_Q_BIT;
-
-	skb_pull(skb, ROSE_MIN_LEN);
-
-	if (rose->qbitincl) {
-		asmptr  = skb_push(skb, 1);
-		*asmptr = qbit;
-	}
-
-	skb_reset_transport_header(skb);
-	copied     = skb->len;
-
-	if (copied > size) {
-		copied = size;
-		msg->msg_flags |= MSG_TRUNC;
-	}
-
-	skb_copy_datagram_msg(skb, 0, msg, copied);
-
-	if (msg->msg_name) {
-		struct sockaddr_rose *srose;
-		DECLARE_SOCKADDR(struct full_sockaddr_rose *, full_srose,
-				 msg->msg_name);
-
-		memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose));
-		srose = msg->msg_name;
-		srose->srose_family = AF_ROSE;
-		srose->srose_addr   = rose->dest_addr;
-		srose->srose_call   = rose->dest_call;
-		srose->srose_ndigis = rose->dest_ndigis;
-		for (n = 0 ; n < rose->dest_ndigis ; n++)
-			full_srose->srose_digis[n] = rose->dest_digis[n];
-		msg->msg_namelen = sizeof(struct full_sockaddr_rose);
-	}
-
-	skb_free_datagram(sk, skb);
-
-	return copied;
-}
-
-
-static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	struct sock *sk = sock->sk;
-	struct rose_sock *rose = rose_sk(sk);
-	void __user *argp = (void __user *)arg;
-
-	switch (cmd) {
-	case TIOCOUTQ: {
-		long amount;
-
-		amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
-		if (amount < 0)
-			amount = 0;
-		return put_user(amount, (unsigned int __user *) argp);
-	}
-
-	case TIOCINQ: {
-		struct sk_buff *skb;
-		long amount = 0L;
-
-		spin_lock_irq(&sk->sk_receive_queue.lock);
-		if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
-			amount = skb->len;
-		spin_unlock_irq(&sk->sk_receive_queue.lock);
-		return put_user(amount, (unsigned int __user *) argp);
-	}
-
-	case SIOCGIFADDR:
-	case SIOCSIFADDR:
-	case SIOCGIFDSTADDR:
-	case SIOCSIFDSTADDR:
-	case SIOCGIFBRDADDR:
-	case SIOCSIFBRDADDR:
-	case SIOCGIFNETMASK:
-	case SIOCSIFNETMASK:
-	case SIOCGIFMETRIC:
-	case SIOCSIFMETRIC:
-		return -EINVAL;
-
-	case SIOCADDRT:
-	case SIOCDELRT:
-	case SIOCRSCLRRT:
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		return rose_rt_ioctl(cmd, argp);
-
-	case SIOCRSGCAUSE: {
-		struct rose_cause_struct rose_cause;
-		rose_cause.cause      = rose->cause;
-		rose_cause.diagnostic = rose->diagnostic;
-		return copy_to_user(argp, &rose_cause, sizeof(struct rose_cause_struct)) ? -EFAULT : 0;
-	}
-
-	case SIOCRSSCAUSE: {
-		struct rose_cause_struct rose_cause;
-		if (copy_from_user(&rose_cause, argp, sizeof(struct rose_cause_struct)))
-			return -EFAULT;
-		rose->cause      = rose_cause.cause;
-		rose->diagnostic = rose_cause.diagnostic;
-		return 0;
-	}
-
-	case SIOCRSSL2CALL:
-		if (!capable(CAP_NET_ADMIN)) return -EPERM;
-		if (ax25cmp(&rose_callsign, &null_ax25_address) != 0)
-			ax25_listen_release(&rose_callsign, NULL);
-		if (copy_from_user(&rose_callsign, argp, sizeof(ax25_address)))
-			return -EFAULT;
-		if (ax25cmp(&rose_callsign, &null_ax25_address) != 0)
-			return ax25_listen_register(&rose_callsign, NULL);
-
-		return 0;
-
-	case SIOCRSGL2CALL:
-		return copy_to_user(argp, &rose_callsign, sizeof(ax25_address)) ? -EFAULT : 0;
-
-	case SIOCRSACCEPT:
-		if (rose->state == ROSE_STATE_5) {
-			rose_write_internal(sk, ROSE_CALL_ACCEPTED);
-			rose_start_idletimer(sk);
-			rose->condition = 0x00;
-			rose->vs        = 0;
-			rose->va        = 0;
-			rose->vr        = 0;
-			rose->vl        = 0;
-			rose->state     = ROSE_STATE_3;
-		}
-		return 0;
-
-	default:
-		return -ENOIOCTLCMD;
-	}
-
-	return 0;
-}
-
-#ifdef CONFIG_PROC_FS
-static void *rose_info_start(struct seq_file *seq, loff_t *pos)
-	__acquires(rose_list_lock)
-{
-	spin_lock_bh(&rose_list_lock);
-	return seq_hlist_start_head(&rose_list, *pos);
-}
-
-static void *rose_info_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	return seq_hlist_next(v, &rose_list, pos);
-}
-
-static void rose_info_stop(struct seq_file *seq, void *v)
-	__releases(rose_list_lock)
-{
-	spin_unlock_bh(&rose_list_lock);
-}
-
-static int rose_info_show(struct seq_file *seq, void *v)
-{
-	char buf[11], rsbuf[11];
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq,
-			 "dest_addr  dest_call src_addr   src_call  dev   lci neigh st vs vr va   t  t1  t2  t3  hb    idle Snd-Q Rcv-Q inode\n");
-
-	else {
-		struct sock *s = sk_entry(v);
-		struct rose_sock *rose = rose_sk(s);
-		const char *devname, *callsign;
-		const struct net_device *dev = rose->device;
-
-		if (!dev)
-			devname = "???";
-		else
-			devname = dev->name;
-
-		seq_printf(seq, "%-10s %-9s ",
-			   rose2asc(rsbuf, &rose->dest_addr),
-			   ax2asc(buf, &rose->dest_call));
-
-		if (ax25cmp(&rose->source_call, &null_ax25_address) == 0)
-			callsign = "??????-?";
-		else
-			callsign = ax2asc(buf, &rose->source_call);
-
-		seq_printf(seq,
-			   "%-10s %-9s %-5s %3.3X %05d  %d  %d  %d  %d %3lu %3lu %3lu %3lu %3lu %3lu/%03lu %5d %5d %llu\n",
-			rose2asc(rsbuf, &rose->source_addr),
-			callsign,
-			devname,
-			rose->lci & 0x0FFF,
-			(rose->neighbour) ? rose->neighbour->number : 0,
-			rose->state,
-			rose->vs,
-			rose->vr,
-			rose->va,
-			ax25_display_timer(&rose->timer) / HZ,
-			rose->t1 / HZ,
-			rose->t2 / HZ,
-			rose->t3 / HZ,
-			rose->hb / HZ,
-			ax25_display_timer(&rose->idletimer) / (60 * HZ),
-			rose->idle / (60 * HZ),
-			sk_wmem_alloc_get(s),
-			sk_rmem_alloc_get(s),
-			s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : (u64)0);
-	}
-
-	return 0;
-}
-
-static const struct seq_operations rose_info_seqops = {
-	.start = rose_info_start,
-	.next = rose_info_next,
-	.stop = rose_info_stop,
-	.show = rose_info_show,
-};
-#endif	/* CONFIG_PROC_FS */
-
-static const struct net_proto_family rose_family_ops = {
-	.family		=	PF_ROSE,
-	.create		=	rose_create,
-	.owner		=	THIS_MODULE,
-};
-
-static const struct proto_ops rose_proto_ops = {
-	.family		=	PF_ROSE,
-	.owner		=	THIS_MODULE,
-	.release	=	rose_release,
-	.bind		=	rose_bind,
-	.connect	=	rose_connect,
-	.socketpair	=	sock_no_socketpair,
-	.accept		=	rose_accept,
-	.getname	=	rose_getname,
-	.poll		=	datagram_poll,
-	.ioctl		=	rose_ioctl,
-	.gettstamp	=	sock_gettstamp,
-	.listen		=	rose_listen,
-	.shutdown	=	sock_no_shutdown,
-	.setsockopt	=	rose_setsockopt,
-	.getsockopt	=	rose_getsockopt,
-	.sendmsg	=	rose_sendmsg,
-	.recvmsg	=	rose_recvmsg,
-	.mmap		=	sock_no_mmap,
-};
-
-static struct notifier_block rose_dev_notifier = {
-	.notifier_call	=	rose_device_event,
-};
-
-static struct net_device **dev_rose;
-
-static struct ax25_protocol rose_pid = {
-	.pid	= AX25_P_ROSE,
-	.func	= rose_route_frame
-};
-
-static struct ax25_linkfail rose_linkfail_notifier = {
-	.func	= rose_link_failed
-};
-
-static int __init rose_proto_init(void)
-{
-	int i;
-	int rc;
-
-	if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) {
-		printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter too large\n");
-		rc = -EINVAL;
-		goto out;
-	}
-
-	rc = proto_register(&rose_proto, 0);
-	if (rc != 0)
-		goto out;
-
-	rose_callsign = null_ax25_address;
-
-	dev_rose = kzalloc_objs(struct net_device *, rose_ndevs);
-	if (dev_rose == NULL) {
-		printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n");
-		rc = -ENOMEM;
-		goto out_proto_unregister;
-	}
-
-	for (i = 0; i < rose_ndevs; i++) {
-		struct net_device *dev;
-		char name[IFNAMSIZ];
-
-		sprintf(name, "rose%d", i);
-		dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, rose_setup);
-		if (!dev) {
-			printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n");
-			rc = -ENOMEM;
-			goto fail;
-		}
-		rc = register_netdev(dev);
-		if (rc) {
-			printk(KERN_ERR "ROSE: netdevice registration failed\n");
-			free_netdev(dev);
-			goto fail;
-		}
-		rose_set_lockdep_key(dev);
-		dev_rose[i] = dev;
-	}
-
-	sock_register(&rose_family_ops);
-	register_netdevice_notifier(&rose_dev_notifier);
-
-	ax25_register_pid(&rose_pid);
-	ax25_linkfail_register(&rose_linkfail_notifier);
-
-#ifdef CONFIG_SYSCTL
-	rose_register_sysctl();
-#endif
-	rose_loopback_init();
-
-	rose_add_loopback_neigh();
-
-	proc_create_seq("rose", 0444, init_net.proc_net, &rose_info_seqops);
-	proc_create_seq("rose_neigh", 0444, init_net.proc_net,
-		    &rose_neigh_seqops);
-	proc_create_seq("rose_nodes", 0444, init_net.proc_net,
-		    &rose_node_seqops);
-	proc_create_seq("rose_routes", 0444, init_net.proc_net,
-		    &rose_route_seqops);
-out:
-	return rc;
-fail:
-	while (--i >= 0) {
-		unregister_netdev(dev_rose[i]);
-		free_netdev(dev_rose[i]);
-	}
-	kfree(dev_rose);
-out_proto_unregister:
-	proto_unregister(&rose_proto);
-	goto out;
-}
-module_init(rose_proto_init);
-
-module_param(rose_ndevs, int, 0);
-MODULE_PARM_DESC(rose_ndevs, "number of ROSE devices");
-
-MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>");
-MODULE_DESCRIPTION("The amateur radio ROSE network layer protocol");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_ROSE);
-
-static void __exit rose_exit(void)
-{
-	int i;
-
-	remove_proc_entry("rose", init_net.proc_net);
-	remove_proc_entry("rose_neigh", init_net.proc_net);
-	remove_proc_entry("rose_nodes", init_net.proc_net);
-	remove_proc_entry("rose_routes", init_net.proc_net);
-	rose_loopback_clear();
-
-	rose_rt_free();
-
-	ax25_protocol_release(AX25_P_ROSE);
-	ax25_linkfail_release(&rose_linkfail_notifier);
-
-	if (ax25cmp(&rose_callsign, &null_ax25_address) != 0)
-		ax25_listen_release(&rose_callsign, NULL);
-
-#ifdef CONFIG_SYSCTL
-	rose_unregister_sysctl();
-#endif
-	unregister_netdevice_notifier(&rose_dev_notifier);
-
-	sock_unregister(PF_ROSE);
-
-	for (i = 0; i < rose_ndevs; i++) {
-		struct net_device *dev = dev_rose[i];
-
-		if (dev) {
-			unregister_netdev(dev);
-			free_netdev(dev);
-		}
-	}
-
-	kfree(dev_rose);
-	proto_unregister(&rose_proto);
-}
-
-module_exit(rose_exit);
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
deleted file mode 100644
index f1a76a5820f1..000000000000
--- a/net/rose/rose_dev.c
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/sysctl.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-
-#include <net/ip.h>
-#include <net/arp.h>
-
-#include <net/ax25.h>
-#include <net/rose.h>
-
-static int rose_header(struct sk_buff *skb, struct net_device *dev,
-		       unsigned short type,
-		       const void *daddr, const void *saddr, unsigned int len)
-{
-	unsigned char *buff = skb_push(skb, ROSE_MIN_LEN + 2);
-
-	if (daddr)
-		memcpy(buff + 7, daddr, dev->addr_len);
-
-	*buff++ = ROSE_GFI | ROSE_Q_BIT;
-	*buff++ = 0x00;
-	*buff++ = ROSE_DATA;
-	*buff++ = 0x7F;
-	*buff++ = AX25_P_IP;
-
-	if (daddr != NULL)
-		return 37;
-
-	return -37;
-}
-
-static int rose_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct sockaddr *sa = addr;
-	int err;
-
-	if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len))
-		return 0;
-
-	if (dev->flags & IFF_UP) {
-		err = rose_add_loopback_node((rose_address *)sa->sa_data);
-		if (err)
-			return err;
-
-		rose_del_loopback_node((const rose_address *)dev->dev_addr);
-	}
-
-	dev_addr_set(dev, sa->sa_data);
-
-	return 0;
-}
-
-static int rose_open(struct net_device *dev)
-{
-	int err;
-
-	err = rose_add_loopback_node((const rose_address *)dev->dev_addr);
-	if (err)
-		return err;
-
-	netif_start_queue(dev);
-
-	return 0;
-}
-
-static int rose_close(struct net_device *dev)
-{
-	netif_stop_queue(dev);
-	rose_del_loopback_node((const rose_address *)dev->dev_addr);
-	return 0;
-}
-
-static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct net_device_stats *stats = &dev->stats;
-	unsigned int len = skb->len;
-
-	if (!netif_running(dev)) {
-		printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n");
-		return NETDEV_TX_BUSY;
-	}
-
-	if (!rose_route_frame(skb, NULL)) {
-		dev_kfree_skb(skb);
-		stats->tx_errors++;
-		return NETDEV_TX_OK;
-	}
-
-	stats->tx_packets++;
-	stats->tx_bytes += len;
-	return NETDEV_TX_OK;
-}
-
-static const struct header_ops rose_header_ops = {
-	.create	= rose_header,
-};
-
-static const struct net_device_ops rose_netdev_ops = {
-	.ndo_open		= rose_open,
-	.ndo_stop		= rose_close,
-	.ndo_start_xmit		= rose_xmit,
-	.ndo_set_mac_address    = rose_set_mac_address,
-};
-
-void rose_setup(struct net_device *dev)
-{
-	dev->mtu		= ROSE_MAX_PACKET_SIZE - 2;
-	dev->netdev_ops		= &rose_netdev_ops;
-
-	dev->header_ops		= &rose_header_ops;
-	dev->hard_header_len	= AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN;
-	dev->addr_len		= ROSE_ADDR_LEN;
-	dev->type		= ARPHRD_ROSE;
-
-	/* New-style flags. */
-	dev->flags		= IFF_NOARP;
-}
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
deleted file mode 100644
index ca4f217ef3d3..000000000000
--- a/net/rose/rose_in.c
+++ /dev/null
@@ -1,301 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- *
- * Most of this code is based on the SDL diagrams published in the 7th ARRL
- * Computer Networking Conference papers. The diagrams have mistakes in them,
- * but are mostly correct. Before you modify the code could you read the SDL
- * diagrams as the code is not obvious and probably very easy to break.
- */
-#include <linux/errno.h>
-#include <linux/filter.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <net/rose.h>
-
-/*
- * State machine for state 1, Awaiting Call Accepted State.
- * The handling of the timer(s) is in file rose_timer.c.
- * Handling of state 0 and connection release is in af_rose.c.
- */
-static int rose_state1_machine(struct sock *sk, struct sk_buff *skb, int frametype)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	switch (frametype) {
-	case ROSE_CALL_ACCEPTED:
-		rose_stop_timer(sk);
-		rose_start_idletimer(sk);
-		rose->condition = 0x00;
-		rose->vs        = 0;
-		rose->va        = 0;
-		rose->vr        = 0;
-		rose->vl        = 0;
-		rose->state     = ROSE_STATE_3;
-		sk->sk_state	= TCP_ESTABLISHED;
-		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_state_change(sk);
-		break;
-
-	case ROSE_CLEAR_REQUEST:
-		rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION);
-		rose_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]);
-		rose_neigh_put(rose->neighbour);
-		break;
-
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-/*
- * State machine for state 2, Awaiting Clear Confirmation State.
- * The handling of the timer(s) is in file rose_timer.c
- * Handling of state 0 and connection release is in af_rose.c.
- */
-static int rose_state2_machine(struct sock *sk, struct sk_buff *skb, int frametype)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	switch (frametype) {
-	case ROSE_CLEAR_REQUEST:
-		rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION);
-		rose_disconnect(sk, 0, skb->data[3], skb->data[4]);
-		rose_neigh_put(rose->neighbour);
-		break;
-
-	case ROSE_CLEAR_CONFIRMATION:
-		rose_disconnect(sk, 0, -1, -1);
-		rose_neigh_put(rose->neighbour);
-		break;
-
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-/*
- * State machine for state 3, Connected State.
- * The handling of the timer(s) is in file rose_timer.c
- * Handling of state 0 and connection release is in af_rose.c.
- */
-static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype, int ns, int nr, int q, int d, int m)
-{
-	struct rose_sock *rose = rose_sk(sk);
-	int queued = 0;
-
-	switch (frametype) {
-	case ROSE_RESET_REQUEST:
-		rose_stop_timer(sk);
-		rose_start_idletimer(sk);
-		rose_write_internal(sk, ROSE_RESET_CONFIRMATION);
-		rose->condition = 0x00;
-		rose->vs        = 0;
-		rose->vr        = 0;
-		rose->va        = 0;
-		rose->vl        = 0;
-		rose_requeue_frames(sk);
-		break;
-
-	case ROSE_CLEAR_REQUEST:
-		rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION);
-		rose_disconnect(sk, 0, skb->data[3], skb->data[4]);
-		rose_neigh_put(rose->neighbour);
-		break;
-
-	case ROSE_RR:
-	case ROSE_RNR:
-		if (!rose_validate_nr(sk, nr)) {
-			rose_write_internal(sk, ROSE_RESET_REQUEST);
-			rose->condition = 0x00;
-			rose->vs        = 0;
-			rose->vr        = 0;
-			rose->va        = 0;
-			rose->vl        = 0;
-			rose->state     = ROSE_STATE_4;
-			rose_start_t2timer(sk);
-			rose_stop_idletimer(sk);
-		} else {
-			rose_frames_acked(sk, nr);
-			if (frametype == ROSE_RNR) {
-				rose->condition |= ROSE_COND_PEER_RX_BUSY;
-			} else {
-				rose->condition &= ~ROSE_COND_PEER_RX_BUSY;
-			}
-		}
-		break;
-
-	case ROSE_DATA:	/* XXX */
-		rose->condition &= ~ROSE_COND_PEER_RX_BUSY;
-		if (!rose_validate_nr(sk, nr)) {
-			rose_write_internal(sk, ROSE_RESET_REQUEST);
-			rose->condition = 0x00;
-			rose->vs        = 0;
-			rose->vr        = 0;
-			rose->va        = 0;
-			rose->vl        = 0;
-			rose->state     = ROSE_STATE_4;
-			rose_start_t2timer(sk);
-			rose_stop_idletimer(sk);
-			break;
-		}
-		rose_frames_acked(sk, nr);
-		if (ns == rose->vr) {
-			rose_start_idletimer(sk);
-			if (!sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN) &&
-			    __sock_queue_rcv_skb(sk, skb) == 0) {
-				rose->vr = (rose->vr + 1) % ROSE_MODULUS;
-				queued = 1;
-			} else {
-				/* Should never happen ! */
-				rose_write_internal(sk, ROSE_RESET_REQUEST);
-				rose->condition = 0x00;
-				rose->vs        = 0;
-				rose->vr        = 0;
-				rose->va        = 0;
-				rose->vl        = 0;
-				rose->state     = ROSE_STATE_4;
-				rose_start_t2timer(sk);
-				rose_stop_idletimer(sk);
-				break;
-			}
-			if (atomic_read(&sk->sk_rmem_alloc) >
-			    (sk->sk_rcvbuf >> 1))
-				rose->condition |= ROSE_COND_OWN_RX_BUSY;
-		}
-		/*
-		 * If the window is full, ack the frame, else start the
-		 * acknowledge hold back timer.
-		 */
-		if (((rose->vl + sysctl_rose_window_size) % ROSE_MODULUS) == rose->vr) {
-			rose->condition &= ~ROSE_COND_ACK_PENDING;
-			rose_stop_timer(sk);
-			rose_enquiry_response(sk);
-		} else {
-			rose->condition |= ROSE_COND_ACK_PENDING;
-			rose_start_hbtimer(sk);
-		}
-		break;
-
-	default:
-		printk(KERN_WARNING "ROSE: unknown %02X in state 3\n", frametype);
-		break;
-	}
-
-	return queued;
-}
-
-/*
- * State machine for state 4, Awaiting Reset Confirmation State.
- * The handling of the timer(s) is in file rose_timer.c
- * Handling of state 0 and connection release is in af_rose.c.
- */
-static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int frametype)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	switch (frametype) {
-	case ROSE_RESET_REQUEST:
-		rose_write_internal(sk, ROSE_RESET_CONFIRMATION);
-		fallthrough;
-	case ROSE_RESET_CONFIRMATION:
-		rose_stop_timer(sk);
-		rose_start_idletimer(sk);
-		rose->condition = 0x00;
-		rose->va        = 0;
-		rose->vr        = 0;
-		rose->vs        = 0;
-		rose->vl        = 0;
-		rose->state     = ROSE_STATE_3;
-		rose_requeue_frames(sk);
-		break;
-
-	case ROSE_CLEAR_REQUEST:
-		rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION);
-		rose_disconnect(sk, 0, skb->data[3], skb->data[4]);
-		rose_neigh_put(rose->neighbour);
-		break;
-
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-/*
- * State machine for state 5, Awaiting Call Acceptance State.
- * The handling of the timer(s) is in file rose_timer.c
- * Handling of state 0 and connection release is in af_rose.c.
- */
-static int rose_state5_machine(struct sock *sk, struct sk_buff *skb, int frametype)
-{
-	if (frametype == ROSE_CLEAR_REQUEST) {
-		rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION);
-		rose_disconnect(sk, 0, skb->data[3], skb->data[4]);
-		rose_neigh_put(rose_sk(sk)->neighbour);
-	}
-
-	return 0;
-}
-
-/* Higher level upcall for a LAPB frame */
-int rose_process_rx_frame(struct sock *sk, struct sk_buff *skb)
-{
-	struct rose_sock *rose = rose_sk(sk);
-	int queued = 0, frametype, ns, nr, q, d, m;
-
-	if (rose->state == ROSE_STATE_0)
-		return 0;
-
-	frametype = rose_decode(skb, &ns, &nr, &q, &d, &m);
-
-	/*
-	 * ROSE_CLEAR_REQUEST carries cause and diagnostic in bytes 3..4.
-	 * Reject a malformed frame that is too short to contain them.
-	 */
-	if (frametype == ROSE_CLEAR_REQUEST && skb->len < 5)
-		return 0;
-
-	switch (rose->state) {
-	case ROSE_STATE_1:
-		queued = rose_state1_machine(sk, skb, frametype);
-		break;
-	case ROSE_STATE_2:
-		queued = rose_state2_machine(sk, skb, frametype);
-		break;
-	case ROSE_STATE_3:
-		queued = rose_state3_machine(sk, skb, frametype, ns, nr, q, d, m);
-		break;
-	case ROSE_STATE_4:
-		queued = rose_state4_machine(sk, skb, frametype);
-		break;
-	case ROSE_STATE_5:
-		queued = rose_state5_machine(sk, skb, frametype);
-		break;
-	}
-
-	rose_kick(sk);
-
-	return queued;
-}
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
deleted file mode 100644
index 7746229fdc8c..000000000000
--- a/net/rose/rose_link.c
+++ /dev/null
@@ -1,289 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <net/rose.h>
-
-static void rose_ftimer_expiry(struct timer_list *);
-static void rose_t0timer_expiry(struct timer_list *);
-
-static void rose_transmit_restart_confirmation(struct rose_neigh *neigh);
-static void rose_transmit_restart_request(struct rose_neigh *neigh);
-
-void rose_start_ftimer(struct rose_neigh *neigh)
-{
-	timer_delete(&neigh->ftimer);
-
-	neigh->ftimer.function = rose_ftimer_expiry;
-	neigh->ftimer.expires  =
-		jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout);
-
-	add_timer(&neigh->ftimer);
-}
-
-static void rose_start_t0timer(struct rose_neigh *neigh)
-{
-	timer_delete(&neigh->t0timer);
-
-	neigh->t0timer.function = rose_t0timer_expiry;
-	neigh->t0timer.expires  =
-		jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout);
-
-	add_timer(&neigh->t0timer);
-}
-
-void rose_stop_ftimer(struct rose_neigh *neigh)
-{
-	timer_delete(&neigh->ftimer);
-}
-
-void rose_stop_t0timer(struct rose_neigh *neigh)
-{
-	timer_delete(&neigh->t0timer);
-}
-
-int rose_ftimer_running(struct rose_neigh *neigh)
-{
-	return timer_pending(&neigh->ftimer);
-}
-
-static int rose_t0timer_running(struct rose_neigh *neigh)
-{
-	return timer_pending(&neigh->t0timer);
-}
-
-static void rose_ftimer_expiry(struct timer_list *t)
-{
-}
-
-static void rose_t0timer_expiry(struct timer_list *t)
-{
-	struct rose_neigh *neigh = timer_container_of(neigh, t, t0timer);
-
-	rose_transmit_restart_request(neigh);
-
-	neigh->dce_mode = 0;
-
-	rose_start_t0timer(neigh);
-}
-
-/*
- *	Interface to ax25_send_frame. Changes my level 2 callsign depending
- *	on whether we have a global ROSE callsign or use the default port
- *	callsign.
- */
-static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh)
-{
-	const ax25_address *rose_call;
-	ax25_cb *ax25s;
-
-	if (ax25cmp(&rose_callsign, &null_ax25_address) == 0)
-		rose_call = (const ax25_address *)neigh->dev->dev_addr;
-	else
-		rose_call = &rose_callsign;
-
-	ax25s = neigh->ax25;
-	neigh->ax25 = ax25_send_frame(skb, 260, rose_call, &neigh->callsign, neigh->digipeat, neigh->dev);
-	if (ax25s)
-		ax25_cb_put(ax25s);
-
-	return neigh->ax25 != NULL;
-}
-
-/*
- *	Interface to ax25_link_up. Changes my level 2 callsign depending
- *	on whether we have a global ROSE callsign or use the default port
- *	callsign.
- */
-static int rose_link_up(struct rose_neigh *neigh)
-{
-	const ax25_address *rose_call;
-	ax25_cb *ax25s;
-
-	if (ax25cmp(&rose_callsign, &null_ax25_address) == 0)
-		rose_call = (const ax25_address *)neigh->dev->dev_addr;
-	else
-		rose_call = &rose_callsign;
-
-	ax25s = neigh->ax25;
-	neigh->ax25 = ax25_find_cb(rose_call, &neigh->callsign, neigh->digipeat, neigh->dev);
-	if (ax25s)
-		ax25_cb_put(ax25s);
-
-	return neigh->ax25 != NULL;
-}
-
-/*
- *	This handles all restart and diagnostic frames.
- */
-void rose_link_rx_restart(struct sk_buff *skb, struct rose_neigh *neigh, unsigned short frametype)
-{
-	struct sk_buff *skbn;
-
-	switch (frametype) {
-	case ROSE_RESTART_REQUEST:
-		rose_stop_t0timer(neigh);
-		neigh->restarted = 1;
-		neigh->dce_mode  = (skb->data[3] == ROSE_DTE_ORIGINATED);
-		rose_transmit_restart_confirmation(neigh);
-		break;
-
-	case ROSE_RESTART_CONFIRMATION:
-		rose_stop_t0timer(neigh);
-		neigh->restarted = 1;
-		break;
-
-	case ROSE_DIAGNOSTIC:
-		pr_warn("ROSE: received diagnostic #%d - %3ph\n", skb->data[3],
-			skb->data + 4);
-		break;
-
-	default:
-		printk(KERN_WARNING "ROSE: received unknown %02X with LCI 000\n", frametype);
-		break;
-	}
-
-	if (neigh->restarted) {
-		while ((skbn = skb_dequeue(&neigh->queue)) != NULL)
-			if (!rose_send_frame(skbn, neigh))
-				kfree_skb(skbn);
-	}
-}
-
-/*
- *	This routine is called when a Restart Request is needed
- */
-static void rose_transmit_restart_request(struct rose_neigh *neigh)
-{
-	struct sk_buff *skb;
-	unsigned char *dptr;
-	int len;
-
-	len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3;
-
-	if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
-		return;
-
-	skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN);
-
-	dptr = skb_put(skb, ROSE_MIN_LEN + 3);
-
-	*dptr++ = AX25_P_ROSE;
-	*dptr++ = ROSE_GFI;
-	*dptr++ = 0x00;
-	*dptr++ = ROSE_RESTART_REQUEST;
-	*dptr++ = ROSE_DTE_ORIGINATED;
-	*dptr++ = 0;
-
-	if (!rose_send_frame(skb, neigh))
-		kfree_skb(skb);
-}
-
-/*
- * This routine is called when a Restart Confirmation is needed
- */
-static void rose_transmit_restart_confirmation(struct rose_neigh *neigh)
-{
-	struct sk_buff *skb;
-	unsigned char *dptr;
-	int len;
-
-	len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1;
-
-	if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
-		return;
-
-	skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN);
-
-	dptr = skb_put(skb, ROSE_MIN_LEN + 1);
-
-	*dptr++ = AX25_P_ROSE;
-	*dptr++ = ROSE_GFI;
-	*dptr++ = 0x00;
-	*dptr++ = ROSE_RESTART_CONFIRMATION;
-
-	if (!rose_send_frame(skb, neigh))
-		kfree_skb(skb);
-}
-
-/*
- * This routine is called when a Clear Request is needed outside of the context
- * of a connected socket.
- */
-void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, unsigned char cause, unsigned char diagnostic)
-{
-	struct sk_buff *skb;
-	unsigned char *dptr;
-	int len;
-
-	if (!neigh->dev)
-		return;
-
-	len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3;
-
-	if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
-		return;
-
-	skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN);
-
-	dptr = skb_put(skb, ROSE_MIN_LEN + 3);
-
-	*dptr++ = AX25_P_ROSE;
-	*dptr++ = ((lci >> 8) & 0x0F) | ROSE_GFI;
-	*dptr++ = ((lci >> 0) & 0xFF);
-	*dptr++ = ROSE_CLEAR_REQUEST;
-	*dptr++ = cause;
-	*dptr++ = diagnostic;
-
-	if (!rose_send_frame(skb, neigh))
-		kfree_skb(skb);
-}
-
-void rose_transmit_link(struct sk_buff *skb, struct rose_neigh *neigh)
-{
-	unsigned char *dptr;
-
-	if (neigh->loopback) {
-		rose_loopback_queue(skb, neigh);
-		return;
-	}
-
-	if (!rose_link_up(neigh))
-		neigh->restarted = 0;
-
-	dptr = skb_push(skb, 1);
-	*dptr++ = AX25_P_ROSE;
-
-	if (neigh->restarted) {
-		if (!rose_send_frame(skb, neigh))
-			kfree_skb(skb);
-	} else {
-		skb_queue_tail(&neigh->queue, skb);
-
-		if (!rose_t0timer_running(neigh)) {
-			rose_transmit_restart_request(neigh);
-			neigh->dce_mode = 0;
-			rose_start_t0timer(neigh);
-		}
-	}
-}
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
deleted file mode 100644
index b538e39b3df5..000000000000
--- a/net/rose/rose_loopback.c
+++ /dev/null
@@ -1,133 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-#include <linux/timer.h>
-#include <net/ax25.h>
-#include <linux/skbuff.h>
-#include <net/rose.h>
-#include <linux/init.h>
-
-static struct sk_buff_head loopback_queue;
-#define ROSE_LOOPBACK_LIMIT 1000
-static struct timer_list loopback_timer;
-
-static void rose_set_loopback_timer(void);
-static void rose_loopback_timer(struct timer_list *unused);
-
-void rose_loopback_init(void)
-{
-	skb_queue_head_init(&loopback_queue);
-
-	timer_setup(&loopback_timer, rose_loopback_timer, 0);
-}
-
-static int rose_loopback_running(void)
-{
-	return timer_pending(&loopback_timer);
-}
-
-int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
-{
-	struct sk_buff *skbn = NULL;
-
-	if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT)
-		skbn = skb_clone(skb, GFP_ATOMIC);
-
-	if (skbn) {
-		consume_skb(skb);
-		skb_queue_tail(&loopback_queue, skbn);
-
-		if (!rose_loopback_running())
-			rose_set_loopback_timer();
-	} else {
-		kfree_skb(skb);
-	}
-
-	return 1;
-}
-
-static void rose_set_loopback_timer(void)
-{
-	mod_timer(&loopback_timer, jiffies + 10);
-}
-
-static void rose_loopback_timer(struct timer_list *unused)
-{
-	struct sk_buff *skb;
-	struct net_device *dev;
-	rose_address *dest;
-	struct sock *sk;
-	unsigned short frametype;
-	unsigned int lci_i, lci_o;
-	int count;
-
-	for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) {
-		skb = skb_dequeue(&loopback_queue);
-		if (!skb)
-			return;
-		if (skb->len < ROSE_MIN_LEN) {
-			kfree_skb(skb);
-			continue;
-		}
-		lci_i     = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
-		frametype = skb->data[2];
-		if (frametype == ROSE_CALL_REQUEST &&
-		    (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
-		     skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
-		     ROSE_CALL_REQ_ADDR_LEN_VAL)) {
-			kfree_skb(skb);
-			continue;
-		}
-		dest      = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
-		lci_o     = ROSE_DEFAULT_MAXVC + 1 - lci_i;
-
-		skb_reset_transport_header(skb);
-
-		sk = rose_find_socket(lci_o, rose_loopback_neigh);
-		if (sk) {
-			if (rose_process_rx_frame(sk, skb) == 0)
-				kfree_skb(skb);
-			continue;
-		}
-
-		if (frametype == ROSE_CALL_REQUEST) {
-			if (!rose_loopback_neigh->dev &&
-			    !rose_loopback_neigh->loopback) {
-				kfree_skb(skb);
-				continue;
-			}
-
-			dev = rose_dev_get(dest);
-			if (!dev) {
-				kfree_skb(skb);
-				continue;
-			}
-
-			if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) {
-				dev_put(dev);
-				kfree_skb(skb);
-			}
-		} else {
-			kfree_skb(skb);
-		}
-	}
-	if (!skb_queue_empty(&loopback_queue))
-		mod_timer(&loopback_timer, jiffies + 1);
-}
-
-void __exit rose_loopback_clear(void)
-{
-	struct sk_buff *skb;
-
-	timer_delete(&loopback_timer);
-
-	while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
-		skb->sk = NULL;
-		kfree_skb(skb);
-	}
-}
diff --git a/net/rose/rose_out.c b/net/rose/rose_out.c
deleted file mode 100644
index 9050e33c9604..000000000000
--- a/net/rose/rose_out.c
+++ /dev/null
@@ -1,122 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/gfp.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <net/rose.h>
-
-/*
- *	This procedure is passed a buffer descriptor for an iframe. It builds
- *	the rest of the control part of the frame and then writes it out.
- */
-static void rose_send_iframe(struct sock *sk, struct sk_buff *skb)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	if (skb == NULL)
-		return;
-
-	skb->data[2] |= (rose->vr << 5) & 0xE0;
-	skb->data[2] |= (rose->vs << 1) & 0x0E;
-
-	rose_start_idletimer(sk);
-
-	rose_transmit_link(skb, rose->neighbour);
-}
-
-void rose_kick(struct sock *sk)
-{
-	struct rose_sock *rose = rose_sk(sk);
-	struct sk_buff *skb, *skbn;
-	unsigned short start, end;
-
-	if (rose->state != ROSE_STATE_3)
-		return;
-
-	if (rose->condition & ROSE_COND_PEER_RX_BUSY)
-		return;
-
-	if (!skb_peek(&sk->sk_write_queue))
-		return;
-
-	start = (skb_peek(&rose->ack_queue) == NULL) ? rose->va : rose->vs;
-	end   = (rose->va + sysctl_rose_window_size) % ROSE_MODULUS;
-
-	if (start == end)
-		return;
-
-	rose->vs = start;
-
-	/*
-	 * Transmit data until either we're out of data to send or
-	 * the window is full.
-	 */
-
-	skb  = skb_dequeue(&sk->sk_write_queue);
-
-	do {
-		if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
-			skb_queue_head(&sk->sk_write_queue, skb);
-			break;
-		}
-
-		skb_set_owner_w(skbn, sk);
-
-		/*
-		 * Transmit the frame copy.
-		 */
-		rose_send_iframe(sk, skbn);
-
-		rose->vs = (rose->vs + 1) % ROSE_MODULUS;
-
-		/*
-		 * Requeue the original data frame.
-		 */
-		skb_queue_tail(&rose->ack_queue, skb);
-
-	} while (rose->vs != end &&
-		 (skb = skb_dequeue(&sk->sk_write_queue)) != NULL);
-
-	rose->vl         = rose->vr;
-	rose->condition &= ~ROSE_COND_ACK_PENDING;
-
-	rose_stop_timer(sk);
-}
-
-/*
- * The following routines are taken from page 170 of the 7th ARRL Computer
- * Networking Conference paper, as is the whole state machine.
- */
-
-void rose_enquiry_response(struct sock *sk)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	if (rose->condition & ROSE_COND_OWN_RX_BUSY)
-		rose_write_internal(sk, ROSE_RNR);
-	else
-		rose_write_internal(sk, ROSE_RR);
-
-	rose->vl         = rose->vr;
-	rose->condition &= ~ROSE_COND_ACK_PENDING;
-
-	rose_stop_timer(sk);
-}
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
deleted file mode 100644
index e31842e6b3c8..000000000000
--- a/net/rose/rose_route.c
+++ /dev/null
@@ -1,1333 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <net/arp.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <linux/fcntl.h>
-#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-#include <linux/init.h>
-#include <net/rose.h>
-#include <linux/seq_file.h>
-#include <linux/export.h>
-
-static unsigned int rose_neigh_no = 1;
-
-static struct rose_node  *rose_node_list;
-static DEFINE_SPINLOCK(rose_node_list_lock);
-static struct rose_neigh *rose_neigh_list;
-static DEFINE_SPINLOCK(rose_neigh_list_lock);
-static struct rose_route *rose_route_list;
-static DEFINE_SPINLOCK(rose_route_list_lock);
-
-struct rose_neigh *rose_loopback_neigh;
-
-/*
- *	Add a new route to a node, and in the process add the node and the
- *	neighbour if it is new.
- */
-static int __must_check rose_add_node(struct rose_route_struct *rose_route,
-	struct net_device *dev)
-{
-	struct rose_node  *rose_node, *rose_tmpn, *rose_tmpp;
-	struct rose_neigh *rose_neigh;
-	int i, res = 0;
-
-	spin_lock_bh(&rose_node_list_lock);
-	spin_lock_bh(&rose_neigh_list_lock);
-
-	rose_node = rose_node_list;
-	while (rose_node != NULL) {
-		if ((rose_node->mask == rose_route->mask) &&
-		    (rosecmpm(&rose_route->address, &rose_node->address,
-			      rose_route->mask) == 0))
-			break;
-		rose_node = rose_node->next;
-	}
-
-	if (rose_node != NULL && rose_node->loopback) {
-		res = -EINVAL;
-		goto out;
-	}
-
-	rose_neigh = rose_neigh_list;
-	while (rose_neigh != NULL) {
-		if (ax25cmp(&rose_route->neighbour,
-			    &rose_neigh->callsign) == 0 &&
-		    rose_neigh->dev == dev)
-			break;
-		rose_neigh = rose_neigh->next;
-	}
-
-	if (rose_neigh == NULL) {
-		rose_neigh = kmalloc_obj(*rose_neigh, GFP_ATOMIC);
-		if (rose_neigh == NULL) {
-			res = -ENOMEM;
-			goto out;
-		}
-
-		rose_neigh->callsign  = rose_route->neighbour;
-		rose_neigh->digipeat  = NULL;
-		rose_neigh->ax25      = NULL;
-		rose_neigh->dev       = dev;
-		rose_neigh->count     = 0;
-		rose_neigh->dce_mode  = 0;
-		rose_neigh->loopback  = 0;
-		rose_neigh->number    = rose_neigh_no++;
-		rose_neigh->restarted = 0;
-		refcount_set(&rose_neigh->use, 1);
-
-		skb_queue_head_init(&rose_neigh->queue);
-
-		timer_setup(&rose_neigh->ftimer, NULL, 0);
-		timer_setup(&rose_neigh->t0timer, NULL, 0);
-
-		if (rose_route->ndigis != 0) {
-			rose_neigh->digipeat =
-				kmalloc_obj(ax25_digi, GFP_ATOMIC);
-			if (rose_neigh->digipeat == NULL) {
-				kfree(rose_neigh);
-				res = -ENOMEM;
-				goto out;
-			}
-
-			rose_neigh->digipeat->ndigi      = rose_route->ndigis;
-			rose_neigh->digipeat->lastrepeat = -1;
-
-			for (i = 0; i < rose_route->ndigis; i++) {
-				rose_neigh->digipeat->calls[i]    =
-					rose_route->digipeaters[i];
-				rose_neigh->digipeat->repeated[i] = 0;
-			}
-		}
-
-		rose_neigh->next = rose_neigh_list;
-		rose_neigh_list  = rose_neigh;
-	}
-
-	/*
-	 * This is a new node to be inserted into the list. Find where it needs
-	 * to be inserted into the list, and insert it. We want to be sure
-	 * to order the list in descending order of mask size to ensure that
-	 * later when we are searching this list the first match will be the
-	 * best match.
-	 */
-	if (rose_node == NULL) {
-		rose_tmpn = rose_node_list;
-		rose_tmpp = NULL;
-
-		while (rose_tmpn != NULL) {
-			if (rose_tmpn->mask > rose_route->mask) {
-				rose_tmpp = rose_tmpn;
-				rose_tmpn = rose_tmpn->next;
-			} else {
-				break;
-			}
-		}
-
-		/* create new node */
-		rose_node = kmalloc_obj(*rose_node, GFP_ATOMIC);
-		if (rose_node == NULL) {
-			res = -ENOMEM;
-			goto out;
-		}
-
-		rose_node->address      = rose_route->address;
-		rose_node->mask         = rose_route->mask;
-		rose_node->count        = 1;
-		rose_node->loopback     = 0;
-		rose_node->neighbour[0] = rose_neigh;
-
-		if (rose_tmpn == NULL) {
-			if (rose_tmpp == NULL) {	/* Empty list */
-				rose_node_list  = rose_node;
-				rose_node->next = NULL;
-			} else {
-				rose_tmpp->next = rose_node;
-				rose_node->next = NULL;
-			}
-		} else {
-			if (rose_tmpp == NULL) {	/* 1st node */
-				rose_node->next = rose_node_list;
-				rose_node_list  = rose_node;
-			} else {
-				rose_tmpp->next = rose_node;
-				rose_node->next = rose_tmpn;
-			}
-		}
-		rose_neigh->count++;
-		rose_neigh_hold(rose_neigh);
-
-		goto out;
-	}
-
-	/* We have space, slot it in */
-	if (rose_node->count < 3) {
-		rose_node->neighbour[rose_node->count] = rose_neigh;
-		rose_node->count++;
-		rose_neigh->count++;
-		rose_neigh_hold(rose_neigh);
-	}
-
-out:
-	spin_unlock_bh(&rose_neigh_list_lock);
-	spin_unlock_bh(&rose_node_list_lock);
-
-	return res;
-}
-
-/*
- * Caller is holding rose_node_list_lock.
- */
-static void rose_remove_node(struct rose_node *rose_node)
-{
-	struct rose_node *s;
-
-	if ((s = rose_node_list) == rose_node) {
-		rose_node_list = rose_node->next;
-		kfree(rose_node);
-		return;
-	}
-
-	while (s != NULL && s->next != NULL) {
-		if (s->next == rose_node) {
-			s->next = rose_node->next;
-			kfree(rose_node);
-			return;
-		}
-
-		s = s->next;
-	}
-}
-
-/*
- * Caller is holding rose_neigh_list_lock.
- */
-static void rose_remove_neigh(struct rose_neigh *rose_neigh)
-{
-	struct rose_neigh *s;
-
-	timer_delete_sync(&rose_neigh->ftimer);
-	timer_delete_sync(&rose_neigh->t0timer);
-
-	skb_queue_purge(&rose_neigh->queue);
-
-	if ((s = rose_neigh_list) == rose_neigh) {
-		rose_neigh_list = rose_neigh->next;
-		return;
-	}
-
-	while (s != NULL && s->next != NULL) {
-		if (s->next == rose_neigh) {
-			s->next = rose_neigh->next;
-			return;
-		}
-
-		s = s->next;
-	}
-}
-
-/*
- * Caller is holding rose_route_list_lock.
- */
-static void rose_remove_route(struct rose_route *rose_route)
-{
-	struct rose_route *s;
-
-	if (rose_route->neigh1 != NULL)
-		rose_neigh_put(rose_route->neigh1);
-
-	if (rose_route->neigh2 != NULL)
-		rose_neigh_put(rose_route->neigh2);
-
-	if ((s = rose_route_list) == rose_route) {
-		rose_route_list = rose_route->next;
-		kfree(rose_route);
-		return;
-	}
-
-	while (s != NULL && s->next != NULL) {
-		if (s->next == rose_route) {
-			s->next = rose_route->next;
-			kfree(rose_route);
-			return;
-		}
-
-		s = s->next;
-	}
-}
-
-/*
- *	"Delete" a node. Strictly speaking remove a route to a node. The node
- *	is only deleted if no routes are left to it.
- */
-static int rose_del_node(struct rose_route_struct *rose_route,
-	struct net_device *dev)
-{
-	struct rose_node  *rose_node;
-	struct rose_neigh *rose_neigh;
-	int i, err = 0;
-
-	spin_lock_bh(&rose_node_list_lock);
-	spin_lock_bh(&rose_neigh_list_lock);
-
-	rose_node = rose_node_list;
-	while (rose_node != NULL) {
-		if ((rose_node->mask == rose_route->mask) &&
-		    (rosecmpm(&rose_route->address, &rose_node->address,
-			      rose_route->mask) == 0))
-			break;
-		rose_node = rose_node->next;
-	}
-
-	if (rose_node == NULL || rose_node->loopback) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	rose_neigh = rose_neigh_list;
-	while (rose_neigh != NULL) {
-		if (ax25cmp(&rose_route->neighbour,
-			    &rose_neigh->callsign) == 0 &&
-		    rose_neigh->dev == dev)
-			break;
-		rose_neigh = rose_neigh->next;
-	}
-
-	if (rose_neigh == NULL) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	for (i = 0; i < rose_node->count; i++) {
-		if (rose_node->neighbour[i] == rose_neigh) {
-			rose_neigh->count--;
-			rose_neigh_put(rose_neigh);
-
-			if (rose_neigh->count == 0) {
-				rose_remove_neigh(rose_neigh);
-				rose_neigh_put(rose_neigh);
-			}
-
-			rose_node->count--;
-
-			if (rose_node->count == 0) {
-				rose_remove_node(rose_node);
-			} else {
-				switch (i) {
-				case 0:
-					rose_node->neighbour[0] =
-						rose_node->neighbour[1];
-					fallthrough;
-				case 1:
-					rose_node->neighbour[1] =
-						rose_node->neighbour[2];
-					break;
-				case 2:
-					break;
-				}
-			}
-			goto out;
-		}
-	}
-	err = -EINVAL;
-
-out:
-	spin_unlock_bh(&rose_neigh_list_lock);
-	spin_unlock_bh(&rose_node_list_lock);
-
-	return err;
-}
-
-/*
- *	Add the loopback neighbour.
- */
-void rose_add_loopback_neigh(void)
-{
-	struct rose_neigh *sn;
-
-	rose_loopback_neigh = kmalloc_obj(struct rose_neigh);
-	if (!rose_loopback_neigh)
-		return;
-	sn = rose_loopback_neigh;
-
-	sn->callsign  = null_ax25_address;
-	sn->digipeat  = NULL;
-	sn->ax25      = NULL;
-	sn->dev       = NULL;
-	sn->count     = 0;
-	sn->dce_mode  = 1;
-	sn->loopback  = 1;
-	sn->number    = rose_neigh_no++;
-	sn->restarted = 1;
-	refcount_set(&sn->use, 1);
-
-	skb_queue_head_init(&sn->queue);
-
-	timer_setup(&sn->ftimer, NULL, 0);
-	timer_setup(&sn->t0timer, NULL, 0);
-
-	spin_lock_bh(&rose_neigh_list_lock);
-	sn->next = rose_neigh_list;
-	rose_neigh_list           = sn;
-	spin_unlock_bh(&rose_neigh_list_lock);
-}
-
-/*
- *	Add a loopback node.
- */
-int rose_add_loopback_node(const rose_address *address)
-{
-	struct rose_node *rose_node;
-	int err = 0;
-
-	spin_lock_bh(&rose_node_list_lock);
-
-	rose_node = rose_node_list;
-	while (rose_node != NULL) {
-		if ((rose_node->mask == 10) &&
-		     (rosecmpm(address, &rose_node->address, 10) == 0) &&
-		     rose_node->loopback)
-			break;
-		rose_node = rose_node->next;
-	}
-
-	if (rose_node != NULL)
-		goto out;
-
-	if ((rose_node = kmalloc_obj(*rose_node, GFP_ATOMIC)) == NULL) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	rose_node->address      = *address;
-	rose_node->mask         = 10;
-	rose_node->count        = 1;
-	rose_node->loopback     = 1;
-	rose_node->neighbour[0] = rose_loopback_neigh;
-
-	/* Insert at the head of list. Address is always mask=10 */
-	rose_node->next = rose_node_list;
-	rose_node_list  = rose_node;
-
-	rose_loopback_neigh->count++;
-	rose_neigh_hold(rose_loopback_neigh);
-
-out:
-	spin_unlock_bh(&rose_node_list_lock);
-
-	return err;
-}
-
-/*
- *	Delete a loopback node.
- */
-void rose_del_loopback_node(const rose_address *address)
-{
-	struct rose_node *rose_node;
-
-	spin_lock_bh(&rose_node_list_lock);
-
-	rose_node = rose_node_list;
-	while (rose_node != NULL) {
-		if ((rose_node->mask == 10) &&
-		    (rosecmpm(address, &rose_node->address, 10) == 0) &&
-		    rose_node->loopback)
-			break;
-		rose_node = rose_node->next;
-	}
-
-	if (rose_node == NULL)
-		goto out;
-
-	rose_remove_node(rose_node);
-
-	rose_loopback_neigh->count--;
-	rose_neigh_put(rose_loopback_neigh);
-
-out:
-	spin_unlock_bh(&rose_node_list_lock);
-}
-
-/*
- *	A device has been removed. Remove its routes and neighbours.
- */
-void rose_rt_device_down(struct net_device *dev)
-{
-	struct rose_neigh *s, *rose_neigh;
-	struct rose_node  *t, *rose_node;
-	int i;
-
-	spin_lock_bh(&rose_node_list_lock);
-	spin_lock_bh(&rose_neigh_list_lock);
-	rose_neigh = rose_neigh_list;
-	while (rose_neigh != NULL) {
-		s          = rose_neigh;
-		rose_neigh = rose_neigh->next;
-
-		if (s->dev != dev)
-			continue;
-
-		rose_node = rose_node_list;
-
-		while (rose_node != NULL) {
-			t         = rose_node;
-			rose_node = rose_node->next;
-
-			for (i = t->count - 1; i >= 0; i--) {
-				if (t->neighbour[i] != s)
-					continue;
-
-				t->count--;
-
-				memmove(&t->neighbour[i], &t->neighbour[i + 1],
-					sizeof(t->neighbour[0]) *
-						(t->count - i));
-				rose_neigh_put(s);
-			}
-
-			if (t->count <= 0)
-				rose_remove_node(t);
-		}
-
-		rose_remove_neigh(s);
-		rose_neigh_put(s);
-	}
-	spin_unlock_bh(&rose_neigh_list_lock);
-	spin_unlock_bh(&rose_node_list_lock);
-}
-
-#if 0 /* Currently unused */
-/*
- *	A device has been removed. Remove its links.
- */
-void rose_route_device_down(struct net_device *dev)
-{
-	struct rose_route *s, *rose_route;
-
-	spin_lock_bh(&rose_route_list_lock);
-	rose_route = rose_route_list;
-	while (rose_route != NULL) {
-		s          = rose_route;
-		rose_route = rose_route->next;
-
-		if (s->neigh1->dev == dev || s->neigh2->dev == dev)
-			rose_remove_route(s);
-	}
-	spin_unlock_bh(&rose_route_list_lock);
-}
-#endif
-
-/*
- *	Clear all nodes and neighbours out, except for neighbours with
- *	active connections going through them.
- *  Do not clear loopback neighbour and nodes.
- */
-static int rose_clear_routes(void)
-{
-	struct rose_neigh *s, *rose_neigh;
-	struct rose_node  *t, *rose_node;
-	int i;
-
-	spin_lock_bh(&rose_node_list_lock);
-	spin_lock_bh(&rose_neigh_list_lock);
-
-	rose_neigh = rose_neigh_list;
-	rose_node  = rose_node_list;
-
-	while (rose_node != NULL) {
-		t         = rose_node;
-		rose_node = rose_node->next;
-
-		if (!t->loopback) {
-			for (i = 0; i < t->count; i++)
-				rose_neigh_put(t->neighbour[i]);
-			rose_remove_node(t);
-		}
-	}
-
-	while (rose_neigh != NULL) {
-		s          = rose_neigh;
-		rose_neigh = rose_neigh->next;
-
-		if (!s->loopback) {
-			rose_remove_neigh(s);
-			rose_neigh_put(s);
-		}
-	}
-
-	spin_unlock_bh(&rose_neigh_list_lock);
-	spin_unlock_bh(&rose_node_list_lock);
-
-	return 0;
-}
-
-/*
- *	Check that the device given is a valid AX.25 interface that is "up".
- * 	called with RTNL
- */
-static struct net_device *rose_ax25_dev_find(char *devname)
-{
-	struct net_device *dev;
-
-	if ((dev = __dev_get_by_name(&init_net, devname)) == NULL)
-		return NULL;
-
-	if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25)
-		return dev;
-
-	return NULL;
-}
-
-/*
- *	Find the first active ROSE device, usually "rose0".
- */
-struct net_device *rose_dev_first(void)
-{
-	struct net_device *dev, *first = NULL;
-
-	rcu_read_lock();
-	for_each_netdev_rcu(&init_net, dev) {
-		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE)
-			if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
-				first = dev;
-	}
-	if (first)
-		dev_hold(first);
-	rcu_read_unlock();
-
-	return first;
-}
-
-/*
- *	Find the ROSE device for the given address.
- */
-struct net_device *rose_dev_get(rose_address *addr)
-{
-	struct net_device *dev;
-
-	rcu_read_lock();
-	for_each_netdev_rcu(&init_net, dev) {
-		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE &&
-		    rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) {
-			dev_hold(dev);
-			goto out;
-		}
-	}
-	dev = NULL;
-out:
-	rcu_read_unlock();
-	return dev;
-}
-
-static int rose_dev_exists(rose_address *addr)
-{
-	struct net_device *dev;
-
-	rcu_read_lock();
-	for_each_netdev_rcu(&init_net, dev) {
-		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE &&
-		    rosecmp(addr, (const rose_address *)dev->dev_addr) == 0)
-			goto out;
-	}
-	dev = NULL;
-out:
-	rcu_read_unlock();
-	return dev != NULL;
-}
-
-
-
-
-struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neigh)
-{
-	struct rose_route *rose_route;
-
-	for (rose_route = rose_route_list; rose_route != NULL; rose_route = rose_route->next)
-		if ((rose_route->neigh1 == neigh && rose_route->lci1 == lci) ||
-		    (rose_route->neigh2 == neigh && rose_route->lci2 == lci))
-			return rose_route;
-
-	return NULL;
-}
-
-/*
- *	Find a neighbour or a route given a ROSE address.
- */
-struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
-	unsigned char *diagnostic, int route_frame)
-{
-	struct rose_neigh *res = NULL;
-	struct rose_node *node;
-	int failed = 0;
-	int i;
-
-	if (!route_frame) spin_lock_bh(&rose_node_list_lock);
-	for (node = rose_node_list; node != NULL; node = node->next) {
-		if (rosecmpm(addr, &node->address, node->mask) == 0) {
-			for (i = 0; i < node->count; i++) {
-				if (node->neighbour[i]->restarted) {
-					res = node->neighbour[i];
-					rose_neigh_hold(node->neighbour[i]);
-					goto out;
-				}
-			}
-		}
-	}
-	if (!route_frame) { /* connect request */
-		for (node = rose_node_list; node != NULL; node = node->next) {
-			if (rosecmpm(addr, &node->address, node->mask) == 0) {
-				for (i = 0; i < node->count; i++) {
-					if (!rose_ftimer_running(node->neighbour[i])) {
-						res = node->neighbour[i];
-						rose_neigh_hold(node->neighbour[i]);
-						goto out;
-					}
-					failed = 1;
-				}
-			}
-		}
-	}
-
-	if (failed) {
-		*cause      = ROSE_OUT_OF_ORDER;
-		*diagnostic = 0;
-	} else {
-		*cause      = ROSE_NOT_OBTAINABLE;
-		*diagnostic = 0;
-	}
-
-out:
-	if (!route_frame) spin_unlock_bh(&rose_node_list_lock);
-	return res;
-}
-
-/*
- *	Handle the ioctls that control the routing functions.
- */
-int rose_rt_ioctl(unsigned int cmd, void __user *arg)
-{
-	struct rose_route_struct rose_route;
-	struct net_device *dev;
-	int err;
-
-	switch (cmd) {
-	case SIOCADDRT:
-		if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct)))
-			return -EFAULT;
-		if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL)
-			return -EINVAL;
-		if (rose_dev_exists(&rose_route.address)) /* Can't add routes to ourself */
-			return -EINVAL;
-		if (rose_route.mask > 10) /* Mask can't be more than 10 digits */
-			return -EINVAL;
-		if (rose_route.ndigis > AX25_MAX_DIGIS)
-			return -EINVAL;
-		err = rose_add_node(&rose_route, dev);
-		return err;
-
-	case SIOCDELRT:
-		if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct)))
-			return -EFAULT;
-		if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL)
-			return -EINVAL;
-		err = rose_del_node(&rose_route, dev);
-		return err;
-
-	case SIOCRSCLRRT:
-		return rose_clear_routes();
-
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static void rose_del_route_by_neigh(struct rose_neigh *rose_neigh)
-{
-	struct rose_route *rose_route, *s;
-
-	rose_neigh->restarted = 0;
-
-	rose_stop_t0timer(rose_neigh);
-	rose_start_ftimer(rose_neigh);
-
-	skb_queue_purge(&rose_neigh->queue);
-
-	spin_lock_bh(&rose_route_list_lock);
-
-	rose_route = rose_route_list;
-
-	while (rose_route != NULL) {
-		if ((rose_route->neigh1 == rose_neigh && rose_route->neigh2 == rose_neigh) ||
-		    (rose_route->neigh1 == rose_neigh && rose_route->neigh2 == NULL)       ||
-		    (rose_route->neigh2 == rose_neigh && rose_route->neigh1 == NULL)) {
-			s = rose_route->next;
-			rose_remove_route(rose_route);
-			rose_route = s;
-			continue;
-		}
-
-		if (rose_route->neigh1 == rose_neigh) {
-			rose_neigh_put(rose_route->neigh1);
-			rose_route->neigh1 = NULL;
-			rose_transmit_clear_request(rose_route->neigh2, rose_route->lci2, ROSE_OUT_OF_ORDER, 0);
-		}
-
-		if (rose_route->neigh2 == rose_neigh) {
-			rose_neigh_put(rose_route->neigh2);
-			rose_route->neigh2 = NULL;
-			rose_transmit_clear_request(rose_route->neigh1, rose_route->lci1, ROSE_OUT_OF_ORDER, 0);
-		}
-
-		rose_route = rose_route->next;
-	}
-	spin_unlock_bh(&rose_route_list_lock);
-}
-
-/*
- * 	A level 2 link has timed out, therefore it appears to be a poor link,
- *	then don't use that neighbour until it is reset. Blow away all through
- *	routes and connections using this route.
- */
-void rose_link_failed(ax25_cb *ax25, int reason)
-{
-	struct rose_neigh *rose_neigh;
-
-	spin_lock_bh(&rose_neigh_list_lock);
-	rose_neigh = rose_neigh_list;
-	while (rose_neigh != NULL) {
-		if (rose_neigh->ax25 == ax25)
-			break;
-		rose_neigh = rose_neigh->next;
-	}
-
-	if (rose_neigh != NULL) {
-		rose_neigh->ax25 = NULL;
-		ax25_cb_put(ax25);
-
-		rose_del_route_by_neigh(rose_neigh);
-		rose_kill_by_neigh(rose_neigh);
-	}
-	spin_unlock_bh(&rose_neigh_list_lock);
-}
-
-/*
- * 	A device has been "downed" remove its link status. Blow away all
- *	through routes and connections that use this device.
- */
-void rose_link_device_down(struct net_device *dev)
-{
-	struct rose_neigh *rose_neigh;
-
-	for (rose_neigh = rose_neigh_list; rose_neigh != NULL; rose_neigh = rose_neigh->next) {
-		if (rose_neigh->dev == dev) {
-			rose_del_route_by_neigh(rose_neigh);
-			rose_kill_by_neigh(rose_neigh);
-		}
-	}
-}
-
-/*
- *	Route a frame to an appropriate AX.25 connection.
- *	A NULL ax25_cb indicates an internally generated frame.
- */
-int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
-{
-	struct rose_neigh *rose_neigh, *new_neigh;
-	struct rose_route *rose_route;
-	struct rose_facilities_struct facilities;
-	rose_address *src_addr, *dest_addr;
-	struct sock *sk;
-	unsigned short frametype;
-	unsigned int lci, new_lci;
-	unsigned char cause, diagnostic;
-	struct net_device *dev;
-	int res = 0;
-	char buf[11];
-
-	if (skb->len < ROSE_MIN_LEN)
-		return res;
-
-	if (!ax25)
-		return rose_loopback_queue(skb, NULL);
-
-	frametype = skb->data[2];
-	lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
-	if (frametype == ROSE_CALL_REQUEST &&
-	    (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
-	     skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
-	     ROSE_CALL_REQ_ADDR_LEN_VAL))
-		return res;
-	src_addr  = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF);
-	dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
-
-	spin_lock_bh(&rose_neigh_list_lock);
-	spin_lock_bh(&rose_route_list_lock);
-
-	rose_neigh = rose_neigh_list;
-	while (rose_neigh != NULL) {
-		if (ax25cmp(&ax25->dest_addr, &rose_neigh->callsign) == 0 &&
-		    ax25->ax25_dev->dev == rose_neigh->dev)
-			break;
-		rose_neigh = rose_neigh->next;
-	}
-
-	if (rose_neigh == NULL) {
-		printk("rose_route : unknown neighbour or device %s\n",
-		       ax2asc(buf, &ax25->dest_addr));
-		goto out;
-	}
-
-	/*
-	 *	Obviously the link is working, halt the ftimer.
-	 */
-	rose_stop_ftimer(rose_neigh);
-
-	/*
-	 *	LCI of zero is always for us, and its always a restart
-	 * 	frame.
-	 */
-	if (lci == 0) {
-		rose_link_rx_restart(skb, rose_neigh, frametype);
-		goto out;
-	}
-
-	/*
-	 *	Find an existing socket.
-	 */
-	if ((sk = rose_find_socket(lci, rose_neigh)) != NULL) {
-		if (frametype == ROSE_CALL_REQUEST) {
-			struct rose_sock *rose = rose_sk(sk);
-
-			/* Remove an existing unused socket */
-			rose_clear_queues(sk);
-			rose->cause	 = ROSE_NETWORK_CONGESTION;
-			rose->diagnostic = 0;
-			rose_neigh_put(rose->neighbour);
-			rose->neighbour	 = NULL;
-			rose->lci	 = 0;
-			rose->state	 = ROSE_STATE_0;
-			sk->sk_state	 = TCP_CLOSE;
-			sk->sk_err	 = 0;
-			sk->sk_shutdown	 |= SEND_SHUTDOWN;
-			if (!sock_flag(sk, SOCK_DEAD)) {
-				sk->sk_state_change(sk);
-				sock_set_flag(sk, SOCK_DEAD);
-			}
-		}
-		else {
-			skb_reset_transport_header(skb);
-			res = rose_process_rx_frame(sk, skb);
-			goto out;
-		}
-	}
-
-	/*
-	 *	Is is a Call Request and is it for us ?
-	 */
-	if (frametype == ROSE_CALL_REQUEST)
-		if ((dev = rose_dev_get(dest_addr)) != NULL) {
-			res = rose_rx_call_request(skb, dev, rose_neigh, lci);
-			dev_put(dev);
-			goto out;
-		}
-
-	if (!sysctl_rose_routing_control) {
-		rose_transmit_clear_request(rose_neigh, lci, ROSE_NOT_OBTAINABLE, 0);
-		goto out;
-	}
-
-	/*
-	 *	Route it to the next in line if we have an entry for it.
-	 */
-	rose_route = rose_route_list;
-	while (rose_route != NULL) {
-		if (rose_route->lci1 == lci &&
-		    rose_route->neigh1 == rose_neigh) {
-			if (frametype == ROSE_CALL_REQUEST) {
-				/* F6FBB - Remove an existing unused route */
-				rose_remove_route(rose_route);
-				break;
-			} else if (rose_route->neigh2 != NULL) {
-				skb->data[0] &= 0xF0;
-				skb->data[0] |= (rose_route->lci2 >> 8) & 0x0F;
-				skb->data[1]  = (rose_route->lci2 >> 0) & 0xFF;
-				rose_transmit_link(skb, rose_route->neigh2);
-				if (frametype == ROSE_CLEAR_CONFIRMATION)
-					rose_remove_route(rose_route);
-				res = 1;
-				goto out;
-			} else {
-				if (frametype == ROSE_CLEAR_CONFIRMATION)
-					rose_remove_route(rose_route);
-				goto out;
-			}
-		}
-		if (rose_route->lci2 == lci &&
-		    rose_route->neigh2 == rose_neigh) {
-			if (frametype == ROSE_CALL_REQUEST) {
-				/* F6FBB - Remove an existing unused route */
-				rose_remove_route(rose_route);
-				break;
-			} else if (rose_route->neigh1 != NULL) {
-				skb->data[0] &= 0xF0;
-				skb->data[0] |= (rose_route->lci1 >> 8) & 0x0F;
-				skb->data[1]  = (rose_route->lci1 >> 0) & 0xFF;
-				rose_transmit_link(skb, rose_route->neigh1);
-				if (frametype == ROSE_CLEAR_CONFIRMATION)
-					rose_remove_route(rose_route);
-				res = 1;
-				goto out;
-			} else {
-				if (frametype == ROSE_CLEAR_CONFIRMATION)
-					rose_remove_route(rose_route);
-				goto out;
-			}
-		}
-		rose_route = rose_route->next;
-	}
-
-	/*
-	 *	We know that:
-	 *	1. The frame isn't for us,
-	 *	2. It isn't "owned" by any existing route.
-	 */
-	if (frametype != ROSE_CALL_REQUEST) {	/* XXX */
-		res = 0;
-		goto out;
-	}
-
-	memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
-
-	if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
-				   skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
-				   &facilities)) {
-		rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76);
-		goto out;
-	}
-
-	/*
-	 *	Check for routing loops.
-	 */
-	rose_route = rose_route_list;
-	while (rose_route != NULL) {
-		if (rose_route->rand == facilities.rand &&
-		    rosecmp(src_addr, &rose_route->src_addr) == 0 &&
-		    ax25cmp(&facilities.dest_call, &rose_route->src_call) == 0 &&
-		    ax25cmp(&facilities.source_call, &rose_route->dest_call) == 0) {
-			rose_transmit_clear_request(rose_neigh, lci, ROSE_NOT_OBTAINABLE, 120);
-			goto out;
-		}
-		rose_route = rose_route->next;
-	}
-
-	if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic, 1)) == NULL) {
-		rose_transmit_clear_request(rose_neigh, lci, cause, diagnostic);
-		goto out;
-	}
-
-	if ((new_lci = rose_new_lci(new_neigh)) == 0) {
-		rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 71);
-		goto put_neigh;
-	}
-
-	if ((rose_route = kmalloc_obj(*rose_route, GFP_ATOMIC)) == NULL) {
-		rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 120);
-		goto put_neigh;
-	}
-
-	rose_route->lci1      = lci;
-	rose_route->src_addr  = *src_addr;
-	rose_route->dest_addr = *dest_addr;
-	rose_route->src_call  = facilities.dest_call;
-	rose_route->dest_call = facilities.source_call;
-	rose_route->rand      = facilities.rand;
-	rose_route->neigh1    = rose_neigh;
-	rose_route->lci2      = new_lci;
-	rose_route->neigh2    = new_neigh;
-
-	rose_neigh_hold(rose_route->neigh1);
-	rose_neigh_hold(rose_route->neigh2);
-
-	rose_route->next = rose_route_list;
-	rose_route_list  = rose_route;
-
-	skb->data[0] &= 0xF0;
-	skb->data[0] |= (rose_route->lci2 >> 8) & 0x0F;
-	skb->data[1]  = (rose_route->lci2 >> 0) & 0xFF;
-
-	rose_transmit_link(skb, rose_route->neigh2);
-	res = 1;
-
-put_neigh:
-	rose_neigh_put(new_neigh);
-out:
-	spin_unlock_bh(&rose_route_list_lock);
-	spin_unlock_bh(&rose_neigh_list_lock);
-
-	return res;
-}
-
-#ifdef CONFIG_PROC_FS
-
-static void *rose_node_start(struct seq_file *seq, loff_t *pos)
-	__acquires(rose_node_list_lock)
-{
-	struct rose_node *rose_node;
-	int i = 1;
-
-	spin_lock_bh(&rose_node_list_lock);
-	if (*pos == 0)
-		return SEQ_START_TOKEN;
-
-	for (rose_node = rose_node_list; rose_node && i < *pos;
-	     rose_node = rose_node->next, ++i);
-
-	return (i == *pos) ? rose_node : NULL;
-}
-
-static void *rose_node_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-
-	return (v == SEQ_START_TOKEN) ? rose_node_list
-		: ((struct rose_node *)v)->next;
-}
-
-static void rose_node_stop(struct seq_file *seq, void *v)
-	__releases(rose_node_list_lock)
-{
-	spin_unlock_bh(&rose_node_list_lock);
-}
-
-static int rose_node_show(struct seq_file *seq, void *v)
-{
-	char rsbuf[11];
-	int i;
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "address    mask n neigh neigh neigh\n");
-	else {
-		const struct rose_node *rose_node = v;
-		seq_printf(seq, "%-10s %04d %d",
-			   rose2asc(rsbuf, &rose_node->address),
-			   rose_node->mask,
-			   rose_node->count);
-
-		for (i = 0; i < rose_node->count; i++)
-			seq_printf(seq, " %05d", rose_node->neighbour[i]->number);
-
-		seq_puts(seq, "\n");
-	}
-	return 0;
-}
-
-const struct seq_operations rose_node_seqops = {
-	.start = rose_node_start,
-	.next = rose_node_next,
-	.stop = rose_node_stop,
-	.show = rose_node_show,
-};
-
-static void *rose_neigh_start(struct seq_file *seq, loff_t *pos)
-	__acquires(rose_neigh_list_lock)
-{
-	struct rose_neigh *rose_neigh;
-	int i = 1;
-
-	spin_lock_bh(&rose_neigh_list_lock);
-	if (*pos == 0)
-		return SEQ_START_TOKEN;
-
-	for (rose_neigh = rose_neigh_list; rose_neigh && i < *pos;
-	     rose_neigh = rose_neigh->next, ++i);
-
-	return (i == *pos) ? rose_neigh : NULL;
-}
-
-static void *rose_neigh_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-
-	return (v == SEQ_START_TOKEN) ? rose_neigh_list
-		: ((struct rose_neigh *)v)->next;
-}
-
-static void rose_neigh_stop(struct seq_file *seq, void *v)
-	__releases(rose_neigh_list_lock)
-{
-	spin_unlock_bh(&rose_neigh_list_lock);
-}
-
-static int rose_neigh_show(struct seq_file *seq, void *v)
-{
-	char buf[11];
-	int i;
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq,
-			 "addr  callsign  dev  count use mode restart  t0  tf digipeaters\n");
-	else {
-		struct rose_neigh *rose_neigh = v;
-
-		/* if (!rose_neigh->loopback) { */
-		seq_printf(seq, "%05d %-9s %-4s   %3d %3d  %3s     %3s %3lu %3lu",
-			   rose_neigh->number,
-			   (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign),
-			   rose_neigh->dev ? rose_neigh->dev->name : "???",
-			   rose_neigh->count,
-			   refcount_read(&rose_neigh->use) - rose_neigh->count - 1,
-			   (rose_neigh->dce_mode) ? "DCE" : "DTE",
-			   (rose_neigh->restarted) ? "yes" : "no",
-			   ax25_display_timer(&rose_neigh->t0timer) / HZ,
-			   ax25_display_timer(&rose_neigh->ftimer)  / HZ);
-
-		if (rose_neigh->digipeat != NULL) {
-			for (i = 0; i < rose_neigh->digipeat->ndigi; i++)
-				seq_printf(seq, " %s", ax2asc(buf, &rose_neigh->digipeat->calls[i]));
-		}
-
-		seq_puts(seq, "\n");
-	}
-	return 0;
-}
-
-
-const struct seq_operations rose_neigh_seqops = {
-	.start = rose_neigh_start,
-	.next = rose_neigh_next,
-	.stop = rose_neigh_stop,
-	.show = rose_neigh_show,
-};
-
-static void *rose_route_start(struct seq_file *seq, loff_t *pos)
-	__acquires(rose_route_list_lock)
-{
-	struct rose_route *rose_route;
-	int i = 1;
-
-	spin_lock_bh(&rose_route_list_lock);
-	if (*pos == 0)
-		return SEQ_START_TOKEN;
-
-	for (rose_route = rose_route_list; rose_route && i < *pos;
-	     rose_route = rose_route->next, ++i);
-
-	return (i == *pos) ? rose_route : NULL;
-}
-
-static void *rose_route_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-
-	return (v == SEQ_START_TOKEN) ? rose_route_list
-		: ((struct rose_route *)v)->next;
-}
-
-static void rose_route_stop(struct seq_file *seq, void *v)
-	__releases(rose_route_list_lock)
-{
-	spin_unlock_bh(&rose_route_list_lock);
-}
-
-static int rose_route_show(struct seq_file *seq, void *v)
-{
-	char buf[11], rsbuf[11];
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq,
-			 "lci  address     callsign   neigh  <-> lci  address     callsign   neigh\n");
-	else {
-		struct rose_route *rose_route = v;
-
-		if (rose_route->neigh1)
-			seq_printf(seq,
-				   "%3.3X  %-10s  %-9s  %05d      ",
-				   rose_route->lci1,
-				   rose2asc(rsbuf, &rose_route->src_addr),
-				   ax2asc(buf, &rose_route->src_call),
-				   rose_route->neigh1->number);
-		else
-			seq_puts(seq,
-				 "000  *           *          00000      ");
-
-		if (rose_route->neigh2)
-			seq_printf(seq,
-				   "%3.3X  %-10s  %-9s  %05d\n",
-				   rose_route->lci2,
-				   rose2asc(rsbuf, &rose_route->dest_addr),
-				   ax2asc(buf, &rose_route->dest_call),
-				   rose_route->neigh2->number);
-		 else
-			 seq_puts(seq,
-				  "000  *           *          00000\n");
-		}
-	return 0;
-}
-
-struct seq_operations rose_route_seqops = {
-	.start = rose_route_start,
-	.next = rose_route_next,
-	.stop = rose_route_stop,
-	.show = rose_route_show,
-};
-#endif /* CONFIG_PROC_FS */
-
-/*
- *	Release all memory associated with ROSE routing structures.
- */
-void __exit rose_rt_free(void)
-{
-	struct rose_neigh *s, *rose_neigh = rose_neigh_list;
-	struct rose_node  *t, *rose_node  = rose_node_list;
-	struct rose_route *u, *rose_route = rose_route_list;
-	int i;
-
-	while (rose_neigh != NULL) {
-		s          = rose_neigh;
-		rose_neigh = rose_neigh->next;
-
-		rose_remove_neigh(s);
-		rose_neigh_put(s);
-	}
-
-	while (rose_node != NULL) {
-		t         = rose_node;
-		rose_node = rose_node->next;
-
-		for (i = 0; i < t->count; i++)
-			rose_neigh_put(t->neighbour[i]);
-		rose_remove_node(t);
-	}
-
-	while (rose_route != NULL) {
-		u          = rose_route;
-		rose_route = rose_route->next;
-
-		rose_remove_route(u);
-	}
-}
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
deleted file mode 100644
index 4dbc437a9e22..000000000000
--- a/net/rose/rose_subr.c
+++ /dev/null
@@ -1,556 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <net/rose.h>
-
-static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose);
-
-/*
- *	This routine purges all of the queues of frames.
- */
-void rose_clear_queues(struct sock *sk)
-{
-	skb_queue_purge(&sk->sk_write_queue);
-	skb_queue_purge(&rose_sk(sk)->ack_queue);
-}
-
-/*
- * This routine purges the input queue of those frames that have been
- * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the
- * SDL diagram.
- */
-void rose_frames_acked(struct sock *sk, unsigned short nr)
-{
-	struct sk_buff *skb;
-	struct rose_sock *rose = rose_sk(sk);
-
-	/*
-	 * Remove all the ack-ed frames from the ack queue.
-	 */
-	if (rose->va != nr) {
-		while (skb_peek(&rose->ack_queue) != NULL && rose->va != nr) {
-			skb = skb_dequeue(&rose->ack_queue);
-			kfree_skb(skb);
-			rose->va = (rose->va + 1) % ROSE_MODULUS;
-		}
-	}
-}
-
-void rose_requeue_frames(struct sock *sk)
-{
-	struct sk_buff *skb, *skb_prev = NULL;
-
-	/*
-	 * Requeue all the un-ack-ed frames on the output queue to be picked
-	 * up by rose_kick. This arrangement handles the possibility of an
-	 * empty output queue.
-	 */
-	while ((skb = skb_dequeue(&rose_sk(sk)->ack_queue)) != NULL) {
-		if (skb_prev == NULL)
-			skb_queue_head(&sk->sk_write_queue, skb);
-		else
-			skb_append(skb_prev, skb, &sk->sk_write_queue);
-		skb_prev = skb;
-	}
-}
-
-/*
- *	Validate that the value of nr is between va and vs. Return true or
- *	false for testing.
- */
-int rose_validate_nr(struct sock *sk, unsigned short nr)
-{
-	struct rose_sock *rose = rose_sk(sk);
-	unsigned short vc = rose->va;
-
-	while (vc != rose->vs) {
-		if (nr == vc) return 1;
-		vc = (vc + 1) % ROSE_MODULUS;
-	}
-
-	return nr == rose->vs;
-}
-
-/*
- *  This routine is called when the packet layer internally generates a
- *  control frame.
- */
-void rose_write_internal(struct sock *sk, int frametype)
-{
-	struct rose_sock *rose = rose_sk(sk);
-	struct sk_buff *skb;
-	unsigned char  *dptr;
-	unsigned char  lci1, lci2;
-	int maxfaclen = 0;
-	int len, faclen;
-	int reserve;
-
-	reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1;
-	len = ROSE_MIN_LEN;
-
-	switch (frametype) {
-	case ROSE_CALL_REQUEST:
-		len   += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN;
-		maxfaclen = 256;
-		break;
-	case ROSE_CALL_ACCEPTED:
-	case ROSE_CLEAR_REQUEST:
-	case ROSE_RESET_REQUEST:
-		len   += 2;
-		break;
-	}
-
-	skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC);
-	if (!skb)
-		return;
-
-	/*
-	 *	Space for AX.25 header and PID.
-	 */
-	skb_reserve(skb, reserve);
-
-	dptr = skb_put(skb, len);
-
-	lci1 = (rose->lci >> 8) & 0x0F;
-	lci2 = (rose->lci >> 0) & 0xFF;
-
-	switch (frametype) {
-	case ROSE_CALL_REQUEST:
-		*dptr++ = ROSE_GFI | lci1;
-		*dptr++ = lci2;
-		*dptr++ = frametype;
-		*dptr++ = ROSE_CALL_REQ_ADDR_LEN_VAL;
-		memcpy(dptr, &rose->dest_addr,  ROSE_ADDR_LEN);
-		dptr   += ROSE_ADDR_LEN;
-		memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
-		dptr   += ROSE_ADDR_LEN;
-		faclen = rose_create_facilities(dptr, rose);
-		skb_put(skb, faclen);
-		dptr   += faclen;
-		break;
-
-	case ROSE_CALL_ACCEPTED:
-		*dptr++ = ROSE_GFI | lci1;
-		*dptr++ = lci2;
-		*dptr++ = frametype;
-		*dptr++ = 0x00;		/* Address length */
-		*dptr++ = 0;		/* Facilities length */
-		break;
-
-	case ROSE_CLEAR_REQUEST:
-		*dptr++ = ROSE_GFI | lci1;
-		*dptr++ = lci2;
-		*dptr++ = frametype;
-		*dptr++ = rose->cause;
-		*dptr++ = rose->diagnostic;
-		break;
-
-	case ROSE_RESET_REQUEST:
-		*dptr++ = ROSE_GFI | lci1;
-		*dptr++ = lci2;
-		*dptr++ = frametype;
-		*dptr++ = ROSE_DTE_ORIGINATED;
-		*dptr++ = 0;
-		break;
-
-	case ROSE_RR:
-	case ROSE_RNR:
-		*dptr++ = ROSE_GFI | lci1;
-		*dptr++ = lci2;
-		*dptr   = frametype;
-		*dptr++ |= (rose->vr << 5) & 0xE0;
-		break;
-
-	case ROSE_CLEAR_CONFIRMATION:
-	case ROSE_RESET_CONFIRMATION:
-		*dptr++ = ROSE_GFI | lci1;
-		*dptr++ = lci2;
-		*dptr++  = frametype;
-		break;
-
-	default:
-		printk(KERN_ERR "ROSE: rose_write_internal - invalid frametype %02X\n", frametype);
-		kfree_skb(skb);
-		return;
-	}
-
-	rose_transmit_link(skb, rose->neighbour);
-}
-
-int rose_decode(struct sk_buff *skb, int *ns, int *nr, int *q, int *d, int *m)
-{
-	unsigned char *frame;
-
-	frame = skb->data;
-
-	*ns = *nr = *q = *d = *m = 0;
-
-	switch (frame[2]) {
-	case ROSE_CALL_REQUEST:
-	case ROSE_CALL_ACCEPTED:
-	case ROSE_CLEAR_REQUEST:
-	case ROSE_CLEAR_CONFIRMATION:
-	case ROSE_RESET_REQUEST:
-	case ROSE_RESET_CONFIRMATION:
-		return frame[2];
-	default:
-		break;
-	}
-
-	if ((frame[2] & 0x1F) == ROSE_RR  ||
-	    (frame[2] & 0x1F) == ROSE_RNR) {
-		*nr = (frame[2] >> 5) & 0x07;
-		return frame[2] & 0x1F;
-	}
-
-	if ((frame[2] & 0x01) == ROSE_DATA) {
-		*q  = (frame[0] & ROSE_Q_BIT) == ROSE_Q_BIT;
-		*d  = (frame[0] & ROSE_D_BIT) == ROSE_D_BIT;
-		*m  = (frame[2] & ROSE_M_BIT) == ROSE_M_BIT;
-		*nr = (frame[2] >> 5) & 0x07;
-		*ns = (frame[2] >> 1) & 0x07;
-		return ROSE_DATA;
-	}
-
-	return ROSE_ILLEGAL;
-}
-
-static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *facilities, int len)
-{
-	unsigned char *pt;
-	unsigned char l, lg, n = 0;
-	int fac_national_digis_received = 0;
-
-	do {
-		switch (*p & 0xC0) {
-		case 0x00:
-			if (len < 2)
-				return -1;
-			p   += 2;
-			n   += 2;
-			len -= 2;
-			break;
-
-		case 0x40:
-			if (len < 3)
-				return -1;
-			if (*p == FAC_NATIONAL_RAND)
-				facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF);
-			p   += 3;
-			n   += 3;
-			len -= 3;
-			break;
-
-		case 0x80:
-			if (len < 4)
-				return -1;
-			p   += 4;
-			n   += 4;
-			len -= 4;
-			break;
-
-		case 0xC0:
-			if (len < 2)
-				return -1;
-			l = p[1];
-			if (len < 2 + l)
-				return -1;
-			if (*p == FAC_NATIONAL_DEST_DIGI) {
-				if (!fac_national_digis_received) {
-					if (l < AX25_ADDR_LEN)
-						return -1;
-					memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN);
-					facilities->source_ndigis = 1;
-				}
-			}
-			else if (*p == FAC_NATIONAL_SRC_DIGI) {
-				if (!fac_national_digis_received) {
-					if (l < AX25_ADDR_LEN)
-						return -1;
-					memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN);
-					facilities->dest_ndigis = 1;
-				}
-			}
-			else if (*p == FAC_NATIONAL_FAIL_CALL) {
-				if (l < AX25_ADDR_LEN)
-					return -1;
-				memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN);
-			}
-			else if (*p == FAC_NATIONAL_FAIL_ADD) {
-				if (l < 1 + ROSE_ADDR_LEN)
-					return -1;
-				memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN);
-			}
-			else if (*p == FAC_NATIONAL_DIGIS) {
-				if (l % AX25_ADDR_LEN)
-					return -1;
-				fac_national_digis_received = 1;
-				facilities->source_ndigis = 0;
-				facilities->dest_ndigis   = 0;
-				for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) {
-					if (pt[6] & AX25_HBIT) {
-						if (facilities->dest_ndigis >= ROSE_MAX_DIGIS)
-							return -1;
-						memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN);
-					} else {
-						if (facilities->source_ndigis >= ROSE_MAX_DIGIS)
-							return -1;
-						memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN);
-					}
-				}
-			}
-			p   += l + 2;
-			n   += l + 2;
-			len -= l + 2;
-			break;
-		}
-	} while (*p != 0x00 && len > 0);
-
-	return n;
-}
-
-static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *facilities, int len)
-{
-	unsigned char l, n = 0;
-	char callsign[11];
-
-	do {
-		switch (*p & 0xC0) {
-		case 0x00:
-			if (len < 2)
-				return -1;
-			p   += 2;
-			n   += 2;
-			len -= 2;
-			break;
-
-		case 0x40:
-			if (len < 3)
-				return -1;
-			p   += 3;
-			n   += 3;
-			len -= 3;
-			break;
-
-		case 0x80:
-			if (len < 4)
-				return -1;
-			p   += 4;
-			n   += 4;
-			len -= 4;
-			break;
-
-		case 0xC0:
-			if (len < 2)
-				return -1;
-			l = p[1];
-
-			/* Prevent overflows*/
-			if (l < 10 || l > 20)
-				return -1;
-
-			if (*p == FAC_CCITT_DEST_NSAP) {
-				memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN);
-				memcpy(callsign, p + 12,   l - 10);
-				callsign[l - 10] = '\0';
-				asc2ax(&facilities->source_call, callsign);
-			}
-			if (*p == FAC_CCITT_SRC_NSAP) {
-				memcpy(&facilities->dest_addr, p + 7, ROSE_ADDR_LEN);
-				memcpy(callsign, p + 12, l - 10);
-				callsign[l - 10] = '\0';
-				asc2ax(&facilities->dest_call, callsign);
-			}
-			p   += l + 2;
-			n   += l + 2;
-			len -= l + 2;
-			break;
-		}
-	} while (*p != 0x00 && len > 0);
-
-	return n;
-}
-
-int rose_parse_facilities(unsigned char *p, unsigned packet_len,
-	struct rose_facilities_struct *facilities)
-{
-	int facilities_len, len;
-
-	facilities_len = *p++;
-
-	if (facilities_len == 0 || (unsigned int)facilities_len > packet_len)
-		return 0;
-
-	while (facilities_len >= 3 && *p == 0x00) {
-		facilities_len--;
-		p++;
-
-		switch (*p) {
-		case FAC_NATIONAL:		/* National */
-			len = rose_parse_national(p + 1, facilities, facilities_len - 1);
-			break;
-
-		case FAC_CCITT:		/* CCITT */
-			len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
-			break;
-
-		default:
-			printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p);
-			len = 1;
-			break;
-		}
-
-		if (len < 0)
-			return 0;
-		if (WARN_ON(len >= facilities_len))
-			return 0;
-		facilities_len -= len + 1;
-		p += len + 1;
-	}
-
-	return facilities_len == 0;
-}
-
-static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose)
-{
-	unsigned char *p = buffer + 1;
-	char *callsign;
-	char buf[11];
-	int len, nb;
-
-	/* National Facilities */
-	if (rose->rand != 0 || rose->source_ndigis == 1 || rose->dest_ndigis == 1) {
-		*p++ = 0x00;
-		*p++ = FAC_NATIONAL;
-
-		if (rose->rand != 0) {
-			*p++ = FAC_NATIONAL_RAND;
-			*p++ = (rose->rand >> 8) & 0xFF;
-			*p++ = (rose->rand >> 0) & 0xFF;
-		}
-
-		/* Sent before older facilities */
-		if ((rose->source_ndigis > 0) || (rose->dest_ndigis > 0)) {
-			int maxdigi = 0;
-			*p++ = FAC_NATIONAL_DIGIS;
-			*p++ = AX25_ADDR_LEN * (rose->source_ndigis + rose->dest_ndigis);
-			for (nb = 0 ; nb < rose->source_ndigis ; nb++) {
-				if (++maxdigi >= ROSE_MAX_DIGIS)
-					break;
-				memcpy(p, &rose->source_digis[nb], AX25_ADDR_LEN);
-				p[6] |= AX25_HBIT;
-				p += AX25_ADDR_LEN;
-			}
-			for (nb = 0 ; nb < rose->dest_ndigis ; nb++) {
-				if (++maxdigi >= ROSE_MAX_DIGIS)
-					break;
-				memcpy(p, &rose->dest_digis[nb], AX25_ADDR_LEN);
-				p[6] &= ~AX25_HBIT;
-				p += AX25_ADDR_LEN;
-			}
-		}
-
-		/* For compatibility */
-		if (rose->source_ndigis > 0) {
-			*p++ = FAC_NATIONAL_SRC_DIGI;
-			*p++ = AX25_ADDR_LEN;
-			memcpy(p, &rose->source_digis[0], AX25_ADDR_LEN);
-			p   += AX25_ADDR_LEN;
-		}
-
-		/* For compatibility */
-		if (rose->dest_ndigis > 0) {
-			*p++ = FAC_NATIONAL_DEST_DIGI;
-			*p++ = AX25_ADDR_LEN;
-			memcpy(p, &rose->dest_digis[0], AX25_ADDR_LEN);
-			p   += AX25_ADDR_LEN;
-		}
-	}
-
-	*p++ = 0x00;
-	*p++ = FAC_CCITT;
-
-	*p++ = FAC_CCITT_DEST_NSAP;
-
-	callsign = ax2asc(buf, &rose->dest_call);
-
-	*p++ = strlen(callsign) + 10;
-	*p++ = (strlen(callsign) + 9) * 2;		/* ??? */
-
-	*p++ = 0x47; *p++ = 0x00; *p++ = 0x11;
-	*p++ = ROSE_ADDR_LEN * 2;
-	memcpy(p, &rose->dest_addr, ROSE_ADDR_LEN);
-	p   += ROSE_ADDR_LEN;
-
-	memcpy(p, callsign, strlen(callsign));
-	p   += strlen(callsign);
-
-	*p++ = FAC_CCITT_SRC_NSAP;
-
-	callsign = ax2asc(buf, &rose->source_call);
-
-	*p++ = strlen(callsign) + 10;
-	*p++ = (strlen(callsign) + 9) * 2;		/* ??? */
-
-	*p++ = 0x47; *p++ = 0x00; *p++ = 0x11;
-	*p++ = ROSE_ADDR_LEN * 2;
-	memcpy(p, &rose->source_addr, ROSE_ADDR_LEN);
-	p   += ROSE_ADDR_LEN;
-
-	memcpy(p, callsign, strlen(callsign));
-	p   += strlen(callsign);
-
-	len       = p - buffer;
-	buffer[0] = len - 1;
-
-	return len;
-}
-
-void rose_disconnect(struct sock *sk, int reason, int cause, int diagnostic)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	rose_stop_timer(sk);
-	rose_stop_idletimer(sk);
-
-	rose_clear_queues(sk);
-
-	rose->lci   = 0;
-	rose->state = ROSE_STATE_0;
-
-	if (cause != -1)
-		rose->cause = cause;
-
-	if (diagnostic != -1)
-		rose->diagnostic = diagnostic;
-
-	sk->sk_state     = TCP_CLOSE;
-	sk->sk_err       = reason;
-	sk->sk_shutdown |= SEND_SHUTDOWN;
-
-	if (!sock_flag(sk, SOCK_DEAD)) {
-		sk->sk_state_change(sk);
-		sock_set_flag(sk, SOCK_DEAD);
-	}
-}
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
deleted file mode 100644
index bb60a1654d61..000000000000
--- a/net/rose/rose_timer.c
+++ /dev/null
@@ -1,227 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
- * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org)
- */
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <net/rose.h>
-
-static void rose_heartbeat_expiry(struct timer_list *t);
-static void rose_timer_expiry(struct timer_list *);
-static void rose_idletimer_expiry(struct timer_list *);
-
-void rose_start_heartbeat(struct sock *sk)
-{
-	sk_stop_timer(sk, &sk->sk_timer);
-
-	sk->sk_timer.function = rose_heartbeat_expiry;
-	sk->sk_timer.expires  = jiffies + 5 * HZ;
-
-	sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires);
-}
-
-void rose_start_t1timer(struct sock *sk)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	sk_stop_timer(sk, &rose->timer);
-
-	rose->timer.function = rose_timer_expiry;
-	rose->timer.expires  = jiffies + rose->t1;
-
-	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
-}
-
-void rose_start_t2timer(struct sock *sk)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	sk_stop_timer(sk, &rose->timer);
-
-	rose->timer.function = rose_timer_expiry;
-	rose->timer.expires  = jiffies + rose->t2;
-
-	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
-}
-
-void rose_start_t3timer(struct sock *sk)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	sk_stop_timer(sk, &rose->timer);
-
-	rose->timer.function = rose_timer_expiry;
-	rose->timer.expires  = jiffies + rose->t3;
-
-	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
-}
-
-void rose_start_hbtimer(struct sock *sk)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	sk_stop_timer(sk, &rose->timer);
-
-	rose->timer.function = rose_timer_expiry;
-	rose->timer.expires  = jiffies + rose->hb;
-
-	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
-}
-
-void rose_start_idletimer(struct sock *sk)
-{
-	struct rose_sock *rose = rose_sk(sk);
-
-	sk_stop_timer(sk, &rose->idletimer);
-
-	if (rose->idle > 0) {
-		rose->idletimer.function = rose_idletimer_expiry;
-		rose->idletimer.expires  = jiffies + rose->idle;
-
-		sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires);
-	}
-}
-
-void rose_stop_heartbeat(struct sock *sk)
-{
-	sk_stop_timer(sk, &sk->sk_timer);
-}
-
-void rose_stop_timer(struct sock *sk)
-{
-	sk_stop_timer(sk, &rose_sk(sk)->timer);
-}
-
-void rose_stop_idletimer(struct sock *sk)
-{
-	sk_stop_timer(sk, &rose_sk(sk)->idletimer);
-}
-
-static void rose_heartbeat_expiry(struct timer_list *t)
-{
-	struct sock *sk = timer_container_of(sk, t, sk_timer);
-	struct rose_sock *rose = rose_sk(sk);
-
-	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk)) {
-		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ/20);
-		goto out;
-	}
-	switch (rose->state) {
-	case ROSE_STATE_0:
-		/* Magic here: If we listen() and a new link dies before it
-		   is accepted() it isn't 'dead' so doesn't get removed. */
-		if (sock_flag(sk, SOCK_DESTROY) ||
-		    (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
-			bh_unlock_sock(sk);
-			rose_destroy_socket(sk);
-			sock_put(sk);
-			return;
-		}
-		break;
-
-	case ROSE_STATE_3:
-		/*
-		 * Check for the state of the receive buffer.
-		 */
-		if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) &&
-		    (rose->condition & ROSE_COND_OWN_RX_BUSY)) {
-			rose->condition &= ~ROSE_COND_OWN_RX_BUSY;
-			rose->condition &= ~ROSE_COND_ACK_PENDING;
-			rose->vl         = rose->vr;
-			rose_write_internal(sk, ROSE_RR);
-			rose_stop_timer(sk);	/* HB */
-			break;
-		}
-		break;
-	}
-
-	rose_start_heartbeat(sk);
-out:
-	bh_unlock_sock(sk);
-	sock_put(sk);
-}
-
-static void rose_timer_expiry(struct timer_list *t)
-{
-	struct rose_sock *rose = timer_container_of(rose, t, timer);
-	struct sock *sk = &rose->sock;
-
-	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk)) {
-		sk_reset_timer(sk, &rose->timer, jiffies + HZ/20);
-		goto out;
-	}
-	switch (rose->state) {
-	case ROSE_STATE_1:	/* T1 */
-	case ROSE_STATE_4:	/* T2 */
-		rose_write_internal(sk, ROSE_CLEAR_REQUEST);
-		rose->state = ROSE_STATE_2;
-		rose_start_t3timer(sk);
-		break;
-
-	case ROSE_STATE_2:	/* T3 */
-		rose_neigh_put(rose->neighbour);
-		rose_disconnect(sk, ETIMEDOUT, -1, -1);
-		break;
-
-	case ROSE_STATE_3:	/* HB */
-		if (rose->condition & ROSE_COND_ACK_PENDING) {
-			rose->condition &= ~ROSE_COND_ACK_PENDING;
-			rose_enquiry_response(sk);
-		}
-		break;
-	}
-out:
-	bh_unlock_sock(sk);
-	sock_put(sk);
-}
-
-static void rose_idletimer_expiry(struct timer_list *t)
-{
-	struct rose_sock *rose = timer_container_of(rose, t, idletimer);
-	struct sock *sk = &rose->sock;
-
-	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk)) {
-		sk_reset_timer(sk, &rose->idletimer, jiffies + HZ/20);
-		goto out;
-	}
-	rose_clear_queues(sk);
-
-	rose_write_internal(sk, ROSE_CLEAR_REQUEST);
-	rose_sk(sk)->state = ROSE_STATE_2;
-
-	rose_start_t3timer(sk);
-
-	sk->sk_state     = TCP_CLOSE;
-	sk->sk_err       = 0;
-	sk->sk_shutdown |= SEND_SHUTDOWN;
-
-	if (!sock_flag(sk, SOCK_DEAD)) {
-		sk->sk_state_change(sk);
-		sock_set_flag(sk, SOCK_DEAD);
-	}
-out:
-	bh_unlock_sock(sk);
-	sock_put(sk);
-}
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
deleted file mode 100644
index d801315b7083..000000000000
--- a/net/rose/sysctl_net_rose.c
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com)
- */
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include <linux/init.h>
-#include <net/ax25.h>
-#include <net/rose.h>
-
-static int min_timer[]  = {1 * HZ};
-static int max_timer[]  = {300 * HZ};
-static int min_idle[]   = {0 * HZ};
-static int max_idle[]   = {65535 * HZ};
-static int min_route[1],       max_route[] = {1};
-static int min_ftimer[] = {60 * HZ};
-static int max_ftimer[] = {600 * HZ};
-static int min_maxvcs[] = {1}, max_maxvcs[] = {254};
-static int min_window[] = {1}, max_window[] = {7};
-
-static struct ctl_table_header *rose_table_header;
-
-static struct ctl_table rose_table[] = {
-	{
-		.procname	= "restart_request_timeout",
-		.data		= &sysctl_rose_restart_request_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_timer,
-		.extra2		= &max_timer
-	},
-	{
-		.procname	= "call_request_timeout",
-		.data		= &sysctl_rose_call_request_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_timer,
-		.extra2		= &max_timer
-	},
-	{
-		.procname	= "reset_request_timeout",
-		.data		= &sysctl_rose_reset_request_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_timer,
-		.extra2		= &max_timer
-	},
-	{
-		.procname	= "clear_request_timeout",
-		.data		= &sysctl_rose_clear_request_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_timer,
-		.extra2		= &max_timer
-	},
-	{
-		.procname	= "no_activity_timeout",
-		.data		= &sysctl_rose_no_activity_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_idle,
-		.extra2		= &max_idle
-	},
-	{
-		.procname	= "acknowledge_hold_back_timeout",
-		.data		= &sysctl_rose_ack_hold_back_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_timer,
-		.extra2		= &max_timer
-	},
-	{
-		.procname	= "routing_control",
-		.data		= &sysctl_rose_routing_control,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_route,
-		.extra2		= &max_route
-	},
-	{
-		.procname	= "link_fail_timeout",
-		.data		= &sysctl_rose_link_fail_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_ftimer,
-		.extra2		= &max_ftimer
-	},
-	{
-		.procname	= "maximum_virtual_circuits",
-		.data		= &sysctl_rose_maximum_vcs,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_maxvcs,
-		.extra2		= &max_maxvcs
-	},
-	{
-		.procname	= "window_size",
-		.data		= &sysctl_rose_window_size,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_window,
-		.extra2		= &max_window
-	},
-};
-
-void __init rose_register_sysctl(void)
-{
-	rose_table_header = register_net_sysctl(&init_net, "net/rose", rose_table);
-}
-
-void rose_unregister_sysctl(void)
-{
-	unregister_net_sysctl_table(rose_table_header);
-}
-- 
cgit v1.2.3


From 43eb354ecb471426e97b0ce6a0c922ec20f82027 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 16 Apr 2026 14:54:29 -0700
Subject: nstree: fix func. parameter kernel-doc warnings

Use the correct parameter name ("__ns") for function parameter kernel-doc
to avoid 3 warnings:

Warning: include/linux/nstree.h:68 function parameter '__ns' not described in 'ns_tree_add_raw'
Warning: include/linux/nstree.h:77 function parameter '__ns' not described in 'ns_tree_add'
Warning: include/linux/nstree.h:88 function parameter '__ns' not described in 'ns_tree_remove'

Fixes: 885fc8ac0a4d ("nstree: make iterator generic")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260416215429.948898-1-rdunlap@infradead.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/nstree.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nstree.h b/include/linux/nstree.h
index 175e4625bfa6..5b64d4572881 100644
--- a/include/linux/nstree.h
+++ b/include/linux/nstree.h
@@ -61,7 +61,7 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_t
 
 /**
  * ns_tree_add_raw - Add a namespace to a namespace
- * @ns: Namespace to add
+ * @__ns: Namespace to add
  *
  * This function adds a namespace to the appropriate namespace tree
  * without assigning a id.
@@ -70,7 +70,7 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_t
 
 /**
  * ns_tree_add - Add a namespace to a namespace tree
- * @ns: Namespace to add
+ * @__ns: Namespace to add
  *
  * This function assigns a new id to the namespace and adds it to the
  * appropriate namespace tree and list.
@@ -81,7 +81,7 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_t
 
 /**
  * ns_tree_remove - Remove a namespace from a namespace tree
- * @ns: Namespace to remove
+ * @__ns: Namespace to remove
  *
  * This function removes a namespace from the appropriate namespace
  * tree and list.
-- 
cgit v1.2.3


From 33e92e9ecf48c08cb4807e9a36f9eb01619c1a1e Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 23 Apr 2026 11:56:11 +0200
Subject: eventpoll: refresh eventpoll_release() fast-path comment

The old comment justified the lockless READ_ONCE(file->f_ep) check
with "False positives simply cannot happen because the file is on
the way to be removed and nobody ( but eventpoll ) has still a
reference to this file." That reasoning was the root of the UAF
fixed in "eventpoll: fix ep_remove struct eventpoll / struct file
UAF": __ep_remove() could clear f_ep while another close raced
past the fast path and freed the watched eventpoll / recycled the
struct file slot.

With ep_remove() now pinning @file via epi_fget() across the f_ep
clear and hlist_del_rcu(), the invariant is re-established for the
right reason: anyone who might clear f_ep holds @file alive for
the duration, so a NULL observation really does mean no
concurrent eventpoll path has work left on this file. Refresh the
comment accordingly so the next reader doesn't inherit the broken
model.

Link: https://patch.msgid.link/20260423-work-epoll-uaf-v1-8-2470f9eec0f5@kernel.org
Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
---
 include/linux/eventpoll.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index ea9ca0e4172a..728fb5dee5ed 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -39,12 +39,16 @@ static inline void eventpoll_release(struct file *file)
 {
 
 	/*
-	 * Fast check to avoid the get/release of the semaphore. Since
-	 * we're doing this outside the semaphore lock, it might return
-	 * false negatives, but we don't care. It'll help in 99.99% of cases
-	 * to avoid the semaphore lock. False positives simply cannot happen
-	 * because the file in on the way to be removed and nobody ( but
-	 * eventpoll ) has still a reference to this file.
+	 * Fast check to skip the slow path in the common case where the
+	 * file was never attached to an epoll. Safe without file->f_lock
+	 * because every f_ep writer excludes a concurrent __fput() on
+	 * @file:
+	 *   - ep_insert() requires the file alive (refcount > 0);
+	 *   - ep_remove() holds @file pinned via epi_fget() across the
+	 *     write;
+	 *   - eventpoll_release_file() runs from __fput() itself.
+	 * We are in __fput() here, so none of those can race us: a NULL
+	 * observation truly means no epoll path has work left on @file.
 	 */
 	if (likely(!READ_ONCE(file->f_ep)))
 		return;
-- 
cgit v1.2.3